Example #1
import numpy as np

# BrainDQN and game come from the project's own modules
def playWeChatJump():
    actions = 10
    brain = BrainDQN(actions)  # actions use one-hot encoding
    observation0 = game.init_state()
    brain.setInitState(observation0)
    while True:
        action = brain.getAction()
        nextObservation, reward, terminal = game.frame_step(action)
        nextObservation = np.reshape(nextObservation, (80, 80, 1))
        brain.setPerception(nextObservation, action, reward, terminal)
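BrainDQN and game above come from the project's own modules. As a reading aid, here is a minimal illustrative stub of the agent interface this loop assumes; the method names are taken from the example, but the bodies are placeholder assumptions, not the project's implementation.

import numpy as np

class BrainDQN:
    """Illustrative stub of the agent interface used above (assumed)."""

    def __init__(self, actions):
        self.actions = actions  # size of the one-hot action vector
        self.currentState = None

    def setInitState(self, observation):
        # remember the first observation as the initial state
        self.currentState = observation

    def getAction(self):
        # placeholder policy: a uniformly random one-hot action
        action = np.zeros(self.actions)
        action[np.random.randint(self.actions)] = 1
        return action

    def setPerception(self, nextObservation, action, reward, terminal):
        # the real class stores the transition and trains the Q-network;
        # this stub only advances the current state
        self.currentState = nextObservation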
Example #2
import numpy as np
import matplotlib.pyplot as plt

# BrainDQN and GameState come from the project's own modules
P_1 = [round(0.1 * i / 2.0, 2) for i in range(1, 9)]
P_2 = [round(0.1 * i / 2.0, 2) for i in range(1, 9)]
actions = len(P_2)

Loss = []
Success = []
Fre = []

noise = 3
num_sensor = 10  # N
policy = 2  # power-change policy for the PU: 1 (multi-step) or 2 (single-step)

brain = BrainDQN(actions, num_sensor)
com = GameState(P_1, P_2, noise, num_sensor)
terminal = True
recording = 100000

while recording > 0:
    # initialization
    if terminal:
        com.ini()
        observation0, reward0, terminal = com.frame_step(
            np.zeros(actions), policy, False)
        brain.setInitState(observation0)

    # train
    action, recording = brain.getAction()
    nextObservation, reward, terminal = com.frame_step(action, policy, True)
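    # --- illustrative continuation (not in the original excerpt) ---
    # judging from Example #1's API, the loop would plausibly close by
    # feeding the transition back to the agent and recording the loss:
    loss = brain.setPerception(nextObservation, action, reward, terminal)
    Loss.append(loss)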
Example #3
import math
import numpy as np

# BrainDQN and ENVIRNOMENT come from the project's own modules
LOSS = []

SENSOR_NUM = 10
ACTION_NUM = 26
record = 95000
Time = 10

# Construct the state-generating signal A*sin(omega*t)
N = 10000
t = np.linspace(0, 10000, num=N)  # step size 1, 10000 samples in total
omega = math.pi / 25
A = 10
x1 = A * np.sin(omega * t)

DQN1 = BrainDQN(SENSOR_NUM, ACTION_NUM)
ENV = ENVIRNOMENT(ACTION_NUM)

state = x1[Time]
stateInput = ENV.creat_sensor(Power=x1[Time], sensor_num=SENSOR_NUM)  # build the sensor observation from the signal power

action_input = DQN1.getAction_1(action_num=ACTION_NUM, stateInput=stateInput)
print(action_input)

# reward = ENV.get_reward(stateInput= stateInput,actionInput= action_input)
# nextState = ENV.creat_sensor(Power= x1[time+1],sensor_num= SENSOR_NUM)
#
# # push stateInput, actionInput, reward, and nextState into the replay memory
# LOSS.append(DQN1.get_loss(currentState= stateInput,nextState= nextState,action=action_input,reward= reward))
# time = time + 1
# record = record -1
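The commented-out block above sketches a replay-memory step. Below is a minimal illustrative buffer for that step; ReplayMemory is a hypothetical helper, not the project's actual class.

import random
from collections import deque

class ReplayMemory:
    """Hypothetical FIFO transition buffer illustrating the step the
    commented-out lines above describe."""

    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)

    def store(self, state, action, reward, next_state):
        # append one transition, evicting the oldest when full
        self.buffer.append((state, action, reward, next_state))

    def sample(self, batch_size=32):
        # uniform random minibatch for one DQN training step
        return random.sample(list(self.buffer), min(batch_size, len(self.buffer)))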
Example #4
import math
import numpy as np

# BrainDQN and ENVIRNOMENT come from the project's own modules
LOSS = []

SENSOR_NUM = 10
ACTION_NUM = 26
record = 95000
time = 0

# Construct the state-generating signal A*sin(omega*t)
N = 100000
t = np.linspace(0, 100000, num=N)  # step size 1, 100000 samples in total
omega = math.pi / 25
A = 10
x1 = A * np.sin(omega * t)

DQN = BrainDQN(SENSOR_NUM, ACTION_NUM)
ENV = ENVIRNOMENT(ACTION_NUM)
R = []
R_total = 0
while record != 0:
    if time > 90000:
        break
    state = x1[time]
    stateInput = ENV.creat_sensor(Power=x1[time], sensor_num=SENSOR_NUM)
    action_input = DQN.getAction(action_num=ACTION_NUM, stateInput=stateInput)
    reward = ENV.get_reward(stateInput=stateInput, actionInput=action_input)
    #print(reward)
    R_total += reward
    R.append(R_total)
    nextState = ENV.creat_sensor(Power=x1[time + 1], sensor_num=SENSOR_NUM)
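getAction in these examples presumably runs the Q-network; below is a minimal illustrative epsilon-greedy selection over precomputed Q-values (epsilon_greedy is a hypothetical helper, not the project's method).

import numpy as np

def epsilon_greedy(q_values, epsilon=0.1):
    """Hypothetical helper: explore with probability epsilon,
    otherwise pick the action with the highest Q-value."""
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))  # explore
    return int(np.argmax(q_values))  # exploit

# usage: pick an action index for a 26-action problem with random Q-values
action = epsilon_greedy(np.random.rand(26))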
Example #5
# math, numpy (as np), t1 (a longer time axis), SENSOR_NUM, ACTION_NUM,
# N, and Time come from earlier in the original script; t2 below is
# unused in this excerpt
t2 = np.linspace(0, 8, num=9)
omega = math.pi / 9
A = 10
sig = A * np.sin(omega * t1)

# build the actionChoice vector (the discrete action set sampled from sig)
def actionChoice(sig):
    choices = []
    for i in range(5):
        choices.append(sig[i])
    for i in range(10, 14):
        choices.append(sig[i])
    return choices

actionChoice = actionChoice(sig)

Qnetwork = BrainDQN(SENSOR_NUM, ACTION_NUM)
Env = ENVIRONMENT(SensorNum=SENSOR_NUM, ActionNum=ACTION_NUM, ActionChoice=actionChoice)

Reward = []
R_total = 0
Loss = []
ActionShow = []

while Time < N-1:

    if Time < 2000:  # train on prior experience for three periods
        stateInput = Env.creatSensor(Power=sig[Time])
        print("iterations:%d" % Time)
        actionInput = Qnetwork.getAction_1(actionNum=ACTION_NUM, stateInput=stateInput, Time=Time)
        reward, actionshow = Env.getReward(stateInput=stateInput, actionInput=actionInput)
        ActionShow.append(actionshow)
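actionChoice above hand-picks nine samples of the sine signal as the discrete action set. An illustrative generalization that takes the sample indices as a parameter (build_action_choice is hypothetical; the default indices reproduce the selection above):

import math
import numpy as np

def build_action_choice(sig, indices=(0, 1, 2, 3, 4, 10, 11, 12, 13)):
    """Hypothetical generalization of actionChoice: pick the signal
    samples at the given indices as the action set."""
    return [sig[i] for i in indices]

# usage on a standalone sine signal
demo_sig = 10 * np.sin(math.pi / 9 * np.linspace(0, 17, num=18))
print(build_action_choice(demo_sig))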
Example #6
import numpy as np

# BrainDQN and GameState come from the project's own modules;
# PU_powers is defined analogously earlier in the original script
SU_powers = [round(i / 10.0, 2) for i in range(1, 15)]
actions = len(SU_powers)

PU_num = 2  # or rd.randint(1, 10)
SU_num = 5  # or rd.randint(1, 10)

Loss = []
Success = []
Fre = []

noise = 3
policy = 1  # power-change policy for the PU: 1 (multi-step) or 2 (single-step)

brain = BrainDQN(actions, PU_num, SU_num)
com = GameState(PU_powers, SU_powers, noise, PU_num, SU_num)
terminal = True
recording = 1000

while recording > 0:
    # initialization
    if terminal:
        com.ini()
        observation0, reward0, terminal = com.frame_step(
            np.zeros(actions), policy, False)
        brain.setInitState(observation0)
    # train
    action, recording = brain.getAction()
    nextObservation, reward, terminal = com.frame_step(action, policy, True)
    loss = brain.setPerception(nextObservation, action, reward)
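The Loss, Success, and Fre lists in these examples are collected for later inspection; an illustrative way to plot the loss curve afterwards (matplotlib as imported in Example #2; the data here is a placeholder, not real training output):

import matplotlib.pyplot as plt

# placeholder curve standing in for the Loss list collected above
Loss = [1.0 / (step + 1) for step in range(200)]

plt.plot(Loss)
plt.xlabel("training step")
plt.ylabel("loss")
plt.title("DQN training loss (illustrative)")
plt.show()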