Example #1
import numpy as np

# BrainDQN and game come from the project's own modules
def playWeChatJump():
    actions = 10
    brain = BrainDQN(actions)  # actions use one-hot encoding
    observation0 = game.init_state()
    brain.setInitState(observation0)
    while True:
        action = brain.getAction()
        nextObservation, reward, terminal = game.frame_step(action)
        nextObservation = np.reshape(nextObservation, (80, 80, 1))
        brain.setPerception(nextObservation, action, reward, terminal)
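BrainDQN and game above come from the project's own modules. As a reading aid, here is a minimal illustrative stub of the agent interface this loop assumes; the method names are taken from the example, but the bodies are placeholder assumptions, not the project's implementation.

import numpy as np

class BrainDQN:
    """Illustrative stub of the agent interface used above (assumed)."""

    def __init__(self, actions):
        self.actions = actions  # size of the one-hot action vector
        self.currentState = None

    def setInitState(self, observation):
        # remember the first observation as the initial state
        self.currentState = observation

    def getAction(self):
        # placeholder policy: a uniformly random one-hot action
        action = np.zeros(self.actions)
        action[np.random.randint(self.actions)] = 1
        return action

    def setPerception(self, nextObservation, action, reward, terminal):
        # the real class stores the transition and trains the Q-network;
        # this stub only advances the current state
        self.currentState = nextObservation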
Example #2
import numpy as np
import matplotlib.pyplot as plt

# BrainDQN and GameState come from the project's own modules
P_1 = [round(0.1 * i / 2.0, 2) for i in range(1, 9)]
P_2 = [round(0.1 * i / 2.0, 2) for i in range(1, 9)]
actions = len(P_2)

Loss = []
Success = []
Fre = []

noise = 3
num_sensor = 10  # N
policy = 2  # power-change policy for the PU: 1 (multi-step) or 2 (single-step)

brain = BrainDQN(actions, num_sensor)
com = GameState(P_1, P_2, noise, num_sensor)
terminal = True
recording = 100000

while recording > 0:
    # initialization
    if terminal:
        com.ini()
        observation0, reward0, terminal = com.frame_step(
            np.zeros(actions), policy, False)
        brain.setInitState(observation0)

    # train
    action, recording = brain.getAction()
    nextObservation, reward, terminal = com.frame_step(action, policy, True)
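    # --- illustrative continuation (not in the original excerpt) ---
    # judging from Example #1's API, the loop would plausibly close by
    # feeding the transition back to the agent and recording the loss:
    loss = brain.setPerception(nextObservation, action, reward, terminal)
    Loss.append(loss)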
Example #3
import math
import numpy as np

# BrainDQN and ENVIRNOMENT come from the project's own modules
LOSS = []

SENSOR_NUM = 10
ACTION_NUM = 26
record = 95000
Time = 10

# Construct the state-generating signal A*sin(omega*t)
N = 10000
t = np.linspace(0, 10000, num=N)  # step size 1, 10000 samples in total
omega = math.pi / 25
A = 10
x1 = A * np.sin(omega * t)

DQN1 = BrainDQN(SENSOR_NUM, ACTION_NUM)
ENV = ENVIRNOMENT(ACTION_NUM)

state = x1[Time]
stateInput = ENV.creat_sensor(Power=x1[Time], sensor_num=SENSOR_NUM)  # build the sensor observation from the signal power

action_input = DQN1.getAction_1(action_num=ACTION_NUM, stateInput=stateInput)
print(action_input)

# reward = ENV.get_reward(stateInput= stateInput,actionInput= action_input)
# nextState = ENV.creat_sensor(Power= x1[time+1],sensor_num= SENSOR_NUM)
#
# # push stateInput, actionInput, reward, and nextState into the replay memory
# LOSS.append(DQN1.get_loss(currentState= stateInput,nextState= nextState,action=action_input,reward= reward))
# time = time + 1
# record = record -1
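The commented-out block above sketches a replay-memory step. Below is a minimal illustrative buffer for that step; ReplayMemory is a hypothetical helper, not the project's actual class.

import random
from collections import deque

class ReplayMemory:
    """Hypothetical FIFO transition buffer illustrating the step the
    commented-out lines above describe."""

    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)

    def store(self, state, action, reward, next_state):
        # append one transition, evicting the oldest when full
        self.buffer.append((state, action, reward, next_state))

    def sample(self, batch_size=32):
        # uniform random minibatch for one DQN training step
        return random.sample(list(self.buffer), min(batch_size, len(self.buffer)))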
Example #4
import math
import numpy as np

# BrainDQN and ENVIRNOMENT come from the project's own modules
LOSS = []

SENSOR_NUM = 10
ACTION_NUM = 26
record = 95000
time = 0

# Construct the state-generating signal A*sin(omega*t)
N = 100000
t = np.linspace(0, 100000, num=N)  # step size 1, 100000 samples in total
omega = math.pi / 25
A = 10
x1 = A * np.sin(omega * t)

DQN = BrainDQN(SENSOR_NUM, ACTION_NUM)
ENV = ENVIRNOMENT(ACTION_NUM)
R = []
R_total = 0
while record != 0:
    if time > 90000:
        break
    state = x1[time]
    stateInput = ENV.creat_sensor(Power=x1[time], sensor_num=SENSOR_NUM)
    action_input = DQN.getAction(action_num=ACTION_NUM, stateInput=stateInput)
    reward = ENV.get_reward(stateInput=stateInput, actionInput=action_input)
    #print(reward)
    R_total += reward
    R.append(R_total)
    nextState = ENV.creat_sensor(Power=x1[time + 1], sensor_num=SENSOR_NUM)
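getAction in these examples presumably runs the Q-network; below is a minimal illustrative epsilon-greedy selection over precomputed Q-values (epsilon_greedy is a hypothetical helper, not the project's method).

import numpy as np

def epsilon_greedy(q_values, epsilon=0.1):
    """Hypothetical helper: explore with probability epsilon,
    otherwise pick the action with the highest Q-value."""
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))  # explore
    return int(np.argmax(q_values))  # exploit

# usage: pick an action index for a 26-action problem with random Q-values
action = epsilon_greedy(np.random.rand(26))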
Example #5
# math, numpy (as np), t1 (a longer time axis), SENSOR_NUM, ACTION_NUM,
# N, and Time come from earlier in the original script; t2 below is
# unused in this excerpt
t2 = np.linspace(0, 8, num=9)
omega = math.pi / 9
A = 10
sig = A * np.sin(omega * t1)

# build the actionChoice vector (the discrete action set sampled from sig)
def actionChoice(sig):
    choices = []
    for i in range(5):
        choices.append(sig[i])
    for i in range(10, 14):
        choices.append(sig[i])
    return choices

actionChoice = actionChoice(sig)

Qnetwork = BrainDQN(SENSOR_NUM, ACTION_NUM)
Env = ENVIRONMENT(SensorNum=SENSOR_NUM, ActionNum=ACTION_NUM, ActionChoice=actionChoice)

Reward = []
R_total = 0
Loss = []
ActionShow = []

while Time < N-1:

    if Time < 2000:  # train on prior experience for three periods
        stateInput = Env.creatSensor(Power=sig[Time])
        print("iterations:%d" % Time)
        actionInput = Qnetwork.getAction_1(actionNum=ACTION_NUM, stateInput=stateInput, Time=Time)
        reward, actionshow = Env.getReward(stateInput=stateInput, actionInput=actionInput)
        ActionShow.append(actionshow)
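actionChoice above hand-picks nine samples of the sine signal as the discrete action set. An illustrative generalization that takes the sample indices as a parameter (build_action_choice is hypothetical; the default indices reproduce the selection above):

import math
import numpy as np

def build_action_choice(sig, indices=(0, 1, 2, 3, 4, 10, 11, 12, 13)):
    """Hypothetical generalization of actionChoice: pick the signal
    samples at the given indices as the action set."""
    return [sig[i] for i in indices]

# usage on a standalone sine signal
demo_sig = 10 * np.sin(math.pi / 9 * np.linspace(0, 17, num=18))
print(build_action_choice(demo_sig))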
Example #6
import numpy as np

# BrainDQN and GameState come from the project's own modules;
# PU_powers is defined analogously earlier in the original script
SU_powers = [round(i / 10.0, 2) for i in range(1, 15)]
actions = len(SU_powers)

PU_num = 2  # or rd.randint(1, 10)
SU_num = 5  # or rd.randint(1, 10)

Loss = []
Success = []
Fre = []

noise = 3
policy = 1  # power-change policy for the PU: 1 (multi-step) or 2 (single-step)

brain = BrainDQN(actions, PU_num, SU_num)
com = GameState(PU_powers, SU_powers, noise, PU_num, SU_num)
terminal = True
recording = 1000

while recording > 0:
    # initialization
    if terminal:
        com.ini()
        observation0, reward0, terminal = com.frame_step(
            np.zeros(actions), policy, False)
        brain.setInitState(observation0)
    # train
    action, recording = brain.getAction()
    nextObservation, reward, terminal = com.frame_step(action, policy, True)
    loss = brain.setPerception(nextObservation, action, reward)
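The Loss, Success, and Fre lists in these examples are collected for later inspection; an illustrative way to plot the loss curve afterwards (matplotlib as imported in Example #2; the data here is a placeholder, not real training output):

import matplotlib.pyplot as plt

# placeholder curve standing in for the Loss list collected above
Loss = [1.0 / (step + 1) for step in range(200)]

plt.plot(Loss)
plt.xlabel("training step")
plt.ylabel("loss")
plt.title("DQN training loss (illustrative)")
plt.show()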