import numpy as np

# BrainDQN and game are provided by the project's own modules.
def playWeChatJump():
    actions = 10
    brain = BrainDQN(actions)  # actions are one-hot encoded
    observation0 = game.init_state()
    brain.setInitState(observation0)
    while True:
        action = brain.getAction()
        nextObservation, reward, terminal = game.frame_step(action)
        nextObservation = np.reshape(nextObservation, (80, 80, 1))
        brain.setPerception(nextObservation, action, reward, terminal)
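# The loop above assumes a BrainDQN class exposing setInitState, getAction and
# setPerception. A minimal sketch of that interface, with a random policy and a
# bounded replay memory standing in for the real network (everything named
# *Stub below is an assumption, not the repository's implementation):
import random
from collections import deque

import numpy as np

class BrainDQNStub:
    """Hypothetical stand-in for BrainDQN: same interface, random policy."""

    def __init__(self, actions, memory_size=50000):
        self.actions = actions
        self.currentState = None
        self.replayMemory = deque(maxlen=memory_size)  # bounded experience buffer

    def setInitState(self, observation):
        # Remember the first observation as the current state.
        self.currentState = observation

    def getAction(self):
        # Return a one-hot action vector; a real agent would take the argmax
        # of the predicted Q-values instead of a random index.
        action = np.zeros(self.actions)
        action[random.randrange(self.actions)] = 1
        return action

    def setPerception(self, nextObservation, action, reward, terminal):
        # Store the transition; a real agent would also run a training step.
        self.replayMemory.append(
            (self.currentState, action, reward, nextObservation, terminal))
        self.currentState = nextObservation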
import numpy as np
import matplotlib.pyplot as plt

P_1 = [round(0.1 * i / 2.0, 2) for i in range(1, 9)]
P_2 = [round(0.1 * i / 2.0, 2) for i in range(1, 9)]
actions = len(P_2)
Loss = []
Success = []
Fre = []
noise = 3
num_sensor = 10  # N
policy = 2  # PU power-change policy: 1 (multi-step) or 2 (single step)
brain = BrainDQN(actions, num_sensor)
com = GameState(P_1, P_2, noise, num_sensor)
terminal = True
recording = 100000
while recording > 0:
    # initialization
    if terminal:
        com.ini()
        observation0, reward0, terminal = com.frame_step(
            np.zeros(actions), policy, False)
        brain.setInitState(observation0)
    # train
    action, recording = brain.getAction()
    nextObservation, reward, terminal = com.frame_step(action, policy, True)
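# frame_step(action, policy, train) is expected to return the triple
# (observation, reward, terminal). A minimal environment stub honoring that
# contract (hypothetical; the real GameState models the PU/SU power game):
import numpy as np

class GameStateStub:
    """Hypothetical stand-in for GameState with the same frame_step contract."""

    def __init__(self, num_sensor=10, episode_len=200):
        self.num_sensor = num_sensor
        self.episode_len = episode_len
        self.step_count = 0

    def ini(self):
        # Reset the episode counter.
        self.step_count = 0

    def frame_step(self, action, policy, train):
        # Return (observation, reward, terminal) as the loop above expects.
        self.step_count += 1
        observation = np.random.rand(self.num_sensor)  # placeholder sensor readings
        reward = float(np.argmax(action))              # placeholder reward
        terminal = self.step_count >= self.episode_len
        return observation, reward, terminal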
import math

import numpy as np

LOSS = []
SENSOR_NUM = 10
ACTION_NUM = 26
record = 95000
Time = 10

# Build the state-generating signal A*sin(omega*t)
N = 10000
t = np.linspace(0, 10000, num=N)  # step of 1, 10000 samples in total
omega = math.pi / 25
A = 10
x1 = A * np.sin(omega * t)

DQN1 = BrainDQN(SENSOR_NUM, ACTION_NUM)
ENV = ENVIRNOMENT(ACTION_NUM)
state = x1[Time]
# Generate the state input via the sensors
stateInput = ENV.creat_sensor(Power=x1[Time], sensor_num=SENSOR_NUM)
action_input = DQN1.getAction_1(action_num=ACTION_NUM, stateInput=stateInput)
print(action_input)
# reward = ENV.get_reward(stateInput=stateInput, actionInput=action_input)
# nextState = ENV.creat_sensor(Power=x1[Time + 1], sensor_num=SENSOR_NUM)
# # Push stateInput, action_input, reward and nextState into the replay memory
# LOSS.append(DQN1.get_loss(currentState=stateInput, nextState=nextState,
#                           action=action_input, reward=reward))
# Time = Time + 1
# record = record - 1
import math

import numpy as np

LOSS = []
SENSOR_NUM = 10
ACTION_NUM = 26
record = 95000
time = 0

# Build the state-generating signal A*sin(omega*t)
N = 100000
t = np.linspace(0, 100000, num=N)  # step of 1, 100000 samples in total
omega = math.pi / 25
A = 10
x1 = A * np.sin(omega * t)

DQN = BrainDQN(SENSOR_NUM, ACTION_NUM)
ENV = ENVIRNOMENT(ACTION_NUM)
R = []
R_total = 0
while record != 0:
    if time > 90000:
        break
    state = x1[time]
    stateInput = ENV.creat_sensor(Power=x1[time], sensor_num=SENSOR_NUM)
    action_input = DQN.getAction(action_num=ACTION_NUM, stateInput=stateInput)
    reward = ENV.get_reward(stateInput=stateInput, actionInput=action_input)
    # print(reward)
    R_total += reward
    R.append(R_total)
    nextState = ENV.creat_sensor(Power=x1[time + 1], sensor_num=SENSOR_NUM)
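# creat_sensor and get_reward are opaque here. One plausible reading, assuming
# each of the SENSOR_NUM sensors observes the instantaneous power x1[time] plus
# Gaussian noise, and the reward penalizes the distance between the chosen
# discrete power level and the sensed mean (a sketch, not the ENVIRNOMENT code):
import numpy as np

class EnvSketch:
    """Hypothetical sketch of the interface ENVIRNOMENT is used through."""

    def __init__(self, action_num, noise_std=1.0):
        # Discretize the [-A, A] power range into action_num candidate levels.
        self.action_levels = np.linspace(-10, 10, action_num)
        self.noise_std = noise_std

    def creat_sensor(self, Power, sensor_num):
        # Each sensor sees the true power corrupted by Gaussian noise.
        return Power + np.random.normal(0, self.noise_std, sensor_num)

    def get_reward(self, stateInput, actionInput):
        # Higher reward the closer the chosen level is to the sensed mean.
        chosen = self.action_levels[int(np.argmax(actionInput))]
        return -abs(float(np.mean(stateInput)) - chosen)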
t2 = np.linspace(0, 8, num=9)
omega = math.pi / 9
A = 10
# t1 (a longer time axis), Time, N, SENSOR_NUM and ACTION_NUM are defined
# earlier in the original script; this fragment starts mid-file.
sig = A * np.sin(omega * t1)

# Get the actionChoice matrix: samples 0-4 and 10-13 of the signal
def actionChoice(sig):
    actionChoice = []
    for i in range(5):
        actionChoice.append(sig[i])
    for i in range(10, 14):
        actionChoice.append(sig[i])
    return actionChoice

actionChoice = actionChoice(sig)

Qnetwork = BrainDQN(SENSOR_NUM, ACTION_NUM)
Env = ENVIRONMENT(SensorNum=SENSOR_NUM, ActionNum=ACTION_NUM,
                  ActionChoice=actionChoice)
Reward = []
R_total = 0
Loss = []
ActionShow = []
while Time < N - 1:
    if Time < 2000:  # train on prior experience for 3 periods
        stateInput = Env.creatSensor(Power=sig[Time])
        print("iterations:%d" % Time)
        actionInput = Qnetwork.getAction_1(actionNum=ACTION_NUM,
                                           stateInput=stateInput, Time=Time)
        reward, actionshow = Env.getReward(stateInput=stateInput,
                                           actionInput=actionInput)
        ActionShow.append(actionshow)
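# Quick check of the actionChoice slicing above: it keeps samples 0-4 and
# 10-13 of the signal, i.e. nine candidate power levels drawn from the sine
# wave (t1 below is an assumed time axis, since the fragment defines it
# elsewhere):
import math

import numpy as np

t1 = np.linspace(0, 99, num=100)  # assumed: 100 unit-step samples
sig = 10 * np.sin(math.pi / 9 * t1)

choices = [sig[i] for i in range(5)] + [sig[i] for i in range(10, 14)]
print(len(choices))  # 9 discrete power levels for the action set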
SU_powers = [round(i / 10.0, 2) for i in range(1, 15)]
actions = len(SU_powers)
PU_num = 2  # rd.randint(1, 10)
SU_num = 5  # rd.randint(1, 10)
Loss = []
Success = []
Fre = []
noise = 3
policy = 1  # PU power-change policy: 1 (multi-step) or 2 (single step)
brain = BrainDQN(actions, PU_num, SU_num)
# PU_powers is defined earlier in the original script, analogously to SU_powers
com = GameState(PU_powers, SU_powers, noise, PU_num, SU_num)
terminal = True
recording = 1000
while recording > 0:
    # initialization
    if terminal:
        com.ini()
        observation0, reward0, terminal = com.frame_step(
            np.zeros(actions), policy, False)
        brain.setInitState(observation0)
    # train
    action, recording = brain.getAction()
    nextObservation, reward, terminal = com.frame_step(action, policy, True)
    loss = brain.setPerception(nextObservation, action, reward)
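# On reset the loop feeds frame_step the all-zero vector np.zeros(actions);
# during training the action is one-hot, as noted in the first script. A small
# helper (hypothetical name) for building that vector from an action index:
import numpy as np

def one_hot_action(index, actions):
    # One-hot encode the chosen SU power level for frame_step.
    vec = np.zeros(actions)
    vec[index] = 1.0
    return vec

print(one_hot_action(3, 14))  # the 4th of the 14 candidate SU power levels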