def playWeChatJump():
    """Run the DQN agent against the game in an endless act/observe loop.

    Builds a ``BrainDQN`` with 10 actions, seeds it with the state returned
    by ``game.init_state()``, and then repeatedly asks the agent for an
    action, steps the game with it, and feeds the resulting transition back
    to the agent. The loop has no exit condition, so this function does not
    return normally.
    """
    num_actions = 10
    # Per the original note: actions are one-hot encoded.
    agent = BrainDQN(num_actions)

    initial_frame = game.init_state()
    agent.setInitState(initial_frame)

    while True:
        chosen_action = agent.getAction()
        raw_frame, step_reward, is_terminal = game.frame_step(chosen_action)
        # Reshape the observation to (80, 80, 1) before handing it to the agent.
        next_frame = np.reshape(raw_frame, (80, 80, 1))
        agent.setPerception(next_frame, chosen_action, step_reward, is_terminal)
brain = BrainDQN(actions, num_sensor) com = GameState(P_1, P_2, noise, num_sensor) terminal = True recording = 100000 while (recording > 0): # initialization if (terminal == True): com.ini() observation0, reward0, terminal = com.frame_step( np.zeros(actions), policy, False) brain.setInitState(observation0) # train action, recording = brain.getAction() nextObservation, reward, terminal = com.frame_step(action, policy, True) loss = brain.setPerception(nextObservation, action, reward) # test if (recording + 1) % 500 == 0: Loss.append(loss) print "iteration : %d , loss : %f ." % (100000 - recording, loss) success = 0.0 fre = 0 num = 1000.0 for ind in range(1000): T = 0 com.ini_test()
print("iterations:%d" % Time) actionIput = Qnetwork.getAction_1(actionNum=ACTION_NUM, stateInput=stateInput,Time= Time) reward,actionshow = Env.getReward(stateInput=stateInput, actionInput=actionIput) ActionShow.append(actionshow) nextState = Env.creatSensor(Power=sig[Time + 1]) loss = Qnetwork.getLoss(currentState=stateInput, nextState=nextState, action=actionIput, reward=reward) Time = Time + 1 R_total += reward Reward.append(R_total) else: #get satate\action\reward\nextstate stateInput = Env.creatSensor(Power=sig[Time]) print("iterations:%d" %Time) actionIput = Qnetwork.getAction(actionNum= ACTION_NUM,stateInput= stateInput) reward,actionshow = Env.getReward(stateInput= stateInput,actionInput= actionIput) ActionShow.append(actionshow) nextState = Env.creatSensor(Power= sig[Time+1]) #get loss and train Qnetwork loss = Qnetwork.getLoss(currentState= stateInput,nextState= nextState,action=actionIput,reward= reward) R_total += reward Reward.append(R_total) if not loss == 0: Loss.append(loss) Time = Time + 1