Ejemplo n.º 1
0
def playWeChatJump():
    """Drive a ``BrainDQN`` agent against the game in an endless loop.

    Creates an agent with 10 actions, seeds it with the game's initial
    state, then repeatedly asks the agent for an action, steps the game
    with it, and feeds the resulting transition back to the agent.

    The loop has no exit condition; it only stops if an exception
    propagates out of one of the calls.
    """
    num_actions = 10
    agent = BrainDQN(num_actions)  # actions are one-hot encoded
    initial_state = game.init_state()
    agent.setInitState(initial_state)
    while True:
        chosen = agent.getAction()
        frame, reward, terminal = game.frame_step(chosen)
        # Give the frame an explicit single-channel trailing axis: (80, 80, 1).
        frame = np.reshape(frame, (80, 80, 1))
        agent.setPerception(frame, chosen, reward, terminal)
Ejemplo n.º 2
0
# Experiment configuration.
noise = 3  # forwarded to GameState as its third argument
num_sensor = 10  # N (presumably the number of sensors); given to both BrainDQN and GameState
policy = 2  # power-change policy for the PU: must be 1 (multi-step) or 2 (single step)

# Build the agent and the environment.
# `actions`, `P_1` and `P_2` are expected to be defined earlier in the script.
brain = BrainDQN(actions, num_sensor)
com = GameState(P_1, P_2, noise, num_sensor)
terminal = True  # starts True so the first loop iteration runs the initialization branch
recording = 100000  # loop counter: the loop runs while > 0; reassigned from brain.getAction()

# Main loop: keeps running until `recording` (as returned by brain.getAction())
# is no longer positive.
while (recording > 0):
    # Initialization: (re)start the environment whenever the previous step
    # reported a terminal state.
    if (terminal == True):
        com.ini()
        # Take one step with an all-zero action vector and the last argument
        # set to False; the observation it returns seeds the agent's state.
        observation0, reward0, terminal = com.frame_step(
            np.zeros(actions), policy, False)
        brain.setInitState(observation0)

    # Train: choose an action, step the environment, feed the transition back.
    # getAction() also returns the new value of `recording`, which drives the loop.
    action, recording = brain.getAction()
    nextObservation, reward, terminal = com.frame_step(action, policy, True)
    loss = brain.setPerception(nextObservation, action, reward)

    # Test: entered each time (recording + 1) is a multiple of 500.
    if (recording + 1) % 500 == 0:

        Loss.append(loss)
        # NOTE(review): the literal 100000 duplicates the initial value of
        # `recording`; the two must be kept in sync by hand.
        # NOTE(review): `print` statement syntax means this script targets Python 2.
        print "iteration : %d , loss : %f ." % (100000 - recording, loss)

        # Counters reset for each test pass; presumably consumed by the
        # evaluation code that follows - TODO confirm.
        success = 0.0
        fre = 0
        num = 1000.0