Example No. 1
import numpy as np

def playWeChatJump():
    actions = 10
    brain = BrainDQN(actions)  # actions are one-hot encoded
    observation0 = game.init_state()
    brain.setInitState(observation0)
    while True:
        # pick an action, step the game, and feed the transition back to the agent
        action = brain.getAction()
        nextObservation, reward, terminal = game.frame_step(action)
        nextObservation = np.reshape(nextObservation, (80, 80, 1))
        brain.setPerception(nextObservation, action, reward, terminal)
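
The example above only shows the calling side; the BrainDQN class itself is not included. A minimal sketch of the interface those calls assume (setInitState, getAction, setPerception) could look like the following. The class name, replay-buffer size, epsilon value, frame stacking, and random action choice are all illustrative stand-ins for the real network, not the original implementation.

import random
from collections import deque

import numpy as np

class BrainDQNSketch:
    """Illustrative stand-in for the BrainDQN interface used above."""

    def __init__(self, actions, memory_size=50000, epsilon=0.1):
        self.actions = actions
        self.epsilon = epsilon                      # exploration rate (unused in this stub)
        self.replay_memory = deque(maxlen=memory_size)
        self.current_state = None

    def setInitState(self, observation):
        # Stack the first (80, 80) observation into an (80, 80, 4) initial state.
        self.current_state = np.stack([observation] * 4, axis=2)

    def getAction(self):
        # Return a one-hot action; a real agent would take argmax over Q-values
        # (with epsilon-greedy exploration) instead of choosing at random.
        action = np.zeros(self.actions)
        index = random.randrange(self.actions)      # placeholder for argmax(Q)
        action[index] = 1
        return action

    def setPerception(self, nextObservation, action, reward, terminal):
        # Store the transition; a real agent would also sample a minibatch here
        # and run one training step on the Q-network.
        newState = np.append(self.current_state[:, :, 1:], nextObservation, axis=2)
        self.replay_memory.append(
            (self.current_state, action, reward, newState, terminal))
        self.current_state = newState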
Example No. 2
brain = BrainDQN(actions, num_sensor)
com = GameState(P_1, P_2, noise, num_sensor)
terminal = True
recording = 100000

while recording > 0:
    # initialization: reset the environment at the start of every episode
    if terminal:
        com.ini()
        observation0, reward0, terminal = com.frame_step(
            np.zeros(actions), policy, False)
        brain.setInitState(observation0)

    # train: pick an action, step the environment, and update the network
    action, recording = brain.getAction()
    nextObservation, reward, terminal = com.frame_step(action, policy, True)
    loss = brain.setPerception(nextObservation, action, reward)

    # test: every 500 iterations, log the loss and evaluate the policy
    if (recording + 1) % 500 == 0:

        Loss.append(loss)
        print("iteration : %d , loss : %f ." % (100000 - recording, loss))

        success = 0.0
        fre = 0
        num = 1000.0
        for ind in range(1000):
            T = 0
            com.ini_test()
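
The GameState object (com) is external to this excerpt as well; the only contract the loop relies on is that frame_step returns an (observation, reward, terminal) triple. A hypothetical stub like the one below, where the class name, observation shape, placeholder reward, and fixed episode length are all invented for illustration, is enough to exercise the loop above.

import numpy as np

class GameStateStub:
    """Hypothetical environment stub exposing the frame_step contract used above."""

    def __init__(self, num_sensor, episode_len=200):
        self.num_sensor = num_sensor
        self.episode_len = episode_len
        self.t = 0

    def ini(self):
        # Reset the step counter at the start of a new episode.
        self.t = 0

    def frame_step(self, action, policy, training):
        # Advance one step and return (observation, reward, terminal).
        self.t += 1
        observation = np.random.rand(self.num_sensor)
        reward = float(np.sum(action) > 0)        # placeholder reward
        terminal = self.t >= self.episode_len     # end the episode after a fixed horizon
        return observation, reward, terminal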
Example No. 3
        print("iterations:%d" % Time)
        actionIput = Qnetwork.getAction_1(actionNum=ACTION_NUM, stateInput=stateInput,Time= Time)
        reward,actionshow = Env.getReward(stateInput=stateInput, actionInput=actionIput)
        ActionShow.append(actionshow)
        nextState = Env.creatSensor(Power=sig[Time + 1])
        loss = Qnetwork.getLoss(currentState=stateInput, nextState=nextState, action=actionIput, reward=reward)
        Time = Time + 1
        R_total += reward
        Reward.append(R_total)

    else:

        #get satate\action\reward\nextstate
        stateInput = Env.creatSensor(Power=sig[Time])
        print("iterations:%d" %Time)
        actionIput = Qnetwork.getAction(actionNum= ACTION_NUM,stateInput= stateInput)
        reward,actionshow = Env.getReward(stateInput= stateInput,actionInput= actionIput)
        ActionShow.append(actionshow)
        nextState = Env.creatSensor(Power= sig[Time+1])

        #get loss and train Qnetwork
        loss = Qnetwork.getLoss(currentState= stateInput,nextState= nextState,action=actionIput,reward= reward)

        R_total += reward
        Reward.append(R_total)

        if not loss == 0:
            Loss.append(loss)

        Time = Time + 1
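
The getLoss call is where the temporal-difference update happens, but its body is not part of this excerpt. As a rough, hypothetical sketch of what a one-step Q-learning loss over a single transition computes (the function name, discount factor GAMMA, and plain-numpy formulation are assumptions, not the original code):

import numpy as np

GAMMA = 0.9  # assumed discount factor

def td_loss_sketch(q_current, q_next, action_index, reward):
    """One-step Q-learning loss for a single transition (illustrative only).

    q_current : Q-values predicted for the current state, shape (ACTION_NUM,)
    q_next    : Q-values predicted for the next state,    shape (ACTION_NUM,)
    """
    target = reward + GAMMA * np.max(q_next)   # bootstrapped TD target
    prediction = q_current[action_index]       # Q-value of the action actually taken
    return (target - prediction) ** 2          # squared TD error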