total_steps = 0
for i_episode in range(10):   # episode count is illustrative
    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)

        observation_, reward, done, info = env.step(action)

        position, velocity = observation_

        # reward shaping: the farther the car is from the bottom of the
        # valley (position = -0.5), the larger the reward
        reward = abs(position - (-0.5))

        RL.store_transition(observation, action, reward, observation_)

        ep_r += reward
        if total_steps > 1000:
            RL.learn()

        if done:
            get = '| Get' if observation_[0] >= env.unwrapped.goal_position else '| ----'
            print('Epi: ', i_episode, get, '| Ep_r: ', round(ep_r, 4),
                  '| Epsilon: ', round(RL.epsilon, 2))
            break

        observation = observation_
        total_steps += 1

RL.plot_cost()
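
The loop above assumes that env and RL already exist. A minimal sketch of that setup, assuming gym's MountainCar-v0 and a DQN agent class taking the same constructor arguments as the one in Example #2 below (where it is imported from is not shown; hyperparameters are illustrative):

import gym

env = gym.make('MountainCar-v0')
env = env.unwrapped   # exposes goal_position and lifts the 200-step limit

RL = DQN(
    n_actions=env.action_space.n,               # 3 discrete actions
    n_features=env.observation_space.shape[0],  # position and velocity
    learning_rate=0.001,
    reward_decay=0.9,
    e_greedy=0.9,
    replace_target_iter=300,
    memory_size=3000,
)
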
Example #2
def run_maze():
    # a standard DQN training loop is assumed here: choose an action, step the
    # environment, store the transition, and learn periodically; the env.step
    # return signature and the learning schedule are assumptions
    step = 0
    for episode in range(300):   # episode count is illustrative
        # initial observation
        observation = env.reset()

        while True:
            # RL chooses an action based on the current observation
            action = RL.choose_action(observation)

            # take the action in the FX environment
            observation_, reward, done = env.step(action)

            RL.store_transition(observation, action, reward, observation_)

            # start learning once enough transitions have been collected
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                print('game over')
                print(env.balance)
                break
            step += 1

    # end of game

    #env.destroy()


if __name__ == "__main__":
    # FX environment
    env = FX()
    RL = DQN(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )
    #     env.after(100, run_maze)
    run_maze()
    #     env.mainloop()
    RL.plot_cost()
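
For reference, a rough sketch of the interface the FX environment is assumed to expose, based only on how env is used above (n_actions, n_features, balance, and the reset/step cycle of the training loop); the trading logic itself is only a placeholder:

import numpy as np

class FX:
    def __init__(self):
        self.n_actions = 3     # e.g. buy / sell / hold (placeholder)
        self.n_features = 2    # length of the observation vector (placeholder)
        self.balance = 0.0     # account balance printed at the end of an episode

    def reset(self):
        # start a new episode and return the initial observation
        self.balance = 0.0
        return np.zeros(self.n_features, dtype=np.float32)

    def step(self, action):
        # apply the action and return (observation_, reward, done)
        observation_ = np.zeros(self.n_features, dtype=np.float32)
        reward = 0.0
        done = True            # placeholder so the episode terminates immediately
        return observation_, reward, done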