def update():
    for episode in range(100):
        # initial observation
        observation = env.reset()

        while True:
            env.render()
            # RL choose action based on observation
            action = RL.choose_action(str(observation))

            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            # RL learn from this transition
            RL.learn(str(observation), action, reward, str(observation_))

            # swap observation
            observation = observation_
            if done:
                break

    # end of navigation
    print('Reinforcement learning finished successfully')
    env.destroy()

if __name__ == "__main__":
    env = Maze()
    RL = QLearningTable(actions=list(range(env.n_actions)))

    env.after(100, update)
    env.mainloop()
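
The QLearningTable class used above is not defined in this example. Below is a minimal sketch of what such a tabular agent commonly looks like, assuming an epsilon-greedy choose_action and the standard Q-learning update; the comparison against the string 'terminal' is an assumption about how the Maze environment marks the end of an episode, and the actual class may differ.

import numpy as np
import pandas as pd


class QLearningTable:
    """Minimal tabular Q-learning agent (illustrative sketch only)."""

    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions          # list of action indices
        self.lr = learning_rate         # step size (alpha)
        self.gamma = reward_decay       # discount factor
        self.epsilon = e_greedy         # probability of taking the greedy action
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def check_state_exist(self, state):
        # add an all-zero row the first time a state is seen
        if state not in self.q_table.index:
            self.q_table.loc[state] = [0.0] * len(self.actions)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        if np.random.uniform() < self.epsilon:
            # exploit: pick an action with the highest Q-value (ties broken randomly)
            state_action = self.q_table.loc[observation, :]
            action = np.random.choice(
                state_action[state_action == np.max(state_action)].index)
        else:
            # explore: pick a random action
            action = np.random.choice(self.actions)
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        if s_ != 'terminal':    # assumption: the env reports episode end as 'terminal'
            q_target = r + self.gamma * self.q_table.loc[s_, :].max()
        else:
            q_target = r        # terminal state: no bootstrapping
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)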
Example 2
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()

    #    RL = DeepQNetwork(env.n_actions, env.n_features,
    #                      learning_rate=0.01,
    #                      reward_decay=0.9,
    #                      e_greedy=0.9,
    #                      replace_target_iter=200,
    #                      memory_size=2000
    #                      )

    # hyperparameters tuned by hand; best configuration so far
    RL = DeepQNetwork(env.n_actions,
                      env.n_features,
                      learning_rate=0.005,
                      reward_decay=0.8,
                      e_greedy=0.8,
                      replace_target_iter=200,
                      memory_size=2000)
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
    exit()
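
The fragment above begins partway through the training loop, so only the tail of the function (ending with print('game over')) is visible. Below is a minimal sketch of the full run_maze() loop it presumably belongs to; it assumes the globals env and RL from the __main__ block and a DQN-style interface on RL (choose_action, store_transition, learn), none of which is confirmed by the fragment itself.

def run_maze():
    # illustrative reconstruction; the method names on RL are assumptions
    step = 0
    for episode in range(300):
        # initial observation
        observation = env.reset()

        while True:
            env.render()

            # choose an action from the current observation
            action = RL.choose_action(observation)

            # take the action, observe the next state, reward, and done flag
            observation_, reward, done = env.step(action)

            # store the transition in the replay memory
            RL.store_transition(observation, action, reward, observation_)

            # start learning once enough transitions are stored, then learn every few steps
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()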
Example 3
            # =============================================================================
            #             if (episode+1) >= 3:
            #                 action = RL.choose_ma_action(str(observation))
            #             else:
            #                 action = RL.choose_action(str(observation))
            # =============================================================================
            #action = RL.choose_ma_action(str(observation))
            action = RL.choose_entropy_action(str(observation))
            #action = RL.choose_action(str(observation))
            observation_, reward, done = env.step(action)
            total_reward_value = total_reward_value + reward
            RL.learn(str(observation), action, reward, str(observation_))
            observation = observation_
            if done:
                break
        print('{} episode over'.format(episode + 1))
        print('average reward {}'.format(total_reward_value / (episode + 1)))
        average_reward.append(total_reward_value / (episode + 1))
    #print(RL.q_table)
    #RL.q_table.to_csv('3-2.csv',header=True,index=True)
    print(average_reward)
    env.destroy()
    #RL.q_table.to_clipboard()


if __name__ == '__main__':
    env = Maze()
    RL = QLearningTable(actions=list(range(env.n_actions)))
    env.after(2000, update)
    env.mainloop()
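
choose_entropy_action is not a standard QLearningTable method and its implementation is not shown here. One plausible reading is a softmax (Boltzmann) exploration rule, where actions are sampled in proportion to exp(Q / T); the sketch below illustrates that idea only and is not the author's implementation.

import numpy as np

def softmax_action(q_values, temperature=1.0):
    # sample an action index with probability proportional to exp(Q / temperature)
    q = np.asarray(q_values, dtype=np.float64)
    q = q - q.max()                  # subtract the max for numerical stability
    probs = np.exp(q / temperature)
    probs /= probs.sum()
    return int(np.random.choice(len(q), p=probs))

# example usage against a Q-table row (assumes the state already exists in the table):
# action = softmax_action(RL.q_table.loc[str(observation), :].values, temperature=0.5)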