Example #1
def update():
    # episode count not shown in the original snippet; 100 is an assumption
    for episode in range(100):
        # initial observation
        observation = env.reset()

        while True:
            # fresh env
            env.render()

            # RL choose action based on observation
            action = RL.choose_action(str(observation))

            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            # RL learn from this transition
            RL.learn(str(observation), action, reward, str(observation_))

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = QLearningTable(actions=list(range(env.n_actions)))
    env.after(100, update)
    env.mainloop()
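
The loop above calls RL.choose_action() and RL.learn(), but the QLearningTable class itself is not part of the listing. The sketch below is a minimal, assumed implementation of that interface: epsilon-greedy action selection over a pandas table keyed by the state string, and the standard Q-learning update q(s,a) <- q(s,a) + lr * (r + gamma * max_a' q(s',a') - q(s,a)). The default hyperparameters and the 'terminal' convention are assumptions, not taken from the example.

import numpy as np
import pandas as pd


class QLearningTable:
    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions          # list of action indices, e.g. [0, 1, 2, 3]
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def check_state_exist(self, state):
        # lazily add an all-zero row the first time a state string is seen
        if state not in self.q_table.index:
            self.q_table.loc[state] = [0.0] * len(self.actions)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        if np.random.uniform() < self.epsilon:
            # exploit: pick among the actions with the largest Q value
            state_action = self.q_table.loc[observation, :]
            action = np.random.choice(
                state_action[state_action == np.max(state_action)].index)
        else:
            # explore: pick a random action
            action = np.random.choice(self.actions)
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        # assumes the environment reports the end of an episode as the string 'terminal'
        if s_ != 'terminal':
            q_target = r + self.gamma * self.q_table.loc[s_, :].max()  # off-policy target
        else:
            q_target = r
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)     # Q-learning update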
Example #2
def update():
    # episode count not shown in the original snippet; 100 is an assumption
    for episode in range(100):
        # initial observation
        observation = env.reset()

        while True:
            # fresh env
            env.render()

            # RL choose action based on observation
            action = RL.choose_action(str(observation))

            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            # RL learn from this transition
            RL.learn(str(observation), action, reward, str(observation_))

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    # import numpy as np
    # print(np.arange(4))
    # RL = QLearningTable(actions=list(np.arange(4)))
    RL = QLearningTable(actions=list(range(env.n_actions)))

    env.after(100, update)
    env.mainloop()
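
Both examples hand a function to env.after() and then call env.mainloop(). Tk's after(ms, callback) only schedules the callback on a timer; mainloop() then blocks until the window is destroyed, which is why update() ends by calling env.destroy(). Below is a stripped-down illustration of that pattern with plain tkinter; the start_training name and the bare Tk window are stand-ins, not part of the examples.

import tkinter as tk


def start_training():
    # runs once, about 100 ms after mainloop() takes over;
    # a real update() would loop over episodes and finally call root.destroy()
    print("training would start here")
    root.destroy()


root = tk.Tk()                    # stands in for the Maze window used in the examples
root.after(100, start_training)   # pass the function itself; do not call it here
root.mainloop()                   # blocks until destroy() is called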
Example #3
    experiments = []

    # # alg0 (Async)
    # env0 = Maze(agentXY,goalXY,wall_shape, pits)
    # RL0 = rlalg0(actions=list(range(env0.n_actions)))
    # data0={}
    # env0.after(10, update(env0, RL0, data0, episodes))
    # env0.mainloop()
    # experiments = [(env0,RL0, data0)]

    # alg2 (SARSA)
    env2 = Maze(agentXY, goalXY, wall_shape, pits)
    RL2 = rlalg2(actions=list(range(env2.n_actions)))
    data2 = {}
    env2.after(10, update(env2, RL2, data2, episodes))
    env2.mainloop()
    experiments.append((env2, RL2, data2))

    # alg1 (Q-Learning)
    env1 = Maze(agentXY, goalXY, wall_shape, pits)
    RL1 = rlalg1(actions=list(range(env1.n_actions)))
    data1 = {}
    env1.after(10, update(env1, RL1, data1, episodes))
    env1.mainloop()
    experiments.append((env1, RL1, data1))

    # alg4 (Expected Sarsa)
    env4 = Maze(agentXY, goalXY, wall_shape, pits)
    RL4 = rlalg4(actions=list(range(env4.n_actions)))
    data4 = {}
    env4.after(10, update(env4, RL4, data4, episodes))
    env4.mainloop()
    experiments.append((env4, RL4, data4))
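
One detail to note in the scheduling lines above: env2.after(10, update(env2, RL2, data2, episodes)) calls update(...) immediately and passes its return value to after(), rather than registering a callback, so the training runs before mainloop() is entered. Whether that matters depends on how update() drives the window. If the intent is to defer the call until the event loop is running, the usual idiom (argument names taken from the example above) is:

env2.after(10, lambda: update(env2, RL2, data2, episodes))
env2.mainloop()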
Example #4
            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    # use DQN as the RL method
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.01,    # learning rate set to 0.01
                      reward_decay=0.9,      # discount factor for future rewards
                      e_greedy=0.9,          # probability of choosing the action with the largest Q value
                      replace_target_iter=200,   # replace the target_net parameters every 200 steps
                      memory_size=2000,     # replay memory capacity
                      output_graph=True,   # export the network graph of the training model
                      restore_network=False,
                      save_network=False
                      )
    RL.restore_net()
    env.after(100, run_maze)  # after() schedules run_maze on a Tk timer
    env.mainloop()  # mainloop() enters the Tk event (message) loop
    save_path = RL.save_net()
    RL.plot_cost()  # plot the network's training error curve
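
The DeepQNetwork class is not shown in this listing. The sketch below only illustrates the bookkeeping that the constructor arguments refer to: a ring-buffer replay memory of memory_size transitions and a learn counter that refreshes the target network every replace_target_iter learning steps. The class name, the _copy_params hook, and the batch size of 32 are assumptions, not the example's implementation.

import numpy as np


class DQNBookkeepingSketch:
    """Illustrates only the replay memory and the target-refresh schedule."""

    def __init__(self, n_features, memory_size=2000, replace_target_iter=200, batch_size=32):
        self.memory_size = memory_size
        self.replace_target_iter = replace_target_iter
        self.batch_size = batch_size
        # each row stores one transition: [s, a, r, s_]
        self.memory = np.zeros((memory_size, n_features * 2 + 2))
        self.memory_counter = 0
        self.learn_step_counter = 0

    def store_transition(self, s, a, r, s_):
        transition = np.hstack((s, [a, r], s_))
        index = self.memory_counter % self.memory_size    # overwrite the oldest entry
        self.memory[index, :] = transition
        self.memory_counter += 1

    def learn(self):
        if self.learn_step_counter % self.replace_target_iter == 0:
            self._copy_params()                           # target_net <- eval_net (hypothetical hook)
        n_stored = min(self.memory_counter, self.memory_size)
        batch_index = np.random.choice(n_stored, size=self.batch_size)
        batch = self.memory[batch_index, :]               # minibatch for the TD update (not shown)
        self.learn_step_counter += 1
        return batch

    def _copy_params(self):
        # placeholder: a real agent copies the eval-network weights into the target network here
        pass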
Example #5
            # reinforcement learning
            if (step > 5) and (step % 5 == 0):
                RL.learn()

            # take action
            observation = observation_

            if done: break
            step += 1

    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()

    RL = DeepQNetwork(env.n_actions,
                      env.n_features,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000)
    # output_graph=True)

    env.after(100, run_maze)  # wait for initialize
    env.mainloop()  # start environment
    RL.plot_cost()
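
plot_cost() is called after training, but its body is not part of the listing. A minimal sketch, assuming the agent appends one loss value per learning step to a cost_his list inside learn() (both the attribute name and the method body are assumptions):

import matplotlib.pyplot as plt


def plot_cost(self):
    # cost_his is assumed to hold one loss value per call to learn()
    plt.plot(range(len(self.cost_his)), self.cost_his)
    plt.ylabel('Cost')
    plt.xlabel('training steps')
    plt.show()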
Example #6
                               [3, 6], [4, 6], [5, 6]])
        pits = []

    if Task == "T3":
        # Task 3
        wall_shape = np.array([[7, 4], [7, 3], [6, 3], [6, 2], [5, 2], [4, 2],
                               [3, 2], [3, 3], [3, 4], [3, 5], [3, 6], [4, 6],
                               [5, 6]])
        pits = np.array([[1, 3], [0, 5], [7, 7]])

    # sarsa
    env1 = Maze(agentXY, goalXY, wall_shape, pits)
    RL1 = rlalg1(actions=list(range(env1.n_actions)))
    data1 = {}
    env1.after(10, update(env1, RL1, data1, episodes))
    env1.mainloop()
    experiments = [(env1, RL1, data1)]

    # Q-learning
    env2 = Maze(agentXY, goalXY, wall_shape, pits)
    RL2 = rlalg2(actions=list(range(env2.n_actions)))
    data2 = {}
    env2.after(10, update(env2, RL2, data2, episodes))
    env2.mainloop()
    experiments.append((env2, RL2, data2))

    # Expected Sarsa
    env3 = Maze(agentXY, goalXY, wall_shape, pits)
    RL3 = rlalg3(actions=list(range(env3.n_actions)))
    data3 = {}
    env3.after(10, update(env3, RL3, data3, episodes))