        while True:
            # fresh env
            env.render()

            # RL choose action based on observation
            action = RL.choose_action(str(observation))

            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            # RL learn from this transition
            RL.learn(str(observation), action, reward, str(observation_))

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = QLearningTable(actions=list(range(env.n_actions)))
    env.after(100, update)
    env.mainloop()
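For reference, the agent driven by this loop only needs choose_action() and learn(). Below is a minimal, hypothetical sketch of such a tabular QLearningTable, assuming a pandas-backed Q-table, an epsilon-greedy policy, and an environment that reports terminal states as the string 'terminal'; the actual class in this code base may differ in details.

# Minimal sketch of a tabular Q-learning agent matching the interface used above.
import numpy as np
import pandas as pd


class QLearningTable:
    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions            # list of action indices, e.g. [0, 1, 2, 3]
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy           # probability of exploiting (greedy action)
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def check_state_exist(self, state):
        # lazily add an all-zero row the first time a state is seen
        if state not in self.q_table.index:
            self.q_table.loc[state] = [0.0] * len(self.actions)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        if np.random.uniform() < self.epsilon:
            # exploit: pick among the actions with maximal Q, breaking ties randomly
            state_action = self.q_table.loc[observation, :]
            action = np.random.choice(state_action[state_action == np.max(state_action)].index)
        else:
            # explore: pick a random action
            action = np.random.choice(self.actions)
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        if s_ != 'terminal':                                            # assumed terminal sentinel
            q_target = r + self.gamma * self.q_table.loc[s_, :].max()   # greedy bootstrap
        else:
            q_target = r                                                # episode ended
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)      # Q-learning update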
            # RL choose action based on observation
            action = RL.choose_action(str(observation))

            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            # RL learn from this transition
            RL.learn(str(observation), action, reward, str(observation_))

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    # import numpy as np
    # print(np.arange(4))
    # RL = QLearningTable(actions=list(np.arange(4)))
    RL = QLearningTable(actions=list(range(env.n_actions)))
    env.after(100, update)
    env.mainloop()
experiments = []

# # alg0 (Async)
# env0 = Maze(agentXY, goalXY, wall_shape, pits)
# RL0 = rlalg0(actions=list(range(env0.n_actions)))
# data0 = {}
# env0.after(10, update(env0, RL0, data0, episodes))
# env0.mainloop()
# experiments = [(env0, RL0, data0)]

# alg2 (SARSA)
env2 = Maze(agentXY, goalXY, wall_shape, pits)
RL2 = rlalg2(actions=list(range(env2.n_actions)))
data2 = {}
# note: update(...) is evaluated here, so the experiment runs immediately;
# after() then only schedules its (None) return value
env2.after(10, update(env2, RL2, data2, episodes))
env2.mainloop()
experiments.append((env2, RL2, data2))

# alg1 (Q-Learning)
env1 = Maze(agentXY, goalXY, wall_shape, pits)
RL1 = rlalg1(actions=list(range(env1.n_actions)))
data1 = {}
env1.after(10, update(env1, RL1, data1, episodes))
env1.mainloop()
experiments.append((env1, RL1, data1))

# alg4 (Expected Sarsa)
env4 = Maze(agentXY, goalXY, wall_shape, pits)
RL4 = rlalg4(actions=list(range(env4.n_actions)))
data4 = {}
env4.after(10, update(env4, RL4, data4, episodes))
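The agents compared here (SARSA, Q-Learning, Expected Sarsa) differ only in the bootstrap target formed inside their learn() step. Below is a hedged sketch of those targets, assuming the next-state Q-values arrive as a pandas Series or numpy array q_row and that eps is the probability of acting greedily; these names and conventions are assumptions, not taken from this code.

# Hypothetical target computations; q_row holds the Q-values of the next state s_,
# a_ is the action actually selected there (SARSA only).
import numpy as np

def sarsa_target(r, gamma, q_row, a_):
    # on-policy: bootstrap with the Q-value of the action that will be taken next
    return r + gamma * q_row[a_]

def q_learning_target(r, gamma, q_row):
    # off-policy: bootstrap with the greedy (maximal) Q-value
    return r + gamma * np.max(q_row)

def expected_sarsa_target(r, gamma, q_row, eps):
    # bootstrap with the expected Q-value under the epsilon-greedy behaviour policy
    values = np.asarray(q_row, dtype=float)
    probs = np.full(len(values), (1.0 - eps) / len(values))  # exploration mass, spread uniformly
    probs[np.argmax(values)] += eps                          # extra mass on the greedy action
    return r + gamma * float(np.dot(probs, values))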
            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    # use DQN as the RL method
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.01,       # learning rate set to 0.01
                      reward_decay=0.9,         # discount factor for future rewards
                      e_greedy=0.9,             # probability of choosing the greedy (max-Q) action
                      replace_target_iter=200,  # copy eval_net parameters into target_net every 200 learning steps
                      memory_size=2000,         # replay memory capacity
                      output_graph=True,        # export the network graph (e.g. for TensorBoard)
                      restore_network=False,
                      save_network=False
                      )
    RL.restore_net()
    env.after(100, run_maze)  # after() schedules run_maze on the Tk timer loop
    env.mainloop()            # mainloop() enters the Tk event (message) loop
    save_path = RL.save_net()
    RL.plot_cost()            # plot the network's training loss curve
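restore_net() and save_net(), together with the restore_network/save_network flags, are additions to the stock DeepQNetwork. One plausible TF1-style sketch is shown below; it assumes the class keeps its session in self.sess and the two flags as attributes, and the checkpoint path 'tmp/dqn_maze.ckpt' is made up purely for illustration.

# Hypothetical checkpoint helpers; self.sess, self.restore_network and
# self.save_network are assumed attributes, and the path is illustrative only.
import tensorflow as tf


class DeepQNetworkCheckpointMixin:
    def restore_net(self, checkpoint_path='tmp/dqn_maze.ckpt'):
        # reload previously trained weights when restore_network=True
        if self.restore_network:
            tf.train.Saver().restore(self.sess, checkpoint_path)

    def save_net(self, checkpoint_path='tmp/dqn_maze.ckpt'):
        # persist the trained weights and return the path they were written to
        if not self.save_network:
            return None
        return tf.train.Saver().save(self.sess, checkpoint_path)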
            # reinforcement learning: start learning after 5 steps, then every 5 steps
            if (step > 5) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            if done:
                break
            step += 1

    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000)
                      # output_graph=True)
    env.after(100, run_maze)  # wait for initialization
    env.mainloop()            # start environment
    RL.plot_cost()
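The RL.learn() call above samples minibatches from a replay memory that a store_transition() call fills earlier in the loop (not shown here). Below is a minimal sketch of such a buffer, assuming transitions are stored as flat numpy rows [s, a, r, s_] with n_features numbers per state; the names and default sizes are assumptions rather than this repo's exact implementation.

# Hypothetical replay buffer behind store_transition()/learn().
import numpy as np


class ReplayMemory:
    def __init__(self, n_features, memory_size=2000, batch_size=32):
        self.memory_size = memory_size
        self.batch_size = batch_size
        # each row holds one transition flattened as [s, a, r, s_]
        self.memory = np.zeros((memory_size, n_features * 2 + 2))
        self.memory_counter = 0

    def store_transition(self, s, a, r, s_):
        transition = np.hstack((s, [a, r], s_))
        index = self.memory_counter % self.memory_size  # overwrite oldest entries
        self.memory[index, :] = transition
        self.memory_counter += 1

    def sample(self):
        # uniform minibatch over the part of the memory that has been filled
        upper = min(self.memory_counter, self.memory_size)
        indices = np.random.choice(upper, size=self.batch_size)
        return self.memory[indices, :]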
                           [3, 6], [4, 6], [5, 6]])
    pits = []

if Task == "T3":  # Task 3
    wall_shape = np.array([[7, 4], [7, 3], [6, 3], [6, 2], [5, 2], [4, 2],
                           [3, 2], [3, 3], [3, 4], [3, 5], [3, 6], [4, 6], [5, 6]])
    pits = np.array([[1, 3], [0, 5], [7, 7]])

# SARSA
env1 = Maze(agentXY, goalXY, wall_shape, pits)
RL1 = rlalg1(actions=list(range(env1.n_actions)))
data1 = {}
env1.after(10, update(env1, RL1, data1, episodes))
env1.mainloop()
experiments = [(env1, RL1, data1)]

# Q-learning
env2 = Maze(agentXY, goalXY, wall_shape, pits)
RL2 = rlalg2(actions=list(range(env2.n_actions)))
data2 = {}
env2.after(10, update(env2, RL2, data2, episodes))
env2.mainloop()
experiments.append((env2, RL2, data2))

# Expected Sarsa
env3 = Maze(agentXY, goalXY, wall_shape, pits)
RL3 = rlalg3(actions=list(range(env3.n_actions)))
data3 = {}
env3.after(10, update(env3, RL3, data3, episodes))