import pickle
import numpy as np
import matplotlib.pyplot as plt
# Maze and DuelingDQN are assumed to come from the accompanying environment / agent modules.

if __name__ == "__main__":
    env = Maze()
    RL = DuelingDQN(
        env.n_actions, env.n_features,   # number of observation/state features (e.g. length, width, height)
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        dueling=True,
        replace_target_iter=200,   # replace target_net's parameters every 200 steps
        memory_size=2000,          # replay memory capacity
        # output_graph=True        # whether to write a TensorBoard graph file
    )
    env.after(100, run_maze)       # run the reinforcement-learning training
    env.mainloop()

    # plot the training-time curve
    his_dueling = np.vstack((episodes, steps))
    file = open('his_dueling', 'wb')
    pickle.dump(his_dueling, file)
    file.close()

    plt.plot(his_dueling[0, :], his_dueling[1, :] - his_dueling[1, 0],
             c='b', label='Dueling DQN')
    plt.legend(loc='best')     # 'loc' sets the legend position; 'best' picks it automatically
    plt.ylabel('total training time')
    plt.xlabel('episode')
    plt.grid()                 # show grid lines (True = show, False = hide; shown by default)
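The main block above only schedules run_maze on the Tk event loop and then reads the module-level episodes and steps lists it is expected to fill in; run_maze itself is defined elsewhere in the script. Below is a minimal sketch of such a training loop, assuming the maze environment's reset/step/render interface and the agent's choose_action/store_transition/learn methods; the episode budget, warm-up threshold, and learning interval are illustrative, not the original values.

def run_maze():
    # Sketch only: relies on the module-level env, RL, episodes and steps created in the main block.
    step = 0                                   # total steps taken across all episodes
    for episode in range(300):                 # illustrative episode budget
        observation = env.reset()              # start a new episode
        while True:
            env.render()
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)
            RL.store_transition(observation, action, reward, observation_)
            if step > 200 and step % 5 == 0:   # learn once the replay memory has warmed up
                RL.learn()
            observation = observation_
            step += 1
            if done:
                episodes.append(episode)       # per-episode data consumed by np.vstack((episodes, steps))
                steps.append(step)
                break
    print('game over')
    env.destroy()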
def update():
    for episode in range(100):             # assumed episode loop; the original snippet begins inside it
        # initial observation
        observation = env.reset()
        # RL choose action based on observation
        action = RL.choose_action(str(observation))
        while True:
            # fresh env
            env.render()
            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)
            # RL choose action based on next observation
            action_ = RL.choose_action(str(observation_))
            # RL learn from this transition (s, a, r, s_, a_) ==> Sarsa
            RL.learn(str(observation), action, reward, str(observation_), action_)
            # swap observation and action
            observation = observation_
            action = action_
            # break while loop when end of this episode
            if done:
                break
    # end of game
    print('game over')
    env.destroy()

if __name__ == "__main__":
    env = Maze()
    RL = SarsaTable(actions=list(range(env.n_actions)))
    env.after(100, update)
    env.mainloop()
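The five-argument learn call above is what makes this on-policy Sarsa rather than Q-learning: the bootstrap target uses the action actually chosen in the next state, Q(s, a) ← Q(s, a) + α[r + γ·Q(s', a') − Q(s, a)]. Below is a minimal sketch of such a tabular agent, assuming a pandas-backed Q-table indexed by the stringified observations; the class and attribute names are illustrative and not necessarily the repository's SarsaTable.

import numpy as np
import pandas as pd

class SarsaTableSketch:
    """Illustrative tabular Sarsa agent (not necessarily the repository's SarsaTable)."""

    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions                 # list of action indices, e.g. list(range(n_actions))
        self.lr = learning_rate                # learning rate alpha
        self.gamma = reward_decay              # discount factor
        self.epsilon = e_greedy                # probability of acting greedily
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def check_state_exist(self, state):
        # lazily add an all-zero row the first time a state is seen
        if state not in self.q_table.index:
            self.q_table.loc[state] = [0.0] * len(self.actions)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        if np.random.uniform() < self.epsilon:
            q_values = self.q_table.loc[observation, :]
            # break ties between equally good actions at random
            action = np.random.choice(q_values[q_values == np.max(q_values)].index)
        else:
            action = np.random.choice(self.actions)
        return action

    def learn(self, s, a, r, s_, a_):
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        if s_ != 'terminal':                   # assumes the env reports the end state as 'terminal'
            # Sarsa target: bootstrap from the action a_ actually taken in s_
            q_target = r + self.gamma * self.q_table.loc[s_, a_]
        else:
            q_target = r
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)

With e_greedy=0.9 the greedy action is taken 90% of the time and a random action otherwise, so the target r + γ·Q(s', a') reflects the same ε-greedy behaviour policy; Q-learning would instead bootstrap from max over a' of Q(s', a').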