# Tabular Q-learning training loop for the maze environment.
# Module names below are assumed; the original omits the import lines.
from maze_env import Maze
from RL_brain import QLearningTable


def update():
    for episode in range(100):
        # initial observation
        observation = env.reset()

        while True:
            # fresh render of the environment
            env.render()

            # RL chooses an action based on the current observation
            action = RL.choose_action(str(observation))

            # RL takes the action and gets the next observation and reward
            observation_, reward, done = env.step(action)

            # RL learns from this transition
            RL.learn(str(observation), action, reward, str(observation_))

            # swap observation
            observation = observation_

            # break the inner loop when this episode ends
            if done:
                break

    # end of navigation
    print('Reinforcement Learning done successfully')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = QLearningTable(actions=list(range(env.n_actions)))

    env.after(100, update)
    env.mainloop()
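# ---------------------------------------------------------------------------
# The script above calls RL.choose_action and RL.learn, but the agent class is
# not shown in this excerpt. The following is a minimal, self-contained sketch
# of a QLearningTable with an epsilon-greedy policy and the standard
# Q-learning update; class and method names are assumptions, and the project's
# actual implementation may differ.
# ---------------------------------------------------------------------------
import numpy as np
import pandas as pd


class QLearningTable:
    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions            # list of action indices
        self.lr = learning_rate           # step size (alpha)
        self.gamma = reward_decay         # discount factor
        self.epsilon = e_greedy           # probability of acting greedily
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        if np.random.uniform() < self.epsilon:
            # exploit: pick a highest-Q action, breaking ties randomly
            state_action = self.q_table.loc[observation, :]
            action = np.random.choice(
                state_action[state_action == np.max(state_action)].index)
        else:
            # explore: pick a random action
            action = np.random.choice(self.actions)
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        if s_ != 'terminal':
            q_target = r + self.gamma * self.q_table.loc[s_, :].max()
        else:
            q_target = r  # terminal state has no successor value
        # Q(s,a) <- Q(s,a) + alpha * (target - prediction)
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)

    def check_state_exist(self, state):
        if state not in self.q_table.index:
            # add an all-zero row for a previously unseen state
            self.q_table.loc[state] = [0.0] * len(self.actions)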
# Tail of the DQN training script. The top of the file (imports for the Maze
# environment and the DeepQNetwork agent, and the start of run_maze) is not
# shown in the original excerpt.
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()

    # RL = DeepQNetwork(env.n_actions, env.n_features,
    #                   learning_rate=0.01,
    #                   reward_decay=0.9,
    #                   e_greedy=0.9,
    #                   replace_target_iter=200,
    #                   memory_size=2000
    #                   )

    # parameters tuned by hand; best version so far
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.005,
                      reward_decay=0.8,
                      e_greedy=0.8,
                      replace_target_iter=200,
                      memory_size=2000)

    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
    exit()
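# ---------------------------------------------------------------------------
# For reference, the missing top of run_maze typically looks like the sketch
# below: each transition is stored in the replay memory, and a learning step
# is triggered once enough experience has accumulated. The method names
# (store_transition, learn) and the warm-up/learning schedule are assumptions,
# not taken from the original.
# ---------------------------------------------------------------------------
def run_maze_sketch():
    step = 0
    for episode in range(300):
        observation = env.reset()
        while True:
            env.render()

            # epsilon-greedy action from the evaluation network
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)

            # store the transition in the replay memory
            RL.store_transition(observation, action, reward, observation_)

            # start learning after a warm-up period, then learn every 5 steps
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            observation = observation_
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()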
# Variant of the tabular loop with entropy-based action selection. The top of
# this update function (episode loop, env.reset, env.render) is not shown in
# the original excerpt.
            # =================================================================
            # if (episode + 1) >= 3:
            #     action = RL.choose_ma_action(str(observation))
            # else:
            #     action = RL.choose_action(str(observation))
            # =================================================================
            # action = RL.choose_ma_action(str(observation))
            action = RL.choose_entropy_action(str(observation))
            # action = RL.choose_action(str(observation))

            observation_, reward, done = env.step(action)
            total_reward_value = total_reward_value + reward

            RL.learn(str(observation), action, reward, str(observation_))

            observation = observation_

            if done:
                break

        print('{} episode over'.format(episode + 1))
        print('average reward {}'.format(total_reward_value / (episode + 1)))
        average_reward.append(total_reward_value / (episode + 1))

    # print(RL.q_table)
    # RL.q_table.to_csv('3-2.csv', header=True, index=True)
    print(average_reward)
    env.destroy()
    # RL.q_table.to_clipboard()


if __name__ == '__main__':
    env = Maze()
    RL = QLearningTable(actions=list(range(env.n_actions)))

    env.after(2000, update)
    env.mainloop()
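# ---------------------------------------------------------------------------
# choose_entropy_action is not defined in this excerpt. One common
# entropy-motivated alternative to epsilon-greedy is Boltzmann (softmax)
# exploration, sketched below as a standalone helper; the project's actual
# method may compute something different.
# ---------------------------------------------------------------------------
import numpy as np


def softmax_action(q_values, temperature=1.0):
    """Sample an action index from a softmax over Q values.

    Higher temperature -> more uniform (higher-entropy) behaviour;
    lower temperature -> closer to greedy.
    """
    q = np.asarray(q_values, dtype=np.float64)
    q = q - q.max()                       # shift for numerical stability
    probs = np.exp(q / temperature)
    probs /= probs.sum()
    return np.random.choice(len(q), p=probs)


# hypothetical usage with the tabular agent above:
# action = softmax_action(RL.q_table.loc[str(observation), :].values)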