def main():
    """Train a Q-learning agent for NUM_GAMES games and persist its Q-table.

    If FILE already exists, the agent's ``Q`` attribute is restored from it
    with ``pickle.load`` before training starts. Each game prints its index
    and calls ``agent.train()``. Afterwards the agent is asked to save itself
    to FILE through ``agent.save(f)``.
    """
    agent = QLearningAgent()

    # Resume from a previous run when a saved Q-table is present.
    if os.path.isfile(FILE):
        # Pickle data is a byte stream, so the file must be opened in binary
        # mode; text mode makes pickle.load fail on Python 3.
        # NOTE: unpickling can execute arbitrary code -- only load a FILE that
        # this program wrote itself.
        with open(FILE, 'rb') as f:
            agent.Q = pickle.load(f)

    for i in range(NUM_GAMES):
        print(i)
        agent.train()

    # Binary mode to match the pickle.load above (FILE is read back as pickle).
    # NOTE(review): the original nesting was lost; saving once after the loop
    # is assumed here -- confirm it was not meant as a per-game checkpoint.
    with open(FILE, 'wb') as f:
        agent.save(f)
# time.sleep(0.5) env.render() if done: break return total_reward # 使用gym创建迷宫环境,设置is_slippery为False降低环境难度 env = gym.make("FrozenLake-v0", is_slippery=False) # 0 left, 1 down, 2 right, 3 up # 创建一个agent实例,输入超参数 agent = QLearningAgent( obs_n=env.observation_space.n, act_n=env.action_space.n, learning_rate=0.1, gamma=0.9, e_greed=0.1) # 训练500个episode,打印每个episode的分数 for episode in range(500): ep_reward, ep_steps = run_episode(env, agent, True) print('Episode %s: steps = %s , reward = %.1f' % (episode, ep_steps, ep_reward)) # 全部训练结束,查看算法效果 test_reward = test_episode(env, agent) # 保存Q table if(test_reward == 1): agent.save() print('test reward = %.1f' % (test_reward))