Esempio n. 1
0
def main():
    """Train a Q-learning agent for NUM_GAMES games and persist its state.

    If FILE already exists, its pickled content is loaded into ``agent.Q``
    before training starts, so training resumes from the earlier table.
    After the training loop, FILE is reopened for writing and handed to
    ``agent.save``.
    """
    agent = QLearningAgent()
    if os.path.isfile(FILE):
        # Pickle data is a byte stream: the file must be opened in binary
        # mode. Text mode fails on Python 3 and can corrupt data through
        # newline translation on Windows.
        # NOTE(review): pickle.load can execute arbitrary code while
        # unpickling -- only point FILE at a file this program wrote.
        with open(FILE, 'rb') as f:
            agent.Q = pickle.load(f)
    for i in range(NUM_GAMES):
        print(i)  # progress indicator: index of the game about to be trained
        agent.train()
    # Binary mode to match the pickle format read above.
    # NOTE(review): assumes agent.save(f) pickles into f -- confirm.
    with open(FILE, 'wb') as f:
        agent.save(f)
Esempio n. 2
0
        # time.sleep(0.5)
        env.render()
        if done:
            break
    return total_reward

# Build the maze environment with gym; is_slippery=False lowers the
# difficulty of the environment.
# Action encoding: 0 left, 1 down, 2 right, 3 up
env = gym.make("FrozenLake-v0", is_slippery=False)

# Create an agent instance, passing in the hyperparameters.
agent = QLearningAgent(
    obs_n=env.observation_space.n,
    act_n=env.action_space.n,
    learning_rate=0.1,
    gamma=0.9,
    e_greed=0.1,
)

# Train for 500 episodes, printing the score of every episode.
for episode in range(500):
    ep_reward, ep_steps = run_episode(env, agent, True)
    print(
        'Episode %s: steps = %s , reward = %.1f'
        % (episode, ep_steps, ep_reward)
    )

# All training is done: check how well the algorithm performs.
test_reward = test_episode(env, agent)

# Save the Q table, but only when the test episode scored exactly 1.
if test_reward == 1:
    agent.save()

print('test reward = %.1f' % test_reward)