Code example #1
0
File: task0_train.py  Project: wzhcoder/easy-rl
def env_agent_config(cfg, seed=1):
    """Build the Racetrack environment and a first-visit MC control agent.

    ``cfg`` is forwarded to the agent; ``seed`` is accepted for interface
    compatibility but not used in this body.  Returns ``(env, agent)``.
    """
    environment = RacetrackEnv()
    num_actions = 9  # fixed size of the Racetrack action space
    return environment, FisrtVisitMC(num_actions, cfg)
Code example #2
0
            # Accumulate the episode return and record the (state, action,
            # reward) transition for the Monte Carlo update after the episode.
            ep_reward += reward
            one_ep_transition.append((state, action, reward))
            state = next_state
            if done:
                break
        rewards.append(ep_reward)
        # Exponential moving average (decay 0.9) of episode rewards, used for
        # a smoother training curve; seeded with the first episode's reward.
        if ma_rewards:
            ma_rewards.append(ma_rewards[-1] * 0.9 + ep_reward * 0.1)
        else:
            ma_rewards.append(ep_reward)
        # Update the agent from the whole episode's transitions at once.
        agent.update(one_ep_transition)
        # Log progress every 10 episodes.
        # NOTE(review): reads the global ``mc_cfg`` rather than a parameter —
        # confirm the enclosing function receives/sees the same config object.
        if (i_episode + 1) % 10 == 0:
            print("Episode:{}/{}: Reward:{}".format(i_episode + 1,
                                                    mc_cfg.n_episodes,
                                                    ep_reward))
    return rewards, ma_rewards


if __name__ == "__main__":
    # Train on-policy first-visit MC control on Racetrack, then save and
    # plot the reward curves.
    mc_cfg = MCConfig()  # must stay named ``mc_cfg``: mc_train reads it as a global
    env = RacetrackEnv()
    n_actions = 9  # fixed size of the Racetrack action space
    agent = FisrtVisitMC(n_actions, mc_cfg)
    rewards, ma_rewards = mc_train(mc_cfg, env, agent)
    save_results(rewards, ma_rewards, tag='train', path=RESULT_PATH)
    plot_rewards(rewards,
                 ma_rewards,
                 tag="train",
                 algo="On-Policy First-Visit MC Control",
                 path=RESULT_PATH)
Code example #3
0
File: task0_train.py  Project: wzhcoder/easy-rl
def env_agent_config(cfg, seed=1):
    """Build the Racetrack environment and a Sarsa agent.

    ``cfg`` is forwarded to the agent; ``seed`` is accepted for interface
    compatibility but not used in this body.  Returns ``(env, agent)``.
    """
    environment = RacetrackEnv()
    num_actions = 9  # fixed size of the Racetrack action space
    return environment, Sarsa(num_actions, cfg)