# --- On-policy first-visit MC control: environment/agent setup ---
# (RacetrackEnv and FisrtVisitMC are defined elsewhere in the repo; the exact
# import paths are not shown in this excerpt.)
def env_agent_config(cfg, seed=1):
    env = RacetrackEnv()
    action_dim = 9  # the racetrack task exposes 9 discrete actions
    # NOTE: `seed` is accepted for API consistency but is not used here.
    agent = FisrtVisitMC(action_dim, cfg)  # class name kept as spelled in the repo
    return env, agent
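# For reference, a minimal sketch of the epsilon-greedy action selection an
# on-policy first-visit MC agent typically uses. This is an illustration under
# assumed names (Q, epsilon, action_dim, choose_action); it is not necessarily
# how FisrtVisitMC is implemented in this repo.
from collections import defaultdict
import numpy as np

class EpsilonGreedyAgentSketch:
    def __init__(self, action_dim, epsilon=0.1):
        self.action_dim = action_dim
        self.epsilon = epsilon
        self.Q = defaultdict(lambda: np.zeros(action_dim))  # Q[state] -> action values

    def choose_action(self, state):
        # explore with probability epsilon, otherwise act greedily w.r.t. Q
        if np.random.rand() < self.epsilon:
            return np.random.randint(self.action_dim)
        return int(np.argmax(self.Q[state]))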
def mc_train(cfg, env, agent):
    rewards = []     # raw episode returns
    ma_rewards = []  # smoothed (moving-average) returns
    for i_episode in range(cfg.n_episodes):
        # The rollout below, down to `if done: break`, did not survive in the
        # original excerpt and is a standard reconstruction from context.
        state = env.reset()
        ep_reward = 0
        one_ep_transition = []  # (state, action, reward) tuples of this episode
        while True:
            action = agent.choose_action(state)
            next_state, reward, done = env.step(action)
            ep_reward += reward
            one_ep_transition.append((state, action, reward))
            state = next_state
            if done:
                break
        rewards.append(ep_reward)
        if ma_rewards:
            ma_rewards.append(ma_rewards[-1] * 0.9 + ep_reward * 0.1)
        else:
            ma_rewards.append(ep_reward)
        agent.update(one_ep_transition)  # MC update from the completed episode
        if (i_episode + 1) % 10 == 0:
            # use the cfg argument rather than the global mc_cfg
            print("Episode:{}/{}: Reward:{}".format(i_episode + 1, cfg.n_episodes, ep_reward))
    return rewards, ma_rewards


if __name__ == "__main__":
    # MCConfig, save_results, plot_rewards and RESULT_PATH come from elsewhere
    # in the repo; their import paths are not shown in this excerpt.
    mc_cfg = MCConfig()
    env = RacetrackEnv()
    n_actions = 9
    agent = FisrtVisitMC(n_actions, mc_cfg)  # equivalently: env_agent_config(mc_cfg)
    rewards, ma_rewards = mc_train(mc_cfg, env, agent)
    save_results(rewards, ma_rewards, tag='train', path=RESULT_PATH)
    plot_rewards(rewards, ma_rewards, tag="train",
                 algo="On-Policy First-Visit MC Control", path=RESULT_PATH)
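# A minimal sketch of what agent.update(one_ep_transition) computes in
# on-policy first-visit MC control: walk the episode backwards accumulating
# the return G, and update Q(s, a) toward G only at the first visit of each
# (state, action) pair. Q, returns_count and gamma are assumed names for
# illustration, not necessarily the repo's actual attributes.
def first_visit_mc_update(Q, returns_count, one_ep_transition, gamma=0.9):
    # Q: mapping state -> array of action values; returns_count: mapping
    # (state, action) -> number of first visits seen so far.
    first_visit = {}  # earliest time step at which each (state, action) occurs
    for t, (state, action, _) in enumerate(one_ep_transition):
        first_visit.setdefault((state, action), t)
    G = 0.0
    for t in reversed(range(len(one_ep_transition))):
        state, action, reward = one_ep_transition[t]
        G = gamma * G + reward  # return from step t onward
        if first_visit[(state, action)] == t:  # first-visit check
            returns_count[(state, action)] += 1
            n = returns_count[(state, action)]
            # incremental mean: Q <- Q + (G - Q) / n
            Q[state][action] += (G - Q[state][action]) / n

# hypothetical wiring with the training loop above:
#   Q = defaultdict(lambda: np.zeros(9)); counts = defaultdict(int)
#   first_visit_mc_update(Q, counts, one_ep_transition, gamma=0.9)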
# --- Sarsa: environment/agent setup (same pattern as the MC version above) ---
# (Sarsa is defined elsewhere in the repo; its import path is not shown here.)
def env_agent_config(cfg, seed=1):
    env = RacetrackEnv()
    action_dim = 9
    # NOTE: `seed` is accepted for API consistency but is not used here.
    agent = Sarsa(action_dim, cfg)
    return env, agent
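# For comparison with the MC agent, a minimal sketch of the Sarsa TD(0) update
# such an agent applies; Q, lr and gamma are assumed names for illustration:
#     Q(s, a) <- Q(s, a) + lr * (r + gamma * Q(s', a') - Q(s, a))
def sarsa_update(Q, state, action, reward, next_state, next_action, done,
                 lr=0.1, gamma=0.9):
    # bootstrap from the action actually chosen next (on-policy), unless terminal
    td_target = reward if done else reward + gamma * Q[next_state][next_action]
    Q[state][action] += lr * (td_target - Q[state][action])

# Unlike the MC update, which waits for the whole episode, this update runs
# inside the step loop after every transition.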