# --- hyperparameters from the command line ---
# NOTE(review): attribute is `args.episode` (singular) while the local is
# plural — looks like it may be a typo for `args.episodes`; confirm against
# the argparse definition before changing.
episodes = args.episode
time_steps = args.steps
epsilon = args.epsilon
render = args.render

# --- run algorithm ---
# td_lam.load_models()  # optional checkpoint restore, deliberately disabled
stats = td_lam.train(env, episodes, time_steps, epsilon)
# td_lam.save_models()  # optional checkpoint save, deliberately disabled

# --- visualize the results ---
# Plots are written next to this script under ./results, keyed by experiment id.
result_folder = Path(__file__).resolve().parent / 'results'
viz = Visualizer(result_path=result_folder)
# f-strings for consistency with the sibling REINFORCE/PPO run scripts.
viz.plot_episode_length(stats, plot_name=f'td_episode_length_{args.exp_count}')
viz.plot_reward(stats, plot_name=f'td_rewards_{args.exp_count}')

# --- animation ---
# Roll out 2 greedy-ish episodes (epsilon=0.02) for visual inspection.
# NOTE(review): this rebinds `env` to a fresh ContinuousCartPoleEnv, shadowing
# the training env — fine at script end, but worth confirming that is intended.
if render:
    with contextlib.closing(ContinuousCartPoleEnv()) as env:
        for _ in range(2):
            s = env.reset()
            for _ in range(300):
                env.render()
                a = td_lam.get_action(s, epsilon=0.02)
                s, _, d, _ = env.step(a)
                if d:  # episode terminated early
                    break
# Tail of the REINFORCE agent constructor call — the opening
# `...(` and the positional args before `action_dim` are outside this chunk.
action_dim, gamma=0.99, hidden_dim=hidden_dim,
policy_lr=policy_lr, baseline_lr=baseline_lr,
d2c=d2c_converter)  # presumably a discrete-to-continuous action converter — TODO confirm

# --- run algorithm ---
# Optionally restore a saved checkpoint (keyed by experiment id) before training.
if load_flag:
    reinforce.load_models(model_name=exp_count)
stats = reinforce.train(env=env, episodes=episodes, time_steps=timesteps)
reinforce.save_models(model_name=exp_count)

# --- visualize the results ---
# Plots are written next to this script under ./results, keyed by experiment id.
result_folder = Path(__file__).resolve().parent / 'results'
viz = Visualizer(result_path=result_folder)
viz.plot_episode_length(stats, plot_name=f'r_d_episodes_{exp_count}')
viz.plot_reward(stats, plot_name=f'r_d_rewards_{exp_count}')

# --- animation ---
# Roll out 2 episodes with the trained policy for visual inspection;
# rebinds `env` to a fresh ContinuousCartPoleEnv for the duration.
if render_flag:
    with contextlib.closing(ContinuousCartPoleEnv()) as env:
        for _ in range(2):
            s = env.reset()
            for _ in range(500):
                env.render()
                # get_action returns (action, extra) — extra is discarded here.
                a, _ = reinforce.get_action(s)
                s, _, d, _ = env.step(a)
                if d:  # episode terminated early
                    break
# Tail of the PPO agent constructor call — the opening `...(` and the
# arguments before `critic_lr` are outside this chunk.
critic_lr=critic_lr, K_epochs=K_epochs, eps_clip=eps_clip,
entropy_coeff=entropy_coeff, verbose=verbose_flag)

# --- run algorithm ---
# Optionally restore a saved checkpoint (keyed by experiment id) before training.
if load_flag:
    ppo.load_models(model_name=exp_count)
# update_timestep: how often (in env steps) the PPO policy update runs — TODO confirm
stats = ppo.train(env=env, episodes=episodes, timesteps=timesteps,
                  update_timestep=update_timestep)
ppo.save_models(model_name=exp_count)

# --- visualize the results ---
# Plots are written next to this script under ./results, keyed by experiment id.
result_folder = Path(__file__).resolve().parent / 'results'
viz = Visualizer(result_path=result_folder)
viz.plot_episode_length(stats, plot_name=f'ppo_c_episodes_{exp_count}')
viz.plot_reward(stats, plot_name=f'ppo_c_rewards_{exp_count}')

# --- animation ---
# Roll out 3 episodes with the trained policy for visual inspection;
# rebinds `env` to a fresh ContinuousCartPoleEnv for the duration.
if render_flag:
    with contextlib.closing(ContinuousCartPoleEnv()) as env:
        for _ in range(3):
            s = env.reset()
            for _ in range(500):
                env.render()
                a = ppo.get_action(s)
                s, _, d, _ = env.step(a)
                if d:  # episode terminated early
                    break