from rlberry.envs import GridWorld
from rlberry.stats import AgentStats, plot_episode_rewards, compare_policies

# DummyAgent is assumed to be defined elsewhere in this test module;
# a minimal sketch of the interface it must expose is given after the tests below.


def test_agent_stats_partial_fit_and_tuple_env():
    # Define train env as a (constructor, kwargs) tuple,
    # which must also be accepted by AgentStats
    train_env = (GridWorld, None)

    # Parameters
    params = {"n_episodes": 500}
    horizon = 20

    # Run AgentStats
    stats = AgentStats(DummyAgent, train_env,
                       init_kwargs=params, n_fit=4, eval_horizon=10)
    stats2 = AgentStats(DummyAgent, train_env,
                        init_kwargs=params, n_fit=4, eval_horizon=10)

    # Set some writers
    stats.set_writer(0, None)
    stats.set_writer(3, None)

    # Run partial fit
    stats.partial_fit(0.1)
    stats.partial_fit(0.5)
    for agent in stats.fitted_agents:
        assert agent.fraction_fitted == 0.6
    for _ in range(2):
        stats.partial_fit(0.5)
    for agent in stats.fitted_agents:
        assert agent.fraction_fitted == 1.0

    # Run fit
    stats2.fit()

    # Learning curves
    plot_episode_rewards([stats], cumulative=True, show=False)

    # Compare final policies
    compare_policies([stats], eval_horizon=horizon, n_sim=10, show=False)
def test_agent_stats_partial_fit():
    # Define train and evaluation envs
    train_env = GridWorld()
    eval_env = GridWorld()

    # Parameters
    params = {"n_episodes": 500}
    horizon = 20

    # Check DummyAgent
    agent = DummyAgent(train_env, **params)
    agent.fit()
    agent.policy(None)

    # Run AgentStats
    stats = AgentStats(DummyAgent, train_env,
                       init_kwargs=params, n_fit=4, eval_horizon=10)

    # Run partial fit
    stats.partial_fit(0.1)
    stats.partial_fit(0.5)
    for agent in stats.fitted_agents:
        assert agent.fraction_fitted == 0.6
    for _ in range(2):
        stats.partial_fit(0.5)
    for agent in stats.fitted_agents:
        assert agent.fraction_fitted == 1.0

    # Learning curves
    plot_episode_rewards([stats], cumulative=True, show=False)

    # Compare final policies
    compare_policies([stats], eval_env, eval_horizon=horizon, n_sim=10, show=False)
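# Sketch of the DummyAgent interface exercised by the tests above. This is an
# assumption for illustration only (the actual test module may define it
# differently, e.g. by subclassing rlberry.agents.Agent): the tests only rely
# on a constructor taking the env and n_episodes, fit(), partial_fit(fraction)
# that accumulates `fraction_fitted` up to 1.0, and policy(observation).
class DummyAgent:
    name = "DummyAgent"

    def __init__(self, env, n_episodes=100, **kwargs):
        self.env = env
        self.n_episodes = n_episodes
        self.fraction_fitted = 0.0

    def fit(self, **kwargs):
        # "Train" on the full budget at once.
        self.fraction_fitted = 1.0
        return {}

    def partial_fit(self, fraction, **kwargs):
        # Train on a fraction of the budget, accumulating progress (capped at 1.0).
        self.fraction_fitted = min(1.0, self.fraction_fitted + fraction)
        return {}

    def policy(self, observation, **kwargs):
        # Return an arbitrary constant action.
        return 0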
from rlberry.agents.ppo import PPOAgent
from rlberry.envs.benchmarks.ball_exploration import PBall2D
from rlberry.seeding import seeding
from rlberry.stats import AgentStats, plot_episode_rewards, compare_policies

seeding.set_global_seed(1223)

env = PBall2D()
n_episodes = 400
horizon = 100

ppo_params = {}
ppo_params['n_episodes'] = n_episodes
ppo_params['horizon'] = horizon
ppo_params['gamma'] = 0.99
ppo_params['learning_rate'] = 0.001
ppo_params['eps_clip'] = 0.2
ppo_params['k_epochs'] = 4

ppo_stats = AgentStats(PPOAgent, env, eval_horizon=horizon,
                       init_kwargs=ppo_params, n_fit=2)

# Fit 30% of the total budget, then evaluate
ppo_stats.partial_fit(0.3)
plot_episode_rewards([ppo_stats], show=False, cumulative=True)
compare_policies([ppo_stats], show=False)

# Fit 20% more of the budget, then evaluate again
ppo_stats.partial_fit(0.2)
plot_episode_rewards([ppo_stats], show=False, cumulative=True)
compare_policies([ppo_stats], show=True)