def test_agent_stats_partial_fit_and_tuple_env():
    """Check incremental fitting of AgentStats built from a tuple env.

    Two AgentStats objects are created from the same ``(constructor, kwargs)``
    environment description. The first one is trained through successive
    ``partial_fit`` calls, and ``fraction_fitted`` of every fitted agent is
    checked after each stage; the second one is trained with a single
    ``fit`` call. Finally the plotting helpers are invoked on the partially
    fitted object with ``show=False``.
    """
    # A (constructor, kwargs) tuple must be accepted by AgentStats.
    train_env = (GridWorld, None)

    agent_kwargs = {"n_episodes": 500}
    policy_horizon = 20

    # Both objects are built with exactly the same settings.
    stats_options = {
        "init_kwargs": agent_kwargs,
        "n_fit": 4,
        "eval_horizon": 10,
    }
    partial_stats = AgentStats(DummyAgent, train_env, **stats_options)
    full_stats = AgentStats(DummyAgent, train_env, **stats_options)

    # Set the writers of the first and the last agent to None.
    for writer_index in (0, 3):
        partial_stats.set_writer(writer_index, None)

    # First stage: fractions 0.1 and 0.5 are expected to add up to 0.6.
    for fraction in (0.1, 0.5):
        partial_stats.partial_fit(fraction)
    assert all(
        agent.fraction_fitted == 0.6 for agent in partial_stats.fitted_agents
    )

    # Second stage: two more requests of 0.5 each; the test expects the
    # fitted fraction to be exactly 1.0 afterwards.
    partial_stats.partial_fit(0.5)
    partial_stats.partial_fit(0.5)
    assert all(
        agent.fraction_fitted == 1.0 for agent in partial_stats.fitted_agents
    )

    # Train the second object in one go.
    full_stats.fit()

    # Learning curves.
    plot_episode_rewards([partial_stats], cumulative=True, show=False)

    # Comparison of the final policies.
    compare_policies(
        [partial_stats], eval_horizon=policy_horizon, n_sim=10, show=False
    )
def test_agent_stats_seeding():
    """Check that fitted agents do not share identical env trajectories.

    For an environment instance and for a ``(constructor, kwargs)`` tuple,
    an AgentStats with 6 agents is fitted. Then, for every window of three
    consecutive fitted agents, a trajectory is drawn from each agent's env
    and the test asserts that ``compare_trajectories`` is falsy for every
    pair in the window (presumably meaning the trajectories differ --
    confirm against the helper's definition).
    """
    sd.set_global_seed(3456)

    candidate_envs = [
        MountainCar(),
        (gym_make, {'env_name': 'MountainCar-v0'}),
    ]
    # Pairs are compared in this order: (1st, 2nd), (1st, 3rd), (2nd, 3rd).
    pair_positions = ((0, 1), (0, 2), (1, 2))

    for env in candidate_envs:
        agent_kwargs = {'n_episodes': 2, 'horizon': 10}
        agent_stats = AgentStats(
            RSUCBVIAgent, env, init_kwargs=agent_kwargs, n_fit=6
        )
        agent_stats.fit()

        for last in range(2, agent_stats.n_fit):
            # Trajectories are collected oldest agent first, since drawing a
            # trajectory may advance the state of the underlying env.
            window = [
                get_env_trajectory(
                    agent_stats.fitted_agents[index].env, horizon=10
                )
                for index in (last - 2, last - 1, last)
            ]
            for left, right in pair_positions:
                assert not compare_trajectories(window[left], window[right])
n_trials=10, timeout=None, n_sim=5, n_fit=2, n_jobs=2, sampler_method='optuna_default') initial_n_trials = len(ppo_stats.study.trials) # save ppo_stats.save('ppo_stats_backup') del ppo_stats # load ppo_stats = AgentStats.load('ppo_stats_backup') # continue previous optimization, now with 5s of timeout best_trial, data = ppo_stats.optimize_hyperparams(n_trials=10, timeout=5, n_sim=5, n_fit=2, n_jobs=2, continue_previous=True) print("number of initial trials = ", initial_n_trials) print("number of trials after continuing= ", len(ppo_stats.study.trials)) print("----") print("fitting agents after choosing hyperparams...") ppo_stats.fit() # fit the 4 agents