Example #1
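This test relies on a few rlberry names that are not shown in the snippet. The imports below are an assumption (module paths vary across rlberry versions); DummyAgent is a fixture defined in the test module itself, and a minimal sketch of it is given after the test.

# Assumed imports (module paths may differ across rlberry versions):
from rlberry.envs.finite import GridWorld
from rlberry.stats import AgentStats, plot_episode_rewards, compare_policies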
def test_agent_stats_partial_fit_and_tuple_env():
    # Define the training env;
    # a (constructor, kwargs) tuple must also work in AgentStats
    train_env = (GridWorld, None)

    # Parameters
    params = {"n_episodes": 500}
    horizon = 20

    # Run AgentStats
    stats = AgentStats(DummyAgent,
                       train_env,
                       init_kwargs=params,
                       n_fit=4,
                       eval_horizon=10)
    stats2 = AgentStats(DummyAgent,
                        train_env,
                        init_kwargs=params,
                        n_fit=4,
                        eval_horizon=10)
    # Set the writer (here None) for two of the fitted agent instances
    stats.set_writer(0, None)
    stats.set_writer(3, None)

    # Run partial fit
    stats.partial_fit(0.1)
    stats.partial_fit(0.5)
    for agent in stats.fitted_agents:
        assert agent.fraction_fitted == 0.6
    for _ in range(2):
        stats.partial_fit(0.5)
        for agent in stats.fitted_agents:
            assert agent.fraction_fitted == 1.0

    # Run fit
    stats2.fit()

    # learning curves
    plot_episode_rewards([stats], cumulative=True, show=False)

    # compare final policies
    compare_policies([stats], eval_horizon=horizon, n_sim=10, show=False)
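DummyAgent above is a test fixture, not part of the public API. A minimal sketch of an agent that would satisfy the assertions in this test, assuming rlberry's Agent base class and its fit/partial_fit/policy interface:

from rlberry.agents import Agent

class DummyAgent(Agent):
    # Hypothetical stand-in, consistent with the test above.
    def __init__(self, env, n_episodes=100, **kwargs):
        Agent.__init__(self, env, **kwargs)
        self.n_episodes = n_episodes
        self.fraction_fitted = 0.0

    def fit(self, **kwargs):
        self.fraction_fitted = 1.0
        return {}

    def partial_fit(self, fraction, **kwargs):
        # Accumulate training progress, capped at 1.0 as the test expects.
        self.fraction_fitted = min(1.0, self.fraction_fitted + fraction)
        return {}

    def policy(self, observation, **kwargs):
        return 0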
Example #2
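As above, the imports are an assumption about module paths; get_env_trajectory and compare_trajectories are small helpers that live next to the test, sketched after the example.

# Assumed imports (module paths may differ across rlberry versions):
import rlberry.seeding as sd
from rlberry.agents import RSUCBVIAgent
from rlberry.envs import gym_make
from rlberry.envs.classic_control import MountainCar
from rlberry.stats import AgentStats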
def test_agent_stats_seeding():
    sd.set_global_seed(3456)
    for env in [MountainCar(), (gym_make, {'env_name': 'MountainCar-v0'})]:
        agent_stats = AgentStats(RSUCBVIAgent,
                                 env,
                                 init_kwargs={
                                     'n_episodes': 2,
                                     'horizon': 10
                                 },
                                 n_fit=6)
        agent_stats.fit()

        # Each fitted agent should hold a differently seeded copy of the env,
        # so trajectories from any pair of agents must differ.
        for ii in range(2, agent_stats.n_fit):
            traj1 = get_env_trajectory(agent_stats.fitted_agents[ii - 2].env,
                                       horizon=10)
            traj2 = get_env_trajectory(agent_stats.fitted_agents[ii - 1].env,
                                       horizon=10)
            traj3 = get_env_trajectory(agent_stats.fitted_agents[ii].env,
                                       horizon=10)
            assert not compare_trajectories(traj1, traj2)
            assert not compare_trajectories(traj1, traj3)
            assert not compare_trajectories(traj2, traj3)
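get_env_trajectory and compare_trajectories are not part of rlberry's public API; a plausible sketch of the helpers this test assumes, using a standard gym-style step interface:

import numpy as np

def get_env_trajectory(env, horizon):
    # Roll out `horizon` steps and record the visited states.
    states = []
    obs = env.reset()
    for _ in range(horizon):
        states.append(obs)
        obs, _, done, _ = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    return states

def compare_trajectories(traj1, traj2):
    # True if the two trajectories match state-by-state.
    if len(traj1) != len(traj2):
        return False
    return all(np.array_equal(s1, s2) for s1, s2 in zip(traj1, traj2))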
Example #3
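This snippet starts in the middle of a hyperparameter-optimization workflow: ppo_stats is an AgentStats instance built earlier. A hypothetical setup consistent with the calls below (the agent, env, and parameter values here are assumptions, not from the original):

# Hypothetical setup for the snippet below:
from rlberry.agents.ppo import PPOAgent
from rlberry.envs.benchmarks.ball_exploration import PBall2D
from rlberry.stats import AgentStats

train_env = PBall2D()
ppo_stats = AgentStats(PPOAgent,
                       train_env,
                       init_kwargs={'n_episodes': 100, 'horizon': 30},
                       n_fit=4,
                       eval_horizon=30)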
# Run a first round of hyperparameter optimization
best_trial, data = ppo_stats.optimize_hyperparams(
    n_trials=10,
    timeout=None,
    n_sim=5,
    n_fit=2,
    n_jobs=2,
    sampler_method='optuna_default')

initial_n_trials = len(ppo_stats.study.trials)

# save
ppo_stats.save('ppo_stats_backup')
del ppo_stats

# load
ppo_stats = AgentStats.load('ppo_stats_backup')

# continue the previous optimization, now with a 5-second timeout
best_trial, data = ppo_stats.optimize_hyperparams(n_trials=10,
                                                  timeout=5,
                                                  n_sim=5,
                                                  n_fit=2,
                                                  n_jobs=2,
                                                  continue_previous=True)

print("number of initial trials = ", initial_n_trials)
print("number of trials after continuing= ", len(ppo_stats.study.trials))

print("----")
print("fitting agents after choosing hyperparams...")
ppo_stats.fit()  # fit the n_fit agents using the chosen hyperparams
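Because AgentStats.load restores the Optuna study and continue_previous=True reuses it, the second optimize_hyperparams call adds its trials to the saved ones, so the trial count printed after continuing should exceed initial_n_trials.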