Python AgentStats.fit Examples

Programming Language: Python

Namespace/Package Name: rlberry.stats

Class/Type: AgentStats

Method/Function: fit

Examples at hotexamples.com: 3

Python AgentStats.fit - 3 examples found. These are the top-rated real-world Python examples of rlberry.stats.AgentStats.fit, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

AgentStats(23)

load(5)

optimize_hyperparams(5)

save(4)

fit(3)

partial_fit(3)

set_writer(3)

Example #1

Show file

File: test_agent_stats.py Project: antoine-moulin/rlberry

def test_agent_stats_partial_fit_and_tuple_env():
    """Check ``partial_fit``/``fit`` when the env is given as a tuple.

    Two ``AgentStats`` objects are built from the same arguments: the first
    is trained through successive ``partial_fit`` calls (with the reported
    ``fraction_fitted`` of every fitted agent checked along the way), the
    second through a single ``fit`` call. The first one is then passed to
    the plotting and policy-comparison helpers with ``show=False``.
    """
    # The environment is described as (constructor, kwargs), not an instance;
    # AgentStats must accept this form too.
    env_spec = (GridWorld, None)

    agent_params = {"n_episodes": 500}
    policy_horizon = 20

    # Both AgentStats objects receive exactly the same keyword arguments.
    shared_kwargs = dict(init_kwargs=agent_params, n_fit=4, eval_horizon=10)
    partial_stats = AgentStats(DummyAgent, env_spec, **shared_kwargs)
    full_stats = AgentStats(DummyAgent, env_spec, **shared_kwargs)

    # Set the writer of the first and the last agent (indices 0 and 3).
    for writer_index in (0, 3):
        partial_stats.set_writer(writer_index, None)

    # Two partial fits: every agent must then report 0.6.
    for fraction in (0.1, 0.5):
        partial_stats.partial_fit(fraction)
    assert all(agent.fraction_fitted == 0.6
               for agent in partial_stats.fitted_agents)

    # After each of the two further partial fits the expected value is 1.0.
    for _ in range(2):
        partial_stats.partial_fit(0.5)
        assert all(agent.fraction_fitted == 1.0
                   for agent in partial_stats.fitted_agents)

    # Train the second object in one go.
    full_stats.fit()

    # Learning curves, then comparison of the final policies.
    plot_episode_rewards([partial_stats], cumulative=True, show=False)
    compare_policies([partial_stats],
                     eval_horizon=policy_horizon,
                     n_sim=10,
                     show=False)

Example #2

Show file

def test_agent_stats_seeding():
    """Check that agents fitted by one ``AgentStats`` get distinct env trajectories.

    For an environment instance and for a ``(constructor, kwargs)`` tuple,
    six agents are fitted; for every window of three consecutive fitted
    agents, the trajectories sampled from their environments must differ
    pairwise according to ``compare_trajectories``.
    """
    sd.set_global_seed(3456)

    agent_params = {'n_episodes': 2, 'horizon': 10}
    env_candidates = [MountainCar(), (gym_make, {'env_name': 'MountainCar-v0'})]

    for env in env_candidates:
        agent_stats = AgentStats(RSUCBVIAgent,
                                 env,
                                 init_kwargs=agent_params,
                                 n_fit=6)
        agent_stats.fit()

        for last in range(2, agent_stats.n_fit):
            # One trajectory per agent in the window, sampled oldest first.
            first_traj, second_traj, third_traj = [
                get_env_trajectory(agent_stats.fitted_agents[idx].env,
                                   horizon=10)
                for idx in (last - 2, last - 1, last)
            ]
            assert not compare_trajectories(first_traj, second_traj)
            assert not compare_trajectories(first_traj, third_traj)
            assert not compare_trajectories(second_traj, third_traj)

Example #3

Show file

    n_trials=10,
    timeout=None,
    n_sim=5,
    n_fit=2,
    n_jobs=2,
    sampler_method='optuna_default')

# Script excerpt: back up an AgentStats object, reload it, resume the
# hyperparameter optimization, then fit. `ppo_stats` is created before this
# excerpt begins.

# Number of trials held by the study at this point, kept so it can be printed
# next to the count obtained after the optimization is resumed.
initial_n_trials = len(ppo_stats.study.trials)

# save under 'ppo_stats_backup', then delete the local name so that the
# object used below is the one returned by AgentStats.load
ppo_stats.save('ppo_stats_backup')
del ppo_stats

# load
ppo_stats = AgentStats.load('ppo_stats_backup')

# continue previous optimization, now with 5s of timeout
best_trial, data = ppo_stats.optimize_hyperparams(n_trials=10,
                                                  timeout=5,
                                                  n_sim=5,
                                                  n_fit=2,
                                                  n_jobs=2,
                                                  continue_previous=True)

# Report the trial counts before and after resuming.
print("number of initial trials = ", initial_n_trials)
print("number of trials after continuing= ", len(ppo_stats.study.trials))

print("----")
print("fitting agents after choosing hyperparams...")
# NOTE(review): the number of agents fitted here is determined where ppo_stats
# is constructed, which is outside this excerpt -- confirm the "4" below.
ppo_stats.fit()  # fit the 4 agents