예제 #1
0
def test_discount_optimization():
    seeding.set_global_seed(42)

    class ValueIterationAgentToOptimize(ValueIterationAgent):
        @classmethod
        def sample_parameters(cls, trial):
            """
            Sample hyperparameters for hyperparam optimization using Optuna (https://optuna.org/)
            """
            gamma = trial.suggest_categorical('gamma', [0.1, 0.99])
            return {'gamma': gamma}

    env = GridWorld(nrows=3, ncols=10,
                    reward_at={(1, 1): 0.1, (2, 9): 1.0},
                    walls=((1, 4), (2, 4), (1, 5)),
                    success_probability=0.9)

    vi_params = {'gamma': 0.1, 'epsilon': 1e-3}

    vi_stats = AgentStats(ValueIterationAgentToOptimize, env, eval_horizon=20, init_kwargs=vi_params, n_fit=4, n_jobs=1)

    vi_stats.optimize_hyperparams(n_trials=5, timeout=30, n_sim=5, n_fit=1, n_jobs=1,
                                  sampler_method='random', pruner_method='none')

    assert vi_stats.best_hyperparams['gamma'] == 0.99
예제 #2
0
def test_hyperparam_optim_random():
    # Define train env
    train_env = GridWorld()

    # Parameters
    params = {"n_episodes": 500}

    # Run AgentStats
    stats_agent = AgentStats(DummyAgent, train_env, init_kwargs=params,
                             n_fit=4, eval_horizon=10, n_jobs=1)

    # test hyperparameter optimization with random sampler
    stats_agent.optimize_hyperparams(sampler_method="random")
예제 #3
0
def test_hyperparam_optim_tpe():
    # Define trainenv
    train_env = GridWorld()

    # Parameters
    params = {"n_episodes": 500}

    # Run AgentStats
    stats_agent = AgentStats(DummyAgent, train_env, init_kwargs=params,
                             n_fit=4, eval_horizon=10, n_jobs=1)

    # test hyperparameter optimization with TPE sampler
    # using hyperopt default values
    sampler_kwargs = TPESampler.hyperopt_parameters()
    stats_agent.optimize_hyperparams(sampler_kwargs=sampler_kwargs)
예제 #4
0
def test_hyperparam_optim_grid():
    # Define train env
    train_env = GridWorld()

    # Parameters
    params = {"n_episodes": 500}

    # Run AgentStats
    stats_agent = AgentStats(DummyAgent, train_env, init_kwargs=params,
                             n_fit=4, eval_horizon=10, n_jobs=1)

    # test hyperparameter optimization with grid sampler
    search_space = {"hyperparameter1": [1, 2, 3],
                    "hyperparameter2": [-5, 0, 5]}
    sampler_kwargs = {"search_space": search_space}
    stats_agent.optimize_hyperparams(n_trials=3*3,
                                     sampler_method="grid",
                                     sampler_kwargs=sampler_kwargs)
예제 #5
0
}

# -------------------------------
# Run AgentStats and save results
# --------------------------------
ppo_stats = AgentStats(PPOAgent,
                       train_env,
                       eval_horizon=HORIZON,
                       init_kwargs=params_ppo,
                       n_fit=4)

# hyperparam optim
best_trial, data = ppo_stats.optimize_hyperparams(
    n_trials=10,
    timeout=None,
    n_sim=5,
    n_fit=2,
    n_jobs=2,
    sampler_method='optuna_default')

initial_n_trials = len(ppo_stats.study.trials)

# save
ppo_stats.save('ppo_stats_backup')
del ppo_stats

# load
ppo_stats = AgentStats.load('ppo_stats_backup')

# continue previous optimization, now with 5s of timeout
best_trial, data = ppo_stats.optimize_hyperparams(n_trials=10,