def test_discount_optimization():
    seeding.set_global_seed(42)

    class ValueIterationAgentToOptimize(ValueIterationAgent):
        @classmethod
        def sample_parameters(cls, trial):
            """
            Sample hyperparameters for hyperparam optimization using
            Optuna (https://optuna.org/).
            """
            gamma = trial.suggest_categorical('gamma', [0.1, 0.99])
            return {'gamma': gamma}

    env = GridWorld(nrows=3, ncols=10,
                    reward_at={(1, 1): 0.1, (2, 9): 1.0},
                    walls=((1, 4), (2, 4), (1, 5)),
                    success_probability=0.9)

    vi_params = {'gamma': 0.1, 'epsilon': 1e-3}
    vi_stats = AgentStats(ValueIterationAgentToOptimize, env,
                          eval_horizon=20,
                          init_kwargs=vi_params,
                          n_fit=4,
                          n_jobs=1)

    vi_stats.optimize_hyperparams(n_trials=5, timeout=30,
                                  n_sim=5, n_fit=1, n_jobs=1,
                                  sampler_method='random',
                                  pruner_method='none')

    assert vi_stats.best_hyperparams['gamma'] == 0.99
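
# Note: the test above restricts gamma to two categorical values so the
# optimizer has a clear-cut optimum to find. With Optuna's trial API, a
# continuous range could be sampled instead. Illustrative sketch only,
# not used by the test:
#
#     @classmethod
#     def sample_parameters(cls, trial):
#         gamma = trial.suggest_float('gamma', 0.1, 0.99)
#         return {'gamma': gamma}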
def test_hyperparam_optim_random():
    # Define train env
    train_env = GridWorld()

    # Parameters
    params = {"n_episodes": 500}

    # Run AgentStats
    stats_agent = AgentStats(DummyAgent, train_env,
                             init_kwargs=params,
                             n_fit=4,
                             eval_horizon=10,
                             n_jobs=1)

    # test hyperparameter optimization with random sampler
    stats_agent.optimize_hyperparams(sampler_method="random")
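
# The hyperparameter-optimization tests here assume that DummyAgent
# implements sample_parameters(). A minimal, hypothetical sketch of such
# a method, matching the search space used in the grid test below (the
# actual definition lives with DummyAgent):
#
#     @classmethod
#     def sample_parameters(cls, trial):
#         hyperparameter1 = trial.suggest_categorical('hyperparameter1', [1, 2, 3])
#         hyperparameter2 = trial.suggest_categorical('hyperparameter2', [-5, 0, 5])
#         return {'hyperparameter1': hyperparameter1,
#                 'hyperparameter2': hyperparameter2}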
def test_hyperparam_optim_tpe():
    # Define train env
    train_env = GridWorld()

    # Parameters
    params = {"n_episodes": 500}

    # Run AgentStats
    stats_agent = AgentStats(DummyAgent, train_env,
                             init_kwargs=params,
                             n_fit=4,
                             eval_horizon=10,
                             n_jobs=1)

    # test hyperparameter optimization with TPE sampler,
    # using hyperopt default values
    sampler_kwargs = TPESampler.hyperopt_parameters()
    stats_agent.optimize_hyperparams(sampler_kwargs=sampler_kwargs)
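
# TPESampler.hyperopt_parameters() returns the keyword arguments that
# reproduce hyperopt's default TPE configuration. A sketch of the plain
# Optuna usage these kwargs correspond to (assumption about what
# optimize_hyperparams does internally, not rlberry code):
#
#     import optuna
#     from optuna.samplers import TPESampler
#     sampler = TPESampler(**TPESampler.hyperopt_parameters())
#     study = optuna.create_study(sampler=sampler)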
def test_hyperparam_optim_grid():
    # Define train env
    train_env = GridWorld()

    # Parameters
    params = {"n_episodes": 500}

    # Run AgentStats
    stats_agent = AgentStats(DummyAgent, train_env,
                             init_kwargs=params,
                             n_fit=4,
                             eval_horizon=10,
                             n_jobs=1)

    # test hyperparameter optimization with grid sampler
    search_space = {"hyperparameter1": [1, 2, 3],
                    "hyperparameter2": [-5, 0, 5]}
    sampler_kwargs = {"search_space": search_space}
    stats_agent.optimize_hyperparams(n_trials=3 * 3,
                                     sampler_method="grid",
                                     sampler_kwargs=sampler_kwargs)
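
# With the grid sampler, exhausting the grid takes one trial per point:
# len(search_space["hyperparameter1"]) * len(search_space["hyperparameter2"])
# = 3 * 3 = 9, which is why n_trials is set to 3 * 3 above. In general:
#
#     n_points = 1
#     for values in search_space.values():
#         n_points *= len(values)   # 9 for the grid above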
}

# -------------------------------
# Run AgentStats and save results
# -------------------------------
ppo_stats = AgentStats(PPOAgent, train_env,
                       eval_horizon=HORIZON,
                       init_kwargs=params_ppo,
                       n_fit=4)

# hyperparam optim
best_trial, data = ppo_stats.optimize_hyperparams(
    n_trials=10, timeout=None,
    n_sim=5, n_fit=2, n_jobs=2,
    sampler_method='optuna_default')

initial_n_trials = len(ppo_stats.study.trials)

# save
ppo_stats.save('ppo_stats_backup')
del ppo_stats

# load
ppo_stats = AgentStats.load('ppo_stats_backup')

# continue previous optimization, now with 5s of timeout
best_trial, data = ppo_stats.optimize_hyperparams(n_trials=10,
                                                  timeout=5)
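
# A natural check after resuming (sketch; the original snippet is
# truncated at this point): the reloaded study should keep accumulating
# trials on top of the ones recorded before saving.
#
#     assert len(ppo_stats.study.trials) > initial_n_trials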