Example #1
    def _create_sampler(self, sampler_method: str) -> BaseSampler:
        # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
        if sampler_method == "random":
            sampler = RandomSampler(seed=self.seed)
        elif sampler_method == "tpe":
            # TODO: try with multivariate=True
            sampler = TPESampler(n_startup_trials=self.n_startup_trials, seed=self.seed)
        elif sampler_method == "skopt":
            # cf https://scikit-optimize.github.io/#skopt.Optimizer
            # GP: gaussian process
            # Gradient boosted regression: GBRT
            sampler = SkoptSampler(skopt_kwargs={"base_estimator": "GP", "acq_func": "gp_hedge"})
        else:
            raise ValueError(f"Unknown sampler: {sampler_method}")
        return sampler
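
The class this method belongs to is not shown; presumably a matching pruner factory sits alongside it. The sketch below is an assumption modeled on the pruner-selection logic that appears in the full examples later on this page; the name `_create_pruner` and the `self.n_trials` / `self.n_evaluations` attributes are illustrative, not from the source.

    def _create_pruner(self, pruner_method: str) -> BasePruner:
        # Hedged sketch (assumption), mirroring the pruner selection used in the examples below
        if pruner_method == "halving":
            pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
        elif pruner_method == "median":
            pruner = MedianPruner(n_startup_trials=self.n_startup_trials, n_warmup_steps=self.n_evaluations // 3)
        elif pruner_method == "none":
            # Do not prune
            pruner = MedianPruner(n_startup_trials=self.n_trials, n_warmup_steps=self.n_evaluations)
        else:
            raise ValueError(f"Unknown pruner: {pruner_method}")
        return pruner
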
def hyperparam_optimization(algo,
                            model_fn,
                            env_fn,
                            n_trials=10,
                            n_timesteps=5000,
                            hyperparams=None,
                            n_jobs=1,
                            sampler_method='random',
                            pruner_method='halving',
                            seed=0,
                            verbose=1):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.DataFrame, dict) detailed result of the optimization and the best hyperparameters
    """

    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    n_startup_trials = 10
    # test during 5 episodes
    n_eval_episodes = 5
    # evaluate every 20th of the maximum budget per iteration
    n_evaluations = 20
    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={
            'base_estimator': "GP",
            'acq_func': 'gp_hedge'
        })
    else:
        raise ValueError('Unknown sampler: {}'.format(sampler_method))

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=n_startup_trials,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError('Unknown pruner: {}'.format(pruner_method))

    if verbose > 0:
        print("Sampler: {} - Pruner: {}".format(sampler_method, pruner_method))

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        # Bug to report: the original compared trial.model_class against the strings
        # 'ddpg'/'td3'; comparing against the classes fixes it (changed by Pierre)
        if algo in ['ddpg', 'td3'] or trial.model_class in [DDPG, TD3]:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = eval_freq
        if isinstance(model.get_env(), VecEnv):
            eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: use non-deterministic eval for Atari?
        eval_callback = TrialEvalCallback(eval_env,
                                          trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=True)

        if algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(eval_env, VecEnv):
                print("UNVECTORIZE ENV")
                eval_env = _UnvecWrapper(eval_env)
            # eval_env = HERGoalEnvWrapper(eval_env)  # commented by Pierre

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        cost = -1 * eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    ######## Added by Pierre
    best_params = trial.params
    print("best params: ", best_params)
    # print("best value: ", study.best_value)
    # print("best trial: ", study.best_trial)

    # with open('hyperparameter.yml', 'w') as outfile:
    #     yaml.dump(best_params, outfile)
    ########

    return study.trials_dataframe(), best_params
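
A hedged sketch of how this variant might be wired up, assuming stable-baselines (v2) with PPO2, a Gym environment, and a "ppo2" entry in HYPERPARAMS_SAMPLER; the make_env / make_model factories and the environment id are illustrative, not from the source.

# Hypothetical wiring for the function above; factory names and env id are assumptions.
import gym
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv


def make_env(n_envs=1, eval_env=False):
    # The objective calls env_fn(n_envs=1) and env_fn(n_envs=1, eval_env=True)
    return DummyVecEnv([lambda: gym.make("CartPole-v1") for _ in range(n_envs)])


def make_model(**kwargs):
    # Sampled hyperparameters are forwarded to the constructor; this assumes
    # they are valid PPO2 keyword arguments.
    return PPO2("MlpPolicy", make_env(n_envs=1), verbose=0, **kwargs)


results_df, best_params = hyperparam_optimization(
    "ppo2", make_model, make_env,
    n_trials=10, n_timesteps=5000,
    sampler_method="tpe", pruner_method="median",
)
print(results_df.head())
print("best params:", best_params)
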
def hyperparam_optimization(  # noqa: C901
    algo,
    model_fn,
    env_fn,
    n_trials=10,
    n_timesteps=5000,
    hyperparams=None,
    n_jobs=1,
    sampler_method="tpe",
    pruner_method="median",
    n_startup_trials=10,
    n_evaluations=20,
    n_eval_episodes=5,
    storage=None,
    study_name=None,
    seed=0,
    verbose=1,
    deterministic_eval=True,
):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param n_startup_trials: (int)
    :param n_evaluations: (int) number of evaluations during training (one every n_timesteps / n_evaluations steps)
    :param n_eval_episodes: (int) number of episodes to run at each evaluation
    :param storage: (Optional[str])
    :param study_name: (Optional[str])
    :param seed: (int)
    :param verbose: (int)
    :param deterministic_eval: (bool)
    :return: (pd.DataFrame) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    if hyperparams is None:
        hyperparams = {}

    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
    if sampler_method == "random":
        sampler = RandomSampler(seed=seed)
    elif sampler_method == "tpe":
        # TODO: try with multivariate=True
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == "skopt":
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={
            "base_estimator": "GP",
            "acq_func": "gp_hedge"
        })
    else:
        raise ValueError(f"Unknown sampler: {sampler_method}")

    if pruner_method == "halving":
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == "median":
        pruner = MedianPruner(n_startup_trials=n_startup_trials,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == "none":
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError(f"Unknown pruner: {pruner_method}")

    if verbose > 0:
        print(f"Sampler: {sampler_method} - Pruner: {pruner_method}")

    study = optuna.create_study(sampler=sampler,
                                pruner=pruner,
                                storage=storage,
                                study_name=study_name,
                                load_if_exists=True,
                                direction="maximize")
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == "her":
            trial.model_class = hyperparams["model_class"]

        # Hack to use DDPG/TD3 noise sampler
        if algo in ["ddpg", "td3"] or trial.model_class in ["ddpg", "td3"]:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)
        model.trial = trial

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: Use non-deterministic eval for Atari
        # or use maximum number of steps to avoid infinite loop
        eval_callback = TrialEvalCallback(eval_env,
                                          trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=deterministic_eval)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError as e:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            # Prune hyperparams that generate NaNs
            print(e)
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        reward = eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return reward

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print("Number of finished trials: ", len(study.trials))

    print("Best trial:")
    trial = study.best_trial

    print("Value: ", trial.value)

    print("Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    return study.trials_dataframe()
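
Unlike the first variant, this one creates the study with direction="maximize" (the objective returns the mean reward directly) and accepts optional storage / study_name arguments, so several runs can share or resume the same study via load_if_exists=True. A hedged call, reusing the hypothetical factories sketched above; the SQLite URL and study name are illustrative.

# Hypothetical call; the storage URL and study name are illustrative only.
results_df = hyperparam_optimization(
    "ppo2",
    make_model,  # hypothetical model factory, as sketched above
    make_env,    # hypothetical env factory, as sketched above
    n_trials=50,
    n_timesteps=20000,
    sampler_method="tpe",
    pruner_method="median",
    storage="sqlite:///optuna_ppo2.db",  # load_if_exists=True lets later runs resume
    study_name="ppo2_tuning",
    seed=0,
)
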
Example #4
def hyperparam_optimization(algo,
                            model_fn,
                            env_fn,
                            n_trials=10,
                            n_timesteps=5000,
                            hyperparams=None,
                            n_jobs=1,
                            sampler_method='random',
                            pruner_method='halving',
                            seed=0,
                            verbose=1,
                            timeout=None):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.DataFrame) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    # test during 5 episodes
    n_test_episodes = 5
    # evaluate every 20th of the maximum budget per iteration
    n_evaluations = 20
    evaluate_interval = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=5, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={
            'base_estimator': "GP",
            'acq_func': 'gp_hedge'
        })
    else:
        raise ValueError('Unknown sampler: {}'.format(sampler_method))

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=5,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError('Unknown pruner: {}'.format(pruner_method))

    if verbose > 0:
        print("Sampler: {} - Pruner: {}".format(sampler_method, pruner_method))

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        def callback(_locals, _globals):
            """
            Callback for monitoring learning progress.
            :param _locals: (dict)
            :param _globals: (dict)
            :return: (bool) If False: stop training
            """
            self_ = _locals['self']
            trial = self_.trial

            # Initialize variables
            if not hasattr(self_, 'is_pruned'):
                self_.is_pruned = False
                self_.last_mean_test_reward = -np.inf
                self_.last_time_evaluated = 0
                self_.eval_idx = 0

            if (self_.num_timesteps -
                    self_.last_time_evaluated) < evaluate_interval:
                return True

            self_.last_time_evaluated = self_.num_timesteps

            # Evaluate the trained agent on the test env
            rewards = []
            n_episodes, reward_sum = 0, 0.0

            # Sync the obs rms if using vecnormalize
            # NOTE: this does not cover all the possible cases
            if isinstance(self_.test_env, VecNormalize):
                self_.test_env.obs_rms = deepcopy(self_.env.obs_rms)
                # Do not normalize reward
                self_.test_env.norm_reward = False

            obs = self_.test_env.reset()
            while n_episodes < n_test_episodes:
                # Use default value for deterministic
                action, _ = self_.predict(obs)
                obs, reward, done, _ = self_.test_env.step(action)
                reward_sum += reward

                if done:
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = self_.test_env.reset()

            mean_reward = np.mean(rewards)
            self_.last_mean_test_reward = mean_reward
            self_.eval_idx += 1

            # report best or report current ?
            # report num_timesteps or elapsed time ?
            trial.report(-1 * mean_reward, self_.eval_idx)
            # Prune trial if need
            if trial.should_prune(self_.eval_idx):
                self_.is_pruned = True
                return False

            return True

        model = model_fn(**kwargs)
        model.test_env = env_fn(n_envs=1)
        model.trial = trial
        if algo == 'her':
            model.model.trial = trial
            # Wrap the env if need to flatten the dict obs
            if isinstance(model.test_env, VecEnv):
                model.test_env = _UnvecWrapper(model.test_env)
            model.model.test_env = HERGoalEnvWrapper(model.test_env)

        try:
            model.learn(n_timesteps, callback=callback)
            # Free memory
            model.env.close()
            model.test_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            model.test_env.close()
            raise
        is_pruned = False
        cost = np.inf
        if hasattr(model, 'is_pruned'):
            is_pruned = model.is_pruned
            cost = -1 * model.last_mean_test_reward
        del model.env, model.test_env
        del model

        if is_pruned:
            raise optuna.structs.TrialPruned()

        return cost

    try:
        study.optimize(objective,
                       n_trials=n_trials,
                       n_jobs=n_jobs,
                       timeout=timeout,
                       catch=(ValueError, AssertionError))
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()
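
Example #4 drives pruning from inside the old-style stable-baselines callback by calling trial.report(...) and trial.should_prune(...). The toy objective below isolates that pattern; it targets a recent Optuna API (should_prune without a step argument, unlike the older API used above) and its values are illustrative only.

# Toy objective isolating the report/prune pattern used by the callback above.
import optuna


def toy_objective(trial):
    lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    score = 0.0
    for step in range(20):
        score += lr  # stand-in for a periodic evaluation result
        trial.report(-score, step)  # Example #4 also reports a cost (negated reward)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    return -score


study = optuna.create_study(pruner=optuna.pruners.MedianPruner(n_startup_trials=5))
study.optimize(toy_objective, n_trials=10)
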
Example #5
def hyperparam_optimization(n_trials=20,
                            n_timesteps=100000,
                            hyperparams=None,
                            n_jobs=1,
                            sampler_method='random',
                            pruner_method='halving',
                            seed=1,
                            verbose=1):
    """
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.DataFrame) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    # test during 1500 steps
    n_test_steps = 1500
    # evaluate every 40th of the maximum budget per iteration
    n_evaluations = 40
    evaluate_interval = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.

    # sampler = RandomSampler(seed=seed)
    # sampler = TPESampler(n_startup_trials=5, seed=seed)
    sampler = SkoptSampler(skopt_kwargs={
        'base_estimator': "GP",
        'acq_func': 'gp_hedge'
    })

    # pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
    pruner = MedianPruner(n_startup_trials=5,
                          n_warmup_steps=n_evaluations // 3)

    study = optuna.create_study(study_name="optimisation_PPO2",
                                sampler=sampler,
                                pruner=pruner,
                                storage='sqlite:///optimizationSAC.db',
                                load_if_exists=True)

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None

        kwargs.update(sample_td3_params(trial))

        def callback(_locals, _globals):
            """
            Callback for monitoring learning progress.
            :param _locals: (dict)
            :param _globals: (dict)
            :return: (bool) If False: stop training
            """
            self_ = _locals['self']
            trial = self_.trial

            # Initialize variables
            if not hasattr(self_, 'is_pruned'):
                self_.is_pruned = False
                self_.last_mean_test_reward = -np.inf
                self_.last_time_evaluated = 0
                self_.eval_idx = 0

            if (self_.num_timesteps -
                    self_.last_time_evaluated) < evaluate_interval:
                return True

            self_.last_time_evaluated = self_.num_timesteps

            # Evaluate the trained agent on the test env
            rewards = []
            n_steps_done, reward_sum = 0, 0.0

            # Sync the obs rms if using vecnormalize
            # NOTE: this does not cover all the possible cases
            if isinstance(self_.test_env, VecNormalize):
                self_.test_env.obs_rms = deepcopy(self_.env.obs_rms)
                self_.test_env.ret_rms = deepcopy(self_.env.ret_rms)
                # Do not normalize reward
                self_.test_env.norm_reward = False

            obs = self_.test_env.reset()
            while n_steps_done < n_test_steps:
                # Use default value for deterministic
                action, _ = self_.predict(obs)
                obs, reward, done, _ = self_.test_env.step(action)
                reward_sum += reward
                n_steps_done += 1

                if done:
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    obs = self_.test_env.reset()
                    n_steps_done = n_test_steps
            rewards.append(reward_sum)
            mean_reward = np.mean(rewards)
            summary = tf.Summary(value=[
                tf.Summary.Value(tag='evaluation', simple_value=mean_reward)
            ])
            _locals['writer'].add_summary(summary, self_.num_timesteps)
            self_.last_mean_test_reward = mean_reward
            self_.eval_idx += 1

            # report best or report current ?
            # report num_timesteps or elapsed time ?
            trial.report(-1 * mean_reward, self_.eval_idx)
            # Prune trial if need
            if trial.should_prune(self_.eval_idx):
                self_.is_pruned = True
                return False

            return True

        commands = [[1, 0], [2, 0], [3, 0]]
        env = DummyVecEnv([
            lambda: e.AidaBulletEnv(commands,
                                    render=True,
                                    on_rack=False,
                                    default_reward=2,
                                    height_weight=5,
                                    orientation_weight=3,
                                    direction_weight=2,
                                    speed_weight=4)
        ])

        model = TD3(MlpPolicy,
                    env,
                    gamma=kwargs['gamma'],
                    learning_rate=kwargs['learning_rate'],
                    batch_size=kwargs['batch_size'],
                    buffer_size=kwargs['buffer_size'],
                    train_freq=kwargs['train_freq'],
                    gradient_steps=kwargs['gradient_steps'],
                    action_noise=kwargs['action_noise'],
                    tensorboard_log="./optimisationSAC/logOPTI")

        model.test_env = DummyVecEnv([
            lambda: e.AidaBulletEnv(commands,
                                    render=False,
                                    on_rack=False,
                                    default_reward=2,
                                    height_weight=5,
                                    orientation_weight=3,
                                    direction_weight=2,
                                    speed_weight=2)
        ])

        model.trial = trial

        try:
            model.learn(n_timesteps, callback=callback)
            # Free memory
            model.env.close()
            model.test_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            model.test_env.close()
            raise
        is_pruned = False
        cost = np.inf
        if hasattr(model, 'is_pruned'):
            is_pruned = model.is_pruned
            cost = -1 * model.last_mean_test_reward
        try:
            os.mkdir("./optimisationSAC/resultats/" + str(trial.number))
        except FileExistsError:
            print("Directory already exists")

        model.save("./optimisation/resultats/" + str(trial.number) + "/" +
                   str(trial.number))

        del model.env, model.test_env
        del model

        if is_pruned:
            try:
                # Optuna >= 0.19.0
                raise optuna.exceptions.TrialPruned()
            except AttributeError:
                raise optuna.structs.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()
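
Because Example #5 persists the study to SQLite, the results can be reloaded in a separate session for inspection. A short hedged follow-up, not part of the source, using the study name and database hard-coded above:

# Reload the persisted study for offline inspection (illustrative follow-up).
import optuna

study = optuna.load_study(study_name="optimisation_PPO2",
                          storage="sqlite:///optimizationSAC.db")
print("Best value:", study.best_trial.value)
print("Best params:", study.best_trial.params)
print(study.trials_dataframe().sort_values("value").head())
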
Example #6
def hyperparam_optimization(algo,
                            model_fn,
                            env_fn,
                            n_trials=10,
                            n_timesteps=5000,
                            hyperparams=None,
                            n_jobs=1,
                            sampler_method='random',
                            pruner_method='halving',
                            n_startup_trials=10,
                            n_evaluations=20,
                            n_eval_episodes=1,
                            seed=0,
                            verbose=1):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param n_startup_trials: (int)
    :param n_evaluations: (int) number of evaluations during training (one every n_timesteps / n_evaluations steps)
    :param n_eval_episodes: (int) number of episodes to run at each evaluation
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.DataFrame) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    if hyperparams is None:
        hyperparams = {}

    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={
            'base_estimator': "GP",
            'acq_func': 'gp_hedge'
        })
    else:
        raise ValueError(f'Unknown sampler: {sampler_method}')

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=n_startup_trials,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError(f'Unknown pruner: {pruner_method}')

    if verbose > 0:
        print(f"Sampler: {sampler_method} - Pruner: {pruner_method}")

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)
        model.trial = trial

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: use non-deterministic eval for Atari?
        eval_callback = TrialEvalCallback(eval_env,
                                          trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=True)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError as e:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            # Prune hyperparams that generate NaNs
            print(e)
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        cost = -1 * eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print(f'    {key}: {value}')

    return study.trials_dataframe()
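
Several examples above rely on a TrialEvalCallback that is not shown on this page. The sketch below is an assumption of what it might look like, built on the stable-baselines EvalCallback API: evaluate periodically, report the result to the trial, and flag the trial for pruning. The sign of the reported value must match the study direction used by each example.

# Hedged sketch (assumption): a TrialEvalCallback built on stable-baselines' EvalCallback.
from stable_baselines.common.callbacks import EvalCallback


class TrialEvalCallback(EvalCallback):
    """EvalCallback that reports evaluation results to an Optuna trial."""

    def __init__(self, eval_env, trial, n_eval_episodes=5, eval_freq=10000,
                 deterministic=True, verbose=0):
        super().__init__(eval_env, n_eval_episodes=n_eval_episodes,
                         eval_freq=eval_freq, deterministic=deterministic,
                         verbose=verbose)
        self.trial = trial
        self.eval_idx = 0
        self.is_pruned = False

    def _on_step(self):
        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            # Runs the evaluation and updates self.last_mean_reward
            super()._on_step()
            self.eval_idx += 1
            # Report the mean reward; negate it if the study minimizes a cost
            self.trial.report(self.last_mean_reward, self.eval_idx)
            if self.trial.should_prune():
                self.is_pruned = True
                return False  # returning False stops training
        return True
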