Example #1
 def _make_pruner(self):
     if isinstance(self.pruner_method, str):
         if self.pruner_method == 'halving':
             pruner = SuccessiveHalvingPruner(
                 min_resource=self.n_timesteps // 6,
                 reduction_factor=4,
                 min_early_stopping_rate=0)
         elif self.pruner_method == 'median':
             pruner = MedianPruner(n_startup_trials=5,
                                   n_warmup_steps=self.n_timesteps // 6)
         elif self.pruner_method == 'none':
             # Do not prune
             pruner = NopPruner()
         else:
             raise ValueError('Unknown pruner: {}'.format(
                 self.pruner_method))
     elif isinstance(self.pruner_method, dict):
         method_copy = deepcopy(self.pruner_method)
         method = method_copy.pop('method')
         if method == 'halving':
             pruner = SuccessiveHalvingPruner(**method_copy)
         elif method == 'median':
             pruner = MedianPruner(**method_copy)
         elif method == 'none':
             # Do not prune
             pruner = NopPruner()
         else:
             raise ValueError('Unknown pruner: {}'.format(
                 self.pruner_method))
     else:
         raise ValueError("Wrong type for pruner settings!")
     return pruner
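
For context, a pruner built by a helper like the one above is normally handed straight to optuna.create_study. A minimal, self-contained sketch of that wiring (the concrete pruner settings here are illustrative, not the ones computed above):

import optuna
from optuna.pruners import SuccessiveHalvingPruner

# Illustrative wiring: build a pruner and attach it to a study.
pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4,
                                 min_early_stopping_rate=0)
study = optuna.create_study(direction="minimize", pruner=pruner)
# study.optimize(objective, n_trials=...) would then run trials under this pruner.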
Example #2
 def _create_pruner(self, pruner_method: str) -> BasePruner:
     if pruner_method == "halving":
         pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
     elif pruner_method == "median":
         pruner = MedianPruner(n_startup_trials=self.n_startup_trials, n_warmup_steps=self.n_evaluations // 3)
     elif pruner_method == "none":
         # Do not prune
         pruner = MedianPruner(n_startup_trials=self.n_trials, n_warmup_steps=self.n_evaluations)
     else:
         raise ValueError(f"Unknown pruner: {pruner_method}")
     return pruner
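
Note that the "none" branch above simulates "no pruning" with a MedianPruner whose n_startup_trials equals the total number of trials, so it is effectively never triggered. Optuna also ships an explicit no-op pruner, which states the intent directly (a small alternative sketch, not a change to the original code):

from optuna.pruners import NopPruner

# NopPruner never prunes any trial, making the "do not prune" intent explicit.
pruner = NopPruner()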
Example #3
def cv_hyperparam_study():
    params = {'class_n': CLASS_N, 'edl_used': EDL_USED}
    if EDL_USED != 0:
        params['edl_fun'] = 'mse'
        params['kl'] = EDL_USED - 1

    for test_trial in range(1, 7):
        params['outer_f'] = test_trial
        for sb_n in range(1, 11):
            params['sb_n'] = sb_n
            study_path = f'study/ecnn{EDL_USED}/sb{sb_n}'
            if not os.path.exists(study_path):
                os.makedirs(study_path)
            sampler = TPESampler()
            study = optuna.create_study(
                direction="minimize",  # maximize or minimize the objective
                sampler=sampler,  # parameter sampling strategy
                pruner=MedianPruner(
                    n_startup_trials=5,
                    n_warmup_steps=3,  # let's say num epochs
                    interval_steps=1,
                ),
                study_name='STUDY',
                storage="sqlite:///" + study_path + f"/t{test_trial}.db",
                # storing study results
                load_if_exists=False  # an error is raised if a study with this name already exists
            )

            study.optimize(lambda trial: objective(trial, params), n_trials=25)
            print("Number of finished trials: ", len(study.trials))
            print("Best trial:")
            trial = study.best_trial
            print("  Value: ", trial.value)
            print("  Params: ")
            for key, value in trial.params.items():
                print("    {}: {}".format(key, value))

    return
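
The objective(trial, params) passed to study.optimize is defined elsewhere; for the MedianPruner configured above to do anything, that objective must report an intermediate value once per epoch. A hedged, self-contained sketch of that reporting pattern (the synthetic loss curve only stands in for the real training loop):

import optuna

def objective(trial, params):
    # Sketch only: a synthetic loss replaces one epoch of training + validation.
    lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    val_loss = 1.0
    for epoch in range(50):
        val_loss *= (1.0 - lr)          # placeholder for train-one-epoch + validate
        trial.report(val_loss, epoch)   # intermediate value the MedianPruner compares
        if trial.should_prune():        # active after n_startup_trials / n_warmup_steps
            raise optuna.TrialPruned()
    return val_loss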
Example #4
        elif win == 2:
            draw_count += 1

        if n + 1 == draw_count:
            win_rate = 0.0
        else:
            win_rate = win_count / (n + 1 - draw_count)

        logging.info(
            'trial {} game {} result : win = {}, win count = {}, draw count = {}, win rate = {:.1f}%'
            .format(trial.trial_id, n, win, win_count, draw_count,
                    win_rate * 100))

        # Shut down the USI engines
        for p in procs:
            p.stdin.write(b'quit\n')
            p.stdin.flush()
            p.wait()

        # Prune unpromising optimization steps early
        trial.report(-win_rate, n)
        if trial.should_prune(n):
            logging.info('trial {} game {} pruned'.format(trial.trial_id, n))
            raise optuna.structs.TrialPruned()

    # Return the win rate as a negative value (Optuna minimizes by default)
    return -win_rate


study = create_study(pruner=MedianPruner(n_warmup_steps=20))
study.optimize(objective, n_trials=args.trials)
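
Example #4 relies on an older Optuna API (trial.should_prune(step) with an explicit step, and optuna.structs.TrialPruned). On current Optuna versions the same report-and-prune loop looks roughly like the sketch below, where a random coin flip merely stands in for the engine-vs-engine game:

import random
import optuna

def objective(trial):
    # Hedged sketch of the per-game report/prune loop on the current API.
    threshold = trial.suggest_float("threshold", 0.0, 1.0)
    win_count = 0
    win_rate = 0.0
    for n in range(100):
        win_count += random.random() < threshold   # placeholder "game result"
        win_rate = win_count / (n + 1)
        trial.report(-win_rate, n)
        if trial.should_prune():        # no step argument on current versions
            raise optuna.TrialPruned()  # replaces optuna.structs.TrialPruned
    return -win_rate

study = optuna.create_study(pruner=optuna.pruners.MedianPruner(n_warmup_steps=20))
study.optimize(objective, n_trials=100)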
Example #5
def hyperparam_optimization(algo,
                            model_fn,
                            env_fn,
                            n_trials=10,
                            n_timesteps=5000,
                            hyperparams=None,
                            n_jobs=1,
                            sampler_method='random',
                            pruner_method='halving',
                            seed=0,
                            verbose=1):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.DataFrame) detailed result of the optimization
    """

    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    n_startup_trials = 10
    # test during 5 episodes
    n_eval_episodes = 5
    # evaluate every 20th of the maximum budget per iteration
    n_evaluations = 20
    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable the pruner until the trial reaches the given number of steps.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={
            'base_estimator': "GP",
            'acq_func': 'gp_hedge'
        })
    else:
        raise ValueError('Unknown sampler: {}'.format(sampler_method))

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=n_startup_trials,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError('Unknown pruner: {}'.format(pruner_method))

    if verbose > 0:
        print("Sampler: {} - Pruner: {}".format(sampler_method, pruner_method))

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        # if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
        if algo in ['ddpg', 'td3'] or trial.model_class in [
                DDPG, TD3
        ]:  # bug to report: changed by Pierre
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = eval_freq
        if isinstance(model.get_env(), VecEnv):
            eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: use non-deterministic eval for Atari?
        eval_callback = TrialEvalCallback(eval_env,
                                          trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=True)

        if algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(eval_env, VecEnv):
                print("UNVECTORIZE ENV")
                eval_env = _UnvecWrapper(eval_env)
            # eval_env = HERGoalEnvWrapper(eval_env)  # commented by Pierre

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        cost = -1 * eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    ######## added by pierre
    best_params = trial.params
    print("best params: ", best_params)
    # print("best value: ", study.best_value)
    # print("best best trial: ", study.best_trial)

    # with open('hyperparameter.yml', 'w') as outfile:
    # yaml.dump(best_params, outfile)
    ########

    return study.trials_dataframe(), best_params
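
The TrialEvalCallback used above (and again in the next example) is not shown in this listing. In rl-baselines-zoo it is a small subclass of stable-baselines' EvalCallback that reports each evaluation to the Optuna trial; the sketch below is an approximate reconstruction, not the original file, and the exact import path depends on whether stable-baselines 2.x or stable-baselines3 is used:

from stable_baselines.common.callbacks import EvalCallback  # or stable_baselines3.common.callbacks


class TrialEvalCallback(EvalCallback):
    # Approximate reconstruction: evaluate periodically and report to the Optuna trial.
    def __init__(self, eval_env, trial, n_eval_episodes=5, eval_freq=10000,
                 deterministic=True, verbose=0):
        super(TrialEvalCallback, self).__init__(eval_env=eval_env,
                                                n_eval_episodes=n_eval_episodes,
                                                eval_freq=eval_freq,
                                                deterministic=deterministic,
                                                verbose=verbose)
        self.trial = trial
        self.eval_idx = 0
        self.is_pruned = False

    def _on_step(self):
        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            # Run the regular evaluation, then report its mean reward to Optuna.
            super(TrialEvalCallback, self)._on_step()
            self.eval_idx += 1
            self.trial.report(-1 * self.last_mean_reward, self.eval_idx)
            # Older Optuna API with an explicit step, matching the examples above.
            if self.trial.should_prune(self.eval_idx):
                self.is_pruned = True
                return False  # stop training; the objective raises TrialPruned afterwards
        return True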
Example #6
def hyperparam_optimization(
    algo,
    model_fn,
    env_fn,
    n_trials=10,
    n_timesteps=5000,
    hyperparams=None,  # noqa: C901
    n_jobs=1,
    sampler_method="tpe",
    pruner_method="median",
    n_startup_trials=10,
    n_evaluations=20,
    n_eval_episodes=5,
    storage=None,
    study_name=None,
    seed=0,
    verbose=1,
    deterministic_eval=True,
):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param n_startup_trials: (int)
    :param n_evaluations: (int) number of evaluations spread over the training budget
    :param n_eval_episodes: (int) number of episodes per evaluation
    :param storage: (Optional[str])
    :param study_name: (Optional[str])
    :param seed: (int)
    :param verbose: (int)
    :param deterministic_eval: (bool)
    :return: (pd.DataFrame) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    if hyperparams is None:
        hyperparams = {}

    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable the pruner until the trial reaches the given number of steps.
    if sampler_method == "random":
        sampler = RandomSampler(seed=seed)
    elif sampler_method == "tpe":
        # TODO: try with multivariate=True
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == "skopt":
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={
            "base_estimator": "GP",
            "acq_func": "gp_hedge"
        })
    else:
        raise ValueError(f"Unknown sampler: {sampler_method}")

    if pruner_method == "halving":
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == "median":
        pruner = MedianPruner(n_startup_trials=n_startup_trials,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == "none":
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError(f"Unknown pruner: {pruner_method}")

    if verbose > 0:
        print(f"Sampler: {sampler_method} - Pruner: {pruner_method}")

    study = optuna.create_study(sampler=sampler,
                                pruner=pruner,
                                storage=storage,
                                study_name=study_name,
                                load_if_exists=True,
                                direction="maximize")
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == "her":
            trial.model_class = hyperparams["model_class"]

        # Hack to use DDPG/TD3 noise sampler
        if algo in ["ddpg", "td3"] or trial.model_class in ["ddpg", "td3"]:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)
        model.trial = trial

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: Use non-deterministic eval for Atari
        # or use maximum number of steps to avoid infinite loop
        eval_callback = TrialEvalCallback(eval_env,
                                          trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=deterministic_eval)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError as e:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            # Prune hyperparams that generate NaNs
            print(e)
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        reward = eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return reward

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print("Number of finished trials: ", len(study.trials))

    print("Best trial:")
    trial = study.best_trial

    print("Value: ", trial.value)

    print("Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    return study.trials_dataframe()
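
A hypothetical invocation of the function above (model_fn and env_fn are assumed factory functions, and the "ppo2" algorithm key is an assumption about HYPERPARAMS_SAMPLER); with a shared storage URL and study_name, several processes or machines can contribute trials to the same study:

# Hypothetical usage sketch -- model_fn/env_fn must be provided by the caller.
df = hyperparam_optimization(
    "ppo2",
    model_fn,
    env_fn,
    n_trials=100,
    sampler_method="tpe",
    pruner_method="median",
    storage="sqlite:///ppo2_study.db",
    study_name="ppo2_search",
)
print(df.sort_values("value", ascending=False).head())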
Example #7
def hyperparam_optimization(algo,
                            model_fn,
                            env_fn,
                            n_trials=10,
                            n_timesteps=5000,
                            hyperparams=None,
                            n_jobs=1,
                            sampler_method='random',
                            pruner_method='halving',
                            seed=0,
                            verbose=1,
                            timeout=None):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.DataFrame) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    # test during 5 episodes
    n_test_episodes = 5
    # evaluate every 20th of the maximum budget per iteration
    n_evaluations = 20
    evaluate_interval = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable the pruner until the trial reaches the given number of steps.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=5, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={
            'base_estimator': "GP",
            'acq_func': 'gp_hedge'
        })
    else:
        raise ValueError('Unknown sampler: {}'.format(sampler_method))

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=5,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError('Unknown pruner: {}'.format(pruner_method))

    if verbose > 0:
        print("Sampler: {} - Pruner: {}".format(sampler_method, pruner_method))

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        def callback(_locals, _globals):
            """
            Callback for monitoring learning progress.
            :param _locals: (dict)
            :param _globals: (dict)
            :return: (bool) If False: stop training
            """
            self_ = _locals['self']
            trial = self_.trial

            # Initialize variables
            if not hasattr(self_, 'is_pruned'):
                self_.is_pruned = False
                self_.last_mean_test_reward = -np.inf
                self_.last_time_evaluated = 0
                self_.eval_idx = 0

            if (self_.num_timesteps -
                    self_.last_time_evaluated) < evaluate_interval:
                return True

            self_.last_time_evaluated = self_.num_timesteps

            # Evaluate the trained agent on the test env
            rewards = []
            n_episodes, reward_sum = 0, 0.0

            # Sync the obs rms if using vecnormalize
            # NOTE: this does not cover all the possible cases
            if isinstance(self_.test_env, VecNormalize):
                self_.test_env.obs_rms = deepcopy(self_.env.obs_rms)
                # Do not normalize reward
                self_.test_env.norm_reward = False

            obs = self_.test_env.reset()
            while n_episodes < n_test_episodes:
                # Use default value for deterministic
                action, _ = self_.predict(obs)
                obs, reward, done, _ = self_.test_env.step(action)
                reward_sum += reward

                if done:
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = self_.test_env.reset()

            mean_reward = np.mean(rewards)
            self_.last_mean_test_reward = mean_reward
            self_.eval_idx += 1

            # Report the best or the current value?
            # Report num_timesteps or elapsed time?
            trial.report(-1 * mean_reward, self_.eval_idx)
            # Prune the trial if needed
            if trial.should_prune(self_.eval_idx):
                self_.is_pruned = True
                return False

            return True

        model = model_fn(**kwargs)
        model.test_env = env_fn(n_envs=1)
        model.trial = trial
        if algo == 'her':
            model.model.trial = trial
            # Wrap the env if need to flatten the dict obs
            if isinstance(model.test_env, VecEnv):
                model.test_env = _UnvecWrapper(model.test_env)
            model.model.test_env = HERGoalEnvWrapper(model.test_env)

        try:
            model.learn(n_timesteps, callback=callback)
            # Free memory
            model.env.close()
            model.test_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            model.test_env.close()
            raise
        is_pruned = False
        cost = np.inf
        if hasattr(model, 'is_pruned'):
            is_pruned = model.is_pruned
            cost = -1 * model.last_mean_test_reward
        del model.env, model.test_env
        del model

        if is_pruned:
            raise optuna.structs.TrialPruned()

        return cost

    try:
        study.optimize(objective,
                       n_trials=n_trials,
                       n_jobs=n_jobs,
                       timeout=timeout,
                       catch=((ValueError, AssertionError)))
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()
Example #8
    def objective(trial, ctf):
        learning_rate = trial.suggest_uniform('learning_rate', 0.0, 0.1)
        gamma = trial.suggest_uniform('gamma', 0.0, 0.1)
        dimensions = trial.suggest_int('dimensions', 750, 1000)
        x, y = calculate_fisher_discriminant(training_images_flat,
                                             training_labels, ctf)
        local_svm = SVMTree(x.shape[1],
                            list(range(10)),
                            learning_rate,
                            dimensions=dimensions,
                            gamma=gamma)
        return local_svm.train(x, y, 14)

    data = []
    for ctf in frange(0.0, 1.0, 0.2):
        study = optuna.create_study(pruner=MedianPruner())
        study.optimize(lambda trial: objective(trial, ctf), n_trials=25)
        best = study.best_params

        x, y = calculate_fisher_discriminant(training_images_flat,
                                             training_labels, ctf)
        svm = SVMTree(x.shape[1],
                      list(range(10)),
                      best['learning_rate'],
                      dimensions=best['dimensions'],
                      gamma=best['gamma'])
        t1 = time.time()
        svm.train(x, y, 14)
        t2 = time.time()
        adv_ex, bro_ex, grad_ex, peraccuracy = stage(test_images, test_labels,
                                                     'SVM_tree.pickle', 0.08,
Example #9
def hyperparam_optimization(n_trials=60, n_timesteps=int(1e5),
                            n_jobs=1):
    """
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param n_jobs: (int) number of parallel jobs
    :return: (pd.DataFrame) detailed result of the optimization
    """

    n_startup_trials = 5

    sampler = TPESampler(n_startup_trials=n_startup_trials, seed=3)

    pruner = MedianPruner(n_startup_trials=n_startup_trials, n_warmup_steps=15)

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = sample_sac_params

    def objective(trial):

        kwargs = {}
        trial.model_class = None

        kwargs.update(algo_sampler(trial))

        eval_env = ft.envs.CitationNormal()
        env_train = ft.envs.CitationNormal()
        model = create_model(env_train, **kwargs)

        eval_callback = ft.agent.SaveOnBestReturn(eval_env=eval_env, eval_freq=2000, log_path='optimization_logs/tmp/',
                                         best_model_save_path='optimization_logs/tmp/', verbose=0)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
            env_train.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            env_train.close()
            raise optuna.exceptions.TrialPruned()
        except IndexError:
            model.env.close()
            eval_env.close()
            env_train.close()
            raise optuna.exceptions.TrialPruned()
        cost = -1 * eval_callback.best_reward

        del model.env, eval_env
        del model

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()
Example #10
    return evaluate_accuracy(model, valid_loader)


if __name__ == "__main__":
    """
    Create a study and save the study results to SQLite (for other storages see the documentation).
    Later we can load the study to examine the results (e.g. in a Jupyter notebook):
    study = optuna.study.load_study(study_name=STUDY_NAME, storage='sqlite:///example.db')
    """
    sampler = TPESampler(seed=10)
    study = optuna.create_study(
        direction="maximize",  # maximize or minimize the objective
        sampler=sampler,  # parameter sampling strategy
        pruner=MedianPruner(
            n_startup_trials=15,
            n_warmup_steps=5,  # let's say num epochs
            interval_steps=2,
        ),
        study_name=STUDY_NAME,
        storage="sqlite:///example.db",  # store study results; other storages are available, see the documentation
        load_if_exists=True,
    )

    study.optimize(objective, n_trials=50)

    print("Number of finished trials: ", len(study.trials))

    print("Best trial:")
    trial = study.best_trial
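
As the docstring above notes, the persisted study can be reloaded later (for example in a notebook) to inspect the results; a short sketch, assuming the example.db created by the run above exists:

import optuna

study = optuna.load_study(study_name=STUDY_NAME, storage="sqlite:///example.db")
print(study.best_trial.params)
df = study.trials_dataframe()  # per-trial results as a pandas DataFrame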
Example #11
def hyperparam_optimization(n_trials=20,
                            n_timesteps=100000,
                            hyperparams=None,
                            n_jobs=1,
                            sampler_method='random',
                            pruner_method='halving',
                            seed=1,
                            verbose=1):
    """
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.DataFrame) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    # evaluate the agent for 1500 test steps
    n_test_steps = 1500
    # evaluate 40 times, spread evenly over the training budget
    n_evaluations = 40
    evaluate_interval = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable the pruner until the trial reaches the given number of steps.

    #sampler = RandomSampler(seed=seed)

    #sampler = TPESampler(n_startup_trials=5, seed=seed)

    sampler = SkoptSampler(skopt_kwargs={
        'base_estimator': "GP",
        'acq_func': 'gp_hedge'
    })

    #pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)

    pruner = MedianPruner(n_startup_trials=5,
                          n_warmup_steps=n_evaluations // 3)

    study = optuna.create_study(study_name="optimisation_PPO2",
                                sampler=sampler,
                                pruner=pruner,
                                storage='sqlite:///optimizationSAC.db',
                                load_if_exists=True)

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None

        kwargs.update(sample_td3_params(trial))

        def callback(_locals, _globals):
            """
            Callback for monitoring learning progress.
            :param _locals: (dict)
            :param _globals: (dict)
            :return: (bool) If False: stop training
            """
            self_ = _locals['self']
            trial = self_.trial

            # Initialize variables
            if not hasattr(self_, 'is_pruned'):
                self_.is_pruned = False
                self_.last_mean_test_reward = -np.inf
                self_.last_time_evaluated = 0
                self_.eval_idx = 0

            if (self_.num_timesteps -
                    self_.last_time_evaluated) < evaluate_interval:
                return True

            self_.last_time_evaluated = self_.num_timesteps

            # Evaluate the trained agent on the test env
            rewards = []
            n_steps_done, reward_sum = 0, 0.0

            # Sync the obs rms if using vecnormalize
            # NOTE: this does not cover all the possible cases
            if isinstance(self_.test_env, VecNormalize):
                self_.test_env.obs_rms = deepcopy(self_.env.obs_rms)
                self_.test_env.ret_rms = deepcopy(self_.env.ret_rms)
                # Do not normalize reward
                self_.test_env.norm_reward = False

            obs = self_.test_env.reset()
            while n_steps_done < n_test_steps:
                # Use default value for deterministic
                action, _ = self_.predict(obs)
                obs, reward, done, _ = self_.test_env.step(action)
                reward_sum += reward
                n_steps_done += 1

                if done:
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    obs = self_.test_env.reset()
                    n_steps_done = n_test_steps
            rewards.append(reward_sum)
            mean_reward = np.mean(rewards)
            summary = tf.Summary(value=[
                tf.Summary.Value(tag='evaluation', simple_value=mean_reward)
            ])
            _locals['writer'].add_summary(summary, self_.num_timesteps)
            self_.last_mean_test_reward = mean_reward
            self_.eval_idx += 1

            # Report the best or the current value?
            # Report num_timesteps or elapsed time?
            trial.report(-1 * mean_reward, self_.eval_idx)
            # Prune the trial if needed
            if trial.should_prune(self_.eval_idx):
                self_.is_pruned = True
                return False

            return True

        commands = [[1, 0], [2, 0], [3, 0]]
        env = DummyVecEnv([
            lambda: e.AidaBulletEnv(commands,
                                    render=True,
                                    on_rack=False,
                                    default_reward=2,
                                    height_weight=5,
                                    orientation_weight=3,
                                    direction_weight=2,
                                    speed_weight=4)
        ])

        model = TD3(MlpPolicy,
                    env,
                    gamma=kwargs['gamma'],
                    learning_rate=kwargs['learning_rate'],
                    batch_size=kwargs['batch_size'],
                    buffer_size=kwargs['buffer_size'],
                    train_freq=kwargs['train_freq'],
                    gradient_steps=kwargs['gradient_steps'],
                    action_noise=kwargs['action_noise'],
                    tensorboard_log="./optimisationSAC/logOPTI")

        model.test_env = DummyVecEnv([
            lambda: e.AidaBulletEnv(commands,
                                    render=False,
                                    on_rack=False,
                                    default_reward=2,
                                    height_weight=5,
                                    orientation_weight=3,
                                    direction_weight=2,
                                    speed_weight=2)
        ])

        model.trial = trial

        try:
            model.learn(n_timesteps, callback=callback)
            # Free memory
            model.env.close()
            model.test_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            model.test_env.close()
            raise
        is_pruned = False
        cost = np.inf
        if hasattr(model, 'is_pruned'):
            is_pruned = model.is_pruned
            cost = -1 * model.last_mean_test_reward
        try:
            os.mkdir("./optimisationSAC/resultats/" + str(trial.number))
        except FileExistsError:
            print("Directory already exists")

        model.save("./optimisationSAC/resultats/" + str(trial.number) + "/" + str(trial.number))

        del model.env, model.test_env
        del model

        if is_pruned:
            try:
                # Optuna >= 0.19.0
                raise optuna.exceptions.TrialPruned()
            except AttributeError:
                raise optuna.structs.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()
Example #12
def hyperparam_optimization(algo,
                            model_fn,
                            env_fn,
                            n_trials=10,
                            n_timesteps=5000,
                            hyperparams=None,
                            n_jobs=1,
                            sampler_method='random',
                            pruner_method='halving',
                            n_startup_trials=10,
                            n_evaluations=20,
                            n_eval_episodes=1,
                            seed=0,
                            verbose=1):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param n_startup_trials: (int)
    :param n_evaluations: (int) number of evaluations spread over the training budget
    :param n_eval_episodes: (int) number of episodes per evaluation
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.DataFrame) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    if hyperparams is None:
        hyperparams = {}

    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable the pruner until the trial reaches the given number of steps.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={
            'base_estimator': "GP",
            'acq_func': 'gp_hedge'
        })
    else:
        raise ValueError(f'Unknown sampler: {sampler_method}')

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=n_startup_trials,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError(f'Unknown pruner: {pruner_method}')

    if verbose > 0:
        print(f"Sampler: {sampler_method} - Pruner: {pruner_method}")

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)
        model.trial = trial

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: use non-deterministic eval for Atari?
        eval_callback = TrialEvalCallback(eval_env,
                                          trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=True)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError as e:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            # Prune hyperparams that generate NaNs
            print(e)
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        cost = -1 * eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print(f'    {key}: {value}')

    return study.trials_dataframe()
Example #13
    N_iterations_max = 10_000
    early_stopping_rounds = 50

    if boosting_type == "dart":
        N_iterations_max = 100
        early_stopping_rounds = None

    cv_res = lgb.cv(
        params,
        lgb_data_train,
        num_boost_round=N_iterations_max,
        early_stopping_rounds=early_stopping_rounds,
        verbose_eval=False,
        seed=42,
        callbacks=[LightGBMPruningCallback(trial, "auc")],
    )

    num_boost_round = len(cv_res["auc-mean"])
    trial.set_user_attr("num_boost_round", num_boost_round)
    return cv_res["auc-mean"][-1]


#%%

study = optuna.create_study(
    direction="maximize",
    sampler=TPESampler(seed=42),
    pruner=MedianPruner(n_warmup_steps=50),
)

study.optimize(objective, n_trials=100, show_progress_bar=True)
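
Because the objective stores the chosen number of boosting rounds as a user attribute, the best trial can be refit after the search. A hedged sketch (lgb_data_train is assumed to be the same Dataset used inside the objective, and any fixed, non-tuned parameters may need to be merged back into best.params):

# Refit a final model with the best trial's parameters and stored round count.
best = study.best_trial
final_model = lgb.train(
    dict(best.params),
    lgb_data_train,
    num_boost_round=best.user_attrs["num_boost_round"],
)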
Example #14
        logging.info(
            'trial {} game {} result : win = {}, win count = {}, draw count = {}, win rate = {:.1f}%'
            .format(trial.number, n, win, win_count, draw_count,
                    win_rate * 100))

        # Shut down the USI engines
        for p in procs:
            p.stdin.write(b'quit\n')
            p.stdin.flush()
            p.wait()

        # Prune unpromising optimization steps early
        trial.report(-win_rate, n)
        if trial.should_prune(n):
            logging.info('trial {} game {} pruned'.format(trial.number, n))
            raise TrialPruned()

    # Return the win rate as a negative value (Optuna minimizes by default)
    return -win_rate


if args.storage:
    study = load_study(study_name='mcts_params_optimizer',
                       storage=args.storage,
                       pruner=MedianPruner(n_warmup_steps=args.n_warmup_steps))
else:
    study = create_study(pruner=MedianPruner(
        n_warmup_steps=args.n_warmup_steps))
study.optimize(objective, n_trials=args.trials)
Example #15
    if nan_encountered:
        return float("nan")

    if eval_callback.is_pruned:
        raise optuna.exceptions.TrialPruned()

    return eval_callback.last_mean_reward


if __name__ == "__main__":
    # Set pytorch num threads to 1 for faster training
    torch.set_num_threads(1)

    sampler = TPESampler(n_startup_trials=N_STARTUP_TRIALS)
    # Do not prune before 1/3 of the max budget is used
    pruner = MedianPruner(n_startup_trials=N_STARTUP_TRIALS,
                          n_warmup_steps=N_EVALUATIONS // 3)

    study = optuna.create_study(sampler=sampler,
                                pruner=pruner,
                                direction="maximize")
    try:
        study.optimize(objective,
                       n_trials=N_TRIALS,
                       n_jobs=N_JOBS,
                       timeout=600)
    except KeyboardInterrupt:
        pass

    print("Number of finished trials: ", len(study.trials))

    print("Best trial:")
Example #16
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelSampler(env, policy, num_envs=20, min_rollouts=min_rollouts)
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros])/min_rollouts

    return mean_ret


if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    ex_dir = setup_experiment('hyperparams', QBallBalancerSim.name, 'ppo_250Hz_actnorm', seed=args.seed)

    # Run hyper-parameter optimization
    name = f'{ex_dir.algo_name}_{ex_dir.add_info}'  # e.g. qbb_ppo_fnn_actnorm
    study = optuna.create_study(
        study_name=name,
        storage=f"sqlite:////{osp.join(pyrado.TEMP_DIR, ex_dir, f'{name}.db')}",
        direction='maximize',
        pruner=MedianPruner(),
        load_if_exists=True
    )
    study.optimize(functools.partial(train_and_eval, ex_dir=ex_dir, seed=args.seed), n_trials=100, n_jobs=6)

    # Save the best hyper-parameters
    save_list_of_dicts_to_yaml([study.best_params, dict(seed=args.seed)], ex_dir, 'best_hyperparams')
Example #17
def optimize(env_id, params, args, session_path, session_id):
    n_trials = args.n_trials
    n_episodes_per_eval = args.n_episodes_per_eval

    seed = int(time())

    if args.sampler == 'random':
        sampler = RandomSampler(seed=seed)
    elif args.sampler == 'tpe':
        sampler = TPESampler(n_startup_trials=5, seed=seed)
    elif args.sampler == 'skopt':
        sampler = SkoptSampler(skopt_kwargs={'base_estimator': "GP", 'acq_func': 'gp_hedge'})
    else:
        raise ValueError('Unknown sampler: {}'.format(args.sampler))

    if args.pruner == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
    elif args.pruner == 'median':
        pruner = MedianPruner(n_startup_trials=5)
    elif args.pruner == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials)
    else:
        raise ValueError('Unknown pruner: {}'.format(args.pruner))

    study_name = args.optimizer_study_name if args.optimizer_study_name else f'{session_id}-optimizer_study'
    storage = f'sqlite:///{study_name}.db' if args.optimizer_use_db else None

    study = optuna.create_study(study_name=study_name,
                                storage=storage,
                                load_if_exists=True,
                                sampler=sampler,
                                pruner=pruner)

    # the objective function called by optuna during each trial
    def objective(trial):
        # copy to preserve original params
        _params = params.copy()
        _params['hyper_params'] = HYPERPARAMS_SAMPLER[args.algorithm.lower()](trial)

        # network architecture
        net_arch = trial.suggest_categorical('net_arch', ['8x8', '16x16', '32x32'])
        layers = map(int, net_arch.split('x'))
        policy_kwargs = dict(act_fun=tf.nn.relu, net_arch=list(layers))

        print(f'*** beginning trial {trial.number}')
        print('\thyper-parameters:')
        for param, value in _params['hyper_params'].items():
            print(f'\t\t{param}:{value}')
        print(f'\t\tnet_arch: {net_arch}')

        _params['save_dir'] = _params['save_dir'] / 'optimizer'

        try:
            # purge any previously saved models
            purge_model(_params, args, interactive=False)

            ######################################################
            # learning phase - on possibly multiple environments #
            ######################################################
            godot_instances = [GodotInstance(o_port, a_port) for o_port, a_port in
                               get_godot_instances(args.n_godot_instances)]
            env = create_env(args, env_id, godot_instances, _params, session_path)
            env = VecCheckNan(env, warn_once=False, raise_exception=True)

            # learn and save model
            model = init_model(session_path, _params, env, args, policy_kwargs=policy_kwargs)
            learn(env, model, _params, args, session_path)
            env.close()

            ##########################################################################
            # evaluation phase - single environment (deterministic action selection) #
            ##########################################################################
            env = create_env(args, env_id, [GODOT_EVAL_INSTANCE], _params, session_path, eval=True)
            env = VecCheckNan(env, warn_once=False, raise_exception=True)

            # loaded previously learned model and evaluate
            model = init_model(session_path, _params, env, args, eval=True)
            mean_reward, _ = evaluate(model, env, args, n_episodes=n_episodes_per_eval)
            env.close()

        except (AssertionError, ValueError) as e:
            print(f'pruning optimizer trial {trial} due to exception {e}')
            raise optuna.exceptions.TrialPruned()

        # optuna minimizes the objective by default, so we need to flip the sign to maximize
        cost = -1 * mean_reward
        return cost

    try:
        study.optimize(objective, n_trials)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)
    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()