Code example #1
def _make_pruner(self):
    if isinstance(self.pruner_method, str):
        if self.pruner_method == 'halving':
            pruner = SuccessiveHalvingPruner(
                min_resource=self.n_timesteps // 6,
                reduction_factor=4,
                min_early_stopping_rate=0)
        elif self.pruner_method == 'median':
            pruner = MedianPruner(n_startup_trials=5,
                                  n_warmup_steps=self.n_timesteps // 6)
        elif self.pruner_method == 'none':
            # Do not prune
            pruner = NopPruner()
        else:
            raise ValueError('Unknown pruner: {}'.format(self.pruner_method))
    elif isinstance(self.pruner_method, dict):
        method_copy = deepcopy(self.pruner_method)
        method = method_copy.pop('method')
        if method == 'halving':
            pruner = SuccessiveHalvingPruner(**method_copy)
        elif method == 'median':
            pruner = MedianPruner(**method_copy)
        elif method == 'none':
            # Do not prune
            pruner = NopPruner()
        else:
            raise ValueError('Unknown pruner: {}'.format(method))
    else:
        raise ValueError('Wrong type for pruner settings!')
    return pruner
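The factory above is shown without its imports; it assumes something along these lines, and the resulting pruner would typically be handed straight to optuna.create_study (the owning class is not shown, so the usage line is illustrative):

from copy import deepcopy

from optuna.pruners import MedianPruner, NopPruner, SuccessiveHalvingPruner

# Illustrative wiring inside the owning class:
#   study = optuna.create_study(pruner=self._make_pruner())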
Code example #2
def main():
    import optuna
    from optuna.pruners import SuccessiveHalvingPruner
    study = optuna.create_study(pruner=SuccessiveHalvingPruner())
    study.optimize(objective, n_trials=100)

    # TrialState lives in optuna.trial; optuna.structs is deprecated.
    pruned_trials = [
        t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED
    ]
    complete_trials = [
        t for t in study.trials
        if t.state == optuna.trial.TrialState.COMPLETE
    ]
    print('Study statistics: ')
    print('  Number of finished trials: ', len(study.trials))
    print('  Number of pruned trials: ', len(pruned_trials))
    print('  Number of complete trials: ', len(complete_trials))

    print('Best trial:')
    trial = study.best_trial

    print('  Value: ', trial.value)

    print('  Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    print('  User attrs:')
    for key, value in trial.user_attrs.items():
        print('    {}: {}'.format(key, value))
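The objective referenced above is defined elsewhere; for the pruner to do anything, it must report intermediate values. A hypothetical stand-in:

import optuna


# Hypothetical objective: reporting at each step is what lets
# SuccessiveHalvingPruner compare trials and stop unpromising ones early.
def objective(trial):
    x = trial.suggest_float('x', -10, 10)
    value = 0.0
    for step in range(100):
        value = (x - 2) ** 2 + 1.0 / (step + 1)
        trial.report(value, step)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    return value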
Code example #3
def _create_pruner(self, pruner_method: str) -> BasePruner:
    if pruner_method == "halving":
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
    elif pruner_method == "median":
        pruner = MedianPruner(n_startup_trials=self.n_startup_trials, n_warmup_steps=self.n_evaluations // 3)
    elif pruner_method == "none":
        # Do not prune: a MedianPruner configured so that it never triggers
        pruner = MedianPruner(n_startup_trials=self.n_trials, n_warmup_steps=self.n_evaluations)
    else:
        raise ValueError(f"Unknown pruner: {pruner_method}")
    return pruner
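The "none" branch above disables pruning only indirectly, via a MedianPruner tuned so it never fires. Optuna's NopPruner (used in code example #1) does the same thing explicitly; a minimal sketch:

import optuna
from optuna.pruners import NopPruner

# NopPruner never prunes, so every trial runs its full budget.
study = optuna.create_study(pruner=NopPruner())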
Code example #4
def search_neural_arch(non_arch_config, checkpoint_dir=None):

    optuna.logging.set_verbosity(optuna.logging.FATAL)

    study = optuna.create_study(
        directions=["minimize", "maximize"],
        study_name=str(non_arch_config),
        sampler=BoTorchSampler(),
        pruner=SuccessiveHalvingPruner(),
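        # NOTE: Optuna does not support pruning for multi-objective studies
        # (Trial.report / Trial.should_prune raise for them), so the pruner
        # passed here is effectively inert with two entries in `directions`.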
        #         storage='sqlite:///na.db',
        storage="mysql://root@localhost/example",
        load_if_exists=True)

    study.optimize(
        partial(train_cifar, non_arch_config),
        n_trials=oom,
        #         n_jobs=4,
        gc_after_trial=True,
        callbacks=[nas_report])
Code example #5
    def __init__(self,
                 estimators,
                 pipeline=None,
                 pipe_params=None,
                 scoring=None,
                 cv=3,
                 agg_func=np.mean,
                 refit=True,
                 tol=1e-5,
                 max_iter=50,
                 time_limit=None,
                 max_fails=3,
                 study_name=None,
                 save_cv_preds=False,
                 pruner=SuccessiveHalvingPruner(min_resource=3,
                                                reduction_factor=3),
                 sampler=TPESampler(**TPESampler.hyperopt_parameters()),
                 storage=None,
                 n_jobs=1,
                 verbose=1,
                 random_state=None):

        self.estimators = estimators
        self.pipeline = pipeline
        self.pipe_params = pipe_params
        self.scoring = scoring
        self.cv = cv
        self.agg_func = agg_func
        self.refit_ = refit
        self.tol = tol
        self.max_iter = max_iter
        self.time_limit = time_limit
        self.max_fails = max_fails
        self.study_name = study_name
        self.save_cv_preds = save_cv_preds
        self.pruner = pruner
        self.sampler = sampler
        self.storage = storage
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.random_state = random_state
        self.best_estimator_ = None
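One caveat about the signature above: default arguments such as pruner=SuccessiveHalvingPruner(...) and sampler=TPESampler(...) are evaluated once, at definition time, so every instance built with the defaults shares the same pruner and sampler objects. A sketch of the usual None-default idiom that avoids this (the class here is a trimmed placeholder):

from optuna.pruners import SuccessiveHalvingPruner
from optuna.samplers import TPESampler


class _Example:
    # Placeholder class: only the pruner/sampler arguments are kept.
    def __init__(self, pruner=None, sampler=None):
        # Fresh objects per instance instead of one shared default.
        self.pruner = pruner or SuccessiveHalvingPruner(min_resource=3,
                                                        reduction_factor=3)
        self.sampler = sampler or TPESampler(
            **TPESampler.hyperopt_parameters())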
Code example #6
File: main.py Project: eduidl/python-sandbox
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--trials', type=int, default=5)
    args = parser.parse_args()

    study = optuna.create_study(direction='maximize',
                                pruner=SuccessiveHalvingPruner())
    study.optimize(objective_wrapper(args.epochs), n_trials=args.trials)

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('  Value: ', trial.value)

    print('  Params: ')
    for key, value in trial.params.items():
        print(f'    {key}: {value}')

    study.trials_dataframe().to_csv(DIR / 'result.csv')
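objective_wrapper is defined elsewhere in the project; its likely shape is a closure that fixes the epoch budget and returns an Optuna objective. A hypothetical, self-contained sketch (the inline accuracy update stands in for real training):

import optuna


def objective_wrapper(epochs):
    def objective(trial):
        lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
        accuracy = 0.0
        for epoch in range(epochs):
            # Stand-in for one epoch of real training.
            accuracy += lr / (lr + 1e-3) / epochs
            trial.report(accuracy, epoch)
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()
        return accuracy
    return objective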
Code example #7
def hyperparam_optimization(algo,
                            model_fn,
                            env_fn,
                            n_trials=10,
                            n_timesteps=5000,
                            hyperparams=None,
                            n_jobs=1,
                            sampler_method='random',
                            pruner_method='halving',
                            seed=0,
                            verbose=1):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.DataFrame) detailed result of the optimization
    """

    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    n_startup_trials = 10
    # test during 5 episodes
    n_eval_episodes = 5
    # evaluate every 20th of the maximum budget per iteration
    n_evaluations = 20
    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={
            'base_estimator': "GP",
            'acq_func': 'gp_hedge'
        })
    else:
        raise ValueError('Unknown sampler: {}'.format(sampler_method))

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=n_startup_trials,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError('Unknown pruner: {}'.format(pruner_method))

    if verbose > 0:
        print("Sampler: {} - Pruner: {}".format(sampler_method, pruner_method))

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        # if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
        if algo in ['ddpg', 'td3'] or trial.model_class in [
                DDPG, TD3
        ]:  # bug to report: changed by Pierre
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = eval_freq
        if isinstance(model.get_env(), VecEnv):
            eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: use non-deterministic eval for Atari?
        eval_callback = TrialEvalCallback(eval_env,
                                          trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=True)

        if algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(eval_env, VecEnv):
                print("UNVECTORIZE ENV")
                eval_env = _UnvecWrapper(eval_env)
            # eval_env = HERGoalEnvWrapper(eval_env)  # commented by Pierre

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        cost = -1 * eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    ######## added by pierre
    best_params = trial.params
    print("best params: ", best_params)
    # print("best value: ", study.best_value)
    # print("best best trial: ", study.best_trial)

    # with open('hyperparameter.yml', 'w') as outfile:
    # yaml.dump(best_params, outfile)
    ########

    return study.trials_dataframe(), best_params
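For reference, the halving settings used above (and in several later examples) imply a geometric rung schedule: trials are compared at report steps min_resource * reduction_factor ** (min_early_stopping_rate + rung), and roughly the top 1/reduction_factor survive each rung. A quick sketch:

min_resource, reduction_factor, min_early_stopping_rate = 1, 4, 0

# Report steps at which SuccessiveHalvingPruner compares trials.
rungs = [min_resource * reduction_factor ** (min_early_stopping_rate + i)
         for i in range(4)]
print(rungs)  # [1, 4, 16, 64]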
Code example #8
def main(args):

    workdir = os.path.expanduser(args.tuning_directory)

    if os.path.exists(workdir) and not args.force:
        print("* error: %s exists." % workdir)
        exit(1)

    os.makedirs(workdir, exist_ok=True)

    init(args.seed, args.device)
    device = torch.device(args.device)

    print("[loading data]")
    chunks, chunk_lengths, targets, target_lengths = load_data(
        limit=args.chunks, directory=args.directory)
    split = np.floor(chunks.shape[0] * args.validation_split).astype(np.int32)
    train_dataset = ChunkDataSet(chunks[:split], chunk_lengths[:split],
                                 targets[:split], target_lengths[:split])
    test_dataset = ChunkDataSet(chunks[split:], chunk_lengths[split:],
                                targets[split:], target_lengths[split:])
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch,
                              shuffle=True,
                              num_workers=4,
                              pin_memory=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch,
                             num_workers=4,
                             pin_memory=True)

    def objective(trial):

        config = toml.load(args.config)

        lr = 1e-3
        #config['block'][0]['stride'] = [trial.suggest_int('stride', 4, 6)]

        # C1
        config['block'][0]['kernel'] = [
            int(trial.suggest_discrete_uniform('c1_kernel', 1, 129, 2))
        ]
        config['block'][0]['filters'] = trial.suggest_int(
            'c1_filters', 1, 1024)

        # B1 - B5
        for i in range(1, 6):
            config['block'][i]['repeat'] = trial.suggest_int(
                'b%s_repeat' % i, 1, 9)
            config['block'][i]['filters'] = trial.suggest_int(
                'b%s_filters' % i, 1, 512)
            config['block'][i]['kernel'] = [
                int(trial.suggest_discrete_uniform('b%s_kernel' % i, 1, 129,
                                                   2))
            ]

        # C2
        config['block'][-2]['kernel'] = [
            int(trial.suggest_discrete_uniform('c2_kernel', 1, 129, 2))
        ]
        config['block'][-2]['filters'] = trial.suggest_int(
            'c2_filters', 1, 1024)

        # C3
        config['block'][-1]['kernel'] = [
            int(trial.suggest_discrete_uniform('c3_kernel', 1, 129, 2))
        ]
        config['block'][-1]['filters'] = trial.suggest_int(
            'c3_filters', 1, 1024)

        model = load_symbol(config, 'Model')(config)
        num_params = sum(p.numel() for p in model.parameters())

        print("[trial %s]" % trial.number)

        if num_params > args.max_params:
            print("[pruned] network too large")
            raise optuna.exceptions.TrialPruned()

        model.to(args.device)
        model.train()

        os.makedirs(workdir, exist_ok=True)

        optimizer = AdamW(model.parameters(), amsgrad=True, lr=lr)
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)
        scheduler = CosineAnnealingLR(optimizer,
                                      args.epochs * len(train_loader))

        for epoch in range(1, args.epochs + 1):

            try:
                train_loss, duration = train(model,
                                             device,
                                             train_loader,
                                             optimizer,
                                             use_amp=True)
                val_loss, val_mean, val_median = test(model, device,
                                                      test_loader)
                print(
                    "[epoch {}] directory={} loss={:.4f} mean_acc={:.3f}% median_acc={:.3f}%"
                    .format(epoch, workdir, val_loss, val_mean, val_median))
            except KeyboardInterrupt:
                exit()
            except Exception:
                print("[pruned] exception")
                raise optuna.exceptions.TrialPruned()

            if np.isnan(val_loss):
                val_loss = 9.9
            trial.report(val_loss, epoch)

            if trial.should_prune():
                print("[pruned] unpromising")
                raise optuna.exceptions.TrialPruned()

        trial.set_user_attr('seed', args.seed)
        trial.set_user_attr('val_loss', val_loss)
        trial.set_user_attr('val_mean', val_mean)
        trial.set_user_attr('val_median', val_median)
        trial.set_user_attr('train_loss', train_loss)
        trial.set_user_attr('batchsize', args.batch)
        trial.set_user_attr('model_params', num_params)

        torch.save(model.state_dict(),
                   os.path.join(workdir, "weights_%s.tar" % trial.number))
        toml.dump(
            config,
            open(os.path.join(workdir, 'config_%s.toml' % trial.number), 'w'))

        print("[loss] %.4f" % val_loss)
        return val_loss

    print("[starting study]")

    optuna.logging.set_verbosity(optuna.logging.WARNING)

    study = optuna.create_study(direction='minimize',
                                storage='sqlite:///%s' %
                                os.path.join(workdir, 'tune.db'),
                                study_name='bonito-study',
                                load_if_exists=True,
                                pruner=SuccessiveHalvingPruner())

    study.optimize(objective, n_trials=args.trials)
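A side note on the search space above: suggest_discrete_uniform(..., 1, 129, 2) draws odd kernel sizes but returns floats, hence the int(...) wrappers. Newer Optuna releases deprecate it in favor of a stepped integer suggestion; an equivalent sketch:

import optuna


def objective(trial):
    # Same odd-kernel space as int(trial.suggest_discrete_uniform(..., 1, 129, 2)).
    kernel = trial.suggest_int('c1_kernel', 1, 129, step=2)
    return kernel


optuna.create_study().optimize(objective, n_trials=1)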
Code example #9
def hyperparam_optimization(
    algo,
    model_fn,
    env_fn,
    n_trials=10,
    n_timesteps=5000,
    hyperparams=None,  # noqa: C901
    n_jobs=1,
    sampler_method="tpe",
    pruner_method="median",
    n_startup_trials=10,
    n_evaluations=20,
    n_eval_episodes=5,
    storage=None,
    study_name=None,
    seed=0,
    verbose=1,
    deterministic_eval=True,
):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param n_startup_trials: (int)
    :param n_evaluations: (int) number of evaluations spread over the training budget
    :param n_eval_episodes: (int) number of episodes per evaluation
    :param storage: (Optional[str])
    :param study_name: (Optional[str])
    :param seed: (int)
    :param verbose: (int)
    :param deterministic_eval: (bool)
    :return: (pd.DataFrame) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    if hyperparams is None:
        hyperparams = {}

    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
    if sampler_method == "random":
        sampler = RandomSampler(seed=seed)
    elif sampler_method == "tpe":
        # TODO: try with multivariate=True
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == "skopt":
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={
            "base_estimator": "GP",
            "acq_func": "gp_hedge"
        })
    else:
        raise ValueError(f"Unknown sampler: {sampler_method}")

    if pruner_method == "halving":
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == "median":
        pruner = MedianPruner(n_startup_trials=n_startup_trials,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == "none":
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError(f"Unknown pruner: {pruner_method}")

    if verbose > 0:
        print(f"Sampler: {sampler_method} - Pruner: {pruner_method}")

    study = optuna.create_study(sampler=sampler,
                                pruner=pruner,
                                storage=storage,
                                study_name=study_name,
                                load_if_exists=True,
                                direction="maximize")
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == "her":
            trial.model_class = hyperparams["model_class"]

        # Hack to use DDPG/TD3 noise sampler
        if algo in ["ddpg", "td3"] or trial.model_class in ["ddpg", "td3"]:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)
        model.trial = trial

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: Use non-deterministic eval for Atari
        # or use maximum number of steps to avoid infinite loop
        eval_callback = TrialEvalCallback(eval_env,
                                          trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=deterministic_eval)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError as e:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            # Prune hyperparams that generate NaNs
            print(e)
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        reward = eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return reward

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print("Number of finished trials: ", len(study.trials))

    print("Best trial:")
    trial = study.best_trial

    print("Value: ", trial.value)

    print("Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    return study.trials_dataframe()
Code example #10
def hyperparam_optimization(algo,
                            model_fn,
                            env_fn,
                            n_trials=10,
                            n_timesteps=5000,
                            hyperparams=None,
                            n_jobs=1,
                            sampler_method='random',
                            pruner_method='halving',
                            seed=0,
                            verbose=1,
                            timeout=None):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.DataFrame) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    # test during 5 episodes
    n_test_episodes = 5
    # evaluate every 20th of the maximum budget per iteration
    n_evaluations = 20
    evaluate_interval = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=5, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={
            'base_estimator': "GP",
            'acq_func': 'gp_hedge'
        })
    else:
        raise ValueError('Unknown sampler: {}'.format(sampler_method))

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=5,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError('Unknown pruner: {}'.format(pruner_method))

    if verbose > 0:
        print("Sampler: {} - Pruner: {}".format(sampler_method, pruner_method))

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        def callback(_locals, _globals):
            """
            Callback for monitoring learning progress.
            :param _locals: (dict)
            :param _globals: (dict)
            :return: (bool) If False: stop training
            """
            self_ = _locals['self']
            trial = self_.trial

            # Initialize variables
            if not hasattr(self_, 'is_pruned'):
                self_.is_pruned = False
                self_.last_mean_test_reward = -np.inf
                self_.last_time_evaluated = 0
                self_.eval_idx = 0

            if (self_.num_timesteps -
                    self_.last_time_evaluated) < evaluate_interval:
                return True

            self_.last_time_evaluated = self_.num_timesteps

            # Evaluate the trained agent on the test env
            rewards = []
            n_episodes, reward_sum = 0, 0.0

            # Sync the obs rms if using vecnormalize
            # NOTE: this does not cover all the possible cases
            if isinstance(self_.test_env, VecNormalize):
                self_.test_env.obs_rms = deepcopy(self_.env.obs_rms)
                # Do not normalize reward
                self_.test_env.norm_reward = False

            obs = self_.test_env.reset()
            while n_episodes < n_test_episodes:
                # Use default value for deterministic
                action, _ = self_.predict(obs)
                obs, reward, done, _ = self_.test_env.step(action)
                reward_sum += reward

                if done:
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = self_.test_env.reset()

            mean_reward = np.mean(rewards)
            self_.last_mean_test_reward = mean_reward
            self_.eval_idx += 1

            # report best or report current?
            # report num_timesteps or elapsed time?
            trial.report(-1 * mean_reward, self_.eval_idx)
            # Prune trial if need
            if trial.should_prune():  # the step argument was removed in newer Optuna
                self_.is_pruned = True
                return False

            return True

        model = model_fn(**kwargs)
        model.test_env = env_fn(n_envs=1)
        model.trial = trial
        if algo == 'her':
            model.model.trial = trial
            # Wrap the env if need to flatten the dict obs
            if isinstance(model.test_env, VecEnv):
                model.test_env = _UnvecWrapper(model.test_env)
            model.model.test_env = HERGoalEnvWrapper(model.test_env)

        try:
            model.learn(n_timesteps, callback=callback)
            # Free memory
            model.env.close()
            model.test_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            model.test_env.close()
            raise
        is_pruned = False
        cost = np.inf
        if hasattr(model, 'is_pruned'):
            is_pruned = model.is_pruned
            cost = -1 * model.last_mean_test_reward
        del model.env, model.test_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()  # optuna.structs is deprecated

        return cost

    try:
        study.optimize(objective,
                       n_trials=n_trials,
                       n_jobs=n_jobs,
                       timeout=timeout,
                       catch=(ValueError, AssertionError))
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()
Code example #11
def run(n_timesteps=train.shape[0], seed=42, n_trials=100):

    n_startup_trials = 10
    # evaluate every 20th of the maximum budget per iteration
    n_evaluations = 20
    eval_freq = int(n_timesteps / n_evaluations)

    sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    pruner = SuccessiveHalvingPruner(min_resource=1,
                                     reduction_factor=4,
                                     min_early_stopping_rate=0)

    study = optuna.create_study(sampler=sampler,
                                pruner=pruner,
                                study_name=study_name,
                                storage=storage_name,
                                load_if_exists=True)

    def param_sampler(trial: Trial):
        batch_size = trial.suggest_categorical('batch_size',
                                               [32, 64, 128, 256])
        n_steps = trial.suggest_categorical(
            'n_steps', [16, 32, 64, 128, 256, 512, 1024, 2048])
        gamma = trial.suggest_categorical('gamma',
                                          [0.001, 0.01, 0.1, 0.2, 0.3, 0.5])
        learning_rate = trial.suggest_loguniform('lr', 1e-5, 1)
        ent_coef = trial.suggest_loguniform('ent_coef', 0.00000001, 0.1)
        cliprange = trial.suggest_categorical('cliprange',
                                              [0.1, 0.2, 0.3, 0.4])
        noptepochs = trial.suggest_categorical('noptepochs',
                                               [1, 5, 10, 20, 30, 50])
        lam = trial.suggest_categorical(
            'lambda', [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0])

        if n_steps < batch_size:
            nminibatches = 1
        else:
            nminibatches = int(n_steps / batch_size)

        multiplicator_coef = trial.suggest_uniform("multiplicator_coef", 0.8,
                                                   1.2)
        reward_multiplicator = 100
        negative_reward_multiplicator = reward_multiplicator * multiplicator_coef

        features = [c for c in train.columns.values if "f_" in c
                    ] + ["feature_0", "weight"]

        train_py_env = MarketEnvDaily(
            trades=train,
            features=features,
            reward_column="resp",
            weight_column="weight",
            include_weight=True,
            reward_multiplicator=reward_multiplicator,
            negative_reward_multiplicator=negative_reward_multiplicator)

        train_env = DummyVecEnv([lambda: train_py_env])

        num_layers = trial.suggest_categorical("num_layers", [1, 2, 3])
        net_arch = []
        for i in range(num_layers):
            mult = trial.suggest_categorical("layer_{i}".format(i=i), [1, 2, 3])
            net_arch.append(mult * len(features))

        policy_kwargs = dict(act_fun=tf.nn.swish, net_arch=net_arch)

        return {
            'policy': MlpPolicy,
            'env': train_env,
            'n_steps': n_steps,
            'nminibatches': nminibatches,
            'gamma': gamma,
            'learning_rate': learning_rate,
            'ent_coef': ent_coef,
            'cliprange': cliprange,
            'noptepochs': noptepochs,
            'policy_kwargs': policy_kwargs,
            'lam': lam
        }

    def objective(trial):
        kwargs = param_sampler(trial)
        model = PPO2(**kwargs)
        eval_callback = TrialEvalCallback(train,
                                          eval_df,
                                          trial,
                                          eval_freq=eval_freq)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            raise optuna.exceptions.TrialPruned()

        is_pruned = eval_callback.is_pruned
        sum_of_t_coef = -1 * eval_callback.sum_of_t_coef

        del model.env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return float(sum_of_t_coef)

    try:
        study.optimize(objective, n_trials=n_trials)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))
Code example #12
def hyperparam_optimization(algo,
                            model_fn,
                            env_fn,
                            n_trials=10,
                            n_timesteps=5000,
                            hyperparams=None,
                            n_jobs=1,
                            sampler_method='random',
                            pruner_method='halving',
                            n_startup_trials=10,
                            n_evaluations=20,
                            n_eval_episodes=1,
                            seed=0,
                            verbose=1):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param n_startup_trials: (int)
    :param n_evaluations: (int) number of evaluations spread over the training budget
    :param n_eval_episodes: (int) number of episodes per evaluation
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.DataFrame) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    if hyperparams is None:
        hyperparams = {}

    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={
            'base_estimator': "GP",
            'acq_func': 'gp_hedge'
        })
    else:
        raise ValueError(f'Unknown sampler: {sampler_method}')

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=n_startup_trials,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError(f'Unknown pruner: {pruner_method}')

    if verbose > 0:
        print(f"Sampler: {sampler_method} - Pruner: {pruner_method}")

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)
        model.trial = trial

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: use non-deterministic eval for Atari?
        eval_callback = TrialEvalCallback(eval_env,
                                          trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=True)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError as e:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            # Prune hyperparams that generate NaNs
            print(e)
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        cost = -1 * eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print(f'    {key}: {value}')

    return study.trials_dataframe()
Code example #13
File: optimizer.py Project: ekimetrics/pyepidemics
    def run(
        self,
        true,
        space,
        init_state=None,
        objective_fn=None,
        n=100,
        early_stopping=None,
        timeout=None,
        constraint=None,
        callbacks=None,
        show_progress_bar=True,
        n_jobs=1,
        info=None,
        save=True,
        filename=None,
    ):

        # Verify n_jobs is equal to 1
        assert n_jobs == 1, "Parallel optimization is not implemented yet"

        # Disable default logging of Optuna
        optuna.logging.disable_default_handler()

        # Prepare Optuna objective function
        if objective_fn is None:
            objective_fn = lambda params: self.model.objective(
                true, params, init_state=init_state, constraint=constraint)

        def objective(trial):
            params = self._sample(trial, space)
            return objective_fn(params)

        # Create Optuna study
        # Possibility here to change sampler and pruner
        sampler = TPESampler()
        pruner = SuccessiveHalvingPruner()
        self.study = optuna.create_study(direction="minimize",
                                         pruner=pruner,
                                         sampler=sampler)

        # Create callback
        pbar = tqdm(range(0, n), desc="Parameters Optimization")

        def custom_callback(study, trial):
            # Message
            pbar.set_postfix({
                "value": trial.value,
                "best_value": study.best_value
            })
            pbar.update()
            # Early stopping
            if early_stopping is not None:
                if trial.number - study.best_trial.number > early_stopping:
                    raise EarlyStoppingError("Stopping")

        # Run optimizer to find best parameters
        # Try except block allow for early stopping if best value has not changed since a given number of trials
        try:
            if callbacks is None:
                callbacks = []

            self.study.optimize(
                objective,
                n_trials=n,
                n_jobs=n_jobs,
                show_progress_bar=False,
                timeout=timeout,
                gc_after_trial=False,  # is it accelerating computation?
                callbacks=[custom_callback] + callbacks)

        except EarlyStoppingError:
            print(
                f"... Early stopping - best value has not changed for {early_stopping} trials at {self.study.best_value}"
            )

        # Return best value
        best = self.study.best_params
        print(
            f"... Found best solution {best} for value {self.study.best_value}"
        )

        # Compute final loss
        loss_dict = self.model.objective(true,
                                         best,
                                         init_state,
                                         return_dict=True)
        if info is None:
            info = {}

        # Save parameters
        if save:
            self.save_params(filename,
                             message="Parameters calibration",
                             info={
                                 "on": true.columns.tolist(),
                                 "init_state": init_state,
                                 **loss_dict,
                                 **info
                             })

        return best
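The snippet raises and catches a custom EarlyStoppingError whose definition is not shown; it is presumably just a control-flow exception along these lines:

class EarlyStoppingError(Exception):
    """Raised from the Optuna callback to stop the study once the best
    value has stopped improving."""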
Code example #14
File: tune.py Project: sleeepyjack/bonito
def main(args):

    workdir = os.path.expanduser(args.tuning_directory)

    if os.path.exists(workdir) and not args.force:
        print("* error: %s exists." % workdir)
        exit(1)

    os.makedirs(workdir, exist_ok=True)

    init(args.seed, args.device)
    device = torch.device(args.device)

    print("[loading data]")
    train_data = load_data(limit=args.chunks, directory=args.directory)
    if os.path.exists(os.path.join(args.directory, 'validation')):
        valid_data = load_data(directory=os.path.join(args.directory,
                                                      'validation'),
                               limit=10000)
    else:
        print("[validation set not found: splitting training set]")
        split = np.floor(len(train_data[0]) * 0.97).astype(np.int32)
        valid_data = [x[split:] for x in train_data]
        train_data = [x[:split] for x in train_data]

    train_loader = DataLoader(ChunkDataSet(*train_data),
                              batch_size=args.batch,
                              shuffle=True,
                              num_workers=4,
                              pin_memory=True)
    test_loader = DataLoader(ChunkDataSet(*valid_data),
                             batch_size=args.batch,
                             num_workers=4,
                             pin_memory=True)

    def objective(trial):

        config = toml.load(args.config)

        lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)

        model = load_symbol(config, 'Model')(config)

        num_params = sum(p.numel() for p in model.parameters())

        print("[trial %s]" % trial.number)

        model.to(args.device)
        model.train()

        os.makedirs(workdir, exist_ok=True)

        scaler = GradScaler(enabled=True)
        optimizer = AdamW(model.parameters(), amsgrad=False, lr=lr)
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)
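        # NOTE: torch.cuda.amp's GradScaler and NVIDIA apex's amp.initialize
        # are two different mixed-precision mechanisms; typically only one
        # of them is used at a time.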

        if hasattr(model, 'seqdist'):
            criterion = model.seqdist.ctc_loss
        else:
            criterion = None

        lr_scheduler = func_scheduler(
            optimizer,
            cosine_decay_schedule(1.0, decay),
            args.epochs * len(train_loader),
            warmup_steps=warmup_steps,
            warmup_ratio=warmup_ratio,
        )

        for epoch in range(1, args.epochs + 1):

            try:
                train_loss, duration = train(model,
                                             device,
                                             train_loader,
                                             optimizer,
                                             scaler=scaler,
                                             use_amp=True,
                                             criterion=criterion)
                val_loss, val_mean, val_median = test(model,
                                                      device,
                                                      test_loader,
                                                      criterion=criterion)
                print(
                    "[epoch {}] directory={} loss={:.4f} mean_acc={:.3f}% median_acc={:.3f}%"
                    .format(epoch, workdir, val_loss, val_mean, val_median))
            except KeyboardInterrupt:
                exit()
            except Exception as e:
                print("[pruned] exception: %s" % e)
                raise optuna.exceptions.TrialPruned()

            if np.isnan(val_loss):
                val_loss = 9.9
            trial.report(val_loss, epoch)

            if trial.should_prune():
                print("[pruned] unpromising")
                raise optuna.exceptions.TrialPruned()

        trial.set_user_attr('val_loss', val_loss)
        trial.set_user_attr('val_mean', val_mean)
        trial.set_user_attr('val_median', val_median)
        trial.set_user_attr('train_loss', train_loss)
        trial.set_user_attr('model_params', num_params)

        torch.save(model.state_dict(),
                   os.path.join(workdir, "weights_%s.tar" % trial.number))
        toml.dump(
            config,
            open(os.path.join(workdir, 'config_%s.toml' % trial.number), 'w'))

        print("[loss] %.4f" % val_loss)
        return val_loss

    print("[starting study]")

    optuna.logging.set_verbosity(optuna.logging.WARNING)

    study = optuna.create_study(direction='minimize',
                                storage='sqlite:///%s' %
                                os.path.join(workdir, 'tune.db'),
                                study_name='bonito-study',
                                load_if_exists=True,
                                pruner=SuccessiveHalvingPruner())

    study.optimize(objective, n_trials=args.trials)
Code example #15
sock.listen(NUM_PAR)


def objective(trial):
    con, addr = sock.accept()
    x = trial.suggest_uniform('x', -10, 10)
    con.sendall(json.dumps(x).encode())
    d = con.recv(1024)
    con.close()
    return json.loads(d)


db_name = 'sqlite:///test.db'
study_name = str(uuid.uuid4())


def func():
    study = optuna.load_study(study_name=study_name, storage=db_name)
    study.optimize(objective, n_trials=10 // NUM_PAR)


study = optuna.create_study(study_name=study_name,
                            storage=db_name,
                            pruner=SuccessiveHalvingPruner())

with ThreadPoolExecutor(max_workers=NUM_PAR) as ex:
    for i in range(NUM_PAR):
        ex.submit(func)

study = optuna.load_study(study_name=study_name, storage=db_name)
print(study.best_params)
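The study above sends each suggested x over a socket and reads the objective value back as JSON. A hypothetical worker-side counterpart (the address, port, and quadratic objective are all assumptions):

import json
import socket


def worker(host='localhost', port=12345):  # address and port are assumptions
    con = socket.create_connection((host, port))
    x = json.loads(con.recv(1024).decode())  # receive the suggested x
    con.sendall(json.dumps((x - 2) ** 2).encode())  # send the value back
    con.close()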
Code example #16
File: run.py Project: skugele/simple-animat-world
def optimize(env_id, params, args, session_path, session_id):
    n_trials = args.n_trials
    n_episodes_per_eval = args.n_episodes_per_eval

    seed = int(time())

    if args.sampler == 'random':
        sampler = RandomSampler(seed=seed)
    elif args.sampler == 'tpe':
        sampler = TPESampler(n_startup_trials=5, seed=seed)
    elif args.sampler == 'skopt':
        sampler = SkoptSampler(skopt_kwargs={'base_estimator': "GP", 'acq_func': 'gp_hedge'})
    else:
        raise ValueError('Unknown sampler: {}'.format(args.sampler))

    if args.pruner == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
    elif args.pruner == 'median':
        pruner = MedianPruner(n_startup_trials=5)
    elif args.pruner == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials)
    else:
        raise ValueError('Unknown pruner: {}'.format(args.pruner))

    study_name = args.optimizer_study_name if args.optimizer_study_name else f'{session_id}-optimizer_study'
    storage = f'sqlite:///{study_name}.db' if args.optimizer_use_db else None

    study = optuna.create_study(study_name=study_name,
                                storage=storage,
                                load_if_exists=True,
                                sampler=sampler,
                                pruner=pruner)

    # the objective function called by optuna during each trial
    def objective(trial):
        # copy to preserve original params
        _params = params.copy()
        _params['hyper_params'] = HYPERPARAMS_SAMPLER[args.algorithm.lower()](trial)

        # network architecture
        net_arch = trial.suggest_categorical('net_arch', ['8x8', '16x16', '32x32'])
        layers = map(int, net_arch.split('x'))
        policy_kwargs = dict(act_fun=tf.nn.relu, net_arch=list(layers))

        print(f'*** beginning trial {trial.number}')
        print('\thyper-parameters:')
        for param, value in _params['hyper_params'].items():
            print(f'\t\t{param}:{value}')
        print(f'\t\tnet_arch: {net_arch}')

        _params['save_dir'] = _params['save_dir'] / 'optimizer'

        try:
            # purge any previously saved models
            purge_model(_params, args, interactive=False)

            ######################################################
            # learning phase - on possibly multiple environments #
            ######################################################
            godot_instances = [GodotInstance(o_port, a_port) for o_port, a_port in
                               get_godot_instances(args.n_godot_instances)]
            env = create_env(args, env_id, godot_instances, _params, session_path)
            env = VecCheckNan(env, warn_once=False, raise_exception=True)

            # learn and save model
            model = init_model(session_path, _params, env, args, policy_kwargs=policy_kwargs)
            learn(env, model, _params, args, session_path)
            env.close()

            ##########################################################################
            # evaluation phase - single environment (deterministic action selection) #
            ##########################################################################
            env = create_env(args, env_id, [GODOT_EVAL_INSTANCE], _params, session_path, eval=True)
            env = VecCheckNan(env, warn_once=False, raise_exception=True)

            # loaded previously learned model and evaluate
            model = init_model(session_path, _params, env, args, eval=True)
            mean_reward, _ = evaluate(model, env, args, n_episodes=n_episodes_per_eval)
            env.close()

        except (AssertionError, ValueError) as e:
            print(f'pruning optimizer trial {trial.number} due to exception {e}')
            raise optuna.exceptions.TrialPruned()

        # optuna minimizes the objective by default, so we need to flip the sign to maximize
        cost = -1 * mean_reward
        return cost

    try:
        study.optimize(objective, n_trials)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)
    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()