def objective_fn(
        trial: Trial,
        device: int,
        direction: str,
        target_metric: str,
        base_serialization_dir: str,
):
    embedding_dim = trial.suggest_int("embedding_dim", 128, 256)
    max_filter_size = trial.suggest_int("max_filter_size", 3, 6)
    num_filters = trial.suggest_int("num_filters", 128, 256)
    output_dim = trial.suggest_int("output_dim", 128, 512)
    dropout = trial.suggest_float("dropout", 0, 1.0, log=False)
    lr = trial.suggest_float("lr", 1e-4, 1e-1, log=True)

    train_dataset, valid_dataset, vocab = prepare_data()
    model = create_model(vocab, embedding_dim, max_filter_size, num_filters, output_dim, dropout)

    if device > -1:
        model.to(torch.device("cuda:{}".format(device)))

    optimizer = SGD(model.parameters(), lr=lr)
    data_loader = DataLoader(train_dataset, batch_size=10, collate_fn=allennlp_collate)
    validation_data_loader = DataLoader(valid_dataset, batch_size=64, collate_fn=allennlp_collate)
    serialization_dir = os.path.join(base_serialization_dir, "trial_{}".format(trial.number))
    trainer = GradientDescentTrainer(
        model=model,
        optimizer=optimizer,
        data_loader=data_loader,
        validation_data_loader=validation_data_loader,
        validation_metric=("+" if direction == "MAXIMIZE" else "-") + target_metric,
        patience=None,  # `patience=None` since it could conflict with AllenNLPPruningCallback
        num_epochs=50,
        cuda_device=device,
        serialization_dir=serialization_dir,
        epoch_callbacks=[AllenNLPPruningCallback(trial, f"validation_{target_metric}")],
    )
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))
    return trainer.train()[f"best_validation_{target_metric}"]
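# A minimal sketch (not from the source) of how an objective with extra arguments like
# this one is typically handed to Optuna: the fixed arguments are bound with
# functools.partial. The device, metric name, output directory, and trial budget below
# are placeholders.
import functools

import optuna

objective = functools.partial(
    objective_fn,
    device=-1,  # CPU; pass a GPU index such as 0 to train on CUDA
    direction="MAXIMIZE",
    target_metric="accuracy",
    base_serialization_dir="result",
)
study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20)
print(study.best_trial.params)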
Example #2
class AllenNLPPruningCallback(EpochCallback):
    """AllenNLP callback to prune unpromising trials.

    See `the example <https://github.com/optuna/optuna/blob/master/
    examples/allennlp/allennlp_simple.py>`__
    if you want to add a pruning callback which observes a metric.

    You can also see the tutorial of our AllenNLP integration on
    `AllenNLP Guide <https://guide.allennlp.org/hyperparameter-optimization>`_.

    .. note::
        When :class:`~optuna.integration.AllenNLPPruningCallback` is instantiated in a Python
        script, ``trial`` and ``monitor`` are mandatory.

        On the other hand, when :class:`~optuna.integration.AllenNLPPruningCallback` is used with
        :class:`~optuna.integration.AllenNLPExecutor`, ``trial`` and ``monitor``
        would be ``None``. :class:`~optuna.integration.AllenNLPExecutor` sets
        environment variables for a study name, trial id, monitor, and storage.
        Then :class:`~optuna.integration.AllenNLPPruningCallback`
        loads them to restore ``trial`` and ``monitor``.

    Args:
        trial:
            A :class:`~optuna.trial.Trial` corresponding to the current evaluation of the
            objective function.
        monitor:
            An evaluation metric for pruning, e.g. ``validation_loss`` or
            ``validation_accuracy``.

    """
    def __init__(
        self,
        trial: Optional[optuna.trial.Trial] = None,
        monitor: Optional[str] = None,
    ):
        _imports.check()

        if allennlp.__version__ < "1.0.0":
            raise Exception(
                "AllenNLPPruningCallback requires `allennlp`>=1.0.0.")

        # When `AllenNLPPruningCallback` is instantiated in Python script,
        # trial and monitor should not be `None`.
        if trial is not None and monitor is not None:
            self._trial = trial
            self._monitor = monitor

        # When `AllenNLPPruningCallback` is used with `AllenNLPExecutor`,
        # `trial` and `monitor` would be None. `AllenNLPExecutor` sets information
        # for a study name, trial id, monitor, and storage in environment variables.
        else:
            environment_variables = _get_environment_variables_for_trial()
            study_name = environment_variables["study_name"]
            trial_id = environment_variables["trial_id"]
            monitor = environment_variables["monitor"]
            storage = environment_variables["storage"]

            if study_name is None or trial_id is None or monitor is None or storage is None:
                message = (
                    "Failed to load the study. Perhaps you attempted to use"
                    " `AllenNLPPruningCallback` without `AllenNLPExecutor`. If you want to use"
                    " the callback without an executor, you have to instantiate it with"
                    " `trial` and `monitor`. Please see the Optuna example: https://github.com/"
                    "optuna/optuna/blob/master/examples/allennlp/allennlp_simple.py."
                )
                raise RuntimeError(message)

            else:
                # If `storage` is empty even though `study_name`, `trial_id`,
                # and `monitor` are set, the user is trying to use `AllenNLPPruningCallback`
                # with `AllenNLPExecutor` and in-memory storage.
                # `AllenNLPPruningCallback` needs an RDB or Redis storage to work.
                if storage == "":
                    message = (
                        "If you want to use AllenNLPExecutor and AllenNLPPruningCallback,"
                        " you have to use RDB or Redis storage.")
                    raise RuntimeError(message)

                study = load_study(study_name,
                                   storage,
                                   pruner=_create_pruner())
                self._trial = Trial(study, int(trial_id))
                self._monitor = monitor

    def __call__(
        self,
        trainer: "allennlp.training.GradientDescentTrainer",
        metrics: Dict[str, Any],
        epoch: int,
        is_master: bool,
    ) -> None:
        value = metrics.get(self._monitor)
        if value is None:
            return

        self._trial.report(float(value), epoch)
        if self._trial.should_prune():
            raise optuna.TrialPruned()
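# A short sketch (not from the source) of the executor-driven path described in the
# docstring above: the callback appears in the AllenNLP config without arguments, and
# ``AllenNLPExecutor`` forwards the study name, trial id, monitor, and storage through
# environment variables. The config path, storage URL, and metric name are assumptions.
import optuna


def objective(trial: optuna.Trial) -> float:
    trial.suggest_float("dropout", 0.0, 0.5)
    executor = optuna.integration.AllenNLPExecutor(
        trial,
        config_file="config.jsonnet",
        serialization_dir=f"result/trial_{trial.number}",
        metrics="best_validation_accuracy",
    )
    return executor.run()


study = optuna.create_study(
    direction="maximize",
    storage="sqlite:///allennlp.db",  # an RDB (or Redis) storage is required, see above
    pruner=optuna.pruners.HyperbandPruner(),
)
study.optimize(objective, n_trials=10)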
Example #3
def get_params(trial: Trial, tunable_params: Dict, default_params: Dict):
    defaults = default_params.copy()
    for key in tunable_params:
        args = tunable_params[key]
        defaults[key] = trial.suggest_float(name=key, **args)
    return defaults
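# A small usage sketch (parameter names and ranges are illustrative): every entry of
# ``tunable_params`` is a kwargs dict for ``trial.suggest_float``, and keys that are not
# listed there keep their value from ``default_params``.
default_params = {"lr": 1e-3, "weight_decay": 0.0, "momentum": 0.9}
tunable_params = {
    "lr": {"low": 1e-5, "high": 1e-1, "log": True},
    "weight_decay": {"low": 0.0, "high": 1e-2},
}


def objective(trial: Trial) -> float:
    params = get_params(trial, tunable_params, default_params)
    # params["lr"] and params["weight_decay"] are sampled; params["momentum"] stays 0.9
    return params["lr"]  # placeholder objective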
Example #4
    def __call__(self, trial: Trial) -> Optional[float]:
        """Suggest parameters then train the model."""
        if self.model_kwargs is not None:
            problems = [
                x
                for x in ('loss', 'regularizer', 'optimizer', 'training', 'negative_sampler', 'stopper')
                if x in self.model_kwargs
            ]
            if problems:
                raise ValueError(f'model_kwargs should not have: {problems}. {self}')

        # 2. Model
        _model_kwargs = _get_kwargs(
            trial=trial,
            prefix='model',
            default_kwargs_ranges=self.model.hpo_default,
            kwargs=self.model_kwargs,
            kwargs_ranges=self.model_kwargs_ranges,
        )

        try:
            loss_default_kwargs_ranges = self.loss.hpo_default
        except AttributeError:
            logger.warning('using a loss function with no hpo_default field: %s', self.loss)
            loss_default_kwargs_ranges = {}

        # 3. Loss
        _loss_kwargs = _get_kwargs(
            trial=trial,
            prefix='loss',
            default_kwargs_ranges=loss_default_kwargs_ranges,
            kwargs=self.loss_kwargs,
            kwargs_ranges=self.loss_kwargs_ranges,
        )
        # 4. Regularizer
        _regularizer_kwargs: Optional[Mapping[str, Any]]
        if self.regularizer is None:
            _regularizer_kwargs = {}
        else:
            _regularizer_kwargs = _get_kwargs(
                trial=trial,
                prefix='regularizer',
                default_kwargs_ranges=self.regularizer.hpo_default,
                kwargs=self.regularizer_kwargs,
                kwargs_ranges=self.regularizer_kwargs_ranges,
            )
        # 5. Optimizer
        _optimizer_kwargs = _get_kwargs(
            trial=trial,
            prefix='optimizer',
            default_kwargs_ranges=optimizers_hpo_defaults[self.optimizer],
            kwargs=self.optimizer_kwargs,
            kwargs_ranges=self.optimizer_kwargs_ranges,
        )

        _negative_sampler_kwargs: Mapping[str, Any]
        if self.training_loop is not SLCWATrainingLoop:
            _negative_sampler_kwargs = {}
        else:
            _negative_sampler_kwargs = _get_kwargs(
                trial=trial,
                prefix='negative_sampler',
                default_kwargs_ranges={} if self.negative_sampler is None else self.negative_sampler.hpo_default,
                kwargs=self.negative_sampler_kwargs,
                kwargs_ranges=self.negative_sampler_kwargs_ranges,
            )

        _training_kwargs = _get_kwargs(
            trial=trial,
            prefix='training',
            default_kwargs_ranges=self.training_loop.hpo_default,
            kwargs=self.training_kwargs,
            kwargs_ranges=self.training_kwargs_ranges,
        )

        _stopper_kwargs = dict(self.stopper_kwargs or {})
        if self.stopper is not None and issubclass(self.stopper, EarlyStopper):
            self._update_stopper_callbacks(_stopper_kwargs, trial)

        try:
            result = pipeline(
                # 1. Dataset
                dataset=self.dataset,
                dataset_kwargs=self.dataset_kwargs,
                training=self.training,
                testing=self.testing,
                validation=self.validation,
                evaluation_entity_whitelist=self.evaluation_entity_whitelist,
                evaluation_relation_whitelist=self.evaluation_relation_whitelist,
                # 2. Model
                model=self.model,
                model_kwargs=_model_kwargs,
                # 3. Loss
                loss=self.loss,
                loss_kwargs=_loss_kwargs,
                # 4. Regularizer
                regularizer=self.regularizer,
                regularizer_kwargs=_regularizer_kwargs,
                clear_optimizer=True,
                # 5. Optimizer
                optimizer=self.optimizer,
                optimizer_kwargs=_optimizer_kwargs,
                # 6. Training Loop
                training_loop=self.training_loop,
                negative_sampler=self.negative_sampler,
                negative_sampler_kwargs=_negative_sampler_kwargs,
                # 7. Training
                training_loop_kwargs=self.training_loop_kwargs,
                training_kwargs=_training_kwargs,
                stopper=self.stopper,
                stopper_kwargs=_stopper_kwargs,
                # 8. Evaluation
                evaluator=self.evaluator,
                evaluator_kwargs=self.evaluator_kwargs,
                evaluation_kwargs=self.evaluation_kwargs,
                filter_validation_when_testing=self.filter_validation_when_testing,
                # 9. Tracker
                result_tracker=self.result_tracker,
                result_tracker_kwargs=self.result_tracker_kwargs,
                # Misc.
                use_testing_data=False,  # use validation set during HPO!
                device=self.device,
            )
        except (MemoryError, RuntimeError) as e:
            trial.set_user_attr('failure', str(e))
            # Will trigger Optuna to set the state of the trial as failed
            return None
        else:
            if self.save_model_directory:
                model_directory = os.path.join(self.save_model_directory, str(trial.number))
                os.makedirs(model_directory, exist_ok=True)
                result.save_to_directory(model_directory)

            trial.set_user_attr('random_seed', result.random_seed)

            for k, v in result.metric_results.to_flat_dict().items():
                trial.set_user_attr(k, v)

            return result.metric_results.get_metric(self.metric)
Example #5
def train_and_eval(trial: optuna.Trial, ex_dir: str, seed: Optional[int]):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note::
        Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param ex_dir: experiment's directory, i.e. the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environment
    env = QBallBalancerSim(dt=1/250., max_steps=1500)
    env = ActNormWrapper(env)

    # Policy
    policy = FNNPolicy(
        spec=env.spec,
        hidden_sizes=trial.suggest_categorical('hidden_sizes_policy', [[16, 16], [32, 32], [64, 64]]),
        hidden_nonlin=fcn_from_str(trial.suggest_categorical('hidden_nonlin_policy', ['to_tanh', 'to_relu'])),
    )

    # Critic
    value_fcn = FNN(
        input_size=env.obs_space.flat_dim,
        output_size=1,
        hidden_sizes=trial.suggest_categorical('hidden_sizes_critic', [[16, 16], [32, 32], [64, 64]]),
        hidden_nonlin=fcn_from_str(trial.suggest_categorical('hidden_nonlin_critic', ['to_tanh', 'to_relu'])),
    )
    critic_hparam = dict(
        gamma=trial.suggest_uniform('gamma_critic', 0.99, 1.),
        lamda=trial.suggest_uniform('lamda_critic', 0.95, 1.),
        num_epoch=trial.suggest_int('num_epoch_critic', 1, 10),
        batch_size=100,
        lr=trial.suggest_loguniform('lr_critic', 1e-5, 1e-3),
        standardize_adv=trial.suggest_categorical('standardize_adv_critic', [True, False]),
        # max_grad_norm=5.,
        # lr_scheduler=scheduler.StepLR,
        # lr_scheduler_hparam=dict(step_size=10, gamma=0.9)
        # lr_scheduler=scheduler.ExponentialLR,
        # lr_scheduler_hparam=dict(gamma=0.99)
    )
    critic = GAE(value_fcn, **critic_hparam)

    # Algorithm
    algo_hparam = dict(
        num_sampler_envs=1,  # parallelize via optuna n_jobs
        max_iter=500,
        min_steps=25*env.max_steps,
        num_epoch=trial.suggest_int('num_epoch_algo', 1, 10),
        eps_clip=trial.suggest_uniform('eps_clip_algo', 0.05, 0.2),
        batch_size=100,
        std_init=0.9,
        lr=trial.suggest_loguniform('lr_algo', 1e-5, 1e-3),
        # max_grad_norm=5.,
        # lr_scheduler=scheduler.StepLR,
        # lr_scheduler_hparam=dict(step_size=10, gamma=0.9)
        # lr_scheduler=scheduler.ExponentialLR,
        # lr_scheduler_hparam=dict(gamma=0.99)
    )
    algo = PPO(osp.join(ex_dir, f'trial_{trial.number}'), env, policy, critic, **algo_hparam)

    # Train without saving the results
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelSampler(env, policy, num_envs=20, min_rollouts=min_rollouts)
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros])/min_rollouts

    return mean_ret
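# A minimal sketch of the ``functools.partial`` wiring mentioned in the docstring; the
# experiment directory, seed, and trial budget are placeholders, not from the source.
import functools

import optuna

study = optuna.create_study(direction="maximize")
study.optimize(
    functools.partial(train_and_eval, ex_dir="experiments/qbb_ppo", seed=1001),
    n_trials=100,
    n_jobs=4,  # the snippet sets num_sampler_envs=1 and parallelizes across trials instead
)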
Example #6
def sample_dqn_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for DQN hyperparams.

    :param trial:
    :return:
    """
    gamma = trial.suggest_categorical(
        "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1)
    batch_size = trial.suggest_categorical("batch_size",
                                           [16, 32, 64, 100, 128, 256, 512])
    buffer_size = trial.suggest_categorical(
        "buffer_size",
        [int(1e4), int(5e4), int(1e5), int(1e6)])
    exploration_final_eps = trial.suggest_uniform("exploration_final_eps", 0,
                                                  0.2)
    exploration_fraction = trial.suggest_uniform("exploration_fraction", 0,
                                                 0.5)
    target_update_interval = trial.suggest_categorical(
        "target_update_interval", [1, 1000, 5000, 10000, 15000, 20000])
    learning_starts = trial.suggest_categorical("learning_starts",
                                                [0, 1000, 5000, 10000, 20000])

    train_freq = trial.suggest_categorical("train_freq",
                                           [1, 4, 8, 16, 128, 256, 1000])
    subsample_steps = trial.suggest_categorical("subsample_steps",
                                                [1, 2, 4, 8])
    gradient_steps = max(train_freq // subsample_steps, 1)
    n_episodes_rollout = -1

    net_arch = trial.suggest_categorical("net_arch",
                                         ["tiny", "small", "medium"])

    net_arch = {
        "tiny": [64],
        "small": [64, 64],
        "medium": [256, 256]
    }[net_arch]

    hyperparams = {
        "gamma": gamma,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "n_episodes_rollout": n_episodes_rollout,
        "exploration_fraction": exploration_fraction,
        "exploration_final_eps": exploration_final_eps,
        "target_update_interval": target_update_interval,
        "learning_starts": learning_starts,
        "policy_kwargs": dict(net_arch=net_arch),
    }

    return hyperparams
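# A minimal sketch (not from the source) of how these samplers are usually consumed:
# Example #12 below keeps a registry of them and merges the sampled values into its fixed
# hyperparameters. ``base_hyperparams`` is a hypothetical dict of fixed settings such as
# the policy class.
from typing import Any, Dict

import optuna

HYPERPARAMS_SAMPLER = {
    "dqn": sample_dqn_params,
    # "ppo": sample_ppo_params, "a2c": sample_a2c_params, "ddpg": sample_ddpg_params, ...
}


def build_model_kwargs(trial: optuna.Trial, algo: str, base_hyperparams: Dict[str, Any]) -> Dict[str, Any]:
    kwargs = dict(base_hyperparams)
    kwargs.update(HYPERPARAMS_SAMPLER[algo](trial))  # overlay the sampled hyperparameters
    return kwargs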
Example #7
def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for PPO2 hyperparams.

    :param trial:
    :return:
    """
    batch_size = trial.suggest_categorical("batch_size",
                                           [8, 16, 32, 64, 128, 256, 512])
    n_steps = trial.suggest_categorical(
        "n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048])
    gamma = trial.suggest_categorical(
        "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1)
    lr_schedule = "constant"
    # lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant'])
    ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
    clip_range = trial.suggest_categorical("clip_range", [0.1, 0.2, 0.3, 0.4])
    n_epochs = trial.suggest_categorical("n_epochs", [1, 5, 10, 20])
    gae_lambda = trial.suggest_categorical(
        "gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0])
    max_grad_norm = trial.suggest_categorical(
        "max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5])
    vf_coef = trial.suggest_uniform("vf_coef", 0, 1)
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium"])
    log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
    sde_sample_freq = trial.suggest_categorical("sde_sample_freq",
                                                [-1, 8, 16, 32, 64, 128, 256])
    ortho_init = False
    # ortho_init = trial.suggest_categorical('ortho_init', [False, True])
    # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu'])
    activation_fn = trial.suggest_categorical("activation_fn",
                                              ["tanh", "relu"])

    # TODO: account when using multiple envs
    if batch_size > n_steps:
        batch_size = n_steps

    if lr_schedule == "linear":
        learning_rate = linear_schedule(learning_rate)

    net_arch = {
        "small": [dict(pi=[64, 64], vf=[64, 64])],
        "medium": [dict(pi=[256, 256], vf=[256, 256])],
    }[net_arch]

    activation_fn = {
        "tanh": nn.Tanh,
        "relu": nn.ReLU,
        "elu": nn.ELU,
        "leaky_relu": nn.LeakyReLU
    }[activation_fn]

    return {
        "n_steps": n_steps,
        "batch_size": batch_size,
        "gamma": gamma,
        "learning_rate": learning_rate,
        "ent_coef": ent_coef,
        "clip_range": clip_range,
        "n_epochs": n_epochs,
        "gae_lambda": gae_lambda,
        "max_grad_norm": max_grad_norm,
        "vf_coef": vf_coef,
        "sde_sample_freq": sde_sample_freq,
        "policy_kwargs": dict(
            log_std_init=log_std_init,
            net_arch=net_arch,
            activation_fn=activation_fn,
            ortho_init=ortho_init,
        ),
    }
Example #8
 def objective(self, trial: optuna.Trial):
     stagnation = trial.suggest_categorical('stagnation', [10, 20, 40])
     collapse = trial.suggest_categorical('collapse', [1e-4, 1e-3, 1e-2])
     # collapse_type = trial.suggest_categorical('collapse_type', ['LINEAR', 'VARIANCE'])
     score = self._run_gapso(stagnation, collapse, trial.number)
     return score
Example #9
    def f(trial: Trial) -> float:

        trial.set_user_attr("train_accuracy", 1)
        assert trial.user_attrs["train_accuracy"] == 1
        return 0.0
Example #10
    def objective(t: optuna.Trial) -> float:

        value = t.suggest_int("x", -1, 1) + t.suggest_int("y", -1, 1)
        if t.number == 0:
            raise Exception("first trial is failed")
        return float(value)
Example #11
 def objective(trial: optuna.Trial) -> float:
     _ = trial.suggest_uniform("x", -1, 1)
     _ = trial.suggest_uniform("y", -1, 1)
     return 1.0
Example #12
    def objective(self, trial: optuna.Trial) -> float:

        kwargs = self._hyperparams.copy()

        trial.model_class = None
        if self.algo == "her":
            trial.model_class = self._hyperparams.get("model_class", None)

        # Hack to use DDPG/TD3 noise sampler
        trial.n_actions = self.n_actions
        # Sample candidate hyperparameters
        kwargs.update(HYPERPARAMS_SAMPLER[self.algo](trial))

        model = ALGOS[self.algo](
            env=self.create_envs(self.n_envs, no_log=True),
            tensorboard_log=None,
            # We do not seed the trial
            seed=None,
            verbose=0,
            **kwargs,
        )

        model.trial = trial

        eval_env = self.create_envs(n_envs=1, eval_env=True)

        eval_freq = int(self.n_timesteps / self.n_evaluations)
        # Account for parallel envs
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # Use non-deterministic eval for Atari
        eval_callback = TrialEvalCallback(
            eval_env,
            trial,
            n_eval_episodes=self.n_eval_episodes,
            eval_freq=eval_freq_,
            deterministic=self.deterministic_eval,
        )

        try:
            model.learn(self.n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError as e:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            # Prune hyperparams that generate NaNs
            print(e)
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        reward = eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return reward
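# ``TrialEvalCallback`` is used above but not defined in this snippet. A rough sketch in
# the spirit of the rl-baselines-zoo implementation (an assumption, not taken from the
# source): it evaluates periodically, reports the mean reward to the trial, and remembers
# whether Optuna asked to prune.
from stable_baselines3.common.callbacks import EvalCallback


class TrialEvalCallback(EvalCallback):
    def __init__(self, eval_env, trial, n_eval_episodes=5, eval_freq=10000, deterministic=True, verbose=0):
        super().__init__(
            eval_env=eval_env,
            n_eval_episodes=n_eval_episodes,
            eval_freq=eval_freq,
            deterministic=deterministic,
            verbose=verbose,
        )
        self.trial = trial
        self.eval_idx = 0
        self.is_pruned = False

    def _on_step(self) -> bool:
        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            super()._on_step()  # runs the evaluation and updates self.last_mean_reward
            self.eval_idx += 1
            self.trial.report(self.last_mean_reward, self.eval_idx)
            if self.trial.should_prune():
                self.is_pruned = True
                return False  # stop training; the objective raises TrialPruned afterwards
        return True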
Example #13
 def objective(trial: optuna.Trial) -> float:
     x = trial.suggest_float("x", -10, 10)
     y = trial.suggest_float("y", -10, 10)
     # Objective values are negative.
     return -(x**2) - (y - 5) ** 2
Example #14
 def objective(trial: optuna.Trial) -> float:
     x = trial.suggest_float("x", -10, 10)
     y = trial.suggest_float("y", -10, 10)
     return x**2 + y
Example #15
 def objective2(trial: optuna.Trial) -> float:
     x0 = trial.suggest_float("x0", 2, 3)
     x1 = trial.suggest_float("x1", 1e-2, 1e2, log=True)
     x2 = trial.suggest_float("x2", 1e-2, 1e2, log=True)
     return x0 + x1 + x2
Example #16
def objective(trial: Trial) -> float:
    x1 = trial.suggest_uniform("x1", 0.1, 3)
    x2 = trial.suggest_loguniform("x2", 0.1, 3)
    x3 = trial.suggest_loguniform("x3", 2, 4)
    return x1 + x2 * x3
Example #17
def fixed_param(trial: op.Trial, name: str, value: float = 0.) -> float:
    return trial.suggest_float(name, value, value)
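# A small usage sketch (names and values are illustrative, and the ``op`` alias for
# optuna is assumed as above): suggesting the degenerate range [value, value] pins the
# parameter while still recording it in ``trial.params`` next to the tuned ones.
def objective(trial: op.Trial) -> float:
    gamma = fixed_param(trial, "gamma", 0.99)  # always 0.99, but logged with the trial
    x = trial.suggest_float("x", -1.0, 1.0)
    return gamma * x ** 2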
Example #18
    def f(trial: Trial) -> float:

        trial.set_system_attr("system_message", "test")
        assert trial.system_attrs["system_message"] == "test"
        return 0.0
Example #19
    def objective(trial: optuna.Trial) -> float:
        # Filenames for each trial must be made unique in order to access each checkpoint.
        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            dirpath=os.path.join(model_path, "trial_{}".format(trial.number)),
            filename="{epoch}",
            monitor="val_loss")

        # The default logger in PyTorch Lightning writes to event files to be consumed by
        # TensorBoard. We don't use any logger here as it requires us to implement several abstract
        # methods. Instead we setup a simple callback, that saves metrics from each validation step.
        metrics_callback = MetricsCallback()
        learning_rate_callback = LearningRateMonitor()
        logger = TensorBoardLogger(log_dir,
                                   name="optuna",
                                   version=trial.number)
        gradient_clip_val = trial.suggest_loguniform("gradient_clip_val",
                                                     *gradient_clip_val_range)
        default_trainer_kwargs = dict(
            gpus=[0] if torch.cuda.is_available() else None,
            max_epochs=max_epochs,
            gradient_clip_val=gradient_clip_val,
            callbacks=[
                metrics_callback,
                learning_rate_callback,
                checkpoint_callback,
                PyTorchLightningPruningCallback(trial, monitor="val_loss"),
            ],
            logger=logger,
            progress_bar_refresh_rate=[0, 1
                                       ][optuna_verbose < optuna.logging.INFO],
            weights_summary=[None,
                             "top"][optuna_verbose < optuna.logging.INFO],
        )
        default_trainer_kwargs.update(trainer_kwargs)
        trainer = pl.Trainer(**default_trainer_kwargs, )

        # create model
        hidden_size = trial.suggest_int("hidden_size",
                                        *hidden_size_range,
                                        log=True)
        kwargs["loss"] = copy.deepcopy(loss)
        model = TemporalFusionTransformer.from_dataset(
            train_dataloader.dataset,
            dropout=trial.suggest_uniform("dropout", *dropout_range),
            hidden_size=hidden_size,
            hidden_continuous_size=trial.suggest_int(
                "hidden_continuous_size",
                hidden_continuous_size_range[0],
                min(hidden_continuous_size_range[1], hidden_size),
                log=True,
            ),
            attention_head_size=trial.suggest_int("attention_head_size",
                                                  *attention_head_size_range),
            log_interval=-1,
            **kwargs,
        )
        # find good learning rate
        if use_learning_rate_finder:
            lr_trainer = pl.Trainer(
                gradient_clip_val=gradient_clip_val,
                gpus=[0] if torch.cuda.is_available() else None,
                logger=False,
                progress_bar_refresh_rate=0,
                weights_summary=None,
            )
            res = lr_trainer.tuner.lr_find(
                model,
                train_dataloader=train_dataloader,
                val_dataloaders=val_dataloader,
                early_stop_threshold=10000,
                min_lr=learning_rate_range[0],
                num_training=100,
                max_lr=learning_rate_range[1],
            )

            loss_finite = np.isfinite(res.results["loss"])
            if loss_finite.sum(
            ) > 3:  # at least 3 valid values required for learning rate finder
                lr_smoothed, loss_smoothed = sm.nonparametric.lowess(
                    np.asarray(res.results["loss"])[loss_finite],
                    np.asarray(res.results["lr"])[loss_finite],
                    frac=1.0 / 10.0,
                )[min(loss_finite.sum() - 3, 10):-1].T
                optimal_idx = np.gradient(loss_smoothed).argmin()
                optimal_lr = lr_smoothed[optimal_idx]
            else:
                optimal_idx = np.asarray(res.results["loss"]).argmin()
                optimal_lr = res.results["lr"][optimal_idx]
            optuna_logger.info(f"Using learning rate of {optimal_lr:.3g}")
            # add learning rate artificially
            model.hparams.learning_rate = trial.suggest_uniform(
                "learning_rate", optimal_lr, optimal_lr)
        else:
            model.hparams.learning_rate = trial.suggest_loguniform(
                "learning_rate", *learning_rate_range)

        # fit
        trainer.fit(model,
                    train_dataloader=train_dataloader,
                    val_dataloaders=val_dataloader)

        # report result
        return metrics_callback.metrics[-1]["val_loss"].item()
Example #20
        def objective(trial: Trial) -> float:

            x = trial.suggest_int("x", -10, 10)
            y = trial.suggest_int("y", -10, 10)
            return x**2 + y**2
Example #21
def sample_ddpg_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for DDPG hyperparams.

    :param trial:
    :return:
    """
    gamma = trial.suggest_categorical(
        "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1)
    batch_size = trial.suggest_categorical("batch_size",
                                           [16, 32, 64, 100, 128, 256, 512])
    buffer_size = trial.suggest_categorical(
        "buffer_size", [int(1e4), int(1e5), int(1e6)])
    # Polyak coeff
    tau = trial.suggest_categorical("tau", [0.001, 0.005, 0.01, 0.02])

    episodic = trial.suggest_categorical("episodic", [True, False])

    if episodic:
        n_episodes_rollout = 1
        train_freq, gradient_steps = -1, -1
    else:
        train_freq = trial.suggest_categorical("train_freq",
                                               [1, 16, 128, 256, 1000, 2000])
        gradient_steps = train_freq
        n_episodes_rollout = -1

    noise_type = trial.suggest_categorical(
        "noise_type", ["ornstein-uhlenbeck", "normal", None])
    noise_std = trial.suggest_uniform("noise_std", 0, 1)

    net_arch = trial.suggest_categorical("net_arch",
                                         ["small", "medium", "big"])
    # activation_fn = trial.suggest_categorical('activation_fn', [nn.Tanh, nn.ReLU, nn.ELU, nn.LeakyReLU])

    net_arch = {
        "small": [64, 64],
        "medium": [256, 256],
        "big": [400, 300],
    }[net_arch]

    hyperparams = {
        "gamma": gamma,
        "tau": tau,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "n_episodes_rollout": n_episodes_rollout,
        "policy_kwargs": dict(net_arch=net_arch),
    }

    if noise_type == "normal":
        hyperparams["action_noise"] = NormalActionNoise(
            mean=np.zeros(trial.n_actions),
            sigma=noise_std * np.ones(trial.n_actions))
    elif noise_type == "ornstein-uhlenbeck":
        hyperparams["action_noise"] = OrnsteinUhlenbeckActionNoise(
            mean=np.zeros(trial.n_actions),
            sigma=noise_std * np.ones(trial.n_actions))

    return hyperparams
Example #22
        def objective(trial: Trial) -> float:

            return trial.suggest_int("x", 1, 1)  # Single element.
Example #23
def sample_a2c_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for A2C hyperparams.

    :param trial:
    :return:
    """
    gamma = trial.suggest_categorical(
        "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    normalize_advantage = trial.suggest_categorical("normalize_advantage",
                                                    [False, True])
    max_grad_norm = trial.suggest_categorical(
        "max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5])
    use_rms_prop = trial.suggest_categorical("use_rms_prop", [False, True])
    gae_lambda = trial.suggest_categorical(
        "gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0])
    n_steps = trial.suggest_categorical(
        "n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048])
    lr_schedule = trial.suggest_categorical("lr_schedule",
                                            ["linear", "constant"])
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1)
    ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
    vf_coef = trial.suggest_uniform("vf_coef", 0, 1)
    log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
    ortho_init = trial.suggest_categorical("ortho_init", [False, True])
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium"])
    # sde_net_arch = trial.suggest_categorical("sde_net_arch", [None, "tiny", "small"])
    # full_std = trial.suggest_categorical("full_std", [False, True])
    # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu'])
    activation_fn = trial.suggest_categorical("activation_fn",
                                              ["tanh", "relu"])

    if lr_schedule == "linear":
        learning_rate = linear_schedule(learning_rate)

    net_arch = {
        "small": [dict(pi=[64, 64], vf=[64, 64])],
        "medium": [dict(pi=[256, 256], vf=[256, 256])],
    }[net_arch]

    # sde_net_arch = {
    #     None: None,
    #     "tiny": [64],
    #     "small": [64, 64],
    # }[sde_net_arch]

    activation_fn = {
        "tanh": nn.Tanh,
        "relu": nn.ReLU,
        "elu": nn.ELU,
        "leaky_relu": nn.LeakyReLU
    }[activation_fn]

    return {
        "n_steps": n_steps,
        "gamma": gamma,
        "gae_lambda": gae_lambda,
        "learning_rate": learning_rate,
        "ent_coef": ent_coef,
        "normalize_advantage": normalize_advantage,
        "max_grad_norm": max_grad_norm,
        "use_rms_prop": use_rms_prop,
        "vf_coef": vf_coef,
        "policy_kwargs": dict(
            log_std_init=log_std_init,
            net_arch=net_arch,
            # full_std=full_std,
            activation_fn=activation_fn,
            # sde_net_arch=sde_net_arch,
            ortho_init=ortho_init,
        ),
    }
Example #24
    def objective(trial: Trial) -> float:

        return trial.suggest_int("x", 1, 1)
Example #25
    def objective(trial: optuna.Trial) -> float:
        # Filenames for each trial must be made unique in order to access each checkpoint.
        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            os.path.join(model_path, "trial_{}".format(trial.number), "{epoch}"), monitor="val_loss"
        )

        # The default logger in PyTorch Lightning writes to event files to be consumed by
        # TensorBoard. We don't use any logger here as it requires us to implement several abstract
        # methods. Instead we setup a simple callback, that saves metrics from each validation step.
        metrics_callback = MetricsCallback()
        learning_rate_callback = LearningRateLogger()
        logger = TensorBoardLogger(log_dir, name="optuna", version=trial.number)
        gradient_clip_val = trial.suggest_loguniform("gradient_clip_val", *gradient_clip_val_range)
        trainer = pl.Trainer(
            checkpoint_callback=checkpoint_callback,
            max_epochs=max_epochs,
            gradient_clip_val=gradient_clip_val,
            gpus=[0] if torch.cuda.is_available() else None,
            callbacks=[metrics_callback, learning_rate_callback],
            early_stop_callback=PyTorchLightningPruningCallback(trial, monitor="val_loss"),
            logger=logger,
            **trainer_kwargs,
        )

        # create model
        hidden_size = trial.suggest_int("hidden_size", *hidden_size_range, log=True)
        model = TemporalFusionTransformer.from_dataset(
            train_dataloader.dataset,
            dropout=trial.suggest_uniform("dropout", *dropout_range),
            hidden_size=hidden_size,
            hidden_continuous_size=trial.suggest_int(
                "hidden_continuous_size",
                hidden_continuous_size_range[0],
                min(hidden_continuous_size_range[1], hidden_size),
                log=True,
            ),
            attention_head_size=trial.suggest_int("attention_head_size", *attention_head_size_range),
            log_interval=-1,
            **kwargs,
        )
        # find good learning rate
        if use_learning_rate_finder:
            lr_trainer = pl.Trainer(
                gradient_clip_val=gradient_clip_val,
                gpus=[0] if torch.cuda.is_available() else None,
                logger=False,
            )
            res = lr_trainer.lr_find(
                model,
                train_dataloader=train_dataloader,
                val_dataloaders=val_dataloader,
                early_stop_threshold=10000.0,
                min_lr=learning_rate_range[0],
                num_training=100,
                max_lr=learning_rate_range[1],
            )

            loss_finite = np.isfinite(res.results["loss"])
            lr_smoothed, loss_smoothed = sm.nonparametric.lowess(
                np.asarray(res.results["loss"])[loss_finite],
                np.asarray(res.results["lr"])[loss_finite],
                frac=1.0 / 10.0,
            )[10:-1].T
            optimal_idx = np.gradient(loss_smoothed).argmin()
            optimal_lr = lr_smoothed[optimal_idx]
            print(f"Using learning rate of {optimal_lr:.3g}")
            model.hparams.learning_rate = optimal_lr
        else:
            model.hparams.learning_rate = trial.suggest_loguniform("learning_rate", *learning_rate_range)

        # fit
        trainer.fit(model, train_dataloader=train_dataloader, val_dataloaders=val_dataloader)

        # report result
        return metrics_callback.metrics[-1]["val_loss"].item()
Example #26
 def objective(trial: Trial) -> List[float]:
     return [
         trial.suggest_uniform("v{}".format(i), 0, 5)
         for i in range(n_objectives + 1)
     ]
Example #27
def suggest_discrete_power_int(trial: Trial, name: str, low: int, high: int, base: int = 2) -> int:
    """Suggest an integer in the given range [2^low, 2^high]."""
    if high <= low:
        raise Exception(f"Upper bound {high} is not greater than lower bound {low}.")
    choices = [base ** i for i in range(low, high + 1)]
    return trial.suggest_categorical(name=name, choices=choices)
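# A tiny usage sketch (the parameter name is illustrative): with low=4 and high=9 this
# draws the hidden size from {16, 32, 64, 128, 256, 512}.
def objective(trial: Trial) -> float:
    hidden_size = suggest_discrete_power_int(trial, "hidden_size", low=4, high=9)
    return float(hidden_size)  # placeholder objective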
Example #28
def train_and_eval(trial: optuna.Trial, ex_dir: str, seed: Optional[int]):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note::
        Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param ex_dir: experiment's directory, i.e. the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environment
    env_hparams = dict(dt=1 / 100., max_steps=600)
    env = QQubeSim(**env_hparams)
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(
        shared_hidden_sizes=trial.suggest_categorical(
            'shared_hidden_sizes_policy',
            [[16, 16], [32, 32], [64, 64], [16, 16, 16], [32, 32, 32]]),
        shared_hidden_nonlin=fcn_from_str(
            trial.suggest_categorical('shared_hidden_nonlin_policy',
                                      ['to_tanh', 'to_relu'])),
    )
    policy = TwoHeadedFNNPolicy(spec=env.spec, **policy_hparam)

    # Critic
    q_fcn_hparam = dict(
        hidden_sizes=trial.suggest_categorical(
            'hidden_sizes_critic',
            [[16, 16], [32, 32], [64, 64], [16, 16, 16], [32, 32, 32]]),
        hidden_nonlin=fcn_from_str(
            trial.suggest_categorical('hidden_nonlin_critic',
                                      ['to_tanh', 'to_relu'])),
    )
    obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
    q_fcn_1 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace),
                        **q_fcn_hparam)
    q_fcn_2 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace),
                        **q_fcn_hparam)

    # Algorithm
    algo_hparam = dict(
        num_sampler_envs=1,  # parallelize via optuna n_jobs
        max_iter=100 * env.max_steps,
        min_steps=trial.suggest_categorical(
            'min_steps_algo', [1]),  # , 10, env.max_steps, 10*env.max_steps
        memory_size=trial.suggest_loguniform('memory_size_algo',
                                             1e2 * env.max_steps,
                                             1e4 * env.max_steps),
        tau=trial.suggest_uniform('tau_algo', 0.99, 1.),
        alpha_init=trial.suggest_uniform('alpha_init_algo', 0.1, 0.9),
        learn_alpha=trial.suggest_categorical('learn_alpha_algo',
                                              [True, False]),
        standardize_rew=trial.suggest_categorical('standardize_rew_algo',
                                                  [False]),
        gamma=trial.suggest_uniform('gamma_algo', 0.99, 1.),
        target_update_intvl=trial.suggest_categorical(
            'target_update_intvl_algo', [1, 5]),
        num_batch_updates=trial.suggest_categorical('num_batch_updates_algo',
                                                    [1, 5]),
        batch_size=trial.suggest_categorical('batch_size_algo',
                                             [128, 256, 512]),
        lr=trial.suggest_loguniform('lr_algo', 1e-5, 1e-3),
    )
    csv_logger = create_csv_step_logger(
        osp.join(ex_dir, f'trial_{trial.number}'))
    algo = SAC(ex_dir,
               env,
               policy,
               q_fcn_1,
               q_fcn_2,
               **algo_hparam,
               logger=csv_logger)

    # Train without saving the results
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelSampler(
        env, policy, num_envs=1,
        min_rollouts=min_rollouts)  # parallelize via optuna n_jobs
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros]) / min_rollouts

    return mean_ret
Example #29
 def objective(trial: optuna.Trial) -> float:
     trial.suggest_uniform("DROPOUT", dropout, dropout)
     executor = optuna.integration.AllenNLPExecutor(
         trial, input_config_file, tmp_dir)
     return executor.run()
Example #30
def objective(trial: Trial, train_X, train_y, test_X, test_y) -> float:
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 0, 1000),
        "max_depth": trial.suggest_int("max_depth", 2, 25),
        "reg_alpha": trial.suggest_int("reg_alpha", 0, 10),
        "reg_lambda": trial.suggest_int("reg_lambda", 0, 10),
        "min_child_weight": trial.suggest_int("min_child_weight", 0, 20),
        "gamma": trial.suggest_int("gamma", 0, 5),
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.0001, 0.5),
        "colsample_bytree": trial.suggest_discrete_uniform("colsample_bytree", 0.1, 1, 0.01),
        "nthread": -1,
        "scale_pos_weight": trial.suggest_int("scale_pos_weight", 1, 10),
        "random_state": trial.suggest_int("random_state", 1, 30),
        "subsample": trial.suggest_float("subsample", 0.5, 0.9),
    }
    model = XGBClassifier(**params)

    model.fit(train_X, train_y)

    return cross_val_score(model, test_X, test_y).mean()
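# A minimal wiring sketch (data loading, direction, and the trial count are placeholders,
# not from the source): the extra data arguments are bound with functools.partial so the
# study only ever sees the single ``trial`` argument.
import functools

import optuna

study = optuna.create_study(direction="maximize")
study.optimize(
    functools.partial(objective, train_X=train_X, train_y=train_y, test_X=test_X, test_y=test_y),
    n_trials=50,
)
print(study.best_params)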