def objective_fn(
    trial: Trial,
    device: int,
    direction: str,
    target_metric: str,
    base_serialization_dir: str,
):
    embedding_dim = trial.suggest_int("embedding_dim", 128, 256)
    max_filter_size = trial.suggest_int("max_filter_size", 3, 6)
    num_filters = trial.suggest_int("num_filters", 128, 256)
    output_dim = trial.suggest_int("output_dim", 128, 512)
    dropout = trial.suggest_float("dropout", 0, 1.0, log=False)
    lr = trial.suggest_float("lr", 1e-4, 1e-1, log=True)

    train_dataset, valid_dataset, vocab = prepare_data()
    model = create_model(vocab, embedding_dim, max_filter_size, num_filters, output_dim, dropout)

    if device > -1:
        model.to(torch.device("cuda:{}".format(device)))

    optimizer = SGD(model.parameters(), lr=lr)
    data_loader = DataLoader(train_dataset, batch_size=10, collate_fn=allennlp_collate)
    validation_data_loader = DataLoader(valid_dataset, batch_size=64, collate_fn=allennlp_collate)
    serialization_dir = os.path.join(base_serialization_dir, "trial_{}".format(trial.number))
    trainer = GradientDescentTrainer(
        model=model,
        optimizer=optimizer,
        data_loader=data_loader,
        validation_data_loader=validation_data_loader,
        validation_metric=("+" if direction == "MAXIMIZE" else "-") + target_metric,
        patience=None,  # `patience=None` since it could conflict with AllenNLPPruningCallback
        num_epochs=50,
        cuda_device=device,
        serialization_dir=serialization_dir,
        epoch_callbacks=[AllenNLPPruningCallback(trial, f"validation_{target_metric}")],
    )
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))
    return trainer.train()[f"best_validation_{target_metric}"]
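# Usage sketch for the objective above (an assumption, not part of the original
# snippet): Optuna calls the objective with the trial only, so the extra
# arguments are bound with `functools.partial`. The device index, metric name,
# trial count, and output directory are illustrative values.
import functools

import optuna

study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
study.optimize(
    functools.partial(
        objective_fn,
        device=-1,  # CPU; pass a GPU index such as 0 to train on CUDA
        direction="MAXIMIZE",
        target_metric="accuracy",
        base_serialization_dir="result",
    ),
    n_trials=30,
)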
class AllenNLPPruningCallback(EpochCallback):
    """AllenNLP callback to prune unpromising trials.

    See `the example <https://github.com/optuna/optuna/blob/master/
    examples/allennlp/allennlp_simple.py>`__
    if you want to add a pruning callback which observes a metric.

    You can also see the tutorial of our AllenNLP integration on
    `AllenNLP Guide <https://guide.allennlp.org/hyperparameter-optimization>`_.

    .. note::
        When :class:`~optuna.integration.AllenNLPPruningCallback` is instantiated
        in a Python script, ``trial`` and ``monitor`` are mandatory.

        On the other hand, when :class:`~optuna.integration.AllenNLPPruningCallback` is used with
        :class:`~optuna.integration.AllenNLPExecutor`, ``trial`` and ``monitor``
        would be ``None``. :class:`~optuna.integration.AllenNLPExecutor` sets
        environment variables for a study name, trial id, monitor, and storage.
        Then :class:`~optuna.integration.AllenNLPPruningCallback` loads them to
        restore ``trial`` and ``monitor``.

    Args:
        trial:
            A :class:`~optuna.trial.Trial` corresponding to the current evaluation of the
            objective function.
        monitor:
            An evaluation metric for pruning, e.g. ``validation_loss`` or
            ``validation_accuracy``.

    """

    def __init__(
        self,
        trial: Optional[optuna.trial.Trial] = None,
        monitor: Optional[str] = None,
    ):
        _imports.check()

        if allennlp.__version__ < "1.0.0":
            raise Exception("AllenNLPPruningCallback requires `allennlp`>=1.0.0.")

        # When `AllenNLPPruningCallback` is instantiated in a Python script,
        # trial and monitor should not be `None`.
        if trial is not None and monitor is not None:
            self._trial = trial
            self._monitor = monitor

        # When `AllenNLPPruningCallback` is used with `AllenNLPExecutor`,
        # `trial` and `monitor` would be None. `AllenNLPExecutor` sets information
        # for a study name, trial id, monitor, and storage in environment variables.
        else:
            environment_variables = _get_environment_variables_for_trial()

            study_name = environment_variables["study_name"]
            trial_id = environment_variables["trial_id"]
            monitor = environment_variables["monitor"]
            storage = environment_variables["storage"]

            if study_name is None or trial_id is None or monitor is None or storage is None:
                message = (
                    "Failed to load study. Perhaps you attempt to use `AllenNLPPruningCallback`"
                    " without `AllenNLPExecutor`. If you want to use a callback"
                    " without an executor, you have to instantiate a callback with"
                    " `trial` and `monitor`. Please see the Optuna example: https://github.com/"
                    "optuna/optuna/blob/master/examples/allennlp/allennlp_simple.py."
                )
                raise RuntimeError(message)

            else:
                # If `storage` is empty even though `study_name`, `trial_id`,
                # and `monitor` are not `None`, users attempt to use `AllenNLPPruningCallback`
                # with `AllenNLPExecutor` and in-memory storage.
                # `AllenNLPPruningCallback` needs an RDB or Redis storage to work.
                if storage == "":
                    message = (
                        "If you want to use AllenNLPExecutor and AllenNLPPruningCallback,"
                        " you have to use RDB or Redis storage."
                    )
                    raise RuntimeError(message)

                study = load_study(study_name, storage, pruner=_create_pruner())
                self._trial = Trial(study, int(trial_id))
                self._monitor = monitor

    def __call__(
        self,
        trainer: "allennlp.training.GradientDescentTrainer",
        metrics: Dict[str, Any],
        epoch: int,
        is_master: bool,
    ) -> None:
        value = metrics.get(self._monitor)
        if value is None:
            return

        self._trial.report(float(value), epoch)
        if self._trial.should_prune():
            raise optuna.TrialPruned()
def get_params(trial: Trial, tunable_params: Dict, default_params: Dict):
    defaults = default_params.copy()
    for key in tunable_params:
        args = tunable_params[key]
        defaults[key] = trial.suggest_float(name=key, **args)
    return defaults
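# Hypothetical example of driving `get_params` (not from the original source):
# each entry in `tunable_params` holds keyword arguments for
# `trial.suggest_float`, so only float-valued hyperparameters can be tuned
# here; everything else passes through from the defaults.
from optuna.trial import Trial

def example_objective(trial: Trial) -> float:
    tunable = {
        "lr": {"low": 1e-5, "high": 1e-1, "log": True},
        "weight_decay": {"low": 0.0, "high": 0.1},
    }
    params = get_params(trial, tunable, {"batch_size": 64})
    return params["lr"] + params["weight_decay"]  # stand-in for a real training run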
def __call__(self, trial: Trial) -> Optional[float]:
    """Suggest parameters then train the model."""
    if self.model_kwargs is not None:
        problems = [
            x
            for x in ('loss', 'regularizer', 'optimizer', 'training', 'negative_sampler', 'stopper')
            if x in self.model_kwargs
        ]
        if problems:
            raise ValueError(f'model_kwargs should not have: {problems}. {self}')

    # 2. Model
    _model_kwargs = _get_kwargs(
        trial=trial,
        prefix='model',
        default_kwargs_ranges=self.model.hpo_default,
        kwargs=self.model_kwargs,
        kwargs_ranges=self.model_kwargs_ranges,
    )
    try:
        loss_default_kwargs_ranges = self.loss.hpo_default
    except AttributeError:
        logger.warning('using a loss function with no hpo_default field: %s', self.loss)
        loss_default_kwargs_ranges = {}
    # 3. Loss
    _loss_kwargs = _get_kwargs(
        trial=trial,
        prefix='loss',
        default_kwargs_ranges=loss_default_kwargs_ranges,
        kwargs=self.loss_kwargs,
        kwargs_ranges=self.loss_kwargs_ranges,
    )
    # 4. Regularizer
    _regularizer_kwargs: Optional[Mapping[str, Any]]
    if self.regularizer is None:
        _regularizer_kwargs = {}
    else:
        _regularizer_kwargs = _get_kwargs(
            trial=trial,
            prefix='regularizer',
            default_kwargs_ranges=self.regularizer.hpo_default,
            kwargs=self.regularizer_kwargs,
            kwargs_ranges=self.regularizer_kwargs_ranges,
        )
    # 5. Optimizer
    _optimizer_kwargs = _get_kwargs(
        trial=trial,
        prefix='optimizer',
        default_kwargs_ranges=optimizers_hpo_defaults[self.optimizer],
        kwargs=self.optimizer_kwargs,
        kwargs_ranges=self.optimizer_kwargs_ranges,
    )

    _negative_sampler_kwargs: Mapping[str, Any]
    if self.training_loop is not SLCWATrainingLoop:
        _negative_sampler_kwargs = {}
    else:
        _negative_sampler_kwargs = _get_kwargs(
            trial=trial,
            prefix='negative_sampler',
            default_kwargs_ranges=(
                {} if self.negative_sampler is None else self.negative_sampler.hpo_default
            ),
            kwargs=self.negative_sampler_kwargs,
            kwargs_ranges=self.negative_sampler_kwargs_ranges,
        )

    _training_kwargs = _get_kwargs(
        trial=trial,
        prefix='training',
        default_kwargs_ranges=self.training_loop.hpo_default,
        kwargs=self.training_kwargs,
        kwargs_ranges=self.training_kwargs_ranges,
    )

    _stopper_kwargs = dict(self.stopper_kwargs or {})
    if self.stopper is not None and issubclass(self.stopper, EarlyStopper):
        self._update_stopper_callbacks(_stopper_kwargs, trial)

    try:
        result = pipeline(
            # 1. Dataset
            dataset=self.dataset,
            dataset_kwargs=self.dataset_kwargs,
            training=self.training,
            testing=self.testing,
            validation=self.validation,
            evaluation_entity_whitelist=self.evaluation_entity_whitelist,
            evaluation_relation_whitelist=self.evaluation_relation_whitelist,
            # 2. Model
            model=self.model,
            model_kwargs=_model_kwargs,
            # 3. Loss
            loss=self.loss,
            loss_kwargs=_loss_kwargs,
            # 4. Regularizer
            regularizer=self.regularizer,
            regularizer_kwargs=_regularizer_kwargs,
            clear_optimizer=True,
            # 5. Optimizer
            optimizer=self.optimizer,
            optimizer_kwargs=_optimizer_kwargs,
            # 6. Training Loop
            training_loop=self.training_loop,
            negative_sampler=self.negative_sampler,
            negative_sampler_kwargs=_negative_sampler_kwargs,
            # 7. Training
            training_loop_kwargs=self.training_loop_kwargs,
            training_kwargs=_training_kwargs,
            stopper=self.stopper,
            stopper_kwargs=_stopper_kwargs,
            # 8. Evaluation
            evaluator=self.evaluator,
            evaluator_kwargs=self.evaluator_kwargs,
            evaluation_kwargs=self.evaluation_kwargs,
            filter_validation_when_testing=self.filter_validation_when_testing,
            # 9. Tracker
            result_tracker=self.result_tracker,
            result_tracker_kwargs=self.result_tracker_kwargs,
            # Misc.
            use_testing_data=False,  # use validation set during HPO!
            device=self.device,
        )
    except (MemoryError, RuntimeError) as e:
        trial.set_user_attr('failure', str(e))
        # Will trigger Optuna to set the state of the trial as failed
        return None
    else:
        if self.save_model_directory:
            model_directory = os.path.join(self.save_model_directory, str(trial.number))
            os.makedirs(model_directory, exist_ok=True)
            result.save_to_directory(model_directory)
        trial.set_user_attr('random_seed', result.random_seed)
        for k, v in result.metric_results.to_flat_dict().items():
            trial.set_user_attr(k, v)
        return result.metric_results.get_metric(self.metric)
def train_and_eval(trial: optuna.Trial, ex_dir: str, seed: Optional[int]):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note::
        Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param ex_dir: experiment's directory, i.e. the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environment
    env = QBallBalancerSim(dt=1/250., max_steps=1500)
    env = ActNormWrapper(env)

    # Policy
    policy = FNNPolicy(
        spec=env.spec,
        hidden_sizes=trial.suggest_categorical('hidden_sizes_policy', [[16, 16], [32, 32], [64, 64]]),
        hidden_nonlin=fcn_from_str(trial.suggest_categorical('hidden_nonlin_policy', ['to_tanh', 'to_relu'])),
    )

    # Critic
    value_fcn = FNN(
        input_size=env.obs_space.flat_dim,
        output_size=1,
        hidden_sizes=trial.suggest_categorical('hidden_sizes_critic', [[16, 16], [32, 32], [64, 64]]),
        hidden_nonlin=fcn_from_str(trial.suggest_categorical('hidden_nonlin_critic', ['to_tanh', 'to_relu'])),
    )
    critic_hparam = dict(
        gamma=trial.suggest_uniform('gamma_critic', 0.99, 1.),
        lamda=trial.suggest_uniform('lamda_critic', 0.95, 1.),
        num_epoch=trial.suggest_int('num_epoch_critic', 1, 10),
        batch_size=100,
        lr=trial.suggest_loguniform('lr_critic', 1e-5, 1e-3),
        standardize_adv=trial.suggest_categorical('standardize_adv_critic', [True, False]),
        # max_grad_norm=5.,
        # lr_scheduler=scheduler.StepLR,
        # lr_scheduler_hparam=dict(step_size=10, gamma=0.9)
        # lr_scheduler=scheduler.ExponentialLR,
        # lr_scheduler_hparam=dict(gamma=0.99)
    )
    critic = GAE(value_fcn, **critic_hparam)

    # Algorithm
    algo_hparam = dict(
        num_sampler_envs=1,  # parallelize via optuna n_jobs
        max_iter=500,
        min_steps=25*env.max_steps,
        num_epoch=trial.suggest_int('num_epoch_algo', 1, 10),
        eps_clip=trial.suggest_uniform('eps_clip_algo', 0.05, 0.2),
        batch_size=100,
        std_init=0.9,
        lr=trial.suggest_loguniform('lr_algo', 1e-5, 1e-3),
        # max_grad_norm=5.,
        # lr_scheduler=scheduler.StepLR,
        # lr_scheduler_hparam=dict(step_size=10, gamma=0.9)
        # lr_scheduler=scheduler.ExponentialLR,
        # lr_scheduler_hparam=dict(gamma=0.99)
    )
    algo = PPO(osp.join(ex_dir, f'trial_{trial.number}'), env, policy, critic, **algo_hparam)

    # Train without saving the results
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelSampler(env, policy, num_envs=20, min_rollouts=min_rollouts)
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros])/min_rollouts

    return mean_ret
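# The docstring above notes that custom arguments are bound via
# `functools.partial`; a minimal sketch of that wiring (the directory path,
# seed, trial count, and n_jobs are assumed, illustrative values):
import functools

import optuna

ex_dir = "experiments/qbb_ppo_hpo"  # illustrative study directory
study = optuna.create_study(direction="maximize")
study.optimize(functools.partial(train_and_eval, ex_dir=ex_dir, seed=1001), n_trials=100, n_jobs=4)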
def sample_dqn_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for DQN hyperparams.

    :param trial: Optuna trial used to suggest the hyperparameters
    :return: sampled hyperparameters
    """
    gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 100, 128, 256, 512])
    buffer_size = trial.suggest_categorical("buffer_size", [int(1e4), int(5e4), int(1e5), int(1e6)])
    exploration_final_eps = trial.suggest_uniform("exploration_final_eps", 0, 0.2)
    exploration_fraction = trial.suggest_uniform("exploration_fraction", 0, 0.5)
    target_update_interval = trial.suggest_categorical(
        "target_update_interval", [1, 1000, 5000, 10000, 15000, 20000]
    )
    learning_starts = trial.suggest_categorical("learning_starts", [0, 1000, 5000, 10000, 20000])

    train_freq = trial.suggest_categorical("train_freq", [1, 4, 8, 16, 128, 256, 1000])
    subsample_steps = trial.suggest_categorical("subsample_steps", [1, 2, 4, 8])
    gradient_steps = max(train_freq // subsample_steps, 1)

    n_episodes_rollout = -1

    net_arch = trial.suggest_categorical("net_arch", ["tiny", "small", "medium"])
    net_arch = {"tiny": [64], "small": [64, 64], "medium": [256, 256]}[net_arch]

    hyperparams = {
        "gamma": gamma,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "n_episodes_rollout": n_episodes_rollout,
        "exploration_fraction": exploration_fraction,
        "exploration_final_eps": exploration_final_eps,
        "target_update_interval": target_update_interval,
        "learning_starts": learning_starts,
        "policy_kwargs": dict(net_arch=net_arch),
    }

    return hyperparams
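# Sketch of how a sampler like the one above is typically consumed (assumed
# wiring, not from the original file): the sampled dict is splatted into the
# algorithm constructor. Note that the `n_episodes_rollout` key only exists in
# early Stable-Baselines3 versions; later versions removed it, so this assumes
# an SB3 release whose DQN accepts all of these keys.
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy

def dqn_objective(trial: optuna.Trial) -> float:
    model = DQN("MlpPolicy", "CartPole-v1", **sample_dqn_params(trial))
    model.learn(total_timesteps=50_000)
    mean_reward, _ = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
    return mean_reward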
def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for PPO hyperparams.

    :param trial: Optuna trial used to suggest the hyperparameters
    :return: sampled hyperparameters
    """
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64, 128, 256, 512])
    n_steps = trial.suggest_categorical("n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048])
    gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1)
    lr_schedule = "constant"
    # lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant'])
    ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
    clip_range = trial.suggest_categorical("clip_range", [0.1, 0.2, 0.3, 0.4])
    n_epochs = trial.suggest_categorical("n_epochs", [1, 5, 10, 20])
    gae_lambda = trial.suggest_categorical("gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0])
    max_grad_norm = trial.suggest_categorical("max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5])
    vf_coef = trial.suggest_uniform("vf_coef", 0, 1)
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium"])
    log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
    sde_sample_freq = trial.suggest_categorical("sde_sample_freq", [-1, 8, 16, 32, 64, 128, 256])
    ortho_init = False
    # ortho_init = trial.suggest_categorical('ortho_init', [False, True])
    # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu'])
    activation_fn = trial.suggest_categorical("activation_fn", ["tanh", "relu"])

    # TODO: account when using multiple envs
    if batch_size > n_steps:
        batch_size = n_steps

    if lr_schedule == "linear":
        learning_rate = linear_schedule(learning_rate)

    net_arch = {
        "small": [dict(pi=[64, 64], vf=[64, 64])],
        "medium": [dict(pi=[256, 256], vf=[256, 256])],
    }[net_arch]

    activation_fn = {"tanh": nn.Tanh, "relu": nn.ReLU, "elu": nn.ELU, "leaky_relu": nn.LeakyReLU}[activation_fn]

    return {
        "n_steps": n_steps,
        "batch_size": batch_size,
        "gamma": gamma,
        "learning_rate": learning_rate,
        "ent_coef": ent_coef,
        "clip_range": clip_range,
        "n_epochs": n_epochs,
        "gae_lambda": gae_lambda,
        "max_grad_norm": max_grad_norm,
        "vf_coef": vf_coef,
        "sde_sample_freq": sde_sample_freq,
        "policy_kwargs": dict(
            log_std_init=log_std_init,
            net_arch=net_arch,
            activation_fn=activation_fn,
            ortho_init=ortho_init,
        ),
    }
def objective(self, trial: optuna.Trial):
    stagnation = trial.suggest_categorical('stagnation', [10, 20, 40])
    collapse = trial.suggest_categorical('collapse', [1e-4, 1e-3, 1e-2])
    # collapse_type = trial.suggest_categorical('collapse_type', ['LINEAR', 'VARIANCE'])
    score = self._run_gapso(stagnation, collapse, trial.number)
    return score
def f(trial: Trial) -> float:
    trial.set_user_attr("train_accuracy", 1)
    assert trial.user_attrs["train_accuracy"] == 1
    return 0.0
def objective(t: optuna.Trial) -> float:
    value = t.suggest_int("x", -1, 1) + t.suggest_int("y", -1, 1)
    if t.number == 0:
        raise Exception("first trial is failed")
    return float(value)
def objective(trial: optuna.Trial) -> float:
    _ = trial.suggest_uniform("x", -1, 1)
    _ = trial.suggest_uniform("y", -1, 1)
    return 1.0
def objective(self, trial: optuna.Trial) -> float:
    kwargs = self._hyperparams.copy()

    trial.model_class = None
    if self.algo == "her":
        trial.model_class = self._hyperparams.get("model_class", None)

    # Hack to use DDPG/TD3 noise sampler
    trial.n_actions = self.n_actions
    # Sample candidate hyperparameters
    kwargs.update(HYPERPARAMS_SAMPLER[self.algo](trial))

    model = ALGOS[self.algo](
        env=self.create_envs(self.n_envs, no_log=True),
        tensorboard_log=None,
        # We do not seed the trial
        seed=None,
        verbose=0,
        **kwargs,
    )

    model.trial = trial

    eval_env = self.create_envs(n_envs=1, eval_env=True)

    eval_freq = int(self.n_timesteps / self.n_evaluations)
    # Account for parallel envs
    eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
    # Use non-deterministic eval for Atari
    eval_callback = TrialEvalCallback(
        eval_env,
        trial,
        n_eval_episodes=self.n_eval_episodes,
        eval_freq=eval_freq_,
        deterministic=self.deterministic_eval,
    )

    try:
        model.learn(self.n_timesteps, callback=eval_callback)
        # Free memory
        model.env.close()
        eval_env.close()
    except AssertionError as e:
        # Sometimes, random hyperparams can generate NaN
        # Free memory
        model.env.close()
        eval_env.close()
        # Prune hyperparams that generate NaNs
        print(e)
        raise optuna.exceptions.TrialPruned()

    is_pruned = eval_callback.is_pruned
    reward = eval_callback.last_mean_reward

    del model.env, eval_env
    del model

    if is_pruned:
        raise optuna.exceptions.TrialPruned()

    return reward
def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -10, 10)
    y = trial.suggest_float("y", -10, 10)
    # Objective values are negative.
    return -(x**2) - (y - 5) ** 2
def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -10, 10)
    y = trial.suggest_float("y", -10, 10)
    return x**2 + y
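# The toy objectives here are all driven the same way; a minimal sketch (the
# trial count is arbitrary):
import optuna

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)
print(study.best_params, study.best_value)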
def objective2(trial: optuna.Trial) -> float:
    x0 = trial.suggest_float("x0", 2, 3)
    x1 = trial.suggest_float("x1", 1e-2, 1e2, log=True)
    x2 = trial.suggest_float("x2", 1e-2, 1e2, log=True)
    return x0 + x1 + x2
def objective(trial: Trial) -> float:
    x1 = trial.suggest_uniform("x1", 0.1, 3)
    x2 = trial.suggest_loguniform("x2", 0.1, 3)
    x3 = trial.suggest_loguniform("x3", 2, 4)
    return x1 + x2 * x3
def fixed_param(trial: op.Trial, name: str, value: float = 0.) -> float:
    return trial.suggest_float(name, value, value)
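# Suggesting over the degenerate range [value, value] pins the parameter while
# still recording it on the trial, so it appears alongside the tuned
# parameters in the study's history. An illustrative use (the objective and
# its parameter names are assumptions; `op` mirrors the alias in the snippet):
import optuna as op

def objective_with_fixed(trial: op.Trial) -> float:
    momentum = fixed_param(trial, "momentum", 0.9)  # always 0.9, but logged in the trial
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    return lr * momentum  # stand-in for a real evaluation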
def f(trial: Trial) -> float:
    trial.set_system_attr("system_message", "test")
    assert trial.system_attrs["system_message"] == "test"
    return 0.0
def objective(trial: optuna.Trial) -> float:
    # Filenames for each trial must be made unique in order to access each checkpoint.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        dirpath=os.path.join(model_path, "trial_{}".format(trial.number)),
        filename="{epoch}",
        monitor="val_loss",
    )

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We don't use any logger here as it requires us to implement several abstract
    # methods. Instead we set up a simple callback that saves metrics from each validation step.
    metrics_callback = MetricsCallback()
    learning_rate_callback = LearningRateMonitor()
    logger = TensorBoardLogger(log_dir, name="optuna", version=trial.number)
    gradient_clip_val = trial.suggest_loguniform("gradient_clip_val", *gradient_clip_val_range)
    default_trainer_kwargs = dict(
        gpus=[0] if torch.cuda.is_available() else None,
        max_epochs=max_epochs,
        gradient_clip_val=gradient_clip_val,
        callbacks=[
            metrics_callback,
            learning_rate_callback,
            checkpoint_callback,
            PyTorchLightningPruningCallback(trial, monitor="val_loss"),
        ],
        logger=logger,
        progress_bar_refresh_rate=[0, 1][optuna_verbose < optuna.logging.INFO],
        weights_summary=[None, "top"][optuna_verbose < optuna.logging.INFO],
    )
    default_trainer_kwargs.update(trainer_kwargs)
    trainer = pl.Trainer(**default_trainer_kwargs)

    # create model
    hidden_size = trial.suggest_int("hidden_size", *hidden_size_range, log=True)
    kwargs["loss"] = copy.deepcopy(loss)
    model = TemporalFusionTransformer.from_dataset(
        train_dataloader.dataset,
        dropout=trial.suggest_uniform("dropout", *dropout_range),
        hidden_size=hidden_size,
        hidden_continuous_size=trial.suggest_int(
            "hidden_continuous_size",
            hidden_continuous_size_range[0],
            min(hidden_continuous_size_range[1], hidden_size),
            log=True,
        ),
        attention_head_size=trial.suggest_int("attention_head_size", *attention_head_size_range),
        log_interval=-1,
        **kwargs,
    )

    # find a good learning rate
    if use_learning_rate_finder:
        lr_trainer = pl.Trainer(
            gradient_clip_val=gradient_clip_val,
            gpus=[0] if torch.cuda.is_available() else None,
            logger=False,
            progress_bar_refresh_rate=0,
            weights_summary=None,
        )
        res = lr_trainer.tuner.lr_find(
            model,
            train_dataloader=train_dataloader,
            val_dataloaders=val_dataloader,
            early_stop_threshold=10000,
            min_lr=learning_rate_range[0],
            num_training=100,
            max_lr=learning_rate_range[1],
        )

        loss_finite = np.isfinite(res.results["loss"])
        if loss_finite.sum() > 3:  # at least 3 valid values required for learning rate finder
            lr_smoothed, loss_smoothed = sm.nonparametric.lowess(
                np.asarray(res.results["loss"])[loss_finite],
                np.asarray(res.results["lr"])[loss_finite],
                frac=1.0 / 10.0,
            )[min(loss_finite.sum() - 3, 10):-1].T
            optimal_idx = np.gradient(loss_smoothed).argmin()
            optimal_lr = lr_smoothed[optimal_idx]
        else:
            optimal_idx = np.asarray(res.results["loss"]).argmin()
            optimal_lr = res.results["lr"][optimal_idx]
        optuna_logger.info(f"Using learning rate of {optimal_lr:.3g}")
        # add learning rate artificially
        model.hparams.learning_rate = trial.suggest_uniform("learning_rate", optimal_lr, optimal_lr)
    else:
        model.hparams.learning_rate = trial.suggest_loguniform("learning_rate", *learning_rate_range)

    # fit
    trainer.fit(model, train_dataloader=train_dataloader, val_dataloaders=val_dataloader)

    # report result
    return metrics_callback.metrics[-1]["val_loss"].item()
def objective(trial: Trial) -> float:
    x = trial.suggest_int("x", -10, 10)
    y = trial.suggest_int("y", -10, 10)
    return x**2 + y**2
def sample_ddpg_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for DDPG hyperparams.

    :param trial: Optuna trial used to suggest the hyperparameters
    :return: sampled hyperparameters
    """
    gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 100, 128, 256, 512])
    buffer_size = trial.suggest_categorical("buffer_size", [int(1e4), int(1e5), int(1e6)])
    # Polyak coeff
    tau = trial.suggest_categorical("tau", [0.001, 0.005, 0.01, 0.02])

    episodic = trial.suggest_categorical("episodic", [True, False])

    if episodic:
        n_episodes_rollout = 1
        train_freq, gradient_steps = -1, -1
    else:
        train_freq = trial.suggest_categorical("train_freq", [1, 16, 128, 256, 1000, 2000])
        gradient_steps = train_freq
        n_episodes_rollout = -1

    noise_type = trial.suggest_categorical("noise_type", ["ornstein-uhlenbeck", "normal", None])
    noise_std = trial.suggest_uniform("noise_std", 0, 1)

    net_arch = trial.suggest_categorical("net_arch", ["small", "medium", "big"])
    # activation_fn = trial.suggest_categorical('activation_fn', [nn.Tanh, nn.ReLU, nn.ELU, nn.LeakyReLU])

    net_arch = {
        "small": [64, 64],
        "medium": [256, 256],
        "big": [400, 300],
    }[net_arch]

    hyperparams = {
        "gamma": gamma,
        "tau": tau,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "n_episodes_rollout": n_episodes_rollout,
        "policy_kwargs": dict(net_arch=net_arch),
    }

    if noise_type == "normal":
        hyperparams["action_noise"] = NormalActionNoise(
            mean=np.zeros(trial.n_actions), sigma=noise_std * np.ones(trial.n_actions)
        )
    elif noise_type == "ornstein-uhlenbeck":
        hyperparams["action_noise"] = OrnsteinUhlenbeckActionNoise(
            mean=np.zeros(trial.n_actions), sigma=noise_std * np.ones(trial.n_actions)
        )

    return hyperparams
def objective(trial: Trial) -> float:
    return trial.suggest_int("x", 1, 1)  # Single element.
def sample_a2c_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for A2C hyperparams.

    :param trial: Optuna trial used to suggest the hyperparameters
    :return: sampled hyperparameters
    """
    gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    normalize_advantage = trial.suggest_categorical("normalize_advantage", [False, True])
    max_grad_norm = trial.suggest_categorical("max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5])
    use_rms_prop = trial.suggest_categorical("use_rms_prop", [False, True])
    gae_lambda = trial.suggest_categorical("gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0])
    n_steps = trial.suggest_categorical("n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048])
    lr_schedule = trial.suggest_categorical("lr_schedule", ["linear", "constant"])
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1)
    ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
    vf_coef = trial.suggest_uniform("vf_coef", 0, 1)
    log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
    ortho_init = trial.suggest_categorical("ortho_init", [False, True])
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium"])
    # sde_net_arch = trial.suggest_categorical("sde_net_arch", [None, "tiny", "small"])
    # full_std = trial.suggest_categorical("full_std", [False, True])
    # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu'])
    activation_fn = trial.suggest_categorical("activation_fn", ["tanh", "relu"])

    if lr_schedule == "linear":
        learning_rate = linear_schedule(learning_rate)

    net_arch = {
        "small": [dict(pi=[64, 64], vf=[64, 64])],
        "medium": [dict(pi=[256, 256], vf=[256, 256])],
    }[net_arch]

    # sde_net_arch = {
    #     None: None,
    #     "tiny": [64],
    #     "small": [64, 64],
    # }[sde_net_arch]

    activation_fn = {"tanh": nn.Tanh, "relu": nn.ReLU, "elu": nn.ELU, "leaky_relu": nn.LeakyReLU}[activation_fn]

    return {
        "n_steps": n_steps,
        "gamma": gamma,
        "gae_lambda": gae_lambda,
        "learning_rate": learning_rate,
        "ent_coef": ent_coef,
        "normalize_advantage": normalize_advantage,
        "max_grad_norm": max_grad_norm,
        "use_rms_prop": use_rms_prop,
        "vf_coef": vf_coef,
        "policy_kwargs": dict(
            log_std_init=log_std_init,
            net_arch=net_arch,
            # full_std=full_std,
            activation_fn=activation_fn,
            # sde_net_arch=sde_net_arch,
            ortho_init=ortho_init,
        ),
    }
def objective(trial: Trial) -> float:
    return trial.suggest_int("x", 1, 1)
def objective(trial: optuna.Trial) -> float:
    # Filenames for each trial must be made unique in order to access each checkpoint.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        os.path.join(model_path, "trial_{}".format(trial.number), "{epoch}"), monitor="val_loss"
    )

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We don't use any logger here as it requires us to implement several abstract
    # methods. Instead we set up a simple callback that saves metrics from each validation step.
    metrics_callback = MetricsCallback()
    learning_rate_callback = LearningRateLogger()
    logger = TensorBoardLogger(log_dir, name="optuna", version=trial.number)
    gradient_clip_val = trial.suggest_loguniform("gradient_clip_val", *gradient_clip_val_range)
    trainer = pl.Trainer(
        checkpoint_callback=checkpoint_callback,
        max_epochs=max_epochs,
        gradient_clip_val=gradient_clip_val,
        gpus=[0] if torch.cuda.is_available() else None,
        callbacks=[metrics_callback, learning_rate_callback],
        early_stop_callback=PyTorchLightningPruningCallback(trial, monitor="val_loss"),
        logger=logger,
        **trainer_kwargs,
    )

    # create model
    hidden_size = trial.suggest_int("hidden_size", *hidden_size_range, log=True)
    model = TemporalFusionTransformer.from_dataset(
        train_dataloader.dataset,
        dropout=trial.suggest_uniform("dropout", *dropout_range),
        hidden_size=hidden_size,
        hidden_continuous_size=trial.suggest_int(
            "hidden_continuous_size",
            hidden_continuous_size_range[0],
            min(hidden_continuous_size_range[1], hidden_size),
            log=True,
        ),
        attention_head_size=trial.suggest_int("attention_head_size", *attention_head_size_range),
        log_interval=-1,
        **kwargs,
    )

    # find a good learning rate
    if use_learning_rate_finder:
        lr_trainer = pl.Trainer(
            gradient_clip_val=gradient_clip_val,
            gpus=[0] if torch.cuda.is_available() else None,
            logger=False,
        )
        res = lr_trainer.lr_find(
            model,
            train_dataloader=train_dataloader,
            val_dataloaders=val_dataloader,
            early_stop_threshold=10000.0,
            min_lr=learning_rate_range[0],
            num_training=100,
            max_lr=learning_rate_range[1],
        )

        loss_finite = np.isfinite(res.results["loss"])
        lr_smoothed, loss_smoothed = sm.nonparametric.lowess(
            np.asarray(res.results["loss"])[loss_finite],
            np.asarray(res.results["lr"])[loss_finite],
            frac=1.0 / 10.0,
        )[10:-1].T
        optimal_idx = np.gradient(loss_smoothed).argmin()
        optimal_lr = lr_smoothed[optimal_idx]
        print(f"Using learning rate of {optimal_lr:.3g}")
        model.hparams.learning_rate = optimal_lr
    else:
        model.hparams.learning_rate = trial.suggest_loguniform("learning_rate", *learning_rate_range)

    # fit
    trainer.fit(model, train_dataloader=train_dataloader, val_dataloaders=val_dataloader)

    # report result
    return metrics_callback.metrics[-1]["val_loss"].item()
def objective(trial: Trial) -> List[float]:
    return [trial.suggest_uniform("v{}".format(i), 0, 5) for i in range(n_objectives + 1)]
def suggest_discrete_power_int(trial: Trial, name: str, low: int, high: int, base: int = 2) -> int:
    """Suggest an integer from the powers base**low, ..., base**high."""
    if high <= low:
        raise Exception(f"Upper bound {high} is not greater than lower bound {low}.")
    choices = [base ** i for i in range(low, high + 1)]
    return trial.suggest_categorical(name=name, choices=choices)
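# Illustrative use (an assumption, not from the original source): with the
# default base of 2, low=4 and high=9 yields the choices
# [16, 32, 64, 128, 256, 512]. Sampling from categorical powers instead of a
# raw integer range keeps the search space small and hardware-friendly for
# quantities like batch size or layer width.
def objective_power(trial: Trial) -> float:
    batch_size = suggest_discrete_power_int(trial, "batch_size", low=4, high=9)
    return float(batch_size)  # stand-in for a real training run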
def train_and_eval(trial: optuna.Trial, ex_dir: str, seed: Optional[int]):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note::
        Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param ex_dir: experiment's directory, i.e. the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environment
    env_hparams = dict(dt=1/100., max_steps=600)
    env = QQubeSim(**env_hparams)
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(
        shared_hidden_sizes=trial.suggest_categorical(
            'shared_hidden_sizes_policy', [[16, 16], [32, 32], [64, 64], [16, 16, 16], [32, 32, 32]]
        ),
        shared_hidden_nonlin=fcn_from_str(
            trial.suggest_categorical('shared_hidden_nonlin_policy', ['to_tanh', 'to_relu'])
        ),
    )
    policy = TwoHeadedFNNPolicy(spec=env.spec, **policy_hparam)

    # Critic
    q_fcn_hparam = dict(
        hidden_sizes=trial.suggest_categorical(
            'hidden_sizes_critic', [[16, 16], [32, 32], [64, 64], [16, 16, 16], [32, 32, 32]]
        ),
        hidden_nonlin=fcn_from_str(
            trial.suggest_categorical('hidden_nonlin_critic', ['to_tanh', 'to_relu'])
        ),
    )
    obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
    q_fcn_1 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **q_fcn_hparam)
    q_fcn_2 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **q_fcn_hparam)

    # Algorithm
    algo_hparam = dict(
        num_sampler_envs=1,  # parallelize via optuna n_jobs
        max_iter=100*env.max_steps,
        min_steps=trial.suggest_categorical('min_steps_algo', [1]),  # , 10, env.max_steps, 10*env.max_steps
        memory_size=trial.suggest_loguniform('memory_size_algo', 1e2*env.max_steps, 1e4*env.max_steps),
        tau=trial.suggest_uniform('tau_algo', 0.99, 1.),
        alpha_init=trial.suggest_uniform('alpha_init_algo', 0.1, 0.9),
        learn_alpha=trial.suggest_categorical('learn_alpha_algo', [True, False]),
        standardize_rew=trial.suggest_categorical('standardize_rew_algo', [False]),
        gamma=trial.suggest_uniform('gamma_algo', 0.99, 1.),
        target_update_intvl=trial.suggest_categorical('target_update_intvl_algo', [1, 5]),
        num_batch_updates=trial.suggest_categorical('num_batch_updates_algo', [1, 5]),
        batch_size=trial.suggest_categorical('batch_size_algo', [128, 256, 512]),
        lr=trial.suggest_loguniform('lr_algo', 1e-5, 1e-3),
    )
    csv_logger = create_csv_step_logger(osp.join(ex_dir, f'trial_{trial.number}'))
    algo = SAC(ex_dir, env, policy, q_fcn_1, q_fcn_2, **algo_hparam, logger=csv_logger)

    # Train without saving the results
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelSampler(env, policy, num_envs=1, min_rollouts=min_rollouts)  # parallelize via optuna n_jobs
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros])/min_rollouts

    return mean_ret
def objective(trial: optuna.Trial) -> float:
    trial.suggest_uniform("DROPOUT", dropout, dropout)
    executor = optuna.integration.AllenNLPExecutor(trial, input_config_file, tmp_dir)
    return executor.run()
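# `AllenNLPExecutor` reads the hyperparameters suggested on the trial (here the
# degenerate "DROPOUT" range pins a value) and injects them into the Jsonnet
# config. A sketch of the surrounding study setup; as the callback above notes,
# pruning with the executor requires an RDB or Redis storage, and the SQLite
# URL and trial count here are illustrative:
study = optuna.create_study(
    direction="maximize",
    storage="sqlite:///allennlp_study.db",
    study_name="allennlp_example",
)
study.optimize(objective, n_trials=10)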
def objective(trial: Trial, train_X, train_y, test_X, test_y) -> float:
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 1, 1000),  # a lower bound of 0 would be invalid
        'max_depth': trial.suggest_int('max_depth', 2, 25),
        'reg_alpha': trial.suggest_int('reg_alpha', 0, 10),
        'reg_lambda': trial.suggest_int('reg_lambda', 0, 10),
        'min_child_weight': trial.suggest_int('min_child_weight', 0, 20),
        'gamma': trial.suggest_int('gamma', 0, 5),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.0001, 0.5),
        'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.1, 1, 0.01),
        'nthread': -1,
        'scale_pos_weight': trial.suggest_int('scale_pos_weight', 1, 10),
        'random_state': trial.suggest_int('random_state', 1, 30),
        'subsample': trial.suggest_float('subsample', 0.5, 0.9),
    }
    model = XGBClassifier(**params)
    model.fit(train_X, train_y)
    return cross_val_score(model, test_X, test_y).mean()