def objective(self, trial: optuna.Trial) -> float:
    """Toy quadratic objective: value of x**2 + y**2 at the suggested point."""
    # Continuous coordinate in [-100, 100].
    first = trial.suggest_float("x", -100, 100)
    # Integer coordinate in [-100, 100].
    second = trial.suggest_int("y", -100, 100)
    return first ** 2 + second ** 2
def suggest(self, trial: Trial, prefix: str = "") -> Any:
    """Draw an integer value for this hyper-parameter from *trial*.

    The Optuna parameter name is ``prefix + self.name``; the bounds and the
    step width come from the instance attributes ``low``, ``high``, ``step``.
    """
    full_name = prefix + self.name
    return trial.suggest_int(full_name, self.low, self.high, step=self.step)
def objective(t: optuna.Trial) -> float:
    """Sum two integer suggestions; deliberately fail the very first trial."""
    total = t.suggest_int("x", -1, 1)
    total += t.suggest_int("y", -1, 1)
    # Trial #0 is made to fail on purpose (e.g. to exercise failure handling).
    if t.number == 0:
        raise Exception("first trial is failed")
    return float(total)
def objective(trial: optuna.Trial,  # with optuna
              lr: int = None, output_dims: List = None, dropout: float = None  # without optuna
              ):
    """Train an MLP and return the best validation accuracy reached.

    Two mutually exclusive calling modes, enforced by the asserts below:
      * Optuna mode: pass only ``trial`` — a running ``optuna.Trial`` samples
        hyper-parameters, while a finished (best) trial object replays them.
      * Manual mode: pass ``trial=None`` plus explicit ``lr``, ``output_dims``
        and ``dropout``.

    Relies on module-level state not defined in this block: ``model`` (written
    via ``global``), ``seed``, ``use_cuda``, ``loss_weight``, ``epoch``, the
    dataloaders, the bound constants, and helpers such as ``logger`` and ``MLP``.

    :param trial: Optuna trial (or best-trial result), or None for manual mode
    :param lr: learning rate (manual mode only)
    :param output_dims: hidden layer sizes (manual mode only)
    :param dropout: dropout probability (manual mode only)
    :return: best validation accuracy observed across epochs
    """
    # Enforce the two calling modes: trial XOR manual hyper-parameters.
    assert not (trial is not None and lr is not None)
    assert not (trial is not None and output_dims is not None)
    assert not (trial is not None and dropout is not None)
    assert not (trial is None and lr is None)
    assert not (trial is None and output_dims is None)
    assert not (trial is None and dropout is None)
    global model
    # Re-seed every RNG source so runs are comparable across trials.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    if trial is not None:
        if not isinstance(trial, optuna.Trial):
            # Best (finished) trial passed in: replay its stored parameters.
            lr = trial.params['lr']
            nlayers = trial.params['nlayers']
            # NOTE(review): the study branch below samples keys named
            # 'dropout_l{i}', so 'dropout' may not exist in trial.params here
            # — verify against the study setup.
            dropouts = trial.params['dropout']
            output_dims = [trial.params[f'n_units_l{i}'] for i in range(nlayers)]
        else:  # In study.
            logger(f'{"-" * 10} Trial #{trial.number} {"-" * 10}')
            # optuna settings
            # lr = trial.suggest_uniform('lr', lr_lower_bound, lr_upper_bound)
            lr = trial.suggest_categorical('lr', [1e-3, 3e-4, 2e-4, 1e-5])
            nlayers = trial.suggest_int('nlayers', nlayers_lower_bound, nlayers_upper_bound)
            dropouts = [
                trial.suggest_categorical(f'dropout_l{i}', [0.2, 0.5, 0.7])
                for i in range(2)
            ]
            output_dims = [
                int(trial.suggest_categorical(f'n_units_l{i}', list(range(odim_start, odim_end, odim_step))))
                for i in range(nlayers)
            ]
    else:
        # Manual mode.
        # NOTE(review): 'dropouts' is never assigned from the 'dropout'
        # argument in this branch, so MLP(nlayers, dropouts, output_dims)
        # below would raise NameError — confirm and fix upstream.
        nlayers = len(output_dims)
    logger('Setting up models...')
    device = torch.device('cuda' if use_cuda else 'cpu')
    model = MLP(nlayers, dropouts, output_dims).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criteria = nn.CrossEntropyLoss(weight=loss_weight.to(device))
    best_acc = 0
    # Early stopping: abort after 5 consecutive epochs without improvement.
    n_fail_in_a_raw = 0
    limit_n_fail_in_a_raw = 5
    # print('Start training...')
    for i_epoch in range(1, epoch+1):
        losses = []
        model.train()
        for tgts, sent1s, sent2s in train_dataloader:
            tgts = tgts.to(device)
            sent1s = sent1s.to(device)
            sent2s = sent2s.to(device)
            preds = model(sent1s, sent2s)
            loss = criteria(preds, tgts)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        # Per-epoch validation pass (no gradients).
        model.eval()
        valid_losses = []
        valid_accs = []
        with torch.no_grad():
            for tgts, sent1s, sent2s in valid_dataloader:
                tgts = tgts.to(device)
                sent1s = sent1s.to(device)
                sent2s = sent2s.to(device)
                preds = model(sent1s, sent2s)
                pred_idxs = preds.argmax(dim=1).tolist()
                loss = criteria(preds, tgts)
                # Plain accuracy over this batch.
                acc = len([1 for p, t in zip(pred_idxs, tgts.tolist()) if p == t]) / len(tgts.tolist())
                valid_losses.append(loss.item())
                valid_accs.append(acc)
        logger(f'Train loss: {np.mean(losses)}')
        _loss = np.mean(valid_losses)
        _acc = np.mean(valid_accs)
        logger(f'Valid loss: {_loss}')
        logger(f'Valid accuracy: {_acc}')
        if _acc > best_acc:
            best_acc = _acc
            n_fail_in_a_raw = 0
        else:
            n_fail_in_a_raw += 1
            if n_fail_in_a_raw >= limit_n_fail_in_a_raw:
                break
        logger(f"{'-' * 25}\n")
    return best_acc
def objective(trial: Trial) -> float:
    """Objective whose search space is the degenerate interval [1, 1]."""
    # Single element.
    only_choice = trial.suggest_int("x", 1, 1)
    return only_choice
def objective(trial: Trial) -> float:
    """Suggest from the one-point range [1, 1] and return the value."""
    value = trial.suggest_int("x", 1, 1)
    return value
def train_and_eval(trial: optuna.Trial, study_dir: str, seed: int):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note:: Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param study_dir: the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environments
    env_hparams = dict(dt=1 / 100., max_steps=600)
    env_real = QQubeSwingUpSim(**env_hparams)
    # Ground-truth "real" system: perturb nominal masses/lengths by +/- 10%.
    env_real.domain_param = dict(
        Mr=0.095 * 0.9,  # 0.095*0.9 = 0.0855
        Mp=0.024 * 1.1,  # 0.024*1.1 = 0.0264
        Lr=0.085 * 0.9,  # 0.085*0.9 = 0.0765
        Lp=0.129 * 1.1,  # 0.129*1.1 = 0.1419
    )

    env_sim = QQubeSwingUpSim(**env_hparams)
    # Start from an uninformative (huge-variance) randomizer for the sim env.
    randomizer = DomainRandomizer(
        NormalDomainParam(name='Mr', mean=0., std=1e6, clip_lo=1e-3),
        NormalDomainParam(name='Mp', mean=0., std=1e6, clip_lo=1e-3),
        NormalDomainParam(name='Lr', mean=0., std=1e6, clip_lo=1e-3),
        NormalDomainParam(name='Lp', mean=0., std=1e6, clip_lo=1e-3),
    )
    env_sim = DomainRandWrapperLive(env_sim, randomizer)
    # Index -> (domain parameter, distribution parameter) mapping.
    dp_map = {
        0: ('Mr', 'mean'), 1: ('Mr', 'std'),
        2: ('Mp', 'mean'), 3: ('Mp', 'std'),
        4: ('Lr', 'mean'), 5: ('Lr', 'std'),
        6: ('Lp', 'mean'), 7: ('Lp', 'std')
    }
    trafo_mask = [True] * 8
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)

    # Subroutine for policy improvement
    behav_policy_hparam = dict(hidden_sizes=[64, 64], hidden_nonlin=to.tanh)
    behav_policy = FNNPolicy(spec=env_sim.spec, **behav_policy_hparam)
    vfcn_hparam = dict(hidden_sizes=[64, 64], hidden_nonlin=to.tanh)
    vfcn = FNNPolicy(spec=EnvSpec(env_sim.obs_space, ValueFunctionSpace), **vfcn_hparam)
    critic_hparam = dict(
        gamma=0.9885,
        lamda=0.9648,
        num_epoch=2,
        batch_size=500,
        standardize_adv=False,
        lr=5.792e-4,
        max_grad_norm=1.,
    )
    critic = GAE(vfcn, **critic_hparam)
    subrtn_policy_hparam = dict(
        max_iter=200,
        min_steps=3 * 23 * env_sim.max_steps,
        num_epoch=7,
        eps_clip=0.0744,
        batch_size=500,
        std_init=0.9074,
        lr=3.446e-04,
        max_grad_norm=1.,
        num_workers=1,
    )
    subrtn_policy = PPO(study_dir, env_sim, behav_policy, critic, **subrtn_policy_hparam)

    # Subroutine for system identification
    prior_std_denom = trial.suggest_uniform('prior_std_denom', 5, 20)
    prior = DomainRandomizer(
        NormalDomainParam(name='Mr', mean=0.095, std=0.095 / prior_std_denom),
        NormalDomainParam(name='Mp', mean=0.024, std=0.024 / prior_std_denom),
        NormalDomainParam(name='Lr', mean=0.085, std=0.085 / prior_std_denom),
        NormalDomainParam(name='Lp', mean=0.129, std=0.129 / prior_std_denom),
    )
    ddp_policy = DomainDistrParamPolicy(
        mapping=dp_map,
        trafo_mask=trafo_mask,
        prior=prior,
        scale_params=trial.suggest_categorical('ddp_policy_scale_params', [True, False]),
    )
    subsubrtn_distr_hparam = dict(
        max_iter=trial.suggest_categorical('subsubrtn_distr_max_iter', [20]),
        pop_size=trial.suggest_int('pop_size', 50, 500),
        num_rollouts=1,
        num_is_samples=trial.suggest_int('num_is_samples', 5, 20),
        expl_std_init=trial.suggest_loguniform('expl_std_init', 1e-3, 1e-1),
        expl_std_min=trial.suggest_categorical('expl_std_min', [1e-4]),
        # BUGFIX: this was previously suggested under the duplicate name
        # 'expl_std_init', so Optuna returned the identical sample and this
        # dimension was silently removed from the search space.
        extra_expl_std_init=trial.suggest_loguniform('extra_expl_std_init', 1e-3, 1e-1),
        extra_expl_decay_iter=trial.suggest_int('extra_expl_decay_iter', 0, 10),
        num_workers=1,
    )
    csv_logger = create_csv_step_logger(
        osp.join(study_dir, f'trial_{trial.number}'))
    subsubrtn_distr = CEM(study_dir, env_sim, ddp_policy, **subsubrtn_distr_hparam, logger=csv_logger)
    obs_vel_weight = trial.suggest_loguniform('obs_vel_weight', 1, 100)
    subrtn_distr_hparam = dict(
        metric=None,
        obs_dim_weight=[1, 1, 1, 1, obs_vel_weight, obs_vel_weight],
        num_rollouts_per_distr=trial.suggest_int('num_rollouts_per_distr', 20, 100),
        num_workers=1,
    )
    subrtn_distr = SysIdViaEpisodicRL(subsubrtn_distr, behav_policy, **subrtn_distr_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=trial.suggest_categorical('algo_max_iter', [10]),
        num_eval_rollouts=trial.suggest_categorical('algo_num_eval_rollouts', [5]),
        warmstart=trial.suggest_categorical('algo_warmstart', [True]),
        thold_succ_subrtn=trial.suggest_categorical('algo_thold_succ_subrtn', [50]),
        subrtn_snapshot_mode='latest',
    )
    algo = SimOpt(study_dir, env_sim, env_real, subrtn_policy, subrtn_distr, **algo_hparam, logger=csv_logger)

    # Jeeeha
    # BUGFIX: seed training with this function's own `seed` argument; the
    # previous `args.seed` referenced a name not in this function's scope and
    # broke the documented per-trial seeding.
    algo.train(seed=seed)

    # Evaluate on the "real" environment
    min_rollouts = 1000
    sampler = ParallelRolloutSampler(
        env_real, algo.policy, num_workers=1, min_rollouts=min_rollouts)  # parallelize via optuna n_jobs
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros]) / min_rollouts

    return mean_ret
def suggest_kwargs(
    trial: Trial,
    prefix: str,
    kwargs_ranges: Mapping[str, Any],
    kwargs: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
    """Suggest values for every key of *kwargs_ranges* that is not pinned.

    :param trial: the Optuna trial used to sample values
    :param prefix: prepended (dot-separated) to every Optuna parameter name
    :param kwargs_ranges: per-name search-space specification dicts with keys
        such as ``type``, ``low``, ``high``, ``scale``, ``q``/``step``,
        ``log``, ``base`` and ``choices``
    :param kwargs: fixed values; any name present here is NOT suggested
    :return: merged dict of fixed and freshly suggested keyword arguments
    """
    _kwargs: Dict[str, Any] = {}
    if kwargs:
        _kwargs.update(kwargs)
    for name, info in kwargs_ranges.items():
        if name in _kwargs:
            continue  # has been set by default, won't be suggested
        prefixed_name = f"{prefix}.{name}"
        # TODO: make it even easier to specify categorical strategies just as lists
        # if isinstance(info, (tuple, list, set)):
        #     info = dict(type='categorical', choices=list(info))
        dtype, low, high = info["type"], info.get("low"), info.get("high")
        # 'log' may arrive as a boolean or as one of several truthy strings.
        log = info.get("log") in {
            True, "TRUE", "True", "true", "t", "YES", "Yes", "yes", "y"
        }
        if dtype in {int, "int"}:
            scale = info.get("scale")
            if scale in {"power_two", "power"}:
                # Powers of a base (default 2) via a project helper.
                _kwargs[name] = suggest_discrete_power_int(
                    trial=trial,
                    name=prefixed_name,
                    low=low,
                    high=high,
                    base=info.get("q") or info.get("base") or 2,
                )
            elif scale is None or scale == "linear":
                # get log from info - could either be a boolean or string
                _kwargs[name] = trial.suggest_int(
                    name=prefixed_name,
                    low=low,
                    high=high,
                    step=info.get("q") or info.get("step") or 1,
                    log=log,
                )
            else:
                logger.warning(
                    f"Unhandled scale {scale} for parameter {name} of data type {dtype}"
                )
        elif dtype in {float, "float"}:
            _kwargs[name] = trial.suggest_float(
                name=prefixed_name,
                low=low,
                high=high,
                step=info.get("q") or info.get("step"),
                log=log,
            )
        elif dtype == "categorical":
            choices = info["choices"]
            _kwargs[name] = trial.suggest_categorical(name=prefixed_name, choices=choices)
        elif dtype in {bool, "bool"}:
            # Booleans are modeled as a two-element categorical.
            _kwargs[name] = trial.suggest_categorical(name=prefixed_name, choices=[True, False])
        else:
            logger.warning(
                f"Unhandled data type ({dtype}) for parameter {name}")
    return _kwargs
def train_and_eval(trial: optuna.Trial, study_dir: str, seed: int):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note:: Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param study_dir: the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environment
    env_hparams = dict(dt=1/100., max_steps=600)
    env = QQubeSwingUpSim(**env_hparams)
    env = ActNormWrapper(env)

    # Learning rate scheduler (None means constant learning rate)
    lrs_gamma = trial.suggest_categorical('exp_lr_scheduler_gamma', [None, 0.995, 0.999])
    if lrs_gamma is not None:
        lr_sched = lr_scheduler.ExponentialLR
        lr_sched_hparam = dict(gamma=lrs_gamma)
    else:
        lr_sched, lr_sched_hparam = None, dict()

    # Policy
    policy_hparam = dict(
        hidden_sizes=trial.suggest_categorical('hidden_sizes_policy', [(16, 16), (32, 32), (64, 64)]),
        hidden_nonlin=fcn_from_str(trial.suggest_categorical('hidden_nonlin_policy', ['to_tanh', 'to_relu'])),
    )  # FNN
    # policy_hparam = dict(
    #     hidden_size=trial.suggest_categorical('hidden_size_policy', [16, 32, 64]),
    #     num_recurrent_layers=trial.suggest_categorical('num_recurrent_layers_policy', [1, 2]),
    # )  # LSTM & GRU
    policy = FNNPolicy(spec=env.spec, **policy_hparam)
    # policy = GRUPolicy(spec=env.spec, **policy_hparam)

    # Critic
    vfcn_hparam = dict(
        hidden_sizes=trial.suggest_categorical('hidden_sizes_critic', [(16, 16), (32, 32), (64, 64)]),
        hidden_nonlin=fcn_from_str(trial.suggest_categorical('hidden_nonlin_critic', ['to_tanh', 'to_relu'])),
    )
    # vfcn_hparam = dict(
    #     hidden_size=trial.suggest_categorical('hidden_size_critic', [16, 32, 64]),
    #     num_recurrent_layers=trial.suggest_categorical('num_recurrent_layers_critic', [1, 2]),
    # )  # LSTM & GRU
    vfcn = FNNPolicy(spec=EnvSpec(env.obs_space, ValueFunctionSpace),
                     **vfcn_hparam)
    # vfcn = GRUPolicy(spec=EnvSpec(env.obs_space, ValueFunctionSpace), **vfcn_hparam)
    critic_hparam = dict(
        batch_size=500,
        gamma=trial.suggest_uniform('gamma_critic', 0.98, 1.),
        lamda=trial.suggest_uniform('lamda_critic', 0.95, 1.),
        num_epoch=trial.suggest_int('num_epoch_critic', 1, 10),
        lr=trial.suggest_loguniform('lr_critic', 1e-5, 1e-3),
        standardize_adv=trial.suggest_categorical('standardize_adv_critic', [False]),
        max_grad_norm=trial.suggest_categorical('max_grad_norm_critic', [None, 1., 5.]),
        lr_scheduler=lr_sched,
        lr_scheduler_hparam=lr_sched_hparam
    )
    critic = GAE(vfcn, **critic_hparam)

    # Algorithm
    algo_hparam = dict(
        num_workers=1,  # parallelize via optuna n_jobs
        max_iter=250,
        batch_size=500,
        min_steps=trial.suggest_int('num_rollouts_algo', 10, 30)*env.max_steps,
        num_epoch=trial.suggest_int('num_epoch_algo', 1, 10),
        eps_clip=trial.suggest_uniform('eps_clip_algo', 0.05, 0.2),
        std_init=trial.suggest_uniform('std_init_algo', 0.5, 1.0),
        lr=trial.suggest_loguniform('lr_algo', 1e-5, 1e-3),
        max_grad_norm=trial.suggest_categorical('max_grad_norm_algo', [None, 1., 5.]),
        lr_scheduler=lr_sched,
        lr_scheduler_hparam=lr_sched_hparam
    )
    csv_logger = create_csv_step_logger(osp.join(study_dir, f'trial_{trial.number}'))
    algo = PPO(osp.join(study_dir, f'trial_{trial.number}'), env, policy, critic, **algo_hparam, logger=csv_logger)

    # Train without saving the results
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate: average undiscounted return over many fresh rollouts
    min_rollouts = 1000
    sampler = ParallelRolloutSampler(env, policy, num_workers=1,
                                     min_rollouts=min_rollouts)  # parallelize via optuna n_jobs
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros])/min_rollouts

    return mean_ret
def objective(trial: optuna.Trial) -> float:
    """Optuna objective: train a TemporalFusionTransformer, return final val_loss.

    Relies on surrounding scope for ``model_path``, ``log_dir``, ``max_epochs``,
    the ``*_range`` tuples, the dataloaders, ``use_learning_rate_finder``,
    ``trainer_kwargs`` and ``kwargs`` — none are defined in this block.
    """
    # Filenames for each trial must be made unique in order to access each checkpoint.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(os.path.join(
        model_path, "trial_{}".format(trial.number), "{epoch}"), monitor="val_loss")

    # The default logger in PyTorch Lightning writes to event files to be consumed by
    # TensorBoard. We don't use any logger here as it requires us to implement several abstract
    # methods. Instead we setup a simple callback, that saves metrics from each validation step.
    metrics_callback = MetricsCallback()
    learning_rate_callback = LearningRateMonitor()
    logger = TensorBoardLogger(log_dir, name="optuna", version=trial.number)
    gradient_clip_val = trial.suggest_loguniform("gradient_clip_val", *gradient_clip_val_range)
    trainer = pl.Trainer(
        checkpoint_callback=checkpoint_callback,
        max_epochs=max_epochs,
        gradient_clip_val=gradient_clip_val,
        gpus=[0] if torch.cuda.is_available() else None,
        callbacks=[
            metrics_callback,
            learning_rate_callback,
            # Prune unpromising trials based on the monitored validation loss.
            PyTorchLightningPruningCallback(trial, monitor="val_loss"),
        ],
        logger=logger,
        **trainer_kwargs,
    )

    # create model
    hidden_size = trial.suggest_int("hidden_size", *hidden_size_range, log=True)
    model = TemporalFusionTransformer.from_dataset(
        train_dataloader.dataset,
        dropout=trial.suggest_uniform("dropout", *dropout_range),
        hidden_size=hidden_size,
        # hidden_continuous_size must not exceed the sampled hidden_size.
        hidden_continuous_size=trial.suggest_int(
            "hidden_continuous_size",
            hidden_continuous_size_range[0],
            min(hidden_continuous_size_range[1], hidden_size),
            log=True,
        ),
        attention_head_size=trial.suggest_int("attention_head_size", *attention_head_size_range),
        log_interval=-1,
        **kwargs,
    )

    # find good learning rate
    if use_learning_rate_finder:
        lr_trainer = pl.Trainer(
            gradient_clip_val=gradient_clip_val,
            gpus=[0] if torch.cuda.is_available() else None,
            logger=False,
        )
        res = lr_trainer.tuner.lr_find(
            model,
            train_dataloader=train_dataloader,
            val_dataloaders=val_dataloader,
            early_stop_threshold=10000.0,
            min_lr=learning_rate_range[0],
            num_training=100,
            max_lr=learning_rate_range[1],
        )

        # Smooth the loss-vs-lr curve (finite values only) and pick the lr
        # at the steepest descent of the smoothed loss.
        loss_finite = np.isfinite(res.results["loss"])
        lr_smoothed, loss_smoothed = sm.nonparametric.lowess(
            np.asarray(res.results["loss"])[loss_finite],
            np.asarray(res.results["lr"])[loss_finite],
            frac=1.0 / 10.0,
        )[10:-1].T
        optimal_idx = np.gradient(loss_smoothed).argmin()
        optimal_lr = lr_smoothed[optimal_idx]
        print(f"Using learning rate of {optimal_lr:.3g}")
        model.hparams.learning_rate = optimal_lr
    else:
        # NOTE(review): the Optuna parameter is registered under the name
        # "learning_rate_range" — likely intended to be "learning_rate".
        # Confirm before renaming, as it changes stored study parameters.
        model.hparams.learning_rate = trial.suggest_loguniform(
            "learning_rate_range", *learning_rate_range)

    # fit
    trainer.fit(model, train_dataloader=train_dataloader, val_dataloaders=val_dataloader)

    # report result
    return metrics_callback.metrics[-1]["val_loss"].item()
def train_and_eval(trial: optuna.Trial, study_dir: str, seed: int):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note:: Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param study_dir: the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Load the data
    data_set_name = "oscillation_50Hz_initpos-0.5"
    data = pd.read_csv(
        osp.join(pyrado.PERMA_DIR, "misc", f"{data_set_name}.csv"))
    # Pick the relevant column for the chosen data set and reshape to (N, 1).
    if data_set_name == "daily_min_temperatures":
        data = to.tensor(data["Temp"].values, dtype=to.get_default_dtype()).view(-1, 1)
    elif data_set_name == "monthly_sunspots":
        data = to.tensor(data["Sunspots"].values, dtype=to.get_default_dtype()).view(-1, 1)
    elif "oscillation" in data_set_name:
        data = to.tensor(data["Positions"].values, dtype=to.get_default_dtype()).view(-1, 1)
    else:
        raise pyrado.ValueErr(
            given=data_set_name,
            eq_constraint="'daily_min_temperatures', 'monthly_sunspots', "
            "'oscillation_50Hz_initpos-0.5', or 'oscillation_100Hz_initpos-0.4",
        )

    # Dataset
    data_set_hparam = dict(
        name=data_set_name,
        ratio_train=0.7,
        window_size=trial.suggest_int("dataset_window_size", 1, 100),
        standardize_data=False,
        scale_min_max_data=True,
    )
    dataset = TimeSeriesDataSet(data, **data_set_hparam)

    # Policy
    policy_hparam = dict(
        dt=0.02 if "oscillation" in data_set_name else 1.0,
        hidden_size=trial.suggest_int("policy_hidden_size", 2, 51),
        obs_layer=None,
        activation_nonlin=fcn_from_str(
            trial.suggest_categorical("policy_activation_nonlin", ["to_tanh", "to_sigmoid"])),
        mirrored_conv_weights=trial.suggest_categorical(
            "policy_mirrored_conv_weights", [True, False]),
        conv_out_channels=1,
        conv_kernel_size=None,
        conv_padding_mode=trial.suggest_categorical("policy_conv_padding_mode", ["zeros",
                                                                                 "circular"]),
        tau_init=trial.suggest_loguniform("policy_tau_init", 1e-2, 1e3),
        tau_learnable=True,
        kappa_init=trial.suggest_categorical("policy_kappa_init", [0, 1e-4, 1e-2]),
        kappa_learnable=True,
        potential_init_learnable=trial.suggest_categorical(
            "policy_potential_init_learnable", [True, False]),
        init_param_kwargs=trial.suggest_categorical(
            "policy_init_param_kwargs", [None, dict(bell=True)]),
        use_cuda=False,
    )
    # 1-dimensional input and output spaces for time-series prediction.
    policy = NFPolicy(spec=EnvSpec(act_space=InfBoxSpace(shape=1),
                                   obs_space=InfBoxSpace(shape=1)), **policy_hparam)

    # Algorithm
    algo_hparam = dict(
        windowed=trial.suggest_categorical("algo_windowed", [True, False]),
        max_iter=1000,
        optim_class=optim.Adam,
        optim_hparam=dict(
            lr=trial.suggest_uniform("optim_lr", 5e-4, 5e-2),
            eps=trial.suggest_uniform("optim_eps", 1e-8, 1e-5),
            weight_decay=trial.suggest_uniform("optim_weight_decay", 5e-5, 5e-3),
        ),
        loss_fcn=nn.MSELoss(),
    )
    csv_logger = create_csv_step_logger(
        osp.join(study_dir, f"trial_{trial.number}"))
    algo = TSPred(study_dir, dataset, policy, **algo_hparam, logger=csv_logger)

    # Train without saving the results
    algo.train(snapshot_mode="latest", seed=seed)

    # Evaluate
    num_init_samples = dataset.window_size
    _, loss_trn = TSPred.evaluate(
        policy,
        dataset.data_trn_inp,
        dataset.data_trn_targ,
        windowed=algo.windowed,
        num_init_samples=num_init_samples,
        cascaded=False,
    )
    # NOTE(review): loss_tst is computed but never used — the study optimizes
    # the training loss below. Confirm this is intentional.
    _, loss_tst = TSPred.evaluate(
        policy,
        dataset.data_tst_inp,
        dataset.data_tst_targ,
        windowed=algo.windowed,
        num_init_samples=num_init_samples,
        cascaded=False,
    )

    return loss_trn
def hp_search_optuna(trial: optuna.Trial):
    """Optuna objective: train with sampled hyper-parameters, return eval accuracy.

    Reads the argument namespace from the module-level ``gargs`` because Optuna
    passes only the trial object. Reports per-epoch accuracy so the pruner can
    stop unpromising trials early.
    """
    global gargs
    args = gargs
    # set config
    config = load_config(args)
    config['args'] = args
    logger.info("%s", config)
    # set path
    set_path(config)
    # create accelerator
    accelerator = Accelerator()
    config['accelerator'] = accelerator
    args.device = accelerator.device
    # set search spaces
    lr = trial.suggest_loguniform(
        'lr', 1e-6, 1e-3)  # .suggest_float('lr', 1e-6, 1e-3, log=True)
    bsz = trial.suggest_categorical('batch_size', [32, 64, 128])
    seed = trial.suggest_int('seed', 17, 42)
    epochs = trial.suggest_int('epochs', 1, args.epoch)
    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config, hp_search_bsz=bsz)
    # Run the whole trial under the sampled seed.
    with temp_seed(seed):
        # prepare model
        model = prepare_model(config)
        # create optimizer, scheduler, summary writer
        model, optimizer, scheduler, writer = prepare_others(config,
                                                             model,
                                                             train_loader,
                                                             lr=lr)
        train_loader = accelerator.prepare(train_loader)
        valid_loader = accelerator.prepare(valid_loader)
        config['optimizer'] = optimizer
        config['scheduler'] = scheduler
        config['writer'] = writer
        total_batch_size = args.batch_size * accelerator.num_processes * args.gradient_accumulation_steps
        # NOTE(review): these logs report args.batch_size / args.epoch, not
        # the trial-sampled bsz / epochs — confirm which is intended.
        logger.info("***** Running training *****")
        logger.info(f" Num examples = {len(train_loader)}")
        logger.info(f" Num Epochs = {args.epoch}")
        logger.info(
            f" Instantaneous batch size per device = {args.batch_size}")
        logger.info(
            f" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}"
        )
        logger.info(
            f" Gradient Accumulation steps = {args.gradient_accumulation_steps}"
        )
        logger.info(f" Total optimization steps = {args.max_train_steps}")
        early_stopping = EarlyStopping(logger,
                                       patience=args.patience,
                                       measure=args.measure,
                                       verbose=1)
        # Track the best measure so far (loss: lower is better, else higher).
        best_eval_measure = float(
            'inf') if args.measure == 'loss' else -float('inf')
        for epoch in range(epochs):
            eval_loss, eval_acc, best_eval_measure = train_epoch(
                model, config, train_loader, valid_loader, epoch,
                best_eval_measure)
            if args.measure == 'loss':
                eval_measure = eval_loss
            else:
                eval_measure = eval_acc
            # early stopping
            if early_stopping.validate(eval_measure, measure=args.measure):
                break
            if eval_measure == best_eval_measure:
                early_stopping.reset(best_eval_measure)
            early_stopping.status()
            # Report to Optuna so unpromising trials can be pruned mid-run.
            trial.report(eval_acc, epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()
    return eval_acc
def hp_search_optuna(trial: optuna.Trial):
    """Optuna objective: train with sampled hyper-parameters, return eval accuracy.

    Reads options from the module-level ``gopt`` namespace because Optuna
    passes only the trial object. Reports per-epoch accuracy so the pruner can
    stop unpromising trials early.
    """
    if torch.cuda.is_available():
        logger.info("%s", torch.cuda.get_device_name(0))
    global gopt
    opt = gopt
    # set config
    config = load_config(opt)
    config['opt'] = opt
    logger.info("%s", config)
    # set path
    set_path(config)
    # set search spaces
    lr = trial.suggest_loguniform(
        'lr', 1e-6, 1e-3)  # .suggest_float('lr', 1e-6, 1e-3, log=True)
    bsz = trial.suggest_categorical('batch_size', [32, 64, 128])
    seed = trial.suggest_int('seed', 17, 42)
    epochs = trial.suggest_int('epochs', 1, opt.epoch)
    # prepare train, valid dataset
    train_loader, valid_loader = prepare_datasets(config, hp_search_bsz=bsz)
    # Run the whole trial under the sampled seed.
    with temp_seed(seed):
        # prepare model
        model = prepare_model(config)
        # create optimizer, scheduler, summary writer, scaler
        optimizer, scheduler, writer, scaler = prepare_osws(config,
                                                            model,
                                                            train_loader,
                                                            lr=lr)
        config['optimizer'] = optimizer
        config['scheduler'] = scheduler
        config['writer'] = writer
        config['scaler'] = scaler
        early_stopping = EarlyStopping(logger,
                                       patience=opt.patience,
                                       measure=opt.measure,
                                       verbose=1)
        # Track the best measure so far (loss: lower is better, else higher).
        best_eval_measure = float(
            'inf') if opt.measure == 'loss' else -float('inf')
        for epoch in range(epochs):
            eval_loss, eval_acc, best_eval_measure = train_epoch(
                model, config, train_loader, valid_loader, epoch,
                best_eval_measure)
            if opt.measure == 'loss':
                eval_measure = eval_loss
            else:
                eval_measure = eval_acc
            # early stopping
            if early_stopping.validate(eval_measure, measure=opt.measure):
                break
            if eval_measure == best_eval_measure:
                early_stopping.reset(best_eval_measure)
            early_stopping.status()
            # Report to Optuna so unpromising trials can be pruned mid-run.
            trial.report(eval_acc, epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()
    return eval_acc
def train_and_eval(trial: optuna.Trial, study_dir: str, seed: int): """ Objective function for the Optuna `Study` to maximize. .. note:: Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments. :param trial: Optuna Trial object for hyper-parameter optimization :param study_dir: the parent directory for all trials in this study :param seed: seed value for the random number generators, pass `None` for no seeding :return: objective function value """ # Synchronize seeds between Optuna trials pyrado.set_seed(seed) # Environment env = QBallBalancerSim(dt=1 / 250.0, max_steps=1500) env = ActNormWrapper(env) # Learning rate scheduler lrs_gamma = trial.suggest_categorical("exp_lr_scheduler_gamma", [None, 0.99, 0.995, 0.999]) if lrs_gamma is not None: lr_sched = lr_scheduler.ExponentialLR lr_sched_hparam = dict(gamma=lrs_gamma) else: lr_sched, lr_sched_hparam = None, dict() # Policy policy = FNNPolicy( spec=env.spec, hidden_sizes=trial.suggest_categorical("hidden_sizes_policy", [(16, 16), (32, 32), (64, 64)]), hidden_nonlin=fcn_from_str( trial.suggest_categorical("hidden_nonlin_policy", ["to_tanh", "to_relu"])), ) # Critic vfcn = FNN( input_size=env.obs_space.flat_dim, output_size=1, hidden_sizes=trial.suggest_categorical("hidden_sizes_critic", [(16, 16), (32, 32), (64, 64)]), hidden_nonlin=fcn_from_str( trial.suggest_categorical("hidden_nonlin_critic", ["to_tanh", "to_relu"])), ) critic_hparam = dict( batch_size=250, gamma=trial.suggest_uniform("gamma_critic", 0.99, 1.0), lamda=trial.suggest_uniform("lamda_critic", 0.95, 1.0), num_epoch=trial.suggest_int("num_epoch_critic", 1, 10), lr=trial.suggest_loguniform("lr_critic", 1e-5, 1e-3), standardize_adv=trial.suggest_categorical("standardize_adv_critic", [True, False]), max_grad_norm=trial.suggest_categorical("max_grad_norm_critic", [None, 1.0, 5.0]), lr_scheduler=lr_sched, lr_scheduler_hparam=lr_sched_hparam, ) critic = GAE(vfcn, **critic_hparam) # Algorithm algo_hparam = dict( 
num_workers=1, # parallelize via optuna n_jobs max_iter=300, batch_size=250, min_steps=trial.suggest_int("num_rollouts_algo", 10, 30) * env.max_steps, num_epoch=trial.suggest_int("num_epoch_algo", 1, 10), eps_clip=trial.suggest_uniform("eps_clip_algo", 0.05, 0.2), std_init=trial.suggest_uniform("std_init_algo", 0.5, 1.0), lr=trial.suggest_loguniform("lr_algo", 1e-5, 1e-3), max_grad_norm=trial.suggest_categorical("max_grad_norm_algo", [None, 1.0, 5.0]), lr_scheduler=lr_sched, lr_scheduler_hparam=lr_sched_hparam, ) algo = PPO(osp.join(study_dir, f"trial_{trial.number}"), env, policy, critic, **algo_hparam) # Train without saving the results algo.train(snapshot_mode="latest", seed=seed) # Evaluate min_rollouts = 1000 sampler = ParallelRolloutSampler(env, policy, num_workers=1, min_rollouts=min_rollouts) ros = sampler.sample() mean_ret = sum([r.undiscounted_return() for r in ros]) / min_rollouts return mean_ret
def objective(trial: Trial) -> float:
    """Sleep for one second, then return a suggested integer in [-10, 10]."""
    time.sleep(1)
    suggested = trial.suggest_int("x", -10, 10)
    return suggested
def objective(trial: Trial) -> float:
    """Quadratic bowl over two suggested integer coordinates in [-10, 10]."""
    a = trial.suggest_int("x", -10, 10)
    b = trial.suggest_int("y", -10, 10)
    return a * a + b * b
def _trial_to_params(trial: Trial):
    """Map an Optuna trial onto an XGBoost parameter dict.

    Starts from DEFAULT_PARAMS and layers sampled values on top; tree-specific
    and dart-specific settings are only added for the matching boosters.
    """
    params = dict(DEFAULT_PARAMS)
    # 'gblinear' and 'dart' boosters are too slow
    params["booster"] = trial.suggest_categorical("booster", ['gbtree'])
    params["seed"] = trial.suggest_int('seed', 0, 999999)
    params["learning_rate"] = trial.suggest_loguniform('learning_rate', 0.005, 0.5)
    params["lambda"] = trial.suggest_loguniform("lambda", 1e-8, 1.0)
    params["alpha"] = trial.suggest_loguniform("alpha", 1e-8, 1.0)

    if params['booster'] in ('gbtree', 'dart'):
        sampling_method = trial.suggest_categorical(
            "sampling_method", ["uniform", "gradient_based"])
        # Gradient-based sampling tolerates much smaller subsample ratios.
        if sampling_method == 'uniform':
            subsample = trial.suggest_discrete_uniform('subsample', .5, 1, .05)
        else:
            subsample = trial.suggest_discrete_uniform('subsample', .1, 1, .05)
        tree_params = {
            "max_depth": trial.suggest_int('max_depth', 2, 25),
            "sampling_method": sampling_method,
            "subsample": subsample,
            "colsample_bytree": trial.suggest_discrete_uniform('colsample_bytree', .20, 1., .01),
            "colsample_bylevel": trial.suggest_discrete_uniform('colsample_bylevel', .20, 1., .01),
            "colsample_bynode": trial.suggest_discrete_uniform('colsample_bynode', .20, 1., .01),
            # Repeated entries bias the categorical sampler towards those values.
            "gamma": trial.suggest_categorical(
                "gamma", [0, 0, 0, 0, 0, 0.01, 0.1, 0.2, 0.3, 0.5, 1., 10., 100.]),
            "min_child_weight": trial.suggest_categorical('min_child_weight', [
                1, 1, 1, 1, 2, 3, 4, 5, 1, 6, 7, 8, 9, 10, 11, 15, 30, 60, 100, 1, 1, 1
            ]),
            "max_delta_step": trial.suggest_categorical("max_delta_step", [0, 0, 0, 0, 0, 1, 2, 5, 8]),
            "grow_policy": trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"]),
            "tree_method": "auto"
        }
        params.update(tree_params)

    if params["booster"] == "dart":
        dart_params = {
            "sample_type": trial.suggest_categorical("sample_type", ["uniform", "weighted"]),
            "normalize_type": trial.suggest_categorical("normalize_type", ["tree", "forest"]),
            "rate_drop": trial.suggest_loguniform("rate_drop", 1e-8, 1.0),
            "skip_drop": trial.suggest_loguniform("skip_drop", 1e-8, 1.0)
        }
        params.update(dart_params)

    return params