def objective(self, trial: Trial) -> float:
    suggest: Dict[str, Union[bool, float, int, str]] = {
        "n_estimators": trial.suggest_int("n_estimators", self.params["n_estimators"][0], self.params["n_estimators"][1]),
        "max_depth": trial.suggest_int("max_depth", self.params["max_depth"][0], self.params["max_depth"][1]),
        "max_features": trial.suggest_categorical("max_features", self.params["max_features"]),
        "min_samples_leaf": trial.suggest_loguniform(
            "min_samples_leaf",
            self.params["min_samples_leaf"][0],
            self.params["min_samples_leaf"][1],
        ),
        "min_samples_split": trial.suggest_loguniform(
            "min_samples_split",
            self.params["min_samples_split"][0],
            self.params["min_samples_split"][1],
        ),
        "bootstrap": trial.suggest_categorical("bootstrap", (True, False)),
    }
    est: BaseEstimator = self.model.__class__(**suggest)
    return -cross_val_score(estimator=est, X=self.X, y=self.y, cv=self.cv, scoring="neg_mean_squared_error").mean()
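# Usage sketch (not from the original source): wiring an objective like the one
# above into a study. A plain function replaces the method, and the data and
# search bounds below are illustrative placeholders.
import optuna
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=200, n_features=10, random_state=0)

def objective(trial: optuna.Trial) -> float:
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 10, 200),
        "max_depth": trial.suggest_int("max_depth", 2, 16),
        # fraction-valued leaf size, sampled on a log scale as in the example above
        "min_samples_leaf": trial.suggest_loguniform("min_samples_leaf", 1e-3, 0.5),
    }
    est = RandomForestRegressor(**params)
    # the objective returns positive MSE, so the study minimizes
    return -cross_val_score(est, X, y, cv=3, scoring="neg_mean_squared_error").mean()

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)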
def sample_a2c_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for A2C hyperparams.

    :param trial:
    :return:
    """
    gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    normalize_advantage = trial.suggest_categorical("normalize_advantage", [False, True])
    max_grad_norm = trial.suggest_categorical("max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5])
    use_rms_prop = trial.suggest_categorical("use_rms_prop", [False, True])
    gae_lambda = trial.suggest_categorical("gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0])
    n_steps = trial.suggest_categorical("n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048])
    lr_schedule = trial.suggest_categorical("lr_schedule", ["linear", "constant"])
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1)
    ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
    vf_coef = trial.suggest_uniform("vf_coef", 0, 1)
    log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
    ortho_init = trial.suggest_categorical("ortho_init", [False, True])
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium"])
    # sde_net_arch = trial.suggest_categorical("sde_net_arch", [None, "tiny", "small"])
    # full_std = trial.suggest_categorical("full_std", [False, True])
    # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu'])
    activation_fn = trial.suggest_categorical("activation_fn", ["tanh", "relu"])

    if lr_schedule == "linear":
        learning_rate = linear_schedule(learning_rate)

    net_arch = {
        "small": [dict(pi=[64, 64], vf=[64, 64])],
        "medium": [dict(pi=[256, 256], vf=[256, 256])],
    }[net_arch]

    # sde_net_arch = {
    #     None: None,
    #     "tiny": [64],
    #     "small": [64, 64],
    # }[sde_net_arch]

    activation_fn = {"tanh": nn.Tanh, "relu": nn.ReLU, "elu": nn.ELU, "leaky_relu": nn.LeakyReLU}[activation_fn]

    return {
        "n_steps": n_steps,
        "gamma": gamma,
        "gae_lambda": gae_lambda,
        "learning_rate": learning_rate,
        "ent_coef": ent_coef,
        "normalize_advantage": normalize_advantage,
        "max_grad_norm": max_grad_norm,
        "use_rms_prop": use_rms_prop,
        "vf_coef": vf_coef,
        "policy_kwargs": dict(
            log_std_init=log_std_init,
            net_arch=net_arch,
            # full_std=full_std,
            activation_fn=activation_fn,
            # sde_net_arch=sde_net_arch,
            ortho_init=ortho_init,
        ),
    }
def get_boosting_parameter_suggestions(trial: Trial) -> dict:
    """
    Get parameter sample for Boosting (like XGBoost, LightGBM)

    Args:
        trial(trial.Trial):

    Returns:
        dict: parameter sample generated by trial obj
    """
    return {
        # L2 regularization
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-3, 1e3),
        # L1 regularization
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-3, 1e3),
        # fraction of features used by each weak learner
        # at 0.5, half of the features are picked up front and the tree is grown within that subset
        'colsample_bytree': trial.suggest_loguniform('colsample_bytree', .5, 1.),
        # fraction of the training data to use
        # unlike colsample, this samples along the row axis
        'subsample': trial.suggest_loguniform('subsample', .5, 1.),
        # maximum tree depth
        # e.g. at 5, the branching of each weak learner is capped at depth 5
        'max_depth': trial.suggest_int('max_depth', low=3, high=8),
        # minimum number of samples in a leaf node
        # splits below this threshold are forbidden, so larger values only allow splits along broad overall trends
        # [NOTE]: since this is a count, it depends on the dataset size
        'min_child_weight': trial.suggest_uniform('min_child_weight', low=.5, high=40)
    }
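# Illustrative sketch (an assumption, not from the source): feeding the sampled
# boosting parameters into LightGBM inside an objective. Data and trial budget
# are placeholders.
import lightgbm as lgb
import optuna
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, random_state=0)
X_tr, X_va, y_tr, y_va = train_test_split(X, y, random_state=0)

def objective(trial: optuna.Trial) -> float:
    params = get_boosting_parameter_suggestions(trial)
    model = lgb.LGBMClassifier(n_estimators=100, **params)
    model.fit(X_tr, y_tr)
    return model.score(X_va, y_va)

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10)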
def objective(self, trial: Trial) -> float:
    """
    Optuna optimization method.

    Parameters
    ----------
    trial : optuna.Trial
        An optuna trial object.

    Returns
    -------
    float
        The performance evaluation metric value for a single trial.
    """
    suggest: Dict[str, float] = {
        "vect__max_df": trial.suggest_uniform(
            name="vect__max_df",
            low=self.hparams["vectorizer_hparams"]["max_df"][0],
            high=self.hparams["vectorizer_hparams"]["max_df"][1],
        ),
        "decomp__doc_topic_prior": trial.suggest_loguniform(
            name="decomp__doc_topic_prior",
            low=self.hparams["lda_hparams"]["alpha"][0],
            high=self.hparams["lda_hparams"]["alpha"][1],
        ),
        "decomp__topic_word_prior": trial.suggest_loguniform(
            name="decomp__topic_word_prior",
            low=self.hparams["lda_hparams"]["beta"][0],
            high=self.hparams["lda_hparams"]["beta"][1],
        ),
        "decomp__n_components": trial.suggest_int(
            name="decomp__n_components",
            low=self.hparams["lda_hparams"]["num_topics"][0],
            high=self.hparams["lda_hparams"]["num_topics"][1],
        ),
        "decomp__max_iter": trial.suggest_int(
            name="decomp__max_iter",
            low=self.hparams["lda_hparams"]["iterations"][0],
            high=self.hparams["lda_hparams"]["iterations"][1],
        ),
        "decomp__learning_decay": trial.suggest_uniform(
            name="decomp__learning_decay",
            low=self.hparams["lda_hparams"]["decay"][0],
            high=self.hparams["lda_hparams"]["decay"][1],
        ),
        "decomp__learning_offset": trial.suggest_float(
            name="decomp__learning_offset",
            low=self.hparams["lda_hparams"]["offset"][0],
            high=self.hparams["lda_hparams"]["offset"][1],
        ),
    }
    est: Pipeline = self.pipeline.set_params(**suggest).fit(self.X)
    return coherence(pipeline=est, X=self.X)
def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for PPO2 hyperparams.

    :param trial:
    :return:
    """
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64, 128, 256, 512])
    n_steps = trial.suggest_categorical("n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048])
    gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1)
    lr_schedule = "constant"
    # lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant'])
    ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
    clip_range = trial.suggest_categorical("clip_range", [0.1, 0.2, 0.3, 0.4])
    n_epochs = trial.suggest_categorical("n_epochs", [1, 5, 10, 20])
    gae_lambda = trial.suggest_categorical("gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0])
    max_grad_norm = trial.suggest_categorical("max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5])
    vf_coef = trial.suggest_uniform("vf_coef", 0, 1)
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium"])
    log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
    sde_sample_freq = trial.suggest_categorical("sde_sample_freq", [-1, 8, 16, 32, 64, 128, 256])
    ortho_init = False
    # ortho_init = trial.suggest_categorical('ortho_init', [False, True])
    # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu'])
    activation_fn = trial.suggest_categorical("activation_fn", ["tanh", "relu"])

    # TODO: account when using multiple envs
    if batch_size > n_steps:
        batch_size = n_steps

    if lr_schedule == "linear":
        learning_rate = linear_schedule(learning_rate)

    net_arch = {
        "small": [dict(pi=[64, 64], vf=[64, 64])],
        "medium": [dict(pi=[256, 256], vf=[256, 256])],
    }[net_arch]

    activation_fn = {"tanh": nn.Tanh, "relu": nn.ReLU, "elu": nn.ELU, "leaky_relu": nn.LeakyReLU}[activation_fn]

    return {
        "n_steps": n_steps,
        "batch_size": batch_size,
        "gamma": gamma,
        "learning_rate": learning_rate,
        "ent_coef": ent_coef,
        "clip_range": clip_range,
        "n_epochs": n_epochs,
        "gae_lambda": gae_lambda,
        "max_grad_norm": max_grad_norm,
        "vf_coef": vf_coef,
        "sde_sample_freq": sde_sample_freq,
        "policy_kwargs": dict(
            log_std_init=log_std_init,
            net_arch=net_arch,
            activation_fn=activation_fn,
            ortho_init=ortho_init,
        ),
    }
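# Hedged usage sketch (assumed, not part of the source): consuming the sampled
# PPO hyperparameters with Stable-Baselines3 inside an objective. The
# environment id and training budget are placeholders, and the dict-style
# net_arch above assumes the SB3 1.x API these samplers were written against.
import optuna
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

def objective(trial: optuna.Trial) -> float:
    kwargs = sample_ppo_params(trial)
    model = PPO("MlpPolicy", "Pendulum-v1", **kwargs)
    model.learn(total_timesteps=20_000)
    mean_reward, _ = evaluate_policy(model, model.get_env(), n_eval_episodes=5)
    return mean_reward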
def objective(self, trial: Trial) -> float:
    suggest: Dict[str, Union[float, int, str]] = {
        "objective": "reg:squarederror",  # xgboost v0.90
        "tree_method": self.tree_method,
        "n_estimators": trial.suggest_int("n_estimators", self.params["n_estimators"][0], self.params["n_estimators"][1]),
        "reg_alpha": trial.suggest_loguniform("reg_alpha", self.params["alpha"][0], self.params["alpha"][1]),
        "reg_lambda": trial.suggest_loguniform("reg_lambda", self.params["lambda"][0], self.params["lambda"][1]),
        "learning_rate": trial.suggest_loguniform("learning_rate", self.params["learning_rate"][0], self.params["learning_rate"][1]),
        "max_depth": trial.suggest_int("max_depth", self.params["max_depth"][0], self.params["max_depth"][1]),
        "min_child_weight": trial.suggest_int(
            "min_child_weight",
            self.params["min_child_weight"][0],
            self.params["min_child_weight"][1],
        ),
        "gamma": trial.suggest_loguniform("gamma", self.params["gamma"][0], self.params["gamma"][1]),
        "subsample": trial.suggest_uniform("subsample", self.params["subsample"][0], self.params["subsample"][1]),
        "colsample_bytree": trial.suggest_uniform("colsample_bytree", self.params["colsample"][0], self.params["colsample"][1]),
        "colsample_bylevel": trial.suggest_uniform("colsample_bylevel", self.params["colsample"][0], self.params["colsample"][1]),
        "colsample_bynode": trial.suggest_uniform("colsample_bynode", self.params["colsample"][0], self.params["colsample"][1]),
        "grow_policy": trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"]),
    }
    est: BaseEstimator = self.model.__class__(**suggest)
    return -cross_val_score(estimator=est, X=self.X, y=self.y, cv=self.cv, scoring="neg_mean_squared_error").mean()
def __call__(self, report_dir: Path, trial: Trial) -> ArmorDigitPipeline:
    return ArmorDigitKerasPipeline.from_distillation(
        teacher_pipeline=self.teacher,
        conv_blocks=((32, 32), (64, 64)),
        logs_dir=str(report_dir),
        dropout=trial.suggest_uniform("dropout", 0, 0.99),
        lr=trial.suggest_loguniform("lr", 5e-4, 1e-3),
        dense_size=1024,  # 2 ** round(trial.suggest_discrete_uniform("dense_size_log2", 3, 10, 1)),
        temperature=trial.suggest_loguniform("temperature", 1, 100),
    )
def _trial_to_params(trial: Trial):
    params = {**DEFAULT_PARAMS,
              # 'gblinear' and 'dart' boosters are too slow
              "booster": trial.suggest_categorical("booster", ['gbtree']),
              "seed": trial.suggest_int('seed', 0, 999999),
              "learning_rate": trial.suggest_loguniform('learning_rate', 0.005, 0.5),
              "lambda": trial.suggest_loguniform("lambda", 1e-8, 1.0),
              "alpha": trial.suggest_loguniform("alpha", 1e-8, 1.0)}

    if params['booster'] == 'gbtree' or params['booster'] == 'dart':
        sampling_method = trial.suggest_categorical(
            "sampling_method", ["uniform", "gradient_based"])
        if sampling_method == 'uniform':
            subsample = trial.suggest_discrete_uniform('subsample', .5, 1, .05)
        else:
            subsample = trial.suggest_discrete_uniform('subsample', .1, 1, .05)
        params.update({
            "max_depth": trial.suggest_int('max_depth', 2, 25),
            "sampling_method": sampling_method,
            "subsample": subsample,
            "colsample_bytree": trial.suggest_discrete_uniform(
                'colsample_bytree', .20, 1., .01),
            "colsample_bylevel": trial.suggest_discrete_uniform(
                'colsample_bylevel', .20, 1., .01),
            "colsample_bynode": trial.suggest_discrete_uniform(
                'colsample_bynode', .20, 1., .01),
            # repeated values weight the categorical draw toward the defaults
            "gamma": trial.suggest_categorical(
                "gamma", [0, 0, 0, 0, 0, 0.01, 0.1, 0.2, 0.3, 0.5, 1., 10., 100.]),
            "min_child_weight": trial.suggest_categorical(
                'min_child_weight',
                [1, 1, 1, 1, 2, 3, 4, 5, 1, 6, 7, 8, 9, 10, 11, 15, 30, 60, 100, 1, 1, 1]),
            "max_delta_step": trial.suggest_categorical(
                "max_delta_step", [0, 0, 0, 0, 0, 1, 2, 5, 8]),
            "grow_policy": trial.suggest_categorical(
                "grow_policy", ["depthwise", "lossguide"]),
            "tree_method": "gpu_hist",
            "gpu_id": 0})

    if params["booster"] == "dart":
        params.update({
            "sample_type": trial.suggest_categorical(
                "sample_type", ["uniform", "weighted"]),
            "normalize_type": trial.suggest_categorical(
                "normalize_type", ["tree", "forest"]),
            "rate_drop": trial.suggest_loguniform("rate_drop", 1e-8, 1.0),
            "skip_drop": trial.suggest_loguniform("skip_drop", 1e-8, 1.0)})

    return params
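# Possible usage (an assumption, not from the source): training with the
# translated parameters via the native XGBoost API. DEFAULT_PARAMS, dtrain,
# and dvalid are presumed to be defined elsewhere in the original module.
import xgboost as xgb

def objective(trial):
    params = _trial_to_params(trial)
    booster = xgb.train(params, dtrain, num_boost_round=200,
                        evals=[(dvalid, "valid")], verbose_eval=False)
    # booster.eval() returns e.g. "[0]\tvalid-rmse:1.234"; parse out the value
    return float(booster.eval(dvalid).split(":")[-1])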
def _make_cate_predictions(self, trial: optuna.Trial, i: int) -> np.ndarray:
    """Make predictions of CATE by a sampled set of hyperparameters."""
    # hyperparameters
    # for control model
    eta_con = trial.suggest_loguniform('eta_control', 1e-5, 1e-1)
    min_leaf_con = trial.suggest_int('min_samples_leaf_control', 1, 20)
    max_depth_con = trial.suggest_int('max_depth_control', 1, 20)
    subsample_con = trial.suggest_uniform('sub_sample_control', 0.1, 1.0)
    control_params = {
        'n_estimators': 100,
        'learning_rate': eta_con,
        'min_samples_leaf': min_leaf_con,
        'max_depth': max_depth_con,
        'subsample': subsample_con,
        'random_state': 12345
    }
    # for treated model
    eta_trt = trial.suggest_loguniform('eta_treat', 1e-5, 1e-1)
    min_leaf_trt = trial.suggest_int('min_samples_leaf_treat', 1, 20)
    max_depth_trt = trial.suggest_int('max_depth_treat', 1, 20)
    subsample_trt = trial.suggest_uniform('sub_sample_treat', 0.1, 1.0)
    treated_params = {
        'n_estimators': 100,
        'learning_rate': eta_trt,
        'min_samples_leaf': min_leaf_trt,
        'max_depth': max_depth_trt,
        'subsample': subsample_trt,
        'random_state': 12345
    }
    # for overall model
    eta_ova = trial.suggest_loguniform('eta_overall', 1e-5, 1e-1)
    min_leaf_ova = trial.suggest_int('min_samples_leaf_overall', 1, 20)
    max_depth_ova = trial.suggest_int('max_depth_overall', 1, 20)
    subsample_ova = trial.suggest_uniform('sub_sample_overall', 0.1, 1.0)
    overall_params = {
        'n_estimators': 100,
        'learning_rate': eta_ova,
        'min_samples_leaf': min_leaf_ova,
        'max_depth': max_depth_ova,
        'subsample': subsample_ova,
        'random_state': 12345
    }
    # define DAL model
    meta_learner = DAL(controls_model=GBR(**control_params),
                       treated_model=GBR(**treated_params),
                       overall_model=GBR(**overall_params))
    meta_learner.fit(X=self.Xtr[i], T=self.Ttr[i], Y=self.Ytr[i])
    return meta_learner.effect(X=self.Xval[i])
def objective(self, trial: Trial) -> float:
    suggest: Dict[str, float] = {
        "alpha": trial.suggest_loguniform("alpha", self.params["alpha"][0], self.params["alpha"][1]),
        "l1_ratio": trial.suggest_loguniform("l1_ratio", self.params["l1_ratio"][0], self.params["l1_ratio"][1]),
    }
    est: BaseEstimator = self.model.__class__(**suggest)
    return -cross_val_score(estimator=est, X=self.X, y=self.y, cv=self.cv, scoring="neg_mean_squared_error").mean()
def objective(trial: Trial, X_train, X_test, y_train, y_test) -> float:
    params = {
        "booster": "gbtree",
        "n_estimators": trial.suggest_int("n_estimators", 0, 1000),
        "max_depth": trial.suggest_int("max_depth", 2, 10),
        "reg_alpha": trial.suggest_int("reg_alpha", 0, 5),
        "reg_lambda": trial.suggest_int("reg_lambda", 0, 5),
        "min_child_weight": trial.suggest_int("min_child_weight", 0, 5),
        "gamma": trial.suggest_int("gamma", 0, 5),
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.005, 0.5),
        "colsample_bytree": trial.suggest_discrete_uniform(
            "colsample_bytree", 0.1, 1, 0.01
        ),
        "nthread": -1,
        "use_label_encoder": False,
        "eval_metric": "logloss",
    }
    model = MultiOutputRegressor(XGBRegressor(**params))
    model.fit(X_train, y_train)
    n_scores = cross_val_score(
        model, X_test, y_test, scoring="neg_mean_absolute_error", cv=3, n_jobs=-1
    )
    return np.mean(abs(n_scores))
def objective(trial: Trial, data) -> float:
    params = {
        "booster": "gbtree",
        # "tree_method": "gpu_hist",
        "n_estimators": trial.suggest_int("n_estimators", 0, 1000),
        "max_depth": trial.suggest_int("max_depth", 2, 10),
        "reg_alpha": trial.suggest_int("reg_alpha", 0, 5),
        "reg_lambda": trial.suggest_int("reg_lambda", 0, 5),
        "min_child_weight": trial.suggest_int("min_child_weight", 0, 5),
        "gamma": trial.suggest_int("gamma", 0, 5),
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.005, 0.5),
        "colsample_bytree": trial.suggest_discrete_uniform("colsample_bytree", 0.1, 1, 0.01),
        "nthread": -1,
        "use_label_encoder": False,
        "eval_metric": "logloss"
    }
    mae, y, yhat = walk_forward_validation(params, data, 20)
    return mae
def func(trial: Trial, x_max: float = 1.0) -> float:
    x = trial.suggest_uniform("x", -x_max, x_max)
    y = trial.suggest_loguniform("y", 20, 30)
    z = trial.suggest_categorical("z", (-1.0, 1.0))
    assert isinstance(z, float)
    return (x - 2)**2 + (y - 25)**2 + z
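# Quick check (assumed usage, not from the source): `func` can be evaluated
# deterministically with a FixedTrial, which is handy in tests, or optimized
# directly since `x_max` has a default.
import optuna

trial = optuna.trial.FixedTrial({"x": 1.0, "y": 25.0, "z": -1.0})
assert func(trial) == (1.0 - 2) ** 2 + (25.0 - 25) ** 2 + (-1.0)  # == 0.0

study = optuna.create_study(direction="minimize")
study.optimize(func, n_trials=50)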
def objective(trial: optuna.Trial, hparams: Namespace):
    """Return the objective loss for an optuna trial.

    Args:
        trial (optuna.Trial): The optuna trial.
        hparams (Namespace): The argparse `Namespace` that will be passed to the
            `LightningModule`.
    """
    hparams.loss = trial.suggest_categorical(
        'loss', ['CrossEntropy', 'TripletMargin', 'ArcFace'])
    if hparams.loss == 'ArcFace':
        hparams.loss_margin = trial.suggest_uniform('loss_margin', 0.3, 0.9)
        hparams.sampler = None
    elif hparams.loss == 'TripletMargin':
        hparams.loss_margin = trial.suggest_uniform('loss_margin', 0, 0.2)
        hparams.sampler = 'MPerClass'
        hparams.m_per_class = 5
        hparams.miner = trial.suggest_categorical('miner', ['BatchHard', None])
    hparams.optim = 'SGD'
    hparams.lr = trial.suggest_loguniform('lr', 1e-8, 1e0)
    max_steps = 1e4
    # the suggested value must match the string checked below
    # (the original suggested 'OneCycleLr', which could never trigger the branch)
    hparams.lr_sched = trial.suggest_categorical('lr_sched', [None, 'OneCycleLR'])
    if hparams.lr_sched == 'OneCycleLR':
        hparams.lr_sched_total_steps = max_steps
        hparams.lr_sched_max_lr = 10 * hparams.lr
    hparams.use_sample_data = True
def objective(trial: optuna.Trial):
    negative_rate = trial.suggest_categorical("negative_rate", [10, 20, 30, 40, 50, 60])
    in_dim = trial.suggest_categorical('input dim', [100, 200, 500, 1000])
    out_dim = in_dim
    alpha = trial.suggest_loguniform('alpha', 2e-6, 2e-1)
    return run_experiment(negative_rate, in_dim, out_dim, alpha)
def objective(trial: Trial, train_X, train_y, test_X, test_y) -> float:
    params = {
        "n_estimators": trial.suggest_int('n_estimators', 0, 1000),
        'max_depth': trial.suggest_int('max_depth', 2, 25),
        'reg_alpha': trial.suggest_int('reg_alpha', 0, 10),
        'reg_lambda': trial.suggest_int('reg_lambda', 0, 10),
        'min_child_weight': trial.suggest_int('min_child_weight', 0, 20),
        'gamma': trial.suggest_int('gamma', 0, 5),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.0001, 0.5),
        'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.1, 1, 0.01),
        'nthread': -1,
        'scale_pos_weight': trial.suggest_int('scale_pos_weight', 1, 10),
        'random_state': trial.suggest_int('random_state', 1, 30),
        'subsample': trial.suggest_float('subsample', 0.5, 0.9)
    }
    model = XGBClassifier(**params)
    model.fit(train_X, train_y)
    return cross_val_score(model, test_X, test_y).mean()
def _suggest(self, trial: optuna.Trial, v: problem.Var) -> float:
    if v.name in trial.params:
        if isinstance(trial.params[v.name], str):
            assert isinstance(v.range, problem.CategoricalRange)
            return v.range.choices.index(trial.params[v.name])
        else:
            return trial.params[v.name]

    if isinstance(v.range, problem.ContinuousRange):
        if v.distribution == problem.Distribution.UNIFORM:
            return trial.suggest_uniform(v.name, v.range.low, v.range.high)
        elif v.distribution == problem.Distribution.LOG_UNIFORM:
            return trial.suggest_loguniform(v.name, v.range.low, v.range.high)
    elif isinstance(v.range, problem.DiscreteRange):
        if self._use_discrete_uniform:
            return trial.suggest_discrete_uniform(v.name, v.range.low, v.range.high - 1, q=1)
        elif v.distribution == problem.Distribution.LOG_UNIFORM:
            return trial.suggest_int(v.name, v.range.low, v.range.high - 1, log=True)
        else:
            return trial.suggest_int(v.name, v.range.low, v.range.high - 1)
    elif isinstance(v.range, problem.CategoricalRange):
        category = trial.suggest_categorical(v.name, v.range.choices)
        return v.range.choices.index(category)

    raise ValueError("Unsupported parameter: {}".format(v))
def sample_sac_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for SAC hyperparams.

    :param trial:
    :return:
    """
    gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128, 256, 512, 1024, 2048])
    buffer_size = trial.suggest_categorical("buffer_size", [int(1e4), int(1e5), int(1e6)])
    learning_starts = trial.suggest_categorical("learning_starts", [0, 1000, 10000, 20000])
    # train_freq = trial.suggest_categorical('train_freq', [1, 10, 100, 300])
    train_freq = trial.suggest_categorical("train_freq", [1, 4, 8, 16, 32, 64, 128, 256, 512])
    # Polyak coeff
    tau = trial.suggest_categorical("tau", [0.001, 0.005, 0.01, 0.02, 0.05, 0.08])
    # gradient_steps takes too much time
    # gradient_steps = trial.suggest_categorical('gradient_steps', [1, 100, 300])
    gradient_steps = train_freq
    # ent_coef = trial.suggest_categorical('ent_coef', ['auto', 0.5, 0.1, 0.05, 0.01, 0.0001])
    ent_coef = "auto"
    # You can comment that out when not using gSDE
    log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
    # NOTE: Add "verybig" to net_arch when tuning HER
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium", "big"])
    # activation_fn = trial.suggest_categorical('activation_fn', [nn.Tanh, nn.ReLU, nn.ELU, nn.LeakyReLU])

    net_arch = {
        "small": [64, 64],
        "medium": [256, 256],
        "big": [400, 300],
        # Uncomment for tuning HER
        # "large": [256, 256, 256],
        # "verybig": [512, 512, 512],
    }[net_arch]

    target_entropy = "auto"
    # if ent_coef == 'auto':
    #     # target_entropy = trial.suggest_categorical('target_entropy', ['auto', 5, 1, 0, -1, -5, -10, -20, -50])
    #     target_entropy = trial.suggest_uniform('target_entropy', -10, 10)

    hyperparams = {
        "gamma": gamma,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "learning_starts": learning_starts,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "ent_coef": ent_coef,
        "tau": tau,
        "target_entropy": target_entropy,
        "policy_kwargs": dict(log_std_init=log_std_init, net_arch=net_arch),
    }

    if trial.using_her_replay_buffer:
        hyperparams = sample_her_params(trial, hyperparams)

    return hyperparams
def sample_ddpg_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for DDPG hyperparams.

    :param trial:
    :return:
    """
    gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("lr", 1e-5, 1)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 100, 128, 256, 512, 1024, 2048])
    buffer_size = trial.suggest_categorical("buffer_size", [int(1e4), int(1e5), int(1e6)])
    # Polyak coeff
    tau = trial.suggest_categorical("tau", [0.001, 0.005, 0.01, 0.02])

    episodic = trial.suggest_categorical("episodic", [True, False])

    if episodic:
        n_episodes_rollout = 1
        train_freq, gradient_steps = -1, -1
    else:
        train_freq = trial.suggest_categorical("train_freq", [1, 16, 128, 256, 1000, 2000])
        gradient_steps = train_freq
        n_episodes_rollout = -1

    noise_type = trial.suggest_categorical("noise_type", ["ornstein-uhlenbeck", "normal", None])
    noise_std = trial.suggest_uniform("noise_std", 0, 1)

    # NOTE: Add "verybig" to net_arch when tuning HER (see TD3)
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium", "big"])
    # activation_fn = trial.suggest_categorical('activation_fn', [nn.Tanh, nn.ReLU, nn.ELU, nn.LeakyReLU])

    net_arch = {
        "small": [64, 64],
        "medium": [256, 256],
        "big": [400, 300],
    }[net_arch]

    hyperparams = {
        "gamma": gamma,
        "tau": tau,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "n_episodes_rollout": n_episodes_rollout,
        "policy_kwargs": dict(net_arch=net_arch),
    }

    if noise_type == "normal":
        hyperparams["action_noise"] = NormalActionNoise(
            mean=np.zeros(trial.n_actions), sigma=noise_std * np.ones(trial.n_actions)
        )
    elif noise_type == "ornstein-uhlenbeck":
        hyperparams["action_noise"] = OrnsteinUhlenbeckActionNoise(
            mean=np.zeros(trial.n_actions), sigma=noise_std * np.ones(trial.n_actions)
        )

    return hyperparams
def suggest_kwargs(
    trial: Trial,
    prefix: str,
    kwargs_ranges: Mapping[str, Any],
    kwargs: Optional[Mapping[str, Any]] = None,
):
    _kwargs = {}
    if kwargs:
        _kwargs.update(kwargs)
    for name, info in kwargs_ranges.items():
        if name in _kwargs:
            continue  # has been set by default, won't be suggested
        prefixed_name = f'{prefix}.{name}'
        dtype, low, high = info['type'], info.get('low'), info.get('high')
        if dtype in {int, 'int'}:
            q, scale = info.get('q'), info.get('scale')
            if scale == 'power_two':
                _kwargs[name] = suggest_discrete_power_two_int(
                    trial=trial,
                    name=prefixed_name,
                    low=low,
                    high=high,
                )
            elif q is not None:
                _kwargs[name] = suggest_discrete_uniform_int(
                    trial=trial,
                    name=prefixed_name,
                    low=low,
                    high=high,
                    q=q,
                )
            else:
                _kwargs[name] = trial.suggest_int(name=prefixed_name, low=low, high=high)
        elif dtype in {float, 'float'}:
            if info.get('scale') == 'log':
                _kwargs[name] = trial.suggest_loguniform(name=prefixed_name, low=low, high=high)
            else:
                _kwargs[name] = trial.suggest_uniform(name=prefixed_name, low=low, high=high)
        elif dtype == 'categorical':
            choices = info['choices']
            _kwargs[name] = trial.suggest_categorical(name=prefixed_name, choices=choices)
        elif dtype in {bool, 'bool'}:
            _kwargs[name] = trial.suggest_categorical(name=prefixed_name, choices=[True, False])
        else:
            logger.warning(
                f'Unhandled data type ({dtype}) for parameter {name}')
    return _kwargs
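# Illustrative call (assumed; the keys, bounds, and 'model' prefix are made up
# for the example). `suggest_discrete_power_two_int` is the helper referenced
# above, so 'embedding_dim' here is sampled as a power of two.
def example_objective(trial: Trial) -> float:
    model_kwargs = suggest_kwargs(
        trial,
        prefix='model',
        kwargs_ranges={
            'embedding_dim': dict(type=int, low=4, high=8, scale='power_two'),
            'dropout': dict(type=float, low=0.0, high=0.5),
            'lr': dict(type=float, low=1e-4, high=1e-1, scale='log'),
            'use_bias': dict(type=bool),
        },
        kwargs={'dropout': 0.1},  # pinned value, so it is not suggested
    )
    ...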
def objective(trial: optuna.Trial):
    log_dir = os.path.join(args.log_dir, 'trial_{}'.format(trial.number))
    checkpoint_callback = pl.callbacks.ModelCheckpoint(dirpath=log_dir, monitor='val_ce', mode='min')
    data = SemEvalDataModule(path_train=args.path_train,
                             path_val=args.path_val,
                             batch_size=trial.suggest_categorical('batch_size', choices=[16, 32, 64]),
                             num_workers=args.workers)
    data.prepare_data()
    data.setup('fit')
    epochs = trial.suggest_categorical('epochs', choices=[3, 4, 5])
    lr_bert = trial.suggest_loguniform('lr_bert', 1e-6, 1e-4)
    lr_class = trial.suggest_loguniform('lr_class', 1e-5, 1e-3)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-3, 1e-1)
    total_steps = epochs * len(data.data_train)
    # guard against args.gpus == 0 (the original used min, which divides by zero on CPU)
    effective_steps = total_steps // (max(args.gpus, 1) * args.num_nodes * args.accumulate_grad_batches)
    model = SentBert(out_classes=3,
                     lr_bert=lr_bert,
                     lr_class=lr_class,
                     weight_decay=weight_decay,
                     train_steps=effective_steps)
    metrics_callback = MetricsCallback()
    pruning_callback = PyTorchLightningPruningCallback(trial, monitor='val_ce')
    trainer = pl.Trainer.from_argparse_args(args,
                                            default_root_dir=args.log_dir,
                                            max_epochs=epochs,
                                            checkpoint_callback=True,
                                            accelerator='ddp',
                                            auto_select_gpus=True,
                                            num_sanity_val_steps=0,
                                            profiler='simple',
                                            callbacks=[checkpoint_callback,
                                                       metrics_callback,
                                                       pruning_callback])
    trainer.fit(model=model, datamodule=data)
    return metrics_callback.metrics[-1]['val_ce'].item()
def objective(trial: optuna.Trial):
    # only test dropping socio-economic factors
    drop_sozioeco = trial.suggest_categorical("drop_eco", [True, False])
    # rest of preprocessing keeps default values
    # categorical encoding, try identical encoders for all columns (for now)
    enc_name = trial.suggest_categorical("encoder", ["one-hot", "woe", "binary"])
    enc = encoders[enc_name]
    x_tr = enc.fit_transform(x, y)
    param = {
        "verbosity": 0,
        "objective": "binary:logistic",
        "eval_metric": ["aucpr"],
        "max_depth": trial.suggest_int("max_depth", 4, 8),
        "booster": "gbtree",
        "lambda": trial.suggest_float("lambda", 1e-7, 0.5, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 0.5, log=True),
        "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
        "eta": trial.suggest_loguniform("lr", 1e-5, 0.2),
        "gamma": trial.suggest_loguniform("gamma", 1e-8, 1.0),
        "grow_policy": trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])
    }
    dtrain = xgb.DMatrix(x_tr, label=y)
    cb = optuna.integration.XGBoostPruningCallback(
        trial, observation_key='test-aucpr')
    # pass the pruning callback to xgb.cv (it was created but never used before)
    scores = xgb.cv(param, dtrain, nfold=5, stratified=True, callbacks=[cb])
    test_aucpr = scores['test-aucpr-mean'].values[-1]
    return test_aucpr
def add_suggest(trial: optuna.Trial, user_attrs={}):
    """
    Add hyperparam ranges to an optuna trial and typical user attrs.

    Usage:
        trial = optuna.trial.FixedTrial(
            params={
                'hidden_size': 128,
            }
        )
        trial = add_suggest(trial)
        trainer = pl.Trainer()
        model = LSTM_PL(dict(**trial.params, **trial.user_attrs),
                        dataset_train, dataset_test, cache_base_path, norm)
        trainer.fit(model)
    """
    trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
    trial.suggest_uniform("attention_dropout", 0, 0.75)
    # we must have nhead <= hidden_size
    # so nhead_power.max() <= hidden_size_power.min()
    trial.suggest_discrete_uniform("hidden_size_power", 4, 10, 1)
    trial.suggest_discrete_uniform("hidden_out_size_power", 4, 9, 1)
    trial.suggest_discrete_uniform("nhead_power", 1, 4, 1)
    trial.suggest_int("nlayers", 1, 12)
    trial.suggest_categorical("use_lstm", [False, True])
    trial.suggest_categorical("agg", ['last', 'max', 'mean', 'all'])

    user_attrs_default = {
        "batch_size": 16,
        "grad_clip": 40,
        "max_nb_epochs": 200,
        "num_workers": 4,
        "num_extra_target": 24 * 4,
        "vis_i": "670",
        "num_context": 24 * 4,
        "input_size": 18,
        "input_size_decoder": 17,
        "context_in_target": False,
        "output_size": 1,
        "patience": 3,
        'min_std': 0.005,
    }
    for k, v in user_attrs_default.items():
        trial.set_user_attr(k, v)
    for k, v in user_attrs.items():
        trial.set_user_attr(k, v)
    return trial
def sample_dqn_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for DQN hyperparams.

    :param trial:
    :return:
    """
    gamma = trial.suggest_categorical(
        "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 100, 128, 256, 512])
    buffer_size = trial.suggest_categorical(
        "buffer_size", [int(1e4), int(5e4), int(1e5), int(1e6)])
    exploration_final_eps = trial.suggest_uniform("exploration_final_eps", 0, 0.2)
    exploration_fraction = trial.suggest_uniform("exploration_fraction", 0, 0.5)
    target_update_interval = trial.suggest_categorical(
        "target_update_interval", [1, 1000, 5000, 10000, 15000, 20000])
    learning_starts = trial.suggest_categorical("learning_starts", [0, 1000, 5000, 10000, 20000])

    train_freq = trial.suggest_categorical("train_freq", [1, 4, 8, 16, 128, 256, 1000])
    subsample_steps = trial.suggest_categorical("subsample_steps", [1, 2, 4, 8])
    gradient_steps = max(train_freq // subsample_steps, 1)

    net_arch = trial.suggest_categorical("net_arch", ["tiny", "small", "medium"])

    net_arch = {
        "tiny": [64],
        "small": [64, 64],
        "medium": [256, 256]
    }[net_arch]

    hyperparams = {
        "gamma": gamma,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "exploration_fraction": exploration_fraction,
        "exploration_final_eps": exploration_final_eps,
        "target_update_interval": target_update_interval,
        "learning_starts": learning_starts,
        "policy_kwargs": dict(net_arch=net_arch),
    }

    if trial.using_her_replay_buffer:
        hyperparams = sample_her_params(trial, hyperparams)

    return hyperparams
def sample_td3_params(trial: optuna.Trial) -> Dict[str, Any]:
    """
    Sampler for TD3 hyperparams.

    :param trial:
    :return:
    """
    gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 100, 128, 256, 512, 1024, 2048])
    buffer_size = trial.suggest_categorical("buffer_size", [int(1e4), int(1e5), int(1e6)])
    # Polyak coeff
    tau = trial.suggest_categorical("tau", [0.001, 0.005, 0.01, 0.02, 0.05, 0.08])

    train_freq = trial.suggest_categorical("train_freq", [1, 4, 8, 16, 32, 64, 128, 256, 512])
    gradient_steps = train_freq

    noise_type = trial.suggest_categorical("noise_type", ["ornstein-uhlenbeck", "normal", None])
    noise_std = trial.suggest_uniform("noise_std", 0, 1)

    # NOTE: Add "verybig" to net_arch when tuning HER
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium", "big"])
    # activation_fn = trial.suggest_categorical('activation_fn', [nn.Tanh, nn.ReLU, nn.ELU, nn.LeakyReLU])

    net_arch = {
        "small": [64, 64],
        "medium": [256, 256],
        "big": [400, 300],
        # Uncomment for tuning HER
        # "verybig": [256, 256, 256],
    }[net_arch]

    hyperparams = {
        "gamma": gamma,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "policy_kwargs": dict(net_arch=net_arch),
        "tau": tau,
    }

    if noise_type == "normal":
        hyperparams["action_noise"] = NormalActionNoise(
            mean=np.zeros(trial.n_actions), sigma=noise_std * np.ones(trial.n_actions)
        )
    elif noise_type == "ornstein-uhlenbeck":
        hyperparams["action_noise"] = OrnsteinUhlenbeckActionNoise(
            mean=np.zeros(trial.n_actions), sigma=noise_std * np.ones(trial.n_actions)
        )

    if trial.using_her_replay_buffer:
        hyperparams = sample_her_params(trial, hyperparams)

    return hyperparams
def objective(self, trial: Trial) -> float:
    alpha: float = trial.suggest_loguniform("alpha", self.params["alpha"][0], self.params["alpha"][1])
    est: BaseEstimator = self.model.__class__(alpha=alpha)
    return -cross_val_score(estimator=est, X=self.X, y=self.y, cv=self.cv, scoring="neg_mean_squared_error").mean()
def svc(trial: optuna.Trial):
    svc_c = trial.suggest_loguniform("svc_c", 1e-10, 1e10)
    kernel = trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid'])
    degree = trial.suggest_int('degree', 1, 10)
    classifier = sklearn.svm.SVC(C=svc_c, gamma="auto", kernel=kernel, degree=degree)
    return evaluate_classifier(classifier)
def cnn_pipeline_factory(report_dir: Path, trial: Trial) -> ArmorDigitPipeline:
    return ArmorDigitKerasPipeline.from_custom_cnn(
        input_size=32,
        conv_blocks=((32, 32), (64, 64)),
        logs_dir=str(report_dir),
        dropout=trial.suggest_uniform("dropout", 0, 0.99),
        lr=trial.suggest_loguniform("lr", 1e-5, 1e-1),
        dense_size=2 ** round(trial.suggest_discrete_uniform("dense_size_log2", 3, 10, 1)),
    )
def get_trainer_and_reporter(
        trial: Trial,
        model: CbLossClassifier,
        iter_test: iterators.SerialIterator,
        iter_train: iterators.SerialIterator,
        batch_converter,
        args,
        device=0,
        best_params={}):
    if best_params != {}:  # when reusing best_params from a previous run
        learning_rate = best_params['learning_rate']
    else:
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
    grad_clipping = trial.suggest_uniform('grad_clipping', 0, 1.0)

    optimizer = optimizers.SGD(lr=learning_rate)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.GradientClipping(threshold=grad_clipping))

    updater = training.StandardUpdater(
        iter_train,
        optimizer,
        device=device,
        converter=batch_converter
    )
    early_trigger = training.triggers.EarlyStoppingTrigger(
        check_trigger=(1, "epoch"),
        monitor="validation/main/accuracy",
        patients=3,
        mode="max",
        max_trigger=(args.epoch, "epoch")
    )
    trainer = training.Trainer(updater, early_trigger, out='optuna')
    trainer.extend(extensions.Evaluator(iter_test, model, device=device, converter=batch_converter))

    snapshot_writer = training.extensions.snapshot_writers.ThreadQueueWriter()
    trainer.extend(training.extensions.snapshot_object(
        target=model,
        filename='model_{}.npz'.format(args.desc),
        writer=snapshot_writer), trigger=(10, 'epoch'))

    reporter = extensions.LogReport()
    trainer.extend(reporter)
    trainer.extend(integration.ChainerPruningExtension(
        trial, args.pruning_key, (args.pruning_trigger_epoch, 'epoch')))

    iter_test.reset()
    iter_train.reset()
    return trainer, reporter
def suggest_kwargs(
    trial: Trial,
    prefix: str,
    kwargs_ranges: Mapping[str, Any],
    kwargs: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
    _kwargs: Dict[str, Any] = {}
    if kwargs:
        _kwargs.update(kwargs)
    for name, info in kwargs_ranges.items():
        if name in _kwargs:
            continue  # has been set by default, won't be suggested
        prefixed_name = f'{prefix}.{name}'
        dtype, low, high = info['type'], info.get('low'), info.get('high')
        if dtype in {int, 'int'}:
            scale = info.get('scale')
            if scale in {'power_two', 'power'}:
                _kwargs[name] = suggest_discrete_power_int(
                    trial=trial,
                    name=prefixed_name,
                    low=low,
                    high=high,
                    base=info.get('q') or info.get('base') or 2,
                )
            elif scale is None or scale == 'linear':
                # get log from info - could either be a boolean or string
                log = info.get('log') in {True, 'TRUE', 'True', 'true', 't', 'YES', 'Yes', 'yes', 'y'}
                _kwargs[name] = trial.suggest_int(
                    name=prefixed_name,
                    low=low,
                    high=high,
                    step=info.get('q') or info.get('step') or 1,
                    log=log,
                )
            else:
                logger.warning(f'Unhandled scale {scale} for parameter {name} of data type {dtype}')
        elif dtype in {float, 'float'}:
            if info.get('scale') == 'log':
                _kwargs[name] = trial.suggest_loguniform(name=prefixed_name, low=low, high=high)
            else:
                _kwargs[name] = trial.suggest_uniform(name=prefixed_name, low=low, high=high)
        elif dtype == 'categorical':
            choices = info['choices']
            _kwargs[name] = trial.suggest_categorical(name=prefixed_name, choices=choices)
        elif dtype in {bool, 'bool'}:
            _kwargs[name] = trial.suggest_categorical(name=prefixed_name, choices=[True, False])
        else:
            logger.warning(f'Unhandled data type ({dtype}) for parameter {name}')
    return _kwargs