def main():
    """Demo: grid-search feature-group combinations with Optuna and print the winner."""
    explorer = GroupCombinationExplorer(
        base_cols=["v1", "v2"],
        list_groups={
            "group1": ["v3", "v4"],
            "group2": ["v5"],
        },
    )
    frame = pd.DataFrame({
        "v1": [1, 1, 1, 1],
        "v2": [2, 2, 2, 2],
        "v3": [2, 2, 2, 2],
        "v4": [2, 2, 2, 2],
        "v5": [2, 2, 2, 2],
        "target": [0, 0, 0, 1],
    })
    # Exhaustive search: one trial per point in the explorer's search space.
    study = optuna.create_study(
        direction="maximize",
        sampler=GridSampler(explorer.get_search_space()),
    )
    study.optimize(
        partial(objective, frame, explorer),
        n_trials=explorer.gridsearch_space_size(),
    )
    # Restore the column selection recorded on the winning trial.
    explorer.from_trial(study.best_trial)
    print("Selected cols:", explorer.get_selected_cols())
def _grid_size(search_space):
    """Number of points in a grid search space (product of per-key option counts)."""
    size = 1
    for values in search_space.values():
        size *= len(values)
    return size


def start_tuning(train_data, valid_data, model_path: str, param_path: str, sampler='TPE'):
    """Run an Optuna hyper-parameter search and persist the best parameters.

    Args:
        train_data: training split, forwarded to ``objective``.
        valid_data: validation split, forwarded to ``objective``.
        model_path: location where ``objective`` stores model artifacts.
        param_path: path where the best parameter dict is saved as JSON.
        sampler: search strategy — 'TPE', 'Grid', or 'Grid_with_two_lr'.

    Returns:
        Tuple of ``(best_params, best_trial)`` from the finished study.

    Raises:
        ValueError: if ``sampler`` is not one of the recognized strategies.
    """
    if sampler == 'TPE':
        print('selecting tpe sampler')
        study = optuna.create_study(direction="maximize", sampler=TPESampler())
        study.optimize(
            lambda trial: objective(train_data, valid_data, model_path, trial),
            n_trials=30)
    elif sampler == 'Grid':
        print('selecting grid search sampler')
        search_space = {"lrmain": [5e-5, 4e-5, 3e-5, 2e-5],
                        "drop_out": [0.0, 0.1, 0.2, 0.3]}
        study = optuna.create_study(direction="maximize",
                                    sampler=GridSampler(search_space))
        # Trial count derived from the space itself so the two never desync
        # (previously hard-coded as 4 * 4).
        study.optimize(
            lambda trial: objective(train_data, valid_data, model_path, trial),
            n_trials=_grid_size(search_space))
    elif sampler == 'Grid_with_two_lr':
        print('selecting grid search sampler 2lr')
        search_space = {"lrmain": [5e-5, 4e-5, 3e-5, 2e-5],
                        'lrclassifier': [1e-3, 1e-2, 1e-1],
                        "drop_out": [0.0, 0.1, 0.2, 0.3]}
        study = optuna.create_study(direction="maximize",
                                    sampler=GridSampler(search_space))
        study.optimize(
            lambda trial: objective(train_data, valid_data, model_path, trial),
            n_trials=_grid_size(search_space))
    else:
        # Previously an unknown sampler fell through every branch and crashed
        # later with a NameError on ``study``; fail fast with a clear message.
        raise ValueError(f"unknown sampler: {sampler!r}")
    best_params = study.best_params
    save_json(best_params, param_path)
    return best_params, study.best_trial
def __init__(self, engine, mode='fast_ai', epochs_warmup=2, max_lr=0.03, min_lr=4e-3, step=0.001, num_epochs=3, path_to_savefig='', seed=5, stop_callback=None, smooth_f=0.01, n_trials=30, **kwargs) -> None:
    r"""A pipeline for learning rate search.

    Args:
        engine: training engine exposing ``get_model_names``, ``models``,
            ``optims`` and ``enable_sam``.
        mode (str, optional): mode for learning rate finder — "fast_ai",
            "grid_search" or "TPE". Default is "fast_ai".
        epochs_warmup (int, optional): warm-up epochs before measuring.
        max_lr (float): upper bound for learning rate.
        min_lr (float): lower bound for learning rate.
        step (float, optional): step of the learning-rate search grid.
            Default is 1e-3.
        num_epochs (int, optional): number of epochs to train for each
            learning rate. Default is 3.
        path_to_savefig (str): if a path is given, save the loss/lr plot
            (fast_ai mode only). Default: ''.
        seed (int, optional): RNG seed used by the TPE sampler. Default: 5.
        stop_callback (optional): callback to stop a search run early.
        smooth_f (float, optional): loss-smoothing factor. Default: 0.01.
        n_trials (int, optional): number of Optuna trials. Default: 30.
    """
    self.engine = engine
    main_model_name = engine.get_model_names(None)[0]
    self.model = engine.models[main_model_name]
    self.optimizer = engine.optims[main_model_name]
    self.model_device = next(self.model.parameters()).device
    self.mode = mode
    self.min_lr = min_lr
    self.max_lr = max_lr
    self.step = step
    self.n_trials = n_trials
    self.num_epochs = num_epochs
    self.path_to_savefig = path_to_savefig
    self.seed = seed
    self.stop_callback = stop_callback
    self.epochs_warmup = epochs_warmup
    self.enable_sam = engine.enable_sam
    self.smooth_f = smooth_f
    self.engine_cfg = Dict(min_lr=min_lr, max_lr=max_lr, mode=mode, step=step)
    # Plain Python list of floats: GridSampler's search space is specified
    # as lists of candidate values.
    search_space = np.arange(min_lr, max_lr, step).tolist()
    self.samplers = {
        'grid_search': GridSampler(search_space={'lr': search_space}),
        # Bug fix: previously ``seed=True`` was passed, which silently
        # ignored the user-supplied ``seed`` argument (it was stored on
        # ``self.seed`` but never used). Pass the real seed so TPE runs
        # are reproducible per the constructor contract.
        'TPE': TPESampler(n_startup_trials=5, seed=seed),
    }
def __init__(self, argument, grid_search_space=None):
    """Configure sampler, trial budget, objective and mlflow experiment, then create the study.

    Args:
        argument: run configuration; ``argument.sampler`` selects the
            strategy ("grid", "random", or TPE by default).
        grid_search_space: dict of candidate lists, required for "grid".
    """
    self.name = ''
    self.argument = argument
    self.grid_search_space = grid_search_space
    chosen = self.argument.sampler
    if chosen == "grid":
        assert self.grid_search_space is not None, "grid search spaceを指定してください"
        self.sampler = GridSampler(self.grid_search_space)
        # Budget = full Cartesian product of the grid.
        total = 1
        for candidates in self.grid_search_space.values():
            total *= len(candidates)
        self.n_trials = total
        # Trial-count cap (disabled):
        # if self.n_trials > self.argument.n_trials:
        #     self.n_trials = self.argument.n_trials
        self.obj_func_name = self.objective_grid
    elif chosen == "random":
        self.sampler = RandomSampler(seed=self.argument.seed)
        self.n_trials = self.argument.n_trials
        self.obj_func_name = self.objective_no_grid
    else:
        self.sampler = TPESampler(**TPESampler.hyperopt_parameters(),
                                  seed=self.argument.seed)
        self.n_trials = self.argument.n_trials
        self.obj_func_name = self.objective_no_grid
    # Single-trial runs reuse the bare experiment name; multi-trial runs get
    # a timestamped one. mlflow failures are reported but never fatal.
    if self.n_trials == 1:
        experiment_name = self.argument.experiment
    else:
        experiment_name = (self.argument.experiment + "_"
                           + datetime.now().strftime('%Y%m%d_%H:%M:%S'))
    try:
        mlflow.set_experiment(experiment_name)
    except Exception as e:
        print(e)
    self.study = optuna.create_study(sampler=self.sampler)
def main():
    """CLI entry point: run nested cross-validation with grid, random ('hpo')
    or TPE ('hpo-tpe') hyper-parameter search on a simulated dataset.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', required=True)
    parser.add_argument('--seed', required=False, type=int, default=1)
    parser.add_argument('--run', required=True, choices=['grid', 'hpo', 'hpo-tpe'])
    args = parser.parse_args()
    print(f'Dataset = {args.dataset}')

    data_folder = get_data(path_dir, args.dataset)
    inputs = data_folder.X
    labels = preprocess_data(data_folder.y)
    folds = data_folder.folds

    result_folder = '../../../../result/simulated/'
    if args.run == 'hpo':
        run_nested_cv(
            inputs, labels, folds,
            seed=args.seed,
            dataset_name=args.dataset,
            search_obj=HPO(),
            n_trials=100,
            distributions=['normal', 'logistic', 'extreme'],
            sampler=RandomSampler(seed=args.seed),
            model_file_fmt=result_folder +
            '{dataset_name}/{distribution}-fold{test_fold_id}-model.json',
            trial_log_fmt=result_folder +
            '{dataset_name}/{distribution}-fold{test_fold_id}.json')
    elif args.run == 'hpo-tpe':
        run_nested_cv(
            inputs, labels, folds,
            seed=args.seed,
            dataset_name=args.dataset,
            search_obj=HPO(),
            n_trials=100,
            distributions=['normal', 'logistic', 'extreme'],
            sampler=TPESampler(seed=args.seed),
            # Bug fix: unlike the 'hpo' and 'grid' branches, these paths
            # were missing the result_folder prefix, so TPE artifacts were
            # written relative to the CWD instead of the results tree.
            model_file_fmt=result_folder +
            '{dataset_name}/tpe-{distribution}-fold{test_fold_id}-model.json',
            trial_log_fmt=result_folder +
            '{dataset_name}/tpe-{distribution}-fold{test_fold_id}.json')
    elif args.run == 'grid':
        grid = {
            'learning_rate': [0.001, 0.01, 0.1, 1.0],
            'max_depth': [2, 3, 4, 5, 6, 7, 8, 9, 10],
            'min_child_weight': [0.1, 1.0, 10.0, 100.0],
            'reg_alpha': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0],
            'reg_lambda': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0],
            'aft_loss_distribution_scale': [1.0, 10.0, 100.0]
        }
        run_nested_cv(
            inputs, labels, folds,
            seed=args.seed,
            dataset_name=args.dataset,
            search_obj=Grid(),
            n_trials=100,
            distributions=['normal'],
            sampler=GridSampler(search_space=grid),
            model_file_fmt=result_folder +
            '{dataset_name}/grid-{distribution}-fold{test_fold_id}-model.json',
            trial_log_fmt=result_folder +
            '{dataset_name}/grid-{distribution}-fold{test_fold_id}.json')
    else:
        # Unreachable given argparse ``choices``, but kept as a defensive guard.
        raise ValueError(f'Unknown run: {args.run}')