def get_parameters(train_data, kFold, iterations, save=False, filepath='./result/loss_time_tpe.csv'):
    """Tune LightGBM hyperparameters with hyperopt's TPE algorithm.

    Parameters
    ----------
    train_data : dataset forwarded untouched to ``object_function.cv_method``.
    kFold : cross-validation specification forwarded to ``cv_method``.
    iterations : number of TPE evaluations (``max_evals``).
    save : when True, record a (timepoint, loss) pair per evaluation and
        write the trace to ``filepath`` via ``save_to_csv.save``.
    filepath : destination CSV for the loss/time trace (used only if ``save``).

    Returns
    -------
    tuple
        ``(best_parameters, best_loss)`` — the config chosen by
        ``space_eval`` and the loss of the best trial.
    """

    def objective(parameters):
        # cv_method has two call shapes: given the start time it also returns
        # the wall-clock timepoint of the evaluation, which we collect for the
        # loss-over-time trace.
        if save:
            loss, timepoint = object_function.cv_method(
                parameters, train_data, kFold, start)
            timepoint_dic.append(timepoint)
            loss_dic.append(loss)
        else:
            loss = object_function.cv_method(parameters, train_data, kFold)
        return loss

    configspace = {
        'boosting_type': hp.choice('boosting_type', ['gbdt']),
        'objective': hp.choice('objective', ['regression_l2']),
        'num_leaves': scope.int(hp.quniform('num_leaves', 10, 35, 1)),
        'min_data_in_leaf': scope.int(hp.quniform('min_data_in_leaf', 1, 12, 1)),
        'max_bin': scope.int(hp.quniform('max_bin', 20, 255, 10)),
        'lambda_l2': scope.int(hp.quniform('lambda_l2', 0, 70, 5)),
        'feature_fraction': hp.uniform('feature_fraction', 0.01, 1),
        'min_gain_to_split': hp.uniform('min_gain_to_split', 0.0, 1),
        # BUG FIX: this entry used to be a duplicate 'feature_fraction' key,
        # which silently overwrote the one above and dropped learning_rate
        # from the search space entirely.  The sibling Optuna/bayes_opt tuners
        # search learning_rate over exactly this (0.005, 0.5) range.
        'learning_rate': hp.uniform('learning_rate', 0.005, 0.5),
        'verbose': hp.choice('verbose', [-1]),
    }

    trials = Trials()
    if save:
        start = time.time()
        timepoint_dic = []
        loss_dic = []
    best = fmin(objective, configspace, algo=tpe.suggest,
                max_evals=iterations, trials=trials)
    best_parameters = space_eval(configspace, best)
    best_loss = trials.best_trial['result']['loss']
    if save:
        save_to_csv.save(filepath, timepoint_dic, loss_dic)
    return best_parameters, best_loss
def get_parameters(train_data, kFold, iterations, save=False, filepath='./result/loss_time_optuna.csv'):
    """Tune LightGBM hyperparameters with Optuna.

    Parameters
    ----------
    train_data : dataset forwarded untouched to ``object_function.cv_method``.
    kFold : cross-validation specification forwarded to ``cv_method``.
    iterations : number of Optuna trials (``n_trials``).
    save : when True, record a (timepoint, loss) pair per trial and write
        the trace to ``filepath`` via ``save_to_csv.save``.
    filepath : destination CSV for the loss/time trace (used only if ``save``).

    Returns
    -------
    tuple
        ``(study.best_params, study.best_value)``.
    """

    def objective(trial):
        # Same search space as the TPE/bayes_opt variants in this project.
        parameters = {
            'boosting_type': 'gbdt',
            'objective': 'regression_l2',
            'learning_rate': trial.suggest_uniform('learning_rate', 0.005, 0.5),
            'num_leaves': trial.suggest_int('num_leaves', 10, 35),
            'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 12),
            'max_bin': trial.suggest_int('max_bin', 20, 255),
            'feature_fraction': trial.suggest_uniform('feature_fraction', 0.01, 1.0),
            'verbose': -1,
            'lambda_l2': trial.suggest_uniform('lambda_l2', 0, 70.0),
            'min_gain_to_split': trial.suggest_uniform('min_gain_to_split', 0.0, 1.0),
        }
        # cv_method has two call shapes: given the start time it also returns
        # the wall-clock timepoint of the evaluation.
        if save:
            loss, timepoint = object_function.cv_method(
                parameters, train_data, kFold, start)
            timepoint_dic.append(timepoint)
            loss_dic.append(loss)
        else:
            loss = object_function.cv_method(parameters, train_data, kFold)
        return loss

    # The study creation/optimization was previously duplicated in both
    # branches; only the bookkeeping differs between save and non-save runs.
    if save:
        start = time.time()
        timepoint_dic = []
        loss_dic = []
    study = optuna.create_study()
    study.optimize(objective, n_trials=iterations)
    if save:
        save_to_csv.save(filepath, timepoint_dic, loss_dic)
    return study.best_params, study.best_value
def get_parameters(train_data, kFold, iterations, save=False, filepath='./result/loss_time_bohb.csv'):
    """Tune LightGBM hyperparameters with BOHB (HpBandSter).

    Starts a local nameserver and a single background worker, runs BOHB for
    ``iterations`` optimizer iterations, then tears everything down.

    Parameters
    ----------
    train_data : dataset handed to the ``worker`` wrapper.
    kFold : cross-validation specification handed to the ``worker`` wrapper.
    iterations : number of BOHB iterations (``--n_iterations`` default).
    save : when True, dump every run's finish timestamp and loss to
        ``filepath`` via ``save_to_csv.save``.
    filepath : destination CSV for the loss/time trace (used only if ``save``).

    Returns
    -------
    tuple
        ``(parameter, min_error)`` — the incumbent configuration and the loss
        of its first recorded run.

    NOTE(review): this parses ``sys.argv`` via argparse inside a library
    function, so unrelated CLI flags passed to the host script will abort it.
    """
    # Command-line overrides for the optimizer budget; defaults reproduce a
    # single-fidelity run (min_budget == max_budget == 1).
    parser = argparse.ArgumentParser(description='Example 1 - sequential and local execution.')
    parser.add_argument('--min_budget', type=float, help='Minimum budget used during the optimization.', default=1)
    parser.add_argument('--max_budget', type=float, help='Maximum budget used during the optimization.', default=1)
    parser.add_argument('--n_iterations', type=int, help='Number of iterations performed by the optimizer', default=iterations)
    parser.add_argument('--shared_directory', type=str, help='A directory that is accessible for all processes, e.g. a NFS share.', default='./result')
    cli = parser.parse_args()

    json_logger = hpres.json_result_logger(directory=cli.shared_directory, overwrite=True)

    # Nameserver must be up before the worker registers against it.
    nameserver = hpns.NameServer(run_id='BOHB', host='127.0.0.1', port=None)
    nameserver.start()

    cv_worker = worker(train_data, kFold, nameserver='127.0.0.1', run_id='BOHB')
    cv_worker.run(background=True)

    optimizer = BOHB(
        configspace=cv_worker.get_configspace(),
        run_id='BOHB',
        nameserver='127.0.0.1',
        min_budget=cli.min_budget,
        max_budget=cli.max_budget,
        result_logger=json_logger,
    )
    result = optimizer.run(n_iterations=cli.n_iterations)

    # Tear down worker and nameserver before inspecting results.
    optimizer.shutdown(shutdown_workers=True)
    nameserver.shutdown()

    incumbent = result.get_incumbent_id()
    parameter = result.get_id2config_mapping()[incumbent]['config']
    min_error = result.get_runs_by_id(incumbent)[0]['loss']

    if save:
        finished = []
        losses = []
        for run in result.get_all_runs():
            finished.append(run['time_stamps']['finished'])
            losses.append(run['loss'])
        save_to_csv.save(filepath, finished, losses)

    return parameter, min_error
def get_parameters(train_data, kFold, iterations, save=False, filepath='./result/loss_time_gs.csv'):
    """Tune LightGBM hyperparameters with bayes_opt's BayesianOptimization.

    bayes_opt *maximizes* its target, so the objective returns the negated
    cross-validation loss and the final loss is negated back.

    Parameters
    ----------
    train_data : dataset forwarded untouched to ``object_function.cv_method``.
    kFold : cross-validation specification forwarded to ``cv_method``.
    iterations : total evaluation budget (random init points + BO steps).
    save : when True, record a (timepoint, loss) pair per evaluation and
        write the trace to ``filepath`` via ``save_to_csv.save``.
    filepath : destination CSV for the loss/time trace (used only if ``save``).

    Returns
    -------
    tuple
        ``(params, loss)`` — the best parameter dict found and its
        (positive) cross-validation loss.
    """

    def func(parameters):
        # cv_method has two call shapes: given the start time it also returns
        # the wall-clock timepoint of the evaluation.
        if save:
            loss, timepoint = object_function.cv_method(
                parameters, train_data, kFold, start)
            timepoint_dic.append(timepoint)
            loss_dic.append(loss)
        else:
            loss = object_function.cv_method(parameters, train_data, kFold)
        # Negate: bayes_opt maximizes, cv_method returns a loss to minimize.
        return -loss

    def black_box_function(num_leaves, max_bin, min_data_in_leaf,
                           feature_fraction, lambda_l2, min_gain_to_split,
                           learning_rate):
        # bayes_opt only proposes floats; cast the integer-valued
        # hyperparameters before handing them to LightGBM.
        parameters = {
            'boosting_type': 'gbdt',
            'objective': 'regression_l2',
            'learning_rate': learning_rate,
            'num_leaves': int(num_leaves),
            'min_data_in_leaf': int(min_data_in_leaf),
            'max_bin': int(max_bin),
            'feature_fraction': feature_fraction,
            'verbose': -1,
            'lambda_l2': lambda_l2,
            'min_gain_to_split': min_gain_to_split,
        }
        return func(parameters)

    configspace = {
        'num_leaves': (10, 35),
        'min_data_in_leaf': (1, 12),
        'max_bin': (20, 255),
        'feature_fraction': (0.01, 1.0),
        'lambda_l2': (0, 70),
        # CONSISTENCY FIX: was (0, 10); the TPE and Optuna tuners in this
        # project both search min_gain_to_split over (0, 1).
        'min_gain_to_split': (0, 1.0),
        'learning_rate': (0.005, 0.5),
    }

    optimizer = BayesianOptimization(
        f=black_box_function,
        pbounds=configspace,
        verbose=1,  # 1 prints only when a maximum is observed, 0 is silent
        random_state=random.randint(1, 100),
    )

    if save:
        start = time.time()
        timepoint_dic = []
        loss_dic = []
    # ROBUSTNESS FIX: previously init_points was a hard-coded 5 and
    # n_iter=iterations-5 went negative for small budgets.  Keep the total
    # number of evaluations equal to `iterations`.
    init_points = min(5, iterations)
    optimizer.maximize(init_points=init_points,
                       n_iter=max(iterations - init_points, 0),
                       acq='ei')
    loss = -optimizer.max['target']
    params = optimizer.max['params']
    if save:
        save_to_csv.save(filepath, timepoint_dic, loss_dic)
    return params, loss