def optimize():
    global ITERATIONS
    ITERATIONS = 1
    MAX_EVALS = 10

    from bayes_opt import BayesianOptimization

    # Bounded region of parameter space
    pbounds = {'alpha': (0.001, 0.999), 'beta': (0.001, 1.5)}

    optimizer = BayesianOptimization(
        f=objective,
        pbounds=pbounds,
        random_state=1,
    )

    # Resume from a previous log if one exists; otherwise start fresh.
    try:
        from bayes_opt.util import load_logs
        load_logs(optimizer, logs=["logs.json"])
        print("Rerunning from {} trials".format(len(optimizer.res)))
    except:
        print("Starting from scratch: new trials.")

    from bayes_opt.observer import JSONLogger
    from bayes_opt.event import Events
    logger = JSONLogger(path="logs.json")
    optimizer.subscribe(Events.OPTMIZATION_STEP, logger)

    # Results will be saved in ./logs.json
    optimizer.maximize(
        init_points=20,  # max(0, 5 - len(optimizer.res)),
        n_iter=MAX_EVALS,
    )
    print(optimizer.max)
def optimize(self, random_state=5, verbose=2, init_points=10, n_iter=5, acq='poi'):
    """Function to perform the actual optimization."""
    for hyp in itertools.product(*self._category_para.values()):
        self._hyd_d = dict(zip(self._category_para.keys(), hyp))
        print("category_para: ", self._hyd_d)
        optimizer = BayesianOptimization(f=self.optimization_func,
                                         pbounds=self._search_space,
                                         random_state=random_state)
        log_path = "gemben/intermediate/bays_opt/"
        os.makedirs(log_path, exist_ok=True)
        logger = JSONLogger(path=log_path + "logs.json")
        optimizer.subscribe(Events.OPTMIZATION_STEP, logger)
        optimizer.maximize(init_points=init_points, n_iter=n_iter,
                           acq=acq, kappa=2.576, xi=1.0)
        print("category_para: ", self._hyd_d)
        print("Final result:", optimizer.max)
    return 0
def run_bayesian_optimization():
    file = open('output_raw.log', 'w+')
    file.close()

    # Bounds for Pressure (10 - 140 torr) and Scintillator Distance (120 - 240 mm)
    # pbounds = {'x': (10, 140), 'y': (120, 240), 'g': (1, 3), 'w': (1, 5)}
    pbounds = {
        'x': (10, 140),
        'y': (120, 240),
        'g': (1, 4),
        'w': (1, 5),
        'gz': (20, 200)
    }

    # Bayesian Optimizer
    # Verbose = 0: Silent
    # Verbose = 1: Prints only when a maximum is observed
    # Verbose = 2: Prints everything
    optimizer = BayesianOptimization(f=black_box, pbounds=pbounds, verbose=2, random_state=42)

    # For saving progress
    logger = JSONLogger(path="./logs.json")
    optimizer.subscribe(Events.OPTMIZATION_STEP, logger)

    # n_iter: How many steps of bayesian optimization you want to perform
    # init_points: How many steps of random exploration you want to perform
    optimizer.maximize(init_points=80, n_iter=60)

    for i, res in enumerate(optimizer.res):
        print("Iteration {}: \n\t{}".format(i, res))
    print(optimizer.max)
def optimize_bayes_wo_param(parse_model_param):
    def crossval(*args_model, **kwargs_model):
        estimator = parse_model_param(*args_model, **kwargs_model)
        return cross_val_score(estimator, X=X, y=y, *args_eval, **kwargs_eval).mean()

    optimizer = BayesianOptimization(crossval, pbounds=pbounds)

    optimizer_log_dir = (LOG_DIR / log_dir)
    if optimizer_log_dir.exists():
        all_log = [str(path) for path in optimizer_log_dir.iterdir()]
        load_logs(optimizer, logs=all_log)
        filename = 'log_{}.json'.format(len(all_log))
    else:
        optimizer_log_dir.mkdir()
        filename = 'log_0.json'
    logger = JSONLogger(path=str(optimizer_log_dir / filename))
    optimizer.subscribe(Events.OPTMIZATION_STEP, logger)

    optimizer.maximize(init_points, n_iter, kappa=kappa, acq=acq)

    best_model = parse_model_param(**optimizer.max['params'])
    best_model.fit(X=X, y=y)
    return best_model
def run(self, n_iter=5, save_log=False):
    BoParams = {
        'max_depth': (5, 16),
        'min_child_weight': (1, 10),
        'gamma': (0, 1),
        'subsample': (0.6, 1),
        'colsample_bytree': (0.6, 1),
        'reg_alpha': (0, 1),
        'reg_lambda': (0, 1),
    }
    optimizer = BayesianOptimization(self.__evaluator, BoParams, self.opt_seed)
    if save_log:
        logger = JSONLogger(path="./opt_xgb_logs.json")
        optimizer.subscribe(Events.OPTMIZATION_STEP, logger)
    if self.fix_params:
        optimizer.set_bounds({k: (v, v) for k, v in self.fix_params.items()})
    # optimizer.probe(
    #     {'max_depth': 7,
    #      'min_child_weight': 1,
    #      'gamma': 0,
    #      'subsample': 0.8,
    #      'colsample_bytree': 0.8,
    #      'reg_alpha': 0.01,
    #      'reg_lambda': 1})
    gp_params = {"alpha": 1e-5, "n_restarts_optimizer": 3}
    optimizer.maximize(init_points=3, n_iter=n_iter, acq='ucb',
                       kappa=2.576, xi=0.0, **gp_params)
    self.__get_params(optimizer)
def optimizer(self, log_json=False, load_log=False, log_path=None, **kwargs):
    allow = ['n_init_explore_point', 'n_bayesian_iterations']
    self.__dict__.update((k, v) for k, v in kwargs.items() if k in allow)

    b_opt = BayesianOptimization(
        f=self.bb_partial,
        pbounds=self.parameters,
        verbose=self.opt_verbose,
        random_state=0,
    )

    if log_json:
        logger = JSONLogger(path=r'./Logs/' + self.type_nn + '_' + str(self.rnn_kind) + '.json')
        b_opt.subscribe(Events.OPTMIZATION_STEP, logger)
    if load_log and log_path is not None:
        load_logs(b_opt, logs=log_path)

    b_opt.maximize(
        init_points=self.n_init_explore_point,
        n_iter=self.n_bayesian_iterations,
        acq="poi",  # Acquisition function "Probability of Improvement" --> prefers exploration (with xi=0.1)
        xi=1e-1)

    print('best parameters:', b_opt.max, '\n')
    return b_opt
def hyperParameterOptimizer():
    def blackbox(c1Filters, c1KernelSize, c1Strides, c2Filters, c2KernelSize, c2Strides,
                 c3Filters, c3KernelSize, c3Strides, fcc1Units, fcc2Units, dropout1, dropout2):
        hyperParams = HyperParams()
        hyperParams.c1Filters = int(round(c1Filters))
        hyperParams.c1KernelSize = int(round(c1KernelSize))
        hyperParams.c1Strides = int(round(c1Strides))
        hyperParams.c2Filters = int(round(c2Filters))
        hyperParams.c2KernelSize = int(round(c2KernelSize))
        hyperParams.c2Strides = int(round(c2Strides))
        hyperParams.c3Filters = int(round(c3Filters))
        hyperParams.c3KernelSize = int(round(c3KernelSize))
        hyperParams.c3Strides = int(round(c3Strides))
        hyperParams.fcc1Units = int(round(fcc1Units))
        hyperParams.fcc2Units = int(round(fcc2Units))
        hyperParams.dropout1 = round(dropout1, 2)
        hyperParams.dropout2 = round(dropout2, 2)

        checkpoint = train(200, None, hyperParams)
        return checkpoint.validationAccuracy

    bounds = {
        'c1Filters': (100, 128),
        'c1KernelSize': (2, 2),
        'c1Strides': (2, 2),
        'c2Filters': (64, 100),
        'c2KernelSize': (2, 2),
        'c2Strides': (2, 2),
        'c3Filters': (32, 64),
        'c3KernelSize': (2, 2),
        'c3Strides': (2, 2),
        'fcc1Units': (32, 150),
        'fcc2Units': (32, 150),
        'dropout1': (0.2, 0.5),
        'dropout2': (0.2, 0.5),
    }

    optimizer = BayesianOptimization(
        f=blackbox,
        pbounds=bounds,
        random_state=1,
    )

    logger = JSONLogger(path="./logs.json")
    optimizer.subscribe(Events.OPTMIZATION_STEP, logger)
    load_logs(optimizer, logs=["./oldlogs.json"])

    optimizer.maximize(
        init_points=2,
        n_iter=36,
    )

    print(optimizer.max)
def xgb_optimization(X, y, params, random_state=1337):
    training_data = xgb.DMatrix(X, y)

    def xgb_model(
        feature_fraction,
        bagging_fraction,
        lambda_l1,
        lambda_l2,
        max_depth,
        num_leaves,
        min_split_gain,
        min_child_weight,
        learning_rate,
        n_estimators,
    ):
        params["feature_fraction"] = max(min(feature_fraction, 1), 0)
        params["bagging_fraction"] = max(min(bagging_fraction, 1), 0)
        params["lambda_l1"] = max(lambda_l1, 0)
        params["lambda_l2"] = max(lambda_l2, 0)
        params["max_depth"] = int(round(max_depth))
        params["num_leaves"] = int(round(num_leaves))
        params["min_split_gain"] = min_split_gain
        params["min_child_weight"] = min_child_weight
        params["learning_rate"] = learning_rate
        params["n_estimators"] = int(round(n_estimators))
        params.update({
            "objective": "reg:squarederror",
            "max_bin": 255,
            "bagging_freq": 1,
            "min_child_samples": 20,
            "boosting": "gbdt",
            "verbosity": 1,
            "early_stopping_round": 200,
            "metric": "rmse",
        })

        clf = xgb.cv(params, training_data, nfold=5, seed=random_state, verbose_eval=1)
        return (-1 * np.array(clf["test-rmse-mean"])).max()

    optimizer = BayesianOptimization(f=xgb_model, pbounds=params, random_state=1337)

    logger_path = os.path.join(LOGS_DIR, "logs_xgb.json")
    if os.path.exists(logger_path):
        load_logs(optimizer, logs=logger_path)
    logger = JSONLogger(path=logger_path)
    optimizer.subscribe(Events.OPTMIZATION_STEP, logger)

    optimizer.maximize(init_points=5, n_iter=25, acq="ucb")
    return optimizer.max["params"]
def __init__(self, optimization_folder, nr_iterations, iteration_chunck_size, nr_init_points,
             embedder='LASEREmbedderI', log_file_name='logs.json', load_log=False,
             prev_log='log.json', probe=True):
    # Set static variables
    self.INTERMEDIATE_RESULTS_FOLDER = optimization_folder
    self.FINAL_RESULTS_FOLDER = optimization_folder
    self.NR_ITERATIONS = nr_iterations
    self.ITERATION_CHUNCK_SIZE = iteration_chunck_size
    self.NR_INIT_POINTS = nr_init_points
    self.EMBEDDER = embedder
    self.probe = probe
    self.config = Config()
    self.log_file = os.path.join(optimization_folder, log_file_name)
    self.prev_log = os.path.join(optimization_folder, prev_log)
    self.logger = JSONLogger(path=self.log_file)
    self.load_log = load_log

    # Boundaries between which to explore the input space
    self.param_boundaries = {
        'dropout_before_laser': (0., 0.5),
        'dropout_in_laser': (0., 0.5),
        'transformer_drop': (0., 0.5),
        'dropout': (0., 0.5),
        'hidden_size_lstm': (50, 350),
        'weight_decay': (0., 0.1),
        'learning_rate_warmup_steps': (1., 10.0),
        'num_heads': (0.5, 4.49),
        'filter_size': (3.5, 350)
    }

    # Set points on which to evaluate the model for exploration of the solution space
    self.explore_points = {
        'dropout_before_laser': [0.1],
        'dropout_in_laser': [0.25],
        'transformer_drop': [0.0],
        'dropout': [0.0],
        'hidden_size_lstm': [350],
        'weight_decay': [0.01],
        'learning_rate_warmup_steps': [2.],
        'num_heads': [4.],
        'filter_size': [350.]
    }

    self.bo = None  # initialize variable for further error handling

    assert len(
        np.unique([len(n) for n in self.explore_points.values()])
    ) == 1, 'number of explore points should be the same for all parameters'
    self.NUM_EXPLORE_POINTS = np.unique(
        [len(n) for n in self.explore_points.values()])[0]
def optimize_2d(path=None, steps=None, init_points=None, bounds=None,
                true_function=None, plot=False, load=False):
    def wrapper(x, y):
        os.environ['NW'] = "%f" % (x)
        os.environ['NN'] = "%f" % (y)
        res = -F.func_para()
        return res

    opt = BayesianOptimization(f=wrapper, pbounds=bounds, verbose=2, random_state=92898)

    log_file = new_log_file_name()
    logger = JSONLogger(path=log_file)
    screen_logger = ScreenLogger(verbose=2)
    opt.subscribe(Events.OPTMIZATION_STEP, logger)
    opt.subscribe(Events.OPTMIZATION_START, screen_logger)
    opt.subscribe(Events.OPTMIZATION_STEP, screen_logger)
    opt.subscribe(Events.OPTMIZATION_END, screen_logger)
    print('Logging to logfile: ', os.path.abspath(log_file))
    dump_bounds(log_file, bounds)

    no_log_files_found = False
    if load:
        files = find_log_files()
        if len(files) > 0:
            print('Loading previous runs from logfile(s):')
            for f in files:
                print(f)
            load_logs(opt, logs=files)
        else:
            no_log_files_found = True

    if (init_points is not None) and (init_points > 0):
        if no_log_files_found or not load:
            opt.maximize(init_points=init_points, n_iter=0, alpha=1e-5)

    first_step = True
    opt.unsubscribe(Events.OPTMIZATION_END, screen_logger)
    print('')
    if _check_steps_finite(steps):
        for _ in range(steps):
            opt.maximize(init_points=0, n_iter=1, alpha=1e-5)
            if first_step:
                opt.unsubscribe(Events.OPTMIZATION_START, screen_logger)
                first_step = False
    else:
        while True:
            opt.maximize(init_points=0, n_iter=1, alpha=1e-5)

    print("MAX: ", opt.max)
    return opt
def get_tuned_params_bayesian(model, config_dict, train_data, cv_fold, outer_iter_i, experiment_dir):
    optimizer = BayesianOptimization(f=get_cross_validated_cindex_fn(model, train_data, cv_fold,
                                                                     outer_iter_i, experiment_dir),
                                     pbounds=config_dict['params'],
                                     verbose=2,
                                     random_state=SEED)
    logger = JSONLogger(path=os.path.join(experiment_dir, "btune_logs_{}.json".format(outer_iter_i)))
    optimizer.subscribe(Events.OPTMIZATION_STEP, logger)
    optimizer.maximize(
        init_points=config_dict['bayesian']['n_init'],
        n_iter=config_dict['bayesian']['n_bayesian']
    )
    return optimizer.max['params']
def run(self, n_iter=5, save_log=False):
    BoParams = {
        'num_leaves': (2**4, 2**8),
        'min_split_gain': (0.01, 1),
        'min_child_weight': (0, 0.01),
        'min_child_samples': (8, 32),
        'subsample': (0.6, 1),
        'colsample_bytree': (0.6, 1),
        'reg_alpha': (0, 1),
        'reg_lambda': (0, 1),
    }
    optimizer = BayesianOptimization(self.__evaluator, BoParams, random_state=self.opt_seed)
    if save_log:
        logger = JSONLogger(path="./opt_lgb_logs.json")
        optimizer.subscribe(Events.OPTMIZATION_STEP, logger)
    if self.fix_params:
        optimizer.set_bounds(
            {k: (v, v) for k, v in self.fix_params.items()})
    # optimizer.probe(
    #     {'num_leaves': 2 ** 7 - 1,
    #      'min_split_gain': 0,
    #      'min_child_weight': 0.001,
    #      'min_child_samples': 6,
    #      'subsample': 0.8,
    #      'colsample_bytree': 0.8,
    #      'reg_alpha': 0.01,
    #      'reg_lambda': 1})
    gp_params = {"alpha": 1e-5, "n_restarts_optimizer": 3}
    optimizer.maximize(init_points=3, n_iter=n_iter, acq='ucb',
                       kappa=2.576, xi=0.0, **gp_params)
    self.__get_params(optimizer)
def main():
    bounds = {'alpha': (0.3, 0.9), 'temperature': (3, 15)}
    optimizer = BayesianOptimization(
        f=KD_train,
        pbounds=bounds,
        verbose=2,
        random_state=0)

    # Subscribe the logger before the ask/tell loop so every registered point is written to BO_logs.json.
    logger = JSONLogger(path='./BO_logs.json')
    optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

    utility = UtilityFunction(kind='ei', kappa=1, xi=0.0)
    for _ in range(5):
        next_p = optimizer.suggest(utility)
        print('suggest for next:', next_p)
        result = KD_train(**next_p)
        optimizer.register(params=next_p, target=result)

    for i, res in enumerate(optimizer.res):
        print("ite {} \t {}".format(i, res))
def get_tuned_params_random(model, config_dict, train_data, cv_fold, outer_iter_i, experiment_dir):
    # Note that here we are still using the BayesianOpt package under the hood,
    # because it allows us to customize sweeping to be entirely random within a specified grid.
    optimizer = BayesianOptimization(f=get_cross_validated_cindex_fn(model, train_data, cv_fold,
                                                                     outer_iter_i, experiment_dir),
                                     pbounds=config_dict['params'],
                                     verbose=2,
                                     random_state=SEED)
    logger = JSONLogger(path=os.path.join(experiment_dir, "btune_logs_{}.json".format(outer_iter_i)))
    # Older bayes_opt releases spell the event constant without the first "I"; support both.
    try:
        optimizer.subscribe(Events.OPTMIZATION_STEP, logger)
    except AttributeError:
        optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)
    optimizer.maximize(
        init_points=config_dict['random']['n_probes'],  # init_points: how many steps of random exploration to perform
        n_iter=0  # zero Bayesian optimization steps
    )
    return optimizer.max['params']
def bo_logistic_regression(training, param_grid, seed, cv=5):
    logger = JSONLogger(path="./logs.json")

    n_param_grid = {}
    for key, value in param_grid.items():
        key = key.replace("lr" + '__', '')
        if type(value) == type(tuple([0, 0])):
            n_param_grid[key] = value
            print(value)
        else:
            n_param_grid[key] = (0, 1)

    def ob_function(C):
        skf = StratifiedKFold(n_splits=cv, shuffle=True)
        for train_index, test_index in skf.split(
                training.loc[:, (training.columns != "Response")].values,
                training["Response"].values):
            train = training.iloc[train_index]
            test = training.iloc[test_index]
            pr = Processor(train, test, seed)
            fe = FeatureEngineer(pr.training, pr.unseen, seed)
            model = LogisticRegression(
                random_state=seed,
                C=C,
                max_iter=200,
            )
            model.fit(
                fe.training.loc[:, (fe.training.columns != "Response")].values,
                fe.training["Response"].values)
            y_pred = model.predict(fe.unseen.drop('Response', axis=1))
            return profit(test['Response'], y_pred)

    b_optimizer = BayesianOptimization(
        f=ob_function,
        pbounds=n_param_grid,
        random_state=1,
    )
    b_optimizer.subscribe(Events.OPTMIZATION_STEP, logger)
    b_optimizer.maximize(n_iter=100, init_points=50)
    return b_optimizer
def __init__(self, model, X_train, y_train, verbose=0, cv=None, pbounds=None,
             init_params=None, tp=None, n_iter=5, init_points=1,
             cat_columns=None, av_params=None):
    if av_params and model in av_params.keys():
        self.av_params = av_params[model]
    else:
        self.av_params = None
    self.model = model
    self.X_train = X_train
    self.y_train = y_train
    self.cv = cv
    self.tp = tp
    self.pbounds = pbounds
    self.init_params = init_params

    optimizer = BayesianOptimization(self.evaluate_model, pbounds, random_state=4)
    self.cat_columns = cat_columns

    logger = JSONLogger(path="./logs.json")
    optimizer.subscribe(Events.OPTMIZATION_STEP, logger)
    optimizer.maximize(init_points=init_points, n_iter=n_iter)

    self.opt = optimizer
    self.results = optimizer.res
def main():
    # Bounded region of parameter space
    pbounds = {
        'batch_size': (768, 1280),
        'neg_sample_size': (128, 384),
        'max_steps': (1000, 100000)
    }

    optimizer = BayesianOptimization(
        f=black_box_function,
        pbounds=pbounds,
        random_state=1,
    )

    logger = JSONLogger(path='../models/tmp/logs.json')
    optimizer.subscribe(Events.OPTMIZATION_STEP, logger)

    # Try to maximize the metric; results will be saved in models/tmp/logs.json
    optimizer.maximize(init_points=1, n_iter=2)

    print(optimizer.max)
    for i, res in enumerate(optimizer.res):
        print("Iteration {}: \n\t{}".format(i, res))
def optimise(kappa, n_runs, n_sub_runs, ignore_similar, score_type='general'):
    n_epochs = 10
    print_step = max(n_epochs // 2, 1)
    min_score = -1000

    train, val, ENDPOINT, AGE, SEX, vocab_size, sequence_length, n_individuals = \
        get_dataset(nrows=10_000_000)
    print('Data loaded, number of individuals:', n_individuals)

    def objective_function(batch_size, lr, temperature):
        try:
            batch_size = int(batch_size)
            lr = 10 ** (-lr)

            scores = []
            for i in range(n_sub_runs):
                print('sub run {}'.format(i))

                # Train the GAN
                G = RelationalMemoryGenerator(mem_slots, head_size, embed_size, vocab_size,
                                              temperature, num_heads, num_blocks)
                D = RelGANDiscriminator(n_embeddings, vocab_size, embed_size, sequence_length,
                                        out_channels, filter_sizes, use_aux_info, use_mbd,
                                        mbd_out_features, mbd_kernel_dims)

                # Call train function
                dist_score, transition_score, similarity_score, mode_collapse_score, \
                    indv_score, transition_score_full, _, _, _ = train_GAN(
                        G, D, train, val, ENDPOINT, batch_size, vocab_size, sequence_length,
                        n_epochs, lr, temperature, GAN_type, n_critic, print_step, get_scores,
                        ignore_time, dummy_batch_size, ignore_similar, one_sided_label_smoothing,
                        relativistic_average, True, use_gp, lambda_gp
                    )

                if score_type == 'general':
                    score = -(2 * dist_score[-1] +
                              1 * transition_score[-1] +
                              # 1 * similarity_score[-1] +
                              4 * mode_collapse_score[-1])
                elif score_type == 'chd_and_br_cancer':
                    # minimize the transition score from chd to breast cancer
                    score = -transition_score_full[
                        -1,
                        ENDPOINT.vocab.stoi['C3_BREAST'] - 3,
                        ENDPOINT.vocab.stoi['I9_CHD'] - 3
                    ]
                elif score_type == 'transition':
                    score = -transition_score[-1]

                if isnan(score):
                    score = min_score
                score = max(min_score, score)
                print('Score:', score)
                scores.append(score)

            score = np.mean(scores)
            return score

        except RuntimeError as e:
            print(e)
            return min_score

    # Bounded region of parameter space
    pbounds = {
        'batch_size': (16, 512),
        'lr': (2, 8),
        'temperature': (1, 100),
    }

    optimizer = BayesianOptimization(
        f=objective_function,
        pbounds=pbounds,
        # random_state=1,
    )

    filename = "optim_results/{}_{}.json".format(score_type, n_individuals)
    load_logs(optimizer, logs=[filename])
    logger = JSONLogger(path=filename)
    optimizer.subscribe(Events.OPTMIZATION_STEP, logger)

    optimizer.maximize(
        init_points=int(np.sqrt(n_runs)),
        n_iter=n_runs,
    )

    # fix_optim_log(filename)
    print(optimizer.max)
    vec['n'] += 1
    return res.mean()


def search_param(alpha, beta, topK):
    return my_func(alpha, beta, topK)


optimizer = BayesianOptimization(
    f=search_param,
    pbounds=tuning_params,
    verbose=3,
    random_state=2010,
)

logger = JSONLogger(path="./Logs/tmp/" + 'RP3beta_multi' + ".json")
optimizer.subscribe(Events.OPTMIZATION_STEP, logger)

optimizer.probe(
    {'alpha': 0.03374950051351756,
     'beta': 0.24087176329409027,
     'topK': 16},
    lazy=True,
)

optimizer.maximize(
    init_points=40,
    n_iter=400,
)

print(optimizer.max)
    pbounds=pbounds,
)

if args.probe:
    optimizer.probe(
        params={"lr_pow": args.probe},
        lazy=True,
    )
else:
    optimizer.probe(
        params={"lr_pow": -2},
        lazy=True,
    )
    optimizer.probe(
        params={"lr_pow": -4},
        lazy=True,
    )
    optimizer.probe(
        params={"lr_pow": -6},
        lazy=True,
    )

logger = JSONLogger(path=logpath)
optimizer.subscribe(Events.OPTMIZATION_STEP, logger)

optimizer.maximize(
    init_points=0,
    n_iter=100,
)

print(optimizer.max)
# Bounded region of parameter space
pbounds = {'buffer_pool': (128, buffer_pool_max), 'log_file': (48, log_file_max),
           'flush_method': (0, flush_method_max), 'thread_cache': (9, thread_cache_max),
           'thread_sleep': (0, thread_sleep_max), 'max_connect': (151, max_connect_max)}

optimizer = BayesianOptimization(
    f=objFunction,
    pbounds=pbounds,
    random_state=1,
)

load_logs(optimizer, logs=[setLoggerRoute()])
logger = JSONLogger(path="./logsResumed.json")
optimizer.subscribe(Events.OPTMIZATION_STEP, logger)

print("New optimizer is now aware of {} points.".format(len(optimizer.space)))
count = len(optimizer.space)

optimizer.maximize(
    init_points=0,
    n_iter=defineOptIterations(),
)

print(optimizer.max)
# optimizer.maximize(
#     init_points=0,
#     n_iter=5,
# )
# print(optimizer.max)
# for i, res in enumerate(optimizer.res):
#     print("Iteration {}: \n\t{}".format(i, res))

optimizer.probe(
    # params={"x": 0.5, "y": 0.7},
    params=[2.0, 0.1],
    lazy=True,
)

# By default these will be explored lazily (lazy=True), meaning these points will be
# evaluated only the next time you call maximize.
logger = JSONLogger(path="./logs.json")
optimizer.subscribe(Events.OPTMIZATION_STEP, logger)

print(optimizer.space.keys)
optimizer.maximize(init_points=0, n_iter=6)

for i, res in enumerate(optimizer.res):
    print("Iteration {}: \n\t{}".format(i, res))
print(optimizer.max)

# load_logs
new_optimizer = BayesianOptimization(
data = data_all[data_all.pass_preselection == 1]
train, test = train_test_split(data, test_size=0.3, random_state=17)

xgboostBO = BayesianOptimization(
    xgboost_fom,
    {
        'max_depth': (2, 10),
        'learning_rate': (0.01, 0.4),
        'n_estimators': (200, 1000),
        'subsample': (0.4, 0.8),
        'colsample_bytree': (0.5, 0.99),
        'min_child_weight': (2, 10),
        'gamma': (0., 1.),
        'max_delta_step': (0, 0.2),
    })

## save optimization steps
logger = JSONLogger(path="logs_sumIso.json")
xgboostBO.subscribe(Events.OPTMIZATION_STEP, logger)

xgboostBO.maximize(
    n_iter=30,
    init_points=20,
)

print('-' * 53)
print('Final Results')
print(xgboostBO.max)

print('\n ---- all iterations ------ \n')
for i, res in enumerate(xgboostBO.res):
    print("Iteration {}: \n\t{}".format(i, res))
def Catboost_tuning(X_train, y_train, kfold=6):
    '''
    Catboost model hyperparameter tuning; uses bayes_opt to cross-validate the entire training dataset.

    @ tuning hyperparameters:
        one_hot_max_size: if required             int
        depth: 6 ~ 10                             int
        l2_leaf_reg: positive value 1 ~ 30
        random_strength: 1 ~ 30
        bagging_temperature: 0 ~ 1000
    @ default hyperparameters:
        NUMBER_OF_TREES: iterations: 10000
        use_best_model = True
        eval_metric = 'RMSE'
        eval_set = Pool()
        learning_rate = 0.02
        border_count = 254

    Parameters
    ----------
    X_train: feature dataframe
    y_train: target series

    Return
    ------
    dict: dictionary of tuned hyperparameters of Catboost
    '''
    from catboost import train, Pool
    from sklearn.model_selection import KFold
    import numpy as np
    import gc
    from bayes_opt.observer import JSONLogger
    from bayes_opt.event import Events
    from bayes_opt import BayesianOptimization

    X_train = X_train
    y_train = y_train
    features = [feature for feature in X_train.columns
                if feature not in ['card_id', 'first_active_month']]
    categorical_features = [feature for feature in features
                            if 'feature_' in feature]
    folds = KFold(n_splits=kfold, shuffle=True, random_state=133)

    catboost_opt_params = {
        'one_hot_max_size': (0, 6),
        'depth': (5, 11),
        'l2_leaf_reg': (1, 30),
        'random_strength': (1, 30),
        'bagging_temperature': (0, 1000)
    }

    def cv_helper(one_hot_max_size,
                  depth,
                  l2_leaf_reg,
                  random_strength,
                  bagging_temperature):
        # entire data for evaluating clf training performance
        all_data = Pool(data=X_train[features],
                        label=y_train,
                        cat_features=categorical_features)
        # validation RMSE
        RMSE = []
        for train_idxs, val_idxs in folds.split(X_train.values, y_train.values):
            # training set
            train_data = Pool(data=X_train.iloc[train_idxs][features],
                              label=y_train.iloc[train_idxs],
                              cat_features=categorical_features)
            # validation set
            val_data = Pool(data=X_train.iloc[val_idxs][features],
                            label=y_train.iloc[val_idxs],
                            cat_features=categorical_features)
            # hyperparameters
            params = {
                'eval_metric': 'RMSE',
                'use_best_model': True,
                'loss_function': 'RMSE',
                'learning_rate': 0.02,
                'early_stopping_rounds': 400,
                'border_count': 254,
                'task_type': 'GPU',
                'one_hot_max_size': int(one_hot_max_size),
                'depth': int(depth),
                'l2_leaf_reg': l2_leaf_reg,
                'random_strength': random_strength,
                'bagging_temperature': bagging_temperature
            }
            # classifier
            clf = train(pool=train_data,
                        params=params,
                        verbose=200,
                        iterations=10000,
                        eval_set=all_data)
            # add current fold RMSE on all_data
            RMSE.append(clf.best_score_['validation_0']['RMSE'])
        return -np.mean(np.array(RMSE))

    logger = JSONLogger(path="bayes_opt_log/catBoost_logs.json")
    CAT_bayes_opt = BayesianOptimization(cv_helper, pbounds=catboost_opt_params)
    CAT_bayes_opt.subscribe(Events.OPTMIZATION_STEP, logger)
    CAT_bayes_opt.maximize(init_points=4,
                           n_iter=20,
                           acq='ei',
                           xi=0.0)

    return CAT_bayes_opt.max['params']
def LightGBM_tuning(X_train, y_train, kfold=6):
    '''
    LightGBM model hyperparameter tuning; uses bayes_opt to cross-validate the entire training dataset.

    @ tuning hyperparameters:
        feature_fraction
        bagging_fraction
        lambda_l1
        max_depth
        min_data_in_leaf
        num_leaves
    @ default hyperparameters:
        bagging_freq = 1
        bagging_seed = 11
        boosting = 'gbdt'
        learning_rate: 0.005

    Parameters
    ----------
    X_train: feature dataframe
    y_train: target series

    Return
    ------
    dict: dictionary of tuned hyperparameters of LightGBM
    '''
    import lightgbm as lgb
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold
    import numpy as np
    import gc
    from bayes_opt.observer import JSONLogger
    from bayes_opt.event import Events
    from bayes_opt import BayesianOptimization

    X_train = X_train
    y_train = y_train
    features = [feature for feature in X_train.columns
                if feature not in ['card_id', 'first_active_month']]
    categorical_features = [feature for feature in features
                            if 'feature_' in feature]
    folds = KFold(n_splits=kfold, shuffle=True, random_state=133)
    y_val = np.zeros(y_train.shape)

    bayes_opt_params = {
        'feature_fraction': (0.1, 1.0),
        'bagging_fraction': (0.1, 1.0),
        'lambda_l1': (0, 6),
        'max_depth': (4, 20),
        'min_data_in_leaf': (10, 300),
        'num_leaves': (5, 300),
    }

    # define the cross-validation function which returns the objective score (-rmse),
    # then use the Bayesian optimizer to tune the objective score
    def cv_helper(max_depth,
                  num_leaves,
                  min_data_in_leaf,
                  feature_fraction,
                  bagging_fraction,
                  lambda_l1):
        for train_idxs, val_idxs in folds.split(X_train.values, y_train.values):
            # training set
            train_data = lgb.Dataset(data=X_train.iloc[train_idxs][features],
                                     label=y_train.iloc[train_idxs],
                                     categorical_feature=categorical_features)
            # validation set
            val_data = lgb.Dataset(data=X_train.iloc[val_idxs][features],
                                   label=y_train.iloc[val_idxs],
                                   categorical_feature=categorical_features)
            # hyperparameters
            params = {
                'objective': 'regression',
                'metric': 'rmse',
                'lambda_l1': lambda_l1,
                'num_leaves': int(num_leaves),
                'min_data_in_leaf': int(min_data_in_leaf),
                'max_depth': int(max_depth),
                'feature_fraction': feature_fraction,
                'bagging_fraction': bagging_fraction,
                'bagging_freq': 1,
                'bagging_seed': 11,
                'boosting': 'gbdt',
                'learning_rate': 0.005,
                'verbosity': 1
            }
            # classifier
            clf = lgb.train(params=params,
                            train_set=train_data,
                            num_boost_round=10000,
                            valid_sets=[train_data, val_data],
                            verbose_eval=200,
                            early_stopping_rounds=200)
            # prediction of validation
            y_val[val_idxs] = clf.predict(X_train.iloc[val_idxs][features],
                                          num_iteration=clf.best_iteration)
        return -mean_squared_error(y_true=y_train, y_pred=y_val)**0.5

    logger = JSONLogger(path="bayes_opt_log/lightGBM_logs.json")
    LGB_bayes_opt = BayesianOptimization(cv_helper, pbounds=bayes_opt_params)
    LGB_bayes_opt.subscribe(Events.OPTMIZATION_STEP, logger)
    LGB_bayes_opt.maximize(init_points=4,
                           n_iter=20,
                           acq='ei',
                           xi=0.0)

    return LGB_bayes_opt.max['params']
print("Model trees: {}; Val score: {:<8.5f}".format( clf.tree_count_, val_score)) return val_score bounds = { 'l2_leaf_reg': (0.5, 100.0), 'bagging_temperature': (0.1, 100.0), 'random_strength': (0, 20), 'subsample': (0.2, 1.0), 'depth': (1, 3), 'border_count': (32, 128) } bo = BayesianOptimization(evaluate_cb, pbounds=bounds, random_state=42) logger = JSONLogger(path="./stacking_bo.json") bo.subscribe(Events.OPTMIZATION_STEP, logger) bo.probe( params={ 'l2_leaf_reg': 3.0, 'bagging_temperature': 1, 'random_strength': 1, 'subsample': 0.3, 'depth': 2, 'border_count': 128 }, lazy=True, ) bo.maximize(init_points=5, n_iter=10)
def main(batch=False):
    '''This main function allows quick testing of the batch and non-batch versions
    of the simulation.

    Keyword Arguments:
        batch {bool} -- if True, the simulation will run a batch experiment (default: {False})
    '''
    np.random.seed(1234)

    # Note: all probabilities are in units p(event) per hour
    params = {
        # Intake Probabilities (Note, 1-sum(these) is probability of no intake)
        'pSusceptibleIntake': 0.125,
        'pInfectIntake': 0.02,
        'pSymptomaticIntake': 0.0,
        'pInsusceptibleIntake': 0.05,

        # Survival of Illness
        'pSurviveInfected': 0.0,
        'pSurviveSymptomatic': 0.0,

        # Alternate Death Rate
        'pDieAlternate': 0.001,

        # Discharge and Cleaning
        'pDischarge': 0.0,
        'pCleaning': 1.0,

        # Disease Refractory Period
        'refractoryPeriod': 7.0*24.0,

        # Death and Symptoms of Illness
        'pSymptomatic': 0.0,
        'pDie': 0.0,

        # Infection Logic
        'infection_kernel': [0.05, 0.01],
        'infection_kernel_function': 'lambda node, k: k*(1-node[\'occupant\'][\'immunity\'])',

        # Immunity Growth (a0*immunity+a1)
        # (1.03, 0.001 represents full immunity in 5 days)
        # 'immunity_growth_factors': [1.03, 0.001],
        'immunity_growth_factors': [0.0114, 0.0129, 0.0146, 0.0166, 0.0187, 0.0212, 0.0240,
                                    0.0271, 0.0306, 0.0346, 0.0390, 0.0440, 0.0496, 0.0559,
                                    0.0629, 0.0707, 0.0794, 0.0891, 0.0998, 0.1117, 0.1248,
                                    0.1392, 0.1549, 0.1721, 0.1908, 0.2109, 0.2326, 0.2558,
                                    0.2804, 0.3065, 0.3338, 0.3623, 0.3918, 0.4221, 0.4530,
                                    0.4843, 0.5157, 0.5470, 0.5779, 0.6082, 0.6377, 0.6662,
                                    0.6935, 0.7196, 0.7442, 0.7674, 0.7891, 0.8092, 0.8279,
                                    0.8451, 0.8608, 0.8752, 0.8883, 0.9002, 0.9109, 0.9206,
                                    0.9293, 0.9371, 0.9441, 0.9504, 0.9560, 0.9610, 0.9654,
                                    0.9694, 0.9729, 0.9760, 0.9788, 0.9813, 0.9834, 0.9854,
                                    0.9871, 0.9886],
        'immunity_lut': True,

        # End Conditions
        'max_time': 31*24,  # One month
        'max_intakes': None,

        # Intervention
        'intervention': 'TimedRemovalIntervention()'  # Different interventions can go here
    }

    if not batch:
        print(params['intervention'])
        params['pSusceptibleIntake'] = best_params['pSusceptibleIntake']
        params['pInfectIntake'] = best_params['pInfectIntake']
        params['pInsusceptibleIntake'] = best_params['pInsusceptibleIntake']
        params['pDieAlternate'] = best_params['pDieAlternate']
        params['infection_kernel'] = [best_params['infection_kernel_0'],
                                      best_params['infection_kernel_1']]

        with open('./sim_params.json', 'w+') as out:
            json.dump(params, out)

        sim = simulation.Simulation(params,
                                    spatial_visualization=True,
                                    aggregate_visualization=True,
                                    return_on_equillibrium=True,)
        # print(sim.run())
    else:
        # Run batch simulation comparing interventions
        """
        Grid Search Method with Bayesian Optimization over
        `pSusceptibleIntake`, `pInfectIntake`, `pInsusceptibleIntake`, `pDieAlternate`,
        and `infection_kernel`
        """
        from bayes_opt import BayesianOptimization
        from bayes_opt.observer import JSONLogger
        from bayes_opt.event import Events
        import warnings

        log_name = 'APA-XGB_BO-Distemper-03-16-2019-v1'
        logger = JSONLogger(path='./'+log_name+'.json')

        orig_params = params.copy()
        Test = False
        Target = {
            'Total Intake': 847,
            'E2I': 68,
            'sum_S2D_IS2D': 68,
            'E2S': 432,
            'E2IS': 347,
            'S2I': 111
        }

        def _get_results(_p):
            runs = 2
            results = simulation.BatchSimulation(_p, runs).run()
            results_dataframe = pd.DataFrame.from_records(results)
            results_dataframe = results_dataframe.drop(['S', 'IS', 'SY', 'D'], axis=1)
            results_dataframe = results_dataframe.rename(
                index=str, columns={"E": "Total Intake", "I": "Total Infected"})
            results_dataframe['Infection Rate'] = \
                results_dataframe['Total Infected'] / results_dataframe['Total Intake']
            means = results_dataframe.mean()
            stes = results_dataframe.std() / np.sqrt(len(results_dataframe))
            cols = results_dataframe.columns
            return means, stes, cols

        def _heuristic(
                pSusceptibleIntake,
                pInfectIntake,
                pInsusceptibleIntake,
                pDieAlternate,
                infection_kernel_0,
                infection_kernel_1
        ):
            params = orig_params.copy()
            params['pSusceptibleIntake'] = pSusceptibleIntake
            params['pInfectIntake'] = pInfectIntake
            params['pInsusceptibleIntake'] = pInsusceptibleIntake
            params['pDieAlternate'] = pDieAlternate
            params['infection_kernel'] = [infection_kernel_0, infection_kernel_1]

            m_0, s_0, c_0 = _get_results(params)

            if Test:
                return m_0
            else:
                loss = 0
                for key, value in Target.items():
                    # category-wise normalized absolute deviation from the target
                    loss += abs((m_0[key]-value)/value)
                loss /= len(Target)
                return -1.*loss

        """
        Desired output
            Total Intake = 847
            Empty->Infected = 68,
            Susceptible->Dead + Insusceptible->Dead = 68,
            Empty->Susceptible = 432,
            Empty->Insusceptible = 347,
            Susceptible->Infected = 111
        When
            'pSusceptibleIntake': 0.125,
            'pInfectIntake': 0.02,
            'pSymptomaticIntake': 0.0,
            'pInsusceptibleIntake': 0.05,
            # Survival of Illness
            'pSurviveInfected': 0.0,
            'pSurviveSymptomatic': 0.0,
            # Alternate Death Rate
            'pDieAlternate': 0.001,
            # Discharge and Cleaning
            'pDischarge': 0.0,
            'pCleaning': 1.0,
            # Disease Refractory Period
            'refractoryPeriod': 7.0*24.0,
            # Death and Symptoms of Illness
            'pSymptomatic': 0.0,
            'pDie': 0.0,
            # Infection Logic
            'infection_kernel': [0.05, 0.01],
            'infection_kernel_function': 'lambda node, k: k*(1-node[\'occupant\'][\'immunity\'])',
        We have
            {'E': 987, 'S': 0, 'IS': 511, 'I': 369, 'SY': 0, 'D': 46, 'E2I': 98,
             'sum_S2D_IS2D': 26, 'E2S': 623, 'E2IS': 266, 'S2I': 271}
        We need to
            Decrease E2I (↓pInfectIntake)
            Increase sum_S2D_IS2D (↑pDieAlternate)
            Decrease E2S (↓pSusceptibleIntake)
            Increase E2IS (↑pInsusceptibleIntake)
            Decrease S2I (↓infection_kernel)
        """

        BO_wrapper = BayesianOptimization(
            _heuristic,
            {
                'pSusceptibleIntake': (0.05, 0.2),
                'pInfectIntake': (0.005, 0.03),
                'pInsusceptibleIntake': (0.025, 0.2),
                'pDieAlternate': (0.0025, 0.01),
                'infection_kernel_0': (0.01, 0.1),
                'infection_kernel_1': (0.001, 0.01)
            }
        )

        BO_wrapper.subscribe(Events.OPTMIZATION_STEP, logger)

        print('-'*130)
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            BO_wrapper.maximize(init_points=20, n_iter=50, acq='ei', xi=0.01)

        print('-'*130)
        print('Final Results')
        print('Maximum value: %f' % BO_wrapper.max['target'])
        print('Best parameters: ', BO_wrapper.max['params'])

        Test = True
        m_0 = _heuristic(**BO_wrapper.max['params'])
        for key, value in Target.items():
            print(key, value, m_0[key])
def bayes_tuning(
        self,
        params=None,
        eval_func=None,
        init_points=5,
        n_iter=25,
        mode='ei',
        # acq='ei',
        # xi=0.0,
        learning_rate=0.03,
        metric=None,
        random_state=7,
        **kwargs):
    '''
    Attn: for eval_func, the hyper-parameters have to be the same as the stored dict,
    and the positive/negative return values have to be adjusted according to the metric.

    :param learning_rate: float or list.
    ref: https://github.com/fmfn/BayesianOptimization/blob/master/examples/exploitation%20vs%20exploration.ipynb

    kwargs for the Bayesian optimizer:
    :param acq: 'ei'
    :param xi: 0.0
    :param kappa
    '''
    if params == None:
        __params = self.__set_booster_params()

    bayesian_optimizer_params = kwargs if kwargs is not None else {}
    if mode == 'exploration':
        bayesian_optimizer_params['acq'] = 'ucb'
        bayesian_optimizer_params['kappa'] = 10
    elif mode == 'ei':
        # Expected Improvement
        bayesian_optimizer_params['acq'] = 'ei'
        bayesian_optimizer_params['xi'] = 1e-4

    # __params['objective'] = self.__booster_params['objective']
    #
    # if metric == None:
    #     __params['metric'] = self.__booster_params['metric']
    # else:
    #     __params['metric'] = metric

    try:
        ### Test bayes_opt modes:
        # if mode == 'prefer_exploitation':
        #     bo = BayesianOptimization(__eval_func, __params)
        #     bo.maximize(init_points=init_points, n_iter=n_iter, acq='ucb', kappa=1)
        #     return bo
        # if mode == 'prefer_exploration':
        #     bo = BayesianOptimization(__eval_func, __params)
        #     bo.maximize(init_points=init_points, n_iter=n_iter, acq='ucb', kappa=10)

        if eval_func == None:
            bo = BayesianOptimization(self.__eval_params_using_cv, __params,
                                      random_state=random_state)
        else:
            bo = BayesianOptimization(eval_func, __params, random_state=random_state)

        logger = JSONLogger(path=self.__translate_file_dir('BayesianOptimization_log.json'))
        bo.subscribe(Events.OPTMIZATION_STEP, logger)
        bo.maximize(init_points=init_points, n_iter=n_iter, **kwargs)

        opt_res = pd.DataFrame(bo.res['all']['params'])
        opt_res['values'] = bo.res['all']['values']
        return opt_res

    except Exception as e:
        print('Failed in Bayesian optimization. Error: {}'.format(e))
        raise
    return accuracy


pbounds = {
    'alphabet_size': (64.0, 64.0),
    'dropout': (0.0, 0.4),
    'embedding_size': (32.0, 64.0),
    'label_smoothing': (0.0, 0.2),
    'layer_size': (1280.0, 1280.0),
    'learning_rate': (-4.0, -2.0),
    'learning_rate_final': (-5.0, -3.0),
    'window': (8.0, 8.0)
}

optimizer = BayesianOptimization(f=model_accuracy, pbounds=pbounds, verbose=2, random_state=1)

if os.path.isfile("./parameters_log.json"):
    load_logs(optimizer, logs=["./parameters_log.json"])
    print("Loaded {} model evaluations".format(len(optimizer.space)))

logger = JSONLogger(path="./parameters_log_new.json")
optimizer.subscribe(Events.OPTMIZATION_STEP, logger)
optimizer.subscribe(Events.OPTMIZATION_STEP, ScreenLogger())

optimizer.maximize(
    init_points=max(0, 20 - len(optimizer.space)),
    n_iter=40 - max(len(optimizer.space) - 20, 0),
)
parser.add_argument("-l", "--log-dir", type=str, default="./logs.json") args, unknown = parser.parse_known_args() c1 = args.c1 c2 = args.c2 w = args.w k = args.k p = args.p n_particles = args.n_particles epochs = args.epochs search = args.search log_dir = args.log_dir mode = args.mode.lower() if search: import os from bayes_opt import BayesianOptimization from bayes_opt.observer import JSONLogger from bayes_opt.event import Events from bayes_opt.util import load_logs pbounds = {"c1": (0, 1.0), "c2": (0, 1.0), "w": (0, 1.0)} optimizer = BayesianOptimization(f=pso, pbounds=pbounds) if os.path.exists(log_dir): load_logs(optimizer, logs=[log_dir]) optimizer.subscribe(Events.OPTMIZATION_STEP, JSONLogger(path=log_dir)) optimizer.maximize(init_points=100, n_iter=25) print(optimizer.max) else: pso(c1, c2, w, k, p, n_particles, epochs, mode, verbose=2, visualize=1)