def test_bayes_opt_demo():
    """Check that a short UCB run locates the main peak of a 1-D toy function.

    The target is a table lookup into ``f`` (1000 samples of a two-bump curve
    on ``[-2, 10]``), so the single parameter ``x`` is an index into that
    table.

    pytest tests/test_bayesian_optimization.py::test_bayes_opt_demo

    See Also
    --------
    https://github.com/fmfn/BayesianOptimization/blob/master/examples/exploitation%20vs%20exploration.ipynb
    """
    random_state = ensure_rng(0)
    xs = np.linspace(-2, 10, 1000)
    f = np.exp(-(xs - 2)**2) + np.exp(-(xs - 6)**2 / 10) + 1 / (xs**2 + 1)
    bo = BayesianOptimization(f=lambda x: f[int(x)],
                              pbounds={'x': (0, len(f) - 1)},
                              random_state=random_state,
                              verbose=0)
    gp_params = {'alpha': 1e-5, 'n_restarts_optimizer': 2}

    # Change acquisition params to speed up optimization for testing purposes
    bo._acqkw['n_iter'] = 5
    bo._acqkw['n_warmup'] = 1000
    bo.maximize(init_points=10, n_iter=5, acq='ucb', kappa=5, **gp_params)

    res = bo.space.max_point()
    max_params = res['max_params']
    max_val = res['max_val']
    ratio = max_val / f.max()

    # The message now states the threshold that is actually checked (1.1).
    assert max_val > 1.1, 'got {}, but should be > 1.1'.format(max_val)
    assert ratio > .9, 'got {}, should be better than 90% of max val'.format(ratio)
    assert max_params['x'] > 300, 'should be in a peak area (around 300)'
    assert max_params['x'] < 400, 'should be in a peak area (around 300)'
def fit(self,X,y=None):
    """Tune hyper-parameters with Bayesian optimization, then fit the classifier.

    The optimizer maximizes ``self.score`` over ``self.param_ranges``; the best
    parameters found are applied to ``self.clf`` before it is fitted.

    Parameters
    ----------

    X : pandas dataframe or array-like
       training samples. If pandas dataframe can handle dict of feature in
       one column or convert a set of columns
    y : array like, required for array-like X and not used presently for
       pandas dataframe
       class labels

    Returns
    -------
    self: object
    """
    self.X = X
    self.y = y

    optimizer = BayesianOptimization(self.score, self.param_ranges)
    optimizer.maximize()
    logger.info(optimizer.res)

    best = optimizer.res['max']
    self.best_score = best['max_val']

    tuned = best['max_params']
    # Parameters listed in self.param_int are cast to int (in place) before
    # being handed to the estimator.
    tuned.update({name: int(tuned[name]) for name in self.param_int})

    self.clf.set_params(**tuned)
    self.clf.fit(X, y)
    return self
def main():
    """Tune an SVC with Bayesian optimization on ten data splits and report metrics.

    For each split ``load_data(i + 1)`` supplies a train and a test set. The
    optimizer maximizes the mean 9-fold cross-validation score of an ``SVC``
    over ``C`` and ``tol``; the best setting is refitted on the training set
    and scored on the test set. Accuracy, sensitivity and specificity
    (from ``my_scores``) are collected per split and summarised at the end.

    All output uses the single-argument ``print(...)`` form so the text is the
    same whether the module runs under Python 2 or Python 3.
    """
    # stdout_path = 'outcome_testBO.txt'
    # print '[INFO] stdout_path:\t{}'.format(stdout_path)
    # sys.stdout = open(stdout_path, 'w')
    #
    # np.random.seed(1)
    print('#' * 53)
    scores = []
    sensis = []
    specis = []
    for i in range(10):
        trainset, testset = load_data(i + 1)
        X_train, y_train = trainset
        X_test, y_test = testset

        def svccv(C, tol):
            """Mean 9-fold CV score of an SVC for the given C and tol."""
            return cross_val_score(SVC(C=C, random_state=1, tol=tol),
                                   X_train, y_train, cv=9).mean()

        def rfccv(n_estimators, min_samples_split, max_features):
            """Mean 5-fold F1 score of a random forest (only used by the disabled search below)."""
            # scoring is passed by keyword: positionally it would be taken as
            # `groups` by sklearn.model_selection.cross_val_score.
            return cross_val_score(RFC(n_estimators=int(n_estimators),
                                       min_samples_split=int(min_samples_split),
                                       max_features=min(max_features, 0.999),
                                       random_state=2),
                                   X_train, y_train, scoring='f1', cv=5).mean()

        svcBO = BayesianOptimization(svccv, {'C': (10, 50000), 'tol': (0.0001, 0.1)})
        svcBO.explore({'C': [10, 100, 1000, 10000, 20000, 50000],
                       'tol': [0.0001, 0.001, 0.005, 0.01, 0.05, 0.1]})

        # rfcBO = BayesianOptimization(rfccv, {'n_estimators': (10, 250),
        #                                      'min_samples_split': (2, 25),
        #                                      'max_features': (0.1, 0.999)})
        svcBO.maximize(init_points=50, restarts=200, n_iter=100)

        print('#' * 53)
        print('Final Results')
        print('SVC: %f' % svcBO.res['max']['max_val'])
        params = svcBO.res['max']['max_params']
        # Same text the old two-item print statement produced
        # (label, separator space, then the dict).
        print('%s %s' % ('max_params: ', params))

        clf = SVC(C=params['C'], random_state=1, tol=params['tol'])
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        result = clf.predict(X_test)
        sensi, speci = my_scores(y_test, result)
        print('%s %s %s' % (1 - score, sensi, speci))
        # print 'err:', 1 - score
        scores.append(score)
        sensis.append(sensi)
        specis.append(speci)

    print(scores)
    print("accur:\t{}\tstd:\t{}".format(np.mean(scores), np.std(scores)))
    print("sensi:\t{}".format(np.mean(sensis)))
    print("speci:\t{}".format(np.mean(specis)))
def compute_results(self):
    """Run Bayesian optimization over ``self.hp_ranges`` and return the processed results.

    The objective is ``self.model.test_hyperparams``; the keyword arguments
    for ``maximize`` come from ``self.get_gp_params()`` and the finished
    optimizer is handed to ``self.get_results``.
    """
    optimizer = BayesianOptimization(self.model.test_hyperparams,
                                     self.hp_ranges)
    optimizer.maximize(**self.get_gp_params())
    return self.get_results(optimizer)
def main():
    """Search the seven ``play_game`` weights with Bayesian optimization and print the best.

    One known point is probed first via ``explore``; the optimizer then runs
    5 random initial points followed by 50 guided iterations.
    """
    # The lambda maps the optimizer's keyword arguments onto play_game's
    # positional parameters.
    bo = BayesianOptimization(
        lambda fr, sm, mo, ma, nm, de, co: play_game(fr, sm, mo, ma, nm, de, co),
        {'fr': (2, 6),
         'sm': (-1, 1),
         'mo': (0, 2),
         'ma': (0, 2),
         'nm': (-1, 1),
         'de': (-1, 1),
         'co': (-1, 1)})

    bo.explore({'fr': [5.0771664428677061],
                'sm': [-0.13059762676063172],
                'mo': [1.3682148714919597],
                'ma': [0.52214706278657907],
                'nm': [-0.86627512983565302],
                'de': [0.42238952601950097],
                'co': [-0.39416823224808289]})

    bo.maximize(init_points=5, n_iter=50, kappa=0.5)

    # The output values can be accessed with self.res
    # print() call form: valid on both Python 2 and 3 and consistent with the
    # line below (the original mixed a print statement with a print call).
    print('RESULTS')
    print(bo.res['max'])
def test_explore_lazy():
    """Lazy ``explore`` must queue points without evaluating them until ``maximize``."""
    rng = ensure_rng(0)
    grid = np.linspace(-2, 10, 1000)
    values = np.exp(-(grid - 2)**2) + np.exp(-(grid - 6)**2 / 10) + 1 / (grid**2 + 1)

    def lookup(x):
        # The parameter is an index into the precomputed table.
        return values[int(x)]

    bo = BayesianOptimization(f=lookup,
                              pbounds={'x': (0, len(values) - 1)},
                              random_state=rng,
                              verbose=0)

    # Queue the worst point, then the best one.
    # Note we currently expect lazy explore to override points
    # This may not be the case in the future.
    for index in (values.argmin(), values.argmax()):
        bo.explore({'x': [index]}, eager=False)
        assert len(bo.space) == 0
        assert len(bo.init_points) == 1

    # With no random or guided steps, only the queued point gets evaluated.
    bo.maximize(init_points=0, n_iter=0, acq='ucb', kappa=5)

    best = bo.space.max_point()
    best_params = best['max_params']
    best_val = best['max_val']

    assert best_params['x'] == values.argmax()
    assert best_val == values.max()
def test_only_random():
    """Twenty random initial points alone should already land near the main peak."""
    rng = ensure_rng(0)
    grid = np.linspace(-2, 10, 1000)
    values = np.exp(-(grid - 2)**2) + np.exp(-(grid - 6)**2 / 10) + 1 / (grid**2 + 1)

    def lookup(x):
        # The parameter is an index into the precomputed table.
        return values[int(x)]

    bo = BayesianOptimization(f=lookup,
                              pbounds={'x': (0, len(values) - 1)},
                              random_state=rng,
                              verbose=0)
    bo.init(20)

    best = bo.space.max_point()
    best_x = best['max_params']['x']
    best_val = best['max_val']

    assert best_val > 1.0, 'function range is ~.2 - ~1.4, should be above 1.'
    assert best_val / values.max() > .8, 'should be better than 80% of max val'
    assert best_x > 200, 'should be in a peak area (around 300)'
    assert best_x < 500, 'should be in a peak area (around 300)'
def opti(self):
    """Maximize ``self.trainAndCompareHit`` over ``x`` in (10, 50) and ``y`` in (0.1, 1.0).

    Seeds the optimizer with explicit probe points and one already-known
    result, runs 5 random plus 5 guided evaluations, and prints the best
    entry of ``res``.
    """
    search_space = {"x": (10, 50), "y": (0.1, 1.0)}
    optimizer = BayesianOptimization(self.trainAndCompareHit, search_space)

    # NOTE(review): "x" supplies 40 values while "y" supplies 5 — confirm
    # that explore() accepts value lists of different lengths.
    probe_points = {"x": range(10, 50), "y": [0.1, 0.25, 0.5, 0.75, 1.0]}
    optimizer.explore(probe_points)

    # Known observation, keyed by its target value.
    known_results = {-11: {"x": 20, "y": 0.5}}
    optimizer.initialize(known_results)

    optimizer.maximize(init_points=5, n_iter=5, kappa=3.29)
    print(optimizer.res["max"])
file.close() K.clear_session() # bayes opt is a maximization algorithm, to minimize validation_loss, return 1-this bayes_opt_score = 1.0 - score[1] return bayes_opt_score # bayesian optimization optimizer = BayesianOptimization( f=train_model, pbounds={ 'encoder_blocks': (2, 3.999), 'lstm_units': (3, 5.999), #2** 'lr': (0.001, 0.0001), 'batch_size': (1, 2.999), 'kernel_size': (3, 5.999), #*16 'num_res': (1, 4.999) }, verbose=2) optimizer.maximize(init_points=15, n_iter=20) # training-test-evaluation iterations with best params targets = [e['target'] for e in optimizer.res] best_index = targets.index(max(targets)) params = optimizer.res[best_index]['params'] for i in range(0, 10): train_model(encoder_blocks=params['encoder_blocks'], lstm_units=params['lstm_units'],
cv_result = xgb.cv( params, dtrain, seed=42, nfold=5, feval = f1, early_stopping_rounds=10, verbose_eval=0 ) return -1.0 * cv_result['test-f1-mean'].iloc[-1] xgb_bo = BayesianOptimization(bo_tune_xgb, {'max_depth': (6, 14), 'min_split_loss': (0, 1), 'learning_rate':(0,2), 'n_estimators':(50,200), 'subsample' :(0.5,1), 'min_child_weight' : (0,5) }) xgb_bo.maximize(init_points=100, n_iter=700, acq='ei', xi=0.0) with open('log.log', 'a') as logfile: logfile.write(f'{xgb_bo.max}') params = { 'learning_rate': 0.2, 'min_split_loss': 0.2, 'n_estimators': 100, 'objective': 'binary:logistic', 'max_depth': 6,
def optimizeBayes(self):
    """Create the Bayesian optimizer over ``self.bounds`` and run 200 iterations.

    The optimizer is kept on ``self.bayesianOptimizer``; the objective is
    ``self.optimizationFunctionBayes``.
    """
    optimizer = BayesianOptimization(self.optimizationFunctionBayes,
                                     self.bounds)
    self.bayesianOptimizer = optimizer
    optimizer.maximize(n_iter=200, nugget=0.02)
# Objective for the hyper-parameter search.
def rf_cv(n_estimators, min_samples_split, max_depth, max_features):
    """Return the mean 5-fold cross-validated ROC AUC of a random forest.

    The count-like arguments are truncated to int and ``max_features`` is
    capped at 0.999 before the classifier is built.
    """
    forest = RandomForestClassifier(n_estimators=int(n_estimators),
                                    min_samples_split=int(min_samples_split),
                                    max_depth=int(max_depth),
                                    max_features=min(max_features, 0.999),
                                    random_state=2)
    fold_scores = cross_val_score(forest, data_tr, label_tr,
                                  scoring="roc_auc", cv=5)
    return fold_scores.mean()


# Instantiate a Bayesian optimization object over the search ranges.
rf_bo = BayesianOptimization(
    rf_cv,
    {
        "n_estimators": (10, 250),
        "min_samples_split": (2, 25),
        "max_features": (0.1, 0.999),
        "max_depth": (10, 18),
    },
)
# Run the Bayesian optimization.
rf_bo.maximize(init_points=5, n_iter=25)
print(rf_bo.max)

# Build the model.
# NOTE(review): these values are hard-coded rather than read from rf_bo.max —
# presumably copied from an earlier run; confirm.
rfc = RandomForestClassifier(n_estimators=200,
                             min_samples_split=2,
                             max_features=0.14,
                             max_depth=14)
# Train the model and evaluate it on the held-out data.
rfc = rfc.fit(data_tr, label_tr)
pre = rfc.predict(data_te)
score_r = rfc.score(data_te, label_te)
def __init__(self,
             outdir,
             hyperparamsetting,
             randomstate=2019,
             maxrounds=3000,
             minrounds=3,
             earlystoprounds=30,
             nthread=16,
             doregression=False,
             useeffrms=True,
             usegpu=False):
    """Set up the xgboost hyper-parameter search and resume earlier history if present.

    Parameters
    ----------
    outdir : str
        Directory used for cached results; ``<outdir>/cvresults`` is created
        if missing and ``<outdir>/summary.csv`` is read when it exists.
    hyperparamsetting : dict
        Non-empty mapping ``name -> {'default': ..., 'range': ...,
        'loguniform': ...}``; all three keys are read for every entry.
    randomstate : int
        Seed handed to the optimizer; increased by the number of rows in an
        existing summary file.
    maxrounds, minrounds, earlystoprounds : int
        Stored as-is on the instance for use by other methods.
    nthread : int
        Stored in the xgboost parameter dict as ``nthread``.
    doregression : bool
        Selects regression (``reg:linear``) instead of classification
        (``binary:logitraw`` with AUC).
    useeffrms : bool
        For regression, track the ``effrms`` CV columns instead of ``rmse``.
    usegpu : bool
        Adds ``tree_method='gpu_hist'`` to the xgboost parameters.
    """
    assert (hyperparamsetting and isinstance(hyperparamsetting, dict))

    self.outdir_ = outdir  # results caching
    # default hyperparameter setting for xgboost
    self.hyperparamdefault_ = {
        k: v['default']
        for k, v in hyperparamsetting.items()
    }
    # hyperparameter ranges to be optimized
    self.hyperparamranges_ = {
        k: tuple(v['range'])
        for k, v in hyperparamsetting.items()
    }
    # hyperparameter names whose value will be sampled in a log-uniform way
    self.hyperparamloguniform_ = [
        k for k, v in hyperparamsetting.items() if v['loguniform']
    ]

    self.randomstate_ = randomstate
    self.maxrounds_ = maxrounds
    self.minrounds_ = minrounds
    self.earlystoprounds_ = earlystoprounds
    self.doregression_ = doregression
    self.useeffrms_ = useeffrms

    self.params_ = {
        'silent': 1,
        'verbose_eval': 0,
        'nthread': nthread,
        'objective': 'reg:linear',
    }
    self.cvcolumns_ = []  # sequence matters
    self.cvresults_ = []  # holding result of each cross validation
    self.cviter_ = 0  # number of cross validation performed

    if usegpu:  # enable GPU acceleration
        self.params_.update({
            "tree_method": "gpu_hist",
        })

    ## setting cvresults subdir
    # exist_ok avoids the check-then-create race when several jobs share
    # the same output directory.
    os.makedirs(join(self.outdir_, 'cvresults'), exist_ok=True)

    if doregression:  # regression task
        self.hyperparamdefault_['base_score'] = 1
        if useeffrms:
            self.cvcolumns_ = [
                "train-effrms-mean", "train-effrms-std",
                "test-effrms-mean", "test-effrms-std"
            ]
        else:
            self.cvcolumns_ = [
                "train-rmse-mean", "train-rmse-std",
                "test-rmse-mean", "test-rmse-std"
            ]
    else:  # classification task
        self.cvcolumns_ = [
            "train-auc-mean", "train-auc-std",
            "test-auc-mean", "test-auc-std"
        ]
        self.params_.update({
            'objective': 'binary:logitraw',
            'eval_metric': 'auc',
        })

    self.earlystophistory_ = []
    self.models_ = {}
    self.callbackstatus_ = []
    self.trieddefault_ = False

    ## optimizer
    self.optimizer_ = BayesianOptimization(self.evaluate_xgb,
                                           self.hyperparamranges_,
                                           self.randomstate_)

    ## if trained before, adjust random state and load history
    summaryfile = join(self.outdir_, 'summary.csv')
    if os.path.isfile(summaryfile):
        _df = pd.read_csv(summaryfile)
        self.randomstate_ += len(_df)
        self._load_data(summaryfile)
def optimize(self):
    """Tune and fit the estimator selected by ``self.algo``.

    ``'RandomForest'``, ``'GradientBoostingTree'`` and
    ``'SupportVectorMachine'`` are tuned with Bayesian optimization
    (``self.iter`` guided iterations, fixed seed 12) against the matching
    ``self.*_cv`` scorer; ``'LogisticRegression'`` is tuned with a grid
    search. For more information visit:
    https://www.kdnuggets.com/2019/07/xgboost-random-forest-bayesian-optimisation.html

    Parameters
    ----------
    None

    Returns
    ---------
    estimator: scikit-learn estimator
        The estimator built with the best hyper-parameters found and fitted
        on ``self.X`` / ``self.y_val``. For any other ``self.algo`` a message
        is printed and ``None`` is returned.
    """

    def run_search(objective, bounds):
        # Shared driver: optimize, report, and hand back the best parameters.
        search = BayesianOptimization(f=objective,
                                      pbounds=bounds,
                                      random_state=12,
                                      verbose=2)
        search.maximize(n_iter=self.iter)
        print("Final result:", search.max)
        return search.max['params']

    algo = self.algo

    if algo == 'RandomForest':

        def rf_objective(n_estimators, max_depth, min_samples_split,
                         min_samples_leaf, max_leaf_nodes):
            # The optimizer proposes floats; the scorer expects integers.
            return self.rfc_cv(n_estimators=int(n_estimators),
                               max_depth=int(max_depth),
                               min_samples_split=int(min_samples_split),
                               min_samples_leaf=int(min_samples_leaf),
                               max_leaf_nodes=int(max_leaf_nodes),
                               data=self.X,
                               targets=self.y_val,
                               scoring=self.scoring)

        best = run_search(rf_objective, {
            "n_estimators": (10, 250),
            "min_samples_split": (2, 25),
            "max_depth": (2, 300),
            "min_samples_leaf": (1, 25),
            "max_leaf_nodes": (2, 25),
        })
        forest = RandomForestClassifier(
            max_leaf_nodes=int(best['max_leaf_nodes']),
            min_samples_split=int(best['min_samples_split']),
            max_depth=int(best["max_depth"]),
            min_samples_leaf=int(best['min_samples_leaf']),
            n_estimators=int(best['n_estimators']),
            random_state=12)
        return forest.fit(self.X, self.y_val)

    if algo == "GradientBoostingTree":

        def gbt_objective(learning_rate, n_estimators, min_samples_split,
                          min_samples_leaf, max_depth):
            return self.gbt_cv(learning_rate=float(learning_rate),
                               n_estimators=int(n_estimators),
                               min_samples_split=int(min_samples_split),
                               min_samples_leaf=int(min_samples_leaf),
                               max_depth=int(max_depth),
                               data=self.X,
                               targets=self.y_val,
                               scoring=self.scoring)

        best = run_search(gbt_objective, {
            "learning_rate": (0.001, 0.2),
            "min_samples_split": (2, 25),
            "max_depth": (2, 300),
            "min_samples_leaf": (1, 25),
            "n_estimators": (10, 300),
        })
        booster = GradientBoostingClassifier(
            learning_rate=best['learning_rate'],
            min_samples_split=int(best['min_samples_split']),
            max_depth=int(best["max_depth"]),
            min_samples_leaf=int(best['min_samples_leaf']),
            n_estimators=int(best['n_estimators']),
            random_state=12)
        return booster.fit(self.X, self.y_val)

    if algo == "SupportVectorMachine":

        def svc_objective(C, degree):
            return self.svc_cv(C=float(C),
                               degree=int(degree),
                               data=self.X,
                               targets=self.y_val,
                               scoring=self.scoring)

        best = run_search(svc_objective, {
            "C": (0.001, 0.9999),
            "degree": (2, 4),
        })
        machine = SVC(C=best['C'],
                      degree=int(best['degree']),
                      kernel='poly',
                      random_state=12)
        return machine.fit(self.X, self.y_val)

    if algo == "LogisticRegression":
        # Small discrete space: exhaustive grid search instead of Bayesian search.
        base_model = LogisticRegression(n_jobs=-1, random_state=12)
        grid = {
            "C": [
                0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75,
                0.8, 0.85, 0.9, 0.95, 1
            ],
            "fit_intercept": [True, False]
        }
        fitted_grid = GridSearchCV(base_model,
                                   param_grid=grid,
                                   scoring=self.scoring,
                                   n_jobs=-1).fit(self.X, self.y_val)
        return fitted_grid.best_estimator_

    print("No valid algorithm")
from bayes_opt import BayesianOptimization

# Example of how to use this Bayesian optimization package.

# Let's find the maximum of a simple quadratic function of two variables.
# The optimizer object receives the function to be maximized together
# with the parameter names and their bounds.
bounds = {'x': (-4, 4), 'y': (-3, 3)}
bo = BayesianOptimization(lambda x, y: -x**2 - (y - 1)**2 + 1, bounds)

# Points we want the algorithm to probe can be passed explicitly:
# a dictionary mapping each parameter name to a list of values to
# include in the search.
probe_points = {'x': [-1, 3], 'y': [-2, 2]}
bo.explore(probe_points)

# Prior knowledge about the target function (even if not totally
# accurate) can also be handed to the optimizer. The dictionary uses
# target values as keys; each value is a dictionary of parameter names
# and their corresponding values.
known_points = {-2: {'x': 1, 'y': 0}, -1.251: {'x': 1, 'y': 1.5}}
bo.initialize(known_points)

# Once the initialization conditions are in place, maximize() runs the
# actual optimization.
bo.maximize(init_points=5, n_iter=15, kappa=3.29)

# The output values can be accessed with self.res
print(bo.res['max'])
right_index=True, left_on=["Store", "SchoolHoliday"]) # If there is a missing value due to the Merge, replace with the Mean value X_Final = X_Final.apply(lambda x: x.fillna(x.mean()), axis=0) print("Size of Training Set: Columns = {}, Rows = {}"). \ format(X.shape[1], X.shape[0]) print("Size of Test Set: Columns = {}, Rows = {}"). \ format(X_Final.shape[1], X_Final.shape[0]) ############################################################################## # Bayesian Optimisation - 75 Iterations for Each Algorithm # Machine Learning Algorithm #1 - Define ranges of Hyperparameters ml1_bo = BayesianOptimization(cross_validation, {"max_features": (1, 20), "criterion": (0, 1), "normv": (1, 1), "max_depth": (1, 40), "n_estimators": (100, 300), "log_y": (1, 1)}) ml1_bo.explore({"max_features": [3.0], "criterion": [0], "normv": [1], "max_depth": [15], "n_estimators": [50], "log_y": [1]}) # Machine Learning Algorithm #2 - Define ranges of Hyperparameters ml2_bo = BayesianOptimization(cross_validation2, {"n_neighbors": (2, 20), "leaf_size": (10, 60), "normv": (1, 1), "log_y": (1, 1)}) ml2_bo.explore({"n_neighbors": [5], "leaf_size": [20],
def gekko_bayesian(indicator=None):
    """Search strategy parameters with Bayesian optimization and print a config.js-style report.

    Three evaluations are reported: the best point found by the optimizer,
    a second ``gekko_search`` run with those parameters, and a final
    ``Evaluate`` call with them.

    Parameters
    ----------
    indicator : str or None
        Strategy name; when ``None`` the name is read from
        ``settings['Strategy']``. The chosen name is also written to the
        module-level ``Strategy``.

    Returns
    -------
    dict
        A copy of the best parameters found by the optimizer.
    """
    global Strategy

    print("")
    # Identity test: `== None` can be hijacked by a custom __eq__.
    Strategy = settings['Strategy'] if indicator is None else indicator
    print("Starting search %s parameters" % Strategy)
    bo = BayesianOptimization(gekko_search, copy.deepcopy(StratConfig))

    # 1st Evaluate
    print("")
    print("Step 1: BayesianOptimization parameter search")
    bo.maximize(init_points=settings['init_points'], n_iter=settings['num_iter'])
    max_val = bo.res['max']['max_val']
    # `all_val` and `stats` are module-level records indexed in parallel
    # here; look up the stats row whose score equals the best value.
    index = all_val.index(max_val)
    s1 = stats[index]

    # 2nd Evaluate
    print("")
    print("Step 2: testing searched parameters on random date")
    max_params = bo.res['max']['max_params'].copy()
    #max_params["persistence"] = 1
    print("Starting Second Evaluation")
    gekko_search(**max_params)
    s2 = stats[-1]

    # 3rd Evaluate
    print("")
    print("Step 3: testing searched parameters on new date")
    print(max_params)
    s3 = Evaluate(Strategy, max_params)
    resultjson = expandGekkoStrategyParameters(max_params, Strategy)#[Strategy]

    # config.js like output
    percentiles = np.array([0.25, 0.5, 0.75])
    formatted_percentiles = [str(int(round(x*100)))+"%" for x in percentiles]
    stats_index = (['count', 'mean', 'std', 'min'] +
                   formatted_percentiles + ['max'])
    rule = "// " + '-' * 50

    def print_stats(title, row):
        # Headline is the entry at position 1, then one line per statistic.
        print("// %s: %.3f" % (title, row[1]))
        for i in range(len(row)):
            print('// %s: %.3f' % (stats_index[i], row[i]))
        print(rule)

    print("")
    print(rule)
    print("// " + Strategy + ' Settings')
    print(rule)
    print_stats("1st Evaluate", s1)
    print_stats("2nd Evaluate", s2)
    print("// 3rd Evaluted: %f" % s3)
    print(rule)
    print("config.%s = {%s};" % (Strategy, json.dumps(resultjson, indent=2)[1:-1]))
    print(rule)

    return max_params
class Optimizer(object):
    """Tune beamline voltages to maximize the measured current.

    Three strategies are available through :meth:`start`: Bayesian
    optimization (``'bayes'``), a custom walker scheme (``'my mcmc'``) and
    an ``emcee`` ensemble sampler (any other method name).
    """

    def __init__(self,beamline):
        super(Optimizer,self).__init__()
        self.beamline = beamline

    def start(self,subset,method):
        """Prepare the search space for the voltage names in ``subset`` and run ``method``."""
        if method == 'bayes':
            self.defineBounds(subset)
            self.optimizeBayes()
        elif method == 'my mcmc':
            self.defineParameters(subset)
            self.optimizeMyMCMC()
        else:
            self.defineParameters(subset)
            self.optimizeMCMC()

    def defineParameters(self,subset):
        """Build ``self.p`` with one varying parameter per voltage in ``subset``.

        Each parameter starts at the voltage's current setpoint and is limited
        to its scan range.
        """
        self.p = lm.Parameters()
        for n in subset:
            self.p.add(n, value = self.beamline.voltages[n].setpoint,
                       min = self.beamline.voltages[n].scanStart,
                       max = self.beamline.voltages[n].scanStop,
                       vary = True)

    def defineBounds(self,subset):
        """Build ``self.bounds``: voltage name -> (scanStart, scanStop)."""
        self.bounds = {}
        for n in subset:
            self.bounds[n] = (self.beamline.voltages[n].scanStart,
                              self.beamline.voltages[n].scanStop)

    def optimizeBayes(self):
        """Run Bayesian optimization over ``self.bounds``."""
        self.bayesianOptimizer = BayesianOptimization(self.optimizationFunctionBayes,
                                                      self.bounds)
        self.bayesianOptimizer.maximize(n_iter=200,nugget = 0.02)

    def optimizeMCMC(self):
        """Sample the voltages with an ``emcee`` ensemble of ``2 * ndim`` walkers."""
        ndim, nwalkers = len(self.p),2*len(self.p)
        # Start every walker at the current values plus a small random offset.
        pos = [np.array([p.value for p in self.p.values()]) + 1e-4*np.random.rand(ndim)
               for i in range(nwalkers)]
        sampler = emcee.EnsembleSampler(nwalkers, ndim, self.optimizationFunctionMCMC)
        sampler.run_mcmc(pos, N = 2000)

    def optimizeMyMCMC(self):
        """Walk ``10 * ndim`` custom walkers for as long as the beamline keeps scanning."""
        from walker import Walkers
        ndim, nwalkers = len(self.p),10*len(self.p)
        pos = [p.value for p in self.p.values()]
        w = Walkers(nwalkers,pos,10,1,self.optimizationFunctionMyMCMC)
        print('done')
        while self.beamline.continueScanning:
            w.walk_all()

    def optimizationFunctionBayes(self, vrs=None, **setpoints):
        """Apply voltage setpoints and return the measured current.

        ``BayesianOptimization`` calls its target with one keyword argument
        per key of the bounds dictionary, so the setpoints arrive by name
        (``setpoints``). The Bayes path only prepares ``self.bounds`` — not
        ``self.p`` — so the names must come from the call itself.

        Parameters
        ----------
        vrs : sequence, optional
            Legacy positional form: values in the order of ``self.p.keys()``.
        **setpoints
            Voltage name -> setpoint.

        Returns
        -------
        The value of ``self.beamline.current`` after the setpoints settled.
        """
        if vrs is not None:
            # Backward-compatible positional call; explicit keywords win.
            named = dict(zip(self.p.keys(), vrs))
            named.update(setpoints)
            setpoints = named

        for n, v in setpoints.items():
            self.beamline.voltages[n].setpoint = v

        self.beamline.wait()
        self.beamline.wait() # Not sure why, but convergence works better with two waits?

        current = self.beamline.current.value
        print(current)
        return current

    def optimizationFunctionMCMC(self,vrs,):
        """Log-probability for ``emcee``: ``log(current)``, or ``-inf`` when out of range.

        Values outside ``[0, 10**4]`` and currents below ``10**-9`` are rejected.
        """
        for v in vrs:
            if v < 0 or v > 10**4:
                return -np.inf

        for n,v in zip(self.p.keys(),vrs):
            self.beamline.voltages[n].setpoint = v

        self.beamline.wait()
        self.beamline.wait() # Not sure why, but convergence works better with two waits?
        time.sleep(0.05)

        current = self.beamline.current.value
        if current < 10**-9:
            return -np.inf
        return np.log(current)

    def optimizationFunctionMyMCMC(self,vrs,):
        """Objective for the custom walkers: ``(current, std)``, or ``-inf`` when out of range."""
        for v in vrs:
            if v < 0 or v > 10**4:
                return -np.inf

        for n,v in zip(self.p.keys(),vrs):
            self.beamline.voltages[n].setpoint = v

        self.beamline.wait()
        self.beamline.wait() # Not sure why, but convergence works better with two waits?
        time.sleep(0.05)

        current = self.beamline.current.value
        std = self.beamline.current_std.value
        return (current,std)
acq.plot(x[np.argmax(utility)], np.max(utility), '*', markersize=15, label=u'Next Best Guess', markerfacecolor='gold', markeredgecolor='k', markeredgewidth=1) acq.set_xlim((0, 0.1)) acq.set_ylim((0, np.max(utility) + 0.5)) acq.set_ylabel('Utility', fontdict={'size':20}) acq.set_xlabel('x', fontdict={'size':20}) axis.legend() acq.legend() if __name__ == "__main__": gp_params = {"alpha": 1e-5} #SVM svcBO = BayesianOptimization(svccv, {'gamma': (0.00001, 0.1)}) svcBO.maximize(init_points=3, n_iter=4, **gp_params) #Random Forest rfcBO = BayesianOptimization( rfccv, {'n_estimators': (10, 300), 'max_depth': (2, 10) } ) rfcBO.explore({'max_depth': [2, 4, 6], 'n_estimators': [64, 128, 256]}) rfcBO.maximize(init_points=4, n_iter=4, **gp_params) print('Final Results')
def LightGBM_tuning(X_train, y_train, kfold=6):
    '''
    LightGBM hyperparameter tuning: use bayes_opt to maximize the
    cross-validated score (negative out-of-fold RMSE) on the entire
    training dataset.

    @ tuning hyperparameters:
        feature_fraction
        bagging_fraction
        lambda_l1
        max_depth
        min_data_in_leaf
        num_leaves

    @ default hyperparameters:
        bagging_freq = 1
        bagging_seed = 11
        boosting = 'gbdt'
        learning_rate: 0.005

    Parameters
    ----------
    X_train: feature dataframe
    y_train: target series
    kfold: number of cross-validation folds

    Return
    ------
    dict: dictionary of tuned hyperparameters of lightGBM
    '''
    import lightgbm as lgb
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold
    import numpy as np

    from bayes_opt.observer import JSONLogger
    from bayes_opt.event import Events
    from bayes_opt import BayesianOptimization

    # 'card_id' and 'first_active_month' are excluded from the model inputs;
    # columns whose name contains 'feature_' are treated as categorical.
    features = [feature for feature in X_train.columns
                if feature not in ['card_id', 'first_active_month']]
    categorical_features = [feature for feature in features
                            if 'feature_' in feature]
    folds = KFold(n_splits=kfold, shuffle=True, random_state=133)

    bayes_opt_params = {
        'feature_fraction': (0.1, 1.0),
        'bagging_fraction': (0.1, 1.0),
        'lambda_l1': (0, 6),
        'max_depth': (4, 20),
        'min_data_in_leaf': (10, 300),
        'num_leaves': (5, 300),
    }

    # Cross-validation objective: returns the score (-rmse) that the
    # Bayesian optimizer maximizes.
    def cv_helper(max_depth,
                  num_leaves,
                  min_data_in_leaf,
                  feature_fraction,
                  bagging_fraction,
                  lambda_l1):
        # Fresh out-of-fold prediction buffer for every evaluation.
        y_val = np.zeros(y_train.shape)

        for train_idxs, val_idxs in folds.split(X_train.values, y_train.values):
            # training set
            train_data = lgb.Dataset(data=X_train.iloc[train_idxs][features],
                                     label=y_train.iloc[train_idxs],
                                     categorical_feature=categorical_features)
            # validation set
            val_data = lgb.Dataset(data=X_train.iloc[val_idxs][features],
                                   label=y_train.iloc[val_idxs],
                                   categorical_feature=categorical_features)
            # hyperparameters (the optimizer proposes floats; the
            # count-like ones are truncated to int)
            params = {
                'objective': 'regression',
                'metric': 'rmse',
                'lambda_l1': lambda_l1,
                'num_leaves': int(num_leaves),
                'min_data_in_leaf': int(min_data_in_leaf),
                'max_depth': int(max_depth),
                'feature_fraction': feature_fraction,
                'bagging_fraction': bagging_fraction,
                'bagging_freq': 1,
                'bagging_seed': 11,
                'boosting': 'gbdt',
                'learning_rate': 0.005,
                'verbosity': 1
            }
            # model
            clf = lgb.train(params=params,
                            train_set=train_data,
                            num_boost_round=10000,
                            valid_sets=[train_data, val_data],
                            verbose_eval=200,
                            early_stopping_rounds=200)
            # prediction of validation
            y_val[val_idxs] = clf.predict(X_train.iloc[val_idxs][features],
                                          num_iteration=clf.best_iteration)

        return -mean_squared_error(y_true=y_train, y_pred=y_val)**0.5

    logger = JSONLogger(path="bayes_opt_log/lightGBM_logs.json")
    LGB_bayes_opt = BayesianOptimization(cv_helper, pbounds=bayes_opt_params)
    # The step event is spelled OPTMIZATION_STEP in some bayes_opt releases
    # and OPTIMIZATION_STEP in others; accept whichever the installed one has.
    step_event = (getattr(Events, 'OPTIMIZATION_STEP', None)
                  or Events.OPTMIZATION_STEP)
    LGB_bayes_opt.subscribe(step_event, logger)
    LGB_bayes_opt.maximize(init_points=4,
                           n_iter=20,
                           acq='ei',
                           xi=0.0)

    return LGB_bayes_opt.max['params']
def rfccv(n_estimators, min_samples_split, max_features):
    """Return the mean 2-fold cross-validated F1 score of a random forest.

    The optimizer proposes floats, so the count-like arguments are truncated
    to int and ``max_features`` is capped at 0.999.
    """
    val = cross_val_score(
        RFC(n_estimators=int(n_estimators),
            min_samples_split=int(min_samples_split),
            max_features=min(max_features, 0.999),
            random_state=2
        ),
        # scoring is passed by keyword: positionally it would be taken as
        # `groups` by sklearn.model_selection.cross_val_score.
        data, target, scoring='f1', cv=2
    ).mean()
    return val

if __name__ == "__main__":
    gp_params = {"alpha": 1e-5}

    svcBO = BayesianOptimization(svccv,
        {'C': (0.001, 100), 'gamma': (0.0001, 0.1)})
    svcBO.explore({'C': [0.001, 0.01, 0.1], 'gamma': [0.001, 0.01, 0.1]})

    rfcBO = BayesianOptimization(
        rfccv,
        {'n_estimators': (10, 250),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 0.999)}
    )

    svcBO.maximize(n_iter=10, **gp_params)
    print('-' * 53)

    rfcBO.maximize(n_iter=10, **gp_params)
    print('-' * 53)
    print('Final Results')
return cv_s(XGBClassifier(max_depth=int(max_depth), learning_rate=learning_rate, n_estimators=int(n_estimators), silent=silent, nthread=nthread, gamma=gamma, min_child_weight=min_child_weight, subsample=subsample, colsample_bytree=colsample_bytree, objective='multi:softprob'), dct, dy, "log_loss", cv=8).mean() xgboostBO = BayesOpt( xgbcv, { 'max_depth': (1, 8), 'learning_rate': (0.005, 0.1), 'n_estimators': (100, 600), 'gamma': (0.5, 5), 'min_child_weight': (1, 30), 'subsample': (0.2, 1), 'colsample_bytree': (0.2, 1) }) xgboostBO.maximize(init_points=35, n_iter=365) xgboostBO.res["max"] etime = float(time.time() - stime)
"""Example of how to use this bayesian optimization package."""

import sys
sys.path.append("./")
from bayes_opt import BayesianOptimization

# Let's find the maximum of a simple quadratic function of two variables.
# The optimizer object receives the function to be maximized together
# with the parameter names and their bounds.
bounds = {'x': (-4, 4), 'y': (-3, 3)}
bo = BayesianOptimization(lambda x, y: -x ** 2 - (y - 1) ** 2 + 1, bounds)

# Points we want the algorithm to probe can be passed explicitly:
# a dictionary mapping each parameter name to a list of values to
# include in the search.
probe_points = {'x': [-1, 3], 'y': [-2, 2]}
bo.explore(probe_points)

# Prior knowledge about the target function (even if not totally
# accurate) can also be handed to the optimizer. The dictionary has
# 'target' and the parameter names as keys, each with a list of
# corresponding values.
known_points = {
    'target': [-1, -1],
    'x': [1, 1],
    'y': [0, 2],
}
bo.initialize(known_points)
file.write("\n") file.close() K.clear_session() # bayes opt is a maximization algorithm, to minimize validation_loss, return 1-this bayes_opt_score = 1.0 - score[1] return bayes_opt_score # bayesian optimization optimizer = BayesianOptimization( f=train_model, pbounds={ 'residual_units': (4, 6.999), 'lr': (0.001, 0.0001), 'batch_size': (1, 2.999), # *16 # 'kernel_size': (3, 5.999) }, verbose=2) optimizer.maximize(init_points=2, n_iter=10) # training-test-evaluation iterations with best params if os.path.isdir('results') is False: os.mkdir('results') targets = [e['target'] for e in optimizer.res] bs_fname = 'bs_taxiNYC.json' with open(os.path.join('results', bs_fname), 'w') as f: json.dump(optimizer.res, f, indent=2) best_index = targets.index(max(targets))
# Configure logging before anything can log: once a message has gone through
# the root logger, a later basicConfig() call is a no-op and the format
# below would be ignored for the error path.
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')

config = ConfigParser.ConfigParser()
try:
    config.read("ensembles.config")
    valid_mode_on = config.getboolean(config_name, "valid_mode_on")
    if valid_mode_on:
        valid_file = "../data/train-va.csv"
    else:
        valid_file = None

    # Real lists (not lazy map objects) so len() works on any Python version.
    model_output_paths = [path.strip() for path in
                          config.get(config_name, "model_output_paths").split(",")]
    try:
        cs = [float(c) for c in config.get(config_name, "cs").split(",")]
        assert len(cs) == len(model_output_paths)
    except ConfigParser.NoOptionError:
        # No explicit weights configured: default to one per model.
        cs = np.ones(len(model_output_paths))
except Exception as e:
    # NOTE(review): execution continues after a failed load and the names
    # assigned above may be undefined below — confirm whether this should
    # re-raise.
    logging.error("Could not load configuration file from ensembles.config")
    logging.error(str(e))

df_valid = pd.read_csv(valid_file, usecols=["row_id", "place_id"])
df_valid.rename(columns={"place_id": "place_id_label"}, inplace=True)

dfs = load_models(model_output_paths)


def target(**cs):
    """Score the ensemble for the given per-model weights (``c0``, ``c1``, ...)."""
    return ensemble_score(dfs, model_output_paths, df_valid, **cs)


# One weight in [0, 1] per model output.
bo = BayesianOptimization(target, {"c%d" % m: (0., 1.) for m in range(len(model_output_paths))})
bo.maximize(n_iter=100, kappa=5)
# Load the predefined fold assignment.
xfolds = pd.read_csv('../input/xfolds.csv')

# Work with the validation split: rows flagged valid == 0 versus valid == 1.
idx0 = xfolds.loc[xfolds.valid == 0].index
idx1 = xfolds.loc[xfolds.valid == 1].index
x0 = xtrain.loc[xtrain.index.isin(idx0)]
x1 = xtrain.loc[xtrain.index.isin(idx1)]
y0 = ytrain.loc[ytrain.index.isin(idx0)]
y1 = ytrain.loc[ytrain.index.isin(idx1)]

# Search ranges for the xgboost hyper-parameters.
xgb_bounds = {
    'max_depth': (2, 25),
    'learning_rate': (0.0005, 0.06),
    'n_estimators': (500, 2000),
    'subsample': (0.1, 0.99),
    'colsample_bytree': (0.1, 0.99),
    'gamma': (0.00000000001, 0.05),
    'min_child_weight': (1, 40),
}
xgboostBO = BayesianOptimization(xgboostcv, xgb_bounds)

# Disabled: probe a previously recorded point before the search.
# xgboostBO.explore({'colsample_bytree': [0.76427399221822834],
#                    'learning_rate': [0.0073362638967263945],
#                    'min_child_weight': [14.634866816577702],
#                    'n_estimators': [2408],
#                    'subsample': [0.72679682406267243],
#                    'max_depth': [14.40730693062795],
#                    'gamma': [0.0071936123399884092]}
#                   )

xgboostBO.maximize(init_points=5, n_iter=20, acq='ei')
print('-' * 53)
xgtrain = xgb.DMatrix(X, label=y) return xgtrain if __name__ == '__main__': xgtrain = prepare_data() num_rounds = 3000 random_state = 2016 num_iter = 25 init_points = 5 params = { 'eta': 0.1, 'silent': 1, 'eval_metric': 'mae', 'verbose_eval': True, 'seed': random_state } xgbBO = BayesianOptimization(xgb_evaluate, {'min_child_weight': (1, 20), 'colsample_bytree': (0.1, 1), 'max_depth': (5, 15), 'subsample': (0.5, 1), 'gamma': (0, 10), 'alpha': (0, 10), }) xgbBO.maximize(init_points=init_points, n_iter=num_iter)
if not testing: prepareSubmission(pred) else: print(pred) uuid_string = str(uuid.uuid4()) f=functools.partial(runSolution,testing=True,train=train,test=test,numSquares=numSquares,y_w=1000) bo = BayesianOptimization(f=f, pbounds={ 'acc_w': (0, 1), # Fix w_y at 1000 as the most important feature #'w_y': (500, 2000), "daysin_w": (0.1, 0.5), "daycos_w": (0.1, 0.5), "minsin_w": (0.2, 0.7), "mincos_w": (0.2, 0.7), "weekdaysin_w": (0, 0.4), "weekdaycos_w": (0, 0.4), "x_w": (18, 24), "year_w": (0.4, 0.6), }, verbose=True ) ########################################################################### bo.maximize(init_points=2, n_iter=300, acq="ei", xi=0.1)#0,1 prefer exploration with open('{}.json'.format(uuid_string), 'w+') as fh: fh.write(json.dumps(bo.res, sort_keys=True, indent=4)) # "acc_w": 0.29209822227034421,
n_informative=12, n_redundant=7) def svccv(C, gamma): return cross_val_score(SVC(C=C, gamma=gamma, random_state=2), data, target, 'f1', cv=5).mean() def rfccv(n_estimators, min_samples_split, max_features): return cross_val_score(RFC(n_estimators=int(n_estimators), min_samples_split=int(min_samples_split), max_features=min(max_features, 0.999), random_state=2), data, target, 'f1', cv=5).mean() if __name__ == "__main__": svcBO = BayesianOptimization(svccv, {'C': (0.001, 100), 'gamma': (0.0001, 0.1)}) svcBO.explore({'C': [0.001, 0.01, 0.1], 'gamma': [0.001, 0.01, 0.1]}) rfcBO = BayesianOptimization(rfccv, {'n_estimators': (10, 250), 'min_samples_split': (2, 25), 'max_features': (0.1, 0.999)}) svcBO.maximize() print('-'*53) rfcBO.maximize() print('-'*53) print('Final Results') print('SVC: %f' % svcBO.res['max']['max_val']) print('RFC: %f' % rfcBO.res['max']['max_val'])
num_leaves=int(num_leaves), n_estimators=int(n_estimators), max_depth=int(max_depth), min_data_in_leaf=int(min_data_in_leaf), silent=silent, nthread=-nthread), X_train, y_train, scoring="neg_mean_squared_error", cv=5).mean() # Load data set and target values lgboostBO = BayesianOptimization( lgboostcv, { 'num_leaves': (5, 2500), 'n_estimators': (10, 2500), 'max_depth': (2, 65), 'min_data_in_leaf': (1, 100), }) # lgboostBO.maximize(init_points=init_points, n_iter=num_iter) params = lgboostBO.res['max']['max_params'] # Best model parameters found via Bayesian Optimization if id == '04': params = { 'max_depth': 14, 'min_data_in_leaf': 98, 'n_estimators': 297, 'num_leaves': 7 }
n_informative=12, n_redundant=7) def svccv(C, gamma): return cross_val_score(SVC(C=C, gamma=gamma, random_state=2), data, target, 'roc_auc', cv=5).mean() def rfccv(n_estimators, min_samples_split, max_features): return cross_val_score(RFC(n_estimators=int(n_estimators), min_samples_split=int(min_samples_split), max_features=min(max_features, 0.999), random_state=2), data, target, 'roc_auc', cv=5).mean() if __name__ == "__main__": svcBO = BayesianOptimization(svccv, {'C': (0.001, 100), 'gamma': (0.0001, 0.1)}) svcBO.explore({'C': [0.001, 0.01, 0.1], 'gamma': [0.001, 0.01, 0.1]}) rfcBO = BayesianOptimization(rfccv, {'n_estimators': (10, 250), 'min_samples_split': (2, 25), 'max_features': (0.1, 0.999)}) svcBO.maximize(acq='xcxcxc') print('-'*53) #---------------------------------------------------------- rfcBO.maximize() #------------------------------------------------------------------------------ #------------------------------------------------------------- print('-'*53) #---------------------------------------------------- print('Final Results') #---------------------------- print('SVC: %f' % svcBO.res['max']['max_val']) #---------------------------- print('RFC: %f' % rfcBO.res['max']['max_val'])
if optimization:
    print('optimization')

    def lr_cv(max_iter):
        """Return the mean cross-validated ``ali_scorer`` score for a given iteration budget."""
        # The optimizer samples real-valued points, but max_iter is an
        # iteration count and must be an integer.
        lr = LogisticRegression(solver='sag', n_jobs=1, max_iter=int(max_iter))
        score = cross_val_score(lr, df_train, y_train, scoring=ali_scorer,
                                cv=tscv, verbose=1, n_jobs=-1)
        return score.mean()

    aliBO = BayesianOptimization(lr_cv, {'max_iter': (100, 2000)})

    init_points = 5
    num_iter = 20
    aliBO.maximize(init_points=init_points, n_iter=num_iter)

    print(aliBO.res['max'])
    print(aliBO.res['all'])
    # The tuning run ends the script here.
    sys.exit(0)

if online:
    print('online')
    # Rows with label == 2 form the test set; everything else is training data.
    X_train = df_train.loc[df_train['label'] != 2]
    X_test = df_train.loc[df_train['label'] == 2]
    y_train = X_train.pop('label')
    y_test = X_test.pop('label')
params['learning_rate'] = learning_rate / 100 xgb_model = XGBRegressor(**params) xgb_model.fit(x_rsrp_train, y_rsrp_train) y_rsrp_pred = xgb_model.predict(x_rsrp_test) predictions = [round(value) for value in y_rsrp_pred] mse = metric.mean_squared_error(y_rsrp_test, predictions) rmse = math.sqrt(mse) return -1 * rmse # In[12]: xgbBO = BayesianOptimization(xgb_evaluate, { 'min_child_weight': (1, 20), 'learning_rate': (1, 10), 'max_depth': (3, 15) }) xgbBO.maximize(init_points=3, n_iter=10) # In[102]: params = {'learning_rate': 0.1, 'max_depth': 15, 'min_child_weight': 1} xgb_model = XGBRegressor(**params) xgb_model.fit(x_rsrp_train, y_rsrp_train) y_rsrp_pred = xgb_model.predict(x_rsrp_test) predictions = [round(value) for value in y_rsrp_pred] mae = metric.mean_absolute_error(y_rsrp_test, predictions) mse = metric.mean_squared_error(y_rsrp_test, predictions) rmse = math.sqrt(mse)
def start_automated_run(path, automated_run_id):
    """Run the Bayesian hyperparameter search of one automated run.

    Finished base learners that already lie in the search space seed the
    optimizer; the search then creates new base learners until it finishes
    or an error occurs. The outcome is stored on the automated run record.

    Args:
        path (str): Path to Xcessiv notebook

        automated_run_id (str): Automated Run ID
    """
    with functions.DBContextManager(path) as session:
        automated_run = (session.query(models.AutomatedRun)
                         .filter_by(id=automated_run_id)
                         .first())
        if not automated_run:
            raise exceptions.UserError(
                'Automated run {} does not exist'.format(automated_run_id))

        automated_run.job_id = get_current_job().id
        automated_run.job_status = 'started'
        session.add(automated_run)
        session.commit()

        try:
            module = functions.import_string_code_as_module(
                automated_run.source)
            # Fall back to a fixed seed when the user code defines none.
            random_state = getattr(module, 'random_state', 8)
            origin = automated_run.base_learner_origin
            assert module.metric_to_optimize in origin.metric_generators

            # Parameters that are not searched keep their default values.
            base_estimator = origin.return_estimator()
            base_estimator.set_params(**module.default_params)
            default_params = functions.make_serializable(
                base_estimator.get_params())
            non_searchable_params = {
                key: val for key, val in iteritems(default_params)
                if key not in module.pbounds
            }

            # Finished base learners whose fixed parameters all match the
            # defaults are already inside the search space.
            existing_base_learners = [
                base_learner for base_learner in origin.base_learners
                if base_learner.job_status == 'finished'
                and not any(base_learner.hyperparameters[key] != val
                            for key, val in iteritems(non_searchable_params))
            ]

            # Build the initialization dictionary from those learners.
            target = []
            initialization_dict = {key: [] for key in module.pbounds.keys()}
            for base_learner in existing_base_learners:
                hyperparameters = base_learner.hyperparameters
                # Skip learners with a non-numerical searchable parameter.
                if not all(isinstance(hyperparameters[key], numbers.Number)
                           for key in module.pbounds.keys()):
                    continue
                for key in module.pbounds.keys():
                    initialization_dict[key].append(
                        base_learner.hyperparameters[key])
                target.append(
                    base_learner.individual_score[module.metric_to_optimize])

            if module.invert_metric:
                initialization_dict['target'] = [-score for score in target]
            else:
                initialization_dict['target'] = target
            print('{} existing in initialization dictionary'.format(
                len(initialization_dict['target'])))

            # Function handed to the optimizer.
            func_to_optimize = return_func_to_optimize(
                path, session, origin, module.default_params,
                module.metric_to_optimize, module.invert_metric,
                set(module.integers))

            # Create the optimizer, seed it and run the search.
            bo = BayesianOptimization(func_to_optimize, module.pbounds)
            bo.initialize(initialization_dict)
            np.random.seed(random_state)
            bo.maximize(**module.maximize_config)

            automated_run.job_status = 'finished'
            session.add(automated_run)
            session.commit()
        except:
            # Record every failure on the run, then re-raise it unchanged.
            session.rollback()
            exc_type, exc_value = sys.exc_info()[:2]
            automated_run.job_status = 'errored'
            automated_run.description['error_type'] = repr(exc_type)
            automated_run.description['error_value'] = repr(exc_value)
            automated_run.description['error_traceback'] = \
                traceback.format_exception(*sys.exc_info())
            session.add(automated_run)
            session.commit()
            raise
def optimize(self):
    """Search the flattened parameter grid and store the best system.

    The optimizer is first probed with hand-picked configurations (one per
    entry of ``self.probe_divisors``), then maximized, and finally the best
    point is re-probed with the battery removed when it has one. Results
    are stored on ``self.best_score``, ``self.best_params`` and
    ``self.best_df``.
    """
    self.cambium.clean()
    self._flatten_param_grid()

    # --- initialize optimizer ---
    bounds_transformer = SequentialDomainReductionTransformer()
    optimizer = BayesianOptimization(
        f=self._worker_return_score,
        pbounds=self.bayes_grid,
        random_state=1,
        verbose=1,
        bounds_transformer=bounds_transformer,
    )

    # --- probe largest system config ---
    for divisor in self.probe_divisors:
        if self.tech == 'pv':
            probe_dict = self.bayes_grid.copy()
            #probe_dict['SystemDesign#subarray1_track_mode'] = 1
            probe_dict['SystemDesign#subarray1_azimuth'] = 180
            # Tilt is taken from the second '_'-separated token of the
            # resource file name.
            probe_dict['SystemDesign#subarray1_tilt'] = float(self.resource_file.split('/')[-1].split('_')[1])
            probe_dict['SystemDesign#dc_ac_ratio'] = 1.2
            if 'SystemDesign#system_capacity' in probe_dict.keys():
                probe_dict['SystemDesign#system_capacity'] = np.max(probe_dict['SystemDesign#system_capacity']) / divisor
            if 'BatteryTools#desired_power' in probe_dict.keys():
                probe_dict['BatteryTools#desired_power'] = np.max(probe_dict['BatteryTools#desired_power']) / divisor
                probe_dict['BatteryTools#desired_capacity'] = np.max(probe_dict['BatteryTools#desired_capacity'])

        elif self.tech == 'wind':
            probe_dict = self.bayes_grid.copy()
            probe_dict['Turbine#wind_turbine_hub_ht'] = 100
            probe_dict['Turbine#turbine_class'] = 7
            if 'Farm#system_capacity' in probe_dict.keys():
                probe_dict['Farm#system_capacity'] = np.max(probe_dict['Farm#system_capacity']) / divisor
            if 'BatteryTools#desired_power' in probe_dict.keys():
                probe_dict['BatteryTools#desired_power'] = np.max(probe_dict['BatteryTools#desired_power']) / divisor
                probe_dict['BatteryTools#desired_capacity'] = np.max(probe_dict['BatteryTools#desired_capacity'])

        optimizer.probe(params=probe_dict, lazy=False)

    # --- run optimizer ---
    optimizer.maximize(
        init_points=config.BAYES_INIT_POINTS,
        n_iter=config.BAYES_ITER,
        acq=config.BAYES_ACQ_FUNC,
        **config.BAYES_KWARGS
    )

    # --- rerun best system with no battery ---
    best_params = optimizer.max['params']
    if 'BatteryTools#desired_capacity' in best_params.keys():
        if (best_params['BatteryTools#desired_capacity'] > 0) | (best_params['BatteryTools#desired_power'] > 0):
            best_params['BatteryTools#desired_capacity'] = 0
            best_params['BatteryTools#desired_power'] = 0
            optimizer.probe(params=best_params, lazy=False)

    # --- best score ---
    best_score = optimizer.max['target']
    # The stored score is sign-flipped when opt_var is not being maximized.
    if self._check_if_maximizing(self.opt_var):
        self.best_score = best_score
    else:
        self.best_score = -1 * best_score

    # --- access best params ---
    self.best_params = optimizer.max['params']

    # --- force discrete params ---
    self.best_params = self._force_discrete_bayesian_params(self.best_params)

    # --- convert to nested dict ---
    self.best_params = self._unflatten_param_grid(self.best_params)

    # --- rerun best params ---
    output = self._base_worker(self.best_params)

    # --- Create new outputs ---
    output = self._create_output_metrics(output)

    # --- flatten param grid for df ---
    df_param_grid = self._nested_param_grid_to_df(self.best_params)

    # --- combine flattened param grid with output results ---
    dict_for_df = {**output, **df_param_grid}

    # --- Convert any iterables in dict to str representations ---
    # A one-row DataFrame cannot be built from values of differing
    # lengths, so iterables are stringified here and restored below.
    numpy_converted = []
    list_converted = []
    for k, v in dict_for_df.items():
        if isinstance(v, (str, int, float)):
            continue
        elif isinstance(v, (np.ndarray, np.generic)):
            dict_for_df[k] = str(v)
            numpy_converted.append(k)
        elif isinstance(v, (list, tuple)):
            dict_for_df[k] = str(v)
            list_converted.append(k)

    # --- convert to df ---
    self.best_df = pd.DataFrame(dict_for_df, index=[self.opt_var])

    # --- convert columns back to iterables ---
    for c in self.best_df.columns:
        try:
            if c in numpy_converted:
                # The builtin int replaces the np.int alias, which newer
                # NumPy releases no longer provide.
                self.best_df[c] = [np.fromstring(i[1:-1], dtype=int, sep=' ') for i in list(self.best_df[c])]
            elif c in list_converted:
                self.best_df[c] = [ast.literal_eval(i) for i in list(self.best_df[c])]
            else:
                continue
        except Exception:
            # Best effort: a column that cannot be restored stays a string.
            # log.warning(f'Warning! Error converting {c} back to iterable representation')
            pass

    # --- add entire param grid ---
    self.best_df['system_config'] = [self.best_params]
featimpmean = featimpmean.fillna(1. / featimp.shape[1]) normalization = featimpmean[chosen].sum() / featimpmean.sum() / np.sum( fscore.values()) for k, v in fscore.iteritems(): fscores[k] += normalization * v idx = round(1000 * (np.log(2) - s), scoredp) featimp = featimp.append(pd.Series(fscores, name=idx)) return idx while True: init_points = args.init n_iter = args.iter scaledrange = {k: (0, 1) for k in p_range.keys()} bo = BayesianOptimization(score, scaledrange) if p: bo.initialize({ k: { pk: (pv - p_range[pk][0]) / (p_range[pk][1] - p_range[pk][0]) for pk, pv in param.iteritems() } for k, param in p.iteritems() }) else: init_points, n_iter = 5, 0 if not args.trunc: bo.maximize(init_points=init_points, n_iter=n_iter, acq=args.acq) featimp_cur = featimp p_new = {} for i in xrange(len(bo.Y)):
# Keep the test ids aside and drop the id column from the features.
test_id = test['id'].values
test = test.drop('id', axis=1)

# Training data in xgboost's native container.
xgb_train = xgb.DMatrix(train, label=train_labels)

# Search ranges of the parameters handed to xgb_cv.
xgb_param_bounds = {
    'max_depth': (2, 12),
    'gamma': (0.001, 10.0),
    'min_child_weight': (0, 20),
    'max_delta_step': (0, 10),
    'subsample': (0.4, 1.0),
    'colsample_bytree': (0.4, 1.0),
}
xgb_bayes = BayesianOptimization(xgb_cv, xgb_param_bounds)

# Eight hand-picked points (one per list position) to evaluate first.
xgb_explore_points = {
    'max_depth': [3, 8, 3, 8, 8, 3, 8, 3],
    'gamma': [0.5, 8, 0.2, 9, 0.5, 8, 0.2, 9],
    'min_child_weight': [0.2, 0.2, 0.2, 0.2, 12, 12, 12, 12],
    'max_delta_step': [1, 2, 2, 1, 2, 1, 1, 2],
    'subsample': [0.6, 0.8, 0.6, 0.8, 0.6, 0.8, 0.6, 0.8],
    'colsample_bytree': [0.6, 0.8, 0.6, 0.8, 0.6, 0.8, 0.6, 0.8],
}
xgb_bayes.explore(xgb_explore_points)
def Catboost_tuning(X_train, y_train, kfold=6):
    '''
    Tune Catboost hyperparameters with Bayesian optimization, scoring each
    candidate by K-fold training over the whole training dataset.

    @ searched hyperparameters (bounds passed to the optimizer):
        one_hot_max_size: 0 ~ 6, cast to int
        depth: 5 ~ 11, cast to int
        l2_leaf_reg: 1 ~ 30
        random_strength: 1 ~ 30
        bagging_temperature: 0 ~ 1000

    @ fixed hyperparameters:
        iterations = 10000
        use_best_model = True
        eval_metric = loss_function = 'RMSE'
        learning_rate = 0.02
        early_stopping_rounds = 400
        border_count = 254
        task_type = 'GPU'

    Parameters
    ----------
    X_train: feature dataframe
    y_train: target series
    kfold: number of shuffled folds (default 6)

    Return
    ------
    dict: best hyperparameters found by the optimizer
    '''
    from catboost import train, Pool
    from sklearn.model_selection import KFold
    import numpy as np
    import gc
    from bayes_opt.observer import JSONLogger
    from bayes_opt.event import Events
    from bayes_opt import BayesianOptimization

    excluded_columns = ['card_id', 'first_active_month']
    features = [col for col in X_train.columns if col not in excluded_columns]
    categorical_features = [col for col in features if 'feature_' in col]

    folds = KFold(n_splits=kfold, shuffle=True, random_state=133)

    catboost_opt_params = {
        'one_hot_max_size': (0, 6),
        'depth': (5, 11),
        'l2_leaf_reg': (1, 30),
        'random_strength': (1, 30),
        'bagging_temperature': (0, 1000)
    }

    # Settings shared by every candidate.
    fixed_params = {
        'eval_metric': 'RMSE',
        'use_best_model': True,
        'loss_function': 'RMSE',
        'learning_rate': 0.02,
        'early_stopping_rounds': 400,
        'border_count': 254,
        'task_type': 'GPU',
    }

    def make_pool(frame, labels):
        # Wrap a slice of the data in a catboost Pool.
        return Pool(data=frame[features],
                    label=labels,
                    cat_features=categorical_features)

    def cv_helper(one_hot_max_size,
                  depth,
                  l2_leaf_reg,
                  random_strength,
                  bagging_temperature):
        # The whole training set serves as the evaluation set of each fold.
        all_data = make_pool(X_train, y_train)

        def fold_rmse(train_idxs, val_idxs):
            train_data = make_pool(X_train.iloc[train_idxs],
                                   y_train.iloc[train_idxs])
            # The validation pool is built but not handed to train().
            val_data = make_pool(X_train.iloc[val_idxs],
                                 y_train.iloc[val_idxs])
            params = dict(fixed_params,
                          one_hot_max_size=int(one_hot_max_size),
                          depth=int(depth),
                          l2_leaf_reg=l2_leaf_reg,
                          random_strength=random_strength,
                          bagging_temperature=bagging_temperature)
            clf = train(pool=train_data,
                        params=params,
                        verbose=200,
                        iterations=10000,
                        eval_set=all_data)
            return clf.best_score_['validation_0']['RMSE']

        RMSE = [fold_rmse(train_idxs, val_idxs)
                for train_idxs, val_idxs
                in folds.split(X_train.values, y_train.values)]
        # Negated because the optimizer maximizes its target.
        return -np.mean(np.array(RMSE))

    logger = JSONLogger(path="bayes_opt_log/catBoost_logs.json")
    CAT_bayes_opt = BayesianOptimization(cv_helper,
                                         pbounds=catboost_opt_params)
    CAT_bayes_opt.subscribe(Events.OPTMIZATION_STEP, logger)
    CAT_bayes_opt.maximize(init_points=4,
                           n_iter=20,
                           acq='ei',
                           xi=0.0)
    return CAT_bayes_opt.max['params']
@author: jd1336 """ import numpy as np from bayes_opt import BayesianOptimization def camel6(x, vae=0): # min is -1.0316 (0.0898,-0.7126) and (-0.0898,0.7126); [-3,3,[-2,2]] x1, x2 = x[0], x[1] f1 = (4.0 - 2.1 * x1 ** 2 + (x1 ** 4) / 3.0) * (x1 ** 2) + (x1 * x2) + (-4 + 4 * (x2 ** 2)) * (x2 ** 2) return -f1 def branin(x, vae=0): # print(x) x1, x2 = x[0], x[1] a, b, c = 1, 5.1 / (4 * np.pi ** 2), 5 / np.pi r, s, t = 6, 10, 1 / (8 * np.pi) return -(a * (x2 - b * x1 ** 2 + c * x1 - r) ** 2 + s * (1 - t) * np.cos(x1) + s) parUnknownId = [1, 2] bounds = [(-5, 5) for i in parUnknownId] parUnknownId = [str(i) for i in parUnknownId] gp_surr = BayesianOptimization(camel6, dict(zip(parUnknownId, bounds)), 0, 0) gp_surr.maximize(init_points=10, n_iter=100, acq='ei')
def main():
    """Maximize ``target`` over x1 and x2, both bounded to [-2, 10]."""
    search_space = {name: (-2, 10) for name in ('x1', 'x2')}
    optimizer = BayesianOptimization(target, search_space)
    # 2 random starting points, then 30 UCB-guided steps with kappa=5.
    optimizer.maximize(init_points=2, n_iter=30, acq='ucb', kappa=5)
cv_test = [] for i, (train_index, test_index) in enumerate( cv.split(train, groups=train['user_id'].values)): cv_train.append(train.iloc[train_index]) cv_test.append(train.iloc[test_index]) del cv, train_index, test_index gc.collect() cv_mean_f1 = Parallel(n_jobs=3, temp_folder='/data/tmp/')( delayed(lgb_cv)(tra, tes, params, low_bound, topk, idx) for tra, tes, idx in zip(cv_train, cv_test, [1, 2, 3, 4, 5])) del cv_train, cv_test gc.collect() return 100 * np.mean(cv_mean_f1) # return np.mean(cv_test_auc) lgbBO = BayesianOptimization( lgb_evaluate, { 'num_leaves': (64, 256), 'max_depth': (7, 12), 'min_data_in_leaf': (10, 100), 'feature_fraction': (0.6, 1), 'bagging_freq': (5, 20), 'bagging_fraction': (0.6, 1), # f1 params 'low_bound': (0, 0.1), 'topk': (80, 100) }) lgbBO.maximize(init_points=init_points, n_iter=num_iter)
class XgboFitter:
    """Tune xgboost hyperparameters with Bayesian optimization.

    Every evaluated point is cross-validated with ``xgb.cv``; the fold
    results and a running summary are written below ``outdir`` so that a
    later instance pointed at the same directory reloads the history.
    """

    def __init__(self,
                 outdir,
                 hyperparamsetting,
                 randomstate=2019,
                 maxrounds=3000,
                 minrounds=3,
                 earlystoprounds=30,
                 nthread=16,
                 doregression=False,
                 useeffrms=True,
                 usegpu=False):
        """Set up xgboost parameters, output directory and optimizer.

        ``hyperparamsetting`` maps each hyperparameter name to a dict with
        the keys 'default', 'range' and 'loguniform'.
        """
        assert (hyperparamsetting and isinstance(hyperparamsetting, dict))
        self.outdir_ = outdir  # results caching
        self.hyperparamdefault_ = {
            k: v['default']
            for k, v in hyperparamsetting.items()
        }  # default hyperparameter setting for xgboost
        self.hyperparamranges_ = {
            k: tuple(v['range'])
            for k, v in hyperparamsetting.items()
        }  # hyperparameter ranges to be optimized
        self.hyperparamloguniform_ = \
            [k for k in hyperparamsetting if hyperparamsetting[k]['loguniform']]  # hyperparameter names whose value will be sampled in a log-uniform way
        self.randomstate_ = randomstate
        self.maxrounds_ = maxrounds
        self.minrounds_ = minrounds
        self.earlystoprounds_ = earlystoprounds
        self.doregression_ = doregression
        self.useeffrms_ = useeffrms
        self.params_ = {
            'silent': 1,
            'verbose_eval': 0,
            'nthread': nthread,
            'objective': 'reg:linear',
        }
        self.cvcolumns_ = []  # sequence matters
        self.cvresults_ = []  # holding result of each cross validation
        self.cviter_ = 0  # number of cross validation performed

        if usegpu:
            # enable GPU acceleration
            self.params_.update({
                "tree_method": "gpu_hist",
            })

        ## setting cvresults subdir
        if not os.path.exists(join(self.outdir_, 'cvresults')):
            os.makedirs(join(self.outdir_, 'cvresults'))

        if doregression:
            # regression task
            self.hyperparamdefault_['base_score'] = 1
            if useeffrms:
                self.cvcolumns_ = [
                    "train-effrms-mean", "train-effrms-std",
                    "test-effrms-mean", "test-effrms-std"
                ]
            else:
                self.cvcolumns_ = [
                    "train-rmse-mean", "train-rmse-std", "test-rmse-mean",
                    "test-rmse-std"
                ]
        else:
            # classification task
            self.cvcolumns_ = [
                "train-auc-mean", "train-auc-std", "test-auc-mean",
                "test-auc-std"
            ]
            self.params_.update({
                'objective': 'binary:logitraw',
                'eval_metric': 'auc',
            })

        self.earlystophistory_ = []
        self.models_ = {}
        self.callbackstatus_ = []
        self.trieddefault_ = False

        ## optimizer
        self.optimizer_ = BayesianOptimization(self.evaluate_xgb,
                                               self.hyperparamranges_,
                                               self.randomstate_)

        ## if trained before, adjust random state and load history
        summaryfile = join(self.outdir_, 'summary.csv')
        if os.path.isfile(summaryfile):
            _df = pd.read_csv(summaryfile)
            self.randomstate_ += len(_df)
            self._load_data(summaryfile)

    def _load_data(self, summaryfile):
        """Reload earlier optimization rounds from ``summaryfile``."""
        df = pd.read_csv(summaryfile)
        print(
            "Found results of {} optimization rounds in output directory, loading..."
            .format(len(df)))
        self.earlystophistory_.extend(list(df.n_estimators.values))
        self.callbackstatus_.extend(list(df.callback.values))
        self.trieddefault_ = True

        ## load cross validation results
        for i in range(len(df)):
            cvfile = join(self.outdir_, 'cvresults/{0:04d}.csv'.format(i))
            self.cvresults_.append(pd.read_csv(cvfile))
        self.cviter_ = len(df)

        ## load the optimization results so far into the Bayesian optimization object
        eval_col = self.cvcolumns_[2]
        # The optimizer maximizes, so regression errors are negated.
        df['target'] = -df[eval_col] if self.doregression_ else df[eval_col]
        for idx in df.index:
            value = df.loc[idx, 'target']
            params = df.loc[idx, list(self.hyperparamranges_)].to_dict()
            self.optimizer_.register(params, value)

    def evaluate_xgb(self, **hyperparameters):
        """Cross-validate one hyperparameter point and return its target.

        Returns the last value of the test-metric mean column, negated
        for regression.
        """
        # Log-uniform parameters are searched as base-10 exponents.
        for k in hyperparameters:
            if k in self.hyperparamloguniform_:
                hyperparameters[k] = 10**hyperparameters[k]
        self.params_.update(hyperparameters)
        self.params_ = guardxgbparams(self.params_)

        best_test_eval_metric = -9999999.0
        if self.optimizer_.res:
            self.summary.to_csv(join(self.outdir_, 'summary.csv'))
            best_test_eval_metric = max(
                [d['target'] for d in self.optimizer_.res])

        feval = None  # evaluation function
        callback_status = {'status': 0}

        if self.doregression_ and self.useeffrms_:
            callbacks = [
                early_stop(self.earlystoprounds_,
                           start_round=self.minrounds_,
                           eval_idx=-2),
            ]
            feval = evaleffrms
        else:
            callbacks = [
                early_stop(
                    self.earlystoprounds_,
                    start_round=self.minrounds_,
                ),
                callback_overtraining(best_test_eval_metric, callback_status),
            ]

        cv_result = xgb.cv(self.params_,
                           self.xgtrain_,
                           num_boost_round=self.maxrounds_,
                           nfold=self.nfold_,
                           seed=self.randomstate_,
                           callbacks=callbacks,
                           verbose_eval=50,
                           feval=feval)

        cv_result.to_csv(
            join(self.outdir_, 'cvresults/{0:04d}.csv'.format(self.cviter_)))
        self.cviter_ += 1
        self.earlystophistory_.append(len(cv_result))
        self.cvresults_.append(cv_result)
        self.callbackstatus_.append(callback_status['status'])

        if self.doregression_:
            return -cv_result[self.cvcolumns_[2]].values[-1]
        else:
            return cv_result[self.cvcolumns_[2]].values[-1]

    def optimize(self, xgtrain, init_points=3, n_iter=3, nfold=5, acq='ei'):
        """Probe the defaults once, then run the Bayesian search.

        The summary file is rewritten after every guided step so that an
        interrupted run can be reloaded and continued.
        """
        self.nfold_ = nfold
        self.xgtrain_ = xgtrain

        if not self.trieddefault_:
            # Probe by name and only for the searched parameters: a bare
            # list of values depends on dict ordering and gains an extra
            # entry once 'base_score' is added to the defaults.
            default_point = {
                k: self.hyperparamdefault_[k]
                for k in self.hyperparamranges_
            }
            self.optimizer_.probe(params=default_point, lazy=False)
            self.trieddefault_ = True

        ## NOTE
        # The following block is mostly equivalent to
        # self.optimizer_.maximize(init_points=init_points, n_iter=n_iter, acq=acq)
        # but saving summary file after each hyperparameter point probed,
        # in case program stopped, and we want to reload and continue next time.
        self.optimizer_.maximize(init_points=init_points, n_iter=0, acq=acq)
        for i in range(n_iter):
            self.optimizer_.maximize(init_points=0, n_iter=1, acq=acq)
            self.summary.to_csv(join(self.outdir_, 'summary.csv'))

        self.summary.to_csv(join(self.outdir_, 'summary.csv'))

    def fit(self, xgtrain, model='optimized'):
        """Train and store the 'default' or the 'optimized' model.

        Note that ``self.params_`` is updated in place.
        """
        params = self.params_
        if model == 'default':
            params.update(self.hyperparamdefault_)
            params['n_estimators'] = self.earlystophistory_[0]
        if model == 'optimized':
            idxmax = np.argmax([d['target'] for d in self.optimizer_.res])
            params.update(guardxgbparams(
                self.optimizer_.res[idxmax]['params']))
            for k in params:
                if k in self.hyperparamloguniform_:
                    params[k] = 10**params[k]
            params['n_estimators'] = self.earlystophistory_[idxmax]
        self.models_[model] = xgb.train(params,
                                        xgtrain,
                                        params['n_estimators'],
                                        verbose_eval=50)

    def predict(self, xgtest, model='optimized'):
        """Return the predictions of a previously fitted model."""
        return self.models_[model].predict(xgtest)

    @property
    def summary(self):
        """DataFrame with one row per evaluated point.

        Columns are the cross-validation metrics, the searched
        hyperparameters, 'n_estimators' and 'callback'.
        """
        res = [dict(d) for d in self.optimizer_.res
               ]  # [{'target': float, 'params': dict}, ]
        for d in res:
            d['params'] = guardxgbparams(d['params'])
        data = {}
        for name in self.cvcolumns_:
            data[name] = [r[name].values[-1] for r in self.cvresults_]
        for hp in self.hyperparamranges_:
            data[hp] = [r['params'][hp] for r in res]
        data['n_estimators'] = self.earlystophistory_
        data['callback'] = self.callbackstatus_
        return pd.DataFrame(data)

    def save_model(self, feature_names, model='optimized'):
        """Write text dump, binary model and TMVA XML below ``outdir``.

        The XML export is best effort: a failure only emits a warning.
        """
        modeldir = join(self.outdir_, 'model_' + model)
        print("saving {} model --> {}".format(model, modeldir))
        if not os.path.exists(modeldir):
            os.makedirs(modeldir)

        self.models_[model].dump_model(join(modeldir,
                                            'dump.raw.txt'))  # dump text
        self.models_[model].save_model(join(modeldir,
                                            'model.bin'))  # save binary
        tmvafile = join(modeldir, 'weights.xml')
        try:
            convert_model(self.models_[model].get_dump(),
                          input_variables=[(n, 'F') for n in feature_names],
                          output_xml=tmvafile)
            # Pretty-print the XML, gzip it, and keep the raw file too.
            os.system("xmllint --format {0} > {0}.tmp".format(tmvafile))
            os.system("mv {0} {0}.bak".format(tmvafile))
            os.system("mv {0}.tmp {0}".format(tmvafile))
            os.system("gzip -f {0}".format(tmvafile))
            os.system("mv {0}.bak {0}".format(tmvafile))
        except Exception:
            # Narrower than a bare except, so Ctrl-C is not swallowed.
            warnings.warn("\n".join([
                "Warning:",
                "Saving model<{}> in TMVA XML format failed.".format(model),
                "Don't worry now, you can still convert xgboost model later."
            ]))
# Bounded region of parameter space pbounds = { 'pp': (pp, pp), 'seqlength': (1, 52), 'densesize': (1, 256), 'batchsize': (16, 256), 'filters': (1, 256), 'rH': (0, 1), 'T': (0, 1), 'Tsin': (0, 1), } #constrained optimization technique, so you must specify the minimum and maximum values that can be probed for each parameter optimizer = BayesianOptimization( f=bayesOpt_function, #function that is optimized pbounds=pbounds, #opt.-range of parameters random_state=1, verbose= 0 # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent, verbose = 2 prints everything ) #load existing optimizer log_already_available = 0 if os.path.isfile("./logs_CNN_seq2seq_GWLt-1_" + Well_ID + ".json"): load_logs(optimizer, logs=["./logs_CNN_seq2seq_GWLt-1_" + Well_ID + ".json"]) print("\nExisting optimizer is already aware of {} points.".format( len(optimizer.space))) log_already_available = 1 # Saving progress logger = newJSONLogger(path="./logs_CNN_seq2seq_GWLt-1_" + Well_ID +
# Search range of every LightGBM parameter handed to LGB_bayesian.
bounds_LGB = dict(
    num_leaves=(5, 20),
    min_data_in_leaf=(5, 20),
    learning_rate=(0.01, 0.3),
    min_sum_hessian_in_leaf=(0.00001, 0.01),
    feature_fraction=(0.05, 0.5),
    lambda_l1=(0, 5.0),
    lambda_l2=(0, 5.0),
    min_gain_to_split=(0, 1.0),
    max_depth=(3, 15),
)

from bayes_opt import BayesianOptimization

LGB_BO = BayesianOptimization(LGB_bayesian, bounds_LGB, random_state=13)

# 5 random starting points followed by 5 guided steps.
init_points, n_iter = 5, 5

# Silence warnings for the duration of the search only.
with warnings.catch_warnings():
    warnings.filterwarnings('ignore')
    LGB_BO.maximize(init_points=init_points,
                    n_iter=n_iter,
                    acq='ucb',
                    xi=0.0,
                    alpha=1e-6)

# Best target value, then the parameters that produced it.
print(LGB_BO.max['target'])
print(LGB_BO.max['params'])
def gekko_bayesian(strategy):
    """Search a strategy's parameters and print a three-step report.

    Step 1 runs the Bayesian search, step 2 re-runs ``gekko_search`` with
    the best parameters, step 3 evaluates them through ``Evaluate``. The
    report is printed in a config.js-like layout followed by TOML.

    Returns the best parameter dict found by the search.
    """
    global Strategy
    print("")
    Strategy = strategy

    base_parameters = getSettings()["strategies"][Strategy]
    TargetParameters = \
        promoterz.parameterOperations.parameterValuesToRangeOfValues(
            base_parameters, bayesconf.parameter_spread)
    print("Starting search %s parameters" % Strategy)
    bo = BayesianOptimization(gekko_search, copy.deepcopy(TargetParameters))

    # 1st Evaluate
    print("")
    print("Step 1: BayesianOptimization parameter search")
    bo.maximize(init_points=settings['init_points'],
                n_iter=settings['num_iter'])
    best = bo.res['max']
    max_val = best['max_val']
    # Statistics recorded at the position of the best value.
    s1 = stats[all_val.index(max_val)]

    # 2nd Evaluate
    print("")
    print("Step 2: testing searched parameters on random date")
    max_params = bo.res['max']['max_params'].copy()
    #max_params["persistence"] = 1
    print("Starting Second Evaluation")
    gekko_search(**max_params)
    s2 = stats[-1]

    # 3rd Evaluate
    print("")
    print("Step 3: testing searched parameters on new date")
    watch = settings["watch"]  # read but not used further in this function
    print(max_params)
    s3 = Evaluate(Strategy, max_params)
    resultjson = expandGekkoStrategyParameters(max_params, Strategy)

    # config.js like output
    percentiles = np.array([0.25, 0.5, 0.75])
    formatted_percentiles = [
        str(int(round(x * 100))) + "%" for x in percentiles
    ]
    stats_index = (['count', 'mean', 'std', 'min'] + formatted_percentiles +
                   ['max'])
    separator = "// " + '-' * 50

    def print_stats(header, values):
        # Header line carrying values[1], then one labelled line per entry.
        print(header % values[1])
        for position, value in enumerate(values):
            print('// %s: %.3f' % (stats_index[position], value))

    print("")
    print(separator)
    print("// " + Strategy + ' Settings')
    print(separator)
    print_stats("// 1st Evaluate: %.3f", s1)
    print(separator)
    print_stats("// 2nd Evaluate: %.3f", s2)
    print(separator)
    print("// 3rd Evaluted: %f" % s3)
    print(separator)
    print("config.%s = {%s};" %
          (Strategy, json.dumps(resultjson, indent=2)[1:-1]))
    print('\n\n')
    print(resultInterface.parametersToTOML(resultjson))
    print(separator)
    return max_params
# Settings for the Bayesian optimization: default parameter ranges.
param = {'lsigma_f': (-2, 2),
         'lsigma_s': (-2, 2),
         'lsigma_t': (-2, 2),
         'l_corr': (10, 1000),
         't_corr': (1, 20)}
# Ranges supplied as JSON on the command line override the defaults.
for key, value in json.loads(args.range).items():
    param[key] = value
print("Prameter ranges as below:")
if not args.full:
    # Without --full the two correlation parameters are not searched.
    del param['l_corr']
    del param['t_corr']
print(param)

# Preparation of the data managers.
dm = data_manupulation.impute_shield_dm(100)
gene_df = pd.read_csv(args.genefile)
selected_gene_df = dm.select_gene_df(gene_df)
dm = data_manupulation.standard_dm(args.refnum)

# Bayesian optimization run.
bo = BayesianOptimization(ts_recovery_correlation, param)
bo.maximize(init_points=5, n_iter=args.boiter)
print(bo.res['max'])

# Output: the context manager flushes and closes the file even if
# serialization fails.
with open(args.filepath, "w") as f:
    json.dump(bo.res['max'], f)
# model = FlowlineModel(final_flowline.fls[-1], mb_model=mb_model, y0=0) model = FlowlineModel(VerticalWallFlowline(surface_h=bed_h, bed_h=bed_h, widths=np.zeros(200) + 3., map_dx=100), mb_model=mb_model, y0=0) model.run_until(time) flowline = model.fls[-1] new_mb_model = LinearMassBalanceModel(3000, grad=4) new_model = FlowlineModel(flowline, mb_model=new_mb_model, y0=0) new_model.run_until(150) return -sum( abs(final_flowline.fls[-1].surface_h - new_model.fls[-1].surface_h)) bo = BayesianOptimization(target, {'ela': (2500,3500), 'time': (0, 200)}) # One of the things we can do with this object is pass points # which we want the algorithm to probe. A dictionary with the # parameters names and a list of values to include in the search # must be given. bo.explore({'ela': [3000, 2750], 'time': [0,150]}) # Additionally, if we have any prior knowledge of the behaviour of # the target function (even if not totally accurate) we can also # tell that to the optimizer. # Here we pass a dictionary with 'target' and parameter names as keys and a # list of corresponding values #bo.initialize() # Once we are satisfied with the initialization conditions
from bayes_opt import BayesianOptimization
''' Example of how to use this bayesian optimization package. '''

# Maximize a simple quadratic function of two variables. The optimizer
# receives the function together with the name and the bounds of each
# of its parameters.
bo = BayesianOptimization(
    lambda x, y: -x**2 - (y - 1)**2 + 1,
    {'x': (-4, 4), 'y': (-3, 3)},
)

# Points the algorithm should probe can be passed explicitly as a
# dictionary mapping each parameter name to a list of values.
bo.explore({'x': [-1, 3], 'y': [-2, 2]})

# Prior knowledge about the target function (even if not fully accurate)
# can be passed as well: a dictionary whose keys are target values and
# whose values map parameter names to the corresponding parameter value.
bo.initialize({
    -2: {'x': 1, 'y': 0},
    -1.251: {'x': 1, 'y': 1.5},
})

# With the initialization in place, maximize() runs the search.
bo.maximize(init_points=15, n_iter=25)

# The results are available through bo.res.
print(bo.res['max'])
def construct_rf_classifier(self, train, features, label_col):
    """Tune a random forest with Bayesian optimization, then fit it.

    Candidates are scored by 5-fold cross-validated accuracy on
    ``train[features]`` against ``train[label_col]``. The best setting is
    fitted on the same data and stored in ``self.clf``; ``self.nr_clf``
    holds its number of trees and ``self.time`` the fit duration in
    seconds.
    """
    data = train[features]
    target = train[label_col]

    def decode(nr_classifiers, max_depth, min_samples_leaf, bootstrap,
               criterion, max_features):
        # Turn the optimizer's continuous values into forest arguments:
        # counts are truncated to int, switches are rounded to 0 or 1.
        return {
            'n_estimators': int(nr_classifiers),
            'max_depth': int(max_depth),
            'min_samples_leaf': int(min_samples_leaf),
            'bootstrap': bool(np.round(bootstrap)),
            'criterion': 'gini' if np.round(criterion) else 'entropy',
            'max_features': None if np.round(max_features) else 1.0,
        }

    def rfcv(nr_classifiers, max_depth, min_samples_leaf, bootstrap,
             criterion, max_features):
        forest = RandomForestClassifier(**decode(
            nr_classifiers, max_depth, min_samples_leaf, bootstrap,
            criterion, max_features))
        # scoring is passed by keyword: depending on which cross_val_score
        # is imported, the fourth positional argument is not the scorer.
        return cross_val_score(forest, data, target, scoring='accuracy',
                               cv=5).mean()

    params = {
        'nr_classifiers': (10, 1000),
        'max_depth': (5, 10),
        'min_samples_leaf': (2, 10),
        'bootstrap': (0, 1),
        'criterion': (0, 1),
        'max_features': (0, 1)
    }

    rfBO = BayesianOptimization(rfcv, params, verbose=0)
    rfBO.maximize(init_points=10, n_iter=20, n_restarts_optimizer=50)

    # Decode the best point exactly as candidates were decoded for scoring.
    best_settings = decode(**rfBO.res['max']['max_params'])
    self.nr_clf = best_settings['n_estimators']
    self.clf = RandomForestClassifier(**best_settings)

    start = time.time()
    self.clf.fit(data, target)
    self.time = time.time() - start
, w_x_d_y = 100 # 86 , w_x_t_y = 100 # 91 ) print "5. Execute Bayesian parameter optimization to select feature weights..." ### Bayesian Optimization of Parameters ### f = functools.partial(process_grid, train, test, threshold = nn_threshold, grid_ids = grid_ids, model = model_nn, grid_variable = grid_variable) #, weights = feature_weights) bo = BayesianOptimization(f=f, pbounds={ 'w_x': (80, 200), # (100, 1000) 'w_y': (50, 150), # (500, 2000) "w_hour": (50, 150), # (1, 10) "w_weekday": (20, 60), # (1, 10) "w_day_of_month": (20, 100), # (1,10) "w_month": (20, 80), # (1,10) "w_year": (0, 50), # (2,20) "w_accuracy": (1, 5), # (3,30) "w_x_d_y": (70, 200), # (3,30) "w_x_t_y": (70, 200) # (3,30) }, verbose=True ) bo.maximize(init_points = 2, n_iter = 1, acq = "ei", xi = 1.0) # 0.1 for i in range(300): bo.maximize(n_iter = 1, acq = "ei", xi = 1.0) # exploration points bo.maximize(n_iter = 1, acq = "ei", xi = 1.0) # exploitation points print "6. Complete!!!"
# Test features: keep the ids aside, then drop the ID column in place.
xtest = pd.read_csv('../input2/xtest_' + dataset_version + '.csv')
id_test = xtest['ID']
xtest.drop('ID', axis=1, inplace=True)

# Fold assignment.
xfolds = pd.read_csv('../input/xfolds.csv')

# Validation split: rows flagged valid == 0 and rows flagged valid == 1.
idx0 = xfolds[xfolds['valid'] == 0].index
idx1 = xfolds[xfolds['valid'] == 1].index
in_split0 = xtrain.index.isin(idx0)
in_split1 = xtrain.index.isin(idx1)
x0 = xtrain[in_split0]
x1 = xtrain[in_split1]
y0 = ytrain[ytrain.index.isin(idx0)]
y1 = ytrain[ytrain.index.isin(idx1)]

# Search ranges handed to extratreescv.
extratreesBO = BayesianOptimization(
    extratreescv,
    {
        'n_estimators': (250, 2000),
        'min_samples_split': (2, 6),
        'min_samples_leaf': (1, 6),
        'max_features': (15, 100),
        'max_depth': (25, 50),
        'min_weight_fraction_leaf': (0, 0.01),
    })

extratreesBO.maximize(init_points=5, n_iter=50, acq='ei')

print('-' * 53)
print('Final Results')
print('Extra Trees: %f' % extratreesBO.res['max']['max_val'])
print(extratreesBO.res['max']['max_params'])
args.experiment = sanitise_for_mongo(args.experiment) metadata = init_dataset(args.dataset) if not "surprise samples" in metadata: metadata["surprise_samples"] = 100000 score_method = "plausibility" surprise_depth=2 steps = 1000 init_steps = 100 recipes_per_step = 5 lower_bound_plaus=False num_sigma_range = 5 k=3 gp_params = {'corr':'absolute_exponential','nugget': 1} model = init_model(args.dataset, metadata, args.model, surprise_depth, args.experiment) params = {} for p in range(model.model.nhid): params["x_"+str(p)] = (-num_sigma_range,num_sigma_range) bo = BayesianOptimization(lambda **param_args : wrap_plausibility_and_surprise([param_args[p] for p in param_args], model=model, plausibility_dist=metadata["experiments"][args.experiment]["plausibility_distribution"], weight_by_length=False, errors_by_length=metadata["experiments"][args.experiment]["errors_by_length"],from_visible=False, feature_list=metadata["fields_x"], use_lower_bound=lower_bound_plaus, surprise_dist=metadata["experiments"][args.experiment]["surprise_distribution"], surprise_depth=surprise_depth),params) bo.maximize(init_points=init_steps, n_iter=0, kappa=k) for step in range(steps): bo.maximize(init_points=0, n_iter=recipes_per_step, kappa=k, **gp_params) for r,v in zip(bo.X[-recipes_per_step:],bo.Y[-recipes_per_step:]): print model.positive_features_from_design_vector(model.construct_from_hidden(np.atleast_2d(np.array(r)))[0].tolist()[0]).keys(),"({0:.4f})".format(v)
def kNNOptimize(train_set, test_set, njobs, ijob):
    """Tune the kNN parameters of each spatial bin by Bayesian optimization.

    The [0, 10] x [0, 10] domain is cut into NBINS_X * NBINS_Y rectangular
    bins. The bin indices are divided into ``njobs`` chunks with
    ``np.array_split`` and only chunk ``ijob`` is processed by this call.
    For every bin the optimizer output is written to
    ``knn_bayes/<i_bin>_<NBINS_X>_<NBINS_Y>.json``; a bin whose file already
    exists is skipped.

    Parameters
    ----------
    train_set : forwarded unchanged to ``validation_map3_kNN``
    test_set : not used by this function
    njobs : int
        number of chunks the bins are divided into
    ijob : int
        index of the chunk handled by this call

    Returns
    -------
    None
    """
    delta_x = 10. / NBINS_X
    delta_y = 10. / NBINS_Y
    NBINS_TOTAL = NBINS_X * NBINS_Y
    ijob_bins = np.array_split(np.arange(NBINS_TOTAL), njobs)[ijob]

    for i_bin in ijob_bins:
        # np.arange yields NumPy integers; json cannot serialize those on
        # Python 3, so work with a plain int from here on.
        i_bin = int(i_bin)

        bin_filename = 'knn_bayes/{0:05d}_{1:02d}_{2:02d}.json'.format(
            i_bin, NBINS_X, NBINS_Y)
        if os.path.isfile(bin_filename):
            continue

        # Bins are numbered row by row: row -> y, column -> x.
        row, col = divmod(i_bin, NBINS_X)
        x_lower = col * delta_x
        y_lower = row * delta_y
        x_upper = x_lower + delta_x
        y_upper = y_lower + delta_y

        # Some points fall exactly on the right or top boundary of the
        # domain, so the outermost bins are widened slightly. The outermost
        # bin is recognised by its index rather than by `upper == 10.`,
        # because the floating-point sum is not guaranteed to hit 10 exactly.
        if col == NBINS_X - 1:
            x_upper = 10. + 1.0e-5
        if row == NBINS_Y - 1:
            y_upper = 10. + 1.0e-5

        # Two known-decent parameter sets (first and second tuple entries),
        # based off @Sandro's kNN script, used to seed the optimizer.
        initial_points = {
            "cut_threshold": (5, 7),
            "w_x": (450, 550),
            "w_y": (1050, 950),
            "w_hour": (4, 2),
            "w_log10acc": (10, 10),
            "w_weekday": (2, 3),
            "w_year": (9, 11),
            "n_neighbors": (20, 25),
            "margin": (0.02, 0.03),
        }

        f = functools.partial(validation_map3_kNN,
                              train_set=train_set,
                              xlower=x_lower, xupper=x_upper,
                              ylower=y_lower, yupper=y_upper)

        bo = BayesianOptimization(
            f=f,
            pbounds={
                "cut_threshold": (3, 12),
                "w_x": (250, 1000),
                "w_y": (500, 2000),
                "w_hour": (1, 10),
                "w_log10acc": (5, 30),
                "w_weekday": (1, 10),
                "w_year": (2, 20),
                "n_neighbors": (10, 40),
                "margin": (0.01, 0.04),
            },
            verbose=True)

        # Seed the optimizer with the known-decent points.
        bo.explore(initial_points)

        # For some reason that is not understood, the Bayesian optimizer
        # slows down greatly after 64 iterations, so the run is limited
        # to 64 to stay computationally efficient.
        #
        # Explore the space (xi=0.1):
        # 2 custom (above), 5 initial (implied), 25 exploration = 32 total.
        bo.maximize(n_iter=25, acq="ei", xi=0.1)
        # Exploit the peaks for the other 32 iterations (xi=0.).
        bo.maximize(n_iter=32, acq="ei", xi=0.0)

        # Attach the best point and the bin geometry to the full trace.
        optimizer_output = bo.res['all']
        optimizer_output.update({
            'max': bo.res['max'],
            'i_bin': i_bin,
            'nx': NBINS_X,
            'ny': NBINS_Y,
            'x_lower': x_lower,
            'y_lower': y_lower,
            'x_upper': x_upper,
            'y_upper': y_upper,
        })

        with open(bin_filename, 'w') as fh:
            fh.write(json.dumps(optimizer_output, sort_keys=True, indent=4,
                                separators=(',', ': ')))
min_samples_split=int(min_samples_split), max_features=min(max_features, 0.999), # float max_depth=int(max_depth), random_state=2), X_train, y_train, scoring='r2', cv=5).mean() return val #### Run Bayesian optimization rf_bo = BayesianOptimization( rf_cv, { 'n_estimators': (10, 250), 'min_samples_split': (2, 25), 'max_features': (0.1, 0.999), 'max_depth': (5, 15) }) rf_bo.probe({ 'n_estimators': [100, 200, 300], 'min_samples_split': [2, 10, 20], 'max_features': [0.1, 0.5, 0.9], 'max_depth': [10, 15, 25] }) rf_bo.maximize() #### output the optimal hyperparameters rf_bo.max
# NOTE: data, useful_col and xgboostcv are not assigned in this section;
# they are expected to exist already when it runs.

# Keep these columns aside before the frame is reduced to useful_col.
doc_name_save = data['doc_name']
paragraph_nb_save = data['paragraph_nb']
firstname_is_french_save = data['firstname_is_french']

# Split the reduced frame into target (y) and features (X).
data = data[useful_col]
y = data['is_target']
data = data.drop('is_target', axis=1)
X = data

# Negative-to-positive class ratio; only referenced by the commented-out
# 'scale_pos_weight' entry below.
ratio = float(np.sum(y == 0)) / np.sum(y == 1)  # 131.708

xgboostBO = BayesianOptimization(
    xgboostcv,
    {
        'max_depth': (5, 8),
        'learning_rate': (0.01, 0.3),
        'n_estimators': (150, 300),
        # 'gamma': (1., 0.01),
        # 'min_child_weight': (1, 10),
        # 'max_delta_step': (0, 0.1),
        'subsample': (0.85, 1),
        'colsample_bytree': (0.5, 1),
        # 'scale_pos_weight' : ratio
    },
)

xgboostBO.maximize(init_points=10, n_iter=40)

print('-' * 53)
print('Final Results')
print('XGBOOST: %f' % xgboostBO.res['max']['max_val'])
print('-' * 53)
# Function-call form like the prints above: same output as the former
# `print x` statement on Python 2, and valid syntax on Python 3.
print(xgboostBO.res['max'])
max_delta_step=max_delta_step, subsample=subsample, colsample_bytree=colsample_bytree, seed=seed, objective="multi:softprob"), train, labels, "log_loss", cv=5).mean() if __name__ == "__main__": # Load data set and target values train, labels, test, _, _ = load_data() xgboostBO = BayesianOptimization(xgboostcv, {'max_depth': (5, 10), 'learning_rate': (0.01, 0.3), 'n_estimators': (50, 1000), 'gamma': (1., 0.01), 'min_child_weight': (2, 10), 'max_delta_step': (0, 0.1), 'subsample': (0.7, 0.8), 'colsample_bytree': (0.5, 0.99) }) xgboostBO.maximize() print('-' * 53) print('Final Results') print('XGBOOST: %f' % xgboostBO.res['max']['max_val'])
def optimize_lgbm_params(train_df, target_df):
    """Search LightGBM hyper-parameters with Bayesian optimization.

    Args:
        train_df(pd.DataFrame): Training data; the columns named by ``feats``
            (resolved outside this function) are fed to the classifier
        target_df(pd.Series): Labels, indexed positionally with the same
            fold indices as train_df

    Returns:
        best_params(dict): Best parameters found, with ``n_estimators`` and
            ``nthread`` added and ``num_leaves`` / ``max_depth`` cast to int
    """
    integer_params = ('num_leaves', 'max_depth')

    def _lgbm_evaluate(**params):
        """Score one parameter set by 2-fold out-of-fold ROC-AUC

        Args:
            params(dict): Parameter values proposed by the optimizer

        Returns:
            roc_auc_score(float): ROC-AUC of the out-of-fold predictions,
                the quantity maximized by the optimizer
        """
        warnings.simplefilter('ignore')

        # The optimizer proposes floats; these two must be integers.
        for name in integer_params:
            params[name] = int(params[name])

        clf = LGBMClassifier(**params, n_estimators=10000, nthread=4)
        splitter = KFold(n_splits=2, shuffle=True, random_state=1001)
        oof_proba = np.zeros(train_df.shape[0])

        for train_idx, valid_idx in splitter.split(train_df, target_df):
            train_x = train_df[feats].iloc[train_idx]
            train_y = target_df.iloc[train_idx]
            valid_x = train_df[feats].iloc[valid_idx]
            valid_y = target_df.iloc[valid_idx]

            clf.fit(train_x, train_y,
                    eval_set=[(train_x, train_y), (valid_x, valid_y)],
                    eval_metric='auc',
                    verbose=False,
                    early_stopping_rounds=100)

            # Positive-class probability at the best iteration found.
            fold_proba = clf.predict_proba(
                valid_x, num_iteration=clf.best_iteration_)
            oof_proba[valid_idx] = fold_proba[:, 1]

            # Drop the fold slices before the next fold is built.
            del train_x, train_y, valid_x, valid_y
            gc.collect()

        return roc_auc_score(target_df, oof_proba)

    # (lower, upper) range searched for each parameter
    search_space = {
        'colsample_bytree': (0.8, 1),
        'learning_rate': (.015, .025),
        'num_leaves': (33, 35),
        'subsample': (0.8, 1),
        'max_depth': (7, 9),
        'reg_alpha': (.03, .05),
        'reg_lambda': (.06, .08),
        'min_split_gain': (.01, .03),
        'min_child_weight': (38, 40),
    }

    bo = BayesianOptimization(_lgbm_evaluate, search_space)
    bo.maximize(init_points=5, n_iter=5)

    best_params = bo.max['params']
    best_params.update({
        'n_estimators': 10000,
        'nthread': 4,
        'num_leaves': int(best_params['num_leaves']),
        'max_depth': int(best_params['max_depth']),
    })
    return best_params
# NOTE: fold_index, xtrain, ytrain and kerascv are not assigned in this
# section; they are expected to exist already when it runs.

# Work with validation split: fold 1 is held out (x1/y1), the rest is x0/y0.
idx0 = np.where(fold_index != 1)
idx1 = np.where(fold_index == 1)
x0 = np.array(xtrain)[idx0, :][0]
x1 = np.array(xtrain)[idx1, :][0]
y0 = np.array(ytrain)[idx0]
y1 = np.array(ytrain)[idx1]

nb_classes = 2
dims = xtrain.shape[1]
print(dims, 'dims')

# Layer widths are searched between 15% and 200% of the input width.
kerasBO = BayesianOptimization(
    kerascv,
    {
        'dense1': (int(0.15 * dims), int(2 * dims)),
        'dropout1': (0.05, 0.5),
        'dense2': (int(0.15 * dims), int(2 * dims)),
        'dropout2': (0.05, 0.5),
        'epochs': (20, 150),
    },
)

# Seed the optimizer with one hand-picked configuration.
kerasBO.explore({
    'dense1': [int(0.15 * dims)],
    'dropout1': [0.05],
    'dense2': [int(1.5 * dims)],
    'dropout2': [0.5],
    'epochs': [40],
})

kerasBO.maximize(init_points=3, n_iter=25)

print('-' * 53)
print('Final Results')
# The label used to read 'Extra Trees', which does not match the
# kerascv objective being optimized here.
print('Keras: %f' % kerasBO.res['max']['max_val'])
print(kerasBO.res['max']['max_params'])
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
from bayes_opt import BayesianOptimization


def function_to_be_optimized(batch, epochs, lr, weight_decay, lr_multiplier):
    """Objective for the optimizer: forward one parameter set to Bayes_Unet.

    ``batch`` and ``epochs`` are converted with ``int()`` before the call;
    the remaining arguments are passed through unchanged. Returns whatever
    ``Bayes_Unet`` returns.
    """
    return Bayes_Unet(int(batch), int(epochs), lr, weight_decay, lr_multiplier)


# Bounded region of parameter space: (lower, upper) per parameter.
pbounds = {
    'batch': (4, 16),
    'epochs': (60, 140),
    'lr': (1e-6, 1e-2),
    'weight_decay': (1e-8, 1e-3),
    'lr_multiplier': (0.1, 1),
}

hyperparams = BayesianOptimization(
    f=function_to_be_optimized,
    pbounds=pbounds,
    verbose=2,
    random_state=1,
)

# Record every optimization step in a JSON log file.
logger = JSONLogger(path="/content/drive/MyDrive/UNET/Training models/logs.json")
hyperparams.subscribe(Events.OPTIMIZATION_STEP, logger)

hyperparams.maximize(
    init_points=3,  # 3 random trials
    n_iter=30,      # 30 Bayesian steps
)

"""## **Testing + visualization** """

import numpy as np
import matplotlib.pyplot as plt