def test_explore_lazy():
    """Check that lazy ``explore`` only queues points until ``maximize`` runs."""
    rng = ensure_rng(0)
    grid = np.linspace(-2, 10, 1000)
    f = (np.exp(-(grid - 2)**2)
         + np.exp(-(grid - 6)**2 / 10)
         + 1 / (grid**2 + 1))

    def lookup(x):
        # The optimizer proposes floats; truncate them to an index into f.
        return f[int(x)]

    bo = BayesianOptimization(
        f=lookup,
        pbounds={'x': (0, len(f) - 1)},
        random_state=rng,
        verbose=0,
    )

    # Note we currently expect lazy explore to override points.
    # This may not be the case in the future.
    for seed_index in (f.argmin(), f.argmax()):
        bo.explore({'x': [seed_index]}, eager=False)
        # Nothing has been evaluated yet, and only the latest point is queued.
        assert len(bo.space) == 0
        assert len(bo.init_points) == 1

    bo.maximize(init_points=0, n_iter=0, acq='ucb', kappa=5)

    best = bo.space.max_point()
    best_params = best['max_params']
    best_value = best['max_val']
    assert best_params['x'] == f.argmax()
    assert best_value == f.max()
def main():
    """Tune an SVC on ten data splits and report the test metrics.

    For each split ``i + 1`` (i in 0..9) the SVC hyper-parameters ``C`` and
    ``tol`` are tuned with Bayesian optimization against a 9-fold
    cross-validation score. The best setting is refit on the training set and
    scored on the test set. Per-split results and the aggregate accuracy,
    sensitivity and specificity are printed.

    All output uses single-argument ``print(...)`` calls so the text is the
    same on Python 2 and Python 3.
    """
    # stdout_path = 'outcome_testBO.txt'
    # print '[INFO] stdout_path:\t{}'.format(stdout_path)
    # sys.stdout = open(stdout_path, 'w')
    #
    # np.random.seed(1)
    print('#' * 53)
    scores = []
    sensis = []
    specis = []
    for i in range(10):
        trainset, testset = load_data(i + 1)
        X_train, y_train = trainset
        X_test, y_test = testset

        def svccv(C, tol):
            return cross_val_score(SVC(C=C, random_state=1, tol=tol),
                                   X_train, y_train, cv=9).mean()

        # Only referenced by the commented-out random-forest search below.
        def rfccv(n_estimators, min_samples_split, max_features):
            return cross_val_score(
                RFC(n_estimators=int(n_estimators),
                    min_samples_split=int(min_samples_split),
                    max_features=min(max_features, 0.999),
                    random_state=2),
                X_train, y_train, 'f1', cv=5).mean()

        svcBO = BayesianOptimization(svccv, {'C': (10, 50000),
                                             'tol': (0.0001, 0.1)})
        # Seed points are paired by position: (C[i], tol[i]).
        svcBO.explore({'C': [10, 100, 1000, 10000, 20000, 50000],
                       'tol': [0.0001, 0.001, 0.005, 0.01, 0.05, 0.1]})
        # rfcBO = BayesianOptimization(rfccv, {'n_estimators': (10, 250),
        #                                      'min_samples_split': (2, 25),
        #                                      'max_features': (0.1, 0.999)})
        svcBO.maximize(init_points=50, restarts=200, n_iter=100)

        print('#' * 53)
        print('Final Results')
        print('SVC: %f' % svcBO.res['max']['max_val'])
        params = svcBO.res['max']['max_params']
        # Two spaces reproduce the output of ``print 'max_params: ', params``.
        print('max_params:  %s' % (params,))

        clf = SVC(C=params['C'], random_state=1, tol=params['tol'])
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        result = clf.predict(X_test)
        sensi, speci = my_scores(y_test, result)
        print('%s %s %s' % (1 - score, sensi, speci))
        # print 'err:', 1 - score
        scores.append(score)
        sensis.append(sensi)
        specis.append(speci)

    # NOTE(review): the summary is assumed to follow the loop; the original
    # indentation was lost.
    print(scores)
    print("accur:\t{}\tstd:\t{}".format(np.mean(scores), np.std(scores)))
    print("sensi:\t{}".format(np.mean(sensis)))
    print("speci:\t{}".format(np.mean(specis)))
def opti(self):
    """Tune ``x`` and ``y`` of ``self.trainAndCompareHit`` and print the best.

    The optimizer is seeded with explicit probe points and one known
    observation, then run for five random and five guided iterations.
    """
    bo = BayesianOptimization(self.trainAndCompareHit,
                              {"x": (10, 50), "y": (0.1, 1.0)})
    # explore() pairs the i-th entry of every list into one probe point, so
    # every parameter needs the same number of values. The original passed
    # range(10, 50) (40 values) against 5 y values, which the library rejects.
    # NOTE(review): five evenly spaced x values are assumed here; confirm the
    # intended probe points.
    bo.explore({"x": [10, 20, 30, 40, 50],
                "y": [0.1, 0.25, 0.5, 0.75, 1.0]})
    # Known observation, in the {target: {param: value}} form.
    bo.initialize({-11: {"x": 20, "y": 0.5}})
    bo.maximize(init_points=5, n_iter=5, kappa=3.29)
    print(bo.res["max"])
def main():
    """Tune the seven ``play_game`` weights and print the best result."""
    # The lambda maps the optimizer's keyword arguments onto play_game's
    # positional parameters.
    bo = BayesianOptimization(
        lambda fr, sm, mo, ma, nm, de, co: play_game(fr, sm, mo, ma, nm, de, co),
        {'fr': (2, 6),
         'sm': (-1, 1),
         'mo': (0, 2),
         'ma': (0, 2),
         'nm': (-1, 1),
         'de': (-1, 1),
         'co': (-1, 1)})
    # Seed the search with one explicit parameter combination.
    bo.explore({'fr': [5.0771664428677061],
                'sm': [-0.13059762676063172],
                'mo': [1.3682148714919597],
                'ma': [0.52214706278657907],
                'nm': [-0.86627512983565302],
                'de': [0.42238952601950097],
                'co': [-0.39416823224808289]})
    bo.maximize(init_points=5, n_iter=50, kappa=0.5)
    # The output values can be accessed with bo.res
    # Parenthesised call form: valid on Python 2 and 3, and consistent with
    # the print(...) call below.
    print('RESULTS')
    print(bo.res['max'])
def run(gpunum, cancer_type, feature_type, attempt):
    """Train and tune a survival regressor for one cancer/feature type.

    Steps, in order:
      1. Pin the process to GPU ``gpunum`` and install a TF session in Keras.
      2. Build ``scoreofModel``, a factory whose inner function trains one
         model for a given (pca, dropout, ae_dim1, ae_dim2) and returns the
         value of ``my_cindex`` on the held-out test split.
      3. Evaluate one fixed configuration directly.
      4. Run Bayesian optimization over the four hyper-parameters.
      5. Append the first two evaluated configurations and their scores to
         ``./BO_Result_<cancer_type>.txt`` as tab-separated lines.

    Parameters:
        gpunum: value written to the ``CUDA_VISIBLE_DEVICES`` variable.
        cancer_type: used in the data path, model path and result file name.
        feature_type: forwarded to the auto-encoder helper and the callback.
        attempt: formatted with ``%d`` into the log line and the model path.
    """
    batch_size = 32
    epochs = 100
    os.environ["CUDA_VISIBLE_DEVICES"] = gpunum

    def get_session(gpu_fraction=1):
        # TF session whose GPU memory may grow up to gpu_fraction.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_fraction, allow_growth=True)
        return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    ktf.set_session(get_session())
    # Shared with MyCallback and extended with the optimizer history below.
    results = []

    def scoreofModel(cancer_type, feature_type, attempt):
        # Factory: binds the run identifiers and returns the objective that
        # the optimizer calls with the four hyper-parameters.
        def inner_SoM(pca, dropout, ae_dim1, ae_dim2):
            hidden_dims = 512
            print("**scoreofModel pca " + str(pca) + " dropout " + str(dropout) + " hidden dims " + str(hidden_dims) + " dim1 " + str(ae_dim1) + " dim2 " + str(ae_dim2))
            print("ct %s ft %s attempt %d" % (cancer_type, feature_type, attempt))
            # The optimizer proposes floats; layer sizes must be integers.
            hidden_dims = int(hidden_dims)
            ae_dim1 = int(ae_dim1)
            ae_dim2 = int(ae_dim2)
            # AE
            # The pickle file holds three consecutive objects, read in order.
            with open('../test_bong/data/overlap_%s.pkl' % (cancer_type), 'rb') as handle:
                labels = pickle.load(handle)
                x = pickle.load(handle)
                y = pickle.load(handle)
            # Hold out 80 samples for testing. y[:, 0] becomes the c_* arrays
            # and y[:, 1] the s_* arrays.
            # NOTE(review): presumably c = censoring flag, s = survival time.
            x_trn, x_tst, c_trn, c_tst, s_trn, s_tst, l_trn, l_tst = \
                train_test_split(x, y[:, 0], y[:, 1], labels, test_size=80, random_state=7)
            # Optional rescaling of the regression target.
            if variables.mse_tag == "DIV":
                s_trn = np.divide(s_trn, 1000.0)
                s_tst = np.divide(s_tst, 1000.0)
            elif variables.mse_tag == "LOG":
                s_trn = np.log(s_trn)
                s_tst = np.log(s_tst)
            x_trn, x_tst = AE_again_read.AE_model_save(cancer_type, feature_type, ae_dim1, ae_dim2, x_trn, x_tst)
            # PCA is fitted on the training split only, then applied to test.
            clf = PCA(pca, whiten=True)
            x_trn = clf.fit_transform(x_trn)
            x_tst = clf.transform(x_tst)

            def ModelV1(model_input):
                # Two dropout + dense(relu) stages, linear output, Adam, MSE.
                z = Dropout(dropout)(model_input)
                z = Dense(hidden_dims, activation='relu')(z)
                z = Dropout(dropout)(z)
                z = Dense(hidden_dims, activation='relu')(z)
                model_output = Dense(1, activation=None)(z)
                model = Model(model_input, model_output)
                #model.compile(loss=my_cindex(c_tst, s_tst), optimizer='adam')#,metrics=["mse"])
                model.compile(loss="mse", optimizer='adam')
                return model

            def ModelV2(model_input):
                # One dense(selu) + batch-norm + dropout stage, SGD, MSE.
                z = Dense(hidden_dims, activation="selu")(model_input)
                z = BatchNormalization()(z)
                z = Dropout(dropout)(z)
                model_output = Dense(1)(z)
                model = Model(model_input, model_output)
                learning_ratio = 0.001
                sgd = SGD(lr=learning_ratio, decay=1e-5, momentum=0.9, nesterov=True)
                model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['accuracy', 'mean_squared_error'])
                return model

            # Carve a 20-sample validation split out of the training data.
            x_trn, x_dev, c_trn, c_dev, s_trn, s_dev, l_trn, l_dev = train_test_split(
                x_trn, c_trn, s_trn, l_trn, test_size=20, random_state=7)
            print("x_trn %s, x_dex %s, x_tst %s" % (str(x_trn.shape), str(x_dev.shape), str(x_tst.shape)))
            feature_dim = x_trn.shape[1]
            input_shape = (feature_dim, )
            model_input = Input(shape=input_shape)
            if variables.model_type == "V1":
                model = ModelV1(model_input)
            else:
                model = ModelV2(model_input)
            if variables.train_with_censored == "EXCLUDE":
                # Keep only the rows whose c value is 0.
                # NOTE(review): c_trn and c_dev are not filtered themselves,
                # so inside `data` they can be longer than x_trn and x_dev.
                # Confirm that MyCallback expects this.
                x_trn = x_trn[c_trn == 0]
                x_dev = x_dev[c_dev == 0]
                s_trn = s_trn[c_trn == 0]
                s_dev = s_dev[c_dev == 0]
                print("reduced to x_trn %s, x_dev %s" % (str(x_trn.shape), str(x_dev.shape)))
            data = tuple((x_trn, c_trn, s_trn, x_dev, c_dev, s_dev, x_tst, c_tst, s_tst))
            model.summary()
            model_filepath = '../model/%s-%s-%d-%s-%s-%d-%d-%d.model' % (
                cancer_type, feature_type, attempt, str(pca), str(dropout), hidden_dims, ae_dim1, ae_dim2)
            checkpoint = MyCallback(results, model_filepath, data, real_save=True, verbose=0, save_best_only=True, mode='auto', cancer_type=cancer_type, feature_type=feature_type, thr=pca, dropout_prob=dropout, dimension=hidden_dims, activate='relu', AE1=ae_dim1, AE2=ae_dim2)
            callbacks_list = [checkpoint]
            # `history` is only referenced by the commented-out prints below.
            history = model.fit(x_trn, s_trn, batch_size=batch_size, shuffle=True, callbacks=callbacks_list, epochs=epochs, validation_data=(x_dev, s_dev))
            #print("-----History----")
            #print(history.history.keys())
            #print(history.history)
            #print(len(history.history['val_loss']))
            pred_tst = model.predict(x_tst)
            # Objective value: my_cindex evaluated on the test predictions.
            return my_cindex(c_tst, s_tst)(s_tst, pred_tst)

        return inner_SoM

    # Float range generator; not called anywhere in this function.
    def frange(x, y, jump):
        while x < y:
            yield x
            x += jump

    # One direct evaluation with fixed hyper-parameters (attempt fixed to 0).
    # The return value is not used afterwards, but the call still trains a
    # model and triggers MyCallback.
    result_cindex = scoreofModel(cancer_type, feature_type, 0)(0.9999, 0.0, 1400, 700)
    # results
    # pca 0.98 dropout 0.0 hidden dims 512 dim1 1441 dim2 226 0.496551724138 0.524384112619
    # pca 0.98 dropout 0.0 hidden dims 256 dim1 1441 dim2 226 0.442857142857 0.512820512821
    #
    #pca 0.98 dropout 0.8 hidden dims 10.0 dim1 442.372154389 dim2 700.0 bad
    #pca 0.996454803902 dropout 0.0812376887453 hidden dims 532.204282998 dim1 1498.73305566 dim2 697.318639345 bad
    # **scoreofModel pca 0.98 dropout 0.0 hidden dims 512 dim1 1441.27853285 dim2 226.547497983
    # Search ranges handed to the optimizer.
    bo_dict = {
        "pca": (0.99, 0.9999),
        "dropout": (0, 0.8),
        # "hidden_dims" : (10, 1000),
        "ae_dim1": (1400, 1500),
        "ae_dim2": (500, 700)
    }
    #for k in bo_dict.keys() :
    #    print(k)
    #    print (bo_dict[k])
    #scoreofModel(**{'ae_dim1': 1138.0196836044008, 'dropout': 0.18242910081095307, 'pca': 0.98912275449631237, 'hidden_dims': 373.61768597111694, 'ae_dim2': 472.20225514485821})
    v1BO = BayesianOptimization(scoreofModel(cancer_type, feature_type, attempt), bo_dict, verbose=True)
    # Three seed points, paired by position across the lists.
    # NOTE(review): the middle point (pca 0.1, ae_dim1 100, ae_dim2 100) lies
    # outside the ranges in bo_dict. Confirm this is intended.
    v1BO.explore({
        "pca": [0.99, 0.1, 0.9999],
        "dropout": [0, 0.2, 0.8],
        # "hidden_dims" : [10, 200, 1000],
        "ae_dim1": [1400, 100, 1500],
        "ae_dim2": [500, 100, 700],
    })
    # NOTE(review): gp_params is defined but not passed to maximize() below.
    gp_params = {"alpha": 1e-5}
    v1BO.maximize(init_points=5, n_iter=40)
    print('Final Results')
    #print('max %f' % v1BO.res['max']['max_val'])
    #print('***<max>****')
    #print(v1BO.res['max'])
    #print('***<all>***')
    #print(v1BO.res['all'])
    results.append(v1BO.res['all'])
    #print(results)
    print(v1BO.res)
    # Append mode: repeated runs accumulate in the same file.
    with open('./BO_Result_' + cancer_type + '.txt', 'at') as f:
        params = v1BO.res['all']['params']
        values = v1BO.res['all']['values']
        keys = params[0].keys()
        # NOTE(review): only the first two evaluated points are written.
        # Confirm that range(2) is intended rather than all results.
        for i in range(2):
            line = [cancer_type, feature_type]
            for k in keys:
                line.append(str(params[i][k]))
            line.append(str(values[i]))
            f.write('\t'.join(line) + '\n')
n_estimators=10000, seed=9999) generate_metrics(XGB, os_train_data, os_train_target, test_data, test_target) SKDNN = MLPClassifier(solver='adam', alpha=1e-5, batch_size='auto', hidden_layer_sizes=(30,40,50,60), learning_rate='adaptive', learning_rate_init = 1e-2) generate_metrics(SKDNN, os_train_data, os_train_target, test_data, test_target) #--- XGB ensemble ---------------------- negpos = 1.0*(len(target)-target.sum())/target.sum() def xgbcv(learning_rate, n_estimators): return my_cross_val_score(xgboost.XGBClassifier(learning_rate= 10**learning_rate, n_estimators=int(n_estimators), #scale_pos_weight=negpos seed=9999 ), os_train_data, os_train_target, cv=10).mean() xgbBO = BayesianOptimization(xgbcv, {'learning_rate': (-4, -1), 'n_estimators': (100, 1000)}) xgbBO.explore({'learning_rate': [-6, -3.5, -1], 'n_estimators': [100,500,1000]}) xgbBO.maximize(init_points=10, n_iter=40) print('XGB: %f' % xgbBO.res['max']['max_val'])
# Extra keyword arguments forwarded by maximize() to the Gaussian process.
gp_params = {"alpha": 1e-5}

# SVM
svcBO = BayesianOptimization(svccv, {'gamma': (0.00001, 0.1)})
svcBO.maximize(init_points=3, n_iter=4, **gp_params)

# Random Forest
rfcBO = BayesianOptimization(
    rfccv,
    {'n_estimators': (10, 300),
     'max_depth': (2, 10)
     }
)
# Seed points are paired by position: (max_depth[i], n_estimators[i]).
rfcBO.explore({'max_depth': [2, 4, 6], 'n_estimators': [64, 128, 256]})
rfcBO.maximize(init_points=4, n_iter=4, **gp_params)

print('Final Results')
print('SVC: %f' % svcBO.res['max']['max_val'])
print('RFC: %f' % rfcBO.res['max']['max_val'])

# visualize results
x = np.linspace(0.00001, 0.1, 64).reshape(-1, 1)
plot_gp(svcBO, x)
plt.show()

# Real lists rather than map(): on Python 3 map() returns a length-less
# iterator, which pandas cannot use as a column.
# NOTE(review): this assumes column 0 of rfcBO.X is n_estimators and column 1
# is max_depth. Verify against rfcBO.keys, as the order is set by the library.
rfc_X = [round(value, 0) for value in rfcBO.X[:, 0]]
rfc_Y = [round(value, 0) for value in rfcBO.X[:, 1]]
data = pd.DataFrame(data={'n_est': rfc_X, 'max_depth': rfc_Y, 'score': rfcBO.Y})
lgbBO = BayesianOptimization( lgb_cv, { 'min_child_weight': (1, 20), 'colsample_bytree': (0.1, 1), 'max_depth': (5, 15), 'subsample': (0.5, 1), 'learning_rate': (0, 1), 'reg_lambda': (0, 1.0), 'n_estimators': (20, 200), }) lgbBO.explore({ "min_child_weight": [2, 5], "colsample_bytree": [0.7, 0.8], "max_depth": [5, 10], "learning_rate": [0.095, 0.001], 'subsample': [0.7, 0.6], "reg_lambda": [0, 0.001], 'n_estimators': [100, 50] }) lgbBO.maximize(init_points=5, n_iter=20) # In[8]: from sklearn.model_selection import StratifiedKFold from sklearn.metrics import log_loss, roc_auc_score import lightgbm as lgb def train_and_validate_model(x_train, y_train, x_validation, y_validation, cls):
n_informative=12, n_redundant=7) def svccv(C, gamma): return cross_val_score(SVC(C=C, gamma=gamma, random_state=2), data, target, 'f1', cv=5).mean() def rfccv(n_estimators, min_samples_split, max_features): return cross_val_score(RFC(n_estimators=int(n_estimators), min_samples_split=int(min_samples_split), max_features=min(max_features, 0.999), random_state=2), data, target, 'f1', cv=5).mean() if __name__ == "__main__": svcBO = BayesianOptimization(svccv, {'C': (0.001, 100), 'gamma': (0.0001, 0.1)}) svcBO.explore({'C': [0.001, 0.01, 0.1], 'gamma': [0.001, 0.01, 0.1]}) rfcBO = BayesianOptimization(rfccv, {'n_estimators': (10, 250), 'min_samples_split': (2, 25), 'max_features': (0.1, 0.999)}) svcBO.maximize() print('-'*53) rfcBO.maximize() print('-'*53) print('Final Results') print('SVC: %f' % svcBO.res['max']['max_val']) print('RFC: %f' % rfcBO.res['max']['max_val'])
def testReferenceImplementation3D(self):
    """Check for numeric correctness against the reference implementation.

    Three training points are fitted with both the reference optimizer
    (``BO_ref``) and the implementation under test. The posterior mean, the
    posterior standard deviation and the expected-improvement utility are
    then compared element-wise on 1000 random points.
    """
    def f(x):
        # vector version
        return np.exp(-(x[0] - 2)**2) + np.exp(-(x[0] - 6)**2 / 10) + \
            1 / (x[0]**2 + 1) + np.sin(x[1]) + 5 * np.cos(6.42 * x[2])

    def ff(x, y, z):
        # variable version
        return np.exp(-(x - 2)**2) + np.exp(-(x - 6)**2 / 10) + \
            1 / (x**2 + 1) + np.sin(y) + 5 * np.cos(6.42 * z)

    def posterior(gp, x):
        mu, sigma = gp.predict(x, return_std=True)
        return mu, sigma

    bounds = np.array([[-5, 5]] * 3)

    # Evaluation points. The objective is deliberately not evaluated on them,
    # since only the GP posterior and the utility are compared.
    np.random.seed(6)
    X = np.random.uniform(bounds[:, 0], bounds[:, 1],
                          size=(1000, bounds.shape[0]))

    # Training data. The seed is reset, so these are drawn from the start of
    # the same random stream.
    np.random.seed(6)
    X_train = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                size=(3, bounds.shape[0]))
    y_train = [f(x) for x in X_train]

    rand_seed = 0
    gp_params = {
        "alpha": 1e-5,
        "n_restarts_optimizer": 25,
        "kernel": Matern(nu=2.5),
        "random_state": rand_seed
    }

    # Reference implementation
    optimizer = BO_ref(ff, {
        'x': (-5, 5),
        'y': (-5, 5),
        'z': (-5, 5)
    }, verbose=0)
    # append training data
    optimizer.explore({
        'x': X_train[:, 0],
        'y': X_train[:, 1],
        'z': X_train[:, 2]
    })
    # fit the Gaussian process regressor without taking optimization steps
    optimizer.maximize(init_points=0, n_iter=0, acq='ei', xi=1e-4, **gp_params)
    # get results
    post = np.array([posterior(optimizer.gp, x.reshape(1, -1)) for x in X])
    mu_ref, std_ref = post[:, 0], post[:, 1]
    utility_ref = optimizer.util.utility(X, optimizer.gp, optimizer.Y.max())

    # Implementation under test
    gp = get_fitted_gaussian_processor(np.array(X_train), np.array(y_train),
                                       None, standardize_y=False, **gp_params)
    util = UtilityFunction(kind='ei', gp_objective=gp, xi=1e-4)
    post_impl = np.array([posterior(gp, x.reshape(1, -1)) for x in X])
    mu_impl, std_impl = post_impl[:, 0], post_impl[:, 1]
    utility_impl = util.utility(X)

    # Exact equality is intended: both sides should run the same computation.
    assert (mu_ref == mu_impl).all(),\
        "mu(x) comparison failed"
    assert (std_ref == std_impl).all(),\
        "std(x) comparison failed"
    assert (utility_ref == utility_impl).all(),\
        "utility(x) comparison failed"
dnnBO = BayesianOptimization( dnncv, { 'h1': (10, 100), 'h2': (10, 100), 'h3': (10, 100), 'h4': (10, 100), 'learning_rate': (1e-4, 1e-1), 'dropout': (0.1, 0.9), }) dnnBO.explore({ 'h1': [20, 50], 'h2': [20, 50], 'h3': [20, 50], 'h4': [20, 50], 'learning_rate': [1e-3, 1e-2], 'dropout': [.3, .6], }) dnnBO.maximize(init_points=12, n_iter=40) print('DNN: %f' % dnnBO.res['max']['max_val']) ''' def rfccv(n_estimators, min_samples_split, max_features): return cross_val_score(RFC(n_estimators=int(n_estimators), min_samples_split=int(min_samples_split), max_features=min(max_features, 0.999), random_state=2), data, train_age_target, cv=10, n_jobs=-1).mean()
def target(**inargs):
    """Look up the recorded accuracy for one parameter combination.

    The keyword arguments are ordered like ``param_names``, packed into a
    NumPy array, and its raw bytes are used as the key into ``acc_dict``.
    """
    ordered_values = [inargs[param_name] for param_name in param_names]
    # tobytes() returns the same bytes as the deprecated tostring(), which was
    # removed in NumPy 2.0.
    return acc_dict[np.array(ordered_values).tobytes()]


# Search bounds: the min and max of every parameter's candidate values.
init_dict = OrderedDict()
for i, param_name in enumerate(param_names):
    init_dict[param_name] = (min(param_ranges[i]), max(param_ranges[i]))
bo = BayesianOptimization(target, init_dict, verbose=0)

# All columns of `results` except the last hold the parameters already tried.
done_params = np.reshape(results[:, :-1], (results.shape[0], nparam))
param_dict = OrderedDict()
for i, param_name in enumerate(param_names):
    param_dict[param_name] = done_params[:, i]
bo.explore(param_dict)

# you can tune the gp parameters and bo parameters
# when acq='ucb', set kappa within [10^-3, 10^-2, ..., 10^3]
# when acq='poi' or 'ei', set xi within [10^-3, 10^-2, ..., 10^3]
gp_params = {'kernel': None, 'alpha': 1e-5}
# init_points=0 and n_iter=0: only evaluate the explored points and fit the GP.
bo.maximize(init_points=0, n_iter=0, acq='poi', xi=0.01, **gp_params)

# NOTE(review): the utility is computed against a best-so-far value of 0.
# Confirm this is intended rather than the best observed target.
utility = bo.util.utility(all_params, bo.gp, 0)
# Candidate indices ordered from highest to lowest utility.
sort_indices = np.argsort(utility)
sort_indices = sort_indices[::-1]
fid = open('output_params.txt', 'w')
icount = 0
for tmp_index in sort_indices:
    tmp_param = all_params[tmp_index]
num_folds = 5

# Search ranges, as (lower, upper) per parameter, for get_cls_result.
BO = BayesianOptimization(
    get_cls_result, {
        'learning_rate': (0.01, 0.5),
        'num_leaves': (30, 120),
        'colsample_bytree': (0.5, 1),
        'subsample': (0.8, 1),
        'max_depth': (5, 15),
        'reg_alpha': (0, 10),
        'reg_lambda': (0, 10),
        'min_split_gain': (0, 10),
        'min_child_weight': (1, 50),
    })

# Three seed points, paired by position across the lists.
# NOTE(review): the first seed uses num_leaves=20, which is below the
# (30, 120) range declared above. Confirm this is intended.
BO.explore({
    'learning_rate': [0.01, 0.02, 0.1],
    'num_leaves': [20, 32, 50],
    'colsample_bytree': [0.5, 0.95, 0.99],
    'subsample': [0.8, 0.87, 0.95],
    'max_depth': [5, 8, 15],
    'reg_alpha': [0.04, 0.1, 0.2],
    'reg_lambda': [0.073, 0.2, 0.5],
    'min_split_gain': [0.02224, 0.1, 0.2],
    'min_child_weight': [20, 40, 50],
})

# Five random starting points followed by thirty guided iterations.
BO.maximize(init_points=5, n_iter=30)
print('-' * 53)
print('Final Results')
print('LGB: %f' % BO.res['max']['max_val'])
#---- SVM --------------------------- def svccv(C, gamma): return cross_val_score(SVC(C=C, gamma=gamma, random_state=None, probability=True), data, target, cv=10).mean() svcBO = BayesianOptimization(svccv, { 'C': (0.001, 1000), 'gamma': (0.0001, 0.1) }) svcBO.explore({'C': [0.001, 0.01, 0.1, 1.0], 'gamma': [0.001, 0.01, 0.1, 1.0]}) svcBO.maximize(init_points=10, n_iter=20) print('SVC: %f' % svcBO.res['max']['max_val']) #---- XGB ---------------------------- def xgbcv(learning_rate, n_estimators): return cross_val_score(xgboost.XGBClassifier( learning_rate=learning_rate, n_estimators=int(n_estimators)), data, target, cv=10).mean() xgbBO = BayesianOptimization(xgbcv, { 'learning_rate': (0.0001, 1.0),
# NOTE(review): the original indentation was lost. It is assumed that only the
# construction and the first explore() belong to the else branch, and that the
# maximize() calls run for both fresh and resumed optimizers. Please confirm.
if args.resume_bo:
    # Resume a previously saved optimizer. pickle can execute arbitrary code,
    # so only load files this project wrote itself.
    with open("dramBO.pkl", "rb") as saved_bo:
        dramBO = pickle.load(saved_bo)
else:
    # pbounds are (lower, upper). The original listed them as (upper, lower),
    # which gives inverted bounds to the acquisition maximizer and to the
    # final np.clip.
    dramBO = BayesianOptimization(dram, {
        "location_sigma": (0.01, 1.0),
        "lr": (1e-8, 0.1),
        "alpha": (1e-10, 1.0),
        "ratio": (0.1, 1.0),
        "adam_epsilon": (1e-12, 1e-7)
    }, verbose=1)
    # Two seed points, paired by position across the tuples.
    dramBO.explore({
        "location_sigma": (0.3, 0.1),
        "lr": (0.003, 0.001),
        "alpha": (1e-7, 9e-8),
        "ratio": (0.3, 0.2),
        "adam_epsilon": (1e-8, 9e-9)
    })

# Three optimization rounds, one per acquisition function.
dramBO.maximize(init_points=3, n_iter=50, acq="ucb", kappa=2.576, xi=0.0)
dramBO.maximize(init_points=3, n_iter=50, acq="poi", kappa=2.576, xi=0.0)
dramBO.maximize(init_points=3, n_iter=50, acq="ei", kappa=2.576, xi=0.0)

# Two further seed points are queued after the three rounds.
dramBO.explore({
    "location_sigma": (0.5, 0.01),
    "lr": (0.03, 3e-5),
    "alpha": (1.0, 1e-8),
    "ratio": (1.0, 0.1),
    "adam_epsilon": (1e-8, 1e-12)
})
# Seed the QWK optimizer with eight hand-picked points. The i-th entries of
# all four lists together form one point. The commented-out keys are
# parameters that are currently not seeded.
QWK.explore({
    # 'bi_rmm': [0, 0, 0, 1, 0, 1,],
    # 'rnn_layers': [0, 0, 0, 0, 2, 2,],
    # 'embd_train': [0, 0, 0, 1, 1, 1,],
    # 'embd_dim': [0, 0, 1, 1, 2, 2,],
    # 'tfidf': [0, 0, 0, 0, 0, 1,],
    # 'lr': [0.001, 0.001],
    # 'convwin': [2, 2, 0],
    # 'convkernel': [0, 32, 0],
    'rnn_dim': [
        0,
        0,
        32,
        32,
        64,
        64,
        128,
        128,
    ],
    'dropout': [
        0.2,
        0.6,
        0.2,
        0.6,
        0.2,
        0.6,
        0.3,
        0.6,
    ],
    'dropout_w': [
        0.2,
        0.4,
        0.4,
        0.5,
        0.4,
        0.4,
        0.4,
        0.5,
    ],
    'dropout_u': [
        0.2,
        0.4,
        0.4,
        0.5,
        0.4,
        0.4,
        0.3,
        0.6,
    ]
})
def kNNOptimize(train_set, test_set, njobs, ijob):
    """Tune the kNN parameters for this job's share of the spatial bins.

    The 10 x 10 domain is split into ``NBINS_X * NBINS_Y`` bins, which are
    divided into ``njobs`` nearly equal chunks. This call handles chunk
    ``ijob``. Each bin that has no result file yet gets a Bayesian
    optimization of ``validation_map3_kNN`` restricted to that bin, and the
    full history is written to ``knn_bayes/<bin>_<nx>_<ny>.json``.

    Parameters:
        train_set: forwarded to ``validation_map3_kNN``.
        test_set: accepted for interface compatibility; not used here.
        njobs: total number of parallel jobs.
        ijob: zero-based index of this job.
    """
    delta_x = 10. / NBINS_X
    delta_y = 10. / NBINS_Y
    NBINS_TOTAL = NBINS_X * NBINS_Y
    ijob_bins = np.array_split(np.arange(NBINS_TOTAL), njobs)[ijob]
    for i_bin in ijob_bins:
        # Plain int: NumPy integers are not JSON serialisable on Python 3.
        i_bin = int(i_bin)
        bin_filename = 'knn_bayes/{0:05d}_{1:02d}_{2:02d}.json'.format(
            i_bin, NBINS_X, NBINS_Y)
        # Skip finished bins so an interrupted job can be restarted.
        if os.path.isfile(bin_filename):
            continue

        # Bins are numbered row by row: the row gives y, the column gives x.
        row, col = divmod(i_bin, NBINS_X)
        y_lower = row * delta_y
        x_lower = col * delta_x
        x_upper = x_lower + delta_x
        y_upper = y_lower + delta_y
        # this block is needed because some points fall on the right or
        # top boundary of the domain exactly.
        if x_upper == 10.:
            x_upper += 1.0e-5
        if y_upper == 10.:
            y_upper += 1.0e-5

        # Two seed points, paired by position across the tuples.
        initial_points = {"cut_threshold": (5, 7),
                          "w_x": (450, 550),
                          "w_y": (1050, 950),
                          "w_hour": (4, 2),
                          "w_log10acc": (10, 10),
                          "w_weekday": (2, 3),
                          "w_year": (9, 11),
                          "n_neighbors": (20, 25),
                          "margin": (0.02, 0.03)
                          }
        f = functools.partial(validation_map3_kNN, train_set=train_set,
                              xlower=x_lower, xupper=x_upper,
                              ylower=y_lower, yupper=y_upper)
        bo = BayesianOptimization(f=f,
                                  pbounds={"cut_threshold": (3, 12),
                                           "w_x": (250, 1000),
                                           "w_y": (500, 2000),
                                           "w_hour": (1, 10),
                                           "w_log10acc": (5, 30),
                                           "w_weekday": (1, 10),
                                           "w_year": (2, 20),
                                           "n_neighbors": (10, 40),
                                           "margin": (0.01, 0.04)
                                           },
                                  verbose=True)
        # this little bit of code allows seeding of the bayesian optimizer
        # with a few points that you already know are decent parameter values.
        # initial points are based off @Sandro's kNN script.
        #
        # seed the bayesian optimizer with a couple of points.
        bo.explore(initial_points)
        # For some reason that I don't understand, the Bayesian optimizer slows
        # down greatly after 64 iterations. So to be more computationally
        # efficient, limit it to 64.
        # explore the space (xi=0.1)
        # 2 custom (above), 5 initial (implied), 25 exploration = 32 total
        bo.maximize(n_iter=25, acq="ei", xi=0.1)
        # exploit the peaks for the other 32 iterations (xi=0.)
        bo.maximize(n_iter=32, acq="ei", xi=0.0)

        # Add the best point and the bin geometry to the stored history.
        optimizer_output = bo.res['all']
        optimizer_output['max'] = bo.res['max']
        optimizer_output['i_bin'] = i_bin
        optimizer_output['nx'] = NBINS_X
        optimizer_output['ny'] = NBINS_Y
        optimizer_output['x_lower'] = x_lower
        optimizer_output['y_lower'] = y_lower
        optimizer_output['x_upper'] = x_upper
        optimizer_output['y_upper'] = y_upper
        with open(bin_filename, 'w') as fh:
            json.dump(optimizer_output, fh, sort_keys=True, indent=4,
                      separators=(',', ': '))
def optimize():
    """Tune the elastic-net, XGBoost and LightGBM objectives and print results.

    Each enabled model is optimized in turn and its best result is printed,
    followed by a combined summary. The disabled searches are kept as comments
    and are excluded from the summary, which avoids referencing an optimizer
    that was never created.
    """
    # Lasso optimization
    # lasso_BO = BayesianOptimization(lasso_func, {'alpha': (0.000001, .05)})
    # lasso_BO.explore({'alpha': [.00001, .0003, 0.001, 0.01]})
    # lasso_BO.maximize(n_iter=100)
    # print(lasso_BO.res['max'])

    # KRR optimization
    # krr_BO = BayesianOptimization(krr_func, {'alpha': (0,.05), 'degree': (1,5), 'coef0': (0, 10000)})
    # krr_BO.explore({'alpha': [0.001, 0.005, .05], 'degree':[2, 3, 4], 'coef0':[0, .5, 10]})
    # krr_BO.maximize(n_iter=100)
    # print(krr_BO.res['max'])

    # Elastic optimization
    elastic_BO = BayesianOptimization(elastic_func, {
        'alpha': (0, 10000),
        'l1_ratio': (0, 1)
    })
    elastic_BO.explore({
        'alpha': [0.001, 0.1, 1, 10, 100, 1000, 5000],
        'l1_ratio': [0, .1, .2, .3, .5, .7, .9]
    })
    elastic_BO.maximize(n_iter=1)
    print(elastic_BO.res['max'])

    # Random forest optimization
    # rf_BO = BayesianOptimization(rf_func, {'n_estimators': (1,1000), 'max_depth': (1,500)})
    # rf_BO.explore({'n_estimators': [25, 50, 100, 200, 400], 'max_depth':[10, 40, 80, 320, 500]})
    # rf_BO.maximize(n_iter=100)
    # print(rf_BO.res['max'])

    # svr_BO = BayesianOptimization(svr_func, {'C': (0, 10), 'epsilon':(0,10)})
    # svr_BO.explore({'C': [.001, 0.01, 0.1, 1, 10, 100, 1000], 'epsilon':[.001, .01, .1, 1, 10, 100, 1000]})
    # svr_BO.maximize(n_iter=100)
    # print(svr_BO.res['max'])

    # XGBoost optimization
    xgb_BO = BayesianOptimization(
        xgb_func, {
            'min_child_weight': (1, 4),
            'colsample_bytree': (0.1, 1),
            'max_depth': (2, 9),
            'subsample': (0.3, .8),
            'gamma': (0, 1),
            'alpha': (0, 1),
            'num_rounds': (2000, 7000)
        })
    xgb_BO.maximize(init_points=5, n_iter=300)
    print(xgb_BO.res['max'])

    # Lgb optimization
    lgb_BO = BayesianOptimization(
        lgb_func, {
            'num_leaves': (1, 20),
            'lr': (0.001, .05),
            'num_estimators': (400, 1500),
            'max_bin': (30, 70),
            'bagging_fraction': (0, 1),
            'bagging_freq': (3, 8),
            'feature_fraction': (.6, 1),
            'min_data_in_leaf': (3, 10),
            'min_sum_hessian_in_leaf': (4, 20)
        })
    # NOTE(review): several seed values lie outside the ranges above, e.g.
    # num_estimators 200, max_bin 10/80/100 and every feature_fraction value.
    # Confirm this is intended.
    lgb_BO.explore({
        'num_leaves': [5, 5, 5, 4, 5, 6, 7],
        'lr': [.01, .01, .01, .01, .01, .01, .01],
        'num_estimators': [200, 300, 500, 700, 800, 900, 1000],
        'max_bin': [10, 40, 80, 100, 55, 30, 53],
        'bagging_fraction': [.7, .8, .6, .2, .9, .7, .3],
        'bagging_freq': [4, 5, 2, 7, 2, 4, 5],
        'feature_fraction': [.2, .3, .25, .3, .21, .15, .13],
        'min_data_in_leaf': [5, 6, 5, 4, 4, 7, 7],
        'min_sum_hessian_in_leaf': [8, 2, 5, 10, 11, 15, 17]
    })
    lgb_BO.maximize(init_points=5, n_iter=300)
    print(lgb_BO.res['max'])

    # Summary of the searches that actually ran.
    print("=" * 50)
    # lasso_BO is disabled above, so printing it would raise NameError.
    # print(lasso_BO.res['max'])
    print(elastic_BO.res['max'])
    # print(rf_BO.res['max'])
    # print(krr_BO.res['max'])
    print(xgb_BO.res['max'])
    print(lgb_BO.res['max'])
val_pi_labels_onehot = np.load('./val_pi_labels_onehot.out.npy')
# Close the file handle deterministically. pickle can execute arbitrary code,
# so only load files this project produced.
with open('./val_dna_seqs.out', 'rb') as seq_file:
    val_dna_seqs = pickle.load(seq_file)
# Swap the last two axes of the one-hot encoded sequences.
val_dna_seqs_onehot = np.transpose(convert_onehot2D(val_dna_seqs),
                                   axes=(0, 2, 1))

# Published as module globals.
# NOTE(review): presumably read by `target`; verify.
global num_classes
num_classes = val_pi_labels_onehot.shape[1]
global dna_bp_length
dna_bp_length = len(val_dna_seqs[0])

# perform bayesian optimization within hyperparameter ranges, with initial guesses
print("Start Bayesian optimization")
gp_params = {"alpha": 1e-5, "n_restarts_optimizer": 2}
bo = BayesianOptimization(
    target, {
        # Lower and upper bound are equal, so total_epoch stays at 5.
        'total_epoch': (5, 5),
        'filter_num': (1, 512),
        'filter_len': (1, 48),
        'num_dense_nodes': (1, 256)
    })
# Three seed points, paired by position across the lists.
bo.explore({
    'total_epoch': [5, 5, 5],
    'filter_num': [512, 256, 128],
    'filter_len': [48, 24, 12],
    'num_dense_nodes': [256, 128, 64]
})
# No random initial points: the seeds above start the search.
bo.maximize(init_points=0, n_iter=20, acq="ucb", kappa=5, **gp_params)

# print output values from bayesian optimization
print(bo.res['max'])
print(bo.res['all'])
def gp_opt_for_policy_search(T, s, y, beta, eta_init, treatment_budget, k, env,
                             infection_probs_predictor, infection_probs_kwargs,
                             transmission_probs_predictor,
                             transmission_probs_kwargs, data_depth,
                             n_rep_per_gp_opt_iteration=10):
    """Search for the three policy parameters ``eta`` by Bayesian optimization.

    The objective simulates ``T`` steps of the decision rule, starting from
    state ``s`` and infection vector ``y``, for
    ``n_rep_per_gp_opt_iteration`` Monte Carlo replicates. It returns the mean
    of the recorded scores, where a score is the negative mean of the
    simulated infection vector.

    Returns:
        list: ``[eta1, eta2, eta3]`` taken from the optimizer's best point.
    """
    # Objective is mean score over n_rep_per_gp_opt_iteration MC replicates
    def objective(eta1, eta2, eta3):
        eta = np.array([eta1, eta2, eta3])
        scores = []
        for _ in range(n_rep_per_gp_opt_iteration):
            # s_tpm is never reassigned, so every step sees the initial state.
            s_tpm = s
            y_tpm = y
            a_dummy = np.zeros(env.L)
            for m in range(T):
                # print(m)
                # Plus perturbation
                priority_score = R(env, s_tpm, a_dummy, y_tpm,
                                   infection_probs_predictor,
                                   infection_probs_kwargs,
                                   transmission_probs_predictor,
                                   transmission_probs_kwargs, data_depth, eta,
                                   beta)
                # R argument order: env, s, a, y, infection_probs_predictor,
                # infection_probs_kwargs, transmission_prob_predictor,
                # transmission_probs_kwargs, data_depth, eta, beta
                a_tpm = decision_rule(env, s_tpm, a_dummy, y_tpm,
                                      infection_probs_predictor,
                                      infection_probs_kwargs,
                                      transmission_probs_predictor,
                                      transmission_probs_kwargs, eta, beta, k,
                                      treatment_budget, priority_score)
                infection_probs = infection_probs_predictor(
                    a_tpm, y_tpm, beta, env.L, env.adjacency_list,
                    **infection_probs_kwargs)
                # Draw the next infection vector from the predicted
                # probabilities.
                y_tpm = np.random.binomial(n=1, p=infection_probs)
            # NOTE(review): the original indentation was lost. This assumes
            # one score per replicate, taken after the T-step rollout. If the
            # score was recorded at every step, this line belongs inside the
            # inner loop. Please confirm.
            scores.append(-np.mean(y_tpm))
        return np.mean(scores)

    # NOTE(review): the base here is the literal 1, so with a float exponent
    # the upper bound evaluates to 1. Confirm that 1 is the intended base.
    ETA_BOUNDS = (0.0, np.power(1, -1 / 3))
    # Seed the optimizer with the caller's initial eta.
    explore_ = {
        'eta1': [eta_init[0]],
        'eta2': [eta_init[1]],
        'eta3': [eta_init[2]]
    }
    bounds = {'eta1': ETA_BOUNDS, 'eta2': ETA_BOUNDS, 'eta3': ETA_BOUNDS}
    bo = BayesianOptimization(objective, bounds)
    bo.explore(explore_)
    bo.maximize(init_points=10, n_iter=10, alpha=1e-4)
    best_param = bo.res['max']['max_params']
    best_params = [best_param['eta1'], best_param['eta2'], best_param['eta3']]
    return best_params
def run_hyperparameter_optimization(options, run_exp):
    """Run hyperparameter optimization around the experiment function.

    Supports Bayesian optimization, random search and grid search. Every
    candidate configuration is merged into ``options`` and passed to
    ``run_exp``, whose return value is used as the score of that
    configuration.

    Parameters
    ==========
    options :
        The argparse object holding the experiment parameters. This function
        reads the following attributes:
        'optimization' : str
            The optimization method: 'bayesian', 'random' or 'grid'.
        'optimization_iterations' : int
            Evaluation budget, used by 'bayesian' and 'random'.
        The search space itself is built by ``_make_space(options)``. The
        original documentation refers to an 'optimization_spaces' path and to
        the file 'default_optimization_space' for its format.
    run_exp : function
        Takes the argparse object as input and returns a float that is
        interpreted as the score of the configuration (higher is better).

    Raises
    ======
    Exception
        See the note on the final ``else`` branch.
    """
    if options.optimization:

        def optimized_experiment(**parameters):
            # Run one experiment with `parameters` merged into the options.
            current_options = _update_options(options, **parameters)
            result = run_exp(current_options)
            # return the f1 score of the previous experiment
            return result

        if options.optimization == "bayesian":
            gp_params = {"alpha": 1e-5, "kernel" : Matern(nu = 5 / 2)}
            space, init_vals, num_init_vals = _make_space(options)
            bayesian_optimizer = BayesianOptimization(optimized_experiment, space)
            # The seed points count against the iteration budget below.
            bayesian_optimizer.explore(init_vals)
            bayesian_optimizer.maximize(n_iter=options.optimization_iterations - num_init_vals,
                                        acq = 'ei',
                                        **gp_params)
        elif options.optimization == "random":
            # NOTE(review): fmin minimizes in hyperopt, while run_exp is
            # documented as higher-is-better. Confirm the sign convention.
            fmin(lambda parameters : optimized_experiment(**parameters),
                 _make_space(options),
                 algo=rand.suggest,
                 max_evals=options.optimization_iterations,
                 rstate = np.random.RandomState(1337))
        elif options.optimization == "grid":
            # perform grid-search by running every possible parameter combination
            combinations = _all_option_combinations(_make_space(options))
            for combi in combinations:
                optimized_experiment(**combi)
    # NOTE(review): the original indentation was lost. Based on the message,
    # this else is assumed to pair with `if options.optimization:`, so an
    # unrecognised method name falls through silently. Please confirm.
    else:
        raise Exception("No hyperparameter method specified!")
'max_depth': (2, 12), 'gamma': (0.001, 10.0), 'min_child_weight': (0, 20), 'max_delta_step': (0, 10), 'subsample': (0.4, 1.0), 'colsample_bytree': (0.4, 1.0) }) # This portion of the code is not necessary. You can simply specify that 10-20 random parameter combinations (**init_points** below) be used. However, I like to try couple of high- and low-end values for each parameter as a starting point, and after that fewer random points are needed. Note that a number of options must be the same for each parameter, and they are applied vertically. # In[ ]: XGB_BO.explore({ 'max_depth': [3, 8, 3, 8, 8, 3, 8, 3], 'gamma': [0.5, 8, 0.2, 9, 0.5, 8, 0.2, 9], 'min_child_weight': [0.2, 0.2, 0.2, 0.2, 12, 12, 12, 12], 'max_delta_step': [1, 2, 2, 1, 2, 1, 1, 2], 'subsample': [0.6, 0.8, 0.6, 0.8, 0.6, 0.8, 0.6, 0.8], 'colsample_bytree': [0.6, 0.8, 0.6, 0.8, 0.6, 0.8, 0.6, 0.8], }) # In my version of sklearn there are many warning thrown out by the GP portion of this code. This is set to prevent them from showing on screen. # # If you have a special relationship with your computer and want to know everything it is saying back, you'd probably want to remove the two "warnings" lines and slide the XGB_BO line all the way left. # # I am doing only 2 initial points, which along with 8 exploratory points above makes it 10 "random" parameter combinations. I'd say that 15-20 is usually adequate. For n_iter 25-50 is usually enough. # # There are several commented out maximize lines that could be worth exploring. The exact combination of parameters determines **[exploitation vs. exploration](https://github.com/fmfn/BayesianOptimization/blob/master/examples/exploitation%20vs%20exploration.ipynb)**. It is tough to know which would work better without actually trying, though in my hands exploitation with "expected improvement" usually works the best. That's what the XGB_BO.maximize line below is specifying. # In[ ]: print('-' * 130)
val = cross_val_score(SVC(C=C, gamma=gamma, random_state=5), X_train, y_train, 'recall_weighted', cv=5).mean() return val gp_params = {"alpha": 1e-5} clfBO = BayesianOptimization(classifier, { 'C': (1, 600), 'gamma': (0.001, 0.01) }) clfBO.explore({ 'C': [10, 150, 10, 300, 400], 'gamma': [0.001, 0.01, 0.001, 0.01, 0.01] }) clfBO.maximize(n_iter=10, **gp_params) print('-' * 53) print('#' * 53) print('Final Results') print('SVC: %f' % clfBO.res['max']['max_val']) params = {'kernel': 'rbf', 'gamma': 0.0100, 'C': 574.777} classifier = SVC(**params) classifier.fit(X_train, y_train) y_true, y_pred = y_test, classifier.predict(X_test) print "\nFull performance report:\n" print classification_report(y_true, y_pred)
plt.legend() plt.title('Bayesian Optimization performance vs Iterations') plt.xlabel('Number of iterations') plt.ylabel('Validation Set Accuracy') plt.savefig('bayes_opt.png') plt.close(1) train_data_path = 'income-data/income.train.txt' dev_data_path = 'income-data/income.dev.txt' test_data_path = 'income-data/income.test.txt' read_data(train_data_path, dev_data_path, test_data_path) # Perform Bayesian Optimization for Bagging bagging = BayesianOptimization(optimize_bagging, {'max_depth': [1, 100], 'n_estimators': [1, 100]}) bagging.explore({'max_depth': [1, 2, 3, 5, 10], 'n_estimators': [1, 2, 5, 10, 20]}) # Run for 50 iterations bagging.maximize(n_iter = 50) # Perform Bayesian Optimization for Boosting boosting = BayesianOptimization(optimize_boosting, {'max_depth': [1, 100], 'n_estimators': [1, 100]}) boosting.explore({'max_depth': [1, 2, 3, 5, 10], 'n_estimators': [1, 2, 5, 10, 20]}) # Run for 50 iterations boosting.maximize(n_iter = 50) # Write output to file generate_results(bagging, boosting)
def optimize_postproc_params(arch_to_paths, arches, train_data_path):
    """
    Search for post-processing parameters that maximise the scoring objective.

    The objective is built by ``_make_scorable_objective``.  The search runs
    in four stages:

    1. probe a list of hand-picked candidate parameter sets,
    2. ask the optimizer for ``n_init`` additional initial points,
    3. run three UCB rounds with kappa 10, 5 and 1,
    4. sweep ``alpha`` over 50 evenly spaced values in [0, 1] while the other
       best parameters are held fixed.

    Args:
        arch_to_paths: forwarded unchanged to ``_make_scorable_objective``.
        arches: forwarded unchanged to ``_make_scorable_objective``.
        train_data_path: forwarded unchanged to ``_make_scorable_objective``.

    Returns:
        tuple: ``(max_value, max_params)`` - the best objective value seen and
        the parameter dict that produced it.
    """

    def _summarise_best(optimizer):
        # Best value observed so far together with its named parameters.
        winner = optimizer.X[optimizer.Y.argmax()]
        return {'max_val': optimizer.Y.max(),
                'max_params': dict(zip(optimizer.keys, winner))}

    def _report(summary):
        print('seeded ' + ub.repr2(summary, nl=0, precision=4))

    preload, seeded_objective = _make_scorable_objective(
        arch_to_paths, arches, train_data_path)
    preload()  # read datas into memory

    seeded_bo = BayesianOptimization(seeded_objective, {
        'mask_thresh': (.4, .9),
        'seed_thresh': (.4, .9),
        'min_seed_size': (0, 100),
        'min_size': (0, 100),
        'alpha': (0.0, 1.0),
    })

    # Hand-picked starting points; trailing comments record the score each
    # one reached in an earlier run.
    cand_params = [
        {'mask_thresh': 0.9000, 'min_seed_size': 100.0000, 'min_size': 100.0000, 'seed_thresh': 0.4000},
        {'mask_thresh': 0.8367, 'seed_thresh': 0.4549, 'min_seed_size': 97, 'min_size': 33},  # 'max_val': 0.8708
        {'mask_thresh': 0.8367, 'min_seed_size': 97.0000, 'min_size': 33.0000, 'seed_thresh': 0.4549},  # max_val': 0.8991
        {'mask_thresh': 0.7664, 'min_seed_size': 48.5327, 'min_size': 61.8757, 'seed_thresh': 0.4090},  # 'max_val': 0.9091}
        {'mask_thresh': 0.6666, 'min_seed_size': 81.5941, 'min_size': 13.2919, 'seed_thresh': 0.4241},  # full dataset 'max_val': 0.9142}
        # Candidates that are currently disabled:
        # {'mask_thresh': 0.8, 'seed_thresh': 0.5, 'min_seed_size': 20, 'min_size': 0},
        # {'mask_thresh': 0.5, 'seed_thresh': 0.8, 'min_seed_size': 20, 'min_size': 0},
        # {'mask_thresh': 0.8338, 'min_seed_size': 25.7651, 'min_size': 38.6179, 'seed_thresh': 0.6573},
        # {'mask_thresh': 0.6225, 'min_seed_size': 93.2705, 'min_size': 5, 'seed_thresh': 0.4401},
        # {'mask_thresh': 0.7870, 'min_seed_size': 85.1641, 'min_size': 64.0634, 'seed_thresh': 0.4320},
    ]
    # Every candidate is probed with the same fixed alpha.
    for candidate in cand_params:
        candidate['alpha'] = .88

    n_init = 2 if DEBUG else 40
    # explore() wants {param_name: [values...]}, i.e. the column-wise view.
    candidate_columns = pd.DataFrame(cand_params).to_dict(orient='list')
    seeded_bo.explore(candidate_columns)

    # Basically just using this package for random search.
    # The BO doesnt seem to help much
    seeded_bo.plog.print_header(initialization=True)
    seeded_bo.init(n_init)
    _report(_summarise_best(seeded_bo))

    n_iter = 2 if DEBUG else 10
    for kappa in (10, 5, 1):
        seeded_bo.maximize(n_iter=n_iter, acq='ucb', kappa=kappa,
                           alpha=1e-5, n_restarts_optimizer=2)

    best_res = _summarise_best(seeded_bo)
    _report(best_res)

    max_value = best_res['max_val']
    max_params = best_res['max_params']

    # search for a good alpha
    # TODO: improve bayes_opt package to handle this
    for alpha in tqdm.tqdm(np.linspace(0, 1, 50), desc='opt alpha'):
        trial = dict(max_params, alpha=alpha)
        score = seeded_objective(**trial)
        if score > max_value:
            max_value, max_params = score, trial

    return max_value, max_params
# Report the shape of the held-out test matrix.  format() has to be applied
# to the string *inside* print(): chained onto print(...) it is called on the
# None that print() returns under Python 3 and raises AttributeError.
print("Size of Test Set: Columns = {}, Rows = {}".format(
    X_Final.shape[1], X_Final.shape[0]))

##############################################################################
# Bayesian Optimisation - 75 Iterations for Each Algorithm

# Machine Learning Algorithm #1 - Define ranges of Hyperparameters.
# "normv" and "log_y" have identical lower and upper bounds, which pins them
# to 1 for every sampled point.
ml1_bo = BayesianOptimization(cross_validation, {
    "max_features": (1, 20),
    "criterion": (0, 1),
    "normv": (1, 1),
    "max_depth": (1, 40),
    "n_estimators": (100, 300),
    "log_y": (1, 1),
})
# Seed the search with one hand-picked configuration.
# NOTE(review): n_estimators=50 lies below the declared (100, 300) range -
# confirm this starting point is intentional.
ml1_bo.explore({
    "max_features": [3.0],
    "criterion": [0],
    "normv": [1],
    "max_depth": [15],
    "n_estimators": [50],
    "log_y": [1],
})

# Machine Learning Algorithm #2 - Define ranges of Hyperparameters
ml2_bo = BayesianOptimization(cross_validation2, {
    "n_neighbors": (2, 20),
    "leaf_size": (10, 60),
    "normv": (1, 1),
    "log_y": (1, 1),
})
ml2_bo.explore({
    "n_neighbors": [5],
    "leaf_size": [20],
    "normv": [1],
    "log_y": [1],
})

# Optimisation of Machine Learning Algorithm #1 = RandomForestRegressor
# (75 initial points followed by a single optimisation step).
ml1_bo.maximize(init_points=75, n_iter=1)

# Optimisation of Machine Learning Algorithm #2 = KNeighborsRegressor
ml2_bo.maximize(init_points=75, n_iter=1)
def run(gpunum, cancer_type, feature_type, attempt):
    """
    Tune the dense regression network with Bayesian optimisation.

    Restricts TensorFlow to the requested GPU, builds an objective that
    trains and scores one network per hyper-parameter set, optimises it with
    ``BayesianOptimization`` and appends the first two recorded results to
    ``./BO_Result_<cancer_type>.txt`` as tab-separated lines.

    Args:
        gpunum: value written to the ``CUDA_VISIBLE_DEVICES`` environment
            variable (must be a string, as required by ``os.environ``).
        cancer_type: name used to locate the input pickle and to label the
            saved model and result file.
        feature_type: label forwarded to the auto-encoder helper, the
            callback and the result lines.
        attempt: integer run index; formatted with ``%d`` in log output and
            in the model file name.

    Returns:
        None.  Results are printed and appended to the result file.
    """
    batch_size = 32
    epochs = 10
    os.environ["CUDA_VISIBLE_DEVICES"] = gpunum

    def get_session(gpu_fraction=1):
        # Session whose GPU memory is allocated on demand (allow_growth).
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_fraction, allow_growth=True)
        return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    ktf.set_session(get_session())

    # Shared with MyCallback, which receives this list on construction.
    results = []

    def scoreofModel(cancer_type, feature_type, attempt):
        # Returns the objective; the outer arguments are captured so the
        # optimizer only sees the five tunable hyper-parameters.
        def inner_SoM(pca, dropout, hidden_dims, ae_dim1, ae_dim2):
            print("**scoreofModel pca " + str(pca) + " dropout " + str(dropout) +
                  " hidden dims " + str(hidden_dims) + " dim1 " + str(ae_dim1) +
                  " dim2 " + str(ae_dim2))
            print("ct %s ft %s attempt %d" % (cancer_type, feature_type, attempt))

            # The optimizer proposes floats; layer sizes must be integers.
            hidden_dims = int(hidden_dims)
            ae_dim1 = int(ae_dim1)
            ae_dim2 = int(ae_dim2)

            # AE
            # NOTE(review): pickle.load executes arbitrary code from the
            # file - only safe while this path holds trusted project data.
            with open('../test_bong/data/overlap_%s.pkl' % (cancer_type), 'rb') as handle:
                labels = pickle.load(handle)
                x = pickle.load(handle)
                y = pickle.load(handle)

            # Hold out 80 samples for testing; columns 0 and 1 of y are split
            # alongside the features and the labels.
            x_trn, x_tst, c_trn, c_tst, s_trn, s_tst, l_trn, l_tst = \
                train_test_split(x, y[:, 0], y[:, 1], labels,
                                 test_size=80, random_state=7)

            x_trn, x_tst = AE_again_read.AE_model_save(
                cancer_type, feature_type, ae_dim1, ae_dim2, x_trn, x_tst)

            # PCA is fitted on the training split only, then applied to test.
            clf = PCA(pca, whiten=True)
            x_trn = clf.fit_transform(x_trn)
            x_tst = clf.transform(x_tst)

            # Carve a 20-sample development set out of the training split.
            x_trn, x_dev, c_trn, c_dev, s_trn, s_dev, l_trn, l_dev = train_test_split(
                x_trn, c_trn, s_trn, l_trn, test_size=20, random_state=7)

            data = (x_trn, c_trn, s_trn, x_dev, c_dev, s_dev, x_tst, c_tst, s_tst)

            def ModelV1(model_input):
                # Two dropout + ReLU dense layers and a linear scalar output.
                z = Dropout(dropout)(model_input)
                z = Dense(hidden_dims, activation='relu')(z)
                z = Dropout(dropout)(z)
                z = Dense(hidden_dims, activation='relu')(z)
                model_output = Dense(1, activation=None)(z)
                model = Model(model_input, model_output)
                # Alternative loss that was tried:
                # model.compile(loss=my_cindex(c_tst, s_tst), optimizer='adam')
                model.compile(loss="mse", optimizer='adam')
                return model

            feature_dim = x_trn.shape[1]
            model_input = Input(shape=(feature_dim, ))
            model = ModelV1(model_input)
            model.summary()

            model_filepath = '../model/%s-%s-%d-%s-%s-%d-%d-%d.model' % (
                cancer_type, feature_type, attempt, str(pca), str(dropout),
                hidden_dims, ae_dim1, ae_dim2)
            checkpoint = MyCallback(
                results, model_filepath, data, real_save=True, verbose=1,
                save_best_only=True, mode='auto', cancer_type=cancer_type,
                feature_type=feature_type, thr=pca, dropout_prob=dropout,
                dimension=hidden_dims, activate='relu',
                AE1=ae_dim1, AE2=ae_dim2)

            model.fit(x_trn, s_trn, batch_size=batch_size, shuffle=True,
                      callbacks=[checkpoint], epochs=epochs,
                      validation_data=(x_dev, s_dev))

            pred_tst = model.predict(x_tst)
            # Value maximised by the optimizer: my_cindex on the test split.
            return my_cindex(c_tst, s_tst)(s_tst, pred_tst)

        return inner_SoM

    bo_dict = {
        "pca": (0.98, 0.9999),
        "dropout": (0, 0.8),
        "hidden_dims": (10, 1000),
        "ae_dim1": (100, 1500),
        "ae_dim2": (100, 700),
    }

    v1BO = BayesianOptimization(
        scoreofModel(cancer_type, feature_type, attempt), bo_dict, verbose=True)

    # Hand-picked probes, read column-wise: the i-th entry of every list
    # forms one point.
    # NOTE(review): pca=0.1 in the second point is outside the declared
    # (0.98, 0.9999) range - confirm it is intentional.
    v1BO.explore({
        "pca": [0.98, 0.1, 0.9999],
        "dropout": [0, 0.2, 0.8],
        "hidden_dims": [10, 200, 1000],
        "ae_dim1": [100, 300, 1500],
        "ae_dim2": [100, 100, 700],
    })

    v1BO.maximize(init_points=2, n_iter=30, acq='ucb', kappa=5)

    print('Final Results')
    results.append(v1BO.res['all'])
    print(v1BO.res)

    with open('./BO_Result_' + cancer_type + '.txt', 'at') as f:
        params = v1BO.res['all']['params']
        values = v1BO.res['all']['values']
        keys = params[0].keys()
        # NOTE(review): only the first two recorded iterations are written -
        # confirm this is not meant to cover every entry of `values`.
        for i in range(2):
            fields = [cancer_type, feature_type]
            fields.extend(str(params[i][k]) for k in keys)
            fields.append(str(values[i]))
            f.write('\t'.join(fields) + '\n')
# Rows selected by the fold indices computed earlier in the script.
x1 = np.array(xtrain)[idx1, :][0]
y0, y1 = (np.array(ytrain)[fold_rows] for fold_rows in (idx0, idx1))

nb_classes = 2
dims = xtrain.shape[1]
print(dims, 'dims')

# Both dense layers are searched between 15% and 200% of the input width.
dense_bounds = (int(0.15 * dims), int(2 * dims))
dropout_bounds = (0.05, 0.5)

kerasBO = BayesianOptimization(
    kerascv,
    {
        'dense1': dense_bounds,
        'dropout1': dropout_bounds,
        'dense2': dense_bounds,
        'dropout2': dropout_bounds,
        'epochs': (int(20), int(150)),
    })

# One hand-picked starting configuration.
kerasBO.explore({
    'dense1': [int(0.15 * dims)],
    'dropout1': [0.05],
    'dense2': [int(1.5 * dims)],
    'dropout2': [0.5],
    'epochs': [40],
})

kerasBO.maximize(init_points=3, n_iter=25)

best = kerasBO.res['max']
print('-' * 53)
print('Final Results')
# NOTE(review): the label says "Extra Trees" although the optimizer tunes
# `kerascv` - looks like a copy-paste leftover; confirm before renaming.
print('Extra Trees: %f' % best['max_val'])
print(best['max_params'])
from bayes_opt import BayesianOptimization

# Example of how to use this bayesian optimization package.


def quadratic(x, y):
    """Simple concave quadratic of two variables used as the target."""
    return -x**2 - (y - 1)**2 + 1


# Search box: parameter name -> (lower bound, upper bound).
search_bounds = {
    'x': (-4, 4),
    'y': (-3, 3),
}

# Create the optimizer from the function to be maximized together with the
# parameter names and their bounds.
bo = BayesianOptimization(quadratic, search_bounds)

# Points we explicitly want the algorithm to probe: a dictionary mapping
# each parameter name to the list of values to include in the search.
probe_points = {'x': [-1, 3], 'y': [-2, 2]}
bo.explore(probe_points)

# Prior knowledge of the target function (even if not totally accurate) can
# also be handed to the optimizer: target values are the keys, and each maps
# to the parameter values that produced it.
known_targets = {
    -2: {'x': 1, 'y': 0},
    -1.251: {'x': 1, 'y': 1.5},
}
bo.initialize(known_targets)

# With the initialization in place, maximize() runs the optimization.
bo.maximize(init_points=5, n_iter=15, kappa=3.29)

# The output values can be accessed through the `res` attribute.
print(bo.res['max'])
def target(**inargs): ordered_values = [inargs[param_name] for param_name in param_names] acc = acc_dict[np.array(ordered_values).tostring()] return acc init_dict = OrderedDict() for i, param_name in enumerate(param_names): init_dict[param_name] = (min(param_ranges[i]), max(param_ranges[i])) bo = BayesianOptimization(target, init_dict, verbose=0) done_params = np.reshape(results[:,:-1], (results.shape[0], nparam)) param_dict = OrderedDict() for i, param_name in enumerate(param_names): param_dict[param_name] = done_params[:, i] bo.explore(param_dict) ################################################## # main loop for iter in range(max_iter): #you can tune the gp parameters and bo parameters #when acq='ucb', set kappa within [10^-3, 10^-2, ..., 10^3] #when acq='poi' or 'ei', set xi within [10^-3, 10^-2, ..., 10^3] gp_params = {'kernel': None, 'alpha': 1e-5} bo.maximize(init_points=0, n_iter=0, acq='poi', xi=0.01, **gp_params) utility = bo.util.utility(all_params, bo.gp, 0) sort_indices = np.argsort(utility) sort_indices = sort_indices[:: -1] for tmp_index in sort_indices: next_params = all_params[tmp_index]
n_informative=12, n_redundant=7) def svccv(C, gamma): return cross_val_score(SVC(C=C, gamma=gamma, random_state=2), data, target, 'roc_auc', cv=5).mean() def rfccv(n_estimators, min_samples_split, max_features): return cross_val_score(RFC(n_estimators=int(n_estimators), min_samples_split=int(min_samples_split), max_features=min(max_features, 0.999), random_state=2), data, target, 'roc_auc', cv=5).mean() if __name__ == "__main__": svcBO = BayesianOptimization(svccv, {'C': (0.001, 100), 'gamma': (0.0001, 0.1)}) svcBO.explore({'C': [0.001, 0.01, 0.1], 'gamma': [0.001, 0.01, 0.1]}) rfcBO = BayesianOptimization(rfccv, {'n_estimators': (10, 250), 'min_samples_split': (2, 25), 'max_features': (0.1, 0.999)}) svcBO.maximize(acq='xcxcxc') print('-'*53) #---------------------------------------------------------- rfcBO.maximize() #------------------------------------------------------------------------------ #------------------------------------------------------------- print('-'*53) #---------------------------------------------------- print('Final Results') #---------------------------- print('SVC: %f' % svcBO.res['max']['max_val']) #---------------------------- print('RFC: %f' % rfcBO.res['max']['max_val'])
def hypersearch_probs():
    """
    Search thresholds for the seeded instance labelling on saved probabilities.

    The objective is the mean of the first ``instance_fscore`` entry over a
    fixed subset of probability files.  Hand-picked candidates are probed
    first, then ``n_init`` initial points are requested, then three UCB
    rounds with kappa 10, 5 and 1 are run.  Progress is printed; nothing is
    returned.

    NOTE(review): ``paths``, ``task``, ``inner_bo``, ``outer_bo`` and ``arch``
    are not defined in this function, so they must exist at module level for
    it to run - confirm, or drop the ``inner``/``outer``/``arch`` lines.
    """
    prob_paths = paths['probs']
    prob1_paths = paths['probs1']

    # https://github.com/fmfn/BayesianOptimization
    # https://github.com/fmfn/BayesianOptimization/blob/master/examples/usage.py
    # https://github.com/fmfn/BayesianOptimization/blob/master/examples/exploitation%20vs%20exploration.ipynb

    # Indices of the files used for scoring.
    # subx = [0, 1, 2, 3, 4, 5]
    subx = [2, 4, 5, 9, 10, 14, 17, 18, 20, 30, 33, 39, 61, 71, 72, 73, 75,
            81, 84]

    from bayes_opt import BayesianOptimization

    def best_of(optimizer):
        # Best value observed so far together with its named parameters.
        winner = optimizer.X[optimizer.Y.argmax()]
        return {'max_val': optimizer.Y.max(),
                'max_params': dict(zip(optimizer.keys, winner))}

    def describe(optimizer):
        return ub.repr2(best_of(optimizer), nl=0, precision=4)

    def seeded_objective(**params):
        seed_thresh = params['seed_thresh']
        mask_thresh = params['mask_thresh']
        min_seed_size = params['min_seed_size']
        min_size = params['min_size']

        fscores = []
        for idx in subx:
            path = prob_paths[idx]
            path1 = prob1_paths[idx]
            gti, uncertain, dsm, _bgr = gt_info_from_path(path)

            # Seeds: threshold the 'inner_building' channel of the first file.
            seed_channel = task.classname_to_id['inner_building']
            seed_probs = np.load(path)['arr_0'][:, :, seed_channel]
            seed = (seed_probs > seed_thresh).astype(np.uint8)

            # Mask: threshold channel 1 of the second file.
            mask_probs = np.load(path1)['arr_0'][:, :, 1]
            mask = (mask_probs > mask_thresh).astype(np.uint8)

            pred = seeded_instance_label(seed, mask,
                                         min_seed_size=min_seed_size,
                                         min_size=min_size)
            fscores.append(instance_fscore(gti, uncertain, dsm, pred)[0])
        return np.mean(fscores)

    seeded_bounds = {
        'mask_thresh': (.4, .9),
        'seed_thresh': (.4, .9),
        'min_seed_size': (0, 100),
        'min_size': (0, 100),
    }

    n_init = 50
    seeded_bo = BayesianOptimization(seeded_objective, seeded_bounds)

    # Hand-picked starting points; trailing comments record earlier scores.
    seed_points = [
        {'mask_thresh': 0.9000, 'min_seed_size': 100.0000, 'min_size': 100.0000, 'seed_thresh': 0.4000},
        {'mask_thresh': 0.8, 'seed_thresh': 0.5, 'min_seed_size': 20, 'min_size': 0},
        {'mask_thresh': 0.5, 'seed_thresh': 0.8, 'min_seed_size': 20, 'min_size': 0},
        {'mask_thresh': 0.8338, 'min_seed_size': 25.7651, 'min_size': 38.6179, 'seed_thresh': 0.6573},
        {'mask_thresh': 0.6225, 'min_seed_size': 93.2705, 'min_size': 5, 'seed_thresh': 0.4401},
        {'mask_thresh': 0.7870, 'min_seed_size': 85.1641, 'min_size': 64.0634, 'seed_thresh': 0.4320},
        {'mask_thresh': 0.8367, 'seed_thresh': 0.4549, 'min_seed_size': 97, 'min_size': 33},  # 'max_val': 0.8708
        {'mask_thresh': 0.7664, 'min_seed_size': 48.5327, 'min_size': 61.8757, 'seed_thresh': 0.4090},  # 'max_val': 0.9091}
        {'mask_thresh': 0.8367, 'min_seed_size': 97.0000, 'min_size': 33.0000, 'seed_thresh': 0.4549},  # max_val': 0.8991
    ]
    # explore() wants {param_name: [values...]}, i.e. the column-wise view.
    seeded_bo.explore(pd.DataFrame(seed_points).to_dict(orient='list'))

    seeded_bo.plog.print_header(initialization=True)
    seeded_bo.init(n_init)
    print(describe(seeded_bo))

    print('seeded ' + describe(seeded_bo))
    print('inner ' + describe(inner_bo))
    print('outer ' + describe(outer_bo))

    # {'max_params': {'thresh': 0.8000, 'min_size': 0.0000}, 'max_val': 0.6445}

    gp_params = {"alpha": 1e-5, "n_restarts_optimizer": 2}
    n_iter = n_init // 2
    for kappa in (10, 5, 1):
        for optimizer in (seeded_bo, inner_bo, outer_bo):
            optimizer.maximize(n_iter=n_iter, acq='ucb', kappa=kappa,
                               **gp_params)

    print('seeded ' + describe(seeded_bo))
    print('inner ' + describe(inner_bo))
    print('outer ' + describe(outer_bo))
    print(arch)
from bayes_opt import BayesianOptimization

'''
Example of how to use this bayesian optimization package.
'''


def quadratic(x, y):
    """Simple concave quadratic of two variables used as the target."""
    return -x**2 - (y - 1)**2 + 1


# Search box: parameter name -> (lower bound, upper bound).
search_bounds = {'x': (-4, 4), 'y': (-3, 3)}

# Create the optimizer from the function to be maximized together with the
# parameter names and their bounds.
bo = BayesianOptimization(quadratic, search_bounds)

# Points we explicitly want the algorithm to probe: a dictionary mapping
# each parameter name to the list of values to include in the search.
probe_points = {'x': [-1, 3], 'y': [-2, 2]}
bo.explore(probe_points)

# Prior knowledge of the target function (even if not totally accurate) can
# also be handed to the optimizer: target values are the keys, and each maps
# to the parameter values that produced it.
known_targets = {
    -2: {'x': 1, 'y': 0},
    -1.251: {'x': 1, 'y': 1.5},
}
bo.initialize(known_targets)

# With the initialization in place, maximize() runs the optimization.
bo.maximize(init_points=15, n_iter=25)

# The output values can be accessed through the `res` attribute.
print(bo.res['max'])
def bayesopt_under_true_model(seed, info, quantile, mc_reps=1000, T=50):
    """
    Tune the epsilon-decay parameters ``zeta`` on a fixed two-armed bandit.

    A ``Bandit.NormalMAB`` with reward means 0.3 / 0.6 and variances 0.1**2
    is simulated once up front; every candidate ``zeta`` is then scored by
    ``mab_rollout_with_fixed_simulations`` on those same simulations.

    Args:
        seed: value passed to ``np.random.seed``.
        info: truthy selects the 9-parameter
            ``information_expit_epsilon_decay`` schedule, falsy the
            3-parameter ``expit_epsilon_decay`` one; also forwarded to the
            rollout function.
        quantile: forwarded to the rollout function.
        mc_reps: first argument of ``env.generate_mc_samples``.
        T: second argument of ``env.generate_mc_samples``; also forwarded to
            the rollout function.

    Returns:
        numpy.ndarray: the best parameters found, ordered zeta0, zeta1, ...
    """
    np.random.seed(seed)

    env = Bandit.NormalMAB(list_of_reward_mus=[0.3, 0.6],
                           list_of_reward_vars=[0.1**2, 0.1**2])
    simulations = env.generate_mc_samples(mc_reps, T)
    rollout = mab_rollout_with_fixed_simulations
    policy = mab_epsilon_greedy_policy

    def score(zeta_values):
        # Shared tail of both objectives: pack the scalars and roll out.
        return rollout(np.array(zeta_values), policy, T, tuning_function,
                       env, info, quantile, pre_simulated_data=simulations)

    # NOTE(review): some explore_ entries below fall outside `bounds` (e.g.
    # zeta0=0.0 and zeta1=-122.5 in the first branch, zeta1=50.0 in the
    # second) - confirm these probes are intentional.
    if info:
        tuning_function = tuned_bandit.information_expit_epsilon_decay
        bounds = {
            'zeta0': (0.05, 2.0),
            'zeta1': (-5.0, 5.0),
            'zeta2': (-5.0, 5.0),
            'zeta3': (-5.0, 5.0),
            'zeta4': (-5.0, 5.0),
            'zeta5': (-5.0, 5.0),
            'zeta6': (-5.0, 5.0),
            'zeta7': (-5.0, 5.0),
            'zeta8': (-5.0, 5.0),
        }
        explore_ = {
            'zeta0': [0.05, 0.1, 0.0, 1.0, 0.1],
            'zeta1': [0.0, 0.0, 0.0, 0.0, -122.5],
            'zeta2': [0.0, 0.0, 0.0, 0.0, 0.0],
            'zeta3': [0.0, 0.0, 0.0, 0.0, 0.0],
            'zeta4': [0.0, 0.0, 0.0, 0.0, 0.0],
            'zeta5': [0.0, 0.0, 0.0, 0.0, 2.5],
            'zeta6': [0.0, 0.0, 0.0, 0.0, 0.0],
            'zeta7': [0.0, 0.0, 0.0, 0.0, 0.0],
            'zeta8': [0.0, 0.0, 0.0, 0.0, 0.0],
        }

        def objective(zeta0, zeta1, zeta2, zeta3, zeta4, zeta5, zeta6, zeta7,
                      zeta8):
            return score([zeta0, zeta1, zeta2, zeta3, zeta4, zeta5, zeta6,
                          zeta7, zeta8])
    else:
        tuning_function = tuned_bandit.expit_epsilon_decay
        bounds = {
            'zeta0': (0.05, 2.0),
            'zeta1': (1.0, 49.0),
            'zeta2': (0.01, 2.5),
        }
        explore_ = {
            'zeta0': [1.0, 0.05, 1.0, 0.1],
            'zeta1': [50.0, 49.0, 1.0, 49.0],
            'zeta2': [0.1, 2.5, 1.0, 2.5],
        }

        def objective(zeta0, zeta1, zeta2):
            return score([zeta0, zeta1, zeta2])

    bo = BayesianOptimization(objective, bounds)
    bo.explore(explore_)
    bo.maximize(init_points=50, n_iter=50, alpha=1e-4)
    # bo.maximize(init_points=10, n_iter=15, alpha=1e-4)

    # Re-order the winning parameters by index so the result is positional.
    winner = bo.res['max']['max_params']
    zeta_names = ['zeta{}'.format(i) for i in range(len(bounds))]
    best_param = np.array([winner[name] for name in zeta_names])
    print(best_param)
    return best_param
# Fold 1 is held out; every other fold is used for fitting.
idx0 = np.where(fold_index != 1)
idx1 = np.where(fold_index == 1)

x0, x1 = (np.array(xtrain)[fold_rows, :][0] for fold_rows in (idx0, idx1))
y0, y1 = (np.array(ytrain)[fold_rows] for fold_rows in (idx0, idx1))

nb_classes = 2
dims = xtrain.shape[1]
print(dims, 'dims')

# Both dense layers are searched between 15% and 200% of the input width.
dense_bounds = (int(0.15 * dims), int(2 * dims))
dropout_bounds = (0.05, 0.5)

kerasBO = BayesianOptimization(
    kerascv,
    {
        'dense1': dense_bounds,
        'dropout1': dropout_bounds,
        'dense2': dense_bounds,
        'dropout2': dropout_bounds,
        'epochs': (int(20), int(150)),
    })

# One hand-picked starting configuration.
kerasBO.explore({
    'dense1': [int(0.15 * dims)],
    'dropout1': [0.05],
    'dense2': [int(1.5 * dims)],
    'dropout2': [0.5],
    'epochs': [40],
})

kerasBO.maximize(init_points=3, n_iter=25)

best = kerasBO.res['max']
print('-' * 53)
print('Final Results')
# NOTE(review): the label says "Extra Trees" although the optimizer tunes
# `kerascv` - looks like a copy-paste leftover; confirm before renaming.
print('Extra Trees: %f' % best['max_val'])
print(best['max_params'])
val = 0. return val def rfccv(n_estimators, min_samples_split, max_features): val = cross_val_score(RFC(n_estimators=int(n_estimators), min_samples_split=int(min_samples_split), max_features=min(max_features, 0.999), random_state=2), data, target, 'f1', cv=2).mean() return val if __name__ == "__main__": gp_params = {"alpha": 1e-5} SA = SA() svcBO = BayesianOptimization(svccv, {'C': (0., 1.)}) svcBO.explore({'C': SA}) #svcBO.explore({'C':[0.1,0.2,0.5,0.9]}) svcBO.maximize(n_iter=5, **gp_params) print('-' * 53) print('-' * 53) print('Final Results') print('SVC: %f' % svcBO.res['max']['max_val']) print('SVC: %s' % list(svcBO.res['max']['max_params'].values())[0])