def main():
    # create Bayes Opt model instance containing ET-DDF sim
    cfg = load_config(
        os.path.abspath(
            '/home/ian/Documents/school/grad/et-ddf/python/config/config.yaml'))
    etddf_model = ETDDFModel(cfg)

    # create Bayes Opt instance, specifying model and number of parameters
    opt_instance = BayesOpt(etddf_model, param_bounds=[(0, 10)])
    # opt_instance = BayesOpt(RastriginFxn1D(), param_bounds=[(-5.5, 5.5)])
    # opt_instance = BayesOpt(TestModel(), param_bounds=[(-5.5, 5.5)])

    # compute optimal parameter values
    optimal_params = opt_instance.optimize(max_iterations=20)
    print(optimal_params)
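# A minimal entry-point guard (an addition, not part of the original snippet),
# assuming this module is meant to be executed directly as a script.
if __name__ == '__main__':
    main()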
          reg_alpha, subsample=1, colsample_bytree=1, silent=True):
    return cv_s(xgb.XGBRegressor(max_depth=int(max_depth),
                                 learning_rate=learning_rate,
                                 n_estimators=int(n_estimators),
                                 gamma=gamma,
                                 reg_alpha=reg_alpha,
                                 min_child_weight=min_child_weight,
                                 objective='reg:linear'),
                gpstrain, height, "mean_squared_error", cv=4).mean()


## can optimize std
xgboostBO = BayesOpt(
    xgbcv, {
        'max_depth': (8, 17),
        'learning_rate': (0.01, 0.1),
        'n_estimators': (100, 500),
        'reg_alpha': (0, 1),
        'gamma': (0.1, 1),
        'min_child_weight': (0, 10),
    })

print("Start Optimization of Main Model")
xgboostBO.maximize(init_points=10, n_iter=110, xi=0.0, acq="poi")
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import cross_val_score as cv_s
from bayes_opt import BayesianOptimization as BayesOpt

target = pd.read_csv('target.csv', index_col=0)
train = pd.read_hdf('train_one_hot.h5')

LogisticRegression(C=1.0, max_iter=800)


def lrcv(C):
    return cv_s(LogisticRegression(C=10**C), train, target['status_group'],
                "log_loss", cv=4).mean()


xgboostBO = BayesOpt(lrcv, {'C': (-5, 2)})

print("Start Optimization of Main Model")
xgboostBO.maximize(init_points=20, n_iter=130, xi=0.1, acq="poi")
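# Hedged follow-up sketch (an assumption, not part of the original script):
# with the 0.x bayes_opt API used throughout these examples, the best point
# can be read back from xgboostBO.res["max"] (same 'max_params' layout shown
# in the other snippets) and used to refit the final model.
best_C = 10**xgboostBO.res["max"]["max_params"]["C"]
final_model = LogisticRegression(C=best_C, max_iter=800)
final_model.fit(train, target['status_group'])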
                     header=None).astype('float64')
features = pd.read_csv('./L-features.csv', index_col=False, header=None)

#train=train.drop(o,axis=0)
#target=target.drop(o,axis=0)
#train.index=range(0,train.shape[0])
#target.index=range(0,train.shape[0])

train = train.ix[:, np.where(train.columns == features)[1]]


### Kernel ridge CV model
def Kernelcv(alpha, gamma):
    return cv_s(KernelRidge(alpha=10**alpha, gamma=10**gamma, kernel='rbf'),
                train, target, "mean_squared_error", cv=4).mean()


xgboostBO = BayesOpt(Kernelcv, {'alpha': (-8, 0), 'gamma': (-8, 0)})

print("Start Optimization of Main Model")
xgboostBO.maximize(init_points=50, n_iter=350, xi=0.05, acq="poi")
xgboostBO.res["max"]
#{'max_params': {'alpha': -2.3534935983173315, 'gamma': -2.5598522482339896},
# 'max_val': -0.014045783322961644}

etime = float(time.time() - stime)
o = [30, 410, 462, 495, 523, 588, 632, 688, 968, 970, 1298, 1324, 1432]

train = pd.read_csv('./blend_train_log500-15-no.csv', index_col=0)
target = np.log(
    pd.read_csv('./target.csv', index_col=0, header=None).astype('float64'))

#train=train.drop(o,axis=0)
target = target.drop(o, axis=0)
#train.index=range(0,train.shape[0])
target.index = range(0, target.shape[0])


### Lasso CV model
def Kernelcv(alpha):
    return cv_s(Lasso(alpha=10**alpha), train, target,
                "mean_squared_error", cv=15).mean()


xgboostBO = BayesOpt(Kernelcv, {'alpha': (-4.5, -4)})

print("Start Optimization of Main Model")
xgboostBO.maximize(init_points=50, n_iter=350, xi=0.05, acq="poi")
xgboostBO.res["max"]
#{'max_params': {'alpha': -4.2625417423330569},
# 'max_val': -0.0086725388641284446}

etime = float(time.time() - stime)
# 'n_estimators': (402),
# 'gamma': (0.8170),
# 'min_child_weight': (1.039),
# 'subsample': (0.9892),
# 'colsample_bytree': (0.5972)

### Best Cat: accuracy: 0.83339
# 'max_depth': (20 (can be more)),
# 'learning_rate': (0.0619),
# 'n_estimators': (237),
# 'gamma': (0.6564),
# 'min_child_weight': (1.4305),
# 'subsample': (0.9431),
# 'colsample_bytree': (0.7469)

## Cat log_loss: 0.44981
xgboostBO = BayesOpt(
    xgbcv, {
        'max_depth': (20, 27),
        'learning_rate': (0.0001, 0.07),
        'n_estimators': (300, 600),
        'gamma': (0.1, 10),
        'min_child_weight': (1, 5),
        'subsample': (0.9, 1),
        'colsample_bytree': (0.5, 1)
    })

xgboostBO.maximize(init_points=35, n_iter=225)
xgboostBO.res["max"]

etime = float(time.time() - stime)
def xgbcv(max_depth, max_features, n_estimators, min_samples_leaf, n_jobs=-1):
    return cv_s(ExtraTreesRegressor(max_depth=int(max_depth),
                                    max_features=max_features,
                                    n_estimators=int(n_estimators),
                                    min_samples_leaf=int(min_samples_leaf)),
                train, target, "mean_squared_error", cv=4).mean()


## can optimize std
xgboostBO = BayesOpt(
    xgbcv, {
        'max_depth': (10, 30),
        'max_features': (0.2, 1),
        'n_estimators': (200, 501),
        'min_samples_leaf': (1, 21),
    })

print("Start Optimization of Main Model")
xgboostBO.maximize(init_points=50, n_iter=250, xi=0.05, acq="poi")
xgboostBO.res["max"]
#{'max_params': {'max_depth': 25,
#                'max_features': 0.93856560366003716,
#                'min_samples_leaf': 1.204402405756321,
#                'n_estimators': 337.74955938860347},
# 'max_val': -0.019068833348385262}

etime = float(time.time() - stime)
import time

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score as cv_s
from bayes_opt import BayesianOptimization as BayesOpt

train = pd.read_csv('train_lon_lat_predicted.csv.csv', index_col=0)
target = pd.read_csv('target.csv', index_col=0)


def xgbcv(max_depth, max_features, n_estimators, min_samples_leaf, n_jobs=-1):
    return cv_s(RandomForestClassifier(max_depth=int(max_depth),
                                       max_features=max_features,
                                       n_estimators=int(n_estimators),
                                       min_samples_leaf=int(min_samples_leaf)),
                train, target['status_group'], "accuracy", cv=4).mean()


xgboostBO = BayesOpt(
    xgbcv, {
        'max_depth': (25, 35),
        'max_features': (0.5, 1),
        'n_estimators': (100, 401),
        'min_samples_leaf': (1, 21),
    })

print("Start Optimization of Main Model")
xgboostBO.maximize(init_points=20, n_iter=130, xi=0.1, acq="poi")
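# Hedged follow-up sketch (an assumption, not part of the original script):
# pull the best hyperparameters out of res["max"] and refit the forest,
# casting the integer-valued parameters back from the floats BayesOpt reports.
best = xgboostBO.res["max"]["max_params"]
final_rf = RandomForestClassifier(max_depth=int(best['max_depth']),
                                  max_features=best['max_features'],
                                  n_estimators=int(best['n_estimators']),
                                  min_samples_leaf=int(best['min_samples_leaf']))
final_rf.fit(train, target['status_group'])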
ybest = [0] * nRepeat
MyTime = [0] * nRepeat
MyOptTime = [0] * nRepeat
marker = [0] * nRepeat
bo = [0] * nRepeat
[0] * nRepeat

for ii in range(nRepeat):
    if 'kov' in acq_name or acq_name == 'erm' or acq_name == 'cbm':
        bo[ii] = BayesOpt_KnownOptimumValue(myfunction.func, myfunction.bounds,
                                            myfunction.fstar, acq_name, IsTGP,
                                            verbose=1)
    else:
        bo[ii] = BayesOpt(myfunction.func, myfunction.bounds, acq_name,
                          verbose=1)

    ybest[ii], MyTime[ii] = utilities.run_experiment(
        bo[ii], n_init=3 * myfunction.input_dim,
        NN=10 * myfunction.input_dim, runid=ii)
    MyOptTime[ii] = bo[ii].time_opt
    print("ii={} BFV={:.3f}".format(ii, myfunction.ismax * np.max(ybest[ii])))

Score = {}
Score["ybest"] = ybest
Score["MyTime"] = MyTime
Score["MyOptTime"] = MyOptTime

utilities.print_result_sequential(bo, myfunction, Score, acq_type)
n_fourier = np.random.choice(N_FOURIER_RANGE)
sigma_obs = np.random.choice(SIGMA_OBS_RANGE)
init_sample_size = np.random.choice(N_INIT_RANGE)
max_steps = np.random.choice(N_STEPS_RANGE)

MCMC_OPTS = {
    "prior": lambda l: int(l > 0 and l < 1),
    "icdf": lambda l: l,
    "jump": lambda l: l + 0.05 * np.random.randn(),
    "burn_period": 10000,
    "mcmc_samples": np.random.choice(MCMC_RANGE)
}

data_gen = DataGenerator(n_fourier, sigma_obs)

# run with MCMC-based hyperparameter handling and report average regret
opt_engine = BayesOpt(data_gen, init_sample_size, max_steps, sigma_obs,
                      is_mcmc=True, mcmc_opts=MCMC_OPTS)
opt_engine.run()
avg_regret = data_gen.true_best_y - np.sum(opt_engine.y) / max_steps
print(avg_regret)

# repeat without MCMC for comparison
opt_engine = BayesOpt(data_gen, init_sample_size, max_steps, sigma_obs,
                      is_mcmc=False, mcmc_opts=None)
opt_engine.run()
avg_regret = data_gen.true_best_y - np.sum(opt_engine.y) / max_steps
print(avg_regret)
#target.index=range(0,traindata.shape[0])
#traindata=traindata.ix[:,L_features.ix[:,0]]


### Kernel ridge CV model
def Kernelcv(alpha, gamma):
    return -((-cv_s(KernelRidge(alpha=10**alpha, gamma=10**gamma, kernel='rbf'),
                    traindata, target, "mean_squared_error", cv=10).mean())**0.5)


xgboostBO = BayesOpt(Kernelcv, {'alpha': (-2, -1), 'gamma': (-2, -1)})

print("Start Optimization of Main Model")
xgboostBO.maximize(init_points=50, n_iter=350, xi=0.05, acq="poi")
xgboostBO.res["max"]
#{'max_params': {'alpha': -2.3405748356840319, 'gamma': -0.95995274293421018},
# 'max_val': -0.010021231086057682}

# 500-15-no
#{'max_params': {'alpha': -11.794465355319215, 'gamma': -9.6130228126748882},
# 'max_val': -0.010009470913376273}

# 500-15-no-more
#{'max_params': {'alpha': -11.837378408540506, 'gamma': -9.1153909839966953},
#train=train.drop(o,axis=0)
#target=target.drop(o,axis=0)
#train.index=range(0,train.shape[0])
#target.index=range(0,train.shape[0])


### Lasso CV model
def Kernelcv(alpha):
    return cv_s(Lasso(alpha=10**alpha), train, target,
                "mean_squared_error", cv=4).mean()


xgboostBO = BayesOpt(Kernelcv, {'alpha': (-3.3, -3.2)})

print("Start Optimization of Main Model")
xgboostBO.maximize(init_points=50, n_iter=250, xi=0.05, acq="poi")
xgboostBO.res["max"]

# no outliers
#{'max_params': {'alpha': -3.6392536392421784},
# 'max_val': -0.0099399944646872615}

# outliers
#{'max_params': {'alpha': -3.2868282025067179},
# 'max_val': -0.015103465103503494}

etime = float(time.time() - stime)
             lambda_2):
    return cv_s(BayesianRidge(n_iter=800,
                              alpha_1=alpha_1,
                              alpha_2=alpha_2,
                              lambda_1=lambda_1,
                              lambda_2=lambda_2),
                train, target, "mean_squared_error", cv=4).mean()


xgboostBO = BayesOpt(Kernelcv, {
    'alpha_1': (-8, -4),
    'alpha_2': (-8, -4),
    'lambda_1': (-8, -4),
    'lambda_2': (-8, -4)
})

print("Start Optimization of Main Model")
xgboostBO.maximize(init_points=50, n_iter=250, xi=0.05, acq="poi")
xgboostBO.res["max"]
#{'max_params': {'alpha_1': -5.8572336837097456,
#                'alpha_2': -7.7045215593126599,
#                'lambda_1': -4.7191989503901404,
#                'lambda_2': -4.0643295981217626},
# 'max_val': -0.0021668807816198196}
#{'max_params': {'alpha_1': -6.6440446568101725,
    return cv_s(XGBClassifier(max_depth=int(max_depth),
                              learning_rate=learning_rate,
                              n_estimators=int(n_estimators),
                              silent=silent,
                              nthread=nthread,
                              gamma=gamma,
                              min_child_weight=min_child_weight,
                              subsample=subsample,
                              colsample_bytree=colsample_bytree,
                              objective='multi:softprob'),
                dct, dy, "log_loss", cv=8).mean()


xgboostBO = BayesOpt(
    xgbcv, {
        'max_depth': (1, 8),
        'learning_rate': (0.005, 0.1),
        'n_estimators': (100, 600),
        'gamma': (0.5, 5),
        'min_child_weight': (1, 30),
        'subsample': (0.2, 1),
        'colsample_bytree': (0.2, 1)
    })

xgboostBO.maximize(init_points=35, n_iter=365)
xgboostBO.res["max"]

etime = float(time.time() - stime)
acq_func['dim'] = myfunction.input_dim

func_params = {}
func_params['function'] = myfunction

acq_params = {}
acq_params['acq_func'] = acq_func

gp_params = {'kernel': 'SE', 'lengthscale': 0.2 * myfunction.input_dim,
             'noise_delta': 1e-8}

bo = BayesOpt(gp_params, func_params, acq_params)

# initialize BO using 1*dim observations
bo.init(gp_params, n_init_points=1 * myfunction.input_dim)

# run for 3*dim iterations, plotting the PVRS state at each step
NN = 3 * myfunction.input_dim
for index in range(0, NN):
    bo.maximize()
    #viz.plot_bo_2d_pvrs(bo)
    viz.plot_bo_2d_pvrs_short(bo)
                                 min_child_weight=min_child_weight,
                                 subsample=subsample,
                                 colsample_bytree=colsample_bytree,
                                 objective='reg:linear'),
                train, target, "mean_squared_error", cv=4).mean()


## can optimize std
xgboostBO = BayesOpt(
    xgbcv, {
        'max_depth': (11, 16),
        'learning_rate': (0.06, 0.07),
        'n_estimators': (200, 400),
        'gamma': (0.03, 0.06),
        'min_child_weight': (0, 5),
        'subsample': (0.8, 1),
        'colsample_bytree': (0.8, 1)
    })

#{'max_params': {'colsample_bytree': 0.91216449242846653,
#                'gamma': 0.029999999999999999,
#                'learning_rate': 0.064959684732528264,
#                'max_depth': 15.311711784076165,
#                'min_child_weight': 1.4483624828673114,
#                'n_estimators': 361.30845545898808,
#                'subsample': 0.90551784084608777},
# 'max_val': -0.001998787919975611}

print("Start Optimization of Latitude Model")
                              reg_alpha=reg_alpha,
                              min_child_weight=min_child_weight,
                              subsample=subsample,
                              colsample_bytree=colsample_bytree,
                              objective='multi:softprob'),
                train, outcome, "accuracy", cv=4).mean()


xgboostBO = BayesOpt(
    xgbcv, {
        'max_depth': (2, 10),
        'learning_rate': (0.01, 0.1),
        'n_estimators': (100, 300),
        'gamma': (0.01, 1),
        'reg_alpha': (0, 1),
        'min_child_weight': (1, 40),
        'subsample': (0.2, 1),
        'colsample_bytree': (0.2, 1)
    })

print("Start Optimization of Main Model")
xgboostBO.maximize(init_points=30, n_iter=220, xi=0.02, acq="poi")
xgboostBO.res["max"]

## 0.82207, 50n, alldata
##{'max_params': {'colsample_bytree': 0.95166695052920525,
##                'gamma': 0.074789906865142142,
##                'learning_rate': 0.023577270815059184,
                cv=4).mean()


## Dog log_loss: 0.88746
#xgboostBO = BayesOpt(xgbcv,
#                     {
#                         'max_depth': (8),
#                         'learning_rate': (0.0229),
#                         'n_estimators': (455),
#                         'gamma': (0.6161),
#                         'min_child_weight': (1.8397),
#                         'subsample': (0.9097),
#                         'colsample_bytree': (0.6130)
#                     })

## Dog log_loss: 0.88746
xgboostBO = BayesOpt(
    xgbcv, {
        'max_depth': (6, 12),
        'learning_rate': (0.01, 0.04),
        'n_estimators': (350, 550),
        'gamma': (0.5, 0.8),
        'min_child_weight': (1, 3),
        'subsample': (0.85, 0.95),
        'colsample_bytree': (0.55, 0.75)
    })

xgboostBO.maximize(init_points=35, n_iter=205)
xgboostBO.res["max"]

etime = float(time.time() - stime)
traindata = pd.read_csv('./blend_train_feature.csv', index_col=0)
target = np.log(
    pd.read_csv('./target.csv', index_col=0, header=None).astype('float64'))

nf = 50

#traindata=traindata.drop(o,axis=0)
#target=target.drop(o,axis=0)
#traindata.index=range(0,traindata.shape[0])
#target.index=range(0,traindata.shape[0])


### Lasso CV model
def Kernelcv(alpha):
    return cv_s(Lasso(alpha=10**alpha), traindata, target,
                "mean_squared_error", cv=30).mean()


xgboostBO = BayesOpt(Kernelcv, {'alpha': (-13, -0.1)})

print("Start Optimization of Main Model")
xgboostBO.maximize(init_points=50, n_iter=350, xi=0.05, acq="poi")
xgboostBO.res["max"]
#{'max_params': {'alpha': -4.3021424130219348},
# 'max_val': -0.0086766852987939756}

etime = float(time.time() - stime)
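# Hedged follow-up sketch (an assumption, not part of the original script):
# recover the optimized alpha from the res["max"] dict shown above (the
# parameter is searched in log10 space) and refit a final Lasso model.
best_alpha = 10**xgboostBO.res["max"]["max_params"]["alpha"]
final_lasso = Lasso(alpha=best_alpha).fit(traindata, target)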