def test_bayes_opt_demo():
    """
    pytest tests/test_bayesian_optimization.py::test_bayes_opt_demo

    Run a short UCB-driven optimization over a sampled 1-D function and
    check that the best point found has a high value and lies in the
    expected index range.

    See Also
    --------
    https://github.com/fmfn/BayesianOptimization/blob/master/examples/exploitation%20vs%20exploration.ipynb
    """
    random_state = ensure_rng(0)
    xs = np.linspace(-2, 10, 1000)
    f = np.exp(-(xs - 2)**2) + np.exp(-(xs - 6)**2 / 10) + 1 / (xs**2 + 1)
    # The target looks up the sampled curve; int(x) turns the proposed
    # value into an index into f.
    bo = BayesianOptimization(f=lambda x: f[int(x)],
                              pbounds={'x': (0, len(f) - 1)},
                              random_state=random_state,
                              verbose=0)
    gp_params = {'alpha': 1e-5, 'n_restarts_optimizer': 2}
    # Change acquisition params to speed up optimization for testing purposes
    bo._acqkw['n_iter'] = 5
    bo._acqkw['n_warmup'] = 1000
    bo.maximize(init_points=10, n_iter=5, acq='ucb', kappa=5, **gp_params)
    res = bo.space.max_point()
    max_params = res['max_params']
    max_val = res['max_val']

    ratio = max_val / f.max()
    # The message states the same threshold the assertion actually checks.
    assert max_val > 1.1, 'got {}, but should be > 1.1'.format(max_val)
    assert ratio > .9, 'got {}, should be better than 90% of max val'.format(ratio)

    assert max_params['x'] > 300, 'should be in a peak area (around 300)'
    assert max_params['x'] < 400, 'should be in a peak area (around 300)'
Example #2
0
    def fit(self, X, y=None):
        """Tune the estimator with Bayesian optimization, then fit it.

        Parameters
        ----------

        X : pandas dataframe or array-like
           training samples. If pandas dataframe can handle dict of feature in one column or convert a set of columns
        y : array like, required for array-like X and not used presently for pandas dataframe
           class labels

        Returns
        -------
        self: object
        """
        self.X, self.y = X, y

        # Maximize self.score over the configured parameter ranges.
        optimizer = BayesianOptimization(self.score, self.param_ranges)
        optimizer.maximize()
        logger.info(optimizer.res)

        best = optimizer.res['max']
        self.best_score = best['max_val']
        tuned = best['max_params']
        # Parameters named in param_int are cast to int before being applied.
        tuned.update((name, int(tuned[name])) for name in self.param_int)

        self.clf.set_params(**tuned)
        self.clf.fit(X, y)
        return self
Example #3
0
def main():
    """Tune an SVC per data split with Bayesian optimization and report test metrics.

    For each of the ten splits ``load_data(1)`` .. ``load_data(10)`` the SVC
    parameters ``C`` and ``tol`` are optimized against the mean 9-fold
    cross-validation score on the training part.  An SVC with the best
    parameters is then fitted and its test error plus the two values returned
    by ``my_scores`` (kept as ``sensi`` / ``speci``) are printed.  Averages
    over all splits are printed at the end.

    Every ``print`` call takes a single argument so the output is identical
    under Python 2 and Python 3.
    """
    print('#' * 53)
    scores = []
    sensis = []
    specis = []
    for i in range(10):
        trainset, testset = load_data(i + 1)
        X_train, y_train = trainset
        X_test, y_test = testset

        def svccv(C, tol):
            return cross_val_score(SVC(C=C, random_state=1, tol=tol),
                                   X_train, y_train, cv=9).mean()

        # Not optimized at the moment; kept so a random-forest search can be
        # added alongside the SVC one.
        def rfccv(n_estimators, min_samples_split, max_features):
            # scoring is passed by keyword: the position of this argument
            # differs between scikit-learn versions.
            return cross_val_score(RFC(n_estimators=int(n_estimators),
                                       min_samples_split=int(min_samples_split),
                                       max_features=min(max_features, 0.999),
                                       random_state=2),
                                   X_train, y_train, scoring='f1', cv=5).mean()

        svcBO = BayesianOptimization(svccv, {'C': (10, 50000), 'tol': (0.0001, 0.1)})
        svcBO.explore({'C': [10, 100, 1000, 10000, 20000, 50000],
                       'tol': [0.0001, 0.001, 0.005, 0.01, 0.05, 0.1]})

        svcBO.maximize(init_points=50, restarts=200, n_iter=100)

        best = svcBO.res['max']
        params = best['max_params']
        print('#' * 53)
        print('Final Results')
        print('SVC: %f' % best['max_val'])
        print('max_params:  %s' % (params,))

        clf = SVC(C=params['C'], random_state=1, tol=params['tol'])
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        result = clf.predict(X_test)
        sensi, speci = my_scores(y_test, result)
        print('%s %s %s' % (1 - score, sensi, speci))

        scores.append(score)
        sensis.append(sensi)
        specis.append(speci)

    print(scores)
    print("accur:\t{}\tstd:\t{}".format(np.mean(scores), np.std(scores)))
    print("sensi:\t{}".format(np.mean(sensis)))
    print("speci:\t{}".format(np.mean(specis)))
Example #4
0
    def compute_results(self):
        """Optimize ``self.model.test_hyperparams`` over ``self.hp_ranges``.

        The keyword arguments returned by ``self.get_gp_params()`` are
        forwarded to ``maximize``; the finished optimizer is handed to
        ``self.get_results`` and its return value is returned.
        """
        optimizer = BayesianOptimization(self.model.test_hyperparams, self.hp_ranges)
        optimizer.maximize(**self.get_gp_params())
        return self.get_results(optimizer)
Example #5
0
def main():
	"""Optimize the seven ``play_game`` parameters and print the best result.

	One known parameter set is queued with ``explore`` before the search
	starts.
	"""
	bounds = {
		'fr': (2, 6), 'sm': (-1, 1), 'mo': (0, 2), 'ma': (0, 2),
		'nm': (-1, 1), 'de': (-1, 1), 'co': (-1, 1),
	}
	# The lambda maps the optimizer's keyword arguments onto play_game's
	# positional parameters.
	bo = BayesianOptimization(
		lambda fr, sm, mo, ma, nm, de, co: play_game(fr, sm, mo, ma, nm, de, co),
		bounds)

	bo.explore({'fr': [5.0771664428677061], 'sm': [-0.13059762676063172], 'mo': [1.3682148714919597],
		'ma': [0.52214706278657907], 'nm': [-0.86627512983565302], 'de': [0.42238952601950097], 'co': [-0.39416823224808289]})

	bo.maximize(init_points=5, n_iter=50, kappa=0.5)

	# The output values can be accessed with bo.res
	print('RESULTS')
	print(bo.res['max'])
def test_explore_lazy():
    """Lazily explored points are queued and only evaluated by ``maximize``."""
    rng = ensure_rng(0)
    grid = np.linspace(-2, 10, 1000)
    values = np.exp(-(grid - 2)**2) + np.exp(-(grid - 6)**2 / 10) + 1 / (grid**2 + 1)
    bo = BayesianOptimization(f=lambda x: values[int(x)],
                              pbounds={'x': (0, len(values) - 1)},
                              random_state=rng,
                              verbose=0)

    # Note we currently expect lazy explore to override points
    # This may not be the case in the future.
    # Hence exactly one queued point and an empty space after each call.
    for index in (values.argmin(), values.argmax()):
        bo.explore({'x': [index]}, eager=False)
        assert len(bo.space) == 0
        assert len(bo.init_points) == 1

    # No random and no guided steps: only the queued point gets evaluated.
    bo.maximize(init_points=0, n_iter=0, acq='ucb', kappa=5)

    best = bo.space.max_point()
    assert best['max_params']['x'] == values.argmax()
    assert best['max_val'] == values.max()
def test_only_random():
    """Twenty random initial points alone should land close to the main peak."""
    rng = ensure_rng(0)
    grid = np.linspace(-2, 10, 1000)
    values = np.exp(-(grid - 2)**2) + np.exp(-(grid - 6)**2 / 10) + 1 / (grid**2 + 1)
    bo = BayesianOptimization(f=lambda x: values[int(x)],
                              pbounds={'x': (0, len(values) - 1)},
                              random_state=rng,
                              verbose=0)
    bo.init(20)

    best = bo.space.max_point()
    best_x = best['max_params']['x']
    best_val = best['max_val']

    assert best_val > 1.0, 'function range is ~.2 - ~1.4, should be above 1.'
    assert best_val / values.max() > .8, 'should be better than 80% of max val'

    assert best_x > 200, 'should be in a peak area (around 300)'
    assert best_x < 500, 'should be in a peak area (around 300)'
Example #8
0
    def opti(self):
        """Optimize ``self.trainAndCompareHit`` over ``x`` and ``y`` and print the best result."""
        search_bounds = {"x": (10, 50), "y": (0.1, 1.0)}
        bo = BayesianOptimization(self.trainAndCompareHit, search_bounds)

        # NOTE(review): "x" lists 40 values but "y" only 5 - confirm that
        # explore() accepts lists of different lengths.
        explore_points = {"x": range(10, 50), "y": [0.1, 0.25, 0.5, 0.75, 1.0]}
        bo.explore(explore_points)

        # Presumably a known observation (key -11) for x=20, y=0.5 - TODO confirm.
        bo.initialize({-11: {"x": 20, "y": 0.5}})

        bo.maximize(init_points=5, n_iter=5, kappa=3.29)

        print(bo.res["max"])
Example #9
0
            file.close()
        K.clear_session()

    # bayes opt is a maximization algorithm, to minimize validation_loss, return 1-this
    bayes_opt_score = 1.0 - score[1]

    return bayes_opt_score


# bayesian optimization
# Every pbounds entry is a (lower, upper) pair; the 'lr' pair is ordered
# lower-first like the others so the bounds stay valid.
optimizer = BayesianOptimization(
    f=train_model,
    pbounds={
        'encoder_blocks': (2, 3.999),
        'lstm_units': (3, 5.999),  #2**
        'lr': (0.0001, 0.001),
        'batch_size': (1, 2.999),
        'kernel_size': (3, 5.999),  #*16
        'num_res': (1, 4.999)
    },
    verbose=2)

optimizer.maximize(init_points=15, n_iter=20)

# training-test-evaluation iterations with best params
targets = [e['target'] for e in optimizer.res]
best_index = targets.index(max(targets))
params = optimizer.res[best_index]['params']
for i in range(0, 10):
    train_model(encoder_blocks=params['encoder_blocks'],
                lstm_units=params['lstm_units'],
Example #10
0
    cv_result = xgb.cv(
                        params,
                        dtrain,
                        seed=42,
                        nfold=5,
                        feval = f1,
                        early_stopping_rounds=10,
                        verbose_eval=0
                    )
    
    return -1.0 * cv_result['test-f1-mean'].iloc[-1]

# Search space handed to the optimizer: one pair of bounds per parameter.
xgb_search_space = {
    'max_depth': (6, 14),
    'min_split_loss': (0, 1),
    'learning_rate': (0, 2),
    'n_estimators': (50, 200),
    'subsample': (0.5, 1),
    'min_child_weight': (0, 5),
}
xgb_bo = BayesianOptimization(bo_tune_xgb, xgb_search_space)

xgb_bo.maximize(init_points=100, n_iter=700, acq='ei', xi=0.0)

# Append the best result to the run log (no trailing newline is written).
with open('log.log', 'a') as logfile:
    logfile.write(str(xgb_bo.max))

params = {
    'learning_rate': 0.2, 
    'min_split_loss': 0.2,
    'n_estimators': 100, 
    'objective': 'binary:logistic', 
    'max_depth': 6, 
 def optimizeBayes(self):
     """Create the Bayesian optimizer over ``self.bounds`` and run 200 iterations."""
     optimizer = BayesianOptimization(self.optimizationFunctionBayes, self.bounds)
     self.bayesianOptimizer = optimizer
     optimizer.maximize(n_iter=200, nugget=0.02)
Example #12
0
# Define the objective to optimize
def rf_cv(n_estimators, min_samples_split, max_depth, max_features):
    """Return the mean 5-fold cross-validated ROC AUC of a random forest.

    The integer-valued parameters are truncated with ``int`` and
    ``max_features`` is capped at 0.999 before the classifier is built.
    """
    forest = RandomForestClassifier(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_depth=int(max_depth),
        max_features=min(max_features, 0.999),
        random_state=2,
    )
    fold_scores = cross_val_score(forest, data_tr, label_tr,
                                  scoring="roc_auc", cv=5)
    return fold_scores.mean()
# Instantiate a Bayesian optimization object
# Bayesian optimization
rf_search_space = {
    "n_estimators": (10, 250),
    "min_samples_split": (2, 25),
    "max_features": (0.1, 0.999),
    "max_depth": (10, 18),
}
rf_bo = BayesianOptimization(rf_cv, rf_search_space)
rf_bo.maximize(init_points=5, n_iter=25)
print(rf_bo.max)


# Build the model and train it
rfc = RandomForestClassifier(
    n_estimators=200,
    min_samples_split=2,
    max_features=0.14,
    max_depth=14,
).fit(data_tr, label_tr)

pre = rfc.predict(data_te)
score_r = rfc.score(data_te, label_te)
Example #13
0
    def __init__(self,
                 outdir,
                 hyperparamsetting,
                 randomstate=2019,
                 maxrounds=3000,
                 minrounds=3,
                 earlystoprounds=30,
                 nthread=16,
                 doregression=False,
                 useeffrms=True,
                 usegpu=False):
        """Set up xgboost parameters, the search space and the optimizer.

        Parameters
        ----------
        outdir : directory for cached results; a ``cvresults`` sub-directory
            is created in it and an existing ``summary.csv`` is loaded from it.
        hyperparamsetting : non-empty dict mapping each hyperparameter name
            to a dict with the keys ``'default'``, ``'range'`` and
            ``'loguniform'``.
        randomstate : passed to the optimizer as its third positional
            argument; afterwards increased by the number of rows of an
            existing ``summary.csv``.
        maxrounds, minrounds, earlystoprounds : stored unchanged.
        nthread : value of the ``nthread`` entry in the xgboost parameters.
        doregression : choose the regression setup instead of classification.
        useeffrms : for regression, use the ``effrms`` columns instead of
            the ``rmse`` ones.
        usegpu : add ``tree_method='gpu_hist'`` to the xgboost parameters.
        """
        assert (hyperparamsetting and isinstance(hyperparamsetting, dict))

        self.outdir_ = outdir  # results caching

        # default hyperparameter setting for xgboost
        self.hyperparamdefault_ = dict(
            (name, spec['default'])
            for name, spec in hyperparamsetting.items())
        # hyperparameter ranges to be optimized
        self.hyperparamranges_ = dict(
            (name, tuple(spec['range']))
            for name, spec in hyperparamsetting.items())
        # hyperparameter names whose value will be sampled in a log-uniform way
        self.hyperparamloguniform_ = [
            name for name, spec in hyperparamsetting.items()
            if spec['loguniform']
        ]

        self.randomstate_ = randomstate
        self.maxrounds_ = maxrounds
        self.minrounds_ = minrounds
        self.earlystoprounds_ = earlystoprounds
        self.doregression_ = doregression
        self.useeffrms_ = useeffrms

        self.params_ = {
            'silent': 1,
            'verbose_eval': 0,
            'nthread': nthread,
            'objective': 'reg:linear',
        }
        if usegpu:  # enable GPU acceleration
            self.params_['tree_method'] = 'gpu_hist'

        self.cvresults_ = []  # holding result of each cross validation
        self.cviter_ = 0  # number of cross validation performed

        ## setting cvresults subdir
        cvdir = join(self.outdir_, 'cvresults')
        if not os.path.exists(cvdir):
            os.makedirs(cvdir)

        if doregression:  # regression task
            self.hyperparamdefault_['base_score'] = 1
            metric = 'effrms' if useeffrms else 'rmse'
        else:  # classification task
            metric = 'auc'
            self.params_.update(objective='binary:logitraw',
                                eval_metric='auc')
        # sequence matters: train mean/std first, then test mean/std
        self.cvcolumns_ = [
            '%s-%s-%s' % (split, metric, stat)
            for split in ('train', 'test')
            for stat in ('mean', 'std')
        ]

        self.earlystophistory_ = []
        self.models_ = {}
        self.callbackstatus_ = []
        self.trieddefault_ = False

        ## optimizer
        self.optimizer_ = BayesianOptimization(self.evaluate_xgb,
                                               self.hyperparamranges_,
                                               self.randomstate_)

        ## if trained before, adjust random state and load history
        summaryfile = join(self.outdir_, 'summary.csv')
        if os.path.isfile(summaryfile):
            history = pd.read_csv(summaryfile)
            self.randomstate_ += len(history)
            self._load_data(summaryfile)
Example #14
0
    def optimize(self):
        """
        Tune the estimator selected by ``self.algo`` and return it fitted.

        'RandomForest', 'GradientBoostingTree' and 'SupportVectorMachine' are
        tuned with Bayesian optimization, 'LogisticRegression' with a grid
        search. For more information visit: https://www.kdnuggets.com/2019/07/xgboost-random-forest-bayesian-optimisation.html

        Parameters
        ----------
        None

        Returns
        ---------
        estimator: scikit-learn estimator
            Estimator fitted on ``self.X`` / ``self.y_val`` with the chosen
            hyperparameters. For any other ``self.algo`` a message is
            printed and None is returned.
        """
        def run_search(objective, bounds):
            # One complete optimization run; returns the best parameter set.
            search = BayesianOptimization(f=objective,
                                          pbounds=bounds,
                                          random_state=12,
                                          verbose=2)
            search.maximize(n_iter=self.iter)
            print("Final result:", search.max)
            return search.max['params']

        algo = self.algo

        if algo == 'RandomForest':
            def rfc_crossval(n_estimators, max_depth, min_samples_split,
                             min_samples_leaf, max_leaf_nodes):
                # The optimizer proposes floats; these parameters are cast to int.
                return self.rfc_cv(n_estimators=int(n_estimators),
                                   max_depth=int(max_depth),
                                   min_samples_split=int(min_samples_split),
                                   min_samples_leaf=int(min_samples_leaf),
                                   max_leaf_nodes=int(max_leaf_nodes),
                                   data=self.X,
                                   targets=self.y_val,
                                   scoring=self.scoring)

            best = run_search(rfc_crossval, {
                "n_estimators": (10, 250),
                "min_samples_split": (2, 25),
                "max_depth": (2, 300),
                "min_samples_leaf": (1, 25),
                "max_leaf_nodes": (2, 25),
            })
            forest = RandomForestClassifier(
                max_leaf_nodes=int(best['max_leaf_nodes']),
                min_samples_split=int(best['min_samples_split']),
                max_depth=int(best["max_depth"]),
                min_samples_leaf=int(best['min_samples_leaf']),
                n_estimators=int(best['n_estimators']),
                random_state=12)
            return forest.fit(self.X, self.y_val)

        if algo == "GradientBoostingTree":
            def gbt_crossval(learning_rate, n_estimators, min_samples_split,
                             min_samples_leaf, max_depth):
                return self.gbt_cv(learning_rate=float(learning_rate),
                                   n_estimators=int(n_estimators),
                                   min_samples_split=int(min_samples_split),
                                   min_samples_leaf=int(min_samples_leaf),
                                   max_depth=int(max_depth),
                                   data=self.X,
                                   targets=self.y_val,
                                   scoring=self.scoring)

            best = run_search(gbt_crossval, {
                "learning_rate": (0.001, 0.2),
                "min_samples_split": (2, 25),
                "max_depth": (2, 300),
                "min_samples_leaf": (1, 25),
                "n_estimators": (10, 300),
            })
            booster = GradientBoostingClassifier(
                learning_rate=best['learning_rate'],
                min_samples_split=int(best['min_samples_split']),
                max_depth=int(best["max_depth"]),
                min_samples_leaf=int(best['min_samples_leaf']),
                n_estimators=int(best['n_estimators']),
                random_state=12)
            return booster.fit(self.X, self.y_val)

        if algo == "SupportVectorMachine":
            def svc_crossval(C, degree):
                return self.svc_cv(C=float(C),
                                   degree=int(degree),
                                   data=self.X,
                                   targets=self.y_val,
                                   scoring=self.scoring)

            best = run_search(svc_crossval, {
                "C": (0.001, 0.9999),
                "degree": (2, 4),
            })
            machine = SVC(C=best['C'],
                          degree=int(best['degree']),
                          kernel='poly',
                          random_state=12)
            return machine.fit(self.X, self.y_val)

        if algo == "LogisticRegression":
            # Exhaustive grid search instead of Bayesian optimization.
            grid = {
                "C": [
                    0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75,
                    0.8, 0.85, 0.9, 0.95, 1
                ],
                "fit_intercept": [True, False]
            }
            searcher = GridSearchCV(LogisticRegression(n_jobs=-1, random_state=12),
                                    param_grid=grid,
                                    scoring=self.scoring,
                                    n_jobs=-1)
            return searcher.fit(self.X, self.y_val).best_estimator_

        print("No valid algorithm")
Example #15
0
from bayes_opt import BayesianOptimization
# Example of how to use this bayesian optimization package.


# Let's find the maximum of a simple quadratic function of two variables.
def quadratic(x, y):
    return -x**2 - (y - 1)**2 + 1


# We create the bayes_opt object and pass the function to be maximized
# together with the parameter names and their bounds.
bo = BayesianOptimization(quadratic, {'x': (-4, 4), 'y': (-3, 3)})

# One of the things we can do with this object is pass points
# which we want the algorithm to probe. A dictionary with the
# parameter names and a list of values to include in the search
# must be given.
bo.explore({'x': [-1, 3], 'y': [-2, 2]})

# Additionally, if we have any prior knowledge of the behaviour of
# the target function (even if not totally accurate) we can also
# tell that to the optimizer.
# Here we pass a dictionary with target values as keys of another
# dictionary with parameter names and their corresponding value.
known_targets = {
    -2: {'x': 1, 'y': 0},
    -1.251: {'x': 1, 'y': 1.5},
}
bo.initialize(known_targets)

# Once we are satisfied with the initialization conditions
# we let the algorithm do its magic by calling the maximize()
# method.
bo.maximize(init_points=5, n_iter=15, kappa=3.29)

# The output values can be accessed with bo.res
print(bo.res['max'])

                   right_index=True,
                   left_on=["Store", "SchoolHoliday"])
# If there is a missing value due to the merge, replace it with the column mean
X_Final = X_Final.apply(lambda x: x.fillna(x.mean()), axis=0)
# Format the message first and print the result: calling .format() on what
# print(...) returns only works with the Python 2 print statement and fails
# under Python 3, where print() returns None.
print("Size of Training Set: Columns = {}, Rows = {}".format(
    X.shape[1], X.shape[0]))
print("Size of Test Set: Columns = {}, Rows = {}".format(
    X_Final.shape[1], X_Final.shape[0]))


##############################################################################
# Bayesian Optimisation - 75 Iterations for Each Algorithm
# Machine Learning Algorithm #1 - Define ranges of Hyperparameters
# "normv" and "log_y" have identical lower and upper bounds (1, 1).
ml1_bounds = {
    "max_features": (1, 20),
    "criterion": (0, 1),
    "normv": (1, 1),
    "max_depth": (1, 40),
    "n_estimators": (100, 300),
    "log_y": (1, 1),
}
ml1_bo = BayesianOptimization(cross_validation, ml1_bounds)
# NOTE(review): the explored n_estimators value (50) lies outside the
# (100, 300) bounds above - confirm this is intended.
ml1_start_point = {
    "max_features": [3.0],
    "criterion": [0],
    "normv": [1],
    "max_depth": [15],
    "n_estimators": [50],
    "log_y": [1],
}
ml1_bo.explore(ml1_start_point)
# Machine Learning Algorithm #2 - Define ranges of Hyperparameters
ml2_bo = BayesianOptimization(cross_validation2, {"n_neighbors": (2, 20),
                                                  "leaf_size": (10, 60),
                                                  "normv": (1, 1),
                                                  "log_y": (1, 1)})
ml2_bo.explore({"n_neighbors": [5],
                "leaf_size": [20],
Example #17
0
def gekko_bayesian(indicator=None):
    """Search strategy parameters with Bayesian optimization and print a report.

    Three evaluations are reported: the best run of the optimization, one
    more ``gekko_search`` run with the best parameters, and the value
    returned by ``Evaluate`` for them.  The report ends with a
    config.js-like dump of the expanded parameters.

    Parameters
    ----------
    indicator : strategy name; when None, ``settings['Strategy']`` is used.
        The chosen name is stored in the module-level ``Strategy``.

    Returns
    -------
    dict
        Copy of the best parameter set found by the optimizer.
    """
    print("")
    global Strategy
    # Fall back to the configured strategy when none is given explicitly.
    Strategy = settings['Strategy'] if indicator is None else indicator
    print("Starting search %s parameters" % Strategy)
    bo = BayesianOptimization(gekko_search, copy.deepcopy(StratConfig))

    # 1st Evaluate
    print("")
    print("Step 1: BayesianOptimization parameter search")
    bo.maximize(init_points=settings['init_points'], n_iter=settings['num_iter'])
    max_val = bo.res['max']['max_val']
    # Look up the statistics recorded for the run that produced the best value.
    index = all_val.index(max_val)
    s1 = stats[index]

    # 2nd Evaluate
    print("")
    print("Step 2: testing searched parameters on random date")

    max_params = bo.res['max']['max_params'].copy()
    #max_params["persistence"] = 1
    print("Starting Second Evaluation")
    gekko_search(**max_params)
    # The call above is expected to append its statistics to stats.
    s2 = stats[-1]

    # 3rd Evaluate
    print("")
    print("Step 3: testing searched parameters on new date")
    print(max_params)
    s3 = Evaluate(Strategy, max_params)

    resultjson = expandGekkoStrategyParameters(max_params, Strategy)#[Strategy]

    # config.js like output
    percentiles = np.array([0.25, 0.5, 0.75])
    formatted_percentiles = [str(int(round(x*100)))+"%" for x in percentiles]
    stats_index = (['count', 'mean', 'std', 'min'] +
          formatted_percentiles + ['max'])
    separator = "// "+'-'*50
    print("")
    print(separator)
    print("// "+ Strategy + ' Settings')
    print(separator)
    for title, row in (("1st Evaluate", s1), ("2nd Evaluate", s2)):
        print("// %s: %.3f" % (title, row[1]))
        for i in range(len(row)):
            print('// %s: %.3f' % (stats_index[i], row[i]))
        print(separator)
    print("// 3rd Evaluted: %f" % s3)
    print(separator)
    print("config.%s = {%s};" % (Strategy, json.dumps(resultjson, indent=2)[1:-1]))
    print(separator)

    return max_params
class Optimizer(object):
    """Tune a subset of beamline voltages to maximize the measured current.

    Three strategies are available through ``start``: Bayesian optimization
    ('bayes'), a custom walker-based MCMC ('my mcmc') and an emcee ensemble
    sampler (any other method name).
    """

    def __init__(self,beamline):
        super(Optimizer,self).__init__()
        self.beamline = beamline

    def start(self,subset,method):
        """Prepare the search space for ``subset`` and run ``method``.

        'bayes' only needs bounds; both MCMC variants need the lmfit-style
        parameter set.
        """
        if method == 'bayes':
            self.defineBounds(subset)
            self.optimizeBayes()
        elif method == 'my mcmc':
            self.defineParameters(subset)
            self.optimizeMyMCMC()
        else:
            self.defineParameters(subset)
            self.optimizeMCMC()

    def defineParameters(self,subset):
        """Build ``self.p`` with one varying parameter per voltage in ``subset``."""
        self.p = lm.Parameters()
        for n in subset:
            voltage = self.beamline.voltages[n]
            self.p.add(n, value = voltage.setpoint,
                          min = voltage.scanStart,
                          max = voltage.scanStop,
                          vary = True)

    def defineBounds(self,subset):
        """Build ``self.bounds`` as ``{name: (scanStart, scanStop)}`` for ``subset``."""
        self.bounds = {}
        for n in subset:
            voltage = self.beamline.voltages[n]
            self.bounds[n] = (voltage.scanStart, voltage.scanStop)

    def optimizeBayes(self):
        """Run 200 iterations of Bayesian optimization over ``self.bounds``."""
        self.bayesianOptimizer = BayesianOptimization(self.optimizationFunctionBayes, self.bounds)
        self.bayesianOptimizer.maximize(n_iter=200,nugget = 0.02)

    def optimizeMCMC(self):
        """Run the emcee ensemble sampler for 2000 steps from the current values."""
        ndim, nwalkers = len(self.p),2*len(self.p)
        pos = [np.array([p.value for p in self.p.values()]) + 1e-4*np.random.rand(ndim) for i in range(nwalkers)]
        sampler = emcee.EnsembleSampler(nwalkers, ndim, self.optimizationFunctionMCMC)
        # The step count is passed positionally so the call does not depend
        # on the keyword name used by a particular emcee version.
        sampler.run_mcmc(pos, 2000)

    def optimizeMyMCMC(self):
        """Walk the custom sampler for as long as the beamline keeps scanning."""
        from walker import Walkers
        nwalkers = 10*len(self.p)
        pos = [p.value for p in self.p.values()]
        w = Walkers(nwalkers,pos,10,1,self.optimizationFunctionMyMCMC)
        print('done')
        while self.beamline.continueScanning:
            w.walk_all()

    def _applySetpoints(self, pairs):
        """Set each ``(name, value)`` pair as a voltage setpoint, then wait for the beamline."""
        for n,v in pairs:
            self.beamline.voltages[n].setpoint = v

        self.beamline.wait()
        self.beamline.wait() # Not sure why, but convergence works better with two waits?

    @staticmethod
    def _outOfRange(vrs):
        """Return True when any value lies outside [0, 10**4]."""
        for v in vrs:
            if v < 0 or v > 10**4:
                return True
        return False

    def optimizationFunctionBayes(self, vrs=None, **setpoints):
        """Apply the given setpoints and return the measured current.

        ``BayesianOptimization`` calls its target with one keyword argument
        per bounded parameter, so the values arrive in ``setpoints`` keyed by
        voltage name.  A positional sequence ``vrs`` ordered like ``self.p``
        is still accepted for existing callers.
        """
        if vrs is not None:
            setpoints.update(zip(self.p.keys(), vrs))
        self._applySetpoints(setpoints.items())

        current = self.beamline.current.value

        print(current)

        return current

    def optimizationFunctionMCMC(self,vrs,):
        """Return log(current) for the setpoints ``vrs``, or -inf when rejected.

        A point is rejected when a value is out of range or the measured
        current is below 1e-9.
        """
        if self._outOfRange(vrs):
            return -np.inf

        self._applySetpoints(zip(self.p.keys(),vrs))

        time.sleep(0.05)

        current = self.beamline.current.value

        if current < 10**-9:
            return -np.inf

        return np.log(current)

    def optimizationFunctionMyMCMC(self,vrs,):
        """Return ``(current, std)`` for the setpoints ``vrs``, or -inf when out of range."""
        if self._outOfRange(vrs):
            return -np.inf

        self._applySetpoints(zip(self.p.keys(),vrs))

        time.sleep(0.05)

        current = self.beamline.current.value
        std = self.beamline.current_std.value

        return (current,std)
Example #19
0
    acq.plot(x[np.argmax(utility)], np.max(utility), '*', markersize=15, 
             label=u'Next Best Guess', markerfacecolor='gold', markeredgecolor='k', markeredgewidth=1)
    acq.set_xlim((0, 0.1))
    acq.set_ylim((0, np.max(utility) + 0.5))
    acq.set_ylabel('Utility', fontdict={'size':20})
    acq.set_xlabel('x', fontdict={'size':20})
    
    axis.legend()
    acq.legend()
    

if __name__ == "__main__":
    # Extra keyword arguments forwarded to both maximize() calls.
    gp_params = {"alpha": 1e-5}

    #SVM
    svc_bounds = {'gamma': (0.00001, 0.1)}
    svcBO = BayesianOptimization(svccv, svc_bounds)
    svcBO.maximize(init_points=3, n_iter=4, **gp_params)

    #Random Forest
    rfc_bounds = {'n_estimators': (10, 300), 'max_depth': (2, 10)}
    rfcBO = BayesianOptimization(rfccv, rfc_bounds)
    # Points queued for evaluation in addition to the random initial points.
    rfcBO.explore({'max_depth': [2, 4, 6], 'n_estimators': [64, 128, 256]})
    rfcBO.maximize(init_points=4, n_iter=4, **gp_params)

    print('Final Results')
Example #20
0
def LightGBM_tuning(X_train, y_train, kfold=6):
    '''
    LightGBM model hyperparameter tuning; uses bayes_opt to cross-validate
    on the entire training dataset.
    @ tuning hyperparameters:
        feature_fraction
        bagging_fraction
        lambda_l1
        max_depth
        min_data_in_leaf
        num_leaves
    @ default hyperparameters:
        bagging_freq = 1
        bagging_seed = 11
        boosting = 'gbdt'
        learning_rate: 0.005

    Parameters
    ----------
    X_train: feature dataframe

    y_train: target series

    kfold: number of cross-validation folds (default 6)

    Return
    ------
    dict: dictionary of tuned hyperparameters of lightGBM
    '''

    import os

    import lightgbm as lgb
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import KFold
    import numpy as np
    from bayes_opt.observer import JSONLogger
    from bayes_opt.event import Events
    from bayes_opt import BayesianOptimization

    # Identifier/date columns are excluded from the model inputs.
    features = [feature for feature in X_train.columns
                if feature not in ['card_id', 'first_active_month']]
    categorical_features = [feature for feature in features
                            if 'feature_' in feature]
    folds = KFold(n_splits=kfold, shuffle=True, random_state=133)
    bayes_opt_params = {
        'feature_fraction': (0.1, 1.0),
        'bagging_fraction': (0.1, 1.0),
        'lambda_l1': (0, 6),
        'max_depth': (4, 20),
        'min_data_in_leaf': (10, 300),
        'num_leaves': (5, 300),
    }

    # define the cross-validation function which returns the objective
    # score (-rmse), then use the bayesian optimizer to tune that score
    def cv_helper(max_depth,
                  num_leaves,
                  min_data_in_leaf,
                  feature_fraction,
                  bagging_fraction,
                  lambda_l1):
        """Return the negative RMSE of the out-of-fold predictions."""
        # Every row falls in exactly one validation fold, so the buffer is
        # fully overwritten in each call.
        y_val = np.zeros(y_train.shape)

        for train_idxs, val_idxs in folds.split(X_train.values, y_train.values):

            # training set
            train_data = lgb.Dataset(data=X_train.iloc[train_idxs][features],
                                     label=y_train.iloc[train_idxs],
                                     categorical_feature=categorical_features)

            # validation set
            val_data = lgb.Dataset(data=X_train.iloc[val_idxs][features],
                                   label=y_train.iloc[val_idxs],
                                   categorical_feature=categorical_features)
            # hyperparameters (the optimizer proposes floats; integer-valued
            # ones are truncated)
            params = {
                'objective': 'regression',
                'metric': 'rmse',
                'lambda_l1': lambda_l1,
                'num_leaves': int(num_leaves),
                'min_data_in_leaf': int(min_data_in_leaf),
                'max_depth': int(max_depth),
                'feature_fraction': feature_fraction,
                'bagging_fraction': bagging_fraction,
                'bagging_freq': 1,
                'bagging_seed': 11,
                'boosting': 'gbdt',
                'learning_rate': 0.005,
                'verbosity': 1
            }

            # regressor
            clf = lgb.train(params=params,
                            train_set=train_data,
                            num_boost_round=10000,
                            valid_sets=[train_data, val_data],
                            verbose_eval=200,
                            early_stopping_rounds=200)

            # prediction of validation
            y_val[val_idxs] = clf.predict(X_train.iloc[val_idxs][features],
                                          num_iteration=clf.best_iteration)

        return -mean_squared_error(y_true=y_train, y_pred=y_val)**0.5

    # The logger appends to this file, so its directory must exist.
    log_path = "bayes_opt_log/lightGBM_logs.json"
    log_dir = os.path.dirname(log_path)
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    logger = JSONLogger(path=log_path)

    # Use the correctly spelled event name when the installed bayes_opt
    # provides it, otherwise the legacy misspelled one.
    step_event = getattr(Events, 'OPTIMIZATION_STEP', None)
    if step_event is None:
        step_event = Events.OPTMIZATION_STEP

    LGB_bayes_opt = BayesianOptimization(cv_helper, pbounds=bayes_opt_params)
    LGB_bayes_opt.subscribe(step_event, logger)
    LGB_bayes_opt.maximize(init_points=4,
                           n_iter=20,
                           acq='ei',
                           xi=0.0)

    return LGB_bayes_opt.max['params']
def rfccv(n_estimators, min_samples_split, max_features):
    """Return the mean 2-fold cross-validation score of an RFC estimator.

    ``n_estimators`` and ``min_samples_split`` are truncated to integers and
    ``max_features`` is capped at 0.999 before the estimator is built.
    ``data`` and ``target`` are module-level names defined outside this
    function. ``'f1'`` is passed as the fourth positional argument of
    ``cross_val_score`` -- presumably the scoring name; confirm against the
    scikit-learn version in use.
    """
    forest = RFC(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=min(max_features, 0.999),
        random_state=2,
    )
    fold_scores = cross_val_score(forest, data, target, 'f1', cv=2)
    return fold_scores.mean()

if __name__ == "__main__":
    # Extra keyword arguments forwarded to both maximize() calls below.
    gp_params = {"alpha": 1e-5}

    # Optimizer for svccv, searching over C and gamma.
    svcBO = BayesianOptimization(svccv,
        {'C': (0.001, 100), 'gamma': (0.0001, 0.1)})
    # Hand the optimizer three explicit (C, gamma) combinations to evaluate.
    svcBO.explore({'C': [0.001, 0.01, 0.1], 'gamma': [0.001, 0.01, 0.1]})

    # Optimizer for rfccv; rfccv itself truncates the first two parameters
    # to integers.
    rfcBO = BayesianOptimization(
        rfccv,
        {'n_estimators': (10, 250),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 0.999)}
    )

    # Run both searches, printing a 53-character rule after each one.
    svcBO.maximize(n_iter=10, **gp_params)
    print('-' * 53)
    rfcBO.maximize(n_iter=10, **gp_params)

    print('-' * 53)
    print('Final Results')
Example #22
0
    return cv_s(XGBClassifier(max_depth=int(max_depth),
                              learning_rate=learning_rate,
                              n_estimators=int(n_estimators),
                              silent=silent,
                              nthread=nthread,
                              gamma=gamma,
                              min_child_weight=min_child_weight,
                              subsample=subsample,
                              colsample_bytree=colsample_bytree,
                              objective='multi:softprob'),
                dct,
                dy,
                "log_loss",
                cv=8).mean()


# Build the optimizer for xgbcv; each key names one tunable argument and maps
# to its bounds tuple. BayesOpt is imported outside this excerpt.
xgboostBO = BayesOpt(
    xgbcv, {
        'max_depth': (1, 8),
        'learning_rate': (0.005, 0.1),
        'n_estimators': (100, 600),
        'gamma': (0.5, 5),
        'min_child_weight': (1, 30),
        'subsample': (0.2, 1),
        'colsample_bytree': (0.2, 1)
    })

# 35 initial points followed by 365 optimization iterations.
xgboostBO.maximize(init_points=35, n_iter=365)
# Bare expression: the value is discarded when this runs as a script
# (it is only echoed in an interactive session).
xgboostBO.res["max"]

# Seconds elapsed since stime, which is set outside this excerpt.
etime = float(time.time() - stime)
Example #23
0
"""Example of how to use this bayesian optimization package."""

import sys
sys.path.append("./")
from bayes_opt import BayesianOptimization

# Let's find the maximum of a simple quadratic function of two variables.
# We create the bayes_opt object and pass the function to be maximized
# together with the parameter names and their bounds.
bo = BayesianOptimization(lambda x, y: -x ** 2 - (y - 1) ** 2 + 1,
                          {'x': (-4, 4), 'y': (-3, 3)})

# One of the things we can do with this object is pass points
# that we want the algorithm to probe. A dictionary mapping each
# parameter name to a list of values to include in the search
# must be given.
bo.explore({'x': [-1, 3], 'y': [-2, 2]})

# Additionally, if we have any prior knowledge of the behaviour of
# the target function (even if not totally accurate) we can also
# tell that to the optimizer.
# Here we pass a dictionary with 'target' and the parameter names as keys
# and a list of corresponding values for each.
bo.initialize(
    {
        'target': [-1, -1],
        'x': [1, 1],
        'y': [0, 2]
    }
)
Example #24
0
            file.write("\n")
            file.close()
        K.clear_session()

    # bayes opt is a maximization algorithm, to minimize validation_loss, return 1-this
    bayes_opt_score = 1.0 - score[1]

    return bayes_opt_score


# Bayesian optimization of train_model over the bounds below.
# The fractional upper limits (6.999, 2.999) presumably let train_model
# truncate the sampled floats to integers -- TODO confirm in train_model.
optimizer = BayesianOptimization(
    f=train_model,
    pbounds={
        'residual_units': (4, 6.999),
        # Bounds are (lower, upper). The original (0.001, 0.0001) had them
        # reversed, which gives the optimizer an infeasible interval.
        'lr': (0.0001, 0.001),
        'batch_size': (1, 2.999),  # *16
        #   'kernel_size': (3, 5.999)
    },
    verbose=2)

optimizer.maximize(init_points=2, n_iter=10)

# training-test-evaluation iterations with best params
# Create the output directory if it is missing (no check-then-create race).
os.makedirs('results', exist_ok=True)
targets = [e['target'] for e in optimizer.res]
bs_fname = 'bs_taxiNYC.json'
# Persist every evaluated point so the search can be inspected later.
with open(os.path.join('results', bs_fname), 'w') as f:
    json.dump(optimizer.res, f, indent=2)
# Index into optimizer.res of the entry with the highest target value.
best_index = targets.index(max(targets))
    config = ConfigParser.ConfigParser()
    try:
        config.read("ensembles.config")
        valid_mode_on = config.getboolean(config_name, "valid_mode_on")
        if valid_mode_on:
            valid_file = "../data/train-va.csv"
        else:
            valid_file = None
        model_output_paths = map(lambda x: x.strip(), config.get(config_name, "model_output_paths").split(","))
        try:
            cs = map(float, config.get(config_name, "cs").split(","))
            assert len(cs) == len(model_output_paths)
        except ConfigParser.NoOptionError:
            cs = np.ones(len(model_output_paths))
    except Exception as e:
        logging.error("Could not load configuration file from models.config")
        logging.error(str(e))

    df_valid = pd.read_csv(valid_file, usecols=["row_id", "place_id"])
    df_valid.rename(columns={"place_id": "place_id_label"}, inplace=True)

    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')
    dfs = load_models(model_output_paths)
    def target(**cs):
        return ensemble_score(dfs, model_output_paths, df_valid, **cs)

    bo = BayesianOptimization(target, {"c%d" % m: (0., 1.) for m in range(len(model_output_paths))})
    bo.maximize(n_iter=100, kappa=5)


Example #26
0
    # folds
    xfolds = pd.read_csv('../input/xfolds.csv')
    # work with validation split
    idx0 = xfolds[xfolds.valid == 0].index
    idx1 = xfolds[xfolds.valid == 1].index
    x0 = xtrain[xtrain.index.isin(idx0)]
    x1 = xtrain[xtrain.index.isin(idx1)]
    y0 = ytrain[ytrain.index.isin(idx0)]
    y1 = ytrain[ytrain.index.isin(idx1)]

    xgboostBO = BayesianOptimization(xgboostcv,
                                     {'max_depth': (int(2), int(25)),
                                      'learning_rate': (0.0005, 0.06),
                                      'n_estimators': (int(500), int(2000)),
                                      'subsample': (0.1, 0.99),
                                      'colsample_bytree': (0.1, 0.99),
                                      'gamma': (0.00000000001, 0.05),
                                      'min_child_weight': (int(1), int(40))
                                     })

#     xgboostBO.explore({'colsample_bytree': [0.76427399221822834],
#                        'learning_rate': [0.0073362638967263945],
#                        'min_child_weight': [14.634866816577702],
#                        'n_estimators': [2408],
#                        'subsample': [0.72679682406267243],
#                        'max_depth': [14.40730693062795],
#                        'gamma': [0.0071936123399884092]}
#                       )
    xgboostBO.maximize(init_points=5, n_iter=20, acq='ei')
    print('-' * 53)
    xgtrain = xgb.DMatrix(X, label=y)

    return xgtrain


if __name__ == '__main__':
    # Training matrix built by prepare_data().
    xgtrain = prepare_data()

    # Module-level settings. Only init_points and num_iter are used in this
    # block; the others are presumably read as globals by xgb_evaluate --
    # TODO confirm.
    num_rounds = 3000
    random_state = 2016
    num_iter = 25
    init_points = 5
    params = {
        'eta': 0.1,
        'silent': 1,
        'eval_metric': 'mae',
        'verbose_eval': True,
        'seed': random_state
    }

    # Search space: one bounds tuple per argument of xgb_evaluate.
    xgbBO = BayesianOptimization(xgb_evaluate, {'min_child_weight': (1, 20),
                                                'colsample_bytree': (0.1, 1),
                                                'max_depth': (5, 15),
                                                'subsample': (0.5, 1),
                                                'gamma': (0, 10),
                                                'alpha': (0, 10),
                                                })

    xgbBO.maximize(init_points=init_points, n_iter=num_iter)

Example #28
0
# Outside testing mode the predictions go to prepareSubmission; otherwise
# they are just printed.
if not testing:
    prepareSubmission(pred)
else:
    print(pred)
    
# Unique name for the JSON results file written below.
uuid_string = str(uuid.uuid4())
# Pre-bind the fixed arguments of runSolution so the optimizer only has to
# supply the weight keyword arguments listed in pbounds.
f=functools.partial(runSolution,testing=True,train=train,test=test,numSquares=numSquares,y_w=1000)
bo = BayesianOptimization(f=f,
                                  pbounds={
                                      'acc_w': (0, 1),
                                      
                                      # Fix w_y at 1000 as the most important feature
                                      #'w_y': (500, 2000), 
                                      "daysin_w": (0.1, 0.5),
                                      "daycos_w": (0.1, 0.5),
                                      "minsin_w": (0.2, 0.7),
                                      "mincos_w": (0.2, 0.7),
                                      "weekdaysin_w": (0, 0.4),
                                      "weekdaycos_w": (0, 0.4),
                                      "x_w": (18, 24),
                                      
                                      "year_w": (0.4, 0.6),
                                      },
                                  verbose=True
                                  )
                                  
###########################################################################
bo.maximize(init_points=2, n_iter=300, acq="ei", xi=0.1)#0,1 prefer exploration
# Dump the optimizer results as sorted, indented JSON to <uuid>.json.
with open('{}.json'.format(uuid_string), 'w+') as fh:
                fh.write(json.dumps(bo.res, sort_keys=True, indent=4))
                
#            "acc_w": 0.29209822227034421, 
                                   n_informative=12,
                                   n_redundant=7)

def svccv(C, gamma):
    """Return the mean 5-fold cross-validation score of an SVC.

    The classifier is built with the given ``C`` and ``gamma`` and a fixed
    ``random_state`` of 2. ``data`` and ``target`` are module-level names
    defined outside this function; ``'f1'`` is passed as the fourth
    positional argument of ``cross_val_score``.
    """
    classifier = SVC(C=C, gamma=gamma, random_state=2)
    fold_scores = cross_val_score(classifier, data, target, 'f1', cv=5)
    return fold_scores.mean()

def rfccv(n_estimators, min_samples_split, max_features):
    """Return the mean 5-fold cross-validation score of an RFC estimator.

    ``n_estimators`` and ``min_samples_split`` are truncated to integers and
    ``max_features`` is capped at 0.999 before the estimator is built.
    ``data`` and ``target`` are module-level names defined outside this
    function; ``'f1'`` is passed as the fourth positional argument of
    ``cross_val_score``.
    """
    forest = RFC(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=min(max_features, 0.999),
        random_state=2,
    )
    fold_scores = cross_val_score(forest, data, target, 'f1', cv=5)
    return fold_scores.mean()

if __name__ == "__main__":

    # Optimizer for svccv, searching over C and gamma.
    svcBO = BayesianOptimization(svccv, {'C': (0.001, 100), 'gamma': (0.0001, 0.1)})
    # Hand the optimizer three explicit (C, gamma) combinations to evaluate.
    svcBO.explore({'C': [0.001, 0.01, 0.1], 'gamma': [0.001, 0.01, 0.1]})

    # Optimizer for rfccv; rfccv itself truncates the first two parameters
    # to integers.
    rfcBO = BayesianOptimization(rfccv, {'n_estimators': (10, 250),
                                         'min_samples_split': (2, 25),
                                         'max_features': (0.1, 0.999)})
    svcBO.maximize()

    print('-'*53)
    rfcBO.maximize()

    # Report the best score each optimizer recorded.
    print('-'*53)
    print('Final Results')
    print('SVC: %f' % svcBO.res['max']['max_val'])
    print('RFC: %f' % rfcBO.res['max']['max_val'])
Example #30
0
            num_leaves=int(num_leaves),
            n_estimators=int(n_estimators),
            max_depth=int(max_depth),
            min_data_in_leaf=int(min_data_in_leaf),
            silent=silent,
            nthread=-nthread),
                               X_train,
                               y_train,
                               scoring="neg_mean_squared_error",
                               cv=5).mean()

    # Load data set and target values
    lgboostBO = BayesianOptimization(
        lgboostcv, {
            'num_leaves': (5, 2500),
            'n_estimators': (10, 2500),
            'max_depth': (2, 65),
            'min_data_in_leaf': (1, 100),
        })

    #    lgboostBO.maximize(init_points=init_points, n_iter=num_iter)
    params = lgboostBO.res['max']['max_params']

    # Best model parameters found via Bayesian Optimization
    if id == '04':
        params = {
            'max_depth': 14,
            'min_data_in_leaf': 98,
            'n_estimators': 297,
            'num_leaves': 7
        }
                                   n_informative=12,
                                   n_redundant=7)

def svccv(C, gamma):
    """Return the mean 5-fold cross-validation score of an SVC.

    The classifier is built with the given ``C`` and ``gamma`` and a fixed
    ``random_state`` of 2. ``data`` and ``target`` are module-level names
    defined outside this function; ``'roc_auc'`` is passed as the fourth
    positional argument of ``cross_val_score``.
    """
    classifier = SVC(C=C, gamma=gamma, random_state=2)
    fold_scores = cross_val_score(classifier, data, target, 'roc_auc', cv=5)
    return fold_scores.mean()

def rfccv(n_estimators, min_samples_split, max_features):
    """Return the mean 5-fold cross-validation score of an RFC estimator.

    ``n_estimators`` and ``min_samples_split`` are truncated to integers and
    ``max_features`` is capped at 0.999 before the estimator is built.
    ``data`` and ``target`` are module-level names defined outside this
    function; ``'roc_auc'`` is passed as the fourth positional argument of
    ``cross_val_score``.
    """
    forest = RFC(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=min(max_features, 0.999),
        random_state=2,
    )
    fold_scores = cross_val_score(forest, data, target, 'roc_auc', cv=5)
    return fold_scores.mean()

if __name__ == "__main__":

    # Optimizer for svccv, searching over C and gamma.
    svcBO = BayesianOptimization(svccv, {'C': (0.001, 100), 'gamma': (0.0001, 0.1)})
    # Hand the optimizer three explicit (C, gamma) combinations to evaluate.
    svcBO.explore({'C': [0.001, 0.01, 0.1], 'gamma': [0.001, 0.01, 0.1]})

    # rfcBO is constructed but never run: its maximize() call is commented
    # out below.
    rfcBO = BayesianOptimization(rfccv, {'n_estimators': (10, 250),
                                         'min_samples_split': (2, 25),
                                         'max_features': (0.1, 0.999)})
    # NOTE(review): 'xcxcxc' does not look like a real acquisition-function
    # name; the other snippets in this file use 'ucb' and 'ei'. Presumably
    # this deliberately exercises the invalid-name path -- confirm, or
    # replace it with a supported name.
    svcBO.maximize(acq='xcxcxc')

    print('-'*53)
    #---------------------------------------------------------- rfcBO.maximize()
#------------------------------------------------------------------------------ 
    #------------------------------------------------------------- print('-'*53)
    #---------------------------------------------------- print('Final Results')
    #---------------------------- print('SVC: %f' % svcBO.res['max']['max_val'])
    #---------------------------- print('RFC: %f' % rfcBO.res['max']['max_val'])
Example #32
0
if optimization:
    print('optimization')

    def lr_cv(max_iter):
        """Return the mean cross-validation score of a SAG logistic regression.

        Parameters
        ----------
        max_iter : float
            Iteration budget proposed by the optimizer. The optimizer samples
            real values, but an iteration count must be an integer, so the
            value is truncated with ``int()`` before use.

        Returns
        -------
        float
            Mean of the per-split scores computed with ``ali_scorer`` on the
            ``tscv`` splits of ``df_train`` / ``y_train``.
        """
        lr = LogisticRegression(solver='sag', n_jobs=1,
                                max_iter=int(max_iter))
        score = cross_val_score(lr,
                                df_train,
                                y_train,
                                scoring=ali_scorer,
                                cv=tscv,
                                verbose=1,
                                n_jobs=-1)
        return score.mean()

    # Single-parameter search over the solver's iteration budget.
    aliBO = BayesianOptimization(lr_cv, {'max_iter': (100, 2000)})
    init_points = 5
    num_iter = 20
    aliBO.maximize(init_points=init_points, n_iter=num_iter)
    print(aliBO.res['max'])
    print(aliBO.res['all'])

    # Optimization mode is terminal: stop before the later modes run.
    sys.exit(0)

if online:
    print('online')
    # Rows whose label is not 2 form the training split; rows labelled 2
    # form the test split.
    X_train = df_train.loc[df_train['label'] != 2]
    X_test = df_train.loc[df_train['label'] == 2]

    # pop() removes the 'label' column from each frame and returns it.
    y_train = X_train.pop('label')
    y_test = X_test.pop('label')
Example #33
0
    params['learning_rate'] = learning_rate / 100

    xgb_model = XGBRegressor(**params)
    xgb_model.fit(x_rsrp_train, y_rsrp_train)
    y_rsrp_pred = xgb_model.predict(x_rsrp_test)
    predictions = [round(value) for value in y_rsrp_pred]
    mse = metric.mean_squared_error(y_rsrp_test, predictions)
    rmse = math.sqrt(mse)
    return -1 * rmse


# In[12]:

# Search space for xgb_evaluate. NOTE(review): learning_rate is searched on
# (1, 10); the function tail just above divides it by 100 -- presumably that
# is xgb_evaluate; confirm.
xgbBO = BayesianOptimization(xgb_evaluate, {
    'min_child_weight': (1, 20),
    'learning_rate': (1, 10),
    'max_depth': (3, 15)
})

xgbBO.maximize(init_points=3, n_iter=10)

# In[102]:

# Refit with one hard-coded parameter set and score it on the test split.
params = {'learning_rate': 0.1, 'max_depth': 15, 'min_child_weight': 1}
xgb_model = XGBRegressor(**params)
xgb_model.fit(x_rsrp_train, y_rsrp_train)
y_rsrp_pred = xgb_model.predict(x_rsrp_test)
# Predictions are rounded to the nearest integer before the errors are computed.
predictions = [round(value) for value in y_rsrp_pred]
mae = metric.mean_absolute_error(y_rsrp_test, predictions)
mse = metric.mean_squared_error(y_rsrp_test, predictions)
rmse = math.sqrt(mse)
Example #34
0
def start_automated_run(path, automated_run_id):
    """Starts automated run. This will automatically create
    base learners until the run finishes or errors out.

    The run's source is imported as a module that supplies the search
    configuration (``pbounds``, ``default_params``, ``metric_to_optimize``,
    ``invert_metric``, ``integers``, ``maximize_config`` and optionally
    ``random_state``). Finished base learners that agree with the
    non-searchable parameters are used to initialize the optimizer.

    Args:
        path (str): Path to Xcessiv notebook

        automated_run_id (str): Automated Run ID

    Raises:
        exceptions.UserError: If no automated run has the given ID.
        Any exception raised during the search is recorded on the automated
        run (status ``'errored'`` plus error details) and then re-raised.
    """
    with functions.DBContextManager(path) as session:
        automated_run = session.query(
            models.AutomatedRun).filter_by(id=automated_run_id).first()
        if not automated_run:
            raise exceptions.UserError(
                'Automated run {} '
                'does not exist'.format(automated_run_id))
        # Record the job id and mark the run as started before doing any work.
        automated_run.job_id = get_current_job().id
        automated_run.job_status = 'started'

        session.add(automated_run)
        session.commit()

        try:
            module = functions.import_string_code_as_module(
                automated_run.source)
            # Fall back to a fixed seed of 8 when the module does not set one.
            random_state = 8 if not hasattr(
                module, 'random_state') else module.random_state
            assert module.metric_to_optimize in automated_run.base_learner_origin.metric_generators

            # get non-searchable parameters
            base_estimator = automated_run.base_learner_origin.return_estimator(
            )
            base_estimator.set_params(**module.default_params)
            default_params = functions.make_serializable(
                base_estimator.get_params())
            # Everything the estimator exposes that is not being searched.
            non_searchable_params = dict(
                (key, val) for key, val in iteritems(default_params)
                if key not in module.pbounds)

            # get already calculated base learners in search space
            existing_base_learners = []
            for base_learner in automated_run.base_learner_origin.base_learners:
                if not base_learner.job_status == 'finished':
                    continue
                # A learner is in the search space only if every
                # non-searchable parameter matches the defaults.
                in_search_space = True
                for key, val in iteritems(non_searchable_params):
                    if base_learner.hyperparameters[key] != val:
                        in_search_space = False
                        break  # If no match, move on to the next base learner
                if in_search_space:
                    existing_base_learners.append(base_learner)

            # build initialize dictionary
            target = []
            initialization_dict = dict(
                (key, list()) for key in module.pbounds.keys())
            for base_learner in existing_base_learners:
                # check if base learner's searchable hyperparameters are all numerical
                all_numerical = True
                for key in module.pbounds.keys():
                    if not isinstance(base_learner.hyperparameters[key],
                                      numbers.Number):
                        all_numerical = False
                        break
                if not all_numerical:
                    continue  # if there is a non-numerical hyperparameter, skip this.

                for key in module.pbounds.keys():
                    initialization_dict[key].append(
                        base_learner.hyperparameters[key])
                target.append(
                    base_learner.individual_score[module.metric_to_optimize])
            # The optimizer maximizes, so scores are negated when the module
            # asks for the metric to be inverted.
            initialization_dict['target'] = target if not module.invert_metric \
                else list(map(lambda x: -x, target))
            print('{} existing in initialization dictionary'.format(
                len(initialization_dict['target'])))

            # Create function to be optimized
            func_to_optimize = return_func_to_optimize(
                path, session, automated_run.base_learner_origin,
                module.default_params, module.metric_to_optimize,
                module.invert_metric, set(module.integers))

            # Create Bayes object
            bo = BayesianOptimization(func_to_optimize, module.pbounds)

            bo.initialize(initialization_dict)

            # Seed NumPy's global generator right before the search starts.
            np.random.seed(random_state)

            bo.maximize(**module.maximize_config)

            automated_run.job_status = 'finished'
            session.add(automated_run)
            session.commit()

        except:
            # Bare except is intentional here: whatever went wrong is stored
            # on the run for later inspection and then re-raised unchanged.
            session.rollback()
            automated_run.job_status = 'errored'
            automated_run.description['error_type'] = repr(sys.exc_info()[0])
            automated_run.description['error_value'] = repr(sys.exc_info()[1])
            automated_run.description['error_traceback'] = \
                traceback.format_exception(*sys.exc_info())
            session.add(automated_run)
            session.commit()
            raise
Example #35
0
    def optimize(self):
        """Run Bayesian optimization over ``self.bayes_grid`` and keep the best system.

        Steps, in order: clean ``self.cambium``, flatten the parameter grid,
        probe one hand-built configuration per entry of
        ``self.probe_divisors``, run the optimizer, probe the best
        configuration again with the battery zeroed out (when battery keys
        are present and non-zero), then re-run the best parameters through
        ``self._base_worker`` and store the outcome.

        Sets
        ----
        self.best_score : best target value; its sign is flipped when
            ``self._check_if_maximizing(self.opt_var)`` is false.
        self.best_params : best parameters, forced to discrete values and
            converted back to a nested dict.
        self.best_df : one-row DataFrame (indexed by ``self.opt_var``)
            combining the worker output and the flattened parameters, plus a
            ``system_config`` column holding ``self.best_params``.
        """
        self.cambium.clean()

        self._flatten_param_grid()

        # --- initialize optimizer ---
        bounds_transformer = SequentialDomainReductionTransformer()
        optimizer = BayesianOptimization(
            f=self._worker_return_score,
            pbounds=self.bayes_grid,
            random_state=1,
            verbose=1,
            bounds_transformer=bounds_transformer,
        )

        # --- probe largest system config ---
        # NOTE(review): probe_dict is only assigned for tech 'pv' or 'wind';
        # any other value would raise NameError at the probe call below.
        for divisor in self.probe_divisors:
            if self.tech == 'pv':
                probe_dict = self.bayes_grid.copy()
                #probe_dict['SystemDesign#subarray1_track_mode'] = 1
                probe_dict['SystemDesign#subarray1_azimuth'] = 180
                # The tilt is parsed from the second '_'-separated token of
                # the resource file's base name.
                probe_dict['SystemDesign#subarray1_tilt'] = float(self.resource_file.split('/')[-1].split('_')[1])
                probe_dict['SystemDesign#dc_ac_ratio'] = 1.2
                if 'SystemDesign#system_capacity' in probe_dict.keys():
                    probe_dict['SystemDesign#system_capacity'] = np.max(probe_dict['SystemDesign#system_capacity']) / divisor
                if 'BatteryTools#desired_power' in probe_dict.keys():
                    probe_dict['BatteryTools#desired_power'] = np.max(probe_dict['BatteryTools#desired_power']) / divisor
                    probe_dict['BatteryTools#desired_capacity'] = np.max(probe_dict['BatteryTools#desired_capacity'])
            elif self.tech == 'wind':
                probe_dict = self.bayes_grid.copy()
                probe_dict['Turbine#wind_turbine_hub_ht'] = 100
                probe_dict['Turbine#turbine_class'] = 7
                if 'Farm#system_capacity' in probe_dict.keys():
                    probe_dict['Farm#system_capacity'] = np.max(probe_dict['Farm#system_capacity'])  / divisor
                if 'BatteryTools#desired_power' in probe_dict.keys():
                    probe_dict['BatteryTools#desired_power'] = np.max(probe_dict['BatteryTools#desired_power'])  / divisor
                    probe_dict['BatteryTools#desired_capacity'] = np.max(probe_dict['BatteryTools#desired_capacity'])
            optimizer.probe(params=probe_dict, lazy=False)

        # --- run optimizer ---
        optimizer.maximize(
            init_points=config.BAYES_INIT_POINTS,
            n_iter=config.BAYES_ITER,
            acq=config.BAYES_ACQ_FUNC,
            **config.BAYES_KWARGS
        )

        # --- rerun best system with no battery ---
        best_params = optimizer.max['params']
        if 'BatteryTools#desired_capacity' in best_params.keys(): #rerun system without battery
            if (best_params['BatteryTools#desired_capacity'] > 0) | (best_params['BatteryTools#desired_power'] > 0):
                best_params['BatteryTools#desired_capacity'] = 0
                best_params['BatteryTools#desired_power'] = 0
                optimizer.probe(params=best_params, lazy=False)

        # --- best score ---
        # Read after the no-battery probe so that probe can become the new max.
        best_score = optimizer.max['target']
        if self._check_if_maximizing(self.opt_var):
            self.best_score = best_score
        else:
            self.best_score = -1 * best_score

        # --- access best params ---
        self.best_params = optimizer.max['params']

        # --- force discrete params ---
        self.best_params = self._force_discrete_bayesian_params(self.best_params)

        # --- convert to nested dict ---
        self.best_params = self._unflatten_param_grid(self.best_params)

        # --- rerun best params ---
        output = self._base_worker(self.best_params)

        # --- Create new outputs ---
        output = self._create_output_metrics(output)

        # --- flatten param grid for df ---
        df_param_grid = self._nested_param_grid_to_df(self.best_params)

        # --- combine flattened param grid with output results ---
        dict_for_df = {**output, **df_param_grid}

        # --- Convert any iterables in dict to str representations ---
        # Iterable values are stored as strings so they fit into a single
        # DataFrame row; the two lists remember which columns to convert
        # back afterwards.
        numpy_converted = []
        list_converted = []
        for k,v in dict_for_df.items():
            if isinstance(v, (str, int, float)):
                continue
            elif isinstance(v, (np.ndarray, np.generic)):
                dict_for_df[k] = str(v)
                numpy_converted.append(k)
            elif isinstance(v, (list, tuple)):
                dict_for_df[k] = str(v)
                list_converted.append(k)

        # --- convert to df ---
        self.best_df = pd.DataFrame(dict_for_df, index=[self.opt_var])

        # --- convert columns back to iterables ---
        for c in self.best_df.columns:
            try:
                if c in numpy_converted:
                    # dtype=int replaces the np.int alias, which no longer
                    # exists in NumPy >= 1.24. With np.int this line raised
                    # AttributeError, the handler below swallowed it, and the
                    # column silently stayed a string.
                    self.best_df[c] = [np.fromstring(i[1:-1], dtype=int, sep=' ') for i in list(self.best_df[c])]
                elif c in list_converted:
                    self.best_df[c] = [ast.literal_eval(i) for i in list(self.best_df[c])]
                else:
                    continue
            except Exception:
                # Best-effort: a column that cannot be parsed back keeps its
                # string representation.
                # log.warning(f'Warning! Error converting {c} back to iterable representation')
                pass

        # --- add entire param grid ---
        self.best_df['system_config'] = [self.best_params]
Example #36
0
    featimpmean = featimpmean.fillna(1. / featimp.shape[1])
    normalization = featimpmean[chosen].sum() / featimpmean.sum() / np.sum(
        fscore.values())
    for k, v in fscore.iteritems():
        fscores[k] += normalization * v

    idx = round(1000 * (np.log(2) - s), scoredp)
    featimp = featimp.append(pd.Series(fscores, name=idx))
    return idx


while True:
    init_points = args.init
    n_iter = args.iter
    scaledrange = {k: (0, 1) for k in p_range.keys()}
    bo = BayesianOptimization(score, scaledrange)
    if p:
        bo.initialize({
            k: {
                pk: (pv - p_range[pk][0]) / (p_range[pk][1] - p_range[pk][0])
                for pk, pv in param.iteritems()
            }
            for k, param in p.iteritems()
        })
    else:
        init_points, n_iter = 5, 0
    if not args.trunc:
        bo.maximize(init_points=init_points, n_iter=n_iter, acq=args.acq)
        featimp_cur = featimp
        p_new = {}
        for i in xrange(len(bo.Y)):
Example #37
0
# test data: keep the ids aside, then drop the id column from the features
test_id = test.id.values
test = test.drop('id', axis=1)

# convert the training frame and its labels to an xgboost DMatrix
xgb_train = xgb.DMatrix(train, label=train_labels)

# calling starts here

# parameters to optimize with ranges
xgb_bayes = BayesianOptimization(
    xgb_cv, {
        'max_depth': (2, 12),
        'gamma': (0.001, 10.0),
        'min_child_weight': (0, 20),
        'max_delta_step': (0, 10),
        'subsample': (0.4, 1.0),
        'colsample_bytree': (0.4, 1.0)
    })

# explicit points to try: the i-th entries of the eight-element lists
# together form the i-th point
xgb_bayes.explore({
    'max_depth': [3, 8, 3, 8, 8, 3, 8, 3],
    'gamma': [0.5, 8, 0.2, 9, 0.5, 8, 0.2, 9],
    'min_child_weight': [0.2, 0.2, 0.2, 0.2, 12, 12, 12, 12],
    'max_delta_step': [1, 2, 2, 1, 2, 1, 1, 2],
    'subsample': [0.6, 0.8, 0.6, 0.8, 0.6, 0.8, 0.6, 0.8],
    'colsample_bytree': [0.6, 0.8, 0.6, 0.8, 0.6, 0.8, 0.6, 0.8]
})
Example #38
0
def Catboost_tuning(X_train, y_train, kfold=6):
    '''
    Tune CatBoost hyperparameters with bayes_opt, training one model per
    K-fold split of the training data.

    @ tuned hyperparameters (search bounds):
        one_hot_max_size: 0 ~ 6, truncated to int
        depth: 5 ~ 11, truncated to int
        l2_leaf_reg: 1 ~ 30
        random_strength: 1 ~ 30
        bagging_temperature: 0 ~ 1000

    @ fixed hyperparameters:
        iterations: 10000
        early_stopping_rounds: 400
        use_best_model: True
        eval_metric / loss_function: 'RMSE'
        learning_rate: 0.02
        border_count: 254
        task_type: 'GPU'

    Parameters
    ----------
    X_train: feature dataframe; the 'card_id' and 'first_active_month'
        columns are excluded from the features, and every remaining column
        whose name contains 'feature_' is treated as categorical

    y_train: target series

    kfold: number of K-fold splits (default 6)

    Return
    ------
    dict: the best hyperparameters found by the optimizer
    '''

    from catboost import train, Pool
    from sklearn.model_selection import KFold
    import numpy as np
    from bayes_opt.observer import JSONLogger
    from bayes_opt.event import Events
    from bayes_opt import BayesianOptimization

    features = [feature for feature in X_train.columns
                if feature not in ['card_id', 'first_active_month']]
    categorical_features = [feature for feature in features
                            if 'feature_' in feature]
    folds = KFold(n_splits=kfold, shuffle=True, random_state=133)
    catboost_opt_params = {
        'one_hot_max_size': (0, 6),
        'depth': (5, 11),
        'l2_leaf_reg': (1, 30),
        'random_strength': (1, 30),
        'bagging_temperature': (0, 1000)
    }

    def cv_helper(one_hot_max_size,
                  depth,
                  l2_leaf_reg,
                  random_strength,
                  bagging_temperature):
        """Return the negated mean best RMSE over the folds for one candidate."""

        # Entire data set, used as the evaluation set for every fold.
        # NOTE(review): this includes the rows each fold trains on. The
        # original also built a per-fold validation Pool that was never
        # passed to train(); it is dropped here as dead code. Confirm that
        # scoring on all data (not the held-out fold) is intended.
        all_data = Pool(data=X_train[features],
                        label=y_train,
                        cat_features=categorical_features)
        # best RMSE of each fold's model
        RMSE = []

        for train_idxs, _ in folds.split(X_train.values, y_train.values):

            # training set
            train_data = Pool(data=X_train.iloc[train_idxs][features],
                              label=y_train.iloc[train_idxs],
                              cat_features=categorical_features)

            # hyperparameters (the optimizer proposes floats, so the two
            # integer-valued ones are truncated)
            params = {
                'eval_metric': 'RMSE',
                'use_best_model': True,
                'loss_function': 'RMSE',
                'learning_rate': 0.02,
                'early_stopping_rounds': 400,
                'border_count': 254,
                'task_type': 'GPU',
                'one_hot_max_size': int(one_hot_max_size),
                'depth': int(depth),
                'l2_leaf_reg': l2_leaf_reg,
                'random_strength': random_strength,
                'bagging_temperature': bagging_temperature
            }

            # model for this fold
            clf = train(pool=train_data,
                        params=params,
                        verbose=200,
                        iterations=10000,
                        eval_set=all_data)

            # add current fold RMSE on all_data
            RMSE.append(clf.best_score_['validation_0']['RMSE'])

        # Negated because the optimizer maximizes its target.
        return -np.mean(np.array(RMSE))

    logger = JSONLogger(path="bayes_opt_log/catBoost_logs.json")
    CAT_bayes_opt = BayesianOptimization(cv_helper, pbounds=catboost_opt_params)
    # The event name is kept exactly as the original spelled it.
    CAT_bayes_opt.subscribe(Events.OPTMIZATION_STEP, logger)
    CAT_bayes_opt.maximize(init_points=4,
                           n_iter=20,
                           acq='ei',
                           xi=0.0)

    return CAT_bayes_opt.max['params']
Example #39
0
@author: jd1336
"""

import numpy as np
from bayes_opt import BayesianOptimization


def camel6(x, vae=0):
    """Return the negated six-hump camel function at ``(x[0], x[1])``.

    The un-negated function has minimum -1.0316 at (0.0898, -0.7126) and
    (-0.0898, 0.7126), so this function peaks at about 1.0316 there.
    ``vae`` is accepted but not used.
    """
    u, v = x[0], x[1]
    u_sq = u ** 2
    v_sq = v ** 2
    poly_u = (4.0 - 2.1 * u_sq + (u ** 4) / 3.0) * u_sq
    cross = u * v
    poly_v = (-4 + 4 * v_sq) * v_sq
    return -(poly_u + cross + poly_v)


def branin(x, vae=0):
    """Return the negated Branin function at ``(x[0], x[1])``.

    Evaluates ``-(a*(x2 - b*x1**2 + c*x1 - r)**2 + s*(1 - t)*cos(x1) + s)``
    with a=1, b=5.1/(4*pi**2), c=5/pi, r=6, s=10, t=1/(8*pi).
    ``vae`` is accepted but not used.
    """
    u, v = x[0], x[1]
    a = 1
    b = 5.1 / (4 * np.pi ** 2)
    c = 5 / np.pi
    r = 6
    s = 10
    t = 1 / (8 * np.pi)
    quad = a * (v - b * u ** 2 + c * u - r) ** 2
    trig = s * (1 - t) * np.cos(u)
    return -(quad + trig + s)


# Names of the unknown parameters; they become the pbounds keys '1' and '2'.
parUnknownId = [1, 2]
bounds = [(-5, 5) for i in parUnknownId]
parUnknownId = [str(i) for i in parUnknownId]


def _camel6_from_kwargs(**params):
    """Adapt camel6 to the optimizer's keyword-argument calling convention.

    BayesianOptimization calls its target with one keyword argument per
    pbounds key, while camel6 expects a single positional vector. Passing
    camel6 directly would fail with an unexpected-keyword TypeError, so the
    sampled values are collected in parUnknownId order first.
    """
    return camel6([params[key] for key in parUnknownId])


# random_state and verbose are passed by keyword (they were the positional
# 0, 0) so they cannot bind to a different parameter if the constructor's
# positional order differs between bayes_opt releases.
gp_surr = BayesianOptimization(_camel6_from_kwargs,
                               dict(zip(parUnknownId, bounds)),
                               random_state=0, verbose=0)
gp_surr.maximize(init_points=10, n_iter=100, acq='ei')
Example #40
0
def main():
    bo = BayesianOptimization(target, {'x1': (-2, 10), 'x2': (-2, 10)})
    bo.maximize(init_points=2, n_iter=30, acq='ucb', kappa=5)
    cv_test = []
    for i, (train_index, test_index) in enumerate(
            cv.split(train, groups=train['user_id'].values)):
        cv_train.append(train.iloc[train_index])
        cv_test.append(train.iloc[test_index])
    del cv, train_index, test_index
    gc.collect()
    cv_mean_f1 = Parallel(n_jobs=3, temp_folder='/data/tmp/')(
        delayed(lgb_cv)(tra, tes, params, low_bound, topk, idx)
        for tra, tes, idx in zip(cv_train, cv_test, [1, 2, 3, 4, 5]))
    del cv_train, cv_test
    gc.collect()
    return 100 * np.mean(cv_mean_f1)
    # return np.mean(cv_test_auc)


# Optimizer over lgb_evaluate; each entry maps a parameter name to its
# (lower, upper) search bounds.
lgbBO = BayesianOptimization(
    lgb_evaluate,
    {
        'num_leaves': (64, 256),
        'max_depth': (7, 12),
        'min_data_in_leaf': (10, 100),
        'feature_fraction': (0.6, 1),
        'bagging_freq': (5, 20),
        'bagging_fraction': (0.6, 1),
        # f1 params
        'low_bound': (0, 0.1),
        'topk': (80, 100)
    })

# init_points and num_iter are expected to be defined earlier in the script.
lgbBO.maximize(init_points=init_points, n_iter=num_iter)
Example #42
0
class XgboFitter:
    """Bayesian hyperparameter search, training and export of xgboost models.

    Every point the optimizer evaluates runs ``xgb.cv``; the per-round CV
    table is written to ``<outdir>/cvresults/NNNN.csv`` and a running
    ``<outdir>/summary.csv`` is kept, so constructing a new fitter on the
    same ``outdir`` reloads the earlier search.

    Attributes set for callers: ``models_`` (trained boosters by name),
    ``cvresults_`` (one CV table per evaluated point) and ``summary``.
    """

    def __init__(self,
                 outdir,
                 hyperparamsetting,
                 randomstate=2019,
                 maxrounds=3000,
                 minrounds=3,
                 earlystoprounds=30,
                 nthread=16,
                 doregression=False,
                 useeffrms=True,
                 usegpu=False):
        """Set up the parameter space, bookkeeping and the optimizer.

        Parameters
        ----------
        outdir : str
            Directory used to cache CV results, the summary and models.
        hyperparamsetting : dict
            Non-empty mapping ``name -> {'default': ..., 'range': ...,
            'loguniform': ...}``. Parameters flagged ``loguniform`` are
            optimized as an exponent and converted with ``10**value``.
        randomstate : int
            Seed for the optimizer and for ``xgb.cv``.
        maxrounds, minrounds, earlystoprounds : int
            Boosting-round limit, first round at which early stopping may
            trigger, and early-stopping patience.
        nthread : int
            Stored as the ``nthread`` xgboost parameter.
        doregression : bool
            Regression if true, otherwise binary classification (AUC).
        useeffrms : bool
            For regression, evaluate with the ``effrms`` metric instead of
            ``rmse``.
        usegpu : bool
            Use the ``gpu_hist`` tree method.
        """
        assert (hyperparamsetting and isinstance(hyperparamsetting, dict))

        self.outdir_ = outdir  # results caching
        self.hyperparamdefault_ = {
            k: v['default']
            for k, v in hyperparamsetting.items()
        }  # default hyperparameter setting for xgboost
        self.hyperparamranges_ = {
            k: tuple(v['range'])
            for k, v in hyperparamsetting.items()
        }  # hyperparameter ranges to be optimized
        self.hyperparamloguniform_ = \
            [k for k in hyperparamsetting if hyperparamsetting[k]['loguniform']] # hyperparameter names whose value will be sampled in a log-uniform way
        self.randomstate_ = randomstate
        self.maxrounds_ = maxrounds
        self.minrounds_ = minrounds
        self.earlystoprounds_ = earlystoprounds
        self.doregression_ = doregression
        self.useeffrms_ = useeffrms

        self.params_ = {
            'silent': 1,
            'verbose_eval': 0,
            'nthread': nthread,
            'objective': 'reg:linear',
        }
        self.cvcolumns_ = []  # sequence matters
        self.cvresults_ = []  # holding result of each cross validation
        self.cviter_ = 0  # number of cross validation performed

        if usegpu:  # enable GPU acceleration
            self.params_.update({
                "tree_method": "gpu_hist",
            })

        ## setting cvresults subdir
        if not os.path.exists(join(self.outdir_, 'cvresults')):
            os.makedirs(join(self.outdir_, 'cvresults'))

        if doregression:  # regression task
            # base_score is a fixed default only; it is not part of the
            # optimized ranges.
            self.hyperparamdefault_['base_score'] = 1
            if useeffrms:
                self.cvcolumns_ = [
                    "train-effrms-mean", "train-effrms-std",
                    "test-effrms-mean", "test-effrms-std"
                ]
            else:
                self.cvcolumns_ = [
                    "train-rmse-mean", "train-rmse-std", "test-rmse-mean",
                    "test-rmse-std"
                ]
        else:  # classification task
            self.cvcolumns_ = [
                "train-auc-mean", "train-auc-std", "test-auc-mean",
                "test-auc-std"
            ]

            self.params_.update({
                'objective': 'binary:logitraw',
                'eval_metric': 'auc',
            })

        self.earlystophistory_ = []
        self.models_ = {}
        self.callbackstatus_ = []
        self.trieddefault_ = False

        ## optimizer
        self.optimizer_ = BayesianOptimization(self.evaluate_xgb,
                                               self.hyperparamranges_,
                                               self.randomstate_)

        ## if trained before, adjust random state and load history
        # NOTE(review): the optimizer above was already built with the
        # unadjusted seed; only the seed later passed to xgb.cv changes.
        summaryfile = join(self.outdir_, 'summary.csv')
        if os.path.isfile(summaryfile):
            _df = pd.read_csv(summaryfile)
            self.randomstate_ += len(_df)
            self._load_data(summaryfile)

    def _default_probe_point(self):
        """Return the default of every optimized hyperparameter, by name.

        Only keys of ``hyperparamranges_`` are included, so extra defaults
        such as ``base_score`` are left out and the result always matches
        the optimizer's parameter space regardless of dict ordering.
        """
        return {k: self.hyperparamdefault_[k] for k in self.hyperparamranges_}

    def _load_data(self, summaryfile):
        """Restore a previous search from ``summaryfile`` and its CV tables.

        Refills the early-stop and callback histories, reloads every
        ``cvresults/NNNN.csv`` and registers each stored point with the
        optimizer so the search continues where it stopped.
        """

        df = pd.read_csv(summaryfile)
        print(
            "Found results of {} optimization rounds in output directory, loading..."
            .format(len(df)))

        self.earlystophistory_.extend(list(df.n_estimators.values))
        self.callbackstatus_.extend(list(df.callback.values))
        self.trieddefault_ = True

        ## load cross validation results
        for i in range(len(df)):
            cvfile = join(self.outdir_, 'cvresults/{0:04d}.csv'.format(i))
            self.cvresults_.append(pd.read_csv(cvfile))
        self.cviter_ = len(df)

        ## load the optimization results so far into the Bayesian optimization object
        eval_col = self.cvcolumns_[2]  # mean test metric

        for idx in df.index:
            value = df.loc[idx, eval_col]
            # The optimizer maximizes, so regression errors are negated.
            if self.doregression_: value = -value

            params = df.loc[idx, list(self.hyperparamranges_)].to_dict()
            self.optimizer_.register(params, value)

    def evaluate_xgb(self, **hyperparameters):
        """Target function for the optimizer: cross-validate one point.

        Log-uniform hyperparameters arrive as exponents and are converted
        with ``10**value`` before being merged into the xgboost parameters.
        The CV table is cached to disk and appended to the histories.

        Returns
        -------
        float
            Last-round mean test metric, negated for regression so that
            larger is always better.
        """

        for k in hyperparameters:
            if k in self.hyperparamloguniform_:
                hyperparameters[k] = 10**hyperparameters[k]

        self.params_.update(hyperparameters)
        self.params_ = guardxgbparams(self.params_)

        best_test_eval_metric = -9999999.0
        if self.optimizer_.res:
            # Persist what has been evaluated so far before starting the
            # next (possibly long) cross validation.
            self.summary.to_csv(join(self.outdir_, 'summary.csv'))
            best_test_eval_metric = max(
                d['target'] for d in self.optimizer_.res)

        feval = None  # evaluation function
        callback_status = {'status': 0}

        if self.doregression_ and self.useeffrms_:
            callbacks = [
                early_stop(self.earlystoprounds_,
                           start_round=self.minrounds_,
                           eval_idx=-2),
            ]
            feval = evaleffrms
        else:
            callbacks = [
                early_stop(
                    self.earlystoprounds_,
                    start_round=self.minrounds_,
                ),
                callback_overtraining(best_test_eval_metric, callback_status),
            ]

        cv_result = xgb.cv(self.params_,
                           self.xgtrain_,
                           num_boost_round=self.maxrounds_,
                           nfold=self.nfold_,
                           seed=self.randomstate_,
                           callbacks=callbacks,
                           verbose_eval=50,
                           feval=feval)
        cv_result.to_csv(
            join(self.outdir_, 'cvresults/{0:04d}.csv'.format(self.cviter_)))

        self.cviter_ += 1
        self.earlystophistory_.append(len(cv_result))
        self.cvresults_.append(cv_result)
        self.callbackstatus_.append(callback_status['status'])

        if self.doregression_:
            return -cv_result[self.cvcolumns_[2]].values[-1]
        else:
            return cv_result[self.cvcolumns_[2]].values[-1]

    def optimize(self, xgtrain, init_points=3, n_iter=3, nfold=5, acq='ei'):
        """Run the hyperparameter search on ``xgtrain``.

        Parameters
        ----------
        xgtrain
            Training data handed to ``xgb.cv``.
        init_points : int
            Number of initial exploration points.
        n_iter : int
            Number of optimization steps; the summary is saved after each.
        nfold : int
            Number of cross-validation folds.
        acq : str
            Acquisition function name forwarded to the optimizer.
        """

        self.nfold_ = nfold
        self.xgtrain_ = xgtrain

        if not self.trieddefault_:
            # Probe by name: a bare list of default values depends on dict
            # ordering and, for regression, would include the extra
            # 'base_score' entry that is not part of the search space.
            self.optimizer_.probe(params=self._default_probe_point(),
                                  lazy=False)
            self.trieddefault_ = True

        ## NOTE
        # The following block is mostly equivalent to
        #   self.optimizer_.maximize(init_points=init_points, n_iter=n_iter, acq=acq)
        # but saving summary file after each hyperparameter point probed,
        # in case program stopped, and we want to reload and continue next time.
        self.optimizer_.maximize(init_points=init_points, n_iter=0, acq=acq)

        for _ in range(n_iter):
            self.optimizer_.maximize(init_points=0, n_iter=1, acq=acq)
            self.summary.to_csv(join(self.outdir_, 'summary.csv'))
        # Also covers n_iter == 0, where the loop above never saves.
        self.summary.to_csv(join(self.outdir_, 'summary.csv'))

    def fit(self, xgtrain, model='optimized'):
        """Train a booster and store it in ``models_[model]``.

        ``model='default'`` uses the default hyperparameters with the
        round count of the first evaluated point; ``model='optimized'``
        uses the best point found so far and its round count.
        """

        # Work on a copy so training-only entries such as 'n_estimators'
        # do not leak into the parameters used by later cross validations.
        params = dict(self.params_)

        if model == 'default':
            # NOTE(review): unlike the 'optimized' branch, log-uniform
            # defaults are not converted with 10** here -- confirm intended.
            params.update(self.hyperparamdefault_)
            params['n_estimators'] = self.earlystophistory_[0]

        if model == 'optimized':
            idxmax = np.argmax([d['target'] for d in self.optimizer_.res])
            params.update(guardxgbparams(
                self.optimizer_.res[idxmax]['params']))
            for k in params:
                if k in self.hyperparamloguniform_:
                    params[k] = 10**params[k]
            params['n_estimators'] = self.earlystophistory_[idxmax]

        self.models_[model] = xgb.train(params,
                                        xgtrain,
                                        params['n_estimators'],
                                        verbose_eval=50)

    def predict(self, xgtest, model='optimized'):
        """Return the predictions of the stored ``model`` on ``xgtest``."""
        return self.models_[model].predict(xgtest)

    @property
    def summary(self):
        """DataFrame with one row per evaluated point.

        Columns: the last-round value of each CV column, every optimized
        hyperparameter (as stored by the optimizer, passed through
        ``guardxgbparams``), ``n_estimators`` and ``callback``.
        """

        res = [dict(d) for d in self.optimizer_.res
               ]  # [{'target': float, 'params': dict}, ]
        for d in res:
            d['params'] = guardxgbparams(d['params'])

        data = {}
        for name in self.cvcolumns_:
            data[name] = [r[name].values[-1] for r in self.cvresults_]
        for hp in self.hyperparamranges_:
            data[hp] = [r['params'][hp] for r in res]
        data['n_estimators'] = self.earlystophistory_
        data['callback'] = self.callbackstatus_

        return pd.DataFrame(data)

    def save_model(self, feature_names, model='optimized'):
        """Write the stored ``model`` to ``<outdir>/model_<model>``.

        Saves a text dump and a binary model, then tries to convert the
        booster to a TMVA XML weights file; a failed conversion only emits
        a warning.
        """

        modeldir = join(self.outdir_, 'model_' + model)
        print("saving {} model --> {}".format(model, modeldir))

        if not os.path.exists(modeldir):
            os.makedirs(modeldir)

        self.models_[model].dump_model(join(modeldir,
                                            'dump.raw.txt'))  # dump text
        self.models_[model].save_model(join(modeldir,
                                            'model.bin'))  # save binary

        tmvafile = join(modeldir, 'weights.xml')
        try:
            convert_model(self.models_[model].get_dump(),
                          input_variables=[(n, 'F') for n in feature_names],
                          output_xml=tmvafile)
            # Pretty-print the XML, keep the formatted file and a gzipped copy.
            os.system("xmllint --format {0} > {0}.tmp".format(tmvafile))
            os.system("mv {0} {0}.bak".format(tmvafile))
            os.system("mv {0}.tmp {0}".format(tmvafile))
            os.system("gzip -f {0}".format(tmvafile))
            os.system("mv {0}.bak {0}".format(tmvafile))
        except Exception:
            # Best effort only; do not swallow KeyboardInterrupt/SystemExit.
            warnings.warn("\n".join([
                "Warning:",
                "Saving model<{}> in TMVA XML format failed.".format(model),
                "Don't worry now, you can still convert xgboost model later."
            ]))
Example #43
0
        # Bounded region of parameter space
        pbounds = {
            'pp': (pp, pp),
            'seqlength': (1, 52),
            'densesize': (1, 256),
            'batchsize': (16, 256),
            'filters': (1, 256),
            'rH': (0, 1),
            'T': (0, 1),
            'Tsin': (0, 1),
        }  #constrained optimization technique, so you must specify the minimum and maximum values that can be probed for each parameter

        optimizer = BayesianOptimization(
            f=bayesOpt_function,  #function that is optimized
            pbounds=pbounds,  #opt.-range of parameters
            random_state=1,
            verbose=
            0  # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent, verbose = 2 prints everything
        )

        #load existing optimizer
        log_already_available = 0
        if os.path.isfile("./logs_CNN_seq2seq_GWLt-1_" + Well_ID + ".json"):
            load_logs(optimizer,
                      logs=["./logs_CNN_seq2seq_GWLt-1_" + Well_ID + ".json"])
            print("\nExisting optimizer is already aware of {} points.".format(
                len(optimizer.space)))
            log_already_available = 1

        # Saving progress
        logger = newJSONLogger(path="./logs_CNN_seq2seq_GWLt-1_" + Well_ID +
Example #44
0
# Bounded region of parameter space: name -> (lower, upper)
bounds_LGB = {
    'num_leaves': (5, 20),
    'min_data_in_leaf': (5, 20),
    'learning_rate': (0.01, 0.3),
    'min_sum_hessian_in_leaf': (0.00001, 0.01),
    'feature_fraction': (0.05, 0.5),
    'lambda_l1': (0, 5.0),
    'lambda_l2': (0, 5.0),
    'min_gain_to_split': (0, 1.0),
    'max_depth': (3, 15),
}

from bayes_opt import BayesianOptimization

# Optimizer over LGB_bayesian with a fixed seed.
LGB_BO = BayesianOptimization(LGB_bayesian, bounds_LGB, random_state=13)

init_points = 5
n_iter = 5

# All warnings raised during the search are silenced.
with warnings.catch_warnings():
    warnings.filterwarnings('ignore')
    # NOTE(review): xi is normally the exploration parameter of the EI/POI
    # acquisitions; with acq='ucb' it is presumably ignored -- confirm.
    # NOTE(review): alpha looks like it is forwarded to the Gaussian-process
    # regressor -- confirm against the installed bayes_opt version.
    LGB_BO.maximize(init_points=init_points,
                    n_iter=n_iter,
                    acq='ucb',
                    xi=0.0,
                    alpha=1e-6)

# Best target value found.
print(LGB_BO.max['target'])

# Parameters that produced it.
print(LGB_BO.max['params'])
Example #45
0
def gekko_bayesian(strategy):
    """Search parameters for ``strategy`` and print a three-step report.

    Step 1 runs the Bayesian search over ``gekko_search``; step 2 re-runs
    ``gekko_search`` with the best parameters; step 3 scores them with
    ``Evaluate``. The report ends with a ``config.<strategy>`` snippet and
    a TOML rendering of the expanded parameters.

    Sets the module-level ``Strategy`` and returns the best parameter dict.
    """
    global Strategy
    print("")
    Strategy = strategy

    base_values = getSettings()["strategies"][Strategy]
    search_ranges = promoterz.parameterOperations.parameterValuesToRangeOfValues(
        base_values, bayesconf.parameter_spread)

    print("Starting search %s parameters" % Strategy)
    # The optimizer gets its own copy of the ranges.
    bo = BayesianOptimization(gekko_search, copy.deepcopy(search_ranges))

    # 1st Evaluate
    print("")
    print("Step 1: BayesianOptimization parameter search")
    bo.maximize(init_points=settings['init_points'],
                n_iter=settings['num_iter'])
    best = bo.res['max']
    # Statistics recorded for the run that produced the best value.
    s1 = stats[all_val.index(best['max_val'])]

    # 2nd Evaluate
    print("")
    print("Step 2: testing searched parameters on random date")

    max_params = bo.res['max']['max_params'].copy()
    #max_params["persistence"] = 1
    print("Starting Second Evaluation")
    gekko_search(**max_params)
    s2 = stats[-1]  # statistics appended by the call just above

    # 3rd Evaluate
    print("")
    print("Step 3: testing searched parameters on new date")
    watch = settings["watch"]
    print(max_params)
    s3 = Evaluate(Strategy, max_params)

    resultjson = expandGekkoStrategyParameters(max_params,
                                               Strategy)  #[Strategy]

    # config.js like output
    percentiles = np.array([0.25, 0.5, 0.75])
    percent_labels = []
    for fraction in percentiles:
        percent_labels.append(str(int(round(fraction * 100))) + "%")
    stats_index = (['count', 'mean', 'std', 'min'] + percent_labels +
                   ['max'])

    rule = "// " + '-' * 50

    def show(headline, values):
        # Headline carries values[1], then one labelled line per entry.
        print(headline % values[1])
        position = 0
        while position < len(values):
            print('// %s: %.3f' % (stats_index[position], values[position]))
            position += 1

    print("")
    print(rule)
    print("// " + Strategy + ' Settings')
    print(rule)
    show("// 1st Evaluate: %.3f", s1)
    print(rule)
    show("// 2nd Evaluate: %.3f", s2)
    print(rule)
    print("// 3rd Evaluted: %f" % s3)
    print(rule)
    print("config.%s = {%s};" %
          (Strategy, json.dumps(resultjson, indent=2)[1:-1]))
    print('\n\n')
    print(resultInterface.parametersToTOML(resultjson))
    print(rule)

    return max_params
Example #46
0
# Settings for the Bayesian optimization

# Default search ranges; entries supplied on the command line as JSON
# (args.range) override or extend them.
param = {'lsigma_f':  (-2, 2),
         'lsigma_s':  (-2, 2),
         'lsigma_t':  (-2, 2),
         'l_corr':  (10, 1000),
         't_corr':  (1, 20)}
param.update(json.loads(args.range))
print("Prameter ranges as below:")
if not args.full:
    # Without --full the two correlation parameters are not optimized.
    del param['l_corr']
    del param['t_corr']
print(param)

# Preparation of the data manager
dm = data_manupulation.impute_shield_dm(100)
gene_df = pd.read_csv(args.genefile)
selected_gene_df = dm.select_gene_df(gene_df)
dm = data_manupulation.standard_dm(args.refnum)

# BO estimation
bo = BayesianOptimization(ts_recovery_correlation, param)
bo.maximize(init_points=5, n_iter=args.boiter)
print(bo.res['max'])
# Output: write the best result as JSON. The context manager closes (and
# flushes) the file, which the bare open() call never did.
with open(args.filepath, "w") as f:
    json.dump(bo.res['max'], f)
    # model = FlowlineModel(final_flowline.fls[-1], mb_model=mb_model, y0=0)
    model = FlowlineModel(VerticalWallFlowline(surface_h=bed_h, bed_h=bed_h,
                                               widths=np.zeros(200) + 3.,
                                               map_dx=100), mb_model=mb_model,
                                               y0=0)
    model.run_until(time)
    flowline = model.fls[-1]

    new_mb_model = LinearMassBalanceModel(3000, grad=4)
    new_model = FlowlineModel(flowline, mb_model=new_mb_model, y0=0)
    new_model.run_until(150)
    return -sum(
        abs(final_flowline.fls[-1].surface_h - new_model.fls[-1].surface_h))


bo = BayesianOptimization(target,
                          {'ela': (2500,3500), 'time': (0, 200)})

# One of the things we can do with this object is pass points
# which we want the algorithm to probe. A dictionary with the
# parameters names and a list of values to include in the search
# must be given.
bo.explore({'ela': [3000, 2750], 'time': [0,150]})

# Additionally, if we have any prior knowledge of the behaviour of
# the target function (even if not totally accurate) we can also
# tell that to the optimizer.
# Here we pass a dictionary with 'target' and parameter names as keys and a
# list of corresponding values
#bo.initialize()

# Once we are satisfied with the initialization conditions
Example #48
0
from bayes_opt import BayesianOptimization
'''
Example of how to use this bayesian optimization package.
'''

# Let's find the maximum of a simple quadratic function of two variables.
# We create the bayes_opt object and pass the function to be maximized
# together with the parameter names and their bounds.
bo = BayesianOptimization(lambda x, y: -x**2 - (y - 1)**2 + 1,
                          {'x': (-4, 4), 'y': (-3, 3)})

# One of the things we can do with this object is pass points
# which we want the algorithm to probe. A dictionary with the
# parameter names and a list of values to include in the search
# must be given. Here that is the two points (-1, -2) and (3, 2).
bo.explore({'x': [-1, 3], 'y': [-2, 2]})

# Additionally, if we have any prior knowledge of the behaviour of
# the target function (even if not totally accurate) we can also
# tell that to the optimizer.
# Here we pass a dictionary with target values as keys of another
# dictionary with parameter names and their corresponding value.
bo.initialize({-2: {'x': 1, 'y': 0}, -1.251: {'x': 1, 'y': 1.5}})

# Once we are satisfied with the initialization conditions
# we let the algorithm do its magic by calling the maximize()
# method.
bo.maximize(init_points=15, n_iter=25)

# The output values can be accessed with bo.res
print(bo.res['max'])
Example #49
0
    def construct_rf_classifier(self, train, features, label_col):
        """Tune a random forest with Bayesian optimization, then fit it.

        The search maximizes the mean 5-fold cross-validated accuracy on
        ``train[features]`` / ``train[label_col]``. The best configuration
        is then fitted on the same data.

        Sets ``self.nr_clf`` (number of trees), ``self.clf`` (the fitted
        classifier) and ``self.time`` (fit duration in seconds).
        """
        data = train[features]
        target = train[label_col]

        def decode(nr_classifiers, max_depth, min_samples_leaf, bootstrap,
                   criterion, max_features):
            # Translate the optimizer's continuous values into
            # RandomForestClassifier keyword arguments: integer parameters
            # are truncated, the 0..1 switches are rounded to a choice.
            return {
                'n_estimators': int(nr_classifiers),
                'max_depth': int(max_depth),
                'min_samples_leaf': int(min_samples_leaf),
                'bootstrap': bool(np.round(bootstrap)),
                'criterion': 'gini' if np.round(criterion) else 'entropy',
                'max_features': None if np.round(max_features) else 1.0,
            }

        def rfcv(nr_classifiers, max_depth, min_samples_leaf, bootstrap,
                 criterion, max_features):
            # Target function: mean CV accuracy of one configuration.
            forest = RandomForestClassifier(**decode(
                nr_classifiers, max_depth, min_samples_leaf, bootstrap,
                criterion, max_features))
            # `scoring` must be passed by keyword: as the fourth positional
            # argument it is taken for `groups` (or rejected) by
            # sklearn.model_selection.cross_val_score.
            return cross_val_score(forest,
                                   data,
                                   target,
                                   scoring='accuracy',
                                   cv=5).mean()

        params = {
            'nr_classifiers': (10, 1000),
            'max_depth': (5, 10),
            'min_samples_leaf': (2, 10),
            'bootstrap': (0, 1),
            'criterion': (0, 1),
            'max_features': (0, 1)
        }

        rfBO = BayesianOptimization(rfcv, params, verbose=0)
        rfBO.maximize(init_points=10, n_iter=20, n_restarts_optimizer=50)

        best_params = rfBO.res['max']['max_params']

        # Decode the winner exactly as it was decoded during the search.
        best_kwargs = decode(
            best_params['nr_classifiers'],
            best_params['max_depth'],
            best_params['min_samples_leaf'],
            best_params['bootstrap'],
            best_params['criterion'],
            best_params['max_features'])
        self.nr_clf = best_kwargs['n_estimators']

        self.clf = RandomForestClassifier(**best_kwargs)
        start = time.time()
        self.clf.fit(data, target)
        self.time = time.time() - start
                            , w_x_d_y = 100 # 86
                            , w_x_t_y = 100 # 91
                            )
                              
    print "5. Execute Bayesian parameter optimization to select feature weights..."   
    
    ### Bayesian Optimization of Parameters ### 
    f = functools.partial(process_grid, train, test, threshold = nn_threshold, grid_ids = grid_ids, model = model_nn, grid_variable = grid_variable) #, weights = feature_weights)
    bo = BayesianOptimization(f=f,
                                  pbounds={
                                      'w_x': (80, 200), # (100, 1000)
                                      'w_y': (50, 150),  # (500, 2000)
                                      "w_hour": (50, 150), # (1, 10)
                                      "w_weekday": (20, 60), # (1, 10)
                                      "w_day_of_month": (20, 100), # (1,10)
                                      "w_month": (20, 80), # (1,10)
                                      "w_year": (0, 50), # (2,20)
                                      "w_accuracy": (1, 5), # (3,30)
                                      "w_x_d_y": (70, 200), # (3,30)
                                      "w_x_t_y": (70, 200) # (3,30)
                                      },
                                  verbose=True
                                  )
    
    bo.maximize(init_points = 2, n_iter = 1, acq = "ei", xi = 1.0) # 0.1
    for i in range(300):
        bo.maximize(n_iter = 1, acq = "ei", xi = 1.0) # exploration points
        bo.maximize(n_iter = 1, acq = "ei", xi = 1.0) # exploitation points
        
    print "6. Complete!!!" 
 
Example #51
0
    xtest = pd.read_csv('../input2/xtest_'+ dataset_version + '.csv')
    id_test = xtest.ID
    xtest.drop('ID', axis = 1, inplace = True)

    # folds
    xfolds = pd.read_csv('../input/xfolds.csv')
    # work with validation split
    idx0 = xfolds[xfolds.valid == 0].index
    idx1 = xfolds[xfolds.valid == 1].index
    x0 = xtrain[xtrain.index.isin(idx0)]
    x1 = xtrain[xtrain.index.isin(idx1)]
    y0 = ytrain[ytrain.index.isin(idx0)]
    y1 = ytrain[ytrain.index.isin(idx1)]

    extratreesBO = BayesianOptimization(extratreescv,
                                        {'n_estimators': (int(250), int(2000)),
                                         'min_samples_split': (int(2), int(6)),
                                         'min_samples_leaf': (int(1), int(6)),
                                         'max_features': (int(15), int(100)),
                                         'max_depth': (int(25) , int(50)),
                                         'min_weight_fraction_leaf': (0, 0.01),
                                         })


    extratreesBO.maximize(init_points=5, n_iter=50, acq='ei')
    print('-' * 53)

    print('Final Results')
    print('Extra Trees: %f' % extratreesBO.res['max']['max_val'])
    print(extratreesBO.res['max']['max_params'])
Example #52
0
	args.experiment = sanitise_for_mongo(args.experiment)

	metadata = init_dataset(args.dataset)

	if not "surprise samples" in metadata:
		metadata["surprise_samples"] = 100000

	score_method = "plausibility"
	surprise_depth=2
	steps = 1000
	init_steps = 100
	recipes_per_step = 5
	lower_bound_plaus=False
	num_sigma_range = 5
	k=3

	gp_params = {'corr':'absolute_exponential','nugget': 1}
	model = init_model(args.dataset, metadata, args.model, surprise_depth, args.experiment)


	params = {}
	for p in range(model.model.nhid):
		params["x_"+str(p)] = (-num_sigma_range,num_sigma_range)
	bo = BayesianOptimization(lambda **param_args : wrap_plausibility_and_surprise([param_args[p] for p in param_args], model=model, plausibility_dist=metadata["experiments"][args.experiment]["plausibility_distribution"], weight_by_length=False, errors_by_length=metadata["experiments"][args.experiment]["errors_by_length"],from_visible=False, feature_list=metadata["fields_x"], use_lower_bound=lower_bound_plaus, surprise_dist=metadata["experiments"][args.experiment]["surprise_distribution"], surprise_depth=surprise_depth),params)

	bo.maximize(init_points=init_steps, n_iter=0, kappa=k)
	for step in range(steps):
		bo.maximize(init_points=0, n_iter=recipes_per_step, kappa=k, **gp_params)
		for r,v in zip(bo.X[-recipes_per_step:],bo.Y[-recipes_per_step:]):
			print model.positive_features_from_design_vector(model.construct_from_hidden(np.atleast_2d(np.array(r)))[0].tolist()[0]).keys(),"({0:.4f})".format(v)
def kNNOptimize(train_set, test_set, njobs, ijob):
    """Tune kNN parameters per spatial bin and store each result as JSON.

    The 10 x 10 domain is cut into ``NBINS_X * NBINS_Y`` bins; the bins
    are split into ``njobs`` chunks and this call handles chunk ``ijob``.
    For every bin without an existing ``knn_bayes/<bin>_<nx>_<ny>.json``
    file, a Bayesian search over ``validation_map3_kNN`` is run and its
    results written to that file.

    Parameters
    ----------
    train_set
        Training data forwarded to ``validation_map3_kNN``.
    test_set
        Accepted but not used.
    njobs : int
        Total number of jobs the bins are divided among.
    ijob : int
        Index of the job handled by this call.
    """

    delta_x = 10. / NBINS_X
    delta_y = 10. / NBINS_Y

    NBINS_TOTAL = NBINS_X * NBINS_Y
    ijob_bins = np.array_split(np.arange(NBINS_TOTAL), njobs)[ijob]

    for i_bin in ijob_bins:
        # np.arange yields numpy integers, which json cannot serialize on
        # Python 3; use a plain int for the rest of the iteration.
        i_bin = int(i_bin)

        bin_filename = 'knn_bayes/{0:05d}_{1:02d}_{2:02d}.json'.format(
            i_bin, NBINS_X, NBINS_Y)
        if os.path.isfile(bin_filename):
            # Already optimized in an earlier run.
            continue

        # Bins are numbered row by row: row -> y, column -> x.
        row, col = divmod(i_bin, NBINS_X)
        y_lower = row * delta_y
        x_lower = col * delta_x

        x_upper = x_lower + delta_x
        y_upper = y_lower + delta_y

        # this block is needed because some points fall on the right or
        # top boundary of the domain exactly.
        if x_upper == 10.:
            x_upper += 1.0e-5
        if y_upper == 10.:
            y_upper += 1.0e-5

        # Two known-decent points (first and second entry of each tuple),
        # not bounds.
        initial_points = {"cut_threshold": (5, 7),
                          "w_x": (450, 550),
                          "w_y": (1050, 950),
                          "w_hour": (4, 2),
                          "w_log10acc": (10, 10),
                          "w_weekday": (2, 3),
                          "w_year": (9, 11),
                          "n_neighbors": (20, 25),
                          "margin": (0.02, 0.03)
                          }

        f = functools.partial(validation_map3_kNN,
                              train_set=train_set,
                              xlower=x_lower, xupper=x_upper,
                              ylower=y_lower, yupper=y_upper)
        bo = BayesianOptimization(f=f,
                                  pbounds={"cut_threshold": (3, 12),
                                           "w_x": (250, 1000),
                                           "w_y": (500, 2000),
                                           "w_hour": (1, 10),
                                           "w_log10acc": (5, 30),
                                           "w_weekday": (1, 10),
                                           "w_year": (2, 20),
                                           "n_neighbors": (10, 40),
                                           "margin": (0.01, 0.04)
                                           },
                                  verbose=True)

        # this little bit of code allows seeding of the bayesian optimizer
        # with a few points that you already know are decent parameter values.
        # initial points are based off @Sandro's kNN script.
        #
        # seed the bayesian optimizer with a couple of points.
        bo.explore(initial_points)

        # The Bayesian optimizer was observed to slow down greatly after
        # 64 iterations, so the total is limited to 64.

        # explore the space (xi=0.1)
        # 2 custom (above), 5 initial (implied), 25 exploration  = 32 total
        bo.maximize(n_iter=25, acq="ei", xi=0.1)

        # exploit the peaks for the other 32 iterations (xi=0.)
        bo.maximize(n_iter=32, acq="ei", xi=0.0)

        # Shallow copy so the bookkeeping keys below are not written into
        # the optimizer's own result dict.
        optimizer_output = dict(bo.res['all'])
        optimizer_output['max'] = bo.res['max']

        optimizer_output['i_bin'] = i_bin
        optimizer_output['nx'] = NBINS_X
        optimizer_output['ny'] = NBINS_Y
        optimizer_output['x_lower'] = x_lower
        optimizer_output['y_lower'] = y_lower
        optimizer_output['x_upper'] = x_upper
        optimizer_output['y_upper'] = y_upper

        # Serialize before opening the file: a failure here must not leave
        # an empty file behind, because an existing file marks the bin as
        # done on the next run.
        payload = json.dumps(optimizer_output, sort_keys=True,
                             indent=4, separators=(',', ': '))
        with open(bin_filename, 'w') as fh:
            fh.write(payload)
Example #54
0
            min_samples_split=int(min_samples_split),
            max_features=min(max_features, 0.999),  # float
            max_depth=int(max_depth),
            random_state=2),
        X_train,
        y_train,
        scoring='r2',
        cv=5).mean()
    return val


#### Run Bayesian optimization
# Optimizer over rf_cv; each entry is (lower, upper) for that parameter.
rf_bo = BayesianOptimization(
    rf_cv, {
        'n_estimators': (10, 250),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 0.999),
        'max_depth': (5, 15)
    })

# NOTE(review): probe() is given a list of three values per parameter,
# presumably meant as three points -- confirm the installed bayes_opt
# accepts this form. Two values lie outside the bounds declared above
# (n_estimators=300 > 250, max_depth=25 > 15).
rf_bo.probe({
    'n_estimators': [100, 200, 300],
    'min_samples_split': [2, 10, 20],
    'max_features': [0.1, 0.5, 0.9],
    'max_depth': [10, 15, 25]
})

# Run the search with the library's default settings.
rf_bo.maximize()

#### output the optimal hyperparameters
# Bare expression: it only displays a value in an interactive session or
# notebook; in a plain script nothing is printed.
rf_bo.max
    doc_name_save = data['doc_name']
    paragraph_nb_save = data['paragraph_nb']
    firstname_is_french_save = data['firstname_is_french']
    
    data = data[useful_col]
    y = data['is_target']
    data = data.drop('is_target', axis=1)
    X = data
    
    ratio = float(np.sum(y == 0)) / np.sum(y==1) # 131.708

    xgboostBO = BayesianOptimization(xgboostcv,
                                     {'max_depth': (5, 8),
                                      'learning_rate': (0.01, 0.3),
                                      'n_estimators': (150, 300),
#                                      'gamma': (1., 0.01),
#                                      'min_child_weight': (1, 10),
#                                      'max_delta_step': (0, 0.1),
                                      'subsample': (0.85, 1),
                                      'colsample_bytree' :(0.5, 1),
#                                      'scale_pos_weight' : ratio
                                     })
                                     
    xgboostBO.maximize(init_points=10, n_iter=40)
    print('-'*53)

    print('Final Results')
    print('XGBOOST: %f' % xgboostBO.res['max']['max_val'])
    
    print('-'*53)
    print xgboostBO.res['max']
Example #56
0
                                         max_delta_step=max_delta_step,
                                         subsample=subsample,
                                         colsample_bytree=colsample_bytree,
                                         seed=seed,
                                         objective="multi:softprob"),
                           train,
                           labels,
                           "log_loss",
                           cv=5).mean()

if __name__ == "__main__":
    # Load data set and target values
    train, labels, test, _, _ = load_data()

    # Search space for xgboostcv: name -> (lower, upper).
    xgboostBO = BayesianOptimization(xgboostcv,
                                     {'max_depth': (5, 10),
                                      'learning_rate': (0.01, 0.3),
                                      'n_estimators': (50, 1000),
                                      # Bounds must be (lower, upper); this
                                      # pair was previously given reversed.
                                      'gamma': (0.01, 1.),
                                      'min_child_weight': (2, 10),
                                      'max_delta_step': (0, 0.1),
                                      'subsample': (0.7, 0.8),
                                      'colsample_bytree': (0.5, 0.99)
                                      })

    # Run the search with the library's default settings.
    xgboostBO.maximize()
    print('-' * 53)

    print('Final Results')
    print('XGBOOST: %f' % xgboostBO.res['max']['max_val'])
def optimize_lgbm_params(train_df, target_df):
    """Search LightGBM hyperparameters with Bayesian optimization.

        Args:
            train_df(pd.DataFrame): Training data
            target_df(pd.Series): Target values, row-aligned with train_df

        Returns:
            best_params(dict): Best parameters found by the optimizer, with
                'num_leaves' and 'max_depth' cast to int and the fixed
                'n_estimators' / 'nthread' settings added
    """
    # The optimizer proposes floats; these two are cast to int before use.
    integer_keys = ('num_leaves', 'max_depth')

    def _lgbm_evaluate(**params):
        """Score one parameter set with 2-fold cross-validation.

            Args:
                params(dict): Candidate LGBM parameters proposed by the optimizer

            Returns:
                roc_auc_score(float): ROC-AUC of the out-of-fold predictions,
                    the value maximized by the Bayesian optimization
        """

        warnings.simplefilter('ignore')
        for key in integer_keys:
            params[key] = int(params[key])

        model = LGBMClassifier(n_estimators=10000, nthread=4, **params)
        splitter = KFold(n_splits=2, shuffle=True, random_state=1001)

        # Every row receives exactly one prediction: the one made while
        # that row was in the held-out fold.
        oof_proba = np.zeros(train_df.shape[0])

        # NOTE: `feats` is not defined in this function; it is read from
        # the enclosing module scope.
        for fit_rows, holdout_rows in splitter.split(train_df, target_df):
            fit_x = train_df[feats].iloc[fit_rows]
            fit_y = target_df.iloc[fit_rows]
            holdout_x = train_df[feats].iloc[holdout_rows]
            holdout_y = target_df.iloc[holdout_rows]

            model.fit(fit_x,
                      fit_y,
                      eval_set=[(fit_x, fit_y), (holdout_x, holdout_y)],
                      eval_metric='auc',
                      verbose=False,
                      early_stopping_rounds=100)

            oof_proba[holdout_rows] = model.predict_proba(
                holdout_x, num_iteration=model.best_iteration_)[:, 1]

            # Drop the fold slices before the next iteration.
            del fit_x, fit_y, holdout_x, holdout_y
            gc.collect()

        return roc_auc_score(target_df, oof_proba)

    # (lower, upper) search bounds for each tuned parameter
    search_space = {
        'colsample_bytree': (0.8, 1),
        'learning_rate': (.015, .025),
        'num_leaves': (33, 35),
        'subsample': (0.8, 1),
        'max_depth': (7, 9),
        'reg_alpha': (.03, .05),
        'reg_lambda': (.06, .08),
        'min_split_gain': (.01, .03),
        'min_child_weight': (38, 40)
    }

    optimizer = BayesianOptimization(_lgbm_evaluate, search_space)
    optimizer.maximize(init_points=5, n_iter=5)

    # Add the fixed settings used during evaluation, then restore the
    # integer-valued parameters.
    best_params = optimizer.max['params']
    best_params.update(n_estimators=10000, nthread=4)
    for key in integer_keys:
        best_params[key] = int(best_params[key])

    return best_params
Example #58
0
    # work with validation split
    # Rows with fold_index == 1 form the held-out part; all others are used
    # for fitting.
    idx0 = np.where(fold_index != 1)
    idx1 = np.where(fold_index == 1)
    # np.where returns a tuple, so indexing with it adds a leading axis;
    # the trailing [0] removes it again (assumes fold_index is 1-D).
    x0 = np.array(xtrain)[idx0,:][0]
    x1 = np.array(xtrain)[idx1,:][0]
    y0 = np.array(ytrain)[idx0]
    y1 = np.array(ytrain)[idx1]

    nb_classes = 2
    dims = xtrain.shape[1]
    print(dims, 'dims')

    # Search space: layer widths are bounded relative to the number of input
    # columns; dropout rates and epoch count use fixed ranges.
    kerasBO = BayesianOptimization(kerascv,
                                   {'dense1': (int(0.15 * xtrain.shape[1]), int(2 * xtrain.shape[1])),
                                    'dropout1': (0.05, 0.5),
                                    'dense2': (int(0.15 * xtrain.shape[1]), int(2 * xtrain.shape[1])),
                                    'dropout2': (0.05, 0.5),
                                    'epochs': (int(20), int(150))
                                    })
    # Hand the optimizer one hand-picked configuration to explore.
    kerasBO.explore({'dense1': [int(0.15 * xtrain.shape[1])],
                     'dropout1': [0.05],
                     'dense2': [int(1.5 * xtrain.shape[1])],
                     'dropout2': [0.5],
                     'epochs': [40]})

    kerasBO.maximize(init_points=3, n_iter=25)
    print('-' * 53)

    print('Final Results')
    # The tuned model is the Keras network scored by kerascv, so label the
    # result accordingly (it was previously reported as 'Extra Trees').
    print('Keras: %f' % kerasBO.res['max']['max_val'])
    print(kerasBO.res['max']['max_params'])
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
from bayes_opt import BayesianOptimization

def function_to_be_optimized(batch, epochs, lr, weight_decay, lr_multiplier):
    """Objective for the optimizer: forward one parameter set to Bayes_Unet.

    `batch` and `epochs` are truncated to int before the call; the
    remaining arguments are passed through unchanged. Returns whatever
    Bayes_Unet returns.
    """
    return Bayes_Unet(int(batch), int(epochs), lr, weight_decay, lr_multiplier)


# Search region: one (lower, upper) pair per tuned parameter
pbounds = {
    'batch': (4, 16),
    'epochs': (60, 140),
    'lr': (1e-6, 1e-2),
    'weight_decay': (1e-8, 1e-3),
    'lr_multiplier': (0.1, 1),
}

hyperparams = BayesianOptimization(f=function_to_be_optimized, pbounds=pbounds, verbose=2, random_state=1)

# Attach a JSON logger to the optimization-step event
logger = JSONLogger(path="/content/drive/MyDrive/UNET/Training models/logs.json")
hyperparams.subscribe(Events.OPTIMIZATION_STEP, logger)

# 3 random trials, then 30 Bayesian steps
hyperparams.maximize(init_points=3, n_iter=30)

"""## **Testing + visualization** """

import numpy as np
import matplotlib.pyplot as plt