コード例 #1
0
    def SVC(self):
        """Tune an SVC with Bayesian search, log test metrics, return a refit.

        The search runs over ``search_spaceSVC`` on ``self.Xtr`` / ``self.ytr``.
        The best estimator predicts ``self.Xte``; the metrics computed by
        ``self.calculateMetrics`` are passed to ``logClassifier``.

        Returns:
            A new ``SVC`` built from the best parameters found (with
            ``probability=True``) and fitted on the training data.
        """
        def stop_on_perfect_score(optim_result):
            # Called by the search after each step; a True return value is
            # the signal used here to end the search early.
            best_so_far = search.best_score_
            print("Score: SVC: ", best_so_far * 100)
            if best_so_far != 1:
                return None
            print('Max Score Achieved')
            return True

        search = BayesSearchCV(
            SVC(random_state=0),
            search_spaceSVC,
            n_iter=N_ITER,
            cv=CV,
            scoring=scoringMetrics,
            return_train_score=False,
        )
        search.fit(self.Xtr, self.ytr, callback=stop_on_perfect_score)

        predictions = search.best_estimator_.predict(self.Xte)
        metrics = self.calculateMetrics(predictions)
        tuned_params = search.best_params_

        logClassifier(SVC(), self.classes,
                      metrics[0], metrics[1], metrics[2], metrics[3],
                      metrics[4], metrics[5], metrics[6],
                      tuned_params, scoringMetrics)

        final_model = SVC(**tuned_params, probability=True)
        return final_model.fit(self.Xtr, self.ytr)
コード例 #2
0
    def LR(self):
        """Tune a LogisticRegression with Bayesian search and return a refit.

        The search runs over ``search_spaceLR`` on ``self.Xtr`` / ``self.ytr``.
        The best estimator predicts ``self.Xte``; the metrics computed by
        ``self.calculateMetrics`` are passed to ``logClassifier``.

        Returns:
            A new ``LogisticRegression`` built from the best parameters found
            and fitted on the training data.
        """
        def stop_on_perfect_score(optim_result):
            # Called by the search after each step; a True return value is
            # the signal used here to end the search early.
            best_so_far = search.best_score_
            print("Score: LR: ", best_so_far * 100)
            if best_so_far != 1:
                return None
            print('Max Score Achieved')
            return True

        base_estimator = LogisticRegression(max_iter=100, random_state=0)
        search = BayesSearchCV(
            base_estimator,
            search_spaceLR,
            cv=CV,
            n_iter=N_ITER,
            scoring=scoringMetrics,
            return_train_score=False,
        )
        search.fit(self.Xtr, self.ytr, callback=stop_on_perfect_score)

        predictions = search.best_estimator_.predict(self.Xte)
        metrics = self.calculateMetrics(predictions)
        tuned_params = search.best_params_

        logClassifier(LogisticRegression(), self.classes,
                      metrics[0], metrics[1], metrics[2], metrics[3],
                      metrics[4], metrics[5], metrics[6],
                      tuned_params, scoringMetrics)

        final_model = LogisticRegression(**tuned_params)
        return final_model.fit(self.Xtr, self.ytr)
コード例 #3
0
class SklearnGeneralModel(ModelBase):
    """Wrap an sklearn-style estimator class, optionally tuned by BayesSearchCV.

    ``model`` is passed in as a class (it is called in ``build_model``), and
    is replaced by the built instance once ``build_model`` has run.
    """

    def __init__(self, is_normalize, model, searchCV=False):
        """Store the configuration; nothing is instantiated yet.

        :param is_normalize: when truthy, inputs go through a ``Normalizer``
            in ``train`` and ``predict``.
        :param model: estimator class to instantiate in ``build_model``.
        :param searchCV: when truthy, ``build_model`` wraps the estimator in
            ``BayesSearchCV``.
        """
        self.is_normalize = is_normalize
        self.model = model
        self.searchCV = searchCV

    def build_model(self, config_args=None):
        """Instantiate the estimator, or a BayesSearchCV around it.

        :param config_args: keyword arguments for the estimator constructor
            (plain mode) or for ``BayesSearchCV`` (search mode). ``None`` is
            treated as an empty dict.
        """
        if config_args is None:
            config_args = {}

        if not self.searchCV:
            self.model = self.model(**config_args)
        else:
            self.model = BayesSearchCV(estimator=self.model(), **config_args)

    def train(self, x, y):
        """Fit the model on ``x`` / ``y``, normalizing ``x`` first if enabled."""
        if self.is_normalize:
            # The scaler is kept on the instance so predict() can reuse it.
            self.scaler = Normalizer()
            x = self.scaler.fit_transform(x)

        with warnings.catch_warnings():
            # UserWarnings raised while fitting are silenced on purpose.
            warnings.simplefilter('ignore', UserWarning)
            self.model.fit(x, y)

    def predict(self, x):
        """Return predictions for ``x``, applying the fitted scaler if enabled."""
        if self.is_normalize:
            x = self.scaler.transform(x)
        return self.model.predict(x)

    def feature_based_metrics(self, columns=None, index=None):
        """Return the normalized feature importances as a one-row-per-index frame.

        Works in both modes: with a search wrapper the importances come from
        its ``best_estimator_``; without one they come from the model itself
        (previously this raised ``AttributeError`` when ``searchCV`` was off).

        :param columns: labels for the features.
        :param index: labels for the resulting (transposed) rows.
        :return: ``pandas.DataFrame`` of importances that sum to 1.
        """
        fitted = getattr(self.model, 'best_estimator_', self.model)
        feature_importance = fitted.feature_importances_
        feature_importance = feature_importance / np.sum(feature_importance)
        return pd.DataFrame(feature_importance, index=columns, columns=index).T
コード例 #4
0
ファイル: regression.py プロジェクト: aaditis/egem
def train_models(models, params, Xtrain, Ytrain, kfold, filename):
    """
    Run k-fold Bayesian hyperparameter tuning for each model and save the
    fitted searches for model persistence.

    One independent search is fitted per output column of ``Ytrain``. Each
    column gets its own ``BayesSearchCV`` object; re-fitting and appending a
    single shared object would leave every saved entry pointing at the fit
    for the last column.

    :param models: A list of sklearn model objects (indexed in parallel with
                   ``params`` and ``filename``).
    :param params: A list of dictionaries containing hyperparameters to tune.
    :param Xtrain: A numpy array containing the training data.
    :param Ytrain: A 2-D numpy array of outputs; it is indexed as
                   ``Ytrain[:, j]``.
    :param kfold:  An integer or sklearn object determining the kfold operation
                   performed.
    :param filename: A list of paths to save the models to.
    """
    no_of_cpus = multiprocessing.cpu_count()

    with parallel_backend('threading', n_jobs=no_of_cpus):
        for i, model in enumerate(models):
            mdls = []
            for j in range(Ytrain.shape[1]):
                # A fresh search per target column keeps the fitted results
                # of earlier columns intact.
                opt = BayesSearchCV(estimator=model,
                                    search_spaces=params[i],
                                    n_iter=30,
                                    cv=kfold,
                                    n_jobs=-1,
                                    random_state=0)
                opt.fit(Xtrain, Ytrain[:, j])
                mdls.append(opt)
                # Saved after every column so partial progress is persisted.
                dump(res=mdls, filename=filename[i])
コード例 #5
0
def test_searchcv_reproducibility():
    """
    Test whether results of BayesSearchCV can be reproduced with a fixed
    random state.
    """

    # return_X_y must be passed by keyword: recent scikit-learn releases make
    # it keyword-only.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.75, random_state=0
    )

    random_state = 42

    opt = BayesSearchCV(
        SVC(random_state=random_state),
        {
            'C': Real(1e-6, 1e+6, prior='log-uniform'),
            'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
            'degree': Integer(1, 8),
            'kernel': Categorical(['linear', 'poly', 'rbf']),
        },
        n_iter=11, random_state=random_state
    )

    opt.fit(X_train, y_train)
    best_est = opt.best_estimator_

    # An unfitted clone with the same seed must reach the same optimum.
    opt2 = clone(opt).fit(X_train, y_train)
    best_est2 = opt2.best_estimator_

    for param in ('C', 'gamma', 'degree', 'kernel'):
        assert getattr(best_est, param) == getattr(best_est2, param)
def bayesian_optimization(model,
                          space,
                          scorer,
                          x_train,
                          y_train,
                          x_test,
                          y_test,
                          n_iter=256,
                          cv=4,
                          n_jobs=None):
    """Run a Bayesian hyperparameter search and print its scores.

    The search object is stored in the module-level ``opt`` and the
    module-level ``counter`` is reset to 0 before fitting; ``on_step`` is
    passed as the fit callback.

    When ``n_jobs`` is None, ``cv`` is used as the number of jobs.
    Prints the best parameters, the best validation score, and the score on
    ``x_test`` / ``y_test``. Returns nothing.
    """
    global counter
    global opt

    workers = cv if n_jobs is None else n_jobs

    opt = BayesSearchCV(
        model,
        space,
        scoring=scorer,
        n_iter=n_iter,
        cv=cv,
        verbose=10,
        n_jobs=workers,
    )

    counter = 0
    opt.fit(x_train, y_train, callback=on_step)

    print(opt.best_params_)
    print("val. score: {}".format(opt.best_score_))
    print("test score: {}".format(opt.score(x_test, y_test)))
コード例 #7
0
def perform_bayes_search(
    estimator, X_train, X_val, y_train, y_val, param_grid, scoring=None
):
    """Tune ``estimator`` with BayesSearchCV and return the best estimator.

    The validation data is forwarded to the estimator as the ``eval_set``
    fit parameter: a tuple for a CatBoost classifier, a nested list for
    anything else.
    """
    is_catboost = isinstance(estimator, cb.core.CatBoostClassifier)
    eval_set = (X_val, y_val) if is_catboost else [[X_val, y_val]]

    search_settings = {
        "estimator": estimator,
        "search_spaces": param_grid,
        "scoring": scoring,
        "cv": 2,
        "n_iter": 20,
        "n_jobs": 1,
        "refit": True,
        "return_train_score": False,
        "optimizer_kwargs": {"base_estimator": "GP"},
        "random_state": 13,
        "fit_params": {"eval_set": eval_set},
    }
    search = BayesSearchCV(**search_settings)
    search.fit(X_train, y_train)

    return search.best_estimator_
コード例 #8
0
def test_searchcv_callback():
    """Check that BayesSearchCV invokes the callback and honours its stop signal."""

    # return_X_y must be passed by keyword: recent scikit-learn releases make
    # it keyword-only.
    X, y = load_iris(return_X_y=True)
    opt = BayesSearchCV(
        DecisionTreeClassifier(),
        {
            'max_depth': [3],  # additional test for single dimension
            'min_samples_split': Real(0.1, 0.9),
        },
        n_iter=5
    )
    # A one-element list lets the closure below mutate the counter.
    total_iterations = [0]

    def callback(opt_result):
        # this simply counts iterations
        total_iterations[0] += 1

        # break the optimization loop at some point;
        # True == stop optimization
        return total_iterations[0] > 2

    opt.fit(X, y, callback=callback)

    # The search was asked for 5 iterations but must have stopped after 3.
    assert total_iterations[0] == 3

    # test whether final model was fit
    opt.score(X, y)
コード例 #9
0
def tune_param(model, pipes, param_grid, refit, data, target, cv=5, n_iter=6):
    '''
    Tune the parameters of ``pipes[model]`` with a Bayesian search.

    Every key of ``param_grid`` is prefixed with ``"<model>__"`` before the
    search is built. The search is scored with negative mean absolute error.

    Returns the fitted search object and its ``cv_results_`` as a DataFrame.
    '''
    prefix = model + '__'
    prefixed_grid = {prefix + name: space for name, space in param_grid.items()}

    xgbcv = BayesSearchCV(
        pipes[model],
        prefixed_grid,
        scoring="neg_mean_absolute_error",
        n_iter=n_iter,
        refit=refit,
        n_jobs=-1,
        verbose=True,
        cv=cv,
    )
    xgbcv.fit(data, target)

    print('best score: {}'.format(xgbcv.best_score_))
    print('best params: {}'.format(xgbcv.best_params_))

    return xgbcv, pd.DataFrame(xgbcv.cv_results_)
コード例 #10
0
def test_searchcv_reproducibility():
    """
    Test whether results of BayesSearchCV can be reproduced with a fixed
    random state.
    """

    # return_X_y must be passed by keyword: recent scikit-learn releases make
    # it keyword-only.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        train_size=0.75,
                                                        random_state=0)

    random_state = 42

    search_space = {
        'C': Real(1e-6, 1e+6, prior='log-uniform'),
        'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
        'degree': Integer(1, 8),
        'kernel': Categorical(['linear', 'poly', 'rbf']),
    }
    opt = BayesSearchCV(SVC(random_state=random_state),
                        search_space,
                        n_iter=11,
                        random_state=random_state)

    opt.fit(X_train, y_train)
    best_est = opt.best_estimator_

    # An unfitted clone with the same seed must reach the same optimum.
    opt2 = clone(opt).fit(X_train, y_train)
    best_est2 = opt2.best_estimator_

    for param in ('C', 'gamma', 'degree', 'kernel'):
        assert getattr(best_est, param) == getattr(best_est2, param)
コード例 #11
0
ファイル: housing_models.py プロジェクト: jander081/Kaggle
    def fit(self, X, y):
        """Tune hyper-parameters with a Bayesian search, then fit on ``X``, ``y``.

        The search runs over ``self.intervals`` for ``self.model`` using a
        shuffled 5-fold split. The parent class is then re-initialised with
        the best parameters (plus ``shrinking=False``) and fitted on the
        full data.

        Returns ``self``.
        """
        folds = KFold(n_splits=5, shuffle=True, random_state=81)
        tuner = BayesSearchCV(
            self.model,
            self.intervals,
            n_iter=5,
            n_jobs=-1,
            cv=folds,
            verbose=0,
            random_state=82,
        )
        tuner.fit(X, y)
        tuned = tuner.best_params_

        # Re-run the parent constructor with the tuned values before the
        # final fit of this regressor.
        parent = super(BayesSVR, self)
        parent.__init__(**tuned, shrinking=False)
        parent.fit(X, y)

        return self
コード例 #12
0
def bo_RandomForestRegressor(X, y):
    """Tune a RandomForestRegressor with a Bayesian search and save the result.

    Prints the best (positive) MSE, the elapsed time and the best
    parameters, then hands the fitted search to ``save_model_object``.

    Returns a tuple ``(mse_as_string, elapsed_timedelta, best_params)``.
    """
    # Hyperparameter configuration space
    search_space = {
        'n_estimators': Integer(10, 100),
        "max_features": Integer(1, 13),
        'max_depth': Integer(5, 50),
        "min_samples_split": Integer(2, 11),
        "min_samples_leaf": Integer(1, 11),
        "criterion": ['mse', 'mae']
    }
    started_at = datetime.datetime.now()

    # 20 search iterations; raise n_iter if more time is available.
    Bayes_rf = BayesSearchCV(
        RandomForestRegressor(random_state=0),
        search_space,
        cv=3,
        n_iter=20,
        scoring='neg_mean_squared_error',
    )
    Bayes_rf.fit(X, y)

    # The scorer is negated MSE, so flip the sign for reporting.
    mse_text = str(-Bayes_rf.best_score_)
    print("RandomForestRegressor MSE score:" + mse_text)

    process_time_rf = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_rf))
    print("最佳超参数值集合:", Bayes_rf.best_params_)

    save_model_object(Bayes_rf, 'BO-GP', 'RandomForestRegressor',
                      'RandomForestRegressor')
    return mse_text, process_time_rf, Bayes_rf.best_params_
コード例 #13
0
def train_val(classifier, name, partition, trial, params, n_iter, X_train,
              y_train):
    """Fit a BayesSearchCV (F1 scoring) with logging and early stopping.

    After each search step the current best score is logged and recorded.
    Once at least ``stop_after_iters`` steps have run, the search stops if
    the best score is exactly 1.0 or if the mean of the latest window of
    best scores improved by less than ``early_stop_tol`` over the window
    that precedes it by one step.

    Returns the fitted search object.
    """
    bscv = BayesSearchCV(
        classifier,
        params,
        n_iter=n_iter,
        cv=kf,
        scoring='f1',
        return_train_score=True,
        n_jobs=3,
    )

    log(f'Making {bscv.total_iterations} iterations on {name}_{trial+1} ({partition})'
        )

    # One-element list so the nested callback can update the counter.
    total_iters = [0]
    prior_scores = []

    def on_step(optim_result):
        total_iters[0] += 1
        prior_scores.append(bscv.best_score_)

        log(f'{name}{total_iters}[{trial+1}] current best score: {bscv.best_score_:.4f}'
            )

        if total_iters[0] < stop_after_iters:
            return None

        if bscv.best_score_ != 1.0:
            latest = np.mean(prior_scores[-stop_after_iters:])
            earlier = np.mean(prior_scores[-(stop_after_iters + 1):-1])
            if not ((latest - earlier) < early_stop_tol):
                return None

        log(f'{name}{total_iters}[{trial+1}] stopped early')
        return True

    bscv.fit(X_train, y_train, callback=on_step)

    return bscv
コード例 #14
0
 def __init__(self, X_train, y_train):
     """Store the training data and build the (unfitted) Bayesian tuner."""
     self.X_train = X_train
     self.y_train = y_train

     base_classifier = XGBClassifier(
         n_jobs=1,
         objective="binary:logistic",
         eval_metric="auc",
         # fraction of the samples xgboost randomly draws before
         # growing trees, to limit overfitting
         subsample=0.8,
         use_label_encoder=False,
         random_state=42,
     )
     # (low, high) bounds for every tuned hyper-parameter
     bounds = {
         "learning_rate": (0.01, 1.0),
         "max_depth": (3, 7),
         "min_child_weight": (1, 10),
         "gamma": (1e-9, 0.5),
         "colsample_bytree": (0.01, 1.0),
         "colsample_bylevel": (0.01, 1.0),
         "reg_lambda": (1, 1000),
         "reg_alpha": (1e-9, 1.0),
         "n_estimators": (50, 100),
     }
     folds = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

     self.bayes_cv_tuner = BayesSearchCV(
         estimator=base_classifier,
         search_spaces=bounds,
         cv=folds,
         n_iter=10,
         refit=True,
         random_state=42,
     )
コード例 #15
0
def optimize(train_df, test_df, features, target, iters=50):
    """Tune an LGBMRegressor with a Bayesian search and score it on test data.

    The ``features`` argument selects the model inputs. Previously it was
    ignored and an out-of-scope name (``xs_features``) was used instead.

    Note: the ``'status'`` column of both frames is label-encoded in place.

    :param train_df: training DataFrame; must contain ``'status'``,
        ``features`` and ``target``.
    :param test_df: test DataFrame with the same columns.
    :param features: column label(s) used as model inputs.
    :param target: label of the target column.
    :param iters: number of search iterations.
    :return: tuple ``(rmse, fitted_search, test_predictions)``.
    """
    # transform categorical features
    le = LabelEncoder()
    train_df['status'] = le.fit_transform(train_df['status'])
    test_df['status'] = le.transform(test_df['status'])

    bayes_cv_tuner = BayesSearchCV(
        estimator=lgb.LGBMRegressor(random_state=42),
        search_spaces={
            'learning_rate': (0.01, 1.0, 'log-uniform'),
            'max_depth': (3, 5),
            'bagging_fraction': (0.01, 1.0, 'uniform'),
            'feature_fraction': (0.01, 1.0, 'uniform'),
            'reg_lambda': (1e-9, 1000, 'log-uniform'),
            'reg_alpha': (1e-9, 1.0, 'log-uniform'),
            'min_child_weight': (1e-3, 1e-1),
            'n_estimators': (100, 1000)
        },
        cv=StratifiedKFold(n_splits=2, shuffle=True, random_state=42),
        scoring=_scoring,
        n_jobs=1,
        n_iter=iters,
        verbose=0,
        refit=True,
        random_state=42)

    # fit the model on the caller-selected feature columns
    result = bayes_cv_tuner.fit(train_df[features], train_df[target])

    # get predictions
    y_true = test_df[target].values
    y_hat = result.predict(test_df[features].values)

    # return the root-mean-squared error, the fitted search and predictions
    return np.sqrt(np.mean((y_true - y_hat)**2)), result, y_hat
コード例 #16
0
 def tune_parameter(X, y, clf, params):
     """Tune ``clf`` with a Bayesian search (F1 scoring) and log the outcome.

     The search is fitted with a ``DeltaXStopper(0.000001)`` callback. The
     best parameters and score are printed and appended to
     ``kuaishou_stats.csv`` together with a timestamp.

     Returns the fitted search object.
     """
     gs = BayesSearchCV(
         estimator=clf,
         search_spaces=params,
         scoring="f1",
         n_iter=100,
         optimizer_kwargs={"base_estimator": "GP"},
         verbose=2,
         n_jobs=-1,
         cv=4,
         refit=True,
         random_state=1234,
     )
     gs.fit(X, y, callback=DeltaXStopper(0.000001))

     best_params, best_score = gs.best_params_, gs.best_score_
     print(best_params)
     print(best_score)

     str_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")

     # Header row, one row per tuned parameter, then the score row.
     rows = [["the best params for svm: "]]
     rows.extend([key, value] for key, value in best_params.items())
     rows.append(["the best score for svm: ", best_score, str_time])

     with open("kuaishou_stats.csv", 'a', newline='') as f:
         csv.writer(f).writerows(rows)
     return gs
コード例 #17
0
def bayes_search(X_train, y_train, model, search_params):
    '''
    Performs a BayesSearchCV on the provided model and search parameters.

    X_train:
        feature space array
    y_train:
        response array
    model:
        sklearn model or pipeline to be tuned
    search_params:
        dictionary of search parameters

    return:
        the fitted BayesSearchCV object (``fit`` returns the search itself)
    '''
    # No random_state here: the folds are not shuffled, so the split is
    # already deterministic, and recent scikit-learn versions raise a
    # ValueError when random_state is set while shuffle is False.
    skf = StratifiedKFold(n_splits=3)

    bayes_params = {
        'estimator': model,
        'scoring': 'roc_auc',
        'search_spaces': search_params,
        'n_iter': 50,
        'cv': skf,
        'n_jobs': 3,
        'verbose': 1
    }

    search_cv = BayesSearchCV(**bayes_params)
    search_cv_results = search_cv.fit(X_train, y_train)

    return search_cv_results
コード例 #18
0
def BayesSearchCV_optimisation(data):
    """Run a Bayesian hyperparameter search for a binary LGBMClassifier.

    The search is fitted on the module-level ``X_train`` / ``y_train`` with
    ``status_print`` as callback. Returns nothing.

    NOTE(review): the ``data`` argument is not used by this function;
    confirm against callers whether it should supply the training data.
    """

    # 'min_child_weight' used to appear twice in this literal; only the
    # later value, (0, 5), ever took effect, so that is the one kept.
    search_spaces = {
        'learning_rate': (0.01, 1.0, 'log-uniform'),
        'min_child_weight': (0, 5),
        'max_depth': (0, 50),
        'max_delta_step': (0, 20),
        'subsample': (0.01, 1.0, 'uniform'),
        'colsample_bytree': (0.01, 1.0, 'uniform'),
        'colsample_bylevel': (0.01, 1.0, 'uniform'),
        'reg_lambda': (1e-9, 1000, 'log-uniform'),
        'reg_alpha': (1e-9, 1.0, 'log-uniform'),
        'gamma': (1e-9, 0.5, 'log-uniform'),
        'n_estimators': (50, 100),
        'scale_pos_weight': (1e-6, 500, 'log-uniform')
    }

    estimator = LGBMClassifier(objective='binary', metric='auc')

    opt = BayesSearchCV(estimator,
                        search_spaces,
                        n_iter=100,
                        random_state=1234,
                        verbose=0)

    opt.fit(X_train, y_train, callback=status_print)
コード例 #19
0
    def search(self, dataset, hyperparameter_space, n_iter, cv,
               random_seed, scorer, verbose=False):
        '''
        Run a Bayesian hyperparameter search with a random forest on the
        training part of a dataset.

        A regressor is used when ``dataset.task`` is ``Task.REGRESSION``,
        a classifier otherwise.

        :input dataset: a Dataset object
        :input hyperparameter_space: a dictionary, keys are hyperparameters,
        values their spaces defined with skopt
        :input n_iter: the number of iterations of the bayesian search
        :input cv: the size of the cross validation
        :input random_seed: int, the seed for the bayesian search
        :input scorer: str, the name of the scorer
        :input verbose: bool, print the state of the search
        :return: a skopt.searchcv.BayesSearchCV object
        '''
        is_regression = dataset.task == Task.REGRESSION
        forest_cls = (RandomForestRegressor if is_regression
                      else RandomForestClassifier)
        estimator = forest_cls(n_jobs=-1, random_state=random_seed)

        opt = BayesSearchCV(
            estimator,
            hyperparameter_space,
            n_iter=n_iter,
            cv=cv,
            n_jobs=-1,
            random_state=random_seed,
            scoring=scorer,
            verbose=verbose,
        )
        opt.fit(dataset.X_train, dataset.y_train)

        return opt
コード例 #20
0
def _fit_svc(n_jobs=1, n_points=1, cv=None):
    """
    Helper: fit BayesSearchCV with an SVC on a larger synthetic
    classification task and assert the resulting score exceeds 0.9.
    """
    dataset_options = dict(
        n_samples=1000,
        n_features=20,
        n_redundant=0,
        n_informative=18,
        random_state=1,
        n_clusters_per_class=1,
    )
    X, y = make_classification(**dataset_options)

    svc_space = {
        'C': Real(1e-3, 1e+3, prior='log-uniform'),
        'gamma': Real(1e-3, 1e+1, prior='log-uniform'),
        'degree': Integer(1, 3),
    }
    opt = BayesSearchCV(
        SVC(),
        svc_space,
        n_jobs=n_jobs,
        n_iter=11,
        n_points=n_points,
        cv=cv,
        random_state=42,
    )
    opt.fit(X, y)

    fitted_score = opt.score(X, y)
    assert fitted_score > 0.9
コード例 #21
0
ファイル: test_sgl.py プロジェクト: richford/groupyr
def test_LogisticSGLCV_BayesSearchCV():
    """LogisticSGLCV with the "bayes" strategy must pick the same alpha and
    l1_ratio as an equivalent stand-alone BayesSearchCV."""
    X, y, groups = make_group_classification(random_state=42)
    cv = StratifiedKFold(3)

    l1_ratios = np.linspace(0, 1, 3)
    alphas = np.logspace(-4, 4, 3)

    # Built-in tuning path.
    clf_cv = LogisticSGLCV(
        alphas=alphas,
        l1_ratio=l1_ratios,
        groups=groups,
        cv=cv,
        tuning_strategy="bayes",
        n_bayes_iter=10,
        random_state=42,
    )
    clf_cv.fit(X, y)

    # Reference path: the same ranges searched by BayesSearchCV directly.
    alpha_range = (np.min(alphas), np.max(alphas), "log-uniform")
    l1_ratio_range = (np.min(l1_ratios), np.max(l1_ratios), "uniform")
    search_spaces = {"alpha": alpha_range, "l1_ratio": l1_ratio_range}

    gs = BayesSearchCV(LogisticSGL(groups=groups), search_spaces,
                       cv=cv, random_state=42, n_iter=10)
    gs.fit(X, y)

    reference = gs.best_params_
    assert len(clf_cv.alphas_) == 10
    assert reference["l1_ratio"] == clf_cv.l1_ratio_
    assert reference["alpha"] == clf_cv.alpha_
コード例 #22
0
def test_searchcv_rank():
    """
    Test whether the rank columns reported in ``cv_results_`` agree with
    ranks recomputed from the mean test and train scores.
    """

    # return_X_y must be passed by keyword: recent scikit-learn releases make
    # it keyword-only.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.75, random_state=0
    )

    random_state = 42

    opt = BayesSearchCV(
        SVC(random_state=random_state),
        {
            'C': Real(1e-6, 1e+6, prior='log-uniform'),
            'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
            'degree': Integer(1, 8),
            'kernel': Categorical(['linear', 'poly', 'rbf']),
        },
        n_iter=11, random_state=random_state, return_train_score=True
    )

    opt.fit(X_train, y_train)
    results = opt.cv_results_

    # Scores are negated so that the highest score receives rank 1;
    # method='min' gives tied scores the same (lowest) rank.
    test_rank = np.asarray(rankdata(-np.array(results["mean_test_score"]),
                                    method='min'), dtype=np.int32)
    train_rank = np.asarray(rankdata(-np.array(results["mean_train_score"]),
                                     method='min'), dtype=np.int32)

    assert_array_equal(np.array(results['rank_test_score']), test_rank)
    assert_array_equal(np.array(results['rank_train_score']), train_rank)
コード例 #23
0
    def fit(self, X, y, savepath=None, refit=True):
        """Run the Bayesian hyperparameter search and return the best estimator.

        :param X: training features.
        :param y: training targets.
        :param savepath: where ``_save_output`` writes results; defaults to
            the current working directory.
        :param refit: forwarded to ``BayesSearchCV``.
        :return: ``best_estimator_`` of the fitted search.
        :raises SystemExit: with status 1 if the search fails (after logging
            an error).
        """
        rst = dict()
        param_dict = self._get_bayesian_param_dict()

        if savepath is None:
            savepath = os.getcwd()

        estimator_name = self._estimator_name

        # Fall back to repeated k-fold when no CV strategy was configured;
        # the choice is stored on the instance.
        if self.cv is None:
            self.cv = ms.RepeatedKFold()

        model = BayesSearchCV(estimator=self.estimator,
                              search_spaces=param_dict,
                              n_iter=self.n_iter,
                              scoring=self.scoring,
                              cv=self.cv,
                              refit=refit)

        try:
            rst[estimator_name] = model.fit(X, y)
        except Exception:
            # Only ordinary errors are handled here, so KeyboardInterrupt
            # and SystemExit propagate untouched instead of being reported
            # as a bad search domain.
            log.error(
                'Hyperparameter optimization failed, likely due to inappropriate domain of values to optimize'
                ' one or more parameters over. Please check your input file and the sklearn docs for the mode'
                ' you are optimizing for the domain of correct values')
            # Exit with a non-zero status so the failure is visible to the
            # calling process (the bare exit() reported success).
            raise SystemExit(1)

        best_estimator = rst[estimator_name].best_estimator_

        self._save_output(savepath, rst)
        return best_estimator
コード例 #24
0
ファイル: XGboost.py プロジェクト: KevinAizen/Hockey-testing
class BayesSearch:
    """Thin wrapper around BayesSearchCV that prints progress and exports results."""

    def __init__(self, model, search_spaces, n_iter, export_path):
        """Build the (unfitted) search.

        :param model: estimator to tune.
        :param search_spaces: search space passed to ``BayesSearchCV``.
        :param n_iter: number of search iterations.
        :param export_path: path prefix for the files written by
            ``export_results``.
        """
        self.export_path = export_path
        self.bayes_cv_tuner = BayesSearchCV(
            model,
            search_spaces,
            cv=5,
            n_jobs=-1,
            n_iter=n_iter,
            verbose=0,
            refit=True,
            random_state=RANDOM_SEED,
        )

    def fit(self, X, y):
        """Fit the search, printing status after each step, then export results."""
        self.bayes_cv_tuner.fit(X, y, callback=self.print_status)
        self.export_results()

    def export_results(self):
        """Write CV results (CSV), best params (JSON) and the search (pickle)."""
        pd.DataFrame(self.bayes_cv_tuner.cv_results_).to_csv(
            f"{self.export_path}_cv_results.csv")
        pd.Series(self.bayes_cv_tuner.best_params_).to_json(
            f"{self.export_path}_best_params.json")
        dump(self.bayes_cv_tuner, f"{self.export_path}_bayes_search.pkl")

    def print_status(self, optim_results):
        """Print the number of evaluated models and the best result so far.

        Reads from this instance's own tuner; it previously referenced an
        unrelated global name ``opt`` for the model count.
        """
        print(f"""
Model #{len(self.bayes_cv_tuner.cv_results_['params'])}
Best: {self.bayes_cv_tuner.best_score_}
Best params: {self.bayes_cv_tuner.best_params_}
        """)
コード例 #25
0
def run_optimization_test():
    """Run BayesSearchCV on a TemplateClassifier and print the outcome.

    The search is fitted on all-zero dummy arrays with a single CV "split"
    that uses every row for both training and testing. The final parameters
    are printed next to the module-level ``targets``.
    """
    N_iter = 100

    # Every dimension is sampled uniformly over its interval.
    space = {
        "deltaEta": Real(0.0, 4.0, prior="uniform"),
        "deltaPhi": Real(0.0, 4.0, prior="uniform"),
        "maxNRegions": Integer(2, 100),
        "maxNVertices": Integer(1, 5),
        "nSigmaZBeamSpot": Real(0.0, 30.0, prior="uniform"),
        "nSigmaZVertex": Real(-1.0, 1.0, prior="uniform"),
        "originRadius": Real(0.0, 1.0, prior="uniform"),
        "ptMin": Real(0.0, 2.0, prior="uniform"),
        "zErrorBeamSpot": Real(0.0, 1.0, prior="uniform"),
        "zErrorVetex": Real(0.0, 1.0, prior="uniform"),
    }
    everything = slice(None)
    opt = BayesSearchCV(
        TemplateClassifier(),
        space,
        n_iter=N_iter,
        cv=[(everything, everything)],
        verbose=1,
    )

    dummy_X = np.zeros((100, 1))
    dummy_y = np.zeros((100))
    opt.fit(dummy_X, dummy_y)

    print(f"After {N_iter} iterations:")
    print("val. score: {}".format(opt.best_score_))
    print("test score: {}".format(opt.score(0.0, 0.0)))
    print("Final params:")
    params = opt.best_estimator_.get_params()
    for position, name in enumerate(params):
        line = "{0}:\t{1:2.2f} vs {2:2.2f}".format(
            name, params[name], targets[position])
        print(line)
コード例 #26
0
ファイル: nonlinear_regression.py プロジェクト: aaditis/egem
def train_models(models, params, Xtrain, Ytrain, kfold, filename):
    """
    Run k-fold Bayesian hyperparameter tuning for each model, fitting one
    search per output column of ``Ytrain``.

    :param models: A list of sklearn model objects.
    :param params: A list of dictionaries containing hyperparameters to tune.
    :param Xtrain: A numpy array containing the training data.
    :param Ytrain: A 2-D numpy array of outputs; indexed as ``Ytrain[:, j]``.
    :param kfold:  An integer or sklearn object determining the kfold operation
                   performed.
    :param filename: A list of paths, indexed in parallel with ``models``.

    NOTE(review): in the code visible here the collected searches are never
    saved or returned, ``model_path`` is never used, and the same search
    object is refit and appended for every column. Confirm whether a
    persistence step is missing.
    """
    print("Starting hyperparameter optimization and cross validation")
    for idx in range(len(models)):
        model_path = filename[idx]
        opt = BayesSearchCV(
            estimator=models[idx],
            search_spaces=params[idx],
            cv=kfold,
            n_iter=30,
            n_jobs=-1,
            random_state=0,
        )
        bar.start()
        mdls = []
        n_targets = Ytrain.shape[1]
        for col in range(n_targets):
            opt.fit(Xtrain, Ytrain[:, col])
            mdls.append(opt)
コード例 #27
0
def test_search_cv_internal_parameter_types():
    """BayesSearchCV must hand the estimator parameters as native Python
    types (float, int, str)."""

    class TypeCheckEstimator(BaseEstimator):
        """Estimator whose ``fit`` asserts the native type of each parameter."""

        def __init__(self, float_param=0.0, int_param=0, str_param=""):
            self.float_param = float_param
            self.int_param = int_param
            self.str_param = str_param

        def fit(self, X, y):
            expected_types = (
                (self.float_param, float),
                (self.int_param, int),
                (self.str_param, str),
            )
            for value, native_type in expected_types:
                assert isinstance(value, native_type)
            return self

        def score(self, X, y):
            return 0.0

    # The search below is an example that used to fail.
    X, y = make_classification(10, 4)

    search_spaces = {
        'float_param': [0.0, 1.0],
        'int_param': [0, 10],
        'str_param': ["one", "two", "three"],
    }
    search = BayesSearchCV(estimator=TypeCheckEstimator(),
                           search_spaces=search_spaces,
                           n_iter=11)

    search.fit(X, y)
コード例 #28
0
def bo_ANN(X, y):
    """Tune the ``ANN`` Keras model with a Bayesian search and save the result.

    Prints the best (positive) MSE, the elapsed time and the best
    parameters, builds ``ANN`` from the best parameters and hands it to
    ``save_model_object``.

    Returns a tuple ``(mse_as_string, elapsed_timedelta, best_params)``.
    """
    search_space = {
        'activation': ['relu', 'tanh'],
        'loss': ['mse'],
        'batch_size': [32, 64, 128],
        'neurons': Integer(256, 1024),
        'epochs': [20, 30, 50, 60]
    }
    started_at = datetime.datetime.now()

    regressor = KerasRegressor(build_fn=ANN, verbose=verbose)
    Bayes_ann = BayesSearchCV(
        regressor,
        search_space,
        cv=3,
        n_iter=10,
        scoring='neg_mean_squared_error',
    )
    Bayes_ann.fit(X, y)

    # The scorer is negated MSE, so flip the sign for reporting.
    mse_text = str(-Bayes_ann.best_score_)
    print("ANN MSE score:" + mse_text)

    process_time_ann = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_ann))
    print("最佳超参数值集合:", Bayes_ann.best_params_)

    model_bo_ann = ANN(**Bayes_ann.best_params_)
    save_model_object(model_bo_ann, 'BO-GP', 'ANN', 'ANN')
    return mse_text, process_time_ann, Bayes_ann.best_params_
def main(feature_set_key):
    """Search CatBoost hyper-parameters for one feature set and persist them.

    Loads the data through ``read_feature_data``, runs a ``BayesSearchCV``
    with a Gaussian-process base estimator, scored by Matthews correlation
    coefficient on 5 shuffled stratified folds, prints the best parameters
    as JSON, writes them to ``best_params.json`` and dumps the whole search
    object to ``optimizer.joblib``.

    Parameters
    ----------
    feature_set_key
        Passed to ``read_feature_data`` as ``feature_set``.
    """
    # The test split is returned by the loader but not used here.
    x_train, y_train, _x_test = read_feature_data(feature_set=feature_set_key)

    param_space = {
        'iterations': Integer(10, 1000),
        'depth': Integer(1, 8),
        'learning_rate': Real(0.01, 1.0, 'log-uniform'),
        'random_strength': Real(1e-9, 10, 'log-uniform'),
        'bagging_temperature': Real(0.0, 1.0),
        'border_count': Integer(1, 255),
        'l2_leaf_reg': Integer(2, 30),
        'scale_pos_weight': Real(0.01, 1.0, 'uniform'),
    }

    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    base_model = CatBoostClassifier(
        thread_count=2,
        loss_function='Logloss',
        od_type='Iter',
        verbose=True,
    )
    mcc_scorer = make_scorer(matthews_corrcoef)

    # NOTE(review): n_iter=1 evaluates a single parameter setting -- confirm
    # this is intentional and not a leftover from a smoke test.
    search = BayesSearchCV(
        base_model,
        param_space,
        scoring=mcc_scorer,
        cv=folds,
        n_iter=1,
        n_jobs=1,
        return_train_score=False,
        refit=True,
        optimizer_kwargs={'base_estimator': 'GP'},
        random_state=42,
    )
    search.fit(x_train, y_train)

    best = search.best_params_
    print(json.dumps(best, indent=4))

    with open('best_params.json', 'w') as params_file:
        json.dump(best, params_file)

    dump(search, 'optimizer.joblib')
コード例 #30
0
ファイル: regression.py プロジェクト: uke1212/predictor
    def xgb(self):
        """
        Tune a gradient boosting regressor with a Bayesian parameter search
        (5-fold cross validation on the training data), print the best
        parameters and score, and return a new, not-yet-fitted XGBRegressor
        configured with those parameters.

        ! XGBoost library does currently not install on AWS Lambda via Zappa !
        """

        base_model = XGBRegressor(booster='gbtree',
                                  objective='reg:squarederror')

        # Traditional Grid Search (slow), kept for reference:
        # xgb_parameters = [{
        #     'learning_rate': [x/100 for x in range(5, 10, 1)],
        #     'min_split_loss': [x/10 for x in range(1, 5, 1)],
        #     'max_depth': list(range(5, 10, 1)),
        #     'min_child_weight': list(range(1, 5, 1)),
        #     'colsample_bytree': [x/10 for x in range(5, 10, 1)],
        #     'random_state': [1]
        # }]
        #
        # xgb_search = GridSearchCV(
        #     estimator=estimator, param_grid=xgb_parameters,
        #     scoring=self.scorer, cv=5, n_jobs=-1, iid=False
        # )

        # Bayes Search (faster)
        search_space = {
            'learning_rate': Real(0.05, 0.5),
            'min_split_loss': Real(0.1, 0.5),
            'max_depth': Integer(5, 10),
            'min_child_weight': Integer(1, 5),
            'random_state': [1]
        }
        search = BayesSearchCV(
            estimator=base_model,
            search_spaces=search_space,
            scoring=self.scorer,
            cv=5,
            n_jobs=-1,
            n_iter=20,
        )

        # Stopper is defined elsewhere; its on_step is invoked as the fit
        # callback (presumably to end the search early -- verify there).
        fitted = search.fit(self.X_train,
                            self.y_train,
                            callback=Stopper(search).on_step)
        best = dict(fitted.best_params_)

        print('Best XGB params: ' + str(best))
        print('XGB score: ' + str(fitted.best_score_))

        # Only the four tuned values are forwarded; random_state is pinned
        # explicitly below, exactly as it was in the search space.
        tuned_names = ('learning_rate', 'min_split_loss', 'max_depth',
                       'min_child_weight')
        tuned = {name: best[name] for name in tuned_names}

        return XGBRegressor(booster='gbtree',
                            objective='reg:squarederror',
                            random_state=1,
                            n_jobs=-1,
                            **tuned)
コード例 #31
0
 def search_parameters(self, x: DataFrame, y: NpArray, parameters: dict,
                       n_folds_validation: int, model: Any, score_type: str) -> tuple:
     """Run a ``BayesSearchCV`` over ``parameters`` and report the winner.

     ``model`` is the estimator being tuned, ``n_folds_validation`` is passed
     as ``cv`` and ``score_type`` as ``scoring``. The search is fitted on
     ``x`` and ``y`` with ``verbose=10``.

     Returns a ``(best_params, best_score)`` tuple taken from the fitted
     search object.
     """
     search = BayesSearchCV(
         estimator=model,
         search_spaces=parameters,
         cv=n_folds_validation,
         scoring=score_type,
         verbose=10,
     )
     search.fit(x, y)
     return search.best_params_, search.best_score_
コード例 #32
0
    def fit(self, df: pd.DataFrame):
        """Fit the underlying ``LGBMRegressor`` on features derived from ``df``.

        Rows with missing feature values are dropped and the remaining rows
        are shuffled with a fixed seed before ``X`` and ``y`` are extracted.
        The hyperparameters come from one of three sources, checked in order:
        a Bayesian search (when ``self.optimize_hyperparams`` is truthy),
        ``self.hyperparams`` (when it is not ``None``), or
        ``self.DEFAULT_HYPERPARAMS``.

        Parameters
        ----------
        df : pd.DataFrame
            Raw input handed to ``self._to_feature_df``.

        Returns
        -------
        self
            With the fitted regressor stored in ``self.estimator_``.
        """
        df_features = self._to_feature_df(df, True)

        df_features = df_features.dropna()
        # frac=1 keeps every row; this is a seeded shuffle, not a subsample.
        df_features = df_features.sample(frac=1, random_state=42)

        X = self._get_X(df_features)
        y = self._get_y(df_features)

        if self.optimize_hyperparams:

            def scorer(estimator, X, y):
                # Predictions are clipped to [0, 1] before scoring; the MAE is
                # negated because the search maximises the score.
                y_pred = np.clip(np.squeeze(estimator.predict(X)), 0.0, 1.0)
                return -mean_absolute_error(y, y_pred)

            print(
                'IouEstimator: optimizing hyperparams with Bayesian Optimization'
            )
            opt = BayesSearchCV(
                LGBMRegressor(),
                {
                    'num_leaves':
                    Integer(2, 128, prior='log-uniform', base=2),
                    'min_child_samples':
                    Integer(2, 512, prior='log-uniform', base=2),
                    'max_bin':
                    Integer(2, 8192, prior='log-uniform', base=2),
                },
                n_iter=60,
                optimizer_kwargs={
                    'n_initial_points': 20,
                    'base_estimator': 'GP',
                },
                scoring=scorer,
                cv=3,
                # No refit here: the final estimator is fitted once below,
                # on the same code path as the non-search branches.
                refit=False,
                random_state=42,
                return_train_score=True,
            )
            opt.fit(X, y)
            print(f'Found hyperparams {opt.best_params_}')
            print(
                f"Train score: {opt.cv_results_['mean_train_score'][opt.best_index_]}"
            )
            print(f'Test score: {opt.best_score_}')
            estimator = LGBMRegressor(**opt.best_params_)
        elif self.hyperparams is not None:
            print(f'IOUEstimator: using hyperparams {self.hyperparams}')
            estimator = LGBMRegressor(**self.hyperparams)
        else:
            print(
                f'IOUEstimator: using default hyperparams {self.DEFAULT_HYPERPARAMS}'
            )
            estimator = LGBMRegressor(**self.DEFAULT_HYPERPARAMS)

        self.estimator_ = estimator.fit(X, y)

        return self
コード例 #33
0
def test_searchcv_runs(surrogate, n_jobs):
    """
    Test whether the cross validation search wrapper around sklearn
    models runs properly with available surrogates and with single
    or multiple workers.

    Parameters
    ----------

    * `surrogate` [str or None]:
        A class of the scikit-optimize surrogate used. None means
        to use default surrogate.

    * `n_jobs` [int]:
        Number of parallel processes to use for computations.

    """

    # `return_X_y` must be passed by keyword: it is keyword-only in current
    # scikit-learn releases, where `load_iris(True)` raises a TypeError.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.75, random_state=0
    )

    # None search space is only supported when only `step` function is used
    # NOTE(review): `(X, y)` is passed as one positional argument, so `fit`
    # receives the tuple as X and no y -- confirm this is intended.
    assert_raises(ValueError, BayesSearchCV(SVC(), None).fit, (X, y))

    # check if invalid dimensions are raising errors
    with pytest.raises(ValueError):
        BayesSearchCV(SVC(), {'C': '1 ... 100.0'})

    with pytest.raises(TypeError):
        BayesSearchCV(SVC(), ['C', (1.0, 1)])

    # forward the requested surrogate to the optimizer; None keeps the default
    if surrogate is not None:
        optimizer_kwargs = {'base_estimator': surrogate}
    else:
        optimizer_kwargs = None

    opt = BayesSearchCV(
        SVC(),
        {
            'C': Real(1e-6, 1e+6, prior='log-uniform'),
            'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
            'degree': Integer(1, 8),
            'kernel': Categorical(['linear', 'poly', 'rbf']),
        },
        n_jobs=n_jobs, n_iter=11,
        optimizer_kwargs=optimizer_kwargs
    )

    opt.fit(X_train, y_train)

    # this normally does not hold only if something is wrong
    # with the optimization procedure as such
    assert_greater(opt.score(X_test, y_test), 0.9)
コード例 #34
0
def test_searchcv_runs(surrogate, n_jobs, n_points, cv=None):
    """
    Test whether the cross validation search wrapper around sklearn
    models runs properly with available surrogates and with single
    or multiple workers and different number of parameter settings
    to ask from the optimizer in parallel.

    Parameters
    ----------

    * `surrogate` [str or None]:
        A class of the scikit-optimize surrogate used. None means
        to use default surrogate.

    * `n_jobs` [int]:
        Number of parallel processes to use for computations.

    * `n_points` [int]:
        Number of parameter settings to ask from the optimizer in
        parallel; forwarded to `BayesSearchCV` as `n_points`.

    * `cv` [default=None]:
        Forwarded unchanged to `BayesSearchCV` as `cv`.

    """

    # `return_X_y` must be passed by keyword: it is keyword-only in current
    # scikit-learn releases, where `load_iris(True)` raises a TypeError.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.75, random_state=0
    )

    # forward the requested surrogate to the optimizer; None keeps the default
    if surrogate is not None:
        optimizer_kwargs = {'base_estimator': surrogate}
    else:
        optimizer_kwargs = None

    opt = BayesSearchCV(
        SVC(),
        {
            'C': Real(1e-6, 1e+6, prior='log-uniform'),
            'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
            'degree': Integer(1, 8),
            'kernel': Categorical(['linear', 'poly', 'rbf']),
        },
        n_jobs=n_jobs, n_iter=11, n_points=n_points, cv=cv,
        optimizer_kwargs=optimizer_kwargs
    )

    opt.fit(X_train, y_train)

    # this normally does not hold only if something is wrong
    # with the optimization procedure as such
    assert_greater(opt.score(X_test, y_test), 0.9)
コード例 #35
0
def test_searchcv_runs_multiple_subspaces():
    """
    Test whether the BayesSearchCV runs without exceptions when
    multiple subspaces are given.

    Three subspaces are searched over a one-step pipeline: two with an
    explicit budget of one evaluation each, and one that falls back to
    the global `n_iter=2`, for an expected total of four evaluations.
    """

    # `return_X_y` must be passed by keyword: it is keyword-only in current
    # scikit-learn releases, where `load_iris(True)` raises a TypeError.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.75, random_state=0
    )

    # used to try different model classes
    pipe = Pipeline([
        ('model', SVC())
    ])

    # single categorical value of 'model' parameter sets the model class
    lin_search = {
        'model': Categorical([LinearSVC()]),
        'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
    }

    dtc_search = {
        'model': Categorical([DecisionTreeClassifier()]),
        'model__max_depth': Integer(1, 32),
        'model__min_samples_split': Real(1e-3, 1.0, prior='log-uniform'),
    }

    svc_search = {
        'model': Categorical([SVC()]),
        'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
        'model__gamma': Real(1e-6, 1e+1, prior='log-uniform'),
        'model__degree': Integer(1, 8),
        'model__kernel': Categorical(['linear', 'poly', 'rbf']),
    }

    # (space, 1) tuples carry their own iteration budget; the bare dict
    # uses the n_iter given to the constructor.
    opt = BayesSearchCV(
        pipe,
        [(lin_search, 1), (dtc_search, 1), svc_search],
        n_iter=2
    )

    opt.fit(X_train, y_train)

    # test if all subspaces are explored
    total_evaluations = len(opt.cv_results_['mean_test_score'])
    assert total_evaluations == 1+1+2, "Not all spaces were explored!"
コード例 #36
0
def _fit_svc(n_jobs=1, n_points=1, cv=None):
    """
    Fit a BayesSearchCV-wrapped SVC on a generated 1000-sample,
    20-feature classification problem and assert that its score on that
    same data exceeds 0.9.

    `n_jobs`, `n_points` and `cv` are forwarded unchanged to
    `BayesSearchCV`.
    """

    features, labels = make_classification(
        n_samples=1000,
        n_features=20,
        n_informative=18,
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=1,
    )

    svc_space = {
        'C': Real(1e-3, 1e+3, prior='log-uniform'),
        'gamma': Real(1e-3, 1e+1, prior='log-uniform'),
        'degree': Integer(1, 3),
    }

    searcher = BayesSearchCV(
        SVC(),
        svc_space,
        n_iter=11,
        n_jobs=n_jobs,
        n_points=n_points,
        cv=cv,
        random_state=42,
    )
    searcher.fit(features, labels)

    # scored on the training data itself, not on a held-out split
    train_score = searcher.score(features, labels)
    assert_greater(train_score, 0.9)
コード例 #37
0
def test_search_cv_internal_parameter_types():
    """
    Test whether the parameters that BayesSearchCV passes to its
    estimator are of standard python types - float, int, str.
    """

    # This estimator checks, at fit time, that the parameter values it
    # was given are native python types.
    class TypeCheckEstimator(BaseEstimator):
        def __init__(self, float_param=0.0, int_param=0, str_param=""):
            self.float_param = float_param
            self.int_param = int_param
            self.str_param = str_param

        def fit(self, X, y):
            expectations = (
                (self.float_param, float),
                (self.int_param, int),
                (self.str_param, str),
            )
            for value, native_type in expectations:
                assert isinstance(value, native_type)
            return self

        def score(self, X, y):
            # constant score: only the type checks in fit matter here
            return 0.0

    # Below is example code that used to not work.
    X, y = make_classification(10, 4)

    spaces = {
        'float_param': [0.0, 1.0],
        'int_param': [0, 10],
        'str_param': ["one", "two", "three"],
    }
    model = BayesSearchCV(
        estimator=TypeCheckEstimator(),
        search_spaces=spaces,
        n_iter=11,
    )

    model.fit(X, y)
コード例 #38
0
def test_searchcv_sklearn_compatibility():
    """
    Test whether the BayesSearchCV is compatible with base sklearn methods
    such as clone, set_params, get_params.

    A search object is cloned, the parameter names and value types of the
    original and the clone are compared, and then the original's search
    spaces are replaced via `set_params`. After fitting both, the number
    of evaluations must reflect each object's own search spaces.
    """

    # `return_X_y` must be passed by keyword: it is keyword-only in current
    # scikit-learn releases, where `load_iris(True)` raises a TypeError.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.75, random_state=0
    )

    # used to try different model classes
    pipe = Pipeline([
        ('model', SVC())
    ])

    # single categorical value of 'model' parameter sets the model class
    lin_search = {
        'model': Categorical([LinearSVC()]),
        'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
    }

    dtc_search = {
        'model': Categorical([DecisionTreeClassifier()]),
        'model__max_depth': Integer(1, 32),
        'model__min_samples_split': Real(1e-3, 1.0, prior='log-uniform'),
    }

    svc_search = {
        'model': Categorical([SVC()]),
        'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
        'model__gamma': Real(1e-6, 1e+1, prior='log-uniform'),
        'model__degree': Integer(1, 8),
        'model__kernel': Categorical(['linear', 'poly', 'rbf']),
    }

    opt = BayesSearchCV(
        pipe,
        [(lin_search, 1), svc_search],
        n_iter=2
    )

    # clone before mutating `opt`, so the clone keeps the original spaces
    opt_clone = clone(opt)

    params, params_clone = opt.get_params(), opt_clone.get_params()
    assert params.keys() == params_clone.keys()

    for param, param_clone in zip(params.items(), params_clone.items()):
        assert param[0] == param_clone[0]
        assert isinstance(param[1], type(param_clone[1]))

    opt.set_params(search_spaces=[(dtc_search, 1)])

    opt.fit(X_train, y_train)
    opt_clone.fit(X_train, y_train)

    total_evaluations = len(opt.cv_results_['mean_test_score'])
    total_evaluations_clone = len(opt_clone.cv_results_['mean_test_score'])

    # test if expected number of subspaces is explored:
    # `opt` now has one subspace with a budget of 1; the clone still has
    # lin_search (budget 1) plus svc_search (global n_iter=2).
    assert total_evaluations == 1
    assert total_evaluations_clone == 1+2