import numpy as np

from skopt import gp_minimize
from skopt.benchmarks import bench3
from skopt.callbacks import DeadlineStopper


def test_deadline_stopper():
    # A 0.1 ms budget is exceeded during the very first iteration,
    # so the optimization stops after a single call.
    deadline = DeadlineStopper(0.0001)
    gp_minimize(bench3, [(-1.0, 1.0)], callback=deadline, n_calls=10, random_state=1)
    assert len(deadline.iter_time) == 1
    assert np.sum(deadline.iter_time) > deadline.total_time

    # A generous 60-second budget lets all 10 calls finish, so the
    # accumulated iteration time stays below the deadline.
    deadline = DeadlineStopper(60)
    gp_minimize(bench3, [(-1.0, 1.0)], callback=deadline, n_calls=10, random_state=1)
    assert len(deadline.iter_time) == 10
    assert np.sum(deadline.iter_time) < deadline.total_time
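DeadlineStopper takes a total time budget in seconds and records each iteration's duration in iter_time; once the accumulated time exceeds total_time, the optimization stops. A minimal standalone sketch (not part of the test suite; the objective and budget are illustrative):

from skopt import gp_minimize
from skopt.callbacks import DeadlineStopper

# Stop the search once roughly 5 seconds of optimization time have elapsed.
res = gp_minimize(lambda v: v[0] ** 2,          # toy 1-D objective
                  [(-1.0, 1.0)],
                  callback=DeadlineStopper(total_time=5),
                  n_calls=50,
                  random_state=0)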
Example 2
from skopt import BayesSearchCV
from skopt.callbacks import DeadlineStopper, VerboseCallback


def bayes_search(estimator,
                 search_spaces,
                 X,
                 y,
                 fit_params=None,
                 scoring=None,
                 n_jobs=1,
                 cv=None,
                 n_points=1,
                 n_iter=50,
                 refit=False,
                 random_state=9527,
                 verbose=0,
                 deadline=60):
    # Wrap the estimator in a Gaussian-process-based Bayesian search.
    optimizer = BayesSearchCV(estimator,
                              search_spaces,
                              scoring=scoring,
                              cv=cv,
                              n_points=n_points,
                              n_iter=n_iter,
                              n_jobs=n_jobs,
                              return_train_score=False,
                              refit=refit,
                              optimizer_kwargs={'base_estimator': 'GP'},
                              random_state=random_state)
    # BatchTrainer.hyperopt_search is a project-specific helper that runs
    # the search with the given callbacks and returns the best parameters.
    best_params = BatchTrainer.hyperopt_search(
        optimizer,
        X,
        y,
        fit_params=fit_params,
        title='BayesSearchCV',
        callbacks=[VerboseCallback(verbose),
                   DeadlineStopper(deadline)])
    return best_params
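A hypothetical invocation, assuming a scikit-learn estimator and skopt dimensions (the estimator and search space below are illustrative, and X, y stand for your training data):

from sklearn.ensemble import RandomForestClassifier
from skopt.space import Integer

best_params = bayes_search(RandomForestClassifier(),
                           {'n_estimators': Integer(50, 500),
                            'max_depth': Integer(2, 16)},
                           X, y,
                           scoring='roc_auc',
                           cv=5,
                           deadline=120)  # give up after two minutes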
Example 3
from catboost import CatBoostClassifier
from sklearn.metrics import make_scorer, roc_auc_score
from sklearn.model_selection import TimeSeriesSplit
from skopt import BayesSearchCV
from skopt.callbacks import DeadlineStopper, VerboseCallback
from skopt.space import Integer, Real


def tune_with_bayes(X_train, y_train):
    # Score with ROC AUC computed from decision scores rather than labels.
    roc_auc = make_scorer(roc_auc_score,
                          greater_is_better=True,
                          needs_threshold=True)
    # Respect temporal ordering when cross-validating.
    time_split = TimeSeriesSplit(n_splits=10)

    cboost = CatBoostClassifier(thread_count=2, od_type='Iter', verbose=False)
    search_spaces = {
        'iterations': Integer(10, 1000),
        'depth': Integer(1, 8),
        'learning_rate': Real(0.01, 1.0, 'log-uniform'),
        'random_strength': Real(1e-9, 10, 'log-uniform'),
        'bagging_temperature': Real(0.0, 1.0),
        'border_count': Integer(1, 255),
        'l2_leaf_reg': Integer(2, 30),
        'scale_pos_weight': Real(0.01, 1.0, 'uniform')
    }

    opt = BayesSearchCV(cboost,
                        search_spaces,
                        scoring=roc_auc,
                        cv=time_split,
                        n_iter=100,
                        n_jobs=1,
                        return_train_score=False,
                        refit=True,
                        optimizer_kwargs={'base_estimator': 'GP'},
                        random_state=17)

    # report_performance is a project-specific helper that fits the optimizer
    # with the given callbacks and returns the best parameters (see the
    # report_perf sketch after Example 5 for a hypothetical reconstruction).
    best_params = report_performance(
        opt,
        X_train,
        y_train,
        'CatBoost',
        callbacks=[VerboseCallback(100),
                   DeadlineStopper(60 * 10)])

    # Override the tuned iteration count before refitting on the full data.
    best_params['iterations'] = 1000
    tuned_model = CatBoostClassifier(**best_params,
                                     od_type='Iter',
                                     one_hot_max_size=10)
    # tuned_model = CatBoostClassifier(**best_params,task_type = "GPU",od_type='Iter',one_hot_max_size=10)
    tuned_model.fit(X_train, y_train)
    return tuned_model
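Illustrative usage, assuming X_train and y_train are a time-ordered feature matrix and binary target (names here are assumptions):

tuned = tune_with_bayes(X_train, y_train)
probabilities = tuned.predict_proba(X_train)[:, 1]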
Example 4
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, train_test_split
from skopt.callbacks import CheckpointSaver, DeadlineStopper


def xgboost_grid(x, y):
    try:
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=42)
    except TypeError:
        # Fall back to a plain list when x is not an indexable array.
        x = list(x)
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=42)

    params = {
        "objective": ['reg:squarederror'],
        "colsample_bytree": [0.25, 0.5, 0.75],
        "learning_rate": [0.01, 0.1, 0.2, 0.3],
        "max_depth": [10, 20, 50],
        "gamma": [i * 0.05 for i in range(0, 5)],
        "lambda": [i * 0.05 for i in range(0, 4)],
        "alpha": [i * 0.05 for i in range(0, 4)],
        "eta": [i * 0.05 for i in range(0, 4)],
        "n_estimators": [400, 4000],
        "tree_method": ["gpu_hist"]
    }

    xgb_temp = xgb.XGBRegressor()
    reg = GridSearchCV(xgb_temp, params, verbose=5, cv=3)

    time_to_stop = 60 * 60
    ckpt_loc = "../data/train/bayes/ckpt_bayes_xgboost.pkl"
    checkpoint_callback = CheckpointSaver(ckpt_loc)
    # NOTE: skopt callbacks such as DeadlineStopper and CheckpointSaver are
    # honored by BayesSearchCV.fit; sklearn's GridSearchCV.fit has no
    # callback argument, so this call only works with a skopt search object
    # (see the sketch after this example).
    reg.fit(x_train,
            y_train,
            callback=[DeadlineStopper(time_to_stop), checkpoint_callback])
    print(reg.best_params_)
    print(reg.best_score_)
    return reg
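For reference, BayesSearchCV.fit does accept a callback argument, so a deadline-limited, checkpointed search would use it in place of GridSearchCV. A minimal sketch reusing the names defined above (list-valued entries in params are treated as categorical dimensions):

from skopt import BayesSearchCV

opt = BayesSearchCV(xgb.XGBRegressor(), params, cv=3, n_iter=50)
opt.fit(x_train, y_train,
        callback=[DeadlineStopper(time_to_stop), checkpoint_callback])
print(opt.best_params_)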
Example 5
# (Notebook excerpt: clf, search_spaces, skf, roc_auc, report_perf and
# notebookstart are defined in earlier cells; the commented lines below
# are leftover alternatives from the search-space definition.)
#                 'boosting_type':['Ordered'],
#                 'learning_rate': Real(0.05, 1.0, 'uniform'),
#                 'border_count': Integer(1, 25),
#                 'fold_len_multiplier': Real(1.1, 1.16, prior='uniform')}

# Setting up BayesSearchCV
opt = BayesSearchCV(
    clf,
    search_spaces,
    scoring=roc_auc,
    cv=skf,
    n_iter=5,
    n_points=100,
    n_jobs=1,  # use just 1 job with CatBoost in order to avoid segmentation fault
    return_train_score=False,
    refit=True,
    optimizer_kwargs={'base_estimator': 'ET'},  #'GP', 'RF', 'ET'
    random_state=57)

# Running the optimization (report_perf is a project helper; a sketch of
# what it might look like follows this example)
best_params = report_perf(
    opt,
    X,
    y,
    'CatBoost',
    callbacks=[VerboseCallback(20),
               DeadlineStopper(60 * 30)])

print("Notebook Runtime: %0.2f Minutes" % ((time.time() - notebookstart) / 60))
Example 6
from datetime import datetime

import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from skopt import BayesSearchCV
from skopt.callbacks import DeadlineStopper
from skopt.space import Integer, Real


def xgboost_bayes_basic(x, y, csv_loc="../data/train/bayes.csv"):

    global csv_loc_bayes
    csv_loc_bayes = csv_loc

    try:
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=42)
    except TypeError:
        # Fall back to a plain list when x is not an indexable array.
        x = list(x)
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=42)

    xgb_temp = xgb.XGBRegressor()
    reg = BayesSearchCV(xgb_temp, {
        "colsample_bytree": Real(0.5, 0.99),
        "max_depth": Integer(5, 25),
        "lambda": Real(0, 0.25),
        "learning_rate": Real(0.1, 0.25),
        "alpha": Real(0, 0.2),
        "eta": Real(0, 0.1),
        "gamma": Real(0, 0.1),
        "n_estimators": Integer(500, 5000),
        "objective": ["reg:squarederror"],
        "tree_method": ["gpu_hist"]
    },
                        n_iter=10000,
                        verbose=4,
                        cv=3)

    time_to_stop = 60 * 60 * 47  # 47-hour deadline

    now = datetime.now()
    year = now.strftime("%Y")
    month = now.strftime("%m")
    day = now.strftime("%d")
    hour = now.strftime("%H")
    minute = now.strftime("%M")
    sec = now.strftime("%S")
    #ckpt_loc = "../data/train/bayes/ckpt_bayes_xgboost" + str(year) + "_"+ str(month) + "_" + str(day) + "_" + \
    #           str(hour) + "_" + str(minute) + "_" + str(sec) + ".pkl"
    #checkpoint_callback = CheckpointSaver(ckpt_loc)
    #reg.fit(x_train, y_train, callback=[DeadlineStopper(time_to_stop), checkpoint_callback])

    # custom_skopt_scorer is a project-specific callback factory; see the
    # sketch after this example for a hypothetical reconstruction.
    custom_scorer = custom_skopt_scorer
    reg.fit(x_train,
            y_train,
            callback=[DeadlineStopper(time_to_stop),
                      custom_scorer(x, y)])
    #reg.fit(x_train, y_train, callback=[DeadlineStopper(time_to_stop)])

    print("MSE score:   " + str(mean_squared_error(y_test, reg.predict(x_test))))
    print("MAE score:   " + str(mean_absolute_error(y_test, reg.predict(x_test))))
    # r2_score expects (y_true, y_pred); the original call had them swapped.
    print("r2 score:    " + str(r2_score(y_test, reg.predict(x_test))))
    return reg
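skopt callbacks are simply callables that receive the intermediate scipy OptimizeResult after each step, so a hypothetical reconstruction of custom_skopt_scorer might look like this (the class name and logging are assumptions):

class CustomSkoptScorer:
    # Hypothetical stand-in for custom_skopt_scorer(x, y): logs the best
    # objective value seen so far after every optimization step.
    def __init__(self, X, y):
        self.X, self.y = X, y

    def __call__(self, res):
        # res.fun is the best (lowest) objective value found so far.
        print("best objective so far: %.5f" % res.fun)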