def grid_search(parameters,
                X_train_res,
                y_train_res,
                X_test,
                y_test,
                useTrainCV=False):
    """Grid-search XGBoost hyperparameters, refit the winner and print AUC.

    Runs a 10-fold stratified ``GridSearchCV`` (scored with ``roc_auc``) over
    ``parameters``, rebuilds an ``XGBRegressor`` from the best combination,
    optionally re-tunes the number of boosting rounds with early-stopped
    cross-validation, fits on the training data and prints the ROC AUC on the
    training and test sets.

    Args:
        parameters: Parameter grid handed to ``GridSearchCV``. Every key is
            forwarded to ``XGBRegressor`` when the best model is rebuilt.
        X_train_res: Training features.
        y_train_res: Training targets.
            NOTE(review): ROC AUC scoring and stratified folds suggest binary
            labels -- confirm against the caller.
        X_test: Held-out features, used as ``eval_set`` and for the test AUC.
        y_test: Held-out targets.
        useTrainCV: When true, run ``cv`` with early stopping and replace
            ``n_estimators`` with the number of rows in its result.

    Returns:
        None. All results are reported via ``print``.
    """
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=10)
    searcher = GridSearchCV(XGBRegressor(),
                            parameters,
                            scoring='roc_auc',
                            n_jobs=1,
                            cv=kfold,
                            verbose=1)
    best_params = searcher.fit(X_train_res, y_train_res).best_params_
    print(f"Best params: {best_params}")

    # Rebuild from *all* tuned parameters. Listing keys by hand silently
    # dropped any hyperparameter outside the list and raised KeyError when
    # the grid did not contain every listed key.
    xg_reg = XGBRegressor(**best_params)

    if useTrainCV:
        xgtrain = DMatrix(X_train_res, label=y_train_res)
        cvresult = cv(xg_reg.get_xgb_params(),
                      xgtrain,
                      num_boost_round=xg_reg.get_params()['n_estimators'],
                      folds=kfold,
                      metrics='auc',
                      early_stopping_rounds=20)
        # The length of the CV history becomes the new number of rounds.
        best_rounds = cvresult.shape[0]
        xg_reg.set_params(n_estimators=best_rounds)
        print(f"Best number of estimators: {best_rounds:d}")

    xg_reg.fit(X_train_res,
               y_train_res,
               eval_metric="error",
               eval_set=[(X_test, y_test)],
               verbose=False)

    y_pred_train = xg_reg.predict(X_train_res)
    print(f"AUC train: {roc_auc_score(y_train_res, y_pred_train):f}")
    y_pred = xg_reg.predict(X_test)
    print(f"AUC test: {roc_auc_score(y_test, y_pred):f}")
# Example #2
# 0
warnings.filterwarnings('ignore')
xgtrain = xgb.DMatrix(train_df, label=target.values)

# Step 1: with the learning rate fixed at 0.1, use early-stopped
# cross-validation to pick the number of boosting rounds.
alg = XGBRegressor(
    learning_rate=0.1,
    n_estimators=1000,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='reg:squarederror',
    seed=27)

xgb_param = alg.get_xgb_params()

cvresult = xgb.cv(xgb_param,
                  xgtrain,
                  num_boost_round=alg.get_params()['n_estimators'],
                  nfold=5,
                  metrics='rmse',
                  early_stopping_rounds=50)

# The length of the CV history is used as the number of estimators below.
n_estimators = cvresult.shape[0]

# Step 2: grid-search tree depth and minimum child weight.
param_test1 = {
    'max_depth': range(3, 10, 2),
    'min_child_weight': range(1, 6, 2),
}

gsearch1 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1,
                           n_estimators=n_estimators,
                           max_depth=5,
                           min_child_weight=1,
                           gamma=0,
                           subsample=0.8,
                           colsample_bytree=0.8,
                           objective='reg:squarederror',
                           nthread=4,
                           seed=27),
    param_grid=param_test1,
    # MSE is a loss: without greater_is_better=False the search would pick
    # the combination with the HIGHEST error. Scores are therefore reported
    # as negated MSE (closer to zero is better).
    scoring=make_scorer(mean_squared_error, greater_is_better=False),
    n_jobs=4,
    # NOTE(review): `iid` was removed in scikit-learn 0.24 -- drop this
    # argument when upgrading.
    iid=False,
    cv=5)
gsearch1.fit(train_df, target)
# Extract the design matrix and target once so cross-validation and the
# final fit are guaranteed to see exactly the same arrays.
x_train = df[list(features)].values
y_train = df["SPEED_AVG"].values

gb = XGBRegressor(learning_rate=0.1,
                  n_estimators=750,
                  max_depth=5,
                  min_child_weight=1,
                  gamma=0,
                  subsample=0.8,
                  colsample_bytree=0.8,
                  objective='reg:gamma',
                  nthread=4,
                  scale_pos_weight=1,
                  seed=27)

# 10-fold CV with early stopping on MAE; the length of the CV history
# replaces the initial n_estimators before the final fit.
xgb_param = gb.get_xgb_params()
xgtrain = xgb.DMatrix(x_train, label=y_train)
cvresult = xgb.cv(xgb_param,
                  xgtrain,
                  num_boost_round=gb.get_params()['n_estimators'],
                  nfold=10,
                  metrics='mae',
                  early_stopping_rounds=50)
gb.set_params(n_estimators=cvresult.shape[0])

gb.fit(x_train, y_train, eval_metric='mae')


def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of *y_pred*, in percent.

    Computes ``mean(|(y_true - y_pred) / y_true|) * 100`` element-wise.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        The MAPE as a float percentage. Zeros in ``y_true`` are not
        special-cased: they produce ``inf`` or ``nan`` in the result.
    """
    # Cast to float so integer inputs (notably unsigned dtypes) cannot wrap
    # around in the subtraction and inflate the error.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100