max_f1 = 0.0  # best F1 score found so far
best_params = None

for max_depth, min_child_weight in gridsearch_params:
    print("CV with max_depth={}, min_child_weight={}".format(max_depth, min_child_weight))

    # Update our parameters
    params['max_depth'] = max_depth
    params['min_child_weight'] = min_child_weight

    # Cross-validation
    cv_results = xgb.cv(
        params,
        dtrain,
        feval=custom_eval,
        num_boost_round=200,
        maximize=True,
        seed=16,
        nfold=5,
        early_stopping_rounds=10
    )

    # Find the best F1 score and the round at which it was reached
    mean_f1 = cv_results['test-f1_score-mean'].max()
    boost_rounds = cv_results['test-f1_score-mean'].argmax()
    print("\tF1 Score {} for {} rounds".format(mean_f1, boost_rounds))

    if mean_f1 > max_f1:
        max_f1 = mean_f1
        best_params = (max_depth, min_child_weight)

print("Best params: {}, {}, F1 Score: {}".format(best_params[0], best_params[1], max_f1))

Example #2
d_train = xgb.DMatrix(Xtrain_hashed, ytrain)
d_valid = xgb.DMatrix(Xtest_hashed, ytest)

# XGBoost parameters
params = {
    'n_estimators': 1024,
    'n_jobs': -1,
    'learning_rate': 1,
    'eval_metric': 'auc'
}
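
# Note: 'n_estimators' and 'n_jobs' come from the scikit-learn wrapper API; the native
# xgb.cv call below is expected to ignore them (the number of trees is controlled by
# num_boost_round instead).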

# Without early stopping, overfit model
bst_cv = xgb.cv(params,
                d_train,
                num_boost_round=10000,
                nfold=10,
                verbose_eval=100,
                early_stopping_rounds=None,
                as_pandas=True)

bst_cv.tail()

bst_cv[['train-auc-mean', 'test-auc-mean']].plot()
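
# With learning_rate=1 and no early stopping, train-auc-mean typically keeps rising while
# test-auc-mean flattens out or degrades: the overfitting referred to above.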

# 'bst' is not defined above; train a booster on the training set first (using the same
# number of rounds the CV ran) before evaluating on the hold-out set
bst = xgb.train(params, d_train, num_boost_round=len(bst_cv))
print(bst.eval(d_valid))

# With early stopping. Use CV to find the best number of trees

bst_cv = xgb.cv(
    params,
    d_train,