from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

X_train, X_valid, y_train, y_valid = train_test_split(train_w2v, train["label"], test_size=0.3, random_state=0)

""" Instantiting the  xgboost classifier """
classifier = xgb(n_estimators=1000, max_depth=6)
classifier.fit(X_train, y_train)
# Getting the F1 score on the validation set
y_pred = classifier.predict(X_valid)

f1Score = f1_score(y_valid, y_pred)
print(f1Score*100)

""" Hence until now the best score we got was from xgboost """

""" Lets tune hyperparameters of  xgboost """
import xgboost as xgb
dtrain = xgb.DMatrix(X_train, label=y_train) 
dvalid = xgb.DMatrix(X_valid, label=y_valid)
# Parameters that we are going to tune 
params = {
    'objective': 'binary:logistic',
    'max_depth': 6,
    'min_child_weight': 1,
    'eta': 0.3,
    'subsample': 1,
    'colsample_bytree': 1
}

# We will prepare a custom evaluation metric to calculate the F1 score.

def custom_eval(preds, dtrain):
    labels = dtrain.get_label().astype(int)
    # Threshold the predicted probabilities at 0.5 (an assumed cut-off) to get hard labels
    preds = (preds >= 0.5).astype(int)
    return 'f1_score', f1_score(labels, preds)
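""" A minimal sketch (not from the original) of how custom_eval plugs into
xgb.cv to tune max_depth and min_child_weight; the grid values, round counts,
and early-stopping patience below are assumptions. """
best = None
for max_depth in (4, 6, 8):
    for min_child_weight in (1, 5):
        params['max_depth'] = max_depth
        params['min_child_weight'] = min_child_weight
        cv_results = xgb.cv(params, dtrain,
                            num_boost_round=200,
                            nfold=5,
                            feval=custom_eval,
                            maximize=True,            # higher F1 is better
                            early_stopping_rounds=10)
        score = cv_results['test-f1_score-mean'].max()
        if best is None or score > best[0]:
            best = (score, max_depth, min_child_weight)
print(best)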
""" Example #2 """

import matplotlib.pyplot as plt

# Tail of an ROC-curve plot from a preceding, truncated cell
plt.title('XGBoost ROC Curve')
plt.show()

import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score, classification_report

# Use a distinct name so the estimator does not shadow the xgboost module (xgb),
# which is needed below for xgb.DMatrix and xgb.cv
xgb_clf = XGBClassifier(n_estimators=1000,
                        n_jobs=-1,
                        learning_rate=.5,
                        min_child_weight=100,
                        monotone_constraints='(0)')
xgb_clf.fit(Xtrain_hashed, ytrain)
soft = xgb_clf.predict_proba(Xtest_hashed)
print("The ROC AUC is : " + str(roc_auc_score(ytest, soft[:, 1])))

# Hard-label predictions for the classification report
predictions = xgb_clf.predict(Xtest_hashed)
print(classification_report(ytest, predictions))
"""### Cross validation-best cv-With and without early stopping"""

d_train = xgb.DMatrix(Xtrain_hashed, ytrain)
d_valid = xgb.DMatrix(Xtest_hashed, ytest)

# XGBoost parameters (native API: the number of boosting rounds is set by
# num_boost_round in xgb.cv/xgb.train, and nthread replaces n_jobs)
params = {
    'objective': 'binary:logistic',
    'nthread': -1,
    'learning_rate': 1,
    'eval_metric': 'auc'
}
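""" The heading above promises runs with and without early stopping. As a
sketch (not from the original; the patience value is an assumption), the
early-stopping variant would monitor validation AUC on d_valid: """
bst = xgb.train(params,
                d_train,
                num_boost_round=10000,
                evals=[(d_train, 'train'), (d_valid, 'valid')],
                early_stopping_rounds=50)   # stop after 50 rounds without AUC improvement
print(bst.best_iteration, bst.best_score)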

# Without early stopping, the model is free to overfit
bst_cv = xgb.cv(params,
                d_train,
                num_boost_round=10000,
                nfold=10,