X_train, X_valid, y_train, y_valid = train_test_split(train_w2v, train["label"],
                                                      test_size=0.3, random_state=0)

"""Instantiating the XGBoost classifier"""

classifier = XGBClassifier(n_estimators=1000, max_depth=6)
classifier.fit(X_train, y_train)

# Getting the F1 score on the validation set
y_pred = classifier.predict(X_valid)
f1Score = f1_score(y_valid, y_pred)
print(f1Score * 100)

"""So far, the best score we have got is from XGBoost."""

"""Let's tune the hyperparameters of XGBoost."""

import xgboost as xgb

dtrain = xgb.DMatrix(X_train, label=y_train)
dvalid = xgb.DMatrix(X_valid, label=y_valid)

# Parameters that we are going to tune
params = {
    'objective': 'binary:logistic',
    'max_depth': 6,
    'min_child_weight': 1,
    'eta': .3,
    'subsample': 1,
    'colsample_bytree': 1
}

# We will prepare a custom evaluation metric to calculate the F1 score.
def custom_eval(preds, dtrain):
    labels = dtrain.get_label().astype(int)  # np.int is deprecated; use the builtin int
    # A 0.5 probability threshold to binarize predictions is assumed here
    preds = (preds >= 0.5).astype(int)
    return [('f1_score', f1_score(labels, preds))]
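"""
With the DMatrix objects, the params dict, and custom_eval in place, the tuning can
proceed as a grid search over max_depth and min_child_weight. The sketch below is
illustrative, not the exact grid used here: the ranges (6-9 and 5-7), the 200
boosting rounds, the 5 CV folds, and the seed are all assumed values.
"""

# Illustrative grid over the two parameters being tuned
gridsearch_params = [
    (max_depth, min_child_weight)
    for max_depth in range(6, 10)
    for min_child_weight in range(5, 8)
]

max_f1 = 0.0
best_params = None
for max_depth, min_child_weight in gridsearch_params:
    print("CV with max_depth={}, min_child_weight={}".format(max_depth, min_child_weight))
    params['max_depth'] = max_depth
    params['min_child_weight'] = min_child_weight
    # Cross-validate this parameter pair, scoring with the custom F1 metric
    cv_results = xgb.cv(
        params,
        dtrain,
        feval=custom_eval,        # custom F1 metric defined above
        num_boost_round=200,
        maximize=True,
        seed=16,
        nfold=5,
        early_stopping_rounds=10
    )
    mean_f1 = cv_results['test-f1_score-mean'].max()
    print("\tF1 Score {}".format(mean_f1))
    if mean_f1 > max_f1:
        max_f1 = mean_f1
        best_params = (max_depth, min_child_weight)

print("Best params: max_depth={}, min_child_weight={}, F1: {}".format(
    best_params[0], best_params[1], max_f1))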
plt.title('XGBoost ROC Curve')
plt.show()

# Use xgb_clf (not xgb) as the name, so the classifier does not shadow the
# xgboost module needed for xgb.DMatrix and xgb.cv below
xgb_clf = XGBClassifier(n_estimators=1000, n_jobs=-1, learning_rate=.5,
                        min_child_weight=100, monotone_constraints='(0)')
xgb_clf.fit(Xtrain_hashed, ytrain)

soft = xgb_clf.predict_proba(Xtest_hashed)
predictions = xgb_clf.predict(Xtest_hashed)
print("The ROC AUC is : " + str(roc_auc_score(ytest, soft[:, 1])))
print(classification_report(ytest, predictions))

"""### Cross-validation: best CV, with and without early stopping"""

d_train = xgb.DMatrix(Xtrain_hashed, ytrain)
d_valid = xgb.DMatrix(Xtest_hashed, ytest)

# XGBoost parameters 1
# (n_estimators and n_jobs are sklearn-wrapper arguments; with xgb.cv the number
# of rounds comes from num_boost_round, so native booster parameters are used here)
params = {
    'objective': 'binary:logistic',
    'learning_rate': 1,
    'eval_metric': 'auc',
    'nthread': -1
}

# Without early stopping: an overfit model
bst_cv = xgb.cv(params, d_train, num_boost_round=10000, nfold=10)
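"""
The early-stopping counterpart is sketched below; the 50-round patience is an
assumed value, not taken from the source. xgb.cv stops adding trees once the
mean held-out AUC has not improved for 50 consecutive rounds, which guards
against the overfitting seen in the run above.
"""

# With early stopping (50-round patience is an illustrative assumption)
bst_cv_es = xgb.cv(params, d_train, num_boost_round=10000, nfold=10,
                   early_stopping_rounds=50, verbose_eval=False)

# xgb.cv returns a DataFrame with one row per boosting round actually kept
print("Rounds kept: " + str(len(bst_cv_es)))
print("Best CV AUC: " + str(bst_cv_es['test-auc-mean'].max()))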