def test_multiclass_classification(objective):
    """Fit ``XGBClassifier`` on iris with ``objective`` and sanity-check it.

    Two shuffled folds are trained and evaluated through several ``predict``
    variants. Afterwards the shape of ``predict_proba`` is verified for a
    default classifier and for one using a custom soft-probability objective.
    """
    from sklearn.datasets import load_iris
    from sklearn.model_selection import KFold

    def check_pred(preds, labels, output_margin):
        # With ``output_margin`` every row is reduced to a class index via
        # ``argmax``; otherwise entries are compared to the labels directly.
        if output_margin:
            wrong = sum(
                1 for row, scores in enumerate(preds)
                if scores.argmax() != labels[row]
            )
        else:
            wrong = sum(
                1 for row, guess in enumerate(preds)
                if guess != labels[row]
            )
        err = wrong / float(len(preds))
        assert err < 0.4

    iris = load_iris()
    X, y = iris['data'], iris['target']

    folds = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in folds.split(X, y):
        classifier = xgb.XGBClassifier(objective=objective)
        xgb_model = classifier.fit(X[train_index], y[train_index])

        boosted_rounds = xgb_model.get_booster().num_boosted_rounds()
        assert boosted_rounds == xgb_model.n_estimators

        held_out = X[test_index]
        labels = y[test_index]

        # Exercise the optional keyword arguments of ``predict``. Every
        # prediction is computed before any of them is checked.
        outcomes = [
            (xgb_model.predict(held_out), False),
            (
                xgb_model.predict(
                    held_out, output_margin=True, ntree_limit=3
                ),
                True,
            ),
            (
                xgb_model.predict(
                    held_out, output_margin=True, ntree_limit=0
                ),
                True,
            ),
            (
                xgb_model.predict(
                    held_out, output_margin=False, ntree_limit=3
                ),
                False,
            ),
        ]
        for predicted, is_margin in outcomes:
            check_pred(predicted, labels, output_margin=is_margin)

    def check_proba_shape(model):
        # One row per sample, one column per class.
        proba = model.predict_proba(X)
        assert proba.shape[0] == X.shape[0]
        assert proba.shape[1] == model.n_classes_

    cls = xgb.XGBClassifier(n_estimators=4).fit(X, y)
    assert cls.n_classes_ == 3
    check_proba_shape(cls)

    # Custom objective: the default is multi:softprob, so no transformation
    # of the output is required.
    cls = xgb.XGBClassifier(
        n_estimators=4, objective=tm.softprob_obj(3)
    ).fit(X, y)
    check_proba_shape(cls)
def test_evaluation_metric():
    """Check how the scikit-learn wrappers handle callable ``eval_metric``.

    Scenarios, in order:

    1. A scikit-learn metric passed to the ``XGBRegressor`` constructor shows
       up by name in every line of captured evaluation output.
    2. A ``(predt, DMatrix)`` style metric passed to ``fit`` is expected to
       raise a ``UserWarning``.
    3. A custom ``merror`` callable matches the built-in ``"merror"`` metric
       for ``multi:softmax``.
    4. With a custom soft-probability objective the prediction passed to the
       metric no longer has one entry per sample, so the size assertion
       inside ``merror`` fires.
    """
    from sklearn.datasets import load_diabetes, load_digits
    from sklearn.metrics import mean_absolute_error

    X, y = load_diabetes(return_X_y=True)
    n_estimators = 16

    # Captured stderr is not inspected.
    with tm.captured_output() as (out, _):
        reg = xgb.XGBRegressor(
            tree_method="hist",
            eval_metric=mean_absolute_error,
            n_estimators=n_estimators,
        )
        reg.fit(X, y, eval_set=[(X, y)])
        lines = out.getvalue().strip().split('\n')

    # One evaluation line is expected per boosting round, each mentioning
    # the metric by the callable's name.
    assert len(lines) == n_estimators
    for line in lines:
        assert "mean_absolute_error" in line

    def metric(predt: np.ndarray, Xy: xgb.DMatrix):
        """Return a ``(name, value)`` pair: summed absolute error."""
        label = Xy.get_label()
        return "m", np.abs(predt - label).sum()

    with pytest.warns(UserWarning):
        reg = xgb.XGBRegressor(
            tree_method="hist",
            n_estimators=1,
        )
        reg.fit(X, y, eval_set=[(X, y)], eval_metric=metric)

    # NOTE: the function name must stay ``merror``; the results below are
    # looked up under that key.
    def merror(y_true: np.ndarray, predt: np.ndarray):
        """Return the fraction of samples whose prediction differs from the label.

        Raises ``AssertionError`` when ``predt`` does not hold exactly one
        value per sample.
        """
        n_samples = y_true.shape[0]
        assert n_samples == predt.size
        # Compare against the labels handed to the metric, not the
        # enclosing test's ``y`` variable.
        return np.sum(y_true != predt) / n_samples

    X, y = load_digits(n_class=10, return_X_y=True)

    clf = xgb.XGBClassifier(
        use_label_encoder=False,
        tree_method="hist",
        eval_metric=merror,
        n_estimators=16,
        objective="multi:softmax",
    )
    clf.fit(X, y, eval_set=[(X, y)])
    custom = clf.evals_result()

    clf = xgb.XGBClassifier(
        use_label_encoder=False,
        tree_method="hist",
        eval_metric="merror",
        n_estimators=16,
        objective="multi:softmax",
    )
    clf.fit(X, y, eval_set=[(X, y)])
    internal = clf.evals_result()

    np.testing.assert_allclose(
        custom["validation_0"]["merror"],
        internal["validation_0"]["merror"],
        atol=1e-6,
    )

    clf = xgb.XGBRFClassifier(
        use_label_encoder=False,
        tree_method="hist",
        n_estimators=16,
        objective=tm.softprob_obj(10),
        eval_metric=merror,
    )
    with pytest.raises(AssertionError):
        # shape check inside the `merror` function
        clf.fit(X, y, eval_set=[(X, y)])