Example #1
import numpy as np
import pytest
import xgboost as xgb
from xgboost import testing as tm  # xgboost's bundled test helpers (softprob_obj)

rng = np.random.RandomState(1994)  # fixed seed for the KFold shuffle below


@pytest.mark.parametrize("objective", ["multi:softmax", "multi:softprob"])
def test_multiclass_classification(objective):
    from sklearn.datasets import load_iris
    from sklearn.model_selection import KFold

    def check_pred(preds, labels, output_margin):
        if output_margin:
            err = sum(1 for i in range(len(preds))
                      if preds[i].argmax() != labels[i]) / float(len(preds))
        else:
            err = sum(1 for i in range(len(preds))
                      if preds[i] != labels[i]) / float(len(preds))
        assert err < 0.4

    iris = load_iris()
    y = iris['target']
    X = iris['data']
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf.split(X, y):
        xgb_model = xgb.XGBClassifier(objective=objective).fit(
            X[train_index], y[train_index])
        assert (xgb_model.get_booster().num_boosted_rounds() ==
                xgb_model.n_estimators)
        preds = xgb_model.predict(X[test_index])
        # exercise other parameters of XGBClassifier().predict; ntree_limit
        # is the legacy tree-count cap (newer releases use iteration_range)
        preds2 = xgb_model.predict(X[test_index],
                                   output_margin=True,
                                   ntree_limit=3)
        preds3 = xgb_model.predict(X[test_index],
                                   output_margin=True,
                                   ntree_limit=0)
        preds4 = xgb_model.predict(X[test_index],
                                   output_margin=False,
                                   ntree_limit=3)
        labels = y[test_index]

        check_pred(preds, labels, output_margin=False)
        check_pred(preds2, labels, output_margin=True)
        check_pred(preds3, labels, output_margin=True)
        check_pred(preds4, labels, output_margin=False)

    cls = xgb.XGBClassifier(n_estimators=4).fit(X, y)
    assert cls.n_classes_ == 3
    proba = cls.predict_proba(X)
    assert proba.shape[0] == X.shape[0]
    assert proba.shape[1] == cls.n_classes_

    # Custom objective: the default objective is multi:softprob, so the
    # helper's output needs no extra transformation for predict_proba.
    cls = xgb.XGBClassifier(n_estimators=4,
                            objective=tm.softprob_obj(3)).fit(X, y)
    proba = cls.predict_proba(X)
    assert proba.shape[0] == X.shape[0]
    assert proba.shape[1] == cls.n_classes_
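
This example (and Example #2 below) relies on tm.softprob_obj, xgboost's test
helper that builds a custom multiclass objective. As a rough sketch
(assumption: the real helper computes softmax cross-entropy gradients; its
exact layout and details may differ), it looks like:

def softprob_obj(n_classes):
    """Return a custom objective for softmax cross-entropy (sketch)."""
    def softmax(x):
        e = np.exp(x - np.max(x))
        return e / e.sum()

    def objective(labels, predt):
        # predt: raw margins, one row of n_classes scores per sample
        rows = labels.shape[0]
        grad = np.zeros((rows, n_classes))
        hess = np.zeros((rows, n_classes))
        eps = 1e-6
        for r in range(rows):
            p = softmax(predt[r, :])
            for c in range(n_classes):
                target = 1.0 if labels[r] == c else 0.0
                grad[r, c] = p[c] - target
                # keep the Hessian away from zero for numerical stability
                hess[r, c] = max(2.0 * p[c] * (1.0 - p[c]), eps)
        # flatten sample-major: one entry per (sample, class) pair
        return grad.reshape(-1), hess.reshape(-1)

    return objective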
Example #2
import numpy as np
import pytest
import xgboost as xgb
from xgboost import testing as tm  # test helpers: captured_output, softprob_obj


def test_evaluation_metric():
    from sklearn.datasets import load_diabetes, load_digits
    from sklearn.metrics import mean_absolute_error

    X, y = load_diabetes(return_X_y=True)
    n_estimators = 16

    with tm.captured_output() as (out, err):
        reg = xgb.XGBRegressor(
            tree_method="hist",
            eval_metric=mean_absolute_error,
            n_estimators=n_estimators,
        )
        reg.fit(X, y, eval_set=[(X, y)])
        lines = out.getvalue().strip().split('\n')

    assert len(lines) == n_estimators
    for line in lines:
        assert line.find("mean_absolute_error") != -1

    def metric(predt: np.ndarray, Xy: xgb.DMatrix):
        # old Booster-style metric signature: (predictions, DMatrix)
        y = Xy.get_label()
        return "m", np.abs(predt - y).sum()

    # passing eval_metric to fit() is deprecated, hence the UserWarning
    with pytest.warns(UserWarning):
        reg = xgb.XGBRegressor(
            tree_method="hist",
            n_estimators=1,
        )
        reg.fit(X, y, eval_set=[(X, y)], eval_metric=metric)

    def merror(y_true: np.ndarray, predt: np.ndarray):
        n_samples = y_true.shape[0]
        assert n_samples == predt.size
        errors = np.zeros(n_samples)
        # compare against the labels passed in, not the outer y
        errors[y_true != predt] = 1.0
        return np.sum(errors) / n_samples

    X, y = load_digits(n_class=10, return_X_y=True)

    clf = xgb.XGBClassifier(use_label_encoder=False,  # silences the 1.x label-encoder warning
                            tree_method="hist",
                            eval_metric=merror,
                            n_estimators=16,
                            objective="multi:softmax")
    clf.fit(X, y, eval_set=[(X, y)])
    custom = clf.evals_result()

    clf = xgb.XGBClassifier(use_label_encoder=False,
                            tree_method="hist",
                            eval_metric="merror",
                            n_estimators=16,
                            objective="multi:softmax")
    clf.fit(X, y, eval_set=[(X, y)])
    internal = clf.evals_result()

    np.testing.assert_allclose(custom["validation_0"]["merror"],
                               internal["validation_0"]["merror"],
                               atol=1e-6)

    clf = xgb.XGBRFClassifier(
        use_label_encoder=False,
        tree_method="hist",
        n_estimators=16,
        objective=tm.softprob_obj(10),
        eval_metric=merror,
    )
    with pytest.raises(AssertionError):
        # with a custom objective the wrapper skips the inverse link, so the
        # metric receives raw (n_samples, n_classes) margins and the shape
        # assertion inside `merror` fires
        clf.fit(X, y, eval_set=[(X, y)])
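
The final block documents a sharp edge: when a custom objective is set, the
sklearn wrapper cannot apply the inverse link, so the eval metric receives raw
(n_samples, n_classes) margins rather than class labels, and merror's shape
assertion fires. A metric can guard against this by reducing the margins
itself; a minimal sketch (the name merror_robust is illustrative, not part of
xgboost):

def merror_robust(y_true: np.ndarray, predt: np.ndarray) -> float:
    # raw multiclass margins arrive as a 2-D array; reduce to class ids
    if predt.ndim == 2:
        predt = predt.argmax(axis=1)
    return float(np.mean(y_true != predt))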