Example 1
    def test_cv_explicit_fold_indices_labels(self):
        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
                  'reg:squarederror'}
        N = 100
        F = 3
        dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N))
        folds = [
            # Train        Test
            ([1, 3], [5, 8]),
            ([7, 9], [23, 43, 11]),
        ]

        # Use callback to log the test labels in each fold
        def cb(cbackenv):
            print([fold.dtest.get_label() for fold in cbackenv.cvfolds])

        # Run cross validation and capture standard out to test callback result
        with tm.captured_output() as (out, err):
            xgb.cv(
                params, dm, num_boost_round=1, folds=folds, callbacks=[cb],
                as_pandas=False
            )
            output = out.getvalue().strip()
        solution = ('[array([5., 8.], dtype=float32), array([23., 43., 11.],' +
                    ' dtype=float32)]')
        assert output == solution
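
All of these snippets come from XGBoost's test suite and assume "import xgboost as xgb", "import numpy as np", and the suite's testing helpers imported as "tm". The key helper is tm.captured_output(), a context manager that swaps sys.stdout/sys.stderr for in-memory buffers so the tests can assert on logged output. A minimal sketch of such a helper (the real one in XGBoost's testing module may differ in detail):

import sys
from contextlib import contextmanager
from io import StringIO

@contextmanager
def captured_output():
    # Redirect stdout/stderr into StringIO buffers for the duration of the block
    new_out, new_err = StringIO(), StringIO()
    old_out, old_err = sys.stdout, sys.stderr
    try:
        sys.stdout, sys.stderr = new_out, new_err
        yield new_out, new_err
    finally:
        # Restore the real streams even if the body raises
        sys.stdout, sys.stderr = old_out, old_err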
Example 2
    def run_evaluation_monitor(self, D_train, D_valid, rounds, verbose_eval):
        evals_result = {}
        with tm.captured_output() as (out, err):
            xgb.train(
                {'objective': 'binary:logistic', 'eval_metric': 'error'},
                D_train,
                evals=[(D_train, 'Train'), (D_valid, 'Valid')],
                num_boost_round=rounds,
                evals_result=evals_result,
                verbose_eval=verbose_eval,
            )
            output: str = out.getvalue().strip()

        if int(verbose_eval) == 1:
            # Should print info for every iteration
            assert len(output.split('\n')) == rounds
        elif int(verbose_eval) > rounds:
            # Should print only the first and the latest iteration info
            assert len(output.split('\n')) == 2
        else:
            # Should print info for each period in addition to the first and
            # latest iteration
            num_periods = rounds // int(verbose_eval)
            # An extra line is emitted when the latest iteration does not fall
            # on a period boundary
            is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
            assert len(output.split('\n')) == 1 + num_periods + int(is_extra_info_required)
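
A hypothetical driver for this helper, exercising each of the three branches above (D_train, D_valid, and the surrounding test class are assumed to exist as in the other examples):

        # Sketch only: names follow the examples on this page
        self.run_evaluation_monitor(D_train, D_valid, rounds=10, verbose_eval=True)  # one line per iteration
        self.run_evaluation_monitor(D_train, D_valid, rounds=10, verbose_eval=20)    # first and last only
        self.run_evaluation_monitor(D_train, D_valid, rounds=10, verbose_eval=4)     # every 4th iteration, plus first/last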
Example 3
def test_parameter_validation():
    # An unknown parameter ('foo') should make XGBoost log a warning naming it
    reg = xgb.XGBRegressor(foo='bar', verbosity=1)
    X = np.random.randn(10, 10)
    y = np.random.randn(10)
    with tm.captured_output() as (out, err):
        reg.fit(X, y)
        output = out.getvalue().strip()

    assert output.find('foo') != -1

    # With only valid parameters, nothing should be printed
    reg = xgb.XGBRegressor(n_estimators=2, missing=3,
                           importance_type='gain', verbosity=1)
    X = np.random.randn(10, 10)
    y = np.random.randn(10)
    with tm.captured_output() as (out, err):
        reg.fit(X, y)
        output = out.getvalue().strip()

    assert len(output) == 0
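
The same validation can be exercised through the native API; a minimal sketch under the same imports (the exact wording of the warning is version-dependent, so the snippet only shows how to trigger it):

import numpy as np
import xgboost as xgb

X = np.random.randn(10, 10)
y = np.random.randn(10)
dtrain = xgb.DMatrix(X, label=y)
# 'foo' is not a recognised parameter, so training logs a warning that mentions it
xgb.train({'foo': 'bar', 'verbosity': 1}, dtrain, num_boost_round=1)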
Example 4
    def test_training_on_cpu_only_env(self):
        assert os.environ['CUDA_VISIBLE_DEVICES'] == '-1'
        rng = np.random.RandomState(1994)
        X = rng.randn(10, 10)
        y = rng.randn(10)
        with tm.captured_output() as (out, err):
            # Expect a clean XGBoostError rather than a raw thrust/CUDA exception
            with pytest.raises(xgb.core.XGBoostError):
                xgb.train({'tree_method': 'gpu_hist'}, xgb.DMatrix(X, y))

            assert out.getvalue().find('No visible GPU is found') != -1
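
The assert on CUDA_VISIBLE_DEVICES implies the variable was set to '-1' before the test ran. One way to arrange that in pytest is a monkeypatch fixture; a sketch, not necessarily how the original suite does it (note the variable must be set before any CUDA context is created in the process):

import pytest

@pytest.fixture(autouse=True)
def hide_gpus(monkeypatch):
    # Make CUDA report no visible devices for the duration of each test
    monkeypatch.setenv('CUDA_VISIBLE_DEVICES', '-1')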
Example 5
    def run_evaluation_monitor(
        self,
        D_train: xgb.DMatrix,
        D_valid: xgb.DMatrix,
        rounds: int,
        verbose_eval: Union[bool, int]
    ):
        def check_output(output: str) -> None:
            if int(verbose_eval) == 1:
                # Should print each iteration info
                assert len(output.split('\n')) == rounds
            elif int(verbose_eval) > rounds:
                # Should print first and latest iteration info
                assert len(output.split('\n')) == 2
            else:
                # Should print info for each period in addition to the first
                # and latest iteration
                num_periods = rounds // int(verbose_eval)
                # Extra information is required for latest iteration
                is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
                assert len(output.split('\n')) == (
                    1 + num_periods + int(is_extra_info_required)
                )

        evals_result: xgb.callback.TrainingCallback.EvalsLog = {}
        params = {'objective': 'binary:logistic', 'eval_metric': 'error'}
        with tm.captured_output() as (out, err):
            xgb.train(
                params, D_train,
                evals=[(D_train, 'Train'), (D_valid, 'Valid')],
                num_boost_round=rounds,
                evals_result=evals_result,
                verbose_eval=verbose_eval,
            )
            output: str = out.getvalue().strip()
            check_output(output)

        with tm.captured_output() as (out, err):
            xgb.cv(params, D_train, num_boost_round=rounds, verbose_eval=verbose_eval)
            output = out.getvalue().strip()
            check_output(output)
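
A natural driver for this typed helper is pytest parametrization; a hypothetical test method, where make_dmatrices is an assumed helper that builds the two DMatrix objects:

    @pytest.mark.parametrize('verbose_eval', [True, 1, 2, 4, 32])
    def test_evaluation_monitor(self, verbose_eval):
        # make_dmatrices is hypothetical; substitute however the suite builds its data
        D_train, D_valid = self.make_dmatrices()
        self.run_evaluation_monitor(D_train, D_valid, 10, verbose_eval)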
Example 6
    def run_evaluation_monitor(self, D_train, D_valid, rounds, verbose_eval):
        evals_result = {}
        with tm.captured_output() as (out, err):
            xgb.train(
                {'objective': 'binary:logistic', 'eval_metric': 'error'},
                D_train,
                evals=[(D_train, 'Train'), (D_valid, 'Valid')],
                num_boost_round=rounds,
                evals_result=evals_result,
                verbose_eval=verbose_eval,
            )
            output: str = out.getvalue().strip()

        # 'Train-error' must appear exactly rounds // verbose_eval times in the log
        pos = 0
        msg = 'Train-error'
        for i in range(rounds // int(verbose_eval)):
            pos = output.find(msg, pos)
            assert pos != -1
            pos += len(msg)

        assert output.find(msg, pos) == -1
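
Since 'Train-error' cannot overlap itself, the find loop above is equivalent to a single count check; a one-line sketch:

        assert output.count('Train-error') == rounds // int(verbose_eval)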
Example 7
    def test_evaluation_monitor(self):
        D_train = xgb.DMatrix(self.X_train, self.y_train)
        D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
        evals_result = {}
        rounds = 10
        params = {'objective': 'binary:logistic', 'eval_metric': 'error'}
        xgb.train(
            params,
            D_train,
            evals=[(D_train, 'Train'), (D_valid, 'Valid')],
            num_boost_round=rounds,
            evals_result=evals_result,
            verbose_eval=True,
        )
        assert len(evals_result['Train']['error']) == rounds
        assert len(evals_result['Valid']['error']) == rounds

        with tm.captured_output() as (out, err):
            xgb.train(
                params,
                D_train,
                evals=[(D_train, 'Train'), (D_valid, 'Valid')],
                num_boost_round=rounds,
                evals_result=evals_result,
                verbose_eval=2,
            )
            output: str = out.getvalue().strip()

        # With verbose_eval=2, 'Train-error' should appear exactly rounds // 2 times
        pos = 0
        msg = 'Train-error'
        for i in range(rounds // 2):
            pos = output.find(msg, pos)
            assert pos != -1
            pos += len(msg)

        assert output.find(msg, pos) == -1
Example 8
def test_evaluation_metric():
    from sklearn.datasets import load_diabetes, load_digits
    from sklearn.metrics import mean_absolute_error
    X, y = load_diabetes(return_X_y=True)
    n_estimators = 16

    with tm.captured_output() as (out, err):
        reg = xgb.XGBRegressor(
            tree_method="hist",
            eval_metric=mean_absolute_error,
            n_estimators=n_estimators,
        )
        reg.fit(X, y, eval_set=[(X, y)])
        lines = out.getvalue().strip().split('\n')

    assert len(lines) == n_estimators
    for line in lines:
        assert line.find("mean_absolute_error") != -1

    # Old-style custom metric with signature (predt, DMatrix) -> (name, value);
    # passing it to fit() should emit a deprecation warning
    def metric(predt: np.ndarray, Xy: xgb.DMatrix):
        y = Xy.get_label()
        return "m", np.abs(predt - y).sum()

    with pytest.warns(UserWarning):
        reg = xgb.XGBRegressor(
            tree_method="hist",
            n_estimators=1,
        )
        reg.fit(X, y, eval_set=[(X, y)], eval_metric=metric)

    def merror(y_true: np.ndarray, predt: np.ndarray):
        n_samples = y_true.shape[0]
        assert n_samples == predt.size
        errors = np.zeros(n_samples)
        # Compare against the y_true argument, not the outer y
        errors[y_true != predt] = 1.0
        return np.sum(errors) / n_samples

    X, y = load_digits(n_class=10, return_X_y=True)

    clf = xgb.XGBClassifier(use_label_encoder=False,
                            tree_method="hist",
                            eval_metric=merror,
                            n_estimators=16,
                            objective="multi:softmax")
    clf.fit(X, y, eval_set=[(X, y)])
    custom = clf.evals_result()

    clf = xgb.XGBClassifier(use_label_encoder=False,
                            tree_method="hist",
                            eval_metric="merror",
                            n_estimators=16,
                            objective="multi:softmax")
    clf.fit(X, y, eval_set=[(X, y)])
    internal = clf.evals_result()

    np.testing.assert_allclose(custom["validation_0"]["merror"],
                               internal["validation_0"]["merror"],
                               atol=1e-6)

    clf = xgb.XGBRFClassifier(
        use_label_encoder=False,
        tree_method="hist",
        n_estimators=16,
        objective=tm.softprob_obj(10),
        eval_metric=merror,
    )
    with pytest.raises(AssertionError):
        # shape check inside the `merror` function
        clf.fit(X, y, eval_set=[(X, y)])
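
Two metric signatures appear in this example: the scikit-learn style metric(y_true, y_pred) -> float accepted by the estimator constructor, and the older (predt, DMatrix) -> (name, value) form that warns when passed to fit(). A small sketch of another constructor-level metric under the scikit-learn convention (as the mean_absolute_error check above suggests, the metric is logged under the function's name):

import numpy as np
import xgboost as xgb

def rmse(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    # scikit-learn style: (y_true, y_pred) -> scalar, lower is better
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

reg = xgb.XGBRegressor(tree_method="hist", n_estimators=4, eval_metric=rmse)
# reg.fit(X, y, eval_set=[(X, y)])  # each round logs an 'rmse' entry for validation_0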