def test_cv_explicit_fold_indices_labels(self):
    params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
              'objective': 'reg:squarederror'}
    N = 100
    F = 3
    dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N))
    folds = [
        # Train    Test
        ([1, 3],   [5, 8]),
        ([7, 9],   [23, 43, 11]),
    ]

    # Use a callback to log the test labels in each fold
    def cb(cbackenv):
        print([fold.dtest.get_label() for fold in cbackenv.cvfolds])

    # Run cross validation and capture standard out to test callback result
    with tm.captured_output() as (out, err):
        xgb.cv(
            params, dm, num_boost_round=1, folds=folds, callbacks=[cb],
            as_pandas=False
        )
        output = out.getvalue().strip()
    solution = ('[array([5., 8.], dtype=float32), array([23., 43., 11.],' +
                ' dtype=float32)]')
    assert output == solution
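
# The `folds` argument accepts any list of (train_indices, test_indices)
# pairs; the hand-written indices above make the expected labels explicit.
# As a sketch, scikit-learn's KFold yields pairs of the same shape (the
# dataset sizes and parameters below are illustrative, not from the test):
from sklearn.model_selection import KFold
import numpy as np
import xgboost as xgb

X_demo = np.random.randn(100, 3)
dm_demo = xgb.DMatrix(X_demo, label=np.arange(100))
kf = KFold(n_splits=5, shuffle=True, random_state=0)
folds_demo = list(kf.split(X_demo))
xgb.cv({'objective': 'reg:squarederror'}, dm_demo, num_boost_round=1,
       folds=folds_demo, as_pandas=False)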
def run_evaluation_monitor(self, D_train, D_valid, rounds, verbose_eval):
    evals_result = {}
    with tm.captured_output() as (out, err):
        xgb.train({'objective': 'binary:logistic',
                   'eval_metric': 'error'},
                  D_train,
                  evals=[(D_train, 'Train'), (D_valid, 'Valid')],
                  num_boost_round=rounds,
                  evals_result=evals_result,
                  verbose_eval=verbose_eval)
        output: str = out.getvalue().strip()

    if int(verbose_eval) == 1:
        # Should print info for each iteration
        assert len(output.split('\n')) == rounds
    elif int(verbose_eval) > rounds:
        # Should print info for the first and the last iteration only
        assert len(output.split('\n')) == 2
    else:
        # Should print info once per period, in addition to the first and
        # the last iteration
        num_periods = rounds // int(verbose_eval)
        # An extra line is needed when the last iteration does not fall on
        # a period boundary
        is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
        assert len(output.split('\n')) == (
            1 + num_periods + int(is_extra_info_required))
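
# Worked example of the else-branch arithmetic, assuming rounds=10 and
# verbose_eval=4 (illustrative values, not taken from the test itself).
# The monitor logs iteration 0 (the leading "1 +"), iterations 4 and 8
# (the period boundaries counted by num_periods), and the last iteration 9,
# which needs an extra line because 2 * 4 == 8 < 10 - 1 == 9.
rounds, verbose_eval = 10, 4
num_periods = rounds // verbose_eval                                 # 2
is_extra_info_required = num_periods * verbose_eval < (rounds - 1)   # True
assert 1 + num_periods + int(is_extra_info_required) == 4            # lines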
def test_parameter_validation():
    reg = xgb.XGBRegressor(foo='bar', verbosity=1)
    X = np.random.randn(10, 10)
    y = np.random.randn(10)
    with tm.captured_output() as (out, err):
        reg.fit(X, y)
        output = out.getvalue().strip()

    # The unknown parameter `foo` should trigger a warning
    assert output.find('foo') != -1

    reg = xgb.XGBRegressor(n_estimators=2, missing=3,
                           importance_type='gain', verbosity=1)
    X = np.random.randn(10, 10)
    y = np.random.randn(10)
    with tm.captured_output() as (out, err):
        reg.fit(X, y)
        output = out.getvalue().strip()

    # All parameters are recognized, so nothing should be printed
    assert len(output) == 0
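
# These tests rely on tm.captured_output from xgboost's testing helpers.
# Assuming it behaves like a standard stdout/stderr capture, a minimal
# equivalent could look like this (a sketch, not the actual helper):
import sys
from contextlib import contextmanager
from io import StringIO

@contextmanager
def captured_output():
    """Temporarily redirect stdout/stderr into StringIO buffers."""
    new_out, new_err = StringIO(), StringIO()
    old_out, old_err = sys.stdout, sys.stderr
    try:
        sys.stdout, sys.stderr = new_out, new_err
        yield new_out, new_err
    finally:
        sys.stdout, sys.stderr = old_out, old_err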
def test_training_on_cpu_only_env(self):
    assert os.environ['CUDA_VISIBLE_DEVICES'] == '-1'
    rng = np.random.RandomState(1994)
    X = rng.randn(10, 10)
    y = rng.randn(10)
    with tm.captured_output() as (out, err):
        # Test that no thrust exception is thrown
        with pytest.raises(xgb.core.XGBoostError):
            xgb.train({'tree_method': 'gpu_hist'}, xgb.DMatrix(X, y))

    assert out.getvalue().find('No visible GPU is found') != -1
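
# The assertion above presumes the harness exported CUDA_VISIBLE_DEVICES=-1
# before CUDA initialized. One way to arrange this from within pytest is a
# monkeypatch fixture (a sketch; the real suite may instead set the variable
# at the process level, which is required if CUDA was already initialized by
# an earlier test):
import pytest

@pytest.fixture
def cpu_only_env(monkeypatch):
    # Hide all GPUs; monkeypatch restores the original value on teardown.
    monkeypatch.setenv('CUDA_VISIBLE_DEVICES', '-1')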
def run_evaluation_monitor(
    self,
    D_train: xgb.DMatrix,
    D_valid: xgb.DMatrix,
    rounds: int,
    verbose_eval: Union[bool, int],
):
    def check_output(output: str) -> None:
        if int(verbose_eval) == 1:
            # Should print info for each iteration
            assert len(output.split('\n')) == rounds
        elif int(verbose_eval) > rounds:
            # Should print info for the first and the last iteration only
            assert len(output.split('\n')) == 2
        else:
            # Should print info once per period, in addition to the first
            # and the last iteration
            num_periods = rounds // int(verbose_eval)
            # An extra line is needed when the last iteration does not fall
            # on a period boundary
            is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
            assert len(output.split('\n')) == (
                1 + num_periods + int(is_extra_info_required)
            )

    evals_result: xgb.callback.TrainingCallback.EvalsLog = {}
    params = {'objective': 'binary:logistic', 'eval_metric': 'error'}
    with tm.captured_output() as (out, err):
        xgb.train(
            params,
            D_train,
            evals=[(D_train, 'Train'), (D_valid, 'Valid')],
            num_boost_round=rounds,
            evals_result=evals_result,
            verbose_eval=verbose_eval,
        )
        output: str = out.getvalue().strip()
        check_output(output)

    with tm.captured_output() as (out, err):
        xgb.cv(params, D_train, num_boost_round=rounds,
               verbose_eval=verbose_eval)
        output = out.getvalue().strip()
        check_output(output)
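
# A driver for this helper might sweep one verbose_eval value per branch of
# check_output (a sketch; the attribute names self.X_train etc. mirror those
# used by test_evaluation_monitor and are assumed to exist on the class):
def test_evaluation_monitor_verbosity(self) -> None:
    D_train = xgb.DMatrix(self.X_train, self.y_train)
    D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
    rounds = 10
    # Exercise all three branches: every iteration, period of 4, > rounds.
    for verbose_eval in (1, 4, rounds + 1):
        self.run_evaluation_monitor(D_train, D_valid, rounds, verbose_eval)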
def run_evaluation_monitor(self, D_train, D_valid, rounds, verbose_eval):
    evals_result = {}
    with tm.captured_output() as (out, err):
        xgb.train({'objective': 'binary:logistic',
                   'eval_metric': 'error'},
                  D_train,
                  evals=[(D_train, 'Train'), (D_valid, 'Valid')],
                  num_boost_round=rounds,
                  evals_result=evals_result,
                  verbose_eval=verbose_eval)
        output: str = out.getvalue().strip()

    # Expect exactly one logged line per period: scan for each occurrence
    # of 'Train-error', then verify there are no extras.
    pos = 0
    msg = 'Train-error'
    for i in range(rounds // int(verbose_eval)):
        pos = output.find(msg, pos)
        assert pos != -1
        pos += len(msg)
    assert output.find(msg, pos) == -1
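
# The scan loop above is equivalent to a single substring count, since each
# logged line mentions 'Train-error' exactly once. A sketch of the same
# check on a hand-written sample (the log format here is illustrative):
sample_output = ("[0]\tTrain-error:0.25000\tValid-error:0.31000\n"
                 "[2]\tTrain-error:0.18000\tValid-error:0.27000")
assert sample_output.count('Train-error') == 2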
def test_evaluation_monitor(self):
    D_train = xgb.DMatrix(self.X_train, self.y_train)
    D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
    evals_result = {}
    rounds = 10
    xgb.train({'objective': 'binary:logistic',
               'eval_metric': 'error'},
              D_train,
              evals=[(D_train, 'Train'), (D_valid, 'Valid')],
              num_boost_round=rounds,
              evals_result=evals_result,
              verbose_eval=True)
    assert len(evals_result['Train']['error']) == rounds
    assert len(evals_result['Valid']['error']) == rounds

    with tm.captured_output() as (out, err):
        xgb.train({'objective': 'binary:logistic',
                   'eval_metric': 'error'},
                  D_train,
                  evals=[(D_train, 'Train'), (D_valid, 'Valid')],
                  num_boost_round=rounds,
                  evals_result=evals_result,
                  verbose_eval=2)
        output: str = out.getvalue().strip()

    # With verbose_eval=2 every second iteration is logged; verify there
    # are exactly rounds // 2 occurrences of 'Train-error' and no extras.
    pos = 0
    msg = 'Train-error'
    for i in range(rounds // 2):
        pos = output.find(msg, pos)
        assert pos != -1
        pos += len(msg)
    assert output.find(msg, pos) == -1
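
# For reference, evals_result is keyed first by evaluation-set name and then
# by metric name, with one value per boosting round. A sketch with
# illustrative numbers (not from a real run):
evals_result_example = {
    'Train': {'error': [0.25, 0.18, 0.12]},  # len == num_boost_round
    'Valid': {'error': [0.31, 0.27, 0.24]},
}
assert set(evals_result_example) == {'Train', 'Valid'}
assert len(evals_result_example['Train']['error']) == 3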
def test_evaluation_metric():
    from sklearn.datasets import load_diabetes, load_digits
    from sklearn.metrics import mean_absolute_error

    X, y = load_diabetes(return_X_y=True)
    n_estimators = 16

    with tm.captured_output() as (out, err):
        reg = xgb.XGBRegressor(
            tree_method="hist",
            eval_metric=mean_absolute_error,
            n_estimators=n_estimators,
        )
        reg.fit(X, y, eval_set=[(X, y)])
        lines = out.getvalue().strip().split('\n')

    assert len(lines) == n_estimators
    for line in lines:
        assert line.find("mean_absolute_error") != -1

    # Passing a native-signature metric through the deprecated `fit`
    # argument should raise a warning.
    def metric(predt: np.ndarray, Xy: xgb.DMatrix):
        y = Xy.get_label()
        return "m", np.abs(predt - y).sum()

    with pytest.warns(UserWarning):
        reg = xgb.XGBRegressor(
            tree_method="hist",
            n_estimators=1,
        )
        reg.fit(X, y, eval_set=[(X, y)], eval_metric=metric)

    def merror(y_true: np.ndarray, predt: np.ndarray):
        n_samples = y_true.shape[0]
        assert n_samples == predt.size
        errors = np.zeros(n_samples)
        errors[y_true != predt] = 1.0
        return np.sum(errors) / n_samples

    X, y = load_digits(n_class=10, return_X_y=True)

    clf = xgb.XGBClassifier(
        use_label_encoder=False,
        tree_method="hist",
        eval_metric=merror,
        n_estimators=16,
        objective="multi:softmax",
    )
    clf.fit(X, y, eval_set=[(X, y)])
    custom = clf.evals_result()

    clf = xgb.XGBClassifier(
        use_label_encoder=False,
        tree_method="hist",
        eval_metric="merror",
        n_estimators=16,
        objective="multi:softmax",
    )
    clf.fit(X, y, eval_set=[(X, y)])
    internal = clf.evals_result()

    # The custom implementation should match the built-in `merror`.
    np.testing.assert_allclose(custom["validation_0"]["merror"],
                               internal["validation_0"]["merror"],
                               atol=1e-6)

    clf = xgb.XGBRFClassifier(
        use_label_encoder=False,
        tree_method="hist",
        n_estimators=16,
        objective=tm.softprob_obj(10),
        eval_metric=merror,
    )
    with pytest.raises(AssertionError):
        # The shape check inside `merror` fails: with the custom softprob
        # objective the metric receives per-class outputs, so predt.size
        # is a multiple of n_samples rather than equal to it.
        clf.fit(X, y, eval_set=[(X, y)])
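
# The test exercises the sklearn interface's support for metrics with the
# scikit-learn signature metric(y_true, y_pred), recorded in evals_result
# under the callable's __name__. A minimal standalone sketch (the dataset
# and metric here are illustrative, not from the test):
import numpy as np
import xgboost as xgb

def mean_abs_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    # scikit-learn style signature: labels first, predictions second.
    return float(np.abs(y_true - y_pred).mean())

X_demo = np.random.randn(128, 4)
y_demo = X_demo.sum(axis=1)
reg_demo = xgb.XGBRegressor(tree_method="hist", n_estimators=8,
                            eval_metric=mean_abs_error)
reg_demo.fit(X_demo, y_demo, eval_set=[(X_demo, y_demo)])
# One recorded value per boosting round, keyed by the function's name.
print(reg_demo.evals_result()["validation_0"]["mean_abs_error"])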