class TestFromColumnar:
    '''Tests for constructing DMatrix from data structure conforming Apache
    Arrow specification.'''

    @pytest.mark.skipif(**tm.no_cudf())
    def test_from_cudf(self):
        '''Test constructing DMatrix from cudf'''
        import cudf
        import pandas as pd

        kRows = 80
        kCols = 2

        na = np.random.randn(kRows, kCols).astype(np.float32)
        na[3, 1] = np.NAN
        na[5, 0] = np.NAN
        pa = pd.DataFrame(na)

        np_label = np.random.randn(kRows).astype(np.float32)
        pa_label = pd.DataFrame(np_label)

        names = []
        for i in range(0, kCols):
            names.append(str(i))
        pa.columns = names

        cd: cudf.DataFrame = cudf.from_pandas(pa)
        cd_label: cudf.DataFrame = cudf.from_pandas(pa_label)

        dtrain = xgb.DMatrix(cd, label=cd_label)
        assert dtrain.num_col() == kCols
        assert dtrain.num_row() == kRows
class TestFromColumnar:
    '''Tests for constructing DMatrix from data structure conforming Apache
    Arrow specification.'''

    @pytest.mark.skipif(**tm.no_cudf())
    def test_from_cudf(self):
        '''Test constructing DMatrix from cudf'''
        import cudf

        dmatrix_from_cudf(np.float32, np.NAN)
        dmatrix_from_cudf(np.float64, np.NAN)

        dmatrix_from_cudf(np.uint8, 2)
        dmatrix_from_cudf(np.uint32, 3)
        dmatrix_from_cudf(np.uint64, 4)

        dmatrix_from_cudf(np.int8, 2)
        dmatrix_from_cudf(np.int32, -2)
        dmatrix_from_cudf(np.int64, -3)

        cd = cudf.DataFrame({'x': [1, 2, 3], 'y': [0.1, 0.2, 0.3]})
        dtrain = xgb.DMatrix(cd)

        assert dtrain.feature_names == ['x', 'y']
        assert dtrain.feature_types == ['int', 'float']

        series = cudf.DataFrame({'x': [1, 2, 3]}).iloc[:, 0]
        assert isinstance(series, cudf.Series)
        dtrain = xgb.DMatrix(series)

        assert dtrain.feature_names == ['x']
        assert dtrain.feature_types == ['int']

        with pytest.raises(Exception):
            dtrain = xgb.DMatrix(cd, label=cd)
class TestFromColumnar:
    '''Tests for constructing DMatrix from data structure conforming Apache
    Arrow specification.'''

    @pytest.mark.skipif(**tm.no_cudf())
    def test_simple_dmatrix_from_cudf(self):
        _test_from_cudf(xgb.DMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_device_dmatrix_from_cudf(self):
        _test_from_cudf(xgb.DeviceQuantileDMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_training_simple_dmatrix(self):
        _test_cudf_training(xgb.DMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_training_device_dmatrix(self):
        _test_cudf_training(xgb.DeviceQuantileDMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_metainfo_simple_dmatrix(self):
        _test_cudf_metainfo(xgb.DMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_metainfo_device_dmatrix(self):
        _test_cudf_metainfo(xgb.DeviceQuantileDMatrix)
class TestFromColumnar:
    '''Tests for constructing DMatrix from data structure conforming Apache
    Arrow specification.'''

    @pytest.mark.skipif(**tm.no_cudf())
    def test_from_cudf(self):
        '''Test constructing DMatrix from cudf'''
        import cudf

        dmatrix_from_cudf(np.float32, np.NAN)
        dmatrix_from_cudf(np.float64, np.NAN)

        dmatrix_from_cudf(np.uint8, 2)
        dmatrix_from_cudf(np.uint32, 3)
        dmatrix_from_cudf(np.uint64, 4)

        dmatrix_from_cudf(np.int8, 2)
        dmatrix_from_cudf(np.int32, -2)
        dmatrix_from_cudf(np.int64, -3)

        cd = cudf.DataFrame({'x': [1, 2, 3], 'y': [0.1, 0.2, 0.3]})
        dtrain = xgb.DMatrix(cd)

        assert dtrain.feature_names == ['x', 'y']
        assert dtrain.feature_types == ['int', 'float']

        series = cudf.DataFrame({'x': [1, 2, 3]}).iloc[:, 0]
        assert isinstance(series, cudf.Series)
        dtrain = xgb.DMatrix(series)

        assert dtrain.feature_names == ['x']
        assert dtrain.feature_types == ['int']

        with pytest.raises(Exception):
            dtrain = xgb.DMatrix(cd, label=cd)

        # Test when number of elements is less than 8
        X = cudf.DataFrame(
            {'x': cudf.Series([0, 1, 2, np.NAN, 4], dtype=np.int32)})
        dtrain = xgb.DMatrix(X)
        assert dtrain.num_col() == 1
        assert dtrain.num_row() == 5

        # Boolean is not supported.
        X_boolean = cudf.DataFrame({'x': cudf.Series([True, False])})
        with pytest.raises(Exception):
            dtrain = xgb.DMatrix(X_boolean)

        y_boolean = cudf.DataFrame(
            {'x': cudf.Series([True, False, True, True, True])})
        with pytest.raises(Exception):
            dtrain = xgb.DMatrix(X_boolean, label=y_boolean)
class TestFromColumnar:
    '''Tests for constructing DMatrix from data structure conforming Apache
    Arrow specification.'''

    @pytest.mark.skipif(**tm.no_cudf())
    def test_from_cudf(self):
        '''Test constructing DMatrix from cudf'''
        dmatrix_from_cudf(np.float32, np.NAN)
        dmatrix_from_cudf(np.float64, np.NAN)

        dmatrix_from_cudf(np.uint8, 2)
        dmatrix_from_cudf(np.uint32, 3)
        dmatrix_from_cudf(np.uint64, 4)

        dmatrix_from_cudf(np.int8, 2)
        dmatrix_from_cudf(np.int32, -2)
        dmatrix_from_cudf(np.int64, -3)
class TestFromColumnar:
    '''Tests for constructing DMatrix from data structure conforming Apache
    Arrow specification.'''

    @pytest.mark.skipif(**tm.no_cudf())
    def test_simple_dmatrix_from_cudf(self):
        _test_from_cudf(xgb.DMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_device_dmatrix_from_cudf(self):
        _test_from_cudf(xgb.DeviceQuantileDMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_training_simple_dmatrix(self):
        _test_cudf_training(xgb.DMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_training_device_dmatrix(self):
        _test_cudf_training(xgb.DeviceQuantileDMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_metainfo_simple_dmatrix(self):
        _test_cudf_metainfo(xgb.DMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_metainfo_device_dmatrix(self):
        _test_cudf_metainfo(xgb.DeviceQuantileDMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_categorical(self):
        import cudf
        _X, _y = tm.make_categorical(100, 30, 17, False)
        X = cudf.from_pandas(_X)
        y = cudf.from_pandas(_y)

        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        assert len(Xy.feature_types) == X.shape[1]
        assert all(t == "categorical" for t in Xy.feature_types)

        Xy = xgb.DeviceQuantileDMatrix(X, y, enable_categorical=True)
        assert len(Xy.feature_types) == X.shape[1]
        assert all(t == "categorical" for t in Xy.feature_types)
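# ---------------------------------------------------------------------------
# A minimal standalone sketch (not part of the test suite) of the pattern the
# categorical tests above exercise: building a DMatrix from a cudf DataFrame
# that contains a category-typed column and training with the GPU histogram
# method.  It assumes a working cudf/xgboost GPU installation; the column
# names and data are illustrative only.
# ---------------------------------------------------------------------------
def _example_cudf_categorical():
    import cudf
    import xgboost as xgb

    # One categorical feature and one numeric feature.
    X = cudf.DataFrame({
        "color": cudf.Series(["red", "green", "red", "blue"]).astype("category"),
        "size": cudf.Series([1.0, 2.0, 3.0, 4.0]),
    })
    y = cudf.Series([0.0, 1.0, 0.0, 1.0])

    # enable_categorical must be set for category-typed columns.
    Xy = xgb.DMatrix(X, y, enable_categorical=True)
    booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=4)
    return booster.predict(Xy)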
class TestDistributedGPU(unittest.TestCase):
    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_cudf())
    @pytest.mark.skipif(**tm.no_dask_cudf())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    def test_dask_dataframe(self):
        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
                X, y = generate_array()

                X = dd.from_dask_array(X)
                y = dd.from_dask_array(y)

                X = X.map_partitions(cudf.from_pandas)
                y = y.map_partitions(cudf.from_pandas)

                dtrain = dxgb.DaskDMatrix(client, X, y)
                out = dxgb.train(client, {'tree_method': 'gpu_hist'},
                                 dtrain=dtrain,
                                 evals=[(dtrain, 'X')],
                                 num_boost_round=2)

                assert isinstance(out['booster'], dxgb.Booster)
                assert len(out['history']['X']['rmse']) == 2

                predictions = dxgb.predict(client, out, dtrain).compute()
                assert isinstance(predictions, np.ndarray)

    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    @pytest.mark.mgpu
    def test_empty_dmatrix(self):
        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
                parameters = {'tree_method': 'gpu_hist'}
                run_empty_dmatrix(client, parameters)
class TestDistributedGPU: @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_dask_cudf()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_dask_dataframe(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: run_with_dask_dataframe(dxgb.DaskDMatrix, client) run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client) @given( params=parameter_strategy, num_rounds=strategies.integers(1, 20), dataset=tm.dataset_strategy, ) @settings(deadline=duration(seconds=120), suppress_health_check=suppress) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.parametrize( "local_cuda_cluster", [{"n_workers": 2}], indirect=["local_cuda_cluster"] ) @pytest.mark.mgpu def test_gpu_hist( self, params: Dict, num_rounds: int, dataset: tm.TestDataset, local_cuda_cluster: LocalCUDACluster, ) -> None: with Client(local_cuda_cluster) as client: run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix, client) run_gpu_hist( params, num_rounds, dataset, dxgb.DaskDeviceQuantileDMatrix, client ) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_dask_array(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: run_with_dask_array(dxgb.DaskDMatrix, client) run_with_dask_array(dxgb.DaskDeviceQuantileDMatrix, client) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) def test_early_stopping(self, local_cuda_cluster: LocalCUDACluster) -> None: from sklearn.datasets import load_breast_cancer with Client(local_cuda_cluster) as client: X, y = load_breast_cancer(return_X_y=True) X, y = da.from_array(X), da.from_array(y) m = dxgb.DaskDMatrix(client, X, y) valid = dxgb.DaskDMatrix(client, X, y) early_stopping_rounds = 5 booster = dxgb.train(client, {'objective': 'binary:logistic', 'eval_metric': 'error', 'tree_method': 'gpu_hist'}, m, evals=[(valid, 'Valid')], num_boost_round=1000, early_stopping_rounds=early_stopping_rounds)[ 'booster'] assert hasattr(booster, 'best_score') dump = booster.get_dump(dump_format='json') assert len(dump) - booster.best_iteration == early_stopping_rounds + 1 valid_X = X valid_y = y cls = dxgb.DaskXGBClassifier(objective='binary:logistic', tree_method='gpu_hist', n_estimators=100) cls.client = client cls.fit(X, y, early_stopping_rounds=early_stopping_rounds, eval_set=[(valid_X, valid_y)]) booster = cls.get_booster() dump = booster.get_dump(dump_format='json') assert len(dump) - booster.best_iteration == early_stopping_rounds + 1 @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.parametrize("model", ["boosting"]) def test_dask_classifier( self, model: str, local_cuda_cluster: LocalCUDACluster ) -> None: import dask_cudf with Client(local_cuda_cluster) as client: X_, y_, w_ = generate_array(with_weights=True) y_ = (y_ * 10).astype(np.int32) X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X_)) y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y_)) w = dask_cudf.from_dask_dataframe(dd.from_dask_array(w_)) run_dask_classifier(X, y, w, model, client, 10) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_empty_dmatrix(self, local_cuda_cluster: LocalCUDACluster) -> None: with 
Client(local_cuda_cluster) as client: parameters = {'tree_method': 'gpu_hist', 'debug_synchronize': True} run_empty_dmatrix_reg(client, parameters) run_empty_dmatrix_cls(client, parameters) def test_empty_dmatrix_auc(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: n_workers = len(_get_client_workers(client)) run_empty_dmatrix_auc(client, "gpu_hist", n_workers) def test_auc(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: run_auc(client, "gpu_hist") def test_data_initialization(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: X, y, _ = generate_array() fw = da.random.random((random_cols, )) fw = fw - fw.min() m = dxgb.DaskDMatrix(client, X, y, feature_weights=fw) workers = _get_client_workers(client) rabit_args = client.sync(dxgb._get_rabit_args, len(workers), client) def worker_fn(worker_addr: str, data_ref: Dict) -> None: with dxgb.RabitContext(rabit_args): local_dtrain = dxgb._dmatrix_from_list_of_parts(**data_ref) fw_rows = local_dtrain.get_float_info("feature_weights").shape[0] assert fw_rows == local_dtrain.num_col() futures = [] for i in range(len(workers)): futures.append( client.submit( worker_fn, workers[i], m._create_fn_args(workers[i]), pure=False, workers=[workers[i]] ) ) client.gather(futures) def test_interface_consistency(self) -> None: sig = OrderedDict(signature(dxgb.DaskDMatrix).parameters) del sig["client"] ddm_names = list(sig.keys()) sig = OrderedDict(signature(dxgb.DaskDeviceQuantileDMatrix).parameters) del sig["client"] del sig["max_bin"] ddqdm_names = list(sig.keys()) assert len(ddm_names) == len(ddqdm_names) # between dask for i in range(len(ddm_names)): assert ddm_names[i] == ddqdm_names[i] sig = OrderedDict(signature(xgb.DMatrix).parameters) del sig["nthread"] # no nthread in dask dm_names = list(sig.keys()) sig = OrderedDict(signature(xgb.DeviceQuantileDMatrix).parameters) del sig["nthread"] del sig["max_bin"] dqdm_names = list(sig.keys()) # between single node assert len(dm_names) == len(dqdm_names) for i in range(len(dm_names)): assert dm_names[i] == dqdm_names[i] # ddm <-> dm for i in range(len(ddm_names)): assert ddm_names[i] == dm_names[i] # dqdm <-> ddqdm for i in range(len(ddqdm_names)): assert ddqdm_names[i] == dqdm_names[i] sig = OrderedDict(signature(xgb.XGBRanker.fit).parameters) ranker_names = list(sig.keys()) sig = OrderedDict(signature(xgb.dask.DaskXGBRanker.fit).parameters) dranker_names = list(sig.keys()) for rn, drn in zip(ranker_names, dranker_names): assert rn == drn def run_quantile(self, name: str, local_cuda_cluster: LocalCUDACluster) -> None: if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows") exe = None for possible_path in {'./testxgboost', './build/testxgboost', '../build/testxgboost', '../gpu-build/testxgboost'}: if os.path.exists(possible_path): exe = possible_path assert exe, 'No testxgboost executable found.' test = "--gtest_filter=GPUQuantile." + name def runit( worker_addr: str, rabit_args: List[bytes] ) -> subprocess.CompletedProcess: port_env = '' # setup environment for running the c++ part. 
for arg in rabit_args: if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'): port_env = arg.decode('utf-8') port = port_env.split('=') env = os.environ.copy() env[port[0]] = port[1] return subprocess.run([str(exe), test], env=env, stdout=subprocess.PIPE) with Client(local_cuda_cluster) as client: workers = _get_client_workers(client) rabit_args = client.sync(dxgb._get_rabit_args, workers, client) futures = client.map(runit, workers, pure=False, workers=workers, rabit_args=rabit_args) results = client.gather(futures) for ret in results: msg = ret.stdout.decode('utf-8') assert msg.find('1 test from GPUQuantile') != -1, msg assert ret.returncode == 0, msg @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu @pytest.mark.gtest def test_quantile_basic(self, local_cuda_cluster: LocalCUDACluster) -> None: self.run_quantile('AllReduceBasic', local_cuda_cluster) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu @pytest.mark.gtest def test_quantile_same_on_all_workers( self, local_cuda_cluster: LocalCUDACluster ) -> None: self.run_quantile('SameOnAllWorkers', local_cuda_cluster)
class TestDistributedGPU(unittest.TestCase):
    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_cudf())
    @pytest.mark.skipif(**tm.no_dask_cudf())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    def test_dask_dataframe(self):
        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
                X, y = generate_array()

                X = dd.from_dask_array(X)
                y = dd.from_dask_array(y)

                X = X.map_partitions(cudf.from_pandas)
                y = y.map_partitions(cudf.from_pandas)

                dtrain = dxgb.DaskDMatrix(client, X, y)
                out = dxgb.train(client, {'tree_method': 'gpu_hist'},
                                 dtrain=dtrain,
                                 evals=[(dtrain, 'X')],
                                 num_boost_round=2)

                assert isinstance(out['booster'], dxgb.Booster)
                assert len(out['history']['X']['rmse']) == 2

                predictions = dxgb.predict(client, out, dtrain).compute()
                assert isinstance(predictions, np.ndarray)

    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    @pytest.mark.mgpu
    def test_empty_dmatrix(self):

        def _check_outputs(out, predictions):
            assert isinstance(out['booster'], dxgb.Booster)
            assert len(out['history']['validation']['rmse']) == 2
            assert isinstance(predictions, np.ndarray)
            assert predictions.shape[0] == 1

        parameters = {
            'tree_method': 'gpu_hist',
            'verbosity': 3,
            'debug_synchronize': True
        }

        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
                kRows, kCols = 1, 97
                X = dd.from_array(np.random.randn(kRows, kCols))
                y = dd.from_array(np.random.rand(kRows))
                dtrain = dxgb.DaskDMatrix(client, X, y)

                out = dxgb.train(client, parameters,
                                 dtrain=dtrain,
                                 evals=[(dtrain, 'validation')],
                                 num_boost_round=2)
                predictions = dxgb.predict(client=client, model=out,
                                           data=dtrain).compute()
                _check_outputs(out, predictions)

                # train has more rows than evals
                valid = dtrain
                kRows += 1
                X = dd.from_array(np.random.randn(kRows, kCols))
                y = dd.from_array(np.random.rand(kRows))
                dtrain = dxgb.DaskDMatrix(client, X, y)

                out = dxgb.train(client, parameters,
                                 dtrain=dtrain,
                                 evals=[(valid, 'validation')],
                                 num_boost_round=2)
                predictions = dxgb.predict(client=client, model=out,
                                           data=valid).compute()
                _check_outputs(out, predictions)
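# ---------------------------------------------------------------------------
# A minimal standalone sketch (not part of the test suite) of the distributed
# training loop exercised above: a LocalCUDACluster, cudf-backed dask
# partitions, and xgboost.dask.  Data sizes and parameter values are
# illustrative, not taken from the tests.
# ---------------------------------------------------------------------------
def _example_dask_gpu_training():
    import numpy as np
    import dask.array as da
    import dask.dataframe as dd
    import cudf
    from dask.distributed import Client
    from dask_cuda import LocalCUDACluster
    from xgboost import dask as dxgb

    with LocalCUDACluster() as cluster, Client(cluster) as client:
        # Small random regression problem spread over a few partitions.
        X = dd.from_dask_array(da.random.random((1000, 10), chunks=(250, 10)))
        y = dd.from_dask_array(da.random.random(1000, chunks=250))
        # Move the pandas partitions onto the GPU.
        X = X.map_partitions(cudf.from_pandas)
        y = y.map_partitions(cudf.from_pandas)

        dtrain = dxgb.DaskDMatrix(client, X, y)
        out = dxgb.train(client, {"tree_method": "gpu_hist"},
                         dtrain=dtrain, num_boost_round=4)
        predictions = dxgb.predict(client, out, dtrain).compute()
        assert isinstance(predictions, np.ndarray)
        return predictions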
class TestGPUPredict(unittest.TestCase):
    def test_predict(self):
        iterations = 10
        np.random.seed(1)
        test_num_rows = [10, 1000, 5000]
        test_num_cols = [10, 50, 500]
        # This test passes for tree_method=gpu_hist and tree_method=exact, but
        # for `hist` and `approx` the floating point error accumulates faster
        # and fails even when tol is set to 1e-4.  For `hist`, the mismatching
        # rate with 5000 rows is 0.04.
        for num_rows in test_num_rows:
            for num_cols in test_num_cols:
                dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols),
                                     label=[0, 1] * int(num_rows / 2))
                dval = xgb.DMatrix(np.random.randn(num_rows, num_cols),
                                   label=[0, 1] * int(num_rows / 2))
                dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols),
                                    label=[0, 1] * int(num_rows / 2))
                watchlist = [(dtrain, 'train'), (dval, 'validation')]
                res = {}
                param = {
                    "objective": "binary:logistic",
                    "predictor": "gpu_predictor",
                    'eval_metric': 'logloss',
                    'tree_method': 'gpu_hist',
                    'max_depth': 1
                }
                bst = xgb.train(param, dtrain, iterations, evals=watchlist,
                                evals_result=res)
                assert self.non_increasing(res["train"]["logloss"])
                gpu_pred_train = bst.predict(dtrain, output_margin=True)
                gpu_pred_test = bst.predict(dtest, output_margin=True)
                gpu_pred_val = bst.predict(dval, output_margin=True)

                param["predictor"] = "cpu_predictor"
                bst_cpu = xgb.train(param, dtrain, iterations, evals=watchlist)
                cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
                cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
                cpu_pred_val = bst_cpu.predict(dval, output_margin=True)

                np.testing.assert_allclose(cpu_pred_train, gpu_pred_train,
                                           rtol=1e-6)
                np.testing.assert_allclose(cpu_pred_val, gpu_pred_val,
                                           rtol=1e-6)
                np.testing.assert_allclose(cpu_pred_test, gpu_pred_test,
                                           rtol=1e-6)

    def non_increasing(self, L):
        return all((y - x) < 0.001 for x, y in zip(L, L[1:]))

    # Test case for a bug where multiple batch predictions made on a
    # test set produce incorrect results
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_multi_predict(self):
        from sklearn.datasets import make_regression
        from sklearn.model_selection import train_test_split

        n = 1000
        X, y = make_regression(n, random_state=rng)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=123)
        dtrain = xgb.DMatrix(X_train, label=y_train)
        dtest = xgb.DMatrix(X_test)

        params = {}
        params["tree_method"] = "gpu_hist"

        params['predictor'] = "gpu_predictor"
        bst_gpu_predict = xgb.train(params, dtrain)

        params['predictor'] = "cpu_predictor"
        bst_cpu_predict = xgb.train(params, dtrain)

        predict0 = bst_gpu_predict.predict(dtest)
        predict1 = bst_gpu_predict.predict(dtest)
        cpu_predict = bst_cpu_predict.predict(dtest)

        assert np.allclose(predict0, predict1)
        assert np.allclose(predict0, cpu_predict)

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_sklearn(self):
        m, n = 15000, 14
        tr_size = 2500
        X = np.random.rand(m, n)
        y = 200 * np.matmul(X, np.arange(-3, -3 + n))
        X_train, y_train = X[:tr_size, :], y[:tr_size]
        X_test, y_test = X[tr_size:, :], y[tr_size:]

        # First with cpu_predictor
        params = {'tree_method': 'gpu_hist',
                  'predictor': 'cpu_predictor',
                  'n_jobs': -1,
                  'seed': 123}
        m = xgb.XGBRegressor(**params).fit(X_train, y_train)
        cpu_train_score = m.score(X_train, y_train)
        cpu_test_score = m.score(X_test, y_test)

        # Now with gpu_predictor
        params['predictor'] = 'gpu_predictor'

        m = xgb.XGBRegressor(**params).fit(X_train, y_train)
        gpu_train_score = m.score(X_train, y_train)
        gpu_test_score = m.score(X_test, y_test)

        assert np.allclose(cpu_train_score, gpu_train_score)
        assert np.allclose(cpu_test_score, gpu_test_score)

    @pytest.mark.skipif(**tm.no_cupy())
    def test_inplace_predict_cupy(self):
        import cupy as cp
        cp.cuda.runtime.setDevice(0)
        rows = 1000
        cols = 10
        cp_rng = cp.random.RandomState(1994)
        cp.random.set_random_state(cp_rng)
        X = cp.random.randn(rows, cols)
        y = cp.random.randn(rows)
        dtrain = xgb.DMatrix(X, y)

        booster = xgb.train({'tree_method': 'gpu_hist'}, dtrain,
                            num_boost_round=10)
        test = xgb.DMatrix(X[:10, ...])
        predt_from_array = booster.inplace_predict(X[:10, ...])
        predt_from_dmatrix = booster.predict(test)

        cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix)

        def predict_dense(x):
            inplace_predt = booster.inplace_predict(x)
            d = xgb.DMatrix(x)
            copied_predt = cp.array(booster.predict(d))
            return cp.all(copied_predt == inplace_predt)

        for i in range(10):
            run_threaded_predict(X, rows, predict_dense)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_inplace_predict_cudf(self):
        import cupy as cp
        import cudf
        import pandas as pd
        rows = 1000
        cols = 10
        rng = np.random.RandomState(1994)
        X = rng.randn(rows, cols)
        X = pd.DataFrame(X)
        y = rng.randn(rows)

        X = cudf.from_pandas(X)
        dtrain = xgb.DMatrix(X, y)

        booster = xgb.train({'tree_method': 'gpu_hist'}, dtrain,
                            num_boost_round=10)
        test = xgb.DMatrix(X)
        predt_from_array = booster.inplace_predict(X)
        predt_from_dmatrix = booster.predict(test)

        cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix)

        def predict_df(x):
            inplace_predt = booster.inplace_predict(x)
            d = xgb.DMatrix(x)
            copied_predt = cp.array(booster.predict(d))
            return cp.all(copied_predt == inplace_predt)

        for i in range(10):
            run_threaded_predict(X, rows, predict_df)
class TestGPUPredict: def test_predict(self): iterations = 10 np.random.seed(1) test_num_rows = [10, 1000, 5000] test_num_cols = [10, 50, 500] # This test passes for tree_method=gpu_hist and tree_method=exact. but # for `hist` and `approx` the floating point error accumulates faster # and fails even tol is set to 1e-4. For `hist`, the mismatching rate # with 5000 rows is 0.04. for num_rows in test_num_rows: for num_cols in test_num_cols: dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2)) dval = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2)) dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2)) watchlist = [(dtrain, 'train'), (dval, 'validation')] res = {} param = { "objective": "binary:logistic", "predictor": "gpu_predictor", 'eval_metric': 'logloss', 'tree_method': 'gpu_hist', 'max_depth': 1 } bst = xgb.train(param, dtrain, iterations, evals=watchlist, evals_result=res) assert self.non_increasing(res["train"]["logloss"]) gpu_pred_train = bst.predict(dtrain, output_margin=True) gpu_pred_test = bst.predict(dtest, output_margin=True) gpu_pred_val = bst.predict(dval, output_margin=True) param["predictor"] = "cpu_predictor" bst_cpu = xgb.train(param, dtrain, iterations, evals=watchlist) cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True) cpu_pred_test = bst_cpu.predict(dtest, output_margin=True) cpu_pred_val = bst_cpu.predict(dval, output_margin=True) np.testing.assert_allclose(cpu_pred_train, gpu_pred_train, rtol=1e-6) np.testing.assert_allclose(cpu_pred_val, gpu_pred_val, rtol=1e-6) np.testing.assert_allclose(cpu_pred_test, gpu_pred_test, rtol=1e-6) def non_increasing(self, L): return all((y - x) < 0.001 for x, y in zip(L, L[1:])) # Test case for a bug where multiple batch predictions made on a # test set produce incorrect results @pytest.mark.skipif(**tm.no_sklearn()) def test_multi_predict(self): from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split n = 1000 X, y = make_regression(n, random_state=rng) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123) dtrain = xgb.DMatrix(X_train, label=y_train) dtest = xgb.DMatrix(X_test) params = {} params["tree_method"] = "gpu_hist" params['predictor'] = "gpu_predictor" bst_gpu_predict = xgb.train(params, dtrain) params['predictor'] = "cpu_predictor" bst_cpu_predict = xgb.train(params, dtrain) predict0 = bst_gpu_predict.predict(dtest) predict1 = bst_gpu_predict.predict(dtest) cpu_predict = bst_cpu_predict.predict(dtest) assert np.allclose(predict0, predict1) assert np.allclose(predict0, cpu_predict) @pytest.mark.skipif(**tm.no_sklearn()) def test_sklearn(self): m, n = 15000, 14 tr_size = 2500 X = np.random.rand(m, n) y = 200 * np.matmul(X, np.arange(-3, -3 + n)) X_train, y_train = X[:tr_size, :], y[:tr_size] X_test, y_test = X[tr_size:, :], y[tr_size:] # First with cpu_predictor params = { 'tree_method': 'gpu_hist', 'predictor': 'cpu_predictor', 'n_jobs': -1, 'seed': 123 } m = xgb.XGBRegressor(**params).fit(X_train, y_train) cpu_train_score = m.score(X_train, y_train) cpu_test_score = m.score(X_test, y_test) # Now with gpu_predictor params['predictor'] = 'gpu_predictor' m = xgb.XGBRegressor(**params).fit(X_train, y_train) gpu_train_score = m.score(X_train, y_train) gpu_test_score = m.score(X_test, y_test) assert np.allclose(cpu_train_score, gpu_train_score) assert np.allclose(cpu_test_score, gpu_test_score) def run_inplace_base_margin(self, booster, dtrain, X, 
base_margin): import cupy as cp dtrain.set_info(base_margin=base_margin) from_inplace = booster.inplace_predict(data=X, base_margin=base_margin) from_dmatrix = booster.predict(dtrain) cp.testing.assert_allclose(from_inplace, from_dmatrix) @pytest.mark.skipif(**tm.no_cupy()) def test_inplace_predict_cupy(self): import cupy as cp cp.cuda.runtime.setDevice(0) rows = 1000 cols = 10 missing = 11 # set to integer for testing cp_rng = cp.random.RandomState(1994) cp.random.set_random_state(cp_rng) X = cp.random.randn(rows, cols) missing_idx = [i for i in range(0, cols, 4)] X[:, missing_idx] = missing # set to be missing y = cp.random.randn(rows) dtrain = xgb.DMatrix(X, y) booster = xgb.train({'tree_method': 'gpu_hist'}, dtrain, num_boost_round=10) test = xgb.DMatrix(X[:10, ...], missing=missing) predt_from_array = booster.inplace_predict(X[:10, ...], missing=missing) predt_from_dmatrix = booster.predict(test) cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix) def predict_dense(x): inplace_predt = booster.inplace_predict(x) d = xgb.DMatrix(x) copied_predt = cp.array(booster.predict(d)) return cp.all(copied_predt == inplace_predt) # Don't do this on Windows, see issue #5793 if sys.platform.startswith("win"): pytest.skip( 'Multi-threaded in-place prediction with cuPy is not working on Windows' ) for i in range(10): run_threaded_predict(X, rows, predict_dense) base_margin = cp_rng.randn(rows) self.run_inplace_base_margin(booster, dtrain, X, base_margin) # Create a wide dataset X = cp_rng.randn(100, 10000) y = cp_rng.randn(100) missing_idx = [i for i in range(0, X.shape[1], 16)] X[:, missing_idx] = missing reg = xgb.XGBRegressor(tree_method="gpu_hist", n_estimators=8, missing=missing) reg.fit(X, y) gpu_predt = reg.predict(X) reg.set_params(predictor="cpu_predictor") cpu_predt = reg.predict(X) np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_cudf()) def test_inplace_predict_cudf(self): import cupy as cp import cudf import pandas as pd rows = 1000 cols = 10 rng = np.random.RandomState(1994) cp.cuda.runtime.setDevice(0) X = rng.randn(rows, cols) X = pd.DataFrame(X) y = rng.randn(rows) X = cudf.from_pandas(X) dtrain = xgb.DMatrix(X, y) booster = xgb.train({'tree_method': 'gpu_hist'}, dtrain, num_boost_round=10) test = xgb.DMatrix(X) predt_from_array = booster.inplace_predict(X) predt_from_dmatrix = booster.predict(test) cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix) def predict_df(x): # column major array inplace_predt = booster.inplace_predict(x.values) d = xgb.DMatrix(x) copied_predt = cp.array(booster.predict(d)) assert cp.all(copied_predt == inplace_predt) inplace_predt = booster.inplace_predict(x) return cp.all(copied_predt == inplace_predt) for i in range(10): run_threaded_predict(X, rows, predict_df) base_margin = cudf.Series(rng.randn(rows)) self.run_inplace_base_margin(booster, dtrain, X, base_margin) @given(strategies.integers(1, 10), tm.dataset_strategy, shap_parameter_strategy) @settings(deadline=None, print_blob=True) def test_shap(self, num_rounds, dataset, param): if dataset.name.endswith( "-l1"): # not supported by the exact tree method return param.update({"predictor": "gpu_predictor", "gpu_id": 0}) param = dataset.set_params(param) dmat = dataset.get_dmat() bst = xgb.train(param, dmat, num_rounds) test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin) shap = bst.predict(test_dmat, pred_contribs=True) margin = bst.predict(test_dmat, output_margin=True) 
assume(len(dataset.y) > 0) assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3) @given(strategies.integers(1, 10), tm.dataset_strategy, shap_parameter_strategy) @settings(deadline=None, max_examples=20, print_blob=True) def test_shap_interactions(self, num_rounds, dataset, param): if dataset.name.endswith( "-l1"): # not supported by the exact tree method return param.update({"predictor": "gpu_predictor", "gpu_id": 0}) param = dataset.set_params(param) dmat = dataset.get_dmat() bst = xgb.train(param, dmat, num_rounds) test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin) shap = bst.predict(test_dmat, pred_interactions=True) margin = bst.predict(test_dmat, output_margin=True) assume(len(dataset.y) > 0) assert np.allclose( np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)), margin, 1e-3, 1e-3) def test_shap_categorical(self): X, y = tm.make_categorical(100, 20, 7, False) Xy = xgb.DMatrix(X, y, enable_categorical=True) booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10) booster.set_param({"predictor": "gpu_predictor"}) shap = booster.predict(Xy, pred_contribs=True) margin = booster.predict(Xy, output_margin=True) np.testing.assert_allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, rtol=1e-3) booster.set_param({"predictor": "cpu_predictor"}) shap = booster.predict(Xy, pred_contribs=True) margin = booster.predict(Xy, output_margin=True) np.testing.assert_allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, rtol=1e-3) def test_predict_leaf_basic(self): gpu_leaf = run_predict_leaf('gpu_predictor') cpu_leaf = run_predict_leaf('cpu_predictor') np.testing.assert_equal(gpu_leaf, cpu_leaf) def run_predict_leaf_booster(self, param, num_rounds, dataset): param = dataset.set_params(param) m = dataset.get_dmat() booster = xgb.train(param, dtrain=dataset.get_dmat(), num_boost_round=num_rounds) booster.set_param({'predictor': 'cpu_predictor'}) cpu_leaf = booster.predict(m, pred_leaf=True) booster.set_param({'predictor': 'gpu_predictor'}) gpu_leaf = booster.predict(m, pred_leaf=True) np.testing.assert_equal(cpu_leaf, gpu_leaf) @given(predict_parameter_strategy, tm.dataset_strategy) @settings(deadline=None, print_blob=True) def test_predict_leaf_gbtree(self, param, dataset): param['booster'] = 'gbtree' param['tree_method'] = 'gpu_hist' self.run_predict_leaf_booster(param, 10, dataset) @given(predict_parameter_strategy, tm.dataset_strategy) @settings(deadline=None, print_blob=True) def test_predict_leaf_dart(self, param, dataset): param['booster'] = 'dart' param['tree_method'] = 'gpu_hist' self.run_predict_leaf_booster(param, 10, dataset) @pytest.mark.skipif(**tm.no_sklearn()) @pytest.mark.skipif(**tm.no_pandas()) @given(df=data_frames([ column('x0', elements=strategies.integers(min_value=0, max_value=3)), column('x1', elements=strategies.integers(min_value=0, max_value=5)) ], index=range_indexes(min_size=20, max_size=50))) @settings(deadline=None, print_blob=True) def test_predict_categorical_split(self, df): from sklearn.metrics import mean_squared_error df = df.astype('category') x0, x1 = df['x0'].to_numpy(), df['x1'].to_numpy() y = (x0 * 10 - 20) + (x1 - 2) dtrain = xgb.DMatrix(df, label=y, enable_categorical=True) params = { 'tree_method': 'gpu_hist', 'predictor': 'gpu_predictor', 'max_depth': 3, 'learning_rate': 1.0, 'base_score': 0.0, 'eval_metric': 'rmse' } eval_history = {} bst = xgb.train(params, dtrain, num_boost_round=5, evals=[(dtrain, 'train')], verbose_eval=False, evals_result=eval_history) pred = 
bst.predict(dtrain) rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False) np.testing.assert_almost_equal(rmse, eval_history['train']['rmse'][-1], decimal=5) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.parametrize("n_classes", [2, 3]) def test_predict_dart(self, n_classes): from sklearn.datasets import make_classification import cupy as cp n_samples = 1000 X_, y_ = make_classification(n_samples=n_samples, n_informative=5, n_classes=n_classes) X, y = cp.array(X_), cp.array(y_) Xy = xgb.DMatrix(X, y) if n_classes == 2: params = { "tree_method": "gpu_hist", "booster": "dart", "rate_drop": 0.5, "objective": "binary:logistic" } else: params = { "tree_method": "gpu_hist", "booster": "dart", "rate_drop": 0.5, "objective": "multi:softprob", "num_class": n_classes } booster = xgb.train(params, Xy, num_boost_round=32) # predictor=auto inplace = booster.inplace_predict(X) copied = booster.predict(Xy) cpu_inplace = booster.inplace_predict(X_) booster.set_param({"predictor": "cpu_predictor"}) cpu_copied = booster.predict(Xy) copied = cp.array(copied) cp.testing.assert_allclose(cpu_inplace, copied, atol=1e-6) cp.testing.assert_allclose(cpu_copied, copied, atol=1e-6) cp.testing.assert_allclose(inplace, copied, atol=1e-6) booster.set_param({"predictor": "gpu_predictor"}) inplace = booster.inplace_predict(X) copied = booster.predict(Xy) copied = cp.array(copied) cp.testing.assert_allclose(inplace, copied, atol=1e-6) @pytest.mark.skipif(**tm.no_cupy()) def test_dtypes(self): import cupy as cp rows = 1000 cols = 10 rng = cp.random.RandomState(1994) orig = rng.randint(low=0, high=127, size=rows * cols).reshape(rows, cols) y = rng.randint(low=0, high=127, size=rows) dtrain = xgb.DMatrix(orig, label=y) booster = xgb.train({"tree_method": "gpu_hist"}, dtrain) predt_orig = booster.inplace_predict(orig) # all primitive types in numpy for dtype in [ cp.signedinteger, cp.byte, cp.short, cp.intc, cp.int_, cp.longlong, cp.unsignedinteger, cp.ubyte, cp.ushort, cp.uintc, cp.uint, cp.ulonglong, cp.floating, cp.half, cp.single, cp.double, ]: X = cp.array(orig, dtype=dtype) predt = booster.inplace_predict(X) cp.testing.assert_allclose(predt, predt_orig) # boolean orig = cp.random.binomial(1, 0.5, size=rows * cols).reshape(rows, cols) predt_orig = booster.inplace_predict(orig) for dtype in [cp.bool8, cp.bool_]: X = cp.array(orig, dtype=dtype) predt = booster.inplace_predict(X) cp.testing.assert_allclose(predt, predt_orig) # unsupported types for dtype in [ cp.complex64, cp.complex128, ]: X = cp.array(orig, dtype=dtype) with pytest.raises(ValueError): booster.inplace_predict(X)
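# ---------------------------------------------------------------------------
# A minimal standalone sketch (not part of the test suite) of the invariant
# the SHAP tests above assert: per-row feature contributions plus the bias
# column sum to the untransformed margin.  The data here is synthetic and
# illustrative only.
# ---------------------------------------------------------------------------
def _example_shap_sums_to_margin():
    import numpy as np
    import xgboost as xgb

    rng = np.random.RandomState(0)
    X = rng.randn(256, 8)
    y = rng.randn(256)
    dmat = xgb.DMatrix(X, y)
    bst = xgb.train({"tree_method": "gpu_hist", "predictor": "gpu_predictor"},
                    dmat, num_boost_round=10)

    shap = bst.predict(dmat, pred_contribs=True)    # (n_rows, n_features + 1)
    margin = bst.predict(dmat, output_margin=True)  # raw, untransformed scores
    np.testing.assert_allclose(shap.sum(axis=1), margin, rtol=1e-3, atol=1e-3)
    return shap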
class TestGPUPredict: def test_predict(self): iterations = 10 np.random.seed(1) test_num_rows = [10, 1000, 5000] test_num_cols = [10, 50, 500] # This test passes for tree_method=gpu_hist and tree_method=exact. but # for `hist` and `approx` the floating point error accumulates faster # and fails even tol is set to 1e-4. For `hist`, the mismatching rate # with 5000 rows is 0.04. for num_rows in test_num_rows: for num_cols in test_num_cols: dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2)) dval = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2)) dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2)) watchlist = [(dtrain, 'train'), (dval, 'validation')] res = {} param = { "objective": "binary:logistic", "predictor": "gpu_predictor", 'eval_metric': 'logloss', 'tree_method': 'gpu_hist', 'max_depth': 1 } bst = xgb.train(param, dtrain, iterations, evals=watchlist, evals_result=res) assert self.non_increasing(res["train"]["logloss"]) gpu_pred_train = bst.predict(dtrain, output_margin=True) gpu_pred_test = bst.predict(dtest, output_margin=True) gpu_pred_val = bst.predict(dval, output_margin=True) param["predictor"] = "cpu_predictor" bst_cpu = xgb.train(param, dtrain, iterations, evals=watchlist) cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True) cpu_pred_test = bst_cpu.predict(dtest, output_margin=True) cpu_pred_val = bst_cpu.predict(dval, output_margin=True) np.testing.assert_allclose(cpu_pred_train, gpu_pred_train, rtol=1e-6) np.testing.assert_allclose(cpu_pred_val, gpu_pred_val, rtol=1e-6) np.testing.assert_allclose(cpu_pred_test, gpu_pred_test, rtol=1e-6) def non_increasing(self, L): return all((y - x) < 0.001 for x, y in zip(L, L[1:])) # Test case for a bug where multiple batch predictions made on a # test set produce incorrect results @pytest.mark.skipif(**tm.no_sklearn()) def test_multi_predict(self): from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split n = 1000 X, y = make_regression(n, random_state=rng) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123) dtrain = xgb.DMatrix(X_train, label=y_train) dtest = xgb.DMatrix(X_test) params = {} params["tree_method"] = "gpu_hist" params['predictor'] = "gpu_predictor" bst_gpu_predict = xgb.train(params, dtrain) params['predictor'] = "cpu_predictor" bst_cpu_predict = xgb.train(params, dtrain) predict0 = bst_gpu_predict.predict(dtest) predict1 = bst_gpu_predict.predict(dtest) cpu_predict = bst_cpu_predict.predict(dtest) assert np.allclose(predict0, predict1) assert np.allclose(predict0, cpu_predict) @pytest.mark.skipif(**tm.no_sklearn()) def test_sklearn(self): m, n = 15000, 14 tr_size = 2500 X = np.random.rand(m, n) y = 200 * np.matmul(X, np.arange(-3, -3 + n)) X_train, y_train = X[:tr_size, :], y[:tr_size] X_test, y_test = X[tr_size:, :], y[tr_size:] # First with cpu_predictor params = { 'tree_method': 'gpu_hist', 'predictor': 'cpu_predictor', 'n_jobs': -1, 'seed': 123 } m = xgb.XGBRegressor(**params).fit(X_train, y_train) cpu_train_score = m.score(X_train, y_train) cpu_test_score = m.score(X_test, y_test) # Now with gpu_predictor params['predictor'] = 'gpu_predictor' m = xgb.XGBRegressor(**params).fit(X_train, y_train) gpu_train_score = m.score(X_train, y_train) gpu_test_score = m.score(X_test, y_test) assert np.allclose(cpu_train_score, gpu_train_score) assert np.allclose(cpu_test_score, gpu_test_score) @pytest.mark.skipif(**tm.no_cupy()) def 
test_inplace_predict_cupy(self): import cupy as cp cp.cuda.runtime.setDevice(0) rows = 1000 cols = 10 cp_rng = cp.random.RandomState(1994) cp.random.set_random_state(cp_rng) X = cp.random.randn(rows, cols) y = cp.random.randn(rows) dtrain = xgb.DMatrix(X, y) booster = xgb.train({'tree_method': 'gpu_hist'}, dtrain, num_boost_round=10) test = xgb.DMatrix(X[:10, ...]) predt_from_array = booster.inplace_predict(X[:10, ...]) predt_from_dmatrix = booster.predict(test) cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix) def predict_dense(x): inplace_predt = booster.inplace_predict(x) d = xgb.DMatrix(x) copied_predt = cp.array(booster.predict(d)) return cp.all(copied_predt == inplace_predt) # Don't do this on Windows, see issue #5793 if sys.platform.startswith("win"): pytest.skip( 'Multi-threaded in-place prediction with cuPy is not working on Windows' ) for i in range(10): run_threaded_predict(X, rows, predict_dense) @pytest.mark.skipif(**tm.no_cudf()) def test_inplace_predict_cudf(self): import cupy as cp import cudf import pandas as pd rows = 1000 cols = 10 rng = np.random.RandomState(1994) cp.cuda.runtime.setDevice(0) X = rng.randn(rows, cols) X = pd.DataFrame(X) y = rng.randn(rows) X = cudf.from_pandas(X) dtrain = xgb.DMatrix(X, y) booster = xgb.train({'tree_method': 'gpu_hist'}, dtrain, num_boost_round=10) test = xgb.DMatrix(X) predt_from_array = booster.inplace_predict(X) predt_from_dmatrix = booster.predict(test) cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix) def predict_df(x): inplace_predt = booster.inplace_predict(x) d = xgb.DMatrix(x) copied_predt = cp.array(booster.predict(d)) return cp.all(copied_predt == inplace_predt) for i in range(10): run_threaded_predict(X, rows, predict_df) @given(strategies.integers(1, 10), tm.dataset_strategy, shap_parameter_strategy) @settings(deadline=None) def test_shap(self, num_rounds, dataset, param): param.update({"predictor": "gpu_predictor", "gpu_id": 0}) param = dataset.set_params(param) dmat = dataset.get_dmat() bst = xgb.train(param, dmat, num_rounds) test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin) shap = bst.predict(test_dmat, pred_contribs=True) margin = bst.predict(test_dmat, output_margin=True) assume(len(dataset.y) > 0) assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3) @given(strategies.integers(1, 10), tm.dataset_strategy, shap_parameter_strategy) @settings(deadline=None, max_examples=20) def test_shap_interactions(self, num_rounds, dataset, param): param.update({"predictor": "gpu_predictor", "gpu_id": 0}) param = dataset.set_params(param) dmat = dataset.get_dmat() bst = xgb.train(param, dmat, num_rounds) test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin) shap = bst.predict(test_dmat, pred_interactions=True) margin = bst.predict(test_dmat, output_margin=True) assume(len(dataset.y) > 0) assert np.allclose( np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)), margin, 1e-3, 1e-3) def test_predict_leaf_basic(self): gpu_leaf = run_predict_leaf('gpu_predictor') cpu_leaf = run_predict_leaf('cpu_predictor') np.testing.assert_equal(gpu_leaf, cpu_leaf) def run_predict_leaf_booster(self, param, num_rounds, dataset): param = dataset.set_params(param) m = dataset.get_dmat() booster = xgb.train(param, dtrain=dataset.get_dmat(), num_boost_round=num_rounds) booster.set_param({'predictor': 'cpu_predictor'}) cpu_leaf = booster.predict(m, pred_leaf=True) booster.set_param({'predictor': 'gpu_predictor'}) gpu_leaf = booster.predict(m, 
pred_leaf=True) np.testing.assert_equal(cpu_leaf, gpu_leaf) @given(predict_parameter_strategy, tm.dataset_strategy) @settings(deadline=None) def test_predict_leaf_gbtree(self, param, dataset): param['booster'] = 'gbtree' param['tree_method'] = 'gpu_hist' self.run_predict_leaf_booster(param, 10, dataset) @given(predict_parameter_strategy, tm.dataset_strategy) @settings(deadline=None) def test_predict_leaf_dart(self, param, dataset): param['booster'] = 'dart' param['tree_method'] = 'gpu_hist' self.run_predict_leaf_booster(param, 10, dataset) @pytest.mark.skipif(**tm.no_sklearn()) @pytest.mark.skipif(**tm.no_pandas()) @given(df=data_frames([ column('x0', elements=strategies.integers(min_value=0, max_value=3)), column('x1', elements=strategies.integers(min_value=0, max_value=5)) ], index=range_indexes(min_size=20, max_size=50))) @settings(deadline=None) def test_predict_categorical_split(self, df): from sklearn.metrics import mean_squared_error df = df.astype('category') x0, x1 = df['x0'].to_numpy(), df['x1'].to_numpy() y = (x0 * 10 - 20) + (x1 - 2) dtrain = xgb.DMatrix(df, label=y, enable_categorical=True) params = { 'tree_method': 'gpu_hist', 'predictor': 'gpu_predictor', 'max_depth': 3, 'learning_rate': 1.0, 'base_score': 0.0, 'eval_metric': 'rmse' } eval_history = {} bst = xgb.train(params, dtrain, num_boost_round=5, evals=[(dtrain, 'train')], verbose_eval=False, evals_result=eval_history) pred = bst.predict(dtrain) rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False) np.testing.assert_almost_equal(rmse, eval_history['train']['rmse'][-1], decimal=5)
class TestGPUPredict(unittest.TestCase): def test_predict(self): iterations = 10 np.random.seed(1) test_num_rows = [10, 1000, 5000] test_num_cols = [10, 50, 500] # This test passes for tree_method=gpu_hist and tree_method=exact. but # for `hist` and `approx` the floating point error accumulates faster # and fails even tol is set to 1e-4. For `hist`, the mismatching rate # with 5000 rows is 0.04. for num_rows in test_num_rows: for num_cols in test_num_cols: dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2)) dval = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2)) dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2)) watchlist = [(dtrain, 'train'), (dval, 'validation')] res = {} param = { "objective": "binary:logistic", "predictor": "gpu_predictor", 'eval_metric': 'logloss', 'tree_method': 'gpu_hist', 'max_depth': 1 } bst = xgb.train(param, dtrain, iterations, evals=watchlist, evals_result=res) assert self.non_increasing(res["train"]["logloss"]) gpu_pred_train = bst.predict(dtrain, output_margin=True) gpu_pred_test = bst.predict(dtest, output_margin=True) gpu_pred_val = bst.predict(dval, output_margin=True) param["predictor"] = "cpu_predictor" bst_cpu = xgb.train(param, dtrain, iterations, evals=watchlist) cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True) cpu_pred_test = bst_cpu.predict(dtest, output_margin=True) cpu_pred_val = bst_cpu.predict(dval, output_margin=True) np.testing.assert_allclose(cpu_pred_train, gpu_pred_train, rtol=1e-6) np.testing.assert_allclose(cpu_pred_val, gpu_pred_val, rtol=1e-6) np.testing.assert_allclose(cpu_pred_test, gpu_pred_test, rtol=1e-6) def non_increasing(self, L): return all((y - x) < 0.001 for x, y in zip(L, L[1:])) # Test case for a bug where multiple batch predictions made on a # test set produce incorrect results @pytest.mark.skipif(**tm.no_sklearn()) def test_multi_predict(self): from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split n = 1000 X, y = make_regression(n, random_state=rng) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123) dtrain = xgb.DMatrix(X_train, label=y_train) dtest = xgb.DMatrix(X_test) params = {} params["tree_method"] = "gpu_hist" params['predictor'] = "gpu_predictor" bst_gpu_predict = xgb.train(params, dtrain) params['predictor'] = "cpu_predictor" bst_cpu_predict = xgb.train(params, dtrain) predict0 = bst_gpu_predict.predict(dtest) predict1 = bst_gpu_predict.predict(dtest) cpu_predict = bst_cpu_predict.predict(dtest) assert np.allclose(predict0, predict1) assert np.allclose(predict0, cpu_predict) @pytest.mark.skipif(**tm.no_sklearn()) def test_sklearn(self): m, n = 15000, 14 tr_size = 2500 X = np.random.rand(m, n) y = 200 * np.matmul(X, np.arange(-3, -3 + n)) X_train, y_train = X[:tr_size, :], y[:tr_size] X_test, y_test = X[tr_size:, :], y[tr_size:] # First with cpu_predictor params = { 'tree_method': 'gpu_hist', 'predictor': 'cpu_predictor', 'n_jobs': -1, 'seed': 123 } m = xgb.XGBRegressor(**params).fit(X_train, y_train) cpu_train_score = m.score(X_train, y_train) cpu_test_score = m.score(X_test, y_test) # Now with gpu_predictor params['predictor'] = 'gpu_predictor' m = xgb.XGBRegressor(**params).fit(X_train, y_train) gpu_train_score = m.score(X_train, y_train) gpu_test_score = m.score(X_test, y_test) assert np.allclose(cpu_train_score, gpu_train_score) assert np.allclose(cpu_test_score, gpu_test_score) @pytest.mark.skipif(**tm.no_cupy()) def 
test_inplace_predict_cupy(self): import cupy as cp cp.cuda.runtime.setDevice(0) rows = 1000 cols = 10 cp_rng = cp.random.RandomState(1994) cp.random.set_random_state(cp_rng) X = cp.random.randn(rows, cols) y = cp.random.randn(rows) dtrain = xgb.DMatrix(X, y) booster = xgb.train({'tree_method': 'gpu_hist'}, dtrain, num_boost_round=10) test = xgb.DMatrix(X[:10, ...]) predt_from_array = booster.inplace_predict(X[:10, ...]) predt_from_dmatrix = booster.predict(test) cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix) def predict_dense(x): inplace_predt = booster.inplace_predict(x) d = xgb.DMatrix(x) copied_predt = cp.array(booster.predict(d)) return cp.all(copied_predt == inplace_predt) # Don't do this on Windows, see issue #5793 if sys.platform.startswith("win"): pytest.skip( 'Multi-threaded in-place prediction with cuPy is not working on Windows' ) for i in range(10): run_threaded_predict(X, rows, predict_dense) @pytest.mark.skipif(**tm.no_cudf()) def test_inplace_predict_cudf(self): import cupy as cp import cudf import pandas as pd rows = 1000 cols = 10 rng = np.random.RandomState(1994) cp.cuda.runtime.setDevice(0) X = rng.randn(rows, cols) X = pd.DataFrame(X) y = rng.randn(rows) X = cudf.from_pandas(X) dtrain = xgb.DMatrix(X, y) booster = xgb.train({'tree_method': 'gpu_hist'}, dtrain, num_boost_round=10) test = xgb.DMatrix(X) predt_from_array = booster.inplace_predict(X) predt_from_dmatrix = booster.predict(test) cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix) def predict_df(x): inplace_predt = booster.inplace_predict(x) d = xgb.DMatrix(x) copied_predt = cp.array(booster.predict(d)) return cp.all(copied_predt == inplace_predt) for i in range(10): run_threaded_predict(X, rows, predict_df) @given(strategies.integers(1, 200), tm.dataset_strategy, shap_parameter_strategy, strategies.booleans()) @settings(deadline=None) def test_shap(self, num_rounds, dataset, param, all_rows): if param['max_depth'] == 0 and param['max_leaves'] == 0: return param.update({"predictor": "gpu_predictor", "gpu_id": 0}) param = dataset.set_params(param) dmat = dataset.get_dmat() bst = xgb.train(param, dmat, num_rounds) if all_rows: test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin) else: test_dmat = xgb.DMatrix(dataset.X[0:1, :]) shap = bst.predict(test_dmat, pred_contribs=True) bst.set_param({"predictor": "cpu_predictor"}) cpu_shap = bst.predict(test_dmat, pred_contribs=True) margin = bst.predict(test_dmat, output_margin=True) assert np.allclose(shap, cpu_shap, 1e-3, 1e-3) # feature contributions should add up to predictions assume(len(dataset.y) > 0) assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3)
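# ---------------------------------------------------------------------------
# A minimal standalone sketch (not part of the test suite) of the behaviour
# test_inplace_predict_cupy checks: in-place prediction on device-resident
# cupy data should match prediction through an equivalent DMatrix.  Assumes
# cupy and a GPU-enabled build of xgboost.
# ---------------------------------------------------------------------------
def _example_inplace_predict_cupy():
    import cupy as cp
    import xgboost as xgb

    rng = cp.random.RandomState(1994)
    X = rng.randn(512, 10)
    y = rng.randn(512)
    booster = xgb.train({"tree_method": "gpu_hist"}, xgb.DMatrix(X, y),
                        num_boost_round=8)

    # No DMatrix construction: the data stays on the device.
    inplace = booster.inplace_predict(X)
    copied = booster.predict(xgb.DMatrix(X))
    cp.testing.assert_allclose(inplace, cp.asarray(copied))
    return inplace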
class TestDistributedGPU(unittest.TestCase): @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_dask_cudf()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_dask_dataframe(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: import cupy as cp cp.cuda.runtime.setDevice(0) X, y = generate_array() X = dd.from_dask_array(X) y = dd.from_dask_array(y) X = X.map_partitions(cudf.from_pandas) y = y.map_partitions(cudf.from_pandas) dtrain = dxgb.DaskDMatrix(client, X, y) out = dxgb.train(client, {'tree_method': 'gpu_hist'}, dtrain=dtrain, evals=[(dtrain, 'X')], num_boost_round=4) assert isinstance(out['booster'], dxgb.Booster) assert len(out['history']['X']['rmse']) == 4 predictions = dxgb.predict(client, out, dtrain).compute() assert isinstance(predictions, np.ndarray) series_predictions = dxgb.inplace_predict(client, out, X) assert isinstance(series_predictions, dd.Series) series_predictions = series_predictions.compute() single_node = out['booster'].predict( xgboost.DMatrix(X.compute())) cp.testing.assert_allclose(single_node, predictions) np.testing.assert_allclose(single_node, series_predictions.to_array()) predt = dxgb.predict(client, out, X) assert isinstance(predt, dd.Series) def is_df(part): assert isinstance(part, cudf.DataFrame), part return part predt.map_partitions(is_df, meta=dd.utils.make_meta( {'prediction': 'f4'})) cp.testing.assert_allclose(predt.values.compute(), single_node) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_dask_array(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: import cupy as cp cp.cuda.runtime.setDevice(0) X, y = generate_array() X = X.map_blocks(cp.asarray) y = y.map_blocks(cp.asarray) dtrain = dxgb.DaskDMatrix(client, X, y) out = dxgb.train(client, {'tree_method': 'gpu_hist'}, dtrain=dtrain, evals=[(dtrain, 'X')], num_boost_round=2) from_dmatrix = dxgb.predict(client, out, dtrain).compute() inplace_predictions = dxgb.inplace_predict(client, out, X).compute() single_node = out['booster'].predict( xgboost.DMatrix(X.compute())) np.testing.assert_allclose(single_node, from_dmatrix) device = cp.cuda.runtime.getDevice() assert device == inplace_predictions.device.id single_node = cp.array(single_node) assert device == single_node.device.id cp.testing.assert_allclose(single_node, inplace_predictions) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_empty_dmatrix(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: parameters = {'tree_method': 'gpu_hist'} run_empty_dmatrix(client, parameters)
class TestDistributedGPU(unittest.TestCase): @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_dask_cudf()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_dask_dataframe(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: import cupy as cp cp.cuda.runtime.setDevice(0) X, y = generate_array() X = dd.from_dask_array(X) y = dd.from_dask_array(y) X = X.map_partitions(cudf.from_pandas) y = y.map_partitions(cudf.from_pandas) dtrain = dxgb.DaskDMatrix(client, X, y) out = dxgb.train(client, { 'tree_method': 'gpu_hist', 'debug_synchronize': True }, dtrain=dtrain, evals=[(dtrain, 'X')], num_boost_round=4) assert isinstance(out['booster'], dxgb.Booster) assert len(out['history']['X']['rmse']) == 4 predictions = dxgb.predict(client, out, dtrain).compute() assert isinstance(predictions, np.ndarray) series_predictions = dxgb.inplace_predict(client, out, X) assert isinstance(series_predictions, dd.Series) series_predictions = series_predictions.compute() single_node = out['booster'].predict( xgboost.DMatrix(X.compute())) cp.testing.assert_allclose(single_node, predictions) np.testing.assert_allclose(single_node, series_predictions.to_array()) predt = dxgb.predict(client, out, X) assert isinstance(predt, dd.Series) def is_df(part): assert isinstance(part, cudf.DataFrame), part return part predt.map_partitions(is_df, meta=dd.utils.make_meta( {'prediction': 'f4'})) cp.testing.assert_allclose(predt.values.compute(), single_node) @given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy) @settings(deadline=None) @pytest.mark.mgpu def test_gpu_hist(self, params, num_rounds, dataset): with LocalCUDACluster(n_workers=2) as cluster: with Client(cluster) as client: params['tree_method'] = 'gpu_hist' params = dataset.set_params(params) # multi class doesn't handle empty dataset well (empty # means at least 1 worker has data). if params['objective'] == "multi:softmax": return # It doesn't make sense to distribute a completely # empty dataset. 
if dataset.X.shape[0] == 0: return chunk = 128 X = da.from_array(dataset.X, chunks=(chunk, dataset.X.shape[1])) y = da.from_array(dataset.y, chunks=(chunk, )) if dataset.w is not None: w = da.from_array(dataset.w, chunks=(chunk, )) else: w = None m = dxgb.DaskDMatrix(client, data=X, label=y, weight=w) history = dxgb.train(client, params=params, dtrain=m, num_boost_round=num_rounds, evals=[(m, 'train')])['history'] note(history) assert tm.non_increasing(history['train'][dataset.metric]) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_dask_array(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: import cupy as cp cp.cuda.runtime.setDevice(0) X, y = generate_array() X = X.map_blocks(cp.asarray) y = y.map_blocks(cp.asarray) dtrain = dxgb.DaskDMatrix(client, X, y) out = dxgb.train(client, { 'tree_method': 'gpu_hist', 'debug_synchronize': True }, dtrain=dtrain, evals=[(dtrain, 'X')], num_boost_round=2) from_dmatrix = dxgb.predict(client, out, dtrain).compute() inplace_predictions = dxgb.inplace_predict(client, out, X).compute() single_node = out['booster'].predict( xgboost.DMatrix(X.compute())) np.testing.assert_allclose(single_node, from_dmatrix) device = cp.cuda.runtime.getDevice() assert device == inplace_predictions.device.id single_node = cp.array(single_node) assert device == single_node.device.id cp.testing.assert_allclose(single_node, inplace_predictions) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_empty_dmatrix(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: parameters = { 'tree_method': 'gpu_hist', 'debug_synchronize': True } run_empty_dmatrix(client, parameters) def run_quantile(self, name): if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows") exe = None for possible_path in { './testxgboost', './build/testxgboost', '../build/testxgboost', '../gpu-build/testxgboost' }: if os.path.exists(possible_path): exe = possible_path assert exe, 'No testxgboost executable found.' test = "--gtest_filter=GPUQuantile." + name def runit(worker_addr, rabit_args): port = None # setup environment for running the c++ part. for arg in rabit_args: if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'): port = arg.decode('utf-8') port = port.split('=') env = os.environ.copy() env[port[0]] = port[1] return subprocess.run([exe, test], env=env, stdout=subprocess.PIPE) with LocalCUDACluster() as cluster: with Client(cluster) as client: workers = list(dxgb._get_client_workers(client).keys()) rabit_args = dxgb._get_rabit_args(workers, client) futures = client.map(runit, workers, pure=False, workers=workers, rabit_args=rabit_args) results = client.gather(futures) for ret in results: msg = ret.stdout.decode('utf-8') assert msg.find('1 test from GPUQuantile') != -1, msg assert ret.returncode == 0, msg @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.mgpu @pytest.mark.gtest def test_quantile_basic(self): self.run_quantile('AllReduceBasic') @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.mgpu @pytest.mark.gtest def test_quantile_same_on_all_workers(self): self.run_quantile('SameOnAllWorkers')
        xgb.DeviceQuantileDMatrix(X, y)
        Xy = xgb.DeviceQuantileDMatrix(X, y, enable_categorical=True)
        assert Xy.num_row() == 3
        assert Xy.num_col() == 1

        X = X["f0"]
        with pytest.raises(ValueError):
            xgb.DMatrix(X, y)

        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        assert Xy.num_row() == 3
        assert Xy.num_col() == 1


@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.skipif(**tm.no_pandas())
def test_cudf_training_with_sklearn():
    from cudf import DataFrame as df
    from cudf import Series as ss
    import pandas as pd

    np.random.seed(1)
    X = pd.DataFrame(np.random.randn(50, 10))
    y = pd.DataFrame((np.random.randn(50) > 0).astype(np.int8))
    weights = np.random.random(50) + 1.0
    cudf_weights = df.from_pandas(pd.DataFrame(weights))
    base_margin = np.random.random(50)
    cudf_base_margin = df.from_pandas(pd.DataFrame(base_margin))
class TestFromColumnar:
    '''Tests for constructing DMatrix from data structure conforming Apache
    Arrow specification.'''

    @pytest.mark.skipif(**tm.no_cudf())
    def test_simple_dmatrix_from_cudf(self):
        _test_from_cudf(xgb.DMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_device_dmatrix_from_cudf(self):
        _test_from_cudf(xgb.DeviceQuantileDMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_training_simple_dmatrix(self):
        _test_cudf_training(xgb.DMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_training_device_dmatrix(self):
        _test_cudf_training(xgb.DeviceQuantileDMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_metainfo_simple_dmatrix(self):
        _test_cudf_metainfo(xgb.DMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_metainfo_device_dmatrix(self):
        _test_cudf_metainfo(xgb.DeviceQuantileDMatrix)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_categorical(self):
        import cudf
        _X, _y = tm.make_categorical(100, 30, 17, False)
        X = cudf.from_pandas(_X)
        y = cudf.from_pandas(_y)

        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        assert len(Xy.feature_types) == X.shape[1]
        assert all(t == "c" for t in Xy.feature_types)

        Xy = xgb.DeviceQuantileDMatrix(X, y, enable_categorical=True)
        assert len(Xy.feature_types) == X.shape[1]
        assert all(t == "c" for t in Xy.feature_types)

        # test missing value
        X = cudf.DataFrame({"f0": ["a", "b", np.NaN]})
        X["f0"] = X["f0"].astype("category")
        df, cat_codes, _, _ = xgb.data._transform_cudf_df(
            X, None, None, enable_categorical=True)
        for col in cat_codes:
            assert col.has_nulls

        y = [0, 1, 2]
        with pytest.raises(ValueError):
            xgb.DMatrix(X, y)

        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        assert Xy.num_row() == 3
        assert Xy.num_col() == 1

        with pytest.raises(ValueError):
            xgb.DeviceQuantileDMatrix(X, y)

        Xy = xgb.DeviceQuantileDMatrix(X, y, enable_categorical=True)
        assert Xy.num_row() == 3
        assert Xy.num_col() == 1

        X = X["f0"]
        with pytest.raises(ValueError):
            xgb.DMatrix(X, y)

        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        assert Xy.num_row() == 3
        assert Xy.num_col() == 1
class TestFromColumnar:
    '''Tests for constructing DMatrix from data structure conforming Apache
    Arrow specification.'''

    @pytest.mark.skipif(**tm.no_cudf())
    def test_from_cudf(self):
        '''Test constructing DMatrix from cudf'''
        import cudf
        dmatrix_from_cudf(np.float32, np.NAN)
        dmatrix_from_cudf(np.float64, np.NAN)

        dmatrix_from_cudf(np.int8, 2)
        dmatrix_from_cudf(np.int32, -2)
        dmatrix_from_cudf(np.int64, -3)

        cd = cudf.DataFrame({'x': [1, 2, 3], 'y': [0.1, 0.2, 0.3]})
        dtrain = xgb.DMatrix(cd)

        assert dtrain.feature_names == ['x', 'y']
        assert dtrain.feature_types == ['int', 'float']

        series = cudf.DataFrame({'x': [1, 2, 3]}).iloc[:, 0]
        assert isinstance(series, cudf.Series)
        dtrain = xgb.DMatrix(series)

        assert dtrain.feature_names == ['x']
        assert dtrain.feature_types == ['int']

        with pytest.raises(Exception):
            dtrain = xgb.DMatrix(cd, label=cd)

        # Test when number of elements is less than 8
        X = cudf.DataFrame(
            {'x': cudf.Series([0, 1, 2, np.NAN, 4], dtype=np.int32)})
        dtrain = xgb.DMatrix(X)
        assert dtrain.num_col() == 1
        assert dtrain.num_row() == 5

        # Boolean is not supported.
        X_boolean = cudf.DataFrame({'x': cudf.Series([True, False])})
        with pytest.raises(Exception):
            dtrain = xgb.DMatrix(X_boolean)

        y_boolean = cudf.DataFrame(
            {'x': cudf.Series([True, False, True, True, True])})
        with pytest.raises(Exception):
            dtrain = xgb.DMatrix(X_boolean, label=y_boolean)

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_training(self):
        from cudf import DataFrame as df
        import pandas as pd
        np.random.seed(1)
        X = pd.DataFrame(np.random.randn(50, 10))
        y = pd.DataFrame(np.random.randn(50))
        weights = np.random.random(50) + 1.0
        cudf_weights = df.from_pandas(pd.DataFrame(weights))
        base_margin = np.random.random(50)
        cudf_base_margin = df.from_pandas(pd.DataFrame(base_margin))

        evals_result_cudf = {}
        dtrain_cudf = xgb.DMatrix(df.from_pandas(X), df.from_pandas(y),
                                  weight=cudf_weights,
                                  base_margin=cudf_base_margin)
        params = {'gpu_id': 0, 'nthread': 1}
        xgb.train(params, dtrain_cudf, evals=[(dtrain_cudf, "train")],
                  evals_result=evals_result_cudf)
        evals_result_np = {}
        dtrain_np = xgb.DMatrix(X, y, weight=weights,
                                base_margin=base_margin)
        xgb.train(params, dtrain_np, evals=[(dtrain_np, "train")],
                  evals_result=evals_result_np)
        assert np.array_equal(evals_result_cudf["train"]["rmse"],
                              evals_result_np["train"]["rmse"])

    @pytest.mark.skipif(**tm.no_cudf())
    def test_cudf_metainfo(self):
        from cudf import DataFrame as df
        import pandas as pd
        n = 100
        X = np.random.random((n, 2))
        dmat_cudf = xgb.DMatrix(X)
        dmat = xgb.DMatrix(X)
        floats = np.random.random(n)
        uints = np.array([4, 2, 8]).astype("uint32")
        cudf_floats = df.from_pandas(pd.DataFrame(floats))
        cudf_uints = df.from_pandas(pd.DataFrame(uints))
        dmat.set_float_info('weight', floats)
        dmat.set_float_info('label', floats)
        dmat.set_float_info('base_margin', floats)
        dmat.set_uint_info('group', uints)
        dmat_cudf.set_interface_info('weight', cudf_floats)
        dmat_cudf.set_interface_info('label', cudf_floats)
        dmat_cudf.set_interface_info('base_margin', cudf_floats)
        dmat_cudf.set_interface_info('group', cudf_uints)

        # Test setting info with cudf DataFrame
        assert np.array_equal(dmat.get_float_info('weight'),
                              dmat_cudf.get_float_info('weight'))
        assert np.array_equal(dmat.get_float_info('label'),
                              dmat_cudf.get_float_info('label'))
        assert np.array_equal(dmat.get_float_info('base_margin'),
                              dmat_cudf.get_float_info('base_margin'))
        assert np.array_equal(dmat.get_uint_info('group_ptr'),
                              dmat_cudf.get_uint_info('group_ptr'))

        # Test setting info with cudf Series
        dmat_cudf.set_interface_info('weight',
                                     cudf_floats[cudf_floats.columns[0]])
        dmat_cudf.set_interface_info('label',
                                     cudf_floats[cudf_floats.columns[0]])
        dmat_cudf.set_interface_info('base_margin',
                                     cudf_floats[cudf_floats.columns[0]])
        dmat_cudf.set_interface_info('group',
                                     cudf_uints[cudf_uints.columns[0]])
        assert np.array_equal(dmat.get_float_info('weight'),
                              dmat_cudf.get_float_info('weight'))
        assert np.array_equal(dmat.get_float_info('label'),
                              dmat_cudf.get_float_info('label'))
        assert np.array_equal(dmat.get_float_info('base_margin'),
                              dmat_cudf.get_float_info('base_margin'))
        assert np.array_equal(dmat.get_uint_info('group_ptr'),
                              dmat_cudf.get_uint_info('group_ptr'))
class TestDistributedGPU(unittest.TestCase):
    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_cudf())
    @pytest.mark.skipif(**tm.no_dask_cudf())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    @pytest.mark.mgpu
    def test_dask_dataframe(self):
        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
                run_with_dask_dataframe(dxgb.DaskDMatrix, client)
                run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix,
                                        client)

    @given(parameter_strategy, strategies.integers(1, 20),
           tm.dataset_strategy)
    @settings(deadline=duration(seconds=120))
    @pytest.mark.mgpu
    def test_gpu_hist(self, params, num_rounds, dataset):
        with LocalCUDACluster(n_workers=2) as cluster:
            with Client(cluster) as client:
                run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix,
                             client)
                run_gpu_hist(params, num_rounds, dataset,
                             dxgb.DaskDeviceQuantileDMatrix, client)

    @pytest.mark.skipif(**tm.no_cupy())
    @pytest.mark.mgpu
    def test_dask_array(self):
        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
                run_with_dask_array(dxgb.DaskDMatrix, client)
                run_with_dask_array(dxgb.DaskDeviceQuantileDMatrix, client)

    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    @pytest.mark.mgpu
    def test_empty_dmatrix(self):
        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
                parameters = {'tree_method': 'gpu_hist',
                              'debug_synchronize': True}
                run_empty_dmatrix_reg(client, parameters)
                run_empty_dmatrix_cls(client, parameters)

    def run_quantile(self, name):
        if sys.platform.startswith("win"):
            pytest.skip("Skipping dask tests on Windows")

        exe = None
        for possible_path in {'./testxgboost', './build/testxgboost',
                              '../build/testxgboost',
                              '../gpu-build/testxgboost'}:
            if os.path.exists(possible_path):
                exe = possible_path
        assert exe, 'No testxgboost executable found.'
        test = "--gtest_filter=GPUQuantile." + name

        def runit(worker_addr, rabit_args):
            port = None
            # setup environment for running the c++ part.
            for arg in rabit_args:
                if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'):
                    port = arg.decode('utf-8')
            port = port.split('=')
            env = os.environ.copy()
            env[port[0]] = port[1]
            return subprocess.run([exe, test], env=env,
                                  stdout=subprocess.PIPE)

        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
                workers = list(dxgb._get_client_workers(client).keys())
                rabit_args = client.sync(dxgb._get_rabit_args, workers,
                                         client)
                futures = client.map(runit,
                                     workers,
                                     pure=False,
                                     workers=workers,
                                     rabit_args=rabit_args)
                results = client.gather(futures)
                for ret in results:
                    msg = ret.stdout.decode('utf-8')
                    assert msg.find('1 test from GPUQuantile') != -1, msg
                    assert ret.returncode == 0, msg

    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.mgpu
    @pytest.mark.gtest
    def test_quantile_basic(self):
        self.run_quantile('AllReduceBasic')

    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.mgpu
    @pytest.mark.gtest
    def test_quantile_same_on_all_workers(self):
        self.run_quantile('SameOnAllWorkers')
class TestDistributedGPU:
    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_cudf())
    @pytest.mark.skipif(**tm.no_dask_cudf())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    @pytest.mark.mgpu
    def test_dask_dataframe(self,
                            local_cuda_cluster: LocalCUDACluster) -> None:
        with Client(local_cuda_cluster) as client:
            run_with_dask_dataframe(dxgb.DaskDMatrix, client)
            run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client)

    @given(params=parameter_strategy,
           num_rounds=strategies.integers(1, 20),
           dataset=tm.dataset_strategy)
    @settings(deadline=duration(seconds=120))
    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    @pytest.mark.parametrize('local_cuda_cluster', [{'n_workers': 2}],
                             indirect=['local_cuda_cluster'])
    @pytest.mark.mgpu
    def test_gpu_hist(self, params: Dict, num_rounds: int,
                      dataset: tm.TestDataset,
                      local_cuda_cluster: LocalCUDACluster) -> None:
        with Client(local_cuda_cluster) as client:
            run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix,
                         client)
            run_gpu_hist(params, num_rounds, dataset,
                         dxgb.DaskDeviceQuantileDMatrix, client)

    @pytest.mark.skipif(**tm.no_cupy())
    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    @pytest.mark.mgpu
    def test_dask_array(self, local_cuda_cluster: LocalCUDACluster) -> None:
        with Client(local_cuda_cluster) as client:
            run_with_dask_array(dxgb.DaskDMatrix, client)
            run_with_dask_array(dxgb.DaskDeviceQuantileDMatrix, client)

    @pytest.mark.skipif(**tm.no_cupy())
    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    def test_early_stopping(self,
                            local_cuda_cluster: LocalCUDACluster) -> None:
        from sklearn.datasets import load_breast_cancer
        with Client(local_cuda_cluster) as client:
            X, y = load_breast_cancer(return_X_y=True)
            X, y = da.from_array(X), da.from_array(y)

            m = dxgb.DaskDMatrix(client, X, y)
            valid = dxgb.DaskDMatrix(client, X, y)
            early_stopping_rounds = 5
            booster = dxgb.train(
                client,
                {'objective': 'binary:logistic',
                 'eval_metric': 'error',
                 'tree_method': 'gpu_hist'},
                m,
                evals=[(valid, 'Valid')],
                num_boost_round=1000,
                early_stopping_rounds=early_stopping_rounds)['booster']
            assert hasattr(booster, 'best_score')
            dump = booster.get_dump(dump_format='json')
            assert len(
                dump) - booster.best_iteration == early_stopping_rounds + 1

            valid_X = X
            valid_y = y
            cls = dxgb.DaskXGBClassifier(objective='binary:logistic',
                                         tree_method='gpu_hist',
                                         n_estimators=100)
            cls.client = client
            cls.fit(X, y, early_stopping_rounds=early_stopping_rounds,
                    eval_set=[(valid_X, valid_y)])
            booster = cls.get_booster()
            dump = booster.get_dump(dump_format='json')
            assert len(
                dump) - booster.best_iteration == early_stopping_rounds + 1

    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    @pytest.mark.mgpu
    def test_empty_dmatrix(self,
                           local_cuda_cluster: LocalCUDACluster) -> None:
        with Client(local_cuda_cluster) as client:
            parameters = {'tree_method': 'gpu_hist',
                          'debug_synchronize': True}
            run_empty_dmatrix_reg(client, parameters)
            run_empty_dmatrix_cls(client, parameters)

    def run_quantile(self, name: str,
                     local_cuda_cluster: LocalCUDACluster) -> None:
        if sys.platform.startswith("win"):
            pytest.skip("Skipping dask tests on Windows")

        exe = None
        for possible_path in {'./testxgboost', './build/testxgboost',
                              '../build/testxgboost',
                              '../gpu-build/testxgboost'}:
            if os.path.exists(possible_path):
                exe = possible_path
        assert exe, 'No testxgboost executable found.'
        test = "--gtest_filter=GPUQuantile." + name

        def runit(worker_addr: str,
                  rabit_args: List[bytes]) -> subprocess.CompletedProcess:
            port_env = ''
            # setup environment for running the c++ part.
            for arg in rabit_args:
                if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'):
                    port_env = arg.decode('utf-8')
            port = port_env.split('=')
            env = os.environ.copy()
            env[port[0]] = port[1]
            return subprocess.run([str(exe), test], env=env,
                                  stdout=subprocess.PIPE)

        with Client(local_cuda_cluster) as client:
            workers = list(_get_client_workers(client).keys())
            rabit_args = client.sync(dxgb._get_rabit_args, workers, client)
            futures = client.map(runit,
                                 workers,
                                 pure=False,
                                 workers=workers,
                                 rabit_args=rabit_args)
            results = client.gather(futures)
            for ret in results:
                msg = ret.stdout.decode('utf-8')
                assert msg.find('1 test from GPUQuantile') != -1, msg
                assert ret.returncode == 0, msg

    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    @pytest.mark.mgpu
    @pytest.mark.gtest
    def test_quantile_basic(self,
                            local_cuda_cluster: LocalCUDACluster) -> None:
        self.run_quantile('AllReduceBasic', local_cuda_cluster)

    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_dask_cuda())
    @pytest.mark.mgpu
    @pytest.mark.gtest
    def test_quantile_same_on_all_workers(
            self, local_cuda_cluster: LocalCUDACluster) -> None:
        self.run_quantile('SameOnAllWorkers', local_cuda_cluster)
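# --- Illustrative sketch, not part of the original test file ----------------
# The TestDistributedGPU class above receives a `local_cuda_cluster` pytest
# fixture that is not defined in this snippet; it would normally be provided
# by the suite's conftest.py. A minimal fixture of that shape could look like
# the following. The fixture body, its default arguments, and its placement
# are assumptions for illustration only, not the project's actual conftest.
import pytest
from dask_cuda import LocalCUDACluster


@pytest.fixture
def local_cuda_cluster(request):
    # Tests such as test_gpu_hist parametrize this fixture indirectly with
    # e.g. {'n_workers': 2}; fall back to LocalCUDACluster defaults otherwise.
    kwargs = getattr(request, "param", {})
    with LocalCUDACluster(**kwargs) as cluster:
        yield cluster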