class TestDistributedGPU(unittest.TestCase): @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_dask_cudf()) @pytest.mark.skipif(**tm.no_dask_cuda()) def test_dask_dataframe(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: X, y = generate_array() X = dd.from_dask_array(X) y = dd.from_dask_array(y) X = X.map_partitions(cudf.from_pandas) y = y.map_partitions(cudf.from_pandas) dtrain = dxgb.DaskDMatrix(client, X, y) out = dxgb.train(client, {'tree_method': 'gpu_hist'}, dtrain=dtrain, evals=[(dtrain, 'X')], num_boost_round=2) assert isinstance(out['booster'], dxgb.Booster) assert len(out['history']['X']['rmse']) == 2 predictions = dxgb.predict(client, out, dtrain).compute() assert isinstance(predictions, np.ndarray) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_empty_dmatrix(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: parameters = {'tree_method': 'gpu_hist'} run_empty_dmatrix(client, parameters)
class TestWithDask: def run_updater_test(self, client, params, num_rounds, dataset, tree_method): params['tree_method'] = tree_method params = dataset.set_params(params) # It doesn't make sense to distribute a completely # empty dataset. if dataset.X.shape[0] == 0: return chunk = 128 X = da.from_array(dataset.X, chunks=(chunk, dataset.X.shape[1])) y = da.from_array(dataset.y, chunks=(chunk, )) if dataset.w is not None: w = da.from_array(dataset.w, chunks=(chunk, )) else: w = None m = xgb.dask.DaskDMatrix(client, data=X, label=y, weight=w) history = xgb.dask.train(client, params=params, dtrain=m, num_boost_round=num_rounds, evals=[(m, 'train')])['history'] note(history) history = history['train'][dataset.metric] assert tm.non_increasing(history) # Make sure that it's decreasing assert history[-1] < history[0] @given(params=hist_parameter_strategy, dataset=tm.dataset_strategy) @settings(deadline=None) def test_hist(self, params, dataset, client): num_rounds = 30 self.run_updater_test(client, params, num_rounds, dataset, 'hist') @given(params=exact_parameter_strategy, dataset=tm.dataset_strategy) @settings(deadline=None) def test_approx(self, client, params, dataset): num_rounds = 30 self.run_updater_test(client, params, num_rounds, dataset, 'approx') def run_quantile(self, name): if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows") exe = None for possible_path in { './testxgboost', './build/testxgboost', '../build/testxgboost', '../cpu-build/testxgboost' }: if os.path.exists(possible_path): exe = possible_path if exe is None: return test = "--gtest_filter=Quantile." + name def runit(worker_addr, rabit_args): port = None # setup environment for running the c++ part. for arg in rabit_args: if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'): port = arg.decode('utf-8') port = port.split('=') env = os.environ.copy() env[port[0]] = port[1] return subprocess.run([exe, test], env=env, capture_output=True) with LocalCluster(n_workers=4) as cluster: with Client(cluster) as client: workers = list(xgb.dask._get_client_workers(client).keys()) rabit_args = client.sync(xgb.dask._get_rabit_args, workers, client) futures = client.map(runit, workers, pure=False, workers=workers, rabit_args=rabit_args) results = client.gather(futures) for ret in results: msg = ret.stdout.decode('utf-8') assert msg.find('1 test from Quantile') != -1, msg assert ret.returncode == 0, msg @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.gtest def test_quantile_basic(self): self.run_quantile('DistributedBasic') @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.gtest def test_quantile(self): self.run_quantile('Distributed') @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.gtest def test_quantile_same_on_all_workers(self): self.run_quantile('SameOnAllWorkers')
import testing as tm import pytest import xgboost as xgb import sys import numpy as np import json import asyncio from sklearn.datasets import make_classification import os import subprocess from hypothesis import given, settings, note from test_updaters import hist_parameter_strategy, exact_parameter_strategy if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows", allow_module_level=True) if tm.no_dask()['condition']: pytest.skip(msg=tm.no_dask()['reason'], allow_module_level=True) try: from distributed import LocalCluster, Client, get_client from distributed.utils_test import client, loop, cluster_fixture import dask.dataframe as dd import dask.array as da from xgboost.dask import DaskDMatrix import dask except ImportError: LocalCluster = None Client = None get_client = None client = None loop = None
class TestDistributedGPU(unittest.TestCase): @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_dask_cudf()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_dask_dataframe(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: run_with_dask_dataframe(dxgb.DaskDMatrix, client) run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client) @given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy) @settings(deadline=duration(seconds=120)) @pytest.mark.mgpu def test_gpu_hist(self, params, num_rounds, dataset): with LocalCUDACluster(n_workers=2) as cluster: with Client(cluster) as client: run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix, client) run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDeviceQuantileDMatrix, client) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_dask_array(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: run_with_dask_array(dxgb.DaskDMatrix, client) run_with_dask_array(dxgb.DaskDeviceQuantileDMatrix, client) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_empty_dmatrix(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: parameters = {'tree_method': 'gpu_hist', 'debug_synchronize': True} run_empty_dmatrix_reg(client, parameters) run_empty_dmatrix_cls(client, parameters) def run_quantile(self, name): if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows") exe = None for possible_path in {'./testxgboost', './build/testxgboost', '../build/testxgboost', '../gpu-build/testxgboost'}: if os.path.exists(possible_path): exe = possible_path assert exe, 'No testxgboost executable found.' test = "--gtest_filter=GPUQuantile." + name def runit(worker_addr, rabit_args): port = None # setup environment for running the c++ part. for arg in rabit_args: if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'): port = arg.decode('utf-8') port = port.split('=') env = os.environ.copy() env[port[0]] = port[1] return subprocess.run([exe, test], env=env, stdout=subprocess.PIPE) with LocalCUDACluster() as cluster: with Client(cluster) as client: workers = list(dxgb._get_client_workers(client).keys()) rabit_args = client.sync(dxgb._get_rabit_args, workers, client) futures = client.map(runit, workers, pure=False, workers=workers, rabit_args=rabit_args) results = client.gather(futures) for ret in results: msg = ret.stdout.decode('utf-8') assert msg.find('1 test from GPUQuantile') != -1, msg assert ret.returncode == 0, msg @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.mgpu @pytest.mark.gtest def test_quantile_basic(self): self.run_quantile('AllReduceBasic') @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.mgpu @pytest.mark.gtest def test_quantile_same_on_all_workers(self): self.run_quantile('SameOnAllWorkers')
class TestDistributedGPU: @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_dask_cudf()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_dask_dataframe(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: run_with_dask_dataframe(dxgb.DaskDMatrix, client) run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client) @given( params=parameter_strategy, num_rounds=strategies.integers(1, 20), dataset=tm.dataset_strategy, ) @settings(deadline=duration(seconds=120), suppress_health_check=suppress) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.parametrize( "local_cuda_cluster", [{"n_workers": 2}], indirect=["local_cuda_cluster"] ) @pytest.mark.mgpu def test_gpu_hist( self, params: Dict, num_rounds: int, dataset: tm.TestDataset, local_cuda_cluster: LocalCUDACluster, ) -> None: with Client(local_cuda_cluster) as client: run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix, client) run_gpu_hist( params, num_rounds, dataset, dxgb.DaskDeviceQuantileDMatrix, client ) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_dask_array(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: run_with_dask_array(dxgb.DaskDMatrix, client) run_with_dask_array(dxgb.DaskDeviceQuantileDMatrix, client) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) def test_early_stopping(self, local_cuda_cluster: LocalCUDACluster) -> None: from sklearn.datasets import load_breast_cancer with Client(local_cuda_cluster) as client: X, y = load_breast_cancer(return_X_y=True) X, y = da.from_array(X), da.from_array(y) m = dxgb.DaskDMatrix(client, X, y) valid = dxgb.DaskDMatrix(client, X, y) early_stopping_rounds = 5 booster = dxgb.train(client, {'objective': 'binary:logistic', 'eval_metric': 'error', 'tree_method': 'gpu_hist'}, m, evals=[(valid, 'Valid')], num_boost_round=1000, early_stopping_rounds=early_stopping_rounds)[ 'booster'] assert hasattr(booster, 'best_score') dump = booster.get_dump(dump_format='json') assert len(dump) - booster.best_iteration == early_stopping_rounds + 1 valid_X = X valid_y = y cls = dxgb.DaskXGBClassifier(objective='binary:logistic', tree_method='gpu_hist', n_estimators=100) cls.client = client cls.fit(X, y, early_stopping_rounds=early_stopping_rounds, eval_set=[(valid_X, valid_y)]) booster = cls.get_booster() dump = booster.get_dump(dump_format='json') assert len(dump) - booster.best_iteration == early_stopping_rounds + 1 @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.parametrize("model", ["boosting"]) def test_dask_classifier( self, model: str, local_cuda_cluster: LocalCUDACluster ) -> None: import dask_cudf with Client(local_cuda_cluster) as client: X_, y_, w_ = generate_array(with_weights=True) y_ = (y_ * 10).astype(np.int32) X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X_)) y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y_)) w = dask_cudf.from_dask_dataframe(dd.from_dask_array(w_)) run_dask_classifier(X, y, w, model, client, 10) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_empty_dmatrix(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: parameters = {'tree_method': 'gpu_hist', 'debug_synchronize': True} run_empty_dmatrix_reg(client, parameters) run_empty_dmatrix_cls(client, parameters) def test_empty_dmatrix_auc(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: n_workers = len(_get_client_workers(client)) run_empty_dmatrix_auc(client, "gpu_hist", n_workers) def test_auc(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: run_auc(client, "gpu_hist") def test_data_initialization(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: X, y, _ = generate_array() fw = da.random.random((random_cols, )) fw = fw - fw.min() m = dxgb.DaskDMatrix(client, X, y, feature_weights=fw) workers = _get_client_workers(client) rabit_args = client.sync(dxgb._get_rabit_args, len(workers), client) def worker_fn(worker_addr: str, data_ref: Dict) -> None: with dxgb.RabitContext(rabit_args): local_dtrain = dxgb._dmatrix_from_list_of_parts(**data_ref) fw_rows = local_dtrain.get_float_info("feature_weights").shape[0] assert fw_rows == local_dtrain.num_col() futures = [] for i in range(len(workers)): futures.append( client.submit( worker_fn, workers[i], m._create_fn_args(workers[i]), pure=False, workers=[workers[i]] ) ) client.gather(futures) def test_interface_consistency(self) -> None: sig = OrderedDict(signature(dxgb.DaskDMatrix).parameters) del sig["client"] ddm_names = list(sig.keys()) sig = OrderedDict(signature(dxgb.DaskDeviceQuantileDMatrix).parameters) del sig["client"] del sig["max_bin"] ddqdm_names = list(sig.keys()) assert len(ddm_names) == len(ddqdm_names) # between dask for i in range(len(ddm_names)): assert ddm_names[i] == ddqdm_names[i] sig = OrderedDict(signature(xgb.DMatrix).parameters) del sig["nthread"] # no nthread in dask dm_names = list(sig.keys()) sig = OrderedDict(signature(xgb.DeviceQuantileDMatrix).parameters) del sig["nthread"] del sig["max_bin"] dqdm_names = list(sig.keys()) # between single node assert len(dm_names) == len(dqdm_names) for i in range(len(dm_names)): assert dm_names[i] == dqdm_names[i] # ddm <-> dm for i in range(len(ddm_names)): assert ddm_names[i] == dm_names[i] # dqdm <-> ddqdm for i in range(len(ddqdm_names)): assert ddqdm_names[i] == dqdm_names[i] sig = OrderedDict(signature(xgb.XGBRanker.fit).parameters) ranker_names = list(sig.keys()) sig = OrderedDict(signature(xgb.dask.DaskXGBRanker.fit).parameters) dranker_names = list(sig.keys()) for rn, drn in zip(ranker_names, dranker_names): assert rn == drn def run_quantile(self, name: str, local_cuda_cluster: LocalCUDACluster) -> None: if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows") exe = None for possible_path in {'./testxgboost', './build/testxgboost', '../build/testxgboost', '../gpu-build/testxgboost'}: if os.path.exists(possible_path): exe = possible_path assert exe, 'No testxgboost executable found.' test = "--gtest_filter=GPUQuantile." + name def runit( worker_addr: str, rabit_args: List[bytes] ) -> subprocess.CompletedProcess: port_env = '' # setup environment for running the c++ part. for arg in rabit_args: if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'): port_env = arg.decode('utf-8') port = port_env.split('=') env = os.environ.copy() env[port[0]] = port[1] return subprocess.run([str(exe), test], env=env, stdout=subprocess.PIPE) with Client(local_cuda_cluster) as client: workers = _get_client_workers(client) rabit_args = client.sync(dxgb._get_rabit_args, workers, client) futures = client.map(runit, workers, pure=False, workers=workers, rabit_args=rabit_args) results = client.gather(futures) for ret in results: msg = ret.stdout.decode('utf-8') assert msg.find('1 test from GPUQuantile') != -1, msg assert ret.returncode == 0, msg @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu @pytest.mark.gtest def test_quantile_basic(self, local_cuda_cluster: LocalCUDACluster) -> None: self.run_quantile('AllReduceBasic', local_cuda_cluster) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu @pytest.mark.gtest def test_quantile_same_on_all_workers( self, local_cuda_cluster: LocalCUDACluster ) -> None: self.run_quantile('SameOnAllWorkers', local_cuda_cluster)
m = await xgboost.dask.DaskDeviceQuantileDMatrix(client, X, y) output = await xgboost.dask.train(client, {'tree_method': 'gpu_hist'}, dtrain=m) with_m = await xgboost.dask.predict(client, output, m) with_X = await xgboost.dask.predict(client, output, X) inplace = await xgboost.dask.inplace_predict(client, output, X) assert isinstance(with_m, da.Array) assert isinstance(with_X, da.Array) assert isinstance(inplace, da.Array) cp.testing.assert_allclose(await client.compute(with_m), await client.compute(with_X)) cp.testing.assert_allclose(await client.compute(with_m), await client.compute(inplace)) client.shutdown() return output @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_with_asyncio(local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: address = client.scheduler.address output = asyncio.run(run_from_dask_array_asyncio(address)) assert isinstance(output['booster'], xgboost.Booster) assert isinstance(output['history'], dict)
class TestDistributedGPU(unittest.TestCase): @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_dask_cudf()) @pytest.mark.skipif(**tm.no_dask_cuda()) def test_dask_dataframe(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: X, y = generate_array() X = dd.from_dask_array(X) y = dd.from_dask_array(y) X = X.map_partitions(cudf.from_pandas) y = y.map_partitions(cudf.from_pandas) dtrain = dxgb.DaskDMatrix(client, X, y) out = dxgb.train(client, {'tree_method': 'gpu_hist'}, dtrain=dtrain, evals=[(dtrain, 'X')], num_boost_round=2) assert isinstance(out['booster'], dxgb.Booster) assert len(out['history']['X']['rmse']) == 2 predictions = dxgb.predict(client, out, dtrain).compute() assert isinstance(predictions, np.ndarray) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_empty_dmatrix(self): def _check_outputs(out, predictions): assert isinstance(out['booster'], dxgb.Booster) assert len(out['history']['validation']['rmse']) == 2 assert isinstance(predictions, np.ndarray) assert predictions.shape[0] == 1 parameters = { 'tree_method': 'gpu_hist', 'verbosity': 3, 'debug_synchronize': True } with LocalCUDACluster() as cluster: with Client(cluster) as client: kRows, kCols = 1, 97 X = dd.from_array(np.random.randn(kRows, kCols)) y = dd.from_array(np.random.rand(kRows)) dtrain = dxgb.DaskDMatrix(client, X, y) out = dxgb.train(client, parameters, dtrain=dtrain, evals=[(dtrain, 'validation')], num_boost_round=2) predictions = dxgb.predict(client=client, model=out, data=dtrain).compute() _check_outputs(out, predictions) # train has more rows than evals valid = dtrain kRows += 1 X = dd.from_array(np.random.randn(kRows, kCols)) y = dd.from_array(np.random.rand(kRows)) dtrain = dxgb.DaskDMatrix(client, X, y) out = dxgb.train(client, parameters, dtrain=dtrain, evals=[(valid, 'validation')], num_boost_round=2) predictions = dxgb.predict(client=client, model=out, data=valid).compute() _check_outputs(out, predictions)
class TestWithDask: def test_global_config(self, client: "Client") -> None: X, y, _ = generate_array() xgb.config.set_config(verbosity=0) dtrain = DaskDMatrix(client, X, y) before_fname = './before_training-test_global_config' after_fname = './after_training-test_global_config' class TestCallback(xgb.callback.TrainingCallback): def write_file(self, fname: str) -> None: with open(fname, 'w') as fd: fd.write(str(xgb.config.get_config()['verbosity'])) def before_training(self, model: xgb.Booster) -> xgb.Booster: self.write_file(before_fname) assert xgb.config.get_config()['verbosity'] == 0 return model def after_training(self, model: xgb.Booster) -> xgb.Booster: assert xgb.config.get_config()['verbosity'] == 0 return model def before_iteration(self, model: xgb.Booster, epoch: int, evals_log: Dict) -> bool: assert xgb.config.get_config()['verbosity'] == 0 return False def after_iteration(self, model: xgb.Booster, epoch: int, evals_log: Dict) -> bool: self.write_file(after_fname) assert xgb.config.get_config()['verbosity'] == 0 return False xgb.dask.train(client, {}, dtrain, num_boost_round=4, callbacks=[TestCallback()])['booster'] with open(before_fname, 'r') as before, open(after_fname, 'r') as after: assert before.read() == '0' assert after.read() == '0' os.remove(before_fname) os.remove(after_fname) def run_updater_test(self, client: "Client", params: Dict, num_rounds: int, dataset: tm.TestDataset, tree_method: str) -> None: params['tree_method'] = tree_method params = dataset.set_params(params) # It doesn't make sense to distribute a completely # empty dataset. if dataset.X.shape[0] == 0: return chunk = 128 X = da.from_array(dataset.X, chunks=(chunk, dataset.X.shape[1])) y = da.from_array(dataset.y, chunks=(chunk, )) if dataset.w is not None: w = da.from_array(dataset.w, chunks=(chunk, )) else: w = None m = xgb.dask.DaskDMatrix(client, data=X, label=y, weight=w) history = xgb.dask.train(client, params=params, dtrain=m, num_boost_round=num_rounds, evals=[(m, 'train')])['history'] note(history) history = history['train'][dataset.metric] assert tm.non_increasing(history) # Make sure that it's decreasing assert history[-1] < history[0] @given(params=hist_parameter_strategy, dataset=tm.dataset_strategy) @settings(deadline=None, suppress_health_check=suppress) def test_hist(self, params: Dict, dataset: tm.TestDataset, client: "Client") -> None: num_rounds = 30 self.run_updater_test(client, params, num_rounds, dataset, 'hist') @given(params=exact_parameter_strategy, dataset=tm.dataset_strategy) @settings(deadline=None, suppress_health_check=suppress) def test_approx(self, client: "Client", params: Dict, dataset: tm.TestDataset) -> None: num_rounds = 30 self.run_updater_test(client, params, num_rounds, dataset, 'approx') def run_quantile(self, name: str) -> None: if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows") exe: Optional[str] = None for possible_path in { './testxgboost', './build/testxgboost', '../build/testxgboost', '../cpu-build/testxgboost' }: if os.path.exists(possible_path): exe = possible_path if exe is None: return test = "--gtest_filter=Quantile." + name def runit(worker_addr: str, rabit_args: List[bytes]) -> subprocess.CompletedProcess: port_env = '' # setup environment for running the c++ part. for arg in rabit_args: if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'): port_env = arg.decode('utf-8') port = port_env.split('=') env = os.environ.copy() env[port[0]] = port[1] return subprocess.run([str(exe), test], env=env, capture_output=True) with LocalCluster(n_workers=4) as cluster: with Client(cluster) as client: workers = list(_get_client_workers(client).keys()) rabit_args = client.sync(xgb.dask._get_rabit_args, len(workers), client) futures = client.map(runit, workers, pure=False, workers=workers, rabit_args=rabit_args) results = client.gather(futures) for ret in results: msg = ret.stdout.decode('utf-8') assert msg.find('1 test from Quantile') != -1, msg assert ret.returncode == 0, msg @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.gtest def test_quantile_basic(self) -> None: self.run_quantile('DistributedBasic') @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.gtest def test_quantile(self) -> None: self.run_quantile('Distributed') @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.gtest def test_quantile_same_on_all_workers(self) -> None: self.run_quantile('SameOnAllWorkers') def test_n_workers(self) -> None: with LocalCluster(n_workers=2) as cluster: with Client(cluster) as client: workers = list(_get_client_workers(client).keys()) from sklearn.datasets import load_breast_cancer X, y = load_breast_cancer(return_X_y=True) dX = client.submit(da.from_array, X, workers=[workers[0]]).result() dy = client.submit(da.from_array, y, workers=[workers[0]]).result() train = xgb.dask.DaskDMatrix(client, dX, dy) dX = dd.from_array(X) dX = client.persist(dX, workers={dX: workers[1]}) dy = dd.from_array(y) dy = client.persist(dy, workers={dy: workers[1]}) valid = xgb.dask.DaskDMatrix(client, dX, dy) merged = xgb.dask._get_workers_from_data(train, evals=[(valid, 'Valid')]) assert len(merged) == 2 @pytest.mark.skipif(**tm.no_dask()) def test_feature_weights(self, client: "Client") -> None: kRows = 1024 kCols = 64 X = da.random.random((kRows, kCols), chunks=(32, -1)) y = da.random.random(kRows, chunks=32) fw = np.ones(shape=(kCols, )) for i in range(kCols): fw[i] *= float(i) fw = da.from_array(fw) poly_increasing = run_feature_weights(X, y, fw, model=xgb.dask.DaskXGBRegressor) fw = np.ones(shape=(kCols, )) for i in range(kCols): fw[i] *= float(kCols - i) fw = da.from_array(fw) poly_decreasing = run_feature_weights(X, y, fw, model=xgb.dask.DaskXGBRegressor) # Approxmated test, this is dependent on the implementation of random # number generator in std library. assert poly_increasing[0] > 0.08 assert poly_decreasing[0] < -0.08 @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_sklearn()) def test_custom_objective(self, client: "Client") -> None: from sklearn.datasets import load_boston X, y = load_boston(return_X_y=True) X, y = da.from_array(X), da.from_array(y) rounds = 20 with tempfile.TemporaryDirectory() as tmpdir: path = os.path.join(tmpdir, 'log') def sqr(labels: np.ndarray, predts: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: with open(path, 'a') as fd: print('Running sqr', file=fd) grad = predts - labels hess = np.ones(shape=labels.shape[0]) return grad, hess reg = xgb.dask.DaskXGBRegressor(n_estimators=rounds, objective=sqr, tree_method='hist') reg.fit(X, y, eval_set=[(X, y)]) # Check the obj is ran for rounds. with open(path, 'r') as fd: out = fd.readlines() assert len(out) == rounds results_custom = reg.evals_result() reg = xgb.dask.DaskXGBRegressor(n_estimators=rounds, tree_method='hist') reg.fit(X, y, eval_set=[(X, y)]) results_native = reg.evals_result() np.testing.assert_allclose(results_custom['validation_0']['rmse'], results_native['validation_0']['rmse']) tm.non_increasing(results_native['validation_0']['rmse']) def test_data_initialization(self) -> None: '''Assert each worker has the correct amount of data, and DMatrix initialization doesn't generate unnecessary copies of data. ''' with LocalCluster(n_workers=2) as cluster: with Client(cluster) as client: X, y, _ = generate_array() n_partitions = X.npartitions m = xgb.dask.DaskDMatrix(client, X, y) workers = list(_get_client_workers(client).keys()) rabit_args = client.sync(xgb.dask._get_rabit_args, len(workers), client) n_workers = len(workers) def worker_fn(worker_addr: str, data_ref: Dict) -> None: with xgb.dask.RabitContext(rabit_args): local_dtrain = xgb.dask._dmatrix_from_list_of_parts( **data_ref) total = np.array([local_dtrain.num_row()]) total = xgb.rabit.allreduce(total, xgb.rabit.Op.SUM) assert total[0] == kRows futures = [] for i in range(len(workers)): futures.append( client.submit(worker_fn, workers[i], m.create_fn_args(workers[i]), pure=False, workers=[workers[i]])) client.gather(futures) has_what = client.has_what() cnt = 0 data = set() for k, v in has_what.items(): for d in v: cnt += 1 data.add(d) assert len(data) == cnt # Subtract the on disk resource from each worker assert cnt - n_workers == n_partitions def run_shap(self, X: Any, y: Any, params: Dict[str, Any], client: "Client") -> None: X, y = da.from_array(X), da.from_array(y) Xy = xgb.dask.DaskDMatrix(client, X, y) booster = xgb.dask.train(client, params, Xy, num_boost_round=10)['booster'] test_Xy = xgb.dask.DaskDMatrix(client, X, y) shap = xgb.dask.predict(client, booster, test_Xy, pred_contribs=True).compute() margin = xgb.dask.predict(client, booster, test_Xy, output_margin=True).compute() assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-5, 1e-5) def run_shap_cls_sklearn(self, X: Any, y: Any, client: "Client") -> None: X, y = da.from_array(X), da.from_array(y) cls = xgb.dask.DaskXGBClassifier() cls.client = client cls.fit(X, y) booster = cls.get_booster() test_Xy = xgb.dask.DaskDMatrix(client, X, y) shap = xgb.dask.predict(client, booster, test_Xy, pred_contribs=True).compute() margin = xgb.dask.predict(client, booster, test_Xy, output_margin=True).compute() assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-5, 1e-5) def test_shap(self, client: "Client") -> None: from sklearn.datasets import load_boston, load_digits X, y = load_boston(return_X_y=True) params: Dict[str, Any] = {'objective': 'reg:squarederror'} self.run_shap(X, y, params, client) X, y = load_digits(return_X_y=True) params = {'objective': 'multi:softmax', 'num_class': 10} self.run_shap(X, y, params, client) params = {'objective': 'multi:softprob', 'num_class': 10} self.run_shap(X, y, params, client) self.run_shap_cls_sklearn(X, y, client) def run_shap_interactions(self, X: Any, y: Any, params: Dict[str, Any], client: "Client") -> None: X, y = da.from_array(X), da.from_array(y) Xy = xgb.dask.DaskDMatrix(client, X, y) booster = xgb.dask.train(client, params, Xy, num_boost_round=10)['booster'] test_Xy = xgb.dask.DaskDMatrix(client, X, y) shap = xgb.dask.predict(client, booster, test_Xy, pred_interactions=True).compute() margin = xgb.dask.predict(client, booster, test_Xy, output_margin=True).compute() assert np.allclose( np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)), margin, 1e-5, 1e-5) def test_shap_interactions(self, client: "Client") -> None: from sklearn.datasets import load_boston X, y = load_boston(return_X_y=True) params = {'objective': 'reg:squarederror'} self.run_shap_interactions(X, y, params, client) @pytest.mark.skipif(**tm.no_sklearn()) def test_sklearn_io(self, client: 'Client') -> None: from sklearn.datasets import load_digits X_, y_ = load_digits(return_X_y=True) X, y = da.from_array(X_), da.from_array(y_) cls = xgb.dask.DaskXGBClassifier(n_estimators=10) cls.client = client cls.fit(X, y) predt_0 = cls.predict(X) with tempfile.TemporaryDirectory() as tmpdir: path = os.path.join(tmpdir, 'cls.json') cls.save_model(path) cls = xgb.dask.DaskXGBClassifier() cls.load_model(path) assert cls.n_classes_ == 10 predt_1 = cls.predict(X) np.testing.assert_allclose(predt_0.compute(), predt_1.compute()) # Use single node to load cls = xgb.XGBClassifier() cls.load_model(path) assert cls.n_classes_ == 10 predt_2 = cls.predict(X_) np.testing.assert_allclose(predt_0.compute(), predt_2)
class TestDistributedGPU: @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_dask_cudf()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_dask_dataframe(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: run_with_dask_dataframe(dxgb.DaskDMatrix, client) run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client) @given(params=parameter_strategy, num_rounds=strategies.integers(1, 20), dataset=tm.dataset_strategy) @settings(deadline=duration(seconds=120)) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.parametrize('local_cuda_cluster', [{ 'n_workers': 2 }], indirect=['local_cuda_cluster']) @pytest.mark.mgpu def test_gpu_hist(self, params: Dict, num_rounds: int, dataset: tm.TestDataset, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix, client) run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDeviceQuantileDMatrix, client) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_dask_array(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: run_with_dask_array(dxgb.DaskDMatrix, client) run_with_dask_array(dxgb.DaskDeviceQuantileDMatrix, client) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) def test_early_stopping(self, local_cuda_cluster: LocalCUDACluster) -> None: from sklearn.datasets import load_breast_cancer with Client(local_cuda_cluster) as client: X, y = load_breast_cancer(return_X_y=True) X, y = da.from_array(X), da.from_array(y) m = dxgb.DaskDMatrix(client, X, y) valid = dxgb.DaskDMatrix(client, X, y) early_stopping_rounds = 5 booster = dxgb.train( client, { 'objective': 'binary:logistic', 'eval_metric': 'error', 'tree_method': 'gpu_hist' }, m, evals=[(valid, 'Valid')], num_boost_round=1000, early_stopping_rounds=early_stopping_rounds)['booster'] assert hasattr(booster, 'best_score') dump = booster.get_dump(dump_format='json') assert len( dump) - booster.best_iteration == early_stopping_rounds + 1 valid_X = X valid_y = y cls = dxgb.DaskXGBClassifier(objective='binary:logistic', tree_method='gpu_hist', n_estimators=100) cls.client = client cls.fit(X, y, early_stopping_rounds=early_stopping_rounds, eval_set=[(valid_X, valid_y)]) booster = cls.get_booster() dump = booster.get_dump(dump_format='json') assert len( dump) - booster.best_iteration == early_stopping_rounds + 1 @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_empty_dmatrix(self, local_cuda_cluster: LocalCUDACluster) -> None: with Client(local_cuda_cluster) as client: parameters = {'tree_method': 'gpu_hist', 'debug_synchronize': True} run_empty_dmatrix_reg(client, parameters) run_empty_dmatrix_cls(client, parameters) def run_quantile(self, name: str, local_cuda_cluster: LocalCUDACluster) -> None: if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows") exe = None for possible_path in { './testxgboost', './build/testxgboost', '../build/testxgboost', '../gpu-build/testxgboost' }: if os.path.exists(possible_path): exe = possible_path assert exe, 'No testxgboost executable found.' test = "--gtest_filter=GPUQuantile." + name def runit(worker_addr: str, rabit_args: List[bytes]) -> subprocess.CompletedProcess: port_env = '' # setup environment for running the c++ part. for arg in rabit_args: if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'): port_env = arg.decode('utf-8') port = port_env.split('=') env = os.environ.copy() env[port[0]] = port[1] return subprocess.run([str(exe), test], env=env, stdout=subprocess.PIPE) with Client(local_cuda_cluster) as client: workers = list(_get_client_workers(client).keys()) rabit_args = client.sync(dxgb._get_rabit_args, workers, client) futures = client.map(runit, workers, pure=False, workers=workers, rabit_args=rabit_args) results = client.gather(futures) for ret in results: msg = ret.stdout.decode('utf-8') assert msg.find('1 test from GPUQuantile') != -1, msg assert ret.returncode == 0, msg @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu @pytest.mark.gtest def test_quantile_basic(self, local_cuda_cluster: LocalCUDACluster) -> None: self.run_quantile('AllReduceBasic', local_cuda_cluster) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu @pytest.mark.gtest def test_quantile_same_on_all_workers( self, local_cuda_cluster: LocalCUDACluster) -> None: self.run_quantile('SameOnAllWorkers', local_cuda_cluster)
import pytest import xgboost as xgb import numpy as np import sys if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows", allow_module_level=True) try: from distributed.utils_test import client, loop, cluster_fixture import dask.dataframe as dd import dask.array as da except ImportError: pass pytestmark = pytest.mark.skipif(**tm.no_dask()) def run_train(): # Contains one label equal to rank dmat = xgb.DMatrix([[0]], label=[xgb.rabit.get_rank()]) bst = xgb.train({"eta": 1.0, "lambda": 0.0}, dmat, 1) pred = bst.predict(dmat) expected_result = np.average(range(xgb.rabit.get_world_size())) assert all(p == expected_result for p in pred) def test_train(client): # Train two workers, the first has label 0, the second has label 1 # If they build the model together the output should be 0.5 xgb.dask.run(client, run_train)
class TestDistributedGPU(unittest.TestCase): @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_dask_cudf()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_dask_dataframe(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: import cupy as cp cp.cuda.runtime.setDevice(0) X, y = generate_array() X = dd.from_dask_array(X) y = dd.from_dask_array(y) X = X.map_partitions(cudf.from_pandas) y = y.map_partitions(cudf.from_pandas) dtrain = dxgb.DaskDMatrix(client, X, y) out = dxgb.train(client, {'tree_method': 'gpu_hist'}, dtrain=dtrain, evals=[(dtrain, 'X')], num_boost_round=4) assert isinstance(out['booster'], dxgb.Booster) assert len(out['history']['X']['rmse']) == 4 predictions = dxgb.predict(client, out, dtrain).compute() assert isinstance(predictions, np.ndarray) series_predictions = dxgb.inplace_predict(client, out, X) assert isinstance(series_predictions, dd.Series) series_predictions = series_predictions.compute() single_node = out['booster'].predict( xgboost.DMatrix(X.compute())) cp.testing.assert_allclose(single_node, predictions) np.testing.assert_allclose(single_node, series_predictions.to_array()) predt = dxgb.predict(client, out, X) assert isinstance(predt, dd.Series) def is_df(part): assert isinstance(part, cudf.DataFrame), part return part predt.map_partitions(is_df, meta=dd.utils.make_meta( {'prediction': 'f4'})) cp.testing.assert_allclose(predt.values.compute(), single_node) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_dask_array(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: import cupy as cp cp.cuda.runtime.setDevice(0) X, y = generate_array() X = X.map_blocks(cp.asarray) y = y.map_blocks(cp.asarray) dtrain = dxgb.DaskDMatrix(client, X, y) out = dxgb.train(client, {'tree_method': 'gpu_hist'}, dtrain=dtrain, evals=[(dtrain, 'X')], num_boost_round=2) from_dmatrix = dxgb.predict(client, out, dtrain).compute() inplace_predictions = dxgb.inplace_predict(client, out, X).compute() single_node = out['booster'].predict( xgboost.DMatrix(X.compute())) np.testing.assert_allclose(single_node, from_dmatrix) device = cp.cuda.runtime.getDevice() assert device == inplace_predictions.device.id single_node = cp.array(single_node) assert device == single_node.device.id cp.testing.assert_allclose(single_node, inplace_predictions) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_empty_dmatrix(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: parameters = {'tree_method': 'gpu_hist'} run_empty_dmatrix(client, parameters)
import sys import logging import pytest import sklearn sys.path.append("tests/python") import testing as tm if tm.no_dask()["condition"]: pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True) if sys.platform.startswith("win"): pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True) from pyspark.sql import SparkSession from pyspark.ml.linalg import Vectors from xgboost.spark import SparkXGBRegressor, SparkXGBClassifier @pytest.fixture(scope="module", autouse=True) def spark_session_with_gpu(): spark_config = { "spark.master": "local-cluster[1, 4, 1024]", "spark.python.worker.reuse": "false", "spark.driver.host": "127.0.0.1", "spark.task.maxFailures": "1", "spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled":
class TestDistributedGPU(unittest.TestCase): @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.skipif(**tm.no_dask_cudf()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_dask_dataframe(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: import cupy as cp cp.cuda.runtime.setDevice(0) X, y = generate_array() X = dd.from_dask_array(X) y = dd.from_dask_array(y) X = X.map_partitions(cudf.from_pandas) y = y.map_partitions(cudf.from_pandas) dtrain = dxgb.DaskDMatrix(client, X, y) out = dxgb.train(client, { 'tree_method': 'gpu_hist', 'debug_synchronize': True }, dtrain=dtrain, evals=[(dtrain, 'X')], num_boost_round=4) assert isinstance(out['booster'], dxgb.Booster) assert len(out['history']['X']['rmse']) == 4 predictions = dxgb.predict(client, out, dtrain).compute() assert isinstance(predictions, np.ndarray) series_predictions = dxgb.inplace_predict(client, out, X) assert isinstance(series_predictions, dd.Series) series_predictions = series_predictions.compute() single_node = out['booster'].predict( xgboost.DMatrix(X.compute())) cp.testing.assert_allclose(single_node, predictions) np.testing.assert_allclose(single_node, series_predictions.to_array()) predt = dxgb.predict(client, out, X) assert isinstance(predt, dd.Series) def is_df(part): assert isinstance(part, cudf.DataFrame), part return part predt.map_partitions(is_df, meta=dd.utils.make_meta( {'prediction': 'f4'})) cp.testing.assert_allclose(predt.values.compute(), single_node) @given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy) @settings(deadline=None) @pytest.mark.mgpu def test_gpu_hist(self, params, num_rounds, dataset): with LocalCUDACluster(n_workers=2) as cluster: with Client(cluster) as client: params['tree_method'] = 'gpu_hist' params = dataset.set_params(params) # multi class doesn't handle empty dataset well (empty # means at least 1 worker has data). if params['objective'] == "multi:softmax": return # It doesn't make sense to distribute a completely # empty dataset. if dataset.X.shape[0] == 0: return chunk = 128 X = da.from_array(dataset.X, chunks=(chunk, dataset.X.shape[1])) y = da.from_array(dataset.y, chunks=(chunk, )) if dataset.w is not None: w = da.from_array(dataset.w, chunks=(chunk, )) else: w = None m = dxgb.DaskDMatrix(client, data=X, label=y, weight=w) history = dxgb.train(client, params=params, dtrain=m, num_boost_round=num_rounds, evals=[(m, 'train')])['history'] note(history) assert tm.non_increasing(history['train'][dataset.metric]) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_dask_array(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: import cupy as cp cp.cuda.runtime.setDevice(0) X, y = generate_array() X = X.map_blocks(cp.asarray) y = y.map_blocks(cp.asarray) dtrain = dxgb.DaskDMatrix(client, X, y) out = dxgb.train(client, { 'tree_method': 'gpu_hist', 'debug_synchronize': True }, dtrain=dtrain, evals=[(dtrain, 'X')], num_boost_round=2) from_dmatrix = dxgb.predict(client, out, dtrain).compute() inplace_predictions = dxgb.inplace_predict(client, out, X).compute() single_node = out['booster'].predict( xgboost.DMatrix(X.compute())) np.testing.assert_allclose(single_node, from_dmatrix) device = cp.cuda.runtime.getDevice() assert device == inplace_predictions.device.id single_node = cp.array(single_node) assert device == single_node.device.id cp.testing.assert_allclose(single_node, inplace_predictions) @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.mgpu def test_empty_dmatrix(self): with LocalCUDACluster() as cluster: with Client(cluster) as client: parameters = { 'tree_method': 'gpu_hist', 'debug_synchronize': True } run_empty_dmatrix(client, parameters) def run_quantile(self, name): if sys.platform.startswith("win"): pytest.skip("Skipping dask tests on Windows") exe = None for possible_path in { './testxgboost', './build/testxgboost', '../build/testxgboost', '../gpu-build/testxgboost' }: if os.path.exists(possible_path): exe = possible_path assert exe, 'No testxgboost executable found.' test = "--gtest_filter=GPUQuantile." + name def runit(worker_addr, rabit_args): port = None # setup environment for running the c++ part. for arg in rabit_args: if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'): port = arg.decode('utf-8') port = port.split('=') env = os.environ.copy() env[port[0]] = port[1] return subprocess.run([exe, test], env=env, stdout=subprocess.PIPE) with LocalCUDACluster() as cluster: with Client(cluster) as client: workers = list(dxgb._get_client_workers(client).keys()) rabit_args = dxgb._get_rabit_args(workers, client) futures = client.map(runit, workers, pure=False, workers=workers, rabit_args=rabit_args) results = client.gather(futures) for ret in results: msg = ret.stdout.decode('utf-8') assert msg.find('1 test from GPUQuantile') != -1, msg assert ret.returncode == 0, msg @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.mgpu @pytest.mark.gtest def test_quantile_basic(self): self.run_quantile('AllReduceBasic') @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.mgpu @pytest.mark.gtest def test_quantile_same_on_all_workers(self): self.run_quantile('SameOnAllWorkers')