def test_data_initialization(self, local_cuda_cluster: LocalCUDACluster) -> None:
    """Check that feature weights survive construction of worker-local DMatrix.

    A ``DaskDMatrix`` is built with non-negative feature weights, then each
    worker rebuilds its local DMatrix from its parts and asserts that the
    ``feature_weights`` info has exactly one entry per column.
    """
    with Client(local_cuda_cluster) as client:
        X, y, _ = generate_array()
        raw_weights = da.random.random((random_cols, ))
        # Shift so the smallest weight is zero.
        feature_weights = raw_weights - raw_weights.min()
        m = dxgb.DaskDMatrix(client, X, y, feature_weights=feature_weights)

        workers = _get_client_workers(client)
        rabit_args = client.sync(dxgb._get_rabit_args, len(workers), client)

        def worker_fn(worker_addr: str, data_ref: Dict) -> None:
            with dxgb.RabitContext(rabit_args):
                local_dtrain = dxgb._dmatrix_from_list_of_parts(**data_ref)
                n_weights = local_dtrain.get_float_info("feature_weights").shape[0]
                assert n_weights == local_dtrain.num_col()

        # One task per worker, pinned to that worker via ``workers=[addr]``.
        futures = [
            client.submit(
                worker_fn,
                addr,
                m._create_fn_args(addr),
                pure=False,
                workers=[addr],
            )
            for addr in workers
        ]
        client.gather(futures)
def run_rabit_ops(client, n_workers):
    """Run rabit ``allreduce`` SUM and MAX checks on every dask worker.

    ``n_workers`` must equal the number of workers known to ``client``.
    Each worker contributes ``1`` to a SUM reduction and its index to a MAX
    reduction; the results are asserted against ``n_workers``.
    """
    from test_with_dask import _get_client_workers
    from xgboost.dask import RabitContext, _get_rabit_args
    from xgboost import rabit

    workers = _get_client_workers(client)
    rabit_args = client.sync(_get_rabit_args, len(workers), None, client)
    # Checked outside any RabitContext: rabit must not report distributed here.
    assert not rabit.is_distributed()
    worker_count = len(workers)
    assert n_workers == worker_count

    def local_test(worker_id):
        with RabitContext(rabit_args):
            assert rabit.is_distributed()
            # Every worker adds 1, so the sum equals the worker count.
            total = rabit.allreduce(np.array([1]), rabit.Op.SUM)
            assert total[0] == n_workers

            # Worker ids are 0..n_workers-1, so the maximum is n_workers - 1.
            highest = rabit.allreduce(np.array([worker_id]), rabit.Op.MAX)
            assert highest == n_workers - 1
            return 1

    futures = client.map(local_test, range(worker_count), workers=workers)
    outcomes = client.gather(futures)
    assert sum(outcomes) == n_workers
def run_quantile(self, name: str, local_cuda_cluster: LocalCUDACluster) -> None:
    """Run the ``GPUQuantile.<name>`` gtest case on every worker of the cluster.

    The ``testxgboost`` executable is launched once per worker with the rabit
    tracker port and URI exported into its environment. Each run must report
    exactly one executed GPUQuantile test and exit with status 0.

    Parameters
    ----------
    name:
        Suffix appended to ``--gtest_filter=GPUQuantile.``.
    local_cuda_cluster:
        Cluster the dask ``Client`` connects to.
    """
    if sys.platform.startswith("win"):
        pytest.skip("Skipping dask tests on Windows")

    # Ordered tuple rather than a set, so the pick is deterministic when
    # several builds exist: the first existing path wins.
    candidates = (
        './testxgboost',
        './build/testxgboost',
        '../build/testxgboost',
        '../gpu-build/testxgboost',
    )
    exe = next((path for path in candidates if os.path.exists(path)), None)
    assert exe, 'No testxgboost executable found.'
    test = "--gtest_filter=GPUQuantile." + name

    def runit(
        worker_addr: str, rabit_args: List[bytes]
    ) -> subprocess.CompletedProcess:
        # setup environment for running the c++ part.
        port_env = None
        uri_env = None
        for raw_arg in rabit_args:
            arg = raw_arg.decode('utf-8')
            if arg.startswith('DMLC_TRACKER_PORT'):
                port_env = arg
            elif arg.startswith("DMLC_TRACKER_URI"):
                uri_env = arg
        if port_env is None or uri_env is None:
            # Fail with a clear message instead of an IndexError or
            # UnboundLocalError further down.
            raise RuntimeError(
                "rabit_args must contain DMLC_TRACKER_PORT and DMLC_TRACKER_URI"
            )

        env = os.environ.copy()
        for setting in (port_env, uri_env):
            # Settings have the form ``KEY=value``.
            key, _, value = setting.partition('=')
            env[key] = value
        return subprocess.run([str(exe), test], env=env, stdout=subprocess.PIPE)

    with Client(local_cuda_cluster) as client:
        workers = _get_client_workers(client)
        rabit_args = client.sync(dxgb._get_rabit_args, len(workers), None, client)
        futures = client.map(
            runit,
            workers,
            pure=False,
            workers=workers,
            rabit_args=rabit_args,
        )
        results = client.gather(futures)

        for ret in results:
            msg = ret.stdout.decode('utf-8')
            assert '1 test from GPUQuantile' in msg, msg
            assert ret.returncode == 0, msg
def make_categorical(
    client: Client,
    n_samples: int,
    n_features: int,
    n_categories: int,
    onehot: bool = False,
) -> Tuple[dd.DataFrame, dd.Series]:
    """Build a distributed categorical dataset with one partition per worker.

    Every worker generates its own partition through ``tm.make_categorical``
    (always with ``onehot=False``); the partitions are then assembled with
    ``dd.from_delayed``.

    Parameters
    ----------
    client:
        Dask client whose workers generate the data.
    n_samples:
        Total number of rows across all partitions.
    n_features:
        Forwarded to ``tm.make_categorical``.
    n_categories:
        Forwarded to ``tm.make_categorical``.
    onehot:
        When true, the feature frame is passed through ``dd.get_dummies``
        before being returned.

    Returns
    -------
    ``(X, y)`` where ``y`` is the ``"label"`` column and ``X`` holds the
    remaining columns.
    """
    workers = _get_client_workers(client)
    n_workers = len(workers)

    def pack(**kwargs: Any) -> dd.DataFrame:
        # Keep the label as an extra column so it travels with the features.
        X, y = tm.make_categorical(**kwargs)
        X["label"] = y
        return X

    meta = pack(
        n_samples=1, n_features=n_features, n_categories=n_categories, onehot=False
    )

    # The previous ``min(n // w, n - i * (n // w))`` always evaluated to
    # ``n // w``, silently dropping ``n % w`` rows. Spread the remainder over
    # the first workers so the partitions add up to ``n_samples``.
    # ``max(..., 1)`` only avoids dividing by zero when there are no workers;
    # the comprehension below is empty in that case anyway.
    base, extra = divmod(n_samples, max(n_workers, 1))
    dfs = [
        client.submit(
            pack,
            n_samples=base + (1 if i < extra else 0),
            n_features=n_features,
            n_categories=n_categories,
            onehot=False,
            workers=[worker],
        )
        for i, worker in enumerate(workers)
    ]

    df = dd.from_delayed(dfs, meta=meta)
    y = df["label"]
    X = df[df.columns.difference(["label"])]

    if onehot:
        return dd.get_dummies(X), y
    return X, y
def run_quantile(self, name, local_cuda_cluster):
    """Run the ``GPUQuantile.<name>`` gtest case on every worker of the cluster.

    Launches the ``testxgboost`` executable once per worker with the rabit
    tracker port exported into its environment, then asserts that each run
    reports one executed GPUQuantile test and exits with status 0.
    """
    if sys.platform.startswith("win"):
        pytest.skip("Skipping dask tests on Windows")

    candidates = {
        './testxgboost', './build/testxgboost', '../build/testxgboost',
        '../gpu-build/testxgboost'
    }
    # Keep the last existing candidate in the set's iteration order.
    existing = [path for path in candidates if os.path.exists(path)]
    exe = existing[-1] if existing else None
    assert exe, 'No testxgboost executable found.'
    test = "--gtest_filter=GPUQuantile." + name

    def runit(worker_addr, rabit_args):
        tracker_port = None
        # setup environment for running the c++ part.
        for raw_arg in rabit_args:
            decoded = raw_arg.decode('utf-8')
            if decoded.startswith('DMLC_TRACKER_PORT'):
                tracker_port = decoded
        fields = tracker_port.split('=')
        child_env = os.environ.copy()
        child_env[fields[0]] = fields[1]
        return subprocess.run([exe, test], env=child_env, stdout=subprocess.PIPE)

    with Client(local_cuda_cluster) as client:
        workers = list(_get_client_workers(client).keys())
        rabit_args = client.sync(dxgb._get_rabit_args, workers, client)
        futures = client.map(
            runit,
            workers,
            pure=False,
            workers=workers,
            rabit_args=rabit_args,
        )
        results = client.gather(futures)

        for completed in results:
            msg = completed.stdout.decode('utf-8')
            assert '1 test from GPUQuantile' in msg, msg
            assert completed.returncode == 0, msg
def test_empty_dmatrix_auc(self, local_cuda_cluster: LocalCUDACluster) -> None:
    """Run the shared empty-DMatrix AUC check with the ``gpu_hist`` tree method."""
    with Client(local_cuda_cluster) as client:
        workers = _get_client_workers(client)
        run_empty_dmatrix_auc(client, "gpu_hist", len(workers))