def test_workers(client, s, a, b):
    """Check that an executor restricted to worker ``b`` keeps results on ``b``.

    Ten ``slowinc`` tasks are submitted through an executor created with
    ``workers=[b["address"]]``.  Once they finish, ``client.has_what()`` must
    report nothing (or no entry) for worker ``a`` and exactly ten keys for
    worker ``b``.
    """
    task_count = 10
    target_address = b["address"]

    with client.get_executor(workers=[target_address]) as executor:
        futures = []
        for value in range(task_count):
            futures.append(executor.submit(slowinc, value))
        wait(futures)

        placement = client.has_what()
        # Worker ``a`` was not offered to the executor, so it must hold no keys.
        assert not placement.get(a["address"])
        assert len(placement[target_address]) == task_count
def test_workers(client, s, a, b):
    """Verify that ``get_executor(workers=...)`` pins every task to worker ``b``.

    Submits ten ``slowinc`` calls via the restricted executor, waits for them,
    and then checks ``client.has_what()``: the entry for worker ``a`` must be
    empty or missing, and the entry for worker ``b`` must list ten keys.
    """
    n_tasks = 10
    allowed_workers = [b['address']]

    with client.get_executor(workers=allowed_workers) as pool:
        pending = [pool.submit(slowinc, arg) for arg in range(n_tasks)]
        wait(pending)

        keys_by_worker = client.has_what()

        keys_on_a = keys_by_worker.get(a['address'])
        assert not keys_on_a

        keys_on_b = keys_by_worker[b['address']]
        assert len(keys_on_b) == n_tasks
def test_data_initialization(self) -> None:
    """Check per-worker data volume and absence of duplicated data.

    Builds a ``DaskDMatrix`` on a two-worker ``LocalCluster``, has every
    worker construct its local DMatrix inside a Rabit context, and asserts
    that the row counts summed across workers equal ``kRows``.  Afterwards the
    keys reported by ``client.has_what()`` must all be distinct, and their
    number minus one per worker must equal the number of partitions of ``X``.
    """
    with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
        X, y, _ = generate_array()
        n_partitions = X.npartitions
        m = xgb.dask.DaskDMatrix(client, X, y)
        workers = list(_get_client_workers(client).keys())
        rabit_args = client.sync(
            xgb.dask._get_rabit_args, len(workers), client
        )
        n_workers = len(workers)

        def worker_fn(worker_addr: str, data_ref: Dict) -> None:
            # Runs on a worker: build the local DMatrix and compare the
            # all-reduced row count against the expected global total.
            with xgb.dask.RabitContext(rabit_args):
                local_matrix = xgb.dask._dmatrix_from_list_of_parts(
                    **data_ref
                )
                local_rows = np.array([local_matrix.num_row()])
                global_rows = xgb.rabit.allreduce(
                    local_rows, xgb.rabit.Op.SUM
                )
                assert global_rows[0] == kRows

        # One task per worker, each restricted to the worker whose
        # arguments it receives.
        futures = [
            client.submit(
                worker_fn,
                address,
                m.create_fn_args(address),
                pure=False,
                workers=[address],
            )
            for address in workers
        ]
        client.gather(futures)

        has_what = client.has_what()
        held_keys = [key for held in has_what.values() for key in held]
        cnt = len(held_keys)

        # Every key must be unique across workers, i.e. nothing was copied.
        assert len(set(held_keys)) == cnt
        # Subtract the on disk resource from each worker
        assert cnt - n_workers == n_partitions
def test_data_initialization(self):
    """Check per-worker data volume and absence of duplicated data.

    Builds a ``DaskDMatrix`` on a two-worker ``LocalCluster`` and maps a
    function over the workers that constructs the local DMatrix inside a
    Rabit context, asserting it holds ``kRows / n_workers`` rows.  Afterwards
    the keys reported by ``client.has_what()`` must all be distinct, and their
    number minus one per worker must equal the number of partitions of ``X``.
    """
    with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
        X, y = generate_array()
        n_partitions = X.npartitions
        m = xgb.dask.DaskDMatrix(client, X, y)
        workers = list(xgb.dask._get_client_workers(client).keys())
        rabit_args = client.sync(xgb.dask._get_rabit_args, workers, client)
        n_workers = len(workers)

        def worker_fn(worker_addr, data_ref):
            # Runs on a worker: build the local DMatrix and check its
            # row count against the expected per-worker share.
            with xgb.dask.RabitContext(rabit_args):
                local_matrix = xgb.dask._dmatrix_from_worker_map(**data_ref)
                expected_rows = kRows / n_workers
                assert local_matrix.num_row() == expected_rows

        # The argument dict is created once and the same object is passed
        # for every worker.
        shared_args = [m.create_fn_args()] * len(workers)
        futures = client.map(
            worker_fn,
            workers,
            shared_args,
            pure=False,
            workers=workers,
        )
        client.gather(futures)

        has_what = client.has_what()
        held_keys = [key for held in has_what.values() for key in held]
        cnt = len(held_keys)

        # Every key must be unique across workers, i.e. nothing was copied.
        assert len(set(held_keys)) == cnt
        # Subtract the on disk resource from each worker
        assert cnt - n_workers == n_partitions