def test_workers(client, s, a, b):
    N = 10
    with client.get_executor(workers=[b["address"]]) as e:
        fs = [e.submit(slowinc, i) for i in range(N)]
        wait(fs)
        has_what = client.has_what()
        assert not has_what.get(a["address"])
        assert len(has_what[b["address"]]) == N
Ejemplo n.º 2
0
def test_workers(client, s, a, b):
    N = 10
    with client.get_executor(workers=[b['address']]) as e:
        fs = [e.submit(slowinc, i) for i in range(N)]
        wait(fs)
        has_what = client.has_what()
        assert not has_what.get(a['address'])
        assert len(has_what[b['address']]) == N
Ejemplo n.º 3
0
    def test_data_initialization(self) -> None:
        '''Assert each worker has the correct amount of data, and DMatrix initialization doesn't
        generate unnecessary copies of data.

        '''
        with LocalCluster(n_workers=2) as cluster:
            with Client(cluster) as client:
                X, y, _ = generate_array()
                n_partitions = X.npartitions
                m = xgb.dask.DaskDMatrix(client, X, y)
                workers = list(_get_client_workers(client).keys())
                rabit_args = client.sync(xgb.dask._get_rabit_args,
                                         len(workers), client)
                n_workers = len(workers)

                def worker_fn(worker_addr: str, data_ref: Dict) -> None:
                    with xgb.dask.RabitContext(rabit_args):
                        local_dtrain = xgb.dask._dmatrix_from_list_of_parts(
                            **data_ref)
                        total = np.array([local_dtrain.num_row()])
                        total = xgb.rabit.allreduce(total, xgb.rabit.Op.SUM)
                        assert total[0] == kRows

                futures = []
                for i in range(len(workers)):
                    futures.append(
                        client.submit(worker_fn,
                                      workers[i],
                                      m.create_fn_args(workers[i]),
                                      pure=False,
                                      workers=[workers[i]]))
                client.gather(futures)

                has_what = client.has_what()
                cnt = 0
                data = set()
                for k, v in has_what.items():
                    for d in v:
                        cnt += 1
                        data.add(d)

                assert len(data) == cnt
                # Subtract the on disk resource from each worker
                assert cnt - n_workers == n_partitions
Ejemplo n.º 4
0
    def test_data_initialization(self):
        '''Assert each worker has the correct amount of data, and DMatrix initialization doesn't
        generate unnecessary copies of data.

        '''
        with LocalCluster(n_workers=2) as cluster:
            with Client(cluster) as client:
                X, y = generate_array()
                n_partitions = X.npartitions
                m = xgb.dask.DaskDMatrix(client, X, y)
                workers = list(xgb.dask._get_client_workers(client).keys())
                rabit_args = client.sync(xgb.dask._get_rabit_args, workers,
                                         client)
                n_workers = len(workers)

                def worker_fn(worker_addr, data_ref):
                    with xgb.dask.RabitContext(rabit_args):
                        local_dtrain = xgb.dask._dmatrix_from_worker_map(
                            **data_ref)
                        assert local_dtrain.num_row() == kRows / n_workers

                futures = client.map(worker_fn,
                                     workers,
                                     [m.create_fn_args()] * len(workers),
                                     pure=False,
                                     workers=workers)
                client.gather(futures)

                has_what = client.has_what()
                cnt = 0
                data = set()
                for k, v in has_what.items():
                    for d in v:
                        cnt += 1
                        data.add(d)

                assert len(data) == cnt
                # Subtract the on disk resource from each worker
                assert cnt - n_workers == n_partitions