def test_fast(client):
    """Multi-progress over a three-stage task graph reports every key prefix."""
    stage_one = client.map(inc, range(100))
    stage_two = client.map(dec, stage_one)
    stage_three = client.map(add, stage_one, stage_two)

    bar = progress(stage_three, multi=True, complete=True, notebook=True)
    client.sync(bar.listen)

    # multi=True groups tasks by key prefix; all three stages must show up.
    assert set(bar._last_response['all']) == {'inc', 'dec', 'add'}
def test_client_sync(client):
    """get_task_stream used as a sync context manager records one entry per task."""
    with get_task_stream(client=client) as ts:
        # Brief pause to smooth over time differences on the scheduler.
        sleep(0.1)
        futures = client.map(inc, range(10))
        wait(futures)
    assert len(ts.data) == 10
def test_as_completed_update(client):
    """as_completed.update() accepts new futures while batches are being drained."""
    pending = list(range(10))
    expected = sum(map(inc, pending))
    collected = 0

    ac = as_completed([])
    while pending or not ac.is_empty():
        if pending:
            # Feed work into the collector four items at a time.
            chunk, pending = pending[:4], pending[4:]
            ac.update(client.map(inc, chunk))
        batch = ac.next_batch(block=True)
        collected += sum(fut.result() for fut in batch)

    assert collected == expected
def test_lock_sync(client):
    """Tasks contending on the same Lock never observe each other inside it."""

    def f(x):
        with Lock('x') as lock:
            # Inside the lock, no other task may have set the flag.
            worker_client = get_client()
            assert worker_client.get_metadata('locked') is False
            worker_client.set_metadata('locked', True)
            sleep(0.05)
            # Still held: our own flag must be the one we see.
            assert worker_client.get_metadata('locked') is True
            worker_client.set_metadata('locked', False)

    # Initialize the shared flag before any task runs.
    client.set_metadata('locked', False)
    futures = client.map(f, range(10))
    client.gather(futures)
def test_as_completed_add(client):
    """Futures added mid-iteration are yielded by the same as_completed iterator."""
    expected = sum(map(inc, range(10)))
    collected = 0

    ac = as_completed(client.map(inc, range(10)))
    for future in ac:
        value = future.result()
        collected += value
        if random.random() < 0.5:
            # Occasionally chain extra work onto the live iterator; it will
            # later yield value + 10, so grow the expected total to match.
            ac.add(client.submit(add, future, 10))
            expected += value + 10

    assert collected == expected
def test_get_task_stream_save(client, tmpdir):
    """plot='save' writes a bokeh HTML report of the task stream to disk."""
    bokeh = pytest.importorskip('bokeh')
    target = os.path.join(str(tmpdir), 'foo.html')

    with get_task_stream(plot='save', filename=target) as ts:
        wait(client.map(inc, range(10)))

    with open(target) as f:
        contents = f.read()
    # The report must mention the task name and embed bokeh assets.
    assert 'inc' in contents
    assert 'bokeh' in contents
    assert isinstance(ts.figure, bokeh.plotting.Figure)
def test_text_progressbar(capsys, client):
    """TextProgressBar reaches 'finished', renders a full bar, and closes its comm."""
    futures = client.map(inc, range(10))
    bar = TextProgressBar(futures, interval=0.01, complete=True)
    client.gather(futures)

    # Poll for completion, but never wait more than five seconds.
    deadline = time() + 5
    while bar.status != 'finished':
        sleep(0.01)
        assert time() < deadline

    check_bar_completed(capsys)
    assert bar._last_response == {'all': 10, 'remaining': 0, 'status': 'finished'}
    assert bar.comm.closed()
def test_text_progressbar(capsys, client):
    """TextProgressBar reaches 'finished', renders a full bar, and closes its comm.

    NOTE(review): this redefines test_text_progressbar from earlier in the
    file; pytest will only collect this later definition.
    """
    futures = client.map(inc, range(10))
    progress_bar = TextProgressBar(futures, interval=0.01, complete=True)
    client.gather(futures)

    started = time()
    while progress_bar.status != "finished":
        sleep(0.01)
        # Guard against hanging forever if the bar never finishes.
        assert time() - started < 5

    check_bar_completed(capsys)
    assert progress_bar._last_response == {
        "all": 10,
        "remaining": 0,
        "status": "finished",
    }
    assert progress_bar.comm.closed()
def run_quantile(self, name: str) -> None:
    """Run the C++ gtest ``Quantile.<name>`` inside a dask cluster.

    Launches one ``testxgboost`` subprocess per dask worker, wires each into
    the rabit tracker via its environment, and asserts that every process
    actually ran the filtered test and exited cleanly.

    Skipped on Windows; returns silently when no test binary is found.
    """
    if sys.platform.startswith("win"):
        pytest.skip("Skipping dask tests on Windows")

    # Probe candidate build locations in a fixed priority order and stop at
    # the first match.  (The previous set literal made the chosen binary
    # depend on hash iteration order whenever more than one path existed.)
    exe: Optional[str] = None
    for candidate in (
        './testxgboost',
        './build/testxgboost',
        '../build/testxgboost',
        '../cpu-build/testxgboost',
    ):
        if os.path.exists(candidate):
            exe = candidate
            break
    if exe is None:
        return

    test = "--gtest_filter=Quantile." + name

    def runit(
        worker_addr: str, rabit_args: List[bytes]
    ) -> subprocess.CompletedProcess:
        # Extract the tracker-port assignment from the rabit args and pass
        # it through the environment so the C++ test can join the ring.
        port_env = ''
        for arg in rabit_args:
            if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'):
                port_env = arg.decode('utf-8')
        port = port_env.split('=')
        env = os.environ.copy()
        env[port[0]] = port[1]
        return subprocess.run([str(exe), test], env=env, capture_output=True)

    with LocalCluster(n_workers=4) as cluster:
        with Client(cluster) as client:
            workers = list(_get_client_workers(client).keys())
            rabit_args = client.sync(
                xgb.dask._get_rabit_args, len(workers), client
            )
            # One subprocess per worker, pinned to that worker.
            futures = client.map(
                runit,
                workers,
                pure=False,
                workers=workers,
                rabit_args=rabit_args,
            )
            results = client.gather(futures)
            for ret in results:
                msg = ret.stdout.decode('utf-8')
                assert msg.find('1 test from Quantile') != -1, msg
                assert ret.returncode == 0, msg
def test_data_initialization(self):
    '''Assert each worker has the correct amount of data, and DMatrix
    initialization doesn't generate unnecessary copies of data.
    '''
    with LocalCluster(n_workers=2) as cluster:
        with Client(cluster) as client:
            X, y = generate_array()
            n_partitions = X.npartitions
            m = xgb.dask.DaskDMatrix(client, X, y)
            workers = list(xgb.dask._get_client_workers(client).keys())
            # NOTE(review): run_quantile elsewhere in this file passes
            # len(workers) to _get_rabit_args, while here the worker list
            # itself is passed — confirm which signature this xgboost
            # version expects.
            rabit_args = client.sync(xgb.dask._get_rabit_args, workers,
                                     client)
            n_workers = len(workers)

            def worker_fn(worker_addr, data_ref):
                # Rebuild the local DMatrix from the distributed parts and
                # allreduce the row counts; the global sum must equal the
                # expected total row count.
                with xgb.dask.RabitContext(rabit_args):
                    local_dtrain = xgb.dask._dmatrix_from_worker_map(
                        **data_ref)
                    total = np.array([local_dtrain.num_row()])
                    total = xgb.rabit.allreduce(total, xgb.rabit.Op.SUM)
                    assert total[0] == kRows

            futures = client.map(worker_fn, workers,
                                 [m.create_fn_args()] * len(workers),
                                 pure=False, workers=workers)
            client.gather(futures)

            # Count every data key held across workers; no key may appear
            # twice (that would indicate an unnecessary copy).
            has_what = client.has_what()
            cnt = 0
            data = set()
            for k, v in has_what.items():
                for d in v:
                    cnt += 1
                    data.add(d)
            assert len(data) == cnt
            # Subtract the on disk resource from each worker
            assert cnt - n_workers == n_partitions
def test_worker_dies():
    """All work completes even when a worker is killed while holding the semaphore."""
    with cluster() as (scheduler, workers):
        with Client(scheduler["address"]) as client:
            sem = Semaphore(name="x", max_leases=1)

            def f(x, sem, kill_address):
                with sem:
                    from distributed.worker import get_worker

                    if get_worker().address == kill_address:
                        import os

                        # SIGTERM ourselves to simulate a dying worker.
                        os.kill(os.getpid(), 15)
                    return x

            futures = client.map(
                f, range(100), sem=sem, kill_address=workers[0]["address"]
            )
            results = client.gather(futures)
            assert sorted(results) == list(range(100))
def test_threadpoolworkers_pick_correct_ioloop(cleanup):  # gh4057
    """Semaphore leases stay valid from thread-pool workers (correct IOLoop).

    About picking appropriate values for the various timings:

    * Sleep time in ``access_limited`` impacts test runtime but is arbitrary.
    * ``lease-timeout`` should be smaller than the sleep time.  This is what
      the test builds on: assuming the leases cannot be refreshed (e.g.
      wrong event loop picked / PeriodicCallback never scheduled), the
      semaphore would become oversubscribed and len(protected_resources)
      becomes non-zero.  This should also trigger a log message about
      "unknown leases" and fails the test.
    * ``lease-validation-interval`` should be the smallest quantity.  How
      often leases are checked for staleness is hard coded atm and a fifth
      of the ``lease-timeout``.  Accounting for this and some jitter, this
      should be sufficiently small to ensure smooth operation.
    """
    lease_config = {
        "distributed.scheduler.locks.lease-validation-interval": 0.01,
        "distributed.scheduler.locks.lease-timeout": 0.1,
    }
    with dask.config.set(lease_config):
        with Client(processes=False, threads_per_worker=4) as client:
            sem = Semaphore(max_leases=1, name="database")
            protected_resource = []

            def access_limited(val, sem):
                import time

                with sem:
                    # Mutual exclusion: the resource must be empty on entry.
                    assert len(protected_resource) == 0
                    protected_resource.append(val)
                    # Interact with the DB
                    time.sleep(0.2)
                    protected_resource.remove(val)

            client.gather(client.map(access_limited, range(10), sem=sem))
def test_threadpoolworkers_pick_correct_ioloop(cleanup):  # gh4057
    """Semaphore leases stay valid from thread-pool workers (correct IOLoop).

    NOTE(review): this redefines test_threadpoolworkers_pick_correct_ioloop
    from earlier in the file (with tighter timings); pytest will only collect
    this later definition.
    """
    lease_config = {
        "distributed.scheduler.locks.lease-validation-interval": 0.01,
        "distributed.scheduler.locks.lease-timeout": 0.05,
    }
    with dask.config.set(lease_config):
        with Client(processes=False, threads_per_worker=4) as client:
            sem = Semaphore(max_leases=1, name="database")
            protected_resource = []

            def access_limited(val, sem):
                import time

                with sem:
                    # Mutual exclusion: the resource must be empty on entry.
                    assert len(protected_resource) == 0
                    protected_resource.append(val)
                    # Interact with the DB
                    time.sleep(0.1)
                    protected_resource.remove(val)

            client.gather(client.map(access_limited, range(10), sem=sem))