def test_same_input(rsds_env):
    """Feed one upstream future to a task as both of its arguments."""
    scheduler_url = rsds_env.start([1])
    client = Client(scheduler_url)

    shared = client.submit(comp_fn1, 10)
    combined = client.submit(comp_fn2, shared, shared)

    assert client.gather(combined) == 0
def test_recompute_existing(rsds_env):
    """Submit the same computation twice and check both futures resolve to 100."""
    scheduler_url = rsds_env.start([1])
    client = Client(scheduler_url)

    # Two separate submissions of the identical call.
    futures = [client.submit(comp_fn1, 10) for _ in range(2)]
    first, second = client.gather(futures)

    assert first == 100
    assert second == 100
def _sample_sync(
    self, label: str | None, client: Client, measure: str, interval: float
):
    """Sample scheduler memory around the caller's code using a synchronous client.

    Starts the scheduler's memory sampler, yields exactly once, then stops
    the sampler and stores what it returned in ``self.samples`` under
    ``label`` (or under the sampler key when ``label`` is falsy).
    """
    start_options = {
        "client": client.id,
        "measure": measure,
        "interval": interval,
    }
    sampler_key = client.sync(client.scheduler.memory_sampler_start, **start_options)
    try:
        yield
    finally:
        # Runs even when the code executed during the yield raised.
        collected = client.sync(
            client.scheduler.memory_sampler_stop, key=sampler_key
        )
        self.samples[label or sampler_key] = collected
def _fetch_resources(self):
    """Poll every worker for memory and CPU usage in an endless loop.

    Each round runs ``_process_memory`` and then ``_process_cpu`` on the
    workers, records the per-worker values while holding ``self._lock``,
    and sleeps ``self.update_freq`` before the next round. The function
    never returns.
    """
    client = Client(self._scheduler_address, timeout=30)
    # (probe executed on the workers, recorder for each per-worker reading)
    collectors = (
        (_process_memory, self._add_memory),
        (_process_cpu, self._add_cpu),
    )
    while True:
        for probe, record in collectors:
            readings = client.run(probe)
            # Only the bookkeeping is done under the lock, not the remote call.
            with self._lock:
                for worker, value in readings.items():
                    record(worker, value)
        sleep(self.update_freq)
def __init__(self, name=None, client=None, maxsize=0):
    """Bind the object to a client and give it a name.

    Args:
        name: Name to use; when falsy, ``"variable-"`` plus a random hex
            UUID is generated.
        client: Client to use; when falsy, ``Client.current()`` is tried
            and, if that raises ``ValueError``, the current worker's client
            is used instead.
        maxsize: Accepted but not used by this initializer.
    """
    try:
        resolved_client = client if client else Client.current()
    except ValueError:
        # No current client available: fall back to the worker's client.
        resolved_client = get_worker().client
    self.client = resolved_client

    if name:
        self.name = name
    else:
        self.name = "variable-" + uuid.uuid4().hex
def test_unsupported_arguments(loop):
    """get_executor() must raise TypeError naming the unknown keyword arguments."""
    expected_message = "unsupported arguments to ClientExecutor: ['bar', 'foo']"
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            with pytest.raises(TypeError) as excinfo:
                c.get_executor(workers=[b['address']], foo=1, bar=2)
            raised_text = str(excinfo.value)
            assert expected_message in raised_text
async def try_connect(port):
    """Probe ``port`` on localhost with an asynchronous ``Client``.

    Args:
        port: TCP port to try on ``localhost``.

    Returns:
        ``port`` if the client context could be entered within the 0.5 s
        timeout, otherwise ``None``.
    """
    import asyncio

    # Before Python 3.11 asyncio.TimeoutError is not an OSError subclass, so a
    # connection timeout would escape the probe instead of meaning "nothing
    # reachable on this port". Suppress it explicitly alongside OSError.
    with contextlib.suppress(OSError, asyncio.TimeoutError):
        async with Client(f"tcp://localhost:{port}", asynchronous=True, timeout=0.5):
            return port
    return None
def test_progressbar_done(loop):
    """ProgressWidget shows the final state when attached to completed futures."""
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            # Case 1: every future completed successfully.
            completed = [c.submit(inc, i) for i in range(5)]
            wait(completed)
            widget = ProgressWidget(completed)
            sync(loop, widget.listen)
            assert widget.status == 'finished'
            assert widget.bar.value == 1.0
            assert widget.bar.bar_style == 'success'
            assert 'Finished' in widget.elapsed_time.value

            # Case 2: the tracked future raised.
            failing = c.submit(throws, completed)
            wait([failing])
            widget = ProgressWidget([failing])
            sync(loop, widget.listen)
            assert widget.status == 'error'
            assert widget.bar.value == 0.0
            assert widget.bar.bar_style == 'danger'
            assert 'Exception' in widget.elapsed_time.value

            # The widget text must contain the repr of the exception raised by throws().
            try:
                throws(1)
            except Exception as exc:
                assert repr(exc) in widget.elapsed_time.value
def test_cancellation(loop):
    """Cancelling executor futures is visible directly, via wait() and via as_completed()."""
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            # Direct cancellation of a single running task
            with c.get_executor(pure=False) as executor:
                sleeper = executor.submit(time.sleep, 2.0)
                assert number_of_processing_tasks(c) > 0
                assert not sleeper.done()
                sleeper.cancel()
                assert sleeper.cancelled()
                assert number_of_processing_tasks(c) == 0
                with pytest.raises(CancelledError):
                    sleeper.result()

            # With wait()
            with c.get_executor(pure=False) as executor:
                task_count = 10
                futures = [
                    executor.submit(slowinc, i, delay=0.02)
                    for i in range(task_count)
                ]
                futures[3].cancel()
                outcome = wait(futures, return_when=FIRST_COMPLETED)
                assert len(outcome.not_done) > 0
                assert len(outcome.done) >= 1
                assert futures[3] in outcome.done
                assert futures[3].cancelled()

            # With as_completed()
            with c.get_executor(pure=False) as executor:
                task_count = 10
                futures = [
                    executor.submit(slowinc, i, delay=0.02)
                    for i in range(task_count)
                ]
                for index in (3, 8):
                    futures[index].cancel()
                cancelled_total = sum(
                    finished.cancelled() for finished in as_completed(futures)
                )
                assert cancelled_total == 2
def test_scheduler_equals_client(loop):
    """compute(scheduler=client) runs on the cluster and leaves a story on the scheduler."""
    with cluster() as (s, [a, b]):
        with Client(s["address"], loop=loop) as client:
            x = delayed(lambda: 1)()
            result = x.compute(scheduler=client)
            assert result == 1

            def fetch_story(dask_scheduler):
                # Parameter name is kept as in the original callable.
                return dask_scheduler.story(x.key)

            assert client.run_on_scheduler(fetch_story)
def test_shutdown(loop):
    """Executor.shutdown() waits for pending work by default and cancels it with wait=False."""
    with cluster(active_rpc_timeout=10) as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            # shutdown(wait=True) waits for pending tasks to finish
            executor = c.get_executor()
            pending = executor.submit(time.sleep, 1.0)
            started = time.time()
            executor.shutdown()
            elapsed = time.time() - started
            assert 0.5 <= elapsed <= 2.0
            time.sleep(0.1)  # wait for future outcome to propagate
            assert pending.done()
            pending.result()  # doesn't raise
            with pytest.raises(RuntimeError):
                executor.submit(time.sleep, 1.0)

            # shutdown(wait=False) cancels pending tasks
            executor = c.get_executor()
            pending = executor.submit(time.sleep, 2.0)
            started = time.time()
            executor.shutdown(wait=False)
            elapsed = time.time() - started
            assert elapsed < 0.5
            time.sleep(0.1)  # wait for future outcome to propagate
            assert pending.cancelled()
            with pytest.raises(RuntimeError):
                executor.submit(time.sleep, 1.0)
def test_retries(loop):
    """The executor's ``retries`` setting decides which outcome of a flaky task surfaces."""
    # The task built by varying() is fed these outcomes: two errors, then 42.
    args = [ZeroDivisionError("one"), ZeroDivisionError("two"), 42]

    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            with c.get_executor(retries=3, pure=False) as executor:
                flaky = executor.submit(varying(args))
                assert flaky.result() == 42

            with c.get_executor(retries=2) as executor:
                flaky = executor.submit(varying(args))
                outcome = flaky.result()
                assert outcome == 42

            with c.get_executor(retries=1) as executor:
                flaky = executor.submit(varying(args))
                with pytest.raises(ZeroDivisionError) as exc_info:
                    flaky.result()
                exc_info.match("two")

            with c.get_executor(retries=0) as executor:
                flaky = executor.submit(varying(args))
                with pytest.raises(ZeroDivisionError) as exc_info:
                    flaky.result()
                exc_info.match("one")
def test_map(loop):
    """Executor.map() yields results, honours ``timeout`` and cancels work when dropped."""
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            # Plain map: every expected value is produced exactly once.
            with c.get_executor() as executor:
                count = 10
                mapped = executor.map(inc, range(count))
                outstanding = set(range(1, count + 1))
                for value in mapped:
                    outstanding.remove(value)
                assert not outstanding

            # Map with a timeout: iteration stops with TimeoutError part-way.
            with c.get_executor(pure=False) as executor:
                count = 10
                mapped = executor.map(slowinc, range(count), [0.1] * count, timeout=0.4)
                collected = []
                with pytest.raises(TimeoutError):
                    for value in mapped:
                        collected.append(value)
                assert 2 <= len(collected) < 7

            with c.get_executor(pure=False) as executor:
                count = 10
                # Not consuming the iterator will cancel remaining tasks
                mapped = executor.map(slowinc, range(count), [0.1] * count)
                for _ in take(2, mapped):
                    pass
                # Some tasks still processing
                assert number_of_processing_tasks(c) > 0
                # Garbage collect the iterator => remaining tasks are cancelled
                del mapped
                assert number_of_processing_tasks(c) == 0
def main(n_to_pull=100):
    """Process up to ``n_to_pull`` artifacts that have no JSON file under ``audit`` yet.

    The ``audit`` directory carries a version stamp in
    ``_inspection_version.txt``. When the stamp differs from
    ``complete_version`` the directory is deleted and recreated with a fresh
    stamp. Missing artifacts are then shuffled and the first ``n_to_pull``
    are mapped through ``inner_loop_and_write`` inside a ``Client`` context.
    """
    path = "audit"
    version_file = os.path.join(path, "_inspection_version.txt")

    # An absent stamp file is treated as an empty version string.
    db_version = ""
    if os.path.exists(version_file):
        with open(version_file) as f:
            db_version = f.read()

    # Stale output: remove it entirely.
    if db_version != complete_version and os.path.exists(path):
        shutil.rmtree(path)
    if not os.path.exists(path):
        os.makedirs(path)
        with open(version_file, "w") as f:
            f.write(complete_version)

    all_extracted_artifacts = web_interface.get_all_extracted_artifacts()

    # Names already on disk: text after the first "/" with ".json" removed.
    existing_artifact_names = set()
    for json_path in glob.glob(f"{path}/**/*.json", recursive=True):
        existing_artifact_names.add(
            json_path.partition("/")[2].replace(".json", "")
        )

    artifacts = sorted(set(all_extracted_artifacts).difference(existing_artifact_names))
    # Don't have the artifacts in alphabetical order
    shuffle(artifacts)

    with Client(threads_per_worker=100):
        selected = artifacts[:n_to_pull]
        db.from_sequence(selected).map(inner_loop_and_write).compute()
def user_scheduler(
    scheduler: Client,
    scenario: Scenario,
    backend: IUserManagerBackend,
    context: dict,
):
    """Start a user runner for every new user reported by the backend, forever.

    Once per second the backend is asked for new users; each one is submitted
    to the Dask client as a ``user_runner`` task, and the backend is then told
    to send user results. The loop never terminates.

    Args:
        scheduler (Client): Dask client used to submit user runners
        scenario (Scenario): User scenario passed to every runner
        backend (IUserManagerBackend): Backend implementation that reports
            new users, provides per-user backends and sends results
        context (dict): Test context passed to every runner
    """
    while True:
        for user_id in backend.get_new_users():
            per_user_backend = backend.get_user_backend(user_id)
            # NOTE: may be better waiting for all futures to finish
            fire_and_forget(
                scheduler.submit(
                    user_runner,
                    scenario=scenario,
                    user_id=user_id,
                    backend=per_user_backend,
                    context=context,
                    pure=False,
                )
            )

        backend.send_user_results()
        time.sleep(1)
def _get_client(client):
    """Normalise ``client`` into a ``Client`` instance.

    ``None`` yields the global client, an existing ``Client`` is returned
    unchanged, and anything else (e.g. a connection string) is passed to the
    ``Client`` constructor.
    """
    if client is None:
        return _get_global_client()
    if isinstance(client, Client):
        return client
    # e.g., connection string
    return Client(client)
def test_async_run(capsys):
    """Run two keys of the graph asynchronously and check the first collected value."""
    with Client():
        graph = setup_graph()
        # NOTE(review): per the original comment, persist raises an error here
        # because the given collection is not of type dask.base.Base.
        futures = graph.run(keys=('cleaned_data', 'pool1'))
        collected = graph.results(futures)
        first_value = collected.values()[0]
        assert first_value == 'cleaned_data'
def test_cluster(capsys):
    """Fetch the analysed data of both pools and check nothing was written to stdout."""
    wanted = [('analyzed_data', 'pool1'), ('analyzed_data', 'pool2')]
    with Client():
        graph = setup_graph()
        data = graph.get(wanted)
        assert isinstance(data, list)
        assert data == ['analyzed_cleaned_data', 'analyzed_cleaned_data']
        captured_out, _captured_err = capsys.readouterr()
        assert not captured_out
def __setstate__(self, state):
    """Rebuild the object from its pickled ``(name, address)`` state.

    An already available client is reused only when it is connected to the
    scheduler at ``address``; otherwise a new, non-default ``Client`` is
    created for that address. The object is then re-initialised through
    ``__init__``.

    Args:
        state: ``(name, address)`` tuple.
    """
    name, address = state
    client = None
    try:
        candidate = get_client(address)
        # Explicit comparison instead of `assert`: assert statements are
        # removed under `python -O`, which would let a client bound to a
        # different scheduler be reused silently.
        if candidate.scheduler.address == address:
            client = candidate
    except (AttributeError, AssertionError):
        client = None
    if client is None:
        client = Client(address, set_as_default=False)
    self.__init__(name=name, client=client)
def test_futures_to_delayed_bag(loop):
    """A dask bag built from scattered futures iterates over the concatenated data."""
    db = pytest.importorskip("dask.bag")
    items = [1, 2, 3]
    with cluster() as (s, [a, b]):
        with Client(s["address"], loop=loop) as c:
            scattered = c.scatter([items, items])
            bag = db.from_delayed(scattered)
            assert list(bag) == items + items
def run_user(
    self,
    scenario_name: str,
    user_manager_id: str,
    backend_address: str,
    encoded_context: str,
):
    """Entry point executed when a user manager starts; runs the user loop.

    Creates a Dask client, starts a ``UserBufferActor`` on it, builds the
    ``UserManagerBackend`` around that buffer, registers an exit hook that
    sends user results, and hands control to ``user_scheduler``.

    Args:
        scenario_name (str): Name of the scenario being run
        user_manager_id (str): Unique ID of the user manager assigned by the scenario
        backend_address (str): Address of the backend client that receives work
            and saves results
        encoded_context (str): Encoded test context containing previous results
    """
    scenario = self.scenarios[scenario_name]
    context = decode_context(encoded_context)
    client = Client()

    # Create buffer actor
    actor_future = client.submit(
        UserBufferActor,
        user_manager_id=user_manager_id,
        backend_address=backend_address,
        actor=True,
    )
    buffer = actor_future.result()
    fire_and_forget(actor_future)

    backend = UserManagerBackend(
        user_manager_id=user_manager_id,
        buffer=buffer,
        address=backend_address,
    )

    def _send_results_at_exit():
        return backend.send_user_results()

    atexit.register(_send_results_at_exit)

    user_scheduler(client, scenario, backend, context)
def __init__(self, name=None, client=None):
    """Bind the object to a client and set up its name, id and lock flag.

    Args:
        name: Name to use; when falsy, ``"lock-"`` plus a random hex UUID
            is generated.
        client: Client to use; when falsy, ``Client.current()`` is tried
            and, if that raises ``ValueError``, the current worker's client
            is used instead.
    """
    try:
        resolved_client = client if client else Client.current()
    except ValueError:
        # No current client available: fall back to the worker's client.
        resolved_client = get_worker().client
    self.client = resolved_client

    if name:
        self.name = name
    else:
        self.name = "lock-" + uuid.uuid4().hex
    self.id = uuid.uuid4().hex
    self._locked = False
def test_fast(loop):
    """A multi, complete notebook progress bar reports all three task groups."""
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            incremented = c.map(inc, range(100))
            decremented = c.map(dec, incremented)
            summed = c.map(add, incremented, decremented)

            bar = progress(summed, multi=True, complete=True, notebook=True)
            sync(loop, bar.listen)

            reported_groups = set(bar._last_response['all'])
            assert reported_groups == {'inc', 'dec', 'add'}
async def try_connect(port):
    """Return ``port`` if an asynchronous ``Client`` can connect to it on localhost.

    ``OSError`` and ``asyncio.TimeoutError`` raised while connecting are
    suppressed, in which case ``None`` is returned.
    """
    address = f"tcp://localhost:{port}"
    ignored_errors = (OSError, asyncio.TimeoutError)
    with contextlib.suppress(*ignored_errors):
        # Minimum of 1 for Windows
        async with Client(address, asynchronous=True, timeout=1):
            return port
    return None
def test_more_clients(rsds_env):
    """Two clients on one scheduler each get the result of their own task."""
    scheduler_url = rsds_env.start([1])
    first_client = Client(scheduler_url)
    second_client = Client(scheduler_url)

    first_future = first_client.submit(comp_fn1, 10)
    second_future = second_client.submit(comp_fn1, 20)

    # Gather in the opposite order of submission.
    second_result = second_client.gather(second_future)
    first_result = first_client.gather(first_future)

    assert first_result == 100
    assert second_result == 200
def test_scatter(rsds_env):
    """Scatter ten values to a specific worker and run a task on the scattered futures."""
    scheduler_url = rsds_env.start([1])
    client = Client(scheduler_url)
    client.wait_for_workers(1)

    # Target the first worker listed in the scheduler info.
    workers_info = client.scheduler_info()["workers"]
    target_worker = list(workers_info.keys())[0]

    scattered = client.scatter(range(10), workers=[target_worker])
    result_future = client.submit(comp_fn3, scattered)

    assert client.gather(result_future) == list(range(1, 11))
def test_dask_cv_single(self):
    """Run cross-validation through a Dask client, first once and then for several trees.

    A single default ``DecisionTreeClassifier`` is scored on the iris data,
    then the cluster is scaled up and one classifier per ``min_samples_leaf``
    value is scored; every mean score must be non-negative.
    """
    test_cluster = LocalCluster(1)
    test_client = Client(test_cluster)
    iris = load_iris()

    reg = tree.DecisionTreeClassifier()
    cv_score = test_client.submit(cross_val_score, reg, iris.data, iris.target)
    self.assertGreater(cv_score.result().mean(), 0)

    test_cluster.scale_up(4)
    # The key must be formatted with ``i``: a literal 'reg_%i' key made every
    # entry overwrite the previous one, so only the last future was gathered.
    # min_samples_leaf starts at 1 because scikit-learn rejects 0.
    _cv_results = {
        'reg_%i' % i: test_client.submit(
            cross_val_score,
            tree.DecisionTreeClassifier(min_samples_leaf=i),
            iris.data,
            iris.target,
        )
        for i in range(1, 6)
    }
    cv_results = test_client.gather(list(_cv_results.values()))
    for cv_result in cv_results:
        self.assertGreaterEqual(cv_result.mean(), 0)
def test_long_chain(rsds_env):
    """Compute a chain of eleven nested ``delayed_fn1`` calls starting from 1."""
    scheduler_url = rsds_env.start([1])
    # The client is not used directly below, but the reference is kept alive
    # for the duration of the computation, as in the original test.
    client = Client(scheduler_url)

    chain = delayed_fn1(1)
    remaining_links = 10
    while remaining_links > 0:
        chain = delayed_fn1(chain)
        remaining_links -= 1

    assert chain.compute() == 100_000_000_000
def test_workers(loop):
    """An executor restricted to one worker leaves all results on that worker."""
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            task_count = 10
            with c.get_executor(workers=[b['address']]) as executor:
                futures = [executor.submit(slowinc, i) for i in range(task_count)]
                wait(futures)
                placement = c.has_what()
                # Nothing on worker a, everything on worker b.
                assert not placement.get(a['address'])
                assert len(placement[b['address']]) == task_count
def test_progressbar_cancel(loop):
    """ProgressWidget ends in the error state with an empty bar after a cancel."""
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            import time

            futures = [c.submit(lambda: time.sleep(0.3), i) for i in range(5)]
            widget = ProgressWidget(futures)
            sync(loop, widget.listen)

            last_future = futures[-1]
            last_future.cancel()
            wait(futures[:-1])

            assert widget.status == 'error'
            assert widget.bar.value == 0  # no tasks finish before cancel is called