async def test_cleanup(c, s, a, b):
    """A ``_get`` started before the variable is re-set resolves to the first future.

    The variable is set to a future whose local reference is then deleted;
    a read is started, the variable is re-set to a second future, and the
    read must still yield a future with the first key and the result 11.
    """
    writer = Variable("v")
    reader = Variable("v")

    # The lambdas are kept verbatim: the task key may depend on the function.
    first = c.submit(lambda x: x + 1, 10)
    second = c.submit(lambda x: x + 1, 20)
    first_key = first.key

    await writer.set(first)
    # Drop the local handle so that only the variable refers to the task.
    del first
    await asyncio.sleep(0.1)

    # Start the read, yield once so it can begin, then schedule the overwrite.
    pending_get = asyncio.ensure_future(reader._get())
    await asyncio.sleep(0)
    asyncio.ensure_future(writer.set(second))

    fetched = await pending_get
    assert fetched.key == first_key
    value = await fetched
    assert value == 11
def f(i):
    """Repeatedly read, increment and write back the shared variable ``"x"``.

    Runs ``NITERS`` rounds through a worker client, sleeping a small random
    time between rounds, and returns the value held by the variable
    afterwards. The argument ``i`` is not used by the body.
    """
    with worker_client() as client:
        shared = Variable("x", client=client)
        for _ in range(NITERS):
            current = shared.get().result()
            shared.set(client.submit(inc, current))
            sleep(0.01 * random.random())
        final_value = shared.get().result()
        # allow fire-and-forget messages to clear
        sleep(0.1)
        return final_value
def fit(self, X, y=None):
    """Count fit calls in shared state and kill this process once per task key.

    For every key currently executing on the worker, the shared counter is
    incremented under the distributed lock. If the counter value read before
    the increment exceeds ``self.min_complete`` and the key has not been
    recorded in the shared ``killed_workers`` mapping, the key is recorded
    and the current process is killed with signal 9. ``X`` and ``y`` are not
    used by the body.

    Returns
    -------
    self
    """
    worker = get_worker()
    client = worker.client
    lock = Lock(self.lock_name, client=client)
    counter = Variable(self.counter_name, client=client)
    killed_var = Variable(self.killed_workers_name, client=client)

    # Iterate over a snapshot of the executing keys.
    for raw_key in list(worker.executing):
        task_key = literal_eval(raw_key)
        kill_now = False
        with lock:
            seen = counter.get()
            counter.set(seen + 1)
            already_killed = killed_var.get()
            if seen > self.min_complete and task_key not in already_killed:
                already_killed[task_key] = True
                kill_now = True
                killed_var.set(already_killed)

        # The kill happens after the lock's ``with`` block has exited.
        if kill_now:
            os.kill(os.getpid(), 9)
    return self
def test_variable(c, s, a, b):
    """Set a future through one Variable handle and read it through another.

    Two handles with the same name must expose the same future key; the
    scheduler must still hold tasks after the local futures are dropped, and
    must have none within five seconds of the variable being deleted.
    """
    var = Variable("x")
    same_var = Variable("x")
    assert var.client is c

    original = c.submit(inc, 1)

    yield var.set(original)
    fetched = yield same_var.get()
    assert original.key == fetched.key

    del original, fetched

    yield gen.sleep(0.1)
    assert s.tasks  # future still present

    var.delete()

    # Poll until the scheduler has no tasks left, failing after five seconds.
    deadline = time() + 5
    while s.tasks:
        yield gen.sleep(0.01)
        assert time() < deadline
def test_future_erred_sync(loop):
    """A failed future stored in a Variable re-raises its error when read back."""
    with cluster() as (s, [a, b]), Client(s['address']) as c:
        failing = c.submit(div, 1, 0)
        holder = Variable()
        holder.set(failing)

        sleep(0.1)

        retrieved = holder.get()
        with pytest.raises(ZeroDivisionError):
            retrieved.result()
def test_timeout_sync(client):
    """``get`` on an unset Variable raises ``TimeoutError`` after about the timeout."""
    v = Variable("v")

    began = IOLoop.current().time()
    with pytest.raises(TimeoutError):
        v.get(timeout=0.2)
    elapsed = IOLoop.current().time() - began

    # A looser lower bound is accepted on Windows.
    lower_bound = 0.1 if WINDOWS else 0.2
    assert lower_bound < elapsed < 2.0

    with pytest.raises(TimeoutError):
        v.get(timeout=0.01)
def _pre_start_yield(self) -> None:
    """Prepare the cancellation variable and start the dask event watcher.

    When an address is configured or the scheduler address starts with
    ``"inproc"``, a weak set for futures and a uniquely named ``Variable``
    set to ``True`` are created. The event-watching coroutine is then
    scheduled on the client's asyncio loop.
    """
    from distributed import Variable

    scheduler_address = self.client.scheduler.address  # type: ignore
    uses_inproc = scheduler_address.startswith("inproc")
    if uses_inproc or self.address is not None:
        self._futures = weakref.WeakSet()
        variable_name = f"prefect-{uuid.uuid4().hex}"
        self._should_run_var = Variable(variable_name, client=self.client)
        self._should_run_var.set(True)

    event_loop = self.client.loop.asyncio_loop  # type: ignore
    self._watch_dask_events_task = asyncio.run_coroutine_threadsafe(
        self._watch_dask_events(), event_loop
    )
async def test_timeout(c, s, a, b):
    """``get`` accepts string and ``timedelta`` timeouts and raises ``TimeoutError``."""
    v = Variable("v")

    began = monotonic()
    with pytest.raises(TimeoutError):
        await v.get(timeout="200ms")
    elapsed = monotonic() - began

    # timing is weird with asyncio and Windows
    lower_bound = 0.1 if WINDOWS else 0.2
    assert lower_bound < elapsed < 2.0

    with pytest.raises(TimeoutError):
        await v.get(timeout=timedelta(milliseconds=10))
def test_timeout(c, s, a, b):
    """``get`` on an unset Variable raises ``TimeoutError`` after about the timeout."""
    v = Variable("v")

    began = IOLoop.current().time()
    with pytest.raises(TimeoutError):
        yield v.get(timeout=0.2)
    elapsed = IOLoop.current().time() - began

    # timing is weird with asyncio and Windows
    lower_bound = 0.1 if WINDOWS else 0.2
    assert lower_bound < elapsed < 2.0

    with pytest.raises(TimeoutError):
        yield v.get(timeout=0.01)
async def test_variables_do_not_leak_client(c, s, a, b):
    """Deleting a Variable restores the scheduler's original set of clients."""
    # https://github.com/dask/distributed/issues/3899
    baseline_clients = set(s.clients)

    # setup variable with future
    var = Variable("x")
    await var.set(c.submit(inc, 1))

    # complete teardown
    var.delete()

    # Poll until the client set matches the baseline, failing after five seconds.
    deadline = time() + 5
    while set(s.clients) != baseline_clients:
        await asyncio.sleep(0.01)
        assert time() < deadline
def test_variable_in_task(loop):
    """A Variable set by the client can be read by name inside a task."""
    # Ensure that we can create a Variable inside a task on a
    # worker in a separate Python process than the client
    scheduler_cmd = ["dask-scheduler", "--no-dashboard"]
    worker_cmd = ["dask-worker", "127.0.0.1:8786"]
    with popen(scheduler_cmd), popen(worker_cmd):
        with Client("tcp://127.0.0.1:8786", loop=loop) as c:
            c.wait_for_workers(1)

            Variable("x").set(123)

            def foo():
                # Look the variable up by name from within the task.
                return Variable("x").get()

            assert c.submit(foo).result() == 123
def _maybe_run(var_name: str, fn: Callable, *args: Any, **kwargs: Any) -> Any:
    """Run ``fn`` only if the named `distributed.Variable` holds a truthy value.

    This offers stronger guarantees than distributed's current cancellation
    mechanism, which only cancels pending tasks. Returns the result of
    ``fn(*args, **kwargs)``, or ``None`` when the task is skipped.
    """
    # In certain configurations, the way distributed unpickles variables can
    # lead to excess client connections being created. To avoid this issue we
    # manually lookup the variable by name.
    from distributed import Variable, get_client

    flag = Variable(var_name, client=get_client())
    try:
        should_run = flag.get(timeout=0)
    except Exception:
        # Errors here indicate the get operation timed out, which can happen if
        # the variable is undefined (usually indicating the flow runner has
        # stopped or the cluster is shutting down).
        should_run = False

    if not should_run:
        return None
    return fn(*args, **kwargs)
def process(self, events):
    """Process ``events`` once per data source and replay the column access elsewhere.

    A ``distributed.Lock`` and a ``distributed.Variable``, both named
    ``self.prefix + <data_source>``, coordinate the calls:

    * The call that acquires the lock (non-blocking) runs
      ``self.proc.process(events)``, collects the names in
      ``events.materialized`` that resolve as attribute paths on ``events``
      (after replacing the first ``"_"`` with ``"."``), publishes the sorted
      list through the variable and returns it in a ``dict_accumulator``
      keyed by the data source.
    * Every other call reads that list from the variable, resolves each
      column on ``events``, calls ``materialize()`` where available and
      returns an empty ``dict_accumulator``.
    """
    from distributed import worker_client, Variable, Lock

    assert isinstance(self.proc, BaseProcessor)
    assert not isinstance(self.proc, _Preheater)

    s = self.proc.get_dataset(events).data_source
    d = self.prefix + s
    with worker_client(separate_thread=False) as c:
        v = Variable(d, c)
        lock = Lock(d, c)
        # NOTE(review): the lock is never released in this block, presumably
        # so that only one call per data source does the full processing;
        # confirm.
        if lock.acquire(blocking=False):
            self.proc.process(events)
            cols = set()
            for col in events.materialized:
                col = col.replace("_", ".", 1)
                try:
                    attrgetter(col)(events)
                except AttributeError:
                    # Not reachable as an attribute path; skip this name.
                    pass
                else:
                    cols.add(col)
            cols = sorted(cols)
            v.set(cols)
            return dict_accumulator({s: set_accumulator(cols)})

        cols = v.get()
        for ag in map(attrgetter, cols):
            data = ag(events)
            # Use the ``content`` attribute when the object has one.
            data = getattr(data, "content", data)
            # Default to None so objects without ``materialize`` are skipped
            # instead of raising AttributeError before ``callable`` can run.
            if callable(getattr(data, "materialize", None)):
                data.materialize()
        return dict_accumulator({})
def __init__(self, remote=None):
    """Create the distributed primitives used by this object.

    ``remote`` is passed as the client to the queue, the stop variable and
    the semaphore; the stop variable starts out as ``False``.
    """
    self._queue = Queue(client=remote)

    # Stop flag, initially cleared.
    self._stop = Variable(client=remote)
    self._stop.set(False)

    # Semaphore created with an initial value of 0.
    self._continue_semaphore = DistSemaphore(0, remote)

    # Record the construction time as the last report time.
    self._last_report_time = time.time()
def foo():
    """Return the current value of the Variable named ``"x"``."""
    return Variable("x").get()