async def test_environment_variable(c, s):
    """Each Nanny's ``env`` mapping must be injected into its own worker."""
    # Two nannies whose worker environments differ only in FOO.
    first = Nanny(s.address, loop=s.loop, memory_limit=0, env={"FOO": "123"})
    second = Nanny(s.address, loop=s.loop, memory_limit=0, env={"FOO": "456"})

    # Awaiting a Nanny starts it; start both concurrently.
    await asyncio.gather(first, second)

    # Each worker should report the FOO value its own nanny injected.
    results = await c.run(lambda: os.environ["FOO"])
    assert results == {first.worker_address: "123", second.worker_address: "456"}

    await asyncio.gather(first.close(), second.close())
async def test_environment_variable(c, s):
    """Each Nanny's ``env`` mapping must be injected into its own worker.

    Modernized from the deprecated Tornado ``yield [futures]`` coroutine
    style to ``async``/``await``, matching the other async tests in this
    file (e.g. test_environment_variable_by_config).
    """
    a = Nanny(s.address, loop=s.loop, memory_limit=0, env={"FOO": "123"})
    b = Nanny(s.address, loop=s.loop, memory_limit=0, env={"FOO": "456"})
    # Awaiting a Nanny starts it; start both concurrently
    # (replaces the legacy ``yield [a, b]`` list-of-futures idiom).
    await asyncio.gather(a, b)

    # Each worker reports the FOO value its nanny injected.
    results = await c.run(lambda: os.environ["FOO"])
    assert results == {a.worker_address: "123", b.worker_address: "456"}

    await asyncio.gather(a.close(), b.close())
async def test_environment_variable_by_config(c, s, monkeypatch):
    """Nanny environment precedence: ``env`` kwarg > os.environ > dask config."""
    # A non-dict value for the config key must be rejected at construction time.
    with dask.config.set({"distributed.nanny.environ": "456"}):
        with pytest.raises(TypeError, match="configuration must be of type dict"):
            Nanny(s.address, loop=s.loop, memory_limit=0)

    with dask.config.set({"distributed.nanny.environ": {"FOO": "456"}}):
        # precedence
        # kwargs > env var > config
        with mock.patch.dict(os.environ, {"FOO": "BAR"}, clear=True):
            a = Nanny(s.address, loop=s.loop, memory_limit=0, env={"FOO": "123"})
            x = Nanny(s.address, loop=s.loop, memory_limit=0)

        # NOTE(review): `b` is constructed outside the patched-environ block,
        # so it sees no FOO in os.environ and falls back to the config value
        # "456" — the assertions below only hold with this placement.
        b = Nanny(s.address, loop=s.loop, memory_limit=0)

        await asyncio.gather(a, b, x)
        results = await c.run(lambda: os.environ["FOO"])
        assert results == {
            a.worker_address: "123",  # explicit env= kwarg wins
            b.worker_address: "456",  # config value; FOO absent from os.environ
            x.worker_address: "BAR",  # inherited from the patched os.environ
        }
        await asyncio.gather(a.close(), b.close(), x.close())
def test_worker_who_has_clears_after_failed_connection(c, s, a, b):
    """After a worker dies, peers' who_has/has_what must drop references to it."""
    # Start an extra nanny-managed worker alongside the fixture workers a and b.
    n = Nanny(s.address, nthreads=2, loop=s.loop)
    n.start(0)

    # Wait (with a 5s timeout) for the third worker to register.
    start = time()
    while len(s.nthreads) < 3:
        yield gen.sleep(0.01)
        assert time() < start + 5

    futures = c.map(
        slowinc, range(20), delay=0.01, key=["f%d" % i for i in range(20)]
    )
    yield wait(futures)

    # Pull all results onto worker a, then drop the dependencies a holds but
    # did not compute itself, forcing a re-fetch later.
    result = yield c.submit(sum, futures, workers=a.address)
    for dep in set(a.dep_state) - set(a.task_state):
        a.release_dep(dep, report=True)

    n_worker_address = n.worker_address
    # Kill the nanny's worker process abruptly; the comm closes mid-call.
    with ignoring(CommClosedError):
        yield c._run(os._exit, 1, workers=[n_worker_address])

    # Wait for the scheduler to notice the dead worker.
    while len(s.workers) > 2:
        yield gen.sleep(0.01)

    # Re-run the reduction; a must re-fetch from live workers only.
    total = c.submit(sum, futures, workers=a.address)
    yield total

    # Worker a must no longer believe the dead worker holds any data.
    assert not a.has_what.get(n_worker_address)
    assert not any(n_worker_address in s for s in a.who_has.values())

    yield n.close()
def test_wait_for_scheduler():
    """A nanny waiting on an unreachable scheduler logs no errors or restarts."""
    with captured_logger("distributed") as log:
        # Port 44737 is assumed unused — no scheduler is listening there,
        # so the nanny should wait quietly.  TODO confirm port stays free.
        w = Nanny("127.0.0.1:44737")
        IOLoop.current().add_callback(w.start)
        # Give the nanny time to (not) connect before shutting it down.
        yield gen.sleep(6)
        yield w.close()

    # While waiting, the nanny must neither error out nor restart its worker.
    log = log.getvalue()
    assert "error" not in log.lower(), log
    assert "restart" not in log.lower(), log
def create_and_destroy_worker(delay):
    """Repeatedly start a nanny-managed worker, wait *delay* seconds, kill it.

    Churns workers for roughly five seconds total.  Closes over ``s`` (the
    scheduler fixture) from the enclosing test.
    """
    start = time()
    while time() < start + 5:
        # ``nthreads`` replaces the deprecated ``ncores`` kwarg; other tests
        # in this file already pass ``nthreads=2``.
        n = Nanny(s.address, nthreads=2, loop=s.loop)
        n.start(0)

        yield gen.sleep(delay)
        yield n.close()
        print("Killed nanny")
def test_worker_uses_same_host_as_nanny(c, s):
    """The worker must listen on the same host/interface given to its nanny."""
    for host in ["tcp://0.0.0.0", "tcp://127.0.0.2"]:
        n = Nanny(s.address)
        # NOTE(review): uses the private ``_start(addr)`` API to pick the
        # listen host — confirm still supported by this Nanny version.
        yield n._start(host)

        def func(dask_worker):
            return dask_worker.listener.listen_address

        # The worker spawned by this nanny should report a listen address
        # on the requested host.
        result = yield c.run(func)
        assert host in first(result.values())
        yield n.close()
def test_submit_after_failed_worker_async(c, s, a, b):
    """Work submitted right after a nanny's worker is killed still completes."""
    nanny = Nanny(s.address, nthreads=2, loop=s.loop)
    nanny.start(0)

    # Wait until the extra worker has registered with the scheduler.
    while len(s.workers) < 3:
        yield gen.sleep(0.1)

    futures = c.map(inc, range(10))
    yield wait(futures)

    # Kill the nanny's worker from the event loop, then immediately submit
    # a reduction over the (now partially lost) futures.
    s.loop.add_callback(nanny.kill)
    total = c.submit(sum, futures)

    outcome = yield total
    assert outcome == sum(map(inc, range(10)))

    yield nanny.close()
def test_broken_worker_during_computation(c, s, a, b):
    """A long reduction survives a worker being killed and replaced mid-flight."""
    # Tolerate many task failures caused by the deliberate worker deaths below.
    s.allowed_failures = 100

    # ``nthreads``/``s.nthreads`` replace the deprecated ``ncores`` spellings;
    # test_worker_who_has_clears_after_failed_connection already uses them.
    n = Nanny(s.address, nthreads=2, loop=s.loop)
    n.start(0)

    # Wait (with a 5s timeout) for the extra worker to register.
    start = time()
    while len(s.nthreads) < 3:
        yield gen.sleep(0.01)
        assert time() < start + 5

    N = 256
    expected_result = N * (N + 1) // 2
    i = 0
    # Build a pairwise reduction tree of slowadd layers over inc(0..N-1),
    # giving explicit keys per layer.
    L = c.map(inc, range(N), key=["inc-%d-%d" % (i, j) for j in range(N)])
    while len(L) > 1:
        i += 1
        L = c.map(
            slowadd,
            *zip(*partition_all(2, L)),
            key=["add-%d-%d" % (i, j) for j in range(len(L) // 2)]
        )

    # Kill the nanny's worker at a random point during the computation.
    yield gen.sleep(random.random() / 20)
    with ignoring(CommClosedError):  # comm will be closed abruptly
        yield c._run(os._exit, 1, workers=[n.worker_address])

    # Wait for the nanny to restart its worker, then kill it again.
    yield gen.sleep(random.random() / 20)
    while len(s.workers) < 3:
        yield gen.sleep(0.01)

    with ignoring(
        CommClosedError, EnvironmentError
    ):  # perhaps new worker can't be contacted yet
        yield c._run(os._exit, 1, workers=[n.worker_address])

    # Despite two worker deaths, the final result must be correct.
    [result] = yield c.gather(L)
    assert isinstance(result, int)
    assert result == expected_result

    yield n.close()