def become_dask_worker(ip, port, nanny=False, **kwargs):
    """Task function for becoming a dask.distributed Worker

    Parameters
    ----------
    ip: str
        The IP address of the dask Scheduler.
    port: int
        The port of the dask Scheduler.
    nanny: bool
        If True, start a Nanny (which supervises the Worker in a
        subprocess) instead of a Worker in this process.
    **kwargs:
        Any additional keyword arguments will be passed to the Worker
        constructor.
    """
    shell = get_ipython()
    kernel = shell.kernel
    # BUG FIX: the guard previously checked ``kernel.dask_worker``, but the
    # worker is stored below on ``kernel.distributed_worker`` (matching
    # become_distributed_worker), so a second call would start a duplicate
    # worker. Check the attribute that is actually assigned.
    if getattr(kernel, 'distributed_worker', None) is not None:
        kernel.log.info("Dask worker is already running.")
        return
    from distributed import Worker, Nanny
    if nanny:
        w = Nanny(ip, port, **kwargs)
    else:
        w = Worker(ip, port, **kwargs)
    # Expose the worker under both names in the user namespace and keep a
    # handle on the kernel so the guard above can detect it next time.
    shell.user_ns['dask_worker'] = shell.user_ns['distributed_worker'] = \
        kernel.distributed_worker = w
    w.start(0)
def test_broken_worker_during_computation(c, s, a, b):
    """A tree-reduction computation must survive its nanny-supervised
    worker process being terminated twice mid-flight."""
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    # Build an 8-level pairwise reduction tree over 256 increments.
    futs = c.map(inc, range(256))
    for _ in range(8):
        futs = c.map(add, *zip(*partition_all(2, futs)))

    from random import random
    # Kill the worker process twice at random points during the computation.
    for _ in range(2):
        yield gen.sleep(random() / 2)
        with ignoring(OSError):
            nanny.process.terminate()

    result = yield c._gather(futs)
    assert isinstance(result[0], int)

    yield nanny._close()
def test_worker_who_has_clears_after_failed_connection(c, s, a, b):
    """After a peer worker dies, stale who_has/has_what entries pointing at
    it must be cleared from the surviving worker."""
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    futures = c.map(slowinc, range(20), delay=0.01,
                    key=['f%d' % i for i in range(20)])
    yield wait(futures)

    # Pull all dependencies onto worker ``a``, then release the ones it
    # only holds as dependencies so they must be re-fetched later.
    result = yield c.submit(sum, futures, workers=a.address)
    for dep in set(a.dep_state) - set(a.task_state):
        a.release_dep(dep, report=True)

    killed_address = nanny.worker_address
    # Hard-kill the nanny's worker process; the comm drops abruptly.
    with ignoring(CommClosedError):
        yield c._run(os._exit, 1, workers=[killed_address])

    while len(s.workers) > 2:
        yield gen.sleep(0.01)

    total = c.submit(sum, futures, workers=a.address)
    yield total

    assert not a.has_what.get(killed_address)
    assert not any(killed_address in addrs for addrs in a.who_has.values())

    yield nanny._close()
def test_broken_worker_during_computation(c, s, a, b):
    """A tree reduction must complete even when a worker process is
    force-exited twice during the computation."""
    s.allowed_failures = 100

    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    # Pairwise reduction tree: 256 increments folded 8 times.
    futs = c.map(inc, range(256))
    for _ in range(8):
        futs = c.map(add, *zip(*partition_all(2, futs)))

    from random import random

    yield gen.sleep(random() / 2)
    with ignoring(CommClosedError):  # comm will be closed abrupty
        yield c._run(os._exit, 1, workers=[nanny.worker_address])

    yield gen.sleep(random() / 2)
    with ignoring(CommClosedError,
                  EnvironmentError):  # perhaps new worker can't be contacted yet
        yield c._run(os._exit, 1, workers=[nanny.worker_address])

    result = yield c._gather(futs)
    assert isinstance(result[0], int)

    yield nanny._close()
def become_distributed_worker(ip, port, nanny=False, **kwargs):
    """Task function for becoming a distributed Worker

    Parameters
    ----------
    ip: str
        The IP address of the Scheduler.
    port: int
        The port of the Scheduler.
    nanny: bool
        If True, start a Nanny rather than a Worker.
    **kwargs:
        Any additional keyword arguments will be passed to the Worker
        constructor.
    """
    shell = get_ipython()
    kernel = shell.kernel
    # Re-entry guard: do nothing if a worker is already attached.
    if getattr(kernel, 'distributed_worker', None) is not None:
        kernel.log.info("Distributed worker is already running.")
        return
    from distributed import Worker, Nanny
    worker_cls = Nanny if nanny else Worker
    w = worker_cls(ip, port, **kwargs)
    # Expose the worker in the user namespace and remember it on the kernel.
    shell.user_ns['distributed_worker'] = kernel.distributed_worker = w
    w.start(0)
def test_worker_who_has_clears_after_failed_connection(c, s, a, b):
    """Stale who_has/has_what entries for a terminated peer must be
    cleared from the surviving worker."""
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    futures = c.map(slowinc, range(20), delay=0.01)
    yield _wait(futures)

    # Gather everything onto ``a``, then drop pure-dependency keys so a
    # later computation has to fetch them again.
    result = yield c.submit(sum, futures, workers=a.address)
    for dep in set(a.dep_state) - set(a.task_state):
        a.release_dep(dep, report=True)

    killed_address = nanny.worker_address
    nanny.process.terminate()
    while len(s.workers) > 2:
        yield gen.sleep(0.01)

    total = c.submit(sum, futures, workers=a.address)
    yield total

    assert not a.has_what.get(killed_address)
    assert not any(killed_address in addrs for addrs in a.who_has.values())

    yield nanny._close()
def create_and_destroy_worker(delay):
    """Repeatedly start a Nanny against the closed-over scheduler ``s``,
    let it live for ``delay`` seconds, then close it — for ~5 seconds."""
    cutoff = time() + 5
    while time() < cutoff:
        worker = Nanny(s.address, ncores=2, loop=s.loop)
        worker.start(0)

        yield gen.sleep(delay)
        yield worker._close()
        print("Killed nanny")
def test_wait_for_scheduler():
    """A Nanny pointed at an unreachable scheduler should wait quietly —
    no errors or restarts logged while it cannot connect."""
    with captured_logger("distributed") as log:
        w = Nanny("127.0.0.1:44737")
        w.start()
        yield gen.sleep(6)
        yield w.close()

    text = log.getvalue()
    assert "error" not in text.lower(), text
    assert "restart" not in text.lower(), text
def test_submit_after_failed_worker_async(c, s, a, b):
    """Work submitted while a worker is being killed asynchronously must
    still produce the correct result."""
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)
    while len(s.workers) < 3:
        yield gen.sleep(0.1)

    futures = c.map(inc, range(10))
    yield wait(futures)

    # Schedule the kill on the event loop, then immediately submit more work.
    s.loop.add_callback(nanny.kill)
    total = c.submit(sum, futures)
    result = yield total
    assert result == sum(map(inc, range(10)))

    yield nanny._close()
def test_broken_worker_during_computation(c, s, a, b):
    """A full pairwise-reduction of 1..N must yield the exact triangular
    sum despite a worker process being force-exited twice mid-run."""
    s.allowed_failures = 100

    nanny = Nanny(s.address, ncores=2, loop=s.loop)
    nanny.start(0)

    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    N = 256
    expected_result = N * (N + 1) // 2
    i = 0
    # Deterministic keys so retried tasks keep the same identity.
    L = c.map(inc, range(N), key=["inc-%d-%d" % (i, j) for j in range(N)])
    while len(L) > 1:
        i += 1
        L = c.map(
            slowadd,
            *zip(*partition_all(2, L)),
            key=["add-%d-%d" % (i, j) for j in range(len(L) // 2)]
        )

    yield gen.sleep(random.random() / 20)
    with ignoring(CommClosedError):  # comm will be closed abrupty
        yield c._run(os._exit, 1, workers=[nanny.worker_address])

    yield gen.sleep(random.random() / 20)
    while len(s.workers) < 3:
        yield gen.sleep(0.01)

    with ignoring(
        CommClosedError, EnvironmentError
    ):  # perhaps new worker can't be contacted yet
        yield c._run(os._exit, 1, workers=[nanny.worker_address])

    [result] = yield c.gather(L)
    assert isinstance(result, int)
    assert result == expected_result

    yield nanny.close()
def test_broken_worker_during_computation(c, s, a, b):
    """Pairwise reduction of 1..N must produce the triangular sum even
    with a worker process force-exited twice during the run."""
    s.allowed_failures = 100

    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    nanny.start(0)

    deadline = time() + 5
    while len(s.ncores) < 3:
        yield gen.sleep(0.01)
        assert time() < deadline

    N = 256
    expected_result = N * (N + 1) // 2
    i = 0
    # Explicit keys keep task identity stable across retries.
    L = c.map(inc, range(N), key=['inc-%d-%d' % (i, j) for j in range(N)])
    while len(L) > 1:
        i += 1
        L = c.map(slowadd, *zip(*partition_all(2, L)),
                  key=['add-%d-%d' % (i, j) for j in range(len(L) // 2)])

    yield gen.sleep(random.random() / 20)
    with ignoring(CommClosedError):  # comm will be closed abrupty
        yield c._run(os._exit, 1, workers=[nanny.worker_address])

    yield gen.sleep(random.random() / 20)
    while len(s.workers) < 3:
        yield gen.sleep(0.01)

    with ignoring(CommClosedError,
                  EnvironmentError):  # perhaps new worker can't be contacted yet
        yield c._run(os._exit, 1, workers=[nanny.worker_address])

    [result] = yield c.gather(L)
    assert isinstance(result, int)
    assert result == expected_result

    yield nanny._close()
def _create_worker(self):
    """Construct, start, and return a single-core Worker attached to the
    scheduler at ``self.ip``/``self.port``."""
    w = Worker(scheduler_ip=self.ip, scheduler_port=self.port, ncores=1)
    w.start(0)
    return w