def test_environment_variable(c, s):
    """Check that a nanny's ``env`` mapping is visible inside its worker.

    Two nannies are started against scheduler ``s`` with different values
    for ``FOO``; ``c.run`` then reads ``os.environ['FOO']`` and the answers
    are compared per worker address.
    """
    first_nanny, second_nanny = [
        Nanny(s.address, loop=s.loop, memory_limit=0, env={"FOO": value})
        for value in ("123", "456")
    ]
    yield [first_nanny._start(), second_nanny._start()]

    seen = yield c.run(lambda: os.environ['FOO'])
    expected = {
        first_nanny.worker_address: "123",
        second_nanny.worker_address: "456",
    }
    assert seen == expected

    yield [first_nanny._close(), second_nanny._close()]
def test_wait_for_scheduler():
    """Starting a nanny against 127.0.0.1:44737 must not log errors or restarts.

    The start call is issued without waiting on it, the test sleeps for six
    seconds, and the captured ``distributed`` log is then searched
    (case-insensitively) for "error" and "restart".
    """
    with captured_logger('distributed') as captured:
        nanny = Nanny('127.0.0.1:44737')
        # The result of _start() is not yielded, so the test does not wait
        # for start-up to finish.
        # NOTE(review): presumably nothing listens on this port -- confirm.
        nanny._start()
        yield gen.sleep(6)

    text = captured.getvalue()
    lowered = text.lower()
    assert 'error' not in lowered, text
    assert 'restart' not in lowered, text
def test_wait_for_scheduler():
    """The ``distributed`` log must stay free of "error" and "restart".

    A nanny is created for ``127.0.0.1:44737`` and its start is kicked off
    without being awaited; after a six second sleep the captured log text is
    inspected case-insensitively.
    """
    with captured_logger("distributed") as capture:
        pending_nanny = Nanny("127.0.0.1:44737")
        # Fire-and-forget: the returned value is intentionally left unused
        # here, exactly as the start call is not yielded.
        # NOTE(review): presumably no scheduler answers at this address.
        pending_nanny._start()
        yield gen.sleep(6)

    logged = capture.getvalue()
    haystack = logged.lower()
    for unwanted in ("error", "restart"):
        assert unwanted not in haystack, logged
def test_nanny(s):
    """Walk a nanny through kill, repeated kill, instantiate and terminate.

    After each RPC the test checks the nanny's own liveness and what the
    scheduler ``s`` records for the worker in ``ncores`` and ``worker_info``.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield nanny._start(0)

    def assert_worker_up():
        # Worker process alive and registered with two cores and a nanny
        # service port above 1024.
        assert nanny.is_alive()
        assert s.ncores[nanny.worker_address] == 2
        assert s.worker_info[nanny.worker_address]['services']['nanny'] > 1024

    def assert_worker_down():
        # Worker process gone and removed from the scheduler's bookkeeping.
        assert not nanny.is_alive()
        assert nanny.worker_address not in s.ncores
        assert nanny.worker_address not in s.worker_info

    with rpc(nanny.address) as remote:
        assert_worker_up()

        # The second kill hits an already-dead worker and must leave the
        # same observable state.
        for _attempt in range(2):
            yield remote.kill()
            assert_worker_down()

        yield remote.instantiate()
        assert_worker_up()

        yield remote.terminate()
        assert not nanny.is_alive()

    yield nanny._close()
def test_many_kills(s):
    """Issue two batches of five concurrent ``kill`` calls on one nanny.

    The test passes when both batches complete and the nanny can then be
    closed; no state is asserted after the kills.
    """
    nanny = Nanny(s.address, ncores=2, loop=s.loop)
    yield nanny._start(0)
    assert nanny.is_alive()

    for _batch in range(2):
        kills = [nanny.kill() for _call in range(5)]
        yield kills

    yield nanny._close()
def test_nanny(s):
    """Exercise kill / kill / instantiate / terminate through the nanny RPC.

    Liveness is read from ``n.process`` and registration from the
    scheduler's ``ncores`` and ``worker_info`` mappings.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield nanny._start(0)
    remote = rpc(ip=nanny.ip, port=nanny.port)

    def assert_running():
        assert nanny.process.is_alive()
        assert s.ncores[nanny.worker_address] == 2
        assert s.worker_info[nanny.worker_address]['services']['nanny'] > 1024

    def assert_stopped():
        assert nanny.worker_address not in s.ncores
        assert nanny.worker_address not in s.worker_info
        assert not nanny.process

    assert_running()

    # Killing twice in a row: the second call finds no process to kill.
    for _attempt in range(2):
        yield remote.kill()
        assert_stopped()

    yield remote.instantiate()
    assert_running()

    yield remote.terminate()
    assert not nanny.process

    yield nanny._close()
def test_nanny(s):
    """Drive a nanny through its kill / instantiate / terminate RPCs.

    ``isalive(n.process)`` is used for liveness; registration is checked in
    the scheduler's ``ncores`` and ``worker_info``.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield nanny._start(0)

    def assert_registered():
        assert isalive(nanny.process)  # alive
        assert s.ncores[nanny.worker_address] == 2
        assert s.worker_info[nanny.worker_address]['services']['nanny'] > 1024

    def assert_unregistered():
        assert nanny.worker_address not in s.ncores
        assert nanny.worker_address not in s.worker_info

    with rpc(ip=nanny.ip, port=nanny.port) as remote:
        assert_registered()

        yield remote.kill()
        assert not nanny.process
        assert_unregistered()

        # Second kill on a nanny that no longer has a process.
        yield remote.kill()
        assert_unregistered()
        assert not nanny.process

        yield remote.instantiate()
        assert_registered()

        yield remote.terminate()
        assert not nanny.process

    yield nanny._close()
def test_nanny_death_timeout(s):
    """A nanny with ``death_timeout=1`` ends up closed once ``s`` is closed.

    The scheduler is closed first, the nanny is started against its address,
    and after three seconds the nanny's status must be ``'closed'``.
    """
    yield s.close()

    nanny = Nanny(s.address, death_timeout=1)
    yield nanny._start()

    # Sleep well past the one-second death timeout before checking.
    yield gen.sleep(3)
    final_status = nanny.status
    assert final_status == 'closed'
def test_nanny():
    """Exercise the nanny RPCs against a ``Center`` instead of a scheduler.

    Registration is checked in the center's ``ncores`` and
    ``worker_services`` mappings; liveness is read from ``n.process``.
    """
    center = Center('127.0.0.1')
    center.listen(0)

    nanny = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    yield nanny._start(0)
    remote = rpc(ip=nanny.ip, port=nanny.port)

    def assert_running():
        assert nanny.process.is_alive()
        assert center.ncores[nanny.worker_address] == 2
        assert center.worker_services[nanny.worker_address]['nanny'] > 1024

    def assert_stopped():
        assert nanny.worker_address not in center.ncores
        assert nanny.worker_address not in center.worker_services
        assert not nanny.process

    assert_running()

    # Two kills back to back; the second one finds nothing left to kill.
    for _attempt in range(2):
        yield remote.kill()
        assert_stopped()

    yield remote.instantiate()
    assert_running()

    yield remote.terminate()
    assert not nanny.process

    # Fallback cleanup; only reachable if the assertion above is disabled.
    if nanny.process:
        nanny.process.terminate()

    yield nanny._close()
    center.stop()
def test_nanny(s):
    """Walk a nanny through kill, repeated kill, instantiate and terminate.

    Scheduler-side state is read from ``s.ncores`` and from the per-worker
    entries in ``s.workers`` (their ``services['nanny']`` port).
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield nanny._start(0)

    def assert_worker_present():
        assert nanny.is_alive()
        assert s.ncores[nanny.worker_address] == 2
        assert s.workers[nanny.worker_address].services['nanny'] > 1024

    def assert_worker_absent():
        assert not nanny.is_alive()
        assert nanny.worker_address not in s.ncores
        assert nanny.worker_address not in s.workers

    with rpc(nanny.address) as remote:
        assert_worker_present()

        # First kill stops the worker; the second must be a harmless repeat.
        kills_done = 0
        while kills_done < 2:
            yield remote.kill()
            assert_worker_absent()
            kills_done += 1

        yield remote.instantiate()
        assert_worker_present()

        yield remote.terminate()
        assert not nanny.is_alive()

    yield nanny._close()
def test_monitor_resources():
    """Check ``Nanny.resource_collect`` and the ``monitor_resources`` op.

    Skipped when ``psutil`` cannot be imported.  A nanny is started against
    a local ``Center``; its one-shot resource snapshot must expose
    ``cpu_percent``, ``memory_percent`` and a ``datetime`` timestamp, and
    three messages read from a ``monitor_resources`` stream must be dicts
    carrying the same two percentage keys.

    The stream, the nanny and the center are released in ``finally`` blocks
    so that a failing assertion does not leave them open.
    """
    pytest.importorskip('psutil')
    c = Center(ip='127.0.0.1')
    c.listen(0)
    n = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    yield n._start()
    try:
        assert n.process.is_alive()

        d = n.resource_collect()
        assert {'cpu_percent', 'memory_percent'}.issubset(d)
        assert isinstance(d['timestamp'], datetime)

        stream = yield connect(ip=n.ip, port=n.port)
        try:
            yield write(stream, {'op': 'monitor_resources', 'interval': 0.01})
            for _ in range(3):
                msg = yield read(stream)
                assert isinstance(msg, dict)
                assert {'cpu_percent', 'memory_percent'}.issubset(msg)
        finally:
            stream.close()
    finally:
        yield n._close()
        c.stop()
def test_scheduler_file():
    """A nanny can find its scheduler through a shared scheduler file.

    The scheduler is created with ``scheduler_file=fn`` and started on port
    8008; a nanny given only the same file must end up as the scheduler's
    single registered worker.
    """
    with tmpfile() as fn:
        s = Scheduler(scheduler_file=fn)
        s.start(8008)
        w = Nanny(scheduler_file=fn)
        yield w._start()
        # Compare the worker addresses as a set: this holds whether
        # ``s.workers`` is itself a set or a mapping keyed by address
        # (a mapping never compares equal to a set directly).
        assert set(s.workers) == {w.worker_address}
        yield w._close()
        s.stop()
def test_scheduler_file():
    """Nanny and scheduler rendezvous through a temporary scheduler file.

    After the nanny starts, the set of addresses in ``scheduler.workers``
    must be exactly the nanny's worker address.
    """
    with tmpfile() as path:
        scheduler = Scheduler(scheduler_file=path)
        scheduler.start(8008)

        nanny = Nanny(scheduler_file=path)
        yield nanny._start()

        registered = set(scheduler.workers)
        assert registered == {nanny.worker_address}

        yield nanny._close()
        scheduler.stop()
def test_run(s):
    """The nanny's ``run`` RPC executes a serialized function.

    Skipped when ``psutil`` cannot be imported.  A pickled ``lambda: 1`` is
    sent through the nanny's RPC; the reply must report status ``'OK'`` and
    carry a serialized result that deserializes to ``1``.

    The RPC handle is used as a context manager and the nanny is closed in
    a ``finally`` block, so neither outlives the test.
    """
    pytest.importorskip('psutil')
    n = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield n._start()
    try:
        with rpc(n.address) as nn:
            response = yield nn.run(function=dumps(lambda: 1))
            assert response['status'] == 'OK'
            assert loads(response['result']) == 1
    finally:
        yield n._close()
def test_restart():
    """Restarting an ``Executor`` clears data and cancels futures.

    Two nannies register with a ``Center``; two chained ``inc`` tasks are
    computed, then ``_restart`` is called.  Afterwards the center and the
    executor's scheduler must report no ``who_has`` entries and both futures
    must be cancelled.
    """
    from distributed import Nanny, rpc

    center = Center('127.0.0.1')
    center.listen(0)

    nanny_a = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    nanny_b = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    yield [nanny_a._start(), nanny_b._start()]

    executor = Executor((center.ip, center.port), start=False,
                        loop=IOLoop.current())
    yield executor._start()

    expected_cores = {nanny_a.worker_address: 2, nanny_b.worker_address: 2}
    assert executor.scheduler.ncores == expected_cores

    x = executor.submit(inc, 1)
    y = executor.submit(inc, x)
    yield y._result()

    center_rpc = rpc(ip=center.ip, port=center.port)
    holders_before = yield center_rpc.who_has()
    try:
        assert executor.scheduler.who_has == holders_before
        assert set(executor.scheduler.who_has) == {x.key, y.key}

        restarted = yield executor._restart()
        assert restarted is executor

        assert len(executor.scheduler.stacks) == 2
        assert len(executor.scheduler.processing) == 2

        holders_after = yield center_rpc.who_has()
        assert not holders_after
        assert not executor.scheduler.who_has

        assert x.cancelled()
        assert y.cancelled()
    finally:
        for nanny in (nanny_a, nanny_b):
            yield nanny._close()
        yield executor._shutdown(fast=True)
        center.stop()
def test_worker_uses_same_host_as_nanny(c, s):
    """The worker's listen address contains the host its nanny was started on.

    For each host the nanny is started with that address, the worker's
    ``listener.listen_address`` is fetched via ``c.run`` and must contain
    the host string.
    """
    def listen_address(dask_worker):
        return dask_worker.listener.listen_address

    hosts = ['tcp://0.0.0.0', 'tcp://127.0.0.2']
    for host in hosts:
        nanny = Nanny(s.address)
        yield nanny._start(host)

        per_worker = yield c.run(listen_address)
        assert host in first(per_worker.values())

        yield nanny._close()
def test_run(s):
    """The nanny's ``run`` RPC executes a serialized function.

    Skipped when ``psutil`` cannot be imported.  The reply to a pickled
    ``lambda: 1`` must have status ``'OK'`` and result ``1``.
    """
    pytest.importorskip('psutil')

    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield nanny._start()

    with rpc(nanny.address) as remote:
        payload = dumps(lambda: 1)
        reply = yield remote.run(function=payload)
        assert reply['status'] == 'OK'
        assert reply['result'] == 1

    yield nanny._close()
def test_monitor_resources():
    """The scheduler collects bounded resource logs from its nannies.

    Skipped when ``psutil`` cannot be imported.  With
    ``resource_log_size=3`` each nanny's log must settle at exactly three
    entries, and ``diagnostic_resources(n=2)`` must return two samples per
    series for each worker.
    """
    pytest.importorskip('psutil')

    center = Center('127.0.0.1')
    center.listen(0)
    nannies = [Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
               for _ in range(2)]
    a, b = nannies
    scheduler = Scheduler((center.ip, center.port),
                          resource_interval=0.01, resource_log_size=3)

    for nanny in nannies:
        yield nanny._start()
    yield scheduler.sync_center()
    done = scheduler.start()

    try:
        expected_cores = {
            ('127.0.0.1', a.worker_port): 2,
            ('127.0.0.1', b.worker_port): 2,
        }
        assert scheduler.ncores == expected_cores
        assert scheduler.nannies == {(n.ip, n.worker_port): n.port
                                     for n in [a, b]}

        # Poll until every log holds at least three entries.
        while True:
            logs = scheduler.resource_logs.values()
            if all(len(entries) >= 3 for entries in logs):
                break
            yield gen.sleep(0.01)

        # Let more samples arrive so the size cap is actually exercised.
        yield gen.sleep(0.1)

        assert set(scheduler.resource_logs) == {a.address, b.address}
        assert all(len(entries) == 3
                   for entries in scheduler.resource_logs.values())

        diagnostics = scheduler.diagnostic_resources(n=2)
        assert set(diagnostics) == {a.worker_address, b.worker_address}
        series = diagnostics[a.worker_address]
        assert set(series).issubset({'cpu', 'memory', 'time'})
        assert all(len(samples) == 2
                   for samples in diagnostics[a.worker_address].values())

        scheduler.put({'op': 'close'})
        yield done
    finally:
        # Best-effort shutdown: the listed errors are ignored per nanny.
        for nanny in nannies:
            with ignoring(TimeoutError, StreamClosedError, OSError):
                yield nanny._close(timeout=0.5)
        center.stop()
def test_scheduler_address_config(c, s):
    """A nanny built without an address reads it from the dask config.

    While ``scheduler-address`` is set to ``s.address`` the nanny is created
    with no explicit scheduler; it must point at ``s`` and a worker must
    register within ten seconds.
    """
    override = {'scheduler-address': s.address}
    with dask.config.set(override):
        nanny = Nanny(loop=s.loop)
        yield nanny._start()
        assert nanny.scheduler.address == s.address

        # Poll for the worker to appear, failing after ten seconds.
        began = time()
        while not s.workers:
            yield gen.sleep(0.1)
            assert time() < began + 10

    yield nanny._close()
def test_monitor_resources():
    """Resource logs gathered by the scheduler are capped and queryable.

    Skipped when ``psutil`` cannot be imported.  Two nannies feed a
    scheduler configured with ``resource_interval=0.01`` and
    ``resource_log_size=3``; the logs must hold exactly three entries each
    and ``diagnostic_resources(n=2)`` must yield two samples per series.
    """
    pytest.importorskip('psutil')

    center = Center('127.0.0.1')
    center.listen(0)
    nanny_a = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    nanny_b = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    sched = Scheduler((center.ip, center.port),
                      resource_interval=0.01, resource_log_size=3)

    yield nanny_a._start()
    yield nanny_b._start()
    yield sched.sync_center()
    finished = sched.start()

    try:
        assert sched.ncores == {('127.0.0.1', nanny_a.worker_port): 2,
                                ('127.0.0.1', nanny_b.worker_port): 2}
        expected_nannies = {(n.ip, n.worker_port): n.port
                            for n in [nanny_a, nanny_b]}
        assert sched.nannies == expected_nannies

        # Wait for every log to reach three entries.
        while any(len(log) < 3 for log in sched.resource_logs.values()):
            yield gen.sleep(0.01)

        # Extra time for further samples, so the cap has to be enforced.
        yield gen.sleep(0.1)

        assert set(sched.resource_logs) == {nanny_a.address, nanny_b.address}
        for log in sched.resource_logs.values():
            assert len(log) == 3

        report = sched.diagnostic_resources(n=2)
        assert set(report) == {nanny_a.worker_address, nanny_b.worker_address}
        assert set(report[nanny_a.worker_address]).issubset(
            {'cpu', 'memory', 'time'})
        for samples in report[nanny_a.worker_address].values():
            assert len(samples) == 2

        sched.put({'op': 'close'})
        yield finished
    finally:
        # Close each nanny on a best-effort basis before stopping the center.
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield nanny_a._close(timeout=0.5)
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield nanny_b._close(timeout=0.5)
        center.stop()
def run_nanny(q, center_port, **kwargs):
    """Run a nanny on a fresh IOLoop and report its port through ``q``.

    Parameters
    ----------
    q : queue-like
        Receives the nanny's ``port`` via ``q.put`` once it has started.
    center_port : int
        Port on ``127.0.0.1`` the nanny is pointed at.
    **kwargs
        Forwarded to ``Nanny``.

    The final ``loop.start()`` call blocks for as long as the loop runs.
    """
    import logging

    from distributed import Nanny
    from tornado.ioloop import IOLoop, PeriodicCallback

    def _noop():
        return None

    def _start_nanny():
        return nanny._start(0)

    with log_errors():
        # Replace whatever loop instance exists with a brand new current one.
        IOLoop.clear_instance()
        loop = IOLoop()
        loop.make_current()

        # No-op callback every 500 ms.
        # NOTE(review): presumably keeps the loop waking up -- confirm.
        heartbeat = PeriodicCallback(_noop, 500)
        heartbeat.start()

        tornado_logger = logging.getLogger("tornado")
        tornado_logger.setLevel(logging.CRITICAL)

        nanny = Nanny('127.0.0.1', center_port, ip='127.0.0.1', **kwargs)
        loop.run_sync(_start_nanny)
        q.put(nanny.port)
        loop.start()
def run_nanny(q, scheduler_q, **kwargs):
    """Run a validating nanny on a pristine loop; report its address on ``q``.

    Parameters
    ----------
    q : queue-like
        Receives the nanny's ``address`` once it has started.
    scheduler_q : queue-like
        Supplies the scheduler address through ``get()``.
    **kwargs
        Forwarded to ``Nanny`` alongside ``validate=True``.

    When the loop stops, the nanny is closed and the loop is closed with
    ``all_fds=True``.
    """
    from distributed import Nanny

    def _start_nanny():
        return nanny._start(0)

    with log_errors(), pristine_loop() as loop:
        scheduler_address = scheduler_q.get()
        nanny = Nanny(scheduler_address, validate=True, **kwargs)
        loop.run_sync(_start_nanny)
        q.put(nanny.address)
        try:
            loop.start()
        finally:
            loop.run_sync(nanny._close)
            loop.close(all_fds=True)
def test_num_fds(s):
    """Starting and closing nannies must not grow the open fd count.

    Skipped when ``psutil`` cannot be imported.  One warm-up cycle is run
    before the baseline is taken; after three further cycles the count must
    drop back to the baseline within ten seconds.
    """
    psutil = pytest.importorskip('psutil')
    process = psutil.Process()

    # Warm up
    warmup = Nanny(s.address)
    yield warmup._start()
    yield warmup._close()
    del warmup
    gc.collect()

    baseline = process.num_fds()

    for _cycle in range(3):
        nanny = Nanny(s.address)
        yield nanny._start()
        yield gen.sleep(0.1)
        yield nanny._close()

    # Poll until the descriptor count is back at (or below) the baseline.
    began = time()
    while process.num_fds() > baseline:
        print("fds:", baseline, process.num_fds())
        yield gen.sleep(0.1)
        assert time() < began + 10
def test_scheduler_address_config(c, s):
    """A nanny built without an address reads ``config['scheduler-address']``.

    The config key is set for the duration of the check and always removed
    again; the nanny must point at ``s`` and a worker must register within
    ten seconds.
    """
    key = 'scheduler-address'
    config[key] = s.address
    try:
        nanny = Nanny(loop=s.loop)
        yield nanny._start()
        assert nanny.scheduler.address == s.address

        # Poll for the worker to appear, failing after ten seconds.
        began = time()
        while not s.workers:
            yield gen.sleep(0.1)
            assert time() < began + 10
    finally:
        del config[key]

    yield nanny._close()
def test_avoid_memory_monitor_if_zero_limit(c, s):
    """With ``memory_limit=0`` neither worker nor nanny monitors memory.

    Checks that the worker's ``data`` is a plain ``dict``, that no
    ``'memory'`` periodic callback exists on the worker or on the nanny, and
    that the worker still runs tasks after a short pause.
    """
    nanny = Nanny(s.address, loop=s.loop, memory_limit=0)
    yield nanny._start()

    typ = yield c.run(lambda dask_worker: type(dask_worker.data))
    assert typ == {nanny.worker_address: dict}

    pcs = yield c.run(lambda dask_worker: list(dask_worker.periodic_callbacks))
    # ``c.run`` answers with a mapping keyed by worker address (see the
    # ``typ`` comparison above), so the callback names are in the values;
    # testing membership on the mapping itself would only look at addresses.
    assert set(pcs) == {nanny.worker_address}
    assert all('memory' not in names for names in pcs.values())
    assert 'memory' not in nanny.periodic_callbacks

    future = c.submit(inc, 1)
    assert (yield future) == 2
    yield gen.sleep(0.02)
    yield c.submit(inc, 2)  # worker doesn't pause

    yield nanny._close()
def run_nanny(q, scheduler_port, **kwargs):
    """Run a validating nanny on a fresh IOLoop; report its port on ``q``.

    Parameters
    ----------
    q : queue-like
        Receives the nanny's ``port`` once it has started.
    scheduler_port : int
        Port on ``127.0.0.1`` the nanny is pointed at.
    **kwargs
        Forwarded to ``Nanny`` alongside ``loop`` and ``validate=True``.

    When the loop stops, the nanny is closed and the loop is closed with
    ``all_fds=True``.
    """
    from distributed import Nanny
    from tornado.ioloop import IOLoop, PeriodicCallback

    def _noop():
        return None

    def _start_nanny():
        return nanny._start(0)

    with log_errors():
        # Replace whatever loop instance exists with a brand new current one.
        IOLoop.clear_instance()
        loop = IOLoop()
        loop.make_current()

        # No-op callback every 500 ms.
        # NOTE(review): presumably keeps the loop waking up -- confirm.
        PeriodicCallback(_noop, 500).start()

        nanny = Nanny('127.0.0.1', scheduler_port, ip='127.0.0.1',
                      loop=loop, validate=True, **kwargs)
        loop.run_sync(_start_nanny)
        q.put(nanny.port)
        try:
            loop.start()
        finally:
            loop.run_sync(nanny._close)
            loop.close(all_fds=True)
def run_nanny(q, scheduler_port, **kwargs):
    """Run a nanny on a fresh IOLoop and report its port through ``q``.

    Parameters
    ----------
    q : queue-like
        Receives the nanny's ``port`` once it has started.
    scheduler_port : int
        Port on ``127.0.0.1`` the nanny is pointed at.
    **kwargs
        Forwarded to ``Nanny`` alongside the new ``loop``.

    When the loop stops, the nanny is closed and the loop is closed with
    ``all_fds=True``.
    """
    from distributed import Nanny
    from tornado.ioloop import IOLoop, PeriodicCallback

    with log_errors():
        # Start from a clean slate: drop the existing loop instance and make
        # a new one current.
        IOLoop.clear_instance()
        fresh_loop = IOLoop()
        fresh_loop.make_current()

        # No-op callback every 500 ms.
        # NOTE(review): presumably keeps the loop waking up -- confirm.
        ticker = PeriodicCallback(lambda: None, 500)
        ticker.start()

        nanny = Nanny('127.0.0.1', scheduler_port, ip='127.0.0.1',
                      loop=fresh_loop, **kwargs)
        fresh_loop.run_sync(lambda: nanny._start(0))
        q.put(nanny.port)
        try:
            fresh_loop.start()
        finally:
            fresh_loop.run_sync(nanny._close)
            fresh_loop.close(all_fds=True)
def run_nanny(q, scheduler_q, **kwargs):
    """Run a validating nanny on a pristine loop; report its address on ``q``.

    Parameters
    ----------
    q : queue-like
        Receives the nanny's ``address`` once it has started.
    scheduler_q : queue-like
        Supplies the scheduler address through ``get()``.
    **kwargs
        Forwarded to ``Nanny`` alongside ``validate=True``.

    When the loop stops, the nanny is closed and the loop is closed with
    ``all_fds=True``.
    """
    from distributed import Nanny
    from tornado.ioloop import IOLoop, PeriodicCallback

    def _noop():
        return None

    with log_errors(), pristine_loop() as loop:
        # No-op callback every 500 ms.
        # NOTE(review): presumably keeps the loop waking up -- confirm.
        ticker = PeriodicCallback(_noop, 500)
        ticker.start()

        scheduler_address = scheduler_q.get()
        nanny = Nanny(scheduler_address, validate=True, **kwargs)
        loop.run_sync(lambda: nanny._start(0))
        q.put(nanny.address)
        try:
            loop.start()
        finally:
            loop.run_sync(nanny._close)
            loop.close(all_fds=True)
def test_nanny_process_failure(c, s):
    """The nanny replaces a worker process that exits unexpectedly.

    ``os._exit(0)`` is run on the worker through the client.  The test then
    waits for the nanny's pid to change, for the nanny to report alive and
    for the worker to be registered in ``s.ncores`` with a worker directory.
    After closing the nanny both the old and the new directory must be gone.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield nanny._start()

    first_dir = nanny.worker_dir
    assert os.path.exists(first_dir)

    # Only referenced by the disabled assertion further down.
    original_address = nanny.worker_address
    worker_rpc = rpc(nanny.worker_address)
    yield worker_rpc.update_data(data=valmap(dumps, {'x': 1, 'y': 2}))

    old_pid = nanny.pid
    assert old_pid is not None

    # The connection is expected to drop when the worker exits.
    with ignoring(CommClosedError):
        yield c._run(os._exit, 0, workers=[nanny.worker_address])

    # Each entry is polled until it turns false, with a five second budget.
    still_waiting = (
        # wait while process dies and comes back
        lambda: nanny.pid == old_pid,
        # wait while process comes back
        lambda: not nanny.is_alive(),
        # assert n.worker_address != original_address  # most likely
        lambda: (nanny.worker_address not in s.ncores
                 or nanny.worker_dir is None),
    )
    for condition in still_waiting:
        began = time()
        while condition():
            yield gen.sleep(0.01)
            assert time() - began < 5

    second_dir = nanny.worker_dir

    yield nanny._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != nanny.worker_dir

    worker_rpc.close_rpc()
    s.stop()
def test_nanny_process_failure():
    """The nanny restarts a worker that exits while computing.

    The worker is asked to compute ``sys.exit(0)``.  The test then waits for
    the process to die, to come back, and to be registered in the center's
    ``ncores`` with a worker directory (two seconds per phase).  After
    closing the nanny both worker directories must be gone.
    """
    center = Center('127.0.0.1')
    center.listen(0)

    nanny = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    yield nanny._start()
    nanny_rpc = rpc(ip=nanny.ip, port=nanny.port)

    first_dir = nanny.worker_dir
    assert os.path.exists(first_dir)

    worker_rpc = rpc(ip=nanny.ip, port=nanny.worker_port)
    yield worker_rpc.update_data(data=valmap(dumps, {'x': 1, 'y': 2}))

    # The stream is expected to close when the worker exits.
    with ignoring(StreamClosedError):
        yield worker_rpc.compute(function=dumps(sys.exit),
                                 args=dumps((0,)),
                                 key='z')

    # Each entry is polled until it turns false, with a two second budget.
    still_waiting = (
        # wait while process dies
        lambda: nanny.process.is_alive(),
        # wait while process comes back
        lambda: not nanny.process.is_alive(),
        lambda: (nanny.worker_address not in center.ncores
                 or nanny.worker_dir is None),
    )
    for condition in still_waiting:
        began = time()
        while condition():
            yield gen.sleep(0.01)
            assert time() - began < 2

    second_dir = nanny.worker_dir

    yield nanny._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != nanny.worker_dir

    nanny_rpc.close_streams()
    center.stop()
def test_nanny_process_failure(s):
    """The nanny restarts a worker that exits while computing.

    The worker is asked to compute ``sys.exit(0)``.  The test waits for
    ``n.process`` to be replaced, for the new process's ``poll()`` to return
    ``None``, and for the worker to be registered in ``s.ncores`` with a
    worker directory (five seconds per phase).  After closing the nanny both
    worker directories must be gone.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield nanny._start()
    nanny_rpc = rpc(ip=nanny.ip, port=nanny.port)

    first_dir = nanny.worker_dir
    assert os.path.exists(first_dir)

    old_process = nanny.process
    worker_rpc = rpc(ip=nanny.ip, port=nanny.worker_port)
    yield worker_rpc.update_data(data=valmap(dumps, {'x': 1, 'y': 2}))

    # The stream is expected to close when the worker exits.
    with ignoring(StreamClosedError):
        yield worker_rpc.compute(function=dumps(sys.exit),
                                 args=dumps((0,)),
                                 key='z')

    began = time()
    while nanny.process is old_process:  # wait while process dies
        yield gen.sleep(0.01)
        elapsed = time() - began
        assert elapsed < 5

    began = time()
    # wait while process comes back
    while nanny.process.poll() is not None:
        yield gen.sleep(0.01)
        elapsed = time() - began
        assert elapsed < 5

    began = time()
    while nanny.worker_address not in s.ncores or nanny.worker_dir is None:
        yield gen.sleep(0.01)
        elapsed = time() - began
        assert elapsed < 5

    second_dir = nanny.worker_dir

    yield nanny._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != nanny.worker_dir

    nanny_rpc.close_streams()
    s.stop()
def test_monitor_resources(s):
    """Check ``Nanny.resource_collect`` and the ``monitor_resources`` op.

    Skipped when ``psutil`` cannot be imported.  The one-shot snapshot must
    expose ``cpu_percent``, ``memory_percent`` and ``timestamp``; three
    messages read from a ``monitor_resources`` stream must be dicts carrying
    the two percentage keys.
    """
    pytest.importorskip('psutil')

    nanny = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield nanny._start()
    assert isalive(nanny.process)

    snapshot = nanny.resource_collect()
    assert {'cpu_percent', 'memory_percent'}.issubset(snapshot)
    assert 'timestamp' in snapshot

    stream = yield connect(ip=nanny.ip, port=nanny.port)
    request = {'op': 'monitor_resources', 'interval': 0.01}
    yield write(stream, request)

    received = 0
    while received < 3:
        message = yield read(stream)
        assert isinstance(message, dict)
        assert {'cpu_percent', 'memory_percent'}.issubset(message)
        received += 1

    close(stream)
    yield nanny._close()
    s.stop()
def test_monitor_resources(s):
    """Check ``Nanny.resource_collect`` and the ``monitor_resources`` op.

    Skipped when ``psutil`` cannot be imported.  The one-shot snapshot must
    expose ``cpu_percent``, ``memory_percent`` and ``timestamp``; three
    messages read from a comm after sending ``monitor_resources`` must be
    dicts carrying the two percentage keys.
    """
    pytest.importorskip('psutil')

    nanny = Nanny(s.ip, s.port, ncores=2, loop=s.loop)
    yield nanny._start()
    assert isalive(nanny.process)

    snapshot = nanny.resource_collect()
    assert {'cpu_percent', 'memory_percent'}.issubset(snapshot)
    assert 'timestamp' in snapshot

    comm = yield connect(nanny.address)
    request = {'op': 'monitor_resources', 'interval': 0.01}
    yield comm.write(request)

    for _sample in range(3):
        message = yield comm.read()
        assert isinstance(message, dict)
        assert {'cpu_percent', 'memory_percent'}.issubset(message)

    yield comm.close()
    yield nanny._close()
    s.stop()
def test_nanny_process_failure(c, s):
    """The nanny replaces a worker process that exits unexpectedly.

    ``sys.exit(0)`` is run on the worker through the client.  The test waits
    for ``n.process`` to be replaced, for the new process to be alive and
    for the worker to be registered in ``s.ncores`` with a worker directory
    (five seconds per phase).  After closing the nanny both worker
    directories must be gone.
    """
    nanny = Nanny(s.ip, s.port, ncores=2, ip='127.0.0.1', loop=s.loop)
    yield nanny._start()

    first_dir = nanny.worker_dir
    assert os.path.exists(first_dir)

    old_process = nanny.process
    worker_rpc = rpc(ip=nanny.ip, port=nanny.worker_port)
    yield worker_rpc.update_data(data=valmap(dumps, {'x': 1, 'y': 2}))

    # The stream is expected to close when the worker exits.
    with ignoring(StreamClosedError):
        yield c._run(sys.exit, 0, workers=[nanny.worker_address])

    # Each entry is polled until it turns false, with a five second budget.
    still_waiting = (
        # wait while process dies
        lambda: nanny.process is old_process,
        # wait while process comes back
        lambda: not isalive(nanny.process),
        lambda: (nanny.worker_address not in s.ncores
                 or nanny.worker_dir is None),
    )
    for condition in still_waiting:
        began = time()
        while condition():
            yield gen.sleep(0.01)
            assert time() - began < 5

    second_dir = nanny.worker_dir

    yield nanny._close()
    assert not os.path.exists(second_dir)
    assert not os.path.exists(first_dir)
    assert first_dir != nanny.worker_dir

    worker_rpc.close_rpc()
    s.stop()
def test_nanny_death_timeout():
    """A nanny with ``death_timeout=1`` is closed three seconds after start.

    The nanny is pointed at ``127.0.0.1`` port 38848.
    NOTE(review): presumably nothing listens there -- confirm.
    """
    nanny = Nanny('127.0.0.1', 38848, death_timeout=1)
    yield nanny._start()

    # Sleep well past the one-second death timeout before checking.
    yield gen.sleep(3)
    final_status = nanny.status
    assert final_status == 'closed'