def test_worker_with_port_zero():
    """Worker connecting to a scheduler on a fixed port binds a usable port."""
    s = Scheduler()
    s.listen(8007)  # NOTE(review): hard-coded port; could collide on busy hosts
    w = Worker(s.ip, s.port, ip='127.0.0.1')
    yield w._start()
    # The worker must have bound a concrete, non-privileged port.
    assert isinstance(w.port, int)
    assert w.port > 1024
def f(c, a, b):
    """SchedulerPlugin.task_finished fires once per completed task in a chain."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'  # handshake before any real traffic

    class Counter(SchedulerPlugin):
        # Minimal plugin: count task_finished callbacks.
        def start(self, scheduler):
            scheduler.add_plugin(self)
            self.count = 0

        def task_finished(self, scheduler, key, worker, nbytes):
            self.count += 1

    counter = Counter()
    counter.start(s)
    assert counter.count == 0

    sched.put_nowait({'op': 'update-graph',
                      'dsk': {'x': (inc, 1),
                              'y': (inc, 'x'),
                              'z': (inc, 'y')},
                      'keys': ['z']})
    # Drain reports until the final key lands in memory.
    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert counter.count == 3  # one callback each for x, y, z

    sched.put_nowait({'op': 'close'})
    yield done
def test_scheduler_as_center():
    """Scheduler doubles as center: workers register ncores/data with it."""
    s = Scheduler()
    done = s.start(0)
    a = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=3)
    yield [w._start(0) for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    # Pre-seeded worker data was reported during registration.
    assert s.who_has == {'x': {a.address},
                        'y': {a.address, b.address},
                        'z': {b.address}}

    s.update_graph(tasks={'a': dumps_task((inc, 1))},
                   keys=['a'],
                   dependencies={'a': []})
    start = time()
    while not s.who_has['a']:
        assert time() - start < 5  # bounded wait so the test cannot hang
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    with ignoring(StreamClosedError):
        yield [w._close() for w in [a, b, c]]

    # Scheduler state is cleared once all workers have gone.
    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def f():
    """A worker fetches missing dependencies from the peers listed in who_has."""
    s = Scheduler()
    s.listen(0)
    x = Worker(s.ip, s.port, ip='127.0.0.1')
    y = Worker(s.ip, s.port, ip='127.0.0.1')
    z = Worker(s.ip, s.port, ip='127.0.0.1')
    x.data['a'] = 1
    y.data['a'] = 2
    yield [x._start(), y._start(), z._start()]

    zz = rpc(ip=z.ip, port=z.port)
    yield zz.compute(function=dumps(inc),
                     args=dumps(('a',)),
                     who_has={'a': [x.address]},
                     key='b')
    assert z.data['b'] == 2  # pulled 'a' (== 1) from x, then applied inc

    if 'a' in z.data:
        del z.data['a']  # drop the cached copy to force a fresh fetch
    yield zz.compute(function=dumps(inc),
                     args=dumps(('a',)),
                     who_has={'a': [y.address]},
                     key='c')
    assert z.data['c'] == 3  # this time 'a' (== 2) came from y

    yield [x._close(), y._close(), z._close()]
    zz.close_streams()
def test_scheduler_as_center():
    """Scheduler acts as center; a one-task graph runs on some worker."""
    s = Scheduler()
    done = s.start()
    a = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=3)
    yield [w._start() for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    assert s.who_has == {'x': {a.address},
                        'y': {a.address, b.address},
                        'z': {b.address}}

    s.update_graph(dsk={'a': (inc, 1)},
                   keys=['a'])
    # NOTE(review): unbounded poll -- a scheduling failure would hang here.
    while not s.who_has['a']:
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    yield [w._close() for w in [a, b, c]]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def test_scheduler_file():
    """The scheduler writes its contact info as JSON to scheduler_file."""
    with tmpfile() as fn:
        s = Scheduler(scheduler_file=fn)
        s.start(0)
        with open(fn) as f:
            data = json.load(f)
        assert data['address'] == s.address

        # A client can bootstrap itself from the same file.
        c = yield Client(scheduler_file=fn, loop=s.loop, asynchronous=True)
        yield s.close()
def f(c, a, b):
    """A TextProgressBar created with no keys finishes immediately."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    p = TextProgressBar([], scheduler=s)
    p.start()
    assert p.status == 'finished'  # nothing to track
    # `capsys` presumably comes from the enclosing pytest scope -- verify.
    check_bar_completed(capsys)
def test_worker_name():
    """Worker names register with the scheduler; duplicate names raise.

    Bug fix: the original rebound ``w`` to the duplicate worker inside the
    ``pytest.raises`` block, so the first (running) worker was never closed
    and the final ``w._close()`` targeted the worker that failed to start.
    The duplicate now uses its own name, ``w2``, mirroring the sibling test.
    """
    s = Scheduler()
    s.start(0)
    w = Worker(s.ip, s.port, name='alice')
    yield w._start()
    assert s.worker_info[w.address]['name'] == 'alice'
    assert s.aliases['alice'] == w.address

    with pytest.raises(ValueError):
        w2 = Worker(s.ip, s.port, name='alice')
        yield w2._start()
        yield w2._close()  # only reached if duplicate detection failed

    yield s.close()
    yield w._close()
def f(c, a, b):
    """Drive the scheduler queues directly: graph updates, erred tasks,
    and recovery from missing data (reported and unreported)."""
    s = Scheduler((c.ip, c.port))
    yield s._sync_center()
    done = s.start()

    # Test update graph
    s.scheduler_queue.put_nowait({'op': 'update-graph',
                                  'dsk': {'x': (inc, 1),
                                          'y': (inc, 'x'),
                                          'z': (inc, 'y')},
                                  'keys': ['z']})
    while True:
        msg = yield s.report_queue.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert a.data.get('x') == 2 or b.data.get('x') == 2

    # Test erring tasks
    s.scheduler_queue.put_nowait({'op': 'update-graph',
                                  'dsk': {'a': (div, 1, 0),
                                          'b': (inc, 'a')},
                                  'keys': ['a', 'b']})
    while True:
        msg = yield s.report_queue.get()
        if msg['op'] == 'task-erred' and msg['key'] == 'b':
            break

    # Test missing data
    s.scheduler_queue.put_nowait({'op': 'missing-data', 'missing': ['z']})
    while True:
        msg = yield s.report_queue.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    # Test missing data without being informed
    for w in [a, b]:
        if 'z' in w.data:
            del w.data['z']  # delete behind the scheduler's back
    s.scheduler_queue.put_nowait({'op': 'update-graph',
                                  'dsk': {'zz': (inc, 'z')},
                                  'keys': ['zz']})
    while True:
        msg = yield s.report_queue.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'zz':
            break

    s.scheduler_queue.put_nowait({'op': 'close'})
    yield done
def test_worker_name():
    """Worker names register with the scheduler; duplicates raise ValueError."""
    s = Scheduler(validate=True)
    s.start(0)
    w = Worker(s.ip, s.port, name='alice')
    yield w._start()
    assert s.workers[w.address].name == 'alice'
    assert s.aliases['alice'] == w.address

    with pytest.raises(ValueError):
        w2 = Worker(s.ip, s.port, name='alice')
        yield w2._start()
        yield w2._close()  # only reached if the duplicate unexpectedly started

    yield s.close()
    yield w._close()
def f(c, a, b):
    """TextProgressBar tracks a key plus its dependencies and detaches
    from the scheduler once everything completes."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x': (inc, 1),
                        'y': (inc, 'x'),
                        'z': (inc, 'y')},
                   keys=['z'])
    progress = TextProgressBar(['z'], scheduler=s)
    progress.start()

    # Dependencies of z are tracked too.
    assert progress.all_keys == {'x', 'y', 'z'}
    assert progress.keys == {'x', 'y', 'z'}

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert progress.keys == set()  # everything completed
    check_bar_completed(capsys)

    assert progress not in s.plugins  # the bar unregisters itself when done

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """A plugin that raises in task_finished must not break scheduling."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    class Bad(SchedulerPlugin):
        # Always-failing callback; the scheduler should swallow it.
        def task_finished(self, scheduler, key, worker, nbytes):
            raise Exception()

    bad = Bad()
    s.add_plugin(bad)

    sched.put_nowait({'op': 'update-graph',
                      'dsk': {'x': (inc, 1),
                              'y': (inc, 'x'),
                              'z': (inc, 'y')},
                      'keys': ['z']})

    while True:  # normal execution
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """Progress bars on an erred key report 'error' and stop their timer."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x': (div, 1, 0)}, keys=['x'])  # guaranteed to err
    progress = TextProgressBar(['x'], scheduler=s)
    progress.start()

    while True:
        msg = yield report.get()
        if msg.get('key') == 'x':
            break

    assert progress.status == 'error'
    assert not progress._timer.is_alive()  # redraw thread stopped

    # A bar created after the failure sees the error state immediately.
    progress = TextProgressBar(['x'], scheduler=s)
    progress.start()
    assert progress.status == 'error'
    assert not progress._timer or not progress._timer.is_alive()

    sched.put_nowait({'op': 'close'})
    yield done
def test_add_worker_is_idempotent(loop):
    """Registering the same worker twice must not change scheduler state.

    Bug fix: the original asserted ``s.ncores == s.ncores`` — a tautology
    that always passes — and never used the ``ncores`` snapshot it took.
    Compare the post-duplicate state against the snapshot instead.
    """
    s = Scheduler(loop=loop)
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)
    ncores = s.ncores.copy()  # snapshot before the duplicate registration
    s.add_worker(address=alice, coerce_address=False)
    assert ncores == s.ncores
def f(c, a, b):
    """MultiProgressWidget groups tracked keys by prefix into bars."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x-1': (inc, 1),
                        'x-2': (inc, 'x-1'),
                        'x-3': (inc, 'x-2'),
                        'y-1': (dec, 'x-3'),
                        'y-2': (dec, 'y-1'),
                        'e': (throws, 'y-2'),
                        'other': (inc, 123)},
                   keys=['e'])

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'y-2':
            break

    p = MultiProgressWidget(['x-1', 'x-2', 'x-3'], scheduler=s)
    assert set(concat(p.all_keys.values())).issuperset({'x-1', 'x-2', 'x-3'})
    assert 'x' in p.bars  # one bar per key prefix

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """With complete=True the widget covers all dependencies of the target,
    even when the target key itself errs."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x-1': (inc, 1),
                        'x-2': (inc, 'x-1'),
                        'x-3': (inc, 'x-2'),
                        'y-1': (dec, 'x-3'),
                        'y-2': (dec, 'y-1'),
                        'e': (throws, 'y-2'),
                        'other': (inc, 123)},
                   keys=['e'])

    while True:
        msg = yield report.get()
        if msg['op'] == 'task-erred' and msg['key'] == 'e':
            break

    p = MultiProgressWidget(['e'], scheduler=s, complete=True)
    # 'other' is excluded: it is not a dependency of 'e'.
    assert set(concat(p.all_keys.values())) == {'x-1', 'x-2', 'x-3',
                                                'y-1', 'y-2', 'e'}
    assert all(b.value == 1.0 for b in p.bars.values())
    assert p.texts['x'].value == '3 / 3'
    assert p.texts['y'].value == '2 / 2'

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """Executor accepts a center tuple, a live Scheduler object, or a remote
    scheduler address, choosing local vs rpc scheduler accordingly."""
    e1 = Executor((c.ip, c.port), start=False, loop=loop)
    yield e1._start()
    assert isinstance(e1.center, rpc)
    assert isinstance(e1.scheduler, Scheduler)  # created in-process

    s = Scheduler((c.ip, c.port))
    yield s.sync_center()
    done = s.start()
    e2 = Executor(s, start=False, loop=loop)
    yield e2._start()
    assert isinstance(e2.center, rpc)
    assert isinstance(e2.scheduler, Scheduler)  # the object we passed in

    s.listen(8042)  # NOTE(review): hard-coded port; collisions would fail here
    e3 = Executor(('127.0.0.1', s.port), start=False, loop=loop)
    yield e3._start()
    assert isinstance(e3.center, rpc)
    assert isinstance(e3.scheduler, rpc)  # remote address -> rpc proxy

    s.stop()
    yield e1._shutdown()
    yield e2._shutdown()
    yield e3._shutdown()
def test_launch_without_blocked_services():
    """If a service port is already taken, a second scheduler starts anyway,
    simply skipping the blocked service."""
    from distributed.http import HTTPScheduler
    s = Scheduler(services={('http', 3849): HTTPScheduler})
    s.start(0)

    s2 = Scheduler(services={('http', 3849): HTTPScheduler})
    s2.start(0)
    assert not s2.services  # blocked service was dropped, not fatal

    yield [s.close(), s2.close()]
def test_service_hosts():
    """Service sockets bind on the scheduler's host unless the service
    spec explicitly overrides the host."""
    pytest.importorskip('bokeh')
    from distributed.bokeh.scheduler import BokehScheduler

    for port in [0, ('127.0.0.3', 0)]:
        for url, expected in [('tcp://0.0.0.0', ('::', '0.0.0.0')),
                              ('tcp://127.0.0.2', '127.0.0.2'),
                              ('tcp://127.0.0.2:38275', '127.0.0.2')]:
            services = {('bokeh', port): BokehScheduler}
            s = Scheduler(services=services)
            yield s.start(url)

            sock = first(s.services['bokeh'].server._http._sockets.values())
            if isinstance(port, tuple):
                # host explicitly overridden
                assert sock.getsockname()[0] == port[0]
            elif isinstance(expected, tuple):
                # wildcard bind: IPv4 or dual-stack IPv6 both acceptable
                assert sock.getsockname()[0] in expected
            else:
                assert sock.getsockname()[0] == expected

            yield s.close()
def test_worker_name():
    """A worker's name is registered with the scheduler and aliased;
    a second worker reusing the name is rejected with ValueError."""
    scheduler = yield Scheduler(validate=True, port=0)
    worker = yield Worker(scheduler.address, name="alice")
    assert scheduler.workers[worker.address].name == "alice"
    assert scheduler.aliases["alice"] == worker.address

    with pytest.raises(ValueError):
        duplicate = yield Worker(scheduler.address, name="alice")
        # Only reached if duplicate-name detection unexpectedly failed.
        yield duplicate.close()

    yield worker.close()
    yield scheduler.close()
async def test_lifetime(cleanup):
    """A worker with a lifetime retires itself and hands its data off."""
    async with Scheduler() as scheduler:
        async with Worker(scheduler.address) as survivor, Worker(
                scheduler.address, lifetime="1 seconds") as short_lived:
            async with Client(scheduler.address, asynchronous=True) as client:
                futures = client.map(slowinc, range(200), delay=0.1)
                await gen.sleep(1.5)
                # By now the short-lived worker must have begun retiring.
                assert short_lived.status != "running"
                await short_lived.finished()
                # successfully moved data over
                assert set(short_lived.data).issubset(survivor.data)
async def test_worker_port_range(cleanup):
    """Workers consume ports from an explicit "low:high" range in order,
    and fail cleanly once the range is exhausted."""
    async with Scheduler() as scheduler:
        port_range = "9867:9868"
        async with Worker(scheduler.address, port=port_range) as first:
            # First worker grabs the first port of the range.
            assert first.port == 9867
            async with Worker(scheduler.address, port=port_range) as second:
                # Second worker takes the next (and last) port.
                assert second.port == 9868
                # With no ports left, startup must raise.
                with pytest.raises(ValueError, match="Could not start Worker"):
                    async with Worker(scheduler.address, port=port_range):
                        pass
def f(c, a, b):
    """SchedulerPlugin.task_finished fires once per completed task
    (tasks/dependencies message form)."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s.sync_center()
    done = s.start(0)
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    class Counter(SchedulerPlugin):
        # Minimal plugin: count task_finished callbacks.
        def start(self, scheduler):
            scheduler.add_plugin(self)
            self.count = 0

        def task_finished(self, scheduler, key, worker, nbytes):
            self.count += 1

    counter = Counter()
    counter.start(s)
    assert counter.count == 0

    sched.put_nowait({'op': 'update-graph',
                      'tasks': {'x': (inc, 1),
                                'y': (inc, 'x'),
                                'z': (inc, 'y')},
                      'dependencies': {'y': {'x'}, 'z': {'y'}},
                      'keys': ['z']})

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert counter.count == 3  # one callback each for x, y, z

    sched.put_nowait({'op': 'close'})
    yield done
def test_scheduler_as_center():
    """Workers connect to the scheduler; a submitted task completes."""
    s = Scheduler()
    done = s.start(0)
    a = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=3)
    yield [w._start(0) for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    # Pre-seeded worker data is not reported here, apparently by design in
    # this version -- who_has stays empty until tasks run.
    assert not s.who_has

    s.update_graph(tasks={'a': dumps_task((inc, 1))},
                   keys=['a'],
                   dependencies={'a': []})
    start = time()
    while not s.who_has['a']:
        assert time() - start < 5  # bounded wait so the test cannot hang
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    with ignoring(StreamClosedError):
        yield [w._close() for w in [a, b, c]]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def __init__(
    self,
    scheduler: Scheduler,
    # The following parameters are exposed so that one may create, run, and
    # throw away on the fly a specialized manager, separate from the main one.
    policies: set[ActiveMemoryManagerPolicy] | None = None,
    *,
    register: bool = True,
    start: bool | None = None,
    interval: float | None = None,
):
    """Configure the manager: install policies (explicit or from dask
    config), optionally register it on the scheduler, and optionally
    start its periodic run loop.
    """
    self.scheduler = scheduler
    self.policies = set()

    if policies is None:
        # No explicit policies given: build the set from the dask config.
        policies = set()
        conf_key = "distributed.scheduler.active-memory-manager.policies"
        for spec in dask.config.get(conf_key):
            spec = spec.copy()
            policy_cls = import_term(spec.pop("class"))
            policies.add(policy_cls(**spec))

    for policy in policies:
        self.add_policy(policy)

    if register:
        # Expose ourselves as a scheduler extension and RPC handler.
        scheduler.extensions["amm"] = self
        scheduler.handlers["amm_handler"] = self.amm_handler

    if interval is None:
        interval = parse_timedelta(
            dask.config.get("distributed.scheduler.active-memory-manager.interval")
        )
    self.interval = interval

    if start is None:
        start = dask.config.get("distributed.scheduler.active-memory-manager.start")
    if start:
        self.start()
def test_scheduler_as_center():
    """Scheduler-as-center: worker data is registered and a graph executes."""
    s = Scheduler()
    done = s.start(0)
    a = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker('127.0.0.1', s.port, ip='127.0.0.1', ncores=3)
    yield [w._start() for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    assert s.who_has == {
        'x': {a.address},
        'y': {a.address, b.address},
        'z': {b.address}
    }

    s.update_graph(tasks={'a': dumps_task((inc, 1))},
                   keys=['a'],
                   dependencies={'a': set()})
    # NOTE(review): unbounded poll -- a scheduling failure would hang here.
    while not s.who_has['a']:
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    yield [w._close() for w in [a, b, c]]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def test_scheduler_as_center():
    """Workers register via the scheduler address; a one-task graph runs."""
    s = Scheduler(validate=True)
    done = s.start(0)
    a = Worker(s.address, ncores=1)
    a.data.update({'x': 1, 'y': 2})
    b = Worker(s.address, ncores=2)
    b.data.update({'y': 2, 'z': 3})
    c = Worker(s.address, ncores=3)
    yield [w._start(0) for w in [a, b, c]]

    assert s.ncores == {w.address: w.ncores for w in [a, b, c]}
    # Pre-seeded worker data is not reflected in who_has in this version.
    assert not s.who_has

    s.update_graph(tasks={'a': dumps_task((inc, 1))},
                   keys=['a'],
                   dependencies={'a': []})
    start = time()
    while not 'a' in s.who_has:
        assert time() - start < 5  # bounded wait so the test cannot hang
        yield gen.sleep(0.01)
    assert 'a' in a.data or 'a' in b.data or 'a' in c.data

    yield [w._close() for w in [a, b, c]]

    assert s.ncores == {}
    assert s.who_has == {}

    yield s.close()
def test_get_task_duration():
    """Configured per-prefix durations are honored; unknown prefixes fall
    back to the 0.5s default and are tracked in ``unknown_durations``."""
    config = {"distributed.scheduler.default-task-durations": {"prefix_1": 100}}
    with dask.config.set(config):
        scheduler = Scheduler(port=0)

        # The configured prefix is picked up verbatim.
        assert "prefix_1" in scheduler.task_duration
        assert scheduler.task_duration["prefix_1"] == 100
        known = TaskState("prefix_1-abcdefab", None)
        assert scheduler.get_task_duration(known) == 100

        # make sure get_task_duration adds TaskStates to unknown dict
        assert len(scheduler.unknown_durations) == 0
        unknown_a = TaskState("prefix_2-abcdefab", None)
        assert scheduler.get_task_duration(unknown_a) == 0.5  # default
        assert len(scheduler.unknown_durations) == 1
        assert len(scheduler.unknown_durations["prefix_2"]) == 1

        # A second task with the same prefix joins the same bucket.
        unknown_b = TaskState("prefix_2-accdefab", None)
        assert scheduler.get_task_duration(unknown_b) == 0.5  # default
        assert len(scheduler.unknown_durations) == 1
        assert len(scheduler.unknown_durations["prefix_2"]) == 2
async def test_memory_limit_auto():
    """The automatic memory limit is numeric, grows with thread count,
    and saturates once the thread count exceeds the machine's cores."""
    async with Scheduler() as scheduler:
        async with Worker(scheduler.address, nthreads=1) as one, Worker(
                scheduler.address, nthreads=2) as two, Worker(
                scheduler.address, nthreads=100) as many, Worker(
                scheduler.address, nthreads=200) as more:
            assert isinstance(one.memory_limit, Number)
            assert isinstance(two.memory_limit, Number)

            if CPU_COUNT > 1:
                # More threads -> a larger share of system memory.
                assert one.memory_limit < two.memory_limit

            # Beyond the core count the limit saturates.
            assert many.memory_limit == more.memory_limit
def test_scheduler_file():
    """A worker can discover the scheduler through a shared scheduler file."""
    with tmpfile() as fn:
        s = Scheduler(scheduler_file=fn)
        s.start(8009)  # NOTE(review): fixed port; could collide on busy hosts
        w = Worker(scheduler_file=fn)
        yield w._start()
        assert s.workers == {w.address}
        yield w._close()
        s.stop()
def test_service_hosts_match_scheduler():
    """Scheduler services bind on the same host the scheduler listens on."""
    from distributed.http.scheduler import HTTPScheduler
    services = {('http', 0): HTTPScheduler}

    s = Scheduler(services=services)
    yield s.start('tcp://0.0.0.0')
    sock = first(s.services['http']._sockets.values())
    assert sock.getsockname()[0] in ('::', '0.0.0.0')  # wildcard bind
    yield s.close()

    for host in ['tcp://127.0.0.2', 'tcp://127.0.0.2:38275']:
        s = Scheduler(services=services)
        yield s.start(host)
        sock = first(s.services['http']._sockets.values())
        assert sock.getsockname()[0] == '127.0.0.2'
        yield s.close()
def test_service_hosts_match_scheduler():
    """The bokeh service binds on the same host as the scheduler."""
    pytest.importorskip('bokeh')
    from distributed.bokeh.scheduler import BokehScheduler
    services = {('bokeh', 0): BokehScheduler}

    s = Scheduler(services=services)
    yield s.start('tcp://0.0.0.0')
    sock = first(s.services['bokeh'].server._http._sockets.values())
    assert sock.getsockname()[0] in ('::', '0.0.0.0')  # wildcard bind
    yield s.close()

    for host in ['tcp://127.0.0.2', 'tcp://127.0.0.2:38275']:
        s = Scheduler(services=services)
        yield s.start(host)
        sock = first(s.services['bokeh'].server._http._sockets.values())
        assert sock.getsockname()[0] == '127.0.0.2'
        yield s.close()
def test_file_descriptors_dont_leak(loop):
    """Starting and closing a worker must not leak file descriptors."""
    psutil = pytest.importorskip('psutil')
    proc = psutil.Process()
    before = proc.num_fds()  # baseline before any cluster objects exist

    s = Scheduler()
    s.start(0)
    w = Worker(s.ip, s.port)

    @gen.coroutine
    def f():
        # Full worker lifecycle inside the event loop.
        yield w._start(0)
        yield w._close()
    loop.run_sync(f)
    during = proc.num_fds()

    s.stop()
    s.close()

    start = time()
    # Give the OS up to 5s to reap closed sockets.
    while proc.num_fds() > before:
        loop.run_sync(lambda: gen.sleep(0.01))
        assert time() < start + 5
def f(c, a, b):
    """MultiProgress groups keys via the supplied func and empties the
    groups as their keys complete."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x-1': (inc, 1),
                        'x-2': (inc, 'x-1'),
                        'x-3': (inc, 'x-2'),
                        'y-1': (dec, 'x-3'),
                        'y-2': (dec, 'y-1')},
                   keys=['y-2'])

    # Group by the prefix before the dash.
    p = MultiProgress(['y-2'], scheduler=s, func=lambda s: s.split('-')[0])
    assert p.keys == {'x': {'x-1', 'x-2', 'x-3'},
                      'y': {'y-1', 'y-2'}}

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'x-3':
            break

    assert p.keys == {'x': set(),  # all x's done
                      'y': {'y-1', 'y-2'}}

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'y-2':
            break

    assert p.keys == {'x': set(), 'y': set()}
    assert p.status == 'finished'

    sched.put_nowait({'op': 'close'})
    yield done
def test_coerce_address():
    """coerce_address/coerce_hostname normalize strings and resolve
    worker-name aliases."""
    with dask.config.set({"distributed.comm.timeouts.connect": "100ms"}):
        s = yield Scheduler(validate=True, port=0)
        print("scheduler:", s.address, s.listen_address)
        a = Worker(s.address, name="alice")
        b = Worker(s.address, name=123)
        c = Worker("127.0.0.1", s.port, name="charlie")
        yield [a, b, c]

        assert s.coerce_address("127.0.0.1:8000") == "tcp://127.0.0.1:8000"
        assert s.coerce_address("[::1]:8000") == "tcp://[::1]:8000"
        assert s.coerce_address(
            "tcp://127.0.0.1:8000") == "tcp://127.0.0.1:8000"
        assert s.coerce_address("tcp://[::1]:8000") == "tcp://[::1]:8000"
        # localhost may resolve to either IPv4 or IPv6
        assert s.coerce_address("localhost:8000") in (
            "tcp://127.0.0.1:8000",
            "tcp://[::1]:8000",
        )
        assert s.coerce_address(u"localhost:8000") in (
            "tcp://127.0.0.1:8000",
            "tcp://[::1]:8000",
        )
        assert s.coerce_address(a.address) == a.address
        # Aliases
        assert s.coerce_address("alice") == a.address
        assert s.coerce_address(123) == b.address
        assert s.coerce_address("charlie") == c.address

        assert s.coerce_hostname("127.0.0.1") == "127.0.0.1"
        assert s.coerce_hostname("alice") == a.ip
        assert s.coerce_hostname(123) == b.ip
        assert s.coerce_hostname("charlie") == c.ip
        assert s.coerce_hostname("jimmy") == "jimmy"  # unknown names pass through

        assert s.coerce_address("zzzt:8000", resolve=False) == "tcp://zzzt:8000"

        yield s.close()
        yield [w.close() for w in [a, b, c]]
def test_monitor_resources():
    """Nannies report resource logs that the scheduler aggregates, capped
    at resource_log_size entries each."""
    pytest.importorskip('psutil')
    c = Center('127.0.0.1')
    c.listen(0)
    a = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    b = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    s = Scheduler((c.ip, c.port), resource_interval=0.01, resource_log_size=3)

    yield a._start()
    yield b._start()
    yield s.sync_center()
    done = s.start()

    try:
        assert s.ncores == {
            ('127.0.0.1', a.worker_port): 2,
            ('127.0.0.1', b.worker_port): 2
        }
        assert s.nannies == {(n.ip, n.worker_port): n.port for n in [a, b]}

        # Wait until every nanny's log has filled up.
        while any(len(v) < 3 for v in s.resource_logs.values()):
            yield gen.sleep(0.01)

        yield gen.sleep(0.1)  # let the ring buffers wrap at least once

        assert set(s.resource_logs) == {a.address, b.address}
        assert all(len(v) == 3 for v in s.resource_logs.values())  # size cap

        d = s.diagnostic_resources(n=2)
        assert set(d) == {a.worker_address, b.worker_address}
        assert set(d[a.worker_address]).issubset({'cpu', 'memory', 'time'})
        assert all(len(v) == 2 for v in d[a.worker_address].values())

        s.put({'op': 'close'})
        yield done
    finally:
        # Best-effort teardown; connections may already be gone.
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield a._close(timeout=0.5)
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield b._close(timeout=0.5)
        c.stop()
def f(c, a, b):
    """Multiple queue pairs can be attached; each receives its own reports."""
    s = Scheduler((c.ip, c.port))
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    # Test update graph
    sched.put_nowait({'op': 'update-graph',
                      'dsk': {'x': (inc, 1),
                              'y': (inc, 'x'),
                              'z': (inc, 'y')},
                      'keys': ['z']})
    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    slen, rlen = len(s.scheduler_queues), len(s.report_queues)
    sched2, report2 = Queue(), Queue()
    s.handle_queues(sched2, report2)
    # One new queue registered on each side.
    assert slen + 1 == len(s.scheduler_queues)
    assert rlen + 1 == len(s.report_queues)

    sched2.put_nowait({'op': 'update-graph',
                       'dsk': {'a': (inc, 10)},
                       'keys': ['a']})
    # Both report queues observe the completion of 'a'.
    for q in [report, report2]:
        while True:
            msg = yield q.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'a':
                break

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """Many Progress instances can track the same key simultaneously."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x': (inc, 1),
                        'y': (inc, 'x'),
                        'z': (inc, 'y')},
                   keys=['z'])

    bars = [Progress(keys=['z'], scheduler=s) for i in range(10)]

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert all(b.status == 'finished' for b in bars)  # every tracker finished

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    """ProgressWidget fills its bar and shows elapsed time when done."""
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x': (inc, 1),
                        'y': (inc, 'x'),
                        'z': (inc, 'y')},
                   keys=['z'])

    progress = ProgressWidget(['z'], scheduler=s)

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    progress._update()
    assert progress.bar.value == 1.0
    # Description mentions seconds elapsed, e.g. "... 1.2 s"
    assert 's' in progress.bar.description

    sched.put_nowait({'op': 'close'})
    yield done
def test_monitor_resources():
    """Duplicate of the resource-log aggregation test: nanny logs are
    capped at resource_log_size and surfaced via diagnostic_resources."""
    pytest.importorskip('psutil')
    c = Center('127.0.0.1')
    c.listen(0)
    a = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    b = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    s = Scheduler((c.ip, c.port), resource_interval=0.01, resource_log_size=3)

    yield a._start()
    yield b._start()
    yield s.sync_center()
    done = s.start()

    try:
        assert s.ncores == {('127.0.0.1', a.worker_port): 2,
                            ('127.0.0.1', b.worker_port): 2}
        assert s.nannies == {(n.ip, n.worker_port): n.port for n in [a, b]}

        # Wait until every nanny's log has filled up.
        while any(len(v) < 3 for v in s.resource_logs.values()):
            yield gen.sleep(0.01)

        yield gen.sleep(0.1)  # let the ring buffers wrap at least once

        assert set(s.resource_logs) == {a.address, b.address}
        assert all(len(v) == 3 for v in s.resource_logs.values())  # size cap

        d = s.diagnostic_resources(n=2)
        assert set(d) == {a.worker_address, b.worker_address}
        assert set(d[a.worker_address]).issubset({'cpu', 'memory', 'time'})
        assert all(len(v) == 2 for v in d[a.worker_address].values())

        s.put({'op': 'close'})
        yield done
    finally:
        # Best-effort teardown; connections may already be gone.
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield a._close(timeout=0.5)
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield b._close(timeout=0.5)
        c.stop()
def test_io_loop(loop):
    """Scheduler and worker both adopt the event loop they are handed."""
    scheduler = Scheduler(loop=loop)
    scheduler.listen(0)
    assert scheduler.io_loop is loop

    worker = Worker(scheduler.address, loop=loop)
    assert worker.io_loop is loop
def test_update_state_with_processing(loop):
    """update_graph merges new tasks into a graph that has work in flight."""
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)
    s.update_graph(tasks={'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')},
                   keys=['z'],
                   dependencies={'y': {'x'}, 'x': set(), 'z': {'y'}},
                   client='client')

    s.mark_task_finished('x', alice, nbytes=10, type=dumps(int),
                         compute_start=10, compute_stop=11)
    s.ensure_occupied(alice)

    # After x finishes, y can run and z still waits on y.
    assert s.waiting == {'z': {'y'}}
    assert s.waiting_data == {'x': {'y'}, 'y': {'z'}, 'z': set()}
    assert list(s.ready) == []

    assert s.who_wants == {'z': {'client'}}
    assert s.wants_what == {'client': {'z'}}

    assert s.who_has == {'x': {alice}}

    s.update_graph(tasks={'a': (inc, 'x'),
                          'b': (add, 'a', 'y'),
                          'c': (inc, 'z')},
                   keys=['b', 'c'],
                   dependencies={'a': {'x'}, 'b': {'a', 'y'}, 'c': {'z'}},
                   client='client')

    assert s.waiting == {'z': {'y'}, 'b': {'a', 'y'}, 'c': {'z'}}
    # 'a' depends only on in-memory 'x', so it goes straight to the worker.
    assert 'a' in s.stacks[alice] or 'a' in s.processing[alice]
    assert not s.ready
    assert s.waiting_data == {'x': {'y', 'a'}, 'y': {'z', 'b'},
                              'z': {'c'}, 'a': {'b'},
                              'b': set(), 'c': set()}

    assert s.who_wants == {'b': {'client'}, 'c': {'client'}, 'z': {'client'}}
    assert s.wants_what == {'client': {'b', 'c', 'z'}}

    s.stop()
def test_coerce_address():
    """coerce_address normalizes bytes, tuples, lists, hostnames, and
    worker-name aliases (legacy host:port form)."""
    s = Scheduler()
    s.start(0)
    a = Worker(s.ip, s.port, name='alice')
    b = Worker(s.ip, s.port, name=123)
    c = Worker(s.ip, s.port, name='charlie', ip='127.0.0.2')
    yield [a._start(), b._start(), c._start()]

    assert s.coerce_address(b'127.0.0.1') == '127.0.0.1'
    assert s.coerce_address(('127.0.0.1', 8000)) == '127.0.0.1:8000'
    assert s.coerce_address(['127.0.0.1', 8000]) == '127.0.0.1:8000'
    assert s.coerce_address([b'127.0.0.1', 8000]) == '127.0.0.1:8000'
    assert s.coerce_address(('127.0.0.1', '8000')) == '127.0.0.1:8000'
    assert s.coerce_address(b'localhost') == '127.0.0.1'
    assert s.coerce_address('localhost') == '127.0.0.1'
    assert s.coerce_address(u'localhost') == '127.0.0.1'
    assert s.coerce_address('localhost:8000') == '127.0.0.1:8000'
    assert s.coerce_address(a.address) == a.address
    assert s.coerce_address(a.address_tuple) == a.address
    # Worker names act as aliases.
    assert s.coerce_address(123) == b.address
    assert s.coerce_address('charlie') == c.address

    yield s.close()
    yield [w._close() for w in [a, b, c]]
def test_host_address():
    """An explicitly requested host appears in the scheduler's bound address."""
    scheduler = yield Scheduler(host="127.0.0.2", port=0)
    assert "127.0.0.2" in scheduler.address
    yield scheduler.close()
def test_update_state_respects_data_in_memory(loop):
    """Resubmitting a graph reuses in-memory results and recomputes only
    released dependencies."""
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)
    s.update_graph(tasks={
        'x': 1,
        'y': (inc, 'x')
    }, keys=['y'], dependencies={
        'y': {'x'},
        'x': set()
    }, client='client')

    s.mark_task_finished('x', alice, nbytes=10, type=dumps(int),
                         compute_start=10, compute_stop=11)
    s.ensure_occupied(alice)
    s.mark_task_finished('y', alice, nbytes=10, type=dumps(int),
                         compute_start=11, compute_stop=12)
    s.ensure_occupied(alice)

    # x was only needed for y, so it has been released.
    assert s.released == {'x'}
    assert s.who_has == {'y': {alice}}

    s.update_graph(tasks={
        'x': 1,
        'y': (inc, 'x'),
        'z': (add, 'y', 'x')
    }, keys=['z'], dependencies={
        'y': {'x'},
        'z': {'y', 'x'}
    }, client='client')

    assert s.released == set()
    assert s.waiting == {'z': {'x'}}  # z only waits on the released x
    assert set(s.processing[alice]) == {'x'}  # x was released need to recompute
    assert s.waiting_data == {'x': {'z'}, 'y': {'z'}, 'z': set()}
    assert s.who_wants == {'y': {'client'}, 'z': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}

    s.stop()
async def test_io_loop(cleanup):
    """A worker handed the scheduler's loop runs on that exact loop."""
    async with Scheduler(port=0) as scheduler:
        async with Worker(scheduler.address, loop=scheduler.loop) as worker:
            assert worker.io_loop is scheduler.loop
async def test_worker_nthreads(cleanup):
    """By default a worker sizes its executor to the machine's CPU count."""
    async with Scheduler() as scheduler:
        async with Worker(scheduler.address) as worker:
            assert worker.executor._max_workers == CPU_COUNT
def test_update_state(loop):
    """update_graph extends the scheduler's bookkeeping when new keys arrive."""
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)
    # NOTE(review): dependencies value for 'y' is the plain string 'x' while
    # sibling tests use sets -- confirm the scheduler coerces this form.
    s.update_graph(tasks={
        'x': 1,
        'y': (inc, 'x')
    }, keys=['y'], dependencies={
        'y': 'x',
        'x': set()
    }, client='client')

    s.mark_task_finished('x', alice, nbytes=10, type=dumps(int),
                         compute_start=10, compute_stop=11)
    s.ensure_occupied(alice)

    assert set(s.processing[alice]) == {'y'}
    assert not s.ready
    assert s.who_wants == {'y': {'client'}}
    assert s.wants_what == {'client': {'y'}}

    s.update_graph(tasks={
        'a': 1,
        'z': (add, 'y', 'a')
    }, keys=['z'], dependencies={'z': {'y', 'a'}}, client='client')

    # The graph structures include both old and new keys.
    assert s.tasks == {'x': 1, 'y': (inc, 'x'), 'a': 1, 'z': (add, 'y', 'a')}
    assert s.dependencies == {
        'x': set(),
        'a': set(),
        'y': {'x'},
        'z': {'a', 'y'}
    }
    assert s.dependents == {'z': set(), 'y': {'z'}, 'a': {'z'}, 'x': {'y'}}

    assert s.waiting == {'z': {'a', 'y'}}
    assert s.waiting_data == {'x': {'y'}, 'y': {'z'}, 'a': {'z'}, 'z': set()}

    assert s.who_wants == {'z': {'client'}, 'y': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}

    assert 'a' in s.ready or 'a' in s.processing[alice]

    s.stop()
def test_scheduler_init_pulls_blocked_handlers_from_config():
    """blocked-handlers from the dask config end up on the scheduler."""
    blocked = {"distributed.scheduler.blocked-handlers": ["test-handler"]}
    with dask.config.set(blocked):
        scheduler = Scheduler()
        assert scheduler.blocked_handlers == ["test-handler"]
def test_coerce_address():
    """coerce_address/coerce_hostname normalize tcp:// addresses and
    resolve worker-name aliases."""
    s = Scheduler(validate=True)
    s.start(0)
    print("scheduler:", s.address, s.listen_address)
    a = Worker(s.ip, s.port, name='alice')
    b = Worker(s.ip, s.port, name=123)
    c = Worker('127.0.0.1', s.port, name='charlie')
    yield [a._start(), b._start(), c._start()]

    assert s.coerce_address('127.0.0.1:8000') == 'tcp://127.0.0.1:8000'
    assert s.coerce_address('[::1]:8000') == 'tcp://[::1]:8000'
    assert s.coerce_address('tcp://127.0.0.1:8000') == 'tcp://127.0.0.1:8000'
    assert s.coerce_address('tcp://[::1]:8000') == 'tcp://[::1]:8000'
    # localhost may resolve to either IPv4 or IPv6
    assert s.coerce_address('localhost:8000') in ('tcp://127.0.0.1:8000',
                                                  'tcp://[::1]:8000')
    assert s.coerce_address(u'localhost:8000') in ('tcp://127.0.0.1:8000',
                                                   'tcp://[::1]:8000')
    assert s.coerce_address(a.address) == a.address
    # Aliases
    assert s.coerce_address('alice') == a.address
    assert s.coerce_address(123) == b.address
    assert s.coerce_address('charlie') == c.address

    assert s.coerce_hostname('127.0.0.1') == '127.0.0.1'
    assert s.coerce_hostname('alice') == a.ip
    assert s.coerce_hostname(123) == b.ip
    assert s.coerce_hostname('charlie') == c.ip
    assert s.coerce_hostname('jimmy') == 'jimmy'  # unknown names pass through

    assert s.coerce_address('zzzt:8000', resolve=False) == 'tcp://zzzt:8000'

    yield s.close()
    yield [w._close() for w in [a, b, c]]
def test_io_loop(loop):
    """Both scheduler and worker run on the externally supplied loop."""
    scheduler = Scheduler(loop=loop)
    scheduler.listen(0)
    assert scheduler.io_loop is loop

    worker = Worker(scheduler.ip, scheduler.port, loop=loop)
    assert worker.io_loop is loop
def test_update_state_supports_recomputing_released_results(loop):
    """Resubmitting a key whose inputs were released triggers recomputation.

    Runs the chain x -> y -> z to completion (releasing the
    intermediates), then asks for 'y' again and checks the scheduler
    schedules 'x' for recomputation rather than failing.
    """
    s = Scheduler()
    s.start(0)
    s.add_worker(address=alice, ncores=1, coerce_address=False)
    s.update_graph(tasks={'x': 1, 'y': (inc, 'x'), 'z': (inc, 'x')},
                   keys=['z'],
                   dependencies={'y': {'x'}, 'x': set(), 'z': {'y'}},
                   client='client')

    # Drive the whole chain to completion, one task at a time.
    for key in ['x', 'y', 'z']:
        s.mark_task_finished(key, alice, nbytes=10, type=dumps(int),
                             compute_start=10, compute_stop=11)
        s.ensure_occupied(alice)

    assert not s.waiting
    assert not s.ready
    # Only the requested 'z' is still tracked in memory.
    assert s.waiting_data == {'z': set()}
    assert s.who_has == {'z': {alice}}

    # Request 'y' again: its input 'x' was released and must be recomputed.
    s.update_graph(tasks={'x': 1, 'y': (inc, 'x')},
                   keys=['y'],
                   dependencies={'y': {'x'}},
                   client='client')

    assert s.waiting == {'y': {'x'}}
    assert s.waiting_data == {'x': {'y'}, 'y': set(), 'z': set()}
    assert s.who_wants == {'z': {'client'}, 'y': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}
    assert s.processing[alice] == {'x'}
    s.stop()
def test_coerce_address_legacy():
    """Legacy address coercion: bytes/tuple/list inputs and worker aliases.

    Renamed from ``test_coerce_address``: a second definition with that
    name shadowed the earlier, scheme-aware version in this module, so
    pytest only ever collected one of the two tests.
    """
    s = Scheduler(validate=True)
    s.start(0)
    a = Worker(s.ip, s.port, name='alice')
    b = Worker(s.ip, s.port, name=123)
    c = Worker(s.ip, s.port, name='charlie', ip='127.0.0.2')
    yield [a._start(), b._start(), c._start()]

    # bytes / tuple / list forms all normalize to "host:port" strings.
    assert s.coerce_address(b'127.0.0.1') == '127.0.0.1'
    assert s.coerce_address(('127.0.0.1', 8000)) == '127.0.0.1:8000'
    assert s.coerce_address(['127.0.0.1', 8000]) == '127.0.0.1:8000'
    assert s.coerce_address([b'127.0.0.1', 8000]) == '127.0.0.1:8000'
    assert s.coerce_address(('127.0.0.1', '8000')) == '127.0.0.1:8000'
    # "localhost" resolves to the loopback address.
    assert s.coerce_address(b'localhost') == '127.0.0.1'
    assert s.coerce_address('localhost') == '127.0.0.1'
    assert s.coerce_address(u'localhost') == '127.0.0.1'
    assert s.coerce_address('localhost:8000') == '127.0.0.1:8000'
    assert s.coerce_address(a.address) == a.address
    assert s.coerce_address(a.address_tuple) == a.address
    # Worker names act as aliases for their addresses.
    assert s.coerce_address(123) == b.address
    assert s.coerce_address('charlie') == c.address

    yield s.close()
    yield [w._close() for w in [a, b, c]]
def test_persist_taskstate():
    """Scheduler task state should survive a restart via the persist file.

    Starts a scheduler with a ``persist_file``, submits a small graph,
    shuts the scheduler down, then starts a fresh scheduler pointed at
    the same file and checks that the task keys and their states were
    restored.  The persist file(s) are removed at the end.
    """
    s = Scheduler(validate=True, persist_file='persist_test')
    s.start(0)
    assert s.persist_scheduler
    s.update_graph(tasks={'x': dumps_task((inc, 1)),
                          'y': dumps_task((inc, 'x')),
                          'z': dumps_task((inc, 2))},
                   keys=['y'],
                   dependencies={'y': 'x', 'x': [], 'z': []},
                   client='client')
    taskstates = s.tasks
    s.close()
    s.stop()
    del s

    # A fresh scheduler reading the same persist file restores the tasks.
    s = Scheduler(validate=True, persist_file='persist_test')
    s.start(0)
    # Compare the key views and per-task states directly.  The original
    # wrapped each view in a single-element list ([d.keys()] == [...]),
    # which compared the same views but obscured the intent.
    assert taskstates.keys() == s.tasks.keys()
    assert ([ts.state for ts in taskstates.values()]
            == [ts.state for ts in s.tasks.values()])
    s.close()
    # Clean up the persist file(s) left behind by the test.
    for f in glob.glob("persist_test*"):
        os.remove(f)
async def test_worker_listens_on_same_interface_by_default(Worker):
    """A worker given no explicit host binds to the scheduler's interface."""
    async with Scheduler(host="localhost") as s:
        # "localhost" may surface as either form depending on resolution.
        assert s.ip in {"127.0.0.1", "localhost"}
        async with Worker(s.address) as worker:
            assert worker.ip == s.ip
def test_update_state_respects_data_in_memory(loop):
    """Resubmitting a graph must reuse results already held in memory.

    After 'x' is computed, consumed by 'y', and released, a second
    update_graph needing both again should keep the in-memory 'y' and
    schedule only the released 'x' for recomputation.
    """
    s = Scheduler()
    s.start(0)
    # Placeholder worker; coerce_address=False skips address resolution.
    s.add_worker(address=alice, ncores=1, coerce_address=False)
    s.update_graph(tasks={'x': 1, 'y': (inc, 'x')},
                   keys=['y'],
                   dependencies={'y': {'x'}, 'x': set()},
                   client='client')
    s.mark_task_finished('x', alice, nbytes=10, type=dumps(int),
                         compute_start=10, compute_stop=11)
    s.ensure_occupied(alice)
    s.mark_task_finished('y', alice, nbytes=10, type=dumps(int),
                         compute_start=11, compute_stop=12)
    s.ensure_occupied(alice)
    # Only 'y' was wanted, so the intermediate 'x' has been released.
    assert s.released == {'x'}
    assert s.who_has == {'y': {alice}}

    # Resubmit with an extra task 'z' that needs both 'x' and 'y'.
    s.update_graph(tasks={'x': 1, 'y': (inc, 'x'), 'z': (add, 'y', 'x')},
                   keys=['z'],
                   dependencies={'y': {'x'}, 'z': {'y', 'x'}},
                   client='client')
    assert s.released == set()
    # 'z' only waits on 'x'; 'y' is already in memory.
    assert s.waiting == {'z': {'x'}}
    assert set(s.processing[alice]) == {'x'}  # x was released need to recompute
    assert set(s.rprocessing['x']) == {alice}  # x was released need to recompute
    assert s.waiting_data == {'x': {'z'}, 'y': {'z'}, 'z': set()}
    assert s.who_wants == {'y': {'client'}, 'z': {'client'}}
    assert s.wants_what == {'client': {'y', 'z'}}
    s.stop()
def test_io_loop_validate(loop):
    """A validating Scheduler should still adopt the loop it is given.

    Renamed from ``test_io_loop``: a second definition with that name
    shadowed the earlier Scheduler/Worker io_loop test in this module,
    so pytest only ever collected one of the two tests.
    """
    s = Scheduler(loop=loop, validate=True)
    assert s.io_loop is loop