def test_progressbar_widget(loop):
    """ProgressWidget reports a full bar once the tracked key is in memory."""
    @gen.coroutine
    def f(c, a, b):
        s = Scheduler((c.ip, c.port), loop=loop)
        yield s._sync_center()
        done = s.start()
        sched, report = Queue(), Queue()
        s.handle_queues(sched, report)
        msg = yield report.get()
        assert msg['op'] == 'stream-start'

        s.update_graph(dsk={'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
                       keys=['z'])
        progress = ProgressWidget(['z'], scheduler=s)

        # Drain scheduler reports until 'z' has been computed
        while True:
            msg = yield report.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
                break

        progress._update()
        assert progress.bar.value == 1.0
        # presumably the elapsed-time suffix (e.g. "1.2s") — TODO confirm
        assert 's' in progress.bar.description

        sched.put_nowait({'op': 'close'})
        yield done

    _test_cluster(f, loop)
def test_restrictions_map():
    """Executor.map honors worker restrictions, per-call and per-task."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)

        # A single restriction set applies to every task in the map
        L = e.map(inc, range(5), workers={a.ip})
        yield _wait(L)
        assert set(a.data) == {x.key for x in L}
        assert not b.data
        for x in L:
            assert e.restrictions[x.key] == {a.ip}

        # A list of restriction sets applies element-wise
        L = e.map(inc, [10, 11, 12], workers=[{a.ip}, {a.ip, b.ip}, {b.ip}])
        yield _wait(L)
        assert e.restrictions[L[0].key] == {a.ip}
        assert e.restrictions[L[1].key] == {a.ip, b.ip}
        assert e.restrictions[L[2].key] == {b.ip}

        # Mismatched lengths are rejected
        with pytest.raises(ValueError):
            e.map(inc, [10, 11, 12], workers=[{a.ip}])

        yield e._shutdown()
    _test_cluster(f)
def test_gather_with_missing_worker(loop):
    """_gather uses a live replica when who_has also lists a dead worker,
    and raises KeyError for keys held only by the dead worker."""
    @gen.coroutine
    def f(c, a, b):
        bad = '127.0.0.1:9001'  # this worker doesn't exist
        c.who_has['x'].add(bad)
        c.has_what[bad].add('x')

        c.who_has['z'].add(bad)
        c.has_what[bad].add('z')
        # ncores is keyed by worker address (see the other missing-worker
        # tests); the original indexed it by the data key 'z' by mistake.
        c.ncores[bad] = 4

        c.who_has['z'].add(a.address)
        c.has_what[a.address].add('z')
        a.data['z'] = 5

        # 'z' is still retrievable from the live worker a
        result = yield _gather((c.ip, c.port), ['z'])
        assert result == [5]

        # 'x' lives only on the dead worker -> KeyError naming the key
        try:
            yield _gather((c.ip, c.port), ['x'])
            assert False
        except KeyError as e:
            assert 'x' in e.args

    _test_cluster(f)
def test_upload_file(loop):
    """upload_file writes a module into each worker's local_dir, the module is
    importable from tasks, and the file is removed on worker close."""
    @gen.coroutine
    def f(c, a, b):
        assert not os.path.exists(os.path.join(a.local_dir, 'foobar.py'))
        assert not os.path.exists(os.path.join(b.local_dir, 'foobar.py'))
        assert a.local_dir != b.local_dir

        aa = rpc(ip=a.ip, port=a.port)
        bb = rpc(ip=b.ip, port=b.port)
        # Upload to both workers concurrently
        yield [aa.upload_file(filename='foobar.py', data=b'x = 123'),
               bb.upload_file(filename='foobar.py', data=b'x = 123')]

        assert os.path.exists(os.path.join(a.local_dir, 'foobar.py'))
        assert os.path.exists(os.path.join(b.local_dir, 'foobar.py'))

        def g():
            # Runs on the worker: imports the uploaded module
            import foobar
            return foobar.x

        yield aa.compute(function=g, key='x')
        result = yield aa.get_data(keys=['x'])
        assert result == {'x': 123}

        yield a._close()
        yield b._close()
        aa.close_streams()
        bb.close_streams()
        # Worker close cleans up its local_dir
        assert not os.path.exists(os.path.join(a.local_dir, 'foobar.py'))

    _test_cluster(f)
def test_scatter_delete(loop):
    """_scatter and scatter_to_workers distribute data and report
    who_has/nbytes metadata; _gather retrieves it."""
    @gen.coroutine
    def f(c, a, b):
        keys = yield _scatter((c.ip, c.port), [1, 2, 3])

        # Scattered values land somewhere across the two workers
        assert merge(a.data, b.data) == \
                {k: i for k, i in zip(keys, [1, 2, 3])}

        assert set(c.who_has) == set(keys)
        assert all(len(v) == 1 for v in c.who_has.values())

        keys2, who_has, nbytes = yield scatter_to_workers([a.address, b.address],
                                                          [4, 5, 6])

        m = merge(a.data, b.data)

        for k, v in zip(keys2, [4, 5, 6]):
            assert m[k] == v

        # who_has maps key -> worker addresses; nbytes maps key -> int size
        assert isinstance(who_has, dict)
        assert set(concat(who_has.values())) == {a.address, b.address}
        assert len(who_has) == len(keys2)

        assert isinstance(nbytes, dict)
        assert set(nbytes) == set(who_has)
        assert all(isinstance(v, int) for v in nbytes.values())

        result = yield _gather((c.ip, c.port), keys2)
        assert result == [4, 5, 6]

    _test_cluster(f)
def test_garbage_collection():
    """Future.__del__ decrements the executor refcount; a fully released
    key disappears from e.futures."""
    import gc
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)

        a = e.submit(inc, 1)
        b = e.submit(inc, 1)
        # Pure submits of the same call share one key, hence refcount 2
        assert e.refcount[a.key] == 2
        a.__del__()
        assert e.refcount[a.key] == 1

        c = e.submit(inc, b)
        b.__del__()

        IOLoop.current().spawn_callback(e._go)

        result = yield c._result()
        assert result == 3

        bkey = b.key
        b.__del__()
        assert bkey not in e.futures

        # The original never shut the executor down (the loop-taking variant
        # of this test does); close it so the coroutine exits cleanly.
        yield e._shutdown()

    _test_cluster(f)
def test_missing_data_heals(loop):
    """The scheduler recomputes a dependency whose data was lost from workers."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), delete_batch_time=0, start=False,
                     loop=loop)
        yield e._start()

        x = e.submit(inc, 1)
        y = e.submit(inc, x)
        z = e.submit(inc, y)

        yield _wait([x, y, z])

        # Secretly delete y's key
        if y.key in a.data:
            del a.data[y.key]
        if y.key in b.data:
            del b.data[y.key]

        # add(y, z) forces y to be recomputed: inc(inc(1)) + inc(inc(inc(1)))
        w = e.submit(add, y, z)

        result = yield w._result()
        assert result == 3 + 4

        yield e._shutdown()

    _test_cluster(f, loop)
def test_input_types(loop):
    """Executor accepts a (ip, port) tuple, a Scheduler object, or a
    scheduler address, producing the appropriate center/scheduler handles."""
    @gen.coroutine
    def f(c, a, b):
        # From a center (ip, port) tuple: builds its own Scheduler
        e1 = Executor((c.ip, c.port), start=False, loop=loop)
        yield e1._start()
        assert isinstance(e1.center, rpc)
        assert isinstance(e1.scheduler, Scheduler)

        # From an existing Scheduler object
        s = Scheduler((c.ip, c.port))
        yield s.sync_center()
        done = s.start()
        e2 = Executor(s, start=False, loop=loop)
        yield e2._start()
        assert isinstance(e2.center, rpc)
        assert isinstance(e2.scheduler, Scheduler)

        # From a remote scheduler address: scheduler handle is an rpc proxy
        s.listen(8042)
        e3 = Executor(('127.0.0.1', s.port), start=False, loop=loop)
        yield e3._start()
        assert isinstance(e3.center, rpc)
        assert isinstance(e3.scheduler, rpc)
        s.stop()

        yield e1._shutdown()
        yield e2._shutdown()
        yield e3._shutdown()
    _test_cluster(f, loop)
def test_upload_file(loop):
    """Executor._upload_file ships a module to workers; re-uploading replaces it."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False, loop=loop)
        yield e._start()

        def g():
            # Runs on a worker: uses the uploaded module
            import myfile
            return myfile.f()

        with tmp_text('myfile.py', 'def f():\n    return 123') as fn:
            yield e._upload_file(fn)

        sleep(1)  # TODO: why is this necessary?
        x = e.submit(g, pure=False)
        result = yield x._result()
        assert result == 123

        # Re-upload with new contents; subsequent tasks see the new module
        with tmp_text('myfile.py', 'def f():\n    return 456') as fn:
            yield e._upload_file(fn)

        y = e.submit(g, pure=False)
        result = yield y._result()
        assert result == 456

        yield e._shutdown()
    _test_cluster(f, loop)
def test_TextProgressBar_error(loop, capsys):
    """TextProgressBar ends in 'error' status and closes its stream when the
    tracked task errs, including when attached after the error."""
    @gen.coroutine
    def f(c, a, b):
        s = Scheduler((c.ip, c.port), loop=loop)
        s.listen(0)
        yield s.sync_center()
        done = s.start()

        # div(1, 0) errs immediately
        s.update_graph(dsk={'x': (div, 1, 0)},
                       keys=['x'])

        progress = TextProgressBar(['x'], scheduler=(s.ip, s.port),
                                   start=False, interval=0.01)
        yield progress.listen()

        assert progress.status == 'error'
        assert progress.stream.closed()

        # Attaching a second bar after the failure behaves the same
        progress = TextProgressBar(['x'], scheduler=(s.ip, s.port),
                                   start=False, interval=0.01)
        yield progress.listen()
        assert progress.status == 'error'
        assert progress.stream.closed()

        s.close()
        yield done

    _test_cluster(f, loop)
def test_multiple_executors(loop):
    """Two executors sharing one scheduler track their own futures, and
    futures from both can be combined."""
    @gen.coroutine
    def f(c, a, b):
        # NOTE: a and b are rebound from workers to executors here
        a = Executor((c.ip, c.port), start=False, loop=loop)
        yield a._start()
        b = Executor(scheduler=a.scheduler, start=False, loop=loop)
        yield b._start()

        x = a.submit(inc, 1)
        y = b.submit(inc, 2)
        assert x.executor is a
        assert y.executor is b

        xx = yield x._result()
        yy = yield y._result()
        assert xx == 2
        assert yy == 3

        # A future from another executor may be used as an input
        z = a.submit(add, x, y)
        assert z.executor is a
        zz = yield z._result()
        assert zz == 5

        yield a._shutdown()
        yield b._shutdown()
    _test_cluster(f, loop)
def test_garbage_collection_with_scatter(loop):
    """Releasing the last future for a scattered key eventually removes the
    key from the center's who_has."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), delete_batch_time=0, start=False,
                     loop=loop)
        yield e._start()

        [a] = yield e._scatter([1])  # rebinds a from worker to future
        assert a.key in e.futures
        assert a.status == 'finished'
        assert a.event.is_set()

        assert e.refcount[a.key] == 1
        a.__del__()
        assert e.refcount[a.key] == 0

        # Deletion is asynchronous; poll with a 3-second deadline
        start = time()
        while True:
            if a.key not in c.who_has:
                break
            else:
                assert time() < start + 3
                yield gen.sleep(0.1)

        yield e._shutdown()
    # Pass the loop through, as every other loop-taking test does; the
    # original dropped it despite building the Executor with loop=loop.
    _test_cluster(f, loop)
def test_many_Progresss(loop):
    """Many Progress plugins can track the same key simultaneously."""
    @gen.coroutine
    def f(c, a, b):
        s = Scheduler((c.ip, c.port), loop=loop)
        yield s._sync_center()
        done = s.start()
        sched, report = Queue(), Queue()
        s.handle_queues(sched, report)
        msg = yield report.get()
        assert msg['op'] == 'stream-start'

        s.update_graph(dsk={'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
                       keys=['z'])

        bars = [Progress(keys=['z'], scheduler=s) for i in range(10)]

        # Wait until 'z' has been computed
        while True:
            msg = yield report.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
                break

        assert all(b.status == 'finished' for b in bars)

        sched.put_nowait({'op': 'close'})
        yield done

    _test_cluster(f, loop)
def test_multi_progressbar_widget_after_close(loop):
    """MultiProgressWidget attached after tasks complete still groups keys."""
    @gen.coroutine
    def f(c, a, b):
        s = Scheduler((c.ip, c.port), loop=loop)
        yield s._sync_center()
        done = s.start()
        sched, report = Queue(), Queue()
        s.handle_queues(sched, report)
        msg = yield report.get()
        assert msg['op'] == 'stream-start'

        # Chain ending in a task ('e') that throws; 'other' is independent
        s.update_graph(dsk={'x-1': (inc, 1),
                            'x-2': (inc, 'x-1'),
                            'x-3': (inc, 'x-2'),
                            'y-1': (dec, 'x-3'),
                            'y-2': (dec, 'y-1'),
                            'e': (throws, 'y-2'),
                            'other': (inc, 123)},
                       keys=['e'])

        # Wait until the last good task has completed
        while True:
            msg = yield report.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'y-2':
                break

        p = MultiProgressWidget(['x-1', 'x-2', 'x-3'], scheduler=s)
        assert set(concat(p.all_keys.values())).issuperset({'x-1', 'x-2',
                                                           'x-3'})
        # Keys are grouped by prefix, producing a bar per group
        assert 'x' in p.bars

        sched.put_nowait({'op': 'close'})
        yield done

    _test_cluster(f, loop)
def test_recompute_released_key(loop):
    """Resubmitting a task whose result was released recomputes the same value."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), delete_batch_time=0, start=False,
                     loop=loop)
        yield e._start()
        x = e.submit(inc, 100)
        result1 = yield x._result()
        xkey = x.key
        del x
        import gc
        gc.collect()
        assert e.refcount[xkey] == 0

        # 1 second batching needs a second action to trigger
        while xkey in c.who_has or xkey in a.data or xkey in b.data:
            yield gen.sleep(0.1)

        x = e.submit(inc, 100)
        assert x.key in e.futures
        result2 = yield x._result()
        assert result1 == result2
        yield e._shutdown()

    _test_cluster(f, loop)
def test__scatter(loop):
    """_scatter accepts dicts and lists, returns Futures, and records
    who_has/nbytes on the scheduler; scattered futures compose with submit."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False, loop=loop)
        yield e._start()

        # Dict form: keys are preserved
        d = yield e._scatter({'y': 20})
        assert isinstance(d['y'], Future)
        assert a.data.get('y') == 20 or b.data.get('y') == 20
        assert (a.address in e.scheduler.who_has['y'] or
                b.address in e.scheduler.who_has['y'])
        assert c.who_has['y']
        assert e.scheduler.nbytes == {'y': sizeof(20)}
        yy = yield e._gather([d['y']])
        assert yy == [20]

        # List form: keys are generated
        [x] = yield e._scatter([10])
        assert isinstance(x, Future)
        assert a.data.get(x.key) == 10 or b.data.get(x.key) == 10
        xx = yield e._gather([x])
        assert c.who_has[x.key]
        assert (a.address in e.scheduler.who_has[x.key] or
                b.address in e.scheduler.who_has[x.key])
        assert e.scheduler.nbytes == {'y': sizeof(20), x.key: sizeof(10)}
        assert xx == [10]

        z = e.submit(add, x, d['y'])  # submit works on RemoteData
        result = yield z._result()
        assert result == 10 + 20

        result = yield e._gather([z, x])
        assert result == [30, 10]

        yield e._shutdown()
    _test_cluster(f, loop)
def test_submit_quotes(loop):
    """Literal lists in args/kwargs are passed through unevaluated, and
    lists of futures are resolved to their values."""
    # NOTE(review): mutable default z=[] is never mutated here; the first
    # submit relies on it being a list.
    def assert_list(x, z=[]):
        return isinstance(x, list) and isinstance(z, list)

    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False, loop=loop)
        yield e._start()

        x = e.submit(assert_list, [1, 2, 3])
        result = yield x._result()
        assert result

        x = e.submit(assert_list, [1, 2, 3], z=[4, 5, 6])
        result = yield x._result()
        assert result

        # A list containing futures is delivered as a list of their results
        x = e.submit(inc, 1)
        y = e.submit(inc, 2)
        z = e.submit(assert_list, [x, y])
        result = yield z._result()
        assert result

        yield e._shutdown()
    _test_cluster(f, loop)
def test_TextProgressBar(loop, capsys):
    """TextProgressBar tracks dependencies, empties its key set on
    completion, and removes itself from the scheduler plugins."""
    @gen.coroutine
    def f(c, a, b):
        s = Scheduler((c.ip, c.port), loop=loop)
        yield s._sync_center()
        done = s.start()
        sched, report = Queue(), Queue()
        s.handle_queues(sched, report)
        msg = yield report.get()
        assert msg['op'] == 'stream-start'

        s.update_graph(dsk={'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
                       keys=['z'])

        progress = TextProgressBar(['z'], scheduler=s)
        progress.start()

        # Tracking 'z' pulls in its whole dependency chain
        assert progress.all_keys == {'x', 'y', 'z'}
        assert progress.keys == {'x', 'y', 'z'}

        while True:
            msg = yield report.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
                break

        assert progress.keys == set()
        check_bar_completed(capsys)

        # The finished bar unregisters itself
        assert progress not in s.plugins

        sched.put_nowait({'op': 'close'})
        yield done

    _test_cluster(f, loop)
def test_TextProgressBar_error(loop, capsys):
    """TextProgressBar reports 'error' and stops its timer when the tracked
    task errs, including when created after the failure."""
    @gen.coroutine
    def f(c, a, b):
        s = Scheduler((c.ip, c.port), loop=loop)
        yield s._sync_center()
        done = s.start()
        sched, report = Queue(), Queue()
        s.handle_queues(sched, report)
        msg = yield report.get()
        assert msg['op'] == 'stream-start'

        s.update_graph(dsk={'x': (div, 1, 0)},
                       keys=['x'])

        progress = TextProgressBar(['x'], scheduler=s)
        progress.start()

        while True:
            msg = yield report.get()
            if msg.get('key') == 'x':
                break

        assert progress.status == 'error'
        assert not progress._timer.is_alive()

        # A bar created after the task already erred sees the error at once
        progress = TextProgressBar(['x'], scheduler=s)
        progress.start()
        assert progress.status == 'error'
        assert not progress._timer or not progress._timer.is_alive()

        sched.put_nowait({'op': 'close'})
        yield done

    _test_cluster(f, loop)
def test_robust_to_bad_plugin(loop):
    """A plugin that raises inside its callback does not break scheduling."""
    @gen.coroutine
    def f(c, a, b):
        s = Scheduler((c.ip, c.port), loop=loop)
        yield s._sync_center()
        done = s.start()
        sched, report = Queue(), Queue()
        s.handle_queues(sched, report)
        msg = yield report.get()
        assert msg['op'] == 'stream-start'

        class Bad(SchedulerPlugin):
            def task_finished(self, scheduler, key, worker, nbytes):
                raise Exception()

        bad = Bad()
        s.add_plugin(bad)

        sched.put_nowait({'op': 'update-graph',
                          'dsk': {'x': (inc, 1),
                                  'y': (inc, 'x'),
                                  'z': (inc, 'y')},
                          'keys': ['z']})

        while True:  # normal execution
            msg = yield report.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
                break

        sched.put_nowait({'op': 'close'})
        yield done

    _test_cluster(f, loop)
def test_upload_egg(loop):
    """upload_file accepts an egg archive whose packages become importable
    on the worker, and the file is removed on worker close."""
    @gen.coroutine
    def f(c, a, b):
        eggname = 'mytestegg-1.0.0-py3.4.egg'
        # The test egg lives next to this test module
        local_file = __file__.replace('test_worker.py', eggname)

        assert not os.path.exists(os.path.join(a.local_dir, eggname))
        assert not os.path.exists(os.path.join(b.local_dir, eggname))
        assert a.local_dir != b.local_dir

        aa = rpc(ip=a.ip, port=a.port)
        bb = rpc(ip=b.ip, port=b.port)
        with open(local_file, 'rb') as f:
            payload = f.read()
        yield [aa.upload_file(filename=eggname, data=payload),
               bb.upload_file(filename=eggname, data=payload)]

        assert os.path.exists(os.path.join(a.local_dir, eggname))
        assert os.path.exists(os.path.join(b.local_dir, eggname))

        def g(x):
            # Runs on the worker: imports from the uploaded egg
            import testegg
            return testegg.inc(x)

        yield aa.compute(function=g, key='x', args=(10,))
        result = yield aa.get_data(keys=['x'])
        assert result == {'x': 10 + 1}

        yield a._close()
        yield b._close()
        aa.close_streams()
        bb.close_streams()
        assert not os.path.exists(os.path.join(a.local_dir, eggname))

    _test_cluster(f)
def test_garbage_collection(loop):
    """Future.__del__ decrements the executor refcount; a fully released
    key disappears from e.futures."""
    import gc
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False, loop=loop)

        a = e.submit(inc, 1)
        b = e.submit(inc, 1)
        # Pure submits of the same call share one key, hence refcount 2
        assert e.refcount[a.key] == 2
        a.__del__()
        assert e.refcount[a.key] == 1

        c = e.submit(inc, b)
        b.__del__()

        yield e._start()

        result = yield c._result()
        assert result == 3

        bkey = b.key
        b.__del__()
        assert bkey not in e.futures

        yield e._shutdown()
    # Pass the loop through, as every other loop-taking test does; the
    # original dropped it despite building the Executor with loop=loop.
    _test_cluster(f, loop)
def test_gather_with_missing_worker(loop):
    """_gather uses a live replica when who_has also lists a dead worker,
    and raises KeyError for keys held only by the dead worker."""
    @gen.coroutine
    def f(c, a, b):
        bad = ("127.0.0.1", 9001)  # this worker doesn't exist
        c.who_has["x"].add(bad)
        c.has_what[bad].add("x")

        c.who_has["z"].add(bad)
        c.has_what[bad].add("z")
        c.who_has["z"].add(a.address)
        c.has_what[a.address].add("z")
        a.data["z"] = 5

        # 'z' has a live replica on worker a
        result = yield _gather((c.ip, c.port), ["z"])
        assert result == [5]

        # 'x' lives only on the dead worker -> KeyError naming the key
        try:
            yield _gather((c.ip, c.port), ["x"])
            assert False
        except KeyError as e:
            assert "x" in e.args

    _test_cluster(f)
def test_multibar_complete(loop):
    """MultiProgressWidget(complete=True) expands to the full dependency
    set and shows completed groups as full bars with counts."""
    @gen.coroutine
    def f(c, a, b):
        s = Scheduler((c.ip, c.port), loop=loop)
        yield s._sync_center()
        done = s.start()
        sched, report = Queue(), Queue()
        s.handle_queues(sched, report)
        msg = yield report.get()
        assert msg['op'] == 'stream-start'

        # Chain ending in a task ('e') that throws
        s.update_graph(dsk={'x-1': (inc, 1),
                            'x-2': (inc, 'x-1'),
                            'x-3': (inc, 'x-2'),
                            'y-1': (dec, 'x-3'),
                            'y-2': (dec, 'y-1'),
                            'e': (throws, 'y-2'),
                            'other': (inc, 123)},
                       keys=['e'])

        while True:
            msg = yield report.get()
            if msg['op'] == 'task-erred' and msg['key'] == 'e':
                break

        p = MultiProgressWidget(['e'], scheduler=s, complete=True)
        # complete=True pulls in all dependencies of 'e'
        assert set(concat(p.all_keys.values())) == {'x-1', 'x-2', 'x-3',
                                                    'y-1', 'y-2', 'e'}
        assert all(b.value == 1.0 for b in p.bars.values())
        assert p.texts['x'].value == '3 / 3'
        assert p.texts['y'].value == '2 / 2'

        sched.put_nowait({'op': 'close'})
        yield done

    _test_cluster(f, loop)
def test_scheduler(loop):
    """End-to-end scheduler protocol: graph updates, erring tasks, and
    recovery from reported and unreported missing data."""
    @gen.coroutine
    def f(c, a, b):
        s = Scheduler((c.ip, c.port))
        yield s._sync_center()
        done = s.start()
        sched, report = Queue(), Queue()
        s.handle_queues(sched, report)
        msg = yield report.get()
        assert msg['op'] == 'stream-start'

        # Test update graph
        s.put({'op': 'update-graph',
               'dsk': {'x': (inc, 1),
                       'y': (inc, 'x'),
                       'z': (inc, 'y')},
               'keys': ['z']})
        while True:
            msg = yield report.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
                break

        assert a.data.get('x') == 2 or b.data.get('x') == 2

        # Test erring tasks
        s.put({'op': 'update-graph',
               'dsk': {'a': (div, 1, 0),
                       'b': (inc, 'a')},
               'keys': ['a', 'b']})
        while True:
            msg = yield report.get()
            if msg['op'] == 'task-erred' and msg['key'] == 'b':
                break

        # Test missing data
        s.put({'op': 'missing-data', 'missing': ['z']})
        while True:
            msg = yield report.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
                break

        # Test missing data without being informed
        for w in [a, b]:
            if 'z' in w.data:
                del w.data['z']
        s.put({'op': 'update-graph',
               'dsk': {'zz': (inc, 'z')},
               'keys': ['zz']})
        while True:
            msg = yield report.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'zz':
                break

        s.put({'op': 'close'})
        yield done

    _test_cluster(f, loop)
def test_worker(loop):
    """Worker compute RPC: success, dependency fetching, error reporting
    with traceback, and center bookkeeping on close."""
    @gen.coroutine
    def f(c, a, b):
        aa = rpc(ip=a.ip, port=a.port)
        bb = rpc(ip=b.ip, port=b.port)

        result = yield aa.identity()
        assert not a.active

        # Simple computation; close=True closes the compute stream
        response = yield aa.compute(key='x',
                                    function=dumps(add),
                                    args=dumps([1, 2]),
                                    who_has={},
                                    close=True)
        assert not a.active
        assert response['status'] == 'OK'
        assert a.data['x'] == 3
        assert c.who_has['x'] == {a.address}

        # b fetches dependency 'x' from a via who_has
        response = yield bb.compute(key='y',
                                    function=dumps(add),
                                    args=dumps(['x', 10]),
                                    who_has={'x': [a.address]})
        assert response['status'] == 'OK'
        assert b.data['y'] == 13
        assert c.who_has['y'] == {b.address}
        assert response['nbytes'] == sizeof(b.data['y'])

        def bad_func():
            1 / 0

        response = yield bb.compute(key='z',
                                    function=dumps(bad_func),
                                    args=dumps(()),
                                    close=True)
        assert not b.active
        assert response['status'] == 'error'
        assert isinstance(loads(response['exception']), ZeroDivisionError)
        if sys.version_info[0] >= 3:
            # Traceback text is only reliable on Python 3
            assert any('1 / 0' in line
                       for line in pluck(3, traceback.extract_tb(
                           loads(response['traceback'])))
                       if line)

        aa.close_streams()
        yield a._close()

        # Center forgets closed workers
        assert a.address not in c.ncores and b.address in c.ncores

        assert list(c.ncores.keys()) == [b.address]

        assert isinstance(b.address, str)
        assert b.ip in b.address
        assert str(b.port) in b.address

        bb.close_streams()
        yield b._close()

    _test_cluster(f)
def test_scatter_round_robins_between_calls(loop):
    """Repeated single-item scatters spread data over both workers."""
    @gen.coroutine
    def f(c, a, b):
        for i in range(10):
            yield _scatter((c.ip, c.port), [i])
        # With round-robin placement, both workers end up holding data
        assert a.data
        assert b.data
    _test_cluster(f)
def test_worker(loop):
    """Worker compute RPC: success, dependency fetching, error reporting
    with traceback, and center bookkeeping on close."""
    @gen.coroutine
    def f(c, a, b):
        aa = rpc(ip=a.ip, port=a.port)
        bb = rpc(ip=b.ip, port=b.port)

        result = yield aa.identity()
        assert not a.active

        # Simple computation; close=True closes the compute stream
        response = yield aa.compute(key='x',
                                    function=dumps(add),
                                    args=dumps([1, 2]),
                                    who_has={},
                                    close=True)
        assert not a.active
        assert response['status'] == 'OK'
        assert a.data['x'] == 3
        assert c.who_has['x'] == {a.address}

        # b fetches dependency 'x' from a via who_has
        response = yield bb.compute(key='y',
                                    function=dumps(add),
                                    args=dumps(['x', 10]),
                                    who_has={'x': [a.address]})
        assert response['status'] == 'OK'
        assert b.data['y'] == 13
        assert c.who_has['y'] == {b.address}
        assert response['nbytes'] == sizeof(b.data['y'])

        def bad_func():
            1 / 0

        response = yield bb.compute(key='z',
                                    function=dumps(bad_func),
                                    args=dumps(()),
                                    close=True)
        assert not b.active
        assert response['status'] == 'error'
        assert isinstance(loads(response['exception']), ZeroDivisionError)
        if sys.version_info[0] >= 3:
            # Traceback text is only reliable on Python 3
            assert any('1 / 0' in line
                       for line in pluck(
                           3, traceback.extract_tb(loads(response['traceback'])))
                       if line)

        aa.close_streams()
        yield a._close()

        # Center forgets closed workers
        assert a.address not in c.ncores and b.address in c.ncores

        assert list(c.ncores.keys()) == [b.address]

        assert isinstance(b.address, str)
        assert b.ip in b.address
        assert str(b.port) in b.address

        bb.close_streams()
        yield b._close()

    _test_cluster(f)
def test_worker(loop):
    """Worker compute RPC (older tuple-response protocol): success,
    dependency fetching, error reporting, and center bookkeeping on close."""
    @gen.coroutine
    def f(c, a, b):
        aa = rpc(ip=a.ip, port=a.port)
        bb = rpc(ip=b.ip, port=b.port)

        assert not a.active
        # Older protocol: compute returns a (status, info) pair
        response, _ = yield aa.compute(key='x',
                                       function=add,
                                       args=[1, 2],
                                       needed=[],
                                       close=True)
        assert not a.active
        assert response == b'OK'
        assert a.data['x'] == 3
        assert c.who_has['x'] == set([(a.ip, a.port)])

        # b fetches needed key 'x' before computing
        response, info = yield bb.compute(key='y',
                                          function=add,
                                          args=['x', 10],
                                          needed=['x'])
        assert response == b'OK'
        assert b.data['y'] == 13
        assert c.who_has['y'] == set([(b.ip, b.port)])
        assert info['nbytes'] == sizeof(b.data['y'])

        def bad_func():
            1 / 0

        response, content = yield bb.compute(key='z',
                                             function=bad_func,
                                             args=(),
                                             needed=(),
                                             close=True)
        assert not b.active
        assert response == b'error'
        assert isinstance(content['exception'], ZeroDivisionError)
        if sys.version_info[0] >= 3:
            # Traceback text is only reliable on Python 3
            assert any('1 / 0' in line
                       for line in pluck(
                           3, traceback.extract_tb(content['traceback']))
                       if line)

        aa.close_streams()
        yield a._close()

        # Center forgets closed workers
        assert a.address not in c.ncores and b.address in c.ncores

        assert list(c.ncores.keys()) == [(b.ip, b.port)]

        assert isinstance(b.address_string, str)
        assert b.ip in b.address_string
        assert str(b.port) in b.address_string

        bb.close_streams()
        yield b._close()

    _test_cluster(f)
def test_broadcast(loop):
    """Center broadcast relays a message to every worker and collects replies."""
    @gen.coroutine
    def f(c, a, b):
        cc = rpc(ip=c.ip, port=c.port)
        results = yield cc.broadcast(msg={'op': 'ping'})
        assert results == {a.address: b'pong', b.address: b'pong'}

        cc.close_streams()
    _test_cluster(f)
def test_directed_scatter(loop):
    """_scatter with workers= places all data on the named worker only."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False, loop=loop)
        yield e._start()

        yield e._scatter([1, 2, 3], workers=[a.address])
        assert len(a.data) == 3
        assert not b.data

        yield e._shutdown()
    _test_cluster(f, loop)
def test_get_releases_data(loop):
    """Executor._get does not retain references to its result keys."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False, loop=loop)
        yield e._start()
        [x] = yield e._get({'x': (inc, 1)}, ['x'])
        import gc
        gc.collect()
        # After the transient future is collected, the key is unreferenced
        assert e.refcount['x'] == 0

        yield e._shutdown()
    _test_cluster(f, loop)
def test_long_tasks_dont_trigger_timeout(loop):
    """A task sleeping longer than any internal timeout still completes."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False, loop=loop)
        yield e._start()
        from time import sleep
        x = e.submit(sleep, 3)
        yield x._result()

        yield e._shutdown()
    _test_cluster(f, loop)
def test_clear(loop):
    """_delete removes selected keys; _clear wipes all worker data."""
    @gen.coroutine
    def f(c, a, b):
        data = yield _scatter((c.ip, c.port), [1, 2, 3])
        assert set(a.data.values()) | set(b.data.values()) == {1, 2, 3}

        yield _delete((c.ip, c.port), [data[0]])
        assert set(a.data.values()) | set(b.data.values()) == {2, 3}

        yield _clear((c.ip, c.port))
        assert not a.data and not b.data

    _test_cluster(f)
def test_map():
    """Executor.map: distinct keys, futures as inputs, mixed iterables,
    keyword arguments, and future-valued keyword arguments."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)

        L1 = e.map(inc, range(5))
        assert len(L1) == 5
        assert isdistinct(x.key for x in L1)
        assert all(isinstance(x, Future) for x in L1)
        result = yield L1[0]._result()
        assert result == inc(0)
        assert len(e.dask) == 5

        # Mapping over futures chains the graphs
        L2 = e.map(inc, L1)
        result = yield L2[1]._result()
        assert result == inc(inc(1))
        assert len(e.dask) == 10
        assert L1[0].key in e.dask[L2[0].key]

        total = e.submit(sum, L2)
        result = yield total._result()
        assert result == sum(map(inc, map(inc, range(5))))

        L3 = e.map(add, L1, L2)
        result = yield L3[1]._result()
        assert result == inc(1) + inc(inc(1))

        # Like zip, map truncates to the shortest iterable
        L4 = e.map(add, range(3), range(4))
        results = yield e._gather(L4)
        if sys.version_info[0] >= 3:
            assert results == list(map(add, range(3), range(4)))

        def f(x, y=10):
            return x + y

        # Scalar keyword argument is applied to every call
        L5 = e.map(f, range(5), y=5)
        results = yield e._gather(L5)
        assert results == list(range(5, 10))

        # A future keyword argument is resolved before each call
        y = e.submit(f, 10)
        L6 = e.map(f, range(5), y=y)
        results = yield e._gather(L6)
        assert results == list(range(20, 25))

        yield e._shutdown()
    _test_cluster(f)
def test_submit_naming():
    """Pure submits of the same call share a key; pure=False forces a new one."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)

        a = e.submit(inc, 1)
        b = e.submit(inc, 1)
        # Same pure call -> same underlying key/event
        assert a.event is b.event

        c = e.submit(inc, 1, pure=False)
        assert c.key != a.key

    _test_cluster(f)
def test_submit_then_get_with_Future():
    """A Future may appear inside a graph passed to _get."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)

        x = e.submit(slowinc, 1)
        dsk = {'y': (inc, x)}

        result = yield e._get(dsk, 'y')
        assert result == 3

        yield e._shutdown()
    _test_cluster(f)
def test_submit_quotes():
    """Literal lists passed as args/kwargs are delivered as lists,
    not interpreted as task tuples."""
    def assert_list(x, z=None):
        return isinstance(x, list) and isinstance(z, list)

    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)

        x = e.submit(assert_list, [1, 2, 3], z=[4, 5, 6])
        result = yield x._result()
        assert result

        yield e._shutdown()
    _test_cluster(f)
def test_restrictions_get():
    """_get honors a per-key worker-restrictions mapping."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)
        dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
        restrictions = {'y': {a.ip}, 'z': {b.ip}}

        result = yield e._get(dsk, 'z', restrictions)
        assert result == 3
        # Each restricted key was computed on its designated worker
        assert 'y' in a.data
        assert 'z' in b.data

        yield e._shutdown()
    _test_cluster(f)
def test_map_naming():
    """map is deterministic for pure calls; pure=False yields fresh keys."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)

        L1 = e.map(inc, range(5))
        L2 = e.map(inc, range(5))
        # Identical pure maps produce identical keys
        assert [x.key for x in L1] == [x.key for x in L2]

        L3 = e.map(inc, [1, 1, 1, 1])
        # Duplicate pure inputs collapse to one task
        assert len({x.event for x in L3}) == 1

        L4 = e.map(inc, [1, 1, 1, 1], pure=False)
        assert len({x.event for x in L4}) == 4

    _test_cluster(f)
def test_get():
    """_get supports scalar keys, key lists, and the empty graph."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)
        result = yield e._get({'x': (inc, 1)}, 'x')
        assert result == 2

        result = yield e._get({'x': (inc, 1)}, ['x'])
        assert result == [2]

        result = yield e._get({}, [])
        assert result == []

        yield e._shutdown()
    _test_cluster(f)
def test_gc():
    """Deleting the last future for a key removes its data from the center."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)

        x = e.submit(inc, 10)
        result = yield x._result()

        assert c.who_has[x.key]

        x.__del__()
        yield e._shutdown()

        assert not c.who_has[x.key]

    _test_cluster(f)
def dont_test_bad_restrictions_raise_exception():
    """A restriction naming an unknown worker makes the task err with a
    ValueError that mentions both the bad address and the key.

    (Disabled test: 'dont_' prefix keeps pytest from collecting it.)
    """
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)
        z = e.submit(inc, 2, workers={'bad-address'})
        try:
            yield z._result()
            assert False
        # The original bound the exception to `e`, shadowing the executor;
        # Python 3 deletes the name after the handler, so the later
        # `e._shutdown()` raised NameError. Use a distinct name.
        except ValueError as exc:
            assert 'bad-address' in str(exc)
            assert z.key in str(exc)

        yield e._shutdown()
    _test_cluster(f)
def test_gather():
    """_gather resolves a single future, a list, and a nested collection."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)
        x = e.submit(inc, 10)
        y = e.submit(inc, x)

        result = yield e._gather(x)
        assert result == 11
        result = yield e._gather([x])
        assert result == [11]
        # Nested containers are gathered structurally
        result = yield e._gather({'x': x, 'y': [y]})
        assert result == {'x': 11, 'y': [12]}

        yield e._shutdown()
    _test_cluster(f)
def test_TextProgressBar_empty(loop, capsys):
    """A TextProgressBar over no keys finishes immediately with a full bar."""
    @gen.coroutine
    def f(c, a, b):
        s = Scheduler((c.ip, c.port), loop=loop)
        yield s.sync_center()
        done = s.start(0)

        progress = TextProgressBar([], scheduler=(s.ip, s.port),
                                   start=False, interval=0.01)
        yield progress.listen()

        assert progress.status == 'finished'
        check_bar_completed(capsys)

        s.close()
        yield done

    _test_cluster(f, loop)
def test_scatter_delete(loop):
    """RemoteData round-trip: scatter, get, delete, and scatter_to_workers
    metadata (who_has / nbytes)."""
    @gen.coroutine
    def f(c, a, b):
        data = yield _scatter((c.ip, c.port), [1, 2, 3])

        # RemoteData repr mentions the center's address
        assert c.ip in str(data[0])
        assert c.ip in repr(data[0])

        assert merge(a.data, b.data) == \
                {d.key: i for d, i in zip(data, [1, 2, 3])}

        assert set(c.who_has) == {d.key for d in data}
        assert all(len(v) == 1 for v in c.who_has.values())

        result = yield [d._get() for d in data]
        assert result == [1, 2, 3]

        yield data[0]._delete()

        assert merge(a.data, b.data) == \
                {d.key: i for d, i in zip(data[1:], [2, 3])}

        assert data[0].key not in c.who_has

        data, who_has, nbytes = yield scatter_to_workers(
                (c.ip, c.port), [a.address, b.address], [4, 5, 6])

        m = merge(a.data, b.data)

        for d, v in zip(data, [4, 5, 6]):
            assert m[d.key] == v

        # who_has maps key -> worker addresses; nbytes maps key -> int size
        assert isinstance(who_has, dict)
        assert set(concat(who_has.values())) == {a.address, b.address}
        assert len(who_has) == len(data)

        assert isinstance(nbytes, dict)
        assert set(nbytes) == set(who_has)
        assert all(isinstance(v, int) for v in nbytes.values())

        result = yield _gather((c.ip, c.port), data)
        assert result == [4, 5, 6]

    _test_cluster(f)
def test_wait():
    """_wait returns (done, not_done) sets once all futures finish."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)

        # NOTE: a, b, c are rebound from cluster fixtures to futures here
        a = e.submit(inc, 1)
        b = e.submit(inc, 1)
        c = e.submit(inc, 2)

        done, not_done = yield _wait([a, b, c])

        assert done == {a, b, c}
        assert not_done == set()
        assert a.status == b.status == 'finished'

        yield e._shutdown()
    _test_cluster(f)
def test_restrictions_submit():
    """submit(workers=...) records restrictions and places data accordingly."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)
        x = e.submit(inc, 1, workers={a.ip})
        y = e.submit(inc, x, workers={b.ip})
        yield _wait([x, y])

        assert e.restrictions[x.key] == {a.ip}
        assert x.key in a.data

        assert e.restrictions[y.key] == {b.ip}
        assert y.key in b.data

        yield e._shutdown()
    _test_cluster(f)
def test_missing_worker():
    """A dead worker listed in center metadata is dropped and its keys
    are recomputed elsewhere."""
    @gen.coroutine
    def f(c, a, b):
        bad = ('bad-host', 8788)  # this worker doesn't exist
        c.ncores[bad] = 4
        c.who_has['b'] = {bad}
        c.has_what[bad] = {'b'}

        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)

        dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}

        result = yield e._get(dsk, 'c')
        assert result == 3
        # The unreachable worker has been purged from the executor's view
        assert bad not in e.ncores

        yield e._shutdown()
    _test_cluster(f)
def test_workers_update_center(loop):
    """Worker update_data/delete_data keep the center's who_has/has_what
    in sync."""
    @gen.coroutine
    def f(c, a, b):
        aa = rpc(ip=a.ip, port=a.port)

        response = yield aa.update_data(data={'x': dumps(1), 'y': dumps(2)})
        assert response['status'] == 'OK'
        assert response['nbytes'] == {'x': sizeof(1), 'y': sizeof(2)}

        assert a.data == {'x': 1, 'y': 2}
        assert c.who_has == {'x': {a.address},
                             'y': {a.address}}
        assert c.has_what[a.address] == {'x', 'y'}

        yield aa.delete_data(keys=['x'], close=True)
        assert not c.who_has['x']
        assert all('x' not in s for s in c.has_what.values())

        aa.close_streams()
    _test_cluster(f)
def test__as_completed():
    """_as_completed pushes every finished future onto the given queue."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)

        # NOTE: a, b, c are rebound from cluster fixtures to futures here
        a = e.submit(inc, 1)
        b = e.submit(inc, 1)
        c = e.submit(inc, 2)

        from distributed.compatibility import Queue
        queue = Queue()
        yield _as_completed([a, b, c], queue)

        assert queue.qsize() == 3
        assert {queue.get(), queue.get(), queue.get()} == {a, b, c}

        yield e._shutdown()
    _test_cluster(f)
def test_stress_1():
    """Tree-reduction stress test: pairwise-sum 2**6 mapped futures."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)

        n = 2**6

        seq = e.map(inc, range(n))
        # Repeatedly combine adjacent pairs until one future remains
        while len(seq) > 1:
            yield gen.sleep(0.1)
            seq = [
                e.submit(add, seq[i], seq[i + 1])
                for i in range(0, len(seq), 2)
            ]
        result = yield seq[0]._result()
        assert result == sum(map(inc, range(n)))

        yield e._shutdown()
    _test_cluster(f)
def test_gather_robust_to_missing_data():
    """_gather triggers recomputation of keys whose data disappeared."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)

        x, y, z = e.map(inc, range(3))
        yield _wait([x, y, z])  # everything computed

        # Secretly drop x's and y's data from both workers
        for q in [x, y]:
            if q.key in a.data:
                del a.data[q.key]
            if q.key in b.data:
                del b.data[q.key]

        xx, yy, zz = yield e._gather([x, y, z])
        assert (xx, yy, zz) == (1, 2, 3)

        yield e._shutdown()
    _test_cluster(f)
def dont_test_delete_data_with_missing_worker(loop):
    """delete_data cleans up center metadata even when one listed worker
    is unreachable. (Disabled test: 'dont_' prefix.)"""
    @gen.coroutine
    def f(c, a, b):
        bad = '127.0.0.1:9001'  # this worker doesn't exist
        c.who_has['z'].add(bad)
        c.who_has['z'].add(a.address)
        c.has_what[bad].add('z')
        c.has_what[a.address].add('z')
        a.data['z'] = 5

        cc = rpc(ip=c.ip, port=c.port)

        yield cc.delete_data(keys=['z'])  # TODO: this hangs for a while

        assert 'z' not in a.data
        assert not c.who_has['z']
        assert not c.has_what[bad]
        assert not c.has_what[a.address]

        cc.close_streams()
    _test_cluster(f)
def test_exceptions():
    """A task exception is re-raised on result; the executor keeps working."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)
        x = e.submit(div, 1, 2)
        result = yield x._result()
        assert result == 1 / 2

        x = e.submit(div, 1, 0)
        with pytest.raises(ZeroDivisionError):
            result = yield x._result()

        x = e.submit(div, 10, 2)  # continues to operate
        result = yield x._result()
        assert result == 10 / 2

        yield e._shutdown()
    _test_cluster(f)
def test_garbage_collection(loop):
    """Deleted RemoteData objects accumulate in the trash and are removed
    from workers by _garbage_collect."""
    @gen.coroutine
    def f(c, a, b):
        import gc
        gc.collect()
        # Start with an empty trash for this center
        RemoteData.trash[(c.ip, c.port)].clear()

        remote = yield _scatter((c.ip, c.port), [1, 2, 3])

        keys = [r.key for r in remote]

        assert set(keys) == set(a.data) | set(b.data)

        for r in remote:
            r.__del__()
        assert RemoteData.trash[(c.ip, c.port)] == set(keys)

        n = yield RemoteData._garbage_collect(c.ip, c.port)
        assert set() == set(a.data) | set(b.data)
        assert n == len(keys)

    _test_cluster(f)
def test_submit():
    """submit returns Futures that resolve and can feed further submits."""
    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)

        x = e.submit(inc, 10)
        assert not x.done()

        assert isinstance(x, Future)
        assert x.executor is e
        result = yield x._result()
        assert result == 11
        assert x.done()

        y = e.submit(inc, 20)
        z = e.submit(add, x, y)
        result = yield z._result()
        assert result == 11 + 21
        yield e._shutdown()
        assert c.who_has[z.key]

    _test_cluster(f)
def test_diagnostic(loop):
    """A SchedulerPlugin's task_finished hook fires once per completed task."""
    @gen.coroutine
    def f(c, a, b):
        s = Scheduler((c.ip, c.port), loop=loop)
        yield s.sync_center()
        done = s.start(0)
        sched, report = Queue(), Queue()
        s.handle_queues(sched, report)
        msg = yield report.get()
        assert msg['op'] == 'stream-start'

        class Counter(SchedulerPlugin):
            def start(self, scheduler):
                scheduler.add_plugin(self)
                self.count = 0

            def task_finished(self, scheduler, key, worker, nbytes):
                self.count += 1

        counter = Counter()
        counter.start(s)
        assert counter.count == 0

        sched.put_nowait({'op': 'update-graph',
                          'tasks': {'x': (inc, 1),
                                    'y': (inc, 'x'),
                                    'z': (inc, 'y')},
                          'dependencies': {'y': {'x'},
                                           'z': {'y'}},
                          'keys': ['z']})

        while True:
            msg = yield report.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
                break

        # One callback per task in the three-task chain
        assert counter.count == 3

        sched.put_nowait({'op': 'close'})
        yield done

    _test_cluster(f, loop)
def test__futures_to_dask_dataframe():
    """Futures holding pandas DataFrames assemble into a dask DataFrame
    with correct divisions and computable expressions."""
    dfs = [pd.DataFrame({'x': [1, 2, 3]}, index=[0, 10, 20]),
           pd.DataFrame({'x': [4, 5, 6]}, index=[30, 40, 50]),
           pd.DataFrame({'x': [7, 8, 9]}, index=[60, 70, 80])]

    @gen.coroutine
    def f(c, a, b):
        e = Executor((c.ip, c.port), start=False)
        IOLoop.current().spawn_callback(e._go)

        remote_dfs = e.map(lambda x: x, dfs)
        ddf = yield _futures_to_dask_dataframe(e, remote_dfs, divisions=True)

        assert isinstance(ddf, dd.DataFrame)
        # Divisions follow the partition index boundaries
        assert ddf.divisions == (0, 30, 60, 80)
        expr = ddf.x.sum()
        result = yield e._get(expr.dask, expr._keys())
        assert result == [sum([df.x.sum() for df in dfs])]

        yield e._shutdown()
    _test_cluster(f)