def test_eventstream(c, s, *workers):
    """Check that an ``EventStream`` plugin buffers task events which can be
    folded into task-stream rectangle lists."""
    pytest.importorskip('bokeh')

    stream = EventStream()
    s.add_plugin(stream)
    assert stream.buffer == []

    futures = c.map(div, [1] * 10, range(10))
    total = c.submit(sum, futures[1:])
    yield wait(total)
    yield wait(futures)

    # Ten mapped tasks plus the final ``sum``.
    assert len(stream.buffer) == 11

    from distributed.bokeh import messages
    from distributed.diagnostics.progress_stream import task_stream_append

    rectangles = deepcopy(messages['task-events']['rectangles'])
    worker_index = {}
    for message in stream.buffer:
        task_stream_append(rectangles, message, worker_index)

    transfers = [name for name in rectangles['name']
                 if name.startswith('transfer')]
    assert len(transfers) == 2

    for name, color in zip(rectangles['name'], rectangles['color']):
        if name == 'transfer':
            assert color == 'red'

    assert any(shade == 'black' for shade in rectangles['color'])
def test_client_sync(client):
    """Record ten ``inc`` tasks through the ``get_task_stream`` context manager."""
    with get_task_stream(client=client) as stream:
        sleep(0.1)  # to smooth over time differences on the scheduler
        pending = client.map(inc, range(10))
        wait(pending)

    assert len(stream.data) == 10
def test_progressbar_cancel(client):
    """Check the widget state after the last tracked future is cancelled."""
    import time

    submitted = [client.submit(lambda: time.sleep(0.3), i) for i in range(5)]
    widget = ProgressWidget(submitted)
    client.sync(widget.listen)

    *remaining, last = submitted
    last.cancel()
    wait(remaining)

    assert widget.status == 'error'
    # no tasks finish before cancel is called
    assert widget.bar.value == 0
def test_dataframe_set_index_sync(wait, client):
    """Persist a demo timeseries, re-index it by ``name`` with a task-based
    shuffle and check that the result is non-empty."""
    dtypes = {'value': float, 'name': str, 'id': int}
    frame = dd.demo.make_timeseries('2000', '2001', dtypes,
                                    freq='2H', partition_freq='1M', seed=1)
    frame = client.persist(frame)
    wait(frame)

    indexed = client.persist(frame.set_index('name', shuffle='tasks'))
    assert len(indexed)
def test_progressbar_cancel(loop):
    """Check the widget state after the last tracked future is cancelled,
    using an explicitly created cluster and client."""
    with cluster() as (s, [a, b]), \
            Client(('127.0.0.1', s['port']), loop=loop) as c:
        import time

        submitted = [c.submit(lambda: time.sleep(0.3), i) for i in range(5)]
        widget = ProgressWidget(submitted)
        sync(loop, widget.listen)

        *remaining, last = submitted
        last.cancel()
        wait(remaining)

        assert widget.status == 'error'
        # no tasks finish before cancel is called
        assert widget.bar.value == 0
def test_persist(c, s, a, b):
    """Check that ``persist`` places delayed results on one of the workers,
    both alone and alongside a plain value."""
    single = delayed(inc)(1)
    (single_persisted,) = persist(single)
    yield wait(single_persisted)
    assert any(single_persisted.key in w.data for w in (a, b))

    mixed = delayed(inc)(10)
    mixed_persisted, one = persist(mixed, 1)
    yield wait(mixed_persisted)
    assert any(mixed_persisted.key in w.data for w in (a, b))
def test_get_task_stream_save(client, tmpdir):
    """Save a task-stream plot to an HTML file and inspect the file and the
    resulting figure."""
    bokeh = pytest.importorskip('bokeh')
    target = os.path.join(str(tmpdir), 'foo.html')

    with get_task_stream(plot='save', filename=target) as stream:
        wait(client.map(inc, range(10)))

    with open(target) as handle:
        html = handle.read()

    for needle in ('inc', 'bokeh'):
        assert needle in html
    assert isinstance(stream.figure, bokeh.plotting.Figure)
def test_compute(c, s, a, b):
    """Compute delayed objects with per-key and global resource
    restrictions and check that worker ``b`` ends up holding data."""
    first = delayed(inc)(1)
    second = delayed(inc)(first)
    per_key = {first: {'A': 1}, second: {'B': 1}}
    second_future = c.compute(second, resources=per_key)
    yield wait(second_future)
    assert b.data

    many = [delayed(inc)(i) for i in range(10, 20)]
    many_futures = c.compute(many, resources={'B': 1})
    yield wait(many_futures)
    assert len(b.data) > 10
def test_dataframe_set_index_sync(loop, wait):
    """Persist a demo timeseries on an explicit cluster, re-index it by
    ``name`` with a task-based shuffle and check the result is non-empty."""
    with cluster() as (scheduler_info, [a, b]):
        address = ("127.0.0.1", scheduler_info["port"])
        with Client(address, loop=loop) as c:
            with dask.set_options(get=c.get):
                dtypes = {"value": float, "name": str, "id": int}
                frame = dd.demo.make_timeseries(
                    "2000", "2001", dtypes,
                    freq="2H", partition_freq="1M", seed=1,
                )
                frame = c.persist(frame)
                wait(frame)

                indexed = c.persist(frame.set_index("name", shuffle="tasks"))
                assert len(indexed)
def test_values(client):
    """Inspect ``MultiProgressWidget`` state for finished and failing tasks."""
    done = [client.submit(inc, i) for i in range(5)]
    wait(done)

    widget = MultiProgressWidget(done)
    client.sync(widget.listen)
    assert set(widget.bars) == {'inc'}
    assert widget.status == 'finished'
    assert widget.comm.closed()
    assert '5 / 5' in widget.bar_texts['inc'].value
    assert widget.bars['inc'].value == 1.0

    failing = client.submit(throws, 1)
    widget = MultiProgressWidget([failing])
    client.sync(widget.listen)
    assert widget.status == 'error'
def test_move(c, s, a, b):
    """Scatter a value to ``b`` and check that a task restricted to resource
    ``A`` leaves its result on ``a``."""
    (scattered,) = yield c._scatter([1], workers=b.address)

    result = c.submit(inc, scattered, resources={'A': 1})
    yield wait(result)

    assert a.data[result.key] == 2
def test_balance_resources(c, s, a, b):
    """Check that resource-constrained tasks land on both workers while
    ``a`` is kept busy with slow tasks."""
    # Bound to a name so the slow futures stay referenced during the test.
    futures = c.map(slowinc, range(100), delay=0.1, workers=a.address)

    constrained = c.map(inc, range(2), resources={'A': 1})
    yield wait(constrained)

    # share
    for worker in (a, b):
        assert any(f.key in worker.data for f in constrained)
def test_work_stealing(c, s, a, b):
    """Check that both workers end up holding more than ten results even
    though the shared input was scattered to ``a`` only."""
    (shared,) = yield c._scatter([1], workers=a.address)

    futures = c.map(slowadd, range(50), [shared] * 50, delay=0.1)
    yield gen.sleep(0.1)
    yield wait(futures)

    for worker in (a, b):
        assert len(worker.data) > 10
def test_TaskStreamPlugin(c, s, *workers):
    """Check the buffer and the ``rectangles`` output of ``TaskStreamPlugin``."""
    plugin = TaskStreamPlugin(s)
    assert not plugin.buffer

    futures = c.map(div, [1] * 10, range(10))
    total = c.submit(sum, futures[1:])
    yield wait(total)

    assert len(plugin.buffer) == 11

    worker_index = {}
    rects = plugin.rectangles(0, 10, worker_index)

    assert worker_index
    assert all(name == 'div' for name in rects['name'])
    assert all(duration > 0 for duration in rects['duration'])

    counts = frequencies(rects['color'])
    assert counts['black'] == 1
    assert set(counts.values()) == {9, 1}
    assert len(set(rects['y'])) == 3

    rects = plugin.rectangles(2, 5, worker_index)
    assert all(len(column) == 3 for column in rects.values())

    starts = sorted(rects['start'])
    boundary = (starts[0] + starts[1]) / 2000
    rects = plugin.rectangles(2, 5, workers=worker_index,
                              start_boundary=boundary)
    assert set(rects['start']) <= set(starts[1:])
def test_collect(c, s, a, b):
    """Exercise the ``start``, ``stop`` and ``count`` arguments of
    ``TaskStreamPlugin.collect``."""
    plugin = TaskStreamPlugin(s)
    start = time()

    futures = c.map(slowinc, range(10), delay=0.1)
    yield wait(futures)
    n_tasks = len(futures)

    assert len(plugin.collect()) == n_tasks
    assert len(plugin.collect(start=start)) == n_tasks
    assert 4 <= len(plugin.collect(start=start + 0.2)) <= n_tasks
    assert len(plugin.collect(start='20 s')) == n_tasks
    assert 0 < len(plugin.collect(start='500ms')) <= n_tasks

    last_three = plugin.collect(count=3)
    assert len(last_three) == 3
    assert last_three == list(plugin.buffer)[-3:]

    assert plugin.collect(stop=start + 100, count=3) == plugin.collect(count=3)
    assert plugin.collect(start=start, count=3) == list(plugin.buffer)[:3]
def test_values(loop):
    """Inspect ``MultiProgressWidget`` state for finished and failing tasks
    on an explicitly created cluster."""
    with cluster() as (s, [a, b]), \
            Client(('127.0.0.1', s['port']), loop=loop) as c:
        done = [c.submit(inc, i) for i in range(5)]
        wait(done)

        widget = MultiProgressWidget(done)
        sync(loop, widget.listen)
        assert set(widget.bars) == {'inc'}
        assert widget.status == 'finished'
        assert widget.stream.closed()
        assert '5 / 5' in widget.bar_texts['inc'].value
        assert widget.bars['inc'].value == 1.0

        failing = c.submit(throws, 1)
        widget = MultiProgressWidget([failing])
        sync(loop, widget.listen)
        assert widget.status == 'error'
def test_dont_work_steal(c, s, a, b):
    """Check that tasks requiring resource ``A`` all leave their results
    on ``a``."""
    (shared,) = yield c._scatter([1], workers=a.address)

    futures = [
        c.submit(slowadd, shared, i, resources={'A': 1}, delay=0.05)
        for i in range(10)
    ]
    yield wait(futures)

    assert all(future.key in a.data for future in futures)
def test_minimum_resource(c, s, a):
    """Check that at most one task executes at a time when each task needs
    both resources, and that resources are restored afterwards."""
    n_tasks = 30
    futures = c.map(slowinc, range(n_tasks),
                    resources={'A': 1, 'B': 1}, delay=0.02)

    while len(a.data) < n_tasks:
        yield gen.sleep(0.01)
        assert len(a.executing) <= 1

    yield wait(futures)
    assert a.total_resources == a.available_resources
def test_get_task_stream_plot(c, s, a, b):
    """Check that ``get_task_stream(plot=True)`` yields a bokeh figure."""
    bokeh = pytest.importorskip('bokeh')
    yield c.get_task_stream()

    futures = c.map(slowinc, range(10), delay=0.1)
    yield wait(futures)

    result = yield c.get_task_stream(plot=True)
    data, figure = result
    assert isinstance(figure, bokeh.plotting.Figure)
def test_persist(c, s, a, b):
    """Persist two delayed objects with per-key resources and check which
    worker holds each key."""
    x = delayed(inc)(1)
    y = delayed(inc)(x)

    restrictions = {x: {'A': 1}, y: {'B': 1}}
    persisted = c.persist([x, y], resources=restrictions)
    xx, yy = persisted
    yield wait([xx, yy])

    assert x.key in a.data
    assert y.key in b.data
def test_progressbar_done(loop):
    """Check ``ProgressWidget`` status, value and bar style for finished and
    for failing futures on an explicitly created cluster."""
    with cluster() as (s, [a, b]), \
            Client(('127.0.0.1', s['port']), loop=loop) as c:
        done = [c.submit(inc, i) for i in range(5)]
        wait(done)

        widget = ProgressWidget(done)
        sync(loop, widget.listen)
        assert widget.status == 'finished'
        assert widget.bar.value == 1.0
        assert widget.bar.bar_style == 'success'

        failing = c.submit(throws, done)
        wait([failing])

        widget = ProgressWidget([failing])
        sync(loop, widget.listen)
        assert widget.status == 'error'
        assert widget.bar.value == 0.0
        assert widget.bar.bar_style == 'danger'
def test_client(c, s, a, b):
    """Check that ``Client.get_task_stream`` mirrors the scheduler's
    ``TaskStreamPlugin`` buffer."""
    before = yield c.get_task_stream()
    assert before == ()

    futures = c.map(slowinc, range(10), delay=0.1)
    yield wait(futures)

    stream_plugins = [p for p in s.plugins if isinstance(p, TaskStreamPlugin)]
    plugin = stream_plugins[0]

    after = yield c.get_task_stream()
    assert after == tuple(plugin.buffer)
def test_persist_tuple(c, s, a, b):
    """Persist with a tuple of collections as the resources key and check
    that both keys land on ``a`` while ``b`` stays empty."""
    x = delayed(inc)(1)
    y = delayed(inc)(x)

    persisted = c.persist([x, y], resources={(x, y): {'A': 1}})
    xx, yy = persisted
    yield wait([xx, yy])

    for collection in (x, y):
        assert collection.key in a.data
    assert not b.data
def test_submit_many_non_overlapping(c, s, a, b):
    """Check per-worker execution limits while 100 resource-constrained
    tasks run, and that resources are restored afterwards."""
    n_tasks = 100
    futures = c.map(slowinc, range(n_tasks), resources={'A': 1}, delay=0.02)

    while len(a.data) + len(b.data) < n_tasks:
        yield gen.sleep(0.01)
        assert len(a.executing) <= 2
        assert len(b.executing) <= 1

    yield wait(futures)

    for worker in (a, b):
        assert worker.total_resources == worker.available_resources
def test_dataframe_set_index_sync(loop, wait):
    """Persist a demo timeseries on an explicit cluster, re-index it by
    ``name`` with a task-based shuffle and check the result is non-empty."""
    with cluster() as (s, [a, b]), Client(s['address'], loop=loop) as c:
        with dask.set_options(get=c.get):
            dtypes = {'value': float, 'name': str, 'id': int}
            frame = dd.demo.make_timeseries('2000', '2001', dtypes,
                                            freq='2H', partition_freq='1M',
                                            seed=1)
            frame = c.persist(frame)
            wait(frame)

            indexed = c.persist(frame.set_index('name', shuffle='tasks'))
            assert len(indexed)
def test_resource_submit(c, s, a, b):
    """Check that tasks are routed by their resource requirements.

    ``x`` and ``y`` are expected on ``a`` and ``b`` respectively.  ``z``
    needs resource ``C`` and is reported as ``'no-worker'`` until an extra
    worker offering ``C`` is started.
    """
    x = c.submit(inc, 1, resources={'A': 3})
    y = c.submit(inc, 2, resources={'B': 1})
    z = c.submit(inc, 3, resources={'C': 2})

    yield wait(x)
    assert x.key in a.data

    yield wait(y)
    assert y.key in b.data

    assert s.get_task_status(keys=[z.key]) == {z.key: 'no-worker'}

    d = Worker(s.ip, s.port, loop=s.loop, resources={'C': 10})
    yield d._start()
    try:
        yield wait(z)
        assert z.key in d.data
    finally:
        # Close the extra worker even when the checks above fail, so it
        # does not outlive the test.
        yield d._close()
def test_dataframe_set_index_sync(wait, client):
    """Persist a demo timeseries, re-index it by ``name`` with a task-based
    shuffle and check that the result is non-empty."""
    dtypes = {"value": float, "name": str, "id": int}
    frame = dd.demo.make_timeseries(
        "2000", "2001", dtypes, freq="2H", partition_freq="1M", seed=1
    )
    frame = client.persist(frame)
    wait(frame)

    indexed = client.persist(frame.set_index("name", shuffle="tasks"))
    assert len(indexed)
def test_resource_submit(c, s, a, b):
    """Check that tasks are routed by their resource requirements.

    ``x`` and ``y`` are expected on ``a`` and ``b`` respectively.  ``z``
    needs resource ``C`` and is listed in ``s.unrunnable`` until an extra
    worker offering ``C`` is started.
    """
    x = c.submit(inc, 1, resources={'A': 3})
    y = c.submit(inc, 2, resources={'B': 1})
    z = c.submit(inc, 3, resources={'C': 2})

    yield wait(x)
    assert x.key in a.data

    yield wait(y)
    assert y.key in b.data

    assert z.key in s.unrunnable

    d = Worker(s.ip, s.port, loop=s.loop, resources={'C': 10})
    yield d._start()
    try:
        yield wait(z)
        assert z.key in d.data
    finally:
        # Close the extra worker even when the checks above fail, so it
        # does not outlive the test.
        yield d._close()
def test_dataframe_set_index_sync(wait, client):
    """Persist a ``dask.datasets`` timeseries, re-index it by ``name`` with a
    task-based shuffle and check that the result is non-empty."""
    dtypes = {"value": float, "name": str, "id": int}
    frame = dask.datasets.timeseries(
        start="2000",
        end="2001",
        dtypes=dtypes,
        freq="2H",
        partition_freq="1M",
        seed=1,
    ).persist()
    wait(frame)

    indexed = frame.set_index("name", shuffle="tasks").persist()
    assert len(indexed)
def test_task_stream_clear_interval(c, s, a, b):
    """Check which task names remain in a ``TaskStream`` created with
    ``clear_interval=200`` after a short and after a longer pause."""
    ts = TaskStream(s, clear_interval=200)

    yield wait(c.map(inc, range(10)))
    ts.update()
    yield gen.sleep(0.010)
    yield wait(c.map(dec, range(10)))
    ts.update()

    # Every column of the data source has the same length.
    assert len({len(column) for column in ts.source.data.values()}) == 1
    assert ts.source.data['name'].count('inc') == 10
    assert ts.source.data['name'].count('dec') == 10

    yield gen.sleep(0.300)
    yield wait(c.map(inc, range(10, 20)))
    ts.update()

    assert len({len(column) for column in ts.source.data.values()}) == 1
    assert ts.source.data['name'].count('inc') == 10
    assert ts.source.data['name'].count('dec') == 0
def test_submit_many_non_overlapping(c, s, a, b):
    """Check per-worker execution limits while 100 resource-constrained
    tasks run, and that resources are restored afterwards."""
    n_tasks = 100
    futures = c.map(slowinc, range(n_tasks), resources={"A": 1}, delay=0.02)

    while len(a.data) + len(b.data) < n_tasks:
        yield gen.sleep(0.01)
        assert len(a.executing) <= 2
        assert len(b.executing) <= 1

    yield wait(futures)

    for worker in (a, b):
        assert worker.total_resources == worker.available_resources
def test_persist_tuple(c, s, a, b):
    """Persist with a tuple of collections as the resources key and check
    that both keys land on ``a`` while ``b`` stays empty."""
    x = delayed(inc)(1)
    y = delayed(inc)(x)

    persisted = c.persist([x, y], resources={(x, y): {"A": 1}})
    xx, yy = persisted
    yield wait([xx, yy])

    for collection in (x, y):
        assert collection.key in a.data
    assert not b.data
def test_CurrentLoad(c, s, a, b):
    """Check that ``CurrentLoad`` exposes two entries per column and truthy
    ``nbytes`` values after some tasks have run."""
    load = CurrentLoad(s)

    futures = c.map(slowinc, range(10), delay=0.001)
    yield wait(futures)

    load.update()
    columns = dict(load.source.data)

    assert all(len(values) == 2 for values in columns.values())
    assert all(columns['nbytes'])
def test_tls(c, s, a, b):
    """Check that a ``ProgressWidget`` given the scheduler address reports a
    complete three-task chain."""
    x = c.submit(inc, 1)
    y = c.submit(inc, x)
    z = c.submit(inc, y)
    yield wait(z)

    widget = ProgressWidget([z], scheduler=s.address, complete=True)
    yield widget.listen()

    assert widget.bar.value == 1.0
    assert '3 / 3' in widget.bar_text.value
def test_dataset_grid_results(self):
    """Run a grid search on a stored dataset and check that the
    ``grid_results`` view returns one entry per stored result."""
    examples, labels = _create_dataset()
    examples_upload = SimpleUploadedFile(examples.name, examples.read())
    labels_upload = SimpleUploadedFile(labels.name, labels.read())
    ds, _ = DataSet.objects.get_or_create(
        name='TEST', examples=examples_upload, labels=labels_upload)

    param_grid = {
        'criterion': ['gini', 'entropy'],
        'max_depth': range(1, 21),
        'max_features': ['auto', 'log2', 'sqrt', None],
    }
    gs = ATGridSearchCV(tree.DecisionTreeClassifier(), param_grid,
                        dataset=ds.pk, webserver_url=self.live_server_url)
    wait(gs.fit())

    url = reverse('grid_results', kwargs={'uuid': gs._uuid})
    response = DjangoClient().get(url)

    self.assertEqual(200, response.status_code)
    stored = GridSearch.objects.get(uuid=gs._uuid).results.all().count()
    self.assertEqual(stored, len(response.data))
def test_worksteal_many_thieves(c, s, *workers):
    """Check that 100 tasks depending on one result are spread so that every
    worker holds between 3 and 29 keys."""
    root = c.submit(slowinc, -1, delay=0.1)
    yield root

    dependents = c.map(slowinc, [root] * 100, pure=False, delay=0.1)
    yield wait(dependents)

    for keys in s.has_what.values():
        assert 2 < len(keys) < 30

    total_keys = sum(len(keys) for keys in s.has_what.values())
    assert total_keys < 150
def test_prefer_constrained(c, s, a):
    """Check that resource-constrained tasks finish quickly even when many
    slow unconstrained tasks were submitted first."""
    # Bound to a name so the slow futures stay referenced during the test.
    futures = c.map(slowinc, range(1000), delay=0.1)
    constrained = c.map(inc, range(10), resources={'A': 1})

    start = time()
    yield wait(constrained)
    elapsed = time() - start
    assert elapsed < 4

    has_what = dict(s.has_what)
    # Snapshot taken as in the original; the assertion below reads the live
    # ``s.processing`` mapping instead.
    processing = dict(s.processing)

    # at most two slowinc's finished
    assert len(has_what) < len(constrained) + 2
    assert s.processing[a.address]
def test_persist_collections(c, s, a, b):
    """Persist dask arrays with a resource restriction keyed by the keys of
    ``y`` and check that those keys end up on ``a``."""
    da = pytest.importorskip('dask.array')

    x = da.arange(10, chunks=(5,))
    y = x.map_blocks(lambda x: x + 1)
    z = y.map_blocks(lambda x: 2 * x)
    w = z.sum()

    restrictions = {tuple(y.__dask_keys__()): {'A': 1}}
    ww, yy = c.persist([w, y], resources=restrictions)
    yield wait([ww, yy])

    assert all(tokey(key) in a.data for key in y.__dask_keys__())
def test_progressbar_done(loop):
    """Check ``ProgressWidget`` status, bar and elapsed-time text for
    finished and for failing futures on an explicitly created cluster."""
    with cluster() as (s, [a, b]), Client(s['address'], loop=loop) as c:
        done = [c.submit(inc, i) for i in range(5)]
        wait(done)

        widget = ProgressWidget(done)
        sync(loop, widget.listen)
        assert widget.status == 'finished'
        assert widget.bar.value == 1.0
        assert widget.bar.bar_style == 'success'
        assert 'Finished' in widget.elapsed_time.value

        failing = c.submit(throws, done)
        wait([failing])

        widget = ProgressWidget([failing])
        sync(loop, widget.listen)
        assert widget.status == 'error'
        assert widget.bar.value == 0.0
        assert widget.bar.bar_style == 'danger'
        assert 'Exception' in widget.elapsed_time.value
def test_write_bytes(c, s, a, b):
    """Write scattered byte strings to HDFS, once with a globbed file name
    and once into a directory, and check the resulting files."""
    with make_hdfs() as (hdfs, basedir):
        data_dir = '%s/data/' % basedir
        hdfs.mkdir(data_dir)

        payloads = [b'123', b'456', b'789']
        remote_data = yield c._scatter(payloads)

        futures = c.compute(
            write_bytes(remote_data, 'hdfs://%s/data/file.*.dat' % basedir))
        yield wait(futures)
        yield futures[0]

        assert len(hdfs.ls(data_dir)) == 3
        with hdfs.open('%s/data/file.1.dat' % basedir) as handle:
            assert handle.read() == b'456'

        data2_dir = '%s/data2/' % basedir
        hdfs.mkdir(data2_dir)
        futures = c.compute(
            write_bytes(remote_data, 'hdfs://%s/data2/' % basedir))
        yield wait(futures)

        assert len(hdfs.ls(data2_dir)) == 3
def test_AllProgress_lost_key(c, s, a, b, timeout=None):
    """Check that ``AllProgress`` drops its in-memory ``inc`` keys within two
    seconds of both workers closing."""
    progress = AllProgress(s)

    futures = c.map(inc, range(5))
    yield wait(futures)
    assert len(progress.state['memory']['inc']) == 5

    yield a._close()
    yield b._close()

    start = time()
    while len(progress.state['memory']['inc']) > 0:
        yield gen.sleep(0.1)
        assert time() < start + 2
def test_TaskGraph(c, s, a, b):
    """Exercise ``TaskGraph.update`` across several graph shapes and check
    its node and edge data sources."""
    graph = TaskGraph(s)

    futures = c.map(inc, range(5))
    total = c.submit(sum, futures)
    yield total

    graph.update()
    node_lengths = {len(column) for column in graph.node_source.data.values()}
    edge_lengths = {len(column) for column in graph.edge_source.data.values()}
    assert node_lengths == {6}
    assert edge_lengths == {5}

    # Both sources must be JSON-serializable.
    json.dumps(graph.edge_source.data)
    json.dumps(graph.node_source.data)

    da = pytest.importorskip("dask.array")
    x = da.random.random((20, 20), chunks=(10, 10)).persist()
    y = ((x + x.T) - x.mean(axis=0)).persist()
    yield wait(y)

    graph.update()
    graph.update()

    yield c.compute((x + y).sum())

    graph.update()

    future = c.submit(inc, 10)
    future2 = c.submit(inc, future)
    yield wait(future2)
    key = future.key
    del future, future2

    # Wait until the scheduler has forgotten the released task.
    while key in s.tasks:
        yield gen.sleep(0.01)

    assert "memory" in graph.node_source.data["state"]

    graph.update()
    graph.update()

    assert not all(
        flag == "False" for flag in graph.edge_source.data["visible"]
    )
def test_AllProgress_lost_key(c, s, a, b, timeout=None):
    """Check that ``AllProgress`` drops its in-memory ``inc`` keys within
    five seconds of both workers closing."""
    progress = AllProgress(s)

    futures = c.map(inc, range(5))
    yield wait(futures)
    assert len(progress.state["memory"]["inc"]) == 5

    yield a._close()
    yield b._close()

    start = time()
    while len(progress.state["memory"]["inc"]) > 0:
        yield gen.sleep(0.1)
        assert time() < start + 5
def test_progressbar_widget(c, s, a, b):
    """Check ``ProgressWidget`` on a finished three-task chain, with and
    without ``complete=True``."""
    x = c.submit(inc, 1)
    y = c.submit(inc, x)
    z = c.submit(inc, y)
    yield wait(z)

    scheduler = (s.ip, s.port)

    widget = ProgressWidget([z.key], scheduler=scheduler, complete=True)
    yield widget.listen()
    assert widget.bar.value == 1.0
    assert '3 / 3' in widget.bar_text.value

    widget = ProgressWidget([z.key], scheduler=(s.ip, s.port))
    yield widget.listen()
def test_task_stream(c, s, a, b):
    """Check the column lengths of a ``TaskStream`` data source across
    repeated updates."""
    ts = TaskStream(s)

    futures = c.map(slowinc, range(10), delay=0.001)
    yield wait(futures)

    ts.update()
    columns = dict(ts.source.data)
    assert all(len(values) == 10 for values in columns.values())
    assert min(columns["start"]) == 0  # zero based

    # A second update without new tasks leaves the lengths unchanged.
    ts.update()
    columns = dict(ts.source.data)
    assert all(len(values) == 10 for values in columns.values())

    total = c.submit(sum, futures)
    yield wait(total)

    ts.update()
    columns = dict(ts.source.data)
    assert len({len(values) for values in columns.values()}) == 1
def test_pandas_input(self):
    """Check that the grid search doesn't destroy pandas dataframe input."""
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
    except ImportError:
        pass
    else:
        types.append((DataFrame, Series))

    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)

    for InputFeatureType, TargetType in types:
        # X dataframe, y series
        X_df = InputFeatureType(X)
        y_ser = TargetType(y)

        clf = CheckingClassifier(
            check_X=lambda x: isinstance(x, InputFeatureType),
            check_y=lambda x: isinstance(x, TargetType),
        )
        grid_search = ATGridSearchCV(clf, {'foo_param': [1, 2, 3]},
                                     webserver_url=self.live_server_url)
        wait(grid_search.fit(X_df, y_ser))
        assert_true(hasattr(grid_search, "grid_scores_"))
def test_grid_search_sparse(self):
    """Test that grid search works with both dense and sparse matrices."""
    param_grid = {'C': [0.1, 1.0]}
    X_, y_ = make_classification(n_samples=200, n_features=100,
                                 random_state=0)

    # Dense input.
    cv = ATGridSearchCV(LinearSVC(), param_grid,
                        webserver_url=self.live_server_url)
    wait(cv.fit(X_[:180], y_[:180]))
    y_pred = cv.best_estimator_.predict(X_[180:])
    C = cv.best_estimator_.C

    # Sparse input built from the same data.
    X_ = sp.csr_matrix(X_)
    cv = ATGridSearchCV(LinearSVC(), {'C': [0.1, 1.0]},
                        webserver_url=self.live_server_url)
    wait(cv.fit(X_[:180].tocoo(), y_[:180]))
    y_pred2 = cv.best_estimator_.predict(X_[180:])
    C2 = cv.best_estimator_.C

    assert_true(np.mean(y_pred == y_pred2) >= .9)
    assert_equal(C, C2)
def test_TaskProgress_empty(c, s, a, b):
    """Check that ``TaskProgress`` has empty columns once the scheduler has
    no tasks left."""
    progress = TaskProgress(s)
    progress.update()

    futures = [c.submit(inc, i, key="f-" + "a" * i) for i in range(20)]
    yield wait(futures)
    progress.update()

    del futures
    # Wait for the scheduler to forget every task.
    while s.tasks:
        yield gen.sleep(0.01)
    progress.update()

    assert not any(len(column) for column in progress.source.data.values())
def test_progressbar_widget(c, s, a, b):
    """Check ``ProgressWidget`` on a finished three-task chain, with and
    without ``complete=True``."""
    x = c.submit(inc, 1)
    y = c.submit(inc, x)
    z = c.submit(inc, y)
    yield wait(z)

    widget = ProgressWidget([z.key], scheduler=s.address, complete=True)
    yield widget.listen()
    assert widget.bar.value == 1.0
    assert "3 / 3" in widget.bar_text.value

    widget = ProgressWidget([z.key], scheduler=s.address)
    yield widget.listen()
def test_grid_search_no_score(self):
    """Test grid-search on a classifier that has no score function."""
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]
    clf_no_score = LinearSVCNoScore(random_state=0)

    grid_search = ATGridSearchCV(clf, {'C': Cs}, scoring='accuracy',
                                 webserver_url=self.live_server_url)
    wait(grid_search.fit(X, y))

    grid_search_no_score = ATGridSearchCV(
        clf_no_score, {'C': Cs}, scoring='accuracy',
        webserver_url=self.live_server_url)
    # smoketest grid search
    wait(grid_search_no_score.fit(X, y))

    # check that best params are equal
    try:
        assert_equal(grid_search_no_score.best_params_,
                     grid_search.best_params_)
    except AssertionError:
        # On a mismatch, accept only the specific alternative below.
        if grid_search.best_params_ == {'C': 1}:
            fallback = {'C': 10}
        else:
            fallback = {'C': 1}
        assert_equal(grid_search_no_score.best_params_, fallback)

    # check that we can call score and that it gives the correct result
    assert_equal(grid_search.score(X, y), grid_search_no_score.score(X, y))

    # giving no scoring function raises an error
    grid_search_no_score = ATGridSearchCV(
        clf_no_score, {'C': Cs}, webserver_url=self.live_server_url)
    # NOTE(review): ``webserver_url`` is forwarded to ``fit`` here as a
    # keyword argument; confirm that ``fit`` accepts it.
    assert_raise_message(TypeError, "no scoring",
                         grid_search_no_score.fit, [[1]],
                         webserver_url=self.live_server_url)
def test_GraphPlot(c, s, a, b):
    """Exercise ``GraphPlot.update`` across several graph shapes and check
    its node and edge data sources."""
    plot = GraphPlot(s)

    futures = c.map(inc, range(5))
    total = c.submit(sum, futures)
    yield total

    plot.update()
    node_lengths = {len(column) for column in plot.node_source.data.values()}
    edge_lengths = {len(column) for column in plot.edge_source.data.values()}
    assert node_lengths == {6}
    assert edge_lengths == {5}

    da = pytest.importorskip('dask.array')
    x = da.random.random((20, 20), chunks=(10, 10)).persist()
    y = ((x + x.T) - x.mean(axis=0)).persist()
    yield wait(y)

    plot.update()
    plot.update()

    yield c.compute((x + y).sum())

    plot.update()

    future = c.submit(inc, 10)
    future2 = c.submit(inc, future)
    yield wait(future2)
    key = future.key
    del future, future2

    # Wait until the scheduler has forgotten the released task.
    while key in s.tasks:
        yield gen.sleep(0.01)

    assert 'memory' in plot.node_source.data['state']

    plot.update()
    plot.update()

    assert not all(
        flag == 'False' for flag in plot.edge_source.data['visible']
    )
def test_TaskGraph_limit(c, s, a, b):
    """Check how many nodes ``TaskGraph`` reports as tasks are added one at
    a time."""
    graph = TaskGraph(s)

    def func(x):
        return x

    def node_count():
        return len(graph.node_source.data["x"])

    f1 = c.submit(func, 1)
    yield wait(f1)
    graph.update()
    assert node_count() == 1

    f2 = c.submit(func, 2)
    yield wait(f2)
    graph.update()
    assert node_count() == 2

    # A third task does not raise the reported node count above two.
    f3 = c.submit(func, 3)
    yield wait(f3)
    graph.update()
    assert node_count() == 2

    del f1
    del f2
    del f3

    _ = c.submit(func, 1)

    # NOTE(review): the return value of ``async_wait_for`` is discarded; if
    # it returns an awaitable, this final condition is never checked.
    async_wait_for(lambda: len(graph.node_source.data["x"]) == 1, timeout=1)
def test_resources_str(c, s, a, b):
    """Check that a ``resources`` mapping keyed by a plain string is
    recorded on the first and last task of a persisted collection."""
    pd = pytest.importorskip("pandas")
    dd = pytest.importorskip("dask.dataframe")

    yield a.set_resources(MyRes=1)

    source = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
    x = dd.from_pandas(source, npartitions=1)
    y = x.apply(lambda row: row.sum(), axis=1, meta=(None, "int64"))
    yy = y.persist(resources={"MyRes": 1})
    yield wait(yy)

    for position in (0, -1):
        task_state = s.tasks[tokey(y.__dask_keys__()[position])]
        assert task_state.resource_restrictions == {"MyRes": 1}
def test_progressbar_done(client):
    """Check ``ProgressWidget`` status, bar and elapsed-time text for
    finished and for failing futures."""
    done = [client.submit(inc, i) for i in range(5)]
    wait(done)

    widget = ProgressWidget(done)
    client.sync(widget.listen)
    assert widget.status == 'finished'
    assert widget.bar.value == 1.0
    assert widget.bar.bar_style == 'success'
    assert 'Finished' in widget.elapsed_time.value

    failing = client.submit(throws, done)
    wait([failing])

    widget = ProgressWidget([failing])
    client.sync(widget.listen)
    assert widget.status == 'error'
    assert widget.bar.value == 0.0
    assert widget.bar.bar_style == 'danger'
    assert 'Exception' in widget.elapsed_time.value

    # The widget text should contain the repr of the raised exception.
    try:
        throws(1)
    except Exception as exc:
        assert repr(exc) in widget.elapsed_time.value
def test_progressbar_done(client):
    """Check ``ProgressWidget`` status, bar and elapsed-time text for
    finished and for failing futures."""
    done = [client.submit(inc, i) for i in range(5)]
    wait(done)

    widget = ProgressWidget(done)
    client.sync(widget.listen)
    assert widget.status == "finished"
    assert widget.bar.value == 1.0
    assert widget.bar.bar_style == "success"
    assert "Finished" in widget.elapsed_time.value

    failing = client.submit(throws, done)
    wait([failing])

    widget = ProgressWidget([failing])
    client.sync(widget.listen)
    assert widget.status == "error"
    assert widget.bar.value == 0.0
    assert widget.bar.bar_style == "danger"
    assert "Exception" in widget.elapsed_time.value

    # The widget text should contain the repr of the raised exception.
    try:
        throws(1)
    except Exception as exc:
        assert repr(exc) in widget.elapsed_time.value
def test_grid_search_score_method(self):
    """Compare ``score`` results of grid searches configured with different
    ``scoring`` arguments."""
    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {'C': [.1]}
    url = self.live_server_url

    search_no_scoring = ATGridSearchCV(clf, grid, scoring=None,
                                       webserver_url=url)
    wait(search_no_scoring.fit(X, y))

    search_accuracy = ATGridSearchCV(clf, grid, scoring='accuracy',
                                     webserver_url=url)
    wait(search_accuracy.fit(X, y))

    search_no_score_method_auc = ATGridSearchCV(
        LinearSVCNoScore(), grid, scoring='roc_auc', webserver_url=url)
    wait(search_no_score_method_auc.fit(X, y))

    search_auc = ATGridSearchCV(clf, grid, scoring='roc_auc',
                                webserver_url=url)
    wait(search_auc.fit(X, y))

    # ChangedBehaviourWarning occurred previously (prior to #9005)
    score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y)
    score_accuracy = assert_no_warnings(search_accuracy.score, X, y)
    score_no_score_auc = assert_no_warnings(
        search_no_score_method_auc.score, X, y)
    score_auc = assert_no_warnings(search_auc.score, X, y)

    # ensure the test is sane
    assert_true(score_auc < 1.0)
    assert_true(score_accuracy < 1.0)
    assert_not_equal(score_auc, score_accuracy)

    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)
def test_CommunicatingStream(c, s, a, b):
    """Check that both workers' ``CommunicatingStream`` sources are
    non-empty after cross-worker computations."""
    stream_a = CommunicatingStream(a)
    stream_b = CommunicatingStream(b)

    xs = c.map(inc, range(10), workers=a.address)
    ys = c.map(dec, range(10), workers=b.address)
    adds = c.map(add, xs, ys, workers=a.address)
    subs = c.map(sub, xs, ys, workers=b.address)
    yield wait([adds, subs])

    stream_a.update()
    stream_b.update()

    assert len(first(stream_a.outgoing.data.values()))
    assert len(first(stream_b.outgoing.data.values()))
    assert len(first(stream_a.incoming.data.values()))
    assert len(first(stream_b.incoming.data.values()))