def test_prefer_cheap_dependent():
    dsk = {'x': (f, 0.01, 10), 'y': (f, 0.000001, 1, 'x')}
    c = Cache(10000)
    with c:
        get_sync(dsk, 'y')
    assert c.cache.scorer.cost['x'] < c.cache.scorer.cost['y']


def test_prefer_cheap_dependent():
    dsk = {"x": (f, 0.01, 10), "y": (f, 0.000001, 1, "x")}
    c = Cache(10000)
    with c:
        get_sync(dsk, "y")
    assert c.cache.scorer.cost["x"] < c.cache.scorer.cost["y"]


def test_cache_options():
    cache = {}

    def inc2(x):
        assert "y" in cache
        return x + 1

    with dask.config.set(cache=cache):
        get_sync({"x": (inc2, "y"), "y": 1}, "x")


def test_start_callback():
    flag = [False]

    class MyCallback(Callback):
        def _start(self, dsk):
            flag[0] = True

    with MyCallback():
        get_sync({'x': 1}, 'x')

    assert flag[0] is True


def test_start_state_callback():
    flag = [False]

    class MyCallback(Callback):
        def _start_state(self, dsk, state):
            flag[0] = True
            assert dsk["x"] == 1
            assert len(state["cache"]) == 1

    with MyCallback():
        get_sync({"x": 1}, "x")

    assert flag[0] is True


def test_ordering():
    L = []

    def append(i):
        L.append(i)

    dsk = {('x', i): (append, i) for i in range(10)}
    x_keys = sorted(dsk)
    dsk['y'] = (lambda *args: None, list(x_keys))

    get_sync(dsk, 'y')

    assert L == sorted(L, reverse=True)


def test_cache_options():
    try:
        from chest import Chest
    except ImportError:
        return
    cache = Chest()

    def inc2(x):
        assert 'y' in cache
        return x + 1

    with dask.set_options(cache=cache):
        get_sync({'x': (inc2, 'y'), 'y': 1}, 'x')


def test_cache_options():
    try:
        from chest import Chest
    except ImportError:
        return
    cache = Chest()

    def inc2(x):
        assert "y" in cache
        return x + 1

    with dask.config.set(cache=cache):
        get_sync({"x": (inc2, "y"), "y": 1}, "x")


def test_start_state_callback():
    flag = [False]

    class MyCallback(Callback):
        def _start_state(self, dsk, state):
            flag[0] = True
            assert dsk['x'] == 1
            assert len(state['cache']) == 1

    with MyCallback():
        get_sync({'x': 1}, 'x')

    assert flag[0] is True


def test_ordering():
    L = []

    def append(i):
        L.append(i)

    dsk = {("x", i): (append, i) for i in range(10)}
    x_keys = sorted(dsk)
    dsk["y"] = (lambda *args: None, list(x_keys))

    get_sync(dsk, "y")

    assert L == sorted(L)


def construct_multiple(graph, names, validate=True):
    """Construct multiple Blocks from given graph and endpoint names."""
    # deserialize import paths where necessary and cast lists to tuples
    new_graph = {}
    for key, value in graph.items():
        cls = value[0]
        if isinstance(cls, str):
            cls = Block.from_import_path(cls)
        if not issubclass(cls, Block):
            raise TypeError(
                "Cannot construct from object of type '{}'".format(cls))
        args = tuple(value[1:])
        if validate:
            new_graph[key] = (cls,) + args
        else:
            token = _reconstruct_token(key)
            if token is None:
                logger.warning(
                    "Construct received a key with an invalid name ('%s'), "
                    "while validation was turned off",
                    key,
                )
            new_graph[key] = (cls._init_no_validation, token) + args
    return get_sync(new_graph, names, pack_exception=_construct_exc_callback)


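# A minimal usage sketch for construct_multiple, assuming a hypothetical Block
# subclass importable as "mypackage.blocks.Add". The graph values follow the
# shape the function iterates over ([class-or-import-path, arg1, arg2, ...]);
# the keys, import path, and arguments here are illustrative only.
graph = {
    "add_one": ["mypackage.blocks.Add", 1, 2],
    "add_two": ["mypackage.blocks.Add", 3, 4],
}
# Each endpoint key is rewritten into a (cls, *args) task and evaluated with
# get_sync, so the result is the constructed Block for each requested name.
blocks = construct_multiple(graph, ["add_one", "add_two"])

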
def test_SubgraphCallable():
    non_hashable = [1, 2, 3]

    dsk = {'a': (apply, add, ['in1', 2]),
           'b': (apply, partial_by_order, ['in2'],
                 {'function': func_with_kwargs, 'other': [(1, 20)], 'c': 4}),
           'c': (apply, partial_by_order, ['in2', 'in1'],
                 {'function': func_with_kwargs, 'other': [(1, 20)]}),
           'd': (inc, 'a'),
           'e': (add, 'c', 'd'),
           'f': ['a', 2, 'b', (add, 'b', (sum, non_hashable))],
           'h': (add, (sum, 'f'), (sum, ['a', 'b']))}

    f = SubgraphCallable(dsk, 'h', ['in1', 'in2'], name='test')
    assert f.name == 'test'
    assert repr(f) == 'test'

    dsk2 = dsk.copy()
    dsk2.update({'in1': 1, 'in2': 2})
    assert f(1, 2) == get_sync(cull(dsk2, ['h'])[0], ['h'])[0]
    assert f(1, 2) == f(1, 2)

    f2 = pickle.loads(pickle.dumps(f))
    assert f2(1, 2) == f(1, 2)


def custom_scheduler(*args, **kwargs):
    nonlocal using_custom_scheduler
    try:
        using_custom_scheduler = True
        return get_sync(*args, **kwargs)
    finally:
        using_custom_scheduler = False


def test_SubgraphCallable():
    non_hashable = [1, 2, 3]

    dsk = {
        "a": (apply, add, ["in1", 2]),
        "b": (
            apply,
            partial_by_order,
            ["in2"],
            {"function": func_with_kwargs, "other": [(1, 20)], "c": 4},
        ),
        "c": (
            apply,
            partial_by_order,
            ["in2", "in1"],
            {"function": func_with_kwargs, "other": [(1, 20)]},
        ),
        "d": (inc, "a"),
        "e": (add, "c", "d"),
        "f": ["a", 2, "b", (add, "b", (sum, non_hashable))],
        "h": (add, (sum, "f"), (sum, ["a", "b"])),
    }

    f = SubgraphCallable(dsk, "h", ["in1", "in2"], name="test")
    assert f.name == "test"
    assert repr(f) == "test"

    dsk2 = dsk.copy()
    dsk2.update({"in1": 1, "in2": 2})
    assert f(1, 2) == get_sync(cull(dsk2, ["h"])[0], ["h"])[0]
    assert f(1, 2) == f(1, 2)

    f2 = pickle.loads(pickle.dumps(f))
    assert f2(1, 2) == f(1, 2)


def test_SubgraphCallable():
    non_hashable = [1, 2, 3]

    dsk = {'a': (apply, add, ['in1', 2]),
           'b': (apply, partial_by_order, ['in2'],
                 {'function': func_with_kwargs, 'other': [(1, 20)], 'c': 4}),
           'c': (apply, partial_by_order, ['in2', 'in1'],
                 {'function': func_with_kwargs, 'other': [(1, 20)]}),
           'd': (inc, 'a'),
           'e': (add, 'c', 'd'),
           'f': ['a', 2, 'b', (add, 'b', (sum, non_hashable))],
           'g': (dontcall, 'in1'),
           'h': (add, (sum, 'f'), (sum, ['a', 'b']))}

    f = SubgraphCallable(dsk, 'h', ['in1', 'in2'], name='test')
    assert f.name == 'test'
    assert repr(f) == 'test'

    dsk2 = dsk.copy()
    dsk2.update({'in1': 1, 'in2': 2})
    assert f(1, 2) == get_sync(cull(dsk2, ['h'])[0], ['h'])[0]
    assert f(1, 2) == f(1, 2)

    f2 = pickle.loads(pickle.dumps(f))
    assert f2(1, 2) == f(1, 2)


def test_complex_ordering():
    da = pytest.importorskip("dask.array")
    from dask.diagnostics import Callback

    actual_order = []

    def track_order(key, dask, state):
        actual_order.append(key)

    x = da.random.normal(size=(20, 20), chunks=(-1, -1))
    res = (x.dot(x.T) - x.mean(axis=0)).std()
    dsk = dict(res.__dask_graph__())
    exp_order_dict = order(dsk)
    exp_order = sorted(exp_order_dict.keys(), key=exp_order_dict.get)
    with Callback(pretask=track_order):
        get_sync(dsk, exp_order[-1])
    assert actual_order == exp_order


def test_finish_always_called():
    flag = [False]

    class MyCallback(Callback):
        def _finish(self, dsk, state, errored):
            flag[0] = True
            assert errored

    dsk = {'x': (lambda: 1 / 0,)}

    # `raise_on_exception=True`
    try:
        with MyCallback():
            get_sync(dsk, 'x')
    except Exception as e:
        assert isinstance(e, ZeroDivisionError)
    assert flag[0]

    # `raise_on_exception=False`
    flag[0] = False
    try:
        with MyCallback():
            get_threaded(dsk, 'x')
    except Exception as e:
        assert isinstance(e, ZeroDivisionError)
    assert flag[0]

    # KeyboardInterrupt
    def raise_keyboard():
        raise KeyboardInterrupt()

    dsk = {'x': (raise_keyboard,)}
    flag[0] = False
    try:
        with MyCallback():
            get_sync(dsk, 'x')
    except BaseException as e:
        assert isinstance(e, KeyboardInterrupt)
    assert flag[0]


def test_finish_always_called():
    flag = [False]

    class MyCallback(Callback):
        def _finish(self, dsk, state, errored):
            flag[0] = True
            assert errored

    dsk = {'x': (lambda: 1 / 0,)}

    # `raise_on_exception=True`
    try:
        with MyCallback():
            get_sync(dsk, 'x')
    except Exception as e:
        assert isinstance(e, ZeroDivisionError)
    assert flag[0]

    # `raise_on_exception=False`
    flag[0] = False
    try:
        with MyCallback():
            get_threaded(dsk, 'x')
    except Exception as e:
        assert isinstance(e, ZeroDivisionError)
    assert flag[0]

    # KeyboardInterrupt
    def raise_keyboard():
        raise KeyboardInterrupt()

    dsk = {'x': (raise_keyboard,)}
    flag[0] = False
    try:
        with MyCallback():
            get_sync(dsk, 'x')
    except BaseException as e:
        assert isinstance(e, KeyboardInterrupt)
    assert flag[0]


def compute(graph, name, *args, **kwargs):
    """Compute a graph ({name: [func, arg1, arg2, ...]}) using dask.get_sync"""
    return get_sync(graph, [name])[0]


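# A small usage sketch for compute, assuming a plain dask-style task graph
# (tuples of a callable followed by its arguments, which is what get_sync
# evaluates); the keys and operations below are illustrative only.
from operator import add, mul

graph = {
    "summed": (add, 1, 2),          # 1 + 2 -> 3
    "doubled": (mul, "summed", 2),  # 3 * 2 -> 6
}
assert compute(graph, "doubled") == 6

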
def custom_scheduler(*args, **kwargs):
    nonlocal counter
    counter += 1
    return get_sync(*args, **kwargs)


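# For context, a minimal sketch of how a counting wrapper like the one above is
# typically exercised. The enclosing test, the delayed computation, and passing
# the wrapper via `scheduler=` are assumptions for illustration, not part of the
# original snippet; dask accepts a callable scheduler with a get-style signature.
import dask
from dask.local import get_sync


def test_custom_scheduler_is_called():
    counter = 0

    def custom_scheduler(*args, **kwargs):
        nonlocal counter
        counter += 1
        return get_sync(*args, **kwargs)

    x = dask.delayed(lambda a, b: a + b)(1, 2)
    # Routing the computation through the wrapper bumps the counter once.
    assert x.compute(scheduler=custom_scheduler) == 3
    assert counter == 1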