def test_inline_cull_dependencies():
    """inline() should accept the dependency mapping that cull() returns."""
    graph = {
        "a": 1,
        "b": "a",
        "c": "b",
        "d": ["a", "b", "c"],
        "e": (add, (len, "d"), "a"),
    }
    culled, deps = cull(graph, ["d", "e"])
    # Smoke test: reusing cull's dependencies must not raise.
    inline(culled, {"b"}, dependencies=deps)
def test_inline_cull_dependencies():
    # Build a tiny graph, prune it with cull, then hand cull's dependency
    # mapping straight to inline — this should run without error.
    dsk = {'a': 1,
           'b': 'a',
           'c': 'b',
           'd': ['a', 'b', 'c'],
           'e': (add, (len, 'd'), 'a')}
    pruned, dependencies = cull(dsk, ['d', 'e'])
    inline(pruned, {'b'}, dependencies=dependencies)
def test_inline_cull_dependencies():
    """Verify inline() interoperates with the dependencies cull() computes."""
    original = {"a": 1, "b": "a", "c": "b", "d": ["a", "b", "c"], "e": (add, (len, "d"), "a")}
    outputs = ["d", "e"]
    subgraph, dependency_map = cull(original, outputs)
    inline(subgraph, {"b"}, dependencies=dependency_map)
def inlined_array(a, inline_arrays=None):
    """Return a new dask array equivalent to ``a`` with parts of its graph inlined.

    Parameters
    ----------
    a : dask array whose graph is a HighLevelGraph (it exposes
        ``__dask_graph__`` / ``__dask_keys__``).
    inline_arrays : None, dask array, tuple or list of dask arrays, optional
        If None, every task except the output keys of ``a`` is inlined and
        the result is culled to those outputs. Otherwise, only the layers of
        the given arrays are inlined into the layers that depend on them.

    Returns
    -------
    A new ``da.Array`` with the same name, chunks and dtype as ``a``.

    Raises
    ------
    TypeError
        If ``inline_arrays`` is not None, a list, a tuple or a dask Array.
    """
    agraph = a.__dask_graph__()
    # Flattened set of this array's output keys
    akeys = set(flatten(a.__dask_keys__()))
    # Inline everything except the output keys
    if inline_arrays is None:
        inline_keys = set(agraph.keys()) - akeys
        dsk2 = inline(agraph, keys=inline_keys, inline_constants=True)
        dsk3, _ = cull(dsk2, akeys)
        # Single flat layer, no external collection dependencies
        graph = HighLevelGraph.from_collections(a.name, dsk3, [])
        return da.Array(graph, a.name, a.chunks, dtype=a.dtype)
    # We're given specific arrays to inline, promote to list
    if isinstance(inline_arrays, da.Array):
        inline_arrays = [inline_arrays]
    elif isinstance(inline_arrays, tuple):
        inline_arrays = list(inline_arrays)
    if not isinstance(inline_arrays, list):
        raise TypeError("Invalid inline_arrays, must be "
                        "(None, list, tuple, dask.array.Array)")
    inline_names = set(a.name for a in inline_arrays)
    # Shallow-copy the layer mapping; deep-copy the dependency sets because
    # they are mutated (discard/pop) below
    layers = agraph.layers.copy()
    deps = {k: v.copy() for k, v in agraph.dependencies.items()}
    # We want to inline layers that depend on the inlined arrays
    inline_layers = set(k for k, v in deps.items()
                        if len(inline_names.intersection(v)) > 0)
    for layer_name in inline_layers:
        dsk = dict(layers[layer_name])
        layer_keys = set(dsk.keys())
        inline_keys = set()
        for array in inline_arrays:
            # Merge the inlined array's tasks into this layer and drop the
            # dependency edge pointing at it
            dsk.update(layers[array.name])
            deps.pop(array.name, None)
            deps[layer_name].discard(array.name)
            inline_keys.update(layers[array.name].keys())
        dsk2 = inline(dsk, keys=inline_keys, inline_constants=True)
        # Cull back down to the layer's own keys after inlining
        layers[layer_name], _ = cull(dsk2, layer_keys)
    # Remove layers containing the inlined arrays
    for inline_name in inline_names:
        layers.pop(inline_name)
    return da.Array(HighLevelGraph(layers, deps), a.name, a.chunks, a.dtype)
def inlined_array(a, inline_arrays=None):
    """Return a new dask array equivalent to ``a`` with parts of its graph inlined.

    Parameters
    ----------
    a : dask array whose graph is a HighLevelGraph (it exposes
        ``__dask_graph__`` / ``__dask_keys__``).
    inline_arrays : None, dask array, tuple or list of dask arrays, optional
        If None, every task except the output keys of ``a`` is inlined.
        Otherwise, the given arrays' layers are merged into ``a``'s own
        layer and inlined there.

    Returns
    -------
    A new ``da.Array`` with the same name, chunks and dtype as ``a``.

    Raises
    ------
    TypeError
        If ``inline_arrays`` is not None, a list, a tuple or a dask Array.
    ValueError
        If an array to inline is not present in ``a``'s graph layers.
    """
    agraph = a.__dask_graph__()
    # Flattened set of this array's output keys
    akeys = set(flatten(a.__dask_keys__()))
    # Inline everything except the output keys
    if inline_arrays is None:
        inline_keys = set(agraph.keys()) - akeys
        dsk2 = inline(agraph, keys=inline_keys, inline_constants=True)
        dsk3, _ = cull(dsk2, akeys)
        # Single flat layer, no external collection dependencies
        graph = HighLevelGraph.from_collections(a.name, dsk3, [])
        return da.Array(graph, a.name, a.chunks, dtype=a.dtype)
    # We're given specific arrays to inline, promote to list
    if isinstance(inline_arrays, da.Array):
        inline_arrays = [inline_arrays]
    elif isinstance(inline_arrays, tuple):
        inline_arrays = list(inline_arrays)
    if not isinstance(inline_arrays, list):
        raise TypeError("Invalid inline_arrays, must be "
                        "(None, list, tuple, dask.array.Array)")
    layers = agraph.layers.copy()
    deps = agraph.dependencies.copy()
    inline_keys = set()
    # Start from a's own layer; inlined arrays' tasks get merged in
    dsk = dict(layers[a.name])
    # Inline specified arrays
    for array in inline_arrays:
        # Remove array from layers and dependencies
        try:
            dsk.update(layers.pop(array.name))
            del deps[array.name]
        except KeyError:
            raise ValueError("%s is not a valid dependency of a"
                             % array.name)
        # Record keys to inline
        inline_keys.update(flatten(array.__dask_keys__()))
    dsk2 = inline(dsk, keys=inline_keys, inline_constants=True)
    # Cull back down to a's output keys after inlining
    dsk3, _ = cull(dsk2, akeys)
    layers[a.name] = dsk3
    graph = HighLevelGraph(layers, deps)
    return da.Array(graph, a.name, a.chunks, a.dtype)
def inline_pattern(dsk: dict, pat_ls: List[str], inline_constants: bool) -> dict:
    """
    Inline tasks whose keys match certain patterns.

    Parameters
    ----------
    dsk : dict
        Input dask graph.
    pat_ls : List[str]
        List of patterns to check.
    inline_constants : bool
        Whether to inline constants.

    Returns
    -------
    dsk : dict
        Dask graph with keys inlined.

    See Also
    --------
    dask.optimization.inline
    """
    # Iterate the dict directly; .keys() is redundant
    keys = [k for k in dsk if check_pat(k, pat_ls)]
    if keys:
        dsk = inline(dsk, keys, inline_constants=inline_constants)
        # Inlined tasks are now embedded in their consumers; drop the originals
        for k in keys:
            del dsk[k]
        if inline_constants:
            # flatten() yields directly into set(); the intermediate list
            # the original built was unnecessary. The dependency mapping
            # returned by cull is unused here.
            dsk, _ = cull(dsk, set(flatten(keys)))
    return dsk
def cached_array(array, token=None):
    """ Return a new array that functionally has the same values
    as array, but flattens the underlying graph and introduces
    a cache lookup when the individual array chunks are accessed.

    Useful for caching data that can fit in-memory for the duration
    of the graph's execution.

    Parameters
    ----------
    array : dask array to cache.
    token : optional, str
        A unique token identifying the internal cache.
        If None, one is generated automatically, preserving the
        original single-argument behaviour.
    """
    dsk = dict(array.__dask_graph__())
    keys = set(flatten(array.__dask_keys__()))

    if token is None:
        token = uuid.uuid4().hex

    # Inline + cull everything except the current array
    inline_keys = set(dsk.keys() - keys)
    dsk2 = inline(dsk, inline_keys, inline_constants=True)
    dsk3, _ = cull(dsk2, keys)

    # Create a cache used to store array values
    cache = ArrayCache(token)

    # Wrap each output task in a cache lookup
    for k in keys:
        dsk3[k] = (cache_entry, cache, Key(k), dsk3.pop(k))

    graph = HighLevelGraph.from_collections(array.name, dsk3, [])
    return da.Array(graph, array.name, array.chunks, array.dtype)
def test_inline():
    """Exercise inline() over keys, key lists, and the inline_constants flag."""
    graph = {"a": 1, "b": (inc, "a"), "c": (inc, "b"), "d": (add, "a", "c")}
    default_result = {"a": 1, "b": (inc, 1), "c": (inc, "b"), "d": (add, 1, "c")}
    assert inline(graph) == default_result
    chained_result = {
        "a": 1,
        "b": (inc, 1),
        "c": (inc, (inc, 1)),
        "d": (add, 1, (inc, (inc, 1))),
    }
    assert inline(graph, ["a", "b", "c"]) == chained_result

    graph = {"x": 1, "y": (inc, "x"), "z": (add, "x", "y")}
    assert inline(graph) == {"x": 1, "y": (inc, 1), "z": (add, 1, "y")}
    assert inline(graph, keys="y") == {"x": 1, "y": (inc, 1), "z": (add, 1, (inc, 1))}
    no_constants = {
        "x": 1,
        "y": (inc, "x"),
        "z": (add, "x", (inc, "x")),
    }
    assert inline(graph, keys="y", inline_constants=False) == no_constants

    graph = {"a": 1, "b": "a", "c": "b", "d": ["a", "b", "c"], "e": (add, (len, "d"), "a")}
    assert inline(graph, "d") == {
        "a": 1,
        "b": 1,
        "c": 1,
        "d": [1, 1, 1],
        "e": (add, (len, [1, 1, 1]), 1),
    }
    assert inline(graph, "a", inline_constants=False) == {
        "a": 1,
        "b": 1,
        "c": "b",
        "d": [1, "b", "c"],
        "e": (add, (len, "d"), 1),
    }
def test_inline():
    # Default inlining substitutes constants only.
    dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b'), 'd': (add, 'a', 'c')}
    assert inline(dsk) == {'a': 1, 'b': (inc, 1),
                           'c': (inc, 'b'), 'd': (add, 1, 'c')}
    # Explicit key lists cascade through dependent tasks.
    assert inline(dsk, ['a', 'b', 'c']) == {'a': 1, 'b': (inc, 1),
                                            'c': (inc, (inc, 1)),
                                            'd': (add, 1, (inc, (inc, 1)))}

    # A single key may be passed instead of a list.
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', 'y')}
    assert inline(dsk) == {'x': 1, 'y': (inc, 1), 'z': (add, 1, 'y')}
    assert inline(dsk, keys='y') == {'x': 1, 'y': (inc, 1),
                                     'z': (add, 1, (inc, 1))}
    # inline_constants=False leaves literal references untouched.
    assert inline(dsk, keys='y', inline_constants=False) == {
        'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', (inc, 'x'))}

    # Aliases and list-valued tasks.
    dsk = {'a': 1, 'b': 'a', 'c': 'b', 'd': ['a', 'b', 'c'],
           'e': (add, (len, 'd'), 'a')}
    assert inline(dsk, 'd') == {'a': 1, 'b': 1, 'c': 1, 'd': [1, 1, 1],
                                'e': (add, (len, [1, 1, 1]), 1)}
    assert inline(dsk, 'a', inline_constants=False) == {
        'a': 1, 'b': 1, 'c': 'b', 'd': [1, 'b', 'c'],
        'e': (add, (len, 'd'), 1)}
def test_inline():
    """Check inline() behaviour for default, keyed, and constant-free modes."""
    d1 = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b'), 'd': (add, 'a', 'c')}
    expected = {'a': 1, 'b': (inc, 1), 'c': (inc, 'b'), 'd': (add, 1, 'c')}
    assert inline(d1) == expected
    expected = {'a': 1,
                'b': (inc, 1),
                'c': (inc, (inc, 1)),
                'd': (add, 1, (inc, (inc, 1)))}
    assert inline(d1, ['a', 'b', 'c']) == expected

    d2 = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', 'y')}
    assert inline(d2) == {'x': 1, 'y': (inc, 1), 'z': (add, 1, 'y')}
    assert inline(d2, keys='y') == {'x': 1,
                                    'y': (inc, 1),
                                    'z': (add, 1, (inc, 1))}
    assert inline(d2, keys='y', inline_constants=False) == {'x': 1,
                                                            'y': (inc, 'x'),
                                                            'z': (add, 'x', (inc, 'x'))}

    d3 = {'a': 1,
          'b': 'a',
          'c': 'b',
          'd': ['a', 'b', 'c'],
          'e': (add, (len, 'd'), 'a')}
    assert inline(d3, 'd') == {'a': 1,
                               'b': 1,
                               'c': 1,
                               'd': [1, 1, 1],
                               'e': (add, (len, [1, 1, 1]), 1)}
    assert inline(d3, 'a', inline_constants=False) == {'a': 1,
                                                       'b': 1,
                                                       'c': 'b',
                                                       'd': [1, 'b', 'c'],
                                                       'e': (add, (len, 'd'), 1)}
def cached_array(array, token=None):
    """ Return a new array that functionally has the same values
    as array, but flattens the underlying graph and introduces
    a cache lookup when the individual array chunks are accessed.

    Useful for caching data that can fit in-memory for the duration
    of the graph's execution.

    Parameters
    ----------
    array : :class:`dask.array.Array`
        dask array to cache.
    token : optional, str
        A unique token for identifying the internal cache.
        If None, it will be automatically generated.
    """
    if token is None:
        token = uuid.uuid4().hex

    full_graph = dict(array.__dask_graph__())
    output_keys = set(flatten(array.__dask_keys__()))

    # Everything other than the output keys is inlined, then the
    # graph is culled back down to those outputs.
    interior_keys = set(full_graph.keys() - output_keys)
    inlined = inline(full_graph, interior_keys, inline_constants=True)
    flat_graph, _ = cull(inlined, output_keys)

    # Backing store for chunk values, keyed by the supplied token.
    cache = ArrayCache(token)

    # After culling, only the output tasks should remain.
    assert len(flat_graph) == len(output_keys)

    # Route every output task through a cache lookup.
    for key in output_keys:
        flat_graph[key] = (cache_entry, cache, Key(key), flat_graph.pop(key))

    hlg = HighLevelGraph.from_collections(array.name, flat_graph, [])
    return da.Array(hlg, array.name, array.chunks, array.dtype)
'nwords': (len, (str.split, 'words')), 'val1': 'orange', 'val2': 'apple', 'val3': 'pear', 'count1': (str.count, 'words', 'val1'), 'count2': (str.count, 'words', 'val2'), 'count3': (str.count, 'words', 'val3'), 'out1': (format_str, 'count1', 'val1', 'nwords'), 'out2': (format_str, 'count2', 'val2', 'nwords'), 'out3': (format_str, 'count3', 'val3', 'nwords'), 'print1': (print_and_return, 'out1'), 'print2': (print_and_return, 'out2'), 'print3': (print_and_return, 'out3') } dask.visualize(dsk, filename='/Users/longguangbin/Work/temp/dask2.pdf') from dask.threaded import get from dask.optimization import cull from dask.optimization import inline outputs = ['print1', 'print2'] results = get(dsk, outputs) dsk1, dependencies = cull(dsk, outputs) dsk2 = inline(dsk1, dependencies=dependencies) results = get(dsk2, outputs) # https://docs.dask.org/en/latest/optimize.html
def time_inline_keys(self):
    """Benchmark inline() restricted to a pre-computed key set."""
    inline(self.dsk, dependencies=self.deps, keys=self.inline_keys)
def time_inline_constants(self):
    """Benchmark inline() with constant inlining turned on."""
    inline(self.dsk, dependencies=self.deps, inline_constants=True)