def test_fuse_keys():
    """Fusing with an explicit ``keys`` list gives the expected graphs.

    Each scenario is ``(graph, keys, expected without renaming, expected
    with renaming)``; both variants are checked in that order.
    """
    run = fuse2  # tests both `fuse` and `fuse_linear`

    scenarios = [
        (
            {"a": 1, "b": (inc, "a"), "c": (inc, "b")},
            ["b"],
            {"b": (inc, 1), "c": (inc, "b")},
            {"a-b": (inc, 1), "c": (inc, "a-b"), "b": "a-b"},
        ),
        (
            {
                "w": (inc, "x"),
                "x": (inc, "y"),
                "y": (inc, "z"),
                "z": (add, "a", "b"),
                "a": 1,
                "b": 2,
            },
            ["x", "z"],
            {
                "w": (inc, "x"),
                "x": (inc, (inc, "z")),
                "z": (add, "a", "b"),
                "a": 1,
                "b": 2,
            },
            {
                "w": (inc, "y-x"),
                "y-x": (inc, (inc, "z")),
                "z": (add, "a", "b"),
                "a": 1,
                "b": 2,
                "x": "y-x",
            },
        ),
    ]

    for graph, protected, plain, renamed in scenarios:
        assert run(graph, protected, rename_keys=False) == with_deps(plain)
        assert run(graph, protected, rename_keys=True) == with_deps(renamed)
def _chunked_array_copy(spec: CopySpec) -> Delayed:
    """Build a fused ``Delayed`` that copies ``spec.read`` into ``spec.write``.

    When ``spec.intermediate.array`` is ``None`` a single direct copy is
    built.  Otherwise two copies are built (read -> intermediate and
    intermediate -> write) and their graphs are stitched into one before
    fusing.
    """
    if spec.intermediate.array is None:
        # Single-stage copy: just fuse the store graph.
        direct = _direct_array_copy(
            spec.read.array,
            spec.write.array,
            spec.read.chunks,
        )
        fused_graph, _ = fuse(dask.utils.ensure_dict(direct.dask))
        return Delayed(direct.key, fused_graph)

    # Two-stage copy through the intermediate array.
    first_stage = _direct_array_copy(
        spec.read.array,
        spec.intermediate.array,
        spec.read.chunks,
    )
    second_stage = _direct_array_copy(
        spec.intermediate.array,
        spec.write.array,
        spec.write.chunks,
    )

    # Work on plain-dict versions of both graphs so they can be edited.
    first_graph = dask.utils.ensure_dict(first_stage.dask)
    second_graph = dask.utils.ensure_dict(second_stage.dask)

    # Locate the single string key in the second graph that starts with
    # "from-zarr" (the root read of the intermediate array).
    read_roots = [
        key
        for key in second_graph
        if isinstance(key, str) and key.startswith("from-zarr")
    ]
    assert len(read_roots) == 1
    read_root = read_roots[0]

    # Wrap the root task in a function that returns its first argument and
    # ignores the rest; the extra arguments are the elements of the first
    # stage's store task, which ties the two graphs together.
    second_graph[read_root] = (
        lambda a, *b: a,
        second_graph[read_root],
        *first_graph[first_stage.key],
    )
    second_graph.update(first_graph)

    fused_graph, _ = fuse(second_graph)
    return Delayed(second_stage.key, fused_graph)
def TestOneInput(data):
    """Fuzz entry point: run ``fuse`` or ``fuse_linear`` on a fuzzed graph.

    Inputs shorter than 10 bytes, or inputs for which ``get_fuse_dict``
    yields an empty graph, are ignored.
    """
    if len(data) < 10:
        return

    fdp = atheris.FuzzedDataProvider(data)
    fuzzed_dict = get_fuse_dict(data)
    if len(fuzzed_dict) == 0:
        return

    # The first consumed bool picks the function, the second the
    # ``rename_keys`` flag -- keep this order so inputs map to the same calls.
    target = fuse if fdp.ConsumeBool() else fuse_linear
    rename = fdp.ConsumeBool()
    target(fuzzed_dict, rename_keys=rename)
def optimize(dsk, keys, **kwargs):
    """Optimize a graph for the requested ``keys``.

    High-level (blockwise) optimizations are applied only when ``dsk`` is
    already a ``HighLevelGraph``; a plain graph is wrapped into one first.
    The graph is then culled to ``keys``.  Low-level fusion runs only when
    the ``optimization.fuse.active`` config value is truthy; otherwise the
    culled ``HighLevelGraph`` is returned as is.
    """
    if not isinstance(keys, (list, set)):
        keys = [keys]
    keys = list(core.flatten(keys))

    if isinstance(dsk, HighLevelGraph):
        # Perform Blockwise optimizations for HLG input
        dsk = optimize_dataframe_getitem(dsk, keys=keys)
        dsk = optimize_blockwise(dsk, keys=keys)
        dsk = fuse_roots(dsk, keys=keys)
    else:
        dsk = HighLevelGraph.from_collections(id(dsk), dsk, dependencies=())
    dsk = dsk.cull(set(keys))

    # Low-level fusion is opt-in here: a missing (None) or falsy config
    # value means "skip it".
    fusion_enabled = config.get("optimization.fuse.active")
    if not fusion_enabled:
        return dsk

    dependencies = dsk.get_all_dependencies()
    dsk = ensure_dict(dsk)

    # An unset subgraph option is treated as True.
    subgraphs_setting = config.get("optimization.fuse.subgraphs")
    fuse_subgraphs = True if subgraphs_setting is None else subgraphs_setting

    dsk, _ = fuse(
        dsk,
        keys,
        dependencies=dependencies,
        fuse_subgraphs=fuse_subgraphs,
    )
    dsk, _ = cull(dsk, keys)
    return dsk
def test_fused_keys_max_length():  # generic fix for gh-5999
    """Keys produced by renaming fusion of a chain of very long keys stay short."""
    # Every chain key is one letter followed by this long shared suffix.
    tail = "-looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong"
    letters = "uvwxyz"

    # u -> v -> w -> x -> y -> z, each applying ``inc`` to the next key.
    d = {}
    for position in range(len(letters) - 1):
        d[letters[position] + tail] = (inc, letters[position + 1] + tail)
    d["z" + tail] = (add, "a", "b")
    d["a"] = 1
    d["b"] = 2

    fused, _ = fuse(d, rename_keys=True)
    assert all(len(key) < 150 for key in fused)
def test_fuse_subgraphs_linear_chains_of_duplicate_deps(
        compare_subgraph_callables):
    """A linear chain whose tasks use their dependency twice fuses into one
    ``SubgraphCallable`` when ``fuse_subgraphs=True``."""
    # x-1 -> add-1 -> ... -> add-5, each step adding the previous key to itself.
    dsk = {"x-1": 1}
    previous = "x-1"
    for step in range(1, 6):
        current = "add-%d" % step
        dsk[current] = (add, previous, previous)
        previous = current

    # The expected callable wraps a graph equal to the whole input chain.
    whole_chain = dict(dsk)

    res = fuse(dsk, "add-5", fuse_subgraphs=True)
    sol = with_deps({
        "add-x-1": (SubgraphCallable(whole_chain, "add-5", ()), ),
        "add-5": "add-x-1",
    })
    assert res == sol
def custom_delay_optimize(
    dsk: dict, keys: list, fast_functions=None, inline_patterns=None, **kwargs
) -> dict:
    """
    Custom optimization functions for delayed tasks.

    By default only fusing of tasks will be carried out.

    Parameters
    ----------
    dsk : dict
        Input dask task graph.
    keys : list
        Output task keys.
    fast_functions : list, optional
        List of fast functions to be inlined. By default `None`, which
        skips function inlining (same as an empty list).
    inline_patterns : list, optional
        List of patterns of task keys to be inlined. By default `None`,
        which skips pattern inlining (same as an empty list).

    Returns
    -------
    dsk : dict
        Optimized dask graph.
    """
    # NOTE(review): ``keys`` is accepted but never forwarded to ``fuse`` or
    # ``inline_functions`` -- confirm that output keys need no protection here.
    dsk, _ = fuse(ensure_dict(dsk), rename_keys=custom_fused_keys_renamer)
    if inline_patterns:
        dsk = inline_pattern(dsk, inline_patterns, inline_constants=False)
    if fast_functions:
        dsk = inline_functions(
            dsk,
            [],
            fast_functions=fast_functions,
        )
    return dsk
def test_fuse_subgraphs_linear_chains_of_duplicate_deps():
    """A chain of ``add`` tasks that each use their dependency twice is
    fused into a single ``SubgraphCallable`` with ``fuse_subgraphs=True``."""
    names = ['x-1'] + ['add-%d' % i for i in range(1, 6)]

    # First key holds the constant; every later key adds its predecessor
    # to itself.
    dsk = {names[0]: 1}
    for before, after in zip(names, names[1:]):
        dsk[after] = (add, before, before)

    # Snapshot of the input chain used inside the expected callable.
    inner = dict(dsk)

    res = fuse(dsk, 'add-5', fuse_subgraphs=True)
    expected_task = (SubgraphCallable(inner, 'add-5', ()),)
    sol = with_deps({'add-x-1': expected_task, 'add-5': 'add-x-1'})
    assert res == sol
def optimize(dsk, keys, **kwargs):
    """Cull ``dsk`` to ``keys``, fuse it, then cull once more.

    The fusion width is read from ``_globals['fuse_ave_width']`` and
    defaults to 1.
    """
    if isinstance(keys, list):
        flatkeys = list(flatten(keys))
    else:
        flatkeys = [keys]

    culled, deps = cull(dsk, flatkeys)
    width = _globals.get('fuse_ave_width', 1)
    fused, deps = fuse(culled, keys, dependencies=deps, ave_width=width)
    result, _ = cull(fused, keys)
    return result
def fuse2(*args, **kwargs):
    """Run both ``fuse`` and ``fuse_linear`` and compare results

    ``fuse_linear`` always runs and its result is returned.  ``fuse`` is
    run and compared only when ``rename_keys`` is passed explicitly as
    ``False``.
    """
    linear_result = fuse_linear(*args, **kwargs)
    if kwargs.get('rename_keys') is False:
        general_result = fuse(*args, **kwargs)
        assert linear_result == general_result
    return linear_result
def test_fuse_config():
    """With ``optimization.fuse.active`` set to False, ``fuse`` hands back the
    graph and dependencies it was given."""
    graph = {
        "a": 1,
        "b": (inc, "a"),
    }
    deps = {"b": ("a", )}

    with dask.config.set({"optimization.fuse.active": False}):
        outcome = fuse(graph, "b", dependencies=deps)

    assert outcome == (graph, deps)
def fuse2(*args, **kwargs):
    """Run both ``fuse`` and ``fuse_linear`` and compare results

    The ``fuse_linear`` result is what gets returned.  The comparison with
    ``fuse`` happens only for an explicit ``rename_keys=False``.
    """
    rv_linear = fuse_linear(*args, **kwargs)

    should_compare = kwargs.get("rename_keys") is False
    if not should_compare:
        return rv_linear

    rv_general = fuse(*args, **kwargs)
    assert rv_linear == rv_general
    return rv_linear
def fuse_delayed(tasks: dask.delayed) -> dask.delayed:
    """
    Return a new ``Delayed`` for ``tasks`` whose graph has been fused.

    Useful (or even required) because dask.delayed optimization doesn't do
    this step.
    """
    graph = dask.utils.ensure_dict(tasks.dask)
    # Only the fused graph is needed; the dependency mapping is discarded.
    fused_graph, _ = fuse(graph)
    return Delayed(tasks._key, fused_graph)
def test_fuse_keys():
    """Check fusion results when a ``keys`` list is passed, with and
    without key renaming."""

    def check(graph, protected, rename, expected):
        # tests both `fuse` and `fuse_linear`
        assert fuse2(graph, protected, rename_keys=rename) == with_deps(expected)

    short_chain = {
        'a': 1,
        'b': (inc, 'a'),
        'c': (inc, 'b'),
    }
    check(short_chain, ['b'], False, {
        'b': (inc, 1),
        'c': (inc, 'b'),
    })
    check(short_chain, ['b'], True, {
        'a-b': (inc, 1),
        'c': (inc, 'a-b'),
        'b': 'a-b',
    })

    long_chain = {
        'w': (inc, 'x'),
        'x': (inc, 'y'),
        'y': (inc, 'z'),
        'z': (add, 'a', 'b'),
        'a': 1,
        'b': 2,
    }
    check(long_chain, ['x', 'z'], False, {
        'w': (inc, 'x'),
        'x': (inc, (inc, 'z')),
        'z': (add, 'a', 'b'),
        'a': 1,
        'b': 2,
    })
    check(long_chain, ['x', 'z'], True, {
        'w': (inc, 'y-x'),
        'y-x': (inc, (inc, 'z')),
        'z': (add, 'a', 'b'),
        'a': 1,
        'b': 2,
        'x': 'y-x',
    })
def test_optimize_slicing():
    """``optimize_slices`` applied to a fused chain of getters gives the
    expected graph, both with no protected keys and with 'c', 'd', 'e'
    protected."""

    def everything():
        # Fresh index tuple selecting the full range.
        return (slice(None, None, None),)

    def first_five():
        # Fresh index tuple selecting elements 0..4.
        return (slice(0, 5, None),)

    dsk = {
        'a': (range, 10),
        'b': (getter, 'a', everything()),
        'c': (getter, 'b', everything()),
        'd': (getter, 'c', first_five()),
        'e': (getter, 'd', everything()),
    }

    fused_graph = fuse(dsk, [], rename_keys=False)[0]
    result = optimize_slices(fused_graph)
    assert result == {'e': (getter, (range, 10), first_five())}

    # protect output keys
    fused_graph = fuse(dsk, ['c', 'd', 'e'], rename_keys=False)[0]
    result = optimize_slices(fused_graph)
    assert result == {
        'c': (getter, (range, 10), (slice(0, None, None),)),
        'd': (getter, 'c', first_five()),
        'e': (getter, 'd', everything()),
    }
def test_dont_fuse_numpy_arrays():
    """
    A numpy array stored directly in the graph is left where it is.

    Some types should stay in the graph bare; this helps with things like
    serialization.
    """
    np = pytest.importorskip("numpy")

    graph = {"x": np.arange(5), "y": (inc, "x")}
    fused_graph = fuse(graph, "y")[0]
    assert fused_graph == graph
def optimize(
    dsk,
    keys,
    fuse_keys=None,
    fast_functions=None,
    inline_functions_fast_functions=(getter_inline,),
    rename_fused_keys=True,
    **kwargs,
):
    """Optimize dask for array computation

    Steps, in order:

    1. Wrap ``dsk`` in a ``HighLevelGraph`` if needed, apply the blockwise
       and root-fusion passes, and cull to ``keys``.
    2. Unless ``optimization.fuse.active`` is explicitly ``False``, fuse
       the low-level graph while holding ``hold_keys(...)``, ``keys`` and
       ``fuse_keys``.
    3. Inline the configured fast functions, if any.
    4. Run ``optimize_slices`` on the result.

    ``fast_functions``, when given, overrides
    ``inline_functions_fast_functions``.
    """
    if not isinstance(keys, (list, set)):
        keys = [keys]
    keys = list(flatten(keys))

    if not isinstance(dsk, HighLevelGraph):
        dsk = HighLevelGraph.from_collections(id(dsk), dsk, dependencies=())

    dsk = optimize_blockwise(dsk, keys=keys)
    dsk = fuse_roots(dsk, keys=keys)
    dsk = dsk.cull(set(keys))

    # Low-level fusion is opt-out here: only an explicit False skips it.
    fusion_setting = config.get("optimization.fuse.active")
    if fusion_setting is False:
        return dsk

    dependencies = dsk.get_all_dependencies()
    dsk = ensure_dict(dsk)

    # Low level task optimizations
    if fast_functions is None:
        inline_fast = inline_functions_fast_functions
    else:
        inline_fast = fast_functions

    held = hold_keys(dsk, dependencies)
    protected = held + keys + (fuse_keys or [])

    dsk, dependencies = fuse(
        dsk,
        protected,
        dependencies,
        rename_keys=rename_fused_keys,
    )

    if inline_fast:
        dsk = inline_functions(
            dsk,
            keys,
            dependencies=dependencies,
            fast_functions=inline_fast,
        )

    return optimize_slices(dsk)
def test_fuse_keys():
    """Fusion respects the ``keys`` argument, with and without renaming."""
    fuse_both = fuse2  # tests both `fuse` and `fuse_linear`

    # --- three-task chain, 'b' protected ---
    graph = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}
    protected = ['b']

    plain = with_deps_after = None  # placeholders, assigned per check below
    outcome = fuse_both(graph, protected, rename_keys=False)
    plain = with_deps({'b': (inc, 1), 'c': (inc, 'b')})
    assert outcome == plain

    outcome = fuse_both(graph, protected, rename_keys=True)
    with_deps_after = with_deps({
        'a-b': (inc, 1),
        'c': (inc, 'a-b'),
        'b': 'a-b',
    })
    assert outcome == with_deps_after

    # --- longer chain, 'x' and 'z' protected ---
    graph = {
        'w': (inc, 'x'),
        'x': (inc, 'y'),
        'y': (inc, 'z'),
        'z': (add, 'a', 'b'),
        'a': 1,
        'b': 2,
    }
    protected = ['x', 'z']

    outcome = fuse_both(graph, protected, rename_keys=False)
    assert outcome == with_deps({
        'w': (inc, 'x'),
        'x': (inc, (inc, 'z')),
        'z': (add, 'a', 'b'),
        'a': 1,
        'b': 2,
    })

    outcome = fuse_both(graph, protected, rename_keys=True)
    assert outcome == with_deps({
        'w': (inc, 'y-x'),
        'y-x': (inc, (inc, 'z')),
        'z': (add, 'a', 'b'),
        'a': 1,
        'b': 2,
        'x': 'y-x',
    })
def test_optimize_slicing():
    """Fusing a chain of ``getter`` tasks and then running
    ``optimize_slices`` yields the expected graphs."""

    def optimized(protected):
        # Fuse without renaming, then run the slice optimizer on the graph.
        fused_graph, _ = fuse(dsk, protected, rename_keys=False)[:2]
        return optimize_slices(fused_graph)

    dsk = {
        "a": (range, 10),
        "b": (getter, "a", (slice(None, None, None), )),
        "c": (getter, "b", (slice(None, None, None), )),
        "d": (getter, "c", (slice(0, 5, None), )),
        "e": (getter, "d", (slice(None, None, None), )),
    }

    # Nothing protected.
    assert optimized([]) == {
        "e": (getter, (range, 10), (slice(0, 5, None), )),
    }

    # protect output keys
    assert optimized(["c", "d", "e"]) == {
        "c": (getter, (range, 10), (slice(0, None, None), )),
        "d": (getter, "c", (slice(0, 5, None), )),
        "e": (getter, "d", (slice(None, None, None), )),
    }
def test_fuse_subgraphs_linear_chains_of_duplicate_deps():
    """With ``fuse_subgraphs=True`` a chain of self-adding tasks becomes one
    ``SubgraphCallable`` task plus an alias for the output key."""
    dsk = {
        'x-1': 1,
        'add-1': (add, 'x-1', 'x-1'),
        'add-2': (add, 'add-1', 'add-1'),
        'add-3': (add, 'add-2', 'add-2'),
        'add-4': (add, 'add-3', 'add-3'),
        'add-5': (add, 'add-4', 'add-4'),
    }
    # The expected callable wraps a graph equal to the input chain; copy it
    # before fusing so the expectation is independent of the input object.
    wrapped = dict(dsk)
    output_key = 'add-5'
    fused_key = 'add-x-1'

    res = fuse(dsk, output_key, fuse_subgraphs=True)

    callable_task = (SubgraphCallable(wrapped, output_key, ()),)
    sol = with_deps({fused_key: callable_task, output_key: fused_key})
    assert res == sol
def test_fuse():
    """Fusion of several small graphs, without and with key renaming.

    Each case is ``(graph, expected with rename_keys=False, expected with
    rename_keys=True)``.
    """
    run = fuse2  # tests both `fuse` and `fuse_linear`

    cases = [
        # Single linear chain on top of one ``add``.
        (
            {
                "w": (inc, "x"),
                "x": (inc, "y"),
                "y": (inc, "z"),
                "z": (add, "a", "b"),
                "a": 1,
                "b": 2,
            },
            {"w": (inc, (inc, (inc, (add, "a", "b")))), "a": 1, "b": 2},
            {
                "z-y-x-w": (inc, (inc, (inc, (add, "a", "b")))),
                "a": 1,
                "b": 2,
                "w": "z-y-x-w",
            },
        ),
        # Same chain, but "y" now has a second dependent ("NEW").
        (
            {
                "NEW": (inc, "y"),
                "w": (inc, "x"),
                "x": (inc, "y"),
                "y": (inc, "z"),
                "z": (add, "a", "b"),
                "a": 1,
                "b": 2,
            },
            {
                "NEW": (inc, "y"),
                "w": (inc, (inc, "y")),
                "y": (inc, (add, "a", "b")),
                "a": 1,
                "b": 2,
            },
            {
                "NEW": (inc, "z-y"),
                "x-w": (inc, (inc, "z-y")),
                "z-y": (inc, (add, "a", "b")),
                "a": 1,
                "b": 2,
                "w": "x-w",
                "y": "z-y",
            },
        ),
        # Two branches off "y", and chains feeding "a" and "b".
        (
            {
                "v": (inc, "y"),
                "u": (inc, "w"),
                "w": (inc, "x"),
                "x": (inc, "y"),
                "y": (inc, "z"),
                "z": (add, "a", "b"),
                "a": (inc, "c"),
                "b": (inc, "d"),
                "c": 1,
                "d": 2,
            },
            {
                "u": (inc, (inc, (inc, "y"))),
                "v": (inc, "y"),
                "y": (inc, (add, "a", "b")),
                "a": (inc, 1),
                "b": (inc, 2),
            },
            {
                "x-w-u": (inc, (inc, (inc, "z-y"))),
                "v": (inc, "z-y"),
                "z-y": (inc, (add, "c-a", "d-b")),
                "c-a": (inc, 1),
                "d-b": (inc, 2),
                "a": "c-a",
                "b": "d-b",
                "u": "x-w-u",
                "y": "z-y",
            },
        ),
        # "x" fans out to three dependents.
        (
            {
                "a": (inc, "x"),
                "b": (inc, "x"),
                "c": (inc, "x"),
                "d": (inc, "c"),
                "x": (inc, "y"),
                "y": 0,
            },
            {
                "a": (inc, "x"),
                "b": (inc, "x"),
                "d": (inc, (inc, "x")),
                "x": (inc, 0),
            },
            {
                "a": (inc, "y-x"),
                "b": (inc, "y-x"),
                "c-d": (inc, (inc, "y-x")),
                "y-x": (inc, 0),
                "d": "c-d",
                "x": "y-x",
            },
        ),
        # "c" uses "b" twice.
        (
            {"a": 1, "b": (inc, "a"), "c": (add, "b", "b")},
            {"b": (inc, 1), "c": (add, "b", "b")},
            {"a-b": (inc, 1), "c": (add, "a-b", "a-b"), "b": "a-b"},
        ),
    ]

    for graph, plain, renamed in cases:
        assert run(graph, rename_keys=False) == with_deps(plain)
        assert run(graph, rename_keys=True) == with_deps(renamed)
def time_fuse(self, kind):
    """Call ``fuse`` once on the graph, keys and dependencies stored on ``self``.

    ``kind`` is not used in the body (presumably a benchmark parameter
    consumed during setup -- confirm against the enclosing class).
    """
    graph, output_keys, dependencies = self.dsk, self.keys, self.deps
    fuse(graph, output_keys, dependencies, **self.extra_kwargs)
def test_fuse_reductions_single_input():
    """``fuse`` on reduction-shaped graphs that all start from one key ``"a"``.

    For each graph the test checks, at several ``ave_width`` values and for
    both ``rename_keys`` settings, either that the graph comes back
    unchanged or that it matches a specific fused graph.
    """

    def f(*args):
        return args

    def expect(graph, widths, rename, result):
        # Fuse first, then build the expected value from ``result``.
        for width in widths:
            fused = fuse(graph, ave_width=width, rename_keys=rename)
            assert fused == with_deps(result)

    def untouched(graph, width):
        # At this width the graph must be returned as it was given.
        expect(graph, [width], False, graph)
        expect(graph, [width], True, graph)

    def leaves(count):
        # b1..b<count>, each applying ``f`` to "a".
        return {"b%d" % i: (f, "a") for i in range(1, count + 1)}

    def layer(prefix, count, child):
        # <prefix>i combines <child>(2i-1) and <child>(2i).
        return {
            "%s%d" % (prefix, i): (
                f,
                "%s%d" % (child, 2 * i - 1),
                "%s%d" % (child, 2 * i),
            )
            for i in range(1, count + 1)
        }

    # Fully inlined binary reductions of 1, 2, 4 and 8 leaves.
    leaf = (f, "a")
    pair = (f, leaf, leaf)
    quad = (f, pair, pair)
    octo = (f, quad, quad)

    # --- two branches of different arity ---
    d = {"a": 1, "b1": (f, "a"), "b2": (f, "a", "a"), "c": (f, "b1", "b2")}
    untouched(d, 1.9)
    merged = (f, (f, "a"), (f, "a", "a"))
    expect(d, [2], False, {"a": 1, "c": merged})
    expect(d, [2], True, {"a": 1, "b1-b2-c": merged, "c": "b1-b2-c"})

    # --- three branches of different arity ---
    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a", "a"),
        "b3": (f, "a", "a", "a"),
        "c": (f, "b1", "b2", "b3"),
    }
    untouched(d, 2.9)
    merged = (f, (f, "a"), (f, "a", "a"), (f, "a", "a", "a"))
    expect(d, [3], False, {"a": 1, "c": merged})
    expect(d, [3], True, {"a": 1, "b1-b2-b3-c": merged, "c": "b1-b2-b3-c"})

    # --- the reducing task also reads "a" directly ---
    d = {"a": 1, "b1": (f, "a"), "b2": (f, "a"), "c": (f, "a", "b1", "b2")}
    untouched(d, 1.9)
    merged = (f, "a", leaf, leaf)
    expect(d, [2], False, {"a": 1, "c": merged})
    expect(d, [2], True, {"a": 1, "b1-b2-c": merged, "c": "b1-b2-c"})

    # --- two stacked diamonds ---
    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "c": (f, "b1", "b2"),
        "d1": (f, "c"),
        "d2": (f, "c"),
        "e": (f, "d1", "d2"),
    }
    untouched(d, 1.9)
    upper_diamond = (f, (f, "c"), (f, "c"))
    expect(d, [2], False, {"a": 1, "c": pair, "e": upper_diamond})
    expect(d, [2], True, {
        "a": 1,
        "b1-b2-c": pair,
        "d1-d2-e": upper_diamond,
        "c": "b1-b2-c",
        "e": "d1-d2-e",
    })

    # --- binary tree with 4 leaves ---
    d = {"a": 1, **leaves(4), **layer("c", 2, "b"), "d": (f, "c1", "c2")}
    untouched(d, 1.9)
    expect(d, [2, 2.9], False, {
        "a": 1,
        "c1": pair,
        "c2": pair,
        "d": (f, "c1", "c2"),
    })
    expect(d, [2, 2.9], True, {
        "a": 1,
        "b1-b2-c1": pair,
        "b3-b4-c2": pair,
        "d": (f, "c1", "c2"),
        "c1": "b1-b2-c1",
        "c2": "b3-b4-c2",
    })
    expect(d, [3], False, {"a": 1, "d": quad})
    expect(d, [3], True, {
        "a": 1,
        "b1-b2-b3-b4-c1-c2-d": quad,
        "d": "b1-b2-b3-b4-c1-c2-d",
    })

    # --- binary tree with 8 leaves ---
    d = {
        "a": 1,
        **leaves(8),
        **layer("c", 4, "b"),
        **layer("d", 2, "c"),
        "e": (f, "d1", "d2"),
    }
    untouched(d, 1.9)
    expect(d, [2, 2.9], False, {
        "a": 1,
        "c1": pair,
        "c2": pair,
        "c3": pair,
        "c4": pair,
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "e": (f, "d1", "d2"),
    })
    expect(d, [2, 2.9], True, {
        "a": 1,
        "b1-b2-c1": pair,
        "b3-b4-c2": pair,
        "b5-b6-c3": pair,
        "b7-b8-c4": pair,
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "e": (f, "d1", "d2"),
        "c1": "b1-b2-c1",
        "c2": "b3-b4-c2",
        "c3": "b5-b6-c3",
        "c4": "b7-b8-c4",
    })
    expect(d, [3, 4.6], False, {
        "a": 1,
        "d1": quad,
        "d2": quad,
        "e": (f, "d1", "d2"),
    })
    expect(d, [3, 4.6], True, {
        "a": 1,
        "b1-b2-b3-b4-c1-c2-d1": quad,
        "b5-b6-b7-b8-c3-c4-d2": quad,
        "e": (f, "d1", "d2"),
        "d1": "b1-b2-b3-b4-c1-c2-d1",
        "d2": "b5-b6-b7-b8-c3-c4-d2",
    })
    expect(d, [4.7], False, {"a": 1, "e": octo})
    expect(d, [4.7], True, {
        "a": 1,
        "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e": octo,
        "e": "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e",
    })

    # --- binary tree with 16 leaves ---
    d = {
        "a": 1,
        **leaves(16),
        **layer("c", 8, "b"),
        **layer("d", 4, "c"),
        **layer("e", 2, "d"),
        "f": (f, "e1", "e2"),
    }
    untouched(d, 1.9)

    # Unfused upper levels as they appear in several expected graphs.
    top_levels = {**layer("e", 2, "d"), "f": (f, "e1", "e2")}
    upper_levels = {**layer("d", 4, "c"), **top_levels}

    expect(d, [2, 2.9], False, {
        "a": 1,
        **{"c%d" % i: pair for i in range(1, 9)},
        **upper_levels,
    })
    fused_c = [
        "b1-b2-c1",
        "b3-b4-c2",
        "b5-b6-c3",
        "b7-b8-c4",
        "b10-b9-c5",
        "b11-b12-c6",
        "b13-b14-c7",
        "b15-b16-c8",
    ]
    expect(d, [2, 2.9], True, {
        "a": 1,
        **{name: pair for name in fused_c},
        **upper_levels,
        **{"c%d" % i: name for i, name in enumerate(fused_c, 1)},
    })
    expect(d, [3, 4.6], False, {
        "a": 1,
        **{"d%d" % i: quad for i in range(1, 5)},
        **top_levels,
    })
    fused_d = [
        "b1-b2-b3-b4-c1-c2-d1",
        "b5-b6-b7-b8-c3-c4-d2",
        "b10-b11-b12-b9-c5-c6-d3",
        "b13-b14-b15-b16-c7-c8-d4",
    ]
    expect(d, [3, 4.6], True, {
        "a": 1,
        **{name: quad for name in fused_d},
        **top_levels,
        **{"d%d" % i: name for i, name in enumerate(fused_d, 1)},
    })
    expect(d, [4.7, 7.4], False, {
        "a": 1,
        "e1": octo,
        "e2": octo,
        "f": (f, "e1", "e2"),
    })
    expect(d, [4.7, 7.4], True, {
        "a": 1,
        "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e1": octo,
        "b10-b11-b12-b13-b14-b15-b16-b9-c5-c6-c7-c8-d3-d4-e2": octo,
        "f": (f, "e1", "e2"),
        "e1": "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e1",
        "e2": "b10-b11-b12-b13-b14-b15-b16-b9-c5-c6-c7-c8-d3-d4-e2",
    })
    everything = (f, octo, octo)
    expect(d, [7.5], False, {"a": 1, "f": everything})
    expect(d, [7.5], True, {
        "a": 1,
        "b1-b10-b11-b12-b13-b14-b15-b16-b2-b3-b4-b5-b6-b7-b8-b9-c1-c2-c3-c4-c5-c6-c7-c8-d1-d2-d3-d4-e1-e2-f": everything,
        "f": "b1-b10-b11-b12-b13-b14-b15-b16-b2-b3-b4-b5-b6-b7-b8-b9-c1-c2-c3-c4-c5-c6-c7-c8-d1-d2-d3-d4-e1-e2-f",
    })

    # --- plain linear chains at width 1 ---
    d = {"a": 1, "b": (f, "a")}
    expect(d, [1], False, {"b": (f, 1)})
    expect(d, [1], True, {"a-b": (f, 1), "b": "a-b"})

    d = {"a": 1, "b": (f, "a"), "c": (f, "b"), "d": (f, "c")}
    expect(d, [1], False, {"d": (f, (f, (f, 1)))})
    expect(d, [1], True, {"a-b-c-d": (f, (f, (f, 1))), "d": "a-b-c-d"})

    # --- chain where every step also reads "a" ---
    d = {"a": 1, "b": (f, "a"), "c": (f, "a", "b"), "d": (f, "a", "c")}
    nested = (f, "a", (f, "a", (f, "a")))
    expect(d, [1], False, {"a": 1, "d": nested})
    expect(d, [1], True, {"a": 1, "b-c-d": nested, "d": "b-c-d"})

    # --- long chain joined with a short side branch ---
    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "c1": (f, "b1"),
        "d1": (f, "c1"),
        "e1": (f, "d1"),
        "f": (f, "e1", "b2"),
    }
    long_branch = (f, (f, (f, (f, "a"))))
    expect(d, [1, 1.9], False, {
        "a": 1,
        "b2": (f, "a"),
        "e1": long_branch,
        "f": (f, "e1", "b2"),
    })
    expect(d, [1, 1.9], True, {
        "a": 1,
        "b2": (f, "a"),
        "b1-c1-d1-e1": long_branch,
        "f": (f, "e1", "b2"),
        "e1": "b1-c1-d1-e1",
    })
    joined = (f, long_branch, (f, "a"))
    expect(d, [2], False, {"a": 1, "f": joined})
    expect(d, [2], True, {
        "a": 1,
        "b1-b2-c1-d1-e1-f": joined,
        "f": "b1-b2-c1-d1-e1-f",
    })

    # --- same shape, but every step also reads "a" ---
    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "c1": (f, "a", "b1"),
        "d1": (f, "a", "c1"),
        "e1": (f, "a", "d1"),
        "f": (f, "a", "e1", "b2"),
    }
    long_branch = (f, "a", (f, "a", (f, "a", (f, "a"))))
    expect(d, [1, 1.9], False, {
        "a": 1,
        "b2": (f, "a"),
        "e1": long_branch,
        "f": (f, "a", "e1", "b2"),
    })
    expect(d, [1, 1.9], True, {
        "a": 1,
        "b2": (f, "a"),
        "b1-c1-d1-e1": long_branch,
        "f": (f, "a", "e1", "b2"),
        "e1": "b1-c1-d1-e1",
    })
    joined = (f, "a", long_branch, (f, "a"))
    expect(d, [2], False, {"a": 1, "f": joined})
    expect(d, [2], True, {
        "a": 1,
        "b1-b2-c1-d1-e1-f": joined,
        "f": "b1-b2-c1-d1-e1-f",
    })

    # --- three parallel chains reduced, followed by a chain ---
    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "b3": (f, "a"),
        "c1": (f, "b1"),
        "c2": (f, "b2"),
        "c3": (f, "b3"),
        "d1": (f, "c1"),
        "d2": (f, "c2"),
        "d3": (f, "c3"),
        "e": (f, "d1", "d2", "d3"),
        "f": (f, "e"),
        "g": (f, "f"),
    }
    column = (f, (f, (f, "a")))
    tail = (f, (f, (f, "d1", "d2", "d3")))
    expect(d, [1], False, {
        "a": 1,
        "d1": column,
        "d2": column,
        "d3": column,
        "g": tail,
    })
    expect(d, [1], True, {
        "a": 1,
        "b1-c1-d1": column,
        "b2-c2-d2": column,
        "b3-c3-d3": column,
        "e-f-g": tail,
        "d1": "b1-c1-d1",
        "d2": "b2-c2-d2",
        "d3": "b3-c3-d3",
        "g": "e-f-g",
    })

    # --- keys with two dependents split the chain ---
    d = {
        "a": 1,
        "b": (f, "a"),
        "c": (f, "b"),
        "d": (f, "b", "c"),
        "e": (f, "d"),
        "f": (f, "e"),
        "g": (f, "d", "f"),
    }
    expect(d, [1], False, {
        "b": (f, 1),
        "d": (f, "b", (f, "b")),
        "g": (f, "d", (f, (f, "d"))),
    })
    expect(d, [1], True, {
        "a-b": (f, 1),
        "c-d": (f, "b", (f, "b")),
        "e-f-g": (f, "d", (f, (f, "d"))),
        "b": "a-b",
        "d": "c-d",
        "g": "e-f-g",
    })
def _rechunk_array(
    source_array,
    target_chunks,
    max_mem,
    target_store_or_group,
    temp_store_or_group=None,
    name=None,
    source_storage_options=None,
    temp_storage_options=None,
    target_storage_options=None,
):
    """Build a delayed task that copies ``source_array`` with new chunking.

    Parameters
    ----------
    source_array
        Array to read from; its ``shape``, ``chunks``, ``dtype`` and
        ``attrs`` are used.
    target_chunks : tuple, dict or None
        Desired chunks.  ``None`` reuses the source chunks.  A dict is
        converted to a tuple using the array's dimension names.
    max_mem
        Memory limit handed to ``rechunking_plan``.
    target_store_or_group
        Where the target array is created.
    temp_store_or_group : optional
        Where the intermediate array is created; required when the plan's
        read and write chunks differ.
    name : optional
        Name passed on when creating the target/intermediate arrays.
    source_storage_options, temp_storage_options : dict, optional
        Storage options used when reading the source and the intermediate
        array.  ``None`` (the default) means an empty dict.
    target_storage_options : dict, optional
        Accepted for signature compatibility; not used in this function.

    Returns
    -------
    A delayed store operation.  In the two-step case it is a ``Delayed``
    wrapping a single fused graph that contains both store steps.

    Raises
    ------
    KeyError
        If ``target_chunks`` is a dict that does not cover every dimension.
    AssertionError
        If an intermediate array is needed but ``temp_store_or_group`` is
        ``None``, or if the read root of the second step cannot be
        identified uniquely.
    """
    # Avoid mutable default arguments: normalise None to fresh dicts here.
    if source_storage_options is None:
        source_storage_options = {}
    if temp_storage_options is None:
        temp_storage_options = {}

    shape = source_array.shape
    source_chunks = source_array.chunks
    dtype = source_array.dtype
    itemsize = dtype.itemsize

    if target_chunks is None:
        # this is just a pass-through copy
        target_chunks = source_chunks

    if isinstance(target_chunks, dict):
        array_dims = _get_dims_from_zarr_array(source_array)
        try:
            target_chunks = _shape_dict_to_tuple(array_dims, target_chunks)
        except KeyError as err:
            # Chain the original error so the missing dimension stays visible.
            raise KeyError(
                "You must explicitly specify each dimension size in target_chunks. "
                f"Got array_dims {array_dims}, target_chunks {target_chunks}."
            ) from err

    read_chunks, int_chunks, write_chunks = rechunking_plan(
        shape, source_chunks, target_chunks, itemsize, max_mem
    )
    print(source_chunks, read_chunks, int_chunks, write_chunks, target_chunks)

    source_read = dsa.from_zarr(
        source_array, chunks=read_chunks, storage_options=source_storage_options
    )

    # create target
    shape = tuple(int(x) for x in shape)  # ensure python ints for serialization
    target_chunks = tuple(int(x) for x in target_chunks)
    int_chunks = tuple(int(x) for x in int_chunks)
    write_chunks = tuple(int(x) for x in write_chunks)
    target_array = _zarr_empty(
        shape, target_store_or_group, target_chunks, dtype, name=name
    )
    target_array.attrs.update(source_array.attrs)

    if read_chunks == write_chunks:
        # Single step: store what was read straight into the target.
        target_store_delayed = dsa.store(
            source_read, target_array, lock=False, compute=False
        )
        return target_store_delayed

    # do intermediate store
    assert temp_store_or_group is not None
    int_array = _zarr_empty(shape, temp_store_or_group, int_chunks, dtype, name=name)
    intermediate_store_delayed = dsa.store(
        source_read, int_array, lock=False, compute=False
    )

    int_read = dsa.from_zarr(
        int_array, chunks=write_chunks, storage_options=temp_storage_options
    )
    target_store_delayed = dsa.store(
        int_read, target_array, lock=False, compute=False
    )

    # now do some hacking to chain these together into a single graph.
    # get the two graphs as dicts
    int_dsk = dask.utils.ensure_dict(intermediate_store_delayed.dask)
    target_dsk = dask.utils.ensure_dict(target_store_delayed.dask)

    # find the root store key representing the read
    root_keys = [
        key
        for key in target_dsk
        if isinstance(key, str) and key.startswith("from-zarr")
    ]
    assert len(root_keys) == 1
    root_key = root_keys[0]

    # now rewrite the graph: the root task is wrapped in a function that
    # returns its first argument and ignores the rest, and the elements of
    # the intermediate store task are appended as those extra arguments.
    target_dsk[root_key] = (
        lambda a, *b: a,
        target_dsk[root_key],
        *int_dsk[intermediate_store_delayed.key],
    )
    target_dsk.update(int_dsk)

    # fuse
    dsk_fused, _ = fuse(target_dsk)
    delayed_fused = Delayed(target_store_delayed.key, dsk_fused)
    print("Two step rechunking plan")
    return delayed_fused
def test_fuse():
    """Check `fuse2` on small graphs, with and without key renaming."""
    fuse_under_test = fuse2  # tests both `fuse` and `fuse_linear`

    # Each case is (input graph, expected graph for rename_keys=False,
    # expected graph for rename_keys=True).
    cases = [
        (
            {
                'w': (inc, 'x'),
                'x': (inc, 'y'),
                'y': (inc, 'z'),
                'z': (add, 'a', 'b'),
                'a': 1,
                'b': 2,
            },
            {
                'w': (inc, (inc, (inc, (add, 'a', 'b')))),
                'a': 1,
                'b': 2,
            },
            {
                'z-y-x-w': (inc, (inc, (inc, (add, 'a', 'b')))),
                'a': 1,
                'b': 2,
                'w': 'z-y-x-w',
            },
        ),
        (
            {
                'NEW': (inc, 'y'),
                'w': (inc, 'x'),
                'x': (inc, 'y'),
                'y': (inc, 'z'),
                'z': (add, 'a', 'b'),
                'a': 1,
                'b': 2,
            },
            {
                'NEW': (inc, 'y'),
                'w': (inc, (inc, 'y')),
                'y': (inc, (add, 'a', 'b')),
                'a': 1,
                'b': 2,
            },
            {
                'NEW': (inc, 'z-y'),
                'x-w': (inc, (inc, 'z-y')),
                'z-y': (inc, (add, 'a', 'b')),
                'a': 1,
                'b': 2,
                'w': 'x-w',
                'y': 'z-y',
            },
        ),
        (
            {
                'v': (inc, 'y'),
                'u': (inc, 'w'),
                'w': (inc, 'x'),
                'x': (inc, 'y'),
                'y': (inc, 'z'),
                'z': (add, 'a', 'b'),
                'a': (inc, 'c'),
                'b': (inc, 'd'),
                'c': 1,
                'd': 2,
            },
            {
                'u': (inc, (inc, (inc, 'y'))),
                'v': (inc, 'y'),
                'y': (inc, (add, 'a', 'b')),
                'a': (inc, 1),
                'b': (inc, 2),
            },
            {
                'x-w-u': (inc, (inc, (inc, 'z-y'))),
                'v': (inc, 'z-y'),
                'z-y': (inc, (add, 'c-a', 'd-b')),
                'c-a': (inc, 1),
                'd-b': (inc, 2),
                'a': 'c-a',
                'b': 'd-b',
                'u': 'x-w-u',
                'y': 'z-y',
            },
        ),
        (
            {
                'a': (inc, 'x'),
                'b': (inc, 'x'),
                'c': (inc, 'x'),
                'd': (inc, 'c'),
                'x': (inc, 'y'),
                'y': 0,
            },
            {
                'a': (inc, 'x'),
                'b': (inc, 'x'),
                'd': (inc, (inc, 'x')),
                'x': (inc, 0),
            },
            {
                'a': (inc, 'y-x'),
                'b': (inc, 'y-x'),
                'c-d': (inc, (inc, 'y-x')),
                'y-x': (inc, 0),
                'd': 'c-d',
                'x': 'y-x',
            },
        ),
        (
            {
                'a': 1,
                'b': (inc, 'a'),
                'c': (add, 'b', 'b'),
            },
            {
                'b': (inc, 1),
                'c': (add, 'b', 'b'),
            },
            {
                'a-b': (inc, 1),
                'c': (add, 'a-b', 'a-b'),
                'b': 'a-b',
            },
        ),
    ]

    for graph, plain, renamed in cases:
        assert fuse_under_test(graph, rename_keys=False) == with_deps(plain)
        assert fuse_under_test(graph, rename_keys=True) == with_deps(renamed)
def test_fuse_reductions_single_input():
    """Check how `ave_width` gates fusion in graphs whose only data is 'a'.

    Every graph is fused just below and at the width where the expected
    result changes, once with ``rename_keys=False`` and once with
    ``rename_keys=True``.
    """

    def f(*args):
        return args

    def check(graph, widths, plain, renamed):
        # For each width, compare the un-renamed outputs first, then the
        # renamed ones.
        for rename, expected in ((False, plain), (True, renamed)):
            for width in widths:
                assert fuse(graph, ave_width=width, rename_keys=rename) == with_deps(expected)

    def leaves(count):
        # Fresh tasks 'b1'..'b<count>', each of which reads only 'a'.
        return {'b%d' % i: (f, 'a') for i in range(1, count + 1)}

    # Expected fused sub-expressions that recur in the binary-tree cases.
    leaf = (f, 'a')
    pair = (f, leaf, leaf)
    quad = (f, pair, pair)
    octet = (f, quad, quad)

    # Two branches of different arity feeding one reducer.
    d = {
        'a': 1,
        'b1': (f, 'a'),
        'b2': (f, 'a', 'a'),
        'c': (f, 'b1', 'b2'),
    }
    check(d, [1.9], d, d)
    fused = (f, (f, 'a'), (f, 'a', 'a'))
    check(d, [2], {'a': 1, 'c': fused}, {'a': 1, 'b1-b2-c': fused, 'c': 'b1-b2-c'})

    # Three branches feeding one reducer.
    d = {
        'a': 1,
        'b1': (f, 'a'),
        'b2': (f, 'a', 'a'),
        'b3': (f, 'a', 'a', 'a'),
        'c': (f, 'b1', 'b2', 'b3'),
    }
    check(d, [2.9], d, d)
    fused = (f, (f, 'a'), (f, 'a', 'a'), (f, 'a', 'a', 'a'))
    check(d, [3], {'a': 1, 'c': fused}, {'a': 1, 'b1-b2-b3-c': fused, 'c': 'b1-b2-b3-c'})

    # The reducer also reads 'a' directly.
    d = {
        'a': 1,
        'b1': (f, 'a'),
        'b2': (f, 'a'),
        'c': (f, 'a', 'b1', 'b2'),
    }
    check(d, [1.9], d, d)
    fused = (f, 'a', (f, 'a'), (f, 'a'))
    check(d, [2], {'a': 1, 'c': fused}, {'a': 1, 'b1-b2-c': fused, 'c': 'b1-b2-c'})

    # Two stacked diamonds.
    d = {
        'a': 1,
        'b1': (f, 'a'),
        'b2': (f, 'a'),
        'c': (f, 'b1', 'b2'),
        'd1': (f, 'c'),
        'd2': (f, 'c'),
        'e': (f, 'd1', 'd2'),
    }
    check(d, [1.9], d, d)
    check(
        d,
        [2],
        {
            'a': 1,
            'c': pair,
            'e': (f, (f, 'c'), (f, 'c')),
        },
        {
            'a': 1,
            'b1-b2-c': pair,
            'd1-d2-e': (f, (f, 'c'), (f, 'c')),
            'c': 'b1-b2-c',
            'e': 'd1-d2-e',
        },
    )

    # Binary reduction tree over 4 leaves.
    d = {
        'a': 1,
        **leaves(4),
        'c1': (f, 'b1', 'b2'),
        'c2': (f, 'b3', 'b4'),
        'd': (f, 'c1', 'c2'),
    }
    check(d, [1.9], d, d)
    check(
        d,
        [2, 2.9],
        {
            'a': 1,
            'c1': pair,
            'c2': pair,
            'd': (f, 'c1', 'c2'),
        },
        {
            'a': 1,
            'b1-b2-c1': pair,
            'b3-b4-c2': pair,
            'd': (f, 'c1', 'c2'),
            'c1': 'b1-b2-c1',
            'c2': 'b3-b4-c2',
        },
    )
    check(
        d,
        [3],
        {'a': 1, 'd': quad},
        {'a': 1, 'b1-b2-b3-b4-c1-c2-d': quad, 'd': 'b1-b2-b3-b4-c1-c2-d'},
    )

    # Binary reduction tree over 8 leaves.
    d = {
        'a': 1,
        **leaves(8),
        'c1': (f, 'b1', 'b2'),
        'c2': (f, 'b3', 'b4'),
        'c3': (f, 'b5', 'b6'),
        'c4': (f, 'b7', 'b8'),
        'd1': (f, 'c1', 'c2'),
        'd2': (f, 'c3', 'c4'),
        'e': (f, 'd1', 'd2'),
    }
    check(d, [1.9], d, d)
    check(
        d,
        [2, 2.9],
        {
            'a': 1,
            'c1': pair,
            'c2': pair,
            'c3': pair,
            'c4': pair,
            'd1': (f, 'c1', 'c2'),
            'd2': (f, 'c3', 'c4'),
            'e': (f, 'd1', 'd2'),
        },
        {
            'a': 1,
            'b1-b2-c1': pair,
            'b3-b4-c2': pair,
            'b5-b6-c3': pair,
            'b7-b8-c4': pair,
            'd1': (f, 'c1', 'c2'),
            'd2': (f, 'c3', 'c4'),
            'e': (f, 'd1', 'd2'),
            'c1': 'b1-b2-c1',
            'c2': 'b3-b4-c2',
            'c3': 'b5-b6-c3',
            'c4': 'b7-b8-c4',
        },
    )
    check(
        d,
        [3, 4.6],
        {
            'a': 1,
            'd1': quad,
            'd2': quad,
            'e': (f, 'd1', 'd2'),
        },
        {
            'a': 1,
            'b1-b2-b3-b4-c1-c2-d1': quad,
            'b5-b6-b7-b8-c3-c4-d2': quad,
            'e': (f, 'd1', 'd2'),
            'd1': 'b1-b2-b3-b4-c1-c2-d1',
            'd2': 'b5-b6-b7-b8-c3-c4-d2',
        },
    )
    check(
        d,
        [4.7],
        {'a': 1, 'e': octet},
        {
            'a': 1,
            'b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e': octet,
            'e': 'b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e',
        },
    )

    # Binary reduction tree over 16 leaves.
    d = {
        'a': 1,
        **leaves(16),
        'c1': (f, 'b1', 'b2'),
        'c2': (f, 'b3', 'b4'),
        'c3': (f, 'b5', 'b6'),
        'c4': (f, 'b7', 'b8'),
        'c5': (f, 'b9', 'b10'),
        'c6': (f, 'b11', 'b12'),
        'c7': (f, 'b13', 'b14'),
        'c8': (f, 'b15', 'b16'),
        'd1': (f, 'c1', 'c2'),
        'd2': (f, 'c3', 'c4'),
        'd3': (f, 'c5', 'c6'),
        'd4': (f, 'c7', 'c8'),
        'e1': (f, 'd1', 'd2'),
        'e2': (f, 'd3', 'd4'),
        'f': (f, 'e1', 'e2'),
    }
    check(d, [1.9], d, d)
    check(
        d,
        [2, 2.9],
        {
            'a': 1,
            'c1': pair,
            'c2': pair,
            'c3': pair,
            'c4': pair,
            'c5': pair,
            'c6': pair,
            'c7': pair,
            'c8': pair,
            'd1': (f, 'c1', 'c2'),
            'd2': (f, 'c3', 'c4'),
            'd3': (f, 'c5', 'c6'),
            'd4': (f, 'c7', 'c8'),
            'e1': (f, 'd1', 'd2'),
            'e2': (f, 'd3', 'd4'),
            'f': (f, 'e1', 'e2'),
        },
        {
            'a': 1,
            'b1-b2-c1': pair,
            'b3-b4-c2': pair,
            'b5-b6-c3': pair,
            'b7-b8-c4': pair,
            'b10-b9-c5': pair,
            'b11-b12-c6': pair,
            'b13-b14-c7': pair,
            'b15-b16-c8': pair,
            'd1': (f, 'c1', 'c2'),
            'd2': (f, 'c3', 'c4'),
            'd3': (f, 'c5', 'c6'),
            'd4': (f, 'c7', 'c8'),
            'e1': (f, 'd1', 'd2'),
            'e2': (f, 'd3', 'd4'),
            'f': (f, 'e1', 'e2'),
            'c1': 'b1-b2-c1',
            'c2': 'b3-b4-c2',
            'c3': 'b5-b6-c3',
            'c4': 'b7-b8-c4',
            'c5': 'b10-b9-c5',
            'c6': 'b11-b12-c6',
            'c7': 'b13-b14-c7',
            'c8': 'b15-b16-c8',
        },
    )
    check(
        d,
        [3, 4.6],
        {
            'a': 1,
            'd1': quad,
            'd2': quad,
            'd3': quad,
            'd4': quad,
            'e1': (f, 'd1', 'd2'),
            'e2': (f, 'd3', 'd4'),
            'f': (f, 'e1', 'e2'),
        },
        {
            'a': 1,
            'b1-b2-b3-b4-c1-c2-d1': quad,
            'b5-b6-b7-b8-c3-c4-d2': quad,
            'b10-b11-b12-b9-c5-c6-d3': quad,
            'b13-b14-b15-b16-c7-c8-d4': quad,
            'e1': (f, 'd1', 'd2'),
            'e2': (f, 'd3', 'd4'),
            'f': (f, 'e1', 'e2'),
            'd1': 'b1-b2-b3-b4-c1-c2-d1',
            'd2': 'b5-b6-b7-b8-c3-c4-d2',
            'd3': 'b10-b11-b12-b9-c5-c6-d3',
            'd4': 'b13-b14-b15-b16-c7-c8-d4',
        },
    )
    check(
        d,
        [4.7, 7.4],
        {
            'a': 1,
            'e1': octet,
            'e2': octet,
            'f': (f, 'e1', 'e2'),
        },
        {
            'a': 1,
            'b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e1': octet,
            'b10-b11-b12-b13-b14-b15-b16-b9-c5-c6-c7-c8-d3-d4-e2': octet,
            'f': (f, 'e1', 'e2'),
            'e1': 'b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e1',
            'e2': 'b10-b11-b12-b13-b14-b15-b16-b9-c5-c6-c7-c8-d3-d4-e2',
        },
    )
    everything = (
        'b1-b10-b11-b12-b13-b14-b15-b16-b2-b3-b4-b5-b6-b7-b8-b9-'
        'c1-c2-c3-c4-c5-c6-c7-c8-d1-d2-d3-d4-e1-e2-f'
    )
    check(
        d,
        [7.5],
        {'a': 1, 'f': (f, octet, octet)},
        {'a': 1, everything: (f, octet, octet), 'f': everything},
    )

    # Linear chains fuse completely at ave_width=1.
    d = {
        'a': 1,
        'b': (f, 'a'),
    }
    check(d, [1], {'b': (f, 1)}, {'a-b': (f, 1), 'b': 'a-b'})

    d = {
        'a': 1,
        'b': (f, 'a'),
        'c': (f, 'b'),
        'd': (f, 'c'),
    }
    check(
        d,
        [1],
        {'d': (f, (f, (f, 1)))},
        {'a-b-c-d': (f, (f, (f, 1))), 'd': 'a-b-c-d'},
    )

    # A chain in which every step also reads 'a'.
    d = {
        'a': 1,
        'b': (f, 'a'),
        'c': (f, 'a', 'b'),
        'd': (f, 'a', 'c'),
    }
    fused = (f, 'a', (f, 'a', (f, 'a')))
    check(d, [1], {'a': 1, 'd': fused}, {'a': 1, 'b-c-d': fused, 'd': 'b-c-d'})

    # One long branch and one short branch joined at 'f'.
    d = {
        'a': 1,
        'b1': (f, 'a'),
        'b2': (f, 'a'),
        'c1': (f, 'b1'),
        'd1': (f, 'c1'),
        'e1': (f, 'd1'),
        'f': (f, 'e1', 'b2'),
    }
    long_branch = (f, (f, (f, (f, 'a'))))
    check(
        d,
        [1, 1.9],
        {
            'a': 1,
            'b2': (f, 'a'),
            'e1': long_branch,
            'f': (f, 'e1', 'b2'),
        },
        {
            'a': 1,
            'b2': (f, 'a'),
            'b1-c1-d1-e1': long_branch,
            'f': (f, 'e1', 'b2'),
            'e1': 'b1-c1-d1-e1',
        },
    )
    fused = (f, (f, (f, (f, (f, 'a')))), (f, 'a'))
    check(
        d,
        [2],
        {'a': 1, 'f': fused},
        {'a': 1, 'b1-b2-c1-d1-e1-f': fused, 'f': 'b1-b2-c1-d1-e1-f'},
    )

    # Same shape, but every step of the long branch also reads 'a'.
    d = {
        'a': 1,
        'b1': (f, 'a'),
        'b2': (f, 'a'),
        'c1': (f, 'a', 'b1'),
        'd1': (f, 'a', 'c1'),
        'e1': (f, 'a', 'd1'),
        'f': (f, 'a', 'e1', 'b2'),
    }
    long_branch = (f, 'a', (f, 'a', (f, 'a', (f, 'a'))))
    check(
        d,
        [1, 1.9],
        {
            'a': 1,
            'b2': (f, 'a'),
            'e1': long_branch,
            'f': (f, 'a', 'e1', 'b2'),
        },
        {
            'a': 1,
            'b2': (f, 'a'),
            'b1-c1-d1-e1': long_branch,
            'f': (f, 'a', 'e1', 'b2'),
            'e1': 'b1-c1-d1-e1',
        },
    )
    fused = (f, 'a', (f, 'a', (f, 'a', (f, 'a', (f, 'a')))), (f, 'a'))
    check(
        d,
        [2],
        {'a': 1, 'f': fused},
        {'a': 1, 'b1-b2-c1-d1-e1-f': fused, 'f': 'b1-b2-c1-d1-e1-f'},
    )

    # Three parallel chains reduced by 'e', followed by a linear tail.
    d = {
        'a': 1,
        'b1': (f, 'a'),
        'b2': (f, 'a'),
        'b3': (f, 'a'),
        'c1': (f, 'b1'),
        'c2': (f, 'b2'),
        'c3': (f, 'b3'),
        'd1': (f, 'c1'),
        'd2': (f, 'c2'),
        'd3': (f, 'c3'),
        'e': (f, 'd1', 'd2', 'd3'),
        'f': (f, 'e'),
        'g': (f, 'f'),
    }
    chain = (f, (f, (f, 'a')))
    tail = (f, (f, (f, 'd1', 'd2', 'd3')))
    check(
        d,
        [1],
        {
            'a': 1,
            'd1': chain,
            'd2': chain,
            'd3': chain,
            'g': tail,
        },
        {
            'a': 1,
            'b1-c1-d1': chain,
            'b2-c2-d2': chain,
            'b3-c3-d3': chain,
            'e-f-g': tail,
            'd1': 'b1-c1-d1',
            'd2': 'b2-c2-d2',
            'd3': 'b3-c3-d3',
            'g': 'e-f-g',
        },
    )

    # Tasks that read both the start and the end of a short chain.
    d = {
        'a': 1,
        'b': (f, 'a'),
        'c': (f, 'b'),
        'd': (f, 'b', 'c'),
        'e': (f, 'd'),
        'f': (f, 'e'),
        'g': (f, 'd', 'f'),
    }
    check(
        d,
        [1],
        {
            'b': (f, 1),
            'd': (f, 'b', (f, 'b')),
            'g': (f, 'd', (f, (f, 'd'))),
        },
        {
            'a-b': (f, 1),
            'c-d': (f, 'b', (f, 'b')),
            'e-f-g': (f, 'd', (f, (f, 'd'))),
            'b': 'a-b',
            'd': 'c-d',
            'g': 'e-f-g',
        },
    )
def test_fuse_subgraphs():
    """Check `fuse(..., fuse_subgraphs=True)` for several choices of keys."""
    dsk = {
        'x-1': 1,
        'inc-1': (inc, 'x-1'),
        'inc-2': (inc, 'inc-1'),
        'add-1': (add, 'x-1', 'inc-2'),
        'inc-3': (inc, 'add-1'),
        'inc-4': (inc, 'inc-3'),
        'add-2': (add, 'add-1', 'inc-4'),
        'inc-5': (inc, 'add-2'),
        'inc-6': (inc, 'inc-5'),
    }

    def subgraph_task(graph, outkey, inkeys=()):
        # A task made of a SubgraphCallable followed by its input keys.
        return (SubgraphCallable(graph, outkey, inkeys),) + inkeys

    # Expected expressions that recur inside the fused subgraphs.
    add1_inlined = (add, 'x-1', (inc, (inc, 'x-1')))
    add2_from_add1 = (add, 'add-1', (inc, (inc, 'add-1')))
    inc6_from_add1 = (inc, (inc, add2_from_add1))
    # Both possible orders of the subgraph inputs.
    orderings = list(itertools.permutations(('x-1', 'inc-2')))

    # Only the final key is requested: one renamed subgraph task.
    assert fuse(dsk, 'inc-6', fuse_subgraphs=True) == with_deps({
        'inc-6': 'add-inc-x-1',
        'add-inc-x-1': subgraph_task(
            {'x-1': 1, 'add-1': add1_inlined, 'inc-6': inc6_from_add1},
            'inc-6',
        ),
    })

    # The same request without renaming keeps the task under 'inc-6'.
    assert fuse(dsk, 'inc-6', fuse_subgraphs=True, rename_keys=False) == with_deps({
        'inc-6': subgraph_task(
            {'x-1': 1, 'add-1': add1_inlined, 'inc-6': inc6_from_add1},
            'inc-6',
        ),
    })

    # Requesting 'add-2' ends the subgraph there; the tail fuses linearly.
    assert fuse(dsk, 'add-2', fuse_subgraphs=True) == with_deps({
        'add-inc-x-1': subgraph_task(
            {'x-1': 1, 'add-1': add1_inlined, 'add-2': add2_from_add1},
            'add-2',
        ),
        'add-2': 'add-inc-x-1',
        'inc-6': (inc, (inc, 'add-2')),
    })

    # The order of the subgraph arguments is unstable, so accept either.
    res = fuse(dsk, 'inc-2', fuse_subgraphs=True)
    sols = [
        with_deps({
            'x-1': 1,
            'inc-2': (inc, (inc, 'x-1')),
            'inc-6': 'inc-add-1',
            'inc-add-1': subgraph_task(
                {'add-1': (add, 'x-1', 'inc-2'), 'inc-6': inc6_from_add1},
                'inc-6',
                inkeys,
            ),
        })
        for inkeys in orderings
    ]
    assert res in sols

    # Again the argument order is unstable, so accept either permutation.
    res = fuse(dsk, ['inc-2', 'add-2'], fuse_subgraphs=True)
    sols = [
        with_deps({
            'x-1': 1,
            'inc-2': (inc, (inc, 'x-1')),
            'inc-add-1': subgraph_task(
                {'add-1': (add, 'x-1', 'inc-2'), 'add-2': add2_from_add1},
                'add-2',
                inkeys,
            ),
            'add-2': 'inc-add-1',
            'inc-6': (inc, (inc, 'add-2')),
        })
        for inkeys in orderings
    ]
    assert res in sols
def test_fuse_reductions_multiple_input():
    """Check `ave_width`-gated fusion in graphs with two data keys."""

    def f(*args):
        return args

    def check(graph, widths, plain, renamed):
        # For each width, compare the un-renamed outputs first, then the
        # renamed ones.
        for rename, expected in ((False, plain), (True, renamed)):
            for width in widths:
                assert fuse(graph, ave_width=width, rename_keys=rename) == with_deps(expected)

    # Two inputs -> one reducer -> one consumer.
    d = {
        'a1': 1,
        'a2': 2,
        'b': (f, 'a1', 'a2'),
        'c': (f, 'b'),
    }
    check(
        d,
        [2],
        {'c': (f, (f, 1, 2))},
        {'a1-a2-b-c': (f, (f, 1, 2)), 'c': 'a1-a2-b-c'},
    )
    check(
        d,
        [1],
        {'a1': 1, 'a2': 2, 'c': (f, (f, 'a1', 'a2'))},
        {'a1': 1, 'a2': 2, 'b-c': (f, (f, 'a1', 'a2')), 'c': 'b-c'},
    )

    # Three intermediate tasks reduced by a single task.
    d = {
        'a1': 1,
        'a2': 2,
        'b1': (f, 'a1'),
        'b2': (f, 'a1', 'a2'),
        'b3': (f, 'a2'),
        'c': (f, 'b1', 'b2', 'b3'),
    }
    check(d, [1, 2.9], d, d)
    fused = (f, (f, 'a1'), (f, 'a1', 'a2'), (f, 'a2'))
    check(
        d,
        [3],
        {'a1': 1, 'a2': 2, 'c': fused},
        {'a1': 1, 'a2': 2, 'b1-b2-b3-c': fused, 'c': 'b1-b2-b3-c'},
    )

    # Two reducers that share the middle intermediate task.
    d = {
        'a1': 1,
        'a2': 2,
        'b1': (f, 'a1'),
        'b2': (f, 'a1', 'a2'),
        'b3': (f, 'a2'),
        'c1': (f, 'b1', 'b2'),
        'c2': (f, 'b2', 'b3'),
    }
    check(d, [1], d, d)
    check(
        d,
        [2],
        {
            'a1': 1,
            'a2': 2,
            'b2': (f, 'a1', 'a2'),
            'c1': (f, (f, 'a1'), 'b2'),
            'c2': (f, 'b2', (f, 'a2')),
        },
        {
            'a1': 1,
            'a2': 2,
            'b2': (f, 'a1', 'a2'),
            'b1-c1': (f, (f, 'a1'), 'b2'),
            'b3-c2': (f, 'b2', (f, 'a2')),
            'c1': 'b1-c1',
            'c2': 'b3-c2',
        },
    )

    # The same graph with a final reducer on top.
    d = {
        'a1': 1,
        'a2': 2,
        'b1': (f, 'a1'),
        'b2': (f, 'a1', 'a2'),
        'b3': (f, 'a2'),
        'c1': (f, 'b1', 'b2'),
        'c2': (f, 'b2', 'b3'),
        'd': (f, 'c1', 'c2'),
    }
    check(d, [1], d, d)
    # A more aggressive heuristic could do this at `ave_width=2`. Perhaps
    # we can improve this. Nevertheless, this is behaving as intended.
    fused = (f, (f, (f, 'a1'), 'b2'), (f, 'b2', (f, 'a2')))
    check(
        d,
        [3],
        {'a1': 1, 'a2': 2, 'b2': (f, 'a1', 'a2'), 'd': fused},
        {
            'a1': 1,
            'a2': 2,
            'b2': (f, 'a1', 'a2'),
            'b1-b3-c1-c2-d': fused,
            'd': 'b1-b3-c1-c2-d',
        },
    )
def _fuse_delayed(d):  # type: ignore[no-untyped-def]
    """Perform task fusion within a Delayed object.

    The graph attached to ``d`` (``d.dask``) is converted to a plain dict,
    passed through ``fuse``, and wrapped in a new ``Delayed`` that keeps the
    key of ``d``. The dependency mapping that ``fuse`` also returns is
    discarded.
    """
    # from https://github.com/dask/dask/issues/6219
    graph = dask.utils.ensure_dict(d.dask)
    fused_graph, _ = fuse(graph)
    # Use the public ``key`` attribute, as the rest of this file does,
    # instead of reaching into the private ``_key``.
    return Delayed(d.key, fused_graph)
def test_fuse_stressed():
    """Fuse a graph with tuple keys and check the returned dependencies.

    The second element returned by `fuse` must match what `with_deps`
    computes from the fused graph.
    """

    def f(*args):
        return args

    # Key names used throughout the graph.
    original = 'array-original-27b9f9d257a80fa6adae06a98faf71eb'
    array = 'array-27b9f9d257a80fa6adae06a98faf71eb'
    chol = 'cholesky-26a6b670a8aabb7e2f8936db7ccb6a88'
    upper = 'cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88'
    lt_dot = 'cholesky-lt-dot-26a6b670a8aabb7e2f8936db7ccb6a88'

    d = {
        original: 1,
        (upper, 0, 0): (f, (chol, 0, 0)),
        (chol, 1, 0): (f, (upper, 0, 1)),
        (array, 0, 0): (f, original, (slice(0, 10, None), slice(0, 10, None))),
        (upper, 1, 0): (chol, 0, 1),
        (chol, 1, 1): (f, (f, (array, 1, 1), (f, [(lt_dot, 1, 0, 1, 0)]))),
        (lt_dot, 1, 0, 1, 0): (f, (chol, 1, 0), (upper, 0, 1)),
        (array, 0, 1): (f, original, (slice(0, 10, None), slice(10, 20, None))),
        (upper, 1, 1): (f, (chol, 1, 1)),
        (chol, 0, 1): (f, (10, 10)),
        (array, 1, 1): (f, original, (slice(10, 20, None), slice(10, 20, None))),
        (upper, 0, 1): (f, (chol, 0, 0), (array, 0, 1)),
        (chol, 0, 0): (f, (array, 0, 0)),
    }
    # Request every block of the upper factor.
    keys = {
        (upper, 0, 0),
        (upper, 0, 1),
        (upper, 1, 0),
        (upper, 1, 1),
    }
    rv = fuse(d, keys=keys, ave_width=2, rename_keys=True)
    assert rv == with_deps(rv[0])
def test_fuse_stressed():
    """Fuse a graph with tuple keys and check the result is self-consistent.

    The dependencies returned by `fuse` must equal those that `with_deps`
    derives from the fused graph.
    """

    def f(*args):
        return args

    # Shared key prefixes.
    source = "array-original-27b9f9d257a80fa6adae06a98faf71eb"
    block = "array-27b9f9d257a80fa6adae06a98faf71eb"
    factor = "cholesky-26a6b670a8aabb7e2f8936db7ccb6a88"
    factor_upper = "cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88"
    factor_dot = "cholesky-lt-dot-26a6b670a8aabb7e2f8936db7ccb6a88"
    # The two slices used for each axis.
    first = slice(0, 10, None)
    second = slice(10, 20, None)

    d = {
        source: 1,
        (factor_upper, 0, 0): (f, (factor, 0, 0)),
        (factor, 1, 0): (f, (factor_upper, 0, 1)),
        (block, 0, 0): (f, source, (first, first)),
        (factor_upper, 1, 0): (factor, 0, 1),
        (factor, 1, 1): (
            f,
            (f, (block, 1, 1), (f, [(factor_dot, 1, 0, 1, 0)])),
        ),
        (factor_dot, 1, 0, 1, 0): (f, (factor, 1, 0), (factor_upper, 0, 1)),
        (block, 0, 1): (f, source, (first, second)),
        (factor_upper, 1, 1): (f, (factor, 1, 1)),
        (factor, 0, 1): (f, (10, 10)),
        (block, 1, 1): (f, source, (second, second)),
        (factor_upper, 0, 1): (f, (factor, 0, 0), (block, 0, 1)),
        (factor, 0, 0): (f, (block, 0, 0)),
    }
    # All four blocks of the upper factor are requested.
    keys = {(factor_upper, row, col) for row in (0, 1) for col in (0, 1)}
    rv = fuse(d, keys=keys, ave_width=2, rename_keys=True)
    assert rv == with_deps(rv[0])
def test_fuse_subgraphs(compare_subgraph_callables):
    """Check `fuse(..., fuse_subgraphs=True)` for several choices of keys.

    The ``compare_subgraph_callables`` fixture is requested but not
    referenced in the body.
    """
    dsk = {
        "x-1": 1,
        "inc-1": (inc, "x-1"),
        "inc-2": (inc, "inc-1"),
        "add-1": (add, "x-1", "inc-2"),
        "inc-3": (inc, "add-1"),
        "inc-4": (inc, "inc-3"),
        "add-2": (add, "add-1", "inc-4"),
        "inc-5": (inc, "add-2"),
        "inc-6": (inc, "inc-5"),
    }

    def fused_task(inner, out, inputs=()):
        # A SubgraphCallable followed by the keys it takes as arguments.
        return (SubgraphCallable(inner, out, inputs),) + inputs

    def whole_graph(out, out_task):
        # Inner graph when everything from "x-1" onwards is absorbed.
        return {
            "x-1": 1,
            "add-1": (add, "x-1", (inc, (inc, "x-1"))),
            out: out_task,
        }

    def upper_graph(out, out_task):
        # Inner graph when "x-1" and "inc-2" stay outside as inputs.
        return {"add-1": (add, "x-1", "inc-2"), out: out_task}

    to_add2 = (add, "add-1", (inc, (inc, "add-1")))
    to_inc6 = (inc, (inc, to_add2))

    res = fuse(dsk, "inc-6", fuse_subgraphs=True)
    sol = with_deps(
        {
            "inc-6": "add-inc-x-1",
            "add-inc-x-1": fused_task(whole_graph("inc-6", to_inc6), "inc-6"),
        }
    )
    assert res == sol

    res = fuse(dsk, "inc-6", fuse_subgraphs=True, rename_keys=False)
    sol = with_deps(
        {"inc-6": fused_task(whole_graph("inc-6", to_inc6), "inc-6")}
    )
    assert res == sol

    res = fuse(dsk, "add-2", fuse_subgraphs=True)
    sol = with_deps(
        {
            "add-inc-x-1": fused_task(whole_graph("add-2", to_add2), "add-2"),
            "add-2": "add-inc-x-1",
            "inc-6": (inc, (inc, "add-2")),
        }
    )
    assert res == sol

    res = fuse(dsk, "inc-2", fuse_subgraphs=True)
    # ordering of arguments is unstable, check all permutations
    sols = [
        with_deps(
            {
                "x-1": 1,
                "inc-2": (inc, (inc, "x-1")),
                "inc-6": "inc-add-1",
                "inc-add-1": fused_task(
                    upper_graph("inc-6", to_inc6), "inc-6", inkeys
                ),
            }
        )
        for inkeys in itertools.permutations(("x-1", "inc-2"))
    ]
    assert res in sols

    res = fuse(dsk, ["inc-2", "add-2"], fuse_subgraphs=True)
    # ordering of arguments is unstable, check all permutations
    sols = [
        with_deps(
            {
                "x-1": 1,
                "inc-2": (inc, (inc, "x-1")),
                "inc-add-1": fused_task(
                    upper_graph("add-2", to_add2), "add-2", inkeys
                ),
                "add-2": "inc-add-1",
                "inc-6": (inc, (inc, "add-2")),
            }
        )
        for inkeys in itertools.permutations(("x-1", "inc-2"))
    ]
    assert res in sols
def rechunk_zarr2zarr_w_dask(
    source_array,
    target_chunks,
    max_mem,
    target_store,
    temp_store=None,
    source_storage_options=None,
    temp_storage_options=None,
    target_storage_options=None,
):
    """Build a lazy task that copies ``source_array`` into a new zarr array
    in ``target_store`` chunked as ``target_chunks``.

    The copy is planned with ``rechunking_plan``. If the planned
    intermediate chunks equal ``target_chunks`` the data is stored directly
    into the target; otherwise it goes through an intermediate array in
    ``temp_store`` and both steps are chained into a single fused graph.

    Parameters
    ----------
    source_array
        Array exposing ``shape``, ``chunks``, ``dtype`` and ``attrs``; it is
        opened with ``dsa.from_zarr``.
    target_chunks
        Chunks of the target array; passed to ``rechunking_plan`` and
        ``zarr.empty``.
    max_mem
        Forwarded unchanged to ``rechunking_plan``.
    target_store
        Store in which the target array is created.
    temp_store : optional
        Store for the intermediate array. Required whenever the plan needs
        an intermediate step.
    source_storage_options, temp_storage_options : dict, optional
        ``storage_options`` passed to ``dsa.from_zarr`` for the source and
        intermediate reads. ``None`` means an empty dict.
    target_storage_options : dict, optional
        Accepted for interface symmetry; not used by this function.

    Returns
    -------
    The result of ``dsa.store(..., compute=False)`` for a one-step copy, or
    a ``Delayed`` over the fused two-step graph.

    Raises
    ------
    ValueError
        If an intermediate step is needed but ``temp_store`` is ``None``.
    """
    # Normalise here instead of using ``{}`` defaults so that no dict
    # instance is shared between calls.
    if source_storage_options is None:
        source_storage_options = {}
    if temp_storage_options is None:
        temp_storage_options = {}

    shape = source_array.shape
    source_chunks = source_array.chunks
    dtype = source_array.dtype
    itemsize = dtype.itemsize

    read_chunks, int_chunks, write_chunks = rechunking_plan(
        shape, source_chunks, target_chunks, itemsize, max_mem
    )

    source_read = dsa.from_zarr(
        source_array, chunks=read_chunks, storage_options=source_storage_options
    )

    # create target
    target_array = zarr.empty(
        shape, chunks=target_chunks, dtype=dtype, store=target_store
    )
    target_array.attrs.update(source_array.attrs)

    if int_chunks == target_chunks:
        target_store_delayed = dsa.store(
            source_read, target_array, lock=False, compute=False
        )
        print("One step rechunking plan")
        return target_store_delayed

    # do intermediate store
    if temp_store is None:
        # Explicit check: an ``assert`` would vanish under ``python -O``.
        raise ValueError(
            "A temporary store must be provided because the rechunking plan "
            "requires an intermediate array."
        )
    int_array = zarr.empty(shape, chunks=int_chunks, dtype=dtype, store=temp_store)
    intermediate_store_delayed = dsa.store(
        source_read, int_array, lock=False, compute=False
    )
    int_read = dsa.from_zarr(
        int_array, chunks=write_chunks, storage_options=temp_storage_options
    )
    target_store_delayed = dsa.store(
        int_read, target_array, lock=False, compute=False
    )

    # now do some hacking to chain these together into a single graph.
    # get the two graphs as dicts
    int_dsk = dask.utils.ensure_dict(intermediate_store_delayed.dask)
    target_dsk = dask.utils.ensure_dict(target_store_delayed.dask)

    # find the root store key representing the read
    root_keys = [
        key
        for key in target_dsk
        if isinstance(key, str) and key.startswith('from-zarr')
    ]
    assert len(root_keys) == 1
    root_key = root_keys[0]

    # now rewrite the graph: the root task keeps returning its original
    # value, but the elements of the intermediate store's final task are
    # spliced in as extra, ignored arguments so they become part of it.
    target_dsk[root_key] = (
        lambda a, *b: a,
        target_dsk[root_key],
        *int_dsk[intermediate_store_delayed.key],
    )
    target_dsk.update(int_dsk)

    # fuse
    dsk_fused, _ = fuse(target_dsk)
    delayed_fused = Delayed(target_store_delayed.key, dsk_fused)
    print("Two step rechunking plan")
    return delayed_fused
def get(
    dsk: Mapping,
    keys: Sequence[Hashable] | Hashable,
    num_workers=None,
    func_loads=None,
    func_dumps=None,
    optimize_graph=True,
    pool=None,
    initializer=None,
    chunksize=None,
    **kwargs,
):
    """Multiprocessed get function appropriate for Bags

    Arguments left at a falsy value (``num_workers``, ``pool``,
    ``initializer``, ``chunksize``, ``func_loads``, ``func_dumps``) fall
    back to the matching ``config`` entry before the built-in default.

    Parameters
    ----------
    dsk : dict
        dask graph
    keys : object or list
        Desired results from graph
    num_workers : int
        Number of worker processes (defaults to number of cores)
    func_dumps : function
        Function to use for function serialization (defaults to
        cloudpickle.dumps)
    func_loads : function
        Function to use for function deserialization (defaults to
        cloudpickle.loads)
    optimize_graph : bool
        If True [default], `fuse` is applied to the graph before computation.
    pool : Executor or Pool
        Some sort of `Executor` or `Pool` to use. A pool passed in by the
        caller is not shut down by this function.
    initializer: function
        Ignored if ``pool`` has been set.
        Function to initialize a worker process before running any tasks in it.
    chunksize: int, optional
        Size of chunks to use when dispatching work.
        Defaults to 6 as some batching is helpful.
        If -1, will be computed to evenly divide ready work across workers.
    **kwargs
        Forwarded to ``get_async``.

    Returns
    -------
    The value returned by ``get_async`` for ``keys``.
    """
    chunksize = chunksize or config.get("chunksize", 6)
    pool = pool or config.get("pool", None)
    initializer = initializer or config.get("multiprocessing.initializer", None)
    num_workers = num_workers or config.get("num_workers", None) or CPU_COUNT
    if pool is None:
        # In order to get consistent hashing in subprocesses, we need to set a
        # consistent seed for the Python hash algorithm. Unfortunately, there
        # is no way to specify environment variables only for the Pool
        # processes, so we have to rely on environment variables being
        # inherited.
        if os.environ.get("PYTHONHASHSEED") in (None, "0"):
            # This number is arbitrary; it was chosen to commemorate
            # https://github.com/dask/dask/issues/6640.
            os.environ["PYTHONHASHSEED"] = "6640"
        context = get_context()
        # Wrap the user's initializer so that ``initialize_worker_process``
        # runs in every worker.
        initializer = partial(initialize_worker_process, user_initializer=initializer)
        pool = ProcessPoolExecutor(num_workers, mp_context=context, initializer=initializer)
        # We created the pool, so we are responsible for shutting it down.
        cleanup = True
    else:
        if initializer is not None:
            warn(
                "The ``initializer`` argument is ignored when ``pool`` is provided. "
                "The user should configure ``pool`` with the needed ``initializer`` "
                "on creation.")
        # Adapt a ``multiprocessing.pool.Pool`` to the executor interface
        # (``submit``) used below.
        if isinstance(pool, multiprocessing.pool.Pool):
            pool = MultiprocessingPoolExecutor(pool)
        cleanup = False

    # Optimize Dask
    dsk = ensure_dict(dsk)
    dsk2, dependencies = cull(dsk, keys)
    if optimize_graph:
        dsk3, dependencies = fuse(dsk2, keys, dependencies)
    else:
        dsk3 = dsk2

    # We specify marshalling functions in order to catch serialization
    # errors and report them to the user.
    loads = func_loads or config.get("func_loads", None) or _loads
    dumps = func_dumps or config.get("func_dumps", None) or _dumps

    # Note former versions used a multiprocessing Manager to share
    # a Queue between parent and workers, but this is fragile on Windows
    # (issue #1652).
    try:
        # Run
        result = get_async(
            pool.submit,
            pool._max_workers,
            dsk3,
            keys,
            get_id=_process_get_id,
            dumps=dumps,
            loads=loads,
            pack_exception=pack_exception,
            raise_exception=reraise,
            chunksize=chunksize,
            **kwargs,
        )
    finally:
        # Only shut down a pool that this call created.
        if cleanup:
            pool.shutdown()
    return result
def test_fuse_reductions_multiple_input():
    """Check ``fuse`` on graphs whose tasks take more than one input.

    Each graph is fused at several ``ave_width`` settings, both with and
    without key renaming, and the result is compared against the expected
    graph (paired with its dependencies by ``with_deps``).
    """

    def f(*args):
        return args

    def check(graph, width, rename, expected):
        # One fuse call at the given settings, compared with the expectation.
        assert fuse(graph, ave_width=width, rename_keys=rename) == with_deps(expected)

    # Fragments shared by the graphs and expectations below.
    leaves = {"a1": 1, "a2": 2}
    branches = {"b1": (f, "a1"), "b2": (f, "a1", "a2"), "b3": (f, "a2")}
    b2_task = (f, "a1", "a2")
    c1_fused = (f, (f, "a1"), "b2")
    c2_fused = (f, "b2", (f, "a2"))

    # Two leaves feeding one task, followed by a single consumer.
    dsk = {**leaves, "b": (f, "a1", "a2"), "c": (f, "b")}
    check(dsk, 2, False, {"c": (f, (f, 1, 2))})
    check(dsk, 2, True, {"a1-a2-b-c": (f, (f, 1, 2)), "c": "a1-a2-b-c"})
    check(dsk, 1, False, {**leaves, "c": (f, (f, "a1", "a2"))})
    check(dsk, 1, True, {**leaves, "b-c": (f, (f, "a1", "a2")), "c": "b-c"})

    # Three intermediate tasks reduced by a single task.
    dsk = {**leaves, **branches, "c": (f, "b1", "b2", "b3")}
    for rename in (False, True):
        for width in (1, 2.9):
            check(dsk, width, rename, dsk)
    all_branches_inlined = (f, (f, "a1"), (f, "a1", "a2"), (f, "a2"))
    check(dsk, 3, False, {**leaves, "c": all_branches_inlined})
    check(
        dsk,
        3,
        True,
        {**leaves, "b1-b2-b3-c": all_branches_inlined, "c": "b1-b2-b3-c"},
    )

    # Two reducers that share the middle intermediate task.
    dsk = {**leaves, **branches, "c1": (f, "b1", "b2"), "c2": (f, "b2", "b3")}
    check(dsk, 1, False, dsk)
    check(dsk, 1, True, dsk)
    check(dsk, 2, False, {**leaves, "b2": b2_task, "c1": c1_fused, "c2": c2_fused})
    check(
        dsk,
        2,
        True,
        {
            **leaves,
            "b2": b2_task,
            "b1-c1": c1_fused,
            "b3-c2": c2_fused,
            "c1": "b1-c1",
            "c2": "b3-c2",
        },
    )

    # Same as above with a final task combining both reducers.
    dsk = {
        **leaves,
        **branches,
        "c1": (f, "b1", "b2"),
        "c2": (f, "b2", "b3"),
        "d": (f, "c1", "c2"),
    }
    check(dsk, 1, False, dsk)
    check(dsk, 1, True, dsk)

    # A more aggressive heuristic could do this at `ave_width=2`. Perhaps
    # we can improve this. Nevertheless, this is behaving as intended.
    d_fused = (f, c1_fused, c2_fused)
    check(dsk, 3, False, {**leaves, "b2": b2_task, "d": d_fused})
    check(
        dsk,
        3,
        True,
        {**leaves, "b2": b2_task, "b1-b3-c1-c2-d": d_fused, "d": "b1-b3-c1-c2-d"},
    )