Example #1
0
def test_fuse_keys():
    """Keys handed to ``fuse`` must survive as their own tasks.

    Each graph is fused twice, once with ``rename_keys=False`` and once with
    ``rename_keys=True``, and the result is compared to the expected graph
    wrapped by ``with_deps``.
    """
    fuse = fuse2  # tests both `fuse` and `fuse_linear`

    # Chain a -> b -> c with "b" protected.
    graph = {"a": 1, "b": (inc, "a"), "c": (inc, "b")}
    protected = ["b"]

    fused = fuse(graph, protected, rename_keys=False)
    assert fused == with_deps({"b": (inc, 1), "c": (inc, "b")})

    fused = fuse(graph, protected, rename_keys=True)
    assert fused == with_deps(
        {"a-b": (inc, 1), "c": (inc, "a-b"), "b": "a-b"}
    )

    # Longer chain with two protected keys: only "y" may be folded into "x".
    graph = {
        "w": (inc, "x"),
        "x": (inc, "y"),
        "y": (inc, "z"),
        "z": (add, "a", "b"),
        "a": 1,
        "b": 2,
    }
    protected = ["x", "z"]

    fused = fuse(graph, protected, rename_keys=False)
    assert fused == with_deps(
        {
            "w": (inc, "x"),
            "x": (inc, (inc, "z")),
            "z": (add, "a", "b"),
            "a": 1,
            "b": 2,
        }
    )

    fused = fuse(graph, protected, rename_keys=True)
    assert fused == with_deps(
        {
            "w": (inc, "y-x"),
            "y-x": (inc, (inc, "z")),
            "z": (add, "a", "b"),
            "a": 1,
            "b": 2,
            "x": "y-x",
        }
    )
Example #2
0
def _chunked_array_copy(spec: CopySpec) -> Delayed:
    """Build a fused ``Delayed`` copying ``spec.read.array`` to ``spec.write.array``.

    When ``spec.intermediate.array`` is ``None`` a single direct copy is
    built. Otherwise two copies are built (read -> intermediate, then
    intermediate -> write) and their graphs are spliced into one before
    fusing.
    """
    if spec.intermediate.array is not None:
        # Stage one: read -> intermediate; stage two: intermediate -> write.
        stage_one = _direct_array_copy(
            spec.read.array,
            spec.intermediate.array,
            spec.read.chunks,
        )
        final_store = _direct_array_copy(
            spec.intermediate.array,
            spec.write.array,
            spec.write.chunks,
        )

        # Work on plain-dict copies of both graphs so they can be edited.
        stage_one_graph = dask.utils.ensure_dict(stage_one.dask)
        graph = dask.utils.ensure_dict(final_store.dask)

        # Exactly one string key starting with "from-zarr" is expected; it is
        # the task that reads the intermediate array in stage two.
        candidates = [
            key
            for key in graph
            if isinstance(key, str) and key.startswith("from-zarr")
        ]
        assert len(candidates) == 1
        (read_key,) = candidates

        # Wrap the read task in a pass-through that returns its first argument
        # and ignores the rest. The extra arguments are the elements of the
        # stage-one store task, which makes the read reference stage one.
        # NOTE(review): presumably this forces the intermediate store to
        # finish before the read runs; confirm.
        graph[read_key] = (
            lambda a, *b: a,
            graph[read_key],
            *stage_one_graph[stage_one.key],
        )
        graph.update(stage_one_graph)
    else:
        # No intermediate array: copy straight from source to target.
        final_store = _direct_array_copy(
            spec.read.array,
            spec.write.array,
            spec.read.chunks,
        )
        graph = dask.utils.ensure_dict(final_store.dask)

    # Fuse the resulting graph and expose it under the final store's key.
    fused_graph, _ = fuse(graph)
    return Delayed(final_store.key, fused_graph)
Example #3
0
def TestOneInput(data):
    """Fuzz entry point: run ``fuse`` or ``fuse_linear`` on a fuzzed graph.

    Inputs shorter than 10 bytes, and inputs for which ``get_fuse_dict``
    yields an empty graph, are ignored. The first consumed bool selects the
    function under test and the second supplies ``rename_keys``.
    """
    if len(data) < 10:
        return

    provider = atheris.FuzzedDataProvider(data)
    graph = get_fuse_dict(data)
    if len(graph) == 0:
        return

    target = fuse if provider.ConsumeBool() else fuse_linear
    target(graph, rename_keys=provider.ConsumeBool())
Example #4
0
def optimize(dsk, keys, **kwargs):
    """Optimize ``dsk`` for computing ``keys``.

    A ``HighLevelGraph`` input gets the high-level passes (dataframe getitem,
    blockwise, root fusion); any other input is wrapped into a
    ``HighLevelGraph`` first. The graph is then culled to ``keys``.
    Low-level fusion runs only when the ``optimization.fuse.active``
    configuration value is truthy.
    """
    if not isinstance(keys, (list, set)):
        keys = [keys]
    keys = list(core.flatten(keys))

    if isinstance(dsk, HighLevelGraph):
        # Perform Blockwise optimizations for HLG input
        dsk = optimize_dataframe_getitem(dsk, keys=keys)
        dsk = optimize_blockwise(dsk, keys=keys)
        dsk = fuse_roots(dsk, keys=keys)
    else:
        dsk = HighLevelGraph.from_collections(id(dsk), dsk, dependencies=())
    dsk = dsk.cull(set(keys))

    # Low-level fusion is opt-in here: an unset (None) or falsy setting
    # returns the culled high-level graph unchanged.
    if not config.get("optimization.fuse.active"):
        return dsk

    dependencies = dsk.get_all_dependencies()
    low_level = ensure_dict(dsk)

    # An unset subgraph option is treated as enabled.
    subgraphs_setting = config.get("optimization.fuse.subgraphs")
    fuse_subgraphs = True if subgraphs_setting is None else subgraphs_setting

    fused, _ = fuse(
        low_level,
        keys,
        dependencies=dependencies,
        fuse_subgraphs=fuse_subgraphs,
    )
    culled, _ = cull(fused, keys)
    return culled
Example #5
0
def test_fused_keys_max_length():  # generic fix for gh-5999
    """Fusing a chain of very long key names must yield keys under 150 chars."""
    # Bind every long name once so definitions and references cannot drift.
    u = "u-looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong"
    v = "v-looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong"
    w = "w-looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong"
    x = "x-looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong"
    y = "y-looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong"
    z = "z-looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong"

    # Linear chain u -> v -> w -> x -> y -> z, where z adds the two constants.
    d = {
        u: (inc, v),
        v: (inc, w),
        w: (inc, x),
        x: (inc, y),
        y: (inc, z),
        z: (add, "a", "b"),
        "a": 1,
        "b": 2,
    }

    fused_graph, _ = fuse(d, rename_keys=True)
    overlong = [key for key in fused_graph if len(key) >= 150]
    assert not overlong
Example #6
0
def test_fuse_subgraphs_linear_chains_of_duplicate_deps(
        compare_subgraph_callables):
    """A chain whose tasks use their dependency twice fuses into one subgraph.

    With ``fuse_subgraphs=True`` the expected result is a single
    ``SubgraphCallable`` task holding the full chain, plus an alias from
    ``"add-5"`` to the fused key.
    """
    # Build x-1 -> add-1 -> ... -> add-5; each add consumes its input twice.
    graph = {"x-1": 1}
    previous = "x-1"
    for step in range(1, 6):
        current = "add-%d" % step
        graph[current] = (add, previous, previous)
        previous = current

    # Copy taken before fusing so the expectation is independent of the call.
    whole_chain = dict(graph)

    result = fuse(graph, "add-5", fuse_subgraphs=True)

    fused_task = (SubgraphCallable(whole_chain, "add-5", ()), )
    expected = with_deps({
        "add-x-1": fused_task,
        "add-5": "add-x-1",
    })
    assert result == expected
Example #7
0
def custom_delay_optimize(
    dsk: dict, keys: list, fast_functions=None, inline_patterns=None, **kwargs
) -> dict:
    """
    Custom optimization functions for delayed tasks.

    By default only fusing of tasks will be carried out.

    Parameters
    ----------
    dsk : dict
        Input dask task graph.
    keys : list
        Output task keys. They are passed to ``fuse`` so that requested
        outputs are never fused out of the graph.
    fast_functions : list, optional
        List of fast functions to be inlined. By default ``None``, which
        (like an empty list) skips function inlining.
    inline_patterns : list, optional
        List of patterns of task keys to be inlined. By default ``None``,
        which (like an empty list) skips pattern inlining.
    **kwargs
        Accepted for compatibility with the optimizer call signature; unused.

    Returns
    -------
    dsk : dict
        Optimized dask graph.
    """
    # Protect the requested outputs: without ``keys`` an output that sits in
    # the middle of a linear chain would be merged into its only dependent
    # and disappear from the graph.
    dsk, _ = fuse(ensure_dict(dsk), keys, rename_keys=custom_fused_keys_renamer)
    if inline_patterns:
        dsk = inline_pattern(dsk, inline_patterns, inline_constants=False)
    if fast_functions:
        # NOTE(review): the output-key argument is left empty as before, so
        # inlining does not protect ``keys`` -- confirm this is intended.
        dsk = inline_functions(
            dsk,
            [],
            fast_functions=fast_functions,
        )
    return dsk
Example #8
0
def test_fuse_subgraphs_linear_chains_of_duplicate_deps():
    """A chain whose tasks use their dependency twice fuses into one subgraph.

    With ``fuse_subgraphs=True`` the expected result is a single
    ``SubgraphCallable`` task holding the full chain, plus an alias from
    ``'add-5'`` to the fused key.
    """
    chain = {
        'x-1': 1,
        'add-1': (add, 'x-1', 'x-1'),
        'add-2': (add, 'add-1', 'add-1'),
        'add-3': (add, 'add-2', 'add-2'),
        'add-4': (add, 'add-3', 'add-3'),
        'add-5': (add, 'add-4', 'add-4'),
    }
    # Copy taken before fusing so the expectation is independent of the call.
    chain_copy = dict(chain)

    fused = fuse(chain, 'add-5', fuse_subgraphs=True)

    whole_chain = SubgraphCallable(chain_copy, 'add-5', ())
    expected = with_deps({
        'add-x-1': (whole_chain, ),
        'add-5': 'add-x-1',
    })
    assert fused == expected
Example #9
0
def optimize(dsk, keys, **kwargs):
    """Cull ``dsk`` to ``keys``, fuse it, then cull again.

    The fusion width comes from the ``'fuse_ave_width'`` entry of
    ``_globals`` and defaults to 1.
    """
    if isinstance(keys, list):
        flatkeys = list(flatten(keys))
    else:
        flatkeys = [keys]

    culled, deps = cull(dsk, flatkeys)
    fused, _ = fuse(
        culled,
        keys,
        dependencies=deps,
        ave_width=_globals.get('fuse_ave_width', 1),
    )
    result, _ = cull(fused, keys)
    return result
Example #10
0
def fuse2(*args, **kwargs):
    """Run ``fuse_linear`` and, when comparable, check it against ``fuse``.

    ``fuse`` is run and compared only when ``rename_keys=False`` is passed
    explicitly; otherwise the ``fuse_linear`` result is returned unchecked.
    """
    linear_result = fuse_linear(*args, **kwargs)
    if kwargs.get('rename_keys') is False:
        general_result = fuse(*args, **kwargs)
        assert linear_result == general_result
    return linear_result
Example #11
0
def test_fuse_config():
    """With ``optimization.fuse.active`` set to False, ``fuse`` is a no-op."""
    graph = {
        "a": 1,
        "b": (inc, "a"),
    }
    deps = {"b": ("a", )}

    with dask.config.set({"optimization.fuse.active": False}):
        outcome = fuse(graph, "b", dependencies=deps)

    # Both the graph and the supplied dependencies come back unchanged.
    assert outcome == (graph, deps)
Example #12
0
def fuse2(*args, **kwargs):
    """Return the ``fuse_linear`` result, cross-checked against ``fuse``.

    The cross-check runs only when the caller passes ``rename_keys=False``;
    for any other value (or when it is omitted) ``fuse`` is not called.
    """
    from_linear = fuse_linear(*args, **kwargs)
    must_compare = kwargs.get("rename_keys") is False
    if must_compare:
        from_general = fuse(*args, **kwargs)
        assert from_linear == from_general
    return from_linear
Example #13
0
def fuse_delayed(tasks: dask.delayed) -> dask.delayed:
    """
    Return a new ``Delayed`` whose graph is the fused graph of ``tasks``.

    Useful (or even required) because dask.delayed optimization doesn't do
    this step. The result keeps the key of ``tasks``.
    """
    graph = dask.utils.ensure_dict(tasks.dask)
    fused_graph, _ = fuse(graph)
    return Delayed(tasks._key, fused_graph)
Example #14
0
def test_fuse_keys():
    """Keys handed to ``fuse`` must survive as their own tasks.

    Each graph is fused with ``rename_keys=False`` and ``rename_keys=True``
    and compared to the expected graph wrapped by ``with_deps``.
    """
    fuse = fuse2  # tests both `fuse` and `fuse_linear`

    # Chain a -> b -> c with 'b' protected.
    graph = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}
    protected = ['b']

    outcome = fuse(graph, protected, rename_keys=False)
    assert outcome == with_deps({'b': (inc, 1), 'c': (inc, 'b')})

    outcome = fuse(graph, protected, rename_keys=True)
    assert outcome == with_deps({
        'a-b': (inc, 1),
        'c': (inc, 'a-b'),
        'b': 'a-b',
    })

    # Longer chain with two protected keys: only 'y' may be folded into 'x'.
    graph = {
        'w': (inc, 'x'),
        'x': (inc, 'y'),
        'y': (inc, 'z'),
        'z': (add, 'a', 'b'),
        'a': 1,
        'b': 2,
    }
    protected = ['x', 'z']

    outcome = fuse(graph, protected, rename_keys=False)
    assert outcome == with_deps({
        'w': (inc, 'x'),
        'x': (inc, (inc, 'z')),
        'z': (add, 'a', 'b'),
        'a': 1,
        'b': 2,
    })

    outcome = fuse(graph, protected, rename_keys=True)
    assert outcome == with_deps({
        'w': (inc, 'y-x'),
        'y-x': (inc, (inc, 'z')),
        'z': (add, 'a', 'b'),
        'a': 1,
        'b': 2,
        'x': 'y-x',
    })
Example #15
0
def test_optimize_slicing():
    """Chained ``getter`` slices collapse after ``fuse`` + ``optimize_slices``."""
    everything = (slice(None, None, None),)
    first_five = (slice(0, 5, None),)

    dsk = {
        'a': (range, 10),
        'b': (getter, 'a', everything),
        'c': (getter, 'b', everything),
        'd': (getter, 'c', first_five),
        'e': (getter, 'd', everything),
    }

    # No protected keys: the whole chain reduces to a single slice of range.
    fused = fuse(dsk, [], rename_keys=False)[0]
    result = optimize_slices(fused)
    assert result == {'e': (getter, (range, 10), first_five)}

    # protect output keys
    fused = fuse(dsk, ['c', 'd', 'e'], rename_keys=False)[0]
    result = optimize_slices(fused)
    assert result == {
        'c': (getter, (range, 10), (slice(0, None, None),)),
        'd': (getter, 'c', first_five),
        'e': (getter, 'd', everything),
    }
Example #16
0
def test_optimize_slicing():
    """Check slice merging on a fused ``getter`` chain, with and without kept keys."""
    full = slice(None, None, None)
    head = slice(0, 5, None)

    graph = {}
    graph['a'] = (range, 10)
    graph['b'] = (getter, 'a', (full,))
    graph['c'] = (getter, 'b', (full,))
    graph['d'] = (getter, 'c', (head,))
    graph['e'] = (getter, 'd', (full,))

    # Nothing is protected, so only the final key remains.
    unprotected = optimize_slices(fuse(graph, [], rename_keys=False)[0])
    assert unprotected == {'e': (getter, (range, 10), (head,))}

    # protect output keys
    kept = ['c', 'd', 'e']
    protected = optimize_slices(fuse(graph, kept, rename_keys=False)[0])
    wanted = {
        'c': (getter, (range, 10), (slice(0, None, None),)),
        'd': (getter, 'c', (head,)),
        'e': (getter, 'd', (full,)),
    }
    assert protected == wanted
Example #17
0
def test_dont_fuse_numpy_arrays():
    """
    A bare numpy array in the graph must not be inlined by ``fuse``.

    Some types should stay in the graph bare; this helps with things like
    serialization. The test is skipped when numpy is unavailable.
    """
    np = pytest.importorskip("numpy")
    graph = {"x": np.arange(5), "y": (inc, "x")}

    fused_graph = fuse(graph, "y")[0]
    assert fused_graph == graph
Example #18
0
def optimize(
    dsk,
    keys,
    fuse_keys=None,
    fast_functions=None,
    inline_functions_fast_functions=(getter_inline,),
    rename_fused_keys=True,
    **kwargs,
):
    """Optimize dask for array computation

    1.  Cull tasks not necessary to evaluate keys
    2.  Remove full slicing, e.g. x[:]
    3.  Inline fast functions like getitem and np.transpose

    If the ``optimization.fuse.active`` configuration value is exactly
    ``False``, the culled high-level graph is returned and the low-level
    steps are skipped. A non-``None`` ``fast_functions`` replaces
    ``inline_functions_fast_functions``.
    """
    if not isinstance(keys, (list, set)):
        keys = [keys]
    keys = list(flatten(keys))

    if not isinstance(dsk, HighLevelGraph):
        dsk = HighLevelGraph.from_collections(id(dsk), dsk, dependencies=())

    # High-level passes, applied in order, followed by a cull to the outputs.
    for high_level_pass in (optimize_blockwise, fuse_roots):
        dsk = high_level_pass(dsk, keys=keys)
    dsk = dsk.cull(set(keys))

    # Perform low-level fusion unless the user has
    # specified False explicitly.
    if config.get("optimization.fuse.active") is False:
        return dsk

    dependencies = dsk.get_all_dependencies()
    dsk = ensure_dict(dsk)

    # Low level task optimizations
    inlinable = (
        inline_functions_fast_functions
        if fast_functions is None
        else fast_functions
    )

    # Keys that must survive fusion: held keys, outputs, caller-supplied extras.
    protected = hold_keys(dsk, dependencies) + keys + (fuse_keys or [])

    dsk, dependencies = fuse(
        dsk,
        protected,
        dependencies,
        rename_keys=rename_fused_keys,
    )
    if inlinable:
        dsk = inline_functions(
            dsk,
            keys,
            dependencies=dependencies,
            fast_functions=inlinable,
        )

    return optimize_slices(dsk)
Example #19
0
def test_fuse_keys():
    """Protected keys stay in the fused graph, with or without renaming."""
    fuse = fuse2  # tests both `fuse` and `fuse_linear`

    # First graph: a -> b -> c, keep 'b'.
    short_chain = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}
    keep = ['b']

    plain = fuse(short_chain, keep, rename_keys=False)
    assert plain == with_deps({
        'b': (inc, 1),
        'c': (inc, 'b'),
    })

    renamed = fuse(short_chain, keep, rename_keys=True)
    assert renamed == with_deps({
        'a-b': (inc, 1),
        'c': (inc, 'a-b'),
        'b': 'a-b',
    })

    # Second graph: w -> x -> y -> z, keep 'x' and 'z'.
    long_chain = {
        'w': (inc, 'x'),
        'x': (inc, 'y'),
        'y': (inc, 'z'),
        'z': (add, 'a', 'b'),
        'a': 1,
        'b': 2,
    }
    keep = ['x', 'z']

    plain = fuse(long_chain, keep, rename_keys=False)
    assert plain == with_deps({
        'w': (inc, 'x'),
        'x': (inc, (inc, 'z')),
        'z': (add, 'a', 'b'),
        'a': 1,
        'b': 2,
    })

    renamed = fuse(long_chain, keep, rename_keys=True)
    assert renamed == with_deps({
        'w': (inc, 'y-x'),
        'y-x': (inc, (inc, 'z')),
        'z': (add, 'a', 'b'),
        'a': 1,
        'b': 2,
        'x': 'y-x',
    })
def test_optimize_slicing():
    """Chained ``getter`` slices collapse after ``fuse`` + ``optimize_slices``."""
    take_all = (slice(None, None, None), )
    take_five = (slice(0, 5, None), )

    dsk = {
        "a": (range, 10),
        "b": (getter, "a", take_all),
        "c": (getter, "b", take_all),
        "d": (getter, "c", take_five),
        "e": (getter, "d", take_all),
    }

    # With no protected keys only "e" remains, slicing range directly.
    fused_graph = fuse(dsk, [], rename_keys=False)[0]
    assert optimize_slices(fused_graph) == {
        "e": (getter, (range, 10), take_five),
    }

    # protect output keys
    fused_graph = fuse(dsk, ["c", "d", "e"], rename_keys=False)[0]
    assert optimize_slices(fused_graph) == {
        "c": (getter, (range, 10), (slice(0, None, None), )),
        "d": (getter, "c", take_five),
        "e": (getter, "d", take_all),
    }
Example #21
0
def test_fuse_subgraphs_linear_chains_of_duplicate_deps():
    """Subgraph fusion packs a chain with duplicated dependencies into one task."""
    dsk = {
        'x-1': 1,
        'add-1': (add, 'x-1', 'x-1'),
        'add-2': (add, 'add-1', 'add-1'),
        'add-3': (add, 'add-2', 'add-2'),
        'add-4': (add, 'add-3', 'add-3'),
        'add-5': (add, 'add-4', 'add-4'),
    }
    # The expected callable wraps the same tasks as the input graph; copy
    # them before fusing so the expectation does not depend on the call.
    inner_graph = dict(dsk)

    res = fuse(dsk, 'add-5', fuse_subgraphs=True)

    packed = SubgraphCallable(inner_graph, 'add-5', ())
    sol = with_deps({'add-x-1': (packed,), 'add-5': 'add-x-1'})
    assert res == sol
Example #22
0
def test_fuse():
    """Fuse several small graphs without protected keys.

    Every graph is fused with ``rename_keys=False`` and ``rename_keys=True``
    and compared to the expected graph wrapped by ``with_deps``.
    """
    fuse = fuse2  # tests both `fuse` and `fuse_linear`

    # Case 1: a single linear chain w -> x -> y -> z.
    graph = {
        "w": (inc, "x"),
        "x": (inc, "y"),
        "y": (inc, "z"),
        "z": (add, "a", "b"),
        "a": 1,
        "b": 2,
    }
    fused = fuse(graph, rename_keys=False)
    assert fused == with_deps({
        "w": (inc, (inc, (inc, (add, "a", "b")))),
        "a": 1,
        "b": 2,
    })
    fused = fuse(graph, rename_keys=True)
    assert fused == with_deps({
        "z-y-x-w": (inc, (inc, (inc, (add, "a", "b")))),
        "a": 1,
        "b": 2,
        "w": "z-y-x-w",
    })

    # Case 2: "y" gains a second dependent ("NEW"), splitting the chain there.
    graph = {
        "NEW": (inc, "y"),
        "w": (inc, "x"),
        "x": (inc, "y"),
        "y": (inc, "z"),
        "z": (add, "a", "b"),
        "a": 1,
        "b": 2,
    }
    fused = fuse(graph, rename_keys=False)
    assert fused == with_deps({
        "NEW": (inc, "y"),
        "w": (inc, (inc, "y")),
        "y": (inc, (add, "a", "b")),
        "a": 1,
        "b": 2,
    })
    fused = fuse(graph, rename_keys=True)
    assert fused == with_deps({
        "NEW": (inc, "z-y"),
        "x-w": (inc, (inc, "z-y")),
        "z-y": (inc, (add, "a", "b")),
        "a": 1,
        "b": 2,
        "w": "x-w",
        "y": "z-y",
    })

    # Case 3: two branches above "y" and short chains feeding "a" and "b".
    graph = {
        "v": (inc, "y"),
        "u": (inc, "w"),
        "w": (inc, "x"),
        "x": (inc, "y"),
        "y": (inc, "z"),
        "z": (add, "a", "b"),
        "a": (inc, "c"),
        "b": (inc, "d"),
        "c": 1,
        "d": 2,
    }
    fused = fuse(graph, rename_keys=False)
    assert fused == with_deps({
        "u": (inc, (inc, (inc, "y"))),
        "v": (inc, "y"),
        "y": (inc, (add, "a", "b")),
        "a": (inc, 1),
        "b": (inc, 2),
    })
    fused = fuse(graph, rename_keys=True)
    assert fused == with_deps({
        "x-w-u": (inc, (inc, (inc, "z-y"))),
        "v": (inc, "z-y"),
        "z-y": (inc, (add, "c-a", "d-b")),
        "c-a": (inc, 1),
        "d-b": (inc, 2),
        "a": "c-a",
        "b": "d-b",
        "u": "x-w-u",
        "y": "z-y",
    })

    # Case 4: "x" fans out to three dependents; only c -> d is a chain.
    graph = {
        "a": (inc, "x"),
        "b": (inc, "x"),
        "c": (inc, "x"),
        "d": (inc, "c"),
        "x": (inc, "y"),
        "y": 0,
    }
    fused = fuse(graph, rename_keys=False)
    assert fused == with_deps({
        "a": (inc, "x"),
        "b": (inc, "x"),
        "d": (inc, (inc, "x")),
        "x": (inc, 0),
    })
    fused = fuse(graph, rename_keys=True)
    assert fused == with_deps({
        "a": (inc, "y-x"),
        "b": (inc, "y-x"),
        "c-d": (inc, (inc, "y-x")),
        "y-x": (inc, 0),
        "d": "c-d",
        "x": "y-x",
    })

    # Case 5: "c" references "b" twice, so "b" is not folded into "c".
    graph = {"a": 1, "b": (inc, "a"), "c": (add, "b", "b")}
    fused = fuse(graph, rename_keys=False)
    assert fused == with_deps({
        "b": (inc, 1),
        "c": (add, "b", "b"),
    })
    fused = fuse(graph, rename_keys=True)
    assert fused == with_deps({
        "a-b": (inc, 1),
        "c": (add, "a-b", "a-b"),
        "b": "a-b",
    })
Example #23
0
 def time_fuse(self, kind):
     """Call ``fuse`` once on the graph, keys and dependencies stored on ``self``.

     ``kind`` is not used in the body.
     NOTE(review): this looks like a parametrized timing benchmark where
     ``kind`` is consumed during setup -- confirm against the enclosing class.
     """
     options = self.extra_kwargs
     fuse(self.dsk, self.keys, self.deps, **options)
Example #24
0
def test_fuse_reductions_single_input():
    def f(*args):
        return args

    d = {"a": 1, "b1": (f, "a"), "b2": (f, "a", "a"), "c": (f, "b1", "b2")}
    assert fuse(d, ave_width=1.9, rename_keys=False) == with_deps(d)
    assert fuse(d, ave_width=1.9, rename_keys=True) == with_deps(d)
    assert fuse(d, ave_width=2, rename_keys=False) == with_deps({
        "a":
        1,
        "c": (f, (f, "a"), (f, "a", "a"))
    })
    assert fuse(d, ave_width=2, rename_keys=True) == with_deps({
        "a":
        1,
        "b1-b2-c": (f, (f, "a"), (f, "a", "a")),
        "c":
        "b1-b2-c"
    })

    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a", "a"),
        "b3": (f, "a", "a", "a"),
        "c": (f, "b1", "b2", "b3"),
    }
    assert fuse(d, ave_width=2.9, rename_keys=False) == with_deps(d)
    assert fuse(d, ave_width=2.9, rename_keys=True) == with_deps(d)
    assert fuse(d, ave_width=3, rename_keys=False) == with_deps({
        "a":
        1,
        "c": (f, (f, "a"), (f, "a", "a"), (f, "a", "a", "a"))
    })
    assert fuse(d, ave_width=3, rename_keys=True) == with_deps({
        "a":
        1,
        "b1-b2-b3-c": (f, (f, "a"), (f, "a", "a"), (f, "a", "a", "a")),
        "c":
        "b1-b2-b3-c",
    })

    d = {"a": 1, "b1": (f, "a"), "b2": (f, "a"), "c": (f, "a", "b1", "b2")}
    assert fuse(d, ave_width=1.9, rename_keys=False) == with_deps(d)
    assert fuse(d, ave_width=1.9, rename_keys=True) == with_deps(d)
    assert fuse(d, ave_width=2, rename_keys=False) == with_deps({
        "a":
        1,
        "c": (f, "a", (f, "a"), (f, "a"))
    })
    assert fuse(d, ave_width=2, rename_keys=True) == with_deps({
        "a":
        1,
        "b1-b2-c": (f, "a", (f, "a"), (f, "a")),
        "c":
        "b1-b2-c"
    })

    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "c": (f, "b1", "b2"),
        "d1": (f, "c"),
        "d2": (f, "c"),
        "e": (f, "d1", "d2"),
    }
    assert fuse(d, ave_width=1.9, rename_keys=False) == with_deps(d)
    assert fuse(d, ave_width=1.9, rename_keys=True) == with_deps(d)
    assert fuse(d, ave_width=2, rename_keys=False) == with_deps({
        "a":
        1,
        "c": (f, (f, "a"), (f, "a")),
        "e": (f, (f, "c"), (f, "c"))
    })
    assert fuse(d, ave_width=2, rename_keys=True) == with_deps({
        "a":
        1,
        "b1-b2-c": (f, (f, "a"), (f, "a")),
        "d1-d2-e": (f, (f, "c"), (f, "c")),
        "c":
        "b1-b2-c",
        "e":
        "d1-d2-e",
    })

    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "b3": (f, "a"),
        "b4": (f, "a"),
        "c1": (f, "b1", "b2"),
        "c2": (f, "b3", "b4"),
        "d": (f, "c1", "c2"),
    }
    assert fuse(d, ave_width=1.9, rename_keys=False) == with_deps(d)
    assert fuse(d, ave_width=1.9, rename_keys=True) == with_deps(d)
    expected = with_deps({
        "a": 1,
        "c1": (f, (f, "a"), (f, "a")),
        "c2": (f, (f, "a"), (f, "a")),
        "d": (f, "c1", "c2"),
    })
    assert fuse(d, ave_width=2, rename_keys=False) == expected
    assert fuse(d, ave_width=2.9, rename_keys=False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-c1": (f, (f, "a"), (f, "a")),
        "b3-b4-c2": (f, (f, "a"), (f, "a")),
        "d": (f, "c1", "c2"),
        "c1": "b1-b2-c1",
        "c2": "b3-b4-c2",
    })
    assert fuse(d, ave_width=2, rename_keys=True) == expected
    assert fuse(d, ave_width=2.9, rename_keys=True) == expected
    assert fuse(d, ave_width=3, rename_keys=False) == with_deps({
        "a":
        1,
        "d": (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a")))
    })
    assert fuse(d, ave_width=3, rename_keys=True) == with_deps({
        "a":
        1,
        "b1-b2-b3-b4-c1-c2-d": (
            f,
            (f, (f, "a"), (f, "a")),
            (f, (f, "a"), (f, "a")),
        ),
        "d":
        "b1-b2-b3-b4-c1-c2-d",
    })

    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "b3": (f, "a"),
        "b4": (f, "a"),
        "b5": (f, "a"),
        "b6": (f, "a"),
        "b7": (f, "a"),
        "b8": (f, "a"),
        "c1": (f, "b1", "b2"),
        "c2": (f, "b3", "b4"),
        "c3": (f, "b5", "b6"),
        "c4": (f, "b7", "b8"),
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "e": (f, "d1", "d2"),
    }
    assert fuse(d, ave_width=1.9, rename_keys=False) == with_deps(d)
    assert fuse(d, ave_width=1.9, rename_keys=True) == with_deps(d)
    expected = with_deps({
        "a": 1,
        "c1": (f, (f, "a"), (f, "a")),
        "c2": (f, (f, "a"), (f, "a")),
        "c3": (f, (f, "a"), (f, "a")),
        "c4": (f, (f, "a"), (f, "a")),
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "e": (f, "d1", "d2"),
    })
    assert fuse(d, ave_width=2, rename_keys=False) == expected
    assert fuse(d, ave_width=2.9, rename_keys=False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-c1": (f, (f, "a"), (f, "a")),
        "b3-b4-c2": (f, (f, "a"), (f, "a")),
        "b5-b6-c3": (f, (f, "a"), (f, "a")),
        "b7-b8-c4": (f, (f, "a"), (f, "a")),
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "e": (f, "d1", "d2"),
        "c1": "b1-b2-c1",
        "c2": "b3-b4-c2",
        "c3": "b5-b6-c3",
        "c4": "b7-b8-c4",
    })
    assert fuse(d, ave_width=2, rename_keys=True) == expected
    assert fuse(d, ave_width=2.9, rename_keys=True) == expected
    expected = with_deps({
        "a":
        1,
        "d1": (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
        "d2": (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
        "e": (f, "d1", "d2"),
    })
    assert fuse(d, ave_width=3, rename_keys=False) == expected
    assert fuse(d, ave_width=4.6, rename_keys=False) == expected
    expected = with_deps({
        "a":
        1,
        "b1-b2-b3-b4-c1-c2-d1": (
            f,
            (f, (f, "a"), (f, "a")),
            (f, (f, "a"), (f, "a")),
        ),
        "b5-b6-b7-b8-c3-c4-d2": (
            f,
            (f, (f, "a"), (f, "a")),
            (f, (f, "a"), (f, "a")),
        ),
        "e": (f, "d1", "d2"),
        "d1":
        "b1-b2-b3-b4-c1-c2-d1",
        "d2":
        "b5-b6-b7-b8-c3-c4-d2",
    })
    assert fuse(d, ave_width=3, rename_keys=True) == expected
    assert fuse(d, ave_width=4.6, rename_keys=True) == expected
    assert fuse(d, ave_width=4.7, rename_keys=False) == with_deps({
        "a":
        1,
        "e": (
            f,
            (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
            (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
        ),
    })
    assert fuse(d, ave_width=4.7, rename_keys=True) == with_deps({
        "a":
        1,
        "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e": (
            f,
            (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
            (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
        ),
        "e":
        "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e",
    })

    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "b3": (f, "a"),
        "b4": (f, "a"),
        "b5": (f, "a"),
        "b6": (f, "a"),
        "b7": (f, "a"),
        "b8": (f, "a"),
        "b9": (f, "a"),
        "b10": (f, "a"),
        "b11": (f, "a"),
        "b12": (f, "a"),
        "b13": (f, "a"),
        "b14": (f, "a"),
        "b15": (f, "a"),
        "b16": (f, "a"),
        "c1": (f, "b1", "b2"),
        "c2": (f, "b3", "b4"),
        "c3": (f, "b5", "b6"),
        "c4": (f, "b7", "b8"),
        "c5": (f, "b9", "b10"),
        "c6": (f, "b11", "b12"),
        "c7": (f, "b13", "b14"),
        "c8": (f, "b15", "b16"),
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "d3": (f, "c5", "c6"),
        "d4": (f, "c7", "c8"),
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
    }
    assert fuse(d, ave_width=1.9, rename_keys=False) == with_deps(d)
    assert fuse(d, ave_width=1.9, rename_keys=True) == with_deps(d)
    expected = with_deps({
        "a": 1,
        "c1": (f, (f, "a"), (f, "a")),
        "c2": (f, (f, "a"), (f, "a")),
        "c3": (f, (f, "a"), (f, "a")),
        "c4": (f, (f, "a"), (f, "a")),
        "c5": (f, (f, "a"), (f, "a")),
        "c6": (f, (f, "a"), (f, "a")),
        "c7": (f, (f, "a"), (f, "a")),
        "c8": (f, (f, "a"), (f, "a")),
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "d3": (f, "c5", "c6"),
        "d4": (f, "c7", "c8"),
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
    })
    assert fuse(d, ave_width=2, rename_keys=False) == expected
    assert fuse(d, ave_width=2.9, rename_keys=False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-c1": (f, (f, "a"), (f, "a")),
        "b3-b4-c2": (f, (f, "a"), (f, "a")),
        "b5-b6-c3": (f, (f, "a"), (f, "a")),
        "b7-b8-c4": (f, (f, "a"), (f, "a")),
        "b10-b9-c5": (f, (f, "a"), (f, "a")),
        "b11-b12-c6": (f, (f, "a"), (f, "a")),
        "b13-b14-c7": (f, (f, "a"), (f, "a")),
        "b15-b16-c8": (f, (f, "a"), (f, "a")),
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "d3": (f, "c5", "c6"),
        "d4": (f, "c7", "c8"),
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
        "c1": "b1-b2-c1",
        "c2": "b3-b4-c2",
        "c3": "b5-b6-c3",
        "c4": "b7-b8-c4",
        "c5": "b10-b9-c5",
        "c6": "b11-b12-c6",
        "c7": "b13-b14-c7",
        "c8": "b15-b16-c8",
    })
    assert fuse(d, ave_width=2, rename_keys=True) == expected
    assert fuse(d, ave_width=2.9, rename_keys=True) == expected
    expected = with_deps({
        "a":
        1,
        "d1": (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
        "d2": (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
        "d3": (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
        "d4": (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
    })
    assert fuse(d, ave_width=3, rename_keys=False) == expected
    assert fuse(d, ave_width=4.6, rename_keys=False) == expected
    expected = with_deps({
        "a":
        1,
        "b1-b2-b3-b4-c1-c2-d1": (
            f,
            (f, (f, "a"), (f, "a")),
            (f, (f, "a"), (f, "a")),
        ),
        "b5-b6-b7-b8-c3-c4-d2": (
            f,
            (f, (f, "a"), (f, "a")),
            (f, (f, "a"), (f, "a")),
        ),
        "b10-b11-b12-b9-c5-c6-d3": (
            f,
            (f, (f, "a"), (f, "a")),
            (f, (f, "a"), (f, "a")),
        ),
        "b13-b14-b15-b16-c7-c8-d4": (
            f,
            (f, (f, "a"), (f, "a")),
            (f, (f, "a"), (f, "a")),
        ),
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
        "d1":
        "b1-b2-b3-b4-c1-c2-d1",
        "d2":
        "b5-b6-b7-b8-c3-c4-d2",
        "d3":
        "b10-b11-b12-b9-c5-c6-d3",
        "d4":
        "b13-b14-b15-b16-c7-c8-d4",
    })
    assert fuse(d, ave_width=3, rename_keys=True) == expected
    assert fuse(d, ave_width=4.6, rename_keys=True) == expected
    expected = with_deps({
        "a":
        1,
        "e1": (
            f,
            (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
            (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
        ),
        "e2": (
            f,
            (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
            (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
        ),
        "f": (f, "e1", "e2"),
    })
    assert fuse(d, ave_width=4.7, rename_keys=False) == expected
    assert fuse(d, ave_width=7.4, rename_keys=False) == expected
    expected = with_deps({
        "a":
        1,
        "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e1": (
            f,
            (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
            (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
        ),
        "b10-b11-b12-b13-b14-b15-b16-b9-c5-c6-c7-c8-d3-d4-e2": (
            f,
            (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
            (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
        ),
        "f": (f, "e1", "e2"),
        "e1":
        "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e1",
        "e2":
        "b10-b11-b12-b13-b14-b15-b16-b9-c5-c6-c7-c8-d3-d4-e2",
    })
    assert fuse(d, ave_width=4.7, rename_keys=True) == expected
    assert fuse(d, ave_width=7.4, rename_keys=True) == expected
    assert fuse(d, ave_width=7.5, rename_keys=False) == with_deps({
        "a":
        1,
        "f": (
            f,
            (
                f,
                (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
                (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
            ),
            (
                f,
                (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
                (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
            ),
        ),
    })
    assert fuse(d, ave_width=7.5, rename_keys=True) == with_deps({
        "a":
        1,
        "b1-b10-b11-b12-b13-b14-b15-b16-b2-b3-b4-b5-b6-b7-b8-b9-c1-c2-c3-c4-c5-c6-c7-c8-d1-d2-d3-d4-e1-e2-f":
        (
            f,
            (
                f,
                (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
                (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
            ),
            (
                f,
                (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
                (f, (f, (f, "a"), (f, "a")), (f, (f, "a"), (f, "a"))),
            ),
        ),
        "f":
        "b1-b10-b11-b12-b13-b14-b15-b16-b2-b3-b4-b5-b6-b7-b8-b9-c1-c2-c3-c4-c5-c6-c7-c8-d1-d2-d3-d4-e1-e2-f",
    })

    d = {"a": 1, "b": (f, "a")}
    assert fuse(d, ave_width=1, rename_keys=False) == with_deps({"b": (f, 1)})
    assert fuse(d, ave_width=1, rename_keys=True) == with_deps({
        "a-b": (f, 1),
        "b": "a-b"
    })

    d = {"a": 1, "b": (f, "a"), "c": (f, "b"), "d": (f, "c")}
    assert fuse(d, ave_width=1,
                rename_keys=False) == with_deps({"d": (f, (f, (f, 1)))})
    assert fuse(d, ave_width=1, rename_keys=True) == with_deps({
        "a-b-c-d": (f, (f, (f, 1))),
        "d":
        "a-b-c-d"
    })

    d = {"a": 1, "b": (f, "a"), "c": (f, "a", "b"), "d": (f, "a", "c")}
    assert fuse(d, ave_width=1, rename_keys=False) == with_deps({
        "a":
        1,
        "d": (f, "a", (f, "a", (f, "a")))
    })
    assert fuse(d, ave_width=1, rename_keys=True) == with_deps({
        "a":
        1,
        "b-c-d": (f, "a", (f, "a", (f, "a"))),
        "d":
        "b-c-d"
    })

    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "c1": (f, "b1"),
        "d1": (f, "c1"),
        "e1": (f, "d1"),
        "f": (f, "e1", "b2"),
    }
    expected = with_deps({
        "a": 1,
        "b2": (f, "a"),
        "e1": (f, (f, (f, (f, "a")))),
        "f": (f, "e1", "b2")
    })
    assert fuse(d, ave_width=1, rename_keys=False) == expected
    assert fuse(d, ave_width=1.9, rename_keys=False) == expected
    expected = with_deps({
        "a": 1,
        "b2": (f, "a"),
        "b1-c1-d1-e1": (f, (f, (f, (f, "a")))),
        "f": (f, "e1", "b2"),
        "e1": "b1-c1-d1-e1",
    })
    assert fuse(d, ave_width=1, rename_keys=True) == expected
    assert fuse(d, ave_width=1.9, rename_keys=True) == expected
    assert fuse(d, ave_width=2, rename_keys=False) == with_deps({
        "a":
        1,
        "f": (f, (f, (f, (f, (f, "a")))), (f, "a"))
    })
    assert fuse(d, ave_width=2, rename_keys=True) == with_deps({
        "a":
        1,
        "b1-b2-c1-d1-e1-f": (f, (f, (f, (f, (f, "a")))), (f, "a")),
        "f":
        "b1-b2-c1-d1-e1-f",
    })

    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "c1": (f, "a", "b1"),
        "d1": (f, "a", "c1"),
        "e1": (f, "a", "d1"),
        "f": (f, "a", "e1", "b2"),
    }
    expected = with_deps({
        "a": 1,
        "b2": (f, "a"),
        "e1": (f, "a", (f, "a", (f, "a", (f, "a")))),
        "f": (f, "a", "e1", "b2"),
    })
    assert fuse(d, ave_width=1, rename_keys=False) == expected
    assert fuse(d, ave_width=1.9, rename_keys=False) == expected
    expected = with_deps({
        "a": 1,
        "b2": (f, "a"),
        "b1-c1-d1-e1": (f, "a", (f, "a", (f, "a", (f, "a")))),
        "f": (f, "a", "e1", "b2"),
        "e1": "b1-c1-d1-e1",
    })
    assert fuse(d, ave_width=1, rename_keys=True) == expected
    assert fuse(d, ave_width=1.9, rename_keys=True) == expected
    assert fuse(d, ave_width=2, rename_keys=False) == with_deps({
        "a":
        1,
        "f": (f, "a", (f, "a", (f, "a", (f, "a", (f, "a")))), (f, "a"))
    })
    assert fuse(d, ave_width=2, rename_keys=True) == with_deps({
        "a":
        1,
        "b1-b2-c1-d1-e1-f": (
            f,
            "a",
            (f, "a", (f, "a", (f, "a", (f, "a")))),
            (f, "a"),
        ),
        "f":
        "b1-b2-c1-d1-e1-f",
    })

    d = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "b3": (f, "a"),
        "c1": (f, "b1"),
        "c2": (f, "b2"),
        "c3": (f, "b3"),
        "d1": (f, "c1"),
        "d2": (f, "c2"),
        "d3": (f, "c3"),
        "e": (f, "d1", "d2", "d3"),
        "f": (f, "e"),
        "g": (f, "f"),
    }
    assert fuse(d, ave_width=1, rename_keys=False) == with_deps({
        "a":
        1,
        "d1": (f, (f, (f, "a"))),
        "d2": (f, (f, (f, "a"))),
        "d3": (f, (f, (f, "a"))),
        "g": (f, (f, (f, "d1", "d2", "d3"))),
    })
    assert fuse(d, ave_width=1, rename_keys=True) == with_deps({
        "a":
        1,
        "b1-c1-d1": (f, (f, (f, "a"))),
        "b2-c2-d2": (f, (f, (f, "a"))),
        "b3-c3-d3": (f, (f, (f, "a"))),
        "e-f-g": (f, (f, (f, "d1", "d2", "d3"))),
        "d1":
        "b1-c1-d1",
        "d2":
        "b2-c2-d2",
        "d3":
        "b3-c3-d3",
        "g":
        "e-f-g",
    })

    d = {
        "a": 1,
        "b": (f, "a"),
        "c": (f, "b"),
        "d": (f, "b", "c"),
        "e": (f, "d"),
        "f": (f, "e"),
        "g": (f, "d", "f"),
    }
    assert fuse(d, ave_width=1, rename_keys=False) == with_deps({
        "b": (f, 1),
        "d": (f, "b", (f, "b")),
        "g": (f, "d", (f, (f, "d")))
    })
    assert fuse(d, ave_width=1, rename_keys=True) == with_deps({
        "a-b": (f, 1),
        "c-d": (f, "b", (f, "b")),
        "e-f-g": (f, "d", (f, (f, "d"))),
        "b":
        "a-b",
        "d":
        "c-d",
        "g":
        "e-f-g",
    })
Example #25
0
def _rechunk_array(
    source_array,
    target_chunks,
    max_mem,
    target_store_or_group,
    temp_store_or_group=None,
    name=None,
    source_storage_options=None,
    temp_storage_options=None,
    target_storage_options=None,
):
    """Build a lazy store operation copying ``source_array`` with new chunks.

    A chunking plan is obtained from ``rechunking_plan``. When the planned
    read and write chunks are equal, the source is stored straight into the
    target. Otherwise the data goes through an intermediate array created in
    ``temp_store_or_group``; the two store graphs are chained into one graph
    and passed through ``fuse``.

    Parameters
    ----------
    source_array
        Array read via ``dsa.from_zarr``; its ``shape``, ``chunks``, ``dtype``
        and ``attrs`` are used.
    target_chunks
        ``None`` (reuse the source chunks), a dict that is converted with
        ``_shape_dict_to_tuple``, or a sequence of chunk sizes.
    max_mem
        Forwarded unchanged to ``rechunking_plan``.
    target_store_or_group
        Passed to ``_zarr_empty`` to create the target array.
    temp_store_or_group
        Passed to ``_zarr_empty`` to create the intermediate array; must not
        be ``None`` when the two-step path is taken.
    name
        Forwarded as ``name=`` to ``_zarr_empty``.
    source_storage_options, temp_storage_options
        ``storage_options`` for the ``dsa.from_zarr`` reads of the source and
        intermediate arrays. ``None`` is treated as an empty dict.
    target_storage_options
        Accepted but not used by this function.

    Returns
    -------
    The object returned by ``dsa.store(..., compute=False)`` on the direct
    path, or a ``Delayed`` wrapping the fused two-step graph.

    Raises
    ------
    KeyError
        If ``target_chunks`` is a dict and ``_shape_dict_to_tuple`` raises
        ``KeyError`` for it.
    AssertionError
        If the two-step path is needed and ``temp_store_or_group`` is
        ``None``, or if the target graph does not contain exactly one string
        key starting with ``"from-zarr"``.
    """
    # Build fresh dicts per call instead of sharing mutable defaults.
    if source_storage_options is None:
        source_storage_options = {}
    if temp_storage_options is None:
        temp_storage_options = {}
    # NOTE(review): target_storage_options is never consumed below; confirm
    # whether the target array creation is supposed to receive it.

    shape = source_array.shape
    source_chunks = source_array.chunks
    dtype = source_array.dtype
    itemsize = dtype.itemsize

    if target_chunks is None:
        # this is just a pass-through copy
        target_chunks = source_chunks

    if isinstance(target_chunks, dict):
        array_dims = _get_dims_from_zarr_array(source_array)
        try:
            target_chunks = _shape_dict_to_tuple(array_dims, target_chunks)
        except KeyError as err:
            raise KeyError(
                "You must explicitly specify each dimension size in target_chunks. "
                f"Got array_dims {array_dims}, target_chunks {target_chunks}."
            ) from err

    read_chunks, int_chunks, write_chunks = rechunking_plan(
        shape, source_chunks, target_chunks, itemsize, max_mem)

    # NOTE(review): diagnostic output kept as-is; consider routing to logging.
    print(source_chunks, read_chunks, int_chunks, write_chunks, target_chunks)

    # Ensure plain python ints for serialization. read_chunks is normalised
    # as well so that the read/write comparison below is tuple-vs-tuple and
    # cannot fail merely because the plan returned a different sequence type.
    shape = tuple(int(x) for x in shape)
    target_chunks = tuple(int(x) for x in target_chunks)
    read_chunks = tuple(int(x) for x in read_chunks)
    int_chunks = tuple(int(x) for x in int_chunks)
    write_chunks = tuple(int(x) for x in write_chunks)

    source_read = dsa.from_zarr(source_array,
                                chunks=read_chunks,
                                storage_options=source_storage_options)

    # create target
    target_array = _zarr_empty(shape,
                               target_store_or_group,
                               target_chunks,
                               dtype,
                               name=name)
    target_array.attrs.update(source_array.attrs)

    if read_chunks == write_chunks:
        # One pass is enough: store the source read directly in the target.
        return dsa.store(source_read,
                         target_array,
                         lock=False,
                         compute=False)

    # do intermediate store
    assert temp_store_or_group is not None, (
        "temp_store_or_group is required when read and write chunks differ")
    int_array = _zarr_empty(shape,
                            temp_store_or_group,
                            int_chunks,
                            dtype,
                            name=name)
    intermediate_store_delayed = dsa.store(source_read,
                                           int_array,
                                           lock=False,
                                           compute=False)

    int_read = dsa.from_zarr(int_array,
                             chunks=write_chunks,
                             storage_options=temp_storage_options)
    target_store_delayed = dsa.store(int_read,
                                     target_array,
                                     lock=False,
                                     compute=False)

    # now do some hacking to chain these together into a single graph.
    # get the two graphs as dicts
    int_dsk = dask.utils.ensure_dict(intermediate_store_delayed.dask)
    target_dsk = dask.utils.ensure_dict(target_store_delayed.dask)

    # find the root store key representing the read
    root_keys = [
        key for key in target_dsk
        if isinstance(key, str) and key.startswith("from-zarr")
    ]
    assert len(root_keys) == 1, (
        "expected exactly one 'from-zarr' key in the target graph")
    root_key = root_keys[0]

    # Rewrite the graph: the root read task now also takes the elements of
    # the intermediate store task as extra arguments, which the lambda
    # ignores; this makes the read depend on the intermediate store.
    target_dsk[root_key] = (
        lambda a, *b: a,
        target_dsk[root_key],
        *int_dsk[intermediate_store_delayed.key],
    )
    target_dsk.update(int_dsk)

    # fuse (the dependency mapping returned alongside the graph is unused)
    dsk_fused, _ = fuse(target_dsk)
    delayed_fused = Delayed(target_store_delayed.key, dsk_fused)

    print("Two step rechunking plan")
    return delayed_fused
Example #26
0
def test_fuse():
    """Compare ``fuse2`` output with hand-written graphs for several layouts.

    Every input graph is fused once with ``rename_keys=False`` and once with
    ``rename_keys=True``; each result must equal the expected graph passed
    through ``with_deps``.
    """
    fuse = fuse2  # tests both `fuse` and `fuse_linear`

    # One chain z -> y -> x -> w on top of two literal inputs.
    dsk = {
        "w": (inc, "x"),
        "x": (inc, "y"),
        "y": (inc, "z"),
        "z": (add, "a", "b"),
        "a": 1,
        "b": 2,
    }
    whole_chain = (inc, (inc, (inc, (add, "a", "b"))))
    plain = with_deps({"w": whole_chain, "a": 1, "b": 2})
    renamed = with_deps({
        "z-y-x-w": whole_chain,
        "a": 1,
        "b": 2,
        "w": "z-y-x-w",
    })
    assert fuse(dsk, rename_keys=False) == plain
    assert fuse(dsk, rename_keys=True) == renamed

    # Same chain, but "y" gains a second consumer ("NEW").
    dsk = {
        "NEW": (inc, "y"),
        "w": (inc, "x"),
        "x": (inc, "y"),
        "y": (inc, "z"),
        "z": (add, "a", "b"),
        "a": 1,
        "b": 2,
    }
    lower = (inc, (add, "a", "b"))
    plain = with_deps({
        "NEW": (inc, "y"),
        "w": (inc, (inc, "y")),
        "y": lower,
        "a": 1,
        "b": 2,
    })
    renamed = with_deps({
        "NEW": (inc, "z-y"),
        "x-w": (inc, (inc, "z-y")),
        "z-y": lower,
        "a": 1,
        "b": 2,
        "w": "x-w",
        "y": "z-y",
    })
    assert fuse(dsk, rename_keys=False) == plain
    assert fuse(dsk, rename_keys=True) == renamed

    # Longer chain above "y", and "a"/"b" are themselves computed.
    dsk = {
        "v": (inc, "y"),
        "u": (inc, "w"),
        "w": (inc, "x"),
        "x": (inc, "y"),
        "y": (inc, "z"),
        "z": (add, "a", "b"),
        "a": (inc, "c"),
        "b": (inc, "d"),
        "c": 1,
        "d": 2,
    }
    plain = with_deps({
        "u": (inc, (inc, (inc, "y"))),
        "v": (inc, "y"),
        "y": (inc, (add, "a", "b")),
        "a": (inc, 1),
        "b": (inc, 2),
    })
    renamed = with_deps({
        "x-w-u": (inc, (inc, (inc, "z-y"))),
        "v": (inc, "z-y"),
        "z-y": (inc, (add, "c-a", "d-b")),
        "c-a": (inc, 1),
        "d-b": (inc, 2),
        "a": "c-a",
        "b": "d-b",
        "u": "x-w-u",
        "y": "z-y",
    })
    assert fuse(dsk, rename_keys=False) == plain
    assert fuse(dsk, rename_keys=True) == renamed

    # "x" has three consumers; only the c -> d branch is a chain.
    dsk = {
        "a": (inc, "x"),
        "b": (inc, "x"),
        "c": (inc, "x"),
        "d": (inc, "c"),
        "x": (inc, "y"),
        "y": 0,
    }
    plain = with_deps({
        "a": (inc, "x"),
        "b": (inc, "x"),
        "d": (inc, (inc, "x")),
        "x": (inc, 0),
    })
    renamed = with_deps({
        "a": (inc, "y-x"),
        "b": (inc, "y-x"),
        "c-d": (inc, (inc, "y-x")),
        "y-x": (inc, 0),
        "d": "c-d",
        "x": "y-x",
    })
    assert fuse(dsk, rename_keys=False) == plain
    assert fuse(dsk, rename_keys=True) == renamed

    # "b" is referenced twice by the same task.
    dsk = {
        "a": 1,
        "b": (inc, "a"),
        "c": (add, "b", "b"),
    }
    plain = with_deps({
        "b": (inc, 1),
        "c": (add, "b", "b"),
    })
    renamed = with_deps({
        "a-b": (inc, 1),
        "c": (add, "a-b", "a-b"),
        "b": "a-b",
    })
    assert fuse(dsk, rename_keys=False) == plain
    assert fuse(dsk, rename_keys=True) == renamed
Example #27
0
def test_fuse_reductions_single_input():
    """Exercise ``fuse`` on reduction-style graphs built on one input ``"a"``.

    Every scenario calls ``fuse`` at one or more ``ave_width`` values, with
    ``rename_keys`` both ``False`` and ``True``, and compares the result with
    a hand-written graph passed through ``with_deps``.
    """

    def f(*args):
        return args

    # Sub-expressions that recur throughout the expected graphs.
    leaf = (f, "a")
    pair = (f, leaf, leaf)
    quad = (f, pair, pair)
    octo = (f, quad, quad)
    either = (False, True)

    # --- two branches of different arity feeding one reducer ---
    graph = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a", "a"),
        "c": (f, "b1", "b2"),
    }
    for rename in either:
        assert fuse(graph, ave_width=1.9, rename_keys=rename) == with_deps(graph)
    merged = (f, leaf, (f, "a", "a"))
    assert fuse(graph, ave_width=2, rename_keys=False) == with_deps(
        {"a": 1, "c": merged}
    )
    assert fuse(graph, ave_width=2, rename_keys=True) == with_deps(
        {"a": 1, "b1-b2-c": merged, "c": "b1-b2-c"}
    )

    # --- three branches of different arity ---
    graph = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a", "a"),
        "b3": (f, "a", "a", "a"),
        "c": (f, "b1", "b2", "b3"),
    }
    for rename in either:
        assert fuse(graph, ave_width=2.9, rename_keys=rename) == with_deps(graph)
    merged = (f, leaf, (f, "a", "a"), (f, "a", "a", "a"))
    assert fuse(graph, ave_width=3, rename_keys=False) == with_deps(
        {"a": 1, "c": merged}
    )
    assert fuse(graph, ave_width=3, rename_keys=True) == with_deps(
        {"a": 1, "b1-b2-b3-c": merged, "c": "b1-b2-b3-c"}
    )

    # --- the reducer also reads "a" directly ---
    graph = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "c": (f, "a", "b1", "b2"),
    }
    for rename in either:
        assert fuse(graph, ave_width=1.9, rename_keys=rename) == with_deps(graph)
    merged = (f, "a", leaf, leaf)
    assert fuse(graph, ave_width=2, rename_keys=False) == with_deps(
        {"a": 1, "c": merged}
    )
    assert fuse(graph, ave_width=2, rename_keys=True) == with_deps(
        {"a": 1, "b1-b2-c": merged, "c": "b1-b2-c"}
    )

    # --- two stacked diamonds ---
    graph = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "c": (f, "b1", "b2"),
        "d1": (f, "c"),
        "d2": (f, "c"),
        "e": (f, "d1", "d2"),
    }
    for rename in either:
        assert fuse(graph, ave_width=1.9, rename_keys=rename) == with_deps(graph)
    upper = (f, (f, "c"), (f, "c"))
    assert fuse(graph, ave_width=2, rename_keys=False) == with_deps(
        {"a": 1, "c": pair, "e": upper}
    )
    assert fuse(graph, ave_width=2, rename_keys=True) == with_deps({
        "a": 1,
        "b1-b2-c": pair,
        "d1-d2-e": upper,
        "c": "b1-b2-c",
        "e": "d1-d2-e",
    })

    # --- binary reduction over four leaves ---
    graph = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "b3": (f, "a"),
        "b4": (f, "a"),
        "c1": (f, "b1", "b2"),
        "c2": (f, "b3", "b4"),
        "d": (f, "c1", "c2"),
    }
    for rename in either:
        assert fuse(graph, ave_width=1.9, rename_keys=rename) == with_deps(graph)
    expected = with_deps({
        "a": 1,
        "c1": pair,
        "c2": pair,
        "d": (f, "c1", "c2"),
    })
    for width in (2, 2.9):
        assert fuse(graph, ave_width=width, rename_keys=False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-c1": pair,
        "b3-b4-c2": pair,
        "d": (f, "c1", "c2"),
        "c1": "b1-b2-c1",
        "c2": "b3-b4-c2",
    })
    for width in (2, 2.9):
        assert fuse(graph, ave_width=width, rename_keys=True) == expected
    assert fuse(graph, ave_width=3, rename_keys=False) == with_deps(
        {"a": 1, "d": quad}
    )
    assert fuse(graph, ave_width=3, rename_keys=True) == with_deps({
        "a": 1,
        "b1-b2-b3-b4-c1-c2-d": quad,
        "d": "b1-b2-b3-b4-c1-c2-d",
    })

    # --- binary reduction over eight leaves ---
    graph = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "b3": (f, "a"),
        "b4": (f, "a"),
        "b5": (f, "a"),
        "b6": (f, "a"),
        "b7": (f, "a"),
        "b8": (f, "a"),
        "c1": (f, "b1", "b2"),
        "c2": (f, "b3", "b4"),
        "c3": (f, "b5", "b6"),
        "c4": (f, "b7", "b8"),
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "e": (f, "d1", "d2"),
    }
    for rename in either:
        assert fuse(graph, ave_width=1.9, rename_keys=rename) == with_deps(graph)
    expected = with_deps({
        "a": 1,
        "c1": pair,
        "c2": pair,
        "c3": pair,
        "c4": pair,
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "e": (f, "d1", "d2"),
    })
    for width in (2, 2.9):
        assert fuse(graph, ave_width=width, rename_keys=False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-c1": pair,
        "b3-b4-c2": pair,
        "b5-b6-c3": pair,
        "b7-b8-c4": pair,
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "e": (f, "d1", "d2"),
        "c1": "b1-b2-c1",
        "c2": "b3-b4-c2",
        "c3": "b5-b6-c3",
        "c4": "b7-b8-c4",
    })
    for width in (2, 2.9):
        assert fuse(graph, ave_width=width, rename_keys=True) == expected
    expected = with_deps({
        "a": 1,
        "d1": quad,
        "d2": quad,
        "e": (f, "d1", "d2"),
    })
    for width in (3, 4.6):
        assert fuse(graph, ave_width=width, rename_keys=False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-b3-b4-c1-c2-d1": quad,
        "b5-b6-b7-b8-c3-c4-d2": quad,
        "e": (f, "d1", "d2"),
        "d1": "b1-b2-b3-b4-c1-c2-d1",
        "d2": "b5-b6-b7-b8-c3-c4-d2",
    })
    for width in (3, 4.6):
        assert fuse(graph, ave_width=width, rename_keys=True) == expected
    assert fuse(graph, ave_width=4.7, rename_keys=False) == with_deps(
        {"a": 1, "e": octo}
    )
    assert fuse(graph, ave_width=4.7, rename_keys=True) == with_deps({
        "a": 1,
        "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e": octo,
        "e": "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e",
    })

    # --- binary reduction over sixteen leaves ---
    graph = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "b3": (f, "a"),
        "b4": (f, "a"),
        "b5": (f, "a"),
        "b6": (f, "a"),
        "b7": (f, "a"),
        "b8": (f, "a"),
        "b9": (f, "a"),
        "b10": (f, "a"),
        "b11": (f, "a"),
        "b12": (f, "a"),
        "b13": (f, "a"),
        "b14": (f, "a"),
        "b15": (f, "a"),
        "b16": (f, "a"),
        "c1": (f, "b1", "b2"),
        "c2": (f, "b3", "b4"),
        "c3": (f, "b5", "b6"),
        "c4": (f, "b7", "b8"),
        "c5": (f, "b9", "b10"),
        "c6": (f, "b11", "b12"),
        "c7": (f, "b13", "b14"),
        "c8": (f, "b15", "b16"),
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "d3": (f, "c5", "c6"),
        "d4": (f, "c7", "c8"),
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
    }
    for rename in either:
        assert fuse(graph, ave_width=1.9, rename_keys=rename) == with_deps(graph)
    expected = with_deps({
        "a": 1,
        "c1": pair,
        "c2": pair,
        "c3": pair,
        "c4": pair,
        "c5": pair,
        "c6": pair,
        "c7": pair,
        "c8": pair,
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "d3": (f, "c5", "c6"),
        "d4": (f, "c7", "c8"),
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
    })
    for width in (2, 2.9):
        assert fuse(graph, ave_width=width, rename_keys=False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-c1": pair,
        "b3-b4-c2": pair,
        "b5-b6-c3": pair,
        "b7-b8-c4": pair,
        "b10-b9-c5": pair,
        "b11-b12-c6": pair,
        "b13-b14-c7": pair,
        "b15-b16-c8": pair,
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "d3": (f, "c5", "c6"),
        "d4": (f, "c7", "c8"),
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
        "c1": "b1-b2-c1",
        "c2": "b3-b4-c2",
        "c3": "b5-b6-c3",
        "c4": "b7-b8-c4",
        "c5": "b10-b9-c5",
        "c6": "b11-b12-c6",
        "c7": "b13-b14-c7",
        "c8": "b15-b16-c8",
    })
    for width in (2, 2.9):
        assert fuse(graph, ave_width=width, rename_keys=True) == expected
    expected = with_deps({
        "a": 1,
        "d1": quad,
        "d2": quad,
        "d3": quad,
        "d4": quad,
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
    })
    for width in (3, 4.6):
        assert fuse(graph, ave_width=width, rename_keys=False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-b3-b4-c1-c2-d1": quad,
        "b5-b6-b7-b8-c3-c4-d2": quad,
        "b10-b11-b12-b9-c5-c6-d3": quad,
        "b13-b14-b15-b16-c7-c8-d4": quad,
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
        "d1": "b1-b2-b3-b4-c1-c2-d1",
        "d2": "b5-b6-b7-b8-c3-c4-d2",
        "d3": "b10-b11-b12-b9-c5-c6-d3",
        "d4": "b13-b14-b15-b16-c7-c8-d4",
    })
    for width in (3, 4.6):
        assert fuse(graph, ave_width=width, rename_keys=True) == expected
    expected = with_deps({
        "a": 1,
        "e1": octo,
        "e2": octo,
        "f": (f, "e1", "e2"),
    })
    for width in (4.7, 7.4):
        assert fuse(graph, ave_width=width, rename_keys=False) == expected
    first_half = "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e1"
    second_half = "b10-b11-b12-b13-b14-b15-b16-b9-c5-c6-c7-c8-d3-d4-e2"
    expected = with_deps({
        "a": 1,
        first_half: octo,
        second_half: octo,
        "f": (f, "e1", "e2"),
        "e1": first_half,
        "e2": second_half,
    })
    for width in (4.7, 7.4):
        assert fuse(graph, ave_width=width, rename_keys=True) == expected
    everything = (f, octo, octo)
    assert fuse(graph, ave_width=7.5, rename_keys=False) == with_deps(
        {"a": 1, "f": everything}
    )
    all_keys = "b1-b10-b11-b12-b13-b14-b15-b16-b2-b3-b4-b5-b6-b7-b8-b9-c1-c2-c3-c4-c5-c6-c7-c8-d1-d2-d3-d4-e1-e2-f"
    assert fuse(graph, ave_width=7.5, rename_keys=True) == with_deps({
        "a": 1,
        all_keys: everything,
        "f": all_keys,
    })

    # --- a single task on a literal ---
    graph = {
        "a": 1,
        "b": (f, "a"),
    }
    assert fuse(graph, ave_width=1, rename_keys=False) == with_deps(
        {"b": (f, 1)}
    )
    assert fuse(graph, ave_width=1, rename_keys=True) == with_deps(
        {"a-b": (f, 1), "b": "a-b"}
    )

    # --- a straight chain of three tasks on a literal ---
    graph = {
        "a": 1,
        "b": (f, "a"),
        "c": (f, "b"),
        "d": (f, "c"),
    }
    chain = (f, (f, (f, 1)))
    assert fuse(graph, ave_width=1, rename_keys=False) == with_deps(
        {"d": chain}
    )
    assert fuse(graph, ave_width=1, rename_keys=True) == with_deps(
        {"a-b-c-d": chain, "d": "a-b-c-d"}
    )

    # --- a chain in which every task also reads "a" ---
    graph = {
        "a": 1,
        "b": (f, "a"),
        "c": (f, "a", "b"),
        "d": (f, "a", "c"),
    }
    chain = (f, "a", (f, "a", leaf))
    assert fuse(graph, ave_width=1, rename_keys=False) == with_deps(
        {"a": 1, "d": chain}
    )
    assert fuse(graph, ave_width=1, rename_keys=True) == with_deps(
        {"a": 1, "b-c-d": chain, "d": "b-c-d"}
    )

    # --- a long branch and a short branch joined by "f" ---
    graph = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "c1": (f, "b1"),
        "d1": (f, "c1"),
        "e1": (f, "d1"),
        "f": (f, "e1", "b2"),
    }
    long_branch = (f, (f, (f, leaf)))
    expected = with_deps({
        "a": 1,
        "b2": leaf,
        "e1": long_branch,
        "f": (f, "e1", "b2"),
    })
    for width in (1, 1.9):
        assert fuse(graph, ave_width=width, rename_keys=False) == expected
    expected = with_deps({
        "a": 1,
        "b2": leaf,
        "b1-c1-d1-e1": long_branch,
        "f": (f, "e1", "b2"),
        "e1": "b1-c1-d1-e1",
    })
    for width in (1, 1.9):
        assert fuse(graph, ave_width=width, rename_keys=True) == expected
    joined = (f, long_branch, leaf)
    assert fuse(graph, ave_width=2, rename_keys=False) == with_deps(
        {"a": 1, "f": joined}
    )
    assert fuse(graph, ave_width=2, rename_keys=True) == with_deps({
        "a": 1,
        "b1-b2-c1-d1-e1-f": joined,
        "f": "b1-b2-c1-d1-e1-f",
    })

    # --- same shape, but every task on the long branch also reads "a" ---
    graph = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "c1": (f, "a", "b1"),
        "d1": (f, "a", "c1"),
        "e1": (f, "a", "d1"),
        "f": (f, "a", "e1", "b2"),
    }
    long_branch = (f, "a", (f, "a", (f, "a", leaf)))
    expected = with_deps({
        "a": 1,
        "b2": leaf,
        "e1": long_branch,
        "f": (f, "a", "e1", "b2"),
    })
    for width in (1, 1.9):
        assert fuse(graph, ave_width=width, rename_keys=False) == expected
    expected = with_deps({
        "a": 1,
        "b2": leaf,
        "b1-c1-d1-e1": long_branch,
        "f": (f, "a", "e1", "b2"),
        "e1": "b1-c1-d1-e1",
    })
    for width in (1, 1.9):
        assert fuse(graph, ave_width=width, rename_keys=True) == expected
    joined = (f, "a", long_branch, leaf)
    assert fuse(graph, ave_width=2, rename_keys=False) == with_deps(
        {"a": 1, "f": joined}
    )
    assert fuse(graph, ave_width=2, rename_keys=True) == with_deps({
        "a": 1,
        "b1-b2-c1-d1-e1-f": joined,
        "f": "b1-b2-c1-d1-e1-f",
    })

    # --- three parallel chains reduced by "e", then a chain e -> f -> g ---
    graph = {
        "a": 1,
        "b1": (f, "a"),
        "b2": (f, "a"),
        "b3": (f, "a"),
        "c1": (f, "b1"),
        "c2": (f, "b2"),
        "c3": (f, "b3"),
        "d1": (f, "c1"),
        "d2": (f, "c2"),
        "d3": (f, "c3"),
        "e": (f, "d1", "d2", "d3"),
        "f": (f, "e"),
        "g": (f, "f"),
    }
    column = (f, (f, leaf))
    tail = (f, (f, (f, "d1", "d2", "d3")))
    assert fuse(graph, ave_width=1, rename_keys=False) == with_deps({
        "a": 1,
        "d1": column,
        "d2": column,
        "d3": column,
        "g": tail,
    })
    assert fuse(graph, ave_width=1, rename_keys=True) == with_deps({
        "a": 1,
        "b1-c1-d1": column,
        "b2-c2-d2": column,
        "b3-c3-d3": column,
        "e-f-g": tail,
        "d1": "b1-c1-d1",
        "d2": "b2-c2-d2",
        "d3": "b3-c3-d3",
        "g": "e-f-g",
    })

    # --- "b" and "d" each feed both a chain and the task ending that chain ---
    graph = {
        "a": 1,
        "b": (f, "a"),
        "c": (f, "b"),
        "d": (f, "b", "c"),
        "e": (f, "d"),
        "f": (f, "e"),
        "g": (f, "d", "f"),
    }
    middle = (f, "b", (f, "b"))
    top = (f, "d", (f, (f, "d")))
    assert fuse(graph, ave_width=1, rename_keys=False) == with_deps({
        "b": (f, 1),
        "d": middle,
        "g": top,
    })
    assert fuse(graph, ave_width=1, rename_keys=True) == with_deps({
        "a-b": (f, 1),
        "c-d": middle,
        "e-f-g": top,
        "b": "a-b",
        "d": "c-d",
        "g": "e-f-g",
    })
Example #28
0
def test_fuse_subgraphs():
    """Exercise ``fuse`` with ``fuse_subgraphs=True`` for several requested keys.

    The same nine-task graph is fused while keeping, in turn, ``inc-6``,
    ``add-2``, ``inc-2`` and finally both ``inc-2`` and ``add-2``.  Each
    result is compared against a hand-written graph passed through
    ``with_deps``.
    """
    graph = {
        "x-1": 1,
        "inc-1": (inc, "x-1"),
        "inc-2": (inc, "inc-1"),
        "add-1": (add, "x-1", "inc-2"),
        "inc-3": (inc, "add-1"),
        "inc-4": (inc, "inc-3"),
        "add-2": (add, "add-1", "inc-4"),
        "inc-5": (inc, "add-2"),
        "inc-6": (inc, "inc-5"),
    }

    # Inlined task bodies that recur in the expected graphs below.
    add_1_inlined = (add, "x-1", (inc, (inc, "x-1")))
    add_2_inlined = (add, "add-1", (inc, (inc, "add-1")))
    inc_6_inlined = (inc, (inc, add_2_inlined))
    inc_2_inlined = (inc, (inc, "x-1"))

    # Keep only the final output; keys are renamed by default.
    fused = fuse(graph, "inc-6", fuse_subgraphs=True)
    expected = with_deps({
        "inc-6": "add-inc-x-1",
        "add-inc-x-1": (
            SubgraphCallable(
                {"x-1": 1, "add-1": add_1_inlined, "inc-6": inc_6_inlined},
                "inc-6",
                (),
            ),
        ),
    })
    assert fused == expected

    # Same request with renaming switched off.
    fused = fuse(graph, "inc-6", fuse_subgraphs=True, rename_keys=False)
    expected = with_deps({
        "inc-6": (
            SubgraphCallable(
                {"x-1": 1, "add-1": add_1_inlined, "inc-6": inc_6_inlined},
                "inc-6",
                (),
            ),
        ),
    })
    assert fused == expected

    # Keep an intermediate key: the tail after it stays a plain task.
    fused = fuse(graph, "add-2", fuse_subgraphs=True)
    expected = with_deps({
        "add-inc-x-1": (
            SubgraphCallable(
                {"x-1": 1, "add-1": add_1_inlined, "add-2": add_2_inlined},
                "add-2",
                (),
            ),
        ),
        "add-2": "add-inc-x-1",
        "inc-6": (inc, (inc, "add-2")),
    })
    assert fused == expected

    # The order of the subgraph's input keys is unstable, so accept
    # every permutation of them.
    fused = fuse(graph, "inc-2", fuse_subgraphs=True)
    candidates = [
        with_deps({
            "x-1": 1,
            "inc-2": inc_2_inlined,
            "inc-6": "inc-add-1",
            "inc-add-1": (
                SubgraphCallable(
                    {"add-1": (add, "x-1", "inc-2"), "inc-6": inc_6_inlined},
                    "inc-6",
                    inkeys,
                ),
            ) + inkeys,
        })
        for inkeys in itertools.permutations(("x-1", "inc-2"))
    ]
    assert fused in candidates

    # Two kept keys at once; again the input-key order is unstable.
    fused = fuse(graph, ["inc-2", "add-2"], fuse_subgraphs=True)
    candidates = [
        with_deps({
            "x-1": 1,
            "inc-2": inc_2_inlined,
            "inc-add-1": (
                SubgraphCallable(
                    {"add-1": (add, "x-1", "inc-2"), "add-2": add_2_inlined},
                    "add-2",
                    inkeys,
                ),
            ) + inkeys,
            "add-2": "inc-add-1",
            "inc-6": (inc, (inc, "add-2")),
        })
        for inkeys in itertools.permutations(("x-1", "inc-2"))
    ]
    assert fused in candidates
Example #29
0
def test_fuse_reductions_multiple_input():
    """Check ``fuse`` on reduction-shaped graphs that start from two inputs.

    Each section builds a graph over the literals ``a1`` and ``a2`` and
    asserts the fused result for several ``ave_width`` values, with and
    without key renaming.
    """

    def f(*args):
        return args

    def fused(graph, width, rename):
        # Keeps each assertion below short.
        return fuse(graph, ave_width=width, rename_keys=rename)

    # One reducer over both inputs, followed by a single consumer.
    graph = {
        "a1": 1,
        "a2": 2,
        "b": (f, "a1", "a2"),
        "c": (f, "b"),
    }
    inlined = (f, (f, 1, 2))
    assert fused(graph, 2, False) == with_deps({"c": inlined})
    assert fused(graph, 2, True) == with_deps({
        "a1-a2-b-c": inlined,
        "c": "a1-a2-b-c",
    })
    chained = (f, (f, "a1", "a2"))
    assert fused(graph, 1, False) == with_deps({
        "a1": 1,
        "a2": 2,
        "c": chained,
    })
    assert fused(graph, 1, True) == with_deps({
        "a1": 1,
        "a2": 2,
        "b-c": chained,
        "c": "b-c",
    })

    # Three middle tasks, one of which reads both inputs.
    graph = {
        "a1": 1,
        "a2": 2,
        "b1": (f, "a1"),
        "b2": (f, "a1", "a2"),
        "b3": (f, "a2"),
        "c": (f, "b1", "b2", "b3"),
    }
    expected = with_deps(graph)
    for rename in (False, True):
        for width in (1, 2.9):
            assert fused(graph, width, rename) == expected
    merged = (f, (f, "a1"), (f, "a1", "a2"), (f, "a2"))
    assert fused(graph, 3, False) == with_deps({
        "a1": 1,
        "a2": 2,
        "c": merged,
    })
    assert fused(graph, 3, True) == with_deps({
        "a1": 1,
        "a2": 2,
        "b1-b2-b3-c": merged,
        "c": "b1-b2-b3-c",
    })

    # Two outputs that share the middle task ``b2``.
    graph = {
        "a1": 1,
        "a2": 2,
        "b1": (f, "a1"),
        "b2": (f, "a1", "a2"),
        "b3": (f, "a2"),
        "c1": (f, "b1", "b2"),
        "c2": (f, "b2", "b3"),
    }
    for rename in (False, True):
        assert fused(graph, 1, rename) == with_deps(graph)
    left = (f, (f, "a1"), "b2")
    right = (f, "b2", (f, "a2"))
    assert fused(graph, 2, False) == with_deps({
        "a1": 1,
        "a2": 2,
        "b2": (f, "a1", "a2"),
        "c1": left,
        "c2": right,
    })
    assert fused(graph, 2, True) == with_deps({
        "a1": 1,
        "a2": 2,
        "b2": (f, "a1", "a2"),
        "b1-c1": left,
        "b3-c2": right,
        "c1": "b1-c1",
        "c2": "b3-c2",
    })

    # Same as above, with a final task joining the two outputs.
    graph = {
        "a1": 1,
        "a2": 2,
        "b1": (f, "a1"),
        "b2": (f, "a1", "a2"),
        "b3": (f, "a2"),
        "c1": (f, "b1", "b2"),
        "c2": (f, "b2", "b3"),
        "d": (f, "c1", "c2"),
    }
    for rename in (False, True):
        assert fused(graph, 1, rename) == with_deps(graph)

    # A more aggressive heuristic could do this at `ave_width=2`.  Perhaps
    # we can improve this.  Nevertheless, this is behaving as intended.
    joined = (f, (f, (f, "a1"), "b2"), (f, "b2", (f, "a2")))
    assert fused(graph, 3, False) == with_deps({
        "a1": 1,
        "a2": 2,
        "b2": (f, "a1", "a2"),
        "d": joined,
    })
    assert fused(graph, 3, True) == with_deps({
        "a1": 1,
        "a2": 2,
        "b2": (f, "a1", "a2"),
        "b1-b3-c1-c2-d": joined,
        "d": "b1-b3-c1-c2-d",
    })
Example #30
0
def _fuse_delayed(d):  # type: ignore[no-untyped-def]
    """Return a new ``Delayed`` whose task graph has been run through ``fuse``.

    The graph held in ``d.dask`` is converted with
    ``dask.utils.ensure_dict``, fused with the default ``fuse`` settings,
    and wrapped again under the original key ``d._key``.  The second value
    returned by ``fuse`` is discarded.
    """
    # Approach taken from https://github.com/dask/dask/issues/6219
    plain_graph = dask.utils.ensure_dict(d.dask)
    fused_graph, _unused = fuse(plain_graph)
    return Delayed(d._key, fused_graph)
Example #31
0
def test_fuse_stressed():
    """Fuse a graph with tuple keys and check the result is self-consistent.

    The key names suggest the graph came from a blocked cholesky
    computation.  The only assertion is that the pair returned by ``fuse``
    equals ``with_deps`` applied to the fused graph it returned.
    """

    def f(*args):
        return args

    # Long key names used throughout the graph.
    source = "array-original-27b9f9d257a80fa6adae06a98faf71eb"
    array = "array-27b9f9d257a80fa6adae06a98faf71eb"
    chol = "cholesky-26a6b670a8aabb7e2f8936db7ccb6a88"
    upper = "cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88"
    lt_dot = "cholesky-lt-dot-26a6b670a8aabb7e2f8936db7ccb6a88"

    # The two index ranges that appear in the slicing tasks.
    first = slice(0, 10, None)
    second = slice(10, 20, None)

    # Entry order matches the original literal.
    graph = {
        source: 1,
        (upper, 0, 0): (f, (chol, 0, 0)),
        (chol, 1, 0): (f, (upper, 0, 1)),
        (array, 0, 0): (f, source, (first, first)),
        # This value is a bare key (an alias), not a function call.
        (upper, 1, 0): (chol, 0, 1),
        (chol, 1, 1): (
            f,
            (f, (array, 1, 1), (f, [(lt_dot, 1, 0, 1, 0)])),
        ),
        (lt_dot, 1, 0, 1, 0): (f, (chol, 1, 0), (upper, 0, 1)),
        (array, 0, 1): (f, source, (first, second)),
        (upper, 1, 1): (f, (chol, 1, 1)),
        (chol, 0, 1): (f, (10, 10)),
        (array, 1, 1): (f, source, (second, second)),
        (upper, 0, 1): (f, (chol, 0, 0), (array, 0, 1)),
        (chol, 0, 0): (f, (array, 0, 0)),
    }
    wanted = {
        (upper, 0, 0),
        (upper, 0, 1),
        (upper, 1, 0),
        (upper, 1, 1),
    }
    result = fuse(graph, keys=wanted, ave_width=2, rename_keys=True)
    assert result == with_deps(result[0])
Example #32
0
def test_fuse():
    """Check fusion of linear chains, with and without key renaming.

    Every graph is fused twice through ``fuse2`` (defined elsewhere in the
    file), once with ``rename_keys=False`` and once with
    ``rename_keys=True``, and compared with a hand-written expectation.
    """
    # Per the original note, fuse2 tests both `fuse` and `fuse_linear`.
    fuse_both = fuse2

    # A single chain w <- x <- y <- z on top of two literals.
    graph = {
        "w": (inc, "x"),
        "x": (inc, "y"),
        "y": (inc, "z"),
        "z": (add, "a", "b"),
        "a": 1,
        "b": 2,
    }
    chain = (inc, (inc, (inc, (add, "a", "b"))))
    assert fuse_both(graph, rename_keys=False) == with_deps({
        "w": chain,
        "a": 1,
        "b": 2,
    })
    assert fuse_both(graph, rename_keys=True) == with_deps({
        "z-y-x-w": chain,
        "a": 1,
        "b": 2,
        "w": "z-y-x-w",
    })

    # Adding a second consumer of 'y' splits the chain at 'y'.
    graph = {
        "NEW": (inc, "y"),
        "w": (inc, "x"),
        "x": (inc, "y"),
        "y": (inc, "z"),
        "z": (add, "a", "b"),
        "a": 1,
        "b": 2,
    }
    assert fuse_both(graph, rename_keys=False) == with_deps({
        "NEW": (inc, "y"),
        "w": (inc, (inc, "y")),
        "y": (inc, (add, "a", "b")),
        "a": 1,
        "b": 2,
    })
    assert fuse_both(graph, rename_keys=True) == with_deps({
        "NEW": (inc, "z-y"),
        "x-w": (inc, (inc, "z-y")),
        "z-y": (inc, (add, "a", "b")),
        "a": 1,
        "b": 2,
        "w": "x-w",
        "y": "z-y",
    })

    # Longer chains above and below the shared key 'y'.
    graph = {
        "v": (inc, "y"),
        "u": (inc, "w"),
        "w": (inc, "x"),
        "x": (inc, "y"),
        "y": (inc, "z"),
        "z": (add, "a", "b"),
        "a": (inc, "c"),
        "b": (inc, "d"),
        "c": 1,
        "d": 2,
    }
    assert fuse_both(graph, rename_keys=False) == with_deps({
        "u": (inc, (inc, (inc, "y"))),
        "v": (inc, "y"),
        "y": (inc, (add, "a", "b")),
        "a": (inc, 1),
        "b": (inc, 2),
    })
    assert fuse_both(graph, rename_keys=True) == with_deps({
        "x-w-u": (inc, (inc, (inc, "z-y"))),
        "v": (inc, "z-y"),
        "z-y": (inc, (add, "c-a", "d-b")),
        "c-a": (inc, 1),
        "d-b": (inc, 2),
        "a": "c-a",
        "b": "d-b",
        "u": "x-w-u",
        "y": "z-y",
    })

    # 'x' has three consumers; only the c -> d tail is a chain.
    graph = {
        "a": (inc, "x"),
        "b": (inc, "x"),
        "c": (inc, "x"),
        "d": (inc, "c"),
        "x": (inc, "y"),
        "y": 0,
    }
    assert fuse_both(graph, rename_keys=False) == with_deps({
        "a": (inc, "x"),
        "b": (inc, "x"),
        "d": (inc, (inc, "x")),
        "x": (inc, 0),
    })
    assert fuse_both(graph, rename_keys=True) == with_deps({
        "a": (inc, "y-x"),
        "b": (inc, "y-x"),
        "c-d": (inc, (inc, "y-x")),
        "y-x": (inc, 0),
        "d": "c-d",
        "x": "y-x",
    })

    # 'c' references 'b' twice in one task.
    graph = {
        "a": 1,
        "b": (inc, "a"),
        "c": (add, "b", "b"),
    }
    assert fuse_both(graph, rename_keys=False) == with_deps({
        "b": (inc, 1),
        "c": (add, "b", "b"),
    })
    assert fuse_both(graph, rename_keys=True) == with_deps({
        "a-b": (inc, 1),
        "c": (add, "a-b", "a-b"),
        "b": "a-b",
    })
Example #33
0
def test_fuse_reductions_single_input():
    """Check ``fuse`` on reduction-shaped graphs that all start from one input.

    Each section builds a graph rooted at the literal ``a`` and asserts the
    fused result for several ``ave_width`` values, with and without key
    renaming.  Assertions that share an expected graph are run in a loop,
    in the same order as the widths were originally listed.
    """

    def f(*args):
        return args

    def fused(graph, width, rename):
        # Keeps each assertion below short.
        return fuse(graph, ave_width=width, rename_keys=rename)

    # Task shapes that recur below: one call on 'a', then balanced trees
    # of two, four and eight such calls.
    leaf = (f, "a")
    pair = (f, leaf, leaf)
    quad = (f, pair, pair)
    octet = (f, quad, quad)

    # Two branches with different numbers of arguments.
    graph = {
        "a": 1,
        "b1": leaf,
        "b2": (f, "a", "a"),
        "c": (f, "b1", "b2"),
    }
    for rename in (False, True):
        assert fused(graph, 1.9, rename) == with_deps(graph)
    body = (f, leaf, (f, "a", "a"))
    assert fused(graph, 2, False) == with_deps({"a": 1, "c": body})
    assert fused(graph, 2, True) == with_deps({
        "a": 1,
        "b1-b2-c": body,
        "c": "b1-b2-c",
    })

    # Three branches with different numbers of arguments.
    graph = {
        "a": 1,
        "b1": leaf,
        "b2": (f, "a", "a"),
        "b3": (f, "a", "a", "a"),
        "c": (f, "b1", "b2", "b3"),
    }
    for rename in (False, True):
        assert fused(graph, 2.9, rename) == with_deps(graph)
    body = (f, leaf, (f, "a", "a"), (f, "a", "a", "a"))
    assert fused(graph, 3, False) == with_deps({"a": 1, "c": body})
    assert fused(graph, 3, True) == with_deps({
        "a": 1,
        "b1-b2-b3-c": body,
        "c": "b1-b2-b3-c",
    })

    # The reducer also reads 'a' directly.
    graph = {
        "a": 1,
        "b1": leaf,
        "b2": leaf,
        "c": (f, "a", "b1", "b2"),
    }
    for rename in (False, True):
        assert fused(graph, 1.9, rename) == with_deps(graph)
    body = (f, "a", leaf, leaf)
    assert fused(graph, 2, False) == with_deps({"a": 1, "c": body})
    assert fused(graph, 2, True) == with_deps({
        "a": 1,
        "b1-b2-c": body,
        "c": "b1-b2-c",
    })

    # Two width-2 reductions stacked on top of each other.
    graph = {
        "a": 1,
        "b1": leaf,
        "b2": leaf,
        "c": (f, "b1", "b2"),
        "d1": (f, "c"),
        "d2": (f, "c"),
        "e": (f, "d1", "d2"),
    }
    for rename in (False, True):
        assert fused(graph, 1.9, rename) == with_deps(graph)
    upper_body = (f, (f, "c"), (f, "c"))
    assert fused(graph, 2, False) == with_deps({
        "a": 1,
        "c": pair,
        "e": upper_body,
    })
    assert fused(graph, 2, True) == with_deps({
        "a": 1,
        "b1-b2-c": pair,
        "d1-d2-e": upper_body,
        "c": "b1-b2-c",
        "e": "d1-d2-e",
    })

    # Binary tree with four leaves.
    graph = {
        "a": 1,
        "b1": leaf,
        "b2": leaf,
        "b3": leaf,
        "b4": leaf,
        "c1": (f, "b1", "b2"),
        "c2": (f, "b3", "b4"),
        "d": (f, "c1", "c2"),
    }
    for rename in (False, True):
        assert fused(graph, 1.9, rename) == with_deps(graph)
    expected = with_deps({
        "a": 1,
        "c1": pair,
        "c2": pair,
        "d": (f, "c1", "c2"),
    })
    for width in (2, 2.9):
        assert fused(graph, width, False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-c1": pair,
        "b3-b4-c2": pair,
        "d": (f, "c1", "c2"),
        "c1": "b1-b2-c1",
        "c2": "b3-b4-c2",
    })
    for width in (2, 2.9):
        assert fused(graph, width, True) == expected
    assert fused(graph, 3, False) == with_deps({"a": 1, "d": quad})
    assert fused(graph, 3, True) == with_deps({
        "a": 1,
        "b1-b2-b3-b4-c1-c2-d": quad,
        "d": "b1-b2-b3-b4-c1-c2-d",
    })

    # Binary tree with eight leaves.
    graph = {
        "a": 1,
        "b1": leaf,
        "b2": leaf,
        "b3": leaf,
        "b4": leaf,
        "b5": leaf,
        "b6": leaf,
        "b7": leaf,
        "b8": leaf,
        "c1": (f, "b1", "b2"),
        "c2": (f, "b3", "b4"),
        "c3": (f, "b5", "b6"),
        "c4": (f, "b7", "b8"),
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "e": (f, "d1", "d2"),
    }
    for rename in (False, True):
        assert fused(graph, 1.9, rename) == with_deps(graph)
    expected = with_deps({
        "a": 1,
        "c1": pair,
        "c2": pair,
        "c3": pair,
        "c4": pair,
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "e": (f, "d1", "d2"),
    })
    for width in (2, 2.9):
        assert fused(graph, width, False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-c1": pair,
        "b3-b4-c2": pair,
        "b5-b6-c3": pair,
        "b7-b8-c4": pair,
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "e": (f, "d1", "d2"),
        "c1": "b1-b2-c1",
        "c2": "b3-b4-c2",
        "c3": "b5-b6-c3",
        "c4": "b7-b8-c4",
    })
    for width in (2, 2.9):
        assert fused(graph, width, True) == expected
    expected = with_deps({
        "a": 1,
        "d1": quad,
        "d2": quad,
        "e": (f, "d1", "d2"),
    })
    for width in (3, 4.6):
        assert fused(graph, width, False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-b3-b4-c1-c2-d1": quad,
        "b5-b6-b7-b8-c3-c4-d2": quad,
        "e": (f, "d1", "d2"),
        "d1": "b1-b2-b3-b4-c1-c2-d1",
        "d2": "b5-b6-b7-b8-c3-c4-d2",
    })
    for width in (3, 4.6):
        assert fused(graph, width, True) == expected
    assert fused(graph, 4.7, False) == with_deps({"a": 1, "e": octet})
    assert fused(graph, 4.7, True) == with_deps({
        "a": 1,
        "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e": octet,
        "e": "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e",
    })

    # Binary tree with sixteen leaves.
    graph = {
        "a": 1,
        "b1": leaf,
        "b2": leaf,
        "b3": leaf,
        "b4": leaf,
        "b5": leaf,
        "b6": leaf,
        "b7": leaf,
        "b8": leaf,
        "b9": leaf,
        "b10": leaf,
        "b11": leaf,
        "b12": leaf,
        "b13": leaf,
        "b14": leaf,
        "b15": leaf,
        "b16": leaf,
        "c1": (f, "b1", "b2"),
        "c2": (f, "b3", "b4"),
        "c3": (f, "b5", "b6"),
        "c4": (f, "b7", "b8"),
        "c5": (f, "b9", "b10"),
        "c6": (f, "b11", "b12"),
        "c7": (f, "b13", "b14"),
        "c8": (f, "b15", "b16"),
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "d3": (f, "c5", "c6"),
        "d4": (f, "c7", "c8"),
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
    }
    for rename in (False, True):
        assert fused(graph, 1.9, rename) == with_deps(graph)
    expected = with_deps({
        "a": 1,
        "c1": pair,
        "c2": pair,
        "c3": pair,
        "c4": pair,
        "c5": pair,
        "c6": pair,
        "c7": pair,
        "c8": pair,
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "d3": (f, "c5", "c6"),
        "d4": (f, "c7", "c8"),
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
    })
    for width in (2, 2.9):
        assert fused(graph, width, False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-c1": pair,
        "b3-b4-c2": pair,
        "b5-b6-c3": pair,
        "b7-b8-c4": pair,
        "b10-b9-c5": pair,
        "b11-b12-c6": pair,
        "b13-b14-c7": pair,
        "b15-b16-c8": pair,
        "d1": (f, "c1", "c2"),
        "d2": (f, "c3", "c4"),
        "d3": (f, "c5", "c6"),
        "d4": (f, "c7", "c8"),
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
        "c1": "b1-b2-c1",
        "c2": "b3-b4-c2",
        "c3": "b5-b6-c3",
        "c4": "b7-b8-c4",
        "c5": "b10-b9-c5",
        "c6": "b11-b12-c6",
        "c7": "b13-b14-c7",
        "c8": "b15-b16-c8",
    })
    for width in (2, 2.9):
        assert fused(graph, width, True) == expected
    expected = with_deps({
        "a": 1,
        "d1": quad,
        "d2": quad,
        "d3": quad,
        "d4": quad,
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
    })
    for width in (3, 4.6):
        assert fused(graph, width, False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-b3-b4-c1-c2-d1": quad,
        "b5-b6-b7-b8-c3-c4-d2": quad,
        "b10-b11-b12-b9-c5-c6-d3": quad,
        "b13-b14-b15-b16-c7-c8-d4": quad,
        "e1": (f, "d1", "d2"),
        "e2": (f, "d3", "d4"),
        "f": (f, "e1", "e2"),
        "d1": "b1-b2-b3-b4-c1-c2-d1",
        "d2": "b5-b6-b7-b8-c3-c4-d2",
        "d3": "b10-b11-b12-b9-c5-c6-d3",
        "d4": "b13-b14-b15-b16-c7-c8-d4",
    })
    for width in (3, 4.6):
        assert fused(graph, width, True) == expected
    expected = with_deps({
        "a": 1,
        "e1": octet,
        "e2": octet,
        "f": (f, "e1", "e2"),
    })
    for width in (4.7, 7.4):
        assert fused(graph, width, False) == expected
    expected = with_deps({
        "a": 1,
        "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e1": octet,
        "b10-b11-b12-b13-b14-b15-b16-b9-c5-c6-c7-c8-d3-d4-e2": octet,
        "f": (f, "e1", "e2"),
        "e1": "b1-b2-b3-b4-b5-b6-b7-b8-c1-c2-c3-c4-d1-d2-e1",
        "e2": "b10-b11-b12-b13-b14-b15-b16-b9-c5-c6-c7-c8-d3-d4-e2",
    })
    for width in (4.7, 7.4):
        assert fused(graph, width, True) == expected
    whole_tree = (f, octet, octet)
    assert fused(graph, 7.5, False) == with_deps({"a": 1, "f": whole_tree})
    assert fused(graph, 7.5, True) == with_deps({
        "a": 1,
        "b1-b10-b11-b12-b13-b14-b15-b16-b2-b3-b4-b5-b6-b7-b8-b9-c1-c2-c3-c4-c5-c6-c7-c8-d1-d2-d3-d4-e1-e2-f": whole_tree,
        "f": "b1-b10-b11-b12-b13-b14-b15-b16-b2-b3-b4-b5-b6-b7-b8-b9-c1-c2-c3-c4-c5-c6-c7-c8-d1-d2-d3-d4-e1-e2-f",
    })

    # Shortest possible chain.
    graph = {
        "a": 1,
        "b": leaf,
    }
    assert fused(graph, 1, False) == with_deps({"b": (f, 1)})
    assert fused(graph, 1, True) == with_deps({
        "a-b": (f, 1),
        "b": "a-b",
    })

    # Four-step chain.
    graph = {
        "a": 1,
        "b": leaf,
        "c": (f, "b"),
        "d": (f, "c"),
    }
    body = (f, (f, (f, 1)))
    assert fused(graph, 1, False) == with_deps({"d": body})
    assert fused(graph, 1, True) == with_deps({
        "a-b-c-d": body,
        "d": "a-b-c-d",
    })

    # Chain in which every step also reads 'a'.
    graph = {
        "a": 1,
        "b": leaf,
        "c": (f, "a", "b"),
        "d": (f, "a", "c"),
    }
    body = (f, "a", (f, "a", leaf))
    assert fused(graph, 1, False) == with_deps({"a": 1, "d": body})
    assert fused(graph, 1, True) == with_deps({
        "a": 1,
        "b-c-d": body,
        "d": "b-c-d",
    })

    # One long branch and one short branch meeting at 'f'.
    graph = {
        "a": 1,
        "b1": leaf,
        "b2": leaf,
        "c1": (f, "b1"),
        "d1": (f, "c1"),
        "e1": (f, "d1"),
        "f": (f, "e1", "b2"),
    }
    long_branch = (f, (f, (f, leaf)))
    expected = with_deps({
        "a": 1,
        "b2": leaf,
        "e1": long_branch,
        "f": (f, "e1", "b2"),
    })
    for width in (1, 1.9):
        assert fused(graph, width, False) == expected
    expected = with_deps({
        "a": 1,
        "b2": leaf,
        "b1-c1-d1-e1": long_branch,
        "f": (f, "e1", "b2"),
        "e1": "b1-c1-d1-e1",
    })
    for width in (1, 1.9):
        assert fused(graph, width, True) == expected
    body = (f, long_branch, leaf)
    assert fused(graph, 2, False) == with_deps({"a": 1, "f": body})
    assert fused(graph, 2, True) == with_deps({
        "a": 1,
        "b1-b2-c1-d1-e1-f": body,
        "f": "b1-b2-c1-d1-e1-f",
    })

    # Same shape, with every step also reading 'a'.
    graph = {
        "a": 1,
        "b1": leaf,
        "b2": leaf,
        "c1": (f, "a", "b1"),
        "d1": (f, "a", "c1"),
        "e1": (f, "a", "d1"),
        "f": (f, "a", "e1", "b2"),
    }
    long_branch = (f, "a", (f, "a", (f, "a", leaf)))
    expected = with_deps({
        "a": 1,
        "b2": leaf,
        "e1": long_branch,
        "f": (f, "a", "e1", "b2"),
    })
    for width in (1, 1.9):
        assert fused(graph, width, False) == expected
    expected = with_deps({
        "a": 1,
        "b2": leaf,
        "b1-c1-d1-e1": long_branch,
        "f": (f, "a", "e1", "b2"),
        "e1": "b1-c1-d1-e1",
    })
    for width in (1, 1.9):
        assert fused(graph, width, True) == expected
    body = (f, "a", long_branch, leaf)
    assert fused(graph, 2, False) == with_deps({"a": 1, "f": body})
    assert fused(graph, 2, True) == with_deps({
        "a": 1,
        "b1-b2-c1-d1-e1-f": body,
        "f": "b1-b2-c1-d1-e1-f",
    })

    # Three parallel chains joined at 'e', followed by a chain to 'g'.
    graph = {
        "a": 1,
        "b1": leaf,
        "b2": leaf,
        "b3": leaf,
        "c1": (f, "b1"),
        "c2": (f, "b2"),
        "c3": (f, "b3"),
        "d1": (f, "c1"),
        "d2": (f, "c2"),
        "d3": (f, "c3"),
        "e": (f, "d1", "d2", "d3"),
        "f": (f, "e"),
        "g": (f, "f"),
    }
    branch = (f, (f, leaf))
    tail = (f, (f, (f, "d1", "d2", "d3")))
    assert fused(graph, 1, False) == with_deps({
        "a": 1,
        "d1": branch,
        "d2": branch,
        "d3": branch,
        "g": tail,
    })
    assert fused(graph, 1, True) == with_deps({
        "a": 1,
        "b1-c1-d1": branch,
        "b2-c2-d2": branch,
        "b3-c3-d3": branch,
        "e-f-g": tail,
        "d1": "b1-c1-d1",
        "d2": "b2-c2-d2",
        "d3": "b3-c3-d3",
        "g": "e-f-g",
    })

    # Keys 'b' and 'd' are each consumed twice.
    graph = {
        "a": 1,
        "b": leaf,
        "c": (f, "b"),
        "d": (f, "b", "c"),
        "e": (f, "d"),
        "f": (f, "e"),
        "g": (f, "d", "f"),
    }
    middle = (f, "b", (f, "b"))
    top = (f, "d", (f, (f, "d")))
    assert fused(graph, 1, False) == with_deps({
        "b": (f, 1),
        "d": middle,
        "g": top,
    })
    assert fused(graph, 1, True) == with_deps({
        "a-b": (f, 1),
        "c-d": middle,
        "e-f-g": top,
        "b": "a-b",
        "d": "c-d",
        "g": "e-f-g",
    })
Example #34
0
def test_fuse_stressed():
    """Fuse a graph with tuple keys and check the result is self-consistent.

    The graph is filled entry by entry, in the same order as the original
    literal.  The only assertion is that the pair returned by ``fuse``
    equals ``with_deps`` applied to the fused graph it returned.
    """

    def f(*args):
        return args

    # Full key names; the tuple keys below are built from these.
    source_name = "array-original-27b9f9d257a80fa6adae06a98faf71eb"
    array_name = "array-27b9f9d257a80fa6adae06a98faf71eb"
    chol_name = "cholesky-26a6b670a8aabb7e2f8936db7ccb6a88"
    upper_name = "cholesky-upper-26a6b670a8aabb7e2f8936db7ccb6a88"
    lt_dot_name = "cholesky-lt-dot-26a6b670a8aabb7e2f8936db7ccb6a88"

    upper_00 = (upper_name, 0, 0)
    upper_01 = (upper_name, 0, 1)
    upper_10 = (upper_name, 1, 0)
    upper_11 = (upper_name, 1, 1)
    chol_00 = (chol_name, 0, 0)
    chol_01 = (chol_name, 0, 1)
    chol_10 = (chol_name, 1, 0)
    chol_11 = (chol_name, 1, 1)
    array_00 = (array_name, 0, 0)
    array_01 = (array_name, 0, 1)
    array_11 = (array_name, 1, 1)
    lt_dot = (lt_dot_name, 1, 0, 1, 0)

    graph = {}
    graph[source_name] = 1
    graph[upper_00] = (f, chol_00)
    graph[chol_10] = (f, upper_01)
    graph[array_00] = (
        f,
        source_name,
        (slice(0, 10, None), slice(0, 10, None)),
    )
    # This value is a bare key (an alias), not a function call.
    graph[upper_10] = chol_01
    graph[chol_11] = (f, (f, array_11, (f, [lt_dot])))
    graph[lt_dot] = (f, chol_10, upper_01)
    graph[array_01] = (
        f,
        source_name,
        (slice(0, 10, None), slice(10, 20, None)),
    )
    graph[upper_11] = (f, chol_11)
    graph[chol_01] = (f, (10, 10))
    graph[array_11] = (
        f,
        source_name,
        (slice(10, 20, None), slice(10, 20, None)),
    )
    graph[upper_01] = (f, chol_00, array_01)
    graph[chol_00] = (f, array_00)

    requested = {upper_00, upper_01, upper_10, upper_11}
    outcome = fuse(graph, keys=requested, ave_width=2, rename_keys=True)
    assert outcome == with_deps(outcome[0])
Example #35
0
def test_fuse_subgraphs(compare_subgraph_callables):
    """Exercise ``fuse(..., fuse_subgraphs=True)`` on a small inc/add graph.

    The same graph is fused for several choices of requested keys and each
    result is compared with the expected graph wrapped by ``with_deps``.

    ``compare_subgraph_callables`` is not referenced in the body; presumably
    it is a pytest fixture requested for its side effects -- TODO confirm.
    """
    graph = {
        "x-1": 1,
        "inc-1": (inc, "x-1"),
        "inc-2": (inc, "inc-1"),
        "add-1": (add, "x-1", "inc-2"),
        "inc-3": (inc, "add-1"),
        "inc-4": (inc, "inc-3"),
        "add-2": (add, "add-1", "inc-4"),
        "inc-5": (inc, "add-2"),
        "inc-6": (inc, "inc-5"),
    }

    # Only "inc-6" requested, rename_keys left at its default: the expected
    # graph stores the fused task under "add-inc-x-1" with "inc-6" as alias.
    fused = fuse(graph, "inc-6", fuse_subgraphs=True)
    expected = with_deps(
        {
            "inc-6": "add-inc-x-1",
            "add-inc-x-1": (
                SubgraphCallable(
                    {
                        "x-1": 1,
                        "add-1": (add, "x-1", (inc, (inc, "x-1"))),
                        "inc-6": (
                            inc,
                            (inc, (add, "add-1", (inc, (inc, "add-1")))),
                        ),
                    },
                    "inc-6",
                    (),
                ),
            ),
        }
    )
    assert fused == expected

    # Same request with rename_keys=False: the fused task keeps the name
    # "inc-6" and no alias entry is expected.
    fused = fuse(graph, "inc-6", fuse_subgraphs=True, rename_keys=False)
    expected = with_deps(
        {
            "inc-6": (
                SubgraphCallable(
                    {
                        "x-1": 1,
                        "add-1": (add, "x-1", (inc, (inc, "x-1"))),
                        "inc-6": (
                            inc,
                            (inc, (add, "add-1", (inc, (inc, "add-1")))),
                        ),
                    },
                    "inc-6",
                    (),
                ),
            ),
        }
    )
    assert fused == expected

    # Requesting "add-2": everything up to "add-2" is expected inside one
    # subgraph, while the inc chain after it is fused into "inc-6".
    fused = fuse(graph, "add-2", fuse_subgraphs=True)
    expected = with_deps(
        {
            "add-inc-x-1": (
                SubgraphCallable(
                    {
                        "x-1": 1,
                        "add-1": (add, "x-1", (inc, (inc, "x-1"))),
                        "add-2": (add, "add-1", (inc, (inc, "add-1"))),
                    },
                    "add-2",
                    (),
                ),
            ),
            "add-2": "add-inc-x-1",
            "inc-6": (inc, (inc, "add-2")),
        }
    )
    assert fused == expected

    # Requesting "inc-2": the subgraph takes "x-1" and "inc-2" as inputs.
    # ordering of arguments is unstable, check all permutations
    fused = fuse(graph, "inc-2", fuse_subgraphs=True)
    candidates = [
        with_deps(
            {
                "x-1": 1,
                "inc-2": (inc, (inc, "x-1")),
                "inc-6": "inc-add-1",
                "inc-add-1": (
                    SubgraphCallable(
                        {
                            "add-1": (add, "x-1", "inc-2"),
                            "inc-6": (
                                inc,
                                (inc, (add, "add-1", (inc, (inc, "add-1")))),
                            ),
                        },
                        "inc-6",
                        inkeys,
                    ),
                    *inkeys,
                ),
            }
        )
        for inkeys in itertools.permutations(("x-1", "inc-2"))
    ]
    assert fused in candidates

    # Requesting both "inc-2" and "add-2".
    # ordering of arguments is unstable, check all permutations
    fused = fuse(graph, ["inc-2", "add-2"], fuse_subgraphs=True)
    candidates = [
        with_deps(
            {
                "x-1": 1,
                "inc-2": (inc, (inc, "x-1")),
                "inc-add-1": (
                    SubgraphCallable(
                        {
                            "add-1": (add, "x-1", "inc-2"),
                            "add-2": (add, "add-1", (inc, (inc, "add-1"))),
                        },
                        "add-2",
                        inkeys,
                    ),
                    *inkeys,
                ),
                "add-2": "inc-add-1",
                "inc-6": (inc, (inc, "add-2")),
            }
        )
        for inkeys in itertools.permutations(("x-1", "inc-2"))
    ]
    assert fused in candidates
Example #36
0
def rechunk_zarr2zarr_w_dask(source_array,
                             target_chunks,
                             max_mem,
                             target_store,
                             temp_store=None,
                             source_storage_options=None,
                             temp_storage_options=None,
                             target_storage_options=None):
    """Build a lazy task that copies ``source_array`` into a rechunked array.

    ``rechunking_plan`` decides the read, intermediate and write chunking.
    When the intermediate chunks equal ``target_chunks`` the data is stored
    straight into the target; otherwise it is first stored into an
    intermediate array in ``temp_store`` and then copied to the target, with
    both store graphs merged and fused into one.

    Parameters
    ----------
    source_array
        Array to copy; its ``shape``, ``chunks``, ``dtype`` and ``attrs``
        are read (presumably a ``zarr.Array`` -- TODO confirm).
    target_chunks
        Chunking of the array created in ``target_store``.
    max_mem
        Passed through to ``rechunking_plan``.
    target_store
        Store handed to ``zarr.empty`` for the target array.
    temp_store
        Store for the intermediate array; required for a two step plan.
    source_storage_options, temp_storage_options : dict, optional
        Forwarded to ``dsa.from_zarr`` for the source and intermediate reads.
        ``None`` (the default) is treated as an empty dict.
    target_storage_options : dict, optional
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    The object returned by ``dsa.store(..., compute=False)`` for a one step
    plan, or a ``Delayed`` wrapping the fused two step graph.

    Raises
    ------
    ValueError
        If a two step plan is needed and ``temp_store`` is ``None``.
    """
    # Avoid mutable default arguments: create fresh dicts per call.
    if source_storage_options is None:
        source_storage_options = {}
    if temp_storage_options is None:
        temp_storage_options = {}

    shape = source_array.shape
    source_chunks = source_array.chunks
    dtype = source_array.dtype
    itemsize = dtype.itemsize

    read_chunks, int_chunks, write_chunks = rechunking_plan(
        shape, source_chunks, target_chunks, itemsize, max_mem)

    source_read = dsa.from_zarr(source_array,
                                chunks=read_chunks,
                                storage_options=source_storage_options)

    # create target
    target_array = zarr.empty(shape,
                              chunks=target_chunks,
                              dtype=dtype,
                              store=target_store)
    target_array.attrs.update(source_array.attrs)

    if int_chunks == target_chunks:
        target_store_delayed = dsa.store(source_read,
                                         target_array,
                                         lock=False,
                                         compute=False)
        print("One step rechunking plan")
        return target_store_delayed

    # do intermediate store
    if temp_store is None:
        # Explicit check instead of ``assert`` so that it also runs under -O.
        raise ValueError(
            "temp_store is required when the rechunking plan needs an "
            "intermediate array")
    int_array = zarr.empty(shape,
                           chunks=int_chunks,
                           dtype=dtype,
                           store=temp_store)
    intermediate_store_delayed = dsa.store(source_read,
                                           int_array,
                                           lock=False,
                                           compute=False)

    int_read = dsa.from_zarr(int_array,
                             chunks=write_chunks,
                             storage_options=temp_storage_options)
    target_store_delayed = dsa.store(int_read,
                                     target_array,
                                     lock=False,
                                     compute=False)

    # now do some hacking to chain these together into a single graph.
    # get the two graphs as dicts
    int_dsk = dask.utils.ensure_dict(intermediate_store_delayed.dask)
    target_dsk = dask.utils.ensure_dict(target_store_delayed.dask)

    # find the root store key representing the read
    root_keys = [
        key for key in target_dsk
        if isinstance(key, str) and key.startswith('from-zarr')
    ]
    # Internal invariant: exactly one string key with the 'from-zarr' prefix.
    assert len(root_keys) == 1
    root_key = root_keys[0]

    # now rewrite the graph: the root read task returns its original value
    # but additionally lists the contents of the intermediate store task as
    # arguments, so the intermediate store is computed before the read.
    target_dsk[root_key] = (lambda a, *b: a, target_dsk[root_key],
                            *int_dsk[intermediate_store_delayed.key])
    target_dsk.update(int_dsk)

    # fuse (the dependencies returned alongside the graph are not needed)
    dsk_fused, _ = fuse(target_dsk)
    delayed_fused = Delayed(target_store_delayed.key, dsk_fused)

    print("Two step rechunking plan")
    return delayed_fused
Example #37
0
def get(
    dsk: Mapping,
    keys: Sequence[Hashable] | Hashable,
    num_workers=None,
    func_loads=None,
    func_dumps=None,
    optimize_graph=True,
    pool=None,
    initializer=None,
    chunksize=None,
    **kwargs,
):
    """Multiprocessed get function appropriate for Bags

    Parameters
    ----------
    dsk : dict
        dask graph
    keys : object or list
        Desired results from graph
    num_workers : int
        Number of worker processes (defaults to number of cores)
    func_dumps : function
        Function to use for function serialization (defaults to cloudpickle.dumps)
    func_loads : function
        Function to use for function deserialization (defaults to cloudpickle.loads)
    optimize_graph : bool
        If True [default], `fuse` is applied to the graph before computation.
    pool : Executor or Pool
        Some sort of `Executor` or `Pool` to use
    initializer: function
        Ignored if ``pool`` has been set.
        Function to initialize a worker process before running any tasks in it.
    chunksize: int, optional
        Size of chunks to use when dispatching work.
        Defaults to 6 as some batching is helpful.
        If -1, will be computed to evenly divide ready work across workers.
    **kwargs
        Forwarded unchanged to ``get_async``.

    Returns
    -------
    The value returned by ``get_async`` for ``keys``.
    """
    # Explicit arguments win; otherwise fall back to the config value and then
    # to a hard-coded default. Because ``or`` is used, falsy arguments (None,
    # 0) also fall through to the config lookup.
    chunksize = chunksize or config.get("chunksize", 6)
    pool = pool or config.get("pool", None)
    initializer = initializer or config.get("multiprocessing.initializer",
                                            None)
    num_workers = num_workers or config.get("num_workers", None) or CPU_COUNT
    if pool is None:
        # In order to get consistent hashing in subprocesses, we need to set a
        # consistent seed for the Python hash algorithm. Unfortunately, there
        # is no way to specify environment variables only for the Pool
        # processes, so we have to rely on environment variables being
        # inherited.
        if os.environ.get("PYTHONHASHSEED") in (None, "0"):
            # This number is arbitrary; it was chosen to commemorate
            # https://github.com/dask/dask/issues/6640.
            os.environ["PYTHONHASHSEED"] = "6640"
        context = get_context()
        # Wrap the user's initializer so every worker process first runs
        # ``initialize_worker_process``.
        initializer = partial(initialize_worker_process,
                              user_initializer=initializer)
        pool = ProcessPoolExecutor(num_workers,
                                   mp_context=context,
                                   initializer=initializer)
        # The pool was created here, so it is shut down here as well.
        cleanup = True
    else:
        if initializer is not None:
            warn(
                "The ``initializer`` argument is ignored when ``pool`` is provided. "
                "The user should configure ``pool`` with the needed ``initializer`` "
                "on creation.")
        # A ``multiprocessing.pool.Pool`` is wrapped so the code below can use
        # ``pool.submit`` (presumably an Executor-style adapter -- TODO confirm).
        if isinstance(pool, multiprocessing.pool.Pool):
            pool = MultiprocessingPoolExecutor(pool)
        # A caller-supplied pool is left open for the caller to manage.
        cleanup = False

    # Optimize Dask
    dsk = ensure_dict(dsk)
    # ``cull`` is given the requested keys and returns the graph together
    # with its dependencies, which are then handed on to ``fuse``.
    dsk2, dependencies = cull(dsk, keys)
    if optimize_graph:
        dsk3, dependencies = fuse(dsk2, keys, dependencies)
    else:
        dsk3 = dsk2

    # We specify marshalling functions in order to catch serialization
    # errors and report them to the user.
    loads = func_loads or config.get("func_loads", None) or _loads
    dumps = func_dumps or config.get("func_dumps", None) or _dumps

    # Note former versions used a multiprocessing Manager to share
    # a Queue between parent and workers, but this is fragile on Windows
    # (issue #1652).
    try:
        # Run
        result = get_async(
            pool.submit,
            # NOTE(review): reads the executor's private ``_max_workers``
            # attribute rather than the local ``num_workers``.
            pool._max_workers,
            dsk3,
            keys,
            get_id=_process_get_id,
            dumps=dumps,
            loads=loads,
            pack_exception=pack_exception,
            raise_exception=reraise,
            chunksize=chunksize,
            **kwargs,
        )
    finally:
        # Only shut down a pool this function created itself.
        if cleanup:
            pool.shutdown()
    return result
Example #38
0
def test_fuse_reductions_multiple_input():
    """Check ``fuse`` on reductions whose tasks take several inputs.

    Each graph is fused for several ``ave_width`` values, with and without
    ``rename_keys``, and the result is compared with the expected graph
    wrapped by ``with_deps``.
    """

    def f(*args):
        return args

    def check(graph, width, rename, want):
        # Fused output must equal the expected graph plus its dependencies.
        assert fuse(graph, ave_width=width,
                    rename_keys=rename) == with_deps(want)

    # Two constants feeding "b", which feeds "c".
    dsk = {"a1": 1, "a2": 2, "b": (f, "a1", "a2"), "c": (f, "b")}
    check(dsk, 2, False, {"c": (f, (f, 1, 2))})
    check(dsk, 2, True, {"a1-a2-b-c": (f, (f, 1, 2)), "c": "a1-a2-b-c"})
    check(dsk, 1, False, {"a1": 1, "a2": 2, "c": (f, (f, "a1", "a2"))})
    check(
        dsk,
        1,
        True,
        {"a1": 1, "a2": 2, "b-c": (f, (f, "a1", "a2")), "c": "b-c"},
    )

    # Three intermediate tasks reduced by a single task "c".
    dsk = {
        "a1": 1,
        "a2": 2,
        "b1": (f, "a1"),
        "b2": (f, "a1", "a2"),
        "b3": (f, "a2"),
        "c": (f, "b1", "b2", "b3"),
    }
    # Below a width of 3 the graph is expected to come back unchanged.
    check(dsk, 1, False, dsk)
    check(dsk, 2.9, False, dsk)
    check(dsk, 1, True, dsk)
    check(dsk, 2.9, True, dsk)
    check(
        dsk,
        3,
        False,
        {"a1": 1, "a2": 2, "c": (f, (f, "a1"), (f, "a1", "a2"), (f, "a2"))},
    )
    check(
        dsk,
        3,
        True,
        {
            "a1": 1,
            "a2": 2,
            "b1-b2-b3-c": (f, (f, "a1"), (f, "a1", "a2"), (f, "a2")),
            "c": "b1-b2-b3-c",
        },
    )

    # Two outputs "c1" and "c2" that share the intermediate "b2".
    dsk = {
        "a1": 1,
        "a2": 2,
        "b1": (f, "a1"),
        "b2": (f, "a1", "a2"),
        "b3": (f, "a2"),
        "c1": (f, "b1", "b2"),
        "c2": (f, "b2", "b3"),
    }
    check(dsk, 1, False, dsk)
    check(dsk, 1, True, dsk)
    check(
        dsk,
        2,
        False,
        {
            "a1": 1,
            "a2": 2,
            "b2": (f, "a1", "a2"),
            "c1": (f, (f, "a1"), "b2"),
            "c2": (f, "b2", (f, "a2")),
        },
    )
    check(
        dsk,
        2,
        True,
        {
            "a1": 1,
            "a2": 2,
            "b2": (f, "a1", "a2"),
            "b1-c1": (f, (f, "a1"), "b2"),
            "b3-c2": (f, "b2", (f, "a2")),
            "c1": "b1-c1",
            "c2": "b3-c2",
        },
    )

    # Same as above with a final task "d" combining "c1" and "c2".
    dsk = {
        "a1": 1,
        "a2": 2,
        "b1": (f, "a1"),
        "b2": (f, "a1", "a2"),
        "b3": (f, "a2"),
        "c1": (f, "b1", "b2"),
        "c2": (f, "b2", "b3"),
        "d": (f, "c1", "c2"),
    }
    check(dsk, 1, False, dsk)
    check(dsk, 1, True, dsk)

    # A more aggressive heuristic could do this at `ave_width=2`.  Perhaps
    # we can improve this.  Nevertheless, this is behaving as intended.
    check(
        dsk,
        3,
        False,
        {
            "a1": 1,
            "a2": 2,
            "b2": (f, "a1", "a2"),
            "d": (f, (f, (f, "a1"), "b2"), (f, "b2", (f, "a2"))),
        },
    )
    check(
        dsk,
        3,
        True,
        {
            "a1": 1,
            "a2": 2,
            "b2": (f, "a1", "a2"),
            "b1-b3-c1-c2-d": (f, (f, (f, "a1"), "b2"), (f, "b2", (f, "a2"))),
            "d": "b1-b3-c1-c2-d",
        },
    )
Example #39
0
def test_fuse_reductions_multiple_input():
    """Verify ``fuse`` results for reductions with multi-input tasks.

    Four graphs of growing depth are fused at different ``ave_width``
    settings, both with ``rename_keys`` off and on; each result is compared
    with ``with_deps`` applied to the expected graph.
    """

    def f(*args):
        return args

    # --- "a1", "a2" -> "b" -> "c" ---------------------------------------
    graph = {
        "a1": 1,
        "a2": 2,
        "b": (f, "a1", "a2"),
        "c": (f, "b"),
    }
    fused = fuse(graph, ave_width=2, rename_keys=False)
    assert fused == with_deps({"c": (f, (f, 1, 2))})
    fused = fuse(graph, ave_width=2, rename_keys=True)
    assert fused == with_deps(
        {"a1-a2-b-c": (f, (f, 1, 2)), "c": "a1-a2-b-c"}
    )
    fused = fuse(graph, ave_width=1, rename_keys=False)
    assert fused == with_deps(
        {"a1": 1, "a2": 2, "c": (f, (f, "a1", "a2"))}
    )
    fused = fuse(graph, ave_width=1, rename_keys=True)
    assert fused == with_deps(
        {"a1": 1, "a2": 2, "b-c": (f, (f, "a1", "a2")), "c": "b-c"}
    )

    # --- three "b" tasks reduced by "c" ---------------------------------
    graph = {
        "a1": 1,
        "a2": 2,
        "b1": (f, "a1"),
        "b2": (f, "a1", "a2"),
        "b3": (f, "a2"),
        "c": (f, "b1", "b2", "b3"),
    }
    unchanged = with_deps(graph)
    # Widths below 3 are expected to leave the graph as it is.
    for rename in (False, True):
        for width in (1, 2.9):
            assert fuse(graph, ave_width=width, rename_keys=rename) == unchanged
    fused = fuse(graph, ave_width=3, rename_keys=False)
    assert fused == with_deps(
        {
            "a1": 1,
            "a2": 2,
            "c": (f, (f, "a1"), (f, "a1", "a2"), (f, "a2")),
        }
    )
    fused = fuse(graph, ave_width=3, rename_keys=True)
    assert fused == with_deps(
        {
            "a1": 1,
            "a2": 2,
            "b1-b2-b3-c": (f, (f, "a1"), (f, "a1", "a2"), (f, "a2")),
            "c": "b1-b2-b3-c",
        }
    )

    # --- two outputs sharing "b2" ---------------------------------------
    graph = {
        "a1": 1,
        "a2": 2,
        "b1": (f, "a1"),
        "b2": (f, "a1", "a2"),
        "b3": (f, "a2"),
        "c1": (f, "b1", "b2"),
        "c2": (f, "b2", "b3"),
    }
    for rename in (False, True):
        assert fuse(graph, ave_width=1, rename_keys=rename) == with_deps(graph)
    fused = fuse(graph, ave_width=2, rename_keys=False)
    assert fused == with_deps(
        {
            "a1": 1,
            "a2": 2,
            "b2": (f, "a1", "a2"),
            "c1": (f, (f, "a1"), "b2"),
            "c2": (f, "b2", (f, "a2")),
        }
    )
    fused = fuse(graph, ave_width=2, rename_keys=True)
    assert fused == with_deps(
        {
            "a1": 1,
            "a2": 2,
            "b2": (f, "a1", "a2"),
            "b1-c1": (f, (f, "a1"), "b2"),
            "b3-c2": (f, "b2", (f, "a2")),
            "c1": "b1-c1",
            "c2": "b3-c2",
        }
    )

    # --- previous graph plus a final task "d" ---------------------------
    graph = {
        "a1": 1,
        "a2": 2,
        "b1": (f, "a1"),
        "b2": (f, "a1", "a2"),
        "b3": (f, "a2"),
        "c1": (f, "b1", "b2"),
        "c2": (f, "b2", "b3"),
        "d": (f, "c1", "c2"),
    }
    for rename in (False, True):
        assert fuse(graph, ave_width=1, rename_keys=rename) == with_deps(graph)

    # A more aggressive heuristic could do this at `ave_width=2`.  Perhaps
    # we can improve this.  Nevertheless, this is behaving as intended.
    fused = fuse(graph, ave_width=3, rename_keys=False)
    assert fused == with_deps(
        {
            "a1": 1,
            "a2": 2,
            "b2": (f, "a1", "a2"),
            "d": (f, (f, (f, "a1"), "b2"), (f, "b2", (f, "a2"))),
        }
    )
    fused = fuse(graph, ave_width=3, rename_keys=True)
    assert fused == with_deps(
        {
            "a1": 1,
            "a2": 2,
            "b2": (f, "a1", "a2"),
            "b1-b3-c1-c2-d": (f, (f, (f, "a1"), "b2"), (f, "b2", (f, "a2"))),
            "d": "b1-b3-c1-c2-d",
        }
    )