コード例 #1
0
def test_inline_cull_dependencies():
    """Smoke test: ``inline`` accepts the dependencies returned by ``cull``.

    Nothing is asserted; the test passes as long as neither call raises.
    """
    graph = {
        'a': 1,
        'b': 'a',
        'c': 'b',
        'd': ['a', 'b', 'c'],
        'e': (add, (len, 'd'), 'a'),
    }
    wanted = ['d', 'e']

    # Reuse the dependency mapping computed while culling
    culled, dependencies = cull(graph, wanted)
    inline(culled, {'b'}, dependencies=dependencies)
コード例 #2
0
def test_inline_cull_dependencies():
    """Run ``inline`` on a culled graph together with ``cull``'s dependencies.

    The test contains no assertions: it only checks that the two calls
    complete without raising.
    """
    dsk = dict(
        a=1,
        b='a',
        c='b',
        d=['a', 'b', 'c'],
        e=(add, (len, 'd'), 'a'),
    )

    culled_dsk, dependencies = cull(dsk, ['d', 'e'])
    keys_to_inline = {'b'}
    inline(culled_dsk, keys_to_inline, dependencies=dependencies)
コード例 #3
0
ファイル: test_optimization.py プロジェクト: rubenvdg/dask
def test_inline_cull_dependencies():
    """Feed the dependencies produced by ``cull`` straight into ``inline``.

    There is no assertion; success means both calls returned without error.
    """
    tasks = {"a": 1, "b": "a", "c": "b"}
    tasks["d"] = ["a", "b", "c"]
    tasks["e"] = (add, (len, "d"), "a")

    requested = ["d", "e"]
    culled, dependencies = cull(tasks, requested)

    inline(culled, {"b"}, dependencies=dependencies)
コード例 #4
0
def inlined_array(a, inline_arrays=None):
    """
    Flatten the graph underlying ``a``.

    Parameters
    ----------
    a : dask.array.Array
        Array whose graph is rewritten.
    inline_arrays : None, list, tuple or dask.array.Array, optional
        Arrays whose layers should be inlined into the layers that depend
        on them. If ``None``, every task that is not an output key of ``a``
        is inlined.

    Returns
    -------
    dask.array.Array
        New array with the name, chunks and dtype of ``a``, built on the
        rewritten graph.

    Raises
    ------
    TypeError
        If ``inline_arrays`` is not one of the accepted types.
    """
    graph = a.__dask_graph__()
    output_keys = set(flatten(a.__dask_keys__()))

    if inline_arrays is None:
        # Nothing singled out: inline every task that is not an output key
        non_outputs = set(graph.keys()).difference(output_keys)
        flat = inline(graph, keys=non_outputs, inline_constants=True)
        culled, _ = cull(flat, output_keys)

        hlg = HighLevelGraph.from_collections(a.name, culled, [])
        return da.Array(hlg, a.name, a.chunks, dtype=a.dtype)

    # Normalise the accepted containers to a list, reject anything else
    if isinstance(inline_arrays, da.Array):
        inline_arrays = [inline_arrays]
    elif isinstance(inline_arrays, tuple):
        inline_arrays = list(inline_arrays)
    elif not isinstance(inline_arrays, list):
        raise TypeError("Invalid inline_arrays, must be "
                        "(None, list, tuple, dask.array.Array)")

    names = {arr.name for arr in inline_arrays}
    layers = graph.layers.copy()
    # Copy each dependency set so the edits below stay local to this call
    deps = {name: members.copy()
            for name, members in graph.dependencies.items()}
    # Layers that list at least one of the inlined arrays as a dependency
    targets = {name for name, members in deps.items()
               if not names.isdisjoint(members)}

    for target in targets:
        merged = dict(layers[target])
        # Snapshot the layer's own keys before foreign tasks are merged in
        target_keys = set(merged.keys())
        keys_to_inline = set()

        for arr in inline_arrays:
            arr_layer = layers[arr.name]
            merged.update(arr_layer)
            deps.pop(arr.name, None)
            deps[target].discard(arr.name)
            keys_to_inline.update(arr_layer.keys())

        flat = inline(merged, keys=keys_to_inline, inline_constants=True)
        culled, _ = cull(flat, target_keys)
        layers[target] = culled

    # The inlined arrays no longer get a layer of their own
    for name in names:
        layers.pop(name)

    return da.Array(HighLevelGraph(layers, deps), a.name, a.chunks, a.dtype)
コード例 #5
0
ファイル: optimisation.py プロジェクト: smasoka/dask-ms
def inlined_array(a, inline_arrays=None):
    """
    Flatten the graph underlying ``a``.

    Parameters
    ----------
    a : dask.array.Array
        Array whose graph is rewritten.
    inline_arrays : None, list, tuple or dask.array.Array, optional
        Arrays whose layers are merged into the layer named ``a.name`` and
        inlined there. If ``None``, every task that is not an output key of
        ``a`` is inlined.

    Returns
    -------
    dask.array.Array
        New array with the name, chunks and dtype of ``a``, built on the
        rewritten graph.

    Raises
    ------
    TypeError
        If ``inline_arrays`` is not one of the accepted types.
    ValueError
        If one of ``inline_arrays`` has no layer or no dependency entry in
        the graph of ``a``.

    Notes
    -----
    Only the layer named ``a.name`` is rewritten. Other layers that depend
    on an inlined array are left untouched.
    """
    agraph = a.__dask_graph__()
    akeys = set(flatten(a.__dask_keys__()))

    # Inline everything except the output keys
    if inline_arrays is None:
        inline_keys = set(agraph.keys()) - akeys
        dsk2 = inline(agraph, keys=inline_keys, inline_constants=True)
        dsk3, _ = cull(dsk2, akeys)

        graph = HighLevelGraph.from_collections(a.name, dsk3, [])
        return da.Array(graph, a.name, a.chunks, dtype=a.dtype)

    # We're given specific arrays to inline, promote to list
    if isinstance(inline_arrays, da.Array):
        inline_arrays = [inline_arrays]
    elif isinstance(inline_arrays, tuple):
        inline_arrays = list(inline_arrays)

    if not isinstance(inline_arrays, list):
        raise TypeError("Invalid inline_arrays, must be "
                        "(None, list, tuple, dask.array.Array)")

    layers = agraph.layers.copy()
    # Copy every dependency set: they are edited below and must not alias
    # the sets owned by the graph of ``a``
    deps = {k: set(v) for k, v in agraph.dependencies.items()}
    inline_keys = set()
    inlined_names = set()
    inherited_deps = set()
    dsk = dict(layers[a.name])

    # Inline specified arrays
    for array in inline_arrays:
        # Remove array from layers and dependencies, remembering what the
        # removed layer itself depended on
        try:
            dsk.update(layers.pop(array.name))
            inherited_deps.update(deps.pop(array.name))
        except KeyError:
            raise ValueError("%s is not a valid dependency of a"
                             % array.name)

        inlined_names.add(array.name)

        # Record keys to inline
        inline_keys.update(flatten(array.__dask_keys__()))

    dsk2 = inline(dsk, keys=inline_keys, inline_constants=True)
    dsk3, _ = cull(dsk2, akeys)

    layers[a.name] = dsk3

    # The inlined tasks now live in a's layer, so that layer takes over
    # their dependencies. References to the removed layers are dropped so
    # no dependency points at a layer that no longer exists.
    own_deps = deps.get(a.name, set()) | inherited_deps
    own_deps -= inlined_names
    own_deps.discard(a.name)
    deps[a.name] = own_deps

    graph = HighLevelGraph(layers, deps)

    return da.Array(graph, a.name, a.chunks, a.dtype)
コード例 #6
0
ファイル: utilities.py プロジェクト: evermountaintech/minian
def inline_pattern(dsk: dict, pat_ls: List[str], inline_constants: bool) -> dict:
    """
    Inline every task whose key matches one of the given patterns.

    Parameters
    ----------
    dsk : dict
        Input dask graph.
    pat_ls : List[str]
        Patterns handed to ``check_pat`` together with each key.
    inline_constants : bool
        Forwarded to ``inline``; when true the result is also passed
        through ``cull``.

    Returns
    -------
    dsk : dict
        The input graph itself when no key matches, otherwise the inlined
        graph with the matching keys removed.

    See Also
    -------
    dask.optimization.inline
    """
    matched = [key for key in dsk.keys() if check_pat(key, pat_ls)]
    if not matched:
        # Nothing to inline: hand back the very same graph object
        return dsk

    dsk = inline(dsk, matched, inline_constants=inline_constants)
    # The matched tasks now live inside their dependents; drop the originals
    for key in matched:
        del dsk[key]

    if inline_constants:
        # NOTE(review): the keys culled to here were deleted just above --
        # confirm ``cull`` copes with keys that are absent from the graph.
        dsk, _ = cull(dsk, set(flatten(matched)))
    return dsk
コード例 #7
0
ファイル: optimisation.py プロジェクト: gitter-badger/dask-ms
def cached_array(array):
    """
    Build an array equivalent to ``array`` whose chunks go through a cache.

    The underlying graph is flattened: every task that is not an output
    key of ``array`` is inlined, and the graph is culled to the output
    keys. Each output task is then wrapped in a ``cache_entry`` call that
    receives the cache, a ``Key`` for the chunk and the original task.

    Useful for caching data that can fit in-memory for the duration
    of the graph's execution.

    Returns a new ``da.Array`` with the name, chunks and dtype of ``array``.
    """
    tasks = dict(array.__dask_graph__())
    output_keys = set(flatten(array.__dask_keys__()))

    # Fold every non-output task into the output tasks, then drop the rest
    non_outputs = set(tasks).difference(output_keys)
    inlined = inline(tasks, non_outputs, inline_constants=True)
    culled, _ = cull(inlined, output_keys)

    # One cache per call, identified by a freshly generated uuid4 hex string
    cache = ArrayCache(uuid.uuid4().hex)

    for key in output_keys:
        culled[key] = (cache_entry, cache, Key(key), culled.pop(key))

    hlg = HighLevelGraph.from_collections(array.name, culled, [])
    return da.Array(hlg, array.name, array.chunks, array.dtype)
コード例 #8
0
ファイル: test_optimization.py プロジェクト: yetudada/dask
def test_inline():
    """Compare ``inline`` results with hand-written expected graphs.

    Covers the default call, explicit key selections, the
    ``inline_constants=False`` switch, and a graph whose values are key
    aliases and a list of keys.
    """
    # Chain of function-call tasks built on one constant
    chain = {"a": 1, "b": (inc, "a"), "c": (inc, "b"), "d": (add, "a", "c")}

    # Default call: only the constant "a" is substituted
    expected_default = {
        "a": 1,
        "b": (inc, 1),
        "c": (inc, "b"),
        "d": (add, 1, "c"),
    }
    assert inline(chain) == expected_default

    # Every upstream key requested: tasks are nested into their dependents
    expected_all = {
        "a": 1,
        "b": (inc, 1),
        "c": (inc, (inc, 1)),
        "d": (add, 1, (inc, (inc, 1))),
    }
    assert inline(chain, ["a", "b", "c"]) == expected_all

    small = {"x": 1, "y": (inc, "x"), "z": (add, "x", "y")}
    assert inline(small) == {"x": 1, "y": (inc, 1), "z": (add, 1, "y")}

    expected_y = {"x": 1, "y": (inc, 1), "z": (add, 1, (inc, 1))}
    assert inline(small, keys="y") == expected_y

    # With constants left alone, only "y" itself is substituted
    expected_y_only = {
        "x": 1,
        "y": (inc, "x"),
        "z": (add, "x", (inc, "x")),
    }
    assert inline(small, keys="y", inline_constants=False) == expected_y_only

    # Graph containing aliases ("b" -> "a", "c" -> "b") and a list of keys
    aliased = {
        "a": 1,
        "b": "a",
        "c": "b",
        "d": ["a", "b", "c"],
        "e": (add, (len, "d"), "a"),
    }
    expected_d = {
        "a": 1,
        "b": 1,
        "c": 1,
        "d": [1, 1, 1],
        "e": (add, (len, [1, 1, 1]), 1),
    }
    assert inline(aliased, "d") == expected_d

    expected_a = {
        "a": 1,
        "b": 1,
        "c": "b",
        "d": [1, "b", "c"],
        "e": (add, (len, "d"), 1),
    }
    assert inline(aliased, "a", inline_constants=False) == expected_a
コード例 #9
0
def test_inline():
    """Check ``inline`` against literal expected graphs.

    Three small graphs are used: a chain of calls, a two-step graph for
    the ``keys`` / ``inline_constants`` arguments, and a graph made of
    aliases plus a list of keys.
    """
    # --- chain of calls -------------------------------------------------
    d = dict(a=1, b=(inc, 'a'), c=(inc, 'b'), d=(add, 'a', 'c'))

    result = inline(d)
    assert result == {'a': 1, 'b': (inc, 1), 'c': (inc, 'b'), 'd': (add, 1, 'c')}

    result = inline(d, ['a', 'b', 'c'])
    nested = (inc, (inc, 1))
    assert result == {'a': 1, 'b': (inc, 1), 'c': nested, 'd': (add, 1, nested)}

    # --- keys / inline_constants ---------------------------------------
    d = dict(x=1, y=(inc, 'x'), z=(add, 'x', 'y'))

    result = inline(d)
    assert result == {'x': 1, 'y': (inc, 1), 'z': (add, 1, 'y')}

    result = inline(d, keys='y')
    assert result == {'x': 1, 'y': (inc, 1), 'z': (add, 1, (inc, 1))}

    result = inline(d, keys='y', inline_constants=False)
    assert result == {'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', (inc, 'x'))}

    # --- aliases and a list of keys -------------------------------------
    d = dict(a=1, b='a', c='b', d=['a', 'b', 'c'], e=(add, (len, 'd'), 'a'))

    result = inline(d, 'd')
    ones = [1, 1, 1]
    assert result == {'a': 1, 'b': 1, 'c': 1, 'd': ones, 'e': (add, (len, ones), 1)}

    result = inline(d, 'a', inline_constants=False)
    assert result == {
        'a': 1, 'b': 1, 'c': 'b', 'd': [1, 'b', 'c'], 'e': (add, (len, 'd'), 1),
    }
コード例 #10
0
def test_inline():
    """Verify ``inline`` output for several graphs and argument combinations.

    Each expected graph is spelled out next to the call that must produce
    it.
    """
    calls = {
        'a': 1,
        'b': (inc, 'a'),
        'c': (inc, 'b'),
        'd': (add, 'a', 'c'),
    }
    # No keys given: only the constant 'a' is substituted
    want = {
        'a': 1,
        'b': (inc, 1),
        'c': (inc, 'b'),
        'd': (add, 1, 'c'),
    }
    assert inline(calls) == want

    # All upstream keys given: 'd' ends up fully nested
    want = {
        'a': 1,
        'b': (inc, 1),
        'c': (inc, (inc, 1)),
        'd': (add, 1, (inc, (inc, 1))),
    }
    assert inline(calls, ['a', 'b', 'c']) == want

    xyz = {
        'x': 1,
        'y': (inc, 'x'),
        'z': (add, 'x', 'y'),
    }
    want = {'x': 1, 'y': (inc, 1), 'z': (add, 1, 'y')}
    assert inline(xyz) == want

    want = {'x': 1, 'y': (inc, 1), 'z': (add, 1, (inc, 1))}
    assert inline(xyz, keys='y') == want

    # Constants are kept as key references when inline_constants is False
    want = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', (inc, 'x'))}
    assert inline(xyz, keys='y', inline_constants=False) == want

    aliases = {
        'a': 1,
        'b': 'a',
        'c': 'b',
        'd': ['a', 'b', 'c'],
        'e': (add, (len, 'd'), 'a'),
    }
    want = {
        'a': 1,
        'b': 1,
        'c': 1,
        'd': [1, 1, 1],
        'e': (add, (len, [1, 1, 1]), 1),
    }
    assert inline(aliases, 'd') == want

    want = {
        'a': 1,
        'b': 1,
        'c': 'b',
        'd': [1, 'b', 'c'],
        'e': (add, (len, 'd'), 1),
    }
    assert inline(aliases, 'a', inline_constants=False) == want
コード例 #11
0
def cached_array(array, token=None):
    """
    Build an array equivalent to ``array`` whose chunks go through a cache.

    The underlying graph is flattened: every task that is not an output
    key of ``array`` is inlined, and the graph is culled to the output
    keys. Each output task is then wrapped in a ``cache_entry`` call that
    receives the cache, a ``Key`` for the chunk and the original task.

    Useful for caching data that can fit in-memory for the duration
    of the graph's execution.

    Parameters
    ----------
    array : :class:`dask.array.Array`
        dask array to cache.
    token : optional, str
        A unique token for identifying the internal cache.
        If None, a uuid4 hex string is generated.

    Returns
    -------
    :class:`dask.array.Array`
        New array with the name, chunks and dtype of ``array``.
    """
    tasks = dict(array.__dask_graph__())
    output_keys = set(flatten(array.__dask_keys__()))

    if token is None:
        token = uuid.uuid4().hex

    # Fold every non-output task into the output tasks, then drop the rest
    non_outputs = set(tasks).difference(output_keys)
    inlined = inline(tasks, non_outputs, inline_constants=True)
    culled, _ = cull(inlined, output_keys)

    # The cache that stores array values is identified by the token
    cache = ArrayCache(token)

    # After inline + cull exactly one task per output key should remain
    assert len(culled) == len(output_keys)

    for key in output_keys:
        culled[key] = (cache_entry, cache, Key(key), culled.pop(key))

    hlg = HighLevelGraph.from_collections(array.name, culled, [])
    return da.Array(hlg, array.name, array.chunks, array.dtype)
コード例 #12
0
    'nwords': (len, (str.split, 'words')),
    'val1': 'orange',
    'val2': 'apple',
    'val3': 'pear',
    'count1': (str.count, 'words', 'val1'),
    'count2': (str.count, 'words', 'val2'),
    'count3': (str.count, 'words', 'val3'),
    'out1': (format_str, 'count1', 'val1', 'nwords'),
    'out2': (format_str, 'count2', 'val2', 'nwords'),
    'out3': (format_str, 'count3', 'val3', 'nwords'),
    'print1': (print_and_return, 'out1'),
    'print2': (print_and_return, 'out2'),
    'print3': (print_and_return, 'out3')
}

# Render the task graph to a PDF file (the path is machine-specific).
dask.visualize(dsk, filename='/Users/longguangbin/Work/temp/dask2.pdf')

from dask.threaded import get
from dask.optimization import cull
from dask.optimization import inline

# Request only two of the three print tasks and compute them with the
# threaded scheduler on the unoptimised graph.
outputs = ['print1', 'print2']
results = get(dsk, outputs)

# Cull the graph with respect to `outputs`; the second return value is a
# dependencies mapping that is reused by `inline` below.
dsk1, dependencies = cull(dsk, outputs)

# Inline on the culled graph (no explicit keys), then compute the same
# outputs again from the optimised graph.
dsk2 = inline(dsk1, dependencies=dependencies)
results = get(dsk2, outputs)

# https://docs.dask.org/en/latest/optimize.html
コード例 #13
0
 def time_inline_keys(self):
     """Call ``inline`` on ``self.dsk`` for ``self.inline_keys``.

     The dependencies stored on the instance are passed along; the
     result is discarded.
     """
     graph, targets, deps = self.dsk, self.inline_keys, self.deps
     inline(graph, keys=targets, dependencies=deps)
コード例 #14
0
 def time_inline_constants(self):
     """Call ``inline`` on ``self.dsk`` with ``inline_constants=True``.

     The dependencies stored on the instance are passed along; the
     result is discarded.
     """
     graph, deps = self.dsk, self.deps
     inline(graph, inline_constants=True, dependencies=deps)