예제 #1
0
def test_optimizations_ctd():
    """Check that a pass-through ``optimizations`` entry leaves the graph unchanged.

    The graph built from a sliced dask array under the default configuration
    must equal the graph built while the ``optimizations`` config holds a
    single function that returns its input graph untouched.
    """
    da = pytest.importorskip("dask.array")
    sliced = da.arange(2, chunks=1)[:1]

    baseline = collections_to_dsk([sliced])

    # An "optimization" that hands the graph back as-is.
    passthrough = [lambda dsk, keys: dsk]
    with dask.config.set({"optimizations": passthrough}):
        configured = collections_to_dsk([sliced])

    assert baseline == configured
예제 #2
0
def persistent_collections_to_dsk(collections,
                                  key=None,
                                  serializers=None,
                                  cache=None,
                                  *args,
                                  **kwargs):
    """
    Wrapper around dask.base.collections_to_dsk.

    Builds the graph for ``collections`` and then overrides entries with
    whatever the on-disk and in-memory caches provide:

    - ``key``: when not None, the graph is culled to ``key`` both before
      and after the cache substitutions.
    - ``serializers``: when not None, the entries returned by
      ``get_relevant_keys_from_on_disk_cache`` are merged into the graph
      (load instead of compute).
    - ``cache``: when not None, the entries returned by
      ``get_relevant_keys_from_memory_cache`` are merged into the graph
      afterwards (use the cache instead of loading).

    *args and **kwargs are passed to collections_to_dsk.
    """
    graph = collections_to_dsk(collections, *args, **kwargs)
    restrict_to_key = key is not None

    if restrict_to_key:
        graph, _ = cull(graph, key)

    # Disk entries go in first so that memory-cache entries can override them.
    if serializers is not None:
        graph.update(get_relevant_keys_from_on_disk_cache(graph, serializers))

    if cache is not None:
        graph.update(get_relevant_keys_from_memory_cache(graph, cache))

    # Cull once more: tasks replaced by loads or values may have left
    # parts of the graph unneeded.
    if restrict_to_key:
        graph, _ = cull(graph, key)

    return graph
예제 #3
0
 def setup(self):
     """Build Cholesky task graphs for a family of matrices sharing blocks.

     ``self.dsk`` holds the graph of the default ``cholesky`` calls and
     ``self.dsk_lower`` the graph of the ``lower=True`` calls.
     """
     size = 16
     A = da.random.random((size, size), chunks=(1, 1))
     Bs = [A]
     # The top-left of A is shared by all the Bs.  For example, for i=2:
     # AAB.B
     # AAB.B
     # BBB.B
     # ....B
     # BBBBB
     for i in range(1, size):
         fresh = da.random.random((i, i), chunks=(1, 1))
         # Stack the fresh i x i block on top of A's blocks [i:, :i] ...
         left_columns = da.concatenate([fresh, A.blocks[i:, :i]])
         # ... then append A's remaining block columns on the right.
         Bs.append(da.concatenate([left_columns, A.blocks[:, i:]], axis=1))

     default_factors = [da.linalg.cholesky(matrix) for matrix in Bs]
     self.dsk = collections_to_dsk(default_factors)

     lower_factors = [da.linalg.cholesky(matrix, lower=True) for matrix in Bs]
     self.dsk_lower = collections_to_dsk(lower_factors)
예제 #4
0
파일: dag.py 프로젝트: jakirkham/persist
    def add_task(self, func, *args, **kwargs):
        """
        Wrap ``func`` in a pure ``delayed`` call and add it to the graph.

        Positional and keyword argument *values* that refer to entries of
        the current graph are replaced by the matching Delayed objects
        before the call is built, so that the new task depends on them.

        Special keyword arguments are:
        - dask_key_name: key for the new task; it must not already be used
          in the graph. It is passed through untouched.

        Returns the Delayed object created for the call.
        """
        key = kwargs.get('dask_key_name')
        if key:
            assert key not in self._dask, "specified key is already used"

        delayed_func = delayed(func, pure=True)
        # presumably a mapping from graph key to Delayed object -- verify
        # against dask_to_collections
        collections = dask_to_collections(self._dask)

        def _resolve(value):
            # Swap a value found in the graph for its Delayed object.
            if in_dict(value, collections):
                return collections[value]
            return value

        # normalize args and kwargs replacing values that are in the graph by
        # Delayed objects. Keyword arguments are resolved by value, exactly
        # like positional ones; the keyword names play no part in the lookup.
        args = [_resolve(arg) for arg in args]
        kwargs = {
            name: value if name == 'dask_key_name' else _resolve(value)
            for name, value in kwargs.items()
        }

        delayed_func = delayed_func(*args, **kwargs)

        if key is None:
            key = delayed_func._key
        else:
            # coherence check. TODO: remove
            assert key == delayed_func._key

        # update state
        collections[key] = delayed_func
        self.dask = collections_to_dsk(collections.values())

        return delayed_func
예제 #5
0
 def setup(self):
     """Build the task graph of a row-by-row linear solve on cumulative sums.

     The graph of the final reduction is stored in ``self.dsk_linalg``.
     """
     rows = 1000
     x = da.random.normal(size=(rows, 100), chunks=(1, 100))
     y = da.random.normal(size=(rows,), chunks=(1,))

     cum_xy = (x * y[:, None]).cumsum(axis=0)
     cum_xx = (x[:, None, :] * x[:, :, None]).cumsum(axis=0)

     # One solve per leading index of the cumulative arrays.
     solutions = []
     for i in range(cum_xx.shape[0]):
         solutions.append(da.linalg.solve(cum_xx[i], cum_xy[i]))
     beta = da.stack(solutions, axis=0)

     fitted = (x * beta).sum(axis=1)
     self.dsk_linalg = collections_to_dsk([fitted])
예제 #6
0
 def setup(self):
     """Build the task graph of ``a.T.dot(a)`` for a rechunked random array.

     The graph is stored in ``self.dsk_rechunk_transpose``.
     """
     # Shapes and chunk sizes are element counts: pass real integers
     # instead of relying on float literals such as ``4e6`` being coerced,
     # matching the explicit ``int(...)`` used for the rechunk below.
     shape = (int(4e6), int(30e2))
     chunks = (int(2e4), int(3e1))
     a = da.random.normal(size=shape, chunks=chunks)
     a = a.rechunk((int(1e4 / 10), int(30e2)))
     b = a.T.dot(a)
     self.dsk_rechunk_transpose = collections_to_dsk([b])
예제 #7
0
 def setup(self):
     """Build the task graph of a compressed SVD of a random 6000 x 64 array.

     The three results of ``svd_compressed`` are converted together and the
     graph is stored in ``self.dsk_svd``.
     """
     matrix = da.random.random((6000, 64), chunks=(10, 64))
     left, singular, right = da.linalg.svd_compressed(
         matrix, 100, iterator="power", n_power_iter=0
     )
     self.dsk_svd = collections_to_dsk([left, singular, right])
예제 #8
0
 def setup(self, param):
     """Build the task graph of a ``map_overlap`` call on a random array.

     ``param`` is unpacked as ``(size, chunks, depth)``; the mapped function
     doubles each block. The graph is stored in ``self.dsk``.
     """
     size, chunks, depth = param
     source = da.random.random(size, chunks=chunks)
     doubled = source.map_overlap(lambda e: 2 * e, depth=depth)
     self.dsk = collections_to_dsk([doubled])
예제 #9
0
 def setup(self):
     """Build Cholesky task graphs for a 50 x 50 random array in 1 x 1 chunks.

     ``self.dsk`` holds the graph of the default call and ``self.dsk_lower``
     the graph of the ``lower=True`` call.
     """
     size = 50
     matrix = da.random.random((size, size), chunks=(1, 1))

     default_factor = da.linalg.cholesky(matrix)
     self.dsk = collections_to_dsk([default_factor])

     lower_factor = da.linalg.cholesky(matrix, lower=True)
     self.dsk_lower = collections_to_dsk([lower_factor])
예제 #10
0
def plot_experiment_tree(nodes):
    """
    Render the task graph behind ``nodes`` as a graphviz ``Digraph``.

    ``nodes`` is a mapping whose values are passed to ``collections_to_dsk``
    and expose a ``key`` attribute. Every graph key becomes a box labelled
    with the key and the ``nodes`` entry it belongs to; every task becomes
    a circle labelled with the names of its non-None keyword arguments.
    Edges run from dependencies to the function node and from the function
    node to its result. The graph is laid out bottom-to-top.

    Raises ``KeyError`` when a graph key has no matching entry in ``nodes``.
    NOTE(review): task callables are assumed to expose ``keywords`` (as
    ``functools.partial`` objects do) -- confirm against the callers.
    """
    import graphviz
    from dask import dot
    from dask.base import collections_to_dsk

    dsk = dict(collections_to_dsk(list(nodes.values())))

    g = graphviz.Digraph(graph_attr={"rankdir": "BT"},
                         node_attr=None,
                         edge_attr=None)

    seen = set()

    # Map each collection key back to the identifier it has in ``nodes``.
    states_uuids = {v.key: k for k, v in nodes.items()}

    def add_data_node(key):
        # Emit the box for ``key`` once and return its graphviz name.
        key_name = dot.name(key)
        if key_name not in seen:
            seen.add(key_name)
            g.node(key_name,
                   label=dot.box_label((key, states_uuids[str(key)])),
                   shape="box")
        return key_name

    for k, v in dsk.items():
        k_name = add_data_node(k)

        if dot.istask(v):
            func_name = dot.name((k, "function"))
            if func_name not in seen:
                seen.add(func_name)
                keywords = v[0].keywords
                label = ",\n".join([
                    kw for kw in keywords if keywords[kw] is not None
                ])
                g.node(func_name, label=label, shape="circle")
            g.edge(func_name, k_name)

            for dep in dot.get_dependencies(dsk, k):
                dep_name = add_data_node(dep)
                g.edge(dep_name, func_name)
        # Use the helpers re-exported by dask.dot, like everything above:
        # bare ``ishashable``/``name`` are not imported in this function.
        elif dot.ishashable(v) and v in dsk:
            g.edge(dot.name(v), k_name)
    return g
예제 #11
0
def _get_dsk(node):
    """Return the task graph for the delayed form of ``node``.

    ``node.todelayed()`` is unpacked without traversal and the resulting
    collections are handed to ``collections_to_dsk`` with ``True`` as the
    second positional argument.
    """
    delayed_node = node.todelayed()
    # The repack function returned alongside the collections is not needed.
    collections, _ = unpack_collections(delayed_node, traverse=False)
    return collections_to_dsk(collections, True)