def test_optimizations_ctd():
    """
    Check that a pass-through entry in the "optimizations" config leaves the
    graph produced by collections_to_dsk unchanged.
    """
    da = pytest.importorskip("dask.array")

    def passthrough(dsk, keys):
        # Optimization that hands the graph back untouched.
        return dsk

    sliced = da.arange(2, chunks=1)[:1]
    baseline = collections_to_dsk([sliced])
    with dask.config.set({"optimizations": [passthrough]}):
        with_extra_pass = collections_to_dsk([sliced])

    assert baseline == with_extra_pass
def persistent_collections_to_dsk(collections, key=None, serializers=None, cache=None, *args, **kwargs):
    """
    Wrapper around dask.base.collections_to_dsk.

    *args and **kwargs are passed to collections_to_dsk.

    When ``key`` is given, the graph is culled to ``key`` both before and
    after the substitutions below. When ``serializers`` is given, the entries
    returned by get_relevant_keys_from_on_disk_cache are merged into the
    graph; when ``cache`` is given, the entries returned by
    get_relevant_keys_from_memory_cache are merged in afterwards.
    """
    graph = collections_to_dsk(collections, *args, **kwargs)
    must_cull = key is not None

    if must_cull:
        graph, _ = cull(graph, key)

    if serializers is not None:
        # load instead of compute
        from_disk = get_relevant_keys_from_on_disk_cache(graph, serializers)
        graph.update(from_disk)

    if cache is not None:
        # use cache instead of loading
        from_memory = get_relevant_keys_from_memory_cache(graph, cache)
        graph.update(from_memory)

    # cull again: tasks replaced by loads or cached values may have made
    # some of their former dependencies unnecessary
    if must_cull:
        graph, _ = cull(graph, key)

    return graph
def setup(self):
    """
    Build two graphs, stored on ``self.dsk`` and ``self.dsk_lower``, holding
    the Cholesky factorizations (default and ``lower=True``) of 16 arrays
    that share blocks with one another.
    """
    size = 16
    base = da.random.random((size, size), chunks=(1, 1))
    matrices = [base]

    # Every additional matrix is the base array with its top-left k-by-k
    # corner swapped for freshly generated random blocks; all remaining
    # blocks are taken from the base array and are therefore shared.
    for k in range(1, size):
        corner = da.random.random((k, k), chunks=(1, 1))
        left_columns = da.concatenate([corner, base.blocks[k:, :k]])
        combined = da.concatenate([left_columns, base.blocks[:, k:]], axis=1)
        matrices.append(combined)

    self.dsk = collections_to_dsk(
        [da.linalg.cholesky(m) for m in matrices]
    )
    self.dsk_lower = collections_to_dsk(
        [da.linalg.cholesky(m, lower=True) for m in matrices]
    )
def add_task(self, func, *args, **kwargs):
    """
    Wrap ``func`` with ``delayed(func, pure=True)``, call it with the given
    arguments and register the result in the graph.

    Positional and keyword argument values that are found in the current
    graph (as reported by ``in_dict``) are replaced by the corresponding
    object from ``dask_to_collections(self._dask)`` before the call.

    Special keyword arguments are:
        - dask_key_name

    Returns the object produced by calling the delayed function.

    Raises AssertionError if ``dask_key_name`` is already present in the
    graph, or if the resulting object's key differs from the requested one.
    """
    key = kwargs.get('dask_key_name')
    if key:
        assert key not in self._dask, "specified key is already used"

    delayed_func = delayed(func, pure=True)
    collections = dask_to_collections(self._dask)

    # normalize args and kwargs replacing values that are in the graph by
    # Delayed objects
    args = [
        collections[arg] if in_dict(arg, collections) else arg
        for arg in args
    ]
    # Substitute by *value*, mirroring the handling of positional arguments.
    # The replacement dict is fully built before ``update`` runs, so kwargs
    # is not mutated while being iterated. ``dask_key_name`` names the new
    # task and must never be swapped for a graph object.
    kwargs.update({
        name: collections[value]
        for name, value in kwargs.items()
        if name != 'dask_key_name' and in_dict(value, collections)
    })

    delayed_func = delayed_func(*args, **kwargs)
    if key is None:
        key = delayed_func._key
    else:
        # coherence check. TODO: remove
        assert key == delayed_func._key

    # update state
    # NOTE(review): the graph is read from ``self._dask`` but written to
    # ``self.dask``; presumably ``dask`` is a property backed by ``_dask``
    # - confirm against the class definition.
    collections[key] = delayed_func
    self.dask = collections_to_dsk(collections.values())
    return delayed_func
def setup(self):
    """
    Build the graph stored on ``self.dsk_linalg``: cumulative sums over two
    random normal arrays, one ``da.linalg.solve`` per row of those sums, and
    a final row-wise reduction of the stacked solutions.
    """
    rows = 1000
    x = da.random.normal(size=(rows, 100), chunks=(1, 100))
    y = da.random.normal(size=(rows,), chunks=(1,))

    xy = (x * y[:, None]).cumsum(axis=0)
    outer_products = x[:, None, :] * x[:, :, None]
    xx = outer_products.cumsum(axis=0)

    # One independent solve per leading index of the cumulative arrays.
    solutions = []
    for i in range(xx.shape[0]):
        solutions.append(da.linalg.solve(xx[i], xy[i]))
    beta = da.stack(solutions, axis=0)

    ey = (x * beta).sum(axis=1)
    self.dsk_linalg = collections_to_dsk([ey])
def setup(self):
    """
    Build the graph stored on ``self.dsk_rechunk_transpose``: a random normal
    array is rechunked and then multiplied by its own transpose
    (``a.T.dot(a)``).
    """
    # Shape and chunk sizes are element counts, so they are written as
    # integers (4000000 x 3000 elements in 20000 x 30 chunks) rather than
    # as float literals such as 4e6.
    a = da.random.normal(size=(4000000, 3000), chunks=(20000, 30))
    # Rechunk to 1000 x 3000 blocks before the transpose-dot.
    a = a.rechunk((1000, 3000))
    b = a.T.dot(a)
    self.dsk_rechunk_transpose = collections_to_dsk([b])
def setup(self):
    """
    Build the graph stored on ``self.dsk_svd`` from the three outputs of
    ``da.linalg.svd_compressed`` applied to a 6000 x 64 random array.
    """
    data = da.random.random((6000, 64), chunks=(10, 64))
    factors = da.linalg.svd_compressed(
        data,
        100,
        iterator="power",
        n_power_iter=0,
    )
    u, s, v = factors
    self.dsk_svd = collections_to_dsk([u, s, v])
def setup(self, param):
    """
    Build the graph stored on ``self.dsk`` for a ``map_overlap`` call.

    ``param`` is unpacked into three values: the array size, its chunks and
    the overlap depth.
    """
    shape, chunk_spec, overlap_depth = param
    source = da.random.random(shape, chunks=chunk_spec)
    doubled = source.map_overlap(lambda e: 2 * e, depth=overlap_depth)
    self.dsk = collections_to_dsk([doubled])
def setup(self):
    """
    Build the Cholesky graphs of a 50 x 50 random array split into 1 x 1
    chunks: the default call goes to ``self.dsk`` and the ``lower=True``
    call to ``self.dsk_lower``.
    """
    side = 50
    matrix = da.random.random((side, side), chunks=(1, 1))

    default_factor = da.linalg.cholesky(matrix)
    lower_factor = da.linalg.cholesky(matrix, lower=True)

    self.dsk = collections_to_dsk([default_factor])
    self.dsk_lower = collections_to_dsk([lower_factor])
def plot_experiment_tree(nodes):
    """
    Build a ``graphviz.Digraph`` of the dask graph behind ``nodes``.

    ``nodes`` is a mapping whose values are passed to ``collections_to_dsk``
    and expose a ``key`` attribute; the mapping's own keys are shown next to
    the dask key in each box label.
    (assumes every graph key, once passed through ``str``, matches the
    ``key`` of some value in ``nodes`` - otherwise the label lookup raises
    KeyError; TODO confirm against callers)

    Data keys are drawn as boxes and tasks as circles labelled with the names
    of the non-None entries of the task callable's ``keywords`` (so the
    callable is expected to expose ``keywords``, as ``functools.partial``
    does). Edges run dependency -> function -> key, laid out bottom-to-top.

    Returns the ``graphviz.Digraph``.
    """
    import graphviz
    from dask import dot
    from dask.base import collections_to_dsk
    # ``ishashable`` is imported here so the alias branch below does not
    # depend on a module-level name.
    from dask.core import ishashable

    dsk = dict(collections_to_dsk(list(nodes.values())))

    node_attr = None
    edge_attr = None
    data_attributes = {}
    function_attributes = {}
    graph_attr = {"rankdir": "BT"}

    g = graphviz.Digraph(
        graph_attr=graph_attr, node_attr=node_attr, edge_attr=edge_attr
    )

    seen = set()
    states_uuids = {v.key: k for k, v in nodes.items()}

    def add_data_node(key):
        # Draw the box for ``key`` once and return its graphviz node name.
        key_name = dot.name(key)
        if key_name not in seen:
            seen.add(key_name)
            attrs = data_attributes.get(key, {})
            attrs.setdefault(
                "label", dot.box_label((key, states_uuids[str(key)]))
            )
            attrs.setdefault("shape", "box")
            g.node(key_name, **attrs)
        return key_name

    for k, v in dsk.items():
        k_name = add_data_node(k)

        if dot.istask(v):
            func_name = dot.name((k, "function"))
            if func_name not in seen:
                seen.add(func_name)
                attrs = function_attributes.get(k, {})
                keywords = v[0].keywords
                attrs.setdefault(
                    "label",
                    ",\n".join(
                        [kw for kw in keywords if keywords[kw] is not None]
                    ),
                )
                attrs.setdefault("shape", "circle")
                g.node(func_name, **attrs)
            g.edge(func_name, k_name)

            for dep in dot.get_dependencies(dsk, k):
                dep_name = add_data_node(dep)
                g.edge(dep_name, func_name)
        elif ishashable(v) and v in dsk:
            # ``k`` is an alias of another key in the graph: link them.
            g.edge(dot.name(v), k_name)

    return g
def _get_dsk(node):
    """
    Return the graph built by ``collections_to_dsk`` from the collections
    found in ``node.todelayed()``.
    """
    delayed_node = node.todelayed()
    # Only the collections are needed; the repack callable is discarded.
    found, _repack = unpack_collections(delayed_node, traverse=False)
    return collections_to_dsk(found, True)