def functions_of(task):
    """Collect every callable used anywhere inside a nested task.

    The task is walked level by level.  Only objects whose exact type is
    ``list`` or ``tuple`` are descended into; anything else is ignored.
    For a tuple that ``istask`` recognises, the head is recorded (after
    ``unwrap_partial``) and the remaining elements are queued; any other
    list/tuple has all of its elements queued.

    Examples
    --------
    >>> task = (add, (mul, 1, 2), (inc, 3))  # doctest: +SKIP
    >>> functions_of(task)  # doctest: +SKIP
    set([add, mul, inc])
    """
    found = set()
    container_types = {list, tuple}

    level = [task]
    while level:
        next_level = []
        for node in level:
            # Exact type match on purpose: subclasses are not descended into.
            if type(node) not in container_types:
                continue
            if istask(node):
                found.add(unwrap_partial(node[0]))
                next_level.extend(node[1:])
            else:
                next_level.extend(node)
        level = next_level

    return found
def head(task):
    """Return the top level node of a task.

    For a task this is its first element, for a list it is the ``list``
    type itself, and for anything else it is the object unchanged.
    """
    if istask(task):
        return task[0]
    return list if isinstance(task, list) else task
def args(task):
    """Get the arguments for the current task.

    For a task this is everything after its first element, for a list it
    is the list itself, and for anything else it is an empty tuple.
    """
    if istask(task):
        return task[1:]
    return task if isinstance(task, list) else ()
def inline_functions(
    dsk, output, fast_functions=None, inline_constants=False, dependencies=None
):
    """Inline cheap functions into larger operations

    A key is inlined into the tasks that use it when its value is a task,
    it has at least one dependent, it is not listed in ``output``, and
    every function found in its value belongs to ``fast_functions``.
    Inlined keys are removed from the returned graph.  When
    ``fast_functions`` is empty or ``None`` the input graph is returned
    as-is.

    Examples
    --------
    >>> double = lambda x: x*2  # doctest: +SKIP
    >>> dsk = {'out': (add, 'i', 'd'),  # doctest: +SKIP
    ...        'i': (inc, 'x'),
    ...        'd': (double, 'y'),
    ...        'x': 1, 'y': 1}
    >>> inline_functions(dsk, [], [inc])  # doctest: +SKIP
    {'out': (add, (inc, 'x'), 'd'),
     'd': (double, 'y'),
     'x': 1, 'y': 1}

    Protect output keys.  In the example below ``i`` is not inlined because
    it is marked as an output key.

    >>> inline_functions(dsk, ['i', 'out'], [inc, double])  # doctest: +SKIP
    {'out': (add, 'i', (double, 'y')),
     'i': (inc, 'x'),
     'x': 1, 'y': 1}
    """
    # Nothing counts as "fast": leave the graph untouched.
    if not fast_functions:
        return dsk

    output = set(output)
    fast_functions = set(fast_functions)

    if dependencies is None:
        dependencies = {k: get_dependencies(dsk, k) for k in dsk}
    dependents = reverse_dict(dependencies)

    def inlinable(v):
        # A TypeError while checking set membership means the task cannot
        # be classified, so it is treated as not inlinable.
        try:
            return functions_of(v).issubset(fast_functions)
        except TypeError:
            return False

    keys = []
    for k, v in dsk.items():
        if not istask(v):
            continue
        if not dependents[k]:
            continue
        if k in output:
            continue
        if inlinable(v):
            keys.append(k)

    if not keys:
        return dsk

    dsk = inline(
        dsk, keys, inline_constants=inline_constants, dependencies=dependencies
    )
    # ``inline`` keeps the inlined keys in its result; drop them here.
    for k in keys:
        del dsk[k]
    return dsk
def inline(dsk, keys=None, inline_constants=True, dependencies=None):
    """Return new dask with the given keys inlined with their values.

    Inlines all constants if ``inline_constants`` keyword is True. Note that
    the constant keys will remain in the graph, to remove them follow
    ``inline`` with ``cull``.

    If ``dependencies`` is supplied with list values, it is converted to a
    mapping of sets first; if it is ``None`` it is computed with
    ``get_dependencies`` for every key in ``dsk``.

    Examples
    --------
    >>> d = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', 'y')}
    >>> inline(d)  # doctest: +SKIP
    {'x': 1, 'y': (inc, 1), 'z': (add, 1, 'y')}

    >>> inline(d, keys='y')  # doctest: +SKIP
    {'x': 1, 'y': (inc, 1), 'z': (add, 1, (inc, 1))}

    >>> inline(d, keys='y', inline_constants=False)  # doctest: +SKIP
    {'x': 1, 'y': (inc, 1), 'z': (add, 'x', (inc, 'x'))}
    """
    # Set intersection is used below, so list-valued dependencies are
    # normalised to sets (only the first value is inspected).
    if dependencies and isinstance(next(iter(dependencies.values())), list):
        dependencies = {name: set(deps) for name, deps in dependencies.items()}

    keys = _flat_set(keys)

    if dependencies is None:
        dependencies = {name: get_dependencies(dsk, name) for name in dsk}

    if inline_constants:
        for name, value in dsk.items():
            is_alias = ishashable(value) and value in dsk
            if is_alias or (not dependencies[name] and not istask(value)):
                keys.add(name)

    # Inlined keys may reference each other, so resolve them in topological
    # order; each entry of ``resolved`` is then free of other inlined keys.
    inlined_subgraph = {name: dsk[name] for name in keys if name in dsk}
    order = toposort(inlined_subgraph, dependencies=dependencies)

    resolved = {}
    for name in order:
        value = dsk[name]
        for dep in keys & dependencies[name]:
            replacement = resolved[dep] if dep in resolved else dsk[dep]
            value = subs(value, dep, replacement)
        resolved[name] = value

    # Build the output graph: resolved keys plus every other key with the
    # inlined keys substituted into its value.
    result = resolved.copy()
    for name, value in dsk.items():
        if name in result:
            continue
        for dep in keys & dependencies[name]:
            value = subs(value, dep, resolved[dep])
        result[name] = value
    return result
def test_istask():
    """``istask`` accepts a callable-headed tuple and rejects everything else."""
    record_type = namedtuple("f", ["x", "y"])

    assert istask((inc, 1))

    # A bare value, a tuple without a callable head, and a namedtuple
    # instance (even with a callable first field) are all non-tasks.
    for non_task in (1, (1, 2), record_type(sum, 2)):
        assert not istask(non_task)
def _bottom_up(net, term):
    """Rewrite ``term`` from the leaves upward.

    The arguments of a task (or the elements of a list) are rewritten
    recursively first; the rebuilt term is then handed to
    ``net._rewrite`` and its result returned.
    """
    if istask(term):
        top = head(term)
        rewritten_args = tuple(_bottom_up(net, sub) for sub in args(term))
        term = (top,) + rewritten_args
    elif isinstance(term, list):
        term = [_bottom_up(net, sub) for sub in args(term)]
    return net._rewrite(term)
def _inplace_fuse_subgraphs(dsk, keys, dependencies, fused_trees, rename_keys): """Subroutine of fuse.Mutates dsk, depenencies, and fused_trees inplace""" # locate all members of linear chains child2parent = {} unfusible = set() for parent in dsk: deps = dependencies[parent] has_many_children = len(deps) > 1 for child in deps: if keys is not None and child in keys: unfusible.add(child) elif child in child2parent: del child2parent[child] unfusible.add(child) elif has_many_children: unfusible.add(child) elif child not in unfusible: child2parent[child] = parent # construct the chains from ancestor to descendant chains = [] parent2child = {v: k for k, v in child2parent.items()} while child2parent: child, parent = child2parent.popitem() chain = [child, parent] while parent in child2parent: parent = child2parent.pop(parent) del parent2child[parent] chain.append(parent) chain.reverse() while child in parent2child: child = parent2child.pop(child) del child2parent[child] chain.append(child) # Skip chains with < 2 executable tasks ntasks = 0 for key in chain: ntasks += istask(dsk[key]) if ntasks > 1: chains.append(chain) break # Mutate dsk fusing chains into subgraphs for chain in chains: subgraph = {k: dsk[k] for k in chain} outkey = chain[0] # Update dependencies and graph inkeys_set = dependencies[outkey] = dependencies[chain[-1]] for k in chain[1:]: del dependencies[k] del dsk[k] # Create new task inkeys = tuple(inkeys_set) dsk[outkey] = (SubgraphCallable(subgraph, outkey, inkeys), ) + inkeys # Mutate `fused_trees` if key renaming is needed (renaming done in fuse) if rename_keys: chain2 = [] for k in chain: subchain = fused_trees.pop(k, False) if subchain: chain2.extend(subchain) else: chain2.append(k) fused_trees[outkey] = chain2