Exemple #1
0
def functions_of(task):
    """Set of functions contained within nested task

    Examples
    --------
    >>> task = (add, (mul, 1, 2), (inc, 3))  # doctest: +SKIP
    >>> functions_of(task)  # doctest: +SKIP
    set([add, mul, inc])
    """
    funcs = set()

    work = [task]
    sequence_types = {list, tuple}

    while work:
        new_work = []
        for task in work:
            if type(task) in sequence_types:
                if istask(task):
                    funcs.add(unwrap_partial(task[0]))
                    new_work.extend(task[1:])
                else:
                    new_work.extend(task)
        work = new_work

    return funcs
Exemple #2
0
def head(task):
    """Return the top level node of a task"""

    if istask(task):
        return task[0]
    if isinstance(task, list):
        return list
    return task
Exemple #3
0
def args(task):
    """Get the arguments for the current task"""

    if istask(task):
        return task[1:]
    if isinstance(task, list):
        return task
    return ()
Exemple #4
0
def inline_functions(dsk,
                     output,
                     fast_functions=None,
                     inline_constants=False,
                     dependencies=None):
    """Inline cheap functions into larger operations

    Examples
    --------
    >>> double = lambda x: x*2  # doctest: +SKIP
    >>> dsk = {'out': (add, 'i', 'd'),  # doctest: +SKIP
    ...        'i': (inc, 'x'),
    ...        'd': (double, 'y'),
    ...        'x': 1, 'y': 1}
    >>> inline_functions(dsk, [], [inc])  # doctest: +SKIP
    {'out': (add, (inc, 'x'), 'd'),
     'd': (double, 'y'),
     'x': 1, 'y': 1}

    Protect output keys.  In the example below ``i`` is not inlined because it
    is marked as an output key.

    >>> inline_functions(dsk, ['i', 'out'], [inc, double])  # doctest: +SKIP
    {'out': (add, 'i', (double, 'y')),
     'i': (inc, 'x'),
     'x': 1, 'y': 1}
    """
    if not fast_functions:
        return dsk

    output = set(output)

    fast_functions = set(fast_functions)

    if dependencies is None:
        dependencies = {k: get_dependencies(dsk, k) for k in dsk}
    dependents = reverse_dict(dependencies)

    def inlinable(v):
        try:
            return functions_of(v).issubset(fast_functions)
        except TypeError:
            return False

    keys = [
        k for k, v in dsk.items()
        if istask(v) and dependents[k] and k not in output and inlinable(v)
    ]

    if keys:
        dsk = inline(dsk,
                     keys,
                     inline_constants=inline_constants,
                     dependencies=dependencies)
        for k in keys:
            del dsk[k]
    return dsk
Exemple #5
0
def inline(dsk, keys=None, inline_constants=True, dependencies=None):
    """Return new dask with the given keys inlined with their values.

    Inlines all constants if ``inline_constants`` keyword is True. Note that
    the constant keys will remain in the graph, to remove them follow
    ``inline`` with ``cull``.

    Examples
    --------
    >>> d = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', 'y')}
    >>> inline(d)  # doctest: +SKIP
    {'x': 1, 'y': (inc, 1), 'z': (add, 1, 'y')}
    >>> inline(d, keys='y')  # doctest: +SKIP
    {'x': 1, 'y': (inc, 1), 'z': (add, 1, (inc, 1))}
    >>> inline(d, keys='y', inline_constants=False)  # doctest: +SKIP
    {'x': 1, 'y': (inc, 1), 'z': (add, 'x', (inc, 'x'))}
    """
    if dependencies and isinstance(next(iter(dependencies.values())), list):
        dependencies = {k: set(v) for k, v in dependencies.items()}

    keys = _flat_set(keys)

    if dependencies is None:
        dependencies = {k: get_dependencies(dsk, k) for k in dsk}

    if inline_constants:
        keys.update(k for k, v in dsk.items()
                    if (ishashable(v) and v in dsk) or (
                        not dependencies[k] and not istask(v)))

    # Keys may depend on other keys, so determine replace order with toposort.
    # The values stored in `keysubs` do not include other keys.
    replaceorder = toposort(dict((k, dsk[k]) for k in keys if k in dsk),
                            dependencies=dependencies)
    keysubs = {}
    for key in replaceorder:
        val = dsk[key]
        for dep in keys & dependencies[key]:
            if dep in keysubs:
                replace = keysubs[dep]
            else:
                replace = dsk[dep]
            val = subs(val, dep, replace)
        keysubs[key] = val

    # Make new dask with substitutions
    dsk2 = keysubs.copy()
    for key, val in dsk.items():
        if key not in dsk2:
            for item in keys & dependencies[key]:
                val = subs(val, item, keysubs[item])
            dsk2[key] = val
    return dsk2
def test_istask():
    assert istask((inc, 1))
    assert not istask(1)
    assert not istask((1, 2))
    f = namedtuple("f", ["x", "y"])
    assert not istask(f(sum, 2))
Exemple #7
0
def _bottom_up(net, term):
    if istask(term):
        term = (head(term), ) + tuple(_bottom_up(net, t) for t in args(term))
    elif isinstance(term, list):
        term = [_bottom_up(net, t) for t in args(term)]
    return net._rewrite(term)
Exemple #8
0
def _inplace_fuse_subgraphs(dsk, keys, dependencies, fused_trees, rename_keys):
    """Subroutine of fuse.Mutates dsk, depenencies, and fused_trees inplace"""
    # locate all members of linear chains
    child2parent = {}
    unfusible = set()
    for parent in dsk:
        deps = dependencies[parent]
        has_many_children = len(deps) > 1
        for child in deps:
            if keys is not None and child in keys:
                unfusible.add(child)
            elif child in child2parent:
                del child2parent[child]
                unfusible.add(child)
            elif has_many_children:
                unfusible.add(child)
            elif child not in unfusible:
                child2parent[child] = parent

    # construct the chains from ancestor to descendant
    chains = []
    parent2child = {v: k for k, v in child2parent.items()}
    while child2parent:
        child, parent = child2parent.popitem()
        chain = [child, parent]
        while parent in child2parent:
            parent = child2parent.pop(parent)
            del parent2child[parent]
            chain.append(parent)
        chain.reverse()
        while child in parent2child:
            child = parent2child.pop(child)
            del child2parent[child]
            chain.append(child)
        # Skip chains with < 2 executable tasks
        ntasks = 0
        for key in chain:
            ntasks += istask(dsk[key])
            if ntasks > 1:
                chains.append(chain)
                break

    # Mutate dsk fusing chains into subgraphs
    for chain in chains:
        subgraph = {k: dsk[k] for k in chain}
        outkey = chain[0]

        # Update dependencies and graph
        inkeys_set = dependencies[outkey] = dependencies[chain[-1]]
        for k in chain[1:]:
            del dependencies[k]
            del dsk[k]

        # Create new task
        inkeys = tuple(inkeys_set)
        dsk[outkey] = (SubgraphCallable(subgraph, outkey, inkeys), ) + inkeys

        # Mutate `fused_trees` if key renaming is needed (renaming done in fuse)
        if rename_keys:
            chain2 = []
            for k in chain:
                subchain = fused_trees.pop(k, False)
                if subchain:
                    chain2.extend(subchain)
                else:
                    chain2.append(k)
            fused_trees[outkey] = chain2