Exemple #1
0
def analyze_iterations(nodes):
    """
    Decorate the :class:`Iteration` objects within ``nodes`` with the
    :class:`IterationProperty` objects discovered by the analysis. The
    recognized IterationProperty decorators are listed in
    ``nodes.IterationProperty._KNOWN``.

    The analysis runs in two phases. The local phase inspects each tree in
    isolation through a fixed sequence of detectors; the global phase
    reconciles the properties collected for each Iteration.

    :param nodes: The Iteration/Expression tree to be analyzed.
    :returns: The result of visiting ``nodes`` with a
              :class:`NestedTransformer` that swaps the analyzed Iterations
              with rebuilt ones carrying the discovered properties.
    """
    # Each detector receives the mapper built so far and returns the updated
    # one; the order below is the order in which they are applied
    detectors = (
        detect_fully_parallel,
        detect_outermost_parallel,
        detect_outermost_sequential_inner_parallel,
        detect_innermost_unitstride,
        detect_wrappable_iterations,
    )

    # Local analysis: detect Iteration properties, one tree at a time
    found = OrderedDict()
    for tree, exprs in FindSections().visit(nodes).items():
        graph = compute_dependency_graph(exprs)
        for detector in detectors:
            found = detector(tree, graph, found)

    # Global analysis: turn the collected properties into rebuilt Iterations.
    # Iterate over a snapshot, since the mapper values are overwritten in place
    for iteration, discovered in list(found.items()):
        kwargs = iteration.args
        if SEQUENTIAL in discovered:
            # SEQUENTIAL kills PARALLEL
            discovered = [p for p in discovered if p != PARALLEL]
        # Pre-existing properties come first, newly discovered ones follow
        existing = as_tuple(kwargs.pop('properties'))
        found[iteration] = Iteration(
            properties=existing + as_tuple(discovered), **kwargs)

    # Store the discovered properties in the Iteration/Expression tree
    return NestedTransformer(found).visit(nodes)
Exemple #2
0
def iet_insert_C_decls(iet, func_table):
    """
    Build a new Iteration/Expression tree from ``iet`` in which the
    necessary symbol declarations have been added. Declarations are placed
    as close as possible to the first symbol use.

    Note that ``func_table`` is updated in place: the entries flagged as
    ``local`` are replaced by :class:`MetaCall`s whose root has been
    transformed with the same declarations.

    :param iet: The input Iteration/Expression tree.
    :param func_table: A mapper from callable names to :class:`Callable`s
                       called from within ``iet``.
    :returns: The transformed Iteration/Expression tree.
    """
    def not_parallel(i):
        return not i.is_Parallel

    # Resolve function calls first: a call to a local function is replaced
    # by the expressions found in the function body, visited with the call's
    # enclosing nodes as the starting queue
    mapexprs = MapExpressions()
    scopes = []
    for node, enclosing in mapexprs.visit(iet).items():
        if not node.is_Call:
            scopes.append((node, enclosing))
            continue
        callee = func_table[node.name]
        if callee.local:
            scopes.extend(
                mapexprs.visit(callee.root, queue=list(enclosing)).items())

    # Determine all required declarations
    allocator = Allocator()
    mapper = OrderedDict()
    for node, enclosing in scopes:
        if node.is_scalar:
            # Inline declaration
            mapper[node] = LocalExpression(**node.args)
            continue
        written = node.write
        if written is None or written._mem_external:
            # Nothing to do, e.g., variable passed as kernel argument
            continue
        if written._mem_stack:
            # On the stack; fall back to the whole tree if the filter
            # returns no enclosing iteration
            site = filter_iterations(
                enclosing, key=not_parallel, stop='asap') or [iet]
            allocator.push_stack(site[-1], written)
        else:
            # On the heap, as a tensor that must be globally accessible
            allocator.push_heap(written)

    # Introduce declarations on the stack
    for site, objs in allocator.onstack:
        mapper[site] = tuple(Element(obj) for obj in objs)
    iet = NestedTransformer(mapper).visit(iet)
    for name, meta in list(func_table.items()):
        if meta.local:
            func_table[name] = MetaCall(
                Transformer(mapper).visit(meta.root), meta.local)

    # Introduce declarations on the heap (if any)
    if not allocator.onheap:
        return iet
    decls, allocs, frees = zip(*allocator.onheap)
    return List(header=decls + allocs, body=iet, footer=frees)
Exemple #3
0
def test_nested_transformer(exprs, iters, block2):
    """Unlike Transformer, based on BFS, a NestedTransformer applies transformations
    performing a DFS. This test simultaneously replace an inner expression and an
    Iteration sorrounding it."""
    target_loop = block2.nodes[1]
    target_expr = target_loop.nodes[0].nodes[0]
    mapper = {
        target_loop: iters[3](target_loop.nodes[0]),
        target_expr: exprs[3]
    }
    processed = NestedTransformer(mapper).visit(block2)
    assert printAST(processed) == """<Iteration i::i::(0, 3, 1)::(0, 0)>
Exemple #4
0
def fold_blockable_tree(node, exclude_innermost=False):
    """
    Create :class:`IterationFold`s from sequences of nested :class:`Iteration`.

    :param node: The Iteration/Expression tree to be inspected.
    :param exclude_innermost: If ``True`` (identity check), the innermost
                              level of each candidate is not folded.
    :returns: The result of visiting ``node`` with a
              :class:`NestedTransformer` in which each fold root is mapped
              to an IterationFold and the folded Iterations to ``None``.
    """
    adjacent = FindAdjacentIterations().visit(node)
    # Drop the visitor's bookkeeping entry; only the groups are of interest
    adjacent.pop('seen_iteration')

    mapper = {}
    for groups in adjacent.values():
        for group in groups:
            # Pre-condition: they all must be perfect iterations
            assert len(group) > 1
            if not all(IsPerfectIteration().visit(it) for it in group):
                continue

            # Only retain consecutive trees having same depth as the first
            candidates = [retrieve_iteration_tree(it)[0] for it in group]
            trees = []
            for tree in candidates:
                if len(tree) != len(candidates[0]):
                    break
                trees.append(tree)
            if not trees:
                continue

            # Check foldability, level by level. Each level is a tuple with
            # one Iteration per tree, taken from the reversed tree sequence
            levels = list(zip(*reversed(trees)))
            if not all(is_foldable(level) for level in levels):
                continue

            # Maybe heuristically exclude innermost Iteration
            if exclude_innermost is True:
                levels = levels[:-1]

            # Perhaps there's nothing to fold
            if len(levels) == 1:
                continue

            # Perform folding: the first Iteration of each level becomes the
            # fold root, the remaining ones are folded into it
            for level in levels:
                root, remainder = level[0], level[1:]
                folds = []
                for other in remainder:
                    shift = tuple(
                        y - x for x, y in zip(other.offsets, root.offsets))
                    folds.append((shift, other.nodes))
                mapper[root] = IterationFold(folds=folds, **root.args)
                for other in remainder:
                    mapper[other] = None

    # Insert the IterationFolds in the Iteration/Expression tree
    return NestedTransformer(mapper).visit(node)
Exemple #5
0
    def _insert_declarations(self, nodes):
        """
        Populate the Operator's body with the necessary variable declarations.

        Scalar expressions are turned into inline declarations; the objects
        written by the remaining expressions are declared either on the
        stack or on the heap, unless they are flagged as external.
        Expressions that write to nothing (``write is None``) require no
        declaration and are skipped.

        As a side effect, the ``local`` entries of ``self.func_table`` are
        replaced by :class:`FunMeta`s whose root has been transformed with
        the same declarations.

        :param nodes: The Iteration/Expression tree to be processed.
        :returns: The transformed Iteration/Expression tree.
        """
        def not_parallel(i):
            return not i.is_Parallel

        # Resolve function calls first: a call to a local function is
        # replaced by the expressions found in the function body, visited
        # with the call's enclosing nodes as the starting queue
        scopes = []
        me = MapExpressions()
        for k, v in me.visit(nodes).items():
            if k.is_Call:
                func = self.func_table[k.name]
                if func.local:
                    scopes.extend(me.visit(func.root, queue=list(v)).items())
            else:
                scopes.append((k, v))

        # Determine all required declarations
        allocator = Allocator()
        mapper = OrderedDict()
        for k, v in scopes:
            if k.is_scalar:
                # Inline declaration
                mapper[k] = LocalExpression(**k.args)
            elif k.write is None or k.write._mem_external:
                # Nothing to do, e.g., variable passed as kernel argument.
                # The None check avoids an AttributeError for expressions
                # that do not write to any object
                continue
            elif k.write._mem_stack:
                # On the stack, as established by the DLE; fall back to the
                # whole tree if the filter returns no enclosing iteration
                site = filter_iterations(
                    v, key=not_parallel, stop='asap') or [nodes]
                allocator.push_stack(site[-1], k.write)
            else:
                # On the heap, as a tensor that must be globally accessible
                allocator.push_heap(k.write)

        # Introduce declarations on the stack
        for k, v in allocator.onstack:
            mapper[k] = tuple(Element(i) for i in v)
        nodes = NestedTransformer(mapper).visit(nodes)
        # Iterate over a snapshot, since the table is updated in place
        for k, v in list(self.func_table.items()):
            if v.local:
                self.func_table[k] = FunMeta(
                    Transformer(mapper).visit(v.root), v.local)

        # Introduce declarations on the heap (if any)
        if allocator.onheap:
            decls, allocs, frees = zip(*allocator.onheap)
            nodes = List(header=decls + allocs, body=nodes, footer=frees)

        return nodes
Exemple #6
0
def iet_analyze(iet):
    """
    Attach :class:`IterationProperty` to :class:`Iteration` objects within
    ``iet``. The recognized IterationProperty decorators are listed in
    ``nodes.IterationProperty._KNOWN``.

    :param iet: The Iteration/Expression tree to be analyzed.
    :returns: The result of visiting ``iet`` with a
              :class:`NestedTransformer` that swaps the analyzed Iterations
              with rebuilt ones carrying the discovered properties.
    """
    # Each marking pass consumes the output of the previous one
    analysis = mark_wrappable(mark_vectorizable(mark_parallel(iet)))

    # Decorate the Iteration/Expression tree with the found properties;
    # pre-existing properties come first, newly found ones follow
    mapper = OrderedDict()
    for iteration, found in list(analysis.properties.items()):
        kwargs = iteration.args
        existing = as_tuple(kwargs.pop('properties'))
        mapper[iteration] = Iteration(
            properties=existing + as_tuple(found), **kwargs)

    return NestedTransformer(mapper).visit(iet)
Exemple #7
0
def iet_make(clusters):
    """
    Create an Iteration/Expression tree (IET) given an iterable of :class:`Cluster`s.

    Clusters are processed in order. For each cluster, the Iterations
    scheduled for the preceding cluster are reused for as long as the
    iteration intervals match pairwise and the interval's dimension is not
    in ``cluster.atomics``; new Iterations are built only for the remaining
    intervals. Finally, the Iterations used by at least one cluster are
    rebuilt with their ``uindices``.

    :param clusters: The iterable :class:`Cluster`s for which the IET is built.
    :returns: A :class:`List` wrapping the constructed nodes, as returned by
              the final :class:`NestedTransformer` visit.
    """
    # {Iteration -> [c0, c1, ...]}, shared clusters
    shared = {}
    # The constructed IET
    processed = []
    # {Interval -> Iteration}, carried from preceding cluster
    schedule = OrderedDict()

    # Build IET
    for cluster in clusters:
        body = [Expression(e) for e in cluster.exprs]

        if cluster.ispace.empty:
            # No Iterations are needed
            processed.extend(body)
            continue

        # The innermost reusable Iteration, if any; /body/ will be appended to it
        root = None
        itintervals = cluster.ispace.iteration_intervals

        # Can I reuse any of the previously scheduled Iterations ?
        # /index/ counts the leading intervals shared with the current schedule
        index = 0
        for i0, i1 in zip(itintervals, list(schedule)):
            if i0 != i1 or i0.dim in cluster.atomics:
                break
            root = schedule[i1]
            index += 1
        # The intervals for which new Iterations must be built
        needed = itintervals[index:]

        # Build Expressions
        # No new Iteration will wrap the expressions, so group them in a List
        if not needed:
            body = List(body=body)

        # Build Iterations
        # Proceed innermost-first, so that each new Iteration wraps the
        # previously built /body/; entries are inserted at the front to keep
        # /scheduling/ ordered outermost-first
        scheduling = []
        for i in reversed(needed):
            # Update IET and scheduling
            if i.dim in cluster.guards:
                # Must wrap within an if-then scope
                body = Conditional(cluster.guards[i.dim], body)
                # Adding (None, None) ensures that nested iterations won't
                # be reused by the next cluster
                scheduling.insert(0, (None, None))
            iteration = Iteration(body,
                                  i.dim,
                                  i.dim.limits,
                                  offsets=i.limits,
                                  direction=i.direction)
            scheduling.insert(0, (i, iteration))

            # Prepare for next dimension
            body = iteration

        # If /needed/ is != [], root.dim might be a guarded dimension for /cluster/
        if root is not None and root.dim in cluster.guards:
            body = Conditional(cluster.guards[root.dim], body)

        # Update the current schedule
        if root is None:
            # Nothing could be reused: /body/ is a new top-level node
            processed.append(body)
        else:
            # Append /body/ to the reused Iteration. This requires rebuilding
            # /root/, so the references held in /scheduling/ and /shared/ are
            # remapped through /transf.rebuilt/ (entries not found there are
            # kept as they are)
            nodes = list(root.nodes) + [body]
            transf = Transformer(
                {root: root._rebuild(nodes, **root.args_frozen)})
            processed = list(transf.visit(processed))
            scheduling = list(schedule.items())[:index] + list(scheduling)
            scheduling = [(k, transf.rebuilt.get(v, v)) for k, v in scheduling]
            shared = {transf.rebuilt.get(k, k): v for k, v in shared.items()}
        schedule = OrderedDict(scheduling)

        # Record that /cluster/ was used to build the iterations in /schedule/
        # (the None placeholders introduced for guarded dimensions are skipped)
        shared.update(
            {i: shared.get(i, []) + [cluster]
             for i in schedule.values() if i})
    iet = List(body=processed)

    # Add in unbounded indices, if needed
    mapper = {}
    for k, v in shared.items():
        uindices = []
        # Merge the iteration spaces of all clusters sharing /k/, restricted
        # to the dimension of /k/
        ispace = IterationSpace.merge(*[i.ispace.project([k.dim]) for i in v])
        for j, offs in ispace.sub_iterators.get(k.dim, []):
            # The number of offsets of the sub-iterator is used as modulo
            modulo = len(offs)
            # One index per offset yielded by filter_ordered, named after the
            # sub-iterator and the enumeration counter
            for n, o in enumerate(filter_ordered(offs)):
                name = "%s%d" % (j.name, n)
                vname = Scalar(name=name, dtype=np.int32)
                value = (k.dim + o) % modulo
                uindices.append(UnboundedIndex(vname, value, value, j, j + o))
        mapper[k] = k._rebuild(uindices=uindices)
    iet = NestedTransformer(mapper).visit(iet)

    return iet
Exemple #8
0
    def _create_elemental_functions(self, nodes, state):
        """
        Extract :class:`Iteration` sub-trees and move them into :class:`Callable`s.

        Currently, only tagged, elementizable Iteration objects are targeted.

        :param nodes: The Iteration/Expression tree to be transformed.
        :param state: Not used within this method.
        :returns: A 2-tuple ``(processed, metadata)``, where ``processed`` is
                  the transformed tree, in which each extracted sub-tree is
                  replaced by a :class:`Call`, and ``metadata`` is a dict
                  mapping ``'elemental_functions'`` to the created Callables.
        """
        # Decoration placed as header of each generated Call
        noinline = self._compiler_decoration('noinline',
                                             c.Comment('noinline?'))

        # {name -> Callable}, the elemental functions created so far
        functions = OrderedDict()
        # {sub-tree root -> Call replacing it}
        mapper = {}
        for tree in retrieve_iteration_tree(nodes, mode='superset'):
            # Search an elementizable sub-tree (if any)
            tagged = filter_iterations(tree, lambda i: i.tag is not None,
                                       'asap')
            if not tagged:
                continue
            root = tagged[0]
            if not root.is_Elementizable:
                continue
            # The Iterations of /tree/ from /root/ onwards
            target = tree[tree.index(root):]

            # Elemental function arguments
            # /args/ is a list of pairs (value passed by the Call, parameter
            # of the Callable), later split into /call/ and /params/
            args = []  # Found so far (scalars, tensors)
            maybe_required = set()  # Scalars that *may* have to be passed in
            not_required = set()  # Elemental function locally declared scalars

            # Build a new Iteration/Expression tree with free bounds
            free = []
            for i in target:
                name, bounds = i.dim.name, i.bounds_symbolic
                # Iteration bounds
                start = Scalar(name='%s_start' % name, dtype=np.int32)
                finish = Scalar(name='%s_finish' % name, dtype=np.int32)
                args.extend(zip([ccode(j) for j in bounds], (start, finish)))
                # Iteration unbounded indices
                # One scalar parameter per unbounded index, receiving the
                # index's start value from the caller
                ufunc = [
                    Scalar(name='%s_ub%d' % (name, j), dtype=np.int32)
                    for j in range(len(i.uindices))
                ]
                args.extend(zip([ccode(j.start) for j in i.uindices], ufunc))
                limits = [Symbol(start.name), Symbol(finish.name), 1]
                uindices = [
                    UnboundedIndex(j.index, i.dim + as_symbol(k))
                    for j, k in zip(i.uindices, ufunc)
                ]
                free.append(
                    i._rebuild(limits=limits, offsets=None, uindices=uindices))
                # The dimension and the unbounded indices need not be passed in
                not_required.update({i.dim}, set(j.index for j in i.uindices))

            # Construct elemental function body, and inspect it
            free = NestedTransformer(dict((zip(target, free)))).visit(root)
            expressions = FindNodes(Expression).visit(free)
            fsymbols = FindSymbols('symbolics').visit(free)

            # Add all definitely-required arguments
            # The outputs of scalar expressions need not be passed in
            not_required.update({i.output for i in expressions if i.is_scalar})
            for i in fsymbols:
                if i in not_required:
                    continue
                elif i.is_Array:
                    # Arrays are passed with a cast to a pointer to their C type
                    args.append(
                        ("(%s*)%s" % (c.dtype_to_ctype(i.dtype), i.name), i))
                elif i.is_TensorFunction:
                    args.append(("%s_vec" % i.name, i))
                elif i.is_Scalar:
                    args.append((i.name, i))

            # Add all maybe-required arguments that turn out to be required
            maybe_required.update(
                set(FindSymbols(mode='free-symbols').visit(free)))
            for i in fsymbols:
                # The symbols found above are excluded here, in both their
                # as_symbol and indexified forms, whereas the free symbols of
                # their symbolic shape may have to be passed in
                not_required.update({as_symbol(i), i.indexify()})
                for j in i.symbolic_shape:
                    maybe_required.update(j.free_symbols)
            # NOTE(review): presumably sorted by name to make the argument
            # order deterministic -- confirm against filter_sorted
            required = filter_sorted(maybe_required - not_required,
                                     key=attrgetter('name'))
            args.extend([(i.name, Scalar(name=i.name, dtype=i.dtype))
                         for i in required])

            # Split the pairs into call-site values and callee parameters
            call, params = zip(*args)
            handle = flatten([p.rtargs for p in params])
            name = "f_%d" % root.tag

            # Produce the new Call
            mapper[root] = List(header=noinline, body=Call(name, call))

            # Produce the new Callable
            # With setdefault, the first Callable registered under /name/ is
            # the one retained
            functions.setdefault(
                name, Callable(name, free, 'void', handle, ('static', )))

        # Transform the main tree
        processed = Transformer(mapper).visit(nodes)

        return processed, {'elemental_functions': functions.values()}
Exemple #9
0
    def _create_elemental_functions(self, nodes, state):
        """
        Move :class:`Iteration` sub-trees into :class:`Callable`s, replacing
        them with :class:`Call`s in the main tree.

        Currently, only tagged, elementizable Iteration objects are targeted.

        :param nodes: The Iteration/Expression tree to be transformed.
        :param state: Not used within this method.
        :returns: A 2-tuple ``(processed, metadata)``, where ``processed`` is
                  the transformed tree and ``metadata`` is a dict mapping
                  ``'elemental_functions'`` to the created Callables.
        """
        # Decoration placed as header of each generated Call
        noinline = self._compiler_decoration(
            'noinline', c.Comment('noinline?'))

        mapper = {}
        functions = OrderedDict()
        for tree in retrieve_iteration_tree(nodes, mode='superset'):
            # Search an elementizable sub-tree (if any)
            tagged = filter_iterations(
                tree, lambda i: i.tag is not None, 'asap')
            if not tagged:
                continue
            root = tagged[0]
            if not root.is_Elementizable:
                continue
            target = tree[tree.index(root):]

            # {parameter name -> value supplied by the caller}, for the
            # parameters originating from loop bounds and unbounded indices
            defined_args = {}

            # Build a new Iteration/Expression tree with free bounds
            rebuilt = []
            for iteration in target:
                dim_name = iteration.dim.name
                bounds = iteration.bounds_symbolic

                # Iteration bounds
                start = Scalar(name='%s_start' % dim_name, dtype=np.int32)
                finish = Scalar(name='%s_finish' % dim_name, dtype=np.int32)
                defined_args[start.name] = bounds[0]
                defined_args[finish.name] = bounds[1]

                # Iteration unbounded indices
                ufunc = [
                    Scalar(name='%s_ub%d' % (dim_name, n), dtype=np.int32)
                    for n, _ in enumerate(iteration.uindices)
                ]
                for symbol, uindex in zip(ufunc, iteration.uindices):
                    defined_args[symbol.name] = uindex.start

                limits = [
                    Scalar(name=start.name, dtype=np.int32),
                    Scalar(name=finish.name, dtype=np.int32),
                    1,
                ]
                uindices = [
                    UnboundedIndex(uindex.index,
                                   iteration.dim + as_symbol(symbol))
                    for uindex, symbol in zip(iteration.uindices, ufunc)
                ]
                rebuilt.append(iteration._rebuild(
                    limits=limits, offsets=None, uindices=uindices))

            # Construct elemental function body, and inspect it
            body = NestedTransformer(dict(zip(target, rebuilt))).visit(root)

            # Insert array casts for all tensors not defined within the body
            symbols = FindSymbols('symbolics').visit(body)
            defined = [s.name for s in FindSymbols('defines').visit(body)]
            casts = [
                ArrayCast(f) for f in symbols
                if f.is_Tensor and f.name not in defined
            ]
            body = (List(body=casts), body)

            # Pair each parameter with the value passed in by the caller
            args = []
            for param in derive_parameters(body):
                if param.name in defined_args:
                    args.append((defined_args[param.name], param))
                elif param.is_Dimension:
                    scalar = Scalar(name=param.name, dtype=param.dtype)
                    args.append((scalar, scalar))
                else:
                    args.append((param, param))
            call, params = zip(*args)

            name = "f_%d" % root.tag

            # Produce the new Call
            mapper[root] = List(header=noinline, body=Call(name, call))

            # Produce the new Callable; the first one registered under
            # /name/ is the one retained
            functions.setdefault(
                name,
                Callable(name, body, 'void', flatten(params), ('static', )))

        # Transform the main tree
        processed = Transformer(mapper).visit(nodes)

        return processed, {'elemental_functions': functions.values()}
Exemple #10
0
def iet_insert_C_decls(iet, func_table=None):
    """
    Build a new Iteration/Expression tree from ``iet`` in which the
    necessary symbol declarations have been added. Declarations are placed
    as close as possible to the first symbol use.

    If a non-empty ``func_table`` is provided, it is updated in place: the
    entries flagged as ``local`` are replaced by :class:`MetaCall`s whose
    root has been transformed with the same declarations.

    :param iet: The input Iteration/Expression tree.
    :param func_table: (Optional) a mapper from callable names within ``iet``
                       to :class:`Callable`s.
    :returns: The transformed Iteration/Expression tree.
    """
    def not_parallel(i):
        return not i.is_Parallel

    if not func_table:
        func_table = {}
    allocator = Allocator()
    mapper = OrderedDict()
    mapexprs = MapExpressions()

    # First, schedule declarations for Expressions. A call to a known, local
    # function is replaced by the expressions found in the function body,
    # visited with the call's enclosing nodes as the starting queue; all
    # other calls are dropped here
    scopes = []
    for node, enclosing in mapexprs.visit(iet).items():
        if not node.is_Call:
            scopes.append((node, enclosing))
            continue
        callee = func_table.get(node.name)
        if callee is not None and callee.local:
            scopes.extend(
                mapexprs.visit(callee.root, queue=list(enclosing)).items())
    for node, enclosing in scopes:
        if node.is_scalar:
            # Inline declaration
            mapper[node] = LocalExpression(**node.args)
            continue
        written = node.write
        if written is None or written._mem_external:
            # Nothing to do, e.g., variable passed as kernel argument
            continue
        if written._mem_stack:
            # On the stack; fall back to the whole tree if the filter
            # returns no enclosing iteration
            site = filter_iterations(
                enclosing, key=not_parallel, stop='asap') or [iet]
            allocator.push_stack(site[-1], written)
        else:
            # On the heap, as a tensor that must be globally accessible
            allocator.push_heap(written)

    # Then, schedule declarations callables arguments passed by
    # reference/pointer (as modified internally by the callable)
    calls = [item for item in mapexprs.visit(iet).items() if item[0].is_Call]
    for call, enclosing in calls:
        # Declare next to the innermost enclosing node, if any
        site = enclosing[-1] if enclosing else iet
        for param in call.params:
            try:
                if param.is_LocalObject:
                    # On the stack
                    allocator.push_stack(site, param)
                elif not param.is_Array:
                    continue
                elif param._mem_stack:
                    # On the stack
                    allocator.push_stack(site, param)
                elif param._mem_heap:
                    # On the heap
                    allocator.push_heap(param)
            except AttributeError:
                # E.g., a generic SymPy expression
                continue

    # Introduce declarations on the stack
    for site, objs in allocator.onstack:
        mapper[site] = tuple(Element(obj) for obj in objs)
    iet = NestedTransformer(mapper).visit(iet)
    for name, meta in list(func_table.items()):
        if meta.local:
            func_table[name] = MetaCall(
                Transformer(mapper).visit(meta.root), meta.local)

    # Introduce declarations on the heap (if any)
    if not allocator.onheap:
        return iet
    decls, allocs, frees = zip(*allocator.onheap)
    return List(header=decls + allocs, body=iet, footer=frees)