Beispiel #1
def _(expr, terms):
    """
    Factorize the derivatives of the same type found among `terms`.

    Parameters
    ----------
    expr
        The expression being processed; `expr.func` is used to rebuild it.
    terms
        Term-like objects exposing the `other`, `deriv` and `func` attributes.

    Returns
    -------
    tuple
        The (possibly rebuilt) expression and a `Term` wrapping it.
    """
    derivs, others = split(terms, lambda i: i.deriv is not None)
    if not derivs:
        return expr, Term(expr)

    # Map by type of derivative
    # NOTE(review): `key` is not defined in this block; it is presumably provided
    # by the enclosing module -- confirm
    mapper = as_mapper(derivs, lambda i: key(i.deriv))
    if len(mapper) == len(derivs):
        # No two derivatives share the same key, so there is nothing to factorize
        return expr, Term(expr)

    processed = []
    for v in mapper.values():
        fact, nonfact = split(v, lambda i: _is_const_coeff(i.other, i.deriv))
        if fact:
            # Finally factorize derivative arguments
            func = fact[0].deriv._new_from_self
            exprs = []
            for i in fact:
                if i.func:
                    exprs.append(i.func(i.other, i.deriv.expr))
                else:
                    # No operation to apply, so no coefficient is expected either
                    assert i.other == 1
                    exprs.append(i.deriv.expr)
            fact = [Term(func(expr=expr.func(*exprs)))]

        for i in fact + nonfact:
            if i.func:
                processed.append(i.func(i.other, i.deriv))
            else:
                # E.g., the Term built right above, which has no `func` and
                # carries the factorized derivative in `other`
                processed.append(i.other)

    others = [i.other for i in others]
    expr = expr.func(*(processed + others))

    return expr, Term(expr)
Beispiel #2
    def _extract(self, exprs, context, n):
        """
        Search `exprs` for potential derivative sub-expressions and record them
        in a Uxmapper.

        Parameters
        ----------
        exprs
            The expressions to be searched.
        context
            Must provide `context[None].scope`, from which the
            Dimension-independent data dependencies are retrieved.
        n
            Forwarded as-is to `search_potential_deriv`.

        Returns
        -------
        Uxmapper
            The mapper populated via `mapper.add(i, self._make_symbol, terms)`.
        """
        # Forbid CIRE involving Dimension-independent dependencies, e.g.:
        # r0 = ...
        # u[x, y] = ... r0*a[x, y] ...
        # NOTE: if one uses the DSL in a conventional way and sticks to the default
        # compilation pipelines where CSE always happens after CIRE, then `exclude`
        # will always be empty
        exclude = {i.source.indexed for i in context[None].scope.d_flow.independent()}

        mapper = Uxmapper()
        for e in exprs:
            for i in search_potential_deriv(e, n):
                if i.free_symbols & exclude:
                    # Candidate touches a forbidden symbol, skip it
                    continue

                key = lambda a: a.is_Add
                terms, others = split(i.args, key)

                if self._opt_maxalias:
                    # Treat `e` as an FD expression and pull out the derivative
                    # coefficient from `i`
                    # Note: typically derivative coefficients are numbers, but
                    # sometimes they could be provided in symbolic form through an
                    # arbitrary Function.  In the latter case, we rely on the
                    # heuristic that such Function's basically never span the whole
                    # grid, but rather a single Grid dimension (e.g., `c[z, n]` for a
                    # stencil of diameter `n` along `z`)
                    if e.grid is not None and terms:
                        key = partial(maybe_coeff_key, e.grid)
                        others, more_terms = split(others, key)
                        terms += more_terms

                mapper.add(i, self._make_symbol, terms)

        return mapper
Beispiel #3
    def extract(cls, n, context, min_cost, max_alias, cluster, sregistry):
        """
        Extract sum-of-products sub-expressions from the expressions of
        `cluster`, replacing each of them with a temporary.

        Parameters
        ----------
        n
            Used as the search `depth` (see the comment below).
        context, min_cost
            Not used by this implementation.
        max_alias
            If true, the arguments selected by `maybe_coeff_key` are extracted
            together with the Add arguments.
        cluster
            Provides the `exprs` to process, the `dtype` of the temporaries and
            the `scope` from which the symbols to exclude are derived.
        sregistry
            Provides `make_name()`, used to name the temporaries.

        Returns
        -------
        tuple
            `(extracted + processed, extracted)` if at least one sub-expression
            was extracted, `(cluster.exprs, [])` otherwise.
        """
        # NOTE(review): this call was left unclosed in the listing; only the
        # closing parenthesis has been restored -- confirm that nothing else
        # (e.g., a trailing method call) was lost
        make = lambda: Scalar(name=sregistry.make_name(), dtype=cluster.dtype)

        # The `depth` determines "how big" the extracted sum-of-products will be.
        # We observe that in typical FD codes:
        #   add(mul, mul, ...) -> stems from first order derivative
        #   add(mul(add(mul, mul, ...), ...), ...) -> stems from second order derivative
        # To search the muls in the former case, we need `depth=0`; to search the outer
        # muls in the latter case, we need `depth=2`
        depth = n

        # NOTE(review): the element expression of this comprehension was lost in
        # the listing; `i.source.indexed` mirrors the sibling `_extract` -- confirm
        exclude = {i.source.indexed for i in cluster.scope.d_flow.independent()}
        rule0 = lambda e: not e.free_symbols & exclude
        rule1 = lambda e: e.is_Mul and q_terminalop(e, depth)
        rule = lambda e: rule0(e) and rule1(e)

        extracted = OrderedDict()
        mapper = {}
        for e in cluster.exprs:
            for i in search(e, rule, 'all', 'bfs_first_hit'):
                if i in mapper:
                    # Already processed
                    continue

                key = lambda a: a.is_Add
                terms, others = split(list(i.args), key)

                if max_alias:
                    # Treat `e` as an FD expression and pull out the derivative
                    # coefficient from `i`
                    # Note: typically derivative coefficients are numbers, but
                    # sometimes they could be provided in symbolic form through an
                    # arbitrary Function.  In the latter case, we rely on the
                    # heuristic that such Function's basically never span the whole
                    # grid, but rather a single Grid dimension (e.g., `c[z, n]` for a
                    # stencil of diameter `n` along `z`)
                    if e.grid is not None and terms:
                        key = partial(maybe_coeff_key, e.grid)
                        others, more_terms = split(others, key)
                        terms += more_terms

                if terms:
                    k = i.func(*terms)
                    # Look up first: `setdefault` would evaluate `make()`, and
                    # thus consume a name, even when `k` is already known
                    try:
                        symbol, _ = extracted[k]
                    except KeyError:
                        symbol, _ = extracted.setdefault(k, (make(), e))
                    mapper[i] = i.func(symbol, *others)

        if mapper:
            extracted = [e.func(v, k) for k, (v, e) in extracted.items()]
            processed = [uxreplace(e, mapper) for e in cluster.exprs]
            return extracted + processed, extracted
        else:
            return cluster.exprs, []
Beispiel #4
def relax_incr_dimensions(iet, **kwargs):
    """
    This pass adjusts the bounds of blocked Iterations in order to include the "remainder
    regions".  Without the relaxation that occurs in this pass, the only way to iterate
    over the entire iteration space is to have step increments that are perfect divisors
    of the iteration space (e.g. in case of an iteration space of size 67 and block size
    8 only 64 iterations would be computed, as `67 - 67mod8 = 64`).

    A simple 1D example: nested Iterations are transformed from:

    <Iteration x0_blk0; (x_m, x_M, x0_blk0_size)>
        <Iteration x; (x0_blk0, x0_blk0 + x0_blk0_size - 1, 1)>

    to:

    <Iteration x0_blk0; (x_m, x_M, x0_blk0_size)>
        <Iteration x; (x0_blk0, MIN(x_M, x0_blk0 + x0_blk0_size - 1)), 1)>

    Returns
    -------
    tuple
        The (possibly transformed) `iet` and a dict whose `'headers'` entry is
        non-empty only if at least one Iteration was rebuilt.
    """
    mapper = {}
    for tree in retrieve_iteration_tree(iet):
        iterations = [i for i in tree if i.dim.is_Block]
        if not iterations:
            # No blocked Iterations in this tree
            continue

        root = iterations[0]
        if root in mapper:
            continue

        assert all(i.direction is Forward for i in iterations)
        outer, inner = split(iterations, lambda i: not i.dim.parent.is_Block)

        # Get root's `symbolic_max` out of each outer Dimension
        roots_max = {i.dim.root: i.symbolic_max for i in outer}

        # Process inner iterations and adjust their bounds
        for i in inner:
            # The Iteration's maximum is the MIN of (a) the `symbolic_max` of current
            # Iteration e.g. `x0_blk0 + x0_blk0_size - 1` and (b) the `symbolic_max`
            # of the current Iteration's root Dimension e.g. `x_M`. The generated
            # maximum will be `MIN(x0_blk0 + x0_blk0_size - 1, x_M)

            # In some corner cases an offset may be added (e.g. after CIRE passes)
            # E.g. assume `i.symbolic_max = x0_blk0 + x0_blk0_size + 1` and
            # `i.dim.symbolic_max = x0_blk0 + x0_blk0_size - 1` then the generated
            # maximum will be `MIN(x0_blk0 + x0_blk0_size + 1, x_M + 2)`

            root_max = roots_max[i.dim.root] + i.symbolic_max - i.dim.symbolic_max
            iter_max = evalrel(min, [i.symbolic_max, root_max])
            mapper[i] = i._rebuild(limits=(i.symbolic_min, iter_max, i.step))

    if mapper:
        iet = Transformer(mapper, nested=True).visit(iet)

        # NOTE(review): the right-hand operand of each `%` was lost in the listing;
        # the macro names used here are inferred from the `MIN(...)` appearing in
        # the docstring and from the ternaries themselves -- confirm they match
        # the names emitted by `evalrel`
        headers = [('%s(a,b)' % 'MIN', ('(((a) < (b)) ? (a) : (b))')),
                   ('%s(a,b)' % 'MAX', ('(((a) > (b)) ? (a) : (b))'))]
    else:
        headers = []

    return iet, {'headers': headers}
Beispiel #5
def topological_sort(exprs):
    """
    Topologically sort the temporaries in a list of equations.

    The equations whose left-hand side is not an Indexed (the "temporaries")
    are ordered through a DAG; all other equations (the "tensors") are appended
    afterwards, in the order in which they were provided.

    Parameters
    ----------
    exprs : list
        The equations to sort. No two equations may share the same `lhs`.

    Returns
    -------
    list
        The sorted temporaries followed by the tensor equations.
    """
    mapper = {e.lhs: e for e in exprs}
    assert len(mapper) == len(exprs)  # Expect SSA

    # Build DAG and topologically-sort temporaries
    temporaries, tensors = split(exprs, lambda e: not e.lhs.is_Indexed)
    dag = DAG(nodes=temporaries)
    for e in temporaries:
        for r in retrieve_terminals(e.rhs):
            if r not in mapper:
                # Not written by any of the `exprs`
                continue
            elif mapper[r] is e:
                # Avoid cyclic dependences, such as
                # Eq(f, f + 1)
                continue
            elif r.is_Indexed:
                # Only scalars enforce an ordering
                continue
            else:
                dag.add_edge(mapper[r], e, force_add=True)
    processed = list(dag.topological_sort())

    # Append tensor equations at the end in user-provided order
    processed.extend(tensors)

    return processed
Beispiel #6
    def __new__(cls, *args, **kwargs):
        """
        Build the product of `args` after applying a few basic simplifications:
        flattening of nested Muls, `a*0 -> 0`, `a*1 -> a`, removal of an even
        number of `-1` factors, and reordering of the arguments.

        Returns `sympy.S.Zero` if any argument is zero, otherwise whatever the
        parent class' `__new__` returns for the simplified arguments.
        """
        # Imported locally so that this block does not rely on a module-level
        # import that is not visible here; `_mulsort` sorts the list in-place
        from sympy.core.mul import _mulsort

        # A Mul, being a DifferentiableOp, may not trigger evaluation upon
        # construction (e.g., when an EvalDerivative is present among its
        # arguments), so here we apply a small set of basic simplifications
        # to avoid generating functional, but also ugly, code

        # (a*b)*c -> a*b*c (flattening)
        nested, others = split(args, lambda e: isinstance(e, Mul))
        args = flatten(e.args for e in nested) + list(others)

        # a*0 -> 0
        if any(i == 0 for i in args):
            return sympy.S.Zero

        # a*1 -> a
        args = [i for i in args if i != 1]

        # a*-1*-1 -> a
        nminus = sum(1 for i in args if i == sympy.S.NegativeOne)
        if nminus % 2 == 0:
            args = [i for i in args if i != sympy.S.NegativeOne]

        # Reorder for homogeneity with pure SymPy types
        # NOTE(review): the statement announced by this comment was missing from
        # the listing; `_mulsort` is the routine SymPy uses to order the
        # arguments of a Mul -- confirm against the upstream file
        _mulsort(args)

        return super().__new__(cls, *args, **kwargs)
Beispiel #7
def _eval_numbers(expr, args):
    """
    Helper function for in-place reduction of the expr arguments.

    If `args` contains more than one number, all numbers are folded into a
    single `expr.func(*numbers)` argument, placed in front of the remaining
    ones. `args` is modified in-place; nothing is returned.
    """
    numbers, others = split(args, lambda i: i.is_Number)
    if len(numbers) > 1:
        args[:] = [expr.func(*numbers)] + others
Beispiel #8
def _uxreplace(expr, rule):
    """
    Recursively apply the substitution `rule` to `expr`.

    Parameters
    ----------
    expr
        The object to process. It must be usable in `expr in rule` and expose
        its operands through `.args`.
    rule
        Maps an object either to its replacement or to a dict. In the latter
        case the dict maps some of the object's arguments to their replacement,
        with None meaning "drop this argument".

    Returns
    -------
    tuple
        The (possibly rebuilt) object and a flag telling whether a
        substitution took place.
    """
    if expr in rule:
        v = rule[expr]
        if not isinstance(v, dict):
            return v, True
        # Partial replacement: only the arguments listed in `v` are substituted
        args, eargs = split(expr.args, lambda i: i in v)
        args = [v[i] for i in args if v[i] is not None]
        changed = True
    else:
        args, eargs = [], expr.args
        changed = False

    if rule:
        for a in eargs:
            try:
                ax, flag = _uxreplace(a, rule)
                args.append(ax)
                changed |= flag
            except AttributeError:
                # E.g., un-sympified numbers
                args.append(a)
        if changed:
            return _uxreplace_handle(expr, args), True

    return expr, False
Beispiel #9
def _(expr, terms):
    """
    Propagate upstream the one derivative found among `terms`, if any.

    Returns
    -------
    tuple
        `expr` and a `Term`. If exactly one of the `terms` carries a
        derivative, the Term holds the remaining operands (combined through
        `expr.func`), that derivative and `expr.func`; otherwise the Term
        simply wraps `expr`.
    """
    derivs, others = split(terms, lambda i: i.deriv is not None)
    if len(derivs) == 1:
        # Linear => propagate found Derivative upstream
        deriv = derivs[0].deriv
        other = expr.func(*[i.other for i in others])  # De-nest terms
        return expr, Term(other, deriv, expr.func)
    else:
        return expr, Term(expr)
Beispiel #10
    def run(expr):
        """
        Recursively visit `expr`, replacing the sub-expressions that satisfy
        both `rule` and `costmodel`.

        `rule`, `costmodel`, `replace` and `eager` are free names, resolved in
        the enclosing scope.

        Returns
        -------
        tuple
            The (possibly rebuilt) expression and a flag. The flag is true when
            the returned expression matched `rule` but was not replaced, so
            that the caller may attempt to replace a larger expression.
        """
        if expr.is_Atom or expr.is_Indexed:
            return expr, rule(expr)
        elif expr.is_Pow:
            base, flag = run(expr.base)
            if flag and costmodel(base):
                return expr.func(replace(base), expr.exp, evaluate=False), False
            elif flag and costmodel(expr):
                return replace(expr), False
            else:
                return expr.func(base, expr.exp, evaluate=False), rule(expr)
        else:
            children = [run(a) for a in expr.args]
            matching = [a for a, flag in children if flag]
            other = [a for a, _ in children if a not in matching]

            if not matching:
                return expr.func(*other, evaluate=False), False

            if eager is False:
                matched = expr.func(*matching, evaluate=False)
                if len(matching) == len(children) and rule(expr):
                    # Go look for larger expressions first
                    return matched, True
                elif rule(matched) and costmodel(matched):
                    # E.g.: a*b*c*d -> a*r0
                    rebuilt = expr.func(*(other + [replace(matched)]), evaluate=False)
                    return rebuilt, False
                else:
                    # E.g.: a*b*c*d -> a*r0*r1*r2
                    replaced = [replace(e) for e in matching if costmodel(e)]
                    unreplaced = [e for e in matching if not costmodel(e)]
                    rebuilt = expr.func(*(other + replaced + unreplaced), evaluate=False)
                    return rebuilt, False
            else:
                replaceable, unreplaced = split(matching, lambda e: costmodel(e))
                if replaceable:
                    # E.g.: a*b*c*d -> a*r0*r1*r2
                    replaced = [replace(e) for e in replaceable]
                    rebuilt = expr.func(*(other + replaced + unreplaced), evaluate=False)
                    return rebuilt, False
                matched = expr.func(*matching, evaluate=False)
                if rule(matched) and costmodel(matched):
                    if len(matching) == len(children):
                        # E.g.: a*b*c*d -> r0
                        return replace(matched), False
                    else:
                        # E.g.: a*b*c*d -> a*r0
                        rebuilt = expr.func(*(other + [replace(matched)]), evaluate=False)
                        return rebuilt, False
                elif len(matching) == len(children) and rule(expr):
                    # Go look for larger expressions
                    return matched, True
                else:
                    # E.g.: a*b*c*d; a,b,a*b replaceable but not satisfying the cost
                    # model, hence giving up as c,d,c*d aren't replaceable
                    return expr.func(*(matching + other), evaluate=False), False
Beispiel #11
    def prepare_arguments(self, **kwargs):
        # NOTE(review): the two lines below read like the body of a docstring whose
        # triple-quote delimiters are missing from this listing
        Process runtime arguments passed to ``.apply()` and derive
        default values for any remaining arguments.
        # Process data-carriers (first overrides, then fill up with whatever is needed)
        args = ReducerMap()
        # NOTE(review): the two continuation lines below have lost both the call they
        # were arguments of and the operand preceding `in kwargs` / `not in args`;
        # restore them from the upstream file before use
            [p._arg_values(**kwargs) for p in self.input if in kwargs])
            [p._arg_values() for p in self.input if not in args])
        args = args.reduce_all()

        # Process dimensions (derived go after as they might need/affect their parents)
        derived, main = split(self.dimensions, lambda i: i.is_Derived)
        for p in main:
            args.update(p._arg_values(args, self._dspace[p], **kwargs))
        for p in derived:
            args.update(p._arg_values(args, self._dspace[p], **kwargs))

        # Sanity check
        for p in self.input:
            p._arg_check(args, self._dspace[p])

        # Derive additional values for DLE arguments
        # TODO: This is not pretty, but it works for now. Ideally, the
        # DLE arguments would be massaged into the IET so as to comply
        # with the rest of the argument derivation procedure.
        # NOTE(review): every `args[]` subscript in the loop below has lost its key,
        # and the last assignment has lost the `else:` it belonged to; none of the
        # missing expressions can be recovered from this listing alone
        for arg in self.dle_args:
            dim = arg.argument
            osize = args[]
            if dim.symbolic_size in self.parameters:
                if arg.value is None:
                    args[] = osize
                elif isinstance(arg.value, int):
                    args[] = arg.value
                    args[] = arg.value(osize)

        # Add in the profiler argument
        # NOTE(review): both the key and the assigned value are missing here
        args[] =

        # Add in any backend-specific argument
        args.update(kwargs.pop('backend', {}))

        # Execute autotuning and adjust arguments accordingly
        if kwargs.pop('autotune', False):
            args = self._autotune(args)

        # Check all user-provided keywords are known to the Operator
        # (`backend` and `autotune` have been popped above, so they are not checked)
        for k, v in kwargs.items():
            if k not in self.known_arguments:
                raise ValueError(
                    "Unrecognized argument %s=%s passed to `apply`" % (k, v))

        return args
Beispiel #12
def _(expr, mapper, nn_derivs=None):
    """
    Attempt to move the non-derivative arguments of `expr` (the "potential
    coefficients") inside the derivative(s) found among its arguments.

    Parameters
    ----------
    expr
        The expression to process.
    mapper
        Queried through `mapper.get(expr)` when `nn_derivs` is not provided.
    nn_derivs : optional
        Indexed by `i._metadata` for each derivative `i`; the aggregation is
        only attempted if at least one of the values found is greater than 1.

    Returns
    -------
    The rebuilt expression, or the expression with its arguments processed
    recursively if the aggregation is not applicable.
    """
    nn_derivs = nn_derivs or mapper.get(expr)

    args = [aggregate_coeffs(a, mapper, nn_derivs) for a in expr.args]
    expr = reuse_if_untouched(expr, args)

    # Separate arguments containing derivatives from those which do not
    hope_coeffs = []
    with_derivs = []
    for a in args:
        if isinstance(a, sympy.Derivative):
            with_derivs.append((a, [a], []))
        else:
            derivs, others = split(a.args, lambda i: isinstance(i, sympy.Derivative))
            if a.is_Add and derivs:
                with_derivs.append((a, derivs, others))
            else:
                hope_coeffs.append(a)

    # E.g., non-linear term, expansion won't help (in fact, it would only
    # cause an increase in operation count), so we skip
    if len(with_derivs) > 1:
        return expr

    try:
        with_deriv, derivs, others = with_derivs.pop(0)
    except IndexError:
        # No derivatives found, give up
        return expr

    # Aggregating the potential coefficient won't help if, in the current scope
    # at least one derivative type does not appear more than once. In fact, aggregation
    # might even have a detrimental effect due to increasing the operation count by
    # expanding Muls), so we rather give if that's the case
    if not any(nn_derivs[i._metadata] > 1 for i in derivs):
        return expr

    # Is the potential coefficient really a coefficient?
    csymbols = set().union(*[i.free_symbols for i in hope_coeffs])
    cdims = [i._defines for i in csymbols if i.is_Dimension]
    ddims = [set(i.dims) for i in derivs]
    if any(i & j for i, j in product(cdims, ddims)):
        return expr

    # Redundancies unlikely to pop up along the time dimension
    if any(d.is_Time for d in flatten(ddims)):
        return expr

    if len(derivs) == 1 and with_deriv is derivs[0]:
        # The argument is the derivative itself
        expr = with_deriv._new_from_self(expr=expr.func(*hope_coeffs, with_deriv.expr))
    else:
        # The argument is an Add containing derivatives: distribute the
        # coefficients over each of its operands
        others = [expr.func(*hope_coeffs, a) for a in others]
        derivs = [a._new_from_self(expr=expr.func(*hope_coeffs, a.expr)) for a in derivs]
        expr = with_deriv.func(*(derivs + others))

    return expr
Beispiel #13
    def _prepare_arguments(self, **kwargs):
        """
        Process runtime arguments passed to ``.apply()`` and derive
        default values for any remaining arguments.

        Raises
        ------
        RuntimeError
            If no unique Grid can be determined while `configuration['mpi']`
            is set and at least one Grid was found.
        ValueError
            If an unknown keyword is provided, unless
            `configuration['ignore-unknowns']` is set.
        """
        # Process data-carriers (first overrides, then fill up with whatever is needed)
        # NOTE(review): in the listing the two `update` calls and the operand of the
        # membership tests were missing; `args.update(...)` and `p.name` have been
        # restored by analogy with the rest of this method -- confirm
        args = ReducerMap()
        args.update(
            [p._arg_values(**kwargs) for p in self.input if p.name in kwargs])
        args.update(
            [p._arg_values() for p in self.input if p.name not in args])
        args = args.reduce_all()

        # All TensorFunctions should be defined on the same Grid
        # NOTE(review): `kwargs.get(p, p)` looks up the object itself rather than
        # its name; kept as found in the listing -- confirm this is intended
        functions = [
            kwargs.get(p, p) for p in self.input if p.is_TensorFunction
        ]
        mapper = ReducerMap([('grid', i.grid) for i in functions if i.grid])
        try:
            grid = mapper.unique('grid')
        except (KeyError, ValueError):
            if mapper and configuration['mpi']:
                raise RuntimeError("Multiple `Grid`s found before `apply`")
            grid = None

        # Process dimensions (derived go after as they might need/affect their parents)
        derived, main = split(self.dimensions, lambda i: i.is_Derived)
        for p in main:
            args.update(p._arg_values(args, self._dspace[p], grid, **kwargs))
        for p in derived:
            args.update(p._arg_values(args, self._dspace[p], grid, **kwargs))

        # Sanity check
        for p in self.input:
            p._arg_check(args, self._dspace[p])

        # Add in the profiler argument
        # NOTE(review): the key was missing in the listing; `self._profiler.name`
        # is an assumption -- confirm
        args[self._profiler.name] = self._profiler.timer.reset()

        # Add in any backend-specific argument
        args.update(kwargs.pop('backend', {}))

        # Execute autotuning and adjust arguments accordingly
        args = self._autotune(
            args, kwargs.pop('autotune', configuration['autotuning']))

        # Check all user-provided keywords are known to the Operator
        if not configuration['ignore-unknowns']:
            for k, v in kwargs.items():
                if k not in self._known_arguments:
                    raise ValueError("Unrecognized argument %s=%s" % (k, v))

        return args
Beispiel #14
def diff_parameters(iet, root):
    """
    Derive the parameters of a sub-IET, `iet`, within a Callable, `root`, and
    split them into two groups:

        * the "read-only" parameters, and
        * the "dynamic" parameters, whose value changes at some point in `root`.

    Returns
    -------
    tuple
        All required parameters, the read-only parameters and the dynamic
        parameters, in this order.
    """
    # TODO: this is currently very rudimentary
    required = derive_parameters(iet)

    # A parameter is regarded as read-only if it is a parameter of `root` or
    # if it is an Array
    known = set(root.parameters) | {i for i in required if i.is_Array}

    parameters, dynamic_parameters = split(required, lambda i: i in known)

    return required, parameters, dynamic_parameters
Beispiel #15
    def __new__(cls, *args, **kwargs):
        """
        Build the sum of `args` after applying a few basic simplifications:
        flattening of nested Adds, `a+0 -> a`, and reordering of the arguments.
        """
        # Imported locally so that this block does not rely on a module-level
        # import that is not visible here; `_addsort` sorts the list in-place
        from sympy.core.add import _addsort

        # Here, often we get `evaluate=False` to prevent SymPy evaluation (e.g.,
        # when `cls==EvalDerivative`), but in all cases we at least apply a small
        # set of basic simplifications

        # (a+b)+c -> a+b+c (flattening)
        nested, others = split(args, lambda e: isinstance(e, Add))
        args = flatten(e.args for e in nested) + list(others)

        # a+0 -> a
        args = [i for i in args if i != 0]

        # Reorder for homogeneity with pure SymPy types
        # NOTE(review): the statement announced by this comment was missing from
        # the listing; `_addsort` is the routine SymPy uses to order the
        # arguments of an Add -- confirm against the upstream file
        _addsort(args)

        return super().__new__(cls, *args, **kwargs)
Beispiel #16
    def __make_tfunc(self, name, iet, root, threads):
        # Wrap `iet` into a Callable named `name`, together with the SharedData
        # object through which its dynamic parameters are accessed.
        # NOTE(review): several statements in this listing are truncated (see the
        # notes below); the code does not parse as-is and must be restored from
        # the upstream file before use

        # Create the SharedData
        required = derive_parameters(iet)
        known = (root.parameters +
                 tuple(i for i in required if i.is_Array and i._mem_shared))
        parameters, dynamic_parameters = split(required, lambda i: i in known)

        # NOTE(review): the remaining arguments of this call, and its closing
        # parenthesis, are missing
        sdata = SharedData(name=self.sregistry.make_name(prefix='sdata'),

        # Prepend the unwinded SharedData fields, available upon thread activation
        # NOTE(review): the first argument of `FieldFromPointer` is missing, the
        # list is never closed, and the last line below belongs to a statement
        # whose beginning is missing
        preactions = [
            DummyExpr(i, FieldFromPointer(, sdata.symbolic_base))
            for i in dynamic_parameters
                      FieldFromPointer(sdata._field_id, sdata.symbolic_base)))

        # Append the flag reset
        # NOTE(review): the expression wrapping this `FieldFromPointer`, the value
        # the flag is reset to and the end of the list are missing
        postactions = [
                    FieldFromPointer(sdata._field_flag, sdata.symbolic_base),

        iet = List(body=preactions + [iet] + postactions)

        # Append the flag reset

        # The thread has work to do when it receives the signal that all locks have
        # been set to 0 by the main thread
        iet = Conditional(
            CondEq(FieldFromPointer(sdata._field_flag, sdata.symbolic_base),
                   2), iet)

        # The thread keeps spinning until the alive flag is set to 0 by the main thread
        iet = While(
            CondNe(FieldFromPointer(sdata._field_flag, sdata.symbolic_base),
                   0), iet)

        return Callable(name, iet, 'void', parameters, 'static'), sdata
Beispiel #17
    def _extract(self, exprs, context, n):
        """
        Search `exprs` for the commutative sub-expressions having at least one
        argument among those extracted by the parent class, and record them in
        a Uxmapper.

        Returns
        -------
        Uxmapper
            The mapper populated via `mapper.add(i, self._make_symbol, terms)`,
            where `terms` are the arguments of `i` extracted by the parent class.
        """
        extracted = super()._extract(exprs, context, n).extracted

        rule = lambda e: any(a in extracted for a in e.args)

        mapper = Uxmapper()
        for e in exprs:
            for i in search(e, rule, 'all', 'dfs'):
                if not i.is_commutative:
                    # Only commutative operations are processed
                    continue

                key = lambda a: a in extracted
                terms, others = split(i.args, key)

                mapper.add(i, self._make_symbol, terms)

        return mapper
Beispiel #18
def diff_parameters(iet, root, indirectly_provided=None):
    """
    Derive the parameters of a sub-IET, `iet`, within a Callable, `root`, and
    split them into two groups:

        * the "read-only" parameters, and
        * the "dynamic" parameters, whose value changes at some point in `root`.

    The `indirectly_provided` are the parameters that are provided indirectly to
    `iet`, for example via a composite type (e.g., a C struct).

    Returns
    -------
    tuple
        The required parameters (net of the `indirectly_provided` ones), the
        read-only parameters and the dynamic parameters, in this order.
    """
    # Hoisted out of the comprehension since it does not change across items
    indirect = as_tuple(indirectly_provided)
    required = [i for i in derive_parameters(iet) if i not in indirect]

    # A parameter is regarded as read-only if it is a parameter of `root` or
    # if it is an Array
    known = set(root.parameters) | {i for i in required if i.is_Array}

    parameters, dynamic_parameters = split(required, lambda i: i in known)

    return required, parameters, dynamic_parameters
Beispiel #19
    def extract(cls, n, context, min_cost, cluster, sregistry):
        """
        Extract sum-of-products sub-expressions from the expressions of
        `cluster`, replacing each of them with a temporary.

        Parameters
        ----------
        n
            Used as the search `depth` (see the comment below).
        context, min_cost
            Not used by this implementation.
        cluster
            Provides the `exprs` to process, the `dtype` of the temporaries and
            the `scope` from which the symbols to exclude are derived.
        sregistry
            Provides `make_name()`, used to name the temporaries.

        Returns
        -------
        tuple
            `(extracted + processed, extracted)` if at least one sub-expression
            was extracted, `(cluster.exprs, [])` otherwise.
        """
        # NOTE(review): this call was left unclosed in the listing; only the
        # closing parenthesis has been restored -- confirm that nothing else
        # (e.g., a trailing method call) was lost
        make = lambda: Scalar(name=sregistry.make_name(), dtype=cluster.dtype)

        # The `depth` determines "how big" the extracted sum-of-products will be.
        # We observe that in typical FD codes:
        #   add(mul, mul, ...) -> stems from first order derivative
        #   add(mul(add(mul, mul, ...), ...), ...) -> stems from second order derivative
        # To search the muls in the former case, we need `depth=0`; to search the outer
        # muls in the latter case, we need `depth=2`
        depth = n

        # NOTE(review): the element expression of this comprehension was lost in
        # the listing; `i.source.indexed` is an assumption -- confirm
        exclude = {i.source.indexed for i in cluster.scope.d_flow.independent()}
        rule0 = lambda e: not e.free_symbols & exclude
        rule1 = lambda e: e.is_Mul and q_terminalop(e, depth)
        rule = lambda e: rule0(e) and rule1(e)

        extracted = OrderedDict()
        mapper = {}
        for e in cluster.exprs:
            for i in search(e, rule, 'all', 'bfs_first_hit'):
                if i in mapper:
                    # Already processed
                    continue

                # Separate the Add arguments, which get extracted, from all
                # other arguments (e.g., numbers and Functions, as they could
                # be a derivative coeff)
                terms, others = split(i.args, lambda a: a.is_Add)
                if terms:
                    k = i.func(*terms)
                    # Look up first: `setdefault` would evaluate `make()`, and
                    # thus consume a name, even when `k` is already known
                    try:
                        symbol, _ = extracted[k]
                    except KeyError:
                        symbol, _ = extracted.setdefault(k, (make(), e))
                    mapper[i] = i.func(symbol, *others)

        if mapper:
            extracted = [e.func(v, k) for k, (v, e) in extracted.items()]
            processed = [uxreplace(e, mapper) for e in cluster.exprs]
            return extracted + processed, extracted
        else:
            return cluster.exprs, []
Beispiel #20
    def _generate(self, exprs, exclude):
        """
        Generator yielding up to two results of `self._do_generate`: first the
        one obtained by searching for basic sub-expressions (functions and
        fractional powers), then the one obtained by searching for the
        expressions composed of them. Nothing is yielded if the first result
        is empty.
        """
        # E.g., extract `sin(x)` and `sqrt(x)` from `a*sin(x)*sqrt(x)`
        rule = lambda e: e.is_Function or (e.is_Pow and e.exp.is_Number and 0 <
                                           e.exp < 1)
        cbk_search = lambda e: search(e, rule, 'all', 'bfs_first_hit')
        basextr = self._do_generate(exprs, exclude, cbk_search)
        if not basextr:
            # Nothing to compose upon
            return
        yield basextr

        # E.g., extract `sin(x)*cos(x)` from `a*sin(x)*cos(x)`
        def cbk_search(expr):
            found, others = split(expr.args, lambda a: a in basextr)
            ret = [expr] if found else []
            # Keep searching within the arguments that were not extracted
            for a in others:
                ret.extend(cbk_search(a))
            return ret

        cbk_compose = lambda e: split(e.args, lambda a: a in basextr)[0]
        yield self._do_generate(exprs, exclude, cbk_search, cbk_compose)
Beispiel #21
def _(expr):
    """
    Factorize the derivatives of the same type appearing among the arguments
    of `expr`, e.g. `D(a) + D(b) -> D(a + b)`.

    Returns
    -------
    The rebuilt expression, or `expr` itself if it has no derivative arguments
    or if no two of them share the same metadata.
    """
    args = [factorize_derivatives(a) for a in expr.args]

    derivs, others = split(args, lambda a: isinstance(a, sympy.Derivative))
    if not derivs:
        return expr

    # Map by type of derivative
    # Note: `D0(a) + D1(b) == D(a + b)` <=> `D0` and `D1`'s metadata match,
    # i.e. they are the same type of derivative
    mapper = as_mapper(derivs, lambda i: i._metadata)
    if len(mapper) == len(derivs):
        return expr

    args = list(others)
    for v in mapper.values():
        c = v[0]
        if len(v) == 1:
            # Nothing to factorize, the derivative goes through untouched
            args.append(c)
        else:
            args.append(c._new_from_self(expr=expr.func(*[i.expr for i in v])))
    expr = expr.func(*args)

    return expr
Beispiel #22
    def _prepare_arguments(self, **kwargs):
        """
        Process runtime arguments passed to ``.apply()`` and derive
        default values for any remaining arguments.

        Raises
        ------
        ValueError
            If two overrides are incompatible, if a default value conflicts
            with an already processed argument, if multiple Grids are found
            while `configuration['mpi']` is set, or if an unknown keyword is
            provided (unless `configuration['ignore-unknowns']` is set).
        """
        # NOTE(review): in the listing the operand of every membership test below,
        # the first argument of `kwargs.get(...)` and the key of the profiler
        # argument were missing; `<object>.name` has been restored in all these
        # places -- confirm against the upstream file
        overrides, defaults = split(self.input, lambda p: p.name in kwargs)
        # Process data-carrier overrides
        args = ReducerMap()
        for p in overrides:
            # NOTE(review): this `update` was missing in the listing; it mirrors
            # the way defaults are processed below -- confirm
            args.update(p._arg_values(**kwargs))
            try:
                args = ReducerMap(args.reduce_all())
            except ValueError:
                raise ValueError(
                    "Override `%s` is incompatible with overrides `%s`" %
                    (p, [i for i in overrides if i.name in args]))
        # Process data-carrier defaults
        for p in defaults:
            if p.name in args:
                # E.g., SubFunctions
                continue
            for k, v in p._arg_values(**kwargs).items():
                if k in args and args[k] != v:
                    raise ValueError(
                        "Default `%s` is incompatible with other args as "
                        "`%s=%s`, while `%s=%s` is expected. Perhaps you "
                        "forgot to override `%s`?" % (p, k, v, k, args[k], p))
                args[k] = v
        args = args.reduce_all()

        # All DiscreteFunctions should be defined on the same Grid
        grids = {getattr(p, 'grid', None) for p in self.input} - {None}
        if len(grids) > 1 and configuration['mpi']:
            raise ValueError("Multiple Grids found")
        try:
            grid = grids.pop()
        except KeyError:
            # No Grid at all
            grid = None

        # Process Dimensions (derived go after as they might need/affect their parents)
        derived, main = split(self.dimensions, lambda i: i.is_Derived)
        for d in main:
            args.update(d._arg_values(args, self._dspace[d], grid, **kwargs))
        for d in derived:
            args.update(d._arg_values(args, self._dspace[d], grid, **kwargs))

        # Process Objects (which may need some `args`)
        for o in self.objects:
            args.update(o._arg_values(args, **kwargs))

        # Sanity check
        for p in self.parameters:
            p._arg_check(args, self._dspace[p])

        # Turn arguments into a format suitable for the generated code
        # E.g., instead of NumPy arrays for Functions, the generated code expects
        # pointers to ctypes.Struct
        for p in self.parameters:
            try:
                args.update(kwargs.get(p.name, p)._arg_as_ctype(args, alias=p))
            except AttributeError:
                # User-provided floats/ndarray obviously do not have `_arg_as_ctype`
                args.update(p._arg_as_ctype(args, alias=p))

        # Add in the profiler argument
        args[self._profiler.name] = self._profiler.timer.reset()

        # Add in any backend-specific argument
        args.update(kwargs.pop('backend', {}))

        # Execute autotuning and adjust arguments accordingly
        args = self._autotune(
            args, kwargs.pop('autotune', configuration['autotuning']))

        # Check all user-provided keywords are known to the Operator
        if not configuration['ignore-unknowns']:
            for k, v in kwargs.items():
                if k not in self._known_arguments:
                    raise ValueError("Unrecognized argument %s=%s" % (k, v))

        return args
Beispiel #23
def collect(extracted, ispace, min_storage):
    """
    Find groups of aliasing expressions.

    We shall introduce the following (loose) terminology:

        * A ``terminal`` is the leaf of a mathematical operation. Terminals
          can be numbers (n), literals (l), or Indexeds (I).
        * ``R`` is the relaxation operator := ``R(n) = n``, ``R(l) = l``,
          ``R(I) = J``, where ``J`` has the same base as ``I`` but with all
          offsets stripped away. For example, ``R(a[i+2,j-1]) = a[i,j]``.
        * A ``relaxed expression`` is an expression in which all of the
          terminals are relaxed.

    Now we define the concept of aliasing. We say that an expression A
    aliases an expression B if:

        * ``R(A) == R(B)``
        * all pairwise Indexeds in A and B access memory locations at a
          fixed constant distance along each Dimension.

    For example, consider the following expressions:

        * a[i+1] + b[i+1]
        * a[i+1] + b[j+1]
        * a[i] + c[i]
        * a[i+2] - b[i+2]
        * a[i+2] + b[i]
        * a[i-1] + b[i-1]

    Out of the expressions above, the following alias to `a[i] + b[i]`:

        * a[i+1] + b[i+1] : same operands and operations, distance along i: 1
        * a[i-1] + b[i-1] : same operands and operations, distance along i: -1

    Whereas the following do not:

        * a[i+1] + b[j+1] : because at least one index differs
        * a[i] + c[i] : because at least one of the operands differs
        * a[i+2] - b[i+2] : because at least one operation differs
        * a[i+2] + b[i] : because the distances along ``i`` differ (+2 and +0)

    Parameters
    ----------
    extracted : mapping
        The candidate expressions. It is iterated to obtain the expressions
        and indexed by expression (``extracted[expr]``) to retrieve the
        object each alias stands for.
    ispace
        Forwarded untouched to every ``Candidate``.
    min_storage : bool
        If true, Groups are bucketed by ``Group.dimensions_translated``,
        otherwise by ``Group.dimensions``.

    Returns
    -------
    AliasMapper
        One entry per detected alias, registered via ``AliasMapper.add``.
    """
    # Find the potential aliases
    found = []
    for expr in extracted:
        assert not expr.is_Equality

        indexeds = retrieve_indexed(expr)

        bases = []
        offsets = []
        for i in indexeds:
            ii = IterationInstance(i)
            if ii.is_irregular:
                # Give up on `expr`: `bases` stays shorter than `indexeds`,
                # hence the length check below discards it
                break

            base = []
            offset = []
            for e, ai in zip(ii, ii.aindices):
                if q_constant(e):
                    # A constant index carries no Dimension-relative offset
                    base.append(e)
                else:
                    base.append(ai)
                    offset.append((ai, e - ai))
            # NOTE(review): the two appends below were restored; the consumers
            # further down iterate `c.bases` and call LabeledVector methods on
            # `c.offsets` -- confirm the container types against upstream
            bases.append(tuple(base))
            offsets.append(LabeledVector(offset))

        if not indexeds or len(bases) == len(indexeds):
            found.append(Candidate(expr, ispace, indexeds, bases, offsets))

    # Create groups of aliasing expressions
    mapper = OrderedDict()
    unseen = list(found)
    while unseen:
        c = unseen.pop(0)
        group = [c]
        for u in list(unseen):
            # Is the arithmetic structure of `c` and `u` equivalent ?
            if not compare_ops(c.expr, u.expr):
                continue

            # Is `c` translated w.r.t. `u` ?
            if not c.translated(u):
                continue

            # `u` aliases `c`: move it from the work list into the group
            group.append(u)
            unseen.remove(u)
        group = Group(group)

        if min_storage:
            k = group.dimensions_translated
        else:
            k = group.dimensions
        mapper.setdefault(k, []).append(group)

    aliases = AliasMapper()
    queue = list(mapper.values())
    while queue:
        groups = queue.pop(0)

        while groups:
            # For each Dimension, determine the Minimum Intervals (MI) spanning
            # all of the Groups diameters
            # Example: x's largest_diameter=2  => [x[-2,0], x[-1,1], x[0,2]]
            # Note: Groups that cannot evaluate their diameter are dropped
            mapper = defaultdict(int)
            for g in list(groups):
                try:
                    mapper.update(
                        {d: max(mapper[d], v)
                         for d, v in g.diameter.items()})
                except ValueError:
                    groups.remove(g)
            intervalss = {
                d: make_rotations_table(d, v)
                for d, v in mapper.items()
            }

            # For each Group, find a rotation that is compatible with a given MI
            mapper = {}

            for d, intervals in intervalss.items():
                # Not all groups may access all dimensions
                # Example: `d=t` and groups=[Group(...[t, x]...), Group(...[time, x]...)]
                impacted = [g for g in groups if d in g.dimensions]

                for interval in list(intervals):
                    found = {
                        g: g.find_rotation_distance(d, interval)
                        for g in impacted
                    }
                    if all(distance is not None
                           for distance in found.values()):
                        # `interval` is OK !
                        mapper[interval] = found
                        break

            if len(mapper) == len(intervalss):
                # A compatible interval was found along every Dimension
                break

            # Try again with fewer groups
            # Heuristic: first try retaining the larger ones
            smallest = len(min(groups, key=len))
            fallback = groups
            groups, remainder = split(groups, lambda g: len(g) > smallest)
            if groups:
                # The smaller Groups get their own attempt later on
                queue.append(remainder)
            elif len(remainder) > 1:
                # No luck with the heuristic, e.g. there are two groups
                # and both have same `len`
                # NOTE(review): re-queuing of the leftover Groups restored;
                # without it they would be silently lost -- confirm
                queue.append(fallback[1:])
                groups = [fallback.pop(0)]
            else:
                break

        for g in groups:
            c = g.pivot
            distances = defaultdict(int, [(i.dim, v.get(g))
                                          for i, v in mapper.items()])

            # Create the basis alias
            offsets = [
                LabeledVector([(l, v[l] + distances[l]) for l in v.labels])
                for v in c.offsets
            ]
            subs = {
                i: i.function[[l + v.fromlabel(l, 0) for l in b]]
                for i, b, v in zip(c.indexeds, c.bases, offsets)
            }
            alias = uxreplace(c.expr, subs)

            # All aliased expressions
            aliaseds = [extracted[i.expr] for i in g]

            # Distance of each aliased expression from the basis alias
            distances = []
            for i in g:
                distance = [o.distance(v) for o, v in zip(i.offsets, offsets)]
                distance = [(d, set(v))
                            for d, v in LabeledVector.transpose(*distance)]
                distances.append(
                    LabeledVector([(d, v.pop()) for d, v in distance]))

            aliases.add(alias, list(mapper), aliaseds, distances)

    return aliases
Beispiel #24
    def _prepare_arguments(self, **kwargs):
        """
        Process runtime arguments passed to ``.apply()`` and derive
        default values for any remaining arguments.

        Parameters
        ----------
        **kwargs
            User-provided overrides, keyed by argument name. The optional
            ``autotune`` entry is consumed here and forwarded to
            ``self._autotune``.

        Returns
        -------
        ArgumentsMap
            The processed arguments, with the detected grid (or None)
            attached.

        Raises
        ------
        ValueError
            If overrides/defaults are mutually incompatible, if multiple
            grids are found while ``configuration['mpi']`` is set, or if an
            unknown keyword is supplied and ``configuration['ignore-unknowns']``
            is not set.
        """
        overrides, defaults = split(self.input, lambda p: p.name in kwargs)

        # Process data-carrier overrides
        args = ReducerMap()
        for p in overrides:
            args.update(p._arg_values(**kwargs))
            try:
                args = ReducerMap(args.reduce_all())
            except ValueError:
                raise ValueError(
                    "Override `%s` is incompatible with overrides `%s`" %
                    (p, [i for i in overrides if i.name in args]))
        # Process data-carrier defaults
        for p in defaults:
            if p.name in args:
                # E.g., SubFunctions
                continue
            for k, v in p._arg_values(**kwargs).items():
                if k in args and args[k] != v:
                    raise ValueError(
                        "Default `%s` is incompatible with other args as "
                        "`%s=%s`, while `%s=%s` is expected. Perhaps you "
                        "forgot to override `%s`?" % (p, k, v, k, args[k], p))
                args[k] = v
        args = args.reduce_all()

        # All DiscreteFunctions should be defined on the same Grid
        grids = {getattr(kwargs[p.name], 'grid', None) for p in overrides}
        grids.update({getattr(p, 'grid', None) for p in defaults})
        # Objects without a grid contribute `None`, which must neither count
        # as a "second Grid" nor be the one popped below
        grids.discard(None)
        if len(grids) > 1 and configuration['mpi']:
            raise ValueError("Multiple Grids found")
        try:
            grid = grids.pop()
        except KeyError:
            grid = None

        # Process Dimensions
        # A topological sorting is used so that derived Dimensions are processed after
        # their parents (note that a leaf Dimension can have an arbitrary long list of
        # ancestors)
        dag = DAG(self.dimensions,
                  [(i, i.parent) for i in self.dimensions if i.is_Derived])
        for d in reversed(dag.topological_sort()):
            args.update(d._arg_values(args, self._dspace[d], grid, **kwargs))

        # Process Objects (which may need some `args`)
        for o in self.objects:
            args.update(o._arg_values(args, grid=grid, **kwargs))

        # Sanity check
        for p in self.parameters:
            p._arg_check(args, self._dspace[p])
        for d in self.dimensions:
            if d.is_Derived:
                # Index the data space with `d` itself; `p` here would be the
                # variable leaked by the loop above (undefined if there are
                # no parameters)
                d._arg_check(args, self._dspace[d])

        # Turn arguments into a format suitable for the generated code
        # E.g., instead of NumPy arrays for Functions, the generated code expects
        # pointers to ctypes.Struct
        for p in self.parameters:
            try:
                args.update(kwargs.get(p.name, p)._arg_as_ctype(args, alias=p))
            except AttributeError:
                # User-provided floats/ndarray obviously do not have `_arg_as_ctype`
                args.update(p._arg_as_ctype(args, alias=p))

        # Execute autotuning and adjust arguments accordingly
        args = self._autotune(
            args, kwargs.pop('autotune', configuration['autotuning']))

        # Check all user-provided keywords are known to the Operator
        if not configuration['ignore-unknowns']:
            for k, v in kwargs.items():
                if k not in self._known_arguments:
                    raise ValueError("Unrecognized argument %s=%s" % (k, v))

        # Attach `grid` to the arguments map
        args = ArgumentsMap(grid, **args)

        return args
Beispiel #25
def relax_incr_dimensions(iet, **kwargs):
    """
    Recast Iterations over IncrDimensions as ElementalFunctions; insert
    ElementalCalls to iterate over the "main" and "remainder" regions induced
    by the IncrDimensions.

    Parameters
    ----------
    iet
        The tree to transform; it is scanned with ``retrieve_iteration_tree``
        and rewritten through a ``Transformer``.
    **kwargs
        Must contain ``sregistry``, used to generate the function names.

    Returns
    -------
    tuple
        The transformed tree and a dict ``{'efuncs': [...]}`` with the
        ElementalFunctions created along the way.
    """
    sregistry = kwargs['sregistry']

    efuncs = []
    mapper = {}
    for tree in retrieve_iteration_tree(iet):
        iterations = [i for i in tree if i.dim.is_Incr]
        if not iterations:
            continue

        root = iterations[0]
        if root in mapper:
            # Already processed while visiting another tree
            continue

        outer, inner = split(iterations, lambda i: not i.dim.parent.is_Incr)

        # Compute the iteration ranges
        ranges = []
        for i in outer:
            maxb = i.symbolic_max - (i.symbolic_size % i.dim.step)
            ranges.append(((i.symbolic_min, maxb, i.dim.step),
                           (maxb + 1, i.symbolic_max, i.symbolic_max - maxb)))

        # Remove any offsets
        # E.g., `x = x_m + 2 to x_M - 2` --> `x = x_m to x_M`
        # NOTE(review): the third `limits` entry was truncated; `i.step` is
        # assumed since the step is kept as a dynamic parameter below -- confirm
        outer = [i._rebuild(limits=(i.dim.root.symbolic_min, i.dim.root.symbolic_max,
                                    i.step))
                 for i in outer]

        # Create the ElementalFunction
        name = sregistry.make_name(prefix="bf")
        body = compose_nodes(outer)
        dynamic_parameters = flatten((i.symbolic_bounds, i.step) for i in outer)
        dynamic_parameters.extend([i.step for i in inner if not is_integer(i.step)])
        efunc = make_efunc(name, body, dynamic_parameters)

        efuncs.append(efunc)

        # Create the ElementalCalls
        calls = []
        for p in product(*ranges):
            dynamic_args_mapper = {}
            for i, (m, M, b) in zip(outer, p):
                dynamic_args_mapper[i.symbolic_min] = m
                dynamic_args_mapper[i.symbolic_max] = M
                dynamic_args_mapper[i.step] = b
                for j in inner:
                    if j.dim.root is i.dim.root and not is_integer(j.step):
                        value = j.step if b is i.step else b
                        dynamic_args_mapper[j.step] = (value,)
            # NOTE(review): call construction restored; `make_call` is the
            # presumed API of the object returned by `make_efunc` -- confirm
            calls.append(efunc.make_call(dynamic_args_mapper))

        mapper[root] = List(body=calls)

    iet = Transformer(mapper).visit(iet)

    return iet, {'efuncs': efuncs}
Beispiel #26
 def cbk_search(expr):
     """
     Collect ``expr`` if at least one of its args belongs to ``basextr``
     (a name taken from the enclosing scope), then keep searching through
     the remaining args.
     """
     found, others = split(expr.args, lambda a: a in basextr)
     ret = [expr] if found else []
     for a in others:
         # NOTE(review): the loop body was missing; recursing into the
         # non-matching args is the presumed intent -- confirm
         ret.extend(cbk_search(a))
     return ret
Beispiel #27
    def _prepare_arguments(self, **kwargs):
        """
        Process runtime arguments passed to ``.apply()`` and derive
        default values for any remaining arguments.

        Parameters
        ----------
        **kwargs
            User-provided overrides, keyed by argument name. The optional
            ``backend`` and ``autotune`` entries are consumed here.

        Returns
        -------
        The arguments mapping as returned by ``self._autotune``.

        Raises
        ------
        ValueError
            If overrides/defaults are mutually incompatible, if multiple
            grids are found while ``configuration['mpi']`` is set, or if an
            unknown keyword is supplied and ``configuration['ignore-unknowns']``
            is not set.
        """
        overrides, defaults = split(self.input, lambda p: p.name in kwargs)
        # Process data-carrier overrides
        args = ReducerMap()
        for p in overrides:
            args.update(p._arg_values(**kwargs))
            try:
                args = ReducerMap(args.reduce_all())
            except ValueError:
                raise ValueError("Override `%s` is incompatible with overrides `%s`" %
                                 (p, [i for i in overrides if i.name in args]))
        # Process data-carrier defaults
        for p in defaults:
            if p.name in args:
                # E.g., SubFunctions
                continue
            for k, v in p._arg_values(**kwargs).items():
                if k in args and args[k] != v:
                    raise ValueError("Default `%s` is incompatible with other args as "
                                     "`%s=%s`, while `%s=%s` is expected. Perhaps you "
                                     "forgot to override `%s`?" %
                                     (p, k, v, k, args[k], p))
                args[k] = v
        args = args.reduce_all()

        # All DiscreteFunctions should be defined on the same Grid
        grids = {getattr(p, 'grid', None) for p in self.input} - {None}
        if len(grids) > 1 and configuration['mpi']:
            raise ValueError("Multiple Grids found")
        try:
            grid = grids.pop()
        except KeyError:
            grid = None

        # Process Dimensions (derived go after as they might need/affect their parents)
        derived, main = split(self.dimensions, lambda i: i.is_Derived)
        for d in main:
            args.update(d._arg_values(args, self._dspace[d], grid, **kwargs))
        for d in derived:
            args.update(d._arg_values(args, self._dspace[d], grid, **kwargs))

        # Process Objects (which may need some `args`)
        for o in self.objects:
            args.update(o._arg_values(args, **kwargs))

        # Sanity check
        for p in self.parameters:
            p._arg_check(args, self._dspace[p])

        # Turn arguments into a format suitable for the generated code
        # E.g., instead of NumPy arrays for Functions, the generated code expects
        # pointers to ctypes.Struct
        for p in self.parameters:
            try:
                args.update(kwargs.get(p.name, p)._arg_as_ctype(args, alias=p))
            except AttributeError:
                # User-provided floats/ndarray obviously do not have `_arg_as_ctype`
                args.update(p._arg_as_ctype(args, alias=p))

        # Add in the profiler argument
        # NOTE(review): the key was missing; the profiler's `name` is assumed,
        # in line with how every other argument is keyed -- confirm
        args[self._profiler.name] = self._profiler.timer.reset()

        # Add in any backend-specific argument
        args.update(kwargs.pop('backend', {}))

        # Execute autotuning and adjust arguments accordingly
        args = self._autotune(args, kwargs.pop('autotune', configuration['autotuning']))

        # Check all user-provided keywords are known to the Operator
        if not configuration['ignore-unknowns']:
            for k, v in kwargs.items():
                if k not in self._known_arguments:
                    raise ValueError("Unrecognized argument %s=%s" % (k, v))

        return args
Beispiel #28
    def _prepare_arguments(self, autotune=None, **kwargs):
        """
        Process runtime arguments passed to ``.apply()`` and derive
        default values for any remaining arguments.

        Parameters
        ----------
        autotune : optional
            Autotuning request forwarded to ``self._autotune``; when falsy,
            ``configuration['autotuning']`` is used instead.
        **kwargs
            User-provided overrides, keyed by argument name. The partially
            built arguments are also exposed to the callees as
            ``kwargs['args']``.

        Returns
        -------
        ArgumentsMap
            The processed arguments.

        Raises
        ------
        ValueError
            If an unknown keyword is supplied and
            ``configuration['ignore-unknowns']`` is not set, if
            overrides/defaults are mutually incompatible, or if multiple
            Grids are found while ``configuration['mpi']`` is set.
        """
        # Sanity check -- all user-provided keywords must be known to the Operator
        if not configuration['ignore-unknowns']:
            for k, v in kwargs.items():
                if k not in self._known_arguments:
                    raise ValueError("Unrecognized argument %s=%s" % (k, v))

        overrides, defaults = split(self.input, lambda p: p.name in kwargs)

        # Process data-carrier overrides
        args = kwargs['args'] = ReducerMap()
        for p in overrides:
            # NOTE(review): the `try` body was missing; restored after the
            # sibling implementations, keeping `kwargs['args']` in sync as
            # done for `reduce_all` below -- confirm
            args.update(p._arg_values(**kwargs))
            try:
                args = kwargs['args'] = ReducerMap(args.reduce_all())
            except ValueError:
                raise ValueError(
                    "Override `%s` is incompatible with overrides `%s`" %
                    (p, [i for i in overrides if i.name in args]))
        # Process data-carrier defaults
        for p in defaults:
            if p.name in args:
                # E.g., SubFunctions
                continue
            for k, v in p._arg_values(**kwargs).items():
                if k in args and args[k] != v:
                    raise ValueError(
                        "Default `%s` is incompatible with other args as "
                        "`%s=%s`, while `%s=%s` is expected. Perhaps you "
                        "forgot to override `%s`?" % (p, k, v, k, args[k], p))
                args[k] = v
        args = kwargs['args'] = args.reduce_all()

        # DiscreteFunctions may be created from CartesianDiscretizations, which in
        # turn could be Grids or SubDomains. Both may provide arguments
        discretizations = {
            getattr(kwargs[p.name], 'grid', None)
            for p in overrides
        }
        discretizations.update({getattr(p, 'grid', None) for p in defaults})
        # Objects without a grid contribute `None`, which provides no arguments
        discretizations.discard(None)
        for i in discretizations:
            # NOTE(review): loop body was missing; presumably each
            # discretization contributes through `_arg_values` -- confirm
            args.update(i._arg_values(**kwargs))

        # There can only be one Grid from which DiscreteFunctions were created
        grids = {i for i in discretizations if isinstance(i, Grid)}
        if len(grids) > 1:
            # We loosely tolerate multiple Grids for backwards compatibility
            # with spacial subsampling, which should be revisited however. And
            # With MPI it would definitely break!
            if configuration['mpi']:
                raise ValueError("Multiple Grids found")
        try:
            grid = grids.pop()
        except KeyError:
            grid = None

        # An ArgumentsMap carries additional metadata that may be used by
        # the subsequent phases of the arguments processing
        # NOTE(review): the call was truncated after `self._allocator,`; the
        # trailing `self._platform` is an assumption -- confirm
        args = kwargs['args'] = ArgumentsMap(args, grid, self._allocator,
                                             self._platform)

        # Process Dimensions
        # A topological sorting is used so that derived Dimensions are processed after
        # their parents (note that a leaf Dimension can have an arbitrary long list of
        # ancestors)
        dag = DAG(self.dimensions,
                  [(i, i.parent) for i in self.dimensions if i.is_Derived])
        for d in reversed(dag.topological_sort()):
            args.update(d._arg_values(self._dspace[d], grid, **kwargs))

        # Process Objects
        for o in self.objects:
            args.update(o._arg_values(grid=grid, **kwargs))

        # In some "lower-level" Operators implementing a random piece of C, such as
        # one or more calls to third-party library functions, there could still be
        # at this point unprocessed arguments (e.g., scalars)
        args.update({k: v for k, v in kwargs.items() if k not in args})

        # Sanity check
        for p in self.parameters:
            p._arg_check(args, self._dspace[p])
        for d in self.dimensions:
            if d.is_Derived:
                # Index the data space with `d` itself; `p` here would be the
                # variable leaked by the loop above (undefined if there are
                # no parameters)
                d._arg_check(args, self._dspace[d])

        # Turn arguments into a format suitable for the generated code
        # E.g., instead of NumPy arrays for Functions, the generated code expects
        # pointers to ctypes.Struct
        for p in self.parameters:
            try:
                args.update(kwargs.get(p.name, p)._arg_finalize(args, alias=p))
            except AttributeError:
                # User-provided floats/ndarray obviously do not have `_arg_finalize`
                args.update(p._arg_finalize(args, alias=p))

        # Execute autotuning and adjust arguments accordingly
        # Updating in place (rather than rebinding) retains the ArgumentsMap
        args.update(
            self._autotune(args, autotune or configuration['autotuning']))

        return args
Beispiel #29
def detect_accesses(exprs):
    """
    Return a mapper `M : F -> S`, where F are Functions appearing in `exprs`
    and S are Stencils. `M[f]` represents all data accesses to `f` within
    `exprs`. Also map `M[None]` to all Dimensions used in `exprs` as plain
    symbols, rather than as array indices.

    Parameters
    ----------
    exprs
        One or more expressions; searched with ``retrieve_indexed`` and
        normalized with ``as_tuple``.

    Returns
    -------
    defaultdict
        Maps each Function, plus the special key ``None``, to a Stencil.
    """
    # Compute M : F -> S
    mapper = defaultdict(Stencil)
    for e in retrieve_indexed(exprs, deep=True):
        f = e.function

        for a, d0 in zip(e.indices, f.dimensions):
            if isinstance(a, ModuloDimension) and a.parent.is_Stepping:
                # Explicitly unfold SteppingDimensions-induced ModuloDimensions
                mapper[f][a.root].update([a.offset - a.root])
            elif isinstance(a, Dimension):
                # A bare Dimension is an access at offset 0
                # NOTE(review): branch body was missing -- confirm
                mapper[f][a].update([0])
            elif a.is_Add:
                dims = {i for i in a.free_symbols if isinstance(i, Dimension)}

                if not dims:
                    continue
                elif len(dims) > 1:
                    # There are two reasons we may end up here, 1) indirect
                    # accesses (e.g., a[b[x, y] + 1, y]) or 2) as a result of
                    # skewing-based optimizations, such as time skewing (e.g.,
                    # `x - time + 1`) or CIRE rotation (e.g., `x + xx - 4`)
                    d, others = split(dims, lambda i: d0 in i._defines)

                    if any(i.is_Indexed for i in a.args) or len(d) != 1:
                        # Case 1) -- with indirect accesses there's not much we can infer
                        continue
                    else:
                        # Case 2)
                        d, = d
                        _, o = split(others, lambda i: i.is_Custom)
                        off = sum(i for i in a.args
                                  if i.is_integer or i.free_symbols & o)
                else:
                    d, = dims

                    # At this point, typically, the offset will be an integer.
                    # In some cases though it could be an expression, e.g.
                    # `db0 + time_m - 1` (from CustomDimensions due to buffering)
                    # or `x + o_x` (from MPI routines) or `time - ns` (from
                    # guarded accesses to TimeFunctions) or ... In all these cases,
                    # what really matters is the integer part of the offset, as
                    # any other symbols may resolve to zero at runtime, which is
                    # the base case scenario we fallback to
                    off = sum(i for i in a.args if i.is_integer)

                    # NOTE: `d in a.args` is too restrictive because of guarded
                    # accesses such as `time / factor - 1`
                    assert d in a.free_symbols

                if (d.is_Custom
                        or d.is_Default) and d.symbolic_size.is_integer:
                    # Explicitly unfold Default and CustomDimensions
                    mapper[f][d].update(range(off, d.symbolic_size + off))
                else:
                    # NOTE(review): fallback branch was missing; recording the
                    # single offset is the presumed intent -- confirm
                    mapper[f][d].update([off])

    # Compute M[None]
    other_dims = set()
    for e in as_tuple(exprs):
        other_dims.update(i for i in e.free_symbols
                          if isinstance(i, Dimension))
    mapper[None] = Stencil([(i, 0) for i in other_dims])

    return mapper
Beispiel #30
    def _prepare_arguments(self, **kwargs):
        """
        Process runtime arguments passed to ``.apply()`` and derive
        default values for any remaining arguments.

        Parameters
        ----------
        **kwargs
            User-provided overrides, keyed by argument name. The optional
            ``backend`` and ``autotune`` entries are consumed here.

        Returns
        -------
        The processed arguments mapping (as returned by ``self._autotune``
        when autotuning is requested).

        Raises
        ------
        RuntimeError
            If no unique Grid can be determined while ``configuration['mpi']``
            is set.
        ValueError
            If an unknown keyword is supplied.
        """
        # Process data-carriers (first overrides, then fill up with whatever is needed)
        args = ReducerMap()
        args.update(
            [p._arg_values(**kwargs) for p in self.input if p.name in kwargs])
        args.update(
            [p._arg_values() for p in self.input if p.name not in args])
        args = args.reduce_all()

        # All TensorFunctions should be defined on the same Grid
        functions = [
            kwargs.get(p, p) for p in self.input if p.is_TensorFunction
        ]
        mapper = ReducerMap([('grid', i.grid) for i in functions if i.grid])
        try:
            grid = mapper.unique('grid')
        except (KeyError, ValueError):
            if mapper and configuration['mpi']:
                raise RuntimeError("Multiple `Grid`s found before `apply`")
            grid = None

        # Process dimensions (derived go after as they might need/affect their parents)
        derived, main = split(self.dimensions, lambda i: i.is_Derived)
        for p in main:
            args.update(p._arg_values(args, self._dspace[p], grid, **kwargs))
        for p in derived:
            args.update(p._arg_values(args, self._dspace[p], grid, **kwargs))

        # Sanity check
        for p in self.input:
            p._arg_check(args, self._dspace[p])

        # Derive additional values for DLE arguments
        # TODO: This is not pretty, but it works for now. Ideally, the
        # DLE arguments would be massaged into the IET so as to comply
        # with the rest of the argument derivation procedure.
        for arg in self._dle_args:
            dim = arg.argument
            # NOTE(review): the expression was truncated after `symbolic_end -`;
            # `symbolic_start` and the substitution with `args` are assumptions
            # (extent of the original Dimension, made concrete) -- confirm
            osize = (1 + arg.original_dim.symbolic_end -
                     arg.original_dim.symbolic_start).subs(args)
            if arg.value is None:
                args[dim.name] = osize
            elif isinstance(arg.value, int):
                args[dim.name] = arg.value
            else:
                # Anything else is treated as a callable taking the size
                args[dim.name] = arg.value(osize)

        # Add in the profiler argument
        # NOTE(review): the key was missing; the profiler's `name` is assumed,
        # in line with how every other argument is keyed -- confirm
        args[self.profiler.name] = self.profiler.timer.reset()

        # Add in any backend-specific argument
        args.update(kwargs.pop('backend', {}))

        # Execute autotuning and adjust arguments accordingly
        if kwargs.pop('autotune', configuration['autotuning'].level):
            args = self._autotune(args)

        # Check all user-provided keywords are known to the Operator
        for k, v in kwargs.items():
            if k not in self._known_arguments:
                raise ValueError(
                    "Unrecognized argument %s=%s passed to `apply`" % (k, v))

        return args