Example #1
    def map_subscript(self, expr, type_context):
        from loopy.kernel.data import TemporaryVariable

        ary = self.find_array(expr)

        if (isinstance(ary, TemporaryVariable)
                and ary.address_space == AddressSpace.PRIVATE):
            # generate access code for access to private-index temporaries

            gsize, lsize = self.kernel.get_grid_size_upper_bounds_as_exprs()
            if lsize:
                lsize, = lsize
                from loopy.kernel.array import get_access_info
                from pymbolic import evaluate

                access_info = get_access_info(self.kernel.target, ary, expr.index,
                    lambda expr: evaluate(expr, self.codegen_state.var_subst_map),
                    self.codegen_state.vectorization_info)

                subscript, = access_info.subscripts
                result = var(access_info.array_name)[
                        var("programIndex") + self.rec(lsize*subscript, 'i')]

                if access_info.vector_index is not None:
                    return self.kernel.target.add_vector_access(
                        result, access_info.vector_index)
                else:
                    return result

        return super(ExprToISPCExprMapper, self).map_subscript(
                expr, type_context)
Example #2
    def map_planned_flux(self, expr):
        try:
            return self.expr_to_var[expr]
        except KeyError:
            for fb in self.flux_batches:
                try:
                    idx = fb.flux_exprs.index(expr)
                except ValueError:
                    pass
                else:
                    # found at idx
                    mapped_fluxes = [
                            self.internal_map_flux(f)
                            for f in fb.flux_exprs]

                    names = [self.get_var_name() for f in mapped_fluxes]
                    self.code.append(
                            self.make_flux_batch_assign(
                                names, mapped_fluxes, fb.repr_op))

                    from pymbolic import var
                    for n, f in zip(names, fb.flux_exprs):
                        self.expr_to_var[f] = var(n)

                    return var(names[idx])

            raise RuntimeError("flux '%s' not in any flux batch" % expr)
Example #3
    def map_Assignment(self, node):
        lhs = self.parse_expr(node.variable)
        from pymbolic.primitives import Subscript
        if isinstance(lhs, Subscript):
            lhs_name = lhs.aggregate.name
        else:
            lhs_name = lhs.name

        scope = self.scope_stack[-1]
        scope.use_name(lhs_name)
        infer_type = scope.get_type_inference_mapper()

        rhs = self.parse_expr(node.expr)
        lhs_dtype = infer_type(lhs)
        rhs_dtype = infer_type(rhs)

        # check for silent truncation of complex
        if lhs_dtype.kind != 'c' and rhs_dtype.kind == 'c':
            from pymbolic import var
            rhs = var("real")(rhs)
        # check for silent widening of real
        if lhs_dtype.kind == 'c' and rhs_dtype.kind != 'c':
            from pymbolic import var
            rhs = var("fromreal")(rhs)

        return cgen.Assign(self.gen_expr(lhs), self.gen_expr(rhs))
Example #4
    def get_kernel():
        from sumpy.symbolic import pymbolic_real_norm_2
        from pymbolic.primitives import make_sym_vector
        from pymbolic import var

        d = make_sym_vector("d", 3)
        r = pymbolic_real_norm_2(d[:-1])
        # r3d = pymbolic_real_norm_2(d)
        #expr = var("log")(r3d)

        log = var("log")
        sqrt = var("sqrt")

        a = d[-1]

        # only the last of these assignments takes effect; the first two
        # are overwritten leftovers from experimentation
        expr = log(r)
        expr = log(sqrt(r**2 + a**2))
        expr = log(sqrt(r + a**2))
        #expr = log(sqrt(r**2 + a**2))-a**2/2/(r**2+a**2)
        #expr = 2*log(sqrt(r**2 + a**2))

        scaling = 1/(2*var("pi"))

        from sumpy.kernel import ExpressionKernel
        return ExpressionKernel(
                dim=3,
                expression=expr,
                global_scaling_const=scaling,
                is_complex_valued=False)
Example #5
    def map_substitution(self, name, tag, arguments, expn_state):
        if not (
                name == self.subst_name
                and self.within(
                    expn_state.kernel,
                    expn_state.instruction,
                    expn_state.stack)
                and (self.subst_tag is None or self.subst_tag == tag)):
            return super(RuleInvocationReplacer, self).map_substitution(
                    name, tag, arguments, expn_state)

        # {{{ check if in footprint

        rule = self.rule_mapping_context.old_subst_rules[name]
        arg_context = self.make_new_arg_context(
                    name, rule.arguments, arguments, expn_state.arg_context)
        args = [arg_context[arg_name] for arg_name in rule.arguments]

        accdesc = AccessDescriptor(
                storage_axis_exprs=storage_axis_exprs(
                    self.storage_axis_sources, args))

        if not self.array_base_map.is_access_descriptor_in_footprint(accdesc):
            return super(RuleInvocationReplacer, self).map_substitution(
                    name, tag, arguments, expn_state)

        # }}}

        assert len(arguments) == len(rule.arguments)

        abm = self.array_base_map

        stor_subscript = []
        for sax_name, sax_source, sax_base_idx in zip(
                self.storage_axis_names,
                self.storage_axis_sources,
                abm.storage_base_indices):
            if sax_name not in self.non1_storage_axis_names:
                continue

            if isinstance(sax_source, int):
                # an argument
                ax_index = arguments[sax_source]
            else:
                # an iname
                ax_index = var(sax_source)

            from loopy.isl_helpers import simplify_via_aff
            ax_index = simplify_via_aff(ax_index - sax_base_idx)
            stor_subscript.append(ax_index)

        new_outer_expr = var(self.temporary_name)
        if stor_subscript:
            new_outer_expr = new_outer_expr.index(tuple(stor_subscript))

        # Can't possibly be nested, and no need to traverse
        # further as compute expression has already been seen
        # by rule_mapping_context.

        return new_outer_expr
Example #6
    def map_reduction(expr, rec, nresults=1):
        if frozenset(expr.inames) != inames_set:
            return type(expr)(
                    operation=expr.operation,
                    inames=expr.inames,
                    expr=rec(expr.expr),
                    allow_simultaneous=expr.allow_simultaneous)

        if subst_rule_name is None:
            subst_rule_prefix = "red_%s_arg" % "_".join(inames)
            my_subst_rule_name = var_name_gen(subst_rule_prefix)
        else:
            my_subst_rule_name = subst_rule_name

        if my_subst_rule_name in substs:
            raise LoopyError("substitution rule '%s' already exists"
                    % my_subst_rule_name)

        from loopy.kernel.data import SubstitutionRule
        substs[my_subst_rule_name] = SubstitutionRule(
                name=my_subst_rule_name,
                arguments=tuple(inames),
                expression=expr.expr)

        from pymbolic import var
        iname_vars = [var(iname) for iname in inames]

        return type(expr)(
                operation=expr.operation,
                inames=expr.inames,
                expr=var(my_subst_rule_name)(*iname_vars),
                allow_simultaneous=expr.allow_simultaneous)
Example #7
    def transform_access(self, index, expn_state):
        my_insn_id = expn_state.insn_id

        if my_insn_id in self.definition_insn_ids:
            return None

        my_def_id = self.usage_to_definition[my_insn_id]

        if not self.within(
                expn_state.kernel,
                expn_state.instruction,
                expn_state.stack):
            self.saw_unmatched_usage_sites[my_def_id] = True
            return None

        subst_name = self.get_subst_name(my_def_id)

        if self.extra_arguments:
            if index is None:
                index = self.extra_arguments
            else:
                index = index + self.extra_arguments

        from pymbolic import var
        if index is None:
            return var(subst_name)
        elif not isinstance(index, tuple):
            return var(subst_name)(index)
        else:
            return var(subst_name)(*index)
Example #8
def test_func_dep_consistency():
    from pymbolic import var
    from pymbolic.mapper.dependency import DependencyMapper
    f = var('f')
    x = var('x')
    dep_map = DependencyMapper(include_calls="descend_args")
    assert dep_map(f(x)) == set([x])
    assert dep_map(f(x=x)) == set([x])
Example #9
File: qbx.py Project: inducer/sumpy
    def get_kernel_exprs(self, result_names):
        from pymbolic import var

        isrc_sym = var("isrc")
        exprs = [var(name) * self.get_strength_or_not(isrc_sym, i)
                 for i, name in enumerate(result_names)]

        return [lp.Assignment(id=None,
                    assignee="pair_result_%d" % i, expression=expr,
                    temp_var_type=lp.auto)
                for i, expr in enumerate(exprs)]
Example #10
def test_child_invalid_type_cast():
    from pymbolic import var
    knl = lp.make_kernel(
        "{[i]: 0<=i<n}",
        ["<> ctr = make_uint2(0, 0)",
         lp.Assignment("a[i]", lp.TypeCast(np.int64, var("ctr")) << var("i"))]
        )

    with pytest.raises(lp.LoopyError):
        knl = lp.preprocess_kernel(knl)
Example #11
def test_is_expression_equal():
    from loopy.symbolic import is_expression_equal
    from pymbolic import var

    x = var("x")
    y = var("y")

    assert is_expression_equal(x+2, 2+x)

    assert is_expression_equal((x+2)**2, x**2 + 4*x + 4)
    assert is_expression_equal((x+y)**2, x**2 + 2*x*y + y**2)
Example #12
    def apply_offset(sub):
        import loopy as lp

        if ary.offset:
            if ary.offset is lp.auto:
                return var(array_name+"_offset") + sub
            elif isinstance(ary.offset, str):
                return var(ary.offset) + sub
            else:
                # assume it's an expression
                return ary.offset + sub
        else:
            return sub
Example #13
    def map_field_component(self, expr):
        if expr.is_interior:
            where = "int_side"
        else:
            where = "ext_side"

        arg_name = self.flux_var_info.flux_idx_and_dep_to_arg_name[
                self.flux_idx, expr]

        if not arg_name:
            return 0
        else:
            from pymbolic import var
            return var(arg_name+"_it")[var(where+"_idx")]
Example #14
File: p2p.py Project: inducer/sumpy
    def get_kernel_exprs(self, result_names):
        from pymbolic import var

        isrc_sym = var("isrc")
        exprs = [var(name) * self.get_strength_or_not(isrc_sym, i)
                 for i, name in enumerate(result_names)]

        if self.exclude_self:
            from pymbolic.primitives import If, Variable
            exprs = [If(Variable("is_self"), 0, expr) for expr in exprs]

        return [lp.Assignment(id=None,
                    assignee="pair_result_%d" % i, expression=expr,
                    temp_var_type=lp.auto)
                for i, expr in enumerate(exprs)]
Example #15
    def __init__(self, dim=None, icomp=None, jcomp=None, viscosity_mu_name="mu",
                        stresslet_vector_name="stresslet_vec"):
        """
        :arg viscosity_mu_name: The argument name to use for
                dynamic viscosity :math:`\mu` the then generating functions to
                evaluate this kernel.
        """
        # Mu is unused but kept for consistency with the stokeslet.

        if dim == 2:
            d = make_sym_vector("d", dim)
            n = make_sym_vector(stresslet_vector_name, dim)
            r = pymbolic_real_norm_2(d)
            expr = (
                sum(n[axis]*d[axis] for axis in range(dim))
                *
                d[icomp]*d[jcomp]/r**4
                )
            scaling = 1/(var("pi"))

        elif dim == 3:
            d = make_sym_vector("d", dim)
            n = make_sym_vector(stresslet_vector_name, dim)
            r = pymbolic_real_norm_2(d)
            expr = (
                sum(n[axis]*d[axis] for axis in range(dim))
                *
                d[icomp]*d[jcomp]/r**5
                )
            scaling = -3/(4*var("pi"))

        elif dim is None:
            expr = None
            scaling = None
        else:
            raise RuntimeError("unsupported dimensionality")

        self.viscosity_mu_name = viscosity_mu_name
        self.stresslet_vector_name = stresslet_vector_name
        self.icomp = icomp
        self.jcomp = jcomp

        ExpressionKernel.__init__(
                self,
                dim,
                expression=expr,
                scaling=scaling,
                is_complex_valued=False)
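
A minimal usage sketch for the constructor above (assuming it belongs to
sumpy's StressletKernel; the class name and import path are assumptions, not
shown in the snippet):

    from sumpy.kernel import StressletKernel  # assumed import path

    # 2D stresslet, component (icomp, jcomp) = (0, 1); the argument names
    # default to "mu" and "stresslet_vec" per the signature above
    knl = StressletKernel(dim=2, icomp=0, jcomp=1)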
Example #16
    def handle_alloc(self, gen, arg, kernel_arg, strify, skip_arg_checks):
        """
        Handle allocation of non-specified arguments for pyopencl execution.
        """
        from pymbolic import var

        num_axes = len(arg.strides)
        for i in range(num_axes):
            gen("_lpy_shape_%d = %s" % (i, strify(arg.unvec_shape[i])))

        itemsize = kernel_arg.dtype.numpy_dtype.itemsize
        for i in range(num_axes):
            gen("_lpy_strides_%d = %s" % (i, strify(
                itemsize*arg.unvec_strides[i])))

        if not skip_arg_checks:
            for i in range(num_axes):
                gen("assert _lpy_strides_%d > 0, "
                        "\"'%s' has negative stride in axis %d\""
                        % (i, arg.name, i))

        sym_strides = tuple(
                var("_lpy_strides_%d" % i)
                for i in range(num_axes))
        sym_shape = tuple(
                var("_lpy_shape_%d" % i)
                for i in range(num_axes))

        alloc_size_expr = (sum(astrd*(alen-1)
            for alen, astrd in zip(sym_shape, sym_strides))
            + itemsize)

        gen("_lpy_alloc_size = %s" % strify(alloc_size_expr))
        gen("%(name)s = _lpy_cl_array.Array(queue, %(shape)s, "
                "%(dtype)s, strides=%(strides)s, "
                "data=allocator(_lpy_alloc_size), allocator=allocator)"
                % dict(
                    name=arg.name,
                    shape=strify(sym_shape),
                    strides=strify(sym_strides),
                    dtype=self.python_dtype_str(kernel_arg.dtype.numpy_dtype)))

        if not skip_arg_checks:
            for i in range(num_axes):
                gen("del _lpy_shape_%d" % i)
                gen("del _lpy_strides_%d" % i)
            gen("del _lpy_alloc_size")
            gen("")
Example #17
    def emit_call_insn(self, insn, target, expression_to_code_mapper):
        # reorder arguments, e.g. a,c = f(b,d) to f(a,b,c,d)
        parameters = []
        reads = iter(insn.expression.parameters)
        writes = iter(insn.assignees)
        for ac in self.access:
            if ac is READ:
                parameters.append(next(reads))
            else:
                parameters.append(next(writes))

        # pass layer argument if needed
        for layer in reads:
            parameters.append(layer)

        par_dtypes = tuple(
                expression_to_code_mapper.infer_type(p) for p in parameters)

        from loopy.expression import dtype_to_type_context
        from pymbolic.mapper.stringifier import PREC_NONE
        from pymbolic import var

        c_parameters = [
            expression_to_code_mapper(
                par, PREC_NONE, dtype_to_type_context(target, par_dtype),
                par_dtype).expr
            for par, par_dtype in zip(parameters, par_dtypes)]

        assignee_is_returned = False
        return var(self.name_in_target)(*c_parameters), assignee_is_returned
Example #18
    def __init__(self, dim=None):
        r = pymbolic_real_norm_2(make_sym_vector("d", dim))
        if dim == 2:
            expr = r**2 * var("log")(r)
            scaling = 1/(8*var("pi"))
        elif dim == 3:
            expr = r
            scaling = 1  # FIXME: Unknown
        else:
            raise RuntimeError("unsupported dimensionality")

        super(BiharmonicKernel, self).__init__(
                dim,
                expression=expr,
                global_scaling_const=scaling,
                is_complex_valued=False)
Example #19
    def map_ref_diff_op_binding(self, expr):
        try:
            return self.expr_to_var[expr]
        except KeyError:
            all_diffs = [diff
                    for diff in self.diff_ops
                    if diff.op.equal_except_for_axis(expr.op)
                    and diff.field == expr.field]

            names = [self.get_var_name() for d in all_diffs]

            from pytools import single_valued
            op_class = single_valued(type(d.op) for d in all_diffs)

            from hedge.optemplate.operators import \
                    ReferenceQuadratureStiffnessTOperator
            # op_class is a type, so use issubclass (isinstance would
            # always be False here)
            if issubclass(op_class, ReferenceQuadratureStiffnessTOperator):
                assign_class = QuadratureDiffBatchAssign
            else:
                assign_class = DiffBatchAssign

            self.code.append(
                    assign_class(
                        names=names,
                        op_class=op_class,
                        operators=[d.op for d in all_diffs],
                        field=self.rec(
                            single_valued(d.field for d in all_diffs)),
                        dep_mapper_factory=self.dep_mapper_factory))

            from pymbolic import var
            for n, d in zip(names, all_diffs):
                self.expr_to_var[d] = var(n)

            return self.expr_to_var[expr]
Example #20
    def realize_conditional(self, node, context_cond=None):
        scope = self.scope_stack[-1]

        cond_name = intern("loopy_cond%d" % self.condition_id_counter)
        self.condition_id_counter += 1
        assert cond_name not in scope.type_map

        scope.type_map[cond_name] = np.int32

        from pymbolic import var
        cond_var = var(cond_name)

        self.add_expression_instruction(
                cond_var, self.parse_expr(node, node.expr))

        cond_expr = cond_var
        if context_cond is not None:
            from pymbolic.primitives import LogicalAnd
            cond_expr = LogicalAnd((cond_var, context_cond))

            self.conditions_data.append((context_cond, cond_var))
        else:
            self.conditions_data.append((None, cond_var))

        self.conditions.append(cond_expr)
Example #21
    def _compile(self, expression, variables):
        import pymbolic.primitives as primi
        self._Expression = expression
        self._Variables = [primi.make_variable(v) for v in variables]
        ctx = self.context().copy()

        try:
            import numpy
        except ImportError:
            pass
        else:
            ctx["numpy"] = numpy

        import pymbolic
        from pymbolic.mapper.dependency import DependencyMapper
        used_variables = DependencyMapper(
                composite_leaves=False)(self._Expression)
        used_variables -= set(self._Variables)
        used_variables -= set(pymbolic.var(key) for key in list(ctx.keys()))
        used_variables = list(used_variables)
        used_variables.sort()
        all_variables = self._Variables + used_variables

        from pymbolic.mapper.stringifier import PREC_NONE
        expr_s = CompileMapper()(self._Expression, PREC_NONE)
        func_s = "lambda %s: %s" % (",".join(str(v) for v in all_variables),
                expr_s)
        self._code = eval(func_s, ctx)
Example #22
def test_kill_trivial_assignments():
    from pymbolic import var
    x, y, t0, t1, t2 = [var(s) for s in "x y t0 t1 t2".split()]

    assignments = (
        ("t0", 6),
        ("t1", -t0),
        ("t2", 6*x),
        ("nt", x**y),
        # users of trivial assignments
        ("u0", t0 + 1),
        ("u1", t1 + 1),
        ("u2", t2 + 1),
    )

    from sumpy.codegen import kill_trivial_assignments
    result = kill_trivial_assignments(
        assignments,
        retain_names=("u0", "u1", "u2"))

    from pymbolic.primitives import Sum

    def _s(*vals):
        return Sum(vals)

    assert result == [
        ('nt', x**y),
        ('u0', _s(6, 1)),
        ('u1', _s(-6, 1)),
        ('u2', _s(6*x, 1))]
Example #23
def get_loopy_instructions_as_maxima(kernel, prefix):
    """Sample use for code comparison::

        load("knl-optFalse.mac");
        load("knl-optTrue.mac");

        vname: bessel_j_8;

        un_name : concat(''un_, vname);
        opt_name : concat(''opt_, vname);

        print(ratsimp(ev(un_name - opt_name)));
    """
    from loopy.preprocess import add_boostability_and_automatic_dependencies
    kernel = add_boostability_and_automatic_dependencies(kernel)

    my_variable_names = (
            avn
            for insn in kernel.instructions
            for avn, _ in insn.assignees_and_indices()
            )

    from pymbolic import var
    subst_dict = dict(
            (vn, var(prefix+vn)) for vn in my_variable_names)

    mstr = MaximaStringifyMapper()
    from loopy.symbolic import SubstitutionMapper
    from pymbolic.mapper.substitutor import make_subst_func
    substitute = SubstitutionMapper(make_subst_func(subst_dict))

    result = ["ratprint:false;"]

    written_insn_ids = set()

    from loopy.kernel import InstructionBase, ExpressionInstruction

    def write_insn(insn):
        if not isinstance(insn, InstructionBase):
            insn = kernel.id_to_insn[insn]
        if not isinstance(insn, ExpressionInstruction):
            raise RuntimeError("non-expression instructions not supported "
                    "in maxima export")

        for dep in insn.insn_deps:
            if dep not in written_insn_ids:
                write_insn(dep)

        (aname, _), = insn.assignees_and_indices()
        result.append("%s%s : %s;" % (
            prefix, aname,
            mstr(substitute(insn.expression))))

        written_insn_ids.add(insn.id)

    for insn in kernel.instructions:
        if insn.id not in written_insn_ids:
            write_insn(insn)

    return "\n".join(result)
Example #24
def get_ge_neutral(dtype):
    """Return a number y that satisfies (x >= y) for all y."""

    if dtype.numpy_dtype.kind == "f":
        # OpenCL 1.1, section 6.11.2
        return -var("INFINITY")
    elif dtype.numpy_dtype.kind == "i":
        # OpenCL 1.1, section 6.11.3
        if dtype.numpy_dtype.itemsize == 4:
            #32 bit integer
            return var("INT_MIN")
        elif dtype.numpy_dtype.itemsize == 8:
            #64 bit integer
            return var("LONG_MIN")
    else:
        raise NotImplementedError("less")
Example #25
    def map_flux_exchange(self, expr):
        try:
            return self.expr_to_var[expr]
        except KeyError:
            from hedge.tools import is_field_equal
            all_flux_xchgs = [fe
                    for fe in self.flux_exchange_ops
                    if is_field_equal(fe.arg_fields, expr.arg_fields)]

            assert len(all_flux_xchgs) > 0

            from pytools import single_valued
            names = [self.get_var_name() for d in all_flux_xchgs]
            self.code.append(
                    FluxExchangeBatchAssign(
                        names=names,
                        indices_and_ranks=[
                            (fe.index, fe.rank)
                            for fe in all_flux_xchgs],
                        # all batched exchanges share equal arg_fields, so use
                        # expr's ("fe" from the comprehension above is not in
                        # scope here)
                        arg_fields=[self.rec(arg_field)
                            for arg_field in expr.arg_fields],
                        dep_mapper_factory=self.dep_mapper_factory))

            from pymbolic import var
            for n, d in zip(names, all_flux_xchgs):
                self.expr_to_var[d] = var(n)

            return self.expr_to_var[expr]
Example #26
def main():
    from pytential import sym
    from pymbolic import var

    ndomains = 5
    k_values = tuple(
            "k%d" % i
            for i in range(ndomains))

    from pytential.symbolic.pde.scalar import TMDielectric2DBoundaryOperator
    pde_op = TMDielectric2DBoundaryOperator(
            k_vacuum=1,
            interfaces=tuple(
                (0, i, sym.DEFAULT_SOURCE)
                for i in range(ndomains)
                ),
            domain_k_exprs=k_values,
            beta=var("beta"))

    op_unknown_sym = pde_op.make_unknown("unknown")

    from pytential.symbolic.mappers import GraphvizMapper
    gvm = GraphvizMapper()
    gvm(pde_op.operator(op_unknown_sym))
    with open("helmholtz-op.dot", "wt") as outf:
        outf.write(gvm.get_dot_code())
Example #27
def test_tim2d(ctx_factory):
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    n = 8

    from pymbolic import var
    K_sym = var("K")

    field_shape = (K_sym, n, n)

    # K - run-time symbolic
    knl = lp.make_kernel(ctx.devices[0],
            "[K] -> {[i,j,e,m,o,gi]: 0<=i,j,m,o<%d and 0<=e<K and 0<=gi<3}" % n,
           [
            "ur(a,b) := sum_float32(@o, D[a,o]*u[e,o,b])",
            "us(a,b) := sum_float32(@o, D[b,o]*u[e,a,o])",

            "lap[e,i,j]  = "
            "  sum_float32(m, D[m,i]*(G[0,e,m,j]*ur(m,j) + G[1,e,m,j]*us(m,j)))"
            "+ sum_float32(m, D[m,j]*(G[1,e,i,m]*ur(i,m) + G[2,e,i,m]*us(i,m)))"

            ],
            [
            lp.ArrayArg("u", dtype, shape=field_shape, order=order),
            lp.ArrayArg("lap", dtype, shape=field_shape, order=order),
            lp.ArrayArg("G", dtype, shape=(3,)+field_shape, order=order),
#            lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order),
            lp.ArrayArg("D", dtype, shape=(n, n), order=order),
#            lp.ImageArg("D", dtype, shape=(n, n)),
            lp.ValueArg("K", np.int32, approximately=1000),
            ],
             name="semlap2D", assumptions="K>=1")

    unroll = 32

    seq_knl = knl
    knl = lp.add_prefetch(knl, "D", ["m", "j", "i","o"], default_tag="l.auto")
    knl = lp.add_prefetch(knl, "u", ["i", "j",  "o"], default_tag="l.auto")
    knl = lp.precompute(knl, "ur", np.float32, ["a", "b"], default_tag="l.auto")
    knl = lp.precompute(knl, "us", np.float32, ["a", "b"], default_tag="l.auto")
    knl = lp.split_iname(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))

    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
    knl = lp.tag_inames(knl, dict(o="unr"))
    knl = lp.tag_inames(knl, dict(m="unr"))


#    knl = lp.add_prefetch(knl, "G", [2,3], default_tag=None) # axis/argument indices on G
    knl = lp.add_prefetch(knl, "G", [2,3], default_tag="l.auto") # axis/argument indices on G

    kernel_gen = lp.generate_loop_schedules(knl)
    kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000))

    K = 1000
    lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen,
            op_count=K*(n*n*n*2*2 + n*n*2*3 + n**3 * 2*2)/1e9,
            op_label="GFlops",
            parameters={"K": K})
Example #28
def disambiguate_identifiers(statements_a, statements_b,
        should_disambiguate_name=None):
    if should_disambiguate_name is None:
        def should_disambiguate_name(name):  # pylint:disable=function-redefined
            return True

    from pymbolic.imperative.analysis import get_all_used_identifiers

    id_a = get_all_used_identifiers(statements_a)
    id_b = get_all_used_identifiers(statements_b)

    from pytools import UniqueNameGenerator
    vng = UniqueNameGenerator(id_a | id_b)

    from pymbolic import var
    subst_b = {}
    for clash in id_a & id_b:
        if should_disambiguate_name(clash):
            unclash = vng(clash)
            subst_b[clash] = var(unclash)

    from pymbolic.mapper.substitutor import (
            make_subst_func, SubstitutionMapper)
    subst_map = SubstitutionMapper(make_subst_func(subst_b))

    statements_b = [
            stmt.map_expressions(subst_map) for stmt in statements_b]

    return statements_b, subst_b
Example #29
    def map_reduction(expr, rec):
        # Only expand one level of reduction at a time, going from outermost to
        # innermost. Otherwise we get the (iname + insn) dependencies wrong.

        from pymbolic import var

        target_var_name = var_name_gen("acc_"+"_".join(expr.inames))
        target_var = var(target_var_name)

        try:
            arg_dtype = type_inf_mapper(expr.expr)
        except DependencyTypeInferenceFailure:
            raise LoopyError("failed to determine type of accumulator for "
                    "reduction '%s'" % expr)

        from loopy.kernel.data import ExpressionInstruction, TemporaryVariable

        new_temporary_variables[target_var_name] = TemporaryVariable(
                name=target_var_name,
                shape=(),
                dtype=expr.operation.result_dtype(
                    kernel.target, arg_dtype, expr.inames),
                is_local=False)

        outer_insn_inames = temp_kernel.insn_inames(insn)
        bad_inames = frozenset(expr.inames) & outer_insn_inames
        if bad_inames:
            raise LoopyError("reduction used within loop(s) that it was "
                    "supposed to reduce over: " + ", ".join(bad_inames))

        init_id = temp_kernel.make_unique_instruction_id(
                based_on="%s_%s_init" % (insn.id, "_".join(expr.inames)),
                extra_used_ids=set(i.id for i in generated_insns))

        init_insn = ExpressionInstruction(
                id=init_id,
                assignee=target_var,
                forced_iname_deps=outer_insn_inames - frozenset(expr.inames),
                insn_deps=frozenset(),
                expression=expr.operation.neutral_element(arg_dtype, expr.inames))

        generated_insns.append(init_insn)

        update_id = temp_kernel.make_unique_instruction_id(
                based_on="%s_%s_update" % (insn.id, "_".join(expr.inames)),
                extra_used_ids=set(i.id for i in generated_insns))

        reduction_insn = ExpressionInstruction(
                id=update_id,
                assignee=target_var,
                expression=expr.operation(
                    arg_dtype, target_var, expr.expr, expr.inames),
                insn_deps=frozenset([init_insn.id]) | insn.insn_deps,
                forced_iname_deps=temp_kernel.insn_inames(insn) | set(expr.inames))

        generated_insns.append(reduction_insn)

        new_insn_insn_deps.add(reduction_insn.id)

        return target_var
Example #30
    def map_reduction_seq(expr, rec, nresults, arg_dtype,
            reduction_dtypes):
        outer_insn_inames = temp_kernel.insn_inames(insn)

        from pymbolic import var
        acc_var_names = [
                var_name_gen("acc_"+"_".join(expr.inames))
                for i in range(nresults)]
        acc_vars = tuple(var(n) for n in acc_var_names)

        from loopy.kernel.data import TemporaryVariable, temp_var_scope

        for name, dtype in zip(acc_var_names, reduction_dtypes):
            new_temporary_variables[name] = TemporaryVariable(
                    name=name,
                    shape=(),
                    dtype=dtype,
                    scope=temp_var_scope.PRIVATE)

        init_id = insn_id_gen(
                "%s_%s_init" % (insn.id, "_".join(expr.inames)))

        init_insn = make_assignment(
                id=init_id,
                assignees=acc_vars,
                forced_iname_deps=outer_insn_inames - frozenset(expr.inames),
                forced_iname_deps_is_final=insn.forced_iname_deps_is_final,
                depends_on=frozenset(),
                expression=expr.operation.neutral_element(arg_dtype, expr.inames))

        generated_insns.append(init_insn)

        update_id = insn_id_gen(
                based_on="%s_%s_update" % (insn.id, "_".join(expr.inames)))

        update_insn_iname_deps = temp_kernel.insn_inames(insn) | set(expr.inames)
        if insn.forced_iname_deps_is_final:
            update_insn_iname_deps = insn.forced_iname_deps | set(expr.inames)

        reduction_insn = make_assignment(
                id=update_id,
                assignees=acc_vars,
                expression=expr.operation(
                    arg_dtype,
                    acc_vars if len(acc_vars) > 1 else acc_vars[0],
                    expr.expr, expr.inames),
                depends_on=frozenset([init_insn.id]) | insn.depends_on,
                forced_iname_deps=update_insn_iname_deps,
                forced_iname_deps_is_final=insn.forced_iname_deps_is_final)

        generated_insns.append(reduction_insn)

        new_insn_add_depends_on.add(reduction_insn.id)

        if nresults == 1:
            assert len(acc_vars) == 1
            return acc_vars[0]
        else:
            return acc_vars
Example #31
def test_generate_c_snippet():
    from pymbolic import var
    I = var("I")  # noqa
    f = var("f")
    df = var("df")
    q_v = var("q_v")
    eN = var("eN")  # noqa
    k = var("k")
    u = var("u")

    from functools import partial
    l_sum = partial(lp.Reduction, "sum", allow_simultaneous=True)

    Instr = lp.Assignment  # noqa

    knl = lp.make_kernel("{[I, k]: 0<=I<nSpace and 0<=k<nQuad}", [
        Instr(f[I], l_sum(k, q_v[k, I] * u)),
        Instr(df[I], l_sum(k, q_v[k, I])),
    ], [
        lp.GlobalArg("q_v", np.float64, shape="nQuad, nSpace"),
        lp.GlobalArg("f,df", np.float64, shape="nSpace"),
        lp.ValueArg("u", np.float64),
        "...",
    ],
                         target=CTarget(),
                         assumptions="nQuad>=1")

    if 0:  # enable to play with prefetching
        # (prefetch currently requires constant sizes)
        knl = lp.fix_parameters(knl, nQuad=5, nSpace=3)
        knl = lp.add_prefetch(knl, "q_v", "k,I", default_tag=None)

    knl = lp.split_iname(knl, "k", 4, inner_tag="unr", slabs=(0, 1))
    knl = lp.prioritize_loops(knl, "I,k_outer,k_inner")

    knl = lp.preprocess_kernel(knl)
    knl = lp.get_one_scheduled_kernel(knl)
    print(lp.generate_body(knl))
Example #32
def test_basic_assign_rhs_codegen():
    """Test whether the code generator generates RHS evaluation code
    properly."""
    cbuild = RawCodeBuilder()
    cbuild.add_and_get_ids(
        Assign(id="assign_rhs1",
               assignee="<state>y",
               assignee_subscript=(),
               expression=var("y")(t=var("<t>")),
               depends_on=[]),
        Assign(id="assign_rhs2",
               assignee="<state>y",
               assignee_subscript=(),
               expression=var("yy")(t=var("<t>"), y=var("<state>y")),
               depends_on=["assign_rhs1"]),
        YieldState(id="return",
                   time=0,
                   time_id="final",
                   expression=var("<state>y"),
                   component_id="<state>",
                   depends_on=["assign_rhs2"]))
    cbuild.commit()
    code = create_DAGCode_with_init_and_main_phases(
        init_statements=[], main_statements=cbuild.statements)
    codegen = PythonCodeGenerator(class_name="Method")
    Method = codegen.get_class(code)  # noqa

    def y(t):
        return 6

    def yy(t, y):
        return y + 6

    method = Method({"y": y, "yy": yy})
    method.set_up(t_start=0, dt_start=0, context={"y": 0})
    hist = [s for s in method.run(max_steps=2)]
    assert len(hist) == 3
    assert isinstance(hist[1], method.StateComputed)
    assert hist[1].state_component == 12
    assert isinstance(hist[2], method.StepCompleted)
Example #33
    def make_spectra_knl(self, is_real, rank_shape):
        from pymbolic import var, parse
        indices = i, j, k = parse("i, j, k")
        momenta = [var("momenta_"+xx) for xx in ("x", "y", "z")]
        ksq = sum((dk_i * mom[ii])**2
                  for mom, dk_i, ii in zip(momenta, self.dk, indices))
        kmag = var("sqrt")(ksq)
        bin_expr = var("round")(kmag / self.bin_width)

        if is_real:
            from pymbolic.primitives import If, Comparison, LogicalAnd
            nyq = self.grid_shape[-1] / 2
            condition = LogicalAnd((Comparison(momenta[2][k], ">", 0),
                                    Comparison(momenta[2][k], "<", nyq)))
            count = If(condition, 2, 1)
        else:
            count = 1

        fk = var("fk")[i, j, k]
        weight_expr = count * kmag**(var("k_power")) * var("abs")(fk)**2

        histograms = {"spectrum": (bin_expr, weight_expr)}

        args = [
            lp.GlobalArg("fk", self.cdtype, shape=("Nx", "Ny", "Nz"),
                         offset=lp.auto),
            lp.GlobalArg("momenta_x", self.rdtype, shape=("Nx",)),
            lp.GlobalArg("momenta_y", self.rdtype, shape=("Ny",)),
            lp.GlobalArg("momenta_z", self.rdtype, shape=("Nz",)),
            lp.ValueArg("k_power", self.rdtype),
            ...
        ]

        from pystella.histogram import Histogrammer
        return Histogrammer(self.decomp, histograms, self.num_bins,
                            self.rdtype, args=args, rank_shape=rank_shape)
Example #34
    def __init__(self, dim=None, yukawa_lambda_name="lam"):
        """
        :arg yukawa_lambda_name: The argument name to use for the Yukawa
            parameter when generating functions to evaluate this kernel.
        """
        lam = var(yukawa_lambda_name)

        if dim == 2:
            r = pymbolic_real_norm_2(make_sym_vector("d", dim))

            # http://dlmf.nist.gov/10.27#E8
            expr = var("hankel_1")(0, var("I") * lam * r)
            scaling_for_K0 = 1 / 2 * var("pi") * var("I")  # noqa: N806

            scaling = -1 / (2 * var("pi")) * scaling_for_K0
        else:
            raise RuntimeError("unsupported dimensionality")

        super(YukawaKernel, self).__init__(dim,
                                           expression=expr,
                                           global_scaling_const=scaling,
                                           is_complex_valued=True)

        self.yukawa_lambda_name = yukawa_lambda_name
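
A short usage sketch for the constructor above (assuming this is sumpy's
YukawaKernel; the import path is an assumption):

    from sumpy.kernel import YukawaKernel  # assumed import path

    # only dim == 2 is supported, per the constructor above; generated
    # evaluation functions will take "lam" as the Yukawa parameter
    knl = YukawaKernel(dim=2, yukawa_lambda_name="lam")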
Example #35
0
def precompute(
        kernel,
        subst_use,
        sweep_inames=[],
        within=None,
        storage_axes=None,
        temporary_name=None,
        precompute_inames=None,
        precompute_outer_inames=None,
        storage_axis_to_tag={},

        # "None" is a valid value here, distinct from the default.
        default_tag=_not_provided,
        dtype=None,
        fetch_bounding_box=False,
        temporary_address_space=None,
        compute_insn_id=None,
        **kwargs):
    """Precompute the expression described in the substitution rule determined by
    *subst_use* and store it in a temporary array. A precomputation needs two
    things to operate, a list of *sweep_inames* (order irrelevant) and an
    ordered list of *storage_axes* (whose order will describe the axis ordering
    of the temporary array).

    :arg subst_use: Describes what to prefetch.

        The following objects may be given for *subst_use*:

        * The name of the substitution rule.

        * The tagged name ("name$tag") of the substitution rule.

        * A list of invocations of the substitution rule.
          This list of invocations, when swept across *sweep_inames*, then serves
          to define the footprint of the precomputation.

          Invocations may be tagged ("name$tag") to filter out a subset of the
          usage sites of the substitution rule. (Namely those usage sites that
          use the same tagged name.)

          Invocations may be given as a string or as a
          :class:`pymbolic.primitives.Expression` object.

          If only one invocation is to be given, then the only entry of the list
          may be given directly.

    If the list of invocations generating the footprint is not given,
    all (tag-matching, if desired) usage sites of the substitution rule
    are used to determine the footprint.

    The following cases can arise for each sweep axis:

    * The axis is an iname that occurs within arguments specified at
      usage sites of the substitution rule. This case is assumed covered
      by the storage axes provided for the argument.

    * The axis is an iname that occurs within the *value* of the rule, but not
      within its arguments. A new, dedicated storage axis is allocated for
      such an axis.

    :arg sweep_inames: A :class:`list` of inames to be swept.
        May also equivalently be a comma-separated string.
    :arg within: a stack match as understood by
        :func:`loopy.match.parse_stack_match`.
    :arg storage_axes: A :class:`list` of inames and/or rule argument
        names/indices to be used as storage axes.
        May also equivalently be a comma-separated string.
    :arg temporary_name:
        The temporary variable name to use for storing the precomputed data.
        If it does not exist, it will be created. If it does exist, its properties
        (such as size, type) are checked (and updated, if possible) to match
        its use.
    :arg precompute_inames:
        A tuple of inames to be used to carry out the precomputation.
        If the specified inames do not already exist, they will be
        created. If they do already exist, their loop domain is verified
        against the one required for this precomputation. This tuple may
        be shorter than the (provided or automatically found) *storage_axes*
        tuple, in which case names will be automatically created.
        May also equivalently be a comma-separated string.

    :arg precompute_outer_inames: A :class:`frozenset` of inames within which
        the compute instruction is nested. If *None*, make an educated guess.
        May also be specified as a comma-separated string.

    :arg default_tag: The :ref:`iname tag <iname-tags>` to be applied to the
        inames created to perform the precomputation. The current default will
        make them local axes and automatically split them to fit the work
        group size, but this default will disappear in favor of simply leaving them
        untagged in 2019. For 2018, a warning will be issued if no *default_tag* is
        specified.

    :arg compute_insn_id: The ID of the instruction generated to perform the
        precomputation.

    If *storage_axes* is not specified, it defaults to the arrangement
    ``<direct sweep axes><arguments>`` with the direct sweep axes being the
    slower-varying indices.

    Trivial storage axes (i.e. axes of length 1 with respect to the sweep) are
    eliminated.
    """

    # {{{ unify temporary_address_space / temporary_scope

    temporary_scope = kwargs.pop("temporary_scope", None)

    from loopy.kernel.data import AddressSpace
    if temporary_scope is not None:
        from warnings import warn
        warn(
            "temporary_scope is deprecated. Use temporary_address_space instead",
            DeprecationWarning,
            stacklevel=2)

        if temporary_address_space is not None:
            raise LoopyError(
                "may not specify both temporary_address_space and "
                "temporary_scope")

        temporary_address_space = temporary_scope

    del temporary_scope

    # }}}

    if kwargs:
        raise TypeError("unrecognized keyword arguments: %s" %
                        ", ".join(kwargs.keys()))

    # {{{ check, standardize arguments

    if isinstance(sweep_inames, str):
        sweep_inames = [iname.strip() for iname in sweep_inames.split(",")]

    for iname in sweep_inames:
        if iname not in kernel.all_inames():
            raise RuntimeError("sweep iname '%s' is not a known iname" % iname)

    sweep_inames = list(sweep_inames)
    sweep_inames_set = frozenset(sweep_inames)

    if isinstance(storage_axes, str):
        storage_axes = [ax.strip() for ax in storage_axes.split(",")]

    if isinstance(precompute_inames, str):
        precompute_inames = [
            iname.strip() for iname in precompute_inames.split(",")
        ]

    if isinstance(precompute_outer_inames, str):
        precompute_outer_inames = frozenset(
            iname.strip() for iname in precompute_outer_inames.split(","))

    if isinstance(subst_use, str):
        subst_use = [subst_use]

    footprint_generators = None

    subst_name = None
    subst_tag = None

    from pymbolic.primitives import Variable, Call
    from loopy.symbolic import parse, TaggedVariable

    for use in subst_use:
        if isinstance(use, str):
            use = parse(use)

        if isinstance(use, Call):
            if footprint_generators is None:
                footprint_generators = []

            footprint_generators.append(use)
            subst_name_as_expr = use.function
        else:
            subst_name_as_expr = use

        if isinstance(subst_name_as_expr, TaggedVariable):
            new_subst_name = subst_name_as_expr.name
            new_subst_tag = subst_name_as_expr.tag
        elif isinstance(subst_name_as_expr, Variable):
            new_subst_name = subst_name_as_expr.name
            new_subst_tag = None
        else:
            raise ValueError("unexpected type of subst_name")

        if (subst_name, subst_tag) == (None, None):
            subst_name, subst_tag = new_subst_name, new_subst_tag
        else:
            if (subst_name, subst_tag) != (new_subst_name, new_subst_tag):
                raise ValueError("not all uses in subst_use agree "
                                 "on rule name and tag")

    from loopy.match import parse_stack_match
    within = parse_stack_match(within)

    try:
        subst = kernel.substitutions[subst_name]
    except KeyError:
        raise LoopyError("substitution rule '%s' not found" % subst_name)

    c_subst_name = subst_name.replace(".", "_")

    # {{{ handle default_tag

    from loopy.transform.data import _not_provided \
            as transform_data_not_provided

    if default_tag is _not_provided or default_tag is transform_data_not_provided:
        # no need to warn for scalar precomputes
        if sweep_inames:
            from warnings import warn
            warn(
                "Not specifying default_tag is deprecated, and default_tag "
                "will become mandatory in 2019.x. "
                "Pass 'default_tag=\"l.auto\" to match the current default, "
                "or Pass 'default_tag=None to leave the loops untagged, which "
                "is the recommended behavior.",
                DeprecationWarning,
                stacklevel=(

                    # In this case, we came here through add_prefetch. Increase
                    # the stacklevel.
                    3 if default_tag is transform_data_not_provided else 2))

        default_tag = "l.auto"

    from loopy.kernel.data import parse_tag
    default_tag = parse_tag(default_tag)

    # }}}

    # }}}

    # {{{ process invocations in footprint generators, start access_descriptors

    if footprint_generators:
        from pymbolic.primitives import Variable, Call

        access_descriptors = []

        for fpg in footprint_generators:
            if isinstance(fpg, Variable):
                args = ()
            elif isinstance(fpg, Call):
                args = fpg.parameters
            else:
                raise ValueError("footprint generator must "
                                 "be substitution rule invocation")

            access_descriptors.append(
                RuleAccessDescriptor(identifier=access_descriptor_id(
                    args, None),
                                     args=args))

    # }}}

    # {{{ gather up invocations in kernel code, finish access_descriptors

    if not footprint_generators:
        rule_mapping_context = SubstitutionRuleMappingContext(
            kernel.substitutions, kernel.get_var_name_generator())
        invg = RuleInvocationGatherer(rule_mapping_context, kernel, subst_name,
                                      subst_tag, within)
        del rule_mapping_context

        import loopy as lp
        for insn in kernel.instructions:
            if isinstance(insn, lp.MultiAssignmentBase):
                for assignee in insn.assignees:
                    invg(assignee, kernel, insn)
                invg(insn.expression, kernel, insn)

        access_descriptors = invg.access_descriptors
        if not access_descriptors:
            raise RuntimeError("no invocations of '%s' found" % subst_name)

    # }}}

    # {{{ find inames used in arguments

    expanding_usage_arg_deps = set()

    for accdesc in access_descriptors:
        for arg in accdesc.args:
            expanding_usage_arg_deps.update(
                get_dependencies(arg) & kernel.all_inames())

    # }}}

    var_name_gen = kernel.get_var_name_generator()

    # {{{ use given / find new storage_axes

    # extra axes made necessary because they don't occur in the arguments
    extra_storage_axes = set(sweep_inames_set - expanding_usage_arg_deps)

    from loopy.symbolic import SubstitutionRuleExpander
    submap = SubstitutionRuleExpander(kernel.substitutions)

    value_inames = (get_dependencies(submap(subst.expression)) -
                    frozenset(subst.arguments)) & kernel.all_inames()
    if value_inames - expanding_usage_arg_deps < extra_storage_axes:
        raise RuntimeError("unreferenced sweep inames specified: " +
                           ", ".join(extra_storage_axes - value_inames -
                                     expanding_usage_arg_deps))

    new_iname_to_tag = {}

    if storage_axes is None:
        storage_axes = []

        # Add sweep_inames (in given--rather than arbitrary--order) to
        # storage_axes *if* they are part of extra_storage_axes.
        for iname in sweep_inames:
            if iname in extra_storage_axes:
                extra_storage_axes.remove(iname)
                storage_axes.append(iname)

        if extra_storage_axes:
            if (precompute_inames is not None
                    and len(storage_axes) < len(precompute_inames)):
                raise LoopyError(
                    "must specify a sufficient number of "
                    "storage_axes to uniquely determine the meaning "
                    "of the given precompute_inames. (%d storage_axes "
                    "needed)" % len(precompute_inames))
            storage_axes.extend(sorted(extra_storage_axes))

        storage_axes.extend(range(len(subst.arguments)))

    del extra_storage_axes

    prior_storage_axis_name_dict = {}

    storage_axis_names = []
    storage_axis_sources = []  # number for arg#, or iname

    # {{{ check for pre-existing precompute_inames

    if precompute_inames is not None:
        preexisting_precompute_inames = (set(precompute_inames)
                                         & kernel.all_inames())
    else:
        preexisting_precompute_inames = set()

    # }}}

    for i, saxis in enumerate(storage_axes):
        tag_lookup_saxis = saxis

        if saxis in subst.arguments:
            saxis = subst.arguments.index(saxis)

        storage_axis_sources.append(saxis)

        if isinstance(saxis, int):
            # argument index
            name = old_name = subst.arguments[saxis]
        else:
            old_name = saxis
            name = "%s_%s" % (c_subst_name, old_name)

        if (precompute_inames is not None and i < len(precompute_inames)
                and precompute_inames[i]):
            name = precompute_inames[i]
            tag_lookup_saxis = name
            if (name not in preexisting_precompute_inames
                    and var_name_gen.is_name_conflicting(name)):
                raise RuntimeError("new storage axis name '%s' "
                                   "conflicts with existing name" % name)
        else:
            name = var_name_gen(name)

        storage_axis_names.append(name)
        if name not in preexisting_precompute_inames:
            new_iname_to_tag[name] = storage_axis_to_tag.get(
                tag_lookup_saxis, default_tag)

        prior_storage_axis_name_dict[name] = old_name

    del storage_axis_to_tag
    del storage_axes
    del precompute_inames

    # }}}

    # {{{ fill out access_descriptors[...].storage_axis_exprs

    access_descriptors = [
        accdesc.copy(storage_axis_exprs=storage_axis_exprs(
            storage_axis_sources, accdesc.args))
        for accdesc in access_descriptors
    ]

    # }}}

    expanding_inames = sweep_inames_set | frozenset(expanding_usage_arg_deps)
    assert expanding_inames <= kernel.all_inames()

    if storage_axis_names:
        # {{{ find domain to be changed

        change_inames = expanding_inames | preexisting_precompute_inames

        from loopy.kernel.tools import DomainChanger
        domch = DomainChanger(kernel, change_inames)

        if domch.leaf_domain_index is not None:
            # If the sweep inames are at home in parent domains, then we'll add
            # fetches with loops over copies of these parent inames that will end
            # up being scheduled *within* loops over these parents.

            for iname in sweep_inames_set:
                if kernel.get_home_domain_index(
                        iname) != domch.leaf_domain_index:
                    raise RuntimeError(
                        "sweep iname '%s' is not 'at home' in the "
                        "sweep's leaf domain" % iname)

        # }}}

        abm = ArrayToBufferMap(kernel, domch.domain, sweep_inames,
                               access_descriptors, len(storage_axis_names))

        non1_storage_axis_names = []
        for i, saxis in enumerate(storage_axis_names):
            if abm.non1_storage_axis_flags[i]:
                non1_storage_axis_names.append(saxis)
            else:
                del new_iname_to_tag[saxis]

                if saxis in preexisting_precompute_inames:
                    raise LoopyError(
                        "precompute axis %d (1-based) was "
                        "eliminated as "
                        "having length 1 but also mapped to existing "
                        "iname '%s'" % (i + 1, saxis))

        mod_domain = domch.domain

        # {{{ modify the domain, taking into account preexisting inames

        # inames may already exist in mod_domain, add them primed to start
        primed_non1_saxis_names = [
            iname + "'" for iname in non1_storage_axis_names
        ]

        mod_domain = abm.augment_domain_with_sweep(
            domch.domain,
            primed_non1_saxis_names,
            boxify_sweep=fetch_bounding_box)

        check_domain = mod_domain

        for i, saxis in enumerate(non1_storage_axis_names):
            var_dict = mod_domain.get_var_dict(isl.dim_type.set)

            if saxis in preexisting_precompute_inames:
                # add equality constraint between existing and new variable

                dt, dim_idx = var_dict[saxis]
                saxis_aff = isl.Aff.var_on_domain(mod_domain.space, dt,
                                                  dim_idx)

                dt, dim_idx = var_dict[primed_non1_saxis_names[i]]
                new_var_aff = isl.Aff.var_on_domain(mod_domain.space, dt,
                                                    dim_idx)

                mod_domain = mod_domain.add_constraint(
                    isl.Constraint.equality_from_aff(new_var_aff - saxis_aff))

                # project out the new one
                mod_domain = mod_domain.project_out(dt, dim_idx, 1)

            else:
                # remove the prime from the new variable
                dt, dim_idx = var_dict[primed_non1_saxis_names[i]]
                mod_domain = mod_domain.set_dim_name(dt, dim_idx, saxis)

        def add_assumptions(d):
            assumption_non_param = isl.BasicSet.from_params(kernel.assumptions)
            assumptions, domain = isl.align_two(assumption_non_param, d)
            return assumptions & domain

        # {{{ check that we got the desired domain

        check_domain = add_assumptions(
            check_domain.project_out_except(primed_non1_saxis_names,
                                            [isl.dim_type.set]))

        mod_check_domain = add_assumptions(mod_domain)

        # re-add the prime from the new variable
        var_dict = mod_check_domain.get_var_dict(isl.dim_type.set)

        for saxis in non1_storage_axis_names:
            dt, dim_idx = var_dict[saxis]
            mod_check_domain = mod_check_domain.set_dim_name(
                dt, dim_idx, saxis + "'")

        mod_check_domain = mod_check_domain.project_out_except(
            primed_non1_saxis_names, [isl.dim_type.set])

        mod_check_domain, check_domain = isl.align_two(mod_check_domain,
                                                       check_domain)

        # The modified domain can't get bigger by adding constraints
        assert mod_check_domain <= check_domain

        if not check_domain <= mod_check_domain:
            print(check_domain)
            print(mod_check_domain)
            raise LoopyError("domain of preexisting inames does not match "
                             "domain needed for precompute")

        # }}}

        # {{{ check that we didn't shrink the original domain

        # project out the new names from the modified domain
        orig_domain_inames = list(domch.domain.get_var_dict(isl.dim_type.set))
        mod_check_domain = add_assumptions(
            mod_domain.project_out_except(orig_domain_inames,
                                          [isl.dim_type.set]))

        check_domain = add_assumptions(domch.domain)

        mod_check_domain, check_domain = isl.align_two(mod_check_domain,
                                                       check_domain)

        # The modified domain can't get bigger by adding constraints
        assert mod_check_domain <= check_domain

        if not check_domain <= mod_check_domain:
            print(check_domain)
            print(mod_check_domain)
            raise LoopyError(
                "original domain got shrunk by applying the precompute")

        # }}}

        # }}}

        new_kernel_domains = domch.get_domains_with(mod_domain)

    else:
        # leave kernel domains unchanged
        new_kernel_domains = kernel.domains

        non1_storage_axis_names = []
        abm = NoOpArrayToBufferMap()

    kernel = kernel.copy(domains=new_kernel_domains)

    # {{{ set up compute insn

    if temporary_name is None:
        temporary_name = var_name_gen(based_on=c_subst_name)

    assignee = var(temporary_name)

    if non1_storage_axis_names:
        assignee = assignee[tuple(
            var(iname) for iname in non1_storage_axis_names)]

    # {{{ process substitutions on compute instruction

    storage_axis_subst_dict = {}

    for arg_name, bi in zip(storage_axis_names, abm.storage_base_indices):
        if arg_name in non1_storage_axis_names:
            arg = var(arg_name)
        else:
            arg = 0

        storage_axis_subst_dict[prior_storage_axis_name_dict.get(
            arg_name, arg_name)] = arg + bi

    rule_mapping_context = SubstitutionRuleMappingContext(
        kernel.substitutions, kernel.get_var_name_generator())

    from loopy.match import parse_stack_match
    expr_subst_map = RuleAwareSubstitutionMapper(
        rule_mapping_context,
        make_subst_func(storage_axis_subst_dict),
        within=parse_stack_match(None))

    compute_expression = expr_subst_map(subst.expression, kernel, None)

    # }}}

    from loopy.kernel.data import Assignment
    if compute_insn_id is None:
        compute_insn_id = kernel.make_unique_instruction_id(
            based_on=c_subst_name)

    compute_insn = Assignment(
        id=compute_insn_id,
        assignee=assignee,
        expression=compute_expression,
        # within_inames determined below
    )
    compute_dep_id = compute_insn_id
    added_compute_insns = [compute_insn]

    if temporary_address_space == AddressSpace.GLOBAL:
        barrier_insn_id = kernel.make_unique_instruction_id(
            based_on=c_subst_name + "_barrier")
        from loopy.kernel.instruction import BarrierInstruction
        barrier_insn = BarrierInstruction(id=barrier_insn_id,
                                          depends_on=frozenset(
                                              [compute_insn_id]),
                                          synchronization_kind="global",
                                          mem_kind="global")
        compute_dep_id = barrier_insn_id

        added_compute_insns.append(barrier_insn)

    # }}}

    # {{{ substitute rule into expressions in kernel (if within footprint)

    from loopy.symbolic import SubstitutionRuleExpander
    expander = SubstitutionRuleExpander(kernel.substitutions)

    invr = RuleInvocationReplacer(rule_mapping_context,
                                  subst_name,
                                  subst_tag,
                                  within,
                                  access_descriptors,
                                  abm,
                                  storage_axis_names,
                                  storage_axis_sources,
                                  non1_storage_axis_names,
                                  temporary_name,
                                  compute_insn_id,
                                  compute_dep_id,
                                  compute_read_variables=get_dependencies(
                                      expander(compute_expression)))

    kernel = invr.map_kernel(kernel)
    kernel = kernel.copy(instructions=added_compute_insns +
                         kernel.instructions)
    kernel = rule_mapping_context.finish_kernel(kernel)

    # }}}

    # {{{ add dependencies to compute insn

    kernel = kernel.copy(instructions=[
        insn.copy(depends_on=frozenset(invr.compute_insn_depends_on))
        if insn.id == compute_insn_id else insn
        for insn in kernel.instructions
    ])

    # }}}

    # {{{ propagate storage iname subst to dependencies of compute instructions

    from loopy.kernel.tools import find_recursive_dependencies
    compute_deps = find_recursive_dependencies(kernel,
                                               frozenset([compute_insn_id]))

    # FIXME: Need to verify that there are no outside dependencies
    # on compute_deps

    prior_storage_axis_names = frozenset(storage_axis_subst_dict)

    new_insns = []
    for insn in kernel.instructions:
        if (insn.id in compute_deps
                and insn.within_inames & prior_storage_axis_names):
            insn = (insn.with_transformed_expressions(
                lambda expr: expr_subst_map(expr, kernel, insn)).copy(
                    within_inames=frozenset(
                        storage_axis_subst_dict.get(iname, var(iname)).name
                        for iname in insn.within_inames)))

        new_insns.append(insn)

    kernel = kernel.copy(instructions=new_insns)

    # }}}

    # {{{ determine inames for compute insn

    if precompute_outer_inames is None:
        from loopy.kernel.tools import guess_iname_deps_based_on_var_use
        precompute_outer_inames = (
            frozenset(non1_storage_axis_names)
            | frozenset((expanding_usage_arg_deps | value_inames) -
                        sweep_inames_set)
            | guess_iname_deps_based_on_var_use(kernel, compute_insn))
    else:
        if not isinstance(precompute_outer_inames, frozenset):
            raise TypeError("precompute_outer_inames must be a frozenset")

        precompute_outer_inames = precompute_outer_inames \
                | frozenset(non1_storage_axis_names)

    kernel = kernel.copy(instructions=[
        insn.copy(within_inames=precompute_outer_inames)
        if insn.id == compute_insn_id else insn
        for insn in kernel.instructions
    ])

    # }}}

    # {{{ set up temp variable

    import loopy as lp
    if dtype is not None:
        dtype = np.dtype(dtype)

    if temporary_address_space is None:
        temporary_address_space = lp.auto

    new_temp_shape = tuple(abm.non1_storage_shape)

    new_temporary_variables = kernel.temporary_variables.copy()
    if temporary_name not in new_temporary_variables:
        temp_var = lp.TemporaryVariable(
            name=temporary_name,
            dtype=dtype,
            base_indices=(0, ) * len(new_temp_shape),
            shape=new_temp_shape,
            address_space=temporary_address_space,
            dim_names=tuple(non1_storage_axis_names))

    else:
        temp_var = new_temporary_variables[temporary_name]

        # {{{ check and adapt existing temporary

        if temp_var.dtype is lp.auto:
            pass
        elif temp_var.dtype is not lp.auto and dtype is lp.auto:
            dtype = temp_var.dtype
        elif temp_var.dtype is not lp.auto and dtype is not lp.auto:
            if temp_var.dtype != dtype:
                raise LoopyError("Existing and new dtype of temporary '%s' "
                                 "do not match (existing: %s, new: %s)" %
                                 (temporary_name, temp_var.dtype, dtype))

        temp_var = temp_var.copy(dtype=dtype)

        if len(temp_var.shape) != len(new_temp_shape):
            raise LoopyError(
                "Existing and new temporary '%s' do not "
                "have matching number of dimensions ('%d' vs. '%d') " %
                (temporary_name, len(temp_var.shape), len(new_temp_shape)))

        if temp_var.base_indices != (0, ) * len(new_temp_shape):
            raise LoopyError(
                "Existing temporary '%s' does not have the all-zero "
                "base indices required here (existing: %s)" %
                (temporary_name, temp_var.base_indices))

        new_temp_shape = tuple(
            max(i, ex_i) for i, ex_i in zip(new_temp_shape, temp_var.shape))

        temp_var = temp_var.copy(shape=new_temp_shape)

        if temporary_address_space == temp_var.address_space:
            pass
        elif temporary_address_space is lp.auto:
            temporary_address_space = temp_var.address_space
        elif temp_var.address_space is lp.auto:
            pass
        else:
            raise LoopyError("Existing and new temporary '%s' do not "
                             "have matching scopes (existing: %s, new: %s)" %
                             (temporary_name,
                              AddressSpace.stringify(temp_var.address_space),
                              AddressSpace.stringify(temporary_address_space)))

        temp_var = temp_var.copy(address_space=temporary_address_space)

        # }}}

    new_temporary_variables[temporary_name] = temp_var

    kernel = kernel.copy(temporary_variables=new_temporary_variables)

    # }}}

    from loopy import tag_inames
    kernel = tag_inames(kernel, new_iname_to_tag)

    from loopy.kernel.data import AutoFitLocalIndexTag, filter_iname_tags_by_type

    if filter_iname_tags_by_type(new_iname_to_tag.values(),
                                 AutoFitLocalIndexTag):
        from loopy.kernel.tools import assign_automatic_axes
        kernel = assign_automatic_axes(kernel)

    return kernel
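
A hedged usage sketch of the precompute transform whose tail appears above
(the kernel, substitution rule, and iname names are invented for
illustration, not taken from the source):

import loopy as lp

knl = lp.make_kernel(
    "{[i]: 0<=i<n}",
    """
    f(x) := 2*x
    out[i] = f(i) + f(i+1)
    """)
# materialize the footprint of f swept by i into a temporary
knl = lp.precompute(knl, "f", sweep_inames="i", default_tag=None)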
Example #36
0
def test_expand():
    from pymbolic import var, expand

    x = var("x")
    u = (x + 1)**5
    expand(u)
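
For reference, a minimal sketch of what expand() returns (the exact printed
form may differ between pymbolic versions):

from pymbolic import var, expand

x = var("x")
print(expand((x + 1)**2))  # a sum of monomials, e.g. x**2 + 2*x + 1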
Example #37
0
def test_tim3d(ctx_factory):
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    n = 8

    from pymbolic import var
    K_sym = var("K")

    field_shape = (K_sym, n, n, n)

    # K - run-time symbolic
    knl = lp.make_kernel(
        ctx.devices[0],
        "[K] -> {[i,j,k,e,m,o,gi]: 0<=i,j,k,m,o<%d and 0<=e<K and 0<=gi<6}" %
        n,
        [
            "ur(a,b,c) := sum_float32(@o, D[a,o]*u[e,o,b,c])",
            "us(a,b,c) := sum_float32(@o, D[b,o]*u[e,a,o,c])",
            "ut(a,b,c) := sum_float32(@o, D[c,o]*u[e,a,b,o])",
            "lap[e,i,j,k]  = "
            "   sum_float32(m, D[m,i]*(G[0,e,m,j,k]*ur(m,j,k) + G[1,e,m,j,k]*us(m,j,k) + G[2,e,m,j,k]*ut(m,j,k)))"
            " + sum_float32(m, D[m,j]*(G[1,e,i,m,k]*ur(i,m,k) + G[3,e,i,m,k]*us(i,m,k) + G[4,e,i,m,k]*ut(i,m,k)))"
            " + sum_float32(m, D[m,k]*(G[2,e,i,j,m]*ur(i,j,m) + G[4,e,i,j,m]*us(i,j,m) + G[5,e,i,j,m]*ut(i,j,m)))"
        ],
        [
            lp.ArrayArg("u", dtype, shape=field_shape, order=order),
            lp.ArrayArg("lap", dtype, shape=field_shape, order=order),
            lp.ArrayArg("G", dtype, shape=(6, ) + field_shape, order=order),
            #            lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order),
            lp.ArrayArg("D", dtype, shape=(n, n), order=order),
            #            lp.ImageArg("D", dtype, shape=(n, n)),
            lp.ValueArg("K", np.int32, approximately=1000),
        ],
        name="semlap3D",
        assumptions="K>=1")

    seq_knl = knl
    knl = lp.add_prefetch(knl, "D", ["m", "j", "i", "k", "o"])
    knl = lp.add_prefetch(knl, "u", ["i", "j", "o", "k"])
    knl = lp.precompute(knl, "ur", np.float32, ["a", "b", "c"])
    knl = lp.precompute(knl, "us", np.float32, ["a", "b", "c"])
    knl = lp.precompute(knl, "ut", np.float32, ["a", "b", "c"])
    knl = lp.split_iname(knl, "e", 1, outer_tag="g.0")  #, slabs=(0, 1))
    knl = lp.split_iname(knl, "k", n, inner_tag="l.2")  #, slabs=(0, 1))
    knl = lp.split_iname(knl, "j", n, inner_tag="l.1")  #, slabs=(0, 1))
    knl = lp.split_iname(knl, "i", n, inner_tag="l.0")  #, slabs=(0, 1))

    #    knl = lp.tag_inames(knl, dict(k_nner="unr"))

    knl = lp.tag_inames(knl, dict(o="unr"))
    knl = lp.tag_inames(knl, dict(m="unr"))
    #    knl = lp.tag_inames(knl, dict(i="unr"))

    knl = lp.add_prefetch(knl, "G", [2, 3, 4])  # axis/argument indices on G

    kernel_gen = lp.generate_loop_schedules(knl)
    kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000))

    K = 4000
    lp.auto_test_vs_ref(seq_knl,
                        ctx,
                        kernel_gen,
                        op_count=K * ((n**4) * 3 * 2 + (n**3) * 5 * 3 +
                                      (n**4) * 3 * 2) / 1e9,
                        op_label="GFlops",
                        parameters={"K": K})
Example #38
0
    def __init__(self,
                 iname_exprs,
                 code,
                 read_variables=frozenset(),
                 assignees=tuple(),
                 id=None,
                 depends_on=None,
                 depends_on_is_final=None,
                 groups=None,
                 conflicts_with_groups=None,
                 no_sync_with=None,
                 within_inames_is_final=None,
                 within_inames=None,
                 priority=0,
                 boostable=None,
                 boostable_into=None,
                 predicates=frozenset(),
                 tags=None,
                 insn_deps=None,
                 insn_deps_is_final=None):
        """
        :arg iname_exprs: Like :attr:`iname_exprs`, but instead of tuples,
            simple strings representing inames are also allowed. A single
            string is also allowed, which should consist of comma-separated
            inames.
        :arg assignees: Like :attr:`assignees`, but may also be a
            semicolon-separated string of such expressions or a
            sequence of strings parseable into the desired format.
        """

        InstructionBase.__init__(self,
                                 id=id,
                                 depends_on=depends_on,
                                 depends_on_is_final=depends_on_is_final,
                                 groups=groups,
                                 conflicts_with_groups=conflicts_with_groups,
                                 no_sync_with=no_sync_with,
                                 within_inames_is_final=within_inames_is_final,
                                 within_inames=within_inames,
                                 boostable=boostable,
                                 boostable_into=boostable_into,
                                 priority=priority,
                                 predicates=predicates,
                                 tags=tags,
                                 insn_deps=insn_deps,
                                 insn_deps_is_final=insn_deps_is_final)

        # {{{ normalize iname_exprs

        if isinstance(iname_exprs, str):
            iname_exprs = [i.strip() for i in iname_exprs.split(",")]
            iname_exprs = [i for i in iname_exprs if i]

        from pymbolic import var
        new_iname_exprs = []
        for i in iname_exprs:
            if isinstance(i, str):
                new_iname_exprs.append((i, var(i)))
            else:
                new_iname_exprs.append(i)

        # }}}

        # {{{ normalize assignees

        if isinstance(assignees, str):
            assignees = [i.strip() for i in assignees.split(";")]
            assignees = [i for i in assignees if i]

        new_assignees = []
        from loopy.symbolic import parse
        for i in assignees:
            if isinstance(i, str):
                new_assignees.append(parse(i))
            else:
                new_assignees.append(i)
        # }}}

        self.iname_exprs = new_iname_exprs
        from loopy.tools import remove_common_indentation
        self.code = remove_common_indentation(code)
        self.read_variables = read_variables
        self.assignees = new_assignees
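
A standalone sketch of the iname_exprs normalization performed above,
recreated outside the class (the variable names are illustrative):

from pymbolic import var

iname_exprs = "i, j"  # a comma-separated string, as the docstring allows
parts = [s.strip() for s in iname_exprs.split(",")]
parts = [s for s in parts if s]
# each bare iname becomes an (iname, expression) pair
print([(name, var(name)) for name in parts])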
Example #39
0
 def test_euler_dt_var(self):
     self._test_scheme(EulerStep(pm.var('dt')))
Example #40
0
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

from pymbolic import parse, var
from pymbolic.mapper.dependency import DependencyMapper

x = var("x")
y = var("y")

expr2 = 3 * x + 5 - y
expr = parse("3*x+5-y")

print(expr)
print(expr2)

dm = DependencyMapper()
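# DependencyMapper collects the set of variables an expression depends on;
# for both expressions above that set is {x, y} (repr varies by version).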
print(dm(expr))
Example #41
0
    def __init__(self, fft, dk):
        self.fft = fft
        grid_size = fft.grid_shape[0] * fft.grid_shape[1] * fft.grid_shape[2]

        queue = self.fft.sub_k["momenta_x"].queue
        sub_k = list(x.get().astype("int") for x in self.fft.sub_k.values())
        k_names = ("k_x", "k_y", "k_z")
        self.momenta = {}
        for mu, (name, kk) in enumerate(zip(k_names, sub_k)):
            kk_mu = dk[mu] * kk.astype(fft.rdtype)
            self.momenta[name + "_2"] = cla.to_device(queue, kk_mu)

            # zero Nyquist mode for first derivatives
            kk_mu[abs(sub_k[mu]) == fft.grid_shape[mu] // 2] = 0.
            kk_mu[sub_k[mu] == 0] = 0.
            self.momenta[name + "_1"] = cla.to_device(queue, kk_mu)

        args = [
            lp.GlobalArg("fk", shape="(Nx, Ny, Nz)"),
            lp.GlobalArg("k_x_1, k_x_2", fft.rdtype, shape=("Nx", )),
            lp.GlobalArg("k_y_1, k_y_2", fft.rdtype, shape=("Ny", )),
            lp.GlobalArg("k_z_1, k_z_2", fft.rdtype, shape=("Nz", )),
        ]

        from pystella.field import Field
        fk = Field("fk")
        pd = tuple(Field(pdi) for pdi in ("pdx_k", "pdy_k", "pdz_k"))

        indices = fk.indices

        from pymbolic import var
        mom_vars = tuple(var(name + "_1") for name in k_names)

        fk_tmp = var("fk_tmp")
        tmp_insns = [(fk_tmp, fk * (1 / grid_size))]

        pdx, pdy, pdz = ({
            pdi: kk_i[indices[i]] * 1j * fk_tmp
        } for i, (pdi, kk_i) in enumerate(zip(pd, mom_vars)))

        pdx_incr, pdy_incr, pdz_incr = ({
            Field("div"):
            Field("div") + kk_i[indices[i]] * 1j * fk_tmp
        } for i, kk_i in enumerate(mom_vars))

        mom_vars = tuple(var(name + "_2") for name in k_names)
        kmag_sq = sum(kk_i[x_i]**2 for kk_i, x_i in zip(mom_vars, indices))
        lap = {Field("lap_k"): -kmag_sq * fk_tmp}

        from pystella.elementwise import ElementWiseMap
        common_args = dict(halo_shape=0,
                           args=args,
                           lsize=(16, 2, 1),
                           tmp_instructions=tmp_insns,
                           options=lp.Options(return_dict=True))
        self.pdx_knl = ElementWiseMap(pdx, **common_args)
        self.pdy_knl = ElementWiseMap(pdy, **common_args)
        self.pdz_knl = ElementWiseMap(pdz, **common_args)
        self.pdx_incr_knl = ElementWiseMap(pdx_incr, **common_args)
        self.pdy_incr_knl = ElementWiseMap(pdy_incr, **common_args)
        self.pdz_incr_knl = ElementWiseMap(pdz_incr, **common_args)
        self.lap_knl = ElementWiseMap(lap, **common_args)

        common_args["lsize"] = (16, 1, 1)
        self.grad_knl = ElementWiseMap({**pdx, **pdy, **pdz}, **common_args)
        self.grad_lap_knl = ElementWiseMap({
            **pdx,
            **pdy,
            **pdz,
            **lap
        }, **common_args)
Example #42
0
def RestrictionBase(coefs, StencilKernel, halo_shape, **kwargs):
    """
    A base function for generating a restriction kernel.

    :arg coefs: The coefficients representing the restriction formula.
        Follows the convention of :func:`pystella.derivs.centered_diff`
        (since the restriction is applied recursively in each dimension).

    :arg StencilKernel: The stencil mapper to create an instance of.
        Defaults to :class:`~pystella.Stencil`.

    :arg halo_shape: The number of halo layers on (both sides of) each axis of
        the computational grid.
        Currently must be an :class:`int`.

    :arg lsize: The shape of prefetched arrays in shared memory.
        See :class:`~pystella.ElementWiseMap`.
        Defaults to ``(4, 4, 4)``.

    :arg correct: A :class:`bool` determining whether to produce a kernel which
        corrects an output array by the restricted array, or to only perform
        strict restriction.
        Defaults to *False*.

    :returns: An instance of ``StencilKernel`` which executes the requested
        restriction.
    """

    lsize = kwargs.pop("lsize", (4, 4, 4))

    # ensure grid dimensions are *not* passed, as they will be misinterpreted
    for N in ["Nx", "Ny", "Nz"]:
        _ = kwargs.pop(N, None)

    restrict_coefs = {}
    for a, c_a in coefs.items():
        for b, c_b in coefs.items():
            for c, c_c in coefs.items():
                restrict_coefs[(a, b, c)] = c_a * c_b * c_c

    from pymbolic import parse, var
    i, j, k = parse("i, j, k")
    f1 = Field("f1", offset="h", indices=(2 * i, 2 * j, 2 * k))
    f2 = Field("f2", offset="h")
    tmp = var("tmp")

    tmp_dict = {tmp: expand_stencil(f1, restrict_coefs)}

    if kwargs.pop("correct", False):
        restrict_dict = {f2: f2 - tmp}
    else:
        restrict_dict = {f2: tmp}

    args = [
        lp.GlobalArg("f1", shape="(2*Nx+2*h, 2*Ny+2*h, 2*Nz+2*h)"),
        lp.GlobalArg("f2", shape="(Nx+2*h, Ny+2*h, Nz+2*h)")
    ]

    if isinstance(StencilKernel, Stencil):
        return StencilKernel(restrict_dict,
                             tmp_instructions=tmp_dict,
                             args=args,
                             prefetch_args=["f1"],
                             halo_shape=halo_shape,
                             lsize=lsize,
                             **kwargs)
    else:
        return StencilKernel(restrict_dict,
                             tmp_instructions=tmp_dict,
                             args=args,
                             halo_shape=halo_shape,
                             lsize=lsize,
                             **kwargs)
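
A hedged usage sketch (the coefficient convention and import are assumed
from the docstring above, not verified against pystella):

from pystella import Stencil

# full-weighting restriction coefficients in the centered_diff convention
coefs = {-1: 0.25, 0: 0.5, 1: 0.25}
restrict_knl = RestrictionBase(coefs, Stencil, halo_shape=1)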
Example #43
0
def InterpolationBase(even_coefs, odd_coefs, StencilKernel, halo_shape,
                      **kwargs):
    """
    A base function for generating an interpolation kernel.

    :arg even_coefs: The coefficients representing the interpolation formula
        for gridpoints on the coarse and fine grid which coincide in space.
        Follows the convention of :func:`pystella.derivs.centered_diff`
        (since the interpolation is applied recursively in each dimension).

    :arg odd_coefs: Same as ``even_coefs``, but for points on the fine grid which
        lie between points on the coarse grid.

    :arg StencilKernel: The stencil mapper to create an instance of.
        Defaults to :class:`~pystella.Stencil`.

    :arg halo_shape: The number of halo layers on (both sides of) each axis of
        the computational grid.
        Currently must be an :class:`int`.

    :arg correct: A :class:`bool` determining whether to produce a kernel which
        corrects an output array by the interpolated array, or to only perform
        strict interpolation.
        Defaults to *False*.

    :returns: An instance of ``StencilKernel`` which executes the requested
        interpolation.
    """

    from pymbolic import parse, var
    i, j, k = parse("i, j, k")
    f1 = Field("f1", offset="h")

    tmp_insns = {}
    tmp = var("tmp")

    import itertools
    for parity in tuple(itertools.product((0, 1), (0, 1), (0, 1))):
        result = 0
        for a, c_a in (odd_coefs if parity[0] else even_coefs).items():
            for b, c_b in (odd_coefs if parity[1] else even_coefs).items():
                for c, c_c in (odd_coefs if parity[2] else even_coefs).items():
                    f2 = Field("f2",
                               offset="h",
                               indices=((i + a) // 2, (j + b) // 2,
                                        (k + c) // 2))
                    result += c_a * c_b * c_c * f2

        tmp_insns[tmp[parity]] = result

    from pymbolic.primitives import Remainder
    a, b, c = (Remainder(ind, 2) for ind in (i, j, k))

    if kwargs.pop("correct", False):
        interp_dict = {f1: f1 + tmp[a, b, c]}
    else:
        interp_dict = {f1: tmp[a, b, c]}

    args = [
        lp.GlobalArg("f1", shape="(Nx+2*h, Ny+2*h, Nz+2*h)"),
        lp.GlobalArg("f2", shape="(Nx//2+2*h, Ny//2+2*h, Nz//2+2*h)")
    ]

    return StencilKernel(interp_dict,
                         tmp_instructions=tmp_insns,
                         args=args,
                         prefetch_args=["f2"],
                         halo_shape=halo_shape,
                         **kwargs)
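
A hedged usage sketch along the same lines (linear-interpolation
coefficients; the conventions are assumed, not verified against pystella):

from pystella import Stencil

even_coefs = {0: 1}            # coinciding points copy through
odd_coefs = {-1: 0.5, 1: 0.5}  # midpoints average their two neighbors
interp_knl = InterpolationBase(even_coefs, odd_coefs, Stencil, halo_shape=1)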
Example #44
0
def get_access_info(target, ary, index, eval_expr, vectorization_info):
    """
    :arg ary: an object of type :class:`ArrayBase`
    :arg index: a tuple of indices representing a subscript into ary
    :arg vectorization_info: an instance of :class:`loopy.codegen.VectorizationInfo`,
        or *None*.
    """

    import loopy as lp
    from pymbolic import var

    def eval_expr_assert_integer_constant(i, expr):
        from pymbolic.mapper.evaluator import UnknownVariableError
        try:
            result = eval_expr(expr)
        except UnknownVariableError as e:
            raise LoopyError(
                "When trying to index the array '%s' along axis "
                "%d (tagged '%s'), the index was not a compile-time "
                "constant (but it has to be in order for code to be "
                "generated). You likely want to unroll the iname(s) '%s'." %
                (ary.name, i, ary.dim_tags[i], str(e)))

        if not is_integer(result):
            raise LoopyError("subscript '%s[%s]' has non-constant "
                             "index for separate-array axis %d (0-based)" %
                             (ary.name, index, i))

        return result

    def apply_offset(sub):
        import loopy as lp

        if ary.offset:
            if ary.offset is lp.auto:
                return var(array_name + "_offset") + sub
            elif isinstance(ary.offset, str):
                return var(ary.offset) + sub
            else:
                # assume it's an expression
                return ary.offset + sub
        else:
            return sub

    if not isinstance(index, tuple):
        index = (index, )

    array_name = ary.name

    if ary.dim_tags is None:
        if len(index) != 1:
            raise LoopyError(
                "Array '%s' has no known axis implementation "
                "tags and therefore only supports one-dimensional "
                "indexing. (Did you mean 'shape=loopy.auto' instead of "
                "'shape=None'?)" % ary.name)

        return AccessInfo(array_name=array_name,
                          subscripts=(apply_offset(index[0]), ),
                          vector_index=None)

    if len(ary.dim_tags) != len(index):
        raise LoopyError("subscript to '%s[%s]' has the wrong "
                         "number of indices (got: %d, expected: %d)" %
                         (ary.name, index, len(index), len(ary.dim_tags)))

    num_target_axes = ary.num_target_axes()

    vector_index = None
    subscripts = [0] * num_target_axes

    vector_size = ary.vector_size(target)

    # {{{ process separate-array dim tags first, to find array name

    for i, (idx, dim_tag) in enumerate(zip(index, ary.dim_tags)):
        if isinstance(dim_tag, SeparateArrayArrayDimTag):
            idx = eval_expr_assert_integer_constant(i, idx)
            array_name += "_s%d" % idx

    # }}}

    # {{{ process remaining dim tags

    for i, (idx, dim_tag) in enumerate(zip(index, ary.dim_tags)):
        if isinstance(dim_tag, FixedStrideArrayDimTag):
            stride = dim_tag.stride

            if is_integer(stride):
                if stride % vector_size != 0:
                    raise LoopyError(
                        "array '%s' has axis %d stride of "
                        "%d, which is not divisible by the size of the "
                        "vector (%d)" %
                        (ary.name, i, stride, vector_size))

            elif stride is lp.auto:
                stride = var(array_name + "_stride%d" % i)

            subscripts[dim_tag.target_axis] += (stride // vector_size) * idx

        elif isinstance(dim_tag, SeparateArrayArrayDimTag):
            pass

        elif isinstance(dim_tag, VectorArrayDimTag):
            from pymbolic.primitives import Variable
            if (vectorization_info is not None
                    and isinstance(index[i], Variable)
                    and index[i].name == vectorization_info.iname):
                # We'll do absolutely nothing here, which will result
                # in the vector being returned.
                pass

            else:
                idx = eval_expr_assert_integer_constant(i, idx)

                assert vector_index is None
                vector_index = idx

        else:
            raise LoopyError("unsupported array dim implementation tag '%s' "
                             "in array '%s'" % (dim_tag, ary.name))

    # }}}

    if ary.offset:
        if num_target_axes > 1:
            raise NotImplementedError("offsets for multiple image axes")

        subscripts[0] = apply_offset(subscripts[0])

    return AccessInfo(array_name=array_name,
                      vector_index=vector_index,
                      subscripts=subscripts)
Example #45
0
        def gen_decls(name_suffix, shape, strides, unvec_shape, unvec_strides,
                      stride_arg_axes, dtype, user_index):
            """
            :arg unvec_shape: shape tuple
                that accounts for :class:`loopy.kernel.array.VectorArrayDimTag`
                in a scalar manner
            :arg unvec_strides: strides tuple
                that accounts for :class:`loopy.kernel.array.VectorArrayDimTag`
                in a scalar manner
            :arg stride_arg_axes: a tuple *(user_axis, impl_axis, unvec_impl_axis)*
            :arg user_index: A tuple representing a (user-facing)
                multi-dimensional subscript. This is filled in with
                concrete integers when known (such as for separate-array
                dim tags), and with *None* where the index won't be
                known until run time.
            """

            if dtype is None:
                dtype = self.dtype

            user_axis = len(user_index)

            num_user_axes = self.num_user_axes(require_answer=False)

            if num_user_axes is None or user_axis >= num_user_axes:
                # {{{ recursion base case

                full_name = self.name + name_suffix

                stride_args = []
                strides = list(strides)
                unvec_strides = list(unvec_strides)

                # generate stride arguments, yielded later to keep array first
                for stride_user_axis, stride_impl_axis, stride_unvec_impl_axis \
                        in stride_arg_axes:
                    stride_name = full_name + "_stride%d" % stride_user_axis

                    from pymbolic import var
                    strides[stride_impl_axis] = \
                            unvec_strides[stride_unvec_impl_axis] = \
                            var(stride_name)

                    stride_args.append(
                        ImplementedDataInfo(
                            target=target,
                            name=stride_name,
                            dtype=index_dtype,
                            arg_class=ValueArg,
                            stride_for_name_and_axis=(full_name,
                                                      stride_impl_axis),
                            is_written=False))

                yield ImplementedDataInfo(target=target,
                                          name=full_name,
                                          base_name=self.name,
                                          arg_class=type(self),
                                          dtype=dtype,
                                          shape=shape,
                                          strides=tuple(strides),
                                          unvec_shape=unvec_shape,
                                          unvec_strides=tuple(unvec_strides),
                                          allows_offset=bool(self.offset),
                                          is_written=is_written)

                import loopy as lp

                if self.offset is lp.auto:
                    offset_name = full_name + "_offset"
                    yield ImplementedDataInfo(target=target,
                                              name=offset_name,
                                              dtype=index_dtype,
                                              arg_class=ValueArg,
                                              offset_for_name=full_name,
                                              is_written=False)

                yield from stride_args

                # }}}

                return

            dim_tag = self.dim_tags[user_axis]

            if isinstance(dim_tag, FixedStrideArrayDimTag):
                if array_shape is None:
                    new_shape_axis = None
                else:
                    new_shape_axis = array_shape[user_axis]

                import loopy as lp
                if dim_tag.stride is lp.auto:
                    new_stride_arg_axes = stride_arg_axes \
                            + ((user_axis, len(strides), len(unvec_strides)),)

                    # repaired above when final array name is known
                    # (and stride argument is created)
                    new_stride_axis = None
                else:
                    new_stride_arg_axes = stride_arg_axes
                    new_stride_axis = dim_tag.stride

                yield from gen_decls(name_suffix, shape + (new_shape_axis, ),
                                     strides + (new_stride_axis, ),
                                     unvec_shape + (new_shape_axis, ),
                                     unvec_strides + (new_stride_axis, ),
                                     new_stride_arg_axes, dtype,
                                     user_index + (None, ))

            elif isinstance(dim_tag, SeparateArrayArrayDimTag):
                shape_i = array_shape[user_axis]
                if not is_integer(shape_i):
                    raise LoopyError("shape of '%s' has non-constant "
                                     "integer axis %d (0-based)" %
                                     (self.name, user_axis))

                for i in range(shape_i):
                    yield from gen_decls(name_suffix + "_s%d" % i, shape,
                                         strides, unvec_shape, unvec_strides,
                                         stride_arg_axes, dtype,
                                         user_index + (i, ))

            elif isinstance(dim_tag, VectorArrayDimTag):
                shape_i = array_shape[user_axis]
                if not is_integer(shape_i):
                    raise LoopyError("shape of '%s' has non-constant "
                                     "integer axis %d (0-based)" %
                                     (self.name, user_axis))

                yield from gen_decls(
                    name_suffix,
                    shape,
                    strides,
                    unvec_shape + (shape_i, ),
                    # vectors always have stride 1
                    unvec_strides + (1, ),
                    stride_arg_axes,
                    target.vector_dtype(dtype, shape_i),
                    user_index + (None, ))

            else:
                raise LoopyError(
                    "unsupported array dim implementation tag '%s' "
                    "in array '%s'" % (dim_tag, self.name))
Example #46
0
 def _indices_for_axis_permutation(self, expr: AxisPermutation) -> SymbolicIndex:
     indices = [None] * expr.ndim
     for from_index, to_index in enumerate(expr.axis_permutation):
         indices[to_index] = var(f"_{from_index}")
     return tuple(indices)
Example #47
0
def test_aff_to_expr_2():
    from loopy.symbolic import aff_to_expr
    x = isl.Aff("[n] -> { [i0] -> [(-i0 + 2*floor((i0)/2))] }")
    from pymbolic import var
    i0 = var("i0")
    assert aff_to_expr(x) == (-1) * i0 + 2 * (i0 // 2)
Example #48
0
    def __init__(self, kernel, domain, sweep_inames, access_descriptors,
                 storage_axis_count):
        self.kernel = kernel
        self.sweep_inames = sweep_inames

        storage_axis_names = self.storage_axis_names = [
            "_loopy_storage_%d" % i for i in range(storage_axis_count)
        ]

        # {{{ duplicate sweep inames

        # The duplication is necessary, otherwise the storage fetch
        # inames remain weirdly tied to the original sweep inames.

        self.primed_sweep_inames = [psin + "'" for psin in sweep_inames]

        from loopy.isl_helpers import duplicate_axes
        dup_sweep_index = domain.space.dim(dim_type.out)
        domain_dup_sweep = duplicate_axes(domain, sweep_inames,
                                          self.primed_sweep_inames)

        self.prime_sweep_inames = SubstitutionMapper(
            make_subst_func({
                sin: var(psin)
                for sin, psin in zip(sweep_inames, self.primed_sweep_inames)
            }))

        # }}}

        self.stor2sweep = build_global_storage_to_sweep_map(
            kernel, access_descriptors, domain_dup_sweep, dup_sweep_index,
            storage_axis_names, sweep_inames, self.primed_sweep_inames,
            self.prime_sweep_inames)

        storage_base_indices, storage_shape = compute_bounds(
            kernel, domain, self.stor2sweep, self.primed_sweep_inames,
            storage_axis_names)

        # compute augmented domain

        # {{{ filter out unit-length dimensions

        non1_storage_axis_flags = []
        non1_storage_shape = []

        for saxis_len in storage_shape:
            has_length_non1 = saxis_len != 1

            non1_storage_axis_flags.append(has_length_non1)

            if has_length_non1:
                non1_storage_shape.append(saxis_len)

        # }}}

        # {{{ subtract off the base indices
        # add the new, base-0 indices as new in dimensions

        sp = self.stor2sweep.get_space()
        stor_idx = sp.dim(dim_type.out)

        n_stor = storage_axis_count
        nn1_stor = len(non1_storage_shape)

        aug_domain = self.stor2sweep.move_dims(dim_type.out, stor_idx,
                                               dim_type.in_, 0,
                                               n_stor).range()

        # aug_domain space now:
        # [domain](dup_sweep_index)[dup_sweep](stor_idx)[stor_axes']

        aug_domain = aug_domain.insert_dims(dim_type.set, stor_idx, nn1_stor)

        inew = 0
        for i, name in enumerate(storage_axis_names):
            if non1_storage_axis_flags[i]:
                aug_domain = aug_domain.set_dim_name(dim_type.set,
                                                     stor_idx + inew, name)
                inew += 1

        # aug_domain space now:
        # [domain](dup_sweep_index)[dup_sweep](stor_idx)[stor_axes'][n1_stor_axes]

        from loopy.symbolic import aff_from_expr
        for saxis, bi, s in zip(storage_axis_names, storage_base_indices,
                                storage_shape):
            if s != 1:
                cns = isl.Constraint.equality_from_aff(
                    aff_from_expr(aug_domain.get_space(),
                                  var(saxis) - (var(saxis + "'") - bi)))

                aug_domain = aug_domain.add_constraint(cns)

        # }}}

        # eliminate (primed) storage axes with non-zero base indices
        aug_domain = aug_domain.project_out(dim_type.set, stor_idx + nn1_stor,
                                            n_stor)

        # eliminate duplicated sweep_inames
        nsweep = len(sweep_inames)
        aug_domain = aug_domain.project_out(dim_type.set, dup_sweep_index,
                                            nsweep)

        self.non1_storage_axis_flags = non1_storage_axis_flags
        self.aug_domain = aug_domain
        self.storage_base_indices = storage_base_indices
        self.non1_storage_shape = non1_storage_shape
Example #49
0
    def emit_multiple_assignment(self, codegen_state, insn):
        ecm = codegen_state.expression_to_code_mapper

        from pymbolic.primitives import Variable
        from pymbolic.mapper.stringifier import PREC_NONE

        func_id = insn.expression.function
        parameters = insn.expression.parameters

        if isinstance(func_id, Variable):
            func_id = func_id.name

        assignee_var_descriptors = [
            codegen_state.kernel.get_var_descriptor(a)
            for a in insn.assignee_var_names()
        ]

        par_dtypes = tuple(ecm.infer_type(par) for par in parameters)

        mangle_result = codegen_state.kernel.mangle_function(
            func_id, par_dtypes)
        if mangle_result is None:
            raise RuntimeError(
                "function '%s' unknown--"
                "maybe you need to register a function mangler?" % func_id)

        assert mangle_result.arg_dtypes is not None

        from loopy.expression import dtype_to_type_context
        c_parameters = [
            ecm(par, PREC_NONE, dtype_to_type_context(self.target, tgt_dtype),
                tgt_dtype).expr for par, par_dtype, tgt_dtype in zip(
                    parameters, par_dtypes, mangle_result.arg_dtypes)
        ]

        from loopy.codegen import SeenFunction
        codegen_state.seen_functions.add(
            SeenFunction(func_id, mangle_result.target_name,
                         mangle_result.arg_dtypes))

        from pymbolic import var
        for i, (a, tgt_dtype) in enumerate(
                zip(insn.assignees[1:], mangle_result.result_dtypes[1:])):
            if tgt_dtype != ecm.infer_type(a):
                raise LoopyError("type mismatch in %d'th (1-based) left-hand "
                                 "side of instruction '%s'" % (i + 1, insn.id))
            c_parameters.append(
                # TODO Yuck: The "where-at function": &(...)
                var("&")(ecm(a, PREC_NONE,
                             dtype_to_type_context(self.target, tgt_dtype),
                             tgt_dtype).expr))

        from pymbolic import var
        result = var(mangle_result.target_name)(*c_parameters)

        # In case of no assignees, we are done
        if len(mangle_result.result_dtypes) == 0:
            from cgen import ExpressionStatement
            return ExpressionStatement(
                CExpression(self.get_c_expression_to_code_mapper(), result))

        result = ecm.wrap_in_typecast(mangle_result.result_dtypes[0],
                                      assignee_var_descriptors[0].dtype,
                                      result)

        lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None)

        from cgen import Assign
        return Assign(
            lhs_code,
            CExpression(self.get_c_expression_to_code_mapper(), result))
Example #50
0
def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,

        # "None" is a valid value here, distinct from the default.
        default_tag=_not_provided,

        rule_name=None,
        temporary_name=None,
        temporary_scope=None, temporary_is_local=None,
        footprint_subscripts=None,
        fetch_bounding_box=False,
        fetch_outer_inames=None):
    """Prefetch all accesses to the variable *var_name*, with all accesses
    being swept through *sweep_inames*.

    :arg var_name: A string, the name of the variable being prefetched.
        This may be a 'tagged variable name' (such as ``field$mytag``)
        to restrict the effect of the operation to only variable accesses
        with a matching tag.

        This may also be a subscripted version of the variable, in which
        case this access dictates the footprint that is prefetched,
        e.g. ``A[:,:]`` or ``field[i,j,:,:]``. In this case, accesses
        in the kernel are disregarded.

    :arg sweep_inames: A list of inames, or a comma-separated string of them.
        This routine 'sweeps' all accesses to *var_name* through all allowed
        values of the *sweep_inames* to generate a footprint. All values
        in this footprint are then stored in a temporary variable, and
        the original variable accesses replaced with accesses to this
        temporary.

    :arg dim_arg_names: List of names representing each fetch axis.
        These names show up as inames in the generated fetch code

    :arg default_tag: The :ref:`implementation tag <iname-tags>` to
        assign to the inames driving the prefetch code. Use *None* to
        leave them undefined (to assign them later by hand). The current
        default will make them local axes and automatically split them to
        fit the work group size, but this default will disappear in favor
        of simply leaving them untagged in 2019.x. For 2018.x, a warning
        will be issued if no *default_tag* is specified.

    :arg rule_name: base name of the generated temporary variable.
    :arg temporary_name: The name of the temporary to be used.
    :arg temporary_scope: The :class:`temp_var_scope` to use for the
        temporary.
    :arg temporary_is_local: Deprecated, use *temporary_scope* instead.
    :arg footprint_subscripts: A list of tuples indicating the index (i.e.
        subscript) tuples used to generate the footprint.

        If only one such set of indices is desired, this may also be specified
        directly by putting an index expression into *var_name*. Substitutions
        such as those occurring in dimension splits are recorded and also
        applied to these indices.

    :arg fetch_bounding_box: To fit within :mod:`loopy`'s execution model,
        the 'footprint' of the fetch currently has to be a convex set.
        Sometimes this is not the case, e.g. for a high-order stencil::

              o
              o
            ooooo
              o
              o

        The footprint of the stencil when 'swept' over a base domain
        would look like this, and because of the 'missing corners',
        this set is not convex::

              oooooooooo
              oooooooooo
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
              oooooooooo
              oooooooooo

        Passing ``fetch_bounding_box=True`` gives :mod:`loopy` permission
        to instead fetch the 'bounding box' of the footprint, i.e.
        this set in the stencil example::

            OOooooooooooOO
            OOooooooooooOO
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
            OOooooooooooOO
            OOooooooooooOO

        Note the added corners marked with "``O``". The resulting footprint is
        guaranteed to be convex.


    :arg fetch_outer_inames: The inames within which the fetch
        instruction is nested. If *None*, make an educated guess.

    This function internally uses :func:`extract_subst` and :func:`precompute`.
    """

    # {{{ fish indexing out of var_name and into footprint_subscripts

    from loopy.symbolic import parse
    parsed_var_name = parse(var_name)

    from pymbolic.primitives import Variable, Subscript
    if isinstance(parsed_var_name, Variable):
        # nothing to see
        pass
    elif isinstance(parsed_var_name, Subscript):
        if footprint_subscripts is not None:
            raise TypeError("if footprint_subscripts is specified, then var_name "
                    "may not contain a subscript")

        assert isinstance(parsed_var_name.aggregate, Variable)
        footprint_subscripts = [parsed_var_name.index]
        parsed_var_name = parsed_var_name.aggregate
    else:
        raise ValueError("var_name must either be a variable name or a subscript")

    # }}}

    # {{{ fish out tag

    from loopy.symbolic import TaggedVariable
    if isinstance(parsed_var_name, TaggedVariable):
        var_name = parsed_var_name.name
        tag = parsed_var_name.tag
    else:
        var_name = parsed_var_name.name
        tag = None

    # }}}

    c_name = var_name
    if tag is not None:
        c_name = c_name + "_" + tag

    var_name_gen = kernel.get_var_name_generator()

    if rule_name is None:
        rule_name = var_name_gen("%s_fetch_rule" % c_name)
    if temporary_name is None:
        temporary_name = var_name_gen("%s_fetch" % c_name)

    arg = kernel.arg_dict[var_name]

    # {{{ make parameter names and unification template

    parameters = []
    for i in range(arg.num_user_axes()):
        based_on = "%s_dim_%d" % (c_name, i)
        if arg.dim_names is not None:
            based_on = "%s_dim_%s" % (c_name, arg.dim_names[i])
        if dim_arg_names is not None and i < len(dim_arg_names):
            based_on = dim_arg_names[i]

        par_name = var_name_gen(based_on=based_on)
        parameters.append(par_name)

    from pymbolic import var
    uni_template = parsed_var_name
    if len(parameters) > 1:
        uni_template = uni_template.index(
                tuple(var(par_name) for par_name in parameters))
    elif len(parameters) == 1:
        uni_template = uni_template.index(var(parameters[0]))

    # }}}

    from loopy.transform.subst import extract_subst
    kernel = extract_subst(kernel, rule_name, uni_template, parameters)

    if isinstance(sweep_inames, str):
        sweep_inames = [s.strip() for s in sweep_inames.split(",")]
    else:
        # copy, standardize to list
        sweep_inames = list(sweep_inames)

    kernel, subst_use, sweep_inames, inames_to_be_removed = \
            _process_footprint_subscripts(
                    kernel,  rule_name, sweep_inames,
                    footprint_subscripts, arg)

    # Our _not_provided is a different object from the one in the precompute
    # module, but precompute actually uses it to adjust its warning message.

    from loopy.transform.precompute import precompute
    new_kernel = precompute(kernel, subst_use, sweep_inames,
            precompute_inames=dim_arg_names,
            default_tag=default_tag, dtype=arg.dtype,
            fetch_bounding_box=fetch_bounding_box,
            temporary_name=temporary_name,
            temporary_scope=temporary_scope, temporary_is_local=temporary_is_local,
            precompute_outer_inames=fetch_outer_inames)

    # {{{ remove inames that were temporarily added by slice sweeps

    new_domains = new_kernel.domains[:]

    for iname in inames_to_be_removed:
        home_domain_index = kernel.get_home_domain_index(iname)
        domain = new_domains[home_domain_index]

        dt, idx = domain.get_var_dict()[iname]
        assert dt == dim_type.set

        new_domains[home_domain_index] = domain.project_out(dt, idx, 1)

    new_kernel = new_kernel.copy(domains=new_domains)

    # }}}

    # If the rule survived past precompute() (i.e. some accesses fell outside
    # the footprint), get rid of it before moving on.
    if rule_name in new_kernel.substitutions:
        from loopy.transform.subst import expand_subst
        return expand_subst(new_kernel, "... > id:"+rule_name)
    else:
        return new_kernel
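
A hedged usage sketch of add_prefetch (the kernel and iname names are
invented; the tag choice follows the default_tag discussion above):

import loopy as lp

knl = lp.make_kernel(
    "{[i, j]: 0<=i,j<16}",
    "out[i, j] = 2*a[i, j]")
knl = lp.split_iname(knl, "i", 8, outer_tag="g.0", inner_tag="l.0")
# fetch the footprint of a swept by i_inner and j into a local temporary
knl = lp.add_prefetch(knl, "a", sweep_inames=["i_inner", "j"],
                      default_tag="l.auto")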
Example #51
0
 def rhs_sym(t, y):
     return var("lambda") * y
Example #52
0
def get_IfThenElse_test_code_and_expected_result():
    from dagrt.expression import IfThenElse

    with CodeBuilder(name="primary") as cb:
        cb(var("c1"), IfThenElse(True, 0, 1))
        cb(var("c2"), IfThenElse(False, 0, 1))
        cb(var("c3"), IfThenElse(IfThenElse(True, True, False), 0, 1))
        cb(var("c4"), IfThenElse(IfThenElse(False, True, False), 0, 1))
        cb(var("c5"), IfThenElse(True, IfThenElse(True, 0, 1), 2))
        cb(var("c6"), IfThenElse(True, IfThenElse(False, 0, 1), 2))
        cb(var("c7"), IfThenElse(False, 0, IfThenElse(True, 1, 2)))
        cb(var("c8"), IfThenElse(False, 0, IfThenElse(False, 1, 2)))
        cb(var("c9"), 1 + IfThenElse(True, 0, 1))
        cb(var("c10"), 1 + IfThenElse(False, 0, 1))
        cb.yield_state(tuple(var("c" + str(i)) for i in range(1, 11)),
                       "result", 0, "final")

    code = create_DAGCode_with_steady_phase(cb.statements)

    return (code, (0, 1, 0, 1, 0, 1, 1, 2, 1, 2))
Example #53
0
def privatize_temporaries_with_inames(kernel,
                                      privatizing_inames,
                                      only_var_names=None):
    """This function provides each loop iteration of the *privatizing_inames*
    with its own private entry in the temporaries it accesses (possibly
    restricted to *only_var_names*).

    This is accomplished implicitly as part of generating instruction-level
    parallelism by the "ILP" tag and accessible separately through this
    transformation.

    Example::

        for imatrix, i
            acc = 0
            for k
                acc = acc + a[imatrix, i, k] * vec[k]
            end
        end

    might become::

        for imatrix, i
            acc[imatrix] = 0
            for k
                acc[imatrix] = acc[imatrix] + a[imatrix, i, k] * vec[k]
            end
        end

    facilitating loop interchange of the *imatrix* loop.

    .. versionadded:: 2018.1
    """

    if isinstance(privatizing_inames, str):
        privatizing_inames = frozenset(s.strip()
                                       for s in privatizing_inames.split(","))

    if isinstance(only_var_names, str):
        only_var_names = frozenset(s.strip()
                                   for s in only_var_names.split(","))

    wmap = kernel.writer_map()

    var_to_new_priv_axis_iname = {}

    # {{{ find variables that need extra indices

    for tv in kernel.temporary_variables.values():
        if only_var_names is not None and tv.name not in only_var_names:
            continue

        for writer_insn_id in wmap.get(tv.name, []):
            writer_insn = kernel.id_to_insn[writer_insn_id]

            priv_axis_inames = writer_insn.within_inames & privatizing_inames

            referenced_priv_axis_inames = (
                priv_axis_inames
                & writer_insn.write_dependency_names())

            new_priv_axis_inames = priv_axis_inames - referenced_priv_axis_inames

            if not new_priv_axis_inames:
                break

            if tv.name in var_to_new_priv_axis_iname:
                if new_priv_axis_inames != set(
                        var_to_new_priv_axis_iname[tv.name]):
                    raise LoopyError(
                        "instruction '%s' requires adding "
                        "indices for privatizing var '%s' on iname(s) '%s', "
                        "but previous instructions required inames '%s'" %
                        (writer_insn_id, tv.name,
                         ", ".join(new_priv_axis_inames), ", ".join(
                             var_to_new_priv_axis_iname[tv.name])))

                continue

            var_to_new_priv_axis_iname[tv.name] = set(new_priv_axis_inames)

    # }}}

    # {{{ find ilp iname lengths

    from loopy.isl_helpers import static_max_of_pw_aff
    from loopy.symbolic import pw_aff_to_expr

    priv_axis_iname_to_length = {}
    iname_to_lbound = {}
    for priv_axis_inames in var_to_new_priv_axis_iname.values():
        for iname in priv_axis_inames:
            if iname in priv_axis_iname_to_length:
                continue

            bounds = kernel.get_iname_bounds(iname, constants_only=False)
            priv_axis_iname_to_length[iname] = pw_aff_to_expr(
                static_max_of_pw_aff(bounds.size, constants_only=False))
            iname_to_lbound[iname] = pw_aff_to_expr(bounds.lower_bound_pw_aff)
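
            # The lower bound is recorded so that the privatized accesses
            # inserted below can shift the iname to a zero-based index into
            # the newly added axis.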

    # }}}

    # {{{ change temporary variables

    from loopy.kernel.data import VectorizeTag

    new_temp_vars = kernel.temporary_variables.copy()
    for tv_name, inames in var_to_new_priv_axis_iname.items():
        tv = new_temp_vars[tv_name]
        extra_shape = tuple(priv_axis_iname_to_length[iname]
                            for iname in inames)

        shape = tv.shape
        if shape is None:
            shape = ()

        dim_tags = ["c"] * (len(shape) + len(extra_shape))
        for i, iname in enumerate(inames):
            if kernel.iname_tags_of_type(iname, VectorizeTag):
                dim_tags[len(shape) + i] = "vec"

        new_temp_vars[tv.name] = tv.copy(
            shape=shape + extra_shape,
            # Forget what you knew about data layout,
            # create from scratch.
            dim_tags=dim_tags,
            dim_names=None)

    # }}}

    from pymbolic import var
    var_to_extra_iname = {
        var_name: tuple(var(iname) for iname in inames)
        for var_name, inames in var_to_new_priv_axis_iname.items()
    }

    new_insns = []

    for insn in kernel.instructions:
        eiii = ExtraInameIndexInserter(var_to_extra_iname, iname_to_lbound)
        new_insn = insn.with_transformed_expressions(eiii)
        if not eiii.seen_priv_axis_inames <= insn.within_inames:
            raise LoopyError(
                "Kernel '%s': Instruction '%s': touched variable that "
                "(for privatization, e.g. as performed for ILP) "
                "required iname(s) '%s', but the instruction was not "
                "previously within those inames. To remedy this, first "
                "promote the instruction into the inames." %
                (kernel.name, insn.id,
                 ", ".join(eiii.seen_priv_axis_inames - insn.within_inames)))

        new_insns.append(new_insn)

    return kernel.copy(temporary_variables=new_temp_vars,
                       instructions=new_insns)
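
As a usage sketch (untested, and assuming the transform is exported as
lp.privatize_temporaries_with_inames, as in current loopy), the docstring's
example corresponds to roughly:

import loopy as lp

knl = lp.make_kernel(
    "{[imatrix, i, k]: 0 <= imatrix < nmat and 0 <= i, k < n}",
    """
    for imatrix, i
        <> acc = 0  {id=init}
        for k
            acc = acc + a[imatrix, i, k]*vec[k]  {id=update, dep=init}
        end
        out[imatrix, i] = acc  {id=write, dep=update}
    end
    """)

# Give each imatrix iteration its own copy of acc so the imatrix loop can
# be interchanged (or tagged ILP) without the iterations clobbering acc.
knl = lp.privatize_temporaries_with_inames(
    knl, "imatrix", only_var_names="acc")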
Example #54
0
    def map_variable(self, expr):
        from pymbolic import var
        if expr.name in self.which_vars:
            return var(expr.name + "'")
        else:
            return expr
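
In context, a method like this usually lives on a pymbolic IdentityMapper
subclass; a minimal self-contained version (with a hypothetical which_vars
attribute supplied via the constructor) might look like:

from pymbolic import var
from pymbolic.mapper import IdentityMapper

class PrimeVariables(IdentityMapper):
    """Rename each variable listed in which_vars from x to x'."""

    def __init__(self, which_vars):
        super().__init__()
        self.which_vars = which_vars

    def map_variable(self, expr):
        if expr.name in self.which_vars:
            return var(expr.name + "'")
        return expr

x, y = var("x"), var("y")
print(PrimeVariables({"x"})(x*y + x))  # x'*y + x'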
Example #55
0
    def test_em_dt_var(self):
        self._test_scheme(EulerMaryuyamaStep(pm.var('dt')))
Example #56
0
def test_conditions():
    from pymbolic import var
    x = var('x')
    y = var('y')
    assert str(x.eq(y).and_(x.le(5))) == "x == y and x <= 5"
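
The builder methods construct comparison and logical nodes without evaluating
them; given variable bindings, the resulting expression can then be evaluated.
A short sketch (assuming pymbolic's evaluate supports comparison and
logical-and nodes, as its evaluation mapper is expected to):

from pymbolic import var, evaluate

x = var('x')
y = var('y')
cond = x.eq(y).and_(x.le(5))

print(evaluate(cond, {"x": 3, "y": 3}))  # True:  3 == 3 and 3 <= 5
print(evaluate(cond, {"x": 7, "y": 7}))  # False: 7 <= 5 fails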
Example #57
0
def main():
    from leap.step_matrix import StepMatrixFinder

    from pymbolic import var

    speed_factor = 10
    method_name = "Fq"
    order = 3
    tol = 1e-8
    prec = 1e-5

    angles = np.linspace(0, 2 * np.pi, 100, endpoint=False)

    for step_ratio in [1, 2, 3, 4, 5, 6]:
        print("speed factor: %g - step ratio: %g - method: %s "
              "- order: %d" % (speed_factor, step_ratio, method_name, order))

        method = TwoRateAdamsBashforthMethod(method=method_name,
                                             order=order,
                                             step_ratio=step_ratio,
                                             static_dt=True)

        code = method.generate()

        finder = StepMatrixFinder(code,
                                  function_map={
                                      "<func>f2f":
                                      lambda t, f, s: var("f2f") * f,
                                      "<func>s2f":
                                      lambda t, f, s: var("s2f") * s,
                                      "<func>f2s":
                                      lambda t, f, s: var("f2s") * f,
                                      "<func>s2s":
                                      lambda t, f, s: var("s2s") * s,
                                  },
                                  exclude_variables=["<p>bootstrap_step"])

        mat = finder.get_phase_step_matrix("primary")

        if 0:
            print('Variables: %s' % finder.variables)
            np.set_printoptions(formatter={"all": str})
            print(mat)

        from leap.step_matrix import fast_evaluator
        evaluate = fast_evaluator(mat)

        def is_stable(major_eigval, dt):
            smat = evaluate({
                "<dt>": dt,
                "f2f": major_eigval,
                "s2f": 1 / speed_factor,
                "f2s": 1 / speed_factor,
                "s2s": major_eigval * 1 / speed_factor,
            })

            eigvals = la.eigvals(smat)

            return (np.abs(eigvals) <= 1 + tol).all()

        from leap.stability import find_truth_bdry
        from functools import partial

        points = []

        for angle in angles:
            eigval = np.exp(1j * angle)

            max_dt = find_truth_bdry(partial(is_stable, eigval), prec=prec)

            stable_fake_eigval = eigval * max_dt

            points.append([stable_fake_eigval.real, stable_fake_eigval.imag])

        points = np.array(points).T

        pt.plot(points[0], points[1], "x", label="steprat: %d" % step_ratio)

    pt.legend(loc="best")
    pt.grid()

    outfile = "mr-stability-diagram.pdf"
    pt.savefig(outfile)
    print("Output written to %s" % outfile)
Example #58
0
    def generate_butcher(self, stage_coeff_set_names, stage_coeff_sets,
                         rhs_funcs, estimate_coeff_set_names,
                         estimate_coeff_sets):
        """
        :arg stage_coeff_set_names: a list of names/string identifiers
            for stage coefficient sets
        :arg stage_coeff_sets: a mapping from set names to stage coefficients
        :arg rhs_funcs: a mapping from set names to right-hand-side
            functions
        :arg estimate_coeff_set_names: a list of names/string identifiers
            for estimate coefficient sets
        :arg estimate_coeff_sets: a mapping from estimate coefficient set
            names to coefficients.
        """

        from pymbolic import var
        comp = self.component_id

        dt = self.dt
        t = self.t
        state = self.state

        nstages = len(self.c)

        # {{{ check coefficients for plausibility

        for name in stage_coeff_set_names:
            for istage in range(nstages):
                coeff_sum = sum(stage_coeff_sets[name][istage])
                assert abs(coeff_sum - self.c[istage]) < 1e-12, (
                    name, istage, coeff_sum, self.c[istage])

        # }}}

        # {{{ initialization

        last_rhss = {}

        with CodeBuilder(name="initialization") as cb:
            for name in stage_coeff_set_names:
                if (name in self.recycle_last_stage_coeff_set_names
                        and _is_first_stage_same_as_last_stage(
                            self.c, stage_coeff_sets[name])):
                    last_rhss[name] = var("<p>last_rhs_" + name)
                    cb(last_rhss[name], rhs_funcs[name](t=t, **{comp: state}))

        cb_init = cb

        # }}}

        stage_rhs_vars = {}
        rhs_var_to_unknown = {}
        for name in stage_coeff_set_names:
            stage_rhs_vars[name] = [
                cb.fresh_var(f"rhs_{name}_s{i}") for i in range(nstages)
            ]

            # Each rhs variable gets a matching "unknown" stand-in, used
            # while the rhs is not yet known, i.e. pending an implicit solve.
            for i, rhsvar in enumerate(stage_rhs_vars[name]):
                unkvar = cb.fresh_var(f"unk_{name}_s{i}")
                rhs_var_to_unknown[rhsvar] = unkvar

        knowns = set()

        # {{{ stage loop

        last_state_est_var = cb.fresh_var("last_state_est")
        last_state_est_var_valid = False

        with CodeBuilder(name="primary") as cb:
            equations = []
            unknowns = set()
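
            # "equations" collects residuals my_rhs - rhs_expr for implicit
            # stages; once len(equations) == len(unknowns), the system is
            # square and is emitted as one implicit solve further down.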

            def make_known(v):
                unknowns.discard(v)
                knowns.add(v)

            for istage in range(nstages):
                for name in stage_coeff_set_names:
                    c = self.c[istage]
                    my_rhs = stage_rhs_vars[name][istage]

                    if (name in self.recycle_last_stage_coeff_set_names
                            and istage == 0
                            and _is_first_stage_same_as_last_stage(
                                self.c, stage_coeff_sets[name])):
                        cb(my_rhs, last_rhss[name])
                        make_known(my_rhs)

                    else:
                        is_implicit = False

                        state_increment = 0
                        for src_name in stage_coeff_set_names:
                            coeffs = stage_coeff_sets[src_name][istage]
                            for src_istage, coeff in enumerate(coeffs):
                                rhsval = stage_rhs_vars[src_name][src_istage]
                                if rhsval not in knowns:
                                    unknowns.add(rhsval)
                                    is_implicit = True

                                state_increment += dt * coeff * rhsval

                        state_est = state + state_increment
                        if (self.state_filter is not None and not (
                                # reusing last output state
                                c == 0 and all(
                                    len(stage_coeff_sets[src_name][istage]) ==
                                    0 for src_name in stage_coeff_set_names))):
                            state_est = self.state_filter(state_est)

                        if is_implicit:
                            rhs_expr = rhs_funcs[name](t=t + c * dt,
                                                       **{
                                                           comp: state_est
                                                       })

                            from dagrt.expression import collapse_constants
                            solve_expression = collapse_constants(
                                my_rhs - rhs_expr,
                                list(unknowns) + [self.state], cb.assign,
                                cb.fresh_var)
                            equations.append(solve_expression)

                            if istage + 1 == nstages:
                                last_state_est_var_valid = False

                        else:
                            if istage + 1 == nstages:
                                cb(last_state_est_var, state_est)
                                state_est = last_state_est_var
                                last_state_est_var_valid = True

                            rhs_expr = rhs_funcs[name](t=t + c * dt,
                                                       **{
                                                           comp: state_est
                                                       })

                            cb(my_rhs, rhs_expr)
                            make_known(my_rhs)

                    # {{{ emit solve if possible

                    if unknowns and len(unknowns) == len(equations):
                        # got a square system, let's solve
                        assignees = [unk.name for unk in unknowns]

                        from pymbolic import substitute
                        subst_dict = {
                            rhs_var.name: rhs_var_to_unknown[rhs_var]
                            for rhs_var in unknowns
                        }

                        cb.assign_implicit(
                            assignees=assignees,
                            solve_components=[
                                rhs_var_to_unknown[unk].name
                                for unk in unknowns
                            ],
                            expressions=[
                                substitute(eq, subst_dict) for eq in equations
                            ],

                            # TODO: Could supply a starting guess
                            other_params={"guess": state},
                            solver_id="solve")

                        del equations[:]
                        knowns.update(unknowns)
                        unknowns.clear()

                    # }}}

            # Compute solution estimates.
            estimate_vars = [
                cb.fresh_var("est_" + name)
                for name in estimate_coeff_set_names
            ]

            for iest, name in enumerate(estimate_coeff_set_names):
                out_coeffs = estimate_coeff_sets[name]

                if (last_state_est_var_valid and  # noqa: W504
                        _is_last_stage_same_as_output(self.c, stage_coeff_sets,
                                                      out_coeffs)):
                    state_est = last_state_est_var

                else:
                    state_increment = 0
                    for src_name in stage_coeff_set_names:
                        state_increment += sum(
                            coeff * stage_rhs_vars[src_name][src_istage]
                            for src_istage, coeff in enumerate(out_coeffs))

                    state_est = state + dt * state_increment

                    if self.state_filter is not None:
                        state_est = self.state_filter(state_est)

                cb(estimate_vars[iest], state_est)

            # This updates <t>.
            self.finish(cb, estimate_coeff_set_names, estimate_vars)

            # These updates have to happen *after* finish because before we
            # don't yet know whether finish will accept the new state.
            for name in stage_coeff_set_names:
                if (name in self.recycle_last_stage_coeff_set_names
                        and _is_first_stage_same_as_last_stage(
                            self.c, stage_coeff_sets[name])):
                    cb(last_rhss[name], stage_rhs_vars[name][-1])

        cb_primary = cb

        # }}}

        return DAGCode(phases={
            "initial":
            cb_init.as_execution_phase(next_phase="primary"),
            "primary":
            cb_primary.as_execution_phase(next_phase="primary")
        },
                       initial_phase="initial")
Example #59
0
    def get_strength_or_not(self, isrc, kernel_idx):
        return var("strength_%d" % self.strength_usage[kernel_idx]).index(isrc)
Example #60
0
    def map_wildcard(self, expr):
        from pymbolic import var
        return var(self.unique_var_name_factory())
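
map_wildcard replaces each wildcard with a fresh variable; the
unique_var_name_factory it calls is not shown in the snippet, but could
plausibly be a counter-backed helper along these lines (hypothetical):

from itertools import count

def make_unique_var_name_factory(prefix="_w"):
    counter = count()

    def unique_var_name_factory():
        # Each call yields a name not handed out before: _w0, _w1, ...
        return f"{prefix}{next(counter)}"

    return unique_var_name_factory

factory = make_unique_var_name_factory()
print(factory(), factory())  # _w0 _w1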