Exemple #1
0
def test_inf_support(ctx_factory, target, dtype):
    """Check that infinite literals make it through code generation.

    A kernel with an empty domain writes ``math.inf`` and ``-math.inf`` to
    two output arguments of type *dtype*.  The kernel is run on *target* and
    the results are checked with ``np.isinf`` / ``np.isneginf``.

    :arg ctx_factory: callable whose result is handed to ``cl.CommandQueue``.
    :arg target: target *class*; it is instantiated here.  Only
        ``lp.PyOpenCLTarget`` and ``lp.ExecutableCTarget`` are handled.
    :arg dtype: dtype of both output arguments.
    :raises NotImplementedError: for any other *target*.
    """
    import math
    from loopy.symbolic import parse

    # See: https://github.com/inducer/loopy/issues/443 for some laughs
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    insns = [
        lp.Assignment(parse("out_inf"), math.inf),
        lp.Assignment(parse("out_neginf"), -math.inf),
    ]
    kernel_args = [
        lp.GlobalArg("out_inf", shape=lp.auto, dtype=dtype),
        lp.GlobalArg("out_neginf", shape=lp.auto, dtype=dtype),
    ]
    knl = lp.make_kernel("{:}", insns, kernel_args, target=target())
    knl = lp.set_options(knl, "return_dict")

    if target == lp.PyOpenCLTarget:
        _, device_results = knl(queue)
        # Results need an explicit ``.get()`` before numpy can inspect them.
        out_dict = {}
        for key, dev_value in device_results.items():
            out_dict[key] = dev_value.get()
    elif target == lp.ExecutableCTarget:
        _, out_dict = knl()
    else:
        raise NotImplementedError("unsupported target")

    assert np.isinf(out_dict["out_inf"])
    assert np.isneginf(out_dict["out_neginf"])
Exemple #2
0
    def initialise_terminals(self, var2terminal, coefficients):
        """ Initialisation of the variables in which coefficients
            and the Tensors coming from TSFC are saved.

            For every entry of *var2terminal* a local loopy temporary is
            declared.  Tensors that are not ``assembled`` get an instruction
            setting them to ``"0."``; assembled ones get one copy instruction
            per entry of their shape tuple, reading from the coefficient
            argument(s) looked up in *coefficients*.

            :arg var2terminal: dictionary iterated here as
                ``(gem_tensor, slate_tensor)`` pairs, i.e. keyed by the gem
                variable with the terminal Slate tensor as value
            :arg coefficients: mapping indexed with each entry of a tensor's
                ``form``; the looked-up value is either indexed with ``[0]``
                or, for mixed elements, iterated via ``.values()`` as
                ``(name, ext)`` pairs
            :returns: tuple ``(inits, tensor2temp)`` - the list of
                initialisation instructions and an ``OrderedDict`` mapping
                each Slate tensor to its loopy temporary
        """

        tensor2temp = OrderedDict()
        inits = []
        for gem_tensor, slate_tensor in var2terminal.items():
            assert slate_tensor.terminal, "Only terminal tensors need to be initialised in Slate kernels."
            # assign_dtypes is given a single tensor, so exactly one
            # (_, dtype) pair is unpacked from its result.
            (_, dtype), = assign_dtypes([gem_tensor],
                                        self.tsfc_parameters["scalar_type"])
            loopy_tensor = loopy.TemporaryVariable(
                gem_tensor.name,
                dtype=dtype,
                shape=gem_tensor.shape,
                address_space=loopy.AddressSpace.LOCAL)
            tensor2temp[slate_tensor] = loopy_tensor

            if not slate_tensor.assembled:
                # Not assembled: a single instruction assigns "0." to the
                # temporary, subscripted by one fresh index per dimension.
                indices = self.bag.index_creator(self.shape(slate_tensor))
                inames = {var.name for var in indices}
                var = pym.Subscript(pym.Variable(loopy_tensor.name), indices)
                inits.append(
                    loopy.Assignment(var,
                                     "0.",
                                     # ids are numbered by insertion position
                                     id="init%d" % len(inits),
                                     within_inames=frozenset(inames)))

            else:
                # Normalise ``form`` to a tuple so that one or several
                # entries are handled by the same code path.
                f = slate_tensor.form if isinstance(
                    slate_tensor.form, tuple) else (slate_tensor.form, )
                coeff = tuple(coefficients[c] for c in f)
                offset = 0
                # Exact type comparison: subclasses of MixedElement are not
                # flagged as mixed here.
                ismixed = tuple(
                    (type(c.ufl_element()) == MixedElement) for c in f)
                # Flat list of argument names: all names of a mixed
                # coefficient, or the single ``c[0]`` entry otherwise.
                names = []
                for (im, c) in zip(ismixed, coeff):
                    names += [name
                              for (name, ext) in c.values()] if im else [c[0]]

                # Mixed coefficients come as separate parameters (one per space)
                # The values of ``shapes`` are unpacked as the arguments of
                # enumerate() - presumably ``shapes`` has a single entry
                # (a tuple of extents); TODO confirm.
                for i, shp in enumerate(*slate_tensor.shapes.values()):
                    indices = self.bag.index_creator((shp, ))
                    inames = {var.name for var in indices}
                    # Write position in the temporary: running offset plus
                    # the local index of this piece.
                    offset_index = (pym.Sum((offset, indices[0])), )
                    # NOTE(review): ``ismixed`` is a tuple in this version, so
                    # this condition tests whether the tuple is non-empty
                    # rather than a per-coefficient flag; ``names`` is always
                    # a list here - confirm the else-branch is intended.
                    name = names[i] if ismixed else names
                    var = pym.Subscript(pym.Variable(loopy_tensor.name),
                                        offset_index)
                    c = pym.Subscript(pym.Variable(name), indices)
                    inits.append(
                        loopy.Assignment(var,
                                         c,
                                         id="init%d" % len(inits),
                                         within_inames=frozenset(inames)))
                    # The next piece starts right after this one.
                    offset += shp

        return inits, tensor2temp
Exemple #3
0
def statement_evaluate(leaf, ctx):
    """Translate the expression held by *leaf* into loopy instructions.

    The kind of ``leaf.expression`` selects the translation:

    * ``gem.ListTensor``: one assignment per array entry,
    * ``gem.Constant``: nothing to emit,
    * ``gem.ComponentTensor``: one assignment, built while the tensor's
      multiindex is active,
    * ``gem.Inverse`` / ``gem.Solve``: one call instruction to ``inverse`` /
      ``solve`` operating on whole-array references,
    * anything else: one assignment of the translated expression.

    :arg leaf: object providing the expression as ``leaf.expression``.
    :arg ctx: translation context used to obtain pymbolic variables,
        multiindices and the currently active inames.
    :returns: list of loopy instructions (possibly empty).
    """
    expr = leaf.expression

    def whole_array_ref(node):
        # Reference to all of *node*: a fresh multiindex over its shape,
        # swept by a SubArrayRef.
        sweep = ctx.pymbolic_multiindex(node.shape)
        array = ctx.pymbolic_variable(node)
        return SubArrayRef(sweep, p.Subscript(array, sweep))

    if isinstance(expr, gem.ListTensor):
        base, prefix = ctx.pymbolic_variable_and_destruct(expr)
        return [
            lp.Assignment(p.Subscript(base, prefix + multiindex),
                          expression(value, ctx),
                          within_inames=ctx.active_inames())
            for multiindex, value in numpy.ndenumerate(expr.array)
        ]

    if isinstance(expr, gem.Constant):
        return []

    if isinstance(expr, gem.ComponentTensor):
        idx = ctx.gem_to_pym_multiindex(expr.multiindex)
        base, sub_idx = ctx.pymbolic_variable_and_destruct(expr)
        lhs = p.Subscript(base, idx + sub_idx)
        index_map = dict(zip(expr.multiindex, idx))
        with active_indices(index_map, ctx) as ctx_active:
            rhs = expression(expr.children[0], ctx_active)
            insn = lp.Assignment(lhs,
                                 rhs,
                                 within_inames=ctx_active.active_inames())
            return [insn]

    if isinstance(expr, gem.Inverse):
        # Output reference is created before the input one.
        lhs = (whole_array_ref(expr), )
        reads = (whole_array_ref(expr.children[0]), )
        rhs = p.Call(p.Variable("inverse"), reads)
        return [
            lp.CallInstruction(lhs, rhs, within_inames=ctx.active_inames())
        ]

    if isinstance(expr, gem.Solve):
        lhs = (whole_array_ref(expr), )
        reads = tuple([whole_array_ref(child) for child in expr.children])
        rhs = p.Call(p.Variable("solve"), reads)
        return [
            lp.CallInstruction(lhs, rhs, within_inames=ctx.active_inames())
        ]

    return [
        lp.Assignment(ctx.pymbolic_variable(expr),
                      expression(expr, ctx, top=True),
                      within_inames=ctx.active_inames())
    ]
Exemple #4
0
def test_uniquify_instruction_ids():
    """Four instructions without fixed ids must end up with distinct str ids.

    Two instructions carry ``id=None`` and two carry ``lp.UniqueName("b")``;
    after ``uniquify_instruction_ids`` all four ids have to be different
    strings.
    """
    from loopy.transform.instruction import uniquify_instruction_ids

    without_id = [lp.Assignment("b", 1, id=None) for _ in range(2)]
    with_name_hint = [
        lp.Assignment("b", 1, id=lp.UniqueName("b")) for _ in range(2)
    ]

    knl = lp.make_kernel("{[i]: i = 1}", [])
    knl = knl.copy(instructions=without_id + with_name_hint)
    knl = uniquify_instruction_ids(knl)

    insn_ids = set()
    for insn in knl.instructions:
        insn_ids.add(insn.id)

    assert len(insn_ids) == 4
    assert all(isinstance(insn_id, str) for insn_id in insn_ids)
Exemple #5
0
def statement_evaluate(leaf, ctx):
    """Translate the expression held by *leaf* into loopy assignments.

    A ``gem.ListTensor`` yields one assignment per array entry, a
    ``gem.Constant`` yields nothing, and every other expression yields a
    single assignment of its translated form.

    :arg leaf: object providing the expression as ``leaf.expression``.
    :arg ctx: translation context giving pymbolic variables and the active
        inames.
    :returns: list of ``lp.Assignment`` (possibly empty).
    """
    expr = leaf.expression

    if isinstance(expr, gem.ListTensor):
        target = ctx.pymbolic_variable(expr)
        if isinstance(target, p.Subscript):
            # Already subscripted: write through the underlying aggregate
            # and keep the existing indices as a prefix.
            base, prefix = target.aggregate, target.index_tuple
        else:
            base, prefix = target, ()
        return [
            lp.Assignment(p.Subscript(base, prefix + multiindex),
                          expression(value, ctx),
                          within_inames=ctx.active_inames())
            for multiindex, value in numpy.ndenumerate(expr.array)
        ]

    if isinstance(expr, gem.Constant):
        return []

    insn = lp.Assignment(ctx.pymbolic_variable(expr),
                         expression(expr, ctx, top=True),
                         within_inames=ctx.active_inames())
    return [insn]
    def _get_scalar_func_loopy_program(self, name, nargs, naxes):
        """Build a loopy program applying scalar function *name* entrywise.

        The program has one iname ``i<k>`` and one size parameter ``n<k>``
        per axis, and a single instruction
        ``out[i0, ...] = name(inp0[i0, ...], ..., inp<nargs-1>[i0, ...])``.

        :arg name: function name; ``"arctan2"`` is emitted as ``"atan2"``,
            while passing ``"atan2"`` directly triggers a
            ``DeprecationWarning``.
        :arg nargs: number of ``inp<k>`` operands passed to the function.
        :arg naxes: number of array axes / loop inames.
        :returns: the result of ``make_loopy_program``, named
            ``actx_special_<name>``.
        """
        from islpy import make_zero_and_vars
        from pymbolic import var

        if name == "atan2":
            from warnings import warn
            warn(
                "'atan2' in ArrayContext.np is deprecated. Use 'arctan2', "
                "as in numpy2. This will be disallowed in 2021.",
                DeprecationWarning,
                stacklevel=3)
        elif name == "arctan2":
            name = "atan2"

        axis_names = []
        extent_names = []
        for axis in range(naxes):
            axis_names.append("i%d" % axis)
            extent_names.append("n%d" % axis)
        subscript = tuple(var(axis_name) for axis_name in axis_names)

        # Start from the unconstrained domain and bound each axis between
        # the zero expression and its size parameter.
        space = make_zero_and_vars(axis_names, params=extent_names)
        zero = space[0]
        domain = zero.domain()
        for axis_name, extent_name in zip(axis_names, extent_names):
            lower = zero.le_set(space[axis_name])
            upper = space[axis_name].lt_set(space[extent_name])
            domain = domain & lower & upper

        # Unpacking asserts that the domain consists of one basic set.
        domain_bset, = domain.get_basic_sets()

        operands = [var("inp%d" % i)[subscript] for i in range(nargs)]
        insn = lp.Assignment(var("out")[subscript], var(name)(*operands))

        return make_loopy_program([domain_bset], [insn],
                                  name="actx_special_%s" % name)
Exemple #7
0
 def knl(sym_then, sym_else):
     """Build the program ``out[iel, idof] = sym_then if crit[iel, idof] else sym_else``.

     :arg sym_then: expression used where ``crit`` is true.
     :arg sym_else: expression used otherwise.
     :returns: the result of ``make_loopy_program`` over the
         ``(iel, idof)`` domain.
     """
     domain = "{[iel, idof]: 0<=iel<nelements and 0<=idof<nunit_dofs}"
     target = var("out")[iel, idof]
     selection = p.If(var("crit")[iel, idof], sym_then, sym_else)
     return make_loopy_program(domain, [lp.Assignment(target, selection)])
Exemple #8
0
def test_forced_iname_deps_and_reduction():
    """Regression test: a reduction with forced-empty iname deps stays out of 'i'.

    After preprocessing, the instruction ``insn_0_j_update`` must not be
    nested inside iname ``i``.
    """
    # See https://github.com/inducer/loopy/issues/24

    # This is (purposefully) somewhat un-idiomatic, to replicate the conditions
    # under which the above bug was found. If assignees were phi[i], then the
    # iname propagation heuristic would not assume that dependent instructions
    # need to run inside of 'i', and hence the forced_iname_* bits below would not
    # be needed.
    from pymbolic.primitives import Subscript, Variable

    phi_producer = lp.CInstruction("i",
                                   "doSomethingToGetPhi();",
                                   assignees="phi")

    phi_j = Subscript(Variable("phi"), Variable("j"))
    phi_sum = lp.Reduction("sum", "j", phi_j)
    reducer = lp.Assignment("a",
                            phi_sum,
                            forced_iname_deps=frozenset(),
                            forced_iname_deps_is_final=True)

    kernel_data = [
        lp.GlobalArg("a", dtype=np.float32, shape=()),
        lp.ValueArg("n", dtype=np.int32),
        lp.TemporaryVariable("phi", dtype=np.float32, shape=("n", )),
    ]

    k = lp.make_kernel("{[i,j] : 0<=i,j<n}",
                       [phi_producer, reducer],
                       kernel_data,
                       target=lp.CTarget())
    k = lp.preprocess_kernel(k)

    update_inames = k.insn_inames("insn_0_j_update")
    assert 'i' not in update_inames
    print(k.stringify(with_dependencies=True))
Exemple #9
0
def test_nan_support(ctx_factory):
    """Check NaN literals and ``isnan`` in generated code.

    ``a`` is assigned ``np.nan``; ``b`` and ``c`` hold ``isnan(a)`` and
    ``isnan(3.14)`` and are expected to be 1 and 0 respectively.

    :arg ctx_factory: callable whose result is handed to ``cl.CommandQueue``.
    """
    from loopy.symbolic import parse

    ctx = ctx_factory()

    insns = [
        lp.Assignment(parse("a"), np.nan),
        lp.Assignment(parse("b"), parse("isnan(a)")),
        lp.Assignment(parse("c"), parse("isnan(3.14)")),
    ]
    knl = lp.make_kernel("{:}", insns, seq_dependencies=True)
    knl = lp.set_options(knl, "return_dict")

    queue = cl.CommandQueue(ctx)
    _, out_dict = knl(queue)

    a_host = out_dict["a"].get()
    assert np.isnan(a_host)
    assert out_dict["b"] == 1
    assert out_dict["c"] == 0
Exemple #10
0
def to_loopy_insns(assignments, vector_names=None, pymbolic_expr_maps=None,
                   complex_dtype=None, retain_names=None):
    """Convert named sympy expressions into loopy assignment instructions.

    The expressions are converted to pymbolic, trivial assignments are
    removed (except those named in *retain_names*), and each remaining
    expression is run through the rewriting mappers set up below, followed by
    the caller-supplied *pymbolic_expr_maps*.

    :arg assignments: iterable of ``(name, sympy_expr)`` pairs.
    :arg vector_names: names passed to ``VectorComponentRewriter``.
        Defaults to an empty set.
    :arg pymbolic_expr_maps: callables applied, in order, to every converted
        expression after the built-in rewrites.  Defaults to none.
    :arg complex_dtype: accepted for interface compatibility; not used by
        this function.
    :arg retain_names: names passed to ``kill_trivial_assignments``.
        Defaults to an empty set.
    :returns: list of ``lp.Assignment`` with ``id=None`` and
        ``temp_var_type=lp.Optional(None)``.
    """
    # Fresh containers per call instead of shared mutable default arguments.
    if vector_names is None:
        vector_names = set()
    if pymbolic_expr_maps is None:
        pymbolic_expr_maps = []
    if retain_names is None:
        retain_names = set()

    logger.info("loopy instruction generation: start")
    assignments = list(assignments)

    # convert from sympy
    sympy_conv = SympyToPymbolicMapper()
    assignments = [(name, sympy_conv(expr)) for name, expr in assignments]

    assignments = kill_trivial_assignments(assignments, retain_names)

    bdr = BesselDerivativeReplacer()
    assignments = [(name, bdr(expr)) for name, expr in assignments]

    # The gatherer is run over every expression first; what it collects is
    # handed to the BesselGetter below.
    btog = BesselTopOrderGatherer()
    for name, expr in assignments:
        btog(expr)

    #from pymbolic.mapper.cse_tagger import CSEWalkMapper, CSETagMapper
    #cse_walk = CSEWalkMapper()
    #for name, expr in assignments:
    #    cse_walk(expr)
    #cse_tag = CSETagMapper(cse_walk)

    # do the rest of the conversion
    bessel_sub = BesselSubstitutor(BesselGetter(btog.bessel_j_arg_to_top_order))
    vcr = VectorComponentRewriter(vector_names)
    pwr = PowerRewriter()
    ssg = SumSignGrouper()
    fck = FractionKiller()
    bik = BigIntegerKiller()
    cmr = ComplexRewriter()

    def convert_expr(name, expr):
        # Lazy %-formatting: the message is only built if DEBUG is enabled.
        logger.debug("generate expression for: %s", name)
        expr = bdr(expr)
        expr = bessel_sub(expr)
        expr = vcr(expr)
        expr = pwr(expr)
        expr = fck(expr)
        expr = ssg(expr)
        expr = bik(expr)
        expr = cmr(expr)
        #expr = cse_tag(expr)
        for m in pymbolic_expr_maps:
            expr = m(expr)
        return expr

    import loopy as lp
    from pytools import MinRecursionLimit
    # The conversion runs with the recursion limit raised to at least 3000.
    with MinRecursionLimit(3000):
        result = [
                lp.Assignment(id=None,
                    assignee=name, expression=convert_expr(name, expr),
                    temp_var_type=lp.Optional(None))
                for name, expr in assignments]

    logger.info("loopy instruction generation: done")
    return result
Exemple #11
0
 def knl():
     """Build ``out[i] = sym_then if crit[i] else sym_else`` and split ``i``.

     :returns: the kernel with ``i`` split by 128, outer part tagged
         ``g.0`` and inner part tagged ``l.0``.
     """
     selection = p.If(var("crit")[i], sym_then, sym_else)
     insn = lp.Assignment(var("out")[i], selection)
     unsplit = lp.make_kernel("{[i]: 0<=i<n}", [insn])
     return lp.split_iname(unsplit,
                           "i",
                           128,
                           outer_tag="g.0",
                           inner_tag="l.0")
Exemple #12
0
def test_uniquify_instruction_ids():
    """Four instructions without fixed ids must end up with distinct str ids.

    The instructions are installed into the kernel ``lpy_knl`` of a program,
    which is then passed through ``uniquify_instruction_ids``.
    """
    from loopy.transform.instruction import uniquify_instruction_ids

    without_id = [lp.Assignment("b", 1, id=None) for _ in range(2)]
    with_name_hint = [
        lp.Assignment("b", 1, id=lp.UniqueName("b")) for _ in range(2)
    ]

    prog = lp.make_kernel("{[i]: i = 1}", [], name="lpy_knl")
    replaced_kernel = prog["lpy_knl"].copy(
        instructions=without_id + with_name_hint)
    prog = prog.with_kernel(replaced_kernel)
    prog = uniquify_instruction_ids(prog)

    insn_ids = set()
    for insn in prog["lpy_knl"].instructions:
        insn_ids.add(insn.id)

    assert len(insn_ids) == 4
    assert all(isinstance(insn_id, str) for insn_id in insn_ids)
Exemple #13
0
 def get_kernel_scaling_assignment(self):
     """Return the instruction assigning ``kernel_scaling``.

     The value is the expansion kernel's global scaling constant,
     converted from sympy to pymbolic.

     :returns: one-element list holding the ``lp.Assignment``.
     """
     from sumpy.symbolic import SympyToPymbolicMapper

     to_pymbolic = SympyToPymbolicMapper()
     scaling = to_pymbolic(self.expansion.kernel.get_global_scaling_const())
     insn = lp.Assignment(id=None,
                          assignee="kernel_scaling",
                          expression=scaling,
                          temp_var_type=lp.Optional(None))
     return [insn]
Exemple #14
0
    def get_kernel(self, **kwargs):
        """Assemble the ``eval_expr`` loopy kernel.

        For every target ``itgt`` the kernel runs the variable-assignment
        code, evaluates the normalised expression into ``expr_val`` and
        stores it in ``result[itgt]``.

        :arg kwargs: only ``extra_kernel_kwarg_types`` is read: an iterable
            of additional kernel arguments (default: none).
        :returns: the kernel with ``dim`` fixed to ``self.dim``,
            ``write_cl=False`` and ``return_dict=True`` set, and the
            function manglers / preamble generators registered when they
            are not ``None``.
        """
        extra_kernel_kwarg_types = kwargs.get("extra_kernel_kwarg_types", ())

        eval_inames = frozenset(["itgt"])
        scalar_assignment = lp.Assignment(
            id=None,
            assignee="expr_val",
            expression=self.get_normalised_expr(),
            temp_var_type=None,
        )
        # The evaluation has to happen inside the target loop as well.
        eval_insns = [
            scalar_assignment.copy(
                within_inames=scalar_assignment.within_inames | eval_inames)
        ]

        var_assignment_block = """
                for itgt
                    VAR_ASSIGNMENT
                end
                """.replace("VAR_ASSIGNMENT",
                            self.get_variable_assignment_code())
        store_block = """
                for itgt
                    result[itgt] = expr_val
                end
                """
        instructions = [var_assignment_block] + eval_insns + [store_block]

        fixed_args = [
            lp.ValueArg("dim, n_targets", np.int32),
            lp.GlobalArg("target_points", np.float64, "dim, n_targets"),
            lp.TemporaryVariable("expr_val", None, ()),
        ]
        kernel_data = fixed_args + list(extra_kernel_kwarg_types) + ["..."]

        loopy_knl = lp.make_kernel(  # NOQA
            "{ [itgt]: 0<=itgt<n_targets }",
            instructions,
            kernel_data,
            name="eval_expr",
            lang_version=(2018, 2),
        )

        loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim)
        loopy_knl = lp.set_options(loopy_knl, write_cl=False)
        loopy_knl = lp.set_options(loopy_knl, return_dict=True)

        manglers = self.function_manglers
        if manglers is not None:
            loopy_knl = lp.register_function_manglers(loopy_knl, manglers)

        preambles = self.preamble_generators
        if preambles is not None:
            loopy_knl = lp.register_preamble_generators(loopy_knl, preambles)

        return loopy_knl
Exemple #15
0
def test_multi_arg_array_call(ctx_factory):
    """Call a two-output callee kernel from a caller kernel.

    ``custom_argmin`` is built from explicit instructions and merged with a
    caller that passes ``b`` and receives ``min_val`` / ``min_index``; the
    results are compared against ``np.min`` and ``np.argmin``.

    :arg ctx_factory: callable whose result is handed to ``cl.CommandQueue``.
    """
    import pymbolic.primitives as p
    from numpy.linalg import norm

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    n = 10
    tol = 1e-15

    i = p.Variable("i")
    acc_i = p.Variable("acc_i")
    index = p.Variable("index")
    a_i = p.Subscript(p.Variable("a"), i)

    callee_insns = [
        lp.Assignment(id="init2", assignee=index, expression=0),
        lp.Assignment(id="init1", assignee=acc_i, expression="214748367"),
        lp.Assignment(id="insn",
                      assignee=index,
                      expression=p.If(p.Expression.eq(acc_i, a_i), i, index),
                      depends_on="update"),
        lp.Assignment(id="update",
                      assignee=acc_i,
                      expression=p.Variable("min")(acc_i, a_i),
                      depends_on="init1,init2"),
    ]
    callee_args = [
        lp.GlobalArg("a"),
        lp.GlobalArg("acc_i, index",
                     is_input=False,
                     is_output=True,
                     shape=lp.auto),
        ...,
    ]
    argmin_kernel = lp.make_function("{[i]: 0 <= i < n}",
                                     callee_insns,
                                     callee_args,
                                     name="custom_argmin")
    argmin_kernel = lp.fix_parameters(argmin_kernel, n=n)

    knl = lp.make_kernel(
        "{[i]:0<=i<n}", """
            []: min_val[()], []: min_index[()] = custom_argmin([i]:b[i])
            """)
    knl = lp.fix_parameters(knl, n=n)
    knl = lp.set_options(knl, return_dict=True)
    knl = lp.merge([knl, argmin_kernel])

    b = np.random.randn(n)
    _, out_dict = knl(queue, b=b)

    val_error = norm(out_dict["min_val"] - np.min(b))
    assert (val_error < tol)
    index_error = norm(out_dict["min_index"] - np.argmin(b))
    assert (index_error < tol)
Exemple #16
0
    def cumsum(self, arg):
        """
        Registers a substitution rule in order to cumulatively sum the
        elements of the one-dimensional array ``arg``. Intended to mimic
        :func:`numpy.cumsum`.

        :arg arg: the array to sum; must have a one-dimensional ``shape``
            (asserted below). Its ``name`` is invoked like a function of the
            summation index inside the reduction.

        :return: An instance of :class:`numloopy.ArraySymbol` which is
            registered as the cumulative summed-substitution rule.
        """
        # Note: this can remain as a substitution but loopy does not have
        # support for translating inames for substitutions to the kernel
        # domains
        assert len(arg.shape) == 1
        # Both inames are requested with the same base name; the name
        # generator is relied on to hand out distinct names.
        i_iname = self.name_generator(based_on="i")
        j_iname = self.name_generator(based_on="i")

        space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT,
                                            [i_iname, j_iname])
        domain = isl.BasicSet.universe(space)
        arg_name = self.name_generator(based_on="arr")
        subst_name = self.name_generator(based_on="subst")
        # Bound the output index i by 0 and arg.shape[0].
        domain = domain & make_slab(space, i_iname, 0, arg.shape[0])
        # Constraint on j with coefficient 1 - presumably j >= 0.
        domain = domain.add_constraint(
            isl.Constraint.ineq_from_names(space, {j_iname: 1}))
        # NOTE(review): if ineq_from_names means "expression >= 0", this
        # reads i - j - 1 >= 0, i.e. j < i, which would exclude element i
        # from the sum - unlike numpy.cumsum, which includes it. Confirm.
        domain = domain.add_constraint(
            isl.Constraint.ineq_from_names(space, {
                j_iname: -1,
                i_iname: 1,
                1: -1
            }))
        # Array that stores the summed values ...
        cumsummed_arg = ArraySymbol(stack=self,
                                    name=arg_name,
                                    shape=arg.shape,
                                    dtype=arg.dtype)
        # ... and the symbol, named after the substitution rule, that is
        # returned to the caller.
        cumsummed_subst = ArraySymbol(stack=self,
                                      name=subst_name,
                                      shape=arg.shape,
                                      dtype=arg.dtype)
        subst_iname = self.name_generator(based_on="i")
        # subst_name(k) := arg_name[k]
        rule = lp.SubstitutionRule(
            subst_name, (subst_iname, ),
            Subscript(Variable(arg_name), (Variable(subst_iname), )))

        from loopy.library.reduction import SumReductionOperation

        # arg_name[i] = sum(j, <arg.name>(j))
        insn = lp.Assignment(assignee=Subscript(Variable(arg_name),
                                                (Variable(i_iname), )),
                             expression=lp.Reduction(
                                 SumReductionOperation(), (j_iname, ),
                                 parse('{}({})'.format(arg.name, j_iname))))
        self.data.append(cumsummed_arg)
        self.substs_to_arrays[subst_name] = arg_name
        self.register_implicit_assignment(insn)
        self.domains.append(domain)

        self.register_substitution(rule)
        return cumsummed_subst
Exemple #17
0
def test_child_invalid_type_cast():
    """Preprocessing must fail with ``lp.LoopyError`` for this type cast.

    The kernel casts ``ctr``, assigned from ``make_uint2(0, 0)``, to
    ``np.int64`` and shifts the result by ``i``.
    """
    from pymbolic import var

    shifted = lp.TypeCast(np.int64, var("ctr")) << var("i")
    insns = [
        "<> ctr = make_uint2(0, 0)",
        lp.Assignment("a[i]", shifted),
    ]
    knl = lp.make_kernel("{[i]: 0<=i<n}", insns)

    with pytest.raises(lp.LoopyError):
        lp.preprocess_kernel(knl)
Exemple #18
0
    def initialise_terminals(self, var2terminal, coefficients):
        """Declare and initialise the temporaries for terminal tensors.

        Every entry of *var2terminal* gets a local loopy temporary.  A
        ``slate.Tensor`` is set to ``"0."``; a ``slate.AssembledVector`` is
        filled piecewise from the coefficient argument(s) of its function.
        Other tensor kinds only get the temporary.

        :arg var2terminal: dictionary iterated as
            ``(gem_tensor, slate_tensor)`` pairs.
        :arg coefficients: mapping indexed by an assembled vector's
            ``_function``.
        :returns: tuple ``(inits, tensor2temp)``: the initialisation
            instructions and an ``OrderedDict`` from Slate tensor to its
            loopy temporary.
        """
        inits = []
        tensor2temp = OrderedDict()

        for gem_tensor, slate_tensor in var2terminal.items():
            temp = loopy.TemporaryVariable(
                gem_tensor.name,
                shape=gem_tensor.shape,
                address_space=loopy.AddressSpace.LOCAL)
            tensor2temp[slate_tensor] = temp

            if isinstance(slate_tensor, slate.Tensor):
                indices = self.bag.index_creator(self.shape(slate_tensor))
                iname_set = frozenset(idx.name for idx in indices)
                lhs = pym.Subscript(pym.Variable(temp.name), indices)
                # Instruction ids are numbered by insertion position.
                insn_id = "init%d" % len(inits)
                inits.append(loopy.Assignment(lhs, "0.", id=insn_id,
                                              within_inames=iname_set))
                continue

            if not isinstance(slate_tensor, slate.AssembledVector):
                continue

            function = slate_tensor._function
            coeff = coefficients[function]
            # Exact type comparison: subclasses of MixedElement do not count
            # as mixed here.
            ismixed = (type(function.ufl_element()) == MixedElement)
            if ismixed:
                names = [name for (name, ext) in coeff.values()]
            else:
                names = coeff[0]

            # Mixed coefficients come as separate parameters (one per space)
            offset = 0
            for i, shp in enumerate(*slate_tensor.shapes.values()):
                indices = self.bag.index_creator((shp,))
                iname_set = frozenset(idx.name for idx in indices)
                # Destination: running offset plus the index in this piece.
                dest_index = (pym.Sum((offset, indices[0])),)
                source_name = names[i] if ismixed else names
                lhs = pym.Subscript(pym.Variable(temp.name), dest_index)
                rhs = pym.Subscript(pym.Variable(source_name), indices)
                insn_id = "init%d" % len(inits)
                inits.append(loopy.Assignment(lhs, rhs, id=insn_id,
                                              within_inames=iname_set))
                offset += shp

        return inits, tensor2temp
Exemple #19
0
def test_math_function(target, tp):
    """Check which C math function names are emitted for min and max.

    The generated code must contain ``fmin`` / ``fmax``.  The
    single-precision names ``fminf`` / ``fmaxf`` must appear exactly when
    *tp* is ``"f32"`` and *target* is ``CTarget``.

    :arg target: target class; it is instantiated here.
    :arg tp: ``"f32"`` or ``"f64"``.
    """
    # Test correct maths functions are generated for C and OpenCL
    # backend instead for different data type
    import pymbolic.primitives as p

    data_type = {"f32": np.float32, "f64": np.float64}[tp]

    i = p.Variable("i")
    xi, yi, zi = (p.Subscript(p.Variable(array), i) for array in "xyz")

    n = 100
    domain = "{[i]: 0<=i<%d}" % n
    data = [lp.GlobalArg(array, data_type, shape=(n, )) for array in "xyz"]

    for func in ("min", "max"):
        inst = [lp.Assignment(xi, p.Variable(func)(yi, zi))]
        knl = lp.make_kernel(domain, inst, data, target=target())
        code = lp.generate_code_v2(knl).device_code()

        double_name = "f" + func
        single_name = double_name + "f"

        assert double_name in code

        if tp == "f32" and target == CTarget:
            assert single_name in code
        else:
            assert single_name not in code
Exemple #20
0
def test_np_bool_handling(ctx_factory):
    """A ``np.bool_`` operand of ``LogicalNot`` must be handled.

    ``y`` is assigned ``not np.bool_(False)`` and is expected to come back
    as ``True``.

    :arg ctx_factory: callable whose result is handed to ``cl.CommandQueue``.
    """
    import pymbolic.primitives as p
    from loopy.symbolic import parse

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    insns = [lp.Assignment(parse("y"), p.LogicalNot(np.bool_(False)))]
    kernel_args = [lp.GlobalArg("y", dtype=np.bool_, shape=lp.auto)]
    knl = lp.make_kernel("{:}", insns, kernel_args)

    _, (out, ) = knl(queue)
    host_value = out.get().item()
    assert host_value is True
Exemple #21
0
    def get_kernel_scaling_assignments(self):
        """Return one ``knl_<i>_scaling`` assignment per kernel.

        Kernel ``i`` of ``self.kernels`` is paired with entry ``i`` of
        ``self.value_dtypes``; its global scaling constant is converted from
        sympy to pymbolic and assigned to a temporary of that dtype.

        :returns: list of ``lp.Assignment``.
        """
        import loopy as lp
        from sumpy.symbolic import SympyToPymbolicMapper

        sympy_conv = SympyToPymbolicMapper()

        insns = []
        kernels_with_dtypes = zip(self.kernels, self.value_dtypes)
        for i, (kernel, dtype) in enumerate(kernels_with_dtypes):
            insns.append(
                lp.Assignment(
                    id=None,
                    assignee="knl_%d_scaling" % i,
                    expression=sympy_conv(kernel.get_global_scaling_const()),
                    temp_var_type=lp.Optional(dtype)))
        return insns
Exemple #22
0
def loopy_inst_aug_assign(expr, context):
    """Translate an augmented assignment into a loopy assignment.

    Both operands are translated with ``loopy_instructions``; the result
    assigns ``lhs <op> rhs`` back to ``lhs``, where the operator is picked
    by the exact type of *expr* (``IAdd``, ``ISub``, ``IMul`` or ``IDiv``).

    :arg expr: the augmented-assignment node with two ``ufl_operands``.
    :arg context: translation context; provides ``within_inames``.
    :returns: a ``loopy.Assignment``.
    :raises KeyError: if the type of *expr* is none of the four above.
    """
    import operator

    translated = [
        loopy_instructions(operand, context) for operand in expr.ufl_operands
    ]
    lhs, rhs = translated

    combine_by_type = {
        IAdd: operator.add,
        ISub: operator.sub,
        IMul: operator.mul,
        IDiv: operator.truediv,
    }
    combine = combine_by_type[type(expr)]

    return loopy.Assignment(lhs,
                            combine(lhs, rhs),
                            within_inames=context.within_inames)
 def knl():
     """Build ``out[i] = func_name(a[i])`` and split ``i`` by 128.

     :returns: the kernel with the outer part of ``i`` tagged ``g.0`` and
         the inner part tagged ``l.0``.
     """
     call = Variable(func_name)(Variable("a")[i])
     insn = lp.Assignment(Variable("out")[i], call)
     unsplit = lp.make_kernel("{[i]: 0<=i<n}", [insn],
                              default_offset=lp.auto)
     return lp.split_iname(unsplit,
                           "i",
                           128,
                           outer_tag="g.0",
                           inner_tag="l.0")
Exemple #24
0
def statement_assign(expr, context):
    """Translate an assignment node into a ``loopy.Assignment``.

    If the assignee is ``Indexed``, the names of its index ordering are
    recorded in ``context.index_ordering`` first.  Inames, instruction id,
    dependencies and predicates are all taken from *context*.

    :arg expr: assignment node whose ``children`` are (lvalue, rvalue).
    :arg context: translation context (parameters, inames, dependencies,
        active conditions).
    :returns: a ``loopy.Assignment`` with final dependencies.
    """
    target, _ = expr.children
    if isinstance(target, Indexed):
        context.index_ordering.append(
            tuple(index.name for index in target.index_ordering()))

    translated = [expression(child, context.parameters)
                  for child in expr.children]
    lvalue, rvalue = translated

    within_inames = context.within_inames[expr]
    insn_id, depends_on = context.instruction_dependencies[expr]
    predicates = frozenset(context.conditions)

    return loopy.Assignment(lvalue,
                            rvalue,
                            within_inames=within_inames,
                            predicates=predicates,
                            id=insn_id,
                            depends_on=depends_on,
                            depends_on_is_final=True)
Exemple #25
0
    def get_kernel_exprs(self, result_names):
        """Return the ``pair_result_<i>`` assignments.

        Result *i* is the variable named ``result_names[i]`` multiplied by
        ``self.get_strength_or_not(isrc, i)``.

        :arg result_names: names of the per-kernel result variables.
        :returns: list of ``lp.Assignment``, one per result name.
        """
        isrc_sym = var("isrc")

        # All products are built first, then wrapped into instructions.
        weighted = []
        for i, name in enumerate(result_names):
            weighted.append(var(name) * self.get_strength_or_not(isrc_sym, i))

        insns = []
        for i, expr in enumerate(weighted):
            insns.append(
                lp.Assignment(id=None,
                              assignee="pair_result_%d" % i,
                              expression=expr,
                              temp_var_type=lp.Optional(None)))
        return insns
        def build_ass():
            """Build the assignment ``A_T[i,j] = sum(k, A0[i,j,k] * G_T[k])``.

            Symbols are looked up in the enclosing ``inames`` and ``args``
            mappings.

            :returns: an ``lp.Assignment``.
            """
            # Look up the symbols of all required inames and arguments
            i = inames["i"]
            j = inames["j"]
            k = inames["k"]
            A_T = args["A_T"]
            A0 = args["A0"]
            G_T = args["G_T"]

            # Assignee
            target = pb.Subscript(A_T, (i, j))

            # Right-hand side: Frobenius inner product <A0[i,j],G_T>
            summand = pb.Subscript(A0, (i, j, k)) * pb.Subscript(G_T, k)
            reduce_op = lp.library.reduction.SumReductionOperation()
            expr = lp.Reduction(reduce_op, k, summand)

            return lp.Assignment(target, expr)
    def build_ass():
        """Build the assignment ``A[i,j] = c*sum(k, B[k,i]*B[k,j])``.

        Symbols are looked up in the enclosing ``inames`` and ``args``
        mappings.

        :returns: an ``lp.Assignment``.
        """
        # Assignee
        a_sym = args["A"]
        i, j = inames["i"], inames["j"]
        target = pb.Subscript(a_sym, (i, j))

        # Right-hand side: a reduce operation of the matrix columns
        # Maybe replace with manual increment?
        reduce_op = lp.library.reduction.SumReductionOperation()
        b_sym = args["B"]
        k = inames["k"]
        column_product = pb.Subscript(b_sym, (k, i)) * pb.Subscript(
            b_sym, (k, j))
        expr = args["c"] * lp.Reduction(reduce_op, k, column_product)

        return lp.Assignment(target, expr)
Exemple #28
0
def test_int_max_min_c_target(ctx_factory, which):
    """Integer ``<which>(a, b)`` on the executable C target matches numpy.

    Two random integer arrays of length 100 are combined elementwise in a
    kernel and compared against ``np.<which>imum``.

    :arg ctx_factory: not used in the body.
    :arg which: prefix of the function under test; the kernel calls
        ``<which>`` and the reference is ``np.<which>imum``.
    """
    from numpy.random import default_rng
    from pymbolic import parse

    rng = default_rng()

    n = 100
    arr1 = rng.integers(-100, 100, n)
    arr2 = rng.integers(-100, 100, n)
    np_func = getattr(np, f"{which}imum")

    insn = lp.Assignment(parse("out[i]"),
                         parse(f"{which}(arr1[i], arr2[i])"))
    knl = lp.make_kernel("{[i]: 0<=i<100}", [insn],
                         target=lp.ExecutableCTarget())

    _, (out, ) = knl(arr1=arr1, arr2=arr2)
    expected = np_func(arr1, arr2)
    np.testing.assert_allclose(expected, out)
Exemple #29
0
def test_fuzz_code_generator(ctx_factory):
    """Compare loopy-generated code against pymbolic on random expressions.

    For each of 50 random expressions the value computed by a compiled
    loopy kernel is compared with ``pymbolic.evaluate``.  Expressions whose
    reference evaluation raises ``ZeroDivisionError`` are skipped.  On a
    relative error above ``1e-10`` the diagnostic data is printed and the
    test fails.

    :arg ctx_factory: callable whose result is handed to ``cl.CommandQueue``.
    """
    from pymbolic import evaluate

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if ctx.devices[0].platform.vendor.startswith("Advanced Micro"):
        pytest.skip("crashes on AMD 15.12")

    def get_dtype(x):
        # Complex inputs need a complex argument type; the rest is float64.
        if isinstance(x, (complex, np.complexfloating)):
            return np.complex128
        return np.float64

    #from expr_fuzz import get_fuzz_examples
    #for expr, var_values in get_fuzz_examples():
    for expr, var_values in generate_random_fuzz_examples(50):
        try:
            true_value = evaluate(expr, var_values)
        except ZeroDivisionError:
            continue

        value_args = [
            lp.ValueArg(name, get_dtype(val))
            for name, val in var_values.items()
        ]
        knl = lp.make_kernel("{ : }", [lp.Assignment("value", expr)],
                             [lp.GlobalArg("value", np.complex128, shape=())] +
                             value_args)
        ck = lp.CompiledKernel(ctx, knl)
        evt, (lp_value, ) = ck(queue, out_host=True, **var_values)
        err = abs(true_value - lp_value) / abs(true_value)
        if abs(err) > 1e-10:
            print(80 * "-")
            print("WRONG: rel error=%g" % err)
            print("true=%r" % true_value)
            print("loopy=%r" % lp_value)
            print(80 * "-")
            print(ck.get_code())
            print(80 * "-")
            print(var_values)
            print(80 * "-")
            print(repr(expr))
            print(80 * "-")
            print(expr)
            print(80 * "-")
            # Fail explicitly rather than through a deliberate 1 / 0, so the
            # report names the actual cause.
            pytest.fail("loopy result deviates from pymbolic evaluation: "
                        "rel error=%g" % err)
Exemple #30
0
def test_sized_integer_c_codegen(ctx_factory):
    """A literal cast to ``np.int64`` must be shifted as a 64-bit value.

    ``a[i] = int64(1) << i`` is evaluated for ``i`` in ``[0, 40)`` and
    compared with the same shift computed by numpy on int64.

    :arg ctx_factory: callable whose result is handed to ``cl.CommandQueue``.
    """
    from pymbolic import var

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    shifted_one = lp.TypeCast(np.int64, 1) << var("i")
    knl = lp.make_kernel("{[i]: 0<=i<n}",
                         [lp.Assignment("a[i]", shifted_one)])
    knl = lp.set_options(knl, write_code=True)

    n = 40
    _, (a, ) = knl(queue, n=n)

    expected = 1 << np.arange(n, dtype=np.int64)
    assert np.array_equal(expected, a.get())