def test_inf_support(ctx_factory, target, dtype):
    """Check that +inf/-inf literals survive code generation on each target."""
    import math

    from loopy.symbolic import parse

    # See: https://github.com/inducer/loopy/issues/443 for some laughs
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    prg = lp.make_kernel(
        "{:}",
        [
            lp.Assignment(parse("out_inf"), math.inf),
            lp.Assignment(parse("out_neginf"), -math.inf),
        ],
        [
            lp.GlobalArg("out_inf", shape=lp.auto, dtype=dtype),
            lp.GlobalArg("out_neginf", shape=lp.auto, dtype=dtype),
        ],
        target=target())
    prg = lp.set_options(prg, "return_dict")

    if target == lp.PyOpenCLTarget:
        _, out_dict = prg(cq)
        out_dict = {name: arr.get() for name, arr in out_dict.items()}
    elif target == lp.ExecutableCTarget:
        _, out_dict = prg()
    else:
        raise NotImplementedError("unsupported target")

    assert np.isinf(out_dict["out_inf"])
    assert np.isneginf(out_dict["out_neginf"])
def initialise_terminals(self, var2terminal, coefficients):
    """ Initialisation of the variables in which coefficients
        and the Tensors coming from TSFC are saved.

        :arg var2terminal: dictionary that maps Slate Tensors to gem Variables
        :arg coefficients: mapping from (pieces of) Slate coefficients to
            their kernel-argument names; consumed for ``assembled`` terminals.
        :returns: 2-tuple ``(inits, tensor2temp)`` of the generated
            initialisation instructions and an :class:`OrderedDict` mapping
            each Slate terminal to its loopy temporary.
    """
    tensor2temp = OrderedDict()
    inits = []
    for gem_tensor, slate_tensor in var2terminal.items():
        assert slate_tensor.terminal, "Only terminal tensors need to be initialised in Slate kernels."
        # Pick the scalar dtype for this tensor from the TSFC parameters.
        (_, dtype), = assign_dtypes([gem_tensor], self.tsfc_parameters["scalar_type"])
        # Loopy-side temporary backing this terminal tensor.
        loopy_tensor = loopy.TemporaryVariable(
            gem_tensor.name,
            dtype=dtype,
            shape=gem_tensor.shape,
            address_space=loopy.AddressSpace.LOCAL)
        tensor2temp[slate_tensor] = loopy_tensor

        if not slate_tensor.assembled:
            # Plain terminal tensor: zero-initialise the whole temporary.
            indices = self.bag.index_creator(self.shape(slate_tensor))
            inames = {var.name for var in indices}
            var = pym.Subscript(pym.Variable(loopy_tensor.name), indices)
            inits.append(
                loopy.Assignment(var, "0.", id="init%d" % len(inits),
                                 within_inames=frozenset(inames)))
        else:
            # Assembled terminal: copy coefficient data into the temporary,
            # one contiguous block per (sub-)function space.
            f = slate_tensor.form if isinstance(
                slate_tensor.form, tuple) else (slate_tensor.form, )
            coeff = tuple(coefficients[c] for c in f)
            offset = 0
            ismixed = tuple(
                (type(c.ufl_element()) == MixedElement) for c in f)
            names = []
            for (im, c) in zip(ismixed, coeff):
                names += [name for (name, ext) in c.values()] if im else [c[0]]

            # Mixed coefficients come as seperate parameter (one per space)
            for i, shp in enumerate(*slate_tensor.shapes.values()):
                indices = self.bag.index_creator((shp, ))
                inames = {var.name for var in indices}
                # Write at a running offset into the flat temporary.
                offset_index = (pym.Sum((offset, indices[0])), )
                # NOTE(review): ``ismixed`` is a *tuple* here, so
                # ``names[i] if ismixed`` takes the indexed branch whenever
                # any form piece exists, not only for genuinely mixed
                # elements — confirm this is intended.
                name = names[i] if ismixed else names
                var = pym.Subscript(pym.Variable(loopy_tensor.name), offset_index)
                c = pym.Subscript(pym.Variable(name), indices)
                inits.append(
                    loopy.Assignment(var, c, id="init%d" % len(inits),
                                     within_inames=frozenset(inames)))
                offset += shp

    return inits, tensor2temp
def statement_evaluate(leaf, ctx):
    # Lower a single "evaluate" statement into a list of loopy instructions,
    # dispatching on the type of the gem expression being evaluated.
    expr = leaf.expression
    if isinstance(expr, gem.ListTensor):
        # One scalar assignment per entry of the list tensor.
        ops = []
        var, index = ctx.pymbolic_variable_and_destruct(expr)
        for multiindex, value in numpy.ndenumerate(expr.array):
            ops.append(
                lp.Assignment(p.Subscript(var, index + multiindex),
                              expression(value, ctx),
                              within_inames=ctx.active_inames()))
        return ops
    elif isinstance(expr, gem.Constant):
        # Constants are materialised elsewhere; nothing to emit here.
        return []
    elif isinstance(expr, gem.ComponentTensor):
        # Bind the tensor's free multiindex to fresh pymbolic indices and
        # assign the scalar child expression under those active indices.
        idx = ctx.gem_to_pym_multiindex(expr.multiindex)
        var, sub_idx = ctx.pymbolic_variable_and_destruct(expr)
        lhs = p.Subscript(var, idx + sub_idx)
        with active_indices(dict(zip(expr.multiindex, idx)), ctx) as ctx_active:
            return [
                lp.Assignment(lhs,
                              expression(expr.children[0], ctx_active),
                              within_inames=ctx_active.active_inames())
            ]
    elif isinstance(expr, gem.Inverse):
        # Matrix inverse: emit a call to an external "inverse" routine,
        # passing whole-array views via SubArrayRef.
        idx = ctx.pymbolic_multiindex(expr.shape)
        var = ctx.pymbolic_variable(expr)
        lhs = (SubArrayRef(idx, p.Subscript(var, idx)), )
        idx_reads = ctx.pymbolic_multiindex(expr.children[0].shape)
        var_reads = ctx.pymbolic_variable(expr.children[0])
        reads = (SubArrayRef(idx_reads, p.Subscript(var_reads, idx_reads)), )
        rhs = p.Call(p.Variable("inverse"), reads)
        return [
            lp.CallInstruction(lhs, rhs, within_inames=ctx.active_inames())
        ]
    elif isinstance(expr, gem.Solve):
        # Linear solve: like Inverse, but with two operands (matrix, rhs).
        idx = ctx.pymbolic_multiindex(expr.shape)
        var = ctx.pymbolic_variable(expr)
        lhs = (SubArrayRef(idx, p.Subscript(var, idx)), )
        reads = []
        for child in expr.children:
            idx_reads = ctx.pymbolic_multiindex(child.shape)
            var_reads = ctx.pymbolic_variable(child)
            reads.append(
                SubArrayRef(idx_reads, p.Subscript(var_reads, idx_reads)))
        rhs = p.Call(p.Variable("solve"), tuple(reads))
        return [
            lp.CallInstruction(lhs, rhs, within_inames=ctx.active_inames())
        ]
    else:
        # Scalar-valued expression: plain assignment of the translated
        # expression into its destination variable.
        return [
            lp.Assignment(ctx.pymbolic_variable(expr),
                          expression(expr, ctx, top=True),
                          within_inames=ctx.active_inames())
        ]
def test_uniquify_instruction_ids():
    """Two auto-id assignments plus two requesting the same UniqueName must
    all end up with distinct string ids after uniquification."""
    insns = [
        lp.Assignment("b", 1, id=None),
        lp.Assignment("b", 1, id=None),
        lp.Assignment("b", 1, id=lp.UniqueName("b")),
        lp.Assignment("b", 1, id=lp.UniqueName("b")),
    ]

    knl = lp.make_kernel("{[i]: i = 1}", []).copy(instructions=insns)

    from loopy.transform.instruction import uniquify_instruction_ids
    knl = uniquify_instruction_ids(knl)

    all_ids = {insn.id for insn in knl.instructions}
    assert len(all_ids) == 4
    assert all(isinstance(insn_id, str) for insn_id in all_ids)
def statement_evaluate(leaf, ctx):
    """Lower an "evaluate" statement to a list of loopy instructions."""
    expr = leaf.expression

    if isinstance(expr, gem.ListTensor):
        # Destructure the destination into (aggregate, base index) so each
        # entry can be addressed individually.
        target = ctx.pymbolic_variable(expr)
        base_index = ()
        if isinstance(target, p.Subscript):
            target, base_index = target.aggregate, target.index_tuple
        # One assignment per entry of the list tensor.
        return [
            lp.Assignment(p.Subscript(target, base_index + multiindex),
                          expression(entry, ctx),
                          within_inames=ctx.active_inames())
            for multiindex, entry in numpy.ndenumerate(expr.array)]
    elif isinstance(expr, gem.Constant):
        # Constants are materialised elsewhere; nothing to emit.
        return []
    else:
        # Scalar-valued expression: a single plain assignment.
        return [lp.Assignment(ctx.pymbolic_variable(expr),
                              expression(expr, ctx, top=True),
                              within_inames=ctx.active_inames())]
def _get_scalar_func_loopy_program(self, name, nargs, naxes):
    """Build a loopy program applying scalar function *name* elementwise
    over *naxes*-dimensional arrays, reading *nargs* inputs."""
    # Canonical spelling is "arctan2"; "atan2" is accepted but deprecated.
    if name == "arctan2":
        name = "atan2"
    elif name == "atan2":
        from warnings import warn
        warn("'atan2' in ArrayContext.np is deprecated. Use 'arctan2', "
             "as in numpy2. This will be disallowed in 2021.",
             DeprecationWarning, stacklevel=3)

    from pymbolic import var

    idx_names = ["i%d" % iaxis for iaxis in range(naxes)]
    bound_names = ["n%d" % iaxis for iaxis in range(naxes)]
    subscript = tuple(var(iname) for iname in idx_names)

    # Build the ISL domain 0 <= i_k < n_k for every axis.
    from islpy import make_zero_and_vars
    v = make_zero_and_vars(idx_names, params=bound_names)
    domain = v[0].domain()
    for iname, bound in zip(idx_names, bound_names):
        domain = domain & v[0].le_set(v[iname]) & v[iname].lt_set(v[bound])

    domain_bset, = domain.get_basic_sets()

    return make_loopy_program(
        [domain_bset],
        [lp.Assignment(
            var("out")[subscript],
            var(name)(*[var("inp%d" % iarg)[subscript]
                        for iarg in range(nargs)]))],
        name="actx_special_%s" % name)
def knl(sym_then, sym_else):
    # Build a program computing, elementwise over (iel, idof):
    #   out[iel, idof] = sym_then if crit[iel, idof] else sym_else
    # NOTE(review): ``var``, ``p``, ``iel`` and ``idof`` come from the
    # enclosing scope (not visible here) — presumably pymbolic variables
    # matching the inames declared in the domain; confirm against the caller.
    return make_loopy_program(
        "{[iel, idof]: 0<=iel<nelements and 0<=idof<nunit_dofs}",
        [
            lp.Assignment(
                var("out")[iel, idof],
                p.If(var("crit")[iel, idof], sym_then, sym_else))
        ])
def test_forced_iname_deps_and_reduction():
    # See https://github.com/inducer/loopy/issues/24
    # Deliberately un-idiomatic setup to reproduce the reported bug: because
    # the CInstruction assigns the scalar "phi" (rather than phi[i]), the
    # iname propagation heuristic would not force dependent instructions
    # inside 'i', hence the explicit forced_iname_* arguments below.
    from pymbolic.primitives import Subscript, Variable

    cinsn = lp.CInstruction("i", "doSomethingToGetPhi();", assignees="phi")
    reduction_insn = lp.Assignment(
        "a",
        lp.Reduction("sum", "j", Subscript(Variable("phi"), Variable("j"))),
        forced_iname_deps=frozenset(),
        forced_iname_deps_is_final=True)

    k = lp.make_kernel(
        "{[i,j] : 0<=i,j<n}",
        [cinsn, reduction_insn],
        [
            lp.GlobalArg("a", dtype=np.float32, shape=()),
            lp.ValueArg("n", dtype=np.int32),
            lp.TemporaryVariable("phi", dtype=np.float32, shape=("n", )),
        ],
        target=lp.CTarget(),
    )

    k = lp.preprocess_kernel(k)

    assert "i" not in k.insn_inames("insn_0_j_update")
    print(k.stringify(with_dependencies=True))
def test_nan_support(ctx_factory):
    """NaN literals must round-trip through codegen, and isnan() must work
    on both variables and constants."""
    from loopy.symbolic import parse

    ctx = ctx_factory()

    instructions = [
        lp.Assignment(parse("a"), np.nan),
        lp.Assignment(parse("b"), parse("isnan(a)")),
        lp.Assignment(parse("c"), parse("isnan(3.14)")),
    ]
    knl = lp.make_kernel("{:}", instructions, seq_dependencies=True)
    knl = lp.set_options(knl, "return_dict")

    _, result = knl(cl.CommandQueue(ctx))

    assert np.isnan(result["a"].get())
    assert result["b"] == 1
    assert result["c"] == 0
def to_loopy_insns(assignments, vector_names=None, pymbolic_expr_maps=None,
        complex_dtype=None, retain_names=None):
    """Convert *(name, sympy expression)* pairs into loopy assignments.

    :arg assignments: iterable of ``(name, sympy_expr)`` pairs.
    :arg vector_names: set of names treated as vectors by the
        :class:`VectorComponentRewriter` (default: empty set).
    :arg pymbolic_expr_maps: sequence of extra mappers applied to each
        converted expression, in order (default: none).
    :arg complex_dtype: unused here; retained for interface compatibility.
    :arg retain_names: names whose trivial assignments must not be
        eliminated (default: empty set).
    :returns: list of :class:`loopy.Assignment` instances.
    """
    # Fix: the previous defaults were mutable objects (set()/[]) shared
    # across calls; normalize None to fresh empties instead.
    if vector_names is None:
        vector_names = set()
    if pymbolic_expr_maps is None:
        pymbolic_expr_maps = []
    if retain_names is None:
        retain_names = set()

    logger.info("loopy instruction generation: start")
    assignments = list(assignments)

    # convert from sympy
    sympy_conv = SympyToPymbolicMapper()
    assignments = [(name, sympy_conv(expr)) for name, expr in assignments]
    assignments = kill_trivial_assignments(assignments, retain_names)

    bdr = BesselDerivativeReplacer()
    assignments = [(name, bdr(expr)) for name, expr in assignments]

    # Gather the top Bessel order per argument before substitution.
    btog = BesselTopOrderGatherer()
    for name, expr in assignments:
        btog(expr)

    # do the rest of the conversion
    bessel_sub = BesselSubstitutor(BesselGetter(btog.bessel_j_arg_to_top_order))
    vcr = VectorComponentRewriter(vector_names)
    pwr = PowerRewriter()
    ssg = SumSignGrouper()
    fck = FractionKiller()
    bik = BigIntegerKiller()
    cmr = ComplexRewriter()

    def convert_expr(name, expr):
        # Apply the rewriter pipeline in a fixed order, then any
        # caller-supplied mappers.
        logger.debug("generate expression for: %s" % name)
        expr = bdr(expr)
        expr = bessel_sub(expr)
        expr = vcr(expr)
        expr = pwr(expr)
        expr = fck(expr)
        expr = ssg(expr)
        expr = bik(expr)
        expr = cmr(expr)
        for m in pymbolic_expr_maps:
            expr = m(expr)
        return expr

    import loopy as lp
    from pytools import MinRecursionLimit

    # Deeply nested expressions can exceed the default recursion limit.
    with MinRecursionLimit(3000):
        result = [
            lp.Assignment(id=None,
                    assignee=name, expression=convert_expr(name, expr),
                    temp_var_type=lp.Optional(None))
            for name, expr in assignments]

    logger.info("loopy instruction generation: done")
    return result
def knl():
    # Build a kernel computing out[i] = sym_then if crit[i] else sym_else,
    # then split i 128-wide onto group/local axis 0 for GPU execution.
    # NOTE(review): ``var``, ``p``, ``i``, ``sym_then`` and ``sym_else`` come
    # from the enclosing scope (not visible here) — confirm ``i`` is a
    # pymbolic variable matching the iname "i" in the domain.
    knl = lp.make_kernel(
        "{[i]: 0<=i<n}",
        [
            lp.Assignment(var("out")[i],
                          p.If(var("crit")[i], sym_then, sym_else))
        ])
    return lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")
def test_uniquify_instruction_ids():
    """Two auto-id assignments plus two requesting the same UniqueName must
    all end up with distinct string ids after uniquification."""
    insns = [
        lp.Assignment("b", 1, id=None),
        lp.Assignment("b", 1, id=None),
        lp.Assignment("b", 1, id=lp.UniqueName("b")),
        lp.Assignment("b", 1, id=lp.UniqueName("b")),
    ]

    prog = lp.make_kernel("{[i]: i = 1}", [], name="lpy_knl")
    prog = prog.with_kernel(prog["lpy_knl"].copy(instructions=insns))

    from loopy.transform.instruction import uniquify_instruction_ids
    prog = uniquify_instruction_ids(prog)

    ids = {insn.id for insn in prog["lpy_knl"].instructions}
    assert len(ids) == 4
    assert all(isinstance(insn_id, str) for insn_id in ids)
def get_kernel_scaling_assignment(self):
    """Return a single-instruction list assigning the expansion kernel's
    global scaling constant to the temporary ``kernel_scaling``."""
    from sumpy.symbolic import SympyToPymbolicMapper
    to_pymbolic = SympyToPymbolicMapper()
    scaling_const = self.expansion.kernel.get_global_scaling_const()
    return [lp.Assignment(
        id=None,
        assignee="kernel_scaling",
        expression=to_pymbolic(scaling_const),
        temp_var_type=lp.Optional(None))]
def get_kernel(self, **kwargs):
    # Build the loopy kernel that evaluates self's expression at every
    # target point, writing one value per target into ``result``.
    #
    # Supported kwargs:
    #   extra_kernel_kwarg_types: extra loopy argument declarations to
    #       append to the kernel's argument list.
    extra_kernel_kwarg_types = ()
    if "extra_kernel_kwarg_types" in kwargs:
        extra_kernel_kwarg_types = kwargs["extra_kernel_kwarg_types"]

    # The scalar evaluation instruction must run inside the itgt loop.
    eval_inames = frozenset(["itgt"])
    scalar_assignment = lp.Assignment(
        id=None,
        assignee="expr_val",
        expression=self.get_normalised_expr(),
        temp_var_type=None,
    )
    eval_insns = [
        insn.copy(within_inames=insn.within_inames | eval_inames)
        for insn in [scalar_assignment]
    ]

    # Kernel body: per-target variable setup (spliced in as text), the
    # evaluation instruction(s), then the write-back to result[itgt].
    loopy_knl = lp.make_kernel(  # NOQA
        "{ [itgt]: 0<=itgt<n_targets }",
        [
            """
            for itgt
                VAR_ASSIGNMENT
            end
            """.replace("VAR_ASSIGNMENT", self.get_variable_assignment_code())
        ] + eval_insns + [
            """
            for itgt
                result[itgt] = expr_val
            end
            """
        ],
        [
            lp.ValueArg("dim, n_targets", np.int32),
            lp.GlobalArg("target_points", np.float64, "dim, n_targets"),
            lp.TemporaryVariable("expr_val", None, ()),
        ] + list(extra_kernel_kwarg_types) + [
            "...",
        ],
        name="eval_expr",
        lang_version=(2018, 2),
    )

    loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim)
    loopy_knl = lp.set_options(loopy_knl, write_cl=False)
    loopy_knl = lp.set_options(loopy_knl, return_dict=True)

    # Optional hooks for custom function mangling / preambles.
    if self.function_manglers is not None:
        loopy_knl = lp.register_function_manglers(loopy_knl,
                                                  self.function_manglers)
    if self.preamble_generators is not None:
        loopy_knl = lp.register_preamble_generators(
            loopy_knl, self.preamble_generators)

    return loopy_knl
def test_multi_arg_array_call(ctx_factory):
    # Tests calling a loopy callee kernel with multiple array outputs:
    # a custom argmin that returns both the minimum value and its index.
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    import pymbolic.primitives as p

    n = 10
    acc_i = p.Variable("acc_i")
    i = p.Variable("i")
    index = p.Variable("index")
    a_i = p.Subscript(p.Variable("a"), p.Variable("i"))

    # The callee: sequential argmin. Explicit ids and depends_on edges
    # order the instructions; "insn" must observe the freshly updated
    # accumulator from "update".
    argmin_kernel = lp.make_function(
        "{[i]: 0 <= i < n}",
        [
            lp.Assignment(id="init2", assignee=index, expression=0),
            lp.Assignment(id="init1", assignee=acc_i,
                          expression="214748367"),
            lp.Assignment(id="insn", assignee=index,
                          expression=p.If(p.Expression.eq(acc_i, a_i),
                                          i, index),
                          depends_on="update"),
            lp.Assignment(id="update", assignee=acc_i,
                          expression=p.Variable("min")(acc_i, a_i),
                          depends_on="init1,init2")
        ],
        [
            lp.GlobalArg("a"),
            lp.GlobalArg("acc_i, index", is_input=False, is_output=True,
                         shape=lp.auto),
            ...
        ],
        name="custom_argmin")

    argmin_kernel = lp.fix_parameters(argmin_kernel, n=n)

    # The caller: bind both outputs via sub-array-ref syntax.
    knl = lp.make_kernel(
        "{[i]:0<=i<n}",
        """
        []: min_val[()], []: min_index[()] = custom_argmin([i]:b[i])
        """)

    knl = lp.fix_parameters(knl, n=n)
    knl = lp.set_options(knl, return_dict=True)
    knl = lp.merge([knl, argmin_kernel])
    b = np.random.randn(n)
    evt, out_dict = knl(queue, b=b)
    tol = 1e-15
    from numpy.linalg import norm
    assert (norm(out_dict["min_val"] - np.min(b)) < tol)
    assert (norm(out_dict["min_index"] - np.argmin(b)) < tol)
def cumsum(self, arg): """ Registers a substitution rule in order to cumulatively sum the elements of array ``arg`` along ``axis``. Mimics :func:`numpy.cumsum`. :return: An instance of :class:`numloopy.ArraySymbol` which is which is registered as the cumulative summed-substitution rule. """ # Note: this can remain as a substitution but loopy does not have # support for translating inames for substitutions to the kernel # domains assert len(arg.shape) == 1 i_iname = self.name_generator(based_on="i") j_iname = self.name_generator(based_on="i") space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT, [i_iname, j_iname]) domain = isl.BasicSet.universe(space) arg_name = self.name_generator(based_on="arr") subst_name = self.name_generator(based_on="subst") domain = domain & make_slab(space, i_iname, 0, arg.shape[0]) domain = domain.add_constraint( isl.Constraint.ineq_from_names(space, {j_iname: 1})) domain = domain.add_constraint( isl.Constraint.ineq_from_names(space, { j_iname: -1, i_iname: 1, 1: -1 })) cumsummed_arg = ArraySymbol(stack=self, name=arg_name, shape=arg.shape, dtype=arg.dtype) cumsummed_subst = ArraySymbol(stack=self, name=subst_name, shape=arg.shape, dtype=arg.dtype) subst_iname = self.name_generator(based_on="i") rule = lp.SubstitutionRule( subst_name, (subst_iname, ), Subscript(Variable(arg_name), (Variable(subst_iname), ))) from loopy.library.reduction import SumReductionOperation insn = lp.Assignment(assignee=Subscript(Variable(arg_name), (Variable(i_iname), )), expression=lp.Reduction( SumReductionOperation(), (j_iname, ), parse('{}({})'.format(arg.name, j_iname)))) self.data.append(cumsummed_arg) self.substs_to_arrays[subst_name] = arg_name self.register_implicit_assignment(insn) self.domains.append(domain) self.register_substitution(rule) return cumsummed_subst
def test_child_invalid_type_cast():
    """Shifting a uint2 through an int64 TypeCast is ill-typed and must be
    rejected during preprocessing."""
    from pymbolic import var

    knl = lp.make_kernel(
        "{[i]: 0<=i<n}",
        [
            "<> ctr = make_uint2(0, 0)",
            lp.Assignment("a[i]",
                          lp.TypeCast(np.int64, var("ctr")) << var("i")),
        ])

    with pytest.raises(lp.LoopyError):
        knl = lp.preprocess_kernel(knl)
def initialise_terminals(self, var2terminal, coefficients):
    """ Initialisation of the variables in which coefficients
        and the Tensors coming from TSFC are saved.

        :arg var2terminal: dictionary that maps Slate Tensors to gem Variables
        :arg coefficients: mapping from coefficient Functions to their
            kernel-argument names; consumed for AssembledVector terminals.
        :returns: 2-tuple ``(inits, tensor2temp)`` of initialisation
            instructions and an :class:`OrderedDict` mapping each Slate
            terminal to its loopy temporary.
    """
    tensor2temp = OrderedDict()
    inits = []
    for gem_tensor, slate_tensor in var2terminal.items():
        # Loopy-side temporary backing this terminal tensor.
        loopy_tensor = loopy.TemporaryVariable(gem_tensor.name,
                                               shape=gem_tensor.shape,
                                               address_space=loopy.AddressSpace.LOCAL)
        tensor2temp[slate_tensor] = loopy_tensor

        if isinstance(slate_tensor, slate.Tensor):
            # Plain terminal tensor: zero-initialise the whole temporary.
            indices = self.bag.index_creator(self.shape(slate_tensor))
            inames = {var.name for var in indices}
            var = pym.Subscript(pym.Variable(loopy_tensor.name), indices)
            inits.append(loopy.Assignment(var, "0.", id="init%d" % len(inits),
                                          within_inames=frozenset(inames)))

        elif isinstance(slate_tensor, slate.AssembledVector):
            # Assembled vector: copy the coefficient's data into the
            # temporary, one contiguous block per (sub-)function space.
            f = slate_tensor._function
            coeff = coefficients[f]
            offset = 0
            ismixed = (type(f.ufl_element()) == MixedElement)
            # Mixed elements provide one argument name per subspace;
            # otherwise a single name.
            names = [name for (name, ext) in coeff.values()] if ismixed else coeff[0]

            # Mixed coefficients come as seperate parameter (one per space)
            for i, shp in enumerate(*slate_tensor.shapes.values()):
                indices = self.bag.index_creator((shp,))
                inames = {var.name for var in indices}
                # Write at a running offset into the flat temporary.
                offset_index = (pym.Sum((offset, indices[0])),)
                name = names[i] if ismixed else names
                var = pym.Subscript(pym.Variable(loopy_tensor.name), offset_index)
                c = pym.Subscript(pym.Variable(name), indices)
                inits.append(loopy.Assignment(var, c, id="init%d" % len(inits),
                                              within_inames=frozenset(inames)))
                offset += shp

    return inits, tensor2temp
def test_math_function(target, tp):
    # Test correct maths functions are generated for C and OpenCL
    # backend instead for different data type
    data_type = {"f32": np.float32, "f64": np.float64}[tp]

    import pymbolic.primitives as p

    i = p.Variable("i")
    xi = p.Subscript(p.Variable("x"), i)
    yi = p.Subscript(p.Variable("y"), i)
    zi = p.Subscript(p.Variable("z"), i)

    n = 100
    domain = "{[i]: 0<=i<%d}" % n
    data = [
        lp.GlobalArg("x", data_type, shape=(n, )),
        lp.GlobalArg("y", data_type, shape=(n, )),
        lp.GlobalArg("z", data_type, shape=(n, )),
    ]

    # Same check for both min and max: the double-precision C name must
    # appear, and the f-suffixed single-precision variant only for the
    # plain C target at f32.
    for fn_name, c_name, c_name_f32 in [("min", "fmin", "fminf"),
                                        ("max", "fmax", "fmaxf")]:
        inst = [lp.Assignment(xi, p.Variable(fn_name)(yi, zi))]
        knl = lp.make_kernel(domain, inst, data, target=target())
        code = lp.generate_code_v2(knl).device_code()

        assert c_name in code
        if tp == "f32" and target == CTarget:
            assert c_name_f32 in code
        else:
            assert c_name_f32 not in code
def test_np_bool_handling(ctx_factory):
    """A numpy bool literal must survive codegen inside a logical expression."""
    import pymbolic.primitives as p
    from loopy.symbolic import parse

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    knl = lp.make_kernel(
        "{:}",
        [lp.Assignment(parse("y"), p.LogicalNot(np.bool_(False)))],
        [lp.GlobalArg("y", dtype=np.bool_, shape=lp.auto)])
    _, (out, ) = knl(queue)
    assert out.get().item() is True
def get_kernel_scaling_assignments(self):
    """Return one assignment per kernel, binding its global scaling
    constant to a temporary named ``knl_<i>_scaling``."""
    from sumpy.symbolic import SympyToPymbolicMapper
    to_pymbolic = SympyToPymbolicMapper()

    import loopy as lp

    assignments = []
    for iknl, (kernel, dtype) in enumerate(
            zip(self.kernels, self.value_dtypes)):
        assignments.append(lp.Assignment(
            id=None,
            assignee="knl_%d_scaling" % iknl,
            expression=to_pymbolic(kernel.get_global_scaling_const()),
            temp_var_type=lp.Optional(dtype)))
    return assignments
def loopy_inst_aug_assign(expr, context):
    """Lower a UFL augmented assignment (+=, -=, *=, /=) to a loopy
    Assignment whose rhs combines the old lhs with the rhs operand."""
    import operator

    lhs, rhs = (loopy_instructions(operand, context)
                for operand in expr.ufl_operands)

    # Dispatch the in-place node type to the corresponding binary operator.
    combine = {
        IAdd: operator.add,
        ISub: operator.sub,
        IMul: operator.mul,
        IDiv: operator.truediv,
    }[type(expr)]

    return loopy.Assignment(lhs, combine(lhs, rhs),
                            within_inames=context.within_inames)
def knl():
    # Build a kernel applying func_name elementwise, out[i] = func_name(a[i]),
    # then split i 128-wide onto group/local axis 0.
    # NOTE(review): ``func_name``, ``Variable`` and ``i`` come from the
    # enclosing scope (not visible here) — confirm ``i`` is a pymbolic
    # variable matching the iname "i".
    knl = lp.make_kernel("{[i]: 0<=i<n}", [
        lp.Assignment(
            Variable("out")[i],
            Variable(func_name)(Variable("a")[i]))
    ], default_offset=lp.auto)
    return lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")
def statement_assign(expr, context):
    # Lower an assignment statement to a single loopy Assignment.
    lvalue, _ = expr.children
    if isinstance(lvalue, Indexed):
        # Record the loop ordering implied by the lhs indices so the caller
        # can order inames consistently.
        context.index_ordering.append(tuple(i.name for i in lvalue.index_ordering()))
    # Translate both sides into pymbolic expressions.
    lvalue, rvalue = tuple(expression(c, context.parameters) for c in expr.children)
    within_inames = context.within_inames[expr]
    # Instruction id and dependency edges are precomputed per statement.
    id, depends_on = context.instruction_dependencies[expr]
    # Guard the instruction by the currently active conditions, if any.
    predicates = frozenset(context.conditions)
    return loopy.Assignment(lvalue, rvalue, within_inames=within_inames,
                            predicates=predicates, id=id,
                            depends_on=depends_on,
                            depends_on_is_final=True)
def get_kernel_exprs(self, result_names):
    """Return one ``pair_result_<i>`` assignment per result name, each
    weighted by the per-source strength (when applicable)."""
    isrc_sym = var("isrc")

    insns = []
    for ires, name in enumerate(result_names):
        weighted = var(name) * self.get_strength_or_not(isrc_sym, ires)
        insns.append(lp.Assignment(
            id=None,
            assignee="pair_result_%d" % ires,
            expression=weighted,
            temp_var_type=lp.Optional(None)))
    return insns
def build_ass():
    """Build A_T[i,j] = sum(k, A0[i,j,k] * G_T[k]) as a loopy assignment."""
    # Symbols for the inames and arrays involved.
    i, j, k = inames["i"], inames["j"], inames["k"]
    A_T, A0, G_T = args["A_T"], args["A0"], args["G_T"]

    # Left-hand side of the assignment.
    lhs = pb.Subscript(A_T, (i, j))

    # Right-hand side: Frobenius inner product <A0[i,j,:], G_T>, reduced
    # over k.
    summand = pb.Subscript(A0, (i, j, k)) * pb.Subscript(G_T, k)
    rhs = lp.Reduction(lp.library.reduction.SumReductionOperation(),
                       k, summand)

    return lp.Assignment(lhs, rhs)
def build_ass(): """ A[i,j] = c*sum(k, B[k,i]*B[k,j]) """ # The target of the assignment target = pb.Subscript(args["A"], (inames["i"], inames["j"])) # The rhs expression: A reduce operation of the matrix columns # Maybe replace with manual increment? reduce_op = lp.library.reduction.SumReductionOperation() reduce_expr = pb.Subscript(args["B"], (inames["k"], inames["i"])) * pb.Subscript( args["B"], (inames["k"], inames["j"])) expr = args["c"] * lp.Reduction(reduce_op, inames["k"], reduce_expr) return lp.Assignment(target, expr)
def test_int_max_min_c_target(ctx_factory, which):
    """Integer min/max builtins on the executable C target must agree with
    numpy's minimum/maximum."""
    from numpy.random import default_rng

    from pymbolic import parse

    rng = default_rng()
    n = 100
    arr1 = rng.integers(-100, 100, n)
    arr2 = rng.integers(-100, 100, n)

    reference = getattr(np, f"{which}imum")

    knl = lp.make_kernel(
        "{[i]: 0<=i<100}",
        [lp.Assignment(parse("out[i]"), parse(f"{which}(arr1[i], arr2[i])"))],
        target=lp.ExecutableCTarget())

    _, (out, ) = knl(arr1=arr1, arr2=arr2)
    np.testing.assert_allclose(reference(arr1, arr2), out)
def test_fuzz_code_generator(ctx_factory):
    """Fuzz the code generator: evaluate random expressions both through
    pymbolic's evaluator and through a generated kernel, and compare."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if ctx.devices[0].platform.vendor.startswith("Advanced Micro"):
        pytest.skip("crashes on AMD 15.12")

    def get_dtype(x):
        # Complex inputs need a complex kernel argument type.
        if isinstance(x, (complex, np.complexfloating)):
            return np.complex128
        else:
            return np.float64

    for expr, var_values in generate_random_fuzz_examples(50):
        from pymbolic import evaluate
        try:
            true_value = evaluate(expr, var_values)
        except ZeroDivisionError:
            # The random expression divided by zero; skip it.
            continue

        knl = lp.make_kernel("{ : }",
                [lp.Assignment("value", expr)],
                [lp.GlobalArg("value", np.complex128, shape=())]
                + [
                    # Fix: use dict.items() instead of Py2-era six.iteritems.
                    lp.ValueArg(name, get_dtype(val))
                    for name, val in var_values.items()
                ])

        ck = lp.CompiledKernel(ctx, knl)
        evt, (lp_value, ) = ck(queue, out_host=True, **var_values)
        err = abs(true_value - lp_value) / abs(true_value)
        if abs(err) > 1e-10:
            print(80 * "-")
            print("WRONG: rel error=%g" % err)
            print("true=%r" % true_value)
            print("loopy=%r" % lp_value)
            print(80 * "-")
            print(ck.get_code())
            print(80 * "-")
            print(var_values)
            print(80 * "-")
            print(repr(expr))
            print(80 * "-")
            print(expr)
            print(80 * "-")
            # Fix: fail with a meaningful error instead of the original
            # "1 / 0" ZeroDivisionError trick.
            raise AssertionError(
                    "generated kernel disagrees with reference evaluation "
                    "(rel error=%g)" % err)
def test_sized_integer_c_codegen(ctx_factory):
    """The int64 TypeCast must survive codegen so the left shift does not
    wrap at 32 bits."""
    from pymbolic import var

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    knl = lp.make_kernel(
        "{[i]: 0<=i<n}",
        [lp.Assignment("a[i]", lp.TypeCast(np.int64, 1) << var("i"))])
    knl = lp.set_options(knl, write_code=True)

    n = 40
    _, (a, ) = knl(queue, n=n)

    expected = 1 << np.arange(n, dtype=np.int64)
    assert np.array_equal(expected, a.get())