def prepare_loopy_kernel(self, loopy_knl):
    """Register the Bessel function support from :mod:`sumpy.codegen`.

    :arg loopy_knl: the kernel handed to
        ``lp.register_function_manglers``.
    :returns: the value returned by ``lp.register_preamble_generators``
        after both ``bessel_mangler`` and ``bessel_preamble_generator``
        have been registered (in that order).
    """
    from sumpy.codegen import bessel_mangler, bessel_preamble_generator

    with_manglers = lp.register_function_manglers(
        loopy_knl, [bessel_mangler])
    return lp.register_preamble_generators(
        with_manglers, [bessel_preamble_generator])
def prepare_loopy_kernel(self, loopy_knl):
    """Return *loopy_knl* with the Bessel mangler and preamble registered.

    The function mangler is registered first, then the preamble
    generator; both come from :mod:`sumpy.codegen`.

    :arg loopy_knl: the kernel to register the Bessel helpers on.
    :returns: the kernel produced by the second registration call.
    """
    from sumpy.codegen import bessel_mangler, bessel_preamble_generator

    knl = lp.register_function_manglers(loopy_knl, [bessel_mangler])
    knl = lp.register_preamble_generators(knl, [bessel_preamble_generator])
    return knl
def get_kernel(self, **kwargs):
    """Build the ``eval_expr`` loopy kernel.

    The kernel loops over ``itgt`` in ``[0, n_targets)``, runs the code
    returned by ``self.get_variable_assignment_code()``, assigns
    ``self.get_normalised_expr()`` to the temporary ``expr_val`` and
    stores it in ``result[itgt]``.

    :arg extra_kernel_kwarg_types: (keyword, optional) an iterable of
        additional argument declarations inserted into the kernel's
        argument list before the trailing ``"..."``.
    :returns: the kernel with ``dim`` fixed to ``self.dim``, the options
        ``write_cl=False`` and ``return_dict=True`` set, and
        ``self.function_manglers`` / ``self.preamble_generators``
        registered whenever they are not *None*.
    """
    extra_arg_decls = kwargs.get("extra_kernel_kwarg_types", ())

    target_inames = frozenset(["itgt"])

    expr_assignment = lp.Assignment(
        id=None,
        assignee="expr_val",
        expression=self.get_normalised_expr(),
        temp_var_type=None,
    )
    # Nest the scalar assignment inside the target loop.
    expr_insns = [
        expr_assignment.copy(
            within_inames=expr_assignment.within_inames | target_inames),
    ]

    setup_code = """
        for itgt
            VAR_ASSIGNMENT
        end
        """.replace("VAR_ASSIGNMENT", self.get_variable_assignment_code())

    store_code = """
        for itgt
            result[itgt] = expr_val
        end
        """

    kernel_args = [
        lp.ValueArg("dim, n_targets", np.int32),
        lp.GlobalArg("target_points", np.float64, "dim, n_targets"),
        lp.TemporaryVariable("expr_val", None, ()),
    ]
    kernel_args.extend(extra_arg_decls)
    kernel_args.append("...")

    knl = lp.make_kernel(  # NOQA
        "{ [itgt]: 0<=itgt<n_targets }",
        [setup_code] + expr_insns + [store_code],
        kernel_args,
        name="eval_expr",
        lang_version=(2018, 2),
    )

    knl = lp.fix_parameters(knl, dim=self.dim)
    knl = lp.set_options(knl, write_cl=False)
    knl = lp.set_options(knl, return_dict=True)

    manglers = self.function_manglers
    if manglers is not None:
        knl = lp.register_function_manglers(knl, manglers)

    preambles = self.preamble_generators
    if preambles is not None:
        knl = lp.register_preamble_generators(knl, preambles)

    return knl
def test_call_with_no_returned_value(ctx_factory):
    """Run a kernel whose only instruction is a call ``f()`` with no assignees.

    The mangler and preamble generator for ``f`` come from
    ``library_for_test``; the test passes if invoking the kernel does
    not raise and yields a two-element result.
    """
    import pymbolic.primitives as p
    from library_for_test import no_ret_f_mangler, no_ret_f_preamble_gen

    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    # A call instruction with an empty assignee tuple and no arguments.
    call_insn = lp.CallInstruction((), p.Call(p.Variable("f"), ()))
    kernel = lp.make_kernel("{:}", [call_insn])

    kernel = lp.register_function_manglers(kernel, [no_ret_f_mangler])
    kernel = lp.register_preamble_generators(kernel, [no_ret_f_preamble_gen])

    _evt, _outputs = kernel(cq)
def __call__(self, queue, target_points, **kwargs):
    """Evaluate the kernel at *target_points* and return the result.

    :arg queue: passed as the first positional argument to the kernel.
    :arg target_points: the evaluation points; its leading dimension
        must equal ``self.dim`` and every entry along that dimension
        must have the same length. A :mod:`numpy` object array whose
        first entry is a ``cl.array.Array`` is also accepted; it is
        concatenated and reshaped to ``[self.dim, -1]`` first.
    :arg extra_kernel_kwargs: (keyword, optional) a dict of additional
        keyword arguments forwarded to the kernel invocation.
    :returns: the ``"result"`` entry of the kernel's output dict.
    """
    # handle target_points given as an obj_array of coords
    #
    # Compare against the builtin ``object``: the ``np.object`` alias was
    # deprecated in NumPy 1.20 and removed in 1.24, where accessing it
    # raises AttributeError.
    if (isinstance(target_points, np.ndarray)
            and target_points.dtype == object
            and isinstance(target_points[0], cl.array.Array)):
        target_points = cl.array.concatenate(target_points).reshape(
            [self.dim, -1])

    assert target_points.shape[0] == self.dim

    n_tgt_points = target_points[0].shape[0]
    for tgt_d in target_points:
        assert len(tgt_d) == n_tgt_points

    extra_kernel_kwargs = kwargs.get("extra_kernel_kwargs", {})

    knl = self.get_cached_optimized_kernel()

    # FIXME: caching loses function mangler information
    if self.function_manglers is not None:
        knl = lp.register_function_manglers(knl, self.function_manglers)

    if self.preamble_generators is not None:
        knl = lp.register_preamble_generators(knl, self.preamble_generators)

    evt, res = knl(
        queue,
        target_points=target_points,
        n_targets=n_tgt_points,
        result=np.zeros(n_tgt_points, dtype=self.dtype),
        **extra_kernel_kwargs)

    return res["result"]
def map_insn_assign(self, insn):
    """Wrap an assignment instruction in a loopy kernel where applicable.

    *insn* is returned unchanged when it consists of a single expression
    that is an ``OperatorBinding`` or an external call, or when the
    expression mapper reports no non-scalar variables. Otherwise one
    ``lp.Assignment`` per ``(name, expr)`` pair is generated and placed
    in a kernel over the ``iel`` / ``idof`` inames.

    :arg insn: an instruction providing ``names``, ``exprs`` and
        ``do_not_return``.
    :returns: *insn* itself, or a ``LoopyKernelInstruction`` wrapping a
        ``LoopyKernelDescriptor`` for the generated kernel.
    """
    from grudge.symbolic.primitives import OperatorBinding

    if len(insn.exprs) == 1:
        sole_expr = insn.exprs[0]
        if (isinstance(sole_expr, OperatorBinding)
                or is_external_call(sole_expr, self.function_registry)):
            return insn

    import loopy as lp
    from pymbolic import var

    # FIXME: These names and the size names could clash with user-given names.
    # Need better metadata tracking in loopy.
    elem_iname = "iel"
    dof_iname = "idof"

    # Names flagged do_not_return are handed to the mapper as temporaries.
    temp_names = [
        nm for nm, dnr in zip(insn.names, insn.do_not_return) if dnr]

    expr_mapper = ToLoopyExpressionMapper(
        self.dd_inference_mapper,
        temp_names,
        (var(elem_iname), var(dof_iname)))

    # The mapper is applied to the assignee before the right-hand side
    # for each pair, in instruction order.
    assignments = [
        lp.Assignment(
            expr_mapper(var(nm)),
            expr_mapper(rhs),
            temp_var_type=lp.Optional(None) if dnr else lp.Optional(),
            no_sync_with=frozenset([
                ("*", "any"),
            ]),
        )
        for nm, rhs, dnr in zip(insn.names, insn.exprs, insn.do_not_return)
    ]

    if not expr_mapper.non_scalar_vars:
        return insn

    domain = (
        "{[%(iel)s, %(idof)s]: "
        "0 <= %(iel)s < nelements and 0 <= %(idof)s < nunit_dofs}"
        % {"iel": elem_iname, "idof": dof_iname})

    knl = lp.make_kernel(
        domain,
        assignments,
        name="grudge_assign_%d" % self.insn_count,
        # Single-insn kernels may have their no_sync_with resolve to an
        # empty set, that's OK.
        options=lp.Options(
            check_dep_resolution=False,
            return_dict=True,
            no_numpy=True,
        ),
    )

    self.insn_count += 1

    from pytools import single_valued
    governing_dd = single_valued(
        self.dd_inference_mapper(rhs) for rhs in insn.exprs)

    knl = lp.register_preamble_generators(knl, [bessel_preamble_generator])
    knl = lp.register_function_manglers(knl, [bessel_function_mangler])

    from grudge.symbolic.mappers import DependencyMapper
    dep_mapper = DependencyMapper(composite_leaves=False)

    input_mappings = {}
    output_mappings = {}

    # An expression counts as an output when its single dependency is
    # one of the names assigned by this instruction.
    for mapped_expr, arg_name in expr_mapper.expr_to_name.items():
        deps = dep_mapper(mapped_expr)
        assert len(deps) <= 1

        target = input_mappings
        if deps:
            (dep,) = deps
            if dep.name in insn.names:
                target = output_mappings

        target[arg_name] = mapped_expr

    descriptor = LoopyKernelDescriptor(
        loopy_kernel=knl,
        input_mappings=input_mappings,
        output_mappings=output_mappings,
        fixed_arguments={},
        governing_dd=governing_dd)

    return LoopyKernelInstruction(descriptor)
def map_insn_assign(self, insn):
    """Wrap an assignment instruction in a one-dimensional loopy kernel.

    *insn* is returned unchanged when it consists of a single expression
    that is an ``OperatorBinding`` or an external call, or when the
    expression mapper reports no non-scalar variables. Otherwise one
    ``lp.Assignment`` per ``(name, expr)`` pair is generated, placed in a
    kernel over the iname ``grdg_i`` and that iname is split by 128 with
    tags ``g.0`` / ``l.0``.

    :arg insn: an instruction providing ``names``, ``exprs`` and
        ``do_not_return``.
    :returns: *insn* itself, or a ``LoopyKernelInstruction`` wrapping a
        ``LoopyKernelDescriptor`` for the generated kernel.
    """
    from grudge.symbolic.primitives import OperatorBinding

    has_single_expr = len(insn.exprs) == 1
    if has_single_expr and (
            isinstance(insn.exprs[0], OperatorBinding)
            or is_external_call(insn.exprs[0], self.function_registry)):
        return insn

    loop_iname = "grdg_i"
    extent_name = "grdg_n"

    # Names flagged do_not_return are handed to the mapper as temporaries.
    temp_names = [
        nm for nm, dnr in zip(insn.names, insn.do_not_return) if dnr]

    expr_mapper = ToLoopyExpressionMapper(
        self.dd_inference_mapper, temp_names, loop_iname)

    import loopy as lp
    from pymbolic import var

    # The mapper is applied to the assignee before the right-hand side
    # for each pair, in instruction order.
    assignments = [
        lp.Assignment(
            expr_mapper(var(nm)),
            expr_mapper(rhs),
            temp_var_type=lp.Optional(None) if dnr else lp.Optional(),
            no_sync_with=frozenset([
                ("*", "any"),
            ]),
        )
        for nm, rhs, dnr in zip(insn.names, insn.exprs, insn.do_not_return)
    ]

    if not expr_mapper.non_scalar_vars:
        return insn

    domain = "{[%s]: 0 <= %s < %s}" % (loop_iname, loop_iname, extent_name)

    knl = lp.make_kernel(
        domain,
        assignments,
        default_offset=lp.auto,
        name="grudge_assign_%d" % self.insn_count,
        # Single-insn kernels may have their no_sync_with resolve to an
        # empty set, that's OK.
        options=lp.Options(check_dep_resolution=False))

    knl = lp.set_options(knl, return_dict=True)
    knl = lp.split_iname(
        knl, loop_iname, 128, outer_tag="g.0", inner_tag="l.0")

    self.insn_count += 1

    from pytools import single_valued
    governing_dd = single_valued(
        self.dd_inference_mapper(rhs) for rhs in insn.exprs)

    knl = lp.register_preamble_generators(knl, [bessel_preamble_generator])
    knl = lp.register_function_manglers(knl, [bessel_function_mangler])

    from grudge.symbolic.mappers import DependencyMapper
    dep_mapper = DependencyMapper(composite_leaves=False)

    input_mappings = {}
    output_mappings = {}

    # An expression counts as an output when its single dependency is
    # one of the names assigned by this instruction.
    for mapped_expr, arg_name in six.iteritems(expr_mapper.expr_to_name):
        deps = dep_mapper(mapped_expr)
        assert len(deps) <= 1

        is_output = False
        if deps:
            (dep,) = deps
            is_output = dep.name in insn.names

        mapping = output_mappings if is_output else input_mappings
        mapping[arg_name] = mapped_expr

    descriptor = LoopyKernelDescriptor(
        loopy_kernel=knl,
        input_mappings=input_mappings,
        output_mappings=output_mappings,
        fixed_arguments={},
        governing_dd=governing_dd)

    return LoopyKernelInstruction(descriptor)