def prepare_loopy_kernel(self, loopy_knl):
    """Return *loopy_knl* with Bessel-function support attached.

    Registers sumpy's Bessel function mangler and preamble generator so
    that Bessel calls appearing in the kernel can be code-generated.
    """
    from sumpy.codegen import bessel_mangler, bessel_preamble_generator

    # Apply each registration step in turn.
    for register, handlers in (
            (lp.register_function_manglers, [bessel_mangler]),
            (lp.register_preamble_generators, [bessel_preamble_generator]),
            ):
        loopy_knl = register(loopy_knl, handlers)

    return loopy_knl
def prepare_loopy_kernel(self, loopy_knl):
    """Attach Bessel-function code generation support to *loopy_knl*.

    The returned kernel has sumpy's Bessel mangler and preamble
    generator registered on it.
    """
    from sumpy.codegen import bessel_mangler, bessel_preamble_generator

    knl = lp.register_function_manglers(loopy_knl, [bessel_mangler])
    knl = lp.register_preamble_generators(knl, [bessel_preamble_generator])
    return knl
def get_kernel(self, **kwargs):
    """Build the loopy kernel that evaluates the expression at each target.

    :arg extra_kernel_kwarg_types: optional sequence of extra loopy
        argument declarations appended to the kernel's argument list.
    :returns: a loopy kernel named ``eval_expr`` writing ``result[itgt]``.
    """
    extra_kernel_kwarg_types = kwargs.get("extra_kernel_kwarg_types", ())

    # The scalar expression assignment must live inside the itgt loop.
    itgt_inames = frozenset(["itgt"])
    assign_expr = lp.Assignment(
        id=None,
        assignee="expr_val",
        expression=self.get_normalised_expr(),
        temp_var_type=None,
        )
    eval_insns = [
        assign_expr.copy(within_inames=assign_expr.within_inames | itgt_inames)]

    # Per-target variable setup, templated with the generated assignment code.
    var_assignment_block = """
        for itgt
            VAR_ASSIGNMENT
        end
        """.replace("VAR_ASSIGNMENT", self.get_variable_assignment_code())

    result_block = """
        for itgt
            result[itgt] = expr_val
        end
        """

    knl = lp.make_kernel(  # NOQA
        "{ [itgt]: 0<=itgt<n_targets }",
        [var_assignment_block] + eval_insns + [result_block],
        [
            lp.ValueArg("dim, n_targets", np.int32),
            lp.GlobalArg("target_points", np.float64, "dim, n_targets"),
            lp.TemporaryVariable("expr_val", None, ()),
        ] + list(extra_kernel_kwarg_types) + ["..."],
        name="eval_expr",
        lang_version=(2018, 2),
        )

    knl = lp.fix_parameters(knl, dim=self.dim)
    knl = lp.set_options(knl, write_cl=False)
    knl = lp.set_options(knl, return_dict=True)

    if self.function_manglers is not None:
        knl = lp.register_function_manglers(knl, self.function_manglers)
    if self.preamble_generators is not None:
        knl = lp.register_preamble_generators(knl, self.preamble_generators)

    return knl
def test_call_with_no_returned_value(ctx_factory):
    """Check that a call to a function returning nothing code-generates and runs."""
    import pymbolic.primitives as p
    from library_for_test import no_ret_f_mangler, no_ret_f_preamble_gen

    queue = cl.CommandQueue(ctx_factory())

    # A kernel consisting of a single bare call instruction: f().
    knl = lp.make_kernel(
        "{:}",
        [lp.CallInstruction((), p.Call(p.Variable("f"), ()))])
    knl = lp.register_function_manglers(knl, [no_ret_f_mangler])
    knl = lp.register_preamble_generators(knl, [no_ret_f_preamble_gen])

    evt, _ = knl(queue)
def test_symbol_mangler_in_call(ctx_factory):
    """Check that a registered symbol mangler is honored in generated code.

    Fix: the test previously ignored its *ctx_factory* fixture and called
    ``cl.create_some_context()`` directly, which defeats pytest's pyopencl
    context parametrization and can prompt interactively.
    """
    from library_for_test import (symbol_x, preamble_for_x)

    ctx = ctx_factory()  # was cl.create_some_context(); honor the fixture
    cq = cl.CommandQueue(ctx)

    knl = lp.make_kernel(
        "{:}",
        """
        y = sin(X)
        """,
        [lp.GlobalArg("y", shape=lp.auto)])

    knl = lp.register_symbol_manglers(knl, [symbol_x])
    knl = lp.register_preamble_generators(knl, [preamble_for_x])

    evt, (out, ) = knl(cq)

    # symbol_x is expected to expand X to the constant 10 -- TODO confirm
    # against library_for_test.
    np.testing.assert_allclose(out.get(), np.sin(10))
def __call__(self, queue, target_points, **kwargs):
    """Evaluate the cached kernel at *target_points* and return the result array.

    :arg queue: a :class:`pyopencl.CommandQueue`.
    :arg target_points: either a ``(dim, n)``-shaped array of coordinates, or
        an object array of per-dimension :class:`pyopencl.array.Array`
        coordinate vectors (which is concatenated into the former).
    :arg extra_kernel_kwargs: optional dict of extra keyword arguments passed
        through to the kernel invocation.
    """
    # handle target_points given as an obj_array of coords
    #
    # Fix: ``np.object`` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin ``object`` is the equivalent dtype spelling.
    if (isinstance(target_points, np.ndarray)
            and target_points.dtype == object
            and isinstance(target_points[0], cl.array.Array)):
        target_points = cl.array.concatenate(target_points).reshape(
            [self.dim, -1])

    assert target_points.shape[0] == self.dim
    n_tgt_points = target_points[0].shape[0]
    # all coordinate axes must agree on the number of points
    for tgt_d in target_points:
        assert len(tgt_d) == n_tgt_points

    extra_kernel_kwargs = kwargs.get("extra_kernel_kwargs", {})

    knl = self.get_cached_optimized_kernel()

    # FIXME: caching loses function mangler information
    if self.function_manglers is not None:
        knl = lp.register_function_manglers(knl, self.function_manglers)
    if self.preamble_generators is not None:
        knl = lp.register_preamble_generators(knl, self.preamble_generators)

    evt, res = knl(queue,
                   target_points=target_points,
                   n_targets=n_tgt_points,
                   result=np.zeros(n_tgt_points, dtype=self.dtype),
                   **extra_kernel_kwargs)

    return res["result"]
def generate(builder, wrapper_name=None):
    """Generate a loopy wrapper kernel from a PyOP2 *builder*.

    Emits the builder's instructions, normalizes indices/names, schedules
    them, and produces a loopy kernel (target: C) with the user kernel
    either inlined (if it is itself a loopy kernel) or attached via a
    preamble and a function-id lookup.

    :arg builder: the wrapper-kernel builder (provides instructions,
        arguments, loop/layer indices and the user kernel).
    :arg wrapper_name: optional name for the generated wrapper; defaults
        to ``wrap_<kernel name>``.
    :returns: the finished loopy kernel.
    """
    # Outer inames: the cell loop, plus the layer loop for extruded meshes.
    if builder.layer_index is not None:
        outer_inames = frozenset(
            [builder._loop_index.name, builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # Bag of wrapper-level metadata accumulated while lowering.
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [
        tuple(merger(i) for i in inits) for inits in mapper.initialisers]

    # rename indices and nodes (so that the counters start from zero)
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node, (Index, RuntimeIndex, Variable, Argument,
                             NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            # fresh zero-based counter per (prefix, postfix) family
            replacements[node] = "%s%d%s" % (
                prefix, next(counter[(prefix, postfix)]), postfix)

    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [
        rename_nodes(inits, replacements) for inits in mapper.initialisers]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments,
                                                replacements)
    # layer extents must be renamed consistently with everything else
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents],
                        replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames,
                                 parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps

    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements
                      if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(
                    d.add_constraint(
                        isl.Constraint.eq_from_names(d.space, {
                            "n": 1,
                            "start": -1
                        })))
            else:
                new_domains.append(d)
        domains = new_domains

    if builder.extruded:
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder.layer_index.name:
                # layer = t1 - 1
                t1 = parameters.layer_end
                new_domains.append(
                    d.add_constraint(
                        isl.Constraint.eq_from_names(
                            d.space, {
                                "layer": 1,
                                t1: -1,
                                1: 1
                            })))
            else:
                new_domains.append(d)
        domains = new_domains

    # collapse all collected assumptions into a single basic set
    assumptions, = reduce(
        operator.and_, parameters.assumptions.values()).params() \
        .get_basic_sets()
    options = loopy.Options(check_dep_resolution=True,
                            ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # add bound assumptions: start >= 0, start < end (single cell) or
    # start <= end, and layer_start <= layer_end for extruded meshes
    pwaffd = isl.affs_from_space(assumptions.get_space())
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(
            pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(
        ["#include <math.h>", "#include <complex.h>", "#include <petsc.h>"])
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        # user kernel is itself a loopy kernel: register and inline it
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code,
                              tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper,
                                                 [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
        wrapper, petsc_function_lookup)

    return wrapper
def map_insn_assign(self, insn):
    """Lower a grudge assignment instruction to a loopy kernel instruction.

    Operator bindings and external calls are left untouched (handled
    elsewhere); purely scalar assignments are also passed through.
    Otherwise, an element-/dof-wise loopy kernel is built whose inputs
    and outputs are recorded in the returned
    :class:`LoopyKernelDescriptor`.
    """
    from grudge.symbolic.primitives import OperatorBinding

    if (
            len(insn.exprs) == 1
            and (
                isinstance(insn.exprs[0], OperatorBinding)
                or is_external_call(
                    insn.exprs[0], self.function_registry))):
        return insn

    # FIXME: These names and the size names could clash with user-given names.
    # Need better metadata tracking in loopy.
    iel = "iel"
    idof = "idof"

    # names assigned but not returned from the kernel
    temp_names = [
        name
        for name, dnr in zip(insn.names, insn.do_not_return)
        if dnr]

    from pymbolic import var
    expr_mapper = ToLoopyExpressionMapper(
        self.dd_inference_mapper, temp_names, (var(iel), var(idof)))
    insns = []

    import loopy as lp
    from pymbolic import var
    for name, expr, dnr in zip(insn.names, insn.exprs, insn.do_not_return):
        insns.append(
            lp.Assignment(
                expr_mapper(var(name)), expr_mapper(expr),
                temp_var_type=lp.Optional(None) if dnr else lp.Optional(),
                no_sync_with=frozenset([
                    ("*", "any"),
                    ]),
                ))

    if not expr_mapper.non_scalar_vars:
        # all-scalar assignment: no array loop needed, pass through
        return insn

    knl = lp.make_kernel(
        "{[%(iel)s, %(idof)s]: "
        "0 <= %(iel)s < nelements and 0 <= %(idof)s < nunit_dofs}"
        % {"iel": iel, "idof": idof},
        insns,
        name="grudge_assign_%d" % self.insn_count,
        # Single-insn kernels may have their no_sync_with resolve to an
        # empty set, that's OK.
        options=lp.Options(
            check_dep_resolution=False,
            return_dict=True,
            no_numpy=True,
            ))

    self.insn_count += 1

    from pytools import single_valued
    governing_dd = single_valued(
        self.dd_inference_mapper(expr)
        for expr in insn.exprs)

    # enable Bessel functions inside the generated kernel
    knl = lp.register_preamble_generators(knl, [bessel_preamble_generator])
    knl = lp.register_function_manglers(knl, [bessel_function_mangler])

    input_mappings = {}
    output_mappings = {}

    from grudge.symbolic.mappers import DependencyMapper
    dep_mapper = DependencyMapper(composite_leaves=False)

    # classify each mapped expression as a kernel input or output
    for expr, name in expr_mapper.expr_to_name.items():
        deps = dep_mapper(expr)
        assert len(deps) <= 1
        if not deps:
            is_output = False
        else:
            dep, = deps
            # outputs are exactly the names assigned by this instruction
            is_output = dep.name in insn.names

        if is_output:
            tgt_dict = output_mappings
        else:
            tgt_dict = input_mappings

        tgt_dict[name] = expr

    return LoopyKernelInstruction(
        LoopyKernelDescriptor(
            loopy_kernel=knl,
            input_mappings=input_mappings,
            output_mappings=output_mappings,
            fixed_arguments={},
            governing_dd=governing_dd)
        )
def map_insn_assign(self, insn):
    """Lower a grudge assignment instruction to a loopy kernel instruction.

    Operator bindings and external calls are left untouched (handled
    elsewhere); purely scalar assignments are also passed through.
    Otherwise, a flat-index loopy kernel is built, split for GPU execution,
    and wrapped in a :class:`LoopyKernelDescriptor` recording its inputs
    and outputs.
    """
    from grudge.symbolic.primitives import OperatorBinding

    if (len(insn.exprs) == 1
            and (isinstance(insn.exprs[0], OperatorBinding)
                or is_external_call(insn.exprs[0], self.function_registry))):
        return insn

    # FIXME: these names could clash with user-given names -- TODO confirm
    # whether upstream metadata tracking prevents this.
    iname = "grdg_i"
    size_name = "grdg_n"

    # names assigned but not returned from the kernel
    temp_names = [
        name
        for name, dnr in zip(insn.names, insn.do_not_return)
        if dnr]

    expr_mapper = ToLoopyExpressionMapper(self.dd_inference_mapper,
                                          temp_names, iname)
    insns = []

    import loopy as lp
    from pymbolic import var
    for name, expr, dnr in zip(insn.names, insn.exprs, insn.do_not_return):
        insns.append(
            lp.Assignment(
                expr_mapper(var(name)), expr_mapper(expr),
                temp_var_type=lp.Optional(None) if dnr else lp.Optional(),
                no_sync_with=frozenset([
                    ("*", "any"),
                    ]),
                ))

    if not expr_mapper.non_scalar_vars:
        # all-scalar assignment: no array loop needed, pass through
        return insn

    knl = lp.make_kernel(
        "{[%s]: 0 <= %s < %s}" % (iname, iname, size_name),
        insns,
        default_offset=lp.auto,
        name="grudge_assign_%d" % self.insn_count,
        # Single-insn kernels may have their no_sync_with resolve to an
        # empty set, that's OK.
        options=lp.Options(check_dep_resolution=False))

    knl = lp.set_options(knl, return_dict=True)
    knl = lp.split_iname(knl, iname, 128, outer_tag="g.0", inner_tag="l.0")

    self.insn_count += 1

    from pytools import single_valued
    governing_dd = single_valued(
        self.dd_inference_mapper(expr)
        for expr in insn.exprs)

    # enable Bessel functions inside the generated kernel
    knl = lp.register_preamble_generators(knl, [bessel_preamble_generator])
    knl = lp.register_function_manglers(knl, [bessel_function_mangler])

    input_mappings = {}
    output_mappings = {}

    from grudge.symbolic.mappers import DependencyMapper
    dep_mapper = DependencyMapper(composite_leaves=False)

    # classify each mapped expression as a kernel input or output.
    # Fix: use dict.items() instead of six.iteritems for consistency with
    # the element-wise variant of this method (works on both Py2 and Py3).
    for expr, name in expr_mapper.expr_to_name.items():
        deps = dep_mapper(expr)
        assert len(deps) <= 1
        if not deps:
            is_output = False
        else:
            dep, = deps
            # outputs are exactly the names assigned by this instruction
            is_output = dep.name in insn.names

        if is_output:
            tgt_dict = output_mappings
        else:
            tgt_dict = input_mappings

        tgt_dict[name] = expr

    return LoopyKernelInstruction(
        LoopyKernelDescriptor(loopy_kernel=knl,
                              input_mappings=input_mappings,
                              output_mappings=output_mappings,
                              fixed_arguments={},
                              governing_dd=governing_dd))
def generate(builder, wrapper_name=None):
    """Generate a loopy wrapper kernel from a PyOP2 *builder*.

    Emits the builder's instructions, normalizes indices/names, schedules
    them, and produces a loopy kernel (target: C) with the user kernel
    either inlined (if it is itself a loopy kernel) or attached via a
    preamble and a function-id lookup.

    :arg builder: the wrapper-kernel builder (provides instructions,
        arguments, loop/layer indices and the user kernel).
    :arg wrapper_name: optional name for the generated wrapper; defaults
        to ``wrap_<kernel name>``.
    :returns: the finished loopy kernel.
    """
    # Outer inames: the cell loop, plus the layer loop for extruded meshes.
    if builder.layer_index is not None:
        outer_inames = frozenset([builder._loop_index.name,
                                  builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # Bag of wrapper-level metadata accumulated while lowering.
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [tuple(merger(i) for i in inits)
                           for inits in mapper.initialisers]

    # rename indices and nodes (so that the counters start from zero)
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node, (Index, RuntimeIndex, Variable, Argument,
                             NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            # fresh zero-based counter per (prefix, postfix) family
            replacements[node] = "%s%d%s" % (
                prefix, next(counter[(prefix, postfix)]), postfix)

    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [rename_nodes(inits, replacements)
                           for inits in mapper.initialisers]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments,
                                                replacements)
    # layer extents must be renamed consistently with everything else
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents],
                        replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames,
                                 parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps

    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements
                      if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(d.add_constraint(
                    isl.Constraint.eq_from_names(d.space,
                                                 {"n": 1, "start": -1})))
            else:
                new_domains.append(d)
        domains = new_domains

    if builder.extruded:
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder.layer_index.name:
                # layer = t1 - 1
                t1 = parameters.layer_end
                new_domains.append(d.add_constraint(
                    isl.Constraint.eq_from_names(d.space,
                                                 {"layer": 1, t1: -1, 1: 1})))
            else:
                new_domains.append(d)
        domains = new_domains

    # collapse all collected assumptions into a single basic set
    assumptions, = reduce(operator.and_,
                          parameters.assumptions.values()).params() \
        .get_basic_sets()
    options = loopy.Options(check_dep_resolution=True,
                            ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # add bound assumptions: start >= 0, start < end (single cell) or
    # start <= end, and layer_start <= layer_end for extruded meshes
    pwaffd = isl.affs_from_space(assumptions.get_space())
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(
            pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(["#include <math.h>"])
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        # user kernel is itself a loopy kernel: register and inline it
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code,
                              tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper,
                                                 [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
        wrapper, petsc_function_lookup)

    return wrapper