def map_substitution(self, name, tag, arguments, expn_state):
    """
    Replace an invocation of the targeted substitution rule by an access
    to the temporary ``self.temporary_name``.

    The invocation is left to the superclass implementation if it is not
    the targeted rule/tag, is rejected by ``self.within``, or if its
    access is not in the footprint of ``self.array_base_map``.

    :arg name: name of the invoked substitution rule.
    :arg tag: tag of the invocation, compared against ``self.subst_tag``.
    :arg arguments: the argument expressions of the invocation.
    :arg expn_state: the current expansion state.
    :returns: the (possibly subscripted) temporary, or whatever the
        superclass returns for invocations that are not replaced.
    """
    is_target_invocation = (
        name == self.subst_name
        and self.within(
            expn_state.kernel,
            expn_state.instruction,
            expn_state.stack)
        and (self.subst_tag is None or self.subst_tag == tag))

    if not is_target_invocation:
        return super(RuleInvocationReplacer, self).map_substitution(
            name, tag, arguments, expn_state)

    # {{{ check if in footprint

    old_rule = self.rule_mapping_context.old_subst_rules[name]
    resolved_context = self.make_new_arg_context(
        name, old_rule.arguments, arguments, expn_state.arg_context)
    resolved_args = [
        resolved_context[arg_name] for arg_name in old_rule.arguments]

    footprint_query = AccessDescriptor(
        storage_axis_exprs=storage_axis_exprs(
            self.storage_axis_sources, resolved_args))

    if not self.array_base_map.is_access_descriptor_in_footprint(
            footprint_query):
        return super(RuleInvocationReplacer, self).map_substitution(
            name, tag, arguments, expn_state)

    # }}}

    assert len(arguments) == len(old_rule.arguments)

    base_map = self.array_base_map

    storage_indices = []
    storage_axes = zip(
        self.storage_axis_names,
        self.storage_axis_sources,
        base_map.storage_base_indices)

    for axis_name, axis_source, axis_base in storage_axes:
        # Axes not listed in non1_storage_axis_names get no subscript.
        if axis_name not in self.non1_storage_axis_names:
            continue

        # An int source refers to an argument position; anything else
        # is taken to be the name of an iname.
        raw_index = (
            arguments[axis_source]
            if isinstance(axis_source, int)
            else var(axis_source))

        from loopy.isl_helpers import simplify_via_aff
        storage_indices.append(simplify_via_aff(raw_index - axis_base))

    replacement = var(self.temporary_name)
    if storage_indices:
        replacement = replacement.index(tuple(storage_indices))

    # Can't possibly be nested, and no need to traverse
    # further as compute expression has already been seen
    # by rule_mapping_context.

    return replacement
def map_array_access(self, index, expn_state):
    """
    Rewrite an array access with subscript *index* into an access to
    ``self.buf_var``.

    :arg index: the index expressions of the access.
    :arg expn_state: the current expansion state; its argument context
        is applied to *index* before the new subscript is built.
    :returns: *None* if the access is not in the footprint of
        ``self.array_base_map``; otherwise ``self.buf_var``, subscripted
        by the base-shifted indices of the axes flagged in
        ``non1_storage_axis_flags`` (unsubscripted if no axis is
        flagged).
    """
    footprint_query = AccessDescriptor(
        identifier=None,
        storage_axis_exprs=index)

    if not self.array_base_map.is_access_descriptor_in_footprint(
            footprint_query):
        return None

    base_map = self.array_base_map

    resolved_index = expn_state.apply_arg_context(index)

    assert len(resolved_index) == len(base_map.non1_storage_axis_flags)

    buffer_indices = []
    for axis, axis_expr in enumerate(resolved_index):
        if not base_map.non1_storage_axis_flags[axis]:
            continue

        from loopy.isl_helpers import simplify_via_aff
        shifted = simplify_via_aff(
            axis_expr - base_map.storage_base_indices[axis])
        buffer_indices.append(shifted)

    buffer_access = self.buf_var
    if buffer_indices:
        buffer_access = buffer_access.index(tuple(buffer_indices))

    # Can't possibly be nested, but recurse anyway to
    # make sure substitution rules referenced below here
    # do not get thrown away.
    self.rec(buffer_access, expn_state.copy(arg_context={}))

    return buffer_access
def map_substitution(self, name, tag, arguments, expn_state):
    """
    Replace an invocation of the targeted substitution rule by an access
    to the temporary ``self.temporary_name`` and record that a
    replacement took place in ``self.replaced_something``.

    Invocations that are not the targeted rule/tag, that ``self.within``
    rejects, or whose access is not in the footprint of
    ``self.array_base_map`` are handed to the superclass unchanged.

    :arg name: name of the invoked substitution rule.
    :arg tag: tag of the invocation, compared against ``self.subst_tag``.
    :arg arguments: the argument expressions of the invocation.
    :arg expn_state: the current expansion state.
    :returns: the (possibly subscripted) temporary, or the superclass
        result for invocations that are not replaced.
    """

    def leave_untouched():
        # Not ours to replace: defer to the generic rule handling.
        return super(RuleInvocationReplacer, self).map_substitution(
            name, tag, arguments, expn_state)

    if name != self.subst_name:
        return leave_untouched()
    if not self.within(
            expn_state.kernel, expn_state.instruction, expn_state.stack):
        return leave_untouched()
    if self.subst_tag is not None and self.subst_tag != tag:
        return leave_untouched()

    # {{{ check if in footprint

    old_rule = self.rule_mapping_context.old_subst_rules[name]
    bound_args = self.make_new_arg_context(
        name, old_rule.arguments, arguments, expn_state.arg_context)
    arg_values = [bound_args[arg_name] for arg_name in old_rule.arguments]

    access = AccessDescriptor(
        storage_axis_exprs=storage_axis_exprs(
            self.storage_axis_sources, arg_values))

    if not self.array_base_map.is_access_descriptor_in_footprint(access):
        return leave_untouched()

    # }}}

    assert len(arguments) == len(old_rule.arguments)

    base_map = self.array_base_map

    subscript = []
    for axis_name, axis_source, axis_base in zip(
            self.storage_axis_names,
            self.storage_axis_sources,
            base_map.storage_base_indices):
        # Axes not listed in non1_storage_axis_names get no subscript.
        if axis_name not in self.non1_storage_axis_names:
            continue

        if isinstance(axis_source, int):
            # an argument
            axis_expr = arguments[axis_source]
        else:
            # an iname
            axis_expr = var(axis_source)

        from loopy.isl_helpers import simplify_via_aff
        subscript.append(simplify_via_aff(axis_expr - axis_base))

    temporary_access = var(self.temporary_name)
    if subscript:
        temporary_access = temporary_access.index(tuple(subscript))

    # Can't possibly be nested, and no need to traverse
    # further as compute expression has already been seen
    # by rule_mapping_context.

    self.replaced_something = True

    return temporary_access
def test_simplify_via_aff_reproducibility():
    """Check that ``simplify_via_aff`` returns ``i+i_0`` unchanged."""
    # See https://github.com/inducer/loopy/pull/349
    from loopy.isl_helpers import simplify_via_aff
    from loopy.symbolic import parse

    original = parse("i+i_0")
    simplified = simplify_via_aff(original)

    assert simplified == original
def map_subscript(self, expr, expn_state):
    """
    Rewrite a subscript of a callee argument into a subscript of the
    caller-side array it was bound to.

    The callee index is flattened using the callee argument's strides,
    offset by the start subscript of the bound sub-array reference
    (using the caller array's strides), and then split back into one
    index per caller axis. Subscripts of anything that is not a callee
    argument are handled by the superclass.

    :arg expr: the subscript expression being mapped.
    :arg expn_state: the current expansion state.
    :returns: a :class:`pymbolic.primitives.Subscript` into the caller
        array, or the superclass result.
    """
    callee_arg_name = expr.aggregate.name
    if callee_arg_name not in self.callee_knl.arg_dict:
        return super().map_subscript(expr, expn_state)

    from loopy.symbolic import get_start_subscript_from_sar
    from loopy.isl_helpers import simplify_via_aff
    from pymbolic.primitives import Subscript, Variable

    sar = self.callee_arg_to_call_param[callee_arg_name]  # SubArrayRef
    callee_arg = self.callee_knl.arg_dict[callee_arg_name]

    # The caller-side array may be a kernel argument or a temporary.
    caller_array_name = sar.subscript.aggregate.name
    if caller_array_name in self.caller_knl.arg_dict:
        caller_arg = self.caller_knl.arg_dict[caller_array_name]
    else:
        caller_arg = self.caller_knl.temporary_variables[caller_array_name]

    # Flat offset of the sub-array's start within the caller array ...
    flat_offset = sum(
        start_idx * caller_arg.dim_tags[axis].stride
        for axis, start_idx in enumerate(
            get_start_subscript_from_sar(sar, self.caller_knl).index_tuple))

    # ... plus the flat offset of the access within the callee argument.
    flat_offset += sum(
        idx * tag.stride
        for idx, tag in zip(
            self.rec(expr.index_tuple, expn_state),
            callee_arg.dim_tags))

    flat_offset = simplify_via_aff(flat_offset)

    # Peel one caller index per axis off the flat offset, in axis order.
    caller_indices = []
    for dim_tag in caller_arg.dim_tags:
        axis_index = flat_offset // dim_tag.stride
        flat_offset -= (dim_tag.stride * axis_index)
        caller_indices.append(axis_index)

    caller_indices = tuple(simplify_via_aff(i) for i in caller_indices)

    return Subscript(Variable(caller_array_name), caller_indices)
def reshape(self, new_shape, order='C'):
    """
    Registers a substitution rule to reshape array with the shape
    ``new_shape``. Mimics :func:`numpy.ndarray.reshape`.

    The rule maps an index of the reshaped array to a flat index (using
    the strides obtained for ``new_shape`` and *order*) and splits that
    flat index into one index per axis of this array.

    :arg new_shape: An instance of :class:`tuple` of :class:`int`.
    :arg order: Either 'C' or 'F'. Forwarded to ``self.copy`` to obtain
        the strides of the reshaped array.
    :returns: an ``ArraySymbol`` on the same stack, named after the newly
        registered substitution rule and having shape ``new_shape``.
    :raises ValueError: if neither the last nor the first axis of this
        array has unit stride.
    """
    # TODO: complain here if there is a shape mismatch.
    # how to do this:
    # look at how loopy sets its dim tags, from shape and order.
    subst_name = self.stack.name_generator(based_on='subst')
    inames = tuple(
        self.stack.name_generator(based_on="i") for _ in new_shape)

    # Only used to obtain the dim tags (strides) of the reshaped array.
    new_arg = self.copy(stack=self.stack, name=subst_name, shape=new_shape,
                        dim_tags=None, order=order)

    linearized_idx = sum(
        Variable(iname) * dim_tag.stride
        for iname, dim_tag in zip(inames, new_arg.dim_tags))

    strides = tuple(dim_tag.stride for dim_tag in self.dim_tags)

    # The flat index is split largest stride first. The last-axis check
    # comes first so that arrays for which both ends have unit stride keep
    # being treated as row-major.
    if strides[-1] == 1:
        # Row-major: largest stride belongs to the first axis, so the
        # indices come out in axis order already.
        descending_strides = strides
        last_axis_first = False
    elif strides[0] == 1:
        # Column-major: largest stride belongs to the last axis, so the
        # indices come out last axis first.
        descending_strides = strides[::-1]
        last_axis_first = True
    else:
        raise ValueError(
            "reshape is only supported for arrays whose first or last "
            "axis has unit stride")

    indices = []
    for stride in descending_strides:
        current_idx = linearized_idx // stride
        indices.append(simplify_via_aff(current_idx))
        linearized_idx -= current_idx * stride

    if last_axis_first:
        # Restore axis order before the indices are passed positionally.
        indices.reverse()

    # should we assert that the linearized index is 0?

    rule = lp.SubstitutionRule(
        subst_name, inames,
        expression=Variable(self.name)(*tuple(indices)))

    self.stack.register_substitution(rule)

    return ArraySymbol(stack=self.stack, name=subst_name, shape=new_shape)
def pack_and_unpack_args_for_call_for_single_kernel(kernel, callables_table,
                                                    call_name,
                                                    args_to_pack=None,
                                                    args_to_unpack=None):
    """
    Returns a copy of *kernel* with instructions appended to copy the
    arguments in *args_to_pack* to match the alignment expected by the
    *call_name* in the kernel. The arguments in *args_to_unpack* are copied
    back with the appropriate data layout.

    If no call instruction was rewritten, *kernel* itself is returned.

    :arg kernel: An instance of :class:`LoopKernel`.

    :arg callables_table: A mapping from function names to in-kernel
        callables; calls whose function name is not in it are skipped.

    :arg call_name: An instance of :class:`str` denoting the function call
        in the *kernel*.

    :arg args_to_pack: A list of the arguments as instances of :class:`str`
        which must be packed. If set *None*, it is interpreted that all the
        array arguments of each matching call (sub-array references that
        sweep at least one iname) would be packed.

    :arg args_to_unpack: A list of the arguments as instances of
        :class:`str` which must be unpacked. If set *None*, it is
        interpreted that all the array arguments of each matching call
        should be unpacked.

    :raises LoopyError: if an entry of *args_to_pack* is not passed as a
        sub-array reference to a matching call, or if an entry of
        *args_to_unpack* is not also packed.
    """
    assert isinstance(kernel, LoopKernel)
    new_domains = []
    new_tmps = kernel.temporary_variables.copy()
    old_insn_to_new_insns = {}

    for insn in kernel.instructions:
        if not isinstance(insn, CallInstruction):
            # pack and unpack call only be done for CallInstructions.
            continue
        if insn.expression.function.name not in callables_table:
            continue

        in_knl_callable = callables_table[insn.expression.function.name]

        if in_knl_callable.name != call_name:
            # not the function we're looking for.
            continue
        in_knl_callable = in_knl_callable.with_packing_for_args()

        vng = kernel.get_var_name_generator()
        ing = kernel.get_instruction_id_generator()

        parameters = insn.expression.parameters
        call_operands = parameters + insn.assignees

        # The defaults are derived per instruction (and not stored back into
        # the function arguments) so that a second call site with different
        # arrays is not checked against the arrays of the first one.
        if args_to_pack is None:
            insn_args_to_pack = [
                par.subscript.aggregate.name
                for par in call_operands
                if isinstance(par, SubArrayRef) and (par.swept_inames)]
        else:
            insn_args_to_pack = args_to_pack

        if args_to_unpack is None:
            insn_args_to_unpack = [
                par.subscript.aggregate.name
                for par in call_operands
                if isinstance(par, SubArrayRef) and (par.swept_inames)]
        else:
            insn_args_to_unpack = args_to_unpack

        # {{{ sanity checks for args

        assert isinstance(insn_args_to_pack, list)
        assert isinstance(insn_args_to_unpack, list)

        for arg in insn_args_to_pack:
            # checking that the given args is a sub array ref
            found_sub_array_ref = any(
                isinstance(par, SubArrayRef)
                and (par.subscript.aggregate.name == arg)
                for par in call_operands)
            if not found_sub_array_ref:
                raise LoopyError(
                    "No match found for packing arg '%s' of call '%s' "
                    "at insn '%s'." % (arg, call_name, insn.id))

        for arg in insn_args_to_unpack:
            if arg not in insn_args_to_pack:
                raise LoopyError("Argument %s should be packed in order to be "
                                 "unpacked." % arg)

        # }}}

        packing_insns = []
        unpacking_insns = []

        # {{{ handling ilp tags

        from loopy.kernel.data import IlpBaseTag, VectorizeTag
        import islpy as isl
        from pymbolic import var

        dim_type = isl.dim_type.set

        # NOTE(review): all() is vacuously true for an iname without any
        # tags, so untagged inames are collected here as well -- confirm
        # that this is intended.
        ilp_inames = {
            iname
            for iname in insn.within_inames
            if all(
                isinstance(tag, (IlpBaseTag, VectorizeTag))
                for tag in kernel.iname_to_tags.get(iname, []))
        }

        new_ilp_inames = set()
        ilp_inames_map = {}
        for iname in ilp_inames:
            new_iname_name = vng(iname + "_ilp")
            ilp_inames_map[var(iname)] = var(new_iname_name)
            new_ilp_inames.add(new_iname_name)

        # Duplicate the domains of the collected inames under the new names.
        for iname in ilp_inames:
            new_domain = kernel.get_inames_domain(iname).copy()
            for i in range(new_domain.n_dim()):
                old_iname = new_domain.get_dim_name(dim_type, i)
                if old_iname in ilp_inames:
                    new_domain = new_domain.set_dim_name(
                        dim_type, i, ilp_inames_map[var(old_iname)].name)
            new_domains.append(new_domain)

        # }}}

        from pymbolic.mapper.substitutor import make_subst_func
        from loopy.symbolic import SubstitutionMapper

        # dict to store the new assignees and parameters, the mapping pattern
        # from arg_id to parameters is identical to
        # InKernelCallable.arg_id_to_dtype
        id_to_parameters = tuple(enumerate(parameters)) + tuple(
            (-i - 1, assignee) for i, assignee in enumerate(insn.assignees))
        new_id_to_parameters = {}

        for arg_id, p in id_to_parameters:
            if isinstance(p, SubArrayRef) and (
                    p.subscript.aggregate.name in insn_args_to_pack):
                # packing- and unpacking-specific inames; these maps only
                # rename the swept inames of this sub-array reference.
                new_pack_inames = {
                    iname: var(vng(iname.name + "_pack"))
                    for iname in p.swept_inames
                }
                new_unpack_inames = {
                    iname: var(vng(iname.name + "_unpack"))
                    for iname in p.swept_inames
                }

                # Updating the domains corresponding to the new inames.
                for iname in p.swept_inames:
                    new_domain_pack = kernel.get_inames_domain(
                        iname.name).copy()
                    new_domain_unpack = kernel.get_inames_domain(
                        iname.name).copy()
                    for i in range(new_domain_pack.n_dim()):
                        old_iname = new_domain_pack.get_dim_name(dim_type, i)
                        if var(old_iname) in new_pack_inames:
                            new_domain_pack = new_domain_pack.set_dim_name(
                                dim_type, i,
                                new_pack_inames[var(old_iname)].name)
                            new_domain_unpack = new_domain_unpack.set_dim_name(
                                dim_type, i,
                                new_unpack_inames[var(old_iname)].name)
                    new_domains.append(new_domain_pack)
                    new_domains.append(new_domain_unpack)

                arg = p.subscript.aggregate.name
                pack_name = vng(arg + "_pack")

                from loopy.kernel.data import (TemporaryVariable,
                                               temp_var_scope)

                if arg in kernel.arg_dict:
                    arg_in_caller = kernel.arg_dict[arg]
                else:
                    arg_in_caller = kernel.temporary_variables[arg]

                # The packed temporary takes its layout and shape from the
                # callee's descriptor for this argument.
                arg_descr = in_knl_callable.arg_id_to_descr[arg_id]

                pack_tmp = TemporaryVariable(
                    name=pack_name,
                    dtype=arg_in_caller.dtype,
                    dim_tags=arg_descr.dim_tags,
                    shape=arg_descr.shape,
                    scope=temp_var_scope.PRIVATE,
                )

                new_tmps[pack_name] = pack_tmp

                from loopy import Assignment
                pack_subst_mapper = SubstitutionMapper(
                    make_subst_func(new_pack_inames))
                unpack_subst_mapper = SubstitutionMapper(
                    make_subst_func(new_unpack_inames))

                # {{{ getting the lhs for packing and rhs for unpacking

                from loopy.isl_helpers import simplify_via_aff, make_slab

                # Flatten the caller-side subscript with the caller's
                # strides, then split it again with the callee's strides.
                flatten_index = simplify_via_aff(
                    sum(dim_tag.stride * idx
                        for dim_tag, idx in zip(
                            arg_in_caller.dim_tags,
                            p.subscript.index_tuple)))

                new_indices = []
                for dim_tag in arg_descr.dim_tags:
                    ind = flatten_index // dim_tag.stride
                    flatten_index -= (dim_tag.stride * ind)
                    new_indices.append(ind)

                new_indices = tuple(simplify_via_aff(i) for i in new_indices)

                pack_lhs_assignee = pack_subst_mapper(
                    var(pack_name).index(new_indices))
                unpack_rhs = unpack_subst_mapper(
                    var(pack_name).index(new_indices))

                # }}}

                packing_insns.append(
                    Assignment(
                        assignee=pack_lhs_assignee,
                        expression=pack_subst_mapper.map_subscript(
                            p.subscript),
                        within_inames=(
                            (insn.within_inames - ilp_inames)
                            | {new_pack_inames[i].name
                               for i in p.swept_inames}
                            | new_ilp_inames),
                        depends_on=insn.depends_on,
                        id=ing(insn.id + "_pack"),
                        depends_on_is_final=True))

                if p.subscript.aggregate.name in insn_args_to_unpack:
                    unpacking_insns.append(
                        Assignment(
                            expression=unpack_rhs,
                            assignee=unpack_subst_mapper.map_subscript(
                                p.subscript),
                            within_inames=(
                                (insn.within_inames - ilp_inames)
                                | {new_unpack_inames[i].name
                                   for i in p.swept_inames}
                                | new_ilp_inames),
                            id=ing(insn.id + "_unpack"),
                            depends_on=frozenset([insn.id]),
                            depends_on_is_final=True))

                # {{{ creating the sweep inames for the new sub array refs

                updated_swept_inames = []

                for _ in arg_descr.shape:
                    updated_swept_inames.append(
                        var(vng("i_packsweep_" + arg)))

                ctx = kernel.isl_context
                space = isl.Space.create_from_names(
                    ctx, set=[iname.name for iname in updated_swept_inames])
                iname_set = isl.BasicSet.universe(space)
                for iname, axis_length in zip(updated_swept_inames,
                                              arg_descr.shape):
                    iname_set = iname_set & make_slab(
                        space, iname.name, 0, axis_length)
                new_domains.append(iname_set)

                # }}}

                new_id_to_parameters[arg_id] = SubArrayRef(
                    tuple(updated_swept_inames),
                    (var(pack_name).index(tuple(updated_swept_inames))))
            else:
                new_id_to_parameters[arg_id] = p

        if packing_insns:
            # The call itself is moved onto the renamed "ilp" inames and
            # made to depend on all of its packing instructions.
            subst_mapper = SubstitutionMapper(make_subst_func(ilp_inames_map))
            new_call_insn = insn.with_transformed_expressions(subst_mapper)
            new_params = tuple(
                subst_mapper(new_id_to_parameters[i])
                for i, _ in enumerate(parameters))
            new_assignees = tuple(
                subst_mapper(new_id_to_parameters[-i - 1])
                for i, _ in enumerate(insn.assignees))
            new_call_insn = new_call_insn.copy(
                depends_on=new_call_insn.depends_on | {
                    pack.id for pack in packing_insns},
                within_inames=(
                    (new_call_insn.within_inames - ilp_inames)
                    | new_ilp_inames),
                expression=new_call_insn.expression.function(*new_params),
                assignees=new_assignees)
            old_insn_to_new_insns[insn.id] = (
                packing_insns + [new_call_insn] + unpacking_insns)

    if old_insn_to_new_insns:
        replaced_insn_ids = set(old_insn_to_new_insns)
        new_instructions = []
        for insn in kernel.instructions:
            if insn.id in old_insn_to_new_insns:
                # Replacing the current instruction with the group of
                # instructions including the packing and unpacking
                # instructions
                new_instructions.extend(old_insn_to_new_insns[insn.id])
            else:
                # for the instructions that depend on the call instruction
                # that are to be packed and unpacked, we need to add the
                # complete instruction block as a dependency for them.
                new_depends_on = insn.depends_on
                # need to add the unpack instructions on dependencies.
                for old_insn_id in insn.depends_on & replaced_insn_ids:
                    new_depends_on |= frozenset(
                        i.id for i in old_insn_to_new_insns[old_insn_id])
                new_instructions.append(insn.copy(depends_on=new_depends_on))

        kernel = kernel.copy(
            domains=kernel.domains + new_domains,
            instructions=new_instructions,
            temporary_variables=new_tmps)

    return kernel