def add_inames_to_insn(knl, inames, insn_match):
    """Return a copy of *knl* in which *inames* have been added to the
    ``within_inames`` of every instruction matched by *insn_match*.

    :arg inames: a :class:`frozenset` of inames that will be added to the
        instructions matched by *insn_match*, or a comma-separated
        string that parses to such a frozenset (each entry is stripped of
        surrounding whitespace).
    :arg insn_match: An instruction match as understood by
        :func:`loopy.match.parse_match`.

    :returns: a copy of *knl* whose matched instructions carry the union of
        their previous ``within_inames`` and *inames*. Instructions that do
        not match are carried over unchanged.

    :raises TypeError: if *inames* is neither a :class:`str` nor a
        :class:`frozenset`.

    .. versionadded:: 2016.3
    """
    if isinstance(inames, str):
        inames = frozenset(s.strip() for s in inames.split(","))

    # Validate before touching the kernel so bad input fails early.
    if not isinstance(inames, frozenset):
        raise TypeError("'inames' must be a frozenset")

    from loopy.match import parse_match
    match = parse_match(insn_match)

    new_instructions = [
        insn.copy(within_inames=insn.within_inames | inames)
        if match(knl, insn) else insn
        for insn in knl.instructions
    ]

    return knl.copy(instructions=new_instructions)
def tag_instructions(kernel, new_tag, within=None):
    """Return a copy of *kernel* in which *new_tag* has been added to the
    tags of every instruction selected by *within*.

    :arg new_tag: the tag to add; it is wrapped in a one-element
        :class:`frozenset` and unioned with each selected instruction's
        existing ``tags``.
    :arg within: an instruction match, passed to
        :func:`loopy.match.parse_match`.

    :returns: a copy of *kernel*; instructions that are not selected are
        carried over unchanged.
    """
    from loopy.match import parse_match

    is_selected = parse_match(within)

    return kernel.copy(instructions=[
        insn.copy(tags=insn.tags | frozenset([new_tag]))
        if is_selected(kernel, insn) else insn
        for insn in kernel.instructions
    ])
def add_barrier(kernel, insn_before="", insn_after="", id_based_on=None,
                tags=None, synchronization_kind="global", mem_kind=None):
    """Return a copy of *kernel* with a barrier inserted between two groups
    of instructions.

    The new barrier depends on every instruction matched by *insn_before*.
    The instructions matched by *insn_after* are then given a dependency on
    the barrier through ``add_dependency``. Both match expressions can be
    any input understood by :func:`loopy.match.parse_match`.

    :arg insn_before: String expression that specifies the instruction(s)
        before the barrier which is to be added
    :arg insn_after: String expression that specifies the instruction(s)
        after the barrier which is to be added
    :arg id_based_on: String on which the id of the barrier is based. If
        *None*, the id is based on the first character of
        *synchronization_kind* followed by ``"_barrier"``.
    :arg tags: The tags handed to the new barrier instruction
    :arg synchronization_kind: Kind of barrier to be added. May be "global"
        or "local"
    :arg mem_kind: Type of memory to be synchronized. May be "global" or
        "local". Ignored for "global" barriers. If not supplied, defaults
        to *synchronization_kind*

    :returns: the new kernel containing the barrier instruction.
    """
    # Imported locally, like the other transformations in this file, so the
    # function does not rely on a module-level import being present.
    from loopy.match import parse_match

    if mem_kind is None:
        mem_kind = synchronization_kind

    if id_based_on is None:
        id_based_on = synchronization_kind[0] + "_barrier"

    # Named 'barrier_id' (not 'id') to avoid shadowing the builtin.
    barrier_id = kernel.make_unique_instruction_id(based_on=id_based_on)

    match = parse_match(insn_before)
    insn_before_list = [
        insn.id for insn in kernel.instructions if match(kernel, insn)
    ]

    barrier_to_add = BarrierInstruction(
        depends_on=frozenset(insn_before_list),
        depends_on_is_final=True,
        id=barrier_id,
        tags=tags,
        synchronization_kind=synchronization_kind,
        mem_kind=mem_kind)

    new_kernel = kernel.copy(
        instructions=kernel.instructions + [barrier_to_add])

    # Make everything matched by insn_after wait for the barrier.
    new_kernel = add_dependency(
        kernel=new_kernel,
        insn_match=insn_after,
        depends_on="id:" + barrier_id)

    return new_kernel
def map_instructions(kernel, insn_match, f):
    """Return a copy of *kernel* in which every instruction selected by
    *insn_match* has been replaced by ``f(insn)``.

    :arg insn_match: an instruction match, passed to
        :func:`loopy.match.parse_match`.
    :arg f: a callable taking an instruction and returning its replacement.

    :returns: a copy of *kernel*; instructions that are not selected are
        carried over unchanged and *f* is not called on them.
    """
    from loopy.match import parse_match

    is_selected = parse_match(insn_match)

    return kernel.copy(instructions=[
        f(insn) if is_selected(kernel, insn) else insn
        for insn in kernel.instructions
    ])
def tag_instructions(kernel, new_tag, within=None):
    """Add *new_tag* to the tags of the instructions selected by *within*
    and return the resulting copy of *kernel*.

    :arg new_tag: the tag to add to each selected instruction's ``tags``.
    :arg within: an instruction match, passed to
        :func:`loopy.match.parse_match`.

    :returns: a copy of *kernel* with the updated instruction list.
    """
    from loopy.match import parse_match

    selector = parse_match(within)

    def _with_tag(insn):
        # Instructions outside the selection are passed through untouched.
        if not selector(kernel, insn):
            return insn
        return insn.copy(tags=insn.tags | frozenset([new_tag]))

    return kernel.copy(instructions=list(map(_with_tag, kernel.instructions)))
def tag_instructions(kernel, new_tag, within=None):
    """Return a copy of *kernel* in which the normalized form of *new_tag*
    has been added to the tags of every instruction selected by *within*.

    :arg new_tag: the tag to add. It is passed, wrapped in a one-element
        list, through ``loopy.kernel.creation._normalize_tags`` before being
        unioned with each selected instruction's ``tags``.
    :arg within: an instruction match, passed to
        :func:`loopy.match.parse_match`.

    :returns: a copy of *kernel*; instructions that are not selected are
        carried over unchanged.
    """
    from loopy.match import parse_match
    is_selected = parse_match(within)

    from loopy.kernel.creation import _normalize_tags
    extra_tags = _normalize_tags([new_tag])

    retagged = [
        insn.copy(tags=insn.tags | extra_tags)
        if is_selected(kernel, insn) else insn
        for insn in kernel.instructions
    ]

    return kernel.copy(instructions=retagged)
def add_barrier(knl, insn_before="", insn_after="", id_based_on=None,
                tags=None, synchronization_kind="global", mem_kind=None):
    """Return a copy of *knl* with a barrier inserted between two groups of
    instructions.

    The new barrier depends on every instruction matched by *insn_before*.
    The instructions matched by *insn_after* are then given a dependency on
    the barrier through ``add_dependency``. Both match expressions can be
    any input understood by :func:`loopy.match.parse_match`.

    :arg insn_before: String expression that specifies the instruction(s)
        before the barrier which is to be added
    :arg insn_after: String expression that specifies the instruction(s)
        after the barrier which is to be added
    :arg id_based_on: String on which the id of the barrier is based. If
        *None*, the id is based on the first character of
        *synchronization_kind* followed by ``"_barrier"``.
    :arg tags: The tags handed to the new barrier instruction
    :arg synchronization_kind: Kind of barrier to be added. May be "global"
        or "local"
    :arg mem_kind: Type of memory to be synchronized. May be "global" or
        "local". Ignored for "global" barriers. If not supplied, defaults
        to *synchronization_kind*

    :returns: the new kernel containing the barrier instruction.
    """
    # Imported locally, like the other transformations in this file, so the
    # function does not rely on a module-level import being present.
    from loopy.match import parse_match

    if mem_kind is None:
        mem_kind = synchronization_kind

    if id_based_on is None:
        id_based_on = synchronization_kind[0] + "_barrier"

    # Named 'barrier_id' (not 'id') to avoid shadowing the builtin.
    barrier_id = knl.make_unique_instruction_id(based_on=id_based_on)

    match = parse_match(insn_before)
    insn_before_list = [
        insn.id for insn in knl.instructions if match(knl, insn)
    ]

    barrier_to_add = BarrierInstruction(
        depends_on=frozenset(insn_before_list),
        depends_on_is_final=True,
        id=barrier_id,
        tags=tags,
        synchronization_kind=synchronization_kind,
        mem_kind=mem_kind)

    new_knl = knl.copy(instructions=knl.instructions + [barrier_to_add])

    # Make everything matched by insn_after wait for the barrier.
    new_knl = add_dependency(
        kernel=new_knl,
        insn_match=insn_after,
        depends_on="id:" + barrier_id)

    return new_knl
def find_unused_axis_tag(kernel, kind, insn_match=None):
    """For one of the hardware-parallel execution tags, find an unused axis.

    :arg insn_match: An instruction match as understood by
        :func:`loopy.match.parse_match`.
    :arg kind: may be "l" or "g" (compared against the ``print_name`` of
        :class:`GroupIndexTag` and :class:`LocalIndexTag`), or the tag class
        itself.

    :returns: an :class:`GroupIndexTag` or :class:`LocalIndexTag` that is
        not being used within the instructions matched by *insn_match*. The
        lowest axis number not in use is chosen.

    :raises LoopyError: if *kind* is a string that names neither tag class.
    """
    from loopy.kernel.data import GroupIndexTag, LocalIndexTag

    if isinstance(kind, str):
        for cls in (GroupIndexTag, LocalIndexTag):
            if kind == cls.print_name:
                kind = cls
                break
        else:
            # No tag class carries this print name.
            raise LoopyError("invalid tag kind: %s" % kind)

    from loopy.match import parse_match
    match = parse_match(insn_match)
    insns = [insn for insn in kernel.instructions if match(kernel, insn)]

    used_axes = set()
    for insn in insns:
        for iname in kernel.insn_inames(insn):
            dim_tag = kernel.iname_to_tag.get(iname)

            if isinstance(dim_tag, kind):
                # Record the axis of the tag actually found on the iname;
                # the axis lives on the tag instance, not on the tag class.
                used_axes.add(dim_tag.axis)

    # Smallest non-negative axis that is not yet taken.
    i = 0
    while i in used_axes:
        i += 1

    return kind(i)
def find_instructions(kernel, insn_match):
    """Return a list of the instructions of *kernel* selected by
    *insn_match*, in the order they appear in ``kernel.instructions``.

    :arg insn_match: an instruction match, passed to
        :func:`loopy.match.parse_match`.
    """
    from loopy.match import parse_match

    is_selected = parse_match(insn_match)

    return list(filter(
        lambda insn: is_selected(kernel, insn),
        kernel.instructions))
def extract_subst(kernel, subst_name, template, parameters=(), within=None):
    """Replace every subexpression that unifies with *template* by a call to
    a newly created substitution rule named *subst_name*.

    :arg subst_name: The name of the substitution rule to be created.
    :arg template: Unification template expression. A string is parsed
        with :func:`pymbolic.parse`.
    :arg parameters: An iterable of parameters used in *template*, or
        a comma-separated string of the same.
    :arg within: An instance of :class:`loopy.match.MatchExpressionBase` or
        :class:`str` as understood by :func:`loopy.match.parse_match`.

    All targeted subexpressions must match ('unify with') *template*
    The template may contain '*' wildcards that will have to match exactly
    across all unifications.

    If *kernel* is a :class:`TranslationUnit`, it must contain exactly one
    :class:`CallableKernel`; the transformation is applied to that kernel.

    :returns: a copy of *kernel* with rewritten instructions and
        substitution rules, including the new rule *subst_name*.

    :raises LoopyError: if a :class:`TranslationUnit` does not contain
        exactly one callable kernel.
    :raises RuntimeError: if an expression unifies ambiguously with
        *template*, or if no expression matches *template*.
    """
    if isinstance(kernel, TranslationUnit):
        kernel_names = [
            i for i, clbl in kernel.callables_table.items()
            if isinstance(clbl, CallableKernel)
        ]
        if len(kernel_names) != 1:
            raise LoopyError(
                "extract_subst expects a translation unit with exactly one "
                "callable kernel, found %d" % len(kernel_names))

        # Forward *within* as well, so the caller's restriction is honored
        # for translation units too.
        return kernel.with_kernel(
            extract_subst(kernel[kernel_names[0]], subst_name, template,
                          parameters, within=within))

    if isinstance(template, str):
        from pymbolic import parse
        template = parse(template)

    if isinstance(parameters, str):
        parameters = tuple(s.strip() for s in parameters.split(","))

    from loopy.match import parse_match
    within = parse_match(within)

    var_name_gen = kernel.get_var_name_generator()

    # {{{ replace any wildcards in template with new variables

    def get_unique_var_name():
        based_on = subst_name + "_wc"

        result = var_name_gen(based_on)
        return result

    from loopy.symbolic import WildcardToUniqueVariableMapper
    wc_map = WildcardToUniqueVariableMapper(get_unique_var_name)
    template = wc_map(template)

    # }}}

    # {{{ gather up expressions

    expr_descriptors = []

    from loopy.symbolic import UnidirectionalUnifier

    unif = UnidirectionalUnifier(lhs_mapping_candidates=set(parameters))

    def gather_exprs(expr, mapper):
        urecs = unif(template, expr)

        if urecs:
            if len(urecs) > 1:
                raise RuntimeError(
                    "ambiguous unification of '%s' with template '%s'"
                    % (expr, template))

            urec, = urecs

            # can't nest, don't recurse
            # NOTE(review): 'insn' is read from the enclosing scope. While
            # substitution rules are being walked below, it still refers to
            # the last instruction visited by the preceding loop.
            expr_descriptors.append(
                ExprDescriptor(
                    insn=insn,
                    expr=expr,
                    unif_var_dict={
                        lhs.name: rhs for lhs, rhs in urec.equations
                    }))
        else:
            mapper.fallback_mapper(expr)

    from loopy.symbolic import (CallbackMapper, WalkMapper, IdentityMapper)
    dfmapper = CallbackMapper(gather_exprs, WalkMapper())

    from loopy.kernel.instruction import MultiAssignmentBase

    for insn in kernel.instructions:
        if isinstance(insn, MultiAssignmentBase) and within(kernel, insn):
            dfmapper(insn.assignees)
            dfmapper(insn.expression)

    for sr in kernel.substitutions.values():
        dfmapper(sr.expression)

    # }}}

    if not expr_descriptors:
        raise RuntimeError("no expressions matching '%s'" % template)

    # {{{ substitute rule into instructions

    def replace_exprs(expr, mapper):
        # Matching is by object identity against the gathered expressions.
        found = False
        for exprd in expr_descriptors:
            if expr is exprd.expr:
                found = True
                break

        if not found:
            return mapper.fallback_mapper(expr)

        args = [exprd.unif_var_dict[arg_name] for arg_name in parameters]

        result = var(subst_name)
        if args:
            result = result(*args)

        # can't nest, don't recurse
        return result

    cbmapper = CallbackMapper(replace_exprs, IdentityMapper())

    new_insns = []

    def transform_assignee(expr):
        # Assignment LHS's cannot be subst rules. Treat them
        # specially.

        import pymbolic.primitives as prim
        if isinstance(expr, tuple):
            return tuple(transform_assignee(expr_i) for expr_i in expr)

        elif isinstance(expr, prim.Subscript):
            return type(expr)(expr.aggregate, cbmapper(expr.index))

        elif isinstance(expr, prim.Variable):
            return expr
        else:
            raise ValueError("assignment LHS not understood")

    for insn in kernel.instructions:
        if within(kernel, insn):
            new_insns.append(
                insn.with_transformed_expressions(
                    cbmapper, assignee_f=transform_assignee))
        else:
            new_insns.append(insn)

    from loopy.kernel.data import SubstitutionRule
    new_substs = {
        subst_name: SubstitutionRule(
            name=subst_name,
            arguments=tuple(parameters),
            expression=template,
        )
    }

    for subst in kernel.substitutions.values():
        new_substs[subst.name] = subst.copy(
            expression=cbmapper(subst.expression))

    # }}}

    return kernel.copy(instructions=new_insns, substitutions=new_substs)
def find_instructions_in_single_kernel(kernel, insn_match):
    """Return a list of the instructions of *kernel* selected by
    *insn_match*, in the order they appear in ``kernel.instructions``.

    :arg kernel: must be a :class:`LoopKernel`; this is checked with an
        assertion.
    :arg insn_match: an instruction match, passed to
        :func:`loopy.match.parse_match`.
    """
    assert isinstance(kernel, LoopKernel)

    from loopy.match import parse_match
    is_selected = parse_match(insn_match)

    def _selected():
        for insn in kernel.instructions:
            if is_selected(kernel, insn):
                yield insn

    return list(_selected())
def remove_instructions(kernel, insn_ids):
    """Return a new kernel with instructions in *insn_ids* removed.

    Dependencies across deleted instructions are transitively propagated,
    i.e. if insn_a depends on insn_b that depends on insn_c and 'insn_b' is
    to be removed, then the returned kernel will have a dependency from
    'insn_a' to 'insn_c'.

    This also updates *no_sync_with* for all instructions.

    If *insn_ids* is falsy (e.g. an empty set), *kernel* itself is returned.

    :arg insn_ids: An instance of :class:`set` or :class:`str` as
        understood by :func:`loopy.match.parse_match` or
        :class:`loopy.match.MatchExpressionBase`.
    """
    if not insn_ids:
        return kernel

    from loopy.match import MatchExpressionBase

    if isinstance(insn_ids, str):
        from loopy.match import parse_match
        insn_ids = parse_match(insn_ids)

    if isinstance(insn_ids, MatchExpressionBase):
        matcher = insn_ids
        insn_ids = {
            insn.id for insn in kernel.instructions if matcher(kernel, insn)
        }

    assert isinstance(insn_ids, set)

    id_to_insn = kernel.id_to_insn

    # {{{ for each insn_id to be removed get deps in terms of remaining insns

    # removed_to_kept: mapping from insn_id (referred as I) to be removed to
    # frozenset of insn_ids that won't be removed (referred as R(I)). 'R(I)'
    # are the transitive dependencies of 'I' that won't be removed.
    removed_to_kept = {}

    def _resolve(dep_ids):
        # Swap every dependency that is being removed for its recorded set
        # of surviving dependencies; keep all other dependencies as they are.
        return frozenset().union(
            *(removed_to_kept.get(dep, frozenset([dep])) for dep in dep_ids))

    kept_ids = frozenset(id_to_insn) - insn_ids

    for removed_id in _toposort_of_subset_of_insns(kernel, insn_ids):
        removed_deps = id_to_insn[removed_id].depends_on
        assert removed_deps <= (kept_ids | frozenset(removed_to_kept))
        removed_to_kept[removed_id] = _resolve(removed_deps)

    # }}}

    new_insns = []
    for insn in kernel.instructions:
        if insn.id in insn_ids:
            continue

        # transitively propagate dependencies
        old_deps = frozenset() if insn.depends_on is None else insn.depends_on
        new_deps = _resolve(old_deps)

        assert (new_deps & insn_ids) == frozenset()

        # update no_sync_with
        new_no_sync_with = frozenset(
            (other_id, scope)
            for other_id, scope in insn.no_sync_with
            if other_id not in insn_ids)

        new_insns.append(
            insn.copy(depends_on=new_deps, no_sync_with=new_no_sync_with))

    return kernel.copy(instructions=new_insns)