def intersect_kernel_with_slab(kernel, slab, iname):
    """Intersect the domain of *kernel* associated with *iname* with *slab*.

    The domain is obtained from a ``DomainChanger`` constructed for the
    single iname *iname*; the value returned is whatever
    ``DomainChanger.get_kernel_with`` produces for the intersected domain.
    """
    from loopy.kernel.tools import DomainChanger

    changer = DomainChanger(kernel, (iname,))
    kernel_domain = changer.get_original_domain()

    # align_two hands its results back in argument order, so the first
    # result belongs to *slab* and the second to the kernel's domain.
    slab_aligned, domain_aligned = isl.align_two(slab, kernel_domain)

    return changer.get_kernel_with(slab_aligned & domain_aligned)
def intersect_kernel_with_slab(kernel, slab, iname):
    """Intersect the domain of *kernel* associated with *iname* with *slab*.

    The domain is obtained from a ``DomainChanger`` constructed for the
    single iname *iname*. Domain and slab are aligned with
    ``isl.align_two`` before being intersected, and the result is passed
    to ``DomainChanger.get_kernel_with``, whose return value is returned.
    """
    from loopy.kernel.tools import DomainChanger

    domain_changer = DomainChanger(kernel, (iname,))
    domain = domain_changer.get_original_domain()

    # Both operands must be aligned before they can be intersected.
    domain, restriction = isl.align_two(domain, slab)
    restricted_domain = domain & restriction

    return domain_changer.get_kernel_with(restricted_domain)
def save_or_reload_impl(self, temporary, subkernel, mode,
                        promoted_temporary=lp.auto):
    """Queue an instruction copying between *temporary* and its promoted form.

    :arg temporary: name of the temporary being saved or reloaded
    :arg subkernel: the subkernel the save / reload belongs to
    :arg mode: either ``"save"`` or ``"reload"``
    :arg promoted_temporary: the promoted counterpart of *temporary*. If
        left at ``auto`` it is obtained from ``self.auto_promote_temporary``.
        If it is (or turns out to be) *None*, nothing is done.

    Nothing is returned. The method updates ``self.kernel`` (augmented
    domain), ``self.insns_to_insert``, ``self.insns_to_update``,
    ``self.saves_or_reloads_added``, ``self.updated_temporary_variables``
    and ``self.updated_iname_to_tag``.
    """
    assert mode in ("save", "reload")

    if promoted_temporary is auto:
        promoted_temporary = self.auto_promote_temporary(temporary)

    # No promoted counterpart: there is nothing to save or reload.
    if promoted_temporary is None:
        return

    from loopy.kernel.tools import DomainChanger
    changer_inames = frozenset(
        self.insn_query.inames_in_subkernel(subkernel)
        | set(promoted_temporary.hw_inames))
    domain_changer = DomainChanger(self.kernel, changer_inames)

    new_domain, hw_inames, dim_inames, iname_to_tag = \
        self.augment_domain_for_save_or_reload(
            domain_changer.domain, promoted_temporary, mode, subkernel)

    self.kernel = domain_changer.get_kernel_with(new_domain)

    new_insn_id = self.insn_name_gen(
        "{name}.{mode}".format(name=temporary, mode=mode))

    def subscript_or_var(agg, subscript=()):
        # A bare variable when there are no indices, a subscript otherwise.
        from pymbolic.primitives import Subscript, Variable
        if len(subscript) == 0:
            return Variable(agg)
        return Subscript(Variable(agg), tuple(map(Variable, subscript)))

    # The original temporary is indexed only by as many inames as it has
    # axes; the promoted one is indexed by the hw inames plus all dim inames.
    n_orig_axes = len(promoted_temporary.orig_temporary.shape)
    temporary_ref = subscript_or_var(temporary, dim_inames[:n_orig_axes])
    promoted_ref = subscript_or_var(
        promoted_temporary.name, hw_inames + dim_inames)

    # Positional order handed to Assignment: (temporary, promoted) for a
    # reload, (promoted, temporary) for a save.
    if mode == "save":
        insn_operands = (promoted_ref, temporary_ref)
    else:
        insn_operands = (temporary_ref, promoted_ref)

    accessing_insns_in_subkernel = (
        self.insn_query.insns_reading_or_writing(temporary)
        & self.insn_query.insns_in_subkernel(subkernel))

    # A save depends on the accessing instructions; for a reload the
    # accessing instructions are made to depend on the new instruction.
    if mode == "save":
        depends_on = accessing_insns_in_subkernel
        update_deps = frozenset()
    elif mode == "reload":
        depends_on = frozenset()
        update_deps = accessing_insns_in_subkernel

    pre_barrier, post_barrier = \
        self.insn_query.pre_and_post_barriers(subkernel)

    if pre_barrier is not None:
        depends_on |= {pre_barrier}

    if post_barrier is not None:
        update_deps |= {post_barrier}

    # Create the load / store instruction.
    from loopy.kernel.data import Assignment
    within_inames = (
        self.insn_query.inames_in_subkernel(subkernel)
        | frozenset(hw_inames + dim_inames))
    new_insn = Assignment(
        *insn_operands,
        id=new_insn_id,
        within_inames=within_inames,
        within_inames_is_final=True,
        depends_on=depends_on,
        boostable=False,
        boostable_into=frozenset())

    self.saves_or_reloads_added.setdefault(temporary, set()).add(new_insn_id)
    self.insns_to_insert.append(new_insn)

    # Prefer an already-updated copy of an instruction over the kernel's.
    for dep_id in update_deps:
        insn = self.insns_to_update.get(
            dep_id, self.kernel.id_to_insn[dep_id])
        self.insns_to_update[dep_id] = insn.copy(
            depends_on=insn.depends_on | frozenset([new_insn_id]))

    self.updated_temporary_variables[promoted_temporary.name] = \
        promoted_temporary.as_variable()

    self.updated_iname_to_tag.update(iname_to_tag)
def insert_loads_or_spills(tvals, mode):
    """Emit load or spill instructions for the temporaries named in *tvals*.

    For every temporary, the kernel domain is augmented via
    ``augment_domain_for_temporary_promotion``, an ``Assignment`` between
    the temporary and its promoted counterpart is appended to
    ``new_instructions``, and the matching ``EnterLoop`` /
    ``RunInstruction`` / ``LeaveLoop`` items are appended to the subkernel
    prolog (``"load"``) or epilog (``"spill"``). If any of the temporaries
    is local, a local barrier is added after the loads / before the spills.

    :arg tvals: iterable of temporary names (keys of ``new_temporaries``)
    :arg mode: either ``"load"`` or ``"spill"``
    :returns: the kernel carrying the domain changes for *all* of *tvals*
    """
    assert mode in ["load", "spill"]

    from loopy.kernel.data import Assignment
    from loopy.kernel.tools import DomainChanger
    from pymbolic.primitives import Subscript, Variable

    def subscript_or_var(agg, subscript):
        # A bare variable when there are no indices, a subscript otherwise.
        if len(subscript) == 0:
            return Variable(agg)
        return Subscript(Variable(agg), tuple(map(Variable, subscript)))

    local_temporaries = set()
    code_block = subkernel_prolog if mode == "load" else subkernel_epilog
    new_kernel = kernel

    for tval in tvals:
        promoted = new_temporaries[tval]

        # Build the changer from the kernel accumulated so far. Starting
        # from the original ``kernel`` each time would throw away the
        # domain changes made for the temporaries handled earlier.
        dchg = DomainChanger(
            new_kernel,
            frozenset(sched_item.extra_inames + promoted.hw_inames))

        domain, hw_inames, dim_inames, itt = \
            augment_domain_for_temporary_promotion(
                new_kernel, dchg.domain, promoted, mode, name_gen)
        new_iname_to_tag.update(itt)

        new_kernel = dchg.get_kernel_with(domain)

        # Add the load / spill instruction.
        insn_id = name_gen("{name}.{mode}".format(name=tval, mode=mode))

        args = (
            subscript_or_var(tval, dim_inames),
            subscript_or_var(promoted.name, hw_inames + dim_inames))

        # A spill passes the same two operands in the opposite order.
        if mode == "spill":
            args = reversed(args)

        new_instructions.append(Assignment(*args, id=insn_id))

        # Wrap the instruction in loops over the dim inames, closing them
        # in the opposite order from which they were opened.
        code_block.extend(
            [EnterLoop(iname=iname) for iname in dim_inames]
            + [RunInstruction(insn_id=insn_id)]
            + [LeaveLoop(iname=iname) for iname in reversed(dim_inames)])

        if promoted.orig_temporary.is_local:
            local_temporaries.add(promoted.name)

    # After loading / before spilling local temporaries, we need to
    # insert a barrier.
    if local_temporaries:
        names = ", ".join(sorted(local_temporaries))
        if mode == "load":
            subkernel_prolog.append(
                Barrier(kind="local",
                        comment="for loads of {0}".format(names)))
        else:
            subkernel_epilog.insert(
                0,
                Barrier(kind="local",
                        comment="for spills of {0}".format(names)))

    return new_kernel
def insert_loads_or_spills(tvals, mode):
    """Emit load or spill instructions for the temporaries named in *tvals*.

    Each temporary gets its domain augmented through
    ``augment_domain_for_temporary_promotion``, an ``Assignment`` between
    it and its promoted counterpart added to ``new_instructions``, and the
    corresponding ``EnterLoop`` / ``RunInstruction`` / ``LeaveLoop``
    schedule items added to the subkernel prolog (``"load"``) or epilog
    (``"spill"``). When at least one temporary is local, a local barrier
    is placed after the loads / before the spills.

    :arg tvals: iterable of temporary names (keys of ``new_temporaries``)
    :arg mode: either ``"load"`` or ``"spill"``
    :returns: the kernel carrying the domain changes for *all* of *tvals*
    """
    assert mode in ["load", "spill"]

    from loopy.kernel.data import Assignment
    from loopy.kernel.tools import DomainChanger
    from pymbolic.primitives import Subscript, Variable

    def subscript_or_var(agg, subscript):
        # A bare variable when there are no indices, a subscript otherwise.
        if len(subscript) == 0:
            return Variable(agg)
        return Subscript(Variable(agg), tuple(map(Variable, subscript)))

    is_load = mode == "load"
    code_block = subkernel_prolog if is_load else subkernel_epilog
    local_temporaries = set()
    new_kernel = kernel

    for tval in tvals:
        promoted = new_temporaries[tval]
        changer_inames = frozenset(
            sched_item.extra_inames + promoted.hw_inames)

        # The changer must start from ``new_kernel`` rather than the
        # original ``kernel``; otherwise every iteration discards the
        # domain changes made for the previously processed temporaries.
        dchg = DomainChanger(new_kernel, changer_inames)

        domain, hw_inames, dim_inames, itt = \
            augment_domain_for_temporary_promotion(
                new_kernel, dchg.domain, promoted, mode, name_gen)
        new_iname_to_tag.update(itt)
        new_kernel = dchg.get_kernel_with(domain)

        # Add the load / spill instruction.
        insn_id = name_gen("{name}.{mode}".format(name=tval, mode=mode))

        args = (
            subscript_or_var(tval, dim_inames),
            subscript_or_var(promoted.name, hw_inames + dim_inames))

        # A spill passes the same two operands in the opposite order.
        if mode == "spill":
            args = reversed(args)

        new_instructions.append(Assignment(*args, id=insn_id))

        # Loops are opened in dim_inames order and closed in reverse.
        loop_begin = [EnterLoop(iname=iname) for iname in dim_inames]
        loop_end = [LeaveLoop(iname=iname) for iname in reversed(dim_inames)]
        code_block.extend(
            loop_begin + [RunInstruction(insn_id=insn_id)] + loop_end)

        if promoted.orig_temporary.is_local:
            local_temporaries.add(promoted.name)

    # After loading / before spilling local temporaries, we need to
    # insert a barrier.
    if local_temporaries:
        names = ", ".join(sorted(local_temporaries))
        if is_load:
            subkernel_prolog.append(
                Barrier(kind="local",
                        comment="for loads of {0}".format(names)))
        else:
            subkernel_epilog.insert(
                0,
                Barrier(kind="local",
                        comment="for spills of {0}".format(names)))

    return new_kernel