def augment_domain_for_temporary_promotion(
        kernel, domain, promoted_temporary, mode, name_gen):
    """
    Add new axes to the domain corresponding to the dimensions of
    `promoted_temporary`.

    One set dimension is appended per entry of
    ``promoted_temporary.orig_temporary.shape`` and constrained to
    ``0 <= iname < size``. Afterwards every iname listed in
    ``promoted_temporary.hw_inames`` is duplicated under a freshly generated
    name that takes over the tag of the iname it was copied from.

    :arg kernel: only ``kernel.iname_to_tag`` is read here.
    :arg domain: the isl set to extend.
    :arg promoted_temporary: object providing ``orig_temporary`` (with
        ``name`` and ``shape``) and ``hw_inames``.
    :arg mode: string spliced into the generated iname names.
    :arg name_gen: callable mapping a proposed name to the name to use.

    :returns: a tuple ``(domain, hw_inames, dim_inames, iname_to_tag)``
        where *domain* is a single basic set, *hw_inames* and *dim_inames*
        list the new names and *iname_to_tag* maps new hardware inames to
        their tags.
    """
    import islpy as isl
    from loopy.isl_helpers import duplicate_axes
    from loopy.symbolic import aff_from_expr

    orig_temporary = promoted_temporary.orig_temporary
    orig_dim = domain.dim(isl.dim_type.set)
    dims_to_insert = len(orig_temporary.shape)

    iname_to_tag = {}

    # Add dimension-dependent inames. No tag is assigned to them here.
    dim_inames = []
    domain = domain.add(isl.dim_type.set, dims_to_insert)

    for t_idx, size in enumerate(orig_temporary.shape):
        new_iname = name_gen("{name}_{mode}_dim_{dim}".format(
            name=orig_temporary.name, mode=mode, dim=orig_dim + t_idx))
        domain = domain.set_dim_name(
            isl.dim_type.set, orig_dim + t_idx, new_iname)
        dim_inames.append(new_iname)

        # Add size information: 0 <= new_iname < size. The upper bound has
        # to be strict, since `size` itself is one past the last index of
        # an axis with that extent.
        aff = isl.affs_from_space(domain.space)
        domain &= aff[0].le_set(aff[new_iname])
        domain &= aff[new_iname].lt_set(aff_from_expr(domain.space, size))

    # Add hardware inames duplicates.
    hw_inames = []
    for t_idx, hw_iname in enumerate(promoted_temporary.hw_inames):
        new_iname = name_gen("{name}_{mode}_hw_dim_{dim}".format(
            name=orig_temporary.name, mode=mode, dim=t_idx))
        hw_inames.append(new_iname)
        iname_to_tag[new_iname] = kernel.iname_to_tag[hw_iname]

    domain = duplicate_axes(
        domain, promoted_temporary.hw_inames, hw_inames)

    # The operations on the domain above return a Set object, but the
    # underlying domain should be expressible as a single BasicSet.
    domain_list = domain.get_basic_set_list()
    assert domain_list.n_basic_set() == 1
    domain = domain_list.get_basic_set(0)
    return domain, hw_inames, dim_inames, iname_to_tag
def augment_domain_for_temporary_promotion(
        kernel, domain, promoted_temporary, mode, name_gen):
    """
    Extend *domain* with one axis per dimension of `promoted_temporary`.

    Every entry of ``promoted_temporary.orig_temporary.shape`` gets a new
    set dimension bounded by ``0 <= iname < size``. If the original
    temporary reports ``is_local``, the new iname is tagged with
    ``AutoFitLocalIndexTag``. The inames in ``promoted_temporary.hw_inames``
    are then duplicated under generated names that inherit the tags found in
    ``kernel.iname_to_tag``.

    :returns: a tuple ``(domain, hw_inames, dim_inames, iname_to_tag)``.
    """
    import islpy as isl
    from loopy.isl_helpers import duplicate_axes
    from loopy.symbolic import aff_from_expr

    set_dim = isl.dim_type.set
    temporary = promoted_temporary.orig_temporary
    first_new_dim = domain.dim(set_dim)

    iname_to_tag = {}
    dim_inames = []

    # Make room for the per-dimension inames.
    domain = domain.add(set_dim, len(temporary.shape))

    for axis, extent in enumerate(temporary.shape):
        axis_iname = name_gen("{name}_{mode}_dim_{dim}".format(
            name=temporary.name, mode=mode, dim=axis))
        domain = domain.set_dim_name(
            set_dim, first_new_dim + axis, axis_iname)

        if temporary.is_local:
            # If the temporary has local scope, then loads / stores can be
            # done in parallel.
            from loopy.kernel.data import AutoFitLocalIndexTag
            iname_to_tag[axis_iname] = AutoFitLocalIndexTag()

        dim_inames.append(axis_iname)

        # Bound the new iname: 0 <= axis_iname < extent.
        affs = isl.affs_from_space(domain.space)
        domain &= affs[0].le_set(affs[axis_iname])
        domain &= affs[axis_iname].lt_set(
            aff_from_expr(domain.space, extent))

    # Duplicate the hardware inames, carrying over their tags.
    hw_inames = []
    for position, source_iname in enumerate(promoted_temporary.hw_inames):
        copy_iname = name_gen("{name}_{mode}_hw_dim_{dim}".format(
            name=temporary.name, mode=mode, dim=position))
        hw_inames.append(copy_iname)
        iname_to_tag[copy_iname] = kernel.iname_to_tag[source_iname]

    domain = duplicate_axes(domain, promoted_temporary.hw_inames, hw_inames)

    # The operations on the domain above return a Set object, but the
    # underlying domain should be expressible as a single BasicSet.
    pieces = domain.get_basic_set_list()
    assert pieces.n_basic_set() == 1
    return pieces.get_basic_set(0), hw_inames, dim_inames, iname_to_tag
def make_reduction_inames_unique(kernel, inames=None, within=None):
    """
    Rewrite *kernel* with ``_ReductionInameUniquifier`` and duplicate, in
    the kernel's domains, every ``(old, new)`` iname pair the uniquifier
    recorded in ``old_to_new``.

    :arg inames: if not *None*, only apply to these inames
    :arg within: a stack match as understood by
        :func:`loopy.match.parse_stack_match`.

    :returns: the transformed kernel.

    .. versionadded:: 2016.2
    """
    from loopy.isl_helpers import duplicate_axes
    from loopy.kernel.tools import DomainChanger
    from loopy.match import parse_stack_match

    name_gen = kernel.get_var_name_generator()
    within = parse_stack_match(within)

    # {{{ change kernel

    mapping_ctx = SubstitutionRuleMappingContext(
        kernel.substitutions, name_gen)
    uniquifier = _ReductionInameUniquifier(
        mapping_ctx, inames, within=within)

    rewritten = uniquifier.map_kernel(kernel)
    kernel = mapping_ctx.finish_kernel(rewritten)

    # }}}

    # {{{ duplicate the inames

    for source_iname, fresh_iname in uniquifier.old_to_new:
        changer = DomainChanger(kernel, frozenset([source_iname]))
        widened = duplicate_axes(
            changer.domain, [source_iname], [fresh_iname])
        kernel = kernel.copy(domains=changer.get_domains_with(widened))

    # }}}

    return kernel
def __init__(self, kernel, domain, sweep_inames, access_descriptors,
        storage_axis_count):
    """
    Build the storage-to-sweep map and the augmented domain.

    Sets ``kernel``, ``sweep_inames``, ``storage_axis_names``,
    ``primed_sweep_inames``, ``prime_sweep_inames``, ``stor2sweep``,
    ``non1_storage_axis_flags``, ``aug_domain``, ``storage_base_indices``
    and ``non1_storage_shape`` on the instance.

    :arg storage_axis_count: number of storage axes; they are named
        ``_loopy_storage_0``, ``_loopy_storage_1``, ...
    """
    from loopy.isl_helpers import duplicate_axes
    from loopy.symbolic import aff_from_expr

    self.kernel = kernel
    self.sweep_inames = sweep_inames

    storage_axis_names = [
        "_loopy_storage_%d" % axis for axis in range(storage_axis_count)]
    self.storage_axis_names = storage_axis_names

    # {{{ duplicate sweep inames

    # The duplication is necessary, otherwise the storage fetch
    # inames remain weirdly tied to the original sweep inames.

    primed_names = [name + "'" for name in sweep_inames]
    self.primed_sweep_inames = primed_names

    dup_sweep_index = domain.space.dim(dim_type.out)
    domain_dup_sweep = duplicate_axes(domain, sweep_inames, primed_names)

    sweep_to_primed = {}
    for plain, primed in zip(sweep_inames, primed_names):
        sweep_to_primed[plain] = var(primed)
    self.prime_sweep_inames = SubstitutionMapper(
        make_subst_func(sweep_to_primed))

    # }}}

    self.stor2sweep = build_global_storage_to_sweep_map(
        kernel, access_descriptors,
        domain_dup_sweep, dup_sweep_index,
        storage_axis_names,
        sweep_inames, primed_names, self.prime_sweep_inames)

    storage_base_indices, storage_shape = compute_bounds(
        kernel, domain, self.stor2sweep, primed_names,
        storage_axis_names)

    # compute augmented domain

    # {{{ filter out unit-length dimensions

    non1_storage_axis_flags = [length != 1 for length in storage_shape]
    non1_storage_shape = [
        length
        for length, keep in zip(storage_shape, non1_storage_axis_flags)
        if keep]

    # }}}

    # {{{ subtract off the base indices

    # add the new, base-0 indices as new in dimensions

    stor_idx = self.stor2sweep.get_space().dim(dim_type.out)
    n_stor = storage_axis_count
    nn1_stor = len(non1_storage_shape)

    moved = self.stor2sweep.move_dims(
        dim_type.out, stor_idx, dim_type.in_, 0, n_stor)
    aug_domain = moved.range()

    # aug_domain space now:
    # [domain](dup_sweep_index)[dup_sweep](stor_idx)[stor_axes']

    aug_domain = aug_domain.insert_dims(dim_type.set, stor_idx, nn1_stor)

    # Name the inserted dimensions after the non-unit-length storage axes,
    # in order.
    kept_names = [
        name
        for name, keep in zip(storage_axis_names, non1_storage_axis_flags)
        if keep]
    for offset, name in enumerate(kept_names):
        aug_domain = aug_domain.set_dim_name(
            dim_type.set, stor_idx + offset, name)

    # aug_domain space now:
    # [domain](dup_sweep_index)[dup_sweep](stor_idx)[stor_axes'][n1_stor_axes]

    for axis_name, base_index, keep in zip(
            storage_axis_names, storage_base_indices,
            non1_storage_axis_flags):
        if not keep:
            continue

        # axis == axis' - base_index
        shifted = var(axis_name) - (var(axis_name + "'") - base_index)
        constraint = isl.Constraint.equality_from_aff(
            aff_from_expr(aug_domain.get_space(), shifted))
        aug_domain = aug_domain.add_constraint(constraint)

    # }}}

    # eliminate (primed) storage axes with non-zero base indices
    aug_domain = aug_domain.project_out(
        dim_type.set, stor_idx + nn1_stor, n_stor)

    # eliminate duplicated sweep_inames
    aug_domain = aug_domain.project_out(
        dim_type.set, dup_sweep_index, len(sweep_inames))

    self.non1_storage_axis_flags = non1_storage_axis_flags
    self.aug_domain = aug_domain
    self.storage_base_indices = storage_base_indices
    self.non1_storage_shape = non1_storage_shape
def augment_domain_for_save_or_reload(self, domain, promoted_temporary,
        mode, subkernel):
    """
    Extend *domain* with axes for the dimensions of `promoted_temporary`,
    to be used in the save / reload stage.

    One set dimension bounded by ``0 <= iname < size`` is added per entry of
    ``promoted_temporary.non_hw_dims``. If the original temporary reports
    ``is_local``, the new iname is tagged with ``AutoFitLocalIndexTag``. The
    inames in ``promoted_temporary.hw_inames`` are duplicated under
    generated names that inherit the tags found in
    ``self.kernel.iname_to_tag``.

    :arg mode: either ``"save"`` or ``"reload"``.
    :arg subkernel: spliced into the generated iname names.

    :returns: a tuple ``(domain, hw_inames, dim_inames, iname_to_tag)``.
    """
    assert mode in ("save", "reload")

    import islpy as isl
    from loopy.isl_helpers import duplicate_axes
    from loopy.symbolic import aff_from_expr

    set_dim = isl.dim_type.set
    temporary = promoted_temporary.orig_temporary
    first_new_dim = domain.dim(set_dim)

    # Tags for newly added inames
    iname_to_tag = {}

    # FIXME: Restrict size of new inames to access footprint.

    # Add dimension-dependent inames.
    dim_inames = []
    domain = domain.add(set_dim, len(promoted_temporary.non_hw_dims))

    for axis, extent in enumerate(promoted_temporary.non_hw_dims):
        axis_iname = self.insn_name_gen(
            "{name}_{mode}_axis_{dim}_{sk}".format(
                name=temporary.name,
                mode=mode,
                dim=axis,
                sk=subkernel))
        domain = domain.set_dim_name(
            set_dim, first_new_dim + axis, axis_iname)

        if temporary.is_local:
            # If the temporary has local scope, then loads / stores can
            # be done in parallel.
            from loopy.kernel.data import AutoFitLocalIndexTag
            iname_to_tag[axis_iname] = AutoFitLocalIndexTag()

        dim_inames.append(axis_iname)

        # Add size information: 0 <= axis_iname < extent.
        affs = isl.affs_from_space(domain.space)
        domain &= affs[0].le_set(affs[axis_iname])
        domain &= affs[axis_iname].lt_set(
            aff_from_expr(domain.space, extent))

    # FIXME: Use promoted_temporary.hw_inames
    hw_inames = []

    # Add hardware inames duplicates.
    for position, source_iname in enumerate(promoted_temporary.hw_inames):
        copy_iname = self.insn_name_gen(
            "{name}_{mode}_hw_dim_{dim}_{sk}".format(
                name=temporary.name,
                mode=mode,
                dim=position,
                sk=subkernel))
        hw_inames.append(copy_iname)
        iname_to_tag[copy_iname] = self.kernel.iname_to_tag[source_iname]

    domain = duplicate_axes(domain, promoted_temporary.hw_inames, hw_inames)

    # The operations on the domain above return a Set object, but the
    # underlying domain should be expressible as a single BasicSet.
    pieces = domain.get_basic_set_list()
    assert pieces.n_basic_set() == 1
    return pieces.get_basic_set(0), hw_inames, dim_inames, iname_to_tag
def link_inames(knl, inames, new_iname, within=None, tag=None):
    """
    Replace every iname in *inames* by a single new iname.

    The domain projections onto each of *inames* (together with the
    unrelated inames of the same domain) must be identical. The first entry
    of *inames* is duplicated in the domain under the new name, the code is
    rewritten to use the new name, and the now-unused inames are removed.

    :arg inames: a list of iname names or a comma-separated string.
    :arg new_iname: proposed name; passed through the kernel's variable
        name generator.
    :arg within: passed to ``parse_stack_match``.
    :arg tag: if not *None*, tag applied to the new iname.

    :returns: the transformed kernel.
    :raises LoopyError: if the domain constraints of *inames* differ.
    """
    from loopy.kernel.tools import DomainChanger

    # {{{ normalize arguments

    if isinstance(inames, str):
        inames = inames.split(",")

    var_name_gen = knl.get_var_name_generator()
    new_iname = var_name_gen(new_iname)

    # }}}

    # {{{ ensure that each iname is used at most once in each instruction

    inames_set = set(inames)

    if 0:
        # FIXME!
        for insn in knl.instructions:
            insn_inames = knl.insn_inames(insn.id) | insn.reduction_inames()

            if len(insn_inames & inames_set) > 1:
                raise LoopyError("To-be-linked inames '%s' are used in "
                        "instruction '%s'. No more than one such iname can "
                        "be used in one instruction."
                        % (", ".join(insn_inames & inames_set), insn.id))

    # }}}

    domch = DomainChanger(knl, tuple(inames))

    # {{{ ensure that projections are identical

    all_set_names = set(domch.domain.get_var_names(dim_type.set))
    unrelated_dom_inames = list(all_set_names - inames_set)

    # Scratch copy only: its result is discarded below, but the lookups and
    # the assertion still validate that each iname is a set dimension.
    scratch = domch.domain

    # move all inames to be linked to end to prevent shuffly confusion
    for iname in inames:
        dt, index = scratch.get_var_dict()[iname]
        assert dt == dim_type.set

        # move to tail of param dim_type
        scratch = scratch.move_dims(
            dim_type.param, scratch.dim(dim_type.param),
            dt, index, 1)
        # move to tail of set dim_type
        scratch = scratch.move_dims(
            dim_type.set, scratch.dim(dim_type.set),
            dim_type.param, scratch.dim(dim_type.param)-1, 1)

    projections = []
    for iname in inames:
        projections.append(
            domch.domain.project_out_except(
                unrelated_dom_inames + [iname], [dim_type.set]))

    reference = projections[0]
    if not all(
            other <= reference and reference <= other
            for other in projections[1:]):
        raise LoopyError("Inames cannot be linked because their domain "
                "constraints are not the same.")

    del scratch  # messed up for testing, do not use

    # }}}

    # change the domain
    from loopy.isl_helpers import duplicate_axes
    linked_domain = duplicate_axes(domch.domain, [inames[0]], [new_iname])
    knl = knl.copy(domains=domch.get_domains_with(linked_domain))

    # {{{ change the code

    from pymbolic import var
    subst_dict = {iname: var(new_iname) for iname in inames}

    from loopy.context_matching import parse_stack_match
    within = parse_stack_match(within)

    from pymbolic.mapper.substitutor import make_subst_func
    rule_mapping_context = SubstitutionRuleMappingContext(
        knl.substitutions, var_name_gen)
    joiner = RuleAwareSubstitutionMapper(
        rule_mapping_context, make_subst_func(subst_dict), within)

    knl = rule_mapping_context.finish_kernel(joiner.map_kernel(knl))

    # }}}

    knl = remove_unused_inames(knl, inames)

    if tag is None:
        return knl

    return tag_inames(knl, {new_iname: tag})
def duplicate_inames(knl, inames, within, new_inames=None, suffix=None,
        tags=None):
    """
    Duplicate *inames* in the domains of *knl* and rewrite the kernel with
    ``_InameDuplicator`` using the resulting old-to-new name mapping.

    :arg inames: a list of iname names or a comma-separated string.
    :arg within: a stack match as understood by
        :func:`loopy.context_matching.parse_stack_match`.
    :arg new_inames: *None*, a comma-separated string, or a sequence with
        one entry per iname. An entry of *None* requests a generated name
        (the old name, plus *suffix* if given, passed through the kernel's
        name generator). The sequence passed in is not modified.
    :arg suffix: appended to the old name when a name is generated.
    :arg tags: optional mapping from *old* iname to the tag to apply to its
        duplicate.

    :returns: the transformed kernel.
    :raises ValueError: if *new_inames* and *inames* differ in length, or
        if an explicitly given new name conflicts with an existing name.
    """
    if tags is None:
        tags = {}

    # {{{ normalize arguments, find unique new_inames

    if isinstance(inames, str):
        inames = [iname.strip() for iname in inames.split(",")]

    if isinstance(new_inames, str):
        new_inames = [iname.strip() for iname in new_inames.split(",")]

    if new_inames is None:
        new_inames = [None] * len(inames)
    else:
        # Work on a private copy: entries are overwritten below, which must
        # not leak into the caller's sequence (and would fail on a tuple).
        new_inames = list(new_inames)

    if len(new_inames) != len(inames):
        raise ValueError("new_inames must have the same number of entries as inames")

    from loopy.context_matching import parse_stack_match
    within = parse_stack_match(within)

    name_gen = knl.get_var_name_generator()

    for i, iname in enumerate(inames):
        new_iname = new_inames[i]

        if new_iname is None:
            new_iname = iname

            if suffix is not None:
                new_iname += suffix

            new_iname = name_gen(new_iname)

        else:
            if name_gen.is_name_conflicting(new_iname):
                raise ValueError("new iname '%s' conflicts with existing names"
                        % new_iname)

            name_gen.add_name(new_iname)

        new_inames[i] = new_iname

    # }}}

    # {{{ duplicate the inames

    from loopy.isl_helpers import duplicate_axes
    from loopy.kernel.tools import DomainChanger

    for old_iname, new_iname in zip(inames, new_inames):
        domch = DomainChanger(knl, frozenset([old_iname]))

        knl = knl.copy(
            domains=domch.get_domains_with(
                duplicate_axes(domch.domain, [old_iname], [new_iname])))

    # }}}

    # {{{ change the inames in the code

    rule_mapping_context = SubstitutionRuleMappingContext(
        knl.substitutions, name_gen)
    indup = _InameDuplicator(
        rule_mapping_context,
        old_to_new=dict(zip(inames, new_inames)),
        within=within)

    knl = rule_mapping_context.finish_kernel(indup.map_kernel(knl))

    # }}}

    # {{{ realize tags

    for old_iname, new_iname in zip(inames, new_inames):
        new_tag = tags.get(old_iname)
        if new_tag is not None:
            knl = tag_inames(knl, {new_iname: new_tag})

    # }}}

    return knl
def __init__(self, kernel, domain, sweep_inames, access_descriptors,
        storage_axis_count):
    """
    Build the storage-to-sweep map and the augmented domain.

    Sets ``kernel``, ``sweep_inames``, ``storage_axis_names``,
    ``primed_sweep_inames``, ``prime_sweep_inames``, ``stor2sweep``,
    ``non1_storage_axis_flags``, ``aug_domain``, ``storage_base_indices``
    and ``non1_storage_shape`` on the instance.

    :arg storage_axis_count: number of storage axes; they are named
        ``_loopy_storage_0``, ``_loopy_storage_1``, ...
    """
    self.kernel = kernel
    self.sweep_inames = sweep_inames

    storage_axis_names = self.storage_axis_names = [
        "_loopy_storage_%d" % i for i in range(storage_axis_count)]

    # {{{ duplicate sweep inames

    # The duplication is necessary, otherwise the storage fetch
    # inames remain weirdly tied to the original sweep inames.

    self.primed_sweep_inames = [psin+"'" for psin in sweep_inames]

    from loopy.isl_helpers import duplicate_axes
    dup_sweep_index = domain.space.dim(dim_type.out)
    domain_dup_sweep = duplicate_axes(
        domain, sweep_inames,
        self.primed_sweep_inames)

    self.prime_sweep_inames = SubstitutionMapper(make_subst_func({
        sin: var(psin)
        for sin, psin in zip(sweep_inames, self.primed_sweep_inames)}))

    # }}}

    self.stor2sweep = build_global_storage_to_sweep_map(
        kernel, access_descriptors,
        domain_dup_sweep, dup_sweep_index,
        storage_axis_names,
        sweep_inames, self.primed_sweep_inames, self.prime_sweep_inames)

    storage_base_indices, storage_shape = compute_bounds(
        kernel, domain, self.stor2sweep, self.primed_sweep_inames,
        storage_axis_names)

    # compute augmented domain

    # {{{ filter out unit-length dimensions

    # Only the axis lengths matter here; names and base indices are not
    # needed for the filtering.
    non1_storage_axis_flags = []
    non1_storage_shape = []

    for saxis_len in storage_shape:
        has_length_non1 = saxis_len != 1

        non1_storage_axis_flags.append(has_length_non1)

        if has_length_non1:
            non1_storage_shape.append(saxis_len)

    # }}}

    # {{{ subtract off the base indices

    # add the new, base-0 indices as new in dimensions

    sp = self.stor2sweep.get_space()
    stor_idx = sp.dim(dim_type.out)

    n_stor = storage_axis_count
    nn1_stor = len(non1_storage_shape)

    aug_domain = self.stor2sweep.move_dims(
        dim_type.out, stor_idx,
        dim_type.in_, 0,
        n_stor).range()

    # aug_domain space now:
    # [domain](dup_sweep_index)[dup_sweep](stor_idx)[stor_axes']

    aug_domain = aug_domain.insert_dims(dim_type.set, stor_idx, nn1_stor)

    # Name the inserted dimensions after the non-unit-length storage axes.
    inew = 0
    for i, name in enumerate(storage_axis_names):
        if non1_storage_axis_flags[i]:
            aug_domain = aug_domain.set_dim_name(
                dim_type.set, stor_idx + inew, name)
            inew += 1

    # aug_domain space now:
    # [domain](dup_sweep_index)[dup_sweep](stor_idx)[stor_axes'][n1_stor_axes]

    from loopy.symbolic import aff_from_expr
    for saxis, bi, s in zip(storage_axis_names, storage_base_indices,
            storage_shape):
        if s != 1:
            # saxis == saxis' - bi
            cns = isl.Constraint.equality_from_aff(
                aff_from_expr(
                    aug_domain.get_space(),
                    var(saxis) - (var(saxis+"'") - bi)))

            aug_domain = aug_domain.add_constraint(cns)

    # }}}

    # eliminate (primed) storage axes with non-zero base indices
    aug_domain = aug_domain.project_out(dim_type.set, stor_idx+nn1_stor, n_stor)

    # eliminate duplicated sweep_inames
    nsweep = len(sweep_inames)
    aug_domain = aug_domain.project_out(dim_type.set, dup_sweep_index, nsweep)

    self.non1_storage_axis_flags = non1_storage_axis_flags
    self.aug_domain = aug_domain
    self.storage_base_indices = storage_base_indices
    self.non1_storage_shape = non1_storage_shape
def duplicate_inames(knl, inames, within, new_inames=None, suffix=None,
        tags=None):
    """
    Duplicate *inames* in the domains of *knl* and rewrite the kernel with
    ``_InameDuplicator`` using the resulting old-to-new name mapping.

    :arg inames: a list of iname names or a comma-separated string.
    :arg within: a stack match as understood by
        :func:`loopy.match.parse_stack_match`.
    :arg new_inames: *None*, a comma-separated string, or a sequence with
        one entry per iname. An entry of *None* requests a generated name
        (the old name, plus *suffix* if given, passed through the kernel's
        name generator). The sequence passed in is not modified.
    :arg suffix: appended to the old name when a name is generated.
    :arg tags: optional mapping from *old* iname to the tag to apply to its
        duplicate.

    :returns: the transformed kernel.
    :raises ValueError: if *new_inames* and *inames* differ in length, or
        if an explicitly given new name conflicts with an existing name.
    """
    if tags is None:
        tags = {}

    # {{{ normalize arguments, find unique new_inames

    if isinstance(inames, str):
        inames = [iname.strip() for iname in inames.split(",")]

    if isinstance(new_inames, str):
        new_inames = [iname.strip() for iname in new_inames.split(",")]

    if new_inames is None:
        new_inames = [None] * len(inames)
    else:
        # Work on a private copy: entries are overwritten below, which must
        # not leak into the caller's sequence (and would fail on a tuple).
        new_inames = list(new_inames)

    if len(new_inames) != len(inames):
        raise ValueError(
            "new_inames must have the same number of entries as inames")

    from loopy.match import parse_stack_match
    within = parse_stack_match(within)

    name_gen = knl.get_var_name_generator()

    for i, iname in enumerate(inames):
        new_iname = new_inames[i]

        if new_iname is None:
            new_iname = iname

            if suffix is not None:
                new_iname += suffix

            new_iname = name_gen(new_iname)

        else:
            if name_gen.is_name_conflicting(new_iname):
                raise ValueError(
                    "new iname '%s' conflicts with existing names"
                    % new_iname)

            name_gen.add_name(new_iname)

        new_inames[i] = new_iname

    # }}}

    # {{{ duplicate the inames

    from loopy.isl_helpers import duplicate_axes
    from loopy.kernel.tools import DomainChanger

    for old_iname, new_iname in zip(inames, new_inames):
        domch = DomainChanger(knl, frozenset([old_iname]))

        knl = knl.copy(domains=domch.get_domains_with(
            duplicate_axes(domch.domain, [old_iname], [new_iname])))

    # }}}

    # {{{ change the inames in the code

    rule_mapping_context = SubstitutionRuleMappingContext(
        knl.substitutions, name_gen)
    indup = _InameDuplicator(
        rule_mapping_context,
        old_to_new=dict(zip(inames, new_inames)),
        within=within)

    knl = rule_mapping_context.finish_kernel(indup.map_kernel(knl))

    # }}}

    # {{{ realize tags

    for old_iname, new_iname in zip(inames, new_inames):
        new_tag = tags.get(old_iname)
        if new_tag is not None:
            knl = tag_inames(knl, {new_iname: new_tag})

    # }}}

    return knl