def process_set(s):
    """Constrain the dimension called ``name`` in *s* and project it out.

    ``name`` and ``value`` are not parameters of this function; they are
    looked up in the surrounding scope.

    :arg s: an object offering ``get_var_dict()``, ``space``,
        ``add_constraint()`` and ``project_out()`` (presumably an isl
        set -- confirm against the caller).
    :returns: *s* itself when its variable dictionary has no entry for
        ``name``. Otherwise, *s* with an equality constraint built from
        ``iname_rel_aff(s.space, name, "==", <zero aff + value>)`` added
        and the matching dimension projected out.
    """
    dims_by_name = s.get_var_dict()

    try:
        dim_kind, dim_pos = dims_by_name[name]
    except KeyError:
        # This set does not know about the dimension; leave it untouched.
        return s

    target_aff = isl.Aff.zero_on_domain(s.space) + value

    from loopy.isl_helpers import iname_rel_aff
    relation_aff = iname_rel_aff(s.space, name, "==", target_aff)
    equality = isl.Constraint.equality_from_aff(relation_aff)

    constrained = s.add_constraint(equality)
    return constrained.project_out(dim_kind, dim_pos, 1)
def process_set(s):
    """Fix the dimension ``name`` of *s* via an equality and eliminate it.

    Both ``name`` and ``value`` come from the enclosing scope rather than
    from the argument list.

    :arg s: object with ``get_var_dict()``, ``space``, ``add_constraint()``
        and ``project_out()``.
    :returns: *s* unchanged if ``name`` is not one of its variables;
        otherwise the result of adding the ``"=="`` constraint produced by
        ``iname_rel_aff`` and projecting out that single dimension.
    """
    known_dims = s.get_var_dict()
    if name not in known_dims:
        # Nothing to constrain in this set.
        return s

    dt, idx = known_dims[name]

    # Promote ``value`` to an affine expression living on the set's space.
    rhs_aff = isl.Aff.zero_on_domain(s.space) + value

    from loopy.isl_helpers import iname_rel_aff
    constraint = isl.Constraint.equality_from_aff(
        iname_rel_aff(s.space, name, "==", rhs_aff))

    return s.add_constraint(constraint).project_out(dt, idx, 1)
def fix(self, iname, aff):
    """Return a copy of this object in which *iname* is tied to *aff*.

    :arg iname: name of the dimension to constrain. If the space of
        ``self.implemented_domain`` has no variable of that name, a new
        set dimension with this name is appended first.
    :arg aff: affine expression; used both for the ``"=="`` constraint
        added to the domain and, converted with ``pw_aff_to_expr``, as the
        value passed to ``copy_and_assign``.
    :returns: ``self.copy_and_assign(iname, expr)`` further copied with
        ``implemented_domain`` replaced by the constrained domain.
    """
    domain = self.implemented_domain
    space = domain.get_space()

    if iname not in space.get_var_dict():
        # The appended dimension lands at the index equal to the number of
        # set dimensions that existed before the append.
        new_dim_index = domain.dim(isl.dim_type.set)
        domain = domain.add_dims(isl.dim_type.set, 1)
        domain = domain.set_dim_name(isl.dim_type.set, new_dim_index, iname)
        space = domain.get_space()

    from loopy.isl_helpers import iname_rel_aff
    relation_aff = iname_rel_aff(space, iname, "==", aff)

    from loopy.symbolic import pw_aff_to_expr
    equality = isl.Constraint.equality_from_aff(relation_aff)
    value_expr = pw_aff_to_expr(aff)

    domain = domain.add_constraint(equality)

    assigned = self.copy_and_assign(iname, value_expr)
    return assigned.copy(implemented_domain=domain)
def fix(self, iname, aff):
    """Produce a copy with *iname* assigned and the domain constrained.

    The implemented domain gains an equality constraint obtained from
    ``iname_rel_aff(space, iname, "==", aff)``. When *iname* is not yet a
    variable of the domain's space, one set dimension named *iname* is
    added beforehand.

    :arg iname: dimension name to fix.
    :arg aff: affine expression supplying the fixed value.
    :returns: the result of ``copy_and_assign(iname, pw_aff_to_expr(aff))``
        with its ``implemented_domain`` swapped for the updated domain.
    """
    set_dims = isl.dim_type.set

    updated_domain = self.implemented_domain
    domain_space = self.implemented_domain.get_space()

    is_known = iname in domain_space.get_var_dict()
    if not is_known:
        # Index of the dimension about to be appended (old dim count).
        appended_at = updated_domain.dim(set_dims)
        updated_domain = updated_domain.add_dims(set_dims, 1).set_dim_name(
            set_dims, appended_at, iname)
        domain_space = updated_domain.get_space()

    from loopy.isl_helpers import iname_rel_aff
    from loopy.symbolic import pw_aff_to_expr

    fix_constraint = isl.Constraint.equality_from_aff(
        iname_rel_aff(domain_space, iname, "==", aff))
    assigned_expr = pw_aff_to_expr(aff)
    updated_domain = updated_domain.add_constraint(fix_constraint)

    return self.copy_and_assign(iname, assigned_expr).copy(
        implemented_domain=updated_domain)
def get_slab_decomposition(kernel, iname):
    """Split the range of *iname* into labelled slabs.

    The slab increments are read from
    ``kernel.iname_slab_increments.get(iname, (0, 0))``.

    :arg kernel: object providing ``get_inames_domain``,
        ``iname_slab_increments`` and ``get_iname_bounds``.
    :arg iname: name of the iname to decompose.
    :returns: ``()`` when the iname's domain is empty. Otherwise a list of
        ``(label, basic_set)`` tuples: a ``"bulk"`` entry first, then an
        ``"initial"`` entry if the lower increment is nonzero and a
        ``"final"`` entry if the upper increment is nonzero.
    :raises NotImplementedError: if either bound has more than one piece
        after coalescing.
    """
    iname_domain = kernel.get_inames_domain(iname)
    if iname_domain.is_empty():
        return ()

    space = iname_domain.space
    lower_incr, upper_incr = kernel.iname_slab_increments.get(iname, (0, 0))

    if not (lower_incr or upper_incr):
        # No increments registered: the whole range is one bulk slab.
        return [("bulk", (isl.BasicSet.universe(space)))]

    bounds = kernel.get_iname_bounds(iname)
    lower_pieces = bounds.lower_bound_pw_aff.coalesce().get_pieces()
    upper_pieces = bounds.upper_bound_pw_aff.coalesce().get_pieces()

    if len(lower_pieces) > 1:
        raise NotImplementedError(
            "lower bound for slab decomp of '%s' needs "
            "conditional/has more than one piece" % iname)
    if len(upper_pieces) > 1:
        raise NotImplementedError(
            "upper bound for slab decomp of '%s' needs "
            "conditional/has more than one piece" % iname)

    (_, lower_bound_aff), = lower_pieces
    (_, upper_bound_aff), = upper_pieces

    from loopy.isl_helpers import iname_rel_aff

    def relate(op, bound_aff):
        # Inequality constraint ``iname <op> bound_aff`` on ``space``.
        return isl.Constraint.inequality_from_aff(
            iname_rel_aff(space, iname, op, bound_aff))

    bulk_constraints = []
    edge_slabs = []

    if lower_incr:
        assert lower_incr > 0
        lower_split = lower_bound_aff + lower_incr
        edge_slabs.append(
            ("initial",
             isl.BasicSet.universe(space).add_constraint(
                 relate("<", lower_split))))
        bulk_constraints.append(relate(">=", lower_split))

    if upper_incr:
        assert upper_incr > 0
        upper_split = upper_bound_aff - upper_incr
        edge_slabs.append(
            ("final",
             isl.BasicSet.universe(space).add_constraint(
                 relate(">", upper_split))))
        bulk_constraints.append(relate("<=", upper_split))

    bulk_slab = isl.BasicSet.universe(space)
    for cns in bulk_constraints:
        bulk_slab = bulk_slab.add_constraint(cns)

    return [("bulk", bulk_slab)] + edge_slabs
def get_slab_decomposition(kernel, iname, sched_index, codegen_state):
    """Split the range of *iname* into labelled slabs.

    The slab increments are read from
    ``kernel.iname_slab_increments.get(iname, (0, 0))``.

    :arg kernel: object providing ``get_inames_domain``,
        ``iname_slab_increments`` and ``get_iname_bounds``.
    :arg iname: name of the iname to decompose.
    :arg sched_index: part of the signature; not referenced in this
        function.
    :arg codegen_state: part of the signature; not referenced in this
        function.
    :returns: ``()`` when the iname's domain is empty. Otherwise a list of
        ``(label, basic_set)`` tuples: a ``"bulk"`` entry first, then an
        ``"initial"`` entry if the lower increment is nonzero and a
        ``"final"`` entry if the upper increment is nonzero.
    :raises NotImplementedError: if either bound has more than one piece
        after coalescing.
    """
    iname_domain = kernel.get_inames_domain(iname)

    if iname_domain.is_empty():
        return ()

    space = iname_domain.space

    lower_incr, upper_incr = kernel.iname_slab_increments.get(iname, (0, 0))

    if not (lower_incr or upper_incr):
        return [("bulk", (isl.BasicSet.universe(space)))]

    bounds = kernel.get_iname_bounds(iname)

    lower_bound_pw_aff_pieces = bounds.lower_bound_pw_aff.coalesce().get_pieces()
    upper_bound_pw_aff_pieces = bounds.upper_bound_pw_aff.coalesce().get_pieces()

    if len(lower_bound_pw_aff_pieces) > 1:
        raise NotImplementedError("lower bound for slab decomp of '%s' needs "
                "conditional/has more than one piece" % iname)
    if len(upper_bound_pw_aff_pieces) > 1:
        raise NotImplementedError("upper bound for slab decomp of '%s' needs "
                "conditional/has more than one piece" % iname)

    (_, lower_bound_aff), = lower_bound_pw_aff_pieces
    (_, upper_bound_aff), = upper_bound_pw_aff_pieces

    from loopy.isl_helpers import iname_rel_aff

    # Each edge slab and its bulk bound defaults to "absent" independently,
    # so a lower-only or upper-only increment leaves the other side
    # well-defined and does not disturb the side that was set.
    lower_slab = None
    upper_slab = None
    lower_bulk_bound = None
    upper_bulk_bound = None

    if lower_incr:
        assert lower_incr > 0
        lower_split = lower_bound_aff + lower_incr
        lower_slab = ("initial", isl.BasicSet.universe(space)
                .add_constraint(
                    isl.Constraint.inequality_from_aff(
                        iname_rel_aff(space, iname, "<", lower_split))))
        lower_bulk_bound = (
                isl.Constraint.inequality_from_aff(
                    iname_rel_aff(space, iname, ">=", lower_split)))

    if upper_incr:
        assert upper_incr > 0
        upper_split = upper_bound_aff - upper_incr
        upper_slab = ("final", isl.BasicSet.universe(space)
                .add_constraint(
                    isl.Constraint.inequality_from_aff(
                        iname_rel_aff(space, iname, ">", upper_split))))
        upper_bulk_bound = (
                isl.Constraint.inequality_from_aff(
                    iname_rel_aff(space, iname, "<=", upper_split)))

    bulk_slab = isl.BasicSet.universe(space)
    if lower_bulk_bound is not None:
        bulk_slab = bulk_slab.add_constraint(lower_bulk_bound)
    if upper_bulk_bound is not None:
        bulk_slab = bulk_slab.add_constraint(upper_bulk_bound)

    slabs = [("bulk", bulk_slab)]
    if lower_slab is not None:
        slabs.append(lower_slab)
    if upper_slab is not None:
        slabs.append(upper_slab)

    return slabs
def map_Do(self, node):
    """Handle a ``do`` loop node: register an index set and visit its body.

    ``node.loopcontrol`` is split at ``"="`` into the loop variable and the
    bounds text; the bounds are parsed into ``(start, stop)`` or
    ``(start, stop, step)``. A one-dimensional index set
    ``0 <= loopy_name <= stop - start`` is appended to the current scope,
    and the loop variable is aliased to ``loopy_name + start`` while the
    node's children are processed.

    :raises NotImplementedError: if ``node.loopcontrol`` is empty or the
        step is not equal to 1.
    :raises LoopyError: if the loop variable's type differs from the
        previously recorded ``self.index_dtype``.
    :raises RuntimeError: if the parsed bounds have neither 2 nor 3 entries.
    :raises TranslationError: if the step is not an ``int`` instance.
    """
    scope = self.scope_stack[-1]

    if not node.loopcontrol:
        raise NotImplementedError("unbounded do loop")

    loop_var, bounds_text = node.loopcontrol.split("=")
    loop_var = loop_var.strip()

    iname_dtype = scope.get_type(loop_var)
    if self.index_dtype is None:
        # First loop seen: its variable type becomes the index type.
        self.index_dtype = iname_dtype
    elif self.index_dtype != iname_dtype:
        raise LoopyError("type of '%s' (%s) does not agree with prior "
                "index type (%s)"
                % (loop_var, iname_dtype, self.index_dtype))

    scope.use_name(loop_var)
    loop_bounds = self.parse_expr(
        node, bounds_text,
        min_precedence=self.expr_parser._PREC_FUNC_ARGS)

    bound_count = len(loop_bounds)
    if bound_count == 2:
        start, stop = loop_bounds
        step = 1
    elif bound_count == 3:
        start, stop, step = loop_bounds
    else:
        raise RuntimeError("loop bounds not understood: %s"
                % node.loopcontrol)

    if step != 1:
        raise NotImplementedError(
                "do loops with non-unit stride")

    if not isinstance(step, int):
        raise TranslationError(
                "non-constant steps not supported: %s" % step)

    from loopy.symbolic import get_dependencies
    loop_bound_deps = (
        get_dependencies(start)
        | get_dependencies(stop)
        | get_dependencies(step))

    # {{{ find a usable loopy-side loop name

    def name_is_taken(candidate):
        return any(
            candidate in iset.get_var_dict(dim_type.set)
            for iset in scope.index_sets)

    loopy_loop_var = loop_var
    suffix = 0
    while name_is_taken(loopy_loop_var):
        # Try loop_var_1, loop_var_2, ... until one is free.
        suffix += 1
        loopy_loop_var = loop_var + "_%d" % suffix

    loopy_loop_var = intern(loopy_loop_var)

    # }}}

    space = isl.Space.create_from_names(
        isl.DEFAULT_CONTEXT,
        set=[loopy_loop_var],
        params=list(loop_bound_deps))

    from loopy.isl_helpers import iname_rel_aff
    from loopy.symbolic import aff_from_expr

    lower_cns = isl.Constraint.inequality_from_aff(
        iname_rel_aff(space, loopy_loop_var, ">=",
                      aff_from_expr(space, 0)))
    upper_cns = isl.Constraint.inequality_from_aff(
        iname_rel_aff(space, loopy_loop_var, "<=",
                      aff_from_expr(space, stop-start)))
    index_set = (
        isl.BasicSet.universe(space)
        .add_constraint(lower_cns)
        .add_constraint(upper_cns))

    from pymbolic import var
    scope.active_iname_aliases[loop_var] = var(loopy_loop_var) + start
    scope.active_loopy_inames.add(loopy_loop_var)

    scope.index_sets.append(index_set)

    self.block_nest.append("do")

    for child in node.content:
        self.rec(child)

    # The alias and the active iname are only valid inside the loop body.
    del scope.active_iname_aliases[loop_var]
    scope.active_loopy_inames.remove(loopy_loop_var)
def map_Do(self, node):
    """Translate a ``do`` loop node into an index set and process its body.

    The loop control string is split at ``"="``; the right-hand side is
    parsed into two or three bound expressions. The loop is represented by
    a fresh (not yet used) set dimension ranging over
    ``0 <= name <= stop - start``; inside the body, the original loop
    variable is aliased to ``name + start``.

    :raises NotImplementedError: for an empty loop control or a step that
        is not equal to 1.
    :raises LoopyError: if the loop variable's type disagrees with the
        index type recorded earlier on ``self``.
    :raises RuntimeError: if the number of parsed bounds is not 2 or 3.
    :raises TranslationError: if the step is not an ``int`` instance.
    """
    scope = self.scope_stack[-1]

    if not node.loopcontrol:
        raise NotImplementedError("unbounded do loop")

    loop_var, raw_bounds = node.loopcontrol.split("=")
    loop_var = loop_var.strip()

    iname_dtype = scope.get_type(loop_var)
    if self.index_dtype is None:
        self.index_dtype = iname_dtype
    elif self.index_dtype != iname_dtype:
        raise LoopyError("type of '%s' (%s) does not agree with prior "
                "index type (%s)"
                % (loop_var, iname_dtype, self.index_dtype))

    scope.use_name(loop_var)
    loop_bounds = self.parse_expr(
        node, raw_bounds, min_precedence=self.expr_parser._PREC_FUNC_ARGS)

    if len(loop_bounds) == 2:
        # Step omitted in the source: default to unit stride.
        (start, stop), step = loop_bounds, 1
    elif len(loop_bounds) == 3:
        start, stop, step = loop_bounds
    else:
        raise RuntimeError("loop bounds not understood: %s"
                % node.loopcontrol)

    if step != 1:
        raise NotImplementedError(
                "do loops with non-unit stride")

    if not isinstance(step, int):
        raise TranslationError(
                "non-constant steps not supported: %s" % step)

    from loopy.symbolic import get_dependencies
    loop_bound_deps = get_dependencies(start)
    loop_bound_deps = loop_bound_deps | get_dependencies(stop)
    loop_bound_deps = loop_bound_deps | get_dependencies(step)

    # {{{ find a usable loopy-side loop name

    loopy_loop_var = loop_var
    attempt = 0
    while True:
        for iset in scope.index_sets:
            if loopy_loop_var in iset.get_var_dict(dim_type.set):
                break
        else:
            # No existing index set uses this name.
            break

        attempt += 1
        loopy_loop_var = loop_var + "_%d" % attempt

    # }}}

    space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT,
            set=[loopy_loop_var], params=list(loop_bound_deps))

    from loopy.isl_helpers import iname_rel_aff
    from loopy.symbolic import aff_from_expr

    def bound_constraint(op, expr):
        # Inequality ``loopy_loop_var <op> expr`` on ``space``.
        return isl.Constraint.inequality_from_aff(
            iname_rel_aff(space, loopy_loop_var, op,
                          aff_from_expr(space, expr)))

    index_set = isl.BasicSet.universe(space)
    index_set = index_set.add_constraint(bound_constraint(">=", 0))
    index_set = index_set.add_constraint(bound_constraint("<=", stop-start))

    from pymbolic import var
    scope.active_iname_aliases[loop_var] = var(loopy_loop_var) + start
    scope.active_loopy_inames.add(loopy_loop_var)

    scope.index_sets.append(index_set)

    self.block_nest.append("do")

    for stmt in node.content:
        self.rec(stmt)

    # Leaving the loop body: retire the alias and the active iname.
    del scope.active_iname_aliases[loop_var]
    scope.active_loopy_inames.remove(loopy_loop_var)
def join_inames(kernel, inames, new_iname=None, tag=None, within=None):
    """Join *inames* into a single iname *new_iname*.

    :arg kernel: the kernel to transform.
    :arg inames: fastest varying last
    :arg new_iname: name of the joined iname. If *None*, a name is
        generated from ``"_and_".join(...)`` of the (reversed) *inames*
        via the kernel's variable name generator.
    :arg tag: if not *None*, applied to *new_iname* through
        ``tag_inames`` at the end.
    :arg within: a stack match as understood by
        :func:`loopy.context_matching.parse_stack_match`. The old inames
        are projected out of the domain only when this is *None*.
    :returns: the transformed kernel.
    :raises LoopyError: if one of *inames* has a home domain index that
        differs from the ``DomainChanger``'s leaf domain index.
    """

    # now fastest varying first
    inames = inames[::-1]

    if new_iname is None:
        new_iname = kernel.get_var_name_generator()("_and_".join(inames))

    from loopy.kernel.tools import DomainChanger
    domch = DomainChanger(kernel, frozenset(inames))
    for iname in inames:
        if kernel.get_home_domain_index(iname) != domch.leaf_domain_index:
            raise LoopyError("iname '%s' is not 'at home' in the "
                    "join's leaf domain" % iname)

    # Append one set dimension for the joined iname.
    new_domain = domch.domain
    new_dim_idx = new_domain.dim(dim_type.set)
    new_domain = new_domain.add_dims(dim_type.set, 1)
    new_domain = new_domain.set_dim_name(dim_type.set, new_dim_idx, new_iname)

    joint_aff = zero = isl.Aff.zero_on_domain(new_domain.space)
    subst_dict = {}
    base_divisor = 1

    from pymbolic import var
    from loopy.isl_helpers import (
            static_max_of_pw_aff, static_value_of_pw_aff)
    from loopy.symbolic import pw_aff_to_expr

    for i, iname in enumerate(inames):
        iname_dt, iname_idx = zero.get_space().get_var_dict()[iname]
        iname_aff = zero.add_coefficient_val(iname_dt, iname_idx, 1)

        joint_aff = joint_aff + base_divisor*iname_aff

        bounds = kernel.get_iname_bounds(iname, constants_only=True)

        length = int(pw_aff_to_expr(
            static_max_of_pw_aff(bounds.size, constants_only=True)))

        try:
            lower_bound_aff = static_value_of_pw_aff(
                    bounds.lower_bound_pw_aff.coalesce(),
                    constants_only=False)
        except Exception as e:
            # Re-raise with the same type, adding which iname failed while
            # keeping the underlying error text in the message.
            raise type(e)("while finding lower bound of '%s': %s"
                    % (iname, e))

        # Old iname expressed in terms of the joined one; the last
        # (slowest varying) iname needs no modulo.
        my_val = var(new_iname) // base_divisor
        if i+1 < len(inames):
            my_val %= length
        my_val += pw_aff_to_expr(lower_bound_aff)
        subst_dict[iname] = my_val

        base_divisor *= length

    from loopy.isl_helpers import iname_rel_aff
    new_domain = new_domain.add_constraint(
            isl.Constraint.equality_from_aff(
                iname_rel_aff(new_domain.get_space(), new_iname, "==", joint_aff)))

    if within is None:
        for iname in inames:
            # Indices shift after each projection, so look them up afresh.
            iname_dt, iname_idx = new_domain.get_space().get_var_dict()[iname]
            new_domain = new_domain.project_out(iname_dt, iname_idx, 1)

    def subst_forced_iname_deps(fid):
        # Replace every joined iname by the new one.
        return frozenset(
                new_iname if iname in inames else iname
                for iname in fid)

    new_insns = [
            insn.copy(
                forced_iname_deps=subst_forced_iname_deps(insn.forced_iname_deps))
            for insn in kernel.instructions]

    kernel = (kernel
            .copy(
                instructions=new_insns,
                domains=domch.get_domains_with(new_domain),
                applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict]
                ))

    from loopy.context_matching import parse_stack_match
    within = parse_stack_match(within)

    from pymbolic.mapper.substitutor import make_subst_func
    rule_mapping_context = SubstitutionRuleMappingContext(
            kernel.substitutions, kernel.get_var_name_generator())
    ijoin = _InameJoiner(rule_mapping_context, within,
            make_subst_func(subst_dict),
            inames, new_iname)

    kernel = rule_mapping_context.finish_kernel(
            ijoin.map_kernel(kernel))

    if tag is not None:
        kernel = tag_inames(kernel, {new_iname: tag})

    return kernel
def join_inames(kernel, inames, new_iname=None, tag=None, within=None):
    """Join *inames* into a single iname *new_iname*.

    :arg kernel: the kernel to transform.
    :arg inames: fastest varying last
    :arg new_iname: name of the joined iname. If *None*, a name is
        generated from ``"_and_".join(...)`` of the (reversed) *inames*
        via the kernel's variable name generator.
    :arg tag: if not *None*, applied to *new_iname* through
        ``tag_inames`` at the end.
    :arg within: a stack match as understood by
        :func:`loopy.match.parse_stack_match`. The old inames are
        projected out of the domain only when this is *None*.
    :returns: the transformed kernel.
    :raises LoopyError: if one of *inames* has a home domain index that
        differs from the ``DomainChanger``'s leaf domain index.
    """

    # now fastest varying first
    inames = inames[::-1]

    if new_iname is None:
        new_iname = kernel.get_var_name_generator()("_and_".join(inames))

    from loopy.kernel.tools import DomainChanger
    domch = DomainChanger(kernel, frozenset(inames))
    for iname in inames:
        if kernel.get_home_domain_index(iname) != domch.leaf_domain_index:
            raise LoopyError("iname '%s' is not 'at home' in the "
                             "join's leaf domain" % iname)

    # Append one set dimension for the joined iname.
    new_domain = domch.domain
    new_dim_idx = new_domain.dim(dim_type.set)
    new_domain = new_domain.add_dims(dim_type.set, 1)
    new_domain = new_domain.set_dim_name(dim_type.set, new_dim_idx, new_iname)

    joint_aff = zero = isl.Aff.zero_on_domain(new_domain.space)
    subst_dict = {}
    base_divisor = 1

    from pymbolic import var
    from loopy.isl_helpers import (static_max_of_pw_aff,
                                   static_value_of_pw_aff)
    from loopy.symbolic import pw_aff_to_expr

    for i, iname in enumerate(inames):
        iname_dt, iname_idx = zero.get_space().get_var_dict()[iname]
        iname_aff = zero.add_coefficient_val(iname_dt, iname_idx, 1)

        joint_aff = joint_aff + base_divisor * iname_aff

        bounds = kernel.get_iname_bounds(iname, constants_only=True)

        length = int(
            pw_aff_to_expr(
                static_max_of_pw_aff(bounds.size, constants_only=True)))

        try:
            lower_bound_aff = static_value_of_pw_aff(
                bounds.lower_bound_pw_aff.coalesce(), constants_only=False)
        except Exception as e:
            # Same exception type, but say which iname failed and keep the
            # underlying error text so the cause is not lost.
            raise type(e)("while finding lower bound of '%s': %s"
                          % (iname, e))

        # Old iname expressed in terms of the joined one; the last
        # (slowest varying) iname needs no modulo.
        my_val = var(new_iname) // base_divisor
        if i + 1 < len(inames):
            my_val %= length
        my_val += pw_aff_to_expr(lower_bound_aff)
        subst_dict[iname] = my_val

        base_divisor *= length

    from loopy.isl_helpers import iname_rel_aff
    new_domain = new_domain.add_constraint(
        isl.Constraint.equality_from_aff(
            iname_rel_aff(new_domain.get_space(), new_iname, "==",
                          joint_aff)))

    if within is None:
        for iname in inames:
            # Indices shift after each projection, so look them up afresh.
            iname_dt, iname_idx = new_domain.get_space().get_var_dict()[iname]
            new_domain = new_domain.project_out(iname_dt, iname_idx, 1)

    def subst_within_inames(fid):
        # Replace every joined iname by the new one.
        return frozenset(
            new_iname if iname in inames else iname
            for iname in fid)

    new_insns = [
        insn.copy(within_inames=subst_within_inames(insn.within_inames))
        for insn in kernel.instructions
    ]

    kernel = (kernel.copy(
        instructions=new_insns,
        domains=domch.get_domains_with(new_domain),
        applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict]))

    from loopy.match import parse_stack_match
    within = parse_stack_match(within)

    from pymbolic.mapper.substitutor import make_subst_func
    rule_mapping_context = SubstitutionRuleMappingContext(
        kernel.substitutions, kernel.get_var_name_generator())
    ijoin = _InameJoiner(rule_mapping_context, within,
                         make_subst_func(subst_dict), inames, new_iname)

    kernel = rule_mapping_context.finish_kernel(ijoin.map_kernel(kernel))

    if tag is not None:
        kernel = tag_inames(kernel, {new_iname: tag})

    return kernel