def static_extremum_of_pw_aff(pw_aff, constants_only, set_method, what, context):
    """Pick one piece of *pw_aff* that bounds *pw_aff* on its whole domain.

    :arg pw_aff: the piecewise affine expression whose pieces are searched.
    :arg constants_only: if true, only candidates for which ``is_cst()``
        holds are accepted.
    :arg set_method: a callable ``set_method(pw_aff, candidate)``. A
        candidate is accepted once the reference domain is ``<=`` the value
        this callable returns.
    :arg what: a description of the extremum, used only in error messages.
    :arg context: *None*, or assumptions that are aligned to the domain
        space of *pw_aff*, reduced via ``.params()``, used to gist *pw_aff*
        and intersected with the reference domain.
    :raises ValueError: if *pw_aff* has exactly one piece, *constants_only*
        is set and that piece is not constant.
    :raises StaticValueFindingError: if no piece is accepted.
    """
    if context is not None:
        aligned_context = isl.align_spaces(
            context, pw_aff.get_domain_space(), obj_bigger_ok=True)
        context = aligned_context.params()
        pw_aff = pw_aff.gist(context)

    pieces = pw_aff.get_pieces()

    # With a single piece there is nothing to choose between.
    if len(pieces) == 1:
        (_, only_aff), = pieces
        if constants_only and not only_aff.is_cst():
            raise ValueError("a numeric %s was not found for PwAff '%s'"
                             % (what, pw_aff))
        return only_aff

    from pytools import memoize

    @memoize
    def is_bounded(validity):
        # The validity sets are expected to be parameter-only; the
        # parameters are moved into set dimensions before the check.
        assert validity.dim(dim_type.set) == 0
        n_params = validity.dim(dim_type.param)
        as_set = validity.move_dims(
            dim_type.set, 0, dim_type.param, 0, n_params)
        return as_set.is_bounded()

    # put constant bounds with unbounded validity first
    preference = (
        (True, False),   # constant, unbounded validity
        (False, False),  # nonconstant, unbounded validity
        (True, True),    # constant, bounded validity
        (False, True),   # nonconstant, bounded validity
    )

    ordered_pieces = []
    for want_is_constant, want_is_bounded in preference:
        for validity, aff in pieces:
            if (aff.is_cst() == want_is_constant
                    and is_bounded(validity) == want_is_bounded):
                ordered_pieces.append((validity, aff))

    reference = pw_aff.get_aggregate_domain()
    if context is not None:
        reference = reference.intersect(context)

    # {{{ find bounds that are also global bounds

    for validity, candidate_aff in ordered_pieces:
        # gist can be time-consuming, try without first
        for use_gist in (False, True):
            if use_gist:
                candidate_aff = candidate_aff.gist(validity)

            if constants_only and not candidate_aff.is_cst():
                continue

            if reference <= set_method(pw_aff, candidate_aff):
                return candidate_aff

    # }}}

    raise StaticValueFindingError("a static %s was not found for PwAff '%s'"
                                  % (what, pw_aff))
def test_align_spaces():
    """Exercise ``isl.align_spaces`` on basic maps and on affs whose
    parameter lists differ."""
    source_map = isl.BasicMap("[m,n] -> {[i,j,k]->[l,o]:}")
    template_map = isl.BasicMap("[m,n] -> {[j,k,l,i]->[o]:}")

    aligned_map = isl.align_spaces(source_map, template_map)
    assert aligned_map.get_var_dict() == template_map.get_var_dict()

    wide_aff = isl.Aff("[t0, t1, t2] -> { [(32)] }")
    narrow_aff = isl.Aff("[t1, t0] -> { [(0)] }")

    # The object carries a parameter that the template lacks; without
    # obj_bigger_ok this is expected to fail.
    with pytest.raises(isl.Error):
        isl.align_spaces(wide_aff, narrow_aff)

    wide_aligned = isl.align_spaces(wide_aff, narrow_aff, obj_bigger_ok=True)
    narrow_aligned = isl.align_spaces(narrow_aff, wide_aff)

    assert wide_aligned == isl.Aff("[t1, t0, t2] -> { [(32)] }")
    assert narrow_aligned == isl.Aff("[t1, t0, t2] -> { [(0)] }")
def __call__(self, check_inames):
    """Return approximate convex bounds checks for *check_inames*.

    :arg check_inames: the inames to generate checks for. If empty, an
        empty list is returned without touching the kernel.
    :returns: whatever
        :func:`loopy.codegen.bounds.get_approximate_convex_bounds_checks`
        returns for the aligned domain, *check_inames* and
        ``self.impl_domain``.
    """
    if not check_inames:
        return []

    from loopy.codegen.bounds import get_approximate_convex_bounds_checks

    inames_domain = self.kernel.get_inames_domain(check_inames)
    domain = isl.align_spaces(
        inames_domain, self.impl_domain, obj_bigger_ok=True)

    # Each instruction individually gets its bounds checks,
    # so we can safely overapproximate here.
    return get_approximate_convex_bounds_checks(
        domain, check_inames, self.impl_domain)
def _ensure_dim_names_match_and_align(obj_map, tgt_map):
    """Align *obj_map* to the space of *tgt_map* after checking their names.

    For each of the ``in_``, ``out`` and ``param`` dimension types, the set
    of variable names of both maps must be equal (order is not compared).

    :raises ValueError: if the name sets differ for any dimension type.
    :returns: the result of ``islpy.align_spaces(obj_map, tgt_map)``.
    """
    # (This function is also defined in independent, unmerged branch
    # new-dependency-and-nest-constraint-semantics-development, and used in
    # child branches thereof. Once these branches are all merged, it may make
    # sense to move this function to a location for more general-purpose
    # machinery. In the other branches, this function's name excludes the
    # leading underscore.)
    from islpy import align_spaces
    from islpy import dim_type

    # first make sure names match
    for dim in (dim_type.in_, dim_type.out, dim_type.param):
        obj_names = set(obj_map.get_var_names(dim))
        tgt_names = set(tgt_map.get_var_names(dim))
        if obj_names != tgt_names:
            raise ValueError(
                "Cannot align spaces; names don't match:\n%s\n%s"
                % (obj_map, tgt_map))

    return align_spaces(obj_map, tgt_map)
def iname_rel_aff(space, iname, rel, aff):
    """Build an affine expression relating *iname* to *aff*.

    *aff*'s domain space is allowed to not match *space*; it is aligned to
    a zero aff on *space* first.

    The returned expression is

    * ``aff - iname`` for ``"=="`` and ``"<="``,
    * ``iname - aff`` for ``">="``,
    * ``aff - 1 - iname`` for ``"<"``,
    * ``iname - aff - 1`` for ``">"``.

    (Presumably callers turn this into an ``expr == 0`` or ``expr >= 0``
    constraint; that is not visible here.)

    :raises ValueError: if *rel* is none of the relations above.
    """
    from islpy import align_spaces

    dim, index = space.get_var_dict()[iname]
    assert dim in [isl.dim_type.set, isl.dim_type.param]

    # Coefficients of set variables of an aff are addressed as 'in_'.
    if dim == isl.dim_type.set:
        dim = isl.dim_type.in_

    aff = align_spaces(aff, isl.Aff.zero_on_domain(space))

    if rel in ("==", "<="):
        return aff.add_coefficient_val(dim, index, -1)
    if rel == ">=":
        return aff.neg().add_coefficient_val(dim, index, 1)
    if rel == "<":
        return (aff - 1).add_coefficient_val(dim, index, -1)
    if rel == ">":
        return (aff + 1).neg().add_coefficient_val(dim, index, 1)

    raise ValueError("unknown value of 'rel': %s" % rel)
def iname_rel_aff(space, iname, rel, aff):
    """Return an aff combining *aff* with a +1/-1 coefficient on *iname*.

    *aff*'s domain space is allowed to not match *space*.

    :arg space: the space in which *iname* is looked up; *iname* must be a
        set or parameter dimension of it.
    :arg rel: one of ``"=="``, ``"<="``, ``">="``, ``"<"``, ``">"``.
    :arg aff: the affine expression on the other side of the relation.
    :raises ValueError: for any other value of *rel*.
    """
    var_type, var_pos = space.get_var_dict()[iname]
    assert var_type in [isl.dim_type.set, isl.dim_type.param]
    if var_type == isl.dim_type.set:
        var_type = isl.dim_type.in_

    from islpy import align_spaces
    aligned = align_spaces(aff, isl.Aff.zero_on_domain(space))

    # Choose the expression the iname coefficient is added to, and its sign.
    if rel in ("==", "<="):
        base, coefficient = aligned, -1
    elif rel == ">=":
        base, coefficient = aligned.neg(), 1
    elif rel == "<":
        base, coefficient = aligned - 1, -1
    elif rel == ">":
        base, coefficient = (aligned + 1).neg(), 1
    else:
        raise ValueError("unknown value of 'rel': %s" % rel)

    return base.add_coefficient_val(var_type, var_pos, coefficient)
def generate_sequential_loop_dim_code(codegen_state, sched_index):
    """Generate code for the sequential loop scheduled at *sched_index*.

    For every slab returned by ``get_slab_decomposition`` the loop bounds
    are computed, the inner loop nest is built with the implemented domain
    and the kernel restricted to that slab, and either a single variable
    initializer (when both implemented bounds are equal) or a sequential
    loop is emitted.

    :arg codegen_state: the current code generation state; supplies the
        kernel, the expression-to-code mapper, the AST builder and the
        implemented domain.
    :arg sched_index: index into ``kernel.schedule`` of the loop to emit.
    :returns: the per-slab results merged by ``merge_codegen_results``.
    """
    from loopy.codegen.bounds import get_usable_inames_for_conditional
    from loopy.isl_helpers import (
        make_loop_bounds_from_pwaffs, simplify_pw_aff)
    from loopy.symbolic import (
        pw_aff_to_expr, pw_aff_to_pw_aff_implemented_by_expr)

    kernel = codegen_state.kernel
    ecm = codegen_state.expression_to_code_mapper
    astb = codegen_state.ast_builder
    loop_iname = kernel.schedule[sched_index].iname

    slabs = get_slab_decomposition(kernel, loop_iname)

    # Note: this does not include loop_iname itself!
    usable_inames = get_usable_inames_for_conditional(kernel, sched_index)
    domain = kernel.get_inames_domain(loop_iname)

    result = []

    for slab_name, slab in slabs:
        # A lone slab needs no explanatory comment.
        if len(slabs) == 1:
            cmt = None
        else:
            cmt = "%s slab for '%s'" % (slab_name, loop_iname)

        # {{{ find bounds

        aligned_domain = isl.align_spaces(domain, slab, obj_bigger_ok=True)
        dom_and_slab = aligned_domain & slab

        assumptions_non_param = isl.BasicSet.from_params(kernel.assumptions)
        dom_and_slab, assumptions_non_param = isl.align_two(
            dom_and_slab, assumptions_non_param)
        dom_and_slab = dom_and_slab & assumptions_non_param

        # move inames that are usable into parameters
        moved_inames = []
        for das_iname in sorted(dom_and_slab.get_var_names(dim_type.set)):
            if das_iname not in usable_inames:
                continue
            moved_inames.append(das_iname)
            dt, idx = dom_and_slab.get_var_dict()[das_iname]
            dom_and_slab = dom_and_slab.move_dims(
                dim_type.param, dom_and_slab.dim(dim_type.param),
                dt, idx, 1)

        _, loop_iname_idx = dom_and_slab.get_var_dict()[loop_iname]

        aligned_impl_domain = isl.align_spaces(
            codegen_state.implemented_domain, dom_and_slab,
            obj_bigger_ok=True)
        impl_domain = aligned_impl_domain.params()

        raw_lbound = kernel.cache_manager.dim_min(
            dom_and_slab, loop_iname_idx)
        lbound = (raw_lbound
                  .gist(kernel.assumptions)
                  .gist(impl_domain)
                  .coalesce())

        raw_ubound = kernel.cache_manager.dim_max(
            dom_and_slab, loop_iname_idx)
        ubound = (raw_ubound
                  .gist(kernel.assumptions)
                  .gist(impl_domain)
                  .coalesce())

        # }}}

        # {{{ find implemented loop, build inner code

        impl_lbound = pw_aff_to_pw_aff_implemented_by_expr(lbound)
        impl_ubound = pw_aff_to_pw_aff_implemented_by_expr(ubound)

        # impl_loop may be overapproximated
        impl_loop = make_loop_bounds_from_pwaffs(
            dom_and_slab.space, loop_iname, impl_lbound, impl_ubound)

        for moved_iname in moved_inames:
            # move moved_iname to 'set' dim_type in impl_loop
            dt, idx = impl_loop.get_var_dict()[moved_iname]
            impl_loop = impl_loop.move_dims(
                dim_type.set, impl_loop.dim(dim_type.set), dt, idx, 1)

        restricted_state = codegen_state.intersect(impl_loop)
        slab_kernel = intersect_kernel_with_slab(kernel, slab, loop_iname)
        new_codegen_state = restricted_state.copy(kernel=slab_kernel)

        inner = build_loop_nest(new_codegen_state, sched_index + 1)

        # }}}

        if cmt is not None:
            result.append(astb.emit_comment(cmt))

        if impl_ubound.is_equal(impl_lbound):
            # single-trip, generate just a variable assignment, not a loop
            initializer = astb.emit_initializer(
                codegen_state, kernel.index_dtype, loop_iname,
                ecm(pw_aff_to_expr(lbound), PREC_NONE, "i"),
                is_const=True)
            inner = merge_codegen_results(
                codegen_state,
                [initializer, astb.emit_blank_line(), inner])
            scoped_ast = astb.ast_block_scope_class(
                inner.current_ast(codegen_state))
            result.append(inner.with_new_ast(codegen_state, scoped_ast))
        else:
            inner_ast = inner.current_ast(codegen_state)
            lower_expr = pw_aff_to_expr(
                simplify_pw_aff(lbound, kernel.assumptions))
            upper_expr = pw_aff_to_expr(
                simplify_pw_aff(ubound, kernel.assumptions))
            loop_ast = astb.emit_sequential_loop(
                codegen_state, loop_iname, kernel.index_dtype,
                lower_expr, upper_expr, inner_ast)
            result.append(inner.with_new_ast(codegen_state, loop_ast))

    return merge_codegen_results(codegen_state, result)
def generate_sequential_loop_dim_code(kernel, sched_index, codegen_state):
    """Generate code for the sequential loop scheduled at *sched_index*.

    For every slab returned by ``get_slab_decomposition``, static lower and
    upper bounds of the loop iname are determined, the inner loop nest is
    built for the kernel restricted to that slab, and either a single
    initializer (when both static bounds coincide) or a sequential loop is
    emitted.

    :arg kernel: the kernel being generated; supplies the schedule, the
        iname domains, the assumptions and the target.
    :arg sched_index: index into ``kernel.schedule`` of the loop to emit.
    :arg codegen_state: the current code generation state.
    :returns: ``gen_code_block`` applied to the per-slab results.
    """
    ecm = codegen_state.expression_to_code_mapper
    loop_iname = kernel.schedule[sched_index].iname

    slabs = get_slab_decomposition(
        kernel, loop_iname, sched_index, codegen_state)

    from loopy.codegen.bounds import get_usable_inames_for_conditional

    # Note: this does not include loop_iname itself!
    usable_inames = get_usable_inames_for_conditional(kernel, sched_index)
    domain = kernel.get_inames_domain(loop_iname)

    result = []

    for slab_name, slab in slabs:
        cmt = "%s slab for '%s'" % (slab_name, loop_iname)
        if len(slabs) == 1:
            cmt = None

        # {{{ find bounds

        aligned_domain = isl.align_spaces(
            domain, slab, across_dim_types=True, obj_bigger_ok=True)

        dom_and_slab = aligned_domain & slab

        assumptions_non_param = isl.BasicSet.from_params(kernel.assumptions)
        dom_and_slab, assumptions_non_param = isl.align_two(
            dom_and_slab, assumptions_non_param)
        dom_and_slab = dom_and_slab & assumptions_non_param

        # move inames that are usable into parameters
        moved_inames = []
        for das_iname in dom_and_slab.get_var_names(dim_type.set):
            if das_iname in usable_inames:
                moved_inames.append(das_iname)
                dt, idx = dom_and_slab.get_var_dict()[das_iname]
                dom_and_slab = dom_and_slab.move_dims(
                    dim_type.param, dom_and_slab.dim(dim_type.param),
                    dt, idx, 1)

        _, loop_iname_idx = dom_and_slab.get_var_dict()[loop_iname]

        from loopy.isl_helpers import (
            static_min_of_pw_aff,
            static_max_of_pw_aff)

        lbound = (
            kernel.cache_manager.dim_min(
                dom_and_slab, loop_iname_idx)
            .gist(kernel.assumptions)
            .coalesce())
        ubound = (
            kernel.cache_manager.dim_max(
                dom_and_slab, loop_iname_idx)
            .gist(kernel.assumptions)
            .coalesce())

        static_lbound = static_min_of_pw_aff(
            lbound, constants_only=False)
        static_ubound = static_max_of_pw_aff(
            ubound, constants_only=False)

        # }}}

        # {{{ find implemented slab, build inner code

        from loopy.isl_helpers import make_slab_from_bound_pwaffs

        # impl_slab may be overapproximated
        impl_slab = make_slab_from_bound_pwaffs(
            dom_and_slab.space,
            loop_iname, static_lbound, static_ubound)

        # Undo the move into parameters for the implemented slab.
        for moved_iname in moved_inames:
            dt, idx = impl_slab.get_var_dict()[moved_iname]
            impl_slab = impl_slab.move_dims(
                dim_type.set, impl_slab.dim(dim_type.set),
                dt, idx, 1)

        new_codegen_state = codegen_state.intersect(impl_slab)

        # The slab constrains loop_iname, so the kernel must be restricted
        # about loop_iname. This used to pass whatever name the preceding
        # 'for' loops happened to leave behind in their loop variable.
        inner = build_loop_nest(
            intersect_kernel_with_slab(
                kernel, slab, loop_iname),
            sched_index+1, new_codegen_state)

        # }}}

        if cmt is not None:
            from cgen import Comment
            result.append(Comment(cmt))

        from cgen import Initializer, POD, Const, Line
        from loopy.symbolic import aff_to_expr

        if (static_ubound - static_lbound).plain_is_zero():
            # single-trip, generate just a variable assignment, not a loop
            result.append(gen_code_block([
                Initializer(Const(POD(kernel.index_dtype, loop_iname)),
                            ecm(aff_to_expr(static_lbound), PREC_NONE, "i")),
                Line(),
                inner,
            ]))

        else:
            result.append(
                kernel.target.emit_sequential_loop(
                    codegen_state, loop_iname, kernel.index_dtype,
                    static_lbound, static_ubound, inner))

    return gen_code_block(result)
def test_align_spaces():
    """Aligning one basic map to another must yield the template's
    variable dictionary."""
    source_map = isl.BasicMap("[m,n] -> {[i,j,k]->[l,o]:}")
    template_map = isl.BasicMap("[m,n] -> {[j,k,l,i]->[o]:}")

    aligned_map = isl.align_spaces(source_map, template_map)

    expected_vars = template_map.get_var_dict()
    assert aligned_map.get_var_dict() == expected_vars
def chunk_iname(kernel, split_iname, num_chunks,
        outer_iname=None, inner_iname=None,
        outer_tag=None, inner_tag=None,
        slabs=(0, 0), do_tagged_check=True,
        within=None):
    """
    Split *split_iname* into two inames (an 'inner' one and an 'outer' one)
    so that ``split_iname == inner + outer*chunk_length`` and *outer* is of
    fixed length *num_chunks*.

    Every domain containing *split_iname* must be box-shaped about that
    iname; otherwise a :class:`LoopyError` is raised. The remaining
    arguments are passed on unchanged to ``_split_iname_backend``.

    :arg within: a stack match as understood by
        :func:`loopy.match.parse_stack_match`.

    .. versionadded:: 2016.2
    """
    from loopy.symbolic import pw_aff_to_expr
    from pymbolic.primitives import Min

    size = kernel.get_iname_bounds(split_iname).size
    k0 = isl.Aff.zero_on_domain(size.domain().space)

    # size / num_chunks, rounded both ways.
    per_chunk = size.div(k0 + num_chunks)
    chunk_ceil = per_chunk.ceil()
    chunk_floor = per_chunk.floor()
    chunk_diff = chunk_ceil - chunk_floor
    chunk_mod = size.mod_val(num_chunks)

    def make_new_loop_index(inner, outer):
        # These two expressions are equivalent. Benchmarking between the
        # two was inconclusive, although one is shorter.
        if 0:
            # Triggers isl issues in check pass.
            return (inner
                    + pw_aff_to_expr(chunk_floor) * outer
                    + pw_aff_to_expr(chunk_diff) * Min(
                        (outer, pw_aff_to_expr(chunk_mod))))

        capped_outer = Min((outer, pw_aff_to_expr(chunk_mod)))
        return (inner
                + pw_aff_to_expr(chunk_ceil) * capped_outer
                + pw_aff_to_expr(chunk_floor) * (outer - capped_outer))

    # {{{ check that iname is a box iname

    # Since the linearization used in the constraint used to map the domain
    # does not match the linearization in make_new_loop_index, we can't really
    # tolerate if the iname in question has constraints that make it non-boxy,
    # since these sub-indices would end up in the wrong spot.

    for dom in kernel.domains:
        var_dict = dom.get_var_dict()
        if split_iname not in var_dict:
            continue

        dt, idx = var_dict[split_iname]
        assert dt == dim_type.set

        aff_zero = isl.Aff.zero_on_domain(dom.space)
        aff_split_iname = aff_zero.set_coefficient_val(dim_type.in_, idx, 1)
        aligned_size = isl.align_spaces(size, aff_zero)

        # The box: everything except split_iname as in dom, and
        # 0 <= split_iname < size.
        without_iname = dom.eliminate(dt, idx, 1)
        at_least_zero = aff_zero.le_set(aff_split_iname)
        below_size = aff_split_iname.lt_set(aligned_size)
        box_dom = without_iname & at_least_zero & below_size

        # dom is box-shaped exactly when it equals its box.
        if not box_dom <= dom or not dom <= box_dom:
            raise LoopyError("domain '%s' is not box-shape about iname "
                             "'%s', cannot use chunk_iname()"
                             % (dom, split_iname))

    # }}}

    return _split_iname_backend(
        kernel, split_iname,
        fixed_length=num_chunks,
        fixed_length_is_inner=False,
        make_new_loop_index=make_new_loop_index,
        outer_iname=outer_iname,
        inner_iname=inner_iname,
        outer_tag=outer_tag,
        inner_tag=inner_tag,
        slabs=slabs,
        do_tagged_check=do_tagged_check,
        within=within)
def generate_sequential_loop_dim_code(codegen_state, sched_index):
    """Generate code for the sequential loop scheduled at *sched_index*.

    For every slab returned by ``get_slab_decomposition``, static lower and
    upper bounds of the loop iname are determined, the inner loop nest is
    built with the implemented domain and the kernel restricted to that
    slab, and either a single constant initializer (when both static bounds
    coincide) or a sequential loop is emitted.

    :arg codegen_state: the current code generation state; supplies the
        kernel, the expression-to-code mapper and the AST builder.
    :arg sched_index: index into ``kernel.schedule`` of the loop to emit.
    :returns: the per-slab results merged by ``merge_codegen_results``.
    """
    kernel = codegen_state.kernel

    ecm = codegen_state.expression_to_code_mapper
    loop_iname = kernel.schedule[sched_index].iname

    slabs = get_slab_decomposition(kernel, loop_iname)

    from loopy.codegen.bounds import get_usable_inames_for_conditional

    # Note: this does not include loop_iname itself!
    usable_inames = get_usable_inames_for_conditional(kernel, sched_index)
    domain = kernel.get_inames_domain(loop_iname)

    result = []

    for slab_name, slab in slabs:
        cmt = "%s slab for '%s'" % (slab_name, loop_iname)
        if len(slabs) == 1:
            cmt = None

        # {{{ find bounds

        aligned_domain = isl.align_spaces(
            domain, slab, across_dim_types=True, obj_bigger_ok=True)

        dom_and_slab = aligned_domain & slab

        assumptions_non_param = isl.BasicSet.from_params(kernel.assumptions)
        dom_and_slab, assumptions_non_param = isl.align_two(
            dom_and_slab, assumptions_non_param)
        dom_and_slab = dom_and_slab & assumptions_non_param

        # move inames that are usable into parameters
        moved_inames = []
        for das_iname in dom_and_slab.get_var_names(dim_type.set):
            if das_iname in usable_inames:
                moved_inames.append(das_iname)
                dt, idx = dom_and_slab.get_var_dict()[das_iname]
                dom_and_slab = dom_and_slab.move_dims(
                    dim_type.param, dom_and_slab.dim(dim_type.param),
                    dt, idx, 1)

        _, loop_iname_idx = dom_and_slab.get_var_dict()[loop_iname]

        from loopy.isl_helpers import (
            static_min_of_pw_aff,
            static_max_of_pw_aff)

        lbound = (
            kernel.cache_manager.dim_min(
                dom_and_slab, loop_iname_idx)
            .gist(kernel.assumptions)
            .coalesce())
        ubound = (
            kernel.cache_manager.dim_max(
                dom_and_slab, loop_iname_idx)
            .gist(kernel.assumptions)
            .coalesce())

        static_lbound = static_min_of_pw_aff(lbound, constants_only=False)
        static_ubound = static_max_of_pw_aff(ubound, constants_only=False)

        # }}}

        # {{{ find implemented slab, build inner code

        from loopy.isl_helpers import make_slab_from_bound_pwaffs

        # impl_slab may be overapproximated
        impl_slab = make_slab_from_bound_pwaffs(
            dom_and_slab.space,
            loop_iname, static_lbound, static_ubound)

        # Undo the move into parameters for the implemented slab.
        for moved_iname in moved_inames:
            dt, idx = impl_slab.get_var_dict()[moved_iname]
            impl_slab = impl_slab.move_dims(
                dim_type.set, impl_slab.dim(dim_type.set),
                dt, idx, 1)

        # The slab constrains loop_iname, so the kernel must be restricted
        # about loop_iname. This used to pass whatever name the preceding
        # 'for' loops happened to leave behind in their loop variable.
        new_codegen_state = (
            codegen_state
            .intersect(impl_slab)
            .copy(kernel=intersect_kernel_with_slab(
                kernel, slab, loop_iname)))

        inner = build_loop_nest(new_codegen_state, sched_index + 1)

        # }}}

        if cmt is not None:
            result.append(codegen_state.ast_builder.emit_comment(cmt))

        from loopy.symbolic import aff_to_expr

        astb = codegen_state.ast_builder

        if (static_ubound - static_lbound).plain_is_zero():
            # single-trip, generate just a variable assignment, not a loop
            result.append(
                merge_codegen_results(codegen_state, [
                    astb.emit_initializer(
                        codegen_state,
                        kernel.index_dtype, loop_iname,
                        ecm(aff_to_expr(static_lbound), PREC_NONE, "i"),
                        is_const=True),
                    astb.emit_blank_line(),
                    inner,
                ]))

        else:
            inner_ast = inner.current_ast(codegen_state)
            result.append(
                inner.with_new_ast(
                    codegen_state,
                    astb.emit_sequential_loop(
                        codegen_state, loop_iname, kernel.index_dtype,
                        static_lbound, static_ubound, inner_ast)))

    return merge_codegen_results(codegen_state, result)
def generate_sequential_loop_dim_code(codegen_state, sched_index):
    """Generate code for the sequential loop scheduled at *sched_index*.

    For every slab returned by ``get_slab_decomposition``, the lower and
    upper bound of the loop iname are computed as piecewise affine
    expressions, the inner loop nest is built with the implemented domain
    and the kernel restricted to that slab, and either a single constant
    initializer (when both implemented bounds are equal) or a sequential
    loop is emitted.

    :arg codegen_state: the current code generation state; supplies the
        kernel, the expression-to-code mapper, the AST builder and the
        implemented domain.
    :arg sched_index: index into ``kernel.schedule`` of the loop to emit.
    :returns: the per-slab results merged by ``merge_codegen_results``.
    """
    kernel = codegen_state.kernel

    ecm = codegen_state.expression_to_code_mapper
    loop_iname = kernel.schedule[sched_index].iname

    slabs = get_slab_decomposition(kernel, loop_iname)

    from loopy.codegen.bounds import get_usable_inames_for_conditional

    # Note: this does not include loop_iname itself!
    usable_inames = get_usable_inames_for_conditional(kernel, sched_index)
    domain = kernel.get_inames_domain(loop_iname)

    result = []

    for slab_name, slab in slabs:
        # Only label the generated code when there is more than one slab.
        cmt = "%s slab for '%s'" % (slab_name, loop_iname)
        if len(slabs) == 1:
            cmt = None

        # {{{ find bounds

        aligned_domain = isl.align_spaces(domain, slab, across_dim_types=True,
                obj_bigger_ok=True)

        dom_and_slab = aligned_domain & slab

        # Bring the kernel assumptions into the same space and apply them.
        assumptions_non_param = isl.BasicSet.from_params(kernel.assumptions)
        dom_and_slab, assumptions_non_param = isl.align_two(
                dom_and_slab, assumptions_non_param)
        dom_and_slab = dom_and_slab & assumptions_non_param

        # move inames that are usable into parameters
        moved_inames = []
        for das_iname in sorted(dom_and_slab.get_var_names(dim_type.set)):
            if das_iname in usable_inames:
                moved_inames.append(das_iname)
                dt, idx = dom_and_slab.get_var_dict()[das_iname]
                dom_and_slab = dom_and_slab.move_dims(
                        dim_type.param, dom_and_slab.dim(dim_type.param),
                        dt, idx, 1)

        _, loop_iname_idx = dom_and_slab.get_var_dict()[loop_iname]

        # Parameter part of the already-implemented domain, used below to
        # gist the bounds.
        impl_domain = isl.align_spaces(
            codegen_state.implemented_domain,
            dom_and_slab,
            obj_bigger_ok=True,
            across_dim_types=True
            ).params()

        lbound = (
                kernel.cache_manager.dim_min(
                    dom_and_slab, loop_iname_idx)
                .gist(kernel.assumptions)
                .gist(impl_domain)
                .coalesce())
        ubound = (
                kernel.cache_manager.dim_max(
                    dom_and_slab, loop_iname_idx)
                .gist(kernel.assumptions)
                .gist(impl_domain)
                .coalesce())

        # }}}

        # {{{ find implemented loop, build inner code

        from loopy.symbolic import pw_aff_to_pw_aff_implemented_by_expr
        impl_lbound = pw_aff_to_pw_aff_implemented_by_expr(lbound)
        impl_ubound = pw_aff_to_pw_aff_implemented_by_expr(ubound)

        # impl_loop may be overapproximated
        from loopy.isl_helpers import make_loop_bounds_from_pwaffs
        impl_loop = make_loop_bounds_from_pwaffs(
                dom_and_slab.space,
                loop_iname,
                impl_lbound,
                impl_ubound)

        for moved_iname in moved_inames:
            # move moved_iname to 'set' dim_type in impl_loop
            dt, idx = impl_loop.get_var_dict()[moved_iname]
            impl_loop = impl_loop.move_dims(
                    dim_type.set, impl_loop.dim(dim_type.set),
                    dt, idx, 1)

        # The inner nest sees the implemented loop in its domain and a
        # kernel restricted to this slab about loop_iname.
        new_codegen_state = (
                codegen_state
                .intersect(impl_loop)
                .copy(kernel=intersect_kernel_with_slab(
                    kernel, slab, loop_iname)))

        inner = build_loop_nest(new_codegen_state, sched_index+1)

        # }}}

        if cmt is not None:
            result.append(codegen_state.ast_builder.emit_comment(cmt))

        astb = codegen_state.ast_builder

        from loopy.symbolic import pw_aff_to_expr

        if impl_ubound.is_equal(impl_lbound):
            # single-trip, generate just a variable assignment, not a loop
            inner = merge_codegen_results(codegen_state, [
                astb.emit_initializer(
                    codegen_state,
                    kernel.index_dtype, loop_iname,
                    ecm(pw_aff_to_expr(lbound), PREC_NONE, "i"),
                    is_const=True),
                astb.emit_blank_line(),
                inner,
                ])
            # Wrap initializer and body in the builder's block scope.
            result.append(
                    inner.with_new_ast(
                        codegen_state,
                        astb.ast_block_scope_class(
                            inner.current_ast(codegen_state))))

        else:
            inner_ast = inner.current_ast(codegen_state)

            from loopy.isl_helpers import simplify_pw_aff

            result.append(
                inner.with_new_ast(
                    codegen_state,
                    astb.emit_sequential_loop(
                        codegen_state, loop_iname, kernel.index_dtype,
                        pw_aff_to_expr(simplify_pw_aff(lbound, kernel.assumptions)),
                        pw_aff_to_expr(simplify_pw_aff(ubound, kernel.assumptions)),
                        inner_ast)))

    return merge_codegen_results(codegen_state, result)