def generate_sequential_loop_dim_code(codegen_state, sched_index):
    """Generate code for the sequential loop at ``kernel.schedule[sched_index]``.

    The loop iname is split into slabs via :func:`get_slab_decomposition`.
    For each slab, the lower and upper bounds of the iname are computed,
    the nested code is built with :func:`build_loop_nest` starting at
    ``sched_index + 1``, and the result is wrapped either in a block that
    merely initializes the iname (when the implemented bounds coincide) or
    in a loop produced by the AST builder.

    :arg codegen_state: the current code generation state; its ``kernel``,
        ``expression_to_code_mapper``, ``implemented_domain`` and
        ``ast_builder`` attributes are read here.
    :arg sched_index: index into ``kernel.schedule`` of an item that has an
        ``iname`` attribute.
    :returns: the per-slab results combined by
        :func:`merge_codegen_results`.
    """
    from loopy.codegen.bounds import get_usable_inames_for_conditional
    from loopy.isl_helpers import (
        make_loop_bounds_from_pwaffs, simplify_pw_aff)
    from loopy.symbolic import (
        pw_aff_to_expr, pw_aff_to_pw_aff_implemented_by_expr)

    kernel = codegen_state.kernel
    astb = codegen_state.ast_builder
    ecm = codegen_state.expression_to_code_mapper

    loop_iname = kernel.schedule[sched_index].iname
    slabs = get_slab_decomposition(kernel, loop_iname)

    # Note: this does not include loop_iname itself!
    usable_inames = get_usable_inames_for_conditional(kernel, sched_index)
    domain = kernel.get_inames_domain(loop_iname)

    # A lone slab needs no label in the generated code.
    label_slabs = len(slabs) != 1

    pieces = []

    for slab_name, slab in slabs:
        # {{{ find bounds

        restricted = isl.align_spaces(domain, slab, obj_bigger_ok=True) & slab

        assumption_set = isl.BasicSet.from_params(kernel.assumptions)
        restricted, assumption_set = isl.align_two(restricted, assumption_set)
        restricted = restricted & assumption_set

        # move inames that are usable into parameters
        moved_inames = [
            name
            for name in sorted(restricted.get_var_names(dim_type.set))
            if name in usable_inames]

        for name in moved_inames:
            # Positions shift after every move, so look them up afresh.
            src_type, src_pos = restricted.get_var_dict()[name]
            restricted = restricted.move_dims(
                dim_type.param, restricted.dim(dim_type.param),
                src_type, src_pos, 1)

        _, loop_iname_idx = restricted.get_var_dict()[loop_iname]

        aligned_impl_domain = isl.align_spaces(
            codegen_state.implemented_domain, restricted,
            obj_bigger_ok=True)
        impl_domain = aligned_impl_domain.params()

        lbound = kernel.cache_manager.dim_min(restricted, loop_iname_idx)
        lbound = lbound.gist(kernel.assumptions).gist(impl_domain).coalesce()

        ubound = kernel.cache_manager.dim_max(restricted, loop_iname_idx)
        ubound = ubound.gist(kernel.assumptions).gist(impl_domain).coalesce()

        # }}}

        # {{{ find implemented loop, build inner code

        impl_lbound = pw_aff_to_pw_aff_implemented_by_expr(lbound)
        impl_ubound = pw_aff_to_pw_aff_implemented_by_expr(ubound)

        # impl_loop may be overapproximated
        impl_loop = make_loop_bounds_from_pwaffs(
            restricted.space, loop_iname, impl_lbound, impl_ubound)

        for name in moved_inames:
            # move the iname back to the 'set' dim_type in impl_loop
            src_type, src_pos = impl_loop.get_var_dict()[name]
            impl_loop = impl_loop.move_dims(
                dim_type.set, impl_loop.dim(dim_type.set),
                src_type, src_pos, 1)

        narrowed_state = codegen_state.intersect(impl_loop)
        slab_kernel = intersect_kernel_with_slab(kernel, slab, loop_iname)
        inner = build_loop_nest(
            narrowed_state.copy(kernel=slab_kernel), sched_index + 1)

        # }}}

        if label_slabs:
            pieces.append(
                astb.emit_comment(
                    "%s slab for '%s'" % (slab_name, loop_iname)))

        if not impl_ubound.is_equal(impl_lbound):
            inner_ast = inner.current_ast(codegen_state)

            lower_expr = pw_aff_to_expr(
                simplify_pw_aff(lbound, kernel.assumptions))
            upper_expr = pw_aff_to_expr(
                simplify_pw_aff(ubound, kernel.assumptions))

            loop_ast = astb.emit_sequential_loop(
                codegen_state, loop_iname, kernel.index_dtype,
                lower_expr, upper_expr, inner_ast)
            pieces.append(inner.with_new_ast(codegen_state, loop_ast))
            continue

        # single-trip, generate just a variable assignment, not a loop
        iname_value = ecm(pw_aff_to_expr(lbound), PREC_NONE, "i")
        iname_init = astb.emit_initializer(
            codegen_state, kernel.index_dtype, loop_iname,
            iname_value, is_const=True)
        inner = merge_codegen_results(
            codegen_state, [iname_init, astb.emit_blank_line(), inner])

        scoped_ast = astb.ast_block_scope_class(
            inner.current_ast(codegen_state))
        pieces.append(inner.with_new_ast(codegen_state, scoped_ast))

    return merge_codegen_results(codegen_state, pieces)
def generate_sequential_loop_dim_code(codegen_state, sched_index):
    """Generate code for the sequential loop at ``kernel.schedule[sched_index]``.

    The loop iname is decomposed into slabs (see
    :func:`get_slab_decomposition`). For every slab this function

    * intersects the iname's domain with the slab and the kernel
      assumptions and derives lower/upper bounds for the loop iname,
    * builds the nested code with :func:`build_loop_nest` at
      ``sched_index + 1`` under a code generation state restricted to the
      implemented loop bounds, and
    * emits either a scoped block that only initializes the iname (if the
      implemented lower and upper bounds are equal) or a sequential loop
      created by the AST builder.

    :arg codegen_state: the current code generation state; its ``kernel``,
        ``expression_to_code_mapper``, ``implemented_domain`` and
        ``ast_builder`` attributes are read here.
    :arg sched_index: index into ``kernel.schedule`` of an item that has an
        ``iname`` attribute.
    :returns: the per-slab results combined by
        :func:`merge_codegen_results`.
    """
    kernel = codegen_state.kernel

    ecm = codegen_state.expression_to_code_mapper
    loop_iname = kernel.schedule[sched_index].iname

    slabs = get_slab_decomposition(kernel, loop_iname)

    from loopy.codegen.bounds import get_usable_inames_for_conditional

    # Note: this does not include loop_iname itself!
    usable_inames = get_usable_inames_for_conditional(kernel, sched_index)
    domain = kernel.get_inames_domain(loop_iname)

    result = []

    for slab_name, slab in slabs:
        cmt = "%s slab for '%s'" % (slab_name, loop_iname)
        if len(slabs) == 1:
            # a single slab needs no label in the generated code
            cmt = None

        # {{{ find bounds

        # NOTE: 'across_dim_types' is intentionally not passed to
        # align_spaces. islpy treats that flag as a deprecated no-op and
        # newer releases no longer accept it.
        aligned_domain = isl.align_spaces(domain, slab, obj_bigger_ok=True)

        dom_and_slab = aligned_domain & slab

        assumptions_non_param = isl.BasicSet.from_params(kernel.assumptions)
        dom_and_slab, assumptions_non_param = isl.align_two(
                dom_and_slab, assumptions_non_param)
        dom_and_slab = dom_and_slab & assumptions_non_param

        # move inames that are usable into parameters
        moved_inames = []
        for das_iname in sorted(dom_and_slab.get_var_names(dim_type.set)):
            if das_iname in usable_inames:
                moved_inames.append(das_iname)
                # indices shift with every move, so look them up each time
                dt, idx = dom_and_slab.get_var_dict()[das_iname]
                dom_and_slab = dom_and_slab.move_dims(
                        dim_type.param, dom_and_slab.dim(dim_type.param),
                        dt, idx, 1)

        _, loop_iname_idx = dom_and_slab.get_var_dict()[loop_iname]

        impl_domain = isl.align_spaces(
            codegen_state.implemented_domain,
            dom_and_slab,
            obj_bigger_ok=True,
            ).params()

        lbound = (
                kernel.cache_manager.dim_min(
                    dom_and_slab, loop_iname_idx)
                .gist(kernel.assumptions)
                .gist(impl_domain)
                .coalesce())
        ubound = (
                kernel.cache_manager.dim_max(
                    dom_and_slab, loop_iname_idx)
                .gist(kernel.assumptions)
                .gist(impl_domain)
                .coalesce())

        # }}}

        # {{{ find implemented loop, build inner code

        from loopy.symbolic import pw_aff_to_pw_aff_implemented_by_expr
        impl_lbound = pw_aff_to_pw_aff_implemented_by_expr(lbound)
        impl_ubound = pw_aff_to_pw_aff_implemented_by_expr(ubound)

        # impl_loop may be overapproximated
        from loopy.isl_helpers import make_loop_bounds_from_pwaffs
        impl_loop = make_loop_bounds_from_pwaffs(
                dom_and_slab.space,
                loop_iname,
                impl_lbound,
                impl_ubound)

        for moved_iname in moved_inames:
            # move moved_iname to 'set' dim_type in impl_loop
            dt, idx = impl_loop.get_var_dict()[moved_iname]
            impl_loop = impl_loop.move_dims(
                    dim_type.set, impl_loop.dim(dim_type.set),
                    dt, idx, 1)

        new_codegen_state = (
                codegen_state
                .intersect(impl_loop)
                .copy(kernel=intersect_kernel_with_slab(
                    kernel, slab, loop_iname)))

        inner = build_loop_nest(new_codegen_state, sched_index+1)

        # }}}

        if cmt is not None:
            result.append(codegen_state.ast_builder.emit_comment(cmt))

        astb = codegen_state.ast_builder

        from loopy.symbolic import pw_aff_to_expr

        if impl_ubound.is_equal(impl_lbound):
            # single-trip, generate just a variable assignment, not a loop
            inner = merge_codegen_results(codegen_state, [
                astb.emit_initializer(
                    codegen_state,
                    kernel.index_dtype, loop_iname,
                    ecm(pw_aff_to_expr(lbound), PREC_NONE, "i"),
                    is_const=True),
                astb.emit_blank_line(),
                inner,
                ])
            # wrap in a block scope so the iname declaration stays local
            result.append(
                    inner.with_new_ast(
                        codegen_state,
                        astb.ast_block_scope_class(
                            inner.current_ast(codegen_state))))

        else:
            inner_ast = inner.current_ast(codegen_state)

            from loopy.isl_helpers import simplify_pw_aff

            result.append(
                inner.with_new_ast(
                    codegen_state,
                    astb.emit_sequential_loop(
                        codegen_state, loop_iname, kernel.index_dtype,
                        pw_aff_to_expr(
                            simplify_pw_aff(lbound, kernel.assumptions)),
                        pw_aff_to_expr(
                            simplify_pw_aff(ubound, kernel.assumptions)),
                        inner_ast)))

    return merge_codegen_results(codegen_state, result)