Beispiel #1
0
def generate_sequential_loop_dim_code(codegen_state, sched_index):
    kernel = codegen_state.kernel

    ecm = codegen_state.expression_to_code_mapper
    loop_iname = kernel.schedule[sched_index].iname

    slabs = get_slab_decomposition(kernel, loop_iname)

    from loopy.codegen.bounds import get_usable_inames_for_conditional

    # Note: this does not include loop_iname itself!
    usable_inames = get_usable_inames_for_conditional(kernel, sched_index)
    domain = kernel.get_inames_domain(loop_iname)

    result = []

    for slab_name, slab in slabs:
        cmt = "%s slab for '%s'" % (slab_name, loop_iname)
        if len(slabs) == 1:
            cmt = None

        # {{{ find bounds

        aligned_domain = isl.align_spaces(domain, slab, obj_bigger_ok=True)

        dom_and_slab = aligned_domain & slab

        assumptions_non_param = isl.BasicSet.from_params(kernel.assumptions)
        dom_and_slab, assumptions_non_param = isl.align_two(
            dom_and_slab, assumptions_non_param)
        dom_and_slab = dom_and_slab & assumptions_non_param

        # move inames that are usable into parameters
        moved_inames = []
        for das_iname in sorted(dom_and_slab.get_var_names(dim_type.set)):
            if das_iname in usable_inames:
                moved_inames.append(das_iname)
                dt, idx = dom_and_slab.get_var_dict()[das_iname]
                dom_and_slab = dom_and_slab.move_dims(
                    dim_type.param, dom_and_slab.dim(dim_type.param), dt, idx,
                    1)

        _, loop_iname_idx = dom_and_slab.get_var_dict()[loop_iname]

        impl_domain = isl.align_spaces(codegen_state.implemented_domain,
                                       dom_and_slab,
                                       obj_bigger_ok=True).params()

        lbound = (kernel.cache_manager.dim_min(
            dom_and_slab, loop_iname_idx).gist(
                kernel.assumptions).gist(impl_domain).coalesce())
        ubound = (kernel.cache_manager.dim_max(
            dom_and_slab, loop_iname_idx).gist(
                kernel.assumptions).gist(impl_domain).coalesce())

        # }}}

        # {{{ find implemented loop, build inner code

        from loopy.symbolic import pw_aff_to_pw_aff_implemented_by_expr
        impl_lbound = pw_aff_to_pw_aff_implemented_by_expr(lbound)
        impl_ubound = pw_aff_to_pw_aff_implemented_by_expr(ubound)

        # impl_loop may be overapproximated
        from loopy.isl_helpers import make_loop_bounds_from_pwaffs
        impl_loop = make_loop_bounds_from_pwaffs(dom_and_slab.space,
                                                 loop_iname, impl_lbound,
                                                 impl_ubound)

        for moved_iname in moved_inames:
            # move moved_iname to 'set' dim_type in impl_loop
            dt, idx = impl_loop.get_var_dict()[moved_iname]
            impl_loop = impl_loop.move_dims(dim_type.set,
                                            impl_loop.dim(dim_type.set), dt,
                                            idx, 1)

        new_codegen_state = (codegen_state.intersect(impl_loop).copy(
            kernel=intersect_kernel_with_slab(kernel, slab, loop_iname)))

        inner = build_loop_nest(new_codegen_state, sched_index + 1)

        # }}}

        if cmt is not None:
            result.append(codegen_state.ast_builder.emit_comment(cmt))

        astb = codegen_state.ast_builder

        from loopy.symbolic import pw_aff_to_expr

        if impl_ubound.is_equal(impl_lbound):
            # single-trip, generate just a variable assignment, not a loop
            inner = merge_codegen_results(codegen_state, [
                astb.emit_initializer(codegen_state,
                                      kernel.index_dtype,
                                      loop_iname,
                                      ecm(pw_aff_to_expr(lbound), PREC_NONE,
                                          "i"),
                                      is_const=True),
                astb.emit_blank_line(),
                inner,
            ])
            result.append(
                inner.with_new_ast(
                    codegen_state,
                    astb.ast_block_scope_class(
                        inner.current_ast(codegen_state))))

        else:
            inner_ast = inner.current_ast(codegen_state)

            from loopy.isl_helpers import simplify_pw_aff

            result.append(
                inner.with_new_ast(
                    codegen_state,
                    astb.emit_sequential_loop(
                        codegen_state, loop_iname, kernel.index_dtype,
                        pw_aff_to_expr(
                            simplify_pw_aff(lbound, kernel.assumptions)),
                        pw_aff_to_expr(
                            simplify_pw_aff(ubound, kernel.assumptions)),
                        inner_ast)))

    return merge_codegen_results(codegen_state, result)
Beispiel #2
0
def generate_sequential_loop_dim_code(codegen_state, sched_index):
    kernel = codegen_state.kernel

    ecm = codegen_state.expression_to_code_mapper
    loop_iname = kernel.schedule[sched_index].iname

    slabs = get_slab_decomposition(kernel, loop_iname)

    from loopy.codegen.bounds import get_usable_inames_for_conditional

    # Note: this does not include loop_iname itself!
    usable_inames = get_usable_inames_for_conditional(kernel, sched_index)
    domain = kernel.get_inames_domain(loop_iname)

    result = []

    for slab_name, slab in slabs:
        cmt = "%s slab for '%s'" % (slab_name, loop_iname)
        if len(slabs) == 1:
            cmt = None

        # {{{ find bounds

        aligned_domain = isl.align_spaces(domain, slab, across_dim_types=True,
                obj_bigger_ok=True)

        dom_and_slab = aligned_domain & slab

        assumptions_non_param = isl.BasicSet.from_params(kernel.assumptions)
        dom_and_slab, assumptions_non_param = isl.align_two(
                dom_and_slab, assumptions_non_param)
        dom_and_slab = dom_and_slab & assumptions_non_param

        # move inames that are usable into parameters
        moved_inames = []
        for das_iname in sorted(dom_and_slab.get_var_names(dim_type.set)):
            if das_iname in usable_inames:
                moved_inames.append(das_iname)
                dt, idx = dom_and_slab.get_var_dict()[das_iname]
                dom_and_slab = dom_and_slab.move_dims(
                        dim_type.param, dom_and_slab.dim(dim_type.param),
                        dt, idx, 1)

        _, loop_iname_idx = dom_and_slab.get_var_dict()[loop_iname]

        impl_domain = isl.align_spaces(
            codegen_state.implemented_domain,
            dom_and_slab,
            obj_bigger_ok=True,
            across_dim_types=True
            ).params()

        lbound = (
                kernel.cache_manager.dim_min(
                    dom_and_slab, loop_iname_idx)
                .gist(kernel.assumptions)
                .gist(impl_domain)
                .coalesce())
        ubound = (
            kernel.cache_manager.dim_max(
                dom_and_slab, loop_iname_idx)
            .gist(kernel.assumptions)
            .gist(impl_domain)
            .coalesce())

        # }}}

        # {{{ find implemented loop, build inner code

        from loopy.symbolic import pw_aff_to_pw_aff_implemented_by_expr
        impl_lbound = pw_aff_to_pw_aff_implemented_by_expr(lbound)
        impl_ubound = pw_aff_to_pw_aff_implemented_by_expr(ubound)

        # impl_loop may be overapproximated
        from loopy.isl_helpers import make_loop_bounds_from_pwaffs
        impl_loop = make_loop_bounds_from_pwaffs(
                dom_and_slab.space,
                loop_iname,
                impl_lbound,
                impl_ubound)

        for moved_iname in moved_inames:
            # move moved_iname to 'set' dim_type in impl_loop
            dt, idx = impl_loop.get_var_dict()[moved_iname]
            impl_loop = impl_loop.move_dims(
                    dim_type.set, impl_loop.dim(dim_type.set),
                    dt, idx, 1)

        new_codegen_state = (
                codegen_state
                .intersect(impl_loop)
                .copy(kernel=intersect_kernel_with_slab(
                    kernel, slab, loop_iname)))

        inner = build_loop_nest(new_codegen_state, sched_index+1)

        # }}}

        if cmt is not None:
            result.append(codegen_state.ast_builder.emit_comment(cmt))

        astb = codegen_state.ast_builder

        from loopy.symbolic import pw_aff_to_expr

        if impl_ubound.is_equal(impl_lbound):
            # single-trip, generate just a variable assignment, not a loop
            inner = merge_codegen_results(codegen_state, [
                astb.emit_initializer(
                    codegen_state,
                    kernel.index_dtype, loop_iname,
                    ecm(pw_aff_to_expr(lbound), PREC_NONE, "i"),
                    is_const=True),
                astb.emit_blank_line(),
                inner,
                ])
            result.append(
                    inner.with_new_ast(
                        codegen_state,
                        astb.ast_block_scope_class(
                            inner.current_ast(codegen_state))))

        else:
            inner_ast = inner.current_ast(codegen_state)

            from loopy.isl_helpers import simplify_pw_aff

            result.append(
                inner.with_new_ast(
                    codegen_state,
                    astb.emit_sequential_loop(
                        codegen_state, loop_iname, kernel.index_dtype,
                        pw_aff_to_expr(simplify_pw_aff(lbound, kernel.assumptions)),
                        pw_aff_to_expr(simplify_pw_aff(ubound, kernel.assumptions)),
                        inner_ast)))

    return merge_codegen_results(codegen_state, result)