예제 #1
0
def to_codegen_result(codegen_state, insn_id, domain, check_inames,
                      required_preds, ast):
    """Guard *ast* with any outstanding conditions and package the result.

    Bounds checks for *domain* over *check_inames* are requested from
    ``loopy.codegen.bounds.get_bounds_checks`` (relative to
    ``codegen_state.implemented_domain``, with ``overapproximate=False``).
    They are combined with the members of *required_preds* that are not in
    ``codegen_state.implemented_predicates``. If at least one condition
    remains, *ast* is wrapped via ``codegen_state.ast_builder.emit_if``.

    Returns ``None`` if the set built from the bounds checks is empty.
    Otherwise returns the value of ``CodeGenerationResult.new`` for the
    (possibly wrapped) AST and the implemented domain intersected with the
    bounds-check set.
    """
    from loopy.codegen.bounds import get_bounds_checks
    from loopy.symbolic import constraint_to_expr

    constraints = get_bounds_checks(
        domain, check_inames, codegen_state.implemented_domain,
        overapproximate=False)

    # Express the checks as a set and bring it into the same space as the
    # already-implemented domain so that the two can be intersected.
    unconstrained = isl.Set.universe(domain.get_space())
    check_set, aligned_impl_domain = isl.align_two(
        unconstrained.add_constraints(constraints),
        codegen_state.implemented_domain)
    result_domain = aligned_impl_domain & check_set

    if check_set.is_empty():
        return None

    conditions = [constraint_to_expr(constraint) for constraint in constraints]
    conditions += required_preds - codegen_state.implemented_predicates

    if not conditions:
        # Nothing left to check: hand back the AST unwrapped.
        return CodeGenerationResult.new(codegen_state, insn_id, ast,
                                        result_domain)

    from pymbolic.primitives import LogicalAnd
    from pymbolic.mapper.stringifier import PREC_NONE

    guard_code = codegen_state.expression_to_code_mapper(
        LogicalAnd(tuple(conditions)), PREC_NONE)
    guarded_ast = codegen_state.ast_builder.emit_if(guard_code, ast)

    return CodeGenerationResult.new(codegen_state, insn_id, guarded_ast,
                                    result_domain)
예제 #2
0
def to_codegen_result(codegen_state, insn_id, domain, check_inames,
                      required_preds, ast):
    """Guard *ast* with any outstanding conditions and package the result.

    A check set is derived from *domain* by converting it to an ``isl.Set``,
    removing redundancies and calling the kernel cache manager's
    ``eliminate_except`` with *check_inames* over the ``set`` dimensions.
    ``_get_new_implemented_domain`` then yields the check set to use and the
    new implemented domain.

    The conditions emitted are the check set (unless it is plainly the
    universe) plus the members of *required_preds* that are not in
    ``codegen_state.implemented_predicates``. If at least one condition
    remains, *ast* is wrapped via ``codegen_state.ast_builder.emit_if``.

    Returns ``None`` if the check set is empty, otherwise the value of
    ``CodeGenerationResult.new`` for the (possibly wrapped) AST and the new
    implemented domain.
    """
    kernel = codegen_state.kernel

    check_set = kernel.cache_manager.eliminate_except(
        isl.Set.from_basic_set(domain).remove_redundancies(),
        check_inames, (dim_type.set, ))

    check_set, result_domain = _get_new_implemented_domain(
        kernel, check_set, codegen_state.implemented_domain)

    if check_set.is_empty():
        return None

    conditions = []
    if not check_set.plain_is_universe():
        from loopy.symbolic import set_to_cond_expr
        conditions.append(set_to_cond_expr(check_set))

    conditions += required_preds - codegen_state.implemented_predicates

    if not conditions:
        # Nothing left to check: hand back the AST unwrapped.
        return CodeGenerationResult.new(codegen_state, insn_id, ast,
                                        result_domain)

    from pymbolic.primitives import LogicalAnd
    from pymbolic.mapper.stringifier import PREC_NONE

    guard_code = codegen_state.expression_to_code_mapper(
        LogicalAnd(tuple(conditions)), PREC_NONE)
    guarded_ast = codegen_state.ast_builder.emit_if(guard_code, ast)

    return CodeGenerationResult.new(codegen_state, insn_id, guarded_ast,
                                    result_domain)
예제 #3
0
def to_codegen_result(
        codegen_state, insn_id, domain, check_inames, required_preds, ast):
    """Wrap *ast* in the checks that still need emitting and build a result.

    The checks consist of

    * the constraints returned by ``get_bounds_checks`` for *domain* and
      *check_inames*, taken relative to ``codegen_state.implemented_domain``
      with ``overapproximate=False``, and
    * the predicates in *required_preds* that are absent from
      ``codegen_state.implemented_predicates``.

    When there is at least one check, their conjunction is converted to
    target code and *ast* is wrapped with ``ast_builder.emit_if``.

    Returns ``None`` when the set described by the bounds checks is empty,
    and otherwise whatever ``CodeGenerationResult.new`` returns for the
    (possibly wrapped) AST and the updated implemented domain.
    """
    from loopy.codegen.bounds import get_bounds_checks
    from loopy.symbolic import constraint_to_expr

    impl_domain = codegen_state.implemented_domain

    checks = get_bounds_checks(
            domain, check_inames, impl_domain, overapproximate=False)

    checks_as_set = isl.Set.universe(domain.get_space())
    checks_as_set = checks_as_set.add_constraints(checks)

    # align_two returns both operands in a common space; intersecting them
    # gives the domain implemented once the checks are in place.
    checks_as_set, impl_domain = isl.align_two(checks_as_set, impl_domain)
    updated_impl_domain = impl_domain & checks_as_set

    if checks_as_set.is_empty():
        return None

    guards = list(map(constraint_to_expr, checks))
    guards.extend(required_preds - codegen_state.implemented_predicates)

    if guards:
        from pymbolic.primitives import LogicalAnd
        from pymbolic.mapper.stringifier import PREC_NONE

        conjunction = LogicalAnd(tuple(guards))
        ast = codegen_state.ast_builder.emit_if(
                codegen_state.expression_to_code_mapper(
                    conjunction, PREC_NONE),
                ast)

    return CodeGenerationResult.new(
            codegen_state, insn_id, ast, updated_impl_domain)
예제 #4
0
def to_codegen_result(
        codegen_state, insn_id, domain, check_inames, required_preds, ast):
    """Wrap *ast* in the checks that still need emitting and build a result.

    A check set is computed from *domain*: it is converted to an ``isl.Set``,
    stripped of redundancies, reduced with ``eliminate_except`` to
    *check_inames* (``set`` dimensions), aligned with
    ``codegen_state.implemented_domain`` and finally passed through ``gist``
    with the aligned implemented domain as context.

    The emitted conditions are that check set (unless it is plainly the
    universe) followed by the predicates in *required_preds* that are absent
    from ``codegen_state.implemented_predicates``. When there is at least one
    condition, *ast* is wrapped with ``ast_builder.emit_if``.

    Returns ``None`` when the check set is empty, and otherwise whatever
    ``CodeGenerationResult.new`` returns for the (possibly wrapped) AST and
    the implemented domain intersected with the check set.
    """
    # {{{ get bounds check

    check_set = (
            isl.Set.from_basic_set(domain)
            .remove_redundancies()
            .eliminate_except(check_inames, [dim_type.set]))

    check_set, impl_domain = isl.align_two(
            check_set, codegen_state.implemented_domain)
    check_set = check_set.gist(impl_domain)

    # }}}

    updated_impl_domain = impl_domain & check_set

    if check_set.is_empty():
        return None

    guards = []
    if not check_set.plain_is_universe():
        from loopy.symbolic import set_to_cond_expr
        guards.append(set_to_cond_expr(check_set))

    guards.extend(required_preds - codegen_state.implemented_predicates)

    if guards:
        from pymbolic.primitives import LogicalAnd
        from pymbolic.mapper.stringifier import PREC_NONE

        conjunction = LogicalAnd(tuple(guards))
        ast = codegen_state.ast_builder.emit_if(
                codegen_state.expression_to_code_mapper(
                    conjunction, PREC_NONE),
                ast)

    return CodeGenerationResult.new(
            codegen_state, insn_id, ast, updated_impl_domain)
예제 #5
0
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at *sched_index*.

    The item is looked up in ``codegen_state.kernel.schedule`` and handled
    according to its type:

    * ``CallKernel``: generates the program for the enclosed schedule block
      with a copy of *codegen_state* switched to device code, and merges it
      with the kernel call produced by ``ast_builder.get_kernel_call``.
    * ``EnterLoop``: dispatches to the unroll, vectorize or sequential loop
      generator depending on the tag of the loop's iname.
    * ``Barrier``: emits a barrier in device code; in host code "global" and
      "local" barriers produce an empty ``CodeGenerationResult``.
    * ``RunInstruction``: generates the instruction's code through
      ``codegen_state.try_vectorized``.

    :returns: the value produced by the branch taken. Note that for a device
        barrier without ``originating_insn_id`` this is the bare barrier AST
        rather than a ``CodeGenerationResult``.
    :raises RuntimeError: for an ``EnterLoop`` whose iname tag is not one of
        the handled tag types, or for an unknown schedule item type.
    :raises LoopyError: for a host-side barrier whose synchronization kind is
        neither "global" nor "local".
    """
    kernel = codegen_state.kernel
    sched_item = kernel.schedule[sched_index]

    if isinstance(sched_item, CallKernel):
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (gather_schedule_block,
                                    get_insn_ids_for_block_at)
        _, past_end_i = gather_schedule_block(kernel.schedule, sched_index)
        assert past_end_i <= codegen_state.schedule_index_end

        extra_args = synthesize_idis_for_extra_args(kernel, sched_index)

        # The nested state generates device code for this block only and
        # additionally knows about the synthesized extra arguments.
        new_codegen_state = codegen_state.copy(
            is_generating_device_code=True,
            gen_program_name=sched_item.kernel_name,
            schedule_index_end=past_end_i - 1,
            implemented_data_info=(codegen_state.implemented_data_info +
                                   extra_args))

        from loopy.codegen.result import generate_host_or_device_program
        codegen_result = generate_host_or_device_program(
            new_codegen_state, sched_index)

        glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs(
            get_insn_ids_for_block_at(kernel.schedule, sched_index))

        return merge_codegen_results(codegen_state, [
            codegen_result,
            codegen_state.ast_builder.get_kernel_call(
                codegen_state, sched_item.kernel_name, glob_grid, loc_grid,
                extra_args),
        ])

    elif isinstance(sched_item, EnterLoop):
        tag = kernel.iname_to_tag.get(sched_item.iname)

        from loopy.codegen.loop import (generate_unroll_loop,
                                        generate_vectorize_loop,
                                        generate_sequential_loop_dim_code)

        from loopy.kernel.data import (UnrolledIlpTag, UnrollTag,
                                       ForceSequentialTag, LoopedIlpTag,
                                       VectorizeTag,
                                       InOrderSequentialSequentialTag)
        if isinstance(tag, (UnrollTag, UnrolledIlpTag)):
            func = generate_unroll_loop
        elif isinstance(tag, VectorizeTag):
            func = generate_vectorize_loop
        elif tag is None or isinstance(tag, (LoopedIlpTag, ForceSequentialTag,
                                             InOrderSequentialSequentialTag)):
            func = generate_sequential_loop_dim_code
        else:
            raise RuntimeError("encountered (invalid) EnterLoop "
                               "for '%s', tagged '%s'" %
                               (sched_item.iname, tag))

        return func(codegen_state, sched_index)

    elif isinstance(sched_item, Barrier):
        # {{{ emit barrier code

        from loopy.codegen.result import CodeGenerationResult

        if codegen_state.is_generating_device_code:
            barrier_ast = codegen_state.ast_builder.emit_barrier(
                sched_item.synchronization_kind, sched_item.mem_kind,
                sched_item.comment)
            if sched_item.originating_insn_id:
                return CodeGenerationResult.new(
                    codegen_state, sched_item.originating_insn_id, barrier_ast,
                    codegen_state.implemented_domain)
            else:
                return barrier_ast
        else:
            # host code
            if sched_item.synchronization_kind in ["global", "local"]:
                # host code is assumed globally and locally synchronous
                return CodeGenerationResult(
                    host_program=None,
                    device_programs=[],
                    implemented_domains={},
                    implemented_data_info=codegen_state.implemented_data_info)

            else:
                # Adjacent literals are concatenated verbatim, so the space
                # before "in host code" must be part of a literal.
                raise LoopyError("do not know how to emit code for barrier "
                                 "synchronization kind '%s' "
                                 "in host code" %
                                 sched_item.synchronization_kind)

        # }}}

    elif isinstance(sched_item, RunInstruction):
        insn = kernel.id_to_insn[sched_item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code
        return codegen_state.try_vectorized(
            "instruction %s" % insn.id,
            lambda inner_cgs: generate_instruction_code(inner_cgs, insn))

    else:
        raise RuntimeError("unexpected schedule item type: %s" %
                           type(sched_item))
예제 #6
0
파일: control.py 프로젝트: inducer/loopy
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at *sched_index*.

    The item is looked up in ``codegen_state.kernel.schedule`` and handled
    according to its type:

    * ``CallKernel``: generates the program for the enclosed schedule block
      with a copy of *codegen_state* switched to device code, and merges it
      with the kernel call produced by ``ast_builder.get_kernel_call``.
    * ``EnterLoop``: dispatches to the unroll, vectorize or sequential loop
      generator depending on the (truthy) tags of the loop's iname.
    * ``Barrier``: emits a barrier in device code; in host code "global" and
      "local" barriers produce an empty ``CodeGenerationResult``.
    * ``RunInstruction``: generates the instruction's code through
      ``codegen_state.try_vectorized``.

    :returns: the value produced by the branch taken. Note that for a device
        barrier without ``originating_insn_id`` this is the bare barrier AST
        rather than a ``CodeGenerationResult``.
    :raises RuntimeError: for an ``EnterLoop`` whose iname tags match none of
        the handled tag types, or for an unknown schedule item type.
    :raises LoopyError: for a host-side barrier whose synchronization kind is
        neither "global" nor "local".
    """
    kernel = codegen_state.kernel
    sched_item = kernel.schedule[sched_index]

    if isinstance(sched_item, CallKernel):
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (gather_schedule_block, get_insn_ids_for_block_at)
        _, past_end_i = gather_schedule_block(kernel.schedule, sched_index)
        assert past_end_i <= codegen_state.schedule_index_end

        extra_args = synthesize_idis_for_extra_args(kernel, sched_index)

        # The nested state generates device code for this block only and
        # additionally knows about the synthesized extra arguments.
        new_codegen_state = codegen_state.copy(
                is_generating_device_code=True,
                gen_program_name=sched_item.kernel_name,
                schedule_index_end=past_end_i-1,
                implemented_data_info=(codegen_state.implemented_data_info
                    + extra_args))

        from loopy.codegen.result import generate_host_or_device_program
        codegen_result = generate_host_or_device_program(
                new_codegen_state, sched_index)

        glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs(
                get_insn_ids_for_block_at(kernel.schedule, sched_index))

        return merge_codegen_results(codegen_state, [
            codegen_result,

            codegen_state.ast_builder.get_kernel_call(
                codegen_state,
                sched_item.kernel_name,
                glob_grid, loc_grid,
                extra_args),
            ])

    elif isinstance(sched_item, EnterLoop):
        tags = kernel.iname_tags(sched_item.iname)
        # Drop falsy entries so that "not tags" below means "untagged".
        tags = tuple(tag for tag in tags if tag)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)

        from loopy.kernel.data import (UnrolledIlpTag, UnrollTag,
                ForceSequentialTag, LoopedIlpTag, VectorizeTag,
                InOrderSequentialSequentialTag, filter_iname_tags_by_type)
        if filter_iname_tags_by_type(tags, (UnrollTag, UnrolledIlpTag)):
            func = generate_unroll_loop
        elif filter_iname_tags_by_type(tags, VectorizeTag):
            func = generate_vectorize_loop
        elif not tags or filter_iname_tags_by_type(tags, (LoopedIlpTag,
                    ForceSequentialTag, InOrderSequentialSequentialTag)):
            func = generate_sequential_loop_dim_code
        else:
            raise RuntimeError("encountered (invalid) EnterLoop "
                    "for '%s', tagged '%s'"
                    % (sched_item.iname, ", ".join(str(tag) for tag in tags)))

        return func(codegen_state, sched_index)

    elif isinstance(sched_item, Barrier):
        # {{{ emit barrier code

        from loopy.codegen.result import CodeGenerationResult

        if codegen_state.is_generating_device_code:
            barrier_ast = codegen_state.ast_builder.emit_barrier(
                    sched_item.synchronization_kind, sched_item.mem_kind,
                    sched_item.comment)
            if sched_item.originating_insn_id:
                return CodeGenerationResult.new(
                        codegen_state,
                        sched_item.originating_insn_id,
                        barrier_ast,
                        codegen_state.implemented_domain)
            else:
                return barrier_ast
        else:
            # host code
            if sched_item.synchronization_kind in ["global", "local"]:
                # host code is assumed globally and locally synchronous
                return CodeGenerationResult(
                        host_program=None,
                        device_programs=[],
                        implemented_domains={},
                        implemented_data_info=codegen_state.implemented_data_info)

            else:
                # Adjacent literals are concatenated verbatim, so the space
                # before "in host code" must be part of a literal.
                raise LoopyError("do not know how to emit code for barrier "
                                 "synchronization kind '%s' in host code"
                                 % sched_item.synchronization_kind)

        # }}}

    elif isinstance(sched_item, RunInstruction):
        insn = kernel.id_to_insn[sched_item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code
        return codegen_state.try_vectorized(
                "instruction %s" % insn.id,
                lambda inner_cgs: generate_instruction_code(inner_cgs, insn))

    else:
        raise RuntimeError("unexpected schedule item type: %s"
                % type(sched_item))