예제 #1
0
파일: control.py 프로젝트: rckirby/loopy
def generate_code_for_sched_index(kernel, sched_index, codegen_state):
    """Generate code for the schedule item at position *sched_index*.

    The item ``kernel.schedule[sched_index]`` is dispatched on its type:

    * ``EnterLoop``: a loop generator from :mod:`loopy.codegen.loop` is
      selected based on the tag registered for the loop's iname in
      ``kernel.iname_to_tag`` and called with
      ``(kernel, sched_index, codegen_state)``.
    * ``Barrier``: a ``GeneratedInstruction`` is returned whose AST is a
      ``barrier(CLK_LOCAL_MEM_FENCE)`` statement, followed by the barrier's
      comment wrapped in ``/* ... */`` if the comment is non-empty.
    * ``RunInstruction``: code for the referenced instruction is generated
      through ``codegen_state.try_vectorized``.

    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is
        not handled here, or if the schedule item is of any other type.
    """
    item = kernel.schedule[sched_index]

    if isinstance(item, EnterLoop):
        iname_tag = kernel.iname_to_tag.get(item.iname)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)
        from loopy.kernel.data import (
                UnrolledIlpTag, UnrollTag, ForceSequentialTag,
                LoopedIlpTag, VectorizeTag)

        if iname_tag is None:
            # Untagged inames are emitted as plain sequential loops.
            loop_generator = generate_sequential_loop_dim_code
        else:
            # Tag families are tested in this order; the first match wins.
            generators_by_tag = (
                    ((UnrollTag, UnrolledIlpTag), generate_unroll_loop),
                    ((VectorizeTag,), generate_vectorize_loop),
                    ((LoopedIlpTag, ForceSequentialTag),
                        generate_sequential_loop_dim_code),
                    )
            for tag_types, candidate in generators_by_tag:
                if isinstance(iname_tag, tag_types):
                    loop_generator = candidate
                    break
            else:
                raise RuntimeError(
                        "encountered (invalid) EnterLoop "
                        "for '%s', tagged '%s'" % (item.iname, iname_tag))

        return loop_generator(kernel, sched_index, codegen_state)

    if isinstance(item, Barrier):
        from loopy.codegen import GeneratedInstruction
        from cgen import Statement as S  # noqa

        # Append the barrier's comment only when there is one.
        suffix = (" /* %s */" % item.comment) if item.comment else ""
        barrier_stmt = S("barrier(CLK_LOCAL_MEM_FENCE)%s" % suffix)

        return GeneratedInstruction(
                ast=barrier_stmt,
                implemented_domain=None)

    if isinstance(item, RunInstruction):
        insn = kernel.id_to_insn[item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code

        def generate_for_insn(inner_cgs):
            return generate_instruction_code(kernel, insn, inner_cgs)

        return codegen_state.try_vectorized(
                "instruction %s" % insn.id, generate_for_insn)

    raise RuntimeError("unexpected schedule item type: %s" % type(item))
예제 #2
0
파일: control.py 프로젝트: navjotk/loopy
def generate_code_for_sched_index(kernel, sched_index, codegen_state):
    """Generate code for the schedule item at position *sched_index*.

    The item ``kernel.schedule[sched_index]`` is dispatched on its type:

    * ``EnterLoop``: a loop generator from :mod:`loopy.codegen.loop` is
      selected based on the tag registered for the loop's iname in
      ``kernel.iname_to_tag`` and called with
      ``(kernel, sched_index, codegen_state)``.
    * ``Barrier``: delegated to ``kernel.target.emit_barrier`` with the
      barrier's ``kind`` and ``comment``.
    * ``RunInstruction``: code for the referenced instruction is generated
      through ``codegen_state.try_vectorized``.

    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is
        not handled here, or if the schedule item is of any other type.
    """
    item = kernel.schedule[sched_index]

    if isinstance(item, EnterLoop):
        iname_tag = kernel.iname_to_tag.get(item.iname)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)
        from loopy.kernel.data import (
                UnrolledIlpTag, UnrollTag, ForceSequentialTag,
                LoopedIlpTag, VectorizeTag)

        if iname_tag is None:
            # Untagged inames are emitted as plain sequential loops.
            loop_generator = generate_sequential_loop_dim_code
        else:
            # Tag families are tested in this order; the first match wins.
            generators_by_tag = (
                    ((UnrollTag, UnrolledIlpTag), generate_unroll_loop),
                    ((VectorizeTag,), generate_vectorize_loop),
                    ((LoopedIlpTag, ForceSequentialTag),
                        generate_sequential_loop_dim_code),
                    )
            for tag_types, candidate in generators_by_tag:
                if isinstance(iname_tag, tag_types):
                    loop_generator = candidate
                    break
            else:
                raise RuntimeError(
                        "encountered (invalid) EnterLoop "
                        "for '%s', tagged '%s'" % (item.iname, iname_tag))

        return loop_generator(kernel, sched_index, codegen_state)

    if isinstance(item, Barrier):
        # Barrier syntax is supplied by the kernel's target.
        return kernel.target.emit_barrier(item.kind, item.comment)

    if isinstance(item, RunInstruction):
        insn = kernel.id_to_insn[item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code

        def generate_for_insn(inner_cgs):
            return generate_instruction_code(kernel, insn, inner_cgs)

        return codegen_state.try_vectorized(
                "instruction %s" % insn.id, generate_for_insn)

    raise RuntimeError("unexpected schedule item type: %s" % type(item))
예제 #3
0
파일: control.py 프로젝트: shigh/loopy
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at position *sched_index*.

    The item is looked up in ``codegen_state.kernel.schedule`` and
    dispatched on its type:

    * ``CallKernel``: generates the device program for the schedule block
      starting at *sched_index* and merges it with the call to that kernel
      produced by ``codegen_state.ast_builder.get_kernel_call``.
    * ``EnterLoop``: a loop generator from :mod:`loopy.codegen.loop` is
      selected based on the tag registered for the loop's iname in
      ``kernel.iname_to_tag`` and called with
      ``(codegen_state, sched_index)``.
    * ``Barrier``: delegated to ``codegen_state.ast_builder.emit_barrier``
      with the barrier's ``kind`` and ``comment``.
    * ``RunInstruction``: code for the referenced instruction is generated
      through ``codegen_state.try_vectorized``.

    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is
        not handled here, or if the schedule item is of any other type.
    """
    knl = codegen_state.kernel
    item = knl.schedule[sched_index]

    if isinstance(item, CallKernel):
        # Must not already be generating device code at a kernel call.
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (gather_schedule_block,
                                    get_insn_ids_for_block_at)
        _, block_past_end = gather_schedule_block(knl.schedule, sched_index)
        assert block_past_end <= codegen_state.schedule_index_end

        extra_arg_idis = synthesize_idis_for_extra_args(knl, sched_index)

        # The device program sees the existing data info plus the extra
        # arguments synthesized for this kernel call.
        device_data_info = (
            codegen_state.implemented_data_info + extra_arg_idis)
        device_codegen_state = codegen_state.copy(
            is_generating_device_code=True,
            gen_program_name=item.kernel_name,
            schedule_index_end=block_past_end - 1,
            implemented_data_info=device_data_info)

        from loopy.codegen.result import generate_host_or_device_program
        device_result = generate_host_or_device_program(
            device_codegen_state, sched_index)

        block_insn_ids = get_insn_ids_for_block_at(knl.schedule, sched_index)
        glob_grid, loc_grid = knl.get_grid_sizes_for_insn_ids_as_exprs(
            block_insn_ids)

        kernel_call = codegen_state.ast_builder.get_kernel_call(
            codegen_state, item.kernel_name, glob_grid, loc_grid,
            extra_arg_idis)

        return merge_codegen_results(
            codegen_state, [device_result, kernel_call])

    if isinstance(item, EnterLoop):
        iname_tag = knl.iname_to_tag.get(item.iname)

        from loopy.codegen.loop import (generate_unroll_loop,
                                        generate_vectorize_loop,
                                        generate_sequential_loop_dim_code)
        from loopy.kernel.data import (UnrolledIlpTag, UnrollTag,
                                       ForceSequentialTag, LoopedIlpTag,
                                       VectorizeTag)

        if iname_tag is None:
            # Untagged inames are emitted as plain sequential loops.
            loop_generator = generate_sequential_loop_dim_code
        else:
            # Tag families are tested in this order; the first match wins.
            generators_by_tag = (
                ((UnrollTag, UnrolledIlpTag), generate_unroll_loop),
                ((VectorizeTag,), generate_vectorize_loop),
                ((LoopedIlpTag, ForceSequentialTag),
                 generate_sequential_loop_dim_code),
            )
            for tag_types, candidate in generators_by_tag:
                if isinstance(iname_tag, tag_types):
                    loop_generator = candidate
                    break
            else:
                raise RuntimeError("encountered (invalid) EnterLoop "
                                   "for '%s', tagged '%s'" %
                                   (item.iname, iname_tag))

        return loop_generator(codegen_state, sched_index)

    if isinstance(item, Barrier):
        return codegen_state.ast_builder.emit_barrier(item.kind, item.comment)

    if isinstance(item, RunInstruction):
        insn = knl.id_to_insn[item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code

        def generate_for_insn(inner_cgs):
            return generate_instruction_code(inner_cgs, insn)

        return codegen_state.try_vectorized(
            "instruction %s" % insn.id, generate_for_insn)

    raise RuntimeError("unexpected schedule item type: %s" % type(item))
예제 #4
0
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at position *sched_index*.

    The item is looked up in ``codegen_state.kernel.schedule`` and
    dispatched on its type:

    * ``CallKernel``: generates the device program for the schedule block
      starting at *sched_index* and merges it with the call to that kernel
      produced by ``codegen_state.ast_builder.get_kernel_call``.
    * ``EnterLoop``: a loop generator from :mod:`loopy.codegen.loop` is
      selected based on the tag registered for the loop's iname in
      ``kernel.iname_to_tag`` and called with
      ``(codegen_state, sched_index)``.
    * ``Barrier``: in device code, the barrier AST comes from
      ``codegen_state.ast_builder.emit_barrier``; it is wrapped in a
      ``CodeGenerationResult`` when the barrier has an
      ``originating_insn_id`` and returned bare otherwise. In host code,
      ``"global"`` and ``"local"`` barriers yield an empty
      ``CodeGenerationResult``.
    * ``RunInstruction``: code for the referenced instruction is generated
      through ``codegen_state.try_vectorized``.

    :raises LoopyError: for a host-code barrier whose synchronization kind
        is neither ``"global"`` nor ``"local"``.
    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is
        not handled here, or if the schedule item is of any other type.
    """
    kernel = codegen_state.kernel
    sched_item = kernel.schedule[sched_index]

    if isinstance(sched_item, CallKernel):
        # Must not already be generating device code at a kernel call.
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (gather_schedule_block,
                                    get_insn_ids_for_block_at)
        _, past_end_i = gather_schedule_block(kernel.schedule, sched_index)
        assert past_end_i <= codegen_state.schedule_index_end

        extra_args = synthesize_idis_for_extra_args(kernel, sched_index)

        # The device program sees the existing data info plus the extra
        # arguments synthesized for this kernel call.
        new_codegen_state = codegen_state.copy(
            is_generating_device_code=True,
            gen_program_name=sched_item.kernel_name,
            schedule_index_end=past_end_i - 1,
            implemented_data_info=(codegen_state.implemented_data_info +
                                   extra_args))

        from loopy.codegen.result import generate_host_or_device_program
        codegen_result = generate_host_or_device_program(
            new_codegen_state, sched_index)

        glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs(
            get_insn_ids_for_block_at(kernel.schedule, sched_index))

        return merge_codegen_results(codegen_state, [
            codegen_result,
            codegen_state.ast_builder.get_kernel_call(
                codegen_state, sched_item.kernel_name, glob_grid, loc_grid,
                extra_args),
        ])

    elif isinstance(sched_item, EnterLoop):
        tag = kernel.iname_to_tag.get(sched_item.iname)

        from loopy.codegen.loop import (generate_unroll_loop,
                                        generate_vectorize_loop,
                                        generate_sequential_loop_dim_code)

        from loopy.kernel.data import (UnrolledIlpTag, UnrollTag,
                                       ForceSequentialTag, LoopedIlpTag,
                                       VectorizeTag,
                                       InOrderSequentialSequentialTag)
        if isinstance(tag, (UnrollTag, UnrolledIlpTag)):
            func = generate_unroll_loop
        elif isinstance(tag, VectorizeTag):
            func = generate_vectorize_loop
        elif tag is None or isinstance(tag, (LoopedIlpTag, ForceSequentialTag,
                                             InOrderSequentialSequentialTag)):
            # Untagged inames and the sequential tag families all become
            # plain sequential loops.
            func = generate_sequential_loop_dim_code
        else:
            raise RuntimeError("encountered (invalid) EnterLoop "
                               "for '%s', tagged '%s'" %
                               (sched_item.iname, tag))

        return func(codegen_state, sched_index)

    elif isinstance(sched_item, Barrier):
        # {{{ emit barrier code

        from loopy.codegen.result import CodeGenerationResult

        if codegen_state.is_generating_device_code:
            barrier_ast = codegen_state.ast_builder.emit_barrier(
                sched_item.synchronization_kind, sched_item.mem_kind,
                sched_item.comment)
            if sched_item.originating_insn_id:
                # Attribute the barrier to the instruction it originated
                # from.
                return CodeGenerationResult.new(
                    codegen_state, sched_item.originating_insn_id, barrier_ast,
                    codegen_state.implemented_domain)
            else:
                return barrier_ast
        else:
            # host code
            if sched_item.synchronization_kind in ["global", "local"]:
                # host code is assumed globally and locally synchronous
                return CodeGenerationResult(
                    host_program=None,
                    device_programs=[],
                    implemented_domains={},
                    implemented_data_info=codegen_state.implemented_data_info)

            else:
                # The literals are concatenated, so the separating space
                # has to be part of one of them.
                raise LoopyError("do not know how to emit code for barrier "
                                 "synchronization kind '%s' "
                                 "in host code" %
                                 sched_item.synchronization_kind)

        # }}}

    elif isinstance(sched_item, RunInstruction):
        insn = kernel.id_to_insn[sched_item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code
        return codegen_state.try_vectorized(
            "instruction %s" % insn.id,
            lambda inner_cgs: generate_instruction_code(inner_cgs, insn))

    else:
        raise RuntimeError("unexpected schedule item type: %s" %
                           type(sched_item))
예제 #5
0
파일: control.py 프로젝트: inducer/loopy
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at position *sched_index*.

    The item is looked up in ``codegen_state.kernel.schedule`` and
    dispatched on its type:

    * ``CallKernel``: generates the device program for the schedule block
      starting at *sched_index* and merges it with the call to that kernel
      produced by ``codegen_state.ast_builder.get_kernel_call``.
    * ``EnterLoop``: a loop generator from :mod:`loopy.codegen.loop` is
      selected based on the truthy tags returned by
      ``kernel.iname_tags`` for the loop's iname and called with
      ``(codegen_state, sched_index)``.
    * ``Barrier``: in device code, the barrier AST comes from
      ``codegen_state.ast_builder.emit_barrier``; it is wrapped in a
      ``CodeGenerationResult`` when the barrier has an
      ``originating_insn_id`` and returned bare otherwise. In host code,
      ``"global"`` and ``"local"`` barriers yield an empty
      ``CodeGenerationResult``.
    * ``RunInstruction``: code for the referenced instruction is generated
      through ``codegen_state.try_vectorized``.

    :raises LoopyError: for a host-code barrier whose synchronization kind
        is neither ``"global"`` nor ``"local"``.
    :raises RuntimeError: if an ``EnterLoop`` iname carries only tags that
        are not handled here, or if the schedule item is of any other type.
    """
    kernel = codegen_state.kernel
    sched_item = kernel.schedule[sched_index]

    if isinstance(sched_item, CallKernel):
        # Must not already be generating device code at a kernel call.
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (gather_schedule_block, get_insn_ids_for_block_at)
        _, past_end_i = gather_schedule_block(kernel.schedule, sched_index)
        assert past_end_i <= codegen_state.schedule_index_end

        extra_args = synthesize_idis_for_extra_args(kernel, sched_index)

        # The device program sees the existing data info plus the extra
        # arguments synthesized for this kernel call.
        new_codegen_state = codegen_state.copy(
                is_generating_device_code=True,
                gen_program_name=sched_item.kernel_name,
                schedule_index_end=past_end_i-1,
                implemented_data_info=(codegen_state.implemented_data_info
                    + extra_args))

        from loopy.codegen.result import generate_host_or_device_program
        codegen_result = generate_host_or_device_program(
                new_codegen_state, sched_index)

        glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs(
                get_insn_ids_for_block_at(kernel.schedule, sched_index))

        return merge_codegen_results(codegen_state, [
            codegen_result,

            codegen_state.ast_builder.get_kernel_call(
                codegen_state,
                sched_item.kernel_name,
                glob_grid, loc_grid,
                extra_args),
            ])

    elif isinstance(sched_item, EnterLoop):
        tags = kernel.iname_tags(sched_item.iname)
        # Drop falsy entries so that "not tags" below means "untagged".
        tags = tuple(tag for tag in tags if tag)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)

        from loopy.kernel.data import (UnrolledIlpTag, UnrollTag,
                ForceSequentialTag, LoopedIlpTag, VectorizeTag,
                InOrderSequentialSequentialTag, filter_iname_tags_by_type)
        if filter_iname_tags_by_type(tags, (UnrollTag, UnrolledIlpTag)):
            func = generate_unroll_loop
        elif filter_iname_tags_by_type(tags, VectorizeTag):
            func = generate_vectorize_loop
        elif not tags or filter_iname_tags_by_type(tags, (LoopedIlpTag,
                    ForceSequentialTag, InOrderSequentialSequentialTag)):
            # Untagged inames and the sequential tag families all become
            # plain sequential loops.
            func = generate_sequential_loop_dim_code
        else:
            raise RuntimeError("encountered (invalid) EnterLoop "
                    "for '%s', tagged '%s'"
                    % (sched_item.iname, ", ".join(str(tag) for tag in tags)))

        return func(codegen_state, sched_index)

    elif isinstance(sched_item, Barrier):
        # {{{ emit barrier code

        from loopy.codegen.result import CodeGenerationResult

        if codegen_state.is_generating_device_code:
            barrier_ast = codegen_state.ast_builder.emit_barrier(
                    sched_item.synchronization_kind, sched_item.mem_kind,
                    sched_item.comment)
            if sched_item.originating_insn_id:
                # Attribute the barrier to the instruction it originated
                # from.
                return CodeGenerationResult.new(
                        codegen_state,
                        sched_item.originating_insn_id,
                        barrier_ast,
                        codegen_state.implemented_domain)
            else:
                return barrier_ast
        else:
            # host code
            if sched_item.synchronization_kind in ["global", "local"]:
                # host code is assumed globally and locally synchronous
                return CodeGenerationResult(
                        host_program=None,
                        device_programs=[],
                        implemented_domains={},
                        implemented_data_info=codegen_state.implemented_data_info)

            else:
                # The literals are concatenated, so the separating space
                # has to be part of one of them.
                raise LoopyError("do not know how to emit code for barrier "
                                 "synchronization kind '%s' in host code"
                                 % sched_item.synchronization_kind)

        # }}}

    elif isinstance(sched_item, RunInstruction):
        insn = kernel.id_to_insn[sched_item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code
        return codegen_state.try_vectorized(
                "instruction %s" % insn.id,
                lambda inner_cgs: generate_instruction_code(inner_cgs, insn))

    else:
        raise RuntimeError("unexpected schedule item type: %s"
                % type(sched_item))
예제 #6
0
파일: control.py 프로젝트: cmsquared/loopy
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at position *sched_index*.

    The item is looked up in ``codegen_state.kernel.schedule`` and
    dispatched on its type:

    * ``CallKernel``: generates the device program for the schedule block
      starting at *sched_index* and merges it with the call to that kernel
      produced by ``codegen_state.ast_builder.get_kernel_call``.
    * ``EnterLoop``: a loop generator from :mod:`loopy.codegen.loop` is
      selected based on the tag registered for the loop's iname in
      ``kernel.iname_to_tag`` and called with
      ``(codegen_state, sched_index)``.
    * ``Barrier``: delegated to ``codegen_state.ast_builder.emit_barrier``
      with the barrier's ``kind`` and ``comment``.
    * ``RunInstruction``: code for the referenced instruction is generated
      through ``codegen_state.try_vectorized``.

    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is
        not handled here, or if the schedule item is of any other type.
    """
    knl = codegen_state.kernel
    item = knl.schedule[sched_index]

    if isinstance(item, CallKernel):
        # Must not already be generating device code at a kernel call.
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (gather_schedule_block, get_insn_ids_for_block_at)
        _, block_past_end = gather_schedule_block(knl.schedule, sched_index)
        assert block_past_end <= codegen_state.schedule_index_end

        extra_arg_idis = synthesize_idis_for_extra_args(knl, sched_index)

        # The device program sees the existing data info plus the extra
        # arguments synthesized for this kernel call.
        device_data_info = (
                codegen_state.implemented_data_info + extra_arg_idis)
        device_codegen_state = codegen_state.copy(
                is_generating_device_code=True,
                gen_program_name=item.kernel_name,
                schedule_index_end=block_past_end-1,
                implemented_data_info=device_data_info)

        from loopy.codegen.result import generate_host_or_device_program
        device_result = generate_host_or_device_program(
                device_codegen_state, sched_index)

        block_insn_ids = get_insn_ids_for_block_at(knl.schedule, sched_index)
        glob_grid, loc_grid = knl.get_grid_sizes_for_insn_ids_as_exprs(
                block_insn_ids)

        kernel_call = codegen_state.ast_builder.get_kernel_call(
                codegen_state,
                item.kernel_name,
                glob_grid, loc_grid,
                extra_arg_idis)

        return merge_codegen_results(
                codegen_state, [device_result, kernel_call])

    if isinstance(item, EnterLoop):
        iname_tag = knl.iname_to_tag.get(item.iname)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)
        from loopy.kernel.data import (
                UnrolledIlpTag, UnrollTag, ForceSequentialTag,
                LoopedIlpTag, VectorizeTag)

        if iname_tag is None:
            # Untagged inames are emitted as plain sequential loops.
            loop_generator = generate_sequential_loop_dim_code
        else:
            # Tag families are tested in this order; the first match wins.
            generators_by_tag = (
                    ((UnrollTag, UnrolledIlpTag), generate_unroll_loop),
                    ((VectorizeTag,), generate_vectorize_loop),
                    ((LoopedIlpTag, ForceSequentialTag),
                        generate_sequential_loop_dim_code),
                    )
            for tag_types, candidate in generators_by_tag:
                if isinstance(iname_tag, tag_types):
                    loop_generator = candidate
                    break
            else:
                raise RuntimeError(
                        "encountered (invalid) EnterLoop "
                        "for '%s', tagged '%s'" % (item.iname, iname_tag))

        return loop_generator(codegen_state, sched_index)

    if isinstance(item, Barrier):
        return codegen_state.ast_builder.emit_barrier(item.kind, item.comment)

    if isinstance(item, RunInstruction):
        insn = knl.id_to_insn[item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code

        def generate_for_insn(inner_cgs):
            return generate_instruction_code(inner_cgs, insn)

        return codegen_state.try_vectorized(
                "instruction %s" % insn.id, generate_for_insn)

    raise RuntimeError("unexpected schedule item type: %s" % type(item))