예제 #1
0
파일: control.py 프로젝트: shigh/loopy
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at position *sched_index*.

    The item is looked up in ``codegen_state.kernel.schedule`` and handled
    according to its type:

    * ``CallKernel``: a device program is generated for the schedule block
      starting at *sched_index* and merged with the kernel call obtained
      from the AST builder.
    * ``EnterLoop``: an unrolling, vectorizing or sequential loop generator
      is chosen from the tag of the loop's iname and invoked.
    * ``Barrier``: the AST builder's ``emit_barrier`` result is returned.
    * ``RunInstruction``: code for the instruction is generated through
      ``codegen_state.try_vectorized``.

    :arg codegen_state: the current code generation state
    :arg sched_index: index into ``codegen_state.kernel.schedule``
    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is
        not handled here, or if the schedule item type is not recognized
    """
    knl = codegen_state.kernel
    item = knl.schedule[sched_index]

    if isinstance(item, CallKernel):
        # Must not already be generating device code at this point.
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (gather_schedule_block,
                                    get_insn_ids_for_block_at)
        _, block_end = gather_schedule_block(knl.schedule, sched_index)
        assert block_end <= codegen_state.schedule_index_end

        extra_args = synthesize_idis_for_extra_args(knl, sched_index)
        device_data_info = codegen_state.implemented_data_info + extra_args

        # State used while generating the device program for this block.
        device_state = codegen_state.copy(
            is_generating_device_code=True,
            gen_program_name=item.kernel_name,
            schedule_index_end=block_end - 1,
            implemented_data_info=device_data_info)

        from loopy.codegen.result import generate_host_or_device_program
        device_result = generate_host_or_device_program(
            device_state, sched_index)

        block_insn_ids = get_insn_ids_for_block_at(knl.schedule, sched_index)
        glob_grid, loc_grid = knl.get_grid_sizes_for_insn_ids_as_exprs(
            block_insn_ids)

        kernel_call = codegen_state.ast_builder.get_kernel_call(
            codegen_state, item.kernel_name, glob_grid, loc_grid,
            extra_args)

        return merge_codegen_results(
            codegen_state, [device_result, kernel_call])

    if isinstance(item, EnterLoop):
        iname_tag = knl.iname_to_tag.get(item.iname)

        from loopy.codegen.loop import (generate_unroll_loop,
                                        generate_vectorize_loop,
                                        generate_sequential_loop_dim_code)

        from loopy.kernel.data import (UnrolledIlpTag, UnrollTag,
                                       ForceSequentialTag, LoopedIlpTag,
                                       VectorizeTag)

        if isinstance(iname_tag, (UnrollTag, UnrolledIlpTag)):
            return generate_unroll_loop(codegen_state, sched_index)

        if isinstance(iname_tag, VectorizeTag):
            return generate_vectorize_loop(codegen_state, sched_index)

        # Untagged inames are handled like the sequential tags.
        sequential_tags = (LoopedIlpTag, ForceSequentialTag)
        if iname_tag is None or isinstance(iname_tag, sequential_tags):
            return generate_sequential_loop_dim_code(
                codegen_state, sched_index)

        raise RuntimeError("encountered (invalid) EnterLoop "
                           "for '%s', tagged '%s'" %
                           (item.iname, iname_tag))

    if isinstance(item, Barrier):
        return codegen_state.ast_builder.emit_barrier(item.kind,
                                                      item.comment)

    if isinstance(item, RunInstruction):
        insn = knl.id_to_insn[item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code

        def generate_for_state(inner_cgs):
            return generate_instruction_code(inner_cgs, insn)

        return codegen_state.try_vectorized(
            "instruction %s" % insn.id, generate_for_state)

    raise RuntimeError("unexpected schedule item type: %s" %
                       type(item))
예제 #2
0
def generate_code_v2(kernel):
    """
    Generate code for *kernel*.

    A kernel in the ``INITIAL`` state is preprocessed first, and a kernel
    without a schedule is scheduled first. When ``loopy.CACHING_ENABLED`` is
    set, the (preprocessed and scheduled) kernel is looked up in
    ``code_gen_cache`` and a cached result is returned if present; otherwise
    the freshly generated result is stored there before returning.

    :arg kernel: the kernel to generate code for
    :returns: a :class:`CodeGenerationResult`
    :raises LoopyError: if the kernel is not in the ``SCHEDULED`` state after
        the preprocessing/scheduling steps above
    :raises ValueError: if a kernel argument is neither an ``ArrayBase`` nor
        a ``ValueArg``
    """

    from loopy.kernel import kernel_state
    if kernel.state == kernel_state.INITIAL:
        from loopy.preprocess import preprocess_kernel
        kernel = preprocess_kernel(kernel)

    if kernel.schedule is None:
        from loopy.schedule import get_one_scheduled_kernel
        kernel = get_one_scheduled_kernel(kernel)

    if kernel.state != kernel_state.SCHEDULED:
        raise LoopyError("cannot generate code for a kernel that has not been "
                "scheduled")

    # {{{ cache retrieval

    from loopy import CACHING_ENABLED

    if CACHING_ENABLED:
        # The cache key is the kernel *before* type inference below.
        input_kernel = kernel
        try:
            result = code_gen_cache[input_kernel]
            logger.debug("%s: code generation cache hit", kernel.name)
            return result
        except KeyError:
            pass

    # }}}

    from loopy.type_inference import infer_unknown_types
    kernel = infer_unknown_types(kernel, expect_completion=True)

    from loopy.check import pre_codegen_checks
    pre_codegen_checks(kernel)

    logger.info("%s: generate code: start", kernel.name)

    # {{{ examine arg list

    from loopy.kernel.data import ValueArg
    from loopy.kernel.array import ArrayBase

    implemented_data_info = []

    # Loop-invariant: look up the written variables once, not once per argument.
    written_vars = kernel.get_written_variables()

    for arg in kernel.args:
        is_written = arg.name in written_vars
        if isinstance(arg, ArrayBase):
            implemented_data_info.extend(
                    arg.decl_info(
                        kernel.target,
                        is_written=is_written,
                        index_dtype=kernel.index_dtype))

        elif isinstance(arg, ValueArg):
            implemented_data_info.append(ImplementedDataInfo(
                target=kernel.target,
                name=arg.name,
                dtype=arg.dtype,
                arg_class=ValueArg,
                is_written=is_written))

        else:
            raise ValueError("argument type not understood: '%s'" % type(arg))

    # Complex support is enabled as soon as any argument or temporary
    # involves a complex dtype.
    all_vars = kernel.args + list(six.itervalues(kernel.temporary_variables))
    allow_complex = any(var.dtype.involves_complex() for var in all_vars)

    # }}}

    # These sets are shared with the codegen state and read back below
    # when the preambles are generated.
    seen_dtypes = set()
    seen_functions = set()
    seen_atomic_dtypes = set()

    initial_implemented_domain = isl.BasicSet.from_params(kernel.assumptions)
    codegen_state = CodeGenerationState(
            kernel=kernel,
            implemented_data_info=implemented_data_info,
            implemented_domain=initial_implemented_domain,
            implemented_predicates=frozenset(),
            seen_dtypes=seen_dtypes,
            seen_functions=seen_functions,
            seen_atomic_dtypes=seen_atomic_dtypes,
            var_subst_map={},
            allow_complex=allow_complex,
            var_name_generator=kernel.get_var_name_generator(),
            is_generating_device_code=False,
            gen_program_name=(
                kernel.target.host_program_name_prefix
                + kernel.name
                + kernel.target.host_program_name_suffix),
            schedule_index_end=len(kernel.schedule))

    from loopy.codegen.result import generate_host_or_device_program
    codegen_result = generate_host_or_device_program(
            codegen_state,
            schedule_index=0)

    device_code_str = codegen_result.device_code()

    # NOTE: this check lives inside an assert, so it is skipped entirely
    # when Python runs with -O.
    from loopy.check import check_implemented_domains
    assert check_implemented_domains(kernel, codegen_result.implemented_domains,
            device_code_str)

    # {{{ handle preambles

    for arg in kernel.args:
        seen_dtypes.add(arg.dtype)
    for tv in six.itervalues(kernel.temporary_variables):
        seen_dtypes.add(tv.dtype)

    # Copy so that the kernel's own preamble list is not extended in place.
    preambles = kernel.preambles[:]

    preamble_info = PreambleInfo(
            kernel=kernel,
            seen_dtypes=seen_dtypes,
            seen_functions=seen_functions,
            # a set of LoopyTypes (!)
            seen_atomic_dtypes=seen_atomic_dtypes)

    preamble_generators = (kernel.preamble_generators
            + kernel.target.get_device_ast_builder().preamble_generators())
    for prea_gen in preamble_generators:
        preambles.extend(prea_gen(preamble_info))

    codegen_result = codegen_result.copy(device_preambles=preambles)

    # }}}

    logger.info("%s: generate code: done", kernel.name)

    if CACHING_ENABLED:
        code_gen_cache[input_kernel] = codegen_result

    return codegen_result
예제 #3
0
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at position *sched_index*.

    The item is looked up in ``codegen_state.kernel.schedule`` and handled
    according to its type:

    * ``CallKernel``: a device program is generated for the schedule block
      starting at *sched_index* and merged with the kernel call obtained
      from the AST builder.
    * ``EnterLoop``: an unrolling, vectorizing or sequential loop generator
      is chosen from the tag of the loop's iname and invoked.
    * ``Barrier``: in device code, the barrier AST from the AST builder is
      returned, wrapped in a ``CodeGenerationResult`` when the barrier has
      an originating instruction id. In host code, ``"global"`` and
      ``"local"`` barriers yield an empty ``CodeGenerationResult``.
    * ``RunInstruction``: code for the instruction is generated through
      ``codegen_state.try_vectorized``.

    :arg codegen_state: the current code generation state
    :arg sched_index: index into ``codegen_state.kernel.schedule``
    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is
        not handled here, or if the schedule item type is not recognized
    :raises LoopyError: if a barrier of a synchronization kind other than
        ``"global"`` or ``"local"`` is encountered in host code
    """
    kernel = codegen_state.kernel
    sched_item = kernel.schedule[sched_index]

    if isinstance(sched_item, CallKernel):
        # Must not already be generating device code at this point.
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (gather_schedule_block,
                                    get_insn_ids_for_block_at)
        _, past_end_i = gather_schedule_block(kernel.schedule, sched_index)
        assert past_end_i <= codegen_state.schedule_index_end

        extra_args = synthesize_idis_for_extra_args(kernel, sched_index)

        # State used while generating the device program for this block.
        new_codegen_state = codegen_state.copy(
            is_generating_device_code=True,
            gen_program_name=sched_item.kernel_name,
            schedule_index_end=past_end_i - 1,
            implemented_data_info=(codegen_state.implemented_data_info +
                                   extra_args))

        from loopy.codegen.result import generate_host_or_device_program
        codegen_result = generate_host_or_device_program(
            new_codegen_state, sched_index)

        glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs(
            get_insn_ids_for_block_at(kernel.schedule, sched_index))

        return merge_codegen_results(codegen_state, [
            codegen_result,
            codegen_state.ast_builder.get_kernel_call(
                codegen_state, sched_item.kernel_name, glob_grid, loc_grid,
                extra_args),
        ])

    elif isinstance(sched_item, EnterLoop):
        tag = kernel.iname_to_tag.get(sched_item.iname)

        from loopy.codegen.loop import (generate_unroll_loop,
                                        generate_vectorize_loop,
                                        generate_sequential_loop_dim_code)

        from loopy.kernel.data import (UnrolledIlpTag, UnrollTag,
                                       ForceSequentialTag, LoopedIlpTag,
                                       VectorizeTag,
                                       InOrderSequentialSequentialTag)
        if isinstance(tag, (UnrollTag, UnrolledIlpTag)):
            func = generate_unroll_loop
        elif isinstance(tag, VectorizeTag):
            func = generate_vectorize_loop
        elif tag is None or isinstance(tag, (LoopedIlpTag, ForceSequentialTag,
                                             InOrderSequentialSequentialTag)):
            # Untagged inames are handled like the sequential tags.
            func = generate_sequential_loop_dim_code
        else:
            raise RuntimeError("encountered (invalid) EnterLoop "
                               "for '%s', tagged '%s'" %
                               (sched_item.iname, tag))

        return func(codegen_state, sched_index)

    elif isinstance(sched_item, Barrier):
        # {{{ emit barrier code

        from loopy.codegen.result import CodeGenerationResult

        if codegen_state.is_generating_device_code:
            barrier_ast = codegen_state.ast_builder.emit_barrier(
                sched_item.synchronization_kind, sched_item.mem_kind,
                sched_item.comment)
            if sched_item.originating_insn_id:
                # Attach the barrier to the instruction it originated from.
                return CodeGenerationResult.new(
                    codegen_state, sched_item.originating_insn_id, barrier_ast,
                    codegen_state.implemented_domain)
            else:
                return barrier_ast
        else:
            # host code
            if sched_item.synchronization_kind in ["global", "local"]:
                # host code is assumed globally and locally synchronous
                return CodeGenerationResult(
                    host_program=None,
                    device_programs=[],
                    implemented_domains={},
                    implemented_data_info=codegen_state.implemented_data_info)

            else:
                # The trailing space after '%s' keeps the kind and
                # "in host code" from running together in the message.
                raise LoopyError("do not know how to emit code for barrier "
                                 "synchronization kind '%s' "
                                 "in host code" %
                                 sched_item.synchronization_kind)

        # }}}

    elif isinstance(sched_item, RunInstruction):
        insn = kernel.id_to_insn[sched_item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code
        return codegen_state.try_vectorized(
            "instruction %s" % insn.id,
            lambda inner_cgs: generate_instruction_code(inner_cgs, insn))

    else:
        raise RuntimeError("unexpected schedule item type: %s" %
                           type(sched_item))
예제 #4
0
파일: __init__.py 프로젝트: inducer/loopy
def generate_code_v2(kernel):
    """
    Generate code for *kernel*.

    A kernel in the ``INITIAL`` state is preprocessed first, and a kernel
    without a schedule is scheduled first. When ``loopy.CACHING_ENABLED`` is
    set, the (preprocessed and scheduled) kernel is looked up in
    ``code_gen_cache`` and a cached result is returned if present; otherwise
    the freshly generated result is offered to the cache via
    ``store_if_not_present`` before returning.

    :arg kernel: the kernel to generate code for
    :returns: a :class:`CodeGenerationResult`
    :raises LoopyError: if the kernel is not in the ``SCHEDULED`` state after
        the preprocessing/scheduling steps above
    :raises ValueError: if a kernel argument is neither an ``ArrayBase`` nor
        a ``ValueArg``
    """

    from loopy.kernel import KernelState
    if kernel.state == KernelState.INITIAL:
        from loopy.preprocess import preprocess_kernel
        kernel = preprocess_kernel(kernel)

    if kernel.schedule is None:
        from loopy.schedule import get_one_scheduled_kernel
        kernel = get_one_scheduled_kernel(kernel)

    if kernel.state != KernelState.SCHEDULED:
        raise LoopyError("cannot generate code for a kernel that has not been "
                "scheduled")

    # {{{ cache retrieval

    from loopy import CACHING_ENABLED

    if CACHING_ENABLED:
        # The cache key is the kernel *before* type inference below.
        input_kernel = kernel
        try:
            result = code_gen_cache[input_kernel]
            logger.debug("%s: code generation cache hit", kernel.name)
            return result
        except KeyError:
            pass

    # }}}

    from loopy.type_inference import infer_unknown_types
    kernel = infer_unknown_types(kernel, expect_completion=True)

    from loopy.check import pre_codegen_checks
    pre_codegen_checks(kernel)

    logger.info("%s: generate code: start", kernel.name)

    # {{{ examine arg list

    from loopy.kernel.data import ValueArg
    from loopy.kernel.array import ArrayBase

    implemented_data_info = []

    # Loop-invariant: look up the written variables once, not once per argument.
    written_vars = kernel.get_written_variables()

    for arg in kernel.args:
        is_written = arg.name in written_vars
        if isinstance(arg, ArrayBase):
            implemented_data_info.extend(
                    arg.decl_info(
                        kernel.target,
                        is_written=is_written,
                        index_dtype=kernel.index_dtype))

        elif isinstance(arg, ValueArg):
            implemented_data_info.append(ImplementedDataInfo(
                target=kernel.target,
                name=arg.name,
                dtype=arg.dtype,
                arg_class=ValueArg,
                is_written=is_written))

        else:
            raise ValueError("argument type not understood: '%s'" % type(arg))

    # Complex support is enabled as soon as any argument or temporary
    # involves a complex dtype.
    all_vars = kernel.args + list(six.itervalues(kernel.temporary_variables))
    allow_complex = any(var.dtype.involves_complex() for var in all_vars)

    # }}}

    # These sets are shared with the codegen state and read back below
    # when the preambles are generated.
    seen_dtypes = set()
    seen_functions = set()
    seen_atomic_dtypes = set()

    initial_implemented_domain = isl.BasicSet.from_params(kernel.assumptions)
    codegen_state = CodeGenerationState(
            kernel=kernel,
            implemented_data_info=implemented_data_info,
            implemented_domain=initial_implemented_domain,
            implemented_predicates=frozenset(),
            seen_dtypes=seen_dtypes,
            seen_functions=seen_functions,
            seen_atomic_dtypes=seen_atomic_dtypes,
            var_subst_map={},
            allow_complex=allow_complex,
            var_name_generator=kernel.get_var_name_generator(),
            is_generating_device_code=False,
            gen_program_name=(
                kernel.target.host_program_name_prefix
                + kernel.name
                + kernel.target.host_program_name_suffix),
            schedule_index_end=len(kernel.schedule))

    from loopy.codegen.result import generate_host_or_device_program
    codegen_result = generate_host_or_device_program(
            codegen_state,
            schedule_index=0)

    device_code_str = codegen_result.device_code()

    # NOTE: this check lives inside an assert, so it is skipped entirely
    # when Python runs with -O.
    from loopy.check import check_implemented_domains
    assert check_implemented_domains(kernel, codegen_result.implemented_domains,
            device_code_str)

    # {{{ handle preambles

    for arg in kernel.args:
        seen_dtypes.add(arg.dtype)
    for tv in six.itervalues(kernel.temporary_variables):
        seen_dtypes.add(tv.dtype)

    # Copy so that the kernel's own preamble list is not extended in place.
    preambles = kernel.preambles[:]

    preamble_info = PreambleInfo(
            kernel=kernel,
            seen_dtypes=seen_dtypes,
            seen_functions=seen_functions,
            # a set of LoopyTypes (!)
            seen_atomic_dtypes=seen_atomic_dtypes,
            codegen_state=codegen_state
            )

    preamble_generators = (kernel.preamble_generators
            + kernel.target.get_device_ast_builder().preamble_generators())
    for prea_gen in preamble_generators:
        preambles.extend(prea_gen(preamble_info))

    codegen_result = codegen_result.copy(device_preambles=preambles)

    # }}}

    # For faster unpickling in the common case when implemented_domains isn't needed.
    from loopy.tools import LazilyUnpicklingDict
    codegen_result = codegen_result.copy(
            implemented_domains=LazilyUnpicklingDict(
                    codegen_result.implemented_domains))

    logger.info("%s: generate code: done", kernel.name)

    if CACHING_ENABLED:
        code_gen_cache.store_if_not_present(input_kernel, codegen_result)

    return codegen_result
예제 #5
0
파일: control.py 프로젝트: inducer/loopy
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at position *sched_index*.

    The item is looked up in ``codegen_state.kernel.schedule`` and handled
    according to its type:

    * ``CallKernel``: a device program is generated for the schedule block
      starting at *sched_index* and merged with the kernel call obtained
      from the AST builder.
    * ``EnterLoop``: an unrolling, vectorizing or sequential loop generator
      is chosen from the tags of the loop's iname and invoked.
    * ``Barrier``: in device code, the barrier AST from the AST builder is
      returned, wrapped in a ``CodeGenerationResult`` when the barrier has
      an originating instruction id. In host code, ``"global"`` and
      ``"local"`` barriers yield an empty ``CodeGenerationResult``.
    * ``RunInstruction``: code for the instruction is generated through
      ``codegen_state.try_vectorized``.

    :arg codegen_state: the current code generation state
    :arg sched_index: index into ``codegen_state.kernel.schedule``
    :raises RuntimeError: if an ``EnterLoop`` iname carries only tags that
        are not handled here, or if the schedule item type is not recognized
    :raises LoopyError: if a barrier of a synchronization kind other than
        ``"global"`` or ``"local"`` is encountered in host code
    """
    kernel = codegen_state.kernel
    sched_item = kernel.schedule[sched_index]

    if isinstance(sched_item, CallKernel):
        # Must not already be generating device code at this point.
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (gather_schedule_block, get_insn_ids_for_block_at)
        _, past_end_i = gather_schedule_block(kernel.schedule, sched_index)
        assert past_end_i <= codegen_state.schedule_index_end

        extra_args = synthesize_idis_for_extra_args(kernel, sched_index)

        # State used while generating the device program for this block.
        new_codegen_state = codegen_state.copy(
                is_generating_device_code=True,
                gen_program_name=sched_item.kernel_name,
                schedule_index_end=past_end_i-1,
                implemented_data_info=(codegen_state.implemented_data_info
                    + extra_args))

        from loopy.codegen.result import generate_host_or_device_program
        codegen_result = generate_host_or_device_program(
                new_codegen_state, sched_index)

        glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs(
                get_insn_ids_for_block_at(kernel.schedule, sched_index))

        return merge_codegen_results(codegen_state, [
            codegen_result,

            codegen_state.ast_builder.get_kernel_call(
                codegen_state,
                sched_item.kernel_name,
                glob_grid, loc_grid,
                extra_args),
            ])

    elif isinstance(sched_item, EnterLoop):
        tags = kernel.iname_tags(sched_item.iname)
        # Drop falsy entries so that "not tags" below means "untagged".
        tags = tuple(tag for tag in tags if tag)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)

        from loopy.kernel.data import (UnrolledIlpTag, UnrollTag,
                ForceSequentialTag, LoopedIlpTag, VectorizeTag,
                InOrderSequentialSequentialTag, filter_iname_tags_by_type)
        if filter_iname_tags_by_type(tags, (UnrollTag, UnrolledIlpTag)):
            func = generate_unroll_loop
        elif filter_iname_tags_by_type(tags, VectorizeTag):
            func = generate_vectorize_loop
        elif not tags or filter_iname_tags_by_type(tags, (LoopedIlpTag,
                    ForceSequentialTag, InOrderSequentialSequentialTag)):
            # Untagged inames are handled like the sequential tags.
            func = generate_sequential_loop_dim_code
        else:
            raise RuntimeError("encountered (invalid) EnterLoop "
                    "for '%s', tagged '%s'"
                    % (sched_item.iname, ", ".join(str(tag) for tag in tags)))

        return func(codegen_state, sched_index)

    elif isinstance(sched_item, Barrier):
        # {{{ emit barrier code

        from loopy.codegen.result import CodeGenerationResult

        if codegen_state.is_generating_device_code:
            barrier_ast = codegen_state.ast_builder.emit_barrier(
                    sched_item.synchronization_kind, sched_item.mem_kind,
                    sched_item.comment)
            if sched_item.originating_insn_id:
                # Attach the barrier to the instruction it originated from.
                return CodeGenerationResult.new(
                        codegen_state,
                        sched_item.originating_insn_id,
                        barrier_ast,
                        codegen_state.implemented_domain)
            else:
                return barrier_ast
        else:
            # host code
            if sched_item.synchronization_kind in ["global", "local"]:
                # host code is assumed globally and locally synchronous
                return CodeGenerationResult(
                        host_program=None,
                        device_programs=[],
                        implemented_domains={},
                        implemented_data_info=codegen_state.implemented_data_info)

            else:
                # The trailing space after '%s' keeps the kind and
                # "in host code" from running together in the message.
                raise LoopyError("do not know how to emit code for barrier "
                                 "synchronization kind '%s' in host code"
                                 % sched_item.synchronization_kind)

        # }}}

    elif isinstance(sched_item, RunInstruction):
        insn = kernel.id_to_insn[sched_item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code
        return codegen_state.try_vectorized(
                "instruction %s" % insn.id,
                lambda inner_cgs: generate_instruction_code(inner_cgs, insn))

    else:
        raise RuntimeError("unexpected schedule item type: %s"
                % type(sched_item))
예제 #6
0
파일: control.py 프로젝트: cmsquared/loopy
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at position *sched_index*.

    The item is looked up in ``codegen_state.kernel.schedule`` and handled
    according to its type:

    * ``CallKernel``: a device program is generated for the schedule block
      starting at *sched_index* and merged with the kernel call obtained
      from the AST builder.
    * ``EnterLoop``: an unrolling, vectorizing or sequential loop generator
      is chosen from the tag of the loop's iname and invoked.
    * ``Barrier``: the AST builder's ``emit_barrier`` result is returned.
    * ``RunInstruction``: code for the instruction is generated through
      ``codegen_state.try_vectorized``.

    :arg codegen_state: the current code generation state
    :arg sched_index: index into ``codegen_state.kernel.schedule``
    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is
        not handled here, or if the schedule item type is not recognized
    """
    knl = codegen_state.kernel
    item = knl.schedule[sched_index]

    if isinstance(item, CallKernel):
        # Must not already be generating device code at this point.
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (gather_schedule_block, get_insn_ids_for_block_at)
        _, block_end = gather_schedule_block(knl.schedule, sched_index)
        assert block_end <= codegen_state.schedule_index_end

        extra_args = synthesize_idis_for_extra_args(knl, sched_index)
        device_data_info = codegen_state.implemented_data_info + extra_args

        # State used while generating the device program for this block.
        device_state = codegen_state.copy(
                is_generating_device_code=True,
                gen_program_name=item.kernel_name,
                schedule_index_end=block_end-1,
                implemented_data_info=device_data_info)

        from loopy.codegen.result import generate_host_or_device_program
        device_result = generate_host_or_device_program(
                device_state, sched_index)

        block_insn_ids = get_insn_ids_for_block_at(knl.schedule, sched_index)
        glob_grid, loc_grid = knl.get_grid_sizes_for_insn_ids_as_exprs(
                block_insn_ids)

        kernel_call = codegen_state.ast_builder.get_kernel_call(
                codegen_state,
                item.kernel_name,
                glob_grid, loc_grid,
                extra_args)

        return merge_codegen_results(
                codegen_state, [device_result, kernel_call])

    if isinstance(item, EnterLoop):
        iname_tag = knl.iname_to_tag.get(item.iname)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)

        from loopy.kernel.data import (UnrolledIlpTag, UnrollTag, ForceSequentialTag,
                LoopedIlpTag, VectorizeTag)

        if isinstance(iname_tag, (UnrollTag, UnrolledIlpTag)):
            return generate_unroll_loop(codegen_state, sched_index)

        if isinstance(iname_tag, VectorizeTag):
            return generate_vectorize_loop(codegen_state, sched_index)

        # Untagged inames are handled like the sequential tags.
        sequential_tags = (LoopedIlpTag, ForceSequentialTag)
        if iname_tag is None or isinstance(iname_tag, sequential_tags):
            return generate_sequential_loop_dim_code(codegen_state, sched_index)

        raise RuntimeError("encountered (invalid) EnterLoop "
                "for '%s', tagged '%s'" % (item.iname, iname_tag))

    if isinstance(item, Barrier):
        return codegen_state.ast_builder.emit_barrier(
                item.kind, item.comment)

    if isinstance(item, RunInstruction):
        insn = knl.id_to_insn[item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code

        def generate_for_state(inner_cgs):
            return generate_instruction_code(inner_cgs, insn)

        return codegen_state.try_vectorized(
                "instruction %s" % insn.id, generate_for_state)

    raise RuntimeError("unexpected schedule item type: %s"
            % type(item))