Exemple #1
0
def opencl_preamble_generator(preamble_info):
    """Yield the ``(tag, code)`` preambles required by OpenCL device code.

    Reads ``seen_dtypes``, ``seen_atomic_dtypes`` and ``kernel`` from
    *preamble_info* and yields, in this order:

    * ``00_enable_double`` if a float64/complex128 :class:`NumpyType`
      is among the seen dtypes,
    * ``00_enable_64bit_atomics`` if an 8-byte :class:`AtomicNumpyType`
      is among the seen atomic dtypes,
    * ``00_declare_gid_lid`` (always), which defines the ``lid``/``gid``
      macros with a cast to the kernel's index type.
    """
    needs_fp64 = False
    for seen in preamble_info.seen_dtypes:
        if not isinstance(seen, NumpyType):
            continue
        if seen.numpy_dtype in (np.float64, np.complex128):
            needs_fp64 = True

    if needs_fp64:
        # The pragma is only emitted for OpenCL C versions before 1.2.
        yield ("00_enable_double", """
            #if __OPENCL_C_VERSION__ < 120
            #pragma OPENCL EXTENSION cl_khr_fp64: enable
            #endif
            """)

    from loopy.types import AtomicNumpyType
    needs_int64_atomics = False
    for atomic in preamble_info.seen_atomic_dtypes:
        if (isinstance(atomic, AtomicNumpyType)
                and atomic.numpy_dtype.itemsize == 8):
            needs_int64_atomics = True
            break

    if needs_int64_atomics:
        # FIXME: Should gate on "CL1" atomics style
        yield ("00_enable_64bit_atomics", """
            #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
            """)

    from loopy.tools import remove_common_indentation
    knl = preamble_info.kernel
    index_ctype = knl.target.dtype_to_typename(knl.index_dtype)
    id_macros = """
                #define lid(N) ((%(idx_ctype)s) get_local_id(N))
                #define gid(N) ((%(idx_ctype)s) get_group_id(N))
                """ % {"idx_ctype": index_ctype}
    yield ("00_declare_gid_lid", remove_common_indentation(id_macros))
Exemple #2
0
def opencl_preamble_generator(preamble_info):
    """Generate the OpenCL-specific ``(tag, code)`` preamble pairs.

    Depending on what *preamble_info* reports, this yields:

    * ``00_enable_double`` when ``seen_dtypes`` contains a
      :class:`NumpyType` wrapping float64 or complex128,
    * ``00_enable_64bit_atomics`` when ``seen_atomic_dtypes`` contains an
      :class:`AtomicNumpyType` whose item size is 8 bytes,
    * ``00_declare_gid_lid`` unconditionally, with the ``lid``/``gid``
      macros cast to the C type of ``kernel.index_dtype``.
    """
    uses_double = any(
        isinstance(candidate, NumpyType)
        and candidate.numpy_dtype in [np.float64, np.complex128]
        for candidate in preamble_info.seen_dtypes)

    if uses_double:
        yield ("00_enable_double", """
            #if __OPENCL_C_VERSION__ < 120
            #pragma OPENCL EXTENSION cl_khr_fp64: enable
            #endif
            """)

    from loopy.types import AtomicNumpyType

    def is_64_bit_atomic(candidate):
        return (isinstance(candidate, AtomicNumpyType)
                and candidate.numpy_dtype.itemsize == 8)

    if any(is_64_bit_atomic(candidate)
            for candidate in preamble_info.seen_atomic_dtypes):
        # FIXME: Should gate on "CL1" atomics style
        yield ("00_enable_64bit_atomics", """
            #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
            """)

    from loopy.tools import remove_common_indentation
    the_kernel = preamble_info.kernel
    substitutions = {
        "idx_ctype": the_kernel.target.dtype_to_typename(
            the_kernel.index_dtype),
    }
    macro_text = remove_common_indentation("""
                #define lid(N) ((%(idx_ctype)s) get_local_id(N))
                #define gid(N) ((%(idx_ctype)s) get_group_id(N))
                """ % substitutions)
    yield ("00_declare_gid_lid", macro_text)
Exemple #3
0
    def generate_code(self, kernel, codegen_state, impl_arg_info):
        """Generate code via the parent class and prepend ``lid``/``gid`` macros.

        The parent implementation is invoked first; its code is then prefixed
        with macro definitions that cast ``get_local_id``/``get_group_id`` to
        the C type of ``kernel.index_dtype``, followed by a blank line.

        :returns: a tuple ``(code, implemented_domains)``, where
            *implemented_domains* is passed through from the parent unchanged.
        """
        base_code, implemented_domains = super(OpenCLTarget, self).generate_code(
            kernel, codegen_state, impl_arg_info)

        from loopy.tools import remove_common_indentation
        index_ctype = self.dtype_to_typename(kernel.index_dtype)
        id_macros = remove_common_indentation("""
                    #define lid(N) ((%(idx_ctype)s) get_local_id(N))
                    #define gid(N) ((%(idx_ctype)s) get_group_id(N))
                    """ % {"idx_ctype": index_ctype})

        return id_macros + "\n\n" + base_code, implemented_domains
Exemple #4
0
def process_preambles(preambles):
    """Sort, de-duplicate and normalize ``(tag, code)`` preamble pairs.

    :arg preambles: an iterable of ``(tag, code)`` tuples.
    :returns: a list of code strings, ordered by tag, with only the first
        entry kept for each tag. Each string has its common indentation
        removed and a newline appended.
    """
    emitted_tags = set()
    unique_codes = []

    # Ordering is by tag only, so for equal tags the stable sort keeps
    # the entry that came first in *preambles*.
    ordered = sorted(preambles, key=lambda pair: pair[0])
    for tag, code in ordered:
        if tag not in emitted_tags:
            emitted_tags.add(tag)
            unique_codes.append(code)

    from loopy.tools import remove_common_indentation
    result = []
    for code in unique_codes:
        result.append(remove_common_indentation(code) + "\n")
    return result
Exemple #5
0
def process_preambles(preambles):
    """Return the unique preamble code strings in tag order.

    :arg preambles: an iterable of ``(tag, code)`` tuples.
    :returns: a list with one string per distinct tag (the first one
        encountered after sorting by tag), each stripped of common
        indentation and terminated with a newline.
    """
    def tag_of(tag_and_code):
        return tag_and_code[0]

    tags_done = set()
    kept = []

    for tag, text in sorted(preambles, key=tag_of):
        if tag in tags_done:
            # A preamble with this tag was already collected.
            continue
        tags_done.add(tag)
        kept.append(text)

    from loopy.tools import remove_common_indentation
    return [remove_common_indentation(text) + "\n" for text in kept]
Exemple #6
0
    def __init__(self,
                 iname_exprs, code,
                 read_variables=frozenset(), assignees=tuple(),
                 id=None, depends_on=None, depends_on_is_final=None,
                 groups=None, conflicts_with_groups=None,
                 no_sync_with=None,
                 within_inames_is_final=None, within_inames=None,
                 priority=0, boostable=None, boostable_into=None,
                 predicates=frozenset(), tags=None,
                 insn_deps=None, insn_deps_is_final=None):
        """
        :arg iname_exprs: Like :attr:`iname_exprs`, but entries may also be
            plain strings naming an iname (each becomes a pair of the name
            and a variable of that name). A single string of comma-separated
            inames is accepted as well.
        :arg code: the code text; its common indentation is removed before
            it is stored.
        :arg assignees: Like :attr:`assignees`, but may also be a
            semicolon-separated string of such expressions or a
            sequence of strings parseable into the desired format.

        All remaining arguments are forwarded to :class:`InstructionBase`.
        """

        InstructionBase.__init__(
            self,
            id=id,
            depends_on=depends_on, depends_on_is_final=depends_on_is_final,
            groups=groups, conflicts_with_groups=conflicts_with_groups,
            no_sync_with=no_sync_with,
            within_inames_is_final=within_inames_is_final,
            within_inames=within_inames,
            boostable=boostable, boostable_into=boostable_into,
            priority=priority, predicates=predicates, tags=tags,
            insn_deps=insn_deps, insn_deps_is_final=insn_deps_is_final)

        def split_nonempty(text, separator):
            # Split, strip each piece and drop the empty ones.
            stripped = (piece.strip() for piece in text.split(separator))
            return [piece for piece in stripped if piece]

        # {{{ normalize iname_exprs

        if isinstance(iname_exprs, str):
            iname_exprs = split_nonempty(iname_exprs, ",")

        from pymbolic import var
        normalized_iname_exprs = [
            (entry, var(entry)) if isinstance(entry, str) else entry
            for entry in iname_exprs]

        # }}}

        # {{{ normalize assignees

        if isinstance(assignees, str):
            assignees = split_nonempty(assignees, ";")

        from loopy.symbolic import parse
        normalized_assignees = [
            parse(entry) if isinstance(entry, str) else entry
            for entry in assignees]

        # }}}

        self.iname_exprs = normalized_iname_exprs
        from loopy.tools import remove_common_indentation
        self.code = remove_common_indentation(code)
        self.read_variables = read_variables
        self.assignees = normalized_assignees
Exemple #7
0
    def __init__(self,
                 iname_exprs,
                 code,
                 read_variables=frozenset(),
                 assignees=tuple(),
                 id=None,
                 depends_on=None,
                 depends_on_is_final=None,
                 groups=None,
                 conflicts_with_groups=None,
                 no_sync_with=None,
                 within_inames_is_final=None,
                 within_inames=None,
                 priority=0,
                 boostable=None,
                 boostable_into=None,
                 predicates=frozenset(),
                 tags=None,
                 insn_deps=None,
                 insn_deps_is_final=None):
        """
        :arg iname_exprs: Like :attr:`iname_exprs`, except that an entry may
            be a bare string naming an iname instead of a tuple. A single
            comma-separated string of inames is also allowed.
        :arg code: the code text, stored with its common indentation removed.
        :arg assignees: Like :attr:`assignees`, but may also be a
            semicolon-separated string of such expressions or a
            sequence of strings parseable into the desired format.

        Every other argument is handed to :class:`InstructionBase` as-is.
        """

        base_kwargs = dict(
            id=id,
            depends_on=depends_on,
            depends_on_is_final=depends_on_is_final,
            groups=groups,
            conflicts_with_groups=conflicts_with_groups,
            no_sync_with=no_sync_with,
            within_inames_is_final=within_inames_is_final,
            within_inames=within_inames,
            boostable=boostable,
            boostable_into=boostable_into,
            priority=priority,
            predicates=predicates,
            tags=tags,
            insn_deps=insn_deps,
            insn_deps_is_final=insn_deps_is_final)
        InstructionBase.__init__(self, **base_kwargs)

        # {{{ normalize iname_exprs

        if isinstance(iname_exprs, str):
            pieces = [piece.strip() for piece in iname_exprs.split(",")]
            iname_exprs = [piece for piece in pieces if piece]

        from pymbolic import var
        iname_pairs = []
        for item in iname_exprs:
            # A bare name stands for the pair (name, variable-of-that-name).
            iname_pairs.append(
                (item, var(item)) if isinstance(item, str) else item)

        # }}}

        # {{{ normalize assignees

        if isinstance(assignees, str):
            pieces = [piece.strip() for piece in assignees.split(";")]
            assignees = [piece for piece in pieces if piece]

        from loopy.symbolic import parse
        parsed_assignees = []
        for item in assignees:
            parsed_assignees.append(
                parse(item) if isinstance(item, str) else item)

        # }}}

        self.iname_exprs = iname_pairs
        from loopy.tools import remove_common_indentation
        self.code = remove_common_indentation(code)
        self.read_variables = read_variables
        self.assignees = parsed_assignees
Exemple #8
0
def parse_transformed_fortran(source, free_form=True, strict=True,
        pre_transform_code=None, transform_code_context=None,
        filename="<floopy code>"):
    """
    Extract the loopy transform code embedded in *source*, execute it and
    return whatever it stores in ``RESULT``.

    :arg source: a string of Fortran source code which must include
        a snippet of transform code as described below.
    :arg pre_transform_code: code that is run in the same context
        as the transform, before the transform code itself.
    :arg transform_code_context: an optional mapping; a copy of it is used
        as the initial namespace for the executed code.
    :arg filename: recorded as ``FILENAME``, used as the file name when
        compiling the transform code, and its directory (or the current
        working directory if it has none) is appended to ``sys.path``
        while the code runs.

    *source* may contain snippets of loopy transform code between markers::

        !$loopy begin
        ! ...
        !$loopy end

    Within the transform code, the following symbols are predefined:

    * ``lp``: a reference to the :mod:`loopy` package
    * ``np``: a reference to the :mod:`numpy` package
    * ``SOURCE``: the source code surrounding the transform block.
      This may be processed using :func:`c_preprocess` and
      :func:`parse_fortran`.
    * ``FILENAME``: the file name of the code being processed

    The transform code must define ``RESULT``, conventionally a list of
    kernels, which is returned from this function unmodified.

    An example of *source* may look as follows::

        subroutine fill(out, a, n)
          implicit none

          real*8 a, out(n)
          integer n, i

          do i = 1, n
            out(i) = a
          end do
        end

        !$loopy begin
        !
        ! fill, = lp.parse_fortran(SOURCE, FILENAME)
        ! fill = lp.split_iname(fill, "i", split_amount,
        !     outer_tag="g.0", inner_tag="l.0")
        ! RESULT = [fill]
        !
        !$loopy end

    .. note::

        Both the transform code and *pre_transform_code* are run with
        :func:`exec`; only pass input from a trusted origin.

    :raises LoopyError: if no transform code is found or if the transform
        code does not set ``RESULT``.
    """

    fortran_source, transform_code = _extract_loopy_lines(source)
    if not transform_code:
        raise LoopyError("no transform code found")

    from loopy.tools import remove_common_indentation
    transform_code = remove_common_indentation(
            transform_code,
            require_leading_newline=False,
            ignore_lines_starting_with="#")

    namespace = (
            {} if transform_code_context is None
            else transform_code_context.copy())

    import loopy as lp
    import numpy as np

    predefined = (
            ("lp", lp),
            ("np", np),
            ("SOURCE", fortran_source),
            ("FILENAME", filename))
    for symbol, value in predefined:
        namespace[symbol] = value

    from os.path import dirname, abspath
    from os import getcwd

    search_dir = dirname(filename)
    search_dir = abspath(search_dir) if search_dir else getcwd()

    import sys
    saved_sys_path = sys.path
    try:
        # Let the executed code import modules that live next to the input.
        if search_dir:
            sys.path = saved_sys_path + [search_dir]

        if pre_transform_code is not None:
            namespace["_MODULE_SOURCE_CODE"] = pre_transform_code
            pre_code_obj = compile(
                    pre_transform_code, "<loopy pre-transform code>", "exec")
            exec(pre_code_obj, namespace)

        namespace["_MODULE_SOURCE_CODE"] = transform_code
        exec(compile(transform_code, filename, "exec"), namespace)

    finally:
        # Always restore the caller's module search path.
        sys.path = saved_sys_path

    if "RESULT" in namespace:
        return namespace["RESULT"]

    raise LoopyError("transform code did not set RESULT")
Exemple #9
0
def _preamble_generator(preamble_info, func_qualifier="inline"):
    integer_type_names = ["int8", "int16", "int32", "int64"]

    def_integer_types_macro = ("03_def_integer_types", r"""
            #define LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \
                MACRO_NAME(int8, char) \
                MACRO_NAME(int16, short) \
                MACRO_NAME(int32, int) \
                MACRO_NAME(int64, long)
            """)

    undef_integer_types_macro = ("05_undef_integer_types", """
            #undef LOOPY_CALL_WITH_INTEGER_TYPES
            """)

    function_defs = {
        "loopy_floor_div":
        r"""
            #define LOOPY_DEFINE_FLOOR_DIV(SUFFIX, TYPE) \
                {} TYPE loopy_floor_div_##SUFFIX(TYPE a, TYPE b) \
                {{ \
                    if ((a<0) != (b<0)) \
                        a = a - (b + (b<0) - (b>=0)); \
                    return a/b; \
                }}
            LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV)
            #undef LOOPY_DEFINE_FLOOR_DIV
            """.format(func_qualifier),
        "loopy_floor_div_pos_b":
        r"""
            #define LOOPY_DEFINE_FLOOR_DIV_POS_B(SUFFIX, TYPE) \
                {} TYPE loopy_floor_div_pos_b_##SUFFIX(TYPE a, TYPE b) \
                {{ \
                    if (a<0) \
                        a = a - (b-1); \
                    return a/b; \
                }}
            LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV_POS_B)
            #undef LOOPY_DEFINE_FLOOR_DIV_POS_B
            """.format(func_qualifier),
        "loopy_mod":
        r"""
            #define LOOPY_DEFINE_MOD(SUFFIX, TYPE) \
                {} TYPE loopy_mod_##SUFFIX(TYPE a, TYPE b) \
                {{ \
                    TYPE result = a%b; \
                    if (result < 0 && b > 0) \
                        result += b; \
                    if (result > 0 && b < 0) \
                        result = result + b; \
                    return result; \
                }}
            LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD)
            #undef LOOPY_DEFINE_MOD
            """.format(func_qualifier),
        "loopy_mod_pos_b":
        r"""
            #define LOOPY_DEFINE_MOD_POS_B(SUFFIX, TYPE) \
                {} TYPE loopy_mod_pos_b_##SUFFIX(TYPE a, TYPE b) \
                {{ \
                    TYPE result = a%b; \
                    if (result < 0) \
                        result += b; \
                    return result; \
                }}
            LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD_POS_B)
            #undef LOOPY_DEFINE_MOD_POS_B
            """.format(func_qualifier),
    }

    c_funcs = {func.c_name for func in preamble_info.seen_functions}

    for func_name, func_body in function_defs.items():
        if any((func_name + "_" + tpname) in c_funcs
               for tpname in integer_type_names):
            yield def_integer_types_macro
            yield ("04_%s" % func_name, func_body)
            yield undef_integer_types_macro

    for func in preamble_info.seen_functions:
        if func.name == "int_pow":
            base_ctype = preamble_info.kernel.target.dtype_to_typename(
                func.arg_dtypes[0])
            exp_ctype = preamble_info.kernel.target.dtype_to_typename(
                func.arg_dtypes[1])
            res_ctype = preamble_info.kernel.target.dtype_to_typename(
                func.result_dtypes[0])

            if func.arg_dtypes[1].numpy_dtype.kind == "u":
                signed_exponent_preamble = ""
            else:
                signed_exponent_preamble = "\n" + remove_common_indentation("""
                        if (n < 0) {
                          x = 1.0/x;
                          n =  -n;
                        }""")

            yield (f"07_{func.c_name}", f"""
            inline {res_ctype} {func.c_name}({base_ctype} x, {exp_ctype} n) {{
              if (n == 0)
                return 1;
              {re.sub("^", 14*" ", signed_exponent_preamble, flags=re.M)}

              {res_ctype} y = 1;

              while (n > 1) {{
                if (n % 2) {{
                  y = x * y;
                  x = x * x;
                }}
                else
                  x = x * x;
                n = n / 2;
              }}

              return x*y;
            }}""")
Exemple #10
0
def generate_code(kernel, device=None):
    """Generate code for *kernel*, with preambles prepended.

    :arg kernel: the kernel to generate code for. If it has no schedule
        yet, one is obtained via ``get_one_scheduled_kernel``.
    :arg device: deprecated; passing anything other than *None* only
        triggers a :class:`DeprecationWarning`.
    :returns: a tuple ``(code_str, impl_arg_info)``. When caching is
        enabled, a previously stored result for the same kernel is
        returned directly.
    :raises LoopyError: if the kernel is not in the ``SCHEDULED`` state.
    :raises ValueError: if a kernel argument is neither an array nor a
        value argument.
    """
    if device is not None:
        from warnings import warn
        warn("passing 'device' to generate_code() is deprecated",
                DeprecationWarning, stacklevel=2)

    if kernel.schedule is None:
        from loopy.schedule import get_one_scheduled_kernel
        kernel = get_one_scheduled_kernel(kernel)

    from loopy.kernel import kernel_state
    if kernel.state != kernel_state.SCHEDULED:
        raise LoopyError("cannot generate code for a kernel that has not been "
                "scheduled")

    # {{{ cache retrieval

    from loopy import CACHING_ENABLED

    if CACHING_ENABLED:
        # Keep the kernel as it was at lookup time: it is also the key
        # under which the result is stored at the end.
        cache_key = kernel
        try:
            cached = code_gen_cache[cache_key]
            logger.info("%s: code generation cache hit" % kernel.name)
            return cached
        except KeyError:
            pass

    # }}}

    from loopy.preprocess import infer_unknown_types
    kernel = infer_unknown_types(kernel, expect_completion=True)

    from loopy.check import pre_codegen_checks
    pre_codegen_checks(kernel)

    logger.info("%s: generate code: start" % kernel.name)

    # {{{ examine arg list

    from loopy.kernel.data import ValueArg
    from loopy.kernel.array import ArrayBase

    arg_infos = []

    for arg in kernel.args:
        if isinstance(arg, ArrayBase):
            array_infos = arg.decl_info(
                    kernel.target,
                    is_written=arg.name in kernel.get_written_variables(),
                    index_dtype=kernel.index_dtype)
            arg_infos.extend(array_infos)
            continue

        if isinstance(arg, ValueArg):
            value_info = ImplementedDataInfo(
                    target=kernel.target,
                    name=arg.name,
                    dtype=arg.dtype,
                    cgen_declarator=arg.get_arg_decl(kernel.target),
                    arg_class=ValueArg)
            arg_infos.append(value_info)
            continue

        raise ValueError("argument type not understood: '%s'" % type(arg))

    # Complex support is enabled as soon as any argument or temporary
    # has a dtype of kind "c".
    typed_vars = kernel.args + list(six.itervalues(kernel.temporary_variables))
    allow_complex = any([tvar.dtype.kind == "c" for tvar in typed_vars])

    # }}}

    seen_dtypes = set()
    seen_functions = set()

    codegen_state = CodeGenerationState(
            kernel=kernel,
            implemented_domain=isl.BasicSet.from_params(kernel.assumptions),
            implemented_predicates=frozenset(),
            seen_dtypes=seen_dtypes,
            seen_functions=seen_functions,
            var_subst_map={},
            allow_complex=allow_complex)

    body_code, implemented_domains = kernel.target.generate_code(
            kernel, codegen_state, arg_infos)

    from loopy.check import check_implemented_domains
    assert check_implemented_domains(kernel, implemented_domains,
            body_code)

    # {{{ handle preambles

    seen_dtypes.update(arg.dtype for arg in kernel.args)
    seen_dtypes.update(
            tv.dtype for tv in six.itervalues(kernel.temporary_variables))

    tagged_preambles = kernel.preambles[:]

    generators = (kernel.preamble_generators
            + kernel.target.preamble_generators())
    for generator in generators:
        tagged_preambles.extend(
                generator(kernel, seen_dtypes, seen_functions))

    # Sort by tag and keep only the first preamble for each tag.
    used_tags = set()
    unique_preambles = []

    for tag, text in sorted(tagged_preambles, key=lambda pair: pair[0]):
        if tag not in used_tags:
            used_tags.add(tag)
            unique_preambles.append(text)

    from loopy.tools import remove_common_indentation
    preamble_text = "".join(
            remove_common_indentation(text) + "\n"
            for text in unique_preambles)

    full_code = preamble_text + body_code

    # }}}

    logger.info("%s: generate code: done" % kernel.name)

    outcome = full_code, arg_infos

    if CACHING_ENABLED:
        code_gen_cache[cache_key] = outcome

    return outcome
Exemple #11
0
def generate_code_v2(kernel):
    """
    Preprocess and schedule *kernel* if that has not happened yet, generate
    the host/device program and attach the de-duplicated device preambles.

    :returns: a :class:`CodeGenerationResult`. When caching is enabled, a
        previously stored result for the same kernel is returned directly.
    :raises LoopyError: if the kernel is not in the ``SCHEDULED`` state
        after preprocessing and scheduling.
    :raises ValueError: if a kernel argument is neither an array nor a
        value argument.
    """

    from loopy.kernel import kernel_state
    if kernel.state == kernel_state.INITIAL:
        from loopy.preprocess import preprocess_kernel
        kernel = preprocess_kernel(kernel)

    if kernel.schedule is None:
        from loopy.schedule import get_one_scheduled_kernel
        kernel = get_one_scheduled_kernel(kernel)

    if kernel.state != kernel_state.SCHEDULED:
        raise LoopyError("cannot generate code for a kernel that has not been "
                "scheduled")

    # {{{ cache retrieval

    from loopy import CACHING_ENABLED

    if CACHING_ENABLED:
        # Keep the kernel as it was at lookup time: it is also the key
        # under which the result is stored at the end.
        cache_key = kernel
        try:
            cached = code_gen_cache[cache_key]
            logger.info("%s: code generation cache hit" % kernel.name)
            return cached
        except KeyError:
            pass

    # }}}

    from loopy.preprocess import infer_unknown_types
    kernel = infer_unknown_types(kernel, expect_completion=True)

    from loopy.check import pre_codegen_checks
    pre_codegen_checks(kernel)

    logger.info("%s: generate code: start" % kernel.name)

    # {{{ examine arg list

    from loopy.kernel.data import ValueArg
    from loopy.kernel.array import ArrayBase

    data_infos = []

    for arg in kernel.args:
        written = arg.name in kernel.get_written_variables()

        if isinstance(arg, ArrayBase):
            array_infos = arg.decl_info(
                    kernel.target,
                    is_written=written,
                    index_dtype=kernel.index_dtype)
            data_infos.extend(array_infos)
            continue

        if isinstance(arg, ValueArg):
            value_info = ImplementedDataInfo(
                    target=kernel.target,
                    name=arg.name,
                    dtype=arg.dtype,
                    arg_class=ValueArg,
                    is_written=written)
            data_infos.append(value_info)
            continue

        raise ValueError("argument type not understood: '%s'" % type(arg))

    # Complex support is enabled as soon as any argument or temporary
    # reports a dtype that involves complex numbers.
    typed_vars = kernel.args + list(six.itervalues(kernel.temporary_variables))
    allow_complex = any([tvar.dtype.involves_complex() for tvar in typed_vars])

    # }}}

    seen_dtypes = set()
    seen_functions = set()
    seen_atomic_dtypes = set()

    codegen_state = CodeGenerationState(
            kernel=kernel,
            implemented_data_info=data_infos,
            implemented_domain=isl.BasicSet.from_params(kernel.assumptions),
            implemented_predicates=frozenset(),
            seen_dtypes=seen_dtypes,
            seen_functions=seen_functions,
            seen_atomic_dtypes=seen_atomic_dtypes,
            var_subst_map={},
            allow_complex=allow_complex,
            var_name_generator=kernel.get_var_name_generator(),
            is_generating_device_code=False,
            gen_program_name=(
                kernel.target.host_program_name_prefix
                + kernel.name
                + kernel.target.host_program_name_suffix),
            schedule_index_end=len(kernel.schedule))

    from loopy.codegen.result import generate_host_or_device_program
    program_result = generate_host_or_device_program(
            codegen_state,
            schedule_index=0)

    device_code = program_result.device_code()

    from loopy.check import check_implemented_domains
    assert check_implemented_domains(kernel, program_result.implemented_domains,
            device_code)

    # {{{ handle preambles

    seen_dtypes.update(arg.dtype for arg in kernel.args)
    seen_dtypes.update(
            tv.dtype for tv in six.itervalues(kernel.temporary_variables))

    tagged_preambles = kernel.preambles[:]

    from pytools import Record

    class PreambleInfo(Record):
        pass

    info_for_generators = PreambleInfo(
            kernel=kernel,
            seen_dtypes=seen_dtypes,
            seen_functions=seen_functions,
            # a set of LoopyTypes (!)
            seen_atomic_dtypes=seen_atomic_dtypes)

    generators = (kernel.preamble_generators
            + kernel.target.get_device_ast_builder().preamble_generators())
    for generator in generators:
        tagged_preambles.extend(generator(info_for_generators))

    # Sort by tag and keep only the first preamble for each tag.
    used_tags = set()
    unique_preambles = []

    for tag, text in sorted(tagged_preambles, key=lambda pair: pair[0]):
        if tag not in used_tags:
            used_tags.add(tag)
            unique_preambles.append(text)

    from loopy.tools import remove_common_indentation
    device_preambles = []
    for text in unique_preambles:
        device_preambles.append(remove_common_indentation(text) + "\n")

    final_result = program_result.copy(
            device_preambles=device_preambles)

    # }}}

    logger.info("%s: generate code: done" % kernel.name)

    if CACHING_ENABLED:
        code_gen_cache[cache_key] = final_result

    return final_result