Esempio n. 1
0
    def get_function_definition(self, codegen_state, codegen_result,
                                schedule_index, function_decl, function_body):
        """Wrap *function_body* in the generated Python invoker function.

        The function is named after the current program of *codegen_result*.
        Its parameters are ``_lpy_cl_kernels`` and ``queue``, followed by the
        name of every entry of ``codegen_state.implemented_data_info`` that
        is not a temporary variable, followed by ``wait_for=None`` and
        ``allocator=None``. The generated function returns ``_lpy_evt``.

        *schedule_index* and *function_decl* are not used here.
        """
        from loopy.kernel.data import TemporaryVariable

        parameters = ["_lpy_cl_kernels", "queue"]
        for data_info in codegen_state.implemented_data_info:
            if issubclass(data_info.arg_class, TemporaryVariable):
                # temporaries are not part of the call signature
                continue
            parameters.append(data_info.name)
        parameters.extend(["wait_for=None", "allocator=None"])

        from genpy import (For, Function, Suite, Return, Line, Statement as S)

        function_name = codegen_result.current_program(codegen_state).name

        statements = [Line(), Line(), function_body, Line()]

        if self._get_global_temporaries(codegen_state):
            # free global temporaries
            statements.append(
                For("_tv", "_global_temporaries", S("_tv.release()")))

        statements.append(Line())
        statements.append(Return("_lpy_evt"))

        return Function(function_name, parameters, Suite(statements))
Esempio n. 2
0
    def get_kernel_call(self, codegen_state, name, gsize, lsize, extra_args):
        """Generate the statements that set arguments for and enqueue *name*.

        The returned :class:`genpy.Suite` fetches the kernel from
        ``_lpy_cl_kernels``, asserts its argument count, emits the value- and
        array-argument setup code, calls ``enqueue_nd_range_kernel`` and then
        rebinds ``wait_for`` to a list holding the new event.

        :arg gsize: global size; ``(1,)`` is used if it is falsy.
        :arg lsize: local size; ``(1,)`` is used if it is falsy.
        :arg extra_args: appended to ``codegen_state.implemented_data_info``
            to form the complete argument list.
        """
        expr_to_code = self.get_expression_to_code_mapper(codegen_state)

        gsize = gsize or (1, )
        lsize = lsize or (1, )

        kernel_args = codegen_state.implemented_data_info + extra_args

        value_setup, idx_to_cl_idx, num_cl_args = generate_value_arg_setup(
            codegen_state.kernel, [self.target.device], kernel_args)
        array_setup = generate_array_arg_setup(
            codegen_state.kernel, kernel_args, idx_to_cl_idx)

        from genpy import Suite, Assign, Assert, Line, Comment
        from pymbolic.mapper.stringifier import PREC_NONE

        import pyopencl.version as cl_ver
        if cl_ver.VERSION < (2020, 2):
            from warnings import warn
            warn("Your kernel invocation will likely fail because your "
                 "version of PyOpenCL does not support allow_empty_ndrange. "
                 "Please upgrade to version 2020.2 or newer.")

        enqueue_template = (
            "%s.enqueue_nd_range_kernel("
            "queue, _lpy_knl, "
            "%s, %s, "
            # using positional args because pybind is slow with kwargs
            "None, "  # offset
            "wait_for, "
            "True, "  # g_times_l
            "True, "  # allow_empty_ndrange
            ")")
        enqueue_call = enqueue_template % (
            self.target.pyopencl_module_name,
            expr_to_code(gsize, prec=PREC_NONE, type_context="i"),
            expr_to_code(lsize, prec=PREC_NONE, type_context="i"))

        # TODO: Generate finer-grained dependency structure
        steps = [
            Comment("{{{ enqueue %s" % name),
            Line(),
            Assign("_lpy_knl", "_lpy_cl_kernels." + name),
            Assert("_lpy_knl.num_args == %d" % num_cl_args),
            Line(),
            value_setup,
            array_setup,
            Assign("_lpy_evt", enqueue_call),
            # later enqueues wait on this one
            Assign("wait_for", "[_lpy_evt]"),
            Line(),
            Comment("}}}"),
            Line(),
        ]
        return Suite(steps)
Esempio n. 3
0
def generate_array_arg_setup(kernel, implemented_data_info, arg_idx_to_cl_arg_idx):
    """Generate one ``_lpy_knl.set_arg`` statement per array argument.

    :arg kernel: not used here.
    :arg implemented_data_info: argument descriptions; only entries whose
        ``arg_class`` is a subclass of ``ArrayBase`` produce a statement.
    :arg arg_idx_to_cl_arg_idx: maps a position in *implemented_data_info*
        to the OpenCL argument index used in the ``set_arg`` call.
    :returns: a :class:`genpy.Suite` of the generated statements.
    """
    from loopy.kernel.array import ArrayBase
    from genpy import Statement as S, Suite

    return Suite([
        S("_lpy_knl.set_arg(%d, %s)"
          % (arg_idx_to_cl_arg_idx[position], data_info.name))
        for position, data_info in enumerate(implemented_data_info)
        if issubclass(data_info.arg_class, ArrayBase)
    ])
Esempio n. 4
0
    def get_kernel_call(self, codegen_state, name, gsize, lsize, extra_args):
        """Generate the statements that set arguments for and enqueue *name*.

        The returned :class:`genpy.Suite` fetches the kernel from
        ``_lpy_cl_kernels``, asserts its argument count, emits the value- and
        array-argument setup code, calls ``enqueue_nd_range_kernel`` (passing
        ``wait_for`` and ``g_times_l`` by keyword) and then rebinds
        ``wait_for`` to a list holding the new event.

        :arg gsize: global size; ``(1,)`` is used if it is falsy.
        :arg lsize: local size; ``(1,)`` is used if it is falsy.
        :arg extra_args: appended to ``codegen_state.implemented_data_info``
            to form the complete argument list.
        """
        expr_to_code = self.get_expression_to_code_mapper(codegen_state)

        gsize = gsize or (1,)
        lsize = lsize or (1,)

        kernel_args = codegen_state.implemented_data_info + extra_args

        value_setup, idx_to_cl_idx, num_cl_args = generate_value_arg_setup(
            codegen_state.kernel, [self.target.device], kernel_args)
        array_setup = generate_array_arg_setup(
            codegen_state.kernel, kernel_args, idx_to_cl_idx)

        from genpy import Suite, Assign, Assert, Line, Comment
        from pymbolic.mapper.stringifier import PREC_NONE

        substitutions = dict(
            pyopencl_module_name=self.target.pyopencl_module_name,
            gsize=expr_to_code(gsize, prec=PREC_NONE, type_context="i"),
            lsize=expr_to_code(lsize, prec=PREC_NONE, type_context="i"))
        enqueue_call = (
            "%(pyopencl_module_name)s.enqueue_nd_range_kernel("
            "queue, _lpy_knl, "
            "%(gsize)s, %(lsize)s,  wait_for=wait_for, g_times_l=True)"
            % substitutions)

        # TODO: Generate finer-grained dependency structure
        steps = [
            Comment("{{{ enqueue %s" % name),
            Line(),
            Assign("_lpy_knl", "_lpy_cl_kernels."+name),
            Assert("_lpy_knl.num_args == %d" % num_cl_args),
            Line(),
            value_setup,
            array_setup,
            Assign("_lpy_evt", enqueue_call),
            # later enqueues wait on this one
            Assign("wait_for", "[_lpy_evt]"),
            Line(),
            Comment("}}}"),
            Line(),
        ]
        return Suite(steps)
Esempio n. 5
0
def generate_array_arg_setup(kernel, implemented_data_info,
                             arg_idx_to_cl_arg_idx):
    """Generate a single ``_lpy_knl._set_arg_multi`` call for all arrays.

    The call receives one flat tuple of alternating OpenCL argument index
    and argument name, one pair per entry of *implemented_data_info* whose
    ``arg_class`` is a subclass of ``ArrayBase``. If there is no such entry,
    the returned suite is empty.

    :arg kernel: not used here.
    :arg arg_idx_to_cl_arg_idx: maps a position in *implemented_data_info*
        to the OpenCL argument index.
    :returns: a :class:`genpy.Suite` with at most one statement.
    """
    from loopy.kernel.array import ArrayBase
    from genpy import Statement as S, Suite

    flat_entries = []
    for position, data_info in enumerate(implemented_data_info):
        if not issubclass(data_info.arg_class, ArrayBase):
            continue
        flat_entries += [arg_idx_to_cl_arg_idx[position], data_info.name]

    if not flat_entries:
        return Suite([])

    # entries come in (index, name) pairs
    assert len(flat_entries) % 2 == 0

    rendered = ", ".join(map(str, flat_entries))
    return Suite([S(f"_lpy_knl._set_arg_multi(({rendered},))")])
Esempio n. 6
0
    def get_function_definition(self, codegen_state, codegen_result,
                                schedule_index, function_decl, function_body):
        """Wrap *function_body* in the generated Python invoker function.

        The function is named after the current program of *codegen_result*.
        Its parameters are ``_lpy_cl_kernels`` and ``queue``, followed by the
        name of every entry of ``codegen_state.implemented_data_info`` that
        is not a temporary variable, followed by ``wait_for=None`` and
        ``allocator=None``.

        The generated body starts with a few imports and a default for
        ``allocator``, then runs *function_body*, releases every entry of
        ``_global_temporaries`` and returns ``_lpy_evt``.

        *schedule_index* and *function_decl* are not used here.
        """
        from loopy.kernel.data import TemporaryVariable

        parameters = ["_lpy_cl_kernels", "queue"]
        parameters.extend(
            data_info.name
            for data_info in codegen_state.implemented_data_info
            if not issubclass(data_info.arg_class, TemporaryVariable))
        parameters.extend(["wait_for=None", "allocator=None"])

        from genpy import (For, Function, Suite, Import, ImportAs, Return,
                           FromImport, If, Assign, Line, Statement as S)

        function_name = codegen_result.current_program(codegen_state).name

        default_allocator = Assign(
            "allocator",
            "_lpy_cl_tools.DeferredAllocator(queue.context)")

        prologue = [
            FromImport("struct", ["pack as _lpy_pack"]),
            ImportAs("pyopencl", "_lpy_cl"),
            Import("pyopencl.tools"),
            Line(),
            If("allocator is None", default_allocator),
            Line(),
        ]
        main_part = [Line(), function_body, Line()]
        # free global temporaries
        cleanup = [For("_tv", "_global_temporaries", S("_tv.release()"))]
        epilogue = [Line(), Return("_lpy_evt")]

        return Function(
            function_name,
            parameters,
            Suite(prologue + main_part + cleanup + epilogue))
Esempio n. 7
0
def generate_value_arg_setup(kernel, devices, implemented_data_info):
    """Generate the code that passes by-value arguments to ``_lpy_knl``.

    Walks *implemented_data_info* in order and assigns OpenCL argument
    indices. Entries that are not ``lp.ValueArg`` (asserted to be
    ``ArrayBase`` subclasses) only reserve one index; their ``set_arg`` code
    is not generated here. For each value argument, code is emitted that
    optionally checks for ``None``, casts integers to a Python integer type,
    and calls ``_lpy_knl.set_arg`` with the (possibly struct-packed) value.

    :arg kernel: supplies ``options`` (for ``skip_arg_checks``) and ``name``
        (for warning messages).
    :arg devices: list of devices or ``None`` entries, used to decide whether
        the struct-argument-count workaround is enabled.
    :arg implemented_data_info: sequence of argument descriptions providing
        ``arg_class``, ``name`` and ``dtype``.
    :returns: a tuple ``(suite, arg_idx_to_cl_arg_idx, cl_arg_count)`` where
        *suite* is a :class:`genpy.Suite` of the generated statements,
        *arg_idx_to_cl_arg_idx* maps each position in
        *implemented_data_info* to its (first) OpenCL argument index and
        *cl_arg_count* is the total number of OpenCL arguments.
    :raises TypeError: for a complex dtype other than complex64/complex128.
    :raises LoopyError: for a value argument dtype that cannot be passed.
    """
    options = kernel.options

    import loopy as lp
    from loopy.kernel.array import ArrayBase

    # {{{ arg counting bug handling

    # For example:
    # https://github.com/pocl/pocl/issues/197
    # (but Apple CPU has a similar bug)

    work_around_arg_count_bug = False
    warn_about_arg_count_bug = False

    try:
        from pyopencl.characterize import has_struct_arg_count_bug

    except ImportError:
        # no way to query the bug: treat every device as unaffected
        count_bug_per_dev = [False]*len(devices)

    else:
        count_bug_per_dev = [
                has_struct_arg_count_bug(dev)
                if dev is not None else False
                for dev in devices]

    if any(dev is None for dev in devices):
        warn("{knl_name}: device not supplied to PyOpenCLTarget--"
                "workarounds for broken OpenCL implementations "
                "(such as those relating to complex numbers) "
                "may not be enabled when needed"
                .format(knl_name=kernel.name))

    # The workaround is only enabled if *all* devices are affected; a mix of
    # affected and unaffected devices only triggers a warning.
    if any(count_bug_per_dev):
        if all(count_bug_per_dev):
            work_around_arg_count_bug = True
        else:
            warn_about_arg_count_bug = True

    # }}}

    cl_arg_idx = 0
    arg_idx_to_cl_arg_idx = {}

    # number of floating-point scalars passed so far (a complex counts as 2)
    fp_arg_count = 0

    from genpy import (
            Comment, Line, If, Raise, Assign, Statement as S, Suite)

    result = []
    gen = result.append

    for arg_idx, idi in enumerate(implemented_data_info):
        arg_idx_to_cl_arg_idx[arg_idx] = cl_arg_idx

        if not issubclass(idi.arg_class, lp.ValueArg):
            assert issubclass(idi.arg_class, ArrayBase)

            # assume each of those generates exactly one...
            cl_arg_idx += 1

            continue

        gen(Comment("{{{ process %s" % idi.name))
        gen(Line())

        if not options.skip_arg_checks:
            gen(If("%s is None" % idi.name,
                Raise('RuntimeError("input argument \'{name}\' '
                        'must be supplied")'.format(name=idi.name))))

        if idi.dtype.is_integral():
            gen(Comment("cast to Python int to avoid trouble "
                "with struct packing or Boost.Python"))
            if sys.version_info < (3,):
                py_type = "long"
            else:
                py_type = "int"

            gen(Assign(idi.name, "%s(%s)" % (py_type, idi.name)))
            gen(Line())

        if idi.dtype.is_composite():
            # composite values are handed to set_arg as-is
            gen(S("_lpy_knl.set_arg(%d, %s)" % (cl_arg_idx, idi.name)))
            cl_arg_idx += 1

        elif idi.dtype.is_complex():
            assert isinstance(idi.dtype, NumpyType)

            dtype = idi.dtype

            if warn_about_arg_count_bug:
                warn("{knl_name}: arguments include complex numbers, and "
                        "some (but not all) of the target devices mishandle "
                        "struct kernel arguments (hence the workaround is "
                        "disabled)".format(
                            knl_name=kernel.name))

            if dtype.numpy_dtype == np.complex64:
                arg_char = "f"
            elif dtype.numpy_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError("unexpected complex type: %s" % dtype)

            # NOTE(review): the limit of 8 floating-point arguments is
            # presumably tied to the arg-count bug being worked around --
            # confirm against the linked issue.
            if (work_around_arg_count_bug
                    and dtype.numpy_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                # workaround: pass real and imaginary part as two separate
                # OpenCL arguments
                gen(Assign(
                    "_lpy_buf",
                    "_lpy_pack('{arg_char}', {arg_var}.real)"
                    .format(arg_char=arg_char, arg_var=idi.name)))
                gen(S(
                    "_lpy_knl.set_arg({cl_arg_idx}, _lpy_buf)"
                    .format(cl_arg_idx=cl_arg_idx)))
                cl_arg_idx += 1

                gen(Assign(
                    "_lpy_buf",
                    "_lpy_pack('{arg_char}', {arg_var}.imag)"
                    .format(arg_char=arg_char, arg_var=idi.name)))
                gen(S(
                        "_lpy_knl.set_arg({cl_arg_idx}, _lpy_buf)"
                        .format(cl_arg_idx=cl_arg_idx)))
                cl_arg_idx += 1
            else:
                # pass the complex number as one packed (real, imag) buffer
                gen(Assign(
                    "_lpy_buf",
                    "_lpy_pack('{arg_char}{arg_char}', "
                    "{arg_var}.real, {arg_var}.imag)"
                    .format(arg_char=arg_char, arg_var=idi.name)))
                gen(S(
                    "_lpy_knl.set_arg({cl_arg_idx}, _lpy_buf)"
                    .format(cl_arg_idx=cl_arg_idx)))
                cl_arg_idx += 1

            fp_arg_count += 2

        elif isinstance(idi.dtype, NumpyType):
            if idi.dtype.dtype.kind == "f":
                fp_arg_count += 1

            # pack the scalar using its numpy type character
            gen(S(
                "_lpy_knl.set_arg(%d, _lpy_pack('%s', %s))"
                % (cl_arg_idx, idi.dtype.dtype.char, idi.name)))

            cl_arg_idx += 1

        else:
            raise LoopyError("do not know how to pass argument of type '%s'"
                    % idi.dtype)

        gen(Line())

        gen(Comment("}}}"))
        gen(Line())

    return Suite(result), arg_idx_to_cl_arg_idx, cl_arg_idx
Esempio n. 8
0
def generate_value_arg_setup(kernel, implemented_data_info):
    """Generate the code that passes by-value arguments to ``_lpy_knl``.

    Walks *implemented_data_info* in order and assigns OpenCL argument
    indices. Entries that are not ``lp.ValueArg`` (asserted to be
    ``ArrayBase`` subclasses) only reserve one index. Value arguments are
    collected into two flat lists and passed with at most one generated
    ``_lpy_knl._set_arg_buf_multi`` call (flat ``index, expression`` pairs)
    and one ``_lpy_knl._set_arg_buf_pack_multi`` call (flat
    ``index, typechar, expression`` triples).

    :arg kernel: supplies ``options`` (for ``skip_arg_checks``).
    :arg implemented_data_info: sequence of argument descriptions providing
        ``arg_class``, ``name`` and ``dtype``.
    :returns: a tuple ``(suite, arg_idx_to_cl_arg_idx, cl_arg_count)`` where
        *suite* is a :class:`genpy.Suite` of the generated statements,
        *arg_idx_to_cl_arg_idx* maps each position in
        *implemented_data_info* to its OpenCL argument index and
        *cl_arg_count* is the total number of OpenCL arguments.
    :raises TypeError: for a complex dtype other than complex64/complex128.
    :raises LoopyError: for a value argument dtype that cannot be passed.
    """
    options = kernel.options

    import loopy as lp
    from loopy.kernel.array import ArrayBase

    cl_arg_idx = 0
    arg_idx_to_cl_arg_idx = {}

    from genpy import If, Raise, Statement as S, Suite

    result = []
    gen = result.append

    # flat (index, expression) pairs
    buf_indices_and_args = []
    # flat (index, typechar bytes literal, expression) triples
    buf_pack_indices_and_args = []

    from pyopencl.invoker import BUF_PACK_TYPECHARS

    def add_buf_arg(arg_idx, typechar, expr_str):
        """Queue a scalar argument for one of the two multi-set calls."""
        if typechar in BUF_PACK_TYPECHARS:
            buf_pack_indices_and_args.append(arg_idx)
            buf_pack_indices_and_args.append(repr(typechar.encode()))
            buf_pack_indices_and_args.append(expr_str)
        else:
            buf_indices_and_args.append(arg_idx)
            # Use the same packer name as the complex branch below, so the
            # generated code refers to a single struct-pack function.
            buf_indices_and_args.append(
                f"_lpy_pack('{typechar}', {expr_str})")

    for arg_idx, idi in enumerate(implemented_data_info):
        arg_idx_to_cl_arg_idx[arg_idx] = cl_arg_idx

        if not issubclass(idi.arg_class, lp.ValueArg):
            assert issubclass(idi.arg_class, ArrayBase)

            # assume each of those generates exactly one...
            cl_arg_idx += 1

            continue

        if not options.skip_arg_checks:
            gen(
                If(
                    "%s is None" % idi.name,
                    Raise('RuntimeError("input argument \'{name}\' '
                          'must be supplied")'.format(name=idi.name))))

        if idi.dtype.is_composite():
            # composite values are passed through without packing
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(idi.name)

            cl_arg_idx += 1

        elif idi.dtype.is_complex():
            assert isinstance(idi.dtype, NumpyType)

            dtype = idi.dtype

            if dtype.numpy_dtype == np.complex64:
                arg_char = "f"
            elif dtype.numpy_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError("unexpected complex type: %s" % dtype)

            # pass the complex number as one packed (real, imag) buffer
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(f"_lpy_pack('{arg_char}{arg_char}', "
                                        f"{idi.name}.real, {idi.name}.imag)")
            cl_arg_idx += 1

        elif isinstance(idi.dtype, NumpyType):
            add_buf_arg(cl_arg_idx, idi.dtype.dtype.char, idi.name)
            cl_arg_idx += 1

        else:
            raise LoopyError("do not know how to pass argument of type '%s'" %
                             idi.dtype)

    for arg_kind, args_and_indices, entry_length in [
        ("_buf", buf_indices_and_args, 2),
        ("_buf_pack", buf_pack_indices_and_args, 3),
    ]:
        assert len(args_and_indices) % entry_length == 0
        if args_and_indices:
            gen(
                S(f"_lpy_knl._set_arg{arg_kind}_multi("
                  f"({', '.join(str(i) for i in args_and_indices)},), "
                  ")"))

    return Suite(result), arg_idx_to_cl_arg_idx, cl_arg_idx
Esempio n. 9
0
def generate_value_arg_setup(kernel, devices, implemented_data_info):
    """Generate the code that passes by-value arguments to ``_lpy_knl``.

    Walks *implemented_data_info* in order and assigns OpenCL argument
    indices. Entries that are not ``lp.ValueArg`` (asserted to be
    ``ArrayBase`` subclasses) only reserve one index. Value arguments are
    collected into two flat lists and passed with at most one generated
    ``_lpy_knl._set_arg_buf_multi`` call (flat ``index, expression`` pairs)
    and one ``_lpy_knl._set_arg_buf_pack_multi`` call (flat
    ``index, typechar, expression`` triples).

    :arg kernel: supplies ``options`` (for ``skip_arg_checks``) and ``name``
        (for warning messages).
    :arg devices: list of devices or ``None`` entries, used to decide whether
        the struct-argument-count workaround is enabled.
    :arg implemented_data_info: sequence of argument descriptions providing
        ``arg_class``, ``name`` and ``dtype``.
    :returns: a tuple ``(suite, arg_idx_to_cl_arg_idx, cl_arg_count)`` where
        *suite* is a :class:`genpy.Suite` of the generated statements,
        *arg_idx_to_cl_arg_idx* maps each position in
        *implemented_data_info* to its (first) OpenCL argument index and
        *cl_arg_count* is the total number of OpenCL arguments.
    :raises TypeError: for a complex dtype other than complex64/complex128.
    :raises LoopyError: for a value argument dtype that cannot be passed.
    """
    options = kernel.options

    import loopy as lp
    from loopy.kernel.array import ArrayBase

    # {{{ arg counting bug handling

    # For example:
    # https://github.com/pocl/pocl/issues/197
    # (but Apple CPU has a similar bug)

    work_around_arg_count_bug = False
    warn_about_arg_count_bug = False

    try:
        from pyopencl.characterize import has_struct_arg_count_bug

    except ImportError:
        # no way to query the bug: treat every device as unaffected
        count_bug_per_dev = [False] * len(devices)

    else:
        count_bug_per_dev = [
            has_struct_arg_count_bug(dev) if dev is not None else False
            for dev in devices
        ]

    if any(dev is None for dev in devices):
        warn("{knl_name}: device not supplied to PyOpenCLTarget--"
             "workarounds for broken OpenCL implementations "
             "(such as those relating to complex numbers) "
             "may not be enabled when needed. To avoid this, "
             "pass target=lp.PyOpenCLTarget(dev) when creating "
             "the kernel.".format(knl_name=kernel.name))

    # The workaround is only enabled if *all* devices are affected; a mix of
    # affected and unaffected devices only triggers a warning.
    if any(count_bug_per_dev):
        if all(count_bug_per_dev):
            work_around_arg_count_bug = True
        else:
            warn_about_arg_count_bug = True

    # }}}

    cl_arg_idx = 0
    arg_idx_to_cl_arg_idx = {}

    # number of floating-point scalars passed so far (a complex counts as 2)
    fp_arg_count = 0

    from genpy import If, Raise, Statement as S, Suite

    result = []
    gen = result.append

    # flat (index, expression) pairs
    buf_indices_and_args = []
    # flat (index, typechar bytes literal, expression) triples
    buf_pack_indices_and_args = []

    from pyopencl.invoker import BUF_PACK_TYPECHARS

    def add_buf_arg(arg_idx, typechar, expr_str):
        """Queue a scalar argument for one of the two multi-set calls."""
        if typechar in BUF_PACK_TYPECHARS:
            buf_pack_indices_and_args.append(arg_idx)
            buf_pack_indices_and_args.append(repr(typechar.encode()))
            buf_pack_indices_and_args.append(expr_str)
        else:
            buf_indices_and_args.append(arg_idx)
            # Use the same packer name as the complex branch below, so the
            # generated code refers to a single struct-pack function.
            buf_indices_and_args.append(
                f"_lpy_pack('{typechar}', {expr_str})")

    for arg_idx, idi in enumerate(implemented_data_info):
        arg_idx_to_cl_arg_idx[arg_idx] = cl_arg_idx

        if not issubclass(idi.arg_class, lp.ValueArg):
            assert issubclass(idi.arg_class, ArrayBase)

            # assume each of those generates exactly one...
            cl_arg_idx += 1

            continue

        if not options.skip_arg_checks:
            gen(
                If(
                    "%s is None" % idi.name,
                    Raise('RuntimeError("input argument \'{name}\' '
                          'must be supplied")'.format(name=idi.name))))

        if idi.dtype.is_composite():
            # composite values are passed through without packing
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(f"{idi.name}")

            cl_arg_idx += 1

        elif idi.dtype.is_complex():
            assert isinstance(idi.dtype, NumpyType)

            dtype = idi.dtype

            if warn_about_arg_count_bug:
                warn("{knl_name}: arguments include complex numbers, and "
                     "some (but not all) of the target devices mishandle "
                     "struct kernel arguments (hence the workaround is "
                     "disabled)".format(knl_name=kernel.name))

            if dtype.numpy_dtype == np.complex64:
                arg_char = "f"
            elif dtype.numpy_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError("unexpected complex type: %s" % dtype)

            # NOTE(review): the limit of 8 floating-point arguments is
            # presumably tied to the arg-count bug being worked around --
            # confirm against the linked issue.
            if (work_around_arg_count_bug
                    and dtype.numpy_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                # workaround: pass real and imaginary part as two separate
                # OpenCL arguments
                add_buf_arg(cl_arg_idx, arg_char, f"{idi.name}.real")
                cl_arg_idx += 1

                add_buf_arg(cl_arg_idx, arg_char, f"{idi.name}.imag")
                cl_arg_idx += 1
            else:
                # pass the complex number as one packed (real, imag) buffer
                buf_indices_and_args.append(cl_arg_idx)
                buf_indices_and_args.append(
                    f"_lpy_pack('{arg_char}{arg_char}', "
                    f"{idi.name}.real, {idi.name}.imag)")
                cl_arg_idx += 1

            fp_arg_count += 2

        elif isinstance(idi.dtype, NumpyType):
            if idi.dtype.dtype.kind == "f":
                fp_arg_count += 1

            add_buf_arg(cl_arg_idx, idi.dtype.dtype.char, idi.name)
            cl_arg_idx += 1

        else:
            raise LoopyError("do not know how to pass argument of type '%s'" %
                             idi.dtype)

    for arg_kind, args_and_indices, entry_length in [
        ("_buf", buf_indices_and_args, 2),
        ("_buf_pack", buf_pack_indices_and_args, 3),
    ]:
        assert len(args_and_indices) % entry_length == 0
        if args_and_indices:
            gen(
                S(f"_lpy_knl._set_arg{arg_kind}_multi("
                  f"({', '.join(str(i) for i in args_and_indices)},), "
                  ")"))

    return Suite(result), arg_idx_to_cl_arg_idx, cl_arg_idx