예제 #1
0
    def get_function_definition(self, codegen_state, codegen_result,
                                schedule_index, function_decl, function_body):
        """Build the genpy ``Function`` node for the generated host wrapper.

        The function is named after the current program of *codegen_result*.
        Its parameters are ``_lpy_cl_kernels`` and ``queue``, followed by the
        name of every entry of ``codegen_state.implemented_data_info`` whose
        ``arg_class`` is not a ``TemporaryVariable`` subclass, followed by
        ``wait_for=None`` and ``allocator=None``.

        The generated body imports its helpers, falls back to a
        ``DeferredAllocator`` when no allocator was passed, runs
        *function_body*, calls ``release()`` on every element of
        ``_global_temporaries`` and returns ``_lpy_evt``.

        *schedule_index* and *function_decl* are accepted but not used here.
        """
        from loopy.kernel.data import TemporaryVariable

        # Temporary variables are left out of the generated signature.
        signature = ["_lpy_cl_kernels", "queue"]
        signature.extend(
            idi.name
            for idi in codegen_state.implemented_data_info
            if not issubclass(idi.arg_class, TemporaryVariable))
        signature.extend(["wait_for=None", "allocator=None"])

        from genpy import (For, Function, Suite, Import, ImportAs, Return,
                           FromImport, If, Assign, Line, Statement as S)

        function_name = codegen_result.current_program(codegen_state).name

        # NOTE(review): the emitted code refers to ``_lpy_cl_tools``, which
        # none of the imports emitted here binds; presumably the surrounding
        # generated module provides it -- confirm.
        default_allocator = If(
            "allocator is None",
            Assign("allocator",
                   "_lpy_cl_tools.DeferredAllocator(queue.context)"))

        prologue = [
            FromImport("struct", ["pack as _lpy_pack"]),
            ImportAs("pyopencl", "_lpy_cl"),
            Import("pyopencl.tools"),
            Line(),
            default_allocator,
            Line(),
        ]
        main_part = [Line(), function_body, Line()]

        # free global temporaries
        cleanup = [For("_tv", "_global_temporaries", S("_tv.release()"))]
        epilogue = [Line(), Return("_lpy_evt")]

        body = Suite(prologue + main_part + cleanup + epilogue)
        return Function(function_name, signature, body)
예제 #2
0
파일: pyopencl.py 프로젝트: shigh/loopy
def generate_value_arg_setup(kernel, devices, implemented_data_info):
    """Generate host code that passes the value (scalar) kernel arguments.

    Walks *implemented_data_info* in order. Entries whose ``arg_class`` is
    not an ``lp.ValueArg`` subclass are asserted to be arrays and only
    advance the OpenCL argument counter by one. For every value argument,
    statements are emitted that (unless ``kernel.options.skip_arg_checks``
    is set) raise ``RuntimeError`` when the value is ``None``, cast integral
    values to a Python integer, and pass the value to ``_lpy_knl.set_arg``.

    :arg kernel: provides ``options.skip_arg_checks`` and ``name`` (the
        latter is only used in warning messages).
    :arg devices: sequence of devices; entries may be *None*, in which case
        they are treated as not having the struct-argument-count bug and a
        warning is issued.
    :arg implemented_data_info: sequence of entries with ``name``,
        ``arg_class`` and ``dtype`` attributes.
    :returns: a tuple ``(suite, arg_idx_to_cl_arg_idx, cl_arg_idx)``.
        *suite* is a genpy ``Suite`` holding the generated statements,
        *arg_idx_to_cl_arg_idx* maps each index into
        *implemented_data_info* to the OpenCL argument index at which that
        entry starts, and *cl_arg_idx* is the number of OpenCL argument
        slots consumed in total.
    :raises TypeError: for a complex dtype that is neither complex64 nor
        complex128.
    :raises LoopyError: for a value argument whose dtype is neither
        composite, complex, nor a ``NumpyType``.
    """
    options = kernel.options

    import loopy as lp
    from loopy.kernel.array import ArrayBase

    # {{{ arg counting bug handling

    # For example:
    # https://github.com/pocl/pocl/issues/197
    # (but Apple CPU has a similar bug)

    work_around_arg_count_bug = False
    warn_about_arg_count_bug = False

    try:
        from pyopencl.characterize import has_struct_arg_count_bug

    except ImportError:
        # Without the detection helper, assume no device is affected.
        count_bug_per_dev = [False]*len(devices)

    else:
        count_bug_per_dev = [
                has_struct_arg_count_bug(dev)
                if dev is not None else False
                for dev in devices]

    if any(dev is None for dev in devices):
        warn("{knl_name}: device not supplied to PyOpenCLTarget--"
                "workarounds for broken OpenCL implementations "
                "(such as those relating to complex numbers) "
                "may not be enabled when needed"
                .format(knl_name=kernel.name))

    # The workaround is only enabled when *every* device is affected; a
    # mixed set of devices merely triggers a warning further down.
    if any(count_bug_per_dev):
        if all(count_bug_per_dev):
            work_around_arg_count_bug = True
        else:
            warn_about_arg_count_bug = True

    # }}}

    cl_arg_idx = 0
    arg_idx_to_cl_arg_idx = {}

    # Number of floating-point scalars passed so far (a complex counts as 2).
    fp_arg_count = 0

    from genpy import (
            Comment, Line, If, Raise, Assign, Statement as S, Suite)

    result = []
    gen = result.append

    # Name of the arbitrary-precision integer type of the running
    # interpreter; it does not depend on the argument, so determine it once.
    py_type = "long" if sys.version_info < (3,) else "int"

    for arg_idx, idi in enumerate(implemented_data_info):
        arg_idx_to_cl_arg_idx[arg_idx] = cl_arg_idx

        if not issubclass(idi.arg_class, lp.ValueArg):
            assert issubclass(idi.arg_class, ArrayBase)

            # assume each of those generates exactly one...
            cl_arg_idx += 1

            continue

        gen(Comment("{{{ process %s" % idi.name))
        gen(Line())

        if not options.skip_arg_checks:
            gen(If("%s is None" % idi.name,
                Raise('RuntimeError("input argument \'{name}\' '
                        'must be supplied")'.format(name=idi.name))))

        if idi.dtype.is_integral():
            gen(Comment("cast to Python int to avoid trouble "
                "with struct packing or Boost.Python"))

            gen(Assign(idi.name, "%s(%s)" % (py_type, idi.name)))
            gen(Line())

        if idi.dtype.is_composite():
            # Composite values are handed to set_arg without packing.
            gen(S("_lpy_knl.set_arg(%d, %s)" % (cl_arg_idx, idi.name)))
            cl_arg_idx += 1

        elif idi.dtype.is_complex():
            assert isinstance(idi.dtype, NumpyType)

            dtype = idi.dtype

            if warn_about_arg_count_bug:
                warn("{knl_name}: arguments include complex numbers, and "
                        "some (but not all) of the target devices mishandle "
                        "struct kernel arguments (hence the workaround is "
                        "disabled)".format(
                            knl_name=kernel.name))

            if dtype.numpy_dtype == np.complex64:
                arg_char = "f"
            elif dtype.numpy_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError("unexpected complex type: %s" % dtype)

            # NOTE(review): the limit of 8 floating-point scalars presumably
            # mirrors the conditions under which the device bug shows up --
            # confirm against pyopencl's own argument handling.
            if (work_around_arg_count_bug
                    and dtype.numpy_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                # Workaround: pass real and imaginary part as two separate
                # OpenCL arguments.
                gen(Assign(
                    "_lpy_buf",
                    "_lpy_pack('{arg_char}', {arg_var}.real)"
                    .format(arg_char=arg_char, arg_var=idi.name)))
                gen(S(
                    "_lpy_knl.set_arg({cl_arg_idx}, _lpy_buf)"
                    .format(cl_arg_idx=cl_arg_idx)))
                cl_arg_idx += 1

                gen(Assign(
                    "_lpy_buf",
                    "_lpy_pack('{arg_char}', {arg_var}.imag)"
                    .format(arg_char=arg_char, arg_var=idi.name)))
                gen(S(
                        "_lpy_knl.set_arg({cl_arg_idx}, _lpy_buf)"
                        .format(cl_arg_idx=cl_arg_idx)))
                cl_arg_idx += 1
            else:
                # Regular case: both parts packed into a single argument.
                gen(Assign(
                    "_lpy_buf",
                    "_lpy_pack('{arg_char}{arg_char}', "
                    "{arg_var}.real, {arg_var}.imag)"
                    .format(arg_char=arg_char, arg_var=idi.name)))
                gen(S(
                    "_lpy_knl.set_arg({cl_arg_idx}, _lpy_buf)"
                    .format(cl_arg_idx=cl_arg_idx)))
                cl_arg_idx += 1

            fp_arg_count += 2

        elif isinstance(idi.dtype, NumpyType):
            if idi.dtype.dtype.kind == "f":
                fp_arg_count += 1

            # Pack using the numpy type character as the struct format.
            gen(S(
                "_lpy_knl.set_arg(%d, _lpy_pack('%s', %s))"
                % (cl_arg_idx, idi.dtype.dtype.char, idi.name)))

            cl_arg_idx += 1

        else:
            raise LoopyError("do not know how to pass argument of type '%s'"
                    % idi.dtype)

        gen(Line())

        gen(Comment("}}}"))
        gen(Line())

    return Suite(result), arg_idx_to_cl_arg_idx, cl_arg_idx
예제 #3
0
 def emit_if(self, condition_str, ast):
     """Return a genpy ``If`` node built from *condition_str* and *ast*."""
     from genpy import If as make_if

     guarded = make_if(condition_str, ast)
     return guarded
예제 #4
0
def generate_value_arg_setup(kernel, implemented_data_info):
    """Generate host code that passes the value (scalar) kernel arguments.

    Walks *implemented_data_info* in order. Entries whose ``arg_class`` is
    not an ``lp.ValueArg`` subclass are asserted to be arrays and only
    advance the OpenCL argument counter by one. Value arguments are not set
    one at a time: their OpenCL indices and value expressions are collected
    and emitted as at most one ``_lpy_knl._set_arg_buf_multi(...)`` and one
    ``_lpy_knl._set_arg_buf_pack_multi(...)`` statement. Unless
    ``kernel.options.skip_arg_checks`` is set, a check raising
    ``RuntimeError`` for a ``None`` value is emitted per value argument.

    :arg kernel: provides ``options.skip_arg_checks``.
    :arg implemented_data_info: sequence of entries with ``name``,
        ``arg_class`` and ``dtype`` attributes.
    :returns: a tuple ``(suite, arg_idx_to_cl_arg_idx, cl_arg_idx)``.
        *suite* is a genpy ``Suite`` holding the generated statements,
        *arg_idx_to_cl_arg_idx* maps each index into
        *implemented_data_info* to the OpenCL argument index of that entry,
        and *cl_arg_idx* is the number of OpenCL argument slots consumed.
    :raises TypeError: for a complex dtype that is neither complex64 nor
        complex128.
    :raises LoopyError: for a value argument whose dtype is neither
        composite, complex, nor a ``NumpyType``.
    """
    options = kernel.options

    import loopy as lp
    from loopy.kernel.array import ArrayBase

    cl_arg_idx = 0
    arg_idx_to_cl_arg_idx = {}

    from genpy import If, Raise, Statement as S, Suite

    result = []
    gen = result.append

    # Flat lists of source fragments for the two batched calls:
    # (index, buffer expression) pairs and
    # (index, type character, value expression) triples, respectively.
    buf_indices_and_args = []
    buf_pack_indices_and_args = []

    from pyopencl.invoker import BUF_PACK_TYPECHARS

    def add_buf_arg(arg_idx, typechar, expr_str):
        """Queue one scalar with struct format *typechar* for OpenCL slot
        *arg_idx*."""
        if typechar in BUF_PACK_TYPECHARS:
            buf_pack_indices_and_args.append(arg_idx)
            buf_pack_indices_and_args.append(repr(typechar.encode()))
            buf_pack_indices_and_args.append(expr_str)
        else:
            # Pack explicitly in the generated code. This uses the same
            # ``_lpy_pack`` name as the complex branch below, so that both
            # rely on a single name being bound in the generated module.
            buf_indices_and_args.append(arg_idx)
            buf_indices_and_args.append(
                f"_lpy_pack('{typechar}', {expr_str})")

    for arg_idx, idi in enumerate(implemented_data_info):
        arg_idx_to_cl_arg_idx[arg_idx] = cl_arg_idx

        if not issubclass(idi.arg_class, lp.ValueArg):
            assert issubclass(idi.arg_class, ArrayBase)

            # assume each of those generates exactly one...
            cl_arg_idx += 1

            continue

        if not options.skip_arg_checks:
            gen(
                If(
                    "%s is None" % idi.name,
                    Raise('RuntimeError("input argument \'{name}\' '
                          'must be supplied")'.format(name=idi.name))))

        if idi.dtype.is_composite():
            # Composite values are passed through without packing.
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(idi.name)

            cl_arg_idx += 1

        elif idi.dtype.is_complex():
            assert isinstance(idi.dtype, NumpyType)

            dtype = idi.dtype

            if dtype.numpy_dtype == np.complex64:
                arg_char = "f"
            elif dtype.numpy_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError("unexpected complex type: %s" % dtype)

            # Real and imaginary part are packed into a single argument.
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(f"_lpy_pack('{arg_char}{arg_char}', "
                                        f"{idi.name}.real, {idi.name}.imag)")
            cl_arg_idx += 1

        elif isinstance(idi.dtype, NumpyType):
            add_buf_arg(cl_arg_idx, idi.dtype.dtype.char, idi.name)
            cl_arg_idx += 1

        else:
            raise LoopyError("do not know how to pass argument of type '%s'" %
                             idi.dtype)

    for arg_kind, args_and_indices, entry_length in [
        ("_buf", buf_indices_and_args, 2),
        ("_buf_pack", buf_pack_indices_and_args, 3),
    ]:
        # Each queued argument must have contributed a complete entry.
        assert len(args_and_indices) % entry_length == 0
        if args_and_indices:
            gen(
                S(f"_lpy_knl._set_arg{arg_kind}_multi("
                  f"({', '.join(str(i) for i in args_and_indices)},), "
                  ")"))

    return Suite(result), arg_idx_to_cl_arg_idx, cl_arg_idx
예제 #5
0
def generate_value_arg_setup(kernel, devices, implemented_data_info):
    """Generate host code that passes the value (scalar) kernel arguments.

    Walks *implemented_data_info* in order. Entries whose ``arg_class`` is
    not an ``lp.ValueArg`` subclass are asserted to be arrays and only
    advance the OpenCL argument counter by one. Value arguments are not set
    one at a time: their OpenCL indices and value expressions are collected
    and emitted as at most one ``_lpy_knl._set_arg_buf_multi(...)`` and one
    ``_lpy_knl._set_arg_buf_pack_multi(...)`` statement. Unless
    ``kernel.options.skip_arg_checks`` is set, a check raising
    ``RuntimeError`` for a ``None`` value is emitted per value argument.

    :arg kernel: provides ``options.skip_arg_checks`` and ``name`` (the
        latter is only used in warning messages).
    :arg devices: sequence of devices; entries may be *None*, in which case
        they are treated as not having the struct-argument-count bug and a
        warning is issued.
    :arg implemented_data_info: sequence of entries with ``name``,
        ``arg_class`` and ``dtype`` attributes.
    :returns: a tuple ``(suite, arg_idx_to_cl_arg_idx, cl_arg_idx)``.
        *suite* is a genpy ``Suite`` holding the generated statements,
        *arg_idx_to_cl_arg_idx* maps each index into
        *implemented_data_info* to the OpenCL argument index at which that
        entry starts, and *cl_arg_idx* is the number of OpenCL argument
        slots consumed in total.
    :raises TypeError: for a complex dtype that is neither complex64 nor
        complex128.
    :raises LoopyError: for a value argument whose dtype is neither
        composite, complex, nor a ``NumpyType``.
    """
    options = kernel.options

    import loopy as lp
    from loopy.kernel.array import ArrayBase

    # {{{ arg counting bug handling

    # For example:
    # https://github.com/pocl/pocl/issues/197
    # (but Apple CPU has a similar bug)

    work_around_arg_count_bug = False
    warn_about_arg_count_bug = False

    try:
        from pyopencl.characterize import has_struct_arg_count_bug

    except ImportError:
        # Without the detection helper, assume no device is affected.
        count_bug_per_dev = [False] * len(devices)

    else:
        count_bug_per_dev = [
            has_struct_arg_count_bug(dev) if dev is not None else False
            for dev in devices
        ]

    if any(dev is None for dev in devices):
        warn("{knl_name}: device not supplied to PyOpenCLTarget--"
             "workarounds for broken OpenCL implementations "
             "(such as those relating to complex numbers) "
             "may not be enabled when needed. To avoid this, "
             "pass target=lp.PyOpenCLTarget(dev) when creating "
             "the kernel.".format(knl_name=kernel.name))

    # The workaround is only enabled when *every* device is affected; a
    # mixed set of devices merely triggers a warning further down.
    if any(count_bug_per_dev):
        if all(count_bug_per_dev):
            work_around_arg_count_bug = True
        else:
            warn_about_arg_count_bug = True

    # }}}

    cl_arg_idx = 0
    arg_idx_to_cl_arg_idx = {}

    # Number of floating-point scalars passed so far (a complex counts as 2).
    fp_arg_count = 0

    from genpy import If, Raise, Statement as S, Suite

    result = []
    gen = result.append

    # Flat lists of source fragments for the two batched calls:
    # (index, buffer expression) pairs and
    # (index, type character, value expression) triples, respectively.
    buf_indices_and_args = []
    buf_pack_indices_and_args = []

    from pyopencl.invoker import BUF_PACK_TYPECHARS

    def add_buf_arg(arg_idx, typechar, expr_str):
        """Queue one scalar with struct format *typechar* for OpenCL slot
        *arg_idx*."""
        if typechar in BUF_PACK_TYPECHARS:
            buf_pack_indices_and_args.append(arg_idx)
            buf_pack_indices_and_args.append(repr(typechar.encode()))
            buf_pack_indices_and_args.append(expr_str)
        else:
            # Pack explicitly in the generated code. This uses the same
            # ``_lpy_pack`` name as the complex branch below, so that both
            # rely on a single name being bound in the generated module.
            buf_indices_and_args.append(arg_idx)
            buf_indices_and_args.append(
                f"_lpy_pack('{typechar}', {expr_str})")

    for arg_idx, idi in enumerate(implemented_data_info):
        arg_idx_to_cl_arg_idx[arg_idx] = cl_arg_idx

        if not issubclass(idi.arg_class, lp.ValueArg):
            assert issubclass(idi.arg_class, ArrayBase)

            # assume each of those generates exactly one...
            cl_arg_idx += 1

            continue

        if not options.skip_arg_checks:
            gen(
                If(
                    "%s is None" % idi.name,
                    Raise('RuntimeError("input argument \'{name}\' '
                          'must be supplied")'.format(name=idi.name))))

        if idi.dtype.is_composite():
            # Composite values are passed through without packing.
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(idi.name)

            cl_arg_idx += 1

        elif idi.dtype.is_complex():
            assert isinstance(idi.dtype, NumpyType)

            dtype = idi.dtype

            if warn_about_arg_count_bug:
                warn("{knl_name}: arguments include complex numbers, and "
                     "some (but not all) of the target devices mishandle "
                     "struct kernel arguments (hence the workaround is "
                     "disabled)".format(knl_name=kernel.name))

            if dtype.numpy_dtype == np.complex64:
                arg_char = "f"
            elif dtype.numpy_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError("unexpected complex type: %s" % dtype)

            # NOTE(review): the limit of 8 floating-point scalars presumably
            # mirrors the conditions under which the device bug shows up --
            # confirm against pyopencl's own argument handling.
            if (work_around_arg_count_bug
                    and dtype.numpy_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                # Workaround: pass real and imaginary part as two separate
                # OpenCL arguments.
                add_buf_arg(cl_arg_idx, arg_char, f"{idi.name}.real")
                cl_arg_idx += 1

                add_buf_arg(cl_arg_idx, arg_char, f"{idi.name}.imag")
                cl_arg_idx += 1
            else:
                # Regular case: both parts packed into a single argument.
                buf_indices_and_args.append(cl_arg_idx)
                buf_indices_and_args.append(
                    f"_lpy_pack('{arg_char}{arg_char}', "
                    f"{idi.name}.real, {idi.name}.imag)")
                cl_arg_idx += 1

            fp_arg_count += 2

        elif isinstance(idi.dtype, NumpyType):
            if idi.dtype.dtype.kind == "f":
                fp_arg_count += 1

            add_buf_arg(cl_arg_idx, idi.dtype.dtype.char, idi.name)
            cl_arg_idx += 1

        else:
            raise LoopyError("do not know how to pass argument of type '%s'" %
                             idi.dtype)

    for arg_kind, args_and_indices, entry_length in [
        ("_buf", buf_indices_and_args, 2),
        ("_buf_pack", buf_pack_indices_and_args, 3),
    ]:
        # Each queued argument must have contributed a complete entry.
        assert len(args_and_indices) % entry_length == 0
        if args_and_indices:
            gen(
                S(f"_lpy_knl._set_arg{arg_kind}_multi("
                  f"({', '.join(str(i) for i in args_and_indices)},), "
                  ")"))

    return Suite(result), arg_idx_to_cl_arg_idx, cl_arg_idx