Example #1
0
def capture_kernel_call(kernel, filename, queue, g_size, l_size, *args,
                        **kwargs):
    """Write a standalone Python script that replays one kernel invocation.

    The generated script embeds the kernel source, recreates every argument
    (buffer contents are read back through *queue* and stored compressed and
    base64-encoded), builds the program and calls the kernel once.

    :arg kernel: the kernel being invoked; must expose a non-``None``
        ``_source`` attribute and a ``function_name``.
    :arg filename: path of the script to write.
    :arg queue: command queue used to copy buffer contents to the host.
    :arg g_size: global work size (a tuple).
    :arg l_size: local work size (a tuple when ``g_times_l`` is used).
    :arg args: the kernel arguments of the call being captured.
    :arg kwargs: only ``g_times_l`` and ``global_offset`` are consulted.
    :raises RuntimeError: if the kernel source is unavailable or an argument
        is of a kind that cannot be captured.
    """
    source = getattr(kernel, "_source", None)
    if source is None:
        raise RuntimeError("cannot capture call, kernel source not available")

    cg = PythonCodeGenerator()

    cg("# generated by pyopencl.capture_call")
    cg("")
    cg("import numpy as np")
    cg("import pyopencl as cl")
    cg("from base64 import b64decode")
    cg("from zlib import decompress")
    cg("mf = cl.mem_flags")
    cg("")

    cg('CODE = r"""//CL//')
    for source_line in source.split("\n"):
        cg(source_line)
    cg('"""')

    # {{{ invocation

    # (variable name, bytes-like payload) pairs emitted after main()
    arg_data = []

    cg("")
    cg("")
    cg("def main():")
    with Indentation(cg):
        cg("ctx = cl.create_some_context()")
        cg("queue = cl.CommandQueue(ctx)")
        cg("")

        kernel_args = []

        for i, arg in enumerate(args):
            if isinstance(arg, cl.Buffer):
                # Snapshot the device buffer so the script can recreate it.
                buf = bytearray(arg.size)
                cl.enqueue_copy(queue, buf, arg)
                arg_data.append(("arg%d_data" % i, buf))
                cg("arg%d = cl.Buffer(ctx, "
                   "mf.READ_WRITE | cl.mem_flags.COPY_HOST_PTR," % i)
                cg("    hostbuf=decompress(b64decode(arg%d_data)))" % i)
                kernel_args.append("arg%d" % i)
            elif isinstance(arg, (int, float)):
                kernel_args.append(repr(arg))
            elif isinstance(arg, np.integer):
                kernel_args.append("np.%s(%s)" %
                                   (arg.dtype.type.__name__, repr(int(arg))))
            elif isinstance(arg, np.floating):
                kernel_args.append("np.%s(%s)" %
                                   (arg.dtype.type.__name__, repr(float(arg))))
            elif isinstance(arg, np.complexfloating):
                kernel_args.append(
                    "np.%s(%s)" %
                    (arg.dtype.type.__name__, repr(complex(arg))))
            else:
                # Last resort: anything exposing the buffer protocol is
                # passed as raw bytes.
                try:
                    arg_buf = memoryview(arg)
                except Exception:
                    raise RuntimeError("cannot capture: "
                                       "unsupported arg nr %d (0-based)" % i)

                arg_data.append(("arg%d_data" % i, arg_buf))
                kernel_args.append("decompress(b64decode(arg%d_data))" % i)

        cg("")

        if kwargs.get("g_times_l", False):
            # Fold the local size into the global size so the generated
            # call does not need the g_times_l flag.
            dim = max(len(g_size), len(l_size))
            l_size = l_size + (1, ) * (dim - len(l_size))
            g_size = g_size + (1, ) * (dim - len(g_size))
            g_size = tuple(gs * ls for gs, ls in zip(g_size, l_size))

        global_offset = kwargs.get("global_offset", None)
        if global_offset is not None:
            kernel_args.append("global_offset=%s" % repr(global_offset))

        cg("prg = cl.Program(ctx, CODE).build()")
        cg("knl = prg.%s" % kernel.function_name)
        if hasattr(kernel, "_scalar_arg_dtypes"):

            def strify_dtype(d):
                if d is None:
                    return "None"

                d = np.dtype(d)
                s = repr(d)
                if s.startswith("dtype"):
                    s = "np." + s

                return s

            dtype_strs = [strify_dtype(dt)
                          for dt in kernel._scalar_arg_dtypes]
            # "(,)" is not valid Python, so an empty sequence needs "()".
            if dtype_strs:
                dtypes_expr = "(%s,)" % ", ".join(dtype_strs)
            else:
                dtypes_expr = "()"
            cg("knl.set_scalar_arg_dtypes(%s)" % dtypes_expr)

        cg("knl(queue, %s, %s," % (repr(g_size), repr(l_size)))
        cg("    %s)" % ", ".join(kernel_args))
        cg("")
        cg("queue.finish()")

    # }}}

    # {{{ data

    from zlib import compress
    from base64 import b64encode
    cg("")
    line_len = 70

    for name, val in arg_data:
        cg("%s = (" % name)
        with Indentation(cg):
            # b64encode returns bytes; decode explicitly, since str() of a
            # bytes object on Python 3 is its "b'...'" repr, which would end
            # up inside the generated base64 literal.
            encoded = b64encode(compress(memoryview(val))).decode("ascii")
            for start in range(0, len(encoded), line_len):
                cg(repr(encoded[start:start + line_len]))

            cg(")")

    # }}}

    # {{{ file trailer

    cg("")
    cg("if __name__ == \"__main__\":")
    with Indentation(cg):
        cg("main()")
    cg("")

    cg("# vim: filetype=pyopencl")

    # }}}

    with open(filename, "w") as outf:
        outf.write(cg.get())
Example #2
0
    def generate_integer_arg_finding_from_shapes(self, gen, kernel,
                                                 implemented_data_info):
        """Emit code that fills in unset integer arguments from array shapes.

        For every axis of a ``GlobalArg`` whose shape expression depends on
        exactly one integral kernel argument, the shape equation is solved
        for that argument. The emitted code assigns the solved expression to
        the integer argument when it is ``None`` and the array was supplied.

        :arg gen: code generator receiving the emitted lines.
        :arg kernel: kernel whose ``arg_dict`` is used to check dtypes.
        :arg implemented_data_info: iterable of argument descriptors.
        """
        from loopy.kernel.data import GlobalArg
        from loopy.symbolic import DependencyMapper, StringifyMapper
        from loopy.diagnostic import ParameterFinderWarning
        from pymbolic import var

        find_deps = DependencyMapper()

        # integer argument name -> [(array argument name, expression)],
        # where the expression computes the integer argument from the
        # array's shape.
        sources_by_iarg = {}

        for arg in implemented_data_info:
            if arg.arg_class is not GlobalArg:
                continue

            shape_sym = var(arg.name).attr("shape")

            for axis_nr, axis_len in enumerate(arg.shape):
                if axis_len is None:
                    continue

                deps = find_deps(axis_len)
                if len(deps) != 1:
                    continue

                integer_arg_var, = deps
                if not kernel.arg_dict[
                        integer_arg_var.name].dtype.is_integral():
                    continue

                from pymbolic.algorithm import solve_affine_equations_for
                try:
                    # friggin' overkill :)
                    solution = solve_affine_equations_for(
                        [integer_arg_var.name],
                        [(axis_len, shape_sym.index(axis_nr))])
                    iarg_expr = solution[integer_arg_var]
                except Exception as e:
                    # Solving failed: warn and leave this argument without
                    # an automatic source from this axis.
                    from warnings import warn
                    warn(
                        "Unable to generate code to automatically "
                        "find '%s' from the shape of '%s':\n%s" %
                        (integer_arg_var.name, arg.name, str(e)),
                        ParameterFinderWarning)
                else:
                    sources_by_iarg.setdefault(
                        integer_arg_var.name, []).append(
                            (arg.name, iarg_expr))

        gen("# {{{ find integer arguments from shapes")
        gen("")

        for iarg_name, sources in six.iteritems(sources_by_iarg):
            gen("if %s is None:" % iarg_name)
            with Indentation(gen):
                for src_nr, (arg_name, value_expr) in enumerate(sources):
                    # first candidate opens the chain, the rest extend it
                    keyword = "if" if src_nr == 0 else "elif"
                    gen("%s %s is not None:" % (keyword, arg_name))
                    with Indentation(gen):
                        gen("%s = %s" %
                            (iarg_name, StringifyMapper()(value_expr)))

            gen("")

        gen("# }}}")
        gen("")
Example #3
0
def _generate_enqueue_and_set_args_module(function_name, num_passed_args,
                                          num_cl_args, arg_types,
                                          include_debug_code,
                                          work_around_arg_count_bug,
                                          warn_about_arg_count_bug):
    """Generate the invoker module for one kernel.

    The generated module defines ``enqueue_knl_<function_name>``, which sets
    the arguments and enqueues the kernel, and ``set_args``, which only sets
    the arguments.

    :returns: a tuple ``(picklable_module, enqueue_name)``.
    """

    arg_names = [f"arg{i}" for i in range(num_passed_args)]

    def gen_arg_setting(in_enqueue):
        # With known argument types, the specific generator decides the
        # return shape itself (it also receives *in_enqueue*).
        if arg_types is not None:
            return generate_specific_arg_handling_body(
                function_name,
                num_cl_args,
                arg_types,
                warn_about_arg_count_bug=warn_about_arg_count_bug,
                work_around_arg_count_bug=work_around_arg_count_bug,
                in_enqueue=in_enqueue,
                include_debug_code=include_debug_code)

        # Generic handling contributes no extra events to wait for.
        body = generate_generic_arg_handling_body(num_passed_args)
        return (body, []) if in_enqueue else body

    gen = PythonCodeGenerator()

    for preamble_line in (
            "from struct import pack",
            "from pyopencl import status_code",
            "import numpy as np",
            "import pyopencl._cl as _cl",
            ""):
        gen(preamble_line)

    # {{{ generate _enqueue

    enqueue_name = f"enqueue_knl_{function_name}"
    enqueue_params = (
        ["self", "queue", "global_size", "local_size"]
        + arg_names
        + ["global_offset=None", "g_times_l=None",
           "allow_empty_ndrange=False", "wait_for=None"])
    gen("def {}({}):".format(enqueue_name, ", ".join(enqueue_params)))

    with Indentation(gen):
        subgen, wait_for_parts = gen_arg_setting(in_enqueue=True)
        gen.extend(subgen)

        wait_for_expr = "wait_for"
        if wait_for_parts:
            # Merge the caller's wait list with the per-argument event lists.
            starred_parts = ", ".join("*" + wfp for wfp in wait_for_parts)
            wait_for_expr = (
                "[*(() if wait_for is None else wait_for), "
                + starred_parts + "]")

        # Using positional args here because pybind is slow with keyword args
        gen(f"""
            return _cl.enqueue_nd_range_kernel(queue, self,
                    global_size, local_size, global_offset,
                    {wait_for_expr},
                    g_times_l, allow_empty_ndrange)
            """)

    # }}}

    # {{{ generate set_args

    set_args_params = ", ".join(["self"] + arg_names)
    gen("")
    gen("def set_args(%s):" % set_args_params)

    with Indentation(gen):
        gen.extend(gen_arg_setting(in_enqueue=False))

    # }}}

    module = gen.get_picklable_module(
        name=f"<pyopencl invoker for '{function_name}'>")
    return (module, enqueue_name)
Example #4
0
def generate_specific_arg_handling_body(function_name, num_cl_args, arg_types,
                                        *, work_around_arg_count_bug,
                                        warn_about_arg_count_bug, in_enqueue,
                                        include_debug_code):
    """Generate argument-setting code for a kernel with known argument types.

    Each passed argument ``arg<N>`` is assigned to one of three batches,
    which are emitted as single ``self._set_arg_multi``,
    ``self._set_arg_buf_multi`` and ``self._set_arg_buf_pack_multi`` calls.

    :arg function_name: kernel name, used in the warning text.
    :arg num_cl_args: number of arguments the CL kernel declares.
    :arg arg_types: per-argument type: ``None`` (passed through unchanged),
        a ``VectorArg``, or anything accepted by ``np.dtype``.
    :arg work_around_arg_count_bug: ``"pocl"``, ``"apple"`` or another
        non-``None`` value; selects how ``complex128`` arguments are passed.
    :arg warn_about_arg_count_bug: if true, warn when a complex argument is
        encountered.
    :arg in_enqueue: if true, also collect per-array event expressions.
    :arg include_debug_code: if true, emit contiguity and queue checks for
        array arguments.
    :returns: the code generator, or ``(generator, wait_for_parts)`` when
        *in_enqueue* is true.
    :raises TypeError: for an unexpected complex dtype, or when the number of
        CL arguments produced differs from *num_cl_args*.
    :raises NotImplementedError: for ``complex128`` under the ``"apple"``
        work-around.
    """

    assert work_around_arg_count_bug is not None
    assert warn_about_arg_count_bug is not None

    # Number of floating-point values passed so far (a complex counts as two).
    fp_arg_count = 0
    # Index of the next CL kernel argument; may run ahead of arg_idx because
    # one passed argument can expand to several CL arguments.
    cl_arg_idx = 0

    gen = PythonCodeGenerator()

    if not arg_types:
        gen("pass")

    # Flat lists: (index, expr) pairs for the first two, and
    # (index, typechar, expr) triples for the pack variant.
    gen_indices_and_args = []
    buf_indices_and_args = []
    buf_pack_indices_and_args = []

    def add_buf_arg(arg_idx, typechar, expr_str):
        if typechar in BUF_PACK_TYPECHARS:
            buf_pack_indices_and_args.append(arg_idx)
            buf_pack_indices_and_args.append(repr(typechar.encode()))
            buf_pack_indices_and_args.append(expr_str)
        else:
            buf_indices_and_args.append(arg_idx)
            buf_indices_and_args.append(f"pack('{typechar}', {expr_str})")

    wait_for_parts = []

    for arg_idx, arg_type in enumerate(arg_types):
        arg_var = "arg%d" % arg_idx

        if arg_type is None:
            gen_indices_and_args.append(cl_arg_idx)
            gen_indices_and_args.append(arg_var)
            cl_arg_idx += 1
            gen("")
            continue

        elif isinstance(arg_type, VectorArg):
            if include_debug_code:
                gen(f"if not {arg_var}.flags.forc:")
                with Indentation(gen):
                    gen("raise RuntimeError('only contiguous arrays may '")
                    gen("   'be used as arguments to this operation')")
                    gen("")

            if in_enqueue and include_debug_code:
                gen(f"assert {arg_var}.queue is None or {arg_var}.queue == queue, "
                    "'queues for all arrays must match the queue supplied "
                    "to enqueue'")

            gen_indices_and_args.append(cl_arg_idx)
            gen_indices_and_args.append(f"{arg_var}.base_data")
            cl_arg_idx += 1

            if arg_type.with_offset:
                # The offset travels as an extra int64 CL argument.
                add_buf_arg(cl_arg_idx,
                            np.dtype(np.int64).char, f"{arg_var}.offset")
                cl_arg_idx += 1

            if in_enqueue:
                wait_for_parts.append(f"{arg_var}.events")

            continue

        arg_dtype = np.dtype(arg_type)

        if arg_dtype.char == "V":
            # Void/struct dtype: passed as-is through the buffer interface.
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(arg_var)
            cl_arg_idx += 1

        elif arg_dtype.kind == "c":
            if warn_about_arg_count_bug:
                warn(f"{function_name}: arguments include complex numbers, "
                     "and some (but not all) of the target devices mishandle "
                     "struct kernel arguments (hence the workaround is "
                     "disabled)",
                     stacklevel=2)

            if arg_dtype == np.complex64:
                arg_char = "f"
            elif arg_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError("unexpected complex type: %s" % arg_dtype)

            if (work_around_arg_count_bug == "pocl"
                    and arg_dtype == np.complex128 and fp_arg_count + 2 <= 8):
                # Pass real and imaginary parts as two separate CL arguments.
                add_buf_arg(cl_arg_idx, arg_char, f"{arg_var}.real")
                cl_arg_idx += 1
                add_buf_arg(cl_arg_idx, arg_char, f"{arg_var}.imag")
                cl_arg_idx += 1

            elif (work_around_arg_count_bug == "apple"
                  and arg_dtype == np.complex128 and fp_arg_count + 2 <= 8):
                raise NotImplementedError(
                    "No work-around to "
                    "Apple's broken structs-as-kernel arg "
                    "handling has been found. "
                    "Cannot pass complex numbers to kernels.")

            else:
                buf_indices_and_args.append(cl_arg_idx)
                buf_indices_and_args.append(
                    f"pack('{arg_char}{arg_char}', {arg_var}.real, {arg_var}.imag)"
                )
                cl_arg_idx += 1

            fp_arg_count += 2

        else:
            if arg_dtype.kind == "f":
                fp_arg_count += 1

            arg_char = arg_dtype.char
            arg_char = _type_char_map.get(arg_char, arg_char)
            add_buf_arg(cl_arg_idx, arg_char, arg_var)
            cl_arg_idx += 1

        gen("")

    for arg_kind, args_and_indices, entry_length in [
        ("", gen_indices_and_args, 2),
        ("_buf", buf_indices_and_args, 2),
        ("_buf_pack", buf_pack_indices_and_args, 3),
    ]:
        assert len(args_and_indices) % entry_length == 0
        if args_and_indices:
            gen(f"self._set_arg{arg_kind}_multi("
                f"({', '.join(str(i) for i in args_and_indices)},), "
                ")")

    if cl_arg_idx != num_cl_args:
        raise TypeError("length of argument list (%d) and "
                        "CL-generated number of arguments (%d) do not agree" %
                        (cl_arg_idx, num_cl_args))

    if in_enqueue:
        return gen, wait_for_parts
    else:
        return gen
Example #5
0
 def generate_invocation(self, gen, kernel_name, args, kernel,
                         implemented_data_info):
     """Emit a loop that calls every entry of ``_lpy_c_kernels`` with *args*.

     *args* is an iterable of argument expression strings; they are joined
     with ``", "`` to form the call's argument list.
     """
     gen("for knl in _lpy_c_kernels:")
     with Indentation(gen):
         joined_args = ", ".join(args)
         gen("knl(" + joined_args + ")")
Example #6
0
    def generate_arg_setup(self, gen, kernel, implemented_data_info, options):
        """Emit the per-argument setup section of the invoker.

        For each kernel argument in *implemented_data_info*, code is emitted
        into *gen* that (depending on *options*) checks that required
        arguments were supplied, allocates written arrays that were not
        passed in, and validates dtype, shape, strides and offset.

        :arg gen: code generator receiving the emitted lines.
        :arg kernel: kernel providing ``get_written_variables()`` and
            ``impl_arg_to_arg``.
        :arg implemented_data_info: sequence of argument descriptors.
        :arg options: object with ``no_numpy`` and ``skip_arg_checks`` flags.
        :returns: list of expression strings to pass to the kernel call, in
            argument order.
        :raises LoopyError: if a kernel argument follows a non-argument
            entry, or a to-be-allocated argument has a non-numpy dtype.
        """
        import loopy as lp

        from loopy.kernel.data import KernelArgument
        from loopy.kernel.array import ArrayBase
        from loopy.symbolic import StringifyMapper
        from loopy.types import NumpyType

        gen("# {{{ set up array arguments")
        gen("")

        if not options.no_numpy:
            # Flags in the generated code, initialised here; presumably
            # updated by the code handle_non_numpy_arg emits -- confirm.
            gen("_lpy_encountered_numpy = False")
            gen("_lpy_encountered_dev = False")
            gen("")

        # Expression strings handed to the kernel invocation.
        args = []

        strify = StringifyMapper()

        # Set once an entry that is not a KernelArgument has been seen; any
        # KernelArgument after that is an error.
        expect_no_more_arguments = False

        for arg_idx, arg in enumerate(implemented_data_info):
            is_written = arg.base_name in kernel.get_written_variables()
            # May be None when arg.name has no entry in impl_arg_to_arg.
            kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

            if not issubclass(arg.arg_class, KernelArgument):
                expect_no_more_arguments = True
                continue

            if expect_no_more_arguments:
                raise LoopyError(
                    "Further arguments encountered after arg info "
                    "describing a global temporary variable")

            # Non-array arguments are passed through by name, with no
            # setup code.
            if not issubclass(arg.arg_class, ArrayBase):
                args.append(arg.name)
                continue

            gen("# {{{ process %s" % arg.name)
            gen("")

            if not options.no_numpy:
                self.handle_non_numpy_arg(gen, arg)

            # Arguments that are only read must always be supplied.
            if not options.skip_arg_checks and not is_written:
                gen("if %s is None:" % arg.name)
                with Indentation(gen):
                    gen("raise RuntimeError(\"input argument '%s' must "
                        "be supplied\")" % arg.name)
                    gen("")

            # Written images must be supplied by the caller (no allocation
            # code is emitted for them below).
            if (is_written and arg.arg_class is lp.ImageArg
                    and not options.skip_arg_checks):
                gen("if %s is None:" % arg.name)
                with Indentation(gen):
                    gen("raise RuntimeError(\"written image '%s' must "
                        "be supplied\")" % arg.name)
                    gen("")

            # Without a known shape, a written array cannot be allocated.
            if is_written and arg.shape is None and not options.skip_arg_checks:
                gen("if %s is None:" % arg.name)
                with Indentation(gen):
                    gen("raise RuntimeError(\"written argument '%s' has "
                        "unknown shape and must be supplied\")" % arg.name)
                    gen("")

            # True if allocation code was emitted for this argument, in which
            # case the generated code tracks it in _lpy_made_by_loopy.
            possibly_made_by_loopy = False

            # {{{ allocate written arrays, if needed

            if is_written and arg.arg_class in [lp.ArrayArg, lp.ConstantArg] \
                    and arg.shape is not None \
                    and all(si is not None for si in arg.shape):

                if not isinstance(arg.dtype, NumpyType):
                    raise LoopyError(
                        "do not know how to pass arg of type '%s'" % arg.dtype)

                possibly_made_by_loopy = True
                gen("_lpy_made_by_loopy = False")
                gen("")

                gen("if %s is None:" % arg.name)
                with Indentation(gen):
                    self.handle_alloc(gen, arg, kernel_arg, strify,
                                      options.skip_arg_checks)
                    gen("_lpy_made_by_loopy = True")
                    gen("")

            # }}}

            # {{{ argument checking

            if arg.arg_class in [lp.ArrayArg, lp.ConstantArg] \
                    and not options.skip_arg_checks:
                # Checks are skipped at run time for arrays allocated by the
                # generated code itself; "if True:" keeps the indentation of
                # the emitted checks uniform in the other case.
                if possibly_made_by_loopy:
                    gen("if not _lpy_made_by_loopy:")
                else:
                    gen("if True:")

                with Indentation(gen):
                    gen("if %s.dtype != %s:" %
                        (arg.name,
                         self.python_dtype_str(kernel_arg.dtype.numpy_dtype)))
                    with Indentation(gen):
                        gen("raise TypeError(\"dtype mismatch on argument '%s' "
                            "(got: %%s, expected: %s)\" %% %s.dtype)" %
                            (arg.name, arg.dtype, arg.name))

                    # {{{ generate shape checking code

                    def strify_allowing_none(shape_axis):
                        # Render one shape entry, mapping None to "None".
                        if shape_axis is None:
                            return "None"
                        else:
                            return strify(shape_axis)

                    def strify_tuple(t):
                        # Render a tuple expression; the trailing comma keeps
                        # single-element tuples valid.
                        if len(t) == 0:
                            return "()"
                        else:
                            return "(%s,)" % ", ".join(
                                strify_allowing_none(sa) for sa in t)

                    # The "raise" statement shared by all shape checks below.
                    shape_mismatch_msg = (
                        "raise TypeError(\"shape mismatch on argument '%s' "
                        "(got: %%s, expected: %%s)\" "
                        "%% (%s.shape, %s))" %
                        (arg.name, arg.name, strify_tuple(arg.unvec_shape)))

                    if kernel_arg.shape is None:
                        pass

                    elif any(shape_axis is None
                             for shape_axis in kernel_arg.shape):
                        # Partially known shape: check the rank, then each
                        # known axis individually.
                        gen("if len(%s.shape) != %s:" %
                            (arg.name, len(arg.unvec_shape)))
                        with Indentation(gen):
                            gen(shape_mismatch_msg)

                        for i, shape_axis in enumerate(arg.unvec_shape):
                            if shape_axis is None:
                                continue

                            gen("if %s.shape[%d] != %s:" %
                                (arg.name, i, strify(shape_axis)))
                            with Indentation(gen):
                                gen(shape_mismatch_msg)

                    else:  # not None, no Nones in tuple
                        gen("if %s.shape != %s:" %
                            (arg.name, strify(arg.unvec_shape)))
                        with Indentation(gen):
                            gen(shape_mismatch_msg)

                    # }}}

                    if arg.unvec_strides and kernel_arg.dim_tags:
                        # Scale the symbolic strides by the item size for
                        # comparison with the array's .strides.
                        itemsize = kernel_arg.dtype.numpy_dtype.itemsize
                        sym_strides = tuple(itemsize * s_i
                                            for s_i in arg.unvec_strides)

                        ndim = len(arg.unvec_shape)
                        shape = ["_lpy_shape_%d" % i for i in range(ndim)]
                        strides = ["_lpy_stride_%d" % i for i in range(ndim)]

                        # Unpack shape and strides into per-axis variables in
                        # the generated code.
                        gen("(%s,) = %s.shape" % (", ".join(shape), arg.name))
                        gen("(%s,) = %s.strides" %
                            (", ".join(strides), arg.name))

                        gen("if not %s:" % self.get_strides_check_expr(
                            shape, strides, (strify(s) for s in sym_strides)))
                        with Indentation(gen):
                            # The error report only includes axes of length
                            # greater than one.
                            gen("_lpy_got = tuple(stride "
                                "for (dim, stride) in zip(%s.shape, %s.strides) "
                                "if dim > 1)" % (arg.name, arg.name))
                            gen("_lpy_expected = tuple(stride "
                                "for (dim, stride) in zip(%s.shape, %s) "
                                "if dim > 1)" %
                                (arg.name, strify_tuple(sym_strides)))

                            gen("raise TypeError(\"strides mismatch on "
                                "argument '%s' "
                                "(after removing unit length dims, "
                                "got: %%s, expected: %%s)\" "
                                "%% (_lpy_got, _lpy_expected))" % arg.name)

                    if not arg.allows_offset:
                        gen("if hasattr(%s, 'offset') and %s.offset:" %
                            (arg.name, arg.name))
                        with Indentation(gen):
                            gen("raise ValueError(\"Argument '%s' does not "
                                "allow arrays with offsets. Try passing "
                                "default_offset=loopy.auto to make_kernel()."
                                "\")" % arg.name)
                            gen("")

            # }}}

            # Remove the per-argument tracking variable from the generated
            # code once the checks that read it have been emitted.
            if possibly_made_by_loopy and not options.skip_arg_checks:
                gen("del _lpy_made_by_loopy")
                gen("")

            if arg.arg_class in [lp.ArrayArg, lp.ConstantArg]:
                args.append(self.get_arg_pass(arg))
            else:
                args.append("%s" % arg.name)

            gen("")

            gen("# }}}")
            gen("")

        gen("# }}}")
        gen("")

        return args
Example #7
0
def generate_invoker(kernel, codegen_result):
    """Build the Python wrapper function that invokes a generated kernel.

    The wrapper takes the system arguments plus one ``<name>=None`` keyword
    per kernel argument, emits the argument-finding, checking and setup
    code, calls the host program and returns ``(_lpy_evt, outputs)``.
    *outputs* is a dict or a tuple of the written arguments, depending on
    ``kernel.options.return_dict``.

    If ``options.write_wrapper`` is set, the wrapper source is printed (when
    it is ``True``) or written to the file it names.

    :returns: the result of the generator's ``get_function()``.
    """
    options = kernel.options
    implemented_data_info = codegen_result.implemented_data_info
    host_code = codegen_result.host_code()

    from loopy.kernel.data import KernelArgument

    def is_kernel_arg(idi):
        return issubclass(idi.arg_class, KernelArgument)

    def is_written_kernel_arg(idi):
        return (is_kernel_arg(idi)
                and idi.base_name in kernel.get_written_variables())

    system_args = [
        "_lpy_cl_kernels",
        "queue",
        "allocator=None",
        "wait_for=None",
        # ignored if options.no_numpy
        "out_host=None"
    ]
    user_args = [
        "%s=None" % idi.name
        for idi in implemented_data_info
        if is_kernel_arg(idi)
    ]

    gen = PythonFunctionGenerator(
        "invoke_%s_loopy_kernel" % kernel.name, system_args + user_args)

    for preamble_line in (
            "from __future__ import division",
            "",
            "import pyopencl as _lpy_cl",
            "import pyopencl.array as _lpy_cl_array",
            "import pyopencl.tools as _lpy_cl_tools",
            "import numpy as _lpy_np",
            "",
            host_code,
            ""):
        gen.add_to_preamble(preamble_line)

    gen("if allocator is None:")
    with Indentation(gen):
        gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)")
    gen("")

    generate_integer_arg_finding_from_shapes(gen, kernel,
                                             implemented_data_info)
    generate_integer_arg_finding_from_offsets(gen, kernel,
                                              implemented_data_info)
    generate_integer_arg_finding_from_strides(gen, kernel,
                                              implemented_data_info)
    generate_value_arg_check(gen, kernel, implemented_data_info)

    args = generate_arg_setup(gen, kernel, implemented_data_info, options)

    # {{{ generate invocation

    call_args = ["_lpy_cl_kernels", "queue"] + args + ["wait_for=wait_for"]
    gen("_lpy_evt = %s(%s)" %
        (codegen_result.host_program.name, ", ".join(call_args)))

    # }}}

    # {{{ output

    if not options.no_numpy:
        gen("if out_host is None and (_lpy_encountered_numpy "
            "and not _lpy_encountered_dev):")
        with Indentation(gen):
            gen("out_host = True")

        gen("if out_host:")
        with Indentation(gen):
            # keeps the generated block non-empty when nothing is written
            gen("pass")
            for arg in implemented_data_info:
                if is_written_kernel_arg(arg):
                    gen("%s = %s.get(queue=queue)" % (arg.name, arg.name))

        gen("")

    written_args = [
        arg for arg in implemented_data_info if is_written_kernel_arg(arg)
    ]

    if options.return_dict:
        dict_items = ", ".join(
            '"%s": %s' % (arg.name, arg.name) for arg in written_args)
        gen("return _lpy_evt, {%s}" % dict_items)
    elif written_args:
        gen("return _lpy_evt, (%s,)" %
            ", ".join(arg.name for arg in written_args))
    else:
        gen("return _lpy_evt, ()")

    # }}}

    if options.write_wrapper:
        wrapper_text = gen.get()
        if options.highlight_wrapper:
            wrapper_text = get_highlighted_python_code(wrapper_text)

        # True means "print to stdout"; any other truthy value is a path.
        if options.write_wrapper is True:
            print(wrapper_text)
        else:
            with open(options.write_wrapper, "w") as outf:
                outf.write(wrapper_text)

    return gen.get_function()
Example #8
0
def generate_arg_setup(gen, kernel, implemented_data_info, options):
    """Emit wrapper source code that prepares a kernel's array arguments.

    For every array argument in *implemented_data_info*, source lines are
    written to *gen* that (depending on *options*) convert numpy arrays to
    device arrays, verify that required arguments were supplied, allocate
    written arrays that were passed as ``None``, and check dtype, shape,
    strides and offset of arrays supplied by the caller.

    :arg gen: code generator; it is called with one source string at a time
        and used with ``Indentation(gen)`` to nest the emitted code.
    :arg kernel: object providing ``get_written_variables()`` and
        ``impl_arg_to_arg``.
    :arg implemented_data_info: iterable of argument descriptors. The
        attributes read here are ``name``, ``base_name``, ``arg_class``,
        ``dtype``, ``shape``, ``strides``, ``unvec_shape``,
        ``unvec_strides`` and ``allows_offset``.
    :arg options: object whose ``no_numpy`` and ``skip_arg_checks`` flags
        select which parts of the setup code are emitted.
    :returns: a list of source-expression strings, one per kernel argument
        in iteration order (presumably spliced into the generated kernel
        call by the caller -- confirm at the call site).
    :raises LoopyError: if a kernel argument follows an entry that is not a
        :class:`KernelArgument`, or if a written array that may need to be
        allocated has a dtype that is not a ``NumpyType``.
    """
    import loopy as lp

    from loopy.kernel.data import KernelArgument
    from loopy.kernel.array import ArrayBase
    from loopy.symbolic import StringifyMapper
    from pymbolic import var

    gen("# {{{ set up array arguments")
    gen("")

    if not options.no_numpy:
        # Flags in the generated code recording whether numpy arrays and/or
        # non-numpy (non-None) array arguments were passed in.
        gen("_lpy_encountered_numpy = False")
        gen("_lpy_encountered_dev = False")
        gen("")

    # Source expressions for the kernel arguments; this is the return value.
    args = []

    strify = StringifyMapper()

    # Set once an entry that is not a KernelArgument has been seen; any
    # KernelArgument encountered after that point is treated as an error.
    expect_no_more_arguments = False

    # NOTE(review): arg_idx is not used anywhere in the loop body.
    for arg_idx, arg in enumerate(implemented_data_info):
        # True when this argument's base name is among the variables the
        # kernel writes to.
        is_written = arg.base_name in kernel.get_written_variables()
        # NOTE(review): if impl_arg_to_arg is a dict, .get() yields None for
        # an unknown name, yet kernel_arg.dtype/.shape/.dim_tags are read
        # below without a check -- confirm every array arg has an entry.
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if not issubclass(arg.arg_class, KernelArgument):
            expect_no_more_arguments = True
            continue

        if expect_no_more_arguments:
            raise LoopyError("Further arguments encountered after arg info "
                             "describing a global temporary variable")

        # Non-array arguments need no setup code; they are passed by name.
        if not issubclass(arg.arg_class, ArrayBase):
            args.append(arg.name)
            continue

        gen("# {{{ process %s" % arg.name)
        gen("")

        if not options.no_numpy:
            # Generated code: transfer numpy arrays to the device and record
            # which kind of array was seen for this argument.
            gen("if isinstance(%s, _lpy_np.ndarray):" % arg.name)
            with Indentation(gen):
                gen("# synchronous, nothing to worry about")
                gen("%s = _lpy_cl_array.to_device("
                    "queue, %s, allocator=allocator)" % (arg.name, arg.name))
                gen("_lpy_encountered_numpy = True")
            gen("elif %s is not None:" % arg.name)
            with Indentation(gen):
                gen("_lpy_encountered_dev = True")

            gen("")

        # Arguments that are not written cannot be allocated on the
        # caller's behalf, so the generated code rejects None for them.
        if not options.skip_arg_checks and not is_written:
            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                gen("raise RuntimeError(\"input argument '%s' must "
                    "be supplied\")" % arg.name)
                gen("")

        # Written images must be supplied as well: the allocation code
        # below is only generated for GlobalArg and ConstantArg.
        if (is_written and arg.arg_class is lp.ImageArg
                and not options.skip_arg_checks):
            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                gen("raise RuntimeError(\"written image '%s' must "
                    "be supplied\")" % arg.name)
                gen("")

        # Likewise for written arguments without a known shape: allocation
        # below requires arg.shape to be set.
        if is_written and arg.shape is None and not options.skip_arg_checks:
            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                gen("raise RuntimeError(\"written argument '%s' has "
                    "unknown shape and must be supplied\")" % arg.name)
                gen("")

        # Whether allocation code (and with it the generated
        # _lpy_made_by_loopy flag) is emitted for this argument.
        possibly_made_by_loopy = False

        # {{{ allocate written arrays, if needed

        if is_written and arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \
                and arg.shape is not None:

            if not isinstance(arg.dtype, NumpyType):
                raise LoopyError("do not know how to pass arg of type '%s'" %
                                 arg.dtype)

            possibly_made_by_loopy = True
            gen("_lpy_made_by_loopy = False")
            gen("")

            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                # One generated temporary per axis for the shape, and one
                # for the stride scaled by the dtype's item size.
                num_axes = len(arg.strides)
                for i in range(num_axes):
                    gen("_lpy_shape_%d = %s" % (i, strify(arg.unvec_shape[i])))

                itemsize = kernel_arg.dtype.numpy_dtype.itemsize
                for i in range(num_axes):
                    gen("_lpy_strides_%d = %s" %
                        (i, strify(itemsize * arg.unvec_strides[i])))

                if not options.skip_arg_checks:
                    # NOTE(review): the emitted assertion tests "> 0", so a
                    # zero stride fails too, although the message only
                    # mentions a negative stride.
                    for i in range(num_axes):
                        gen("assert _lpy_strides_%d > 0, "
                            "\"'%s' has negative stride in axis %d\"" %
                            (i, arg.name, i))

                # Symbolic references to the generated temporaries, used to
                # build the expressions emitted below.
                sym_strides = tuple(
                    var("_lpy_strides_%d" % i) for i in range(num_axes))
                sym_shape = tuple(
                    var("_lpy_shape_%d" % i) for i in range(num_axes))

                # Allocation size: sum over axes of stride * (length - 1),
                # plus the size of one item.
                alloc_size_expr = (
                    sum(astrd * (alen - 1)
                        for alen, astrd in zip(sym_shape, sym_strides)) +
                    itemsize)

                gen("_lpy_alloc_size = %s" % strify(alloc_size_expr))
                gen("%(name)s = _lpy_cl_array.Array(queue, %(shape)s, "
                    "%(dtype)s, strides=%(strides)s, "
                    "data=allocator(_lpy_alloc_size), allocator=allocator)" %
                    dict(name=arg.name,
                         shape=strify(sym_shape),
                         strides=strify(sym_strides),
                         dtype=python_dtype_str(kernel_arg.dtype.numpy_dtype)))

                # NOTE(review): the generated temporaries are only deleted
                # when argument checks are enabled -- confirm that tying
                # this cleanup to skip_arg_checks is intentional.
                if not options.skip_arg_checks:
                    for i in range(num_axes):
                        gen("del _lpy_shape_%d" % i)
                        gen("del _lpy_strides_%d" % i)
                    gen("del _lpy_alloc_size")
                    gen("")

                gen("_lpy_made_by_loopy = True")
                gen("")

        # }}}

        # {{{ argument checking

        if arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \
                and not options.skip_arg_checks:
            # Arrays allocated by the generated code above are not checked;
            # "if True:" supplies a block header so that the checks below
            # can be emitted at the same nesting level in either case.
            if possibly_made_by_loopy:
                gen("if not _lpy_made_by_loopy:")
            else:
                gen("if True:")

            with Indentation(gen):
                gen("if %s.dtype != %s:" %
                    (arg.name, python_dtype_str(kernel_arg.dtype.numpy_dtype)))
                with Indentation(gen):
                    gen("raise TypeError(\"dtype mismatch on argument '%s' "
                        "(got: %%s, expected: %s)\" %% %s.dtype)" %
                        (arg.name, arg.dtype, arg.name))

                # {{{ generate shape checking code

                def strify_allowing_none(shape_axis):
                    # Render one shape entry; None (unknown axis length) is
                    # emitted as the literal "None".
                    if shape_axis is None:
                        return "None"
                    else:
                        return strify(shape_axis)

                def strify_tuple(t):
                    # Render a shape as tuple source text; the trailing
                    # comma keeps a one-element shape a tuple.
                    if len(t) == 0:
                        return "()"
                    else:
                        return "(%s,)" % ", ".join(
                            strify_allowing_none(sa) for sa in t)

                # Source of the raise statement shared by all shape checks.
                shape_mismatch_msg = (
                    "raise TypeError(\"shape mismatch on argument '%s' "
                    "(got: %%s, expected: %%s)\" "
                    "%% (%s.shape, %s))" %
                    (arg.name, arg.name, strify_tuple(arg.unvec_shape)))

                if kernel_arg.shape is None:
                    # No shape information: nothing to check.
                    pass

                elif any(shape_axis is None
                         for shape_axis in kernel_arg.shape):
                    # Some axis lengths are unknown: check the number of
                    # axes, then each axis whose length is known.
                    gen("if len(%s.shape) != %s:" %
                        (arg.name, len(arg.unvec_shape)))
                    with Indentation(gen):
                        gen(shape_mismatch_msg)

                    for i, shape_axis in enumerate(arg.unvec_shape):
                        if shape_axis is None:
                            continue

                        gen("if %s.shape[%d] != %s:" %
                            (arg.name, i, strify(shape_axis)))
                        with Indentation(gen):
                            gen(shape_mismatch_msg)

                else:  # not None, no Nones in tuple
                    gen("if %s.shape != %s:" %
                        (arg.name, strify(arg.unvec_shape)))
                    with Indentation(gen):
                        gen(shape_mismatch_msg)

                # }}}

                # Compare the array's strides against the expected strides
                # scaled by the dtype's item size.
                if arg.unvec_strides and kernel_arg.dim_tags:
                    itemsize = kernel_arg.dtype.numpy_dtype.itemsize
                    sym_strides = tuple(itemsize * s_i
                                        for s_i in arg.unvec_strides)
                    gen("if %s.strides != %s:" %
                        (arg.name, strify(sym_strides)))
                    with Indentation(gen):
                        gen("raise TypeError(\"strides mismatch on "
                            "argument '%s' (got: %%s, expected: %%s)\" "
                            "%% (%s.strides, %s))" %
                            (arg.name, arg.name, strify(sym_strides)))

                # Reject arrays with a nonzero offset when the argument
                # does not allow offsets.
                if not arg.allows_offset:
                    gen("if %s.offset:" % arg.name)
                    with Indentation(gen):
                        gen("raise ValueError(\"Argument '%s' does not "
                            "allow arrays with offsets. Try passing "
                            "default_offset=loopy.auto to make_kernel()."
                            "\")" % arg.name)
                        gen("")

        # }}}

        # NOTE(review): as with the allocation temporaries, the generated
        # flag is only deleted when argument checks are enabled.
        if possibly_made_by_loopy and not options.skip_arg_checks:
            gen("del _lpy_made_by_loopy")
            gen("")

        # Global and constant arrays are passed via their .base_data
        # attribute; every other array argument is passed as the object.
        if arg.arg_class in [lp.GlobalArg, lp.ConstantArg]:
            args.append("%s.base_data" % arg.name)
        else:
            args.append("%s" % arg.name)

        gen("")

        gen("# }}}")
        gen("")

    gen("# }}}")
    gen("")

    return args
Example #9
0
def as_python(mesh, function_name="make_mesh"):
    """Serialize *mesh* into Python source code.

    The returned string consists of an import preamble followed by the
    definition of a function named *function_name*. The body of that
    function rebuilds the vertices, element groups, facial adjacency
    groups, nodal adjacency and boundary tags of *mesh* and returns the
    resulting ``Mesh``.

    :arg mesh: the mesh to turn into source code.
    :arg function_name: name given to the generated function.
    :returns: the generated source code as a string.
    """

    from pytools.py_codegen import PythonCodeGenerator, Indentation

    def format_fagrp_kwargs(fagrp):
        # Keyword-argument text for one FacialAdjacencyGroup(...) call.
        kwargs = {
                "igroup": fagrp.igroup,
                "ineighbor_group": repr(fagrp.ineighbor_group),
                "elements": _numpy_array_as_python(fagrp.elements),
                "element_faces": _numpy_array_as_python(fagrp.element_faces),
                "neighbors": _numpy_array_as_python(fagrp.neighbors),
                "neighbor_faces": _numpy_array_as_python(fagrp.neighbor_faces),
                }
        return ",\n    ".join(
                "%s=%s" % (key, text) for key, text in six.iteritems(kwargs))

    def format_btag(btag):
        # Tags that are classes are emitted by name, anything else by repr().
        return btag.__name__ if isinstance(btag, type) else repr(btag)

    out = PythonCodeGenerator()
    out("""
        # generated by meshmode.mesh.as_python

        import numpy as np
        from meshmode.mesh import (
            Mesh, MeshElementGroup, FacialAdjacencyGroup,
            BTAG_ALL, BTAG_REALLY_ALL)

        """)

    out("def %s():" % function_name)
    with Indentation(out):
        out("vertices = " + _numpy_array_as_python(mesh.vertices))
        out("")
        out("groups = []")
        out("")

        # {{{ element groups

        for grp in mesh.groups:
            grp_cls = type(grp)
            out("import %s" % grp_cls.__module__)
            out("groups.append(%s.%s(" % (
                grp_cls.__module__,
                grp_cls.__name__))
            out("    order=%s," % grp.order)
            out("    vertex_indices=%s,"
                    % _numpy_array_as_python(grp.vertex_indices))
            out("    nodes=%s,"
                    % _numpy_array_as_python(grp.nodes))
            out("    unit_nodes=%s))"
                    % _numpy_array_as_python(grp.unit_nodes))

        # }}}

        # {{{ facial adjacency groups

        if not mesh._facial_adjacency_groups:
            # Nothing to rebuild: emit the stored (falsy) value verbatim.
            out("facial_adjacency_groups = %r" % mesh._facial_adjacency_groups)

        else:
            out("facial_adjacency_groups = []")

            for fagrp_map in mesh.facial_adjacency_groups:
                dict_entries = ",\n    ".join(
                    "%r: FacialAdjacencyGroup(%s)" % (
                        inb_grp, format_fagrp_kwargs(fagrp))
                    for inb_grp, fagrp in six.iteritems(fagrp_map))
                out("facial_adjacency_groups.append({%s})" % dict_entries)

        # }}}

        btags_str = ", ".join(map(format_btag, mesh.boundary_tags))

        # {{{ Mesh constructor call

        out("return Mesh(vertices, groups, skip_tests=True,")
        out("    vertex_id_dtype=np.%s," % mesh.vertex_id_dtype.name)
        out("    element_id_dtype=np.%s," % mesh.element_id_dtype.name)

        nodal_adj = mesh._nodal_adjacency
        if not isinstance(nodal_adj, NodalAdjacency):
            el_con_str = repr(nodal_adj)
        else:
            starts_str = _numpy_array_as_python(nodal_adj.neighbors_starts)
            neighbors_str = _numpy_array_as_python(nodal_adj.neighbors)
            el_con_str = "(%s, %s)" % (starts_str, neighbors_str)

        out("    nodal_adjacency=%s," % el_con_str)
        out("    facial_adjacency_groups=facial_adjacency_groups,")
        out("    boundary_tags=[%s]," % btags_str)
        out("    is_conforming=%s)" % repr(mesh.is_conforming))

        # }}}

    return out.get()