def capture_kernel_call(kernel, filename, queue, g_size, l_size, *args, **kwargs):
    """Write a standalone Python script that replays one kernel invocation.

    The script written to *filename* embeds the kernel source, a
    zlib-compressed and base64-encoded snapshot of every buffer-like
    argument, and a ``main()`` function that rebuilds the program and calls
    the kernel with the recorded sizes and arguments.

    :arg kernel: kernel object. Its ``_source`` and ``function_name``
        attributes are read; ``_scalar_arg_dtypes`` is reproduced in the
        script if present.
    :arg filename: path of the script to write.
    :arg queue: command queue used to copy the contents of ``cl.Buffer``
        arguments back to the host.
    :arg g_size: global work size (a tuple).
    :arg l_size: local work size (a tuple).
    :arg args: kernel arguments: ``cl.Buffer`` instances, Python or numpy
        scalars, or objects accepted by :class:`memoryview`.
    :arg kwargs: only ``g_times_l`` and ``global_offset`` are consulted.
    :raises RuntimeError: if the kernel source is not available, or if an
        argument is of a kind that cannot be captured.
    """
    from base64 import b64encode
    from zlib import compress

    # A missing attribute and an explicit None both mean "no source".
    source = getattr(kernel, "_source", None)
    if source is None:
        raise RuntimeError("cannot capture call, kernel source not available")

    cg = PythonCodeGenerator()

    cg("# generated by pyopencl.capture_call")
    cg("")
    cg("import numpy as np")
    cg("import pyopencl as cl")
    cg("from base64 import b64decode")
    cg("from zlib import decompress")
    cg("mf = cl.mem_flags")
    cg("")

    cg('CODE = r"""//CL//')
    for source_line in source.split("\n"):
        cg(source_line)
    cg('"""')

    # {{{ invocation

    # (variable name in the generated script, host-side data) pairs
    arg_data = []

    cg("")
    cg("")
    cg("def main():")
    with Indentation(cg):
        cg("ctx = cl.create_some_context()")
        cg("queue = cl.CommandQueue(ctx)")
        cg("")

        kernel_args = []

        for i, arg in enumerate(args):
            if isinstance(arg, cl.Buffer):
                # Snapshot the device buffer so the script can recreate it.
                buf = bytearray(arg.size)
                cl.enqueue_copy(queue, buf, arg)
                arg_data.append(("arg%d_data" % i, buf))
                cg("arg%d = cl.Buffer(ctx, "
                        "mf.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,"
                        % i)
                cg(" hostbuf=decompress(b64decode(arg%d_data)))" % i)
                kernel_args.append("arg%d" % i)
            elif isinstance(arg, (int, float)):
                kernel_args.append(repr(arg))
            elif isinstance(arg, np.integer):
                kernel_args.append(
                        "np.%s(%s)"
                        % (arg.dtype.type.__name__, repr(int(arg))))
            elif isinstance(arg, np.floating):
                kernel_args.append(
                        "np.%s(%s)"
                        % (arg.dtype.type.__name__, repr(float(arg))))
            elif isinstance(arg, np.complexfloating):
                kernel_args.append(
                        "np.%s(%s)"
                        % (arg.dtype.type.__name__, repr(complex(arg))))
            else:
                try:
                    arg_buf = memoryview(arg)
                except Exception as err:
                    # Anything without a usable buffer interface cannot be
                    # serialized into the script.
                    raise RuntimeError(
                            "cannot capture: "
                            "unsupported arg nr %d (0-based)" % i) from err

                arg_data.append(("arg%d_data" % i, arg_buf))
                kernel_args.append("decompress(b64decode(arg%d_data))" % i)

        cg("")

        if kwargs.get("g_times_l", False):
            # The generated call does not pass g_times_l, so fold the
            # local size into the global size here.
            dim = max(len(g_size), len(l_size))
            l_size = l_size + (1,) * (dim - len(l_size))
            g_size = g_size + (1,) * (dim - len(g_size))
            g_size = tuple(gs * ls for gs, ls in zip(g_size, l_size))

        global_offset = kwargs.get("global_offset", None)
        if global_offset is not None:
            kernel_args.append("global_offset=%s" % repr(global_offset))

        cg("prg = cl.Program(ctx, CODE).build()")
        cg("knl = prg.%s" % kernel.function_name)

        if hasattr(kernel, "_scalar_arg_dtypes"):
            def strify_dtype(d):
                if d is None:
                    return "None"

                s = repr(np.dtype(d))
                if s.startswith("dtype"):
                    # "dtype('int32')" -> "np.dtype('int32')"
                    s = "np." + s
                return s

            cg("knl.set_scalar_arg_dtypes((%s,))"
                    % ", ".join(
                        strify_dtype(dt) for dt in kernel._scalar_arg_dtypes))

        cg("knl(queue, %s, %s," % (repr(g_size), repr(l_size)))
        cg(" %s)" % ", ".join(kernel_args))
        cg("")
        cg("queue.finish()")

    # }}}

    # {{{ data

    cg("")
    line_len = 70

    for name, val in arg_data:
        cg("%s = (" % name)
        with Indentation(cg):
            # b64encode returns bytes; decode to text so that the emitted
            # literals contain the payload itself rather than the "b'...'"
            # repr of a bytes object (which b64decode could not read back).
            encoded = b64encode(compress(memoryview(val))).decode("ascii")
            for start in range(0, len(encoded), line_len):
                cg(repr(encoded[start:start + line_len]))

            cg(")")

    # }}}

    # {{{ file trailer

    cg("")
    cg("if __name__ == \"__main__\":")
    with Indentation(cg):
        cg("main()")
    cg("")

    cg("# vim: filetype=pyopencl")

    # }}}

    with open(filename, "w") as outf:
        outf.write(cg.get())
def generate_integer_arg_finding_from_shapes(
        self, gen, kernel, implemented_data_info):
    """Emit code that recovers missing integer arguments from array shapes.

    For every ``GlobalArg`` whose shape expression along some axis depends
    on exactly one integral kernel argument, the affine relation between
    that argument and ``<array>.shape[axis]`` is solved for the argument.
    The generated code assigns the solved value when the integer argument
    is ``None`` and the corresponding array was supplied.

    :arg gen: code generator that receives the emitted lines.
    :arg kernel: kernel whose ``arg_dict`` provides the argument dtypes.
    :arg implemented_data_info: iterable of argument descriptors (``name``,
        ``arg_class`` and ``shape`` are read).
    """
    from loopy.kernel.data import GlobalArg
    from loopy.symbolic import DependencyMapper, StringifyMapper
    from loopy.diagnostic import ParameterFinderWarning

    from pymbolic import var
    from pymbolic.algorithm import solve_affine_equations_for
    from warnings import warn

    find_deps = DependencyMapper()
    to_str = StringifyMapper()

    # Maps an integer argument name to a list of
    # (array argument name, expression) pairs, where the expression
    # computes the integer argument from that array's shape.
    candidates = {}

    for arg in implemented_data_info:
        if arg.arg_class is not GlobalArg:
            continue

        shape_attr = var(arg.name).attr("shape")

        for axis, axis_len in enumerate(arg.shape):
            if axis_len is None:
                continue

            deps = find_deps(axis_len)
            if len(deps) != 1:
                continue

            (size_var,) = deps
            if not kernel.arg_dict[size_var.name].dtype.is_integral():
                continue

            try:
                solution = solve_affine_equations_for(
                        [size_var.name],
                        [(axis_len, shape_attr.index(axis))])
                size_expr = solution[size_var]
            except Exception as exc:
                # Best effort only: report and move on to the next axis.
                warn(
                        "Unable to generate code to automatically "
                        "find '%s' from the shape of '%s':\n%s"
                        % (size_var.name, arg.name, str(exc)),
                        ParameterFinderWarning)
            else:
                candidates.setdefault(size_var.name, []).append(
                        (arg.name, size_expr))

    gen("# {{{ find integer arguments from shapes")
    gen("")

    for int_name, sources in six.iteritems(candidates):
        gen("if %s is None:" % int_name)
        with Indentation(gen):
            for position, (array_name, size_expr) in enumerate(sources):
                keyword = "if" if position == 0 else "elif"
                gen("%s %s is not None:" % (keyword, array_name))
                with Indentation(gen):
                    gen("%s = %s" % (int_name, to_str(size_expr)))

        gen("")

    gen("# }}}")
    gen("")
def _generate_enqueue_and_set_args_module(function_name,
        num_passed_args, num_cl_args,
        arg_types, include_debug_code,
        work_around_arg_count_bug, warn_about_arg_count_bug):
    """Generate the module holding a kernel's enqueue and ``set_args`` code.

    Two functions are generated: ``enqueue_knl_<function_name>``, which sets
    all arguments and then enqueues the kernel, and ``set_args``, which only
    sets the arguments.

    :arg arg_types: per-argument type information, or *None* to fall back
        to the generic argument handling code.
    :returns: a tuple ``(module, enqueue_name)`` where *module* is the
        result of ``get_picklable_module`` on the code generator and
        *enqueue_name* is the name of the generated enqueue function.
    """
    arg_names = [f"arg{i}" for i in range(num_passed_args)]

    def gen_arg_setting(in_enqueue):
        # Mirrors the return convention of the specific-handling generator:
        # (code, wait_for_parts) when enqueueing, just the code otherwise.
        if arg_types is not None:
            return generate_specific_arg_handling_body(
                    function_name, num_cl_args, arg_types,
                    warn_about_arg_count_bug=warn_about_arg_count_bug,
                    work_around_arg_count_bug=work_around_arg_count_bug,
                    in_enqueue=in_enqueue,
                    include_debug_code=include_debug_code)

        generic_body = generate_generic_arg_handling_body(num_passed_args)
        if in_enqueue:
            return generic_body, []
        return generic_body

    gen = PythonCodeGenerator()

    gen("from struct import pack")
    gen("from pyopencl import status_code")
    gen("import numpy as np")
    gen("import pyopencl._cl as _cl")
    gen("")

    # {{{ generate _enqueue

    enqueue_name = f"enqueue_knl_{function_name}"
    enqueue_params = (
            ["self", "queue", "global_size", "local_size"]
            + arg_names
            + [
                "global_offset=None",
                "g_times_l=None",
                "allow_empty_ndrange=False",
                "wait_for=None",
                ])
    gen("def %s(%s):" % (enqueue_name, ", ".join(enqueue_params)))

    with Indentation(gen):
        subgen, wait_for_parts = gen_arg_setting(in_enqueue=True)
        gen.extend(subgen)

        if not wait_for_parts:
            wait_for_expr = "wait_for"
        else:
            # Merge the caller-supplied events with those attached to
            # the array arguments.
            starred_parts = ", ".join("*" + part for part in wait_for_parts)
            wait_for_expr = (
                    "[*(() if wait_for is None else wait_for), "
                    + starred_parts
                    + "]")

        # Using positional args here because pybind is slow with keyword args
        gen(f"""
            return _cl.enqueue_nd_range_kernel(queue, self,
                    global_size, local_size, global_offset,
                    {wait_for_expr},
                    g_times_l, allow_empty_ndrange)
            """)

    # }}}

    # {{{ generate set_args

    set_args_params = ["self"] + arg_names
    gen("")
    gen("def set_args(%s):" % (", ".join(set_args_params)))

    with Indentation(gen):
        gen.extend(gen_arg_setting(in_enqueue=False))

    # }}}

    module = gen.get_picklable_module(
            name=f"<pyopencl invoker for '{function_name}'>")
    return (module, enqueue_name)
def generate_specific_arg_handling_body(function_name, num_cl_args, arg_types, *,
        work_around_arg_count_bug, warn_about_arg_count_bug,
        in_enqueue, include_debug_code):
    """Generate argument-setting code specialized to known argument types.

    Arguments are sorted into three batches, each emitted as one
    ``self._set_arg*_multi(...)`` call taking a flat tuple:

    * generic arguments passed as-is: ``(index, expr)`` pairs,
    * buffer arguments: ``(index, expr)`` pairs where *expr* is either the
      argument itself or a ``pack(...)`` expression,
    * buffer arguments packed by the callee:
      ``(index, typechar, expr)`` triples.

    :arg function_name: kernel name, used in diagnostics only.
    :arg num_cl_args: number of arguments the CL kernel expects; compared
        against the number of CL arguments produced here.
    :arg arg_types: sequence of per-argument types: *None* (pass through),
        a ``VectorArg``, or something accepted by ``np.dtype``.
    :arg work_around_arg_count_bug: workaround selector; ``"pocl"`` and
        ``"apple"`` are given special treatment for ``complex128``.
    :arg warn_about_arg_count_bug: whether to warn when complex arguments
        are encountered.
    :arg in_enqueue: if true, additionally collect event expressions of
        ``VectorArg`` arguments and return them.
    :arg include_debug_code: whether to emit contiguity and queue checks
        for ``VectorArg`` arguments.
    :returns: the code generator, or ``(generator, wait_for_parts)`` if
        *in_enqueue* is true.
    :raises TypeError: on an unexpected complex dtype, or if the number of
        generated CL arguments differs from *num_cl_args*.
    :raises NotImplementedError: for ``complex128`` arguments when the
        ``"apple"`` workaround is requested.
    """
    assert work_around_arg_count_bug is not None
    assert warn_about_arg_count_bug is not None

    fp_arg_count = 0
    cl_arg_idx = 0

    gen = PythonCodeGenerator()

    if not arg_types:
        gen("pass")

    gen_indices_and_args = []
    buf_indices_and_args = []
    buf_pack_indices_and_args = []

    def add_buf_arg(arg_idx, typechar, expr_str):
        # Type chars the callee can pack itself are handed over raw;
        # everything else is packed by the generated code.
        if typechar in BUF_PACK_TYPECHARS:
            buf_pack_indices_and_args.append(arg_idx)
            buf_pack_indices_and_args.append(repr(typechar.encode()))
            buf_pack_indices_and_args.append(expr_str)
        else:
            buf_indices_and_args.append(arg_idx)
            buf_indices_and_args.append(f"pack('{typechar}', {expr_str})")

    wait_for_parts = []

    for arg_idx, arg_type in enumerate(arg_types):
        arg_var = "arg%d" % arg_idx

        if arg_type is None:
            gen_indices_and_args.append(cl_arg_idx)
            gen_indices_and_args.append(arg_var)
            cl_arg_idx += 1
            gen("")
            continue

        elif isinstance(arg_type, VectorArg):
            if include_debug_code:
                gen(f"if not {arg_var}.flags.forc:")
                with Indentation(gen):
                    gen("raise RuntimeError('only contiguous arrays may '")
                    gen(" 'be used as arguments to this operation')")
                    gen("")

            if in_enqueue and include_debug_code:
                gen(f"assert {arg_var}.queue is None or {arg_var}.queue == queue, "
                    "'queues for all arrays must match the queue supplied "
                    "to enqueue'")

            gen_indices_and_args.append(cl_arg_idx)
            gen_indices_and_args.append(f"{arg_var}.base_data")
            cl_arg_idx += 1

            if arg_type.with_offset:
                # The offset travels as an extra int64 kernel argument.
                add_buf_arg(cl_arg_idx, np.dtype(np.int64).char,
                        f"{arg_var}.offset")
                cl_arg_idx += 1

            if in_enqueue:
                wait_for_parts.append(f"{arg_var}.events")

            continue

        arg_dtype = np.dtype(arg_type)

        if arg_dtype.char == "V":
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(arg_var)
            cl_arg_idx += 1

        elif arg_dtype.kind == "c":
            if warn_about_arg_count_bug:
                warn(f"{function_name}: arguments include complex numbers, and "
                        "some (but not all) of the target devices mishandle "
                        "struct kernel arguments (hence the workaround is "
                        "disabled)", stacklevel=2)

            if arg_dtype == np.complex64:
                arg_char = "f"
            elif arg_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError("unexpected complex type: %s" % arg_dtype)

            if (work_around_arg_count_bug == "pocl"
                    and arg_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                # Pass real and imaginary parts as two separate arguments.
                add_buf_arg(cl_arg_idx, arg_char, f"{arg_var}.real")
                cl_arg_idx += 1
                add_buf_arg(cl_arg_idx, arg_char, f"{arg_var}.imag")
                cl_arg_idx += 1

            elif (work_around_arg_count_bug == "apple"
                    and arg_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                raise NotImplementedError(
                        "No work-around to "
                        "Apple's broken structs-as-kernel arg "
                        "handling has been found. "
                        "Cannot pass complex numbers to kernels.")

            else:
                buf_indices_and_args.append(cl_arg_idx)
                buf_indices_and_args.append(
                    f"pack('{arg_char}{arg_char}', {arg_var}.real, {arg_var}.imag)"
                    )
                cl_arg_idx += 1

            fp_arg_count += 2

        else:
            if arg_dtype.kind == "f":
                fp_arg_count += 1

            arg_char = arg_dtype.char
            arg_char = _type_char_map.get(arg_char, arg_char)
            add_buf_arg(cl_arg_idx, arg_char, arg_var)
            cl_arg_idx += 1

        gen("")

    for arg_kind, args_and_indices, entry_length in [
            ("", gen_indices_and_args, 2),
            ("_buf", buf_indices_and_args, 2),
            ("_buf_pack", buf_pack_indices_and_args, 3),
            ]:
        assert len(args_and_indices) % entry_length == 0
        if args_and_indices:
            gen(f"self._set_arg{arg_kind}_multi("
                    f"({', '.join(str(i) for i in args_and_indices)},), "
                    ")")

    if cl_arg_idx != num_cl_args:
        raise TypeError(
            "length of argument list (%d) and "
            "CL-generated number of arguments (%d) do not agree"
            % (cl_arg_idx, num_cl_args))

    if in_enqueue:
        return gen, wait_for_parts
    else:
        return gen
def generate_invocation(self, gen, kernel_name, args, kernel,
        implemented_data_info):
    """Emit a loop that calls every entry of ``_lpy_c_kernels`` with *args*.

    :arg gen: code generator that receives the emitted lines.
    :arg args: argument expression strings, joined with ``", "`` to form
        the call's argument list.

    *kernel_name*, *kernel* and *implemented_data_info* are not used here.
    """
    call_args = ", ".join(args)

    gen("for knl in _lpy_c_kernels:")
    with Indentation(gen):
        gen("knl(%s)" % call_args)
def generate_arg_setup(self, gen, kernel, implemented_data_info, options):
    """Emit wrapper code that checks, allocates and collects array arguments.

    For each array argument the generated code (subject to *options*)
    handles non-numpy inputs via ``self.handle_non_numpy_arg``, insists
    that required arguments were supplied, allocates written arrays that
    were not passed via ``self.handle_alloc``, and verifies dtype, shape,
    strides and offset.

    :arg gen: code generator that receives the emitted lines.
    :arg kernel: kernel providing ``get_written_variables()`` and
        ``impl_arg_to_arg``.
    :arg implemented_data_info: sequence of argument descriptors.
    :arg options: ``no_numpy`` and ``skip_arg_checks`` are consulted.
    :returns: list of expression strings to pass to the kernel invocation.
    :raises LoopyError: if a kernel argument follows a non-argument entry,
        or if a written array's dtype is not a ``NumpyType``.
    """
    import loopy as lp

    from loopy.kernel.data import KernelArgument
    from loopy.kernel.array import ArrayBase
    from loopy.symbolic import StringifyMapper
    from loopy.types import NumpyType

    gen("# {{{ set up array arguments")
    gen("")

    if not options.no_numpy:
        gen("_lpy_encountered_numpy = False")
        gen("_lpy_encountered_dev = False")
        gen("")

    args = []

    strify = StringifyMapper()

    # These helpers only depend on 'strify', so define them once rather
    # than once per argument.
    def strify_allowing_none(shape_axis):
        if shape_axis is None:
            return "None"
        else:
            return strify(shape_axis)

    def strify_tuple(t):
        if len(t) == 0:
            return "()"
        else:
            return "(%s,)" % ", ".join(
                    strify_allowing_none(sa)
                    for sa in t)

    # Loop-invariant: look it up once instead of once per argument.
    written_vars = kernel.get_written_variables()

    expect_no_more_arguments = False

    for arg in implemented_data_info:
        is_written = arg.base_name in written_vars
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if not issubclass(arg.arg_class, KernelArgument):
            # Non-argument entries must come last.
            expect_no_more_arguments = True
            continue

        if expect_no_more_arguments:
            raise LoopyError(
                    "Further arguments encountered after arg info "
                    "describing a global temporary variable")

        if not issubclass(arg.arg_class, ArrayBase):
            # Scalars are passed through unchanged.
            args.append(arg.name)
            continue

        is_array_like = arg.arg_class in [lp.ArrayArg, lp.ConstantArg]

        gen("# {{{ process %s" % arg.name)
        gen("")

        if not options.no_numpy:
            self.handle_non_numpy_arg(gen, arg)

        if not options.skip_arg_checks and not is_written:
            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                gen("raise RuntimeError(\"input argument '%s' must "
                        "be supplied\")" % arg.name)
                gen("")

        if (is_written
                and arg.arg_class is lp.ImageArg
                and not options.skip_arg_checks):
            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                gen("raise RuntimeError(\"written image '%s' must "
                        "be supplied\")" % arg.name)
                gen("")

        if is_written and arg.shape is None and not options.skip_arg_checks:
            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                gen("raise RuntimeError(\"written argument '%s' has "
                        "unknown shape and must be supplied\")" % arg.name)
                gen("")

        possibly_made_by_loopy = False

        # {{{ allocate written arrays, if needed

        if is_written and is_array_like \
                and arg.shape is not None \
                and all(si is not None for si in arg.shape):

            if not isinstance(arg.dtype, NumpyType):
                raise LoopyError(
                        "do not know how to pass arg of type '%s'"
                        % arg.dtype)

            possibly_made_by_loopy = True
            gen("_lpy_made_by_loopy = False")
            gen("")

            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                self.handle_alloc(
                        gen, arg, kernel_arg, strify, options.skip_arg_checks)
                gen("_lpy_made_by_loopy = True")
                gen("")

        # }}}

        # {{{ argument checking

        if is_array_like and not options.skip_arg_checks:
            # Arrays allocated by the wrapper itself need no checking.
            if possibly_made_by_loopy:
                gen("if not _lpy_made_by_loopy:")
            else:
                gen("if True:")

            with Indentation(gen):
                gen("if %s.dtype != %s:"
                        % (arg.name, self.python_dtype_str(
                            kernel_arg.dtype.numpy_dtype)))
                with Indentation(gen):
                    gen("raise TypeError(\"dtype mismatch on argument '%s' "
                            "(got: %%s, expected: %s)\" %% %s.dtype)"
                            % (arg.name, arg.dtype, arg.name))

                # {{{ generate shape checking code

                shape_mismatch_msg = (
                        "raise TypeError(\"shape mismatch on argument '%s' "
                        "(got: %%s, expected: %%s)\" "
                        "%% (%s.shape, %s))"
                        % (arg.name, arg.name, strify_tuple(arg.unvec_shape)))

                if kernel_arg.shape is None:
                    pass

                elif any(shape_axis is None for shape_axis in kernel_arg.shape):
                    # Partially known shape: check the rank, then every
                    # axis whose length is known.
                    gen("if len(%s.shape) != %s:"
                            % (arg.name, len(arg.unvec_shape)))
                    with Indentation(gen):
                        gen(shape_mismatch_msg)

                    for i, shape_axis in enumerate(arg.unvec_shape):
                        if shape_axis is None:
                            continue

                        gen("if %s.shape[%d] != %s:"
                                % (arg.name, i, strify(shape_axis)))
                        with Indentation(gen):
                            gen(shape_mismatch_msg)

                else:  # not None, no Nones in tuple
                    gen("if %s.shape != %s:"
                            % (arg.name, strify(arg.unvec_shape)))
                    with Indentation(gen):
                        gen(shape_mismatch_msg)

                # }}}

                if arg.unvec_strides and kernel_arg.dim_tags:
                    itemsize = kernel_arg.dtype.numpy_dtype.itemsize
                    sym_strides = tuple(
                            itemsize * s_i for s_i in arg.unvec_strides)

                    ndim = len(arg.unvec_shape)
                    shape = ["_lpy_shape_%d" % i for i in range(ndim)]
                    strides = ["_lpy_stride_%d" % i for i in range(ndim)]

                    gen("(%s,) = %s.shape" % (", ".join(shape), arg.name))
                    gen("(%s,) = %s.strides" % (", ".join(strides), arg.name))

                    gen("if not %s:"
                            % self.get_strides_check_expr(
                                shape, strides,
                                (strify(s) for s in sym_strides)))
                    with Indentation(gen):
                        gen("_lpy_got = tuple(stride "
                                "for (dim, stride) in zip(%s.shape, %s.strides) "
                                "if dim > 1)"
                                % (arg.name, arg.name))
                        gen("_lpy_expected = tuple(stride "
                                "for (dim, stride) in zip(%s.shape, %s) "
                                "if dim > 1)"
                                % (arg.name, strify_tuple(sym_strides)))

                        gen("raise TypeError(\"strides mismatch on "
                                "argument '%s' "
                                "(after removing unit length dims, "
                                "got: %%s, expected: %%s)\" "
                                "%% (_lpy_got, _lpy_expected))"
                                % arg.name)

                if not arg.allows_offset:
                    gen("if hasattr(%s, 'offset') and %s.offset:"
                            % (arg.name, arg.name))
                    with Indentation(gen):
                        gen("raise ValueError(\"Argument '%s' does not "
                                "allow arrays with offsets. Try passing "
                                "default_offset=loopy.auto to make_kernel()."
                                "\")" % arg.name)
                        gen("")

        # }}}

        if possibly_made_by_loopy and not options.skip_arg_checks:
            gen("del _lpy_made_by_loopy")
            gen("")

        if is_array_like:
            args.append(self.get_arg_pass(arg))
        else:
            args.append("%s" % arg.name)

        gen("")

        gen("# }}}")
        gen("")

    gen("# }}}")
    gen("")

    return args
def generate_invoker(kernel, codegen_result):
    """Build the Python wrapper function that invokes a generated kernel.

    The wrapper accepts the system arguments (``_lpy_cl_kernels``,
    ``queue``, ``allocator``, ``wait_for``, ``out_host``) followed by one
    keyword argument per kernel argument. It finds integer arguments, checks
    value arguments, sets up array arguments, calls the host program and
    returns the event together with the written arguments (as a dict if
    ``options.return_dict`` is set, otherwise as a tuple).

    If ``options.write_wrapper`` is set, the wrapper source is printed
    (when it is ``True``) or written to the file it names.

    :arg kernel: kernel providing ``options``, ``name`` and
        ``get_written_variables()``.
    :arg codegen_result: provides ``implemented_data_info``,
        ``host_code()`` and ``host_program``.
    :returns: the result of ``get_function()`` on the function generator.
    """
    from loopy.kernel.data import KernelArgument

    options = kernel.options
    implemented_data_info = codegen_result.implemented_data_info
    host_code = codegen_result.host_code()

    system_args = [
            "_lpy_cl_kernels", "queue", "allocator=None", "wait_for=None",
            # ignored if options.no_numpy
            "out_host=None"
            ]

    # Entries that are actual kernel arguments, in declaration order.
    kernel_arguments = [
            idi for idi in implemented_data_info
            if issubclass(idi.arg_class, KernelArgument)]

    gen = PythonFunctionGenerator(
            "invoke_%s_loopy_kernel" % kernel.name,
            system_args + ["%s=None" % idi.name for idi in kernel_arguments])

    gen.add_to_preamble("from __future__ import division")
    gen.add_to_preamble("")
    gen.add_to_preamble("import pyopencl as _lpy_cl")
    gen.add_to_preamble("import pyopencl.array as _lpy_cl_array")
    gen.add_to_preamble("import pyopencl.tools as _lpy_cl_tools")
    gen.add_to_preamble("import numpy as _lpy_np")
    gen.add_to_preamble("")
    gen.add_to_preamble(host_code)
    gen.add_to_preamble("")

    gen("if allocator is None:")
    with Indentation(gen):
        gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)")
    gen("")

    generate_integer_arg_finding_from_shapes(gen, kernel, implemented_data_info)
    generate_integer_arg_finding_from_offsets(gen, kernel, implemented_data_info)
    generate_integer_arg_finding_from_strides(gen, kernel, implemented_data_info)
    generate_value_arg_check(gen, kernel, implemented_data_info)

    args = generate_arg_setup(gen, kernel, implemented_data_info, options)

    # {{{ generate invocation

    gen("_lpy_evt = {kernel_name}({args})"
            .format(
                kernel_name=codegen_result.host_program.name,
                args=", ".join(
                    ["_lpy_cl_kernels", "queue"]
                    + args
                    + ["wait_for=wait_for"])))

    # }}}

    # {{{ output

    # Computed once and shared by the host-transfer code and the return
    # statement below.
    written_vars = kernel.get_written_variables()
    out_args = [
            arg for arg in kernel_arguments
            if arg.base_name in written_vars]

    if not options.no_numpy:
        gen("if out_host is None and (_lpy_encountered_numpy "
                "and not _lpy_encountered_dev):")
        with Indentation(gen):
            gen("out_host = True")

        gen("if out_host:")
        with Indentation(gen):
            gen("pass")  # keeps the block valid if there are no outputs
            for arg in out_args:
                gen("%s = %s.get(queue=queue)" % (arg.name, arg.name))

        gen("")

    if options.return_dict:
        gen("return _lpy_evt, {%s}"
                % ", ".join(
                    "\"%s\": %s" % (arg.name, arg.name)
                    for arg in out_args))
    elif out_args:
        gen("return _lpy_evt, (%s,)"
                % ", ".join(arg.name for arg in out_args))
    else:
        gen("return _lpy_evt, ()")

    # }}}

    if options.write_wrapper:
        output = gen.get()
        if options.highlight_wrapper:
            output = get_highlighted_python_code(output)

        if options.write_wrapper is True:
            print(output)
        else:
            with open(options.write_wrapper, "w") as outf:
                outf.write(output)

    return gen.get_function()
def generate_arg_setup(gen, kernel, implemented_data_info, options):
    """Emit wrapper code that converts, allocates and checks array arguments.

    For each array argument the generated code (subject to *options*)
    transfers numpy arrays to the device, insists that required arguments
    were supplied, allocates written arrays that were not passed, and
    verifies dtype, shape, strides and offset.

    :arg gen: code generator that receives the emitted lines.
    :arg kernel: kernel providing ``get_written_variables()`` and
        ``impl_arg_to_arg``.
    :arg implemented_data_info: sequence of argument descriptors.
    :arg options: ``no_numpy`` and ``skip_arg_checks`` are consulted.
    :returns: list of expression strings to pass to the kernel invocation.
    :raises LoopyError: if a kernel argument follows a non-argument entry,
        or if a written array's dtype is not a ``NumpyType``.
    """
    import loopy as lp

    from loopy.kernel.data import KernelArgument
    from loopy.kernel.array import ArrayBase
    from loopy.symbolic import StringifyMapper
    # Imported locally (as in the method variant of this function) so the
    # isinstance check below does not depend on a module-level import.
    from loopy.types import NumpyType
    from pymbolic import var

    gen("# {{{ set up array arguments")
    gen("")

    if not options.no_numpy:
        gen("_lpy_encountered_numpy = False")
        gen("_lpy_encountered_dev = False")
        gen("")

    args = []

    strify = StringifyMapper()

    # These helpers only depend on 'strify', so define them once rather
    # than once per argument.
    def strify_allowing_none(shape_axis):
        if shape_axis is None:
            return "None"
        else:
            return strify(shape_axis)

    def strify_tuple(t):
        if len(t) == 0:
            return "()"
        else:
            return "(%s,)" % ", ".join(
                    strify_allowing_none(sa)
                    for sa in t)

    # Loop-invariant: look it up once instead of once per argument.
    written_vars = kernel.get_written_variables()

    expect_no_more_arguments = False

    for arg in implemented_data_info:
        is_written = arg.base_name in written_vars
        kernel_arg = kernel.impl_arg_to_arg.get(arg.name)

        if not issubclass(arg.arg_class, KernelArgument):
            # Non-argument entries must come last.
            expect_no_more_arguments = True
            continue

        if expect_no_more_arguments:
            raise LoopyError("Further arguments encountered after arg info "
                    "describing a global temporary variable")

        if not issubclass(arg.arg_class, ArrayBase):
            # Scalars are passed through unchanged.
            args.append(arg.name)
            continue

        is_array_like = arg.arg_class in [lp.GlobalArg, lp.ConstantArg]

        gen("# {{{ process %s" % arg.name)
        gen("")

        if not options.no_numpy:
            gen("if isinstance(%s, _lpy_np.ndarray):" % arg.name)
            with Indentation(gen):
                gen("# synchronous, nothing to worry about")
                gen("%s = _lpy_cl_array.to_device("
                        "queue, %s, allocator=allocator)"
                        % (arg.name, arg.name))
                gen("_lpy_encountered_numpy = True")
            gen("elif %s is not None:" % arg.name)
            with Indentation(gen):
                gen("_lpy_encountered_dev = True")

            gen("")

        if not options.skip_arg_checks and not is_written:
            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                gen("raise RuntimeError(\"input argument '%s' must "
                        "be supplied\")" % arg.name)
                gen("")

        if (is_written
                and arg.arg_class is lp.ImageArg
                and not options.skip_arg_checks):
            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                gen("raise RuntimeError(\"written image '%s' must "
                        "be supplied\")" % arg.name)
                gen("")

        if is_written and arg.shape is None and not options.skip_arg_checks:
            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                gen("raise RuntimeError(\"written argument '%s' has "
                        "unknown shape and must be supplied\")" % arg.name)
                gen("")

        possibly_made_by_loopy = False

        # {{{ allocate written arrays, if needed

        if is_written and is_array_like \
                and arg.shape is not None:
            if not isinstance(arg.dtype, NumpyType):
                raise LoopyError("do not know how to pass arg of type '%s'"
                        % arg.dtype)

            possibly_made_by_loopy = True
            gen("_lpy_made_by_loopy = False")
            gen("")

            gen("if %s is None:" % arg.name)
            with Indentation(gen):
                num_axes = len(arg.strides)
                for i in range(num_axes):
                    gen("_lpy_shape_%d = %s" % (i, strify(arg.unvec_shape[i])))

                itemsize = kernel_arg.dtype.numpy_dtype.itemsize
                for i in range(num_axes):
                    gen("_lpy_strides_%d = %s" % (i, strify(
                        itemsize * arg.unvec_strides[i])))

                if not options.skip_arg_checks:
                    for i in range(num_axes):
                        gen("assert _lpy_strides_%d > 0, "
                                "\"'%s' has negative stride in axis %d\""
                                % (i, arg.name, i))

                sym_strides = tuple(
                        var("_lpy_strides_%d" % i)
                        for i in range(num_axes))
                sym_shape = tuple(
                        var("_lpy_shape_%d" % i)
                        for i in range(num_axes))

                # Byte offset of the last element plus one item.
                alloc_size_expr = (
                        sum(astrd * (alen - 1)
                            for alen, astrd in zip(sym_shape, sym_strides))
                        + itemsize)

                gen("_lpy_alloc_size = %s" % strify(alloc_size_expr))
                gen("%(name)s = _lpy_cl_array.Array(queue, %(shape)s, "
                        "%(dtype)s, strides=%(strides)s, "
                        "data=allocator(_lpy_alloc_size), allocator=allocator)"
                        % dict(
                            name=arg.name,
                            shape=strify(sym_shape),
                            strides=strify(sym_strides),
                            dtype=python_dtype_str(kernel_arg.dtype.numpy_dtype)))

                if not options.skip_arg_checks:
                    for i in range(num_axes):
                        gen("del _lpy_shape_%d" % i)
                        gen("del _lpy_strides_%d" % i)
                    gen("del _lpy_alloc_size")
                    gen("")

                gen("_lpy_made_by_loopy = True")
                gen("")

        # }}}

        # {{{ argument checking

        if is_array_like and not options.skip_arg_checks:
            # Arrays allocated by the wrapper itself need no checking.
            if possibly_made_by_loopy:
                gen("if not _lpy_made_by_loopy:")
            else:
                gen("if True:")

            with Indentation(gen):
                gen("if %s.dtype != %s:"
                        % (arg.name, python_dtype_str(
                            kernel_arg.dtype.numpy_dtype)))
                with Indentation(gen):
                    gen("raise TypeError(\"dtype mismatch on argument '%s' "
                            "(got: %%s, expected: %s)\" %% %s.dtype)"
                            % (arg.name, arg.dtype, arg.name))

                # {{{ generate shape checking code

                shape_mismatch_msg = (
                        "raise TypeError(\"shape mismatch on argument '%s' "
                        "(got: %%s, expected: %%s)\" "
                        "%% (%s.shape, %s))"
                        % (arg.name, arg.name, strify_tuple(arg.unvec_shape)))

                if kernel_arg.shape is None:
                    pass

                elif any(shape_axis is None for shape_axis in kernel_arg.shape):
                    # Partially known shape: check the rank, then every
                    # axis whose length is known.
                    gen("if len(%s.shape) != %s:"
                            % (arg.name, len(arg.unvec_shape)))
                    with Indentation(gen):
                        gen(shape_mismatch_msg)

                    for i, shape_axis in enumerate(arg.unvec_shape):
                        if shape_axis is None:
                            continue

                        gen("if %s.shape[%d] != %s:"
                                % (arg.name, i, strify(shape_axis)))
                        with Indentation(gen):
                            gen(shape_mismatch_msg)

                else:  # not None, no Nones in tuple
                    gen("if %s.shape != %s:"
                            % (arg.name, strify(arg.unvec_shape)))
                    with Indentation(gen):
                        gen(shape_mismatch_msg)

                # }}}

                if arg.unvec_strides and kernel_arg.dim_tags:
                    itemsize = kernel_arg.dtype.numpy_dtype.itemsize
                    sym_strides = tuple(
                            itemsize * s_i for s_i in arg.unvec_strides)
                    gen("if %s.strides != %s:"
                            % (arg.name, strify(sym_strides)))
                    with Indentation(gen):
                        gen("raise TypeError(\"strides mismatch on "
                                "argument '%s' (got: %%s, expected: %%s)\" "
                                "%% (%s.strides, %s))"
                                % (arg.name, arg.name, strify(sym_strides)))

                if not arg.allows_offset:
                    gen("if %s.offset:" % arg.name)
                    with Indentation(gen):
                        gen("raise ValueError(\"Argument '%s' does not "
                                "allow arrays with offsets. Try passing "
                                "default_offset=loopy.auto to make_kernel()."
                                "\")" % arg.name)
                        gen("")

        # }}}

        if possibly_made_by_loopy and not options.skip_arg_checks:
            gen("del _lpy_made_by_loopy")
            gen("")

        if is_array_like:
            args.append("%s.base_data" % arg.name)
        else:
            args.append("%s" % arg.name)

        gen("")

        gen("# }}}")
        gen("")

    gen("# }}}")
    gen("")

    return args
def as_python(mesh, function_name="make_mesh"):
    """Return Python source code (as a string) that recreates *mesh*.

    The generated code defines a function named *function_name* which
    rebuilds the vertices, the element groups, the facial adjacency groups
    and the nodal adjacency, and returns the resulting ``Mesh``.
    """

    from pytools.py_codegen import PythonCodeGenerator, Indentation

    cg = PythonCodeGenerator()
    cg("""
        # generated by meshmode.mesh.as_python

        import numpy as np
        from meshmode.mesh import (
            Mesh, MeshElementGroup, FacialAdjacencyGroup,
            BTAG_ALL, BTAG_REALLY_ALL)

        """)

    def fagrp_params_str(fagrp):
        # Keyword arguments of the FacialAdjacencyGroup constructor call,
        # in the order in which they are emitted.
        params = (
                ("igroup", fagrp.igroup),
                ("ineighbor_group", repr(fagrp.ineighbor_group)),
                ("elements", _numpy_array_as_python(fagrp.elements)),
                ("element_faces", _numpy_array_as_python(fagrp.element_faces)),
                ("neighbors", _numpy_array_as_python(fagrp.neighbors)),
                ("neighbor_faces", _numpy_array_as_python(fagrp.neighbor_faces)),
                )
        return ",\n ".join("%s=%s" % (key, value) for key, value in params)

    def strify_boundary_tag(btag):
        # Classes are referred to by name, anything else by its repr.
        if isinstance(btag, type):
            return btag.__name__
        return repr(btag)

    cg("def %s():" % function_name)
    with Indentation(cg):
        cg("vertices = " + _numpy_array_as_python(mesh.vertices))
        cg("")
        cg("groups = []")
        cg("")

        for group in mesh.groups:
            group_cls = type(group)
            cg("import %s" % group_cls.__module__)
            cg("groups.append(%s.%s(" % (
                group_cls.__module__, group_cls.__name__))
            cg(" order=%s," % group.order)
            cg(" vertex_indices=%s,"
                    % _numpy_array_as_python(group.vertex_indices))
            cg(" nodes=%s," % _numpy_array_as_python(group.nodes))
            cg(" unit_nodes=%s))" % _numpy_array_as_python(group.unit_nodes))

        # {{{ facial adjacency groups

        if not mesh._facial_adjacency_groups:
            cg("facial_adjacency_groups = %r" % mesh._facial_adjacency_groups)
        else:
            cg("facial_adjacency_groups = []")

            for fagrp_map in mesh.facial_adjacency_groups:
                entries = ",\n ".join(
                        "%r: FacialAdjacencyGroup(%s)" % (
                            inb_grp, fagrp_params_str(fagrp))
                        for inb_grp, fagrp in six.iteritems(fagrp_map))
                cg("facial_adjacency_groups.append({%s})" % entries)

        # }}}

        # {{{ boundary tags

        btags_str = ", ".join(
                strify_boundary_tag(btag) for btag in mesh.boundary_tags)

        # }}}

        # {{{ nodal adjacency

        nodal_adjacency = mesh._nodal_adjacency
        if isinstance(nodal_adjacency, NodalAdjacency):
            el_con_str = "(%s, %s)" % (
                    _numpy_array_as_python(nodal_adjacency.neighbors_starts),
                    _numpy_array_as_python(nodal_adjacency.neighbors),
                    )
        else:
            el_con_str = repr(nodal_adjacency)

        # }}}

        cg("return Mesh(vertices, groups, skip_tests=True,")
        cg(" vertex_id_dtype=np.%s," % mesh.vertex_id_dtype.name)
        cg(" element_id_dtype=np.%s," % mesh.element_id_dtype.name)
        cg(" nodal_adjacency=%s," % el_con_str)
        cg(" facial_adjacency_groups=facial_adjacency_groups,")
        cg(" boundary_tags=[%s]," % btags_str)
        cg(" is_conforming=%s)" % repr(mesh.is_conforming))

        # FIXME: Handle facial adjacency, boundary tags

    return cg.get()