def get_function_definition(self, codegen_state, codegen_result,
        schedule_index, function_decl, function_body):
    """Wrap *function_body* into the genpy ``Function`` for the current program.

    The generated function is named after
    ``codegen_result.current_program(codegen_state).name``. Its parameters are
    ``_lpy_cl_kernels`` and ``queue``, followed by the name of every entry of
    ``codegen_state.implemented_data_info`` whose ``arg_class`` is not a
    subclass of ``TemporaryVariable``, followed by ``wait_for=None`` and
    ``allocator=None``.

    *schedule_index* and *function_decl* are accepted but not used here.

    :returns: a ``genpy.Function`` whose body runs the import/allocator
        prologue, then *function_body*, then releases every entry of
        ``_global_temporaries`` and returns ``_lpy_evt``.
    """
    from loopy.kernel.data import TemporaryVariable

    # Temporaries are not passed in by the caller, so they are left out of
    # the signature.
    signature = ["_lpy_cl_kernels", "queue"]
    for info in codegen_state.implemented_data_info:
        if issubclass(info.arg_class, TemporaryVariable):
            continue
        signature.append(info.name)
    signature += ["wait_for=None", "allocator=None"]

    from genpy import (For, Function, Suite, Import, ImportAs, Return,
            FromImport, If, Assign, Line, Statement as S)

    program_name = codegen_result.current_program(codegen_state).name

    # NOTE(review): the generated code refers to `_lpy_cl_tools`, which the
    # imports emitted here do not bind; presumably the surrounding generated
    # module provides it -- confirm.
    prologue = [
        FromImport("struct", ["pack as _lpy_pack"]),
        ImportAs("pyopencl", "_lpy_cl"),
        Import("pyopencl.tools"),
        Line(),
        If("allocator is None",
            Assign(
                "allocator",
                "_lpy_cl_tools.DeferredAllocator(queue.context)")),
        Line(),
    ]

    wrapped_body = [Line(), function_body, Line()]

    # free global temporaries
    release_temporaries = [
        For("_tv", "_global_temporaries", S("_tv.release()")),
    ]

    epilogue = [Line(), Return("_lpy_evt")]

    return Function(
        program_name,
        signature,
        Suite(prologue + wrapped_body + release_temporaries + epilogue))
def generate_value_arg_setup(kernel, devices, implemented_data_info):
    """Generate the host-side statements that set value arguments on ``_lpy_knl``.

    Walks *implemented_data_info* in order. Entries whose ``arg_class`` is not
    a ``lp.ValueArg`` subclass must be ``ArrayBase`` subclasses; they only
    advance the OpenCL argument index. For every value argument a
    ``_lpy_knl.set_arg(...)`` statement is generated, preceded (unless
    ``kernel.options.skip_arg_checks`` is set) by a check that raises
    ``RuntimeError`` in the generated code if the argument is ``None``.

    :arg kernel: object providing ``options`` and ``name``.
    :arg devices: sequence of devices (entries may be *None*), used to decide
        whether the struct-argument-count bug workaround is enabled.
    :arg implemented_data_info: sequence of entries providing ``name``,
        ``arg_class`` and ``dtype``.
    :returns: a tuple ``(suite, arg_idx_to_cl_arg_idx, cl_arg_idx)``: the
        generated ``genpy.Suite``, a dict mapping each index into
        *implemented_data_info* to the OpenCL argument index at which that
        entry starts, and the total number of OpenCL argument indices used.
    :raises TypeError: for a complex dtype other than complex64/complex128.
    :raises LoopyError: for a value argument of an unsupported dtype.
    """
    options = kernel.options

    import loopy as lp
    from loopy.kernel.array import ArrayBase

    # {{{ arg counting bug handling

    # For example:
    # https://github.com/pocl/pocl/issues/197
    # (but Apple CPU has a similar bug)

    work_around_arg_count_bug = False
    warn_about_arg_count_bug = False

    try:
        from pyopencl.characterize import has_struct_arg_count_bug

    except ImportError:
        # Without the characterization helper, no device is treated as buggy.
        count_bug_per_dev = [False]*len(devices)

    else:
        count_bug_per_dev = [
                has_struct_arg_count_bug(dev)
                if dev is not None else False
                for dev in devices]

    if any(dev is None for dev in devices):
        warn("{knl_name}: device not supplied to PyOpenCLTarget--"
                "workarounds for broken OpenCL implementations "
                "(such as those relating to complex numbers) "
                "may not be enabled when needed"
                .format(knl_name=kernel.name))

    if any(count_bug_per_dev):
        # The workaround is only enabled when *every* device needs it;
        # a mixed set of devices only gets a warning.
        if all(count_bug_per_dev):
            work_around_arg_count_bug = True
        else:
            warn_about_arg_count_bug = True

    # }}}

    cl_arg_idx = 0
    arg_idx_to_cl_arg_idx = {}

    fp_arg_count = 0

    from genpy import (
            Comment, Line, If, Raise, Assign, Statement as S, Suite)

    result = []
    gen = result.append

    for arg_idx, idi in enumerate(implemented_data_info):
        arg_idx_to_cl_arg_idx[arg_idx] = cl_arg_idx

        if not issubclass(idi.arg_class, lp.ValueArg):
            assert issubclass(idi.arg_class, ArrayBase)

            # assume each of those generates exactly one...
            cl_arg_idx += 1

            continue

        gen(Comment("{{{ process %s" % idi.name))
        gen(Line())

        if not options.skip_arg_checks:
            gen(If("%s is None" % idi.name,
                Raise('RuntimeError("input argument \'{name}\' '
                        'must be supplied")'.format(name=idi.name))))

        if idi.dtype.is_integral():
            gen(Comment("cast to Python int to avoid trouble "
                "with struct packing or Boost.Python"))
            if sys.version_info < (3,):
                py_type = "long"
            else:
                py_type = "int"

            gen(Assign(idi.name, "%s(%s)" % (py_type, idi.name)))
            gen(Line())

        if idi.dtype.is_composite():
            # Composite values are handed to set_arg as they are.
            gen(S("_lpy_knl.set_arg(%d, %s)" % (cl_arg_idx, idi.name)))
            cl_arg_idx += 1

        elif idi.dtype.is_complex():
            assert isinstance(idi.dtype, NumpyType)

            dtype = idi.dtype

            if warn_about_arg_count_bug:
                # Fixed: the parenthetical in this message was never closed.
                warn("{knl_name}: arguments include complex numbers, and "
                        "some (but not all) of the target devices mishandle "
                        "struct kernel arguments (hence the workaround is "
                        "disabled)".format(
                            knl_name=kernel.name))

            if dtype.numpy_dtype == np.complex64:
                arg_char = "f"
            elif dtype.numpy_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError("unexpected complex type: %s" % dtype)

            # NOTE(review): the limit of 8 floating-point argument slots is
            # taken as-is; presumably it reflects the bug linked above --
            # confirm.
            if (work_around_arg_count_bug
                    and dtype.numpy_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                # Workaround: pass real and imaginary parts as two
                # separate kernel arguments.
                gen(Assign(
                    "_lpy_buf",
                    "_lpy_pack('{arg_char}', {arg_var}.real)"
                    .format(arg_char=arg_char, arg_var=idi.name)))
                gen(S(
                    "_lpy_knl.set_arg({cl_arg_idx}, _lpy_buf)"
                    .format(cl_arg_idx=cl_arg_idx)))
                cl_arg_idx += 1

                gen(Assign(
                    "_lpy_buf",
                    "_lpy_pack('{arg_char}', {arg_var}.imag)"
                    .format(arg_char=arg_char, arg_var=idi.name)))
                gen(S(
                    "_lpy_knl.set_arg({cl_arg_idx}, _lpy_buf)"
                    .format(cl_arg_idx=cl_arg_idx)))
                cl_arg_idx += 1
            else:
                # Regular path: real and imaginary part packed into a
                # single buffer and passed as one argument.
                gen(Assign(
                    "_lpy_buf",
                    "_lpy_pack('{arg_char}{arg_char}', "
                    "{arg_var}.real, {arg_var}.imag)"
                    .format(arg_char=arg_char, arg_var=idi.name)))
                gen(S(
                    "_lpy_knl.set_arg({cl_arg_idx}, _lpy_buf)"
                    .format(cl_arg_idx=cl_arg_idx)))
                cl_arg_idx += 1

            fp_arg_count += 2

        elif isinstance(idi.dtype, NumpyType):
            if idi.dtype.dtype.kind == "f":
                fp_arg_count += 1

            gen(S(
                "_lpy_knl.set_arg(%d, _lpy_pack('%s', %s))"
                % (cl_arg_idx, idi.dtype.dtype.char, idi.name)))

            cl_arg_idx += 1

        else:
            raise LoopyError("do not know how to pass argument of type '%s'"
                    % idi.dtype)

        gen(Line())

        gen(Comment("}}}"))
        gen(Line())

    return Suite(result), arg_idx_to_cl_arg_idx, cl_arg_idx
def emit_if(self, condition_str, ast):
    """Return a ``genpy.If`` node that guards *ast* with *condition_str*."""
    import genpy

    return genpy.If(condition_str, ast)
def generate_value_arg_setup(kernel, implemented_data_info):
    """Generate the host-side statements that set value arguments on ``_lpy_knl``.

    Walks *implemented_data_info* in order. Entries whose ``arg_class`` is not
    a ``lp.ValueArg`` subclass must be ``ArrayBase`` subclasses; they only
    advance the OpenCL argument index. Value arguments are collected and then
    set through at most two generated calls,
    ``_lpy_knl._set_arg_buf_multi(...)`` and
    ``_lpy_knl._set_arg_buf_pack_multi(...)``. Unless
    ``kernel.options.skip_arg_checks`` is set, each value argument also gets a
    generated check that raises ``RuntimeError`` if it is ``None``.

    :arg kernel: object providing ``options``.
    :arg implemented_data_info: sequence of entries providing ``name``,
        ``arg_class`` and ``dtype``.
    :returns: a tuple ``(suite, arg_idx_to_cl_arg_idx, cl_arg_idx)``: the
        generated ``genpy.Suite``, a dict mapping each index into
        *implemented_data_info* to the OpenCL argument index assigned to that
        entry, and the total number of OpenCL argument indices used.
    :raises TypeError: for a complex dtype other than complex64/complex128.
    :raises LoopyError: for a value argument of an unsupported dtype.
    """
    options = kernel.options

    import loopy as lp
    from loopy.kernel.array import ArrayBase

    cl_arg_idx = 0
    arg_idx_to_cl_arg_idx = {}

    from genpy import If, Raise, Statement as S, Suite

    result = []
    gen = result.append

    # Flat lists of (index, expression) pairs and
    # (index, typechar, expression) triples, respectively.
    buf_indices_and_args = []
    buf_pack_indices_and_args = []

    from pyopencl.invoker import BUF_PACK_TYPECHARS

    def add_buf_arg(arg_idx, typechar, expr_str):
        if typechar in BUF_PACK_TYPECHARS:
            buf_pack_indices_and_args.append(arg_idx)
            buf_pack_indices_and_args.append(repr(typechar.encode()))
            buf_pack_indices_and_args.append(expr_str)
        else:
            buf_indices_and_args.append(arg_idx)
            # Fixed: use `_lpy_pack`, the name used for struct.pack by the
            # rest of the generated code (including the complex branch
            # below); a bare `pack` is not bound there.
            buf_indices_and_args.append(
                    f"_lpy_pack('{typechar}', {expr_str})")

    for arg_idx, idi in enumerate(implemented_data_info):
        arg_idx_to_cl_arg_idx[arg_idx] = cl_arg_idx

        if not issubclass(idi.arg_class, lp.ValueArg):
            assert issubclass(idi.arg_class, ArrayBase)

            # assume each of those generates exactly one...
            cl_arg_idx += 1

            continue

        if not options.skip_arg_checks:
            gen(
                If(
                    "%s is None" % idi.name,
                    Raise('RuntimeError("input argument \'{name}\' '
                          'must be supplied")'.format(name=idi.name))))

        if idi.dtype.is_composite():
            # Composite values are passed through as they are.
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(f"{idi.name}")

            cl_arg_idx += 1

        elif idi.dtype.is_complex():
            assert isinstance(idi.dtype, NumpyType)

            dtype = idi.dtype

            if dtype.numpy_dtype == np.complex64:
                arg_char = "f"
            elif dtype.numpy_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError("unexpected complex type: %s" % dtype)

            # Real and imaginary part are packed into one buffer.
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(
                    f"_lpy_pack('{arg_char}{arg_char}', "
                    f"{idi.name}.real, {idi.name}.imag)")
            cl_arg_idx += 1

        elif isinstance(idi.dtype, NumpyType):
            add_buf_arg(cl_arg_idx, idi.dtype.dtype.char, idi.name)
            cl_arg_idx += 1

        else:
            raise LoopyError("do not know how to pass argument of type '%s'"
                    % idi.dtype)

    for arg_kind, args_and_indices, entry_length in [
            ("_buf", buf_indices_and_args, 2),
            ("_buf_pack", buf_pack_indices_and_args, 3),
            ]:
        assert len(args_and_indices) % entry_length == 0
        if args_and_indices:
            gen(
                S(f"_lpy_knl._set_arg{arg_kind}_multi("
                  f"({', '.join(str(i) for i in args_and_indices)},), "
                  ")"))

    return Suite(result), arg_idx_to_cl_arg_idx, cl_arg_idx
def generate_value_arg_setup(kernel, devices, implemented_data_info):
    """Generate the host-side statements that set value arguments on ``_lpy_knl``.

    Walks *implemented_data_info* in order. Entries whose ``arg_class`` is not
    a ``lp.ValueArg`` subclass must be ``ArrayBase`` subclasses; they only
    advance the OpenCL argument index. Value arguments are collected and then
    set through at most two generated calls,
    ``_lpy_knl._set_arg_buf_multi(...)`` and
    ``_lpy_knl._set_arg_buf_pack_multi(...)``. Unless
    ``kernel.options.skip_arg_checks`` is set, each value argument also gets a
    generated check that raises ``RuntimeError`` if it is ``None``.

    :arg kernel: object providing ``options`` and ``name``.
    :arg devices: sequence of devices (entries may be *None*), used to decide
        whether the struct-argument-count bug workaround is enabled.
    :arg implemented_data_info: sequence of entries providing ``name``,
        ``arg_class`` and ``dtype``.
    :returns: a tuple ``(suite, arg_idx_to_cl_arg_idx, cl_arg_idx)``: the
        generated ``genpy.Suite``, a dict mapping each index into
        *implemented_data_info* to the OpenCL argument index at which that
        entry starts, and the total number of OpenCL argument indices used.
    :raises TypeError: for a complex dtype other than complex64/complex128.
    :raises LoopyError: for a value argument of an unsupported dtype.
    """
    options = kernel.options

    import loopy as lp
    from loopy.kernel.array import ArrayBase

    # {{{ arg counting bug handling

    # For example:
    # https://github.com/pocl/pocl/issues/197
    # (but Apple CPU has a similar bug)

    work_around_arg_count_bug = False
    warn_about_arg_count_bug = False

    try:
        from pyopencl.characterize import has_struct_arg_count_bug

    except ImportError:
        # Without the characterization helper, no device is treated as buggy.
        count_bug_per_dev = [False] * len(devices)

    else:
        count_bug_per_dev = [
            has_struct_arg_count_bug(dev) if dev is not None else False
            for dev in devices
        ]

    if any(dev is None for dev in devices):
        warn("{knl_name}: device not supplied to PyOpenCLTarget--"
             "workarounds for broken OpenCL implementations "
             "(such as those relating to complex numbers) "
             "may not be enabled when needed. To avoid this, "
             "pass target=lp.PyOpenCLTarget(dev) when creating "
             "the kernel.".format(knl_name=kernel.name))

    if any(count_bug_per_dev):
        # The workaround is only enabled when *every* device needs it;
        # a mixed set of devices only gets a warning.
        if all(count_bug_per_dev):
            work_around_arg_count_bug = True
        else:
            warn_about_arg_count_bug = True

    # }}}

    cl_arg_idx = 0
    arg_idx_to_cl_arg_idx = {}

    fp_arg_count = 0

    from genpy import If, Raise, Statement as S, Suite

    result = []
    gen = result.append

    # Flat lists of (index, expression) pairs and
    # (index, typechar, expression) triples, respectively.
    buf_indices_and_args = []
    buf_pack_indices_and_args = []

    from pyopencl.invoker import BUF_PACK_TYPECHARS

    def add_buf_arg(arg_idx, typechar, expr_str):
        if typechar in BUF_PACK_TYPECHARS:
            buf_pack_indices_and_args.append(arg_idx)
            buf_pack_indices_and_args.append(repr(typechar.encode()))
            buf_pack_indices_and_args.append(expr_str)
        else:
            buf_indices_and_args.append(arg_idx)
            # Fixed: use `_lpy_pack`, the name used for struct.pack by the
            # rest of the generated code (including the complex branch
            # below); a bare `pack` is not bound there.
            buf_indices_and_args.append(
                    f"_lpy_pack('{typechar}', {expr_str})")

    for arg_idx, idi in enumerate(implemented_data_info):
        arg_idx_to_cl_arg_idx[arg_idx] = cl_arg_idx

        if not issubclass(idi.arg_class, lp.ValueArg):
            assert issubclass(idi.arg_class, ArrayBase)

            # assume each of those generates exactly one...
            cl_arg_idx += 1

            continue

        if not options.skip_arg_checks:
            gen(
                If(
                    "%s is None" % idi.name,
                    Raise('RuntimeError("input argument \'{name}\' '
                          'must be supplied")'.format(name=idi.name))))

        if idi.dtype.is_composite():
            # Composite values are passed through as they are.
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(f"{idi.name}")

            cl_arg_idx += 1

        elif idi.dtype.is_complex():
            assert isinstance(idi.dtype, NumpyType)

            dtype = idi.dtype

            if warn_about_arg_count_bug:
                # Fixed: the parenthetical in this message was never closed.
                warn("{knl_name}: arguments include complex numbers, and "
                     "some (but not all) of the target devices mishandle "
                     "struct kernel arguments (hence the workaround is "
                     "disabled)".format(knl_name=kernel.name))

            if dtype.numpy_dtype == np.complex64:
                arg_char = "f"
            elif dtype.numpy_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError("unexpected complex type: %s" % dtype)

            # NOTE(review): the limit of 8 floating-point argument slots is
            # taken as-is; presumably it reflects the bug linked above --
            # confirm.
            if (work_around_arg_count_bug
                    and dtype.numpy_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                # Workaround: pass real and imaginary parts as two
                # separate kernel arguments.
                add_buf_arg(cl_arg_idx, arg_char, f"{idi.name}.real")
                cl_arg_idx += 1

                add_buf_arg(cl_arg_idx, arg_char, f"{idi.name}.imag")
                cl_arg_idx += 1
            else:
                # Regular path: both parts packed into one buffer.
                buf_indices_and_args.append(cl_arg_idx)
                buf_indices_and_args.append(
                    f"_lpy_pack('{arg_char}{arg_char}', "
                    f"{idi.name}.real, {idi.name}.imag)")
                cl_arg_idx += 1

            fp_arg_count += 2

        elif isinstance(idi.dtype, NumpyType):
            if idi.dtype.dtype.kind == "f":
                fp_arg_count += 1

            add_buf_arg(cl_arg_idx, idi.dtype.dtype.char, idi.name)
            cl_arg_idx += 1

        else:
            raise LoopyError("do not know how to pass argument of type '%s'"
                    % idi.dtype)

    for arg_kind, args_and_indices, entry_length in [
            ("_buf", buf_indices_and_args, 2),
            ("_buf_pack", buf_pack_indices_and_args, 3),
            ]:
        assert len(args_and_indices) % entry_length == 0
        if args_and_indices:
            gen(
                S(f"_lpy_knl._set_arg{arg_kind}_multi("
                  f"({', '.join(str(i) for i in args_and_indices)},), "
                  ")"))

    return Suite(result), arg_idx_to_cl_arg_idx, cl_arg_idx