def get_count_kernel(self, index_dtype):
    """Build and return the count-stage kernel for *index_dtype*.

    The kernel source is rendered from ``_LIST_BUILDER_TEMPLATE`` with
    ``is_count_stage=True``, compiled for ``self.context`` with
    ``self.options``, and looked up under the name
    ``self.name_prefix + "_count"``.  Scalar argument dtypes are set on
    the kernel before it is returned.
    """
    index_ctype = dtype_to_ctype(index_dtype)

    from pyopencl.tools import VectorArg, OtherArg

    # Lists named in count_sharing get no counter argument of their own.
    counted_names = [
        list_name
        for list_name, _ in self.list_names_and_dtypes
        if list_name not in self.count_sharing]

    kernel_list_args = [
        VectorArg(index_dtype, "plb_%s_count" % list_name)
        for list_name in counted_names]

    user_list_args = []
    for list_name in counted_names:
        local_counter = "plb_loc_%s_count" % list_name
        user_list_args.append(
            OtherArg("%s *%s" % (index_ctype, local_counter), local_counter))

    kernel_name = self.name_prefix+"_count"

    from pyopencl.characterize import has_double_support
    all_devices_have_double = all(
        has_double_support(dev) for dev in self.context.devices)

    rendered = _LIST_BUILDER_TEMPLATE.render(
        is_count_stage=True,
        kernel_name=kernel_name,
        double_support=all_devices_have_double,
        debug=self.debug,
        do_not_vectorize=self.do_not_vectorize(),
        eliminate_empty_output_lists=self.eliminate_empty_output_lists,

        kernel_list_arg_decl=_get_arg_decl(kernel_list_args),
        # The local counters are passed on by address.
        kernel_list_arg_values=_get_arg_list(user_list_args, prefix="&"),
        user_list_arg_decl=_get_arg_decl(user_list_args),
        user_list_args=_get_arg_list(user_list_args),
        user_arg_decl_with_offset=_get_arg_decl(self.arg_decls),
        user_arg_decl_no_offset=_get_arg_decl(self.arg_decls_no_offset),
        user_args_no_offset=_get_arg_list(self.arg_decls_no_offset),
        arg_offset_adjustment=get_arg_offset_adjuster_code(self.arg_decls),

        list_names_and_dtypes=self.list_names_and_dtypes,
        count_sharing=self.count_sharing,
        name_prefix=self.name_prefix,
        generate_template=self.generate_template,
        preamble=self.preamble,

        index_type=index_ctype,
    )

    program = cl.Program(self.context, str(rendered)).build(self.options)
    kernel = getattr(program, kernel_name)

    from pyopencl.tools import get_arg_list_scalar_arg_dtypes
    scalar_dtypes = get_arg_list_scalar_arg_dtypes(
        kernel_list_args+self.arg_decls)
    # One extra trailing scalar of the index dtype follows the declared args.
    kernel.set_scalar_arg_dtypes(scalar_dtypes + [index_dtype])

    return kernel
def get_count_kernel(self, index_dtype):
    """Compile the counting kernel of the list builder.

    Renders ``_LIST_BUILDER_TEMPLATE`` in count mode
    (``is_count_stage=True``), builds the program on ``self.context``
    using ``self.options`` and returns the kernel called
    ``self.name_prefix + "_count"`` with its scalar argument dtypes set.
    """
    index_ctype = dtype_to_ctype(index_dtype)

    from pyopencl.tools import VectorArg, OtherArg

    kernel_list_args = []
    user_list_args = []
    for list_name, _dtype in self.list_names_and_dtypes:
        # A list that shares its count with another one is skipped here.
        if list_name in self.count_sharing:
            continue

        kernel_list_args.append(
            VectorArg(index_dtype, "plb_%s_count" % list_name))

        loc_name = "plb_loc_%s_count" % list_name
        user_list_args.append(
            OtherArg("%s *%s" % (index_ctype, loc_name), loc_name))

    kernel_name = self.name_prefix+"_count"

    from pyopencl.characterize import has_double_support

    template_vars = dict(
        is_count_stage=True,
        kernel_name=kernel_name,
        double_support=all(
            has_double_support(dev) for dev in self.context.devices),
        debug=self.debug,
        do_not_vectorize=self.do_not_vectorize(),
        eliminate_empty_output_lists=self.eliminate_empty_output_lists,
        kernel_list_arg_decl=_get_arg_decl(kernel_list_args),
        kernel_list_arg_values=_get_arg_list(user_list_args, prefix="&"),
        user_list_arg_decl=_get_arg_decl(user_list_args),
        user_list_args=_get_arg_list(user_list_args),
        user_arg_decl_with_offset=_get_arg_decl(self.arg_decls),
        user_arg_decl_no_offset=_get_arg_decl(self.arg_decls_no_offset),
        user_args_no_offset=_get_arg_list(self.arg_decls_no_offset),
        arg_offset_adjustment=get_arg_offset_adjuster_code(self.arg_decls),
        list_names_and_dtypes=self.list_names_and_dtypes,
        count_sharing=self.count_sharing,
        name_prefix=self.name_prefix,
        generate_template=self.generate_template,
        preamble=self.preamble,
        index_type=index_ctype,
    )
    src = str(_LIST_BUILDER_TEMPLATE.render(**template_vars))

    prg = cl.Program(self.context, src).build(self.options)
    knl = getattr(prg, kernel_name)

    from pyopencl.tools import get_arg_list_scalar_arg_dtypes
    arg_dtypes = get_arg_list_scalar_arg_dtypes(
        kernel_list_args+self.arg_decls) + [index_dtype]
    knl.set_scalar_arg_dtypes(arg_dtypes)

    return knl
def get_reduction_kernel(stage, ctx, dtype_out, neutral, reduce_expr,
                         arguments=None, name="reduce_kernel", preamble="",
                         map_exprs=None, device=None, options=[],
                         max_group_size=None):
    """Generate, build and configure a reduction kernel with several map
    expressions.

    :arg stage: ``1`` or ``2``. For stage 1, *arguments* is parsed with
        offsets and offset-adjustment code is generated. For stage 2,
        one ``pyopencl_reduction_inp_<i>`` vector argument per map
        expression is prepended to the parsed arguments.
    :arg map_exprs: sequence of map expressions; a ``None`` entry is
        replaced by ``"pyopencl_reduction_inp_<i>[i]"`` (stage 2) or
        ``"in[i]"`` (any other stage). The sequence passed in is *not*
        modified.
    :arg options: passed unchanged to ``Program.build``.
    :returns: the object returned by ``_get_reduction_source`` with the
        attributes ``program``, ``kernel`` and ``arg_types`` set.
    :raises ValueError: if *map_exprs* is ``None``.
    """
    if map_exprs is None:
        raise ValueError("map_exprs has to be given!")

    # Resolve defaults into a fresh list: writing into the caller's list
    # would leak the defaults of one stage into a later call that reuses
    # the same list.
    resolved_exprs = []
    for i, expr in enumerate(map_exprs):
        if expr is None:
            if stage == 2:
                expr = "pyopencl_reduction_inp_%i[i]" % i
            else:
                expr = "in[i]"
        resolved_exprs.append(expr)
    map_exprs = resolved_exprs

    from pyopencl.tools import (parse_arg_list,
                                get_arg_list_scalar_arg_dtypes,
                                get_arg_offset_adjuster_code, VectorArg)

    arg_prep = ""
    if stage == 1 and arguments is not None:
        arguments = parse_arg_list(arguments, with_offset=True)
        arg_prep = get_arg_offset_adjuster_code(arguments)

    if stage == 2 and arguments is not None:
        arguments = parse_arg_list(arguments)
        # Stage 2 reads one input vector per map expression.
        arguments = ([
            VectorArg(dtype_out, "pyopencl_reduction_inp_%i" % i)
            for i in range(len(map_exprs))
        ] + arguments)

    inf = _get_reduction_source(ctx, dtype_to_ctype(dtype_out),
                                dtype_out.itemsize, neutral, reduce_expr,
                                map_exprs, arguments, name, preamble,
                                arg_prep, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)
    inf.arg_types = arguments

    # NOTE(review): the leading None entries (one per map expression), the
    # int64 and the two trailing uint32 presumably match parameters emitted
    # by the reduction template -- confirm against _get_reduction_source.
    inf.kernel.set_scalar_arg_dtypes(
        [None] * len(map_exprs)
        + [np.int64]
        + get_arg_list_scalar_arg_dtypes(inf.arg_types)
        + [np.uint32] * 2)

    return inf
def get_elwise_kernel_and_types(context, arguments, operation,
                                name="elwise_kernel", options=[],
                                preamble="", use_range=False, **kwargs):
    """Build an elementwise kernel and return it with its parsed arguments.

    :arg arguments: argument specification handed to ``parse_arg_list``
        (parsed with ``with_offset=True``).
    :arg use_range: if true, scalar arguments ``start``, ``stop`` and
        ``step`` are appended to the argument list; otherwise a single
        scalar ``n`` is appended.
    :arg kwargs: ``auto_preamble`` (default ``True``) and ``loop_prep``
        (default ``""``) are consumed here; everything else is forwarded
        to ``get_elwise_program``.
    :returns: a tuple ``(kernel, parsed_args)``; the kernel already has
        its scalar argument dtypes set.
    """
    from pyopencl.tools import parse_arg_list, get_arg_offset_adjuster_code
    parsed_args = parse_arg_list(arguments, with_offset=True)

    auto_preamble = kwargs.pop("auto_preamble", True)

    pragmas = []
    includes = []
    have_double_pragma = False
    have_complex_include = False

    if auto_preamble:
        for arg in parsed_args:
            if (arg.dtype in [np.float64, np.complex128]
                    and not have_double_pragma):
                # Every preprocessor directive has to sit on a line of its
                # own, so the newlines are spelled out explicitly.
                pragmas.append(
                    "\n"
                    "#if __OPENCL_C_VERSION__ < 120\n"
                    "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
                    "#endif\n"
                    "#define PYOPENCL_DEFINE_CDOUBLE\n")
                have_double_pragma = True

            # Checked independently of the double check above so that
            # single-precision complex arguments get the include as well.
            if arg.dtype.kind == 'c' and not have_complex_include:
                includes.append("#include <pyopencl-complex.h>\n")
                have_complex_include = True

    if pragmas or includes:
        preamble = "\n".join(pragmas+includes) + "\n" + preamble

    if use_range:
        parsed_args.extend([
            ScalarArg(np.intp, "start"),
            ScalarArg(np.intp, "stop"),
            ScalarArg(np.intp, "step"),
        ])
    else:
        parsed_args.append(ScalarArg(np.intp, "n"))

    # Offset-adjustment code runs before any caller-supplied loop_prep.
    loop_prep = kwargs.pop("loop_prep", "")
    loop_prep = get_arg_offset_adjuster_code(parsed_args) + loop_prep

    prg = get_elwise_program(
        context, parsed_args, operation,
        name=name, options=options, preamble=preamble,
        use_range=use_range, loop_prep=loop_prep, **kwargs)

    from pyopencl.tools import get_arg_list_scalar_arg_dtypes

    kernel = getattr(prg, name)
    kernel.set_scalar_arg_dtypes(get_arg_list_scalar_arg_dtypes(parsed_args))

    return kernel, parsed_args
def get_reduction_kernel(stage, ctx, dtype_out, neutral, reduce_expr,
                         map_expr=None, arguments=None,
                         name="reduce_kernel", preamble="", device=None,
                         options=None, max_group_size=None):
    """Generate and build one stage of a reduction kernel.

    When *map_expr* is ``None`` it defaults to
    ``"pyopencl_reduction_inp[i]"`` for stage 2 and to ``"in[i]"``
    otherwise.  *arguments* is parsed with offsets; for stage 2 a
    ``pyopencl_reduction_inp`` vector argument of *dtype_out* is put in
    front of the parsed arguments.

    :returns: a ``_ReductionInfo`` carrying the context, generated
        source, group size, built program, kernel and argument list.
    :raises ValueError: if *arguments* is ``None``.
    """
    if map_expr is None:
        map_expr = "pyopencl_reduction_inp[i]" if stage == 2 else "in[i]"

    from pyopencl.tools import (parse_arg_list,
                                get_arg_list_scalar_arg_dtypes,
                                get_arg_offset_adjuster_code, VectorArg)

    if arguments is None:
        raise ValueError("arguments must not be None")

    parsed_args = parse_arg_list(arguments, with_offset=True)
    # Offset-adjustment code is generated before the stage-2 input
    # vector is prepended.
    arg_prep = get_arg_offset_adjuster_code(parsed_args)

    if stage == 2 and parsed_args is not None:
        stage2_input = VectorArg(dtype_out, "pyopencl_reduction_inp")
        parsed_args = [stage2_input] + parsed_args

    source, group_size = _get_reduction_source(
        ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,
        neutral, reduce_expr, map_expr, parsed_args,
        name, preamble, arg_prep, device, max_group_size)

    program = cl.Program(ctx, source)
    program.build(options)
    kernel = getattr(program, name)

    # NOTE(review): the fixed entries around the user arguments presumably
    # correspond to parameters emitted by the reduction template -- confirm
    # against _get_reduction_source.
    leading = [None, np.int64]
    trailing = [np.int64] * 3 + [np.uint32, np.int64]
    kernel.set_scalar_arg_dtypes(
        leading + get_arg_list_scalar_arg_dtypes(parsed_args) + trailing)

    return _ReductionInfo(
        context=ctx,
        source=source,
        group_size=group_size,
        program=program,
        kernel=kernel,
        arg_types=parsed_args)
def get_reduction_kernel(stage, ctx, dtype_out, neutral, reduce_expr,
                         arguments=None, name="reduce_kernel", preamble="",
                         map_exprs=None, device=None, options=[],
                         max_group_size=None):
    """Generate, build and configure a multi-expression reduction kernel.

    :arg stage: ``1`` or ``2``. Stage 1 parses *arguments* with offsets
        and generates offset-adjustment code. Stage 2 parses *arguments*
        and prepends one ``pyopencl_reduction_inp_<i>`` vector argument
        per map expression.
    :arg map_exprs: sequence of map expressions. ``None`` entries are
        replaced by ``"pyopencl_reduction_inp_<i>[i]"`` for stage 2 and
        ``"in[i]"`` otherwise. The caller's sequence is left untouched.
    :arg options: handed unchanged to ``Program.build``.
    :returns: the object produced by ``_get_reduction_source`` with
        ``program``, ``kernel`` and ``arg_types`` filled in.
    :raises ValueError: if *map_exprs* is ``None``.
    """
    if map_exprs is None:
        raise ValueError("map_exprs has to be given!")

    # Build a new list rather than assigning into the argument, so a list
    # shared between several calls keeps its None placeholders.
    map_exprs = list(map_exprs)
    for i, m in enumerate(map_exprs):
        if m is None:
            if stage == 2:
                map_exprs[i] = "pyopencl_reduction_inp_%i[i]" % i
            else:
                map_exprs[i] = "in[i]"

    from pyopencl.tools import (
        parse_arg_list, get_arg_list_scalar_arg_dtypes,
        get_arg_offset_adjuster_code, VectorArg)

    arg_prep = ""
    if stage == 1 and arguments is not None:
        arguments = parse_arg_list(arguments, with_offset=True)
        arg_prep = get_arg_offset_adjuster_code(arguments)

    if stage == 2 and arguments is not None:
        arguments = parse_arg_list(arguments)
        # ``range`` instead of ``xrange``: the latter does not exist on
        # Python 3, and the list is short either way.
        arguments = (
            [VectorArg(dtype_out, "pyopencl_reduction_inp_%i" % i)
             for i in range(len(map_exprs))]
            + arguments)

    inf = _get_reduction_source(
        ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,
        neutral, reduce_expr, map_exprs, arguments,
        name, preamble, arg_prep, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)
    inf.arg_types = arguments

    # NOTE(review): one None per map expression, an int64 and two trailing
    # uint32 presumably mirror parameters emitted by the reduction
    # template -- confirm against _get_reduction_source.
    inf.kernel.set_scalar_arg_dtypes(
        [None, ]*len(map_exprs)+[np.int64]
        + get_arg_list_scalar_arg_dtypes(inf.arg_types)
        + [np.uint32]*2)

    return inf
def get_reduction_kernel(stage, ctx, dtype_out, neutral, reduce_expr,
                         map_expr=None, arguments=None,
                         name="reduce_kernel", preamble="", device=None,
                         options=[], max_group_size=None):
    """Generate and build one stage of a single-expression reduction kernel.

    A missing *map_expr* defaults to ``"pyopencl_reduction_inp[i]"`` for
    stage 2 and to ``"in[i]"`` otherwise.  For stage 1 the given
    *arguments* are parsed with offsets and offset-adjustment code is
    generated; for stage 2 they are parsed and a
    ``pyopencl_reduction_inp`` vector argument of *dtype_out* is
    prepended.  *arguments* equal to ``None`` is passed on unparsed.

    :returns: the object returned by ``_get_reduction_source`` with
        ``program``, ``kernel`` and ``arg_types`` assigned.
    """
    if map_expr is None:
        map_expr = "pyopencl_reduction_inp[i]" if stage == 2 else "in[i]"

    from pyopencl.tools import (parse_arg_list,
                                get_arg_list_scalar_arg_dtypes,
                                get_arg_offset_adjuster_code, VectorArg)

    have_arguments = arguments is not None

    arg_prep = ""
    if have_arguments and stage == 1:
        arguments = parse_arg_list(arguments, with_offset=True)
        arg_prep = get_arg_offset_adjuster_code(arguments)

    if have_arguments and stage == 2:
        stage2_input = VectorArg(dtype_out, "pyopencl_reduction_inp")
        arguments = [stage2_input] + parse_arg_list(arguments)

    inf = _get_reduction_source(
        ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,
        neutral, reduce_expr, map_expr, arguments,
        name, preamble, arg_prep, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)
    inf.arg_types = arguments

    # NOTE(review): the fixed entries before and after the user arguments
    # presumably match parameters emitted by the reduction template --
    # confirm against _get_reduction_source.
    user_dtypes = get_arg_list_scalar_arg_dtypes(inf.arg_types)
    inf.kernel.set_scalar_arg_dtypes(
        [None, np.int64]
        + user_dtypes
        + [np.int64] * 3
        + [np.uint32, np.int64])

    return inf
def get_reduction_kernel(stage, ctx, dtype_out, neutral, reduce_expr,
                         map_expr=None, arguments=None,
                         name="reduce_kernel", preamble="", device=None,
                         options=[], max_group_size=None):
    """Create the program and kernel for one reduction stage.

    If no *map_expr* is supplied, stage 2 uses
    ``"pyopencl_reduction_inp[i]"`` and every other stage uses
    ``"in[i]"``.  Provided *arguments* are parsed (with offsets for
    stage 1); stage 2 additionally receives a leading
    ``pyopencl_reduction_inp`` vector argument of *dtype_out*.

    :returns: the result of ``_get_reduction_source``, extended with the
        built ``program``, its ``kernel`` and the ``arg_types`` list.
    """
    if map_expr is None:
        if stage == 2:
            map_expr = "pyopencl_reduction_inp[i]"
        else:
            map_expr = "in[i]"

    from pyopencl.tools import (
        parse_arg_list, get_arg_list_scalar_arg_dtypes,
        get_arg_offset_adjuster_code, VectorArg)

    arg_prep = ""
    if arguments is not None:
        if stage == 1:
            arguments = parse_arg_list(arguments, with_offset=True)
            arg_prep = get_arg_offset_adjuster_code(arguments)
        elif stage == 2:
            arguments = parse_arg_list(arguments)
            arguments = (
                [VectorArg(dtype_out, "pyopencl_reduction_inp")]
                + arguments)

    out_ctype = dtype_to_ctype(dtype_out)
    inf = _get_reduction_source(
        ctx, out_ctype, dtype_out.itemsize,
        neutral, reduce_expr, map_expr, arguments,
        name, preamble, arg_prep, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)
    inf.arg_types = arguments

    # NOTE(review): the surrounding fixed dtypes presumably correspond to
    # parameters emitted by the reduction template -- confirm against
    # _get_reduction_source.
    scalar_dtypes = [None, np.int64]
    scalar_dtypes = scalar_dtypes + get_arg_list_scalar_arg_dtypes(
        inf.arg_types)
    scalar_dtypes = scalar_dtypes + [np.int64]*3 + [np.uint32, np.int64]
    inf.kernel.set_scalar_arg_dtypes(scalar_dtypes)

    return inf
def get_write_kernel(self, index_dtype):
    """Build and return the write-stage kernel for *index_dtype*.

    The kernel source is rendered from ``_LIST_BUILDER_TEMPLATE`` with
    ``is_count_stage=False``, compiled for ``self.context`` with
    ``self.options`` and looked up under the name
    ``self.name_prefix + "_write"``.  Scalar argument dtypes are set on
    the kernel before it is returned.
    """
    index_ctype = dtype_to_ctype(index_dtype)

    from pyopencl.tools import VectorArg, OtherArg

    kernel_list_args = []
    user_list_args = []
    # Fragments of the argument-value string; concatenated after the loop.
    value_parts = []

    for name, dtype in self.list_names_and_dtypes:
        list_name = "plb_%s_list" % name
        list_arg = VectorArg(dtype, list_name)

        kernel_list_args.append(list_arg)
        user_list_args.append(list_arg)

        if name in self.count_sharing:
            # Count-sharing lists contribute only the list argument.
            value_parts.append("%s, " % list_name)
        else:
            kernel_list_args.append(
                VectorArg(index_dtype, "plb_%s_start_index" % name))

            if name in self.eliminate_empty_output_lists:
                kernel_list_args.append(
                    VectorArg(index_dtype, "%s_compressed_indices" % name))

            index_name = "plb_%s_index" % name
            index_decl = "{} *{}".format(index_ctype, index_name)
            user_list_args.append(OtherArg(index_decl, index_name))

            # The index variable is passed on by address.
            value_parts.append(f"{list_name}, &{index_name}, ")

    kernel_list_arg_values = "".join(value_parts)

    kernel_name = self.name_prefix+"_write"

    from pyopencl.characterize import has_double_support
    all_devices_have_double = all(
        has_double_support(dev) for dev in self.context.devices)

    rendered = _LIST_BUILDER_TEMPLATE.render(
        is_count_stage=False,
        kernel_name=kernel_name,
        double_support=all_devices_have_double,
        debug=self.debug,
        do_not_vectorize=self.do_not_vectorize(),
        eliminate_empty_output_lists=self.eliminate_empty_output_lists,

        kernel_list_arg_decl=_get_arg_decl(kernel_list_args),
        kernel_list_arg_values=kernel_list_arg_values,
        user_list_arg_decl=_get_arg_decl(user_list_args),
        user_list_args=_get_arg_list(user_list_args),
        user_arg_decl_with_offset=_get_arg_decl(self.arg_decls),
        user_arg_decl_no_offset=_get_arg_decl(self.arg_decls_no_offset),
        user_args_no_offset=_get_arg_list(self.arg_decls_no_offset),
        arg_offset_adjustment=get_arg_offset_adjuster_code(self.arg_decls),

        list_names_and_dtypes=self.list_names_and_dtypes,
        count_sharing=self.count_sharing,
        name_prefix=self.name_prefix,
        generate_template=self.generate_template,
        preamble=self.preamble,

        index_type=index_ctype,
    )

    program = cl.Program(self.context, str(rendered)).build(self.options)
    kernel = getattr(program, kernel_name)

    from pyopencl.tools import get_arg_list_scalar_arg_dtypes
    scalar_dtypes = get_arg_list_scalar_arg_dtypes(
        kernel_list_args+self.arg_decls)
    # One extra trailing scalar of the index dtype follows the declared args.
    kernel.set_scalar_arg_dtypes(scalar_dtypes + [index_dtype])

    return kernel