def get_count_kernel(self, index_dtype):
    """Build and return the OpenCL kernel for the counting stage.

    Renders ``_LIST_BUILDER_TEMPLATE`` with ``is_count_stage=True``, builds
    the resulting source for ``self.context`` with ``self.options``, and
    returns the kernel named ``self.name_prefix + "_count"`` with its scalar
    argument dtypes already configured.

    :arg index_dtype: dtype of the per-list count arrays; it is also
        appended as the last entry of the scalar-argument dtype list.
    :returns: the kernel object looked up on the built program.
    """
    from pyopencl.characterize import has_double_support
    from pyopencl.tools import (
        OtherArg, VectorArg, get_arg_list_scalar_arg_dtypes)

    index_ctype = dtype_to_ctype(index_dtype)

    # Lists named in self.count_sharing are skipped: they get neither a
    # count array argument nor a local counter.
    counted_names = [
        list_name
        for list_name, _ in self.list_names_and_dtypes
        if list_name not in self.count_sharing]

    # One index-typed count array per counted list (kernel-level arguments).
    kernel_list_args = [
        VectorArg(index_dtype, "plb_%s_count" % list_name)
        for list_name in counted_names]

    # One pointer-to-counter per counted list (arguments of the user code).
    user_list_args = []
    for list_name in counted_names:
        counter_name = "plb_loc_%s_count" % list_name
        user_list_args.append(
            OtherArg("%s *%s" % (index_ctype, counter_name), counter_name))

    kernel_name = self.name_prefix+"_count"

    # Double support is only reported if every device in the context has it.
    all_devices_have_double = all(
        has_double_support(device) for device in self.context.devices)

    template_vars = dict(
        is_count_stage=True,
        kernel_name=kernel_name,
        double_support=all_devices_have_double,
        debug=self.debug,
        do_not_vectorize=self.do_not_vectorize(),
        eliminate_empty_output_lists=self.eliminate_empty_output_lists,

        kernel_list_arg_decl=_get_arg_decl(kernel_list_args),
        # The local counters are handed to the user code by address.
        kernel_list_arg_values=_get_arg_list(user_list_args, prefix="&"),
        user_list_arg_decl=_get_arg_decl(user_list_args),
        user_list_args=_get_arg_list(user_list_args),
        user_arg_decl_with_offset=_get_arg_decl(self.arg_decls),
        user_arg_decl_no_offset=_get_arg_decl(self.arg_decls_no_offset),
        user_args_no_offset=_get_arg_list(self.arg_decls_no_offset),
        arg_offset_adjustment=get_arg_offset_adjuster_code(self.arg_decls),

        list_names_and_dtypes=self.list_names_and_dtypes,
        count_sharing=self.count_sharing,
        name_prefix=self.name_prefix,
        generate_template=self.generate_template,
        preamble=self.preamble,

        index_type=index_ctype,
    )

    rendered_source = str(_LIST_BUILDER_TEMPLATE.render(**template_vars))

    program = cl.Program(self.context, rendered_source).build(self.options)
    count_knl = getattr(program, kernel_name)

    # Scalar dtypes follow the kernel signature: list arguments, then the
    # user arguments, then one trailing index-typed scalar.
    scalar_dtypes = get_arg_list_scalar_arg_dtypes(
        kernel_list_args + self.arg_decls)
    count_knl.set_scalar_arg_dtypes(scalar_dtypes + [index_dtype])

    return count_knl
def get_write_kernel(self, index_dtype):
    """Build and return the OpenCL kernel for the writing stage.

    Renders ``_LIST_BUILDER_TEMPLATE`` with ``is_count_stage=False``, builds
    the resulting source for ``self.context`` with ``self.options``, and
    returns the kernel named ``self.name_prefix + "_write"`` with its scalar
    argument dtypes already configured.

    For every entry of ``self.list_names_and_dtypes`` the kernel receives a
    ``plb_<name>_list`` array of that entry's dtype. Lists not named in
    ``self.count_sharing`` additionally receive an index-typed
    ``plb_<name>_start_index`` array, and the user code gets a pointer
    ``plb_<name>_index`` to the current write position.

    :arg index_dtype: dtype of the start-index arrays; it is also appended
        as the last entry of the scalar-argument dtype list.
    :returns: the kernel object looked up on the built program.
    """
    from pyopencl.characterize import has_double_support
    from pyopencl.tools import (
        OtherArg, VectorArg, get_arg_list_scalar_arg_dtypes)

    index_ctype = dtype_to_ctype(index_dtype)

    kernel_list_args = []
    user_list_args = []
    # Fragments of the argument list passed on to the user code; joined once
    # at the end instead of growing a string with repeated "+=".
    arg_value_parts = []

    for name, dtype in self.list_names_and_dtypes:
        list_name = "plb_%s_list" % name
        list_arg = VectorArg(dtype, list_name)

        kernel_list_args.append(list_arg)
        user_list_args.append(list_arg)

        if name in self.count_sharing:
            # Count-sharing lists get no start-index array and no index
            # pointer of their own; only the output list is forwarded.
            arg_value_parts.append("%s, " % list_name)
            continue

        kernel_list_args.append(
            VectorArg(index_dtype, "plb_%s_start_index" % name))

        index_name = "plb_%s_index" % name
        user_list_args.append(
            OtherArg("%s *%s" % (index_ctype, index_name), index_name))

        # The index is handed to the user code by address.
        arg_value_parts.append("%s, &%s, " % (list_name, index_name))

    kernel_list_arg_values = "".join(arg_value_parts)

    kernel_name = self.name_prefix + "_write"

    # Double support is only reported if every device in the context has it.
    double_support = all(
        has_double_support(dev) for dev in self.context.devices)

    user_arg_decl = _get_arg_decl(self.arg_decls)

    src = _LIST_BUILDER_TEMPLATE.render(
        is_count_stage=False,
        kernel_name=kernel_name,
        double_support=double_support,
        debug=self.debug,
        do_not_vectorize=self.do_not_vectorize(),
        # The count stage renders this same template with the variables
        # below; supply them here as well so template text shared by both
        # stages never sees an undefined name.
        eliminate_empty_output_lists=self.eliminate_empty_output_lists,

        kernel_list_arg_decl=_get_arg_decl(kernel_list_args),
        kernel_list_arg_values=kernel_list_arg_values,
        user_list_arg_decl=_get_arg_decl(user_list_args),
        user_list_args=_get_arg_list(user_list_args),
        user_arg_decl_with_offset=user_arg_decl,
        user_arg_decl_no_offset=_get_arg_decl(self.arg_decls_no_offset),
        user_args_no_offset=_get_arg_list(self.arg_decls_no_offset),
        arg_offset_adjustment=get_arg_offset_adjuster_code(self.arg_decls),
        # NOTE(review): legacy variable names kept in case the template
        # still references them in the write stage -- TODO confirm against
        # _LIST_BUILDER_TEMPLATE and drop if unused.
        user_arg_decl=user_arg_decl,
        user_args=_get_arg_list(self.arg_decls),

        list_names_and_dtypes=self.list_names_and_dtypes,
        count_sharing=self.count_sharing,
        name_prefix=self.name_prefix,
        generate_template=self.generate_template,
        preamble=self.preamble,

        index_type=index_ctype,
    )

    src = str(src)

    prg = cl.Program(self.context, src).build(self.options)
    knl = getattr(prg, kernel_name)

    # Scalar dtypes follow the kernel signature: list arguments, then the
    # user arguments, then one trailing index-typed scalar.
    knl.set_scalar_arg_dtypes(
        get_arg_list_scalar_arg_dtypes(kernel_list_args + self.arg_decls)
        + [index_dtype])

    return knl