def __init__(self, ctx, dev, src, func_name, t_type=np.float32,
             y_type=np.float32, extra_args=None, options=None,
             post_func=None):
    """Assemble and build the OpenCL program for the ODE solver kernel.

    The kernel source is produced by filling ``_ode_solver_kernel_fmt``
    with *func_name*, the C names of *t_type* and *y_type*, the
    declarations/names of *extra_args* and the optional *post_func*.
    It is appended to *src* and built for *dev* within *ctx*, with the
    ``cl`` source directory added as an include path after any caller
    supplied *options*.
    """
    t_type = np.dtype(t_type)
    y_type = np.dtype(y_type)

    if extra_args:
        # Each piece starts with ", " so it can be appended directly
        # after the fixed kernel arguments.
        extra_args_decl = ', ' + ', '.join(arg.decl for arg in extra_args)
        extra_args_name = ', ' + ', '.join(arg.name for arg in extra_args)
    else:
        extra_args_decl = ''
        extra_args_name = ''

    format_values = dict(
        elwise_diff_func=func_name,
        t_type=dtype_to_ctype(t_type),
        y_type=dtype_to_ctype(y_type),
        extra_args_decl=extra_args_decl,
        extra_args_name=extra_args_name,
        post_func=post_func or '',
        has_post_func='1' if post_func else '0')
    whole_src = src + _ode_solver_kernel_fmt.format(**format_values)

    build_options = (options or []) + ['-I', cl_src_dir + '/cl']

    self.__ctx = ctx
    self.__dev = dev
    self.__y_type = y_type
    self.__t_type = t_type
    self.__prog = cl.Program(ctx, whole_src)
    self.__prog.build(options=build_options, devices=[dev])
    self.__has_post = bool(post_func)
def declarator(self):
    """Return the kernel-argument declaration string for this argument.

    Without an offset this is a single ``__global`` pointer named
    ``self.name``.  With ``self.with_offset`` set, it is a
    ``<name>__base`` pointer followed by a ``long <name>__offset``.
    """
    if not self.with_offset:
        return "__global %s *%s" % (dtype_to_ctype(self.dtype), self.name)

    c_type = dtype_to_ctype(self.dtype)
    arg_name = self.name
    # Two underscores -> less likelihood of a name clash.
    return "__global %s *%s__base, long %s__offset" % (
        c_type, arg_name, arg_name)
def declarator(self):
    """Build the C parameter declaration for this vector argument.

    Returns ``"__global <ctype> *<name>"`` normally.  When
    ``self.with_offset`` is true, returns a base pointer and a ``long``
    offset parameter instead (``<name>__base`` and ``<name>__offset``).
    """
    if self.with_offset:
        # Two underscores -> less likelihood of a name clash.
        template = "__global %s *%s__base, long %s__offset"
        values = (dtype_to_ctype(self.dtype), self.name, self.name)
    else:
        template = "__global %s *%s"
        values = (dtype_to_ctype(self.dtype), self.name)
    return template % values
def add_dtype(self, dtype):
    """Register *dtype* and declare any struct types it requires.

    Sets ``self.saw_double`` for float64 and complex128 and
    ``self.saw_complex`` for any complex dtype.  Only dtypes of kind
    ``"V"`` are processed further: already declared dtypes and the
    built-in vector types are skipped, sub-array dtypes delegate to their
    base dtype, and for everything else the field dtypes are added
    recursively before the struct declaration obtained from
    ``match_dtype_to_c_struct`` is appended to ``self.declarations``.
    """
    dtype = np.dtype(dtype)

    # Both double-precision kinds must be listed explicitly; writing
    # ``np.float64 or np.complex128`` collapses to np.float64 alone.
    if dtype in (np.dtype(np.float64), np.dtype(np.complex128)):
        self.saw_double = True

    if dtype.kind == "c":
        self.saw_complex = True

    if dtype.kind != "V":
        return

    if dtype in self.declared_dtypes:
        return

    import pyopencl.cltypes
    if dtype in pyopencl.cltypes.vec_type_to_scalar_and_count:
        return

    if hasattr(dtype, "subdtype") and dtype.subdtype is not None:
        self.add_dtype(dtype.subdtype[0])
        return

    # Sorted by field name so declarations are emitted in a stable order.
    for _field_name, field_data in sorted(six.iteritems(dtype.fields)):
        self.add_dtype(field_data[0])

    _, cdecl = match_dtype_to_c_struct(
        self.device, dtype_to_ctype(dtype), dtype)

    self.declarations.append(cdecl)
    self.declared_dtypes.add(dtype)
def add_dtype(self, dtype):
    """Register *dtype* and declare any struct types it requires.

    Sets ``self.saw_double`` for float64 and complex128 and
    ``self.saw_complex`` for any complex dtype.  Only dtypes of kind
    ``"V"`` are processed further: already declared dtypes and the
    built-in vector types are skipped; otherwise the field dtypes are
    added recursively before the struct declaration obtained from
    ``match_dtype_to_c_struct`` is appended to ``self.declarations``.
    """
    dtype = np.dtype(dtype)

    # Both double-precision kinds must be listed explicitly; writing
    # ``np.float64 or np.complex128`` collapses to np.float64 alone.
    if dtype in (np.dtype(np.float64), np.dtype(np.complex128)):
        self.saw_double = True

    if dtype.kind == "c":
        self.saw_complex = True

    if dtype.kind != "V":
        return

    if dtype in self.declared_dtypes:
        return

    from pyopencl.array import vec
    if dtype in vec.type_to_scalar_and_count:
        return

    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` does not.
    for _field_name, field_data in dtype.fields.items():
        self.add_dtype(field_data[0])

    _, cdecl = match_dtype_to_c_struct(
        self.device, dtype_to_ctype(dtype), dtype)

    self.declarations.append(cdecl)
    self.declared_dtypes.add(dtype)
def get_type(self, address_space_qualifier):
    """Return the C99 packed-struct typedef for this plain type.

    One member is emitted per name in ``self.blob_type.dtype.names``,
    skipping padding fields, followed by a ``#define`` of the upper-cased
    type name.  The returned text is stripped of surrounding whitespace.
    """
    dtype = self.blob_type.dtype
    members = [
        '\t%s %s;' % (dtype_to_ctype(dtype.fields[field][0]), field)
        for field in dtype.names
        if not field.endswith(Blob.PADDING_FIELD_SUFFIX)
    ]

    template = '''
/* plain type %(cname)s */
typedef struct __attribute__((__packed__)) %(cname)s
{
%(fields)s
} %(cname)s;
#define %(define)s
'''
    values = {
        'fields': '\n'.join(members),
        'cname': self.get_name(address_space_qualifier),
        'define': self.get_name(address_space_qualifier).upper()
    }
    return (template % values).strip()
def add_dtype(self, dtype):
    """Register *dtype* and declare any struct types it requires.

    Sets ``self.saw_double`` for float64 and complex128 and
    ``self.saw_complex`` for any complex dtype.  Only dtypes of kind
    ``"V"`` are processed further: already declared dtypes and the
    built-in vector types are skipped; otherwise the field dtypes are
    added recursively before the struct declaration obtained from
    ``match_dtype_to_c_struct`` is appended to ``self.declarations``.
    """
    dtype = np.dtype(dtype)

    # Both double-precision kinds must be listed explicitly; writing
    # ``np.float64 or np.complex128`` collapses to np.float64 alone.
    if dtype in (np.dtype(np.float64), np.dtype(np.complex128)):
        self.saw_double = True

    if dtype.kind == "c":
        self.saw_complex = True

    if dtype.kind != "V":
        return

    if dtype in self.declared_dtypes:
        return

    from pyopencl.array import vec
    if dtype in vec.type_to_scalar_and_count:
        return

    for _field_name, field_data in six.iteritems(dtype.fields):
        self.add_dtype(field_data[0])

    _, cdecl = match_dtype_to_c_struct(
        self.device, dtype_to_ctype(dtype), dtype)

    self.declarations.append(cdecl)
    self.declared_dtypes.add(dtype)
def __call__(self, txt):
    """Render *txt* as a template and substitute type names.

    *txt* is rendered through ``self.template`` with ``self.var_dict``.
    Afterwards every whole-word occurrence of a key of ``self.type_dict``
    is replaced by the C name of the associated dtype.  ``None`` is
    passed through unchanged.
    """
    if txt is None:
        return txt

    result = self.template.get_text_template(txt).render(self.var_dict)

    # substitute in types
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` does not.
    for name, dtype in self.type_dict.items():
        # Escape the name so it is matched literally, not as a pattern.
        result = re.sub(
            r"\b%s\b" % re.escape(name), dtype_to_ctype(dtype), result)

    return str(result)
def get_arg_offset_adjuster_code(arg_types):
    """Return C statements that rebuild pointers from base/offset pairs.

    For every ``VectorArg`` in *arg_types* that has ``with_offset`` set,
    one statement is produced that declares ``<name>`` as
    ``<name>__base`` advanced by ``<name>__offset`` bytes.  Statements
    are joined with newlines; the result is empty if none apply.
    """
    statement = (
        "__global %(type)s *%(name)s = "
        "(__global %(type)s *) "
        "((__global char *) %(name)s__base + %(name)s__offset);")

    return "\n".join(
        statement % dict(type=dtype_to_ctype(arg.dtype), name=arg.name)
        for arg in arg_types
        if isinstance(arg, VectorArg) and arg.with_offset)
def get_sizeof(self, address_space_qualifier):
    """Generate the C99 sizeof function for this type.

    Returns a ``(prototype, implementation)`` pair of strings.  The
    generated function deserializes ``blob`` into a local ``self`` and
    then adds up the size of every non-padding component: ``sizeof`` for
    scalar types, the component's own sizeof function otherwise.
    """
    qualifier = address_space_qualifier

    definition = 'unsigned long %(function_name)s(%(address_space_qualifier)s char* blob)' % {
        'function_name': self.get_sizeof_name(qualifier),
        'address_space_qualifier': qualifier,
    }

    # the first argument must be the data itself.
    call_arguments = ['blob', '&self']
    # all required variable declarations
    body = ['%s %s;' % (self.get_name(qualifier, ), 'self')]

    # one "size += ..." statement per component/subtype
    size_statements = []
    for field, subtype in self.blob_type.subtypes:
        if field.endswith(Blob.PADDING_FIELD_SUFFIX):
            continue

        if numpy.issctype(subtype):
            # determine the size of the scalar type
            sizeof_call = 'sizeof(%s)' % dtype_to_ctype(subtype)
        else:
            # determine the size of the complex type
            assert issubclass(subtype, Blob), 'unexpected type %s %s' % (type(subtype), subtype)
            sizeof_call = '%s((%s char*)(blob + size))' % (
                BlobLib.get_interface(subtype).get_sizeof_name(qualifier),
                qualifier,
            )

        size_statements.append('size += %s;' % sizeof_call)

    # declarations first, then the deserialize call, then the summation
    body.append('%s(%s);' % (
        self.get_deserialize_name(qualifier), ', '.join(call_arguments)))
    body.extend(size_statements)

    # fill the function template
    template = '''
%(definition)s
{
	unsigned long size = 0;
%(lines)s
	return size;
}
'''
    declaration = template % {
        'definition': definition.strip(),
        'cname': self.get_name(qualifier),
        'lines': '\n'.join(['\t' + statement for statement in body])
    }
    return definition.strip() + ';', declaration.strip()
def get_arg_offset_adjuster_code(arg_types):
    """Generate the pointer set-up code for offset-carrying arguments.

    Each ``VectorArg`` in *arg_types* with ``with_offset`` set yields one
    C statement defining ``<name>`` from ``<name>__base`` and the byte
    offset ``<name>__offset``.  Returns the statements joined by newlines.
    """
    offset_args = [
        arg for arg in arg_types
        if isinstance(arg, VectorArg) and arg.with_offset
    ]

    statements = []
    for arg in offset_args:
        values = {"type": dtype_to_ctype(arg.dtype), "name": arg.name}
        statements.append(
            "__global %(type)s *%(name)s = "
            "(__global %(type)s *) "
            "((__global char *) %(name)s__base + %(name)s__offset);"
            % values)

    return "\n".join(statements)
def dtype_to_c_struct(device, dtype):
    """Return the C struct declaration matching *dtype* on *device*.

    The declaration comes from ``match_dtype_to_c_struct``.  An assertion
    checks that the dtype it returns has exactly the same fields (dtype
    and offset) as the *dtype* passed in, i.e. that no layout adjustment
    was necessary.
    """
    matched_dtype, c_decl = match_dtype_to_c_struct(
        device, dtype_to_ctype(dtype), dtype)

    def dtypes_match():
        result = len(dtype.fields) == len(matched_dtype.fields)

        # ``items()`` exists on both Python 2 and 3; ``iteritems()`` does not.
        for name, val in dtype.fields.items():
            result = result and matched_dtype.fields[name] == val

        return result

    assert dtypes_match()

    return c_decl
def dtype_to_c_struct(device, dtype):
    """Return the C struct declaration matching *dtype* on *device*.

    The declaration comes from ``match_dtype_to_c_struct``.  An assertion
    checks that the dtype it returns has exactly the same fields (dtype
    and offset) as the *dtype* passed in, i.e. that no layout adjustment
    was necessary.
    """
    matched_dtype, c_decl = match_dtype_to_c_struct(
        device, dtype_to_ctype(dtype), dtype)

    def dtypes_match():
        result = len(dtype.fields) == len(matched_dtype.fields)

        # ``items()`` exists on both Python 2 and 3; ``iteritems()`` does not.
        for name, val in dtype.fields.items():
            result = result and matched_dtype.fields[name] == val

        return result

    assert dtypes_match()

    return c_decl
def get_type_decl_preamble(self, device, decl_type_names, arguments=None):
    """Return the C type declarations needed ahead of generated code.

    Collects declarations for every type named in *decl_type_names*
    (resolved through ``self.parse_type``), for *arguments* if given, and
    for every dtype in ``self.type_aliases``.  The collected declarations
    are followed by one ``typedef`` line per type alias.
    """
    cdl = _CDeclList(device)
    for typename in decl_type_names:
        cdl.add_dtype(self.parse_type(typename))

    if arguments is not None:
        cdl.visit_arguments(arguments)

    # ``values()``/``items()`` exist on both Python 2 and 3; the
    # ``iter*`` variants do not.
    for tv in self.type_aliases.values():
        cdl.add_dtype(tv)

    type_alias_decls = [
        "typedef %s %s;" % (dtype_to_ctype(val), name)
        for name, val in self.type_aliases.items()
    ]

    return cdl.get_declarations() + "\n" + "\n".join(type_alias_decls)
def get_type_decl_preamble(self, device, decl_type_names, arguments=None):
    """Build the block of C type declarations that precedes generated code.

    Declarations are gathered for the types named in *decl_type_names*
    (parsed with ``self.parse_type``), for *arguments* when supplied, and
    for the dtypes in ``self.type_aliases``.  One ``typedef`` per alias is
    appended after the gathered declarations.
    """
    decl_list = _CDeclList(device)

    for type_name in decl_type_names:
        decl_list.add_dtype(self.parse_type(type_name))

    if arguments is not None:
        decl_list.visit_arguments(arguments)

    for alias_dtype in six.itervalues(self.type_aliases):
        decl_list.add_dtype(alias_dtype)

    typedef_lines = []
    for alias, alias_dtype in six.iteritems(self.type_aliases):
        typedef_lines.append(
            "typedef %s %s;" % (dtype_to_ctype(alias_dtype), alias))

    typedef_block = "\n".join(typedef_lines)
    return decl_list.get_declarations() + "\n" + typedef_block
def get_type(self, address_space_qualifier):
    """Return the C99 struct typedef for this complex type.

    Every non-padding component becomes one member: scalar types and
    plain blob types are stored as pointers, all other blob types are
    embedded by value using their own generated type name.
    """
    qualifier = address_space_qualifier
    members = []

    # iterate over all subtypes/components
    for field, subtype in self.blob_type.subtypes:
        if field.endswith(Blob.PADDING_FIELD_SUFFIX):
            continue

        if numpy.issctype(subtype):
            member = '%s %s* %s;' % (qualifier, dtype_to_ctype(subtype), field)
        else:
            assert issubclass(subtype, Blob), 'unexpected type %s %s' % (type(subtype), subtype)
            if subtype.is_plain():
                member = '%s* %s;' % (
                    BlobLib.get_interface(subtype).get_spaced_name(qualifier),
                    field
                )
            else:
                member = '%s %s;' % (
                    BlobLib.get_interface(subtype).get_name(qualifier),
                    field
                )
        members.append(member)

    template = '''
/* complex type %(name)s */
typedef struct _%(name)s
{
	%(fields)s
} %(name)s;
'''
    return template % {
        'name': self.get_name(qualifier, ),
        'fields': '\n\t'.join(members)
    }
def add_dtype(self, dtype):
    """Register *dtype* and declare any struct types it requires.

    Sets ``self.saw_double`` for float64 and complex128 and
    ``self.saw_complex`` for any complex dtype.  Only dtypes of kind
    ``"V"`` that were not declared before are processed further: their
    field dtypes are added recursively, then the struct declaration
    obtained from ``match_dtype_to_c_struct`` is appended to
    ``self.declarations``.
    """
    dtype = np.dtype(dtype)

    # Both double-precision kinds must be listed explicitly; writing
    # ``np.float64 or np.complex128`` collapses to np.float64 alone.
    if dtype in (np.dtype(np.float64), np.dtype(np.complex128)):
        self.saw_double = True

    if dtype.kind == "c":
        self.saw_complex = True

    if dtype.kind != "V":
        return

    if dtype in self.declared_dtypes:
        return

    # ``items()`` exists on both Python 2 and 3.  Indexing (instead of
    # unpacking a 2-tuple) also tolerates field entries carrying a title.
    for _field_name, field_data in dtype.fields.items():
        self.add_dtype(field_data[0])

    _, cdecl = match_dtype_to_c_struct(
        self.device, dtype_to_ctype(dtype), dtype)

    self.declarations.append(cdecl)
    self.declared_dtypes.add(dtype)
def dtype_to_c_struct(device, dtype):
    """Return the C struct declaration for *dtype*, or ``""`` if none is needed.

    Dtypes without fields and the built-in vector types produce an empty
    string.  For all others the declaration comes from
    ``match_dtype_to_c_struct``; an assertion verifies that the dtype it
    returns has the same fields as the one passed in.
    """
    if dtype.fields is None:
        return ""

    import pyopencl.cltypes
    builtin_vector_types = pyopencl.cltypes.vec_type_to_scalar_and_count
    if dtype in builtin_vector_types:
        # Vector types are built-in. Don't try to redeclare those.
        return ""

    matched_dtype, c_decl = match_dtype_to_c_struct(
        device, dtype_to_ctype(dtype), dtype)

    def dtypes_match():
        expected = dtype.fields
        actual = matched_dtype.fields
        if len(expected) != len(actual):
            return False
        return all(
            actual[name] == val for name, val in six.iteritems(expected))

    assert dtypes_match()

    return c_decl
def dtype_to_c_struct(device, dtype):
    """Return the C struct declaration for *dtype*, or ``""`` if none is needed.

    Dtypes without fields and the built-in vector types produce an empty
    string.  For all others the declaration comes from
    ``match_dtype_to_c_struct``; an assertion verifies that the dtype it
    returns has the same fields as the one passed in.
    """
    if dtype.fields is None:
        return ""

    from pyopencl.array import vec
    if dtype in vec.type_to_scalar_and_count:
        # Vector types are built-in. Don't try to redeclare those.
        return ""

    matched_dtype, c_decl = match_dtype_to_c_struct(
        device, dtype_to_ctype(dtype), dtype)

    def dtypes_match():
        expected = dtype.fields
        actual = matched_dtype.fields
        if len(expected) != len(actual):
            return False
        return all(
            actual[name] == val for name, val in six.iteritems(expected))

    assert dtypes_match()

    return c_decl
def get_type(self, address_space_qualifier):
    """Return the C99 packed-struct typedef for this array type.

    The struct holds one pointer member per entry of
    ``self.blob_type.dtype_static_components`` followed by a ``char*``
    member named ``self.FIRST_ITEM_FIELD``.
    """
    qualifier = address_space_qualifier

    static_members = [
        '\t%s %s* %s;' % (qualifier, dtype_to_ctype(subdtype), field)
        for field, subdtype in self.blob_type.dtype_static_components
    ]

    child_interface = BlobLib.get_interface(self.blob_type.child_type)
    child_name = child_interface.get_spaced_name(qualifier)

    template = '''
/* array type %(name)s */
typedef struct __attribute__((__packed__)) _%(name)s
{
%(static_fields)s
	%(address_space_qualifier)s char* %(first_item_field)s;
} %(name)s;'''
    values = {
        'static_fields': '\n'.join(static_members),
        'first_item_field': self.FIRST_ITEM_FIELD,
        'child_name': child_name,
        'address_space_qualifier': qualifier,
        'name': self.get_name(qualifier)
    }
    return template % values
def declarator(self):
    """Return ``"<ctype> <name>"`` for this argument.

    ``<ctype>`` is the C type name of ``self.dtype`` as produced by
    ``dtype_to_ctype``; ``<name>`` is ``self.name``.
    """
    c_type = dtype_to_ctype(self.dtype)
    return "{} {}".format(c_type, self.name)
def match_dtype_to_c_struct(device, name, dtype, context=None):
    """Return a tuple `(dtype, c_decl)` such that the C struct declaration
    in `c_decl` and the structure :class:`numpy.dtype` instance `dtype`
    have the same memory layout.

    Note that *dtype* may be modified from the value that was passed in,
    for example to insert padding.

    (As a remark on implementation, this routine runs a small kernel on
    the given *device* to ensure that :mod:`numpy` and C offsets and
    sizes match.)

    :arg device: the device on which the layout-probing kernel is built
        and run.
    :arg name: the C name given to the generated struct typedef.
    :arg dtype: the structured :class:`numpy.dtype` to match.
    :arg context: an optional context to use; a new one for *device* is
        created if omitted.
    :raises RuntimeError: if the compiler reports an offset at or past
        the struct size and the struct size differs from numpy's.

    .. versionadded:: 2013.1

    This example explains the use of this function::

        >>> import numpy as np
        >>> import pyopencl as cl
        >>> import pyopencl.tools
        >>> ctx = cl.create_some_context()
        >>> dtype = np.dtype([("id", np.uint32), ("value", np.float32)])
        >>> dtype, c_decl = pyopencl.tools.match_dtype_to_c_struct(
        ...     ctx.devices[0], 'id_val', dtype)
        >>> print(c_decl)
        typedef struct {
          unsigned id;
          float value;
        } id_val;
        >>> print(dtype)
        [('id', '<u4'), ('value', '<f4')]
        >>> cl.tools.get_or_register_dtype('id_val', dtype)

    As this example shows, it is important to call
    :func:`get_or_register_dtype` on the modified `dtype` returned by this
    function, not the original one.
    """
    # Fields ordered by their numpy offset.
    fields = sorted(
        six.iteritems(dtype.fields),
        key=lambda name_dtype_offset: name_dtype_offset[1][1])

    # Field entries may carry a title as a third element, so index rather
    # than unpack a 2-tuple.
    field_names = [field_name for field_name, _ in fields]
    field_dtypes = [dtype_and_offset[0] for _, dtype_and_offset in fields]
    numpy_offsets = [dtype_and_offset[1] for _, dtype_and_offset in fields]

    c_fields = [
        " %s %s;" % (dtype_to_ctype(field_dtype), field_name)
        for field_name, field_dtype in zip(field_names, field_dtypes)
    ]

    c_decl = "typedef struct {\n%s\n} %s;\n\n" % ("\n".join(c_fields), name)

    # Declarations the field types themselves depend on.
    cdl = _CDeclList(device)
    for field_dtype in field_dtypes:
        cdl.add_dtype(field_dtype)

    pre_decls = cdl.get_declarations()

    offset_code = "\n".join(
        "result[%d] = pycl_offsetof(%s, %s);" % (i + 1, name, field_name)
        for i, field_name in enumerate(field_names))

    src = r"""
        #define pycl_offsetof(st, m) \
                 ((size_t) ((__local char *) &(dummy.m) \
                 - (__local char *)&dummy ))

        %(pre_decls)s

        %(my_decl)s

        __kernel void get_size_and_offsets(__global size_t *result)
        {
            result[0] = sizeof(%(my_type)s);
            __local %(my_type)s dummy;
            %(offset_code)s
        }
    """ % dict(
        pre_decls=pre_decls,
        my_decl=c_decl,
        my_type=name,
        offset_code=offset_code)

    if context is None:
        context = cl.Context([device])

    queue = cl.CommandQueue(context)

    prg = cl.Program(context, src)
    knl = prg.build(devices=[device]).get_size_and_offsets

    import pyopencl.array  # noqa
    result_buf = cl.array.empty(queue, 1 + len(fields), np.uintp)
    knl(queue, (1, ), (1, ), result_buf.data)
    queue.finish()
    size_and_offsets = result_buf.get()

    size = int(size_and_offsets[0])

    offsets = size_and_offsets[1:]
    if any(ofs >= size for ofs in offsets):
        # offsets not plausible

        if dtype.itemsize == size:
            # If sizes match, use numpy's idea of the offsets.
            offsets = numpy_offsets
        else:
            raise RuntimeError(
                "OpenCL compiler reported offsetof() past sizeof() "
                "for struct layout on '%s'. "
                "This makes no sense, and it's usually indicates a "
                "compiler bug. "
                "Refusing to discover struct layout."
                % device)

    result_buf.data.release()
    del knl
    del prg
    del queue
    del context

    # Plain Python ints: arithmetic on unsigned numpy scalars mixed with
    # Python ints may not stay integral.
    int_offsets = [int(x) for x in offsets]

    try:
        dtype_arg_dict = {
            'names': list(field_names),
            'formats': list(field_dtypes),
            'offsets': list(int_offsets),
            'itemsize': size,
        }
        dtype = np.dtype(dtype_arg_dict)
        if dtype.itemsize != size_and_offsets[0]:
            # "Old" versions of numpy (1.6.x?) silently ignore "itemsize". Boo.
            dtype_arg_dict["names"].append("_pycl_size_fixer")
            dtype_arg_dict["formats"].append(np.uint8)
            dtype_arg_dict["offsets"].append(size - 1)
            dtype = np.dtype(dtype_arg_dict)
    except NotImplementedError:
        def calc_field_type():
            total_size = 0
            padding_count = 0
            for offset, field_name, field_dtype in zip(
                    int_offsets, field_names, field_dtypes):
                if offset > total_size:
                    padding_count += 1
                    # Parenthesized: ``%`` binds tighter than ``-``.
                    yield ('__pycl_padding%d' % padding_count,
                           'V%d' % (offset - total_size))
                yield field_name, field_dtype
                total_size = field_dtype.itemsize + offset
        dtype = np.dtype(list(calc_field_type()))

    assert dtype.itemsize == size_and_offsets[0]

    return dtype, c_decl
def get_deserialize(self, address_space_qualifier):
    """Generate the C99 deserializer function for this complex type.

    Returns a ``(prototype, implementation)`` pair of strings.  The
    generated function walks the non-padding components in order,
    computing each component's offset inside ``blob`` from the offset and
    size of the previous one and storing a pointer to (or a nested
    deserialization of) the component in ``self``.
    """
    qualifier = address_space_qualifier
    parameters = ['%s char* blob' % qualifier]
    local_decls = []
    statements = []
    prev_offset = 0
    prev_space = 0
    final_field = self.blob_type.subtypes[-1][0]

    # iterate over all subtypes/components
    for field, subtype in self.blob_type.subtypes:
        if field.endswith(Blob.PADDING_FIELD_SUFFIX):
            continue

        # The size of the last component is never needed.
        needs_space = field != final_field

        # format
        statements.extend(['', '/* cast of %s */' % field])

        # used variable names
        target = 'self->%s' % field
        offset_var = '%s_offset' % field
        source = 'blob + %s' % offset_var
        local_decls.append('unsigned long %s;' % offset_var)
        if needs_space:
            space_var = '%s_space' % field
            local_decls.append('unsigned long %s;' % space_var)

        # add sizeof call of component
        is_scalar = numpy.issctype(subtype)
        if is_scalar:
            cname = "%s %s" % (qualifier, dtype_to_ctype(subtype))
            sizeof_call = 'sizeof(%s)' % cname
        else:
            assert issubclass(subtype, Blob), 'unexpected type %s %s' % (type(subtype), subtype)
            cname = "%s %s" % (
                qualifier,
                BlobLib.get_interface(subtype).get_name(qualifier)
            )
            sizeof_call = '%s((%s char*)%s)' % (
                BlobLib.get_interface(subtype).get_sizeof_name(qualifier),
                qualifier,
                source
            )

        # determine offset of component
        statements.append('%s = %s + %s;' % (offset_var, prev_offset, prev_space))

        # set and cast component reference
        if is_scalar or subtype.is_plain():
            statements.append('%s = (%s*)(%s);' % (target, cname, source))
        else:
            statements.append('%s(%s, &%s);' % (
                BlobLib.get_interface(subtype).get_deserialize_name(qualifier),
                source,
                target
            ))

        if needs_space:
            # determine size of component
            statements.append('%s = %s;' % (space_var, sizeof_call))
            prev_space = space_var

        prev_offset = offset_var

    parameters.append('%s* %s' % (self.get_name(qualifier, ), 'self'))
    definition = 'void %s(%s)' % (
        self.get_deserialize_name(qualifier), ', '.join(parameters))

    # fill function template: signature, declarations, statements
    lines = [definition, '{']
    lines.extend('\t' + decl for decl in local_decls)
    lines.extend('\t' + statement for statement in statements)
    lines.append('}')

    declaration = '\n'.join(lines)
    return definition.strip() + ';', declaration
def declarator(self):
    """Return ``"<ctype> <name>"`` for this argument.

    ``<ctype>`` is the C type name of ``self.dtype`` as produced by
    ``dtype_to_ctype``; ``<name>`` is ``self.name``.
    """
    parts = (dtype_to_ctype(self.dtype), self.name)
    return "%s %s" % parts
def match_dtype_to_c_struct(device, name, dtype, context=None):
    """Return a tuple `(dtype, c_decl)` such that the C struct declaration
    in `c_decl` and the structure :class:`numpy.dtype` instance `dtype`
    have the same memory layout.

    Note that *dtype* may be modified from the value that was passed in,
    for example to insert padding.

    (As a remark on implementation, this routine runs a small kernel on
    the given *device* to ensure that :mod:`numpy` and C offsets and
    sizes match.)

    :arg device: the device on which the layout-probing kernel is built
        and run.
    :arg name: the C name given to the generated struct typedef.
    :arg dtype: the structured :class:`numpy.dtype` to match.
    :arg context: an optional context to use; a new one for *device* is
        created if omitted.
    :raises RuntimeError: if the compiler reports an offset at or past
        the struct size and the struct size differs from numpy's.

    .. versionadded:: 2013.1

    This example explains the use of this function::

        >>> import numpy as np
        >>> import pyopencl as cl
        >>> import pyopencl.tools
        >>> ctx = cl.create_some_context()
        >>> dtype = np.dtype([("id", np.uint32), ("value", np.float32)])
        >>> dtype, c_decl = pyopencl.tools.match_dtype_to_c_struct(
        ...     ctx.devices[0], 'id_val', dtype)
        >>> print c_decl
        typedef struct {
          unsigned id;
          float value;
        } id_val;
        >>> print dtype
        [('id', '<u4'), ('value', '<f4')]
        >>> cl.tools.get_or_register_dtype('id_val', dtype)

    As this example shows, it is important to call
    :func:`get_or_register_dtype` on the modified `dtype` returned by this
    function, not the original one.
    """
    # Fields ordered by their numpy offset.  A single-argument lambda and
    # ``items()`` are used because tuple parameters and ``iteritems()``
    # are not available on Python 3.
    fields = sorted(dtype.fields.items(), key=lambda item: item[1][1])

    c_fields = []
    for field_name, (field_dtype, offset) in fields:
        c_fields.append(" %s %s;" % (dtype_to_ctype(field_dtype), field_name))

    c_decl = "typedef struct {\n%s\n} %s;\n\n" % (
        "\n".join(c_fields),
        name)

    # Declarations the field types themselves depend on.
    cdl = _CDeclList(device)
    for field_name, (field_dtype, offset) in fields:
        cdl.add_dtype(field_dtype)

    pre_decls = cdl.get_declarations()

    offset_code = "\n".join(
        "result[%d] = pycl_offsetof(%s, %s);" % (i+1, name, field_name)
        for i, (field_name, (field_dtype, offset)) in enumerate(fields))

    src = r"""
        #define pycl_offsetof(st, m) \
                 ((size_t) ((__local char *) &(dummy.m) \
                 - (__local char *)&dummy ))

        %(pre_decls)s

        %(my_decl)s

        __kernel void get_size_and_offsets(__global size_t *result)
        {
            result[0] = sizeof(%(my_type)s);
            __local %(my_type)s dummy;
            %(offset_code)s
        }
    """ % dict(
        pre_decls=pre_decls,
        my_decl=c_decl,
        my_type=name,
        offset_code=offset_code)

    if context is None:
        context = cl.Context([device])

    queue = cl.CommandQueue(context)

    prg = cl.Program(context, src)
    knl = prg.build(devices=[device]).get_size_and_offsets

    import pyopencl.array  # noqa
    result_buf = cl.array.empty(queue, 1+len(fields), np.uintp)
    knl(queue, (1,), (1,), result_buf.data)
    queue.finish()
    size_and_offsets = result_buf.get()

    size = int(size_and_offsets[0])

    offsets = size_and_offsets[1:]
    if any(ofs >= size for ofs in offsets):
        # offsets not plausible

        if dtype.itemsize == size:
            # If sizes match, use numpy's idea of the offsets.
            offsets = [offset
                       for field_name, (field_dtype, offset) in fields]
        else:
            raise RuntimeError(
                "cannot discover struct layout on '%s'" % device)

    result_buf.data.release()
    del knl
    del prg
    del queue
    del context

    dtype_arg_dict = dict(
        names=[field_name
               for field_name, (field_dtype, offset) in fields],
        formats=[field_dtype
                 for field_name, (field_dtype, offset) in fields],
        offsets=[int(x) for x in offsets],
        itemsize=int(size_and_offsets[0]),
    )
    dtype = np.dtype(dtype_arg_dict)

    if dtype.itemsize != size_and_offsets[0]:
        # "Old" versions of numpy (1.6.x?) silently ignore "itemsize". Boo.
        dtype_arg_dict["names"].append("_pycl_size_fixer")
        dtype_arg_dict["formats"].append(np.uint8)
        dtype_arg_dict["offsets"].append(int(size_and_offsets[0])-1)
        dtype = np.dtype(dtype_arg_dict)

    assert dtype.itemsize == size_and_offsets[0]

    return dtype, c_decl
def dtype_to_ctype(self, dtype):
    """Return the result of ``pyopencl.compyte.dtypes.dtype_to_ctype`` for *dtype*.

    The import is done at call time inside the method.
    """
    from pyopencl.compyte.dtypes import dtype_to_ctype as convert
    ctype = convert(dtype)
    return ctype
def declarator(self):
    """Return ``"__global <ctype> *<name>"`` for this argument.

    ``<ctype>`` is the C type name of ``self.dtype`` as produced by
    ``dtype_to_ctype``; ``<name>`` is ``self.name``.
    """
    element_type = dtype_to_ctype(self.dtype)
    return "__global %s *%s" % (element_type, self.name)