Esempio n. 1
0
    def __init__(self, ctx, dev, src, func_name, t_type=np.float32,
                 y_type=np.float32, extra_args=None, options=None,
                 post_func=None):
        """Compile *src* together with the generated ODE solver kernel.

        The solver kernel is produced by filling ``_ode_solver_kernel_fmt``
        and is appended to *src* before the program is built for *dev*.

        :arg ctx: context handed to ``cl.Program``.
        :arg dev: the single device the program is built for.
        :arg src: source text placed in front of the generated kernel.
        :arg func_name: substituted for ``elwise_diff_func`` in the template.
        :arg t_type: anything accepted by ``np.dtype``; its C name fills
            ``t_type`` in the template.
        :arg y_type: anything accepted by ``np.dtype``; its C name fills
            ``y_type`` in the template.
        :arg extra_args: optional objects exposing ``decl`` and ``name``;
            both are spliced into the template as comma-prefixed lists.
        :arg options: optional list of build options; an include path below
            ``cl_src_dir`` is always appended.
        :arg post_func: optional value substituted for ``post_func``; a falsy
            value disables it (``has_post_func`` becomes ``'0'``).
        """
        t_type = np.dtype(t_type)
        y_type = np.dtype(y_type)

        if extra_args:
            # Leading ", " so the fragments can follow the fixed arguments.
            extra_args_decl = ', ' + ', '.join(arg.decl for arg in extra_args)
            extra_args_name = ', ' + ', '.join(arg.name for arg in extra_args)
        else:
            extra_args_decl = ''
            extra_args_name = ''

        template_values = dict(
            elwise_diff_func=func_name,
            t_type=dtype_to_ctype(t_type),
            y_type=dtype_to_ctype(y_type),
            extra_args_decl=extra_args_decl,
            extra_args_name=extra_args_name,
            post_func=post_func or '',
            has_post_func='1' if post_func else '0')
        whole_src = src + _ode_solver_kernel_fmt.format(**template_values)

        options = (options or []) + ['-I', cl_src_dir + '/cl']

        self.__ctx = ctx
        self.__dev = dev
        self.__y_type = y_type
        self.__t_type = t_type

        program = cl.Program(ctx, whole_src)
        self.__prog = program
        program.build(options=options, devices=[dev])

        self.__has_post = bool(post_func)
Esempio n. 2
0
    def declarator(self):
        """Return the OpenCL C parameter declaration for this argument.

        Without ``with_offset`` this is a single ``__global`` pointer named
        ``self.name``.  With ``with_offset`` it is a ``<name>__base`` pointer
        followed by a ``long <name>__offset`` parameter.
        """
        if not self.with_offset:
            return "__global %s *%s" % (dtype_to_ctype(self.dtype), self.name)

        # Two underscores -> less likelihood of a name clash.
        ctype = dtype_to_ctype(self.dtype)
        return "__global %s *%s__base, long %s__offset" % (ctype, self.name, self.name)
Esempio n. 3
0
    def declarator(self):
        """Build the kernel-signature fragment that declares this argument.

        Returns a plain ``__global <ctype> *<name>`` pointer, or, when
        ``with_offset`` is set, a ``<name>__base`` pointer plus a
        ``long <name>__offset`` parameter.
        """
        if self.with_offset:
            # Two underscores -> less likelihood of a name clash.
            pieces = (dtype_to_ctype(self.dtype), self.name, self.name)
            return "__global %s *%s__base, long %s__offset" % pieces

        pieces = (dtype_to_ctype(self.dtype), self.name)
        return "__global %s *%s" % pieces
Esempio n. 4
0
    def add_dtype(self, dtype):
        """Record *dtype* and declare any C struct it requires.

        Sets ``saw_double`` for ``float64``/``complex128`` and ``saw_complex``
        for any complex dtype.  Only dtypes of kind ``"V"`` go further: those
        not yet in ``declared_dtypes`` and not OpenCL vector types have their
        sub-array base type or their field types added recursively, and the
        struct declaration returned by ``match_dtype_to_c_struct`` is
        appended to ``declarations``.

        :arg dtype: anything accepted by ``np.dtype``.
        """
        dtype = np.dtype(dtype)

        # This used to read ``[np.float64 or np.complex128]``, which is just
        # ``[np.float64]`` -- complex128 never flagged double support.
        if dtype in (np.float64, np.complex128):
            self.saw_double = True

        if dtype.kind == "c":
            self.saw_complex = True

        if dtype.kind != "V":
            return

        if dtype in self.declared_dtypes:
            return

        import pyopencl.cltypes
        if dtype in pyopencl.cltypes.vec_type_to_scalar_and_count:
            # Vector types are skipped: no struct declaration is generated.
            return

        if hasattr(dtype, "subdtype") and dtype.subdtype is not None:
            # Sub-array dtype: only the base element type is declared.
            self.add_dtype(dtype.subdtype[0])
            return

        # Field types are added before the struct that contains them.
        for _, field_data in sorted(six.iteritems(dtype.fields)):
            self.add_dtype(field_data[0])

        _, cdecl = match_dtype_to_c_struct(self.device, dtype_to_ctype(dtype),
                                           dtype)

        self.declarations.append(cdecl)
        self.declared_dtypes.add(dtype)
Esempio n. 5
0
    def add_dtype(self, dtype):
        """Record *dtype* and declare any C struct it requires.

        Sets ``saw_double`` for ``float64``/``complex128`` and ``saw_complex``
        for any complex dtype.  Only dtypes of kind ``"V"`` go further: those
        not yet in ``declared_dtypes`` and not OpenCL vector types have their
        field types added recursively, and the struct declaration returned by
        ``match_dtype_to_c_struct`` is appended to ``declarations``.

        :arg dtype: anything accepted by ``np.dtype``.
        """
        dtype = np.dtype(dtype)

        # This used to read ``[np.float64 or np.complex128]``, which is just
        # ``[np.float64]`` -- complex128 never flagged double support.
        if dtype in (np.float64, np.complex128):
            self.saw_double = True

        if dtype.kind == "c":
            self.saw_complex = True

        if dtype.kind != "V":
            return

        if dtype in self.declared_dtypes:
            return

        from pyopencl.array import vec
        if dtype in vec.type_to_scalar_and_count:
            # Vector types are skipped: no struct declaration is generated.
            return

        # ``items()`` exists on both Python 2 and 3 (``iteritems()`` does
        # not); field types are added before the struct containing them.
        for _, field_data in dtype.fields.items():
            self.add_dtype(field_data[0])

        _, cdecl = match_dtype_to_c_struct(
                self.device, dtype_to_ctype(dtype), dtype)

        self.declarations.append(cdecl)
        self.declared_dtypes.add(dtype)
Esempio n. 6
0
    def get_type(self, address_space_qualifier):
        """Returns the c99 declaration of the type."""

        field_definitions = []
        for field in self.blob_type.dtype.names:
            if field.endswith(Blob.PADDING_FIELD_SUFFIX):
                continue
            field_definitions.append('\t%s %s;' % (dtype_to_ctype(self.blob_type.dtype.fields[field][0]), field))
        field_definitions = '\n'.join(field_definitions)

        definition = \
'''
/* plain type %(cname)s */

typedef struct __attribute__((__packed__)) %(cname)s
{
%(fields)s
} %(cname)s;

#define %(define)s
''' % {
    'fields': field_definitions,
    'cname': self.get_name(address_space_qualifier),
    'define': self.get_name(address_space_qualifier).upper()
}
        return definition.strip()
Esempio n. 7
0
    def add_dtype(self, dtype):
        """Record *dtype* and declare any C struct it requires.

        Sets ``saw_double`` for ``float64``/``complex128`` and ``saw_complex``
        for any complex dtype.  Only dtypes of kind ``"V"`` go further: those
        not yet in ``declared_dtypes`` and not OpenCL vector types have their
        field types added recursively, and the struct declaration returned by
        ``match_dtype_to_c_struct`` is appended to ``declarations``.

        :arg dtype: anything accepted by ``np.dtype``.
        """
        dtype = np.dtype(dtype)

        # This used to read ``[np.float64 or np.complex128]``, which is just
        # ``[np.float64]`` -- complex128 never flagged double support.
        if dtype in (np.float64, np.complex128):
            self.saw_double = True

        if dtype.kind == "c":
            self.saw_complex = True

        if dtype.kind != "V":
            return

        if dtype in self.declared_dtypes:
            return

        from pyopencl.array import vec
        if dtype in vec.type_to_scalar_and_count:
            # Vector types are skipped: no struct declaration is generated.
            return

        # Field types are added before the struct that contains them.
        for _, field_data in six.iteritems(dtype.fields):
            self.add_dtype(field_data[0])

        _, cdecl = match_dtype_to_c_struct(self.device, dtype_to_ctype(dtype),
                                           dtype)

        self.declarations.append(cdecl)
        self.declared_dtypes.add(dtype)
Esempio n. 8
0
    def __call__(self, txt):
        """Render *txt* as a template and substitute type names.

        :arg txt: template source text, or ``None``.
        :returns: ``None`` when *txt* is ``None``; otherwise the text rendered
            with ``self.var_dict``, in which every whole-word occurrence of a
            key of ``self.type_dict`` has been replaced by the C name of the
            corresponding dtype, converted to ``str``.
        """
        if txt is None:
            return txt

        result = self.template.get_text_template(txt).render(self.var_dict)

        # substitute in types
        # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
        for name, dtype in self.type_dict.items():
            result = re.sub(r"\b%s\b" % name, dtype_to_ctype(dtype), result)

        return str(result)
Esempio n. 9
0
    def __call__(self, txt):
        """Render *txt* as a template and substitute type names.

        :arg txt: template source text, or ``None``.
        :returns: ``None`` when *txt* is ``None``; otherwise the text rendered
            with ``self.var_dict``, in which every whole-word occurrence of a
            key of ``self.type_dict`` has been replaced by the C name of the
            corresponding dtype, converted to ``str``.
        """
        if txt is None:
            return txt

        result = self.template.get_text_template(txt).render(self.var_dict)

        # substitute in types
        # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
        for name, dtype in self.type_dict.items():
            result = re.sub(r"\b%s\b" % name, dtype_to_ctype(dtype), result)

        return str(result)
Esempio n. 10
0
def get_arg_offset_adjuster_code(arg_types):
    """Return C statements that rebuild offset vector-argument pointers.

    For every ``VectorArg`` in *arg_types* with ``with_offset`` set, one
    statement is produced that declares ``<name>`` as ``<name>__base``
    advanced by ``<name>__offset`` bytes.  Statements are newline-joined;
    the result is empty when no argument qualifies.
    """
    return "\n".join(
        "__global %(type)s *%(name)s = "
        "(__global %(type)s *) "
        "((__global char *) %(name)s__base + %(name)s__offset);" %
        dict(type=dtype_to_ctype(arg.dtype), name=arg.name)
        for arg in arg_types
        if isinstance(arg, VectorArg) and arg.with_offset)
Esempio n. 11
0
    def get_sizeof(self, address_space_qualifier):
        """Creates a c99 sizeof method."""

        definition = 'unsigned long %(function_name)s(%(address_space_qualifier)s char* blob)' % {
            'function_name': self.get_sizeof_name(address_space_qualifier),
            'address_space_qualifier': address_space_qualifier,
        }

        arguments = ['blob', '&self']  # the first argument must be the data itself.
        variables = ['%s %s;' % (self.get_name(address_space_qualifier, ), 'self')]  # all required variable names
        lines = []  # all required source code lines

        # iterate over all components/subtypes
        for field, subtype in self.blob_type.subtypes:
            if field.endswith(Blob.PADDING_FIELD_SUFFIX):
                continue

            if numpy.issctype(subtype):
                # determine the size of the scalar type
                cname = dtype_to_ctype(subtype)
                sizeof_call = 'sizeof(%s)' % cname
            else:
                # determine the size of the complex type
                assert issubclass(subtype, Blob), 'unexpected type %s %s' % (type(subtype), subtype)
                sizeof_call = '%s((%s char*)(blob + size))' % (
                    BlobLib.get_interface(subtype).get_sizeof_name(address_space_qualifier),
                    address_space_qualifier,
                )

            # save which arguments and lines are required to determine the total size
            lines.append('size += %s;' % sizeof_call)

        lines.insert(0, '%s(%s);' % (self.get_deserialize_name(address_space_qualifier), ', '.join(arguments)))

        # prepend the variable declarations to the source code
        variables.extend(lines)
        lines = variables

        # fill the function template
        declaration = \
'''
%(definition)s
{
    unsigned long size = 0;
%(lines)s
    return size;
}
''' % {
    'definition': definition.strip(),
    'cname': self.get_name(address_space_qualifier),
    'lines': '\n'.join(['\t' + line for line in lines])
}
        return definition.strip() + ';', declaration.strip()
Esempio n. 12
0
def get_arg_offset_adjuster_code(arg_types):
    """Generate the pointer-adjustment preamble for offset vector arguments.

    Each ``VectorArg`` in *arg_types* whose ``with_offset`` is set yields one
    C statement defining ``<name>`` from ``<name>__base`` and
    ``<name>__offset``.  The statements are joined with newlines.
    """
    statements = [
        "__global %(type)s *%(name)s = "
        "(__global %(type)s *) "
        "((__global char *) %(name)s__base + %(name)s__offset);"
        % {"type": dtype_to_ctype(candidate.dtype), "name": candidate.name}
        for candidate in arg_types
        if isinstance(candidate, VectorArg) and candidate.with_offset
    ]

    return "\n".join(statements)
Esempio n. 13
0
def dtype_to_c_struct(device, dtype):
    """Return the C struct declaration for the structured *dtype*.

    The declaration comes from ``match_dtype_to_c_struct``; the function
    asserts that the layout reported back has the same fields as *dtype*.

    :arg device: passed through to ``match_dtype_to_c_struct``.
    :arg dtype: a :class:`numpy.dtype`.
    :returns: the declaration text, or ``""`` when *dtype* has no fields.
    """
    if dtype.fields is None:
        # Not a structured dtype: there is no struct to declare.
        return ""

    matched_dtype, c_decl = match_dtype_to_c_struct(device, dtype_to_ctype(dtype), dtype)

    def dtypes_match():
        result = len(dtype.fields) == len(matched_dtype.fields)

        # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
        for name, val in dtype.fields.items():
            result = result and matched_dtype.fields[name] == val

        return result

    assert dtypes_match()

    return c_decl
Esempio n. 14
0
def dtype_to_c_struct(device, dtype):
    """Return the C struct declaration for the structured *dtype*.

    The declaration comes from ``match_dtype_to_c_struct``; the function
    asserts that the layout reported back has the same fields as *dtype*.

    :arg device: passed through to ``match_dtype_to_c_struct``.
    :arg dtype: a :class:`numpy.dtype`.
    :returns: the declaration text, or ``""`` when *dtype* has no fields.
    """
    if dtype.fields is None:
        # Not a structured dtype: there is no struct to declare.
        return ""

    matched_dtype, c_decl = match_dtype_to_c_struct(
            device, dtype_to_ctype(dtype), dtype)

    def dtypes_match():
        result = len(dtype.fields) == len(matched_dtype.fields)

        # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
        for name, val in dtype.fields.items():
            result = result and matched_dtype.fields[name] == val

        return result

    assert dtypes_match()

    return c_decl
Esempio n. 15
0
    def get_type_decl_preamble(self, device, decl_type_names, arguments=None):
        """Return the C declarations and typedefs needed ahead of kernel code.

        :arg device: passed to ``_CDeclList``.
        :arg decl_type_names: type names; each is resolved with
            ``self.parse_type`` and added to the declaration list.
        :arg arguments: optional arguments handed to
            ``_CDeclList.visit_arguments``.
        :returns: the collected declarations, a newline, then one
            ``typedef <ctype> <alias>;`` line per entry of
            ``self.type_aliases``.
        """
        cdl = _CDeclList(device)

        for typename in decl_type_names:
            cdl.add_dtype(self.parse_type(typename))

        if arguments is not None:
            cdl.visit_arguments(arguments)

        # ``values()``/``items()`` work on Python 2 and 3; the ``iter*``
        # variants are Python 2 only.
        for tv in self.type_aliases.values():
            cdl.add_dtype(tv)

        type_alias_decls = [
            "typedef %s %s;" % (dtype_to_ctype(val), name)
            for name, val in self.type_aliases.items()
        ]

        return cdl.get_declarations() + "\n" + "\n".join(type_alias_decls)
Esempio n. 16
0
    def get_type_decl_preamble(self, device, decl_type_names, arguments=None):
        """Assemble the declaration preamble placed in front of kernel code.

        Dtypes named in *decl_type_names* (resolved via ``self.parse_type``),
        the optional *arguments* and every dtype in ``self.type_aliases`` are
        fed to a ``_CDeclList`` for *device*.  The returned text is that
        list's declarations, a newline, and one ``typedef`` line per alias.
        """
        decl_list = _CDeclList(device)

        for type_name in decl_type_names:
            parsed = self.parse_type(type_name)
            decl_list.add_dtype(parsed)

        if arguments is not None:
            decl_list.visit_arguments(arguments)

        for alias_dtype in six.itervalues(self.type_aliases):
            decl_list.add_dtype(alias_dtype)

        typedef_lines = []
        for alias, alias_dtype in six.iteritems(self.type_aliases):
            typedef_lines.append(
                "typedef %s %s;" % (dtype_to_ctype(alias_dtype), alias))

        return "\n".join([decl_list.get_declarations(), "\n".join(typedef_lines)])
Esempio n. 17
0
    def get_type(self, address_space_qualifier):
        """Returns the c99 deserializer function declaration, which separates the components of a flat type."""

        fields = []

        # iterate over all subtypes/components
        for field, subtype in self.blob_type.subtypes:
            if field.endswith(Blob.PADDING_FIELD_SUFFIX):
                continue

            # used variable names

            # add sizeof call of component
            if numpy.issctype(subtype):
                fields.append('%s %s* %s;' % (address_space_qualifier, dtype_to_ctype(subtype), field))

            else:
                assert issubclass(subtype, Blob), 'unexpected type %s %s' % (type(subtype), subtype)
                if subtype.is_plain():
                    fields.append('%s* %s;' % (
                        BlobLib.get_interface(subtype).get_spaced_name(address_space_qualifier),
                        field
                    ))

                else:
                    fields.append('%s %s;' % (
                        BlobLib.get_interface(subtype).get_name(address_space_qualifier),
                        field
                    ))

        definition = \
'''
/* complex type %(name)s */

typedef struct _%(name)s
{
    %(fields)s
} %(name)s;
''' % {
    'name': self.get_name(address_space_qualifier, ),
    'fields': '\n\t'.join(fields)
}
        return definition
Esempio n. 18
0
    def add_dtype(self, dtype):
        """Record *dtype* and declare any C struct it requires.

        Sets ``saw_double`` for ``float64``/``complex128`` and ``saw_complex``
        for any complex dtype.  Dtypes of kind ``"V"`` that are not yet in
        ``declared_dtypes`` have their field types added recursively, and the
        struct declaration returned by ``match_dtype_to_c_struct`` is
        appended to ``declarations``.

        :arg dtype: anything accepted by ``np.dtype``.
        """
        dtype = np.dtype(dtype)

        # This used to read ``[np.float64 or np.complex128]``, which is just
        # ``[np.float64]`` -- complex128 never flagged double support.
        if dtype in (np.float64, np.complex128):
            self.saw_double = True

        if dtype.kind == "c":
            self.saw_complex = True

        if dtype.kind != "V":
            return

        if dtype in self.declared_dtypes:
            return

        # ``values()`` works on Python 2 and 3.  Indexing instead of 2-tuple
        # unpacking also accepts field entries that carry a title.
        for field_data in dtype.fields.values():
            self.add_dtype(field_data[0])

        _, cdecl = match_dtype_to_c_struct(self.device, dtype_to_ctype(dtype), dtype)

        self.declarations.append(cdecl)
        self.declared_dtypes.add(dtype)
Esempio n. 19
0
    def add_dtype(self, dtype):
        """Record *dtype* and declare any C struct it requires.

        Sets ``saw_double`` for ``float64``/``complex128`` and ``saw_complex``
        for any complex dtype.  Dtypes of kind ``"V"`` that are not yet in
        ``declared_dtypes`` have their field types added recursively, and the
        struct declaration returned by ``match_dtype_to_c_struct`` is
        appended to ``declarations``.

        :arg dtype: anything accepted by ``np.dtype``.
        """
        dtype = np.dtype(dtype)

        # This used to read ``[np.float64 or np.complex128]``, which is just
        # ``[np.float64]`` -- complex128 never flagged double support.
        if dtype in (np.float64, np.complex128):
            self.saw_double = True

        if dtype.kind == "c":
            self.saw_complex = True

        if dtype.kind != "V":
            return

        if dtype in self.declared_dtypes:
            return

        # ``values()`` works on Python 2 and 3.  Indexing instead of 2-tuple
        # unpacking also accepts field entries that carry a title.
        for field_data in dtype.fields.values():
            self.add_dtype(field_data[0])

        _, cdecl = match_dtype_to_c_struct(self.device, dtype_to_ctype(dtype), dtype)

        self.declarations.append(cdecl)
        self.declared_dtypes.add(dtype)
Esempio n. 20
0
def dtype_to_c_struct(device, dtype):
    """Produce the C struct declaration that corresponds to *dtype*.

    Returns ``""`` for dtypes without fields and for OpenCL vector types.
    Otherwise the declaration from ``match_dtype_to_c_struct`` is returned
    after asserting that the matched layout has exactly the fields of
    *dtype*.
    """
    if dtype.fields is None:
        return ""

    import pyopencl.cltypes
    if dtype in pyopencl.cltypes.vec_type_to_scalar_and_count:
        # Vector types are built-in. Don't try to redeclare those.
        return ""

    c_name = dtype_to_ctype(dtype)
    matched_dtype, c_decl = match_dtype_to_c_struct(device, c_name, dtype)

    # Same field count, and every field maps to an identical entry.
    assert (
        len(dtype.fields) == len(matched_dtype.fields)
        and all(matched_dtype.fields[field_name] == field_entry
                for field_name, field_entry in six.iteritems(dtype.fields)))

    return c_decl
Esempio n. 21
0
def dtype_to_c_struct(device, dtype):
    """Produce the C struct declaration that corresponds to *dtype*.

    Returns ``""`` for dtypes without fields and for OpenCL vector types.
    Otherwise the declaration from ``match_dtype_to_c_struct`` is returned
    after asserting that the matched layout has exactly the fields of
    *dtype*.
    """
    if dtype.fields is None:
        return ""

    from pyopencl.array import vec
    if dtype in vec.type_to_scalar_and_count:
        # Vector types are built-in. Don't try to redeclare those.
        return ""

    c_name = dtype_to_ctype(dtype)
    matched_dtype, c_decl = match_dtype_to_c_struct(device, c_name, dtype)

    # Same field count, and every field maps to an identical entry.
    assert (
        len(dtype.fields) == len(matched_dtype.fields)
        and all(matched_dtype.fields[field_name] == field_entry
                for field_name, field_entry in six.iteritems(dtype.fields)))

    return c_decl
Esempio n. 22
0
    def get_type(self, address_space_qualifier):
        """Returns the c99 struct declaration."""

        field_definitions = []
        for field, subdtype in self.blob_type.dtype_static_components:
            field_definitions.append('\t%s %s* %s;' % (address_space_qualifier, dtype_to_ctype(subdtype), field))

        child_name = BlobLib.get_interface(self.blob_type.child_type).get_spaced_name(address_space_qualifier)

        return \
'''
/* array type %(name)s */

typedef struct __attribute__((__packed__)) _%(name)s
{
%(static_fields)s
    %(address_space_qualifier)s char* %(first_item_field)s;
} %(name)s;''' % {
    'static_fields': '\n'.join(field_definitions),
    'first_item_field': self.FIRST_ITEM_FIELD,
    'child_name': child_name,
    'address_space_qualifier': address_space_qualifier,
    'name': self.get_name(address_space_qualifier)
}
Esempio n. 23
0
 def declarator(self):
     """Return the C declaration ``<ctype> <name>`` for this argument."""
     c_type = dtype_to_ctype(self.dtype)
     return "{} {}".format(c_type, self.name)
Esempio n. 24
0
def match_dtype_to_c_struct(device, name, dtype, context=None):
    """Return a tuple `(dtype, c_decl)` such that the C struct declaration
    in `c_decl` and the structure :class:`numpy.dtype` instance `dtype`
    have the same memory layout.

    Note that *dtype* may be modified from the value that was passed in,
    for example to insert padding.

    (As a remark on implementation, this routine runs a small kernel on
    the given *device* to ensure that :mod:`numpy` and C offsets and
    sizes match.)

    :arg device: device the probing kernel is built for and run on.
    :arg name: C type name given to the generated ``typedef struct``.
    :arg dtype: structured :class:`numpy.dtype` describing the fields.
    :arg context: optional context to use; when ``None`` a new one
        containing only *device* is created.
    :raises RuntimeError: if the compiler reports a field offset at or past
        the struct size and that size differs from ``dtype.itemsize``.

    .. versionadded:: 2013.1

    This example explains the use of this function::

        >>> import numpy as np
        >>> import pyopencl as cl
        >>> import pyopencl.tools
        >>> ctx = cl.create_some_context()
        >>> dtype = np.dtype([("id", np.uint32), ("value", np.float32)])
        >>> dtype, c_decl = pyopencl.tools.match_dtype_to_c_struct(
        ...     ctx.devices[0], 'id_val', dtype)
        >>> print(c_decl)
        typedef struct {
          unsigned id;
          float value;
        } id_val;
        >>> print(dtype)
        [('id', '<u4'), ('value', '<f4')]
        >>> cl.tools.get_or_register_dtype('id_val', dtype)

    As this example shows, it is important to call
    :func:`get_or_register_dtype` on the modified `dtype` returned by this
    function, not the original one.
    """

    # Fields in order of their numpy offset; each entry is
    # ``(field_name, (field_dtype, offset[, title]))``.
    fields = sorted(six.iteritems(dtype.fields),
                    key=lambda name_dtype_offset: name_dtype_offset[1][1])

    c_fields = []
    for field_name, dtype_and_offset in fields:
        field_dtype, offset = dtype_and_offset[:2]
        c_fields.append("  %s %s;" % (dtype_to_ctype(field_dtype), field_name))

    c_decl = "typedef struct {\n%s\n} %s;\n\n" % ("\n".join(c_fields), name)

    # Nested struct field types need their own declarations first.
    cdl = _CDeclList(device)
    for field_name, dtype_and_offset in fields:
        field_dtype, offset = dtype_and_offset[:2]
        cdl.add_dtype(field_dtype)

    pre_decls = cdl.get_declarations()

    offset_code = "\n".join("result[%d] = pycl_offsetof(%s, %s);" %
                            (i + 1, name, field_name)
                            for i, (field_name, _) in enumerate(fields))

    src = r"""
        #define pycl_offsetof(st, m) \
                 ((size_t) ((__local char *) &(dummy.m) \
                 - (__local char *)&dummy ))

        %(pre_decls)s

        %(my_decl)s

        __kernel void get_size_and_offsets(__global size_t *result)
        {
            result[0] = sizeof(%(my_type)s);
            __local %(my_type)s dummy;
            %(offset_code)s
        }
    """ % dict(pre_decls=pre_decls,
               my_decl=c_decl,
               my_type=name,
               offset_code=offset_code)

    if context is None:
        context = cl.Context([device])

    queue = cl.CommandQueue(context)

    prg = cl.Program(context, src)
    knl = prg.build(devices=[device]).get_size_and_offsets

    import pyopencl.array  # noqa
    # Slot 0 receives sizeof(struct), slots 1.. the per-field offsets.
    result_buf = cl.array.empty(queue, 1 + len(fields), np.uintp)
    knl(queue, (1, ), (1, ), result_buf.data)
    queue.finish()
    size_and_offsets = result_buf.get()

    size = int(size_and_offsets[0])

    from pytools import any
    offsets = size_and_offsets[1:]
    if any(ofs >= size for ofs in offsets):
        # offsets not plausible

        if dtype.itemsize == size:
            # If sizes match, use numpy's idea of the offsets.
            offsets = [
                dtype_and_offset[1] for field_name, dtype_and_offset in fields
            ]
        else:
            raise RuntimeError(
                "OpenCL compiler reported offsetof() past sizeof() "
                "for struct layout on '%s'. "
                "This makes no sense, and it's usually indicates a "
                "compiler bug. "
                "Refusing to discover struct layout." % device)

    result_buf.data.release()
    del knl
    del prg
    del queue
    del context

    try:
        # Index into the field entries rather than unpacking 2-tuples, so
        # entries carrying a title are handled like in the loops above.
        dtype_arg_dict = {
            'names':
            [field_name for field_name, _ in fields],
            'formats':
            [dtype_and_offset[0] for _, dtype_and_offset in fields],
            'offsets': [int(x) for x in offsets],
            'itemsize':
            int(size_and_offsets[0]),
        }
        dtype = np.dtype(dtype_arg_dict)
        if dtype.itemsize != size_and_offsets[0]:
            # "Old" versions of numpy (1.6.x?) silently ignore "itemsize". Boo.
            dtype_arg_dict["names"].append("_pycl_size_fixer")
            dtype_arg_dict["formats"].append(np.uint8)
            dtype_arg_dict["offsets"].append(int(size_and_offsets[0]) - 1)
            dtype = np.dtype(dtype_arg_dict)
    except NotImplementedError:

        def calc_field_type():
            # Rebuild the layout as a plain field list, inserting explicit
            # void padding wherever an offset leaves a gap.
            total_size = 0
            padding_count = 0
            for offset, (field_name, dtype_and_offset) in zip(offsets, fields):
                field_dtype = dtype_and_offset[0]
                offset = int(offset)
                if offset > total_size:
                    padding_count += 1
                    # Parenthesised: ``'V%d' % offset - total_size`` would
                    # subtract an int from a str and raise TypeError.
                    yield ('__pycl_padding%d' % padding_count,
                           'V%d' % (offset - total_size))
                yield field_name, field_dtype
                total_size = field_dtype.itemsize + offset

        dtype = np.dtype(list(calc_field_type()))

    assert dtype.itemsize == size_and_offsets[0]

    return dtype, c_decl
Esempio n. 25
0
    def get_deserialize(self, address_space_qualifier):
        """Generate the C99 deserializer that splits a flat blob into components.

        For every non-padding component the generated function computes its
        byte offset (previous offset plus previous size), points the matching
        member of ``self`` at ``blob + offset`` (or calls the component's own
        deserializer for non-plain blob components) and, for all but the last
        component, stores the component's size.

        :arg address_space_qualifier: qualifier put on the ``blob`` pointer
            and the generated casts.
        :returns: tuple ``(prototype, source)``; the prototype ends in ``;``.
        """

        arguments = ['%s char* blob' % address_space_qualifier]
        declarations = []
        lines = []
        previous_field_offset, previous_field_space = 0, 0

        last_field = self.blob_type.subtypes[-1][0]

        # iterate over all subtypes/components
        for field, subtype in self.blob_type.subtypes:
            if field.endswith(Blob.PADDING_FIELD_SUFFIX):
                continue

            is_last_field = field == last_field

            # format
            lines.append('')
            lines.append('/* cast of %s */' % field)

            # used variable names
            field_variable = 'self->%s' % field
            field_offset = '%s_offset' % field
            field_reference = 'blob + %s' % field_offset
            if not is_last_field:
                field_space = '%s_space' % field

            declarations.append('unsigned long %s;' % field_offset)
            if not is_last_field:
                declarations.append('unsigned long %s;' % field_space)

            # add sizeof call of component
            if numpy.issctype(subtype):
                cname = "%s %s" % (address_space_qualifier, dtype_to_ctype(subtype))
                sizeof_call = 'sizeof(%s)' % cname

            else:
                assert issubclass(subtype, Blob), 'unexpected type %s %s' % (type(subtype), subtype)
                cname = "%s %s" % (
                    address_space_qualifier,
                    BlobLib.get_interface(subtype).get_name(address_space_qualifier)
                )
                sizeof_call = '%s((%s char*)%s)' % (
                    BlobLib.get_interface(subtype).get_sizeof_name(address_space_qualifier),
                    address_space_qualifier,
                    field_reference
                )

            # determine offset of component
            lines.append('%s = %s + %s;' % (field_offset, previous_field_offset, previous_field_space))

            # set and cast component reference
            if not numpy.issctype(subtype) and not subtype.is_plain():
                lines.append('%s(%s, &%s);' % (
                    BlobLib.get_interface(subtype).get_deserialize_name(address_space_qualifier),
                    field_reference,
                    field_variable
                ))
            else:
                lines.append('%s = (%s*)(%s);' % (field_variable, cname, field_reference))

            if not is_last_field:
                # determine size of component
                lines.append('%s = %s;' % (field_space, sizeof_call))
                # Only non-final components own a ``*_space`` variable.
                # Updating this unconditionally read an unassigned local
                # when the blob had a single component.
                previous_field_space = field_space

            previous_field_offset = field_offset

        lines = ['\t' + line for line in lines]

        arguments.append('%s* %s' % (self.get_name(address_space_qualifier, ), 'self'))

        definition = 'void %s(%s)' % (self.get_deserialize_name(address_space_qualifier), ', '.join(arguments))

        # fill function template
        lines.insert(0, definition)
        lines.insert(1, '{')
        for index, line in enumerate(declarations):
            lines.insert(2 + index, '\t' + line)
        lines.append('}')
        declaration = '\n'.join(lines)

        return definition.strip() + ';', declaration
Esempio n. 26
0
 def declarator(self):
     """Return the C declaration ``<ctype> <name>`` for this argument."""
     c_type = dtype_to_ctype(self.dtype)
     return "%s %s" % (c_type, self.name)
Esempio n. 27
0
def match_dtype_to_c_struct(device, name, dtype, context=None):
    """Return a tuple `(dtype, c_decl)` such that the C struct declaration
    in `c_decl` and the structure :class:`numpy.dtype` instance `dtype`
    have the same memory layout.

    Note that *dtype* may be modified from the value that was passed in,
    for example to insert padding.

    (As a remark on implementation, this routine runs a small kernel on
    the given *device* to ensure that :mod:`numpy` and C offsets and
    sizes match.)

    :arg device: device the probing kernel is built for and run on.
    :arg name: C type name given to the generated ``typedef struct``.
    :arg dtype: structured :class:`numpy.dtype` describing the fields.
    :arg context: optional context to use; when ``None`` a new one
        containing only *device* is created.
    :raises RuntimeError: if the compiler reports a field offset at or past
        the struct size and that size differs from ``dtype.itemsize``.

    .. versionadded:: 2013.1

    This example explains the use of this function::

        >>> import numpy as np
        >>> import pyopencl as cl
        >>> import pyopencl.tools
        >>> ctx = cl.create_some_context()
        >>> dtype = np.dtype([("id", np.uint32), ("value", np.float32)])
        >>> dtype, c_decl = pyopencl.tools.match_dtype_to_c_struct(
        ...     ctx.devices[0], 'id_val', dtype)
        >>> print(c_decl)
        typedef struct {
          unsigned id;
          float value;
        } id_val;
        >>> print(dtype)
        [('id', '<u4'), ('value', '<f4')]
        >>> cl.tools.get_or_register_dtype('id_val', dtype)

    As this example shows, it is important to call
    :func:`get_or_register_dtype` on the modified `dtype` returned by this
    function, not the original one.
    """

    # Fields in order of their numpy offset.  The key indexes into the
    # ``(name, (dtype, offset))`` item because tuple parameters in lambdas
    # are a syntax error on Python 3; ``items()`` exists on both 2 and 3.
    fields = sorted(dtype.fields.items(),
            key=lambda item: item[1][1])

    c_fields = []
    for field_name, (field_dtype, offset) in fields:
        c_fields.append("  %s %s;" % (dtype_to_ctype(field_dtype), field_name))

    c_decl = "typedef struct {\n%s\n} %s;\n\n" % (
            "\n".join(c_fields),
            name)

    # Nested struct field types need their own declarations first.
    cdl = _CDeclList(device)
    for field_name, (field_dtype, offset) in fields:
        cdl.add_dtype(field_dtype)

    pre_decls = cdl.get_declarations()

    offset_code = "\n".join(
            "result[%d] = pycl_offsetof(%s, %s);" % (i+1, name, field_name)
            for i, (field_name, (field_dtype, offset)) in enumerate(fields))

    src = r"""
        #define pycl_offsetof(st, m) \
                 ((size_t) ((__local char *) &(dummy.m) \
                 - (__local char *)&dummy ))

        %(pre_decls)s

        %(my_decl)s

        __kernel void get_size_and_offsets(__global size_t *result)
        {
            result[0] = sizeof(%(my_type)s);
            __local %(my_type)s dummy;
            %(offset_code)s
        }
    """ % dict(
            pre_decls=pre_decls,
            my_decl=c_decl,
            my_type=name,
            offset_code=offset_code)

    if context is None:
        context = cl.Context([device])

    queue = cl.CommandQueue(context)

    prg = cl.Program(context, src)
    knl = prg.build(devices=[device]).get_size_and_offsets

    import pyopencl.array  # noqa
    # Slot 0 receives sizeof(struct), slots 1.. the per-field offsets.
    result_buf = cl.array.empty(queue, 1+len(fields), np.uintp)
    knl(queue, (1,), (1,), result_buf.data)
    queue.finish()
    size_and_offsets = result_buf.get()

    size = int(size_and_offsets[0])

    from pytools import any
    offsets = size_and_offsets[1:]
    if any(ofs >= size for ofs in offsets):
        # offsets not plausible

        if dtype.itemsize == size:
            # If sizes match, use numpy's idea of the offsets.
            offsets = [offset
                    for field_name, (field_dtype, offset) in fields]
        else:
            raise RuntimeError(
                    "cannot discover struct layout on '%s'" % device)

    result_buf.data.release()
    del knl
    del prg
    del queue
    del context

    dtype_arg_dict = dict(
            names=[field_name for field_name, (field_dtype, offset) in fields],
            formats=[field_dtype
                for field_name, (field_dtype, offset) in fields],
            offsets=[int(x) for x in offsets],
            itemsize=int(size_and_offsets[0]),
            )
    dtype = np.dtype(dtype_arg_dict)

    if dtype.itemsize != size_and_offsets[0]:
        # "Old" versions of numpy (1.6.x?) silently ignore "itemsize". Boo.
        dtype_arg_dict["names"].append("_pycl_size_fixer")
        dtype_arg_dict["formats"].append(np.uint8)
        dtype_arg_dict["offsets"].append(int(size_and_offsets[0])-1)
        dtype = np.dtype(dtype_arg_dict)

    assert dtype.itemsize == size_and_offsets[0]

    return dtype, c_decl
Esempio n. 28
0
 def declarator(self):
     """Build the ``<ctype> <name>`` declaration text for this argument."""
     declaration_parts = (dtype_to_ctype(self.dtype), self.name)
     return "%s %s" % declaration_parts
Esempio n. 29
0
 def dtype_to_ctype(self, dtype):
     """Return the C type name for *dtype* via ``pyopencl.compyte.dtypes``."""
     from pyopencl.compyte import dtypes as compyte_dtypes
     return compyte_dtypes.dtype_to_ctype(dtype)
Esempio n. 30
0
 def declarator(self):
     """Return the ``__global <ctype> *<name>`` pointer declaration."""
     c_type = dtype_to_ctype(self.dtype)
     return "__global %s *%s" % (c_type, self.name)
Esempio n. 31
0
 def dtype_to_ctype(self, dtype):
     """Delegate to ``pyopencl.compyte.dtypes.dtype_to_ctype`` for *dtype*."""
     from pyopencl.compyte import dtypes as compyte_dtypes
     convert = compyte_dtypes.dtype_to_ctype
     return convert(dtype)
Esempio n. 32
0
 def declarator(self):
     """Build the ``__global <ctype> *<name>`` pointer declaration text."""
     declaration_parts = (dtype_to_ctype(self.dtype), self.name)
     return "__global %s *%s" % declaration_parts