Example #1
    def test_compound(self):
        """ Test all compound datatype operations """
        names = ("A", "B", "Name3", "Name with space",
                 " 12A-d878dd&%2 0-1!** ")
        types = (h5t.STD_I8LE, h5t.IEEE_F32BE, h5t.STD_U16BE, h5t.C_S1.copy(),
                 h5t.FORTRAN_S1.copy())
        types[3].set_size(8)
        types[4].set_size(8)
        classcodes = (h5t.INTEGER, h5t.FLOAT, h5t.INTEGER, h5t.STRING,
                      h5t.STRING)

        # Align all on 128-bit (16-byte) boundaries
        offsets = tuple(x * 16 for x in range(len(names)))
        total_len = 16 * len(names)
        htype = h5t.create(h5t.COMPOUND, total_len)

        for idx, name in enumerate(names):
            htype.insert(name, offsets[idx], types[idx])

        for idx, name in enumerate(names):
            self.assertEqual(htype.get_member_name(idx), name)
            self.assertEqual(htype.get_member_class(idx), classcodes[idx])
            self.assertEqual(htype.get_member_index(name), idx)
            self.assertEqual(htype.get_member_offset(idx), offsets[idx])
            self.assertTrue(htype.get_member_type(idx).equal(types[idx]))

        self.assertEqual(htype.get_size(), total_len)
        htype.pack()
        self.assertTrue(htype.get_size() < total_len)
        self.assertEqual(htype.get_nmembers(), len(names))
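For context, a minimal standalone sketch of the same create/insert pattern, assuming only h5py and NumPy (the two-field layout is illustrative, not taken from the test above):

    import numpy as np
    from h5py import h5t

    # Build a compound type field by field from a NumPy dtype.
    dt = np.dtype([('x', '<i4'), ('y', '<f8')])
    tid = h5t.create(h5t.COMPOUND, dt.itemsize)
    for name in dt.names:
        field_dt, offset = dt.fields[name]
        tid.insert(name.encode('utf-8'), offset, h5t.py_create(field_dt))
    assert tid.dtype == dt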
Example #2
    def test_get_set_size(self):

        sizes = (1, 2, 3, 4, 127, 128, 129, 133, 16385)

        def test(htype, size):
            htype.set_size(size)
            self.assertEqual(htype.get_size(), size)

        htype = h5t.create(h5t.OPAQUE, 4)
        for size in sizes:
            test(htype, size)
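A minimal standalone sketch combining the resizable-opaque behaviour tested here with the tagging shown in the set_tag/get_tag example further down (the bytes tag assumes a Python 3 era h5py; the tag text is illustrative):

    from h5py import h5t

    # Opaque types are untyped byte blobs: the size can be changed
    # freely after creation, and a text tag can label the contents.
    htype = h5t.create(h5t.OPAQUE, 4)
    htype.set_size(16)
    assert htype.get_size() == 16
    htype.set_tag(b"raw sensor frame")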
Example #3
    def test_obj2vlen_complex(self):
        """ Object to vlen (compound) """

        obj_ptr_size = h5t.PYTHON_OBJECT.get_size()
        vlen_ptr_size = vlen_htype.get_size()

        input_htype = h5t.create(h5t.COMPOUND, obj_ptr_size+4)
        input_htype.insert('a', 0, obj_htype)
        input_htype.insert('b', obj_ptr_size, h5t.STD_I32LE)

        output_htype = h5t.create(h5t.COMPOUND, vlen_ptr_size+4)
        output_htype.insert('a', 0, vlen_htype)
        output_htype.insert('b', vlen_ptr_size, h5t.STD_I32LE)

        objarr = np.ndarray((len(strings),), dtype=[('a', vlen_dtype), ('b', '<i4')])
        objarr['a'] = strings

        destbuffer = np.ndarray(objarr.shape, dtype=[('a', np.uintp), ('b', '<i4')], buffer=objarr).copy()

        h5t.convert(input_htype, output_htype, len(strings), destbuffer, destbuffer.copy())

        for idx, val in enumerate(destbuffer):
            self.assertEqual(ctypes.string_at(int(val[0])), strings[idx])
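Note that obj_htype, vlen_htype, vlen_dtype and strings come from the test module's setup and are not defined in the snippet. For a self-contained feel for h5t.convert itself, here is a hedged sketch of a simple in-place int32-to-float64 conversion; it relies on H5Tconvert's in-place semantics, where the buffer must be large enough for the bigger of the two types:

    import numpy as np
    from h5py import h5t

    n = 4
    buf = np.zeros(n, dtype='<f8')       # room for n float64 results
    buf.view('<i4')[:n] = np.arange(n)   # pack n int32 inputs at the front
    h5t.convert(h5t.STD_I32LE, h5t.IEEE_F64LE, n, buf)
    assert buf.tolist() == [0.0, 1.0, 2.0, 3.0]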
Example #4
    def test_out_of_order_offsets(self):
        size = 20
        type_dict = {
            'names': ['f1', 'f2', 'f3'],
            'formats': ['<f4', '<i4', '<f8'],
            'offsets': [0, 16, 8]
        }

        expected_dtype = np.dtype(type_dict)

        tid = h5t.create(h5t.COMPOUND, size)
        for name, offset, dt in zip(type_dict["names"], type_dict["offsets"],
                                    type_dict["formats"]):
            tid.insert(
                name.encode("utf8") if isinstance(name, text_type) else name,
                offset, h5t.py_create(dt))

        self.assertEqual(tid.dtype, expected_dtype)
        self.assertEqual(tid.dtype.itemsize, size)
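Layout note: the offsets above are deliberately non-monotonic. 'f1' ('<f4') occupies bytes 0-3, 'f3' ('<f8') bytes 8-15, and 'f2' ('<i4') bytes 16-19, so the itemsize comes out at exactly the requested 20 bytes, with a 4-byte gap after 'f1'.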
Example #5
    def __setitem__(self, args, val):
        """ Write to the HDF5 dataset from a Numpy array.

        NumPy's broadcasting rules are honored, for "simple" indexing
        (slices and integers).  For advanced indexing, the shapes must
        match.
        """
        args = args if isinstance(args, tuple) else (args, )

        # Sort field indices from the slicing
        names = tuple(x for x in args if isinstance(x, str))
        args = tuple(x for x in args if not isinstance(x, str))

        # Generally we try to avoid converting the arrays on the Python
        # side.  However, for compound literals this is unavoidable.
        vlen = h5t.check_dtype(vlen=self.dtype)
        if vlen not in (bytes, str, None):
            try:
                val = numpy.asarray(val, dtype=vlen)
            except ValueError:
                try:
                    val = numpy.array(
                        [numpy.array(x, dtype=vlen) for x in val],
                        dtype=self.dtype)
                except ValueError:
                    pass
            if vlen == val.dtype:
                if val.ndim > 1:
                    tmp = numpy.empty(shape=val.shape[:-1], dtype=object)
                    tmp.ravel()[:] = [
                        i for i in val.reshape((numpy.prod(val.shape[:-1]),
                                                val.shape[-1]))
                    ]
                else:
                    tmp = numpy.array([None], dtype=object)
                    tmp[0] = val
                val = tmp
        elif self.dtype.kind == "O" or \
          (self.dtype.kind == 'V' and \
          (not isinstance(val, numpy.ndarray) or val.dtype.kind != 'V') and \
          (self.dtype.subdtype == None)):
            if len(names) == 1 and self.dtype.fields is not None:
                # Single field selected for write, from a non-array source
                if not names[0] in self.dtype.fields:
                    raise ValueError("No such field for indexing: %s" %
                                     names[0])
                dtype = self.dtype.fields[names[0]][0]
                cast_compound = True
            else:
                dtype = self.dtype
                cast_compound = False

            val = numpy.asarray(val, dtype=dtype, order='C')
            if cast_compound:
                val = val.astype(numpy.dtype([(names[0], dtype)]))
        else:
            val = numpy.asarray(val, order='C')

        # Check for array dtype compatibility and convert
        if self.dtype.subdtype is not None:
            shp = self.dtype.subdtype[1]
            valshp = val.shape[-len(shp):]
            if valshp != shp:  # Last dimension has to match
                raise TypeError(
                    "When writing to array types, last N dimensions have to match (got %s, but should be %s)"
                    % (
                        valshp,
                        shp,
                    ))
            mtype = h5t.py_create(numpy.dtype((val.dtype, shp)))
            mshape = val.shape[0:len(val.shape) - len(shp)]

        # Make a compound memory type if field-name slicing is required
        elif len(names) != 0:

            mshape = val.shape

            # Catch common errors
            if self.dtype.fields is None:
                raise TypeError(
                    "Illegal slicing argument (not a compound dataset)")
            mismatch = [x for x in names if x not in self.dtype.fields]
            if len(mismatch) != 0:
                mismatch = ", ".join('"%s"' % x for x in mismatch)
                raise ValueError(
                    "Illegal slicing argument (fields %s not in dataset type)"
                    % mismatch)

            # Write non-compound source into a single dataset field
            if len(names) == 1 and val.dtype.fields is None:
                subtype = h5t.py_create(val.dtype)
                mtype = h5t.create(h5t.COMPOUND, subtype.get_size())
                mtype.insert(self._e(names[0]), 0, subtype)

            # Make a new source type keeping only the requested fields
            else:
                fieldnames = [x for x in val.dtype.names
                              if x in names]  # Keep source order
                mtype = h5t.create(h5t.COMPOUND, val.dtype.itemsize)
                for fieldname in fieldnames:
                    subtype = h5t.py_create(val.dtype.fields[fieldname][0])
                    offset = val.dtype.fields[fieldname][1]
                    mtype.insert(self._e(fieldname), offset, subtype)

        # Use mtype derived from array (let DatasetID.write figure it out)
        else:
            mshape = val.shape
            mtype = None

        # Perform the dataspace selection
        selection = sel.select(self.shape, args, dsid=self.id)

        if selection.nselect == 0:
            return

        # Broadcast scalars if necessary.
        if (mshape == () and selection.mshape != ()):
            if self.dtype.subdtype is not None:
                raise TypeError(
                    "Scalar broadcasting is not supported for array dtypes")
            val2 = numpy.empty(selection.mshape[-1], dtype=val.dtype)
            val2[...] = val
            val = val2
            mshape = val.shape

        # Perform the write, with broadcasting
        # Be careful to pad memory shape with ones to avoid HDF5 chunking
        # glitch, which kicks in for mismatched memory/file selections
        if (len(mshape) < len(self.shape)):
            mshape_pad = (1, ) * (len(self.shape) - len(mshape)) + mshape
        else:
            mshape_pad = mshape
        mspace = h5s.create_simple(mshape_pad,
                                   (h5s.UNLIMITED, ) * len(mshape_pad))
        for fspace in selection.broadcast(mshape):
            self.id.write(mspace, fspace, val, mtype)
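A hedged, self-contained usage sketch of the broadcasting and field-name behaviour the docstring above describes (file, dataset and field names are illustrative; assumes the h5py high-level API):

    import numpy as np
    import h5py

    with h5py.File("demo.h5", "w") as f:
        dset = f.create_dataset("x", shape=(4, 3), dtype='<f8')
        dset[...] = 0                # scalar broadcast over the selection
        dset[1:3] = np.arange(3)     # (3,) broadcasts to the (2, 3) slab
        cmp = f.create_dataset("c", shape=(4,),
                               dtype=[('a', '<i4'), ('b', '<f8')])
        cmp['a'] = np.arange(4)      # field-name slicing writes one field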
Example #6
    def __setitem__(self, args, val):
        """ Write to the HDF5 dataset from a Numpy array.

        NumPy's broadcasting rules are honored, for "simple" indexing
        (slices and integers).  For advanced indexing, the shapes must
        match.
        """
        args = args if isinstance(args, tuple) else (args,)

        # Sort field indices from the slicing
        names = tuple(x for x in args if isinstance(x, str))
        args = tuple(x for x in args if not isinstance(x, str))

        # Generally we try to avoid converting the arrays on the Python
        # side.  However, for compound literals this is unavoidable.
        if self.dtype.kind == "O" or \
          (self.dtype.kind == 'V' and \
          (not isinstance(val, numpy.ndarray) or val.dtype.kind != 'V') and \
          (self.dtype.subdtype == None)):
            if len(names) == 1 and self.dtype.fields is not None:
                # Single field selected for write, from a non-array source
                if not names[0] in self.dtype.fields:
                    raise ValueError("No such field for indexing: %s" % names[0])
                dtype = self.dtype.fields[names[0]][0]
                cast_compound = True
            else:
                dtype = self.dtype
                cast_compound = False

            val = numpy.asarray(val, dtype=dtype, order='C')
            if cast_compound:
                val = val.astype(numpy.dtype([(names[0], dtype)]))
        else:
            val = numpy.asarray(val, order='C')

        # Check for array dtype compatibility and convert
        if self.dtype.subdtype is not None:
            shp = self.dtype.subdtype[1]
            valshp = val.shape[-len(shp):]
            if valshp != shp:  # Last dimension has to match
                raise TypeError("When writing to array types, last N dimensions have to match (got %s, but should be %s)" % (valshp, shp,))
            mtype = h5t.py_create(numpy.dtype((val.dtype, shp)))
            mshape = val.shape[0:len(val.shape)-len(shp)]

        # Make a compound memory type if field-name slicing is required
        elif len(names) != 0:

            mshape = val.shape

            # Catch common errors
            if self.dtype.fields is None:
                raise TypeError("Illegal slicing argument (not a compound dataset)")
            mismatch = [x for x in names if x not in self.dtype.fields]
            if len(mismatch) != 0:
                mismatch = ", ".join('"%s"'%x for x in mismatch)
                raise ValueError("Illegal slicing argument (fields %s not in dataset type)" % mismatch)
        
            # Write non-compound source into a single dataset field
            if len(names) == 1 and val.dtype.fields is None:
                subtype = h5t.py_create(val.dtype)
                mtype = h5t.create(h5t.COMPOUND, subtype.get_size())
                mtype.insert(self._e(names[0]), 0, subtype)

            # Make a new source type keeping only the requested fields
            else:
                fieldnames = [x for x in val.dtype.names if x in names] # Keep source order
                mtype = h5t.create(h5t.COMPOUND, val.dtype.itemsize)
                for fieldname in fieldnames:
                    subtype = h5t.py_create(val.dtype.fields[fieldname][0])
                    offset = val.dtype.fields[fieldname][1]
                    mtype.insert(self._e(fieldname), offset, subtype)

        # Use mtype derived from array (let DatasetID.write figure it out)
        else:
            mshape = val.shape
            mtype = None

        # Perform the dataspace selection
        selection = sel.select(self.shape, args, dsid=self.id)

        if selection.nselect == 0:
            return

        # Broadcast scalars if necessary.
        if (mshape == () and selection.mshape != ()):
            if self.dtype.subdtype is not None:
                raise TypeError("Scalar broadcasting is not supported for array dtypes")
            val2 = numpy.empty(selection.mshape[-1], dtype=val.dtype)
            val2[...] = val
            val = val2
            mshape = val.shape

        # Perform the write, with broadcasting
        # Be careful to pad memory shape with ones to avoid HDF5 chunking
        # glitch, which kicks in for mismatched memory/file selections
        if len(mshape) < len(self.shape):
            mshape_pad = (1,)*(len(self.shape)-len(mshape)) + mshape
        else:
            mshape_pad = mshape
        mspace = h5s.create_simple(mshape_pad, (h5s.UNLIMITED,)*len(mshape_pad))
        for fspace in selection.broadcast(mshape):
            self.id.write(mspace, fspace, val, mtype)
Example #7
    def __setitem__(self, args, val):
        """ Write to the HDF5 dataset from a Numpy array.

        NumPy's broadcasting rules are honored, for "simple" indexing
        (slices and integers).  For advanced indexing, the shapes must
        match.
        """
        self.parent._check_committed()
        # This boilerplate code is based on h5py.Dataset.__setitem__
        args = args if isinstance(args, tuple) else (args, )

        # Sort field indices from the slicing
        names = tuple(x for x in args if isinstance(x, str))
        args = tuple(x for x in args if not isinstance(x, str))

        # Generally we try to avoid converting the arrays on the Python
        # side.  However, for compound literals this is unavoidable.
        vlen = h5t.check_vlen_dtype(self.dtype)
        if vlen is not None and vlen not in (bytes, str):
            try:
                val = np.asarray(val, dtype=vlen)
            except ValueError:
                try:
                    val = np.array([np.array(x, dtype=vlen) for x in val],
                                   dtype=self.dtype)
                except ValueError:
                    pass
            if vlen == val.dtype:
                if val.ndim > 1:
                    tmp = np.empty(shape=val.shape[:-1], dtype=object)
                    tmp.ravel()[:] = [
                        i for i in val.reshape((
                            np.prod(val.shape[:-1], dtype=np.ulonglong),
                            val.shape[-1]))
                    ]
                else:
                    tmp = np.array([None], dtype=object)
                    tmp[0] = val
                val = tmp
        elif self.dtype.kind == "O" or \
          (self.dtype.kind == 'V' and \
          (not isinstance(val, np.ndarray) or val.dtype.kind != 'V') and \
          (self.dtype.subdtype == None)):
            if len(names) == 1 and self.dtype.fields is not None:
                # Single field selected for write, from a non-array source
                if not names[0] in self.dtype.fields:
                    raise ValueError("No such field for indexing: %s" %
                                     names[0])
                dtype = self.dtype.fields[names[0]][0]
                cast_compound = True
            else:
                dtype = self.dtype
                cast_compound = False

            val = np.asarray(val, dtype=dtype.base, order='C')
            if cast_compound:
                val = val.view(np.dtype([(names[0], dtype)]))
                val = val.reshape(val.shape[:len(val.shape) -
                                            len(dtype.shape)])
        else:
            val = np.asarray(val, order='C')

        # Check for array dtype compatibility and convert
        if self.dtype.subdtype is not None:
            shp = self.dtype.subdtype[1]
            valshp = val.shape[-len(shp):]
            if valshp != shp:  # Last dimension has to match
                raise TypeError(
                    "When writing to array types, last N dimensions have to match (got %s, but should be %s)"
                    % (
                        valshp,
                        shp,
                    ))
            mtype = h5t.py_create(np.dtype((val.dtype, shp)))
            # mshape = val.shape[0:len(val.shape)-len(shp)]

        # Make a compound memory type if field-name slicing is required
        elif len(names) != 0:

            # mshape = val.shape

            # Catch common errors
            if self.dtype.fields is None:
                raise TypeError(
                    "Illegal slicing argument (not a compound dataset)")
            mismatch = [x for x in names if x not in self.dtype.fields]
            if len(mismatch) != 0:
                mismatch = ", ".join('"%s"' % x for x in mismatch)
                raise ValueError(
                    "Illegal slicing argument (fields %s not in dataset type)"
                    % mismatch)

            # Write non-compound source into a single dataset field
            if len(names) == 1 and val.dtype.fields is None:
                subtype = h5t.py_create(val.dtype)
                mtype = h5t.create(h5t.COMPOUND, subtype.get_size())
                mtype.insert(self._e(names[0]), 0, subtype)

            # Make a new source type keeping only the requested fields
            else:
                fieldnames = [x for x in val.dtype.names
                              if x in names]  # Keep source order
                mtype = h5t.create(h5t.COMPOUND, val.dtype.itemsize)
                for fieldname in fieldnames:
                    subtype = h5t.py_create(val.dtype.fields[fieldname][0])
                    offset = val.dtype.fields[fieldname][1]
                    mtype.insert(self._e(fieldname), offset, subtype)

        # Use mtype derived from array (let DatasetID.write figure it out)
        else:
            mtype = None

        # === END CODE FROM h5py.Dataset.__setitem__ ===

        idx = ndindex(args).reduce(self.shape)

        val = np.broadcast_to(val, idx.newshape(self.shape))

        for c, index in as_subchunks(idx, self.shape, self.chunks):
            if isinstance(self.id.data_dict[c], (slice, Slice, tuple, Tuple)):
                raw_idx = Tuple(self.id.data_dict[c],
                                *[slice(0, len(i)) for i in c.args[1:]]).raw
                a = self.id._read_chunk(raw_idx)
                self.id.data_dict[c] = a

            if self.id.data_dict[c].size != 0:
                val_idx = c.as_subindex(idx)
                self.id.data_dict[c][index.raw] = val[val_idx.raw]
Example #8
    def test_setget_tag(self):
        htype = h5t.create(h5t.OPAQUE, 40)
        htype.set_tag(b"FOOBAR")
        self.assertEqual(htype.get_tag(), b"FOOBAR")
Example #9
    def __init__(self, size):
        self.type_id = h5t.create(h5t.COMPOUND, size)
        self.offset = 0
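The fragment above only initialises the builder. A hedged sketch of how such a builder might continue, with a hypothetical append method (not part of the original source) that places each field at the running offset:

    def append(self, name, dt):
        # Map the NumPy dtype to an HDF5 type, insert it at the current
        # offset, then advance the offset by the new field's size.
        subtype = h5t.py_create(dt)
        self.type_id.insert(name, self.offset, subtype)
        self.offset += subtype.get_size()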