def test_compound(self):
    """ Test all compound datatype operations.

    Builds a 5-member compound type with 16-byte-aligned members, then
    verifies member introspection (name, class, index, offset, type),
    total size, pack() shrinking the type, and the member count.
    """
    names = ("A", "B", "Name3", "Name with space", " 12A-d878dd&%2 0-1!** ")
    types = (h5t.STD_I8LE, h5t.IEEE_F32BE, h5t.STD_U16BE,
             h5t.C_S1.copy(), h5t.FORTRAN_S1.copy())
    # The two string types are copies, so resizing them is safe
    types[3].set_size(8)
    types[4].set_size(8)
    classcodes = (h5t.INTEGER, h5t.FLOAT, h5t.INTEGER, h5t.STRING, h5t.STRING)

    # Align all on 128-bit (16-byte) boundaries
    # (was `xrange`, which is Python-2-only; `range` behaves the same here)
    offsets = tuple(x * 16 for x in range(len(names)))
    total_len = 16 * len(names)
    htype = h5t.create(h5t.COMPOUND, total_len)
    for idx, name in enumerate(names):
        htype.insert(name, offsets[idx], types[idx])

    for idx, name in enumerate(names):
        self.assertEqual(htype.get_member_name(idx), name)
        self.assertEqual(htype.get_member_class(idx), classcodes[idx])
        self.assertEqual(htype.get_member_index(name), idx)
        self.assertEqual(htype.get_member_offset(idx), offsets[idx])
        # assertTrue replaces the deprecated unittest alias assert_
        self.assertTrue(htype.get_member_type(idx).equal(types[idx]))

    self.assertEqual(htype.get_size(), total_len)
    # pack() removes the alignment padding, so the type must shrink
    htype.pack()
    self.assertTrue(htype.get_size() < total_len)
    self.assertEqual(htype.get_nmembers(), len(names))
def test_compound(self):
    """ Test all compound datatype operations.

    Creates a compound type whose members sit on 16-byte boundaries and
    checks every member accessor, the total size, and that pack()
    compacts the layout without losing members.
    """
    names = ("A", "B", "Name3", "Name with space", " 12A-d878dd&%2 0-1!** ")
    types = (h5t.STD_I8LE, h5t.IEEE_F32BE, h5t.STD_U16BE,
             h5t.C_S1.copy(), h5t.FORTRAN_S1.copy())
    # Copies of the predefined string types may be resized freely
    types[3].set_size(8)
    types[4].set_size(8)
    classcodes = (h5t.INTEGER, h5t.FLOAT, h5t.INTEGER, h5t.STRING, h5t.STRING)

    # Align all on 128-bit (16-byte) boundaries.
    # Fixed: `xrange` does not exist on Python 3; `range` is equivalent here.
    offsets = tuple(x * 16 for x in range(len(names)))
    total_len = 16 * len(names)
    htype = h5t.create(h5t.COMPOUND, total_len)
    for idx, name in enumerate(names):
        htype.insert(name, offsets[idx], types[idx])

    for idx, name in enumerate(names):
        self.assertEqual(htype.get_member_name(idx), name)
        self.assertEqual(htype.get_member_class(idx), classcodes[idx])
        self.assertEqual(htype.get_member_index(name), idx)
        self.assertEqual(htype.get_member_offset(idx), offsets[idx])
        # Deprecated assert_ replaced with assertTrue
        self.assertTrue(htype.get_member_type(idx).equal(types[idx]))

    self.assertEqual(htype.get_size(), total_len)
    htype.pack()
    # Packing drops the inter-member padding, so the size must decrease
    self.assertTrue(htype.get_size() < total_len)
    self.assertEqual(htype.get_nmembers(), len(names))
def test_get_set_size(self):
    """ set_size/get_size round-trip on an OPAQUE type for assorted sizes. """
    opaque = h5t.create(h5t.OPAQUE, 4)
    for nbytes in (1, 2, 3, 4, 127, 128, 129, 133, 16385):
        opaque.set_size(nbytes)
        self.assertEqual(opaque.get_size(), nbytes)
def test_get_set_size(self):
    """ get_size reports exactly what set_size was given, across a range
    of small and large byte counts. """
    sizes = [1, 2, 3, 4, 127, 128, 129, 133, 16385]

    def check(tid, nbytes):
        # Resize the opaque type, then read the size back
        tid.set_size(nbytes)
        self.assertEqual(tid.get_size(), nbytes)

    opaque = h5t.create(h5t.OPAQUE, 4)
    for nbytes in sizes:
        check(opaque, nbytes)
def test_obj2vlen_complex(self):
    """ Object to vlen (compound) """
    # Pointer sizes for the object and vlen representations
    # (obj_htype / vlen_htype / strings / vlen_dtype are module-level fixtures)
    obj_ptr_size = h5t.PYTHON_OBJECT.get_size()
    vlen_ptr_size = vlen_htype.get_size()

    # Source compound: { 'a': Python object pointer, 'b': int32 }
    input_htype = h5t.create(h5t.COMPOUND, obj_ptr_size + 4)
    input_htype.insert('a', 0, obj_htype)
    input_htype.insert('b', obj_ptr_size, h5t.STD_I32LE)

    # Destination compound: { 'a': vlen string pointer, 'b': int32 }
    output_htype = h5t.create(h5t.COMPOUND, vlen_ptr_size + 4)
    output_htype.insert('a', 0, vlen_htype)
    output_htype.insert('b', vlen_ptr_size, h5t.STD_I32LE)

    objarr = np.ndarray((len(strings),), dtype=[('a', vlen_dtype), ('b', '<i4')])
    objarr['a'] = strings

    # Reinterpret the object pointers as uintp so the converted buffer
    # can be inspected as raw addresses afterwards
    out_buf = np.ndarray(
        objarr.shape,
        dtype=[('a', np.uintp), ('b', '<i4')],
        buffer=objarr,
    ).copy()

    h5t.convert(input_htype, output_htype, len(strings),
                out_buf, out_buf.copy())

    # Each converted element must point at the original string bytes
    for i, row in enumerate(out_buf):
        self.assertEqual(ctypes.string_at(int(row[0])), strings[i])
def test_out_of_order_offsets(self):
    """ A compound type whose member offsets are not monotonically
    increasing still maps to the equivalent numpy dtype. """
    size = 20
    spec = {
        'names': ['f1', 'f2', 'f3'],
        'formats': ['<f4', '<i4', '<f8'],
        'offsets': [0, 16, 8]
    }
    expected_dtype = np.dtype(spec)

    tid = h5t.create(h5t.COMPOUND, size)
    for fname, foffset, fformat in zip(spec["names"], spec["offsets"],
                                       spec["formats"]):
        # HDF5 member names are bytes; encode text names as UTF-8
        if isinstance(fname, text_type):
            fname = fname.encode("utf8")
        tid.insert(fname, foffset, h5t.py_create(fformat))

    self.assertEqual(tid.dtype, expected_dtype)
    self.assertEqual(tid.dtype.itemsize, size)
def test_out_of_order_offsets(self):
    """ Compound members inserted with out-of-order offsets round-trip
    to the matching numpy dtype (same itemsize, same layout). """
    size = 20
    type_dict = {
        'names': ['f1', 'f2', 'f3'],
        'formats': ['<f4', '<i4', '<f8'],
        'offsets': [0, 16, 8]
    }
    expected_dtype = np.dtype(type_dict)

    tid = h5t.create(h5t.COMPOUND, size)
    members = zip(type_dict["names"], type_dict["offsets"],
                  type_dict["formats"])
    for name, offset, fmt in members:
        # Member names must be bytes at the HDF5 layer
        encoded = name.encode("utf8") if isinstance(name, text_type) else name
        tid.insert(encoded, offset, h5t.py_create(fmt))

    self.assertEqual(tid.dtype, expected_dtype)
    self.assertEqual(tid.dtype.itemsize, size)
def __setitem__(self, args, val):
    """ Write to the HDF5 dataset from a Numpy array.  NumPy's
    broadcasting rules are honored, for "simple" indexing (slices and
    integers).  For advanced indexing, the shapes must match.

    args : index expression; string elements select compound fields,
           everything else is the dataspace selection.
    val  : source data (converted to ndarray as needed).
    """
    args = args if isinstance(args, tuple) else (args,)

    # Sort field indices from the slicing
    names = tuple(x for x in args if isinstance(x, str))
    args = tuple(x for x in args if not isinstance(x, str))

    # Generally we try to avoid converting the arrays on the Python
    # side.  However, for compound literals this is unavoidable.
    vlen = h5t.check_dtype(vlen=self.dtype)
    if vlen not in (bytes, unicode, None):
        try:
            val = numpy.asarray(val, dtype=vlen)
        except ValueError:
            try:
                val = numpy.array(
                    [numpy.array(x, dtype=vlen) for x in val],
                    dtype=self.dtype)
            except ValueError:
                # Leave val as-is; the write path below may still cope
                pass
        if vlen == val.dtype:
            if val.ndim > 1:
                # Collapse leading dims into an object array of rows
                # (numpy.prod replaces numpy.product, removed in NumPy 2.0)
                tmp = numpy.empty(shape=val.shape[:-1], dtype=object)
                tmp.ravel()[:] = [
                    i for i in val.reshape(
                        (numpy.prod(val.shape[:-1]), val.shape[-1]))
                ]
            else:
                tmp = numpy.array([None], dtype=object)
                tmp[0] = val
            val = tmp

    elif self.dtype.kind == "O" or \
      (self.dtype.kind == 'V' and \
      (not isinstance(val, numpy.ndarray) or val.dtype.kind != 'V') and \
      (self.dtype.subdtype is None)):
        if len(names) == 1 and self.dtype.fields is not None:
            # Single field selected for write, from a non-array source
            if not names[0] in self.dtype.fields:
                raise ValueError("No such field for indexing: %s" % names[0])
            dtype = self.dtype.fields[names[0]][0]
            cast_compound = True
        else:
            dtype = self.dtype
            cast_compound = False

        val = numpy.asarray(val, dtype=dtype, order='C')
        if cast_compound:
            # Wrap the scalar field data in a one-field compound dtype
            val = val.astype(numpy.dtype([(names[0], dtype)]))
    else:
        val = numpy.asarray(val, order='C')

    # Check for array dtype compatibility and convert
    if self.dtype.subdtype is not None:
        shp = self.dtype.subdtype[1]
        valshp = val.shape[-len(shp):]
        if valshp != shp:
            # Last dimension has to match
            raise TypeError(
                "When writing to array types, last N dimensions have to match (got %s, but should be %s)" % (
                    valshp, shp,))
        mtype = h5t.py_create(numpy.dtype((val.dtype, shp)))
        mshape = val.shape[0:len(val.shape) - len(shp)]

    # Make a compound memory type if field-name slicing is required
    elif len(names) != 0:
        mshape = val.shape

        # Catch common errors
        if self.dtype.fields is None:
            raise TypeError(
                "Illegal slicing argument (not a compound dataset)")
        mismatch = [x for x in names if x not in self.dtype.fields]
        if len(mismatch) != 0:
            mismatch = ", ".join('"%s"' % x for x in mismatch)
            raise ValueError(
                "Illegal slicing argument (fields %s not in dataset type)" %
                mismatch)

        # Write non-compound source into a single dataset field
        if len(names) == 1 and val.dtype.fields is None:
            # Fixed typo: was h5y.py_create, which raised NameError
            subtype = h5t.py_create(val.dtype)
            mtype = h5t.create(h5t.COMPOUND, subtype.get_size())
            mtype.insert(self._e(names[0]), 0, subtype)

        # Make a new source type keeping only the requested fields
        else:
            fieldnames = [x for x in val.dtype.names if x in names]  # Keep source order
            mtype = h5t.create(h5t.COMPOUND, val.dtype.itemsize)
            for fieldname in fieldnames:
                subtype = h5t.py_create(val.dtype.fields[fieldname][0])
                offset = val.dtype.fields[fieldname][1]
                mtype.insert(self._e(fieldname), offset, subtype)

    # Use mtype derived from array (let DatasetID.write figure it out)
    else:
        mshape = val.shape
        mtype = None

    # Perform the dataspace selection
    selection = sel.select(self.shape, args, dsid=self.id)

    if selection.nselect == 0:
        return

    # Broadcast scalars if necessary.
    if (mshape == () and selection.mshape != ()):
        if self.dtype.subdtype is not None:
            raise TypeError(
                "Scalar broadcasting is not supported for array dtypes")
        val2 = numpy.empty(selection.mshape[-1], dtype=val.dtype)
        val2[...] = val
        val = val2
        mshape = val.shape

    # Perform the write, with broadcasting
    # Be careful to pad memory shape with ones to avoid HDF5 chunking
    # glitch, which kicks in for mismatched memory/file selections
    if (len(mshape) < len(self.shape)):
        mshape_pad = (1,) * (len(self.shape) - len(mshape)) + mshape
    else:
        mshape_pad = mshape
    mspace = h5s.create_simple(mshape_pad,
                               (h5s.UNLIMITED,) * len(mshape_pad))
    for fspace in selection.broadcast(mshape):
        self.id.write(mspace, fspace, val, mtype)
def __setitem__(self, args, val):
    """ Write to the HDF5 dataset from a Numpy array.  NumPy's
    broadcasting rules are honored, for "simple" indexing (slices and
    integers).  For advanced indexing, the shapes must match.

    args : index expression; string elements select compound fields,
           everything else is the dataspace selection.
    val  : source data (converted to ndarray as needed).
    """
    args = args if isinstance(args, tuple) else (args,)

    # Sort field indices from the slicing
    names = tuple(x for x in args if isinstance(x, str))
    args = tuple(x for x in args if not isinstance(x, str))

    # Generally we try to avoid converting the arrays on the Python
    # side.  However, for compound literals this is unavoidable.
    if self.dtype.kind == "O" or \
      (self.dtype.kind == 'V' and \
      (not isinstance(val, numpy.ndarray) or val.dtype.kind != 'V') and \
      (self.dtype.subdtype is None)):
        if len(names) == 1 and self.dtype.fields is not None:
            # Single field selected for write, from a non-array source
            if not names[0] in self.dtype.fields:
                raise ValueError("No such field for indexing: %s" % names[0])
            dtype = self.dtype.fields[names[0]][0]
            cast_compound = True
        else:
            dtype = self.dtype
            cast_compound = False

        val = numpy.asarray(val, dtype=dtype, order='C')
        if cast_compound:
            # Wrap the scalar field data in a one-field compound dtype
            val = val.astype(numpy.dtype([(names[0], dtype)]))
    else:
        val = numpy.asarray(val, order='C')

    # Check for array dtype compatibility and convert
    if self.dtype.subdtype is not None:
        shp = self.dtype.subdtype[1]
        valshp = val.shape[-len(shp):]
        if valshp != shp:
            # Last dimension has to match
            raise TypeError("When writing to array types, last N dimensions have to match (got %s, but should be %s)" % (valshp, shp,))
        mtype = h5t.py_create(numpy.dtype((val.dtype, shp)))
        mshape = val.shape[0:len(val.shape) - len(shp)]

    # Make a compound memory type if field-name slicing is required
    elif len(names) != 0:
        mshape = val.shape

        # Catch common errors
        if self.dtype.fields is None:
            raise TypeError("Illegal slicing argument (not a compound dataset)")
        mismatch = [x for x in names if x not in self.dtype.fields]
        if len(mismatch) != 0:
            mismatch = ", ".join('"%s"' % x for x in mismatch)
            raise ValueError("Illegal slicing argument (fields %s not in dataset type)" % mismatch)

        # Write non-compound source into a single dataset field
        if len(names) == 1 and val.dtype.fields is None:
            # Fixed typo: was h5y.py_create, which raised NameError
            subtype = h5t.py_create(val.dtype)
            mtype = h5t.create(h5t.COMPOUND, subtype.get_size())
            mtype.insert(self._e(names[0]), 0, subtype)

        # Make a new source type keeping only the requested fields
        else:
            fieldnames = [x for x in val.dtype.names if x in names]  # Keep source order
            mtype = h5t.create(h5t.COMPOUND, val.dtype.itemsize)
            for fieldname in fieldnames:
                subtype = h5t.py_create(val.dtype.fields[fieldname][0])
                offset = val.dtype.fields[fieldname][1]
                mtype.insert(self._e(fieldname), offset, subtype)

    # Use mtype derived from array (let DatasetID.write figure it out)
    else:
        mshape = val.shape
        mtype = None

    # Perform the dataspace selection
    selection = sel.select(self.shape, args, dsid=self.id)

    if selection.nselect == 0:
        return

    # Broadcast scalars if necessary.
    if (mshape == () and selection.mshape != ()):
        if self.dtype.subdtype is not None:
            raise TypeError("Scalar broadcasting is not supported for array dtypes")
        val2 = numpy.empty(selection.mshape[-1], dtype=val.dtype)
        val2[...] = val
        val = val2
        mshape = val.shape

    # Perform the write, with broadcasting
    # Be careful to pad memory shape with ones to avoid HDF5 chunking
    # glitch, which kicks in for mismatched memory/file selections
    if (len(mshape) < len(self.shape)):
        mshape_pad = (1,) * (len(self.shape) - len(mshape)) + mshape
    else:
        mshape_pad = mshape
    mspace = h5s.create_simple(mshape_pad,
                               (h5s.UNLIMITED,) * len(mshape_pad))
    for fspace in selection.broadcast(mshape):
        self.id.write(mspace, fspace, val, mtype)
def __setitem__(self, args, val):
    """ Write to the HDF5 dataset from a Numpy array.  NumPy's
    broadcasting rules are honored, for "simple" indexing (slices and
    integers).  For advanced indexing, the shapes must match.

    The write is staged chunk-by-chunk into ``self.id.data_dict``
    rather than going straight to HDF5.
    """
    self.parent._check_committed()
    # This boilerplate code is based on h5py.Dataset.__setitem__
    args = args if isinstance(args, tuple) else (args,)

    # Sort field indices from the slicing
    names = tuple(x for x in args if isinstance(x, str))
    args = tuple(x for x in args if not isinstance(x, str))

    # Generally we try to avoid converting the arrays on the Python
    # side.  However, for compound literals this is unavoidable.
    vlen = h5t.check_vlen_dtype(self.dtype)
    if vlen is not None and vlen not in (bytes, str):
        try:
            val = np.asarray(val, dtype=vlen)
        except ValueError:
            try:
                val = np.array([np.array(x, dtype=vlen) for x in val],
                               dtype=self.dtype)
            except ValueError:
                # Leave val unconverted; downstream code may still cope
                pass
        if vlen == val.dtype:
            if val.ndim > 1:
                # Collapse leading dims into an object array of rows
                # (np.prod replaces np.product, removed in NumPy 2.0)
                tmp = np.empty(shape=val.shape[:-1], dtype=object)
                tmp.ravel()[:] = [
                    i for i in val.reshape((
                        np.prod(val.shape[:-1], dtype=np.ulonglong),
                        val.shape[-1]))
                ]
            else:
                tmp = np.array([None], dtype=object)
                tmp[0] = val
            val = tmp

    elif self.dtype.kind == "O" or \
      (self.dtype.kind == 'V' and \
      (not isinstance(val, np.ndarray) or val.dtype.kind != 'V') and \
      (self.dtype.subdtype is None)):
        if len(names) == 1 and self.dtype.fields is not None:
            # Single field selected for write, from a non-array source
            if not names[0] in self.dtype.fields:
                raise ValueError("No such field for indexing: %s" % names[0])
            dtype = self.dtype.fields[names[0]][0]
            cast_compound = True
        else:
            dtype = self.dtype
            cast_compound = False

        val = np.asarray(val, dtype=dtype.base, order='C')
        if cast_compound:
            # Reinterpret as a one-field compound and drop trailing
            # dims belonging to the field's own shape
            val = val.view(np.dtype([(names[0], dtype)]))
            val = val.reshape(val.shape[:len(val.shape) - len(dtype.shape)])
    else:
        val = np.asarray(val, order='C')

    # Check for array dtype compatibility and convert
    if self.dtype.subdtype is not None:
        shp = self.dtype.subdtype[1]
        valshp = val.shape[-len(shp):]
        if valshp != shp:
            # Last dimension has to match
            raise TypeError(
                "When writing to array types, last N dimensions have to match (got %s, but should be %s)" % (
                    valshp, shp,))
        mtype = h5t.py_create(np.dtype((val.dtype, shp)))
        # mshape = val.shape[0:len(val.shape)-len(shp)]

    # Make a compound memory type if field-name slicing is required
    elif len(names) != 0:
        # mshape = val.shape

        # Catch common errors
        if self.dtype.fields is None:
            raise TypeError(
                "Illegal slicing argument (not a compound dataset)")
        mismatch = [x for x in names if x not in self.dtype.fields]
        if len(mismatch) != 0:
            mismatch = ", ".join('"%s"' % x for x in mismatch)
            raise ValueError(
                "Illegal slicing argument (fields %s not in dataset type)" %
                mismatch)

        # Write non-compound source into a single dataset field
        if len(names) == 1 and val.dtype.fields is None:
            subtype = h5t.py_create(val.dtype)
            mtype = h5t.create(h5t.COMPOUND, subtype.get_size())
            mtype.insert(self._e(names[0]), 0, subtype)

        # Make a new source type keeping only the requested fields
        else:
            fieldnames = [x for x in val.dtype.names if x in names]  # Keep source order
            mtype = h5t.create(h5t.COMPOUND, val.dtype.itemsize)
            for fieldname in fieldnames:
                subtype = h5t.py_create(val.dtype.fields[fieldname][0])
                offset = val.dtype.fields[fieldname][1]
                mtype.insert(self._e(fieldname), offset, subtype)

    # Use mtype derived from array (let DatasetID.write figure it out)
    # NOTE(review): mtype is unused below; kept to mirror the h5py
    # boilerplate this was copied from.
    else:
        mtype = None

    # === END CODE FROM h5py.Dataset.__setitem__ ===

    idx = ndindex(args).reduce(self.shape)

    val = np.broadcast_to(val, idx.newshape(self.shape))

    for c, index in as_subchunks(idx, self.shape, self.chunks):
        if isinstance(self.id.data_dict[c], (slice, Slice, tuple, Tuple)):
            # Chunk is still a lazy reference into the raw data; read it
            # into memory before mutating
            raw_idx = Tuple(self.id.data_dict[c],
                            *[slice(0, len(i)) for i in c.args[1:]]).raw
            a = self.id._read_chunk(raw_idx)
            self.id.data_dict[c] = a

        if self.id.data_dict[c].size != 0:
            val_idx = c.as_subindex(idx)
            self.id.data_dict[c][index.raw] = val[val_idx.raw]
def test_setget_tag(self):
    """ An opaque type's tag round-trips through set_tag/get_tag. """
    tag = "FOOBAR"
    opaque = h5t.create(h5t.OPAQUE, 40)
    opaque.set_tag(tag)
    self.assertEqual(opaque.get_tag(), tag)
def __init__(self, size):
    """ Begin building a compound datatype of ``size`` total bytes.

    Creates the underlying HDF5 compound type and starts the running
    member offset at zero.
    """
    self.offset = 0
    self.type_id = h5t.create(h5t.COMPOUND, size)