def setUp(self):
    BaseSlicing.setUp(self)
    # Create a dataset backed by a NULL dataspace (no elements at all).
    sid = h5s.create(h5s.NULL)
    tid = h5t.C_S1.copy()
    tid.set_size(10)
    dsid = h5d.create(self.f.id, b'x', tid, sid)
    self.dataset = self.f['x']
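# Hedged sketch, not part of the original suite: with recent h5py, a dataset
# backed by a NULL dataspace reports its shape as None, and the dataspace
# class can be checked directly through the low-level API.
def test_null_dataspace(self):
    self.assertIsNone(self.dataset.shape)
    self.assertEqual(self.dataset.id.get_space().get_simple_extent_type(),
                     h5s.NULL)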
def test_plugins(self):
    shape = (32 * 1024,)
    chunks = (4 * 1024,)
    dtype = np.int64
    data = np.arange(shape[0])
    fname = "tmp_test_filters.h5"
    f = h5py.File(fname)
    tid = h5t.py_create(dtype, logical=1)
    sid = h5s.create_simple(shape, shape)
    # Different APIs for different h5py versions.
    try:
        dcpl = filters.generate_dcpl(shape, dtype, chunks, None, None,
                                     None, None, None, None)
    except TypeError:
        dcpl = filters.generate_dcpl(shape, dtype, chunks, None, None,
                                     None, None, None)
    dcpl.set_filter(32008, h5z.FLAG_MANDATORY)
    dcpl.set_filter(32000, h5z.FLAG_MANDATORY)
    # The low-level API expects a bytes name.
    dset_id = h5d.create(f.id, b"range", tid, sid, dcpl=dcpl)
    dset_id.write(h5s.ALL, h5s.ALL, data)
    f.close()

    # Make sure the filters are working outside of h5py by calling h5dump
    h5dump = Popen(['h5dump', fname], stdout=PIPE, stderr=STDOUT)
    stdout, _ = h5dump.communicate()
    err = h5dump.returncode
    self.assertEqual(err, 0)

    f = h5py.File(fname, 'r')
    d = f['range'][:]
    self.assertTrue(np.all(d == data))
    f.close()
def test_plugins(self):
    if not H51811P:
        return
    shape = (32 * 1024,)
    chunks = (4 * 1024,)
    dtype = np.int64
    data = np.arange(shape[0])
    fname = "tmp_test_filters.h5"
    f = h5py.File(fname)
    tid = h5t.py_create(dtype, logical=1)
    sid = h5s.create_simple(shape, shape)
    # Different APIs for different h5py versions.
    try:
        dcpl = filters.generate_dcpl(shape, dtype, chunks, None, None,
                                     None, None, None, None)
    except TypeError:
        dcpl = filters.generate_dcpl(shape, dtype, chunks, None, None,
                                     None, None, None)
    dcpl.set_filter(32008, h5z.FLAG_MANDATORY)
    dcpl.set_filter(32000, h5z.FLAG_MANDATORY)
    dset_id = h5d.create(f.id, b"range", tid, sid, dcpl=dcpl)
    dset_id.write(h5s.ALL, h5s.ALL, data)
    f.close()

    # Make sure the filters are working outside of h5py by calling h5dump
    h5dump = Popen(['h5dump', fname], stdout=PIPE, stderr=STDOUT)
    stdout, _ = h5dump.communicate()
    err = h5dump.returncode
    self.assertEqual(err, 0)

    f = h5py.File(fname, 'r')
    d = f['range'][:]
    self.assertTrue(np.all(d == data))
    f.close()
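# Hedged sketch (a hypothetical helper, assuming the filter codes used above:
# 32000 is h5py's registered LZF code, 32008 is bitshuffle): h5z.filter_avail
# asks the HDF5 filter pipeline whether a filter is actually registered, which
# is a more direct guard than the H51811P version flag alone.
def _plugins_available(self):
    return h5z.filter_avail(32000) and h5z.filter_avail(32008)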
def make_new_dset(parent, shape=None, dtype=None, data=None, chunks=None,
                  compression=None, shuffle=None, fletcher32=None,
                  maxshape=None, compression_opts=None, fillvalue=None,
                  scaleoffset=None, track_times=None):
    """ Return a new low-level dataset identifier

    Only creates anonymous datasets.
    """

    # Convert data to a C-contiguous ndarray
    if data is not None:
        import base
        data = numpy.asarray(data, order="C", dtype=base.guess_dtype(data))

    # Validate shape
    if shape is None:
        if data is None:
            raise TypeError("Either data or shape must be specified")
        shape = data.shape
    else:
        shape = tuple(shape)
        if data is not None and (numpy.product(shape) != numpy.product(data.shape)):
            raise ValueError("Shape tuple is incompatible with data")

    # Validate dtype
    if dtype is None and data is None:
        dtype = numpy.dtype("=f4")
    elif dtype is None and data is not None:
        dtype = data.dtype
    else:
        dtype = numpy.dtype(dtype)

    # Legacy
    if any((compression, shuffle, fletcher32, maxshape, scaleoffset)) and chunks is False:
        raise ValueError("Chunked format required for given storage options")

    # Legacy
    if compression is True:
        if compression_opts is None:
            compression_opts = 4
        compression = 'gzip'

    # Legacy
    if compression in range(10):
        if compression_opts is not None:
            raise TypeError("Conflict in compression options")
        compression_opts = compression
        compression = 'gzip'

    dcpl = filters.generate_dcpl(shape, dtype, chunks, compression,
                                 compression_opts, shuffle, fletcher32,
                                 maxshape, scaleoffset)

    if fillvalue is not None:
        fillvalue = numpy.array(fillvalue)
        dcpl.set_fill_value(fillvalue)

    if track_times in (True, False):
        dcpl.set_obj_track_times(track_times)
    elif track_times is not None:
        raise TypeError("track_times must be either True or False")

    if maxshape is not None:
        maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape)
    sid = h5s.create_simple(shape, maxshape)

    tid = h5t.py_create(dtype, logical=1)

    dset_id = h5d.create(parent.id, None, tid, sid, dcpl=dcpl)

    if data is not None:
        dset_id.write(h5s.ALL, h5s.ALL, data)

    return dset_id
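# Hedged usage sketch (assumes `f` is an open h5py.File and `dataset` is the
# h5py._hl.dataset module): make_new_dset only creates an anonymous dataset
# identifier, so the caller links it into the file under a name afterwards.
#
#     dset_id = make_new_dset(f, shape=(100,), dtype='=f4', chunks=(10,),
#                             compression='gzip', compression_opts=4)
#     f['x'] = dataset.Dataset(dset_id)   # create the named hard link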
def create_compact_dataset(loc, name, shape=None, dtype=None, data=None,
                           chunks=None, compression=None, shuffle=None,
                           fletcher32=None, maxshape=None,
                           compression_opts=None, fillvalue=None,
                           scaleoffset=None, track_times=None):
    """Create a new HDF5 dataset with a compact storage layout."""

    # Convert data to a C-contiguous ndarray
    if data is not None:
        import h5py._hl.base
        data = numpy.asarray(data, order="C",
                             dtype=h5py._hl.base.guess_dtype(data))

    # Validate shape
    if shape is None:
        if data is None:
            raise TypeError("Either data or shape must be specified")
        shape = data.shape
    else:
        shape = tuple(shape)
        if data is not None and (numpy.product(shape) != numpy.product(data.shape)):
            raise ValueError("Shape tuple is incompatible with data")

    if isinstance(dtype, h5py.Datatype):
        # Named types are used as-is
        tid = dtype.id
        dtype = tid.dtype  # Following code needs this
    else:
        # Validate dtype
        if dtype is None and data is None:
            dtype = numpy.dtype("=f4")
        elif dtype is None and data is not None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)
        tid = h5t.py_create(dtype, logical=1)

    # Legacy
    if any((compression, shuffle, fletcher32, maxshape, scaleoffset)) and chunks is False:
        raise ValueError("Chunked format required for given storage options")

    # Legacy
    if compression is True:
        if compression_opts is None:
            compression_opts = 4
        compression = 'gzip'

    # Legacy
    if compression in range(10):
        if compression_opts is not None:
            raise TypeError("Conflict in compression options")
        compression_opts = compression
        compression = 'gzip'

    if h5py.version.version_tuple >= (2, 2, 0, ''):
        dcpl = filters.generate_dcpl(shape, dtype, chunks, compression,
                                     compression_opts, shuffle, fletcher32,
                                     maxshape, None)
    else:
        dcpl = filters.generate_dcpl(shape, dtype, chunks, compression,
                                     compression_opts, shuffle, fletcher32,
                                     maxshape)

    if fillvalue is not None:
        fillvalue = numpy.array(fillvalue)
        dcpl.set_fill_value(fillvalue)

    if track_times in (True, False):
        dcpl.set_obj_track_times(track_times)
    elif track_times is not None:
        raise TypeError("track_times must be either True or False")

    dcpl.set_layout(h5d.COMPACT)

    if maxshape is not None:
        maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape)
    sid = h5s.create_simple(shape, maxshape)

    dset_id = h5d.create(loc.id, None, tid, sid, dcpl=dcpl)

    if data is not None:
        dset_id.write(h5s.ALL, h5s.ALL, data)

    dset = dataset.Dataset(dset_id)
    if name is not None:
        loc[name] = dset
    return dset
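# Hedged usage sketch: HDF5 stores a compact dataset's raw data inside the
# object header, which caps it at 64 KiB, so this helper only makes sense for
# small arrays. The layout choice can be verified through the low-level API.
#
#     with h5py.File('small.h5', 'w') as f:
#         dset = create_compact_dataset(f, 'tiny', data=numpy.arange(16))
#         assert dset.id.get_create_plist().get_layout() == h5d.COMPACT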
def make_new_dset(parent, shape=None, dtype=None, data=None, chunks=None,
                  compression=None, shuffle=None, fletcher32=None,
                  maxshape=None, compression_opts=None, fillvalue=None,
                  scaleoffset=None, track_times=None):
    """ Return a new low-level dataset identifier

    Only creates anonymous datasets.
    """

    # Convert data to a C-contiguous ndarray
    if data is not None:
        import base
        data = numpy.asarray(data, order="C", dtype=base.guess_dtype(data))

    # Validate shape
    if shape is None:
        if data is None:
            raise TypeError("Either data or shape must be specified")
        shape = data.shape
    else:
        shape = tuple(shape)
        if data is not None and (numpy.product(shape) != numpy.product(data.shape)):
            raise ValueError("Shape tuple is incompatible with data")

    tmp_shape = maxshape if maxshape is not None else shape
    # Validate chunk shape: every chunk dimension must fit within the
    # corresponding (max)shape dimension; unlimited (None) dimensions are
    # skipped. (The original negated a bool array with unary minus, which
    # modern numpy rejects.)
    if isinstance(chunks, tuple) and not all(
            i >= j for i, j in zip(tmp_shape, chunks) if i is not None):
        errmsg = ("Chunk shape must not be greater than data shape in any "
                  "dimension. {} is not compatible with {}".format(chunks, shape))
        raise ValueError(errmsg)

    if isinstance(dtype, h5py.Datatype):
        # Named types are used as-is
        tid = dtype.id
        dtype = tid.dtype  # Following code needs this
    else:
        # Validate dtype
        if dtype is None and data is None:
            dtype = numpy.dtype("=f4")
        elif dtype is None and data is not None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)
        tid = h5t.py_create(dtype, logical=1)

    # Legacy
    if any((compression, shuffle, fletcher32, maxshape, scaleoffset)) and chunks is False:
        raise ValueError("Chunked format required for given storage options")

    # Legacy
    if compression is True:
        if compression_opts is None:
            compression_opts = 4
        compression = 'gzip'

    # Legacy
    if compression in _LEGACY_GZIP_COMPRESSION_VALS:
        if compression_opts is not None:
            raise TypeError("Conflict in compression options")
        compression_opts = compression
        compression = 'gzip'

    dcpl = filters.generate_dcpl(shape, dtype, chunks, compression,
                                 compression_opts, shuffle, fletcher32,
                                 maxshape, scaleoffset)

    if fillvalue is not None:
        fillvalue = numpy.array(fillvalue)
        dcpl.set_fill_value(fillvalue)

    if track_times in (True, False):
        dcpl.set_obj_track_times(track_times)
    elif track_times is not None:
        raise TypeError("track_times must be either True or False")

    if maxshape is not None:
        maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape)
    sid = h5s.create_simple(shape, maxshape)

    dset_id = h5d.create(parent.id, None, tid, sid, dcpl=dcpl)

    if data is not None:
        dset_id.write(h5s.ALL, h5s.ALL, data)

    return dset_id
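# Hedged sketch of the chunk validation above (assumes an open h5py.File `f`):
# a chunk dimension larger than the corresponding shape dimension is rejected
# before any HDF5 call, unless that dimension is declared unlimited.
#
#     make_new_dset(f, shape=(100,), chunks=(128,))   # raises ValueError
#     make_new_dset(f, shape=(100,), chunks=(128,),   # accepted: the dimension
#                   maxshape=(None,))                 # is unlimited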