def testCreateVlenObjRefType(self):
    """A vlen-of-object-reference type item yields a tagged object dtype.

    The resulting dtype is a plain object dtype that is tagged as
    variable-length (not as a reference); its vlen base type is the one
    tagged as an object reference.
    """
    type_json = {
        'class': 'H5T_VLEN',
        'base': {'class': 'H5T_REFERENCE', 'base': 'H5T_STD_REF_OBJ'},
    }
    vlen_dt = hdf5dtype.createDataType(type_json)
    self.assertEqual(vlen_dt.name, 'object')
    self.assertEqual(vlen_dt.kind, 'O')
    # The outer dtype is the vlen container, so it carries no ref tag.
    self.assertIsNone(check_dtype(ref=vlen_dt))
    element_dt = check_dtype(vlen=vlen_dt)
    self.assertIsNotNone(element_dt)
    self.assertIs(check_dtype(ref=element_dt), Reference)
def testCreateEnumType(self):
    """An H5T_ENUM item over int16 yields a 2-byte int16 dtype with its mapping."""
    expected_members = {"GAS": 2, "LIQUID": 1, "PLASMA": 3, "SOLID": 0}
    type_json = {
        "class": "H5T_ENUM",
        "base": {"base": "H5T_STD_I16LE", "class": "H5T_INTEGER"},
        # Copy so the expectations cannot be altered through the type item.
        "mapping": dict(expected_members),
    }
    self.assertEqual(hdf5dtype.getItemSize(type_json), 2)

    enum_dt = hdf5dtype.createDataType(type_json)
    self.assertEqual(enum_dt.name, 'int16')
    self.assertEqual(enum_dt.kind, 'i')

    members = check_dtype(enum=enum_dt)
    self.assertTrue(isinstance(members, dict))
    for label in ("SOLID", "LIQUID", "GAS", "PLASMA"):
        self.assertEqual(members[label], expected_members[label])
def test_create(self):
    """ Enum datasets can be created and type correctly round-trips """
    enum_dt = h5py.special_dtype(enum=('i', self.EDICT))
    dset = self.f.create_dataset('x', (100, 100), dtype=enum_dt)
    # The dtype read back from the dataset must still carry the enum mapping.
    recovered = h5py.check_dtype(enum=dset.dtype)
    self.assertEqual(recovered, self.EDICT)
def convertDatasetToObject(data, slices):
    """Convert numpy/hdf dataset to suitable data for veusz.

    Boolean, integer, unsigned and float data is returned as a float64
    array. Byte/unicode string data, and object data that h5py reports as
    variable-length ``str``, is returned as a list of the items. When
    ``slices`` is truthy it is applied first through ``applySlices``.

    Raises ConvertError if the dtype kind cannot be read, if numeric data
    has no dimensions, if text data is not one-dimensional, or if the type
    is none of the above.
    """
    # h5py is loaded lazily; without it (e.g. only FITS support installed)
    # use a stand-in that never reports a variable-length string.
    try:
        from h5py import check_dtype
    except ImportError:
        def check_dtype(vlen=None):
            return False

    if slices:
        data = applySlices(data, slices)

    try:
        kind = data.dtype.kind
    except TypeError:
        raise ConvertError(_("Could not get data type of dataset"))

    if kind in ('b', 'i', 'u', 'f'):
        numeric = N.array(data, dtype=N.float64)
        if numeric.ndim == 0:
            raise ConvertError(_("Dataset has no dimensions"))
        return numeric

    is_text = kind in ('S', 'a', 'U') or (
        kind == 'O' and check_dtype(vlen=data.dtype) is str)
    if not is_text:
        raise ConvertError(_("Dataset has an invalid type"))

    if hasattr(data, 'ndim') and data.ndim != 1:
        raise ConvertError(_("Text datasets must have 1 dimension"))
    return list(data)
def convertDatasetToObject(data, slices):
    """Convert numpy/hdf dataset to suitable data for veusz.

    Boolean, integer, unsigned and float data is returned as a float64
    array of at most two dimensions. Byte-string data, and object data for
    which ``h5py.check_dtype(vlen=...)`` is truthy, is returned as a list
    of the items. When ``slices`` is truthy it is applied first through
    ``applySlices``.

    Raises _ConvertError if the dtype kind cannot be read, if the
    dimensionality is not accepted, or if the type is none of the above.
    """
    if slices:
        data = applySlices(data, slices)

    try:
        kind = data.dtype.kind
    except TypeError:
        raise _ConvertError(_("Could not get kind of HDF5 dataset"))

    if kind in ('b', 'i', 'u', 'f'):
        numeric = N.array(data, dtype=N.float64)
        if len(numeric.shape) > 2:
            raise _ConvertError(_("HDF5 dataset has more than 2 dimensions"))
        return numeric

    # h5py is only consulted for object dtypes (short-circuit evaluation).
    is_text = kind in ('S', 'a') or (
        kind == 'O' and h5py.check_dtype(vlen=data.dtype))
    if not is_text:
        raise _ConvertError(_("HDF5 dataset has an invalid type"))

    if len(data.shape) != 1:
        raise _ConvertError(_("HDF5 dataset has more than 1 dimension"))
    return list(data)
def testCreateVLenUTF8Type(self):
    """A variable-length UTF-8 string item maps to an object dtype tagged as text."""
    type_json = {
        'class': 'H5T_STRING',
        'charSet': 'H5T_CSET_UTF8',
        'length': 'H5T_VARIABLE',
    }
    item_size = hdf5dtype.getItemSize(type_json)
    utf8_dt = hdf5dtype.createDataType(type_json)

    self.assertEqual(utf8_dt.name, 'object')
    self.assertEqual(utf8_dt.kind, 'O')
    self.assertEqual(check_dtype(vlen=utf8_dt), six.text_type)
    # Variable-length items report the marker string instead of a byte count.
    self.assertEqual(item_size, 'H5T_VARIABLE')
def test_create(self):
    """Create groups, take an object reference, and store it in a dataset.

    The file is closed in a ``finally`` clause so that a failing assertion
    does not leave the handle open.
    """
    filename = self.getFileName("objref_test")
    print(filename)
    f = h5py.File(filename, 'w')
    try:
        self.assertTrue(f.id.id is not None)
        self.assertTrue('/' in f)
        r = f['/']

        r.create_group('g1')
        self.assertTrue('g1' in r)
        g1 = r['g1']

        g11 = g1.create_group('g1.1')

        g11_ref = g11.ref
        print(g11_ref)
        print("uuid:", g11_ref.id.uuid)
        print("domain:", g11_ref.id.domain)
        print("type:", g11_ref.id.objtype_code)
        # print("g11_ref_tolist:", g11_ref.tolist())
        # todo - fix
        # self.assertTrue(isinstance(g11_ref, h5py.Reference))

        r.create_group('g2')
        self.assertEqual(len(r), 2)
        g2 = r['g2']

        # Disabled: dereferencing through a group is not exercised yet.
        # g11ref = g2[g11_ref]
        # print("g11ref:", g11ref)
        # print("g11ref name:", g11ref.name)
        # print("g11ref type:", type(g11ref))
        # g11ref.create_group("foo")

        # todo - special_dtype not implemented
        dt = h5py.special_dtype(ref=h5py.Reference)
        print("dt:", dt)
        print("dt.kind:", dt.kind)
        print("dt.meta:", dt.metadata['ref'])
        self.assertTrue(dt.metadata['ref'] is h5py.Reference)

        dset = g1.create_dataset('myrefs', (10,), dtype=dt)
        print("dset.dtype.kind:", dset.dtype.kind)
        ref = h5py.check_dtype(ref=dset.dtype)
        print("check_dtype:", ref)

        null_ref = dset[0]
        print("null_ref:", null_ref)
        dset[0] = g11_ref

        # g2.attrs['dataset'] = dset.ref
        # todo - references as data will need h5pyd equivalent of h5t module
        # g2.attrs.create('dataset', dset.ref, dtype=dt)
    finally:
        f.close()
def test_compound(self):
    """A vlen-str field keeps its special dtype inside a committed compound type."""
    compound = np.dtype([
        ('field_1', h5py.special_dtype(vlen=str)),
        ('field_2', np.int32),
    ])
    self.f['mytype'] = np.dtype(compound)
    # fields[name] is a tuple whose first entry is the field's dtype.
    stored_field = self.f['mytype'].dtype.fields['field_1'][0]
    self.assertEqual(h5py.check_dtype(vlen=stored_field), str)
def test_compound(self):
    """A vlen-str field keeps its special dtype inside a committed compound type."""
    compound = np.dtype([
        ("field_1", h5py.special_dtype(vlen=str)),
        ("field_2", np.int32),
    ])
    self.f["mytype"] = np.dtype(compound)
    # fields[name] is a tuple whose first entry is the field's dtype.
    stored_field = self.f["mytype"].dtype.fields["field_1"][0]
    self.assertEqual(h5py.check_dtype(vlen=stored_field), str)
def test_vlen_enum(self):
    """A variable-length sequence of enum values round-trips through a file.

    Both the stored rows and the enum mapping recovered from the dataset's
    dtype must match what was written.
    """
    fname = self.mktemp()
    arr1 = [[1], [1, 2]]
    dt1 = h5py.special_dtype(vlen=h5py.special_dtype(
        enum=('i', dict(foo=1, bar=2))))

    with h5py.File(fname, 'w') as f:
        df1 = f.create_dataset('test', (len(arr1),), dtype=dt1)
        # The rows have different lengths, so an object array must be
        # requested explicitly: recent NumPy refuses to build a ragged
        # array implicitly and raises ValueError instead.
        df1[:] = np.array(arr1, dtype=object)

    with h5py.File(fname, 'r') as f:
        df2 = f['test']
        dt2 = df2.dtype
        arr2 = [e.tolist() for e in df2[:]]

    self.assertEqual(arr1, arr2)
    self.assertEqual(h5py.check_dtype(enum=h5py.check_dtype(vlen=dt1)),
                     h5py.check_dtype(enum=h5py.check_dtype(vlen=dt2)))
def __formatH5pyObject(self, data, dtype):
    """Format a value whose dtype is one of h5py's special object dtypes.

    :param data: Value to render
    :param dtype: dtype to inspect with ``h5py.check_dtype``
    :returns: ``"REF"`` or ``"NULL_REF"`` for reference dtypes (depending
        on the truthiness of ``data``), the formatted text for
        variable-length text/byte strings, the recursively formatted value
        for a variable-length numpy dtype, or ``None`` when the dtype is
        none of these.
    """
    if h5py.check_dtype(ref=dtype) is not None:
        return "REF" if bool(data) else "NULL_REF"

    base = h5py.check_dtype(vlen=dtype)
    if base is None:
        return None
    if base == six.text_type:
        # HDF5 UTF8
        return self.__formatText(data)
    if base == six.binary_type:
        # HDF5 ASCII
        return self.__formatCharString(data)
    if isinstance(base, numpy.dtype):
        return self.toString(data, base)
    return None
def test_enum(self):
    """High-level enumerated types work on top of every integer base type.

    For each base type this checks the mapping round-trip through
    ``check_dtype``, dataset creation, and assignment of a plain integer.
    """
    vals = {'a': 1, 'b': 2, 'c': 42}

    f = h5py.File(res.get_name(), 'w')
    try:
        base_types = (np.dtype(x) for x in (common.INTS + common.UINTS))
        for idx, basetype in enumerate(base_types):
            msg = "dset %s, type %s" % (idx, basetype)

            dt = h5py.special_dtype(enum=(basetype, vals))
            self.assertEqual(h5py.check_dtype(enum=dt), vals, msg)
            # A plain integer dtype must not be reported as an enum.
            # (assertTrue replaces the assert_ alias removed in Python 3.12.)
            self.assertTrue(h5py.check_dtype(enum=np.dtype('i')) is None, msg)

            # Test dataset creation
            refarr = np.zeros((4, 4), dtype=dt)
            ds = f.create_dataset(str(idx), (4, 4), dtype=dt)
            self.assertTrue(np.all(ds[...] == refarr), msg)

            # Test conversion to/from plain integer
            ds[0, 0] = np.array(64, dtype=dt)
            self.assertEqual(ds[0, 0], 64, msg)
    finally:
        # Close the file even when an assertion above fails.
        f.close()
def from_hdf5(self, h5group):
    """Set one attribute on this object per dataset in an HDF5 group.

    For every ``(key, dataset)`` pair of ``h5group.items()``:

    * a dataset of shape ``(0,)`` is stored as an empty list,
    * a variable-length dataset is read, converted with ``tolist()`` and
      evaluated as a Python expression (used for lists of lists),
    * anything else is stored as read (``dataset[...]``).
    """
    for key, dataset in h5group.items():
        # FIXME: empty datasets are not read at all, to work around
        # https://github.com/h5py/h5py/issues/281
        if dataset.shape == (0,):
            setattr(self, key, [])
        elif h5py.check_dtype(vlen=dataset.dtype):
            # VLEN data holds the textual form of a list of lists.
            # SECURITY NOTE(review): eval() executes whatever expression is
            # stored in the file. This is only safe for trusted files;
            # consider ast.literal_eval if the stored text is always a
            # plain literal (not verifiable from this function alone).
            setattr(self, key, eval(dataset[...].tolist()))
        else:
            setattr(self, key, dataset[...])
def testCreateCompoundType(self):
    """A compound item with a vlen ASCII string member maps to a 4-field dtype."""
    vlen_ascii = {
        'length': 'H5T_VARIABLE',
        'charSet': 'H5T_CSET_ASCII',
        'class': 'H5T_STRING',
        'strPad': 'H5T_STR_NULLTERM',
    }
    type_json = {
        'class': 'H5T_COMPOUND',
        'fields': [
            {'name': 'temp', 'type': 'H5T_IEEE_F32LE'},
            {'name': 'pressure', 'type': 'H5T_IEEE_F32LE'},
            {'name': 'location', 'type': vlen_ascii},
            {'name': 'wind', 'type': 'H5T_STD_I16LE'},
        ],
    }

    compound_dt = hdf5dtype.createDataType(type_json)
    self.assertEqual(compound_dt.name, 'void144')
    self.assertEqual(compound_dt.kind, 'V')
    self.assertEqual(len(compound_dt.fields), 4)

    # Third member ('location') is the variable-length string.
    location_dt = compound_dt[2]
    self.assertEqual(location_dt.name, 'object')
    self.assertEqual(location_dt.kind, 'O')
    self.assertEqual(check_dtype(vlen=location_dt), str)
def getDataTranspose(self, limit, start):
    """Return the rows from ``_getData`` as a tuple of per-field columns.

    :param limit: forwarded to ``self._getData``
    :param start: forwarded to ``self._getData``
    :returns: ``(columns, new_pos)`` where ``columns`` has one entry per
        field of the structured data (fields are read by the names
        ``f0``, ``f1``, ...) and ``new_pos`` is the position returned by
        ``self._getData``.
    :raises RuntimeError: if a field is an object dtype that h5py does not
        report as a variable-length ``str``.
    """
    struct_data, new_pos = self._getData(limit, start)

    columns = []
    for idx in range(len(struct_data.dtype)):
        col = struct_data['f{}'.format(idx)]
        # Strings are stored as hdf5 vlen objects. Numpy can't do variable
        # length strings, so they get encoded as object arrays by hdf5. We
        # don't know how to flatten object arrays, so vlen types are
        # special-cased here and converted to lists. Also, h5py has a bug
        # where indexing a dataset with a compound type loses the special
        # dtype information, so it is pulled directly from
        # self.dataset.dtype rather than from the data returned by _getData.
        field_dtype = self.dataset.dtype[idx]
        # dtype.kind replaces the comparison against np.object, an alias
        # that no longer exists in current NumPy releases.
        if field_dtype.kind == 'O':
            base_type = h5py.check_dtype(vlen=field_dtype)
            # isinstance guard: a vlen base may be a dtype instance, for
            # which issubclass() would raise TypeError.
            is_str = isinstance(base_type, type) and issubclass(base_type, str)
            if not is_str:
                raise RuntimeError("Found object type array, but not vlen str. Not supported. This shouldn't happen")
            col = [base_type(x) for x in col]
        columns.append(col)

    return tuple(columns), new_pos
def open_store_variable(self, name, var):
    """Build a ``Variable`` for the stored variable ``var`` called ``name``.

    The data is wrapped lazily; the encoding records chunking, checksum,
    shuffle and compression settings, the source file name, the original
    shape and, where supported, the dtype.
    """
    import h5py

    with self.ensure_open(autoclose=False):
        dimensions = var.dimensions
        data = indexing.LazilyOuterIndexedArray(
            H5NetCDFArrayWrapper(name, self))
        attrs = _read_attributes(var)

        # netCDF4 specific encoding
        encoding = {
            'chunksizes': var.chunks,
            'fletcher32': var.fletcher32,
            'shuffle': var.shuffle,
        }

        # Convert h5py-style compression options to NetCDF4-Python
        # style, if possible
        if var.compression == 'gzip':
            encoding.update(zlib=True, complevel=var.compression_opts)
        elif var.compression is not None:
            encoding.update(compression=var.compression,
                            compression_opts=var.compression_opts)

        # save source so __repr__ can detect if it's local or not
        encoding['source'] = self._filename
        encoding['original_shape'] = var.shape

        vlen_dtype = h5py.check_dtype(vlen=var.dtype)
        if vlen_dtype is unicode_type:
            encoding['dtype'] = str
        elif vlen_dtype is None:
            encoding['dtype'] = var.dtype
        # Otherwise it is some other vlen dtype: xarray doesn't support
        # writing arbitrary vlen dtypes yet, so no dtype is recorded.

    return Variable(dimensions, data, attrs, encoding)
def get_catalog_type(hdf5_type):
    """Map an HDF5/numpy dtype to a type name recognized by the API.

    Object dtypes that h5py reports as variable-length ``str`` are 'text'.
    Otherwise the textual form of the dtype is matched against a few
    common prefixes, then looked up in the module-level ``maps``
    dictionary. Returns None when nothing matches.
    """
    # An object dtype is most likely a string.
    if hdf5_type.kind == 'O':
        base = h5py.check_dtype(vlen=hdf5_type)
        if getattr(base, '__name__', None) == 'str':
            return 'text'

    type_name = str(hdf5_type)
    prefix_table = (
        ('|S', 'text'),
        ('int', 'int8'),
        ('uint', 'int8'),
        ('float', 'float8'),
    )
    for prefix, catalog_type in prefix_table:
        if type_name.startswith(prefix):
            return catalog_type

    return maps.get(type_name)
def read_h5netcdf(tmp_netcdf, write_module):
    """Open ``tmp_netcdf`` with h5netcdf and check the expected test layout.

    ``write_module`` is the module that wrote the file; string attribute
    checks are skipped on Python 2 or when it is netCDF4. The chunk check
    on ``y`` is skipped when ``tmp_netcdf`` is a remote path.
    """
    remote_file = (isinstance(tmp_netcdf, str) and
                   tmp_netcdf.startswith(remote_h5))
    check_str_attrs = not PY2 and write_module is not netCDF4

    ds = h5netcdf.File(tmp_netcdf, 'r')
    assert ds.name == '/'
    assert list(ds.attrs) == ['global', 'other_attr']
    assert ds.attrs['global'] == 42
    if check_str_attrs:
        # skip for now: https://github.com/Unidata/netcdf4-python/issues/388
        assert ds.attrs['other_attr'] == 'yes'
    assert set(ds.dimensions) == {
        'x', 'y', 'z', 'empty', 'string3', 'mismatched_dim'}
    assert set(ds.variables) == {
        'foo', 'y', 'z', 'intscalar', 'scalar', 'var_len_str',
        'mismatched_dim'}
    assert set(ds.groups) == {'subgroup'}
    assert ds.parent is None

    foo = ds['foo']
    assert foo.name == '/foo'
    assert array_equal(foo, np.ones((4, 5)))
    assert foo.dtype == float
    assert foo.dimensions == ('x', 'y')
    assert foo.ndim == 2
    assert list(foo.attrs) == ['units']
    if check_str_attrs:
        assert foo.attrs['units'] == 'meters'
    assert foo.chunks == (4, 5)
    assert foo.compression == 'gzip'
    assert foo.compression_opts == 4
    assert not foo.fletcher32
    assert foo.shuffle

    y = ds['y']
    assert array_equal(y, np.r_[np.arange(4), [-1]])
    assert y.dtype == int
    assert y.dimensions == ('y',)
    assert y.ndim == 1
    assert list(y.attrs) == ['_FillValue']
    assert y.attrs['_FillValue'] == -1
    if not remote_file:
        assert y.chunks is None
    assert y.compression is None
    assert y.compression_opts is None
    assert not y.fletcher32
    assert not y.shuffle
    ds.close()

    # The capability probe runs while the file is closed, before reopening.
    char_ok = is_h5py_char_working(tmp_netcdf, 'z')
    ds = h5netcdf.File(tmp_netcdf, 'r')
    if char_ok:
        z = ds['z']
        assert z.dtype == 'S1'
        assert z.ndim == 2
        assert z.dimensions == ('z', 'string3')
        assert list(z.attrs) == ['_FillValue']
        assert z.attrs['_FillValue'] == b'X'

    scalar = ds['scalar']
    assert array_equal(scalar, np.array(2.0))
    assert scalar.dtype == 'float32'
    assert scalar.ndim == 0
    assert scalar.dimensions == ()
    assert list(scalar.attrs) == []

    intscalar = ds.variables['intscalar']
    assert array_equal(intscalar, np.array(2))
    assert intscalar.dtype == 'int64'
    assert intscalar.ndim == 0
    assert intscalar.dimensions == ()
    assert list(intscalar.attrs) == []

    strvar = ds['var_len_str']
    assert h5py.check_dtype(vlen=strvar.dtype) == unicode
    assert strvar[0] == u'foo'

    # The same variable object must be returned for every spelling of its path.
    subvar = ds['/subgroup/subvar']
    assert subvar is ds['subgroup']['subvar']
    assert subvar is ds['subgroup/subvar']
    assert subvar is ds['subgroup']['/subgroup/subvar']
    assert subvar.name == '/subgroup/subvar'
    assert ds['subgroup'].name == '/subgroup'
    assert ds['subgroup'].parent is ds
    assert array_equal(subvar, np.arange(4.0))
    assert subvar.dtype == 'int32'
    assert subvar.ndim == 1
    assert subvar.dimensions == ('x',)
    assert list(subvar.attrs) == []

    assert ds['/subgroup/y_var'].shape == (10,)
    assert ds['/subgroup'].dimensions['y'] == 10

    ds.close()
def dtype(self):
    """Return the dtype of the wrapped h5py dataset.

    A dtype that h5py reports as variable-length ``unicode`` is returned
    as ``str``; any other dtype is returned unchanged.
    """
    native = self._h5ds.dtype
    return str if h5py.check_dtype(vlen=native) is unicode else native
ST_ESTIMATED = 113
ST_REPORTED = 114
ST_VERIFIED = 115

# Enum dtype over the ST_* codes. ST_INVALID and ST_DEFAULT are not defined
# in this span; presumably they are assigned just above.
_dtstate = h5py.special_dtype(enum=('i', {
    "Invalid": ST_INVALID,
    "Default": ST_DEFAULT,
    "Estimated": ST_ESTIMATED,
    "Reported": ST_REPORTED,
    "Verified": ST_VERIFIED,
}))

# MicroscopeMode
MM_NONE = 0
MM_TRANSMISSION = 1
MM_REFLECTION = 2
MM_FLUORESCENCE = 3

_dtmm = h5py.special_dtype(enum=('i', {
    "None": MM_NONE,
    "Transmission": MM_TRANSMISSION,
    "Reflection": MM_REFLECTION,
    "Fluorescence": MM_FLUORESCENCE,
}))
# Name -> code mapping recovered from the enum dtype.
_dictmm = h5py.check_dtype(enum=_dtmm)

# MicroscopeType
MT_NONE = 111
MT_WIDEFIELD = 112
MT_CONFOCAL = 113
MT_4PIEXCITATION = 114
MT_NIPKOWDISKCONFOCAL = 115
MT_GENERICSENSOR = 118

_dtmt = h5py.special_dtype(enum=('i', {
    "None": MT_NONE,
    "WideField": MT_WIDEFIELD,
    "Confocal": MT_CONFOCAL,
    "4PiExcitation": MT_4PIEXCITATION,
    "NipkowDiskConfocal": MT_NIPKOWDISKCONFOCAL,
    "GenericSensor": MT_GENERICSENSOR,
}))
# Name -> code mapping recovered from the enum dtype.
_dictmt = h5py.check_dtype(enum=_dtmt)

# ImagingDirection
ID_UPWARD = 0
ID_DOWNWARD = 1
def testCreateVLenStringType(self):
    """A variable-length ASCII string item maps to an object dtype tagged as str."""
    type_json = {
        'class': 'H5T_STRING',
        'charSet': 'H5T_CSET_ASCII',
        'length': 'H5T_VARIABLE',
    }
    ascii_dt = hdf5dtype.createDataType(type_json)
    self.assertEqual(ascii_dt.name, 'object')
    self.assertEqual(ascii_dt.kind, 'O')
    self.assertEqual(check_dtype(vlen=ascii_dt), str)
def toString(self, data, dtype=None):
    """Format a data into a string using formatter options

    :param object data: Data to render
    :param dtype: enforce a dtype (mostly used to remember the h5py dtype,
         special h5py dtypes are not propagated from array to items)
    :rtype: str
    """
    # Containers: format every item and join with single spaces.
    if isinstance(data, tuple):
        return "(" + " ".join([self.toString(item) for item in data]) + ")"
    if isinstance(data, list):
        return "[" + " ".join([self.toString(item) for item in data]) + "]"
    if isinstance(data, (numpy.ndarray)):
        if dtype is None:
            dtype = data.dtype
        if data.shape == ():
            # it is a scalar
            return self.toString(data[()], dtype)
        items = [self.toString(item, dtype) for item in data]
        return "[" + " ".join(items) + "]"

    # An explicit object dtype takes precedence over the value's own type.
    if dtype is not None and dtype.kind == 'O':
        text = self.__formatH5pyObject(data, dtype)
        if text is not None:
            return text
        return str(data)

    if isinstance(data, numpy.void):
        if dtype is None:
            dtype = data.dtype
        if dtype.fields is None:
            return self.__formatBinary(data)
        parts = []
        for index, (name, info) in enumerate(dtype.fields.items()):
            parts.append(name + ":" + self.toString(data[index], info[0]))
        return "(" + " ".join(parts) + ")"

    if isinstance(data, (numpy.unicode_, six.text_type)):
        return self.__formatText(data)

    if isinstance(data, (numpy.string_, six.binary_type)):
        if dtype is None and hasattr(data, "dtype"):
            dtype = data.dtype
        if dtype is not None:
            # Maybe a sub item from HDF5
            if dtype.kind == 'S':
                return self.__formatCharString(data)
            elif dtype.kind == 'O':
                text = self.__formatH5pyObject(data, dtype)
                if text is not None:
                    return text
        try:
            # Try ascii/utf-8
            text = "%s" % data.decode("utf-8")
            return self.__formatText(text)
        except UnicodeDecodeError:
            pass
        return self.__formatBinary(data)

    if isinstance(data, six.string_types):
        return self.__formatText("%s" % data)

    if isinstance(data, (numpy.integer)):
        if dtype is None:
            dtype = data.dtype
        enumType = h5py.check_dtype(enum=dtype)
        if enumType is not None:
            for key, value in enumType.items():
                if value == data:
                    return self.__enumFormat % {"name": key, "value": data}
        return self.__integerFormat % data

    if isinstance(data, (numbers.Integral)):
        return self.__integerFormat % data

    if isinstance(data, (numbers.Real, numpy.floating)):
        # It have to be done before complex checking
        return self.__floatFormat % data

    if isinstance(data, (numpy.complexfloating, numbers.Complex)):
        real, imag = data.real, data.imag
        if real != 0 and imag != 0:
            # Both parts present: show the sign between them and the
            # magnitude of the imaginary part.
            if imag < 0:
                template = self.__floatFormat + " - " + self.__floatFormat + self.__imaginaryUnit
                params = (real, -imag)
            else:
                template = self.__floatFormat + " + " + self.__floatFormat + self.__imaginaryUnit
                params = (real, imag)
        elif imag != 0:
            template = self.__floatFormat + self.__imaginaryUnit
            params = imag
        else:
            template = self.__floatFormat
            params = real
        return template % params

    if isinstance(data, h5py.h5r.Reference):
        dtype = h5py.special_dtype(ref=h5py.Reference)
        return self.__formatH5pyObject(data, dtype)

    if isinstance(data, h5py.h5r.RegionReference):
        dtype = h5py.special_dtype(ref=h5py.RegionReference)
        return self.__formatH5pyObject(data, dtype)

    if isinstance(data, numpy.object_) or dtype is not None:
        if dtype is None:
            dtype = data.dtype
        text = self.__formatH5pyObject(data, dtype)
        if text is not None:
            return text
        # That's a numpy object
        return str(data)

    return str(data)
def get(h5, lo=0, hi=None, fields=None, convert_enum=True, **kwargs):
    """
    Query a range of rows from a table as a dataframe.

    A table is an HDF5 group containing equal-length 1D datasets serving as
    columns.

    Parameters
    ----------
    h5 : ``h5py.Group`` or any dict-like of array-likes
        Handle to an HDF5 group containing only 1D datasets or any similar
        collection of 1D datasets or arrays
    lo, hi : int, optional
        Range of rows to select from the table.
    fields : str or sequence of str, optional
        Column or list of columns to query. Defaults to all available columns.
        A single string returns a Series instead of a DataFrame.
    convert_enum : bool, optional
        Whether to convert HDF5 enum datasets into ``pandas.Categorical``
        columns instead of plain integer columns. Default is True.
    kwargs : optional
        Options to pass to ``pandas.DataFrame`` or ``pandas.Series``.

    Returns
    -------
    DataFrame or Series

    Notes
    -----
    HDF5 ASCII datasets are converted to Unicode.

    """
    grp = h5
    series = False
    if fields is None:
        fields = list(grp.keys())
    elif isinstance(fields, six.string_types):
        fields = [fields]
        series = True

    data = {}
    for field in fields:
        dset = grp[field]
        # h5py is only consulted when enum conversion was requested.
        dt = h5py.check_dtype(enum=dset.dtype) if convert_enum else None

        if dt is not None:
            # Categories are the enum names ordered by their integer code.
            data[field] = pandas.Categorical.from_codes(
                dset[lo:hi],
                sorted(dt, key=dt.__getitem__),
                ordered=True)
        elif dset.dtype.kind == 'S':
            # Byte strings: dtype.kind is used because the np.string_ alias
            # no longer exists in NumPy 2.
            data[field] = dset[lo:hi].astype('U')
        else:
            data[field] = dset[lo:hi]

    if data and lo is not None:
        # Row labels are the absolute row numbers within the table.
        index = np.arange(lo, lo + len(next(iter(data.values()))))
    else:
        index = None

    if series:
        # Name the Series after the requested column explicitly instead of
        # relying on the loop variable surviving the loop.
        return pandas.Series(
            data[fields[0]],
            index=index,
            name=fields[0],
            **kwargs)
    return pandas.DataFrame(
        data,
        columns=fields,
        index=index,
        **kwargs)
def humanReadableDType(self, dtype, full=False):
    """Return a short human-readable name for a dtype.

    :param dtype: numpy dtype (or the Python text/bytes type) to describe
    :param bool full: if True, add details such as the string encoding,
        the "variable-length" qualifier, compound member types, float
        padding and byte order
    :rtype: str
    """
    if dtype == six.binary_type or numpy.issubdtype(dtype, numpy.string_):
        return ("ASCII " + "string") if full else "string"

    if dtype == six.text_type or numpy.issubdtype(dtype, numpy.unicode_):
        return ("UTF-8 " + "string") if full else "string"

    if dtype.type == numpy.object_:
        if h5py.check_dtype(ref=dtype) is not None:
            return "reference"
        vlen = h5py.check_dtype(vlen=dtype)
        if vlen is None:
            return "object"
        # Describe the base type, then qualify it when details are wanted.
        text = self.humanReadableDType(vlen, full=full)
        return ("variable-length " + text) if full else text

    if dtype.type == numpy.bool_:
        return "bool"

    if dtype.type == numpy.void:
        if dtype.fields is None:
            return "opaque"
        if not full:
            return "compound"
        # Sort on the second entry of each field description tuple.
        members = sorted(dtype.fields.items(), key=lambda e: e[1][1])
        names = [self.humanReadableDType(member[1][0]) for member in members]
        return "compound(%s)" % ", ".join(names)

    if numpy.issubdtype(dtype, numpy.integer):
        if h5py.check_dtype(enum=dtype) is not None:
            return "enum"

    # Plain numeric types: start from the native-byte-order dtype name.
    text = str(dtype.newbyteorder('N'))

    if numpy.issubdtype(dtype, numpy.floating):
        if hasattr(numpy, "float128") and dtype == numpy.float128:
            text = "float80"
            if full:
                text += " (padding 128bits)"
        elif hasattr(numpy, "float96") and dtype == numpy.float96:
            text = "float80"
            if full:
                text += " (padding 96bits)"

    if full:
        order_prefix = {
            "<": "Little-endian ",
            ">": "Big-endian ",
            "=": "Native ",
        }
        text = order_prefix.get(dtype.byteorder, "") + text

    return text
def test_dtype(self):
    """ (Vlen) Dtype round-trip """
    vlen_str = h5py.special_dtype(vlen=str)
    # check_dtype must hand back the base type the dtype was built from.
    base = h5py.check_dtype(vlen=vlen_str)
    self.assertEqual(base, str)
def read_h5netcdf(tmp_netcdf, write_module):
    """Open ``tmp_netcdf`` with h5netcdf and check the expected test layout.

    ``write_module`` is the module that wrote the file; string attribute
    checks are skipped on Python 2 or when it is netCDF4.
    """
    ds = h5netcdf.File(tmp_netcdf, 'r')
    assert ds.name == '/'
    assert list(ds.attrs) == ['global', 'other_attr']
    assert ds.attrs['global'] == 42
    if not PY2 and write_module is not netCDF4:
        # skip for now: https://github.com/Unidata/netcdf4-python/issues/388
        assert ds.attrs['other_attr'] == 'yes'
    assert set(ds.dimensions) == set(['x', 'y', 'z', 'string3',
                                      'mismatched_dim'])
    assert set(ds.variables) == set(['foo', 'y', 'z', 'intscalar', 'scalar',
                                     'var_len_str', 'mismatched_dim'])
    assert set(ds.groups) == set(['subgroup'])
    assert ds.parent is None

    v = ds['foo']
    assert v.name == '/foo'
    assert array_equal(v, np.ones((4, 5)))
    assert v.dtype == float
    assert v.dimensions == ('x', 'y')
    assert v.ndim == 2
    assert list(v.attrs) == ['units']
    if not PY2 and write_module is not netCDF4:
        assert v.attrs['units'] == 'meters'
    assert v.chunks == (4, 5)
    assert v.compression == 'gzip'
    assert v.compression_opts == 4
    assert not v.fletcher32
    assert v.shuffle

    v = ds['y']
    assert array_equal(v, np.r_[np.arange(4), [-1]])
    assert v.dtype == int
    assert v.dimensions == ('y',)
    assert v.ndim == 1
    assert list(v.attrs) == ['_FillValue']
    assert v.attrs['_FillValue'] == -1
    # Identity checks: "== None" can be overridden by __eq__.
    assert v.chunks is None
    assert v.compression is None
    assert v.compression_opts is None
    assert not v.fletcher32
    assert not v.shuffle
    ds.close()

    # The capability probe runs while the file is closed, before reopening.
    if is_h5py_char_working(tmp_netcdf, 'z'):
        ds = h5netcdf.File(tmp_netcdf, 'r')
        v = ds['z']
        assert v.dtype == 'S1'
        assert v.ndim == 2
        assert v.dimensions == ('z', 'string3')
        assert list(v.attrs) == ['_FillValue']
        assert v.attrs['_FillValue'] == b'X'
    else:
        ds = h5netcdf.File(tmp_netcdf, 'r')

    v = ds['scalar']
    assert array_equal(v, np.array(2.0))
    assert v.dtype == 'float32'
    assert v.ndim == 0
    assert v.dimensions == ()
    assert list(v.attrs) == []

    v = ds.variables['intscalar']
    assert array_equal(v, np.array(2))
    assert v.dtype == 'int64'
    assert v.ndim == 0
    assert v.dimensions == ()
    assert list(v.attrs) == []

    v = ds['var_len_str']
    assert h5py.check_dtype(vlen=v.dtype) == unicode
    assert v[0] == u'foo'

    # The same variable object must be returned for every spelling of its path.
    v = ds['/subgroup/subvar']
    assert v is ds['subgroup']['subvar']
    assert v is ds['subgroup/subvar']
    assert v is ds['subgroup']['/subgroup/subvar']
    assert v.name == '/subgroup/subvar'
    assert ds['subgroup'].name == '/subgroup'
    assert ds['subgroup'].parent is ds
    assert array_equal(v, np.arange(4.0))
    assert v.dtype == 'int32'
    assert v.ndim == 1
    assert v.dimensions == ('x',)
    assert list(v.attrs) == []

    assert ds['/subgroup/y_var'].shape == (10,)
    assert ds['/subgroup'].dimensions['y'] == 10

    ds.close()
def get(grp, lo=0, hi=None, fields=None, convert_enum=True, as_dict=False):
    """
    Query a range of rows from a table as a dataframe.

    A table is an HDF5 group containing equal-length 1D datasets serving as
    columns.

    Parameters
    ----------
    grp : ``h5py.Group`` or any dict-like of array-likes
        Handle to an HDF5 group containing only 1D datasets or any similar
        collection of 1D datasets or arrays
    lo, hi : int, optional
        Range of rows to select from the table.
    fields : str or sequence of str, optional
        Column or list of columns to query. Defaults to all available columns.
        A single string returns a Series instead of a DataFrame.
    convert_enum : bool, optional
        Whether to convert HDF5 enum datasets into ``pandas.Categorical``
        columns instead of plain integer columns. Default is True.
    as_dict : bool, optional
        If True, return the selected columns as a plain dict mapping field
        name to column data, without building a pandas object or an index.
        Default is False.

    Returns
    -------
    DataFrame or Series, or dict when ``as_dict`` is True

    Notes
    -----
    HDF5 ASCII datasets are converted to Unicode.

    """
    series = False
    if fields is None:
        fields = list(grp.keys())
    elif isinstance(fields, six.string_types):
        fields = [fields]
        series = True

    data = {}
    for field in fields:
        dset = grp[field]
        # h5py is only consulted when enum conversion was requested.
        dt = h5py.check_dtype(enum=dset.dtype) if convert_enum else None

        if dt is not None:
            # Categories are the enum names ordered by their integer code.
            data[field] = pd.Categorical.from_codes(
                dset[lo:hi],
                sorted(dt, key=dt.__getitem__),
                ordered=True)
        elif dset.dtype.kind == 'S':
            # Byte strings: dtype.kind is used because the np.string_ alias
            # no longer exists in NumPy 2.
            data[field] = dset[lo:hi].astype('U')
        else:
            data[field] = dset[lo:hi]

    if as_dict:
        return data

    if data and lo is not None:
        # Row labels are the absolute row numbers within the table.
        index = np.arange(lo, lo + len(next(iter(data.values()))))
    else:
        index = None

    if series:
        # Name the Series after the requested column explicitly instead of
        # relying on the loop variable surviving the loop.
        return pd.Series(data[fields[0]], index=index, name=fields[0])
    return pd.DataFrame(data, columns=fields, index=index)