Ejemplo n.º 1
0
 def testCreateVlenObjRefType(self):
     """A VLEN-of-object-reference type item yields an object dtype whose
     vlen base type is itself a ``Reference`` dtype."""
     ref_item = {'class': 'H5T_REFERENCE', 'base': 'H5T_STD_REF_OBJ'}
     vlen_item = {'class': 'H5T_VLEN', 'base': ref_item}

     created = hdf5dtype.createDataType(vlen_item)
     self.assertEqual(created.name, 'object')
     self.assertEqual(created.kind, 'O')

     # The outer dtype is the vlen wrapper, so it must not report a ref.
     self.assertIsNone(check_dtype(ref=created))

     # The reference information lives on the vlen base type.
     element_dt = check_dtype(vlen=created)
     self.assertIsNotNone(element_dt)
     self.assertIs(check_dtype(ref=element_dt), Reference)
Ejemplo n.º 2
0
    def testCreateEnumType(self):
        """An H5T_ENUM item over a 16-bit integer base produces an int16
        dtype that carries the name -> value mapping."""
        expected_members = {"SOLID": 0, "LIQUID": 1, "GAS": 2, "PLASMA": 3}
        typeItem = {
            "class": "H5T_ENUM",
            "base": {"base": "H5T_STD_I16LE", "class": "H5T_INTEGER"},
            "mapping": {"GAS": 2, "LIQUID": 1, "PLASMA": 3, "SOLID": 0},
        }

        self.assertEqual(hdf5dtype.getItemSize(typeItem), 2)

        enum_dt = hdf5dtype.createDataType(typeItem)
        self.assertEqual(enum_dt.name, 'int16')
        self.assertEqual(enum_dt.kind, 'i')

        # The enum members must be recoverable from the dtype itself.
        mapping = check_dtype(enum=enum_dt)
        self.assertIsInstance(mapping, dict)
        for member, value in expected_members.items():
            self.assertEqual(mapping[member], value)
Ejemplo n.º 3
0
 def test_create(self):
     """Creating a dataset with an enum dtype preserves the enum mapping."""
     enum_dt = h5py.special_dtype(enum=('i', self.EDICT))
     dataset = self.f.create_dataset('x', (100, 100), dtype=enum_dt)
     # Read the mapping back from the dataset's dtype, not the one built above.
     round_tripped = h5py.check_dtype(enum=dataset.dtype)
     self.assertEqual(round_tripped, self.EDICT)
Ejemplo n.º 4
0
def convertDatasetToObject(data, slices):
    """Turn a numpy/HDF dataset into data usable by veusz.

    Boolean, integer, unsigned and float datasets are returned as a
    float64 array. Byte/unicode string datasets, and object datasets
    whose h5py vlen base type is ``str``, are returned as a list.

    Raises ConvertError when the dtype cannot be read, when a numeric
    dataset is 0-dimensional, when a text dataset is not 1-dimensional,
    or when the dtype kind is not handled.
    """

    # h5py is imported on demand; without it no object dtype is ever
    # recognised as a vlen string (the stand-in always returns False).
    try:
        from h5py import check_dtype
    except ImportError:
        def check_dtype(vlen=None):
            return False

    if slices:
        data = applySlices(data, slices)

    try:
        kind = data.dtype.kind
    except TypeError:
        raise ConvertError(_("Could not get data type of dataset"))

    # Numeric kinds are always promoted to float64.
    if kind in ('b', 'i', 'u', 'f'):
        numeric = N.array(data, dtype=N.float64)
        if numeric.ndim == 0:
            raise ConvertError(_("Dataset has no dimensions"))
        return numeric

    is_text = kind in ('S', 'a', 'U') or (
        kind == 'O' and check_dtype(vlen=data.dtype) is str)
    if not is_text:
        raise ConvertError(_("Dataset has an invalid type"))

    if hasattr(data, 'ndim') and data.ndim != 1:
        raise ConvertError(_("Text datasets must have 1 dimension"))
    return list(data)
Ejemplo n.º 5
0
def convertDatasetToObject(data, slices):
    """Turn a numpy/HDF dataset into data usable by veusz.

    Boolean, integer, unsigned and float datasets are returned as a
    float64 array of at most 2 dimensions. Byte-string datasets, and
    object datasets for which ``h5py.check_dtype(vlen=...)`` is truthy,
    are returned as a list and must be 1-dimensional.

    Raises _ConvertError when the dtype kind cannot be read, when the
    dimensionality is not accepted, or when the dtype kind is not
    handled.
    """

    if slices:
        data = applySlices(data, slices)

    try:
        kind = data.dtype.kind
    except TypeError:
        raise _ConvertError(_("Could not get kind of HDF5 dataset"))

    # Numeric kinds are always promoted to float64.
    if kind in ('b', 'i', 'u', 'f'):
        converted = N.array(data, dtype=N.float64)
        if len(converted.shape) > 2:
            raise _ConvertError(_("HDF5 dataset has more than 2 dimensions"))
        return converted

    text_like = kind in ('S', 'a') or (
        kind == 'O' and h5py.check_dtype(vlen=data.dtype))
    if not text_like:
        raise _ConvertError(_("HDF5 dataset has an invalid type"))

    if len(data.shape) != 1:
        raise _ConvertError(_("HDF5 dataset has more than 1 dimension"))
    return list(data)
Ejemplo n.º 6
0
 def testCreateVLenUTF8Type(self):
     """A variable-length UTF-8 string item maps to an object dtype whose
     vlen base is the text type, and reports a variable item size."""
     utf8_item = {
         'class': 'H5T_STRING',
         'charSet': 'H5T_CSET_UTF8',
         'length': 'H5T_VARIABLE',
     }
     item_size = hdf5dtype.getItemSize(utf8_item)
     created = hdf5dtype.createDataType(utf8_item)

     self.assertEqual(created.name, 'object')
     self.assertEqual(created.kind, 'O')
     self.assertEqual(check_dtype(vlen=created), six.text_type)
     self.assertEqual(item_size, 'H5T_VARIABLE')
Ejemplo n.º 7
0
 def test_create(self):
     """Exercise object references: build a small group tree, take a
     reference to a nested group, and store it in a reference-typed dataset.

     Most checks here are diagnostic prints; only a few steps are asserted.
     """
     filename = self.getFileName("objref_test")
     print(filename)
     f = h5py.File(filename, 'w')
     self.assertTrue(f.id.id is not None)
     self.assertTrue('/' in f)      
     r = f['/']
      
     # First child of the root group.
     r.create_group('g1')
     self.assertTrue('g1' in r)
     g1 = r['g1']
     
     
     # Nested group whose reference is used below.
     g11 = g1.create_group('g1.1')
     
     g11_ref = g11.ref 
     print(g11_ref)
     # Dump the identifying fields exposed by the reference's id object.
     print("uuid:", g11_ref.id.uuid)
     print("domain:", g11_ref.id.domain)
     print("type:", g11_ref.id.objtype_code)
     #print("g11_ref_tolist:", g11_ref.tolist())
      
     # todo - fix
     #self.assertTrue(isinstance(g11_ref, h5py.Reference))
      
     
     # Second child of the root group; the root now has two members.
     r.create_group('g2')
     self.assertEqual(len(r), 2)
     g2 = r['g2']
     # The bare string below is disabled code (dereferencing g11_ref through
     # g2); it is evaluated as a no-op expression statement.
     """
     g11ref = g2[g11_ref]
     print("g11ref:", g11ref)
     print("g11ref name:", g11ref.name)
     print("g11ref type:", type(g11ref))
     g11ref.create_group("foo")
     """
     
     # todo - special_dtype not implemented
     dt = h5py.special_dtype(ref=h5py.Reference)
     print("dt:", dt)
     print("dt.kind:", dt.kind)
     print("dt.meta:", dt.metadata['ref'])
     self.assertTrue(dt.metadata['ref'] is h5py.Reference)
         
     # Ten-element dataset of references; element 0 is read before being
     # assigned (printed as "null_ref") and then set to the g1.1 reference.
     dset = g1.create_dataset('myrefs', (10,), dtype=dt)
     print("dset.dtype.kind:", dset.dtype.kind)
     ref = h5py.check_dtype(ref=dset.dtype)
     print("check_dtype:", ref)
     null_ref = dset[0]
     print("null_ref:", null_ref)
     dset[0] = g11_ref
     #g2.attrs['dataset'] = dset.ref
     
     # todo - references as data will need h5pyd equivalent of h5t module
     # g2.attrs.create('dataset', dset.ref, dtype=dt)   
      
     # NOTE(review): the file is only closed if every step above succeeds.
     f.close()
Ejemplo n.º 8
0
    def test_compound(self):
        """A vlen-string member of a committed compound type still reports
        ``str`` as its vlen base when read back from the file."""
        compound = np.dtype([
            ('field_1', h5py.special_dtype(vlen=str)),
            ('field_2', np.int32),
        ])
        self.f['mytype'] = np.dtype(compound)

        # fields[name] is a tuple whose first entry is the member dtype.
        stored = self.f['mytype'].dtype
        member_dt = stored.fields['field_1'][0]
        self.assertEqual(h5py.check_dtype(vlen=member_dt), str)
Ejemplo n.º 9
0
    def test_compound(self):
        """A vlen-string member of a committed compound type still reports
        ``str`` as its vlen base when read back from the file."""
        members = [
            ("field_1", h5py.special_dtype(vlen=str)),
            ("field_2", np.int32),
        ]
        compound = np.dtype(members)
        self.f["mytype"] = np.dtype(compound)

        # fields[name] is a tuple whose first entry is the member dtype.
        committed = self.f["mytype"].dtype
        member_dt = committed.fields["field_1"][0]
        self.assertEqual(h5py.check_dtype(vlen=member_dt), str)
Ejemplo n.º 10
0
    def test_vlen_enum(self):
        """A vlen-of-enum dataset round-trips both its data and its enum
        mapping through a file written and then reopened read-only."""
        fname = self.mktemp()
        arr1 = [[1], [1, 2]]
        dt1 = h5py.special_dtype(vlen=h5py.special_dtype(
            enum=('i', dict(foo=1, bar=2))))

        with h5py.File(fname, 'w') as f:
            df1 = f.create_dataset('test', (len(arr1),), dtype=dt1)
            # The rows have different lengths. NumPy >= 1.24 raises
            # ValueError when asked to build such a ragged array implicitly,
            # so an object array is requested explicitly.
            df1[:] = np.array(arr1, dtype=object)

        with h5py.File(fname, 'r') as f:
            df2 = f['test']
            dt2 = df2.dtype
            arr2 = [e.tolist() for e in df2[:]]

        self.assertEqual(arr1, arr2)
        # Compare the enum mappings found under the vlen wrapper of the
        # dtype we built and the dtype read back from the file.
        self.assertEqual(h5py.check_dtype(enum=h5py.check_dtype(vlen=dt1)),
                         h5py.check_dtype(enum=h5py.check_dtype(vlen=dt2)))
Ejemplo n.º 11
0
 def __formatH5pyObject(self, data, dtype):
     """Format a value whose dtype is an h5py special (object) dtype.

     Returns "REF" / "NULL_REF" for reference dtypes, a formatted string
     for vlen text, vlen bytes and vlen numpy-dtype bases, and None when
     the dtype is none of these.
     """
     # Reference dtypes: only truthiness of the value is reported.
     if h5py.check_dtype(ref=dtype) is not None:
         return "REF" if bool(data) else "NULL_REF"

     base = h5py.check_dtype(vlen=dtype)
     if base is None:
         return None
     if base == six.text_type:
         # HDF5 UTF8
         return self.__formatText(data)
     if base == six.binary_type:
         # HDF5 ASCII
         return self.__formatCharString(data)
     if isinstance(base, numpy.dtype):
         # vlen of a numpy dtype: format with the base dtype enforced.
         return self.toString(data, base)
     return None
Ejemplo n.º 12
0
    def test_enum(self):
        """High-level enumerated types: dtype round-trip, dataset creation
        and conversion to/from plain integers, for every base integer type
        listed in ``common.INTS + common.UINTS``."""

        vals = {'a': 1, 'b': 2, 'c': 42}

        # The context manager closes the file even when an assertion fails;
        # the handle was previously left open.
        with h5py.File(res.get_name(), 'w') as f:
            for idx, basetype in enumerate(
                    np.dtype(x) for x in (common.INTS + common.UINTS)):

                msg = "dset %s, type %s" % (idx, basetype)

                dt = h5py.special_dtype(enum=(basetype, vals))
                self.assertEqual(h5py.check_dtype(enum=dt), vals, msg)
                # A plain integer dtype must not be reported as an enum.
                # (assert_ is a deprecated alias removed in Python 3.12.)
                self.assertIsNone(
                    h5py.check_dtype(enum=np.dtype('i')), msg)

                # Test dataset creation
                refarr = np.zeros((4, 4), dtype=dt)
                ds = f.create_dataset(str(idx), (4, 4), dtype=dt)
                self.assertTrue(np.all(ds[...] == refarr), msg)

                # Test conversion to/from plain integer
                ds[0, 0] = np.array(64, dtype=dt)
                self.assertEqual(ds[0, 0], 64, msg)
Ejemplo n.º 13
0
 def from_hdf5(self, h5group):
     """Set one attribute on this object per dataset in ``h5group``.

     Datasets of shape ``(0,)`` become an empty list, vlen datasets are
     evaluated from their stored text, and anything else is stored as the
     value read with ``dataset[...]``.
     """
     for key, dataset in h5group.items():
         # FIXME : empty datasets are special-cased to avoid reading them.
         # see : https://github.com/h5py/h5py/issues/281
         # It should be fixed by the next h5py version
         if dataset.shape == (0,):
             self.__setattr__(key, [])
             continue

         if h5py.check_dtype(vlen=dataset.dtype):
             # VLEN data is used to store a list of lists as text.
             # SECURITY NOTE(review): eval() executes whatever expression
             # the file contains; only safe for trusted files.
             value = eval(dataset[...].tolist())
         else:
             value = dataset[...]
         self.__setattr__(key, value)
Ejemplo n.º 14
0
    def testCreateCompoundType(self):
        """A four-member H5T_COMPOUND item (two floats, a vlen ASCII string
        and an int16) maps to a structured dtype whose string member is a
        vlen ``str`` object dtype."""
        location_type = {
            'length': 'H5T_VARIABLE',
            'charSet': 'H5T_CSET_ASCII',
            'class': 'H5T_STRING',
            'strPad': 'H5T_STR_NULLTERM',
        }
        members = [
            ('temp', 'H5T_IEEE_F32LE'),
            ('pressure', 'H5T_IEEE_F32LE'),
            ('location', location_type),
            ('wind', 'H5T_STD_I16LE'),
        ]
        typeItem = {
            'class': 'H5T_COMPOUND',
            'fields': [{'name': name, 'type': h5type}
                       for name, h5type in members],
        }

        compound = hdf5dtype.createDataType(typeItem)
        self.assertEqual(compound.name, 'void144')
        self.assertEqual(compound.kind, 'V')
        self.assertEqual(len(compound.fields), 4)

        # Member index 2 is 'location', the variable-length string.
        location_dt = compound[2]
        self.assertEqual(location_dt.name, 'object')
        self.assertEqual(location_dt.kind, 'O')
        self.assertEqual(check_dtype(vlen=location_dt), str)
Ejemplo n.º 15
0
 def getDataTranspose(self, limit, start):
     """Return the rows fetched by ``self._getData`` as a tuple of columns.

     :param limit: forwarded unchanged to ``self._getData``
     :param start: forwarded unchanged to ``self._getData``
     :returns: ``(columns, new_pos)`` where ``columns`` has one entry per
         field of the structured data and ``new_pos`` is the second value
         returned by ``self._getData``
     :raises RuntimeError: if a field is an object dtype that is not an
         h5py vlen ``str``
     """
     struct_data, new_pos = self._getData(limit, start)
     columns = []
     for idx in range(len(struct_data.dtype)):
         # Fields are addressed by the positional names f0, f1, ...
         col = struct_data['f{}'.format(idx)]
         # Strings are stored as hdf5 vlen objects.  Numpy can't do
         # variable length strings, so they get encoded as object
         # arrays by hdf5.  we don't know how to flatten object
         # arrays so we special case vlen types here and convert
         # them to lists.  Also, h5py has a bug where when you
         # index a dataset with a compound type, it loses the
         # special dtype information, so we pull it directly from
         # self.dataset.dtype rather than the data returned by
         # _getData
         field_dtype = self.dataset.dtype[idx]
         # np.object_ is the scalar type; the bare ``np.object`` alias was
         # removed in NumPy 1.24 and raised AttributeError here.
         if field_dtype == np.object_:
             base_type = h5py.check_dtype(vlen=field_dtype)
             if not (isinstance(base_type, type)
                     and issubclass(base_type, str)):
                 raise RuntimeError("Found object type array, but not vlen str.  Not supported.  This shouldn't happen")
             col = [base_type(x) for x in col]
         columns.append(col)
     return tuple(columns), new_pos
Ejemplo n.º 16
0
    def open_store_variable(self, name, var):
        """Build a ``Variable`` for the stored variable ``var`` called ``name``.

        The returned Variable uses ``var.dimensions``, a lazily indexed
        wrapper around the stored array, the attributes read from ``var``
        and an encoding dict describing chunking, filters, compression,
        source file, original shape and (when known) dtype.
        """
        import h5py

        with self.ensure_open(autoclose=False):
            dimensions = var.dimensions
            wrapped = H5NetCDFArrayWrapper(name, self)
            data = indexing.LazilyOuterIndexedArray(wrapped)
            attrs = _read_attributes(var)

            # netCDF4 specific encoding
            encoding = dict(
                chunksizes=var.chunks,
                fletcher32=var.fletcher32,
                shuffle=var.shuffle,
            )

            # Translate h5py-style compression options to the
            # NetCDF4-Python names when the filter is gzip; otherwise
            # pass the h5py names through unchanged.
            compression = var.compression
            if compression == 'gzip':
                encoding.update(zlib=True, complevel=var.compression_opts)
            elif compression is not None:
                encoding.update(compression=compression,
                                compression_opts=var.compression_opts)

            # save source so __repr__ can detect if it's local or not
            encoding.update(source=self._filename, original_shape=var.shape)

            vlen_dtype = h5py.check_dtype(vlen=var.dtype)
            if vlen_dtype is None:
                encoding['dtype'] = var.dtype
            elif vlen_dtype is unicode_type:
                encoding['dtype'] = str
            # Any other vlen base type: no dtype is recorded, because
            # xarray doesn't support writing arbitrary vlen dtypes yet.

        return Variable(dimensions, data, attrs, encoding)
Ejemplo n.º 17
0
def get_catalog_type(hdf5_type):
    """Map an HDF5/numpy dtype to a type name recognized by the API.

    Object dtypes whose h5py vlen base is named ``str`` map to 'text'.
    Otherwise the dtype's string form is matched against a few common
    prefixes, then looked up in the module-level ``maps`` dictionary.
    Returns None when nothing matches.
    """
    # An object dtype is most likely a variable-length string.
    if hdf5_type.kind == 'O':
        vlen_base = h5py.check_dtype(vlen=hdf5_type)
        if getattr(vlen_base, '__name__', None) == 'str':
            return 'text'

    type_name = str(hdf5_type)

    # Checked in order; the first matching prefix wins.
    prefix_table = (
        ('|S', 'text'),
        ('int', 'int8'),
        ('uint', 'int8'),
        ('float', 'float8'),
    )
    for prefix, catalog_type in prefix_table:
        if type_name.startswith(prefix):
            return catalog_type

    if type_name in maps:
        return maps[type_name]
    return None
Ejemplo n.º 18
0
def read_h5netcdf(tmp_netcdf, write_module):
    """Open ``tmp_netcdf`` with h5netcdf and assert its expected contents.

    ``write_module`` is the module that produced the file; some string
    attribute checks are skipped when it is netCDF4 (or under PY2).
    The file is opened, closed and reopened once part-way through.
    """
    # A string path starting with the ``remote_h5`` prefix is treated as a
    # remote file, which relaxes the chunking check on 'y' below.
    remote_file = (isinstance(tmp_netcdf, str)
                   and tmp_netcdf.startswith(remote_h5))
    ds = h5netcdf.File(tmp_netcdf, 'r')

    # --- root group metadata ---
    assert ds.name == '/'
    assert list(ds.attrs) == ['global', 'other_attr']
    assert ds.attrs['global'] == 42
    if not PY2 and write_module is not netCDF4:
        # skip for now: https://github.com/Unidata/netcdf4-python/issues/388
        assert ds.attrs['other_attr'] == 'yes'
    assert set(ds.dimensions) == set(
        ['x', 'y', 'z', 'empty', 'string3', 'mismatched_dim'])
    assert set(ds.variables) == set([
        'foo', 'y', 'z', 'intscalar', 'scalar', 'var_len_str', 'mismatched_dim'
    ])
    assert set(ds.groups) == set(['subgroup'])
    assert ds.parent is None

    # --- 'foo': 2-D float variable, chunked, gzip level 4, shuffled ---
    v = ds['foo']
    assert v.name == '/foo'
    assert array_equal(v, np.ones((4, 5)))
    assert v.dtype == float
    assert v.dimensions == ('x', 'y')
    assert v.ndim == 2
    assert list(v.attrs) == ['units']
    if not PY2 and write_module is not netCDF4:
        assert v.attrs['units'] == 'meters'
    assert v.chunks == (4, 5)
    assert v.compression == 'gzip'
    assert v.compression_opts == 4
    assert not v.fletcher32
    assert v.shuffle

    # --- 'y': 1-D int variable with a fill value and no filters ---
    v = ds['y']
    assert array_equal(v, np.r_[np.arange(4), [-1]])
    assert v.dtype == int
    assert v.dimensions == ('y', )
    assert v.ndim == 1
    assert list(v.attrs) == ['_FillValue']
    assert v.attrs['_FillValue'] == -1
    if not remote_file:
        assert v.chunks is None
    assert v.compression is None
    assert v.compression_opts is None
    assert not v.fletcher32
    assert not v.shuffle
    ds.close()

    # --- 'z': character variable, only checked when the helper says
    # char data is readable; either way the file is reopened here ---
    if is_h5py_char_working(tmp_netcdf, 'z'):
        ds = h5netcdf.File(tmp_netcdf, 'r')
        v = ds['z']
        assert v.dtype == 'S1'
        assert v.ndim == 2
        assert v.dimensions == ('z', 'string3')
        assert list(v.attrs) == ['_FillValue']
        assert v.attrs['_FillValue'] == b'X'
    else:
        ds = h5netcdf.File(tmp_netcdf, 'r')

    # --- scalar variables ---
    v = ds['scalar']
    assert array_equal(v, np.array(2.0))
    assert v.dtype == 'float32'
    assert v.ndim == 0
    assert v.dimensions == ()
    assert list(v.attrs) == []

    v = ds.variables['intscalar']
    assert array_equal(v, np.array(2))
    assert v.dtype == 'int64'
    assert v.ndim == 0
    assert v.dimensions == ()
    assert list(v.attrs) == []

    # --- variable-length string variable ---
    # NOTE(review): `unicode` is not a Python 3 builtin; presumably it is
    # defined or aliased elsewhere in this module - confirm.
    v = ds['var_len_str']
    assert h5py.check_dtype(vlen=v.dtype) == unicode
    assert v[0] == u'foo'

    # --- subgroup: every spelling of the path yields the same object ---
    v = ds['/subgroup/subvar']
    assert v is ds['subgroup']['subvar']
    assert v is ds['subgroup/subvar']
    assert v is ds['subgroup']['/subgroup/subvar']
    assert v.name == '/subgroup/subvar'
    assert ds['subgroup'].name == '/subgroup'
    assert ds['subgroup'].parent is ds
    assert array_equal(v, np.arange(4.0))
    assert v.dtype == 'int32'
    assert v.ndim == 1
    assert v.dimensions == ('x', )
    assert list(v.attrs) == []

    assert ds['/subgroup/y_var'].shape == (10, )
    assert ds['/subgroup'].dimensions['y'] == 10

    ds.close()
Ejemplo n.º 19
0
 def dtype(self):
     """Return the wrapped dataset's dtype, reporting ``str`` instead when
     it is an h5py vlen dtype whose base type is ``unicode``."""
     raw = self._h5ds.dtype
     return str if h5py.check_dtype(vlen=raw) is unicode else raw
Ejemplo n.º 20
0
# State codes (continued). ST_INVALID and ST_DEFAULT, used in the enum
# below, are not defined in this excerpt - presumably just above it.
ST_ESTIMATED = 113
ST_REPORTED = 114
ST_VERIFIED = 115
# h5py enum dtype mapping state names to the ST_* codes.
_dtstate = h5py.special_dtype(enum=('i', {
     "Invalid":ST_INVALID, "Default":ST_DEFAULT, "Estimated":ST_ESTIMATED,
     "Reported":ST_REPORTED, "Verified":ST_VERIFIED}))

# MicroscopeMode
MM_NONE = 0
MM_TRANSMISSION = 1
MM_REFLECTION = 2
MM_FLUORESCENCE = 3
# h5py enum dtype mapping mode names to the MM_* codes.
_dtmm = h5py.special_dtype(enum=('i', {
     "None":MM_NONE, "Transmission":MM_TRANSMISSION ,
     "Reflection":MM_REFLECTION, "Fluorescence":MM_FLUORESCENCE}))
# Name -> value dict recovered from the enum dtype.
_dictmm = h5py.check_dtype(enum=_dtmm)
# MicroscopeType
MT_NONE = 111
MT_WIDEFIELD = 112
MT_CONFOCAL = 113
MT_4PIEXCITATION = 114
MT_NIPKOWDISKCONFOCAL = 115
MT_GENERICSENSOR = 118
# h5py enum dtype mapping microscope type names to the MT_* codes.
_dtmt = h5py.special_dtype(enum=('i', {
    "None":MT_NONE, "WideField":MT_WIDEFIELD, "Confocal":MT_CONFOCAL,
    "4PiExcitation":MT_4PIEXCITATION, "NipkowDiskConfocal":MT_NIPKOWDISKCONFOCAL,
    "GenericSensor":MT_GENERICSENSOR}))
# Name -> value dict recovered from the enum dtype.
_dictmt = h5py.check_dtype(enum=_dtmt)
# ImagingDirection
ID_UPWARD = 0
ID_DOWNWARD = 1
Ejemplo n.º 21
0
 def testCreateVLenStringType(self):
     """A variable-length ASCII string item maps to an object dtype whose
     vlen base type is ``str``."""
     ascii_item = {
         'class': 'H5T_STRING',
         'charSet': 'H5T_CSET_ASCII',
         'length': 'H5T_VARIABLE',
     }
     created = hdf5dtype.createDataType(ascii_item)
     self.assertEqual(created.name, 'object')
     self.assertEqual(created.kind, 'O')
     self.assertEqual(check_dtype(vlen=created), str)
    def toString(self, data, dtype=None):
        """Format a data into a string using formatter options

        Dispatches on the Python/numpy type of ``data``; containers and
        arrays are formatted element by element through recursive calls.

        :param object data: Data to render
        :param dtype: enforce a dtype (mostly used to remember the h5py dtype,
             special h5py dtypes are not propagated from array to items)
        :rtype: str
        """
        # Tuples and lists: elements are formatted without any dtype.
        if isinstance(data, tuple):
            text = [self.toString(d) for d in data]
            return "(" + " ".join(text) + ")"
        elif isinstance(data, list):
            text = [self.toString(d) for d in data]
            return "[" + " ".join(text) + "]"
        elif isinstance(data, (numpy.ndarray)):
            # Arrays: the array dtype (or the enforced one) is forwarded to
            # each element.
            if dtype is None:
                dtype = data.dtype
            if data.shape == ():
                # it is a scaler
                return self.toString(data[()], dtype)
            else:
                text = [self.toString(d, dtype) for d in data]
                return "[" + " ".join(text) + "]"
        # NOTE: this `if` starts a new if/elif chain. When the dtype is an
        # object dtype and the h5py formatter returns None, none of the
        # `elif` branches below are evaluated and control reaches the final
        # `return str(data)`.
        if dtype is not None and dtype.kind == 'O':
            text = self.__formatH5pyObject(data, dtype)
            if text is not None:
                return text
        elif isinstance(data, numpy.void):
            # Structured scalar: render as "(name:value ...)"; a void
            # without fields is rendered as binary.
            if dtype is None:
                dtype = data.dtype
            if dtype.fields is not None:
                text = []
                for index, field in enumerate(dtype.fields.items()):
                    # field is (name, field_info); field_info[0] is the
                    # field dtype.
                    text.append(field[0] + ":" +
                                self.toString(data[index], field[1][0]))
                return "(" + " ".join(text) + ")"
            return self.__formatBinary(data)
        elif isinstance(data, (numpy.unicode_, six.text_type)):
            return self.__formatText(data)
        elif isinstance(data, (numpy.string_, six.binary_type)):
            # Byte strings: use the dtype when available, otherwise try to
            # decode as UTF-8 and fall back to a binary rendering.
            if dtype is None and hasattr(data, "dtype"):
                dtype = data.dtype
            if dtype is not None:
                # Maybe a sub item from HDF5
                if dtype.kind == 'S':
                    return self.__formatCharString(data)
                elif dtype.kind == 'O':
                    text = self.__formatH5pyObject(data, dtype)
                    if text is not None:
                        return text
            try:
                # Try ascii/utf-8
                text = "%s" % data.decode("utf-8")
                return self.__formatText(text)
            except UnicodeDecodeError:
                pass
            return self.__formatBinary(data)
        elif isinstance(data, six.string_types):
            text = "%s" % data
            return self.__formatText(text)
        elif isinstance(data, (numpy.integer)):
            # Numpy integers may carry an h5py enum: render with the enum
            # format when the value matches a member, else as an integer.
            if dtype is None:
                dtype = data.dtype
            enumType = h5py.check_dtype(enum=dtype)
            if enumType is not None:
                for key, value in enumType.items():
                    if value == data:
                        result = {}
                        result["name"] = key
                        result["value"] = data
                        return self.__enumFormat % result
            return self.__integerFormat % data
        elif isinstance(data, (numbers.Integral)):
            return self.__integerFormat % data
        elif isinstance(data, (numbers.Real, numpy.floating)):
            # It have to be done before complex checking
            return self.__floatFormat % data
        elif isinstance(data, (numpy.complexfloating, numbers.Complex)):
            # NOTE: `text` is accumulated here but is not part of the
            # returned value, which is built from `template % params`.
            text = ""
            if data.real != 0:
                text += self.__floatFormat % data.real
            if data.real != 0 and data.imag != 0:
                # Both parts non-zero: "re + im<unit>" or "re - |im|<unit>".
                if data.imag < 0:
                    template = self.__floatFormat + " - " + self.__floatFormat + self.__imaginaryUnit
                    params = (data.real, -data.imag)
                else:
                    template = self.__floatFormat + " + " + self.__floatFormat + self.__imaginaryUnit
                    params = (data.real, data.imag)
            else:
                # At most one non-zero part. The parentheses below do not
                # create tuples; a single value is passed to `%`.
                if data.imag != 0:
                    template = self.__floatFormat + self.__imaginaryUnit
                    params = (data.imag)
                else:
                    template = self.__floatFormat
                    params = (data.real)
            return template % params
        elif isinstance(data, h5py.h5r.Reference):
            # Bare reference objects: format with the matching special dtype.
            dtype = h5py.special_dtype(ref=h5py.Reference)
            text = self.__formatH5pyObject(data, dtype)
            return text
        elif isinstance(data, h5py.h5r.RegionReference):
            dtype = h5py.special_dtype(ref=h5py.RegionReference)
            text = self.__formatH5pyObject(data, dtype)
            return text
        elif isinstance(data, numpy.object_) or dtype is not None:
            if dtype is None:
                dtype = data.dtype
            text = self.__formatH5pyObject(data, dtype)
            if text is not None:
                return text
            # That's a numpy object
            return str(data)
        # Fallback for anything not handled above.
        return str(data)
Ejemplo n.º 23
0
def get(h5, lo=0, hi=None, fields=None, convert_enum=True, **kwargs):
    """
    Query a range of rows from a table as a dataframe.

    A table is an HDF5 group containing equal-length 1D datasets serving as
    columns.

    Parameters
    ----------
    h5 : ``h5py.Group`` or any dict-like of array-likes
        Handle to an HDF5 group containing only 1D datasets or any similar
        collection of 1D datasets or arrays
    lo, hi : int, optional
        Range of rows to select from the table.
    fields : str or sequence of str, optional
        Column or list of columns to query. Defaults to all available columns.
        A single string returns a Series instead of a DataFrame.
    convert_enum : bool, optional
        Whether to convert HDF5 enum datasets into ``pandas.Categorical``
        columns instead of plain integer columns. Default is True.
    kwargs : optional
        Options to pass to ``pandas.DataFrame`` or ``pandas.Series``.

    Returns
    -------
    DataFrame or Series

    Notes
    -----
    HDF5 ASCII datasets are converted to Unicode.

    The result is indexed by row number starting at ``lo`` when ``lo`` is
    not None and at least one column was read; otherwise pandas assigns
    its default index.

    """
    grp = h5
    series = False
    if fields is None:
        fields = list(grp.keys())
    elif isinstance(fields, six.string_types):
        fields = [fields]
        series = True

    data = {}
    for field in fields:
        dset = grp[field]

        # Enum member mapping (name -> code), or None when the column is
        # not an enum or conversion was not requested.
        dt = h5py.check_dtype(enum=dset.dtype) if convert_enum else None

        if dt is not None:
            # Categories are the enum names ordered by their integer code.
            data[field] = pandas.Categorical.from_codes(
                dset[lo:hi],
                sorted(dt, key=dt.__getitem__),
                ordered=True)
        elif dset.dtype.type == np.bytes_:
            # np.bytes_ is the fixed-width byte-string scalar type; the
            # np.string_ alias for it was removed in NumPy 2.0.
            data[field] = dset[lo:hi].astype('U')
        else:
            data[field] = dset[lo:hi]

    if data and lo is not None:
        index = np.arange(lo, lo + len(next(iter(data.values()))))
    else:
        index = None

    if series:
        # Name the Series after the single requested column explicitly
        # rather than relying on the loop variable left over from above.
        return pandas.Series(
            data[fields[0]],
            index=index,
            name=fields[0],
            **kwargs)
    else:
        return pandas.DataFrame(
            data,
            columns=fields,
            index=index,
            **kwargs)
Ejemplo n.º 24
0
    def toString(self, data, dtype=None):
        """Format a data into a string using formatter options

        Dispatches on the Python/numpy type of ``data``; containers and
        arrays are formatted element by element through recursive calls.

        :param object data: Data to render
        :param dtype: enforce a dtype (mostly used to remember the h5py dtype,
             special h5py dtypes are not propagated from array to items)
        :rtype: str
        """
        # Tuples and lists: elements are formatted without any dtype.
        if isinstance(data, tuple):
            text = [self.toString(d) for d in data]
            return "(" + " ".join(text) + ")"
        elif isinstance(data, list):
            text = [self.toString(d) for d in data]
            return "[" + " ".join(text) + "]"
        elif isinstance(data, (numpy.ndarray)):
            # Arrays: the array dtype (or the enforced one) is forwarded to
            # each element.
            if dtype is None:
                dtype = data.dtype
            if data.shape == ():
                # it is a scaler
                return self.toString(data[()], dtype)
            else:
                text = [self.toString(d, dtype) for d in data]
                return "[" + " ".join(text) + "]"
        # NOTE: this `if` starts a new if/elif chain. When the dtype is an
        # object dtype and the h5py formatter returns None, none of the
        # `elif` branches below are evaluated and control reaches the final
        # `return str(data)`.
        if dtype is not None and dtype.kind == 'O':
            text = self.__formatH5pyObject(data, dtype)
            if text is not None:
                return text
        elif isinstance(data, numpy.void):
            # Structured scalar: render as "(name:value ...)"; a void
            # without fields is rendered as binary.
            if dtype is None:
                dtype = data.dtype
            if dtype.fields is not None:
                text = []
                for index, field in enumerate(dtype.fields.items()):
                    # field is (name, field_info); field_info[0] is the
                    # field dtype.
                    text.append(field[0] + ":" + self.toString(data[index], field[1][0]))
                return "(" + " ".join(text) + ")"
            return self.__formatBinary(data)
        elif isinstance(data, (numpy.unicode_, six.text_type)):
            return self.__formatText(data)
        elif isinstance(data, (numpy.string_, six.binary_type)):
            # Byte strings: use the dtype when available, otherwise try to
            # decode as UTF-8 and fall back to a binary rendering.
            if dtype is None and hasattr(data, "dtype"):
                dtype = data.dtype
            if dtype is not None:
                # Maybe a sub item from HDF5
                if dtype.kind == 'S':
                    return self.__formatCharString(data)
                elif dtype.kind == 'O':
                    text = self.__formatH5pyObject(data, dtype)
                    if text is not None:
                        return text
            try:
                # Try ascii/utf-8
                text = "%s" % data.decode("utf-8")
                return self.__formatText(text)
            except UnicodeDecodeError:
                pass
            return self.__formatBinary(data)
        elif isinstance(data, six.string_types):
            text = "%s" % data
            return self.__formatText(text)
        elif isinstance(data, (numpy.integer)):
            # Numpy integers may carry an h5py enum: render with the enum
            # format when the value matches a member, else as an integer.
            if dtype is None:
                dtype = data.dtype
            enumType = h5py.check_dtype(enum=dtype)
            if enumType is not None:
                for key, value in enumType.items():
                    if value == data:
                        result = {}
                        result["name"] = key
                        result["value"] = data
                        return self.__enumFormat % result
            return self.__integerFormat % data
        elif isinstance(data, (numbers.Integral)):
            return self.__integerFormat % data
        elif isinstance(data, (numbers.Real, numpy.floating)):
            # It have to be done before complex checking
            return self.__floatFormat % data
        elif isinstance(data, (numpy.complexfloating, numbers.Complex)):
            # NOTE: `text` is accumulated here but is not part of the
            # returned value, which is built from `template % params`.
            text = ""
            if data.real != 0:
                text += self.__floatFormat % data.real
            if data.real != 0 and data.imag != 0:
                # Both parts non-zero: "re + im<unit>" or "re - |im|<unit>".
                if data.imag < 0:
                    template = self.__floatFormat + " - " + self.__floatFormat + self.__imaginaryUnit
                    params = (data.real, -data.imag)
                else:
                    template = self.__floatFormat + " + " + self.__floatFormat + self.__imaginaryUnit
                    params = (data.real, data.imag)
            else:
                # At most one non-zero part. The parentheses below do not
                # create tuples; a single value is passed to `%`.
                if data.imag != 0:
                    template = self.__floatFormat + self.__imaginaryUnit
                    params = (data.imag)
                else:
                    template = self.__floatFormat
                    params = (data.real)
            return template % params
        elif isinstance(data, h5py.h5r.Reference):
            # Bare reference objects: format with the matching special dtype.
            dtype = h5py.special_dtype(ref=h5py.Reference)
            text = self.__formatH5pyObject(data, dtype)
            return text
        elif isinstance(data, h5py.h5r.RegionReference):
            dtype = h5py.special_dtype(ref=h5py.RegionReference)
            text = self.__formatH5pyObject(data, dtype)
            return text
        elif isinstance(data, numpy.object_) or dtype is not None:
            if dtype is None:
                dtype = data.dtype
            text = self.__formatH5pyObject(data, dtype)
            if text is not None:
                return text
            # That's a numpy object
            return str(data)
        # Fallback for anything not handled above.
        return str(data)
Ejemplo n.º 25
0
    def humanReadableDType(self, dtype, full=False):
        """Return a short human-readable description of ``dtype``.

        :param dtype: a numpy dtype, or the Python bytes/text type (as
            returned for h5py vlen string bases)
        :param bool full: when True, add details such as the character
            encoding, "variable-length", compound member types, float
            padding and byte order
        :rtype: str
        """
        if dtype == six.binary_type or numpy.issubdtype(dtype, numpy.string_):
            text = "string"
            if full:
                text = "ASCII " + text
            return text
        elif dtype == six.text_type or numpy.issubdtype(dtype, numpy.unicode_):
            text = "string"
            if full:
                text = "UTF-8 " + text
            return text
        elif dtype.type == numpy.object_:
            # Object dtypes may be h5py special dtypes: reference or vlen.
            ref = h5py.check_dtype(ref=dtype)
            if ref is not None:
                return "reference"
            vlen = h5py.check_dtype(vlen=dtype)
            if vlen is not None:
                # Describe the vlen base type recursively.
                text = self.humanReadableDType(vlen, full=full)
                if full:
                    text = "variable-length " + text
                return text
            return "object"
        elif dtype.type == numpy.bool_:
            return "bool"
        elif dtype.type == numpy.void:
            if dtype.fields is None:
                return "opaque"
            else:
                if not full:
                    return "compound"
                else:
                    # Members are listed in order of their byte offset and
                    # described in their short form.
                    fields = sorted(dtype.fields.items(), key=lambda e: e[1][1])
                    compound = [d[1][0] for d in fields]
                    compound = [self.humanReadableDType(d) for d in compound]
                    return "compound(%s)" % ", ".join(compound)
        elif numpy.issubdtype(dtype, numpy.integer):
            enumType = h5py.check_dtype(enum=dtype)
            if enumType is not None:
                return "enum"
            # Plain integers fall through to the generic naming below.

        # Generic case: dtype name with native byte order.
        text = str(dtype.newbyteorder('N'))
        if numpy.issubdtype(dtype, numpy.floating):
            # Extended-precision floats are reported as float80, optionally
            # with their padded storage size.
            if hasattr(numpy, "float128") and dtype == numpy.float128:
                text = "float80"
                if full:
                    text += " (padding 128bits)"
            elif hasattr(numpy, "float96") and dtype == numpy.float96:
                text = "float80"
                if full:
                    text += " (padding 96bits)"

        if full:
            if dtype.byteorder == "<":
                text = "Little-endian " + text
            elif dtype.byteorder == ">":
                text = "Big-endian " + text
            elif dtype.byteorder == "=":
                text = "Native " + text

        # A trailing rebinding of the local `dtype` was removed here: its
        # result was never used.
        return text
Ejemplo n.º 26
0
 def test_dtype(self):
     """ (Vlen) A vlen-str special dtype reports ``str`` as its base type """
     vlen_str = h5py.special_dtype(vlen=str)
     base = h5py.check_dtype(vlen=vlen_str)
     self.assertEqual(base, str)
Ejemplo n.º 27
0
def read_h5netcdf(tmp_netcdf, write_module):
    """Open ``tmp_netcdf`` with h5netcdf and assert on its contents.

    The file is opened read-only twice: once to check the root group and the
    ``foo`` / ``y`` variables, and once more (after the
    ``is_h5py_char_working`` probe has run against the closed file) to check
    the remaining variables and the subgroup.  Each handle is closed in a
    ``finally`` clause so a failing assertion does not leave the file open.

    Parameters
    ----------
    tmp_netcdf : path of the netCDF/HDF5 file to read.
    write_module : module that was used to write the file; string-valued
        attribute checks are skipped when it is ``netCDF4`` or on Python 2.

    Raises
    ------
    AssertionError
        If any of the checked properties does not match.
    """
    ds = h5netcdf.File(tmp_netcdf, 'r')
    try:
        # Root group metadata.
        assert ds.name == '/'
        assert list(ds.attrs) == ['global', 'other_attr']
        assert ds.attrs['global'] == 42
        if not PY2 and write_module is not netCDF4:
            # skip for now: https://github.com/Unidata/netcdf4-python/issues/388
            assert ds.attrs['other_attr'] == 'yes'
        assert set(ds.dimensions) == {'x', 'y', 'z', 'string3',
                                      'mismatched_dim'}
        assert set(ds.variables) == {'foo', 'y', 'z', 'intscalar', 'scalar',
                                     'var_len_str', 'mismatched_dim'}
        assert set(ds.groups) == {'subgroup'}
        assert ds.parent is None

        # Chunked, compressed 2-D float variable.
        v = ds['foo']
        assert v.name == '/foo'
        assert array_equal(v, np.ones((4, 5)))
        assert v.dtype == float
        assert v.dimensions == ('x', 'y')
        assert v.ndim == 2
        assert list(v.attrs) == ['units']
        if not PY2 and write_module is not netCDF4:
            assert v.attrs['units'] == 'meters'
        assert v.chunks == (4, 5)
        assert v.compression == 'gzip'
        assert v.compression_opts == 4
        assert not v.fletcher32
        assert v.shuffle

        # 1-D integer variable with a fill value and no storage filters.
        v = ds['y']
        assert array_equal(v, np.r_[np.arange(4), [-1]])
        assert v.dtype == int
        assert v.dimensions == ('y',)
        assert v.ndim == 1
        assert list(v.attrs) == ['_FillValue']
        assert v.attrs['_FillValue'] == -1
        assert v.chunks is None
        assert v.compression is None
        assert v.compression_opts is None
        assert not v.fletcher32
        assert not v.shuffle
    finally:
        ds.close()

    # The probe is evaluated while the file is closed, as in the original
    # flow; the 'z' checks below only run when it reports success.
    char_working = is_h5py_char_working(tmp_netcdf, 'z')

    ds = h5netcdf.File(tmp_netcdf, 'r')
    try:
        if char_working:
            v = ds['z']
            assert v.dtype == 'S1'
            assert v.ndim == 2
            assert v.dimensions == ('z', 'string3')
            assert list(v.attrs) == ['_FillValue']
            assert v.attrs['_FillValue'] == b'X'

        v = ds['scalar']
        assert array_equal(v, np.array(2.0))
        assert v.dtype == 'float32'
        assert v.ndim == 0
        assert v.dimensions == ()
        assert list(v.attrs) == []

        v = ds.variables['intscalar']
        assert array_equal(v, np.array(2))
        assert v.dtype == 'int64'
        assert v.ndim == 0
        assert v.dimensions == ()
        assert list(v.attrs) == []

        v = ds['var_len_str']
        assert h5py.check_dtype(vlen=v.dtype) == unicode
        assert v[0] == u'foo'

        # The same variable must be returned whichever path form is used.
        v = ds['/subgroup/subvar']
        assert v is ds['subgroup']['subvar']
        assert v is ds['subgroup/subvar']
        assert v is ds['subgroup']['/subgroup/subvar']
        assert v.name == '/subgroup/subvar'
        assert ds['subgroup'].name == '/subgroup'
        assert ds['subgroup'].parent is ds
        assert array_equal(v, np.arange(4.0))
        assert v.dtype == 'int32'
        assert v.ndim == 1
        assert v.dimensions == ('x',)
        assert list(v.attrs) == []

        assert ds['/subgroup/y_var'].shape == (10,)
        assert ds['/subgroup'].dimensions['y'] == 10
    finally:
        ds.close()
Ejemplo n.º 28
0
def get(grp, lo=0, hi=None, fields=None, convert_enum=True, as_dict=False):
    """
    Query a range of rows from a table as a dataframe.

    A table is an HDF5 group containing equal-length 1D datasets serving as
    columns.

    Parameters
    ----------
    grp : ``h5py.Group`` or any dict-like of array-likes
        Handle to an HDF5 group containing only 1D datasets or any similar
        collection of 1D datasets or arrays
    lo, hi : int, optional
        Range of rows to select from the table; each column is sliced as
        ``column[lo:hi]``.
    fields : str or sequence of str, optional
        Column or list of columns to query. Defaults to all available columns.
        A single string returns a Series instead of a DataFrame.
    convert_enum : bool, optional
        Whether to convert HDF5 enum datasets into ``pandas.Categorical``
        columns instead of plain integer columns. Default is True.
    as_dict : bool, optional
        If True, return the selected columns as a plain dict mapping each
        field name to its column data, without building a pandas object.
        Default is False.

    Returns
    -------
    DataFrame, Series or dict
        A dict when ``as_dict`` is True; otherwise a Series when ``fields``
        was given as a single string, else a DataFrame. When ``lo`` is not
        None and at least one column was read, the pandas index counts up
        from ``lo``; otherwise the default index is used.

    Notes
    -----
    Byte-string (HDF5 ASCII) columns are converted to Unicode.

    """
    series = False
    if fields is None:
        fields = list(grp.keys())
    elif isinstance(fields, six.string_types):
        fields = [fields]
        series = True

    data = {}
    for field in fields:
        dset = grp[field]

        # The enum lookup is skipped entirely when conversion is disabled.
        enum_map = h5py.check_dtype(enum=dset.dtype) if convert_enum else None

        if enum_map is not None:
            # Category labels ordered by their integer enum value.
            categories = sorted(enum_map, key=enum_map.__getitem__)
            data[field] = pd.Categorical.from_codes(dset[lo:hi],
                                                    categories,
                                                    ordered=True)
        elif dset.dtype.type == np.bytes_:
            # np.bytes_ is the byte-string scalar type; the old np.string_
            # alias for it was removed in NumPy 2.0.
            data[field] = dset[lo:hi].astype('U')
        else:
            data[field] = dset[lo:hi]

    if as_dict:
        return data

    if data and lo is not None:
        # Label rows by their position in the table, starting at ``lo``.
        nrows = len(next(iter(data.values())))
        index = np.arange(lo, lo + nrows)
    else:
        index = None

    if series:
        # ``fields`` holds exactly one name here (the string that was passed).
        name = fields[0]
        return pd.Series(data[name], index=index, name=name)
    return pd.DataFrame(data, columns=fields, index=index)