Code example #1
File: test_h5t.py Project: minrk/h5py
    def test_enum(self):
        """ Enum type translation

        Literal:
        - TypeIntegerID

        Logical:
        - TypeEnumID
        - Base TypeIntegerID
        - 0 to (at least) 128 values
        """
        enums = [{}, {'a': 0, 'b': 1}, dict(("%s" % d, d) for d in xrange(128)) ]
        bases = ('|i1', '|u1', '<i4', '>i4', '<u8')

        for b in bases:
            for e in enums:
                dt = h5t.py_new_enum(b, e)
                htype_comp = h5t.py_create(b)
                htype = h5t.py_create(dt)
                self.assert_(isinstance(htype, h5t.TypeIntegerID))
                self.assertEqual(htype, htype_comp)
                htype = h5t.py_create(dt, logical=True)
                self.assert_(isinstance(htype, h5t.TypeEnumID), "%s" % (htype,))
                basetype = htype.get_super()
                self.assertEqual(htype_comp, basetype)
                self.assertEqual(htype.get_nmembers(), len(e))
                for idx in xrange(htype.get_nmembers()):
                    name = htype.get_member_name(idx)
                    value = htype.get_member_value(idx)
                    self.assertEqual(e[name], value)
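
The literal/logical split exercised above can be reproduced with the modern public API. A minimal sketch, assuming h5py >= 2.9 (where enum_dtype exists); the enum names and base type here are illustrative:

import h5py
from h5py import h5t

# Hypothetical enum dtype: stored as int8, tagged with name/value pairs.
dt = h5py.enum_dtype({'RED': 0, 'GREEN': 1}, basetype='|i1')
print(type(h5t.py_create(dt)))                 # literal view: TypeIntegerID
print(type(h5t.py_create(dt, logical=True)))   # logical view: TypeEnumID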
Code example #2
File: test_dataset.py Project: gregbanks/h5py
 def test_ldouble_mapping(self):
     """ Test mapping for extended-precision """
     self.assertEqual(h5t.NATIVE_LDOUBLE.dtype, np.longdouble(1).dtype)
     if hasattr(np, 'float96'):
         self.assertEqual(h5t.py_create(np.dtype('float96')).dtype, np.longdouble(1).dtype)
     if hasattr(np, 'float128'):
         self.assertEqual(h5t.py_create(np.dtype('float128')).dtype, np.longdouble(1).dtype)
Code example #3
def _add_typecode(tc, sizes_dict):
    dt_le = np.dtype('<' + tc)
    dt_be = np.dtype('>' + tc)

    entries = sizes_dict.setdefault(dt_le.itemsize, [])
    entries.append((h5t.py_create(dt_le), dt_le.name))
    entries.append((h5t.py_create(dt_be), dt_be.name + ' (big-endian)'))
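
A usage sketch for the helper above, assuming _add_typecode is in scope; the typecodes are illustrative. It groups the little- and big-endian variants of each NumPy typecode under their shared item size:

sizes = {}
for tc in ('i2', 'i4'):
    _add_typecode(tc, sizes)
# sizes now maps itemsize -> [(TypeIntegerID, name), ...], e.g.
# sizes[2] holds entries named 'int16' and 'int16 (big-endian)'.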
Code example #4
    def test_enum(self):
        """ Enum type translation

        Literal:
        - TypeIntegerID

        Logical:
        - TypeEnumID
        - Base TypeIntegerID
        - 0 to (at least) 128 values
        """
        enums = [{}, {
            'a': 0,
            'b': 1
        }, dict(("%s" % d, d) for d in xrange(128))]
        bases = ('|i1', '|u1', '<i4', '>i4', '<u8')

        for b in bases:
            for e in enums:
                dt = h5t.py_new_enum(b, e)
                htype_comp = h5t.py_create(b)
                htype = h5t.py_create(dt)
                self.assert_(isinstance(htype, h5t.TypeIntegerID))
                self.assertEqual(htype, htype_comp)
                htype = h5t.py_create(dt, logical=True)
                self.assert_(isinstance(htype, h5t.TypeEnumID),
                             "%s" % (htype, ))
                basetype = htype.get_super()
                self.assertEqual(htype_comp, basetype)
                self.assertEqual(htype.get_nmembers(), len(e))
                for idx in xrange(htype.get_nmembers()):
                    name = htype.get_member_name(idx)
                    value = htype.get_member_value(idx)
                    self.assertEqual(e[name], value)
Code example #5
File: test_datatypes.py Project: t20100/h5glance
def test_standard_int():
    it = h5t.py_create(np.dtype('<i4'))
    assert datatypes.fmt_dtype(it) == 'int32'
    assert datatypes.dtype_description(it) == '32-bit signed integer'

    ut = h5t.py_create(np.dtype('>u8'))
    assert datatypes.fmt_dtype(ut) == 'uint64 (big-endian)'
    assert datatypes.dtype_description(ut) == '64-bit unsigned integer'
Code example #6
File: test_datatypes.py Project: t20100/h5glance
def test_string():
    # vlen string
    vst = h5t.py_create(h5t.string_dtype(encoding='utf-8'), logical=True)
    assert datatypes.fmt_dtype(vst) == 'UTF-8 string'

    # fixed-length string
    fst = h5t.py_create(h5t.string_dtype(encoding='ascii', length=3))
    assert datatypes.fmt_dtype(fst) == '3-byte ASCII string'
Code example #7
File: test_types.py Project: qsnake/h5py
 def test_array_dtype(self):
     """ (Types) Array dtypes using non-tuple shapes """
     dt1 = np.dtype(('f4', (2,)))
     dt2 = np.dtype(('f4', [2]))
     dt3 = np.dtype(('f4', 2))
     dt4 = np.dtype(('f4', 2.1))
     ht1 = h5t.py_create(dt1)
     ht2 = h5t.py_create(dt2)
     ht3 = h5t.py_create(dt3)
     ht4 = h5t.py_create(dt4)
     self.assertEqual(ht1.dtype, dt1)
     self.assertEqual(ht2.dtype, dt1)
     self.assertEqual(ht3.dtype, dt1)
     self.assertEqual(ht4.dtype, dt1)
Code example #8
File: dataset.py Project: Juxi/OpenSignals
    def __setitem__(self, args, val):
        """ Write to the HDF5 dataset from a Numpy array.

        NumPy's broadcasting rules are honored, for "simple" indexing
        (slices and integers).  For advanced indexing, the shapes must
        match.
        """
        args = args if isinstance(args, tuple) else (args,)

        # Sort field indices from the slicing
        names = tuple(x for x in args if isinstance(x, str))
        args = tuple(x for x in args if not isinstance(x, str))

        if len(names) != 0:
            raise TypeError("Field name selections are not allowed for write.")

        # Generally we try to avoid converting the arrays on the Python
        # side.  However, for compound literals this is unavoidable.
        if self.dtype.kind == 'V' and \
        (not isinstance(val, numpy.ndarray) or val.dtype.kind != 'V'):
            val = numpy.asarray(val, dtype=self.dtype, order='C')
        else:
            val = numpy.asarray(val, order='C')

        # Check for array dtype compatibility and convert
        if self.dtype.subdtype is not None:
            shp = self.dtype.subdtype[1]
            if val.shape[-len(shp):] != shp:
                raise TypeError("Can't broadcast to array dimension %s" % (shp,))
            mtype = h5t.py_create(numpy.dtype((val.dtype, shp)))
            mshape = val.shape[0:len(val.shape)-len(shp)]
        else:
            mshape = val.shape
            mtype = None

        # Perform the dataspace selection
        selection = sel.select(self.shape, args, dsid=self.id)

        if selection.nselect == 0:
            return

        # Broadcast scalars if necessary.
        if (mshape == () and selection.mshape != ()):
            if self.dtype.subdtype is not None:
                raise NotImplementedError("Scalar broadcasting is not supported for array dtypes")
            val2 = numpy.empty(selection.mshape[-1], dtype=val.dtype)
            val2[...] = val
            val = val2
            mshape = val.shape

        # Perform the write, with broadcasting
        # Be careful to pad memory shape with ones to avoid HDF5 chunking
        # glitch, which kicks in for mismatched memory/file selections
        if len(mshape) < len(self.shape):
            mshape_pad = (1,)*(len(self.shape)-len(mshape)) + mshape
        else:
            mshape_pad = mshape
        mspace = h5s.create_simple(mshape_pad, (h5s.UNLIMITED,)*len(mshape_pad))
        for fspace in selection.broadcast(mshape):
            self.id.write(mspace, fspace, val, mtype)
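
A short usage sketch of the broadcasting behaviour this __setitem__ implements (the file and dataset names are hypothetical):

import numpy as np
import h5py

with h5py.File('demo.h5', 'w') as f:
    dset = f.create_dataset('x', shape=(4, 3), dtype='<f4')
    dset[...] = 0.0              # scalar broadcast over the whole selection
    dset[1] = np.arange(3)       # row assignment, shapes match exactly
    dset[:, 0] = [9, 9, 9, 9]    # column assignment from a plain list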
Code example #9
    def __init__(self, _id):
        # super __init__ is handled by DatasetID.__cinit__ automatically
        self._data_dict = None
        with phil:
            sid = self.get_space()
            self._shape = sid.get_simple_extent_dims()
        self._reshaped = False

        attr = h5a.open(self, b'raw_data')
        htype = h5t.py_create(attr.dtype)
        _arr = np.ndarray(attr.shape, dtype=attr.dtype, order='C')
        attr.read(_arr, mtype=htype)
        raw_data_name = _arr[()]
        if isinstance(raw_data_name, bytes):
            raw_data_name = raw_data_name.decode('utf-8')

        fid = h5i.get_file_id(self)
        g = Group(fid)
        self.raw_data = g[raw_data_name]
        self.chunks = tuple(self.raw_data.attrs['chunks'])

        fillvalue_a = np.empty((1,), dtype=self.dtype)
        dcpl = self.get_create_plist()
        dcpl.get_fill_value(fillvalue_a)
        self.fillvalue = fillvalue_a[0]
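
The low-level attribute read in __init__ can be reproduced in isolation. A minimal sketch, assuming a fixed-length bytes attribute; all names are illustrative:

import numpy as np
import h5py
from h5py import h5a, h5t

with h5py.File('attrs_demo.h5', 'w') as f:
    f.attrs['raw_data'] = np.bytes_('/raw/path')      # fixed-length 'S' attribute
    attr = h5a.open(f.id, b'raw_data')                # low-level attribute handle
    buf = np.ndarray(attr.shape, dtype=attr.dtype)    # scalar () buffer
    attr.read(buf, mtype=h5t.py_create(attr.dtype))
    name = buf[()].decode('utf-8')                    # b'/raw/path' -> str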
Code example #10
    def test_complex(self):
        """ Complex type translation

        - TypeComplexID
        - 8, 16 bytes
        - LE and BE
        - 2 members
        - Member names from cfg.complex_names
        - Members are TypeFloatID
        """
        bases = ('=c', '<c', '>c')

        for b in bases:
            for l in (8, 16):
                dt = '%s%s' % (b, l)
                htype = h5t.py_create(dt)
                self.assert_(isinstance(htype, h5t.TypeCompoundID),
                             "wrong class")
                self.assertEqual(htype.get_size(), l, "wrong size")
                self.assertEqual(htype.get_nmembers(), 2, "wrong # members")
                for idx in (0, 1):
                    self.assertEqual(htype.get_member_name(idx),
                                     cfg.complex_names[idx])
                    st = htype.get_member_type(idx)
                    self.assert_(isinstance(st, h5t.TypeFloatID))
                    self.assertEqual(st.get_size(), l // 2)
                    self.assertEqual(st.get_order(), bytemap[b[0]])
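
A quick check of the mapping described in the docstring (a sketch; member names follow h5py's complex_names config, ('r', 'i') by default):

import numpy as np
from h5py import h5t

ht = h5t.py_create(np.dtype('complex64'))
print(ht.get_nmembers())                  # 2
print(ht.get_member_name(0))              # b'r' with the default config
print(ht.get_member_type(0).get_size())   # 4, half of the 8-byte complex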
Code example #11
File: test_h5plugin.py Project: placrosse/bitshuffle
    def test_plugins(self):
        shape = (32 * 1024,)
        chunks = (4 * 1024,)
        dtype = np.int64
        data = np.arange(shape[0])
        fname = "tmp_test_filters.h5"
        f = h5py.File(fname)
        tid = h5t.py_create(dtype, logical=1)
        sid = h5s.create_simple(shape, shape)
        # Different APIs for different h5py versions.
        try:
            dcpl = filters.generate_dcpl(shape, dtype, chunks, None, None,
                      None, None, None, None)
        except TypeError:
            dcpl = filters.generate_dcpl(shape, dtype, chunks, None, None,
                      None, None, None)
        dcpl.set_filter(32008, h5z.FLAG_MANDATORY)
        dcpl.set_filter(32000, h5z.FLAG_MANDATORY)
        dset_id = h5d.create(f.id, "range", tid, sid, dcpl=dcpl)
        dset_id.write(h5s.ALL, h5s.ALL, data)
        f.close()

        # Make sure the filters are working outside of h5py by calling h5dump
        h5dump = Popen(['h5dump', fname],
                       stdout=PIPE, stderr=STDOUT)
        stdout, nothing = h5dump.communicate()
        #print stdout
        err = h5dump.returncode
        self.assertEqual(err, 0)

        f = h5py.File(fname, 'r')
        d = f['range'][:]
        self.assertTrue(np.all(d == data))
        f.close()
Code example #12
    def test_plugins(self):
        if not H51811P:
            return
        shape = (32 * 1024, )
        chunks = (4 * 1024, )
        dtype = np.int64
        data = np.arange(shape[0])
        fname = "tmp_test_filters.h5"
        f = h5py.File(fname)
        tid = h5t.py_create(dtype, logical=1)
        sid = h5s.create_simple(shape, shape)
        # Different APIs for different h5py versions.
        try:
            dcpl = filters.generate_dcpl(shape, dtype, chunks, None, None,
                                         None, None, None, None)
        except TypeError:
            dcpl = filters.generate_dcpl(shape, dtype, chunks, None, None,
                                         None, None, None)
        dcpl.set_filter(32008, h5z.FLAG_MANDATORY)
        dcpl.set_filter(32000, h5z.FLAG_MANDATORY)
        dset_id = h5d.create(f.id, b"range", tid, sid, dcpl=dcpl)
        dset_id.write(h5s.ALL, h5s.ALL, data)
        f.close()

        # Make sure the filters are working outside of h5py by calling h5dump
        h5dump = Popen(['h5dump', fname], stdout=PIPE, stderr=STDOUT)
        stdout, nothing = h5dump.communicate()
        err = h5dump.returncode
        self.assertEqual(err, 0)

        f = h5py.File(fname, 'r')
        d = f['range'][:]
        self.assertTrue(np.all(d == data))
        f.close()
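
Before relying on dynamically loaded filters like the ones registered above, their availability can be probed. A sketch, assuming the filter IDs used in the test (32000 and 32008):

from h5py import h5z

for filter_id in (32000, 32008):
    print(filter_id, 'available:', h5z.filter_avail(filter_id))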
Code example #13
File: test_h5t.py Project: minrk/h5py
    def test_complex(self):
        """ Complex type translation

        - TypeComplexID
        - 8, 16 bytes
        - LE and BE
        - 2 members
        - Member names from cfg.complex_names
        - Members are TypeFloatID
        """
        bases = ('=c', '<c', '>c')
        
        for b in bases:
            for l in (8, 16):
                dt = '%s%s' % (b, l)
                htype = h5t.py_create(dt)
                self.assert_(isinstance(htype, h5t.TypeCompoundID), "wrong class")
                self.assertEqual(htype.get_size(), l, "wrong size")
                self.assertEqual(htype.get_nmembers(), 2, "wrong # members")
                for idx in (0, 1):
                    self.assertEqual(htype.get_member_name(idx), cfg.complex_names[idx])
                    st = htype.get_member_type(idx)
                    self.assert_(isinstance(st, h5t.TypeFloatID))
                    self.assertEqual(st.get_size(), l//2)
                    self.assertEqual(st.get_order(), bytemap[b[0]])
Code example #14
File: test_h5t.py Project: minrk/h5py
    def test_detect_class(self):
        
        dt = dtype([(x, x) for x in simple_types])

        htype = h5t.py_create(dt)
        self.assert_(htype.detect_class(h5t.INTEGER))
        self.assert_(htype.detect_class(h5t.OPAQUE))
        self.assert_(not htype.detect_class(h5t.ARRAY))
Code example #15
File: test_types.py Project: qsnake/h5py
 def test_vlstring_log(self):
     """ (Types) Vlen string logical is null-term HDF5 vlen ASCII string """
     dt = h5t.special_dtype(vlen=str)
     htype = h5t.py_create(dt, logical=True)
     self.assertIsInstance(htype, h5t.TypeStringID)
     self.assertEqual(htype.is_variable_str(), True)
     self.assertEqual(htype.get_cset(), h5t.CSET_ASCII)
     self.assertEqual(htype.get_strpad(), h5t.STR_NULLTERM)
Code example #16
    def test_detect_class(self):

        dt = dtype([(x, x) for x in simple_types])

        htype = h5t.py_create(dt)
        self.assert_(htype.detect_class(h5t.INTEGER))
        self.assert_(htype.detect_class(h5t.OPAQUE))
        self.assert_(not htype.detect_class(h5t.ARRAY))
Code example #17
File: test_h5t.py Project: ajelenak-thg/h5py
 def test_ref(self):
     """ Reference types are correctly stored in compound types (issue 144)
     """
     dt = np.dtype([('a', h5py.ref_dtype), ('b', '<f4')])
     tid = h5t.py_create(dt, logical=True)
     t1, t2 = tid.get_member_type(0), tid.get_member_type(1)
     self.assertEqual(t1, h5t.STD_REF_OBJ)
     self.assertEqual(t2, h5t.IEEE_F32LE)
     self.assertEqual(tid.get_member_offset(0), 0)
     self.assertEqual(tid.get_member_offset(1), h5t.STD_REF_OBJ.get_size())
Code example #18
File: test_h5t.py Project: ChiLi90/LifetimeFit
 def test_ref(self):
     """ Reference types are correctly stored in compound types (issue 144)
     """
     dt = np.dtype([('a', h5py.ref_dtype), ('b', '<f4')])
     tid = h5t.py_create(dt, logical=True)
     t1, t2 = tid.get_member_type(0), tid.get_member_type(1)
     self.assertEqual(t1, h5t.STD_REF_OBJ)
     self.assertEqual(t2, h5t.IEEE_F32LE)
     self.assertEqual(tid.get_member_offset(0), 0)
     self.assertEqual(tid.get_member_offset(1), h5t.STD_REF_OBJ.get_size())
Code example #19
 def retrieveDataObject(self):
     if not self.numpyData:
         import numpy
         from h5py import h5t
         if self.maxLength:
             dtype = h5t.py_create('S' + str(self.maxLength))
         else:
             from pypies.impl.H5pyDataStore import vlen_str_type as dtype
         #dtype.set_strpad(h5t.STR_NULLTERM)
         self.numpyData = numpy.asarray(self.getStringData(), dtype)
     return self.numpyData
Code example #20
File: H5pyDataStore.py Project: HoneyYan/awips2
 def __getHdf5Datatype(self, record):
     dtype = dataRecordMap[record.__class__]
     if dtype == types.StringType:
         from h5py import h5t
         size = record.getMaxLength()
         if size > 0:
             dtype = h5t.py_create('S' + str(size))
         else:
             dtype = vlen_str_type
         #dtype.set_strpad(h5t.STR_NULLTERM)
     return dtype
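
The size-based branch above generalizes to a small helper. A sketch, assuming h5py >= 2.9 for string_dtype; the function name is illustrative:

import h5py
from h5py import h5t

def string_type(max_length=0):
    """Fixed-length 'S<n>' type when a maximum is known, vlen string otherwise."""
    if max_length > 0:
        return h5t.py_create('S%d' % max_length)
    return h5t.py_create(h5py.string_dtype(), logical=True)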
Code example #21
 def retrieveDataObject(self):
     if not self.numpyData:
         import numpy
         from h5py import h5t
         if self.maxLength:
             dtype = h5t.py_create('S' + str(self.maxLength))
         else:
             from pypies.impl.H5pyDataStore import vlen_str_type as dtype
         #dtype.set_strpad(h5t.STR_NULLTERM)
         self.numpyData = numpy.asarray(self.getStringData(), dtype)
     return self.numpyData
Code example #22
    def test_py_create_compound(self):

        # Compound type, each field of which is named for its type
        simple_compound = [(x, x) for x in simple_types]
        deep_compound = [('A', simple_compound), ('B', '<i4')]

        compound_types = [simple_compound, deep_compound]
        for x in compound_types:
            dt = dtype(x)
            htype = h5t.py_create(dt)
            self.assertEqual(type(htype), h5t.TypeCompoundID)
            self.assertEqual(dt, htype.dtype)
Code example #23
    def test_py_create_array(self):
        shapes = [(1, 1), (1, ), (4, 5), (99, 10, 22)]
        array_types = []
        for base in simple_types:
            for shape in shapes:
                array_types.append((base, shape))

        for x in array_types:
            dt = dtype(x)
            htype = h5t.py_create(dt)
            self.assertEqual(type(htype), h5t.TypeArrayID)
            self.assertEqual(dt, htype.dtype)
Code example #24
File: test_h5t.py Project: minrk/h5py
    def test_py_create_compound(self):

        # Compound type, each field of which is named for its type
        simple_compound = [ (x, x) for x in simple_types ]
        deep_compound = [ ('A', simple_compound), ('B', '<i4') ]

        compound_types = [simple_compound, deep_compound]
        for x in compound_types:
            dt = dtype(x)
            htype = h5t.py_create(dt)
            self.assertEqual(type(htype), h5t.TypeCompoundID)
            self.assertEqual(dt, htype.dtype)
Code example #25
File: test_h5t.py Project: minrk/h5py
    def test_py_create_array(self):
        shapes = [ (1,1), (1,), (4,5), (99,10,22) ]
        array_types = []
        for base in simple_types:
            for shape in shapes:
                array_types.append((base, shape))

        for x in array_types:
            dt = dtype(x)
            htype = h5t.py_create(dt)
            self.assertEqual(type(htype), h5t.TypeArrayID)
            self.assertEqual(dt, htype.dtype)
Code example #26
File: attrs.py Project: tovrstra/h5py
    def create(self, name, data, shape=None, dtype=None):
        """ Create a new attribute, overwriting any existing attribute.

        name
            Name of the new attribute (required)
        data
            An array to initialize the attribute (required)
        shape
            Shape of the attribute.  Overrides data.shape if both are
            given, in which case the total number of points must be unchanged.
        dtype
            Data type of the attribute.  Overrides data.dtype if both
            are given.
        """

        if data is not None:
            data = numpy.asarray(data, order='C', dtype=dtype)
            if shape is None:
                shape = data.shape
            elif numpy.product(shape) != numpy.product(data.shape):
                raise ValueError(
                    "Shape of new attribute conflicts with shape of data")

            if dtype is None:
                dtype = data.dtype

        if isinstance(dtype, h5py.Datatype):
            htype = dtype.id
            dtype = htype.dtype
        else:
            if dtype is None:
                dtype = numpy.dtype('f')
            htype = h5t.py_create(dtype, logical=True)

        if shape is None:
            raise ValueError('At least one of "shape" or "data" must be given')

        data = data.reshape(shape)

        space = h5s.create_simple(shape)

        if name in self:
            h5a.delete(self._id, self._e(name))

        attr = h5a.create(self._id, self._e(name), htype, space)

        if data is not None:
            try:
                attr.write(data)
            except:
                attr._close()
                h5a.delete(self._id, self._e(name))
                raise
Code example #27
    def create(self, name, data, shape=None, dtype=None):
        """ Create a new attribute, overwriting any existing attribute.

        name
            Name of the new attribute (required)
        data
            An array to initialize the attribute (required)
        shape
            Shape of the attribute.  Overrides data.shape if both are
            given, in which case the total number of points must be unchanged.
        dtype
            Data type of the attribute.  Overrides data.dtype if both
            are given.
        """

        with phil:
            if data is not None:
                data = numpy.asarray(data, order="C", dtype=dtype)
                if shape is None:
                    shape = data.shape
                elif numpy.product(shape) != numpy.product(data.shape):
                    raise ValueError("Shape of new attribute conflicts with shape of data")

                if dtype is None:
                    dtype = data.dtype

            if isinstance(dtype, h5py.Datatype):
                htype = dtype.id
                dtype = htype.dtype
            else:
                if dtype is None:
                    dtype = numpy.dtype("f")
                htype = h5t.py_create(dtype, logical=True)

            if shape is None:
                raise ValueError('At least one of "shape" or "data" must be given')

            data = data.reshape(shape)

            space = h5s.create_simple(shape)

            if name in self:
                h5a.delete(self._id, self._e(name))

            attr = h5a.create(self._id, self._e(name), htype, space)

            if data is not None:
                try:
                    attr.write(data)
                except:
                    attr._close()
                    h5a.delete(self._id, self._e(name))
                    raise
Code example #28
File: attrs.py Project: andy-slac/h5py
    def create(self, name, data, shape=None, dtype=None):
        """ Create a new attribute, overwriting any existing attribute.

        name
            Name of the new attribute (required)
        data
            An array to initialize the attribute (required)
        shape
            Shape of the attribute.  Overrides data.shape if both are
            given, in which case the total number of points must be unchanged.
        dtype
            Data type of the attribute.  Overrides data.dtype if both
            are given.
        """
        # TODO: REMOVE WHEN UNICODE VLENS IMPLEMENTED
        # Hack to support Unicode values (scalars only)
        #if isinstance(data, unicode):
        #    unicode_hack = True
        #    data = data.encode('utf8')
        #else:
        #    unicode_hack = False

        if data is not None:
            data = numpy.asarray(data, order='C', dtype=dtype)
            if shape is None:
                shape = data.shape
            elif numpy.product(shape) != numpy.product(data.shape):
                raise ValueError("Shape of new attribute conflicts with shape of data")
                
            if dtype is None:
                dtype = data.dtype

        if dtype is None:
            dtype = numpy.dtype('f')
        if shape is None:
            raise ValueError('At least one of "shape" or "data" must be given')

        data = data.reshape(shape)

        space = h5s.create_simple(shape)
        htype = h5t.py_create(dtype, logical=True)

        # TODO: REMOVE WHEN UNICODE VLENS IMPLEMENTED
        #if unicode_hack:
        #    htype.set_cset(h5t.CSET_UTF8)

        if name in self:
            h5a.delete(self._id, self._e(name))

        attr = h5a.create(self._id, self._e(name), htype, space)
        if data is not None:
            attr.write(data)
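
A usage sketch for the create() method shown in the variants above (the file name is hypothetical); note how shape reshapes the data and dtype overrides its type:

import numpy as np
import h5py

with h5py.File('attrs_create_demo.h5', 'w') as f:
    f.attrs.create('version', data=3, dtype=np.int32)
    f.attrs.create('grid', data=np.arange(6), shape=(2, 3))
    print(f.attrs['grid'].shape)   # (2, 3): same point count, new shape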
Code example #29
File: test_h5t.py Project: minrk/h5py
    def test_opaque(self):
        """ Opaque type translation

        - TypeOpaqueID
        - Sizes 1 byte to 2**31-1 bytes
        - Empty tag
        """

        for l in (1, 21, 2**31-1):
            htype = h5t.py_create('|V%s' % l)
            self.assert_(isinstance(htype, h5t.TypeOpaqueID))
            self.assertEqual(htype.get_size(), l)
            self.assertEqual(htype.get_tag(), "")
Code example #30
    def test_opaque(self):
        """ Opaque type translation

        - TypeOpaqueID
        - Sizes 1 byte to 2**31-1 bytes
        - Empty tag
        """

        for l in (1, 21, 2**31 - 1):
            htype = h5t.py_create('|V%s' % l)
            self.assert_(isinstance(htype, h5t.TypeOpaqueID))
            self.assertEqual(htype.get_size(), l)
            self.assertEqual(htype.get_tag(), "")
Code example #31
File: group.py Project: bfroehle/h5py
    def __setitem__(self, name, obj):
        """ Add an object to the group.  The name must not already be in use.

        The action taken depends on the type of object assigned:

        Named HDF5 object (Dataset, Group, Datatype)
            A hard link is created at "name" which points to the
            given object.

        SoftLink or ExternalLink
            Create the corresponding link.

        Numpy ndarray
            The array is converted to a dataset object, with default
            settings (contiguous storage, etc.).

        Numpy dtype
            Commit a copy of the datatype as a named datatype in the file.

        Anything else
            Attempt to convert it to an ndarray and store it.  Scalar
            values are stored as scalar datasets. Raise ValueError if we
            can't understand the resulting array dtype.
        """
        name, lcpl = self._e(name, lcpl=True)

        if isinstance(obj, HLObject):
            h5o.link(obj.id, self.id, name, lcpl=lcpl, lapl=self._lapl)

        elif isinstance(obj, SoftLink):
            self.id.links.create_soft(name, self._e(obj.path),
                          lcpl=lcpl, lapl=self._lapl)

        elif isinstance(obj, ExternalLink):
            self.id.links.create_external(name, self._e(obj.filename),
                          self._e(obj.path), lcpl=lcpl, lapl=self._lapl)

        elif isinstance(obj, numpy.dtype):
            htype = h5t.py_create(obj)
            htype.commit(self.id, name, lcpl=lcpl)

        else:
            ds = self.create_dataset(None, data=obj, dtype=base.guess_dtype(obj))
            h5o.link(ds.id, self.id, name, lcpl=lcpl)
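
A usage sketch covering the dispatch cases documented above (all names are hypothetical):

import numpy as np
import h5py

with h5py.File('group_demo.h5', 'w') as f:
    f['data'] = np.arange(10)            # ndarray -> new dataset
    f['int32'] = np.dtype('<i4')         # dtype -> committed named datatype
    f['alias'] = h5py.SoftLink('/data')  # link object -> soft link
    print(f['alias'][:3])                # [0 1 2]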
Code example #32
File: test_h5t.py Project: connectthefuture/h5py
    def test_out_of_order_offsets(self):
        size = 20
        type_dict = {
            'names': ['f1', 'f2', 'f3'],
            'formats': ['<f4', '<i4', '<f8'],
            'offsets': [0, 16, 8]
        }

        expected_dtype = np.dtype(type_dict)

        tid = h5t.create(h5t.COMPOUND, size)
        for name, offset, dt in zip(type_dict["names"], type_dict["offsets"],
                                    type_dict["formats"]):
            tid.insert(
                name.encode("utf8") if isinstance(name, text_type) else name,
                offset, h5t.py_create(dt))

        self.assertEqual(tid.dtype, expected_dtype)
        self.assertEqual(tid.dtype.itemsize, size)
Code example #33
File: test_h5t.py Project: minrk/h5py
    def test_names(self):

        names = [('r','i'), ('real', 'imag'), (' real name ', ' img name '),
                 (' Re!#@$%\t\tREALr\neal ^;;;"<>? ', ' \t*&^  . ^@IMGI        MG!~\t\n\r') ]

        complex_types = [x for x in simple_types if 'c' in x]
        config = h5.get_config()

        oldnames = config.complex_names
        try:
            for name in names:
                config.complex_names = name
                for ctype in complex_types:
                    dt = dtype(ctype)
                    htype = h5t.py_create(dt)
                    self.assertEqual(type(htype), h5t.TypeCompoundID)
                    self.assertEqual(htype.get_nmembers(), 2)
                    self.assertEqual(htype.get_member_name(0), name[0])
                    self.assertEqual(htype.get_member_name(1), name[1])
        finally:
            config.complex_names = oldnames
Code example #34
File: test_h5t.py Project: minrk/h5py
    def test_boolean(self):
        """ Boolean type translation

        - TypeEnumID
        - Base TypeIntegerID
        - Base 1 byte
        - Base signed
        - Member names from cfg.bool_names
        - 2 values
        - Values 0, 1
        """

        htype = h5t.py_create('bool')
        self.assert_(isinstance(htype, h5t.TypeEnumID), "wrong class")
        self.assertEqual(htype.get_nmembers(), 2, "must be 2-element enum")
        basetype = htype.get_super()
        self.assertEqual(basetype.get_size(), 1, "wrong size")
        self.assertEqual(basetype.get_sign(), h5t.SGN_2, "wrong sign")
        for idx in (0,1):
            self.assertEqual(htype.get_member_name(idx), cfg.bool_names[idx], "wrong name")
            self.assertEqual(htype.get_member_value(idx), idx, "wrong value")
Code example #35
    def test_names(self):

        names = [('r', 'i'), ('real', 'imag'), (' real name ', ' img name '),
                 (' Re!#@$%\t\tREALr\neal ^;;;"<>? ',
                  ' \t*&^  . ^@IMGI        MG!~\t\n\r')]

        complex_types = [x for x in simple_types if 'c' in x]
        config = h5.get_config()

        oldnames = config.complex_names
        try:
            for name in names:
                config.complex_names = name
                for ctype in complex_types:
                    dt = dtype(ctype)
                    htype = h5t.py_create(dt)
                    self.assertEqual(type(htype), h5t.TypeCompoundID)
                    self.assertEqual(htype.get_nmembers(), 2)
                    self.assertEqual(htype.get_member_name(0), name[0])
                    self.assertEqual(htype.get_member_name(1), name[1])
        finally:
            config.complex_names = oldnames
Code example #36
    def test_boolean(self):
        """ Boolean type translation

        - TypeEnumID
        - Base TypeIntegerID
        - Base 1 byte
        - Base signed
        - Member names from cfg.bool_names
        - 2 values
        - Values 0, 1
        """

        htype = h5t.py_create('bool')
        self.assert_(isinstance(htype, h5t.TypeEnumID), "wrong class")
        self.assertEqual(htype.get_nmembers(), 2, "must be 2-element enum")
        basetype = htype.get_super()
        self.assertEqual(basetype.get_size(), 1, "wrong size")
        self.assertEqual(basetype.get_sign(), h5t.SGN_2, "wrong sign")
        for idx in (0, 1):
            self.assertEqual(htype.get_member_name(idx), cfg.bool_names[idx],
                             "wrong name")
            self.assertEqual(htype.get_member_value(idx), idx, "wrong value")
Code example #37
File: test_h5t.py Project: alpaco42/ML_Spring_2018
    def test_out_of_order_offsets(self):
        size = 20
        type_dict = {
            'names': ['f1', 'f2', 'f3'],
            'formats': ['<f4', '<i4', '<f8'],
            'offsets': [0, 16, 8]
        }

        expected_dtype = np.dtype(type_dict)

        tid = h5t.create(h5t.COMPOUND, size)
        for name, offset, dt in zip(
                type_dict["names"], type_dict["offsets"], type_dict["formats"]
        ):
            tid.insert(
                name.encode("utf8") if isinstance(name, text_type) else name,
                offset,
                h5t.py_create(dt)
            )

        self.assertEqual(tid.dtype, expected_dtype)
        self.assertEqual(tid.dtype.itemsize, size)
Code example #38
File: dataset.py Project: bfroehle/h5py
    def __setitem__(self, args, val):
        """ Write to the HDF5 dataset from a Numpy array.

        NumPy's broadcasting rules are honored, for "simple" indexing
        (slices and integers).  For advanced indexing, the shapes must
        match.
        """
        args = args if isinstance(args, tuple) else (args,)

        # Sort field indices from the slicing
        names = tuple(x for x in args if isinstance(x, str))
        args = tuple(x for x in args if not isinstance(x, str))

        if len(names) != 0:
            raise TypeError("Field name selections are not allowed for write.")

        # Generally we try to avoid converting the arrays on the Python
        # side.  However, for compound literals this is unavoidable.
        if self.dtype.kind == "O" or \
          (self.dtype.kind == 'V' and \
          (not isinstance(val, numpy.ndarray) or val.dtype.kind != 'V') and \
          (self.dtype.subdtype == None)):
            val = numpy.asarray(val, dtype=self.dtype, order='C')
        else:
            val = numpy.asarray(val, order='C')

        # Check for array dtype compatibility and convert
        if self.dtype.subdtype is not None:
            shp = self.dtype.subdtype[1]
            valshp = val.shape[-len(shp):]
            if valshp != shp:  # Last dimension has to match
                raise TypeError("When writing to array types, last N dimensions have to match (got %s, but should be %s)" % (valshp, shp,))
            mtype = h5t.py_create(numpy.dtype((val.dtype, shp)))
            mshape = val.shape[0:len(val.shape)-len(shp)]
        else:
            mshape = val.shape
            mtype = None

        # Perform the dataspace selection
        selection = sel.select(self.shape, args, dsid=self.id)

        if selection.nselect == 0:
            return

        # Broadcast scalars if necessary.
        if (mshape == () and selection.mshape != ()):
            if self.dtype.subdtype is not None:
                raise TypeError("Scalar broadcasting is not supported for array dtypes")
            val2 = numpy.empty(selection.mshape[-1], dtype=val.dtype)
            val2[...] = val
            val = val2
            mshape = val.shape

        # Perform the write, with broadcasting
        # Be careful to pad memory shape with ones to avoid HDF5 chunking
        # glitch, which kicks in for mismatched memory/file selections
        if len(mshape) < len(self.shape):
            mshape_pad = (1,)*(len(self.shape)-len(mshape)) + mshape
        else:
            mshape_pad = mshape
        mspace = h5s.create_simple(mshape_pad, (h5s.UNLIMITED,)*len(mshape_pad))
        for fspace in selection.broadcast(mshape):
            self.id.write(mspace, fspace, val, mtype)
Code example #39
File: dataset.py Project: bfroehle/h5py
def make_new_dset(parent, shape=None, dtype=None, data=None,
                  chunks=None, compression=None, shuffle=None,
                  fletcher32=None, maxshape=None, compression_opts=None,
                  fillvalue=None, scaleoffset=None, track_times=None):
    """ Return a new low-level dataset identifier

    Only creates anonymous datasets.
    """

    # Convert data to a C-contiguous ndarray
    if data is not None:
        import base
        data = numpy.asarray(data, order="C", dtype=base.guess_dtype(data))

    # Validate shape
    if shape is None:
        if data is None:
            raise TypeError("Either data or shape must be specified")
        shape = data.shape
    else:
        shape = tuple(shape)
        if data is not None and (numpy.product(shape) != numpy.product(data.shape)):
            raise ValueError("Shape tuple is incompatible with data")

    # Validate dtype
    if dtype is None and data is None:
        dtype = numpy.dtype("=f4")
    elif dtype is None and data is not None:
        dtype = data.dtype
    else:
        dtype = numpy.dtype(dtype)

    # Legacy
    if any((compression, shuffle, fletcher32, maxshape, scaleoffset)) and chunks is False:
        raise ValueError("Chunked format required for given storage options")

    # Legacy
    if compression is True:
        if compression_opts is None:
            compression_opts = 4
        compression = 'gzip'

    # Legacy
    if compression in range(10):
        if compression_opts is not None:
            raise TypeError("Conflict in compression options")
        compression_opts = compression
        compression = 'gzip'

    dcpl = filters.generate_dcpl(shape, dtype, chunks, compression,
                                 compression_opts, shuffle, fletcher32,
                                 maxshape, scaleoffset)

    if fillvalue is not None:
        fillvalue = numpy.array(fillvalue)
        dcpl.set_fill_value(fillvalue)

    if track_times in (True, False):
        dcpl.set_obj_track_times(track_times)
    elif track_times is not None:
        raise TypeError("track_times must be either True or False")

    if maxshape is not None:
        maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape)
    sid = h5s.create_simple(shape, maxshape)
    tid = h5t.py_create(dtype, logical=1)

    dset_id = h5d.create(parent.id, None, tid, sid, dcpl=dcpl)

    if data is not None:
        dset_id.write(h5s.ALL, h5s.ALL, data)

    return dset_id
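
make_new_dset is internal; the public high-level equivalent of the options it wires together is create_dataset. A sketch with a hypothetical file name:

import h5py

with h5py.File('newdset_demo.h5', 'w') as f:
    dset = f.create_dataset('x', shape=(100,), dtype='=f4',
                            chunks=(10,), fillvalue=0.0,
                            track_times=False)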
Code example #40
File: dataset.py Project: bfroehle/h5py
    def __getitem__(self, args):
        """ Read a slice from the HDF5 dataset.

        Takes slices and recarray-style field names (more than one is
        allowed!) in any order.  Obeys basic NumPy rules, including
        broadcasting.

        Also supports:

        * Boolean "mask" array indexing
        """
        args = args if isinstance(args, tuple) else (args,)

        # Sort field indices from the rest of the args.
        names = tuple(x for x in args if isinstance(x, str))
        args = tuple(x for x in args if not isinstance(x, str))

        def strip_fields(basetype):
            """ Strip extra dtype information from special types """
            if basetype.kind == 'O':
                return numpy.dtype('O')
            if basetype.fields is not None:
                if basetype.kind in ('i','u'):
                    return basetype.fields['enum'][0]
                fields = []
                for name in basetype.names:
                    fff = basetype.fields[name]
                    if len(fff) == 3:
                        (subtype, offset, meta) = fff
                    else:
                        subtype, meta = fff
                        offset = 0
                    subtype = strip_fields(subtype)
                    fields.append((name, subtype))
                return numpy.dtype(fields)
            return basetype

        def readtime_dtype(basetype, names):
            """ Make a NumPy dtype appropriate for reading """

            basetype = strip_fields(basetype)

            if len(names) == 0:  # Not compound, or we want all fields
                return basetype

            if basetype.names is None:  # Names provided, but not compound
                raise ValueError("Field names only allowed for compound types")

            for name in names:  # Check all names are legal
                if not name in basetype.names:
                    raise ValueError("Field %s does not appear in this type." % name)

            return numpy.dtype([(name, basetype.fields[name][0]) for name in names])

        # This is necessary because in the case of array types, NumPy
        # discards the array information at the top level.
        new_dtype = readtime_dtype(self.id.dtype, names)
        mtype = h5t.py_create(new_dtype)

        # === Scalar dataspaces =================

        if self.shape == ():
            fspace = self.id.get_space()
            selection = sel2.select_read(fspace, args)
            arr = numpy.ndarray(selection.mshape, dtype=new_dtype)
            for mspace, fspace in selection:
                self.id.read(mspace, fspace, arr, mtype)
            if selection.mshape is None:
                return arr[()]
            return arr

        # === Everything else ===================

        # Perform the dataspace selection.
        selection = sel.select(self.shape, args, dsid=self.id)

        if selection.nselect == 0:
            return numpy.ndarray((0,), dtype=new_dtype)

        # Up-converting to (1,) so that numpy.ndarray correctly creates
        # np.void rows in case of multi-field dtype. (issue 135)
        single_element = selection.mshape == ()
        mshape = (1,) if single_element else selection.mshape
        arr = numpy.ndarray(mshape, new_dtype, order='C')

        # HDF5 has a bug where if the memory shape has a different rank
        # than the dataset, the read is very slow
        if len(mshape) < len(self.shape):
            # pad with ones
            mshape = (1,)*(len(self.shape)-len(mshape)) + mshape

        # Perform the actual read
        mspace = h5s.create_simple(mshape)
        fspace = selection._id
        self.id.read(mspace, fspace, arr, mtype)

        # Patch up the output for NumPy
        if len(names) == 1:
            arr = arr[names[0]]     # Single-field recarray convention
        if arr.shape == ():
            arr = numpy.asscalar(arr)
        if single_element:
            arr = arr[0]
        return arr
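
A usage sketch for the recarray-style field reads handled above (names are hypothetical):

import numpy as np
import h5py

dt = np.dtype([('x', '<f4'), ('y', '<i4')])
with h5py.File('getitem_demo.h5', 'w') as f:
    d = f.create_dataset('c', shape=(5,), dtype=dt)
    xs = d['x']   # single-field read -> plain float32 array
    row = d[0]    # scalar selection -> np.void holding both fields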
Code example #41
File: dataset.py Project: tovrstra/h5py
    def __setitem__(self, args, val):
        """ Write to the HDF5 dataset from a Numpy array.

        NumPy's broadcasting rules are honored, for "simple" indexing
        (slices and integers).  For advanced indexing, the shapes must
        match.
        """
        args = args if isinstance(args, tuple) else (args, )

        # Sort field indices from the slicing
        names = tuple(x for x in args if isinstance(x, str))
        args = tuple(x for x in args if not isinstance(x, str))

        # Generally we try to avoid converting the arrays on the Python
        # side.  However, for compound literals this is unavoidable.
        vlen = h5t.check_dtype(vlen=self.dtype)
        if vlen not in (bytes, unicode, None):
            try:
                val = numpy.asarray(val, dtype=vlen)
            except ValueError:
                try:
                    val = numpy.array(
                        [numpy.array(x, dtype=vlen) for x in val],
                        dtype=self.dtype)
                except ValueError:
                    pass
            if vlen == val.dtype:
                if val.ndim > 1:
                    tmp = numpy.empty(shape=val.shape[:-1], dtype=object)
                    tmp.ravel()[:] = [
                        i for i in val.reshape((numpy.product(val.shape[:-1]),
                                                val.shape[-1]))
                    ]
                else:
                    tmp = numpy.array([None], dtype=object)
                    tmp[0] = val
                val = tmp
        elif self.dtype.kind == "O" or \
          (self.dtype.kind == 'V' and \
          (not isinstance(val, numpy.ndarray) or val.dtype.kind != 'V') and \
          (self.dtype.subdtype == None)):
            if len(names) == 1 and self.dtype.fields is not None:
                # Single field selected for write, from a non-array source
                if not names[0] in self.dtype.fields:
                    raise ValueError("No such field for indexing: %s" %
                                     names[0])
                dtype = self.dtype.fields[names[0]][0]
                cast_compound = True
            else:
                dtype = self.dtype
                cast_compound = False

            val = numpy.asarray(val, dtype=dtype, order='C')
            if cast_compound:
                val = val.astype(numpy.dtype([(names[0], dtype)]))
        else:
            val = numpy.asarray(val, order='C')

        # Check for array dtype compatibility and convert
        if self.dtype.subdtype is not None:
            shp = self.dtype.subdtype[1]
            valshp = val.shape[-len(shp):]
            if valshp != shp:  # Last dimension has to match
                raise TypeError(
                    "When writing to array types, last N dimensions have to match (got %s, but should be %s)"
                    % (
                        valshp,
                        shp,
                    ))
            mtype = h5t.py_create(numpy.dtype((val.dtype, shp)))
            mshape = val.shape[0:len(val.shape) - len(shp)]

        # Make a compound memory type if field-name slicing is required
        elif len(names) != 0:

            mshape = val.shape

            # Catch common errors
            if self.dtype.fields is None:
                raise TypeError(
                    "Illegal slicing argument (not a compound dataset)")
            mismatch = [x for x in names if x not in self.dtype.fields]
            if len(mismatch) != 0:
                mismatch = ", ".join('"%s"' % x for x in mismatch)
                raise ValueError(
                    "Illegal slicing argument (fields %s not in dataset type)"
                    % mismatch)

            # Write non-compound source into a single dataset field
            if len(names) == 1 and val.dtype.fields is None:
                subtype = h5t.py_create(val.dtype)
                mtype = h5t.create(h5t.COMPOUND, subtype.get_size())
                mtype.insert(self._e(names[0]), 0, subtype)

            # Make a new source type keeping only the requested fields
            else:
                fieldnames = [x for x in val.dtype.names
                              if x in names]  # Keep source order
                mtype = h5t.create(h5t.COMPOUND, val.dtype.itemsize)
                for fieldname in fieldnames:
                    subtype = h5t.py_create(val.dtype.fields[fieldname][0])
                    offset = val.dtype.fields[fieldname][1]
                    mtype.insert(self._e(fieldname), offset, subtype)

        # Use mtype derived from array (let DatasetID.write figure it out)
        else:
            mshape = val.shape
            mtype = None

        # Perform the dataspace selection
        selection = sel.select(self.shape, args, dsid=self.id)

        if selection.nselect == 0:
            return

        # Broadcast scalars if necessary.
        if (mshape == () and selection.mshape != ()):
            if self.dtype.subdtype is not None:
                raise TypeError(
                    "Scalar broadcasting is not supported for array dtypes")
            val2 = numpy.empty(selection.mshape[-1], dtype=val.dtype)
            val2[...] = val
            val = val2
            mshape = val.shape

        # Perform the write, with broadcasting
        # Be careful to pad memory shape with ones to avoid HDF5 chunking
        # glitch, which kicks in for mismatched memory/file selections
        if (len(mshape) < len(self.shape)):
            mshape_pad = (1, ) * (len(self.shape) - len(mshape)) + mshape
        else:
            mshape_pad = mshape
        mspace = h5s.create_simple(mshape_pad,
                                   (h5s.UNLIMITED, ) * len(mshape_pad))
        for fspace in selection.broadcast(mshape):
            self.id.write(mspace, fspace, val, mtype)
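
A usage sketch for the single-field write path above (names are hypothetical):

import numpy as np
import h5py

dt = np.dtype([('x', '<f4'), ('y', '<i4')])
with h5py.File('setitem_demo.h5', 'w') as f:
    d = f.create_dataset('c', shape=(4,), dtype=dt)
    d[0, 'x'] = 1.5          # write one field of one element
    d['y'] = np.arange(4)    # write a whole field across the dataset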
Code example #42
def make_new_dset(parent, shape=None, dtype=None, data=None,
                  chunks=None, compression=None, shuffle=None,
                  fletcher32=None, maxshape=None, compression_opts=None,
                  fillvalue=None, scaleoffset=None, track_times=None):
    """ Return a new low-level dataset identifier

    Only creates anonymous datasets.
    """

    # Convert data to a C-contiguous ndarray
    if data is not None:
        import base
        data = numpy.asarray(data, order="C", dtype=base.guess_dtype(data))

    # Validate shape
    if shape is None:
        if data is None:
            raise TypeError("Either data or shape must be specified")
        shape = data.shape
    else:
        shape = tuple(shape)
        if data is not None and (numpy.product(shape) != numpy.product(data.shape)):
            raise ValueError("Shape tuple is incompatible with data")

    tmp_shape = maxshape if maxshape is not None else shape
    # Validate chunk shape
    if isinstance(chunks, tuple) and (-numpy.array([ i>=j for i,j in zip(tmp_shape,chunks) if i is not None])).any():
        errmsg = "Chunk shape must not be greater than data shape in any dimension. "\
                 "{} is not compatible with {}".format(chunks, shape)
        raise ValueError(errmsg)

    if isinstance(dtype, h5py.Datatype):
        # Named types are used as-is
        tid = dtype.id
        dtype = tid.dtype  # Following code needs this
    else:
        # Validate dtype
        if dtype is None and data is None:
            dtype = numpy.dtype("=f4")
        elif dtype is None and data is not None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)
        tid = h5t.py_create(dtype, logical=1)

    # Legacy
    if any((compression, shuffle, fletcher32, maxshape, scaleoffset)) and chunks is False:
        raise ValueError("Chunked format required for given storage options")

    # Legacy
    if compression is True:
        if compression_opts is None:
            compression_opts = 4
        compression = 'gzip'

    # Legacy
    if compression in _LEGACY_GZIP_COMPRESSION_VALS:
        if compression_opts is not None:
            raise TypeError("Conflict in compression options")
        compression_opts = compression
        compression = 'gzip'

    dcpl = filters.generate_dcpl(shape, dtype, chunks, compression,
                                 compression_opts, shuffle, fletcher32,
                                 maxshape, scaleoffset)

    if fillvalue is not None:
        fillvalue = numpy.array(fillvalue)
        dcpl.set_fill_value(fillvalue)

    if track_times in (True, False):
        dcpl.set_obj_track_times(track_times)
    elif track_times is not None:
        raise TypeError("track_times must be either True or False")

    if maxshape is not None:
        maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape)
    sid = h5s.create_simple(shape, maxshape)


    dset_id = h5d.create(parent.id, None, tid, sid, dcpl=dcpl)

    if data is not None:
        dset_id.write(h5s.ALL, h5s.ALL, data)

    return dset_id
Code example #43
def concatenate(data_list, out_group=None, start=None, stop=None,
                datasets=None, dataset_filter=None):
    """Concatenate data along the time axis.

    All :class:`TOData` objects to be concatenated are assumed to have the
    same datasets and index_maps with compatible shapes and data types.

    Currently only 'time' axis concatenation is supported, and it must be the
    fastest varying index.

    All attributes, history, and other non-time-dependent information is copied
    from the first item.

    Parameters
    ----------
    data_list : list of :class:`TOData`. These are assumed to be identical in
            every way except along the axes representing time, over which they
            are concatenated. All other data and attributes are simply copied
            from the first entry of the list.
    out_group : `h5py.Group`, hdf5 filename or `memh5.Group`
            Underlying HDF5-like container that will store the data for the
            BaseData instance.
    start : int or dict with keys ``data_list[0].time_axes``
        Index in the aggregate datasets at which to start.  Everything before
        this index is excluded.
    stop : int or dict with keys ``data_list[0].time_axes``
        Index in the aggregate datasets at which to stop.  Everything after
        this index is excluded.
    datasets : sequence of strings
        Which datasets to include.  Default is all of them.
    dataset_filter : callable
        Function for preprocessing all datasets.  Useful for changing data
        types etc.  Should return a dataset.


    Returns
    -------
    data : :class:`TOData`

    """

    if dataset_filter is None:
        dataset_filter = lambda d: d

    # Inspect first entry in the list to get constant parts..
    first_data = data_list[0]
    concatenation_axes = first_data.time_axes

    # Ensure *start* and *stop* are mappings.
    if not hasattr(start, '__getitem__'):
        start = {axis : start for axis in concatenation_axes}
    if not hasattr(stop, '__getitem__'):
        stop = {axis : stop for axis in concatenation_axes}

    # Get the length of all axes for which we are concatenating.
    concat_index_lengths = {axis : 0 for axis in concatenation_axes}
    for data in data_list:
        for index_name in concatenation_axes:
            if index_name not in data.index_map.keys():
                continue
            concat_index_lengths[index_name] += len(data.index_map[index_name])

    # Get real start and stop indexes.
    for axis in concatenation_axes:
        start[axis], stop[axis] = _start_stop_inds(
                start.get(axis, None),
                stop.get(axis, None),
                concat_index_lengths[axis],
                )

    if first_data.distributed and not isinstance(out_group, h5py.Group):
        distributed = True
        comm = first_data.comm
    else:
        distributed = False
        comm = None

    # Choose return class and initialize the object.
    out = first_data.__class__(out_group, distributed=distributed, comm=comm)

    # Resolve the index maps. XXX Shouldn't be necessary after fix to
    # _copy_non_time_data.
    for axis, index_map in first_data.index_map.items():
        if axis in concatenation_axes:
            # Initialize the dataset.
            dtype = index_map.dtype
            out.create_index_map(
                    axis,
                    np.empty(shape=(stop[axis] - start[axis],), dtype=dtype),
                    )
        else:
            # Just copy it.
            out.create_index_map(axis, index_map)

    all_dataset_names = _copy_non_time_data(data_list, out)
    if datasets is None:
        dataset_names = all_dataset_names
    else:
        dataset_names = datasets

    current_concat_index_start = {axis : 0 for axis in concatenation_axes}
    # Now loop over the list and copy the data.
    for data in data_list:
        # Get the concatenation axis lengths for this BaseData.
        current_concat_index_n = {axis : len(data.index_map.get(axis, []))
                for axis in concatenation_axes}
        # Start with the index_map.
        for axis in concatenation_axes:
            axis_finished = current_concat_index_start[axis] >= stop[axis]
            axis_not_started = (current_concat_index_start[axis]
                                + current_concat_index_n[axis] <= start[axis])
            if axis_finished or axis_not_started:
                continue
            in_slice, out_slice = _get_in_out_slice(
                    start[axis], stop[axis],
                    current_concat_index_start[axis],
                    current_concat_index_n[axis],
                    )
            out.index_map[axis][out_slice] = data.index_map[axis][in_slice]
        # Now copy over the datasets and flags.
        this_dataset_names = _copy_non_time_data(data)
        for name in this_dataset_names:
            dataset = data[name]
            if name not in dataset_names:
                continue
            attrs = dataset.attrs
            dataset = dataset_filter(dataset)
            if hasattr(dataset, "attrs"):
                # Some filters modify the attributes; others return a thing
                # without attributes. So we need to check.
                attrs = dataset.attrs

            # For now only support concatenation over minor axis.
            axis = attrs['axis'][-1]
            if axis not in concatenation_axes:
                msg = "Dataset %s does not have a valid concatenation axis."
                raise ValueError(msg % name)
            axis_finished = current_concat_index_start[axis] >= stop[axis]
            axis_not_started = (current_concat_index_start[axis]
                                + current_concat_index_n[axis] <= start[axis])
            if axis_finished or axis_not_started:
                continue
            # Placeholder for eventual implementation of 'axis_rate' attribute.
            axis_rate = 1
            # If this is the first piece of data, initialize the output
            # dataset.
            #out_keys = ['flags/' + n for n in  out.flags.keys()]
            #out_keys += out.datasets.keys()
            if name not in out:
                shape = dataset.shape
                dtype = dataset.dtype
                full_shape = shape[:-1] + ((stop[axis] - start[axis]) * \
                             axis_rate,)
                if (distributed
                        and isinstance(dataset, memh5.MemDatasetDistributed)):
                    new_dset = out.create_dataset(
                            name,
                            shape=full_shape,
                            dtype=dtype,
                            distributed=True,
                            distributed_axis=dataset.distributed_axis,
                            )
                else:
                    new_dset = out.create_dataset(name, shape=full_shape,
                                                  dtype=dtype)
                memh5.copyattrs(attrs, new_dset.attrs)
            out_dset = out[name]
            in_slice, out_slice = _get_in_out_slice(
                    start[axis] * axis_rate,
                    stop[axis] * axis_rate,
                    current_concat_index_start[axis] * axis_rate,
                    current_concat_index_n[axis] * axis_rate,
                    )
            # Awkward special case for pure subarray dtypes, which h5py and
            # numpy treat differently.
            out_dtype = out_dset.dtype
            if (out_dtype.kind == 'V' and not out_dtype.fields
                        and out_dtype.shape
                        and isinstance(out_dset, h5py.Dataset)):
                #index_pairs = zip(range(dataset.shape[-1])[in_slice],
                #                  range(out_dset.shape[-1])[out_slice])
                # Drop down to the low-level interface. I think this is only
                # necessary for pretty old h5py.
                from h5py import h5t
                from h5py._hl import selections
                mtype = h5t.py_create(out_dtype)
                mdata = dataset[..., in_slice].copy().flat[:]
                mspace = selections.SimpleSelection(
                        (mdata.size // out_dtype.itemsize,)).id
                fspace = selections.select(out_dset.shape, out_slice,
                                           out_dset.id).id
                out_dset.id.write(mspace, fspace, mdata, mtype)
            else:
                out_dset[..., out_slice] = dataset[..., in_slice]
        # Increment the start indexes for the next item of the list.
        for axis in current_concat_index_start.keys():
            current_concat_index_start[axis] += current_concat_index_n[axis]

    return out
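
The `_get_in_out_slice` helper used above is defined elsewhere in caput and does not appear in this listing. As a rough guide to its semantics, here is a minimal sketch of the slice arithmetic it plausibly performs, inferred from how `in_slice` (into the current item) and `out_slice` (into the aggregate output) are used; the actual implementation may differ:

def _get_in_out_slice(start, stop, current, n):
    """Sketch only: map the overlap of the aggregate range [start, stop)
    onto one list item of length n starting at aggregate index current."""
    # Portion of the current item that falls inside [start, stop).
    in_slice = slice(max(0, start - current), min(n, stop - current))
    # Same overlap expressed in output coordinates (start maps to 0).
    out_slice = slice(max(0, current - start),
                      min(stop, current + n) - start)
    return in_slice, out_slice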
Code example #44
0
File: test_datatypes.py Project: t20100/h5glance
def test_standard_float():
    ft = h5t.py_create(np.dtype('<f4'))
    assert datatypes.fmt_dtype(ft) == 'float32'
    assert datatypes.dtype_description(ft) == '32-bit floating point'
Code example #45
0
File: test_h5t.py Project: minrk/h5py
    def test_bool(self):
        out = h5t.py_create('bool')
        self.assert_(isinstance(out, h5t.TypeEnumID))
        self.assertEqual(out.get_nmembers(), 2)
        self.assertEqual(out.dtype, dtype('bool'))
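        # Hedged follow-up (not in the original test): the two members are
        # assumed to map the booleans as FALSE -> 0 and TRUE -> 1.
        for idx in range(out.get_nmembers()):
            name = out.get_member_name(idx)
            value = out.get_member_value(idx)
            self.assert_(name in (b'FALSE', b'TRUE'))
            self.assert_(value in (0, 1))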
Code example #46
0
File: test_datatypes.py Project: t20100/h5glance
def test_vlen():
    vt_n = h5t.vlen_dtype(np.dtype('<i2'))
    vt_h = h5t.py_create(vt_n, logical=True)
    assert datatypes.fmt_dtype(vt_h) == 'vlen array of int16'
Code example #47
0
File: dataset.py Project: asdfvar/ray-trace
    def __setitem__(self, args, val):
        """ Write to the HDF5 dataset from a Numpy array.

        NumPy's broadcasting rules are honored for "simple" indexing
        (slices and integers).  For advanced indexing, the shapes must
        match.
        """
        args = args if isinstance(args, tuple) else (args,)

        # Sort field indices from the slicing
        names = tuple(x for x in args if isinstance(x, str))
        args = tuple(x for x in args if not isinstance(x, str))

        # Generally we try to avoid converting the arrays on the Python
        # side.  However, for compound literals this is unavoidable.
        if self.dtype.kind == "O" or \
          (self.dtype.kind == 'V' and \
          (not isinstance(val, numpy.ndarray) or val.dtype.kind != 'V') and \
          (self.dtype.subdtype is None)):
            if len(names) == 1 and self.dtype.fields is not None:
                # Single field selected for write, from a non-array source
                if not names[0] in self.dtype.fields:
                    raise ValueError("No such field for indexing: %s" % names[0])
                dtype = self.dtype.fields[names[0]][0]
                cast_compound = True
            else:
                dtype = self.dtype
                cast_compound = False

            val = numpy.asarray(val, dtype=dtype, order='C')
            if cast_compound:
                val = val.astype(numpy.dtype([(names[0], dtype)]))
        else:
            val = numpy.asarray(val, order='C')

        # Check for array dtype compatibility and convert
        if self.dtype.subdtype is not None:
            shp = self.dtype.subdtype[1]
            valshp = val.shape[-len(shp):]
            if valshp != shp:  # Last dimension has to match
                raise TypeError("When writing to array types, last N dimensions have to match (got %s, but should be %s)" % (valshp, shp,))
            mtype = h5t.py_create(numpy.dtype((val.dtype, shp)))
            mshape = val.shape[0:len(val.shape)-len(shp)]

        # Make a compound memory type if field-name slicing is required
        elif len(names) != 0:

            mshape = val.shape

            # Catch common errors
            if self.dtype.fields is None:
                raise TypeError("Illegal slicing argument (not a compound dataset)")
            mismatch = [x for x in names if x not in self.dtype.fields]
            if len(mismatch) != 0:
                mismatch = ", ".join('"%s"'%x for x in mismatch)
                raise ValueError("Illegal slicing argument (fields %s not in dataset type)" % mismatch)
        
            # Write non-compound source into a single dataset field
            if len(names) == 1 and val.dtype.fields is None:
                subtype = h5t.py_create(val.dtype)
                mtype = h5t.create(h5t.COMPOUND, subtype.get_size())
                mtype.insert(self._e(names[0]), 0, subtype)

            # Make a new source type keeping only the requested fields
            else:
                fieldnames = [x for x in val.dtype.names if x in names] # Keep source order
                mtype = h5t.create(h5t.COMPOUND, val.dtype.itemsize)
                for fieldname in fieldnames:
                    subtype = h5t.py_create(val.dtype.fields[fieldname][0])
                    offset = val.dtype.fields[fieldname][1]
                    mtype.insert(self._e(fieldname), offset, subtype)

        # Use mtype derived from array (let DatasetID.write figure it out)
        else:
            mshape = val.shape
            mtype = None

        # Perform the dataspace selection
        selection = sel.select(self.shape, args, dsid=self.id)

        if selection.nselect == 0:
            return

        # Broadcast scalars if necessary.
        if (mshape == () and selection.mshape != ()):
            if self.dtype.subdtype is not None:
                raise TypeError("Scalar broadcasting is not supported for array dtypes")
            val2 = numpy.empty(selection.mshape[-1], dtype=val.dtype)
            val2[...] = val
            val = val2
            mshape = val.shape

        # Perform the write, with broadcasting
        # Be careful to pad memory shape with ones to avoid HDF5 chunking
        # glitch, which kicks in for mismatched memory/file selections
        if len(mshape) < len(self.shape):
            mshape_pad = (1,)*(len(self.shape)-len(mshape)) + mshape
        else:
            mshape_pad = mshape
        mspace = h5s.create_simple(mshape_pad, (h5s.UNLIMITED,)*len(mshape_pad))
        for fspace in selection.broadcast(mshape):
            self.id.write(mspace, fspace, val, mtype)
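
A short usage sketch of the write paths this method implements; the file and dataset names are hypothetical:

import numpy
import h5py

with h5py.File('example.h5', 'w') as f:  # hypothetical file
    dset = f.create_dataset('a', (4, 3), dtype='f4')
    dset[0] = 1.0                      # simple indexing: scalar is broadcast
    dset[1:3] = numpy.arange(3)        # simple indexing: row broadcast over rows
    dset[[0, 2]] = numpy.ones((2, 3))  # advanced indexing: shapes must match

    cdset = f.create_dataset('c', (4,), dtype=[('x', 'f4'), ('y', 'i4')])
    cdset['x'] = numpy.arange(4)       # field write goes through a compound mtype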
Code example #48
0
File: test_h5t.py Project: minrk/h5py
    def test(dt):
        """ Check get_super for a given dtype """
        htype = h5t.py_create(dt)
        atype = h5t.array_create(htype, (4, 5))
        self.assert_(htype.equal(atype.get_super()))
Code example #49
0
File: test_datatypes.py Project: t20100/h5glance
def test_compound():
    ct_n = np.dtype([('x', np.float32), ('y', np.float32)])
    ct_h = h5t.py_create(ct_n)
    assert datatypes.fmt_dtype(ct_h) == '(x: float32, y: float32)'
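    # Hedged follow-up (assumption): building the same layout by hand with
    # the low-level API, as in example #47, should compare equal to the
    # py_create result, since numpy's default packed offsets match.
    ct_lo = h5t.create(h5t.COMPOUND, ct_n.itemsize)
    for fname in ct_n.names:
        fdt, offset = ct_n.fields[fname][0], ct_n.fields[fname][1]
        ct_lo.insert(fname.encode(), offset, h5t.py_create(fdt))
    assert ct_h.equal(ct_lo)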
Code example #50
0
File: test_conv.py Project: minrk/h5py

from unittest import TestCase
import numpy as np

from h5py import h5t
import ctypes

strings = ["Hi", "Hello", "This is a string", "HDF5 is awesome!"]
vlen_dtype = h5t.special_dtype(vlen=str)
vlen_htype = h5t.py_create(vlen_dtype, logical=1)
obj_htype = h5t.py_create(vlen_dtype)

class TestVlenObject(TestCase):

    """
        Test conversion routines between string vlens and object pointers
    """

    def test_obj2vlen_simple(self):
        """ Object to vlen (contiguous) """

        objarr = np.array(strings, dtype=vlen_dtype)

        destbuffer = np.ndarray(objarr.shape, dtype=np.uintp, buffer=objarr).copy()

        h5t.convert(obj_htype, vlen_htype, len(strings), destbuffer)

        for idx, val in enumerate(destbuffer):
            self.assertEqual(ctypes.string_at(int(val)), strings[idx])
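
    # Hedged companion sketch (not in the original tests): convert object
    # pointers to vlens and back, then dereference with ctypes. The in-place
    # reuse of the buffer and the py_object readback are assumptions.
    def test_obj2vlen_roundtrip(self):
        """ Object to vlen and back (sketch) """
        objarr = np.array(strings, dtype=vlen_dtype)
        buf = np.ndarray(objarr.shape, dtype=np.uintp, buffer=objarr).copy()
        h5t.convert(obj_htype, vlen_htype, len(strings), buf)
        h5t.convert(vlen_htype, obj_htype, len(strings), buf)
        for idx, val in enumerate(buf):
            obj = ctypes.cast(int(val), ctypes.py_object).value
            self.assertEqual(obj, strings[idx])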
        
Code example #51
0
File: test_datatypes.py Project: t20100/h5glance
def test_enum():
    et_n = h5t.enum_dtype({'apple': 1, 'banana': 2})
    et_h = h5t.py_create(et_n, logical=True)
    assert datatypes.fmt_dtype(et_h) == 'enum (apple, banana)'
Code example #52
0
File: utils.py Project: MrTheodor/pyh5md
import numpy
import h5py
from h5py import h5d, h5s, h5t
from h5py._hl import dataset, filters  # modules used by this snippet


def create_compact_dataset(loc, name, shape=None, dtype=None, data=None,
                           chunks=None, compression=None, shuffle=None,
                           fletcher32=None, maxshape=None,
                           compression_opts=None, fillvalue=None,
                           scaleoffset=None, track_times=None):
    """Create a new HDF5 dataset with a compact storage layout."""

    # Convert data to a C-contiguous ndarray
    if data is not None:
        import h5py._hl.base
        data = numpy.asarray(data, order="C", dtype=h5py._hl.base.guess_dtype(data))

    # Validate shape
    if shape is None:
        if data is None:
            raise TypeError("Either data or shape must be specified")
        shape = data.shape
    else:
        shape = tuple(shape)
        if data is not None and (numpy.prod(shape) != numpy.prod(data.shape)):
            raise ValueError("Shape tuple is incompatible with data")

    if isinstance(dtype, h5py.Datatype):
        # Named types are used as-is
        tid = dtype.id
        dtype = tid.dtype  # Following code needs this
    else:
        # Validate dtype
        if dtype is None and data is None:
            dtype = numpy.dtype("=f4")
        elif dtype is None and data is not None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)
        tid = h5t.py_create(dtype, logical=1)

    # Legacy
    if any((compression, shuffle, fletcher32, maxshape, scaleoffset)) and chunks is False:
        raise ValueError("Chunked format required for given storage options")

    # Legacy
    if compression is True:
        if compression_opts is None:
            compression_opts = 4
        compression = 'gzip'

    # Legacy
    if compression in range(10):
        if compression_opts is not None:
            raise TypeError("Conflict in compression options")
        compression_opts = compression
        compression = 'gzip'

    if h5py.version.version_tuple >= (2, 2, 0, ''):
        dcpl = filters.generate_dcpl(shape, dtype, chunks, compression,
                                     compression_opts, shuffle, fletcher32,
                                     maxshape, None)
    else:
        dcpl = filters.generate_dcpl(shape, dtype, chunks, compression,
                                     compression_opts, shuffle, fletcher32,
                                     maxshape)

    if fillvalue is not None:
        fillvalue = numpy.array(fillvalue)
        dcpl.set_fill_value(fillvalue)

    if track_times in (True, False):
        dcpl.set_obj_track_times(track_times)
    elif track_times is not None:
        raise TypeError("track_times must be either True or False")

    dcpl.set_layout(h5d.COMPACT)

    if maxshape is not None:
        maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape)
    sid = h5s.create_simple(shape, maxshape)


    dset_id = h5d.create(loc.id, None, tid, sid, dcpl=dcpl)

    if data is not None:
        dset_id.write(h5s.ALL, h5s.ALL, data)

    dset = dataset.Dataset(dset_id)
    if name is not None:
        loc[name] = dset
    return dset
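
A brief usage sketch; the file name is hypothetical, and the final assertion verifies the layout through h5py's low-level dataset creation property list:

import numpy
import h5py
from h5py import h5d

with h5py.File('compact.h5', 'w') as f:  # hypothetical file
    dset = create_compact_dataset(f, 'small',
                                  data=numpy.arange(16, dtype='=f4'))
    # Compact layout stores the raw data in the object header.
    assert dset.id.get_create_plist().get_layout() == h5d.COMPACT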
Code example #53
0
File: test_datatypes.py Project: t20100/h5glance
def test_array():
    at_n = np.dtype((np.float64, (3, 4)))
    at_h = h5t.py_create(at_n)
    assert datatypes.fmt_dtype(at_h) == '3 × 4 array of float64'
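    # Hedged follow-up (assumption): per example #48, get_super on the array
    # type returns a type equal to the element type.
    assert at_h.get_super().equal(h5t.py_create(np.dtype('float64')))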
Code example #54
0
File: tod.py Project: radiocosmology/caput
def concatenate(data_list, out_group=None, start=None, stop=None,
                datasets=None, dataset_filter=None):
    """Concatenate data along the time axis.

    All :class:`TOData` objects to be concatenated are assumed to have the
    same datasets and index_maps with compatible shapes and data types.

    Currently only 'time' axis concatenation is supported, and it must be the
    fastest varying index.

    All attributes, history, and other non-time-dependent information is copied
    from the first item.

    Parameters
    ----------
    data_list : list of :class:`TOData`
        These are assumed to be identical in every way except along the axes
        representing time, over which they are concatenated. All other data
        and attributes are simply copied from the first entry of the list.
    out_group : `h5py.Group`, HDF5 filename, or `memh5.Group`
        Underlying HDF5-like container that will store the data for the
        BaseData instance.
    start : int or dict with keys ``data_list[0].time_axes``
        Index in the aggregate datasets at which to start. Everything before
        this index is excluded.
    stop : int or dict with keys ``data_list[0].time_axes``
        Index in the aggregate datasets at which to stop. Everything after
        this index is excluded.
    datasets : sequence of strings
        Which datasets to include.  Default is all of them.
    dataset_filter : callable with one or two arguments
        Function for preprocessing all datasets. Useful for changing data
        types etc. Takes a dataset as an argument and should return a
        dataset (either h5py or memh5). Optionally it may accept a second
        argument that is a slice along the time axis, which the filter
        should apply.

    Returns
    -------
    data : :class:`TOData`

    """

    if dataset_filter is None:
        def dataset_filter(d): return d

    filter_time_slice = len(inspect.getargspec(dataset_filter).args) == 2

    # Inspect first entry in the list to get constant parts.
    first_data = data_list[0]
    concatenation_axes = first_data.time_axes

    # Ensure *start* and *stop* are mappings.
    if not hasattr(start, '__getitem__'):
        start = {axis: start for axis in concatenation_axes}
    if not hasattr(stop, '__getitem__'):
        stop = {axis: stop for axis in concatenation_axes}

    # Get the length of all axes for which we are concatenating.
    concat_index_lengths = {axis: 0 for axis in concatenation_axes}
    for data in data_list:
        for index_name in concatenation_axes:
            if index_name not in data.index_map:
                continue
            concat_index_lengths[index_name] += len(data.index_map[index_name])

    # Get real start and stop indexes.
    for axis in concatenation_axes:
        start[axis], stop[axis] = _start_stop_inds(
                start.get(axis, None),
                stop.get(axis, None),
                concat_index_lengths[axis],
                )

    if first_data.distributed and not isinstance(out_group, h5py.Group):
        distributed = True
        comm = first_data.comm
    else:
        distributed = False
        comm = None

    # Choose return class and initialize the object.
    out = first_data.__class__(out_group, distributed=distributed, comm=comm)

    # Resolve the index maps. XXX Shouldn't be necessary after fix to
    # _copy_non_time_data.
    for axis, index_map in first_data.index_map.items():
        if axis in concatenation_axes:
            # Initialize the dataset.
            dtype = index_map.dtype
            out.create_index_map(
                    axis,
                    np.empty(shape=(stop[axis] - start[axis],), dtype=dtype),
                    )
        else:
            # Just copy it.
            out.create_index_map(axis, index_map)

    # Copy over the reverse maps.
    for axis, reverse_map in first_data.reverse_map.items():
        out.create_reverse_map(axis, reverse_map)

    all_dataset_names = _copy_non_time_data(data_list, out)
    if datasets is None:
        dataset_names = all_dataset_names
    else:
        dataset_names = datasets

    current_concat_index_start = {axis: 0 for axis in concatenation_axes}
    # Now loop over the list and copy the data.
    for data in data_list:
        # Get the concatenation axis lengths for this BaseData.
        current_concat_index_n = {axis: len(data.index_map.get(axis, []))
                                  for axis in concatenation_axes}
        # Start with the index_map.
        for axis in concatenation_axes:
            axis_finished = current_concat_index_start[axis] >= stop[axis]
            axis_not_started = (current_concat_index_start[axis]
                                + current_concat_index_n[axis] <= start[axis])
            if axis_finished or axis_not_started:
                continue
            in_slice, out_slice = _get_in_out_slice(
                    start[axis], stop[axis],
                    current_concat_index_start[axis],
                    current_concat_index_n[axis],
                    )
            out.index_map[axis][out_slice] = data.index_map[axis][in_slice]
        # Now copy over the datasets and flags.
        this_dataset_names = _copy_non_time_data(data)
        for name in this_dataset_names:
            dataset = data[name]
            if name not in dataset_names:
                continue
            attrs = dataset.attrs

            # Figure out which axis we are concatenating over.
            for a in memh5.bytes_to_unicode(attrs['axis']):
                if a in concatenation_axes:
                    axis = a
                    break
            else:
                msg = "Dataset %s does not have a valid concatenation axis."
                raise ValueError(msg % name)
            # Figure out where we are in that axis and how to slice it.
            axis_finished = current_concat_index_start[axis] >= stop[axis]
            axis_not_started = (current_concat_index_start[axis]
                                + current_concat_index_n[axis] <= start[axis])
            if axis_finished or axis_not_started:
                continue
            axis_rate = 1  # Place holder for eventual implementation.
            in_slice, out_slice = _get_in_out_slice(
                    start[axis] * axis_rate,
                    stop[axis] * axis_rate,
                    current_concat_index_start[axis] * axis_rate,
                    current_concat_index_n[axis] * axis_rate,
                    )

            # Filter the dataset.
            if filter_time_slice:
                dataset = dataset_filter(dataset, in_slice)
            else:
                dataset = dataset_filter(dataset)
            if hasattr(dataset, "attrs"):
                # Some filters modify the attributes; others return a thing
                # without attributes. So we need to check.
                attrs = dataset.attrs

            # Do this *after* the filter, in case filter changed axis order.
            axis_ind = list(memh5.bytes_to_unicode(attrs['axis'])).index(axis)

            # Slice input data if the filter doesn't do it.
            if not filter_time_slice:
                in_slice = (slice(None),) * axis_ind + (in_slice,)
                dataset = dataset[in_slice]

            # The time slice filter above will convert dataset from a MemDataset
            # instance to either an MPIArray or np.ndarray (depending on if
            # it is distributed).  Need to convert back to the appropriate
            # subclass of MemDataset for the initialization of output dataset.
            if not isinstance(dataset, memh5.MemDataset):
                if distributed and isinstance(dataset, mpiarray.MPIArray):
                    dataset = memh5.MemDatasetDistributed.from_mpi_array(dataset)
                else:
                    dataset = memh5.MemDatasetCommon.from_numpy_array(dataset)

            # If this is the first piece of data, initialize the output
            # dataset.
            if name not in out:
                shape = dataset.shape
                dtype = dataset.dtype
                full_shape = shape[:axis_ind]
                full_shape += ((stop[axis] - start[axis]) * axis_rate,)
                full_shape += shape[axis_ind + 1:]
                if (distributed
                        and isinstance(dataset, memh5.MemDatasetDistributed)):
                    new_dset = out.create_dataset(
                            name,
                            shape=full_shape,
                            dtype=dtype,
                            distributed=True,
                            distributed_axis=dataset.distributed_axis,
                            )
                else:
                    new_dset = out.create_dataset(name, shape=full_shape,
                                                  dtype=dtype)
                memh5.copyattrs(attrs, new_dset.attrs)

            out_dset = out[name]
            out_slice = (slice(None),) * axis_ind + (out_slice,)

            # Copy the data in.
            out_dtype = out_dset.dtype
            if (out_dtype.kind == 'V' and not out_dtype.fields
                    and out_dtype.shape
                    and isinstance(out_dset, h5py.Dataset)):
                # Awkward special case for pure subarray dtypes, which h5py and
                # numpy treat differently.
                # Drop down to the low-level interface. I think this is only
                # necessary for pretty old h5py.
                from h5py import h5t
                from h5py._hl import selections
                mtype = h5t.py_create(out_dtype)
                mdata = dataset.copy().flat[:]
                mspace = selections.SimpleSelection(
                        (mdata.size // out_dtype.itemsize,)).id
                fspace = selections.select(out_dset.shape, out_slice,
                                           out_dset.id).id
                out_dset.id.write(mspace, fspace, mdata, mtype)
            else:
                out_dset[out_slice] = dataset[:]
        # Increment the start indexes for the next item of the list.
        for axis in current_concat_index_start.keys():
            current_concat_index_start[axis] += current_concat_index_n[axis]

    return out
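
A hedged usage sketch of `concatenate`; `MyTOData`, the input file names, and the dataset name 'vis' are hypothetical stand-ins for a real :class:`TOData` subclass, and the two-argument filter assumes time is the last axis:

import numpy as np

def cast_filter(dataset, time_slice):
    # Two-argument filter: slice along time (assumed last axis here) and
    # cast, so only the selected samples are read and converted.
    return np.asarray(dataset[..., time_slice]).astype(np.float32)

data_list = [MyTOData.from_file(fname)             # hypothetical subclass
             for fname in ('day1.h5', 'day2.h5')]  # hypothetical files
merged = concatenate(
    data_list,
    out_group='merged.h5',      # write the result to a new HDF5 container
    start=10, stop=5000,        # trim the aggregate time axis
    datasets=['vis'],           # copy only this dataset
    dataset_filter=cast_filter,
)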