Exemple #1
0
    def __init__(self, filename, groupname, index=None):
        """Initializes a h5features reader to read a group in a HDF5 file.

        The file is opened read-only and the handle is kept in
        ``self.h5file``. If anything fails once the file is open, the
        handle is closed before the exception propagates, so a failed
        construction does not leak an open file.

        Parameters
        ----------

        - filename : path of the HDF5 file to read.

        - groupname : name of the group to read in that file.

        - index : optional --- The index to use for that group. If
             None (default), the index is read from the group, with
             the index reader matching the version of the file.

        Raises
        ------

        IOError if *filename* is not a HDF5 file, or if *groupname* is
        not found in the file.

        """
        # check filename
        if not h5py.is_hdf5(filename):
            raise IOError('{} is not a HDF5 file'.format(filename))
        self.filename = filename

        # open the HDF5 file for reading
        self.h5file = h5py.File(self.filename, 'r')

        # From here on the file is open: any failure must release the
        # handle, because the caller never gets an instance to close.
        try:
            # access to the requested group
            if groupname not in self.h5file:
                raise IOError('{} is not a valid group in {}'
                              .format(groupname, self.filename))
            self.groupname = groupname
            self.group = self.h5file[groupname]

            # Get the version of the file being read
            self.version = self._read_version()

            if index is None:
                # No index provided: read it from the group, with the
                # index reader matching the file version.
                if self.version == '0.1':
                    index_reader = IndexV0_1()
                elif self.version == '1.0':
                    index_reader = IndexV1_0()
                else:
                    index_reader = Index()

                self.index = index_reader.read(self.group)
            else:
                self.index = index
        except Exception:
            self.h5file.close()
            raise
Exemple #2
0
    def write(self, data, groupname='features', append=True):
        """Write h5features data in a specified group.

        Parameters
        ----------

        - data : dict --- Must provide the keys 'items', 'times' and
             'features'. Each value must expose the methods
             ``create_dataset(group, chunk_size)`` and
             ``write(group)``; the 'features' value must also expose
             a ``nb_per_chunk`` attribute, used to chunk the times.

        - groupname : str, optional --- The name of the group in which
             to write the data.

        - append : bool, optional --- This parameter has no effect if
             the *groupname* is not an existing group in the file. If
             set to True (default), try to append new data in the
             group. If False erase all data in the group before
             writing.

        Raises
        ------

        IOError if *append* is True and the data is not appendable to
        the existing group.

        """
        # shortcut from parameters
        items = data['items']
        times = data['times']
        featu = data['features']

        # Open the HDF5 file for writing/appending in the group.
        with h5py.File(self.filename, mode='a') as h5file:
            # Initialize an empty index
            index = Index()

            # Overwrite requested on an existing group: unlink it from
            # the file so it is recreated empty below. (A plain
            # ``del group`` would only unbind the local name and leave
            # the stored data untouched.)
            if not append and groupname in h5file:
                del h5file[groupname]

            if groupname in h5file:
                # The group already exists and we want to append to
                # it, raise if we cannot
                group = h5file[groupname]
                if not self.is_appendable_to(group, data):
                    raise IOError('data is not appendable to the group {} in {}'
                                  .format(groupname, self.filename))
            else:
                # The group does not exist (or was just erased),
                # create it...
                group = h5file.create_group(groupname)
                group.attrs['version'] = self.version

                # ... and initialize it with empty datasets
                featu.create_dataset(group, self.chunk_size)
                items.create_dataset(group, self.chunk_size)
                index.create_dataset(group, self.chunk_size)
                # chunking the times depends on features chunks
                times.create_dataset(group, featu.nb_per_chunk)

            # writing data TODO assert no side effects here,
            # e.g. writing features concat them in place...
            index.write(group, data['items'], data['features'])
            for dataset in ['items', 'times', 'features']:
                data[dataset].write(group)