def __init__(self, filename, groupname, index=None):
    """Initialize a h5features reader on a group of a HDF5 file.

    The file is opened read-only and kept open in ``self.h5file``.

    Parameters
    ----------

    - filename : str --- Path to the HDF5 file to read from.

    - groupname : str --- Name of the group to read in the file.

    - index : optional --- An already loaded index for the group. If
        None (default), the index is read from the group with the
        index class matching the version of the file.

    Raises
    ------

    IOError if *filename* is not a HDF5 file or if *groupname* is
    not a group in that file.

    """
    # check filename
    if not h5py.is_hdf5(filename):
        raise IOError('{} is not a HDF5 file'.format(filename))
    self.filename = filename

    # open the HDF5 file for reading
    self.h5file = h5py.File(self.filename, 'r')

    # From here on the file handle is open: close it if anything
    # goes wrong, so a failed construction does not leak it.
    try:
        # access to the requested group
        if groupname not in self.h5file:
            raise IOError('{} is not a valid group in {}'
                          .format(groupname, self.filename))
        self.groupname = groupname
        self.group = self.h5file[groupname]

        # get the version of the file being read
        self.version = self._read_version()

        if index is None:
            # index not provided: read it from the group, choosing
            # the index reader according to the file version
            if self.version == '0.1':
                index_reader = IndexV0_1()
            elif self.version == '1.0':
                index_reader = IndexV1_0()
            else:
                index_reader = Index()
            self.index = index_reader.read(self.group)
        else:
            self.index = index
    except Exception:
        self.h5file.close()
        raise
def write(self, data, groupname='features', append=True):
    """Write h5features data in a specified group.

    Parameters
    ----------

    - data : dict --- The data to write, with the keys 'items',
        'times' and 'features'. Each value must provide the methods
        ``create_dataset(group, chunk_size)`` and ``write(group)``;
        the 'features' value must also expose ``nb_per_chunk``,
        which is used to chunk the times dataset.

    - groupname : str, optional --- The name of the group in which
        to write the data.

    - append : bool, optional --- This parameter has no effect if
        the *groupname* is not an existing group in the file. If
        set to True (default), try to append new data in the
        group. If False erase all data in the group before
        writing.

    Raises
    ------

    IOError if *append* is True and the data is not appendable to
    the existing group.

    """
    # shortcut from parameters
    items = data['items']
    times = data['times']
    featu = data['features']

    # Open the HDF5 file for writing/appending in the group.
    with h5py.File(self.filename, mode='a') as h5file:
        # Initialize an empty index
        index = Index()

        # Want to overwrite: remove the existing group from the
        # file. Deleting through the file object is required here,
        # a plain `del group` would only unbind the local name and
        # leave the group untouched on disk.
        if not append and groupname in h5file:
            del h5file[groupname]

        if groupname in h5file:
            # The group already exists and we want to append data,
            # raise if we cannot
            group = h5file[groupname]
            if not self.is_appendable_to(group, data):
                raise IOError('data is not appendable to the group {} in {}'
                              .format(groupname, self.filename))
        else:
            # The group does not exist (or has just been erased),
            # create it...
            group = h5file.create_group(groupname)
            group.attrs['version'] = self.version

            # ... and initialize it with empty datasets
            featu.create_dataset(group, self.chunk_size)
            items.create_dataset(group, self.chunk_size)
            index.create_dataset(group, self.chunk_size)

            # chunking the times depends on features chunks
            times.create_dataset(group, featu.nb_per_chunk)

        # writing data TODO assert no side effects here,
        # e.g. writing features concat them in place...
        index.write(group, items, featu)
        for dataset in ['items', 'times', 'features']:
            data[dataset].write(group)