def get_data_shape(self, dataset_name, where=ROOT_NODE_PATH, ignore_errors=False):
    """
    Read the shape of the given data set.

    :param dataset_name: Name of the data set from where to read data
    :param where: represents the path where dataset is stored (e.g. /data/info)
    :param ignore_errors: when True, return 0 for a missing data set instead of raising
    :return: a tuple containing data size
    :raises MissingDataSetException: when the data set is missing and ignore_errors is False
    """
    LOG.debug("Reading data from data set: %s" % dataset_name)
    if dataset_name is None:
        dataset_name = ''
    if where is None:
        where = self.ROOT_NODE_PATH
    try:
        # Open file to read data
        hdf5File = self._open_h5_file('r')
        data_array = hdf5File[where + dataset_name]
        return data_array.shape
    except KeyError:
        if not ignore_errors:
            # Log at ERROR level, consistent with get_data's handling of the same failure
            LOG.error("Trying to read data from a missing data set: %s" % dataset_name)
            raise MissingDataSetException("Could not locate dataset: %s" % dataset_name)
        else:
            return 0
    finally:
        # Always release the H5 file handle, even when the read failed
        self.close_file()
def get_data(self, dataset_name, data_slice=None, where=ROOT_NODE_PATH, ignore_errors=False):
    """
    Read data from the given data set, optionally filtered through a slice.

    :param dataset_name: Name of the data set from where to read data
    :param data_slice: Specify how to retrieve data from array {e.g (slice(1,10,1),slice(1,6,2)) }
    :param where: represents the path where dataset is stored (e.g. /data/info)
    :return: a numpy.ndarray containing filtered data
    """
    LOG.debug("Reading data from data set: %s" % dataset_name)
    dataset_name = '' if dataset_name is None else dataset_name
    where = self.ROOT_NODE_PATH if where is None else where
    try:
        h5_file = self._open_h5_file('r')
        node = h5_file[where + dataset_name]
        # A missing slice specification means "read the whole data set"
        if data_slice is None:
            return node[()]
        return node[data_slice]
    except KeyError:
        if ignore_errors:
            return numpy.ndarray(0)
        LOG.error("Trying to read data from a missing data set: %s" % dataset_name)
        raise MissingDataSetException("Could not locate dataset: %s" % dataset_name)
    finally:
        # Close the file regardless of read outcome
        self.close_file()
def __init__(self, h5py_dataset, buffer_size=300, buffered_data=None, grow_dimension=-1):
    """
    :param h5py_dataset: the h5py dataset being buffered (mandatory)
    :param buffer_size: maximum number of buffered entries before a flush
    :param buffered_data: optional pre-existing data to seed the buffer with
    :param grow_dimension: index of the dimension along which the dataset grows
    :raises MissingDataSetException: when h5py_dataset is None
    """
    # Validate the mandatory argument before touching instance state
    if h5py_dataset is None:
        # NOTE: the original message concatenated "the" + "buffering" with no space
        raise MissingDataSetException("A H5pyStorageBuffer instance must have a h5py dataset for which the "
                                      "buffering is done. Please supply one to the 'h5py_dataset' parameter.")
    self.buffered_data = buffered_data
    self.buffer_size = buffer_size
    self.h5py_dataset = h5py_dataset
    self.grow_dimension = grow_dimension
def load(self):
    # type: () -> typing.Union[str, int, float]
    """Return this field's stored scalar value from the H5 metadata.

    Assumes the h5 layer returns the same type that was stored;
    if paranoid do self.trait_attribute.field_type(value).

    :raises MissingDataSetException: when the field is absent from the metadata
    """
    metadata = self.owner.storage_manager.get_metadata()
    # Guard clause: fail fast when the field was never stored
    if self.field_name not in metadata:
        raise MissingDataSetException(self.field_name)
    return metadata[self.field_name]
def get_metadata(self, dataset_name='', where=ROOT_NODE_PATH, ignore_errors=False):
    """
    Retrieve ALL meta-data information for root node or for a given data set.

    :param dataset_name: name of the dataset for which to read metadata. If None, read metadata from ROOT node.
    :param where: represents the path where dataset is stored (e.g. /data/info)
    :param ignore_errors: when True, a missing data set yields an empty ndarray instead of raising
    :returns: a dictionary containing all metadata associated with the node
    :raises MissingDataSetException: when the node is missing and ignore_errors is False
    :raises FileStructureException: when metadata attributes cannot be read
    """
    LOG.debug("Retrieving metadata for dataset: %s" % dataset_name)
    if dataset_name is None:
        dataset_name = ''
    if where is None:
        where = self.ROOT_NODE_PATH
    meta_key = ""
    try:
        # Open file to read data
        hdf5File = self._open_h5_file('r')
        node = hdf5File[where + dataset_name]
        # Now retrieve metadata values
        all_meta_data = {}
        for meta_key in node.attrs:
            new_key = meta_key
            # Strip the internal TVB prefix so callers see plain attribute names
            if meta_key.startswith(self.TVB_ATTRIBUTE_PREFIX):
                new_key = meta_key[len(self.TVB_ATTRIBUTE_PREFIX):]
            value = node.attrs[meta_key]
            all_meta_data[new_key] = self._deserialize_value(value)
        return all_meta_data
    except KeyError:
        if not ignore_errors:
            msg = "Trying to read data from a missing data set: %s" % (where + dataset_name)
            LOG.warning(msg)
            raise MissingDataSetException(msg)
        else:
            return numpy.ndarray(0)
    except AttributeError:
        msg = "Trying to get value for missing metadata %s" % meta_key
        LOG.error(msg)
        raise FileStructureException(msg)
    # 'except Exception, excep' was Python-2-only syntax (SyntaxError on Python 3);
    # the 'as' form is valid on Python 2.6+ and 3.x alike.
    except Exception as excep:
        msg = "Failed to read metadata from H5 file! %s" % self.__storage_full_name
        LOG.exception(excep)
        LOG.error(msg)
        raise FileStructureException(msg)
def get_data(self, dataset_name, data_slice=None, where=ROOT_NODE_PATH, ignore_errors=False, close_file=True):
    """
    Read data from the given data set, filtered through an optional slice.

    :param close_file: Automatically close after reading the current field
    :param ignore_errors: return None in case of error, or throw exception
    :param dataset_name: Name of the data set from where to read data
    :param data_slice: Specify how to retrieve data from array {e.g (slice(1,10,1),slice(1,6,2)) }
    :param where: represents the path where dataset is stored (e.g. /data/info)
    :returns: a numpy.ndarray containing filtered data
    """
    LOG.debug("Reading data from data set: %s" % dataset_name)
    dataset_name = dataset_name if dataset_name is not None else ''
    where = where if where is not None else self.ROOT_NODE_PATH
    data_path = where + dataset_name
    try:
        h5_file = self._open_h5_file('r')
        if data_path not in h5_file:
            # The node does not exist: either report it or silently return None
            if ignore_errors:
                return None
            LOG.error("Trying to read data from a missing data set: %s" % dataset_name)
            raise MissingDataSetException("Could not locate dataset: %s" % dataset_name)
        node = h5_file[data_path]
        if data_slice is not None:
            return node[data_slice]
        full_read = node[()]
        # Empty storage (presumably h5py's Empty sentinel) is normalized to an ndarray
        if isinstance(full_read, hdf5.Empty):
            return numpy.empty([])
        return full_read
    finally:
        if close_file:
            self.close_file()