class TimeSeriesH5(H5File):
    """
    H5File subclass that declares the stored fields of a ``TimeSeries`` and
    offers paged read and append helpers over its ``data`` and ``time`` datasets.
    """

    def __init__(self, path):
        """
        Declare the field accessors and, when the file is not new, cache the
        header values used by :meth:`read_time_page`.

        :param path: forwarded unchanged to ``H5File.__init__``
        """
        super(TimeSeriesH5, self).__init__(path)
        self.title = Scalar(TimeSeries.title, self)
        self.data = DataSet(TimeSeries.data, self, expand_dimension=0)
        self.nr_dimensions = Scalar(Int(), self, name="nr_dimensions")

        # omitted length_nd, these are indexing props, to be removed from datatype too
        self.labels_ordering = Json(TimeSeries.labels_ordering, self)
        self.labels_dimensions = Json(TimeSeries.labels_dimensions, self)

        self.time = DataSet(TimeSeries.time, self, expand_dimension=0)
        self.start_time = Scalar(TimeSeries.start_time, self)
        self.sample_period = Scalar(TimeSeries.sample_period, self)
        self.sample_period_unit = Scalar(TimeSeries.sample_period_unit, self)
        self.sample_rate = Scalar(Float(), self, name="sample_rate")

        # omitted has_surface_mapping, has_volume_mapping, indexing props,
        # to be removed from datatype too

        # experiment: load header data eagerly, see surface for a lazy approach.
        # As we do not explicitly make a difference between opening for read or
        # write, the file might not yet exist, so loading headers makes no sense.
        if not self.is_new_file:
            self._sample_period = self.sample_period.load()
            self._start_time = self.start_time.load()

    # Experimental port of some of the data access apis from the datatype.
    # NOTE: some methods can not be here as they load data from dependent data
    # types or they assume that dependent data has been loaded.
    # Those belong to a higher level where dependent h5 files are handled and
    # partially loaded datatypes are filled.

    def read_data_shape(self):
        """
        :returns: the ``shape`` reported by the ``data`` accessor
        """
        return self.data.shape

    def read_data_slice(self, data_slice):
        """
        Expose chunked-data access.

        :param data_slice: index expression applied directly to ``data``
        """
        return self.data[data_slice]

    def read_time_page(self, current_page, page_size, max_size=None):
        """
        Compute time for current page.

        The values are synthesized from the cached start time and sample
        period, which are only available when the file already existed at
        construction time (see ``__init__``).

        :param current_page: Starting from 0
        :param page_size: number of samples in a full page
        :param max_size: optional cap on the number of samples returned;
                         defaults to ``page_size``
        :returns: array holding ``min(page_size, max_size)`` time values
        """
        # todo: why are we even storing the time array if we return a synthesized version?
        current_page = int(current_page)
        page_size = int(page_size)
        max_size = page_size if max_size is None else int(max_size)

        page_real_size = page_size * self._sample_period
        start_time = self._start_time + current_page * page_real_size

        # Generate an exact sample count rather than ``arange(start, end, period)``:
        # with a non-integer step the length of ``arange`` is computed from a
        # floating point division and may come out one element too long.
        nr_samples = min(page_size, max_size)
        return start_time + numpy.arange(nr_samples) * self._sample_period

    def read_channels_page(self, from_idx, to_idx, step=None, specific_slices=None, channels_list=None):
        """
        Read and return only the data page for the specified channels list.

        :param from_idx: the starting time idx from which to read data
        :param to_idx: the end time idx up until to which you read data
        :param step: increments in which to read the data. Optional, default to 1.
        :param specific_slices: optional parameter. If specified slices the data accordingly.
        :param channels_list: the channels for which we want data, either as a
                              JSON encoded list or as an already parsed sequence;
                              when empty or missing all channels are returned
        """
        if channels_list:
            # Mirror the handling of ``specific_slices`` in read_data_page:
            # only decode when a JSON document was actually passed.
            if isinstance(channels_list, (str, bytes, bytearray)):
                channels_list = json.loads(channels_list)
            channels_list = [int(channel) for channel in channels_list]

        # The decoded list may itself be empty, hence the second truth test.
        channel_slice = tuple(channels_list) if channels_list else slice(None)

        data_page = self.read_data_page(from_idx, to_idx, step, specific_slices)
        if len(data_page.shape) == 1:
            # This is just a 1D array like in the case of Global Average monitor.
            # No need for the channels list.
            return data_page.reshape(data_page.shape[0], 1)
        return data_page[:, channel_slice]

    def read_data_page(self, from_idx, to_idx, step=None, specific_slices=None):
        """
        Retrieve one page of data (paging done based on time).

        :param from_idx: first time index of the page
        :param to_idx: end time index (exclusive), clipped to the stored length
        :param step: stride along the time dimension. Optional, default to 1.
        :param specific_slices: optional per-dimension indices (a sequence or
                                its JSON encoding) for the dimensions other than
                                0 and 2; when missing index 0 is used
        :returns: the selected data with length-1 dimensions squeezed out, but
                  never with fewer than 2 dimensions
        """
        from_idx, to_idx = int(from_idx), int(to_idx)
        if isinstance(specific_slices, str):
            specific_slices = json.loads(specific_slices)
        step = 1 if step is None else int(step)

        overall_shape = self.data.shape
        slices = []
        for dim, dim_size in enumerate(overall_shape):
            if dim == 0:
                # Time slice
                slices.append(slice(from_idx, min(to_idx, dim_size), step))
            elif dim == 2:
                # Read full of the main_dimension (space for the simulator)
                slices.append(slice(dim_size))
            elif specific_slices is None:
                slices.append(slice(0, 1))
            else:
                selected = specific_slices[dim]
                slices.append(slice(selected, min(selected + 1, dim_size), 1))

        data = self.data[tuple(slices)].squeeze()

        if len(data.shape) < 2:
            # Do not allow time dimension to get squeezed, a 2D result need to
            # come out of this method. This also covers the 0-d array obtained
            # when a single time point of a single node was selected.
            # NOTE(review): a page with several time points but a single node
            # also ends up here and is returned as (1, nr_time_points) - confirm
            # that callers expect time on the second axis in that case.
            data = data.reshape((1, -1))
        return data

    def write_time_slice(self, partial_result):
        """
        Append a new value to the ``time`` attribute.
        """
        self.time.append(partial_result)

    def write_data_slice(self, partial_result):
        """
        Append a chunk of time-series data to the ``data`` attribute.
        """
        self.data.append(partial_result)

    def write_data_slice_on_grow_dimension(self, partial_result, grow_dimension=0):
        """
        Append a chunk of time-series data to the ``data`` attribute, passing
        the dimension to grow explicitly.

        :param partial_result: the chunk to append
        :param grow_dimension: forwarded to ``data.append``
        """
        self.data.append(partial_result, grow_dimension=grow_dimension)

    def get_min_max_values(self):
        """
        Retrieve the minimum and maximum values from the metadata.

        :returns: (minimum_value, maximum_value)
        """
        metadata = self.data.get_cached_metadata()
        return metadata.min, metadata.max

    def get_space_labels(self):
        """
        It assumes that we want to select in the 3'rd dimension,
        and generates labels for each point in that dimension.
        Subclasses are more specific.

        :return: An array of strings.
        """
        if self.nr_dimensions.load() <= 2:
            return []
        return ['signal-%d' % i for i in range(self.data.shape[2])]

    def get_grouped_space_labels(self):
        """
        :return: A list of label groups. A label group is a tuple (name, [(label_idx, label)...]).
                 Default all labels in a group named ''
        """
        return [('', list(enumerate(self.get_space_labels())))]

    def get_default_selection(self):
        """
        :return: The measure point indices that have to be shown by default. By default show all.
                 An empty list when the data has no 3'rd dimension to select in.
        """
        shape = self.data.shape
        if len(shape) < 3:
            # Same situation in which get_space_labels yields no labels.
            return []
        return list(range(min(NO_OF_DEFAULT_SELECTED_CHANNELS, shape[2])))

    def get_measure_points_selection_gid(self):
        """
        :return: a datatype gid with which to obtain all valid measure point selection for this time series
                 We have to decide if the default should be all selections or none
        """
        return ''

    def store_references(self, ts):
        """
        Does nothing in this class.

        :param ts: unused here
        """
        pass
class TimeSeriesH5(H5File):
    """
    H5File subclass that declares the stored fields of a ``TimeSeries`` and
    offers paged read and append helpers over its ``data`` and ``time`` datasets.
    """

    def __init__(self, path):
        """
        Declare the field accessors and, when the storage manager reports a
        valid HDF5 file, cache the header values used by :meth:`read_time_page`.

        :param path: forwarded unchanged to ``H5File.__init__``
        """
        super(TimeSeriesH5, self).__init__(path)
        self.title = Scalar(TimeSeries.title)
        self.data = DataSet(TimeSeries.data, expand_dimension=0)
        self.nr_dimensions = Scalar(TimeSeries.nr_dimensions)

        # omitted length_nd, these are indexing props, to be removed from datatype too
        self.labels_ordering = Json(TimeSeries.labels_ordering)
        self.labels_dimensions = Json(TimeSeries.labels_dimensions)

        self.time = DataSet(TimeSeries.time, expand_dimension=0)
        self.start_time = Scalar(TimeSeries.start_time)
        self.sample_period = Scalar(TimeSeries.sample_period)
        self.sample_period_unit = Scalar(TimeSeries.sample_period_unit)
        self.sample_rate = Scalar(TimeSeries.sample_rate)
        self._end_accessor_declarations()

        # omitted has_surface_mapping, has_volume_mapping, indexing props,
        # to be removed from datatype too

        # experiment: load header data eagerly, see surface for a lazy approach.
        # As we do not explicitly make a difference between opening for read or
        # write, the file might not yet exist, so loading headers makes no sense.
        if self.storage_manager.is_valid_hdf5_file():
            self._sample_period = self.sample_period.load()
            self._start_time = self.start_time.load()

    # Experimental port of some of the data access apis from the datatype.
    # NOTE: some methods can not be here as they load data from dependent data
    # types or they assume that dependent data has been loaded.
    # Those belong to a higher level where dependent h5 files are handled and
    # partially loaded datatypes are filled.

    def read_data_shape(self):
        """
        :returns: the ``shape`` reported by the ``data`` accessor
        """
        return self.data.shape

    def read_data_slice(self, data_slice):
        """
        Expose chunked-data access.

        :param data_slice: index expression applied directly to ``data``
        """
        return self.data[data_slice]

    def read_time_page(self, current_page, page_size, max_size=None):
        """
        Compute time for current page.

        The values are synthesized from the cached start time and sample
        period, which are only available when a valid HDF5 file was found at
        construction time (see ``__init__``).

        :param current_page: Starting from 0
        :param page_size: number of samples in a full page
        :param max_size: optional cap on the number of samples returned;
                         defaults to ``page_size``
        :returns: array holding ``min(page_size, max_size)`` time values
        """
        # todo: why are we even storing the time array if we return a synthesized version?
        current_page = int(current_page)
        page_size = int(page_size)
        max_size = page_size if max_size is None else int(max_size)

        page_real_size = page_size * self._sample_period
        start_time = self._start_time + current_page * page_real_size

        # Generate an exact sample count rather than ``arange(start, end, period)``:
        # with a non-integer step the length of ``arange`` is computed from a
        # floating point division and may come out one element too long.
        nr_samples = min(page_size, max_size)
        return start_time + numpy.arange(nr_samples) * self._sample_period

    def read_channels_page(self, from_idx, to_idx, step=None, specific_slices=None, channels_list=None):
        """
        Read and return only the data page for the specified channels list.

        :param from_idx: the starting time idx from which to read data
        :param to_idx: the end time idx up until to which you read data
        :param step: increments in which to read the data. Optional, default to 1.
        :param specific_slices: optional parameter. If specified slices the data accordingly.
        :param channels_list: JSON encoded list of the channels for which we want
                              data; when empty or missing all channels are returned
        """
        if channels_list:
            channels_list = [int(channel) for channel in json.loads(channels_list)]

        # The decoded list may itself be empty, hence the second truth test.
        channel_slice = tuple(channels_list) if channels_list else slice(None)

        data_page = self.read_data_page(from_idx, to_idx, step, specific_slices)
        if len(data_page.shape) == 1:
            # This is just a 1D array like in the case of Global Average monitor.
            # No need for the channels list.
            return data_page.reshape(data_page.shape[0], 1)
        return data_page[:, channel_slice]

    def read_data_page(self, from_idx, to_idx, step=None, specific_slices=None):
        """
        Retrieve one page of data (paging done based on time).

        :param from_idx: first time index of the page
        :param to_idx: end time index (exclusive), clipped to the stored length
        :param step: stride along the time dimension. Optional, default to 1.
        :param specific_slices: optional per-dimension indices (a sequence or
                                its JSON encoding) for the dimensions other than
                                0 and 2; when missing index 0 is used
        :returns: the selected data with length-1 dimensions squeezed out; a
                  page holding exactly one time point is returned as a 2D array
                  with a leading time dimension of length 1
        """
        from_idx, to_idx = int(from_idx), int(to_idx)
        # ``basestring`` only exists on Python 2. These two types are str and
        # unicode there and collapse to plain str on Python 3.
        if isinstance(specific_slices, (type(""), type(u""))):
            specific_slices = json.loads(specific_slices)
        step = 1 if step is None else int(step)

        overall_shape = self.data.shape
        slices = []
        for dim, dim_size in enumerate(overall_shape):
            if dim == 0:
                # Time slice
                slices.append(slice(from_idx, min(to_idx, dim_size), step))
            elif dim == 2:
                # Read full of the main_dimension (space for the simulator)
                slices.append(slice(dim_size))
            elif specific_slices is None:
                slices.append(slice(0, 1))
            else:
                selected = specific_slices[dim]
                slices.append(slice(selected, min(selected + 1, dim_size), 1))

        data = self.data[tuple(slices)]
        single_time_point = len(data) == 1
        data = data.squeeze()

        if single_time_point:
            # Do not allow time dimension to get squeezed, a 2D result need to
            # come out of this method. ``-1`` instead of ``len(data)`` keeps this
            # working when squeeze() produced a 0-d array (one time point of a
            # single node), for which len() raises TypeError.
            data = data.reshape((1, -1))
        return data

    def write_time_slice(self, partial_result):
        """
        Append a new value to the ``time`` attribute.
        """
        self.time.append(partial_result)

    def write_data_slice(self, partial_result, grow_dimension=0):
        """
        Append a chunk of time-series data to the ``data`` attribute.

        :param partial_result: the chunk to append
        :param grow_dimension: accepted but not used by this method
        """
        # NOTE(review): grow_dimension is ignored; the append relies on whatever
        # the ``data`` accessor was configured with (expand_dimension=0 in
        # __init__). Confirm whether it should be forwarded to ``append``.
        self.data.append(partial_result)

    def get_min_max_values(self):
        """
        Retrieve the minimum and maximum values from the metadata.

        :returns: (minimum_value, maximum_value)
        """
        metadata = self.data.get_cached_metadata()
        return metadata.min, metadata.max