Example 1
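Both examples assume the standard-library json module and numpy, plus the accessor and datatype classes from the surrounding TVB packages; a hedged header might look like this (exact module paths depend on the TVB version in use):

import json
import numpy
# H5File, Scalar, DataSet, Json, Int, Float, TimeSeries and
# NO_OF_DEFAULT_SELECTED_CHANNELS are assumed to be imported from the
# surrounding TVB neotraits / datatypes packages.
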
class TimeSeriesH5(H5File):
    def __init__(self, path):
        super(TimeSeriesH5, self).__init__(path)
        self.title = Scalar(TimeSeries.title, self)
        self.data = DataSet(TimeSeries.data, self, expand_dimension=0)
        self.nr_dimensions = Scalar(Int(), self, name="nr_dimensions")

        # omitted length_nd, these are indexing props, to be removed from the datatype too
        self.labels_ordering = Json(TimeSeries.labels_ordering, self)
        self.labels_dimensions = Json(TimeSeries.labels_dimensions, self)

        self.time = DataSet(TimeSeries.time, self, expand_dimension=0)
        self.start_time = Scalar(TimeSeries.start_time, self)
        self.sample_period = Scalar(TimeSeries.sample_period, self)
        self.sample_period_unit = Scalar(TimeSeries.sample_period_unit, self)
        self.sample_rate = Scalar(Float(), self, name="sample_rate")

        # omitted has_surface_mapping, has_volume_mapping, indexing props, to be removed from the datatype too

        # experiment: load header data eagerly; see surface for a lazy approach.
        # As we do not explicitly distinguish between opening for read or write,
        # the file might not exist yet, in which case loading headers makes no sense.

        if not self.is_new_file:
            self._sample_period = self.sample_period.load()
            self._start_time = self.start_time.load()

    # experimental port of some of the data access apis from the datatype
    # NOTE: some methods cannot live here, as they load data from dependent data types
    #       or assume that dependent data has already been loaded.
    #       Those belong to a higher level, where dependent h5 files are handled and
    #       partially loaded datatypes are filled.

    def read_data_shape(self):
        return self.data.shape

    def read_data_slice(self, data_slice):
        """
        Expose chunked-data access.
        """
        return self.data[data_slice]

    def read_time_page(self, current_page, page_size, max_size=None):
        """
        Compute time for current page.
        :param current_page: Starting from 0
        """
        # todo: why are we even storing the time array if we return a synthesized version?
        current_page = int(current_page)
        page_size = int(page_size)

        if max_size is None:
            max_size = page_size
        else:
            max_size = int(max_size)

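        # A page covers page_size samples, i.e. page_size * sample_period seconds;
        # the requested page therefore starts current_page page-lengths after
        # start_time, while max_size caps the length of a (possibly partial) last page.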
        page_real_size = page_size * self._sample_period
        start_time = self._start_time + current_page * page_real_size
        end_time = start_time + min(page_real_size,
                                    max_size * self._sample_period)

        return numpy.arange(start_time, end_time, self._sample_period)

    def read_channels_page(self,
                           from_idx,
                           to_idx,
                           step=None,
                           specific_slices=None,
                           channels_list=None):
        """
        Read and return only the data page for the specified channels list.

        :param from_idx: the starting time idx from which to read data
        :param to_idx: the ending time idx up to which to read data
        :param step: increments in which to read the data. Optional, defaults to 1.
        :param specific_slices: optional parameter. If specified, slices the data accordingly.
        :param channels_list: the list of channels for which we want data
        """
        if channels_list:
            channels_list = json.loads(channels_list)
            for i in range(len(channels_list)):
                channels_list[i] = int(channels_list[i])

        if channels_list:
            channel_slice = tuple(channels_list)
        else:
            channel_slice = slice(None)

        data_page = self.read_data_page(from_idx, to_idx, step,
                                        specific_slices)
        # This is just a 1D array like in the case of Global Average monitor.
        # No need for the channels list
        if len(data_page.shape) == 1:
            return data_page.reshape(data_page.shape[0], 1)
        else:
            return data_page[:, channel_slice]

    def read_data_page(self,
                       from_idx,
                       to_idx,
                       step=None,
                       specific_slices=None):
        """
        Retrieve one page of data (paging done based on time).
        """
        from_idx, to_idx = int(from_idx), int(to_idx)

        if isinstance(specific_slices, str):
            specific_slices = json.loads(specific_slices)
        if step is None:
            step = 1
        else:
            step = int(step)

        slices = []
        overall_shape = self.data.shape
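        # Build one slice per dimension: the requested [from_idx, to_idx) range
        # (with the given step) on the time dimension, the full space dimension
        # (index 2), and a single index on every remaining dimension, taken from
        # specific_slices when given, otherwise 0.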
        for i in range(len(overall_shape)):
            if i == 0:
                # Time slice
                slices.append(
                    slice(from_idx, min(to_idx, overall_shape[0]), step))
                continue
            if i == 2:
                # Read the full main dimension (space, for the simulator)
                slices.append(slice(overall_shape[i]))
                continue
            if specific_slices is None:
                slices.append(slice(0, 1))
            else:
                slices.append(
                    slice(specific_slices[i],
                          min(specific_slices[i] + 1, overall_shape[i]), 1))

        data = self.data[tuple(slices)]
        data = data.squeeze()

        if len(data.shape) == 1:
            # Do not allow the time dimension to get squeezed; a 2D result needs
            # to come out of this method.
            data = data.reshape((1, len(data)))

        return data

    def write_time_slice(self, partial_result):
        """
        Append a new value to the ``time`` attribute.
        """
        self.time.append(partial_result)

    def write_data_slice(self, partial_result):
        """
        Append a chunk of time-series data to the ``data`` attribute.
        """
        self.data.append(partial_result)

    def write_data_slice_on_grow_dimension(self,
                                           partial_result,
                                           grow_dimension=0):
        self.data.append(partial_result, grow_dimension=grow_dimension)

    def get_min_max_values(self):
        """
        Retrieve the minimum and maximum values from the metadata.
        :returns: (minimum_value, maximum_value)
        """
        metadata = self.data.get_cached_metadata()
        return metadata.min, metadata.max

    def get_space_labels(self):
        """
        It assumes that we want to select along the 3rd dimension,
        and generates labels for each point in that dimension.
        Subclasses are more specific.
        :return: An array of strings.
        """
        if self.nr_dimensions.load() > 2:
            return ['signal-%d' % i for i in range(self.data.shape[2])]
        else:
            return []

    def get_grouped_space_labels(self):
        """
        :return: A list of label groups. A label group is a tuple (name, [(label_idx, label)...]).
                 By default, all labels are placed in a single group named ''.
        """
        return [('', list(enumerate(self.get_space_labels())))]

    def get_default_selection(self):
        """
        :return: The measure point indices shown by default (at most NO_OF_DEFAULT_SELECTED_CHANNELS).
        """
        return list(
            range(min(NO_OF_DEFAULT_SELECTED_CHANNELS, self.data.shape[2])))

    def get_measure_points_selection_gid(self):
        """
        :return: a datatype gid with which to obtain all valid measure point selections for this time series.
                 We have to decide whether the default should be all selections or none.
        """
        return ''

    def store_references(self, ts):
        pass
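
A minimal read-side sketch of how the paged API above might be driven; the file path, page size, and the close() call (assumed to come from the H5File base class) are illustrative assumptions:

# Hypothetical usage; path and page size are made up.
ts_h5 = TimeSeriesH5("/tmp/time_series.h5")
n_time = ts_h5.read_data_shape()[0]
page_size = 512

for page in range((n_time + page_size - 1) // page_size):
    t = ts_h5.read_time_page(page, page_size)              # synthesized time axis
    d = ts_h5.read_data_page(page * page_size,
                             (page + 1) * page_size)       # (time, space) block
    # ... process t and d ...
ts_h5.close()  # assumed to be provided by the H5File base class
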
Example 2
class TimeSeriesH5(H5File):
    def __init__(self, path):
        super(TimeSeriesH5, self).__init__(path)
        self.title = Scalar(TimeSeries.title)
        self.data = DataSet(TimeSeries.data, expand_dimension=0)
        self.nr_dimensions = Scalar(TimeSeries.nr_dimensions)

        # omitted length_nd, these are indexing props, to be removed from the datatype too
        self.labels_ordering = Json(TimeSeries.labels_ordering)
        self.labels_dimensions = Json(TimeSeries.labels_dimensions)

        self.time = DataSet(TimeSeries.time, expand_dimension=0)
        self.start_time = Scalar(TimeSeries.start_time)
        self.sample_period = Scalar(TimeSeries.sample_period)
        self.sample_period_unit = Scalar(TimeSeries.sample_period_unit)
        self.sample_rate = Scalar(TimeSeries.sample_rate)
        self._end_accessor_declarations()

        # omitted has_surface_mapping, has_volume_mapping, indexing props, to be removed from the datatype too

        # experiment: load header data eagerly; see surface for a lazy approach.
        # As we do not explicitly distinguish between opening for read or write,
        # the file might not exist yet, in which case loading headers makes no sense.

        if self.storage_manager.is_valid_hdf5_file():
            self._sample_period = self.sample_period.load()
            self._start_time = self.start_time.load()

    # experimental port of some of the data access apis from the datatype
    # NOTE: some methods cannot live here, as they load data from dependent data types
    #       or assume that dependent data has already been loaded.
    #       Those belong to a higher level, where dependent h5 files are handled and
    #       partially loaded datatypes are filled.

    def read_data_shape(self):
        return self.data.shape

    def read_data_slice(self, data_slice):
        """
        Expose chunked-data access.
        """
        return self.data[data_slice]

    def read_time_page(self, current_page, page_size, max_size=None):
        """
        Compute time for current page.
        :param current_page: Starting from 0
        """
        # todo: why are we even storing the time array if we return a synthesized version?
        current_page = int(current_page)
        page_size = int(page_size)

        if max_size is None:
            max_size = page_size
        else:
            max_size = int(max_size)

        page_real_size = page_size * self._sample_period
        start_time = self._start_time + current_page * page_real_size
        end_time = start_time + min(page_real_size,
                                    max_size * self._sample_period)

        return numpy.arange(start_time, end_time, self._sample_period)

    def read_channels_page(self,
                           from_idx,
                           to_idx,
                           step=None,
                           specific_slices=None,
                           channels_list=None):
        """
        Read and return only the data page for the specified channels list.

        :param from_idx: the starting time idx from which to read data
        :param to_idx: the ending time idx up to which to read data
        :param step: increments in which to read the data. Optional, defaults to 1.
        :param specific_slices: optional parameter. If specified, slices the data accordingly.
        :param channels_list: the list of channels for which we want data
        """
        if channels_list:
            channels_list = json.loads(channels_list)
            for i in range(len(channels_list)):
                channels_list[i] = int(channels_list[i])

        if channels_list:
            channel_slice = tuple(channels_list)
        else:
            channel_slice = slice(None)

        data_page = self.read_data_page(from_idx, to_idx, step,
                                        specific_slices)
        # This is just a 1D array like in the case of Global Average monitor.
        # No need for the channels list
        if len(data_page.shape) == 1:
            return data_page.reshape(data_page.shape[0], 1)
        else:
            return data_page[:, channel_slice]

    def read_data_page(self,
                       from_idx,
                       to_idx,
                       step=None,
                       specific_slices=None):
        """
        Retrieve one page of data (paging done based on time).
        """
        from_idx, to_idx = int(from_idx), int(to_idx)

        if isinstance(specific_slices, str):
            specific_slices = json.loads(specific_slices)
        if step is None:
            step = 1
        else:
            step = int(step)

        slices = []
        overall_shape = self.data.shape
        for i in range(len(overall_shape)):
            if i == 0:
                # Time slice
                slices.append(
                    slice(from_idx, min(to_idx, overall_shape[0]), step))
                continue
            if i == 2:
                # Read the full main dimension (space, for the simulator)
                slices.append(slice(overall_shape[i]))
                continue
            if specific_slices is None:
                slices.append(slice(0, 1))
            else:
                slices.append(
                    slice(specific_slices[i],
                          min(specific_slices[i] + 1, overall_shape[i]), 1))

        data = self.data[tuple(slices)]
        if len(data) == 1:
            # Do not allow the time dimension to get squeezed; a 2D result needs
            # to come out of this method.
            data = data.squeeze()
            data = data.reshape((1, len(data)))
        else:
            data = data.squeeze()

        return data

    def write_time_slice(self, partial_result):
        """
        Append a new value to the ``time`` attribute.
        """
        self.time.append(partial_result)

    def write_data_slice(self, partial_result, grow_dimension=0):
        """
        Append a chunk of time-series data to the ``data`` attribute,
        growing along ``grow_dimension``.
        """
        self.data.append(partial_result, grow_dimension=grow_dimension)

    def get_min_max_values(self):
        """
        Retrieve the minimum and maximum values from the metadata.
        :returns: (minimum_value, maximum_value)
        """
        metadata = self.data.get_cached_metadata()
        return metadata.min, metadata.max
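
The write path is the mirror image of the read path: during a simulation run each chunk of produced samples is appended along the time (grow) dimension. A minimal sketch, with the path, chunk shape and sample period made up for illustration:

# Hypothetical write-side usage; shapes and values are made up.
ts_h5 = TimeSeriesH5("/tmp/new_time_series.h5")
sample_period = 0.1
for step in range(10):
    chunk = numpy.zeros((4, 1, 76, 1))   # (time, state variable, space, mode) block
    times = (numpy.arange(4) + step * 4) * sample_period
    ts_h5.write_time_slice(times)
    ts_h5.write_data_slice(chunk)        # appended on expand_dimension=0 (time)
ts_h5.close()  # assumed from the H5File base class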