class CrossCorrelationH5(H5File):
    """H5 file accessors for the fields of a ``CrossCorrelation`` datatype."""

    def __init__(self, path):
        super(CrossCorrelationH5, self).__init__(path)
        # One accessor per stored field; array_data is declared with expand_dimension=3.
        self.array_data = DataSet(CrossCorrelation.array_data, self, expand_dimension=3)
        self.source = Reference(CrossCorrelation.source, self)
        self.time = DataSet(CrossCorrelation.time, self)
        self.labels_ordering = Json(CrossCorrelation.labels_ordering, self)

    def read_data_shape(self):
        """
        Return the shape reported by the ``array_data`` dataset.
        """
        stored = self.array_data
        return stored.shape

    def read_data_slice(self, data_slice):
        """
        Return the part of ``array_data`` selected by ``data_slice``.

        :param data_slice: index expression forwarded unchanged to the dataset
        """
        stored = self.array_data
        return stored[data_slice]

    def write_data_slice(self, partial_result):
        """
        Append ``partial_result.array_data`` to the stored ``array_data``.

        :param partial_result: object exposing an ``array_data`` attribute
        """
        chunk = partial_result.array_data
        self.array_data.append(chunk)
class ComplexCoherenceSpectrumH5(DataTypeMatrixH5):
    """H5 file accessors for a ``ComplexCoherenceSpectrum`` plus a summary helper."""

    # Values accepted for ``selected_spectrum`` in get_spectrum_data.
    spectrum_types = ["Imaginary", "Real", "Absolute"]

    def __init__(self, path):
        super(ComplexCoherenceSpectrumH5, self).__init__(path)
        self.cross_spectrum = DataSet(ComplexCoherenceSpectrum.cross_spectrum, self,
                                      expand_dimension=2)
        self.array_data = DataSet(ComplexCoherenceSpectrum.array_data, self,
                                  expand_dimension=2)
        self.source = Reference(ComplexCoherenceSpectrum.source, self)
        self.epoch_length = Scalar(ComplexCoherenceSpectrum.epoch_length, self)
        self.segment_length = Scalar(ComplexCoherenceSpectrum.segment_length, self)
        self.windowing_function = Scalar(
            ComplexCoherenceSpectrum.windowing_function, self)

    def write_data_slice(self, partial_result):
        """
        Append the ``cross_spectrum`` and ``array_data`` chunks of ``partial_result``.

        Both appends pass ``close_file=False``.
        """
        for field in ("cross_spectrum", "array_data"):
            getattr(self, field).append(getattr(partial_result, field), close_file=False)

    def get_spectrum_data(self, selected_spectrum):
        """
        Summarise the stored spectrum along its third axis.

        :param selected_spectrum: ``spectrum_types[0]`` selects the imaginary part
            (strict upper triangle of the first two axes only), ``spectrum_types[1]``
            the real part; any other value selects the absolute value.
        :returns: dict with JSON-encoded ``coh_spec_sd`` / ``coh_spec_av`` lists and
            the numeric bounds ``ymin`` / ``ymax`` of average -/+ deviation.
        """
        dims = self.array_data.shape
        whole = (slice(dims[0]), slice(dims[1]), slice(dims[2]))
        values = self.array_data[whole]

        if selected_spectrum == self.spectrum_types[0]:
            # Keep only the entries strictly above the diagonal.
            upper = numpy.triu_indices(dims[0], 1)
            samples = values.imag[upper]
        else:
            if selected_spectrum == self.spectrum_types[1]:
                component = values.real
            else:
                component = numpy.absolute(values)
            # The row count uses dims[0] twice, as the first two axes are treated as equal.
            samples = component.reshape(dims[0] * dims[0], dims[2])

        deviation = numpy.std(samples, axis=0)
        average = numpy.mean(samples, axis=0)
        lower_bound = numpy.amin(average - deviation)
        upper_bound = numpy.amax(average + deviation)

        return {
            "coh_spec_sd": json.dumps(deviation.tolist()),
            "coh_spec_av": json.dumps(average.tolist()),
            "ymin": lower_bound,
            "ymax": upper_bound,
        }
class CovarianceH5(DataTypeMatrixH5):
    """H5 file accessors for the fields of a ``Covariance`` datatype."""

    def __init__(self, path):
        super(CovarianceH5, self).__init__(path)
        self.array_data = DataSet(Covariance.array_data, self, expand_dimension=2)
        self.source = Reference(Covariance.source, self)

    def write_data_slice(self, partial_result):
        """
        Append ``partial_result`` itself to ``array_data``.

        The value is handed to the dataset as-is (no attribute is read from it)
        and the append passes ``close_file=False``.
        """
        target = self.array_data
        target.append(partial_result, close_file=False)
class CoherenceSpectrumH5(DataTypeMatrixH5):
    """H5 file accessors for the fields of a ``CoherenceSpectrum`` datatype."""

    def __init__(self, path):
        super(CoherenceSpectrumH5, self).__init__(path)
        self.array_data = DataSet(CoherenceSpectrum.array_data, self, expand_dimension=3)
        self.source = Reference(CoherenceSpectrum.source, self)
        self.nfft = Scalar(CoherenceSpectrum.nfft, self)
        self.frequency = DataSet(CoherenceSpectrum.frequency, self)

    def write_data_slice(self, partial_result):
        """
        Append ``partial_result.array_data`` to the stored ``array_data``.

        :param partial_result: object exposing an ``array_data`` attribute
        """
        chunk = partial_result.array_data
        self.array_data.append(chunk)
class WaveletCoefficientsH5(DataTypeMatrixH5):
    """H5 file accessors for the fields of a ``WaveletCoefficients`` datatype."""

    def __init__(self, path):
        super(WaveletCoefficientsH5, self).__init__(path)
        self.array_data = DataSet(WaveletCoefficients.array_data, self, expand_dimension=2)
        self.source = Reference(WaveletCoefficients.source, self)
        self.mother = Scalar(WaveletCoefficients.mother, self)
        self.sample_period = Scalar(WaveletCoefficients.sample_period, self)
        self.frequencies = DataSet(WaveletCoefficients.frequencies, self)
        self.normalisation = Scalar(WaveletCoefficients.normalisation, self)
        self.q_ratio = Scalar(WaveletCoefficients.q_ratio, self)
        self.amplitude = DataSet(WaveletCoefficients.amplitude, self, expand_dimension=2)
        self.phase = DataSet(WaveletCoefficients.phase, self, expand_dimension=2)
        self.power = DataSet(WaveletCoefficients.power, self, expand_dimension=2)

    def write_data_slice(self, partial_result):
        """
        Append one chunk: the raw ``array_data`` followed by amplitude, phase and power.

        Each derived quantity is computed on ``partial_result`` (through its
        ``compute_<name>()`` method) immediately before it is appended.
        """
        # mhtodo: these computations on the partial_result belong in the caller not here
        self.array_data.append(partial_result.array_data)
        for quantity in ("amplitude", "phase", "power"):
            getattr(partial_result, "compute_" + quantity)()
            getattr(self, quantity).append(getattr(partial_result, quantity))
class PrincipalComponentsH5(H5File):
    """H5 file accessors for a ``PrincipalComponents`` datatype, plus read helpers."""

    def __init__(self, path):
        super(PrincipalComponentsH5, self).__init__(path)
        self.source = Reference(PrincipalComponents.source, self)
        self.weights = DataSet(PrincipalComponents.weights, self, expand_dimension=2)
        self.fractions = DataSet(PrincipalComponents.fractions, self, expand_dimension=1)
        self.norm_source = DataSet(PrincipalComponents.norm_source, self, expand_dimension=1)
        self.component_time_series = DataSet(
            PrincipalComponents.component_time_series, self, expand_dimension=1)
        self.normalised_component_time_series = DataSet(
            PrincipalComponents.normalised_component_time_series, self, expand_dimension=1)

    def write_data_slice(self, partial_result):
        """
        Append one chunk of results.

        Weights and fractions are appended as provided; the remaining three
        quantities are computed on ``partial_result`` right before being appended.
        Only the final append omits ``close_file=False``.
        """
        self.weights.append(partial_result.weights, close_file=False)
        self.fractions.append(partial_result.fractions, close_file=False)

        partial_result.compute_norm_source()
        self.norm_source.append(partial_result.norm_source, close_file=False)

        partial_result.compute_component_time_series()
        self.component_time_series.append(partial_result.component_time_series,
                                          close_file=False)

        partial_result.compute_normalised_component_time_series()
        self.normalised_component_time_series.append(
            partial_result.normalised_component_time_series)

    def read_fractions_data(self, from_comp, to_comp):
        """
        Return the fractions of the selected components followed by the summed rest.

        The selection is the half-open interval ``[from_comp, to_comp)``, the same
        convention used by :meth:`read_weights_data`. The last element of the
        result is the sum of every fraction that is *not* in the selection, so the
        returned values always add up to the total of all stored fractions.

        :param from_comp: index of the first selected component (int or int-like string)
        :param to_comp: index one past the last selected component
        :returns: 1-D numpy array ``[selected..., sum_of_others]``
        """
        from_comp = int(from_comp)
        to_comp = int(to_comp)

        all_data = numpy.asarray(self.fractions[:]).ravel()
        selected = all_data[from_comp:to_comp]

        # Take the exact complement of the slice above. The previous test
        # (``idx > to_comp``) skipped the component at index ``to_comp``, which is
        # not part of the half-open selection either, so its fraction was lost.
        is_other = numpy.ones(all_data.shape[0], dtype=bool)
        is_other[from_comp:to_comp] = False
        sum_others = all_data[is_other].sum()

        return numpy.array(selected.tolist() + [sum_others])

    def read_weights_data(self, from_comp, to_comp):
        """
        Return the flattened weights of the components in ``[from_comp, to_comp)``.

        The selection is applied to the first axis of ``weights``; every other
        axis is read in full.

        :param from_comp: index of the first selected component (int or int-like string)
        :param to_comp: index one past the last selected component
        :returns: 1-D numpy array
        """
        from_comp = int(from_comp)
        to_comp = int(to_comp)

        weights_slice = [slice(size) for size in self.weights.shape]
        weights_slice[0] = slice(from_comp, to_comp, None)
        weights_data = self.weights[tuple(weights_slice)]
        return weights_data.flatten()
class IndependentComponentsH5(H5File):
    """H5 file accessors for the fields of an ``IndependentComponents`` datatype."""

    def __init__(self, path):
        super(IndependentComponentsH5, self).__init__(path)
        self.source = Reference(IndependentComponents.source, self)
        self.mixing_matrix = DataSet(IndependentComponents.mixing_matrix, self,
                                     expand_dimension=2)
        self.unmixing_matrix = DataSet(IndependentComponents.unmixing_matrix, self,
                                       expand_dimension=2)
        self.prewhitening_matrix = DataSet(
            IndependentComponents.prewhitening_matrix, self, expand_dimension=2)
        self.n_components = Scalar(IndependentComponents.n_components, self)
        self.norm_source = DataSet(IndependentComponents.norm_source, self,
                                   expand_dimension=1)
        self.component_time_series = DataSet(
            IndependentComponents.component_time_series, self, expand_dimension=1)
        self.normalised_component_time_series = DataSet(
            IndependentComponents.normalised_component_time_series, self,
            expand_dimension=1)

    def write_data_slice(self, partial_result):
        """
        Append one chunk of results.

        The unmixing and prewhitening matrices are appended as provided. The
        other quantities are each computed on ``partial_result`` immediately
        before being appended; the mixing matrix comes last and is the only
        append that does not pass ``close_file=False``.
        """
        # Quantities stored exactly as delivered.
        for provided in ("unmixing_matrix", "prewhitening_matrix"):
            getattr(self, provided).append(getattr(partial_result, provided),
                                           close_file=False)

        # Quantities that must be computed first, in this order.
        for derived in ("norm_source",
                        "component_time_series",
                        "normalised_component_time_series"):
            getattr(partial_result, "compute_" + derived)()
            getattr(self, derived).append(getattr(partial_result, derived),
                                          close_file=False)

        partial_result.compute_mixing_matrix()
        self.mixing_matrix.append(partial_result.mixing_matrix)
class TimeSeriesH5(H5File):
    """H5 file accessors for a ``TimeSeries`` datatype, with paged read helpers."""

    def __init__(self, path):
        super(TimeSeriesH5, self).__init__(path)
        self.title = Scalar(TimeSeries.title, self)
        self.data = DataSet(TimeSeries.data, self, expand_dimension=0)
        self.nr_dimensions = Scalar(Int(), self, name="nr_dimensions")

        # omitted length_nd , these are indexing props, to be removed from datatype too
        self.labels_ordering = Json(TimeSeries.labels_ordering, self)
        self.labels_dimensions = Json(TimeSeries.labels_dimensions, self)

        self.time = DataSet(TimeSeries.time, self, expand_dimension=0)
        self.start_time = Scalar(TimeSeries.start_time, self)
        self.sample_period = Scalar(TimeSeries.sample_period, self)
        self.sample_period_unit = Scalar(TimeSeries.sample_period_unit, self)
        self.sample_rate = Scalar(Float(), self, name="sample_rate")

        # omitted has_surface_mapping, has_volume_mapping, indexing props, to be
        # removed from datatype too

        # experiment: load header data eagerly, see surface for a lazy approach
        # as we do not explicitly make a difference between opening for read or write
        # the file might not yet exist, so loading headers makes no sense
        if not self.is_new_file:
            self._sample_period = self.sample_period.load()
            self._start_time = self.start_time.load()

    # experimental port of some of the data access apis from the datatype
    # NOTE: some methods can not be here as they load data from dependent data types
    #       or they assume that dependent data has been loaded
    #       Those belong to a higher level where dependent h5 files are handled and
    #       partially loaded datatypes are filled

    def read_data_shape(self):
        """Return the shape reported by the ``data`` dataset."""
        stored = self.data
        return stored.shape

    def read_data_slice(self, data_slice):
        """
        Return the part of ``data`` selected by ``data_slice``.
        """
        stored = self.data
        return stored[data_slice]

    def read_time_page(self, current_page, page_size, max_size=None):
        """
        Build the time axis for one page from the cached start time and sample period.

        :param current_page: page index, starting from 0
        :param page_size: number of samples per page
        :param max_size: optional cap on the number of samples; defaults to ``page_size``
        :returns: ``numpy.arange`` from the page start to the page end
        """
        # todo: why are we even storing the time array if we return a synthetized version?
        current_page = int(current_page)
        page_size = int(page_size)
        max_size = page_size if max_size is None else int(max_size)

        period = self._sample_period
        page_real_size = page_size * period
        start_time = self._start_time + current_page * page_real_size
        end_time = start_time + min(page_real_size, max_size * period)
        return numpy.arange(start_time, end_time, period)

    def read_channels_page(self, from_idx, to_idx, step=None, specific_slices=None,
                           channels_list=None):
        """
        Read one data page and keep only the requested channels.

        :param from_idx: the starting time idx from which to read data
        :param to_idx: the end time idx up until to which you read data
        :param step: increments in which to read the data. Optional, default to 1.
        :param specific_slices: optional; forwarded to :meth:`read_data_page`.
        :param channels_list: JSON-encoded list of channel indices; when empty or
            missing every channel is returned
        """
        if channels_list:
            channels_list = json.loads(channels_list)
            for position, raw_channel in enumerate(channels_list):
                channels_list[position] = int(raw_channel)

        # Re-checked on purpose: the decoded list may itself be empty.
        channel_slice = tuple(channels_list) if channels_list else slice(None)

        data_page = self.read_data_page(from_idx, to_idx, step, specific_slices)

        # This is just a 1D array like in the case of Global Average monitor.
        # No need for the channels list
        if len(data_page.shape) == 1:
            return data_page.reshape(data_page.shape[0], 1)
        return data_page[:, channel_slice]

    def read_data_page(self, from_idx, to_idx, step=None, specific_slices=None):
        """
        Retrieve one page of data (paging done based on time).

        Axis 0 is cut to ``[from_idx, to_idx)`` with ``step``, axis 2 is read in
        full, and every other axis is reduced to a single index (0, or the
        matching entry of ``specific_slices``). The result is squeezed.

        :param specific_slices: per-axis indices, either a sequence or its JSON string
        """
        from_idx, to_idx = int(from_idx), int(to_idx)

        if isinstance(specific_slices, str):
            specific_slices = json.loads(specific_slices)

        step = 1 if step is None else int(step)

        overall_shape = self.data.shape
        slices = []
        for axis, extent in enumerate(overall_shape):
            if axis == 0:
                # Time slice
                axis_slice = slice(from_idx, min(to_idx, overall_shape[0]), step)
            elif axis == 2:
                # Read full of the main_dimension (space for the simulator)
                axis_slice = slice(extent)
            elif specific_slices is None:
                axis_slice = slice(0, 1)
            else:
                wanted = specific_slices[axis]
                axis_slice = slice(wanted, min(wanted + 1, extent), 1)
            slices.append(axis_slice)

        data = self.data[tuple(slices)].squeeze()

        if len(data.shape) == 1:
            # Do not allow time dimension to get squeezed, a 2D result need to
            # come out of this method.
            # NOTE(review): a 1-D result is always laid out as a single row,
            # whichever axis collapsed - confirm callers expect that.
            data = data.reshape((1, len(data)))

        return data

    def write_time_slice(self, partial_result):
        """
        Append a new value to the ``time`` attribute.
        """
        self.time.append(partial_result)

    def write_data_slice(self, partial_result):
        """
        Append a chunk of time-series data to the ``data`` attribute.
        """
        self.data.append(partial_result)

    def write_data_slice_on_grow_dimension(self, partial_result, grow_dimension=0):
        """Append a chunk to ``data``, forwarding ``grow_dimension`` to the dataset."""
        self.data.append(partial_result, grow_dimension=grow_dimension)

    def get_min_max_values(self):
        """
        Retrieve the minimum and maximum values from the metadata.

        :returns: (minimum_value, maximum_value)
        """
        cached = self.data.get_cached_metadata()
        return cached.min, cached.max

    def get_space_labels(self):
        """
        Generate one label per point of the third data axis.

        Returns an empty list when ``nr_dimensions`` is 2 or less.
        Subclasses are more specific.

        :return: A list of strings.
        """
        if self.nr_dimensions.load() <= 2:
            return []
        point_count = self.data.shape[2]
        return ['signal-%d' % i for i in range(point_count)]

    def get_grouped_space_labels(self):
        """
        :return: A list of label groups. A label group is a tuple
                 (name, [(label_idx, label)...]).
                 Default all labels in a group named ''
        """
        indexed_labels = list(enumerate(self.get_space_labels()))
        return [('', indexed_labels)]

    def get_default_selection(self):
        """
        :return: The measure point indices that have to be shown by default:
                 the leading indices of the third data axis, capped at
                 NO_OF_DEFAULT_SELECTED_CHANNELS.
        """
        shown = min(NO_OF_DEFAULT_SELECTED_CHANNELS, self.data.shape[2])
        return list(range(shown))

    def get_measure_points_selection_gid(self):
        """
        :return: a datatype gid with which to obtain all valid measure point
                 selections for this time series. This implementation returns ''.
                 We have to decide if the default should be all selections or none
        """
        return ''

    def store_references(self, ts):
        """Do nothing in this class."""
        pass
class FourierSpectrumH5(DataTypeMatrixH5):
    """H5 file accessors for a ``FourierSpectrum`` datatype, plus a viewer helper."""

    def __init__(self, path):
        super(FourierSpectrumH5, self).__init__(path)
        self.array_data = DataSet(FourierSpectrum.array_data, self, expand_dimension=2)
        self.source = Reference(FourierSpectrum.source, self)
        self.segment_length = Scalar(FourierSpectrum.segment_length, self)
        self.windowing_function = Scalar(FourierSpectrum.windowing_function, self)
        self.amplitude = DataSet(FourierSpectrum.amplitude, self, expand_dimension=2)
        self.phase = DataSet(FourierSpectrum.phase, self, expand_dimension=2)
        self.power = DataSet(FourierSpectrum.power, self, expand_dimension=2)
        self.average_power = DataSet(FourierSpectrum.average_power, self,
                                     expand_dimension=2)
        self.normalised_average_power = DataSet(
            FourierSpectrum.normalised_average_power, self, expand_dimension=2)

    def write_data_slice(self, partial_result):
        """
        Append one chunk: the raw ``array_data`` followed by five derived quantities.

        Each derived quantity is computed on ``partial_result`` (through its
        ``compute_<name>()`` method) immediately before it is appended.
        """
        # self.store_data_chunk('array_data', partial_result, grow_dimension=2, close_file=False)
        # mhtodo: these computations on the partial_result belong in the caller not here
        self.array_data.append(partial_result.array_data)

        derived_in_order = ("amplitude", "phase", "power",
                            "average_power", "normalised_average_power")
        for quantity in derived_in_order:
            getattr(partial_result, "compute_" + quantity)()
            getattr(self, quantity).append(getattr(partial_result, quantity))

    def get_fourier_data(self, selected_state, selected_mode, normalized):
        """
        Return the average power for one state variable and one mode.

        :param selected_state: index on the second axis (int or int-like string)
        :param selected_mode: index on the fourth axis (int or int-like string)
        :param normalized: the string "yes" selects ``normalised_average_power``;
            anything else selects ``average_power``
        :returns: dict with the transposed matrix JSON-encoded under ``data_matrix``
            and its extremes under ``ymin`` / ``ymax``
        """
        dims = self.array_data.shape
        state_slice = slice(int(selected_state),
                            min(int(selected_state) + 1, dims[1]), None)
        mode_slice = slice(int(selected_mode),
                           min(int(selected_mode) + 1, dims[3]), None)
        selection = (slice(dims[0]), state_slice, slice(dims[2]), mode_slice)

        if normalized == "yes":
            source = self.normalised_average_power
        else:
            source = self.average_power

        # Drop the two single-index axes.
        plane = source[selection].reshape((dims[0], dims[2]))
        lowest = numpy.amin(plane)
        highest = numpy.amax(plane)

        # mhtodo: this form with string inputs and json outputs belongs in some viewer not here
        encoded = json.dumps(plane.transpose().tolist())
        return dict(data_matrix=encoded, ymin=lowest, ymax=highest)
class TractsH5(H5File):
    """H5 file accessors for a ``Tracts`` datatype, with helpers for WebGL drawing."""

    # Upper bound on the number of vertices put into a single chunk.
    MAX_N_VERTICES = 2 ** 16

    def __init__(self, path):
        super(TractsH5, self).__init__(path)
        self.vertices = DataSet(Tracts.vertices, self, expand_dimension=0)
        self.tract_start_idx = DataSet(Tracts.tract_start_idx, self)
        self.tract_region = DataSet(Tracts.tract_region, self)
        self.region_volume_map = Reference(Tracts.region_volume_map, self)

    def get_tract(self, i):
        """
        Get a tract by index.

        :returns: the rows of ``vertices`` between ``tract_start_idx[i]`` and
            ``tract_start_idx[i + 1]``
        """
        start, end = self.tract_start_idx[i:i + 2]
        return self.vertices[start:end]

    def _get_tract_ids(self, region_id):
        """Return the indices of the tracts whose ``tract_region`` equals ``region_id``."""
        return numpy.where(self.tract_region.load() == region_id)[0]

    def _get_track_ids_webgl_chunks(self, region_id):
        """
        webgl can draw up to MAX_N_VERTICES vertices in a draw call.
        Assuming that no one track exceeds this limit we partition the tracts such
        that each track bundle has fewer than the max vertices.

        :return: the ids of the tracts in a region, chunked by the above criteria.
            Empty list when the region has no tracts.
        :raises ValueError: when a single tract has MAX_N_VERTICES vertices or more
        """
        # We have to split the int64 range in many uint16 ranges
        tract_id_chunks = []
        chunk = []
        count = 0

        for tid in self._get_tract_ids(region_id):
            start, end = self.tract_start_idx[tid:tid + 2]
            track_len = end - start

            if track_len >= self.MAX_N_VERTICES:
                raise ValueError('Currently tracts are too long to be handled!')

            if count + track_len >= self.MAX_N_VERTICES:
                # This tract does not fit any more: close the current chunk and
                # let the tract open the next one.
                tract_id_chunks.append(chunk)
                chunk = []
                count = 0

            chunk.append(tid)
            count += track_len

        if chunk:
            tract_id_chunks.append(chunk)

        return tract_id_chunks

    def get_vertices(self, region_id, slice_number=0):
        """
        Concatenates the vertices for all tracts starting in region_id.
        Returns a completely flat array as required by gl.bindBuffer apis.

        :param region_id: region whose tracts are wanted (int or int-like string)
        :param slice_number: index of the chunk to return, see
            :meth:`_get_track_ids_webgl_chunks`
        :returns: flat numpy array; empty when the region has no tracts
        :raises IndexError: when the region has tracts but ``slice_number`` is
            not a valid chunk index
        """
        region_id = int(region_id)
        slice_number = int(slice_number)

        try:
            chunks = self._get_track_ids_webgl_chunks(region_id)
            if not chunks:
                # A region without tracts has no chunk 0; indexing would raise
                # IndexError instead of yielding the intended empty result.
                return numpy.array([])
            tracts_vertices = [self.get_tract(tid) for tid in chunks[slice_number]]
        finally:
            # Close the file even if a read failed.
            self.close()

        if not tracts_vertices:
            return numpy.array([])
        return numpy.concatenate(tracts_vertices).ravel()

    def get_line_starts(self, region_id):
        """
        Returns a compact representation of the element buffers required to draw
        the streams via gl.drawElements.

        For every chunk, a list of offsets that describe where the first vertex of
        each tract is in the vertex array returned by :meth:`get_vertices` for
        that chunk; the final offset is the total vertex count of the chunk.
        """
        region_id = int(region_id)
        chunks = self._get_track_ids_webgl_chunks(region_id)
        chunk_line_starts = []
        tract_start_idx = self.tract_start_idx  # traits make the . expensive

        for tract_ids in chunks:
            offset = 0
            tract_offsets = [0]

            for tid in tract_ids:
                start, end = tract_start_idx[tid:tid + 2]
                offset += end - start
                tract_offsets.append(offset)

            chunk_line_starts.append(tract_offsets)

        return chunk_line_starts

    def write_vertices_slice(self, partial_result):
        """
        Append a new value to the ``vertices`` attribute.
        """
        self.vertices.append(partial_result)
class TimeSeriesH5(H5File):
    """H5 file accessors for a ``TimeSeries`` datatype, with paged read helpers."""

    def __init__(self, path):
        super(TimeSeriesH5, self).__init__(path)
        self.title = Scalar(TimeSeries.title)
        self.data = DataSet(TimeSeries.data, expand_dimension=0)
        self.nr_dimensions = Scalar(TimeSeries.nr_dimensions)

        # omitted length_nd , these are indexing props, to be removed from datatype too
        self.labels_ordering = Json(TimeSeries.labels_ordering)
        self.labels_dimensions = Json(TimeSeries.labels_dimensions)

        self.time = DataSet(TimeSeries.time, expand_dimension=0)
        self.start_time = Scalar(TimeSeries.start_time)
        self.sample_period = Scalar(TimeSeries.sample_period)
        self.sample_period_unit = Scalar(TimeSeries.sample_period_unit)
        self.sample_rate = Scalar(TimeSeries.sample_rate)
        self._end_accessor_declarations()

        # omitted has_surface_mapping, has_volume_mapping, indexing props, to be
        # removed from datatype too

        # experiment: load header data eagerly, see surface for a lazy approach
        # as we do not explicitly make a difference between opening for read or write
        # the file might not yet exist, so loading headers makes no sense
        if self.storage_manager.is_valid_hdf5_file():
            self._sample_period = self.sample_period.load()
            self._start_time = self.start_time.load()

    # experimental port of some of the data access apis from the datatype
    # NOTE: some methods can not be here as they load data from dependent data types
    #       or they assume that dependent data has been loaded
    #       Those belong to a higher level where dependent h5 files are handled and
    #       partially loaded datatypes are filled

    def read_data_shape(self):
        """Return the shape reported by the ``data`` dataset."""
        return self.data.shape

    def read_data_slice(self, data_slice):
        """
        Expose chunked-data access.
        """
        return self.data[data_slice]

    def read_time_page(self, current_page, page_size, max_size=None):
        """
        Compute time for current page from the cached start time and sample period.

        :param current_page: Starting from 0
        :param page_size: number of samples per page
        :param max_size: optional cap on the number of samples; defaults to ``page_size``
        """
        # todo: why are we even storing the time array if we return a synthetized version?
        current_page = int(current_page)
        page_size = int(page_size)
        max_size = page_size if max_size is None else int(max_size)

        page_real_size = page_size * self._sample_period
        start_time = self._start_time + current_page * page_real_size
        end_time = start_time + min(page_real_size, max_size * self._sample_period)

        return numpy.arange(start_time, end_time, self._sample_period)

    def read_channels_page(self, from_idx, to_idx, step=None, specific_slices=None,
                           channels_list=None):
        """
        Read and return only the data page for the specified channels list.

        :param from_idx: the starting time idx from which to read data
        :param to_idx: the end time idx up until to which you read data
        :param step: increments in which to read the data. Optional, default to 1.
        :param specific_slices: optional parameter. If specified slices the data accordingly.
        :param channels_list: JSON-encoded list of the channels for which we want
            data; when empty or missing every channel is returned
        """
        if channels_list:
            channels_list = [int(channel) for channel in json.loads(channels_list)]

        # Re-checked on purpose: the decoded list may itself be empty.
        channel_slice = tuple(channels_list) if channels_list else slice(None)

        data_page = self.read_data_page(from_idx, to_idx, step, specific_slices)

        # This is just a 1D array like in the case of Global Average monitor.
        # No need for the channels list
        if len(data_page.shape) == 1:
            return data_page.reshape(data_page.shape[0], 1)
        return data_page[:, channel_slice]

    def read_data_page(self, from_idx, to_idx, step=None, specific_slices=None):
        """
        Retrieve one page of data (paging done based on time).

        Axis 0 is cut to ``[from_idx, to_idx)`` with ``step``, axis 2 is read in
        full, and every other axis is reduced to a single index (0, or the
        matching entry of ``specific_slices``). The result is squeezed, except
        that a page holding a single time sample is returned as one row.

        :param specific_slices: per-axis indices, either a sequence or its JSON string
        """
        from_idx, to_idx = int(from_idx), int(to_idx)

        # ``basestring`` only exists on Python 2; referencing it unguarded raises
        # NameError on Python 3, so resolve the text type once here.
        try:
            text_types = basestring  # Python 2: also matches ``unicode``
        except NameError:
            text_types = str  # Python 3
        if isinstance(specific_slices, text_types):
            specific_slices = json.loads(specific_slices)

        step = 1 if step is None else int(step)

        slices = []
        overall_shape = self.data.shape
        for i, extent in enumerate(overall_shape):
            if i == 0:
                # Time slice
                slices.append(slice(from_idx, min(to_idx, overall_shape[0]), step))
            elif i == 2:
                # Read full of the main_dimension (space for the simulator)
                slices.append(slice(extent))
            elif specific_slices is None:
                slices.append(slice(0, 1))
            else:
                slices.append(slice(specific_slices[i],
                                    min(specific_slices[i] + 1, extent), 1))

        data = self.data[tuple(slices)]
        if len(data) == 1:
            # Do not allow time dimension to get squeezed, a 2D result need to
            # come out of this method. ``-1`` also covers the case where every
            # axis has length 1 and squeeze() leaves a 0-d array, on which
            # len() would fail.
            data = data.squeeze().reshape((1, -1))
        else:
            data = data.squeeze()

        return data

    def write_time_slice(self, partial_result):
        """
        Append a new value to the ``time`` attribute.
        """
        self.time.append(partial_result)

    def write_data_slice(self, partial_result, grow_dimension=0):
        """
        Append a chunk of time-series data to the ``data`` attribute.

        :param grow_dimension: accepted but not forwarded to the dataset.
        """
        # NOTE(review): grow_dimension is ignored here - confirm whether it
        # should be passed on to ``self.data.append``.
        self.data.append(partial_result)

    def get_min_max_values(self):
        """
        Retrieve the minimum and maximum values from the metadata.

        :returns: (minimum_value, maximum_value)
        """
        metadata = self.data.get_cached_metadata()
        return metadata.min, metadata.max