class BurstConfigurationH5(H5File):
    """
    H5 persistence format for a BurstConfiguration entity.

    Scalars are stored as strings; dates go through date2string/string2date and
    optional missing values are written as the sentinel string 'None'.
    """

    def __init__(self, path):
        super(BurstConfigurationH5, self).__init__(path)
        self.name = Scalar(Attr(str), self, name='name')
        self.status = Scalar(Attr(str), self, name='status')
        self.error_message = Scalar(Attr(str, required=False), self, name='error_message')
        self.start_time = Scalar(Attr(str), self, name='start_time')
        self.finish_time = Scalar(Attr(str, required=False), self, name='finish_time')
        self.simulator = Reference(Attr(uuid.UUID), self, name='simulator')
        self.range1 = Scalar(Attr(str, required=False), self, name='range1')
        self.range2 = Scalar(Attr(str, required=False), self, name='range2')

    def store(self, burst_config, scalars_only=False, store_references=True):
        # type: (BurstConfiguration, bool, bool) -> None
        """Write all BurstConfiguration fields into this H5 file."""
        self.gid.store(uuid.UUID(burst_config.gid))
        self.name.store(burst_config.name)
        self.status.store(burst_config.status)
        # A missing error message is persisted as the sentinel string 'None'
        self.error_message.store(burst_config.error_message or 'None')
        self.start_time.store(date2string(burst_config.start_time))
        self.finish_time.store(date2string(burst_config.finish_time))
        self.simulator.store(uuid.UUID(burst_config.simulator_gid))
        self.range1.store(burst_config.range1)
        self.range2.store(burst_config.range2)

    def load_into(self, burst_config):
        # type: (BurstConfiguration) -> None
        """Populate an existing BurstConfiguration from this H5 file."""
        burst_config.gid = self.gid.load().hex
        burst_config.name = self.name.load()
        burst_config.status = self.status.load()
        # Map the 'None' sentinel written by store() back to a real None,
        # so a store/load round-trip does not turn None into the string 'None'
        error_message = self.error_message.load()
        burst_config.error_message = None if error_message == 'None' else error_message
        burst_config.start_time = string2date(self.start_time.load())
        finish_time = self.finish_time.load()
        if finish_time and finish_time != 'None':
            burst_config.finish_time = string2date(finish_time)
        burst_config.simulator_gid = self.simulator.load().hex
        # range1/range2 are optional and may be absent from older files
        try:
            burst_config.range1 = self.range1.load()
        except MissingDataSetException:
            burst_config.range1 = None
        try:
            burst_config.range2 = self.range2.load()
        except MissingDataSetException:
            burst_config.range2 = None
class SurfaceH5(H5File):
    """
    H5 persistence format for a Surface datatype.

    Besides the plain Surface attributes, this file stores a derived
    "split" representation (split_slices / split_triangles) so that the
    WebGL visualizer can address vertices with 16-bit triangle indices.
    A few header fields are cached on the instance (the ``_``-prefixed
    attributes) because they are needed to interpret the rest of the file.
    """

    def __init__(self, path):
        super(SurfaceH5, self).__init__(path)
        self.vertices = DataSet(Surface.vertices, self)
        self.triangles = DataSet(Surface.triangles, self)
        self.vertex_normals = DataSet(Surface.vertex_normals, self)
        self.triangle_normals = DataSet(Surface.triangle_normals, self)
        self.number_of_vertices = Scalar(Surface.number_of_vertices, self)
        self.number_of_triangles = Scalar(Surface.number_of_triangles, self)
        self.edge_mean_length = Scalar(Surface.edge_mean_length, self)
        self.edge_min_length = Scalar(Surface.edge_min_length, self)
        self.edge_max_length = Scalar(Surface.edge_max_length, self)
        self.zero_based_triangles = Scalar(Surface.zero_based_triangles, self)
        self.split_triangles = DataSet(NArray(dtype=int), self, name="split_triangles")
        self.number_of_split_slices = Scalar(Int(), self, name="number_of_split_slices")
        self.split_slices = Json(Attr(field_type=dict), self, name="split_slices")
        self.bi_hemispheric = Scalar(Surface.bi_hemispheric, self)
        self.surface_type = Scalar(Surface.surface_type, self)
        self.valid_for_simulations = Scalar(Surface.valid_for_simulations, self)
        # cached header like information, needed to interpret the rest of the file
        # Load the data that is required in order to interpret the file format
        # number_of_vertices and split_slices are needed for the get_vertices_slice read call
        if not self.is_new_file:
            self._split_slices = self.split_slices.load()
            self._split_triangles = self.split_triangles.load()
            self._number_of_vertices = self.number_of_vertices.load()
            self._number_of_triangles = self.number_of_triangles.load()
            self._number_of_split_slices = self.number_of_split_slices.load()
            self._bi_hemispheric = self.bi_hemispheric.load()
        # else: this is a new file

    def store(self, datatype, scalars_only=False, store_references=True):
        # type: (Surface, bool, bool) -> None
        """Store the Surface and (re)compute + store the WebGL split data."""
        super(SurfaceH5, self).store(datatype, scalars_only=scalars_only, store_references=store_references)
        # When any of the header fields change we have to update our cache of them
        # As they are an invariant of SurfaceH5 we don't do that in the accessors but here.
        # This implies that direct public writes to them via the accessors will break the invariant.
        # todo: should we make the accessors private? In complex formats like this one they are private
        # for this type direct writes to accessors should not be done
        self._number_of_vertices = datatype.number_of_vertices
        self._number_of_triangles = datatype.number_of_triangles
        self._bi_hemispheric = datatype.bi_hemispheric
        self.prepare_slices(datatype)
        self.number_of_split_slices.store(self._number_of_split_slices)
        self.split_slices.store(self._split_slices)
        self.split_triangles.store(self._split_triangles)

    def read_subtype_attr(self):
        """Return the stored surface type (used for subtype dispatch)."""
        return self.surface_type.load()

    def center(self):
        """
        Compute the center of the surface as the mean spot on all the three axes.
        """
        # is this different from return numpy.mean(self.vertices, axis=0) ?
        return [float(numpy.mean(self.vertices[:, 0])),
                float(numpy.mean(self.vertices[:, 1])),
                float(numpy.mean(self.vertices[:, 2]))]

    def get_number_of_split_slices(self):
        return self._number_of_split_slices

    def prepare_slices(self, datatype):
        """
        Before storing Surface in H5, make sure vertices/triangles are split in
        slices that are readable by WebGL.
        WebGL only supports triangle indices in interval [0.... 2^16]
        """
        # Do not split when size is conveniently small:
        if self._number_of_vertices <= SPLIT_MAX_SIZE + SPLIT_BUFFER_SIZE and not self._bi_hemispheric:
            self._number_of_split_slices = 1
            self._split_slices = {0: {KEY_TRIANGLES: {KEY_START: 0, KEY_END: self._number_of_triangles},
                                      KEY_VERTICES: {KEY_START: 0, KEY_END: self._number_of_vertices},
                                      KEY_HEMISPHERE: HEMISPHERE_UNKNOWN}}
            self._split_triangles = numpy.array([], dtype=numpy.int32)
            return

        # Compute the number of split slices:
        left_hemisphere_slices = 0
        left_hemisphere_vertices_no = 0
        if self._bi_hemispheric:
            # when more than one hemisphere
            right_hemisphere_vertices_no = numpy.count_nonzero(datatype.hemisphere_mask)
            left_hemisphere_vertices_no = self._number_of_vertices - right_hemisphere_vertices_no
            LOG.debug("Right %d Left %d" % (right_hemisphere_vertices_no, left_hemisphere_vertices_no))
            left_hemisphere_slices = self._get_slices_number(left_hemisphere_vertices_no)
            self._number_of_split_slices = left_hemisphere_slices
            self._number_of_split_slices += self._get_slices_number(right_hemisphere_vertices_no)
            LOG.debug("Hemispheres Total %d Left %d" % (self._number_of_split_slices, left_hemisphere_slices))
        else:
            # when a single hemisphere
            self._number_of_split_slices = self._get_slices_number(self._number_of_vertices)

        LOG.debug("Start to compute surface split triangles and vertices")
        split_triangles = []
        ignored_triangles_counter = 0
        self._split_slices = {}

        for i in range(self._number_of_split_slices):
            split_triangles.append([])
            if not self._bi_hemispheric:
                self._split_slices[i] = {KEY_VERTICES: {KEY_START: i * SPLIT_MAX_SIZE,
                                                        KEY_END: min(self._number_of_vertices,
                                                                     (i + 1) * SPLIT_MAX_SIZE + SPLIT_BUFFER_SIZE)},
                                         KEY_HEMISPHERE: HEMISPHERE_UNKNOWN}
            else:
                if i < left_hemisphere_slices:
                    self._split_slices[i] = {KEY_VERTICES: {KEY_START: i * SPLIT_MAX_SIZE,
                                                            KEY_END: min(left_hemisphere_vertices_no,
                                                                         (i + 1) * SPLIT_MAX_SIZE + SPLIT_BUFFER_SIZE)},
                                             KEY_HEMISPHERE: HEMISPHERE_LEFT}
                else:
                    self._split_slices[i] = {KEY_VERTICES: {KEY_START: left_hemisphere_vertices_no +
                                                                       (i - left_hemisphere_slices) * SPLIT_MAX_SIZE,
                                                            KEY_END: min(self._number_of_vertices,
                                                                         left_hemisphere_vertices_no + SPLIT_MAX_SIZE *
                                                                         (i + 1 - left_hemisphere_slices)
                                                                         + SPLIT_BUFFER_SIZE)},
                                             KEY_HEMISPHERE: HEMISPHERE_RIGHT}

        # Iterate Triangles and find the slice where it fits best, based on its vertices indexes:
        for i in range(self._number_of_triangles):
            current_triangle = [datatype.triangles[i][j] for j in range(3)]
            fit_slice, transformed_triangle = self._find_slice(current_triangle)
            if fit_slice is not None:
                split_triangles[fit_slice].append(transformed_triangle)
            else:
                # triangle ignored, as it has vertices over multiple slices.
                ignored_triangles_counter += 1

        final_split_triangles = []
        last_triangles_idx = 0

        # Concatenate triangles, to be stored in a single HDF5 array.
        for slice_idx, split_ in enumerate(split_triangles):
            self._split_slices[slice_idx][KEY_TRIANGLES] = {KEY_START: last_triangles_idx,
                                                            KEY_END: last_triangles_idx + len(split_)}
            final_split_triangles.extend(split_)
            last_triangles_idx += len(split_)

        self._split_triangles = numpy.array(final_split_triangles, dtype=numpy.int32)

        if ignored_triangles_counter > 0:
            LOG.warning("Ignored triangles from multiple hemispheres: " + str(ignored_triangles_counter))
        LOG.debug("End compute surface split triangles and vertices " + str(self._split_slices))

    @staticmethod
    def _get_slices_number(vertices_number):
        """
        Slices are for vertices [SPLIT_MAX_SIZE * i ... SPLIT_MAX_SIZE * (i + 1) + SPLIT_BUFFER_SIZE]
        Slices will overlap :
        |........SPLIT_MAX_SIZE|...SPLIT_BUFFER_SIZE|                           <-- split 1
                               |......... SPLIT_MAX_SIZE|...SPLIT_BUFFER_SIZE|  <-- split 2
        If we have trailing data smaller than the SPLIT_BUFFER_SIZE,
        then we no longer split but we need to have at least 1 slice.
        """
        slices_number, trailing = divmod(vertices_number, SPLIT_MAX_SIZE)
        if trailing > SPLIT_BUFFER_SIZE or (slices_number == 0 and trailing > 0):
            slices_number += 1
        return slices_number

    def _find_slice(self, triangle):
        """
        Find the slice whose vertex range contains all three vertex indices of
        the triangle and return (slice index, triangle re-based to that slice),
        or (None, triangle) when the triangle straddles multiple slices.
        """
        mn = min(triangle)
        mx = max(triangle)
        for i in range(self._number_of_split_slices):
            v = self._split_slices[i][KEY_VERTICES]  # extracted for performance
            slice_start = v[KEY_START]
            if slice_start <= mn and mx < v[KEY_END]:
                return i, [triangle[j] - slice_start for j in range(3)]
        return None, triangle

    def get_slice_vertex_boundaries(self, slice_idx):
        # split_slices is loaded from JSON, so its keys are strings
        if str(slice_idx) in self._split_slices:
            start_idx = max(0, self._split_slices[str(slice_idx)][KEY_VERTICES][KEY_START])
            end_idx = min(self._split_slices[str(slice_idx)][KEY_VERTICES][KEY_END], self._number_of_vertices)
            return start_idx, end_idx
        else:
            LOG.warning("Could not access slice indices, possibly due to an incompatibility with code update!")
            return 0, min(SPLIT_BUFFER_SIZE, self._number_of_vertices)

    def _get_slice_triangle_boundaries(self, slice_idx):
        if str(slice_idx) in self._split_slices:
            start_idx = max(0, self._split_slices[str(slice_idx)][KEY_TRIANGLES][KEY_START])
            end_idx = min(self._split_slices[str(slice_idx)][KEY_TRIANGLES][KEY_END], self._number_of_triangles)
            return start_idx, end_idx
        else:
            # LOG.warn is a deprecated alias; use warning() as the rest of this class does
            LOG.warning("Could not access slice indices, possibly due to an incompatibility with code update!")
            return 0, self._number_of_triangles

    def get_vertices_slice(self, slice_number=0):
        """
        Read vertices slice, to be used by WebGL visualizer.
        """
        slice_number = int(slice_number)
        start_idx, end_idx = self.get_slice_vertex_boundaries(slice_number)
        return self.vertices[start_idx: end_idx: 1]

    def get_vertex_normals_slice(self, slice_number=0):
        """
        Read vertex-normal slice, to be used by WebGL visualizer.
        """
        slice_number = int(slice_number)
        start_idx, end_idx = self.get_slice_vertex_boundaries(slice_number)
        return self.vertex_normals[start_idx: end_idx: 1]

    def get_triangles_slice(self, slice_number=0):
        """
        Read split-triangles slice, to be used by WebGL visualizer.
        """
        if self._number_of_split_slices == 1:
            return self.triangles.load()
        slice_number = int(slice_number)
        start_idx, end_idx = self._get_slice_triangle_boundaries(slice_number)
        return self._split_triangles[start_idx: end_idx: 1]

    def get_lines_slice(self, slice_number=0):
        """
        Read the gl lines values for the current slice number.
        """
        return Surface._triangles_to_lines(self.get_triangles_slice(slice_number))

    def get_slices_to_hemisphere_mask(self):
        """
        :return: a vector af length number_of_slices, with 1 when current chunk belongs to the Right hemisphere
        """
        if not self._bi_hemispheric or self._split_slices is None:
            return None
        result = [1] * self._number_of_split_slices
        for key, value in self._split_slices.items():
            if value[KEY_HEMISPHERE] == HEMISPHERE_LEFT:
                result[int(key)] = 0
        return result

    # todo: many of these do not belong in the data access layer but higher, adapter or gui layer
    ####################################### Split for Picking #######################################

    def get_pick_vertices_slice(self, slice_number=0):
        """
        Read vertices slice, to be used by WebGL visualizer with pick.
        """
        slice_number = int(slice_number)
        slice_triangles = self.triangles[
                          slice_number * SPLIT_PICK_MAX_TRIANGLE:
                          min(self._number_of_triangles, (slice_number + 1) * SPLIT_PICK_MAX_TRIANGLE)
                          ]
        result_vertices = []
        cache_vertices = self.vertices.load()
        for triang in slice_triangles:
            result_vertices.append(cache_vertices[triang[0]])
            result_vertices.append(cache_vertices[triang[1]])
            result_vertices.append(cache_vertices[triang[2]])
        return numpy.array(result_vertices)

    def get_pick_vertex_normals_slice(self, slice_number=0):
        """
        Read vertex-normals slice, to be used by WebGL visualizer with pick.
        """
        slice_number = int(slice_number)
        # use the cached triangle count, consistent with get_pick_vertices_slice,
        # instead of re-reading the scalar from the file
        slice_triangles = self.triangles[
                          slice_number * SPLIT_PICK_MAX_TRIANGLE:
                          min(self._number_of_triangles, (slice_number + 1) * SPLIT_PICK_MAX_TRIANGLE)
                          ]
        result_normals = []
        cache_vertex_normals = self.vertex_normals.load()
        for triang in slice_triangles:
            result_normals.append(cache_vertex_normals[triang[0]])
            result_normals.append(cache_vertex_normals[triang[1]])
            result_normals.append(cache_vertex_normals[triang[2]])
        return numpy.array(result_normals)

    def get_pick_triangles_slice(self, slice_number=0):
        """
        Read triangles slice, to be used by WebGL visualizer with pick.
        """
        slice_number = int(slice_number)
        no_of_triangles = (min(self._number_of_triangles, (slice_number + 1) * SPLIT_PICK_MAX_TRIANGLE)
                           - slice_number * SPLIT_PICK_MAX_TRIANGLE)
        triangles_array = numpy.arange(no_of_triangles * 3).reshape((no_of_triangles, 3))
        return triangles_array
class H5File(object):
    """
    A H5 based file format.
    This class implements reading and writing to a *specific* h5 based file format.
    A subclass of this defines a new file format.
    """
    KEY_WRITTEN_BY = 'written_by'
    is_new_file = False

    def __init__(self, path):
        # type: (str) -> None
        self.path = path
        self.storage_manager = StorageInterface.get_storage_manager(self.path)
        # would be nice to have an opened state for the chunked api instead of the close_file=False

        # common scalar headers
        self.gid = Uuid(HasTraits.gid, self)
        self.written_by = Scalar(Attr(str), self, name=self.KEY_WRITTEN_BY)
        self.create_date = Scalar(Attr(str), self, name='create_date')
        self.type = Scalar(Attr(str), self, name='type')

        # Generic attributes descriptors
        self.generic_attributes = GenericAttributes()
        self.invalid = Scalar(Attr(bool), self, name='invalid')
        self.is_nan = Scalar(Attr(bool), self, name='is_nan')
        self.subject = Scalar(Attr(str), self, name='subject')
        self.state = Scalar(Attr(str), self, name='state')
        self.user_tag_1 = Scalar(Attr(str), self, name='user_tag_1')
        self.user_tag_2 = Scalar(Attr(str), self, name='user_tag_2')
        self.user_tag_3 = Scalar(Attr(str), self, name='user_tag_3')
        self.user_tag_4 = Scalar(Attr(str), self, name='user_tag_4')
        self.user_tag_5 = Scalar(Attr(str), self, name='user_tag_5')
        self.operation_tag = Scalar(Attr(str, required=False), self, name='operation_tag')
        self.parent_burst = Uuid(Attr(uuid.UUID, required=False), self, name='parent_burst')
        self.visible = Scalar(Attr(bool), self, name='visible')
        self.metadata_cache = None
        # Keep a list with datasets for which we should write metadata before closing the file
        self.expandable_datasets = []

        if not self.storage_manager.is_valid_tvb_file():
            self.written_by.store(self.get_class_path())
            self.is_new_file = True

    @classmethod
    def file_name_base(cls):
        """Return the base file name derived from the class name (strip 'H5')."""
        return cls.__name__.replace("H5", "")

    def read_subtype_attr(self):
        return None

    def get_class_path(self):
        """Return the fully qualified name of this H5File class."""
        return self.__class__.__module__ + '.' + self.__class__.__name__

    def iter_accessors(self):
        # type: () -> typing.Generator[Accessor]
        for accessor in self.__dict__.values():
            if isinstance(accessor, Accessor):
                yield accessor

    def iter_datasets(self):
        for dataset in self.__dict__.values():
            if isinstance(dataset, DataSet):
                yield dataset

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        # Flush metadata of expandable datasets before closing the underlying h5 file
        for dataset in self.expandable_datasets:
            self.storage_manager.set_metadata(dataset.meta.to_dict(), dataset.field_name)
        self.storage_manager.close_file()

    def store(self, datatype, scalars_only=False, store_references=True):
        # type: (HasTraits, bool, bool) -> None
        """Store all traited attributes of datatype through this file's accessors."""
        for accessor in self.iter_accessors():
            f_name = accessor.trait_attribute.field_name
            if f_name is None:
                # skip attribute that does not seem to belong to a traited type
                # accessor is an independent Accessor
                continue
            if scalars_only and not isinstance(accessor, Scalar):
                continue
            if not store_references and isinstance(accessor, Reference):
                continue
            accessor.store(getattr(datatype, f_name))

    def load_into(self, datatype):
        # type: (HasTraits) -> None
        """Populate datatype from this file; references are NOT loaded recursively."""
        for accessor in self.iter_accessors():
            if isinstance(accessor, (Reference, ReferenceList)):
                # we do not load references recursively
                continue
            f_name = accessor.trait_attribute.field_name
            if f_name is None:
                # skip attribute that does not seem to belong to a traited type
                continue

            # handle optional data, that will be missing from the h5 files
            try:
                value = accessor.load()
            except MissingDataSetException:
                if accessor.trait_attribute.required:
                    raise
                else:
                    value = None

            if isinstance(accessor, JsonFinal):
                current_attr = getattr(datatype, f_name)
                for k, v in current_attr.items():
                    current_attr[k] = value[k]
            else:
                try:
                    setattr(datatype, f_name, value)
                except TraitFinalAttributeError:
                    if getattr(datatype, f_name) != value:
                        raise
                    else:
                        LOGGER.info(
                            'Cannot overwrite Final attribute: {} on {}, but it already has the expected value'
                            .format(f_name, type(datatype).__name__))

    def store_generic_attributes(self, generic_attributes, create=True):
        # type: (GenericAttributes, bool) -> None
        # write_metadata  creation time, serializer class name, etc
        if create:
            self.create_date.store(date2string(datetime.now()))

        self.generic_attributes.fill_from(generic_attributes)
        self.invalid.store(self.generic_attributes.invalid)
        self.is_nan.store(self.generic_attributes.is_nan)
        self.subject.store(self.generic_attributes.subject)
        self.state.store(self.generic_attributes.state)
        self.user_tag_1.store(self.generic_attributes.user_tag_1)
        self.user_tag_2.store(self.generic_attributes.user_tag_2)
        self.user_tag_3.store(self.generic_attributes.user_tag_3)
        self.user_tag_4.store(self.generic_attributes.user_tag_4)
        self.user_tag_5.store(self.generic_attributes.user_tag_5)
        self.operation_tag.store(self.generic_attributes.operation_tag)
        self.visible.store(self.generic_attributes.visible)
        if self.generic_attributes.parent_burst is not None:
            self.parent_burst.store(uuid.UUID(self.generic_attributes.parent_burst))

    def load_generic_attributes(self):
        # type: () -> GenericAttributes
        """Read the generic attributes from file; optional fields default to None."""
        self.generic_attributes.invalid = self.invalid.load()
        self.generic_attributes.is_nan = self.is_nan.load()
        self.generic_attributes.subject = self.subject.load()
        self.generic_attributes.state = self.state.load()
        self.generic_attributes.user_tag_1 = self.user_tag_1.load()
        self.generic_attributes.user_tag_2 = self.user_tag_2.load()
        self.generic_attributes.user_tag_3 = self.user_tag_3.load()
        self.generic_attributes.user_tag_4 = self.user_tag_4.load()
        self.generic_attributes.user_tag_5 = self.user_tag_5.load()
        self.generic_attributes.visible = self.visible.load()
        self.generic_attributes.create_date = string2date(str(self.create_date.load())) or None
        try:
            self.generic_attributes.operation_tag = self.operation_tag.load()
        except MissingDataSetException:
            self.generic_attributes.operation_tag = None
        try:
            burst = self.parent_burst.load()
            self.generic_attributes.parent_burst = burst.hex if burst is not None else None
        except MissingDataSetException:
            self.generic_attributes.parent_burst = None
        return self.generic_attributes

    def gather_references(self, datatype_cls=None):
        """Collect (trait_attribute, gid(s)) pairs for all Reference accessors."""
        ret = []
        for accessor in self.iter_accessors():
            trait_attribute = None
            if datatype_cls:
                if hasattr(datatype_cls, accessor.field_name):
                    trait_attribute = getattr(datatype_cls, accessor.field_name)
            if not trait_attribute:
                trait_attribute = accessor.trait_attribute
            if isinstance(accessor, Reference):
                ret.append((trait_attribute, accessor.load()))
            if isinstance(accessor, ReferenceList):
                hex_gids = accessor.load()
                gids = [uuid.UUID(hex_gid) for hex_gid in hex_gids]
                ret.append((trait_attribute, gids))
        return ret

    def determine_datatype_from_file(self):
        """Resolve and import the datatype class recorded in the 'type' scalar."""
        config_type = self.type.load()
        package, cls_name = config_type.rsplit('.', 1)
        module = importlib.import_module(package)
        datatype_cls = getattr(module, cls_name)
        return datatype_cls

    @staticmethod
    def determine_type(path):
        # type: (str) -> typing.Type[HasTraits]
        type_class_fqn = H5File.get_metadata_param(path, 'type')
        if type_class_fqn is None:
            return HasTraits
        package, cls_name = type_class_fqn.rsplit('.', 1)
        module = importlib.import_module(package)
        cls = getattr(module, cls_name)
        return cls

    @staticmethod
    def get_metadata_param(path, param):
        meta = StorageInterface.get_storage_manager(path).get_metadata()
        return meta.get(param)

    def store_metadata_param(self, key, value):
        self.storage_manager.set_metadata({key: value})

    @staticmethod
    def h5_class_from_file(path):
        # type: (str) -> typing.Type[H5File]
        h5file_class_fqn = H5File.get_metadata_param(path, H5File.KEY_WRITTEN_BY)
        if h5file_class_fqn is None:
            # Return the class (not an instance) to honor the declared return type;
            # from_file() instantiates the result, which would fail on an instance.
            return H5File
        package, cls_name = h5file_class_fqn.rsplit('.', 1)
        module = importlib.import_module(package)
        cls = getattr(module, cls_name)
        return cls

    @staticmethod
    def from_file(path):
        # type: (str) -> H5File
        cls = H5File.h5_class_from_file(path)
        return cls(path)

    def __repr__(self):
        return '<{}("{}")>'.format(type(self).__name__, self.path)