def write_properties(self, properties: PersistentDictType, file_datetime: datetime.datetime) -> None:
    """Write properties to the ndata file specified by reference.

    :param properties: the dict to write to the file
    :param file_datetime: the datetime for the file

    The properties param must not change during this method. Callers should take care to ensure
    this does not happen.
    """
    with self.__lock:
        absolute_file_path = self.__file_path
        #logging.debug("WRITE properties %s for %s", absolute_file_path, key)
        make_directory_if_needed(os.path.dirname(absolute_file_path))
        exists = os.path.exists(absolute_file_path)
        if exists:
            rewrite_zip(absolute_file_path, Utility.clean_dict(properties))
        else:
            write_zip(absolute_file_path, None, Utility.clean_dict(properties))
        # convert to utc time.
        tz_minutes = Utility.local_utcoffset_minutes(file_datetime)
        timestamp = calendar.timegm(file_datetime.timetuple()) - tz_minutes * 60
        os.utime(absolute_file_path, (time.time(), timestamp))
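# Illustrative standalone sketch (not part of the handler) of the local-to-UTC
# conversion used above. calendar.timegm() interprets a time tuple as UTC, so a
# naive local datetime comes out shifted by the local UTC offset; subtracting
# that offset recovers the true UTC epoch. The helper name is hypothetical.
import calendar
import datetime

def local_datetime_to_utc_timestamp(file_datetime: datetime.datetime, utcoffset_minutes: int) -> int:
    # timegm treats the tuple as UTC; remove the local offset to compensate.
    return calendar.timegm(file_datetime.timetuple()) - utcoffset_minutes * 60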
def test_reset_display_limits_on_various_value_types_write_to_clean_json(self):
    with TestContext.create_memory_context() as test_context:
        document_model = test_context.create_document_model()
        dtypes = (numpy.float32, float, numpy.complex64, numpy.complex128,
                  numpy.int16, numpy.uint16, numpy.int32, numpy.uint32)
        for dtype in dtypes:
            data_item = DataItem.DataItem(numpy.ones((16, 16), dtype))
            document_model.append_data_item(data_item)
            display_item = document_model.get_display_item_for_data_item(data_item)
            display_item.display_data_channels[0].reset_display_limits()
            Utility.clean_dict(data_item.properties)
def test_reset_display_limits_on_various_value_types_write_to_clean_json(self):
    document_model = DocumentModel.DocumentModel()
    with contextlib.closing(document_model):
        dtypes = (numpy.float32, numpy.float64, numpy.complex64, numpy.complex128,
                  numpy.int16, numpy.uint16, numpy.int32, numpy.uint32)
        for dtype in dtypes:
            data_item = DataItem.DataItem(numpy.ones((16, 16), dtype))
            document_model.append_data_item(data_item)
            display_item = document_model.get_display_item_for_data_item(data_item)
            display_item.display_data_channels[0].reset_display_limits()
            Utility.clean_dict(data_item.properties)
def __init__(self, storage_system, storage_handler, properties):
    self.__storage_system = storage_system
    self.__storage_handler = storage_handler
    self.__properties = Migration.transform_to_latest(
        Utility.clean_dict(copy.deepcopy(properties) if properties else dict()))
    self.__properties_lock = threading.RLock()
    self.__write_delayed = False
def __write_properties(self, object):
    if self.__write_delay_counts.get(object, 0) == 0:
        persistent_object_parent = object.persistent_object_parent if object else None
        if object and isinstance(object, DataItem.DataItem):
            self.__get_storage_for_item(object).rewrite_item(object)
        elif not persistent_object_parent:
            if self.__filepath:
                # atomically overwrite
                filepath = pathlib.Path(self.__filepath)
                temp_filepath = filepath.with_name(filepath.name + ".temp")
                with temp_filepath.open("w") as fp:
                    json.dump(Utility.clean_dict(self.__properties), fp)
                os.replace(temp_filepath, filepath)
        else:
            self.__write_properties(persistent_object_parent.parent)
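# Minimal standalone sketch of the atomic-overwrite pattern used above, with a
# hypothetical save_json helper. Writing the complete document to a temp file
# and then calling os.replace() means readers see either the old file or the
# new one, never a partially written file; os.replace() is atomic on both POSIX
# and Windows when source and destination are on the same filesystem.
import json
import os
import pathlib

def save_json(filepath: pathlib.Path, properties: dict) -> None:
    temp_filepath = filepath.with_name(filepath.name + ".temp")
    with temp_filepath.open("w") as fp:
        json.dump(properties, fp)  # write the full document to the temp file first
    os.replace(temp_filepath, filepath)  # then swap it into place in one step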
def auto_migrate_data_item(reader_info, persistent_storage_system, new_persistent_storage_system, index: int, count: int) -> None:
    storage_handler = reader_info.storage_handler
    properties = reader_info.properties
    properties = Utility.clean_dict(copy.deepcopy(properties) if properties else dict())
    data_item_uuid = uuid.UUID(properties["uuid"])
    if persistent_storage_system == new_persistent_storage_system:
        if reader_info.changed_ref[0]:
            storage_handler.write_properties(Migration.transform_from_latest(copy.deepcopy(properties)), datetime.datetime.now())
        persistent_storage_system.register_data_item(None, data_item_uuid, storage_handler, properties)
    else:
        # create a temporary data item that can be used to get the new file reference
        old_data_item = DataItem.DataItem(item_uuid=data_item_uuid)
        old_data_item.begin_reading()
        old_data_item.read_from_dict(properties)
        old_data_item.finish_reading()
        old_data_item_path = storage_handler.reference
        # ask the storage system for the file handler for the data item path
        file_handler = new_persistent_storage_system.get_file_handler_for_file(old_data_item_path)
        # ask the storage system to make a storage handler (an instance of a file handler) for the data item.
        # this ensures that the storage handler (file format) is the same as before.
        target_storage_handler = new_persistent_storage_system.make_storage_handler(old_data_item, file_handler)
        if target_storage_handler:
            os.makedirs(os.path.dirname(target_storage_handler.reference), exist_ok=True)
            shutil.copyfile(storage_handler.reference, target_storage_handler.reference)
            target_storage_handler.write_properties(Migration.transform_from_latest(copy.deepcopy(properties)), datetime.datetime.now())
            new_persistent_storage_system.register_data_item(None, data_item_uuid, target_storage_handler, properties)
            logging.getLogger("migration").info("Copying data item ({}/{}) {} to new library.".format(index + 1, count, data_item_uuid))
        else:
            logging.getLogger("migration").warning("Unable to copy data item {} to new library.".format(data_item_uuid))
def __write_properties_to_dataset(self, properties):
    with self.__lock:
        assert self.__dataset is not None

        class JSONEncoder(json.JSONEncoder):
            def default(self, obj):
                if isinstance(obj, (Geometry.IntPoint, Geometry.IntSize, Geometry.IntRect,
                                    Geometry.FloatPoint, Geometry.FloatSize, Geometry.FloatRect)):
                    return tuple(obj)
                else:
                    return json.JSONEncoder.default(self, obj)

        json_io = io.StringIO()
        json.dump(Utility.clean_dict(properties), json_io, cls=JSONEncoder)
        json_str = json_io.getvalue()
        self.__dataset.attrs["properties"] = json_str
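# Illustrative sketch of the same encoder technique with a hypothetical Point
# type (the Geometry classes above are analogous). json.JSONEncoder.default()
# is only consulted for objects json cannot already serialize, so returning a
# tuple there is enough to make simple geometry values JSON-friendly.
import json

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

    def __iter__(self):  # tuple(point) -> (x, y)
        return iter((self.x, self.y))

class PointEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, Point):
            return tuple(obj)
        return super().default(obj)

print(json.dumps({"origin": Point(3, 4)}, cls=PointEncoder))  # {"origin": [3, 4]}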
def convert_data_element_to_data_and_metadata_1(data_element) -> DataAndMetadata.DataAndMetadata:
    """Convert a data element to xdata. No data copying occurs.

    The data element can have the following keys:
        data (required)
        is_sequence, collection_dimension_count, datum_dimension_count (optional description of the data)
        spatial_calibrations (optional list of spatial calibration dicts: scale, offset, units)
        intensity_calibration (optional intensity calibration dict: scale, offset, units)
        metadata (optional)
        properties (get stored into metadata.hardware_source)
        one of either timestamp or datetime_modified
        if datetime_modified (dst, tz) is present, it is converted and used as the timestamp,
            and the timezone gets stored into metadata.description.timezone.
    """
    # data. takes ownership.
    data = data_element["data"]
    dimensional_shape = Image.dimensional_shape_from_data(data)
    is_sequence = data_element.get("is_sequence", False)
    dimension_count = len(dimensional_shape)
    adjusted_dimension_count = dimension_count - (1 if is_sequence else 0)
    collection_dimension_count = data_element.get("collection_dimension_count", 2 if adjusted_dimension_count in (3, 4) else 0)
    datum_dimension_count = data_element.get("datum_dimension_count", adjusted_dimension_count - collection_dimension_count)
    data_descriptor = DataAndMetadata.DataDescriptor(is_sequence, collection_dimension_count, datum_dimension_count)
    # dimensional calibrations
    dimensional_calibrations = None
    if "spatial_calibrations" in data_element:
        dimensional_calibrations_list = data_element.get("spatial_calibrations")
        if len(dimensional_calibrations_list) == len(dimensional_shape):
            dimensional_calibrations = list()
            for dimension_calibration in dimensional_calibrations_list:
                offset = float(dimension_calibration.get("offset", 0.0))
                scale = float(dimension_calibration.get("scale", 1.0))
                units = dimension_calibration.get("units", "")
                units = str(units) if units is not None else str()
                if scale != 0.0:
                    dimensional_calibrations.append(Calibration.Calibration(offset, scale, units))
                else:
                    dimensional_calibrations.append(Calibration.Calibration())
    # intensity calibration
    intensity_calibration = None
    if "intensity_calibration" in data_element:
        intensity_calibration_dict = data_element.get("intensity_calibration")
        offset = float(intensity_calibration_dict.get("offset", 0.0))
        scale = float(intensity_calibration_dict.get("scale", 1.0))
        units = intensity_calibration_dict.get("units", "")
        units = str(units) if units is not None else str()
        if scale != 0.0:
            intensity_calibration = Calibration.Calibration(offset, scale, units)
    # properties (general tags)
    metadata = dict()
    if "metadata" in data_element:
        metadata.update(Utility.clean_dict(data_element.get("metadata")))
    if "properties" in data_element and data_element["properties"]:
        hardware_source_metadata = metadata.setdefault("hardware_source", dict())
        hardware_source_metadata.update(Utility.clean_dict(data_element.get("properties")))
    # dates are _local_ time and must use this specific ISO 8601 format: 2013-11-17T08:43:21.389391
    # time zones are offsets (east of UTC) in the following format "+HHMM" or "-HHMM"
    # daylight savings times are time offsets (east of UTC) in format "+MM" or "-MM"
    # timezone is used for conversion and is the Olson timezone string.
    # datetime.datetime.strptime(datetime.datetime.isoformat(datetime.datetime.now()), "%Y-%m-%dT%H:%M:%S.%f")
    # datetime_modified, datetime_modified_tz, datetime_modified_dst, datetime_modified_tzname is the time at which this image was modified.
    # datetime_original, datetime_original_tz, datetime_original_dst, datetime_original_tzname is the time at which this image was created.
    timestamp = data_element.get("timestamp", datetime.datetime.utcnow())
    datetime_item = data_element.get("datetime_modified", Utility.get_datetime_item_from_utc_datetime(timestamp))
    local_datetime = Utility.get_datetime_from_datetime_item(datetime_item)
    dst_value = datetime_item.get("dst", "+00")
    tz_value = datetime_item.get("tz", "+0000")
    timezone = datetime_item.get("timezone")
    time_zone = {"dst": dst_value, "tz": tz_value}
    if timezone is not None:
        time_zone["timezone"] = timezone
    # note: dst is informational only; tz already includes dst
    tz_adjust = (int(tz_value[1:3]) * 60 + int(tz_value[3:5])) * (-1 if tz_value[0] == '-' else 1)
    utc_datetime = local_datetime - datetime.timedelta(minutes=tz_adjust)  # tz_adjust already contains dst_adjust
    timestamp = utc_datetime
    return DataAndMetadata.new_data_and_metadata(data, intensity_calibration=intensity_calibration,
                                                 dimensional_calibrations=dimensional_calibrations,
                                                 metadata=metadata, timestamp=timestamp,
                                                 data_descriptor=data_descriptor, timezone=timezone,
                                                 timezone_offset=tz_value)
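# Standalone sketch of the "+HHMM"/"-HHMM" offset arithmetic above, with a
# hypothetical helper name. E.g. "+0530" parses to 330 minutes east of UTC, so
# 12:00 local time becomes 06:30 UTC after the subtraction.
import datetime

def local_to_utc(local_datetime: datetime.datetime, tz_value: str) -> datetime.datetime:
    sign = -1 if tz_value[0] == '-' else 1
    tz_adjust = (int(tz_value[1:3]) * 60 + int(tz_value[3:5])) * sign  # minutes east of UTC
    return local_datetime - datetime.timedelta(minutes=tz_adjust)

print(local_to_utc(datetime.datetime(2013, 11, 17, 12, 0), "+0530"))  # 2013-11-17 06:30:00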
def read_library(persistent_storage_system, ignore_older_files) -> typing.Dict:
    """Read data items from the data reference handler and return the library properties.

    Data items will have persistent_object_context set upon return, but caller will need to call
    finish_reading on each of the data items.
    """
    data_item_uuids = set()
    utilized_deletions = set()  # the uuids skipped due to being deleted
    deletions = list()
    reader_info_list, library_updates = auto_migrate_storage_system(
        persistent_storage_system=persistent_storage_system,
        new_persistent_storage_system=persistent_storage_system,
        data_item_uuids=data_item_uuids,
        deletions=deletions,
        utilized_deletions=utilized_deletions,
        ignore_older_files=ignore_older_files)
    # next, for each auto migration, create a temporary storage system and read items from that storage system
    # using auto_migrate_storage_system. the data items returned will have been copied to the current storage
    # system (persistent object context).
    for auto_migration in reversed(persistent_storage_system.get_auto_migrations()):
        old_persistent_storage_system = FileStorageSystem(auto_migration.library_path, auto_migration.paths) if auto_migration.paths else auto_migration.storage_system
        new_reader_info_list, new_library_updates = auto_migrate_storage_system(
            persistent_storage_system=old_persistent_storage_system,
            new_persistent_storage_system=persistent_storage_system,
            data_item_uuids=data_item_uuids,
            deletions=deletions,
            utilized_deletions=utilized_deletions,
            ignore_older_files=ignore_older_files)
        reader_info_list.extend(new_reader_info_list)
        library_updates.update(new_library_updates)
    assert len(reader_info_list) == len(data_item_uuids)
    library_storage_properties = persistent_storage_system.library_storage_properties
    for reader_info in reader_info_list:
        properties = reader_info.properties
        properties = Utility.clean_dict(copy.deepcopy(properties) if properties else dict())
        version = properties.get("version", 0)
        if version == DataItem.DataItem.writer_version:
            data_item_uuid = uuid.UUID(properties.get("uuid", str(uuid.uuid4())))
            library_update = library_updates.get(data_item_uuid, dict())
            library_storage_properties.setdefault("connections", list()).extend(library_update.get("connections", list()))
            library_storage_properties.setdefault("computations", list()).extend(library_update.get("computations", list()))
            library_storage_properties.setdefault("display_items", list()).extend(library_update.get("display_items", list()))
    # mark deletions that need to be tracked because they've been deleted but are also present in older libraries
    # and would be migrated during reading unless they are explicitly prevented from doing so (via data_item_deletions).
    # utilized deletions are the ones that were attempted; if nothing was attempted, then there is no reason to
    # track it anymore since there is nothing to migrate in the future.
library_storage_properties["data_item_deletions"] = [ str(uuid_) for uuid_ in utilized_deletions ] connections_list = library_storage_properties.get("connections", list()) assert len(connections_list) == len( {connection.get("uuid") for connection in connections_list}) computations_list = library_storage_properties.get("computations", list()) assert len(computations_list) == len( {computation.get("uuid") for computation in computations_list}) # migrations if library_storage_properties.get("version", 0) < 2: for data_group_properties in library_storage_properties.get( "data_groups", list()): data_group_properties.pop("data_groups") display_item_references = data_group_properties.setdefault( "display_item_references", list()) data_item_uuid_strs = data_group_properties.pop( "data_item_uuids", list()) for data_item_uuid_str in data_item_uuid_strs: for display_item_properties in library_storage_properties.get( "display_items", list()): data_item_references = [ d.get("data_item_reference", None) for d in display_item_properties.get( "display_data_channels", list()) ] if data_item_uuid_str in data_item_references: display_item_references.append( display_item_properties["uuid"]) data_item_uuid_to_display_item_uuid_map = dict() data_item_uuid_to_display_item_dict_map = dict() display_to_display_item_map = dict() display_to_display_data_channel_map = dict() for display_item_properties in library_storage_properties.get( "display_items", list()): display_to_display_item_map[display_item_properties["display"][ "uuid"]] = display_item_properties["uuid"] display_to_display_data_channel_map[display_item_properties[ "display"]["uuid"]] = display_item_properties[ "display_data_channels"][0]["uuid"] data_item_references = [ d.get("data_item_reference", None) for d in display_item_properties.get("display_data_channels", list()) ] for data_item_uuid_str in data_item_references: data_item_uuid_to_display_item_uuid_map.setdefault( data_item_uuid_str, display_item_properties["uuid"]) data_item_uuid_to_display_item_dict_map.setdefault( data_item_uuid_str, display_item_properties) display_item_properties.pop("display", None) for workspace_properties in library_storage_properties.get( "workspaces", list()): def replace1(d): if "children" in d: for dd in d["children"]: replace1(dd) if "data_item_uuid" in d: data_item_uuid_str = d.pop("data_item_uuid") display_item_uuid_str = data_item_uuid_to_display_item_uuid_map.get( data_item_uuid_str) if display_item_uuid_str: d["display_item_uuid"] = display_item_uuid_str replace1(workspace_properties["layout"]) for connection_dict in library_storage_properties.get( "connections", list()): source_uuid_str = connection_dict["source_uuid"] if connection_dict["type"] == "interval-list-connection": connection_dict[ "source_uuid"] = display_to_display_item_map.get( source_uuid_str, None) if connection_dict[ "type"] == "property-connection" and connection_dict[ "source_property"] == "slice_interval": connection_dict[ "source_uuid"] = display_to_display_data_channel_map.get( source_uuid_str, None) def fix_specifier(specifier_dict): if specifier_dict.get("type") in ("data_item", "display_xdata", "cropped_xdata", "cropped_display_xdata", "filter_xdata", "filtered_xdata"): if specifier_dict.get( "uuid") in data_item_uuid_to_display_item_dict_map: specifier_dict[ "uuid"] = data_item_uuid_to_display_item_dict_map[ specifier_dict["uuid"]]["display_data_channels"][ 0]["uuid"] else: specifier_dict.pop("uuid", None) if specifier_dict.get("type") == "data_item": specifier_dict["type"] = 
"data_source" if specifier_dict.get("type") == "data_item_object": specifier_dict["type"] = "data_item" if specifier_dict.get("type") == "region": specifier_dict["type"] = "graphic" for computation_dict in library_storage_properties.get( "computations", list()): for variable_dict in computation_dict.get("variables", list()): if "specifier" in variable_dict: specifier_dict = variable_dict["specifier"] if specifier_dict is not None: fix_specifier(specifier_dict) if "secondary_specifier" in variable_dict: specifier_dict = variable_dict["secondary_specifier"] if specifier_dict is not None: fix_specifier(specifier_dict) for result_dict in computation_dict.get("results", list()): fix_specifier(result_dict["specifier"]) library_storage_properties[ "version"] = DocumentModel.DocumentModel.library_version # TODO: add consistency checks: no duplicated items [by uuid] such as connections or computations or data items assert library_storage_properties[ "version"] == DocumentModel.DocumentModel.library_version persistent_storage_system.rewrite_properties(library_storage_properties) properties = copy.deepcopy(library_storage_properties) for reader_info in reader_info_list: data_item_properties = Utility.clean_dict( reader_info.properties if reader_info.properties else dict()) if data_item_properties.get("version", 0) == DataItem.DataItem.writer_version: data_item_properties["__large_format"] = reader_info.large_format data_item_properties["__identifier"] = reader_info.identifier properties.setdefault("data_items", list()).append(data_item_properties) def data_item_created(data_item_properties: typing.Mapping) -> str: return data_item_properties.get("created", "1900-01-01T00:00:00.000000") properties["data_items"] = sorted(properties.get("data_items", list()), key=data_item_created) return properties
def write_properties(self, properties, file_datetime):
    self.__properties[self.__uuid] = Utility.clean_dict(properties)