def _export_h5n5(self, compress=False): self.progressSignal(0) # Create and open the hdf5/n5 file export_components = PathComponents(self.ExportPath.value) try: with OpStreamingH5N5Reader.get_h5_n5_file( export_components.externalPath, mode="a") as h5N5File: # Create a temporary operator to do the work for us opH5N5Writer = OpH5N5WriterBigDataset(parent=self) with contextlib.suppress(KeyError): del h5N5File[export_components.internalPath] try: opH5N5Writer.CompressionEnabled.setValue(compress) opH5N5Writer.h5N5File.setValue(h5N5File) opH5N5Writer.h5N5Path.setValue( export_components.internalPath) opH5N5Writer.Image.connect(self.Input) # The H5 Writer provides it's own progress signal, so just connect ours to it. opH5N5Writer.progressSignal.subscribe(self.progressSignal) # Perform the export and block for it in THIS THREAD. opH5N5Writer.WriteImage[:].wait() finally: opH5N5Writer.cleanUp() self.progressSignal(100) except IOError as ex: import sys msg = "\nException raised when attempting to export to {}: {}\n".format( export_components.externalPath, str(ex)) sys.stderr.write(msg) raise
def get_non_transposed_provider_slot(
    self, parent: Optional[Operator] = None, graph: Optional[Graph] = None
) -> OutputSlot:
    """Create a streaming reader for this dataset and return its raw
    (non-transposed) output slot.

    Exactly one of *parent* or *graph* is normally supplied, as required
    by the lazyflow operator constructor.
    """
    reader = OpStreamingH5N5Reader(parent=parent, graph=graph)
    reader.H5N5File.setValue(self.project_file)
    reader.InternalPath.setValue(self.inner_path)
    return reader.OutputImage
def setup_method(self, method):
    """Per-test fixture: create a temp dir with one .h5 and one .n5 file,
    two reader operators, a "volume" group in each file, and test data.
    """
    import os  # local import: file-level import block is outside this view

    self.graph = Graph()
    self.testFileDir = tempfile.TemporaryDirectory()
    # BUGFIX: plain string concatenation produced paths like
    # "/tmp/tmpXXXXtest.h5" -- siblings of the temp dir that survive its
    # cleanup. Join with a proper path separator instead.
    self.testDataH5FileName = os.path.join(self.testFileDir.name, "test.h5")
    self.testDataN5FileName = os.path.join(self.testFileDir.name, "test.n5")
    self.h5_op = OpStreamingH5N5Reader(graph=self.graph)
    self.n5_op = OpStreamingH5N5Reader(graph=self.graph)

    self.h5File = OpStreamingH5N5Reader.get_h5_n5_file(self.testDataH5FileName)
    self.n5File = OpStreamingH5N5Reader.get_h5_n5_file(self.testDataN5FileName)
    self.h5File.create_group("volume")
    self.n5File.create_group("volume")

    # Create a test dataset
    datashape = (1, 2, 3, 4, 5)
    self.data = numpy.indices(datashape).sum(0).astype(numpy.float32)
def setup_method(self, method):
    """Per-test fixture: create a temp dir with one .h5 and one .n5 file,
    two reader operators, a "volume" group in each file, and test data.
    """
    import os  # local import: file-level import block is outside this view

    self.graph = Graph()
    self.testFileDir = tempfile.TemporaryDirectory()
    # BUGFIX: plain string concatenation produced paths like
    # "/tmp/tmpXXXXtest.h5" -- siblings of the temp dir that survive its
    # cleanup. Join with a proper path separator instead.
    self.testDataH5FileName = os.path.join(self.testFileDir.name, "test.h5")
    self.testDataN5FileName = os.path.join(self.testFileDir.name, "test.n5")
    self.h5_op = OpStreamingH5N5Reader(graph=self.graph)
    self.n5_op = OpStreamingH5N5Reader(graph=self.graph)

    self.h5File = OpStreamingH5N5Reader.get_h5_n5_file(self.testDataH5FileName)
    self.n5File = OpStreamingH5N5Reader.get_h5_n5_file(self.testDataN5FileName)
    self.h5File.create_group("volume")
    self.n5File.create_group("volume")

    # Create a test dataset
    datashape = (1, 2, 3, 4, 5)
    self.data = numpy.indices(datashape).sum(0).astype(numpy.float32)
def _applyPattern(self):
    """Expand the user-entered glob pattern(s) into a flat filename list
    and push it to the file-list widget via self._updateFileList().

    The pattern field may hold several glob strings joined by
    os.path.pathsep. Entries whose extension marks an HDF5/N5 container
    are expanded through the sequence readers' internal-path globbing;
    all other entries are expanded as plain filesystem globs.
    """
    globStrings = self.patternEdit.text()
    H5EXTS = OpStreamingH5N5SequenceReaderM.H5EXTS
    N5EXTS = OpStreamingH5N5SequenceReaderM.N5EXTS
    filenames = []
    # see if some glob strings include HDF5 and/or N5 files
    globStrings = globStrings.split(os.path.pathsep)
    pcs = [PathComponents(x) for x in globStrings]
    is_h5_n5 = [x.extension in (H5EXTS + N5EXTS) for x in pcs]

    # Partition: container globs vs. plain filesystem globs
    h5GlobStrings = os.path.pathsep.join(
        [x for x, y in zip(globStrings, is_h5_n5) if y is True])
    globStrings = os.path.pathsep.join(
        [x for x, y in zip(globStrings, is_h5_n5) if y is False])

    filenames.extend(OpStackLoader.expandGlobStrings(globStrings))

    try:
        OpStreamingH5N5SequenceReaderS.checkGlobString(h5GlobStrings)
        # OK, if nothing raised there is a single h5 file in h5GlobStrings:
        pathComponents = PathComponents(
            h5GlobStrings.split(os.path.pathsep)[0])
        h5file = OpStreamingH5N5Reader.get_h5_n5_file(
            pathComponents.externalPath, mode="r")
        filenames.extend(
            "{}/{}".format(pathComponents.externalPath, internal)
            for internal in OpStreamingH5N5SequenceReaderS.
            expandGlobStrings(h5file, h5GlobStrings))
    except (
        OpStreamingH5N5SequenceReaderS.WrongFileTypeError,
        OpStreamingH5N5SequenceReaderS.NotTheSameFileError,
        OpStreamingH5N5SequenceReaderS.NoInternalPlaceholderError,
        OpStreamingH5N5SequenceReaderS.ExternalPlaceholderError,
    ):
        # Not a single-file sequence; fall through to the multi-file case.
        pass

    try:
        OpStreamingH5N5SequenceReaderM.checkGlobString(h5GlobStrings)
        filenames.extend(
            "{}/{}".format(external, internal)
            for external, internal in zip(
                *OpStreamingH5N5SequenceReaderM.expandGlobStrings(
                    h5GlobStrings)))
    except (
        OpStreamingH5N5SequenceReaderM.WrongFileTypeError,
        OpStreamingH5N5SequenceReaderM.SameFileError,
        OpStreamingH5N5SequenceReaderM.NoExternalPlaceholderError,
        OpStreamingH5N5SequenceReaderM.InternalPlaceholderError,
    ):
        # Not a multi-file sequence either; keep whatever was collected.
        pass

    self._updateFileList(filenames)
def _h5N5FindCommonInternal(h5N5Files):
    """
    Tries to find common internal path (containing data)

    Method is used, when a directory is selected and the internal path is,
    thus, unclear.

    Args:
        h5N5Files (list of strings): h5 or n5 files to be globbed internally

    Returns:
        list of internal paths present in every given file
    """
    # BUGFIX: the original closed only the first file; every file opened
    # inside the loop leaked its handle. Context managers guarantee each
    # file is closed, even if lsH5N5 raises.
    with OpStreamingH5N5Reader.get_h5_n5_file(h5N5Files[0], mode='r') as h5:
        # all datasets with at least 2D shape
        internal_paths = set(x['name'] for x in lsH5N5(h5, minShape=2))

    for h5N5File in h5N5Files[1:]:
        with OpStreamingH5N5Reader.get_h5_n5_file(h5N5File, mode='r') as h5:
            # get all files with with at least 2D shape
            tmp = set(x['name'] for x in lsH5N5(h5, minShape=2))
        internal_paths = internal_paths.intersection(tmp)
    return list(internal_paths)
def _deserialize(self, group, slot):
    """Re-wire the on-disk prediction datasets stored in *group* to the
    operator's PredictionsFromDisk slots.

    :param group: h5py/z5py group whose child datasets are the saved
        predictions, one per image lane.
    :param slot: unused here; NOTE(review): it is shadowed by the loop
        variable in the empty-group branch below -- confirm it can be
        dropped from the signature at the call sites.
    """
    # Flush the GUI cache of any saved up dirty rois
    # (toggling FreezePredictions forces the operator to drop cached state)
    if self.operator.FreezePredictions.value == True:
        self.operator.FreezePredictions.setValue(False)
        self.operator.FreezePredictions.setValue(True)

    #self.operator.PredictionsFromDisk.resize(len(group))
    if len(list(group.keys())) > 0:
        # Saved predictions must match the currently loaded lanes 1:1.
        assert len(list(group.keys())) == len(self.operator.PredictionsFromDisk), "Expected to find the same number of on-disk predications as there are images loaded."
    else:
        # Nothing saved: detach any previously connected disk readers.
        for slot in self.operator.PredictionsFromDisk:
            slot.disconnect()

    # One streaming reader per saved dataset, connected lane-by-lane.
    for imageIndex, datasetName in enumerate(group.keys()):
        opStreamer = OpStreamingH5N5Reader(graph=self.operator.graph, parent=self.operator.parent)
        opStreamer.H5N5File.setValue(group)
        opStreamer.InternalPath.setValue(datasetName)
        self.operator.PredictionsFromDisk[imageIndex].connect(opStreamer.OutputImage)
def _applyPattern(self):
    """Expand the user-entered glob pattern(s) into a flat filename list
    and push it to the file-list widget via self._updateFileList().

    The pattern field may hold several glob strings joined by
    os.path.pathsep. Entries whose extension marks an HDF5/N5 container
    are expanded through the sequence readers' internal-path globbing;
    all other entries are expanded as plain filesystem globs.
    """
    globStrings = self.patternEdit.text()
    H5EXTS = OpStreamingH5N5SequenceReaderM.H5EXTS
    N5EXTS = OpStreamingH5N5SequenceReaderM.N5EXTS
    filenames = []
    # see if some glob strings include HDF5 and/or N5 files
    globStrings = globStrings.split(os.path.pathsep)
    pcs = [PathComponents(x) for x in globStrings]
    is_h5_n5 = [x.extension in (H5EXTS + N5EXTS) for x in pcs]

    # Partition: container globs vs. plain filesystem globs
    h5GlobStrings = os.path.pathsep.join([x for x, y in zip(globStrings, is_h5_n5) if y is True])
    globStrings = os.path.pathsep.join([x for x, y in zip(globStrings, is_h5_n5) if y is False])

    filenames.extend(OpStackLoader.expandGlobStrings(globStrings))

    try:
        OpStreamingH5N5SequenceReaderS.checkGlobString(h5GlobStrings)
        # OK, if nothing raised there is a single h5 file in h5GlobStrings:
        pathComponents = PathComponents(h5GlobStrings.split(os.path.pathsep)[0])
        h5file = OpStreamingH5N5Reader.get_h5_n5_file(pathComponents.externalPath, mode='r')
        filenames.extend(
            "{}/{}".format(pathComponents.externalPath, internal)
            for internal in OpStreamingH5N5SequenceReaderS.expandGlobStrings(h5file, h5GlobStrings))
    except (
            OpStreamingH5N5SequenceReaderS.WrongFileTypeError,
            OpStreamingH5N5SequenceReaderS.NotTheSameFileError,
            OpStreamingH5N5SequenceReaderS.NoInternalPlaceholderError,
            OpStreamingH5N5SequenceReaderS.ExternalPlaceholderError):
        # Not a single-file sequence; fall through to the multi-file case.
        pass

    try:
        OpStreamingH5N5SequenceReaderM.checkGlobString(h5GlobStrings)
        filenames.extend(
            "{}/{}".format(external, internal)
            for external, internal in zip(*OpStreamingH5N5SequenceReaderM.expandGlobStrings(h5GlobStrings))
        )
    except (
            OpStreamingH5N5SequenceReaderM.WrongFileTypeError,
            OpStreamingH5N5SequenceReaderM.SameFileError,
            OpStreamingH5N5SequenceReaderM.NoExternalPlaceholderError,
            OpStreamingH5N5SequenceReaderM.InternalPlaceholderError):
        # Not a multi-file sequence either; keep whatever was collected.
        pass

    self._updateFileList(filenames)
def _export_h5n5(self, compress=False): self.progressSignal(0) # Create and open the hdf5/n5 file export_components = PathComponents(self.ExportPath.value) try: if os.path.isdir(export_components.externalPath ): # externalPath leads to a n5 file shutil.rmtree(export_components.externalPath ) # n5 is stored as a directory structure else: os.remove(export_components.externalPath) except OSError as ex: # It's okay if the file isn't there. if ex.errno != 2: raise try: with OpStreamingH5N5Reader.get_h5_n5_file( export_components.externalPath, "w") as h5N5File: # Create a temporary operator to do the work for us opH5N5Writer = OpH5N5WriterBigDataset(parent=self) try: opH5N5Writer.CompressionEnabled.setValue(compress) opH5N5Writer.h5N5File.setValue(h5N5File) opH5N5Writer.h5N5Path.setValue( export_components.internalPath) opH5N5Writer.Image.connect(self.Input) # The H5 Writer provides it's own progress signal, so just connect ours to it. opH5N5Writer.progressSignal.subscribe(self.progressSignal) # Perform the export and block for it in THIS THREAD. opH5N5Writer.WriteImage[:].wait() finally: opH5N5Writer.cleanUp() self.progressSignal(100) except IOError as ex: import sys msg = "\nException raised when attempting to export to {}: {}\n".format( export_components.externalPath, str(ex)) sys.stderr.write(msg) raise
def _findInternalStacks(h5N5File):
    """
    List the internal dataset paths (potential stacks) of an h5/n5 file.

    Method is used, when a directory is selected and the internal path is,
    thus, unclear.

    Args:
        h5N5File (string): h5 or n5 file to be globbed internally

    Returns:
        list of internal stacks (implicitly None if the file extension is
        not an h5/n5 extension)
    """
    known_exts = (OpStreamingH5N5SequenceReaderM.H5EXTS +
                  OpStreamingH5N5SequenceReaderM.N5EXTS)
    if PathComponents(h5N5File).extension in known_exts:
        # get all internal paths with at least a 2D shape
        with OpStreamingH5N5Reader.get_h5_n5_file(h5N5File, mode='r') as handle:
            entries = lsH5N5(handle, minShape=2)
        return [entry['name'] for entry in entries]
def _export_h5n5(self, compress=False): self.progressSignal(0) # Create and open the hdf5/n5 file export_components = PathComponents(self.ExportPath.value) try: if os.path.isdir(export_components.externalPath): # externalPath leads to a n5 file shutil.rmtree(export_components.externalPath) # n5 is stored as a directory structure else: os.remove(export_components.externalPath) except OSError as ex: # It's okay if the file isn't there. if ex.errno != 2: raise try: with OpStreamingH5N5Reader.get_h5_n5_file(export_components.externalPath, "w") as h5N5File: # Create a temporary operator to do the work for us opH5N5Writer = OpH5N5WriterBigDataset(parent=self) try: opH5N5Writer.CompressionEnabled.setValue(compress) opH5N5Writer.h5N5File.setValue(h5N5File) opH5N5Writer.h5N5Path.setValue(export_components.internalPath) opH5N5Writer.Image.connect(self.Input) # The H5 Writer provides it's own progress signal, so just connect ours to it. opH5N5Writer.progressSignal.subscribe(self.progressSignal) # Perform the export and block for it in THIS THREAD. opH5N5Writer.WriteImage[:].wait() finally: opH5N5Writer.cleanUp() self.progressSignal(100) except IOError as ex: import sys msg = "\nException raised when attempting to export to {}: {}\n".format( export_components.externalPath, str(ex) ) sys.stderr.write(msg) raise
def _attemptOpenAsH5N5(self, filePath):
    """Try to interpret *filePath* as an HDF5/N5 dataset.

    Returns:
        ([], None) if the extension is not a known h5/n5 extension,
        otherwise ([h5N5Reader], h5N5Reader.OutputImage).

    Raises:
        OpInputDataReader.DatasetReadError: if the file is missing, cannot
            be opened, contains no/ambiguous datasets when no internal
            path was given, or the internal path cannot be read.

    Side effects:
        Stores the open file handle in self._file (presumably closed by
        the operator's cleanup elsewhere -- not visible in this block).
    """
    # Check for an hdf5 or n5 extension
    pathComponents = PathComponents(filePath)
    ext = pathComponents.extension
    # ext includes the leading dot; the known-extension list does not.
    if ext[1:] not in OpInputDataReader.h5_n5_Exts:
        return [], None

    externalPath = pathComponents.externalPath
    internalPath = pathComponents.internalPath

    if not os.path.exists(externalPath):
        raise OpInputDataReader.DatasetReadError("Input file does not exist: " + externalPath)

    # Open the h5/n5 file in read-only mode
    try:
        h5N5File = OpStreamingH5N5Reader.get_h5_n5_file(externalPath, "r")
    except OpInputDataReader.DatasetReadError:
        # Already the right exception type; let it propagate untouched.
        raise
    except Exception as e:
        msg = "Unable to open H5/N5 File: {}\n{}".format(externalPath, str(e))
        raise OpInputDataReader.DatasetReadError(msg) from e
    else:
        if not internalPath:
            # No internal path was given: we can proceed only if the file
            # holds exactly one dataset.
            possible_internal_paths = lsH5N5(h5N5File)
            if len(possible_internal_paths) == 1:
                internalPath = possible_internal_paths[0]["name"]
            elif len(possible_internal_paths) == 0:
                h5N5File.close()  # close before raising to avoid a leaked handle
                msg = "H5/N5 file contains no datasets: {}".format(externalPath)
                raise OpInputDataReader.DatasetReadError(msg)
            else:
                h5N5File.close()  # close before raising to avoid a leaked handle
                msg = (
                    "When using hdf5/n5, you must append the hdf5 internal path to the "
                    "data set to your filename, e.g. myfile.h5/volume/data "
                    "No internal path provided for dataset in file: {}".format(externalPath)
                )
                raise OpInputDataReader.DatasetReadError(msg)
        try:
            compression_setting = h5N5File[internalPath].compression
        except Exception as e:
            h5N5File.close()
            msg = "Error reading H5/N5 File: {}\n{}".format(externalPath, e)
            raise OpInputDataReader.DatasetReadError(msg) from e

        # If the h5 dataset is compressed, we'll have better performance
        # with a multi-process hdf5 access object.
        # (Otherwise, single-process is faster.)
        allow_multiprocess_hdf5 = (
            "LAZYFLOW_MULTIPROCESS_HDF5" in os.environ and os.environ["LAZYFLOW_MULTIPROCESS_HDF5"] != ""
        )
        if compression_setting is not None and allow_multiprocess_hdf5 and isinstance(h5N5File, h5py.File):
            # Swap the plain h5py handle for a multi-process one.
            h5N5File.close()
            h5N5File = MultiProcessHdf5File(externalPath, "r")

        self._file = h5N5File

        h5N5Reader = OpStreamingH5N5Reader(parent=self)
        h5N5Reader.H5N5File.setValue(h5N5File)

        try:
            h5N5Reader.InternalPath.setValue(internalPath)
        except OpStreamingH5N5Reader.DatasetReadError as e:
            msg = "Error reading H5/N5 File: {}\n{}".format(externalPath, e.msg)
            raise OpInputDataReader.DatasetReadError(msg) from e

        return ([h5N5Reader], h5N5Reader.OutputImage)
def setupOutputs(self):
    """Rebuild the internal reader/metadata/reorder operator chain for the
    current Dataset value and connect it to this operator's outputs.

    Data can come from (1) the project file, (2) a preloaded in-memory
    array, (3) the filesystem, or (4) a zero-filled placeholder (headless
    mode without raw data). A metadata injector and one or two
    OpReorderAxes operators are appended; every created operator is
    tracked in self._opReaders so internalCleanup() can tear it down.

    Raises:
        DatasetConstraintError: if x/y axes are missing or no forced axis
            order fits the data.
        Exception: if the DatasetInfo's axistags have the wrong
            dimensionality for the data.
    """
    self.internalCleanup()
    datasetInfo = self.Dataset.value
    try:
        # Data only comes from the project file if the user said so AND it exists in the project
        datasetInProject = (
            datasetInfo.location == DatasetInfo.Location.ProjectInternal)
        datasetInProject &= self.ProjectFile.ready()
        if datasetInProject:
            internalPath = self.ProjectDataGroup.value + '/' + datasetInfo.datasetId
            datasetInProject &= internalPath in self.ProjectFile.value

        # If we should find the data in the project file, use a dataset reader
        if datasetInProject:
            opReader = OpStreamingH5N5Reader(parent=self)
            opReader.H5N5File.setValue(self.ProjectFile.value)
            opReader.InternalPath.setValue(internalPath)
            providerSlot = opReader.OutputImage
        elif datasetInfo.location == DatasetInfo.Location.PreloadedArray:
            preloaded_array = datasetInfo.preloaded_array
            assert preloaded_array is not None
            if not hasattr(preloaded_array, 'axistags'):
                # Bare ndarray: guess an axis order from its shape.
                axisorder = get_default_axisordering(preloaded_array.shape)
                preloaded_array = vigra.taggedView(preloaded_array, axisorder)
            opReader = OpArrayPiper(parent=self)
            opReader.Input.setValue(preloaded_array)
            providerSlot = opReader.Output
        else:
            if datasetInfo.realDataSource:
                # Use a normal (filesystem) reader
                opReader = OpInputDataReader(parent=self)
                if datasetInfo.subvolume_roi is not None:
                    opReader.SubVolumeRoi.setValue(
                        datasetInfo.subvolume_roi)
                opReader.WorkingDirectory.setValue(
                    self.WorkingDirectory.value)
                opReader.SequenceAxis.setValue(datasetInfo.sequenceAxis)
                opReader.FilePath.setValue(datasetInfo.filePath)
            else:
                # Use fake reader: allows to run the project in a headless
                # mode without the raw data
                opReader = OpZeroDefault(parent=self)
                opReader.MetaInput.meta = MetaDict(
                    shape=datasetInfo.laneShape,
                    dtype=datasetInfo.laneDtype,
                    drange=datasetInfo.drange,
                    axistags=datasetInfo.axistags)
                opReader.MetaInput.setValue(
                    numpy.zeros(datasetInfo.laneShape,
                                dtype=datasetInfo.laneDtype))
            providerSlot = opReader.Output
        self._opReaders.append(opReader)

        # Inject metadata if the dataset info specified any.
        # Also, inject if if dtype is uint8, which we can reasonably assume has drange (0,255)
        metadata = {}
        metadata['display_mode'] = datasetInfo.display_mode
        role_name = self.RoleName.value
        if 'c' not in providerSlot.meta.getTaggedShape():
            num_channels = 0
        else:
            num_channels = providerSlot.meta.getTaggedShape()['c']
        if num_channels > 1:
            metadata['channel_names'] = [
                "{}-{}".format(role_name, i) for i in range(num_channels)
            ]
        else:
            metadata['channel_names'] = [role_name]

        if datasetInfo.drange is not None:
            metadata['drange'] = datasetInfo.drange
        elif providerSlot.meta.dtype == numpy.uint8:
            # SPECIAL case for uint8 data: Provide a default drange.
            # The user can always override this herself if she wants.
            metadata['drange'] = (0, 255)
        if datasetInfo.normalizeDisplay is not None:
            metadata['normalizeDisplay'] = datasetInfo.normalizeDisplay
        if datasetInfo.axistags is not None:
            if len(datasetInfo.axistags) != len(providerSlot.meta.shape):
                ts = providerSlot.meta.getTaggedShape()
                if 'c' in ts and 'c' not in datasetInfo.axistags and len(
                        datasetInfo.axistags) + 1 == len(ts):
                    # provider has no channel axis, but template has => add channel axis to provider
                    # fixme: Optimize the axistag guess in BatchProcessingApplet instead of hoping for the best here
                    metadata['axistags'] = vigra.defaultAxistags(
                        ''.join(datasetInfo.axistags.keys()) + 'c')
                else:
                    # This usually only happens when we copied a DatasetInfo from another lane,
                    # and used it as a 'template' to initialize this lane.
                    # This happens in the BatchProcessingApplet when it attempts to guess the axistags of
                    # batch images based on the axistags chosen by the user in the interactive images.
                    # If the interactive image tags don't make sense for the batch image, you get this error.
                    raise Exception(
                        "Your dataset's provided axistags ({}) do not have the "
                        "correct dimensionality for your dataset, which has {} dimensions."
                        .format(
                            "".join(tag.key for tag in datasetInfo.axistags),
                            len(providerSlot.meta.shape)))
            else:
                metadata['axistags'] = datasetInfo.axistags
        if datasetInfo.original_axistags is not None:
            metadata['original_axistags'] = datasetInfo.original_axistags

        if datasetInfo.subvolume_roi is not None:
            metadata['subvolume_roi'] = datasetInfo.subvolume_roi
            # FIXME: We are overwriting the axistags metadata to intentionally allow
            #        the user to change our interpretation of which axis is which.
            #        That's okay, but technically there's a special corner case if
            #        the user redefines the channel axis index.
            #        Technically, it invalidates the meaning of meta.ram_usage_per_requested_pixel.
            #        For most use-cases, that won't really matter, which is why I'm not worrying about it right now.

        opMetadataInjector = OpMetadataInjector(parent=self)
        opMetadataInjector.Input.connect(providerSlot)
        opMetadataInjector.Metadata.setValue(metadata)
        providerSlot = opMetadataInjector.Output
        self._opReaders.append(opMetadataInjector)

        self._NonTransposedImage.connect(providerSlot)

        # make sure that x and y axes are present in the selected axis order
        if 'x' not in providerSlot.meta.axistags or 'y' not in providerSlot.meta.axistags:
            raise DatasetConstraintError(
                "DataSelection",
                "Data must always have at leaset the axes x and y for ilastik to work."
            )

        if self.forceAxisOrder:
            assert isinstance(self.forceAxisOrder, list), \
                "forceAxisOrder should be a *list* of preferred axis orders"
            # Before we re-order, make sure no non-singleton
            # axes would be dropped by the forced order.
            tagged_provider_shape = providerSlot.meta.getTaggedShape()
            minimal_axes = [
                k_v for k_v in list(tagged_provider_shape.items())
                if k_v[1] > 1
            ]
            minimal_axes = set(k for k, v in minimal_axes)

            # Pick the shortest of the possible 'forced' orders that
            # still contains all the axes of the original dataset.
            candidate_orders = list(self.forceAxisOrder)
            candidate_orders = [
                order for order in candidate_orders
                if minimal_axes.issubset(set(order))
            ]

            if len(candidate_orders) == 0:
                msg = "The axes of your dataset ({}) are not compatible with any of the allowed"\
                      " axis configurations used by this workflow ({}). Please fix them."\
                      .format(providerSlot.meta.getAxisKeys(), self.forceAxisOrder)
                raise DatasetConstraintError("DataSelection", msg)
            output_order = sorted(candidate_orders, key=len)[0]  # the shortest one
            output_order = "".join(output_order)
        else:
            # No forced axisorder is supplied. Use original axisorder as
            # output order: it is assumed by the export-applet, that the
            # an OpReorderAxes operator is added in the beginning
            output_order = "".join(
                [x for x in providerSlot.meta.axistags.keys()])
        op5 = OpReorderAxes(parent=self)
        op5.AxisOrder.setValue(output_order)
        op5.Input.connect(providerSlot)
        providerSlot = op5.Output
        self._opReaders.append(op5)

        # If the channel axis is missing, add it as last axis
        if 'c' not in providerSlot.meta.axistags:
            op5 = OpReorderAxes(parent=self)
            keys = providerSlot.meta.getAxisKeys()
            # Append
            keys.append('c')
            op5.AxisOrder.setValue("".join(keys))
            op5.Input.connect(providerSlot)
            providerSlot = op5.Output
            self._opReaders.append(op5)

        # Connect our external outputs to the internal operators we chose
        self.Image.connect(providerSlot)
        self.AllowLabels.setValue(datasetInfo.allowLabels)

        # If the reading operator provides a nickname, use it.
        if self.Image.meta.nickname is not None:
            datasetInfo.nickname = self.Image.meta.nickname

        imageName = datasetInfo.nickname
        if imageName == "":
            imageName = datasetInfo.filePath
        self.ImageName.setValue(imageName)
    except:
        # Undo any partial setup before propagating the error.
        self.internalCleanup()
        raise
def get_provider_slot(self, parent: Operator):
    """Create a streaming reader for this dataset inside the project file
    and return its output slot."""
    reader = OpStreamingH5N5Reader(parent=parent)
    reader.H5N5File.setValue(self.project_file)
    reader.InternalPath.setValue(self.inner_path)
    return reader.OutputImage