def _export_hdf5(self):
    """
    Export the image on ``self.Input`` to the HDF5 location in ``self.ExportPath``.

    Any file already present at the external path is removed, a fresh file
    is created, and a temporary ``OpH5WriterBigDataset`` writes the image
    into the dataset at the internal path.  The write is executed and
    waited for in the calling thread.

    Progress is reported via ``self.progressSignal``: 0 at the start, the
    writer's own progress while it runs, and 100 once the writer has been
    cleaned up (whether or not the write succeeded).

    Raises:
        OSError: If the existing file cannot be removed for any reason
            other than that it does not exist.
        IOError: Re-raised after a description was written to stderr.
    """
    import errno
    import sys

    self.progressSignal(0)

    # Create and open the hdf5 file
    export_components = PathComponents(self.ExportPath.value)
    try:
        os.remove(export_components.externalPath)
    except OSError as ex:
        # It's okay if the file isn't there.
        if ex.errno != errno.ENOENT:
            raise

    try:
        with h5py.File(export_components.externalPath, 'w') as hdf5File:
            # Create a temporary operator to do the work for us
            opH5Writer = OpH5WriterBigDataset(parent=self)
            try:
                opH5Writer.hdf5File.setValue(hdf5File)
                opH5Writer.hdf5Path.setValue(export_components.internalPath)
                opH5Writer.Image.connect(self.Input)

                # The H5 Writer provides its own progress signal,
                # so just connect ours to it.
                opH5Writer.progressSignal.subscribe(self.progressSignal)

                # Perform the export and block for it in THIS THREAD.
                opH5Writer.WriteImage[:].wait()
            finally:
                opH5Writer.cleanUp()
                self.progressSignal(100)
    except IOError as ex:
        msg = "\nException raised when attempting to export to {}: {}\n"\
            .format(export_components.externalPath, str(ex))
        sys.stderr.write(msg)
        raise
def test_Writer(self):
    """
    Write ``self.testData`` through ``OpH5WriterBigDataset`` into a
    pre-existing HDF5 group, with block metadata forced onto the source
    slot, then re-open the file and compare shape and contents.
    """
    # Create the h5 file.
    # The mode is spelled out ('a': read/write, create if missing) instead of
    # relying on the library default, and the context manager guarantees the
    # handle is released even when an assertion below fails.
    with h5py.File(self.testDataFileName, 'a') as hdf5File:
        opPiper = OpArrayPiper(graph=self.graph)
        opPiper.Input.setValue(self.testData)

        # Force extra metadata onto the output
        opPiper.Output.meta.ideal_blockshape = (1, 1, 0, 0, 1)
        # Pretend the RAM usage will be really high to force lots of tiny blocks
        opPiper.Output.meta.ram_usage_per_requested_pixel = 1000000.0

        opWriter = OpH5WriterBigDataset(graph=self.graph)

        # This checks that you can give a preexisting group as the file
        g = hdf5File.create_group('volume')
        opWriter.hdf5File.setValue(g)
        opWriter.hdf5Path.setValue("data")
        opWriter.Image.connect(opPiper.Output)

        # Force the operator to execute by asking for the output (a bool)
        success = opWriter.WriteImage.value
        assert success

    # Check the file.
    with h5py.File(self.testDataFileName, 'r') as f:
        dataset = f[self.datasetInternalPath]
        assert dataset.shape == self.dataShape
        assert numpy.all(
            dataset[...] == self.testData.view(numpy.ndarray)[...])
def test_Writer(self):
    """
    Write ``self.testData`` through ``OpH5WriterBigDataset`` directly into
    the HDF5 file at ``self.datasetInternalPath``, then re-open the file
    and compare shape and contents.
    """
    # Create the h5 file.
    # The mode is spelled out ('a': read/write, create if missing) instead of
    # relying on the library default, and the context manager guarantees the
    # handle is released even when an assertion below fails.
    with h5py.File(self.testDataFileName, 'a') as hdf5File:
        opPiper = OpArrayPiper(graph=self.graph)
        opPiper.Input.setValue(self.testData)

        opWriter = OpH5WriterBigDataset(graph=self.graph)
        opWriter.hdf5File.setValue(hdf5File)
        opWriter.hdf5Path.setValue(self.datasetInternalPath)
        opWriter.Image.connect(opPiper.Output)

        # Force the operator to execute by asking for the output (a bool)
        success = opWriter.WriteImage.value
        assert success

    # Check the file.
    with h5py.File(self.testDataFileName, 'r') as f:
        dataset = f[self.datasetInternalPath]
        assert dataset.shape == self.dataShape
        assert numpy.all(
            dataset[...] == self.testData.view(numpy.ndarray)[...])
def test_Writer(self):
    """
    Give ``self.testData`` directly to ``OpH5WriterBigDataset`` (no
    intermediate operator), write it into a pre-existing HDF5 group, then
    re-open the file and compare shape and contents.
    """
    # Create the h5 file.
    # The mode is spelled out ('a': read/write, create if missing) instead of
    # relying on the library default, and the context manager guarantees the
    # handle is released even when an assertion below fails.
    with h5py.File(self.testDataFileName, 'a') as hdf5File:
        opWriter = OpH5WriterBigDataset(graph=self.graph)

        # This checks that you can give a preexisting group as the file
        g = hdf5File.create_group('volume')
        opWriter.hdf5File.setValue(g)
        opWriter.hdf5Path.setValue("data")
        opWriter.Image.setValue(self.testData)

        # Force the operator to execute by asking for the output (a bool)
        success = opWriter.WriteImage.value
        assert success

    # Check the file.
    with h5py.File(self.testDataFileName, 'r') as f:
        dataset = f[self.datasetInternalPath]
        assert dataset.shape == self.dataShape
        assert numpy.all(
            dataset[...] == self.testData.view(numpy.ndarray)[...])
def importStackAsLocalDataset(self, info, sequence_axis='t'):
    """
    Add the given stack data to the project file as a local dataset.
    Does not update the topLevelOperator.

    :param info: A DatasetInfo object.
                 Note: info.filePath must be a str which lists the stack files, delimited with os.path.pathsep
                 Note: info will be MODIFIED by this function.  Use the modified info when assigning it to a dataset.
    :param sequence_axis: Value given to the loader's ``SequenceAxis`` slot.
    :return: The value of the writer's ``WriteImage`` slot.
    """
    self.progressSignal.emit(0)

    projectFileHdf5 = self.topLevelOperator.ProjectFile.value

    globstring = info.filePath
    info.location = DatasetInfo.Location.ProjectInternal
    firstPathParts = PathComponents(info.filePath.split(os.path.pathsep)[0])
    info.filePath = firstPathParts.externalDirectory + '/??' + firstPathParts.extension
    info.fromstack = True

    # Use absolute path
    # NOTE(review): WorkingDirectory is used here without `.value`;
    # confirm it is a plain path string and not a slot.
    cwd = self.topLevelOperator.WorkingDirectory
    if os.path.pathsep not in globstring and not os.path.isabs(globstring):
        globstring = os.path.normpath(os.path.join(cwd, globstring))

    # Both operators start as None so the cleanup below only touches what
    # was actually constructed; otherwise a failing constructor would
    # surface as a NameError and hide the real exception.
    opLoader = None
    opWriter = None
    try:
        if firstPathParts.extension.lower() in OpTiffReader.TIFF_EXTS:
            # Special loader for TIFFs
            opLoader = OpTiffSequenceReader(parent=self.topLevelOperator.parent)
            opLoader.SequenceAxis.setValue(sequence_axis)
            opLoader.GlobString.setValue(globstring)
            data_slot = opLoader.Output
        else:
            # All other sequences (e.g. pngs, jpegs, etc.)
            opLoader = OpStackLoader(parent=self.topLevelOperator.parent)
            opLoader.SequenceAxis.setValue(sequence_axis)
            opLoader.globstring.setValue(globstring)
            data_slot = opLoader.stack

        opWriter = OpH5WriterBigDataset(parent=self.topLevelOperator.parent)
        opWriter.hdf5File.setValue(projectFileHdf5)
        opWriter.hdf5Path.setValue(self.topGroupName + '/local_data/' + info.datasetId)
        opWriter.CompressionEnabled.setValue(False)
        # We assume that the main bottleneck is the hard disk,
        # so adding lots of threads to access it at once seems like a bad idea.
        opWriter.BatchSize.setValue(1)
        opWriter.Image.connect(data_slot)

        # Forward progress from the writer directly to our applet
        opWriter.progressSignal.subscribe(self.progressSignal.emit)

        success = opWriter.WriteImage.value
    finally:
        if opWriter is not None:
            opWriter.cleanUp()
        if opLoader is not None:
            opLoader.cleanUp()
        self.progressSignal.emit(100)

    return success
def exportFinalSegmentation(self, outputPath, axisorder, progressCallback=None):
    """
    Export ``self.FinalSegmentation`` to a new HDF5 file, inside a request.

    The data is reordered to ``axisorder`` and written to the dataset
    ``split_result`` of the file at ``outputPath`` (opened with mode 'w').
    The request is submitted before this method returns.

    :param outputPath: Path of the HDF5 file to create.
    :param axisorder: Value given to the ``AxisOrder`` slot of the
                      reordering operator.
    :param progressCallback: Optional callable subscribed to the exporter's
                             progress signal.
    :return: The submitted request, in case the user wants to cancel it.
    """
    import traceback

    assert self.FinalSegmentation.ready(), \
        "Can't export yet: The final segmentation isn't ready!"

    logger.info("Starting Final Segmentation Export...")

    opTranspose = OpReorderAxes(parent=self)
    opTranspose.AxisOrder.setValue(axisorder)
    opTranspose.Input.connect(self.FinalSegmentation)

    f = h5py.File(outputPath, 'w')
    opExporter = OpH5WriterBigDataset(parent=self)
    opExporter.hdf5File.setValue(f)
    opExporter.hdf5Path.setValue('split_result')
    opExporter.Image.connect(opTranspose.Output)
    if progressCallback is not None:
        opExporter.progressSignal.subscribe(progressCallback)

    req = Request(partial(self._runExporter, opExporter))

    def cleanOps():
        opExporter.cleanUp()
        opTranspose.cleanUp()

    def cleanOpsAndClose():
        # The output file must be closed even if operator cleanup raises.
        try:
            cleanOps()
        finally:
            f.close()

    def handleFailed(exc, exc_info):
        try:
            cleanOpsAndClose()
        finally:
            # Report the original failure even if cleanup itself failed.
            traceback.print_tb(exc_info[2])
            msg = "Final Segmentation export FAILED due to the following error:\n{}".format(exc)
            logger.error(msg)

    def handleFinished(result):
        try:
            cleanOps()
            logger.info("FINISHED Final Segmentation Export")
        finally:
            f.close()

    def handleCancelled():
        cleanOpsAndClose()
        logger.info("Final Segmentation export was cancelled!")

    req.notify_failed(handleFailed)
    req.notify_finished(handleFinished)
    req.notify_cancelled(handleCancelled)

    req.submit()
    return req  # Returned in case the user wants to cancel it.
def _serialize(self, group, name, slot):
    """Called when the currently stored predictions are dirty. If
    prediction storage is currently enabled, store them to the file.
    Otherwise, just delete them/

    (Avoid inconsistent project states, e.g. don't allow old
    predictions to be stored with a new classifier.)

    :param group: HDF5 group in which the ``self.name`` subgroup is created.
    :param name: Name removed from ``group`` if saving is cancelled or fails.
    :param slot: Multi-slot; each subslot is written to its own dataset
                 named by ``self.subname.format(index)``.
    :raises: Any exception raised while writing, including the error of a
             failed write request, after partial results were deleted.
    """
    predictionDir = group.create_group(self.name)

    # Disconnect the operators that might be using the old data.
    self.deserialize(group)

    failedToSave = False
    opWriter = None
    try:
        num = len(slot)
        if num > 0:
            increment = 100 / float(num)

        progress = 0
        for imageIndex in range(num):
            # Have we been cancelled?
            if not self.predictionStorageEnabled:
                break

            datasetName = self.subname.format(imageIndex)

            # Use a big dataset writer to do this in chunks
            opWriter = OpH5WriterBigDataset(graph=self.operator.graph,
                                            parent=self.operator.parent)
            opWriter.hdf5File.setValue(predictionDir)
            opWriter.hdf5Path.setValue(datasetName)
            opWriter.Image.connect(slot[imageIndex])

            def handleProgress(percent):
                # Stop sending progress if we were cancelled
                if self.predictionStorageEnabled:
                    curprogress = progress + percent * (increment / 100.0)
                    self.progressSignal(curprogress)
            opWriter.progressSignal.subscribe(handleProgress)

            # Create the request
            request = opWriter.WriteImage[...]
            self._predictionStorageRequest = request

            # Must use a threading event here because if we wait on the
            # request from within a "real" thread, it refuses to be cancelled.
            finishedEvent = threading.Event()
            failure = []

            def handleFinish(result):
                finishedEvent.set()

            def handleCancel():
                logger.info("Full volume prediction save CANCELLED.")
                self._predictionStorageRequest = None
                finishedEvent.set()

            def handleFailed(exc, exc_info):
                # Without this callback a failed write would never set the
                # event and this method would block forever.
                # NOTE(review): assumes the (exc, exc_info) callback
                # signature of notify_failed -- confirm against Request.
                failure.append(exc)
                finishedEvent.set()

            # Trigger the write and wait for it to complete, cancel or fail.
            request.notify_finished(handleFinish)
            request.notify_cancelled(handleCancel)
            request.notify_failed(handleFailed)
            request.submit()  # Can't call wait().  See note above.
            finishedEvent.wait()

            if failure:
                # Re-raise in this thread so the partially written
                # predictions are deleted by the handlers below.
                raise failure[0]

            progress += increment
            opWriter.cleanUp()
            opWriter = None
    except:
        failedToSave = True
        raise
    finally:
        if opWriter is not None:
            opWriter.cleanUp()

        # If we were cancelled, delete the predictions we just started
        if not self.predictionStorageEnabled or failedToSave:
            deleteIfPresent(group, name)
def _serializeToHdf5(self, topGroup, hdf5File, projectFilePath):
    """
    Store the dataset selection state into ``topGroup``.

    Steps, in order:
      1. Copy every ready dataset marked ``ProjectInternal`` that is not yet
         in the ``local_data`` group into that group.
      2. Delete ``local_data`` entries no longer referenced by any slot.
      3. If data was copied and this is not a snapshot save, re-set the
         first dataset slot so the operator is configured again.
      4. Rewrite the ``Role Names`` dataset and the ``infos`` group.
      5. Clear ``self._dirty``.

    :param topGroup: HDF5 group that receives all data written here.
    :param hdf5File: Not used by this method.
    :param projectFilePath: Path of the file being saved; compared against
                            ``self._projectFilePath`` to detect snapshots.
    """
    topOp = self.topLevelOperator

    # Write any missing local datasets to the local_data group
    localDataGroup = getOrCreateGroup(topGroup, 'local_data')
    wroteInternalData = False
    for laneIndex, multislot in enumerate(topOp.DatasetGroup):
        for roleIndex, slot in enumerate(multislot):
            if not slot.ready():
                continue
            info = slot.value
            # If this dataset should be stored in the project, but it isn't there yet
            if info.location == DatasetInfo.Location.ProjectInternal \
                    and info.datasetId not in localDataGroup.keys():
                # Obtain the data from the corresponding output and store it to the project.
                dataSlot = topOp._NonTransposedImageGroup[laneIndex][roleIndex]

                # Constructed outside the try block: if construction fails
                # there is nothing to clean up, and the real error is not
                # replaced by a NameError from the finally clause.
                opWriter = OpH5WriterBigDataset(parent=topOp.parent, graph=topOp.graph)
                try:
                    # Compression slows down browsing a lot, and raw data tends
                    # to be noisy and doesn't compress very well, anyway.
                    opWriter.CompressionEnabled.setValue(False)
                    opWriter.hdf5File.setValue(localDataGroup)
                    opWriter.hdf5Path.setValue(info.datasetId)
                    opWriter.Image.connect(dataSlot)

                    # Trigger the copy
                    success = opWriter.WriteImage.value
                    assert success
                finally:
                    opWriter.cleanUp()

                # Add axistags and drange attributes, in case someone uses this dataset outside ilastik
                localDataGroup[info.datasetId].attrs['axistags'] = dataSlot.meta.axistags.toJSON()
                if dataSlot.meta.drange is not None:
                    localDataGroup[info.datasetId].attrs['drange'] = dataSlot.meta.drange

                # Make sure the dataSlot's axistags are updated with the dataset as we just wrote it
                # (The top-level operator may use an OpReorderAxes, which changed the axisorder)
                info.axistags = dataSlot.meta.axistags

                wroteInternalData = True

    # Construct a list of all the local dataset ids we want to keep
    localDatasetIds = set()
    for multislot in topOp.DatasetGroup:
        for slot in multislot:
            if slot.ready() and slot.value.location == DatasetInfo.Location.ProjectInternal:
                localDatasetIds.add(slot.value.datasetId)

    # Delete any datasets in the project that aren't needed any more.
    # Iterate over a snapshot of the names: members are removed in the loop.
    for datasetName in list(localDataGroup.keys()):
        if datasetName not in localDatasetIds:
            del localDataGroup[datasetName]

    if wroteInternalData:
        # We can only re-configure the operator if we're not saving a snapshot
        # We know we're saving a snapshot if the project file isn't the one we deserialized with.
        if self._projectFilePath is None or self._projectFilePath == projectFilePath:
            # Force the operator to setupOutputs() again so it gets data from the project, not external files
            firstInfo = topOp.DatasetGroup[0][0].value
            topOp.DatasetGroup[0][0].setValue(firstInfo, check_changed=False)

    deleteIfPresent(topGroup, 'Role Names')
    topGroup.create_dataset('Role Names', data=topOp.DatasetRoles.value)

    # Access the info group
    infoDir = getOrCreateGroup(topGroup, 'infos')

    # Delete all infos (again iterating over a snapshot of the names)
    for infoName in list(infoDir.keys()):
        del infoDir[infoName]

    # Rebuild the list of infos
    roleNames = topOp.DatasetRoles.value
    for laneIndex, multislot in enumerate(topOp.DatasetGroup):
        laneGroupName = 'lane{:04d}'.format(laneIndex)
        laneGroup = infoDir.create_group(laneGroupName)

        for roleIndex, slot in enumerate(multislot):
            # A group is created for every role, even if its slot isn't ready.
            infoGroup = laneGroup.create_group(roleNames[roleIndex])
            if not slot.ready():
                continue

            datasetInfo = slot.value
            locationString = self.LocationStrings[datasetInfo.location]
            infoGroup.create_dataset('location', data=locationString)
            infoGroup.create_dataset('filePath', data=datasetInfo.filePath)
            infoGroup.create_dataset('datasetId', data=datasetInfo.datasetId)
            infoGroup.create_dataset('allowLabels', data=datasetInfo.allowLabels)
            infoGroup.create_dataset('nickname', data=datasetInfo.nickname)
            infoGroup.create_dataset('fromstack', data=datasetInfo.fromstack)
            if datasetInfo.drange is not None:
                infoGroup.create_dataset('drange', data=datasetInfo.drange)

            # Pull the axistags from the NonTransposedImage,
            # which is what the image looks like before 'forceAxisOrder' is applied,
            # and before 'c' is automatically appended
            axistags = topOp._NonTransposedImageGroup[laneIndex][roleIndex].meta.axistags
            infoGroup.create_dataset('axistags', data=axistags.toJSON())
            axisorder = "".join(tag.key for tag in axistags)
            infoGroup.create_dataset('axisorder', data=axisorder)
            if datasetInfo.subvolume_roi is not None:
                infoGroup.create_dataset('subvolume_roi', data=datasetInfo.subvolume_roi)

    self._dirty = False
def execute(self, slot, subindex, roi, result):
    """
    Compute the ``ExportResult`` slot by exporting the cached image to HDF5.

    If the ``Dirty`` slot is false, nothing is written and ``result[0]`` is
    set to True.  Otherwise the selected z-slices are requested from
    ``self.ImageCache`` (with ``fixAtCurrent`` switched off for the
    duration), the cache output is written to the location in
    ``OutputDataPath``, and ``Dirty`` is updated from the writer's result.

    :param slot: The output slot being requested.
    :param subindex: Unused.
    :param roi: Unused.
    :param result: Sequence whose first element receives the outcome:
                   True if the export is up to date, False otherwise
                   (including when the output file cannot be opened).
    """
    if slot == self.Dirty:
        assert False  # Shouldn't get to this line because the dirty output is given a value directly

    if slot == self.OutputDataPath:
        assert False  # This slot is already set via setupOutputs

    if slot == self.ExportResult:
        # We can stop now if the output isn't dirty
        if not self.Dirty.value:
            result[0] = True
            return

        exportFormat = self.Format.value

        # Export H5
        if exportFormat == ExportFormat.H5:
            pathComp = PathComponents(self.OutputDataPath.value)

            # Ensure the directory exists
            if not os.path.exists(pathComp.externalDirectory):
                with self._createDirLock:
                    # Check again now that we have the lock.
                    if not os.path.exists(pathComp.externalDirectory):
                        os.makedirs(pathComp.externalDirectory)

            # Open the file.  The mode is explicit ('a': read/write, create
            # if missing) rather than relying on the library default.
            try:
                hdf5File = h5py.File(pathComp.externalPath, 'a')
            except Exception:
                logger.error("Unable to open hdf5File: " + pathComp.externalPath)
                logger.error(traceback.format_exc())
                result[0] = False
                return

            opH5Writer = None
            try:
                # Set up the write operator.  It reads from the cache
                # output, not from ImageToExport directly.
                opH5Writer = OpH5WriterBigDataset(parent=self)
                opH5Writer.hdf5File.setValue(hdf5File)
                opH5Writer.hdf5Path.setValue(pathComp.internalPath)
                opH5Writer.Image.connect(self.ImageCache.Output)

                logger.debug("computing predictions for the selected slices:")
                self.ImageCache.fixAtCurrent.setValue(False)
                # check readiness
                for inp in self.ImageCache.inputs:
                    logger.debug("%s %s", inp, self.ImageCache.inputs[inp].ready())

                logger.debug("input shape: %s", self.ImageCache.Input.meta.shape)
                logger.debug("output shape: %s", self.ImageCache.Output.meta.shape)

                selectedSlices = self.SelectedSlices.value
                imageShape = self.ImageToExport.meta.shape
                zaxis = self.ImageToExport.meta.axistags.index('z')
                for isl, sl in enumerate(selectedSlices):
                    logger.debug("computing for slice ... %s", isl)
                    # Full extent on every axis, a single plane along z
                    start = [0] * len(imageShape)
                    start[zaxis] = sl
                    stop = list(imageShape)
                    stop[zaxis] = sl + 1
                    sliceRoi = SubRegion(self.ImageCache, start=start, stop=stop)
                    logger.debug("%s", sliceRoi)
                    # The returned data is discarded; the request is made
                    # while fixAtCurrent is off.
                    self.ImageCache.Output[sliceRoi.toSlice()].wait()

                self.ImageCache.fixAtCurrent.setValue(True)

                # The H5 Writer provides its own progress signal, so just connect ours to it.
                opH5Writer.progressSignal.subscribe(self.progressSignal)

                # Trigger the write
                self.Dirty.setValue(not opH5Writer.WriteImage.value)
            finally:
                # Release the file and the temporary operator even if the
                # export raised.
                try:
                    hdf5File.close()
                finally:
                    if opH5Writer is not None:
                        opH5Writer.cleanUp()
        else:
            assert False, "Unknown export format"

        result[0] = not self.Dirty.value
def exportFinalSupervoxels(self, outputPath, axisorder, progressCallback=None):
    """
    Executes the export process within a request.
    The (already-running) request is returned, in case you want to wait for it or monitor its progress.

    ``self.FinalSupervoxels`` is reordered to ``axisorder`` and written to
    the dataset ``stack`` of a new HDF5 file at ``outputPath``.  When the
    write finishes, a ``transforms`` dataset is generated from the
    accumulated mapping and every other dataset of the original
    segmentation file is copied into the output file.

    :param outputPath: Path of the HDF5 file to create (opened with mode 'w').
    :param axisorder: Value given to the ``AxisOrder`` slot of the
                      reordering operator.
    :param progressCallback: Optional callable subscribed to the exporter's
                             progress signal.
    :return: The submitted request.
    """
    assert self.FinalSupervoxels.ready(), \
        "Can't export yet: The final supervoxels aren't ready!"

    logger.info("Starting Final Supervoxel Export...")

    opTranspose = OpReorderAxes(parent=self)
    opTranspose.AxisOrder.setValue(axisorder)
    opTranspose.Input.connect(self.FinalSupervoxels)

    f = h5py.File(outputPath, 'w')
    opExporter = OpH5WriterBigDataset(parent=self)
    opExporter.hdf5File.setValue(f)
    opExporter.hdf5Path.setValue('stack')
    opExporter.Image.connect(opTranspose.Output)
    if progressCallback is not None:
        opExporter.progressSignal.subscribe(progressCallback)

    req = Request(partial(self._runExporter, opExporter))

    def cleanOps():
        opExporter.cleanUp()
        opTranspose.cleanUp()

    def cleanOpsAndClose():
        # The output file must be closed even if operator cleanup raises.
        try:
            cleanOps()
        finally:
            f.close()

    def handleFailed(exc, exc_info):
        try:
            cleanOpsAndClose()
        finally:
            # Report the original failure even if cleanup itself failed.
            msg = "Final Supervoxel export FAILED due to the following error:\n{}".format(exc)
            log_exception(logger, msg, exc_info)

    def handleFinished(result):
        try:
            try:
                # Generate the mapping transforms dataset
                mapping = self._opAccumulateFinalImage.Mapping.value
                # The label count is taken from the stop value of the last
                # key.  list() makes the indexing work whether keys() is a
                # list or a view.
                # NOTE(review): presumably the mapping is ordered by label
                # range -- confirm.
                num_labels = list(mapping.keys())[-1][1]
                transform = numpy.zeros(shape=(num_labels, 2), dtype=numpy.uint32)
                for (start, stop), body_id in mapping.items():
                    for supervoxel_label in range(start, stop):
                        transform[supervoxel_label][0] = supervoxel_label
                        if body_id == -1:
                            # Special case: -1 means "identity transform" for this supervoxel
                            # (Which is really an untouched raveler body)
                            transform[supervoxel_label][1] = supervoxel_label
                        else:
                            transform[supervoxel_label][1] = body_id

                # Save the transform before closing the file
                f.create_dataset('transforms', data=transform)

                # Copy all other datasets from the original segmentation file.
                ravelerSegmentationInfo = self.DatasetInfos[2].value
                pathComponents = PathComponents(ravelerSegmentationInfo.filePath,
                                                self.WorkingDirectory.value)
                with h5py.File(pathComponents.externalPath, 'r') as originalFile:
                    for k, dset in originalFile.items():
                        if k not in ['transforms', 'stack']:
                            f.copy(dset, k)
            finally:
                # Operators are cleaned up even if the post-processing
                # above raised.
                cleanOps()
            logger.info("FINISHED Final Supervoxel Export")
        finally:
            f.close()

    def handleCancelled():
        cleanOpsAndClose()
        logger.info("Final Supervoxel export was cancelled!")

    req.notify_failed(handleFailed)
    req.notify_finished(handleFinished)
    req.notify_cancelled(handleCancelled)

    req.submit()
    return req  # Returned in case the user wants to cancel it.
def _serializePredictions(self, topGroup, startProgress, endProgress):
    """
    Called when the currently stored predictions are dirty.
    If prediction storage is currently enabled, store them to the file.
    Otherwise, just delete them/
    (Avoid inconsistent project states, e.g. don't allow old predictions to be stored with a new classifier.)

    :param topGroup: HDF5 group holding the 'Predictions' subgroup.
    :param startProgress: Progress value reported at the start of an image.
    :param endProgress: Progress value corresponding to all images written.
    """
    with Tracer(traceLogger):
        # If the predictions are missing, then maybe the user wants them stored (even if they aren't dirty)
        if self._dirtyFlags[Section.Predictions] or 'Predictions' not in topGroup.keys():

            deleteIfPresent(topGroup, 'Predictions')

            # Disconnect the precomputed prediction inputs.
            for slot in self.mainOperator.PredictionsFromDisk:
                slot.disconnect()

            if self.predictionStorageEnabled:
                predictionDir = topGroup.create_group('Predictions')

                failedToSave = False
                # Bound before the try block so the finally clause can use
                # them even when no image was processed.
                datasetName = None
                progress = [startProgress]
                try:
                    numImages = len(self.mainOperator.PredictionProbabilities)

                    if numImages > 0:
                        increment = (endProgress - startProgress) / float(numImages)

                    for imageIndex in range(numImages):
                        # Have we been cancelled?
                        if not self.predictionStorageEnabled:
                            break

                        datasetName = 'predictions{:04d}'.format(imageIndex)

                        # One-element list so the nested callback can update it
                        progress = [startProgress]

                        # Use a big dataset writer to do this in chunks
                        opWriter = OpH5WriterBigDataset(graph=self.mainOperator.graph)
                        opWriter.hdf5File.setValue(predictionDir)
                        opWriter.hdf5Path.setValue(datasetName)
                        opWriter.Image.connect(
                            self.mainOperator.PredictionProbabilities[imageIndex])

                        # Create the request
                        self._predictionStorageRequest = opWriter.WriteImage[...]

                        def handleProgress(percent):
                            # Stop sending progress if we were cancelled
                            if self.predictionStorageEnabled:
                                progress[0] = startProgress + percent * (increment / 100.0)
                                self.progressSignal.emit(progress[0])
                        opWriter.progressSignal.subscribe(handleProgress)

                        finishedEvent = threading.Event()

                        def handleFinish(request):
                            finishedEvent.set()

                        def handleCancel(request):
                            self._predictionStorageRequest = None
                            finishedEvent.set()

                        # Trigger the write and wait for it to complete or cancel.
                        self._predictionStorageRequest.notify(handleFinish)
                        self._predictionStorageRequest.onCancel(handleCancel)
                        finishedEvent.wait()
                except:
                    failedToSave = True
                    raise
                finally:
                    # If we were cancelled, delete the predictions we just started
                    if not self.predictionStorageEnabled or failedToSave:
                        if datasetName is not None:
                            deleteIfPresent(predictionDir, datasetName)
                        self._predictionsPresent = False
                        # NOTE(review): nothing in this method reads
                        # startProgress after this point -- confirm whether
                        # this was meant to run once per image instead.
                        startProgress = progress[0]
                    else:
                        # Re-load the operator with the prediction groups we just saved
                        self._deserializePredictions(topGroup)
def execute(self, slot, subindex, roi, result):
    """
    Compute the ``ExportResult`` slot by exporting ``ImageToExport`` to HDF5.

    If the ``Dirty`` slot is false, nothing is written and ``result[0]`` is
    set to True.  Otherwise the image is written to the location in
    ``OutputDataPath`` (resolved against ``WorkingDirectory``), and
    ``Dirty`` is updated from the writer's result once the file has been
    closed.

    :param slot: The output slot being requested.
    :param subindex: Unused.
    :param roi: Unused.
    :param result: Sequence whose first element receives the outcome:
                   True if the export is up to date, False otherwise
                   (including when the output file cannot be opened).
    """
    if slot == self.Dirty:
        assert False  # Shouldn't get to this line because the dirty output is given a value directly

    if slot == self.OutputDataPath:
        assert False  # This slot is already set via setupOutputs

    if slot == self.ExportResult:
        # We can stop now if the output isn't dirty
        if not self.Dirty.value:
            result[0] = True
            return

        exportFormat = self.Format.value

        # Export H5
        if exportFormat == ExportFormat.H5:
            pathComp = PathComponents(self.OutputDataPath.value,
                                      self.WorkingDirectory.value)

            # Ensure the directory exists
            if not os.path.exists(pathComp.externalDirectory):
                with self._createDirLock:
                    # Check again now that we have the lock.
                    if not os.path.exists(pathComp.externalDirectory):
                        os.makedirs(pathComp.externalDirectory)

            self.cleanupPreview()

            # Open the file.  The mode is explicit ('a': read/write, create
            # if missing) rather than relying on the library default.
            # NOTE(review): on this failure path, and when the write below
            # raises, setupPreview() is not called again -- confirm that is
            # intended.
            try:
                hdf5File = h5py.File(pathComp.externalPath, 'a')
            except Exception:
                logger.error("Unable to open hdf5File: " + pathComp.externalPath)
                logger.error(traceback.format_exc())
                result[0] = False
                return

            opH5Writer = None
            try:
                # Set up the write operator
                opH5Writer = OpH5WriterBigDataset(parent=self)
                opH5Writer.hdf5File.setValue(hdf5File)
                opH5Writer.hdf5Path.setValue(pathComp.internalPath)
                opH5Writer.Image.connect(self.ImageToExport)

                # The H5 Writer provides its own progress signal, so just connect ours to it.
                opH5Writer.progressSignal.subscribe(self.progressSignal)

                # Trigger the write
                dirtyState = not opH5Writer.WriteImage.value
            finally:
                # Release the file and the temporary operator even if the
                # export raised.
                try:
                    hdf5File.close()
                finally:
                    if opH5Writer is not None:
                        opH5Writer.cleanUp()

            # Dirty is only updated after the file is closed and the
            # writer is cleaned up.
            self.Dirty.setValue(dirtyState)
            self.setupPreview()
        else:
            assert False, "Unknown export format"

        result[0] = not self.Dirty.value