    def __init__(self, mainOperator, projectFileGroupName):
        with Tracer(traceLogger):
            super(AutocontextClassificationSerializer, self).__init__(projectFileGroupName)
            self.mainOperator = mainOperator
            self._initDirtyFlags()

            # Set up handlers for dirty detection
            def handleDirty(section):
                if not self.ignoreDirty:
                    self._dirtyFlags[section] = True

            def handleNewClassifier(slot, index):
                slot[index].notifyDirty(bind(handleDirty, 1))

            #self.mainOperator.Classifiers.notifyDirty( bind(handleDirty, Section.Classifiers) )
            self.mainOperator.Classifiers.notifyInserted(bind(handleNewClassifier))

            def handleNewImage(section, slot, index):
                slot[index].notifyDirty(bind(handleDirty, section))
                # New label images need to be 'serialized' as an empty group.
                if section == Section.Labels:
                    handleDirty(Section.Labels)

            # These are multi-slots, so subscribe to dirty callbacks on each of their subslots as they are created
            self.mainOperator.LabelImages.notifyInserted(bind(handleNewImage, Section.Labels))
            self.mainOperator.PredictionProbabilities.notifyInserted(bind(handleNewImage, Section.Predictions))
            #self.mainOperator.PixelOnlyPredictions.notifyInserted( bind(handleNewImage, Section.PixelPredictions) )

            self._predictionStorageEnabled = False
            self._predictionStorageRequest = None
            self._predictionsPresent = False
    def _serializeLabels(self, topGroup):
        with Tracer(traceLogger):
            # Delete all labels from the file
            deleteIfPresent(topGroup, 'LabelSets')
            labelSetDir = topGroup.create_group('LabelSets')

            numImages = len(self.mainOperator.NonzeroLabelBlocks)
            for imageIndex in range(numImages):
                # Create a group for this image
                labelGroupName = 'labels{:03d}'.format(imageIndex)
                labelGroup = labelSetDir.create_group(labelGroupName)

                # Get a list of slicings that contain labels
                nonZeroBlocks = self.mainOperator.NonzeroLabelBlocks[imageIndex].value
                for blockIndex, slicing in enumerate(nonZeroBlocks):
                    # Read the block from the label output
                    block = self.mainOperator.LabelImages[imageIndex][slicing].wait()

                    # Store the block as a new dataset
                    blockName = 'block{:04d}'.format(blockIndex)
                    labelGroup.create_dataset(blockName, data=block)

                    # Add the slice this block came from as an attribute of the dataset
                    labelGroup[blockName].attrs['blockSlice'] = self.slicingToString(slicing)

            self._dirtyFlags[Section.Labels] = False
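    # Illustration only: the project-file layout produced by _serializeLabels,
    # using the group/dataset names from the code above.
    #
    #   <topGroup>/LabelSets/labels000/block0000   (dataset, attr 'blockSlice')
    #   <topGroup>/LabelSets/labels000/block0001
    #   <topGroup>/LabelSets/labels001/block0000
    #   ...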
    def _deserializeFromHdf5(self, topGroup, groupVersion, hdf5File, projectFilePath):
        with Tracer(traceLogger):
            self.progressSignal.emit(0)
            self._deserializeLabels(topGroup)
            self.progressSignal.emit(50)
            self._deserializeClassifier(topGroup)
            self._deserializePredictions(topGroup)
            self.progressSignal.emit(100)
    def _deserializeLabels(self, topGroup):
        with Tracer(traceLogger):
            try:
                labelSetGroup = topGroup['LabelSets']
            except KeyError:
                pass
            else:
                numImages = len(labelSetGroup)
                self.mainOperator.LabelInputs.resize(numImages)

                # For each image in the file
                for index, (groupName, labelGroup) in enumerate(sorted(labelSetGroup.items())):
                    # For each block of label data in the file
                    for blockData in labelGroup.values():
                        # The location of this label data block within the image is stored as an hdf5 attribute
                        slicing = self.stringToSlicing(blockData.attrs['blockSlice'])
                        # Slice in this data to the label input
                        self.mainOperator.LabelInputs[index][slicing] = blockData[...]
            finally:
                self._dirtyFlags[Section.Labels] = False
    def _deserializeClassifier(self, topGroup):
        with Tracer(traceLogger):
            try:
                classifiersTop = topGroup['Classifiers']
            except KeyError:
                pass
            else:
                # Due to non-shared hdf5 dlls, vigra can't read directly from our open hdf5 group.
                # Instead, we'll copy the classifier data to a temporary file and give it to vigra.
                for i, cache in enumerate(self.mainOperator.classifier_caches):
                    fullpath = "Classifiers/Classifier%d" % i
                    if fullpath not in topGroup:
                        break
                    classifierGroup = topGroup[fullpath]

                    tmpDir = tempfile.mkdtemp()
                    cachePath = os.path.join(tmpDir, 'tmp_classifier_cache.h5').replace('\\', '/')
                    with h5py.File(cachePath, 'w') as cacheFile:
                        cacheFile.copy(classifierGroup, 'ClassifierForests')

                    forests = []
                    for name, forestGroup in sorted(classifierGroup.items()):
                        forests.append(vigra.learning.RandomForest(cachePath, str('ClassifierForests/' + name)))

                    os.remove(cachePath)
                    os.rmdir(tmpDir)

                    # Now force the classifier into our classifier cache.
                    # The downstream operators (e.g. the prediction operator) can use the classifier
                    # without causing it to be re-trained.
                    # (This assumes that the classifier we are loading is consistent with the images
                    #  and labels that we just loaded. As soon as training input changes, it will be retrained.)
                    cache.forceValue(numpy.array(forests))
            finally:
                self._dirtyFlags[Section.Classifiers] = False
    def _serializeClassifiers(self, topGroup):
        with Tracer(traceLogger):
            deleteIfPresent(topGroup, 'Classifiers')
            self._dirtyFlags[Section.Classifiers] = False

            if not self.mainOperator.Classifiers.ready():
                return

            classifiers = self.mainOperator.Classifiers
            topGroup.require_group("Classifiers")
            for i in range(len(classifiers)):
                classifier_forests = classifiers[i].value

                # Classifier can be None if there isn't any training data yet.
                if classifier_forests is None:
                    return
                for forest in classifier_forests:
                    if forest is None:
                        return

                # Due to non-shared hdf5 dlls, vigra can't write directly to our open hdf5 group.
                # Instead, we'll use vigra to write the classifier to a temporary file.
                tmpDir = tempfile.mkdtemp()
                cachePath = os.path.join(tmpDir, 'tmp_classifier_cache.h5').replace('\\', '/')
                for j, forest in enumerate(classifier_forests):
                    forest.writeHDF5(cachePath, 'ClassifierForests/Forest{:04d}'.format(j))

                # Open the temp file and copy to our project group
                with h5py.File(cachePath, 'r') as cacheFile:
                    grouppath = "Classifiers/Classifier%d" % i
                    topGroup.copy(cacheFile['ClassifierForests'], grouppath)

                os.remove(cachePath)
                os.removedirs(tmpDir)
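    # Illustration only: the project-file layout produced by _serializeClassifiers,
    # one subgroup per classifier cache and one forest entry per random forest,
    # using the names from the code above. _deserializeClassifier reads this layout back.
    #
    #   <topGroup>/Classifiers/Classifier0/Forest0000
    #   <topGroup>/Classifiers/Classifier0/Forest0001
    #   <topGroup>/Classifiers/Classifier1/Forest0000
    #   ...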
    def _serializeToHdf5(self, topGroup, hdf5File, projectFilePath):
        with Tracer(traceLogger):
            numSteps = sum(self._dirtyFlags.values())
            progress = 0
            if numSteps > 0:
                increment = 100 // numSteps

            if self._dirtyFlags[Section.Labels]:
                self._serializeLabels(topGroup)
                progress += increment
                self.progressSignal.emit(progress)

            if self._dirtyFlags[Section.Classifiers]:
                self._serializeClassifiers(topGroup)
                progress += increment
                self.progressSignal.emit(progress)

            # Need to call serialize predictions even if it isn't dirty
            # (Since it isn't always stored.)
            self._serializePredictions(topGroup, progress, progress + increment)
            if self._dirtyFlags[Section.Predictions]:
                progress += increment
                self.progressSignal.emit(progress)
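    # Hedged usage sketch (not part of this class): a manual save/load round trip
    # through h5py, based only on the method signatures above. 'opAutocontext', the
    # 'AutocontextClassification' group name, and the groupVersion value are
    # placeholders for illustration; in the application these methods are typically
    # invoked by the surrounding project-management code rather than called directly.
    #
    #   serializer = AutocontextClassificationSerializer(opAutocontext, 'AutocontextClassification')
    #   with h5py.File(projectFilePath, 'a') as hdf5File:
    #       topGroup = hdf5File.require_group('AutocontextClassification')
    #       serializer._serializeToHdf5(topGroup, hdf5File, projectFilePath)
    #       serializer._deserializeFromHdf5(topGroup, 0.1, hdf5File, projectFilePath)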
    def _serializePredictions(self, topGroup, startProgress, endProgress):
        """
        Called when the currently stored predictions are dirty.
        If prediction storage is currently enabled, store them to the file.
        Otherwise, just delete them.
        (Avoid inconsistent project states, e.g. don't allow old predictions to be stored with a new classifier.)
        """
        with Tracer(traceLogger):
            # If the predictions are missing, then maybe the user wants them stored (even if they aren't dirty)
            if self._dirtyFlags[Section.Predictions] or 'Predictions' not in topGroup.keys():
                deleteIfPresent(topGroup, 'Predictions')

                # Disconnect the precomputed prediction inputs.
                for i, slot in enumerate(self.mainOperator.PredictionsFromDisk):
                    slot.disconnect()

                if self.predictionStorageEnabled:
                    predictionDir = topGroup.create_group('Predictions')

                    failedToSave = False
                    try:
                        numImages = len(self.mainOperator.PredictionProbabilities)
                        if numImages > 0:
                            increment = (endProgress - startProgress) / float(numImages)

                        for imageIndex in range(numImages):
                            # Have we been cancelled?
                            if not self.predictionStorageEnabled:
                                break

                            datasetName = 'predictions{:04d}'.format(imageIndex)
                            progress = [startProgress]

                            # Use a big dataset writer to do this in chunks
                            opWriter = OpH5WriterBigDataset(graph=self.mainOperator.graph)
                            opWriter.hdf5File.setValue(predictionDir)
                            opWriter.hdf5Path.setValue(datasetName)
                            opWriter.Image.connect(self.mainOperator.PredictionProbabilities[imageIndex])

                            # Create the request
                            self._predictionStorageRequest = opWriter.WriteImage[...]

                            def handleProgress(percent):
                                # Stop sending progress if we were cancelled
                                if self.predictionStorageEnabled:
                                    progress[0] = startProgress + percent * (increment / 100.0)
                                    self.progressSignal.emit(progress[0])
                            opWriter.progressSignal.subscribe(handleProgress)

                            finishedEvent = threading.Event()

                            def handleFinish(request):
                                finishedEvent.set()

                            def handleCancel(request):
                                self._predictionStorageRequest = None
                                finishedEvent.set()

                            # Trigger the write and wait for it to complete or cancel.
                            self._predictionStorageRequest.notify(handleFinish)
                            self._predictionStorageRequest.onCancel(handleCancel)
                            finishedEvent.wait()
                    except:
                        failedToSave = True
                        raise
                    finally:
                        # If we were cancelled, delete the predictions we just started
                        if not self.predictionStorageEnabled or failedToSave:
                            deleteIfPresent(predictionDir, datasetName)
                            self._predictionsPresent = False
                            startProgress = progress[0]
                        else:
                            # Re-load the operator with the prediction groups we just saved
                            self._deserializePredictions(topGroup)
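    # Note on the cancellation model implemented above: each image write is issued as a
    # lazyflow request and the loop blocks on finishedEvent. An external caller is expected
    # to abort storage by clearing the predictionStorageEnabled flag and cancelling the
    # stored request (the handleCancel callback registered via onCancel then sets
    # finishedEvent); the finally block deletes the partially written dataset in that case.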