    def __init__(self, mainOperator, projectFileGroupName):
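        """
        mainOperator: the top-level autocontext classification operator whose
            slots this serializer watches and (de)serializes.
        projectFileGroupName: name of this applet's top group in the project file.
        """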
        with Tracer(traceLogger):
            super(AutocontextClassificationSerializer,
                  self).__init__(projectFileGroupName)
            self.mainOperator = mainOperator
            self._initDirtyFlags()

            # Set up handlers for dirty detection
            def handleDirty(section):
                if not self.ignoreDirty:
                    self._dirtyFlags[section] = True

            def handleNewClassifier(slot, index):
                slot[index].notifyDirty(bind(handleDirty, Section.Classifiers))
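            # (bind() is assumed to behave like functools.partial while
            #  discarding any extra arguments the slot passes to its callbacks,
            #  so handleDirty always receives just the section id.)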

            #self.mainOperator.Classifiers.notifyDirty( bind(handleDirty, Section.Classifiers) )
            self.mainOperator.Classifiers.notifyInserted(
                bind(handleNewClassifier))

            def handleNewImage(section, slot, index):
                slot[index].notifyDirty(bind(handleDirty, section))
                # New label images need to be 'serialized' as an empty group.
                if section == Section.Labels:
                    handleDirty(Section.Labels)

            # These are multi-slots, so subscribe to dirty callbacks on each of their subslots as they are created
            self.mainOperator.LabelImages.notifyInserted(
                bind(handleNewImage, Section.Labels))
            self.mainOperator.PredictionProbabilities.notifyInserted(
                bind(handleNewImage, Section.Predictions))
            #self.mainOperator.PixelOnlyPredictions.notifyInserted( bind(handleNewImage, Section.PixelPredictions) )

            self._predictionStorageEnabled = False
            self._predictionStorageRequest = None
            self._predictionsPresent = False

    def _serializeLabels(self, topGroup):
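        """
        Write all nonzero label blocks to topGroup/LabelSets.

        Resulting layout (sketch): LabelSets/labels<NNN>/block<NNNN>, where
        each block dataset carries a 'blockSlice' attribute recording the
        slicing it was read from.
        """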
        with Tracer(traceLogger):
            # Delete all labels from the file
            deleteIfPresent(topGroup, 'LabelSets')
            labelSetDir = topGroup.create_group('LabelSets')

            numImages = len(self.mainOperator.NonzeroLabelBlocks)
            for imageIndex in range(numImages):
                # Create a group for this image
                labelGroupName = 'labels{:03d}'.format(imageIndex)
                labelGroup = labelSetDir.create_group(labelGroupName)

                # Get a list of slicings that contain labels
                nonZeroBlocks = self.mainOperator.NonzeroLabelBlocks[
                    imageIndex].value
                for blockIndex, slicing in enumerate(nonZeroBlocks):
                    # Read the block from the label output
                    block = self.mainOperator.LabelImages[imageIndex][
                        slicing].wait()

                    # Store the block as a new dataset
                    blockName = 'block{:04d}'.format(blockIndex)
                    labelGroup.create_dataset(blockName, data=block)

                    # Add the slice this block came from as an attribute of the dataset
                    labelGroup[blockName].attrs['blockSlice'] = \
                        self.slicingToString(slicing)
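                    # (slicingToString / stringToSlicing are base-class helpers
                    #  assumed to round-trip a list of slice objects through a
                    #  string such as "[0:10,0:20]".)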

            self._dirtyFlags[Section.Labels] = False

    def _deserializeFromHdf5(self, topGroup, groupVersion, hdf5File,
                             projectFilePath):
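        """
        Restore labels, classifiers, and stored predictions (in that order)
        from topGroup, emitting coarse progress updates along the way.
        """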
        with Tracer(traceLogger):
            self.progressSignal.emit(0)
            self._deserializeLabels(topGroup)
            self.progressSignal.emit(50)
            self._deserializeClassifier(topGroup)
            self._deserializePredictions(topGroup)

            self.progressSignal.emit(100)

    def _deserializeLabels(self, topGroup):
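        """
        Read each block dataset from topGroup/LabelSets (if present) and write
        it back into LabelInputs at the slicing recorded in its 'blockSlice'
        attribute.
        """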
        with Tracer(traceLogger):
            try:
                labelSetGroup = topGroup['LabelSets']
            except KeyError:
                pass
            else:
                numImages = len(labelSetGroup)
                self.mainOperator.LabelInputs.resize(numImages)

                # For each image in the file
                for index, (groupName, labelGroup) in enumerate(
                        sorted(labelSetGroup.items())):
                    # For each block of label data in the file
                    for blockData in labelGroup.values():
                        # The location of this label data block within the image is stored as an hdf5 attribute
                        slicing = self.stringToSlicing(
                            blockData.attrs['blockSlice'])
                        # Slice in this data to the label input
                        self.mainOperator.LabelInputs[index][
                            slicing] = blockData[...]
            finally:
                self._dirtyFlags[Section.Labels] = False

    def _deserializeClassifier(self, topGroup):
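        """
        Load the stored random forests (one Classifier<i> group per autocontext
        stage) and force them into the classifier caches so that downstream
        operators can use them without retraining.
        """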
        with Tracer(traceLogger):
            try:
                classifiersTop = topGroup['Classifiers']
            except KeyError:
                pass
            else:
                # Due to non-shared hdf5 dlls, vigra can't read directly from our open hdf5 group.
                # Instead, we'll copy the classifier data to a temporary file and give it to vigra.
                for i, cache in enumerate(self.mainOperator.classifier_caches):
                    fullpath = "Classifiers/Classifier%d" % i
                    if fullpath not in topGroup:
                        break
                    classifierGroup = topGroup[fullpath]
                    tmpDir = tempfile.mkdtemp()
                    cachePath = os.path.join(
                        tmpDir, 'tmp_classifier_cache.h5').replace('\\', '/')
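                    # (h5py supports copying a group between two open files;
                    #  this puts the classifier data into a standalone temp
                    #  file that vigra can open on its own.)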
                    with h5py.File(cachePath, 'w') as cacheFile:
                        cacheFile.copy(classifierGroup, 'ClassifierForests')

                    forests = []
                    for name, forestGroup in sorted(classifierGroup.items()):
                        forests.append(
                            vigra.learning.RandomForest(
                                cachePath, str('ClassifierForests/' + name)))

                    os.remove(cachePath)
                    os.rmdir(tmpDir)

                    # Now force the classifier into our classifier cache.
                    # The downstream operators (e.g. the prediction operator) can use the classifier without inducing it to be re-trained.
                    # (This assumes that the classifier we are loading is consistent with the images and labels that we just loaded.
                    #  As soon as training input changes, it will be retrained.)
                    cache.forceValue(numpy.array(forests))
            finally:
                self._dirtyFlags[Section.Classifiers] = False

    def _serializeClassifiers(self, topGroup):
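        """
        Write each trained classifier (a list of vigra random forests) to
        topGroup/Classifiers/Classifier<i>, bailing out early if any stage
        hasn't been trained yet.
        """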
        with Tracer(traceLogger):
            deleteIfPresent(topGroup, 'Classifiers')
            self._dirtyFlags[Section.Classifiers] = False

            if not self.mainOperator.Classifiers.ready():
                return

            classifiers = self.mainOperator.Classifiers
            topGroup.require_group("Classifiers")
            for i in range(len(classifiers)):
                classifier_forests = classifiers[i].value
                # Classifier can be None if there isn't any training data yet.
                if classifier_forests is None:
                    return
                for forest in classifier_forests:
                    if forest is None:
                        return

                # Due to non-shared hdf5 dlls, vigra can't write directly to our open hdf5 group.
                # Instead, we'll use vigra to write the classifier to a temporary file.
                tmpDir = tempfile.mkdtemp()
                cachePath = os.path.join(tmpDir,
                                         'tmp_classifier_cache.h5').replace(
                                             '\\', '/')
                for j, forest in enumerate(classifier_forests):
                    forest.writeHDF5(
                        cachePath, 'ClassifierForests/Forest{:04d}'.format(j))

                # Open the temp file and copy to our project group
                with h5py.File(cachePath, 'r') as cacheFile:
                    grouppath = "Classifiers/Classifier%d" % i
                    topGroup.copy(cacheFile['ClassifierForests'], grouppath)

                os.remove(cachePath)
                os.removedirs(tmpDir)

    def _serializeToHdf5(self, topGroup, hdf5File, projectFilePath):
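        """
        Serialize all dirty sections to topGroup, emitting progress after each.
        Predictions are always revisited, even when not marked dirty.
        """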
        with Tracer(traceLogger):
            numSteps = sum(self._dirtyFlags.values())
            progress = 0
            # Default keeps 'increment' bound even when nothing is dirty,
            # since _serializePredictions below uses it unconditionally.
            increment = 0
            if numSteps > 0:
                increment = 100 // numSteps

            if self._dirtyFlags[Section.Labels]:
                self._serializeLabels(topGroup)
                progress += increment
                self.progressSignal.emit(progress)

            if self._dirtyFlags[Section.Classifiers]:
                self._serializeClassifiers(topGroup)
                progress += increment
                self.progressSignal.emit(progress)

            # Predictions must be serialized even when they aren't dirty,
            # since they aren't always stored in the file.
            self._serializePredictions(topGroup, progress,
                                       progress + increment)
            if self._dirtyFlags[Section.Predictions]:
                progress += increment
                self.progressSignal.emit(progress)

    def _serializePredictions(self, topGroup, startProgress, endProgress):
        """
        Called when the currently stored predictions are dirty.
        If prediction storage is currently enabled, store them to the file.
        Otherwise, just delete them/
        (Avoid inconsistent project states, e.g. don't allow old predictions to be stored with a new classifier.)
        """
        with Tracer(traceLogger):
            # If the predictions are missing, then maybe the user wants them stored (even if they aren't dirty)
            if (self._dirtyFlags[Section.Predictions]
                    or 'Predictions' not in topGroup.keys()):

                deleteIfPresent(topGroup, 'Predictions')

                # Disconnect the precomputed prediction inputs.
                for slot in self.mainOperator.PredictionsFromDisk:
                    slot.disconnect()

                if self.predictionStorageEnabled:
                    predictionDir = topGroup.create_group('Predictions')

                    failedToSave = False
                    # Initialized up front so the 'finally' clause below never
                    # hits an unbound name if we fail before the loop runs.
                    datasetName = None
                    progress = [startProgress]
                    try:
                        numImages = len(self.mainOperator.PredictionProbabilities)

                        if numImages > 0:
                            increment = (endProgress -
                                         startProgress) / float(numImages)

                        for imageIndex in range(numImages):
                            # Have we been cancelled?
                            if not self.predictionStorageEnabled:
                                break

                            datasetName = 'predictions{:04d}'.format(
                                imageIndex)

                            progress = [startProgress]

                            # Use a big dataset writer to do this in chunks
                            opWriter = OpH5WriterBigDataset(
                                graph=self.mainOperator.graph)
                            opWriter.hdf5File.setValue(predictionDir)
                            opWriter.hdf5Path.setValue(datasetName)
                            opWriter.Image.connect(
                                self.mainOperator.PredictionProbabilities[imageIndex])

                            # Create the request
                            self._predictionStorageRequest = opWriter.WriteImage[...]

                            def handleProgress(percent):
                                # Stop sending progress if we were cancelled
                                if self.predictionStorageEnabled:
                                    progress[0] = startProgress + percent * (
                                        increment / 100.0)
                                    self.progressSignal.emit(progress[0])

                            opWriter.progressSignal.subscribe(handleProgress)

                            finishedEvent = threading.Event()

                            def handleFinish(request):
                                finishedEvent.set()

                            def handleCancel(request):
                                self._predictionStorageRequest = None
                                finishedEvent.set()

                            # Trigger the write and wait for it to complete or cancel.
                            self._predictionStorageRequest.notify(handleFinish)
                            self._predictionStorageRequest.onCancel(
                                handleCancel)
                            finishedEvent.wait()
                    except:
                        failedToSave = True
                        raise
                    finally:
                        # If we were cancelled or failed, delete the predictions we just started
                        if not self.predictionStorageEnabled or failedToSave:
                            if datasetName is not None:
                                deleteIfPresent(predictionDir, datasetName)
                            self._predictionsPresent = False
                            startProgress = progress[0]
                        else:
                            # Re-load the operator with the prediction groups we just saved
                            self._deserializePredictions(topGroup)