    def _applyInternalPathToTempOps(self, index):
        if index == -1:
            return

        newInternalPath = str(self.internalDatasetNameComboBox.currentText())

        # Save a copy of our settings
        oldInfos = {}
        for laneIndex, op in self.tempOps.items():
            oldInfos[laneIndex] = copy.copy(op.Dataset.value)

        # Attempt to apply to all temp operators
        try:
            for laneIndex, op in self.tempOps.items():
                info = copy.copy(op.Dataset.value)
                pathComponents = PathComponents(info.filePath)
                if pathComponents.internalPath != newInternalPath:
                    pathComponents.internalPath = newInternalPath
                    info.filePath = pathComponents.totalPath()
                    op.Dataset.setValue(info)
            self._error_fields.discard('Internal Dataset Name')
            return True
        except Exception as e:
            # Revert everything back to the previous state
            for laneIndex, op in self.tempOps.items():
                op.Dataset.setValue(oldInfos[laneIndex])

            traceback.print_exc()
            msg = "Could not set new internal path settings due to an exception:\n"
            msg += "{}".format(e)
            QMessageBox.warning(self, "Error", msg)
            self._error_fields.add('Internal Dataset Name')
            return False
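The method above relies on PathComponents to split a combined path like '/data/volume.h5/exported_data' into an external file path and an internal HDF5 path, then rebuild the total path after swapping the internal part. A minimal, self-contained sketch of that split, using a hypothetical helper rather than the real lazyflow PathComponents class:

import os

H5_EXTS = ['.ilp', '.h5', '.hdf5']

def split_h5_path(total_path):
    """Split '/data/volume.h5/exported_data' into ('/data/volume.h5', '/exported_data')."""
    for ext in H5_EXTS:
        marker = ext + '/'
        if marker in total_path:
            cut = total_path.index(marker) + len(ext)
            return total_path[:cut], total_path[cut:]
    return total_path, None

external, internal = split_h5_path('/data/volume.h5/exported_data')
assert (external, internal) == ('/data/volume.h5', '/exported_data')

# Swapping the internal dataset name, then rebuilding the total path:
new_total = external + '/volume'
assert new_total == '/data/volume.h5/volume'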
    def _initInternalDatasetNameCombo(self):
        # If any dataset is either (1) not hdf5 or (2) project-internal, then we can't change the internal path.
        h5Exts = ['.ilp', '.h5', '.hdf5']
        for laneIndex in self._laneIndexes:
            tmpOp = self.tempOps[laneIndex]
            datasetInfo = tmpOp.Dataset.value
            externalPath = PathComponents(datasetInfo.filePath).externalPath
            if os.path.splitext(externalPath)[1] not in h5Exts \
            or datasetInfo.location == DatasetInfo.Location.ProjectInternal:
                self.internalDatasetNameComboBox.addItem("N/A")
                self.internalDatasetNameComboBox.setEnabled(False)
                return

        # Enable IFF all datasets have at least one common internal dataset, and only show COMMON datasets
        allInternalPaths = set()
        commonInternalPaths = None

        for laneIndex in self._laneIndexes:
            tmpOp = self.tempOps[laneIndex]
            datasetInfo = tmpOp.Dataset.value

            externalPath = PathComponents(datasetInfo.filePath).externalPath
            absPath, relPath = getPathVariants(externalPath,
                                               tmpOp.WorkingDirectory.value)
            internalPaths = set(self._getPossibleInternalPaths(absPath))

            if commonInternalPaths is None:
                # Init with the first file's set of paths
                commonInternalPaths = internalPaths

            # Set operations
            allInternalPaths |= internalPaths
            commonInternalPaths &= internalPaths
            if len(commonInternalPaths) == 0:
                self.internalDatasetNameComboBox.addItem(
                    "Couldn't find a dataset name common to all selected files."
                )
                self.internalDatasetNameComboBox.setEnabled(False)
                return

        uncommonInternalPaths = allInternalPaths - commonInternalPaths
        # Add all common paths to the combo
        for path in sorted(commonInternalPaths):
            self.internalDatasetNameComboBox.addItem(path)

        # Add the remaining ones, but disable them since they aren't common to all files:
        for path in sorted(uncommonInternalPaths):
            self.internalDatasetNameComboBox.addItem(path)
            # http://theworldwideinternet.blogspot.com/2011/01/disabling-qcombobox-items.html
            model = self.internalDatasetNameComboBox.model()
            index = model.index(self.internalDatasetNameComboBox.count() - 1,
                                0)
            model.setData(index, 0, Qt.UserRole - 1)

        # Finally, initialize with NO item selected
        self.internalDatasetNameComboBox.setCurrentIndex(-1)
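The combo-box population above reduces to intersecting the internal dataset names found in each lane's file. A standalone sketch of that set logic, with hypothetical path listings standing in for the per-file HDF5 lookups done by self._getPossibleInternalPaths():

# Hypothetical internal-path listings for three lanes.
paths_per_lane = [
    {'/volume', '/raw', '/labels'},
    {'/volume', '/raw'},
    {'/volume', '/predictions'},
]

all_paths = set()
common_paths = None
for internal_paths in paths_per_lane:
    if common_paths is None:
        common_paths = set(internal_paths)  # init with the first lane's paths
    all_paths |= internal_paths
    common_paths &= internal_paths

assert common_paths == {'/volume'}          # enabled items in the combo
uncommon_paths = all_paths - common_paths   # shown, but disabled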
        def handleFinished( result ):
            # Generate the mapping transforms dataset
            mapping = self._opAccumulateFinalImage.Mapping.value
            # num_labels is the largest 'stop' over all (start, stop) ranges in the mapping
            num_labels = max(stop for (start, stop) in mapping.keys())
            transform = numpy.zeros( shape=(num_labels, 2), dtype=numpy.uint32 )
            for (start, stop), body_id in mapping.items():
                for supervoxel_label in range(start, stop):
                    transform[supervoxel_label][0] = supervoxel_label
                    if body_id == -1:
                        # Special case: -1 means "identity transform" for this supervoxel
                        # (which is really an untouched Raveler body)
                        transform[supervoxel_label][1] = supervoxel_label
                    else:
                        transform[supervoxel_label][1] = body_id

            # Save the transform before closing the file
            f.create_dataset('transforms', data=transform)

            # Copy all other datasets from the original segmentation file.
            ravelerSegmentationInfo = self.DatasetInfos[2].value
            pathComponents = PathComponents(ravelerSegmentationInfo.filePath, self.WorkingDirectory.value)
            with h5py.File(pathComponents.externalPath, 'r') as originalFile:
                for k,dset in originalFile.items():
                    if k not in ['transforms', 'stack']:
                        f.copy(dset, k)
            
            try:
                cleanOps()
                logger.info("FINISHED Final Supervoxel Export")
            finally:
                f.close()
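The transform table built in handleFinished() maps every supervoxel label to a body id, where a body id of -1 means "keep the original supervoxel label". The same construction in isolation, with numpy and a made-up mapping:

import numpy

# Hypothetical mapping: (start, stop) supervoxel ranges -> body id (-1 = identity)
mapping = {(0, 3): 7, (3, 5): -1}

num_labels = max(stop for (start, stop) in mapping)
transform = numpy.zeros(shape=(num_labels, 2), dtype=numpy.uint32)
for (start, stop), body_id in mapping.items():
    for supervoxel_label in range(start, stop):
        transform[supervoxel_label][0] = supervoxel_label
        transform[supervoxel_label][1] = (supervoxel_label
                                          if body_id == -1 else body_id)

# Rows are [supervoxel, body]; supervoxels 3 and 4 keep their own labels.
assert transform.tolist() == [[0, 7], [1, 7], [2, 7], [3, 3], [4, 4]]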
    def _initStorageCombo(self):
        # If there's only one dataset, show the path in the combo
        showpaths = False
        if len(self._laneIndexes) == 1:
            op = self.tempOps.values()[0]
            info = op.Dataset.value
            cwd = op.WorkingDirectory.value
            filePath = PathComponents(info.filePath).externalPath
            absPath, relPath = getPathVariants(filePath, cwd)
            showpaths = not info.fromstack

        if showpaths:
            self.storageComboBox.addItem("Copied to Project File",
                                         userData=StorageLocation.ProjectFile)
            self.storageComboBox.addItem("Absolute Link: " + absPath,
                                         userData=StorageLocation.AbsoluteLink)
            self.storageComboBox.addItem("Relative Link: " + relPath,
                                         userData=StorageLocation.RelativeLink)
        else:
            self.storageComboBox.addItem("Copied to Project File",
                                         userData=StorageLocation.ProjectFile)
            self.storageComboBox.addItem("Absolute Link",
                                         userData=StorageLocation.AbsoluteLink)
            self.storageComboBox.addItem("Relative Link",
                                         userData=StorageLocation.RelativeLink)

        self.storageComboBox.setCurrentIndex(-1)
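_initStorageCombo offers the user an absolute and a relative variant of the same external path. A rough standalone equivalent of what the two getPathVariants results look like, built from os.path alone (the real helper lives in the ilastik/lazyflow codebase and may differ in edge cases):

import os

def path_variants(file_path, working_dir):
    """Return (absolute, relative-to-working_dir) variants of file_path."""
    abs_path = os.path.normpath(os.path.join(working_dir, file_path))
    rel_path = os.path.relpath(abs_path, working_dir)
    return abs_path, rel_path

abs_path, rel_path = path_variants('data/volume.h5', '/home/user/project')
assert abs_path == os.path.normpath('/home/user/project/data/volume.h5')
assert rel_path == os.path.join('data', 'volume.h5')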
    def _deserializeFromHdf5(self, topGroup, groupVersion, hdf5File,
                             projectFilePath):
        with Tracer(traceLogger):
            self._projectFilePath = projectFilePath
            self.initWithoutTopGroup(hdf5File, projectFilePath)

            infoDir = topGroup['infos']

            self.mainOperator.Dataset.resize(len(infoDir))
            for index, (infoGroupName,
                        infoGroup) in enumerate(sorted(infoDir.items())):
                datasetInfo = DatasetInfo()

                # Make a reverse-lookup of the location storage strings
                LocationLookup = {
                    v: k
                    for k, v in self.LocationStrings.items()
                }
                datasetInfo.location = LocationLookup[str(
                    infoGroup['location'].value)]

                # Write to the 'private' members to avoid resetting the dataset id
                datasetInfo._filePath = str(infoGroup['filePath'].value)
                datasetInfo._datasetId = str(infoGroup['datasetId'].value)

                # Deserialize the "allow labels" flag
                try:
                    datasetInfo.allowLabels = infoGroup['allowLabels'].value
                except KeyError:
                    pass

                # Deserialize the axisorder (if present)
                try:
                    datasetInfo.axisorder = infoGroup['axisorder'].value
                except KeyError:
                    if ilastik.utility.globals.ImportOptions.default_axis_order is not None:
                        datasetInfo.axisorder = ilastik.utility.globals.ImportOptions.default_axis_order

                # If the data is supposed to be in the project,
                #  check for it now.
                if datasetInfo.location == DatasetInfo.Location.ProjectInternal:
                    if datasetInfo.datasetId not in topGroup['local_data'].keys():
                        raise RuntimeError(
                            "Corrupt project file.  Could not find data for " +
                            infoGroupName)

                # If the data is supposed to exist outside the project, make sure it really does.
                if datasetInfo.location == DatasetInfo.Location.FileSystem:
                    filePath = PathComponents(
                        datasetInfo.filePath,
                        os.path.split(projectFilePath)[0]).externalPath
                    if not os.path.exists(filePath):
                        raise RuntimeError("Could not find external data: " +
                                           filePath)

                # Give the new info to the operator
                self.mainOperator.Dataset[index].setValue(datasetInfo)

            self._dirty = False
Example #7
def generateBatchPredictions(workflow, batchInputPaths, batchExportDir, batchOutputSuffix, exportedDatasetName):
    """
    Compute the predictions for each of the specified batch input files,
    and export them to corresponding h5 files.
    """
    batchInputPaths = convertStacksToH5(batchInputPaths)

    batchInputInfos = []
    for p in batchInputPaths:
        info = DatasetInfo()
        info.location = DatasetInfo.Location.FileSystem

        # Convert all paths to absolute 
        # (otherwise they are relative to the project file, which probably isn't what the user meant)        
        comp = PathComponents(p)
        comp.externalPath = os.path.abspath(comp.externalPath)
        
        info.filePath = comp.totalPath()        
        batchInputInfos.append(info)

    # Configure batch input operator
    opBatchInputs = workflow.batchInputApplet.topLevelOperator
    opBatchInputs.Dataset.setValues( batchInputInfos )
    
    # Configure batch export operator
    opBatchResults = workflow.batchResultsApplet.topLevelOperator
    opBatchResults.ExportDirectory.setValue(batchExportDir)
    opBatchResults.Format.setValue(ExportFormat.H5)
    opBatchResults.Suffix.setValue(batchOutputSuffix)
    opBatchResults.InternalPath.setValue(exportedDatasetName)
    opBatchResults.SelectedSlices.setValue([30])
    
    logger.info( "Exporting data to " + opBatchResults.OutputDataPath[0].value )

    # Set up progress display handling (just logging for now)        
    currentProgress = [None]
    def handleProgress(percentComplete):
        if currentProgress[0] != percentComplete:
            currentProgress[0] = percentComplete
            logger.info("Batch job: {}% complete.".format(percentComplete))
        
    progressSignal = opBatchResults.ProgressSignal[0].value
    progressSignal.subscribe( handleProgress )

    # Make it happen!
    result = opBatchResults.ExportResult[0].value
    return result
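handleProgress above uses a one-element list so the nested function can update shared state without nonlocal (which does not exist in Python 2). The same pattern in isolation:

# One-element list as mutable state shared with a closure.
current_progress = [None]
reported = []

def handle_progress(percent_complete):
    if current_progress[0] != percent_complete:
        current_progress[0] = percent_complete
        reported.append(percent_complete)  # the real code logs instead

for pct in [0, 0, 25, 25, 50, 100, 100]:
    handle_progress(pct)

assert reported == [0, 25, 50, 100]  # duplicate updates are suppressed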
Example #8
    def testCreateExportDirectory(self):
        """
        Test that the batch operator can create the export directory if it doesn't exist yet.
        """
        # Start by writing some test data to disk.
        self.testData = numpy.random.random((1, 10, 10, 10, 1))
        numpy.save(self.testDataFileName, self.testData)

        cwd = os.getcwd()
        info = DatasetInfo()
        info.filePath = os.path.join(cwd, 'NpyTestData.npy')

        graph = Graph()
        opBatchIo = OpBatchIo(graph=graph)
        opInput = OpInputDataReader(graph=graph)
        opInput.FilePath.setValue(info.filePath)

        # Our test "processing pipeline" is just a smoothing operator.
        opSmooth = OpGaussianSmoothing(graph=graph)
        opSmooth.Input.connect(opInput.Output)
        opSmooth.sigma.setValue(3.0)

        exportDir = os.path.join(cwd, 'exported_data')
        opBatchIo.ExportDirectory.setValue(exportDir)
        opBatchIo.Suffix.setValue('_smoothed')
        opBatchIo.Format.setValue(ExportFormat.H5)
        opBatchIo.DatasetPath.setValue(info.filePath)

        internalPath = 'path/to/data'
        opBatchIo.InternalPath.setValue(internalPath)

        opBatchIo.ImageToExport.connect(opSmooth.Output)

        dirty = opBatchIo.Dirty.value
        assert dirty == True

        outputPath = opBatchIo.OutputDataPath.value
        assert outputPath == os.path.join(exportDir, 'NpyTestData_smoothed.h5',
                                          internalPath)

        result = opBatchIo.ExportResult.value
        assert result

        dirty = opBatchIo.Dirty.value
        assert dirty == False

        # Check the file
        smoothedPath = PathComponents(outputPath).externalPath
        with h5py.File(smoothedPath, 'r') as f:
            assert internalPath in f
            assert f[internalPath].shape == self.testData.shape
        try:
            os.remove(smoothedPath)
            os.rmdir(exportDir)
        except:
            pass
Example #9
def runWorkflow(parsed_args):
    args = parsed_args
    
    # Make sure project file exists.
    if not os.path.exists(args.project):
        raise RuntimeError("Project file '" + args.project + "' does not exist.")

    # Make sure batch inputs exist.
    for p in args.batch_inputs:
        error = False
        p = PathComponents(p).externalPath
        if not os.path.exists(p):
            logger.error("Batch input file does not exist: " + p)
            error = True
        if error:
            raise RuntimeError("Could not find one or more batch inputs.  See logged errors.")

    if not args.generate_project_predictions and len(args.batch_inputs) == 0:
        logger.error("Command-line arguments didn't specify a workload.")
        return

    # Instantiate 'shell'
    shell, workflow = startShellHeadless( PixelClassificationWorkflow )
    
    if args.assume_old_ilp_axes:
        # Special hack for Janelia: 
        # In some old versions of 0.5, the data was stored in tyxzc order.
        # We have no way of inspecting the data to determine this, so we allow 
        #  users to specify that their ilp is very old using the 
        #  assume_old_ilp_axes command-line flag
        ilastik.utility.globals.ImportOptions.default_axis_order = 'tyxzc'

    # Load project (auto-import it if necessary)
    logger.info("Opening project: '" + args.project + "'")
    shell.openProjectPath(args.project)

    # Predictions for project input datasets
    if args.generate_project_predictions:
        generateProjectPredictions(shell, workflow)

    # Predictions for other datasets ('batch datasets')
    result = True
    if len(args.batch_inputs) > 0:
        result = generateBatchPredictions(workflow,
                                          args.batch_inputs,
                                          args.batch_export_dir,
                                          args.batch_output_suffix,
                                          args.batch_output_dataset_name)

    logger.info("Closing project...")
    shell.projectManager.closeCurrentProject()
    
    assert result    
    
    logger.info("FINISHED.")
Example #10
    def deleteAllResults(self):
        for k in xrange(len(self.topLevelOperator)):
            operatorView = self.topLevelOperator.getLane(k)
            operatorView.cleanupPreview()
            pathComp = PathComponents(operatorView.OutputDataPath.value,
                                      operatorView.WorkingDirectory.value)
            os.remove(pathComp.externalPath)
            operatorView.setupPreview()
            # Toggle the dirty state to force a fresh dirty signal.
            operatorView.Dirty.setValue(False)
            operatorView.Dirty.setValue(True)
Example #11
    def deleteAllResults(self):
        for innerOp in self.topLevelOperator:
            operatorView = innerOp
            operatorView.cleanupOnDiskView()
            pathComp = PathComponents(operatorView.ExportPath.value,
                                      operatorView.WorkingDirectory.value)
            if os.path.exists(pathComp.externalPath):
                os.remove(pathComp.externalPath)
            operatorView.setupOnDiskView()
            # Toggle the dirty state to force a fresh dirty signal.
            operatorView.Dirty.setValue(False)
            operatorView.Dirty.setValue(True)
    def hasInternalPaths(self):
        for mslot in self._op.DatasetGroup:
            if self._roleIndex < len(mslot):
                slot = mslot[self._roleIndex]
                if slot.ready():
                    datasetInfo = slot.value
                    filePathComponents = PathComponents(datasetInfo.filePath)
                    if (datasetInfo.location == DatasetInfo.Location.FileSystem
                            and filePathComponents.internalPath is not None):
                        return True
        return False
    def updateWorkingDirectory(self, newpath, oldpath):
        newdir = PathComponents(newpath).externalDirectory
        olddir = PathComponents(oldpath).externalDirectory

        if newdir == olddir:
            return

        # Disconnect the working directory while we make these changes.
        # All the changes will take effect when we set the new working directory.
        self.topLevelOperator.WorkingDirectory.disconnect()

        for laneIndex, multislot in enumerate(
                self.topLevelOperator.DatasetGroup):
            for roleIndex, slot in enumerate(multislot):
                if not slot.ready():
                    # Skip if there is no dataset in this lane/role combination yet.
                    continue
                datasetInfo = slot.value
                if datasetInfo.location == DatasetInfo.Location.FileSystem:

                    #construct absolute path and recreate relative to the new path
                    fp = PathComponents(datasetInfo.filePath,
                                        olddir).totalPath()
                    abspath, relpath = getPathVariants(fp, newdir)

                    # Same convention as in dataSelectionGui:
                    # Relative by default, unless the file is in a totally different tree from the working directory.
                    if len(os.path.commonprefix([fp, abspath])) > 1:
                        datasetInfo.filePath = relpath
                    else:
                        datasetInfo.filePath = abspath

                    slot.setValue(datasetInfo, check_changed=False)

        self.topLevelOperator.WorkingDirectory.setValue(newdir)
        self._projectFilePath = newdir
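updateWorkingDirectory stores a dataset path as relative only if it shares a filesystem prefix with the working directory; otherwise it falls back to the absolute path. A simplified standalone sketch of that decision (note that os.path.commonprefix is purely character-based, which is why the original just checks for a prefix longer than '/'):

import os

def choose_stored_path(abs_file_path, working_dir):
    """Prefer a relative path unless the file lives in a completely different tree."""
    if len(os.path.commonprefix([abs_file_path, working_dir])) > 1:
        return os.path.relpath(abs_file_path, working_dir)
    return abs_file_path

# Same tree -> relative link
assert choose_stored_path('/projects/exp1/data.h5', '/projects/exp1') == 'data.h5'
# Different tree (only '/' in common) -> absolute link
assert choose_stored_path('/scratch/data.h5', '/projects/exp1') == '/scratch/data.h5'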
Example #14
    def _getDisplayRoleData(self, index):
        # Last row is just buttons
        if index.row() >= self.rowCount() - 1:
            return ""

        laneIndex = index.row()

        if index.column() < LaneColumn.NumColumns:
            if index.column() == LaneColumn.LabelsAllowed:
                firstInfoSlot = self._op.DatasetGroup[laneIndex][0]
                if not firstInfoSlot.ready():
                    return ""
                info = firstInfoSlot.value
                return {True: "True", False: "False"}[info.allowLabels]
            else:
                assert False

        ## Dataset info item
        roleIndex = (index.column() -
                     LaneColumn.NumColumns) // DatasetInfoColumn.NumColumns
        datasetInfoIndex = (index.column() - LaneColumn.NumColumns
                            ) % DatasetInfoColumn.NumColumns

        datasetSlot = self._op.DatasetGroup[laneIndex][roleIndex]
        if not datasetSlot.ready():
            return ""

        UninitializedDisplayData = {DatasetInfoColumn.Name: "<please select>"}

        datasetSlot = self._op.DatasetGroup[laneIndex][roleIndex]
        if datasetSlot.ready():
            datasetInfo = self._op.DatasetGroup[laneIndex][roleIndex].value
        else:
            return UninitializedDisplayData[datasetInfoIndex]

        if datasetInfoIndex == DatasetInfoColumn.Name:
            if datasetInfo.nickname is not None and datasetInfo.nickname != "":
                return datasetInfo.nickname
            return PathComponents(datasetInfo.filePath).filename

        if datasetInfoIndex == DatasetInfoColumn.Location:
            LocationNames = {
                DatasetInfo.Location.FileSystem: "External File",
                DatasetInfo.Location.ProjectInternal: "Project File"
            }
            return LocationNames[datasetInfo.location]

        assert False, "Unknown column"
Example #15
def runWorkflow(parsed_args):
    args = parsed_args
    
    # Make sure project file exists.
    if not os.path.exists(args.project):
        raise RuntimeError("Project file '" + args.project + "' does not exist.")

    # Make sure batch inputs exist.
    for p in args.batch_inputs:
        print p
        error = False
        p = PathComponents(p).externalPath
        if not os.path.exists(p):
            logger.error("Batch input file does not exist: " + p)
            error = True
        if error:
            raise RuntimeError("Could not find one or more batch inputs.  See logged errors.")

    if not args.generate_project_predictions and len(args.batch_inputs) == 0:
        logger.error("Command-line arguments didn't specify a workload.")
        return

    # Instantiate 'shell'
    shell, workflow = startShellHeadless( AutocontextClassificationWorkflow )
    
    # Load project (auto-import it if necessary)
    logger.info("Opening project: '" + args.project + "'")
    shell.openProjectPath(args.project)

    # Predictions for project input datasets
    if args.generate_project_predictions:
        generateProjectPredictions(shell, workflow)

    # Predictions for other datasets ('batch datasets')
    result = True
    if len(args.batch_inputs) > 0:
        result = generateBatchPredictions(workflow,
                                          args.batch_inputs,
                                          args.batch_export_dir,
                                          args.batch_output_suffix,
                                          args.batch_output_dataset_name)

    logger.info("Closing project...")
    shell.projectManager.closeCurrentProject()
    
    assert result    
    
    logger.info("FINISHED.")
    def importStackAsLocalDataset(self, info):
        """
        Add the given stack data to the project file as a local dataset.
        Does not update the topLevelOperator.
        
        :param info: A DatasetInfo object.
                     Note: info.filePath must be a stack path; the individual stack files must be separated by '//' tokens.
                     Note: info will be MODIFIED by this function.  Use the modified info when assigning it to a dataset.
        """
        try:
            self.progressSignal.emit(0)

            projectFileHdf5 = self.topLevelOperator.ProjectFile.value
            topGroup = getOrCreateGroup(projectFileHdf5, self.topGroupName)
            localDataGroup = getOrCreateGroup(topGroup, 'local_data')

            globstring = info.filePath
            info.location = DatasetInfo.Location.ProjectInternal
            firstPathParts = PathComponents(info.filePath.split('//')[0])
            info.filePath = firstPathParts.externalDirectory + '/??' + firstPathParts.extension

            # Use absolute path
            cwd = self.topLevelOperator.WorkingDirectory.value
            if '//' not in globstring and not os.path.isabs(globstring):
                globstring = os.path.normpath(os.path.join(cwd, globstring))

            opWriter = OpStackToH5Writer(parent=self.topLevelOperator.parent,
                                         graph=self.topLevelOperator.graph)
            opWriter.hdf5Group.setValue(localDataGroup)
            opWriter.hdf5Path.setValue(info.datasetId)
            opWriter.GlobString.setValue(globstring)

            # Forward progress from the writer directly to our applet
            opWriter.progressSignal.subscribe(self.progressSignal.emit)

            success = opWriter.WriteImage.value

        finally:
            opWriter.cleanUp()
            self.progressSignal.emit(100)

        return success
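importStackAsLocalDataset expects stack inputs either as a glob string or as a list of files separated by '//' tokens, and rewrites info.filePath into a short 'directory/??.ext' summary. A tiny self-contained sketch of that rewrite (the helper name is made up):

import os

def summarize_stack_path(file_path):
    """Turn 'stacks/a.png//stacks/b.png' (or a glob) into a 'stacks/??.png' summary."""
    first_file = file_path.split('//')[0]
    directory = os.path.dirname(first_file)
    extension = os.path.splitext(first_file)[1]
    return directory + '/??' + extension

assert summarize_stack_path('stacks/slice000.png//stacks/slice001.png') == 'stacks/??.png'
assert summarize_stack_path('stacks/*.png') == 'stacks/??.png'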
    def _updateInternalDatasetSelection(self):
        # If all lanes have the same dataset selected, choose that item.
        # Otherwise, leave it uninitialized
        if not self.internalDatasetNameComboBox.isEnabled():
            return

        internalPath = None

        for laneIndex in self._laneIndexes:
            tmpOp = self.tempOps[laneIndex]
            datasetInfo = tmpOp.Dataset.value

            nextPath = PathComponents(datasetInfo.filePath).internalPath
            if internalPath is None:
                internalPath = nextPath  # init
            if internalPath != nextPath:
                self.internalDatasetNameComboBox.setCurrentIndex(-1)
                return

        # Make sure the correct index is selected.
        index = self.internalDatasetNameComboBox.findText(internalPath)
        self.internalDatasetNameComboBox.setCurrentIndex(index)
    def _serializeToHdf5(self, topGroup, hdf5File, projectFilePath):
        with Tracer(traceLogger):
            # Write any missing local datasets to the local_data group
            localDataGroup = self.getOrCreateGroup(topGroup, 'local_data')
            wroteInternalData = False
            for index, slot in enumerate(self.mainOperator.Dataset):
                info = slot.value
                # If this dataset should be stored in the project, but it isn't there yet
                if  info.location == DatasetInfo.Location.ProjectInternal \
                and info.datasetId not in localDataGroup.keys():
                    # Obtain the data from the corresponding output and store it to the project.
                    dataSlot = self.mainOperator.Image[index]

                    opWriter = OpH5WriterBigDataset(
                        graph=self.mainOperator.graph)
                    opWriter.hdf5File.setValue(localDataGroup)
                    opWriter.hdf5Path.setValue(info.datasetId)
                    opWriter.Image.connect(dataSlot)

                    # Trigger the copy
                    success = opWriter.WriteImage.value
                    assert success

                    # Add the axistags attribute to the dataset we just created
                    localDataGroup[info.datasetId].attrs[
                        'axistags'] = dataSlot.meta.axistags.toJSON()

                    # Update the dataset info with no path, just filename base to remind us what this data is
                    # (operator will react to the change when we call setValue(), below)
                    # Directly set the private member to avoid getting a new datasetid
                    info._filePath = PathComponents(info.filePath).filenameBase
                    wroteInternalData = True

            # Construct a list of all the local dataset ids we want to keep
            localDatasetIds = [
                slot.value.datasetId
                for index, slot in enumerate(self.mainOperator.Dataset)
                if slot.value.location == DatasetInfo.Location.ProjectInternal
            ]

            # Delete any datasets in the project that aren't needed any more
            for datasetName in localDataGroup.keys():
                if datasetName not in localDatasetIds:
                    del localDataGroup[datasetName]

            if wroteInternalData:
                # We can only re-configure the operator if we're not saving a snapshot
                # We know we're saving a snapshot if the project file isn't the one we deserialized with.
                if self._projectFilePath is None or self._projectFilePath == projectFilePath:
                    # Force the operator to setupOutputs() again so it gets data from the project, not external files
                    firstInfo = self.mainOperator.Dataset[0].value
                    self.mainOperator.Dataset[0].setValue(firstInfo, False)

            # Access the info group
            infoDir = self.getOrCreateGroup(topGroup, 'infos')

            # Delete all infos
            for infoName in infoDir.keys():
                del infoDir[infoName]

            # Rebuild the list of infos
            for index, slot in enumerate(self.mainOperator.Dataset):
                infoGroup = infoDir.create_group('info{:04d}'.format(index))
                datasetInfo = slot.value
                locationString = self.LocationStrings[datasetInfo.location]
                infoGroup.create_dataset('location', data=locationString)
                infoGroup.create_dataset('filePath', data=datasetInfo.filePath)
                infoGroup.create_dataset('datasetId',
                                         data=datasetInfo.datasetId)
                infoGroup.create_dataset('allowLabels',
                                         data=datasetInfo.allowLabels)
                if datasetInfo.axisorder is not None:
                    infoGroup.create_dataset('axisorder',
                                             data=datasetInfo.axisorder)

            self._dirty = False
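The serializer above wipes and rebuilds the 'infos' group on every save. A minimal h5py sketch of just that rebuild step, written against an in-memory file so it has no side effects; the field names follow the snippet, but the file and values are made up:

import h5py

infos = [
    {'location': 'FileSystem', 'filePath': 'data/volume.h5/raw',
     'datasetId': 'abc-123', 'allowLabels': True},
]

# driver='core' with backing_store=False keeps everything in memory.
with h5py.File('example_project.ilp', 'w', driver='core', backing_store=False) as project_file:
    info_dir = project_file.require_group('infos')

    # Delete all existing infos, then rebuild the list from scratch.
    for info_name in list(info_dir.keys()):
        del info_dir[info_name]

    for index, info in enumerate(infos):
        info_group = info_dir.create_group('info{:04d}'.format(index))
        info_group.create_dataset('location', data=info['location'])
        info_group.create_dataset('filePath', data=info['filePath'])
        info_group.create_dataset('datasetId', data=info['datasetId'])
        info_group.create_dataset('allowLabels', data=info['allowLabels'])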
    def _readDatasetInfo(self, infoGroup, localDataGroup, projectFilePath,
                         headless):
        # Unready datasets are represented with an empty group.
        if len(infoGroup) == 0:
            return None, False
        datasetInfo = DatasetInfo()

        # Make a reverse-lookup of the location storage strings
        LocationLookup = {v: k for k, v in self.LocationStrings.items()}
        datasetInfo.location = LocationLookup[str(infoGroup['location'].value)]

        # Write to the 'private' members to avoid resetting the dataset id
        datasetInfo._filePath = str(infoGroup['filePath'].value)
        datasetInfo._datasetId = str(infoGroup['datasetId'].value)

        try:
            datasetInfo.allowLabels = infoGroup['allowLabels'].value
        except KeyError:
            pass

        try:
            datasetInfo.drange = tuple(infoGroup['drange'].value)
        except KeyError:
            pass

        try:
            datasetInfo.nickname = str(infoGroup['nickname'].value)
        except KeyError:
            datasetInfo.nickname = PathComponents(
                datasetInfo.filePath).filenameBase

        try:
            tags = vigra.AxisTags.fromJSON(infoGroup['axistags'].value)
            datasetInfo.axistags = tags
        except KeyError:
            # Old projects just have an 'axisorder' field instead of full axistags
            try:
                axisorder = infoGroup['axisorder'].value
                datasetInfo.axistags = vigra.defaultAxistags(axisorder)
            except KeyError:
                pass

        # If the data is supposed to be in the project,
        #  check for it now.
        if datasetInfo.location == DatasetInfo.Location.ProjectInternal:
            if datasetInfo.datasetId not in localDataGroup.keys():
                raise RuntimeError(
                    "Corrupt project file.  Could not find data for " +
                    infoGroup.name)

        dirty = False
        # If the data is supposed to exist outside the project, make sure it really does.
        if datasetInfo.location == DatasetInfo.Location.FileSystem:
            pathData = PathComponents(datasetInfo.filePath,
                                      os.path.split(projectFilePath)[0])
            filePath = pathData.externalPath
            if not os.path.exists(filePath):
                if headless:
                    raise RuntimeError("Could not find data at " + filePath)
                filt = "Image files (" + ' '.join(
                    '*.' + x
                    for x in OpDataSelection.SupportedExtensions) + ')'
                newpath = self.repairFile(filePath, filt)
                if pathData.internalPath is not None:
                    newpath += pathData.internalPath
                datasetInfo._filePath = getPathVariants(
                    newpath,
                    os.path.split(projectFilePath)[0])[0]
                dirty = True

        return datasetInfo, dirty
Example #20
def generateBatchPredictions(workflow, batchInputPaths, batchExportDir,
                             batchOutputSuffix, exportedDatasetName,
                             stackVolumeCacheDir):
    """
    Compute the predictions for each of the specified batch input files,
    and export them to corresponding h5 files.
    """
    originalBatchInputPaths = list(batchInputPaths)
    batchInputPaths = convertStacksToH5(batchInputPaths, stackVolumeCacheDir)

    batchInputInfos = []
    for p in batchInputPaths:
        info = DatasetInfo()
        info.location = DatasetInfo.Location.FileSystem

        # Convert all paths to absolute
        # (otherwise they are relative to the project file, which probably isn't what the user meant)
        comp = PathComponents(p)
        comp.externalPath = os.path.abspath(comp.externalPath)

        info.filePath = comp.totalPath()
        batchInputInfos.append(info)

    # Also convert the export dir to absolute (for the same reason)
    if batchExportDir != '':
        batchExportDir = os.path.abspath(batchExportDir)

    # Configure batch input operator
    opBatchInputs = workflow.batchInputApplet.topLevelOperator
    opBatchInputs.DatasetGroup.resize(len(batchInputInfos))
    for info, multislot in zip(batchInputInfos, opBatchInputs.DatasetGroup):
        # FIXME: This assumes that the workflow has exactly one dataset role.
        multislot[0].setValue(info)

    # Configure batch export operator
    opBatchResults = workflow.batchResultsApplet.topLevelOperator

    # By default, the output files from the batch export operator
    #  are named using the input file name.
    # If we converted any stacks to hdf5, then the user won't recognize the input file name.
    # Let's override the output file name using the *original* input file names.
    outputFileNameBases = []
    for origPath in originalBatchInputPaths:
        outputFileNameBases.append(origPath.replace('*', 'STACKED'))

    opBatchResults.OutputFileNameBase.setValues(outputFileNameBases)
    opBatchResults.ExportDirectory.setValue(batchExportDir)
    opBatchResults.Format.setValue(ExportFormat.H5)
    opBatchResults.Suffix.setValue(batchOutputSuffix)
    opBatchResults.InternalPath.setValue(exportedDatasetName)

    logger.info("Exporting data to " + opBatchResults.OutputDataPath[0].value)

    # Set up progress display handling (just logging for now)
    currentProgress = [None]

    def handleProgress(percentComplete):
        if currentProgress[0] != percentComplete:
            currentProgress[0] = percentComplete
            logger.info("Batch job: {}% complete.".format(percentComplete))

    progressSignal = opBatchResults.ProgressSignal[0].value
    progressSignal.subscribe(handleProgress)

    # Make it happen!
    result = opBatchResults.ExportResult[0].value
    return result
    def _getDisplayRoleData(self, index):
        laneIndex = index.row()

        UninitializedDisplayData = {
            DatasetDetailedInfoColumn.Nickname: "<empty>",
            DatasetDetailedInfoColumn.Location: "",
            DatasetDetailedInfoColumn.InternalID: "",
            DatasetDetailedInfoColumn.AxisOrder: "",
            DatasetDetailedInfoColumn.Shape: "",
            DatasetDetailedInfoColumn.Range: ""
        }

        if len( self._op.DatasetGroup ) <= laneIndex \
        or len( self._op.DatasetGroup[laneIndex] ) <= self._roleIndex:
            return UninitializedDisplayData[index.column()]

        datasetSlot = self._op.DatasetGroup[laneIndex][self._roleIndex]

        # Default
        if not datasetSlot.ready():
            return UninitializedDisplayData[index.column()]

        datasetInfo = self._op.DatasetGroup[laneIndex][self._roleIndex].value
        filePathComponents = PathComponents(datasetInfo.filePath)

        ## Input meta-data fields

        # Name
        if index.column() == DatasetDetailedInfoColumn.Nickname:
            return datasetInfo.nickname

        # Location
        if index.column() == DatasetDetailedInfoColumn.Location:
            if datasetInfo.location == DatasetInfo.Location.FileSystem:
                if os.path.isabs(datasetInfo.filePath):
                    return "Absolute Link: {}".format(
                        filePathComponents.externalPath)
                else:
                    return "Relative Link: {}".format(
                        filePathComponents.externalPath)
            else:
                return "Project File"

        # Internal ID
        if index.column() == DatasetDetailedInfoColumn.InternalID:
            if datasetInfo.location == DatasetInfo.Location.FileSystem:
                return filePathComponents.internalPath
            return ""

        ## Output meta-data fields

        # Defaults
        imageSlot = self._op.ImageGroup[laneIndex][self._roleIndex]
        if not imageSlot.ready():
            return UninitializedDisplayData[index.column()]

        # Axis order
        if index.column() == DatasetDetailedInfoColumn.AxisOrder:
            original_axistags = imageSlot.meta.original_axistags
            axistags = imageSlot.meta.axistags
            if original_axistags is not None:
                return "".join(tag.key for tag in original_axistags)
            if axistags is not None:
                return "".join(imageSlot.meta.getAxisKeys())
            return ""

        # Shape
        if index.column() == DatasetDetailedInfoColumn.Shape:
            original_shape = imageSlot.meta.original_shape
            shape = imageSlot.meta.shape
            if original_shape is not None:
                return str(original_shape)
            if shape is None:
                return ""
            return str(shape)

        # Range
        if index.column() == DatasetDetailedInfoColumn.Range:
            drange = imageSlot.meta.drange
            if drange is None:
                return ""
            return str(drange)

        assert False, "Unknown column: row={}, column={}".format(
            index.row(), index.column())
def generateBatchPredictions(workflow, batchInputPaths, batchExportDir, batchOutputSuffix, exportedDatasetName, stackVolumeCacheDir):
    """
    Compute the predictions for each of the specified batch input files,
    and export them to corresponding h5 files.
    """
    originalBatchInputPaths = list(batchInputPaths)
    batchInputPaths = convertStacksToH5(batchInputPaths, stackVolumeCacheDir)

    batchInputInfos = []
    for p in batchInputPaths:
        info = DatasetInfo()
        info.location = DatasetInfo.Location.FileSystem

        # Convert all paths to absolute 
        # (otherwise they are relative to the project file, which probably isn't what the user meant)        
        comp = PathComponents(p)
        comp.externalPath = os.path.abspath(comp.externalPath)
        
        info.filePath = comp.totalPath()        
        batchInputInfos.append(info)

    # Also convert the export dir to absolute (for the same reason)
    if batchExportDir != '':
        batchExportDir = os.path.abspath( batchExportDir )

    # Configure batch input operator
    opBatchInputs = workflow.batchInputApplet.topLevelOperator
    opBatchInputs.Dataset.setValues( batchInputInfos )
    
    # Configure batch export operator
    opBatchResults = workflow.batchResultsApplet.topLevelOperator

    # By default, the output files from the batch export operator
    #  are named using the input file name.
    # If we converted any stacks to hdf5, then the user won't recognize the input file name.
    # Let's override the output file name using the *original* input file names.
    outputFileNameBases = []
    for origPath in originalBatchInputPaths:
        outputFileNameBases.append( origPath.replace('*', 'STACKED') )

    opBatchResults.OutputFileNameBase.setValues( outputFileNameBases )    
    opBatchResults.ExportDirectory.setValue(batchExportDir)
    opBatchResults.Format.setValue(ExportFormat.H5)
    opBatchResults.Suffix.setValue(batchOutputSuffix)
    opBatchResults.InternalPath.setValue(exportedDatasetName)
    
    logger.info( "Exporting data to " + opBatchResults.OutputDataPath[0].value )

    # Set up progress display handling (just logging for now)        
    currentProgress = [None]
    def handleProgress(percentComplete):
        if currentProgress[0] != percentComplete:
            currentProgress[0] = percentComplete
            logger.info("Batch job: {}% complete.".format(percentComplete))
        
    progressSignal = opBatchResults.ProgressSignal[0].value
    progressSignal.subscribe( handleProgress )

    # Make it happen!
    result = opBatchResults.ExportResult[0].value
    return result
Example #23
def runWorkflow(parsed_args):
    args = parsed_args

    # Use a temporary cache dir
    if args.stack_volume_cache_dir is None:
        args.stack_volume_cache_dir = tempfile.gettempdir()

    # Make sure project file exists.
    if not os.path.exists(args.project):
        raise RuntimeError("Project file '" + args.project +
                           "' does not exist.")

    # Make sure batch inputs exist.
    for p in args.batch_inputs:
        error = False
        p = PathComponents(p).externalPath
        if '*' in p:
            if len(glob.glob(p)) == 0:
                logger.error(
                    "Could not find any files for globstring: {}".format(p))
                logger.error("Check your quotes!")
                error = True
        elif not os.path.exists(p):
            logger.error("Batch input file does not exist: " + p)
            error = True
        if error:
            raise RuntimeError(
                "Could not find one or more batch inputs.  See logged errors.")

    # Instantiate 'shell'
    shell = HeadlessShell(
        functools.partial(PixelClassificationWorkflow,
                          appendBatchOperators=True))

    if args.assume_old_ilp_axes:
        # Special hack for Janelia:
        # In some old versions of 0.5, the data was stored in tyxzc order.
        # We have no way of inspecting the data to determine this, so we allow
        #  users to specify that their ilp is very old using the
        #  assume_old_ilp_axes command-line flag
        ilastik.utility.globals.ImportOptions.default_axis_order = 'tyxzc'

    # Load project (auto-import it if necessary)
    logger.info("Opening project: '" + args.project + "'")
    shell.openProjectFile(args.project)

    try:
        if not args.generate_project_predictions and len(
                args.batch_inputs) == 0:
            logger.error(
                "Command-line arguments didn't specify any classification jobs."
            )
        else:
            # Predictions for project input datasets
            if args.generate_project_predictions:
                generateProjectPredictions(shell)

            # Predictions for other datasets ('batch datasets')
            result = True
            if len(args.batch_inputs) > 0:
                result = generateBatchPredictions(
                    shell.workflow, args.batch_inputs, args.batch_export_dir,
                    args.batch_output_suffix, args.batch_output_dataset_name,
                    args.stack_volume_cache_dir)
                assert result
    finally:
        logger.info("Closing project...")
        shell.closeCurrentProject()

    logger.info("FINISHED.")