def updateWorkingDirectory(self,newpath,oldpath):
        newdir = PathComponents(newpath).externalDirectory
        olddir = PathComponents(oldpath).externalDirectory
        
        if newdir==olddir:
            return
 
        # Disconnect the working directory while we make these changes.
        # All the changes will take effect when we set the new working directory.
        self.topLevelOperator.WorkingDirectory.disconnect()
        
        for laneIndex, multislot in enumerate(self.topLevelOperator.DatasetGroup):
            for roleIndex, slot in enumerate(multislot):
                if not slot.ready():
                    # Skip if there is no dataset in this lane/role combination yet.
                    continue
                datasetInfo = slot.value
                if datasetInfo.location == DatasetInfo.Location.FileSystem:
                    
                    # Construct the absolute path, then recreate it relative to the new working directory
                    fp = PathComponents(datasetInfo.filePath,olddir).totalPath()
                    abspath,relpath = getPathVariants(fp,newdir)
                    
                    # Same convention as in dataSelectionGui:
                    # Relative by default, unless the file is in a totally different tree from the working directory.
                    if len(os.path.commonprefix([fp, abspath])) > 1:
                        datasetInfo.filePath = relpath
                    else:
                        datasetInfo.filePath = abspath
                    
                    slot.setValue(datasetInfo, check_changed=False)
        
        self.topLevelOperator.WorkingDirectory.setValue(newdir)
        self._projectFilePath = newdir
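
The relative-vs-absolute decision above recurs throughout these snippets. A minimal sketch of that convention, assuming getPathVariants returns an (absolute, relative) pair for a path against a reference directory; the helper and example paths below are hypothetical:

import os

def choosePathVariant(absPath, relPath, workingDir):
    # Prefer the relative variant unless the file lives in a completely
    # different tree from the working directory (mirrors the commonprefix test above).
    if len(os.path.commonprefix([workingDir, absPath])) > 1:
        return relPath
    return absPath

# e.g. choosePathVariant('/project/data/raw.h5', 'data/raw.h5', '/project') -> 'data/raw.h5'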
 def updateWorkingDirectory(self,newpath,oldpath):
     
     newpath,oldpath = PathComponents(newpath).externalDirectory,PathComponents(oldpath).externalDirectory
     
     if newpath==oldpath:
         return
     
     for slot in self.topLevelOperator.Dataset:
         datasetInfo = slot.value
         if datasetInfo.location == DatasetInfo.Location.FileSystem:
             
             # Construct the absolute path, then recreate it relative to the new working directory
             fp = PathComponents(datasetInfo.filePath,oldpath).totalPath()
             abspath,relpath = getPathVariants(fp,newpath)
             
             # Same convention as in dataSelectionGui:
             # Relative by default, unless the file is in a totally different tree from the working directory.
             if len(os.path.commonprefix([fp, abspath])) > 1:
                 datasetInfo.filePath = relpath
             else:
                 datasetInfo.filePath = abspath
             
             slot.setValue(datasetInfo)
     
     self.topLevelOperator.WorkingDirectory.setValue(newpath)
     self._projectFilePath = newpath
     
     for slot in self.topLevelOperator.Dataset:
         self.topLevelOperator.applet._gui.updateTableForSlot(slot = slot)
Example #3
    def updateInternalPathComboBox( self, row, externalPath, internalPath ):
        with Tracer(traceLogger):
            combo = QComboBox()
            datasetNames = []
    
            # Make sure we're dealing with the absolute path (to make this simple)
            absPath, relPath = getPathVariants(externalPath, self.mainOperator.WorkingDirectory.value)
            ext = os.path.splitext(absPath)[1]
            h5Exts = ['.ilp', '.h5', '.hdf5']
            if ext in h5Exts:
                datasetNames = self.getPossibleInternalPaths(absPath)

            # Add each dataset option to the combo            
            for path in datasetNames:
                combo.addItem( path )
    
            # If the internal path we used previously is in the combo list, select it.
            prevSelection = combo.findText( internalPath )
            if prevSelection != -1:
                combo.setCurrentIndex( prevSelection )
    
            # Define response to changes and add it to the GUI.
            # Pass in the corresponding table item so we can figure out which row this came from
            combo.currentIndexChanged.connect( bind(self.handleComboSelectionChanged, combo) )
            self.fileInfoTableWidget.setCellWidget( row, Column.InternalID, combo )
Example #4
    def addFileNames(self, fileNames):
        """
        Add the given filenames to both the GUI table and the top-level operator inputs.
        """
        with Tracer(traceLogger):
            # Allocate additional subslots in the operator inputs.
            oldNumFiles = len(self.mainOperator.Dataset)
            self.mainOperator.Dataset.resize( oldNumFiles+len(fileNames) )
    
            # Assign values to the new inputs we just allocated.
            # The GUI will be updated by callbacks that are listening to slot changes
            for i, filePath in enumerate(fileNames):
                datasetInfo = DatasetInfo()
                cwd = self.mainOperator.WorkingDirectory.value
                absPath, relPath = getPathVariants(filePath, cwd)

                # Relative by default, unless the file is in a totally different tree from the working directory.
                if len(os.path.commonprefix([cwd, absPath])) > 1:
                    datasetInfo.filePath = relPath
                else:
                    datasetInfo.filePath = absPath

                h5Exts = ['.ilp', '.h5', '.hdf5']
                if os.path.splitext(datasetInfo.filePath)[1] in h5Exts:
                    datasetNames = self.getPossibleInternalPaths( absPath )
                    if len(datasetNames) > 0:
                        datasetInfo.filePath += str(datasetNames[0])
                    else:
                        raise RuntimeError("HDF5 file has no image datasets")
                
                # Allow labels by default if this gui isn't being used for batch data.
                datasetInfo.allowLabels = ( self.guiMode == GuiMode.Normal )

                self.mainOperator.Dataset[i+oldNumFiles].setValue( datasetInfo )
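
A short sketch of the default internal-path step above: when the chosen file is an HDF5 container, the first discovered image dataset is appended to the external path. The file name and the discovery result stand in for getPossibleInternalPaths:

import os

h5Exts = ['.ilp', '.h5', '.hdf5']
filePath = 'stack.h5'                    # placeholder external path
internalPaths = ['/volume/data']         # placeholder discovery result
if os.path.splitext(filePath)[1] in h5Exts and internalPaths:
    filePath += internalPaths[0]         # -> 'stack.h5/volume/data'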
 def updateInternalPathComboBox( self, row, externalPath, internalPath ):
     with Tracer(traceLogger):
         combo = QComboBox()
         datasetNames = []
 
         # Make sure we're dealing with the absolute path (to make this simple)
         absPath, relPath = getPathVariants(externalPath, self.mainOperator.WorkingDirectory.value)
         ext = os.path.splitext(absPath)[1]
         h5Exts = ['.ilp', '.h5', '.hdf5']
         if ext in h5Exts:
             # Open the file read-only so we can get a list of the internal paths
             f = h5py.File(absPath, 'r')
             
             # Define a closure to collect all of the dataset names in the file.
             def accumulateDatasetPaths(name, val):
                 if type(val) == h5py._hl.dataset.Dataset and 3 <= len(val.shape) <= 5:
                     datasetNames.append( '/' + name )
 
             # Visit every group/dataset in the file            
             f.visititems(accumulateDatasetPaths)
             f.close()
 
         # Add each dataset option to the combo            
         for path in datasetNames:
             combo.addItem( path )
 
         # If the internal path we used previously is in the combo list, select it.
         prevSelection = combo.findText( internalPath )
         if prevSelection != -1:
             combo.setCurrentIndex( prevSelection )
 
         # Define response to changes and add it to the GUI.
          # Pass in the corresponding table item so we can figure out which row this came from
         combo.currentIndexChanged.connect( bind(self.handleComboSelectionChanged, combo) )
         self.fileInfoTableWidget.setCellWidget( row, Column.InternalID, combo )
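
For reference, a self-contained sketch of the internal-path discovery shown inline above: visit every item in an HDF5 file and keep datasets with 3 to 5 dimensions. The file name is a placeholder:

import h5py

def listImageDatasets(h5Path):
    datasetNames = []
    def accumulateDatasetPaths(name, val):
        # Only 3D-5D datasets are offered as image candidates.
        if isinstance(val, h5py.Dataset) and 3 <= len(val.shape) <= 5:
            datasetNames.append('/' + name)
    with h5py.File(h5Path, 'r') as f:
        f.visititems(accumulateDatasetPaths)
    return datasetNames

# e.g. listImageDatasets('stack.h5') might return ['/volume/data']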
Example #6
 def updateFilePath(self, index):
     """
     Update the operator's filePath input to match the gui
     """
     with Tracer(traceLogger):
         oldLocationSetting = self.mainOperator.Dataset[index].value.location
         
         # Get the directory by inspecting the original operator path
         oldTotalPath = self.mainOperator.Dataset[index].value.filePath
         # Split into directory, filename, extension, and internal path
         lastDotIndex = oldTotalPath.rfind('.')
         extensionAndInternal = oldTotalPath[lastDotIndex:]
         extension = extensionAndInternal.split('/')[0]
         oldFilePath = oldTotalPath[:lastDotIndex] + extension
         
         fileNameText = str(self.fileInfoTableWidget.item(index, Column.Name).text())
         
         internalPathCombo = self.fileInfoTableWidget.cellWidget(index, Column.InternalID)
         #internalPath = str(self.fileInfoTableWidget.item(index, Column.InternalID).text())
         internalPath = str(internalPathCombo.currentText())
         
         directory = os.path.split(oldFilePath)[0]
         newFileNamePath = fileNameText
         if directory != '':
             newFileNamePath = directory + '/' + fileNameText
         
         newTotalPath = newFileNamePath
         if internalPath != '':
             if internalPath[0] != '/':
                 newTotalPath += '/'
             newTotalPath += internalPath
 
         cwd = self.mainOperator.WorkingDirectory.value
         absTotalPath, relTotalPath = getPathVariants( newTotalPath, cwd )
 
         # Check the location setting
         locationCombo = self.fileInfoTableWidget.cellWidget(index, Column.Location)
         comboIndex = locationCombo.currentIndex()
         newLocationSelection = locationCombo.itemData(comboIndex).toInt()[0] # In PyQt, toInt() returns a tuple
 
         if newLocationSelection == LocationOptions.Project:
             newLocationSetting = DatasetInfo.Location.ProjectInternal
         elif newLocationSelection == LocationOptions.AbsolutePath:
             newLocationSetting = DatasetInfo.Location.FileSystem
             newTotalPath = absTotalPath
         elif newLocationSelection == LocationOptions.RelativePath:
             newLocationSetting = DatasetInfo.Location.FileSystem
             newTotalPath = relTotalPath
         
         if newTotalPath != oldTotalPath or newLocationSetting != oldLocationSetting:
             # Be sure to copy so the slot notices the change when we setValue()
             datasetInfo = copy.copy(self.mainOperator.Dataset[index].value)
             datasetInfo.filePath = newTotalPath
             datasetInfo.location = newLocationSetting
     
             # TODO: First check to make sure this file exists!
             self.mainOperator.Dataset[index].setValue( datasetInfo )
     
             # Update the storage option combo to show the new path        
             self.updateStorageOptionComboBox(index, newFileNamePath)
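
A small standalone sketch of the path-splitting convention used above: everything up to and including the extension is treated as the external file path, and whatever follows is the HDF5-internal path. This only restates the rfind-based logic above:

def splitTotalPath(totalPath):
    lastDotIndex = totalPath.rfind('.')
    extensionAndInternal = totalPath[lastDotIndex:]
    extension = extensionAndInternal.split('/')[0]
    externalPath = totalPath[:lastDotIndex] + extension
    internalPath = extensionAndInternal[len(extension):]
    return externalPath, internalPath

# e.g. splitTotalPath('/data/stack.h5/volume/data') -> ('/data/stack.h5', '/volume/data')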
Example #7
    def addFileNames(self, fileNames):
        """
        Add the given filenames to both the GUI table and the top-level operator inputs.
        """
        with Tracer(traceLogger):
            # Allocate additional subslots in the operator inputs.
            oldNumFiles = len(self.mainOperator.Dataset)
            self.mainOperator.Dataset.resize(oldNumFiles + len(fileNames))

            # Assign values to the new inputs we just allocated.
            # The GUI will be updated by callbacks that are listening to slot changes
            for i, filePath in enumerate(fileNames):
                datasetInfo = DatasetInfo()
                cwd = self.mainOperator.WorkingDirectory.value
                absPath, relPath = getPathVariants(filePath, cwd)

                # Relative by default, unless the file is in a totally different tree from the working directory.
                if len(os.path.commonprefix([cwd, absPath])) > 1:
                    datasetInfo.filePath = relPath
                else:
                    datasetInfo.filePath = absPath

                # Allow labels by default if this gui isn't being used for batch data.
                datasetInfo.allowLabels = (self.guiMode == GuiMode.Normal)

                self.mainOperator.Dataset[i +
                                          oldNumFiles].setValue(datasetInfo)
    def _deserializeFromHdf5(self, topGroup, groupVersion, hdf5File, projectFilePath, headless):
        self._projectFilePath = projectFilePath
        self.initWithoutTopGroup(hdf5File, projectFilePath)
        
        # normally the serializer is not dirty after loading a project file
        # however, when the file was corrupted, the user has the possibility
        # to save the fixed file after loading it.
        dirty = False 
        
        infoDir = topGroup['infos']
        
        self.topLevelOperator.Dataset.resize( len(infoDir) )
        for index, (infoGroupName, infoGroup) in enumerate( sorted(infoDir.items()) ):
            datasetInfo = DatasetInfo()

            # Make a reverse-lookup of the location storage strings            
            LocationLookup = { v:k for k,v in self.LocationStrings.items() }
            datasetInfo.location = LocationLookup[ str(infoGroup['location'].value) ]
            
            # Write to the 'private' members to avoid resetting the dataset id
            datasetInfo._filePath = str(infoGroup['filePath'].value)
            datasetInfo._datasetId = str(infoGroup['datasetId'].value)

            # Deserialize the "allow labels" flag
            try:
                datasetInfo.allowLabels = infoGroup['allowLabels'].value
            except KeyError:
                pass

            # Deserialize the axisorder (if present)
            try:
                datasetInfo.axisorder = infoGroup['axisorder'].value
            except KeyError:
                pass
            
            # If the data is supposed to be in the project,
            #  check for it now.
            if datasetInfo.location == DatasetInfo.Location.ProjectInternal:
                if not datasetInfo.datasetId in topGroup['local_data'].keys():
                    raise RuntimeError("Corrupt project file.  Could not find data for " + infoGroupName)

            # If the data is supposed to exist outside the project, make sure it really does.
            if datasetInfo.location == DatasetInfo.Location.FileSystem:
                pathData = PathComponents( datasetInfo.filePath, os.path.split(projectFilePath)[0])
                filePath = pathData.externalPath
                if not os.path.exists(filePath):
                    if headless:
                        raise RuntimeError("Could not find data at " + filePath)
                    filt = "Image files (" + ' '.join('*.' + x for x in OpDataSelection.SupportedExtensions) + ')'
                    newpath = self.repairFile(filePath, filt)
                    newpath = newpath+pathData.internalPath
                    datasetInfo._filePath = getPathVariants(newpath , os.path.split(projectFilePath)[0])[0]
                    
                    dirty = True
                    
            # Give the new info to the operator
            self.topLevelOperator.Dataset[index].setValue(datasetInfo)

        self._dirty = dirty
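
The optional fields above all follow the same pattern: h5py groups raise KeyError for missing members, so each optional attribute falls back silently. A tiny sketch of that pattern, using the same .value access as the code above (group and key are placeholders):

def readOptional(infoGroup, key, default=None):
    # Return the stored value if the member exists in the group, else the default.
    try:
        return infoGroup[key].value
    except KeyError:
        return default

# e.g. allowLabels = readOptional(infoGroup, 'allowLabels', True)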
Example #9
    def execute(self, slot, subindex, roi, result):
        dtypeBytes = self._getDtypeBytes()
        totalBytes = dtypeBytes * numpy.prod(self.Input.meta.shape)
        totalMB = totalBytes / (1000*1000)
        logger.info( "Clusterizing computation of {} MB dataset, outputting according to {}".format(totalMB, self.OutputDatasetDescription.value) )

        configFilePath = self.ConfigFilePath.value
        self._config = parseClusterConfigFile( configFilePath )

        self._validateConfig()

        # Create the destination file if necessary
        blockwiseFileset, taskInfos = self._prepareDestination()

        try:
            # Figure out which work doesn't need to be recomputed (if any)
            unneeded_rois = []
            for roi in taskInfos.keys():
                if blockwiseFileset.getBlockStatus(roi[0]) == BlockwiseFileset.BLOCK_AVAILABLE \
                or blockwiseFileset.isBlockLocked(roi[0]): # We don't attempt to process currently locked blocks.
                    unneeded_rois.append( roi )
    
            # Remove any tasks that we don't need to compute (they were finished in a previous run)
            for roi in unneeded_rois:
                logger.info( "No need to run task: {} for roi: {}".format( taskInfos[roi].taskName, roi ) )
                del taskInfos[roi]

            absWorkDir, relWorkDir = getPathVariants(self._config.server_working_directory, os.path.split( configFilePath )[0] )
            if self._config.task_launch_server == "localhost":
                def localCommand( cmd ):
                    cwd = os.getcwd()
                    os.chdir( absWorkDir )
                    subprocess.call( cmd, shell=True )
                    os.chdir( cwd )
                launchFunc = localCommand
            else:
                # We use fabric for executing remote tasks
                # Import it here because it isn't required that the nodes can use it.
                import fabric.api as fab
                @fab.hosts( self._config.task_launch_server )
                def remoteCommand( cmd ):
                    with fab.cd( absWorkDir ):
                        fab.run( cmd )
                launchFunc = functools.partial( fab.execute, remoteCommand )
    
            # Spawn each task
            for taskInfo in taskInfos.values():
                logger.info("Launching node task: " + taskInfo.command )
                launchFunc( taskInfo.command )
        
            # Return immediately.  We do not attempt to monitor the task progress.
            result[0] = True
            return result
        finally:
            blockwiseFileset.close()
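
A minimal sketch of the local launch path above: run each task command from the configured working directory and restore the previous one afterwards. The command and directory are placeholders:

import os
import subprocess

def runInDir(cmd, workDir):
    prevDir = os.getcwd()
    os.chdir(workDir)
    try:
        subprocess.call(cmd, shell=True)
    finally:
        os.chdir(prevDir)

# e.g. runInDir('echo hello', '/tmp')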
Example #10
    def addFileNames(self, fileNames):
        """
        Add the given filenames to both the GUI table and the top-level operator inputs.
        """
        with Tracer(traceLogger):
            infos = []

            oldNumFiles = len(self.topLevelOperator.Dataset)
            # HACK: If the filePath isn't valid, replace it
            # This is to work around the scenario where two independent data selection applets are coupled, causing mutual resizes.
            # This will be fixed when a multi-file data selection applet gui replaces this gui.            
            for i in reversed( range( oldNumFiles ) ):
                if not self.topLevelOperator.Dataset[i].ready():
                    oldNumFiles -= 1
                else:
                    break
            
    
            # Assign values to the new inputs we just allocated.
            # The GUI will be updated by callbacks that are listening to slot changes
            for i, filePath in enumerate(fileNames):
                datasetInfo = DatasetInfo()
                cwd = self.topLevelOperator.WorkingDirectory.value
                
                if not areOnSameDrive(filePath,cwd):
                    QMessageBox.critical(self, "Drive Error","Data must be on same drive as working directory.")
                    return
                    
                absPath, relPath = getPathVariants(filePath, cwd)
                
                # Relative by default, unless the file is in a totally different tree from the working directory.
                if len(os.path.commonprefix([cwd, absPath])) > 1:
                    datasetInfo.filePath = relPath
                else:
                    datasetInfo.filePath = absPath

                h5Exts = ['.ilp', '.h5', '.hdf5']
                if os.path.splitext(datasetInfo.filePath)[1] in h5Exts:
                    datasetNames = self.getPossibleInternalPaths( absPath )
                    if len(datasetNames) > 0:
                        datasetInfo.filePath += str(datasetNames[0])
                    else:
                        raise RuntimeError("HDF5 file %s has no image datasets" % datasetInfo.filePath)

                # Allow labels by default if this gui isn't being used for batch data.
                datasetInfo.allowLabels = ( self.guiMode == GuiMode.Normal )
                infos.append(datasetInfo)

            # If no exception was thrown, set up the operator now
            self.topLevelOperator.Dataset.resize( oldNumFiles+len(fileNames) )
            for i in range(len(infos)):
                self.topLevelOperator.Dataset[i+oldNumFiles].setValue( infos[i] )
Example #11
    def _prepareTaskInfos(self, roiList):
        # Divide up the workload into large pieces
        logger.info("Dividing into {} node jobs.".format(len(roiList)))

        taskInfos = collections.OrderedDict()
        for roiIndex, roi in enumerate(roiList):
            roi = (tuple(roi[0]), tuple(roi[1]))
            taskInfo = OpClusterize.TaskInfo()
            taskInfo.subregion = SubRegion(None, start=roi[0], stop=roi[1])

            taskName = "J{:02}".format(roiIndex)

            commandArgs = []
            commandArgs.append("--option_config_file=" +
                               self.ConfigFilePath.value)
            commandArgs.append("--project=" + self.ProjectFilePath.value)
            commandArgs.append("--_node_work_=\"" +
                               Roi.dumps(taskInfo.subregion) + "\"")
            commandArgs.append("--process_name={}".format(taskName))
            commandArgs.append("--output_description_file={}".format(
                self.OutputDatasetDescription.value))
            for slot in self.SecondaryOutputDescriptions:
                commandArgs.append(
                    "--secondary_output_description_file={}".format(
                        slot.value))

            # Check the command format string: We need to know where to put our args...
            commandFormat = self._config.command_format
            assert commandFormat.find("{task_args}") != -1

            # Output log directory might be a relative path (relative to config file)
            absLogDir, relLogDir = getPathVariants(
                self._config.output_log_directory,
                os.path.split(self.ConfigFilePath.value)[0])
            taskOutputLogFilename = taskName + ".log"
            taskOutputLogPath = os.path.join(absLogDir, taskOutputLogFilename)

            allArgs = " " + " ".join(commandArgs) + " "
            taskInfo.taskName = taskName
            taskInfo.command = commandFormat.format(
                task_args=allArgs,
                task_name=taskName,
                task_output_file=taskOutputLogPath)
            taskInfos[roi] = taskInfo

        return taskInfos
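
A brief sketch of how the configured command format is filled in: the format string comes from the cluster config file and only needs to contain the named placeholders used above. The qsub-style string here is just an illustration, not the real config value:

commandFormat = "qsub -N {task_name} -o {task_output_file} run_task.sh {task_args}"
command = commandFormat.format(task_args=" --project=example.ilp ",
                               task_name="J01",
                               task_output_file="/logs/J01.log")
# -> 'qsub -N J01 -o /logs/J01.log run_task.sh  --project=example.ilp '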
Example #12
    def updateStorageOptionComboBox(self, row, filePath):
        """
        Create and add the combobox for storage location options
        """
        with Tracer(traceLogger):
            # Determine the relative path to this file
            absPath, relPath = getPathVariants(
                filePath, self.mainOperator.WorkingDirectory.value)
            # Add prefixes to make the options clear
            absPath = "Absolute Link: " + absPath
            relPath = "Relative Link: <project directory>/" + relPath

            combo = QComboBox()
            options = {}  # combo data -> combo text
            options[LocationOptions.AbsolutePath] = absPath
            options[LocationOptions.RelativePath] = relPath

            options[LocationOptions.Project] = "Store in Project File"

            for option, text in sorted(options.items()):
                # Add to the combo, storing the option as the item data
                combo.addItem(text, option)

            # Select the combo index that matches the current setting
            location = self.mainOperator.Dataset[row].value.location

            if location == DatasetInfo.Location.ProjectInternal:
                comboData = LocationOptions.Project
            elif location == DatasetInfo.Location.FileSystem:
                # Determine if the path is relative or absolute
                if os.path.isabs(
                        self.mainOperator.Dataset[row].value.filePath[0]):
                    comboData = LocationOptions.AbsolutePath
                else:
                    comboData = LocationOptions.RelativePath

            comboIndex = combo.findData(QVariant(comboData))
            combo.setCurrentIndex(comboIndex)

            combo.currentIndexChanged.connect(
                partial(self.handleComboSelectionChanged, combo))
            self.fileInfoTableWidget.setCellWidget(row, Column.Location, combo)
Example #13
    def updateStorageOptionComboBox(self, row, filePath):
        """
        Create and add the combobox for storage location options
        """
        assert threading.current_thread().name == "MainThread"
        with Tracer(traceLogger):
            # Determine the relative path to this file
            absPath, relPath = getPathVariants(filePath, self.topLevelOperator.WorkingDirectory.value)
            # Add prefixes to make the options clear
            absPath = "Absolute Link: " + absPath
            relPath = "Relative Link: <project directory>/" + relPath

            combo = QComboBox()
            options = {} # combo data -> combo text
            options[ LocationOptions.AbsolutePath ] = absPath
            options[ LocationOptions.RelativePath ] = relPath

            options[ LocationOptions.Project ] = "Store in Project File"

            for option, text in sorted(options.items()):
                # Add to the combo, storing the option as the item data
                combo.addItem(text, option)

            # Select the combo index that matches the current setting
            location = self.topLevelOperator.Dataset[row].value.location

            if location == DatasetInfo.Location.ProjectInternal:
                comboData = LocationOptions.Project
            elif location == DatasetInfo.Location.FileSystem:
                # Determine if the path is relative or absolute
                if os.path.isabs(self.topLevelOperator.Dataset[row].value.filePath[0]):
                    comboData = LocationOptions.AbsolutePath
                else:
                    comboData = LocationOptions.RelativePath

            comboIndex = combo.findData( QVariant(comboData) )
            combo.setCurrentIndex( comboIndex )

            combo.currentIndexChanged.connect( partial(self.handleComboSelectionChanged, combo) )
            self.fileInfoTableWidget.setCellWidget( row, Column.Location, combo )
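
A tiny sketch of the absolute-vs-relative check above: on POSIX-style paths, inspecting the first character of the stored path is enough to decide which combo entry to preselect. The paths are placeholders:

import os

assert os.path.isabs('/data/raw.h5'[0])       # absolute -> LocationOptions.AbsolutePath
assert not os.path.isabs('data/raw.h5'[0])    # relative -> LocationOptions.RelativePath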
Example #14
    def updateInternalPathComboBox(self, row, externalPath, internalPath):
        with Tracer(traceLogger):
            combo = QComboBox()
            datasetNames = []

            # Make sure we're dealing with the absolute path (to make this simple)
            absPath, relPath = getPathVariants(
                externalPath, self.mainOperator.WorkingDirectory.value)
            ext = os.path.splitext(absPath)[1]
            h5Exts = ['.ilp', '.h5', '.hdf5']
            if ext in h5Exts:
                # Open the file read-only so we can get a list of the internal paths
                f = h5py.File(absPath, 'r')

                # Define a closure to collect all of the dataset names in the file.
                def accumulateDatasetPaths(name, val):
                    if type(val) == h5py._hl.dataset.Dataset and 3 <= len(
                            val.shape) <= 5:
                        datasetNames.append('/' + name)

                # Visit every group/dataset in the file
                f.visititems(accumulateDatasetPaths)
                f.close()

            # Add each dataset option to the combo
            for path in datasetNames:
                combo.addItem(path)

            # If the internal path we used previously is in the combo list, select it.
            prevSelection = combo.findText(internalPath)
            if prevSelection != -1:
                combo.setCurrentIndex(prevSelection)

            # Define response to changes and add it to the GUI.
            # Pass in the corresponding table item so we can figure out which row this came from
            combo.currentIndexChanged.connect(
                bind(self.handleComboSelectionChanged, combo))
            self.fileInfoTableWidget.setCellWidget(row, Column.InternalID,
                                                   combo)
    def updateWorkingDirectory(self, newpath, oldpath):
        newdir = PathComponents(newpath).externalDirectory
        olddir = PathComponents(oldpath).externalDirectory

        if newdir == olddir:
            return

        # Disconnect the working directory while we make these changes.
        # All the changes will take effect when we set the new working directory.
        self.topLevelOperator.WorkingDirectory.disconnect()

        for laneIndex, multislot in enumerate(
                self.topLevelOperator.DatasetGroup):
            for roleIndex, slot in enumerate(multislot):
                if not slot.ready():
                    # Skip if there is no dataset in this lane/role combination yet.
                    continue
                datasetInfo = slot.value
                if datasetInfo.location == DatasetInfo.Location.FileSystem:

                    # Construct the absolute path, then recreate it relative to the new working directory
                    fp = PathComponents(datasetInfo.filePath,
                                        olddir).totalPath()
                    abspath, relpath = getPathVariants(fp, newdir)

                    # Same convention as in dataSelectionGui:
                    # Relative by default, unless the file is in a totally different tree from the working directory.
                    if len(os.path.commonprefix([fp, abspath])) > 1:
                        datasetInfo.filePath = relpath
                    else:
                        datasetInfo.filePath = abspath

                    slot.setValue(datasetInfo, check_changed=False)

        self.topLevelOperator.WorkingDirectory.setValue(newdir)
        self._projectFilePath = newdir
Example #16
    def _prepareTaskInfos(self, roiList):
        # Divide up the workload into large pieces
        logger.info( "Dividing into {} node jobs.".format( len(roiList) ) )

        taskInfos = collections.OrderedDict()
        for roiIndex, roi in enumerate(roiList):
            roi = ( tuple(roi[0]), tuple(roi[1]) )
            taskInfo = OpClusterize.TaskInfo()
            taskInfo.subregion = SubRegion( None, start=roi[0], stop=roi[1] )
            
            taskName = "J{:02}".format(roiIndex)

            commandArgs = []
            commandArgs.append( "--option_config_file=" + self.ConfigFilePath.value )
            commandArgs.append( "--project=" + self.ProjectFilePath.value )
            commandArgs.append( "--_node_work_=\"" + Roi.dumps( taskInfo.subregion ) + "\"" )
            commandArgs.append( "--process_name={}".format(taskName)  )
            commandArgs.append( "--output_description_file={}".format( self.OutputDatasetDescription.value )  )
            for slot in self.SecondaryOutputDescriptions:
                commandArgs.append( "--secondary_output_description_file={}".format( slot.value )  )

            # Check the command format string: We need to know where to put our args...
            commandFormat = self._config.command_format
            assert commandFormat.find("{task_args}") != -1

            # Output log directory might be a relative path (relative to config file)
            absLogDir, relLogDir = getPathVariants(self._config.output_log_directory, os.path.split( self.ConfigFilePath.value )[0] )
            taskOutputLogFilename = taskName + ".log"
            taskOutputLogPath = os.path.join( absLogDir, taskOutputLogFilename )
            
            allArgs = " " + " ".join(commandArgs) + " "
            taskInfo.taskName = taskName
            taskInfo.command = commandFormat.format( task_args=allArgs, task_name=taskName, task_output_file=taskOutputLogPath )
            taskInfos[roi] = taskInfo

        return taskInfos
def runWorkflow(parsed_args):
    args = parsed_args

    # Read the config file
    configFilePath = args.option_config_file
    config = parseClusterConfigFile(configFilePath)

    # If we've got a process name, re-initialize the logger from scratch
    task_name = "node"
    if args.process_name is not None:
        task_name = args.process_name
        ilastik.ilastik_logging.default_config.init(args.process_name + ' ')

    rootLogHandler = None
    if args._node_work_ is None:
        # This is the master process.
        # Tee the log to a file for future reference.

        # Output log directory might be a relative path (relative to config file)
        absLogDir, relLogDir = getPathVariants(
            config.output_log_directory,
            os.path.split(configFilePath)[0])
        if not os.path.exists(absLogDir):
            os.mkdir(absLogDir)

        # Copy the config we're using to the output directory
        shutil.copy(configFilePath, absLogDir)

        logFile = os.path.join(absLogDir, "MASTER.log")
        logFileFormatter = logging.Formatter(
            "%(levelname)s %(name)s: %(message)s")
        rootLogHandler = logging.FileHandler(logFile, 'a')
        rootLogHandler.setFormatter(logFileFormatter)
        rootLogger = logging.getLogger()
        rootLogger.addHandler(rootLogHandler)
        logger.info("Launched with sys.argv: {}".format(sys.argv))

    # Update the monkey_patch settings
    ilastik.utility.monkey_patches.apply_setting_dict(config.__dict__)

    # If we're running a node job, set the threadpool size if the user specified one.
    # Note that the main thread does not count toward the threadpool total.
    if args._node_work_ is not None and config.task_threadpool_size is not None:
        lazyflow.request.Request.reset_thread_pool(
            num_workers=config.task_threadpool_size)

    # Make sure project file exists.
    if not os.path.exists(args.project):
        raise RuntimeError("Project file '" + args.project +
                           "' does not exist.")

    # Instantiate 'shell'
    shell = HeadlessShell(
        functools.partial(Workflow.getSubclass(config.workflow_type)))

    # Load project (auto-import it if necessary)
    logger.info("Opening project: '" + args.project + "'")
    shell.openProjectPath(args.project)

    workflow = shell.projectManager.workflow

    # Attach cluster operators
    resultSlot = None
    finalOutputSlot = workflow.getHeadlessOutputSlot(config.output_slot_id)
    assert finalOutputSlot is not None

    secondaryOutputSlots = workflow.getSecondaryHeadlessOutputSlots(
        config.output_slot_id)
    secondaryOutputDescriptions = args.secondary_output_description_file  # This is a list (see 'action' above)
    if len(secondaryOutputDescriptions) != len(secondaryOutputSlots):
        raise RuntimeError(
            "This workflow produces exactly {} SECONDARY outputs.  You provided {}."
            .format(len(secondaryOutputSlots),
                    len(secondaryOutputDescriptions)))

    clusterOperator = None
    try:
        if args._node_work_ is not None:
            # We're doing node work
            opClusterTaskWorker = OperatorWrapper(
                OpTaskWorker, parent=finalOutputSlot.getRealOperator().parent)

            # FIXME: Image index is hard-coded as 0.  We assume we are working with only one (big) dataset in cluster mode.
            opClusterTaskWorker.Input.connect(finalOutputSlot)
            opClusterTaskWorker.RoiString[0].setValue(args._node_work_)
            opClusterTaskWorker.TaskName.setValue(task_name)
            opClusterTaskWorker.ConfigFilePath.setValue(
                args.option_config_file)

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterTaskWorker.SecondaryInputs[0].resize(
                len(secondaryOutputSlots))
            opClusterTaskWorker.SecondaryOutputDescriptions[0].resize(
                len(secondaryOutputSlots))
            for i in range(len(secondaryOutputSlots)):
                opClusterTaskWorker.SecondaryInputs[0][i].connect(
                    secondaryOutputSlots[i][0])
                opClusterTaskWorker.SecondaryOutputDescriptions[0][i].setValue(
                    secondaryOutputDescriptions[i])

            opClusterTaskWorker.OutputFilesetDescription.setValue(
                args.output_description_file)

            # If we have a way to report task progress (e.g. by updating the job name),
            #  then subscribe to progress signals
            if config.task_progress_update_command is not None:

                def report_progress(progress):
                    cmd = config.task_progress_update_command.format(
                        progress=int(progress))

                    def shell_call(shell_cmd):
                        logger.debug("Executing progress command: " + cmd)
                        subprocess.call(shell_cmd, shell=True)

                    background_tasks.put(functools.partial(shell_call, cmd))

                opClusterTaskWorker.innerOperators[0].progressSignal.subscribe(
                    report_progress)

            resultSlot = opClusterTaskWorker.ReturnCode
            clusterOperator = opClusterTaskWorker
        else:
            # We're the master
            opClusterizeMaster = OperatorWrapper(
                OpClusterize, parent=finalOutputSlot.getRealOperator().parent)

            opClusterizeMaster.Input.connect(finalOutputSlot)
            opClusterizeMaster.ProjectFilePath.setValue(args.project)
            opClusterizeMaster.OutputDatasetDescription.setValue(
                args.output_description_file)

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterizeMaster.SecondaryInputs[0].resize(
                len(secondaryOutputSlots))
            opClusterizeMaster.SecondaryOutputDescriptions[0].resize(
                len(secondaryOutputSlots))
            for i in range(len(secondaryOutputSlots)):
                opClusterizeMaster.SecondaryInputs[0][i].connect(
                    secondaryOutputSlots[i][0])
                opClusterizeMaster.SecondaryOutputDescriptions[0][i].setValue(
                    secondaryOutputDescriptions[i])

            opClusterizeMaster.ConfigFilePath.setValue(args.option_config_file)

            resultSlot = opClusterizeMaster.ReturnCode
            clusterOperator = opClusterizeMaster

        # Get the result
        logger.info("Starting task")
        result = resultSlot[
            0].value  # FIXME: The image index is hard-coded here.
    finally:
        logger.info("Cleaning up")
        global stop_background_tasks
        stop_background_tasks = True

        try:
            if clusterOperator is not None:
                clusterOperator.cleanUp()
        except:
            logger.error("Errors during cleanup.")

        try:
            logger.info("Closing project...")
            shell.closeCurrentProject()
        except:
            logger.error("Errors while closing project.")

    logger.info("FINISHED with result {}".format(result))
    if not result:
        logger.error("FAILED TO COMPLETE!")

    if rootLogHandler is not None:
        rootLogHandler.close()
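
A compact sketch of the master-process log tee used above: attach a FileHandler to the root logger so every module's output also lands in MASTER.log. The log directory is a placeholder:

import logging
import os

def teeRootLogger(logDir, fileName="MASTER.log"):
    handler = logging.FileHandler(os.path.join(logDir, fileName), 'a')
    handler.setFormatter(logging.Formatter("%(levelname)s %(name)s: %(message)s"))
    logging.getLogger().addHandler(handler)
    return handler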
Example #18
    def addFileNames(self, fileNames, roleIndex, startingLane=None):
        """
        Add the given filenames to both the GUI table and the top-level operator inputs.
        If startingLane is None, the filenames will be *appended* to the role's list of files.
        """
        infos = []

        if startingLane is None:
            startingLane = self._findFirstEmptyLane(roleIndex)
            endingLane = startingLane + len(fileNames) - 1
        else:
            assert startingLane < len(self.topLevelOperator.DatasetGroup)
            endingLane = startingLane + len(fileNames) - 1

        if self._max_lanes and endingLane >= self._max_lanes:
            msg = "You may not add more than {} file(s) to this workflow.  Please try again.".format(
                self._max_lanes)
            QMessageBox.critical(self, "Too many files", msg)
            return

        # Assign values to the new inputs we just allocated.
        # The GUI will be updated by callbacks that are listening to slot changes
        for i, filePath in enumerate(fileNames):
            datasetInfo = DatasetInfo()
            cwd = self.topLevelOperator.WorkingDirectory.value

            if not areOnSameDrive(filePath, cwd):
                QMessageBox.critical(
                    self, "Drive Error",
                    "Data must be on same drive as working directory.")
                return

            absPath, relPath = getPathVariants(filePath, cwd)

            # Relative by default, unless the file is in a totally different tree from the working directory.
            if len(os.path.commonprefix([cwd, absPath])) > 1:
                datasetInfo.filePath = relPath
            else:
                datasetInfo.filePath = absPath

            datasetInfo.nickname = PathComponents(absPath).filenameBase

            h5Exts = ['.ilp', '.h5', '.hdf5']
            if os.path.splitext(datasetInfo.filePath)[1] in h5Exts:
                datasetNames = self.getPossibleInternalPaths(absPath)
                if len(datasetNames) > 0:
                    datasetInfo.filePath += str(datasetNames[0])
                else:
                    raise RuntimeError("HDF5 file %s has no image datasets" %
                                       datasetInfo.filePath)

            # Allow labels by default if this gui isn't being used for batch data.
            datasetInfo.allowLabels = (self.guiMode == GuiMode.Normal)
            infos.append(datasetInfo)

        # if no exception was thrown, set up the operator now
        opTop = self.topLevelOperator
        originalSize = len(opTop.DatasetGroup)

        if len(opTop.DatasetGroup) < endingLane + 1:
            opTop.DatasetGroup.resize(endingLane + 1)
        for laneIndex, info in zip(range(startingLane, endingLane + 1), infos):
            try:
                self.topLevelOperator.DatasetGroup[laneIndex][
                    roleIndex].setValue(info)
            except DatasetConstraintError as ex:
                return_val = [False]
                # Give the user a chance to fix the problem
                self.handleDatasetConstraintError(info, info.filePath, ex,
                                                  roleIndex, laneIndex,
                                                  return_val)
                if not return_val[0]:
                    # Not successfully repaired.  Roll back the changes and give up.
                    opTop.DatasetGroup.resize(originalSize)
                    break
            except OpDataSelection.InvalidDimensionalityError as ex:
                opTop.DatasetGroup.resize(originalSize)
                QMessageBox.critical(self,
                                     "Dataset has different dimensionality",
                                     ex.message)
                break
            except:
                QMessageBox.critical(
                    self, "Dataset Load Error",
                    "Wasn't able to load your dataset into the workflow.  See console for details."
                )
                opTop.DatasetGroup.resize(originalSize)
                raise
        self.updateInternalPathVisiblity()
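
A minimal sketch of the rollback pattern above, simplified to a single-level multi-slot (no roleIndex): grow the slot first, then shrink it back to its original size if any lane fails to accept its DatasetInfo. The datasetGroup argument stands in for a lazyflow-style multi-slot and is hypothetical here:

def addLanesWithRollback(datasetGroup, infos, startingLane):
    originalSize = len(datasetGroup)
    datasetGroup.resize(startingLane + len(infos))
    try:
        for laneIndex, info in enumerate(infos, start=startingLane):
            datasetGroup[laneIndex].setValue(info)
    except Exception:
        # Roll back to the size we started with and let the caller handle the error.
        datasetGroup.resize(originalSize)
        raise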
    def _readDatasetInfo(self, infoGroup, localDataGroup, projectFilePath,
                         headless):
        # Unready datasets are represented with an empty group.
        if len(infoGroup) == 0:
            return None, False
        datasetInfo = DatasetInfo()

        # Make a reverse-lookup of the location storage strings
        LocationLookup = {v: k for k, v in self.LocationStrings.items()}
        datasetInfo.location = LocationLookup[str(infoGroup['location'].value)]

        # Write to the 'private' members to avoid resetting the dataset id
        datasetInfo._filePath = str(infoGroup['filePath'].value)
        datasetInfo._datasetId = str(infoGroup['datasetId'].value)

        try:
            datasetInfo.allowLabels = infoGroup['allowLabels'].value
        except KeyError:
            pass

        try:
            datasetInfo.drange = tuple(infoGroup['drange'].value)
        except KeyError:
            pass

        try:
            datasetInfo.nickname = str(infoGroup['nickname'].value)
        except KeyError:
            datasetInfo.nickname = PathComponents(
                datasetInfo.filePath).filenameBase

        try:
            tags = vigra.AxisTags.fromJSON(infoGroup['axistags'].value)
            datasetInfo.axistags = tags
        except KeyError:
            # Old projects just have an 'axisorder' field instead of full axistags
            try:
                axisorder = infoGroup['axisorder'].value
                datasetInfo.axistags = vigra.defaultAxistags(axisorder)
            except KeyError:
                pass

        # If the data is supposed to be in the project,
        #  check for it now.
        if datasetInfo.location == DatasetInfo.Location.ProjectInternal:
            if not datasetInfo.datasetId in localDataGroup.keys():
                raise RuntimeError(
                    "Corrupt project file.  Could not find data for " +
                    infoGroup.name)

        dirty = False
        # If the data is supposed to exist outside the project, make sure it really does.
        if datasetInfo.location == DatasetInfo.Location.FileSystem:
            pathData = PathComponents(datasetInfo.filePath,
                                      os.path.split(projectFilePath)[0])
            filePath = pathData.externalPath
            if not os.path.exists(filePath):
                if headless:
                    raise RuntimeError("Could not find data at " + filePath)
                filt = "Image files (" + ' '.join(
                    '*.' + x
                    for x in OpDataSelection.SupportedExtensions) + ')'
                newpath = self.repairFile(filePath, filt)
                if pathData.internalPath is not None:
                    newpath += pathData.internalPath
                datasetInfo._filePath = getPathVariants(
                    newpath,
                    os.path.split(projectFilePath)[0])[0]
                dirty = True

        return datasetInfo, dirty
    def _readDatasetInfo(self, infoGroup, localDataGroup, projectFilePath, headless):
        # Unready datasets are represented with an empty group.
        if len( infoGroup ) == 0:
            return None, False
        datasetInfo = DatasetInfo()

        # Make a reverse-lookup of the location storage strings
        LocationLookup = { v:k for k,v in self.LocationStrings.items() }
        datasetInfo.location = LocationLookup[ str(infoGroup['location'].value) ]
        
        # Write to the 'private' members to avoid resetting the dataset id
        datasetInfo._filePath = str(infoGroup['filePath'].value)
        datasetInfo._datasetId = str(infoGroup['datasetId'].value)

        try:
            datasetInfo.allowLabels = infoGroup['allowLabels'].value
        except KeyError:
            pass
        
        try:
            datasetInfo.drange = tuple( infoGroup['drange'].value )
        except KeyError:
            pass
        
        try:
            datasetInfo.nickname = str( infoGroup['nickname'].value )
        except KeyError:
            datasetInfo.nickname = PathComponents(datasetInfo.filePath).filenameBase
        
        try:
            tags = vigra.AxisTags.fromJSON( infoGroup['axistags'].value )
            datasetInfo.axistags = tags
        except KeyError:
            # Old projects just have an 'axisorder' field instead of full axistags
            try:
                axisorder = infoGroup['axisorder'].value
                datasetInfo.axistags = vigra.defaultAxistags(axisorder)
            except KeyError:
                pass
        
        # If the data is supposed to be in the project,
        #  check for it now.
        if datasetInfo.location == DatasetInfo.Location.ProjectInternal:
            if not datasetInfo.datasetId in localDataGroup.keys():
                raise RuntimeError("Corrupt project file.  Could not find data for " + infoGroup.name)

        dirty = False
        # If the data is supposed to exist outside the project, make sure it really does.
        if datasetInfo.location == DatasetInfo.Location.FileSystem:
            pathData = PathComponents( datasetInfo.filePath, os.path.split(projectFilePath)[0])
            filePath = pathData.externalPath
            if not os.path.exists(filePath):
                if headless:
                    raise RuntimeError("Could not find data at " + filePath)
                filt = "Image files (" + ' '.join('*.' + x for x in OpDataSelection.SupportedExtensions) + ')'
                newpath = self.repairFile(filePath, filt)
                if pathData.internalPath is not None:
                    newpath += pathData.internalPath
                datasetInfo._filePath = getPathVariants(newpath , os.path.split(projectFilePath)[0])[0]
                dirty = True
        
        return datasetInfo, dirty
Example #21
    def addFileNames(self, fileNames, roleIndex, startingLane=None):
        """
        Add the given filenames to both the GUI table and the top-level operator inputs.
        If startingLane is None, the filenames will be *appended* to the role's list of files.
        """
        infos = []

        if startingLane is None:        
            startingLane = self._findFirstEmptyLane(roleIndex)
            endingLane = startingLane+len(fileNames)-1
        else:
            assert startingLane < len(self.topLevelOperator.DatasetGroup)
            endingLane = startingLane+len(fileNames)-1

        # Assign values to the new inputs we just allocated.
        # The GUI will be updated by callbacks that are listening to slot changes
        for i, filePath in enumerate(fileNames):
            datasetInfo = DatasetInfo()
            cwd = self.topLevelOperator.WorkingDirectory.value
            
            if not areOnSameDrive(filePath,cwd):
                QMessageBox.critical(self, "Drive Error","Data must be on same drive as working directory.")
                return
                
            absPath, relPath = getPathVariants(filePath, cwd)
            
            # Relative by default, unless the file is in a totally different tree from the working directory.
            if len(os.path.commonprefix([cwd, absPath])) > 1:
                datasetInfo.filePath = relPath
            else:
                datasetInfo.filePath = absPath
                
            datasetInfo.nickname = PathComponents(absPath).filenameBase

            h5Exts = ['.ilp', '.h5', '.hdf5']
            if os.path.splitext(datasetInfo.filePath)[1] in h5Exts:
                datasetNames = self.getPossibleInternalPaths( absPath )
                if len(datasetNames) > 0:
                    datasetInfo.filePath += str(datasetNames[0])
                else:
                    raise RuntimeError("HDF5 file %s has no image datasets" % datasetInfo.filePath)

            # Allow labels by default if this gui isn't being used for batch data.
            datasetInfo.allowLabels = ( self.guiMode == GuiMode.Normal )
            infos.append(datasetInfo)

        # if no exception was thrown, set up the operator now
        opTop = self.topLevelOperator
        originalSize = len(opTop.DatasetGroup)
            
        if len( opTop.DatasetGroup ) < endingLane+1:
            opTop.DatasetGroup.resize( endingLane+1 )
        for laneIndex, info in zip(range(startingLane, endingLane+1), infos):
            try:
                self.topLevelOperator.DatasetGroup[laneIndex][roleIndex].setValue( info )
            except DatasetConstraintError as ex:
                # Give the user a chance to fix the problem
                if not self.handleDatasetConstraintError(info, info.filePath, ex, roleIndex, laneIndex):
                    opTop.DatasetGroup.resize( originalSize )
                    break
            except:
                QMessageBox.critical( self, "Dataset Load Error", "Wasn't able to load your dataset into the workflow.  See console for details." )
                opTop.DatasetGroup.resize( originalSize )
                raise
        self.updateInternalPathVisiblity()
Example #22
    def execute(self, slot, subindex, roi, result):
        dtypeBytes = self._getDtypeBytes()
        totalBytes = dtypeBytes * numpy.prod(self.Input.meta.shape)
        totalMB = totalBytes / (1000 * 1000)
        logger.info(
            "Clusterizing computation of {} MB dataset, outputting according to {}"
            .format(totalMB, self.OutputDatasetDescription.value))

        configFilePath = self.ConfigFilePath.value
        self._config = parseClusterConfigFile(configFilePath)

        self._validateConfig()

        # Create the destination file if necessary
        blockwiseFileset, taskInfos = self._prepareDestination()

        try:
            # Figure out which work doesn't need to be recomputed (if any)
            unneeded_rois = []
            for roi in taskInfos.keys():
                if blockwiseFileset.getBlockStatus(roi[0]) == BlockwiseFileset.BLOCK_AVAILABLE \
                or blockwiseFileset.isBlockLocked(roi[0]): # We don't attempt to process currently locked blocks.
                    unneeded_rois.append(roi)

            # Remove any tasks that we don't need to compute (they were finished in a previous run)
            for roi in unneeded_rois:
                logger.info("No need to run task: {} for roi: {}".format(
                    taskInfos[roi].taskName, roi))
                del taskInfos[roi]

            absWorkDir, relWorkDir = getPathVariants(
                self._config.server_working_directory,
                os.path.split(configFilePath)[0])
            if self._config.task_launch_server == "localhost":

                def localCommand(cmd):
                    cwd = os.getcwd()
                    os.chdir(absWorkDir)
                    subprocess.call(cmd, shell=True)
                    os.chdir(cwd)

                launchFunc = localCommand
            else:
                # We use fabric for executing remote tasks
                # Import it here because it isn't required that the nodes can use it.
                import fabric.api as fab

                @fab.hosts(self._config.task_launch_server)
                def remoteCommand(cmd):
                    with fab.cd(absWorkDir):
                        fab.run(cmd)

                launchFunc = functools.partial(fab.execute, remoteCommand)

            # Spawn each task
            for taskInfo in taskInfos.values():
                logger.info("Launching node task: " + taskInfo.command)
                launchFunc(taskInfo.command)

            # Return immediately.  We do not attempt to monitor the task progress.
            result[0] = True
            return result
        finally:
            blockwiseFileset.close()
Example #23
    def updateFilePath(self, index):
        """
        Update the operator's filePath input to match the gui
        """
        with Tracer(traceLogger):
            oldLocationSetting = self.mainOperator.Dataset[
                index].value.location

            # Get the directory by inspecting the original operator path
            oldTotalPath = self.mainOperator.Dataset[index].value.filePath
            # Split into directory, filename, extension, and internal path
            lastDotIndex = oldTotalPath.rfind('.')
            extensionAndInternal = oldTotalPath[lastDotIndex:]
            extension = extensionAndInternal.split('/')[0]
            oldFilePath = oldTotalPath[:lastDotIndex] + extension

            fileNameText = str(
                self.fileInfoTableWidget.item(index, Column.Name).text())

            internalPathCombo = self.fileInfoTableWidget.cellWidget(
                index, Column.InternalID)
            #internalPath = str(self.fileInfoTableWidget.item(index, Column.InternalID).text())
            internalPath = str(internalPathCombo.currentText())

            directory = os.path.split(oldFilePath)[0]
            newFileNamePath = fileNameText
            if directory != '':
                newFileNamePath = directory + '/' + fileNameText

            newTotalPath = newFileNamePath
            if internalPath != '':
                if internalPath[0] != '/':
                    newTotalPath += '/'
                newTotalPath += internalPath

            cwd = self.mainOperator.WorkingDirectory.value
            absTotalPath, relTotalPath = getPathVariants(newTotalPath, cwd)

            # Check the location setting
            locationCombo = self.fileInfoTableWidget.cellWidget(
                index, Column.Location)
            comboIndex = locationCombo.currentIndex()
            newLocationSelection = locationCombo.itemData(comboIndex).toInt()[
                0]  # In PyQt, toInt() returns a tuple

            if newLocationSelection == LocationOptions.Project:
                newLocationSetting = DatasetInfo.Location.ProjectInternal
            elif newLocationSelection == LocationOptions.AbsolutePath:
                newLocationSetting = DatasetInfo.Location.FileSystem
                newTotalPath = absTotalPath
            elif newLocationSelection == LocationOptions.RelativePath:
                newLocationSetting = DatasetInfo.Location.FileSystem
                newTotalPath = relTotalPath

            if newTotalPath != oldTotalPath or newLocationSetting != oldLocationSetting:
                # Be sure to copy so the slot notices the change when we setValue()
                datasetInfo = copy.copy(self.mainOperator.Dataset[index].value)
                datasetInfo.filePath = newTotalPath
                datasetInfo.location = newLocationSetting

                # TODO: First check to make sure this file exists!
                self.mainOperator.Dataset[index].setValue(datasetInfo)

                # Update the storage option combo to show the new path
                self.updateStorageOptionComboBox(index, newFileNamePath)
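
# Hedged sketch of what getPathVariants() is used for above: given a path and a working
# directory, return both an absolute variant and a variant relative to that directory, so
# the GUI can store whichever form the user selected.  This is an approximation for
# illustration only, not the actual lazyflow/ilastik implementation.
import os

def get_path_variants_sketch(path, working_dir):
    if os.path.isabs(path):
        abs_path = os.path.normpath(path)
    else:
        abs_path = os.path.normpath(os.path.join(working_dir, path))
    rel_path = os.path.relpath(abs_path, working_dir)
    return abs_path, rel_path

# e.g. get_path_variants_sketch('raw.h5/volume', '/home/user/project')
#   -> ('/home/user/project/raw.h5/volume', 'raw.h5/volume')
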
def runWorkflow(parsed_args):
    args = parsed_args

    # Read the config file
    configFilePath = args.option_config_file
    config = parseClusterConfigFile( configFilePath )

    # If we've got a process name, re-initialize the logger from scratch
    task_name = "node"
    if args.process_name is not None:
        task_name = args.process_name
        ilastik.ilastik_logging.default_config.init(args.process_name + ' ')

    rootLogHandler = None
    if args._node_work_ is None:
        # This is the master process.
        # Tee the log to a file for future reference.

        # Output log directory might be a relative path (relative to config file)
        absLogDir, relLogDir = getPathVariants(config.output_log_directory, os.path.split( configFilePath )[0] )
        if not os.path.exists(absLogDir):
            os.mkdir(absLogDir)

        # Copy the config we're using to the output directory
        shutil.copy(configFilePath, absLogDir)
        
        logFile = os.path.join( absLogDir, "MASTER.log" )
        logFileFormatter = logging.Formatter("%(levelname)s %(name)s: %(message)s")
        rootLogHandler = logging.FileHandler(logFile, 'a')
        rootLogHandler.setFormatter(logFileFormatter)
        rootLogger = logging.getLogger()
        rootLogger.addHandler( rootLogHandler )
        logger.info( "Launched with sys.argv: {}".format( sys.argv ) )

    # Update the monkey_patch settings
    ilastik.utility.monkey_patches.apply_setting_dict( config.__dict__ )

    # If we're running a node job, set the threadpool size if the user specified one.
    # Note that the main thread does not count toward the threadpool total.
    if args._node_work_ is not None and config.task_threadpool_size is not None:
        lazyflow.request.Request.reset_thread_pool( num_workers = config.task_threadpool_size )

    # Make sure project file exists.
    if not os.path.exists(args.project):
        raise RuntimeError("Project file '" + args.project + "' does not exist.")

    # Instantiate 'shell'
    shell = HeadlessShell( functools.partial(Workflow.getSubclass(config.workflow_type) ) )
    
    # Load project (auto-import it if necessary)
    logger.info("Opening project: '" + args.project + "'")
    shell.openProjectPath(args.project)

    workflow = shell.projectManager.workflow
            
    # Attach cluster operators
    resultSlot = None
    finalOutputSlot = workflow.getHeadlessOutputSlot( config.output_slot_id )
    assert finalOutputSlot is not None

    secondaryOutputSlots = workflow.getSecondaryHeadlessOutputSlots( config.output_slot_id )
    secondaryOutputDescriptions = args.secondary_output_description_file # This is a list (see 'action' above)
    if len(secondaryOutputDescriptions) != len(secondaryOutputSlots):
        raise RuntimeError( "This workflow produces exactly {} SECONDARY outputs.  You provided {}.".format( len(secondaryOutputSlots), len(secondaryOutputDescriptions) ) )
    
    clusterOperator = None
    try:
        if args._node_work_ is not None:
            # We're doing node work
            opClusterTaskWorker = OperatorWrapper( OpTaskWorker, parent=finalOutputSlot.getRealOperator().parent )

            # FIXME: Image index is hard-coded as 0.  We assume we are working with only one (big) dataset in cluster mode.            
            opClusterTaskWorker.Input.connect( finalOutputSlot )
            opClusterTaskWorker.RoiString[0].setValue( args._node_work_ )
            opClusterTaskWorker.TaskName.setValue( task_name )
            opClusterTaskWorker.ConfigFilePath.setValue( args.option_config_file )

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterTaskWorker.SecondaryInputs[0].resize( len( secondaryOutputSlots ) )
            opClusterTaskWorker.SecondaryOutputDescriptions[0].resize( len( secondaryOutputSlots ) )
            for i in range( len(secondaryOutputSlots) ):
                opClusterTaskWorker.SecondaryInputs[0][i].connect( secondaryOutputSlots[i][0] )
                opClusterTaskWorker.SecondaryOutputDescriptions[0][i].setValue( secondaryOutputDescriptions[i] )

            opClusterTaskWorker.OutputFilesetDescription.setValue( args.output_description_file )
    
            # If we have a way to report task progress (e.g. by updating the job name),
            #  then subscribe to progress signals
            if config.task_progress_update_command is not None:
                def report_progress( progress ):
                    cmd = config.task_progress_update_command.format( progress=int(progress) )
                    def shell_call(shell_cmd):
                        logger.debug( "Executing progress command: " + shell_cmd )
                        subprocess.call( shell_cmd, shell=True )
                    background_tasks.put( functools.partial( shell_call, cmd ) )
                opClusterTaskWorker.innerOperators[0].progressSignal.subscribe( report_progress )
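                # NOTE (assumption): background_tasks appears to be a module-level queue
                # that a separate worker loop drains until stop_background_tasks is set
                # during cleanup below; the queue itself is defined outside this snippet.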
            
            resultSlot = opClusterTaskWorker.ReturnCode
            clusterOperator = opClusterTaskWorker
        else:
            # We're the master
            opClusterizeMaster = OperatorWrapper( OpClusterize, parent=finalOutputSlot.getRealOperator().parent )

            opClusterizeMaster.Input.connect( finalOutputSlot )
            opClusterizeMaster.ProjectFilePath.setValue( args.project )
            opClusterizeMaster.OutputDatasetDescription.setValue( args.output_description_file )

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterizeMaster.SecondaryInputs[0].resize( len( secondaryOutputSlots ) )
            opClusterizeMaster.SecondaryOutputDescriptions[0].resize( len( secondaryOutputSlots ) )
            for i in range( len(secondaryOutputSlots) ):
                opClusterizeMaster.SecondaryInputs[0][i].connect( secondaryOutputSlots[i][0] )
                opClusterizeMaster.SecondaryOutputDescriptions[0][i].setValue( secondaryOutputDescriptions[i] )    

            opClusterizeMaster.ConfigFilePath.setValue( args.option_config_file )

            resultSlot = opClusterizeMaster.ReturnCode
            clusterOperator = opClusterizeMaster
        
        # Get the result
        logger.info("Starting task")
        result = resultSlot[0].value # FIXME: The image index is hard-coded here.
    finally:
        logger.info("Cleaning up")
        global stop_background_tasks
        stop_background_tasks = True
        
        try:
            if clusterOperator is not None:
                clusterOperator.cleanUp()
        except Exception:
            logger.error("Errors during cleanup.", exc_info=True)

        try:
            logger.info("Closing project...")
            shell.closeCurrentProject()
        except Exception:
            logger.error("Errors while closing project.", exc_info=True)
    
    logger.info("FINISHED with result {}".format(result))
    if not result:
        logger.error( "FAILED TO COMPLETE!" )

    if rootLogHandler is not None:
        rootLogHandler.close()
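
# Hedged sketch (not from the original source): the cluster config object consumed by
# runWorkflow() needs at least the attributes referenced above.  A minimal stand-in for
# experimentation might look like the following; every field value here is hypothetical.
from types import SimpleNamespace

example_cluster_config = SimpleNamespace(
    workflow_type="PixelClassificationWorkflow",   # resolved via Workflow.getSubclass()
    output_slot_id="Predictions",                  # looked up with getHeadlessOutputSlot()
    output_log_directory="./logs",                 # master tees its log file here
    server_working_directory="/scratch/cluster",   # cwd for launched node tasks
    task_launch_server="localhost",                # "localhost" or an ssh host for fabric
    task_threadpool_size=4,                        # per-node lazyflow threadpool size
    task_progress_update_command=None,             # optional shell command using {progress}
)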