Example #1
    def test_getPathVariants(self):
        abs, rel = getPathVariants("/aaa/bbb/ccc/ddd.txt", "/aaa/bbb/ccc/eee")
        # assert abs == '/aaa/bbb/ccc/ddd.txt'
        # Use normpath to make sure this test works on windows...
        expected = os.path.normpath(
            os.path.join("/aaa/bbb/ccc/eee",
                         "/aaa/bbb/ccc/ddd.txt")).replace("\\", "/")
        assert abs == expected, "{} != {}".format(abs, expected)
        assert rel == "../ddd.txt"

        abs, rel = getPathVariants("../ddd.txt", "/aaa/bbb/ccc/eee")
        # assert abs == '/aaa/bbb/ccc/ddd.txt'
        # Use normpath to make sure this test works on windows...
        assert abs == os.path.normpath(
            os.path.join("/aaa/bbb/ccc/eee", "../ddd.txt")).replace("\\", "/")
        assert rel == "../ddd.txt"

        abs, rel = getPathVariants("ddd.txt", "/aaa/bbb/ccc")
        # assert abs == '/aaa/bbb/ccc/ddd.txt'
        # Use normpath to make sure this test works on windows...
        assert abs == os.path.normpath(os.path.join("/aaa/bbb/ccc",
                                                    "ddd.txt")).replace(
                                                        "\\", "/")
        assert rel == "ddd.txt"

        assert getPathVariants("", "/abc") == ("/abc", ""), "{} != {}".format(
            getPathVariants("", "/abc"), ("/abc", ""))
Example #2
    def test_getPathVariants(self):
        abs, rel = getPathVariants('/aaa/bbb/ccc/ddd.txt', '/aaa/bbb/ccc/eee')
        #assert abs == '/aaa/bbb/ccc/ddd.txt'
        # Use normpath to make sure this test works on windows...
        expected = os.path.normpath(
            os.path.join('/aaa/bbb/ccc/eee',
                         '/aaa/bbb/ccc/ddd.txt')).replace('\\', '/')
        assert abs == expected, "{} != {}".format(abs, expected)
        assert rel == '../ddd.txt'

        abs, rel = getPathVariants('../ddd.txt', '/aaa/bbb/ccc/eee')
        #assert abs == '/aaa/bbb/ccc/ddd.txt'
        # Use normpath to make sure this test works on windows...
        assert abs == os.path.normpath(
            os.path.join('/aaa/bbb/ccc/eee', '../ddd.txt')).replace('\\', '/')
        assert rel == '../ddd.txt'

        abs, rel = getPathVariants('ddd.txt', '/aaa/bbb/ccc')
        #assert abs == '/aaa/bbb/ccc/ddd.txt'
        # Use normpath to make sure this test works on windows...
        assert abs == os.path.normpath(os.path.join('/aaa/bbb/ccc',
                                                    'ddd.txt')).replace(
                                                        '\\', '/')
        assert rel == 'ddd.txt'

        assert getPathVariants('', '/abc') == ('/abc', ''), \
            "{} != {}".format( getPathVariants('', '/abc'), ('/abc', '') )
Example #3
    def updateWorkingDirectory(self,newpath,oldpath):
        newdir = PathComponents(newpath).externalDirectory
        olddir = PathComponents(oldpath).externalDirectory
        
        if newdir==olddir:
            return
 
        # Disconnect the working directory while we make these changes.
        # All the changes will take effect when we set the new working directory.
        self.topLevelOperator.WorkingDirectory.disconnect()
        
        for laneIndex, multislot in enumerate(self.topLevelOperator.DatasetGroup):
            for roleIndex, slot in enumerate(multislot):
                if not slot.ready():
                    # Skip if there is no dataset in this lane/role combination yet.
                    continue
                datasetInfo = slot.value
                if datasetInfo.location == DatasetInfo.Location.FileSystem:
                    
                    #construct absolute path and recreate relative to the new path
                    fp = PathComponents(datasetInfo.filePath,olddir).totalPath()
                    abspath, relpath = getPathVariants(fp,newdir)
                    
                    # Same convention as in dataSelectionGui:
                    # Relative by default, unless the file is in a totally different tree from the working directory.
                    if relpath is not None and len(os.path.commonprefix([fp, abspath])) > 1:
                        datasetInfo.filePath = relpath
                    else:
                        datasetInfo.filePath = abspath
                    
                    slot.setValue(datasetInfo, check_changed=False)
        
        self.topLevelOperator.WorkingDirectory.setValue(newdir)
        self._projectFilePath = newdir
Example #4
    def updateWorkingDirectory(self,newpath,oldpath):
        newdir = PathComponents(newpath).externalDirectory
        olddir = PathComponents(oldpath).externalDirectory
        
        if newdir==olddir:
            return
 
        # Disconnect the working directory while we make these changes.
        # All the changes will take effect when we set the new working directory.
        self.topLevelOperator.WorkingDirectory.disconnect()
        
        for laneIndex, multislot in enumerate(self.topLevelOperator.DatasetGroup):
            for roleIndex, slot in enumerate(multislot):
                if not slot.ready():
                    # Skip if there is no dataset in this lane/role combination yet.
                    continue
                datasetInfo = slot.value
                if datasetInfo.location == DatasetInfo.Location.FileSystem:
                    
                    #construct absolute path and recreate relative to the new path
                    fp = PathComponents(datasetInfo.filePath,olddir).totalPath()
                    abspath, relpath = getPathVariants(fp,newdir)
                    
                    # Same convention as in dataSelectionGui:
                    # Relative by default, unless the file is in a totally different tree from the working directory.
                    if relpath is not None and len(os.path.commonprefix([fp, abspath])) > 1:
                        datasetInfo.filePath = relpath
                    else:
                        datasetInfo.filePath = abspath
                    
                    slot.setValue(datasetInfo, check_changed=False)
        
        self.topLevelOperator.WorkingDirectory.setValue(newdir)
        self._projectFilePath = newdir
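
The commonprefix test in the loop above implements the "relative by default" convention mentioned in the comment: the relative path is kept unless the file lives in a completely different tree (the threshold of 1 simply means the two paths share more than the leading '/'). A hedged extraction of just that decision, with illustrative argument names, might look like this:

import os

def prefer_relative(reference_path, abs_path, rel_path):
    # Keep the relative variant only if getPathVariants produced one and the
    # two paths share more than a leading '/'; otherwise fall back to the
    # absolute variant.  Illustrative helper, not part of the ilastik API.
    if rel_path is not None and len(os.path.commonprefix([reference_path, abs_path])) > 1:
        return rel_path
    return abs_path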
Example #5
    def _createDatasetInfo(self, roleIndex, filePath, roi):
        """
        Create a DatasetInfo object for the given filePath and roi.
        roi may be None, in which case it is ignored.
        """
        datasetInfo = DatasetInfo()

        if roi is not None:
            datasetInfo.subvolume_roi = roi

        cwd = self.topLevelOperator.WorkingDirectory.value

        absPath, relPath = getPathVariants(filePath, cwd)

        # Relative by default, unless the file is in a totally different tree from the working directory.
        if relPath is not None and len(os.path.commonprefix([cwd, absPath
                                                             ])) > 1:
            datasetInfo.filePath = relPath
        else:
            datasetInfo.filePath = absPath

        datasetInfo.nickname = PathComponents(absPath).filenameBase

        h5Exts = ['.ilp', '.h5', '.hdf5']
        if os.path.splitext(datasetInfo.filePath)[1] in h5Exts:
            datasetNames = self.getPossibleInternalPaths(absPath)
            if len(datasetNames) == 0:
                raise RuntimeError("HDF5 file %s has no image datasets" %
                                   datasetInfo.filePath)
            elif len(datasetNames) == 1:
                datasetInfo.filePath += str(datasetNames[0])
            else:
                # If exactly one of the file's datasets matches a user's previous choice, use it.
                if roleIndex not in self._default_h5_volumes:
                    self._default_h5_volumes[roleIndex] = set()
                previous_selections = self._default_h5_volumes[roleIndex]
                possible_auto_selections = previous_selections.intersection(
                    datasetNames)
                if len(possible_auto_selections) == 1:
                    datasetInfo.filePath += str(
                        list(possible_auto_selections)[0])
                else:
                    # Ask the user which dataset to choose
                    dlg = H5VolumeSelectionDlg(datasetNames, self)
                    if dlg.exec_() == QDialog.Accepted:
                        selected_index = dlg.combo.currentIndex()
                        selected_dataset = str(datasetNames[selected_index])
                        datasetInfo.filePath += selected_dataset
                        self._default_h5_volumes[roleIndex].add(
                            selected_dataset)
                    else:
                        raise DataSelectionGui.UserCancelledError()

        # Allow labels by default if this gui isn't being used for batch data.
        datasetInfo.allowLabels = (self.guiMode == GuiMode.Normal)
        return datasetInfo
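
getPossibleInternalPaths is not shown in these examples. Assuming the file is HDF5 and the method simply enumerates the dataset paths inside it, a rough stand-in using h5py could look like the following (the real method may additionally filter for image-like datasets):

import h5py

def possible_internal_paths_sketch(hdf5_path):
    # Collect the internal path of every dataset in the file, with a leading
    # '/' so it can be appended directly to the external file path, as the
    # code above does with datasetInfo.filePath.
    names = []
    def visit(name, obj):
        if isinstance(obj, h5py.Dataset):
            names.append('/' + name)
    with h5py.File(hdf5_path, 'r') as f:
        f.visititems(visit)
    return names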
Example #6
    def execute(self, slot, subindex, roi, result):
        dtypeBytes = self._getDtypeBytes()
        totalBytes = dtypeBytes * numpy.prod(self.Input.meta.shape)
        totalMB = totalBytes / (1000*1000)
        logger.info( "Clusterizing computation of {} MB dataset, outputting according to {}".format(totalMB, self.OutputDatasetDescription.value) )

        configFilePath = self.ConfigFilePath.value
        self._config = parseClusterConfigFile( configFilePath )

        self._validateConfig()

        # Create the destination file if necessary
        blockwiseFileset, taskInfos = self._prepareDestination()

        try:
            # Figure out which work doesn't need to be recomputed (if any)
            unneeded_rois = []
            for roi in taskInfos.keys():
                if blockwiseFileset.getBlockStatus(roi[0]) == BlockwiseFileset.BLOCK_AVAILABLE \
                or blockwiseFileset.isBlockLocked(roi[0]): # We don't attempt to process currently locked blocks.
                    unneeded_rois.append( roi )
    
            # Remove any tasks that we don't need to compute (they were finished in a previous run)
            for roi in unneeded_rois:
                logger.info( "No need to run task: {} for roi: {}".format( taskInfos[roi].taskName, roi ) )
                del taskInfos[roi]

            absWorkDir, _ = getPathVariants(self._config.server_working_directory, os.path.split( configFilePath )[0] )
            if self._config.task_launch_server == "localhost":
                def localCommand( cmd ):
                    cwd = os.getcwd()
                    os.chdir( absWorkDir )
                    subprocess.call( cmd, shell=True )
                    os.chdir( cwd )
                launchFunc = localCommand
            else:
                # We use fabric for executing remote tasks
                # Import it here because it isn't required that the nodes can use it.
                import fabric.api as fab
                @fab.hosts( self._config.task_launch_server )
                def remoteCommand( cmd ):
                    with fab.cd( absWorkDir ):
                        fab.run( cmd )
                launchFunc = functools.partial( fab.execute, remoteCommand )
    
            # Spawn each task
            for taskInfo in taskInfos.values():
                logger.info("Launching node task: " + taskInfo.command )
                launchFunc( taskInfo.command )
        
            # Return immediately.  We do not attempt to monitor the task progress.
            result[0] = True
            return result
        finally:
            blockwiseFileset.close()
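
As a side note, the chdir/restore dance inside localCommand can be avoided because subprocess.call accepts a cwd argument; an equivalent launcher (illustrative only, not the operator's code) would be:

import subprocess

def local_command(cmd, work_dir):
    # Run the shell command with work_dir as its working directory, without
    # changing the working directory of the calling process.
    subprocess.call(cmd, shell=True, cwd=work_dir)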
Example #7
    def execute(self, slot, subindex, roi, result):
        dtypeBytes = self._getDtypeBytes()
        totalBytes = dtypeBytes * numpy.prod(self.Input.meta.shape)
        totalMB = totalBytes / (1000*1000)
        logger.info( "Clusterizing computation of {} MB dataset, outputting according to {}".format(totalMB, self.OutputDatasetDescription.value) )

        configFilePath = self.ConfigFilePath.value
        self._config = parseClusterConfigFile( configFilePath )

        self._validateConfig()

        # Create the destination file if necessary
        blockwiseFileset, taskInfos = self._prepareDestination()

        try:
            # Figure out which work doesn't need to be recomputed (if any)
            unneeded_rois = []
            for roi in taskInfos.keys():
                if blockwiseFileset.getBlockStatus(roi[0]) == BlockwiseFileset.BLOCK_AVAILABLE \
                or blockwiseFileset.isBlockLocked(roi[0]): # We don't attempt to process currently locked blocks.
                    unneeded_rois.append( roi )
    
            # Remove any tasks that we don't need to compute (they were finished in a previous run)
            for roi in unneeded_rois:
                logger.info( "No need to run task: {} for roi: {}".format( taskInfos[roi].taskName, roi ) )
                del taskInfos[roi]

            absWorkDir, _ = getPathVariants(self._config.server_working_directory, os.path.split( configFilePath )[0] )
            if self._config.task_launch_server == "localhost":
                def localCommand( cmd ):
                    cwd = os.getcwd()
                    os.chdir( absWorkDir )
                    subprocess.call( cmd, shell=True )
                    os.chdir( cwd )
                launchFunc = localCommand
            else:
                # We use fabric for executing remote tasks
                # Import it here because it isn't required that the nodes can use it.
                import fabric.api as fab
                @fab.hosts( self._config.task_launch_server )
                def remoteCommand( cmd ):
                    with fab.cd( absWorkDir ):
                        fab.run( cmd )
                launchFunc = functools.partial( fab.execute, remoteCommand )
    
            # Spawn each task
            for taskInfo in taskInfos.values():
                logger.info("Launching node task: " + taskInfo.command )
                launchFunc( taskInfo.command )
        
            # Return immediately.  We do not attempt to monitor the task progress.
            result[0] = True
            return result
        finally:
            blockwiseFileset.close()
Example #8
    def test_getPathVariants(self):
        abs, rel = getPathVariants('/aaa/bbb/ccc/ddd.txt', '/aaa/bbb/ccc/eee')
        #assert abs == '/aaa/bbb/ccc/ddd.txt'
        # Use normpath to make sure this test works on windows...
        assert abs == os.path.normpath(os.path.join('/aaa/bbb/ccc/eee', '/aaa/bbb/ccc/ddd.txt'))
        assert rel == '../ddd.txt'
    
        abs, rel = getPathVariants('../ddd.txt', '/aaa/bbb/ccc/eee')
        #assert abs == '/aaa/bbb/ccc/ddd.txt'
        # Use normpath to make sure this test works on windows...
        assert abs == os.path.normpath(os.path.join('/aaa/bbb/ccc/eee', '../ddd.txt'))
        assert rel == '../ddd.txt'
    
        abs, rel = getPathVariants('ddd.txt', '/aaa/bbb/ccc')
        #assert abs == '/aaa/bbb/ccc/ddd.txt'
        # Use normpath to make sure this test works on windows...
        assert abs == os.path.normpath(os.path.join('/aaa/bbb/ccc', 'ddd.txt'))
        assert rel == 'ddd.txt'

        assert getPathVariants('', '/abc') == ('/abc', '')
Example #9
    def test_getPathVariants(self):
        abs, rel = getPathVariants("/aaa/bbb/ccc/ddd.txt", "/aaa/bbb/ccc/eee")
        # assert abs == '/aaa/bbb/ccc/ddd.txt'
        # Use normpath to make sure this test works on windows...
        expected = os.path.normpath(os.path.join("/aaa/bbb/ccc/eee", "/aaa/bbb/ccc/ddd.txt")).replace("\\", "/")
        assert abs == expected, "{} != {}".format(abs, expected)
        assert rel == "../ddd.txt"

        abs, rel = getPathVariants("../ddd.txt", "/aaa/bbb/ccc/eee")
        # assert abs == '/aaa/bbb/ccc/ddd.txt'
        # Use normpath to make sure this test works on windows...
        assert abs == os.path.normpath(os.path.join("/aaa/bbb/ccc/eee", "../ddd.txt")).replace("\\", "/")
        assert rel == "../ddd.txt"

        abs, rel = getPathVariants("ddd.txt", "/aaa/bbb/ccc")
        # assert abs == '/aaa/bbb/ccc/ddd.txt'
        # Use normpath to make sure this test works on windows...
        assert abs == os.path.normpath(os.path.join("/aaa/bbb/ccc", "ddd.txt")).replace("\\", "/")
        assert rel == "ddd.txt"

        assert getPathVariants("", "/abc") == ("/abc", ""), "{} != {}".format(getPathVariants("", "/abc"), ("/abc", ""))
Example #10
    def _createDatasetInfo(self, roleIndex, filePath, roi):
        """
        Create a DatasetInfo object for the given filePath and roi.
        roi may be None, in which case it is ignored.
        """
        datasetInfo = DatasetInfo()
        
        if roi is not None:
            datasetInfo.subvolume_roi = roi
        
        cwd = self.topLevelOperator.WorkingDirectory.value
        
        absPath, relPath = getPathVariants(filePath, cwd)
        
        # Relative by default, unless the file is in a totally different tree from the working directory.
        if relPath is not None and len(os.path.commonprefix([cwd, absPath])) > 1:
            datasetInfo.filePath = relPath
        else:
            datasetInfo.filePath = absPath
            
        datasetInfo.nickname = PathComponents(absPath).filenameBase

        h5Exts = ['.ilp', '.h5', '.hdf5']
        if os.path.splitext(datasetInfo.filePath)[1] in h5Exts:
            datasetNames = self.getPossibleInternalPaths( absPath )
            if len(datasetNames) == 0:
                raise RuntimeError("HDF5 file %s has no image datasets" % datasetInfo.filePath)
            elif len(datasetNames) == 1:
                datasetInfo.filePath += str(datasetNames[0])
            else:
                # If exactly one of the file's datasets matches a user's previous choice, use it.
                if roleIndex not in self._default_h5_volumes:
                    self._default_h5_volumes[roleIndex] = set()
                previous_selections = self._default_h5_volumes[roleIndex]
                possible_auto_selections = previous_selections.intersection(datasetNames)
                if len(possible_auto_selections) == 1:
                    datasetInfo.filePath += str(list(possible_auto_selections)[0])
                else:
                    # Ask the user which dataset to choose
                    dlg = H5VolumeSelectionDlg(datasetNames, self)
                    if dlg.exec_() == QDialog.Accepted:
                        selected_index = dlg.combo.currentIndex()
                        selected_dataset = str(datasetNames[selected_index])
                        datasetInfo.filePath += selected_dataset
                        self._default_h5_volumes[roleIndex].add( selected_dataset )
                    else:
                        raise DataSelectionGui.UserCancelledError()

        # Allow labels by default if this gui isn't being used for batch data.
        datasetInfo.allowLabels = ( self.guiMode == GuiMode.Normal )
        return datasetInfo
Example #11
    def _prepareTaskInfos(self, roiList):
        # Divide up the workload into large pieces
        logger.info("Dividing into {} node jobs.".format(len(roiList)))

        taskInfos = collections.OrderedDict()
        for roiIndex, roi in enumerate(roiList):
            roi = (tuple(roi[0]), tuple(roi[1]))
            taskInfo = OpClusterize.TaskInfo()
            taskInfo.subregion = SubRegion(None, start=roi[0], stop=roi[1])

            taskName = "J{:02}".format(roiIndex)

            commandArgs = []
            commandArgs.append("--option_config_file=" +
                               self.ConfigFilePath.value)
            commandArgs.append("--project=" + self.ProjectFilePath.value)
            commandArgs.append("--_node_work_=\"" +
                               Roi.dumps(taskInfo.subregion) + "\"")
            commandArgs.append("--process_name={}".format(taskName))
            commandArgs.append("--output_description_file={}".format(
                self.OutputDatasetDescription.value))

            # Check the command format string: We need to know where to put our args...
            commandFormat = self._config.command_format
            assert commandFormat.find("{task_args}") != -1

            # Output log directory might be a relative path (relative to config file)
            absLogDir, _ = getPathVariants(
                self._config.output_log_directory,
                os.path.split(self.ConfigFilePath.value)[0])
            if not os.path.exists(absLogDir):
                os.makedirs(absLogDir)
            taskOutputLogFilename = taskName + ".log"
            taskOutputLogPath = os.path.join(absLogDir, taskOutputLogFilename)

            allArgs = " " + " ".join(commandArgs) + " "
            taskInfo.taskName = taskName
            taskInfo.command = commandFormat.format(
                task_args=allArgs,
                task_name=taskName,
                task_output_file=taskOutputLogPath)
            taskInfos[roi] = taskInfo

        return taskInfos
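
The command_format string is read from the cluster config file and must contain a {task_args} placeholder (hence the assert above); {task_name} and {task_output_file} are filled in per task. A purely hypothetical format string and the command it would produce:

# Hypothetical values; real ones come from the cluster config file.
command_format = "qsub -N {task_name} -o {task_output_file} run_ilastik_task.sh {task_args}"
command = command_format.format(
    task_args=" --project=example.ilp --process_name=J03 ",
    task_name="J03",
    task_output_file="/cluster/logs/J03.log")
# command is now:
# qsub -N J03 -o /cluster/logs/J03.log run_ilastik_task.sh  --project=example.ilp --process_name=J03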
Example #12
    def _prepareTaskInfos(self, roiList):
        # Divide up the workload into large pieces
        logger.info( "Dividing into {} node jobs.".format( len(roiList) ) )

        taskInfos = collections.OrderedDict()
        for roiIndex, roi in enumerate(roiList):
            roi = ( tuple(roi[0]), tuple(roi[1]) )
            taskInfo = OpClusterize.TaskInfo()
            taskInfo.subregion = SubRegion( None, start=roi[0], stop=roi[1] )
            
            taskName = "J{:02}".format(roiIndex)

            commandArgs = []
            commandArgs.append( "--option_config_file=" + self.ConfigFilePath.value )
            commandArgs.append( "--project=" + self.ProjectFilePath.value )
            commandArgs.append( "--_node_work_=\"" + Roi.dumps( taskInfo.subregion ) + "\"" )
            commandArgs.append( "--process_name={}".format(taskName)  )
            commandArgs.append( "--output_description_file={}".format( self.OutputDatasetDescription.value )  )
            for slot in self.SecondaryOutputDescriptions:
                commandArgs.append( "--secondary_output_description_file={}".format( slot.value )  )

            # Check the command format string: We need to know where to put our args...
            commandFormat = self._config.command_format
            assert commandFormat.find("{task_args}") != -1

            # Output log directory might be a relative path (relative to config file)
            absLogDir, _ = getPathVariants(self._config.output_log_directory, os.path.split( self.ConfigFilePath.value )[0] )
            taskOutputLogFilename = taskName + ".log"
            taskOutputLogPath = os.path.join( absLogDir, taskOutputLogFilename )
            
            allArgs = " " + " ".join(commandArgs) + " "
            taskInfo.taskName = taskName
            taskInfo.command = commandFormat.format( task_args=allArgs, task_name=taskName, task_output_file=taskOutputLogPath )
            taskInfos[roi] = taskInfo

        return taskInfos
Example #13
    def _readDatasetInfo(self, infoGroup, localDataGroup, projectFilePath, headless):
        # Unready datasets are represented with an empty group.
        if len( infoGroup ) == 0:
            return None, False
        datasetInfo = DatasetInfo()

        # Make a reverse-lookup of the location storage strings
        LocationLookup = { v:k for k,v in self.LocationStrings.items() }
        datasetInfo.location = LocationLookup[ str(infoGroup['location'].value) ]
        
        # Write to the 'private' members to avoid resetting the dataset id
        datasetInfo._filePath = infoGroup['filePath'].value
        datasetInfo._datasetId = infoGroup['datasetId'].value

        try:
            datasetInfo.allowLabels = infoGroup['allowLabels'].value
        except KeyError:
            pass
        
        try:
            datasetInfo.drange = tuple( infoGroup['drange'].value )
        except KeyError:
            pass
        
        try:
            datasetInfo.nickname = infoGroup['nickname'].value
        except KeyError:
            datasetInfo.nickname = PathComponents(datasetInfo.filePath).filenameBase
        
        try:
            tags = vigra.AxisTags.fromJSON( infoGroup['axistags'].value )
            datasetInfo.axistags = tags
        except KeyError:
            # Old projects just have an 'axisorder' field instead of full axistags
            try:
                axisorder = infoGroup['axisorder'].value
                datasetInfo.axistags = vigra.defaultAxistags(axisorder)
            except KeyError:
                pass
        
        # If the data is supposed to be in the project,
        #  check for it now.
        if datasetInfo.location == DatasetInfo.Location.ProjectInternal:
            if not datasetInfo.datasetId in localDataGroup.keys():
                raise RuntimeError("Corrupt project file.  Could not find data for " + infoGroup.name)

        dirty = False
        # If the data is supposed to exist outside the project, make sure it really does.
        if datasetInfo.location == DatasetInfo.Location.FileSystem and not isUrl(datasetInfo.filePath):
            pathData = PathComponents( datasetInfo.filePath, os.path.split(projectFilePath)[0])
            filePath = pathData.externalPath
            if not os.path.exists(filePath):
                if headless:
                    raise RuntimeError("Could not find data at " + filePath)
                filt = "Image files (" + ' '.join('*.' + x for x in OpDataSelection.SupportedExtensions) + ')'
                newpath = self.repairFile(filePath, filt)
                if pathData.internalPath is not None:
                    newpath += pathData.internalPath
                datasetInfo._filePath = getPathVariants(newpath , os.path.split(projectFilePath)[0])[0]
                dirty = True
        
        return datasetInfo, dirty
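
The repeated try/except KeyError blocks above read optional fields from the HDF5 info group. With h5py a membership test works just as well; a small, purely illustrative helper (not part of the serializer) could centralize the pattern:

def read_optional(info_group, key, default=None):
    # Return the stored value if the dataset exists in the given h5py group,
    # otherwise the default.  '[()]' reads a scalar dataset, equivalent to the
    # older '.value' accessor used above.
    if key in info_group:
        return info_group[key][()]
    return default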
Example #14
    def addFileNames(self, fileNames, roleIndex, startingLane=None):
        """
        Add the given filenames to both the GUI table and the top-level operator inputs.
        If startingLane is None, the filenames will be *appended* to the role's list of files.
        """
        infos = []

        if startingLane is None or startingLane == -1:
            startingLane = len(self.topLevelOperator.DatasetGroup)
            endingLane = startingLane+len(fileNames)-1
        else:
            assert startingLane < len(self.topLevelOperator.DatasetGroup)
            max_files = len(self.topLevelOperator.DatasetGroup) - \
                    startingLane
            if len(fileNames) > max_files:
                msg = "You selected {num_selected} files for {num_slots} "\
                      "slots. To add new files use the 'Add new...' option "\
                      "in the context menu or the button in the last row."\
                              .format(num_selected=len(fileNames),
                                      num_slots=max_files)
                QMessageBox.critical( self, "Too many files", msg )
                return
            endingLane = min(startingLane+len(fileNames)-1,
                    len(self.topLevelOperator.DatasetGroup))
            
        if self._max_lanes and endingLane >= self._max_lanes:
            msg = "You may not add more than {} file(s) to this workflow.  Please try again.".format( self._max_lanes )
            QMessageBox.critical( self, "Too many files", msg )
            return

        # Assign values to the new inputs we just allocated.
        # The GUI will be updated by callbacks that are listening to slot changes
        for i, filePath in enumerate(fileNames):
            datasetInfo = DatasetInfo()
            cwd = self.topLevelOperator.WorkingDirectory.value
            
            absPath, relPath = getPathVariants(filePath, cwd)
            
            # Relative by default, unless the file is in a totally different tree from the working directory.
            if relPath is not None and len(os.path.commonprefix([cwd, absPath])) > 1:
                datasetInfo.filePath = relPath
            else:
                datasetInfo.filePath = absPath
                
            datasetInfo.nickname = PathComponents(absPath).filenameBase

            h5Exts = ['.ilp', '.h5', '.hdf5']
            if os.path.splitext(datasetInfo.filePath)[1] in h5Exts:
                datasetNames = self.getPossibleInternalPaths( absPath )
                if len(datasetNames) > 0:
                    datasetInfo.filePath += str(datasetNames[0])
                else:
                    raise RuntimeError("HDF5 file %s has no image datasets" % datasetInfo.filePath)

            # Allow labels by default if this gui isn't being used for batch data.
            datasetInfo.allowLabels = ( self.guiMode == GuiMode.Normal )
            infos.append(datasetInfo)

        # if no exception was thrown, set up the operator now
        opTop = self.topLevelOperator
        originalSize = len(opTop.DatasetGroup)
            
        if len( opTop.DatasetGroup ) < endingLane+1:
            opTop.DatasetGroup.resize( endingLane+1 )
        for laneIndex, info in zip(range(startingLane, endingLane+1), infos):
            try:
                self.topLevelOperator.DatasetGroup[laneIndex][roleIndex].setValue( info )
            except DatasetConstraintError as ex:
                return_val = [False]
                # Give the user a chance to fix the problem
                self.handleDatasetConstraintError(info, info.filePath, ex, roleIndex, laneIndex, return_val)
                if not return_val[0]:
                    # Not successfully repaired.  Roll back the changes and give up.
                    opTop.DatasetGroup.resize( originalSize )
                    break
            except OpDataSelection.InvalidDimensionalityError as ex:
                    opTop.DatasetGroup.resize( originalSize )
                    QMessageBox.critical( self, "Dataset has different dimensionality", ex.message )
                    break
            except:
                QMessageBox.critical( self, "Dataset Load Error", "Wasn't able to load your dataset into the workflow.  See console for details." )
                opTop.DatasetGroup.resize( originalSize )
                raise

        # If we succeeded in adding all images, show the first one.
        if laneIndex == endingLane:
            self.showDataset(startingLane, roleIndex)

        # Notify the workflow that something that could affect applet readyness has occurred.
        self.parentApplet.appletStateUpdateRequested.emit()

        self.updateInternalPathVisiblity()
Example #15
def runWorkflow(parsed_args):
    args = parsed_args

    # Read the config file
    configFilePath = args.option_config_file
    config = parseClusterConfigFile( configFilePath )

    # If we've got a process name, re-initialize the logger from scratch
    task_name = "node"
    if args.process_name is not None:
        task_name = args.process_name
        ilastik.ilastik_logging.default_config.init(args.process_name + ' ')

    rootLogHandler = None
    if args._node_work_ is None:
        # This is the master process.
        # Tee the log to a file for future reference.

        # Output log directory might be a relative path (relative to config file)
        absLogDir, _ = getPathVariants(config.output_log_directory, os.path.split( configFilePath )[0] )
        if not os.path.exists(absLogDir):
            os.mkdir(absLogDir)

        # Copy the config we're using to the output directory
        shutil.copy(configFilePath, absLogDir)
        
        logFile = os.path.join( absLogDir, "MASTER.log" )
        logFileFormatter = logging.Formatter("%(levelname)s %(name)s: %(message)s")
        rootLogHandler = logging.FileHandler(logFile, 'a')
        rootLogHandler.setFormatter(logFileFormatter)
        rootLogger = logging.getLogger()
        rootLogger.addHandler( rootLogHandler )
        logger.info( "Launched with sys.argv: {}".format( sys.argv ) )

    # Update the monkey_patch settings
    ilastik.monkey_patches.apply_setting_dict( config.__dict__ )

    # If we're running a node job, set the threadpool size if the user specified one.
    # Note that the main thread does not count toward the threadpool total.
    if args._node_work_ is not None and config.task_threadpool_size is not None:
        lazyflow.request.Request.reset_thread_pool( num_workers = config.task_threadpool_size )

    # Make sure project file exists.
    if not os.path.exists(args.project):
        raise RuntimeError("Project file '" + args.project + "' does not exist.")

    # Instantiate 'shell'
    shell = HeadlessShell( functools.partial(Workflow.getSubclass(config.workflow_type) ) )
    
    # Load project (auto-import it if necessary)
    logger.info("Opening project: '" + args.project + "'")
    shell.openProjectPath(args.project)

    workflow = shell.projectManager.workflow
            
    # Attach cluster operators
    resultSlot = None
    finalOutputSlot = workflow.getHeadlessOutputSlot( config.output_slot_id )
    assert finalOutputSlot is not None

    secondaryOutputSlots = workflow.getSecondaryHeadlessOutputSlots( config.output_slot_id )
    secondaryOutputDescriptions = args.secondary_output_description_file # This is a list (see 'action' above)
    if len(secondaryOutputDescriptions) != len(secondaryOutputSlots):
        raise RuntimeError( "This workflow produces exactly {} SECONDARY outputs.  You provided {}.".format( len(secondaryOutputSlots), len(secondaryOutputDescriptions) ) )
    
    clusterOperator = None
    try:
        if args._node_work_ is not None:
            # We're doing node work
            opClusterTaskWorker = OperatorWrapper( OpTaskWorker, parent=finalOutputSlot.getRealOperator().parent )

            # FIXME: Image index is hard-coded as 0.  We assume we are working with only one (big) dataset in cluster mode.            
            opClusterTaskWorker.Input.connect( finalOutputSlot )
            opClusterTaskWorker.RoiString[0].setValue( args._node_work_ )
            opClusterTaskWorker.TaskName.setValue( task_name )
            opClusterTaskWorker.ConfigFilePath.setValue( args.option_config_file )

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterTaskWorker.SecondaryInputs[0].resize( len( secondaryOutputSlots ) )
            opClusterTaskWorker.SecondaryOutputDescriptions[0].resize( len( secondaryOutputSlots ) )
            for i in range( len(secondaryOutputSlots) ):
                opClusterTaskWorker.SecondaryInputs[0][i].connect( secondaryOutputSlots[i][0] )
                opClusterTaskWorker.SecondaryOutputDescriptions[0][i].setValue( secondaryOutputDescriptions[i] )

            opClusterTaskWorker.OutputFilesetDescription.setValue( args.output_description_file )
    
            # If we have a way to report task progress (e.g. by updating the job name),
            #  then subscribe to progress signals
            if config.task_progress_update_command is not None:
                def report_progress( progress ):
                    cmd = config.task_progress_update_command.format( progress=int(progress) )
                    def shell_call(shell_cmd):
                        logger.debug( "Executing progress command: " + cmd )
                        subprocess.call( shell_cmd, shell=True )
                    background_tasks.put( functools.partial( shell_call, cmd ) )
                opClusterTaskWorker.innerOperators[0].progressSignal.subscribe( report_progress )
            
            resultSlot = opClusterTaskWorker.ReturnCode
            clusterOperator = opClusterTaskWorker
        else:
            # We're the master
            opClusterizeMaster = OperatorWrapper( OpClusterize, parent=finalOutputSlot.getRealOperator().parent )

            opClusterizeMaster.Input.connect( finalOutputSlot )
            opClusterizeMaster.ProjectFilePath.setValue( args.project )
            opClusterizeMaster.OutputDatasetDescription.setValue( args.output_description_file )

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterizeMaster.SecondaryInputs[0].resize( len( secondaryOutputSlots ) )
            opClusterizeMaster.SecondaryOutputDescriptions[0].resize( len( secondaryOutputSlots ) )
            for i in range( len(secondaryOutputSlots) ):
                opClusterizeMaster.SecondaryInputs[0][i].connect( secondaryOutputSlots[i][0] )
                opClusterizeMaster.SecondaryOutputDescriptions[0][i].setValue( secondaryOutputDescriptions[i] )    

            opClusterizeMaster.ConfigFilePath.setValue( args.option_config_file )

            resultSlot = opClusterizeMaster.ReturnCode
            clusterOperator = opClusterizeMaster
        
        # Get the result
        logger.info("Starting task")
        result = resultSlot[0].value # FIXME: The image index is hard-coded here.
    finally:
        logger.info("Cleaning up")
        global stop_background_tasks
        stop_background_tasks = True
        
        try:
            if clusterOperator is not None:
                clusterOperator.cleanUp()
        except:
            logger.error("Errors during cleanup.")

        try:
            logger.info("Closing project...")
            shell.closeCurrentProject()
        except:
            logger.error("Errors while closing project.")
    
    logger.info("FINISHED with result {}".format(result))
    if not result:
        logger.error( "FAILED TO COMPLETE!" )

    if rootLogHandler is not None:
        rootLogHandler.close()
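
The "tee the log to a file" step above relies only on the standard logging module: a FileHandler attached to the root logger sees every record that propagates there. A stand-alone sketch (the log path and level are illustrative):

import logging

log_file = "/tmp/MASTER.log"   # illustrative path
handler = logging.FileHandler(log_file, 'a')
handler.setFormatter(logging.Formatter("%(levelname)s %(name)s: %(message)s"))
root = logging.getLogger()
root.addHandler(handler)       # every module's logger is teed to the file
root.setLevel(logging.INFO)    # ilastik's own logging config normally sets this
logging.getLogger(__name__).info("messages now also land in %s", log_file)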
Example #16
    def _readDatasetInfo(self, infoGroup, localDataGroup, projectFilePath, headless):
        # Unready datasets are represented with an empty group.
        if len( infoGroup ) == 0:
            return None, False

        datasetInfo = DatasetInfo()

        # Make a reverse-lookup of the location storage strings
        LocationLookup = { v:k for k,v in list(self.LocationStrings.items()) }
        datasetInfo.location = LocationLookup[ infoGroup['location'].value.decode('utf-8') ]
        
        # Write to the 'private' members to avoid resetting the dataset id
        datasetInfo._filePath = infoGroup['filePath'].value.decode('utf-8')
        datasetInfo._datasetId = infoGroup['datasetId'].value.decode('utf-8')

        try:
            datasetInfo.allowLabels = infoGroup['allowLabels'].value
        except KeyError:
            pass
        
        try:
            datasetInfo.drange = tuple( infoGroup['drange'].value )
        except KeyError:
            pass

        try:
            datasetInfo.laneShape = tuple(infoGroup['shape'].value)
        except KeyError:
            pass

        try:
            datasetInfo.laneDtype = numpy.dtype(infoGroup['dtype'].value.decode('utf-8'))
        except KeyError:
            pass

        try:
            datasetInfo.display_mode = infoGroup['display_mode'].value.decode('utf-8')
        except KeyError:
            pass
        
        try:
            datasetInfo.nickname = infoGroup['nickname'].value.decode('utf-8')
        except KeyError:
            datasetInfo.nickname = PathComponents(datasetInfo.filePath).filenameBase
        
        try:
            datasetInfo.fromstack = infoGroup['fromstack'].value
        except KeyError:
            # Guess based on the storage setting and original filepath
            datasetInfo.fromstack = ( datasetInfo.location == DatasetInfo.Location.ProjectInternal
                                      and ( ('?' in datasetInfo._filePath) or (os.path.pathsep in datasetInfo._filePath) ) )

        try:
            tags = vigra.AxisTags.fromJSON( infoGroup['axistags'].value.decode('utf-8') )
            datasetInfo.axistags = tags
        except KeyError:
            # Old projects just have an 'axisorder' field instead of full axistags
            try:
                axisorder = infoGroup['axisorder'].value.decode('utf-8')
                datasetInfo.axistags = vigra.defaultAxistags(axisorder)
            except KeyError:
                pass
        
        try:
            start, stop = list(map( tuple, infoGroup['subvolume_roi'].value ))
            datasetInfo.subvolume_roi = (start, stop)
        except KeyError:
            pass

        # If the data is supposed to be in the project,
        #  check for it now.
        if datasetInfo.location == DatasetInfo.Location.ProjectInternal:
            if not datasetInfo.datasetId in list(localDataGroup.keys()):
                raise RuntimeError("Corrupt project file.  Could not find data for " + infoGroup.name)

        dirty = False

        # If the data is supposed to exist outside the project, make sure it really does.
        if datasetInfo.location == DatasetInfo.Location.FileSystem \
                and not isUrl(datasetInfo.filePath):
            pathData = PathComponents(datasetInfo.filePath, os.path.split(projectFilePath)[0])
            filePath = pathData.externalPath
            if not os.path.exists(filePath):
                if headless:
                    if self._shouldRetrain:
                        raise RuntimeError(
                            "Retrain was passed in headless mode, "
                            "but could not find data at " + filePath)
                    else:
                        assert datasetInfo.laneShape, \
                            "Headless mode without raw data not supported in old (pre 1.3.2) project files"
                        # Raw data does not exist in headless, use fake data provider
                        datasetInfo.realDataSource = False
                else:
                    # Try to get a new path for the lost file from the user
                    filt = "Image files (" + ' '.join('*.' + x for x in OpDataSelection.SupportedExtensions) + ')'
                    newpath = self.repairFile(filePath, filt)
                    if pathData.internalPath is not None:
                        newpath += pathData.internalPath
                    datasetInfo._filePath = \
                    getPathVariants(newpath, os.path.split(projectFilePath)[0])[0]
                    dirty = True
        
        return datasetInfo, dirty
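
This variant of _readDatasetInfo decodes every string it reads because, under Python 3, fixed-length HDF5 strings written by older h5py/ilastik versions come back as bytes. A small, illustrative round trip showing the effect (the file name is made up):

import h5py

with h5py.File("demo.h5", "w") as f:
    f.create_dataset("location", data=b"FileSystem")   # stored as a fixed-length string
with h5py.File("demo.h5", "r") as f:
    raw = f["location"][()]                            # read back as bytes, b'FileSystem'
    print(raw.decode("utf-8"))                         # -> FileSystem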
Example #17
    def _readDatasetInfo(self, infoGroup, localDataGroup, projectFilePath, headless):
        # Unready datasets are represented with an empty group.
        if len( infoGroup ) == 0:
            return None, False
        datasetInfo = DatasetInfo()

        # Make a reverse-lookup of the location storage strings
        LocationLookup = { v:k for k,v in self.LocationStrings.items() }
        datasetInfo.location = LocationLookup[ str(infoGroup['location'].value) ]
        
        # Write to the 'private' members to avoid resetting the dataset id
        datasetInfo._filePath = infoGroup['filePath'].value
        datasetInfo._datasetId = infoGroup['datasetId'].value

        try:
            datasetInfo.allowLabels = infoGroup['allowLabels'].value
        except KeyError:
            pass
        
        try:
            datasetInfo.drange = tuple( infoGroup['drange'].value )
        except KeyError:
            pass
        
        try:
            datasetInfo.nickname = infoGroup['nickname'].value
        except KeyError:
            datasetInfo.nickname = PathComponents(datasetInfo.filePath).filenameBase
        
        try:
            datasetInfo.fromstack = infoGroup['fromstack'].value
        except KeyError:
            # Guess based on the storage setting and original filepath
            datasetInfo.fromstack = ( datasetInfo.location == DatasetInfo.Location.ProjectInternal
                                      and ( ('?' in datasetInfo._filePath) or (os.path.pathsep in datasetInfo._filePath) ) )

        try:
            tags = vigra.AxisTags.fromJSON( infoGroup['axistags'].value )
            datasetInfo.axistags = tags
        except KeyError:
            # Old projects just have an 'axisorder' field instead of full axistags
            try:
                axisorder = infoGroup['axisorder'].value
                datasetInfo.axistags = vigra.defaultAxistags(axisorder)
            except KeyError:
                pass
        
        try:
            start, stop = map( tuple, infoGroup['subvolume_roi'].value )
            datasetInfo.subvolume_roi = (start, stop)
        except KeyError:
            pass
        
        # If the data is supposed to be in the project,
        #  check for it now.
        if datasetInfo.location == DatasetInfo.Location.ProjectInternal:
            if not datasetInfo.datasetId in localDataGroup.keys():
                raise RuntimeError("Corrupt project file.  Could not find data for " + infoGroup.name)

        dirty = False
        # If the data is supposed to exist outside the project, make sure it really does.
        if datasetInfo.location == DatasetInfo.Location.FileSystem and not isUrl(datasetInfo.filePath):
            pathData = PathComponents( datasetInfo.filePath, os.path.split(projectFilePath)[0])
            filePath = pathData.externalPath
            if not os.path.exists(filePath):
                if headless:
                    raise RuntimeError("Could not find data at " + filePath)
                filt = "Image files (" + ' '.join('*.' + x for x in OpDataSelection.SupportedExtensions) + ')'
                newpath = self.repairFile(filePath, filt)
                if pathData.internalPath is not None:
                    newpath += pathData.internalPath
                datasetInfo._filePath = getPathVariants(newpath , os.path.split(projectFilePath)[0])[0]
                dirty = True
        
        return datasetInfo, dirty
Example #18
def runWorkflow(parsed_args):
    args = parsed_args

    # Read the config file
    configFilePath = args.option_config_file
    config = parseClusterConfigFile(configFilePath)

    # If we've got a process name, re-initialize the logger from scratch
    task_name = "node"
    if args.process_name is not None:
        task_name = args.process_name
        ilastik.ilastik_logging.default_config.init(args.process_name + ' ')

    rootLogHandler = None
    if args._node_work_ is None:
        # This is the master process.
        # Tee the log to a file for future reference.

        # Output log directory might be a relative path (relative to config file)
        absLogDir, _ = getPathVariants(config.output_log_directory,
                                       os.path.split(configFilePath)[0])
        if not os.path.exists(absLogDir):
            os.mkdir(absLogDir)

        # Copy the config we're using to the output directory
        shutil.copy(configFilePath, absLogDir)

        logFile = os.path.join(absLogDir, "MASTER.log")
        logFileFormatter = logging.Formatter(
            "%(levelname)s %(name)s: %(message)s")
        rootLogHandler = logging.FileHandler(logFile, 'a')
        rootLogHandler.setFormatter(logFileFormatter)
        rootLogger = logging.getLogger()
        rootLogger.addHandler(rootLogHandler)
        logger.info("Launched with sys.argv: {}".format(sys.argv))

    # Update the monkey_patch settings
    ilastik.monkey_patches.apply_setting_dict(config.__dict__)

    # If we're running a node job, set the threadpool size if the user specified one.
    # Note that the main thread does not count toward the threadpool total.
    if args._node_work_ is not None and config.task_threadpool_size is not None:
        lazyflow.request.Request.reset_thread_pool(
            num_workers=config.task_threadpool_size)

    # Make sure project file exists.
    if not os.path.exists(args.project):
        raise RuntimeError("Project file '" + args.project +
                           "' does not exist.")

    # Instantiate 'shell'
    shell = HeadlessShell(
        functools.partial(Workflow.getSubclass(config.workflow_type)))

    # Load project (auto-import it if necessary)
    logger.info("Opening project: '" + args.project + "'")
    shell.openProjectPath(args.project)

    workflow = shell.projectManager.workflow

    # Attach cluster operators
    resultSlot = None
    finalOutputSlot = workflow.getHeadlessOutputSlot(config.output_slot_id)
    assert finalOutputSlot is not None

    secondaryOutputSlots = workflow.getSecondaryHeadlessOutputSlots(
        config.output_slot_id)
    secondaryOutputDescriptions = args.secondary_output_description_file  # This is a list (see 'action' above)
    if len(secondaryOutputDescriptions) != len(secondaryOutputSlots):
        raise RuntimeError(
            "This workflow produces exactly {} SECONDARY outputs.  You provided {}."
            .format(len(secondaryOutputSlots),
                    len(secondaryOutputDescriptions)))

    clusterOperator = None
    try:
        if args._node_work_ is not None:
            # We're doing node work
            opClusterTaskWorker = OperatorWrapper(
                OpTaskWorker, parent=finalOutputSlot.getRealOperator().parent)

            # FIXME: Image index is hard-coded as 0.  We assume we are working with only one (big) dataset in cluster mode.
            opClusterTaskWorker.Input.connect(finalOutputSlot)
            opClusterTaskWorker.RoiString[0].setValue(args._node_work_)
            opClusterTaskWorker.TaskName.setValue(task_name)
            opClusterTaskWorker.ConfigFilePath.setValue(
                args.option_config_file)

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterTaskWorker.SecondaryInputs[0].resize(
                len(secondaryOutputSlots))
            opClusterTaskWorker.SecondaryOutputDescriptions[0].resize(
                len(secondaryOutputSlots))
            for i in range(len(secondaryOutputSlots)):
                opClusterTaskWorker.SecondaryInputs[0][i].connect(
                    secondaryOutputSlots[i][0])
                opClusterTaskWorker.SecondaryOutputDescriptions[0][i].setValue(
                    secondaryOutputDescriptions[i])

            opClusterTaskWorker.OutputFilesetDescription.setValue(
                args.output_description_file)

            # If we have a way to report task progress (e.g. by updating the job name),
            #  then subscribe to progress signals
            if config.task_progress_update_command is not None:

                def report_progress(progress):
                    cmd = config.task_progress_update_command.format(
                        progress=int(progress))

                    def shell_call(shell_cmd):
                        logger.debug("Executing progress command: " + cmd)
                        subprocess.call(shell_cmd, shell=True)

                    background_tasks.put(functools.partial(shell_call, cmd))

                opClusterTaskWorker.innerOperators[0].progressSignal.subscribe(
                    report_progress)

            resultSlot = opClusterTaskWorker.ReturnCode
            clusterOperator = opClusterTaskWorker
        else:
            # We're the master
            opClusterizeMaster = OperatorWrapper(
                OpClusterize, parent=finalOutputSlot.getRealOperator().parent)

            opClusterizeMaster.Input.connect(finalOutputSlot)
            opClusterizeMaster.ProjectFilePath.setValue(args.project)
            opClusterizeMaster.OutputDatasetDescription.setValue(
                args.output_description_file)

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterizeMaster.SecondaryInputs[0].resize(
                len(secondaryOutputSlots))
            opClusterizeMaster.SecondaryOutputDescriptions[0].resize(
                len(secondaryOutputSlots))
            for i in range(len(secondaryOutputSlots)):
                opClusterizeMaster.SecondaryInputs[0][i].connect(
                    secondaryOutputSlots[i][0])
                opClusterizeMaster.SecondaryOutputDescriptions[0][i].setValue(
                    secondaryOutputDescriptions[i])

            opClusterizeMaster.ConfigFilePath.setValue(args.option_config_file)

            resultSlot = opClusterizeMaster.ReturnCode
            clusterOperator = opClusterizeMaster

        # Get the result
        logger.info("Starting task")
        result = resultSlot[
            0].value  # FIXME: The image index is hard-coded here.
    finally:
        logger.info("Cleaning up")
        global stop_background_tasks
        stop_background_tasks = True

        try:
            if clusterOperator is not None:
                clusterOperator.cleanUp()
        except:
            logger.error("Errors during cleanup.")

        try:
            logger.info("Closing project...")
            shell.closeCurrentProject()
        except:
            logger.error("Errors while closing project.")

    logger.info("FINISHED with result {}".format(result))
    if not result:
        logger.error("FAILED TO COMPLETE!")

    if rootLogHandler is not None:
        rootLogHandler.close()