def test_getPathVariants(self):
    """Exercise getPathVariants() with absolute, relative, and empty inputs.

    Result variables are named abs_path/rel_path so the ``abs`` builtin is
    not shadowed.
    """
    # An absolute input: the absolute variant is the normalized join result.
    abs_path, rel_path = getPathVariants("/aaa/bbb/ccc/ddd.txt", "/aaa/bbb/ccc/eee")
    # Use normpath to make sure this test works on windows...
    expected = os.path.normpath(
        os.path.join("/aaa/bbb/ccc/eee", "/aaa/bbb/ccc/ddd.txt")).replace("\\", "/")
    assert abs_path == expected, "{} != {}".format(abs_path, expected)
    assert rel_path == "../ddd.txt"

    # A relative input is resolved against the working directory.
    abs_path, rel_path = getPathVariants("../ddd.txt", "/aaa/bbb/ccc/eee")
    assert abs_path == os.path.normpath(
        os.path.join("/aaa/bbb/ccc/eee", "../ddd.txt")).replace("\\", "/")
    assert rel_path == "../ddd.txt"

    # A bare filename stays relative to the working directory.
    abs_path, rel_path = getPathVariants("ddd.txt", "/aaa/bbb/ccc")
    assert abs_path == os.path.normpath(
        os.path.join("/aaa/bbb/ccc", "ddd.txt")).replace("\\", "/")
    assert rel_path == "ddd.txt"

    # Empty input yields (working directory, '').
    assert getPathVariants("", "/abc") == ("/abc", ""), "{} != {}".format(
        getPathVariants("", "/abc"), ("/abc", ""))
def test_getPathVariants(self):
    """Exercise getPathVariants() with absolute, relative, and empty inputs.

    Result variables are named abs_path/rel_path so the ``abs`` builtin is
    not shadowed.
    """
    # An absolute input: the absolute variant is the normalized join result.
    abs_path, rel_path = getPathVariants('/aaa/bbb/ccc/ddd.txt', '/aaa/bbb/ccc/eee')
    # Use normpath to make sure this test works on windows...
    expected = os.path.normpath(
        os.path.join('/aaa/bbb/ccc/eee', '/aaa/bbb/ccc/ddd.txt')).replace('\\', '/')
    assert abs_path == expected, "{} != {}".format(abs_path, expected)
    assert rel_path == '../ddd.txt'

    # A relative input is resolved against the working directory.
    abs_path, rel_path = getPathVariants('../ddd.txt', '/aaa/bbb/ccc/eee')
    assert abs_path == os.path.normpath(
        os.path.join('/aaa/bbb/ccc/eee', '../ddd.txt')).replace('\\', '/')
    assert rel_path == '../ddd.txt'

    # A bare filename stays relative to the working directory.
    abs_path, rel_path = getPathVariants('ddd.txt', '/aaa/bbb/ccc')
    assert abs_path == os.path.normpath(
        os.path.join('/aaa/bbb/ccc', 'ddd.txt')).replace('\\', '/')
    assert rel_path == 'ddd.txt'

    # Empty input yields (working directory, '').
    assert getPathVariants('', '/abc') == ('/abc', ''), \
        "{} != {}".format(getPathVariants('', '/abc'), ('/abc', ''))
def updateWorkingDirectory(self,newpath,oldpath):
    """
    Rewrite the stored filePath of every filesystem-backed dataset after the
    project's external directory changes from ``oldpath`` to ``newpath``.

    Each dataset path is first made absolute against the OLD directory, then
    re-expressed (relative where sensible) against the NEW directory, so the
    project keeps pointing at the same files.
    """
    newdir = PathComponents(newpath).externalDirectory
    olddir = PathComponents(oldpath).externalDirectory

    # Nothing to do if the external directory did not actually change.
    if newdir==olddir:
        return

    # Disconnect the working directory while we make these changes.
    # All the changes will take effect when we set the new working directory.
    self.topLevelOperator.WorkingDirectory.disconnect()

    for laneIndex, multislot in enumerate(self.topLevelOperator.DatasetGroup):
        for roleIndex, slot in enumerate(multislot):
            if not slot.ready():
                # Skip if there is no dataset in this lane/role combination yet.
                continue
            datasetInfo = slot.value
            if datasetInfo.location == DatasetInfo.Location.FileSystem:
                # construct absolute path and recreate relative to the new path
                fp = PathComponents(datasetInfo.filePath,olddir).totalPath()
                abspath, relpath = getPathVariants(fp,newdir)

                # Same convention as in dataSelectionGui:
                # Relative by default, unless the file is in a totally different tree from the working directory.
                if relpath is not None and len(os.path.commonprefix([fp, abspath])) > 1:
                    datasetInfo.filePath = relpath
                else:
                    datasetInfo.filePath = abspath

                # check_changed=False forces downstream notification even if
                # the value compares equal.
                slot.setValue(datasetInfo, check_changed=False)

    # Reconnecting happens implicitly: setting the value re-enables the slot.
    self.topLevelOperator.WorkingDirectory.setValue(newdir)
    # NOTE(review): a *directory* is assigned to what the name suggests is a
    # project *file* path -- confirm this is intentional.
    self._projectFilePath = newdir
def _createDatasetInfo(self, roleIndex, filePath, roi):
    """
    Create a DatasetInfo object for the given filePath and roi.
    roi may be None, in which case it is ignored.

    The stored filePath is made relative to the working directory when the
    file shares a common tree with it.  For HDF5-style files the internal
    dataset path is appended: automatically when unambiguous (single dataset,
    or a unique match with the user's previous choice for this role),
    otherwise via an interactive dialog.

    Raises RuntimeError if an HDF5 file contains no image datasets, and
    DataSelectionGui.UserCancelledError if the user dismisses the dialog.
    """
    datasetInfo = DatasetInfo()
    if roi is not None:
        datasetInfo.subvolume_roi = roi
    cwd = self.topLevelOperator.WorkingDirectory.value
    absPath, relPath = getPathVariants(filePath, cwd)

    # Relative by default, unless the file is in a totally different tree from the working directory.
    if relPath is not None and len(os.path.commonprefix([cwd, absPath])) > 1:
        datasetInfo.filePath = relPath
    else:
        datasetInfo.filePath = absPath
    datasetInfo.nickname = PathComponents(absPath).filenameBase

    h5Exts = ['.ilp', '.h5', '.hdf5']
    if os.path.splitext(datasetInfo.filePath)[1] in h5Exts:
        datasetNames = self.getPossibleInternalPaths(absPath)
        if len(datasetNames) == 0:
            raise RuntimeError("HDF5 file %s has no image datasets" % datasetInfo.filePath)
        elif len(datasetNames) == 1:
            # Only one dataset: no ambiguity, append it directly.
            datasetInfo.filePath += str(datasetNames[0])
        else:
            # If exactly one of the file's datasets matches a user's previous choice, use it.
            if roleIndex not in self._default_h5_volumes:
                self._default_h5_volumes[roleIndex] = set()
            previous_selections = self._default_h5_volumes[roleIndex]
            possible_auto_selections = previous_selections.intersection(datasetNames)
            if len(possible_auto_selections) == 1:
                datasetInfo.filePath += str(list(possible_auto_selections)[0])
            else:
                # Ask the user which dataset to choose
                dlg = H5VolumeSelectionDlg(datasetNames, self)
                if dlg.exec_() == QDialog.Accepted:
                    selected_index = dlg.combo.currentIndex()
                    selected_dataset = str(datasetNames[selected_index])
                    datasetInfo.filePath += selected_dataset
                    # Remember the choice so future files can auto-select it.
                    self._default_h5_volumes[roleIndex].add(selected_dataset)
                else:
                    raise DataSelectionGui.UserCancelledError()

    # Allow labels by default if this gui isn't being used for batch data.
    datasetInfo.allowLabels = (self.guiMode == GuiMode.Normal)
    return datasetInfo
def execute(self, slot, subindex, roi, result):
    """
    Launch one cluster task per output block that still needs computing.

    Parses the cluster config, prepares the destination blockwise fileset,
    drops tasks whose blocks are already available or currently locked, and
    then spawns each remaining task command either locally or on a remote
    server (via fabric).  Returns immediately after launching; task progress
    is not monitored here.  result[0] is set to True on launch.
    """
    dtypeBytes = self._getDtypeBytes()
    totalBytes = dtypeBytes * numpy.prod(self.Input.meta.shape)
    totalMB = totalBytes / (1000*1000)
    logger.info( "Clusterizing computation of {} MB dataset, outputting according to {}".format(totalMB, self.OutputDatasetDescription.value) )

    configFilePath = self.ConfigFilePath.value
    self._config = parseClusterConfigFile( configFilePath )
    self._validateConfig()

    # Create the destination file if necessary
    blockwiseFileset, taskInfos = self._prepareDestination()

    try:
        # Figure out which work doesn't need to be recomputed (if any).
        # The loop variable is named task_roi (not 'roi') so it no longer
        # shadows the 'roi' method parameter, as the original code did.
        unneeded_rois = []
        for task_roi in taskInfos:
            if blockwiseFileset.getBlockStatus(task_roi[0]) == BlockwiseFileset.BLOCK_AVAILABLE \
            or blockwiseFileset.isBlockLocked(task_roi[0]):
                # We don't attempt to process currently locked blocks.
                unneeded_rois.append( task_roi )

        # Remove any tasks that we don't need to compute (they were finished in a previous run)
        for task_roi in unneeded_rois:
            logger.info( "No need to run task: {} for roi: {}".format( taskInfos[task_roi].taskName, task_roi ) )
            del taskInfos[task_roi]

        # The server working directory may be relative to the config file.
        absWorkDir, _ = getPathVariants(self._config.server_working_directory, os.path.split( configFilePath )[0] )

        if self._config.task_launch_server == "localhost":
            def localCommand( cmd ):
                # Run the command from the configured working directory,
                # then restore the previous cwd.
                cwd = os.getcwd()
                os.chdir( absWorkDir )
                subprocess.call( cmd, shell=True )
                os.chdir( cwd )
            launchFunc = localCommand
        else:
            # We use fabric for executing remote tasks
            # Import it here because it isn't required that the nodes can use it.
            import fabric.api as fab
            @fab.hosts( self._config.task_launch_server )
            def remoteCommand( cmd ):
                with fab.cd( absWorkDir ):
                    fab.run( cmd )
            launchFunc = functools.partial( fab.execute, remoteCommand )

        # Spawn each task
        for taskInfo in taskInfos.values():
            logger.info("Launching node task: " + taskInfo.command )
            launchFunc( taskInfo.command )

        # Return immediately.  We do not attempt to monitor the task progress.
        result[0] = True
        return result
    finally:
        blockwiseFileset.close()
def test_getPathVariants(self):
    """Exercise getPathVariants() with absolute, relative, and empty inputs.

    Result variables are named abs_path/rel_path so the ``abs`` builtin is
    not shadowed.
    """
    # An absolute input: the absolute variant is the normalized join result.
    abs_path, rel_path = getPathVariants('/aaa/bbb/ccc/ddd.txt', '/aaa/bbb/ccc/eee')
    # Use normpath to make sure this test works on windows...
    assert abs_path == os.path.normpath(os.path.join('/aaa/bbb/ccc/eee', '/aaa/bbb/ccc/ddd.txt'))
    assert rel_path == '../ddd.txt'

    # A relative input is resolved against the working directory.
    abs_path, rel_path = getPathVariants('../ddd.txt', '/aaa/bbb/ccc/eee')
    assert abs_path == os.path.normpath(os.path.join('/aaa/bbb/ccc/eee', '../ddd.txt'))
    assert rel_path == '../ddd.txt'

    # A bare filename stays relative to the working directory.
    abs_path, rel_path = getPathVariants('ddd.txt', '/aaa/bbb/ccc')
    assert abs_path == os.path.normpath(os.path.join('/aaa/bbb/ccc', 'ddd.txt'))
    assert rel_path == 'ddd.txt'

    # Empty input yields (working directory, '').
    assert getPathVariants('', '/abc') == ('/abc', '')
def test_getPathVariants(self):
    """Exercise getPathVariants() with absolute, relative, and empty inputs.

    Result variables are named abs_path/rel_path so the ``abs`` builtin is
    not shadowed.
    """
    # An absolute input: the absolute variant is the normalized join result.
    abs_path, rel_path = getPathVariants("/aaa/bbb/ccc/ddd.txt", "/aaa/bbb/ccc/eee")
    # Use normpath to make sure this test works on windows...
    expected = os.path.normpath(os.path.join("/aaa/bbb/ccc/eee", "/aaa/bbb/ccc/ddd.txt")).replace("\\", "/")
    assert abs_path == expected, "{} != {}".format(abs_path, expected)
    assert rel_path == "../ddd.txt"

    # A relative input is resolved against the working directory.
    abs_path, rel_path = getPathVariants("../ddd.txt", "/aaa/bbb/ccc/eee")
    assert abs_path == os.path.normpath(os.path.join("/aaa/bbb/ccc/eee", "../ddd.txt")).replace("\\", "/")
    assert rel_path == "../ddd.txt"

    # A bare filename stays relative to the working directory.
    abs_path, rel_path = getPathVariants("ddd.txt", "/aaa/bbb/ccc")
    assert abs_path == os.path.normpath(os.path.join("/aaa/bbb/ccc", "ddd.txt")).replace("\\", "/")
    assert rel_path == "ddd.txt"

    # Empty input yields (working directory, '').
    assert getPathVariants("", "/abc") == ("/abc", ""), "{} != {}".format(getPathVariants("", "/abc"), ("/abc", ""))
def _createDatasetInfo(self, roleIndex, filePath, roi):
    """
    Create a DatasetInfo object for the given filePath and roi.
    roi may be None, in which case it is ignored.

    The stored filePath is made relative to the working directory when the
    file shares a common tree with it.  For HDF5-style files the internal
    dataset path is appended: automatically when unambiguous, otherwise via
    an interactive dialog.

    Raises RuntimeError if an HDF5 file contains no image datasets, and
    DataSelectionGui.UserCancelledError if the user dismisses the dialog.
    """
    datasetInfo = DatasetInfo()
    if roi is not None:
        datasetInfo.subvolume_roi = roi
    cwd = self.topLevelOperator.WorkingDirectory.value
    absPath, relPath = getPathVariants(filePath, cwd)

    # Relative by default, unless the file is in a totally different tree from the working directory.
    if relPath is not None and len(os.path.commonprefix([cwd, absPath])) > 1:
        datasetInfo.filePath = relPath
    else:
        datasetInfo.filePath = absPath
    datasetInfo.nickname = PathComponents(absPath).filenameBase

    h5Exts = ['.ilp', '.h5', '.hdf5']
    if os.path.splitext(datasetInfo.filePath)[1] in h5Exts:
        datasetNames = self.getPossibleInternalPaths( absPath )
        if len(datasetNames) == 0:
            raise RuntimeError("HDF5 file %s has no image datasets" % datasetInfo.filePath)
        elif len(datasetNames) == 1:
            # Only one dataset: no ambiguity, append it directly.
            datasetInfo.filePath += str(datasetNames[0])
        else:
            # If exactly one of the file's datasets matches a user's previous choice, use it.
            if roleIndex not in self._default_h5_volumes:
                self._default_h5_volumes[roleIndex] = set()
            previous_selections = self._default_h5_volumes[roleIndex]
            possible_auto_selections = previous_selections.intersection(datasetNames)
            if len(possible_auto_selections) == 1:
                datasetInfo.filePath += str(list(possible_auto_selections)[0])
            else:
                # Ask the user which dataset to choose
                dlg = H5VolumeSelectionDlg(datasetNames, self)
                if dlg.exec_() == QDialog.Accepted:
                    selected_index = dlg.combo.currentIndex()
                    selected_dataset = str(datasetNames[selected_index])
                    datasetInfo.filePath += selected_dataset
                    # Remember the choice so future files can auto-select it.
                    self._default_h5_volumes[roleIndex].add( selected_dataset )
                else:
                    raise DataSelectionGui.UserCancelledError()

    # Allow labels by default if this gui isn't being used for batch data.
    datasetInfo.allowLabels = ( self.guiMode == GuiMode.Normal )
    return datasetInfo
def _prepareTaskInfos(self, roiList):
    """
    Build an OrderedDict mapping each roi (as a hashable tuple-of-tuples)
    to a TaskInfo carrying the task name and the fully formatted launch
    command for that block of work.
    """
    # Divide up the workload into large pieces
    logger.info("Dividing into {} node jobs.".format(len(roiList)))
    taskInfos = collections.OrderedDict()
    for roiIndex, roi in enumerate(roiList):
        # Convert to tuples so the roi can serve as a dict key.
        roi = (tuple(roi[0]), tuple(roi[1]))
        taskInfo = OpClusterize.TaskInfo()
        taskInfo.subregion = SubRegion(None, start=roi[0], stop=roi[1])

        taskName = "J{:02}".format(roiIndex)

        # Command-line arguments forwarded to the node process.
        commandArgs = []
        commandArgs.append("--option_config_file=" + self.ConfigFilePath.value)
        commandArgs.append("--project=" + self.ProjectFilePath.value)
        commandArgs.append("--_node_work_=\"" + Roi.dumps(taskInfo.subregion) + "\"")
        commandArgs.append("--process_name={}".format(taskName))
        commandArgs.append("--output_description_file={}".format(
            self.OutputDatasetDescription.value))

        # Check the command format string: We need to know where to put our args...
        commandFormat = self._config.command_format
        assert commandFormat.find("{task_args}") != -1

        # Output log directory might be a relative path (relative to config file)
        absLogDir, _ = getPathVariants(
            self._config.output_log_directory,
            os.path.split(self.ConfigFilePath.value)[0])
        if not os.path.exists(absLogDir):
            os.makedirs(absLogDir)
        taskOutputLogFilename = taskName + ".log"
        taskOutputLogPath = os.path.join(absLogDir, taskOutputLogFilename)

        # Surrounding spaces keep the args separated inside the format string.
        allArgs = " " + " ".join(commandArgs) + " "
        taskInfo.taskName = taskName
        taskInfo.command = commandFormat.format(
            task_args=allArgs,
            task_name=taskName,
            task_output_file=taskOutputLogPath)
        taskInfos[roi] = taskInfo

    return taskInfos
def _prepareTaskInfos(self, roiList):
    """
    Build an OrderedDict mapping each roi (as a hashable tuple-of-tuples)
    to a TaskInfo carrying the task name and the fully formatted launch
    command for that block of work.  This variant also forwards secondary
    output description files to each node task.
    """
    # Divide up the workload into large pieces
    logger.info( "Dividing into {} node jobs.".format( len(roiList) ) )

    taskInfos = collections.OrderedDict()
    for roiIndex, roi in enumerate(roiList):
        # Convert to tuples so the roi can serve as a dict key.
        roi = ( tuple(roi[0]), tuple(roi[1]) )
        taskInfo = OpClusterize.TaskInfo()
        taskInfo.subregion = SubRegion( None, start=roi[0], stop=roi[1] )

        taskName = "J{:02}".format(roiIndex)

        # Command-line arguments forwarded to the node process.
        commandArgs = []
        commandArgs.append( "--option_config_file=" + self.ConfigFilePath.value )
        commandArgs.append( "--project=" + self.ProjectFilePath.value )
        commandArgs.append( "--_node_work_=\"" + Roi.dumps( taskInfo.subregion ) + "\"" )
        commandArgs.append( "--process_name={}".format(taskName) )
        commandArgs.append( "--output_description_file={}".format( self.OutputDatasetDescription.value ) )
        # One extra description argument per secondary output slot.
        for slot in self.SecondaryOutputDescriptions:
            commandArgs.append( "--secondary_output_description_file={}".format( slot.value ) )

        # Check the command format string: We need to know where to put our args...
        commandFormat = self._config.command_format
        assert commandFormat.find("{task_args}") != -1

        # Output log directory might be a relative path (relative to config file)
        absLogDir, _ = getPathVariants(self._config.output_log_directory, os.path.split( self.ConfigFilePath.value )[0] )
        taskOutputLogFilename = taskName + ".log"
        taskOutputLogPath = os.path.join( absLogDir, taskOutputLogFilename )

        # Surrounding spaces keep the args separated inside the format string.
        allArgs = " " + " ".join(commandArgs) + " "
        taskInfo.taskName = taskName
        taskInfo.command = commandFormat.format( task_args=allArgs, task_name=taskName, task_output_file=taskOutputLogPath )
        taskInfos[roi] = taskInfo

    return taskInfos
def _readDatasetInfo(self, infoGroup, localDataGroup, projectFilePath, headless):
    """
    Deserialize one DatasetInfo from an HDF5 info group.

    Returns (datasetInfo, dirty); datasetInfo is None for an empty (unready)
    group.  dirty is True when the user repaired a missing external file and
    the project therefore needs to be re-saved.  Optional fields are read
    with try/except KeyError so older project files remain loadable.
    """
    # Unready datasets are represented with an empty group.
    if len( infoGroup ) == 0:
        return None, False
    datasetInfo = DatasetInfo()

    # Make a reverse-lookup of the location storage strings
    LocationLookup = { v:k for k,v in self.LocationStrings.items() }
    datasetInfo.location = LocationLookup[ str(infoGroup['location'].value) ]

    # Write to the 'private' members to avoid resetting the dataset id
    datasetInfo._filePath = infoGroup['filePath'].value
    datasetInfo._datasetId = infoGroup['datasetId'].value

    # Optional fields below: missing keys simply keep the defaults.
    try:
        datasetInfo.allowLabels = infoGroup['allowLabels'].value
    except KeyError:
        pass

    try:
        datasetInfo.drange = tuple( infoGroup['drange'].value )
    except KeyError:
        pass

    try:
        datasetInfo.nickname = infoGroup['nickname'].value
    except KeyError:
        datasetInfo.nickname = PathComponents(datasetInfo.filePath).filenameBase

    try:
        tags = vigra.AxisTags.fromJSON( infoGroup['axistags'].value )
        datasetInfo.axistags = tags
    except KeyError:
        # Old projects just have an 'axisorder' field instead of full axistags
        try:
            axisorder = infoGroup['axisorder'].value
            datasetInfo.axistags = vigra.defaultAxistags(axisorder)
        except KeyError:
            pass

    # If the data is supposed to be in the project,
    # check for it now.
    if datasetInfo.location == DatasetInfo.Location.ProjectInternal:
        if not datasetInfo.datasetId in localDataGroup.keys():
            raise RuntimeError("Corrupt project file.  Could not find data for " + infoGroup.name)

    dirty = False
    # If the data is supposed to exist outside the project, make sure it really does.
    if datasetInfo.location == DatasetInfo.Location.FileSystem and not isUrl(datasetInfo.filePath):
        pathData = PathComponents( datasetInfo.filePath, os.path.split(projectFilePath)[0])
        filePath = pathData.externalPath
        if not os.path.exists(filePath):
            if headless:
                raise RuntimeError("Could not find data at " + filePath)
            # Ask the user to locate the lost file.
            filt = "Image files (" + ' '.join('*.' + x for x in OpDataSelection.SupportedExtensions) + ')'
            newpath = self.repairFile(filePath, filt)
            if pathData.internalPath is not None:
                newpath += pathData.internalPath
            datasetInfo._filePath = getPathVariants(newpath , os.path.split(projectFilePath)[0])[0]
            dirty = True

    return datasetInfo, dirty
def addFileNames(self, fileNames, roleIndex, startingLane=None):
    """
    Add the given filenames to both the GUI table and the top-level operator inputs.
    If startingLane is None, the filenames will be *appended* to the role's list of files.

    On a DatasetConstraintError the user may repair the dataset; on any
    unrepaired failure the DatasetGroup is rolled back to its original size.
    """
    infos = []

    if startingLane is None or startingLane == -1:
        # Append: new lanes go after the existing ones.
        startingLane = len(self.topLevelOperator.DatasetGroup)
        endingLane = startingLane+len(fileNames)-1
    else:
        assert startingLane < len(self.topLevelOperator.DatasetGroup)
        max_files = len(self.topLevelOperator.DatasetGroup) - \
                startingLane
        if len(fileNames) > max_files:
            msg = "You selected {num_selected} files for {num_slots} "\
                  "slots. To add new files use the 'Add new...' option "\
                  "in the context menu or the button in the last row."\
                          .format(num_selected=len(fileNames),
                                  num_slots=max_files)
            QMessageBox.critical( self, "Too many files", msg )
            return
        endingLane = min(startingLane+len(fileNames)-1,
                len(self.topLevelOperator.DatasetGroup))

    if self._max_lanes and endingLane >= self._max_lanes:
        msg = "You may not add more than {} file(s) to this workflow.  Please try again.".format( self._max_lanes )
        QMessageBox.critical( self, "Too many files", msg )
        return

    # Assign values to the new inputs we just allocated.
    # The GUI will be updated by callbacks that are listening to slot changes
    for i, filePath in enumerate(fileNames):
        datasetInfo = DatasetInfo()
        cwd = self.topLevelOperator.WorkingDirectory.value

        absPath, relPath = getPathVariants(filePath, cwd)

        # Relative by default, unless the file is in a totally different tree from the working directory.
        if relPath is not None and len(os.path.commonprefix([cwd, absPath])) > 1:
            datasetInfo.filePath = relPath
        else:
            datasetInfo.filePath = absPath

        datasetInfo.nickname = PathComponents(absPath).filenameBase

        h5Exts = ['.ilp', '.h5', '.hdf5']
        if os.path.splitext(datasetInfo.filePath)[1] in h5Exts:
            datasetNames = self.getPossibleInternalPaths( absPath )
            if len(datasetNames) > 0:
                # Use the first internal dataset (no user prompt here).
                datasetInfo.filePath += str(datasetNames[0])
            else:
                raise RuntimeError("HDF5 file %s has no image datasets" % datasetInfo.filePath)

        # Allow labels by default if this gui isn't being used for batch data.
        datasetInfo.allowLabels = ( self.guiMode == GuiMode.Normal )
        infos.append(datasetInfo)

    # if no exception was thrown, set up the operator now
    opTop = self.topLevelOperator
    originalSize = len(opTop.DatasetGroup)

    if len( opTop.DatasetGroup ) < endingLane+1:
        opTop.DatasetGroup.resize( endingLane+1 )
    for laneIndex, info in zip(range(startingLane, endingLane+1), infos):
        try:
            self.topLevelOperator.DatasetGroup[laneIndex][roleIndex].setValue( info )
        except DatasetConstraintError as ex:
            return_val = [False]
            # Give the user a chance to fix the problem
            self.handleDatasetConstraintError(info, info.filePath, ex, roleIndex, laneIndex, return_val)
            if not return_val[0]:
                # Not successfully repaired.  Roll back the changes and give up.
                opTop.DatasetGroup.resize( originalSize )
                break
        except OpDataSelection.InvalidDimensionalityError as ex:
            opTop.DatasetGroup.resize( originalSize )
            QMessageBox.critical( self, "Dataset has different dimensionality", ex.message )
            break
        except:
            # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit;
            # it does re-raise after the rollback, so nothing is swallowed.
            QMessageBox.critical( self, "Dataset Load Error", "Wasn't able to load your dataset into the workflow. See console for details." )
            opTop.DatasetGroup.resize( originalSize )
            raise

    # If we succeeded in adding all images, show the first one.
    # NOTE(review): laneIndex is unbound here if the zip above was empty
    # (e.g. fileNames empty) -- TODO confirm callers never pass an empty list.
    if laneIndex == endingLane:
        self.showDataset(startingLane, roleIndex)

    # Notify the workflow that something that could affect applet readyness has occurred.
    self.parentApplet.appletStateUpdateRequested.emit()
    self.updateInternalPathVisiblity()
def runWorkflow(parsed_args):
    """
    Entry point for cluster execution of a headless workflow.

    Behaves as the MASTER process when parsed_args._node_work_ is None
    (sets up file logging, attaches an OpClusterize to spawn node tasks),
    otherwise as a NODE worker (attaches an OpTaskWorker that computes the
    assigned roi).  Blocks until resultSlot yields, then cleans up the
    operators and the project shell.
    """
    args = parsed_args

    # Read the config file
    configFilePath = args.option_config_file
    config = parseClusterConfigFile( configFilePath )

    # If we've got a process name, re-initialize the logger from scratch
    task_name = "node"
    if args.process_name is not None:
        task_name = args.process_name
        ilastik.ilastik_logging.default_config.init(args.process_name + ' ')

    rootLogHandler = None
    if args._node_work_ is None:
        # This is the master process.
        # Tee the log to a file for future reference.

        # Output log directory might be a relative path (relative to config file)
        absLogDir, _ = getPathVariants(config.output_log_directory, os.path.split( configFilePath )[0] )
        if not os.path.exists(absLogDir):
            os.mkdir(absLogDir)

        # Copy the config we're using to the output directory
        shutil.copy(configFilePath, absLogDir)

        logFile = os.path.join( absLogDir, "MASTER.log" )
        logFileFormatter = logging.Formatter("%(levelname)s %(name)s: %(message)s")
        rootLogHandler = logging.FileHandler(logFile, 'a')
        rootLogHandler.setFormatter(logFileFormatter)
        rootLogger = logging.getLogger()
        rootLogger.addHandler( rootLogHandler )
        logger.info( "Launched with sys.argv: {}".format( sys.argv ) )

    # Update the monkey_patch settings
    ilastik.monkey_patches.apply_setting_dict( config.__dict__ )

    # If we're running a node job, set the threadpool size if the user specified one.
    # Note that the main thread does not count toward the threadpool total.
    if args._node_work_ is not None and config.task_threadpool_size is not None:
        lazyflow.request.Request.reset_thread_pool( num_workers = config.task_threadpool_size )

    # Make sure project file exists.
    if not os.path.exists(args.project):
        raise RuntimeError("Project file '" + args.project + "' does not exist.")

    # Instantiate 'shell'
    shell = HeadlessShell( functools.partial(Workflow.getSubclass(config.workflow_type) ) )

    # Load project (auto-import it if necessary)
    logger.info("Opening project: '" + args.project + "'")
    shell.openProjectPath(args.project)

    workflow = shell.projectManager.workflow

    # Attach cluster operators
    resultSlot = None
    finalOutputSlot = workflow.getHeadlessOutputSlot( config.output_slot_id )
    assert finalOutputSlot is not None

    secondaryOutputSlots = workflow.getSecondaryHeadlessOutputSlots( config.output_slot_id )
    secondaryOutputDescriptions = args.secondary_output_description_file # This is a list (see 'action' above)
    if len(secondaryOutputDescriptions) != len(secondaryOutputSlots):
        raise RuntimeError( "This workflow produces exactly {} SECONDARY outputs.  You provided {}.".format( len(secondaryOutputSlots), len(secondaryOutputDescriptions) ) )

    clusterOperator = None
    try:
        if args._node_work_ is not None:
            # We're doing node work
            opClusterTaskWorker = OperatorWrapper( OpTaskWorker, parent=finalOutputSlot.getRealOperator().parent )

            # FIXME: Image index is hard-coded as 0.  We assume we are working with only one (big) dataset in cluster mode.
            opClusterTaskWorker.Input.connect( finalOutputSlot )
            opClusterTaskWorker.RoiString[0].setValue( args._node_work_ )
            opClusterTaskWorker.TaskName.setValue( task_name )
            opClusterTaskWorker.ConfigFilePath.setValue( args.option_config_file )

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterTaskWorker.SecondaryInputs[0].resize( len( secondaryOutputSlots ) )
            opClusterTaskWorker.SecondaryOutputDescriptions[0].resize( len( secondaryOutputSlots ) )
            for i in range( len(secondaryOutputSlots) ):
                opClusterTaskWorker.SecondaryInputs[0][i].connect( secondaryOutputSlots[i][0] )
                opClusterTaskWorker.SecondaryOutputDescriptions[0][i].setValue( secondaryOutputDescriptions[i] )

            opClusterTaskWorker.OutputFilesetDescription.setValue( args.output_description_file )

            # If we have a way to report task progress (e.g. by updating the job name),
            # then subscribe to progress signals
            if config.task_progress_update_command is not None:
                def report_progress( progress ):
                    cmd = config.task_progress_update_command.format( progress=int(progress) )
                    def shell_call(shell_cmd):
                        logger.debug( "Executing progress command: " + cmd )
                        subprocess.call( shell_cmd, shell=True )
                    # Run the shell command on a background queue so progress
                    # reporting does not block computation.
                    background_tasks.put( functools.partial( shell_call, cmd ) )
                opClusterTaskWorker.innerOperators[0].progressSignal.subscribe( report_progress )

            resultSlot = opClusterTaskWorker.ReturnCode
            clusterOperator = opClusterTaskWorker
        else:
            # We're the master
            opClusterizeMaster = OperatorWrapper( OpClusterize, parent=finalOutputSlot.getRealOperator().parent )

            opClusterizeMaster.Input.connect( finalOutputSlot )
            opClusterizeMaster.ProjectFilePath.setValue( args.project )
            opClusterizeMaster.OutputDatasetDescription.setValue( args.output_description_file )

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterizeMaster.SecondaryInputs[0].resize( len( secondaryOutputSlots ) )
            opClusterizeMaster.SecondaryOutputDescriptions[0].resize( len( secondaryOutputSlots ) )
            for i in range( len(secondaryOutputSlots) ):
                opClusterizeMaster.SecondaryInputs[0][i].connect( secondaryOutputSlots[i][0] )
                opClusterizeMaster.SecondaryOutputDescriptions[0][i].setValue( secondaryOutputDescriptions[i] )

            opClusterizeMaster.ConfigFilePath.setValue( args.option_config_file )

            resultSlot = opClusterizeMaster.ReturnCode
            clusterOperator = opClusterizeMaster

        # Get the result
        logger.info("Starting task")
        result = resultSlot[0].value # FIXME: The image index is hard-coded here.
    finally:
        logger.info("Cleaning up")
        global stop_background_tasks
        stop_background_tasks = True

        try:
            if clusterOperator is not None:
                clusterOperator.cleanUp()
        except:
            # Best-effort cleanup: log and continue so project close still runs.
            logger.error("Errors during cleanup.")

        try:
            logger.info("Closing project...")
            shell.closeCurrentProject()
        except:
            logger.error("Errors while closing project.")

    logger.info("FINISHED with result {}".format(result))
    if not result:
        logger.error( "FAILED TO COMPLETE!" )

    if rootLogHandler is not None:
        rootLogHandler.close()
def _readDatasetInfo(self, infoGroup, localDataGroup, projectFilePath, headless):
    """
    Deserialize one DatasetInfo from an HDF5 info group (Python-3 variant:
    byte strings are decoded as UTF-8).

    Returns (datasetInfo, dirty); datasetInfo is None for an empty (unready)
    group.  dirty is True when the user repaired a missing external file and
    the project therefore needs to be re-saved.  Optional fields are read
    with try/except KeyError so older project files remain loadable.
    """
    # Unready datasets are represented with an empty group.
    if len( infoGroup ) == 0:
        return None, False
    datasetInfo = DatasetInfo()

    # Make a reverse-lookup of the location storage strings
    LocationLookup = { v:k for k,v in list(self.LocationStrings.items()) }
    datasetInfo.location = LocationLookup[ infoGroup['location'].value.decode('utf-8') ]

    # Write to the 'private' members to avoid resetting the dataset id
    datasetInfo._filePath = infoGroup['filePath'].value.decode('utf-8')
    datasetInfo._datasetId = infoGroup['datasetId'].value.decode('utf-8')

    # Optional fields below: missing keys simply keep the defaults.
    try:
        datasetInfo.allowLabels = infoGroup['allowLabels'].value
    except KeyError:
        pass

    try:
        datasetInfo.drange = tuple( infoGroup['drange'].value )
    except KeyError:
        pass

    try:
        datasetInfo.laneShape = tuple(infoGroup['shape'].value)
    except KeyError:
        pass

    try:
        datasetInfo.laneDtype = numpy.dtype(infoGroup['dtype'].value.decode('utf-8'))
    except KeyError:
        pass

    try:
        datasetInfo.display_mode = infoGroup['display_mode'].value.decode('utf-8')
    except KeyError:
        pass

    try:
        datasetInfo.nickname = infoGroup['nickname'].value.decode('utf-8')
    except KeyError:
        datasetInfo.nickname = PathComponents(datasetInfo.filePath).filenameBase

    try:
        datasetInfo.fromstack = infoGroup['fromstack'].value
    except KeyError:
        # Guess based on the storage setting and original filepath
        datasetInfo.fromstack = ( datasetInfo.location == DatasetInfo.Location.ProjectInternal and
                                  ( ('?' in datasetInfo._filePath) or (os.path.pathsep in datasetInfo._filePath) ) )

    try:
        tags = vigra.AxisTags.fromJSON( infoGroup['axistags'].value.decode('utf-8') )
        datasetInfo.axistags = tags
    except KeyError:
        # Old projects just have an 'axisorder' field instead of full axistags
        try:
            axisorder = infoGroup['axisorder'].value.decode('utf-8')
            datasetInfo.axistags = vigra.defaultAxistags(axisorder)
        except KeyError:
            pass

    try:
        start, stop = list(map( tuple, infoGroup['subvolume_roi'].value ))
        datasetInfo.subvolume_roi = (start, stop)
    except KeyError:
        pass

    # If the data is supposed to be in the project,
    # check for it now.
    if datasetInfo.location == DatasetInfo.Location.ProjectInternal:
        if not datasetInfo.datasetId in list(localDataGroup.keys()):
            raise RuntimeError("Corrupt project file.  Could not find data for " + infoGroup.name)

    dirty = False
    # If the data is supposed to exist outside the project, make sure it really does.
    if datasetInfo.location == DatasetInfo.Location.FileSystem \
       and not isUrl(datasetInfo.filePath):
        pathData = PathComponents(datasetInfo.filePath,
                                  os.path.split(projectFilePath)[0])
        filePath = pathData.externalPath
        if not os.path.exists(filePath):
            if headless:
                if self._shouldRetrain:
                    raise RuntimeError(
                        "Retrain was passed in headless mode, "
                        "but could not find data at " + filePath)
                else:
                    assert datasetInfo.laneShape, \
                        "Headless mode without raw data not supported in old (pre 1.3.2) project files"
                    # Raw data does not exist in headless, use fake data provider
                    datasetInfo.realDataSource = False
            else:
                # Try to get a new path for the lost file from the user
                filt = "Image files (" + ' '.join('*.' + x
                       for x in OpDataSelection.SupportedExtensions) + ')'
                newpath = self.repairFile(filePath, filt)
                if pathData.internalPath is not None:
                    newpath += pathData.internalPath
                datasetInfo._filePath = \
                    getPathVariants(newpath, os.path.split(projectFilePath)[0])[0]
                dirty = True

    return datasetInfo, dirty
def _readDatasetInfo(self, infoGroup, localDataGroup, projectFilePath, headless):
    """
    Deserialize one DatasetInfo from an HDF5 info group (variant that also
    restores the 'fromstack' flag and subvolume roi).

    Returns (datasetInfo, dirty); datasetInfo is None for an empty (unready)
    group.  dirty is True when the user repaired a missing external file and
    the project therefore needs to be re-saved.  Optional fields are read
    with try/except KeyError so older project files remain loadable.
    """
    # Unready datasets are represented with an empty group.
    if len( infoGroup ) == 0:
        return None, False
    datasetInfo = DatasetInfo()

    # Make a reverse-lookup of the location storage strings
    LocationLookup = { v:k for k,v in self.LocationStrings.items() }
    datasetInfo.location = LocationLookup[ str(infoGroup['location'].value) ]

    # Write to the 'private' members to avoid resetting the dataset id
    datasetInfo._filePath = infoGroup['filePath'].value
    datasetInfo._datasetId = infoGroup['datasetId'].value

    # Optional fields below: missing keys simply keep the defaults.
    try:
        datasetInfo.allowLabels = infoGroup['allowLabels'].value
    except KeyError:
        pass

    try:
        datasetInfo.drange = tuple( infoGroup['drange'].value )
    except KeyError:
        pass

    try:
        datasetInfo.nickname = infoGroup['nickname'].value
    except KeyError:
        datasetInfo.nickname = PathComponents(datasetInfo.filePath).filenameBase

    try:
        datasetInfo.fromstack = infoGroup['fromstack'].value
    except KeyError:
        # Guess based on the storage setting and original filepath
        datasetInfo.fromstack = ( datasetInfo.location == DatasetInfo.Location.ProjectInternal and
                                  ( ('?' in datasetInfo._filePath) or (os.path.pathsep in datasetInfo._filePath) ) )

    try:
        tags = vigra.AxisTags.fromJSON( infoGroup['axistags'].value )
        datasetInfo.axistags = tags
    except KeyError:
        # Old projects just have an 'axisorder' field instead of full axistags
        try:
            axisorder = infoGroup['axisorder'].value
            datasetInfo.axistags = vigra.defaultAxistags(axisorder)
        except KeyError:
            pass

    try:
        start, stop = map( tuple, infoGroup['subvolume_roi'].value )
        datasetInfo.subvolume_roi = (start, stop)
    except KeyError:
        pass

    # If the data is supposed to be in the project,
    # check for it now.
    if datasetInfo.location == DatasetInfo.Location.ProjectInternal:
        if not datasetInfo.datasetId in localDataGroup.keys():
            raise RuntimeError("Corrupt project file.  Could not find data for " + infoGroup.name)

    dirty = False
    # If the data is supposed to exist outside the project, make sure it really does.
    if datasetInfo.location == DatasetInfo.Location.FileSystem and not isUrl(datasetInfo.filePath):
        pathData = PathComponents( datasetInfo.filePath, os.path.split(projectFilePath)[0])
        filePath = pathData.externalPath
        if not os.path.exists(filePath):
            if headless:
                raise RuntimeError("Could not find data at " + filePath)
            # Ask the user to locate the lost file.
            filt = "Image files (" + ' '.join('*.' + x for x in OpDataSelection.SupportedExtensions) + ')'
            newpath = self.repairFile(filePath, filt)
            if pathData.internalPath is not None:
                newpath += pathData.internalPath
            datasetInfo._filePath = getPathVariants(newpath , os.path.split(projectFilePath)[0])[0]
            dirty = True

    return datasetInfo, dirty
def runWorkflow(parsed_args):
    """Run a clusterized workflow, as either the master process or a node worker.

    The role is selected by ``parsed_args._node_work_``: if it is None this
    process is the master (sets up file logging and an OpClusterize operator);
    otherwise it is a node worker (attaches an OpTaskWorker for its ROI).

    :param parsed_args: Parsed command-line arguments (project path, config
                        file, optional node work description, etc.)
    :raises RuntimeError: If the project file does not exist, or the number of
                          secondary output descriptions doesn't match the
                          workflow's secondary output slots.
    """
    args = parsed_args

    # Read the config file
    configFilePath = args.option_config_file
    config = parseClusterConfigFile(configFilePath)

    # If we've got a process name, re-initialize the logger from scratch
    task_name = "node"
    if args.process_name is not None:
        task_name = args.process_name
        ilastik.ilastik_logging.default_config.init(args.process_name + ' ')

    rootLogHandler = None
    if args._node_work_ is None:
        # This is the master process.
        # Tee the log to a file for future reference.

        # Output log directory might be a relative path (relative to config file)
        absLogDir, _ = getPathVariants(config.output_log_directory, os.path.split(configFilePath)[0])
        if not os.path.exists(absLogDir):
            os.mkdir(absLogDir)

        # Copy the config we're using to the output directory
        shutil.copy(configFilePath, absLogDir)

        logFile = os.path.join(absLogDir, "MASTER.log")
        logFileFormatter = logging.Formatter("%(levelname)s %(name)s: %(message)s")
        rootLogHandler = logging.FileHandler(logFile, 'a')
        rootLogHandler.setFormatter(logFileFormatter)
        rootLogger = logging.getLogger()
        rootLogger.addHandler(rootLogHandler)
        logger.info("Launched with sys.argv: {}".format(sys.argv))

    # Update the monkey_patch settings
    ilastik.monkey_patches.apply_setting_dict(config.__dict__)

    # If we're running a node job, set the threadpool size if the user specified one.
    # Note that the main thread does not count toward the threadpool total.
    if args._node_work_ is not None and config.task_threadpool_size is not None:
        lazyflow.request.Request.reset_thread_pool(num_workers=config.task_threadpool_size)

    # Make sure project file exists.
    if not os.path.exists(args.project):
        raise RuntimeError("Project file '" + args.project + "' does not exist.")

    # Instantiate 'shell'
    shell = HeadlessShell(functools.partial(Workflow.getSubclass(config.workflow_type)))

    # Load project (auto-import it if necessary)
    logger.info("Opening project: '" + args.project + "'")
    shell.openProjectPath(args.project)
    workflow = shell.projectManager.workflow

    # Attach cluster operators
    resultSlot = None
    finalOutputSlot = workflow.getHeadlessOutputSlot(config.output_slot_id)
    assert finalOutputSlot is not None

    secondaryOutputSlots = workflow.getSecondaryHeadlessOutputSlots(config.output_slot_id)
    secondaryOutputDescriptions = args.secondary_output_description_file  # This is a list (see 'action' above)
    if len(secondaryOutputDescriptions) != len(secondaryOutputSlots):
        raise RuntimeError(
            "This workflow produces exactly {} SECONDARY outputs. You provided {}."
            .format(len(secondaryOutputSlots), len(secondaryOutputDescriptions)))

    clusterOperator = None
    # BUGFIX: initialize 'result' so the post-finally logging can't hit an
    # UnboundLocalError if an exception path ever bypasses the assignment below.
    result = None
    try:
        if args._node_work_ is not None:
            # We're doing node work
            opClusterTaskWorker = OperatorWrapper(
                OpTaskWorker, parent=finalOutputSlot.getRealOperator().parent)

            # FIXME: Image index is hard-coded as 0. We assume we are working with only one (big) dataset in cluster mode.
            opClusterTaskWorker.Input.connect(finalOutputSlot)
            opClusterTaskWorker.RoiString[0].setValue(args._node_work_)
            opClusterTaskWorker.TaskName.setValue(task_name)
            opClusterTaskWorker.ConfigFilePath.setValue(args.option_config_file)

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterTaskWorker.SecondaryInputs[0].resize(len(secondaryOutputSlots))
            opClusterTaskWorker.SecondaryOutputDescriptions[0].resize(len(secondaryOutputSlots))
            for i in range(len(secondaryOutputSlots)):
                opClusterTaskWorker.SecondaryInputs[0][i].connect(secondaryOutputSlots[i][0])
                opClusterTaskWorker.SecondaryOutputDescriptions[0][i].setValue(secondaryOutputDescriptions[i])

            opClusterTaskWorker.OutputFilesetDescription.setValue(args.output_description_file)

            # If we have a way to report task progress (e.g. by updating the job name),
            # then subscribe to progress signals
            if config.task_progress_update_command is not None:
                def report_progress(progress):
                    cmd = config.task_progress_update_command.format(progress=int(progress))
                    def shell_call(shell_cmd):
                        logger.debug("Executing progress command: " + cmd)
                        subprocess.call(shell_cmd, shell=True)
                    background_tasks.put(functools.partial(shell_call, cmd))
                opClusterTaskWorker.innerOperators[0].progressSignal.subscribe(report_progress)

            resultSlot = opClusterTaskWorker.ReturnCode
            clusterOperator = opClusterTaskWorker
        else:
            # We're the master
            opClusterizeMaster = OperatorWrapper(
                OpClusterize, parent=finalOutputSlot.getRealOperator().parent)

            opClusterizeMaster.Input.connect(finalOutputSlot)
            opClusterizeMaster.ProjectFilePath.setValue(args.project)
            opClusterizeMaster.OutputDatasetDescription.setValue(args.output_description_file)

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterizeMaster.SecondaryInputs[0].resize(len(secondaryOutputSlots))
            opClusterizeMaster.SecondaryOutputDescriptions[0].resize(len(secondaryOutputSlots))
            for i in range(len(secondaryOutputSlots)):
                opClusterizeMaster.SecondaryInputs[0][i].connect(secondaryOutputSlots[i][0])
                opClusterizeMaster.SecondaryOutputDescriptions[0][i].setValue(secondaryOutputDescriptions[i])

            opClusterizeMaster.ConfigFilePath.setValue(args.option_config_file)

            resultSlot = opClusterizeMaster.ReturnCode
            clusterOperator = opClusterizeMaster

        # Get the result
        logger.info("Starting task")
        result = resultSlot[0].value  # FIXME: The image index is hard-coded here.
    finally:
        logger.info("Cleaning up")
        global stop_background_tasks
        stop_background_tasks = True

        try:
            if clusterOperator is not None:
                clusterOperator.cleanUp()
        # BUGFIX: was a bare 'except:', which also swallowed SystemExit/KeyboardInterrupt.
        except Exception:
            logger.error("Errors during cleanup.")

        try:
            logger.info("Closing project...")
            shell.closeCurrentProject()
        # BUGFIX: narrowed from bare 'except:' (see above).
        except Exception:
            logger.error("Errors while closing project.")

    logger.info("FINISHED with result {}".format(result))
    if not result:
        logger.error("FAILED TO COMPLETE!")

    if rootLogHandler is not None:
        rootLogHandler.close()