def execute(self, slot, subindex, ignored_roi, result):
    configFilePath = self.ConfigFilePath.value
    config = parseClusterConfigFile(configFilePath)

    blockwiseFileset = self._primaryBlockwiseFileset

    # Check axis compatibility
    inputAxes = self.Input.meta.getTaggedShape().keys()
    outputAxes = list(blockwiseFileset.description.axes)
    assert set(inputAxes) == set(outputAxes), \
        "Output dataset has the wrong set of axes. Input axes: {}, Output axes: {}".format(
            "".join(inputAxes), "".join(outputAxes))

    roiString = self.RoiString.value
    roi = Roi.loads(roiString)
    if len(roi.start) != len(self.Input.meta.shape):
        assert False, \
            "Task roi: {} is not valid for this input. Did the master launch this task correctly?".format(roiString)

    logger.info("Executing for roi: {}".format(roi))

    if config.use_node_local_scratch:
        assert False, "FIXME."

    assert (blockwiseFileset.getEntireBlockRoi(roi.start)[1] == roi.stop).all(), \
        "Each task must execute exactly one full block. ({},{}) is not a valid block roi.".format(roi.start, roi.stop)
    assert self.Input.ready()

    # Convert the task subrequest shape dict into a shape for this dataset (and axisordering)
    subrequest_shape = [config.task_subrequest_shape[tag.key] for tag in self.Input.meta.axistags]
    primary_subrequest_shape = self._primaryBlockwiseFileset.description.sub_block_shape
    if primary_subrequest_shape is not None:
        # If the output dataset specified a sub_block_shape, override the cluster config
        subrequest_shape = primary_subrequest_shape

    with Timer() as computeTimer:
        # Stream the data out to disk.
        streamer = BigRequestStreamer(self.Input,
                                      (roi.start, roi.stop),
                                      subrequest_shape,
                                      config.task_parallel_subrequests)
        streamer.progressSignal.subscribe(self.progressSignal)
        streamer.resultSignal.subscribe(self._handlePrimaryResultBlock)
        streamer.execute()

        # Now the block is ready. Update the status.
        blockwiseFileset.setBlockStatus(roi.start, BlockwiseFileset.BLOCK_AVAILABLE)

    logger.info("Finished task in {} seconds".format(computeTimer.seconds()))
    result[0] = True
    return result
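# The streamer above walks a large ROI in sub-blocks. For reference, here is a
# minimal, self-contained sketch of that decomposition (enumerate_subrois is a
# hypothetical helper, not the real BigRequestStreamer): it yields (start, stop)
# pairs of block_shape tiles covering the ROI, clipped at the ROI boundary.
import itertools

def enumerate_subrois(roi_start, roi_stop, block_shape):
    """Yield (start, stop) sub-ROIs of block_shape tiling [roi_start, roi_stop)."""
    ranges = [range(lo, hi, step) for lo, hi, step in zip(roi_start, roi_stop, block_shape)]
    for start in itertools.product(*ranges):
        stop = tuple(min(s + b, hi) for s, b, hi in zip(start, block_shape, roi_stop))
        yield start, stop

# Example: a (0,0)-(5,4) roi in (2,3) blocks -> 6 sub-rois
# list(enumerate_subrois((0, 0), (5, 4), (2, 3)))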
def execute(self, slot, subindex, roi, result):
    dtypeBytes = self._getDtypeBytes()
    totalBytes = dtypeBytes * numpy.prod(self.Input.meta.shape)
    totalMB = totalBytes / (1000*1000)
    logger.info("Clusterizing computation of {} MB dataset, outputting according to {}".format(
        totalMB, self.OutputDatasetDescription.value))

    configFilePath = self.ConfigFilePath.value
    self._config = parseClusterConfigFile(configFilePath)

    self._validateConfig()

    # Create the destination file if necessary
    blockwiseFileset, taskInfos = self._prepareDestination()

    try:
        # Figure out which work doesn't need to be recomputed (if any)
        unneeded_rois = []
        for roi in taskInfos.keys():
            if blockwiseFileset.getBlockStatus(roi[0]) == BlockwiseFileset.BLOCK_AVAILABLE \
                    or blockwiseFileset.isBlockLocked(roi[0]):
                # We don't attempt to process currently locked blocks.
                unneeded_rois.append(roi)

        # Remove any tasks that we don't need to compute (they were finished in a previous run)
        for roi in unneeded_rois:
            logger.info("No need to run task: {} for roi: {}".format(taskInfos[roi].taskName, roi))
            del taskInfos[roi]

        absWorkDir, _ = getPathVariants(self._config.server_working_directory,
                                        os.path.split(configFilePath)[0])

        if self._config.task_launch_server == "localhost":
            def localCommand(cmd):
                cwd = os.getcwd()
                os.chdir(absWorkDir)
                subprocess.call(cmd, shell=True)
                os.chdir(cwd)
            launchFunc = localCommand
        else:
            # We use fabric for executing remote tasks.
            # Import it here because it isn't required that the nodes can use it.
            import fabric.api as fab

            @fab.hosts(self._config.task_launch_server)
            def remoteCommand(cmd):
                with fab.cd(absWorkDir):
                    fab.run(cmd)
            launchFunc = functools.partial(fab.execute, remoteCommand)

        # Spawn each task
        for taskInfo in taskInfos.values():
            logger.info("Launching node task: " + taskInfo.command)
            launchFunc(taskInfo.command)

        # Return immediately. We do not attempt to monitor the task progress.
        result[0] = True
        return result
    finally:
        blockwiseFileset.close()
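# The local/remote dispatch above picks a launch function once, then treats all
# task commands uniformly. A minimal stand-alone sketch of the local branch
# (make_local_launcher is a hypothetical name; the fabric remote branch is
# omitted here because it requires a live server), with the cwd restored in a
# finally block so a failing command can't leave the process in the work dir:
import os
import subprocess

def make_local_launcher(work_dir):
    """Return a callable that runs a shell command from work_dir, restoring the cwd."""
    def launch(cmd):
        cwd = os.getcwd()
        os.chdir(work_dir)
        try:
            subprocess.call(cmd, shell=True)
        finally:
            os.chdir(cwd)
    return launch

# Usage sketch:
# launch = make_local_launcher("/tmp")
# launch("echo task started")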
def runWorkflow(cluster_args):
    ilastik_main_args = ilastik_main.parser.parse_args([])

    # Copy relevant args from cluster cmdline options to ilastik_main cmdline options
    ilastik_main_args.headless = True
    ilastik_main_args.project = cluster_args.project
    ilastik_main_args.process_name = cluster_args.process_name

    # Nodes should not write to a common logfile.
    # Override with /dev/null
    if cluster_args._node_work_ is None:
        ilastik_main_args.logfile = cluster_args.logfile
    else:
        ilastik_main_args.logfile = "/dev/null"

    assert cluster_args.project is not None, "Didn't get a project file."

    # Read the config file
    configFilePath = cluster_args.option_config_file
    config = parseClusterConfigFile(configFilePath)

    # Update the monkey_patch settings
    ilastik.monkey_patches.apply_setting_dict(config.__dict__)

    # Configure the thread count.
    # Nowadays, this is done via an environment variable setting for ilastik_main to detect.
    if cluster_args._node_work_ is not None and config.task_threadpool_size is not None:
        os.environ["LAZYFLOW_THREADS"] = str(config.task_threadpool_size)

    if cluster_args._node_work_ is not None and config.task_total_ram_mb is not None:
        os.environ["LAZYFLOW_TOTAL_RAM_MB"] = str(config.task_total_ram_mb)

    # Instantiate 'shell' by calling ilastik_main with our modified args
    shell = ilastik_main.main(ilastik_main_args)
    workflow = shell.projectManager.workflow

    # Attach cluster operators
    resultSlot = None
    finalOutputSlot = workflow.getHeadlessOutputSlot(config.output_slot_id)
    assert finalOutputSlot is not None

    clusterOperator = None
    try:
        if cluster_args._node_work_ is not None:
            clusterOperator, resultSlot = prepare_node_cluster_operator(config, cluster_args, finalOutputSlot)
        else:
            clusterOperator, resultSlot = prepare_master_cluster_operator(cluster_args, finalOutputSlot)

        # Get the result
        logger.info("Starting task")
        result = resultSlot[0].value  # FIXME: The image index is hard-coded here.
    finally:
        logger.info("Cleaning up")
        global stop_background_tasks
        stop_background_tasks = True

        try:
            if clusterOperator is not None:
                clusterOperator.cleanUp()
        except:
            logger.error("Errors during cleanup.")

        try:
            logger.info("Closing project...")
            shell.closeCurrentProject()
        except:
            logger.error("Errors while closing project.")

    logger.info("FINISHED with result {}".format(result))
    if not result:
        logger.error("FAILED TO COMPLETE!")
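# The two environment variables above are how the master hands resource limits
# to each node process. A sketch of how a child process might read them back
# (read_lazyflow_limits is a hypothetical helper; the variable names are the
# ones set above):
import os

def read_lazyflow_limits():
    """Return (num_threads, total_ram_mb), with None for any unset variable."""
    threads = os.environ.get("LAZYFLOW_THREADS")
    ram_mb = os.environ.get("LAZYFLOW_TOTAL_RAM_MB")
    return (int(threads) if threads else None,
            int(ram_mb) if ram_mb else None)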
def runWorkflow(parsed_args):
    args = parsed_args

    # Read the config file
    configFilePath = args.option_config_file
    config = parseClusterConfigFile(configFilePath)

    # If we've got a process name, re-initialize the logger from scratch
    task_name = "node"
    if args.process_name is not None:
        task_name = args.process_name
        ilastik.ilastik_logging.default_config.init(args.process_name + ' ')

    rootLogHandler = None
    if args._node_work_ is None:
        # This is the master process.
        # Tee the log to a file for future reference.

        # Output log directory might be a relative path (relative to config file)
        absLogDir, _ = getPathVariants(config.output_log_directory,
                                       os.path.split(configFilePath)[0])
        if not os.path.exists(absLogDir):
            os.mkdir(absLogDir)

        # Copy the config we're using to the output directory
        shutil.copy(configFilePath, absLogDir)

        logFile = os.path.join(absLogDir, "MASTER.log")
        logFileFormatter = logging.Formatter("%(levelname)s %(name)s: %(message)s")
        rootLogHandler = logging.FileHandler(logFile, 'a')
        rootLogHandler.setFormatter(logFileFormatter)
        rootLogger = logging.getLogger()
        rootLogger.addHandler(rootLogHandler)
        logger.info("Launched with sys.argv: {}".format(sys.argv))

    # Update the monkey_patch settings
    ilastik.utility.monkey_patches.apply_setting_dict(config.__dict__)

    # If we're running a node job, set the threadpool size if the user specified one.
    # Note that the main thread does not count toward the threadpool total.
    if args._node_work_ is not None and config.task_threadpool_size is not None:
        lazyflow.request.Request.reset_thread_pool(num_workers=config.task_threadpool_size)

    # Make sure project file exists.
    if not os.path.exists(args.project):
        raise RuntimeError("Project file '" + args.project + "' does not exist.")

    # Instantiate 'shell'
    shell = HeadlessShell(functools.partial(Workflow.getSubclass(config.workflow_type)))

    # Load project (auto-import it if necessary)
    logger.info("Opening project: '" + args.project + "'")
    shell.openProjectPath(args.project)

    workflow = shell.projectManager.workflow

    # Attach cluster operators
    resultSlot = None
    finalOutputSlot = workflow.getHeadlessOutputSlot(config.output_slot_id)
    assert finalOutputSlot is not None

    secondaryOutputSlots = workflow.getSecondaryHeadlessOutputSlots(config.output_slot_id)
    secondaryOutputDescriptions = args.secondary_output_description_file  # This is a list (see 'action' above)
    if len(secondaryOutputDescriptions) != len(secondaryOutputSlots):
        raise RuntimeError("This workflow produces exactly {} SECONDARY outputs. You provided {}.".format(
            len(secondaryOutputSlots), len(secondaryOutputDescriptions)))

    clusterOperator = None
    try:
        if args._node_work_ is not None:
            # We're doing node work
            opClusterTaskWorker = OperatorWrapper(OpTaskWorker,
                                                  parent=finalOutputSlot.getRealOperator().parent)

            # FIXME: Image index is hard-coded as 0. We assume we are working with only one (big) dataset in cluster mode.
            opClusterTaskWorker.Input.connect(finalOutputSlot)
            opClusterTaskWorker.RoiString[0].setValue(args._node_work_)
            opClusterTaskWorker.TaskName.setValue(task_name)
            opClusterTaskWorker.ConfigFilePath.setValue(args.option_config_file)

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterTaskWorker.SecondaryInputs[0].resize(len(secondaryOutputSlots))
            opClusterTaskWorker.SecondaryOutputDescriptions[0].resize(len(secondaryOutputSlots))
            for i in range(len(secondaryOutputSlots)):
                opClusterTaskWorker.SecondaryInputs[0][i].connect(secondaryOutputSlots[i][0])
                opClusterTaskWorker.SecondaryOutputDescriptions[0][i].setValue(secondaryOutputDescriptions[i])

            opClusterTaskWorker.OutputFilesetDescription.setValue(args.output_description_file)

            # If we have a way to report task progress (e.g. by updating the job name),
            # then subscribe to progress signals
            if config.task_progress_update_command is not None:
                def report_progress(progress):
                    cmd = config.task_progress_update_command.format(progress=int(progress))
                    def shell_call(shell_cmd):
                        logger.debug("Executing progress command: " + cmd)
                        subprocess.call(shell_cmd, shell=True)
                    background_tasks.put(functools.partial(shell_call, cmd))
                opClusterTaskWorker.innerOperators[0].progressSignal.subscribe(report_progress)

            resultSlot = opClusterTaskWorker.ReturnCode
            clusterOperator = opClusterTaskWorker
        else:
            # We're the master
            opClusterizeMaster = OperatorWrapper(OpClusterize,
                                                 parent=finalOutputSlot.getRealOperator().parent)

            opClusterizeMaster.Input.connect(finalOutputSlot)
            opClusterizeMaster.ProjectFilePath.setValue(args.project)
            opClusterizeMaster.OutputDatasetDescription.setValue(args.output_description_file)

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterizeMaster.SecondaryInputs[0].resize(len(secondaryOutputSlots))
            opClusterizeMaster.SecondaryOutputDescriptions[0].resize(len(secondaryOutputSlots))
            for i in range(len(secondaryOutputSlots)):
                opClusterizeMaster.SecondaryInputs[0][i].connect(secondaryOutputSlots[i][0])
                opClusterizeMaster.SecondaryOutputDescriptions[0][i].setValue(secondaryOutputDescriptions[i])

            opClusterizeMaster.ConfigFilePath.setValue(args.option_config_file)

            resultSlot = opClusterizeMaster.ReturnCode
            clusterOperator = opClusterizeMaster

        # Get the result
        logger.info("Starting task")
        result = resultSlot[0].value  # FIXME: The image index is hard-coded here.
    finally:
        logger.info("Cleaning up")
        global stop_background_tasks
        stop_background_tasks = True

        try:
            if clusterOperator is not None:
                clusterOperator.cleanUp()
        except:
            logger.error("Errors during cleanup.")

        try:
            logger.info("Closing project...")
            shell.closeCurrentProject()
        except:
            logger.error("Errors while closing project.")

    logger.info("FINISHED with result {}".format(result))
    if not result:
        logger.error("FAILED TO COMPLETE!")

    if rootLogHandler is not None:
        rootLogHandler.close()
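# report_progress above only enqueues work; a separate thread must drain the
# queue so the shell call never blocks the compute threads. The real module
# defines background_tasks and stop_background_tasks elsewhere; this is a
# minimal stand-alone sketch of that consumer pattern (Python 3 names assumed):
import queue
import threading

background_tasks = queue.Queue()
stop_background_tasks = False

def _drain_background_tasks():
    # Poll with a timeout so the thread notices the stop flag promptly.
    while not stop_background_tasks:
        try:
            task = background_tasks.get(timeout=1.0)
        except queue.Empty:
            continue
        task()

threading.Thread(target=_drain_background_tasks, daemon=True).start()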