def runWorkflow(parsed_args):
    args = parsed_args

    # Read the config file
    configFilePath = args.option_config_file
    config = parseClusterConfigFile( configFilePath )
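    # parseClusterConfigFile returns a config object whose attributes are read
    # below: output_log_directory, workflow_type, output_slot_id,
    # task_threadpool_size, and task_progress_update_command, plus any
    # monkey-patch settings (forwarded wholesale via config.__dict__).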

    # If we've got a process name, re-initialize the logger from scratch
    task_name = "node"
    if args.process_name is not None:
        task_name = args.process_name
        ilastik.ilastik_logging.default_config.init(args.process_name + ' ')

    rootLogHandler = None
    if args._node_work_ is None:
        # This is the master process.
        # Tee the log to a file for future reference.

        # Output log directory might be a relative path (relative to config file)
        absLogDir, _ = getPathVariants( config.output_log_directory, os.path.split(configFilePath)[0] )
        if not os.path.exists(absLogDir):
            os.mkdir(absLogDir)

        # Copy the config we're using to the output directory
        shutil.copy(configFilePath, absLogDir)
        
        logFile = os.path.join( absLogDir, "MASTER.log" )
        logFileFormatter = logging.Formatter("%(levelname)s %(name)s: %(message)s")
        rootLogHandler = logging.FileHandler(logFile, 'a')
        rootLogHandler.setFormatter(logFileFormatter)
        rootLogger = logging.getLogger()
        rootLogger.addHandler( rootLogHandler )
        logger.info( "Launched with sys.argv: {}".format( sys.argv ) )

    # Update the monkey_patch settings
    ilastik.utility.monkey_patches.apply_setting_dict( config.__dict__ )

    # If we're running a node job, set the threadpool size if the user specified one.
    # Note that the main thread does not count toward the threadpool total.
    if args._node_work_ is not None and config.task_threadpool_size is not None:
        lazyflow.request.Request.reset_thread_pool( num_workers = config.task_threadpool_size )

    # Make sure project file exists.
    if not os.path.exists(args.project):
        raise RuntimeError("Project file '" + args.project + "' does not exist.")

    # Instantiate 'shell'
    shell = HeadlessShell( functools.partial(Workflow.getSubclass(config.workflow_type) ) )
    
    # Load project (auto-import it if necessary)
    logger.info("Opening project: '" + args.project + "'")
    shell.openProjectPath(args.project)

    workflow = shell.projectManager.workflow
            
    # Attach cluster operators
    resultSlot = None
    finalOutputSlot = workflow.getHeadlessOutputSlot( config.output_slot_id )
    assert finalOutputSlot is not None

    secondaryOutputSlots = workflow.getSecondaryHeadlessOutputSlots( config.output_slot_id )
    secondaryOutputDescriptions = args.secondary_output_description_file # This is a list (see 'action' above)
    if len(secondaryOutputDescriptions) != len(secondaryOutputSlots):
        raise RuntimeError( "This workflow produces exactly {} SECONDARY outputs.  You provided {}.".format( len(secondaryOutputSlots), len(secondaryOutputDescriptions) ) )
    
    clusterOperator = None
    try:
        if args._node_work_ is not None:
            # We're doing node work
            opClusterTaskWorker = OperatorWrapper( OpTaskWorker, parent=finalOutputSlot.getRealOperator().parent )

            # FIXME: Image index is hard-coded as 0.  We assume we are working with only one (big) dataset in cluster mode.            
            opClusterTaskWorker.Input.connect( finalOutputSlot )
            opClusterTaskWorker.RoiString[0].setValue( args._node_work_ )
            opClusterTaskWorker.TaskName.setValue( task_name )
            opClusterTaskWorker.ConfigFilePath.setValue( args.option_config_file )

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterTaskWorker.SecondaryInputs[0].resize( len( secondaryOutputSlots ) )
            opClusterTaskWorker.SecondaryOutputDescriptions[0].resize( len( secondaryOutputSlots ) )
            for i in range( len(secondaryOutputSlots) ):
                opClusterTaskWorker.SecondaryInputs[0][i].connect( secondaryOutputSlots[i][0] )
                opClusterTaskWorker.SecondaryOutputDescriptions[0][i].setValue( secondaryOutputDescriptions[i] )

            opClusterTaskWorker.OutputFilesetDescription.setValue( args.output_description_file )
    
            # If we have a way to report task progress (e.g. by updating the job name),
            #  then subscribe to progress signals
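            #  (A hypothetical example for an SGE-style scheduler:
            #   task_progress_update_command = 'qalter -N node_{progress}pct $JOB_ID')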
            if config.task_progress_update_command is not None:
                def report_progress( progress ):
                    cmd = config.task_progress_update_command.format( progress=int(progress) )
                    def shell_call(shell_cmd):
                        logger.debug( "Executing progress command: " + cmd )
                        subprocess.call( shell_cmd, shell=True )
                    background_tasks.put( functools.partial( shell_call, cmd ) )
                opClusterTaskWorker.innerOperators[0].progressSignal.subscribe( report_progress )
            
            resultSlot = opClusterTaskWorker.ReturnCode
            clusterOperator = opClusterTaskWorker
        else:
            # We're the master
            opClusterizeMaster = OperatorWrapper( OpClusterize, parent=finalOutputSlot.getRealOperator().parent )

            opClusterizeMaster.Input.connect( finalOutputSlot )
            opClusterizeMaster.ProjectFilePath.setValue( args.project )
            opClusterizeMaster.OutputDatasetDescription.setValue( args.output_description_file )

            # Configure optional slots first for efficiency (avoid multiple calls to setupOutputs)
            opClusterizeMaster.SecondaryInputs[0].resize( len( secondaryOutputSlots ) )
            opClusterizeMaster.SecondaryOutputDescriptions[0].resize( len( secondaryOutputSlots ) )
            for i in range( len(secondaryOutputSlots) ):
                opClusterizeMaster.SecondaryInputs[0][i].connect( secondaryOutputSlots[i][0] )
                opClusterizeMaster.SecondaryOutputDescriptions[0][i].setValue( secondaryOutputDescriptions[i] )    

            opClusterizeMaster.ConfigFilePath.setValue( args.option_config_file )

            resultSlot = opClusterizeMaster.ReturnCode
            clusterOperator = opClusterizeMaster
        
        # Get the result
        logger.info("Starting task")
        result = resultSlot[0].value # FIXME: The image index is hard-coded here.
    finally:
        logger.info("Cleaning up")
        global stop_background_tasks
        stop_background_tasks = True
        
        try:
            if clusterOperator is not None:
                clusterOperator.cleanUp()
        except Exception:
            logger.exception("Errors during cleanup.")

        try:
            logger.info("Closing project...")
            shell.closeCurrentProject()
        except Exception:
            logger.exception("Errors while closing project.")
    
    logger.info("FINISHED with result {}".format(result))
    if not result:
        logger.error( "FAILED TO COMPLETE!" )

    if rootLogHandler is not None:
        rootLogHandler.close()
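
The function above pushes shell commands onto a module-level background_tasks
queue and flips a stop_background_tasks flag during cleanup, but the consumer
of that queue is not part of this snippet.  A minimal sketch of what the
consumer might look like, assuming background_tasks is a standard queue.Queue
drained by a daemon thread (the helper name and polling interval below are
illustrative, not taken from the original module):

import queue
import threading

background_tasks = queue.Queue()
stop_background_tasks = False

def _drain_background_tasks():
    # Poll with a timeout so the stop flag set in runWorkflow's finally-block
    # is re-checked about once per second.
    while not stop_background_tasks:
        try:
            task = background_tasks.get(timeout=1.0)
        except queue.Empty:
            continue
        task()  # each queued item is a zero-argument callable (functools.partial)

threading.Thread(target=_drain_background_tasks, daemon=True).start()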
Example 2
onfinish = None
if parsed_args.exit_on_success:
    onfinish = QApplication.quit

if parsed_args.playback_script is not None:
    from ilastik.utility.gui.eventRecorder import EventPlayer
    def play_recording(shell):
        player = EventPlayer(parsed_args.playback_speed)
        player.play_script(parsed_args.playback_script, onfinish)
    init_funcs.append( partial(play_recording) )

if parsed_args.exit_on_failure:
    old_excepthook = sys.excepthook
    def print_exc_and_exit(*args):
        old_excepthook(*args)
        sys.stderr.write("Exiting early due to an unhandled exception.  See error output above.\n")
        QApplication.exit(1)
    sys.excepthook = print_exc_and_exit
    install_thread_excepthook()

# Import all possible workflows so they are registered with the base class
import ilastik.workflows

# Ask the base class to give us the workflow type
from ilastik.workflow import Workflow
workflowClass = Workflow.getSubclass(parsed_args.workflow)
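# (Not shown here: Workflow.getSubclass presumably resolves the name against
#  the imported subclasses -- a common implementation walks
#  cls.__subclasses__() and matches on the class name.)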

# Launch the GUI
from ilastik.shell.gui.startShellGui import startShellGui
sys.exit( startShellGui( workflowClass, *init_funcs ) )
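
The install_thread_excepthook() call above matters because exceptions raised
in worker threads do not normally pass through sys.excepthook.  The ilastik
helper itself is not shown in this snippet; the widely used recipe it is
presumably based on wraps threading.Thread.run like this:

import sys
import threading

def install_thread_excepthook():
    # Route exceptions from worker threads through sys.excepthook, which
    # the exit_on_failure branch above replaces with print_exc_and_exit.
    run_original = threading.Thread.run
    def run_patched(self):
        try:
            run_original(self)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            sys.excepthook(*sys.exc_info())
    threading.Thread.run = run_patched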