Example 1
def runfile(script=None,
            raw_args='',
            wdir='.',
            code=None,
            kernel=None,
            **kwargs):
    # This works around how parse_known_args assigns stray values when an
    # optional positional "workflow" argument is defined. That is to say,
    #
    #   --rep 3
    #
    # would be parsed as
    #
    #   args.workflow='3', unknown --rep
    #
    # instead of
    #
    #   args.workflow=None, unknown --rep 3
    #
    # We therefore switch to a parser without the workflow option when the
    # arguments cannot start with a workflow name (see the sketch after this
    # example).
    raw_args = shlex.split(raw_args) if isinstance(raw_args, str) else raw_args
    if (script is None and code is None) or '-h' in raw_args:
        parser = get_run_parser(interactive=True, with_workflow=True)
        parser.print_help()
        return
    if raw_args and raw_args[0].lstrip().startswith('-'):
        parser = get_run_parser(interactive=True, with_workflow=False)
        parser.error = _parse_error
        args, workflow_args = parser.parse_known_args(raw_args)
        args.workflow = None
    else:
        parser = get_run_parser(interactive=True, with_workflow=True)
        parser.error = _parse_error
        args, workflow_args = parser.parse_known_args(raw_args)

    # for reporting purposes
    sys.argv = ['%run'] + raw_args

    env.verbosity = args.verbosity
    if kernel and not isinstance(env.logger.handlers[0],
                                 NotebookLoggingHandler):
        env.logger.handlers = []
        levels = {
            0: logging.ERROR,
            1: logging.WARNING,
            2: logging.INFO,
            3: logging.DEBUG,
            4: logging.TRACE,
            None: logging.INFO
        }
        env.logger.addHandler(
            NotebookLoggingHandler(levels[env.verbosity],
                                   kernel,
                                   title=' '.join(sys.argv)))
    else:
        env.logger.handlers[0].setTitle(' '.join(sys.argv))

    dt = datetime.datetime.now().strftime('%m%d%y_%H%M')
    if args.__dag__ is None:
        args.__dag__ = f'workflow_{dt}.dot'
    elif args.__dag__ == '':
        args.__dag__ = None

    if args.__report__ is None:
        args.__report__ = f'workflow_{dt}.html'
    elif args.__report__ == '':
        args.__report__ = None

    if args.__remote__:
        from sos.utils import load_config_files
        cfg = load_config_files(args.__config__)
        env.sos_dict.set('CONFIG', cfg)

        # if executing on a remote host...
        from sos.hosts import Host
        host = Host(args.__remote__)
        #
        if script is None:
            if not code.strip():
                return
            script = os.path.join('.sos', '__interactive__.sos')
            with open(script, 'w') as s:
                s.write(code)

        # copy script to remote host...
        host.send_to_host(script)
        from sos.utils import remove_arg
        argv = shlex.split(raw_args) if isinstance(raw_args, str) else raw_args
        argv = remove_arg(argv, '-r')
        argv = remove_arg(argv, '-c')
        # execute the command on remote host
        try:
            with kernel.redirect_sos_io():
                ret = host._host_agent.run_command(['sos', 'run', script] +
                                                   argv,
                                                   wait_for_task=True,
                                                   realtime=True)
            if ret:
                kernel.send_response(
                    kernel.iopub_socket, 'stream',
                    dict(name='stderr',
                         text=f'remote execution of workflow exited with code {ret}'))
        except Exception as e:
            if kernel:
                kernel.send_response(kernel.iopub_socket, 'stream', {
                    'name': 'stdout',
                    'text': str(e)
                })
        return

    if args.__bin_dirs__:
        for d in args.__bin_dirs__:
            if d == '~/.sos/bin' and not os.path.isdir(os.path.expanduser(d)):
                os.makedirs(os.path.expanduser(d), exist_ok=True)
        os.environ['PATH'] = os.pathsep.join(
            [os.path.expanduser(x)
             for x in args.__bin_dirs__]) + os.pathsep + os.environ['PATH']

    # clear __step_input__, __step_output__ etc because there is
    # no concept of passing input/outputs across cells.
    env.sos_dict.set('__step_output__', sos_targets([]))
    for k in [
            '__step_input__', '__default_output__', 'step_input',
            'step_output', 'step_depends', '_input', '_output', '_depends'
    ]:
        env.sos_dict.pop(k, None)

    try:
        if script is None:
            if not code.strip():
                return
            if kernel is None:
                script = SoS_Script(content=code)
            else:
                if kernel._workflow_mode:
                    # in workflow mode, the content is sent by magics %run and %sosrun
                    script = SoS_Script(content=code)
                else:
                    # this is a scratch step...
                    # if there is no section header, add a header so that the block
                    # appears to be a SoS script with one section
                    if not any([
                            SOS_SECTION_HEADER.match(line)
                            or line.startswith('%from')
                            or line.startswith('%include')
                            for line in code.splitlines()
                    ]):
                        code = '[scratch_0]\n' + code
                        script = SoS_Script(content=code)
                    else:
                        #kernel.send_frontend_msg('stream',
                        #                         {'name': 'stdout', 'text': 'Workflow cell can only be executed with magic %run or %sosrun.'},
                        #                         title='# SoS warning')
                        return
        else:
            script = SoS_Script(filename=script)
        workflow = script.workflow(args.workflow,
                                   use_default=not args.__targets__)
        env.config: DefaultDict[str, Union[None, bool, str]] = defaultdict(str)
        executor = Interactive_Executor(
            workflow,
            args=workflow_args,
            config={
                'config_file': args.__config__,
                'output_dag': args.__dag__,
                'output_report': args.__report__,
                'sig_mode': 'ignore' if args.dryrun else args.__sig_mode__,
                'default_queue': '' if args.__queue__ is None else args.__queue__,
                # wait if -w or in dryrun mode, do not wait if -W,
                # otherwise use the queue default
                'wait_for_task': True if args.__wait__ is True or args.dryrun
                                 else (False if args.__no_wait__ else None),
                'resume_mode': kernel is not None and kernel._resume_execution,
                'run_mode': 'dryrun' if args.dryrun else 'interactive',
                'verbosity': args.verbosity,
                'max_procs': args.__max_procs__,
                'max_running_jobs': args.__max_running_jobs__,
                # for information and resume only
                'workdir': os.getcwd(),
                'script': 'interactive',
                'workflow': args.workflow,
                'targets': args.__targets__,
                'bin_dirs': args.__bin_dirs__,
                'workflow_args': workflow_args
            })
        return executor.run(args.__targets__)['__last_res__']
    except PendingTasks:
        raise
    except SystemExit:
        # this happens because the executor is in resume mode but nothing
        # needs to be resumed, so we simply pass
        return
    except Exception:
        if args.verbosity and args.verbosity > 2:
            sys.stderr.write(get_traceback())
        raise
    finally:
        env.config['sig_mode'] = 'ignore'
        env.verbosity = 2
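
The pitfall described in the function's header comment can be reproduced with plain argparse. A minimal sketch, independent of SoS's get_run_parser (whose exact option set is an assumption here): an optional positional greedily consumes the value of an unknown option.

import argparse

# parser WITH an optional positional, as get_run_parser(with_workflow=True)
# presumably defines for the workflow name
p = argparse.ArgumentParser()
p.add_argument('workflow', nargs='?')
args, unknown = p.parse_known_args(['--rep', '3'])
print(args.workflow, unknown)   # 3 ['--rep']  -- the value was swallowed

# parser WITHOUT the positional, as with get_run_parser(with_workflow=False)
p = argparse.ArgumentParser()
args, unknown = p.parse_known_args(['--rep', '3'])
print(unknown)                  # ['--rep', '3']  -- the value stays unknown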
Example 2
def main():
    from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, SUPPRESS

    class ArgumentParserError(Exception):
        pass

    class MyArgParser(ArgumentParser):
        def error(self, message):
            raise ArgumentParserError(message)

    #
    p = MyArgParser(description=__doc__,
                    formatter_class=ArgumentDefaultsHelpFormatter,
                    add_help=False)
    p.add_argument('dsc_file',
                   metavar="DSC script",
                   help='DSC script to execute.')
    ce = p.add_argument_group('Benchmark options')
    ce.add_argument('--target',
                    metavar="str",
                    nargs='+',
                    help='''This argument can be used in two contexts:
                   1) When used without "-d" it overrides "DSC::run" in the DSC file.
                   Input should be quoted string(s) defining one or more valid DSC pipelines
                   (multiple pipelines should be separated by spaces).
                   2) When used along with "-d" it specifies one or more computational modules,
                   separated by spaces, whose output is to be removed or replaced by a (smaller) placeholder file.'''
                    )
    ce.add_argument(
        '--truncate',
        action='store_true',
        help='''When applied, DSC will only run one value per parameter.
                   For example with "--truncate", "n: 1,2,3,4,5" will be truncated to "n: 1".
                   This can be used in exploratory analysis and diagnostics, particularly when used in combination with "--target".'''
    )
    ce.add_argument(
        '--replicate',
        metavar='N',
        type=int,
        help=
        '''Overrides "DSC::replicate" to set number of replicates. Will be set to 1 when "--truncate" is in action.'''
    )
    ce.add_argument(
        '-o',
        metavar="str",
        dest='output',
        help=
        '''Benchmark output. It overrides "DSC::output" defined in the DSC file.'''
    )
    mt = p.add_argument_group('Execution modes')
    mt.add_argument('-s',
                    metavar="option",
                    choices=["strict", "lenient", "existing", "all", "none"],
                    dest='__construct__',
                    default="strict",
                    help='''How DSC skips or overwrites existing results.
                   "strict": skips jobs whose input, output and code have not changed since the previous execution.
                   "lenient": skips jobs whose output timestamps are newer than their input.
                   It can be used to avoid re-runs when nuisance changes are made to module scripts that should not impact results.
                   "existing": skips jobs whose output exists, and marks existing output as "up-to-date" for future re-runs.
                   It can be used to avoid re-runs entirely, even after the file status cache has been deleted (as a result of the "-d all" option).
                   "all": skips all modules and only builds the meta-database required to run the `dsc-query` command.
                   It can be used to salvage a partially completed benchmark, making it possible to query it.
                   "none": forces DSC to execute from scratch.''')
    mt.add_argument('--touch',
                    action='store_true',
                    dest='__recover__',
                    help=SUPPRESS)
    mt.add_argument('-e',
                    metavar='option',
                    choices=['stop', 'ignore', 'kill'],
                    dest='error_option',
                    help=SUPPRESS)
    mt.add_argument('-d',
                    metavar="option",
                    choices=["obsolete", "replace", "all"],
                    dest='to_remove',
                    help='''How DSC deletes benchmark files.
                   Use option "all" to remove all output from the current benchmark.
                   "obsolete", when used without "--target", removes from the output folder anything irrelevant
                   to the most recent successful execution of the benchmark.
                   When used with "--target" it deletes the specified files, or files from the specified modules or module groups.
                   "replace", when used with "--target", deletes files as option "obsolete" does with "--target",
                   but additionally puts placeholder files with a "*.zapped" extension in their place, to prevent the module from being executed
                   until its output is needed for re-running a downstream module.
                   It can be used to remove large yet unused intermediate module output without triggering re-runs when possible.'''
                    )
    ro = p.add_argument_group('Computing options')
    ro.add_argument(
        '-c',
        type=int,
        metavar='N',
        default=max(min(int(os.cpu_count() / 2), 8), 1),
        dest='__max_jobs__',
        help=
        '''Maximum number of CPU threads for local runs, or job managing sockets for remote execution.'''
    )
    ro.add_argument(
        '-v',
        '--verbosity',
        type=int,
        choices=list(range(5)),
        default=2,
        help='''Output error (0), warning (1), info (2), debug (3) and trace (4)
                   information.''')
    ro.add_argument(
        '-g',
        dest='__dag__',
        action='store_true',
        help='''Output benchmark execution graph animation in HTML format.''')
    rt = p.add_argument_group('HPC settings')
    rt.add_argument(
        '--host',
        metavar='file',
        help='''Configuration file for DSC computational environments.''')
    ot = p.add_argument_group('Other options')
    ot.add_argument('--version', action='version', version=__version__)
    ot.add_argument("-h",
                    "--help",
                    action="help",
                    help="show this help message and exit")
    ot.add_argument('--debug', action='store_true', help=SUPPRESS)
    p.set_defaults(func=execute)
    if len(sys.argv) > 2 and '-h' in sys.argv:
        try:
            from .dsc_parser import DSC_Script
            script = DSC_Script(sys.argv[1])
            script.print_help(print_version='-v' in sys.argv)
            sys.exit(0)
        except Exception as e:
            if '--debug' in sys.argv:
                raise
            else:
                env.logger.error(
                    f'No help information is available for script {sys.argv[1]}: ``{e}``'
                )
                sys.exit(1)
    try:
        args, unknown_args = p.parse_known_args()
    except Exception as e:
        env.logger.error(e)
        env.logger.info("Please type ``{} -h`` to view available options".\
                        format(os.path.basename(sys.argv[0])))
        sys.exit(1)
    #
    env.verbosity = args.verbosity
    # keep `args.__recover__` to maintain backwards compatibility for `--touch` option.
    if args.__recover__:
        env.logger.warning(
            'Option ``--touch`` is deprecated. Please use ``-s existing`` next time.'
        )
        args.__construct__ = 'existing'
    with Timer(verbose=args.verbosity > 0) as t:
        try:
            args.func(args, unknown_args)
        except KeyboardInterrupt:
            t.disable()
            sys.exit('KeyboardInterrupt')
        except Exception as e:
            if args.debug:
                raise
            if args.verbosity > 2:
                sys.stderr.write(get_traceback())
            t.disable()
            env.logger.error(e)
            sys.exit(1)
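
The MyArgParser subclass exists because ArgumentParser.error() normally prints usage and exits the process with status 2, bypassing env.logger; raising instead lets main() catch the failure and report it itself. A minimal standalone sketch of the same override:

import argparse

class ArgumentParserError(Exception):
    pass

class MyArgParser(argparse.ArgumentParser):
    def error(self, message):
        # the stock implementation prints usage and calls sys.exit(2);
        # raising gives the caller control over reporting and exit codes
        raise ArgumentParserError(message)

p = MyArgParser(prog='demo')
p.add_argument('-n', type=int)
try:
    p.parse_args(['-n', 'oops'])
except ArgumentParserError as e:
    print(f'parse failed: {e}')   # handled instead of an abrupt exit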
Example 3
def execute_scratch_cell(code, raw_args, kernel):
    # Parse without the workflow option when the arguments start with a dash;
    # otherwise the optional positional "workflow" argument would swallow the
    # values of unknown options (e.g. '--rep 3' parsing as args.workflow='3').
    raw_args = shlex.split(raw_args) if isinstance(raw_args, str) else raw_args
    if code is None or '-h' in raw_args:
        parser = get_run_parser(interactive=True, with_workflow=True)
        parser.print_help()
        return
    if raw_args and raw_args[0].lstrip().startswith('-'):
        parser = get_run_parser(interactive=True, with_workflow=False)
        parser.error = _parse_error
        args, workflow_args = parser.parse_known_args(raw_args)
        args.workflow = None
    else:
        parser = get_run_parser(interactive=True, with_workflow=True)
        parser.error = _parse_error
        args, workflow_args = parser.parse_known_args(raw_args)

    if not code.strip():
        return

    # for reporting purposes
    sys.argv = ['%run'] + raw_args
    env.verbosity = args.verbosity

    if not any(
            isinstance(x, NotebookLoggingHandler)
            for x in env.logger.handlers):
        env.logger.handlers = [
            x for x in env.logger.handlers
            if type(x) is not logging.StreamHandler
        ]
        levels = {
            0: logging.ERROR,
            1: logging.WARNING,
            2: logging.INFO,
            3: logging.DEBUG,
            4: logging.DEBUG,
            None: logging.INFO
        }
        env.logger.addHandler(
            NotebookLoggingHandler(levels[env.verbosity],
                                   kernel,
                                   title=' '.join(sys.argv)))
    else:
        env.logger.handlers[0].setTitle(' '.join(sys.argv))

    global last_cell_id
    # we retain step_input etc only when we step through a cell  #256
    if kernel and kernel.cell_id != last_cell_id:
        # clear __step_input__, __step_output__ etc because there is
        # no concept of passing input/outputs across cells.
        env.sos_dict.set('__step_output__', sos_targets([]))
        for k in [
                '__step_input__', '__default_output__', 'step_input',
                'step_output', 'step_depends', '_input', '_output', '_depends'
        ]:
            env.sos_dict.pop(k, None)

    last_cell_id = kernel.cell_id

    config = {
        'config_file': args.__config__,
        'default_queue': args.__queue__,
        'run_mode': 'dryrun' if args.dryrun else 'interactive',
        # issue 230, ignore sig mode in interactive mode
        'sig_mode': 'ignore',
        'verbosity': args.verbosity,
        # for backward compatibility, try args.__worker_procs__ first and
        # fall back to args.__max_procs__
        'worker_procs': args.__worker_procs__
                        if hasattr(args, '__worker_procs__') else args.__max_procs__,
        'max_running_jobs': args.__max_running_jobs__,
        # for information and resume only
        'workdir': os.getcwd(),
        'workflow': args.workflow,
        'targets': args.__targets__,
        'workflow_args': workflow_args,
        'workflow_id': textMD5(code),
        # interactive work is also a slave of the controller
        'slave_id': kernel.cell_id,
    }

    env.sos_dict.set('workflow_id', config['workflow_id'])
    env.config.update(config)

    try:
        if not any([
                SOS_SECTION_HEADER.match(line) or line.startswith('%from')
                or line.startswith('%include') for line in code.splitlines()
        ]):
            code = f'[cell_{str(kernel.cell_id)[:8] if kernel and kernel.cell_id else "0"}]\n' + code
            script = SoS_Script(content=code)
        else:
            return
        workflow = script.workflow(args.workflow)
        section = workflow.sections[0]
        res = analyze_section(section)
        env.sos_dict.quick_update({
            '__signature_vars__': res['signature_vars'],
            '__environ_vars__': res['environ_vars'],
            '__changed_vars__': res['changed_vars']
        })
        executor = Interactive_Step_Executor(section, mode='interactive')
        ret = executor.run()
        try:
            return ret['__last_res__']
        except Exception as e:
            raise RuntimeError(
                f'Unknown result returned from executor {ret}: {e}')
    except (UnknownTarget, RemovedTarget) as e:
        raise RuntimeError(f'Unavailable target {e.target}')
    except TerminateExecution:
        return
    except SystemExit:
        # this happens because the executor is in resume mode but nothing
        # needs to be resumed, so we simply pass
        return
    except Exception:
        env.log_to_file('PROCESS', get_traceback())
        raise
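
The workflow_id above is derived from the cell content via textMD5. The helper is not shown in this example; a plausible stand-in, assuming it is simply an MD5 hex digest of the UTF-8 encoded text:

import hashlib

def textMD5(text):
    # hypothetical stand-in for sos.utils.textMD5: identical cell content
    # always maps to the same workflow_id
    return hashlib.md5(text.encode('utf-8')).hexdigest()

print(textMD5('[cell_0]\nprint("hello")'))  # deterministic 32-char hex id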
Example 4
def runfile(script=None,
            raw_args='',
            wdir='.',
            code=None,
            kernel=None,
            **kwargs):
    # This works around how parse_known_args assigns stray values when an
    # optional positional "workflow" argument is defined. That is to say,
    #
    #   --rep 3
    #
    # would be parsed as
    #
    #   args.workflow='3', unknown --rep
    #
    # instead of
    #
    #   args.workflow=None, unknown --rep 3
    #
    # We therefore switch to a parser without the workflow option when the
    # arguments cannot start with a workflow name.
    args = shlex.split(raw_args) if isinstance(raw_args, str) else raw_args
    if (script is None and code is None) or '-h' in args:
        parser = get_run_parser(interactive=True, with_workflow=True)
        parser.print_help()
        return
    if args and args[0].lstrip().startswith('-'):
        parser = get_run_parser(interactive=True, with_workflow=False)
        parser.error = _parse_error
        args, workflow_args = parser.parse_known_args(args)
        args.workflow = None
    else:
        parser = get_run_parser(interactive=True, with_workflow=True)
        parser.error = _parse_error
        args, workflow_args = parser.parse_known_args(args)

    # no multi-processing in interactive mode
    env.max_jobs = 1
    env.verbosity = args.verbosity

    if args.__queue__ == '':
        from sos.hosts import list_queues
        list_queues(args.__config__, args.verbosity)
        return

    if args.__remote__ is not None:
        from sos.utils import load_config_files
        cfg = load_config_files(args.__config__)
        env.sos_dict.set('CONFIG', cfg)
        if args.__remote__ == '':
            from sos.hosts import list_queues
            list_queues(cfg, args.verbosity)
            return

        # if executing on a remote host...
        from sos.hosts import Host
        host = Host(args.__remote__)
        #
        if script is None:
            if not code.strip():
                return
            script = os.path.join('.sos', '__interactive__.sos')
            with open(script, 'w') as s:
                s.write(code)

        # copy script to remote host...
        host.send_to_host(script)
        from sos.utils import remove_arg
        argv = shlex.split(raw_args) if isinstance(raw_args, str) else raw_args
        argv = remove_arg(argv, '-r')
        argv = remove_arg(argv, '-c')
        # execute the command on remote host
        try:
            with kernel.redirect_sos_io():
                ret = host._host_agent.run_command(['sos', 'run', script] +
                                                   argv,
                                                   wait_for_task=True,
                                                   realtime=True)
            if ret:
                kernel.send_response(
                    kernel.iopub_socket, 'stream',
                    dict(name='stderr',
                         text=f'remote execution of workflow exited with code {ret}'))
        except Exception as e:
            if kernel:
                kernel.send_response(kernel.iopub_socket, 'stream', {
                    'name': 'stdout',
                    'text': str(e)
                })
        return

    if args.__bin_dirs__:
        import fasteners
        for d in args.__bin_dirs__:
            if d == '~/.sos/bin' and not os.path.isdir(os.path.expanduser(d)):
                with fasteners.InterProcessLock(
                        os.path.join(tempfile.gettempdir(), 'sos_lock_bin')):
                    os.makedirs(os.path.expanduser(d))
            elif not os.path.isdir(os.path.expanduser(d)):
                raise ValueError(f'directory does not exist: {d}')
        os.environ['PATH'] = os.pathsep.join(
            [os.path.expanduser(x)
             for x in args.__bin_dirs__]) + os.pathsep + os.environ['PATH']

    # clear __step_input__, __step_output__ etc because there is
    # no concept of passing input/outputs across cells.
    env.sos_dict.set('__step_output__', [])
    for k in ['__step_input__', '__default_output__', 'input', 'output',
              'depends', '_input', '_output', '_depends']:
        env.sos_dict.pop(k, None)

    try:
        if script is None:
            if not code.strip():
                return
            if kernel is None:
                script = SoS_Script(content=code)
            else:
                if kernel._workflow_mode:
                    # in workflow mode, the content is sent by magics %run and %sosrun
                    script = SoS_Script(content=code)
                else:
                    # this is a scratch step...
                    # if there is no section header, add a header so that the block
                    # appears to be a SoS script with one section
                    if not any([
                            SOS_SECTION_HEADER.match(line)
                            or line.startswith('%from')
                            or line.startswith('%include')
                            for line in code.splitlines()
                    ]):
                        code = '[scratch_0]\n' + code
                        script = SoS_Script(content=code)
                    else:
                        if kernel.cell_idx == -1:
                            kernel.send_frontend_msg(
                                'stream', {
                                    'name':
                                    'stdout',
                                    'text':
                                    'Workflow can only be executed with magic %run or %sosrun.'
                                })
                        return
        else:
            script = SoS_Script(filename=script)
        workflow = script.workflow(args.workflow)
        executor = Interactive_Executor(
            workflow,
            args=workflow_args,
            config={
                'config_file': args.__config__,
                'output_dag': args.__dag__,
                'sig_mode': args.__sig_mode__,
                'default_queue': '' if args.__queue__ is None else args.__queue__,
                # wait if -w or in dryrun mode, do not wait if -W,
                # otherwise use the queue default
                'wait_for_task': True if args.__wait__ is True or args.dryrun
                                 else (False if args.__no_wait__ else None),
                'resume_mode': kernel is not None and kernel._resume_execution,
                'run_mode': 'dryrun' if args.dryrun else 'interactive',
                'verbosity': args.verbosity,
                # no multi-processing in interactive mode
                'max_procs': 1,
                'max_running_jobs': args.__max_running_jobs__,
                # for information and resume only
                'workdir': os.getcwd(),
                'script': 'interactive',
                'workflow': args.workflow,
                'targets': args.__targets__,
                'bin_dirs': args.__bin_dirs__,
                'workflow_args': workflow_args
            })
        return executor.run(args.__targets__)
    except PendingTasks:
        raise
    except SystemExit:
        # this happens because the executor is in resume mode but nothing
        # needs to be resumed, so we simply pass
        return
    except Exception:
        if args.verbosity and args.verbosity > 2:
            sys.stderr.write(get_traceback())
        raise
    finally:
        env.config['sig_mode'] = 'ignore'
        env.verbosity = 2
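
Unlike Example 1, this version guards the creation of ~/.sos/bin with fasteners.InterProcessLock so that concurrent kernels cannot race between the isdir check and makedirs. The same pattern in isolation (the lock-file path mirrors the example and is illustrative):

import os
import tempfile

import fasteners  # third-party; pip install fasteners

lock_file = os.path.join(tempfile.gettempdir(), 'sos_lock_bin')  # illustrative
target = os.path.expanduser('~/.sos/bin')
with fasteners.InterProcessLock(lock_file):
    # only one process at a time runs this block, so the existence check
    # and the mkdir cannot interleave across processes
    if not os.path.isdir(target):
        os.makedirs(target)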
Example 5
    def run(self,
            targets=None,
            parent_pipe=None,
            my_workflow_id=None,
            mode='run'):
        '''Execute a block of SoS script sent by IPython/Jupyter/Spyder.
        The code can be simple SoS/Python statements, a single SoS step, or
        one or more SoS workflows with multiple steps. This executor
        1. adds a section header to the script if there is no section header;
        2. executes the workflow in interactive mode, which differs from
           batch mode in a number of ways, most notably in not supporting
           nested workflows;
        3. optionally executes the workflow in preparation mode for
           debugging purposes.
        '''
        # if there is no valid code do nothing
        self.reset_dict()

        # this is the result returned by the workflow, if the
        # last statement is an expression.
        last_res = None

        # process the steps of the pipeline
        if isinstance(targets, str):
            targets = [targets]
        dag = self.initialize_dag(targets=targets)
        #
        # if targets are specified and there are only signatures for them, we need
        # to remove the signature and really generate them
        if targets:
            for t in targets:
                ft = file_target(t)
                if not ft.target_exists('target') and ft.target_exists('signature'):
                    env.logger.debug(f'Re-generating {t}')
                    ft.remove('signature')
                else:
                    env.logger.debug(f'Target {t} already exists')
        #
        while True:
            # find any step that can be executed, run it, and update the DAG
            # with its status.
            runnable = dag.find_executable()
            if runnable is None:
                # no runnable
                #dag.show_nodes()
                break
            # find the section from runnable
            section = self.workflow.section_by_id(runnable._step_uuid)
            #
            # this is to keep compatibility of dag run with sequential run because
            # in sequential run, we evaluate global section of each step in
            # order to determine values of options such as skip.
            # The consequence is that global definitions are available in
            # SoS namespace.
            try:
                SoS_exec(section.global_def)
            except Exception as e:
                if env.verbosity > 2:
                    sys.stderr.write(get_traceback())
                raise RuntimeError(
                    f'Failed to execute statements\n"{section.global_def}"\n{e}'
                )

            # clear existing keys, otherwise results left over from a previous
            # step might mess with the execution of another step that does not
            # define its input
            for k in [
                    '__step_input__', '__default_output__', '__step_output__'
            ]:
                if k in env.sos_dict:
                    env.sos_dict.pop(k)
            # if the step has its own context
            env.sos_dict.quick_update(runnable._context)
            # execute section with specified input
            runnable._status = 'running'
            try:
                executor = Interactive_Step_Executor(section)
                res = executor.run()
                for k, v in res.items():
                    env.sos_dict.set(k, v)
                last_res = res['__last_res__']
                # set context to the next logic step.
                for edge in dag.out_edges(runnable):
                    node = edge[1]
                    # if node is the logical next step...
                    if node._node_index is not None and runnable._node_index is not None:
                        #and node._node_index == runnable._node_index + 1:
                        node._context.update(env.sos_dict.clone_selected_vars(
                            node._context['__signature_vars__'] | node._context['__environ_vars__'] \
                            | {'_input', '__step_output__', '__default_output__', '__args__'}))
                    node._context['__completed__'].append(res['__step_name__'])
                runnable._status = 'completed'
            except (UnknownTarget, RemovedTarget) as e:
                runnable._status = None
                target = e.target
                if dag.regenerate_target(target):
                    #runnable._depends_targets.append(target)
                    #dag._all_dependent_files[target].append(runnable)
                    dag.build(self.workflow.auxiliary_sections)
                    #
                    cycle = dag.circular_dependencies()
                    if cycle:
                        raise RuntimeError(
                            f'Circular dependency detected {cycle} after regeneration. It is likely a later step produces input of a previous step.'
                        )
                else:
                    if self.resolve_dangling_targets(dag, [target]) == 0:
                        raise RuntimeError(
                            f'Failed to regenerate or resolve {target}{dag.steps_depending_on(target, self.workflow)}.'
                        )
                    runnable._depends_targets.append(target)
                    dag._all_dependent_files[target].append(runnable)
                    dag.build(self.workflow.auxiliary_sections)
                    #
                    cycle = dag.circular_dependencies()
                    if cycle:
                        raise RuntimeError(
                            f'Circular dependency detected {cycle}. It is likely a later step produces input of a previous step.'
                        )
                self.save_dag(dag)
            except UnavailableLock as e:
                runnable._status = 'pending'
                runnable._signature = (e.output, e.sig_file)
                env.logger.debug(
                    f'Waiting on another process for step {section.step_name()}'
                )
            except PendingTasks as e:
                self.record_quit_status(e.tasks)
                raise
            # if the job is failed
            except Exception:
                runnable._status = 'failed'
                raise
        if self.md5:
            self.save_workflow_signature(dag)
            env.logger.debug(
                f'Workflow {self.workflow.name} (ID={self.md5}) was executed successfully.'
            )
        # remove task pending status if the workflow is completed normally
        try:
            wf_status = os.path.join(os.path.expanduser('~'), '.sos',
                                     self.md5 + '.status')
            if os.path.isfile(wf_status):
                os.remove(wf_status)
        except Exception as e:
            env.logger.warning(f'Failed to clear workflow status file: {e}')
        return last_res
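
Stripped of SoS specifics, run() is a ready-node scheduling loop over the DAG: pick an executable step, run it, and re-plan when a target turns out to be missing. A self-contained toy of that control flow (ToyDAG and its methods are illustrative stand-ins, not the SoS API):

class RemovedTarget(Exception):
    def __init__(self, target):
        super().__init__(target)
        self.target = target

class ToyDAG:
    # illustrative stand-in: nodes are dicts with 'action' and 'status'
    def __init__(self, nodes):
        self.nodes = nodes

    def find_executable(self):
        # the real DAG also checks dependencies; here: first unstarted node
        return next((n for n in self.nodes if n['status'] is None), None)

def run_dag(dag):
    while True:
        runnable = dag.find_executable()
        if runnable is None:
            break                       # nothing left to schedule
        runnable['status'] = 'running'
        try:
            runnable['action']()
            runnable['status'] = 'completed'
        except RemovedTarget as e:
            # the real executor regenerates the target, rebuilds the DAG
            # and retries the node; the toy just reports and gives up
            runnable['status'] = 'failed'
            raise RuntimeError(f'cannot regenerate {e.target}') from e

run_dag(ToyDAG([{'action': lambda: print('step 1'), 'status': None},
                {'action': lambda: print('step 2'), 'status': None}]))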
Example 6
def runfile(script=None, args='', wdir='.', code=None, **kwargs):
    # This works around how parse_known_args assigns stray values when an
    # optional positional "workflow" argument is defined. That is to say,
    #
    #   --rep 3
    #
    # would be parsed as
    #
    #   args.workflow='3', unknown --rep
    #
    # instead of
    #
    #   args.workflow=None, unknown --rep 3
    #
    # We therefore switch to a parser without the workflow option when the
    # arguments cannot start with a workflow name.
    if isinstance(args, str):
        args = shlex.split(args)
    if (script is None and code is None) or '-h' in args:
        parser = get_run_parser(interactive=True, with_workflow=True)
        parser.print_help()
        return
    if args and args[0].lstrip().startswith('-'):
        parser = get_run_parser(interactive=True, with_workflow=False)
        parser.error = _parse_error
        args, workflow_args = parser.parse_known_args(args)
        args.workflow = None
    else:
        parser = get_run_parser(interactive=True, with_workflow=True)
        parser.error = _parse_error
        args, workflow_args = parser.parse_known_args(args)

    # no multi-processing in interactive mode
    env.max_jobs = 1
    env.verbosity = args.verbosity
    env.__task_engine__ = 'interactive'

    #
    env.sig_mode = args.__sigmode__

    if args.__bin_dirs__:
        import fasteners
        for d in args.__bin_dirs__:
            if d == '~/.sos/bin' and not os.path.isdir(os.path.expanduser(d)):
                with fasteners.InterProcessLock('/tmp/sos_lock_bin'):
                    os.makedirs(os.path.expanduser(d))
            elif not os.path.isdir(os.path.expanduser(d)):
                raise ValueError('directory does not exist: {}'.format(d))
        os.environ['PATH'] = os.pathsep.join([os.path.expanduser(x) for x in args.__bin_dirs__]) + os.pathsep + os.environ['PATH']

    # clear __step_input__, __step_output__ etc because there is
    # no concept of passing input/outputs across cells.
    env.sos_dict.set('__step_output__', [])
    for k in ['__step_input__', '__default_output__', 'input', 'output',
              'depends', '_input', '_output', '_depends']:
        env.sos_dict.pop(k, None)

    try:
        if script is None:
            if not code.strip():
                return
            # if there is no section header, add a header so that the block
            # appears to be a SoS script with one section
            if not any([SOS_SECTION_HEADER.match(line) for line in code.splitlines()]):
                code = '[interactive_0]\n' + code
            script = SoS_Script(content=code, global_sigil=get_default_global_sigil())
        else:
            script = SoS_Script(filename=script, global_sigil=get_default_global_sigil())
        workflow = script.workflow(args.workflow)
        executor = Interactive_Executor(workflow, args=workflow_args, config={
            'config_file': args.__config__,
            'output_dag': args.__dag__,
            'report_output': args.__report__})

        if args.__dryrun__:
            return executor.dryrun(args.__targets__)
        else:
            return executor.run(args.__targets__)
    except Exception:
        if args.verbosity and args.verbosity > 2:
            sys.stderr.write(get_traceback())
        raise
    finally:
        env.sig_mode = 'default'
        env.verbosity = 1
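
Like Examples 1, 3 and 4, this version wraps header-less cell code so it parses as a one-section script. A hedged reproduction with a simplified header pattern (the real SOS_SECTION_HEADER regex is more elaborate and also handles section options):

import re

SECTION_HEADER = re.compile(r'^\[\s*\w+.*\]\s*$')  # simplified stand-in

def wrap_scratch_cell(code):
    # prepend a synthetic header when no line looks like a section header
    if not any(SECTION_HEADER.match(line) for line in code.splitlines()):
        code = '[interactive_0]\n' + code
    return code

print(wrap_scratch_cell('print(1)').splitlines()[0])            # [interactive_0]
print(wrap_scratch_cell('[step_1]\nprint(1)').splitlines()[0])  # [step_1]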
Example 7
    def run(self, targets=None, mode='interactive'):
        '''Execute a block of SoS script sent by IPython/Jupyter/Spyder.
        The code can be simple SoS/Python statements, a single SoS step, or
        one or more SoS workflows with multiple steps. This executor
        1. adds a section header to the script if there is no section header;
        2. executes the workflow in interactive mode, which differs from
           batch mode in a number of ways, most notably in not supporting
           nested workflows;
        3. optionally executes the workflow in preparation mode for
           debugging purposes.
        '''
        # if there is no valid code do nothing
        self.set_dict()

        # this is the result returned by the workflow, if the
        # last statement is an expression.
        last_res = None

        env.run_mode = mode

        # process the steps of the pipeline
        if isinstance(targets, str):
            targets = [targets]
        dag = self.initialize_dag(targets=targets)
        #
        # if targets are specified and there are only signatures for them, we need
        # to remove the signature and really generate them
        if targets:
            for t in targets:
                if not FileTarget(t).exists('target'):
                    FileTarget(t).remove('signature')
        #
        self.set_dict()
        while True:
            # find any step that can be executed, run it, and update the DAG
            # with its status.
            runnable = dag.find_executable()
            if runnable is None:
                # no runnable
                #dag.show_nodes()
                break
            # find the section from runnable
            section = self.workflow.section_by_id(runnable._step_uuid)
            #
            # this is to keep compatibility of dag run with sequential run because
            # in sequential run, we evaluate global section of each step in
            # order to determine values of options such as skip.
            # The consequence is that global definitions are available in
            # SoS namespace.
            try:
                SoS_exec(section.global_def, section.global_sigil)
            except Exception as e:
                if env.verbosity > 2:
                    sys.stderr.write(get_traceback())
                raise RuntimeError('Failed to execute statements\n"{}"\n{}'.format(
                    section.global_def, e))

            # clear existing keys, otherwise results left over from a previous
            # step might mess with the execution of another step that does not
            # define its input
            for k in ['__step_input__', '__default_output__', '__step_output__']:
                if k in env.sos_dict:
                    env.sos_dict.pop(k)
            # if the step has its own context
            env.sos_dict.quick_update(runnable._context)
            # execute section with specified input
            runnable._status = 'running'
            try:
                executor = Interactive_Step_Executor(section)
                res = executor.run()
                for k, v in res.items():
                    env.sos_dict.set(k, v)
                last_res = res['__last_res__']
                # set context to the next logic step.
                for edge in dag.out_edges(runnable):
                    node = edge[1]
                    # if node is the logical next step...
                    if node._node_index is not None and runnable._node_index is not None:
                        #and node._node_index == runnable._node_index + 1:
                        node._context.update(env.sos_dict.clone_selected_vars(
                            node._context['__signature_vars__'] | node._context['__environ_vars__'] \
                            | {'_input', '__step_output__', '__default_output__', '__args__'}))
                runnable._status = 'completed'
            except UnknownTarget as e:
                runnable._status = None
                target = e.target
                if self.resolve_dangling_targets(dag, [target]) == 0:
                    raise RuntimeError('Failed to resolve {}{}.'
                        .format(target, dag.steps_depending_on(target, self.workflow)))
                # now, there should be no dangling targets, let us connect nodes
                # this can be done more efficiently
                runnable._depends_targets.append(target)
                dag._all_dependent_files[target].append(runnable)
                #
                dag.build(self.workflow.auxiliary_sections)
                #dag.show_nodes()
                cycle = dag.circular_dependencies()
                if cycle:
                    raise RuntimeError('Circular dependency detected {}. It is likely a later step produces input of a previous step.'.format(cycle))
            except RemovedTarget as e:
                runnable._status = None
                target = e.target
                if not dag.regenerate_target(target):
                    if self.resolve_dangling_targets(dag, [target]) == 0:
                        raise RuntimeError('Failed to regenerate or resolve {}{}.'
                            .format(target, dag.steps_depending_on(target, self.workflow)))
                    runnable._depends_targets.append(target)
                    dag._all_dependent_files[target].append(runnable)
                    dag.build(self.workflow.auxiliary_sections)
                    #
                    cycle = dag.circular_dependencies()
                    if cycle:
                        raise RuntimeError('Circular dependency detected {}. It is likely a later step produces input of a previous step.'.format(cycle))
                self.save_dag(dag)
            except UnavailableLock as e:
                runnable._status = 'pending'
                runnable._signature = (e.output, e.sig_file)
                env.logger.info('Waiting on another process for step {}'.format(section.step_name()))
            # if the job is failed
            except Exception:
                runnable._status = 'failed'
                raise
        if self.md5:
            self.save_workflow_signature(dag)
            env.logger.info('Workflow {} (ID={}) was executed successfully.'.format(self.workflow.name, self.md5))
        return last_res
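
The context hand-off near the end of the loop copies only the variables named in the next node's signature and environ sets, plus a few bookkeeping keys. The same selection with plain dicts and set unions (clone_selected_vars here is an illustrative stand-in for the env.sos_dict method):

def clone_selected_vars(namespace, names):
    # keep only the requested names that actually exist in the namespace
    return {k: v for k, v in namespace.items() if k in names}

namespace = {'_input': ['a.txt'], '__step_output__': ['b.txt'], 'scratch': 42}
wanted = {'n'} | {'_input', '__step_output__', '__default_output__', '__args__'}
print(clone_selected_vars(namespace, wanted))
# -> {'_input': ['a.txt'], '__step_output__': ['b.txt']}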