def runfile(script=None, raw_args='', wdir='.', code=None, kernel=None, **kwargs):
    """Run a SoS script file, or a block of SoS code, from an interactive session.

    Args:
        script: Path of the script file to execute. When ``None`` the content
            of ``code`` is executed instead.
        raw_args: Options of the ``%run`` magic, either as a single string
            (split with ``shlex``) or as an already split list.
        wdir: Not used by this function.
        code: Script content, used when ``script`` is ``None``.
        kernel: Notebook kernel used for log display and messaging, or ``None``.
        **kwargs: Accepted for compatibility and ignored.

    Returns:
        The ``'__last_res__'`` entry of the executor result. ``None`` is
        returned when only help is printed, when the code is blank, after a
        remote execution, when a non-workflow cell contains a section header,
        or when the executor exits with ``SystemExit``.

    Raises:
        PendingTasks: Re-raised unchanged.
        Exception: Any other error is re-raised; the traceback is first written
            to ``sys.stderr`` when verbosity is greater than 2.
    """
    # this has something to do with Prefix matching rule of parse_known_args
    #
    # That is to say
    #
    #   --rep 3
    #
    # would be parsed as
    #
    #   args.workflow=3, unknown --rep
    #
    # instead of
    #
    #   args.workflow=None, unknown --rep 3
    #
    # we then have to change the parse to disable args.workflow when
    # there is no workflow option.
    raw_args = shlex.split(raw_args) if isinstance(raw_args, str) else raw_args
    if (script is None and code is None) or '-h' in raw_args:
        parser = get_run_parser(interactive=True, with_workflow=True)
        parser.print_help()
        return

    # a leading option means that no workflow name was given
    with_workflow = not (raw_args and raw_args[0].lstrip().startswith('-'))
    parser = get_run_parser(interactive=True, with_workflow=with_workflow)
    parser.error = _parse_error
    args, workflow_args = parser.parse_known_args(raw_args)
    if not with_workflow:
        args.workflow = None

    # for reporting purpose
    sys.argv = ['%run'] + raw_args
    title = ' '.join(sys.argv)

    env.verbosity = args.verbosity
    has_notebook_handler = bool(env.logger.handlers) and isinstance(
        env.logger.handlers[0], NotebookLoggingHandler)
    if kernel and not has_notebook_handler:
        env.logger.handlers = []
        levels = {
            0: logging.ERROR,
            1: logging.WARNING,
            2: logging.INFO,
            3: logging.DEBUG,
            # TRACE is not a standard logging level; fall back to DEBUG when
            # nothing has registered it on the logging module.
            4: getattr(logging, 'TRACE', logging.DEBUG),
            None: logging.INFO
        }
        env.logger.addHandler(
            NotebookLoggingHandler(levels[env.verbosity], kernel, title=title))
    elif has_notebook_handler:
        # The original spelled this attribute "handers", which raised
        # AttributeError whenever this branch was reached.
        env.logger.handlers[0].setTitle(title)

    # None means "use a time-stamped default name", '' means "no output"
    dt = datetime.datetime.now().strftime('%m%d%y_%H%M')
    if args.__dag__ is None:
        args.__dag__ = f'workflow_{dt}.dot'
    elif args.__dag__ == '':
        args.__dag__ = None

    if args.__report__ is None:
        args.__report__ = f'workflow_{dt}.html'
    elif args.__report__ == '':
        args.__report__ = None

    if args.__remote__:
        from sos.utils import load_config_files
        cfg = load_config_files(args.__config__)
        env.sos_dict.set('CONFIG', cfg)

        # if executing on a remote host...
        from sos.hosts import Host
        host = Host(args.__remote__)
        #
        if script is None:
            if not code.strip():
                return
            script = os.path.join('.sos', '__interactive__.sos')
            with open(script, 'w') as s:
                s.write(code)

        # copy script to remote host...
        host.send_to_host(script)
        from sos.utils import remove_arg
        argv = shlex.split(raw_args) if isinstance(raw_args, str) else raw_args
        argv = remove_arg(argv, '-r')
        argv = remove_arg(argv, '-c')
        # execute the command on remote host
        try:
            with kernel.redirect_sos_io():
                ret = host._host_agent.run_command(
                    ['sos', 'run', script] + argv,
                    wait_for_task=True,
                    realtime=True)
            if ret:
                kernel.send_response(
                    kernel.iopub_socket, 'stream',
                    dict(name='stderr',
                         text=f'remote execution of workflow exited with code {ret}'))
        except Exception as e:
            if kernel:
                kernel.send_response(kernel.iopub_socket, 'stream', {
                    'name': 'stdout',
                    'text': str(e)
                })
        return

    if args.__bin_dirs__:
        for d in args.__bin_dirs__:
            if d == '~/.sos/bin' and not os.path.isdir(os.path.expanduser(d)):
                os.makedirs(os.path.expanduser(d), exist_ok=True)
        os.environ['PATH'] = os.pathsep.join(
            [os.path.expanduser(x) for x in args.__bin_dirs__]
        ) + os.pathsep + os.environ['PATH']

    # clear __step_input__, __step_output__ etc because there is
    # no concept of passing input/outputs across cells.
    env.sos_dict.set('__step_output__', sos_targets([]))
    for k in [
            '__step_input__', '__default_output__', 'step_input',
            'step_output', 'step_depends', '_input', '_output', '_depends'
    ]:
        env.sos_dict.pop(k, None)

    try:
        if script is None:
            if not code.strip():
                return
            if kernel is None:
                script = SoS_Script(content=code)
            elif kernel._workflow_mode:
                # in workflow mode, the content is sent by magics %run and %sosrun
                script = SoS_Script(content=code)
            else:
                # this is a scratch step...
                # if there is no section header, add a header so that the block
                # appears to be a SoS script with one section
                has_header = any(
                    SOS_SECTION_HEADER.match(line) or
                    line.startswith('%from') or line.startswith('%include')
                    for line in code.splitlines())
                if has_header:
                    # a workflow cell is silently skipped outside workflow mode
                    return
                code = '[scratch_0]\n' + code
                script = SoS_Script(content=code)
        else:
            script = SoS_Script(filename=script)
        workflow = script.workflow(
            args.workflow, use_default=not args.__targets__)
        env.config: DefaultDict[str, Union[None, bool, str]] = defaultdict(str)
        executor = Interactive_Executor(
            workflow,
            args=workflow_args,
            config={
                'config_file': args.__config__,
                'output_dag': args.__dag__,
                'output_report': args.__report__,
                'sig_mode': 'ignore' if args.dryrun else args.__sig_mode__,
                'default_queue': '' if args.__queue__ is None else args.__queue__,
                # wait if -w or in dryrun mode, not wait if -W, otherwise use queue default
                'wait_for_task': True if args.__wait__ is True or args.dryrun
                                 else (False if args.__no_wait__ else None),
                'resume_mode': kernel is not None and kernel._resume_execution,
                'run_mode': 'dryrun' if args.dryrun else 'interactive',
                'verbosity': args.verbosity,
                'max_procs': args.__max_procs__,
                'max_running_jobs': args.__max_running_jobs__,
                # for information and resume only
                'workdir': os.getcwd(),
                'script': "interactive",
                'workflow': args.workflow,
                'targets': args.__targets__,
                'bin_dirs': args.__bin_dirs__,
                'workflow_args': workflow_args
            })
        return executor.run(args.__targets__)['__last_res__']
    except PendingTasks:
        raise
    except SystemExit:
        # this happens because the executor is in resume mode but nothing
        # needs to be resumed, we simply pass
        return
    except Exception:
        if args.verbosity and args.verbosity > 2:
            sys.stderr.write(get_traceback())
        raise
    finally:
        # restore the interactive defaults whatever the outcome
        env.config['sig_mode'] = 'ignore'
        env.verbosity = 2
def main():
    """Command line entry point: parse DSC options and run the benchmark.

    The parser is built with ``add_help=False``. When ``-h`` appears together
    with at least one other argument, ``sys.argv[1]`` is treated as a DSC
    script and the script-specific help is printed instead of the generic one.

    The process exits with status 1 when the command line cannot be parsed,
    when script help cannot be produced, or when execution fails. With
    ``--debug`` the original exception is re-raised instead.
    """
    from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, SUPPRESS

    class ArgumentParserError(Exception):
        pass

    class MyArgParser(ArgumentParser):
        # raise instead of printing usage and exiting, so that the caller can
        # log the problem through env.logger
        def error(self, message):
            raise ArgumentParserError(message)

    # os.cpu_count() returns None when the number of CPUs cannot be
    # determined; treat that as a single CPU instead of failing with TypeError.
    default_max_jobs = max(min((os.cpu_count() or 1) // 2, 8), 1)

    #
    p = MyArgParser(
        description=__doc__,
        formatter_class=ArgumentDefaultsHelpFormatter,
        add_help=False)
    p.add_argument(
        'dsc_file', metavar="DSC script", help='DSC script to execute.')
    ce = p.add_argument_group('Benchmark options')
    ce.add_argument(
        '--target',
        metavar="str",
        nargs='+',
        help='''This argument can be used in two contexts:
        1) When used without "-d" it overrides "DSC::run" in DSC file.
        Input should be quoted string(s) defining one or multiple valid DSC pipelines
        (multiple pipelines should be separated by space).
        2) When used along with "-d" it specifies one or more computational modules,
        separated by space, whose output are to be removed or replaced by a (smaller) placeholder file.'''
    )
    ce.add_argument(
        '--truncate',
        action='store_true',
        help='''When applied, DSC will only run one value per parameter.
        For example with "--truncate", "n: 1,2,3,4,5" will be truncated to "n: 1".
        This can be used in exploratory analysis and diagnostics, particularly when used in combination with "--target".'''
    )
    ce.add_argument(
        '--replicate',
        metavar='N',
        type=int,
        help='''Overrides "DSC::replicate" to set number of replicates. Will be set to 1 when "--truncate" is in action.'''
    )
    ce.add_argument(
        '-o',
        metavar="str",
        dest='output',
        help='''Benchmark output. It overrides "DSC::output" defined in DSC file.'''
    )
    mt = p.add_argument_group('Execution modes')
    mt.add_argument(
        '-s',
        metavar="option",
        choices=["strict", "lenient", "existing", "all", "none"],
        dest='__construct__',
        default="strict",
        help='''How DSC skips or overwrites existing results.
        "strict": skips jobs whose input, output and code have not been changed since previous execution.
        "lenient": skips jobs whose output timestamp are newer than their input.
        It can be used to avoid re-run when nuisent changes are made to module scripts that should not impact results.
        "existing": skips jobs whose output exists, and mark existing output as "up-to-date" for future re-runs.
        It can be used to avoid re-run completely even after file status cache have been deleted (as a result of "-d all" option).
        "all": skips all modules and only build meta-database required to run `dsc-query` command.
        It can be used for salvaging a partially completed benchmark making it possible to query from it.
        "none": force executes DSC from scratch.''')
    mt.add_argument(
        '--touch', action='store_true', dest='__recover__', help=SUPPRESS)
    mt.add_argument(
        '-e',
        metavar='option',
        choices=['stop', 'ignore', 'kill'],
        dest='error_option',
        help=SUPPRESS)
    mt.add_argument(
        '-d',
        metavar="option",
        choices=["obsolete", "replace", "all"],
        dest='to_remove',
        help='''How DSC deletes benchmark files.
        Use option "all" to remove all output from the current benchmark.
        "obsolete", when used without "--target", removes from output folder anything irrelevant
        to the most recent successful execution of the benchmark.
        When used with "--target" it deletes specified files, or files from specified modules or module groups.
        "replace", when used with "--target", deletes files as option "obsolete" does with "--target",
        but additionally puts in placeholder files with "*.zapped" extension to prevent the module from being executed
        until they are needed for re-running a downstream module.
        It can be used to remove large yet unused intermediate module output without triggering re-runs when possible.'''
    )
    ro = p.add_argument_group('Computing options')
    ro.add_argument(
        '-c',
        type=int,
        metavar='N',
        default=default_max_jobs,
        dest='__max_jobs__',
        help='''Maximum number of CPU threads for local runs, or job managing sockets for remote execution.'''
    )
    ro.add_argument(
        '-v',
        '--verbosity',
        type=int,
        choices=list(range(5)),
        default=2,
        help='''Output error (0), warning (1), info (2), debug (3) and trace (4)
        information.''')
    ro.add_argument(
        '-g',
        dest='__dag__',
        action='store_true',
        help='''Output benchmark execution graph animation in HTML format.''')
    rt = p.add_argument_group('HPC settings')
    rt.add_argument(
        '--host',
        metavar='file',
        help='''Configuration file for DSC computational environments.''')
    ot = p.add_argument_group('Other options')
    ot.add_argument('--version', action='version', version=__version__)
    ot.add_argument(
        "-h", "--help", action="help", help="show this help message and exit")
    ot.add_argument('--debug', action='store_true', help=SUPPRESS)
    p.set_defaults(func=execute)

    # "<script> -h": show the help of the DSC script rather than of this tool
    if len(sys.argv) > 2 and '-h' in sys.argv:
        try:
            from .dsc_parser import DSC_Script
            script = DSC_Script(sys.argv[1])
            script.print_help(print_version='-v' in sys.argv)
            sys.exit(0)
        except Exception as e:
            if '--debug' in sys.argv:
                raise
            env.logger.error(
                f'No help information is available for script {sys.argv[1]}: ``{e}``'
            )
            sys.exit(1)

    try:
        args, unknown_args = p.parse_known_args()
    except Exception as e:
        env.logger.error(e)
        env.logger.info(
            f"Please type ``{os.path.basename(sys.argv[0])} -h`` to view available options"
        )
        sys.exit(1)
    #
    env.verbosity = args.verbosity
    # keep `args.__recover__` to maintain backwards compatibility for `--touch` option.
    if args.__recover__:
        env.logger.warning(
            'Option ``--touch`` is deprecated. Please use ``-s existing`` next time.'
        )
        args.__construct__ = 'existing'
    with Timer(verbose=args.verbosity > 0) as t:
        try:
            args.func(args, unknown_args)
        except KeyboardInterrupt:
            t.disable()
            sys.exit('KeyboardInterrupt')
        except Exception as e:
            if args.debug:
                raise
            if args.verbosity > 2:
                sys.stderr.write(get_traceback())
            t.disable()
            env.logger.error(e)
            sys.exit(1)
def execute_scratch_cell(code, raw_args, kernel):
    """Execute the content of a notebook cell as a single SoS step.

    Args:
        code: Content of the cell. ``None`` prints the parser help.
        raw_args: Options of the magic, as a string (split with ``shlex``) or
            as an already split list.
        kernel: Notebook kernel; its ``cell_id`` is read to decide whether
            step variables of a previous cell have to be cleared.

    Returns:
        The ``'__last_res__'`` entry of the step result. ``None`` is returned
        when help is printed, when the code is blank, when the code contains a
        section header or a ``%from``/``%include`` line, or when execution ends
        with ``TerminateExecution`` or ``SystemExit``.

    Raises:
        RuntimeError: For an unknown or removed target, or when the executor
            result has no usable ``'__last_res__'`` entry.
        Exception: Any other error is re-raised after its traceback has been
            passed to ``env.log_to_file``.
    """
    global last_cell_id

    # we then have to change the parse to disable args.workflow when
    # there is no workflow option.
    raw_args = shlex.split(raw_args) if isinstance(raw_args, str) else raw_args
    if code is None or '-h' in raw_args:
        parser = get_run_parser(interactive=True, with_workflow=True)
        parser.print_help()
        return

    # a leading option means that no workflow name was given
    with_workflow = not (raw_args and raw_args[0].lstrip().startswith('-'))
    parser = get_run_parser(interactive=True, with_workflow=with_workflow)
    parser.error = _parse_error
    args, workflow_args = parser.parse_known_args(raw_args)
    if not with_workflow:
        args.workflow = None

    if not code.strip():
        return

    # for reporting purpose
    sys.argv = ['%run'] + raw_args
    title = ' '.join(sys.argv)
    env.verbosity = args.verbosity

    notebook_handlers = [
        x for x in env.logger.handlers
        if isinstance(x, NotebookLoggingHandler)
    ]
    if not notebook_handlers:
        env.logger.handlers = [
            x for x in env.logger.handlers
            if type(x) is not logging.StreamHandler
        ]
        levels = {
            0: logging.ERROR,
            1: logging.WARNING,
            2: logging.INFO,
            3: logging.DEBUG,
            4: logging.DEBUG,
            None: logging.INFO
        }
        env.logger.addHandler(
            NotebookLoggingHandler(levels[env.verbosity], kernel, title=title))
    else:
        # The original used the misspelled attribute "handers" (AttributeError)
        # and assumed the notebook handler to be the first handler, which the
        # condition above does not guarantee.
        notebook_handlers[0].setTitle(title)

    # we retain step_input etc only when we step through a cell  #256
    if kernel and kernel.cell_id != last_cell_id:
        # clear __step_input__, __step_output__ etc because there is
        # no concept of passing input/outputs across cells.
        env.sos_dict.set('__step_output__', sos_targets([]))
        for k in [
                '__step_input__', '__default_output__', 'step_input',
                'step_output', 'step_depends', '_input', '_output', '_depends'
        ]:
            env.sos_dict.pop(k, None)
        # only needs updating when the cell changed; otherwise it already
        # holds this value
        last_cell_id = kernel.cell_id

    config = {
        'config_file': args.__config__,
        'default_queue': args.__queue__,
        'run_mode': 'dryrun' if args.dryrun else 'interactive',
        # issue 230, ignore sig mode in interactive mode
        'sig_mode': 'ignore',
        'verbosity': args.verbosity,
        # for backward compatibility, we try both args.__worker_procs__ and args.__max_procs__
        'worker_procs': args.__worker_procs__ if hasattr(
            args, '__worker_procs__') else args.__max_procs__,
        'max_running_jobs': args.__max_running_jobs__,
        # for information and resume only
        'workdir': os.getcwd(),
        'workflow': args.workflow,
        'targets': args.__targets__,
        'workflow_args': workflow_args,
        'workflow_id': textMD5(code),
        # interactive work is also a slave of the controller
        'slave_id': kernel.cell_id,
    }
    env.sos_dict.set('workflow_id', config['workflow_id'])
    env.config.update(config)

    try:
        has_header = any(
            SOS_SECTION_HEADER.match(line) or line.startswith('%from') or
            line.startswith('%include') for line in code.splitlines())
        if has_header:
            # a complete workflow is not executed as a scratch cell
            return
        code = f'[cell_{str(kernel.cell_id)[:8] if kernel and kernel.cell_id else "0"}]\n' + code
        script = SoS_Script(content=code)

        workflow = script.workflow(args.workflow)
        section = workflow.sections[0]
        res = analyze_section(section)
        env.sos_dict.quick_update({
            '__signature_vars__': res['signature_vars'],
            '__environ_vars__': res['environ_vars'],
            '__changed_vars__': res['changed_vars']
        })
        executor = Interactive_Step_Executor(section, mode='interactive')
        ret = executor.run()
        try:
            return ret['__last_res__']
        except Exception as e:
            raise RuntimeError(
                f'Unknown result returned from executor {ret}: {e}') from e
    except (UnknownTarget, RemovedTarget) as e:
        raise RuntimeError(f'Unavailable target {e.target}') from e
    except TerminateExecution:
        return
    except SystemExit:
        # this happens because the executor is in resume mode but nothing
        # needs to be resumed, we simply pass
        return
    except Exception:
        env.log_to_file('PROCESS', get_traceback())
        raise
def runfile(script=None, raw_args='', wdir='.', code=None, kernel=None, **kwargs):
    """Run a SoS script file, or a block of SoS code, from an interactive session.

    Args:
        script: Path of the script file to execute. When ``None`` the content
            of ``code`` is executed instead.
        raw_args: Options of the ``%run`` magic, either as a single string
            (split with ``shlex``) or as an already split list.
        wdir: Not used by this function.
        code: Script content, used when ``script`` is ``None``.
        kernel: Notebook kernel used for messaging, or ``None``.
        **kwargs: Accepted for compatibility and ignored.

    Returns:
        The value returned by ``Interactive_Executor.run``. ``None`` is
        returned when help or the list of queues is printed, when the code is
        blank, after a remote execution, when a non-workflow cell contains a
        section header, or when the executor exits with ``SystemExit``.

    Raises:
        ValueError: If a directory given in ``__bin_dirs__`` (other than
            ``~/.sos/bin``, which is created on demand) does not exist.
        PendingTasks: Re-raised unchanged.
        Exception: Any other error is re-raised; the traceback is first written
            to ``sys.stderr`` when verbosity is greater than 2.
    """
    # this has something to do with Prefix matching rule of parse_known_args
    #
    # That is to say
    #
    #   --rep 3
    #
    # would be parsed as
    #
    #   args.workflow=3, unknown --rep
    #
    # instead of
    #
    #   args.workflow=None, unknown --rep 3
    #
    # we then have to change the parse to disable args.workflow when
    # there is no workflow option.
    args = shlex.split(raw_args) if isinstance(raw_args, str) else raw_args
    if (script is None and code is None) or '-h' in args:
        parser = get_run_parser(interactive=True, with_workflow=True)
        parser.print_help()
        return

    # a leading option means that no workflow name was given
    with_workflow = not (args and args[0].lstrip().startswith('-'))
    parser = get_run_parser(interactive=True, with_workflow=with_workflow)
    parser.error = _parse_error
    # from here on ``args`` is the parsed namespace, not the option list
    args, workflow_args = parser.parse_known_args(args)
    if not with_workflow:
        args.workflow = None

    # no multi-processing in interactive mode
    env.max_jobs = 1
    env.verbosity = args.verbosity

    if args.__queue__ == '':
        from sos.hosts import list_queues
        list_queues(args.__config__, args.verbosity)
        return

    # The original tested ``args.__remote__ == ''`` inside ``if args.__remote__:``
    # where it could never be true, so the empty value is accepted here too.
    # NOTE(review): assumes the option defaults to None and yields '' when given
    # without a value, like __queue__ above -- confirm against get_run_parser.
    if args.__remote__ or args.__remote__ == '':
        from sos.utils import load_config_files
        cfg = load_config_files(args.__config__)
        env.sos_dict.set('CONFIG', cfg)
        if args.__remote__ == '':
            # same import and call form as the __queue__ listing above
            # NOTE(review): the dead code passed ``cfg`` as first argument --
            # confirm which form list_queues expects.
            from sos.hosts import list_queues
            list_queues(args.__config__, args.verbosity)
            return

        # if executing on a remote host...
        from sos.hosts import Host
        host = Host(args.__remote__)
        #
        if script is None:
            if not code.strip():
                return
            script = os.path.join('.sos', '__interactive__.sos')
            with open(script, 'w') as s:
                s.write(code)

        # copy script to remote host...
        host.send_to_host(script)
        from sos.utils import remove_arg
        argv = shlex.split(raw_args) if isinstance(raw_args, str) else raw_args
        argv = remove_arg(argv, '-r')
        argv = remove_arg(argv, '-c')
        # execute the command on remote host
        try:
            with kernel.redirect_sos_io():
                ret = host._host_agent.run_command(
                    ['sos', 'run', script] + argv,
                    wait_for_task=True,
                    realtime=True)
            if ret:
                kernel.send_response(
                    kernel.iopub_socket, 'stream',
                    dict(name='stderr',
                         text=f'remote execution of workflow exited with code {ret}'))
        except Exception as e:
            if kernel:
                kernel.send_response(kernel.iopub_socket, 'stream', {
                    'name': 'stdout',
                    'text': str(e)
                })
        return

    if args.__bin_dirs__:
        import fasteners
        for d in args.__bin_dirs__:
            if d == '~/.sos/bin' and not os.path.isdir(os.path.expanduser(d)):
                with fasteners.InterProcessLock(
                        os.path.join(tempfile.gettempdir(), 'sos_lock_bin')):
                    # another process may have created the directory while we
                    # were waiting for the lock
                    os.makedirs(os.path.expanduser(d), exist_ok=True)
            elif not os.path.isdir(os.path.expanduser(d)):
                raise ValueError(f'directory does not exist: {d}')
        os.environ['PATH'] = os.pathsep.join(
            [os.path.expanduser(x) for x in args.__bin_dirs__]
        ) + os.pathsep + os.environ['PATH']

    # clear __step_input__, __step_output__ etc because there is
    # no concept of passing input/outputs across cells.
    env.sos_dict.set('__step_output__', [])
    for k in ['__step_input__', '__default_output__', 'input', 'output',
              'depends', '_input', '_output', '_depends']:
        env.sos_dict.pop(k, None)

    try:
        if script is None:
            if not code.strip():
                return
            if kernel is None:
                script = SoS_Script(content=code)
            elif kernel._workflow_mode:
                # in workflow mode, the content is sent by magics %run and %sosrun
                script = SoS_Script(content=code)
            else:
                # this is a scratch step...
                # if there is no section header, add a header so that the block
                # appears to be a SoS script with one section
                has_header = any(
                    SOS_SECTION_HEADER.match(line) or
                    line.startswith('%from') or line.startswith('%include')
                    for line in code.splitlines())
                if has_header:
                    if kernel.cell_idx == -1:
                        kernel.send_frontend_msg(
                            'stream', {
                                'name': 'stdout',
                                'text': 'Workflow can only be executed with magic %run or %sosrun.'
                            })
                    return
                code = '[scratch_0]\n' + code
                script = SoS_Script(content=code)
        else:
            script = SoS_Script(filename=script)
        workflow = script.workflow(args.workflow)
        executor = Interactive_Executor(
            workflow,
            args=workflow_args,
            config={
                'config_file': args.__config__,
                'output_dag': args.__dag__,
                'sig_mode': args.__sig_mode__,
                'default_queue': '' if args.__queue__ is None else args.__queue__,
                # wait if -w or in dryrun mode, not wait if -W, otherwise use queue default
                'wait_for_task': True if args.__wait__ is True or args.dryrun
                                 else (False if args.__no_wait__ else None),
                'resume_mode': kernel is not None and kernel._resume_execution,
                'run_mode': 'dryrun' if args.dryrun else 'interactive',
                'verbosity': args.verbosity,
                'max_procs': 1,
                'max_running_jobs': args.__max_running_jobs__,
                # for information and resume only
                'workdir': os.getcwd(),
                'script': "interactive",
                'workflow': args.workflow,
                'targets': args.__targets__,
                'bin_dirs': args.__bin_dirs__,
                'workflow_args': workflow_args
            })
        return executor.run(args.__targets__)
    except PendingTasks:
        raise
    except SystemExit:
        # this happens because the executor is in resume mode but nothing
        # needs to be resumed, we simply pass
        return
    except Exception:
        if args.verbosity and args.verbosity > 2:
            sys.stderr.write(get_traceback())
        raise
    finally:
        # restore the interactive defaults whatever the outcome
        env.config['sig_mode'] = 'ignore'
        env.verbosity = 2
def run(self, targets=None, parent_pipe=None, my_workflow_id=None, mode='run'):
    '''Execute a block of SoS script that is sent by IPython/Jupyter/Spyder

    The code can be simple SoS/Python statements, one SoS step, or one or more
    SoS workflows with multiple steps. This executor,
    1. adds a section header to the script if there is no section head
    2. execute the workflow in interactive mode, which is different from
       batch mode in a number of ways, which most notably without support
       for nested workflow.
    3. Optionally execute the workflow in preparation mode for debugging purposes.

    Parameters ``parent_pipe``, ``my_workflow_id`` and ``mode`` are accepted
    but not used by this method.

    Returns the ``'__last_res__'`` entry of the result of the last executed
    step, or ``None`` if no step was executed. ``RuntimeError`` is raised when
    the global definition of a step fails, when a missing target cannot be
    regenerated or resolved, or when a circular dependency is detected.
    ``PendingTasks`` and any other step failure are re-raised.
    '''
    # if there is no valid code do nothing
    self.reset_dict()

    # this is the result returned by the workflow, if the
    # last statement is an expression.
    last_res = None

    # process step of the pipeline
    if isinstance(targets, str):
        targets = [targets]
    dag = self.initialize_dag(targets=targets)
    #
    # if targets are specified and there are only signatures for them, we need
    # to remove the signature and really generate them
    if targets:
        for t in targets:
            if not file_target(t).target_exists('target') and file_target(
                    t).target_exists('signature'):
                env.logger.debug(f'Re-generating {t}')
                file_target(t).remove('signature')
            else:
                env.logger.debug(f'Target {t} already exists')
    #
    while True:
        # find any step that can be executed and run it, and update the DAG
        # with status.
        runnable = dag.find_executable()
        if runnable is None:
            # no runnable
            #dag.show_nodes()
            break
        # find the section from runnable
        section = self.workflow.section_by_id(runnable._step_uuid)
        #
        # this is to keep compatibility of dag run with sequential run because
        # in sequential run, we evaluate global section of each step in
        # order to determine values of options such as skip.
        # The consequence is that global definitions are available in
        # SoS namespace.
        try:
            SoS_exec(section.global_def)
        except Exception as e:
            if env.verbosity > 2:
                sys.stderr.write(get_traceback())
            raise RuntimeError(
                f'Failed to execute statements\n"{section.global_def}"\n{e}'
            ) from e

        # clear existing keys, otherwise the results from some random result
        # might mess with the execution of another step that does not define input
        for k in [
                '__step_input__', '__default_output__', '__step_output__'
        ]:
            if k in env.sos_dict:
                env.sos_dict.pop(k)
        # if the step has its own context
        env.sos_dict.quick_update(runnable._context)
        # execute section with specified input
        runnable._status = 'running'
        try:
            executor = Interactive_Step_Executor(section)
            res = executor.run()
            for k, v in res.items():
                env.sos_dict.set(k, v)
            last_res = res['__last_res__']
            # set context to the next logic step.
            for edge in dag.out_edges(runnable):
                node = edge[1]
                # if node is the logical next step...
                if node._node_index is not None and runnable._node_index is not None:
                    #and node._node_index == runnable._node_index + 1:
                    node._context.update(env.sos_dict.clone_selected_vars(
                        node._context['__signature_vars__'] | node._context['__environ_vars__']
                        | {'_input', '__step_output__', '__default_output__', '__args__'}))
                # NOTE(review): recorded for every downstream node, not only
                # for indexed ones -- confirm against the intended nesting.
                node._context['__completed__'].append(res['__step_name__'])
            runnable._status = 'completed'
        except (UnknownTarget, RemovedTarget) as e:
            # the step needs a target that is not available: reset the step
            # and extend the DAG so that the target gets produced first
            runnable._status = None
            target = e.target
            if dag.regenerate_target(target):
                #runnable._depends_targets.append(target)
                #dag._all_dependent_files[target].append(runnable)
                dag.build(self.workflow.auxiliary_sections)
                #
                cycle = dag.circular_dependencies()
                if cycle:
                    raise RuntimeError(
                        f'Circular dependency detected {cycle} after regeneration. It is likely a later step produces input of a previous step.'
                    )
            else:
                if self.resolve_dangling_targets(dag, [target]) == 0:
                    raise RuntimeError(
                        f'Failed to regenerate or resolve {target}{dag.steps_depending_on(target, self.workflow)}.'
                    )
                runnable._depends_targets.append(target)
                dag._all_dependent_files[target].append(runnable)
                dag.build(self.workflow.auxiliary_sections)
                #
                cycle = dag.circular_dependencies()
                if cycle:
                    raise RuntimeError(
                        f'Circular dependency detected {cycle}. It is likely a later step produces input of a previous step.'
                    )
            self.save_dag(dag)
        except UnavailableLock as e:
            runnable._status = 'pending'
            runnable._signature = (e.output, e.sig_file)
            env.logger.debug(
                f'Waiting on another process for step {section.step_name()}'
            )
        except PendingTasks as e:
            self.record_quit_status(e.tasks)
            raise
        # if the job is failed
        except Exception:
            runnable._status = 'failed'
            raise
    if self.md5:
        self.save_workflow_signature(dag)
        env.logger.debug(
            f'Workflow {self.workflow.name} (ID={self.md5}) is executed successfully.'
        )
        # remove task pending status if the workflow is completed normally
        # (only when there is an ID: the status file name is derived from it,
        # and without one the original logged a spurious warning on every run)
        try:
            wf_status = os.path.join(
                os.path.expanduser('~'), '.sos', self.md5 + '.status')
            if os.path.isfile(wf_status):
                os.remove(wf_status)
        except Exception as e:
            env.logger.warning(f'Failed to clear workflow status file: {e}')
    return last_res
def runfile(script=None, args='', wdir='.', code=None, **kwargs):
    """Run a SoS script file, or a block of SoS code, from an interactive session.

    Args:
        script: Path of the script file to execute. When ``None`` the content
            of ``code`` is executed instead.
        args: Command line options, either as a single string (split with
            ``shlex``) or as an already split list.
        wdir: Not used by this function.
        code: Script content, used when ``script`` is ``None``. A section
            header ``[interactive_0]`` is added if the code has none.
        **kwargs: Accepted for compatibility and ignored.

    Returns:
        The value returned by ``Interactive_Executor.dryrun`` or
        ``Interactive_Executor.run``; ``None`` when help is printed or the
        code is blank.

    Raises:
        ValueError: If a directory given in ``__bin_dirs__`` (other than
            ``~/.sos/bin``, which is created on demand) does not exist.
        Exception: Any execution error is re-raised; the traceback is first
            written to ``sys.stderr`` when verbosity is greater than 2.
    """
    # this has something to do with Prefix matching rule of parse_known_args
    #
    # That is to say
    #
    #   --rep 3
    #
    # would be parsed as
    #
    #   args.workflow=3, unknown --rep
    #
    # instead of
    #
    #   args.workflow=None, unknown --rep 3
    #
    # we then have to change the parse to disable args.workflow when
    # there is no workflow option.
    if isinstance(args, str):
        args = shlex.split(args)
    if (script is None and code is None) or '-h' in args:
        parser = get_run_parser(interactive=True, with_workflow=True)
        parser.print_help()
        return

    # a leading option means that no workflow name was given
    with_workflow = not (args and args[0].lstrip().startswith('-'))
    parser = get_run_parser(interactive=True, with_workflow=with_workflow)
    parser.error = _parse_error
    # from here on ``args`` is the parsed namespace, not the option list
    args, workflow_args = parser.parse_known_args(args)
    if not with_workflow:
        args.workflow = None

    # no multi-processing in interactive mode
    env.max_jobs = 1
    env.verbosity = args.verbosity
    env.__task_engine__ = 'interactive'

    #
    env.sig_mode = args.__sigmode__

    if args.__bin_dirs__:
        import tempfile
        import fasteners
        # keep the lock file in the platform's temporary directory instead of
        # a hard-coded /tmp, which does not exist on every system
        lock_file = os.path.join(tempfile.gettempdir(), 'sos_lock_bin')
        for d in args.__bin_dirs__:
            if d == '~/.sos/bin' and not os.path.isdir(os.path.expanduser(d)):
                with fasteners.InterProcessLock(lock_file):
                    # another process may have created the directory while we
                    # were waiting for the lock
                    os.makedirs(os.path.expanduser(d), exist_ok=True)
            elif not os.path.isdir(os.path.expanduser(d)):
                raise ValueError(f'directory does not exist: {d}')
        os.environ['PATH'] = os.pathsep.join(
            [os.path.expanduser(x) for x in args.__bin_dirs__]
        ) + os.pathsep + os.environ['PATH']

    # clear __step_input__, __step_output__ etc because there is
    # no concept of passing input/outputs across cells.
    env.sos_dict.set('__step_output__', [])
    for k in ['__step_input__', '__default_output__', 'input', 'output',
              'depends', '_input', '_output', '_depends']:
        env.sos_dict.pop(k, None)

    try:
        if script is None:
            if not code.strip():
                return
            # if there is no section header, add a header so that the block
            # appears to be a SoS script with one section
            if not any(SOS_SECTION_HEADER.match(line) for line in code.splitlines()):
                code = '[interactive_0]\n' + code
            script = SoS_Script(content=code, global_sigil=get_default_global_sigil())
        else:
            script = SoS_Script(filename=script, global_sigil=get_default_global_sigil())
        workflow = script.workflow(args.workflow)
        executor = Interactive_Executor(workflow, args=workflow_args, config={
            'config_file': args.__config__,
            'output_dag': args.__dag__,
            'report_output': args.__report__})

        if args.__dryrun__:
            return executor.dryrun(args.__targets__)
        return executor.run(args.__targets__)
    except Exception:
        if args.verbosity and args.verbosity > 2:
            sys.stderr.write(get_traceback())
        raise
    finally:
        # restore the interactive defaults whatever the outcome
        env.sig_mode = 'default'
        env.verbosity = 1
def run(self, targets=None, mode='interactive'):
    '''Execute a block of SoS script that is sent by IPython/Jupyter/Spyder

    The code can be simple SoS/Python statements, one SoS step, or one or more
    SoS workflows with multiple steps. This executor,
    1. adds a section header to the script if there is no section head
    2. execute the workflow in interactive mode, which is different from
       batch mode in a number of ways, which most notably without support
       for nested workflow.
    3. Optionally execute the workflow in preparation mode for debugging purposes.

    ``targets`` may be a single target name or a list of names and is passed
    to ``initialize_dag``; ``mode`` is stored in ``env.run_mode``.

    Returns the ``'__last_res__'`` entry of the result of the last executed
    step, or ``None`` if no step was executed. ``RuntimeError`` is raised when
    the global definition of a step fails, when a missing target cannot be
    resolved, or when a circular dependency is detected; any other step
    failure is re-raised.
    '''
    # if there is no valid code do nothing
    self.set_dict()

    # this is the result returned by the workflow, if the
    # last statement is an expression.
    last_res = None

    env.run_mode = mode

    # process step of the pipeline
    if isinstance(targets, str):
        targets = [targets]
    dag = self.initialize_dag(targets=targets)
    #
    # if targets are specified and there are only signatures for them, we need
    # to remove the signature and really generate them
    if targets:
        for t in targets:
            if not FileTarget(t).exists('target'):
                FileTarget(t).remove('signature')
    #
    # NOTE(review): set_dict() is called a second time here, after the DAG
    # has been initialized -- confirm that both calls are needed.
    self.set_dict()
    while True:
        # find any step that can be executed and run it, and update the DAG
        # with status.
        runnable = dag.find_executable()
        if runnable is None:
            # no runnable
            #dag.show_nodes()
            break
        # find the section from runnable
        section = self.workflow.section_by_id(runnable._step_uuid)
        #
        # this is to keep compatibility of dag run with sequential run because
        # in sequential run, we evaluate global section of each step in
        # order to determine values of options such as skip.
        # The consequence is that global definitions are available in
        # SoS namespace.
        try:
            SoS_exec(section.global_def, section.global_sigil)
        except Exception as e:
            if env.verbosity > 2:
                sys.stderr.write(get_traceback())
            raise RuntimeError('Failed to execute statements\n"{}"\n{}'.format(
                section.global_def, e))

        # clear existing keys, otherwise the results from some random result
        # might mess with the execution of another step that does not define input
        for k in ['__step_input__', '__default_output__', '__step_output__']:
            if k in env.sos_dict:
                env.sos_dict.pop(k)
        # if the step has its own context
        env.sos_dict.quick_update(runnable._context)
        # execute section with specified input
        runnable._status = 'running'
        try:
            executor = Interactive_Step_Executor(section)
            res = executor.run()
            # publish everything returned by the step to the SoS namespace
            for k, v in res.items():
                env.sos_dict.set(k, v)
            last_res = res['__last_res__']
            # set context to the next logic step.
            for edge in dag.out_edges(runnable):
                node = edge[1]
                # if node is the logical next step...
                if node._node_index is not None and runnable._node_index is not None:
                    #and node._node_index == runnable._node_index + 1:
                    node._context.update(env.sos_dict.clone_selected_vars(
                        node._context['__signature_vars__'] | node._context['__environ_vars__'] \
                        | {'_input', '__step_output__', '__default_output__', '__args__'}))
            runnable._status = 'completed'
        except UnknownTarget as e:
            # the step needs a target that no step in the DAG produces yet:
            # reset the step so that it is picked up again later
            runnable._status = None
            target = e.target
            if self.resolve_dangling_targets(dag, [target]) == 0:
                raise RuntimeError('Failed to resolve {}{}.'
                    .format(target, dag.steps_depending_on(target, self.workflow)))
            # now, there should be no dangling targets, let us connect nodes
            # this can be done more efficiently
            runnable._depends_targets.append(target)
            dag._all_dependent_files[target].append(runnable)
            #
            dag.build(self.workflow.auxiliary_sections)
            #dag.show_nodes()
            cycle = dag.circular_dependencies()
            if cycle:
                raise RuntimeError('Circular dependency detected {}. It is likely a later step produces input of a previous step.'.format(cycle))
        except RemovedTarget as e:
            # a target that existed before is gone: reset the step and try to
            # regenerate the target, or otherwise resolve it as a dangling one
            runnable._status = None
            target = e.target
            if not dag.regenerate_target(target):
                if self.resolve_dangling_targets(dag, [target]) == 0:
                    raise RuntimeError('Failed to regenerate or resolve {}{}.'
                        .format(target, dag.steps_depending_on(target, self.workflow)))
                runnable._depends_targets.append(target)
                dag._all_dependent_files[target].append(runnable)
                dag.build(self.workflow.auxiliary_sections)
                #
                cycle = dag.circular_dependencies()
                if cycle:
                    raise RuntimeError('Circular dependency detected {}. It is likely a later step produces input of a previous step.'.format(cycle))
            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # source; the DAG is assumed to be saved on both paths -- confirm.
            self.save_dag(dag)
        except UnavailableLock as e:
            # keep the output and signature file reported by the exception on
            # the node and mark it as pending
            runnable._status = 'pending'
            runnable._signature = (e.output, e.sig_file)
            env.logger.info('Waiting on another process for step {}'.format(section.step_name()))
        # if the job is failed
        except Exception as e:
            runnable._status = 'failed'
            raise
    if self.md5:
        self.save_workflow_signature(dag)
        env.logger.info('Workflow {} (ID={}) is executed successfully.'.format(self.workflow.name, self.md5))
    return last_res