def execute_workflow_step(workflow, task_id, job_data, cwl_args=None, executor=None):
    """
    Constructs and executes a single-step workflow based on "workflow" and
    "task_id". "cwl_args" can be used to update the default parameters used
    for the loading and runtime contexts. Exports a JSON file with the
    execution results.
    """

    cwl_args = {} if cwl_args is None else cwl_args
    executor = SingleJobExecutor() if executor is None else executor

    step_tmp_folder, step_cache_folder, step_outputs_folder, step_report = get_temp_folders(
        task_id=task_id,
        job_data=job_data
    )

    default_cwl_args = get_default_cwl_args(cwl_args)

    default_cwl_args.update({                        # add execution specific parameters
        "tmp_outdir_prefix": step_cache_folder + "/",
        "tmpdir_prefix": step_cache_folder + "/",
        "cidfile_dir": step_tmp_folder,
        "cidfile_prefix": task_id,
        "basedir": os.getcwd(),                      # job should already have abs paths for inputs, so this is not used
        "outdir": step_outputs_folder
    })

    workflow_step_path = os.path.join(step_tmp_folder, task_id + "_step_workflow.cwl")

    fast_cwl_step_load(                              # will save the new workflow to "workflow_step_path"
        workflow=workflow,
        target_id=task_id,
        cwl_args=default_cwl_args,
        location=workflow_step_path
    )

    _stderr = sys.stderr                             # to trick the logger
    sys.stderr = sys.__stderr__
    step_outputs, step_status = executor(
        slow_cwl_load(workflow=workflow_step_path, cwl_args=default_cwl_args),
        job_data,
        RuntimeContext(default_cwl_args)
    )
    sys.stderr = _stderr

    if step_status != "success":
        raise ValueError("Failed to run workflow step")

    # To remove "http://commonwl.org/cwltool#generation": 0 (copied from cwltool)
    visit_class(step_outputs, ("File",), MutationManager().unset_generation)

    dump_json(step_outputs, step_report)

    return step_outputs, step_report
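# Usage sketch (hypothetical paths and ids): run one step in isolation and read
# back its report. Assumes job_data carries the run-folder layout expected by
# get_temp_folders and absolute paths for all step inputs.
#
#   step_outputs, step_report = execute_workflow_step(
#       workflow="/data/runs/demo/workflow.cwl",     # hypothetical packed workflow
#       task_id="sort_bam",                          # hypothetical step id
#       job_data={"tmp_folder": "/data/runs/demo"}   # hypothetical run layout
#   )
#   print(step_report)                               # path to the exported JSON results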
def execute(self, context):
    post_status(context)

    self.cwlwf, it_is_workflow = load_cwl(
        self.dag.default_args["cwl_workflow"], self.dag.default_args)
    self.cwl_step = [
        step for step in self.cwlwf.steps
        if self.task_id == step.id.split("#")[-1]
    ][0] if it_is_workflow else self.cwlwf

    _logger.info('{0}: Running!'.format(self.task_id))

    upstream_task_ids = [t.task_id for t in self.upstream_list] + \
                        ([self.reader_task_id] if self.reader_task_id else [])

    _logger.debug('{0}: Collecting outputs from: \n{1}'.format(
        self.task_id, json.dumps(upstream_task_ids, indent=4)))

    upstream_data = self.xcom_pull(context=context, task_ids=upstream_task_ids)

    _logger.info('{0}: Upstream data: \n {1}'.format(
        self.task_id, json.dumps(upstream_data, indent=4)))

    promises = {}
    for data in upstream_data:  # upstream_data is an array of {"promises": ..., "outdir": ...}
        promises = merge(promises, data["promises"])
        if "outdir" in data:
            self.outdir = data["outdir"]

    _d_args = self.dag.default_args

    if not self.outdir:
        self.outdir = _d_args['tmp_folder']

    _logger.debug('{0}: Step inputs: {1}'.format(
        self.task_id, json.dumps(self.cwl_step.tool["inputs"], indent=4)))
    _logger.debug('{0}: Step outputs: {1}'.format(
        self.task_id, json.dumps(self.cwl_step.tool["outputs"], indent=4)))

    jobobj = {}

    for inp in self.cwl_step.tool["inputs"]:
        jobobj_id = shortname(inp["id"]).split("/")[-1]
        source_ids = []
        promises_outputs = []
        try:
            source_field = inp["source"] if it_is_workflow else inp.get("id")
            source_ids = [shortname(s) for s in source_field] \
                if isinstance(source_field, list) else [shortname(source_field)]
            promises_outputs = [
                promises[source_id]
                for source_id in source_ids if source_id in promises
            ]
        except KeyError:
            _logger.warning(
                "{0}: Couldn't find source field in step input: {1}".format(
                    self.task_id, json.dumps(inp, indent=4)))

        _logger.info(
            '{0}: For input {1} with source_ids: {2} found upstream outputs: \n{3}'
            .format(self.task_id, jobobj_id, source_ids, promises_outputs))

        if len(promises_outputs) > 1:
            if inp.get("linkMerge", "merge_nested") == "merge_flattened":
                jobobj[jobobj_id] = flatten(promises_outputs)
            else:
                jobobj[jobobj_id] = promises_outputs
        # Should also check for [None], because in that case the default value should be used
        elif len(promises_outputs) == 1 and (promises_outputs[0] is not None):
            jobobj[jobobj_id] = promises_outputs[0]
        elif "valueFrom" in inp:
            jobobj[jobobj_id] = None
        elif "default" in inp:
            jobobj[jobobj_id] = copy.copy(inp["default"])
        else:
            continue

    _logger.debug('{0}: Collected job object: \n {1}'.format(
        self.task_id, json.dumps(jobobj, indent=4)))

    def _post_scatter_eval(shortio, cwl_step):
        _value_from = {
            shortname(i["id"]).split("/")[-1]: i["valueFrom"]
            for i in cwl_step.tool["inputs"] if "valueFrom" in i
        }
        _logger.debug('{0}: Step inputs with valueFrom: \n{1}'.format(
            self.task_id, json.dumps(_value_from, indent=4)))

        def value_from_func(k, v):
            if k in _value_from:
                return expression.do_eval(
                    _value_from[k], shortio,
                    self.cwlwf.tool.get("requirements", []),
                    None, None, {}, context=v)
            return v

        return {k: value_from_func(k, v) for k, v in shortio.items()}

    job = _post_scatter_eval(jobobj, self.cwl_step)

    _logger.info('{0}: Final job data: \n {1}'.format(
        self.task_id, json.dumps(job, indent=4)))

    _d_args['outdir'] = tempfile.mkdtemp(
        prefix=os.path.join(self.outdir, "step_tmp"))
    _d_args['tmpdir_prefix'] = os.path.join(_d_args['outdir'], 'cwl_tmp_')
    _d_args['tmp_outdir_prefix'] = os.path.join(_d_args['outdir'], 'cwl_outdir_')
    _d_args["record_container_id"] = True
    _d_args["cidfile_dir"] = _d_args['outdir']
    _d_args["cidfile_prefix"] = self.task_id

    _logger.debug('{0}: Runtime context: \n {1}'.format(self.task_id, _d_args))

    executor = SingleJobExecutor()
    runtimeContext = RuntimeContext(_d_args)
    runtimeContext.make_fs_access = getdefault(
        runtimeContext.make_fs_access, StdFsAccess)

    for inp in self.cwl_step.tool["inputs"]:
        if inp.get("not_connected"):
            del job[shortname(inp["id"]).split("/")[-1]]

    _stderr = sys.stderr  # to trick the logger
    sys.stderr = sys.__stderr__
    output, status = executor(
        self.cwl_step.embedded_tool if it_is_workflow else self.cwl_step,
        job,
        runtimeContext,
        logger=_logger)
    sys.stderr = _stderr

    if not output and status == "permanentFail":
        raise ValueError("Step {0} failed with status: permanentFail".format(self.task_id))

    _logger.debug('{0}: Embedded tool outputs: \n {1}'.format(
        self.task_id, json.dumps(output, indent=4)))

    promises = {}
    for out in self.cwl_step.tool["outputs"]:
        out_id = shortname(out["id"])
        jobout_id = out_id.split("/")[-1]
        try:
            promises[out_id] = output[jobout_id]
        except KeyError:
            continue

    # Unsetting the Generation from the final output object
    visit_class(promises, ("File",), MutationManager().unset_generation)

    data = {"promises": promises, "outdir": self.outdir}

    _logger.info('{0}: Output: \n {1}'.format(
        self.task_id, json.dumps(data, indent=4)))

    return data
def main(args=None):
    if args is None:
        parser = argparse.ArgumentParser(
            prog="C2WL-Rocket",
            description='Customizable CWL Rocket - A highly flexible CWL execution engine.'
        )
        subparser = parser.add_subparsers(help="CWLab sub-commands",
                                          dest='subcommand')

        # subcommand launch:
        parser_launch = subparser.add_parser(
            "launch",
            help="Start execution of a CWL workflow given run input parameters.")
        parser_launch.add_argument("--debug",
                                   action="store_true",
                                   help="Print debugging-level messages.")
        parser_launch.add_argument(
            '-p', '--exec-profile',
            help="""Specify an exec profile.
                Please specify the name of a python module and a contained
                exec profile class separated by \":\"
                (e.g. the default \"c2wl_rocket.exec_profile:LocalToolExec\").
                Alternatively, you can specify the full path to a python file
                containing an exec profile class
                (e.g. \"/path/to/my/exec_profiles.py:CustomExec\").
                """,
            default="c2wl_rocket.exec_profile:LocalToolExec")
        parser_launch.add_argument('cwl_document',
                                   help="Provide a CWL workflow or tool.")
        parser_launch.add_argument(
            'input_params',
            nargs=argparse.REMAINDER,
            help="Provide input parameters in YAML or JSON format.")
        parser_launch.add_argument(
            "--outdir",
            type=typing_extensions.Text,
            help="Output directory (default: current directory).",
            default=os.path.abspath('.'))

        exgroup = parser_launch.add_mutually_exclusive_group()
        exgroup.add_argument(
            "--tmp-outdir-prefix",
            type=typing_extensions.Text,
            help="Path prefix for intermediate output directories.",
            default=cwltool.utils.DEFAULT_TMP_PREFIX)
        exgroup.add_argument(
            "--cachedir",
            type=typing_extensions.Text,
            help="Directory to cache intermediate workflow outputs to avoid recomputing steps.",
            default="")

        exgroup = parser_launch.add_mutually_exclusive_group()
        exgroup.add_argument(
            "--move-outputs",
            action="store_const",
            const="move",
            default="move",
            help="Move output files to the workflow output directory and delete "
                 "intermediate output directories (default).",
            dest="move_outputs")
        exgroup.add_argument(
            "--leave-outputs",
            action="store_const",
            const="leave",
            default="move",
            help="Leave output files in intermediate output directories.",
            dest="move_outputs")
        exgroup.add_argument(
            "--copy-outputs",
            action="store_const",
            const="copy",
            default="move",
            help="Copy output files to the workflow output directory; don't delete "
                 "intermediate output directories.",
            dest="move_outputs")

        # subcommand start_worker:
        parser_start_worker = subparser.add_parser(
            "start_worker", help="Start a worker service instance.")
        parser_start_worker.add_argument(
            "-H", "--web_server_host",
            type=typing_extensions.Text,
            help="IP of the webserver host. Specify \"0.0.0.0\" for remote "
                 "availability within the current network.",
            default="localhost")
        parser_start_worker.add_argument(
            "-P", "--web_server_port",
            type=typing_extensions.Text,
            help="Port of the webserver.",
            default="5000")

        args = parser.parse_args()

    if args.subcommand == "launch":
        if isinstance(args.exec_profile, str):
            exec_profile_invalid_message = error_message(
                "main",
                """
                The specified exec profile is invalid.
                Please either specify a class inheriting from ExecProfileBase
                at c2wl_rocket.execprofile or, if using the command line,
                specify the name or path of a module that contains such a
                class. Please see the command-line help for details.
                """,
                is_known=True)
            assert ":" in args.exec_profile, exec_profile_invalid_message

            exec_profile_module_name = args.exec_profile.split(":")[0]
            exec_profile_class_name = args.exec_profile.split(":")[1]

            try:
                exec_profile_module = importlib.import_module(
                    exec_profile_module_name)
            except ImportError:
                try:
                    spec = importlib.util.spec_from_file_location(
                        "exec_profile_module", exec_profile_module_name)
                    exec_profile_module = importlib.util.module_from_spec(spec)
                    spec.loader.exec_module(exec_profile_module)
                except Exception:
                    raise AssertionError(
                        error_message(
                            "main",
                            f"""
                            The specified exec profile module
                            \"{exec_profile_module_name}\" could not be imported.
                            """,
                            is_known=True))

            assert hasattr(exec_profile_module, exec_profile_class_name), \
                error_message(
                    "main",
                    f"""
                    The specified exec profile module
                    \"{exec_profile_module_name}\" has no class
                    \"{exec_profile_class_name}\".
                    """,
                    is_known=True)

            args.exec_profile = getattr(exec_profile_module,
                                        exec_profile_class_name)

        assert isclass(args.exec_profile) and issubclass(args.exec_profile, ExecProfileBase), \
            error_message(
                "main",
                """
                The specified exec profile class does not inherit
                from ExecProfileBase at c2wl_rocket.execprofile.
                """,
                is_known=True)

        cwltool_args = copy(cwltool_default_args)
        cwltool_args.workflow = args.cwl_document
        cwltool_args.job_order = args.input_params
        cwltool_args.outdir = args.outdir
        cwltool_args.tmp_outdir_prefix = args.tmp_outdir_prefix
        cwltool_args.cachedir = args.cachedir
        cwltool_args.move_outputs = args.move_outputs
        cwltool_args.debug = args.debug

        loading_context = cwltool.main.LoadingContext(vars(cwltool_args))

        with open(args.cwl_document, mode="r") as cwl:
            cwl_content = yaml.safe_load(cwl)
        assert "cwlVersion" in cwl_content.keys(), error_message(
            "main",
            "No cwlVersion specified in the CWL document.",
            is_known=True)
        workflow_metadata = {"cwlVersion": cwl_content["cwlVersion"]}

        loading_context.construct_tool_object = functools.partial(
            make_custom_tool,
            exec_profile_class=args.exec_profile,
            workflow_metadata=workflow_metadata)

        runtime_context = cwltool.main.RuntimeContext(vars(cwltool_args))

        job_executor = MultithreadedJobExecutor() if cwltool_args.parallel \
            else SingleJobExecutor()
        job_executor.max_ram = job_executor.max_cores = float("inf")

        # hand arguments over to the main exec function:
        cwltool.main.main(
            args=cwltool_args,
            executor=job_executor,
            loadingContext=loading_context,
            runtimeContext=runtime_context)

    elif args.subcommand == "start_worker":
        worker.start(
            web_server_host=args.web_server_host,
            web_server_port=int(args.web_server_port))
def main(args=None):
    """Main entrypoint for cwl-tes."""
    if args is None:
        args = sys.argv[1:]

    parser = arg_parser()
    parsed_args = parser.parse_args(args)

    if parsed_args.version:
        print(versionstring())
        return 0

    if parsed_args.tes is None:
        print(versionstring())
        parser.print_usage()
        print("cwl-tes: error: argument --tes is required")
        return 1

    if parsed_args.token:
        try:
            token_public_key = parsed_args.token_public_key
            if not token_public_key:
                header = jwt.get_unverified_header(parsed_args.token)
                if 'kid' in header:
                    token_public_key = load_public_key(header.get('kid'))
                else:
                    raise Exception("Invalid token: has no kid in header.")
            jwt.decode(
                parsed_args.token,
                token_public_key.encode('utf-8').decode('unicode_escape'),
                algorithms=['RS256'])
        except Exception:
            raise Exception('Token is not valid')

    if parsed_args.quiet:
        log.setLevel(logging.WARN)
    if parsed_args.debug:
        log.setLevel(logging.DEBUG)

    def signal_handler(*args):  # pylint: disable=unused-argument
        """Handle SIGINT: stop local threads, leave remote tasks running."""
        log.info("received control-c signal")
        log.info("terminating thread(s)...")
        log.warning("remote TES task(s) will keep running")
        sys.exit(1)

    signal.signal(signal.SIGINT, signal_handler)

    ftp_cache = {}

    class CachingFtpFsAccess(FtpFsAccess):
        """Ensures that the FTP connection cache is shared."""

        def __init__(self, basedir, insecure=False):
            super(CachingFtpFsAccess, self).__init__(
                basedir, ftp_cache, insecure=insecure)

    ftp_fs_access = CachingFtpFsAccess(os.curdir,
                                       insecure=parsed_args.insecure)

    if parsed_args.remote_storage_url:
        parsed_args.remote_storage_url = ftp_fs_access.join(
            parsed_args.remote_storage_url, str(uuid.uuid4()))

    loading_context = cwltool.main.LoadingContext(vars(parsed_args))
    loading_context.construct_tool_object = functools.partial(
        make_tes_tool,
        url=parsed_args.tes,
        remote_storage_url=parsed_args.remote_storage_url,
        token=parsed_args.token)

    runtime_context = cwltool.main.RuntimeContext(vars(parsed_args))
    runtime_context.make_fs_access = functools.partial(
        CachingFtpFsAccess, insecure=parsed_args.insecure)
    runtime_context.path_mapper = functools.partial(
        TESPathMapper, fs_access=ftp_fs_access)

    job_executor = MultithreadedJobExecutor() if parsed_args.parallel \
        else SingleJobExecutor()
    job_executor.max_ram = job_executor.max_cores = float("inf")

    executor = functools.partial(
        tes_execute,
        job_executor=job_executor,
        loading_context=loading_context,
        remote_storage_url=parsed_args.remote_storage_url,
        ftp_access=ftp_fs_access)

    return cwltool.main.main(
        args=parsed_args,
        executor=executor,
        loadingContext=loading_context,
        runtimeContext=runtime_context,
        versionfunc=versionstring,
        logger_handler=console)
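# Example invocation (hypothetical endpoint and files), assuming the package
# installs a "cwl-tes" console entry point that calls main():
#
#   cwl-tes --tes http://localhost:8000 workflow.cwl inputs.json
#
# --tes is mandatory (see the explicit check above); --token forwards an
# RS256-validated JWT to the TES backend.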
def run_native(config_object: 'ConfigBase', workflow: str, run_directory: str = '.',
               parallel: bool = False, verbosity: str = "normal") -> int:
    """Executes the workflow using native Python rather than a "command line" subprocess.

    Args:
        config_object: a constructed ConfigBase-derived object
        workflow: the path to the workflow to be executed
        run_directory: the destination folder for workflow output subdirectories (default: CWD)
        parallel: process libraries in parallel where possible
        verbosity: controls the depth of information written to terminal by cwltool

    Returns:
        The exit code: 0 if the workflow completed, 1 otherwise.
    """

    def furnish_if_file_record(file_dict):
        if isinstance(file_dict, dict) and file_dict.get('class', None) == 'File':
            file_dict['basename'] = os.path.basename(file_dict['path'])
            file_dict['location'] = file_dict['path']
            file_dict['contents'] = None

    # Upgrade file entries in the Run Config with the extra descriptors cwltool expects
    for _, config_param in config_object.config.items():
        if isinstance(config_param, list):
            for config_dict in config_param:
                furnish_if_file_record(config_dict)
        else:
            furnish_if_file_record(config_param)

    # Set overall config for cwltool
    runtime_context = RuntimeContext({
        'secret_store': cwltool.secrets.SecretStore(),
        'outdir': run_directory,
        'on_error': "continue",
        'js_console': verbosity == "debug",
        'debug': verbosity == "debug"
    })

    # Set proper temp directory for Mac users
    if sys.platform == "darwin":
        default_mac_path = "/private/tmp/docker_tmp"
        if runtime_context.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
            runtime_context.tmp_outdir_prefix = default_mac_path
        if runtime_context.tmpdir_prefix == DEFAULT_TMP_PREFIX:
            runtime_context.tmpdir_prefix = default_mac_path

    # Enable rich terminal output (timestamp, color, formatting)
    logger = logging.getLogger("cwltool")
    logger.handlers.clear()  # executors.py loads a default handler; outputs are printed twice if we don't clear it
    level = 'DEBUG' if verbosity == 'debug' else 'WARN' if verbosity == "quiet" else "INFO"
    coloredlogs.install(logger=logger, stream=sys.stderr,
                        fmt="[%(asctime)s] %(levelname)s %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        level=level, isatty=True)

    # Create wrappers for the executors so that we may pass our logger to them (unsupported by Factory)
    parallel_executor = functools.partial(MultithreadedJobExecutor(), logger=logger)
    serial_executor = functools.partial(SingleJobExecutor(), logger=logger)

    # Instantiate Factory with our run preferences
    cwl = cwltool.factory.Factory(
        runtime_context=runtime_context,
        loading_context=LoadingContext({'relax_path_checks': True}),
        executor=parallel_executor if parallel else serial_executor
    )

    try:
        # Load the workflow document and execute
        pipeline = cwl.make(workflow)
        pipeline(**config_object.config)
    except cwltool.factory.WorkflowStatus:
        # For now, return non-zero if the workflow did not complete
        return 1

    return 0
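# Usage sketch (hypothetical ConfigBase subclass and workflow path):
#
#   config = MyConfig("run_config.yml")      # any ConfigBase-derived object
#   exit_code = run_native(config, "workflows/tiny.cwl",
#                          run_directory="results", verbosity="debug")
#   sys.exit(exit_code)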
def execute_workflow_step(workflow, task_id, job_data, cwl_args=None, executor=None):
    """
    Constructs and executes a single-step workflow based on "workflow" and
    "task_id". "cwl_args" can be used to update the default parameters used
    for the loading and runtime contexts. Exports a JSON file with the
    execution results. If the step was evaluated as one that needs to be
    skipped, the returned "skipped" flag is set to True and the step_report
    file will include "nulls". This function doesn't remove any temporary
    data in either the success or failure scenario.
    """

    cwl_args = {} if cwl_args is None else cwl_args
    executor = SingleJobExecutor() if executor is None else executor

    step_tmp_folder, step_cache_folder, step_outputs_folder, step_report = get_temp_folders(
        task_id=task_id,
        job_data=job_data
    )

    default_cwl_args = get_default_cwl_args(cwl_args)

    default_cwl_args.update({                        # add execution specific parameters
        "tmp_outdir_prefix": step_cache_folder + "/",
        "tmpdir_prefix": step_cache_folder + "/",
        "cidfile_dir": step_tmp_folder,
        "cidfile_prefix": task_id,
        "basedir": os.getcwd(),                      # job should already have abs paths for inputs, so this is not used
        "outdir": step_outputs_folder
    })

    workflow_step_path = os.path.join(step_tmp_folder, task_id + "_step_workflow.cwl")

    fast_cwl_step_load(                              # will save the new workflow to "workflow_step_path"
        workflow=workflow,
        target_id=task_id,
        cwl_args=default_cwl_args,
        location=workflow_step_path
    )

    workflow_data = slow_cwl_load(workflow=workflow_step_path,
                                  cwl_args=default_cwl_args)

    skipped = True
    step_outputs = {
        output_id: None
        for output_id, _ in get_items(workflow_data.tool["outputs"])
    }

    if need_to_run(workflow_data, job_data, task_id):
        skipped = False
        _stderr = sys.stderr                         # to trick the logger
        sys.stderr = sys.__stderr__
        step_outputs, step_status = executor(
            workflow_data,
            job_data,
            RuntimeContext(default_cwl_args)
        )
        sys.stderr = _stderr
        if step_status != "success":
            raise ValueError("Failed to run workflow step")

    # To remove "http://commonwl.org/cwltool#generation": 0 (copied from cwltool)
    visit_class(step_outputs, ("File",), MutationManager().unset_generation)

    dump_json(step_outputs, step_report)

    return step_outputs, step_report, skipped
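# Usage sketch (hypothetical ids and paths) showing how callers can branch on
# the third return value that this variant adds:
#
#   outputs, report, skipped = execute_workflow_step(
#       workflow="/data/runs/demo/workflow.cwl",     # hypothetical packed workflow
#       task_id="sort_bam",                          # hypothetical step id
#       job_data=job_data)
#   if skipped:
#       log.info("step was skipped; report contains nulls for all outputs")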