def load_cwl(fname):
    """Load and validate a CWL file using cwltool.

    Supports two generations of the cwltool API: when the module-level flag
    ``legacy_cwltool`` is true the old ``fetch_document``/``validate_document``
    pair is used (with a further fallback for intermediate cwltool releases
    whose ``validate_document`` grew extra required arguments); otherwise the
    newer ``fetch_document``/``resolve_and_validate_document`` API is used.

    :param fname: path or URI of the CWL document to load.
    :return: tuple ``(document_loader, processobj, metadata, uri)``.
    """
    logger.debug('Loading CWL file "{}"'.format(fname))
    # Fetching, preprocessing and validating cwl
    # Older versions of cwltool
    if legacy_cwltool:
        try:
            # Oldest API: both calls take positional arguments only.
            (document_loader, workflowobj, uri) = fetch_document(fname)
            (document_loader, _, processobj, metadata, uri) = \
                validate_document(document_loader, workflowobj, uri)
        except TypeError:
            # Intermediate cwltool releases changed the signatures; retry
            # with a LoadingContext-driven call sequence.  Imports are local
            # because these names only exist in those releases.
            from cwltool.context import LoadingContext, getdefault
            from cwltool import workflow
            from cwltool.resolver import tool_resolver
            from cwltool.load_tool import resolve_tool_uri
            loadingContext = LoadingContext()
            loadingContext.construct_tool_object = getdefault(
                loadingContext.construct_tool_object,
                workflow.default_make_tool)
            loadingContext.resolver = getdefault(loadingContext.resolver,
                                                 tool_resolver)
            uri, tool_file_uri = resolve_tool_uri(
                fname,
                resolver=loadingContext.resolver,
                fetcher_constructor=loadingContext.fetcher_constructor)
            document_loader, workflowobj, uri = fetch_document(
                uri,
                resolver=loadingContext.resolver,
                fetcher_constructor=loadingContext.fetcher_constructor)
            document_loader, avsc_names, processobj, metadata, uri = \
                validate_document(
                    document_loader, workflowobj, uri,
                    loadingContext.overrides_list, {},
                    enable_dev=loadingContext.enable_dev,
                    strict=loadingContext.strict,
                    preprocess_only=False,
                    fetcher_constructor=loadingContext.fetcher_constructor,
                    skip_schemas=False,
                    do_validate=loadingContext.do_validate)
    # Recent versions of cwltool
    else:
        (loading_context, workflowobj, uri) = fetch_document(fname)
        loading_context, uri = resolve_and_validate_document(
            loading_context, workflowobj, uri)
        document_loader = loading_context.loader
        # NOTE(review): recent API returns the raw workflow object as the
        # "process object"; metadata now lives on the loading context.
        processobj = workflowobj
        metadata = loading_context.metadata
    return document_loader, processobj, metadata, uri
def _run_example(as_dict, out=None):
    """Serialize an abstract CWL definition to *out*, check it, and validate
    the written file with cwltool's loader.  Returns the abstract dict."""
    target = out if out else _examples_path_for("test.cwl")
    abstract_as_dict = from_dict(as_dict)
    with open(target, "w") as handle:
        ordered_dump(abstract_as_dict, handle)
    check_abstract_def(abstract_as_dict)
    # validate format2 workflows
    is_dev_version = "dev" in CWL_VERSION
    ctx = LoadingContext()
    ctx.enable_dev = is_dev_version
    ctx.loader = default_loader(
        ctx.fetcher_constructor,
        enable_dev=is_dev_version,
    )
    ctx.resolver = getdefault(ctx.resolver, tool_resolver)
    ctx, workflowobj, uri = fetch_document(target, ctx)
    ctx, uri = resolve_and_validate_document(ctx, workflowobj, uri)
    return abstract_as_dict
def run(self, loading_context):  # type: (RuntimeContext) -> None
    """Report the cached job's collected output ports as a success."""
    checksum = getdefault(loading_context.compute_checksum, True)
    collected = self.job.collect_output_ports(
        self.job.tool["outputs"],
        self.cache_builder,
        self.output_dir,
        checksum,
    )
    self.output_callback(collected, "success")
def execute(self, context):
    """Run one CWL workflow step as an Airflow task.

    Loads the CWL document, locates this task's step, assembles the step's
    job object from upstream XCom "promises", evaluates ``valueFrom``
    expressions, runs the step with cwltool's ``SingleJobExecutor`` and
    returns ``{"promises": ..., "outdir": ...}`` for downstream tasks.

    :param context: Airflow task-instance context.
    :raises ValueError: when the step finishes with ``permanentFail``
        and produced no output.
    """
    post_status(context)
    self.cwlwf, it_is_workflow = load_cwl(
        self.dag.default_args["cwl_workflow"], self.dag.default_args)
    # For a workflow, pick the step whose short id matches this task id;
    # a standalone tool is the "step" itself.
    self.cwl_step = [
        step for step in self.cwlwf.steps
        if self.task_id == step.id.split("#")[-1]
    ][0] if it_is_workflow else self.cwlwf
    _logger.info('{0}: Running!'.format(self.task_id))
    upstream_task_ids = [t.task_id for t in self.upstream_list] + \
        ([self.reader_task_id] if self.reader_task_id else [])
    _logger.debug('{0}: Collecting outputs from: \n{1}'.format(
        self.task_id, json.dumps(upstream_task_ids, indent=4)))
    upstream_data = self.xcom_pull(context=context,
                                   task_ids=upstream_task_ids)
    _logger.info('{0}: Upstream data: \n {1}'.format(
        self.task_id, json.dumps(upstream_data, indent=4)))
    promises = {}
    for data in upstream_data:  # upstream_data is an array with { promises and outdir }
        promises = merge(promises, data["promises"])
        if "outdir" in data:
            self.outdir = data["outdir"]
    _d_args = self.dag.default_args
    if not self.outdir:
        self.outdir = _d_args['tmp_folder']
    _logger.debug('{0}: Step inputs: {1}'.format(
        self.task_id, json.dumps(self.cwl_step.tool["inputs"], indent=4)))
    _logger.debug('{0}: Step outputs: {1}'.format(
        self.task_id, json.dumps(self.cwl_step.tool["outputs"], indent=4)))
    # Build the job object: map each declared step input to the matching
    # upstream outputs (or valueFrom / default fallbacks).
    jobobj = {}
    for inp in self.cwl_step.tool["inputs"]:
        jobobj_id = shortname(inp["id"]).split("/")[-1]
        source_ids = []
        promises_outputs = []
        try:
            source_field = inp["source"] if it_is_workflow else inp.get(
                "id")
            source_ids = [shortname(s) for s in source_field] if isinstance(
                source_field, list) else [shortname(source_field)]
            promises_outputs = [
                promises[source_id] for source_id in source_ids
                if source_id in promises
            ]
        except Exception:
            # BUGFIX: was a bare "except:" which also swallowed
            # KeyboardInterrupt/SystemExit.  Inputs without a source
            # are legitimate, so we only log and fall through.
            _logger.warning(
                "{0}: Couldn't find source field in step input: {1}".format(
                    self.task_id, json.dumps(inp, indent=4)))
        _logger.info(
            '{0}: For input {1} with source_ids: {2} found upstream outputs: \n{3}'
            .format(self.task_id, jobobj_id, source_ids, promises_outputs))
        if len(promises_outputs) > 1:
            if inp.get("linkMerge", "merge_nested") == "merge_flattened":
                jobobj[jobobj_id] = flatten(promises_outputs)
            else:
                jobobj[jobobj_id] = promises_outputs
        # Should also check if [None], because in this case we need to take default value
        elif len(promises_outputs) == 1 and (promises_outputs[0] is not None):
            jobobj[jobobj_id] = promises_outputs[0]
        elif "valueFrom" in inp:
            jobobj[jobobj_id] = None
        elif "default" in inp:
            d = copy.copy(inp["default"])
            jobobj[jobobj_id] = d
        else:
            continue
    _logger.debug('{0}: Collected job object: \n {1}'.format(
        self.task_id, json.dumps(jobobj, indent=4)))

    def _post_scatter_eval(shortio, cwl_step):
        # Evaluate valueFrom expressions against the collected inputs.
        _value_from = {
            shortname(i["id"]).split("/")[-1]: i["valueFrom"]
            for i in cwl_step.tool["inputs"] if "valueFrom" in i
        }
        _logger.debug('{0}: Step inputs with valueFrom: \n{1}'.format(
            self.task_id, json.dumps(_value_from, indent=4)))

        def value_from_func(k, v):
            if k in _value_from:
                return expression.do_eval(_value_from[k], shortio,
                                          self.cwlwf.tool.get(
                                              "requirements", []),
                                          None, None, {}, context=v)
            else:
                return v

        return {k: value_from_func(k, v) for k, v in shortio.items()}

    job = _post_scatter_eval(jobobj, self.cwl_step)
    _logger.info('{0}: Final job data: \n {1}'.format(
        self.task_id, json.dumps(job, indent=4)))
    # Per-step runtime configuration: dedicated temp dirs and a Docker
    # container-id file, all under this step's private outdir.
    _d_args['outdir'] = tempfile.mkdtemp(
        prefix=os.path.join(self.outdir, "step_tmp"))
    _d_args['tmpdir_prefix'] = os.path.join(_d_args['outdir'], 'cwl_tmp_')
    _d_args['tmp_outdir_prefix'] = os.path.join(_d_args['outdir'],
                                                'cwl_outdir_')
    _d_args["record_container_id"] = True
    _d_args["cidfile_dir"] = _d_args['outdir']
    _d_args["cidfile_prefix"] = self.task_id
    _logger.debug('{0}: Runtime context: \n {1}'.format(self, _d_args))
    executor = SingleJobExecutor()
    runtimeContext = RuntimeContext(_d_args)
    runtimeContext.make_fs_access = getdefault(
        runtimeContext.make_fs_access, StdFsAccess)
    for inp in self.cwl_step.tool["inputs"]:
        if inp.get("not_connected"):
            del job[shortname(inp["id"].split("/")[-1])]
    # cwltool logs to stderr; temporarily point it at the real stream.
    _stderr = sys.stderr
    sys.stderr = sys.__stderr__
    try:
        (output, status) = executor(
            self.cwl_step.embedded_tool if it_is_workflow else self.cwl_step,
            job,
            runtimeContext,
            logger=_logger)
    finally:
        # BUGFIX: restore stderr even when the executor raises.
        sys.stderr = _stderr
    if not output and status == "permanentFail":
        # BUGFIX: was a bare "raise ValueError" with no message.
        raise ValueError(
            "{0}: step failed with status '{1}' and no output".format(
                self.task_id, status))
    _logger.debug('{0}: Embedded tool outputs: \n {1}'.format(
        self.task_id, json.dumps(output, indent=4)))
    promises = {}
    for out in self.cwl_step.tool["outputs"]:
        out_id = shortname(out["id"])
        jobout_id = out_id.split("/")[-1]
        try:
            promises[out_id] = output[jobout_id]
        except KeyError:  # tool produced no value for this output
            continue
    # Unsetting the Generation from final output object
    visit_class(promises, ("File", ), MutationManager().unset_generation)
    data = {"promises": promises, "outdir": self.outdir}
    _logger.info('{0}: Output: \n {1}'.format(
        self.task_id, json.dumps(data, indent=4)))
    return data
def run(self, runtimeContext):  # type: (RuntimeContext) -> None
    """Resolve the container image for this job, then stage files and execute.

    Image resolution order: user-space docker command (local id takes
    precedence over a pull spec), else a regular Docker pull via
    ``get_from_requirements``, else the builder's default container.
    Failures are mapped to ``UnsupportedRequirement`` (when Docker is a
    hard requirement) or ``WorkflowException`` (when it is only a hint).
    """
    (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
    self.prov_obj = runtimeContext.prov_obj
    img_id = None
    env = cast(MutableMapping[Text, Text], os.environ)
    user_space_docker_cmd = runtimeContext.user_space_docker_cmd
    if docker_req and user_space_docker_cmd:
        # For user-space docker implementations, a local image name or ID
        # takes precedence over a network pull
        if 'dockerImageId' in docker_req:
            img_id = str(docker_req["dockerImageId"])
        elif 'dockerPull' in docker_req:
            img_id = str(docker_req["dockerPull"])
        # else:
        #     raise WorkflowException(SourceLine(docker_req).makeError(
        #         "Docker image must be specified as 'dockerImageId' or "
        #         "'dockerPull' when using user space implementations of "
        #         "Docker"))
    else:
        try:
            if docker_req and runtimeContext.use_container:
                # Pull (or locate) the requested image; may hit the network.
                img_id = str(
                    self.get_from_requirements(
                        docker_req, True, runtimeContext.pull_image,
                        getdefault(runtimeContext.force_docker_pull, False),
                        getdefault(runtimeContext.tmp_outdir_prefix,
                                   DEFAULT_TMP_PREFIX)))
            if img_id is None:
                # Fall back to a configured default container, if any.
                if self.builder.find_default_container:
                    default_container = self.builder.find_default_container()
                    if default_container:
                        img_id = str(default_container)
            if docker_req and img_id is None and runtimeContext.use_container:
                raise Exception("Docker image not available")
            if self.prov_obj and img_id and runtimeContext.process_run_id:
                # Record the container image as a provenance agent.
                # TODO: Integrate with record_container_id
                container_agent = self.prov_obj.document.agent(
                    uuid.uuid4().urn,
                    {"prov:type": PROV["SoftwareAgent"],
                     "cwlprov:image": img_id,
                     "prov:label": "Container execution of image %s" % img_id})
                # FIXME: img_id is not a sha256 id, it might just be "debian:8"
                #img_entity = document.entity("nih:sha-256;%s" % img_id,
                #                  {"prov:label": "Container image %s" % img_id} )
                # The image is the plan for this activity-agent association
                #document.wasAssociatedWith(process_run_ID, container_agent, img_entity)
                self.prov_obj.document.wasAssociatedWith(
                    runtimeContext.process_run_id, container_agent)
        except Exception as err:
            # NOTE(review): the error text hard-codes "Shifter" as the
            # container engine name — presumably this build targets Shifter;
            # confirm before reusing elsewhere.
            container = "Shifter"
            _logger.debug("%s error", container, exc_info=True)
            if docker_is_req:
                raise UnsupportedRequirement(
                    "%s is required to run this tool: %s" % (container, err))
            else:
                raise WorkflowException(
                    "{0} is not available for this tool, try "
                    "--no-container to disable {0}, or install "
                    "a user space Docker replacement like uDocker with "
                    "--user-space-docker-cmd.: {1}".format(container, err))
    self._setup(runtimeContext)
    # Stage (symlink) input files into place before building the runtime.
    stageFiles(self.pathmapper, ignoreWritable=True, symLink=True,
               secret_store=runtimeContext.secret_store)
    runtime = self.create_runtime(env, runtimeContext, img_id)
    self._execute(runtime, env, runtimeContext)
def cwlmain(
        self,
        argsl=None,  # type: List[str]
        args=None,  # type: argparse.Namespace
        job_order_object=None,  # type: MutableMapping[Text, Any]
        stdin=sys.stdin,  # type: IO[Any]
        stdout=None,  # type: Union[TextIO, codecs.StreamWriter]
        stderr=sys.stderr,  # type: IO[Any]
        versionfunc=versionstring,  # type: Callable[[], Text]
        logger_handler=None,  #
        custom_schema_callback=None,  # type: Callable[[], None]
        executor=None,  # type: Callable[..., Tuple[Dict[Text, Any], Text]]
        loadingContext=None,  # type: LoadingContext
        runtimeContext=None  # type: RuntimeContext
):  # type: (...) -> int
    """Load, validate and execute a CWL document (cwltool ``main`` variant).

    Unlike cwltool's ``main`` this returns ``(payload, exit_code)`` tuples
    on most paths so callers can inspect results programmatically; the
    payload is the workflow output (or an error/info message).

    BUGFIXES vs previous revision: every error path that built its message
    with ``"..." + exc + args.debug`` raised TypeError (str + Exception +
    bool); messages are now rendered with ``format``.  The ``--outdir``
    membership test also guarded against ``argsl`` being None.
    """
    if not stdout:
        # Force UTF-8 output regardless of the terminal locale.
        stdout = codecs.getwriter('utf-8')(sys.stdout)
    _logger.removeHandler(defaultStreamHandler)
    if logger_handler:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    try:
        if args is None:
            args = arg_parser().parse_args(argsl)
        # BUGFIX: argsl may be None when args was passed directly.
        if args.workflow and "--outdir" not in (argsl or []):
            outputPath = args.workflow.split('/')[-1].split('.')[0]
            setattr(
                args, "outdir",
                os.getcwd() + "/" + outputPath + "/" +
                datetime.datetime.now().strftime('%Y-%m-%d-%H%M'))
        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()
        rdflib_logger = logging.getLogger("rdflib.term")
        rdflib_logger.addHandler(stderr_handler)
        rdflib_logger.setLevel(logging.ERROR)
        if args.quiet:
            _logger.setLevel(logging.WARN)
        if runtimeContext.debug:
            _logger.setLevel(logging.DEBUG)
            rdflib_logger.setLevel(logging.DEBUG)
        if args.timestamps:
            formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                          "%Y-%m-%d %H:%M:%S")
            stderr_handler.setFormatter(formatter)
        # version
        if args.version:
            return versionfunc(), 0
        else:
            _logger.info(versionfunc())
        if args.print_supported_versions:
            return "\n".join(supportedCWLversions(args.enable_dev)), 0
        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error(
                    "CWL document required, no input file was provided")
                arg_parser().print_help()
                return "CWL document required, no input file was provided", 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = \
                command_line_tool.ACCEPTLIST_EN_RELAXED_RE
        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]
        if custom_schema_callback:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool",
                              res.read())
            res.close()
        else:
            use_standard_schema("v1.0")
        if loadingContext is None:
            loadingContext = LoadingContext(vars(args))
        else:
            loadingContext = loadingContext.copy()
        loadingContext.disable_js_validation = \
            args.disable_js_validation or (not args.do_validate)
        loadingContext.construct_tool_object = getdefault(
            loadingContext.construct_tool_object, workflow.default_make_tool)
        loadingContext.resolver = getdefault(loadingContext.resolver,
                                             tool_resolver)
        try:
            uri, tool_file_uri = resolve_tool_uri(
                args.workflow,
                resolver=loadingContext.resolver,
                fetcher_constructor=loadingContext.fetcher_constructor)
        except Exception:  # BUGFIX: was a bare "except:"
            return "Can't find file " + args.workflow, 0
        try_again_msg = "" if args.debug else \
            ", try again with --debug for more information"
        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args, stdin, loadingContext.fetcher_constructor,
                loadingContext.overrides_list, tool_file_uri)
            if args.overrides:
                loadingContext.overrides_list.extend(
                    load_overrides(
                        file_uri(os.path.abspath(args.overrides)),
                        tool_file_uri))
            document_loader, workflowobj, uri = fetch_document(
                uri,
                resolver=loadingContext.resolver,
                fetcher_constructor=loadingContext.fetcher_constructor)
            if args.print_deps:
                # printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri)
                result = returndeps(workflowobj, document_loader, stdout,
                                    args.relative_deps, uri)
                return result, 0
            document_loader, avsc_names, processobj, metadata, uri \
                = validate_document(
                    document_loader, workflowobj, uri,
                    enable_dev=loadingContext.enable_dev,
                    strict=loadingContext.strict,
                    preprocess_only=(args.print_pre or args.pack),
                    fetcher_constructor=loadingContext.fetcher_constructor,
                    skip_schemas=args.skip_schemas,
                    overrides=loadingContext.overrides_list,
                    do_validate=loadingContext.do_validate)
            if args.print_pre:
                # stdout.write(json_dumps(processobj, indent=4))
                return json_dumps(processobj, indent=4), 0
            loadingContext.overrides_list.extend(
                metadata.get("cwltool:overrides", []))
            tool = make_tool(document_loader, avsc_names, metadata, uri,
                             loadingContext)
            if args.make_template:
                yaml.safe_dump(generate_input_template(tool),
                               sys.stdout,
                               default_flow_style=False,
                               indent=4,
                               block_seq_indent=2)
                return yaml.safe_dump(generate_input_template(tool),
                                      indent=4), 0
            if args.validate:
                _logger.info("Tool definition is valid")
                return "Tool definition is valid", 0
            if args.pack:
                stdout.write(
                    print_pack(document_loader, processobj, uri, metadata))
                return print_pack(document_loader, processobj, uri,
                                  metadata), 0
            if args.print_rdf:
                stdout.write(
                    printrdf(tool, document_loader.ctx, args.rdf_serializer))
                return printrdf(tool, document_loader.ctx,
                                args.rdf_serializer), 0
            if args.print_dot:
                printdot(tool, document_loader.ctx, stdout)
                return "args.print_dot still not solved", 0
        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s",
                          exc, exc_info=args.debug)
            # BUGFIX: was str + Exception + bool concatenation (TypeError).
            infor = "Tool definition failed validation:\n{}".format(exc)
            return infor, 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s",
                          exc, exc_info=args.debug)
            infor = "Tool definition failed initialization:\n{}".format(exc)
            return infor, 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                exc if not args.debug else "",
                exc_info=args.debug)
            return "I'm sorry, I couldn't load this CWL file", 1
        if isinstance(tool, int):
            return tool, 0
        # If on MacOS platform, TMPDIR must be set to be under one of the
        # shared volumes in Docker for Mac
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path
        # Normalize the various temp/cache prefixes to absolute paths and
        # make sure their parent directories exist.
        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(runtimeContext, dirprefix) and getattr(
                    runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                sl = "/" \
                    if getattr(runtimeContext, dirprefix).endswith("/") \
                    or dirprefix == "cachedir" else ""
                setattr(
                    runtimeContext, dirprefix,
                    os.path.abspath(getattr(runtimeContext, dirprefix)) + sl)
                if not os.path.exists(
                        os.path.dirname(getattr(runtimeContext, dirprefix))):
                    try:
                        os.makedirs(
                            os.path.dirname(
                                getattr(runtimeContext, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        # BUGFIX: was str + Exception concatenation.
                        infor = "Failed to create directory: {}".format(e)
                        return infor, 1
        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir
        runtimeContext.secret_store = getdefault(
            runtimeContext.secret_store, SecretStore())
        try:
            initialized_job_order_object = init_job_order(
                job_order_object,
                args,
                tool,
                jobloader,
                stdout,
                print_input_deps=args.print_input_deps,
                relative_deps=args.relative_deps,
                input_basedir=input_basedir,
                secret_store=runtimeContext.secret_store)
        except SystemExit as err:
            return err.code
        if not executor:
            if args.parallel:
                executor = MultithreadedJobExecutor()
            else:
                executor = SingleJobExecutor()
        assert executor is not None
        if isinstance(initialized_job_order_object, int):
            return initialized_job_order_object
        try:
            runtimeContext.basedir = input_basedir
            del args.workflow
            del args.job_order
            conf_file = getattr(
                args, "beta_dependency_resolvers_configuration",
                None)  # Text
            use_conda_dependencies = getattr(
                args, "beta_conda_dependencies", None)  # Text
            job_script_provider = None  # type: Optional[DependenciesConfiguration]
            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = \
                    DependenciesConfiguration(args)
            runtimeContext.find_default_container = \
                functools.partial(find_default_container, args)
            runtimeContext.make_fs_access = getdefault(
                runtimeContext.make_fs_access, StdFsAccess)
            (out, status) = executor(tool,
                                     initialized_job_order_object,
                                     runtimeContext,
                                     logger=_logger)
            # This is the workflow output, it needs to be written
            if out is not None:

                def loc_to_path(obj):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if obj["location"].startswith("file://"):
                        obj["path"] = uri_file_path(obj["location"])

                visit_class(out, ("File", "Directory"), loc_to_path)
                # Unsetting the Generation fron final output object
                visit_class(out, ("File", ),
                            MutationManager().unset_generation)
                if isinstance(out, string_types):
                    stdout.write(out)
                else:
                    stdout.write(
                        json_dumps(
                            out,
                            indent=4,  # type: ignore
                            ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()  # type: ignore
            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                # BUGFIX: was '"...%s" + status' leaving a literal %s.
                infor = "Final process status is {}".format(status)
                return infor, 1
            _logger.info(u"Final process status is %s", status)
            return out, status
        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s",
                          exc, exc_info=args.debug)
            infor = "Input object failed validation:\n{}".format(exc)
            return infor, 1
        except UnsupportedRequirement as exc:
            _logger.error(
                u"Workflow or tool uses unsupported feature:\n%s",
                exc, exc_info=args.debug)
            infor = "Workflow or tool uses unsupported feature:\n{}".format(
                exc)
            return infor, 3
        except WorkflowException as exc:
            _logger.error(u"Workflow error%s:\n%s",
                          try_again_msg,
                          strip_dup_lineno(six.text_type(exc)),
                          exc_info=args.debug)
            infor = "Workflow error{}:\n{}".format(
                try_again_msg, strip_dup_lineno(six.text_type(exc)))
            return infor, 1
        except Exception as exc:
            _logger.error(u"Unhandled error%s:\n %s",
                          try_again_msg, exc, exc_info=args.debug)
            infor = "Unhandled error{}:\n {}".format(try_again_msg, exc)
            return infor, 1
    finally:
        # Restore the default logging configuration no matter how we exit.
        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
def execute(self, context):
    """Run this step's embedded CWL tool from collected upstream outputs.

    Merges upstream XCom ``outputs``, builds the job object, evaluates
    ``valueFrom`` expressions, executes the embedded tool with cwltool's
    ``SingleJobExecutor`` and returns ``{"outputs": ...}`` (including the
    propagated ``tmp_folder``/``output_folder``) for downstream tasks.

    :param context: Airflow task-instance context.
    :raises ValueError: when the tool finishes with ``permanentFail``
        and produced no output.
    """
    logging.info('Running tool: \n{}'.format(
        json.dumps(self.cwl_step.tool, indent=4)))
    collected_outputs = {}
    for task_outputs in self.xcom_pull(
            context=context,
            task_ids=[task.task_id for task in self.upstream_list]):
        collected_outputs = merge(collected_outputs,
                                  task_outputs["outputs"])
    logging.debug('Collected outputs:\n{}'.format(
        json.dumps(collected_outputs, indent=4)))
    # Build the job object: map each step input to upstream outputs.
    jobobj = {}
    for inp in self.cwl_step.tool["inputs"]:
        jobobj_id = shortname(inp["id"]).split("/")[-1]
        source_ids = []
        promises_outputs = []
        try:
            source_ids = [shortname(source) for source in inp["source"]] \
                if isinstance(inp["source"], list) \
                else [shortname(inp["source"])]
            promises_outputs = [
                collected_outputs[source_id] for source_id in source_ids
                if source_id in collected_outputs
            ]
        except Exception:
            # Inputs without a "source" field are legitimate; log and
            # fall through to valueFrom/default handling below.
            logging.info(
                "Couldn't find source field in the step input: \n{}".format(
                    json.dumps(inp, indent=4)))
        logging.info(
            'For input {} with sources: \n{} \nfound upstream outputs: \n{}'
            .format(jobobj_id, source_ids, promises_outputs))
        if len(promises_outputs) > 1:
            if inp.get("linkMerge", "merge_nested") == "merge_flattened":
                jobobj[jobobj_id] = flatten(promises_outputs)
            else:
                jobobj[jobobj_id] = promises_outputs
        elif len(promises_outputs) == 1 and (
                promises_outputs[0] is not None
        ):  # Should also check if [None], because in this case we need to take default value
            jobobj[jobobj_id] = promises_outputs[0]
        elif "valueFrom" in inp:
            jobobj[jobobj_id] = None
        elif "default" in inp:
            d = copy.copy(inp["default"])
            jobobj[jobobj_id] = d
        else:
            continue
    logging.info('Collected job object: \n{}'.format(
        json.dumps(jobobj, indent=4)))
    valueFrom = {
        shortname(i["id"]).split("/")[-1]: i["valueFrom"]
        for i in self.cwl_step.tool["inputs"] if "valueFrom" in i
    }
    logging.info('Inputs with valueFrom: \n{}'.format(
        json.dumps(valueFrom, indent=4)))

    def postScatterEval(shortio):
        # Evaluate valueFrom expressions against the collected inputs.
        def valueFromFunc(k, v):
            if k in valueFrom:
                return cwltool.workflow.expression.do_eval(
                    valueFrom[k], shortio, self.dag.requirements, None,
                    None, {}, context=v)
            else:
                return v

        return {k: valueFromFunc(k, v) for k, v in shortio.items()}

    job = postScatterEval(jobobj)
    logging.info(
        'Collected job object after valueFrom evaluation: \n{}'.format(
            json.dumps(job, indent=4)))
    # maybe need to add here scatter functionality too
    kwargs = self.dag.default_args
    tmp_folder = collected_outputs["tmp_folder"]
    output_folder = collected_outputs["output_folder"]
    kwargs['outdir'] = tempfile.mkdtemp(dir=tmp_folder, prefix="step_tmp_")
    kwargs['tmpdir_prefix'] = os.path.join(tmp_folder, "cwl_tmp_")
    kwargs['tmp_outdir_prefix'] = os.path.join(tmp_folder,
                                               "cwl_outdir_tmp_")
    kwargs['rm_tmpdir'] = False
    kwargs["basedir"] = os.path.abspath(
        os.path.dirname(self.dag.default_args["job_data"]["path"]))
    logger = logging.getLogger("cwltool")
    # NOTE(review): stdout/stderr are redirected into the cwltool logger
    # and never restored here — presumably intentional for Airflow log
    # capture; confirm before changing.
    sys.stdout = StreamLogWriterUpdated(logger, logging.INFO)
    sys.stderr = StreamLogWriterUpdated(logger, logging.WARN)
    executor = cwltool.executors.SingleJobExecutor()
    runtimeContext = RuntimeContext(kwargs)
    runtimeContext.make_fs_access = getdefault(
        runtimeContext.make_fs_access, cwltool.stdfsaccess.StdFsAccess)
    for inp in self.cwl_step.tool["inputs"]:
        if inp.get("not_connected"):
            del job[shortname(inp["id"].split("/")[-1])]
    (output, status) = executor(self.cwl_step.embedded_tool,
                                job,
                                runtimeContext,
                                logger=logger)
    if not output and status == "permanentFail":
        # BUGFIX: was a bare "raise ValueError" with no message.
        raise ValueError(
            "Step failed with status '{}' and no output".format(status))
    logging.debug('Embedded tool outputs: \n{}'.format(
        json.dumps(output, indent=4)))
    promises = {}
    for out in self.cwl_step.tool["outputs"]:
        out_id = shortname(out["id"])
        jobout_id = out_id.split("/")[-1]
        try:
            promises[out_id] = output[jobout_id]
        except KeyError:  # tool produced no value for this output
            continue
    promises["tmp_folder"] = tmp_folder
    promises["output_folder"] = output_folder
    data = {"outputs": promises}
    logging.info('Outputs: \n{}'.format(json.dumps(data, indent=4)))
    return data
def _parsl_execute(
        self,
        runtime,  # type: List[Text]
        env,  # type: MutableMapping[Text, Text]
        runtimeContext  # type: RuntimeContext
):  # type: (...) -> None
    """Run the built command line via ``_job_popen``, classify the exit code
    into a CWL process status, collect outputs, record provenance and clean
    up staging/temporary directories.  Mirrors cwltool's ``JobBase._execute``
    with a custom execution layer.
    """
    scr, _ = self.get_requirement("ShellCommandRequirement")
    shouldquote = needs_shell_quoting_re.search  # type: Callable[[Any], Any]
    if scr:
        # ShellCommandRequirement: arguments are pre-quoted, do not escape.
        shouldquote = lambda x: False
    _logger.info(
        u"[job %s] %s$ %s%s%s%s", self.name, self.outdir,
        " \\\n    ".join([
            shellescape.quote(Text(arg)) if shouldquote(Text(arg)) else
            Text(arg) for arg in (runtime + self.command_line)
        ]),
        u' < %s' % self.stdin if self.stdin else '',
        u' > %s' % os.path.join(self.outdir, self.stdout)
        if self.stdout else '',
        u' 2> %s' % os.path.join(self.outdir, self.stderr)
        if self.stderr else '')
    if self.joborder and runtimeContext.research_obj:
        # Record the inputs used by this job in the provenance document.
        job_order = self.joborder
        assert runtimeContext.prov_obj
        runtimeContext.prov_obj.used_artefacts(
            job_order, runtimeContext.process_run_id,
            runtimeContext.reference_locations, str(self.name))
    outputs = {}  # type: Dict[Text,Text]
    try:
        # Resolve stdin/stdout/stderr redirection targets, creating parent
        # directories for the output streams as needed.
        stdin_path = None
        if self.stdin:
            rmap = self.pathmapper.reversemap(self.stdin)
            if not rmap:
                raise WorkflowException("{} missing from pathmapper".format(
                    self.stdin))
            else:
                stdin_path = rmap[1]
        stderr_path = None
        if self.stderr:
            abserr = os.path.join(self.outdir, self.stderr)
            dnerr = os.path.dirname(abserr)
            if dnerr and not os.path.exists(dnerr):
                os.makedirs(dnerr)
            stderr_path = abserr
        stdout_path = None
        if self.stdout:
            absout = os.path.join(self.outdir, self.stdout)
            dn = os.path.dirname(absout)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            stdout_path = absout
        commands = [Text(x) for x in (runtime + self.command_line)]
        if runtimeContext.secret_store:
            # Substitute real secret values back in just before execution.
            commands = runtimeContext.secret_store.retrieve(commands)
            env = runtimeContext.secret_store.retrieve(env)
        job_script_contents = None  # type: Optional[Text]
        builder = getattr(self, "builder", None)  # type: Builder
        if builder is not None:
            job_script_contents = builder.build_job_script(commands)
        print("Running my own execution layer")
        rcode = _job_popen(
            commands,
            stdin_path=stdin_path,
            stdout_path=stdout_path,
            stderr_path=stderr_path,
            env=env,
            cwd=self.outdir,
            job_dir=tempfile.mkdtemp(prefix=getdefault(
                runtimeContext.tmp_outdir_prefix, DEFAULT_TMP_PREFIX)),
            job_script_contents=job_script_contents,
            timelimit=self.timelimit,
            name=self.name)
        # Map the exit code to a CWL status, honouring the tool's declared
        # successCodes/temporaryFailCodes/permanentFailCodes lists.
        if self.successCodes and rcode in self.successCodes:
            processStatus = "success"
        elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
            processStatus = "temporaryFail"
        elif self.permanentFailCodes and rcode in self.permanentFailCodes:
            processStatus = "permanentFail"
        elif rcode == 0:
            processStatus = "success"
        else:
            processStatus = "permanentFail"
        if self.generatefiles["listing"]:
            assert self.generatemapper is not None
            relink_initialworkdir(self.generatemapper, self.outdir,
                                  self.builder.outdir,
                                  inplace_update=self.inplace_update)
        outputs = self.collect_outputs(self.outdir)
        outputs = bytes2str_in_dicts(outputs)  # type: ignore
    except OSError as e:
        if e.errno == 2:
            # ENOENT: the executable itself was not found.
            if runtime:
                _logger.error(u"'%s' not found", runtime[0])
            else:
                _logger.error(u"'%s' not found", self.command_line[0])
        else:
            _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    except WorkflowException as e:
        _logger.error(u"[job %s] Job error:\n%s" % (self.name, e))
        processStatus = "permanentFail"
    except Exception as e:
        _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    if runtimeContext.research_obj and self.prov_obj and \
            runtimeContext.process_run_id:
        #creating entities for the outputs produced by each step (in the provenance document)
        self.prov_obj.generate_output_prov(outputs,
                                           runtimeContext.process_run_id,
                                           str(self.name))
        self.prov_obj.document.wasEndedBy(runtimeContext.process_run_id,
                                          None,
                                          self.prov_obj.workflow_run_uri,
                                          datetime.datetime.now())
    if processStatus != "success":
        _logger.warning(u"[job %s] completed %s", self.name, processStatus)
    else:
        _logger.info(u"[job %s] completed %s", self.name, processStatus)
    if _logger.isEnabledFor(logging.DEBUG):
        _logger.debug(u"[job %s] %s", self.name,
                      json_dumps(outputs, indent=4))
    if self.generatemapper and runtimeContext.secret_store:
        # Delete any runtime-generated files containing secrets.
        for f, p in self.generatemapper.items():
            if p.type == "CreateFile":
                if runtimeContext.secret_store.has_secret(p.resolved):
                    host_outdir = self.outdir
                    container_outdir = self.builder.outdir
                    host_outdir_tgt = p.target
                    if p.target.startswith(container_outdir + "/"):
                        # Translate the container path back to the host path.
                        host_outdir_tgt = os.path.join(
                            host_outdir,
                            p.target[len(container_outdir) + 1:])
                    os.remove(host_outdir_tgt)
    if runtimeContext.workflow_eval_lock is None:
        raise WorkflowException(
            "runtimeContext.workflow_eval_lock must not be None")
    # Deliver the result to the workflow engine under its evaluation lock.
    with runtimeContext.workflow_eval_lock:
        self.output_callback(outputs, processStatus)
    if self.stagedir and os.path.exists(self.stagedir):
        _logger.debug(u"[job %s] Removing input staging directory %s",
                      self.name, self.stagedir)
        shutil.rmtree(self.stagedir, True)
    if runtimeContext.rm_tmpdir:
        _logger.debug(u"[job %s] Removing temporary directory %s",
                      self.name, self.tmpdir)
        shutil.rmtree(self.tmpdir, True)
def job(
        self,
        joborder,  # type: Dict[Text, AnyValue]
        output_callbacks,  # type: Callable[[Any, Any], Any]
        runtime_context,  # type: RuntimeContext
):  # type: (...) -> Generator[Union[JobBase, CallbackJob], None, None]
    """
    Workflow job generator.

    Builds and yields a single :class:`WpsWorkflowJob` configured from this
    tool's requirements/hints, with its output directory placed under the
    EMS-served WPS output dir and an optional ``TimeLimit`` applied.

    :param joborder: inputs of the job submission
    :param output_callbacks: method to fetch step outputs and corresponding step details
    :param runtime_context: configs about execution environment
    :return:
    """
    # CWL v1.0 exposes TimeLimit as a cwltool extension; later versions
    # use the bare requirement name.
    require_prefix = ""
    if self.metadata["cwlVersion"] == "v1.0":
        require_prefix = "http://commonwl.org/cwltool#"
    jobname = uniquename(runtime_context.name
                         or shortname(self.tool.get("id", "job")))
    # outdir must be served by the EMS because downstream step will need access to upstream steps output
    weaver_out_dir = get_wps_output_dir(get_settings(app))
    runtime_context.outdir = tempfile.mkdtemp(
        prefix=getdefault(runtime_context.tmp_outdir_prefix,
                          DEFAULT_TMP_PREFIX),
        dir=weaver_out_dir)
    builder = self._init_job(joborder, runtime_context)
    # `jobname` is the step name and `joborder` is the actual step inputs
    wps_workflow_job = WpsWorkflowJob(
        builder, builder.job, self.requirements, self.hints, jobname,
        self.get_job_process_definition(jobname, joborder, self.tool),
        self.tool["outputs"])
    wps_workflow_job.prov_obj = self.prov_obj
    wps_workflow_job.successCodes = self.tool.get("successCodes")
    wps_workflow_job.temporaryFailCodes = self.tool.get(
        "temporaryFailCodes")
    wps_workflow_job.permanentFailCodes = self.tool.get(
        "permanentFailCodes")
    # TODO Taken from command_line_tool.py maybe this could let us use the revmap if required at all
    # reffiles = copy.deepcopy(builder.files)
    # builder.pathmapper = self.make_path_mapper(
    #     reffiles, builder.stagedir, runtimeContext, True)
    # builder.requirements = wps_workflow_job.requirements
    wps_workflow_job.outdir = builder.outdir
    wps_workflow_job.tmpdir = builder.tmpdir
    wps_workflow_job.stagedir = builder.stagedir
    readers = {}  # type: Dict[Text, Any]
    timelimit = self.get_requirement(require_prefix + "TimeLimit")[0]
    if timelimit:
        with SourceLine(timelimit, "timelimit",
                        validate.ValidationException):
            # timelimit may be a CWL expression; evaluate then validate.
            wps_workflow_job.timelimit = builder.do_eval(
                timelimit["timelimit"])
            if not isinstance(wps_workflow_job.timelimit,
                              int) or wps_workflow_job.timelimit < 0:
                raise Exception(
                    "timelimit must be an integer >= 0, got: %s" %
                    wps_workflow_job.timelimit)
    # Bind output collection so the job can gather its ports on completion.
    wps_workflow_job.collect_outputs = partial(
        self.collect_output_ports,
        self.tool["outputs"],
        builder,
        compute_checksum=getdefault(runtime_context.compute_checksum,
                                    True),
        jobname=jobname,
        readers=readers)
    wps_workflow_job.output_callback = output_callbacks
    yield wps_workflow_job