def test_for_invalid_yaml2():  # Issue 143
    document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
        get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))
    src = "test19.cwl"
    try:
        load_and_validate(
            document_loader,
            avsc_names,
            six.text_type(get_data("tests/test_schema/" + src)),
            True,
        )
    except RuntimeError as e:
        msg = reformat_yaml_exception_message(
            strip_dup_lineno(six.text_type(e)))
        assert msg.endswith(
            src + ":2:1: expected <block end>, but found ':'") or msg.endswith(
                src + ":2:1: expected <block end>, but found u':'")
        return
    except ValidationException as e:
        msgs = str(strip_dup_lineno(six.text_type(e)))
        print(msgs)
        # weird splits due to differing path length on MS Windows
        # & during the release tests
        assert "{}:2:1: Object".format(src) in msgs
        assert "is not valid because" in msgs
        assert "`CommandLineTool`" in msgs
        assert "mapping with" in msgs
        assert "implicit" in msgs
        assert "null key" in msgs
        return
    assert False, "Missing RuntimeError or ValidationException"

def test_for_invalid_yaml2(self):  # Issue 143
    document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
        get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))
    src = "test19.cwl"
    with self.assertRaises(RuntimeError):
        try:
            load_and_validate(
                document_loader, avsc_names,
                six.text_type(get_data("tests/test_schema/" + src)), True)
        except RuntimeError as e:
            msg = reformat_yaml_exception_message(
                strip_dup_lineno(six.text_type(e)))
            self.assertTrue(
                msg.endswith(src + ":1:1: expected <block end>, but found ':'"))
            print("\n", e)
            raise

def test_print_oneline_for_invalid_yaml(self):  # Issue #137
    document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
        get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))
    src = "test16.cwl"
    with self.assertRaises(RuntimeError):
        try:
            load_and_validate(
                document_loader, avsc_names,
                six.text_type(get_data("tests/test_schema/" + src)), True)
        except RuntimeError as e:
            msg = reformat_yaml_exception_message(
                strip_dup_lineno(six.text_type(e)))
            msg = to_one_line_messages(msg)
            self.assertTrue(
                msg.endswith(src + ":10:1: could not find expected ':'"))
            print("\n", e)
            raise

def test_print_oneline_for_errors_in_resolve_ref(self):  # Issue #141
    document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
        get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))
    src = "test18.cwl"
    fullpath = normpath(get_data("tests/test_schema/" + src))
    with self.assertRaises(ValidationException):
        try:
            load_and_validate(document_loader, avsc_names,
                              six.text_type(fullpath), True)
        except ValidationException as e:
            msgs = to_one_line_messages(
                str(strip_dup_lineno(six.text_type(e)))).splitlines()
            # convert Windows path to Posix path
            if '\\' in fullpath:
                fullpath = '/' + fullpath.lower().replace('\\', '/')
            self.assertEqual(len(msgs), 1)
            print("\n", e)
            assert msgs[0].endswith(
                src + ':13:5: Field `type` references unknown identifier '
                '`Filea`, tried file://%s#Filea' % (fullpath))
            raise

def test_for_invalid_yaml1(self):  # Issue 143
    document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
        get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))
    src = "test16.cwl"
    with self.assertRaises(RuntimeError):
        try:
            load_and_validate(
                document_loader, avsc_names,
                six.text_type(get_data("tests/test_schema/" + src)), True)
        except RuntimeError as e:
            msg = reformat_yaml_exception_message(
                strip_dup_lineno(six.text_type(e)))
            msgs = msg.splitlines()
            self.assertEqual(len(msgs), 2)
            self.assertTrue(
                msgs[0].endswith(src + ":9:7: while scanning a simple key"))
            self.assertTrue(
                msgs[1].endswith(src + ":10:1: could not find expected ':'"))
            print("\n", e)
            raise

def test_print_oneline_for_errors_in_resolve_ref():  # Issue #141
    document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
        get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))
    src = "test18.cwl"
    fullpath = normpath(get_data("tests/test_schema/" + src))
    with pytest.raises(ValidationException):
        try:
            load_and_validate(document_loader, avsc_names,
                              six.text_type(fullpath), True)
        except ValidationException as e:
            msgs = to_one_line_messages(str(strip_dup_lineno(
                six.text_type(e)))).splitlines()
            # convert Windows path to Posix path
            if "\\" in fullpath:
                fullpath = "/" + fullpath.lower().replace("\\", "/")
            assert len(msgs) == 2
            print("\n", e)
            assert msgs[0].endswith(src + ":9:1: checking field `outputs`")
            assert msgs[1].endswith(
                src + ":14:5: Field `type` references unknown identifier "
                "`Filea`, tried file://%s#Filea" % (fullpath))
            raise

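
# The tests above all follow the same shape: trigger a failure inside a
# pytest.raises()/assertRaises() block, assert on the cleaned-up message in an
# inner except clause, then re-raise so the context manager still verifies the
# exception type. A minimal, self-contained illustration of that pattern,
# using a toy parse_config() stand-in rather than the real load_and_validate():

import pytest


def parse_config(text):
    """Toy stand-in for load_and_validate: reject text without '='."""
    if "=" not in text:
        raise RuntimeError("line 1: expected 'key = value'")
    return dict([tuple(text.split("=", 1))])


def test_error_message_and_type():
    with pytest.raises(RuntimeError):
        try:
            parse_config("not a key value pair")
        except RuntimeError as err:
            # Check the formatted message first...
            assert str(err).endswith("expected 'key = value'")
            # ...then re-raise so pytest.raises() can verify the type.
            raise
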
def main( argsl=None, # type: List[str] args=None, # type: argparse.Namespace job_order_object=None, # type: MutableMapping[Text, Any] stdin=sys.stdin, # type: IO[Any] stdout=None, # type: Union[TextIO, codecs.StreamWriter] stderr=sys.stderr, # type: IO[Any] versionfunc=versionstring, # type: Callable[[], Text] logger_handler=None, # custom_schema_callback=None, # type: Callable[[], None] executor=None, # type: Callable[..., Tuple[Dict[Text, Any], Text]] loadingContext=None, # type: LoadingContext runtimeContext=None # type: RuntimeContext ): # type: (...) -> int if not stdout: # force UTF-8 even if the console is configured differently if (hasattr(sys.stdout, "encoding") # type: ignore and sys.stdout.encoding != 'UTF-8'): # type: ignore if six.PY3 and hasattr(sys.stdout, "detach"): stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') else: stdout = codecs.getwriter('utf-8')(sys.stdout) # type: ignore else: stdout = cast(TextIO, sys.stdout) # type: ignore _logger.removeHandler(defaultStreamHandler) if logger_handler: stderr_handler = logger_handler else: stderr_handler = logging.StreamHandler(stderr) _logger.addHandler(stderr_handler) # pre-declared for finally block workflowobj = None input_for_prov = None try: if args is None: if argsl is None: argsl = sys.argv[1:] args = arg_parser().parse_args(argsl) if runtimeContext is None: runtimeContext = RuntimeContext(vars(args)) else: runtimeContext = runtimeContext.copy() # If on Windows platform, a default Docker Container is used if not # explicitely provided by user if onWindows() and not runtimeContext.default_container: # This docker image is a minimal alpine image with bash installed # (size 6 mb). source: https://github.com/frol/docker-alpine-bash runtimeContext.default_container = windows_default_container_id # If caller parsed its own arguments, it may not include every # cwltool option, so fill in defaults to avoid crashing when # dereferencing them in args. for key, val in six.iteritems(get_default_args()): if not hasattr(args, key): setattr(args, key, val) rdflib_logger = logging.getLogger("rdflib.term") rdflib_logger.addHandler(stderr_handler) rdflib_logger.setLevel(logging.ERROR) if args.quiet: _logger.setLevel(logging.WARN) if runtimeContext.debug: _logger.setLevel(logging.DEBUG) rdflib_logger.setLevel(logging.DEBUG) if args.timestamps: formatter = logging.Formatter("[%(asctime)s] %(message)s", "%Y-%m-%d %H:%M:%S") stderr_handler.setFormatter(formatter) if args.version: print(versionfunc()) return 0 else: _logger.info(versionfunc()) if args.print_supported_versions: print("\n".join(supportedCWLversions(args.enable_dev))) return 0 if not args.workflow: if os.path.isfile("CWLFile"): setattr(args, "workflow", "CWLFile") else: _logger.error("") _logger.error( "CWL document required, no input file was provided") arg_parser().print_help() return 1 if args.relax_path_checks: command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE if args.ga4gh_tool_registries: ga4gh_tool_registries[:] = args.ga4gh_tool_registries if not args.enable_ga4gh_tool_registry: del ga4gh_tool_registries[:] if custom_schema_callback: custom_schema_callback() elif args.enable_ext: res = pkg_resources.resource_stream(__name__, 'extensions.yml') use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read()) res.close() else: use_standard_schema("v1.0") #call function from provenance.py if the provenance flag is enabled. 
if args.provenance: if not args.compute_checksum: _logger.error( "--provenance incompatible with --no-compute-checksum") return 1 runtimeContext.research_obj = ResearchObject( temp_prefix_ro=args.tmpdir_prefix, # Optionals, might be None orcid=args.orcid, full_name=args.cwl_full_name) if loadingContext is None: loadingContext = LoadingContext(vars(args)) else: loadingContext = loadingContext.copy() loadingContext.research_obj = runtimeContext.research_obj loadingContext.disable_js_validation = \ args.disable_js_validation or (not args.do_validate) loadingContext.construct_tool_object = getdefault( loadingContext.construct_tool_object, workflow.default_make_tool) loadingContext.resolver = getdefault(loadingContext.resolver, tool_resolver) uri, tool_file_uri = resolve_tool_uri( args.workflow, resolver=loadingContext.resolver, fetcher_constructor=loadingContext.fetcher_constructor) try_again_msg = "" if args.debug else ", try again with --debug for more information" try: job_order_object, input_basedir, jobloader = load_job_order( args, stdin, loadingContext.fetcher_constructor, loadingContext.overrides_list, tool_file_uri) if args.overrides: loadingContext.overrides_list.extend( load_overrides(file_uri(os.path.abspath(args.overrides)), tool_file_uri)) document_loader, workflowobj, uri = fetch_document( uri, resolver=loadingContext.resolver, fetcher_constructor=loadingContext.fetcher_constructor) if args.print_deps: printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri) return 0 document_loader, avsc_names, processobj, metadata, uri \ = validate_document(document_loader, workflowobj, uri, enable_dev=loadingContext.enable_dev, strict=loadingContext.strict, preprocess_only=(args.print_pre or args.pack), fetcher_constructor=loadingContext.fetcher_constructor, skip_schemas=args.skip_schemas, overrides=loadingContext.overrides_list, do_validate=loadingContext.do_validate) if args.pack: stdout.write( print_pack(document_loader, processobj, uri, metadata)) return 0 if args.provenance and runtimeContext.research_obj: # Can't really be combined with args.pack at same time runtimeContext.research_obj.packed_workflow( print_pack(document_loader, processobj, uri, metadata)) if args.print_pre: stdout.write(json_dumps(processobj, indent=4)) return 0 loadingContext.overrides_list.extend( metadata.get("cwltool:overrides", [])) tool = make_tool(document_loader, avsc_names, metadata, uri, loadingContext) if args.make_template: yaml.safe_dump(generate_input_template(tool), sys.stdout, default_flow_style=False, indent=4, block_seq_indent=2) return 0 if args.validate: _logger.info("Tool definition is valid") return 0 if args.print_rdf: stdout.write( printrdf(tool, document_loader.ctx, args.rdf_serializer)) return 0 if args.print_dot: printdot(tool, document_loader.ctx, stdout) return 0 except (validate.ValidationException) as exc: _logger.error(u"Tool definition failed validation:\n%s", exc, exc_info=args.debug) return 1 except (RuntimeError, WorkflowException) as exc: _logger.error(u"Tool definition failed initialization:\n%s", exc, exc_info=args.debug) return 1 except Exception as exc: _logger.error( u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s", try_again_msg, exc if not args.debug else "", exc_info=args.debug) return 1 if isinstance(tool, int): return tool # If on MacOS platform, TMPDIR must be set to be under one of the # shared volumes in Docker for Mac # More info: https://dockstore.org/docs/faq if sys.platform == "darwin": default_mac_path = "/private/tmp/docker_tmp" 
if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX: runtimeContext.tmp_outdir_prefix = default_mac_path for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"): if getattr(runtimeContext, dirprefix) and getattr( runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX: sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \ else "" setattr( runtimeContext, dirprefix, os.path.abspath(getattr(runtimeContext, dirprefix)) + sl) if not os.path.exists( os.path.dirname(getattr(runtimeContext, dirprefix))): try: os.makedirs( os.path.dirname(getattr(runtimeContext, dirprefix))) except Exception as e: _logger.error("Failed to create directory: %s", e) return 1 if args.cachedir: if args.move_outputs == "move": runtimeContext.move_outputs = "copy" runtimeContext.tmp_outdir_prefix = args.cachedir runtimeContext.secret_store = getdefault(runtimeContext.secret_store, SecretStore()) try: initialized_job_order_object, input_for_prov = init_job_order( job_order_object, args, tool, jobloader, stdout, print_input_deps=args.print_input_deps, provArgs=runtimeContext.research_obj, relative_deps=args.relative_deps, input_basedir=input_basedir, secret_store=runtimeContext.secret_store) except SystemExit as err: return err.code if not executor: if args.parallel: executor = MultithreadedJobExecutor() runtimeContext.select_resources = executor.select_resources else: executor = SingleJobExecutor() assert executor is not None try: runtimeContext.basedir = input_basedir del args.workflow del args.job_order conf_file = getattr(args, "beta_dependency_resolvers_configuration", None) # Text use_conda_dependencies = getattr(args, "beta_conda_dependencies", None) # Text if conf_file or use_conda_dependencies: runtimeContext.job_script_provider = DependenciesConfiguration( args) runtimeContext.find_default_container = functools.partial( find_default_container, default_container=runtimeContext.default_container, use_biocontainers=args.beta_use_biocontainers) runtimeContext.make_fs_access = getdefault( runtimeContext.make_fs_access, StdFsAccess) (out, status) = executor(tool, initialized_job_order_object, runtimeContext, logger=_logger) if out is not None: def loc_to_path(obj): for field in ("path", "nameext", "nameroot", "dirname"): if field in obj: del obj[field] if obj["location"].startswith("file://"): obj["path"] = uri_file_path(obj["location"]) visit_class(out, ("File", "Directory"), loc_to_path) # Unsetting the Generation from final output object visit_class(out, ("File", ), MutationManager().unset_generation) if isinstance(out, string_types): stdout.write(out) else: stdout.write( json_dumps( out, indent=4, # type: ignore ensure_ascii=False)) stdout.write("\n") if hasattr(stdout, "flush"): stdout.flush() # type: ignore if status != "success": _logger.warning(u"Final process status is %s", status) return 1 _logger.info(u"Final process status is %s", status) return 0 except (validate.ValidationException) as exc: _logger.error(u"Input object failed validation:\n%s", exc, exc_info=args.debug) return 1 except UnsupportedRequirement as exc: _logger.error(u"Workflow or tool uses unsupported feature:\n%s", exc, exc_info=args.debug) return 33 except WorkflowException as exc: _logger.error(u"Workflow error%s:\n%s", try_again_msg, strip_dup_lineno(six.text_type(exc)), exc_info=args.debug) return 1 except Exception as exc: _logger.error(u"Unhandled error%s:\n %s", try_again_msg, exc, exc_info=args.debug) return 1 finally: if args and runtimeContext and runtimeContext.research_obj \ and 
                args.rm_tmpdir and workflowobj:
            # adding all related cwl files to RO
            prov_dependencies = printdeps(workflowobj, document_loader, stdout,
                                          args.relative_deps, uri,
                                          runtimeContext.research_obj)
            prov_dep = prov_dependencies[1]
            assert prov_dep
            runtimeContext.research_obj.generate_snapshot(prov_dep)
            # for input file dependencies
            if input_for_prov:
                runtimeContext.research_obj.generate_snapshot(input_for_prov)
            # NOTE: keep these commented out lines to evaluate tests later
            # if job_order_object:
            #     runtimeContext.research_obj.generate_snapshot(job_order_object)
            runtimeContext.research_obj.close(args.provenance)
        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)

def main( argsl: Optional[List[str]] = None, args: Optional[argparse.Namespace] = None, job_order_object: Optional[CWLObjectType] = None, stdin: IO[Any] = sys.stdin, stdout: Optional[Union[TextIO, StreamWriter]] = None, stderr: IO[Any] = sys.stderr, versionfunc: Callable[[], str] = versionstring, logger_handler: Optional[logging.Handler] = None, custom_schema_callback: Optional[Callable[[], None]] = None, executor: Optional[JobExecutor] = None, loadingContext: Optional[LoadingContext] = None, runtimeContext: Optional[RuntimeContext] = None, input_required: bool = True, ) -> int: if not stdout: # force UTF-8 even if the console is configured differently if hasattr(sys.stdout, "encoding") and sys.stdout.encoding.upper() not in ( "UTF-8", "UTF8", ): if hasattr(sys.stdout, "detach"): stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8") else: stdout = getwriter("utf-8")(sys.stdout) # type: ignore else: stdout = sys.stdout _logger.removeHandler(defaultStreamHandler) stderr_handler = logger_handler if stderr_handler is not None: _logger.addHandler(stderr_handler) else: coloredlogs.install(logger=_logger, stream=stderr) stderr_handler = _logger.handlers[-1] workflowobj = None prov_log_handler = None # type: Optional[logging.StreamHandler] try: if args is None: if argsl is None: argsl = sys.argv[1:] addl = [] # type: List[str] if "CWLTOOL_OPTIONS" in os.environ: addl = os.environ["CWLTOOL_OPTIONS"].split(" ") parser = arg_parser() argcomplete.autocomplete(parser) args = parser.parse_args(addl + argsl) if args.record_container_id: if not args.cidfile_dir: args.cidfile_dir = os.getcwd() del args.record_container_id if runtimeContext is None: runtimeContext = RuntimeContext(vars(args)) else: runtimeContext = runtimeContext.copy() # If on Windows platform, a default Docker Container is used if not # explicitely provided by user if onWindows() and not runtimeContext.default_container: # This docker image is a minimal alpine image with bash installed # (size 6 mb). source: https://github.com/frol/docker-alpine-bash runtimeContext.default_container = windows_default_container_id # If caller parsed its own arguments, it may not include every # cwltool option, so fill in defaults to avoid crashing when # dereferencing them in args. 
for key, val in get_default_args().items(): if not hasattr(args, key): setattr(args, key, val) configure_logging(args, stderr_handler, runtimeContext) if args.version: print(versionfunc()) return 0 _logger.info(versionfunc()) if args.print_supported_versions: print("\n".join(supported_cwl_versions(args.enable_dev))) return 0 if not args.workflow: if os.path.isfile("CWLFile"): args.workflow = "CWLFile" else: _logger.error( "CWL document required, no input file was provided") parser.print_help() return 1 if args.relax_path_checks: command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE if args.ga4gh_tool_registries: ga4gh_tool_registries[:] = args.ga4gh_tool_registries if not args.enable_ga4gh_tool_registry: del ga4gh_tool_registries[:] if args.mpi_config_file is not None: runtimeContext.mpi_config = MpiConfig.load(args.mpi_config_file) setup_schema(args, custom_schema_callback) if args.provenance: if argsl is None: raise Exception("argsl cannot be None") if setup_provenance(args, argsl, runtimeContext) is not None: return 1 loadingContext = setup_loadingContext(loadingContext, runtimeContext, args) uri, tool_file_uri = resolve_tool_uri( args.workflow, resolver=loadingContext.resolver, fetcher_constructor=loadingContext.fetcher_constructor, ) try_again_msg = ("" if args.debug else ", try again with --debug for more information") try: job_order_object, input_basedir, jobloader = load_job_order( args, stdin, loadingContext.fetcher_constructor, loadingContext.overrides_list, tool_file_uri, ) if args.overrides: loadingContext.overrides_list.extend( load_overrides(file_uri(os.path.abspath(args.overrides)), tool_file_uri)) loadingContext, workflowobj, uri = fetch_document( uri, loadingContext) if args.print_deps and loadingContext.loader: printdeps(workflowobj, loadingContext.loader, stdout, args.relative_deps, uri) return 0 loadingContext, uri = resolve_and_validate_document( loadingContext, workflowobj, uri, preprocess_only=(args.print_pre or args.pack), skip_schemas=args.skip_schemas, ) if loadingContext.loader is None: raise Exception("Impossible code path.") processobj, metadata = loadingContext.loader.resolve_ref(uri) processobj = cast(CommentedMap, processobj) if args.pack: stdout.write(print_pack(loadingContext, uri)) return 0 if args.provenance and runtimeContext.research_obj: # Can't really be combined with args.pack at same time runtimeContext.research_obj.packed_workflow( print_pack(loadingContext, uri)) if args.print_pre: stdout.write( json_dumps(processobj, indent=4, sort_keys=True, separators=(",", ": "))) return 0 tool = make_tool(uri, loadingContext) if args.make_template: make_template(tool) return 0 if args.validate: print("{} is valid CWL.".format(args.workflow)) return 0 if args.print_rdf: stdout.write( printrdf(tool, loadingContext.loader.ctx, args.rdf_serializer)) return 0 if args.print_dot: printdot(tool, loadingContext.loader.ctx, stdout) return 0 if args.print_targets: for f in ("outputs", "steps", "inputs"): if tool.tool[f]: _logger.info("%s%s targets:", f[0].upper(), f[1:-1]) stdout.write(" " + "\n ".join( [shortname(t["id"]) for t in tool.tool[f]]) + "\n") return 0 if args.target: ctool = choose_target(args, tool, loadingContext) if ctool is None: return 1 else: tool = ctool if args.print_subgraph: if "name" in tool.tool: del tool.tool["name"] stdout.write( json_dumps(tool.tool, indent=4, sort_keys=True, separators=(",", ": "))) return 0 except (ValidationException) as exc: _logger.error("Tool definition failed validation:\n%s", str(exc), 
exc_info=args.debug) return 1 except (RuntimeError, WorkflowException) as exc: _logger.error( "Tool definition failed initialization:\n%s", str(exc), exc_info=args.debug, ) return 1 except Exception as exc: _logger.error( "I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s", try_again_msg, str(exc) if not args.debug else "", exc_info=args.debug, ) return 1 if isinstance(tool, int): return tool # If on MacOS platform, TMPDIR must be set to be under one of the # shared volumes in Docker for Mac # More info: https://dockstore.org/docs/faq if sys.platform == "darwin": default_mac_path = "/private/tmp/docker_tmp" if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX: runtimeContext.tmp_outdir_prefix = default_mac_path if runtimeContext.tmpdir_prefix == DEFAULT_TMP_PREFIX: runtimeContext.tmpdir_prefix = default_mac_path if check_working_directories(runtimeContext) is not None: return 1 if args.cachedir: if args.move_outputs == "move": runtimeContext.move_outputs = "copy" runtimeContext.tmp_outdir_prefix = args.cachedir runtimeContext.secret_store = getdefault(runtimeContext.secret_store, SecretStore()) runtimeContext.make_fs_access = getdefault( runtimeContext.make_fs_access, StdFsAccess) if not executor: if args.parallel: temp_executor = MultithreadedJobExecutor() runtimeContext.select_resources = temp_executor.select_resources real_executor = temp_executor # type: JobExecutor else: real_executor = SingleJobExecutor() else: real_executor = executor try: runtimeContext.basedir = input_basedir if isinstance(tool, ProcessGenerator): tfjob_order = {} # type: CWLObjectType if loadingContext.jobdefaults: tfjob_order.update(loadingContext.jobdefaults) if job_order_object: tfjob_order.update(job_order_object) tfout, tfstatus = real_executor(tool.embedded_tool, tfjob_order, runtimeContext) if not tfout or tfstatus != "success": raise WorkflowException( "ProcessGenerator failed to generate workflow") tool, job_order_object = tool.result(tfjob_order, tfout, runtimeContext) if not job_order_object: job_order_object = None try: initialized_job_order_object = init_job_order( job_order_object, args, tool, jobloader, stdout, print_input_deps=args.print_input_deps, relative_deps=args.relative_deps, make_fs_access=runtimeContext.make_fs_access, input_basedir=input_basedir, secret_store=runtimeContext.secret_store, input_required=input_required, ) except SystemExit as err: return err.code del args.workflow del args.job_order conf_file = getattr(args, "beta_dependency_resolvers_configuration", None) # str use_conda_dependencies = getattr(args, "beta_conda_dependencies", None) # str if conf_file or use_conda_dependencies: runtimeContext.job_script_provider = DependenciesConfiguration( args) else: runtimeContext.find_default_container = functools.partial( find_default_container, default_container=runtimeContext.default_container, use_biocontainers=args.beta_use_biocontainers, ) (out, status) = real_executor(tool, initialized_job_order_object, runtimeContext, logger=_logger) if out is not None: if runtimeContext.research_obj is not None: runtimeContext.research_obj.create_job(out, True) def remove_at_id(doc: CWLObjectType) -> None: for key in list(doc.keys()): if key == "@id": del doc[key] else: value = doc[key] if isinstance(value, MutableMapping): remove_at_id(value) elif isinstance(value, MutableSequence): for entry in value: if isinstance(entry, MutableMapping): remove_at_id(entry) remove_at_id(out) visit_class( out, ("File", ), functools.partial(add_sizes, runtimeContext.make_fs_access("")), ) 
def loc_to_path(obj: CWLObjectType) -> None: for field in ("path", "nameext", "nameroot", "dirname"): if field in obj: del obj[field] if cast(str, obj["location"]).startswith("file://"): obj["path"] = uri_file_path(cast(str, obj["location"])) visit_class(out, ("File", "Directory"), loc_to_path) # Unsetting the Generation from final output object visit_class(out, ("File", ), MutationManager().unset_generation) if isinstance(out, str): stdout.write(out) else: stdout.write(json_dumps(out, indent=4, ensure_ascii=False)) stdout.write("\n") if hasattr(stdout, "flush"): stdout.flush() if status != "success": _logger.warning("Final process status is %s", status) return 1 _logger.info("Final process status is %s", status) return 0 except (ValidationException) as exc: _logger.error("Input object failed validation:\n%s", str(exc), exc_info=args.debug) return 1 except UnsupportedRequirement as exc: _logger.error( "Workflow or tool uses unsupported feature:\n%s", str(exc), exc_info=args.debug, ) return 33 except WorkflowException as exc: _logger.error( "Workflow error%s:\n%s", try_again_msg, strip_dup_lineno(str(exc)), exc_info=args.debug, ) return 1 except Exception as exc: # pylint: disable=broad-except _logger.error( "Unhandled error%s:\n %s", try_again_msg, str(exc), exc_info=args.debug, ) return 1 finally: if (args and runtimeContext and runtimeContext.research_obj and workflowobj and loadingContext): research_obj = runtimeContext.research_obj if loadingContext.loader is not None: research_obj.generate_snapshot( prov_deps(workflowobj, loadingContext.loader, uri)) else: _logger.warning("Unable to generate provenance snapshot " " due to missing loadingContext.loader.") if prov_log_handler is not None: # Stop logging so we won't half-log adding ourself to RO _logger.debug("[provenance] Closing provenance log file %s", prov_log_handler) _logger.removeHandler(prov_log_handler) # Ensure last log lines are written out prov_log_handler.flush() # Underlying WritableBagFile will add the tagfile to the manifest prov_log_handler.stream.close() prov_log_handler.close() research_obj.close(args.provenance) _logger.removeHandler(stderr_handler) _logger.addHandler(defaultStreamHandler)
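
# The main() entry point above can also be driven programmatically instead of
# from the command line. A minimal sketch (assuming a cwltool install exposing
# cwltool.main.main and cwltool.context.RuntimeContext as in the signature
# above; "my-workflow.cwl" and "my-job.yml" are placeholder paths):

import io
import sys

from cwltool.context import RuntimeContext
from cwltool.main import main

# Capture the final output object instead of writing it to the real stdout.
captured = io.StringIO()

runtime_context = RuntimeContext()
runtime_context.tmp_outdir_prefix = "/tmp/cwl-out-"  # placeholder prefix

exit_code = main(
    argsl=["--debug", "my-workflow.cwl", "my-job.yml"],  # placeholder paths
    stdout=captured,
    runtimeContext=runtime_context,
)

if exit_code == 0:
    print(captured.getvalue())
else:
    sys.stderr.write("cwltool exited with status %d\n" % exit_code)
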
def main( argsl=None, # type: List[str] args=None, # type: argparse.Namespace executor=single_job_executor, # type: Callable[..., Tuple[Dict[Text, Any], Text]] makeTool=workflow.defaultMakeTool, # type: Callable[..., Process] selectResources=None, # type: Callable[[Dict[Text, int]], Dict[Text, int]] stdin=sys.stdin, # type: IO[Any] stdout=sys.stdout, # type: IO[Any] stderr=sys.stderr, # type: IO[Any] versionfunc=versionstring, # type: Callable[[], Text] job_order_object=None, # type: MutableMapping[Text, Any] make_fs_access=StdFsAccess, # type: Callable[[Text], StdFsAccess] fetcher_constructor=None, # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher] resolver=tool_resolver, logger_handler=None, custom_schema_callback=None # type: Callable[[], None] ): # type: (...) -> int _logger.removeHandler(defaultStreamHandler) if logger_handler: stderr_handler = logger_handler else: stderr_handler = logging.StreamHandler(stderr) _logger.addHandler(stderr_handler) try: if args is None: if argsl is None: argsl = sys.argv[1:] args = arg_parser().parse_args(argsl) # If On windows platform, A default Docker Container is Used if not explicitely provided by user if onWindows() and not args.default_container: # This docker image is a minimal alpine image with bash installed(size 6 mb). source: https://github.com/frol/docker-alpine-bash args.default_container = windows_default_container_id # If caller provided custom arguments, it may be not every expected # option is set, so fill in no-op defaults to avoid crashing when # dereferencing them in args. for k, v in six.iteritems({ 'print_deps': False, 'print_pre': False, 'print_rdf': False, 'print_dot': False, 'relative_deps': False, 'tmp_outdir_prefix': 'tmp', 'tmpdir_prefix': 'tmp', 'print_input_deps': False, 'cachedir': None, 'quiet': False, 'debug': False, 'js_console': False, 'version': False, 'enable_dev': False, 'enable_ext': False, 'strict': True, 'skip_schemas': False, 'rdf_serializer': None, 'basedir': None, 'tool_help': False, 'workflow': None, 'job_order': None, 'pack': False, 'on_error': 'continue', 'relax_path_checks': False, 'validate': False, 'enable_ga4gh_tool_registry': False, 'ga4gh_tool_registries': [], 'find_default_container': None, 'make_template': False, 'overrides': None }): if not hasattr(args, k): setattr(args, k, v) if args.quiet: _logger.setLevel(logging.WARN) if args.debug: _logger.setLevel(logging.DEBUG) if args.version: print(versionfunc()) return 0 else: _logger.info(versionfunc()) if args.print_supported_versions: print("\n".join(supportedCWLversions(args.enable_dev))) return 0 if not args.workflow: if os.path.isfile("CWLFile"): setattr(args, "workflow", "CWLFile") else: _logger.error("") _logger.error( "CWL document required, no input file was provided") arg_parser().print_help() return 1 if args.relax_path_checks: draft2tool.ACCEPTLIST_RE = draft2tool.ACCEPTLIST_EN_RELAXED_RE if args.ga4gh_tool_registries: ga4gh_tool_registries[:] = args.ga4gh_tool_registries if not args.enable_ga4gh_tool_registry: del ga4gh_tool_registries[:] if custom_schema_callback: custom_schema_callback() elif args.enable_ext: res = pkg_resources.resource_stream(__name__, 'extensions.yml') use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read()) res.close() else: use_standard_schema("v1.0") uri, tool_file_uri = resolve_tool_uri( args.workflow, resolver=resolver, fetcher_constructor=fetcher_constructor) overrides = [] # type: List[Dict[Text, Any]] try: job_order_object, input_basedir, jobloader = load_job_order( args, 
stdin, fetcher_constructor, overrides, tool_file_uri) except Exception as e: _logger.error(Text(e), exc_info=args.debug) if args.overrides: overrides.extend( load_overrides(file_uri(os.path.abspath(args.overrides)), tool_file_uri)) try: document_loader, workflowobj, uri = fetch_document( uri, resolver=resolver, fetcher_constructor=fetcher_constructor) if args.print_deps: printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri) return 0 document_loader, avsc_names, processobj, metadata, uri \ = validate_document(document_loader, workflowobj, uri, enable_dev=args.enable_dev, strict=args.strict, preprocess_only=args.print_pre or args.pack, fetcher_constructor=fetcher_constructor, skip_schemas=args.skip_schemas, overrides=overrides) if args.print_pre: stdout.write(json.dumps(processobj, indent=4)) return 0 overrides.extend(metadata.get("cwltool:overrides", [])) conf_file = getattr(args, "beta_dependency_resolvers_configuration", None) # Text use_conda_dependencies = getattr(args, "beta_conda_dependencies", None) # Text make_tool_kwds = vars(args) job_script_provider = None # type: Callable[[Any, List[str]], Text] if conf_file or use_conda_dependencies: dependencies_configuration = DependenciesConfiguration( args) # type: DependenciesConfiguration make_tool_kwds[ "job_script_provider"] = dependencies_configuration make_tool_kwds["find_default_container"] = functools.partial( find_default_container, args) make_tool_kwds["overrides"] = overrides tool = make_tool(document_loader, avsc_names, metadata, uri, makeTool, make_tool_kwds) if args.make_template: yaml.safe_dump(generate_input_template(tool), sys.stdout, default_flow_style=False, indent=4, block_seq_indent=2) return 0 if args.validate: _logger.info("Tool definition is valid") return 0 if args.pack: stdout.write( print_pack(document_loader, processobj, uri, metadata)) return 0 if args.print_rdf: stdout.write( printrdf(tool, document_loader.ctx, args.rdf_serializer)) return 0 if args.print_dot: printdot(tool, document_loader.ctx, stdout) return 0 except (validate.ValidationException) as exc: _logger.error(u"Tool definition failed validation:\n%s", exc, exc_info=args.debug) return 1 except (RuntimeError, WorkflowException) as exc: _logger.error(u"Tool definition failed initialization:\n%s", exc, exc_info=args.debug) return 1 except Exception as exc: _logger.error( u"I'm sorry, I couldn't load this CWL file%s", ", try again with --debug for more information.\nThe error was: " "%s" % exc if not args.debug else ". 
The error was:", exc_info=args.debug) return 1 if isinstance(tool, int): return tool for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"): if getattr(args, dirprefix) and getattr(args, dirprefix) != 'tmp': sl = "/" if getattr(args, dirprefix).endswith( "/") or dirprefix == "cachedir" else "" setattr(args, dirprefix, os.path.abspath(getattr(args, dirprefix)) + sl) if not os.path.exists(os.path.dirname(getattr(args, dirprefix))): try: os.makedirs(os.path.dirname(getattr(args, dirprefix))) except Exception as e: _logger.error("Failed to create directory: %s", e) return 1 if args.cachedir: if args.move_outputs == "move": setattr(args, 'move_outputs', "copy") setattr(args, "tmp_outdir_prefix", args.cachedir) try: job_order_object = init_job_order( job_order_object, args, tool, print_input_deps=args.print_input_deps, relative_deps=args.relative_deps, stdout=stdout, make_fs_access=make_fs_access, loader=jobloader, input_basedir=input_basedir) except SystemExit as e: return e.code if isinstance(job_order_object, int): return job_order_object try: setattr(args, 'basedir', input_basedir) del args.workflow del args.job_order (out, status) = executor(tool, job_order_object, makeTool=makeTool, select_resources=selectResources, make_fs_access=make_fs_access, **vars(args)) # This is the workflow output, it needs to be written if out is not None: def locToPath(p): for field in ("path", "nameext", "nameroot", "dirname"): if field in p: del p[field] if p["location"].startswith("file://"): p["path"] = uri_file_path(p["location"]) visit_class(out, ("File", "Directory"), locToPath) # Unsetting the Generation fron final output object visit_class(out, ("File", ), MutationManager().unset_generation) if isinstance(out, six.string_types): stdout.write(out) else: stdout.write(json.dumps(out, indent=4)) stdout.write("\n") stdout.flush() if status != "success": _logger.warning(u"Final process status is %s", status) return 1 else: _logger.info(u"Final process status is %s", status) return 0 except (validate.ValidationException) as exc: _logger.error(u"Input object failed validation:\n%s", exc, exc_info=args.debug) return 1 except UnsupportedRequirement as exc: _logger.error(u"Workflow or tool uses unsupported feature:\n%s", exc, exc_info=args.debug) return 33 except WorkflowException as exc: _logger.error( u"Workflow error, try again with --debug for more " "information:\n%s", strip_dup_lineno(six.text_type(exc)), exc_info=args.debug) return 1 except Exception as exc: _logger.error( u"Unhandled error, try again with --debug for more information:\n" " %s", exc, exc_info=args.debug) return 1 finally: _logger.removeHandler(stderr_handler) _logger.addHandler(defaultStreamHandler)
def _init_job(self, joborder, runtime_context):
    # type: (Mapping[str, str], RuntimeContext) -> Builder

    if self.metadata.get("cwlVersion") != INTERNAL_VERSION:
        raise WorkflowException(
            "Process object loaded with version '%s', must update to '%s' in order to execute."
            % (self.metadata.get("cwlVersion"), INTERNAL_VERSION))

    job = cast(Dict[str, expression.JSON], copy.deepcopy(joborder))

    make_fs_access = getdefault(runtime_context.make_fs_access, StdFsAccess)
    fs_access = make_fs_access(runtime_context.basedir)

    load_listing_req, _ = self.get_requirement("LoadListingRequirement")

    if load_listing_req is not None:
        load_listing = load_listing_req.get("loadListing")
    else:
        load_listing = "no_listing"

    # Validate job order
    try:
        fill_in_defaults(self.tool["inputs"], job, fs_access)
        normalizeFilesDirs(job)
        schema = self.names.get_name("input_record_schema", "")
        if schema is None:
            raise WorkflowException("Missing input record schema: "
                                    "{}".format(self.names))
        validate.validate_ex(schema, job, strict=False,
                             logger=_logger_validation_warnings)

        if load_listing and load_listing != "no_listing":
            get_listing(fs_access, job,
                        recursive=(load_listing == "deep_listing"))

        visit_class(job, ("File", ), functools.partial(add_sizes, fs_access))

        if load_listing == "deep_listing":
            for i, inparm in enumerate(self.tool["inputs"]):
                k = shortname(inparm["id"])
                if k not in job:
                    continue
                v = job[k]
                dircount = [0]

                def inc(d):  # type: (List[int]) -> None
                    d[0] += 1

                visit_class(v, ("Directory", ), lambda x: inc(dircount))
                if dircount[0] == 0:
                    continue
                filecount = [0]
                visit_class(v, ("File", ), lambda x: inc(filecount))
                if filecount[0] > FILE_COUNT_WARNING:
                    # Long lines in this message are okay, will be reflowed
                    # based on terminal columns.
                    _logger.warning(
                        strip_dup_lineno(
                            SourceLine(self.tool["inputs"], i, str).makeError(
                                """Recursive directory listing has resulted in a large number of File objects (%s) passed to the input parameter '%s'. This may negatively affect workflow performance and memory use.

If this is a problem, use the hint 'cwltool:LoadListingRequirement' with
"shallow_listing" or "no_listing" to change the directory listing behavior:

  $namespaces:
    cwltool: "http://commonwl.org/cwltool#"
  hints:
    cwltool:LoadListingRequirement:
      loadListing: shallow_listing

""" % (filecount[0], k))))
    except (validate.ValidationException, WorkflowException) as err:
        raise WorkflowException("Invalid job input record:\n" + str(err)) from err

    files = []  # type: List[Dict[str, str]]
    bindings = CommentedSeq()
    tmpdir = ""
    stagedir = ""

    docker_req, _ = self.get_requirement("DockerRequirement")
    default_docker = None

    if docker_req is None and runtime_context.default_container:
        default_docker = runtime_context.default_container

    if (docker_req or default_docker) and runtime_context.use_container:
        if docker_req is not None:
            # Check if docker output directory is absolute
            if docker_req.get("dockerOutputDirectory") and docker_req.get(
                    "dockerOutputDirectory").startswith("/"):
                outdir = docker_req.get("dockerOutputDirectory")
            else:
                outdir = (docker_req.get("dockerOutputDirectory")
                          or runtime_context.docker_outdir or random_outdir())
        elif default_docker is not None:
            outdir = runtime_context.docker_outdir or random_outdir()
        tmpdir = runtime_context.docker_tmpdir or "/tmp"  # nosec
        stagedir = runtime_context.docker_stagedir or "/var/lib/cwl"
    else:
        outdir = fs_access.realpath(
            runtime_context.outdir or tempfile.mkdtemp(
                prefix=getdefault(runtime_context.tmp_outdir_prefix,
                                  DEFAULT_TMP_PREFIX)))
        if self.tool["class"] != "Workflow":
            tmpdir = fs_access.realpath(runtime_context.tmpdir
                                        or tempfile.mkdtemp())
            stagedir = fs_access.realpath(runtime_context.stagedir
                                          or tempfile.mkdtemp())

    builder = Builder(
        job, files, bindings, self.schemaDefs, self.names,
        self.requirements, self.hints, {},
        runtime_context.mutation_manager, self.formatgraph,
        make_fs_access, fs_access, runtime_context.job_script_provider,
        runtime_context.eval_timeout, runtime_context.debug,
        runtime_context.js_console, runtime_context.force_docker_pull,
        load_listing, outdir, tmpdir, stagedir,
    )

    bindings.extend(
        builder.bind_input(
            self.inputs_record_schema,
            job,
            discover_secondaryFiles=getdefault(runtime_context.toplevel,
                                               False),
        ))

    if self.tool.get("baseCommand"):
        for index, command in enumerate(aslist(self.tool["baseCommand"])):
            bindings.append({
                "position": [-1000000, index],
                "datum": command
            })

    if self.tool.get("arguments"):
        for i, arg in enumerate(self.tool["arguments"]):
            lc = self.tool["arguments"].lc.data[i]
            filename = self.tool["arguments"].lc.filename
            bindings.lc.add_kv_line_col(len(bindings), lc)
            if isinstance(arg, MutableMapping):
                arg = copy.deepcopy(arg)
                if arg.get("position"):
                    position = arg.get("position")
                    if isinstance(position, str):
                        # no need to test the CWLVersion as the v1.0
                        # schema only allows ints
                        position = builder.do_eval(position)
                        if position is None:
                            position = 0
                    arg["position"] = [position, i]
                else:
                    arg["position"] = [0, i]
                bindings.append(arg)
            elif ("$(" in arg) or ("${" in arg):
                cm = CommentedMap((("position", [0, i]), ("valueFrom", arg)))
                cm.lc.add_kv_line_col("valueFrom", lc)
                cm.lc.filename = filename
                bindings.append(cm)
            else:
                cm = CommentedMap((("position", [0, i]), ("datum", arg)))
                cm.lc.add_kv_line_col("datum", lc)
                cm.lc.filename = filename
                bindings.append(cm)

    # use python2 like sorting of heterogeneous lists
    # (containing str and int types),
    key = functools.cmp_to_key(cmp_like_py2)

    # This awkward construction replaces the contents of
    # "bindings" in place (because Builder expects it to be
    # mutated in place, sigh, I'm sorry) with its contents sorted,
    # supporting different versions of Python and ruamel.yaml with
    # different behaviors/bugs in CommentedSeq.
    bindings_copy = copy.deepcopy(bindings)
    del bindings[:]
    bindings.extend(sorted(bindings_copy, key=key))

    if self.tool["class"] != "Workflow":
        builder.resources = self.evalResources(builder, runtime_context)
    return builder

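
# The copy/clear/extend dance above exists because binding "position" lists can
# mix ints and strs, which Python 3 refuses to order directly; functools.cmp_to_key
# with a Python-2-style comparator restores a total order. A standalone sketch of
# the same idea (cmp_positions is a simplified stand-in, not cwltool's real
# cmp_like_py2):

import functools


def cmp_positions(a, b):
    """Compare [int-or-str, ...] position lists element-wise;
    on a type mismatch, order by type name so ints sort before strs."""
    for x, y in zip(a, b):
        if type(x) is not type(y):
            x, y = type(x).__name__, type(y).__name__
        if x != y:
            return -1 if x < y else 1
    return len(a) - len(b)


bindings = [
    {"position": [0, "out"], "datum": "--out"},
    {"position": [-1000000, 0], "datum": "echo"},
    {"position": [0, 1], "datum": "--verbose"},
]

# Sort a copy, then replace the original list's contents in place,
# mirroring the del/extend pattern used on the CommentedSeq above.
bindings_sorted = sorted(
    bindings,
    key=functools.cmp_to_key(
        lambda a, b: cmp_positions(a["position"], b["position"])))
del bindings[:]
bindings.extend(bindings_sorted)

print([b["datum"] for b in bindings])  # ['echo', '--verbose', '--out']
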
def main(argsl=None, # type: List[str] args=None, # type: argparse.Namespace job_order_object=None, # type: MutableMapping[Text, Any] stdin=sys.stdin, # type: IO[Any] stdout=None, # type: Union[TextIO, codecs.StreamWriter] stderr=sys.stderr, # type: IO[Any] versionfunc=versionstring, # type: Callable[[], Text] logger_handler=None, # custom_schema_callback=None, # type: Callable[[], None] executor=None, # type: Callable[..., Tuple[Dict[Text, Any], Text]] loadingContext=None, # type: LoadingContext runtimeContext=None # type: RuntimeContext ): # type: (...) -> int if not stdout: # force UTF-8 even if the console is configured differently if (hasattr(sys.stdout, "encoding") # type: ignore and sys.stdout.encoding != 'UTF-8'): # type: ignore if six.PY3 and hasattr(sys.stdout, "detach"): stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') else: stdout = codecs.getwriter('utf-8')(sys.stdout) # type: ignore else: stdout = cast(TextIO, sys.stdout) # type: ignore _logger.removeHandler(defaultStreamHandler) if logger_handler: stderr_handler = logger_handler else: stderr_handler = logging.StreamHandler(stderr) _logger.addHandler(stderr_handler) # pre-declared for finally block workflowobj = None try: if args is None: if argsl is None: argsl = sys.argv[1:] args = arg_parser().parse_args(argsl) if runtimeContext is None: runtimeContext = RuntimeContext(vars(args)) else: runtimeContext = runtimeContext.copy() # If on Windows platform, a default Docker Container is used if not # explicitely provided by user if onWindows() and not runtimeContext.default_container: # This docker image is a minimal alpine image with bash installed # (size 6 mb). source: https://github.com/frol/docker-alpine-bash runtimeContext.default_container = windows_default_container_id # If caller parsed its own arguments, it may not include every # cwltool option, so fill in defaults to avoid crashing when # dereferencing them in args. for key, val in six.iteritems(get_default_args()): if not hasattr(args, key): setattr(args, key, val) rdflib_logger = logging.getLogger("rdflib.term") rdflib_logger.addHandler(stderr_handler) rdflib_logger.setLevel(logging.ERROR) if args.quiet: _logger.setLevel(logging.WARN) if runtimeContext.debug: _logger.setLevel(logging.DEBUG) rdflib_logger.setLevel(logging.DEBUG) if args.timestamps: formatter = logging.Formatter("[%(asctime)s] %(message)s", "%Y-%m-%d %H:%M:%S") stderr_handler.setFormatter(formatter) if args.version: print(versionfunc()) return 0 _logger.info(versionfunc()) if args.print_supported_versions: print("\n".join(supported_cwl_versions(args.enable_dev))) return 0 if not args.workflow: if os.path.isfile("CWLFile"): setattr(args, "workflow", "CWLFile") else: _logger.error("") _logger.error("CWL document required, no input file was provided") arg_parser().print_help() return 1 if args.relax_path_checks: command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE if args.ga4gh_tool_registries: ga4gh_tool_registries[:] = args.ga4gh_tool_registries if not args.enable_ga4gh_tool_registry: del ga4gh_tool_registries[:] if custom_schema_callback: custom_schema_callback() elif args.enable_ext: res = pkg_resources.resource_stream(__name__, 'extensions.yml') use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read()) res.close() else: use_standard_schema("v1.0") #call function from provenance.py if the provenance flag is enabled. 
if args.provenance: if not args.compute_checksum: _logger.error("--provenance incompatible with --no-compute-checksum") return 1 runtimeContext.research_obj = ResearchObject( temp_prefix_ro=args.tmpdir_prefix, # Optionals, might be None orcid=args.orcid, full_name=args.cwl_full_name) if loadingContext is None: loadingContext = LoadingContext(vars(args)) else: loadingContext = loadingContext.copy() loadingContext.research_obj = runtimeContext.research_obj loadingContext.disable_js_validation = \ args.disable_js_validation or (not args.do_validate) loadingContext.construct_tool_object = getdefault( loadingContext.construct_tool_object, workflow.default_make_tool) loadingContext.resolver = getdefault(loadingContext.resolver, tool_resolver) uri, tool_file_uri = resolve_tool_uri( args.workflow, resolver=loadingContext.resolver, fetcher_constructor=loadingContext.fetcher_constructor) try_again_msg = "" if args.debug else ", try again with --debug for more information" try: job_order_object, input_basedir, jobloader = load_job_order( args, stdin, loadingContext.fetcher_constructor, loadingContext.overrides_list, tool_file_uri) if args.overrides: loadingContext.overrides_list.extend(load_overrides( file_uri(os.path.abspath(args.overrides)), tool_file_uri)) document_loader, workflowobj, uri = fetch_document( uri, resolver=loadingContext.resolver, fetcher_constructor=loadingContext.fetcher_constructor) if args.print_deps: printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri) return 0 document_loader, avsc_names, processobj, metadata, uri \ = validate_document(document_loader, workflowobj, uri, enable_dev=loadingContext.enable_dev, strict=loadingContext.strict, preprocess_only=(args.print_pre or args.pack), fetcher_constructor=loadingContext.fetcher_constructor, skip_schemas=args.skip_schemas, overrides=loadingContext.overrides_list, do_validate=loadingContext.do_validate) if args.pack: stdout.write(print_pack(document_loader, processobj, uri, metadata)) return 0 if args.provenance and runtimeContext.research_obj: # Can't really be combined with args.pack at same time runtimeContext.research_obj.packed_workflow( print_pack(document_loader, processobj, uri, metadata)) if args.print_pre: stdout.write(json_dumps(processobj, indent=4)) return 0 loadingContext.overrides_list.extend(metadata.get("cwltool:overrides", [])) tool = make_tool(document_loader, avsc_names, metadata, uri, loadingContext) if args.make_template: yaml.safe_dump(generate_input_template(tool), sys.stdout, default_flow_style=False, indent=4, block_seq_indent=2) return 0 if args.validate: _logger.info("Tool definition is valid") return 0 if args.print_rdf: stdout.write(printrdf(tool, document_loader.ctx, args.rdf_serializer)) return 0 if args.print_dot: printdot(tool, document_loader.ctx, stdout) return 0 except (validate.ValidationException) as exc: _logger.error(u"Tool definition failed validation:\n%s", exc, exc_info=args.debug) return 1 except (RuntimeError, WorkflowException) as exc: _logger.error(u"Tool definition failed initialization:\n%s", exc, exc_info=args.debug) return 1 except Exception as exc: _logger.error( u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s", try_again_msg, exc if not args.debug else "", exc_info=args.debug) return 1 if isinstance(tool, int): return tool # If on MacOS platform, TMPDIR must be set to be under one of the # shared volumes in Docker for Mac # More info: https://dockstore.org/docs/faq if sys.platform == "darwin": default_mac_path = "/private/tmp/docker_tmp" if 
runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX: runtimeContext.tmp_outdir_prefix = default_mac_path for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"): if getattr(runtimeContext, dirprefix) and getattr(runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX: sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \ else "" setattr(runtimeContext, dirprefix, os.path.abspath(getattr(runtimeContext, dirprefix)) + sl) if not os.path.exists(os.path.dirname(getattr(runtimeContext, dirprefix))): try: os.makedirs(os.path.dirname(getattr(runtimeContext, dirprefix))) except Exception as e: _logger.error("Failed to create directory: %s", e) return 1 if args.cachedir: if args.move_outputs == "move": runtimeContext.move_outputs = "copy" runtimeContext.tmp_outdir_prefix = args.cachedir runtimeContext.secret_store = getdefault(runtimeContext.secret_store, SecretStore()) runtimeContext.make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess) try: initialized_job_order_object = init_job_order( job_order_object, args, tool, jobloader, stdout, print_input_deps=args.print_input_deps, relative_deps=args.relative_deps, make_fs_access=runtimeContext.make_fs_access, input_basedir=input_basedir, secret_store=runtimeContext.secret_store) except SystemExit as err: return err.code if not executor: if args.parallel: executor = MultithreadedJobExecutor() runtimeContext.select_resources = executor.select_resources else: executor = SingleJobExecutor() assert executor is not None try: runtimeContext.basedir = input_basedir del args.workflow del args.job_order conf_file = getattr(args, "beta_dependency_resolvers_configuration", None) # Text use_conda_dependencies = getattr(args, "beta_conda_dependencies", None) # Text if conf_file or use_conda_dependencies: runtimeContext.job_script_provider = DependenciesConfiguration(args) runtimeContext.find_default_container = functools.partial( find_default_container, default_container=runtimeContext.default_container, use_biocontainers=args.beta_use_biocontainers) (out, status) = executor(tool, initialized_job_order_object, runtimeContext, logger=_logger) if out is not None: def loc_to_path(obj): for field in ("path", "nameext", "nameroot", "dirname"): if field in obj: del obj[field] if obj["location"].startswith("file://"): obj["path"] = uri_file_path(obj["location"]) visit_class(out, ("File", "Directory"), loc_to_path) # Unsetting the Generation from final output object visit_class(out, ("File", ), MutationManager().unset_generation) if isinstance(out, string_types): stdout.write(out) else: stdout.write(json_dumps(out, indent=4, # type: ignore ensure_ascii=False)) stdout.write("\n") if hasattr(stdout, "flush"): stdout.flush() # type: ignore if status != "success": _logger.warning(u"Final process status is %s", status) return 1 _logger.info(u"Final process status is %s", status) return 0 except (validate.ValidationException) as exc: _logger.error(u"Input object failed validation:\n%s", exc, exc_info=args.debug) return 1 except UnsupportedRequirement as exc: _logger.error( u"Workflow or tool uses unsupported feature:\n%s", exc, exc_info=args.debug) return 33 except WorkflowException as exc: _logger.error( u"Workflow error%s:\n%s", try_again_msg, strip_dup_lineno(six.text_type(exc)), exc_info=args.debug) return 1 except Exception as exc: _logger.error( u"Unhandled error%s:\n %s", try_again_msg, exc, exc_info=args.debug) return 1 finally: if args and runtimeContext and runtimeContext.research_obj \ and args.rm_tmpdir and 
                workflowobj:
            # adding all related cwl files to RO
            prov_dependencies = printdeps(
                workflowobj, document_loader, stdout, args.relative_deps, uri,
                runtimeContext.research_obj)
            prov_dep = prov_dependencies[1]
            assert prov_dep
            runtimeContext.research_obj.generate_snapshot(prov_dep)
            runtimeContext.research_obj.close(args.provenance)
        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)

def main(argsl=None, # type: List[str] args=None, # type: argparse.Namespace executor=single_job_executor, # type: Callable[..., Tuple[Dict[Text, Any], Text]] makeTool=workflow.defaultMakeTool, # type: Callable[..., Process] selectResources=None, # type: Callable[[Dict[Text, int]], Dict[Text, int]] stdin=sys.stdin, # type: IO[Any] stdout=sys.stdout, # type: IO[Any] stderr=sys.stderr, # type: IO[Any] versionfunc=versionstring, # type: Callable[[], Text] job_order_object=None, # type: Union[Tuple[Dict[Text, Any], Text], int] make_fs_access=StdFsAccess, # type: Callable[[Text], StdFsAccess] fetcher_constructor=None, # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher] resolver=tool_resolver, logger_handler=None ): # type: (...) -> int _logger.removeHandler(defaultStreamHandler) if logger_handler: stderr_handler = logger_handler else: stderr_handler = logging.StreamHandler(stderr) _logger.addHandler(stderr_handler) try: if args is None: if argsl is None: argsl = sys.argv[1:] args = arg_parser().parse_args(argsl) # If caller provided custom arguments, it may be not every expected # option is set, so fill in no-op defaults to avoid crashing when # dereferencing them in args. for k, v in {'print_deps': False, 'print_pre': False, 'print_rdf': False, 'print_dot': False, 'relative_deps': False, 'tmp_outdir_prefix': 'tmp', 'tmpdir_prefix': 'tmp', 'print_input_deps': False, 'cachedir': None, 'quiet': False, 'debug': False, 'version': False, 'enable_dev': False, 'strict': True, 'rdf_serializer': None, 'basedir': None, 'tool_help': False, 'workflow': None, 'job_order': None, 'pack': False, 'on_error': 'continue', 'relax_path_checks': False, 'validate': False}.iteritems(): if not hasattr(args, k): setattr(args, k, v) if args.quiet: _logger.setLevel(logging.WARN) if args.debug: _logger.setLevel(logging.DEBUG) if args.version: print(versionfunc()) return 0 else: _logger.info(versionfunc()) if not args.workflow: if os.path.isfile("CWLFile"): setattr(args, "workflow", "CWLFile") else: _logger.error("") _logger.error("CWL document required, try --help for details") return 1 if args.relax_path_checks: draft2tool.ACCEPTLIST_RE = draft2tool.ACCEPTLIST_EN_RELAXED_RE try: document_loader, workflowobj, uri = fetch_document(args.workflow, resolver=resolver, fetcher_constructor=fetcher_constructor) if args.print_deps: printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri) return 0 document_loader, avsc_names, processobj, metadata, uri \ = validate_document(document_loader, workflowobj, uri, enable_dev=args.enable_dev, strict=args.strict, preprocess_only=args.print_pre or args.pack, fetcher_constructor=fetcher_constructor) if args.validate: return 0 if args.pack: stdout.write(print_pack(document_loader, processobj, uri, metadata)) return 0 if args.print_pre: stdout.write(json.dumps(processobj, indent=4)) return 0 tool = make_tool(document_loader, avsc_names, metadata, uri, makeTool, vars(args)) if args.print_rdf: printrdf(tool, document_loader.ctx, args.rdf_serializer, stdout) return 0 if args.print_dot: printdot(tool, document_loader.ctx, stdout) return 0 except (validate.ValidationException) as exc: _logger.error(u"Tool definition failed validation:\n%s", exc, exc_info=args.debug) return 1 except (RuntimeError, WorkflowException) as exc: _logger.error(u"Tool definition failed initialization:\n%s", exc, exc_info=args.debug) return 1 except Exception as exc: _logger.error( u"I'm sorry, I couldn't load this CWL file%s", ", try again with --debug for more information.\nThe 
error was: " "%s" % exc if not args.debug else ". The error was:", exc_info=args.debug) return 1 if isinstance(tool, int): return tool for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"): if getattr(args, dirprefix) and getattr(args, dirprefix) != 'tmp': sl = "/" if getattr(args, dirprefix).endswith("/") or dirprefix == "cachedir" else "" setattr(args, dirprefix, os.path.abspath(getattr(args, dirprefix)) + sl) if not os.path.exists(os.path.dirname(getattr(args, dirprefix))): try: os.makedirs(os.path.dirname(getattr(args, dirprefix))) except Exception as e: _logger.error("Failed to create directory: %s", e) return 1 if args.cachedir: if args.move_outputs == "move": setattr(args, 'move_outputs', "copy") setattr(args, "tmp_outdir_prefix", args.cachedir) if job_order_object is None: job_order_object = load_job_order(args, tool, stdin, print_input_deps=args.print_input_deps, relative_deps=args.relative_deps, stdout=stdout, make_fs_access=make_fs_access, fetcher_constructor=fetcher_constructor) if isinstance(job_order_object, int): return job_order_object try: setattr(args, 'basedir', job_order_object[1]) del args.workflow del args.job_order (out, status) = executor(tool, job_order_object[0], makeTool=makeTool, select_resources=selectResources, make_fs_access=make_fs_access, **vars(args)) # This is the workflow output, it needs to be written if out is not None: def locToPath(p): if p["location"].startswith("file://"): p["path"] = uri_file_path(p["location"]) adjustDirObjs(out, locToPath) adjustFileObjs(out, locToPath) if isinstance(out, basestring): stdout.write(out) else: stdout.write(json.dumps(out, indent=4)) stdout.write("\n") stdout.flush() if status != "success": _logger.warn(u"Final process status is %s", status) return 1 else: _logger.info(u"Final process status is %s", status) return 0 except (validate.ValidationException) as exc: _logger.error(u"Input object failed validation:\n%s", exc, exc_info=args.debug) return 1 except UnsupportedRequirement as exc: _logger.error( u"Workflow or tool uses unsupported feature:\n%s", exc, exc_info=args.debug) return 33 except WorkflowException as exc: _logger.error( u"Workflow error, try again with --debug for more " "information:\n%s", strip_dup_lineno(unicode(exc)), exc_info=args.debug) return 1 except Exception as exc: _logger.error( u"Unhandled error, try again with --debug for more information:\n" " %s", exc, exc_info=args.debug) return 1 finally: _logger.removeHandler(stderr_handler) _logger.addHandler(defaultStreamHandler)
def static_checker(workflow_inputs, workflow_outputs, step_inputs, step_outputs): # type: (List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]]) -> None """Check if all source and sink types of a workflow are compatible before run time. """ # source parameters: workflow_inputs and step_outputs # sink parameters: step_inputs and workflow_outputs # make a dictionary of source parameters, indexed by the "id" field src_parms = workflow_inputs + step_outputs src_dict = {} for parm in src_parms: src_dict[parm["id"]] = parm step_inputs_val = check_all_types(src_dict, step_inputs, "source") workflow_outputs_val = check_all_types(src_dict, workflow_outputs, "outputSource") warnings = step_inputs_val["warning"] + workflow_outputs_val["warning"] exceptions = step_inputs_val["exception"] + workflow_outputs_val[ "exception"] warning_msgs = [] exception_msgs = [] for warning in warnings: src = warning.src sink = warning.sink linkMerge = warning.linkMerge if sink.get("secondaryFiles") and sorted(sink.get( "secondaryFiles", [])) != sorted(src.get("secondaryFiles", [])): msg1 = "Sink '%s'" % (shortname(sink["id"])) msg2 = SourceLine(sink.get("_tool_entry", sink), "secondaryFiles").makeError( "expects secondaryFiles: %s but" % (sink.get("secondaryFiles"))) if "secondaryFiles" in src: msg3 = SourceLine(src, "secondaryFiles").makeError( "source '%s' has secondaryFiles %s." % (shortname(src["id"]), src.get("secondaryFiles"))) else: msg3 = SourceLine(src, "id").makeError( "source '%s' does not include secondaryFiles." % (shortname(src["id"]))) msg4 = SourceLine(src, "id").makeError( "To fix, add secondaryFiles: %s to definition of '%s'." % (sink.get("secondaryFiles"), shortname(src["id"]))) msg = SourceLine(sink).makeError( "%s\n%s" % (msg1, bullets([msg2, msg3, msg4], " "))) else: msg = SourceLine(src, "type").makeError( "Source '%s' of type %s may be incompatible" % (shortname(src["id"]), json.dumps(src["type"]))) + "\n" + \ SourceLine(sink, "type").makeError( " with sink '%s' of type %s" % (shortname(sink["id"]), json.dumps(sink["type"]))) if linkMerge: msg += "\n" + SourceLine(sink).makeError( " source has linkMerge method %s" % linkMerge) warning_msgs.append(msg) for exception in exceptions: src = exception.src sink = exception.sink linkMerge = exception.linkMerge msg = SourceLine(src, "type").makeError( "Source '%s' of type %s is incompatible" % (shortname(src["id"]), json.dumps(src["type"]))) + "\n" + \ SourceLine(sink, "type").makeError( " with sink '%s' of type %s" % (shortname(sink["id"]), json.dumps(sink["type"]))) if linkMerge: msg += "\n" + SourceLine(sink).makeError( " source has linkMerge method %s" % linkMerge) exception_msgs.append(msg) for sink in step_inputs: if ('null' != sink["type"] and 'null' not in sink["type"] and "source" not in sink and "default" not in sink and "valueFrom" not in sink): msg = SourceLine(sink).makeError( "Required parameter '%s' does not have source, default, or valueFrom expression" % shortname(sink["id"])) exception_msgs.append(msg) all_warning_msg = strip_dup_lineno("\n".join(warning_msgs)) all_exception_msg = strip_dup_lineno("\n".join(exception_msgs)) if warnings: _logger.warning("Workflow checker warning:\n%s" % all_warning_msg) if exceptions: raise validate.ValidationException(all_exception_msg)
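# --- Minimal sketch (hypothetical; not the real check_all_types implementation).
# static_checker() above indexes source parameters by "id" and pairs each sink
# with its source before comparing types. This toy version shows that pairing
# and a crude type-overlap test; toy_static_check and the sample ids are
# invented for illustration only.
import json

def toy_static_check(sources, sinks):
    src_dict = {parm["id"]: parm for parm in sources}
    problems = []
    for sink in sinks:
        src = src_dict.get(sink.get("source"))
        if src is None:
            continue
        src_types = src["type"] if isinstance(src["type"], list) else [src["type"]]
        sink_types = sink["type"] if isinstance(sink["type"], list) else [sink["type"]]
        if not set(src_types) & set(sink_types):
            problems.append(
                "Source '%s' of type %s is incompatible with sink '%s' of type %s"
                % (src["id"], json.dumps(src["type"]),
                   sink["id"], json.dumps(sink["type"])))
    return problems

# A File output feeding a string input is reported as incompatible.
print(toy_static_check(
    [{"id": "step1/out", "type": "File"}],
    [{"id": "step2/in", "source": "step1/out", "type": "string"}]))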
def cwlmain( self, argsl=None, # type: List[str] args=None, # type: argparse.Namespace job_order_object=None, # type: MutableMapping[Text, Any] stdin=sys.stdin, # type: IO[Any] stdout=None, # type: Union[TextIO, codecs.StreamWriter] stderr=sys.stderr, # type: IO[Any] versionfunc=versionstring, # type: Callable[[], Text] logger_handler=None, # custom_schema_callback=None, # type: Callable[[], None] executor=None, # type: Callable[..., Tuple[Dict[Text, Any], Text]] loadingContext=None, # type: LoadingContext runtimeContext=None # type: RuntimeContext ): # type: (...) -> int if not stdout: stdout = codecs.getwriter('utf-8')(sys.stdout) _logger.removeHandler(defaultStreamHandler) if logger_handler: stderr_handler = logger_handler else: stderr_handler = logging.StreamHandler(stderr) _logger.addHandler(stderr_handler) try: if args is None: args = arg_parser().parse_args(argsl) if args.workflow and "--outdir" not in argsl: outputPath = args.workflow.split('/')[-1].split('.')[0] setattr( args, "outdir", os.getcwd() + "/" + outputPath + "/" + datetime.datetime.now().strftime('%Y-%m-%d-%H%M')) if runtimeContext is None: runtimeContext = RuntimeContext(vars(args)) else: runtimeContext = runtimeContext.copy() rdflib_logger = logging.getLogger("rdflib.term") rdflib_logger.addHandler(stderr_handler) rdflib_logger.setLevel(logging.ERROR) if args.quiet: _logger.setLevel(logging.WARN) if runtimeContext.debug: _logger.setLevel(logging.DEBUG) rdflib_logger.setLevel(logging.DEBUG) if args.timestamps: formatter = logging.Formatter("[%(asctime)s] %(message)s", "%Y-%m-%d %H:%M:%S") stderr_handler.setFormatter(formatter) # version if args.version: return versionfunc(), 0 else: _logger.info(versionfunc()) if args.print_supported_versions: return "\n".join(supportedCWLversions(args.enable_dev)), 0 if not args.workflow: if os.path.isfile("CWLFile"): setattr(args, "workflow", "CWLFile") else: _logger.error("") _logger.error( "CWL document required, no input file was provided") arg_parser().print_help() return "CWL document required, no input file was provided", 1 if args.relax_path_checks: command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE if args.ga4gh_tool_registries: ga4gh_tool_registries[:] = args.ga4gh_tool_registries if not args.enable_ga4gh_tool_registry: del ga4gh_tool_registries[:] if custom_schema_callback: custom_schema_callback() elif args.enable_ext: res = pkg_resources.resource_stream(__name__, 'extensions.yml') use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read()) res.close() else: use_standard_schema("v1.0") if loadingContext is None: loadingContext = LoadingContext(vars(args)) else: loadingContext = loadingContext.copy() loadingContext.disable_js_validation = \ args.disable_js_validation or (not args.do_validate) loadingContext.construct_tool_object = getdefault( loadingContext.construct_tool_object, workflow.default_make_tool) loadingContext.resolver = getdefault(loadingContext.resolver, tool_resolver) try: uri, tool_file_uri = resolve_tool_uri( args.workflow, resolver=loadingContext.resolver, fetcher_constructor=loadingContext.fetcher_constructor) except: return "Can't find file " + args.workflow, 0 try_again_msg = "" if args.debug else ", try again with --debug for more information" try: job_order_object, input_basedir, jobloader = load_job_order( args, stdin, loadingContext.fetcher_constructor, loadingContext.overrides_list, tool_file_uri) if args.overrides: loadingContext.overrides_list.extend( load_overrides( file_uri(os.path.abspath(args.overrides)), 
tool_file_uri)) document_loader, workflowobj, uri = fetch_document( uri, resolver=loadingContext.resolver, fetcher_constructor=loadingContext.fetcher_constructor) if args.print_deps: # printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri) result = returndeps(workflowobj, document_loader, stdout, args.relative_deps, uri) return result, 0 document_loader, avsc_names, processobj, metadata, uri \ = validate_document(document_loader, workflowobj, uri, enable_dev=loadingContext.enable_dev, strict=loadingContext.strict, preprocess_only=(args.print_pre or args.pack), fetcher_constructor=loadingContext.fetcher_constructor, skip_schemas=args.skip_schemas, overrides=loadingContext.overrides_list, do_validate=loadingContext.do_validate) if args.print_pre: # stdout.write(json_dumps(processobj, indent=4)) return json_dumps(processobj, indent=4), 0 loadingContext.overrides_list.extend( metadata.get("cwltool:overrides", [])) tool = make_tool(document_loader, avsc_names, metadata, uri, loadingContext) if args.make_template: yaml.safe_dump(generate_input_template(tool), sys.stdout, default_flow_style=False, indent=4, block_seq_indent=2) return yaml.safe_dump(generate_input_template(tool), indent=4), 0 if args.validate: _logger.info("Tool definition is valid") return "Tool definition is valid", 0 if args.pack: stdout.write( print_pack(document_loader, processobj, uri, metadata)) return print_pack(document_loader, processobj, uri, metadata), 0 if args.print_rdf: stdout.write( printrdf(tool, document_loader.ctx, args.rdf_serializer)) return printrdf(tool, document_loader.ctx, args.rdf_serializer), 0 if args.print_dot: printdot(tool, document_loader.ctx, stdout) return "args.print_dot still not solved", 0 except (validate.ValidationException) as exc: _logger.error(u"Tool definition failed validation:\n%s", exc, exc_info=args.debug) infor = "Tool definition failed validation:\n%s" + exc + args.debug return infor, 1 except (RuntimeError, WorkflowException) as exc: _logger.error(u"Tool definition failed initialization:\n%s", exc, exc_info=args.debug) infor = "Tool definition failed initialization:\n%s" + exc + args.debug return infor, 1 except Exception as exc: _logger.error( u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s", try_again_msg, exc if not args.debug else "", exc_info=args.debug) return "I'm sorry, I couldn't load this CWL file", 1 if isinstance(tool, int): return tool, 0 # If on MacOS platform, TMPDIR must be set to be under one of the # shared volumes in Docker for Mac # More info: https://dockstore.org/docs/faq if sys.platform == "darwin": default_mac_path = "/private/tmp/docker_tmp" if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX: runtimeContext.tmp_outdir_prefix = default_mac_path for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"): if getattr(runtimeContext, dirprefix) and getattr( runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX: sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \ else "" setattr( runtimeContext, dirprefix, os.path.abspath(getattr(runtimeContext, dirprefix)) + sl) if not os.path.exists( os.path.dirname(getattr(runtimeContext, dirprefix))): try: os.makedirs( os.path.dirname( getattr(runtimeContext, dirprefix))) except Exception as e: _logger.error("Failed to create directory: %s", e) infor = "Failed to create directory: %s" + e + "" return infor, 1 if args.cachedir: if args.move_outputs == "move": runtimeContext.move_outputs = "copy" runtimeContext.tmp_outdir_prefix = args.cachedir 
runtimeContext.secret_store = getdefault( runtimeContext.secret_store, SecretStore()) try: initialized_job_order_object = init_job_order( job_order_object, args, tool, jobloader, stdout, print_input_deps=args.print_input_deps, relative_deps=args.relative_deps, input_basedir=input_basedir, secret_store=runtimeContext.secret_store) except SystemExit as err: return err.code if not executor: if args.parallel: executor = MultithreadedJobExecutor() else: executor = SingleJobExecutor() assert executor is not None if isinstance(initialized_job_order_object, int): return initialized_job_order_object try: runtimeContext.basedir = input_basedir del args.workflow del args.job_order conf_file = getattr(args, "beta_dependency_resolvers_configuration", None) # Text use_conda_dependencies = getattr(args, "beta_conda_dependencies", None) # Text job_script_provider = None # type: Optional[DependenciesConfiguration] if conf_file or use_conda_dependencies: runtimeContext.job_script_provider = DependenciesConfiguration( args) runtimeContext.find_default_container = \ functools.partial(find_default_container, args) runtimeContext.make_fs_access = getdefault( runtimeContext.make_fs_access, StdFsAccess) (out, status) = executor(tool, initialized_job_order_object, runtimeContext, logger=_logger) # This is the workflow output, it needs to be written if out is not None: def loc_to_path(obj): for field in ("path", "nameext", "nameroot", "dirname"): if field in obj: del obj[field] if obj["location"].startswith("file://"): obj["path"] = uri_file_path(obj["location"]) visit_class(out, ("File", "Directory"), loc_to_path) # Unsetting the Generation fron final output object visit_class(out, ("File", ), MutationManager().unset_generation) if isinstance(out, string_types): stdout.write(out) else: stdout.write( json_dumps( out, indent=4, # type: ignore ensure_ascii=False)) stdout.write("\n") if hasattr(stdout, "flush"): stdout.flush() # type: ignore if status != "success": _logger.warning(u"Final process status is %s", status) infor = "Final process status is %s" + status + "" return infor, 1 _logger.info(u"Final process status is %s", status) return out, status except (validate.ValidationException) as exc: _logger.error(u"Input object failed validation:\n%s", exc, exc_info=args.debug) infor = "Input object failed validation:\n%s" + exc + args.debug return infor, 1 except UnsupportedRequirement as exc: _logger.error( u"Workflow or tool uses unsupported feature:\n%s", exc, exc_info=args.debug) infor = "Workflow or tool uses unsupported feature:\n%s" + exc + args.debug return infor, 3 except WorkflowException as exc: _logger.error(u"Workflow error%s:\n%s", try_again_msg, strip_dup_lineno(six.text_type(exc)), exc_info=args.debug) infor = "Workflow error%s:\n%s" + try_again_msg + strip_dup_lineno( six.text_type(exc)) + args.debug return infor, 1 except Exception as exc: _logger.error(u"Unhandled error%s:\n %s", try_again_msg, exc, exc_info=args.debug) infor = "Unhandled error%s:\n %s" + try_again_msg + exc + args.debug return infor, 1 finally: _logger.removeHandler(stderr_handler) _logger.addHandler(defaultStreamHandler)
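# --- Sketch (simplified, assumption-laden) of the dirprefix loop that cwlmain()
# and the main() variants run over "tmpdir_prefix", "tmp_outdir_prefix" and
# "cachedir": make the prefix absolute, keep a trailing "/" for cache
# directories, and create the parent directory up front. normalize_prefix is a
# hypothetical helper, not a cwltool function.
import os

def normalize_prefix(prefix, is_cachedir=False, default="tmp"):
    if not prefix or prefix == default:
        return prefix
    sl = "/" if prefix.endswith("/") or is_cachedir else ""
    prefix = os.path.abspath(prefix) + sl
    parent = os.path.dirname(prefix)
    if not os.path.exists(parent):
        os.makedirs(parent)
    return prefix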
def _init_job(self, joborder, runtime_context): # type: (Mapping[Text, Text], RuntimeContext) -> Builder job = cast(Dict[Text, Union[Dict[Text, Any], List[Any], Text, None]], copy.deepcopy(joborder)) make_fs_access = getdefault(runtime_context.make_fs_access, StdFsAccess) fs_access = make_fs_access(runtime_context.basedir) load_listing_req, _ = self.get_requirement( "LoadListingRequirement") if load_listing_req is not None: load_listing = load_listing_req.get("loadListing") else: load_listing = "no_listing" # Validate job order try: fill_in_defaults(self.tool[u"inputs"], job, fs_access) normalizeFilesDirs(job) schema = self.names.get_name("input_record_schema", "") if schema is None: raise WorkflowException("Missing input record schema: " "{}".format(self.names)) validate.validate_ex(schema, job, strict=False, logger=_logger_validation_warnings) if load_listing and load_listing != "no_listing": get_listing(fs_access, job, recursive=(load_listing == "deep_listing")) visit_class(job, ("File",), functools.partial(add_sizes, fs_access)) if load_listing == "deep_listing": for i, inparm in enumerate(self.tool["inputs"]): k = shortname(inparm["id"]) if k not in job: continue v = job[k] dircount = [0] def inc(d): # type: (List[int]) -> None d[0] += 1 visit_class(v, ("Directory",), lambda x: inc(dircount)) if dircount[0] == 0: continue filecount = [0] visit_class(v, ("File",), lambda x: inc(filecount)) if filecount[0] > FILE_COUNT_WARNING: # Long lines in this message are okay, will be reflowed based on terminal columns. _logger.warning(strip_dup_lineno(SourceLine(self.tool["inputs"], i, Text).makeError( """Recursive directory listing has resulted in a large number of File objects (%s) passed to the input parameter '%s'. This may negatively affect workflow performance and memory use. 
If this is a problem, use the hint 'cwltool:LoadListingRequirement' with "shallow_listing" or "no_listing" to change the directory listing behavior: $namespaces: cwltool: "http://commonwl.org/cwltool#" hints: cwltool:LoadListingRequirement: loadListing: shallow_listing """ % (filecount[0], k)))) except (validate.ValidationException, WorkflowException) as err: raise WorkflowException("Invalid job input record:\n" + Text(err)) files = [] # type: List[Dict[Text, Text]] bindings = CommentedSeq() tmpdir = u"" stagedir = u"" docker_req, _ = self.get_requirement("DockerRequirement") default_docker = None if docker_req is None and runtime_context.default_container: default_docker = runtime_context.default_container if (docker_req or default_docker) and runtime_context.use_container: if docker_req is not None: # Check if docker output directory is absolute if docker_req.get("dockerOutputDirectory") and \ docker_req.get("dockerOutputDirectory").startswith('/'): outdir = docker_req.get("dockerOutputDirectory") else: outdir = docker_req.get("dockerOutputDirectory") or \ runtime_context.docker_outdir or random_outdir() elif default_docker is not None: outdir = runtime_context.docker_outdir or random_outdir() tmpdir = runtime_context.docker_tmpdir or "/tmp" stagedir = runtime_context.docker_stagedir or "/var/lib/cwl" else: outdir = fs_access.realpath( runtime_context.outdir or tempfile.mkdtemp( prefix=getdefault(runtime_context.tmp_outdir_prefix, DEFAULT_TMP_PREFIX))) if self.tool[u"class"] != 'Workflow': tmpdir = fs_access.realpath(runtime_context.tmpdir or tempfile.mkdtemp()) stagedir = fs_access.realpath(runtime_context.stagedir or tempfile.mkdtemp()) builder = Builder(job, files, bindings, self.schemaDefs, self.names, self.requirements, self.hints, {}, runtime_context.mutation_manager, self.formatgraph, make_fs_access, fs_access, runtime_context.job_script_provider, runtime_context.eval_timeout, runtime_context.debug, runtime_context.js_console, runtime_context.force_docker_pull, load_listing, outdir, tmpdir, stagedir) bindings.extend(builder.bind_input( self.inputs_record_schema, job, discover_secondaryFiles=getdefault(runtime_context.toplevel, False))) if self.tool.get("baseCommand"): for index, command in enumerate(aslist(self.tool["baseCommand"])): bindings.append({ "position": [-1000000, index], "datum": command }) if self.tool.get("arguments"): for i, arg in enumerate(self.tool["arguments"]): lc = self.tool["arguments"].lc.data[i] filename = self.tool["arguments"].lc.filename bindings.lc.add_kv_line_col(len(bindings), lc) if isinstance(arg, MutableMapping): arg = copy.deepcopy(arg) if arg.get("position"): arg["position"] = [arg["position"], i] else: arg["position"] = [0, i] bindings.append(arg) elif ("$(" in arg) or ("${" in arg): cm = CommentedMap(( ("position", [0, i]), ("valueFrom", arg) )) cm.lc.add_kv_line_col("valueFrom", lc) cm.lc.filename = filename bindings.append(cm) else: cm = CommentedMap(( ("position", [0, i]), ("datum", arg) )) cm.lc.add_kv_line_col("datum", lc) cm.lc.filename = filename bindings.append(cm) # use python2 like sorting of heterogeneous lists # (containing str and int types), if PY3: key = functools.cmp_to_key(cmp_like_py2) else: # PY2 key = lambda d: d["position"] # This awkward construction replaces the contents of # "bindings" in place (because Builder expects it to be # mutated in place, sigh, I'm sorry) with its contents sorted, # supporting different versions of Python and ruamel.yaml with # different behaviors/bugs in CommentedSeq. 
        bindings_copy = copy.deepcopy(bindings)
        del bindings[:]
        bindings.extend(sorted(bindings_copy, key=key))

        if self.tool[u"class"] != 'Workflow':
            builder.resources = self.evalResources(builder, runtime_context)
        return builder
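# --- Sketch of the "Python 2 like" ordering that _init_job() needs when binding
# positions mix ints and strings. cmp_mixed is a hypothetical stand-in for
# cmp_like_py2: it keys on (type name, value) so ints and strings are never
# compared directly, then functools.cmp_to_key adapts it for sorted().
import functools

def cmp_mixed(a, b):
    ka, kb = (type(a).__name__, a), (type(b).__name__, b)
    return (ka > kb) - (ka < kb)

def cmp_positions(x, y):
    # Compare position lists element by element, falling back to length.
    for a, b in zip(x, y):
        result = cmp_mixed(a, b)
        if result:
            return result
    return len(x) - len(y)

positions = [[0, "out.txt"], [-1000000, 0], [0, 2]]
print(sorted(positions, key=functools.cmp_to_key(cmp_positions)))
# [[-1000000, 0], [0, 2], [0, 'out.txt']]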
def static_checker( workflow_inputs: List[CWLObjectType], workflow_outputs: MutableSequence[CWLObjectType], step_inputs: MutableSequence[CWLObjectType], step_outputs: List[CWLObjectType], param_to_step: Dict[str, CWLObjectType], ) -> None: """Check if all source and sink types of a workflow are compatible before run time.""" # source parameters: workflow_inputs and step_outputs # sink parameters: step_inputs and workflow_outputs # make a dictionary of source parameters, indexed by the "id" field src_parms = workflow_inputs + step_outputs src_dict = {} # type: Dict[str, CWLObjectType] for parm in src_parms: src_dict[cast(str, parm["id"])] = parm step_inputs_val = check_all_types(src_dict, step_inputs, "source", param_to_step) workflow_outputs_val = check_all_types(src_dict, workflow_outputs, "outputSource", param_to_step) warnings = step_inputs_val["warning"] + workflow_outputs_val["warning"] exceptions = step_inputs_val["exception"] + workflow_outputs_val[ "exception"] warning_msgs = [] exception_msgs = [] for warning in warnings: src = warning.src sink = warning.sink linkMerge = warning.linkMerge sinksf = sorted(p["pattern"] for p in sink.get("secondaryFiles", []) if p.get("required", True)) srcsf = sorted(p["pattern"] for p in src.get("secondaryFiles", [])) # Every secondaryFile required by the sink, should be declared # by the source missing = missing_subset(srcsf, sinksf) if missing: msg1 = "Parameter '{}' requires secondaryFiles {} but".format( shortname(sink["id"]), missing, ) msg3 = SourceLine(src, "id").makeError( "source '%s' does not provide those secondaryFiles." % (shortname(src["id"]))) msg4 = SourceLine( src.get("_tool_entry", src), "secondaryFiles" ).makeError( "To resolve, add missing secondaryFiles patterns to definition of '%s' or" % (shortname(src["id"]))) msg5 = SourceLine( sink.get("_tool_entry", sink), "secondaryFiles" ).makeError( "mark missing secondaryFiles in definition of '%s' as optional." 
% shortname(sink["id"])) msg = SourceLine(sink).makeError("{}\n{}".format( msg1, bullets([msg3, msg4, msg5], " "))) elif sink.get("not_connected"): if not sink.get("used_by_step"): msg = SourceLine(sink, "type").makeError( "'%s' is not an input parameter of %s, expected %s" % ( shortname(sink["id"]), param_to_step[sink["id"]]["run"], ", ".join( shortname(cast(str, s["id"])) for s in cast( List[Dict[str, Union[str, bool]]], param_to_step[sink["id"]]["inputs"], ) if not s.get("not_connected")), )) else: msg = "" else: msg = (SourceLine(src, "type").makeError( "Source '%s' of type %s may be incompatible" % (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + SourceLine(sink, "type").makeError( " with sink '%s' of type %s" % (shortname(sink["id"]), json_dumps(sink["type"])))) if linkMerge is not None: msg += "\n" + SourceLine(sink).makeError( " source has linkMerge method %s" % linkMerge) if warning.message is not None: msg += "\n" + SourceLine(sink).makeError(" " + warning.message) if msg: warning_msgs.append(msg) for exception in exceptions: src = exception.src sink = exception.sink linkMerge = exception.linkMerge extra_message = exception.message msg = (SourceLine(src, "type").makeError( "Source '%s' of type %s is incompatible" % (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + SourceLine(sink, "type").makeError( " with sink '%s' of type %s" % (shortname(sink["id"]), json_dumps(sink["type"])))) if extra_message is not None: msg += "\n" + SourceLine(sink).makeError(" " + extra_message) if linkMerge is not None: msg += "\n" + SourceLine(sink).makeError( " source has linkMerge method %s" % linkMerge) exception_msgs.append(msg) for sink in step_inputs: if ("null" != sink["type"] and "null" not in sink["type"] and "source" not in sink and "default" not in sink and "valueFrom" not in sink): msg = SourceLine(sink).makeError( "Required parameter '%s' does not have source, default, or valueFrom expression" % shortname(sink["id"])) exception_msgs.append(msg) all_warning_msg = strip_dup_lineno("\n".join(warning_msgs)) all_exception_msg = strip_dup_lineno("\n" + "\n".join(exception_msgs)) if all_warning_msg: _logger.warning("Workflow checker warning:\n%s", all_warning_msg) if exceptions: raise ValidationException(all_exception_msg)
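# --- Sketch (hypothetical helper mirroring the missing_subset() call above):
# every secondaryFiles pattern the sink marks as required must be declared by
# the source; anything missing becomes part of the warning message.
def missing_required_patterns(src_patterns, required_sink_patterns):
    return [p for p in required_sink_patterns if p not in src_patterns]

src = {"secondaryFiles": [{"pattern": ".bai"}]}
sink = {"secondaryFiles": [{"pattern": ".bai", "required": True},
                           {"pattern": ".crai", "required": True},
                           {"pattern": ".idx", "required": False}]}
srcsf = sorted(p["pattern"] for p in src.get("secondaryFiles", []))
sinksf = sorted(p["pattern"] for p in sink.get("secondaryFiles", [])
                if p.get("required", True))
print(missing_required_patterns(srcsf, sinksf))  # ['.crai']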
def main(argsl=None, # type: List[str] args=None, # type: argparse.Namespace job_order_object=None, # type: MutableMapping[Text, Any] stdin=sys.stdin, # type: IO[Any] stdout=None, # type: Union[TextIO, StreamWriter] stderr=sys.stderr, # type: IO[Any] versionfunc=versionstring, # type: Callable[[], Text] logger_handler=None, # custom_schema_callback=None, # type: Callable[[], None] executor=None, # type: Callable[..., Tuple[Dict[Text, Any], Text]] loadingContext=None, # type: LoadingContext runtimeContext=None # type: RuntimeContext ): # type: (...) -> int if not stdout: # force UTF-8 even if the console is configured differently if (hasattr(sys.stdout, "encoding") # type: ignore and sys.stdout.encoding != 'UTF-8'): # type: ignore if PY3 and hasattr(sys.stdout, "detach"): stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') else: stdout = getwriter('utf-8')(sys.stdout) # type: ignore else: stdout = cast(TextIO, sys.stdout) # type: ignore _logger.removeHandler(defaultStreamHandler) if logger_handler is not None: stderr_handler = logger_handler else: stderr_handler = logging.StreamHandler(stderr) _logger.addHandler(stderr_handler) # pre-declared for finally block workflowobj = None prov_log_handler = None # type: Optional[logging.StreamHandler] try: if args is None: if argsl is None: argsl = sys.argv[1:] args = arg_parser().parse_args(argsl) if args.record_container_id: if not args.cidfile_dir: args.cidfile_dir = os.getcwd() del args.record_container_id if runtimeContext is None: runtimeContext = RuntimeContext(vars(args)) else: runtimeContext = runtimeContext.copy() # If on Windows platform, a default Docker Container is used if not # explicitely provided by user if onWindows() and not runtimeContext.default_container: # This docker image is a minimal alpine image with bash installed # (size 6 mb). source: https://github.com/frol/docker-alpine-bash runtimeContext.default_container = windows_default_container_id # If caller parsed its own arguments, it may not include every # cwltool option, so fill in defaults to avoid crashing when # dereferencing them in args. 
for key, val in iteritems(get_default_args()): if not hasattr(args, key): setattr(args, key, val) # Configure logging rdflib_logger = logging.getLogger("rdflib.term") rdflib_logger.addHandler(stderr_handler) rdflib_logger.setLevel(logging.ERROR) if args.quiet: # Silence STDERR, not an eventual provenance log file stderr_handler.setLevel(logging.WARN) if runtimeContext.debug: # Increase to debug for both stderr and provenance log file _logger.setLevel(logging.DEBUG) rdflib_logger.setLevel(logging.DEBUG) formatter = None # type: Optional[logging.Formatter] if args.timestamps: formatter = logging.Formatter("[%(asctime)s] %(message)s", "%Y-%m-%d %H:%M:%S") stderr_handler.setFormatter(formatter) ## if args.version: print(versionfunc()) return 0 _logger.info(versionfunc()) if args.print_supported_versions: print("\n".join(supported_cwl_versions(args.enable_dev))) return 0 if not args.workflow: if os.path.isfile("CWLFile"): setattr(args, "workflow", "CWLFile") else: _logger.error("") _logger.error("CWL document required, no input file was provided") arg_parser().print_help() return 1 if args.relax_path_checks: command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE if args.ga4gh_tool_registries: ga4gh_tool_registries[:] = args.ga4gh_tool_registries if not args.enable_ga4gh_tool_registry: del ga4gh_tool_registries[:] if custom_schema_callback is not None: custom_schema_callback() elif args.enable_ext: res = pkg_resources.resource_stream(__name__, 'extensions.yml') use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read()) res.close() else: use_standard_schema("v1.0") if args.provenance: if not args.compute_checksum: _logger.error("--provenance incompatible with --no-compute-checksum") return 1 ro = ResearchObject( temp_prefix_ro=args.tmpdir_prefix, orcid=args.orcid, full_name=args.cwl_full_name) runtimeContext.research_obj = ro log_file_io = ro.open_log_file_for_activity(ro.engine_uuid) prov_log_handler = logging.StreamHandler(log_file_io) class ProvLogFormatter(logging.Formatter): """Enforce ISO8601 with both T and Z.""" def __init__(self): # type: () -> None super(ProvLogFormatter, self).__init__( "[%(asctime)sZ] %(message)s") def formatTime(self, record, datefmt=None): # type: (logging.LogRecord, str) -> str record_time = time.gmtime(record.created) formatted_time = time.strftime("%Y-%m-%dT%H:%M:%S", record_time) with_msecs = "%s,%03d" % (formatted_time, record.msecs) return with_msecs prov_log_handler.setFormatter(ProvLogFormatter()) _logger.addHandler(prov_log_handler) _logger.debug(u"[provenance] Logging to %s", log_file_io) if argsl is not None: # Log cwltool command line options to provenance file _logger.info("[cwltool] %s %s", sys.argv[0], u" ".join(argsl)) _logger.debug(u"[cwltool] Arguments: %s", args) if loadingContext is None: loadingContext = LoadingContext(vars(args)) else: loadingContext = loadingContext.copy() loadingContext.loader = default_loader(loadingContext.fetcher_constructor) loadingContext.research_obj = runtimeContext.research_obj loadingContext.disable_js_validation = \ args.disable_js_validation or (not args.do_validate) loadingContext.construct_tool_object = getdefault( loadingContext.construct_tool_object, workflow.default_make_tool) loadingContext.resolver = getdefault(loadingContext.resolver, tool_resolver) loadingContext.do_update = not (args.pack or args.print_subgraph) uri, tool_file_uri = resolve_tool_uri( args.workflow, resolver=loadingContext.resolver, fetcher_constructor=loadingContext.fetcher_constructor) try_again_msg = "" 
if args.debug else ", try again with --debug for more information" try: job_order_object, input_basedir, jobloader = load_job_order( args, stdin, loadingContext.fetcher_constructor, loadingContext.overrides_list, tool_file_uri) if args.overrides: loadingContext.overrides_list.extend(load_overrides( file_uri(os.path.abspath(args.overrides)), tool_file_uri)) loadingContext, workflowobj, uri = fetch_document( uri, loadingContext) assert loadingContext.loader is not None if args.print_deps: printdeps(workflowobj, loadingContext.loader, stdout, args.relative_deps, uri) return 0 loadingContext, uri \ = resolve_and_validate_document(loadingContext, workflowobj, uri, preprocess_only=(args.print_pre or args.pack), skip_schemas=args.skip_schemas) assert loadingContext.loader is not None processobj, metadata = loadingContext.loader.resolve_ref(uri) processobj = cast(CommentedMap, processobj) if args.pack: stdout.write(print_pack(loadingContext.loader, processobj, uri, metadata)) return 0 if args.provenance and runtimeContext.research_obj: # Can't really be combined with args.pack at same time runtimeContext.research_obj.packed_workflow( print_pack(loadingContext.loader, processobj, uri, metadata)) if args.print_pre: stdout.write(json_dumps(processobj, indent=4, sort_keys=True, separators=(',', ': '))) return 0 tool = make_tool(uri, loadingContext) if args.make_template: def my_represent_none(self, data): # pylint: disable=unused-argument """Force clean representation of 'null'.""" return self.represent_scalar(u'tag:yaml.org,2002:null', u'null') yaml.RoundTripRepresenter.add_representer(type(None), my_represent_none) yaml.round_trip_dump( generate_input_template(tool), sys.stdout, default_flow_style=False, indent=4, block_seq_indent=2) return 0 if args.validate: print("{} is valid CWL.".format(args.workflow)) return 0 if args.print_rdf: stdout.write(printrdf(tool, loadingContext.loader.ctx, args.rdf_serializer)) return 0 if args.print_dot: printdot(tool, loadingContext.loader.ctx, stdout) return 0 if args.print_targets: for f in ("outputs", "steps", "inputs"): if tool.tool[f]: _logger.info("%s%s targets:", f[0].upper(), f[1:-1]) stdout.write(" "+"\n ".join([shortname(t["id"]) for t in tool.tool[f]])+"\n") return 0 if args.target: if isinstance(tool, Workflow): url = urllib.parse.urlparse(tool.tool["id"]) if url.fragment: extracted = get_subgraph([tool.tool["id"] + "/" + r for r in args.target], tool) else: extracted = get_subgraph([loadingContext.loader.fetcher.urljoin(tool.tool["id"], "#" + r) for r in args.target], tool) else: _logger.error("Can only use --target on Workflows") return 1 loadingContext.loader.idx[extracted["id"]] = extracted tool = make_tool(extracted["id"], loadingContext) if args.print_subgraph: if "name" in tool.tool: del tool.tool["name"] stdout.write(json_dumps(tool.tool, indent=4, sort_keys=True, separators=(',', ': '))) return 0 except (validate.ValidationException) as exc: _logger.error(u"Tool definition failed validation:\n%s", exc, exc_info=args.debug) return 1 except (RuntimeError, WorkflowException) as exc: _logger.error(u"Tool definition failed initialization:\n%s", exc, exc_info=args.debug) return 1 except Exception as exc: _logger.error( u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s", try_again_msg, exc if not args.debug else "", exc_info=args.debug) return 1 if isinstance(tool, int): return tool # If on MacOS platform, TMPDIR must be set to be under one of the # shared volumes in Docker for Mac # More info: https://dockstore.org/docs/faq if 
sys.platform == "darwin": default_mac_path = "/private/tmp/docker_tmp" if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX: runtimeContext.tmp_outdir_prefix = default_mac_path if runtimeContext.tmpdir_prefix == DEFAULT_TMP_PREFIX: runtimeContext.tmpdir_prefix = default_mac_path for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"): if getattr(runtimeContext, dirprefix) and getattr(runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX: sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \ else "" setattr(runtimeContext, dirprefix, os.path.abspath(getattr(runtimeContext, dirprefix)) + sl) if not os.path.exists(os.path.dirname(getattr(runtimeContext, dirprefix))): try: os.makedirs(os.path.dirname(getattr(runtimeContext, dirprefix))) except Exception as e: _logger.error("Failed to create directory: %s", e) return 1 if args.cachedir: if args.move_outputs == "move": runtimeContext.move_outputs = "copy" runtimeContext.tmp_outdir_prefix = args.cachedir runtimeContext.secret_store = getdefault(runtimeContext.secret_store, SecretStore()) runtimeContext.make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess) try: initialized_job_order_object = init_job_order( job_order_object, args, tool, jobloader, stdout, print_input_deps=args.print_input_deps, relative_deps=args.relative_deps, make_fs_access=runtimeContext.make_fs_access, input_basedir=input_basedir, secret_store=runtimeContext.secret_store) except SystemExit as err: return err.code if not executor: if args.parallel: executor = MultithreadedJobExecutor() runtimeContext.select_resources = executor.select_resources else: executor = SingleJobExecutor() assert executor is not None try: runtimeContext.basedir = input_basedir del args.workflow del args.job_order conf_file = getattr(args, "beta_dependency_resolvers_configuration", None) # Text use_conda_dependencies = getattr(args, "beta_conda_dependencies", None) # Text if conf_file or use_conda_dependencies: runtimeContext.job_script_provider = DependenciesConfiguration(args) else: runtimeContext.find_default_container = functools.partial( find_default_container, default_container=runtimeContext.default_container, use_biocontainers=args.beta_use_biocontainers) (out, status) = executor(tool, initialized_job_order_object, runtimeContext, logger=_logger) if out is not None: if runtimeContext.research_obj is not None: runtimeContext.research_obj.create_job( out, None, True) def loc_to_path(obj): for field in ("path", "nameext", "nameroot", "dirname"): if field in obj: del obj[field] if obj["location"].startswith("file://"): obj["path"] = uri_file_path(obj["location"]) visit_class(out, ("File", "Directory"), loc_to_path) # Unsetting the Generation from final output object visit_class(out, ("File", ), MutationManager().unset_generation) if isinstance(out, string_types): stdout.write(out) else: stdout.write(json_dumps(out, indent=4, # type: ignore ensure_ascii=False)) stdout.write("\n") if hasattr(stdout, "flush"): stdout.flush() # type: ignore if status != "success": _logger.warning(u"Final process status is %s", status) return 1 _logger.info(u"Final process status is %s", status) return 0 except (validate.ValidationException) as exc: _logger.error(u"Input object failed validation:\n%s", exc, exc_info=args.debug) return 1 except UnsupportedRequirement as exc: _logger.error( u"Workflow or tool uses unsupported feature:\n%s", exc, exc_info=args.debug) return 33 except WorkflowException as exc: _logger.error( u"Workflow error%s:\n%s", 
try_again_msg, strip_dup_lineno(Text(exc)), exc_info=args.debug) return 1 except Exception as exc: # pylint: disable=broad-except _logger.error( u"Unhandled error%s:\n %s", try_again_msg, exc, exc_info=args.debug) return 1 finally: if args and runtimeContext and runtimeContext.research_obj \ and workflowobj: research_obj = runtimeContext.research_obj assert loadingContext is not None assert loadingContext.loader is not None prov_dependencies = prov_deps(workflowobj, loadingContext.loader, uri) research_obj.generate_snapshot(prov_dependencies) if prov_log_handler is not None: # Stop logging so we won't half-log adding ourself to RO _logger.debug(u"[provenance] Closing provenance log file %s", prov_log_handler) _logger.removeHandler(prov_log_handler) # Ensure last log lines are written out prov_log_handler.flush() # Underlying WritableBagFile will add the tagfile to the manifest prov_log_handler.stream.close() prov_log_handler.close() research_obj.close(args.provenance) _logger.removeHandler(stderr_handler) _logger.addHandler(defaultStreamHandler)
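# --- Stand-alone sketch of the ISO 8601 "T ... Z" timestamp idea used by the
# provenance log handler above, wired to a throwaway logger so it can run on
# its own. Iso8601Formatter and the "prov-demo" logger are illustrative names,
# not part of cwltool.
import logging
import sys
import time

class Iso8601Formatter(logging.Formatter):
    def __init__(self):
        super(Iso8601Formatter, self).__init__("[%(asctime)sZ] %(message)s")

    def formatTime(self, record, datefmt=None):
        # Render the record's creation time in UTC with a literal 'T' separator.
        return "%s,%03d" % (
            time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(record.created)),
            record.msecs)

handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(Iso8601Formatter())
demo = logging.getLogger("prov-demo")
demo.propagate = False
demo.addHandler(handler)
demo.warning("provenance formatter demo")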
def main(argsl=None, # type: List[str] args=None, # type: argparse.Namespace executor=single_job_executor, # type: Callable[..., Tuple[Dict[Text, Any], Text]] makeTool=workflow.defaultMakeTool, # type: Callable[..., Process] selectResources=None, # type: Callable[[Dict[Text, int]], Dict[Text, int]] stdin=sys.stdin, # type: IO[Any] stdout=sys.stdout, # type: IO[Any] stderr=sys.stderr, # type: IO[Any] versionfunc=versionstring, # type: Callable[[], Text] job_order_object=None, # type: Union[Tuple[Dict[Text, Any], Text], int] make_fs_access=StdFsAccess, # type: Callable[[Text], StdFsAccess] fetcher_constructor=None, # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher] resolver=tool_resolver, logger_handler=None, custom_schema_callback=None # type: Callable[[], None] ): # type: (...) -> int _logger.removeHandler(defaultStreamHandler) if logger_handler: stderr_handler = logger_handler else: stderr_handler = logging.StreamHandler(stderr) _logger.addHandler(stderr_handler) try: if args is None: if argsl is None: argsl = sys.argv[1:] args = arg_parser().parse_args(argsl) # If caller provided custom arguments, it may be not every expected # option is set, so fill in no-op defaults to avoid crashing when # dereferencing them in args. for k, v in {'print_deps': False, 'print_pre': False, 'print_rdf': False, 'print_dot': False, 'relative_deps': False, 'tmp_outdir_prefix': 'tmp', 'tmpdir_prefix': 'tmp', 'print_input_deps': False, 'cachedir': None, 'quiet': False, 'debug': False, 'version': False, 'enable_dev': False, 'enable_ext': False, 'strict': True, 'rdf_serializer': None, 'basedir': None, 'tool_help': False, 'workflow': None, 'job_order': None, 'pack': False, 'on_error': 'continue', 'relax_path_checks': False, 'validate': False, 'enable_ga4gh_tool_registry': False, 'ga4gh_tool_registries': [] }.iteritems(): if not hasattr(args, k): setattr(args, k, v) if args.quiet: _logger.setLevel(logging.WARN) if args.debug: _logger.setLevel(logging.DEBUG) if args.version: print(versionfunc()) return 0 else: _logger.info(versionfunc()) if args.print_supported_versions: print("\n".join(supportedCWLversions(args.enable_dev))) return 0 if not args.workflow: if os.path.isfile("CWLFile"): setattr(args, "workflow", "CWLFile") else: _logger.error("") _logger.error("CWL document required, no input file was provided") arg_parser().print_help() return 1 if args.relax_path_checks: draft2tool.ACCEPTLIST_RE = draft2tool.ACCEPTLIST_EN_RELAXED_RE if args.ga4gh_tool_registries: ga4gh_tool_registries[:] = args.ga4gh_tool_registries if not args.enable_ga4gh_tool_registry: del ga4gh_tool_registries[:] if custom_schema_callback: custom_schema_callback() elif args.enable_ext: res = pkg_resources.resource_stream(__name__, 'extensions.yml') use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read()) res.close() else: use_standard_schema("v1.0") try: document_loader, workflowobj, uri = fetch_document(args.workflow, resolver=resolver, fetcher_constructor=fetcher_constructor) if args.print_deps: printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri) return 0 document_loader, avsc_names, processobj, metadata, uri \ = validate_document(document_loader, workflowobj, uri, enable_dev=args.enable_dev, strict=args.strict, preprocess_only=args.print_pre or args.pack, fetcher_constructor=fetcher_constructor) if args.pack: stdout.write(print_pack(document_loader, processobj, uri, metadata)) return 0 if args.print_pre: stdout.write(json.dumps(processobj, indent=4)) return 0 tool = 
make_tool(document_loader, avsc_names, metadata, uri, makeTool, vars(args)) if args.validate: return 0 if args.print_rdf: printrdf(tool, document_loader.ctx, args.rdf_serializer, stdout) return 0 if args.print_dot: printdot(tool, document_loader.ctx, stdout) return 0 except (validate.ValidationException) as exc: _logger.error(u"Tool definition failed validation:\n%s", exc, exc_info=args.debug) return 1 except (RuntimeError, WorkflowException) as exc: _logger.error(u"Tool definition failed initialization:\n%s", exc, exc_info=args.debug) return 1 except Exception as exc: _logger.error( u"I'm sorry, I couldn't load this CWL file%s", ", try again with --debug for more information.\nThe error was: " "%s" % exc if not args.debug else ". The error was:", exc_info=args.debug) return 1 if isinstance(tool, int): return tool for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"): if getattr(args, dirprefix) and getattr(args, dirprefix) != 'tmp': sl = "/" if getattr(args, dirprefix).endswith("/") or dirprefix == "cachedir" else "" setattr(args, dirprefix, os.path.abspath(getattr(args, dirprefix)) + sl) if not os.path.exists(os.path.dirname(getattr(args, dirprefix))): try: os.makedirs(os.path.dirname(getattr(args, dirprefix))) except Exception as e: _logger.error("Failed to create directory: %s", e) return 1 if args.cachedir: if args.move_outputs == "move": setattr(args, 'move_outputs', "copy") setattr(args, "tmp_outdir_prefix", args.cachedir) try: if job_order_object is None: job_order_object = load_job_order(args, tool, stdin, print_input_deps=args.print_input_deps, relative_deps=args.relative_deps, stdout=stdout, make_fs_access=make_fs_access, fetcher_constructor=fetcher_constructor) except SystemExit as e: return e.code if isinstance(job_order_object, int): return job_order_object try: setattr(args, 'basedir', job_order_object[1]) del args.workflow del args.job_order (out, status) = executor(tool, job_order_object[0], makeTool=makeTool, select_resources=selectResources, make_fs_access=make_fs_access, **vars(args)) # This is the workflow output, it needs to be written if out is not None: def locToPath(p): if p["location"].startswith("file://"): p["path"] = uri_file_path(p["location"]) visit_class(out, ("File", "Directory"), locToPath) if isinstance(out, basestring): stdout.write(out) else: stdout.write(json.dumps(out, indent=4)) stdout.write("\n") stdout.flush() if status != "success": _logger.warn(u"Final process status is %s", status) return 1 else: _logger.info(u"Final process status is %s", status) return 0 except (validate.ValidationException) as exc: _logger.error(u"Input object failed validation:\n%s", exc, exc_info=args.debug) return 1 except UnsupportedRequirement as exc: _logger.error( u"Workflow or tool uses unsupported feature:\n%s", exc, exc_info=args.debug) return 33 except WorkflowException as exc: _logger.error( u"Workflow error, try again with --debug for more " "information:\n%s", strip_dup_lineno(unicode(exc)), exc_info=args.debug) return 1 except Exception as exc: _logger.error( u"Unhandled error, try again with --debug for more information:\n" " %s", exc, exc_info=args.debug) return 1 finally: _logger.removeHandler(stderr_handler) _logger.addHandler(defaultStreamHandler)
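# --- Sketch of the output post-processing step above: walk File/Directory
# objects in the workflow output and derive "path" from a file:// "location".
# visit_objects is a hypothetical stand-in for cwltool's visit_class, and the
# snippet assumes Python 3 for urllib.
from urllib.parse import urlparse
from urllib.request import url2pathname

def visit_objects(rec, class_names, op):
    # Recursively apply op to every mapping whose "class" is in class_names.
    if isinstance(rec, dict):
        if rec.get("class") in class_names:
            op(rec)
        for value in rec.values():
            visit_objects(value, class_names, op)
    elif isinstance(rec, list):
        for value in rec:
            visit_objects(value, class_names, op)

def loc_to_path(obj):
    if obj["location"].startswith("file://"):
        obj["path"] = url2pathname(urlparse(obj["location"]).path)

out = {"report": {"class": "File", "location": "file:///data/run1/report.txt"}}
visit_objects(out, ("File", "Directory"), loc_to_path)
print(out["report"]["path"])  # /data/run1/report.txt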
def static_checker(workflow_inputs, workflow_outputs, step_inputs, step_outputs, param_to_step): # type: (List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], Dict[Text, Dict[Text, Any]]) -> None """Check if all source and sink types of a workflow are compatible before run time.""" # source parameters: workflow_inputs and step_outputs # sink parameters: step_inputs and workflow_outputs # make a dictionary of source parameters, indexed by the "id" field src_parms = workflow_inputs + step_outputs src_dict = {} for parm in src_parms: src_dict[parm["id"]] = parm step_inputs_val = check_all_types(src_dict, step_inputs, "source") workflow_outputs_val = check_all_types(src_dict, workflow_outputs, "outputSource") warnings = step_inputs_val["warning"] + workflow_outputs_val["warning"] exceptions = step_inputs_val["exception"] + workflow_outputs_val[ "exception"] warning_msgs = [] exception_msgs = [] for warning in warnings: src = warning.src sink = warning.sink linkMerge = warning.linkMerge sinksf = sorted([ p["pattern"] for p in sink.get("secondaryFiles", []) if p.get("required", True) ]) srcsf = sorted([p["pattern"] for p in src.get("secondaryFiles", [])]) # Every secondaryFile required by the sink, should be declared # by the source missing = missing_subset(srcsf, sinksf) if missing: msg1 = "Parameter '%s' requires secondaryFiles %s but" % ( shortname(sink["id"]), missing) msg3 = SourceLine(src, "id").makeError( "source '%s' does not provide those secondaryFiles." % (shortname(src["id"]))) msg4 = SourceLine( src.get("_tool_entry", src), "secondaryFiles" ).makeError( "To resolve, add missing secondaryFiles patterns to definition of '%s' or" % (shortname(src["id"]))) msg5 = SourceLine( sink.get("_tool_entry", sink), "secondaryFiles" ).makeError( "mark missing secondaryFiles in definition of '%s' as optional." 
% shortname(sink["id"])) msg = SourceLine(sink).makeError( "%s\n%s" % (msg1, bullets([msg3, msg4, msg5], " "))) elif sink.get("not_connected"): msg = SourceLine(sink, "type").makeError( "'%s' is not an input parameter of %s, expected %s" % (shortname( sink["id"]), param_to_step[sink["id"]]["run"], ", ".join( shortname(s["id"]) for s in param_to_step[sink["id"]]["inputs"] if not s.get("not_connected")))) else: msg = SourceLine(src, "type").makeError( "Source '%s' of type %s may be incompatible" % (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + \ SourceLine(sink, "type").makeError( " with sink '%s' of type %s" % (shortname(sink["id"]), json_dumps(sink["type"]))) if linkMerge is not None: msg += "\n" + SourceLine(sink).makeError( " source has linkMerge method %s" % linkMerge) warning_msgs.append(msg) for exception in exceptions: src = exception.src sink = exception.sink linkMerge = exception.linkMerge msg = SourceLine(src, "type").makeError( "Source '%s' of type %s is incompatible" % (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + \ SourceLine(sink, "type").makeError( " with sink '%s' of type %s" % (shortname(sink["id"]), json_dumps(sink["type"]))) if linkMerge is not None: msg += "\n" + SourceLine(sink).makeError( " source has linkMerge method %s" % linkMerge) exception_msgs.append(msg) for sink in step_inputs: if ('null' != sink["type"] and 'null' not in sink["type"] and "source" not in sink and "default" not in sink and "valueFrom" not in sink): msg = SourceLine(sink).makeError( "Required parameter '%s' does not have source, default, or valueFrom expression" % shortname(sink["id"])) exception_msgs.append(msg) all_warning_msg = strip_dup_lineno("\n".join(warning_msgs)) all_exception_msg = strip_dup_lineno("\n".join(exception_msgs)) if warnings: _logger.warning("Workflow checker warning:\n%s", all_warning_msg) if exceptions: raise validate.ValidationException(all_exception_msg)
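# --- Sketch of the final required-parameter pass in static_checker(): a step
# input whose type does not allow null and which has no source, default, or
# valueFrom is reported. unsatisfied_required_inputs and the sample ids are
# illustrative only.
def unsatisfied_required_inputs(step_inputs):
    missing = []
    for sink in step_inputs:
        types = sink["type"] if isinstance(sink["type"], list) else [sink["type"]]
        if "null" not in types and not ({"source", "default", "valueFrom"} & sink.keys()):
            missing.append(sink["id"])
    return missing

print(unsatisfied_required_inputs([
    {"id": "step/in1", "type": "File", "source": "wf_in"},
    {"id": "step/in2", "type": "File"},              # flagged: nothing feeds it
    {"id": "step/in3", "type": ["null", "string"]},  # optional, fine
]))  # ['step/in2']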
def main(argsl=None, # type: List[str] args=None, # type: argparse.Namespace executor=single_job_executor, # type: Callable[..., Tuple[Dict[Text, Any], Text]] makeTool=workflow.defaultMakeTool, # type: Callable[..., Process] selectResources=None, # type: Callable[[Dict[Text, int]], Dict[Text, int]] stdin=sys.stdin, # type: IO[Any] stdout=sys.stdout, # type: IO[Any] stderr=sys.stderr, # type: IO[Any] versionfunc=versionstring, # type: Callable[[], Text] job_order_object=None, # type: MutableMapping[Text, Any] make_fs_access=StdFsAccess, # type: Callable[[Text], StdFsAccess] fetcher_constructor=None, # type: FetcherConstructorType resolver=tool_resolver, logger_handler=None, custom_schema_callback=None # type: Callable[[], None] ): # type: (...) -> int _logger.removeHandler(defaultStreamHandler) if logger_handler: stderr_handler = logger_handler else: stderr_handler = logging.StreamHandler(stderr) _logger.addHandler(stderr_handler) try: if args is None: if argsl is None: argsl = sys.argv[1:] args = arg_parser().parse_args(argsl) # If On windows platform, A default Docker Container is Used if not explicitely provided by user if onWindows() and not args.default_container: # This docker image is a minimal alpine image with bash installed(size 6 mb). source: https://github.com/frol/docker-alpine-bash args.default_container = windows_default_container_id # If caller provided custom arguments, it may be not every expected # option is set, so fill in no-op defaults to avoid crashing when # dereferencing them in args. for k, v in six.iteritems({'print_deps': False, 'print_pre': False, 'print_rdf': False, 'print_dot': False, 'relative_deps': False, 'tmp_outdir_prefix': 'tmp', 'tmpdir_prefix': 'tmp', 'print_input_deps': False, 'cachedir': None, 'quiet': False, 'debug': False, 'js_console': False, 'version': False, 'enable_dev': False, 'enable_ext': False, 'strict': True, 'skip_schemas': False, 'rdf_serializer': None, 'basedir': None, 'tool_help': False, 'workflow': None, 'job_order': None, 'pack': False, 'on_error': 'continue', 'relax_path_checks': False, 'validate': False, 'enable_ga4gh_tool_registry': False, 'ga4gh_tool_registries': [], 'find_default_container': None, 'make_template': False, 'overrides': None }): if not hasattr(args, k): setattr(args, k, v) if args.quiet: _logger.setLevel(logging.WARN) if args.debug: _logger.setLevel(logging.DEBUG) if args.version: print(versionfunc()) return 0 else: _logger.info(versionfunc()) if args.print_supported_versions: print("\n".join(supportedCWLversions(args.enable_dev))) return 0 if not args.workflow: if os.path.isfile("CWLFile"): setattr(args, "workflow", "CWLFile") else: _logger.error("") _logger.error("CWL document required, no input file was provided") arg_parser().print_help() return 1 if args.relax_path_checks: draft2tool.ACCEPTLIST_RE = draft2tool.ACCEPTLIST_EN_RELAXED_RE if args.ga4gh_tool_registries: ga4gh_tool_registries[:] = args.ga4gh_tool_registries if not args.enable_ga4gh_tool_registry: del ga4gh_tool_registries[:] if custom_schema_callback: custom_schema_callback() elif args.enable_ext: res = pkg_resources.resource_stream(__name__, 'extensions.yml') use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read()) res.close() else: use_standard_schema("v1.0") uri, tool_file_uri = resolve_tool_uri(args.workflow, resolver=resolver, fetcher_constructor=fetcher_constructor) overrides = [] # type: List[Dict[Text, Any]] try: job_order_object, input_basedir, jobloader = load_job_order(args, stdin, fetcher_constructor, overrides, 
tool_file_uri) except Exception as e: _logger.error(Text(e), exc_info=args.debug) if args.overrides: overrides.extend(load_overrides(file_uri(os.path.abspath(args.overrides)), tool_file_uri)) try: document_loader, workflowobj, uri = fetch_document(uri, resolver=resolver, fetcher_constructor=fetcher_constructor) if args.print_deps: printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri) return 0 document_loader, avsc_names, processobj, metadata, uri \ = validate_document(document_loader, workflowobj, uri, enable_dev=args.enable_dev, strict=args.strict, preprocess_only=args.print_pre or args.pack, fetcher_constructor=fetcher_constructor, skip_schemas=args.skip_schemas, overrides=overrides) if args.print_pre: stdout.write(json.dumps(processobj, indent=4)) return 0 overrides.extend(metadata.get("cwltool:overrides", [])) conf_file = getattr(args, "beta_dependency_resolvers_configuration", None) # Text use_conda_dependencies = getattr(args, "beta_conda_dependencies", None) # Text make_tool_kwds = vars(args) job_script_provider = None # type: Callable[[Any, List[str]], Text] if conf_file or use_conda_dependencies: dependencies_configuration = DependenciesConfiguration(args) # type: DependenciesConfiguration make_tool_kwds["job_script_provider"] = dependencies_configuration make_tool_kwds["find_default_container"] = functools.partial(find_default_container, args) make_tool_kwds["overrides"] = overrides tool = make_tool(document_loader, avsc_names, metadata, uri, makeTool, make_tool_kwds) if args.make_template: yaml.safe_dump(generate_input_template(tool), sys.stdout, default_flow_style=False, indent=4, block_seq_indent=2) return 0 if args.validate: _logger.info("Tool definition is valid") return 0 if args.pack: stdout.write(print_pack(document_loader, processobj, uri, metadata)) return 0 if args.print_rdf: stdout.write(printrdf(tool, document_loader.ctx, args.rdf_serializer)) return 0 if args.print_dot: printdot(tool, document_loader.ctx, stdout) return 0 except (validate.ValidationException) as exc: _logger.error(u"Tool definition failed validation:\n%s", exc, exc_info=args.debug) return 1 except (RuntimeError, WorkflowException) as exc: _logger.error(u"Tool definition failed initialization:\n%s", exc, exc_info=args.debug) return 1 except Exception as exc: _logger.error( u"I'm sorry, I couldn't load this CWL file%s", ", try again with --debug for more information.\nThe error was: " "%s" % exc if not args.debug else ". 
The error was:", exc_info=args.debug) return 1 if isinstance(tool, int): return tool for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"): if getattr(args, dirprefix) and getattr(args, dirprefix) != 'tmp': sl = "/" if getattr(args, dirprefix).endswith("/") or dirprefix == "cachedir" else "" setattr(args, dirprefix, os.path.abspath(getattr(args, dirprefix)) + sl) if not os.path.exists(os.path.dirname(getattr(args, dirprefix))): try: os.makedirs(os.path.dirname(getattr(args, dirprefix))) except Exception as e: _logger.error("Failed to create directory: %s", e) return 1 if args.cachedir: if args.move_outputs == "move": setattr(args, 'move_outputs', "copy") setattr(args, "tmp_outdir_prefix", args.cachedir) try: job_order_object = init_job_order(job_order_object, args, tool, print_input_deps=args.print_input_deps, relative_deps=args.relative_deps, stdout=stdout, make_fs_access=make_fs_access, loader=jobloader, input_basedir=input_basedir) except SystemExit as e: return e.code if isinstance(job_order_object, int): return job_order_object try: setattr(args, 'basedir', input_basedir) del args.workflow del args.job_order (out, status) = executor(tool, job_order_object, makeTool=makeTool, select_resources=selectResources, make_fs_access=make_fs_access, **vars(args)) # This is the workflow output, it needs to be written if out is not None: def locToPath(p): for field in ("path", "nameext", "nameroot", "dirname"): if field in p: del p[field] if p["location"].startswith("file://"): p["path"] = uri_file_path(p["location"]) visit_class(out, ("File", "Directory"), locToPath) # Unsetting the Generation fron final output object visit_class(out,("File",), MutationManager().unset_generation) if isinstance(out, six.string_types): stdout.write(out) else: stdout.write(json.dumps(out, indent=4)) stdout.write("\n") stdout.flush() if status != "success": _logger.warning(u"Final process status is %s", status) return 1 else: _logger.info(u"Final process status is %s", status) return 0 except (validate.ValidationException) as exc: _logger.error(u"Input object failed validation:\n%s", exc, exc_info=args.debug) return 1 except UnsupportedRequirement as exc: _logger.error( u"Workflow or tool uses unsupported feature:\n%s", exc, exc_info=args.debug) return 33 except WorkflowException as exc: _logger.error( u"Workflow error, try again with --debug for more " "information:\n%s", strip_dup_lineno(six.text_type(exc)), exc_info=args.debug) return 1 except Exception as exc: _logger.error( u"Unhandled error, try again with --debug for more information:\n" " %s", exc, exc_info=args.debug) return 1 finally: _logger.removeHandler(stderr_handler) _logger.addHandler(defaultStreamHandler)
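# --- Sketch of the "fill in no-op defaults" pattern used by these main()
# variants when a caller supplies its own argparse.Namespace: any expected
# option that is missing gets a harmless default so later attribute access
# cannot raise AttributeError. DEFAULTS here is a tiny illustrative subset of
# the real option table.
import argparse

DEFAULTS = {"debug": False, "quiet": False, "tmpdir_prefix": "tmp", "validate": False}

def fill_missing_args(args, defaults=DEFAULTS):
    for key, val in defaults.items():
        if not hasattr(args, key):
            setattr(args, key, val)
    return args

args = fill_missing_args(argparse.Namespace(workflow="echo.cwl", debug=True))
print(args.debug, args.quiet)  # True False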
def main(argsl=None,                   # type: List[str]
         args=None,                    # type: argparse.Namespace
         job_order_object=None,        # type: MutableMapping[Text, Any]
         stdin=sys.stdin,              # type: IO[Any]
         stdout=None,                  # type: Union[TextIO, StreamWriter]
         stderr=sys.stderr,            # type: IO[Any]
         versionfunc=versionstring,    # type: Callable[[], Text]
         logger_handler=None,          #
         custom_schema_callback=None,  # type: Callable[[], None]
         executor=None,                # type: Callable[..., Tuple[Dict[Text, Any], Text]]
         loadingContext=None,          # type: LoadingContext
         runtimeContext=None           # type: RuntimeContext
        ):  # type: (...) -> int
    if not stdout:  # force UTF-8 even if the console is configured differently
        if (hasattr(sys.stdout, "encoding")  # type: ignore
                and sys.stdout.encoding != 'UTF-8'):  # type: ignore
            if PY3 and hasattr(sys.stdout, "detach"):
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            else:
                stdout = getwriter('utf-8')(sys.stdout)  # type: ignore
        else:
            stdout = cast(TextIO, sys.stdout)  # type: ignore

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler is not None:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    # pre-declared for finally block
    workflowobj = None
    prov_log_handler = None  # type: Optional[logging.StreamHandler]
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)
            if args.record_container_id:
                if not args.cidfile_dir:
                    args.cidfile_dir = os.getcwd()
                del args.record_container_id

        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()

        # If on Windows platform, a default Docker Container is used if not
        # explicitly provided by user
        if onWindows() and not runtimeContext.default_container:
            # This docker image is a minimal alpine image with bash installed
            # (size 6 mb). source: https://github.com/frol/docker-alpine-bash
            runtimeContext.default_container = windows_default_container_id

        # If caller parsed its own arguments, it may not include every
        # cwltool option, so fill in defaults to avoid crashing when
        # dereferencing them in args.
        for key, val in iteritems(get_default_args()):
            if not hasattr(args, key):
                setattr(args, key, val)

        # Configure logging
        rdflib_logger = logging.getLogger("rdflib.term")
        rdflib_logger.addHandler(stderr_handler)
        rdflib_logger.setLevel(logging.ERROR)
        if args.quiet:
            # Silence STDERR, not an eventual provenance log file
            stderr_handler.setLevel(logging.WARN)
        if runtimeContext.debug:
            # Increase to debug for both stderr and provenance log file
            _logger.setLevel(logging.DEBUG)
            rdflib_logger.setLevel(logging.DEBUG)
        formatter = None  # type: Optional[logging.Formatter]
        if args.timestamps:
            formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                          "%Y-%m-%d %H:%M:%S")
            stderr_handler.setFormatter(formatter)

        if args.version:
            print(versionfunc())
            return 0
        _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supported_cwl_versions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error("CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback is not None:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read())
            res.close()
        else:
            use_standard_schema("v1.0")

        if args.provenance:
            if not args.compute_checksum:
                _logger.error("--provenance incompatible with --no-compute-checksum")
                return 1
            ro = ResearchObject(
                temp_prefix_ro=args.tmpdir_prefix, orcid=args.orcid,
                full_name=args.cwl_full_name)
            runtimeContext.research_obj = ro
            log_file_io = ro.open_log_file_for_activity(ro.engine_uuid)
            prov_log_handler = logging.StreamHandler(log_file_io)

            class ProvLogFormatter(logging.Formatter):
                """Enforce ISO8601 with both T and Z."""

                def __init__(self):  # type: () -> None
                    super(ProvLogFormatter, self).__init__(
                        "[%(asctime)sZ] %(message)s")

                def formatTime(self, record, datefmt=None):
                    # type: (logging.LogRecord, str) -> str
                    record_time = time.gmtime(record.created)
                    formatted_time = time.strftime("%Y-%m-%dT%H:%M:%S", record_time)
                    with_msecs = "%s,%03d" % (formatted_time, record.msecs)
                    return with_msecs

            prov_log_handler.setFormatter(ProvLogFormatter())
            _logger.addHandler(prov_log_handler)
            _logger.debug(u"[provenance] Logging to %s", log_file_io)
            if argsl is not None:
                # Log cwltool command line options to provenance file
                _logger.info("[cwltool] %s %s", sys.argv[0], u" ".join(argsl))
            _logger.debug(u"[cwltool] Arguments: %s", args)

        if loadingContext is None:
            loadingContext = LoadingContext(vars(args))
        else:
            loadingContext = loadingContext.copy()
        loadingContext.loader = default_loader(loadingContext.fetcher_constructor)
        loadingContext.research_obj = runtimeContext.research_obj
        loadingContext.disable_js_validation = \
            args.disable_js_validation or (not args.do_validate)
        loadingContext.construct_tool_object = getdefault(
            loadingContext.construct_tool_object, workflow.default_make_tool)
        loadingContext.resolver = getdefault(loadingContext.resolver, tool_resolver)
        loadingContext.do_update = not (args.pack or args.print_subgraph)

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow, resolver=loadingContext.resolver,
            fetcher_constructor=loadingContext.fetcher_constructor)

        try_again_msg = "" if args.debug \
            else ", try again with --debug for more information"

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args, stdin, loadingContext.fetcher_constructor,
                loadingContext.overrides_list, tool_file_uri)

            if args.overrides:
                loadingContext.overrides_list.extend(load_overrides(
                    file_uri(os.path.abspath(args.overrides)), tool_file_uri))

            loadingContext, workflowobj, uri = fetch_document(
                uri, loadingContext)

            if args.print_deps and loadingContext.loader:
                printdeps(workflowobj, loadingContext.loader, stdout,
                          args.relative_deps, uri)
                return 0

            loadingContext, uri \
                = resolve_and_validate_document(
                    loadingContext, workflowobj, uri,
                    preprocess_only=(args.print_pre or args.pack),
                    skip_schemas=args.skip_schemas)

            if loadingContext.loader is None:
                raise Exception("Impossible code path.")
            processobj, metadata = loadingContext.loader.resolve_ref(uri)
            processobj = cast(CommentedMap, processobj)
            if args.pack:
                stdout.write(print_pack(loadingContext.loader, processobj, uri, metadata))
                return 0

            if args.provenance and runtimeContext.research_obj:
                # Can't really be combined with args.pack at same time
                runtimeContext.research_obj.packed_workflow(
                    print_pack(loadingContext.loader, processobj, uri, metadata))

            if args.print_pre:
                stdout.write(json_dumps(processobj, indent=4, sort_keys=True,
                                        separators=(',', ': ')))
                return 0

            tool = make_tool(uri, loadingContext)
            if args.make_template:
                def my_represent_none(self, data):  # pylint: disable=unused-argument
                    """Force clean representation of 'null'."""
                    return self.represent_scalar(u'tag:yaml.org,2002:null', u'null')
                yaml.RoundTripRepresenter.add_representer(type(None), my_represent_none)
                yaml.round_trip_dump(
                    generate_input_template(tool), sys.stdout,
                    default_flow_style=False, indent=4, block_seq_indent=2)
                return 0

            if args.validate:
                print("{} is valid CWL.".format(args.workflow))
                return 0

            if args.print_rdf:
                stdout.write(printrdf(tool, loadingContext.loader.ctx, args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, loadingContext.loader.ctx, stdout)
                return 0

            if args.print_targets:
                for f in ("outputs", "steps", "inputs"):
                    if tool.tool[f]:
                        _logger.info("%s%s targets:", f[0].upper(), f[1:-1])
                        stdout.write(" " + "\n ".join(
                            [shortname(t["id"]) for t in tool.tool[f]]) + "\n")
                return 0

            if args.target:
                if isinstance(tool, Workflow):
                    url = urllib.parse.urlparse(tool.tool["id"])
                    if url.fragment:
                        extracted = get_subgraph(
                            [tool.tool["id"] + "/" + r for r in args.target], tool)
                    else:
                        extracted = get_subgraph(
                            [loadingContext.loader.fetcher.urljoin(tool.tool["id"], "#" + r)
                             for r in args.target], tool)
                else:
                    _logger.error("Can only use --target on Workflows")
                    return 1
                loadingContext.loader.idx[extracted["id"]] = extracted
                tool = make_tool(extracted["id"], loadingContext)

            if args.print_subgraph:
                if "name" in tool.tool:
                    del tool.tool["name"]
                stdout.write(json_dumps(tool.tool, indent=4, sort_keys=True,
                                        separators=(',', ': ')))
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                exc if not args.debug else "",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool

        # If on MacOS platform, TMPDIR must be set to be under one of the
        # shared volumes in Docker for Mac
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path
            if runtimeContext.tmpdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmpdir_prefix = default_mac_path

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(runtimeContext, dirprefix) \
                    and getattr(runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") \
                    or dirprefix == "cachedir" else ""
                setattr(runtimeContext, dirprefix,
                        os.path.abspath(getattr(runtimeContext, dirprefix)) + sl)
                if not os.path.exists(os.path.dirname(getattr(runtimeContext, dirprefix))):
                    try:
                        os.makedirs(os.path.dirname(getattr(runtimeContext, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir

        runtimeContext.secret_store = getdefault(runtimeContext.secret_store, SecretStore())
        runtimeContext.make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
        try:
            initialized_job_order_object = init_job_order(
                job_order_object, args, tool, jobloader, stdout,
                print_input_deps=args.print_input_deps,
                relative_deps=args.relative_deps,
                make_fs_access=runtimeContext.make_fs_access,
                input_basedir=input_basedir,
                secret_store=runtimeContext.secret_store)
        except SystemExit as err:
            return err.code

        if not executor:
            if args.parallel:
                executor = MultithreadedJobExecutor()
                runtimeContext.select_resources = executor.select_resources
            else:
                executor = SingleJobExecutor()

        try:
            runtimeContext.basedir = input_basedir
            del args.workflow
            del args.job_order

            conf_file = getattr(args, "beta_dependency_resolvers_configuration", None)  # Text
            use_conda_dependencies = getattr(args, "beta_conda_dependencies", None)  # Text

            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = DependenciesConfiguration(args)
            else:
                runtimeContext.find_default_container = functools.partial(
                    find_default_container,
                    default_container=runtimeContext.default_container,
                    use_biocontainers=args.beta_use_biocontainers)

            (out, status) = executor(tool, initialized_job_order_object,
                                     runtimeContext, logger=_logger)

            if out is not None:
                if runtimeContext.research_obj is not None:
                    runtimeContext.research_obj.create_job(out, None, True)

                def loc_to_path(obj):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if obj["location"].startswith("file://"):
                        obj["path"] = uri_file_path(obj["location"])

                visit_class(out, ("File", "Directory"), loc_to_path)

                # Unsetting the Generation from final output object
                visit_class(out, ("File", ), MutationManager().unset_generation)

                if isinstance(out, string_types):
                    stdout.write(out)
                else:
                    stdout.write(json_dumps(out, indent=4,  # type: ignore
                                            ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()  # type: ignore

            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                return 1
            _logger.info(u"Final process status is %s", status)
            return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                u"Workflow or tool uses unsupported feature:\n%s", exc,
                exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(
                u"Workflow error%s:\n%s",
                try_again_msg, strip_dup_lineno(Text(exc)), exc_info=args.debug)
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            _logger.error(
                u"Unhandled error%s:\n %s", try_again_msg, exc, exc_info=args.debug)
            return 1

    finally:
        if args and runtimeContext and runtimeContext.research_obj \
                and workflowobj and loadingContext:
            research_obj = runtimeContext.research_obj
            if loadingContext.loader is not None:
                research_obj.generate_snapshot(prov_deps(
                    workflowobj, loadingContext.loader, uri))
            else:
                _logger.warning("Unable to generate provenance snapshot "
                                "due to missing loadingContext.loader.")
            if prov_log_handler is not None:
                # Stop logging so we won't half-log adding ourself to RO
                _logger.debug(u"[provenance] Closing provenance log file %s",
                              prov_log_handler)
                _logger.removeHandler(prov_log_handler)
                # Ensure last log lines are written out
                prov_log_handler.flush()
                # Underlying WritableBagFile will add the tagfile to the manifest
                prov_log_handler.stream.close()
                prov_log_handler.close()
            research_obj.close(args.provenance)

        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
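
# --- Hedged usage sketch (not part of the original module) ---
# main() accepts an argv-style list of options and returns a process exit
# code, so the CLI can also be exercised programmatically. The workflow path
# below is a hypothetical placeholder, not a file shipped with cwltool.
def _example_programmatic_run():
    # Equivalent to running: cwltool --validate my-workflow.cwl
    return main(argsl=["--validate", "my-workflow.cwl"])


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
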
def static_checker(workflow_inputs, workflow_outputs, step_inputs, step_outputs, param_to_step):
    # type: (List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], Dict[Text, Dict[Text, Any]]) -> None
    """Check if all source and sink types of a workflow are compatible before run time."""

    # source parameters: workflow_inputs and step_outputs
    # sink parameters: step_inputs and workflow_outputs

    # make a dictionary of source parameters, indexed by the "id" field
    src_parms = workflow_inputs + step_outputs
    src_dict = {}
    for parm in src_parms:
        src_dict[parm["id"]] = parm

    step_inputs_val = check_all_types(src_dict, step_inputs, "source")
    workflow_outputs_val = check_all_types(src_dict, workflow_outputs, "outputSource")

    warnings = step_inputs_val["warning"] + workflow_outputs_val["warning"]
    exceptions = step_inputs_val["exception"] + workflow_outputs_val["exception"]

    warning_msgs = []
    exception_msgs = []
    for warning in warnings:
        src = warning.src
        sink = warning.sink
        linkMerge = warning.linkMerge
        if sink.get("secondaryFiles") and sorted(
                sink.get("secondaryFiles", [])) != sorted(src.get("secondaryFiles", [])):
            msg1 = "Sink '%s'" % (shortname(sink["id"]))
            msg2 = SourceLine(sink.get("_tool_entry", sink), "secondaryFiles").makeError(
                "expects secondaryFiles: %s but" % (sink.get("secondaryFiles")))
            if "secondaryFiles" in src:
                msg3 = SourceLine(src, "secondaryFiles").makeError(
                    "source '%s' has secondaryFiles %s."
                    % (shortname(src["id"]), src.get("secondaryFiles")))
            else:
                msg3 = SourceLine(src, "id").makeError(
                    "source '%s' does not include secondaryFiles."
                    % (shortname(src["id"])))
            msg4 = SourceLine(src, "id").makeError(
                "To fix, add secondaryFiles: %s to definition of '%s'."
                % (sink.get("secondaryFiles"), shortname(src["id"])))
            msg = SourceLine(sink).makeError(
                "%s\n%s" % (msg1, bullets([msg2, msg3, msg4], " ")))
        elif sink.get("not_connected"):
            msg = SourceLine(sink, "type").makeError(
                "'%s' is not an input parameter of %s, expected %s"
                % (shortname(sink["id"]), param_to_step[sink["id"]]["run"],
                   ", ".join(shortname(s["id"])
                             for s in param_to_step[sink["id"]]["inputs"]
                             if not s.get("not_connected"))))
        else:
            msg = SourceLine(src, "type").makeError(
                "Source '%s' of type %s may be incompatible"
                % (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + \
                SourceLine(sink, "type").makeError(
                    " with sink '%s' of type %s"
                    % (shortname(sink["id"]), json_dumps(sink["type"])))
            if linkMerge:
                msg += "\n" + SourceLine(sink).makeError(
                    " source has linkMerge method %s" % linkMerge)
        warning_msgs.append(msg)

    for exception in exceptions:
        src = exception.src
        sink = exception.sink
        linkMerge = exception.linkMerge
        msg = SourceLine(src, "type").makeError(
            "Source '%s' of type %s is incompatible"
            % (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + \
            SourceLine(sink, "type").makeError(
                " with sink '%s' of type %s"
                % (shortname(sink["id"]), json_dumps(sink["type"])))
        if linkMerge:
            msg += "\n" + SourceLine(sink).makeError(
                " source has linkMerge method %s" % linkMerge)
        exception_msgs.append(msg)

    for sink in step_inputs:
        if ('null' != sink["type"] and 'null' not in sink["type"]
                and "source" not in sink and "default" not in sink
                and "valueFrom" not in sink):
            msg = SourceLine(sink).makeError(
                "Required parameter '%s' does not have source, default, or valueFrom expression"
                % shortname(sink["id"]))
            exception_msgs.append(msg)

    all_warning_msg = strip_dup_lineno("\n".join(warning_msgs))
    all_exception_msg = strip_dup_lineno("\n".join(exception_msgs))

    if warnings:
        _logger.warning("Workflow checker warning:\n%s", all_warning_msg)
    if exceptions:
        raise validate.ValidationException(all_exception_msg)