def test_for_invalid_yaml2():
    # Issue 143
    document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
        get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))

    src = "test19.cwl"
    try:
        load_and_validate(
            document_loader,
            avsc_names,
            six.text_type(get_data("tests/test_schema/" + src)),
            True,
        )
    except RuntimeError as e:
        msg = reformat_yaml_exception_message(
            strip_dup_lineno(six.text_type(e)))
        assert msg.endswith(
            src + ":2:1: expected <block end>, but found ':'") or msg.endswith(
                src + ":2:1: expected <block end>, but found u':'")
        return
    except ValidationException as e:
        msgs = str(strip_dup_lineno(six.text_type(e)))
        print(msgs)
        # weird splits due to differing path length on MS Windows
        # & during the release tests
        assert "{}:2:1: Object".format(src) in msgs
        assert "is not valid because" in msgs
        assert "`CommandLineTool`" in msgs
        assert "mapping with" in msgs
        assert "implicit" in msgs
        assert "null key" in msgs
        return
    assert False, "Missing RuntimeError or ValidationException"
Ejemplo n.º 2
0
    def test_for_invalid_yaml2(self):
        # Issue 143
        document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
            get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))

        src = "test19.cwl"
        with self.assertRaises(RuntimeError):
            try:
                load_and_validate(
                    document_loader, avsc_names,
                    six.text_type(get_data("tests/test_schema/" + src)), True)
            except RuntimeError as e:
                msg = reformat_yaml_exception_message(
                    strip_dup_lineno(six.text_type(e)))
                self.assertTrue(
                    msg.endswith(src +
                                 ":1:1: expected <block end>, but found ':'"))
                print("\n", e)
                raise
Ejemplo n.º 3
0
    def test_print_oneline_for_invalid_yaml(self):
        # Issue #137
        document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
            get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))

        src = "test16.cwl"
        with self.assertRaises(RuntimeError):
            try:
                load_and_validate(
                    document_loader, avsc_names,
                    six.text_type(get_data("tests/test_schema/" + src)), True)
            except RuntimeError as e:
                msg = reformat_yaml_exception_message(
                    strip_dup_lineno(six.text_type(e)))
                msg = to_one_line_messages(msg)
                self.assertTrue(
                    msg.endswith(src + ":10:1: could not find expected \':\'"))
                print("\n", e)
                raise
Ejemplo n.º 4
0
    def test_print_oneline_for_errors_in_resolve_ref(self):
        # Issue #141
        document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
            get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))

        src = "test18.cwl"
        fullpath = normpath(get_data("tests/test_schema/" + src))
        with self.assertRaises(ValidationException):
            try:
                load_and_validate(document_loader, avsc_names,
                                  six.text_type(fullpath), True)
            except ValidationException as e:
                msgs = to_one_line_messages(
                    str(strip_dup_lineno(six.text_type(e)))).splitlines()
                # convert Windows path to Posix path
                if '\\' in fullpath:
                    fullpath = '/' + fullpath.lower().replace('\\', '/')
                self.assertEqual(len(msgs), 1)
                print("\n", e)
                assert msgs[0].endswith(
                    src + ':13:5: Field `type` references unknown identifier '
                    '`Filea`, tried file://%s#Filea' % (fullpath))
                raise
Ejemplo n.º 5
0
    def test_for_invalid_yaml1(self):
        # Issue 143
        document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
            get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))

        src = "test16.cwl"
        with self.assertRaises(RuntimeError):
            try:
                load_and_validate(
                    document_loader, avsc_names,
                    six.text_type(get_data("tests/test_schema/" + src)), True)
            except RuntimeError as e:
                msg = reformat_yaml_exception_message(
                    strip_dup_lineno(six.text_type(e)))
                msgs = msg.splitlines()
                self.assertEqual(len(msgs), 2)
                self.assertTrue(
                    msgs[0].endswith(src +
                                     ":9:7: while scanning a simple key"))
                self.assertTrue(
                    msgs[1].endswith(src +
                                     ":10:1:   could not find expected ':'"))
                print("\n", e)
                raise
def test_print_oneline_for_errors_in_resolve_ref():
    # Issue #141
    document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(
        get_data(u"tests/test_schema/CommonWorkflowLanguage.yml"))

    src = "test18.cwl"
    fullpath = normpath(get_data("tests/test_schema/" + src))
    with pytest.raises(ValidationException):
        try:
            load_and_validate(document_loader, avsc_names,
                              six.text_type(fullpath), True)
        except ValidationException as e:
            msgs = to_one_line_messages(str(strip_dup_lineno(
                six.text_type(e)))).splitlines()
            # convert Windows path to Posix path
            if "\\" in fullpath:
                fullpath = "/" + fullpath.lower().replace("\\", "/")
            assert len(msgs) == 2
            print("\n", e)
            assert msgs[0].endswith(src + ":9:1: checking field `outputs`")
            assert msgs[1].endswith(
                src + ":14:5: Field `type` references unknown identifier "
                "`Filea`, tried file://%s#Filea" % (fullpath))
            raise
Ejemplo n.º 7
0
def main(
    argsl=None,  # type: List[str]
    args=None,  # type: argparse.Namespace
    job_order_object=None,  # type: MutableMapping[Text, Any]
    stdin=sys.stdin,  # type: IO[Any]
    stdout=None,  # type: Union[TextIO, codecs.StreamWriter]
    stderr=sys.stderr,  # type: IO[Any]
    versionfunc=versionstring,  # type: Callable[[], Text]
    logger_handler=None,  #
    custom_schema_callback=None,  # type: Callable[[], None]
    executor=None,  # type: Callable[..., Tuple[Dict[Text, Any], Text]]
    loadingContext=None,  # type: LoadingContext
    runtimeContext=None  # type: RuntimeContext
):  # type: (...) -> int
    if not stdout:  # force UTF-8 even if the console is configured differently
        if (hasattr(sys.stdout, "encoding")  # type: ignore
                and sys.stdout.encoding != 'UTF-8'):  # type: ignore
            if six.PY3 and hasattr(sys.stdout, "detach"):
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            else:
                stdout = codecs.getwriter('utf-8')(sys.stdout)  # type: ignore
        else:
            stdout = cast(TextIO, sys.stdout)  # type: ignore

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    # pre-declared for finally block
    workflowobj = None
    input_for_prov = None
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)

        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()

        # If on Windows platform, a default Docker Container is used if not
        # explicitely provided by user
        if onWindows() and not runtimeContext.default_container:
            # This docker image is a minimal alpine image with bash installed
            # (size 6 mb). source: https://github.com/frol/docker-alpine-bash
            runtimeContext.default_container = windows_default_container_id

        # If caller parsed its own arguments, it may not include every
        # cwltool option, so fill in defaults to avoid crashing when
        # dereferencing them in args.
        for key, val in six.iteritems(get_default_args()):
            if not hasattr(args, key):
                setattr(args, key, val)

        rdflib_logger = logging.getLogger("rdflib.term")
        rdflib_logger.addHandler(stderr_handler)
        rdflib_logger.setLevel(logging.ERROR)
        if args.quiet:
            _logger.setLevel(logging.WARN)
        if runtimeContext.debug:
            _logger.setLevel(logging.DEBUG)
            rdflib_logger.setLevel(logging.DEBUG)
        if args.timestamps:
            formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                          "%Y-%m-%d %H:%M:%S")
            stderr_handler.setFormatter(formatter)

        if args.version:
            print(versionfunc())
            return 0
        else:
            _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supportedCWLversions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error(
                    "CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool",
                              res.read())
            res.close()
        else:
            use_standard_schema("v1.0")
        #call function from provenance.py if the provenance flag is enabled.
        if args.provenance:
            if not args.compute_checksum:
                _logger.error(
                    "--provenance incompatible with --no-compute-checksum")
                return 1

            runtimeContext.research_obj = ResearchObject(
                temp_prefix_ro=args.tmpdir_prefix,
                # Optionals, might be None
                orcid=args.orcid,
                full_name=args.cwl_full_name)

        if loadingContext is None:
            loadingContext = LoadingContext(vars(args))
        else:
            loadingContext = loadingContext.copy()
        loadingContext.research_obj = runtimeContext.research_obj
        loadingContext.disable_js_validation = \
            args.disable_js_validation or (not args.do_validate)
        loadingContext.construct_tool_object = getdefault(
            loadingContext.construct_tool_object, workflow.default_make_tool)
        loadingContext.resolver = getdefault(loadingContext.resolver,
                                             tool_resolver)

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow,
            resolver=loadingContext.resolver,
            fetcher_constructor=loadingContext.fetcher_constructor)

        try_again_msg = "" if args.debug else ", try again with --debug for more information"

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args, stdin, loadingContext.fetcher_constructor,
                loadingContext.overrides_list, tool_file_uri)

            if args.overrides:
                loadingContext.overrides_list.extend(
                    load_overrides(file_uri(os.path.abspath(args.overrides)),
                                   tool_file_uri))

            document_loader, workflowobj, uri = fetch_document(
                uri,
                resolver=loadingContext.resolver,
                fetcher_constructor=loadingContext.fetcher_constructor)

            if args.print_deps:
                printdeps(workflowobj, document_loader, stdout,
                          args.relative_deps, uri)
                return 0

            document_loader, avsc_names, processobj, metadata, uri \
                = validate_document(document_loader, workflowobj, uri,
                                    enable_dev=loadingContext.enable_dev,
                                    strict=loadingContext.strict,
                                    preprocess_only=(args.print_pre or args.pack),
                                    fetcher_constructor=loadingContext.fetcher_constructor,
                                    skip_schemas=args.skip_schemas,
                                    overrides=loadingContext.overrides_list,
                                    do_validate=loadingContext.do_validate)
            if args.pack:
                stdout.write(
                    print_pack(document_loader, processobj, uri, metadata))
                return 0
            if args.provenance and runtimeContext.research_obj:
                # Can't really be combined with args.pack at same time
                runtimeContext.research_obj.packed_workflow(
                    print_pack(document_loader, processobj, uri, metadata))

            if args.print_pre:
                stdout.write(json_dumps(processobj, indent=4))
                return 0

            loadingContext.overrides_list.extend(
                metadata.get("cwltool:overrides", []))

            tool = make_tool(document_loader, avsc_names, metadata, uri,
                             loadingContext)
            if args.make_template:
                yaml.safe_dump(generate_input_template(tool),
                               sys.stdout,
                               default_flow_style=False,
                               indent=4,
                               block_seq_indent=2)
                return 0

            if args.validate:
                _logger.info("Tool definition is valid")
                return 0

            if args.print_rdf:
                stdout.write(
                    printrdf(tool, document_loader.ctx, args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, document_loader.ctx, stdout)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s",
                          exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s",
                          exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                exc if not args.debug else "",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool
        # If on MacOS platform, TMPDIR must be set to be under one of the
        # shared volumes in Docker for Mac
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(runtimeContext, dirprefix) and getattr(
                    runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \
                        else ""
                setattr(
                    runtimeContext, dirprefix,
                    os.path.abspath(getattr(runtimeContext, dirprefix)) + sl)
                if not os.path.exists(
                        os.path.dirname(getattr(runtimeContext, dirprefix))):
                    try:
                        os.makedirs(
                            os.path.dirname(getattr(runtimeContext,
                                                    dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir

        runtimeContext.secret_store = getdefault(runtimeContext.secret_store,
                                                 SecretStore())
        try:
            initialized_job_order_object, input_for_prov = init_job_order(
                job_order_object,
                args,
                tool,
                jobloader,
                stdout,
                print_input_deps=args.print_input_deps,
                provArgs=runtimeContext.research_obj,
                relative_deps=args.relative_deps,
                input_basedir=input_basedir,
                secret_store=runtimeContext.secret_store)
        except SystemExit as err:
            return err.code

        if not executor:
            if args.parallel:
                executor = MultithreadedJobExecutor()
                runtimeContext.select_resources = executor.select_resources
            else:
                executor = SingleJobExecutor()
        assert executor is not None

        try:
            runtimeContext.basedir = input_basedir
            del args.workflow
            del args.job_order

            conf_file = getattr(args,
                                "beta_dependency_resolvers_configuration",
                                None)  # Text
            use_conda_dependencies = getattr(args, "beta_conda_dependencies",
                                             None)  # Text

            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = DependenciesConfiguration(
                    args)

            runtimeContext.find_default_container = functools.partial(
                find_default_container,
                default_container=runtimeContext.default_container,
                use_biocontainers=args.beta_use_biocontainers)
            runtimeContext.make_fs_access = getdefault(
                runtimeContext.make_fs_access, StdFsAccess)
            (out, status) = executor(tool,
                                     initialized_job_order_object,
                                     runtimeContext,
                                     logger=_logger)

            if out is not None:

                def loc_to_path(obj):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if obj["location"].startswith("file://"):
                        obj["path"] = uri_file_path(obj["location"])

                visit_class(out, ("File", "Directory"), loc_to_path)

                # Unsetting the Generation from final output object
                visit_class(out, ("File", ),
                            MutationManager().unset_generation)

                if isinstance(out, string_types):
                    stdout.write(out)
                else:
                    stdout.write(
                        json_dumps(
                            out,
                            indent=4,  # type: ignore
                            ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()  # type: ignore

            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                return 1
            _logger.info(u"Final process status is %s", status)
            return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s",
                          exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(u"Workflow or tool uses unsupported feature:\n%s",
                          exc,
                          exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(u"Workflow error%s:\n%s",
                          try_again_msg,
                          strip_dup_lineno(six.text_type(exc)),
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(u"Unhandled error%s:\n  %s",
                          try_again_msg,
                          exc,
                          exc_info=args.debug)
            return 1

    finally:
        if args and runtimeContext and runtimeContext.research_obj \
                and args.rm_tmpdir and workflowobj:
            #adding all related cwl files to RO
            prov_dependencies = printdeps(workflowobj, document_loader, stdout,
                                          args.relative_deps, uri,
                                          runtimeContext.research_obj)
            prov_dep = prov_dependencies[1]
            assert prov_dep
            runtimeContext.research_obj.generate_snapshot(prov_dep)
            #for input file dependencies
            if input_for_prov:
                runtimeContext.research_obj.generate_snapshot(input_for_prov)
            #NOTE: keep these commented out lines to evaluate tests later
            #if job_order_object:
            #runtimeContext.research_obj.generate_snapshot(job_order_object)

            runtimeContext.research_obj.close(args.provenance)

        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
Ejemplo n.º 8
0
def main(
    argsl: Optional[List[str]] = None,
    args: Optional[argparse.Namespace] = None,
    job_order_object: Optional[CWLObjectType] = None,
    stdin: IO[Any] = sys.stdin,
    stdout: Optional[Union[TextIO, StreamWriter]] = None,
    stderr: IO[Any] = sys.stderr,
    versionfunc: Callable[[], str] = versionstring,
    logger_handler: Optional[logging.Handler] = None,
    custom_schema_callback: Optional[Callable[[], None]] = None,
    executor: Optional[JobExecutor] = None,
    loadingContext: Optional[LoadingContext] = None,
    runtimeContext: Optional[RuntimeContext] = None,
    input_required: bool = True,
) -> int:
    if not stdout:  # force UTF-8 even if the console is configured differently
        if hasattr(sys.stdout,
                   "encoding") and sys.stdout.encoding.upper() not in (
                       "UTF-8",
                       "UTF8",
                   ):
            if hasattr(sys.stdout, "detach"):
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
            else:
                stdout = getwriter("utf-8")(sys.stdout)  # type: ignore
        else:
            stdout = sys.stdout

    _logger.removeHandler(defaultStreamHandler)
    stderr_handler = logger_handler
    if stderr_handler is not None:
        _logger.addHandler(stderr_handler)
    else:
        coloredlogs.install(logger=_logger, stream=stderr)
        stderr_handler = _logger.handlers[-1]
    workflowobj = None
    prov_log_handler = None  # type: Optional[logging.StreamHandler]
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            addl = []  # type: List[str]
            if "CWLTOOL_OPTIONS" in os.environ:
                addl = os.environ["CWLTOOL_OPTIONS"].split(" ")
            parser = arg_parser()
            argcomplete.autocomplete(parser)
            args = parser.parse_args(addl + argsl)
            if args.record_container_id:
                if not args.cidfile_dir:
                    args.cidfile_dir = os.getcwd()
                del args.record_container_id

        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()

        # If on Windows platform, a default Docker Container is used if not
        # explicitely provided by user
        if onWindows() and not runtimeContext.default_container:
            # This docker image is a minimal alpine image with bash installed
            # (size 6 mb). source: https://github.com/frol/docker-alpine-bash
            runtimeContext.default_container = windows_default_container_id

        # If caller parsed its own arguments, it may not include every
        # cwltool option, so fill in defaults to avoid crashing when
        # dereferencing them in args.
        for key, val in get_default_args().items():
            if not hasattr(args, key):
                setattr(args, key, val)

        configure_logging(args, stderr_handler, runtimeContext)

        if args.version:
            print(versionfunc())
            return 0
        _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supported_cwl_versions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                args.workflow = "CWLFile"
            else:
                _logger.error(
                    "CWL document required, no input file was provided")
                parser.print_help()
                return 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if args.mpi_config_file is not None:
            runtimeContext.mpi_config = MpiConfig.load(args.mpi_config_file)

        setup_schema(args, custom_schema_callback)

        if args.provenance:
            if argsl is None:
                raise Exception("argsl cannot be None")
            if setup_provenance(args, argsl, runtimeContext) is not None:
                return 1

        loadingContext = setup_loadingContext(loadingContext, runtimeContext,
                                              args)

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow,
            resolver=loadingContext.resolver,
            fetcher_constructor=loadingContext.fetcher_constructor,
        )

        try_again_msg = ("" if args.debug else
                         ", try again with --debug for more information")

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args,
                stdin,
                loadingContext.fetcher_constructor,
                loadingContext.overrides_list,
                tool_file_uri,
            )

            if args.overrides:
                loadingContext.overrides_list.extend(
                    load_overrides(file_uri(os.path.abspath(args.overrides)),
                                   tool_file_uri))

            loadingContext, workflowobj, uri = fetch_document(
                uri, loadingContext)

            if args.print_deps and loadingContext.loader:
                printdeps(workflowobj, loadingContext.loader, stdout,
                          args.relative_deps, uri)
                return 0

            loadingContext, uri = resolve_and_validate_document(
                loadingContext,
                workflowobj,
                uri,
                preprocess_only=(args.print_pre or args.pack),
                skip_schemas=args.skip_schemas,
            )

            if loadingContext.loader is None:
                raise Exception("Impossible code path.")
            processobj, metadata = loadingContext.loader.resolve_ref(uri)
            processobj = cast(CommentedMap, processobj)
            if args.pack:
                stdout.write(print_pack(loadingContext, uri))
                return 0

            if args.provenance and runtimeContext.research_obj:
                # Can't really be combined with args.pack at same time
                runtimeContext.research_obj.packed_workflow(
                    print_pack(loadingContext, uri))

            if args.print_pre:
                stdout.write(
                    json_dumps(processobj,
                               indent=4,
                               sort_keys=True,
                               separators=(",", ": ")))
                return 0

            tool = make_tool(uri, loadingContext)
            if args.make_template:
                make_template(tool)
                return 0

            if args.validate:
                print("{} is valid CWL.".format(args.workflow))
                return 0

            if args.print_rdf:
                stdout.write(
                    printrdf(tool, loadingContext.loader.ctx,
                             args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, loadingContext.loader.ctx, stdout)
                return 0

            if args.print_targets:
                for f in ("outputs", "steps", "inputs"):
                    if tool.tool[f]:
                        _logger.info("%s%s targets:", f[0].upper(), f[1:-1])
                        stdout.write("  " + "\n  ".join(
                            [shortname(t["id"]) for t in tool.tool[f]]) + "\n")
                return 0

            if args.target:
                ctool = choose_target(args, tool, loadingContext)
                if ctool is None:
                    return 1
                else:
                    tool = ctool

            if args.print_subgraph:
                if "name" in tool.tool:
                    del tool.tool["name"]
                stdout.write(
                    json_dumps(tool.tool,
                               indent=4,
                               sort_keys=True,
                               separators=(",", ": ")))
                return 0

        except (ValidationException) as exc:
            _logger.error("Tool definition failed validation:\n%s",
                          str(exc),
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(
                "Tool definition failed initialization:\n%s",
                str(exc),
                exc_info=args.debug,
            )
            return 1
        except Exception as exc:
            _logger.error(
                "I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                str(exc) if not args.debug else "",
                exc_info=args.debug,
            )
            return 1

        if isinstance(tool, int):
            return tool

        # If on MacOS platform, TMPDIR must be set to be under one of the
        # shared volumes in Docker for Mac
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path
            if runtimeContext.tmpdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmpdir_prefix = default_mac_path

        if check_working_directories(runtimeContext) is not None:
            return 1

        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir

        runtimeContext.secret_store = getdefault(runtimeContext.secret_store,
                                                 SecretStore())
        runtimeContext.make_fs_access = getdefault(
            runtimeContext.make_fs_access, StdFsAccess)

        if not executor:
            if args.parallel:
                temp_executor = MultithreadedJobExecutor()
                runtimeContext.select_resources = temp_executor.select_resources
                real_executor = temp_executor  # type: JobExecutor
            else:
                real_executor = SingleJobExecutor()
        else:
            real_executor = executor

        try:
            runtimeContext.basedir = input_basedir

            if isinstance(tool, ProcessGenerator):
                tfjob_order = {}  # type: CWLObjectType
                if loadingContext.jobdefaults:
                    tfjob_order.update(loadingContext.jobdefaults)
                if job_order_object:
                    tfjob_order.update(job_order_object)
                tfout, tfstatus = real_executor(tool.embedded_tool,
                                                tfjob_order, runtimeContext)
                if not tfout or tfstatus != "success":
                    raise WorkflowException(
                        "ProcessGenerator failed to generate workflow")
                tool, job_order_object = tool.result(tfjob_order, tfout,
                                                     runtimeContext)
                if not job_order_object:
                    job_order_object = None

            try:
                initialized_job_order_object = init_job_order(
                    job_order_object,
                    args,
                    tool,
                    jobloader,
                    stdout,
                    print_input_deps=args.print_input_deps,
                    relative_deps=args.relative_deps,
                    make_fs_access=runtimeContext.make_fs_access,
                    input_basedir=input_basedir,
                    secret_store=runtimeContext.secret_store,
                    input_required=input_required,
                )
            except SystemExit as err:
                return err.code

            del args.workflow
            del args.job_order

            conf_file = getattr(args,
                                "beta_dependency_resolvers_configuration",
                                None)  # str
            use_conda_dependencies = getattr(args, "beta_conda_dependencies",
                                             None)  # str

            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = DependenciesConfiguration(
                    args)
            else:
                runtimeContext.find_default_container = functools.partial(
                    find_default_container,
                    default_container=runtimeContext.default_container,
                    use_biocontainers=args.beta_use_biocontainers,
                )

            (out, status) = real_executor(tool,
                                          initialized_job_order_object,
                                          runtimeContext,
                                          logger=_logger)

            if out is not None:
                if runtimeContext.research_obj is not None:
                    runtimeContext.research_obj.create_job(out, True)

                    def remove_at_id(doc: CWLObjectType) -> None:
                        for key in list(doc.keys()):
                            if key == "@id":
                                del doc[key]
                            else:
                                value = doc[key]
                                if isinstance(value, MutableMapping):
                                    remove_at_id(value)
                                elif isinstance(value, MutableSequence):
                                    for entry in value:
                                        if isinstance(entry, MutableMapping):
                                            remove_at_id(entry)

                    remove_at_id(out)
                    visit_class(
                        out,
                        ("File", ),
                        functools.partial(add_sizes,
                                          runtimeContext.make_fs_access("")),
                    )

                def loc_to_path(obj: CWLObjectType) -> None:
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if cast(str, obj["location"]).startswith("file://"):
                        obj["path"] = uri_file_path(cast(str, obj["location"]))

                visit_class(out, ("File", "Directory"), loc_to_path)

                # Unsetting the Generation from final output object
                visit_class(out, ("File", ),
                            MutationManager().unset_generation)

                if isinstance(out, str):
                    stdout.write(out)
                else:
                    stdout.write(json_dumps(out, indent=4, ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()

            if status != "success":
                _logger.warning("Final process status is %s", status)
                return 1
            _logger.info("Final process status is %s", status)
            return 0

        except (ValidationException) as exc:
            _logger.error("Input object failed validation:\n%s",
                          str(exc),
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                "Workflow or tool uses unsupported feature:\n%s",
                str(exc),
                exc_info=args.debug,
            )
            return 33
        except WorkflowException as exc:
            _logger.error(
                "Workflow error%s:\n%s",
                try_again_msg,
                strip_dup_lineno(str(exc)),
                exc_info=args.debug,
            )
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            _logger.error(
                "Unhandled error%s:\n  %s",
                try_again_msg,
                str(exc),
                exc_info=args.debug,
            )
            return 1

    finally:
        if (args and runtimeContext and runtimeContext.research_obj
                and workflowobj and loadingContext):
            research_obj = runtimeContext.research_obj
            if loadingContext.loader is not None:
                research_obj.generate_snapshot(
                    prov_deps(workflowobj, loadingContext.loader, uri))
            else:
                _logger.warning("Unable to generate provenance snapshot "
                                " due to missing loadingContext.loader.")
            if prov_log_handler is not None:
                # Stop logging so we won't half-log adding ourself to RO
                _logger.debug("[provenance] Closing provenance log file %s",
                              prov_log_handler)
                _logger.removeHandler(prov_log_handler)
                # Ensure last log lines are written out
                prov_log_handler.flush()
                # Underlying WritableBagFile will add the tagfile to the manifest
                prov_log_handler.stream.close()
                prov_log_handler.close()
            research_obj.close(args.provenance)

        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
Ejemplo n.º 9
0
def main(
        argsl=None,  # type: List[str]
        args=None,  # type: argparse.Namespace
        executor=single_job_executor,  # type: Callable[..., Tuple[Dict[Text, Any], Text]]
        makeTool=workflow.defaultMakeTool,  # type: Callable[..., Process]
        selectResources=None,  # type: Callable[[Dict[Text, int]], Dict[Text, int]]
        stdin=sys.stdin,  # type: IO[Any]
        stdout=sys.stdout,  # type: IO[Any]
        stderr=sys.stderr,  # type: IO[Any]
        versionfunc=versionstring,  # type: Callable[[], Text]
        job_order_object=None,  # type: MutableMapping[Text, Any]
        make_fs_access=StdFsAccess,  # type: Callable[[Text], StdFsAccess]
        fetcher_constructor=None,  # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
        resolver=tool_resolver,
        logger_handler=None,
        custom_schema_callback=None  # type: Callable[[], None]
):
    # type: (...) -> int

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)

        # If On windows platform, A default Docker Container is Used if not explicitely provided by user
        if onWindows() and not args.default_container:
            # This docker image is a minimal alpine image with bash installed(size 6 mb). source: https://github.com/frol/docker-alpine-bash
            args.default_container = windows_default_container_id

        # If caller provided custom arguments, it may be not every expected
        # option is set, so fill in no-op defaults to avoid crashing when
        # dereferencing them in args.
        for k, v in six.iteritems({
                'print_deps': False,
                'print_pre': False,
                'print_rdf': False,
                'print_dot': False,
                'relative_deps': False,
                'tmp_outdir_prefix': 'tmp',
                'tmpdir_prefix': 'tmp',
                'print_input_deps': False,
                'cachedir': None,
                'quiet': False,
                'debug': False,
                'js_console': False,
                'version': False,
                'enable_dev': False,
                'enable_ext': False,
                'strict': True,
                'skip_schemas': False,
                'rdf_serializer': None,
                'basedir': None,
                'tool_help': False,
                'workflow': None,
                'job_order': None,
                'pack': False,
                'on_error': 'continue',
                'relax_path_checks': False,
                'validate': False,
                'enable_ga4gh_tool_registry': False,
                'ga4gh_tool_registries': [],
                'find_default_container': None,
                'make_template': False,
                'overrides': None
        }):
            if not hasattr(args, k):
                setattr(args, k, v)

        if args.quiet:
            _logger.setLevel(logging.WARN)
        if args.debug:
            _logger.setLevel(logging.DEBUG)

        if args.version:
            print(versionfunc())
            return 0
        else:
            _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supportedCWLversions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error(
                    "CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            draft2tool.ACCEPTLIST_RE = draft2tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool",
                              res.read())
            res.close()
        else:
            use_standard_schema("v1.0")

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow,
            resolver=resolver,
            fetcher_constructor=fetcher_constructor)

        overrides = []  # type: List[Dict[Text, Any]]

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args, stdin, fetcher_constructor, overrides, tool_file_uri)
        except Exception as e:
            _logger.error(Text(e), exc_info=args.debug)

        if args.overrides:
            overrides.extend(
                load_overrides(file_uri(os.path.abspath(args.overrides)),
                               tool_file_uri))

        try:
            document_loader, workflowobj, uri = fetch_document(
                uri,
                resolver=resolver,
                fetcher_constructor=fetcher_constructor)

            if args.print_deps:
                printdeps(workflowobj, document_loader, stdout,
                          args.relative_deps, uri)
                return 0

            document_loader, avsc_names, processobj, metadata, uri \
                = validate_document(document_loader, workflowobj, uri,
                                    enable_dev=args.enable_dev, strict=args.strict,
                                    preprocess_only=args.print_pre or args.pack,
                                    fetcher_constructor=fetcher_constructor,
                                    skip_schemas=args.skip_schemas,
                                    overrides=overrides)

            if args.print_pre:
                stdout.write(json.dumps(processobj, indent=4))
                return 0

            overrides.extend(metadata.get("cwltool:overrides", []))

            conf_file = getattr(args,
                                "beta_dependency_resolvers_configuration",
                                None)  # Text
            use_conda_dependencies = getattr(args, "beta_conda_dependencies",
                                             None)  # Text

            make_tool_kwds = vars(args)

            job_script_provider = None  # type: Callable[[Any, List[str]], Text]
            if conf_file or use_conda_dependencies:
                dependencies_configuration = DependenciesConfiguration(
                    args)  # type: DependenciesConfiguration
                make_tool_kwds[
                    "job_script_provider"] = dependencies_configuration

            make_tool_kwds["find_default_container"] = functools.partial(
                find_default_container, args)
            make_tool_kwds["overrides"] = overrides

            tool = make_tool(document_loader, avsc_names, metadata, uri,
                             makeTool, make_tool_kwds)
            if args.make_template:
                yaml.safe_dump(generate_input_template(tool),
                               sys.stdout,
                               default_flow_style=False,
                               indent=4,
                               block_seq_indent=2)
                return 0

            if args.validate:
                _logger.info("Tool definition is valid")
                return 0

            if args.pack:
                stdout.write(
                    print_pack(document_loader, processobj, uri, metadata))
                return 0

            if args.print_rdf:
                stdout.write(
                    printrdf(tool, document_loader.ctx, args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, document_loader.ctx, stdout)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s",
                          exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s",
                          exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s",
                ", try again with --debug for more information.\nThe error was: "
                "%s" % exc if not args.debug else ".  The error was:",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(args, dirprefix) and getattr(args, dirprefix) != 'tmp':
                sl = "/" if getattr(args, dirprefix).endswith(
                    "/") or dirprefix == "cachedir" else ""
                setattr(args, dirprefix,
                        os.path.abspath(getattr(args, dirprefix)) + sl)
                if not os.path.exists(os.path.dirname(getattr(args,
                                                              dirprefix))):
                    try:
                        os.makedirs(os.path.dirname(getattr(args, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                setattr(args, 'move_outputs', "copy")
            setattr(args, "tmp_outdir_prefix", args.cachedir)

        try:
            job_order_object = init_job_order(
                job_order_object,
                args,
                tool,
                print_input_deps=args.print_input_deps,
                relative_deps=args.relative_deps,
                stdout=stdout,
                make_fs_access=make_fs_access,
                loader=jobloader,
                input_basedir=input_basedir)
        except SystemExit as e:
            return e.code

        if isinstance(job_order_object, int):
            return job_order_object

        try:
            setattr(args, 'basedir', input_basedir)
            del args.workflow
            del args.job_order
            (out, status) = executor(tool,
                                     job_order_object,
                                     makeTool=makeTool,
                                     select_resources=selectResources,
                                     make_fs_access=make_fs_access,
                                     **vars(args))

            # This is the workflow output, it needs to be written
            if out is not None:

                def locToPath(p):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in p:
                            del p[field]
                    if p["location"].startswith("file://"):
                        p["path"] = uri_file_path(p["location"])

                visit_class(out, ("File", "Directory"), locToPath)

                # Unsetting the Generation fron final output object
                visit_class(out, ("File", ),
                            MutationManager().unset_generation)

                if isinstance(out, six.string_types):
                    stdout.write(out)
                else:
                    stdout.write(json.dumps(out, indent=4))
                stdout.write("\n")
                stdout.flush()

            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                return 1
            else:
                _logger.info(u"Final process status is %s", status)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s",
                          exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(u"Workflow or tool uses unsupported feature:\n%s",
                          exc,
                          exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(
                u"Workflow error, try again with --debug for more "
                "information:\n%s",
                strip_dup_lineno(six.text_type(exc)),
                exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"Unhandled error, try again with --debug for more information:\n"
                "  %s",
                exc,
                exc_info=args.debug)
            return 1

    finally:
        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
Ejemplo n.º 10
0
    def _init_job(self, joborder, runtime_context):
        # type: (Mapping[str, str], RuntimeContext) -> Builder

        if self.metadata.get("cwlVersion") != INTERNAL_VERSION:
            raise WorkflowException(
                "Process object loaded with version '%s', must update to '%s' in order to execute."
                % (self.metadata.get("cwlVersion"), INTERNAL_VERSION))

        job = cast(Dict[str, expression.JSON], copy.deepcopy(joborder))

        make_fs_access = getdefault(runtime_context.make_fs_access,
                                    StdFsAccess)
        fs_access = make_fs_access(runtime_context.basedir)

        load_listing_req, _ = self.get_requirement("LoadListingRequirement")

        if load_listing_req is not None:
            load_listing = load_listing_req.get("loadListing")
        else:
            load_listing = "no_listing"

        # Validate job order
        try:
            fill_in_defaults(self.tool["inputs"], job, fs_access)

            normalizeFilesDirs(job)
            schema = self.names.get_name("input_record_schema", "")
            if schema is None:
                raise WorkflowException("Missing input record schema: "
                                        "{}".format(self.names))
            validate.validate_ex(schema,
                                 job,
                                 strict=False,
                                 logger=_logger_validation_warnings)

            if load_listing and load_listing != "no_listing":
                get_listing(fs_access,
                            job,
                            recursive=(load_listing == "deep_listing"))

            visit_class(job, ("File", ),
                        functools.partial(add_sizes, fs_access))

            if load_listing == "deep_listing":
                for i, inparm in enumerate(self.tool["inputs"]):
                    k = shortname(inparm["id"])
                    if k not in job:
                        continue
                    v = job[k]
                    dircount = [0]

                    def inc(d):  # type: (List[int]) -> None
                        d[0] += 1

                    visit_class(v, ("Directory", ), lambda x: inc(dircount))
                    if dircount[0] == 0:
                        continue
                    filecount = [0]
                    visit_class(v, ("File", ), lambda x: inc(filecount))
                    if filecount[0] > FILE_COUNT_WARNING:
                        # Long lines in this message are okay, will be reflowed based on terminal columns.
                        _logger.warning(
                            strip_dup_lineno(
                                SourceLine(self.tool["inputs"], i, str).
                                makeError(
                                    """Recursive directory listing has resulted in a large number of File objects (%s) passed to the input parameter '%s'.  This may negatively affect workflow performance and memory use.

If this is a problem, use the hint 'cwltool:LoadListingRequirement' with "shallow_listing" or "no_listing" to change the directory listing behavior:

$namespaces:
  cwltool: "http://commonwl.org/cwltool#"
hints:
  cwltool:LoadListingRequirement:
    loadListing: shallow_listing

""" % (filecount[0], k))))

        except (validate.ValidationException, WorkflowException) as err:
            raise WorkflowException("Invalid job input record:\n" +
                                    str(err)) from err

        files = []  # type: List[Dict[str, str]]
        bindings = CommentedSeq()
        tmpdir = ""
        stagedir = ""

        docker_req, _ = self.get_requirement("DockerRequirement")
        default_docker = None

        if docker_req is None and runtime_context.default_container:
            default_docker = runtime_context.default_container

        if (docker_req or default_docker) and runtime_context.use_container:
            if docker_req is not None:
                # Check if docker output directory is absolute
                if docker_req.get("dockerOutputDirectory") and docker_req.get(
                        "dockerOutputDirectory").startswith("/"):
                    outdir = docker_req.get("dockerOutputDirectory")
                else:
                    outdir = (docker_req.get("dockerOutputDirectory")
                              or runtime_context.docker_outdir
                              or random_outdir())
            elif default_docker is not None:
                outdir = runtime_context.docker_outdir or random_outdir()
            tmpdir = runtime_context.docker_tmpdir or "/tmp"  # nosec
            stagedir = runtime_context.docker_stagedir or "/var/lib/cwl"
        else:
            outdir = fs_access.realpath(
                runtime_context.outdir or tempfile.mkdtemp(prefix=getdefault(
                    runtime_context.tmp_outdir_prefix, DEFAULT_TMP_PREFIX)))
            if self.tool["class"] != "Workflow":
                tmpdir = fs_access.realpath(runtime_context.tmpdir
                                            or tempfile.mkdtemp())
                stagedir = fs_access.realpath(runtime_context.stagedir
                                              or tempfile.mkdtemp())

        builder = Builder(
            job,
            files,
            bindings,
            self.schemaDefs,
            self.names,
            self.requirements,
            self.hints,
            {},
            runtime_context.mutation_manager,
            self.formatgraph,
            make_fs_access,
            fs_access,
            runtime_context.job_script_provider,
            runtime_context.eval_timeout,
            runtime_context.debug,
            runtime_context.js_console,
            runtime_context.force_docker_pull,
            load_listing,
            outdir,
            tmpdir,
            stagedir,
        )

        bindings.extend(
            builder.bind_input(
                self.inputs_record_schema,
                job,
                discover_secondaryFiles=getdefault(runtime_context.toplevel,
                                                   False),
            ))

        if self.tool.get("baseCommand"):
            for index, command in enumerate(aslist(self.tool["baseCommand"])):
                bindings.append({
                    "position": [-1000000, index],
                    "datum": command
                })

        if self.tool.get("arguments"):
            for i, arg in enumerate(self.tool["arguments"]):
                lc = self.tool["arguments"].lc.data[i]
                filename = self.tool["arguments"].lc.filename
                bindings.lc.add_kv_line_col(len(bindings), lc)
                if isinstance(arg, MutableMapping):
                    arg = copy.deepcopy(arg)
                    if arg.get("position"):
                        position = arg.get("position")
                        if isinstance(position, str):  # no need to test the
                            # CWLVersion as the v1.0
                            # schema only allows ints
                            position = builder.do_eval(position)
                            if position is None:
                                position = 0
                        arg["position"] = [position, i]
                    else:
                        arg["position"] = [0, i]
                    bindings.append(arg)
                elif ("$(" in arg) or ("${" in arg):
                    cm = CommentedMap((("position", [0,
                                                     i]), ("valueFrom", arg)))
                    cm.lc.add_kv_line_col("valueFrom", lc)
                    cm.lc.filename = filename
                    bindings.append(cm)
                else:
                    cm = CommentedMap((("position", [0, i]), ("datum", arg)))
                    cm.lc.add_kv_line_col("datum", lc)
                    cm.lc.filename = filename
                    bindings.append(cm)

        # use python2 like sorting of heterogeneous lists
        # (containing str and int types),
        key = functools.cmp_to_key(cmp_like_py2)

        # This awkward construction replaces the contents of
        # "bindings" in place (because Builder expects it to be
        # mutated in place, sigh, I'm sorry) with its contents sorted,
        # supporting different versions of Python and ruamel.yaml with
        # different behaviors/bugs in CommentedSeq.
        bindings_copy = copy.deepcopy(bindings)
        del bindings[:]
        bindings.extend(sorted(bindings_copy, key=key))

        if self.tool["class"] != "Workflow":
            builder.resources = self.evalResources(builder, runtime_context)
        return builder
Ejemplo n.º 11
0
def main(argsl=None,                   # type: List[str]
         args=None,                    # type: argparse.Namespace
         job_order_object=None,        # type: MutableMapping[Text, Any]
         stdin=sys.stdin,              # type: IO[Any]
         stdout=None,                  # type: Union[TextIO, codecs.StreamWriter]
         stderr=sys.stderr,            # type: IO[Any]
         versionfunc=versionstring,    # type: Callable[[], Text]
         logger_handler=None,          #
         custom_schema_callback=None,  # type: Callable[[], None]
         executor=None,                # type: Callable[..., Tuple[Dict[Text, Any], Text]]
         loadingContext=None,          # type: LoadingContext
         runtimeContext=None           # type: RuntimeContext
        ):  # type: (...) -> int
    if not stdout:  # force UTF-8 even if the console is configured differently
        if (hasattr(sys.stdout, "encoding")  # type: ignore
                and sys.stdout.encoding != 'UTF-8'):  # type: ignore
            if six.PY3 and hasattr(sys.stdout, "detach"):
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            else:
                stdout = codecs.getwriter('utf-8')(sys.stdout)  # type: ignore
        else:
            stdout = cast(TextIO, sys.stdout)  # type: ignore

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    # pre-declared for finally block
    workflowobj = None
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)

        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()

        # If on Windows platform, a default Docker Container is used if not
        # explicitely provided by user
        if onWindows() and not runtimeContext.default_container:
            # This docker image is a minimal alpine image with bash installed
            # (size 6 mb). source: https://github.com/frol/docker-alpine-bash
            runtimeContext.default_container = windows_default_container_id

        # If caller parsed its own arguments, it may not include every
        # cwltool option, so fill in defaults to avoid crashing when
        # dereferencing them in args.
        for key, val in six.iteritems(get_default_args()):
            if not hasattr(args, key):
                setattr(args, key, val)

        rdflib_logger = logging.getLogger("rdflib.term")
        rdflib_logger.addHandler(stderr_handler)
        rdflib_logger.setLevel(logging.ERROR)
        if args.quiet:
            _logger.setLevel(logging.WARN)
        if runtimeContext.debug:
            _logger.setLevel(logging.DEBUG)
            rdflib_logger.setLevel(logging.DEBUG)
        if args.timestamps:
            formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                          "%Y-%m-%d %H:%M:%S")
            stderr_handler.setFormatter(formatter)

        if args.version:
            print(versionfunc())
            return 0
        _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supported_cwl_versions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error("CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read())
            res.close()
        else:
            use_standard_schema("v1.0")
        #call function from provenance.py if the provenance flag is enabled.
        if args.provenance:
            if not args.compute_checksum:
                _logger.error("--provenance incompatible with --no-compute-checksum")
                return 1

            runtimeContext.research_obj = ResearchObject(
                temp_prefix_ro=args.tmpdir_prefix,
                # Optionals, might be None
                orcid=args.orcid,
                full_name=args.cwl_full_name)



        if loadingContext is None:
            loadingContext = LoadingContext(vars(args))
        else:
            loadingContext = loadingContext.copy()
        loadingContext.research_obj = runtimeContext.research_obj
        loadingContext.disable_js_validation = \
            args.disable_js_validation or (not args.do_validate)
        loadingContext.construct_tool_object = getdefault(
            loadingContext.construct_tool_object, workflow.default_make_tool)
        loadingContext.resolver = getdefault(loadingContext.resolver, tool_resolver)

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow, resolver=loadingContext.resolver,
            fetcher_constructor=loadingContext.fetcher_constructor)

        try_again_msg = "" if args.debug else ", try again with --debug for more information"

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args, stdin, loadingContext.fetcher_constructor,
                loadingContext.overrides_list, tool_file_uri)

            if args.overrides:
                loadingContext.overrides_list.extend(load_overrides(
                    file_uri(os.path.abspath(args.overrides)), tool_file_uri))

            document_loader, workflowobj, uri = fetch_document(
                uri, resolver=loadingContext.resolver,
                fetcher_constructor=loadingContext.fetcher_constructor)

            if args.print_deps:
                printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri)
                return 0

            document_loader, avsc_names, processobj, metadata, uri \
                = validate_document(document_loader, workflowobj, uri,
                                    enable_dev=loadingContext.enable_dev,
                                    strict=loadingContext.strict,
                                    preprocess_only=(args.print_pre or args.pack),
                                    fetcher_constructor=loadingContext.fetcher_constructor,
                                    skip_schemas=args.skip_schemas,
                                    overrides=loadingContext.overrides_list,
                                    do_validate=loadingContext.do_validate)
            if args.pack:
                stdout.write(print_pack(document_loader, processobj, uri, metadata))
                return 0
            if args.provenance and runtimeContext.research_obj:
                # Can't really be combined with args.pack at same time
                runtimeContext.research_obj.packed_workflow(
                    print_pack(document_loader, processobj, uri, metadata))

            if args.print_pre:
                stdout.write(json_dumps(processobj, indent=4))
                return 0

            loadingContext.overrides_list.extend(metadata.get("cwltool:overrides", []))

            tool = make_tool(document_loader, avsc_names,
                             metadata, uri, loadingContext)
            if args.make_template:
                yaml.safe_dump(generate_input_template(tool), sys.stdout,
                               default_flow_style=False, indent=4,
                               block_seq_indent=2)
                return 0

            if args.validate:
                _logger.info("Tool definition is valid")
                return 0

            if args.print_rdf:
                stdout.write(printrdf(tool, document_loader.ctx, args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, document_loader.ctx, stdout)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                exc if not args.debug else "",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool
        # If on MacOS platform, TMPDIR must be set to be under one of the
        # shared volumes in Docker for Mac
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(runtimeContext, dirprefix) and getattr(runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \
                        else ""
                setattr(runtimeContext, dirprefix,
                        os.path.abspath(getattr(runtimeContext, dirprefix)) + sl)
                if not os.path.exists(os.path.dirname(getattr(runtimeContext, dirprefix))):
                    try:
                        os.makedirs(os.path.dirname(getattr(runtimeContext, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir

        runtimeContext.secret_store = getdefault(runtimeContext.secret_store, SecretStore())
        runtimeContext.make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
        try:
            initialized_job_order_object = init_job_order(
                job_order_object, args, tool, jobloader, stdout,
                print_input_deps=args.print_input_deps,
                relative_deps=args.relative_deps,
                make_fs_access=runtimeContext.make_fs_access,
                input_basedir=input_basedir,
                secret_store=runtimeContext.secret_store)
        except SystemExit as err:
            return err.code

        if not executor:
            if args.parallel:
                executor = MultithreadedJobExecutor()
                runtimeContext.select_resources = executor.select_resources
            else:
                executor = SingleJobExecutor()
        assert executor is not None

        try:
            runtimeContext.basedir = input_basedir
            del args.workflow
            del args.job_order

            conf_file = getattr(args, "beta_dependency_resolvers_configuration", None)  # Text
            use_conda_dependencies = getattr(args, "beta_conda_dependencies", None)  # Text

            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = DependenciesConfiguration(args)

            runtimeContext.find_default_container = functools.partial(
                find_default_container,
                default_container=runtimeContext.default_container,
                use_biocontainers=args.beta_use_biocontainers)
            (out, status) = executor(tool,
                                     initialized_job_order_object,
                                     runtimeContext,
                                     logger=_logger)

            if out is not None:
                def loc_to_path(obj):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if obj["location"].startswith("file://"):
                        obj["path"] = uri_file_path(obj["location"])

                visit_class(out, ("File", "Directory"), loc_to_path)

                # Unsetting the Generation from final output object
                visit_class(out, ("File", ), MutationManager().unset_generation)

                if isinstance(out, string_types):
                    stdout.write(out)
                else:
                    stdout.write(json_dumps(out, indent=4,  # type: ignore
                                            ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()  # type: ignore

            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                return 1
            _logger.info(u"Final process status is %s", status)
            return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                u"Workflow or tool uses unsupported feature:\n%s", exc,
                exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(
                u"Workflow error%s:\n%s", try_again_msg, strip_dup_lineno(six.text_type(exc)),
                exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"Unhandled error%s:\n  %s", try_again_msg, exc, exc_info=args.debug)
            return 1

    finally:
        if args and runtimeContext and runtimeContext.research_obj \
                and args.rm_tmpdir and workflowobj:
            #adding all related cwl files to RO
            prov_dependencies = printdeps(
                workflowobj, document_loader, stdout, args.relative_deps, uri,
                runtimeContext.research_obj)
            prov_dep = prov_dependencies[1]
            assert prov_dep
            runtimeContext.research_obj.generate_snapshot(prov_dep)

            runtimeContext.research_obj.close(args.provenance)

        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
Ejemplo n.º 12
0
def main(argsl=None,  # type: List[str]
         args=None,  # type: argparse.Namespace
         executor=single_job_executor,  # type: Callable[..., Tuple[Dict[Text, Any], Text]]
         makeTool=workflow.defaultMakeTool,  # type: Callable[..., Process]
         selectResources=None,  # type: Callable[[Dict[Text, int]], Dict[Text, int]]
         stdin=sys.stdin,  # type: IO[Any]
         stdout=sys.stdout,  # type: IO[Any]
         stderr=sys.stderr,  # type: IO[Any]
         versionfunc=versionstring,  # type: Callable[[], Text]
         job_order_object=None,  # type: Union[Tuple[Dict[Text, Any], Text], int]
         make_fs_access=StdFsAccess,  # type: Callable[[Text], StdFsAccess]
         fetcher_constructor=None,  # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
         resolver=tool_resolver,
         logger_handler=None
         ):
    # type: (...) -> int

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)

        # If caller provided custom arguments, it may be not every expected
        # option is set, so fill in no-op defaults to avoid crashing when
        # dereferencing them in args.
        for k, v in {'print_deps': False,
                     'print_pre': False,
                     'print_rdf': False,
                     'print_dot': False,
                     'relative_deps': False,
                     'tmp_outdir_prefix': 'tmp',
                     'tmpdir_prefix': 'tmp',
                     'print_input_deps': False,
                     'cachedir': None,
                     'quiet': False,
                     'debug': False,
                     'version': False,
                     'enable_dev': False,
                     'strict': True,
                     'rdf_serializer': None,
                     'basedir': None,
                     'tool_help': False,
                     'workflow': None,
                     'job_order': None,
                     'pack': False,
                     'on_error': 'continue',
                     'relax_path_checks': False,
                     'validate': False}.iteritems():
            if not hasattr(args, k):
                setattr(args, k, v)

        if args.quiet:
            _logger.setLevel(logging.WARN)
        if args.debug:
            _logger.setLevel(logging.DEBUG)

        if args.version:
            print(versionfunc())
            return 0
        else:
            _logger.info(versionfunc())

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error("CWL document required, try --help for details")
                return 1
        if args.relax_path_checks:
            draft2tool.ACCEPTLIST_RE = draft2tool.ACCEPTLIST_EN_RELAXED_RE

        try:
            document_loader, workflowobj, uri = fetch_document(args.workflow, resolver=resolver,
                                                               fetcher_constructor=fetcher_constructor)

            if args.print_deps:
                printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri)
                return 0

            document_loader, avsc_names, processobj, metadata, uri \
                = validate_document(document_loader, workflowobj, uri,
                                    enable_dev=args.enable_dev, strict=args.strict,
                                    preprocess_only=args.print_pre or args.pack,
                                    fetcher_constructor=fetcher_constructor)

            if args.validate:
                return 0

            if args.pack:
                stdout.write(print_pack(document_loader, processobj, uri, metadata))
                return 0

            if args.print_pre:
                stdout.write(json.dumps(processobj, indent=4))
                return 0

            tool = make_tool(document_loader, avsc_names, metadata, uri,
                             makeTool, vars(args))

            if args.print_rdf:
                printrdf(tool, document_loader.ctx, args.rdf_serializer, stdout)
                return 0

            if args.print_dot:
                printdot(tool, document_loader.ctx, stdout)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s",
                ", try again with --debug for more information.\nThe error was: "
                "%s" % exc if not args.debug else ".  The error was:",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(args, dirprefix) and getattr(args, dirprefix) != 'tmp':
                sl = "/" if getattr(args, dirprefix).endswith("/") or dirprefix == "cachedir" else ""
                setattr(args, dirprefix,
                        os.path.abspath(getattr(args, dirprefix)) + sl)
                if not os.path.exists(os.path.dirname(getattr(args, dirprefix))):
                    try:
                        os.makedirs(os.path.dirname(getattr(args, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                setattr(args, 'move_outputs', "copy")
            setattr(args, "tmp_outdir_prefix", args.cachedir)

        if job_order_object is None:
            job_order_object = load_job_order(args, tool, stdin,
                                              print_input_deps=args.print_input_deps,
                                              relative_deps=args.relative_deps,
                                              stdout=stdout,
                                              make_fs_access=make_fs_access,
                                              fetcher_constructor=fetcher_constructor)

        if isinstance(job_order_object, int):
            return job_order_object

        try:
            setattr(args, 'basedir', job_order_object[1])
            del args.workflow
            del args.job_order
            (out, status) = executor(tool, job_order_object[0],
                                     makeTool=makeTool,
                                     select_resources=selectResources,
                                     make_fs_access=make_fs_access,
                                     **vars(args))

            # This is the workflow output, it needs to be written
            if out is not None:
                def locToPath(p):
                    if p["location"].startswith("file://"):
                        p["path"] = uri_file_path(p["location"])

                adjustDirObjs(out, locToPath)
                adjustFileObjs(out, locToPath)

                if isinstance(out, basestring):
                    stdout.write(out)
                else:
                    stdout.write(json.dumps(out, indent=4))
                stdout.write("\n")
                stdout.flush()

            if status != "success":
                _logger.warn(u"Final process status is %s", status)
                return 1
            else:
                _logger.info(u"Final process status is %s", status)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                u"Workflow or tool uses unsupported feature:\n%s", exc,
                exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(
                u"Workflow error, try again with --debug for more "
                "information:\n%s", strip_dup_lineno(unicode(exc)), exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"Unhandled error, try again with --debug for more information:\n"
                "  %s", exc, exc_info=args.debug)
            return 1

    finally:
        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
Ejemplo n.º 13
0
def static_checker(workflow_inputs, workflow_outputs, step_inputs,
                   step_outputs):
    # type: (List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]]) -> None
    """Check if all source and sink types of a workflow are compatible before run time.
    """

    # source parameters: workflow_inputs and step_outputs
    # sink parameters: step_inputs and workflow_outputs

    # make a dictionary of source parameters, indexed by the "id" field
    src_parms = workflow_inputs + step_outputs
    src_dict = {}
    for parm in src_parms:
        src_dict[parm["id"]] = parm

    step_inputs_val = check_all_types(src_dict, step_inputs, "source")
    workflow_outputs_val = check_all_types(src_dict, workflow_outputs,
                                           "outputSource")

    warnings = step_inputs_val["warning"] + workflow_outputs_val["warning"]
    exceptions = step_inputs_val["exception"] + workflow_outputs_val[
        "exception"]

    warning_msgs = []
    exception_msgs = []
    for warning in warnings:
        src = warning.src
        sink = warning.sink
        linkMerge = warning.linkMerge
        if sink.get("secondaryFiles") and sorted(sink.get(
                "secondaryFiles", [])) != sorted(src.get("secondaryFiles",
                                                         [])):
            msg1 = "Sink '%s'" % (shortname(sink["id"]))
            msg2 = SourceLine(sink.get("_tool_entry", sink),
                              "secondaryFiles").makeError(
                                  "expects secondaryFiles: %s but" %
                                  (sink.get("secondaryFiles")))
            if "secondaryFiles" in src:
                msg3 = SourceLine(src, "secondaryFiles").makeError(
                    "source '%s' has secondaryFiles %s." %
                    (shortname(src["id"]), src.get("secondaryFiles")))
            else:
                msg3 = SourceLine(src, "id").makeError(
                    "source '%s' does not include secondaryFiles." %
                    (shortname(src["id"])))
            msg4 = SourceLine(src, "id").makeError(
                "To fix, add secondaryFiles: %s to definition of '%s'." %
                (sink.get("secondaryFiles"), shortname(src["id"])))
            msg = SourceLine(sink).makeError(
                "%s\n%s" % (msg1, bullets([msg2, msg3, msg4], "  ")))
        else:
            msg = SourceLine(src, "type").makeError(
                "Source '%s' of type %s may be incompatible"
                % (shortname(src["id"]), json.dumps(src["type"]))) + "\n" + \
                SourceLine(sink, "type").makeError(
                "  with sink '%s' of type %s"
                % (shortname(sink["id"]), json.dumps(sink["type"])))
            if linkMerge:
                msg += "\n" + SourceLine(sink).makeError(
                    "  source has linkMerge method %s" % linkMerge)

        warning_msgs.append(msg)
    for exception in exceptions:
        src = exception.src
        sink = exception.sink
        linkMerge = exception.linkMerge
        msg = SourceLine(src, "type").makeError(
            "Source '%s' of type %s is incompatible"
            % (shortname(src["id"]), json.dumps(src["type"]))) + "\n" + \
            SourceLine(sink, "type").makeError(
            "  with sink '%s' of type %s"
            % (shortname(sink["id"]), json.dumps(sink["type"])))
        if linkMerge:
            msg += "\n" + SourceLine(sink).makeError(
                "  source has linkMerge method %s" % linkMerge)
        exception_msgs.append(msg)

    for sink in step_inputs:
        if ('null' != sink["type"] and 'null' not in sink["type"]
                and "source" not in sink and "default" not in sink
                and "valueFrom" not in sink):
            msg = SourceLine(sink).makeError(
                "Required parameter '%s' does not have source, default, or valueFrom expression"
                % shortname(sink["id"]))
            exception_msgs.append(msg)

    all_warning_msg = strip_dup_lineno("\n".join(warning_msgs))
    all_exception_msg = strip_dup_lineno("\n".join(exception_msgs))

    if warnings:
        _logger.warning("Workflow checker warning:\n%s" % all_warning_msg)
    if exceptions:
        raise validate.ValidationException(all_exception_msg)
Ejemplo n.º 14
0
    def cwlmain(
        self,
        argsl=None,  # type: List[str]
        args=None,  # type: argparse.Namespace
        job_order_object=None,  # type: MutableMapping[Text, Any]
        stdin=sys.stdin,  # type: IO[Any]
        stdout=None,  # type: Union[TextIO, codecs.StreamWriter]
        stderr=sys.stderr,  # type: IO[Any]
        versionfunc=versionstring,  # type: Callable[[], Text]
        logger_handler=None,  #
        custom_schema_callback=None,  # type: Callable[[], None]
        executor=None,  # type: Callable[..., Tuple[Dict[Text, Any], Text]]
        loadingContext=None,  # type: LoadingContext
        runtimeContext=None  # type: RuntimeContext
    ):  # type: (...) -> int

        if not stdout:
            stdout = codecs.getwriter('utf-8')(sys.stdout)
        _logger.removeHandler(defaultStreamHandler)
        if logger_handler:
            stderr_handler = logger_handler
        else:
            stderr_handler = logging.StreamHandler(stderr)
        _logger.addHandler(stderr_handler)
        try:
            if args is None:
                args = arg_parser().parse_args(argsl)
                if args.workflow and "--outdir" not in argsl:
                    outputPath = args.workflow.split('/')[-1].split('.')[0]
                    setattr(
                        args, "outdir",
                        os.getcwd() + "/" + outputPath + "/" +
                        datetime.datetime.now().strftime('%Y-%m-%d-%H%M'))
            if runtimeContext is None:
                runtimeContext = RuntimeContext(vars(args))
            else:
                runtimeContext = runtimeContext.copy()

            rdflib_logger = logging.getLogger("rdflib.term")
            rdflib_logger.addHandler(stderr_handler)
            rdflib_logger.setLevel(logging.ERROR)
            if args.quiet:
                _logger.setLevel(logging.WARN)
            if runtimeContext.debug:
                _logger.setLevel(logging.DEBUG)
                rdflib_logger.setLevel(logging.DEBUG)
            if args.timestamps:
                formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                              "%Y-%m-%d %H:%M:%S")
                stderr_handler.setFormatter(formatter)
            # version
            if args.version:
                return versionfunc(), 0
            else:
                _logger.info(versionfunc())

            if args.print_supported_versions:
                return "\n".join(supportedCWLversions(args.enable_dev)), 0

            if not args.workflow:
                if os.path.isfile("CWLFile"):
                    setattr(args, "workflow", "CWLFile")
                else:
                    _logger.error("")
                    _logger.error(
                        "CWL document required, no input file was provided")
                    arg_parser().print_help()
                    return "CWL document required, no input file was provided", 1
            if args.relax_path_checks:
                command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

            if args.ga4gh_tool_registries:
                ga4gh_tool_registries[:] = args.ga4gh_tool_registries
            if not args.enable_ga4gh_tool_registry:
                del ga4gh_tool_registries[:]

            if custom_schema_callback:
                custom_schema_callback()
            elif args.enable_ext:
                res = pkg_resources.resource_stream(__name__, 'extensions.yml')
                use_custom_schema("v1.0", "http://commonwl.org/cwltool",
                                  res.read())
                res.close()
            else:
                use_standard_schema("v1.0")

            if loadingContext is None:
                loadingContext = LoadingContext(vars(args))
            else:
                loadingContext = loadingContext.copy()

            loadingContext.disable_js_validation = \
                args.disable_js_validation or (not args.do_validate)
            loadingContext.construct_tool_object = getdefault(
                loadingContext.construct_tool_object,
                workflow.default_make_tool)
            loadingContext.resolver = getdefault(loadingContext.resolver,
                                                 tool_resolver)
            try:
                uri, tool_file_uri = resolve_tool_uri(
                    args.workflow,
                    resolver=loadingContext.resolver,
                    fetcher_constructor=loadingContext.fetcher_constructor)
            except:
                return "Can't find file " + args.workflow, 0

            try_again_msg = "" if args.debug else ", try again with --debug for more information"

            try:
                job_order_object, input_basedir, jobloader = load_job_order(
                    args, stdin, loadingContext.fetcher_constructor,
                    loadingContext.overrides_list, tool_file_uri)

                if args.overrides:
                    loadingContext.overrides_list.extend(
                        load_overrides(
                            file_uri(os.path.abspath(args.overrides)),
                            tool_file_uri))

                document_loader, workflowobj, uri = fetch_document(
                    uri,
                    resolver=loadingContext.resolver,
                    fetcher_constructor=loadingContext.fetcher_constructor)

                if args.print_deps:
                    # printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri)
                    result = returndeps(workflowobj, document_loader, stdout,
                                        args.relative_deps, uri)
                    return result, 0

                document_loader, avsc_names, processobj, metadata, uri \
                    = validate_document(document_loader, workflowobj, uri,
                                        enable_dev=loadingContext.enable_dev,
                                        strict=loadingContext.strict,
                                        preprocess_only=(args.print_pre or args.pack),
                                        fetcher_constructor=loadingContext.fetcher_constructor,
                                        skip_schemas=args.skip_schemas,
                                        overrides=loadingContext.overrides_list,
                                        do_validate=loadingContext.do_validate)

                if args.print_pre:
                    # stdout.write(json_dumps(processobj, indent=4))
                    return json_dumps(processobj, indent=4), 0

                loadingContext.overrides_list.extend(
                    metadata.get("cwltool:overrides", []))

                tool = make_tool(document_loader, avsc_names, metadata, uri,
                                 loadingContext)
                if args.make_template:
                    yaml.safe_dump(generate_input_template(tool),
                                   sys.stdout,
                                   default_flow_style=False,
                                   indent=4,
                                   block_seq_indent=2)
                    return yaml.safe_dump(generate_input_template(tool),
                                          indent=4), 0

                if args.validate:
                    _logger.info("Tool definition is valid")
                    return "Tool definition is valid", 0

                if args.pack:
                    stdout.write(
                        print_pack(document_loader, processobj, uri, metadata))
                    return print_pack(document_loader, processobj, uri,
                                      metadata), 0

                if args.print_rdf:
                    stdout.write(
                        printrdf(tool, document_loader.ctx,
                                 args.rdf_serializer))
                    return printrdf(tool, document_loader.ctx,
                                    args.rdf_serializer), 0

                if args.print_dot:
                    printdot(tool, document_loader.ctx, stdout)
                    return "args.print_dot still not solved", 0

            except (validate.ValidationException) as exc:
                _logger.error(u"Tool definition failed validation:\n%s",
                              exc,
                              exc_info=args.debug)
                infor = "Tool definition failed validation:\n%s" + exc + args.debug
                return infor, 1
            except (RuntimeError, WorkflowException) as exc:
                _logger.error(u"Tool definition failed initialization:\n%s",
                              exc,
                              exc_info=args.debug)
                infor = "Tool definition failed initialization:\n%s" + exc + args.debug
                return infor, 1
            except Exception as exc:
                _logger.error(
                    u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                    try_again_msg,
                    exc if not args.debug else "",
                    exc_info=args.debug)
                return "I'm sorry, I couldn't load this CWL file", 1

            if isinstance(tool, int):
                return tool, 0

            # If on MacOS platform, TMPDIR must be set to be under one of the
            # shared volumes in Docker for Mac
            # More info: https://dockstore.org/docs/faq
            if sys.platform == "darwin":
                default_mac_path = "/private/tmp/docker_tmp"
                if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                    runtimeContext.tmp_outdir_prefix = default_mac_path

            for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix",
                              "cachedir"):
                if getattr(runtimeContext, dirprefix) and getattr(
                        runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                    sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \
                        else ""
                    setattr(
                        runtimeContext, dirprefix,
                        os.path.abspath(getattr(runtimeContext, dirprefix)) +
                        sl)
                    if not os.path.exists(
                            os.path.dirname(getattr(runtimeContext,
                                                    dirprefix))):
                        try:
                            os.makedirs(
                                os.path.dirname(
                                    getattr(runtimeContext, dirprefix)))
                        except Exception as e:
                            _logger.error("Failed to create directory: %s", e)
                            infor = "Failed to create directory: %s" + e + ""
                            return infor, 1

            if args.cachedir:
                if args.move_outputs == "move":
                    runtimeContext.move_outputs = "copy"
                runtimeContext.tmp_outdir_prefix = args.cachedir

            runtimeContext.secret_store = getdefault(
                runtimeContext.secret_store, SecretStore())

            try:
                initialized_job_order_object = init_job_order(
                    job_order_object,
                    args,
                    tool,
                    jobloader,
                    stdout,
                    print_input_deps=args.print_input_deps,
                    relative_deps=args.relative_deps,
                    input_basedir=input_basedir,
                    secret_store=runtimeContext.secret_store)
            except SystemExit as err:
                return err.code
            if not executor:
                if args.parallel:
                    executor = MultithreadedJobExecutor()
                else:
                    executor = SingleJobExecutor()
            assert executor is not None

            if isinstance(initialized_job_order_object, int):
                return initialized_job_order_object

            try:
                runtimeContext.basedir = input_basedir
                del args.workflow
                del args.job_order

                conf_file = getattr(args,
                                    "beta_dependency_resolvers_configuration",
                                    None)  # Text
                use_conda_dependencies = getattr(args,
                                                 "beta_conda_dependencies",
                                                 None)  # Text

                job_script_provider = None  # type: Optional[DependenciesConfiguration]
                if conf_file or use_conda_dependencies:
                    runtimeContext.job_script_provider = DependenciesConfiguration(
                        args)

                runtimeContext.find_default_container = \
                    functools.partial(find_default_container, args)
                runtimeContext.make_fs_access = getdefault(
                    runtimeContext.make_fs_access, StdFsAccess)

                (out, status) = executor(tool,
                                         initialized_job_order_object,
                                         runtimeContext,
                                         logger=_logger)
                # This is the workflow output, it needs to be written
                if out is not None:

                    def loc_to_path(obj):
                        for field in ("path", "nameext", "nameroot",
                                      "dirname"):
                            if field in obj:
                                del obj[field]
                        if obj["location"].startswith("file://"):
                            obj["path"] = uri_file_path(obj["location"])

                    visit_class(out, ("File", "Directory"), loc_to_path)

                    # Unsetting the Generation fron final output object
                    visit_class(out, ("File", ),
                                MutationManager().unset_generation)

                    if isinstance(out, string_types):
                        stdout.write(out)
                    else:
                        stdout.write(
                            json_dumps(
                                out,
                                indent=4,  # type: ignore
                                ensure_ascii=False))
                    stdout.write("\n")
                    if hasattr(stdout, "flush"):
                        stdout.flush()  # type: ignore

                if status != "success":
                    _logger.warning(u"Final process status is %s", status)
                    infor = "Final process status is %s" + status + ""
                    return infor, 1

                _logger.info(u"Final process status is %s", status)
                return out, status

            except (validate.ValidationException) as exc:
                _logger.error(u"Input object failed validation:\n%s",
                              exc,
                              exc_info=args.debug)
                infor = "Input object failed validation:\n%s" + exc + args.debug
                return infor, 1
            except UnsupportedRequirement as exc:
                _logger.error(
                    u"Workflow or tool uses unsupported feature:\n%s",
                    exc,
                    exc_info=args.debug)
                infor = "Workflow or tool uses unsupported feature:\n%s" + exc + args.debug
                return infor, 3
            except WorkflowException as exc:
                _logger.error(u"Workflow error%s:\n%s",
                              try_again_msg,
                              strip_dup_lineno(six.text_type(exc)),
                              exc_info=args.debug)
                infor = "Workflow error%s:\n%s" + try_again_msg + strip_dup_lineno(
                    six.text_type(exc)) + args.debug
                return infor, 1
            except Exception as exc:
                _logger.error(u"Unhandled error%s:\n  %s",
                              try_again_msg,
                              exc,
                              exc_info=args.debug)
                infor = "Unhandled error%s:\n  %s" + try_again_msg + exc + args.debug
                return infor, 1

        finally:
            _logger.removeHandler(stderr_handler)
            _logger.addHandler(defaultStreamHandler)
Ejemplo n.º 15
0
    def _init_job(self, joborder, runtime_context):
        # type: (Mapping[Text, Text], RuntimeContext) -> Builder

        job = cast(Dict[Text, Union[Dict[Text, Any], List[Any], Text, None]],
                   copy.deepcopy(joborder))

        make_fs_access = getdefault(runtime_context.make_fs_access, StdFsAccess)
        fs_access = make_fs_access(runtime_context.basedir)

        load_listing_req, _ = self.get_requirement(
            "LoadListingRequirement")

        if load_listing_req is not None:
            load_listing = load_listing_req.get("loadListing")
        else:
            load_listing = "no_listing"

        # Validate job order
        try:
            fill_in_defaults(self.tool[u"inputs"], job, fs_access)

            normalizeFilesDirs(job)
            schema = self.names.get_name("input_record_schema", "")
            if schema is None:
                raise WorkflowException("Missing input record schema: "
                    "{}".format(self.names))
            validate.validate_ex(schema, job, strict=False,
                                 logger=_logger_validation_warnings)

            if load_listing and load_listing != "no_listing":
                get_listing(fs_access, job, recursive=(load_listing == "deep_listing"))

            visit_class(job, ("File",), functools.partial(add_sizes, fs_access))

            if load_listing == "deep_listing":
                for i, inparm in enumerate(self.tool["inputs"]):
                    k = shortname(inparm["id"])
                    if k not in job:
                        continue
                    v = job[k]
                    dircount = [0]
                    def inc(d):  # type: (List[int]) -> None
                        d[0] += 1
                    visit_class(v, ("Directory",), lambda x: inc(dircount))
                    if dircount[0] == 0:
                        continue
                    filecount = [0]
                    visit_class(v, ("File",), lambda x: inc(filecount))
                    if filecount[0] > FILE_COUNT_WARNING:
                        # Long lines in this message are okay, will be reflowed based on terminal columns.
                        _logger.warning(strip_dup_lineno(SourceLine(self.tool["inputs"], i, Text).makeError(
                    """Recursive directory listing has resulted in a large number of File objects (%s) passed to the input parameter '%s'.  This may negatively affect workflow performance and memory use.

If this is a problem, use the hint 'cwltool:LoadListingRequirement' with "shallow_listing" or "no_listing" to change the directory listing behavior:

$namespaces:
  cwltool: "http://commonwl.org/cwltool#"
hints:
  cwltool:LoadListingRequirement:
    loadListing: shallow_listing

""" % (filecount[0], k))))

        except (validate.ValidationException, WorkflowException) as err:
            raise WorkflowException("Invalid job input record:\n" + Text(err))

        files = []  # type: List[Dict[Text, Text]]
        bindings = CommentedSeq()
        tmpdir = u""
        stagedir = u""

        docker_req, _ = self.get_requirement("DockerRequirement")
        default_docker = None

        if docker_req is None and runtime_context.default_container:
            default_docker = runtime_context.default_container

        if (docker_req or default_docker) and runtime_context.use_container:
            if docker_req is not None:
                # Check if docker output directory is absolute
                if docker_req.get("dockerOutputDirectory") and \
                        docker_req.get("dockerOutputDirectory").startswith('/'):
                    outdir = docker_req.get("dockerOutputDirectory")
                else:
                    outdir = docker_req.get("dockerOutputDirectory") or \
                        runtime_context.docker_outdir or random_outdir()
            elif default_docker is not None:
                outdir = runtime_context.docker_outdir or random_outdir()
            tmpdir = runtime_context.docker_tmpdir or "/tmp"
            stagedir = runtime_context.docker_stagedir or "/var/lib/cwl"
        else:
            outdir = fs_access.realpath(
                runtime_context.outdir or tempfile.mkdtemp(
                    prefix=getdefault(runtime_context.tmp_outdir_prefix,
                                      DEFAULT_TMP_PREFIX)))
            if self.tool[u"class"] != 'Workflow':
                tmpdir = fs_access.realpath(runtime_context.tmpdir
                                            or tempfile.mkdtemp())
                stagedir = fs_access.realpath(runtime_context.stagedir
                                              or tempfile.mkdtemp())

        builder = Builder(job,
                          files,
                          bindings,
                          self.schemaDefs,
                          self.names,
                          self.requirements,
                          self.hints,
                          {},
                          runtime_context.mutation_manager,
                          self.formatgraph,
                          make_fs_access,
                          fs_access,
                          runtime_context.job_script_provider,
                          runtime_context.eval_timeout,
                          runtime_context.debug,
                          runtime_context.js_console,
                          runtime_context.force_docker_pull,
                          load_listing,
                          outdir,
                          tmpdir,
                          stagedir)

        bindings.extend(builder.bind_input(
            self.inputs_record_schema, job,
            discover_secondaryFiles=getdefault(runtime_context.toplevel, False)))

        if self.tool.get("baseCommand"):
            for index, command in enumerate(aslist(self.tool["baseCommand"])):
                bindings.append({
                    "position": [-1000000, index],
                    "datum": command
                })

        if self.tool.get("arguments"):
            for i, arg in enumerate(self.tool["arguments"]):
                lc = self.tool["arguments"].lc.data[i]
                filename = self.tool["arguments"].lc.filename
                bindings.lc.add_kv_line_col(len(bindings), lc)
                if isinstance(arg, MutableMapping):
                    arg = copy.deepcopy(arg)
                    if arg.get("position"):
                        arg["position"] = [arg["position"], i]
                    else:
                        arg["position"] = [0, i]
                    bindings.append(arg)
                elif ("$(" in arg) or ("${" in arg):
                    cm = CommentedMap((
                        ("position", [0, i]),
                        ("valueFrom", arg)
                    ))
                    cm.lc.add_kv_line_col("valueFrom", lc)
                    cm.lc.filename = filename
                    bindings.append(cm)
                else:
                    cm = CommentedMap((
                        ("position", [0, i]),
                        ("datum", arg)
                    ))
                    cm.lc.add_kv_line_col("datum", lc)
                    cm.lc.filename = filename
                    bindings.append(cm)

        # use python2 like sorting of heterogeneous lists
        # (containing str and int types),
        if PY3:
            key = functools.cmp_to_key(cmp_like_py2)
        else:  # PY2
            key = lambda d: d["position"]

        # This awkward construction replaces the contents of
        # "bindings" in place (because Builder expects it to be
        # mutated in place, sigh, I'm sorry) with its contents sorted,
        # supporting different versions of Python and ruamel.yaml with
        # different behaviors/bugs in CommentedSeq.
        bindings_copy = copy.deepcopy(bindings)
        del bindings[:]
        bindings.extend(sorted(bindings_copy, key=key))

        if self.tool[u"class"] != 'Workflow':
            builder.resources = self.evalResources(builder, runtime_context)
        return builder
Ejemplo n.º 16
0
def static_checker(
    workflow_inputs: List[CWLObjectType],
    workflow_outputs: MutableSequence[CWLObjectType],
    step_inputs: MutableSequence[CWLObjectType],
    step_outputs: List[CWLObjectType],
    param_to_step: Dict[str, CWLObjectType],
) -> None:
    """Check if all source and sink types of a workflow are compatible before run time."""
    # source parameters: workflow_inputs and step_outputs
    # sink parameters: step_inputs and workflow_outputs

    # make a dictionary of source parameters, indexed by the "id" field
    src_parms = workflow_inputs + step_outputs
    src_dict = {}  # type: Dict[str, CWLObjectType]
    for parm in src_parms:
        src_dict[cast(str, parm["id"])] = parm

    step_inputs_val = check_all_types(src_dict, step_inputs, "source",
                                      param_to_step)
    workflow_outputs_val = check_all_types(src_dict, workflow_outputs,
                                           "outputSource", param_to_step)

    warnings = step_inputs_val["warning"] + workflow_outputs_val["warning"]
    exceptions = step_inputs_val["exception"] + workflow_outputs_val[
        "exception"]

    warning_msgs = []
    exception_msgs = []
    for warning in warnings:
        src = warning.src
        sink = warning.sink
        linkMerge = warning.linkMerge
        sinksf = sorted(p["pattern"] for p in sink.get("secondaryFiles", [])
                        if p.get("required", True))
        srcsf = sorted(p["pattern"] for p in src.get("secondaryFiles", []))
        # Every secondaryFile required by the sink, should be declared
        # by the source
        missing = missing_subset(srcsf, sinksf)
        if missing:
            msg1 = "Parameter '{}' requires secondaryFiles {} but".format(
                shortname(sink["id"]),
                missing,
            )
            msg3 = SourceLine(src, "id").makeError(
                "source '%s' does not provide those secondaryFiles." %
                (shortname(src["id"])))
            msg4 = SourceLine(
                src.get("_tool_entry", src), "secondaryFiles"
            ).makeError(
                "To resolve, add missing secondaryFiles patterns to definition of '%s' or"
                % (shortname(src["id"])))
            msg5 = SourceLine(
                sink.get("_tool_entry", sink), "secondaryFiles"
            ).makeError(
                "mark missing secondaryFiles in definition of '%s' as optional."
                % shortname(sink["id"]))
            msg = SourceLine(sink).makeError("{}\n{}".format(
                msg1, bullets([msg3, msg4, msg5], "  ")))
        elif sink.get("not_connected"):
            if not sink.get("used_by_step"):
                msg = SourceLine(sink, "type").makeError(
                    "'%s' is not an input parameter of %s, expected %s" % (
                        shortname(sink["id"]),
                        param_to_step[sink["id"]]["run"],
                        ", ".join(
                            shortname(cast(str, s["id"])) for s in cast(
                                List[Dict[str, Union[str, bool]]],
                                param_to_step[sink["id"]]["inputs"],
                            ) if not s.get("not_connected")),
                    ))
            else:
                msg = ""
        else:
            msg = (SourceLine(src, "type").makeError(
                "Source '%s' of type %s may be incompatible" %
                (shortname(src["id"]), json_dumps(src["type"]))) + "\n" +
                   SourceLine(sink, "type").makeError(
                       "  with sink '%s' of type %s" %
                       (shortname(sink["id"]), json_dumps(sink["type"]))))
            if linkMerge is not None:
                msg += "\n" + SourceLine(sink).makeError(
                    "  source has linkMerge method %s" % linkMerge)

        if warning.message is not None:
            msg += "\n" + SourceLine(sink).makeError("  " + warning.message)

        if msg:
            warning_msgs.append(msg)

    for exception in exceptions:
        src = exception.src
        sink = exception.sink
        linkMerge = exception.linkMerge
        extra_message = exception.message
        msg = (SourceLine(src, "type").makeError(
            "Source '%s' of type %s is incompatible" %
            (shortname(src["id"]), json_dumps(src["type"]))) + "\n" +
               SourceLine(sink, "type").makeError(
                   "  with sink '%s' of type %s" %
                   (shortname(sink["id"]), json_dumps(sink["type"]))))
        if extra_message is not None:
            msg += "\n" + SourceLine(sink).makeError("  " + extra_message)

        if linkMerge is not None:
            msg += "\n" + SourceLine(sink).makeError(
                "  source has linkMerge method %s" % linkMerge)
        exception_msgs.append(msg)

    for sink in step_inputs:
        if ("null" != sink["type"] and "null" not in sink["type"]
                and "source" not in sink and "default" not in sink
                and "valueFrom" not in sink):
            msg = SourceLine(sink).makeError(
                "Required parameter '%s' does not have source, default, or valueFrom expression"
                % shortname(sink["id"]))
            exception_msgs.append(msg)

    all_warning_msg = strip_dup_lineno("\n".join(warning_msgs))
    all_exception_msg = strip_dup_lineno("\n" + "\n".join(exception_msgs))

    if all_warning_msg:
        _logger.warning("Workflow checker warning:\n%s", all_warning_msg)
    if exceptions:
        raise ValidationException(all_exception_msg)
Ejemplo n.º 17
0
def main(argsl=None,                   # type: List[str]
         args=None,                    # type: argparse.Namespace
         job_order_object=None,        # type: MutableMapping[Text, Any]
         stdin=sys.stdin,              # type: IO[Any]
         stdout=None,                  # type: Union[TextIO, StreamWriter]
         stderr=sys.stderr,            # type: IO[Any]
         versionfunc=versionstring,    # type: Callable[[], Text]
         logger_handler=None,          #
         custom_schema_callback=None,  # type: Callable[[], None]
         executor=None,                # type: Callable[..., Tuple[Dict[Text, Any], Text]]
         loadingContext=None,          # type: LoadingContext
         runtimeContext=None           # type: RuntimeContext
        ):  # type: (...) -> int
    if not stdout:  # force UTF-8 even if the console is configured differently
        if (hasattr(sys.stdout, "encoding")  # type: ignore
                and sys.stdout.encoding != 'UTF-8'):  # type: ignore
            if PY3 and hasattr(sys.stdout, "detach"):
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            else:
                stdout = getwriter('utf-8')(sys.stdout)  # type: ignore
        else:
            stdout = cast(TextIO, sys.stdout)  # type: ignore

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler is not None:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    # pre-declared for finally block
    workflowobj = None
    prov_log_handler = None  # type: Optional[logging.StreamHandler]
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)
            if args.record_container_id:
                if not args.cidfile_dir:
                    args.cidfile_dir = os.getcwd()
                del args.record_container_id

        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()

        # If on Windows platform, a default Docker Container is used if not
        # explicitely provided by user
        if onWindows() and not runtimeContext.default_container:
            # This docker image is a minimal alpine image with bash installed
            # (size 6 mb). source: https://github.com/frol/docker-alpine-bash
            runtimeContext.default_container = windows_default_container_id

        # If caller parsed its own arguments, it may not include every
        # cwltool option, so fill in defaults to avoid crashing when
        # dereferencing them in args.
        for key, val in iteritems(get_default_args()):
            if not hasattr(args, key):
                setattr(args, key, val)

        # Configure logging
        rdflib_logger = logging.getLogger("rdflib.term")
        rdflib_logger.addHandler(stderr_handler)
        rdflib_logger.setLevel(logging.ERROR)
        if args.quiet:
            # Silence STDERR, not an eventual provenance log file
            stderr_handler.setLevel(logging.WARN)
        if runtimeContext.debug:
            # Increase to debug for both stderr and provenance log file
            _logger.setLevel(logging.DEBUG)
            rdflib_logger.setLevel(logging.DEBUG)
        formatter = None  # type: Optional[logging.Formatter]
        if args.timestamps:
            formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                          "%Y-%m-%d %H:%M:%S")
            stderr_handler.setFormatter(formatter)
        ##

        if args.version:
            print(versionfunc())
            return 0
        _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supported_cwl_versions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error("CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback is not None:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read())
            res.close()
        else:
            use_standard_schema("v1.0")
        if args.provenance:
            if not args.compute_checksum:
                _logger.error("--provenance incompatible with --no-compute-checksum")
                return 1
            ro = ResearchObject(
                temp_prefix_ro=args.tmpdir_prefix, orcid=args.orcid,
                full_name=args.cwl_full_name)
            runtimeContext.research_obj = ro
            log_file_io = ro.open_log_file_for_activity(ro.engine_uuid)
            prov_log_handler = logging.StreamHandler(log_file_io)

            class ProvLogFormatter(logging.Formatter):
                """Enforce ISO8601 with both T and Z."""
                def __init__(self):  # type: () -> None
                    super(ProvLogFormatter, self).__init__(
                        "[%(asctime)sZ] %(message)s")

                def formatTime(self, record, datefmt=None):
                    # type: (logging.LogRecord, str) -> str
                    record_time = time.gmtime(record.created)
                    formatted_time = time.strftime("%Y-%m-%dT%H:%M:%S", record_time)
                    with_msecs = "%s,%03d" % (formatted_time, record.msecs)
                    return with_msecs
            prov_log_handler.setFormatter(ProvLogFormatter())
            _logger.addHandler(prov_log_handler)
            _logger.debug(u"[provenance] Logging to %s", log_file_io)
            if argsl is not None:
                # Log cwltool command line options to provenance file
                _logger.info("[cwltool] %s %s", sys.argv[0], u" ".join(argsl))
            _logger.debug(u"[cwltool] Arguments: %s", args)

        if loadingContext is None:
            loadingContext = LoadingContext(vars(args))
        else:
            loadingContext = loadingContext.copy()
        loadingContext.loader = default_loader(loadingContext.fetcher_constructor)
        loadingContext.research_obj = runtimeContext.research_obj
        loadingContext.disable_js_validation = \
            args.disable_js_validation or (not args.do_validate)
        loadingContext.construct_tool_object = getdefault(
            loadingContext.construct_tool_object, workflow.default_make_tool)
        loadingContext.resolver = getdefault(loadingContext.resolver, tool_resolver)
        loadingContext.do_update = not (args.pack or args.print_subgraph)

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow, resolver=loadingContext.resolver,
            fetcher_constructor=loadingContext.fetcher_constructor)

        try_again_msg = "" if args.debug else ", try again with --debug for more information"

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args, stdin, loadingContext.fetcher_constructor,
                loadingContext.overrides_list, tool_file_uri)

            if args.overrides:
                loadingContext.overrides_list.extend(load_overrides(
                    file_uri(os.path.abspath(args.overrides)), tool_file_uri))

            loadingContext, workflowobj, uri = fetch_document(
                uri, loadingContext)

            assert loadingContext.loader is not None

            if args.print_deps:
                printdeps(workflowobj, loadingContext.loader, stdout,
                           args.relative_deps, uri)
                return 0

            loadingContext, uri \
                = resolve_and_validate_document(loadingContext, workflowobj, uri,
                                    preprocess_only=(args.print_pre or args.pack),
                                    skip_schemas=args.skip_schemas)
            assert loadingContext.loader is not None
            processobj, metadata = loadingContext.loader.resolve_ref(uri)
            processobj = cast(CommentedMap, processobj)
            if args.pack:
                stdout.write(print_pack(loadingContext.loader, processobj, uri, metadata))
                return 0

            if args.provenance and runtimeContext.research_obj:
                # Can't really be combined with args.pack at same time
                runtimeContext.research_obj.packed_workflow(
                    print_pack(loadingContext.loader, processobj, uri, metadata))

            if args.print_pre:
                stdout.write(json_dumps(processobj, indent=4, sort_keys=True, separators=(',', ': ')))
                return 0

            tool = make_tool(uri, loadingContext)
            if args.make_template:
                def my_represent_none(self, data):  # pylint: disable=unused-argument
                    """Force clean representation of 'null'."""
                    return self.represent_scalar(u'tag:yaml.org,2002:null', u'null')
                yaml.RoundTripRepresenter.add_representer(type(None), my_represent_none)
                yaml.round_trip_dump(
                    generate_input_template(tool), sys.stdout,
                    default_flow_style=False, indent=4, block_seq_indent=2)
                return 0

            if args.validate:
                print("{} is valid CWL.".format(args.workflow))
                return 0

            if args.print_rdf:
                stdout.write(printrdf(tool, loadingContext.loader.ctx, args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, loadingContext.loader.ctx, stdout)
                return 0

            if args.print_targets:
                for f in ("outputs", "steps", "inputs"):
                    if tool.tool[f]:
                        _logger.info("%s%s targets:", f[0].upper(), f[1:-1])
                        stdout.write("  "+"\n  ".join([shortname(t["id"]) for t in tool.tool[f]])+"\n")
                return 0

            if args.target:
                if isinstance(tool, Workflow):
                    url = urllib.parse.urlparse(tool.tool["id"])
                    if url.fragment:
                        extracted = get_subgraph([tool.tool["id"] + "/" + r for r in args.target], tool)
                    else:
                        extracted = get_subgraph([loadingContext.loader.fetcher.urljoin(tool.tool["id"], "#" + r)
                                                 for r in args.target],
                                                 tool)
                else:
                    _logger.error("Can only use --target on Workflows")
                    return 1
                loadingContext.loader.idx[extracted["id"]] = extracted
                tool = make_tool(extracted["id"],
                                 loadingContext)

            if args.print_subgraph:
                if "name" in tool.tool:
                    del tool.tool["name"]
                stdout.write(json_dumps(tool.tool, indent=4, sort_keys=True, separators=(',', ': ')))
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                exc if not args.debug else "",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool
        # If on MacOS platform, TMPDIR must be set to be under one of the
        # shared volumes in Docker for Mac
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path
            if runtimeContext.tmpdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmpdir_prefix = default_mac_path

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(runtimeContext, dirprefix) and getattr(runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \
                    else ""
                setattr(runtimeContext, dirprefix,
                        os.path.abspath(getattr(runtimeContext, dirprefix)) + sl)
                if not os.path.exists(os.path.dirname(getattr(runtimeContext, dirprefix))):
                    try:
                        os.makedirs(os.path.dirname(getattr(runtimeContext, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir

        runtimeContext.secret_store = getdefault(runtimeContext.secret_store, SecretStore())
        runtimeContext.make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
        try:
            initialized_job_order_object = init_job_order(
                job_order_object, args, tool, jobloader, stdout,
                print_input_deps=args.print_input_deps,
                relative_deps=args.relative_deps,
                make_fs_access=runtimeContext.make_fs_access,
                input_basedir=input_basedir,
                secret_store=runtimeContext.secret_store)
        except SystemExit as err:
            return err.code

        if not executor:
            if args.parallel:
                executor = MultithreadedJobExecutor()
                runtimeContext.select_resources = executor.select_resources
            else:
                executor = SingleJobExecutor()
        assert executor is not None

        try:
            runtimeContext.basedir = input_basedir
            del args.workflow
            del args.job_order

            conf_file = getattr(args, "beta_dependency_resolvers_configuration", None)  # Text
            use_conda_dependencies = getattr(args, "beta_conda_dependencies", None)  # Text

            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = DependenciesConfiguration(args)
            else:
                runtimeContext.find_default_container = functools.partial(
                    find_default_container,
                    default_container=runtimeContext.default_container,
                    use_biocontainers=args.beta_use_biocontainers)

            (out, status) = executor(tool,
                                     initialized_job_order_object,
                                     runtimeContext,
                                     logger=_logger)

            if out is not None:
                if runtimeContext.research_obj is not None:
                    runtimeContext.research_obj.create_job(
                        out, None, True)

                def loc_to_path(obj):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if obj["location"].startswith("file://"):
                        obj["path"] = uri_file_path(obj["location"])

                visit_class(out, ("File", "Directory"), loc_to_path)

                # Unsetting the Generation from final output object
                visit_class(out, ("File", ), MutationManager().unset_generation)

                if isinstance(out, string_types):
                    stdout.write(out)
                else:
                    stdout.write(json_dumps(out, indent=4,  # type: ignore
                                            ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()  # type: ignore

            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                return 1
            _logger.info(u"Final process status is %s", status)
            return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                u"Workflow or tool uses unsupported feature:\n%s", exc,
                exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(
                u"Workflow error%s:\n%s", try_again_msg, strip_dup_lineno(Text(exc)),
                exc_info=args.debug)
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            _logger.error(
                u"Unhandled error%s:\n  %s", try_again_msg, exc, exc_info=args.debug)
            return 1

    finally:
        if args and runtimeContext and runtimeContext.research_obj \
                and workflowobj:
            research_obj = runtimeContext.research_obj
            assert loadingContext is not None
            assert loadingContext.loader is not None
            prov_dependencies = prov_deps(workflowobj, loadingContext.loader, uri)
            research_obj.generate_snapshot(prov_dependencies)
            if prov_log_handler is not None:
                # Stop logging so we won't half-log adding ourself to RO
                _logger.debug(u"[provenance] Closing provenance log file %s",
                    prov_log_handler)
                _logger.removeHandler(prov_log_handler)
                # Ensure last log lines are written out
                prov_log_handler.flush()
                # Underlying WritableBagFile will add the tagfile to the manifest
                prov_log_handler.stream.close()
                prov_log_handler.close()
            research_obj.close(args.provenance)

        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
Ejemplo n.º 18
0
def main(argsl=None,  # type: List[str]
         args=None,  # type: argparse.Namespace
         executor=single_job_executor,  # type: Callable[..., Tuple[Dict[Text, Any], Text]]
         makeTool=workflow.defaultMakeTool,  # type: Callable[..., Process]
         selectResources=None,  # type: Callable[[Dict[Text, int]], Dict[Text, int]]
         stdin=sys.stdin,  # type: IO[Any]
         stdout=sys.stdout,  # type: IO[Any]
         stderr=sys.stderr,  # type: IO[Any]
         versionfunc=versionstring,  # type: Callable[[], Text]
         job_order_object=None,  # type: Union[Tuple[Dict[Text, Any], Text], int]
         make_fs_access=StdFsAccess,  # type: Callable[[Text], StdFsAccess]
         fetcher_constructor=None,  # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher]
         resolver=tool_resolver,
         logger_handler=None,
         custom_schema_callback=None  # type: Callable[[], None]
         ):
    # type: (...) -> int

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)

        # If caller provided custom arguments, it may be not every expected
        # option is set, so fill in no-op defaults to avoid crashing when
        # dereferencing them in args.
        for k, v in {'print_deps': False,
                     'print_pre': False,
                     'print_rdf': False,
                     'print_dot': False,
                     'relative_deps': False,
                     'tmp_outdir_prefix': 'tmp',
                     'tmpdir_prefix': 'tmp',
                     'print_input_deps': False,
                     'cachedir': None,
                     'quiet': False,
                     'debug': False,
                     'version': False,
                     'enable_dev': False,
                     'enable_ext': False,
                     'strict': True,
                     'rdf_serializer': None,
                     'basedir': None,
                     'tool_help': False,
                     'workflow': None,
                     'job_order': None,
                     'pack': False,
                     'on_error': 'continue',
                     'relax_path_checks': False,
                     'validate': False,
                     'enable_ga4gh_tool_registry': False,
                     'ga4gh_tool_registries': []
        }.iteritems():
            if not hasattr(args, k):
                setattr(args, k, v)

        if args.quiet:
            _logger.setLevel(logging.WARN)
        if args.debug:
            _logger.setLevel(logging.DEBUG)

        if args.version:
            print(versionfunc())
            return 0
        else:
            _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supportedCWLversions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error("CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            draft2tool.ACCEPTLIST_RE = draft2tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read())
            res.close()
        else:
            use_standard_schema("v1.0")

        try:
            document_loader, workflowobj, uri = fetch_document(args.workflow, resolver=resolver,
                                                               fetcher_constructor=fetcher_constructor)

            if args.print_deps:
                printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri)
                return 0

            document_loader, avsc_names, processobj, metadata, uri \
                = validate_document(document_loader, workflowobj, uri,
                                    enable_dev=args.enable_dev, strict=args.strict,
                                    preprocess_only=args.print_pre or args.pack,
                                    fetcher_constructor=fetcher_constructor)

            if args.pack:
                stdout.write(print_pack(document_loader, processobj, uri, metadata))
                return 0

            if args.print_pre:
                stdout.write(json.dumps(processobj, indent=4))
                return 0

            tool = make_tool(document_loader, avsc_names, metadata, uri,
                             makeTool, vars(args))

            if args.validate:
                return 0

            if args.print_rdf:
                printrdf(tool, document_loader.ctx, args.rdf_serializer, stdout)
                return 0

            if args.print_dot:
                printdot(tool, document_loader.ctx, stdout)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s",
                ", try again with --debug for more information.\nThe error was: "
                "%s" % exc if not args.debug else ".  The error was:",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(args, dirprefix) and getattr(args, dirprefix) != 'tmp':
                sl = "/" if getattr(args, dirprefix).endswith("/") or dirprefix == "cachedir" else ""
                setattr(args, dirprefix,
                        os.path.abspath(getattr(args, dirprefix)) + sl)
                if not os.path.exists(os.path.dirname(getattr(args, dirprefix))):
                    try:
                        os.makedirs(os.path.dirname(getattr(args, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                setattr(args, 'move_outputs', "copy")
            setattr(args, "tmp_outdir_prefix", args.cachedir)

        try:
            if job_order_object is None:
                    job_order_object = load_job_order(args, tool, stdin,
                                                      print_input_deps=args.print_input_deps,
                                                      relative_deps=args.relative_deps,
                                                      stdout=stdout,
                                                      make_fs_access=make_fs_access,
                                                      fetcher_constructor=fetcher_constructor)
        except SystemExit as e:
            return e.code

        if isinstance(job_order_object, int):
            return job_order_object

        try:
            setattr(args, 'basedir', job_order_object[1])
            del args.workflow
            del args.job_order
            (out, status) = executor(tool, job_order_object[0],
                                     makeTool=makeTool,
                                     select_resources=selectResources,
                                     make_fs_access=make_fs_access,
                                     **vars(args))

            # This is the workflow output, it needs to be written
            if out is not None:

                def locToPath(p):
                    if p["location"].startswith("file://"):
                        p["path"] = uri_file_path(p["location"])

                visit_class(out, ("File", "Directory"), locToPath)

                if isinstance(out, basestring):
                    stdout.write(out)
                else:
                    stdout.write(json.dumps(out, indent=4))
                stdout.write("\n")
                stdout.flush()

            if status != "success":
                _logger.warn(u"Final process status is %s", status)
                return 1
            else:
                _logger.info(u"Final process status is %s", status)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                u"Workflow or tool uses unsupported feature:\n%s", exc,
                exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(
                u"Workflow error, try again with --debug for more "
                "information:\n%s", strip_dup_lineno(unicode(exc)), exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"Unhandled error, try again with --debug for more information:\n"
                "  %s", exc, exc_info=args.debug)
            return 1

    finally:
        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
Ejemplo n.º 19
0
def static_checker(workflow_inputs, workflow_outputs, step_inputs,
                   step_outputs, param_to_step):
    # type: (List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], Dict[Text, Dict[Text, Any]]) -> None
    """Check if all source and sink types of a workflow are compatible before run time."""
    # source parameters: workflow_inputs and step_outputs
    # sink parameters: step_inputs and workflow_outputs

    # make a dictionary of source parameters, indexed by the "id" field
    src_parms = workflow_inputs + step_outputs
    src_dict = {}
    for parm in src_parms:
        src_dict[parm["id"]] = parm

    step_inputs_val = check_all_types(src_dict, step_inputs, "source")
    workflow_outputs_val = check_all_types(src_dict, workflow_outputs,
                                           "outputSource")

    warnings = step_inputs_val["warning"] + workflow_outputs_val["warning"]
    exceptions = step_inputs_val["exception"] + workflow_outputs_val[
        "exception"]

    warning_msgs = []
    exception_msgs = []
    for warning in warnings:
        src = warning.src
        sink = warning.sink
        linkMerge = warning.linkMerge
        sinksf = sorted([
            p["pattern"] for p in sink.get("secondaryFiles", [])
            if p.get("required", True)
        ])
        srcsf = sorted([p["pattern"] for p in src.get("secondaryFiles", [])])
        # Every secondaryFile required by the sink, should be declared
        # by the source
        missing = missing_subset(srcsf, sinksf)
        if missing:
            msg1 = "Parameter '%s' requires secondaryFiles %s but" % (
                shortname(sink["id"]), missing)
            msg3 = SourceLine(src, "id").makeError(
                "source '%s' does not provide those secondaryFiles." %
                (shortname(src["id"])))
            msg4 = SourceLine(
                src.get("_tool_entry", src), "secondaryFiles"
            ).makeError(
                "To resolve, add missing secondaryFiles patterns to definition of '%s' or"
                % (shortname(src["id"])))
            msg5 = SourceLine(
                sink.get("_tool_entry", sink), "secondaryFiles"
            ).makeError(
                "mark missing secondaryFiles in definition of '%s' as optional."
                % shortname(sink["id"]))
            msg = SourceLine(sink).makeError(
                "%s\n%s" % (msg1, bullets([msg3, msg4, msg5], "  ")))
        elif sink.get("not_connected"):
            msg = SourceLine(sink, "type").makeError(
                "'%s' is not an input parameter of %s, expected %s" %
                (shortname(
                    sink["id"]), param_to_step[sink["id"]]["run"], ", ".join(
                        shortname(s["id"])
                        for s in param_to_step[sink["id"]]["inputs"]
                        if not s.get("not_connected"))))
        else:
            msg = SourceLine(src, "type").makeError(
                "Source '%s' of type %s may be incompatible"
                % (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + \
                SourceLine(sink, "type").makeError(
                    "  with sink '%s' of type %s"
                    % (shortname(sink["id"]), json_dumps(sink["type"])))
            if linkMerge is not None:
                msg += "\n" + SourceLine(sink).makeError(
                    "  source has linkMerge method %s" % linkMerge)

        warning_msgs.append(msg)
    for exception in exceptions:
        src = exception.src
        sink = exception.sink
        linkMerge = exception.linkMerge
        msg = SourceLine(src, "type").makeError(
            "Source '%s' of type %s is incompatible"
            % (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + \
            SourceLine(sink, "type").makeError(
                "  with sink '%s' of type %s"
                % (shortname(sink["id"]), json_dumps(sink["type"])))
        if linkMerge is not None:
            msg += "\n" + SourceLine(sink).makeError(
                "  source has linkMerge method %s" % linkMerge)
        exception_msgs.append(msg)

    for sink in step_inputs:
        if ('null' != sink["type"] and 'null' not in sink["type"]
                and "source" not in sink and "default" not in sink
                and "valueFrom" not in sink):
            msg = SourceLine(sink).makeError(
                "Required parameter '%s' does not have source, default, or valueFrom expression"
                % shortname(sink["id"]))
            exception_msgs.append(msg)

    all_warning_msg = strip_dup_lineno("\n".join(warning_msgs))
    all_exception_msg = strip_dup_lineno("\n".join(exception_msgs))

    if warnings:
        _logger.warning("Workflow checker warning:\n%s", all_warning_msg)
    if exceptions:
        raise validate.ValidationException(all_exception_msg)
Ejemplo n.º 20
0
Archivo: main.py Proyecto: bmeg/cwltool
def main(argsl=None,  # type: List[str]
         args=None,  # type: argparse.Namespace
         executor=single_job_executor,  # type: Callable[..., Tuple[Dict[Text, Any], Text]]
         makeTool=workflow.defaultMakeTool,  # type: Callable[..., Process]
         selectResources=None,  # type: Callable[[Dict[Text, int]], Dict[Text, int]]
         stdin=sys.stdin,  # type: IO[Any]
         stdout=sys.stdout,  # type: IO[Any]
         stderr=sys.stderr,  # type: IO[Any]
         versionfunc=versionstring,  # type: Callable[[], Text]
         job_order_object=None,  # type: MutableMapping[Text, Any]
         make_fs_access=StdFsAccess,  # type: Callable[[Text], StdFsAccess]
         fetcher_constructor=None,  # type: FetcherConstructorType
         resolver=tool_resolver,
         logger_handler=None,
         custom_schema_callback=None  # type: Callable[[], None]
         ):
    # type: (...) -> int

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)

        # If On windows platform, A default Docker Container is Used if not explicitely provided by user
        if onWindows() and not args.default_container:
            # This docker image is a minimal alpine image with bash installed(size 6 mb). source: https://github.com/frol/docker-alpine-bash
            args.default_container = windows_default_container_id

        # If caller provided custom arguments, it may be not every expected
        # option is set, so fill in no-op defaults to avoid crashing when
        # dereferencing them in args.
        for k, v in six.iteritems({'print_deps': False,
                     'print_pre': False,
                     'print_rdf': False,
                     'print_dot': False,
                     'relative_deps': False,
                     'tmp_outdir_prefix': 'tmp',
                     'tmpdir_prefix': 'tmp',
                     'print_input_deps': False,
                     'cachedir': None,
                     'quiet': False,
                     'debug': False,
                     'js_console': False,
                     'version': False,
                     'enable_dev': False,
                     'enable_ext': False,
                     'strict': True,
                     'skip_schemas': False,
                     'rdf_serializer': None,
                     'basedir': None,
                     'tool_help': False,
                     'workflow': None,
                     'job_order': None,
                     'pack': False,
                     'on_error': 'continue',
                     'relax_path_checks': False,
                     'validate': False,
                     'enable_ga4gh_tool_registry': False,
                     'ga4gh_tool_registries': [],
                     'find_default_container': None,
                                   'make_template': False,
                                   'overrides': None
        }):
            if not hasattr(args, k):
                setattr(args, k, v)

        if args.quiet:
            _logger.setLevel(logging.WARN)
        if args.debug:
            _logger.setLevel(logging.DEBUG)

        if args.version:
            print(versionfunc())
            return 0
        else:
            _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supportedCWLversions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error("CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            draft2tool.ACCEPTLIST_RE = draft2tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read())
            res.close()
        else:
            use_standard_schema("v1.0")

        uri, tool_file_uri = resolve_tool_uri(args.workflow,
                                              resolver=resolver,
                                              fetcher_constructor=fetcher_constructor)

        overrides = []  # type: List[Dict[Text, Any]]

        try:
            job_order_object, input_basedir, jobloader = load_job_order(args,
                                                                        stdin,
                                                                        fetcher_constructor,
                                                                        overrides,
                                                                        tool_file_uri)
        except Exception as e:
            _logger.error(Text(e), exc_info=args.debug)

        if args.overrides:
            overrides.extend(load_overrides(file_uri(os.path.abspath(args.overrides)), tool_file_uri))

        try:
            document_loader, workflowobj, uri = fetch_document(uri, resolver=resolver,
                                                               fetcher_constructor=fetcher_constructor)

            if args.print_deps:
                printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri)
                return 0

            document_loader, avsc_names, processobj, metadata, uri \
                = validate_document(document_loader, workflowobj, uri,
                                    enable_dev=args.enable_dev, strict=args.strict,
                                    preprocess_only=args.print_pre or args.pack,
                                    fetcher_constructor=fetcher_constructor,
                                    skip_schemas=args.skip_schemas,
                                    overrides=overrides)

            if args.print_pre:
                stdout.write(json.dumps(processobj, indent=4))
                return 0

            overrides.extend(metadata.get("cwltool:overrides", []))

            conf_file = getattr(args, "beta_dependency_resolvers_configuration", None)  # Text
            use_conda_dependencies = getattr(args, "beta_conda_dependencies", None)  # Text

            make_tool_kwds = vars(args)

            job_script_provider = None  # type: Callable[[Any, List[str]], Text]
            if conf_file or use_conda_dependencies:
                dependencies_configuration = DependenciesConfiguration(args)  # type: DependenciesConfiguration
                make_tool_kwds["job_script_provider"] = dependencies_configuration

            make_tool_kwds["find_default_container"] = functools.partial(find_default_container, args)
            make_tool_kwds["overrides"] = overrides

            tool = make_tool(document_loader, avsc_names, metadata, uri,
                             makeTool, make_tool_kwds)
            if args.make_template:
                yaml.safe_dump(generate_input_template(tool), sys.stdout,
                               default_flow_style=False, indent=4,
                               block_seq_indent=2)
                return 0

            if args.validate:
                _logger.info("Tool definition is valid")
                return 0

            if args.pack:
                stdout.write(print_pack(document_loader, processobj, uri, metadata))
                return 0

            if args.print_rdf:
                stdout.write(printrdf(tool, document_loader.ctx, args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, document_loader.ctx, stdout)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s",
                ", try again with --debug for more information.\nThe error was: "
                "%s" % exc if not args.debug else ".  The error was:",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(args, dirprefix) and getattr(args, dirprefix) != 'tmp':
                sl = "/" if getattr(args, dirprefix).endswith("/") or dirprefix == "cachedir" else ""
                setattr(args, dirprefix,
                        os.path.abspath(getattr(args, dirprefix)) + sl)
                if not os.path.exists(os.path.dirname(getattr(args, dirprefix))):
                    try:
                        os.makedirs(os.path.dirname(getattr(args, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                setattr(args, 'move_outputs', "copy")
            setattr(args, "tmp_outdir_prefix", args.cachedir)

        try:
            job_order_object = init_job_order(job_order_object, args, tool,
                                              print_input_deps=args.print_input_deps,
                                              relative_deps=args.relative_deps,
                                              stdout=stdout,
                                              make_fs_access=make_fs_access,
                                              loader=jobloader,
                                              input_basedir=input_basedir)
        except SystemExit as e:
            return e.code

        if isinstance(job_order_object, int):
            return job_order_object

        try:
            setattr(args, 'basedir', input_basedir)
            del args.workflow
            del args.job_order
            (out, status) = executor(tool, job_order_object,
                                     makeTool=makeTool,
                                     select_resources=selectResources,
                                     make_fs_access=make_fs_access,
                                     **vars(args))

            # This is the workflow output, it needs to be written
            if out is not None:

                def locToPath(p):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in p:
                            del p[field]
                    if p["location"].startswith("file://"):
                        p["path"] = uri_file_path(p["location"])

                visit_class(out, ("File", "Directory"), locToPath)

                # Unsetting the Generation fron final output object
                visit_class(out,("File",), MutationManager().unset_generation)

                if isinstance(out, six.string_types):
                    stdout.write(out)
                else:
                    stdout.write(json.dumps(out, indent=4))
                stdout.write("\n")
                stdout.flush()

            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                return 1
            else:
                _logger.info(u"Final process status is %s", status)
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                u"Workflow or tool uses unsupported feature:\n%s", exc,
                exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(
                u"Workflow error, try again with --debug for more "
                "information:\n%s", strip_dup_lineno(six.text_type(exc)), exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"Unhandled error, try again with --debug for more information:\n"
                "  %s", exc, exc_info=args.debug)
            return 1

    finally:
        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
Ejemplo n.º 21
0
def main(argsl=None,                   # type: List[str]
         args=None,                    # type: argparse.Namespace
         job_order_object=None,        # type: MutableMapping[Text, Any]
         stdin=sys.stdin,              # type: IO[Any]
         stdout=None,                  # type: Union[TextIO, StreamWriter]
         stderr=sys.stderr,            # type: IO[Any]
         versionfunc=versionstring,    # type: Callable[[], Text]
         logger_handler=None,          #
         custom_schema_callback=None,  # type: Callable[[], None]
         executor=None,                # type: Callable[..., Tuple[Dict[Text, Any], Text]]
         loadingContext=None,          # type: LoadingContext
         runtimeContext=None           # type: RuntimeContext
        ):  # type: (...) -> int
    if not stdout:  # force UTF-8 even if the console is configured differently
        if (hasattr(sys.stdout, "encoding")  # type: ignore
                and sys.stdout.encoding != 'UTF-8'):  # type: ignore
            if PY3 and hasattr(sys.stdout, "detach"):
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            else:
                stdout = getwriter('utf-8')(sys.stdout)  # type: ignore
        else:
            stdout = cast(TextIO, sys.stdout)  # type: ignore

    _logger.removeHandler(defaultStreamHandler)
    if logger_handler is not None:
        stderr_handler = logger_handler
    else:
        stderr_handler = logging.StreamHandler(stderr)
    _logger.addHandler(stderr_handler)
    # pre-declared for finally block
    workflowobj = None
    prov_log_handler = None  # type: Optional[logging.StreamHandler]
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            args = arg_parser().parse_args(argsl)
            if args.record_container_id:
                if not args.cidfile_dir:
                    args.cidfile_dir = os.getcwd()
                del args.record_container_id

        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()

        # If on Windows platform, a default Docker Container is used if not
        # explicitely provided by user
        if onWindows() and not runtimeContext.default_container:
            # This docker image is a minimal alpine image with bash installed
            # (size 6 mb). source: https://github.com/frol/docker-alpine-bash
            runtimeContext.default_container = windows_default_container_id

        # If caller parsed its own arguments, it may not include every
        # cwltool option, so fill in defaults to avoid crashing when
        # dereferencing them in args.
        for key, val in iteritems(get_default_args()):
            if not hasattr(args, key):
                setattr(args, key, val)

        # Configure logging
        rdflib_logger = logging.getLogger("rdflib.term")
        rdflib_logger.addHandler(stderr_handler)
        rdflib_logger.setLevel(logging.ERROR)
        if args.quiet:
            # Silence STDERR, not an eventual provenance log file
            stderr_handler.setLevel(logging.WARN)
        if runtimeContext.debug:
            # Increase to debug for both stderr and provenance log file
            _logger.setLevel(logging.DEBUG)
            rdflib_logger.setLevel(logging.DEBUG)
        formatter = None  # type: Optional[logging.Formatter]
        if args.timestamps:
            formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                          "%Y-%m-%d %H:%M:%S")
            stderr_handler.setFormatter(formatter)
        ##

        if args.version:
            print(versionfunc())
            return 0
        _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supported_cwl_versions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                setattr(args, "workflow", "CWLFile")
            else:
                _logger.error("")
                _logger.error("CWL document required, no input file was provided")
                arg_parser().print_help()
                return 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if custom_schema_callback is not None:
            custom_schema_callback()
        elif args.enable_ext:
            res = pkg_resources.resource_stream(__name__, 'extensions.yml')
            use_custom_schema("v1.0", "http://commonwl.org/cwltool", res.read())
            res.close()
        else:
            use_standard_schema("v1.0")
        if args.provenance:
            if not args.compute_checksum:
                _logger.error("--provenance incompatible with --no-compute-checksum")
                return 1
            ro = ResearchObject(
                temp_prefix_ro=args.tmpdir_prefix, orcid=args.orcid,
                full_name=args.cwl_full_name)
            runtimeContext.research_obj = ro
            log_file_io = ro.open_log_file_for_activity(ro.engine_uuid)
            prov_log_handler = logging.StreamHandler(log_file_io)

            class ProvLogFormatter(logging.Formatter):
                """Enforce ISO8601 with both T and Z."""
                def __init__(self):  # type: () -> None
                    super(ProvLogFormatter, self).__init__(
                        "[%(asctime)sZ] %(message)s")

                def formatTime(self, record, datefmt=None):
                    # type: (logging.LogRecord, str) -> str
                    record_time = time.gmtime(record.created)
                    formatted_time = time.strftime("%Y-%m-%dT%H:%M:%S", record_time)
                    with_msecs = "%s,%03d" % (formatted_time, record.msecs)
                    return with_msecs
            prov_log_handler.setFormatter(ProvLogFormatter())
            _logger.addHandler(prov_log_handler)
            _logger.debug(u"[provenance] Logging to %s", log_file_io)
            if argsl is not None:
                # Log cwltool command line options to provenance file
                _logger.info("[cwltool] %s %s", sys.argv[0], u" ".join(argsl))
            _logger.debug(u"[cwltool] Arguments: %s", args)

        if loadingContext is None:
            loadingContext = LoadingContext(vars(args))
        else:
            loadingContext = loadingContext.copy()
        loadingContext.loader = default_loader(loadingContext.fetcher_constructor)
        loadingContext.research_obj = runtimeContext.research_obj
        loadingContext.disable_js_validation = \
            args.disable_js_validation or (not args.do_validate)
        loadingContext.construct_tool_object = getdefault(
            loadingContext.construct_tool_object, workflow.default_make_tool)
        loadingContext.resolver = getdefault(loadingContext.resolver, tool_resolver)
        loadingContext.do_update = not (args.pack or args.print_subgraph)

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow, resolver=loadingContext.resolver,
            fetcher_constructor=loadingContext.fetcher_constructor)

        try_again_msg = "" if args.debug else ", try again with --debug for more information"

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args, stdin, loadingContext.fetcher_constructor,
                loadingContext.overrides_list, tool_file_uri)

            if args.overrides:
                loadingContext.overrides_list.extend(load_overrides(
                    file_uri(os.path.abspath(args.overrides)), tool_file_uri))

            loadingContext, workflowobj, uri = fetch_document(
                uri, loadingContext)

            if args.print_deps and loadingContext.loader:
                printdeps(workflowobj, loadingContext.loader, stdout,
                          args.relative_deps, uri)
                return 0

            loadingContext, uri \
                = resolve_and_validate_document(loadingContext, workflowobj, uri,
                                    preprocess_only=(args.print_pre or args.pack),
                                    skip_schemas=args.skip_schemas)
            
            if loadingContext.loader is None:
                raise Exception("Impossible code path.")
            processobj, metadata = loadingContext.loader.resolve_ref(uri)
            processobj = cast(CommentedMap, processobj)
            if args.pack:
                stdout.write(print_pack(loadingContext.loader, processobj, uri, metadata))
                return 0

            if args.provenance and runtimeContext.research_obj:
                # Can't really be combined with args.pack at same time
                runtimeContext.research_obj.packed_workflow(
                    print_pack(loadingContext.loader, processobj, uri, metadata))

            if args.print_pre:
                stdout.write(json_dumps(processobj, indent=4, sort_keys=True, separators=(',', ': ')))
                return 0

            tool = make_tool(uri, loadingContext)
            if args.make_template:
                def my_represent_none(self, data):  # pylint: disable=unused-argument
                    """Force clean representation of 'null'."""
                    return self.represent_scalar(u'tag:yaml.org,2002:null', u'null')
                yaml.RoundTripRepresenter.add_representer(type(None), my_represent_none)
                yaml.round_trip_dump(
                    generate_input_template(tool), sys.stdout,
                    default_flow_style=False, indent=4, block_seq_indent=2)
                return 0

            if args.validate:
                print("{} is valid CWL.".format(args.workflow))
                return 0

            if args.print_rdf:
                stdout.write(printrdf(tool, loadingContext.loader.ctx, args.rdf_serializer))
                return 0

            if args.print_dot:
                printdot(tool, loadingContext.loader.ctx, stdout)
                return 0

            if args.print_targets:
                for f in ("outputs", "steps", "inputs"):
                    if tool.tool[f]:
                        _logger.info("%s%s targets:", f[0].upper(), f[1:-1])
                        stdout.write("  "+"\n  ".join([shortname(t["id"]) for t in tool.tool[f]])+"\n")
                return 0

            if args.target:
                if isinstance(tool, Workflow):
                    url = urllib.parse.urlparse(tool.tool["id"])
                    if url.fragment:
                        extracted = get_subgraph([tool.tool["id"] + "/" + r for r in args.target], tool)
                    else:
                        extracted = get_subgraph([loadingContext.loader.fetcher.urljoin(tool.tool["id"], "#" + r)
                                                 for r in args.target],
                                                 tool)
                else:
                    _logger.error("Can only use --target on Workflows")
                    return 1
                loadingContext.loader.idx[extracted["id"]] = extracted
                tool = make_tool(extracted["id"],
                                 loadingContext)

            if args.print_subgraph:
                if "name" in tool.tool:
                    del tool.tool["name"]
                stdout.write(json_dumps(tool.tool, indent=4, sort_keys=True, separators=(',', ': ')))
                return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Tool definition failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(u"Tool definition failed initialization:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except Exception as exc:
            _logger.error(
                u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                exc if not args.debug else "",
                exc_info=args.debug)
            return 1

        if isinstance(tool, int):
            return tool
        # If on MacOS platform, TMPDIR must be set to be under one of the
        # shared volumes in Docker for Mac
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path
            if runtimeContext.tmpdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmpdir_prefix = default_mac_path

        for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix", "cachedir"):
            if getattr(runtimeContext, dirprefix) and getattr(runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \
                    else ""
                setattr(runtimeContext, dirprefix,
                        os.path.abspath(getattr(runtimeContext, dirprefix)) + sl)
                if not os.path.exists(os.path.dirname(getattr(runtimeContext, dirprefix))):
                    try:
                        os.makedirs(os.path.dirname(getattr(runtimeContext, dirprefix)))
                    except Exception as e:
                        _logger.error("Failed to create directory: %s", e)
                        return 1

        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir

        runtimeContext.secret_store = getdefault(runtimeContext.secret_store, SecretStore())
        runtimeContext.make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
        try:
            initialized_job_order_object = init_job_order(
                job_order_object, args, tool, jobloader, stdout,
                print_input_deps=args.print_input_deps,
                relative_deps=args.relative_deps,
                make_fs_access=runtimeContext.make_fs_access,
                input_basedir=input_basedir,
                secret_store=runtimeContext.secret_store)
        except SystemExit as err:
            return err.code

        if not executor:
            if args.parallel:
                executor = MultithreadedJobExecutor()
                runtimeContext.select_resources = executor.select_resources
            else:
                executor = SingleJobExecutor()

        try:
            runtimeContext.basedir = input_basedir
            del args.workflow
            del args.job_order

            conf_file = getattr(args, "beta_dependency_resolvers_configuration", None)  # Text
            use_conda_dependencies = getattr(args, "beta_conda_dependencies", None)  # Text

            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = DependenciesConfiguration(args)
            else:
                runtimeContext.find_default_container = functools.partial(
                    find_default_container,
                    default_container=runtimeContext.default_container,
                    use_biocontainers=args.beta_use_biocontainers)

            (out, status) = executor(tool,
                                     initialized_job_order_object,
                                     runtimeContext,
                                     logger=_logger)

            if out is not None:
                if runtimeContext.research_obj is not None:
                    runtimeContext.research_obj.create_job(
                        out, None, True)

                def loc_to_path(obj):
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if obj["location"].startswith("file://"):
                        obj["path"] = uri_file_path(obj["location"])

                visit_class(out, ("File", "Directory"), loc_to_path)

                # Unsetting the Generation from final output object
                visit_class(out, ("File", ), MutationManager().unset_generation)

                if isinstance(out, string_types):
                    stdout.write(out)
                else:
                    stdout.write(json_dumps(out, indent=4,  # type: ignore
                                            ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()  # type: ignore

            if status != "success":
                _logger.warning(u"Final process status is %s", status)
                return 1
            _logger.info(u"Final process status is %s", status)
            return 0

        except (validate.ValidationException) as exc:
            _logger.error(u"Input object failed validation:\n%s", exc,
                          exc_info=args.debug)
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                u"Workflow or tool uses unsupported feature:\n%s", exc,
                exc_info=args.debug)
            return 33
        except WorkflowException as exc:
            _logger.error(
                u"Workflow error%s:\n%s", try_again_msg, strip_dup_lineno(Text(exc)),
                exc_info=args.debug)
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            _logger.error(
                u"Unhandled error%s:\n  %s", try_again_msg, exc, exc_info=args.debug)
            return 1

    finally:
        if args and runtimeContext and runtimeContext.research_obj \
                and workflowobj and loadingContext:
            research_obj = runtimeContext.research_obj
            if loadingContext.loader is not None:
                research_obj.generate_snapshot(prov_deps(
                    workflowobj, loadingContext.loader, uri))
            else:
                _logger.warning("Unable to generate provenance snapshot "
                    " due to missing loadingContext.loader.")
            if prov_log_handler is not None:
                # Stop logging so we won't half-log adding ourself to RO
                _logger.debug(u"[provenance] Closing provenance log file %s",
                    prov_log_handler)
                _logger.removeHandler(prov_log_handler)
                # Ensure last log lines are written out
                prov_log_handler.flush()
                # Underlying WritableBagFile will add the tagfile to the manifest
                prov_log_handler.stream.close()
                prov_log_handler.close()
            research_obj.close(args.provenance)

        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
Ejemplo n.º 22
0
def static_checker(workflow_inputs, workflow_outputs, step_inputs, step_outputs, param_to_step):
    # type: (List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], Dict[Text, Dict[Text, Any]]) -> None
    """Check if all source and sink types of a workflow are compatible before run time.
    """

    # source parameters: workflow_inputs and step_outputs
    # sink parameters: step_inputs and workflow_outputs

    # make a dictionary of source parameters, indexed by the "id" field
    src_parms = workflow_inputs + step_outputs
    src_dict = {}
    for parm in src_parms:
        src_dict[parm["id"]] = parm

    step_inputs_val = check_all_types(src_dict, step_inputs, "source")
    workflow_outputs_val = check_all_types(src_dict, workflow_outputs, "outputSource")

    warnings = step_inputs_val["warning"] + workflow_outputs_val["warning"]
    exceptions = step_inputs_val["exception"] + workflow_outputs_val["exception"]

    warning_msgs = []
    exception_msgs = []
    for warning in warnings:
        src = warning.src
        sink = warning.sink
        linkMerge = warning.linkMerge
        if sink.get("secondaryFiles") and sorted(
                sink.get("secondaryFiles", [])) != sorted(src.get("secondaryFiles", [])):
            msg1 = "Sink '%s'" % (shortname(sink["id"]))
            msg2 = SourceLine(sink.get("_tool_entry", sink), "secondaryFiles").makeError(
                "expects secondaryFiles: %s but" % (sink.get("secondaryFiles")))
            if "secondaryFiles" in src:
                msg3 = SourceLine(src, "secondaryFiles").makeError(
                    "source '%s' has secondaryFiles %s." % (shortname(src["id"]), src.get("secondaryFiles")))
            else:
                msg3 = SourceLine(src, "id").makeError(
                    "source '%s' does not include secondaryFiles." % (shortname(src["id"])))
            msg4 = SourceLine(src, "id").makeError("To fix, add secondaryFiles: %s to definition of '%s'." % (sink.get("secondaryFiles"), shortname(src["id"])))
            msg = SourceLine(sink).makeError("%s\n%s" % (msg1, bullets([msg2, msg3, msg4], "  ")))
        elif sink.get("not_connected"):
            msg = SourceLine(sink, "type").makeError(
                "'%s' is not an input parameter of %s, expected %s"
                % (shortname(sink["id"]), param_to_step[sink["id"]]["run"],
                   ", ".join(shortname(s["id"])
                             for s in param_to_step[sink["id"]]["inputs"]
                             if not s.get("not_connected"))))
        else:
            msg = SourceLine(src, "type").makeError(
                "Source '%s' of type %s may be incompatible"
                % (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + \
                SourceLine(sink, "type").makeError(
                    "  with sink '%s' of type %s"
                    % (shortname(sink["id"]), json_dumps(sink["type"])))
            if linkMerge:
                msg += "\n" + SourceLine(sink).makeError("  source has linkMerge method %s" % linkMerge)

        warning_msgs.append(msg)
    for exception in exceptions:
        src = exception.src
        sink = exception.sink
        linkMerge = exception.linkMerge
        msg = SourceLine(src, "type").makeError(
            "Source '%s' of type %s is incompatible"
            % (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + \
            SourceLine(sink, "type").makeError(
                "  with sink '%s' of type %s"
                % (shortname(sink["id"]), json_dumps(sink["type"])))
        if linkMerge:
            msg += "\n" + SourceLine(sink).makeError("  source has linkMerge method %s" % linkMerge)
        exception_msgs.append(msg)

    for sink in step_inputs:
        if ('null' != sink["type"] and 'null' not in sink["type"]
                and "source" not in sink and "default" not in sink and "valueFrom" not in sink):
            msg = SourceLine(sink).makeError(
                "Required parameter '%s' does not have source, default, or valueFrom expression"
                % shortname(sink["id"]))
            exception_msgs.append(msg)

    all_warning_msg = strip_dup_lineno("\n".join(warning_msgs))
    all_exception_msg = strip_dup_lineno("\n".join(exception_msgs))

    if warnings:
        _logger.warning("Workflow checker warning:\n%s", all_warning_msg)
    if exceptions:
        raise validate.ValidationException(all_exception_msg)