Ejemplo n.º 1
0
def main():
    parser = arg_parser()
    parsed_args = parser.parse_args(sys.argv[1:])

    # Load the requested parsl configuration
    if parsed_args.parsl == 'cori':
        parsl.load(cori_regular_config)
    elif parsed_args.parsl == 'cori-debug':
        parsl.load(cori_debug_config)
    else:
        parsl.load(threads_config)

    # Trigger the argparse message if the cwl file is missing
    # Otherwise cwltool will use the default argparser
    if not parsed_args.workflow:
        if os.path.isfile("CWLFile"):
            setattr(parsed_args, "workflow", "CWLFile")
        else:
            _logger.error("")
            _logger.error("CWL document required, no input file was provided")
            parser.print_help()
            sys.exit(1)
    elif not parsed_args.basedir:
        _logger.error("")
        _logger.error("Basedir is required for storing itermediate results")
        parser.print_help()
        sys.exit(1)

    rc = RuntimeContext(vars(parsed_args))
    rc.shifter = False
    parsed_args.__dict__['parallel'] = True

    rc.tmpdir_prefix = rc.basedir + '/tmp/tmp'
    rc.tmp_outdir_prefix = rc.basedir + '/out/out'  # type: Text
    if parsed_args.shifter:
        rc.shifter = True
        rc.docker_outdir = '/spooldir'
        rc.docker_stagedir = rc.basedir + '/stage'
        rc.docker_tmpdir = '/tmpdir'

    lc = LoadingContext(vars(parsed_args))
    lc.construct_tool_object = customMakeTool

    sys.exit(
        cwltool.main.main(args=parsed_args,
                          loadingContext=lc,
                          runtimeContext=rc))
Ejemplo n.º 2
0
def run_native(config_object: 'ConfigBase', workflow: str, run_directory: str = '.', verbosity="normal") -> int:
    """Executes the workflow using native Python rather than subprocess "command line"

    Args:
        config_object: a constructed ConfigBase-derived object
        workflow: the path to the workflow to be executed
        run_directory: the destination folder for workflow output subdirectories (default: CWD)
        parallel: process libraries in parallel where possible
        verbosity: controls the depth of information written to terminal by cwltool

    Returns: None

    """

    def furnish_if_file_record(file_dict):
        if isinstance(file_dict, dict) and file_dict.get('class', None) == 'File':
            file_dict['basename'] = os.path.basename(file_dict['path'])
            file_dict['location'] = file_dict['path']
            file_dict['contents'] = None

    # Upgrade file entries in Run Config with extra descriptors cwltool expects
    for _, config_param in config_object.config.items():
        if isinstance(config_param, list):
            for config_dict in config_param:
                furnish_if_file_record(config_dict)
        else:
            furnish_if_file_record(config_param)

    # Set overall config for cwltool
    runtime_context = RuntimeContext({
        'secret_store': cwltool.secrets.SecretStore(),
        'outdir': run_directory,
        'on_error': "continue",
        'js_console': verbosity == "debug",
        'debug': verbosity == "debug"
    })

    # Set proper temp directory for Mac users
    if sys.platform == "darwin":
        default_mac_path = "/private/tmp/docker_tmp"
        if runtime_context.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
            runtime_context.tmp_outdir_prefix = default_mac_path
        if runtime_context.tmpdir_prefix == DEFAULT_TMP_PREFIX:
            runtime_context.tmpdir_prefix = default_mac_path

    # Enable rich terminal output (timestamp, color, formatting)
    logger = logging.getLogger("cwltool")
    logger.handlers.clear()  # executors.py loads a default handler; outputs are printed twice if we don't clear it
    level = 'DEBUG' if verbosity == 'debug' else 'WARN' if verbosity == "quiet" else "INFO"
    coloredlogs.install(logger=logger, stream=sys.stderr, fmt="[%(asctime)s] %(levelname)s %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S", level=level, isatty=True)

    # Create a wrapper for the executors so that we may pass our logger to them (unsupported by Factory)
    parallel: MultithreadedJobExecutor = functools.partial(MultithreadedJobExecutor(), logger=logger)
    serial: SingleJobExecutor = functools.partial(SingleJobExecutor(), logger=logger)

    # Instantiate Factory with our run preferences
    cwl = cwltool.factory.Factory(
        runtime_context=runtime_context,
        loading_context=LoadingContext({'relax_path_checks': True}),
        executor=parallel if parallel else serial
    )

    try:
        # Load the workflow document and execute
        pipeline = cwl.make(workflow)
        pipeline(**config_object.config)
    except cwltool.factory.WorkflowStatus:
        # For now, return non-zero if workflow did not complete
        return 1

    return 0
Ejemplo n.º 3
0
    def cwlmain(
        self,
        argsl=None,  # type: List[str]
        args=None,  # type: argparse.Namespace
        job_order_object=None,  # type: MutableMapping[Text, Any]
        stdin=sys.stdin,  # type: IO[Any]
        stdout=None,  # type: Union[TextIO, codecs.StreamWriter]
        stderr=sys.stderr,  # type: IO[Any]
        versionfunc=versionstring,  # type: Callable[[], Text]
        logger_handler=None,  #
        custom_schema_callback=None,  # type: Callable[[], None]
        executor=None,  # type: Callable[..., Tuple[Dict[Text, Any], Text]]
        loadingContext=None,  # type: LoadingContext
        runtimeContext=None  # type: RuntimeContext
    ):  # type: (...) -> int

        if not stdout:
            stdout = codecs.getwriter('utf-8')(sys.stdout)
        _logger.removeHandler(defaultStreamHandler)
        if logger_handler:
            stderr_handler = logger_handler
        else:
            stderr_handler = logging.StreamHandler(stderr)
        _logger.addHandler(stderr_handler)
        try:
            if args is None:
                args = arg_parser().parse_args(argsl)
                if args.workflow and "--outdir" not in argsl:
                    outputPath = args.workflow.split('/')[-1].split('.')[0]
                    setattr(
                        args, "outdir",
                        os.getcwd() + "/" + outputPath + "/" +
                        datetime.datetime.now().strftime('%Y-%m-%d-%H%M'))
            if runtimeContext is None:
                runtimeContext = RuntimeContext(vars(args))
            else:
                runtimeContext = runtimeContext.copy()

            rdflib_logger = logging.getLogger("rdflib.term")
            rdflib_logger.addHandler(stderr_handler)
            rdflib_logger.setLevel(logging.ERROR)
            if args.quiet:
                _logger.setLevel(logging.WARN)
            if runtimeContext.debug:
                _logger.setLevel(logging.DEBUG)
                rdflib_logger.setLevel(logging.DEBUG)
            if args.timestamps:
                formatter = logging.Formatter("[%(asctime)s] %(message)s",
                                              "%Y-%m-%d %H:%M:%S")
                stderr_handler.setFormatter(formatter)
            # version
            if args.version:
                return versionfunc(), 0
            else:
                _logger.info(versionfunc())

            if args.print_supported_versions:
                return "\n".join(supportedCWLversions(args.enable_dev)), 0

            if not args.workflow:
                if os.path.isfile("CWLFile"):
                    setattr(args, "workflow", "CWLFile")
                else:
                    _logger.error("")
                    _logger.error(
                        "CWL document required, no input file was provided")
                    arg_parser().print_help()
                    return "CWL document required, no input file was provided", 1
            if args.relax_path_checks:
                command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

            if args.ga4gh_tool_registries:
                ga4gh_tool_registries[:] = args.ga4gh_tool_registries
            if not args.enable_ga4gh_tool_registry:
                del ga4gh_tool_registries[:]

            if custom_schema_callback:
                custom_schema_callback()
            elif args.enable_ext:
                res = pkg_resources.resource_stream(__name__, 'extensions.yml')
                use_custom_schema("v1.0", "http://commonwl.org/cwltool",
                                  res.read())
                res.close()
            else:
                use_standard_schema("v1.0")

            if loadingContext is None:
                loadingContext = LoadingContext(vars(args))
            else:
                loadingContext = loadingContext.copy()

            loadingContext.disable_js_validation = \
                args.disable_js_validation or (not args.do_validate)
            loadingContext.construct_tool_object = getdefault(
                loadingContext.construct_tool_object,
                workflow.default_make_tool)
            loadingContext.resolver = getdefault(loadingContext.resolver,
                                                 tool_resolver)
            try:
                uri, tool_file_uri = resolve_tool_uri(
                    args.workflow,
                    resolver=loadingContext.resolver,
                    fetcher_constructor=loadingContext.fetcher_constructor)
            except:
                return "Can't find file " + args.workflow, 0

            try_again_msg = "" if args.debug else ", try again with --debug for more information"

            try:
                job_order_object, input_basedir, jobloader = load_job_order(
                    args, stdin, loadingContext.fetcher_constructor,
                    loadingContext.overrides_list, tool_file_uri)

                if args.overrides:
                    loadingContext.overrides_list.extend(
                        load_overrides(
                            file_uri(os.path.abspath(args.overrides)),
                            tool_file_uri))

                document_loader, workflowobj, uri = fetch_document(
                    uri,
                    resolver=loadingContext.resolver,
                    fetcher_constructor=loadingContext.fetcher_constructor)

                if args.print_deps:
                    # printdeps(workflowobj, document_loader, stdout, args.relative_deps, uri)
                    result = returndeps(workflowobj, document_loader, stdout,
                                        args.relative_deps, uri)
                    return result, 0

                document_loader, avsc_names, processobj, metadata, uri \
                    = validate_document(document_loader, workflowobj, uri,
                                        enable_dev=loadingContext.enable_dev,
                                        strict=loadingContext.strict,
                                        preprocess_only=(args.print_pre or args.pack),
                                        fetcher_constructor=loadingContext.fetcher_constructor,
                                        skip_schemas=args.skip_schemas,
                                        overrides=loadingContext.overrides_list,
                                        do_validate=loadingContext.do_validate)

                if args.print_pre:
                    # stdout.write(json_dumps(processobj, indent=4))
                    return json_dumps(processobj, indent=4), 0

                loadingContext.overrides_list.extend(
                    metadata.get("cwltool:overrides", []))

                tool = make_tool(document_loader, avsc_names, metadata, uri,
                                 loadingContext)
                if args.make_template:
                    yaml.safe_dump(generate_input_template(tool),
                                   sys.stdout,
                                   default_flow_style=False,
                                   indent=4,
                                   block_seq_indent=2)
                    return yaml.safe_dump(generate_input_template(tool),
                                          indent=4), 0

                if args.validate:
                    _logger.info("Tool definition is valid")
                    return "Tool definition is valid", 0

                if args.pack:
                    stdout.write(
                        print_pack(document_loader, processobj, uri, metadata))
                    return print_pack(document_loader, processobj, uri,
                                      metadata), 0

                if args.print_rdf:
                    stdout.write(
                        printrdf(tool, document_loader.ctx,
                                 args.rdf_serializer))
                    return printrdf(tool, document_loader.ctx,
                                    args.rdf_serializer), 0

                if args.print_dot:
                    printdot(tool, document_loader.ctx, stdout)
                    return "args.print_dot still not solved", 0

            except (validate.ValidationException) as exc:
                _logger.error(u"Tool definition failed validation:\n%s",
                              exc,
                              exc_info=args.debug)
                infor = "Tool definition failed validation:\n%s" + exc + args.debug
                return infor, 1
            except (RuntimeError, WorkflowException) as exc:
                _logger.error(u"Tool definition failed initialization:\n%s",
                              exc,
                              exc_info=args.debug)
                infor = "Tool definition failed initialization:\n%s" + exc + args.debug
                return infor, 1
            except Exception as exc:
                _logger.error(
                    u"I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                    try_again_msg,
                    exc if not args.debug else "",
                    exc_info=args.debug)
                return "I'm sorry, I couldn't load this CWL file", 1

            if isinstance(tool, int):
                return tool, 0

            # If on MacOS platform, TMPDIR must be set to be under one of the
            # shared volumes in Docker for Mac
            # More info: https://dockstore.org/docs/faq
            if sys.platform == "darwin":
                default_mac_path = "/private/tmp/docker_tmp"
                if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                    runtimeContext.tmp_outdir_prefix = default_mac_path

            for dirprefix in ("tmpdir_prefix", "tmp_outdir_prefix",
                              "cachedir"):
                if getattr(runtimeContext, dirprefix) and getattr(
                        runtimeContext, dirprefix) != DEFAULT_TMP_PREFIX:
                    sl = "/" if getattr(runtimeContext, dirprefix).endswith("/") or dirprefix == "cachedir" \
                        else ""
                    setattr(
                        runtimeContext, dirprefix,
                        os.path.abspath(getattr(runtimeContext, dirprefix)) +
                        sl)
                    if not os.path.exists(
                            os.path.dirname(getattr(runtimeContext,
                                                    dirprefix))):
                        try:
                            os.makedirs(
                                os.path.dirname(
                                    getattr(runtimeContext, dirprefix)))
                        except Exception as e:
                            _logger.error("Failed to create directory: %s", e)
                            infor = "Failed to create directory: %s" + e + ""
                            return infor, 1

            if args.cachedir:
                if args.move_outputs == "move":
                    runtimeContext.move_outputs = "copy"
                runtimeContext.tmp_outdir_prefix = args.cachedir

            runtimeContext.secret_store = getdefault(
                runtimeContext.secret_store, SecretStore())

            try:
                initialized_job_order_object = init_job_order(
                    job_order_object,
                    args,
                    tool,
                    jobloader,
                    stdout,
                    print_input_deps=args.print_input_deps,
                    relative_deps=args.relative_deps,
                    input_basedir=input_basedir,
                    secret_store=runtimeContext.secret_store)
            except SystemExit as err:
                return err.code
            if not executor:
                if args.parallel:
                    executor = MultithreadedJobExecutor()
                else:
                    executor = SingleJobExecutor()
            assert executor is not None

            if isinstance(initialized_job_order_object, int):
                return initialized_job_order_object

            try:
                runtimeContext.basedir = input_basedir
                del args.workflow
                del args.job_order

                conf_file = getattr(args,
                                    "beta_dependency_resolvers_configuration",
                                    None)  # Text
                use_conda_dependencies = getattr(args,
                                                 "beta_conda_dependencies",
                                                 None)  # Text

                job_script_provider = None  # type: Optional[DependenciesConfiguration]
                if conf_file or use_conda_dependencies:
                    runtimeContext.job_script_provider = DependenciesConfiguration(
                        args)

                runtimeContext.find_default_container = \
                    functools.partial(find_default_container, args)
                runtimeContext.make_fs_access = getdefault(
                    runtimeContext.make_fs_access, StdFsAccess)

                (out, status) = executor(tool,
                                         initialized_job_order_object,
                                         runtimeContext,
                                         logger=_logger)
                # This is the workflow output, it needs to be written
                if out is not None:

                    def loc_to_path(obj):
                        for field in ("path", "nameext", "nameroot",
                                      "dirname"):
                            if field in obj:
                                del obj[field]
                        if obj["location"].startswith("file://"):
                            obj["path"] = uri_file_path(obj["location"])

                    visit_class(out, ("File", "Directory"), loc_to_path)

                    # Unsetting the Generation fron final output object
                    visit_class(out, ("File", ),
                                MutationManager().unset_generation)

                    if isinstance(out, string_types):
                        stdout.write(out)
                    else:
                        stdout.write(
                            json_dumps(
                                out,
                                indent=4,  # type: ignore
                                ensure_ascii=False))
                    stdout.write("\n")
                    if hasattr(stdout, "flush"):
                        stdout.flush()  # type: ignore

                if status != "success":
                    _logger.warning(u"Final process status is %s", status)
                    infor = "Final process status is %s" + status + ""
                    return infor, 1

                _logger.info(u"Final process status is %s", status)
                return out, status

            except (validate.ValidationException) as exc:
                _logger.error(u"Input object failed validation:\n%s",
                              exc,
                              exc_info=args.debug)
                infor = "Input object failed validation:\n%s" + exc + args.debug
                return infor, 1
            except UnsupportedRequirement as exc:
                _logger.error(
                    u"Workflow or tool uses unsupported feature:\n%s",
                    exc,
                    exc_info=args.debug)
                infor = "Workflow or tool uses unsupported feature:\n%s" + exc + args.debug
                return infor, 3
            except WorkflowException as exc:
                _logger.error(u"Workflow error%s:\n%s",
                              try_again_msg,
                              strip_dup_lineno(six.text_type(exc)),
                              exc_info=args.debug)
                infor = "Workflow error%s:\n%s" + try_again_msg + strip_dup_lineno(
                    six.text_type(exc)) + args.debug
                return infor, 1
            except Exception as exc:
                _logger.error(u"Unhandled error%s:\n  %s",
                              try_again_msg,
                              exc,
                              exc_info=args.debug)
                infor = "Unhandled error%s:\n  %s" + try_again_msg + exc + args.debug
                return infor, 1

        finally:
            _logger.removeHandler(stderr_handler)
            _logger.addHandler(defaultStreamHandler)