Ejemplo n.º 1
0
def execute_workflow_step(workflow,
                          task_id,
                          job_data,
                          cwl_args=None,
                          executor=None):
    """
    Constructs and executes single step workflow based on the "workflow"
    and "task_id". "cwl_args" can be used to update default parameters
    used for loading and runtime contexts. Exports json file with the
    execution results.

    Args:
        workflow: workflow passed to "fast_cwl_step_load" to extract the step.
        task_id: id of the step to run. Also used as the cidfile prefix and
            as a part of the generated single step workflow file name.
        job_data: job object passed to "get_temp_folders" and to the executor.
        cwl_args: optional overrides passed to "get_default_cwl_args".
        executor: optional callable invoked as
            executor(loaded_workflow, job_data, runtime_context) that returns
            (outputs, status). SingleJobExecutor() is used when omitted.

    Returns:
        Tuple of (step_outputs, step_report), where step_report is the
        location the outputs were dumped to as json.

    Raises:
        ValueError: if the executor reports a status other than "success".
    """

    cwl_args = {} if cwl_args is None else cwl_args
    executor = SingleJobExecutor() if executor is None else executor

    step_tmp_folder, step_cache_folder, step_outputs_folder, step_report = get_temp_folders(
        task_id=task_id, job_data=job_data)

    default_cwl_args = get_default_cwl_args(cwl_args)

    default_cwl_args.update({  # add execution specific parameters
        "tmp_outdir_prefix": step_cache_folder + "/",
        "tmpdir_prefix": step_cache_folder + "/",
        "cidfile_dir": step_tmp_folder,
        "cidfile_prefix": task_id,
        "basedir": os.getcwd(
        ),  # job should already have abs path for inputs, so this is useless
        "outdir": step_outputs_folder
    })

    workflow_step_path = os.path.join(step_tmp_folder,
                                      task_id + "_step_workflow.cwl")

    fast_cwl_step_load(  # will save new workflow to "workflow_step_path"
        workflow=workflow,
        target_id=task_id,
        cwl_args=default_cwl_args,
        location=workflow_step_path)

    _stderr = sys.stderr  # to trick the logger
    sys.stderr = sys.__stderr__
    try:
        step_outputs, step_status = executor(
            slow_cwl_load(workflow=workflow_step_path,
                          cwl_args=default_cwl_args),
            job_data, RuntimeContext(default_cwl_args))
    finally:
        # Always put the original stream back, otherwise a failure while
        # loading or running the step leaves sys.stderr replaced for the
        # rest of the process
        sys.stderr = _stderr

    if step_status != "success":
        raise ValueError("Failed to run workflow step")

    # To remove "http://commonwl.org/cwltool#generation": 0 (copied from cwltool)
    visit_class(step_outputs, ("File", ), MutationManager().unset_generation)

    dump_json(step_outputs, step_report)

    return step_outputs, step_report
Ejemplo n.º 2
0
    def execute(self, context):
        """
        Executes the CWL step (or the whole tool) that this task represents.

        Loads the CWL document referenced by the "cwl_workflow" DAG default
        argument, builds the job object from the "promises" pulled from the
        upstream tasks through XCom (falling back to the "default" values of
        the step inputs), evaluates "valueFrom" expressions, runs the step
        with SingleJobExecutor and collects its outputs.

        Args:
            context: task context, passed to "post_status" and "xcom_pull".

        Returns:
            dict with "promises" (step outputs keyed by the shortname of the
            output id) and "outdir" (current value of self.outdir).

        Raises:
            ValueError: if the executor returned no output and the status
                "permanentFail".
        """

        post_status(context)

        self.cwlwf, it_is_workflow = load_cwl(
            self.dag.default_args["cwl_workflow"], self.dag.default_args)
        # For a workflow pick the step whose id fragment equals this task id,
        # otherwise the loaded document itself is what gets executed
        self.cwl_step = [
            step for step in self.cwlwf.steps
            if self.task_id == step.id.split("#")[-1]
        ][0] if it_is_workflow else self.cwlwf

        _logger.info('{0}: Running!'.format(self.task_id))

        upstream_task_ids = [t.task_id for t in self.upstream_list] + \
                            ([self.reader_task_id] if self.reader_task_id else [])
        _logger.debug('{0}: Collecting outputs from: \n{1}'.format(
            self.task_id, json.dumps(upstream_task_ids, indent=4)))

        upstream_data = self.xcom_pull(context=context,
                                       task_ids=upstream_task_ids)
        _logger.info('{0}: Upstream data: \n {1}'.format(
            self.task_id, json.dumps(upstream_data, indent=4)))

        promises = {}
        for data in upstream_data:  # upstream_data is an array with { promises and outdir }
            promises = merge(promises, data["promises"])
            if "outdir" in data:
                self.outdir = data["outdir"]

        # NOTE: this is the DAG's own dict, not a copy, so every key set on
        # "_d_args" below is written into self.dag.default_args
        _d_args = self.dag.default_args

        if not self.outdir:
            self.outdir = _d_args['tmp_folder']

        _logger.debug('{0}: Step inputs: {1}'.format(
            self.task_id, json.dumps(self.cwl_step.tool["inputs"], indent=4)))

        _logger.debug('{0}: Step outputs: {1}'.format(
            self.task_id, json.dumps(self.cwl_step.tool["outputs"], indent=4)))

        jobobj = {}

        for inp in self.cwl_step.tool["inputs"]:
            jobobj_id = shortname(inp["id"]).split("/")[-1]
            source_ids = []
            promises_outputs = []
            try:
                source_field = inp["source"] if it_is_workflow else inp.get(
                    "id")
                source_ids = [shortname(s)
                              for s in source_field] if isinstance(
                                  source_field,
                                  list) else [shortname(source_field)]
                promises_outputs = [
                    promises[source_id] for source_id in source_ids
                    if source_id in promises
                ]
            except Exception:
                # Best effort: an input without a usable source is resolved
                # from "valueFrom"/"default" below. Exception (not a bare
                # except) so that KeyboardInterrupt/SystemExit still propagate
                _logger.warning(
                    "{0}: Couldn't find source field in step input: {1}".
                    format(self.task_id, json.dumps(inp, indent=4)))

            _logger.info(
                '{0}: For input {1} with source_ids: {2} found upstream outputs: \n{3}'
                .format(self.task_id, jobobj_id, source_ids, promises_outputs))

            if len(promises_outputs) > 1:
                if inp.get("linkMerge", "merge_nested") == "merge_flattened":
                    jobobj[jobobj_id] = flatten(promises_outputs)
                else:
                    jobobj[jobobj_id] = promises_outputs
            # Should also check if [None], because in this case we need to take default value
            elif len(promises_outputs) == 1 and (promises_outputs[0]
                                                 is not None):
                jobobj[jobobj_id] = promises_outputs[0]
            elif "valueFrom" in inp:
                # Placeholder, the real value is computed by _post_scatter_eval
                jobobj[jobobj_id] = None
            elif "default" in inp:
                d = copy.copy(inp["default"])
                jobobj[jobobj_id] = d
            else:
                continue

        _logger.debug('{0}: Collected job object: \n {1}'.format(
            self.task_id, json.dumps(jobobj, indent=4)))

        def _post_scatter_eval(shortio, cwl_step):
            # Replaces the value of every input that declares "valueFrom"
            # with the result of evaluating that expression
            _value_from = {
                shortname(i["id"]).split("/")[-1]: i["valueFrom"]
                for i in cwl_step.tool["inputs"] if "valueFrom" in i
            }
            _logger.debug('{0}: Step inputs with valueFrom: \n{1}'.format(
                self.task_id, json.dumps(_value_from, indent=4)))

            def value_from_func(k, v):
                if k in _value_from:
                    return expression.do_eval(_value_from[k],
                                              shortio,
                                              self.cwlwf.tool.get(
                                                  "requirements", []),
                                              None,
                                              None, {},
                                              context=v)
                else:
                    return v

            return {k: value_from_func(k, v) for k, v in shortio.items()}

        job = _post_scatter_eval(jobobj, self.cwl_step)
        _logger.info('{0}: Final job data: \n {1}'.format(
            self.task_id, json.dumps(job, indent=4)))

        _d_args['outdir'] = tempfile.mkdtemp(
            prefix=os.path.join(self.outdir, "step_tmp"))
        _d_args['tmpdir_prefix'] = os.path.join(_d_args['outdir'], 'cwl_tmp_')
        _d_args['tmp_outdir_prefix'] = os.path.join(_d_args['outdir'],
                                                    'cwl_outdir_')

        _d_args["record_container_id"] = True
        _d_args["cidfile_dir"] = _d_args['outdir']
        _d_args["cidfile_prefix"] = self.task_id

        _logger.debug('{0}: Runtime context: \n {1}'.format(self, _d_args))

        executor = SingleJobExecutor()
        runtimeContext = RuntimeContext(_d_args)
        runtimeContext.make_fs_access = getdefault(
            runtimeContext.make_fs_access, StdFsAccess)

        for inp in self.cwl_step.tool["inputs"]:
            if inp.get("not_connected"):
                # pop() instead of del: an input that had no promise,
                # "valueFrom" or "default" was never added to the job, and
                # deleting it would raise KeyError
                job.pop(shortname(inp["id"].split("/")[-1]), None)

        _stderr = sys.stderr
        sys.stderr = sys.__stderr__
        try:
            (output, status) = executor(
                self.cwl_step.embedded_tool
                if it_is_workflow else self.cwl_step,
                job,
                runtimeContext,
                logger=_logger)
        finally:
            # Put the original stream back even when the executor raises
            sys.stderr = _stderr

        if not output and status == "permanentFail":
            raise ValueError("{0}: Step execution failed with status {1}".format(
                self.task_id, status))

        _logger.debug('{0}: Embedded tool outputs: \n {1}'.format(
            self.task_id, json.dumps(output, indent=4)))

        promises = {}

        for out in self.cwl_step.tool["outputs"]:

            out_id = shortname(out["id"])
            jobout_id = out_id.split("/")[-1]
            try:
                promises[out_id] = output[jobout_id]
            except (KeyError, TypeError):
                # Output wasn't produced (missing key) or there is no output
                # object at all (None): skip it
                continue

        # Unsetting the Generation from final output object
        visit_class(promises, ("File", ), MutationManager().unset_generation)

        data = {"promises": promises, "outdir": self.outdir}

        _logger.info('{0}: Output: \n {1}'.format(self.task_id,
                                                  json.dumps(data, indent=4)))

        return data
Ejemplo n.º 3
0
def main(args=None):
    """
    Command line entry point of C2WL-Rocket.

    Args:
        args: already parsed arguments (an object with at least the
            "subcommand" attribute and the attributes of that subcommand).
            When None, the arguments are parsed with argparse. For the
            "launch" subcommand "exec_profile" may be either a
            "module:Class" / "/path/to/file.py:Class" string or the exec
            profile class itself.

    Raises:
        AssertionError: if the exec profile can't be resolved to a class
            inheriting from ExecProfileBase, or if the CWL document has no
            "cwlVersion".
    """
    if args is None:
        parser = argparse.ArgumentParser(
            prog="C2WL-Rocket",
            description=
            'Customizable CWL Rocket - A highly flexible CWL execution engine.'
        )

        subparser = parser.add_subparsers(help="CWLab sub-commands",
                                          dest='subcommand')

        ## subcommand launch:
        parser_launch = subparser.add_parser(
            "launch",
            help="Start execution of a CWL workflow given run input parameter."
        )
        parser_launch.add_argument("--debug",
                                   action="store_true",
                                   help="Print debugging level messages.")

        parser_launch.add_argument(
            '-p',
            '--exec-profile',
            help="""Specify an exec profile.
                    Please specify the name to a python module and
                    a contained exec profile class sperated by \":\" 
                    (e.g. the default \"c2wl_rocket.exec_profile:LocalToolExec\").
                    Alternatively you can specify the full path to a python file
                    containing an exec profile class 
                    (e.g. \"/path/to/my/exec_profiles.py:CustomExec\").
                """,
            default="c2wl_rocket.exec_profile:LocalToolExec")

        parser_launch.add_argument('cwl_document',
                                   help="Provide a CWL workflow or tool.")

        parser_launch.add_argument(
            'input_params',
            nargs=argparse.REMAINDER,
            help="Provide input parameters in YAML or JSON format.")

        parser_launch.add_argument(
            "--outdir",
            type=typing_extensions.Text,
            help="Output directory, default current directory",
            default=os.path.abspath('.'))

        exgroup = parser_launch.add_mutually_exclusive_group()
        exgroup.add_argument(
            "--tmp-outdir-prefix",
            type=typing_extensions.Text,
            help="Path prefix for intermediate output directories",
            default=cwltool.utils.DEFAULT_TMP_PREFIX)

        exgroup.add_argument(
            "--cachedir",
            type=typing_extensions.Text,
            help=
            "Directory to cache intermediate workflow outputs to avoid recomputing steps.",
            default="")

        # The three options below share dest="move_outputs"
        exgroup = parser_launch.add_mutually_exclusive_group()
        exgroup.add_argument(
            "--move-outputs",
            action="store_const",
            const="move",
            default="move",
            help="Move output files to the workflow output directory and delete "
            "intermediate output directories (default).",
            dest="move_outputs")

        exgroup.add_argument(
            "--leave-outputs",
            action="store_const",
            const="leave",
            default="move",
            help="Leave output files in intermediate output directories.",
            dest="move_outputs")

        exgroup.add_argument("--copy-outputs",
                             action="store_const",
                             const="copy",
                             default="move",
                             help="""
                Copy output files to the workflow output directory, 
                don't delete intermediate output directories.
            """,
                             dest="move_outputs")

        # subcommand start_worker:
        parser_start_worker = subparser.add_parser(
            "start_worker", help="Start a worker service instance.")
        parser_start_worker.add_argument("-H",
                                         "--web_server_host",
                                         type=typing_extensions.Text,
                                         help="""
                IP of webserver host. 
                Specify \"0.0.0.0\" for remote availablity within
                the current network.
            """,
                                         default="localhost")
        parser_start_worker.add_argument("-P",
                                         "--web_server_port",
                                         type=typing_extensions.Text,
                                         help="""
                Port of webserver.
            """,
                                         default="5000")

        args = parser.parse_args()

    if args.subcommand == "launch":
        if isinstance(args.exec_profile, str):
            exec_profile_invalid_message = error_message("main",
                                                         """
                    The specified exec profile is invalid.
                    Please either specify a class inheriting from 
                    ExecProfileBase at c2wl_rocket.execprofile or
                    if using the commandline specify the name or path
                    to a module that containes such a class.
                    Please see the commandline help for details.
                """,
                                                         is_known=True)

            # Validation is raised explicitly (not via "assert") so that it
            # is still performed when python runs with -O
            if ":" not in args.exec_profile:
                raise AssertionError(exec_profile_invalid_message)
            exec_profile_module_name = args.exec_profile.split(":")[0]
            exec_profile_class_name = args.exec_profile.split(":")[1]

            try:
                exec_profile_module = importlib.import_module(
                    exec_profile_module_name)
            except Exception:
                # Not importable by module name: treat the value as a path to
                # a python file instead
                try:
                    spec = importlib.util.spec_from_file_location(
                        "exec_profile_module", exec_profile_module_name)
                    exec_profile_module = importlib.util.module_from_spec(spec)
                    spec.loader.exec_module(exec_profile_module)
                except Exception as err:
                    # f-string, so that the module name is really shown in
                    # the message; the cause is chained to keep the reason
                    # of the failed import visible
                    raise AssertionError(
                        error_message("main",
                                      f"""
                                The specified exec profile module \"{exec_profile_module_name}\"
                                could not be imported.
                            """,
                                      is_known=True)) from err

            if not hasattr(exec_profile_module, exec_profile_class_name):
                raise AssertionError(
                    error_message(
                        "main",
                        f"""
                        The specified exec profile module \"{exec_profile_module_name}\"
                        has no class \"{exec_profile_class_name}\".
                    """,
                        is_known=True
                    )
                )
            args.exec_profile = getattr(exec_profile_module,
                                        exec_profile_class_name)

        if not (isclass(args.exec_profile)
                and issubclass(args.exec_profile, ExecProfileBase)):
            raise AssertionError(
                error_message(
                    "main",
                    """
                        The specified exec profile class does not inherit
                        from ExecProfileBase at c2wl_rocket.execprofile.
                    """,
                    is_known=True
                )
            )

        cwltool_args = copy(cwltool_default_args)
        cwltool_args.workflow = args.cwl_document
        cwltool_args.job_order = args.input_params
        cwltool_args.outdir = args.outdir
        cwltool_args.tmp_outdir_prefix = args.tmp_outdir_prefix
        cwltool_args.cachedir = args.cachedir
        cwltool_args.move_outputs = args.move_outputs
        cwltool_args.debug = args.debug

        loading_context = cwltool.main.LoadingContext(vars(cwltool_args))
        with open(args.cwl_document, mode="r") as cwl:
            # NOTE(review): yaml.load() without an explicit Loader can
            # construct arbitrary objects in PyYAML and is rejected by recent
            # PyYAML releases. Consider a safe loader; which yaml package
            # this file imports is not visible from here - confirm
            cwl_content = yaml.load(cwl)
        if "cwlVersion" not in cwl_content:
            raise AssertionError(
                error_message(
                    "main",
                    "No cwlVersion as specified in the CWL document.",
                    is_known=True))
        workflow_metadata = {"cwlVersion": cwl_content["cwlVersion"]}
        loading_context.construct_tool_object = functools.partial(
            make_custom_tool,
            exec_profile_class=args.exec_profile,
            workflow_metadata=workflow_metadata)
        runtime_context = cwltool.main.RuntimeContext(vars(cwltool_args))
        job_executor = MultithreadedJobExecutor() if cwltool_args.parallel \
            else SingleJobExecutor()
        job_executor.max_ram = job_executor.max_cores = float("inf")

        # hand arguments over to main exec function:
        cwltool.main.main(args=cwltool_args,
                          executor=job_executor,
                          loadingContext=loading_context,
                          runtimeContext=runtime_context)

    elif args.subcommand == "start_worker":
        worker.start(web_server_host=args.web_server_host,
                     web_server_port=int(args.web_server_port))
Ejemplo n.º 4
0
def main(args=None):
    """Command line entry point of cwl-tes.

    "args" is the list of command line arguments; sys.argv[1:] is used when
    it is None. Returns 0 after printing the version, 1 when --tes is
    missing, otherwise whatever cwltool.main.main returns. Raises Exception
    when a token is given and can't be validated.
    """
    argv = sys.argv[1:] if args is None else args

    parser = arg_parser()
    options = parser.parse_args(argv)

    if options.version:
        print(versionstring())
        return 0

    if options.tes is None:
        print(versionstring())
        parser.print_usage()
        print("cwl-tes: error: argument --tes is required")
        return 1

    if options.token:
        try:
            public_key = options.token_public_key
            if not public_key:
                # No key supplied: look it up by the "kid" of the token header
                token_header = jwt.get_unverified_header(options.token)
                if 'kid' not in token_header:
                    raise Exception("Invalid token: has no kid in header.")
                public_key = load_public_key(token_header.get('kid'))

            unescaped_key = public_key.encode('utf-8').decode(
                'unicode_escape')
            jwt.decode(options.token, unescaped_key, algorithms=['RS256'])
        except Exception:
            # Any failure above is reported as an invalid token
            raise Exception('Token is not valid')

    # When both flags are set the debug level is applied last
    for enabled, level in ((options.quiet, logging.WARN),
                           (options.debug, logging.DEBUG)):
        if enabled:
            log.setLevel(level)

    def handle_sigint(*_unused):
        """Log the interruption and exit with status 1."""
        log.info("recieved control-c signal")
        log.info("terminating thread(s)...")
        log.warning("remote TES task(s) will keep running")
        sys.exit(1)

    signal.signal(signal.SIGINT, handle_sigint)

    shared_ftp_cache = {}

    class CachingFtpFsAccess(FtpFsAccess):
        """FtpFsAccess whose instances all receive the same cache dict."""
        def __init__(self, basedir, insecure=False):
            super(CachingFtpFsAccess, self).__init__(basedir,
                                                     shared_ftp_cache,
                                                     insecure=insecure)

    ftp_fs_access = CachingFtpFsAccess(os.curdir, insecure=options.insecure)
    if options.remote_storage_url:
        # Append a random uuid4 component to the remote storage url
        options.remote_storage_url = ftp_fs_access.join(
            options.remote_storage_url, str(uuid.uuid4()))

    # Both contexts are built from the (possibly updated) options
    loading_context = cwltool.main.LoadingContext(vars(options))
    loading_context.construct_tool_object = functools.partial(
        make_tes_tool,
        url=options.tes,
        remote_storage_url=options.remote_storage_url,
        token=options.token)

    runtime_context = cwltool.main.RuntimeContext(vars(options))
    runtime_context.make_fs_access = functools.partial(
        CachingFtpFsAccess, insecure=options.insecure)
    runtime_context.path_mapper = functools.partial(TESPathMapper,
                                                    fs_access=ftp_fs_access)

    if options.parallel:
        job_executor = MultithreadedJobExecutor()
    else:
        job_executor = SingleJobExecutor()
    no_limit = float("inf")
    job_executor.max_ram = no_limit
    job_executor.max_cores = no_limit

    tes_executor = functools.partial(
        tes_execute,
        job_executor=job_executor,
        loading_context=loading_context,
        remote_storage_url=options.remote_storage_url,
        ftp_access=ftp_fs_access)

    return cwltool.main.main(args=options,
                             executor=tes_executor,
                             loadingContext=loading_context,
                             runtimeContext=runtime_context,
                             versionfunc=versionstring,
                             logger_handler=console)
Ejemplo n.º 5
0
def run_native(config_object: 'ConfigBase', workflow: str, run_directory: str = '.', verbosity="normal",
               parallel: bool = True) -> int:
    """Executes the workflow using native Python rather than subprocess "command line"

    Args:
        config_object: a constructed ConfigBase-derived object
        workflow: the path to the workflow to be executed
        run_directory: the destination folder for workflow output subdirectories (default: CWD)
        verbosity: controls the depth of information written to terminal by cwltool
            ("debug" and "quiet" are recognized, anything else logs at INFO level)
        parallel: use MultithreadedJobExecutor when true, SingleJobExecutor otherwise.
            Defaults to True because that is the executor this function has effectively
            always used: the previous truthiness check was made on an executor wrapper
            object and therefore never selected the serial executor.

    Returns: 0 when the workflow ran, 1 when cwltool.factory.WorkflowStatus was raised

    """

    def furnish_if_file_record(file_dict):
        # Only dicts tagged with class "File" are touched; they are updated in place
        if isinstance(file_dict, dict) and file_dict.get('class') == 'File':
            file_dict['basename'] = os.path.basename(file_dict['path'])
            file_dict['location'] = file_dict['path']
            file_dict['contents'] = None

    # Upgrade file entries in Run Config with extra descriptors cwltool expects
    for config_param in config_object.config.values():
        if isinstance(config_param, list):
            for config_dict in config_param:
                furnish_if_file_record(config_dict)
        else:
            furnish_if_file_record(config_param)

    # Set overall config for cwltool
    runtime_context = RuntimeContext({
        'secret_store': cwltool.secrets.SecretStore(),
        'outdir': run_directory,
        'on_error': "continue",
        'js_console': verbosity == "debug",
        'debug': verbosity == "debug"
    })

    # Set proper temp directory for Mac users
    if sys.platform == "darwin":
        default_mac_path = "/private/tmp/docker_tmp"
        if runtime_context.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
            runtime_context.tmp_outdir_prefix = default_mac_path
        if runtime_context.tmpdir_prefix == DEFAULT_TMP_PREFIX:
            runtime_context.tmpdir_prefix = default_mac_path

    # Enable rich terminal output (timestamp, color, formatting)
    logger = logging.getLogger("cwltool")
    logger.handlers.clear()  # executors.py loads a default handler; outputs are printed twice if we don't clear it
    level = 'DEBUG' if verbosity == 'debug' else 'WARN' if verbosity == "quiet" else "INFO"
    coloredlogs.install(logger=logger, stream=sys.stderr, fmt="[%(asctime)s] %(levelname)s %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S", level=level, isatty=True)

    # Wrap the chosen executor so that we may pass our logger to it (unsupported by Factory)
    executor_class = MultithreadedJobExecutor if parallel else SingleJobExecutor
    executor = functools.partial(executor_class(), logger=logger)

    # Instantiate Factory with our run preferences
    cwl = cwltool.factory.Factory(
        runtime_context=runtime_context,
        loading_context=LoadingContext({'relax_path_checks': True}),
        executor=executor
    )

    try:
        # Load the workflow document and execute
        pipeline = cwl.make(workflow)
        pipeline(**config_object.config)
    except cwltool.factory.WorkflowStatus:
        # For now, return non-zero if workflow did not complete
        return 1

    return 0
Ejemplo n.º 6
0
def execute_workflow_step(workflow,
                          task_id,
                          job_data,
                          cwl_args=None,
                          executor=None):
    """
    Constructs and executes single step workflow based on the "workflow"
    and "task_id". "cwl_args" can be used to update default parameters
    used for loading and runtime contexts. Exports json file with the
    execution results. If the step was evaluated as the one that need to
    be skipped, the output "skipped" will set to True and the step_report
    file will include "nulls". This function doesn't remove any temporary
    data in both success and failure scenarios.

    Args:
        workflow: workflow passed to "fast_cwl_step_load" to extract the step.
        task_id: id of the step to run. Also used as the cidfile prefix and
            as a part of the generated single step workflow file name.
        job_data: job object passed to "get_temp_folders", "need_to_run"
            and to the executor.
        cwl_args: optional overrides passed to "get_default_cwl_args".
        executor: optional callable invoked as
            executor(loaded_workflow, job_data, runtime_context) that returns
            (outputs, status). SingleJobExecutor() is used when omitted.

    Returns:
        Tuple of (step_outputs, step_report, skipped), where step_report is
        the location the outputs were dumped to as json.

    Raises:
        ValueError: if the step was run and the executor reported a status
            other than "success".
    """

    cwl_args = {} if cwl_args is None else cwl_args
    executor = SingleJobExecutor() if executor is None else executor

    step_tmp_folder, step_cache_folder, step_outputs_folder, step_report = get_temp_folders(
        task_id=task_id, job_data=job_data)

    default_cwl_args = get_default_cwl_args(cwl_args)

    default_cwl_args.update({  # add execution specific parameters
        "tmp_outdir_prefix": step_cache_folder + "/",
        "tmpdir_prefix": step_cache_folder + "/",
        "cidfile_dir": step_tmp_folder,
        "cidfile_prefix": task_id,
        "basedir": os.getcwd(
        ),  # job should already have abs path for inputs, so this is useless
        "outdir": step_outputs_folder
    })

    workflow_step_path = os.path.join(step_tmp_folder,
                                      task_id + "_step_workflow.cwl")

    fast_cwl_step_load(  # will save new workflow to "workflow_step_path"
        workflow=workflow,
        target_id=task_id,
        cwl_args=default_cwl_args,
        location=workflow_step_path)

    workflow_data = slow_cwl_load(workflow=workflow_step_path,
                                  cwl_args=default_cwl_args)

    # Start from the "skipped" result (every output is None); it is replaced
    # by the real outputs only if the step is actually run
    skipped = True
    step_outputs = {
        output_id: None
        for output_id, _ in get_items(workflow_data.tool["outputs"])
    }
    if need_to_run(workflow_data, job_data, task_id):
        skipped = False
        _stderr = sys.stderr  # to trick the logger
        sys.stderr = sys.__stderr__
        try:
            step_outputs, step_status = executor(
                workflow_data, job_data, RuntimeContext(default_cwl_args))
        finally:
            # Always put the original stream back, otherwise a failure in
            # the executor leaves sys.stderr replaced for the rest of the
            # process
            sys.stderr = _stderr

        if step_status != "success":
            raise ValueError("Failed to run workflow step")

        # To remove "http://commonwl.org/cwltool#generation": 0 (copied from cwltool)
        visit_class(step_outputs, ("File", ),
                    MutationManager().unset_generation)

    dump_json(step_outputs, step_report)

    return step_outputs, step_report, skipped