Example no. 1
0
def tes_execute(
        process,  # type: Process
        job_order,  # type: Dict[Text, Any]
        runtime_context,  # type: RuntimeContext
        job_executor,  # type: JobExecutor
        loading_context,  # type: LoadingContext
        remote_storage_url,
        ftp_access,
        logger=log):  # type: (...) -> Tuple[Optional[Dict[Text, Any]], Text]
    """
    Upload to the remote_storage_url (if needed) and execute.

    Adapted from:
    https://github.com/curoverse/arvados/blob/2b0b06579199967eca3d44d955ad64195d2db3c3/sdk/cwl/arvados_cwl/__init__.py#L407
    """
    if remote_storage_url:
        upload_workflow_deps_ftp(process, remote_storage_url, ftp_access)
        # The upload may have rewritten locations inside the tool document,
        # so rebuild the tool object from the loader's index entry.
        fresh_context = loading_context.copy()
        fresh_context.loader = process.doc_loader
        fresh_context.avsc_names = process.doc_schema
        fresh_context.metadata = process.metadata
        # Skip re-validation here: it would only repeat earlier diagnostics.
        fresh_context.do_validate = False
        tool_document = process.doc_loader.idx[process.tool["id"]]
        process = fresh_context.construct_tool_object(
            tool_document, fresh_context)
        loading_context = fresh_context
        job_order = upload_job_order_ftp(
            process, job_order, remote_storage_url, ftp_access)

    # Fall back to a multithreaded executor when the caller supplied none.
    executor = job_executor if job_executor else MultithreadedJobExecutor()
    return executor(process, job_order, runtime_context, logger)
Example no. 2
0
def test_scattered_workflow() -> None:
    """The scatter workflow should pair its two scattered inputs."""
    workflow_path = get_data("tests/wf/scatter-wf4.cwl")
    factory = get_windows_safe_factory(executor=MultithreadedJobExecutor())
    echo = factory.make(workflow_path)
    with open(get_data("tests/wf/scatter-job2.json")) as job_stream:
        job_params = json.load(job_stream)
    assert echo(**job_params) == {"out": ["foo one three", "foo two four"]}
Example no. 3
0
 def test_scattered_workflow(self):
     """The scatter workflow should pair its two scattered inputs."""
     factory = get_windows_safe_factory(executor=MultithreadedJobExecutor())
     echo = factory.make(get_data("tests/wf/scatter-wf4.cwl"))
     with open(get_data("tests/wf/scatter-job2.json")) as job:
         job_order = json.load(job)
     expected = {'out': ['foo one three', 'foo two four']}
     self.assertEqual(echo(**job_order), expected)
Example no. 4
0
 def test_sequential_workflow(self):
     """The count-lines workflow should report 16 lines in whale.txt."""
     factory = cwltool.factory.Factory(executor=MultithreadedJobExecutor())
     count_lines = factory.make(get_data("tests/wf/count-lines1-wf.cwl"))
     whale = {"class": "File", "location": get_data("tests/wf/whale.txt")}
     self.assertEqual(count_lines(file1=whale), {"count_output": 16})
Example no. 5
0
def test_scattered_workflow():
    """The scatter workflow should pair its two scattered inputs."""
    # Reset the shared loader cache so this run starts from a clean state.
    load_tool.loaders = {}
    factory = get_windows_safe_factory(executor=MultithreadedJobExecutor())
    echo = factory.make(get_data("tests/wf/scatter-wf4.cwl"))
    with open(get_data("tests/wf/scatter-job2.json")) as job_stream:
        params = json.load(job_stream)
    expected = {'out': ['foo one three', 'foo two four']}
    assert echo(**params) == expected
Example no. 6
0
def test_sequential_workflow(tmp_path: Path) -> None:
    """The count-lines workflow should report 16 lines in whale.txt."""
    executor = MultithreadedJobExecutor()
    run_ctx = RuntimeContext()
    run_ctx.outdir = str(tmp_path)
    # Let the multithreaded executor arbitrate resource requests.
    run_ctx.select_resources = executor.select_resources
    factory = get_windows_safe_factory(
        executor=executor, runtime_context=run_ctx
    )
    count_lines = factory.make(get_data("tests/wf/count-lines1-wf.cwl"))
    whale = {"class": "File", "location": get_data("tests/wf/whale.txt")}
    assert count_lines(file1=whale) == {"count_output": 16}
 def test_sequential_workflow(self):
     """The count-lines workflow should report 16 lines in whale.txt."""
     executor = MultithreadedJobExecutor()
     run_ctx = RuntimeContext()
     # Let the multithreaded executor arbitrate resource requests.
     run_ctx.select_resources = executor.select_resources
     factory = get_windows_safe_factory(executor=executor,
                                        runtime_context=run_ctx)
     count_lines = factory.make(get_data("tests/wf/count-lines1-wf.cwl"))
     whale = {"class": "File", "location": get_data("tests/wf/whale.txt")}
     self.assertEqual(count_lines(file1=whale), {"count_output": 16})
Example no. 8
0
def main(args=None):
    """Command-line entry point for cwl-tes.

    Parses arguments, installs a SIGINT handler, and delegates execution
    to cwltool with a TES-aware tool constructor.

    Args:
        args: argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        int: 0 on success/version display, 1 on usage error, otherwise
        the exit code of ``cwltool.main.main``.
    """
    if args is None:
        args = sys.argv[1:]

    parser = arg_parser()
    parsed_args = parser.parse_args(args)

    if parsed_args.version:
        print(versionstring())
        return 0

    # --tes is mandatory; report the usage error but show the version
    # banner first, mirroring argparse's own error style.
    if parsed_args.tes is None:
        print(versionstring())
        parser.print_usage()
        print("cwl-tes: error: argument --tes is required")
        return 1

    if parsed_args.quiet:
        log.setLevel(logging.WARN)
    if parsed_args.debug:
        log.setLevel(logging.DEBUG)

    # setup signal handler
    def signal_handler(*args):
        """Handle Ctrl-C: stop local threads; remote tasks keep running."""
        log.info("received control-c signal")  # fixed typo: "recieved"
        log.info("terminating thread(s)...")
        log.warning("remote TES task(s) will keep running")
        sys.exit(1)

    signal.signal(signal.SIGINT, signal_handler)

    loading_context = cwltool.main.LoadingContext(vars(parsed_args))
    # Build tool objects that submit their jobs to the given TES endpoint.
    loading_context.construct_tool_object = functools.partial(
        make_tes_tool, url=parsed_args.tes)
    return cwltool.main.main(args=parsed_args,
                             executor=MultithreadedJobExecutor(),
                             loadingContext=loading_context,
                             versionfunc=versionstring,
                             logger_handler=console)
Example no. 9
0
def main(args=None):
    """Command-line entry point for C2WL-Rocket.

    When *args* is None, builds the argument parser (``launch`` and
    ``start_worker`` sub-commands) and parses ``sys.argv``; otherwise
    *args* is treated as an already-parsed namespace.  ``launch`` resolves
    the exec profile class, prepares cwltool contexts and runs the
    workflow; ``start_worker`` starts the worker web service.
    """
    if args is None:
        parser = argparse.ArgumentParser(
            prog="C2WL-Rocket",
            description=
            'Customizable CWL Rocket - A highly flexible CWL execution engine.'
        )

        subparser = parser.add_subparsers(help="CWLab sub-commands",
                                          dest='subcommand')

        # subcommand launch:
        parser_launch = subparser.add_parser(
            "launch",
            help="Start execution of a CWL workflow given run input parameter."
        )
        parser_launch.add_argument("--debug",
                                   action="store_true",
                                   help="Print debugging level messages.")

        parser_launch.add_argument(
            '-p',
            '--exec-profile',
            help="""Specify an exec profile.
                    Please specify the name to a python module and
                    a contained exec profile class separated by \":\"
                    (e.g. the default \"c2wl_rocket.exec_profile:LocalToolExec\").
                    Alternatively you can specify the full path to a python file
                    containing an exec profile class
                    (e.g. \"/path/to/my/exec_profiles.py:CustomExec\").
                """,
            default="c2wl_rocket.exec_profile:LocalToolExec")

        parser_launch.add_argument('cwl_document',
                                   help="Provide a CWL workflow or tool.")

        parser_launch.add_argument(
            'input_params',
            nargs=argparse.REMAINDER,
            help="Provide input parameters in YAML or JSON format.")

        parser_launch.add_argument(
            "--outdir",
            type=typing_extensions.Text,
            help="Output directory, default current directory",
            default=os.path.abspath('.'))

        exgroup = parser_launch.add_mutually_exclusive_group()
        exgroup.add_argument(
            "--tmp-outdir-prefix",
            type=typing_extensions.Text,
            help="Path prefix for intermediate output directories",
            default=cwltool.utils.DEFAULT_TMP_PREFIX)

        exgroup.add_argument(
            "--cachedir",
            type=typing_extensions.Text,
            help=
            "Directory to cache intermediate workflow outputs to avoid recomputing steps.",
            default="")

        exgroup = parser_launch.add_mutually_exclusive_group()
        exgroup.add_argument(
            "--move-outputs",
            action="store_const",
            const="move",
            default="move",
            help="Move output files to the workflow output directory and delete "
            "intermediate output directories (default).",
            dest="move_outputs")

        exgroup.add_argument(
            "--leave-outputs",
            action="store_const",
            const="leave",
            default="move",
            help="Leave output files in intermediate output directories.",
            dest="move_outputs")

        exgroup.add_argument("--copy-outputs",
                             action="store_const",
                             const="copy",
                             default="move",
                             help="""
                Copy output files to the workflow output directory,
                don't delete intermediate output directories.
            """,
                             dest="move_outputs")

        # subcommand start_worker:
        parser_start_worker = subparser.add_parser(
            "start_worker", help="Start a worker service instance.")
        parser_start_worker.add_argument("-H",
                                         "--web_server_host",
                                         type=typing_extensions.Text,
                                         help="""
                IP of webserver host.
                Specify \"0.0.0.0\" for remote availability within
                the current network.
            """,
                                         default="localhost")
        parser_start_worker.add_argument("-P",
                                         "--web_server_port",
                                         type=typing_extensions.Text,
                                         help="""
                Port of webserver.
            """,
                                         default="5000")

        args = parser.parse_args()

    if args.subcommand == "launch":
        # A string exec profile ("module:Class") must be resolved to the
        # actual class object before use.
        if isinstance(args.exec_profile, str):
            exec_profile_invalid_message = error_message("main",
                                                         """
                    The specified exec profile is invalid.
                    Please either specify a class inheriting from
                    ExecProfileBase at c2wl_rocket.execprofile or
                    if using the commandline specify the name or path
                    to a module that contains such a class.
                    Please see the commandline help for details.
                """,
                                                         is_known=True)

            assert ":" in args.exec_profile, \
                exec_profile_invalid_message
            exec_profile_module_name = args.exec_profile.split(":")[0]
            exec_profile_class_name = args.exec_profile.split(":")[1]

            try:
                # First try an importable module name ...
                exec_profile_module = importlib.import_module(
                    exec_profile_module_name)
            except Exception:  # was a bare except: don't swallow SystemExit
                try:
                    # ... then fall back to loading from a file path.
                    spec = importlib.util.spec_from_file_location(
                        "exec_profile_module", exec_profile_module_name)
                    exec_profile_module = importlib.util.module_from_spec(spec)
                    spec.loader.exec_module(exec_profile_module)
                except Exception:  # was a bare except
                    # BUG FIX: the f-prefix was missing, so the module name
                    # placeholder was never interpolated into the message.
                    raise AssertionError(
                        error_message("main",
                                      f"""
                                The specified exec profile module \"{exec_profile_module_name}\"
                                could not be imported.
                            """,
                                      is_known=True))

            assert hasattr(exec_profile_module, exec_profile_class_name), \
                error_message(
                    "main",
                    f"""
                        The specified exec profile module \"{exec_profile_module_name}\"
                        has no class \"{exec_profile_class_name}\".
                    """,
                    is_known=True
                )
            args.exec_profile = getattr(exec_profile_module,
                                        exec_profile_class_name)

        # NOTE(review): assert-based validation is stripped under
        # ``python -O``; kept for compatibility with existing behavior.
        assert isclass(args.exec_profile) and issubclass(args.exec_profile, ExecProfileBase), \
                error_message(
                    "main",
                    """
                        The specified exec profile class does not inherit
                        from ExecProfileBase at c2wl_rocket.execprofile.
                    """,
                    is_known=True
                )

        # Start from the cwltool defaults and overlay our own options.
        cwltool_args = copy(cwltool_default_args)
        cwltool_args.workflow = args.cwl_document
        cwltool_args.job_order = args.input_params
        cwltool_args.outdir = args.outdir
        cwltool_args.tmp_outdir_prefix = args.tmp_outdir_prefix
        cwltool_args.cachedir = args.cachedir
        cwltool_args.move_outputs = args.move_outputs
        cwltool_args.debug = args.debug

        loading_context = cwltool.main.LoadingContext(vars(cwltool_args))
        # BUG FIX: yaml.load without an explicit Loader is deprecated and a
        # TypeError on PyYAML >= 6; safe_load is correct for CWL documents
        # (plain YAML, user-supplied input).
        with open(args.cwl_document, mode="r") as cwl:
            cwl_content = yaml.safe_load(cwl)
        assert "cwlVersion" in cwl_content.keys(), error_message(
            "main",
            "No cwlVersion specified in the CWL document.",
            is_known=True)
        workflow_metadata = {"cwlVersion": cwl_content["cwlVersion"]}
        loading_context.construct_tool_object = functools.partial(
            make_custom_tool,
            exec_profile_class=args.exec_profile,
            workflow_metadata=workflow_metadata)
        runtime_context = cwltool.main.RuntimeContext(vars(cwltool_args))
        job_executor = MultithreadedJobExecutor() if cwltool_args.parallel \
            else SingleJobExecutor()
        # Lift the executor's resource caps; scheduling is handled elsewhere.
        job_executor.max_ram = job_executor.max_cores = float("inf")

        # hand arguments over to main exec function:
        cwltool.main.main(args=cwltool_args,
                          executor=job_executor,
                          loadingContext=loading_context,
                          runtimeContext=runtime_context)

    elif args.subcommand == "start_worker":
        worker.start(web_server_host=args.web_server_host,
                     web_server_port=int(args.web_server_port))
Example no. 10
0
def main(args=None):
    """Main entrypoint for cwl-tes.

    Parses command-line arguments, validates the optional JWT token,
    wires FTP-aware filesystem access into cwltool, and runs the workflow
    through a TES-backed executor.

    Args:
        args: argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        int: 0 on success/version display, 1 on usage error, otherwise
        the exit code of ``cwltool.main.main``.

    Raises:
        Exception: if a supplied token cannot be verified.
    """
    if args is None:
        args = sys.argv[1:]

    parser = arg_parser()
    parsed_args = parser.parse_args(args)

    if parsed_args.version:
        print(versionstring())
        return 0

    if parsed_args.tes is None:
        print(versionstring())
        parser.print_usage()
        print("cwl-tes: error: argument --tes is required")
        return 1

    if parsed_args.token:
        try:
            token_public_key = parsed_args.token_public_key
            if not token_public_key:
                # No key supplied: derive it from the token's key id (kid).
                header = jwt.get_unverified_header(parsed_args.token)
                if 'kid' in header:
                    token_public_key = load_public_key(header.get('kid'))
                else:
                    raise Exception("Invalid token: has no kid in header.")

            jwt.decode(
                parsed_args.token,
                token_public_key.encode('utf-8').decode('unicode_escape'),
                algorithms=['RS256'])
        except Exception as err:
            # Chain the original failure so the real cause stays visible.
            raise Exception('Token is not valid') from err

    if parsed_args.quiet:
        log.setLevel(logging.WARN)
    if parsed_args.debug:
        log.setLevel(logging.DEBUG)

    def signal_handler(*args):  # pylint: disable=unused-argument
        """Handle Ctrl-C: stop local threads; remote tasks keep running."""
        log.info("received control-c signal")  # fixed typo: "recieved"
        log.info("terminating thread(s)...")
        log.warning("remote TES task(s) will keep running")
        sys.exit(1)

    signal.signal(signal.SIGINT, signal_handler)

    ftp_cache = {}

    class CachingFtpFsAccess(FtpFsAccess):
        """Ensures that the FTP connection cache is shared."""
        def __init__(self, basedir, insecure=False):
            super(CachingFtpFsAccess, self).__init__(basedir,
                                                     ftp_cache,
                                                     insecure=insecure)

    ftp_fs_access = CachingFtpFsAccess(os.curdir,
                                       insecure=parsed_args.insecure)
    if parsed_args.remote_storage_url:
        # Give every run its own unique remote working directory.
        parsed_args.remote_storage_url = ftp_fs_access.join(
            parsed_args.remote_storage_url, str(uuid.uuid4()))
    loading_context = cwltool.main.LoadingContext(vars(parsed_args))
    loading_context.construct_tool_object = functools.partial(
        make_tes_tool,
        url=parsed_args.tes,
        remote_storage_url=parsed_args.remote_storage_url,
        token=parsed_args.token)
    runtime_context = cwltool.main.RuntimeContext(vars(parsed_args))
    runtime_context.make_fs_access = functools.partial(
        CachingFtpFsAccess, insecure=parsed_args.insecure)
    runtime_context.path_mapper = functools.partial(TESPathMapper,
                                                    fs_access=ftp_fs_access)
    job_executor = MultithreadedJobExecutor() if parsed_args.parallel \
        else SingleJobExecutor()
    # Resource scheduling is delegated to the remote TES server.
    job_executor.max_ram = job_executor.max_cores = float("inf")
    executor = functools.partial(
        tes_execute,
        job_executor=job_executor,
        loading_context=loading_context,
        remote_storage_url=parsed_args.remote_storage_url,
        ftp_access=ftp_fs_access)
    return cwltool.main.main(args=parsed_args,
                             executor=executor,
                             loadingContext=loading_context,
                             runtimeContext=runtime_context,
                             versionfunc=versionstring,
                             logger_handler=console)
Example no. 11
0
def run_native(config_object: 'ConfigBase', workflow: str, run_directory: str = '.',
               verbosity: str = "normal", parallel: bool = True) -> int:
    """Executes the workflow using native Python rather than subprocess "command line"

    Args:
        config_object: a constructed ConfigBase-derived object
        workflow: the path to the workflow to be executed
        run_directory: the destination folder for workflow output subdirectories (default: CWD)
        verbosity: controls the depth of information written to terminal by cwltool
        parallel: process steps in parallel where possible (default: True)

    Returns:
        0 if the workflow completed, 1 if it raised WorkflowStatus
    """

    def furnish_if_file_record(file_dict):
        # cwltool expects File records to also carry basename/location/contents
        if isinstance(file_dict, dict) and file_dict.get('class', None) == 'File':
            file_dict['basename'] = os.path.basename(file_dict['path'])
            file_dict['location'] = file_dict['path']
            file_dict['contents'] = None

    # Upgrade file entries in Run Config with extra descriptors cwltool expects
    for _, config_param in config_object.config.items():
        if isinstance(config_param, list):
            for config_dict in config_param:
                furnish_if_file_record(config_dict)
        else:
            furnish_if_file_record(config_param)

    # Set overall config for cwltool
    runtime_context = RuntimeContext({
        'secret_store': cwltool.secrets.SecretStore(),
        'outdir': run_directory,
        'on_error': "continue",
        'js_console': verbosity == "debug",
        'debug': verbosity == "debug"
    })

    # Set proper temp directory for Mac users
    if sys.platform == "darwin":
        default_mac_path = "/private/tmp/docker_tmp"
        if runtime_context.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
            runtime_context.tmp_outdir_prefix = default_mac_path
        if runtime_context.tmpdir_prefix == DEFAULT_TMP_PREFIX:
            runtime_context.tmpdir_prefix = default_mac_path

    # Enable rich terminal output (timestamp, color, formatting)
    logger = logging.getLogger("cwltool")
    logger.handlers.clear()  # executors.py loads a default handler; outputs are printed twice if we don't clear it
    level = 'DEBUG' if verbosity == 'debug' else 'WARN' if verbosity == "quiet" else "INFO"
    coloredlogs.install(logger=logger, stream=sys.stderr, fmt="[%(asctime)s] %(levelname)s %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S", level=level, isatty=True)

    # Wrap the chosen executor so we may pass our logger to it (unsupported by Factory).
    # BUG FIX: the previous `parallel if parallel else serial` tested a
    # functools.partial object (always truthy), so the serial branch was
    # dead code; `parallel` is now a real boolean parameter.
    if parallel:
        executor = functools.partial(MultithreadedJobExecutor(), logger=logger)
    else:
        executor = functools.partial(SingleJobExecutor(), logger=logger)

    # Instantiate Factory with our run preferences
    cwl = cwltool.factory.Factory(
        runtime_context=runtime_context,
        loading_context=LoadingContext({'relax_path_checks': True}),
        executor=executor
    )

    try:
        # Load the workflow document and execute
        pipeline = cwl.make(workflow)
        pipeline(**config_object.config)
    except cwltool.factory.WorkflowStatus:
        # For now, return non-zero if workflow did not complete
        return 1

    return 0