def tes_execute(process,             # type: Process
                job_order,           # type: Dict[Text, Any]
                runtime_context,     # type: RuntimeContext
                job_executor,        # type: JobExecutor
                loading_context,     # type: LoadingContext
                remote_storage_url,
                ftp_access,
                logger=log):
    # type: (...) -> Tuple[Optional[Dict[Text, Any]], Text]
    """
    Upload to the remote_storage_url (if needed) and execute.

    Adapted from:
    https://github.com/curoverse/arvados/blob/2b0b06579199967eca3d44d955ad64195d2db3c3/sdk/cwl/arvados_cwl/__init__.py#L407
    """
    if remote_storage_url:
        upload_workflow_deps_ftp(process, remote_storage_url, ftp_access)
        # Reload tool object which may have been updated by
        # upload_workflow_deps.
        # Don't validate this time because it will just print redundant
        # errors.
        loading_context = loading_context.copy()
        loading_context.loader = process.doc_loader
        loading_context.avsc_names = process.doc_schema
        loading_context.metadata = process.metadata
        loading_context.do_validate = False
        process = loading_context.construct_tool_object(
            process.doc_loader.idx[process.tool["id"]], loading_context)
        job_order = upload_job_order_ftp(process, job_order,
                                         remote_storage_url, ftp_access)
    if not job_executor:
        job_executor = MultithreadedJobExecutor()
    return job_executor(process, job_order, runtime_context, logger)

def test_scattered_workflow() -> None:
    test_file = "tests/wf/scatter-wf4.cwl"
    job_file = "tests/wf/scatter-job2.json"
    factory = get_windows_safe_factory(executor=MultithreadedJobExecutor())
    echo = factory.make(get_data(test_file))
    with open(get_data(job_file)) as job:
        assert echo(**json.load(job)) == {"out": ["foo one three", "foo two four"]}

def test_scattered_workflow(self):
    test_file = "tests/wf/scatter-wf4.cwl"
    job_file = "tests/wf/scatter-job2.json"
    f = get_windows_safe_factory(executor=MultithreadedJobExecutor())
    echo = f.make(get_data(test_file))
    with open(get_data(job_file)) as job:
        self.assertEqual(echo(**json.load(job)),
                         {'out': ['foo one three', 'foo two four']})

def test_sequential_workflow(self):
    test_file = "tests/wf/count-lines1-wf.cwl"
    f = cwltool.factory.Factory(executor=MultithreadedJobExecutor())
    echo = f.make(get_data(test_file))
    self.assertEqual(
        echo(file1={
            "class": "File",
            "location": get_data("tests/wf/whale.txt")
        }),
        {"count_output": 16})

def test_scattered_workflow():
    load_tool.loaders = {}
    test_file = "tests/wf/scatter-wf4.cwl"
    job_file = "tests/wf/scatter-job2.json"
    factory = get_windows_safe_factory(executor=MultithreadedJobExecutor())
    echo = factory.make(get_data(test_file))
    with open(get_data(job_file)) as job:
        assert echo(**json.load(job)) == {
            'out': ['foo one three', 'foo two four']
        }

def test_sequential_workflow(tmp_path: Path) -> None:
    test_file = "tests/wf/count-lines1-wf.cwl"
    executor = MultithreadedJobExecutor()
    runtime_context = RuntimeContext()
    runtime_context.outdir = str(tmp_path)
    runtime_context.select_resources = executor.select_resources
    factory = get_windows_safe_factory(
        executor=executor, runtime_context=runtime_context
    )
    echo = factory.make(get_data(test_file))
    file_contents = {"class": "File", "location": get_data("tests/wf/whale.txt")}
    assert echo(file1=file_contents) == {"count_output": 16}

def test_sequential_workflow(self):
    test_file = "tests/wf/count-lines1-wf.cwl"
    executor = MultithreadedJobExecutor()
    runtime_context = RuntimeContext()
    runtime_context.select_resources = executor.select_resources
    factory = get_windows_safe_factory(executor=executor,
                                       runtime_context=runtime_context)
    echo = factory.make(get_data(test_file))
    self.assertEqual(
        echo(file1={
            "class": "File",
            "location": get_data("tests/wf/whale.txt")
        }),
        {"count_output": 16})

def main(args=None):
    if args is None:
        args = sys.argv[1:]

    parser = arg_parser()
    parsed_args = parser.parse_args(args)

    if parsed_args.version:
        print(versionstring())
        return 0

    if parsed_args.tes is None:
        print(versionstring())
        parser.print_usage()
        print("cwl-tes: error: argument --tes is required")
        return 1

    if parsed_args.quiet:
        log.setLevel(logging.WARN)
    if parsed_args.debug:
        log.setLevel(logging.DEBUG)

    # Set up the signal handler.
    def signal_handler(*args):
        log.info("received control-c signal")
        log.info("terminating thread(s)...")
        log.warning("remote TES task(s) will keep running")
        sys.exit(1)

    signal.signal(signal.SIGINT, signal_handler)

    loading_context = cwltool.main.LoadingContext(vars(parsed_args))
    loading_context.construct_tool_object = functools.partial(
        make_tes_tool, url=parsed_args.tes)

    return cwltool.main.main(args=parsed_args,
                             executor=MultithreadedJobExecutor(),
                             loadingContext=loading_context,
                             versionfunc=versionstring,
                             logger_handler=console)

def main(args=None):
    if args is None:
        parser = argparse.ArgumentParser(
            prog="C2WL-Rocket",
            description="Customizable CWL Rocket - "
                        "A highly flexible CWL execution engine.")
        subparser = parser.add_subparsers(help="CWLab sub-commands",
                                          dest='subcommand')

        # subcommand launch:
        parser_launch = subparser.add_parser(
            "launch",
            help="Start execution of a CWL workflow given run input parameter.")
        parser_launch.add_argument("--debug",
                                   action="store_true",
                                   help="Print debugging level messages.")
        parser_launch.add_argument(
            '-p', '--exec-profile',
            help="""Specify an exec profile.
                Please specify the name of a python module
                and a contained exec profile class separated by \":\"
                (e.g. the default \"c2wl_rocket.exec_profile:LocalToolExec\").
                Alternatively you can specify the full path
                to a python file containing an exec profile class
                (e.g. \"/path/to/my/exec_profiles.py:CustomExec\").
            """,
            default="c2wl_rocket.exec_profile:LocalToolExec")
        parser_launch.add_argument('cwl_document',
                                   help="Provide a CWL workflow or tool.")
        parser_launch.add_argument(
            'input_params',
            nargs=argparse.REMAINDER,
            help="Provide input parameters in YAML or JSON format.")
        parser_launch.add_argument(
            "--outdir",
            type=typing_extensions.Text,
            help="Output directory, default current directory",
            default=os.path.abspath('.'))
        exgroup = parser_launch.add_mutually_exclusive_group()
        exgroup.add_argument(
            "--tmp-outdir-prefix",
            type=typing_extensions.Text,
            help="Path prefix for intermediate output directories",
            default=cwltool.utils.DEFAULT_TMP_PREFIX)
        exgroup.add_argument(
            "--cachedir",
            type=typing_extensions.Text,
            help="Directory to cache intermediate workflow outputs to avoid "
                 "recomputing steps.",
            default="")
        exgroup = parser_launch.add_mutually_exclusive_group()
        exgroup.add_argument(
            "--move-outputs",
            action="store_const",
            const="move",
            default="move",
            help="Move output files to the workflow output directory and "
                 "delete intermediate output directories (default).",
            dest="move_outputs")
        exgroup.add_argument(
            "--leave-outputs",
            action="store_const",
            const="leave",
            default="move",
            help="Leave output files in intermediate output directories.",
            dest="move_outputs")
        exgroup.add_argument(
            "--copy-outputs",
            action="store_const",
            const="copy",
            default="move",
            help="Copy output files to the workflow output directory, "
                 "don't delete intermediate output directories.",
            dest="move_outputs")

        # subcommand start_worker:
        parser_start_worker = subparser.add_parser(
            "start_worker",
            help="Start a worker service instance.")
        parser_start_worker.add_argument(
            "-H", "--web_server_host",
            type=typing_extensions.Text,
            help="IP of webserver host. Specify \"0.0.0.0\" for remote "
                 "availability within the current network.",
            default="localhost")
        parser_start_worker.add_argument(
            "-P", "--web_server_port",
            type=typing_extensions.Text,
            help="Port of webserver.",
            default="5000")

        args = parser.parse_args()

    if args.subcommand == "launch":
        if isinstance(args.exec_profile, str):
            exec_profile_invalid_message = error_message(
                "main",
                """
                The specified exec profile is invalid.
                Please either specify a class inheriting from
                ExecProfileBase at c2wl_rocket.execprofile or,
                if using the commandline, specify the name or path
                to a module that contains such a class.
                Please see the commandline help for details.
                """,
                is_known=True)
            assert ":" in args.exec_profile, exec_profile_invalid_message
            exec_profile_module_name = args.exec_profile.split(":")[0]
            exec_profile_class_name = args.exec_profile.split(":")[1]
            try:
                exec_profile_module = importlib.import_module(
                    exec_profile_module_name)
            except Exception:
                try:
                    spec = importlib.util.spec_from_file_location(
                        "exec_profile_module", exec_profile_module_name)
                    exec_profile_module = importlib.util.module_from_spec(spec)
                    spec.loader.exec_module(exec_profile_module)
                except Exception:
                    raise AssertionError(
                        error_message(
                            "main",
                            f"""
                            The specified exec profile module
                            \"{exec_profile_module_name}\"
                            could not be imported.
                            """,
                            is_known=True))
            assert hasattr(exec_profile_module, exec_profile_class_name), \
                error_message(
                    "main",
                    f"""
                    The specified exec profile module
                    \"{exec_profile_module_name}\"
                    has no class \"{exec_profile_class_name}\".
                    """,
                    is_known=True)
            args.exec_profile = getattr(exec_profile_module,
                                        exec_profile_class_name)

        assert isclass(args.exec_profile) \
            and issubclass(args.exec_profile, ExecProfileBase), \
            error_message(
                "main",
                """
                The specified exec profile class does not inherit
                from ExecProfileBase at c2wl_rocket.execprofile.
                """,
                is_known=True)

        cwltool_args = copy(cwltool_default_args)
        cwltool_args.workflow = args.cwl_document
        cwltool_args.job_order = args.input_params
        cwltool_args.outdir = args.outdir
        cwltool_args.tmp_outdir_prefix = args.tmp_outdir_prefix
        cwltool_args.cachedir = args.cachedir
        cwltool_args.move_outputs = args.move_outputs
        cwltool_args.debug = args.debug

        loading_context = cwltool.main.LoadingContext(vars(cwltool_args))

        with open(args.cwl_document, mode="r") as cwl:
            cwl_content = yaml.safe_load(cwl)
        assert "cwlVersion" in cwl_content.keys(), \
            error_message("main",
                          "No cwlVersion is specified in the CWL document.",
                          is_known=True)
        workflow_metadata = {"cwlVersion": cwl_content["cwlVersion"]}

        loading_context.construct_tool_object = functools.partial(
            make_custom_tool,
            exec_profile_class=args.exec_profile,
            workflow_metadata=workflow_metadata)

        runtime_context = cwltool.main.RuntimeContext(vars(cwltool_args))

        job_executor = MultithreadedJobExecutor() if cwltool_args.parallel \
            else SingleJobExecutor()
        job_executor.max_ram = job_executor.max_cores = float("inf")

        # Hand arguments over to the main exec function:
        cwltool.main.main(args=cwltool_args,
                          executor=job_executor,
                          loadingContext=loading_context,
                          runtimeContext=runtime_context)

    elif args.subcommand == "start_worker":
        worker.start(web_server_host=args.web_server_host,
                     web_server_port=int(args.web_server_port))

def main(args=None): """Main entrypoint for cwl-tes.""" if args is None: args = sys.argv[1:] parser = arg_parser() parsed_args = parser.parse_args(args) if parsed_args.version: print(versionstring()) return 0 if parsed_args.tes is None: print(versionstring()) parser.print_usage() print("cwl-tes: error: argument --tes is required") return 1 if parsed_args.token: try: token_public_key = parsed_args.token_public_key if not token_public_key: header = jwt.get_unverified_header(parsed_args.token) if 'kid' in header: token_public_key = load_public_key(header.get('kid')) else: raise Exception("Invalid token: has no kid in header.") jwt.decode( parsed_args.token, token_public_key.encode('utf-8').decode('unicode_escape'), algorithms=['RS256']) except Exception: raise Exception('Token is not valid') if parsed_args.quiet: log.setLevel(logging.WARN) if parsed_args.debug: log.setLevel(logging.DEBUG) def signal_handler(*args): # pylint: disable=unused-argument """setup signal handler""" log.info("recieved control-c signal") log.info("terminating thread(s)...") log.warning("remote TES task(s) will keep running") sys.exit(1) signal.signal(signal.SIGINT, signal_handler) ftp_cache = {} class CachingFtpFsAccess(FtpFsAccess): """Ensures that the FTP connection cache is shared.""" def __init__(self, basedir, insecure=False): super(CachingFtpFsAccess, self).__init__(basedir, ftp_cache, insecure=insecure) ftp_fs_access = CachingFtpFsAccess(os.curdir, insecure=parsed_args.insecure) if parsed_args.remote_storage_url: parsed_args.remote_storage_url = ftp_fs_access.join( parsed_args.remote_storage_url, str(uuid.uuid4())) loading_context = cwltool.main.LoadingContext(vars(parsed_args)) loading_context.construct_tool_object = functools.partial( make_tes_tool, url=parsed_args.tes, remote_storage_url=parsed_args.remote_storage_url, token=parsed_args.token) runtime_context = cwltool.main.RuntimeContext(vars(parsed_args)) runtime_context.make_fs_access = functools.partial( CachingFtpFsAccess, insecure=parsed_args.insecure) runtime_context.path_mapper = functools.partial(TESPathMapper, fs_access=ftp_fs_access) job_executor = MultithreadedJobExecutor() if parsed_args.parallel \ else SingleJobExecutor() job_executor.max_ram = job_executor.max_cores = float("inf") executor = functools.partial( tes_execute, job_executor=job_executor, loading_context=loading_context, remote_storage_url=parsed_args.remote_storage_url, ftp_access=ftp_fs_access) return cwltool.main.main(args=parsed_args, executor=executor, loadingContext=loading_context, runtimeContext=runtime_context, versionfunc=versionstring, logger_handler=console)
def run_native(config_object: 'ConfigBase', workflow: str, run_directory: str = '.',
               parallel: bool = True, verbosity: str = "normal") -> int:
    """Executes the workflow using native Python rather than a "command line" subprocess.

    Args:
        config_object: a constructed ConfigBase-derived object
        workflow: the path to the workflow to be executed
        run_directory: the destination folder for workflow output subdirectories (default: CWD)
        parallel: process libraries in parallel where possible
        verbosity: controls the depth of information written to terminal by cwltool

    Returns:
        0 if the workflow completed, 1 if it did not
    """

    def furnish_if_file_record(file_dict):
        if isinstance(file_dict, dict) and file_dict.get('class', None) == 'File':
            file_dict['basename'] = os.path.basename(file_dict['path'])
            file_dict['location'] = file_dict['path']
            file_dict['contents'] = None

    # Upgrade file entries in Run Config with extra descriptors cwltool expects
    for _, config_param in config_object.config.items():
        if isinstance(config_param, list):
            for config_dict in config_param:
                furnish_if_file_record(config_dict)
        else:
            furnish_if_file_record(config_param)

    # Set overall config for cwltool
    runtime_context = RuntimeContext({
        'secret_store': cwltool.secrets.SecretStore(),
        'outdir': run_directory,
        'on_error': "continue",
        'js_console': verbosity == "debug",
        'debug': verbosity == "debug"
    })

    # Set proper temp directory for Mac users
    if sys.platform == "darwin":
        default_mac_path = "/private/tmp/docker_tmp"
        if runtime_context.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
            runtime_context.tmp_outdir_prefix = default_mac_path
        if runtime_context.tmpdir_prefix == DEFAULT_TMP_PREFIX:
            runtime_context.tmpdir_prefix = default_mac_path

    # Enable rich terminal output (timestamp, color, formatting)
    logger = logging.getLogger("cwltool")
    # executors.py loads a default handler; outputs are printed twice if we don't clear it
    logger.handlers.clear()
    level = 'DEBUG' if verbosity == 'debug' else 'WARN' if verbosity == "quiet" else "INFO"
    coloredlogs.install(logger=logger, stream=sys.stderr,
                        fmt="[%(asctime)s] %(levelname)s %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        level=level, isatty=True)

    # Wrap the executors so that we may pass our logger to them (unsupported by Factory)
    parallel_executor = functools.partial(MultithreadedJobExecutor(), logger=logger)
    serial_executor = functools.partial(SingleJobExecutor(), logger=logger)

    # Instantiate Factory with our run preferences
    cwl = cwltool.factory.Factory(
        runtime_context=runtime_context,
        loading_context=LoadingContext({'relax_path_checks': True}),
        executor=parallel_executor if parallel else serial_executor
    )

    try:
        # Load the workflow document and execute
        pipeline = cwl.make(workflow)
        pipeline(**config_object.config)
    except cwltool.factory.WorkflowStatus:
        # For now, return non-zero if workflow did not complete
        return 1

    return 0
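
# A minimal, hypothetical driver for run_native(). WorkflowConfig stands in
# for whatever ConfigBase subclass the caller constructs; the only assumption,
# taken from the code above, is that its .config dict holds the workflow
# inputs and that run_native returns an int exit code.
import sys

config = WorkflowConfig("run_config.yml")  # placeholder ConfigBase subclass
exit_code = run_native(config, workflow="workflow.cwl",
                       run_directory="results", verbosity="normal")
sys.exit(exit_code)  # 0 if the workflow completed, 1 otherwise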