def test_fetcher(self):
    class TestFetcher(schema_salad.ref_resolver.Fetcher):
        def __init__(self, a, b):
            pass

        def fetch_text(self, url):    # type: (unicode) -> unicode
            if url == "baz:bar/foo.cwl":
                return """
cwlVersion: v1.0
class: CommandLineTool
baseCommand: echo
inputs: []
outputs: []
"""
            else:
                raise RuntimeError("Not foo.cwl")

        def check_exists(self, url):  # type: (unicode) -> bool
            if url == "baz:bar/foo.cwl":
                return True
            else:
                return False

    def test_resolver(d, a):
        return "baz:bar/" + a

    load_tool("foo.cwl", defaultMakeTool, resolver=test_resolver,
              fetcher_constructor=TestFetcher)

    self.assertEquals(0, main(["--print-pre", "--debug", "foo.cwl"],
                              resolver=test_resolver,
                              fetcher_constructor=TestFetcher))
def test_fetcher(self):
    class TestFetcher(schema_salad.ref_resolver.Fetcher):
        def __init__(self, a, b):
            pass

        def fetch_text(self, url):    # type: (unicode) -> unicode
            if url == "baz:bar/foo.cwl":
                return """
cwlVersion: v1.0
class: CommandLineTool
baseCommand: echo
inputs: []
outputs: []
"""
            else:
                raise RuntimeError("Not foo.cwl, was %s" % url)

        def check_exists(self, url):  # type: (unicode) -> bool
            if url == "baz:bar/foo.cwl":
                return True
            else:
                return False

        def urljoin(self, base, url):
            urlsp = urllib.parse.urlsplit(url)
            if urlsp.scheme:
                return url
            basesp = urllib.parse.urlsplit(base)
            if basesp.scheme == "keep":
                return base + "/" + url
            return urllib.parse.urljoin(base, url)

    def test_resolver(d, a):
        if a.startswith("baz:bar/"):
            return a
        else:
            return "baz:bar/" + a

    loadingContext = LoadingContext({"construct_tool_object": default_make_tool,
                                     "resolver": test_resolver,
                                     "fetcher_constructor": TestFetcher})

    load_tool("foo.cwl", loadingContext)

    self.assertEquals(0, main(["--print-pre", "--debug", "foo.cwl"],
                              loadingContext=loadingContext))
def test_get_subgraph():
    loadingContext = LoadingContext({"construct_tool_object": default_make_tool})
    wf = norm(Path(get_data("tests/subgraph/count-lines1-wf.cwl")).as_uri())
    loadingContext.do_update = False
    tool = load_tool(wf, loadingContext)

    sg = norm(Path(get_data("tests/subgraph")).as_uri())

    def clean(val):
        if isinstance(val, string_types):
            if val.startswith(sg):
                return val[len(sg)+1:]
        if isinstance(val, dict):
            return {k: clean(v) for k, v in val.items()}
        if isinstance(val, list):
            return [clean(v) for v in val]
        return val

    for a in ("file1", "file2", "file3", "count_output",
              "output3", "output4", "output5",
              "step1", "step2", "step3", "step4", "step5"):
        extracted = get_subgraph([wf + "#" + a], tool)
        with open(get_data("tests/subgraph/extract_" + a + ".json")) as f:
            assert json.load(f) == clean(convert_to_dict(extracted))
def run():
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            if "location" in v:
                v["location"] = keeppath(v["location"])

        for k, v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"
        debug = False

        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        if "arv:debug" in job_order_object:
            debug = job_order_object["arv:debug"]
            del job_order_object["arv:debug"]

        arvargs = argparse.Namespace()
        arvargs.work_api = "jobs"
        arvargs.output_name = output_name
        arvargs.output_tags = output_tags
        arvargs.thread_count = 1

        runner = arvados_cwl.ArvCwlRunner(
            api_client=arvados.safeapi.ThreadSafeApiCache(
                api_params={"model": OrderedJsonModel()},
                keep_params={"num_retries": 4}),
            arvargs=arvargs)

        make_fs_access = functools.partial(CollectionFsAccess,
                                           collection_cache=runner.collection_cache)

        t = load_tool(toolpath, runner.loadingContext)

        if debug:
            logger.setLevel(logging.DEBUG)
            logging.getLogger('arvados').setLevel(logging.DEBUG)
            logging.getLogger("cwltool").setLevel(logging.DEBUG)

        args = ArvRuntimeContext(vars(arvargs))
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = debug
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job = {"uuid": arvados.current_job()["uuid"],
                               "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access
        args.trash_intermediate = False
        args.intermediate_output_ttl = 0
        args.priority = arvados_cwl.DEFAULT_PRIORITY
        args.do_validate = True
        args.disable_js_validation = False
        args.tmp_outdir_prefix = "tmp"

        runner.arv_executor(t, job_order_object, args, logger=logger)
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body={
                                   'output': outputCollection,
                                   'success': False,
                                   'progress': 1.0
                               }).execute()
def run():
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            if "location" in v:
                v["location"] = keeppath(v["location"])

        for k, v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"

        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()),
                                          output_name=output_name, output_tags=output_tags)

        make_fs_access = functools.partial(CollectionFsAccess,
                                           collection_cache=runner.collection_cache)

        t = load_tool(toolpath, runner.arv_make_tool,
                      fetcher_constructor=functools.partial(CollectionFetcher,
                                                            api_client=runner.api,
                                                            fs_access=make_fs_access(""),
                                                            num_retries=runner.num_retries))

        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = False
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job = {"uuid": arvados.current_job()["uuid"],
                               "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access

        runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body={
                                   'output': outputCollection,
                                   'success': False,
                                   'progress': 1.0
                               }).execute()
def run():
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            if "location" in v:
                v["location"] = keeppath(v["location"])

        for k, v in viewitems(job_order_object):
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"
        debug = False

        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        if "arv:debug" in job_order_object:
            debug = job_order_object["arv:debug"]
            del job_order_object["arv:debug"]

        arvargs = argparse.Namespace()
        arvargs.work_api = "jobs"
        arvargs.output_name = output_name
        arvargs.output_tags = output_tags
        arvargs.thread_count = 1
        arvargs.collection_cache_size = None

        runner = arvados_cwl.ArvCwlExecutor(
            api_client=arvados.safeapi.ThreadSafeApiCache(
                api_params={"model": OrderedJsonModel()},
                keep_params={"num_retries": 4}),
            arvargs=arvargs)

        make_fs_access = functools.partial(CollectionFsAccess,
                                           collection_cache=runner.collection_cache)

        t = load_tool(toolpath, runner.loadingContext)

        if debug:
            logger.setLevel(logging.DEBUG)
            logging.getLogger('arvados').setLevel(logging.DEBUG)
            logging.getLogger("cwltool").setLevel(logging.DEBUG)

        args = ArvRuntimeContext(vars(arvargs))
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = debug
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job = {"uuid": arvados.current_job()["uuid"],
                               "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access
        args.trash_intermediate = False
        args.intermediate_output_ttl = 0
        args.priority = arvados_cwl.DEFAULT_PRIORITY
        args.do_validate = True
        args.disable_js_validation = False
        args.tmp_outdir_prefix = "tmp"

        runner.arv_executor(t, job_order_object, args, logger=logger)
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body={
                                   'output': outputCollection,
                                   'success': False,
                                   'progress': 1.0
                               }).execute()
def __load(location):
    return load_tool(
        location,
        setup_loadingContext(LoadingContext(default_cwl_args),
                             RuntimeContext(default_cwl_args),
                             argparse.Namespace(**default_cwl_args)))
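# Usage sketch (not part of the original source; assumes it runs inside the
# module that defines __load and its module-level default_cwl_args, and the
# workflow path below is hypothetical).
if __name__ == "__main__":
    tool = __load("workflows/example-workflow.cwl")  # hypothetical path
    # load_tool returns a cwltool Process; the parsed document is under .tool
    print(tool.tool.get("class"))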
def arv_executor(self, tool, job_order, runtimeContext, logger=None):
    self.debug = runtimeContext.debug

    tool.visit(self.check_features)

    self.project_uuid = runtimeContext.project_uuid
    self.pipeline = None
    self.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir)
    self.secret_store = runtimeContext.secret_store

    self.trash_intermediate = runtimeContext.trash_intermediate
    if self.trash_intermediate and self.work_api != "containers":
        raise Exception("--trash-intermediate is only supported with --api=containers.")

    self.intermediate_output_ttl = runtimeContext.intermediate_output_ttl
    if self.intermediate_output_ttl and self.work_api != "containers":
        raise Exception("--intermediate-output-ttl is only supported with --api=containers.")
    if self.intermediate_output_ttl < 0:
        raise Exception("Invalid value %d for --intermediate-output-ttl, cannot be less than zero" % self.intermediate_output_ttl)

    if runtimeContext.submit_request_uuid and self.work_api != "containers":
        raise Exception("--submit-request-uuid requires containers API, but using '{}' api".format(self.work_api))

    if not runtimeContext.name:
        runtimeContext.name = self.name = tool.tool.get("label") or tool.metadata.get("label") or os.path.basename(tool.tool["id"])

    # Upload local file references in the job order.
    job_order = upload_job_order(self, "%s input" % runtimeContext.name,
                                 tool, job_order)

    submitting = (runtimeContext.update_workflow or
                  runtimeContext.create_workflow or
                  (runtimeContext.submit and not
                   (tool.tool["class"] == "CommandLineTool" and
                    runtimeContext.wait and
                    not runtimeContext.always_submit_runner)))

    loadingContext = self.loadingContext.copy()
    loadingContext.do_validate = False
    loadingContext.do_update = False
    if submitting:
        # Document may have been auto-updated. Reload the original
        # document with updating disabled because we want to
        # submit the original document, not the auto-updated one.
        tool = load_tool(tool.tool["id"], loadingContext)

    # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
    # Also uploads docker images.
    merged_map = upload_workflow_deps(self, tool)

    # Recreate process object (ArvadosWorkflow or
    # ArvadosCommandTool) because tool document may have been
    # updated by upload_workflow_deps in ways that modify
    # inheritance of hints or requirements.
    loadingContext.loader = tool.doc_loader
    loadingContext.avsc_names = tool.doc_schema
    loadingContext.metadata = tool.metadata
    tool = load_tool(tool.tool, loadingContext)

    existing_uuid = runtimeContext.update_workflow
    if existing_uuid or runtimeContext.create_workflow:
        # Create a pipeline template or workflow record and exit.
        if self.work_api == "containers":
            return (upload_workflow(self, tool, job_order,
                                    self.project_uuid,
                                    uuid=existing_uuid,
                                    submit_runner_ram=runtimeContext.submit_runner_ram,
                                    name=runtimeContext.name,
                                    merged_map=merged_map),
                    "success")

    self.apply_reqs(job_order, tool)

    self.ignore_docker_for_reuse = runtimeContext.ignore_docker_for_reuse
    self.eval_timeout = runtimeContext.eval_timeout

    runtimeContext = runtimeContext.copy()
    runtimeContext.use_container = True
    runtimeContext.tmpdir_prefix = "tmp"
    runtimeContext.work_api = self.work_api

    if self.work_api == "containers":
        if self.ignore_docker_for_reuse:
            raise Exception("--ignore-docker-for-reuse not supported with containers API.")
        runtimeContext.outdir = "/var/spool/cwl"
        runtimeContext.docker_outdir = "/var/spool/cwl"
        runtimeContext.tmpdir = "/tmp"
        runtimeContext.docker_tmpdir = "/tmp"

    if runtimeContext.priority < 1 or runtimeContext.priority > 1000:
        raise Exception("--priority must be in the range 1..1000.")

    if self.should_estimate_cache_size:
        visited = set()
        estimated_size = [0]

        def estimate_collection_cache(obj):
            if obj.get("location", "").startswith("keep:"):
                m = pdh_size.match(obj["location"][5:])
                if m and m.group(1) not in visited:
                    visited.add(m.group(1))
                    estimated_size[0] += int(m.group(2))

        visit_class(job_order, ("File", "Directory"), estimate_collection_cache)
        runtimeContext.collection_cache_size = max(((estimated_size[0] * 192) // (1024 * 1024)) + 1, 256)
        self.collection_cache.set_cap(runtimeContext.collection_cache_size * 1024 * 1024)

    logger.info("Using collection cache size %s MiB", runtimeContext.collection_cache_size)

    runnerjob = None
    if runtimeContext.submit:
        # Submit a runner job to run the workflow for us.
        if self.work_api == "containers":
            if tool.tool["class"] == "CommandLineTool" and runtimeContext.wait and (not runtimeContext.always_submit_runner):
                runtimeContext.runnerjob = tool.tool["id"]
            else:
                tool = RunnerContainer(self, tool, loadingContext, runtimeContext.enable_reuse,
                                       self.output_name,
                                       self.output_tags,
                                       submit_runner_ram=runtimeContext.submit_runner_ram,
                                       name=runtimeContext.name,
                                       on_error=runtimeContext.on_error,
                                       submit_runner_image=runtimeContext.submit_runner_image,
                                       intermediate_output_ttl=runtimeContext.intermediate_output_ttl,
                                       merged_map=merged_map,
                                       priority=runtimeContext.priority,
                                       secret_store=self.secret_store,
                                       collection_cache_size=runtimeContext.collection_cache_size,
                                       collection_cache_is_default=self.should_estimate_cache_size)

    if runtimeContext.cwl_runner_job is not None:
        self.uuid = runtimeContext.cwl_runner_job.get('uuid')

    jobiter = tool.job(job_order,
                       self.output_callback,
                       runtimeContext)

    if runtimeContext.submit and not runtimeContext.wait:
        runnerjob = next(jobiter)
        runnerjob.run(runtimeContext)
        return (runnerjob.uuid, "success")

    current_container = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
    if current_container:
        logger.info("Running inside container %s", current_container.get("uuid"))

    self.poll_api = arvados.api('v1', timeout=runtimeContext.http_timeout)
    self.polling_thread = threading.Thread(target=self.poll_states)
    self.polling_thread.start()

    self.task_queue = TaskQueue(self.workflow_eval_lock, self.thread_count)

    try:
        self.workflow_eval_lock.acquire()

        # Holds the lock while this code runs and releases it when
        # it is safe to do so in self.workflow_eval_lock.wait(),
        # at which point on_message can update job state and
        # process output callbacks.

        loopperf = Perf(metrics, "jobiter")
        loopperf.__enter__()
        for runnable in jobiter:
            loopperf.__exit__()

            if self.stop_polling.is_set():
                break

            if self.task_queue.error is not None:
                raise self.task_queue.error

            if runnable:
                with Perf(metrics, "run"):
                    self.start_run(runnable, runtimeContext)
            else:
                if (self.task_queue.in_flight + len(self.processes)) > 0:
                    self.workflow_eval_lock.wait(3)
                else:
                    logger.error("Workflow is deadlocked, no runnable processes and not waiting on any pending processes.")
                    break

            if self.stop_polling.is_set():
                break

            loopperf.__enter__()
        loopperf.__exit__()

        while (self.task_queue.in_flight + len(self.processes)) > 0:
            if self.task_queue.error is not None:
                raise self.task_queue.error
            self.workflow_eval_lock.wait(3)

    except UnsupportedRequirement:
        raise
    except:
        if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
            logger.error("Interrupted, workflow will be cancelled")
        elif isinstance(sys.exc_info()[1], WorkflowException):
            logger.error("Workflow execution failed:\n%s", sys.exc_info()[1],
                         exc_info=(sys.exc_info()[1] if self.debug else False))
        else:
            logger.exception("Workflow execution failed")

        if self.pipeline:
            self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
                                                 body={"state": "Failed"}).execute(num_retries=self.num_retries)

        if self.work_api == "containers" and not current_container:
            # Not running in a crunch container, so cancel any outstanding processes.
            for p in self.processes:
                try:
                    self.api.container_requests().update(uuid=p,
                                                         body={"priority": "0"}
                                                         ).execute(num_retries=self.num_retries)
                except Exception:
                    pass
    finally:
        self.workflow_eval_lock.release()
        self.task_queue.drain()
        self.stop_polling.set()
        self.polling_thread.join()
        self.task_queue.join()

    if self.final_status == "UnsupportedRequirement":
        raise UnsupportedRequirement("Check log for details.")

    if self.final_output is None:
        raise WorkflowException("Workflow did not return a result.")

    if runtimeContext.submit and isinstance(tool, Runner):
        logger.info("Final output collection %s", tool.final_output)
    else:
        if self.output_name is None:
            self.output_name = "Output of %s" % (shortname(tool.tool["id"]))
        if self.output_tags is None:
            self.output_tags = ""

        storage_classes = runtimeContext.storage_classes.strip().split(",")
        self.final_output, self.final_output_collection = self.make_output_collection(
            self.output_name, storage_classes, self.output_tags, self.final_output)
        self.set_crunch_output()

    if runtimeContext.compute_checksum:
        adjustDirObjs(self.final_output, partial(get_listing, self.fs_access))
        adjustFileObjs(self.final_output, partial(compute_checksums, self.fs_access))

    if self.trash_intermediate and self.final_status == "success":
        self.trash_intermediate_output()

    return (self.final_output, self.final_status)
def load_cwl(cwl_file):
    load.loaders = {}
    loading_context = cwltool.context.LoadingContext(get_default_args())
    loading_context.construct_tool_object = default_make_tool
    loading_context.resolver = tool_resolver
    return load.load_tool(cwl_file, loading_context)
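# Usage sketch (not part of the original source; assumes it runs inside the
# module that defines load_cwl, and the CWL path below is hypothetical).
if __name__ == "__main__":
    cwl_tool = load_cwl("tools/echo-tool.cwl")  # hypothetical path
    # load_tool returns a cwltool Process; the parsed document is under .tool
    print(cwl_tool.tool.get("class"))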
def run():
    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']

        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            v["location"] = keeppath(v["location"])

        job_order_object["cwl:tool"] = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'],
                                                         job_order_object["cwl:tool"])

        for k, v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)
        adjustDirObjs(job_order_object,
                      functools.partial(getListing,
                                        arvados_cwl.fsaccess.CollectionFsAccess("", api_client=api)))

        output_name = None
        output_tags = None
        enable_reuse = True

        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()),
                                          output_name=output_name, output_tags=output_tags)

        t = load_tool(job_order_object, runner.arv_make_tool)

        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.submit = False
        args.debug = True
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.cwl_runner_job = {"uuid": arvados.current_job()["uuid"],
                               "state": arvados.current_job()["state"]}

        outputObj = runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body={
                                   'output': outputCollection,
                                   'success': False,
                                   'progress': 1.0
                               }).execute()
def read_config_from_cwl_file(cwl_file):
    print_pref = "[read_cwl_file]:"
    configs = {}
    metadata = {
        "doc": "",
        "workflow_name": os.path.basename(cwl_file),
        "workflow_path": os.path.abspath(cwl_file),
        "workflow_type": "CWL"
    }

    # cwltool needs to be imported on demand since repeatedly calling
    # functions on a document with the same name caused errors.
    from cwltool.context import LoadingContext
    from cwltool.load_tool import load_tool
    from cwltool.workflow import default_make_tool

    loadingContext = LoadingContext({"construct_tool_object": default_make_tool,
                                     "disable_js_validation": True})

    try:
        cwl_document = load_tool(cwl_file, loadingContext)
    except AssertionError as e:
        raise AssertionError(
            print_pref + "failed to read cwl file \"" + cwl_file +
            "\": does not exist or is invalid")

    inp_records = cwl_document.inputs_record_schema["fields"]

    if "doc" in cwl_document.tool:
        metadata["doc"] = cwl_document.tool["doc"]

    for inp_rec in inp_records:
        name = clean_string(inp_rec["name"])
        is_array = False
        null_allowed = False
        null_items_allowed = False
        default_value = [""]
        allowed_selection = [""]

        # read type:
        try:
            type_, null_allowed, is_array, null_items_allowed, allowed_selection = \
                read_inp_rec_type_field(inp_rec["type"])
        except Exception as e:
            raise AssertionError(
                print_pref + "E: reading type of param \"{}\": {}".format(name, str(e)))

        # get the default:
        if "default" in inp_rec:
            if is_basic_type_instance(inp_rec["default"]):
                default_value = [clean_string(inp_rec["default"])]
            else:
                if is_array and isinstance(inp_rec["default"], list):
                    default_value = []
                    for entry in inp_rec["default"]:
                        # check each list entry individually
                        if is_basic_type_instance(entry):
                            default_value.append(clean_string(entry))
                        else:
                            print(print_pref + "W: invalid default value for parameter " + name +
                                  ": will be ignored", file=sys.stderr)
                            default_value = [""]
                elif type_ == "File" and isinstance(inp_rec["default"], dict):
                    print(print_pref + "W: invalid default value for parameter " + name +
                          ": defaults for File class are not supported yet; will be ignored",
                          file=sys.stderr)
                    default_value = [""]
                else:
                    print(print_pref + "W: invalid default value for parameter " + name +
                          ": will be ignored", file=sys.stderr)
                    default_value = [""]
        else:
            default_value = [""]

        # read secondary files:
        if type_ == "File" and "secondaryFiles" in inp_rec:
            if isinstance(inp_rec["secondaryFiles"], str):
                secondary_files = [inp_rec["secondaryFiles"]]
            elif isinstance(inp_rec["secondaryFiles"], CommentedMap) and \
                    "pattern" in inp_rec["secondaryFiles"].keys():
                secondary_files = [inp_rec["secondaryFiles"]["pattern"]]
            elif isinstance(inp_rec["secondaryFiles"], CommentedSeq) or \
                    isinstance(inp_rec["secondaryFiles"], list):
                secondary_files = []
                for sec_file in inp_rec["secondaryFiles"]:
                    if isinstance(sec_file, CommentedMap) and "pattern" in sec_file:
                        secondary_files.append(sec_file["pattern"])
                    elif isinstance(sec_file, str):
                        secondary_files.append(sec_file)
                    else:
                        raise AssertionError(
                            print_pref + "E: invalid secondaryFiles field for parameter " + name)
            else:
                raise AssertionError(
                    print_pref + "E: invalid secondaryFiles field for parameter " + name)
        else:
            secondary_files = [""]

        # read doc:
        if "doc" in inp_rec:
            doc = inp_rec["doc"]
        else:
            doc = ""

        # assemble config parameters:
        inp_configs = {
            "type": type_,
            "is_array": is_array,
            "null_allowed": null_allowed,
            "null_items_allowed": null_items_allowed,
            "secondary_files": secondary_files,
            "default_value": default_value,
            "allowed_selection": allowed_selection,
            "doc": doc
        }

        # add to configs dict:
        configs[name] = inp_configs

    return configs, metadata
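# Usage sketch (not part of the original source; assumes it runs inside the
# module that defines read_config_from_cwl_file, and the workflow path below
# is hypothetical).
if __name__ == "__main__":
    configs, metadata = read_config_from_cwl_file("workflows/example-wf.cwl")
    print(metadata["workflow_name"], metadata["workflow_type"])
    for param_name, param_conf in configs.items():
        print(param_name, param_conf["type"], param_conf["is_array"])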