def collect_output(self, schema, builder, outdir):
    r = []
    if "outputBinding" in schema:
        binding = schema["outputBinding"]
        globpatterns = []
        # revmap = partial(revmap_file, builder, outdir)
        if "glob" in binding:
            for gb in aslist(binding["glob"]):
                gb = builder.do_eval(gb)
                # _logger.debug("gb evaluated to: '{}'".format(gb))
                if gb:
                    globpatterns.extend(aslist(gb))

            for gb in globpatterns:
                g = os.path.join(outdir, gb)
                cls = "File" if schema["type"] == "File" else "Directory"
                r.extend([{"location": g,
                           "path": g,
                           "basename": os.path.basename(g),
                           "nameroot": os.path.splitext(os.path.basename(g))[0],
                           "nameext": os.path.splitext(os.path.basename(g))[1],
                           "class": cls}])

        optional = False
        single = False
        if isinstance(schema["type"], list):
            if "null" in schema["type"]:
                optional = True
            if "File" in schema["type"] or "Directory" in schema["type"]:
                single = True
        elif schema["type"] == "File" or schema["type"] == "Directory":
            single = True

        if "outputEval" in binding:
            r = builder.do_eval(binding["outputEval"], context=r)

        if single:
            if not r and not optional:
                raise WorkflowException("Did not find output file with glob pattern: '{}'".format(globpatterns))
            elif not r and optional:
                pass
            elif isinstance(r, list):
                if len(r) > 1:
                    raise WorkflowException("Multiple matches for output item that is a single file")
                else:
                    r = r[0]

    return r
def simplify_list(maybe_list):
    """Turn a length one list loaded by cwltool into a scalar.

    Anything else is passed as-is, by reference."""
    if isinstance(maybe_list, MutableSequence):
        is_list = aslist(maybe_list)
        if len(is_list) == 1:
            return is_list[0]
    return maybe_list
def simplify_list(l):
    """Turn a length one list loaded by cwltool into a scalar.

    Anything else is passed as-is, by reference."""
    if isinstance(l, CommentedSeq):
        l = aslist(l)
        if len(l) == 1:
            return l[0]
    return l
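# Hedged usage sketch (not part of the original sources): illustrates the
# contract of simplify_list() above, assuming ruamel.yaml is installed as it
# is for cwltool/toil. A length-one sequence collapses to its element;
# anything else passes through unchanged, by reference.
from ruamel.yaml.comments import CommentedSeq

assert simplify_list(CommentedSeq(["step/out"])) == "step/out"
assert simplify_list(["a", "b"]) == ["a", "b"]
assert simplify_list("scalar") == "scalar"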
def upload_job_order(arvrunner, name, tool, job_order):
    """Upload local files referenced in the input object and return updated input
    object with 'location' updated to the proper keep references.
    """

    # Make a copy of the job order and set defaults.
    builder_job_order = copy.copy(job_order)

    # fill_in_defaults throws an error if there are any
    # missing required parameters, we don't want it to do that
    # so make them all optional.
    inputs_copy = copy.deepcopy(tool.tool["inputs"])
    for i in inputs_copy:
        if "null" not in i["type"]:
            i["type"] = ["null"] + aslist(i["type"])

    fill_in_defaults(inputs_copy,
                     builder_job_order,
                     arvrunner.fs_access)
    # Need to create a builder object to evaluate expressions.
    builder = make_builder(builder_job_order,
                           tool.hints,
                           tool.requirements,
                           ArvRuntimeContext(),
                           tool.metadata)
    # Now update job_order with secondaryFiles
    discover_secondary_files(arvrunner.fs_access,
                             builder,
                             tool.tool["inputs"],
                             job_order)

    jobmapper = upload_dependencies(arvrunner,
                                    name,
                                    tool.doc_loader,
                                    job_order,
                                    job_order.get("id", "#"),
                                    False)

    if "id" in job_order:
        del job_order["id"]

    # Need to filter this out, gets added by cwltool when providing
    # parameters on the command line.
    if "job_order" in job_order:
        del job_order["job_order"]

    return job_order
def run(self, dry_run=False, pull_image=True, **kwargs):
    container_request = {
        "command": self.command_line,
        "owner_uuid": self.arvrunner.project_uuid,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": 1,
        "state": "Committed",
        "properties": {},
    }
    runtime_constraints = {}

    resources = self.builder.resources
    if resources is not None:
        runtime_constraints["vcpus"] = resources.get("cores", 1)
        runtime_constraints["ram"] = resources.get("ram") * 2**20

    mounts = {
        self.outdir: {
            "kind": "tmp",
            "capacity": resources.get("outdirSize", 0) * 2**20
        },
        self.tmpdir: {
            "kind": "tmp",
            "capacity": resources.get("tmpdirSize", 0) * 2**20
        }
    }
    scheduling_parameters = {}

    rf = [self.pathmapper.mapper(f) for f in self.pathmapper.referenced_files]
    rf.sort(key=lambda k: k.resolved)
    prevdir = None
    for resolved, target, tp, stg in rf:
        if not stg:
            continue
        if prevdir and target.startswith(prevdir):
            continue
        if tp == "Directory":
            targetdir = target
        else:
            targetdir = os.path.dirname(target)
        sp = resolved.split("/", 1)
        pdh = sp[0][5:]   # remove "keep:"
        mounts[targetdir] = {
            "kind": "collection",
            "portable_data_hash": pdh
        }
        if len(sp) == 2:
            if tp == "Directory":
                path = sp[1]
            else:
                path = os.path.dirname(sp[1])
            if path and path != "/":
                mounts[targetdir]["path"] = path
        prevdir = targetdir + "/"

    with Perf(metrics, "generatefiles %s" % self.name):
        if self.generatefiles["listing"]:
            vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                keep_client=self.arvrunner.keep_client,
                                                num_retries=self.arvrunner.num_retries)
            generatemapper = NoFollowPathMapper([self.generatefiles], "", "",
                                                separateDirs=False)

            with Perf(metrics, "createfiles %s" % self.name):
                for f, p in generatemapper.items():
                    if not p.target:
                        pass
                    elif p.type in ("File", "Directory"):
                        source, path = self.arvrunner.fs_access.get_collection(p.resolved)
                        vwd.copy(path, p.target, source_collection=source)
                    elif p.type == "CreateFile":
                        with vwd.open(p.target, "w") as n:
                            n.write(p.resolved.encode("utf-8"))

            with Perf(metrics, "generatefiles.save_new %s" % self.name):
                vwd.save_new()

            for f, p in generatemapper.items():
                if not p.target:
                    continue
                mountpoint = "%s/%s" % (self.outdir, p.target)
                mounts[mountpoint] = {
                    "kind": "collection",
                    "portable_data_hash": vwd.portable_data_hash(),
                    "path": p.target
                }

    container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    if self.environment:
        container_request["environment"].update(self.environment)

    if self.stdin:
        sp = self.stdin[6:].split("/", 1)
        mounts["stdin"] = {"kind": "collection",
                           "portable_data_hash": sp[0],
                           "path": sp[1]}

    if self.stderr:
        mounts["stderr"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stderr)}

    if self.stdout:
        mounts["stdout"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stdout)}

    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
    if not docker_req:
        docker_req = {"dockerImageId": "arvados/jobs"}

    container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                 docker_req,
                                                                 pull_image,
                                                                 self.arvrunner.project_uuid)

    api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
    if api_req:
        runtime_constraints["API"] = True

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            runtime_constraints["keep_cache_ram"] = runtime_req["keep_cache"] * 2**20
        if "outputDirType" in runtime_req:
            if runtime_req["outputDirType"] == "local_output_dir":
                # Currently the default behavior.
                pass
            elif runtime_req["outputDirType"] == "keep_output_dir":
                mounts[self.outdir] = {"kind": "collection",
                                       "writable": True}

    partition_req, _ = get_feature(self, "http://arvados.org/cwl#PartitionRequirement")
    if partition_req:
        scheduling_parameters["partitions"] = aslist(partition_req["partition"])

    intermediate_output_req, _ = get_feature(self, "http://arvados.org/cwl#IntermediateOutput")
    if intermediate_output_req:
        self.output_ttl = intermediate_output_req["outputTTL"]
    else:
        self.output_ttl = self.arvrunner.intermediate_output_ttl

    if self.output_ttl < 0:
        raise WorkflowException("Invalid value %d for output_ttl, cannot be less than zero" % self.output_ttl)

    container_request["output_ttl"] = self.output_ttl
    container_request["mounts"] = mounts
    container_request["runtime_constraints"] = runtime_constraints
    container_request["use_existing"] = kwargs.get("enable_reuse", True)
    container_request["scheduling_parameters"] = scheduling_parameters

    if kwargs.get("runnerjob", "").startswith("arvwf:"):
        wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
        wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
        if container_request["name"] == "main":
            container_request["name"] = wfrecord["name"]
        container_request["properties"]["template_uuid"] = wfuuid

    try:
        response = self.arvrunner.api.container_requests().create(
            body=container_request
        ).execute(num_retries=self.arvrunner.num_retries)

        self.uuid = response["uuid"]
        self.arvrunner.processes[self.uuid] = self

        if response["state"] == "Final":
            logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
            self.done(response)
        else:
            logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
    except Exception as e:
        logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
        self.output_callback({}, "permanentFail")
def run(self, fileStore):
    cwljob = resolve_indirect(self.cwljob)

    # `promises` dict
    # from: each parameter (workflow input or step output)
    #   that may be used as a "source" for a step input workflow output
    #   parameter
    # to: the job that will produce that value.
    promises = {}

    # `jobs` dict from step id to job that implements that step.
    jobs = {}

    for inp in self.cwlwf.tool["inputs"]:
        promises[inp["id"]] = SelfJob(self, cwljob)

    alloutputs_fufilled = False
    while not alloutputs_fufilled:
        # Iteratively go over the workflow steps, scheduling jobs as their
        # dependencies can be fufilled by upstream workflow inputs or
        # step outputs. Loop exits when the workflow outputs
        # are satisfied.
        alloutputs_fufilled = True

        for step in self.cwlwf.steps:
            if step.tool["id"] not in jobs:
                stepinputs_fufilled = True
                for inp in step.tool["inputs"]:
                    if "source" in inp:
                        for s in aslist(inp["source"]):
                            if s not in promises:
                                stepinputs_fufilled = False
                if stepinputs_fufilled:
                    jobobj = {}

                    for inp in step.tool["inputs"]:
                        key = shortname(inp["id"])
                        if "source" in inp:
                            if inp.get("linkMerge") or len(aslist(inp["source"])) > 1:
                                linkMerge = inp.get("linkMerge", "merge_nested")
                                if linkMerge == "merge_nested":
                                    jobobj[key] = (
                                        MergeInputsNested([(shortname(s), promises[s].rv())
                                                           for s in aslist(inp["source"])]))
                                elif linkMerge == "merge_flattened":
                                    jobobj[key] = (
                                        MergeInputsFlattened([(shortname(s), promises[s].rv())
                                                              for s in aslist(inp["source"])]))
                                else:
                                    raise validate.ValidationException(
                                        "Unsupported linkMerge '%s'" % linkMerge)
                            else:
                                jobobj[key] = (shortname(inp["source"]),
                                               promises[inp["source"]].rv())
                        elif "default" in inp:
                            d = copy.copy(inp["default"])
                            jobobj[key] = ("default", {"default": d})

                        if "valueFrom" in inp and "scatter" not in step.tool:
                            if key in jobobj:
                                jobobj[key] = StepValueFrom(inp["valueFrom"],
                                                            jobobj[key],
                                                            self.cwlwf.requirements)
                            else:
                                jobobj[key] = StepValueFrom(inp["valueFrom"],
                                                            ("None", {"None": None}),
                                                            self.cwlwf.requirements)

                    if "scatter" in step.tool:
                        wfjob = CWLScatter(step, IndirectDict(jobobj),
                                           **self.executor_options)
                        followOn = CWLGather(step, wfjob.rv())
                        wfjob.addFollowOn(followOn)
                    else:
                        (wfjob, followOn) = makeJob(step.embedded_tool,
                                                    IndirectDict(jobobj),
                                                    step_inputs=step.tool["inputs"],
                                                    **self.executor_options)

                    jobs[step.tool["id"]] = followOn

                    connected = False
                    for inp in step.tool["inputs"]:
                        for s in aslist(inp.get("source", [])):
                            if not promises[s].hasChild(wfjob):
                                promises[s].addChild(wfjob)
                                connected = True
                    if not connected:
                        # workflow step has default inputs only, isn't connected
                        # to other jobs, so add it as child of workflow.
                        self.addChild(wfjob)

                    for out in step.tool["outputs"]:
                        promises[out["id"]] = followOn

                for inp in step.tool["inputs"]:
                    for s in aslist(inp.get("source", [])):
                        if s not in promises:
                            alloutputs_fufilled = False

        # may need a test
        for out in self.cwlwf.tool["outputs"]:
            if "source" in out:
                if out["source"] not in promises:
                    alloutputs_fufilled = False

    outobj = {}
    for out in self.cwlwf.tool["outputs"]:
        outobj[shortname(out["id"])] = (shortname(out["outputSource"]),
                                        promises[out["outputSource"]].rv())
    return IndirectDict(outobj)
def run(self, runtimeContext):
    # ArvadosCommandTool subclasses from cwltool.CommandLineTool,
    # which calls makeJobRunner() to get a new ArvadosContainer
    # object.  The fields that define execution such as
    # command_line, environment, etc are set on the
    # ArvadosContainer object by CommandLineTool.job() before
    # run() is called.

    runtimeContext = self.job_runtime

    container_request = {
        "command": self.command_line,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": runtimeContext.priority,
        "state": "Committed",
        "properties": {},
    }
    runtime_constraints = {}

    if runtimeContext.project_uuid:
        container_request["owner_uuid"] = runtimeContext.project_uuid

    if self.arvrunner.secret_store.has_secret(self.command_line):
        raise WorkflowException("Secret material leaked on command line, only file literals may contain secrets")

    if self.arvrunner.secret_store.has_secret(self.environment):
        raise WorkflowException("Secret material leaked in environment, only file literals may contain secrets")

    resources = self.builder.resources
    if resources is not None:
        runtime_constraints["vcpus"] = math.ceil(resources.get("cores", 1))
        runtime_constraints["ram"] = math.ceil(resources.get("ram") * 2**20)

    mounts = {
        self.outdir: {
            "kind": "tmp",
            "capacity": math.ceil(resources.get("outdirSize", 0) * 2**20)
        },
        self.tmpdir: {
            "kind": "tmp",
            "capacity": math.ceil(resources.get("tmpdirSize", 0) * 2**20)
        }
    }
    secret_mounts = {}
    scheduling_parameters = {}

    rf = [self.pathmapper.mapper(f) for f in self.pathmapper.referenced_files]
    rf.sort(key=lambda k: k.resolved)
    prevdir = None
    for resolved, target, tp, stg in rf:
        if not stg:
            continue
        if prevdir and target.startswith(prevdir):
            continue
        if tp == "Directory":
            targetdir = target
        else:
            targetdir = os.path.dirname(target)
        sp = resolved.split("/", 1)
        pdh = sp[0][5:]   # remove "keep:"
        mounts[targetdir] = {
            "kind": "collection",
            "portable_data_hash": pdh
        }
        if pdh in self.pathmapper.pdh_to_uuid:
            mounts[targetdir]["uuid"] = self.pathmapper.pdh_to_uuid[pdh]
        if len(sp) == 2:
            if tp == "Directory":
                path = sp[1]
            else:
                path = os.path.dirname(sp[1])
            if path and path != "/":
                mounts[targetdir]["path"] = path
        prevdir = targetdir + "/"

    with Perf(metrics, "generatefiles %s" % self.name):
        if self.generatefiles["listing"]:
            vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                keep_client=self.arvrunner.keep_client,
                                                num_retries=self.arvrunner.num_retries)
            generatemapper = NoFollowPathMapper(self.generatefiles["listing"], "", "",
                                                separateDirs=False)

            sorteditems = sorted(generatemapper.items(), key=lambda n: n[1].target)

            logger.debug("generatemapper is %s", sorteditems)

            with Perf(metrics, "createfiles %s" % self.name):
                for f, p in sorteditems:
                    if not p.target:
                        continue

                    if p.target.startswith("/"):
                        dst = p.target[len(self.outdir)+1:] if p.target.startswith(self.outdir+"/") else p.target[1:]
                    else:
                        dst = p.target

                    if p.type in ("File", "Directory", "WritableFile", "WritableDirectory"):
                        if p.resolved.startswith("_:"):
                            vwd.mkdirs(dst)
                        else:
                            source, path = self.arvrunner.fs_access.get_collection(p.resolved)
                            vwd.copy(path or ".", dst, source_collection=source)
                    elif p.type == "CreateFile":
                        if self.arvrunner.secret_store.has_secret(p.resolved):
                            mountpoint = p.target if p.target.startswith("/") else os.path.join(self.outdir, p.target)
                            secret_mounts[mountpoint] = {
                                "kind": "text",
                                "content": self.arvrunner.secret_store.retrieve(p.resolved)
                            }
                        else:
                            with vwd.open(dst, "w") as n:
                                n.write(p.resolved)

            def keepemptydirs(p):
                if isinstance(p, arvados.collection.RichCollectionBase):
                    if len(p) == 0:
                        p.open(".keep", "w").close()
                    else:
                        for c in p:
                            keepemptydirs(p[c])

            keepemptydirs(vwd)

            if not runtimeContext.current_container:
                runtimeContext.current_container = arvados_cwl.util.get_current_container(self.arvrunner.api, self.arvrunner.num_retries, logger)
            info = arvados_cwl.util.get_intermediate_collection_info(self.name, runtimeContext.current_container, runtimeContext.intermediate_output_ttl)
            vwd.save_new(name=info["name"],
                         owner_uuid=runtimeContext.project_uuid,
                         ensure_unique_name=True,
                         trash_at=info["trash_at"],
                         properties=info["properties"])

            prev = None
            for f, p in sorteditems:
                if (not p.target or self.arvrunner.secret_store.has_secret(p.resolved) or
                        (prev is not None and p.target.startswith(prev))):
                    continue
                if p.target.startswith("/"):
                    dst = p.target[len(self.outdir)+1:] if p.target.startswith(self.outdir+"/") else p.target[1:]
                else:
                    dst = p.target
                mountpoint = p.target if p.target.startswith("/") else os.path.join(self.outdir, p.target)
                mounts[mountpoint] = {"kind": "collection",
                                      "portable_data_hash": vwd.portable_data_hash(),
                                      "path": dst}
                if p.type.startswith("Writable"):
                    mounts[mountpoint]["writable"] = True
                prev = p.target + "/"

    container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    if self.environment:
        container_request["environment"].update(self.environment)

    if self.stdin:
        sp = self.stdin[6:].split("/", 1)
        mounts["stdin"] = {"kind": "collection",
                           "portable_data_hash": sp[0],
                           "path": sp[1]}

    if self.stderr:
        mounts["stderr"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stderr)}

    if self.stdout:
        mounts["stdout"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stdout)}

    (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
    if not docker_req:
        docker_req = {"dockerImageId": "arvados/jobs:" + __version__}

    container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                 docker_req,
                                                                 runtimeContext.pull_image,
                                                                 runtimeContext.project_uuid,
                                                                 runtimeContext.force_docker_pull,
                                                                 runtimeContext.tmp_outdir_prefix)

    network_req, _ = self.get_requirement("NetworkAccess")
    if network_req:
        runtime_constraints["API"] = network_req["networkAccess"]

    api_req, _ = self.get_requirement("http://arvados.org/cwl#APIRequirement")
    if api_req:
        runtime_constraints["API"] = True

    runtime_req, _ = self.get_requirement("http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            runtime_constraints["keep_cache_ram"] = math.ceil(runtime_req["keep_cache"] * 2**20)
        if "outputDirType" in runtime_req:
            if runtime_req["outputDirType"] == "local_output_dir":
                # Currently the default behavior.
                pass
            elif runtime_req["outputDirType"] == "keep_output_dir":
                mounts[self.outdir] = {"kind": "collection",
                                       "writable": True}

    partition_req, _ = self.get_requirement("http://arvados.org/cwl#PartitionRequirement")
    if partition_req:
        scheduling_parameters["partitions"] = aslist(partition_req["partition"])

    intermediate_output_req, _ = self.get_requirement("http://arvados.org/cwl#IntermediateOutput")
    if intermediate_output_req:
        self.output_ttl = intermediate_output_req["outputTTL"]
    else:
        self.output_ttl = self.arvrunner.intermediate_output_ttl

    if self.output_ttl < 0:
        raise WorkflowException("Invalid value %d for output_ttl, cannot be less than zero" % self.output_ttl)

    if self.timelimit is not None and self.timelimit > 0:
        scheduling_parameters["max_run_time"] = self.timelimit

    extra_submit_params = {}
    if runtimeContext.submit_runner_cluster:
        extra_submit_params["cluster_id"] = runtimeContext.submit_runner_cluster

    container_request["output_name"] = "Output for step %s" % (self.name)
    container_request["output_ttl"] = self.output_ttl
    container_request["mounts"] = mounts
    container_request["secret_mounts"] = secret_mounts
    container_request["runtime_constraints"] = runtime_constraints
    container_request["scheduling_parameters"] = scheduling_parameters

    enable_reuse = runtimeContext.enable_reuse
    if enable_reuse:
        reuse_req, _ = self.get_requirement("WorkReuse")
        if reuse_req:
            enable_reuse = reuse_req["enableReuse"]
        reuse_req, _ = self.get_requirement("http://arvados.org/cwl#ReuseRequirement")
        if reuse_req:
            enable_reuse = reuse_req["enableReuse"]
    container_request["use_existing"] = enable_reuse

    if runtimeContext.runnerjob.startswith("arvwf:"):
        wfuuid = runtimeContext.runnerjob[6:runtimeContext.runnerjob.index("#")]
        wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
        if container_request["name"] == "main":
            container_request["name"] = wfrecord["name"]
        container_request["properties"]["template_uuid"] = wfuuid

    self.output_callback = self.arvrunner.get_wrapped_callback(self.output_callback)

    try:
        if runtimeContext.submit_request_uuid:
            response = self.arvrunner.api.container_requests().update(
                uuid=runtimeContext.submit_request_uuid,
                body=container_request,
                **extra_submit_params
            ).execute(num_retries=self.arvrunner.num_retries)
        else:
            response = self.arvrunner.api.container_requests().create(
                body=container_request,
                **extra_submit_params
            ).execute(num_retries=self.arvrunner.num_retries)

        self.uuid = response["uuid"]
        self.arvrunner.process_submitted(self)

        if response["state"] == "Final":
            logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
        else:
            logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
    except Exception as e:
        logger.exception("%s error submitting container\n%s", self.arvrunner.label(self), e)
        logger.debug("Container request was %s", container_request)
        self.output_callback({}, "permanentFail")
def run(self, file_store):
    cwljob = resolve_indirect(self.cwljob)

    # `promises` dict
    # from: each parameter (workflow input or step output)
    #   that may be used as a "source" for a step input workflow output
    #   parameter
    # to: the job that will produce that value.
    promises = {}

    # `jobs` dict from step id to job that implements that step.
    jobs = {}

    for inp in self.cwlwf.tool["inputs"]:
        promises[inp["id"]] = SelfJob(self, cwljob)

    alloutputs_fufilled = False
    while not alloutputs_fufilled:
        # Iteratively go over the workflow steps, scheduling jobs as their
        # dependencies can be fufilled by upstream workflow inputs or
        # step outputs. Loop exits when the workflow outputs
        # are satisfied.
        alloutputs_fufilled = True

        for step in self.cwlwf.steps:
            if step.tool["id"] not in jobs:
                stepinputs_fufilled = True
                for inp in step.tool["inputs"]:
                    if "source" in inp:
                        for s in aslist(inp["source"]):
                            if s not in promises:
                                stepinputs_fufilled = False
                if stepinputs_fufilled:
                    jobobj = {}

                    for inp in step.tool["inputs"]:
                        key = shortname(inp["id"])
                        if "source" in inp:
                            if inp.get("linkMerge") \
                                    or len(aslist(inp["source"])) > 1:
                                linkMerge = inp.get("linkMerge", "merge_nested")
                                if linkMerge == "merge_nested":
                                    jobobj[key] = (
                                        MergeInputsNested([(shortname(s), promises[s].rv())
                                                           for s in aslist(inp["source"])]))
                                elif linkMerge == "merge_flattened":
                                    jobobj[key] = (
                                        MergeInputsFlattened([(shortname(s), promises[s].rv())
                                                              for s in aslist(inp["source"])]))
                                else:
                                    raise validate.ValidationException(
                                        "Unsupported linkMerge '%s'" % linkMerge)
                            else:
                                inpSource = inp["source"]
                                if isinstance(inpSource, MutableSequence):
                                    # It seems that an input source with a
                                    # '#' in the name will be returned as a
                                    # CommentedSeq list by the yaml parser.
                                    inpSource = str(inpSource[0])
                                jobobj[key] = (shortname(inpSource),
                                               promises[inpSource].rv())
                        if "default" in inp:
                            if key in jobobj:
                                if isinstance(jobobj[key][1], Promise):
                                    d = copy.copy(inp["default"])
                                    jobobj[key] = DefaultWithSource(d, jobobj[key])
                                else:
                                    if jobobj[key][1][jobobj[key][0]] is None:
                                        d = copy.copy(inp["default"])
                                        jobobj[key] = ("default", {"default": d})
                            else:
                                d = copy.copy(inp["default"])
                                jobobj[key] = ("default", {"default": d})

                        if "valueFrom" in inp \
                                and "scatter" not in step.tool:
                            if key in jobobj:
                                jobobj[key] = StepValueFrom(inp["valueFrom"],
                                                            jobobj[key],
                                                            self.cwlwf.requirements)
                            else:
                                jobobj[key] = StepValueFrom(inp["valueFrom"],
                                                            ("None", {"None": None}),
                                                            self.cwlwf.requirements)

                    if "scatter" in step.tool:
                        wfjob = CWLScatter(step, IndirectDict(jobobj),
                                           self.runtime_context)
                        followOn = CWLGather(step, wfjob.rv())
                        wfjob.addFollowOn(followOn)
                    else:
                        (wfjob, followOn) = makeJob(step.embedded_tool,
                                                    IndirectDict(jobobj),
                                                    step.tool["inputs"],
                                                    self.runtime_context)

                    jobs[step.tool["id"]] = followOn

                    connected = False
                    for inp in step.tool["inputs"]:
                        for s in aslist(inp.get("source", [])):
                            if (isinstance(promises[s], (CWLJobWrapper, CWLGather))
                                    and not promises[s].hasFollowOn(wfjob)):
                                promises[s].addFollowOn(wfjob)
                                connected = True
                            if (not isinstance(promises[s], (CWLJobWrapper, CWLGather))
                                    and not promises[s].hasChild(wfjob)):
                                promises[s].addChild(wfjob)
                                connected = True
                    if not connected:
                        # the workflow step has default inputs only & isn't
                        # connected to other jobs, so add it as child of
                        # this workflow.
                        self.addChild(wfjob)

                    for out in step.tool["outputs"]:
                        promises[out["id"]] = followOn

                for inp in step.tool["inputs"]:
                    for source in aslist(inp.get("source", [])):
                        if source not in promises:
                            alloutputs_fufilled = False

        # may need a test
        for out in self.cwlwf.tool["outputs"]:
            if "source" in out:
                if out["source"] not in promises:
                    alloutputs_fufilled = False

    outobj = {}
    for out in self.cwlwf.tool["outputs"]:
        key = shortname(out["id"])
        if out.get("linkMerge") or len(aslist(out["outputSource"])) > 1:
            link_merge = out.get("linkMerge", "merge_nested")
            if link_merge == "merge_nested":
                outobj[key] = (
                    MergeInputsNested([(shortname(s), promises[s].rv())
                                       for s in aslist(out["outputSource"])]))
            elif link_merge == "merge_flattened":
                outobj[key] = (
                    MergeInputsFlattened([(shortname(s), promises[s].rv())
                                          for s in aslist(out["outputSource"])]))
            else:
                raise validate.ValidationException(
                    "Unsupported linkMerge '{}'".format(link_merge))
        else:
            # A CommentedSeq of length one still appears here rarely -
            # not clear why from the CWL code. When it does, it breaks
            # the execution by causing a non-hashable type exception.
            # We simplify the list into its first (and only) element.
            src = simplify_list(out["outputSource"])
            outobj[key] = (shortname(src), promises[src].rv())

    return IndirectDict(outobj)
def run(self, dry_run=False, pull_image=True, **kwargs):
    container_request = {
        "command": self.command_line,
        "owner_uuid": self.arvrunner.project_uuid,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": 1,
        "state": "Committed",
        "properties": {}
    }
    runtime_constraints = {}

    resources = self.builder.resources
    if resources is not None:
        runtime_constraints["vcpus"] = resources.get("cores", 1)
        runtime_constraints["ram"] = resources.get("ram") * 2**20

    mounts = {
        self.outdir: {
            "kind": "tmp",
            "capacity": resources.get("outdirSize", 0) * 2**20
        },
        self.tmpdir: {
            "kind": "tmp",
            "capacity": resources.get("tmpdirSize", 0) * 2**20
        }
    }
    scheduling_parameters = {}

    rf = [self.pathmapper.mapper(f) for f in self.pathmapper.referenced_files]
    rf.sort(key=lambda k: k.resolved)
    prevdir = None
    for resolved, target, tp, stg in rf:
        if not stg:
            continue
        if prevdir and target.startswith(prevdir):
            continue
        if tp == "Directory":
            targetdir = target
        else:
            targetdir = os.path.dirname(target)
        sp = resolved.split("/", 1)
        pdh = sp[0][5:]   # remove "keep:"
        mounts[targetdir] = {
            "kind": "collection",
            "portable_data_hash": pdh
        }
        if len(sp) == 2:
            if tp == "Directory":
                path = sp[1]
            else:
                path = os.path.dirname(sp[1])
            if path and path != "/":
                mounts[targetdir]["path"] = path
        prevdir = targetdir + "/"

    with Perf(metrics, "generatefiles %s" % self.name):
        if self.generatefiles["listing"]:
            vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                keep_client=self.arvrunner.keep_client,
                                                num_retries=self.arvrunner.num_retries)
            generatemapper = NoFollowPathMapper([self.generatefiles], "", "",
                                                separateDirs=False)

            with Perf(metrics, "createfiles %s" % self.name):
                for f, p in generatemapper.items():
                    if not p.target:
                        pass
                    elif p.type in ("File", "Directory"):
                        source, path = self.arvrunner.fs_access.get_collection(p.resolved)
                        vwd.copy(path, p.target, source_collection=source)
                    elif p.type == "CreateFile":
                        with vwd.open(p.target, "w") as n:
                            n.write(p.resolved.encode("utf-8"))

            with Perf(metrics, "generatefiles.save_new %s" % self.name):
                vwd.save_new()

            for f, p in generatemapper.items():
                if not p.target:
                    continue
                mountpoint = "%s/%s" % (self.outdir, p.target)
                mounts[mountpoint] = {"kind": "collection",
                                      "portable_data_hash": vwd.portable_data_hash(),
                                      "path": p.target}

    container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    if self.environment:
        container_request["environment"].update(self.environment)

    if self.stdin:
        sp = self.stdin[6:].split("/", 1)
        mounts["stdin"] = {"kind": "collection",
                           "portable_data_hash": sp[0],
                           "path": sp[1]}

    if self.stderr:
        mounts["stderr"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stderr)}

    if self.stdout:
        mounts["stdout"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stdout)}

    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
    if not docker_req:
        docker_req = {"dockerImageId": "arvados/jobs"}

    container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                 docker_req,
                                                                 pull_image,
                                                                 self.arvrunner.project_uuid)

    api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
    if api_req:
        runtime_constraints["API"] = True

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            runtime_constraints["keep_cache_ram"] = runtime_req["keep_cache"] * 2**20
        if "outputDirType" in runtime_req:
            if runtime_req["outputDirType"] == "local_output_dir":
                # Currently the default behavior.
                pass
            elif runtime_req["outputDirType"] == "keep_output_dir":
                mounts[self.outdir] = {"kind": "collection",
                                       "writable": True}

    partition_req, _ = get_feature(self, "http://arvados.org/cwl#PartitionRequirement")
    if partition_req:
        scheduling_parameters["partitions"] = aslist(partition_req["partition"])

    container_request["mounts"] = mounts
    container_request["runtime_constraints"] = runtime_constraints
    container_request["use_existing"] = kwargs.get("enable_reuse", True)
    container_request["scheduling_parameters"] = scheduling_parameters

    if kwargs.get("runnerjob", "").startswith("arvwf:"):
        wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
        wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
        if container_request["name"] == "main":
            container_request["name"] = wfrecord["name"]
        container_request["properties"]["template_uuid"] = wfuuid

    try:
        response = self.arvrunner.api.container_requests().create(
            body=container_request
        ).execute(num_retries=self.arvrunner.num_retries)

        self.uuid = response["uuid"]
        self.arvrunner.processes[self.uuid] = self

        if response["state"] == "Final":
            logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
            self.done(response)
        else:
            logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
    except Exception as e:
        logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
        self.output_callback({}, "permanentFail")
def run(self, dry_run=False, pull_image=True, **kwargs):
    container_request = {
        "command": self.command_line,
        "owner_uuid": self.arvrunner.project_uuid,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": 1,
        "state": "Committed",
        "properties": {}
    }
    runtime_constraints = {}
    mounts = {
        self.outdir: {
            "kind": "tmp"
        }
    }
    scheduling_parameters = {}

    dirs = set()
    for f in self.pathmapper.files():
        _, p, tp = self.pathmapper.mapper(f)
        if tp == "Directory" and '/' not in p[6:]:
            mounts[p] = {
                "kind": "collection",
                "portable_data_hash": p[6:]
            }
            dirs.add(p[6:])
    for f in self.pathmapper.files():
        _, p, tp = self.pathmapper.mapper(f)
        if p[6:].split("/")[0] not in dirs:
            mounts[p] = {
                "kind": "collection",
                "portable_data_hash": p[6:]
            }

    if self.generatefiles["listing"]:
        raise UnsupportedRequirement("InitialWorkDirRequirement not supported with --api=containers")

    container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    if self.environment:
        container_request["environment"].update(self.environment)

    if self.stdin:
        raise UnsupportedRequirement("Stdin redirection currently not supported")

    if self.stderr:
        raise UnsupportedRequirement("Stderr redirection currently not supported")

    if self.stdout:
        mounts["stdout"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stdout)}

    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
    if not docker_req:
        docker_req = {"dockerImageId": arvados_jobs_image(self.arvrunner)}

    container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                 docker_req,
                                                                 pull_image,
                                                                 self.arvrunner.project_uuid)

    resources = self.builder.resources
    if resources is not None:
        runtime_constraints["vcpus"] = resources.get("cores", 1)
        runtime_constraints["ram"] = resources.get("ram") * 2**20

    api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
    if api_req:
        runtime_constraints["API"] = True

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            runtime_constraints["keep_cache_ram"] = runtime_req["keep_cache"]

    partition_req, _ = get_feature(self, "http://arvados.org/cwl#PartitionRequirement")
    if partition_req:
        scheduling_parameters["partitions"] = aslist(partition_req["partition"])

    container_request["mounts"] = mounts
    container_request["runtime_constraints"] = runtime_constraints
    container_request["use_existing"] = kwargs.get("enable_reuse", True)
    container_request["scheduling_parameters"] = scheduling_parameters

    if kwargs.get("runnerjob", "").startswith("arvwf:"):
        wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
        wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
        if container_request["name"] == "main":
            container_request["name"] = wfrecord["name"]
        container_request["properties"]["template_uuid"] = wfuuid

    try:
        response = self.arvrunner.api.container_requests().create(
            body=container_request
        ).execute(num_retries=self.arvrunner.num_retries)

        self.uuid = response["uuid"]
        self.arvrunner.processes[self.uuid] = self

        logger.info("Container request %s (%s) state is %s", self.name, response["uuid"], response["state"])

        if response["state"] == "Final":
            self.done(response)
    except Exception as e:
        logger.error("Got error %s" % str(e))
        self.output_callback({}, "permanentFail")
def run(self, dry_run=False, pull_image=True, **kwargs):
    container_request = {
        "command": self.command_line,
        "owner_uuid": self.arvrunner.project_uuid,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": 1,
        "state": "Committed",
        "properties": {}
    }
    runtime_constraints = {}
    mounts = {
        self.outdir: {
            "kind": "tmp"
        }
    }
    scheduling_parameters = {}

    dirs = set()
    for f in self.pathmapper.files():
        pdh, p, tp = self.pathmapper.mapper(f)
        if tp == "Directory" and '/' not in pdh:
            mounts[p] = {
                "kind": "collection",
                "portable_data_hash": pdh[5:]
            }
            dirs.add(pdh)

    for f in self.pathmapper.files():
        res, p, tp = self.pathmapper.mapper(f)
        if res.startswith("keep:"):
            res = res[5:]
        elif res.startswith("/keep/"):
            res = res[6:]
        else:
            continue
        sp = res.split("/", 1)
        pdh = sp[0]
        if pdh not in dirs:
            mounts[p] = {
                "kind": "collection",
                "portable_data_hash": pdh
            }
            if len(sp) == 2:
                mounts[p]["path"] = sp[1]

    with Perf(metrics, "generatefiles %s" % self.name):
        if self.generatefiles["listing"]:
            vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                keep_client=self.arvrunner.keep_client,
                                                num_retries=self.arvrunner.num_retries)
            generatemapper = NoFollowPathMapper([self.generatefiles], "", "",
                                                separateDirs=False)

            with Perf(metrics, "createfiles %s" % self.name):
                for f, p in generatemapper.items():
                    if not p.target:
                        pass
                    elif p.type in ("File", "Directory"):
                        source, path = self.arvrunner.fs_access.get_collection(p.resolved)
                        vwd.copy(path, p.target, source_collection=source)
                    elif p.type == "CreateFile":
                        with vwd.open(p.target, "w") as n:
                            n.write(p.resolved.encode("utf-8"))

            with Perf(metrics, "generatefiles.save_new %s" % self.name):
                vwd.save_new()

            for f, p in generatemapper.items():
                if not p.target:
                    continue
                mountpoint = "%s/%s" % (self.outdir, p.target)
                mounts[mountpoint] = {
                    "kind": "collection",
                    "portable_data_hash": vwd.portable_data_hash(),
                    "path": p.target
                }

    container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    if self.environment:
        container_request["environment"].update(self.environment)

    if self.stdin:
        raise UnsupportedRequirement("Stdin redirection currently not supported")

    if self.stderr:
        raise UnsupportedRequirement("Stderr redirection currently not supported")

    if self.stdout:
        mounts["stdout"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stdout)}

    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
    if not docker_req:
        docker_req = {"dockerImageId": "arvados/jobs"}

    container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                 docker_req,
                                                                 pull_image,
                                                                 self.arvrunner.project_uuid)

    resources = self.builder.resources
    if resources is not None:
        runtime_constraints["vcpus"] = resources.get("cores", 1)
        runtime_constraints["ram"] = resources.get("ram") * 2**20

    api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
    if api_req:
        runtime_constraints["API"] = True

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            runtime_constraints["keep_cache_ram"] = runtime_req["keep_cache"] * 2**20

    partition_req, _ = get_feature(self, "http://arvados.org/cwl#PartitionRequirement")
    if partition_req:
        scheduling_parameters["partitions"] = aslist(partition_req["partition"])

    container_request["mounts"] = mounts
    container_request["runtime_constraints"] = runtime_constraints
    container_request["use_existing"] = kwargs.get("enable_reuse", True)
    container_request["scheduling_parameters"] = scheduling_parameters

    if kwargs.get("runnerjob", "").startswith("arvwf:"):
        wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
        wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
        if container_request["name"] == "main":
            container_request["name"] = wfrecord["name"]
        container_request["properties"]["template_uuid"] = wfuuid

    try:
        response = self.arvrunner.api.container_requests().create(
            body=container_request
        ).execute(num_retries=self.arvrunner.num_retries)

        self.uuid = response["uuid"]
        self.arvrunner.processes[self.uuid] = self

        if response["state"] == "Final":
            logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
            self.done(response)
        else:
            logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
    except Exception as e:
        logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
        self.output_callback({}, "permanentFail")
def arv_executor(self, updated_tool, job_order, runtimeContext, logger=None):
    self.debug = runtimeContext.debug

    workbench1 = self.api.config()["Services"]["Workbench1"]["ExternalURL"]
    workbench2 = self.api.config()["Services"]["Workbench2"]["ExternalURL"]
    controller = self.api.config()["Services"]["Controller"]["ExternalURL"]
    logger.info("Using cluster %s (%s)", self.api.config()["ClusterID"], workbench2 or workbench1 or controller)

    updated_tool.visit(self.check_features)

    self.project_uuid = runtimeContext.project_uuid
    self.pipeline = None
    self.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir)
    self.secret_store = runtimeContext.secret_store

    self.trash_intermediate = runtimeContext.trash_intermediate
    if self.trash_intermediate and self.work_api != "containers":
        raise Exception("--trash-intermediate is only supported with --api=containers.")

    self.intermediate_output_ttl = runtimeContext.intermediate_output_ttl
    if self.intermediate_output_ttl and self.work_api != "containers":
        raise Exception("--intermediate-output-ttl is only supported with --api=containers.")
    if self.intermediate_output_ttl < 0:
        raise Exception("Invalid value %d for --intermediate-output-ttl, cannot be less than zero" % self.intermediate_output_ttl)

    if runtimeContext.submit_request_uuid and self.work_api != "containers":
        raise Exception("--submit-request-uuid requires containers API, but using '{}' api".format(self.work_api))

    default_storage_classes = ",".join([k for k, v in self.api.config().get("StorageClasses", {"default": {"Default": True}}).items()
                                        if v.get("Default") is True])
    if runtimeContext.storage_classes == "default":
        runtimeContext.storage_classes = default_storage_classes
    if runtimeContext.intermediate_storage_classes == "default":
        runtimeContext.intermediate_storage_classes = default_storage_classes

    if not runtimeContext.name:
        runtimeContext.name = self.name = updated_tool.tool.get("label") or updated_tool.metadata.get("label") or os.path.basename(updated_tool.tool["id"])

    # Upload local file references in the job order.
    job_order = upload_job_order(self, "%s input" % runtimeContext.name,
                                 updated_tool, job_order)

    # the last clause means: if it is a command line tool, and we
    # are going to wait for the result, and always_submit_runner
    # is false, then we don't submit a runner process.
    submitting = (runtimeContext.update_workflow or
                  runtimeContext.create_workflow or
                  (runtimeContext.submit and not
                   (updated_tool.tool["class"] == "CommandLineTool" and
                    runtimeContext.wait and
                    not runtimeContext.always_submit_runner)))

    loadingContext = self.loadingContext.copy()
    loadingContext.do_validate = False
    if submitting:
        loadingContext.do_update = False
        # Document may have been auto-updated. Reload the original
        # document with updating disabled because we want to
        # submit the document with its original CWL version, not
        # the auto-updated one.
        tool = load_tool(updated_tool.tool["id"], loadingContext)
    else:
        tool = updated_tool

    # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
    # Also uploads docker images.
    merged_map = upload_workflow_deps(self, tool)

    # Recreate process object (ArvadosWorkflow or
    # ArvadosCommandTool) because tool document may have been
    # updated by upload_workflow_deps in ways that modify
    # inheritance of hints or requirements.
    loadingContext.loader = tool.doc_loader
    loadingContext.avsc_names = tool.doc_schema
    loadingContext.metadata = tool.metadata
    tool = load_tool(tool.tool, loadingContext)

    existing_uuid = runtimeContext.update_workflow
    if existing_uuid or runtimeContext.create_workflow:
        # Create a pipeline template or workflow record and exit.
        if self.work_api == "containers":
            uuid = upload_workflow(self, tool, job_order,
                                   self.project_uuid,
                                   uuid=existing_uuid,
                                   submit_runner_ram=runtimeContext.submit_runner_ram,
                                   name=runtimeContext.name,
                                   merged_map=merged_map,
                                   submit_runner_image=runtimeContext.submit_runner_image)
            self.stdout.write(uuid + "\n")
            return (None, "success")

    self.apply_reqs(job_order, tool)

    self.ignore_docker_for_reuse = runtimeContext.ignore_docker_for_reuse
    self.eval_timeout = runtimeContext.eval_timeout

    runtimeContext = runtimeContext.copy()
    runtimeContext.use_container = True
    runtimeContext.tmpdir_prefix = "tmp"
    runtimeContext.work_api = self.work_api

    if self.work_api == "containers":
        if self.ignore_docker_for_reuse:
            raise Exception("--ignore-docker-for-reuse not supported with containers API.")
        runtimeContext.outdir = "/var/spool/cwl"
        runtimeContext.docker_outdir = "/var/spool/cwl"
        runtimeContext.tmpdir = "/tmp"
        runtimeContext.docker_tmpdir = "/tmp"

    if runtimeContext.priority < 1 or runtimeContext.priority > 1000:
        raise Exception("--priority must be in the range 1..1000.")

    if self.should_estimate_cache_size:
        visited = set()
        estimated_size = [0]

        def estimate_collection_cache(obj):
            if obj.get("location", "").startswith("keep:"):
                m = pdh_size.match(obj["location"][5:])
                if m and m.group(1) not in visited:
                    visited.add(m.group(1))
                    estimated_size[0] += int(m.group(2))

        visit_class(job_order, ("File", "Directory"), estimate_collection_cache)
        runtimeContext.collection_cache_size = max(((estimated_size[0] * 192) // (1024 * 1024)) + 1, 256)
        self.collection_cache.set_cap(runtimeContext.collection_cache_size * 1024 * 1024)

    logger.info("Using collection cache size %s MiB", runtimeContext.collection_cache_size)

    runnerjob = None
    if runtimeContext.submit:
        # Submit a runner job to run the workflow for us.
        if self.work_api == "containers":
            if submitting:
                tool = RunnerContainer(self, updated_tool,
                                       tool, loadingContext, runtimeContext.enable_reuse,
                                       self.output_name,
                                       self.output_tags,
                                       submit_runner_ram=runtimeContext.submit_runner_ram,
                                       name=runtimeContext.name,
                                       on_error=runtimeContext.on_error,
                                       submit_runner_image=runtimeContext.submit_runner_image,
                                       intermediate_output_ttl=runtimeContext.intermediate_output_ttl,
                                       merged_map=merged_map,
                                       priority=runtimeContext.priority,
                                       secret_store=self.secret_store,
                                       collection_cache_size=runtimeContext.collection_cache_size,
                                       collection_cache_is_default=self.should_estimate_cache_size)
            else:
                runtimeContext.runnerjob = tool.tool["id"]

    if runtimeContext.cwl_runner_job is not None:
        self.uuid = runtimeContext.cwl_runner_job.get('uuid')

    jobiter = tool.job(job_order,
                       self.output_callback,
                       runtimeContext)

    if runtimeContext.submit and not runtimeContext.wait:
        runnerjob = next(jobiter)
        runnerjob.run(runtimeContext)
        self.stdout.write(runnerjob.uuid + "\n")
        return (None, "success")

    current_container = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
    if current_container:
        logger.info("Running inside container %s", current_container.get("uuid"))

    self.poll_api = arvados.api('v1', timeout=runtimeContext.http_timeout)
    self.polling_thread = threading.Thread(target=self.poll_states)
    self.polling_thread.start()

    self.task_queue = TaskQueue(self.workflow_eval_lock, self.thread_count)

    try:
        self.workflow_eval_lock.acquire()

        # Holds the lock while this code runs and releases it when
        # it is safe to do so in self.workflow_eval_lock.wait(),
        # at which point on_message can update job state and
        # process output callbacks.

        loopperf = Perf(metrics, "jobiter")
        loopperf.__enter__()
        for runnable in jobiter:
            loopperf.__exit__()

            if self.stop_polling.is_set():
                break

            if self.task_queue.error is not None:
                raise self.task_queue.error

            if runnable:
                with Perf(metrics, "run"):
                    self.start_run(runnable, runtimeContext)
            else:
                if (self.task_queue.in_flight + len(self.processes)) > 0:
                    self.workflow_eval_lock.wait(3)
                else:
                    logger.error("Workflow is deadlocked, no runnable processes and not waiting on any pending processes.")
                    break

            if self.stop_polling.is_set():
                break

            loopperf.__enter__()
        loopperf.__exit__()

        while (self.task_queue.in_flight + len(self.processes)) > 0:
            if self.task_queue.error is not None:
                raise self.task_queue.error
            self.workflow_eval_lock.wait(3)

    except UnsupportedRequirement:
        raise
    except:
        if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
            logger.error("Interrupted, workflow will be cancelled")
        elif isinstance(sys.exc_info()[1], WorkflowException):
            logger.error("Workflow execution failed:\n%s", sys.exc_info()[1],
                         exc_info=(sys.exc_info()[1] if self.debug else False))
        else:
            logger.exception("Workflow execution failed")

        if self.pipeline:
            self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
                                                 body={"state": "Failed"}).execute(num_retries=self.num_retries)

        if self.work_api == "containers" and not current_container:
            # Not running in a crunch container, so cancel any outstanding processes.
            for p in self.processes:
                try:
                    self.api.container_requests().update(uuid=p,
                                                         body={"priority": "0"}
                                                         ).execute(num_retries=self.num_retries)
                except Exception:
                    pass
    finally:
        self.workflow_eval_lock.release()
        self.task_queue.drain()
        self.stop_polling.set()
        self.polling_thread.join()
        self.task_queue.join()

    if self.final_status == "UnsupportedRequirement":
        raise UnsupportedRequirement("Check log for details.")

    if self.final_output is None:
        raise WorkflowException("Workflow did not return a result.")

    if runtimeContext.submit and isinstance(tool, Runner):
        logger.info("Final output collection %s", tool.final_output)
        if workbench2 or workbench1:
            logger.info("Output at %scollections/%s", workbench2 or workbench1, tool.final_output)
    else:
        if self.output_name is None:
            self.output_name = "Output of %s" % (shortname(tool.tool["id"]))
        if self.output_tags is None:
            self.output_tags = ""

        storage_classes = ""
        storage_class_req, _ = tool.get_requirement("http://arvados.org/cwl#OutputStorageClass")
        if storage_class_req and storage_class_req.get("finalStorageClass"):
            storage_classes = aslist(storage_class_req["finalStorageClass"])
        else:
            storage_classes = runtimeContext.storage_classes.strip().split(",")

        self.final_output, self.final_output_collection = self.make_output_collection(
            self.output_name, storage_classes, self.output_tags, self.final_output)
        self.set_crunch_output()

    if runtimeContext.compute_checksum:
        adjustDirObjs(self.final_output, partial(get_listing, self.fs_access))
        adjustFileObjs(self.final_output, partial(compute_checksums, self.fs_access))

    if self.trash_intermediate and self.final_status == "success":
        self.trash_intermediate_output()

    return (self.final_output, self.final_status)
def run(self, dry_run=False, pull_image=True, **kwargs):
    container_request = {
        "command": self.command_line,
        "owner_uuid": self.arvrunner.project_uuid,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": 1,
        "state": "Committed"
    }
    runtime_constraints = {}
    mounts = {
        self.outdir: {
            "kind": "tmp"
        }
    }

    dirs = set()
    for f in self.pathmapper.files():
        _, p, tp = self.pathmapper.mapper(f)
        if tp == "Directory" and '/' not in p[6:]:
            mounts[p] = {
                "kind": "collection",
                "portable_data_hash": p[6:]
            }
            dirs.add(p[6:])
    for f in self.pathmapper.files():
        _, p, tp = self.pathmapper.mapper(f)
        if p[6:].split("/")[0] not in dirs:
            mounts[p] = {
                "kind": "collection",
                "portable_data_hash": p[6:]
            }

    if self.generatefiles["listing"]:
        raise UnsupportedRequirement("Generate files not supported")

    container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    if self.environment:
        container_request["environment"].update(self.environment)

    if self.stdin:
        raise UnsupportedRequirement("Stdin redirection currently not supported")

    if self.stderr:
        raise UnsupportedRequirement("Stderr redirection currently not supported")

    if self.stdout:
        mounts["stdout"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stdout)}

    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
    if not docker_req:
        docker_req = {"dockerImageId": "arvados/jobs"}

    container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                 docker_req,
                                                                 pull_image,
                                                                 self.arvrunner.project_uuid)

    resources = self.builder.resources
    if resources is not None:
        runtime_constraints["vcpus"] = resources.get("cores", 1)
        runtime_constraints["ram"] = resources.get("ram") * 2**20

    api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
    if api_req:
        runtime_constraints["API"] = True

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        logger.warn("RuntimeConstraints not yet supported by container API")

    partition_req, _ = get_feature(self, "http://arvados.org/cwl#PartitionRequirement")
    if partition_req:
        runtime_constraints["partition"] = aslist(partition_req["partition"])

    container_request["mounts"] = mounts
    container_request["runtime_constraints"] = runtime_constraints

    try:
        response = self.arvrunner.api.container_requests().create(
            body=container_request
        ).execute(num_retries=self.arvrunner.num_retries)

        self.arvrunner.processes[response["container_uuid"]] = self

        logger.info("Container %s (%s) request state is %s", self.name, response["uuid"], response["state"])

        if response["state"] == "Final":
            self.done(response)
    except Exception as e:
        logger.error("Got error %s" % str(e))
        self.output_callback({}, "permanentFail")
def collect_output(self,
                   schema,                # type: Dict[Text, Any]
                   builder,               # type: Builder
                   outdir,                # type: Text
                   fs_access,             # type: StdFsAccess
                   compute_checksum=True  # type: bool
                   ):
    # type: (...) -> Optional[Union[Dict[Text, Any], List[Union[Dict[Text, Any], Text]]]]
    result = []  # type: List[Any]
    empty_and_optional = False
    debug = LOGGER.isEnabledFor(logging.DEBUG)
    if "outputBinding" in schema:
        binding = schema["outputBinding"]
        globpatterns = []  # type: List[Text]

        revmap = partial(command_line_tool.revmap_file, builder, outdir)

        if "glob" in binding:
            with SourceLine(binding, "glob", WorkflowException, debug):
                for glob in aslist(binding["glob"]):
                    glob = builder.do_eval(glob)
                    if glob:
                        globpatterns.extend(aslist(glob))

                for glob in globpatterns:
                    if glob.startswith(outdir):
                        glob = glob[len(outdir) + 1:]
                    elif glob == ".":
                        glob = outdir
                    elif glob.startswith("/"):
                        raise WorkflowException("glob patterns must not start with '/'")
                    try:
                        prefix = fs_access.glob(outdir)
                        key = cmp_to_key(cast(Callable[[Text, Text], int], locale.strcoll))

                        # In case of stdout.log or stderr.log file not created
                        if "stdout" in self.tool and "stderr" in self.tool \
                                and glob in (self.tool["stdout"], self.tool["stderr"]):
                            filepath = Path(fs_access.join(outdir, glob))
                            if not filepath.is_file():
                                Path(filepath).touch()

                        result.extend([{
                            "location": g,
                            "path": fs_access.join(builder.outdir, g[len(prefix[0]) + 1:]),
                            "basename": os.path.basename(g),
                            "nameroot": os.path.splitext(os.path.basename(g))[0],
                            "nameext": os.path.splitext(os.path.basename(g))[1],
                            "class": "File" if fs_access.isfile(g) else "Directory"
                        } for g in sorted(fs_access.glob(fs_access.join(outdir, glob)),
                                          key=key)])
                    except (OSError, IOError) as exc:
                        LOGGER.warning(Text(exc))
                    except Exception:
                        LOGGER.exception("Unexpected error from fs_access")
                        raise

        for files in result:
            rfile = files.copy()
            # TODO This function raise an exception and seems to be related to docker (which is not used here)
            # revmap(rfile)
            if files["class"] == "Directory":
                load_listing = builder.loadListing or (binding and binding.get("loadListing"))
                if load_listing and load_listing != "no_listing":
                    get_listing(fs_access, files, (load_listing == "deep_listing"))
            else:
                with fs_access.open(rfile["location"], "rb") as f:
                    contents = b""
                    if binding.get("loadContents") or compute_checksum:
                        contents = f.read(CONTENT_LIMIT)
                    if binding.get("loadContents"):
                        files["contents"] = contents.decode("utf-8")
                    if compute_checksum:
                        checksum = hashlib.sha1()  # nosec: B303
                        while contents != b"":
                            checksum.update(contents)
                            contents = f.read(1024 * 1024)
                        files["checksum"] = "sha1$%s" % checksum.hexdigest()
                    f.seek(0, 2)
                    file_size = f.tell()
                files["size"] = file_size

        optional = False
        single = False
        if isinstance(schema["type"], list):
            if "null" in schema["type"]:
                optional = True
            if "File" in schema["type"] or "Directory" in schema["type"]:
                single = True
        elif schema["type"] == "File" or schema["type"] == "Directory":
            single = True

        if "outputEval" in binding:
            with SourceLine(binding, "outputEval", WorkflowException, debug):
                result = builder.do_eval(binding["outputEval"], context=result)

        if single:
            if not result and not optional:
                with SourceLine(binding, "glob", WorkflowException, debug):
                    raise WorkflowException("Did not find output file with glob pattern: '{}'".format(globpatterns))
            elif not result and optional:
                pass
            elif isinstance(result, list):
                if len(result) > 1:
                    raise WorkflowException("Multiple matches for output item that is a single file.")
                result = result[0]

    if "secondaryFiles" in schema:
        with SourceLine(schema, "secondaryFiles", WorkflowException, debug):
            for primary in aslist(result):
                if isinstance(primary, dict):
                    primary.setdefault("secondaryFiles", [])
                    pathprefix = primary["path"][0:primary["path"].rindex("/") + 1]
                    for file in aslist(schema["secondaryFiles"]):
                        if isinstance(file, dict) or "$(" in file or "${" in file:
                            sfpath = builder.do_eval(file, context=primary)
                            subst = False
                        else:
                            sfpath = file
                            subst = True
                        for sfitem in aslist(sfpath):
                            if isinstance(sfitem, str):
                                if subst:
                                    sfitem = {"path": substitute(primary["path"], sfitem)}
                                else:
                                    sfitem = {"path": pathprefix + sfitem}
                            if "path" in sfitem and "location" not in sfitem:
                                revmap(sfitem)
                            if fs_access.isfile(sfitem["location"]):
                                sfitem["class"] = "File"
                                primary["secondaryFiles"].append(sfitem)
                            elif fs_access.isdir(sfitem["location"]):
                                sfitem["class"] = "Directory"
                                primary["secondaryFiles"].append(sfitem)

    if "format" in schema:
        for primary in aslist(result):
            primary["format"] = builder.do_eval(schema["format"], context=primary)

    # Ensure files point to local references outside of the run environment
    # TODO: Again removing revmap....
    # adjustFileObjs(result, revmap)

    if not result and optional:
        return None

    if not empty_and_optional and isinstance(schema["type"], dict) and schema["type"]["type"] == "record":
        out = {}
        for f in schema["type"]["fields"]:
            out[shortname(f["name"])] = self.collect_output(  # type: ignore
                f, builder, outdir, fs_access, compute_checksum=compute_checksum)
        return out
    return result
def collect_output(
        self,
        schema,                 # type: Dict[Text, Any]
        builder,                # type: Builder
        outdir,                 # type: Text
        fs_access,              # type: StdFsAccess
        compute_checksum=True   # type: bool
):
    # type: (...) -> Optional[Union[Dict[Text, Any], List[Union[Dict[Text, Any], Text]]]]
    """
    Collect outputs from the step :term:`Process` following its execution.

    .. note::
        When the :term:`CWL` runner tries to forward ``step(i) outputs -> step(i+1) inputs``
        using :meth:`collect_outputs`, it expects exact ``outputBindings`` locations to be
        matched. In other words, a definition like ``outputBindings: {glob: outputs/*.txt}``
        will generate results located in ``step(i)`` as ``"<tmp-workdir>/outputs/file.txt"``,
        and ``step(i+1)`` will look explicitly in ``"<tmp-workdir>/outputs"`` using the
        ``glob`` pattern.

        Because each :term:`Process` in the workflow is a distinct/remote entity, each one
        stages its outputs at a different URL location, not sharing the same *root directory*.
        When we stage intermediate results locally, the sub-dirs are lost. Therefore, they act
        like individual :term:`CWL` runner calls where the *final results* are moved back to
        the local directory for convenient access, but our *local directory* is the URL
        WPS-outputs location.

        To let :term:`CWL` :term:`Workflow` inter-step mapping work as intended, we must remap
        the locations ignoring any nested dirs, so that the modified ``outputBindings``
        definition can match as if each step :term:`Process` outputs were generated locally.
    """
    result = []  # type: List[Any]
    empty_and_optional = False
    debug = LOGGER.isEnabledFor(logging.DEBUG)
    if "outputBinding" in schema:
        binding = schema["outputBinding"]
        globpatterns = []  # type: List[Text]

        revmap = partial(command_line_tool.revmap_file, builder, outdir)

        if "glob" in binding:
            with SourceLine(binding, "glob", WorkflowException, debug):
                for glob in aslist(binding["glob"]):
                    glob = builder.do_eval(glob)
                    if glob:
                        globpatterns.extend(aslist(glob))

                # rebase glob pattern as applicable (see note)
                for glob in list(globpatterns):
                    if not any(glob.startswith(part) for part in [".", "/", "~"]) and "/" in glob:
                        glob = builder.do_eval(glob.split("/")[-1])
                        if glob:
                            globpatterns.extend(aslist(glob))

                for glob in globpatterns:
                    if glob.startswith(outdir):
                        glob = glob[len(outdir) + 1:]
                    elif glob == ".":
                        glob = outdir
                    elif glob.startswith("/"):
                        raise WorkflowException("glob patterns must not start with '/'")
                    try:
                        prefix = fs_access.glob(outdir)
                        key = cmp_to_key(cast(Callable[[Text, Text], int], locale.strcoll))

                        # In case the stdout.log or stderr.log file was not created
                        if "stdout" in self.tool and "stderr" in self.tool \
                                and glob in (self.tool["stdout"], self.tool["stderr"]):
                            filepath = Path(fs_access.join(outdir, glob))
                            if not filepath.is_file():
                                Path(filepath).touch()

                        result.extend([{
                            "location": g,
                            "path": fs_access.join(builder.outdir, g[len(prefix[0]) + 1:]),
                            "basename": os.path.basename(g),
                            "nameroot": os.path.splitext(os.path.basename(g))[0],
                            "nameext": os.path.splitext(os.path.basename(g))[1],
                            "class": "File" if fs_access.isfile(g) else "Directory"
                        } for g in sorted(fs_access.glob(fs_access.join(outdir, glob)), key=key)])
                    except (OSError, IOError) as exc:
                        LOGGER.warning(Text(exc))
                    except Exception:
                        LOGGER.exception("Unexpected error from fs_access")
                        raise

            for files in result:
                rfile = files.copy()
                # TODO: This function raises an exception and seems to be related to Docker
                #       (which is not used here).
                # revmap(rfile)
                if files["class"] == "Directory":
                    load_listing = builder.loadListing or (binding and binding.get("loadListing"))
                    if load_listing and load_listing != "no_listing":
                        get_listing(fs_access, files, (load_listing == "deep_listing"))
                else:
                    with fs_access.open(rfile["location"], "rb") as f:
                        contents = b""
                        if binding.get("loadContents") or compute_checksum:
                            contents = f.read(CONTENT_LIMIT)
                        if binding.get("loadContents"):
                            files["contents"] = contents.decode("utf-8")
                        if compute_checksum:
                            checksum = hashlib.sha1()  # nosec: B303
                            while contents != b"":
                                checksum.update(contents)
                                contents = f.read(1024 * 1024)
                            files["checksum"] = f"sha1${checksum.hexdigest()}"
                        f.seek(0, 2)
                        file_size = f.tell()
                        files["size"] = file_size

        optional = False
        single = False
        if isinstance(schema["type"], list):
            if "null" in schema["type"]:
                optional = True
            if "File" in schema["type"] or "Directory" in schema["type"]:
                single = True
        elif schema["type"] == "File" or schema["type"] == "Directory":
            single = True
        if "outputEval" in binding:
            with SourceLine(binding, "outputEval", WorkflowException, debug):
                result = builder.do_eval(binding["outputEval"], context=result)
        if single:
            if not result and not optional:
                with SourceLine(binding, "glob", WorkflowException, debug):
                    raise WorkflowException(
                        f"Did not find output file with glob pattern: '{globpatterns}'"
                    )
            elif not result and optional:
                pass
            elif isinstance(result, list):
                if len(result) > 1:
                    raise WorkflowException(
                        "Multiple matches for output item that is a single file."
                    )
                result = result[0]

        if "secondaryFiles" in schema:
            with SourceLine(schema, "secondaryFiles", WorkflowException, debug):
                for primary in aslist(result):
                    if isinstance(primary, dict):
                        primary.setdefault("secondaryFiles", [])
                        pathprefix = primary["path"][0:primary["path"].rindex("/") + 1]
                        for file in aslist(schema["secondaryFiles"]):
                            if isinstance(file, dict) or "$(" in file or "${" in file:
                                sfpath = builder.do_eval(file, context=primary)
                                subst = False
                            else:
                                sfpath = file
                                subst = True
                            for sfitem in aslist(sfpath):
                                if isinstance(sfitem, str):
                                    if subst:
                                        sfitem = {"path": substitute(primary["path"], sfitem)}
                                    else:
                                        sfitem = {"path": pathprefix + sfitem}
                                if "path" in sfitem and "location" not in sfitem:
                                    revmap(sfitem)
                                if fs_access.isfile(sfitem["location"]):
                                    sfitem["class"] = "File"
                                    primary["secondaryFiles"].append(sfitem)
                                elif fs_access.isdir(sfitem["location"]):
                                    sfitem["class"] = "Directory"
                                    primary["secondaryFiles"].append(sfitem)

        if "format" in schema:
            for primary in aslist(result):
                primary["format"] = builder.do_eval(schema["format"], context=primary)

        # Ensure files point to local references outside of the run environment
        # TODO: Again removing revmap....
        # adjustFileObjs(result, revmap)

        if not result and optional:
            return None

    if not empty_and_optional and isinstance(schema["type"], dict) and schema["type"]["type"] == "record":
        out = {}
        for f in schema["type"]["fields"]:
            out[shortname(f["name"])] = self.collect_output(  # type: ignore
                f, builder, outdir, fs_access, compute_checksum=compute_checksum)
        return out
    return result
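
# Hedged sketch (illustrative only; the helper name is made up): the "rebase glob
# pattern" loop above effectively lets a nested pattern such as "outputs/*.txt" also
# match files that a remote step staged flat in the local staging directory, as
# described in the docstring.  The real code additionally re-evaluates the rebased
# pattern through builder.do_eval before appending it.
def _rebase_glob_sketch(globpatterns):
    rebased = list(globpatterns)
    for pattern in globpatterns:
        if not pattern.startswith((".", "/", "~")) and "/" in pattern:
            rebased.append(pattern.split("/")[-1])  # "outputs/*.txt" -> "*.txt"
    return rebased

# _rebase_glob_sketch(["outputs/*.txt"]) -> ["outputs/*.txt", "*.txt"]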
def set_secondary(fsaccess, builder, inputschema, secondaryspec, primary, discovered):
    if isinstance(inputschema, Sequence) and not isinstance(inputschema, basestring):
        # union type, collect all possible secondaryFiles
        for i in inputschema:
            set_secondary(fsaccess, builder, i, secondaryspec, primary, discovered)
        return

    if isinstance(inputschema, basestring):
        sd = search_schemadef(inputschema, reversed(builder.hints + builder.requirements))
        if sd:
            inputschema = sd
        else:
            return

    if "secondaryFiles" in inputschema:
        # set secondaryFiles, may be inherited by compound types.
        secondaryspec = inputschema["secondaryFiles"]

    if (isinstance(inputschema["type"], (Mapping, Sequence)) and
            not isinstance(inputschema["type"], basestring)):
        # compound type (union, array, record)
        set_secondary(fsaccess, builder, inputschema["type"], secondaryspec, primary, discovered)

    elif (inputschema["type"] == "record" and isinstance(primary, Mapping)):
        #
        # record type, find secondary files associated with fields.
        #
        for f in inputschema["fields"]:
            p = primary.get(shortname(f["name"]))
            if p:
                set_secondary(fsaccess, builder, f, secondaryspec, p, discovered)

    elif (inputschema["type"] == "array" and isinstance(primary, Sequence)):
        #
        # array type, find secondary files of elements
        #
        for p in primary:
            set_secondary(fsaccess, builder, {"type": inputschema["items"]}, secondaryspec, p, discovered)

    elif (inputschema["type"] == "File" and secondaryspec and
          isinstance(primary, Mapping) and
          primary.get("class") == "File" and
          "secondaryFiles" not in primary):
        #
        # Found a file, check for secondaryFiles
        #
        specs = []
        primary["secondaryFiles"] = secondaryspec
        for i, sf in enumerate(aslist(secondaryspec)):
            if builder.cwlVersion == "v1.0":
                pattern = builder.do_eval(sf, context=primary)
            else:
                pattern = builder.do_eval(sf["pattern"], context=primary)
            if pattern is None:
                continue
            if isinstance(pattern, list):
                specs.extend(pattern)
            elif isinstance(pattern, dict):
                specs.append(pattern)
            elif isinstance(pattern, str):
                if builder.cwlVersion == "v1.0":
                    specs.append({"pattern": pattern, "required": True})
                else:
                    specs.append({"pattern": pattern, "required": sf.get("required")})
            else:
                raise SourceLine(primary["secondaryFiles"], i, validate.ValidationException).makeError(
                    "Expression must return list, object, string or null")

        found = []
        for i, sf in enumerate(specs):
            if isinstance(sf, dict):
                if sf.get("class") == "File":
                    pattern = None
                    if sf.get("location") is None:
                        raise SourceLine(primary["secondaryFiles"], i, validate.ValidationException).makeError(
                            "File object is missing 'location': %s" % sf)
                    sfpath = sf["location"]
                    required = True
                else:
                    pattern = sf["pattern"]
                    required = sf.get("required")
            elif isinstance(sf, str):
                pattern = sf
                required = True
            else:
                raise SourceLine(primary["secondaryFiles"], i, validate.ValidationException).makeError(
                    "Expression must return list, object, string or null")

            if pattern is not None:
                sfpath = substitute(primary["location"], pattern)

            required = builder.do_eval(required, context=primary)

            if fsaccess.exists(sfpath):
                if pattern is not None:
                    found.append({"location": sfpath, "class": "File"})
                else:
                    found.append(sf)
            elif required:
                raise SourceLine(primary["secondaryFiles"], i, validate.ValidationException).makeError(
                    "Required secondary file '%s' does not exist" % sfpath)

        primary["secondaryFiles"] = cmap(found)
        if discovered is not None:
            discovered[primary["location"]] = primary["secondaryFiles"]
    elif inputschema["type"] not in primitive_types_set:
        set_secondary(fsaccess, builder, inputschema["type"], secondaryspec, primary, discovered)
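
# Hedged sketch of the discovery step in set_secondary() (illustrative only, not the
# Arvados API): each normalized spec ({"pattern": ..., "required": ...}) is resolved
# against the primary file's location and checked for existence.  Here os.path.exists
# stands in for the fsaccess abstraction, the "^" extension stripping of real patterns
# is omitted, and _discover_secondary_sketch is a made-up name for this example.
import os


def _discover_secondary_sketch(primary_location, specs):
    found = []
    for spec in specs:
        candidate = primary_location + spec["pattern"]  # simplified substitution
        if os.path.exists(candidate):
            found.append({"location": candidate, "class": "File"})
        elif spec.get("required", True):
            raise RuntimeError("Required secondary file '%s' does not exist" % candidate)
    return found

# Example: _discover_secondary_sketch("/data/reads.bam", [{"pattern": ".bai", "required": True}])
# returns [{"location": "/data/reads.bam.bai", "class": "File"}] when that file exists.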
def run(self, runtimeContext):
    # ArvadosCommandTool subclasses cwltool.CommandLineTool, which calls
    # makeJobRunner() to get a new ArvadosContainer object.  The fields that
    # define execution, such as command_line, environment, etc., are set on the
    # ArvadosContainer object by CommandLineTool.job() before run() is called.

    runtimeContext = self.job_runtime

    container_request = {
        "command": self.command_line,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": runtimeContext.priority,
        "state": "Committed",
        "properties": {},
    }
    runtime_constraints = {}

    if runtimeContext.project_uuid:
        container_request["owner_uuid"] = runtimeContext.project_uuid

    if self.arvrunner.secret_store.has_secret(self.command_line):
        raise WorkflowException("Secret material leaked on command line, only file literals may contain secrets")

    if self.arvrunner.secret_store.has_secret(self.environment):
        raise WorkflowException("Secret material leaked in environment, only file literals may contain secrets")

    resources = self.builder.resources
    if resources is not None:
        runtime_constraints["vcpus"] = math.ceil(resources.get("cores", 1))
        runtime_constraints["ram"] = math.ceil(resources.get("ram") * 2**20)

    mounts = {
        self.outdir: {
            "kind": "tmp",
            "capacity": math.ceil(resources.get("outdirSize", 0) * 2**20)
        },
        self.tmpdir: {
            "kind": "tmp",
            "capacity": math.ceil(resources.get("tmpdirSize", 0) * 2**20)
        }
    }
    secret_mounts = {}
    scheduling_parameters = {}

    rf = [self.pathmapper.mapper(f) for f in self.pathmapper.referenced_files]
    rf.sort(key=lambda k: k.resolved)
    prevdir = None
    for resolved, target, tp, stg in rf:
        if not stg:
            continue
        if prevdir and target.startswith(prevdir):
            continue
        if tp == "Directory":
            targetdir = target
        else:
            targetdir = os.path.dirname(target)
        sp = resolved.split("/", 1)
        pdh = sp[0][5:]   # remove "keep:"
        mounts[targetdir] = {
            "kind": "collection",
            "portable_data_hash": pdh
        }
        if pdh in self.pathmapper.pdh_to_uuid:
            mounts[targetdir]["uuid"] = self.pathmapper.pdh_to_uuid[pdh]
        if len(sp) == 2:
            if tp == "Directory":
                path = sp[1]
            else:
                path = os.path.dirname(sp[1])
            if path and path != "/":
                mounts[targetdir]["path"] = path
        prevdir = targetdir + "/"

    with Perf(metrics, "generatefiles %s" % self.name):
        if self.generatefiles["listing"]:
            vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                keep_client=self.arvrunner.keep_client,
                                                num_retries=self.arvrunner.num_retries)
            generatemapper = NoFollowPathMapper(self.generatefiles["listing"], "", "",
                                                separateDirs=False)

            sorteditems = sorted(generatemapper.items(), key=lambda n: n[1].target)

            logger.debug("generatemapper is %s", sorteditems)

            with Perf(metrics, "createfiles %s" % self.name):
                for f, p in sorteditems:
                    if not p.target:
                        pass
                    elif p.type in ("File", "Directory", "WritableFile", "WritableDirectory"):
                        if p.resolved.startswith("_:"):
                            vwd.mkdirs(p.target)
                        else:
                            source, path = self.arvrunner.fs_access.get_collection(p.resolved)
                            vwd.copy(path or ".", p.target, source_collection=source)
                    elif p.type == "CreateFile":
                        if self.arvrunner.secret_store.has_secret(p.resolved):
                            secret_mounts["%s/%s" % (self.outdir, p.target)] = {
                                "kind": "text",
                                "content": self.arvrunner.secret_store.retrieve(p.resolved)
                            }
                        else:
                            with vwd.open(p.target, "w") as n:
                                n.write(p.resolved)

            def keepemptydirs(p):
                if isinstance(p, arvados.collection.RichCollectionBase):
                    if len(p) == 0:
                        p.open(".keep", "w").close()
                    else:
                        for c in p:
                            keepemptydirs(p[c])

            keepemptydirs(vwd)

            if not runtimeContext.current_container:
                runtimeContext.current_container = arvados_cwl.util.get_current_container(
                    self.arvrunner.api, self.arvrunner.num_retries, logger)
            info = arvados_cwl.util.get_intermediate_collection_info(
                self.name, runtimeContext.current_container, runtimeContext.intermediate_output_ttl)
            vwd.save_new(name=info["name"],
                         owner_uuid=runtimeContext.project_uuid,
                         ensure_unique_name=True,
                         trash_at=info["trash_at"],
                         properties=info["properties"])

            prev = None
            for f, p in sorteditems:
                if (not p.target or self.arvrunner.secret_store.has_secret(p.resolved) or
                        (prev is not None and p.target.startswith(prev))):
                    continue
                mountpoint = "%s/%s" % (self.outdir, p.target)
                mounts[mountpoint] = {"kind": "collection",
                                      "portable_data_hash": vwd.portable_data_hash(),
                                      "path": p.target}
                if p.type.startswith("Writable"):
                    mounts[mountpoint]["writable"] = True
                prev = p.target + "/"

    container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    if self.environment:
        container_request["environment"].update(self.environment)

    if self.stdin:
        sp = self.stdin[6:].split("/", 1)
        mounts["stdin"] = {"kind": "collection",
                           "portable_data_hash": sp[0],
                           "path": sp[1]}

    if self.stderr:
        mounts["stderr"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stderr)}

    if self.stdout:
        mounts["stdout"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stdout)}

    (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
    if not docker_req:
        docker_req = {"dockerImageId": "arvados/jobs"}

    container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                docker_req,
                                                                runtimeContext.pull_image,
                                                                runtimeContext.project_uuid)

    api_req, _ = self.get_requirement("http://arvados.org/cwl#APIRequirement")
    if api_req:
        runtime_constraints["API"] = True

    runtime_req, _ = self.get_requirement("http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            runtime_constraints["keep_cache_ram"] = math.ceil(runtime_req["keep_cache"] * 2**20)
        if "outputDirType" in runtime_req:
            if runtime_req["outputDirType"] == "local_output_dir":
                # Currently the default behavior.
                pass
            elif runtime_req["outputDirType"] == "keep_output_dir":
                mounts[self.outdir] = {"kind": "collection",
                                       "writable": True}

    partition_req, _ = self.get_requirement("http://arvados.org/cwl#PartitionRequirement")
    if partition_req:
        scheduling_parameters["partitions"] = aslist(partition_req["partition"])

    intermediate_output_req, _ = self.get_requirement("http://arvados.org/cwl#IntermediateOutput")
    if intermediate_output_req:
        self.output_ttl = intermediate_output_req["outputTTL"]
    else:
        self.output_ttl = self.arvrunner.intermediate_output_ttl

    if self.output_ttl < 0:
        raise WorkflowException("Invalid value %d for output_ttl, cannot be less than zero" % self.output_ttl)

    if self.timelimit is not None:
        scheduling_parameters["max_run_time"] = self.timelimit

    extra_submit_params = {}
    if runtimeContext.submit_runner_cluster:
        extra_submit_params["cluster_id"] = runtimeContext.submit_runner_cluster

    container_request["output_name"] = "Output for step %s" % (self.name)
    container_request["output_ttl"] = self.output_ttl
    container_request["mounts"] = mounts
    container_request["secret_mounts"] = secret_mounts
    container_request["runtime_constraints"] = runtime_constraints
    container_request["scheduling_parameters"] = scheduling_parameters

    enable_reuse = runtimeContext.enable_reuse
    if enable_reuse:
        reuse_req, _ = self.get_requirement("http://arvados.org/cwl#ReuseRequirement")
        if reuse_req:
            enable_reuse = reuse_req["enableReuse"]
    container_request["use_existing"] = enable_reuse

    if runtimeContext.runnerjob.startswith("arvwf:"):
        wfuuid = runtimeContext.runnerjob[6:runtimeContext.runnerjob.index("#")]
        wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
        if container_request["name"] == "main":
            container_request["name"] = wfrecord["name"]
        container_request["properties"]["template_uuid"] = wfuuid

    self.output_callback = self.arvrunner.get_wrapped_callback(self.output_callback)

    try:
        if runtimeContext.submit_request_uuid:
            response = self.arvrunner.api.container_requests().update(
                uuid=runtimeContext.submit_request_uuid,
                body=container_request,
                **extra_submit_params
            ).execute(num_retries=self.arvrunner.num_retries)
        else:
            response = self.arvrunner.api.container_requests().create(
                body=container_request,
                **extra_submit_params
            ).execute(num_retries=self.arvrunner.num_retries)

        self.uuid = response["uuid"]
        self.arvrunner.process_submitted(self)

        if response["state"] == "Final":
            logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
        else:
            logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
    except Exception:
        logger.exception("%s got an error", self.arvrunner.label(self))
        self.output_callback({}, "permanentFail")
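
# Hedged sketch (not the Arvados API itself; the helper name is made up): how one
# resolved "keep:" reference is turned into the collection mount entry built by the
# loop over pathmapper entries above, for the plain File case.  Directory targets and
# the pdh_to_uuid lookup are omitted for brevity.
import os


def _keep_mount_sketch(resolved, target):
    # resolved: e.g. "keep:<portable_data_hash>/subdir/data.txt"
    # target:   e.g. "/var/lib/cwl/inputs/data.txt"
    sp = resolved.split("/", 1)
    mount = {"kind": "collection", "portable_data_hash": sp[0][len("keep:"):]}
    if len(sp) == 2:
        subpath = os.path.dirname(sp[1])  # mount the containing directory
        if subpath and subpath != "/":
            mount["path"] = subpath
    return {os.path.dirname(target): mount}

# _keep_mount_sketch("keep:abc123+99/subdir/data.txt", "/var/lib/cwl/inputs/data.txt")
# -> {"/var/lib/cwl/inputs": {"kind": "collection", "portable_data_hash": "abc123+99", "path": "subdir"}}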
def run(self, fileStore):
    cwljob = resolve_indirect(self.cwljob)

    # `promises` dict
    # from: each parameter (workflow input or step output) that may be used as
    #   a "source" for a step input or workflow output parameter
    # to: the job that will produce that value.
    promises = {}

    # `jobs` dict from step id to job that implements that step.
    jobs = {}

    for inp in self.cwlwf.tool["inputs"]:
        promises[inp["id"]] = SelfJob(self, cwljob)

    alloutputs_fufilled = False
    while not alloutputs_fufilled:
        # Iteratively go over the workflow steps, scheduling jobs as their
        # dependencies can be fulfilled by upstream workflow inputs or
        # step outputs.  The loop exits when the workflow outputs are satisfied.

        alloutputs_fufilled = True

        for step in self.cwlwf.steps:
            if step.tool["id"] not in jobs:
                stepinputs_fufilled = True
                for inp in step.tool["inputs"]:
                    if "source" in inp:
                        for s in aslist(inp["source"]):
                            if s not in promises:
                                stepinputs_fufilled = False
                if stepinputs_fufilled:
                    jobobj = {}

                    for inp in step.tool["inputs"]:
                        key = shortname(inp["id"])
                        if "source" in inp:
                            if inp.get("linkMerge") or len(aslist(inp["source"])) > 1:
                                linkMerge = inp.get("linkMerge", "merge_nested")
                                if linkMerge == "merge_nested":
                                    jobobj[key] = (
                                        MergeInputsNested([(shortname(s), promises[s].rv())
                                                           for s in aslist(inp["source"])]))
                                elif linkMerge == "merge_flattened":
                                    jobobj[key] = (
                                        MergeInputsFlattened([(shortname(s), promises[s].rv())
                                                              for s in aslist(inp["source"])]))
                                else:
                                    raise validate.ValidationException(
                                        "Unsupported linkMerge '%s'" % linkMerge)
                            else:
                                jobobj[key] = (
                                    shortname(inp["source"]), promises[inp["source"]].rv())
                        elif "default" in inp:
                            d = copy.copy(inp["default"])
                            jobobj[key] = ("default", {"default": d})

                        if "valueFrom" in inp and "scatter" not in step.tool:
                            if key in jobobj:
                                jobobj[key] = StepValueFrom(inp["valueFrom"],
                                                            jobobj[key],
                                                            self.cwlwf.requirements)
                            else:
                                jobobj[key] = StepValueFrom(inp["valueFrom"],
                                                            ("None", {"None": None}),
                                                            self.cwlwf.requirements)

                    if "scatter" in step.tool:
                        wfjob = CWLScatter(step, IndirectDict(jobobj),
                                           **self.executor_options)
                        followOn = CWLGather(step, wfjob.rv())
                        wfjob.addFollowOn(followOn)
                    else:
                        (wfjob, followOn) = makeJob(step.embedded_tool,
                                                    IndirectDict(jobobj),
                                                    **self.executor_options)

                    jobs[step.tool["id"]] = followOn

                    connected = False
                    for inp in step.tool["inputs"]:
                        for s in aslist(inp.get("source", [])):
                            if not promises[s].hasChild(wfjob):
                                promises[s].addChild(wfjob)
                                connected = True
                    if not connected:
                        # The workflow step has default inputs only and isn't
                        # connected to other jobs, so add it as a child of the
                        # workflow.
                        self.addChild(wfjob)

                    for out in step.tool["outputs"]:
                        promises[out["id"]] = followOn

            for inp in step.tool["inputs"]:
                for s in aslist(inp.get("source", [])):
                    if s not in promises:
                        alloutputs_fufilled = False

        # may need a test
        for out in self.cwlwf.tool["outputs"]:
            if "source" in out:
                if out["source"] not in promises:
                    alloutputs_fufilled = False

    outobj = {}
    for out in self.cwlwf.tool["outputs"]:
        outobj[shortname(out["id"])] = (shortname(out["outputSource"]),
                                        promises[out["outputSource"]].rv())

    return IndirectDict(outobj)
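
# Hedged sketch of the linkMerge semantics handled above, applied to already
# resolved source values rather than Toil promises (the helper name is made up):
def _link_merge_sketch(values, link_merge="merge_nested"):
    if link_merge == "merge_nested":
        return list(values)  # one entry per source, nested lists kept as-is
    if link_merge == "merge_flattened":
        merged = []
        for value in values:
            merged.extend(value if isinstance(value, list) else [value])
        return merged
    raise ValueError("Unsupported linkMerge '%s'" % link_merge)

# _link_merge_sketch([[1, 2], 3], "merge_nested")    -> [[1, 2], 3]
# _link_merge_sketch([[1, 2], 3], "merge_flattened") -> [1, 2, 3]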