def run(self, fileStore):
    """Execute the wrapped CWL tool inside a Toil worker.

    Resolves promised inputs, stages referenced files out of the global
    file store into a worker-local scratch area, runs cwltool's
    single-job executor in a container, then uploads any files the tool
    produced back into the global file store.

    :param fileStore: the Toil file store for this job.
    :return: the CWL output object with file paths rewritten to global
        file-store references.
    """
    job_input = resolve_indirect(self.cwljob)

    # Worker-local scratch directories for inputs, outputs and tmp.
    input_dir = os.path.join(fileStore.getLocalTempDir(), "inp")
    output_dir = os.path.join(fileStore.getLocalTempDir(), "out")
    tool_tmp = os.path.join(fileStore.getLocalTempDir(), "tmp")
    for scratch in (input_dir, output_dir, tool_tmp):
        os.mkdir(scratch)

    # Copy input files out of the global file store; `staged` memoizes
    # already-downloaded entries across calls.
    staged = {}
    adjustFiles(job_input,
                lambda entry: getFile(fileStore, input_dir, *entry, index=staged))

    logging.getLogger("cwltool").setLevel(logging.DEBUG)

    output = cwltool.main.single_job_executor(
        self.cwltool, job_input, os.getcwd(), None,
        outdir=output_dir, tmpdir=tool_tmp, use_container=True)

    # Copy output files into the global file store.
    adjustFiles(output, functools.partial(writeFile, fileStore, {}))
    return output
def run(self, fileStore):
    """Execute the wrapped CWL tool inside a Toil worker.

    Resolves promised inputs, fills in declared input defaults, stages
    files (with their secondary files) from the global file store into a
    local scratch area, runs cwltool's single-job executor with the
    configured executor options, and uploads resulting files back to the
    global file store.

    :param fileStore: the Toil file store for this job.
    :return: the CWL output object with file paths rewritten to global
        file-store references.
    """
    job_input = resolve_indirect(self.cwljob)
    fillInDefaults(self.cwltool.tool["inputs"], job_input)

    # Worker-local scratch directories for inputs, outputs and tmp.
    input_dir = os.path.join(fileStore.getLocalTempDir(), "inp")
    output_dir = os.path.join(fileStore.getLocalTempDir(), "out")
    tool_tmp = os.path.join(fileStore.getLocalTempDir(), "tmp")
    for scratch in (input_dir, output_dir, tool_tmp):
        os.mkdir(scratch)

    # Copy input files out of the global file store; `staged` memoizes
    # already-downloaded entries across calls.
    staged = {}
    adjustFilesWithSecondary(
        job_input,
        functools.partial(getFile, fileStore, input_dir, index=staged))

    # Run the tool.
    output = cwltool.main.single_job_executor(
        self.cwltool, job_input, os.getcwd(), None,
        outdir=output_dir, tmpdir=tool_tmp, **self.executor_options)

    # Copy output files into the global file store.
    adjustFiles(output,
                functools.partial(writeFile, fileStore.writeGlobalFile, {}))
    return output
def importDefault(tool):
    """Import a tool's default File values into the Toil job store.

    Bare paths are first qualified as ``file://`` URIs, then each file
    reference is uploaded via ``toil.importFile`` (with an empty
    dedup index) and rewritten in place.

    :param tool: the CWL tool document to rewrite.
    :return: the same tool object, mutated in place.
    """
    def as_uri(path):
        # Leave already-schemed references alone; qualify bare paths.
        return path if urlparse.urlparse(path).scheme else "file://%s" % path

    adjustFiles(tool, as_uri)
    adjustFiles(tool, functools.partial(writeFile, toil.importFile, {}))
    return tool
def run(self, fileStore):
    """Export the workflow's final outputs to the output directory.

    Resolves the promised output object, copies every referenced file
    out of the global file store into ``self.outdir``, and writes the
    resulting object as ``cwl.output.json`` in that directory.

    :param fileStore: the Toil file store for this job.
    :return: ``True`` on completion.
    """
    result = resolve_indirect(self.cwljob)
    exported = {}

    def export_file(entry):
        # `entry` is the (file-store reference, name) pair that
        # adjustFiles hands us; unpack it into getFile's positionals.
        return getFile(fileStore, self.outdir, *entry,
                       index=exported, copy=True)

    adjustFiles(result, export_file)
    with open(os.path.join(self.outdir, "cwl.output.json"), "w") as f:
        json.dump(result, f, indent=4)
    return True
def done(self, record):
    """Handle an Arvados job record that reached a terminal state.

    Maps the job state to a CWL process status, loads the output object
    from the job's output collection, rewrites bare output paths into
    ``keep:`` references, and delivers the result to the runner's output
    callback.  The job is always removed from the runner's active-job
    table, even if output retrieval fails.

    :param record: the Arvados job record dict (must contain "state",
        "output" and "uuid").
    """
    if record["state"] == "Complete":
        processStatus = "success"
    else:
        processStatus = "permanentFail"

    outputs = None
    try:
        try:
            outc = arvados.collection.Collection(record["output"])
            with outc.open("cwl.output.json") as f:
                outputs = json.load(f)

            def keepify(path):
                # Qualify collection-relative paths with the output
                # collection's identifier.  BUG FIX: the original
                # returned None (implicitly) for paths that already
                # started with "keep:", which made adjustFiles clobber
                # them; pass those through unchanged instead.
                if not path.startswith("keep:"):
                    return "keep:%s/%s" % (record["output"], path)
                return path

            adjustFiles(outputs, keepify)
        except Exception as e:
            # Best-effort: report the failure but still invoke the
            # callback (with outputs=None) so the runner can proceed.
            logger.error("While getting final output object: %s", e)
        self.arvrunner.output_callback(outputs, processStatus)
    finally:
        # Always drop this job from the runner's active-job table.
        del self.arvrunner.jobs[record["uuid"]]
def arvados_job_spec(self, dry_run=False, pull_image=True, **kwargs):
    """Create an Arvados job specification for this workflow.

    The returned dict can be used to create a job (i.e., passed as
    the +body+ argument to jobs().create()), or as a component in
    a pipeline template or pipeline instance.
    """
    # Make sure the Docker image referenced by the tool is available.
    self.upload_docker(self.tool)

    # Collect the set of files the workflow itself references and the
    # set referenced by the job order (input object), separately.
    workflowfiles = set()
    jobfiles = set()
    workflowfiles.add(self.tool.tool["id"])

    self.name = os.path.basename(self.tool.tool["id"])

    def visitFiles(files, path):
        # adjustFiles visitor: record the path, leave it unchanged.
        files.add(path)
        return path

    document_loader, workflowobj, uri = fetch_document(self.tool.tool["id"])

    def loadref(b, u):
        # Resolve references relative to the base URI when scanning deps.
        return document_loader.fetch(urlparse.urljoin(b, u))

    # Scan the workflow document for its transitive file dependencies.
    sc = scandeps(uri, workflowobj,
                  set(("$import", "run")),
                  set(("$include", "$schemas", "path")),
                  loadref)
    adjustFiles(sc, partial(visitFiles, workflowfiles))
    adjustFiles(self.job_order, partial(visitFiles, jobfiles))

    # Upload workflow files and job-order files through separate path
    # mappers (each mapper uploads to Keep and records the mapping).
    workflowmapper = ArvPathMapper(self.arvrunner, workflowfiles, "",
                                   "%s",
                                   "%s/%s",
                                   name=self.name,
                                   **kwargs)

    jobmapper = ArvPathMapper(self.arvrunner, jobfiles, "",
                              "%s",
                              "%s/%s",
                              name=os.path.basename(self.job_order.get("id", "#")),
                              **kwargs)

    # Rewrite job-order paths to their uploaded (target) locations.
    adjustFiles(self.job_order, lambda p: jobmapper.mapper(p)[1])

    # The "id" field is local metadata, not a script parameter.
    if "id" in self.job_order:
        del self.job_order["id"]

    # Point the runner script at the uploaded workflow document.
    self.job_order["cwl:tool"] = workflowmapper.mapper(self.tool.tool["id"])[1]
    return {
        "script": "cwl-runner",
        "script_version": "master",
        "repository": "arvados",
        "script_parameters": self.job_order,
        "runtime_constraints": {
            "docker_image": "arvados/jobs"
        }
    }
def arvados_job_spec(self, dry_run=False, pull_image=True, **kwargs):
    """Create an Arvados job specification for this workflow.

    The returned dict can be used to create a job (i.e., passed as
    the +body+ argument to jobs().create()), or as a component in
    a pipeline template or pipeline instance.
    """
    # Make sure the Docker image referenced by the tool is available.
    self.upload_docker(self.tool)

    # Collect the set of files the workflow itself references and the
    # set referenced by the job order (input object), separately.
    workflowfiles = set()
    jobfiles = set()
    workflowfiles.add(self.tool.tool["id"])

    self.name = os.path.basename(self.tool.tool["id"])

    def visitFiles(files, path):
        # adjustFiles visitor: record the path, leave it unchanged.
        files.add(path)
        return path

    document_loader, workflowobj, uri = fetch_document(
        self.tool.tool["id"])

    def loadref(b, u):
        # Resolve references relative to the base URI when scanning deps.
        return document_loader.fetch(urlparse.urljoin(b, u))

    # Scan the workflow document for its transitive file dependencies.
    sc = scandeps(uri, workflowobj,
                  set(("$import", "run")),
                  set(("$include", "$schemas", "path")),
                  loadref)
    adjustFiles(sc, partial(visitFiles, workflowfiles))
    adjustFiles(self.job_order, partial(visitFiles, jobfiles))

    # Upload workflow files and job-order files through separate path
    # mappers (each mapper uploads to Keep and records the mapping).
    workflowmapper = ArvPathMapper(self.arvrunner, workflowfiles, "",
                                   "%s",
                                   "%s/%s",
                                   name=self.name,
                                   **kwargs)

    jobmapper = ArvPathMapper(self.arvrunner, jobfiles, "",
                              "%s",
                              "%s/%s",
                              name=os.path.basename(
                                  self.job_order.get("id", "#")),
                              **kwargs)

    # Rewrite job-order paths to their uploaded (target) locations.
    adjustFiles(self.job_order, lambda p: jobmapper.mapper(p)[1])

    # The "id" field is local metadata, not a script parameter.
    if "id" in self.job_order:
        del self.job_order["id"]

    # Point the runner script at the uploaded workflow document.
    self.job_order["cwl:tool"] = workflowmapper.mapper(
        self.tool.tool["id"])[1]
    return {
        "script": "cwl-runner",
        "script_version": "master",
        "repository": "arvados",
        "script_parameters": self.job_order,
        "runtime_constraints": {
            "docker_image": "arvados/jobs"
        }
    }
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Upload this workflow's dependencies and submit it as an
    Arvados job running the "cwl-runner" script.

    Registers the created job in the runner's active-job table and, if
    the job is already in a terminal state, invokes ``self.done``.
    """
    # Make sure the Docker image referenced by the tool is available.
    self.upload_docker(self.tool)

    # Collect the set of files the workflow itself references and the
    # set referenced by the job order (input object), separately.
    workflowfiles = set()
    jobfiles = set()
    workflowfiles.add(self.tool.tool["id"])

    self.name = os.path.basename(self.tool.tool["id"])

    def visitFiles(files, path):
        # adjustFiles visitor: record the path, leave it unchanged.
        files.add(path)
        return path

    document_loader, _, _ = cwltool.process.get_schema()

    def loadref(b, u):
        # Resolve references against the base URL when scanning deps.
        return document_loader.resolve_ref(u, base_url=b)[0]

    # Scan the workflow document for its transitive file dependencies.
    sc = scandeps("", self.tool.tool,
                  set(("$import", "run")),
                  set(("$include", "$schemas", "path")),
                  loadref)
    adjustFiles(sc, functools.partial(visitFiles, workflowfiles))
    adjustFiles(self.job_order, functools.partial(visitFiles, jobfiles))

    # Upload workflow files and job-order files through separate path
    # mappers (each mapper uploads to Keep and records the mapping).
    workflowmapper = ArvPathMapper(self.arvrunner, workflowfiles, "",
                                   "%s",
                                   "%s/%s",
                                   name=self.name,
                                   **kwargs)

    jobmapper = ArvPathMapper(self.arvrunner, jobfiles, "",
                              "%s",
                              "%s/%s",
                              name=os.path.basename(
                                  self.job_order.get("id", "#")),
                              **kwargs)

    # Rewrite job-order paths to their uploaded (target) locations.
    adjustFiles(self.job_order, lambda p: jobmapper.mapper(p)[1])

    # The "id" field is local metadata, not a script parameter.
    if "id" in self.job_order:
        del self.job_order["id"]

    # Point the runner script at the uploaded workflow document.
    self.job_order["cwl:tool"] = workflowmapper.mapper(
        self.tool.tool["id"])[1]

    # Submit the job; find_or_create enables Arvados job reuse.
    response = self.arvrunner.api.jobs().create(
        body={
            "owner_uuid": self.arvrunner.project_uuid,
            "script": "cwl-runner",
            "script_version": "master",
            "repository": "arvados",
            "script_parameters": self.job_order,
            "runtime_constraints": {
                "docker_image": "arvados/jobs"
            }
        }, find_or_create=self.enable_reuse).execute(
            num_retries=self.arvrunner.num_retries)

    self.arvrunner.jobs[response["uuid"]] = self

    logger.info("Submitted job %s", response["uuid"])

    # A reused job may already be finished; process it immediately.
    if response["state"] in ("Complete", "Failed", "Cancelled"):
        self.done(response)
def importDefault(tool):
    """Import a tool's default File values into the Toil job store.

    Paths without a URI scheme are first rewritten as ``file://`` URIs,
    then every file reference is uploaded with ``toil.importFile``
    (using a fresh dedup index) and replaced in place.

    :param tool: the CWL tool document to rewrite.
    :return: the same tool object, mutated in place.
    """
    def qualify(path):
        # Only bare paths need the file:// prefix.
        if urlparse.urlparse(path).scheme:
            return path
        return "file://%s" % path

    adjustFiles(tool, qualify)
    adjustFiles(tool, functools.partial(writeFile, toil.importFile, {}))
    return tool
def main(args=None, stdout=sys.stdout):
    """Command-line entry point: run a CWL tool or workflow with Toil.

    Parses options, loads the tool and job order, optionally runs the
    CWL conformance-test path directly, and otherwise imports input
    files into the Toil job store, starts the workflow, and exports the
    final outputs to the output directory.

    :param args: argument list (defaults to ``sys.argv[1:]``).
    :param stdout: stream for the JSON output object.
    :return: process exit code (0 on success, 33 on unsupported
        CWL requirement).
    """
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    parser.add_argument("cwltool", type=str)
    parser.add_argument("cwljob", type=str)

    # Will override the "jobStore" positional argument, enables
    # user to select jobStore or get a default from logic one below.
    parser.add_argument("--jobStore", type=str)
    parser.add_argument("--conformance-test", action="store_true")
    parser.add_argument("--no-container", action="store_true")
    parser.add_argument("--quiet", dest="logLevel", action="store_const", const="ERROR")
    parser.add_argument("--basedir", type=str)
    parser.add_argument("--outdir", type=str, default=os.getcwd())
    parser.add_argument("--version", action='version', version=version)
    parser.add_argument("--preserve-environment", type=str, nargs='+',
                        help="Preserve specified environment variables when running CommandLineTools",
                        metavar=("VAR1,VAR2"),
                        default=("PATH",),
                        dest="preserve_environment")

    # mkdtemp actually creates the directory, but
    # toil requires that the directory not exist,
    # so make it and delete it and allow
    # toil to create it again (!)
    workdir = tempfile.mkdtemp()
    os.rmdir(workdir)

    if args is None:
        args = sys.argv[1:]

    # The deleted temp dir becomes the default positional jobStore.
    options = parser.parse_args([workdir] + args)

    use_container = not options.no_container

    setLoggingFromOptions(options)
    if options.logLevel:
        cwllogger.setLevel(options.logLevel)

    # Accept either a URI or a bare path for the job-order document.
    uri = options.cwljob if urlparse.urlparse(options.cwljob).scheme else "file://" + os.path.abspath(options.cwljob)

    try:
        t = cwltool.main.load_tool(options.cwltool, False, True,
                                   cwltool.workflow.defaultMakeTool,
                                   True)
    except cwltool.process.UnsupportedRequirement as e:
        # Exit code 33 signals an unsupported CWL requirement.
        logging.error(e)
        return 33

    # Build the loader context for resolving the job-order document;
    # conformance tests use an empty context.
    if options.conformance_test:
        loader = schema_salad.ref_resolver.Loader({})
    else:
        jobloaderctx = {"path": {"@type": "@id"}, "format": {"@type": "@id"}}
        jobloaderctx.update(t.metadata.get("$namespaces", {}))
        loader = schema_salad.ref_resolver.Loader(jobloaderctx)

    job, _ = loader.resolve_ref(uri)

    # load_tool returns an int error code on failure — propagate it.
    if type(t) == int:
        return t

    fillInDefaults(t.tool["inputs"], job)

    if options.conformance_test:
        # Conformance tests run the tool directly, without Toil.
        adjustFiles(job, lambda x: x.replace("file://", ""))
        stdout.write(json.dumps(
            cwltool.main.single_job_executor(t, job, options.basedir, options,
                                             conformance_test=True, use_container=use_container,
                                             preserve_environment=options.preserve_environment), indent=4))
        return 0

    if not options.basedir:
        options.basedir = os.path.dirname(os.path.abspath(options.cwljob))

    outdir = options.outdir

    with Toil(options) as toil:
        def importDefault(tool):
            # Import default File values declared in the tool document
            # into the Toil job store.
            adjustFiles(tool, lambda x: "file://%s" % x if not urlparse.urlparse(x).scheme else x)
            adjustFiles(tool, functools.partial(writeFile, toil.importFile, {}))
            return tool
        t.visit(importDefault)

        builder = t._init_job(job, os.path.dirname(os.path.abspath(options.cwljob)))
        (wf1, wf2) = makeJob(t, {}, use_container=use_container,
                             preserve_environment=options.preserve_environment)

        # Import the job-order's input files into the job store, then
        # hand the rewritten input object to the root job.
        adjustFiles(builder.job, lambda x: "file://%s" % x if not urlparse.urlparse(x).scheme else x)
        adjustFiles(builder.job, functools.partial(writeFile, toil.importFile, {}))
        wf1.cwljob = builder.job

        outobj = toil.start(wf1)

        outobj = resolve_indirect(outobj)

        # Export output files (and secondary files) to the output dir.
        adjustFilesWithSecondary(outobj, functools.partial(getFile, toil, outdir, index={},
                                                           export=True, rename_collision=True))

        stdout.write(json.dumps(outobj, indent=4))

        return 0
def main(args=None):
    """Command-line entry point (older variant): run a CWL tool or
    workflow with Toil via explicit stage/run/finalize jobs.

    :param args: argument list (defaults to ``sys.argv[1:]``).
    :return: process exit code (0 on success, 33 if a requirement
        check fails).
    """
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    parser.add_argument("cwltool", type=str)
    parser.add_argument("cwljob", type=str)

    # Will override the "jobStore" positional argument, enables
    # user to select jobStore or get a default from logic one below.
    parser.add_argument("--jobStore", type=str)
    parser.add_argument("--conformance-test", action="store_true")
    parser.add_argument("--no-container", action="store_true")
    parser.add_argument("--quiet", action="store_true")
    parser.add_argument("--basedir", type=str)
    parser.add_argument("--outdir", type=str, default=os.getcwd())
    parser.add_argument("--version", action='version', version=version)

    # mkdtemp actually creates the directory, but
    # toil requires that the directory not exist,
    # so make it and delete it and allow
    # toil to create it again (!)
    workdir = tempfile.mkdtemp()
    os.rmdir(workdir)

    if args is None:
        args = sys.argv[1:]

    # The deleted temp dir becomes the default positional jobStore.
    options = parser.parse_args([workdir] + args)

    if options.quiet:
        options.logLevel = "WARNING"

    uri = "file://" + os.path.abspath(options.cwljob)

    t = cwltool.main.load_tool(options.cwltool, False, True,
                               cwltool.workflow.defaultMakeTool, True)

    # Build the loader context for resolving the job-order document;
    # conformance tests use an empty context.
    if options.conformance_test:
        loader = schema_salad.ref_resolver.Loader({})
    else:
        jobloaderctx = {"path": {"@type": "@id"}, "format": {"@type": "@id"}}
        jobloaderctx.update(t.metadata.get("$namespaces", {}))
        loader = schema_salad.ref_resolver.Loader(jobloaderctx)

    job, _ = loader.resolve_ref(uri)

    # load_tool returns an int error code on failure — propagate it.
    if type(t) == int:
        return t

    try:
        checkRequirements(t.tool)
    except Exception as e:
        # Exit code 33 signals an unsupported requirement.
        logging.error(e)
        return 33

    # NOTE(review): jobobj is never used below — looks like dead code,
    # but left in place; confirm before removing.
    jobobj = {}
    # Validate the job order against the tool's declared inputs,
    # filling in defaults for missing optional inputs.
    for inp in t.tool["inputs"]:
        if shortname(inp["id"]) in job:
            pass
        elif shortname(inp["id"]) not in job and "default" in inp:
            job[shortname(inp["id"])] = copy.copy(inp["default"])
        elif shortname(inp["id"]) not in job and inp["type"][0] == "null":
            # NOTE(review): this assumes optional types are expressed as
            # a list with "null" first (e.g. ["null", "File"]); a bare
            # string type "null" would not match — confirm against the
            # schemas this code actually receives.
            pass
        else:
            raise validate.ValidationException("Missing inputs `%s`" % shortname(inp["id"]))

    adjustFiles(job, lambda x: x.replace("file://", ""))

    if options.conformance_test:
        # Conformance tests run the tool directly, without Toil.
        sys.stdout.write(json.dumps(
            cwltool.main.single_job_executor(t, job, options.basedir, options,
                                             conformance_test=True), indent=4))
        return 0

    if not options.basedir:
        options.basedir = os.path.dirname(os.path.abspath(options.cwljob))

    outdir = options.outdir

    # Chain: stage inputs -> run workflow -> finalize outputs.
    staging = StageJob(t, job, os.path.dirname(os.path.abspath(options.cwljob)))
    (wf1, wf2) = makeJob(t, staging.rv())
    staging.addFollowOn(wf1)
    wf2.addFollowOn(FinalJob(wf2.rv(), outdir))
    Job.Runner.startToil(staging, options)

    # FinalJob wrote cwl.output.json into outdir; echo it to stdout.
    with open(os.path.join(outdir, "cwl.output.json"), "r") as f:
        sys.stdout.write(f.read())

    return 0
def run(self, fileStore):
    """Schedule the steps of a CWL workflow as a Toil job graph.

    Iterates to a fixed point over the workflow steps, creating a job
    for each step once all of its input sources have a promised
    producer, wiring parent/child edges between producers and
    consumers, and finally returning an IndirectDict of promises for
    the workflow outputs.

    :param fileStore: the Toil file store for this job.
    :return: IndirectDict mapping workflow output names to
        (source shortname, promise) pairs.
    """
    cwljob = resolve_indirect(self.cwljob)

    # `promises` dict
    # from: each parameter (workflow input or step output)
    #   that may be used as a "source" for a step input workflow output
    #   parameter
    # to: the job that will produce that value.
    promises = {}

    # `jobs` dict from step id to job that implements that step.
    jobs = {}

    # Workflow inputs are produced by this job itself.
    for inp in self.cwlwf.tool["inputs"]:
        promises[inp["id"]] = SelfJob(self, cwljob)

    alloutputs_fufilled = False
    while not alloutputs_fufilled:
        # Iteratively go over the workflow steps, scheduling jobs as their
        # dependencies can be fufilled by upstream workflow inputs or
        # step outputs.  Loop exits when the workflow outputs
        # are satisfied.

        alloutputs_fufilled = True

        for step in self.cwlwf.steps:
            if step.tool["id"] not in jobs:
                # A step is runnable once every "source" it consumes has
                # a promised producer.
                stepinputs_fufilled = True
                for inp in step.tool["inputs"]:
                    if "source" in inp:
                        for s in aslist(inp["source"]):
                            if s not in promises:
                                stepinputs_fufilled = False
                if stepinputs_fufilled:
                    # Build the step's input object out of promises.
                    jobobj = {}

                    # TODO: Handle multiple inbound links
                    # (both are discussed in section 5.1.2 in CWL spec draft-2)
                    for inp in step.tool["inputs"]:
                        key = shortname(inp["id"])
                        if "source" in inp:
                            if inp.get("linkMerge") or len(aslist(inp["source"])) > 1:
                                # Multiple sources (or explicit linkMerge):
                                # merge per the requested strategy.
                                linkMerge = inp.get("linkMerge", "merge_nested")
                                if linkMerge == "merge_nested":
                                    jobobj[key] = (
                                        MergeInputsNested([(shortname(s), promises[s].rv())
                                                           for s in aslist(inp["source"])]))
                                elif linkMerge == "merge_flattened":
                                    jobobj[key] = (
                                        MergeInputsFlattened([(shortname(s), promises[s].rv())
                                                              for s in aslist(inp["source"])]))
                                else:
                                    raise validate.ValidationException(
                                        "Unsupported linkMerge '%s'", linkMerge)
                            else:
                                # Single source: pass the promise through.
                                jobobj[key] = (
                                    shortname(inp["source"]), promises[inp["source"]].rv())
                        elif "default" in inp:
                            # No upstream source: stage the declared
                            # default value's files into the file store.
                            d = copy.copy(inp["default"])
                            adjustFiles(d, lambda x: x.replace("file://", ""))
                            adjustFiles(d, functools.partial(writeFile, fileStore, {}))
                            jobobj[key] = ("default", {"default": d})

                        if "valueFrom" in inp and "scatter" not in step.tool:
                            # Wrap the (possibly absent) value so the
                            # valueFrom expression is evaluated later.
                            if key in jobobj:
                                jobobj[key] = StepValueFrom(inp["valueFrom"],
                                                            jobobj[key],
                                                            self.cwlwf.requirements)
                            else:
                                jobobj[key] = StepValueFrom(inp["valueFrom"],
                                                            ("None", {"None": None}),
                                                            self.cwlwf.requirements)

                    if "scatter" in step.tool:
                        # Scatter: fan out, then gather the results.
                        wfjob = CWLScatter(step, IndirectDict(jobobj))
                        followOn = CWLGather(step, wfjob.rv())
                        wfjob.addFollowOn(followOn)
                    else:
                        (wfjob, followOn) = makeJob(step.embedded_tool, IndirectDict(jobobj))

                    jobs[step.tool["id"]] = followOn

                    # Wire the new job as a child of each producer job.
                    connected = False
                    for inp in step.tool["inputs"]:
                        for s in aslist(inp.get("source", [])):
                            if not promises[s].hasChild(wfjob):
                                promises[s].addChild(wfjob)
                                connected = True
                    if not connected:
                        # workflow step has default inputs only, isn't connected to other jobs,
                        # so add it as child of workflow.
                        self.addChild(wfjob)

                    # The step's outputs are produced by its follow-on.
                    for out in step.tool["outputs"]:
                        promises[out["id"]] = followOn

                # Any still-unsatisfied source forces another pass.
                for inp in step.tool["inputs"]:
                    for s in aslist(inp.get("source", [])):
                        if s not in promises:
                            alloutputs_fufilled = False

        # may need a test
        for out in self.cwlwf.tool["outputs"]:
            if "source" in out:
                if out["source"] not in promises:
                    alloutputs_fufilled = False

    # Assemble the workflow output object from the output promises.
    outobj = {}
    for out in self.cwlwf.tool["outputs"]:
        outobj[shortname(out["id"])] = (shortname(out["source"]), promises[out["source"]].rv())

    return IndirectDict(outobj)
def run(self, fileStore):
    """Initialize the tool's builder and stage its input files.

    Resolves the promised input object, runs the CWL process's job
    initialization against the base directory, then uploads every file
    referenced by the resulting input object into the global file
    store, rewriting the references in place.

    :param fileStore: the Toil file store for this job.
    :return: the builder's job object with file references rewritten
        to global file-store IDs.
    """
    resolved = resolve_indirect(self.cwljob)
    builder = self.cwlwf._init_job(resolved, self.basedir)
    adjustFiles(builder.job, functools.partial(writeFile, fileStore, {}))
    return builder.job