def run(self, dry_run=False, pull_image=True, **kwargs):
    """Build a single-task "crunchrunner" job for this command and submit it.

    Entries of ``self.generatefiles`` are staged into a newly saved
    collection and referenced from the task's ``task.vwd`` mapping.  Once the
    job has been created it is registered in ``self.arvrunner.jobs`` and
    recorded as a component of the runner's pipeline instance.

    Any exception raised while submitting is logged and reported through
    ``self.output_callback`` as ``"permanentFail"``.

    ``dry_run`` is accepted but not used here; ``pull_image`` is forwarded to
    ``arv_docker_get_image``.  Recognised ``kwargs``: ``use_container`` (an
    explicit ``False`` skips the Docker image) and ``enable_reuse`` (passed
    on as ``find_or_create``, default ``True``).
    """
    task = {"command": self.command_line}
    constraints = {}

    if self.generatefiles:
        staging = arvados.collection.Collection()
        task["task.vwd"] = {}
        for target in self.generatefiles:
            entry = self.generatefiles[target]
            if isinstance(entry, dict):
                # Dict entries point at existing Keep content: copy it in.
                keep_ref = entry["path"].replace("$(task.keep)/", "keep:")
                src, rest = self.arvrunner.fs_access.get_collection(keep_ref)
                staging.copy(rest, target, source_collection=src)
            else:
                # Anything else is literal file content.
                with staging.open(target, "w") as out:
                    out.write(entry)
        staging.save_new()
        for target in self.generatefiles:
            task["task.vwd"][target] = "$(task.keep)/%s/%s" % (
                staging.portable_data_hash(), target)

    env = {"TMPDIR": "$(task.tmpdir)"}
    task["task.env"] = env
    if self.environment:
        env.update(self.environment)

    if self.stdin:
        task["task.stdin"] = self.pathmapper.mapper(self.stdin)[1]

    if self.stdout:
        task["task.stdout"] = self.stdout

    docker_req, _docker_is_req = get_feature(self, "DockerRequirement")
    if docker_req and kwargs.get("use_container") is not False:
        constraints["docker_image"] = arv_docker_get_image(
            self.arvrunner.api, docker_req, pull_image)

    try:
        jobs_api = self.arvrunner.api.jobs()
        job_body = {
            "script": "crunchrunner",
            "repository": "arvados",
            "script_version": "master",
            "minimum_script_version": "9e5b98e8f5f4727856b53447191f9c06e3da2ba6",
            "script_parameters": {
                "tasks": [task],
                "crunchrunner": crunchrunner_pdh+"/crunchrunner",
            },
            "runtime_constraints": constraints,
        }
        response = jobs_api.create(
            body=job_body,
            find_or_create=kwargs.get("enable_reuse", True),
        ).execute(num_retries=self.arvrunner.num_retries)

        job_uuid = response["uuid"]
        self.arvrunner.jobs[job_uuid] = self

        # Record the job on the pipeline instance and push the update.
        self.arvrunner.pipeline["components"][self.name] = {"job": response}
        self.arvrunner.pipeline = self.arvrunner.api.pipeline_instances().update(
            uuid=self.arvrunner.pipeline["uuid"],
            body={"components": self.arvrunner.pipeline["components"]},
        ).execute(num_retries=self.arvrunner.num_retries)

        logger.info("Job %s (%s) is %s", self.name, response["uuid"], response["state"])

        # A job that is already in a terminal state is finalized right away.
        if response["state"] in ("Complete", "Failed", "Cancelled"):
            self.done(response)
    except Exception as e:
        logger.error("Got error %s" % str(e))
        self.output_callback({}, "permanentFail")
def upload_docker(self, tool):
    """Walk ``tool`` and hand every DockerRequirement to ``arv_docker_get_image``.

    For a ``CommandLineTool`` carrying a ``DockerRequirement``, the
    requirement is passed to ``arv_docker_get_image`` together with the
    runner's API client and project UUID (third argument ``True``).  For a
    ``cwltool.workflow.Workflow`` each step's embedded tool is processed
    recursively.  Any other object is ignored.
    """
    if isinstance(tool, CommandLineTool):
        docker_req, _docker_is_req = get_feature(tool, "DockerRequirement")
        if not docker_req:
            return
        arv_docker_get_image(self.arvrunner.api, docker_req, True,
                             self.arvrunner.project_uuid)
        return

    if isinstance(tool, cwltool.workflow.Workflow):
        for step in tool.steps:
            self.upload_docker(step.embedded_tool)
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Submit this command line as a "run-command" job.

    Entries of ``self.generatefiles`` are staged into a newly saved
    collection whose portable data hash becomes ``task.vwd``.  The created
    job is registered in ``self.arvrunner.jobs``; if it is already in a
    terminal state, ``self.done`` is called immediately.

    ``dry_run`` is accepted but not used here; ``pull_image`` is forwarded to
    ``arv_docker_get_image``.  Recognised ``kwargs``: ``use_container`` (an
    explicit ``False`` skips the Docker image) and ``enable_reuse`` (passed
    on as ``find_or_create``, default ``True``).
    """
    params = {"command": self.command_line}
    constraints = {}

    if self.generatefiles:
        workdir = arvados.collection.Collection()
        for name in self.generatefiles:
            spec = self.generatefiles[name]
            if isinstance(spec, dict):
                # Drop the first six characters of the path before lookup.
                located = self.arvrunner.fs_access.get_collection(spec["path"][6:])
                src, rest = located
                workdir.copy(rest, name, source_collection=src)
            else:
                with workdir.open(name, "w") as handle:
                    handle.write(spec)
        workdir.save_new()
        params["task.vwd"] = workdir.portable_data_hash()

    params["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
    if self.environment:
        params["task.env"].update(self.environment)

    if self.stdin:
        params["task.stdin"] = self.pathmapper.mapper(self.stdin)[1]

    if self.stdout:
        params["task.stdout"] = self.stdout

    docker_req, _docker_is_req = get_feature(self, "DockerRequirement")
    if docker_req and kwargs.get("use_container") is not False:
        constraints["docker_image"] = arv_docker_get_image(
            self.arvrunner.api, docker_req, pull_image)
        constraints["arvados_sdk_version"] = "master"

    jobs_api = self.arvrunner.api.jobs()
    request = jobs_api.create(
        body={
            "script": "run-command",
            "repository": "arvados",
            "script_version": "master",
            "script_parameters": params,
            "runtime_constraints": constraints,
        },
        find_or_create=kwargs.get("enable_reuse", True),
    )
    response = request.execute()

    self.arvrunner.jobs[response["uuid"]] = self

    logger.info("Job %s is %s", response["uuid"], response["state"])

    if response["state"] in ("Complete", "Failed", "Cancelled"):
        self.done(response)
def upload_docker(arvrunner, tool):
    """Walk ``tool`` and hand every DockerRequirement to ``arv_docker_get_image``.

    A ``CommandLineTool`` with a ``DockerRequirement`` is rejected with
    ``UnsupportedRequirement`` when the requirement sets
    ``dockerOutputDirectory``; otherwise the requirement is passed to
    ``arv_docker_get_image`` with the runner's API client and project UUID.
    A ``cwltool.workflow.Workflow`` is handled by recursing into each step's
    embedded tool.  Any other object is ignored.
    """
    if isinstance(tool, CommandLineTool):
        docker_req, _docker_is_req = get_feature(tool, "DockerRequirement")
        if not docker_req:
            return
        if docker_req.get("dockerOutputDirectory"):
            # TODO: can be supported by containers API, but not jobs API.
            raise UnsupportedRequirement(
                "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
        arv_docker_get_image(arvrunner.api, docker_req, True,
                             arvrunner.project_uuid)
        return

    if isinstance(tool, cwltool.workflow.Workflow):
        for step in tool.steps:
            upload_docker(arvrunner, step.embedded_tool)
def upload_docker(arvrunner, tool):
    """Uploads Docker images used in CommandLineTool objects.

    A ``CommandLineTool`` with a ``DockerRequirement`` has that requirement
    passed to ``arv_docker_get_image``; one without falls back to the
    ``arvados/jobs`` image (``dockerPull``).  ``dockerOutputDirectory`` is
    rejected unless ``arvrunner.work_api`` is ``"containers"``.  A
    ``cwltool.workflow.Workflow`` is handled by recursing into each step's
    embedded tool.  Any other object is ignored.
    """
    if isinstance(tool, CommandLineTool):
        docker_req, _docker_is_req = get_feature(tool, "DockerRequirement")
        if not docker_req:
            image_req = {"dockerPull": "arvados/jobs"}
        else:
            if docker_req.get("dockerOutputDirectory") and arvrunner.work_api != "containers":
                # TODO: can be supported by containers API, but not jobs API.
                raise SourceLine(
                    docker_req, "dockerOutputDirectory", UnsupportedRequirement
                ).makeError(
                    "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
            image_req = docker_req
        arv_docker_get_image(arvrunner.api, image_req, True, arvrunner.project_uuid)
        return

    if isinstance(tool, cwltool.workflow.Workflow):
        for step in tool.steps:
            upload_docker(arvrunner, step.embedded_tool)
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Create a "run-command" job for this command line and track it.

    When ``self.generatefiles`` is non-empty, its entries are written or
    copied into a fresh collection, which is saved; the collection's portable
    data hash is stored as ``task.vwd``.  The job returned by the API is
    registered in ``self.arvrunner.jobs`` and, if already finished
    ("Complete", "Failed" or "Cancelled"), handed to ``self.done``.

    ``dry_run`` is not used by this method.  ``kwargs`` may carry
    ``use_container`` (explicit ``False`` disables the Docker image) and
    ``enable_reuse`` (default ``True``, passed as ``find_or_create``).
    """
    job_params = {"command": self.command_line}
    job_constraints = {}

    generated = self.generatefiles
    if generated:
        vwd_collection = arvados.collection.Collection()
        for dest in generated:
            if not isinstance(self.generatefiles[dest], dict):
                # Literal content: write it straight into the collection.
                with vwd_collection.open(dest, "w") as writer:
                    writer.write(self.generatefiles[dest])
                continue
            # Dict entry: strip the six-character prefix, then copy the
            # referenced content from its source collection.
            trimmed = self.generatefiles[dest]["path"][6:]
            src, rest = self.arvrunner.fs_access.get_collection(trimmed)
            vwd_collection.copy(rest, dest, source_collection=src)
        vwd_collection.save_new()
        job_params["task.vwd"] = vwd_collection.portable_data_hash()

    task_env = {"TMPDIR": "$(task.tmpdir)"}
    job_params["task.env"] = task_env
    if self.environment:
        task_env.update(self.environment)

    if self.stdin:
        mapped_stdin = self.pathmapper.mapper(self.stdin)
        job_params["task.stdin"] = mapped_stdin[1]

    if self.stdout:
        job_params["task.stdout"] = self.stdout

    (docker_req, _unused) = get_feature(self, "DockerRequirement")
    if docker_req and kwargs.get("use_container") is not False:
        job_constraints["docker_image"] = arv_docker_get_image(
            self.arvrunner.api, docker_req, pull_image)
        job_constraints["arvados_sdk_version"] = "master"

    response = self.arvrunner.api.jobs().create(
        body={
            "script": "run-command",
            "repository": "arvados",
            "script_version": "master",
            "script_parameters": job_params,
            "runtime_constraints": job_constraints,
        },
        find_or_create=kwargs.get("enable_reuse", True),
    ).execute()

    self.arvrunner.jobs[response["uuid"]] = self

    logger.info("Job %s is %s", response["uuid"], response["state"])

    finished_states = ("Complete", "Failed", "Cancelled")
    if response["state"] in finished_states:
        self.done(response)
def __init__(self, runner, tool, job_order, enable_reuse,
             output_name, output_tags, submit_runner_ram=0,
             name=None, on_error=None, submit_runner_image=None,
             intermediate_output_ttl=0, merged_map=None):
    """Store the submission settings on the instance.

    ``enable_reuse`` is overridden by the tool's
    ``http://arvados.org/cwl#ReuseRequirement`` (its ``enableReuse`` value)
    when reuse was requested and the tool declares that requirement.

    ``submit_runner_ram`` falls back to 3000 when falsy; a value that is
    still not greater than zero raises ``Exception``.  ``submit_runner_image``
    falls back to ``"arvados/jobs:"`` followed by ``__version__``, and
    ``merged_map`` to an empty dict.
    """
    self.arvrunner = runner
    self.tool = tool
    self.job_order = job_order
    self.running = False

    reuse = enable_reuse
    if reuse:
        # If reuse is permitted by command line arguments but
        # disabled by the workflow itself, disable it.
        reuse_req, _ = get_feature(
            self.tool, "http://arvados.org/cwl#ReuseRequirement")
        if reuse_req:
            reuse = reuse_req["enableReuse"]
    self.enable_reuse = reuse

    self.uuid = None
    self.final_output = None
    self.output_name = output_name
    self.output_tags = output_tags
    self.name = name
    self.on_error = on_error
    if submit_runner_image:
        self.jobs_image = submit_runner_image
    else:
        self.jobs_image = "arvados/jobs:" + __version__
    self.intermediate_output_ttl = intermediate_output_ttl

    # Zero (the default) and other falsy values select the 3000 fallback.
    self.submit_runner_ram = submit_runner_ram or 3000

    if self.submit_runner_ram <= 0:
        raise Exception("Value of --submit-runner-ram must be greater than zero")

    self.merged_map = merged_map or {}
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Build a container request for this command line and submit it.

    The request is assembled from:

    * tmp mounts for ``self.outdir`` and ``self.tmpdir``, sized from
      ``self.builder.resources``;
    * one collection mount per staged directory among the path mapper's
      referenced files;
    * a freshly saved collection holding the ``self.generatefiles`` listing,
      mounted entry by entry under ``self.outdir``;
    * stdin/stderr/stdout mounts, the container image, and the runtime,
      partition and intermediate-output requirements declared on the tool.

    On success the request is registered in ``self.arvrunner.processes``; a
    request that is already ``"Final"`` is handed to ``self.done``.  Errors
    raised by the create call are logged and reported through
    ``self.output_callback`` as ``"permanentFail"``.

    ``dry_run`` is accepted but not used here; ``pull_image`` is forwarded to
    ``arv_docker_get_image``.  Recognised ``kwargs``: ``enable_reuse``
    (default ``True``, sent as ``use_existing``) and ``runnerjob`` (when it
    starts with ``"arvwf:"`` the workflow record is looked up and its UUID
    stored in the request properties).

    Raises:
        WorkflowError: if the effective output TTL is negative.
    """
    container_request = {
        "command": self.command_line,
        "owner_uuid": self.arvrunner.project_uuid,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": 1,
        "state": "Committed",
        "properties": {},
    }
    runtime_constraints = {}

    resources = self.builder.resources
    if resources is not None:
        runtime_constraints["vcpus"] = resources.get("cores", 1)
        runtime_constraints["ram"] = resources.get("ram") * 2**20

    # NOTE(review): the capacities below read ``resources`` unconditionally,
    # so this method still requires ``self.builder.resources`` to be set.
    mounts = {
        self.outdir: {
            "kind": "tmp",
            "capacity": resources.get("outdirSize", 0) * 2**20
        },
        self.tmpdir: {
            "kind": "tmp",
            "capacity": resources.get("tmpdirSize", 0) * 2**20
        }
    }
    scheduling_parameters = {}

    rf = [self.pathmapper.mapper(f) for f in self.pathmapper.referenced_files]
    rf.sort(key=lambda k: k.resolved)
    prevdir = None
    for resolved, target, tp, stg in rf:
        if not stg:
            continue
        # Skip anything that lives below the directory mounted last.
        if prevdir and target.startswith(prevdir):
            continue
        if tp == "Directory":
            targetdir = target
        else:
            targetdir = os.path.dirname(target)
        sp = resolved.split("/", 1)
        pdh = sp[0][5:]   # remove "keep:"
        mounts[targetdir] = {
            "kind": "collection",
            "portable_data_hash": pdh
        }
        if len(sp) == 2:
            if tp == "Directory":
                path = sp[1]
            else:
                path = os.path.dirname(sp[1])
            if path and path != "/":
                mounts[targetdir]["path"] = path
        prevdir = targetdir + "/"

    with Perf(metrics, "generatefiles %s" % self.name):
        if self.generatefiles["listing"]:
            vwd = arvados.collection.Collection(
                api_client=self.arvrunner.api,
                keep_client=self.arvrunner.keep_client,
                num_retries=self.arvrunner.num_retries)
            generatemapper = NoFollowPathMapper([self.generatefiles], "", "",
                                                separateDirs=False)

            with Perf(metrics, "createfiles %s" % self.name):
                for f, p in generatemapper.items():
                    if not p.target:
                        pass
                    elif p.type in ("File", "Directory"):
                        source, path = self.arvrunner.fs_access.get_collection(p.resolved)
                        vwd.copy(path, p.target, source_collection=source)
                    elif p.type == "CreateFile":
                        with vwd.open(p.target, "w") as n:
                            n.write(p.resolved.encode("utf-8"))

            with Perf(metrics, "generatefiles.save_new %s" % self.name):
                vwd.save_new()

            for f, p in generatemapper.items():
                if not p.target:
                    continue
                mountpoint = "%s/%s" % (self.outdir, p.target)
                mounts[mountpoint] = {
                    "kind": "collection",
                    "portable_data_hash": vwd.portable_data_hash(),
                    "path": p.target
                }

    container_request["environment"] = {
        "TMPDIR": self.tmpdir,
        "HOME": self.outdir
    }
    if self.environment:
        container_request["environment"].update(self.environment)

    if self.stdin:
        # Drop the first six characters, then split "<pdh>/<path>".
        sp = self.stdin[6:].split("/", 1)
        mounts["stdin"] = {
            "kind": "collection",
            "portable_data_hash": sp[0],
            "path": sp[1]
        }

    if self.stderr:
        mounts["stderr"] = {
            "kind": "file",
            "path": "%s/%s" % (self.outdir, self.stderr)
        }

    if self.stdout:
        mounts["stdout"] = {
            "kind": "file",
            "path": "%s/%s" % (self.outdir, self.stdout)
        }

    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
    if not docker_req:
        docker_req = {"dockerImageId": "arvados/jobs"}

    container_request["container_image"] = arv_docker_get_image(
        self.arvrunner.api, docker_req, pull_image, self.arvrunner.project_uuid)

    api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
    if api_req:
        runtime_constraints["API"] = True

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            runtime_constraints["keep_cache_ram"] = runtime_req["keep_cache"] * 2**20
        if "outputDirType" in runtime_req:
            if runtime_req["outputDirType"] == "local_output_dir":
                # Currently the default behavior.
                pass
            elif runtime_req["outputDirType"] == "keep_output_dir":
                mounts[self.outdir] = {
                    "kind": "collection",
                    "writable": True
                }

    partition_req, _ = get_feature(self, "http://arvados.org/cwl#PartitionRequirement")
    if partition_req:
        scheduling_parameters["partitions"] = aslist(partition_req["partition"])

    intermediate_output_req, _ = get_feature(
        self, "http://arvados.org/cwl#IntermediateOutput")
    if intermediate_output_req:
        self.output_ttl = intermediate_output_req["outputTTL"]
    else:
        self.output_ttl = self.arvrunner.intermediate_output_ttl

    if self.output_ttl < 0:
        # Format the value that was just validated.  The request's
        # "output_ttl" key is not assigned until after this check, so reading
        # it here raised KeyError instead of the intended WorkflowError.
        raise WorkflowError(
            "Invalid value %d for output_ttl, cannot be less than zero"
            % self.output_ttl)

    container_request["output_ttl"] = self.output_ttl
    container_request["mounts"] = mounts
    container_request["runtime_constraints"] = runtime_constraints
    container_request["use_existing"] = kwargs.get("enable_reuse", True)
    container_request["scheduling_parameters"] = scheduling_parameters

    if kwargs.get("runnerjob", "").startswith("arvwf:"):
        # The workflow UUID sits between the "arvwf:" prefix and the "#".
        wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
        wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(
            num_retries=self.arvrunner.num_retries)
        if container_request["name"] == "main":
            container_request["name"] = wfrecord["name"]
        container_request["properties"]["template_uuid"] = wfuuid

    try:
        response = self.arvrunner.api.container_requests().create(
            body=container_request
        ).execute(num_retries=self.arvrunner.num_retries)

        self.uuid = response["uuid"]
        self.arvrunner.processes[self.uuid] = self

        if response["state"] == "Final":
            logger.info("%s reused container %s", self.arvrunner.label(self),
                        response["container_uuid"])
            self.done(response)
        else:
            logger.info("%s %s state is %s", self.arvrunner.label(self),
                        response["uuid"], response["state"])
    except Exception as e:
        logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
        self.output_callback({}, "permanentFail")
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Submit this command line as a single-task "crunchrunner" job.

    "CreateFile" entries of the ``self.generatefiles`` listing are written
    into a newly saved collection; "File" entries are referenced by their
    resolved location.  Both end up in the task's ``task.vwd`` mapping.

    The created job is registered in ``self.arvrunner.processes`` and passed
    to ``self.update_pipeline_component``.  Exceptions raised while
    submitting are logged and reported via ``self.output_callback`` as
    ``"permanentFail"``.

    ``dry_run`` is not used.  ``kwargs`` may carry ``use_container`` (an
    explicit ``False`` selects the default "arvados/jobs" image) and
    ``enable_reuse`` (default ``True``, passed as ``find_or_create``).
    """
    task = {"command": self.command_line}
    constraints = {}

    if self.generatefiles["listing"]:
        staging = arvados.collection.Collection()
        task["task.vwd"] = {}
        generatemapper = InitialWorkDirPathMapper([self.generatefiles], "", "",
                                                  separateDirs=False)

        # First pass: materialize literal file contents in the collection.
        for _key, entry in generatemapper.items():
            if entry.type == "CreateFile":
                with staging.open(entry.target, "w") as out:
                    out.write(entry.resolved.encode("utf-8"))

        staging.save_new()

        # Second pass: point each target at its final location.
        vwd_map = task["task.vwd"]
        for _key, entry in generatemapper.items():
            if entry.type == "File":
                vwd_map[entry.target] = entry.resolved
            if entry.type == "CreateFile":
                vwd_map[entry.target] = "$(task.keep)/%s/%s" % (
                    staging.portable_data_hash(), entry.target)

    task["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
    if self.environment:
        task["task.env"].update(self.environment)

    # Optional task settings are only emitted when they are truthy.
    optional_settings = (
        ("task.stdin", self.stdin),
        ("task.stdout", self.stdout),
        ("task.stderr", self.stderr),
        ("task.successCodes", self.successCodes),
        ("task.temporaryFailCodes", self.temporaryFailCodes),
        ("task.permanentFailCodes", self.permanentFailCodes),
    )
    for key, value in optional_settings:
        if value:
            task[key] = value

    docker_req, _docker_is_req = get_feature(self, "DockerRequirement")
    if docker_req and kwargs.get("use_container") is not False:
        constraints["docker_image"] = arv_docker_get_image(
            self.arvrunner.api, docker_req, pull_image, self.arvrunner.project_uuid)
    else:
        constraints["docker_image"] = "arvados/jobs"

    resources = self.builder.resources
    if resources is not None:
        constraints["min_cores_per_node"] = resources.get("cores", 1)
        constraints["min_ram_mb_per_node"] = resources.get("ram")
        scratch_mb = resources.get("tmpdirSize", 0) + resources.get("outdirSize", 0)
        constraints["min_scratch_mb_per_node"] = scratch_mb

    filters = [
        ["repository", "=", "arvados"],
        ["script", "=", "crunchrunner"],
        ["script_version", "in git", "9e5b98e8f5f4727856b53447191f9c06e3da2ba6"],
    ]
    if not self.arvrunner.ignore_docker_for_reuse:
        filters.append(["docker_image_locator", "in docker", constraints["docker_image"]])

    try:
        jobs_api = self.arvrunner.api.jobs()
        response = jobs_api.create(
            body={
                "owner_uuid": self.arvrunner.project_uuid,
                "script": "crunchrunner",
                "repository": "arvados",
                "script_version": "master",
                "minimum_script_version": "9e5b98e8f5f4727856b53447191f9c06e3da2ba6",
                "script_parameters": {"tasks": [task]},
                "runtime_constraints": constraints,
            },
            filters=filters,
            find_or_create=kwargs.get("enable_reuse", True),
        ).execute(num_retries=self.arvrunner.num_retries)

        self.arvrunner.processes[response["uuid"]] = self

        self.update_pipeline_component(response)

        logger.info("Job %s (%s) is %s", self.name, response["uuid"], response["state"])

        if response["state"] in ("Complete", "Failed", "Cancelled"):
            self.done(response)
    except Exception as e:
        logger.error("Got error %s" % str(e))
        self.output_callback({}, "permanentFail")
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Build a container request for this command line and submit it.

    Mounts are created for ``self.outdir`` (tmp), for every mapped path
    (collection) and, when set, for stdout.  The container image, resource
    constraints and the API, runtime and partition requirements declared on
    the tool are added before the request is created.

    On success the request is registered in ``self.arvrunner.processes``; a
    request already in state ``"Final"`` is handed to ``self.done``.  Errors
    raised by the create call are logged and reported through
    ``self.output_callback`` as ``"permanentFail"``.

    ``dry_run`` is not used.  Recognised ``kwargs``: ``enable_reuse``
    (default ``True``, sent as ``use_existing``) and ``runnerjob``.

    Raises:
        UnsupportedRequirement: if the generatefiles listing is non-empty,
            or if stdin or stderr redirection is requested.
    """
    request = {
        "command": self.command_line,
        "owner_uuid": self.arvrunner.project_uuid,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": 1,
        "state": "Committed",
        "properties": {},
    }
    constraints = {}
    mounts = {self.outdir: {"kind": "tmp"}}
    scheduling = {}

    # Pass 1: mount top-level directories and remember their identifiers.
    top_level = set()
    for src in self.pathmapper.files():
        _, mapped, kind = self.pathmapper.mapper(src)
        if kind == "Directory" and '/' not in mapped[6:]:
            mounts[mapped] = {
                "kind": "collection",
                "portable_data_hash": mapped[6:],
            }
            top_level.add(mapped[6:])

    # Pass 2: mount everything not already covered by one of those.
    for src in self.pathmapper.files():
        _, mapped, kind = self.pathmapper.mapper(src)
        if mapped[6:].split("/")[0] not in top_level:
            mounts[mapped] = {
                "kind": "collection",
                "portable_data_hash": mapped[6:],
            }

    if self.generatefiles["listing"]:
        raise UnsupportedRequirement(
            "InitialWorkDirRequirement not supported with --api=containers")

    env = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    request["environment"] = env
    if self.environment:
        env.update(self.environment)

    if self.stdin:
        raise UnsupportedRequirement("Stdin redirection currently not suppported")

    if self.stderr:
        raise UnsupportedRequirement("Stderr redirection currently not suppported")

    if self.stdout:
        mounts["stdout"] = {
            "kind": "file",
            "path": "%s/%s" % (self.outdir, self.stdout),
        }

    docker_req, _docker_is_req = get_feature(self, "DockerRequirement")
    if not docker_req:
        docker_req = {"dockerImageId": arvados_jobs_image(self.arvrunner)}

    request["container_image"] = arv_docker_get_image(
        self.arvrunner.api, docker_req, pull_image, self.arvrunner.project_uuid)

    resources = self.builder.resources
    if resources is not None:
        constraints["vcpus"] = resources.get("cores", 1)
        constraints["ram"] = resources.get("ram") * 2**20

    api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
    if api_req:
        constraints["API"] = True

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req and "keep_cache" in runtime_req:
        constraints["keep_cache_ram"] = runtime_req["keep_cache"]

    partition_req, _ = get_feature(self, "http://arvados.org/cwl#PartitionRequirement")
    if partition_req:
        scheduling["partitions"] = aslist(partition_req["partition"])

    request["mounts"] = mounts
    request["runtime_constraints"] = constraints
    request["use_existing"] = kwargs.get("enable_reuse", True)
    request["scheduling_parameters"] = scheduling

    if kwargs.get("runnerjob", "").startswith("arvwf:"):
        # The workflow UUID sits between the "arvwf:" prefix and the "#".
        runnerjob = kwargs["runnerjob"]
        wfuuid = runnerjob[6:runnerjob.index("#")]
        wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(
            num_retries=self.arvrunner.num_retries)
        if request["name"] == "main":
            request["name"] = wfrecord["name"]
        request["properties"]["template_uuid"] = wfuuid

    try:
        response = self.arvrunner.api.container_requests().create(
            body=request
        ).execute(num_retries=self.arvrunner.num_retries)

        self.uuid = response["uuid"]
        self.arvrunner.processes[self.uuid] = self

        logger.info("Container request %s (%s) state is %s",
                    self.name, response["uuid"], response["state"])

        if response["state"] == "Final":
            self.done(response)
    except Exception as e:
        logger.error("Got error %s" % str(e))
        self.output_callback({}, "permanentFail")
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Submit this command line as a single-task "crunchrunner" job.

    Entries of ``self.generatefiles`` are copied or written into a newly
    saved collection, and every entry is then referenced from the task's
    ``task.vwd`` mapping.  Docker image and resource constraints are derived
    from the tool's requirements and ``self.builder.resources``.

    The created job is registered in ``self.arvrunner.jobs`` and passed to
    ``self.update_pipeline_component``.  Exceptions raised while submitting
    are logged and reported via ``self.output_callback`` as
    ``"permanentFail"``.

    ``dry_run`` is not used.  ``kwargs`` may carry ``use_container`` (an
    explicit ``False`` selects the default "arvados/jobs" image) and
    ``enable_reuse`` (default ``True``, passed as ``find_or_create``).
    """
    SCRIPT_COMMIT = "9e5b98e8f5f4727856b53447191f9c06e3da2ba6"

    task_params = {"command": self.command_line}
    job_constraints = {}

    if self.generatefiles:
        workdir = arvados.collection.Collection()
        task_params["task.vwd"] = {}
        for dest in self.generatefiles:
            item = self.generatefiles[dest]
            if not isinstance(item, dict):
                # Literal content is written directly.
                with workdir.open(dest, "w") as sink:
                    sink.write(item)
                continue
            # Dict entries reference Keep content that gets copied in.
            location = item["path"].replace("$(task.keep)/", "keep:")
            src, rest = self.arvrunner.fs_access.get_collection(location)
            workdir.copy(rest, dest, source_collection=src)
        workdir.save_new()
        for dest in self.generatefiles:
            task_params["task.vwd"][dest] = "$(task.keep)/%s/%s" % (
                workdir.portable_data_hash(), dest)

    task_params["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
    if self.environment:
        task_params["task.env"].update(self.environment)

    if self.stdin:
        task_params["task.stdin"] = self.pathmapper.mapper(self.stdin)[1]

    if self.stdout:
        task_params["task.stdout"] = self.stdout

    docker_req, _docker_is_req = get_feature(self, "DockerRequirement")
    want_container = kwargs.get("use_container") is not False
    if docker_req and want_container:
        image = arv_docker_get_image(self.arvrunner.api, docker_req,
                                     pull_image, self.arvrunner.project_uuid)
    else:
        image = "arvados/jobs"
    job_constraints["docker_image"] = image

    resources = self.builder.resources
    if resources is not None:
        job_constraints["min_cores_per_node"] = resources.get("cores", 1)
        job_constraints["min_ram_mb_per_node"] = resources.get("ram")
        job_constraints["min_scratch_mb_per_node"] = (
            resources.get("tmpdirSize", 0) + resources.get("outdirSize", 0))

    filters = [["repository", "=", "arvados"],
               ["script", "=", "crunchrunner"],
               ["script_version", "in git", SCRIPT_COMMIT]]
    if not self.arvrunner.ignore_docker_for_reuse:
        filters.append(["docker_image_locator", "in docker",
                        job_constraints["docker_image"]])

    try:
        create_call = self.arvrunner.api.jobs().create(
            body={
                "owner_uuid": self.arvrunner.project_uuid,
                "script": "crunchrunner",
                "repository": "arvados",
                "script_version": "master",
                "minimum_script_version": SCRIPT_COMMIT,
                "script_parameters": {"tasks": [task_params]},
                "runtime_constraints": job_constraints,
            },
            filters=filters,
            find_or_create=kwargs.get("enable_reuse", True))
        response = create_call.execute(num_retries=self.arvrunner.num_retries)

        self.arvrunner.jobs[response["uuid"]] = self

        self.update_pipeline_component(response)

        logger.info("Job %s (%s) is %s", self.name, response["uuid"], response["state"])

        if response["state"] in ("Complete", "Failed", "Cancelled"):
            self.done(response)
    except Exception as e:
        logger.error("Got error %s" % str(e))
        self.output_callback({}, "permanentFail")
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Submit this command line as a single-task "crunchrunner" job.

    Steps, each timed with ``Perf`` where the original did so:

    1. Write "CreateFile" entries of the ``self.generatefiles`` listing into
       a collection, save it, and fill the task's ``task.vwd`` mapping.
    2. Resolve the Docker image ("arvados/jobs" when no usable
       ``DockerRequirement`` applies) and the resource constraints.
    3. Create the job, honouring ``enable_reuse`` and the tool's
       ``ReuseRequirement``.
    4. Register the job; when a "Complete" job was returned, try to grant the
       target project read access to it and call ``self.done``.

    Exceptions raised in steps 3-4 are logged with traceback and reported via
    ``self.output_callback`` as ``"permanentFail"``.

    ``dry_run`` is not used.  ``kwargs`` may carry ``use_container`` and
    ``enable_reuse`` (default ``True``).

    Raises:
        UnsupportedRequirement: (via ``SourceLine(...).makeError``) when the
            DockerRequirement sets ``dockerOutputDirectory``.
    """
    task = {"command": self.command_line}
    constraints = {}

    with Perf(metrics, "generatefiles %s" % self.name):
        if self.generatefiles["listing"]:
            staging = arvados.collection.Collection(
                api_client=self.arvrunner.api,
                keep_client=self.arvrunner.keep_client,
                num_retries=self.arvrunner.num_retries)
            task["task.vwd"] = {}
            generatemapper = VwdPathMapper([self.generatefiles], "", "",
                                           separateDirs=False)

            with Perf(metrics, "createfiles %s" % self.name):
                for _key, entry in generatemapper.items():
                    if entry.type == "CreateFile":
                        with staging.open(entry.target, "w") as out:
                            out.write(entry.resolved.encode("utf-8"))

            # Only save when the collection evaluates as true.
            if staging:
                with Perf(metrics, "generatefiles.save_new %s" % self.name):
                    staging.save_new()

            vwd_map = task["task.vwd"]
            for _key, entry in generatemapper.items():
                if entry.type == "File":
                    vwd_map[entry.target] = entry.resolved
                if entry.type == "CreateFile":
                    vwd_map[entry.target] = "$(task.keep)/%s/%s" % (
                        staging.portable_data_hash(), entry.target)

    task["task.env"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    if self.environment:
        task["task.env"].update(self.environment)

    # Optional task settings are only emitted when they are truthy.
    optional_settings = (
        ("task.stdin", self.stdin),
        ("task.stdout", self.stdout),
        ("task.stderr", self.stderr),
        ("task.successCodes", self.successCodes),
        ("task.temporaryFailCodes", self.temporaryFailCodes),
        ("task.permanentFailCodes", self.permanentFailCodes),
    )
    for key, value in optional_settings:
        if value:
            task[key] = value

    with Perf(metrics, "arv_docker_get_image %s" % self.name):
        docker_req, _docker_is_req = get_feature(self, "DockerRequirement")
        if docker_req and kwargs.get("use_container") is not False:
            if docker_req.get("dockerOutputDirectory"):
                raise SourceLine(
                    docker_req, "dockerOutputDirectory", UnsupportedRequirement
                ).makeError(
                    "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
            constraints["docker_image"] = arv_docker_get_image(
                self.arvrunner.api, docker_req, pull_image,
                self.arvrunner.project_uuid)
        else:
            constraints["docker_image"] = "arvados/jobs"

    resources = self.builder.resources
    if resources is not None:
        constraints["min_cores_per_node"] = resources.get("cores", 1)
        constraints["min_ram_mb_per_node"] = resources.get("ram")
        constraints["min_scratch_mb_per_node"] = (
            resources.get("tmpdirSize", 0) + resources.get("outdirSize", 0))

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            # The keep cache is also added on top of the node RAM request.
            constraints["keep_cache_mb_per_task"] = runtime_req["keep_cache"]
            constraints["min_ram_mb_per_node"] += runtime_req["keep_cache"]
        if "outputDirType" in runtime_req:
            if runtime_req["outputDirType"] == "local_output_dir":
                task["task.keepTmpOutput"] = False
            elif runtime_req["outputDirType"] == "keep_output_dir":
                task["task.keepTmpOutput"] = True

    filters = [
        ["repository", "=", "arvados"],
        ["script", "=", "crunchrunner"],
        ["script_version", "in git", crunchrunner_git_commit],
    ]
    if not self.arvrunner.ignore_docker_for_reuse:
        filters.append(["docker_image_locator", "in docker",
                        constraints["docker_image"]])

    reuse = kwargs.get("enable_reuse", True)
    if reuse:
        reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement")
        if reuse_req:
            reuse = reuse_req["enableReuse"]

    try:
        with Perf(metrics, "create %s" % self.name):
            response = self.arvrunner.api.jobs().create(
                body={
                    "owner_uuid": self.arvrunner.project_uuid,
                    "script": "crunchrunner",
                    "repository": "arvados",
                    "script_version": "master",
                    "minimum_script_version": crunchrunner_git_commit,
                    "script_parameters": {"tasks": [task]},
                    "runtime_constraints": constraints,
                },
                filters=filters,
                find_or_create=reuse,
            ).execute(num_retries=self.arvrunner.num_retries)

        self.arvrunner.processes[response["uuid"]] = self

        self.update_pipeline_component(response)

        if response["state"] != "Complete":
            logger.info("%s %s is %s", self.arvrunner.label(self),
                        response["uuid"], response["state"])
        else:
            logger.info("%s reused job %s", self.arvrunner.label(self),
                        response["uuid"])

            # Give read permission to the desired project on reused jobs
            if response["owner_uuid"] != self.arvrunner.project_uuid:
                try:
                    self.arvrunner.api.links().create(body={
                        'link_class': 'permission',
                        'name': 'can_read',
                        'tail_uuid': self.arvrunner.project_uuid,
                        'head_uuid': response["uuid"],
                    }).execute(num_retries=self.arvrunner.num_retries)
                except ApiError as e:
                    # The user might not have "manage" access on the job: log
                    # a message and continue.
                    logger.info("Creating read permission on job %s: %s",
                                response["uuid"], e)

            with Perf(metrics, "done %s" % self.name):
                self.done(response)
    except Exception:
        logger.exception("%s error" % (self.arvrunner.label(self)))
        self.output_callback({}, "permanentFail")
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Build a container request for this command line and submit it.

    The request combines tmp mounts for ``self.outdir``/``self.tmpdir``,
    collection mounts for staged referenced files, a freshly saved collection
    for the ``self.generatefiles`` listing, stdin/stderr/stdout mounts, the
    container image, and the API, runtime and partition requirements declared
    on the tool.

    On success the request is registered in ``self.arvrunner.processes``; a
    request already in state ``"Final"`` is handed to ``self.done``.  Errors
    raised by the create call are logged and reported through
    ``self.output_callback`` as ``"permanentFail"``.

    ``dry_run`` is not used.  Recognised ``kwargs``: ``enable_reuse``
    (default ``True``, sent as ``use_existing``) and ``runnerjob``.
    """
    request = {
        "command": self.command_line,
        "owner_uuid": self.arvrunner.project_uuid,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": 1,
        "state": "Committed",
        "properties": {},
    }
    constraints = {}

    resources = self.builder.resources
    if resources is not None:
        constraints["vcpus"] = resources.get("cores", 1)
        constraints["ram"] = resources.get("ram") * 2**20

    mounts = {
        self.outdir: {
            "kind": "tmp",
            "capacity": resources.get("outdirSize", 0) * 2**20,
        },
        self.tmpdir: {
            "kind": "tmp",
            "capacity": resources.get("tmpdirSize", 0) * 2**20,
        },
    }
    scheduling = {}

    referenced = [self.pathmapper.mapper(f) for f in self.pathmapper.referenced_files]
    referenced.sort(key=lambda entry: entry.resolved)
    covered_prefix = None
    for resolved, target, kind, staged in referenced:
        if not staged:
            continue
        # Skip anything that lives below the directory mounted last.
        if covered_prefix and target.startswith(covered_prefix):
            continue
        is_dir = kind == "Directory"
        targetdir = target if is_dir else os.path.dirname(target)
        parts = resolved.split("/", 1)
        mount = {
            "kind": "collection",
            "portable_data_hash": parts[0][5:],   # remove "keep:"
        }
        mounts[targetdir] = mount
        if len(parts) == 2:
            subpath = parts[1] if is_dir else os.path.dirname(parts[1])
            if subpath and subpath != "/":
                mount["path"] = subpath
        covered_prefix = targetdir + "/"

    with Perf(metrics, "generatefiles %s" % self.name):
        if self.generatefiles["listing"]:
            staging = arvados.collection.Collection(
                api_client=self.arvrunner.api,
                keep_client=self.arvrunner.keep_client,
                num_retries=self.arvrunner.num_retries)
            generatemapper = NoFollowPathMapper([self.generatefiles], "", "",
                                                separateDirs=False)

            with Perf(metrics, "createfiles %s" % self.name):
                for _key, entry in generatemapper.items():
                    if not entry.target:
                        continue
                    if entry.type in ("File", "Directory"):
                        source, path = self.arvrunner.fs_access.get_collection(entry.resolved)
                        staging.copy(path, entry.target, source_collection=source)
                    elif entry.type == "CreateFile":
                        with staging.open(entry.target, "w") as out:
                            out.write(entry.resolved.encode("utf-8"))

            with Perf(metrics, "generatefiles.save_new %s" % self.name):
                staging.save_new()

            # Mount every generated entry at its place under the output dir.
            for _key, entry in generatemapper.items():
                if entry.target:
                    mountpoint = "%s/%s" % (self.outdir, entry.target)
                    mounts[mountpoint] = {
                        "kind": "collection",
                        "portable_data_hash": staging.portable_data_hash(),
                        "path": entry.target,
                    }

    env = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    request["environment"] = env
    if self.environment:
        env.update(self.environment)

    if self.stdin:
        # Drop the first six characters, then split "<pdh>/<path>".
        stdin_parts = self.stdin[6:].split("/", 1)
        mounts["stdin"] = {
            "kind": "collection",
            "portable_data_hash": stdin_parts[0],
            "path": stdin_parts[1],
        }

    if self.stderr:
        mounts["stderr"] = {
            "kind": "file",
            "path": "%s/%s" % (self.outdir, self.stderr),
        }

    if self.stdout:
        mounts["stdout"] = {
            "kind": "file",
            "path": "%s/%s" % (self.outdir, self.stdout),
        }

    docker_req, _docker_is_req = get_feature(self, "DockerRequirement")
    if not docker_req:
        docker_req = {"dockerImageId": "arvados/jobs"}

    request["container_image"] = arv_docker_get_image(
        self.arvrunner.api, docker_req, pull_image, self.arvrunner.project_uuid)

    api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
    if api_req:
        constraints["API"] = True

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            constraints["keep_cache_ram"] = runtime_req["keep_cache"] * 2**20
        if "outputDirType" in runtime_req:
            if runtime_req["outputDirType"] == "local_output_dir":
                # Currently the default behavior.
                pass
            elif runtime_req["outputDirType"] == "keep_output_dir":
                mounts[self.outdir] = {
                    "kind": "collection",
                    "writable": True,
                }

    partition_req, _ = get_feature(self, "http://arvados.org/cwl#PartitionRequirement")
    if partition_req:
        scheduling["partitions"] = aslist(partition_req["partition"])

    request["mounts"] = mounts
    request["runtime_constraints"] = constraints
    request["use_existing"] = kwargs.get("enable_reuse", True)
    request["scheduling_parameters"] = scheduling

    if kwargs.get("runnerjob", "").startswith("arvwf:"):
        # The workflow UUID sits between the "arvwf:" prefix and the "#".
        runnerjob = kwargs["runnerjob"]
        wfuuid = runnerjob[6:runnerjob.index("#")]
        wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(
            num_retries=self.arvrunner.num_retries)
        if request["name"] == "main":
            request["name"] = wfrecord["name"]
        request["properties"]["template_uuid"] = wfuuid

    try:
        response = self.arvrunner.api.container_requests().create(
            body=request
        ).execute(num_retries=self.arvrunner.num_retries)

        self.uuid = response["uuid"]
        self.arvrunner.processes[self.uuid] = self

        if response["state"] != "Final":
            logger.info("%s %s state is %s", self.arvrunner.label(self),
                        response["uuid"], response["state"])
        else:
            logger.info("%s reused container %s", self.arvrunner.label(self),
                        response["container_uuid"])
            self.done(response)
    except Exception as e:
        logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
        self.output_callback({}, "permanentFail")
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Assemble a container request for this invocation and submit it.

    The request is built from attributes already present on ``self``
    (command line, output/tmp directories, environment, path mapper,
    builder resources) and created through ``self.arvrunner.api``.

    ``dry_run`` and ``**kwargs`` are accepted but not read here;
    ``pull_image`` is forwarded to ``arv_docker_get_image``.

    Raises UnsupportedRequirement when generated files, stdin
    redirection or stderr redirection are requested.  Any exception
    raised while creating the request or handling its response is
    logged and reported as ``"permanentFail"`` via
    ``self.output_callback`` instead of propagating.
    """
    request = {
        "command": self.command_line,
        "owner_uuid": self.arvrunner.project_uuid,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": 1,
        "state": "Committed"
    }
    constraints = {}
    mount_table = {self.outdir: {"kind": "tmp"}}

    # Pass 1: directories whose target has no "/" after its first six
    # characters are mounted as-is and remembered.
    # NOTE(review): the 6-character slice presumably strips a "/keep/"
    # prefix from the target -- confirm against the path mapper.
    whole_collections = set()
    for src in self.pathmapper.files():
        _, target, kind = self.pathmapper.mapper(src)
        suffix = target[6:]
        if kind == "Directory" and "/" not in suffix:
            mount_table[target] = {
                "kind": "collection",
                "portable_data_hash": suffix
            }
            whole_collections.add(suffix)

    # Pass 2: everything not rooted in a directory recorded above gets
    # its own mount.  This needs the complete set from pass 1, hence the
    # second traversal.
    for src in self.pathmapper.files():
        _, target, kind = self.pathmapper.mapper(src)
        suffix = target[6:]
        if suffix.split("/")[0] in whole_collections:
            continue
        mount_table[target] = {
            "kind": "collection",
            "portable_data_hash": suffix
        }

    if self.generatefiles["listing"]:
        raise UnsupportedRequirement("Generate files not supported")

    env = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    request["environment"] = env
    if self.environment:
        # Tool-supplied variables override the defaults above.
        env.update(self.environment)

    if self.stdin:
        raise UnsupportedRequirement("Stdin redirection currently not suppported")

    if self.stderr:
        raise UnsupportedRequirement("Stderr redirection currently not suppported")

    if self.stdout:
        stdout_path = "%s/%s" % (self.outdir, self.stdout)
        mount_table["stdout"] = {"kind": "file", "path": stdout_path}

    docker_req, _docker_is_req = get_feature(self, "DockerRequirement")
    # Fall back to the arvados/jobs image when the tool names none.
    docker_req = docker_req or {"dockerImageId": "arvados/jobs"}

    request["container_image"] = arv_docker_get_image(
        self.arvrunner.api,
        docker_req,
        pull_image,
        self.arvrunner.project_uuid)

    resources = self.builder.resources
    if resources is not None:
        constraints["vcpus"] = resources.get("cores", 1)
        # The "ram" value is scaled by 2**20 for the request.
        constraints["ram"] = resources.get("ram") * 2**20

    request["mounts"] = mount_table
    request["runtime_constraints"] = constraints

    try:
        creation = self.arvrunner.api.container_requests().create(body=request)
        response = creation.execute(num_retries=self.arvrunner.num_retries)

        # Register this object under the container UUID of the response.
        self.arvrunner.processes[response["container_uuid"]] = self

        logger.info("Container %s (%s) request state is %s",
                    self.name, response["uuid"], response["state"])

        if response["state"] == "Final":
            self.done(response)
    except Exception as e:
        logger.error("Got error %s" % str(e))
        self.output_callback({}, "permanentFail")
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Submit this tool invocation as a "crunchrunner" job.

    Builds the task's ``script_parameters`` (command, generated files,
    environment, stdio redirection, exit-code sets) and the job's
    ``runtime_constraints`` from attributes already set on ``self``,
    then creates the job through ``self.arvrunner.api``.

    Keyword arguments read from ``**kwargs``:
      use_container -- when exactly ``False``, the DockerRequirement is
                       ignored and "arvados/jobs" is used as the image.
      enable_reuse  -- passed as ``find_or_create`` (default True).

    ``dry_run`` is accepted but not read here; ``pull_image`` is
    forwarded to ``arv_docker_get_image``.

    Raises UnsupportedRequirement (via SourceLine.makeError) when the
    DockerRequirement sets ``dockerOutputDirectory``.  Exceptions raised
    while creating the job or handling its response are logged with a
    traceback and reported as ``"permanentFail"`` through
    ``self.output_callback``.
    """
    script_parameters = {
        "command": self.command_line
    }
    runtime_constraints = {}

    # Perf(...) blocks label sections of work; Perf and metrics are
    # defined elsewhere in the file.
    with Perf(metrics, "generatefiles %s" % self.name):
        if self.generatefiles["listing"]:
            vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                keep_client=self.arvrunner.keep_client,
                                                num_retries=self.arvrunner.num_retries)
            script_parameters["task.vwd"] = {}
            generatemapper = VwdPathMapper([self.generatefiles], "", "",
                                           separateDirs=False)

            # Literal file contents are written into a new collection.
            with Perf(metrics, "createfiles %s" % self.name):
                for f, p in generatemapper.items():
                    if p.type == "CreateFile":
                        with vwd.open(p.target, "w") as n:
                            n.write(p.resolved.encode("utf-8"))

            # Only save the collection when it is truthy.
            if vwd:
                with Perf(metrics, "generatefiles.save_new %s" % self.name):
                    vwd.save_new()

            for f, p in generatemapper.items():
                if p.type == "File":
                    script_parameters["task.vwd"][p.target] = p.resolved
                if p.type == "CreateFile":
                    script_parameters["task.vwd"][p.target] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), p.target)

    script_parameters["task.env"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    if self.environment:
        # Tool-supplied variables override the defaults above.
        script_parameters["task.env"].update(self.environment)

    if self.stdin:
        script_parameters["task.stdin"] = self.stdin

    if self.stdout:
        script_parameters["task.stdout"] = self.stdout

    if self.stderr:
        script_parameters["task.stderr"] = self.stderr

    if self.successCodes:
        script_parameters["task.successCodes"] = self.successCodes
    if self.temporaryFailCodes:
        script_parameters["task.temporaryFailCodes"] = self.temporaryFailCodes
    if self.permanentFailCodes:
        script_parameters["task.permanentFailCodes"] = self.permanentFailCodes

    with Perf(metrics, "arv_docker_get_image %s" % self.name):
        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
        if docker_req and kwargs.get("use_container") is not False:
            if docker_req.get("dockerOutputDirectory"):
                raise SourceLine(docker_req, "dockerOutputDirectory", UnsupportedRequirement).makeError(
                    "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
            runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                       docker_req,
                                                                       pull_image,
                                                                       self.arvrunner.project_uuid)
        else:
            runtime_constraints["docker_image"] = "arvados/jobs"

    resources = self.builder.resources
    if resources is not None:
        runtime_constraints["min_cores_per_node"] = resources.get("cores", 1)
        runtime_constraints["min_ram_mb_per_node"] = resources.get("ram")
        runtime_constraints["min_scratch_mb_per_node"] = resources.get("tmpdirSize", 0) + resources.get("outdirSize", 0)

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            keep_cache = runtime_req["keep_cache"]
            runtime_constraints["keep_cache_mb_per_task"] = keep_cache
            # The keep cache is added on top of the RAM request.  Only do
            # so when a RAM request exists: the key is absent when
            # resources is None, and holds None when resources has no
            # "ram" entry.  Either case used to raise KeyError/TypeError.
            if runtime_constraints.get("min_ram_mb_per_node") is not None:
                runtime_constraints["min_ram_mb_per_node"] += keep_cache
        if "outputDirType" in runtime_req:
            if runtime_req["outputDirType"] == "local_output_dir":
                script_parameters["task.keepTmpOutput"] = False
            elif runtime_req["outputDirType"] == "keep_output_dir":
                script_parameters["task.keepTmpOutput"] = True

    # Filters passed to create() together with find_or_create.
    filters = [["repository", "=", "arvados"],
               ["script", "=", "crunchrunner"],
               ["script_version", "in git", crunchrunner_git_commit]]
    if not self.arvrunner.ignore_docker_for_reuse:
        filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]])

    try:
        with Perf(metrics, "create %s" % self.name):
            response = self.arvrunner.api.jobs().create(
                body={
                    "owner_uuid": self.arvrunner.project_uuid,
                    "script": "crunchrunner",
                    "repository": "arvados",
                    "script_version": "master",
                    "minimum_script_version": crunchrunner_git_commit,
                    "script_parameters": {"tasks": [script_parameters]},
                    "runtime_constraints": runtime_constraints
                },
                filters=filters,
                find_or_create=kwargs.get("enable_reuse", True)
            ).execute(num_retries=self.arvrunner.num_retries)

        self.arvrunner.processes[response["uuid"]] = self

        self.update_pipeline_component(response)

        if response["state"] == "Complete":
            # A job returned already Complete is logged as reused and
            # finished immediately.
            logger.info("%s reused job %s", self.arvrunner.label(self), response["uuid"])
            with Perf(metrics, "done %s" % self.name):
                self.done(response)
        else:
            logger.info("%s %s is %s", self.arvrunner.label(self), response["uuid"], response["state"])
    except Exception:
        logger.exception("%s error" % (self.arvrunner.label(self)))
        self.output_callback({}, "permanentFail")
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Build and submit the container request for this tool invocation.

    The request (command, mounts, secret mounts, environment, image,
    runtime constraints, scheduling parameters, output TTL, reuse flag)
    is assembled from attributes already set on ``self`` and created
    through ``self.arvrunner.api``.

    Keyword arguments read from ``**kwargs``:
      priority     -- stored as the request's "priority".
      enable_reuse -- default True; a ReuseRequirement on the tool can
                      override it when it is truthy.
      runnerjob    -- when it starts with "arvwf:", the workflow record
                      is fetched and its uuid stored as "template_uuid"
                      in the request properties.

    ``dry_run`` is accepted but not read here; ``pull_image`` is
    forwarded to ``arv_docker_get_image``.

    Raises WorkflowException when secret material appears in the
    command line or environment, or when the output TTL is negative.
    Exceptions raised while creating the request or handling its
    response are logged and reported as ``"permanentFail"`` through
    ``self.output_callback``.
    """
    # ArvadosCommandTool subclasses from cwltool.CommandLineTool,
    # which calls makeJobRunner() to get a new ArvadosContainer
    # object.  The fields that define execution such as
    # command_line, environment, etc are set on the
    # ArvadosContainer object by CommandLineTool.job() before
    # run() is called.

    container_request = {
        "command": self.command_line,
        "owner_uuid": self.arvrunner.project_uuid,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": kwargs.get("priority"),
        "state": "Committed",
        "properties": {},
    }
    runtime_constraints = {}

    if self.arvrunner.secret_store.has_secret(self.command_line):
        raise WorkflowException("Secret material leaked on command line, only file literals may contain secrets")

    if self.arvrunner.secret_store.has_secret(self.environment):
        raise WorkflowException("Secret material leaked in environment, only file literals may contain secrets")

    resources = self.builder.resources
    if resources is not None:
        runtime_constraints["vcpus"] = resources.get("cores", 1)
        runtime_constraints["ram"] = resources.get("ram") * 2**20

    # The original dereferenced `resources` here even though the check
    # above allows it to be None; use an empty mapping in that case so
    # the capacities fall back to the same 0 default as a missing key.
    sizes = resources if resources is not None else {}
    mounts = {
        self.outdir: {
            "kind": "tmp",
            "capacity": sizes.get("outdirSize", 0) * 2**20
        },
        self.tmpdir: {
            "kind": "tmp",
            "capacity": sizes.get("tmpdirSize", 0) * 2**20
        }
    }
    secret_mounts = {}
    scheduling_parameters = {}

    # Mount referenced inputs.  Entries are visited in order of their
    # resolved location, and an entry whose target lies under the
    # previously mounted directory is skipped.
    rf = [self.pathmapper.mapper(f) for f in self.pathmapper.referenced_files]
    rf.sort(key=lambda k: k.resolved)
    prevdir = None
    for resolved, target, tp, stg in rf:
        if not stg:
            continue
        if prevdir and target.startswith(prevdir):
            continue
        if tp == "Directory":
            targetdir = target
        else:
            # Files are made available by mounting their parent directory.
            targetdir = os.path.dirname(target)
        sp = resolved.split("/", 1)
        pdh = sp[0][5:]   # remove "keep:"
        mounts[targetdir] = {
            "kind": "collection",
            "portable_data_hash": pdh
        }
        if len(sp) == 2:
            if tp == "Directory":
                path = sp[1]
            else:
                path = os.path.dirname(sp[1])
            if path and path != "/":
                mounts[targetdir]["path"] = path
        prevdir = targetdir + "/"

    with Perf(metrics, "generatefiles %s" % self.name):
        if self.generatefiles["listing"]:
            vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                keep_client=self.arvrunner.keep_client,
                                                num_retries=self.arvrunner.num_retries)
            generatemapper = NoFollowPathMapper([self.generatefiles], "", "",
                                                separateDirs=False)

            # `key` is passed by keyword only: the former positional
            # `None` (the Python 2 `cmp` slot) is a TypeError on Python 3
            # and was a no-op on Python 2.
            sorteditems = sorted(generatemapper.items(), key=lambda n: n[1].target)

            logger.debug("generatemapper is %s", sorteditems)

            with Perf(metrics, "createfiles %s" % self.name):
                for f, p in sorteditems:
                    if not p.target:
                        pass
                    elif p.type in ("File", "Directory", "WritableFile", "WritableDirectory"):
                        if p.resolved.startswith("_:"):
                            vwd.mkdirs(p.target)
                        else:
                            source, path = self.arvrunner.fs_access.get_collection(p.resolved)
                            vwd.copy(path, p.target, source_collection=source)
                    elif p.type == "CreateFile":
                        if self.arvrunner.secret_store.has_secret(p.resolved):
                            # Secret literals go into secret_mounts
                            # instead of being written to the collection.
                            secret_mounts["%s/%s" % (self.outdir, p.target)] = {
                                "kind": "text",
                                "content": self.arvrunner.secret_store.retrieve(p.resolved)
                            }
                        else:
                            with vwd.open(p.target, "w") as n:
                                n.write(p.resolved.encode("utf-8"))

            def keepemptydirs(p):
                # Recursively put an empty ".keep" file into every empty
                # collection/subcollection.
                if isinstance(p, arvados.collection.RichCollectionBase):
                    if len(p) == 0:
                        p.open(".keep", "w").close()
                    else:
                        for c in p:
                            keepemptydirs(p[c])

            keepemptydirs(vwd)

            with Perf(metrics, "generatefiles.save_new %s" % self.name):
                vwd.save_new()

            # Mount each generated item under the output directory,
            # skipping secrets and anything nested under an item that was
            # just mounted (sorteditems is ordered by target).
            prev = None
            for f, p in sorteditems:
                if (not p.target or self.arvrunner.secret_store.has_secret(p.resolved) or
                        (prev is not None and p.target.startswith(prev))):
                    continue
                mountpoint = "%s/%s" % (self.outdir, p.target)
                mounts[mountpoint] = {"kind": "collection",
                                      "portable_data_hash": vwd.portable_data_hash(),
                                      "path": p.target}
                if p.type.startswith("Writable"):
                    mounts[mountpoint]["writable"] = True
                prev = p.target + "/"

    container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    if self.environment:
        container_request["environment"].update(self.environment)

    if self.stdin:
        # NOTE(review): the 6-character slice presumably strips a
        # "/keep/" prefix; the remainder must contain a "/" or sp[1]
        # raises IndexError -- confirm against the caller.
        sp = self.stdin[6:].split("/", 1)
        mounts["stdin"] = {"kind": "collection",
                           "portable_data_hash": sp[0],
                           "path": sp[1]}

    if self.stderr:
        mounts["stderr"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stderr)}

    if self.stdout:
        mounts["stdout"] = {"kind": "file",
                            "path": "%s/%s" % (self.outdir, self.stdout)}

    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
    if not docker_req:
        docker_req = {"dockerImageId": "arvados/jobs"}

    container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                 docker_req,
                                                                 pull_image,
                                                                 self.arvrunner.project_uuid)

    api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
    if api_req:
        runtime_constraints["API"] = True

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            runtime_constraints["keep_cache_ram"] = runtime_req["keep_cache"] * 2**20
        if "outputDirType" in runtime_req:
            if runtime_req["outputDirType"] == "local_output_dir":
                # Currently the default behavior.
                pass
            elif runtime_req["outputDirType"] == "keep_output_dir":
                mounts[self.outdir] = {
                    "kind": "collection",
                    "writable": True
                }

    partition_req, _ = get_feature(self, "http://arvados.org/cwl#PartitionRequirement")
    if partition_req:
        scheduling_parameters["partitions"] = aslist(partition_req["partition"])

    intermediate_output_req, _ = get_feature(self, "http://arvados.org/cwl#IntermediateOutput")
    if intermediate_output_req:
        self.output_ttl = intermediate_output_req["outputTTL"]
    else:
        self.output_ttl = self.arvrunner.intermediate_output_ttl

    if self.output_ttl < 0:
        # Format self.output_ttl: container_request["output_ttl"] is not
        # assigned until the next statement, so reading it here raised
        # KeyError instead of the intended WorkflowException.
        raise WorkflowException("Invalid value %d for output_ttl, cannot be less than zero" % self.output_ttl)

    container_request["output_ttl"] = self.output_ttl
    container_request["mounts"] = mounts
    container_request["secret_mounts"] = secret_mounts
    container_request["runtime_constraints"] = runtime_constraints
    container_request["scheduling_parameters"] = scheduling_parameters

    enable_reuse = kwargs.get("enable_reuse", True)
    if enable_reuse:
        reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement")
        if reuse_req:
            enable_reuse = reuse_req["enableReuse"]
    container_request["use_existing"] = enable_reuse

    if kwargs.get("runnerjob", "").startswith("arvwf:"):
        # Characters after the 6-char "arvwf:" prefix up to "#" are used
        # as the workflow uuid (index() raises ValueError without "#").
        wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
        wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
        if container_request["name"] == "main":
            container_request["name"] = wfrecord["name"]
        container_request["properties"]["template_uuid"] = wfuuid

    self.output_callback = self.arvrunner.get_wrapped_callback(self.output_callback)

    try:
        response = self.arvrunner.api.container_requests().create(
            body=container_request
        ).execute(num_retries=self.arvrunner.num_retries)

        self.uuid = response["uuid"]
        self.arvrunner.process_submitted(self)

        if response["state"] == "Final":
            logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
            self.done(response)
        else:
            logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
    except Exception as e:
        logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
        self.output_callback({}, "permanentFail")
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Create a "crunchrunner" job for this tool invocation.

    Collects the task parameters (command, generated files, environment,
    stdio redirection, exit-code sets) and the job's runtime constraints
    from attributes already set on ``self`` and submits them through
    ``self.arvrunner.api``.

    Keyword arguments read from ``**kwargs``:
      use_container -- when exactly ``False``, the DockerRequirement is
                       ignored and ``arvados_jobs_image`` picks the image.
      enable_reuse  -- passed as ``find_or_create`` (default True).

    ``dry_run`` is accepted but not read here; ``pull_image`` is
    forwarded to ``arv_docker_get_image``.

    Raises UnsupportedRequirement when the DockerRequirement sets
    ``dockerOutputDirectory``.  Exceptions raised while creating the job
    or handling its response are logged with a traceback and reported as
    ``"permanentFail"`` through ``self.output_callback``.
    """
    task = {"command": self.command_line}
    constraints = {}

    with Perf(metrics, "generatefiles %s" % self.name):
        if self.generatefiles["listing"]:
            workdir = arvados.collection.Collection(
                api_client=self.arvrunner.api,
                keep_client=self.arvrunner.keep_client,
                num_retries=self.arvrunner.num_retries)
            staged = {}
            task["task.vwd"] = staged
            mapper = InitialWorkDirPathMapper([self.generatefiles], "", "",
                                              separateDirs=False)

            # Write literal file contents into the new collection.
            with Perf(metrics, "createfiles %s" % self.name):
                for _, entry in mapper.items():
                    if entry.type != "CreateFile":
                        continue
                    with workdir.open(entry.target, "w") as out:
                        out.write(entry.resolved.encode("utf-8"))

            with Perf(metrics, "generatefiles.save_new %s" % self.name):
                workdir.save_new()

            # Existing files map straight to their resolved location;
            # created files point into the collection saved above.
            for _, entry in mapper.items():
                if entry.type == "File":
                    staged[entry.target] = entry.resolved
                elif entry.type == "CreateFile":
                    staged[entry.target] = "$(task.keep)/%s/%s" % (
                        workdir.portable_data_hash(), entry.target)

    env = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
    task["task.env"] = env
    if self.environment:
        env.update(self.environment)

    # Optional settings are only included when they are truthy.
    optional_settings = (
        ("task.stdin", self.stdin),
        ("task.stdout", self.stdout),
        ("task.stderr", self.stderr),
        ("task.successCodes", self.successCodes),
        ("task.temporaryFailCodes", self.temporaryFailCodes),
        ("task.permanentFailCodes", self.permanentFailCodes),
    )
    for setting, value in optional_settings:
        if value:
            task[setting] = value

    with Perf(metrics, "arv_docker_get_image %s" % self.name):
        docker_req, _docker_is_req = get_feature(self, "DockerRequirement")
        if docker_req and kwargs.get("use_container") is not False:
            if docker_req.get("dockerOutputDirectory"):
                raise UnsupportedRequirement(
                    "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
            constraints["docker_image"] = arv_docker_get_image(
                self.arvrunner.api,
                docker_req,
                pull_image,
                self.arvrunner.project_uuid)
        else:
            constraints["docker_image"] = arvados_jobs_image(self.arvrunner)

    resources = self.builder.resources
    if resources is not None:
        constraints["min_cores_per_node"] = resources.get("cores", 1)
        constraints["min_ram_mb_per_node"] = resources.get("ram")
        scratch = resources.get("tmpdirSize", 0) + resources.get("outdirSize", 0)
        constraints["min_scratch_mb_per_node"] = scratch

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            constraints["keep_cache_mb_per_task"] = runtime_req["keep_cache"]
        if "outputDirType" in runtime_req:
            out_type = runtime_req["outputDirType"]
            if out_type == "local_output_dir":
                task["task.keepTmpOutput"] = False
            elif out_type == "keep_output_dir":
                task["task.keepTmpOutput"] = True

    # The same commit id is used for the reuse filter and for
    # minimum_script_version.
    min_commit = "9e5b98e8f5f4727856b53447191f9c06e3da2ba6"
    filters = [
        ["repository", "=", "arvados"],
        ["script", "=", "crunchrunner"],
        ["script_version", "in git", min_commit],
    ]
    if not self.arvrunner.ignore_docker_for_reuse:
        filters.append(["docker_image_locator", "in docker", constraints["docker_image"]])

    try:
        with Perf(metrics, "create %s" % self.name):
            job_body = {
                "owner_uuid": self.arvrunner.project_uuid,
                "script": "crunchrunner",
                "repository": "arvados",
                "script_version": "master",
                "minimum_script_version": min_commit,
                "script_parameters": {"tasks": [task]},
                "runtime_constraints": constraints
            }
            creation = self.arvrunner.api.jobs().create(
                body=job_body,
                filters=filters,
                find_or_create=kwargs.get("enable_reuse", True))
            response = creation.execute(num_retries=self.arvrunner.num_retries)

        self.arvrunner.processes[response["uuid"]] = self

        self.update_pipeline_component(response)

        logger.info("Job %s (%s) is %s", self.name, response["uuid"], response["state"])

        # A job that comes back already finished is completed right away.
        if response["state"] in ("Complete", "Failed", "Cancelled"):
            with Perf(metrics, "done %s" % self.name):
                self.done(response)
    except Exception as e:
        logger.exception("Job %s error" % (self.name))
        self.output_callback({}, "permanentFail")
def run(self, dry_run=False, pull_image=True, **kwargs):
    """Build and submit the container request for this tool invocation.

    The request (command, mounts, environment, image, runtime
    constraints, scheduling parameters, reuse flag) is assembled from
    attributes already set on ``self`` and created through
    ``self.arvrunner.api``.

    Keyword arguments read from ``**kwargs``:
      enable_reuse -- stored as the request's "use_existing"
                      (default True).
      runnerjob    -- when it starts with "arvwf:", the workflow record
                      is fetched and its uuid stored as "template_uuid"
                      in the request properties.

    ``dry_run`` is accepted but not read here; ``pull_image`` is
    forwarded to ``arv_docker_get_image``.

    Raises UnsupportedRequirement when stdin or stderr redirection is
    requested.  Exceptions raised while creating the request or handling
    its response are logged and reported as ``"permanentFail"`` through
    ``self.output_callback``.
    """
    container_request = {
        "command": self.command_line,
        "owner_uuid": self.arvrunner.project_uuid,
        "name": self.name,
        "output_path": self.outdir,
        "cwd": self.outdir,
        "priority": 1,
        "state": "Committed",
        "properties": {}
    }
    runtime_constraints = {}
    mounts = {
        self.outdir: {
            "kind": "tmp"
        }
    }
    scheduling_parameters = {}

    # Pass 1: a Directory whose resolved location has no "/" is mounted
    # as a collection.  Record the bare hash (without the 5-character
    # "keep:" prefix), because pass 2 compares against bare hashes.  The
    # original stored the prefixed string, so the membership test below
    # never matched and every file was mounted again individually.
    dirs = set()
    for f in self.pathmapper.files():
        pdh, p, tp = self.pathmapper.mapper(f)
        if tp == "Directory" and '/' not in pdh:
            mounts[p] = {
                "kind": "collection",
                "portable_data_hash": pdh[5:]
            }
            dirs.add(pdh[5:])

    # Pass 2: mount every other keep reference, unless its collection
    # was mounted in pass 1.
    # NOTE(review): skipping assumes the entry's target lies under the
    # pass-1 mount point for that collection -- confirm against the path
    # mapper's target patterns.
    for f in self.pathmapper.files():
        res, p, tp = self.pathmapper.mapper(f)
        if res.startswith("keep:"):
            res = res[5:]
        elif res.startswith("/keep/"):
            res = res[6:]
        else:
            continue
        sp = res.split("/", 1)
        pdh = sp[0]
        if pdh not in dirs:
            mounts[p] = {
                "kind": "collection",
                "portable_data_hash": pdh
            }
            if len(sp) == 2:
                mounts[p]["path"] = sp[1]

    with Perf(metrics, "generatefiles %s" % self.name):
        if self.generatefiles["listing"]:
            vwd = arvados.collection.Collection(
                api_client=self.arvrunner.api,
                keep_client=self.arvrunner.keep_client,
                num_retries=self.arvrunner.num_retries)
            generatemapper = NoFollowPathMapper([self.generatefiles], "", "",
                                                separateDirs=False)

            with Perf(metrics, "createfiles %s" % self.name):
                for f, p in generatemapper.items():
                    if not p.target:
                        pass
                    elif p.type in ("File", "Directory"):
                        source, path = self.arvrunner.fs_access.get_collection(p.resolved)
                        vwd.copy(path, p.target, source_collection=source)
                    elif p.type == "CreateFile":
                        with vwd.open(p.target, "w") as n:
                            n.write(p.resolved.encode("utf-8"))

            with Perf(metrics, "generatefiles.save_new %s" % self.name):
                vwd.save_new()

            # Each generated item is mounted under the output directory
            # from the collection saved above.
            for f, p in generatemapper.items():
                if not p.target:
                    continue
                mountpoint = "%s/%s" % (self.outdir, p.target)
                mounts[mountpoint] = {
                    "kind": "collection",
                    "portable_data_hash": vwd.portable_data_hash(),
                    "path": p.target
                }

    container_request["environment"] = {
        "TMPDIR": self.tmpdir,
        "HOME": self.outdir
    }
    if self.environment:
        # Tool-supplied variables override the defaults above.
        container_request["environment"].update(self.environment)

    if self.stdin:
        raise UnsupportedRequirement("Stdin redirection currently not suppported")

    if self.stderr:
        raise UnsupportedRequirement("Stderr redirection currently not suppported")

    if self.stdout:
        mounts["stdout"] = {
            "kind": "file",
            "path": "%s/%s" % (self.outdir, self.stdout)
        }

    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
    if not docker_req:
        docker_req = {"dockerImageId": "arvados/jobs"}

    container_request["container_image"] = arv_docker_get_image(
        self.arvrunner.api,
        docker_req,
        pull_image,
        self.arvrunner.project_uuid)

    resources = self.builder.resources
    if resources is not None:
        runtime_constraints["vcpus"] = resources.get("cores", 1)
        runtime_constraints["ram"] = resources.get("ram") * 2**20

    api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
    if api_req:
        runtime_constraints["API"] = True

    runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
    if runtime_req:
        if "keep_cache" in runtime_req:
            runtime_constraints["keep_cache_ram"] = runtime_req["keep_cache"] * 2**20

    partition_req, _ = get_feature(self, "http://arvados.org/cwl#PartitionRequirement")
    if partition_req:
        scheduling_parameters["partitions"] = aslist(partition_req["partition"])

    container_request["mounts"] = mounts
    container_request["runtime_constraints"] = runtime_constraints
    container_request["use_existing"] = kwargs.get("enable_reuse", True)
    container_request["scheduling_parameters"] = scheduling_parameters

    if kwargs.get("runnerjob", "").startswith("arvwf:"):
        # Characters after the 6-char "arvwf:" prefix up to "#" are used
        # as the workflow uuid (index() raises ValueError without "#").
        wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
        wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(
            num_retries=self.arvrunner.num_retries)
        if container_request["name"] == "main":
            container_request["name"] = wfrecord["name"]
        container_request["properties"]["template_uuid"] = wfuuid

    try:
        response = self.arvrunner.api.container_requests().create(
            body=container_request
        ).execute(num_retries=self.arvrunner.num_retries)

        self.uuid = response["uuid"]
        self.arvrunner.processes[self.uuid] = self

        if response["state"] == "Final":
            logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
            self.done(response)
        else:
            logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
    except Exception as e:
        logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
        self.output_callback({}, "permanentFail")