Example 1
    def run(self, dry_run=False, pull_image=True, **kwargs):
        script_parameters = {
            "command": self.command_line
        }
        runtime_constraints = {}

        if self.generatefiles:
            vwd = arvados.collection.Collection()
            script_parameters["task.vwd"] = {}
            for t in self.generatefiles:
                if isinstance(self.generatefiles[t], dict):
                    src, rest = self.arvrunner.fs_access.get_collection(self.generatefiles[t]["path"].replace("$(task.keep)/", "keep:"))
                    vwd.copy(rest, t, source_collection=src)
                else:
                    with vwd.open(t, "w") as f:
                        f.write(self.generatefiles[t])
            vwd.save_new()
            for t in self.generatefiles:
                script_parameters["task.vwd"][t] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), t)

        script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
        if self.environment:
            script_parameters["task.env"].update(self.environment)

        if self.stdin:
            script_parameters["task.stdin"] = self.pathmapper.mapper(self.stdin)[1]

        if self.stdout:
            script_parameters["task.stdout"] = self.stdout

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
        if docker_req and kwargs.get("use_container") is not False:
            runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image)

        try:
            response = self.arvrunner.api.jobs().create(body={
                "script": "crunchrunner",
                "repository": "arvados",
                "script_version": "master",
                "minimum_script_version": "9e5b98e8f5f4727856b53447191f9c06e3da2ba6",
                "script_parameters": {"tasks": [script_parameters], "crunchrunner": crunchrunner_pdh+"/crunchrunner"},
                "runtime_constraints": runtime_constraints
            }, find_or_create=kwargs.get("enable_reuse", True)).execute(num_retries=self.arvrunner.num_retries)

            self.arvrunner.jobs[response["uuid"]] = self

            self.arvrunner.pipeline["components"][self.name] = {"job": response}
            self.arvrunner.pipeline = self.arvrunner.api.pipeline_instances().update(uuid=self.arvrunner.pipeline["uuid"],
                                                                                     body={
                                                                                         "components": self.arvrunner.pipeline["components"]
                                                                                     }).execute(num_retries=self.arvrunner.num_retries)

            logger.info("Job %s (%s) is %s", self.name, response["uuid"], response["state"])

            if response["state"] in ("Complete", "Failed", "Cancelled"):
                self.done(response)
        except Exception as e:
            logger.error("Got error %s" % str(e))
            self.output_callback({}, "permanentFail")
Example 2
 def upload_docker(self, tool):
     if isinstance(tool, CommandLineTool):
         (docker_req, docker_is_req) = get_feature(tool, "DockerRequirement")
         if docker_req:
             arv_docker_get_image(self.arvrunner.api, docker_req, True, self.arvrunner.project_uuid)
     elif isinstance(tool, cwltool.workflow.Workflow):
         for s in tool.steps:
             self.upload_docker(s.embedded_tool)
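A minimal usage sketch for this helper, assuming a loaded CWL document; `load_tool` and the `runner` object are stand-ins for the caller's own setup, not part of the example:

    # Hypothetical driver: pre-upload every Docker image referenced by a
    # tool or workflow before submitting it. Workflows recurse into their
    # embedded tools via the elif branch above.
    tool = load_tool("workflow.cwl")  # a CommandLineTool or a Workflow
    runner.upload_docker(tool)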
Example 3
    def run(self, dry_run=False, pull_image=True, **kwargs):
        script_parameters = {"command": self.command_line}
        runtime_constraints = {}

        if self.generatefiles:
            vwd = arvados.collection.Collection()
            for t in self.generatefiles:
                if isinstance(self.generatefiles[t], dict):
                    src, rest = self.arvrunner.fs_access.get_collection(
                        self.generatefiles[t]["path"][6:])
                    vwd.copy(rest, t, source_collection=src)
                else:
                    with vwd.open(t, "w") as f:
                        f.write(self.generatefiles[t])
            vwd.save_new()
            script_parameters["task.vwd"] = vwd.portable_data_hash()

        script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
        if self.environment:
            script_parameters["task.env"].update(self.environment)

        if self.stdin:
            script_parameters["task.stdin"] = self.pathmapper.mapper(
                self.stdin)[1]

        if self.stdout:
            script_parameters["task.stdout"] = self.stdout

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
        if docker_req and kwargs.get("use_container") is not False:
            runtime_constraints["docker_image"] = arv_docker_get_image(
                self.arvrunner.api, docker_req, pull_image)
            runtime_constraints["arvados_sdk_version"] = "master"

        response = self.arvrunner.api.jobs().create(body={
            "script": "run-command",
            "repository": "arvados",
            "script_version": "master",
            "script_parameters": script_parameters,
            "runtime_constraints": runtime_constraints
        }, find_or_create=kwargs.get("enable_reuse", True)).execute()

        self.arvrunner.jobs[response["uuid"]] = self

        logger.info("Job %s is %s", response["uuid"], response["state"])

        if response["state"] in ("Complete", "Failed", "Cancelled"):
            self.done(response)
Example 4
def upload_docker(arvrunner, tool):
    if isinstance(tool, CommandLineTool):
        (docker_req, docker_is_req) = get_feature(tool, "DockerRequirement")
        if docker_req:
            if docker_req.get("dockerOutputDirectory"):
                # TODO: can be supported by containers API, but not jobs API.
                raise UnsupportedRequirement(
                    "Option 'dockerOutputDirectory' of DockerRequirement not supported."
                )
            arv_docker_get_image(arvrunner.api, docker_req, True,
                                 arvrunner.project_uuid)
    elif isinstance(tool, cwltool.workflow.Workflow):
        for s in tool.steps:
            upload_docker(arvrunner, s.embedded_tool)
Example 5
def upload_docker(arvrunner, tool):
    """Uploads Docker images used in CommandLineTool objects."""

    if isinstance(tool, CommandLineTool):
        (docker_req, docker_is_req) = get_feature(tool, "DockerRequirement")
        if docker_req:
            if docker_req.get("dockerOutputDirectory") and arvrunner.work_api != "containers":
                # TODO: can be supported by containers API, but not jobs API.
                raise SourceLine(docker_req, "dockerOutputDirectory", UnsupportedRequirement).makeError(
                    "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
            arv_docker_get_image(arvrunner.api, docker_req, True, arvrunner.project_uuid)
        else:
            arv_docker_get_image(arvrunner.api, {"dockerPull": "arvados/jobs"}, True, arvrunner.project_uuid)
    elif isinstance(tool, cwltool.workflow.Workflow):
        for s in tool.steps:
            upload_docker(arvrunner, s.embedded_tool)
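For reference, the docker_req consumed here is the parsed CWL DockerRequirement returned by get_feature(); a minimal hedged example (the image name is made up):

    # Illustrative parsed DockerRequirement; arv_docker_get_image() reads
    # fields such as dockerPull from a dict of this shape.
    docker_req = {
        "class": "DockerRequirement",
        "dockerPull": "ubuntu:16.04",
    }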
Example 6
    def run(self, dry_run=False, pull_image=True, **kwargs):
        script_parameters = {
            "command": self.command_line
        }
        runtime_constraints = {}

        if self.generatefiles:
            vwd = arvados.collection.Collection()
            for t in self.generatefiles:
                if isinstance(self.generatefiles[t], dict):
                    src, rest = self.arvrunner.fs_access.get_collection(self.generatefiles[t]["path"][6:])
                    vwd.copy(rest, t, source_collection=src)
                else:
                    with vwd.open(t, "w") as f:
                        f.write(self.generatefiles[t])
            vwd.save_new()
            script_parameters["task.vwd"] = vwd.portable_data_hash()

        script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
        if self.environment:
            script_parameters["task.env"].update(self.environment)

        if self.stdin:
            script_parameters["task.stdin"] = self.pathmapper.mapper(self.stdin)[1]

        if self.stdout:
            script_parameters["task.stdout"] = self.stdout

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
        if docker_req and kwargs.get("use_container") is not False:
            runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image)
            runtime_constraints["arvados_sdk_version"] = "master"

        response = self.arvrunner.api.jobs().create(body={
            "script": "run-command",
            "repository": "arvados",
            "script_version": "master",
            "script_parameters": script_parameters,
            "runtime_constraints": runtime_constraints
        }, find_or_create=kwargs.get("enable_reuse", True)).execute()

        self.arvrunner.jobs[response["uuid"]] = self

        logger.info("Job %s is %s", response["uuid"], response["state"])

        if response["state"] in ("Complete", "Failed", "Cancelled"):
            self.done(response)
Example 7
    def __init__(self,
                 runner,
                 tool,
                 job_order,
                 enable_reuse,
                 output_name,
                 output_tags,
                 submit_runner_ram=0,
                 name=None,
                 on_error=None,
                 submit_runner_image=None,
                 intermediate_output_ttl=0,
                 merged_map=None):
        self.arvrunner = runner
        self.tool = tool
        self.job_order = job_order
        self.running = False
        if enable_reuse:
            # If reuse is permitted by command line arguments but
            # disabled by the workflow itself, disable it.
            reuse_req, _ = get_feature(
                self.tool, "http://arvados.org/cwl#ReuseRequirement")
            if reuse_req:
                enable_reuse = reuse_req["enableReuse"]
        self.enable_reuse = enable_reuse
        self.uuid = None
        self.final_output = None
        self.output_name = output_name
        self.output_tags = output_tags
        self.name = name
        self.on_error = on_error
        self.jobs_image = submit_runner_image or "arvados/jobs:" + __version__
        self.intermediate_output_ttl = intermediate_output_ttl

        if submit_runner_ram:
            self.submit_runner_ram = submit_runner_ram
        else:
            self.submit_runner_ram = 3000

        if self.submit_runner_ram <= 0:
            raise Exception(
                "Value of --submit-runner-ram must be greater than zero")

        self.merged_map = merged_map or {}
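The reuse override above consults an Arvados-specific CWL hint; hedging on the exact document layout, the dict get_feature() hands back would look roughly like:

    # Sketch of the parsed ReuseRequirement hint read in __init__;
    # the enableReuse value is illustrative.
    reuse_req = {
        "class": "http://arvados.org/cwl#ReuseRequirement",
        "enableReuse": False,
    }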
Example 8
    def run(self, dry_run=False, pull_image=True, **kwargs):
        container_request = {
            "command": self.command_line,
            "owner_uuid": self.arvrunner.project_uuid,
            "name": self.name,
            "output_path": self.outdir,
            "cwd": self.outdir,
            "priority": 1,
            "state": "Committed",
            "properties": {},
        }
        runtime_constraints = {}

        resources = self.builder.resources
        if resources is not None:
            runtime_constraints["vcpus"] = resources.get("cores", 1)
            runtime_constraints["ram"] = resources.get("ram") * 2**20

        mounts = {
            self.outdir: {
                "kind": "tmp",
                "capacity": resources.get("outdirSize", 0) * 2**20
            },
            self.tmpdir: {
                "kind": "tmp",
                "capacity": resources.get("tmpdirSize", 0) * 2**20
            }
        }
        scheduling_parameters = {}

        rf = [
            self.pathmapper.mapper(f) for f in self.pathmapper.referenced_files
        ]
        rf.sort(key=lambda k: k.resolved)
        prevdir = None
        for resolved, target, tp, stg in rf:
            if not stg:
                continue
            if prevdir and target.startswith(prevdir):
                continue
            if tp == "Directory":
                targetdir = target
            else:
                targetdir = os.path.dirname(target)
            sp = resolved.split("/", 1)
            pdh = sp[0][5:]  # remove "keep:"
            mounts[targetdir] = {
                "kind": "collection",
                "portable_data_hash": pdh
            }
            if len(sp) == 2:
                if tp == "Directory":
                    path = sp[1]
                else:
                    path = os.path.dirname(sp[1])
                if path and path != "/":
                    mounts[targetdir]["path"] = path
            prevdir = targetdir + "/"

        with Perf(metrics, "generatefiles %s" % self.name):
            if self.generatefiles["listing"]:
                vwd = arvados.collection.Collection(
                    api_client=self.arvrunner.api,
                    keep_client=self.arvrunner.keep_client,
                    num_retries=self.arvrunner.num_retries)
                generatemapper = NoFollowPathMapper([self.generatefiles],
                                                    "",
                                                    "",
                                                    separateDirs=False)

                with Perf(metrics, "createfiles %s" % self.name):
                    for f, p in generatemapper.items():
                        if not p.target:
                            pass
                        elif p.type in ("File", "Directory"):
                            source, path = self.arvrunner.fs_access.get_collection(
                                p.resolved)
                            vwd.copy(path, p.target, source_collection=source)
                        elif p.type == "CreateFile":
                            with vwd.open(p.target, "w") as n:
                                n.write(p.resolved.encode("utf-8"))

                with Perf(metrics, "generatefiles.save_new %s" % self.name):
                    vwd.save_new()

                for f, p in generatemapper.items():
                    if not p.target:
                        continue
                    mountpoint = "%s/%s" % (self.outdir, p.target)
                    mounts[mountpoint] = {
                        "kind": "collection",
                        "portable_data_hash": vwd.portable_data_hash(),
                        "path": p.target
                    }

        container_request["environment"] = {
            "TMPDIR": self.tmpdir,
            "HOME": self.outdir
        }
        if self.environment:
            container_request["environment"].update(self.environment)

        if self.stdin:
            sp = self.stdin[6:].split("/", 1)
            mounts["stdin"] = {
                "kind": "collection",
                "portable_data_hash": sp[0],
                "path": sp[1]
            }

        if self.stderr:
            mounts["stderr"] = {
                "kind": "file",
                "path": "%s/%s" % (self.outdir, self.stderr)
            }

        if self.stdout:
            mounts["stdout"] = {
                "kind": "file",
                "path": "%s/%s" % (self.outdir, self.stdout)
            }

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
        if not docker_req:
            docker_req = {"dockerImageId": "arvados/jobs"}

        container_request["container_image"] = arv_docker_get_image(
            self.arvrunner.api, docker_req, pull_image,
            self.arvrunner.project_uuid)

        api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
        if api_req:
            runtime_constraints["API"] = True

        runtime_req, _ = get_feature(
            self, "http://arvados.org/cwl#RuntimeConstraints")
        if runtime_req:
            if "keep_cache" in runtime_req:
                runtime_constraints["keep_cache_ram"] = runtime_req["keep_cache"] * 2**20
            if "outputDirType" in runtime_req:
                if runtime_req["outputDirType"] == "local_output_dir":
                    # Currently the default behavior.
                    pass
                elif runtime_req["outputDirType"] == "keep_output_dir":
                    mounts[self.outdir] = {
                        "kind": "collection",
                        "writable": True
                    }

        partition_req, _ = get_feature(
            self, "http://arvados.org/cwl#PartitionRequirement")
        if partition_req:
            scheduling_parameters["partitions"] = aslist(
                partition_req["partition"])

        intermediate_output_req, _ = get_feature(
            self, "http://arvados.org/cwl#IntermediateOutput")
        if intermediate_output_req:
            self.output_ttl = intermediate_output_req["outputTTL"]
        else:
            self.output_ttl = self.arvrunner.intermediate_output_ttl

        if self.output_ttl < 0:
            raise WorkflowError(
                "Invalid value %d for output_ttl, cannot be less than zero" %
                container_request["output_ttl"])

        container_request["output_ttl"] = self.output_ttl
        container_request["mounts"] = mounts
        container_request["runtime_constraints"] = runtime_constraints
        container_request["use_existing"] = kwargs.get("enable_reuse", True)
        container_request["scheduling_parameters"] = scheduling_parameters

        if kwargs.get("runnerjob", "").startswith("arvwf:"):
            wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
            wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(
                num_retries=self.arvrunner.num_retries)
            if container_request["name"] == "main":
                container_request["name"] = wfrecord["name"]
            container_request["properties"]["template_uuid"] = wfuuid

        try:
            response = self.arvrunner.api.container_requests().create(
                body=container_request).execute(
                    num_retries=self.arvrunner.num_retries)

            self.uuid = response["uuid"]
            self.arvrunner.processes[self.uuid] = self

            if response["state"] == "Final":
                logger.info("%s reused container %s",
                            self.arvrunner.label(self),
                            response["container_uuid"])
                self.done(response)
            else:
                logger.info("%s %s state is %s", self.arvrunner.label(self),
                            response["uuid"], response["state"])
        except Exception as e:
            logger.error("%s got error %s" %
                         (self.arvrunner.label(self), str(e)))
            self.output_callback({}, "permanentFail")
Example 9
    def run(self, dry_run=False, pull_image=True, **kwargs):
        script_parameters = {
            "command": self.command_line
        }
        runtime_constraints = {}

        if self.generatefiles["listing"]:
            vwd = arvados.collection.Collection()
            script_parameters["task.vwd"] = {}
            generatemapper = InitialWorkDirPathMapper([self.generatefiles], "", "",
                                        separateDirs=False)
            for f, p in generatemapper.items():
                if p.type == "CreateFile":
                    with vwd.open(p.target, "w") as n:
                        n.write(p.resolved.encode("utf-8"))
            vwd.save_new()
            for f, p in generatemapper.items():
                if p.type == "File":
                    script_parameters["task.vwd"][p.target] = p.resolved
                if p.type == "CreateFile":
                    script_parameters["task.vwd"][p.target] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), p.target)

        script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
        if self.environment:
            script_parameters["task.env"].update(self.environment)

        if self.stdin:
            script_parameters["task.stdin"] = self.stdin

        if self.stdout:
            script_parameters["task.stdout"] = self.stdout

        if self.stderr:
            script_parameters["task.stderr"] = self.stderr

        if self.successCodes:
            script_parameters["task.successCodes"] = self.successCodes
        if self.temporaryFailCodes:
            script_parameters["task.temporaryFailCodes"] = self.temporaryFailCodes
        if self.permanentFailCodes:
            script_parameters["task.permanentFailCodes"] = self.permanentFailCodes

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
        if docker_req and kwargs.get("use_container") is not False:
            runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image, self.arvrunner.project_uuid)
        else:
            runtime_constraints["docker_image"] = "arvados/jobs"

        resources = self.builder.resources
        if resources is not None:
            runtime_constraints["min_cores_per_node"] = resources.get("cores", 1)
            runtime_constraints["min_ram_mb_per_node"] = resources.get("ram")
            runtime_constraints["min_scratch_mb_per_node"] = resources.get("tmpdirSize", 0) + resources.get("outdirSize", 0)

        filters = [["repository", "=", "arvados"],
                   ["script", "=", "crunchrunner"],
                   ["script_version", "in git", "9e5b98e8f5f4727856b53447191f9c06e3da2ba6"]]
        if not self.arvrunner.ignore_docker_for_reuse:
            filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]])

        try:
            response = self.arvrunner.api.jobs().create(
                body={
                    "owner_uuid": self.arvrunner.project_uuid,
                    "script": "crunchrunner",
                    "repository": "arvados",
                    "script_version": "master",
                    "minimum_script_version": "9e5b98e8f5f4727856b53447191f9c06e3da2ba6",
                    "script_parameters": {"tasks": [script_parameters]},
                    "runtime_constraints": runtime_constraints
                },
                filters=filters,
                find_or_create=kwargs.get("enable_reuse", True)
            ).execute(num_retries=self.arvrunner.num_retries)

            self.arvrunner.processes[response["uuid"]] = self

            self.update_pipeline_component(response)

            logger.info("Job %s (%s) is %s", self.name, response["uuid"], response["state"])

            if response["state"] in ("Complete", "Failed", "Cancelled"):
                self.done(response)
        except Exception as e:
            logger.error("Got error %s" % str(e))
            self.output_callback({}, "permanentFail")
Example 10
    def run(self, dry_run=False, pull_image=True, **kwargs):
        container_request = {
            "command": self.command_line,
            "owner_uuid": self.arvrunner.project_uuid,
            "name": self.name,
            "output_path": self.outdir,
            "cwd": self.outdir,
            "priority": 1,
            "state": "Committed",
            "properties": {}
        }
        runtime_constraints = {}
        mounts = {self.outdir: {"kind": "tmp"}}
        scheduling_parameters = {}

        dirs = set()
        for f in self.pathmapper.files():
            _, p, tp = self.pathmapper.mapper(f)
            if tp == "Directory" and '/' not in p[6:]:
                mounts[p] = {"kind": "collection", "portable_data_hash": p[6:]}
                dirs.add(p[6:])
        for f in self.pathmapper.files():
            _, p, tp = self.pathmapper.mapper(f)
            if p[6:].split("/")[0] not in dirs:
                mounts[p] = {"kind": "collection", "portable_data_hash": p[6:]}

        if self.generatefiles["listing"]:
            raise UnsupportedRequirement(
                "InitialWorkDirRequirement not supported with --api=containers"
            )

        container_request["environment"] = {
            "TMPDIR": self.tmpdir,
            "HOME": self.outdir
        }
        if self.environment:
            container_request["environment"].update(self.environment)

        if self.stdin:
            raise UnsupportedRequirement(
                "Stdin redirection currently not suppported")

        if self.stderr:
            raise UnsupportedRequirement(
                "Stderr redirection currently not suppported")

        if self.stdout:
            mounts["stdout"] = {
                "kind": "file",
                "path": "%s/%s" % (self.outdir, self.stdout)
            }

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
        if not docker_req:
            docker_req = {"dockerImageId": arvados_jobs_image(self.arvrunner)}

        container_request["container_image"] = arv_docker_get_image(
            self.arvrunner.api, docker_req, pull_image,
            self.arvrunner.project_uuid)

        resources = self.builder.resources
        if resources is not None:
            runtime_constraints["vcpus"] = resources.get("cores", 1)
            runtime_constraints["ram"] = resources.get("ram") * 2**20

        api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
        if api_req:
            runtime_constraints["API"] = True

        runtime_req, _ = get_feature(
            self, "http://arvados.org/cwl#RuntimeConstraints")
        if runtime_req:
            if "keep_cache" in runtime_req:
                runtime_constraints["keep_cache_ram"] = runtime_req[
                    "keep_cache"]

        partition_req, _ = get_feature(
            self, "http://arvados.org/cwl#PartitionRequirement")
        if partition_req:
            scheduling_parameters["partitions"] = aslist(
                partition_req["partition"])

        container_request["mounts"] = mounts
        container_request["runtime_constraints"] = runtime_constraints
        container_request["use_existing"] = kwargs.get("enable_reuse", True)
        container_request["scheduling_parameters"] = scheduling_parameters

        if kwargs.get("runnerjob", "").startswith("arvwf:"):
            wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
            wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(
                num_retries=self.arvrunner.num_retries)
            if container_request["name"] == "main":
                container_request["name"] = wfrecord["name"]
            container_request["properties"]["template_uuid"] = wfuuid

        try:
            response = self.arvrunner.api.container_requests().create(
                body=container_request).execute(
                    num_retries=self.arvrunner.num_retries)

            self.uuid = response["uuid"]
            self.arvrunner.processes[self.uuid] = self

            logger.info("Container request %s (%s) state is %s", self.name,
                        response["uuid"], response["state"])

            if response["state"] == "Final":
                self.done(response)
        except Exception as e:
            logger.error("Got error %s" % str(e))
            self.output_callback({}, "permanentFail")
Example 11
    def run(self, dry_run=False, pull_image=True, **kwargs):
        script_parameters = {"command": self.command_line}
        runtime_constraints = {}

        if self.generatefiles:
            vwd = arvados.collection.Collection()
            script_parameters["task.vwd"] = {}
            for t in self.generatefiles:
                if isinstance(self.generatefiles[t], dict):
                    src, rest = self.arvrunner.fs_access.get_collection(
                        self.generatefiles[t]["path"].replace(
                            "$(task.keep)/", "keep:"))
                    vwd.copy(rest, t, source_collection=src)
                else:
                    with vwd.open(t, "w") as f:
                        f.write(self.generatefiles[t])
            vwd.save_new()
            for t in self.generatefiles:
                script_parameters["task.vwd"][t] = "$(task.keep)/%s/%s" % (
                    vwd.portable_data_hash(), t)

        script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
        if self.environment:
            script_parameters["task.env"].update(self.environment)

        if self.stdin:
            script_parameters["task.stdin"] = self.pathmapper.mapper(
                self.stdin)[1]

        if self.stdout:
            script_parameters["task.stdout"] = self.stdout

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
        if docker_req and kwargs.get("use_container") is not False:
            runtime_constraints["docker_image"] = arv_docker_get_image(
                self.arvrunner.api, docker_req, pull_image,
                self.arvrunner.project_uuid)
        else:
            runtime_constraints["docker_image"] = "arvados/jobs"

        resources = self.builder.resources
        if resources is not None:
            runtime_constraints["min_cores_per_node"] = resources.get(
                "cores", 1)
            runtime_constraints["min_ram_mb_per_node"] = resources.get("ram")
            runtime_constraints["min_scratch_mb_per_node"] = resources.get(
                "tmpdirSize", 0) + resources.get("outdirSize", 0)

        filters = [["repository", "=", "arvados"],
                   ["script", "=", "crunchrunner"],
                   ["script_version", "in git", "9e5b98e8f5f4727856b53447191f9c06e3da2ba6"]]
        if not self.arvrunner.ignore_docker_for_reuse:
            filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]])

        try:
            response = self.arvrunner.api.jobs().create(
                body={
                    "owner_uuid": self.arvrunner.project_uuid,
                    "script": "crunchrunner",
                    "repository": "arvados",
                    "script_version": "master",
                    "minimum_script_version":
                    "9e5b98e8f5f4727856b53447191f9c06e3da2ba6",
                    "script_parameters": {
                        "tasks": [script_parameters]
                    },
                    "runtime_constraints": runtime_constraints
                },
                filters=filters,
                find_or_create=kwargs.get("enable_reuse", True)
            ).execute(num_retries=self.arvrunner.num_retries)

            self.arvrunner.jobs[response["uuid"]] = self

            self.update_pipeline_component(response)

            logger.info("Job %s (%s) is %s", self.name, response["uuid"],
                        response["state"])

            if response["state"] in ("Complete", "Failed", "Cancelled"):
                self.done(response)
        except Exception as e:
            logger.error("Got error %s" % str(e))
            self.output_callback({}, "permanentFail")
Example 12
    def run(self, dry_run=False, pull_image=True, **kwargs):
        script_parameters = {"command": self.command_line}
        runtime_constraints = {}

        with Perf(metrics, "generatefiles %s" % self.name):
            if self.generatefiles["listing"]:
                vwd = arvados.collection.Collection(
                    api_client=self.arvrunner.api,
                    keep_client=self.arvrunner.keep_client,
                    num_retries=self.arvrunner.num_retries)
                script_parameters["task.vwd"] = {}
                generatemapper = VwdPathMapper([self.generatefiles],
                                               "",
                                               "",
                                               separateDirs=False)

                with Perf(metrics, "createfiles %s" % self.name):
                    for f, p in generatemapper.items():
                        if p.type == "CreateFile":
                            with vwd.open(p.target, "w") as n:
                                n.write(p.resolved.encode("utf-8"))

                if vwd:
                    with Perf(metrics,
                              "generatefiles.save_new %s" % self.name):
                        vwd.save_new()

                for f, p in generatemapper.items():
                    if p.type == "File":
                        script_parameters["task.vwd"][p.target] = p.resolved
                    if p.type == "CreateFile":
                        script_parameters["task.vwd"][
                            p.target] = "$(task.keep)/%s/%s" % (
                                vwd.portable_data_hash(), p.target)

        script_parameters["task.env"] = {
            "TMPDIR": self.tmpdir,
            "HOME": self.outdir
        }
        if self.environment:
            script_parameters["task.env"].update(self.environment)

        if self.stdin:
            script_parameters["task.stdin"] = self.stdin

        if self.stdout:
            script_parameters["task.stdout"] = self.stdout

        if self.stderr:
            script_parameters["task.stderr"] = self.stderr

        if self.successCodes:
            script_parameters["task.successCodes"] = self.successCodes
        if self.temporaryFailCodes:
            script_parameters["task.temporaryFailCodes"] = self.temporaryFailCodes
        if self.permanentFailCodes:
            script_parameters["task.permanentFailCodes"] = self.permanentFailCodes

        with Perf(metrics, "arv_docker_get_image %s" % self.name):
            (docker_req, docker_is_req) = get_feature(self,
                                                      "DockerRequirement")
            if docker_req and kwargs.get("use_container") is not False:
                if docker_req.get("dockerOutputDirectory"):
                    raise SourceLine(
                        docker_req, "dockerOutputDirectory",
                        UnsupportedRequirement
                    ).makeError(
                        "Option 'dockerOutputDirectory' of DockerRequirement not supported."
                    )
                runtime_constraints["docker_image"] = arv_docker_get_image(
                    self.arvrunner.api, docker_req, pull_image,
                    self.arvrunner.project_uuid)
            else:
                runtime_constraints["docker_image"] = "arvados/jobs"

        resources = self.builder.resources
        if resources is not None:
            runtime_constraints["min_cores_per_node"] = resources.get(
                "cores", 1)
            runtime_constraints["min_ram_mb_per_node"] = resources.get("ram")
            runtime_constraints["min_scratch_mb_per_node"] = resources.get(
                "tmpdirSize", 0) + resources.get("outdirSize", 0)

        runtime_req, _ = get_feature(
            self, "http://arvados.org/cwl#RuntimeConstraints")
        if runtime_req:
            if "keep_cache" in runtime_req:
                runtime_constraints["keep_cache_mb_per_task"] = runtime_req[
                    "keep_cache"]
                runtime_constraints["min_ram_mb_per_node"] += runtime_req[
                    "keep_cache"]
            if "outputDirType" in runtime_req:
                if runtime_req["outputDirType"] == "local_output_dir":
                    script_parameters["task.keepTmpOutput"] = False
                elif runtime_req["outputDirType"] == "keep_output_dir":
                    script_parameters["task.keepTmpOutput"] = True

        filters = [["repository", "=", "arvados"],
                   ["script", "=", "crunchrunner"],
                   ["script_version", "in git", crunchrunner_git_commit]]
        if not self.arvrunner.ignore_docker_for_reuse:
            filters.append([
                "docker_image_locator", "in docker",
                runtime_constraints["docker_image"]
            ])

        enable_reuse = kwargs.get("enable_reuse", True)
        if enable_reuse:
            reuse_req, _ = get_feature(
                self, "http://arvados.org/cwl#ReuseRequirement")
            if reuse_req:
                enable_reuse = reuse_req["enableReuse"]

        try:
            with Perf(metrics, "create %s" % self.name):
                response = self.arvrunner.api.jobs().create(
                    body={
                        "owner_uuid": self.arvrunner.project_uuid,
                        "script": "crunchrunner",
                        "repository": "arvados",
                        "script_version": "master",
                        "minimum_script_version": crunchrunner_git_commit,
                        "script_parameters": {
                            "tasks": [script_parameters]
                        },
                        "runtime_constraints": runtime_constraints
                    },
                    filters=filters,
                    find_or_create=enable_reuse).execute(
                        num_retries=self.arvrunner.num_retries)

            self.arvrunner.processes[response["uuid"]] = self

            self.update_pipeline_component(response)

            if response["state"] == "Complete":
                logger.info("%s reused job %s", self.arvrunner.label(self),
                            response["uuid"])
                # Give read permission to the desired project on reused jobs
                if response["owner_uuid"] != self.arvrunner.project_uuid:
                    try:
                        self.arvrunner.api.links().create(
                            body={
                                'link_class': 'permission',
                                'name': 'can_read',
                                'tail_uuid': self.arvrunner.project_uuid,
                                'head_uuid': response["uuid"],
                            }).execute(num_retries=self.arvrunner.num_retries)
                    except ApiError as e:
                        # The user might not have "manage" access on the job: log
                        # a message and continue.
                        logger.info("Creating read permission on job %s: %s",
                                    response["uuid"], e)

                with Perf(metrics, "done %s" % self.name):
                    self.done(response)
            else:
                logger.info("%s %s is %s", self.arvrunner.label(self),
                            response["uuid"], response["state"])
        except Exception as e:
            logger.exception("%s error" % (self.arvrunner.label(self)))
            self.output_callback({}, "permanentFail")
Example 13
    def run(self, dry_run=False, pull_image=True, **kwargs):
        container_request = {
            "command": self.command_line,
            "owner_uuid": self.arvrunner.project_uuid,
            "name": self.name,
            "output_path": self.outdir,
            "cwd": self.outdir,
            "priority": 1,
            "state": "Committed",
            "properties": {}
        }
        runtime_constraints = {}

        resources = self.builder.resources
        if resources is not None:
            runtime_constraints["vcpus"] = resources.get("cores", 1)
            runtime_constraints["ram"] = resources.get("ram") * 2**20

        mounts = {
            self.outdir: {
                "kind": "tmp",
                "capacity": resources.get("outdirSize", 0) * 2**20
            },
            self.tmpdir: {
                "kind": "tmp",
                "capacity": resources.get("tmpdirSize", 0) * 2**20
            }
        }
        scheduling_parameters = {}

        rf = [self.pathmapper.mapper(f) for f in self.pathmapper.referenced_files]
        rf.sort(key=lambda k: k.resolved)
        prevdir = None
        for resolved, target, tp, stg in rf:
            if not stg:
                continue
            if prevdir and target.startswith(prevdir):
                continue
            if tp == "Directory":
                targetdir = target
            else:
                targetdir = os.path.dirname(target)
            sp = resolved.split("/", 1)
            pdh = sp[0][5:]   # remove "keep:"
            mounts[targetdir] = {
                "kind": "collection",
                "portable_data_hash": pdh
            }
            if len(sp) == 2:
                if tp == "Directory":
                    path = sp[1]
                else:
                    path = os.path.dirname(sp[1])
                if path and path != "/":
                    mounts[targetdir]["path"] = path
            prevdir = targetdir + "/"

        with Perf(metrics, "generatefiles %s" % self.name):
            if self.generatefiles["listing"]:
                vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                    keep_client=self.arvrunner.keep_client,
                                                    num_retries=self.arvrunner.num_retries)
                generatemapper = NoFollowPathMapper([self.generatefiles], "", "",
                                                    separateDirs=False)

                with Perf(metrics, "createfiles %s" % self.name):
                    for f, p in generatemapper.items():
                        if not p.target:
                            pass
                        elif p.type in ("File", "Directory"):
                            source, path = self.arvrunner.fs_access.get_collection(p.resolved)
                            vwd.copy(path, p.target, source_collection=source)
                        elif p.type == "CreateFile":
                            with vwd.open(p.target, "w") as n:
                                n.write(p.resolved.encode("utf-8"))

                with Perf(metrics, "generatefiles.save_new %s" % self.name):
                    vwd.save_new()

                for f, p in generatemapper.items():
                    if not p.target:
                        continue
                    mountpoint = "%s/%s" % (self.outdir, p.target)
                    mounts[mountpoint] = {"kind": "collection",
                                          "portable_data_hash": vwd.portable_data_hash(),
                                          "path": p.target}

        container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
        if self.environment:
            container_request["environment"].update(self.environment)

        if self.stdin:
            sp = self.stdin[6:].split("/", 1)
            mounts["stdin"] = {"kind": "collection",
                                "portable_data_hash": sp[0],
                                "path": sp[1]}

        if self.stderr:
            mounts["stderr"] = {"kind": "file",
                                "path": "%s/%s" % (self.outdir, self.stderr)}

        if self.stdout:
            mounts["stdout"] = {"kind": "file",
                                "path": "%s/%s" % (self.outdir, self.stdout)}

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
        if not docker_req:
            docker_req = {"dockerImageId": "arvados/jobs"}

        container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                     docker_req,
                                                                     pull_image,
                                                                     self.arvrunner.project_uuid)

        api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
        if api_req:
            runtime_constraints["API"] = True

        runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
        if runtime_req:
            if "keep_cache" in runtime_req:
                runtime_constraints["keep_cache_ram"] = runtime_req["keep_cache"] * 2**20
            if "outputDirType" in runtime_req:
                if runtime_req["outputDirType"] == "local_output_dir":
                    # Currently the default behavior.
                    pass
                elif runtime_req["outputDirType"] == "keep_output_dir":
                    mounts[self.outdir] = {
                        "kind": "collection",
                        "writable": True
                    }

        partition_req, _ = get_feature(self, "http://arvados.org/cwl#PartitionRequirement")
        if partition_req:
            scheduling_parameters["partitions"] = aslist(partition_req["partition"])

        container_request["mounts"] = mounts
        container_request["runtime_constraints"] = runtime_constraints
        container_request["use_existing"] = kwargs.get("enable_reuse", True)
        container_request["scheduling_parameters"] = scheduling_parameters

        if kwargs.get("runnerjob", "").startswith("arvwf:"):
            wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
            wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
            if container_request["name"] == "main":
                container_request["name"] = wfrecord["name"]
            container_request["properties"]["template_uuid"] = wfuuid

        try:
            response = self.arvrunner.api.container_requests().create(
                body=container_request
            ).execute(num_retries=self.arvrunner.num_retries)

            self.uuid = response["uuid"]
            self.arvrunner.processes[self.uuid] = self

            if response["state"] == "Final":
                logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
                self.done(response)
            else:
                logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
        except Exception as e:
            logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
            self.output_callback({}, "permanentFail")
Example 14
    def run(self, dry_run=False, pull_image=True, **kwargs):
        container_request = {
            "command": self.command_line,
            "owner_uuid": self.arvrunner.project_uuid,
            "name": self.name,
            "output_path": self.outdir,
            "cwd": self.outdir,
            "priority": 1,
            "state": "Committed"
        }
        runtime_constraints = {}
        mounts = {
            self.outdir: {
                "kind": "tmp"
            }
        }

        dirs = set()
        for f in self.pathmapper.files():
            _, p, tp = self.pathmapper.mapper(f)
            if tp == "Directory" and '/' not in p[6:]:
                mounts[p] = {
                    "kind": "collection",
                    "portable_data_hash": p[6:]
                }
                dirs.add(p[6:])
        for f in self.pathmapper.files():
            _, p, tp = self.pathmapper.mapper(f)
            if p[6:].split("/")[0] not in dirs:
                mounts[p] = {
                    "kind": "collection",
                    "portable_data_hash": p[6:]
                }

        if self.generatefiles["listing"]:
            raise UnsupportedRequirement("Generate files not supported")

        container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
        if self.environment:
            container_request["environment"].update(self.environment)

        if self.stdin:
            raise UnsupportedRequirement("Stdin redirection currently not suppported")

        if self.stderr:
            raise UnsupportedRequirement("Stderr redirection currently not suppported")

        if self.stdout:
            mounts["stdout"] = {"kind": "file",
                                "path": "%s/%s" % (self.outdir, self.stdout)}

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
        if not docker_req:
            docker_req = {"dockerImageId": "arvados/jobs"}

        container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                     docker_req,
                                                                     pull_image,
                                                                     self.arvrunner.project_uuid)

        resources = self.builder.resources
        if resources is not None:
            runtime_constraints["vcpus"] = resources.get("cores", 1)
            runtime_constraints["ram"] = resources.get("ram") * 2**20

        container_request["mounts"] = mounts
        container_request["runtime_constraints"] = runtime_constraints

        try:
            response = self.arvrunner.api.container_requests().create(
                body=container_request
            ).execute(num_retries=self.arvrunner.num_retries)

            self.arvrunner.processes[response["container_uuid"]] = self

            logger.info("Container %s (%s) request state is %s", self.name, response["uuid"], response["state"])

            if response["state"] == "Final":
                self.done(response)
        except Exception as e:
            logger.error("Got error %s" % str(e))
            self.output_callback({}, "permanentFail")
Example 15
    def run(self, dry_run=False, pull_image=True, **kwargs):
        script_parameters = {
            "command": self.command_line
        }
        runtime_constraints = {}

        with Perf(metrics, "generatefiles %s" % self.name):
            if self.generatefiles["listing"]:
                vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                    keep_client=self.arvrunner.keep_client,
                                                    num_retries=self.arvrunner.num_retries)
                script_parameters["task.vwd"] = {}
                generatemapper = VwdPathMapper([self.generatefiles], "", "",
                                               separateDirs=False)

                with Perf(metrics, "createfiles %s" % self.name):
                    for f, p in generatemapper.items():
                        if p.type == "CreateFile":
                            with vwd.open(p.target, "w") as n:
                                n.write(p.resolved.encode("utf-8"))

                if vwd:
                    with Perf(metrics, "generatefiles.save_new %s" % self.name):
                        vwd.save_new()

                for f, p in generatemapper.items():
                    if p.type == "File":
                        script_parameters["task.vwd"][p.target] = p.resolved
                    if p.type == "CreateFile":
                        script_parameters["task.vwd"][p.target] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), p.target)

        script_parameters["task.env"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
        if self.environment:
            script_parameters["task.env"].update(self.environment)

        if self.stdin:
            script_parameters["task.stdin"] = self.stdin

        if self.stdout:
            script_parameters["task.stdout"] = self.stdout

        if self.stderr:
            script_parameters["task.stderr"] = self.stderr

        if self.successCodes:
            script_parameters["task.successCodes"] = self.successCodes
        if self.temporaryFailCodes:
            script_parameters["task.temporaryFailCodes"] = self.temporaryFailCodes
        if self.permanentFailCodes:
            script_parameters["task.permanentFailCodes"] = self.permanentFailCodes

        with Perf(metrics, "arv_docker_get_image %s" % self.name):
            (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
            if docker_req and kwargs.get("use_container") is not False:
                if docker_req.get("dockerOutputDirectory"):
                    raise SourceLine(docker_req, "dockerOutputDirectory", UnsupportedRequirement).makeError(
                        "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
                runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image, self.arvrunner.project_uuid)
            else:
                runtime_constraints["docker_image"] = "arvados/jobs"

        resources = self.builder.resources
        if resources is not None:
            runtime_constraints["min_cores_per_node"] = resources.get("cores", 1)
            runtime_constraints["min_ram_mb_per_node"] = resources.get("ram")
            runtime_constraints["min_scratch_mb_per_node"] = resources.get("tmpdirSize", 0) + resources.get("outdirSize", 0)

        runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
        if runtime_req:
            if "keep_cache" in runtime_req:
                runtime_constraints["keep_cache_mb_per_task"] = runtime_req["keep_cache"]
                runtime_constraints["min_ram_mb_per_node"] += runtime_req["keep_cache"]
            if "outputDirType" in runtime_req:
                if runtime_req["outputDirType"] == "local_output_dir":
                    script_parameters["task.keepTmpOutput"] = False
                elif runtime_req["outputDirType"] == "keep_output_dir":
                    script_parameters["task.keepTmpOutput"] = True

        filters = [["repository", "=", "arvados"],
                   ["script", "=", "crunchrunner"],
                   ["script_version", "in git", crunchrunner_git_commit]]
        if not self.arvrunner.ignore_docker_for_reuse:
            filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]])

        try:
            with Perf(metrics, "create %s" % self.name):
                response = self.arvrunner.api.jobs().create(
                    body={
                        "owner_uuid": self.arvrunner.project_uuid,
                        "script": "crunchrunner",
                        "repository": "arvados",
                        "script_version": "master",
                        "minimum_script_version": crunchrunner_git_commit,
                        "script_parameters": {"tasks": [script_parameters]},
                        "runtime_constraints": runtime_constraints
                    },
                    filters=filters,
                    find_or_create=kwargs.get("enable_reuse", True)
                ).execute(num_retries=self.arvrunner.num_retries)

            self.arvrunner.processes[response["uuid"]] = self

            self.update_pipeline_component(response)

            if response["state"] == "Complete":
                logger.info("%s reused job %s", self.arvrunner.label(self), response["uuid"])
                with Perf(metrics, "done %s" % self.name):
                    self.done(response)
            else:
                logger.info("%s %s is %s", self.arvrunner.label(self), response["uuid"], response["state"])
        except Exception as e:
            logger.exception("%s error" % (self.arvrunner.label(self)))
            self.output_callback({}, "permanentFail")
Example 16
    def run(self, dry_run=False, pull_image=True, **kwargs):
        # ArvadosCommandTool subclasses from cwltool.CommandLineTool,
        # which calls makeJobRunner() to get a new ArvadosContainer
        # object.  The fields that define execution such as
        # command_line, environment, etc are set on the
        # ArvadosContainer object by CommandLineTool.job() before
        # run() is called.
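        # For orientation only (hypothetical sketch, not part of this
        # file): makeJobRunner() amounts to something along the lines of
        #     def makeJobRunner(self, **kwargs):
        #         return ArvadosContainer(self.arvrunner)
        # so each job() invocation gets a fresh ArvadosContainer.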

        container_request = {
            "command": self.command_line,
            "owner_uuid": self.arvrunner.project_uuid,
            "name": self.name,
            "output_path": self.outdir,
            "cwd": self.outdir,
            "priority": kwargs.get("priority"),
            "state": "Committed",
            "properties": {},
        }
        runtime_constraints = {}

        if self.arvrunner.secret_store.has_secret(self.command_line):
            raise WorkflowException("Secret material leaked on command line, only file literals may contain secrets")

        if self.arvrunner.secret_store.has_secret(self.environment):
            raise WorkflowException("Secret material leaked in environment, only file literals may contain secrets")

        resources = self.builder.resources
        if resources is not None:
            runtime_constraints["vcpus"] = resources.get("cores", 1)
            runtime_constraints["ram"] = resources.get("ram") * 2**20

        mounts = {
            self.outdir: {
                "kind": "tmp",
                "capacity": resources.get("outdirSize", 0) * 2**20
            },
            self.tmpdir: {
                "kind": "tmp",
                "capacity": resources.get("tmpdirSize", 0) * 2**20
            }
        }
        secret_mounts = {}
        scheduling_parameters = {}

        rf = [self.pathmapper.mapper(f) for f in self.pathmapper.referenced_files]
        rf.sort(key=lambda k: k.resolved)
        prevdir = None
        for resolved, target, tp, stg in rf:
            if not stg:
                continue
            if prevdir and target.startswith(prevdir):
                continue
            if tp == "Directory":
                targetdir = target
            else:
                targetdir = os.path.dirname(target)
            sp = resolved.split("/", 1)
            pdh = sp[0][5:]   # remove "keep:"
            mounts[targetdir] = {
                "kind": "collection",
                "portable_data_hash": pdh
            }
            if len(sp) == 2:
                if tp == "Directory":
                    path = sp[1]
                else:
                    path = os.path.dirname(sp[1])
                if path and path != "/":
                    mounts[targetdir]["path"] = path
            prevdir = targetdir + "/"

        with Perf(metrics, "generatefiles %s" % self.name):
            if self.generatefiles["listing"]:
                vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                    keep_client=self.arvrunner.keep_client,
                                                    num_retries=self.arvrunner.num_retries)
                generatemapper = NoFollowPathMapper([self.generatefiles], "", "",
                                                    separateDirs=False)

                sorteditems = sorted(generatemapper.items(), None, key=lambda n: n[1].target)

                logger.debug("generatemapper is %s", sorteditems)

                with Perf(metrics, "createfiles %s" % self.name):
                    for f, p in sorteditems:
                        if not p.target:
                            pass
                        elif p.type in ("File", "Directory", "WritableFile", "WritableDirectory"):
                            if p.resolved.startswith("_:"):
                                vwd.mkdirs(p.target)
                            else:
                                source, path = self.arvrunner.fs_access.get_collection(p.resolved)
                                vwd.copy(path, p.target, source_collection=source)
                        elif p.type == "CreateFile":
                            if self.arvrunner.secret_store.has_secret(p.resolved):
                                secret_mounts["%s/%s" % (self.outdir, p.target)] = {
                                    "kind": "text",
                                    "content": self.arvrunner.secret_store.retrieve(p.resolved)
                                }
                            else:
                                with vwd.open(p.target, "w") as n:
                                    n.write(p.resolved.encode("utf-8"))

                def keepemptydirs(p):
                    if isinstance(p, arvados.collection.RichCollectionBase):
                        if len(p) == 0:
                            p.open(".keep", "w").close()
                        else:
                            for c in p:
                                keepemptydirs(p[c])

                keepemptydirs(vwd)

                with Perf(metrics, "generatefiles.save_new %s" % self.name):
                    vwd.save_new()

                prev = None
                for f, p in sorteditems:
                    if (not p.target or self.arvrunner.secret_store.has_secret(p.resolved) or
                        (prev is not None and p.target.startswith(prev))):
                        continue
                    mountpoint = "%s/%s" % (self.outdir, p.target)
                    mounts[mountpoint] = {"kind": "collection",
                                          "portable_data_hash": vwd.portable_data_hash(),
                                          "path": p.target}
                    if p.type.startswith("Writable"):
                        mounts[mountpoint]["writable"] = True
                    prev = p.target + "/"

        container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
        if self.environment:
            container_request["environment"].update(self.environment)

        if self.stdin:
            sp = self.stdin[6:].split("/", 1)
            mounts["stdin"] = {"kind": "collection",
                                "portable_data_hash": sp[0],
                                "path": sp[1]}

        if self.stderr:
            mounts["stderr"] = {"kind": "file",
                                "path": "%s/%s" % (self.outdir, self.stderr)}

        if self.stdout:
            mounts["stdout"] = {"kind": "file",
                                "path": "%s/%s" % (self.outdir, self.stdout)}

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
        if not docker_req:
            docker_req = {"dockerImageId": "arvados/jobs"}

        container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                     docker_req,
                                                                     pull_image,
                                                                     self.arvrunner.project_uuid)

        api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
        if api_req:
            runtime_constraints["API"] = True

        runtime_req, _ = get_feature(self, "http://arvados.org/cwl#RuntimeConstraints")
        if runtime_req:
            if "keep_cache" in runtime_req:
                runtime_constraints["keep_cache_ram"] = runtime_req["keep_cache"] * 2**20
            if "outputDirType" in runtime_req:
                if runtime_req["outputDirType"] == "local_output_dir":
                    # Currently the default behavior.
                    pass
                elif runtime_req["outputDirType"] == "keep_output_dir":
                    mounts[self.outdir]= {
                        "kind": "collection",
                        "writable": True
                    }

        partition_req, _ = get_feature(self, "http://arvados.org/cwl#PartitionRequirement")
        if partition_req:
            scheduling_parameters["partitions"] = aslist(partition_req["partition"])

        intermediate_output_req, _ = get_feature(self, "http://arvados.org/cwl#IntermediateOutput")
        if intermediate_output_req:
            self.output_ttl = intermediate_output_req["outputTTL"]
        else:
            self.output_ttl = self.arvrunner.intermediate_output_ttl

        if self.output_ttl < 0:
            raise WorkflowException("Invalid value %d for output_ttl, cannot be less than zero" % container_request["output_ttl"])

        container_request["output_ttl"] = self.output_ttl
        container_request["mounts"] = mounts
        container_request["secret_mounts"] = secret_mounts
        container_request["runtime_constraints"] = runtime_constraints
        container_request["scheduling_parameters"] = scheduling_parameters

        enable_reuse = kwargs.get("enable_reuse", True)
        if enable_reuse:
            reuse_req, _ = get_feature(self, "http://arvados.org/cwl#ReuseRequirement")
            if reuse_req:
                enable_reuse = reuse_req["enableReuse"]
        container_request["use_existing"] = enable_reuse

        if kwargs.get("runnerjob", "").startswith("arvwf:"):
            wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
            wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
            if container_request["name"] == "main":
                container_request["name"] = wfrecord["name"]
            container_request["properties"]["template_uuid"] = wfuuid

        self.output_callback = self.arvrunner.get_wrapped_callback(self.output_callback)

        try:
            response = self.arvrunner.api.container_requests().create(
                body=container_request
            ).execute(num_retries=self.arvrunner.num_retries)

            self.uuid = response["uuid"]
            self.arvrunner.process_submitted(self)

            if response["state"] == "Final":
                logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
                self.done(response)
            else:
                logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
        except Exception as e:
            logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
            self.output_callback({}, "permanentFail")
Esempio n. 18
0
    def run(self, dry_run=False, pull_image=True, **kwargs):
        script_parameters = {"command": self.command_line}
        runtime_constraints = {}

        with Perf(metrics, "generatefiles %s" % self.name):
            if self.generatefiles["listing"]:
                vwd = arvados.collection.Collection(
                    api_client=self.arvrunner.api,
                    keep_client=self.arvrunner.keep_client,
                    num_retries=self.arvrunner.num_retries)
                script_parameters["task.vwd"] = {}
                generatemapper = InitialWorkDirPathMapper([self.generatefiles],
                                                          "",
                                                          "",
                                                          separateDirs=False)

                with Perf(metrics, "createfiles %s" % self.name):
                    for f, p in generatemapper.items():
                        if p.type == "CreateFile":
                            with vwd.open(p.target, "w") as n:
                                n.write(p.resolved.encode("utf-8"))

                with Perf(metrics, "generatefiles.save_new %s" % self.name):
                    vwd.save_new()

                for f, p in generatemapper.items():
                    if p.type == "File":
                        script_parameters["task.vwd"][p.target] = p.resolved
                    if p.type == "CreateFile":
                        script_parameters["task.vwd"][
                            p.target] = "$(task.keep)/%s/%s" % (
                                vwd.portable_data_hash(), p.target)

        script_parameters["task.env"] = {
            "TMPDIR": self.tmpdir,
            "HOME": self.outdir
        }
        if self.environment:
            script_parameters["task.env"].update(self.environment)

        if self.stdin:
            script_parameters["task.stdin"] = self.stdin

        if self.stdout:
            script_parameters["task.stdout"] = self.stdout

        if self.stderr:
            script_parameters["task.stderr"] = self.stderr

        if self.successCodes:
            script_parameters["task.successCodes"] = self.successCodes
        if self.temporaryFailCodes:
            script_parameters[
                "task.temporaryFailCodes"] = self.temporaryFailCodes
        if self.permanentFailCodes:
            script_parameters[
                "task.permanentFailCodes"] = self.permanentFailCodes

        with Perf(metrics, "arv_docker_get_image %s" % self.name):
            (docker_req, docker_is_req) = get_feature(self,
                                                      "DockerRequirement")
            if docker_req and kwargs.get("use_container") is not False:
                if docker_req.get("dockerOutputDirectory"):
                    raise UnsupportedRequirement(
                        "Option 'dockerOutputDirectory' of DockerRequirement not supported."
                    )
                runtime_constraints["docker_image"] = arv_docker_get_image(
                    self.arvrunner.api, docker_req, pull_image,
                    self.arvrunner.project_uuid)
            else:
                runtime_constraints["docker_image"] = arvados_jobs_image(
                    self.arvrunner)

        resources = self.builder.resources
        if resources is not None:
            runtime_constraints["min_cores_per_node"] = resources.get(
                "cores", 1)
            runtime_constraints["min_ram_mb_per_node"] = resources.get("ram")
            runtime_constraints["min_scratch_mb_per_node"] = resources.get(
                "tmpdirSize", 0) + resources.get("outdirSize", 0)

        runtime_req, _ = get_feature(
            self, "http://arvados.org/cwl#RuntimeConstraints")
        if runtime_req:
            if "keep_cache" in runtime_req:
                runtime_constraints["keep_cache_mb_per_task"] = runtime_req[
                    "keep_cache"]
            if "outputDirType" in runtime_req:
                if runtime_req["outputDirType"] == "local_output_dir":
                    script_parameters["task.keepTmpOutput"] = False
                elif runtime_req["outputDirType"] == "keep_output_dir":
                    script_parameters["task.keepTmpOutput"] = True

        filters = [["repository", "=", "arvados"],
                   ["script", "=", "crunchrunner"],
                   [
                       "script_version", "in git",
                       "9e5b98e8f5f4727856b53447191f9c06e3da2ba6"
                   ]]
        if not self.arvrunner.ignore_docker_for_reuse:
            filters.append([
                "docker_image_locator", "in docker",
                runtime_constraints["docker_image"]
            ])

        try:
            with Perf(metrics, "create %s" % self.name):
                response = self.arvrunner.api.jobs().create(
                    body={
                        "owner_uuid": self.arvrunner.project_uuid,
                        "script": "crunchrunner",
                        "repository": "arvados",
                        "script_version": "master",
                        "minimum_script_version":
                        "9e5b98e8f5f4727856b53447191f9c06e3da2ba6",
                        "script_parameters": {
                            "tasks": [script_parameters]
                        },
                        "runtime_constraints": runtime_constraints
                    },
                    filters=filters,
                    find_or_create=kwargs.get(
                        "enable_reuse",
                        True)).execute(num_retries=self.arvrunner.num_retries)

            self.arvrunner.processes[response["uuid"]] = self

            self.update_pipeline_component(response)

            logger.info("Job %s (%s) is %s", self.name, response["uuid"],
                        response["state"])

            if response["state"] in ("Complete", "Failed", "Cancelled"):
                with Perf(metrics, "done %s" % self.name):
                    self.done(response)
        except Exception as e:
            logger.exception("Job %s error" % (self.name))
            self.output_callback({}, "permanentFail")
Esempio n. 19
0
    def run(self, dry_run=False, pull_image=True, **kwargs):
        container_request = {
            "command": self.command_line,
            "owner_uuid": self.arvrunner.project_uuid,
            "name": self.name,
            "output_path": self.outdir,
            "cwd": self.outdir,
            "priority": 1,
            "state": "Committed",
            "properties": {}
        }
        runtime_constraints = {}
        mounts = {self.outdir: {"kind": "tmp"}}
        scheduling_parameters = {}

        dirs = set()
        for f in self.pathmapper.files():
            pdh, p, tp = self.pathmapper.mapper(f)
            if tp == "Directory" and '/' not in pdh:
                mounts[p] = {
                    "kind": "collection",
                    "portable_data_hash": pdh[5:]
                }
                dirs.add(pdh)

        for f in self.pathmapper.files():
            res, p, tp = self.pathmapper.mapper(f)
            if res.startswith("keep:"):
                res = res[5:]
            elif res.startswith("/keep/"):
                res = res[6:]
            else:
                continue
            sp = res.split("/", 1)
            pdh = sp[0]
            if pdh not in dirs:
                mounts[p] = {"kind": "collection", "portable_data_hash": pdh}
                if len(sp) == 2:
                    mounts[p]["path"] = sp[1]

        with Perf(metrics, "generatefiles %s" % self.name):
            if self.generatefiles["listing"]:
                vwd = arvados.collection.Collection(
                    api_client=self.arvrunner.api,
                    keep_client=self.arvrunner.keep_client,
                    num_retries=self.arvrunner.num_retries)
                generatemapper = NoFollowPathMapper([self.generatefiles],
                                                    "",
                                                    "",
                                                    separateDirs=False)

                with Perf(metrics, "createfiles %s" % self.name):
                    for f, p in generatemapper.items():
                        if not p.target:
                            pass
                        elif p.type in ("File", "Directory"):
                            source, path = self.arvrunner.fs_access.get_collection(
                                p.resolved)
                            vwd.copy(path, p.target, source_collection=source)
                        elif p.type == "CreateFile":
                            with vwd.open(p.target, "w") as n:
                                n.write(p.resolved.encode("utf-8"))

                with Perf(metrics, "generatefiles.save_new %s" % self.name):
                    vwd.save_new()

                for f, p in generatemapper.items():
                    if not p.target:
                        continue
                    mountpoint = "%s/%s" % (self.outdir, p.target)
                    mounts[mountpoint] = {
                        "kind": "collection",
                        "portable_data_hash": vwd.portable_data_hash(),
                        "path": p.target
                    }

        container_request["environment"] = {
            "TMPDIR": self.tmpdir,
            "HOME": self.outdir
        }
        if self.environment:
            container_request["environment"].update(self.environment)

        if self.stdin:
            raise UnsupportedRequirement(
                "Stdin redirection currently not suppported")

        if self.stderr:
            raise UnsupportedRequirement(
                "Stderr redirection currently not suppported")

        if self.stdout:
            mounts["stdout"] = {
                "kind": "file",
                "path": "%s/%s" % (self.outdir, self.stdout)
            }

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
        if not docker_req:
            docker_req = {"dockerImageId": "arvados/jobs"}

        container_request["container_image"] = arv_docker_get_image(
            self.arvrunner.api, docker_req, pull_image,
            self.arvrunner.project_uuid)

        resources = self.builder.resources
        if resources is not None:
            runtime_constraints["vcpus"] = resources.get("cores", 1)
            runtime_constraints["ram"] = resources.get("ram") * 2**20

        api_req, _ = get_feature(self, "http://arvados.org/cwl#APIRequirement")
        if api_req:
            runtime_constraints["API"] = True

        runtime_req, _ = get_feature(
            self, "http://arvados.org/cwl#RuntimeConstraints")
        if runtime_req:
            if "keep_cache" in runtime_req:
                runtime_constraints[
                    "keep_cache_ram"] = runtime_req["keep_cache"] * 2**20

        partition_req, _ = get_feature(
            self, "http://arvados.org/cwl#PartitionRequirement")
        if partition_req:
            scheduling_parameters["partitions"] = aslist(
                partition_req["partition"])

        container_request["mounts"] = mounts
        container_request["runtime_constraints"] = runtime_constraints
        container_request["use_existing"] = kwargs.get("enable_reuse", True)
        container_request["scheduling_parameters"] = scheduling_parameters

        if kwargs.get("runnerjob", "").startswith("arvwf:"):
            wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
            wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(
                num_retries=self.arvrunner.num_retries)
            if container_request["name"] == "main":
                container_request["name"] = wfrecord["name"]
            container_request["properties"]["template_uuid"] = wfuuid

        try:
            response = self.arvrunner.api.container_requests().create(
                body=container_request).execute(
                    num_retries=self.arvrunner.num_retries)

            self.uuid = response["uuid"]
            self.arvrunner.processes[self.uuid] = self

            if response["state"] == "Final":
                logger.info("%s reused container %s",
                            self.arvrunner.label(self),
                            response["container_uuid"])
                self.done(response)
            else:
                logger.info("%s %s state is %s", self.arvrunner.label(self),
                            response["uuid"], response["state"])
        except Exception as e:
            logger.error("%s got error %s" %
                         (self.arvrunner.label(self), str(e)))
            self.output_callback({}, "permanentFail")