Example #1
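A Builder constructed entirely with keyword arguments; the trailing "# type:" comments record the expected type of each parameter. The keyword order here also matches the positional order used by the constructor calls in Examples #2 and #3.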
def make_builder(joborder, hints, requirements, runtimeContext, metadata):
    return Builder(
        job=joborder,
        files=[],  # type: List[Dict[Text, Text]]
        bindings=[],  # type: List[Dict[Text, Any]]
        schemaDefs={},  # type: Dict[Text, Dict[Text, Any]]
        names=None,  # type: Names
        requirements=requirements,  # type: List[Dict[Text, Any]]
        hints=hints,  # type: List[Dict[Text, Any]]
        resources={},  # type: Dict[str, int]
        mutation_manager=None,  # type: Optional[MutationManager]
        formatgraph=None,  # type: Optional[Graph]
        make_fs_access=None,  # type: Type[StdFsAccess]
        fs_access=None,  # type: StdFsAccess
        job_script_provider=runtimeContext.job_script_provider,  # type: Optional[Any]
        timeout=runtimeContext.eval_timeout,  # type: float
        debug=runtimeContext.debug,  # type: bool
        js_console=runtimeContext.js_console,  # type: bool
        force_docker_pull=runtimeContext.force_docker_pull,  # type: bool
        loadListing="",  # type: Text
        outdir="",  # type: Text
        tmpdir="",  # type: Text
        stagedir="",  # type: Text
        cwlVersion=metadata.get(
            "http://commonwl.org/cwltool#original_cwlVersion")
        or metadata.get("cwlVersion"),
        container_engine="docker")
Example #2
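A minimal Builder constructed positionally for a CUDA-related test: a single CUDA requirement and a cudaDeviceCount resource. The arguments follow the same order as the keywords in Example #1; the inline comments name each parameter.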
def _makebuilder(cudaReq: CWLObjectType) -> Builder:
    return Builder(
        {},                      # job
        [],                      # files
        [],                      # bindings
        {},                      # schemaDefs
        schema.Names(),          # names
        [cudaReq],               # requirements
        [],                      # hints
        {"cudaDeviceCount": 1},  # resources
        None,                    # mutation_manager
        None,                    # formatgraph
        StdFsAccess,             # make_fs_access
        StdFsAccess(""),         # fs_access
        None,                    # job_script_provider
        0.1,                     # timeout
        False,                   # debug
        False,                   # js_console
        False,                   # force_docker_pull
        "",                      # loadListing
        "",                      # outdir
        "",                      # tmpdir
        "",                      # stagedir
        INTERNAL_VERSION,        # cwlVersion
        "docker",                # container_engine
    )
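For context, a hypothetical call might look like the sketch below. The mapping follows cwltool's CUDARequirement extension; the specific field values are illustrative assumptions, not part of the original test.

# Hypothetical usage sketch; the field values are assumptions.
cuda_req = {
    "class": "http://commonwl.org/cwltool#CUDARequirement",
    "cudaVersionMin": "11.4",        # minimum CUDA version required
    "cudaComputeCapability": "3.0",  # minimum device compute capability
    "cudaDeviceCountMin": 1,         # request at least one GPU
}
builder = _makebuilder(cuda_req)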
Example #3
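A cwltool unit test exercising the same positional construction in context: the Builder is wired to a RuntimeContext so that DockerCommandLineJob honors tmpdir_prefix when staging writable files, writable directories, literal file contents, and the Docker cidfile.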
def test_docker_tmpdir_prefix(tmp_path: Path) -> None:
    """Test that DockerCommandLineJob respects temp directory directives."""
    (tmp_path / "3").mkdir()
    tmpdir_prefix = str(tmp_path / "3" / "ttmp")
    runtime_context = RuntimeContext({
        "tmpdir_prefix": tmpdir_prefix,
        "user_space_docker_cmd": None
    })
    builder = Builder(
        {},                              # job
        [],                              # files
        [],                              # bindings
        {},                              # schemaDefs
        schema.Names(),                  # names
        [],                              # requirements
        [],                              # hints
        {},                              # resources
        None,                            # mutation_manager
        None,                            # formatgraph
        StdFsAccess,                     # make_fs_access
        StdFsAccess(""),                 # fs_access
        None,                            # job_script_provider
        0.1,                             # timeout
        False,                           # debug
        False,                           # js_console
        False,                           # force_docker_pull
        "",                              # loadListing
        runtime_context.get_outdir(),    # outdir
        runtime_context.get_tmpdir(),    # tmpdir
        runtime_context.get_stagedir(),  # stagedir
        INTERNAL_VERSION,                # cwlVersion
    )
    job = DockerCommandLineJob(builder, {}, PathMapper, [], [], "")
    runtime: List[str] = []

    volume_writable_file = MapperEnt(resolved=get_data("tests/2.fastq"),
                                     target="foo",
                                     type=None,
                                     staged=None)
    (tmp_path / "1").mkdir()
    job.add_writable_file_volume(runtime, volume_writable_file, None,
                                 str(tmp_path / "1" / "writable_file"))
    children = sorted((tmp_path / "1").glob("*"))
    assert len(children) == 1
    subdir = children[0]  # glob() already returns absolute paths
    assert subdir.name.startswith("writable_file")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "2.fastq").exists()

    resolved_writable_dir = tmp_path / "data_orig"
    resolved_writable_dir.mkdir(parents=True)
    volume_dir = MapperEnt(resolved=str(resolved_writable_dir),
                           target="bar",
                           type=None,
                           staged=None)
    (tmp_path / "2").mkdir()
    job.add_writable_directory_volume(runtime, volume_dir, None,
                                      str(tmp_path / "2" / "dir"))
    children = sorted((tmp_path / "2").glob("*"))
    assert len(children) == 1
    subdir = tmp_path / "2" / children[0]
    assert subdir.name.startswith("dir")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "data_orig").exists()

    cidfile = job.create_runtime({}, runtime_context)[1]
    assert cidfile and cidfile.startswith(tmpdir_prefix)

    volume_file = MapperEnt(resolved="Hoopla!",
                            target="baz",
                            type=None,
                            staged=None)
    (tmp_path / "4").mkdir()
    job.create_file_and_add_volume(runtime, volume_file, None, None,
                                   str(tmp_path / "4" / "file"))
    children = sorted((tmp_path / "4").glob("*"))
    assert len(children) == 1
    subdir = tmp_path / "4" / children[0]
    assert subdir.name.startswith("file")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "baz").exists()
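The assertions above rely on cwltool creating scratch paths directly under the configured prefix. Conceptually that is a split-plus-mkdtemp, as in this simplified sketch (an assumed model of the behavior, not cwltool's actual helper):

import os
import tempfile

def make_scratch_dir(tmpdir_prefix: str) -> str:
    # mkdtemp() appends a random suffix to the basename of the prefix,
    # so every path it returns starts with tmpdir_prefix.
    head, tail = os.path.split(tmpdir_prefix)
    return tempfile.mkdtemp(prefix=tail, dir=head)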
Example #4
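From arvados-cwl-runner's ArvadosWorkflow: when the arv:RunInSingleContainer requirement is present, job() packs the subworkflow, uploads its dependencies to Keep, resolves static and dynamic ResourceRequirements, and rewrites the whole workflow as a single CommandLineTool that runs cwltool inside one container; otherwise it defers to the standard workflow runner. The Builder(joborder, requirements=..., hints=..., resources={}) calls pass only the keyword arguments needed for expression evaluation with do_eval.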
    def job(self, joborder, output_callback, runtimeContext):
        req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
        if req:
            with SourceLine(self.tool, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
                if "id" not in self.tool:
                    raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
            document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])

            discover_secondary_files(self.tool["inputs"], joborder)

            with Perf(metrics, "subworkflow upload_deps"):
                upload_dependencies(self.arvrunner,
                                    os.path.basename(joborder.get("id", "#")),
                                    document_loader,
                                    joborder,
                                    joborder.get("id", "#"),
                                    False)

                if self.wf_pdh is None:
                    workflowobj["requirements"] = dedup_reqs(self.requirements)
                    workflowobj["hints"] = dedup_reqs(self.hints)

                    packed = pack(document_loader, workflowobj, uri, self.metadata)

                    builder = Builder(joborder,
                                      requirements=workflowobj["requirements"],
                                      hints=workflowobj["hints"],
                                      resources={})

                    def visit(item):
                        for t in ("hints", "requirements"):
                            if t not in item:
                                continue
                            for req in item[t]:
                                if req["class"] == "ResourceRequirement":
                                    dyn = False
                                    for k in max_res_pars + sum_res_pars:
                                        if k in req:
                                            if isinstance(req[k], str):  # string values are unevaluated CWL expressions
                                                if item["id"] == "#main":
                                                    # only the top-level requirements/hints may contain expressions
                                                    self.dynamic_resource_req.append(req)
                                                    dyn = True
                                                    break
                                                else:
                                                    with SourceLine(req, k, WorkflowException):
                                                        raise WorkflowException("Non-top-level ResourceRequirement in single container cannot have expressions")
                                    if not dyn:
                                        self.static_resource_req.append(req)

                    visit_class(packed["$graph"], ("Workflow", "CommandLineTool"), visit)

                    if self.static_resource_req:
                        self.static_resource_req = [get_overall_res_req(self.static_resource_req)]

                    upload_dependencies(self.arvrunner,
                                        runtimeContext.name,
                                        document_loader,
                                        packed,
                                        uri,
                                        False)

                    # Discover files/directories referenced by the
                    # workflow (mainly "default" values)
                    visit_class(packed, ("File", "Directory"), self.wf_reffiles.append)


            if self.dynamic_resource_req:
                builder = Builder(joborder,
                                  requirements=self.requirements,
                                  hints=self.hints,
                                  resources={})

                # Evaluate dynamic resource requirements using current builder
                rs = copy.copy(self.static_resource_req)
                for dyn_rs in self.dynamic_resource_req:
                    eval_req = {"class": "ResourceRequirement"}
                    for a in max_res_pars + sum_res_pars:
                        if a in dyn_rs:
                            eval_req[a] = builder.do_eval(dyn_rs[a])
                    rs.append(eval_req)
                job_res_reqs = [get_overall_res_req(rs)]
            else:
                job_res_reqs = self.static_resource_req

            with Perf(metrics, "subworkflow adjust"):
                joborder_resolved = copy.deepcopy(joborder)
                joborder_keepmount = copy.deepcopy(joborder)

                reffiles = []
                visit_class(joborder_keepmount, ("File", "Directory"), reffiles.append)

                mapper = ArvPathMapper(self.arvrunner, reffiles+self.wf_reffiles, runtimeContext.basedir,
                                       "/keep/%s",
                                       "/keep/%s/%s")

                # For containers API, we need to make sure any extra
                # referenced files (ie referenced by the workflow but
                # not in the inputs) are included in the mounts.
                if self.wf_reffiles:
                    runtimeContext = runtimeContext.copy()
                    runtimeContext.extra_reffiles = copy.deepcopy(self.wf_reffiles)

                def keepmount(obj):
                    remove_redundant_fields(obj)
                    with SourceLine(obj, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
                        if "location" not in obj:
                            raise WorkflowException("%s object is missing required 'location' field: %s" % (obj["class"], obj))
                    with SourceLine(obj, "location", WorkflowException, logger.isEnabledFor(logging.DEBUG)):
                        if obj["location"].startswith("keep:"):
                            obj["location"] = mapper.mapper(obj["location"]).target
                            if "listing" in obj:
                                del obj["listing"]
                        elif obj["location"].startswith("_:"):
                            del obj["location"]
                        else:
                            raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])

                visit_class(joborder_keepmount, ("File", "Directory"), keepmount)

                def resolved(obj):
                    if obj["location"].startswith("keep:"):
                        obj["location"] = mapper.mapper(obj["location"]).resolved

                visit_class(joborder_resolved, ("File", "Directory"), resolved)

                if self.wf_pdh is None:
                    adjustFileObjs(packed, keepmount)
                    adjustDirObjs(packed, keepmount)
                    self.wf_pdh = upload_workflow_collection(self.arvrunner, shortname(self.tool["id"]), packed)

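            # Wrap the packed workflow in a CommandLineTool that invokes
            # cwltool inside a single container: workflow.cwl is staged from
            # the Keep collection uploaded above, and the job order is
            # embedded as cwl.input.yml.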
            wf_runner = cmap({
                "class": "CommandLineTool",
                "baseCommand": "cwltool",
                "inputs": self.tool["inputs"],
                "outputs": self.tool["outputs"],
                "stdout": "cwl.output.json",
                "requirements": self.requirements+job_res_reqs+[
                    {"class": "InlineJavascriptRequirement"},
                    {
                    "class": "InitialWorkDirRequirement",
                    "listing": [{
                            "entryname": "workflow.cwl",
                            "entry": '$({"class": "File", "location": "keep:%s/workflow.cwl"})' % self.wf_pdh
                        }, {
                            "entryname": "cwl.input.yml",
                            "entry": json.dumps(joborder_keepmount, indent=2, sort_keys=True, separators=(',',': ')).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
                        }]
                }],
                "hints": self.hints,
                "arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"],
                "id": "#"
            })
            return ArvadosCommandTool(self.arvrunner, wf_runner, self.loadingContext).job(joborder_resolved, output_callback, runtimeContext)
        else:
            return super(ArvadosWorkflow, self).job(joborder, output_callback, runtimeContext)