Ejemplo n.º 1
0
def wf3():
    wf = Workflow("test")
    wf.add_jobs(Job("ls"))
    wf.add_site_catalog(SiteCatalog())
    wf.add_transformation_catalog(TransformationCatalog())
    wf.add_replica_catalog(ReplicaCatalog())

    return wf
Ejemplo n.º 2
0
    def test_job_id_assignment_by_workflow(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="a")
        j2 = Job("t2")
        j3 = Job("t3", _id="b")
        j4 = Job("t4")
        j5 = Job("t5")
        wf.add_jobs(j1, j2, j3, j4, j5)

        assert j2._id == "ID0000001"
        assert j4._id == "ID0000002"
        assert j5._id == "ID0000003"
Ejemplo n.º 3
0
    def test_write_valid_hierarchical_workflow(self, mocker):
        mocker.patch("Pegasus.api.workflow.Workflow.write")

        try:
            wf = Workflow("test")
            wf.add_jobs(SubWorkflow("file", False))
            wf.write(file="workflow.yml", _format="yml")
        except PegasusError:
            pytest.fail("shouldn't have thrown PegasusError")

        Pegasus.api.workflow.Workflow.write.assert_called_once_with(
            file="workflow.yml", _format="yml")
Ejemplo n.º 4
0
    def test_add_duplicate_parent_dependency(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parent = Job("t", _id="parent")

        wf.add_jobs(job, parent)

        with pytest.raises(DuplicateError) as e:
            wf.add_dependency(job, parents=[parent, parent])

        assert (
            "A dependency already exists between parent id: parent and job id: job"
            in str(e))
Ejemplo n.º 5
0
    def test_add_duplicate_child_dependency(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        child = Job("t", _id="child")

        wf.add_jobs(job, child)

        with pytest.raises(DuplicateError) as e:
            wf.add_dependency(job, children=[child, child])

        assert (
            "A dependency already exists between job id: job and child id: child"
            in str(e))
Ejemplo n.º 6
0
    def test_infer_dependencies_when_job_uses_stdin_stdout_and_stderr(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="j1").add_outputs(File("f1"))
        j2 = Job("t1",
                 _id="j2").set_stdin(*j1.get_outputs()).set_stdout(File("f2"))
        j3 = Job("t1", _id="j3").add_inputs(*j2.get_outputs())
        wf.add_jobs(j1, j2, j3)

        # manually call _infer_dependencies() as it is only called when
        # wf.write() is called
        wf._infer_dependencies()

        assert wf.dependencies["j1"] == _JobDependency("j1", {"j2"})
        assert wf.dependencies["j2"] == _JobDependency("j2", {"j3"})
Ejemplo n.º 7
0
    def test_add_dependency_parents(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parents = [
            Job("t", _id="parent1"),
            Job("t", _id="parent2"),
            Job("t", _id="parent3"),
        ]

        wf.add_jobs(job, *parents)

        wf.add_dependency(job, parents=[parents[0]])
        wf.add_dependency(job, parents=parents[1:])

        for parent in parents:
            assert wf.dependencies[parent._id] == _JobDependency(
                parent._id, {job._id})
Ejemplo n.º 8
0
    def test_write_hierarchical_workflow_when_catalogs_are_inlined(
            self, sc, tc):
        wf = Workflow("test")
        wf.add_jobs(SubWorkflow("file", False))

        if sc:
            wf.add_site_catalog(sc)

        if tc:
            wf.add_transformation_catalog(tc)

        with pytest.raises(PegasusError) as e:
            wf.write()

        assert (
            "Site Catalog and Transformation Catalog must be written as a separate"
            in str(e))
Ejemplo n.º 9
0
    def test_add_dependency_children(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        children = [
            Job("t", _id="child1"),
            Job("t", _id="child2"),
            Job("t", _id="child3"),
        ]

        wf.add_jobs(job, *children)

        wf.add_dependency(job, children=[children[0]])
        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {children[0]._id})

        wf.add_dependency(job, children=children[1:])
        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {child._id
                      for child in children})
Ejemplo n.º 10
0
    def test_workflow_key_ordering_on_yml_write(self):
        tc = TransformationCatalog()
        rc = ReplicaCatalog()
        sc = SiteCatalog()

        wf = Workflow("wf")
        wf.add_transformation_catalog(tc)
        wf.add_replica_catalog(rc)
        wf.add_site_catalog(sc)

        wf.add_jobs(Job("t1", _id="a"))

        wf.add_env(JAVA_HOME="/java/home")
        wf.add_shell_hook(EventType.START, "/bin/echo hi")
        wf.add_metadata(key="value")

        wf.write()
        EXPECTED_FILE = Path("workflow.yml")

        with EXPECTED_FILE.open() as f:
            # reading in as str so ordering of keys is not disrupted
            # when loaded into a dict
            result = f.read()

        EXPECTED_FILE.unlink()
        """
        Check that wf keys have been ordered as follows (while ignoring nested keys):
        - pegasus,
        - name,
        - hooks,
        - profiles,
        - metadata,
        - siteCatalog,
        - replicaCatalog,
        - transformationCatalog,
        - jobs
        - jobDependencies
        """
        p = re.compile(
            r"pegasus: '5.0'[\w\W]+name:[\w\W]+hooks:[\w\W]+profiles:[\w\W]+metadata:[\w\W]+siteCatalog:[\w\W]+replicaCatalog:[\w\W]+transformationCatalog:[\w\W]+jobs:[\w\W]+jobDependencies:[\w\W]+"
        )
        assert p.match(result) is not None
Ejemplo n.º 11
0
    def test_write_wf_catalogs_included(self):
        wf = Workflow("test")
        wf.add_jobs(Job("ls"))

        wf.add_transformation_catalog(TransformationCatalog())
        wf.add_site_catalog(SiteCatalog())
        wf.add_replica_catalog(ReplicaCatalog())

        wf_path = Path("workflow.yml")
        with wf_path.open("w+") as f:
            wf.write(f)
            f.seek(0)
            result = yaml.load(f)

        expected = {
            "pegasus":
            "5.0",
            "name":
            "test",
            "siteCatalog": {
                "sites": []
            },
            "replicaCatalog": {
                "replicas": []
            },
            "transformationCatalog": {
                "transformations": []
            },
            "jobs": [{
                "type": "job",
                "name": "ls",
                "id": "ID0000001",
                "arguments": [],
                "uses": [],
            }],
            "jobDependencies": [],
        }

        assert expected == result

        wf_path.unlink()
Ejemplo n.º 12
0
def wf():
    wf = Workflow("wf")

    j1 = (Job("t1", _id="a").add_outputs(File("f1"), File("f2")).add_args(
        File("do-nothing"), "-n", 1,
        1.1).set_stdin("stdin").set_stdout("stdout").set_stderr("stderr"))
    j2 = (Job("t1", _id="b").add_inputs(File("f1"), File("f2")).add_checkpoint(
        File("checkpoint")))
    j3 = SubWorkflow("subworkflow.dag", True,
                     _id="c").add_args("--sites", "condorpool")
    j4 = SubWorkflow(File("subworkflow.dax"), False, _id="d")

    wf.add_jobs(j1, j2, j3, j4)

    wf._infer_dependencies()

    wf.add_env(JAVA_HOME="/java/home")
    wf.add_shell_hook(EventType.START, "/bin/echo hi")
    wf.add_metadata(key="value")

    return wf
Ejemplo n.º 13
0
    def test_add_dependency_parents_and_children(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parents = [Job("t", _id="parent1"), Job("t", _id="parent2")]

        children = [Job("t", _id="child1"), Job("t", _id="child2")]

        wf.add_jobs(*parents, *children)

        # add nothing
        wf.add_dependency(job)
        assert len(wf.dependencies) == 0

        wf.add_dependency(job, parents=parents, children=children)

        for parent in parents:
            assert wf.dependencies[parent._id] == _JobDependency(
                parent._id, {job._id})

        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {child._id
                      for child in children})
Ejemplo n.º 14
0
    def test_infer_dependencies_fork_join_wf(self):
        wf = Workflow("wf")

        f1 = File("f1")
        f2 = File("f2")
        f3 = File("f3")
        f4 = File("f4")

        fork = Job("t1", _id="fork").add_outputs(f1, f2)
        work1 = Job("t1", _id="work1").add_inputs(f1).add_outputs(f3)
        work2 = Job("t1", _id="work2").add_inputs(f2).add_outputs(f4)
        join = Job("t1", _id="join").add_inputs(f3, f4)
        wf.add_jobs(fork, work1, work2, join)

        # manually call _infer_dependencies() as it is only called when
        # wf.write() is called
        wf._infer_dependencies()

        assert wf.dependencies["fork"] == _JobDependency(
            "fork", {"work1", "work2"})
        assert wf.dependencies["work1"] == _JobDependency("work1", {"join"})
        assert wf.dependencies["work2"] == _JobDependency("work2", {"join"})
Ejemplo n.º 15
0
    def test_get_job(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="j1")
        wf.add_jobs(j1)

        assert j1 == wf.get_job("j1")
Ejemplo n.º 16
0
    def test_add_job(self, job):
        wf = Workflow("wf")
        wf.add_jobs(job)

        assert job == wf.get_job("job")
Ejemplo n.º 17
0
def _to_wf(d: dict) -> Workflow:
    """Convert dict to Workflow

    :param d: Workflow represented as a dict
    :type d: dict
    :raises PegasusError: encountered error parsing
    :return: a Workflow object based on d
    :rtype: Workflow
    """

    try:
        #
        wf = Workflow(d["name"], infer_dependencies=False)

        # add rc
        if "replicaCatalog" in d:
            wf.replica_catalog = _to_rc(d["replicaCatalog"])

        # add tc
        if "transformationCatalog" in d:
            wf.transformation_catalog = _to_tc(d["transformationCatalog"])

        # add sc
        if "siteCatalog" in d:
            wf.site_catalog = _to_sc(d["siteCatalog"])

        # add jobs
        for j in d["jobs"]:
            # create appropriate job based on type
            if j["type"] == "job":
                job = Job(
                    j["name"],
                    _id=j["id"],
                    node_label=j.get("nodeLabel"),
                    namespace=j.get("namespace"),
                    version=j.get("version"),
                )
            elif j["type"] in {"pegasusWorkflow", "condorWorkflow"}:
                f = File(j["file"])

                is_planned = False if j["type"] == "pegasusWorkflow" else True

                job = SubWorkflow(
                    f, is_planned, _id=j["id"], node_label=j.get("nodeLabel")
                )

            else:
                raise ValueError

            # add args
            args = list()
            for a in j["arguments"]:
                args.append(a)

            job.args = args

            # add uses
            uses = set()
            for u in j["uses"]:
                f = File(u["lfn"], size=u.get("size"))
                try:
                    f.metadata = u["metadata"]
                except KeyError:
                    pass

                uses.add(
                    _Use(
                        f,
                        getattr(_LinkType, u["type"].upper()),
                        stage_out=u.get("stageOut"),
                        register_replica=u.get("registerReplica"),
                        bypass_staging=u.get("bypass"),
                    )
                )

            job.uses = uses

            # set stdin
            if "stdin" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdin"]:
                        job.stdin = u.file
                        break

            # set stdout
            if "stdout" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdout"]:
                        job.stdout = u.file
                        break

            # set stderr
            if "stderr" in j:
                for u in job.uses:
                    if u.file.lfn == j["stderr"]:
                        job.stderr = u.file
                        break

            # add profiles
            if j.get("profiles"):
                job.profiles = defaultdict(dict, j.get("profiles"))

            # add metadata
            if j.get("metadata"):
                job.metadata = j.get("metadata")

            # add hooks
            if j.get("hooks"):
                job.hooks = defaultdict(list, j.get("hooks"))

            # add job to wf
            wf.add_jobs(job)

        # add dependencies
        if d.get("jobDependencies"):
            dependencies = defaultdict(_JobDependency)
            for item in d.get("jobDependencies"):
                dependencies[item["id"]] = _JobDependency(
                    item["id"], {child for child in item["children"]}
                )

            wf.dependencies = dependencies

        # add profiles
        if d.get("profiles"):
            wf.profiles = defaultdict(dict, d.get("profiles"))

        # add metadata
        if d.get("metadata"):
            wf.metadata = d.get("metadata")

        # add hooks
        if d.get("hooks"):
            wf.hooks = defaultdict(list, d.get("hooks"))

        return wf
    except (KeyError, ValueError):
        raise PegasusError("error parsing {}".format(d))
Ejemplo n.º 18
0
 def test_add_duplicate_job(self):
     wf = Workflow("wf")
     with pytest.raises(DuplicateError):
         wf.add_jobs(Job("t1", _id="j1"), Job("t2", _id="j1"))