Esempio n. 1
0
def wf2():
    _in = File("in")
    stdin = File("stdin")
    stdout = File("stdout")
    stderr = File("stderr")
    out = File("out")
    out2 = File("out2")

    pegasus_workflow = File("pegasus_workflow")
    condor_workflow = File("condor_workflow")

    j1 = (Job("tr", _id="1", node_label="test").add_args(
        "-i", _in, "-o", out).add_inputs(_in).add_outputs(out).set_stdin(
            stdin).set_stdout(stdout).set_stderr(stderr))

    j2 = (Job("tr2", _id="2", node_label="test").add_args(
        "-i", out, "-o", out2).add_inputs(out).add_outputs(out2))

    sbwf_pegasus = SubWorkflow(pegasus_workflow,
                               False,
                               _id="unplanned",
                               node_label="test").add_args("-flag", "-flag2")

    sbwf_condor = SubWorkflow(condor_workflow,
                              True,
                              _id="planned",
                              node_label="test")

    return (Workflow("test", infer_dependencies=False).add_jobs(
        j1, j2, sbwf_pegasus, sbwf_condor).add_dependency(j1, children=[j2]))
Esempio n. 2
0
    def test_add_dependency_invalid_child(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        child = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job, children=[child])

        assert "One of the given children does not have an id" in str(e)
Esempio n. 3
0
    def test_add_dependency_invalid_parent(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parent = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job, parents=[parent])

        assert "One of the given parents does not have an id" in str(e)
Esempio n. 4
0
    def test_job_id_assignment_by_workflow(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="a")
        j2 = Job("t2")
        j3 = Job("t3", _id="b")
        j4 = Job("t4")
        j5 = Job("t5")
        wf.add_jobs(j1, j2, j3, j4, j5)

        assert j2._id == "ID0000001"
        assert j4._id == "ID0000002"
        assert j5._id == "ID0000003"
Esempio n. 5
0
    def test_add_duplicate_parent_dependency(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parent = Job("t", _id="parent")

        wf.add_jobs(job, parent)

        with pytest.raises(DuplicateError) as e:
            wf.add_dependency(job, parents=[parent, parent])

        assert (
            "A dependency already exists between parent id: parent and job id: job"
            in str(e))
Esempio n. 6
0
    def test_add_duplicate_child_dependency(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        child = Job("t", _id="child")

        wf.add_jobs(job, child)

        with pytest.raises(DuplicateError) as e:
            wf.add_dependency(job, children=[child, child])

        assert (
            "A dependency already exists between job id: job and child id: child"
            in str(e))
Esempio n. 7
0
    def test_tojson_with_mixins(self):
        j = Job("t1")
        j.add_env(JAVA_HOME="/java/home")
        j.add_shell_hook(EventType.START, "/bin/echo hi")
        j.add_metadata(key="value")

        result = json.loads(json.dumps(j, cls=_CustomEncoder))
        expected = {
            "type": "job",
            "name": "t1",
            "arguments": [],
            "uses": [],
            "profiles": {
                Namespace.ENV.value: {
                    "JAVA_HOME": "/java/home"
                }
            },
            "hooks": {
                "shell": [{
                    "_on": EventType.START.value,
                    "cmd": "/bin/echo hi"
                }]
            },
            "metadata": {
                "key": "value"
            },
        }

        assert result == expected
Esempio n. 8
0
    def test_add_dependency_parents(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parents = [
            Job("t", _id="parent1"),
            Job("t", _id="parent2"),
            Job("t", _id="parent3"),
        ]

        wf.add_jobs(job, *parents)

        wf.add_dependency(job, parents=[parents[0]])
        wf.add_dependency(job, parents=parents[1:])

        for parent in parents:
            assert wf.dependencies[parent._id] == _JobDependency(
                parent._id, {job._id})
Esempio n. 9
0
    def test_add_dependency_invalid_job(self):
        wf = Workflow("wf")
        job = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job)

        assert "The given job does not have an id" in str(e)
Esempio n. 10
0
def wf3():
    wf = Workflow("test")
    wf.add_jobs(Job("ls"))
    wf.add_site_catalog(SiteCatalog())
    wf.add_transformation_catalog(TransformationCatalog())
    wf.add_replica_catalog(ReplicaCatalog())

    return wf
Esempio n. 11
0
    def test_chaining(self):
        j = (Job("t1").add_args("-n5").add_inputs(File("if")).add_outputs(
            File("of")).set_stdin(File("stdin")).set_stdout(
                File("stdout")).set_stderr(File("stderr")))

        assert j.transformation == "t1"
        assert j.args == ["-n5"]
        assert j.get_inputs() == {File("if"), File("stdin")}
        assert j.get_outputs() == {File("of"), File("stdout"), File("stderr")}
Esempio n. 12
0
    def test_add_dependency_children(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        children = [
            Job("t", _id="child1"),
            Job("t", _id="child2"),
            Job("t", _id="child3"),
        ]

        wf.add_jobs(job, *children)

        wf.add_dependency(job, children=[children[0]])
        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {children[0]._id})

        wf.add_dependency(job, children=children[1:])
        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {child._id
                      for child in children})
Esempio n. 13
0
def wf():
    wf = Workflow("wf")

    j1 = (Job("t1", _id="a").add_outputs(File("f1"), File("f2")).add_args(
        File("do-nothing"), "-n", 1,
        1.1).set_stdin("stdin").set_stdout("stdout").set_stderr("stderr"))
    j2 = (Job("t1", _id="b").add_inputs(File("f1"), File("f2")).add_checkpoint(
        File("checkpoint")))
    j3 = SubWorkflow("subworkflow.dag", True,
                     _id="c").add_args("--sites", "condorpool")
    j4 = SubWorkflow(File("subworkflow.dax"), False, _id="d")

    wf.add_jobs(j1, j2, j3, j4)

    wf._infer_dependencies()

    wf.add_env(JAVA_HOME="/java/home")
    wf.add_shell_hook(EventType.START, "/bin/echo hi")
    wf.add_metadata(key="value")

    return wf
Esempio n. 14
0
    def test_add_dependency_parents_and_children(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parents = [Job("t", _id="parent1"), Job("t", _id="parent2")]

        children = [Job("t", _id="child1"), Job("t", _id="child2")]

        wf.add_jobs(*parents, *children)

        # add nothing
        wf.add_dependency(job)
        assert len(wf.dependencies) == 0

        wf.add_dependency(job, parents=parents, children=children)

        for parent in parents:
            assert wf.dependencies[parent._id] == _JobDependency(
                parent._id, {job._id})

        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {child._id
                      for child in children})
Esempio n. 15
0
    def test_infer_dependencies_fork_join_wf(self):
        wf = Workflow("wf")

        f1 = File("f1")
        f2 = File("f2")
        f3 = File("f3")
        f4 = File("f4")

        fork = Job("t1", _id="fork").add_outputs(f1, f2)
        work1 = Job("t1", _id="work1").add_inputs(f1).add_outputs(f3)
        work2 = Job("t1", _id="work2").add_inputs(f2).add_outputs(f4)
        join = Job("t1", _id="join").add_inputs(f3, f4)
        wf.add_jobs(fork, work1, work2, join)

        # manually call _infer_dependencies() as it is only called when
        # wf.write() is called
        wf._infer_dependencies()

        assert wf.dependencies["fork"] == _JobDependency(
            "fork", {"work1", "work2"})
        assert wf.dependencies["work1"] == _JobDependency("work1", {"join"})
        assert wf.dependencies["work2"] == _JobDependency("work2", {"join"})
Esempio n. 16
0
def wf1():
    in1 = File("in1", size=2048).add_metadata(createdBy="ryan")
    in2 = File("in2")
    stdin = File("stdin").add_metadata(size=1024)
    stdout = File("stdout").add_metadata(size=1024)
    stderr = File("stderr").add_metadata(size=1024)
    out = File("out").add_metadata(size=1024)
    out2 = File("out2").add_metadata(size=1024)

    pegasus_workflow = File("pegasus_workflow").add_metadata(size=2048)
    condor_workflow = File("condor_workflow").add_metadata(size=2048)

    j1 = (Job("tr", _id="1", node_label="test").add_args(
        "-i", in1, "-o", out, "-n", 1, 1.1).add_inputs(in1).add_inputs(
            in2, bypass_staging=True).add_outputs(out).set_stdin(
                stdin).set_stdout(stdout).set_stderr(stderr).add_shell_hook(
                    EventType.START,
                    "/cmd2").add_env(JAVA_HOME="/usr/bin/java").add_metadata(
                        xtra_info="123"))

    j2 = (Job("tr2", _id="2", node_label="test").add_args(
        "-i", out, "-o", out2).add_inputs(out).add_outputs(out2))

    sbwf_pegasus = SubWorkflow(pegasus_workflow,
                               False,
                               _id="unplanned",
                               node_label="test").add_args("-flag", "-flag2")

    sbwf_condor = SubWorkflow(condor_workflow,
                              True,
                              _id="planned",
                              node_label="test")

    return (Workflow("test", infer_dependencies=False).add_shell_hook(
        EventType.START, "/cmd").add_dagman_profile(retry=1).add_metadata(
            author="ryan").add_jobs(j1, j2, sbwf_pegasus,
                                    sbwf_condor).add_dependency(j1,
                                                                children=[j2]))
Esempio n. 17
0
    def test_infer_dependencies_when_job_uses_stdin_stdout_and_stderr(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="j1").add_outputs(File("f1"))
        j2 = Job("t1",
                 _id="j2").set_stdin(*j1.get_outputs()).set_stdout(File("f2"))
        j3 = Job("t1", _id="j3").add_inputs(*j2.get_outputs())
        wf.add_jobs(j1, j2, j3)

        # manually call _infer_dependencies() as it is only called when
        # wf.write() is called
        wf._infer_dependencies()

        assert wf.dependencies["j1"] == _JobDependency("j1", {"j2"})
        assert wf.dependencies["j2"] == _JobDependency("j2", {"j3"})
Esempio n. 18
0
    def test_workflow_key_ordering_on_yml_write(self):
        tc = TransformationCatalog()
        rc = ReplicaCatalog()
        sc = SiteCatalog()

        wf = Workflow("wf")
        wf.add_transformation_catalog(tc)
        wf.add_replica_catalog(rc)
        wf.add_site_catalog(sc)

        wf.add_jobs(Job("t1", _id="a"))

        wf.add_env(JAVA_HOME="/java/home")
        wf.add_shell_hook(EventType.START, "/bin/echo hi")
        wf.add_metadata(key="value")

        wf.write()
        EXPECTED_FILE = Path("workflow.yml")

        with EXPECTED_FILE.open() as f:
            # reading in as str so ordering of keys is not disrupted
            # when loaded into a dict
            result = f.read()

        EXPECTED_FILE.unlink()
        """
        Check that wf keys have been ordered as follows (while ignoring nested keys):
        - pegasus,
        - name,
        - hooks,
        - profiles,
        - metadata,
        - siteCatalog,
        - replicaCatalog,
        - transformationCatalog,
        - jobs
        - jobDependencies
        """
        p = re.compile(
            r"pegasus: '5.0'[\w\W]+name:[\w\W]+hooks:[\w\W]+profiles:[\w\W]+metadata:[\w\W]+siteCatalog:[\w\W]+replicaCatalog:[\w\W]+transformationCatalog:[\w\W]+jobs:[\w\W]+jobDependencies:[\w\W]+"
        )
        assert p.match(result) is not None
Esempio n. 19
0
    def test_write_wf_catalogs_included(self):
        wf = Workflow("test")
        wf.add_jobs(Job("ls"))

        wf.add_transformation_catalog(TransformationCatalog())
        wf.add_site_catalog(SiteCatalog())
        wf.add_replica_catalog(ReplicaCatalog())

        wf_path = Path("workflow.yml")
        with wf_path.open("w+") as f:
            wf.write(f)
            f.seek(0)
            result = yaml.load(f)

        expected = {
            "pegasus":
            "5.0",
            "name":
            "test",
            "siteCatalog": {
                "sites": []
            },
            "replicaCatalog": {
                "replicas": []
            },
            "transformationCatalog": {
                "transformations": []
            },
            "jobs": [{
                "type": "job",
                "name": "ls",
                "id": "ID0000001",
                "arguments": [],
                "uses": [],
            }],
            "jobDependencies": [],
        }

        assert expected == result

        wf_path.unlink()
Esempio n. 20
0
    def test_get_job(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="j1")
        wf.add_jobs(j1)

        assert j1 == wf.get_job("j1")
Esempio n. 21
0
 def test_valid_job(self, transformation):
     assert Job(transformation)
Esempio n. 22
0
def _to_wf(d: dict) -> Workflow:
    """Convert dict to Workflow

    :param d: Workflow represented as a dict
    :type d: dict
    :raises PegasusError: encountered error parsing
    :return: a Workflow object based on d
    :rtype: Workflow
    """

    try:
        #
        wf = Workflow(d["name"], infer_dependencies=False)

        # add rc
        if "replicaCatalog" in d:
            wf.replica_catalog = _to_rc(d["replicaCatalog"])

        # add tc
        if "transformationCatalog" in d:
            wf.transformation_catalog = _to_tc(d["transformationCatalog"])

        # add sc
        if "siteCatalog" in d:
            wf.site_catalog = _to_sc(d["siteCatalog"])

        # add jobs
        for j in d["jobs"]:
            # create appropriate job based on type
            if j["type"] == "job":
                job = Job(
                    j["name"],
                    _id=j["id"],
                    node_label=j.get("nodeLabel"),
                    namespace=j.get("namespace"),
                    version=j.get("version"),
                )
            elif j["type"] in {"pegasusWorkflow", "condorWorkflow"}:
                f = File(j["file"])

                is_planned = False if j["type"] == "pegasusWorkflow" else True

                job = SubWorkflow(
                    f, is_planned, _id=j["id"], node_label=j.get("nodeLabel")
                )

            else:
                raise ValueError

            # add args
            args = list()
            for a in j["arguments"]:
                args.append(a)

            job.args = args

            # add uses
            uses = set()
            for u in j["uses"]:
                f = File(u["lfn"], size=u.get("size"))
                try:
                    f.metadata = u["metadata"]
                except KeyError:
                    pass

                uses.add(
                    _Use(
                        f,
                        getattr(_LinkType, u["type"].upper()),
                        stage_out=u.get("stageOut"),
                        register_replica=u.get("registerReplica"),
                        bypass_staging=u.get("bypass"),
                    )
                )

            job.uses = uses

            # set stdin
            if "stdin" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdin"]:
                        job.stdin = u.file
                        break

            # set stdout
            if "stdout" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdout"]:
                        job.stdout = u.file
                        break

            # set stderr
            if "stderr" in j:
                for u in job.uses:
                    if u.file.lfn == j["stderr"]:
                        job.stderr = u.file
                        break

            # add profiles
            if j.get("profiles"):
                job.profiles = defaultdict(dict, j.get("profiles"))

            # add metadata
            if j.get("metadata"):
                job.metadata = j.get("metadata")

            # add hooks
            if j.get("hooks"):
                job.hooks = defaultdict(list, j.get("hooks"))

            # add job to wf
            wf.add_jobs(job)

        # add dependencies
        if d.get("jobDependencies"):
            dependencies = defaultdict(_JobDependency)
            for item in d.get("jobDependencies"):
                dependencies[item["id"]] = _JobDependency(
                    item["id"], {child for child in item["children"]}
                )

            wf.dependencies = dependencies

        # add profiles
        if d.get("profiles"):
            wf.profiles = defaultdict(dict, d.get("profiles"))

        # add metadata
        if d.get("metadata"):
            wf.metadata = d.get("metadata")

        # add hooks
        if d.get("hooks"):
            wf.hooks = defaultdict(list, d.get("hooks"))

        return wf
    except (KeyError, ValueError):
        raise PegasusError("error parsing {}".format(d))
Esempio n. 23
0
    def test_invalid_job(self):
        with pytest.raises(TypeError) as e:
            Job(123)

        assert "invalid transformation: 123" in str(e)
Esempio n. 24
0
    def test_tojson_no_mixins(self):
        j = Job("t1",
                namespace="ns",
                node_label="label",
                _id="id",
                version="1")
        j.set_stdin("stdin")
        j.set_stdout("stdout")
        j.set_stderr("stderr")
        j.add_args("-i", File("f1"), "-n", 1, 1.1)
        j.add_inputs(File("if1"), File("if2"))
        j.add_outputs(File("of1"), File("of2"))

        result = json.loads(json.dumps(j, cls=_CustomEncoder))
        result["uses"] = sorted(result["uses"], key=lambda use: use["lfn"])

        expected = {
            "type":
            "job",
            "name":
            "t1",
            "namespace":
            "ns",
            "id":
            "id",
            "nodeLabel":
            "label",
            "version":
            "1",
            "arguments": ["-i", "f1", "-n", 1, 1.1],
            "stdin":
            "stdin",
            "stdout":
            "stdout",
            "stderr":
            "stderr",
            "uses": [
                {
                    "lfn": "stdin",
                    "type": "input"
                },
                {
                    "lfn": "if1",
                    "type": "input"
                },
                {
                    "lfn": "if2",
                    "type": "input"
                },
                {
                    "lfn": "stdout",
                    "type": "output",
                    "stageOut": True,
                    "registerReplica": False,
                },
                {
                    "lfn": "stderr",
                    "type": "output",
                    "stageOut": True,
                    "registerReplica": False,
                },
                {
                    "lfn": "of1",
                    "type": "output",
                    "stageOut": True,
                    "registerReplica": False,
                },
                {
                    "lfn": "of2",
                    "type": "output",
                    "stageOut": True,
                    "registerReplica": False,
                },
            ],
        }

        expected["uses"] = sorted(expected["uses"], key=lambda use: use["lfn"])

        assert result == expected
Esempio n. 25
0
class TestWorkflow:
    @pytest.mark.parametrize(
        "job",
        [
            (Job("t1", _id="job")),
            (SubWorkflow(File("f1"), False, _id="job")),
            (SubWorkflow("f1", True, _id="job")),
        ],
    )
    def test_add_job(self, job):
        wf = Workflow("wf")
        wf.add_jobs(job)

        assert job == wf.get_job("job")

    def test_add_duplicate_job(self):
        wf = Workflow("wf")
        with pytest.raises(DuplicateError):
            wf.add_jobs(Job("t1", _id="j1"), Job("t2", _id="j1"))

    def test_get_job(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="j1")
        wf.add_jobs(j1)

        assert j1 == wf.get_job("j1")

    def test_get_invalid_job(self):
        wf = Workflow("wf")
        with pytest.raises(NotFoundError):
            wf.get_job("abc123")

    def test_job_id_assignment_by_workflow(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="a")
        j2 = Job("t2")
        j3 = Job("t3", _id="b")
        j4 = Job("t4")
        j5 = Job("t5")
        wf.add_jobs(j1, j2, j3, j4, j5)

        assert j2._id == "ID0000001"
        assert j4._id == "ID0000002"
        assert j5._id == "ID0000003"

    def test_add_site_catalog(self):
        sc = SiteCatalog()
        wf = Workflow("wf")

        try:
            wf.add_site_catalog(sc)
        except:
            pytest.fail("should not have raised exception")

    def test_add_invalid_site_catalog(self):
        wf = Workflow("wf")
        with pytest.raises(TypeError) as e:
            wf.add_site_catalog(123)

        assert "invalid catalog: 123" in str(e)

    def test_add_duplicate_site_catalog(self):
        sc = SiteCatalog()
        wf = Workflow("wf")
        wf.add_site_catalog(sc)

        with pytest.raises(DuplicateError) as e:
            wf.add_site_catalog(sc)

        assert "a SiteCatalog has already" in str(e)

    def test_add_replica_catalog(self):
        rc = ReplicaCatalog()
        wf = Workflow("wf")

        try:
            wf.add_replica_catalog(rc)
        except:
            pytest.fail("should not have raised exception")

    def test_add_invalid_replica_catalog(self):
        wf = Workflow("wf")
        with pytest.raises(TypeError) as e:
            wf.add_replica_catalog(123)

        assert "invalid catalog: 123" in str(e)

    def test_add_duplicate_replica_catalog(self):
        rc = ReplicaCatalog()
        wf = Workflow("wf")
        wf.add_replica_catalog(rc)

        with pytest.raises(DuplicateError) as e:
            wf.add_replica_catalog(rc)

        assert "a ReplicaCatalog has already" in str(e)

    def test_add_transformation_catalog(self):
        tc = TransformationCatalog()
        wf = Workflow("wf")

        try:
            wf.add_transformation_catalog(tc)
        except:
            pytest.fail("should not have raised exception")

    def test_add_invalid_transformation_catalog(self):
        wf = Workflow("wf")
        with pytest.raises(TypeError) as e:
            wf.add_transformation_catalog(123)

        assert "invalid catalog: 123" in str(e)

    def test_add_duplicate_transformation_catalog(self):
        tc = TransformationCatalog()
        wf = Workflow("wf")
        wf.add_transformation_catalog(tc)

        with pytest.raises(DuplicateError) as e:
            wf.add_transformation_catalog(tc)

        assert "a TransformationCatalog has already" in str(e)

    def test_add_dependency_parents(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parents = [
            Job("t", _id="parent1"),
            Job("t", _id="parent2"),
            Job("t", _id="parent3"),
        ]

        wf.add_jobs(job, *parents)

        wf.add_dependency(job, parents=[parents[0]])
        wf.add_dependency(job, parents=parents[1:])

        for parent in parents:
            assert wf.dependencies[parent._id] == _JobDependency(
                parent._id, {job._id})

    def test_add_dependency_children(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        children = [
            Job("t", _id="child1"),
            Job("t", _id="child2"),
            Job("t", _id="child3"),
        ]

        wf.add_jobs(job, *children)

        wf.add_dependency(job, children=[children[0]])
        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {children[0]._id})

        wf.add_dependency(job, children=children[1:])
        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {child._id
                      for child in children})

    def test_add_dependency_parents_and_children(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parents = [Job("t", _id="parent1"), Job("t", _id="parent2")]

        children = [Job("t", _id="child1"), Job("t", _id="child2")]

        wf.add_jobs(*parents, *children)

        # add nothing
        wf.add_dependency(job)
        assert len(wf.dependencies) == 0

        wf.add_dependency(job, parents=parents, children=children)

        for parent in parents:
            assert wf.dependencies[parent._id] == _JobDependency(
                parent._id, {job._id})

        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {child._id
                      for child in children})

    def test_add_duplicate_parent_dependency(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parent = Job("t", _id="parent")

        wf.add_jobs(job, parent)

        with pytest.raises(DuplicateError) as e:
            wf.add_dependency(job, parents=[parent, parent])

        assert (
            "A dependency already exists between parent id: parent and job id: job"
            in str(e))

    def test_add_duplicate_child_dependency(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        child = Job("t", _id="child")

        wf.add_jobs(job, child)

        with pytest.raises(DuplicateError) as e:
            wf.add_dependency(job, children=[child, child])

        assert (
            "A dependency already exists between job id: job and child id: child"
            in str(e))

    def test_add_dependency_invalid_job(self):
        wf = Workflow("wf")
        job = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job)

        assert "The given job does not have an id" in str(e)

    def test_add_dependency_invalid_parent(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parent = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job, parents=[parent])

        assert "One of the given parents does not have an id" in str(e)

    def test_add_dependency_invalid_child(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        child = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job, children=[child])

        assert "One of the given children does not have an id" in str(e)

    def test_infer_dependencies_fork_join_wf(self):
        wf = Workflow("wf")

        f1 = File("f1")
        f2 = File("f2")
        f3 = File("f3")
        f4 = File("f4")

        fork = Job("t1", _id="fork").add_outputs(f1, f2)
        work1 = Job("t1", _id="work1").add_inputs(f1).add_outputs(f3)
        work2 = Job("t1", _id="work2").add_inputs(f2).add_outputs(f4)
        join = Job("t1", _id="join").add_inputs(f3, f4)
        wf.add_jobs(fork, work1, work2, join)

        # manually call _infer_dependencies() as it is only called when
        # wf.write() is called
        wf._infer_dependencies()

        assert wf.dependencies["fork"] == _JobDependency(
            "fork", {"work1", "work2"})
        assert wf.dependencies["work1"] == _JobDependency("work1", {"join"})
        assert wf.dependencies["work2"] == _JobDependency("work2", {"join"})

    def test_infer_dependencies_when_job_uses_stdin_stdout_and_stderr(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="j1").add_outputs(File("f1"))
        j2 = Job("t1",
                 _id="j2").set_stdin(*j1.get_outputs()).set_stdout(File("f2"))
        j3 = Job("t1", _id="j3").add_inputs(*j2.get_outputs())
        wf.add_jobs(j1, j2, j3)

        # manually call _infer_dependencies() as it is only called when
        # wf.write() is called
        wf._infer_dependencies()

        assert wf.dependencies["j1"] == _JobDependency("j1", {"j2"})
        assert wf.dependencies["j2"] == _JobDependency("j2", {"j3"})

    def test_tojson(self, convert_yaml_schemas_to_json, load_schema, wf,
                    expected_json):
        result = json.loads(json.dumps(wf, cls=_CustomEncoder))

        workflow_schema = load_schema("wf-5.0.json")
        validate(instance=result, schema=workflow_schema)

        result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])
        result["jobs"][0]["uses"] = sorted(result["jobs"][0]["uses"],
                                           key=lambda u: u["lfn"])
        result["jobs"][1]["uses"] = sorted(result["jobs"][1]["uses"],
                                           key=lambda u: u["lfn"])

        assert result == expected_json

    @pytest.mark.parametrize("_format, loader", [("json", json.load),
                                                 ("yml", yaml.safe_load)])
    def test_write_file_obj(
        self,
        convert_yaml_schemas_to_json,
        load_schema,
        wf,
        expected_json,
        _format,
        loader,
    ):
        with NamedTemporaryFile("r+") as f:
            wf.write(f, _format=_format)

            # _path should be set by the call to write
            assert wf._path == f.name

            f.seek(0)
            result = loader(f)

        workflow_schema = load_schema("wf-5.0.json")
        validate(instance=result, schema=workflow_schema)

        result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])
        result["jobs"][0]["uses"] = sorted(result["jobs"][0]["uses"],
                                           key=lambda u: u["lfn"])
        result["jobs"][1]["uses"] = sorted(result["jobs"][1]["uses"],
                                           key=lambda u: u["lfn"])

        assert result == expected_json

    def test_write_str_filename(self, wf, load_schema, expected_json):
        path = "wf.yml"
        wf.write(path)

        # _path should be set by the call to write
        assert wf._path == path

        with open(path) as f:
            result = yaml.safe_load(f)

        workflow_schema = load_schema("wf-5.0.json")
        validate(instance=result, schema=workflow_schema)

        result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])
        result["jobs"][0]["uses"] = sorted(result["jobs"][0]["uses"],
                                           key=lambda u: u["lfn"])
        result["jobs"][1]["uses"] = sorted(result["jobs"][1]["uses"],
                                           key=lambda u: u["lfn"])

        assert result == expected_json

        os.remove(path)

    def test_write_default_filename(self, wf, expected_json):
        wf.write()
        EXPECTED_FILE = "workflow.yml"

        with open(EXPECTED_FILE) as f:
            result = yaml.safe_load(f)

        result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])

        for i in range(len(result["jobs"])):
            result["jobs"][i]["uses"] = sorted(result["jobs"][i]["uses"],
                                               key=lambda u: u["lfn"])

        assert result == expected_json

        os.remove(EXPECTED_FILE)

    def test_write_wf_catalogs_included(self):
        wf = Workflow("test")
        wf.add_jobs(Job("ls"))

        wf.add_transformation_catalog(TransformationCatalog())
        wf.add_site_catalog(SiteCatalog())
        wf.add_replica_catalog(ReplicaCatalog())

        wf_path = Path("workflow.yml")
        with wf_path.open("w+") as f:
            wf.write(f)
            f.seek(0)
            result = yaml.load(f)

        expected = {
            "pegasus":
            "5.0",
            "name":
            "test",
            "siteCatalog": {
                "sites": []
            },
            "replicaCatalog": {
                "replicas": []
            },
            "transformationCatalog": {
                "transformations": []
            },
            "jobs": [{
                "type": "job",
                "name": "ls",
                "id": "ID0000001",
                "arguments": [],
                "uses": [],
            }],
            "jobDependencies": [],
        }

        assert expected == result

        wf_path.unlink()

    def test_write_valid_hierarchical_workflow(self, mocker):
        mocker.patch("Pegasus.api.workflow.Workflow.write")

        try:
            wf = Workflow("test")
            wf.add_jobs(SubWorkflow("file", False))
            wf.write(file="workflow.yml", _format="yml")
        except PegasusError:
            pytest.fail("shouldn't have thrown PegasusError")

        Pegasus.api.workflow.Workflow.write.assert_called_once_with(
            file="workflow.yml", _format="yml")

    @pytest.mark.parametrize(
        "sc, tc",
        [
            (SiteCatalog(), None),
            (None, TransformationCatalog()),
            (SiteCatalog(), TransformationCatalog()),
        ],
    )
    def test_write_hierarchical_workflow_when_catalogs_are_inlined(
            self, sc, tc):
        wf = Workflow("test")
        wf.add_jobs(SubWorkflow("file", False))

        if sc:
            wf.add_site_catalog(sc)

        if tc:
            wf.add_transformation_catalog(tc)

        with pytest.raises(PegasusError) as e:
            wf.write()

        assert (
            "Site Catalog and Transformation Catalog must be written as a separate"
            in str(e))

    def test_workflow_key_ordering_on_yml_write(self):
        tc = TransformationCatalog()
        rc = ReplicaCatalog()
        sc = SiteCatalog()

        wf = Workflow("wf")
        wf.add_transformation_catalog(tc)
        wf.add_replica_catalog(rc)
        wf.add_site_catalog(sc)

        wf.add_jobs(Job("t1", _id="a"))

        wf.add_env(JAVA_HOME="/java/home")
        wf.add_shell_hook(EventType.START, "/bin/echo hi")
        wf.add_metadata(key="value")

        wf.write()
        EXPECTED_FILE = Path("workflow.yml")

        with EXPECTED_FILE.open() as f:
            # reading in as str so ordering of keys is not disrupted
            # when loaded into a dict
            result = f.read()

        EXPECTED_FILE.unlink()
        """
        Check that wf keys have been ordered as follows (while ignoring nested keys):
        - pegasus,
        - name,
        - hooks,
        - profiles,
        - metadata,
        - siteCatalog,
        - replicaCatalog,
        - transformationCatalog,
        - jobs
        - jobDependencies
        """
        p = re.compile(
            r"pegasus: '5.0'[\w\W]+name:[\w\W]+hooks:[\w\W]+profiles:[\w\W]+metadata:[\w\W]+siteCatalog:[\w\W]+replicaCatalog:[\w\W]+transformationCatalog:[\w\W]+jobs:[\w\W]+jobDependencies:[\w\W]+"
        )
        assert p.match(result) is not None

    def test_plan_workflow_already_written(self, wf, mocker):
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        mocker.patch("Pegasus.client._client.Client.plan")

        path = "wf.yml"
        wf.write(path).plan()

        assert wf._path == path

        Pegasus.client._client.Client.plan.assert_called_once_with(
            path,
            cleanup="none",
            conf=None,
            dir=None,
            force=False,
            input_dirs=None,
            output_dir=None,
            output_sites=["local"],
            relative_dir=None,
            sites=None,
            staging_sites=None,
            submit=False,
            verbose=0,
        )

        os.remove(path)

    def test_plan_workflow_not_written(self, wf, mocker):
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        mocker.patch("Pegasus.client._client.Client.plan")

        DEFAULT_WF_PATH = "workflow.yml"
        wf.plan()

        assert wf._path == DEFAULT_WF_PATH

        Pegasus.client._client.Client.plan.assert_called_once_with(
            DEFAULT_WF_PATH,
            cleanup="none",
            conf=None,
            dir=None,
            force=False,
            input_dirs=None,
            output_dir=None,
            output_sites=["local"],
            relative_dir=None,
            sites=None,
            staging_sites=None,
            submit=False,
            verbose=0,
        )

        os.remove(DEFAULT_WF_PATH)

    def test_run(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.run")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf.run()

        Pegasus.client._client.Client.run.assert_called_once_with(None,
                                                                  verbose=0)

    def test_status(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.status")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf._submit_dir = "submit_dir"
        wf.status()

        Pegasus.client._client.Client.status.assert_called_once_with(
            wf._submit_dir, long=0, verbose=0)

    def test_remove(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.remove")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf._submit_dir = "submit_dir"
        wf.remove()

        Pegasus.client._client.Client.remove.assert_called_once_with(
            wf._submit_dir, verbose=0)

    def test_analyze(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.analyzer")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf._submit_dir = "submit_dir"
        wf.analyze()

        Pegasus.client._client.Client.analyzer.assert_called_once_with(
            wf._submit_dir, verbose=0)

    def test_statistics(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.statistics")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf._submit_dir = "submit_dir"
        wf.statistics()

        Pegasus.client._client.Client.statistics.assert_called_once_with(
            wf._submit_dir, verbose=0)
Esempio n. 26
0
 def test_add_duplicate_job(self):
     wf = Workflow("wf")
     with pytest.raises(DuplicateError):
         wf.add_jobs(Job("t1", _id="j1"), Job("t2", _id="j1"))