def test_set_duplicate_stdout(self):
    job = AbstractJob()
    job.add_outputs(File("a"))
    with pytest.raises(DuplicateError) as e:
        job.set_stdout(File("a"))

    assert "file: {file}".format(file=File("a")) in str(e)
def test_add_duplicate_checkpoint(self):
    job = AbstractJob()
    job.add_inputs(File("abc"))
    with pytest.raises(DuplicateError) as e:
        job.add_checkpoint(File("abc"))

    assert "file: {file}".format(file=File("abc")) in str(e)
def test_set_stdout_already_set(self):
    job = AbstractJob()
    job.set_stdout(File("a"))
    with pytest.raises(DuplicateError) as e:
        job.set_stdout(File("b"))

    assert "stdout has already been set" in str(e)
def test_add_outputs(self):
    job = AbstractJob()
    f1 = File("a")
    f2 = File("b")

    job.add_outputs(f1, f2)

    assert job.get_outputs() == {f1, f2}
def test_chaining(self):
    j = (
        Job("t1")
        .add_args("-n5")
        .add_inputs(File("if"))
        .add_outputs(File("of"))
        .set_stdin(File("stdin"))
        .set_stdout(File("stdout"))
        .set_stderr(File("stderr"))
    )

    assert j.transformation == "t1"
    assert j.args == ["-n5"]
    assert j.get_inputs() == {File("if"), File("stdin")}
    assert j.get_outputs() == {File("of"), File("stdout"), File("stderr")}
def test_add_inputs_and_outputs(self):
    job = AbstractJob()
    job.add_inputs(File("a"))
    job.add_outputs(File("b"))

    with pytest.raises(DuplicateError) as e:
        job.add_inputs(File("b"))

    assert "file: {file}".format(file=File("b")) in str(e)
@pytest.fixture
def wf2():
    _in = File("in")
    stdin = File("stdin")
    stdout = File("stdout")
    stderr = File("stderr")
    out = File("out")
    out2 = File("out2")
    pegasus_workflow = File("pegasus_workflow")
    condor_workflow = File("condor_workflow")

    j1 = (
        Job("tr", _id="1", node_label="test")
        .add_args("-i", _in, "-o", out)
        .add_inputs(_in)
        .add_outputs(out)
        .set_stdin(stdin)
        .set_stdout(stdout)
        .set_stderr(stderr)
    )

    j2 = (
        Job("tr2", _id="2", node_label="test")
        .add_args("-i", out, "-o", out2)
        .add_inputs(out)
        .add_outputs(out2)
    )

    sbwf_pegasus = SubWorkflow(
        pegasus_workflow, False, _id="unplanned", node_label="test"
    ).add_args("-flag", "-flag2")

    sbwf_condor = SubWorkflow(condor_workflow, True, _id="planned", node_label="test")

    return (
        Workflow("test", infer_dependencies=False)
        .add_jobs(j1, j2, sbwf_pegasus, sbwf_condor)
        .add_dependency(j1, children=[j2])
    )
def test_get_outputs(self):
    job = AbstractJob()
    f1 = File("a")
    job.add_outputs(f1)

    assert job.get_outputs() == {f1}
def test_tojson_with_metadata(self, convert_yaml_schemas_to_json, load_schema):
    result = File("lfn", size=2048).add_metadata(key="value").__json__()
    expected = {"lfn": "lfn", "metadata": {"key": "value"}, "size": 2048}

    file_schema = load_schema("rc-5.0.json")["$defs"]["file"]
    validate(instance=result, schema=file_schema)

    assert result == expected
class TestSubWorkflow:
    @pytest.mark.parametrize(
        "file, is_planned", [(File("wf-file"), False), ("wf-file", True)]
    )
    def test_valid_subworkflow(self, file, is_planned):
        assert SubWorkflow(file, is_planned)

    def test_invalid_subworkflow(self):
        with pytest.raises(TypeError) as e:
            SubWorkflow(123, False)

        assert "invalid file: 123" in str(e)

    @pytest.mark.parametrize(
        "subworkflow, expected",
        [
            (
                SubWorkflow(
                    "file", False, _id="test-subworkflow", node_label="label"
                ).add_args("--sites", "condorpool"),
                {
                    "type": "pegasusWorkflow",
                    "file": "file",
                    "id": "test-subworkflow",
                    "nodeLabel": "label",
                    "arguments": ["--sites", "condorpool"],
                    "uses": [{"lfn": "file", "type": "input"}],
                },
            ),
            (
                SubWorkflow("file", True, _id="test-subworkflow", node_label="label"),
                {
                    "type": "condorWorkflow",
                    "file": "file",
                    "id": "test-subworkflow",
                    "nodeLabel": "label",
                    "arguments": [],
                    "uses": [{"lfn": "file", "type": "input"}],
                },
            ),
        ],
    )
    def test_tojson(self, subworkflow, expected):
        result = json.loads(json.dumps(subworkflow, cls=_CustomEncoder))

        assert result == expected
@pytest.fixture
def wf1():
    in1 = File("in1", size=2048).add_metadata(createdBy="ryan")
    in2 = File("in2")
    stdin = File("stdin").add_metadata(size=1024)
    stdout = File("stdout").add_metadata(size=1024)
    stderr = File("stderr").add_metadata(size=1024)
    out = File("out").add_metadata(size=1024)
    out2 = File("out2").add_metadata(size=1024)
    pegasus_workflow = File("pegasus_workflow").add_metadata(size=2048)
    condor_workflow = File("condor_workflow").add_metadata(size=2048)

    j1 = (
        Job("tr", _id="1", node_label="test")
        .add_args("-i", in1, "-o", out, "-n", 1, 1.1)
        .add_inputs(in1)
        .add_inputs(in2, bypass_staging=True)
        .add_outputs(out)
        .set_stdin(stdin)
        .set_stdout(stdout)
        .set_stderr(stderr)
        .add_shell_hook(EventType.START, "/cmd2")
        .add_env(JAVA_HOME="/usr/bin/java")
        .add_metadata(xtra_info="123")
    )

    j2 = (
        Job("tr2", _id="2", node_label="test")
        .add_args("-i", out, "-o", out2)
        .add_inputs(out)
        .add_outputs(out2)
    )

    sbwf_pegasus = SubWorkflow(
        pegasus_workflow, False, _id="unplanned", node_label="test"
    ).add_args("-flag", "-flag2")

    sbwf_condor = SubWorkflow(condor_workflow, True, _id="planned", node_label="test")

    return (
        Workflow("test", infer_dependencies=False)
        .add_shell_hook(EventType.START, "/cmd")
        .add_dagman_profile(retry=1)
        .add_metadata(author="ryan")
        .add_jobs(j1, j2, sbwf_pegasus, sbwf_condor)
        .add_dependency(j1, children=[j2])
    )
@pytest.fixture
def wf():
    wf = Workflow("wf")

    j1 = (
        Job("t1", _id="a")
        .add_outputs(File("f1"), File("f2"))
        .add_args(File("do-nothing"), "-n", 1, 1.1)
        .set_stdin("stdin")
        .set_stdout("stdout")
        .set_stderr("stderr")
    )

    j2 = (
        Job("t1", _id="b")
        .add_inputs(File("f1"), File("f2"))
        .add_checkpoint(File("checkpoint"))
    )

    j3 = SubWorkflow("subworkflow.dag", True, _id="c").add_args(
        "--sites", "condorpool"
    )

    j4 = SubWorkflow(File("subworkflow.dax"), False, _id="d")

    wf.add_jobs(j1, j2, j3, j4)
    wf._infer_dependencies()

    wf.add_env(JAVA_HOME="/java/home")
    wf.add_shell_hook(EventType.START, "/bin/echo hi")
    wf.add_metadata(key="value")

    return wf
@pytest.mark.parametrize(
    "_format, loader", [("json", json.load), ("yml", yaml.safe_load)]
)
def test_write(self, _format, loader):
    rc = ReplicaCatalog()
    f_a = File("f.a", size=1024).add_metadata(creator="ryan")
    rc.add_replica(
        "local",
        f_a,
        "/f.a",
        checksum={"sha256": "123"},
        metadata={"extra": "metadata"},
    )
    rc.add_replica("condorpool", f_a, "/f.a")
    rc.add_replica("local", "f.b", "/f.b")
    rc.add_regex_replica("local", "*.txt", "/path", metadata={"creator": "ryan"})

    expected = {
        "pegasus": "5.0",
        "replicas": [
            {
                "lfn": "f.a",
                "pfns": [
                    {"site": "local", "pfn": "/f.a"},
                    {"site": "condorpool", "pfn": "/f.a"},
                ],
                "checksum": {"sha256": "123"},
                "metadata": {"extra": "metadata", "size": 1024, "creator": "ryan"},
            },
            {
                "lfn": "f.b",
                "pfns": [{"site": "local", "pfn": "/f.b"}],
                "metadata": {"size": 1024, "creator": "ryan"},
            },
            {
                "lfn": "*.txt",
                "pfns": [{"site": "local", "pfn": "/path"}],
                "metadata": {"creator": "ryan"},
                "regex": True,
            },
        ],
    }

    expected["replicas"][0]["pfns"] = sorted(
        expected["replicas"][0]["pfns"], key=lambda pfn: pfn["site"]
    )

    with NamedTemporaryFile(mode="r+") as f:
        rc.write(f, _format=_format)
        f.seek(0)
        result = loader(f)

    result["replicas"][0]["pfns"] = sorted(
        result["replicas"][0]["pfns"], key=lambda pfn: pfn["site"]
    )

    assert "createdOn" in result["x-pegasus"]
    assert result["x-pegasus"]["createdBy"] == getpass.getuser()
    assert result["x-pegasus"]["apiLang"] == "python"
    del result["x-pegasus"]

    assert result == expected
def test_invalid_file(self, lfn: str):
    with pytest.raises(TypeError) as e:
        File(lfn)

    assert "invalid lfn: {lfn}".format(lfn=lfn) in str(e)
def test_valid_file(self, lfn: str, size: int):
    assert File(lfn, size)
def test_tojson(self):
    j = AbstractJob(_id="aj", node_label="test")
    j.set_stdin("stdin")
    j.set_stdout("stdout")
    j.set_stderr("stderr")
    j.add_args("-i", File("f1"))
    j.add_inputs(File("if1"), File("if2"))
    j.add_outputs(File("of1"), File("of2"))
    j.add_checkpoint(File("cpf"))

    result = json.loads(json.dumps(j, cls=_CustomEncoder))
    result["uses"] = sorted(result["uses"], key=lambda use: use["lfn"])

    expected = {
        "id": "aj",
        "nodeLabel": "test",
        "arguments": ["-i", "f1"],
        "stdin": "stdin",
        "stdout": "stdout",
        "stderr": "stderr",
        "uses": [
            {"lfn": "stdin", "type": "input"},
            {"lfn": "if1", "type": "input"},
            {"lfn": "if2", "type": "input"},
            {
                "lfn": "stdout",
                "type": "output",
                "stageOut": True,
                "registerReplica": False,
            },
            {
                "lfn": "stderr",
                "type": "output",
                "stageOut": True,
                "registerReplica": False,
            },
            {
                "lfn": "of1",
                "type": "output",
                "stageOut": True,
                "registerReplica": False,
            },
            {
                "lfn": "of2",
                "type": "output",
                "stageOut": True,
                "registerReplica": False,
            },
            {
                "lfn": "cpf",
                "type": "checkpoint",
                "stageOut": True,
                "registerReplica": False,
            },
        ],
    }
    expected["uses"] = sorted(expected["uses"], key=lambda use: use["lfn"])

    assert result == expected
def _to_wf(d: dict) -> Workflow:
    """Convert dict to Workflow

    :param d: Workflow represented as a dict
    :type d: dict
    :raises PegasusError: encountered error parsing
    :return: a Workflow object based on d
    :rtype: Workflow
    """
    try:
        wf = Workflow(d["name"], infer_dependencies=False)

        # add rc
        if "replicaCatalog" in d:
            wf.replica_catalog = _to_rc(d["replicaCatalog"])

        # add tc
        if "transformationCatalog" in d:
            wf.transformation_catalog = _to_tc(d["transformationCatalog"])

        # add sc
        if "siteCatalog" in d:
            wf.site_catalog = _to_sc(d["siteCatalog"])

        # add jobs
        for j in d["jobs"]:
            # create appropriate job based on type
            if j["type"] == "job":
                job = Job(
                    j["name"],
                    _id=j["id"],
                    node_label=j.get("nodeLabel"),
                    namespace=j.get("namespace"),
                    version=j.get("version"),
                )
            elif j["type"] in {"pegasusWorkflow", "condorWorkflow"}:
                f = File(j["file"])
                is_planned = False if j["type"] == "pegasusWorkflow" else True

                job = SubWorkflow(
                    f, is_planned, _id=j["id"], node_label=j.get("nodeLabel")
                )
            else:
                raise ValueError

            # add args
            args = list()
            for a in j["arguments"]:
                args.append(a)
            job.args = args

            # add uses
            uses = set()
            for u in j["uses"]:
                f = File(u["lfn"], size=u.get("size"))
                try:
                    f.metadata = u["metadata"]
                except KeyError:
                    pass

                uses.add(
                    _Use(
                        f,
                        getattr(_LinkType, u["type"].upper()),
                        stage_out=u.get("stageOut"),
                        register_replica=u.get("registerReplica"),
                        bypass_staging=u.get("bypass"),
                    )
                )
            job.uses = uses

            # set stdin
            if "stdin" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdin"]:
                        job.stdin = u.file
                        break

            # set stdout
            if "stdout" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdout"]:
                        job.stdout = u.file
                        break

            # set stderr
            if "stderr" in j:
                for u in job.uses:
                    if u.file.lfn == j["stderr"]:
                        job.stderr = u.file
                        break

            # add profiles
            if j.get("profiles"):
                job.profiles = defaultdict(dict, j.get("profiles"))

            # add metadata
            if j.get("metadata"):
                job.metadata = j.get("metadata")

            # add hooks
            if j.get("hooks"):
                job.hooks = defaultdict(list, j.get("hooks"))

            # add job to wf
            wf.add_jobs(job)

        # add dependencies
        if d.get("jobDependencies"):
            dependencies = defaultdict(_JobDependency)
            for item in d.get("jobDependencies"):
                dependencies[item["id"]] = _JobDependency(
                    item["id"], {child for child in item["children"]}
                )

            wf.dependencies = dependencies

        # add profiles
        if d.get("profiles"):
            wf.profiles = defaultdict(dict, d.get("profiles"))

        # add metadata
        if d.get("metadata"):
            wf.metadata = d.get("metadata")

        # add hooks
        if d.get("hooks"):
            wf.hooks = defaultdict(list, d.get("hooks"))

        return wf
    except (KeyError, ValueError):
        raise PegasusError("error parsing {}".format(d))
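# Hedged addition, not part of the original suite: a minimal round-trip
# sketch for _to_wf, built only from the dict keys the function above reads.
# The asserted attribute names (wf.name, job.transformation) are assumptions
# based on how they are used elsewhere in these tests; the test name itself
# is hypothetical.
def test_to_wf_minimal_dict_sketch():
    d = {
        "name": "wf",
        "jobs": [
            {
                "type": "job",
                "name": "ls",
                "id": "ID0000001",
                "arguments": [],
                "uses": [],
            }
        ],
    }

    wf = _to_wf(d)

    assert wf.name == "wf"
    assert wf.get_job("ID0000001").transformation == "ls"

    # a dict missing required keys (e.g. "name") should surface as PegasusError
    with pytest.raises(PegasusError):
        _to_wf({})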
class TestReplicaCatalog:
    @pytest.mark.parametrize(
        "replica", [("lfn", "pfn", "site", True), (File("lfn"), "pfn", "site", True)]
    )
    def test_add_replica(self, replica: tuple):
        rc = ReplicaCatalog()
        rc.add_replica(*replica)

        assert len(rc.replicas) == 1

    def test_add_duplicate_replica(self):
        rc = ReplicaCatalog()
        with pytest.raises(DuplicateError) as e:
            rc.add_replica("site", "lfn", "pfn", True)
            rc.add_replica("site", File("lfn"), "pfn", True)

        assert (
            "entry: ReplicaEntry(site=site, lfn=lfn, pfn=pfn, regex=True, checksum_type=None, checksum_value=None)"
            in str(e)
        )

    def test_add_invalid_replica(self):
        rc = ReplicaCatalog()
        with pytest.raises(TypeError) as e:
            rc.add_replica("site", set(), "pfn")

        assert "invalid lfn: {lfn}".format(lfn=set()) in str(e)

    def test_tojson(self, convert_yaml_schemas_to_json, load_schema):
        rc = ReplicaCatalog()
        rc.add_replica("site1", "lfn1", "pfn1")
        rc.add_replica("site2", "lfn2", "pfn2", True)
        rc.add_replica(
            "site3", "lfn3", "pfn3", checksum_type="sha256", checksum_value="abc123"
        )

        expected = {
            "pegasus": PEGASUS_VERSION,
            "replicas": [
                {"lfn": "lfn1", "pfn": "pfn1", "site": "site1"},
                {"lfn": "lfn2", "pfn": "pfn2", "site": "site2", "regex": True},
                {
                    "lfn": "lfn3",
                    "pfn": "pfn3",
                    "site": "site3",
                    "checksum": {"type": "sha256", "value": "abc123"},
                },
            ],
        }
        expected["replicas"] = sorted(expected["replicas"], key=lambda d: d["lfn"])

        result = json.loads(json.dumps(rc, cls=_CustomEncoder))
        result["replicas"] = sorted(result["replicas"], key=lambda d: d["lfn"])

        rc_schema = load_schema("rc-5.0.json")
        validate(instance=result, schema=rc_schema)

        assert result == expected

    @pytest.mark.parametrize(
        "_format, loader", [("json", json.load), ("yml", yaml.safe_load)]
    )
    def test_write(self, _format, loader):
        rc = ReplicaCatalog()
        rc.add_replica("site1", "lfn1", "pfn1", True).add_replica(
            "site2", "lfn2", "pfn2", True
        )

        expected = {
            "pegasus": PEGASUS_VERSION,
            "replicas": [
                {"lfn": "lfn1", "pfn": "pfn1", "site": "site1", "regex": True},
                {"lfn": "lfn2", "pfn": "pfn2", "site": "site2", "regex": True},
            ],
        }
        expected["replicas"] = sorted(expected["replicas"], key=lambda d: d["lfn"])

        with NamedTemporaryFile(mode="r+") as f:
            rc.write(f, _format=_format)
            f.seek(0)
            result = loader(f)

        result["replicas"] = sorted(result["replicas"], key=lambda d: d["lfn"])

        assert result == expected

    def test_write_default(self):
        expected_file = Path("replicas.yml")
        ReplicaCatalog().write()

        try:
            expected_file.unlink()
        except FileNotFoundError:
            pytest.fail("could not find {}".format(expected_file))

    def test_replica_catalog_ordering_on_yml_write(self):
        ReplicaCatalog().write()

        EXPECTED_FILE = Path("replicas.yml")
        with EXPECTED_FILE.open() as f:
            # reading in as str so ordering of keys is not disrupted
            # when loaded into a dict
            result = f.read()

        EXPECTED_FILE.unlink()

        """
        Check that rc keys have been ordered as follows:
        - pegasus
        - replicas
        """
        p = re.compile(r"pegasus: '5.0'[\w\W]+replicas:[\w\W]+")
        assert p.match(result) is not None
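    # Hedged addition, not in the original suite: test_write above chains
    # rc.add_replica(...).add_replica(...), which implies add_replica returns
    # the catalog itself; this sketch makes that assumption explicit.
    def test_add_replica_returns_self_sketch(self):
        rc = ReplicaCatalog()
        assert rc.add_replica("site", "lfn", "pfn") is rc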
class TestWorkflow:
    @pytest.mark.parametrize(
        "job",
        [
            (Job("t1", _id="job")),
            (SubWorkflow(File("f1"), False, _id="job")),
            (SubWorkflow("f1", True, _id="job")),
        ],
    )
    def test_add_job(self, job):
        wf = Workflow("wf")
        wf.add_jobs(job)

        assert job == wf.get_job("job")

    def test_add_duplicate_job(self):
        wf = Workflow("wf")
        with pytest.raises(DuplicateError):
            wf.add_jobs(Job("t1", _id="j1"), Job("t2", _id="j1"))

    def test_get_job(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="j1")
        wf.add_jobs(j1)

        assert j1 == wf.get_job("j1")

    def test_get_invalid_job(self):
        wf = Workflow("wf")
        with pytest.raises(NotFoundError):
            wf.get_job("abc123")

    def test_job_id_assignment_by_workflow(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="a")
        j2 = Job("t2")
        j3 = Job("t3", _id="b")
        j4 = Job("t4")
        j5 = Job("t5")
        wf.add_jobs(j1, j2, j3, j4, j5)

        assert j2._id == "ID0000001"
        assert j4._id == "ID0000002"
        assert j5._id == "ID0000003"

    def test_add_site_catalog(self):
        sc = SiteCatalog()
        wf = Workflow("wf")

        try:
            wf.add_site_catalog(sc)
        except:
            pytest.fail("should not have raised exception")

    def test_add_invalid_site_catalog(self):
        wf = Workflow("wf")
        with pytest.raises(TypeError) as e:
            wf.add_site_catalog(123)

        assert "invalid catalog: 123" in str(e)

    def test_add_duplicate_site_catalog(self):
        sc = SiteCatalog()
        wf = Workflow("wf")
        wf.add_site_catalog(sc)

        with pytest.raises(DuplicateError) as e:
            wf.add_site_catalog(sc)

        assert "a SiteCatalog has already" in str(e)

    def test_add_replica_catalog(self):
        rc = ReplicaCatalog()
        wf = Workflow("wf")

        try:
            wf.add_replica_catalog(rc)
        except:
            pytest.fail("should not have raised exception")

    def test_add_invalid_replica_catalog(self):
        wf = Workflow("wf")
        with pytest.raises(TypeError) as e:
            wf.add_replica_catalog(123)

        assert "invalid catalog: 123" in str(e)

    def test_add_duplicate_replica_catalog(self):
        rc = ReplicaCatalog()
        wf = Workflow("wf")
        wf.add_replica_catalog(rc)

        with pytest.raises(DuplicateError) as e:
            wf.add_replica_catalog(rc)

        assert "a ReplicaCatalog has already" in str(e)

    def test_add_transformation_catalog(self):
        tc = TransformationCatalog()
        wf = Workflow("wf")

        try:
            wf.add_transformation_catalog(tc)
        except:
            pytest.fail("should not have raised exception")

    def test_add_invalid_transformation_catalog(self):
        wf = Workflow("wf")
        with pytest.raises(TypeError) as e:
            wf.add_transformation_catalog(123)

        assert "invalid catalog: 123" in str(e)

    def test_add_duplicate_transformation_catalog(self):
        tc = TransformationCatalog()
        wf = Workflow("wf")
        wf.add_transformation_catalog(tc)

        with pytest.raises(DuplicateError) as e:
            wf.add_transformation_catalog(tc)

        assert "a TransformationCatalog has already" in str(e)

    def test_add_dependency_parents(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parents = [
            Job("t", _id="parent1"),
            Job("t", _id="parent2"),
            Job("t", _id="parent3"),
        ]
        wf.add_jobs(job, *parents)

        wf.add_dependency(job, parents=[parents[0]])
        wf.add_dependency(job, parents=parents[1:])

        for parent in parents:
            assert wf.dependencies[parent._id] == _JobDependency(
                parent._id, {job._id}
            )

    def test_add_dependency_children(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        children = [
            Job("t", _id="child1"),
            Job("t", _id="child2"),
            Job("t", _id="child3"),
        ]
        wf.add_jobs(job, *children)

        wf.add_dependency(job, children=[children[0]])
        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {children[0]._id}
        )

        wf.add_dependency(job, children=children[1:])
        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {child._id for child in children}
        )

    def test_add_dependency_parents_and_children(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parents = [Job("t", _id="parent1"), Job("t", _id="parent2")]
        children = [Job("t", _id="child1"), Job("t", _id="child2")]
        wf.add_jobs(*parents, *children)

        # add nothing
        wf.add_dependency(job)
        assert len(wf.dependencies) == 0

        wf.add_dependency(job, parents=parents, children=children)

        for parent in parents:
            assert wf.dependencies[parent._id] == _JobDependency(
                parent._id, {job._id}
            )

        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {child._id for child in children}
        )

    def test_add_duplicate_parent_dependency(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parent = Job("t", _id="parent")
        wf.add_jobs(job, parent)

        with pytest.raises(DuplicateError) as e:
            wf.add_dependency(job, parents=[parent, parent])

        assert (
            "A dependency already exists between parent id: parent and job id: job"
            in str(e)
        )

    def test_add_duplicate_child_dependency(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        child = Job("t", _id="child")
        wf.add_jobs(job, child)

        with pytest.raises(DuplicateError) as e:
            wf.add_dependency(job, children=[child, child])

        assert (
            "A dependency already exists between job id: job and child id: child"
            in str(e)
        )

    def test_add_dependency_invalid_job(self):
        wf = Workflow("wf")
        job = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job)

        assert "The given job does not have an id" in str(e)

    def test_add_dependency_invalid_parent(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parent = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job, parents=[parent])

        assert "One of the given parents does not have an id" in str(e)

    def test_add_dependency_invalid_child(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        child = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job, children=[child])

        assert "One of the given children does not have an id" in str(e)

    def test_infer_dependencies_fork_join_wf(self):
        wf = Workflow("wf")

        f1 = File("f1")
        f2 = File("f2")
        f3 = File("f3")
        f4 = File("f4")

        fork = Job("t1", _id="fork").add_outputs(f1, f2)
        work1 = Job("t1", _id="work1").add_inputs(f1).add_outputs(f3)
        work2 = Job("t1", _id="work2").add_inputs(f2).add_outputs(f4)
        join = Job("t1", _id="join").add_inputs(f3, f4)
        wf.add_jobs(fork, work1, work2, join)

        # manually call _infer_dependencies() as it is only called when
        # wf.write() is called
        wf._infer_dependencies()

        assert wf.dependencies["fork"] == _JobDependency(
            "fork", {"work1", "work2"}
        )
        assert wf.dependencies["work1"] == _JobDependency("work1", {"join"})
        assert wf.dependencies["work2"] == _JobDependency("work2", {"join"})

    def test_infer_dependencies_when_job_uses_stdin_stdout_and_stderr(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="j1").add_outputs(File("f1"))
        j2 = Job("t1", _id="j2").set_stdin(*j1.get_outputs()).set_stdout(File("f2"))
        j3 = Job("t1", _id="j3").add_inputs(*j2.get_outputs())
        wf.add_jobs(j1, j2, j3)

        # manually call _infer_dependencies() as it is only called when
        # wf.write() is called
        wf._infer_dependencies()

        assert wf.dependencies["j1"] == _JobDependency("j1", {"j2"})
        assert wf.dependencies["j2"] == _JobDependency("j2", {"j3"})

    def test_tojson(
        self, convert_yaml_schemas_to_json, load_schema, wf, expected_json
    ):
        result = json.loads(json.dumps(wf, cls=_CustomEncoder))

        workflow_schema = load_schema("wf-5.0.json")
        validate(instance=result, schema=workflow_schema)

        result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])
        result["jobs"][0]["uses"] = sorted(
            result["jobs"][0]["uses"], key=lambda u: u["lfn"]
        )
        result["jobs"][1]["uses"] = sorted(
            result["jobs"][1]["uses"], key=lambda u: u["lfn"]
        )

        assert result == expected_json

    @pytest.mark.parametrize(
        "_format, loader", [("json", json.load), ("yml", yaml.safe_load)]
    )
    def test_write_file_obj(
        self,
        convert_yaml_schemas_to_json,
        load_schema,
        wf,
        expected_json,
        _format,
        loader,
    ):
        with NamedTemporaryFile("r+") as f:
            wf.write(f, _format=_format)

            # _path should be set by the call to write
            assert wf._path == f.name

            f.seek(0)
            result = loader(f)

        workflow_schema = load_schema("wf-5.0.json")
        validate(instance=result, schema=workflow_schema)

        result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])
        result["jobs"][0]["uses"] = sorted(
            result["jobs"][0]["uses"], key=lambda u: u["lfn"]
        )
        result["jobs"][1]["uses"] = sorted(
            result["jobs"][1]["uses"], key=lambda u: u["lfn"]
        )

        assert result == expected_json

    def test_write_str_filename(self, wf, load_schema, expected_json):
        path = "wf.yml"
        wf.write(path)

        # _path should be set by the call to write
        assert wf._path == path

        with open(path) as f:
            result = yaml.safe_load(f)

        workflow_schema = load_schema("wf-5.0.json")
        validate(instance=result, schema=workflow_schema)

        result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])
        result["jobs"][0]["uses"] = sorted(
            result["jobs"][0]["uses"], key=lambda u: u["lfn"]
        )
        result["jobs"][1]["uses"] = sorted(
            result["jobs"][1]["uses"], key=lambda u: u["lfn"]
        )

        assert result == expected_json

        os.remove(path)

    def test_write_default_filename(self, wf, expected_json):
        wf.write()
        EXPECTED_FILE = "workflow.yml"

        with open(EXPECTED_FILE) as f:
            result = yaml.safe_load(f)

        result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])
        for i in range(len(result["jobs"])):
            result["jobs"][i]["uses"] = sorted(
                result["jobs"][i]["uses"], key=lambda u: u["lfn"]
            )

        assert result == expected_json

        os.remove(EXPECTED_FILE)

    def test_write_wf_catalogs_included(self):
        wf = Workflow("test")
        wf.add_jobs(Job("ls"))

        wf.add_transformation_catalog(TransformationCatalog())
        wf.add_site_catalog(SiteCatalog())
        wf.add_replica_catalog(ReplicaCatalog())

        wf_path = Path("workflow.yml")
        with wf_path.open("w+") as f:
            wf.write(f)
            f.seek(0)
            result = yaml.safe_load(f)

        expected = {
            "pegasus": "5.0",
            "name": "test",
            "siteCatalog": {"sites": []},
            "replicaCatalog": {"replicas": []},
            "transformationCatalog": {"transformations": []},
            "jobs": [
                {
                    "type": "job",
                    "name": "ls",
                    "id": "ID0000001",
                    "arguments": [],
                    "uses": [],
                }
            ],
            "jobDependencies": [],
        }

        assert expected == result

        wf_path.unlink()

    def test_write_valid_hierarchical_workflow(self, mocker):
        mocker.patch("Pegasus.api.workflow.Workflow.write")

        try:
            wf = Workflow("test")
            wf.add_jobs(SubWorkflow("file", False))
            wf.write(file="workflow.yml", _format="yml")
        except PegasusError:
            pytest.fail("shouldn't have thrown PegasusError")

        Pegasus.api.workflow.Workflow.write.assert_called_once_with(
            file="workflow.yml", _format="yml"
        )

    @pytest.mark.parametrize(
        "sc, tc",
        [
            (SiteCatalog(), None),
            (None, TransformationCatalog()),
            (SiteCatalog(), TransformationCatalog()),
        ],
    )
    def test_write_hierarchical_workflow_when_catalogs_are_inlined(self, sc, tc):
        wf = Workflow("test")
        wf.add_jobs(SubWorkflow("file", False))

        if sc:
            wf.add_site_catalog(sc)

        if tc:
            wf.add_transformation_catalog(tc)

        with pytest.raises(PegasusError) as e:
            wf.write()

        assert (
            "Site Catalog and Transformation Catalog must be written as a separate"
            in str(e)
        )

    def test_workflow_key_ordering_on_yml_write(self):
        tc = TransformationCatalog()
        rc = ReplicaCatalog()
        sc = SiteCatalog()

        wf = Workflow("wf")
        wf.add_transformation_catalog(tc)
        wf.add_replica_catalog(rc)
        wf.add_site_catalog(sc)

        wf.add_jobs(Job("t1", _id="a"))

        wf.add_env(JAVA_HOME="/java/home")
        wf.add_shell_hook(EventType.START, "/bin/echo hi")
        wf.add_metadata(key="value")

        wf.write()
        EXPECTED_FILE = Path("workflow.yml")

        with EXPECTED_FILE.open() as f:
            # reading in as str so ordering of keys is not disrupted
            # when loaded into a dict
            result = f.read()

        EXPECTED_FILE.unlink()

        """
        Check that wf keys have been ordered as follows (while ignoring
        nested keys):
        - pegasus
        - name
        - hooks
        - profiles
        - metadata
        - siteCatalog
        - replicaCatalog
        - transformationCatalog
        - jobs
        - jobDependencies
        """
        p = re.compile(
            r"pegasus: '5.0'[\w\W]+name:[\w\W]+hooks:[\w\W]+profiles:[\w\W]+metadata:[\w\W]+siteCatalog:[\w\W]+replicaCatalog:[\w\W]+transformationCatalog:[\w\W]+jobs:[\w\W]+jobDependencies:[\w\W]+"
        )
        assert p.match(result) is not None

    def test_plan_workflow_already_written(self, wf, mocker):
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        mocker.patch("Pegasus.client._client.Client.plan")

        path = "wf.yml"
        wf.write(path).plan()

        assert wf._path == path

        Pegasus.client._client.Client.plan.assert_called_once_with(
            path,
            cleanup="none",
            conf=None,
            dir=None,
            force=False,
            input_dirs=None,
            output_dir=None,
            output_sites=["local"],
            relative_dir=None,
            sites=None,
            staging_sites=None,
            submit=False,
            verbose=0,
        )

        os.remove(path)

    def test_plan_workflow_not_written(self, wf, mocker):
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        mocker.patch("Pegasus.client._client.Client.plan")

        DEFAULT_WF_PATH = "workflow.yml"
        wf.plan()

        assert wf._path == DEFAULT_WF_PATH

        Pegasus.client._client.Client.plan.assert_called_once_with(
            DEFAULT_WF_PATH,
            cleanup="none",
            conf=None,
            dir=None,
            force=False,
            input_dirs=None,
            output_dir=None,
            output_sites=["local"],
            relative_dir=None,
            sites=None,
            staging_sites=None,
            submit=False,
            verbose=0,
        )

        os.remove(DEFAULT_WF_PATH)

    def test_run(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.run")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf.run()

        Pegasus.client._client.Client.run.assert_called_once_with(None, verbose=0)

    def test_status(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.status")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf._submit_dir = "submit_dir"
        wf.status()

        Pegasus.client._client.Client.status.assert_called_once_with(
            wf._submit_dir, long=0, verbose=0
        )

    def test_remove(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.remove")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf._submit_dir = "submit_dir"
        wf.remove()

        Pegasus.client._client.Client.remove.assert_called_once_with(
            wf._submit_dir, verbose=0
        )

    def test_analyze(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.analyzer")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf._submit_dir = "submit_dir"
        wf.analyze()

        Pegasus.client._client.Client.analyzer.assert_called_once_with(
            wf._submit_dir, verbose=0
        )

    def test_statistics(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.statistics")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf._submit_dir = "submit_dir"
        wf.statistics()

        Pegasus.client._client.Client.statistics.assert_called_once_with(
            wf._submit_dir, verbose=0
        )
def test_tojson_no_mixins(self):
    j = Job("t1", namespace="ns", node_label="label", _id="id", version="1")
    j.set_stdin("stdin")
    j.set_stdout("stdout")
    j.set_stderr("stderr")
    j.add_args("-i", File("f1"), "-n", 1, 1.1)
    j.add_inputs(File("if1"), File("if2"))
    j.add_outputs(File("of1"), File("of2"))

    result = json.loads(json.dumps(j, cls=_CustomEncoder))
    result["uses"] = sorted(result["uses"], key=lambda use: use["lfn"])

    expected = {
        "type": "job",
        "name": "t1",
        "namespace": "ns",
        "id": "id",
        "nodeLabel": "label",
        "version": "1",
        "arguments": ["-i", "f1", "-n", 1, 1.1],
        "stdin": "stdin",
        "stdout": "stdout",
        "stderr": "stderr",
        "uses": [
            {"lfn": "stdin", "type": "input"},
            {"lfn": "if1", "type": "input"},
            {"lfn": "if2", "type": "input"},
            {
                "lfn": "stdout",
                "type": "output",
                "stageOut": True,
                "registerReplica": False,
            },
            {
                "lfn": "stderr",
                "type": "output",
                "stageOut": True,
                "registerReplica": False,
            },
            {
                "lfn": "of1",
                "type": "output",
                "stageOut": True,
                "registerReplica": False,
            },
            {
                "lfn": "of2",
                "type": "output",
                "stageOut": True,
                "registerReplica": False,
            },
        ],
    }
    expected["uses"] = sorted(expected["uses"], key=lambda use: use["lfn"])

    assert result == expected
def test_tojson_no_metadata(self, lfn, size, expected):
    assert File(lfn, size).__json__() == expected
class Test_Use:
    def test_valid_use(self):
        assert _Use(File("a"), _LinkType.INPUT)

    def test_invalid_use_bad_file(self):
        with pytest.raises(TypeError) as e:
            _Use(123, _LinkType.INPUT)

        assert "invalid file: 123; file must be of type File" in str(e)

    def test_invalid_use_bad_link_type(self):
        with pytest.raises(TypeError) as e:
            _Use(File("a"), "link")

        assert "invalid link_type: link;" in str(e)

    def test_eq(self):
        assert _Use(File("a"), _LinkType.INPUT) == _Use(File("a"), _LinkType.OUTPUT)
        assert _Use(File("a"), _LinkType.INPUT) != _Use(File("b"), _LinkType.INPUT)

    def test_eq_invalid(self):
        with pytest.raises(ValueError) as e:
            _Use(File("a"), _LinkType.INPUT) == "use"

        assert "_Use cannot be compared with" in str(e)

    @pytest.mark.parametrize(
        "use, expected",
        [
            (
                _Use(
                    File("a"), _LinkType.INPUT, stage_out=None, register_replica=False
                ),
                {
                    "lfn": "a",
                    "type": "input",
                    "registerReplica": False,
                },
            ),
            (
                _Use(
                    File("a"), _LinkType.INPUT, stage_out=None, register_replica=None
                ),
                {
                    "lfn": "a",
                    "type": "input",
                },
            ),
            (
                _Use(
                    File("a", size=2048).add_metadata(createdBy="ryan"),
                    _LinkType.OUTPUT,
                    stage_out=False,
                    register_replica=True,
                ),
                {
                    "lfn": "a",
                    "size": 2048,
                    "metadata": {"createdBy": "ryan"},
                    "type": "output",
                    "stageOut": False,
                    "registerReplica": True,
                },
            ),
            (
                _Use(
                    File("a", size=1024),
                    _LinkType.CHECKPOINT,
                    stage_out=True,
                    register_replica=True,
                ),
                {
                    "lfn": "a",
                    "size": 1024,
                    "type": "checkpoint",
                    "stageOut": True,
                    "registerReplica": True,
                },
            ),
        ],
    )
    def test_tojson(self, use, expected):
        result = json.loads(json.dumps(use, cls=_CustomEncoder))

        assert result == expected
def test_eq(self):
    assert File("a") == File("a")
    assert File("a") != File("b")
    assert File("a") != 1
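# Hedged addition, not in the original suite: File objects are compared
# inside sets throughout these tests (e.g. job.get_outputs() == {f1}), which
# requires __hash__ to be consistent with __eq__; this sketch checks that.
# The test name is hypothetical.
def test_hash_consistent_with_eq(self):
    assert hash(File("a")) == hash(File("a"))
    assert File("a") in {File("a")}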