def wf3():
    wf = Workflow("test")
    wf.add_jobs(Job("ls"))
    wf.add_site_catalog(SiteCatalog())
    wf.add_transformation_catalog(TransformationCatalog())
    wf.add_replica_catalog(ReplicaCatalog())

    return wf
def test_job_id_assignment_by_workflow(self):
    wf = Workflow("wf")
    j1 = Job("t1", _id="a")
    j2 = Job("t2")
    j3 = Job("t3", _id="b")
    j4 = Job("t4")
    j5 = Job("t5")
    wf.add_jobs(j1, j2, j3, j4, j5)

    assert j2._id == "ID0000001"
    assert j4._id == "ID0000002"
    assert j5._id == "ID0000003"
def test_write_valid_hierarchical_workflow(self, mocker):
    mocker.patch("Pegasus.api.workflow.Workflow.write")
    try:
        wf = Workflow("test")
        wf.add_jobs(SubWorkflow("file", False))
        wf.write(file="workflow.yml", _format="yml")
    except PegasusError:
        pytest.fail("shouldn't have thrown PegasusError")

    Pegasus.api.workflow.Workflow.write.assert_called_once_with(
        file="workflow.yml", _format="yml"
    )
def test_add_duplicate_parent_dependency(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    parent = Job("t", _id="parent")
    wf.add_jobs(job, parent)

    with pytest.raises(DuplicateError) as e:
        wf.add_dependency(job, parents=[parent, parent])

    assert (
        "A dependency already exists between parent id: parent and job id: job"
        in str(e)
    )
def test_add_duplicate_child_dependency(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    child = Job("t", _id="child")
    wf.add_jobs(job, child)

    with pytest.raises(DuplicateError) as e:
        wf.add_dependency(job, children=[child, child])

    assert (
        "A dependency already exists between job id: job and child id: child"
        in str(e)
    )
def test_infer_dependencies_when_job_uses_stdin_stdout_and_stderr(self):
    wf = Workflow("wf")
    j1 = Job("t1", _id="j1").add_outputs(File("f1"))
    j2 = Job("t1", _id="j2").set_stdin(*j1.get_outputs()).set_stdout(File("f2"))
    j3 = Job("t1", _id="j3").add_inputs(*j2.get_outputs())
    wf.add_jobs(j1, j2, j3)

    # manually call _infer_dependencies() as it is only called when
    # wf.write() is called
    wf._infer_dependencies()

    assert wf.dependencies["j1"] == _JobDependency("j1", {"j2"})
    assert wf.dependencies["j2"] == _JobDependency("j2", {"j3"})
def test_add_dependency_parents(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    parents = [
        Job("t", _id="parent1"),
        Job("t", _id="parent2"),
        Job("t", _id="parent3"),
    ]
    wf.add_jobs(job, *parents)

    wf.add_dependency(job, parents=[parents[0]])
    wf.add_dependency(job, parents=parents[1:])

    for parent in parents:
        assert wf.dependencies[parent._id] == _JobDependency(parent._id, {job._id})
def test_write_hierarchical_workflow_when_catalogs_are_inlined(self, sc, tc):
    wf = Workflow("test")
    wf.add_jobs(SubWorkflow("file", False))

    if sc:
        wf.add_site_catalog(sc)

    if tc:
        wf.add_transformation_catalog(tc)

    with pytest.raises(PegasusError) as e:
        wf.write()

    assert (
        "Site Catalog and Transformation Catalog must be written as a separate"
        in str(e)
    )
def test_add_dependency_children(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    children = [
        Job("t", _id="child1"),
        Job("t", _id="child2"),
        Job("t", _id="child3"),
    ]
    wf.add_jobs(job, *children)

    wf.add_dependency(job, children=[children[0]])
    assert wf.dependencies[job._id] == _JobDependency(job._id, {children[0]._id})

    wf.add_dependency(job, children=children[1:])
    assert wf.dependencies[job._id] == _JobDependency(
        job._id, {child._id for child in children}
    )
def test_workflow_key_ordering_on_yml_write(self):
    tc = TransformationCatalog()
    rc = ReplicaCatalog()
    sc = SiteCatalog()

    wf = Workflow("wf")
    wf.add_transformation_catalog(tc)
    wf.add_replica_catalog(rc)
    wf.add_site_catalog(sc)
    wf.add_jobs(Job("t1", _id="a"))
    wf.add_env(JAVA_HOME="/java/home")
    wf.add_shell_hook(EventType.START, "/bin/echo hi")
    wf.add_metadata(key="value")

    wf.write()
    EXPECTED_FILE = Path("workflow.yml")

    with EXPECTED_FILE.open() as f:
        # read in as str so the ordering of keys is not disrupted
        # when loaded into a dict
        result = f.read()

    EXPECTED_FILE.unlink()

    """
    Check that wf keys have been ordered as follows (while ignoring nested keys):
    - pegasus
    - name
    - hooks
    - profiles
    - metadata
    - siteCatalog
    - replicaCatalog
    - transformationCatalog
    - jobs
    - jobDependencies
    """
    p = re.compile(
        r"pegasus: '5.0'[\w\W]+name:[\w\W]+hooks:[\w\W]+profiles:[\w\W]+"
        r"metadata:[\w\W]+siteCatalog:[\w\W]+replicaCatalog:[\w\W]+"
        r"transformationCatalog:[\w\W]+jobs:[\w\W]+jobDependencies:[\w\W]+"
    )
    assert p.match(result) is not None
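# An alternative check (assumed sketch, not part of the original test): pull the
# top-level keys out of the written YAML in document order and compare that
# sequence directly, instead of matching the whole file with one regex.
def _top_level_key_order(yml_text):
    # top-level mapping keys start in column 0; nested keys and list items are
    # indented or begin with "-", so they are skipped here
    return [
        line.split(":", 1)[0]
        for line in yml_text.splitlines()
        if line and line[0].isalpha() and ":" in line
    ]
    # e.g. _top_level_key_order(result) should list the keys in the same order
    # as the docstring in test_workflow_key_ordering_on_yml_write above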
def test_write_wf_catalogs_included(self):
    wf = Workflow("test")
    wf.add_jobs(Job("ls"))
    wf.add_transformation_catalog(TransformationCatalog())
    wf.add_site_catalog(SiteCatalog())
    wf.add_replica_catalog(ReplicaCatalog())

    wf_path = Path("workflow.yml")
    with wf_path.open("w+") as f:
        wf.write(f)
        f.seek(0)
        result = yaml.load(f)

    expected = {
        "pegasus": "5.0",
        "name": "test",
        "siteCatalog": {"sites": []},
        "replicaCatalog": {"replicas": []},
        "transformationCatalog": {"transformations": []},
        "jobs": [
            {
                "type": "job",
                "name": "ls",
                "id": "ID0000001",
                "arguments": [],
                "uses": [],
            }
        ],
        "jobDependencies": [],
    }

    assert expected == result

    wf_path.unlink()
def wf():
    wf = Workflow("wf")
    j1 = (
        Job("t1", _id="a")
        .add_outputs(File("f1"), File("f2"))
        .add_args(File("do-nothing"), "-n", 1, 1.1)
        .set_stdin("stdin")
        .set_stdout("stdout")
        .set_stderr("stderr")
    )
    j2 = (
        Job("t1", _id="b")
        .add_inputs(File("f1"), File("f2"))
        .add_checkpoint(File("checkpoint"))
    )
    j3 = SubWorkflow("subworkflow.dag", True, _id="c").add_args(
        "--sites", "condorpool"
    )
    j4 = SubWorkflow(File("subworkflow.dax"), False, _id="d")
    wf.add_jobs(j1, j2, j3, j4)

    wf._infer_dependencies()

    wf.add_env(JAVA_HOME="/java/home")
    wf.add_shell_hook(EventType.START, "/bin/echo hi")
    wf.add_metadata(key="value")

    return wf
def test_add_dependency_parents_and_children(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    parents = [Job("t", _id="parent1"), Job("t", _id="parent2")]
    children = [Job("t", _id="child1"), Job("t", _id="child2")]

    wf.add_jobs(*parents, *children)

    # add nothing
    wf.add_dependency(job)
    assert len(wf.dependencies) == 0

    wf.add_dependency(job, parents=parents, children=children)

    for parent in parents:
        assert wf.dependencies[parent._id] == _JobDependency(parent._id, {job._id})

    assert wf.dependencies[job._id] == _JobDependency(
        job._id, {child._id for child in children}
    )
def test_infer_dependencies_fork_join_wf(self):
    wf = Workflow("wf")
    f1 = File("f1")
    f2 = File("f2")
    f3 = File("f3")
    f4 = File("f4")

    fork = Job("t1", _id="fork").add_outputs(f1, f2)
    work1 = Job("t1", _id="work1").add_inputs(f1).add_outputs(f3)
    work2 = Job("t1", _id="work2").add_inputs(f2).add_outputs(f4)
    join = Job("t1", _id="join").add_inputs(f3, f4)
    wf.add_jobs(fork, work1, work2, join)

    # manually call _infer_dependencies() as it is only called when
    # wf.write() is called
    wf._infer_dependencies()

    assert wf.dependencies["fork"] == _JobDependency("fork", {"work1", "work2"})
    assert wf.dependencies["work1"] == _JobDependency("work1", {"join"})
    assert wf.dependencies["work2"] == _JobDependency("work2", {"join"})
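# Illustrative sketch of the file-based rule the two _infer_dependencies tests
# above exercise (assumed helper, not the library's internal implementation):
# a job that consumes a file produced by another job becomes that job's child.
def _expected_file_dependencies(jobs):
    # map each output lfn to the id of the job that produces it
    producers = {f.lfn: j._id for j in jobs for f in j.get_outputs()}

    # every consumer of a produced file is recorded as a child of the producer
    children = {}
    for j in jobs:
        for f in j.get_inputs():
            if f.lfn in producers:
                children.setdefault(producers[f.lfn], set()).add(j._id)

    # for the fork-join jobs above this yields, per producer, the same
    # _JobDependency values asserted in test_infer_dependencies_fork_join_wf
    return {parent: _JobDependency(parent, kids) for parent, kids in children.items()}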
def test_get_job(self):
    wf = Workflow("wf")
    j1 = Job("t1", _id="j1")
    wf.add_jobs(j1)

    assert j1 == wf.get_job("j1")
def test_add_job(self, job):
    wf = Workflow("wf")
    wf.add_jobs(job)

    assert job == wf.get_job("job")
def _to_wf(d: dict) -> Workflow:
    """Convert dict to Workflow

    :param d: Workflow represented as a dict
    :type d: dict
    :raises PegasusError: encountered error parsing
    :return: a Workflow object based on d
    :rtype: Workflow
    """
    try:
        wf = Workflow(d["name"], infer_dependencies=False)

        # add rc
        if "replicaCatalog" in d:
            wf.replica_catalog = _to_rc(d["replicaCatalog"])

        # add tc
        if "transformationCatalog" in d:
            wf.transformation_catalog = _to_tc(d["transformationCatalog"])

        # add sc
        if "siteCatalog" in d:
            wf.site_catalog = _to_sc(d["siteCatalog"])

        # add jobs
        for j in d["jobs"]:
            # create the appropriate job based on type
            if j["type"] == "job":
                job = Job(
                    j["name"],
                    _id=j["id"],
                    node_label=j.get("nodeLabel"),
                    namespace=j.get("namespace"),
                    version=j.get("version"),
                )
            elif j["type"] in {"pegasusWorkflow", "condorWorkflow"}:
                f = File(j["file"])
                is_planned = False if j["type"] == "pegasusWorkflow" else True

                job = SubWorkflow(
                    f, is_planned, _id=j["id"], node_label=j.get("nodeLabel")
                )
            else:
                raise ValueError

            # add args
            args = list()
            for a in j["arguments"]:
                args.append(a)
            job.args = args

            # add uses
            uses = set()
            for u in j["uses"]:
                f = File(u["lfn"], size=u.get("size"))
                try:
                    f.metadata = u["metadata"]
                except KeyError:
                    pass

                uses.add(
                    _Use(
                        f,
                        getattr(_LinkType, u["type"].upper()),
                        stage_out=u.get("stageOut"),
                        register_replica=u.get("registerReplica"),
                        bypass_staging=u.get("bypass"),
                    )
                )
            job.uses = uses

            # set stdin
            if "stdin" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdin"]:
                        job.stdin = u.file
                        break

            # set stdout
            if "stdout" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdout"]:
                        job.stdout = u.file
                        break

            # set stderr
            if "stderr" in j:
                for u in job.uses:
                    if u.file.lfn == j["stderr"]:
                        job.stderr = u.file
                        break

            # add profiles
            if j.get("profiles"):
                job.profiles = defaultdict(dict, j.get("profiles"))

            # add metadata
            if j.get("metadata"):
                job.metadata = j.get("metadata")

            # add hooks
            if j.get("hooks"):
                job.hooks = defaultdict(list, j.get("hooks"))

            # add job to wf
            wf.add_jobs(job)

        # add dependencies
        if d.get("jobDependencies"):
            dependencies = defaultdict(_JobDependency)
            for item in d.get("jobDependencies"):
                dependencies[item["id"]] = _JobDependency(
                    item["id"], {child for child in item["children"]}
                )

            wf.dependencies = dependencies

        # add profiles
        if d.get("profiles"):
            wf.profiles = defaultdict(dict, d.get("profiles"))

        # add metadata
        if d.get("metadata"):
            wf.metadata = d.get("metadata")

        # add hooks
        if d.get("hooks"):
            wf.hooks = defaultdict(list, d.get("hooks"))

        return wf
    except (KeyError, ValueError):
        raise PegasusError("error parsing {}".format(d))
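# Illustrative sketch (assumed, not part of the original module): feeding a
# minimal workflow dict through _to_wf. The field values are made up, but the
# layout mirrors the "jobs" entries the write tests above expect.
def _example_to_wf_usage():
    minimal = {
        "name": "example-wf",
        "jobs": [
            {
                "type": "job",
                "name": "ls",
                "id": "ID0000001",
                "arguments": [],
                "uses": [],
            }
        ],
        "jobDependencies": [],
    }

    wf = _to_wf(minimal)

    # the single job is retrievable by the id it was given in the dict
    assert wf.get_job("ID0000001") is not None
    return wf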
def test_add_duplicate_job(self):
    wf = Workflow("wf")
    with pytest.raises(DuplicateError):
        wf.add_jobs(Job("t1", _id="j1"), Job("t2", _id="j1"))