def test_add_transformation_catalog(self):
    tc = TransformationCatalog()
    wf = Workflow("wf")
    try:
        wf.add_transformation_catalog(tc)
    except Exception:
        pytest.fail("should not have raised exception")
def test_add_replica_catalog(self):
    rc = ReplicaCatalog()
    wf = Workflow("wf")
    try:
        wf.add_replica_catalog(rc)
    except Exception:
        pytest.fail("should not have raised exception")
def wf3():
    wf = Workflow("test")
    wf.add_jobs(Job("ls"))
    wf.add_site_catalog(SiteCatalog())
    wf.add_transformation_catalog(TransformationCatalog())
    wf.add_replica_catalog(ReplicaCatalog())

    return wf
def test_add_dependency_invalid_job(self):
    wf = Workflow("wf")
    job = Job("t")
    with pytest.raises(ValueError) as e:
        wf.add_dependency(job)

    assert "The given job does not have an id" in str(e)
def test_add_site_catalog(self):
    sc = SiteCatalog()
    wf = Workflow("wf")
    try:
        wf.add_site_catalog(sc)
    except Exception:
        pytest.fail("should not have raised exception")
def test_add_dependency_invalid_parent(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    parent = Job("t")
    with pytest.raises(ValueError) as e:
        wf.add_dependency(job, parents=[parent])

    assert "One of the given parents does not have an id" in str(e)
def test_add_dependency_invalid_child(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    child = Job("t")
    with pytest.raises(ValueError) as e:
        wf.add_dependency(job, children=[child])

    assert "One of the given children does not have an id" in str(e)
def test_job_id_assignment_by_workflow(self):
    wf = Workflow("wf")
    j1 = Job("t1", _id="a")
    j2 = Job("t2")
    j3 = Job("t3", _id="b")
    j4 = Job("t4")
    j5 = Job("t5")
    wf.add_jobs(j1, j2, j3, j4, j5)

    assert j2._id == "ID0000001"
    assert j4._id == "ID0000002"
    assert j5._id == "ID0000003"
def dump(obj: Workflow, fp: TextIO, _format="yml", *args, **kwargs) -> None:
    """
    Serialize ``obj`` as a :py:class:`~Pegasus.api.workflow.Workflow` formatted stream
    to ``fp`` (a ``.write()``-supporting file-like object).

    :param obj: Workflow to serialize
    :type obj: Workflow
    :param fp: file like object to serialize to
    :type fp: TextIO
    :param _format: format to write to if fp does not have an extension; can be one of ["yml" | "yaml" | "json"], defaults to "yml"
    :type _format: str
    :rtype: None
    """
    obj.write(fp, _format=_format)
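# Usage sketch for ``dump`` (illustrative only; assumes ``wf`` is an existing
# Workflow instance and the output file name is hypothetical):
#
#   with open("workflow.yml", "w") as f:
#       dump(wf, f, _format="yml")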
def dumps(obj: Workflow, _format="yml", *args, **kwargs) -> str:
    """
    Serialize ``obj`` to a :py:class:`~Pegasus.api.workflow.Workflow` formatted ``str``.

    :param obj: Workflow to serialize
    :type obj: Workflow
    :param _format: format to write to; can be one of ["yml" | "yaml" | "json"], defaults to "yml"
    :type _format: str
    :return: Workflow serialized as a string
    :rtype: str
    """
    with StringIO() as s:
        obj.write(s, _format=_format)
        s.seek(0)
        return s.read()
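# Usage sketch for ``dumps`` (illustrative only; assumes ``wf`` is an existing
# Workflow instance):
#
#   yaml_str = dumps(wf)                  # defaults to YAML
#   json_str = dumps(wf, _format="json")  # explicit JSON output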
def wf2():
    _in = File("in")
    stdin = File("stdin")
    stdout = File("stdout")
    stderr = File("stderr")
    out = File("out")
    out2 = File("out2")
    pegasus_workflow = File("pegasus_workflow")
    condor_workflow = File("condor_workflow")

    j1 = (
        Job("tr", _id="1", node_label="test")
        .add_args("-i", _in, "-o", out)
        .add_inputs(_in)
        .add_outputs(out)
        .set_stdin(stdin)
        .set_stdout(stdout)
        .set_stderr(stderr)
    )

    j2 = (
        Job("tr2", _id="2", node_label="test")
        .add_args("-i", out, "-o", out2)
        .add_inputs(out)
        .add_outputs(out2)
    )

    sbwf_pegasus = SubWorkflow(
        pegasus_workflow, False, _id="unplanned", node_label="test"
    ).add_args("-flag", "-flag2")

    sbwf_condor = SubWorkflow(condor_workflow, True, _id="planned", node_label="test")

    return (
        Workflow("test", infer_dependencies=False)
        .add_jobs(j1, j2, sbwf_pegasus, sbwf_condor)
        .add_dependency(j1, children=[j2])
    )
def test_write_hierarchical_workflow_when_catalogs_are_inlined(self, sc, tc):
    wf = Workflow("test")
    wf.add_jobs(SubWorkflow("file", False))

    if sc:
        wf.add_site_catalog(sc)

    if tc:
        wf.add_transformation_catalog(tc)

    with pytest.raises(PegasusError) as e:
        wf.write()

    assert (
        "Site Catalog and Transformation Catalog must be written as a separate"
        in str(e)
    )
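# For hierarchical workflows, the test above expects catalogs to be written to
# their own files rather than inlined in the workflow. A minimal sketch of the
# valid pattern (file names are illustrative assumptions):
#
#   sc.write("sites.yml")
#   tc.write("transformations.yml")
#   wf.write("workflow.yml")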
def test_add_duplicate_transformation_catalog(self):
    tc = TransformationCatalog()
    wf = Workflow("wf")
    wf.add_transformation_catalog(tc)
    with pytest.raises(DuplicateError) as e:
        wf.add_transformation_catalog(tc)

    assert "a TransformationCatalog has already" in str(e)
def test_add_duplicate_replica_catalog(self):
    rc = ReplicaCatalog()
    wf = Workflow("wf")
    wf.add_replica_catalog(rc)
    with pytest.raises(DuplicateError) as e:
        wf.add_replica_catalog(rc)

    assert "a ReplicaCatalog has already" in str(e)
def test_add_duplicate_site_catalog(self):
    sc = SiteCatalog()
    wf = Workflow("wf")
    wf.add_site_catalog(sc)
    with pytest.raises(DuplicateError) as e:
        wf.add_site_catalog(sc)

    assert "a SiteCatalog has already" in str(e)
def test_add_dependency_parents(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    parents = [
        Job("t", _id="parent1"),
        Job("t", _id="parent2"),
        Job("t", _id="parent3"),
    ]

    wf.add_jobs(job, *parents)

    wf.add_dependency(job, parents=[parents[0]])
    wf.add_dependency(job, parents=parents[1:])

    for parent in parents:
        assert wf.dependencies[parent._id] == _JobDependency(parent._id, {job._id})
def test_write_valid_hierarchical_workflow(self, mocker):
    mocker.patch("Pegasus.api.workflow.Workflow.write")

    try:
        wf = Workflow("test")
        wf.add_jobs(SubWorkflow("file", False))
        wf.write(file="workflow.yml", _format="yml")
    except PegasusError:
        pytest.fail("shouldn't have thrown PegasusError")

    Pegasus.api.workflow.Workflow.write.assert_called_once_with(
        file="workflow.yml", _format="yml"
    )
def test_add_dependency_children(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    children = [
        Job("t", _id="child1"),
        Job("t", _id="child2"),
        Job("t", _id="child3"),
    ]

    wf.add_jobs(job, *children)

    wf.add_dependency(job, children=[children[0]])
    assert wf.dependencies[job._id] == _JobDependency(job._id, {children[0]._id})

    wf.add_dependency(job, children=children[1:])
    assert wf.dependencies[job._id] == _JobDependency(
        job._id, {child._id for child in children}
    )
def test_add_duplicate_parent_dependency(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    parent = Job("t", _id="parent")

    wf.add_jobs(job, parent)

    with pytest.raises(DuplicateError) as e:
        wf.add_dependency(job, parents=[parent, parent])

    assert (
        "A dependency already exists between parent id: parent and job id: job"
        in str(e)
    )
def test_add_duplicate_child_dependency(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    child = Job("t", _id="child")

    wf.add_jobs(job, child)

    with pytest.raises(DuplicateError) as e:
        wf.add_dependency(job, children=[child, child])

    assert (
        "A dependency already exists between job id: job and child id: child"
        in str(e)
    )
def test_infer_dependencies_when_job_uses_stdin_stdout_and_stderr(self):
    wf = Workflow("wf")
    j1 = Job("t1", _id="j1").add_outputs(File("f1"))
    j2 = Job("t1", _id="j2").set_stdin(*j1.get_outputs()).set_stdout(File("f2"))
    j3 = Job("t1", _id="j3").add_inputs(*j2.get_outputs())
    wf.add_jobs(j1, j2, j3)

    # manually call _infer_dependencies() as it is only called when
    # wf.write() is called
    wf._infer_dependencies()

    assert wf.dependencies["j1"] == _JobDependency("j1", {"j2"})
    assert wf.dependencies["j2"] == _JobDependency("j2", {"j3"})
def test_add_dependency_parents_and_children(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    parents = [Job("t", _id="parent1"), Job("t", _id="parent2")]
    children = [Job("t", _id="child1"), Job("t", _id="child2")]

    wf.add_jobs(*parents, *children)

    # add nothing
    wf.add_dependency(job)
    assert len(wf.dependencies) == 0

    wf.add_dependency(job, parents=parents, children=children)

    for parent in parents:
        assert wf.dependencies[parent._id] == _JobDependency(parent._id, {job._id})

    assert wf.dependencies[job._id] == _JobDependency(
        job._id, {child._id for child in children}
    )
def wf1():
    in1 = File("in1", size=2048).add_metadata(createdBy="ryan")
    in2 = File("in2")
    stdin = File("stdin").add_metadata(size=1024)
    stdout = File("stdout").add_metadata(size=1024)
    stderr = File("stderr").add_metadata(size=1024)
    out = File("out").add_metadata(size=1024)
    out2 = File("out2").add_metadata(size=1024)
    pegasus_workflow = File("pegasus_workflow").add_metadata(size=2048)
    condor_workflow = File("condor_workflow").add_metadata(size=2048)

    j1 = (
        Job("tr", _id="1", node_label="test")
        .add_args("-i", in1, "-o", out, "-n", 1, 1.1)
        .add_inputs(in1)
        .add_inputs(in2, bypass_staging=True)
        .add_outputs(out)
        .set_stdin(stdin)
        .set_stdout(stdout)
        .set_stderr(stderr)
        .add_shell_hook(EventType.START, "/cmd2")
        .add_env(JAVA_HOME="/usr/bin/java")
        .add_metadata(xtra_info="123")
    )

    j2 = (
        Job("tr2", _id="2", node_label="test")
        .add_args("-i", out, "-o", out2)
        .add_inputs(out)
        .add_outputs(out2)
    )

    sbwf_pegasus = SubWorkflow(
        pegasus_workflow, False, _id="unplanned", node_label="test"
    ).add_args("-flag", "-flag2")

    sbwf_condor = SubWorkflow(condor_workflow, True, _id="planned", node_label="test")

    return (
        Workflow("test", infer_dependencies=False)
        .add_shell_hook(EventType.START, "/cmd")
        .add_dagman_profile(retry=1)
        .add_metadata(author="ryan")
        .add_jobs(j1, j2, sbwf_pegasus, sbwf_condor)
        .add_dependency(j1, children=[j2])
    )
def test_infer_dependencies_fork_join_wf(self):
    wf = Workflow("wf")

    f1 = File("f1")
    f2 = File("f2")
    f3 = File("f3")
    f4 = File("f4")

    fork = Job("t1", _id="fork").add_outputs(f1, f2)
    work1 = Job("t1", _id="work1").add_inputs(f1).add_outputs(f3)
    work2 = Job("t1", _id="work2").add_inputs(f2).add_outputs(f4)
    join = Job("t1", _id="join").add_inputs(f3, f4)
    wf.add_jobs(fork, work1, work2, join)

    # manually call _infer_dependencies() as it is only called when
    # wf.write() is called
    wf._infer_dependencies()

    assert wf.dependencies["fork"] == _JobDependency("fork", {"work1", "work2"})
    assert wf.dependencies["work1"] == _JobDependency("work1", {"join"})
    assert wf.dependencies["work2"] == _JobDependency("work2", {"join"})
def test_add_invalid_transformation_catalog(self):
    wf = Workflow("wf")
    with pytest.raises(TypeError) as e:
        wf.add_transformation_catalog(123)

    assert "invalid catalog: 123" in str(e)
def _to_wf(d: dict) -> Workflow:
    """Convert dict to Workflow

    :param d: Workflow represented as a dict
    :type d: dict
    :raises PegasusError: encountered error parsing
    :return: a Workflow object based on d
    :rtype: Workflow
    """
    try:
        wf = Workflow(d["name"], infer_dependencies=False)

        # add rc
        if "replicaCatalog" in d:
            wf.replica_catalog = _to_rc(d["replicaCatalog"])

        # add tc
        if "transformationCatalog" in d:
            wf.transformation_catalog = _to_tc(d["transformationCatalog"])

        # add sc
        if "siteCatalog" in d:
            wf.site_catalog = _to_sc(d["siteCatalog"])

        # add jobs
        for j in d["jobs"]:
            # create appropriate job based on type
            if j["type"] == "job":
                job = Job(
                    j["name"],
                    _id=j["id"],
                    node_label=j.get("nodeLabel"),
                    namespace=j.get("namespace"),
                    version=j.get("version"),
                )
            elif j["type"] in {"pegasusWorkflow", "condorWorkflow"}:
                f = File(j["file"])
                is_planned = j["type"] == "condorWorkflow"
                job = SubWorkflow(
                    f, is_planned, _id=j["id"], node_label=j.get("nodeLabel")
                )
            else:
                raise ValueError

            # add args
            args = list()
            for a in j["arguments"]:
                args.append(a)
            job.args = args

            # add uses
            uses = set()
            for u in j["uses"]:
                f = File(u["lfn"], size=u.get("size"))
                try:
                    f.metadata = u["metadata"]
                except KeyError:
                    pass

                uses.add(
                    _Use(
                        f,
                        getattr(_LinkType, u["type"].upper()),
                        stage_out=u.get("stageOut"),
                        register_replica=u.get("registerReplica"),
                        bypass_staging=u.get("bypass"),
                    )
                )
            job.uses = uses

            # set stdin
            if "stdin" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdin"]:
                        job.stdin = u.file
                        break

            # set stdout
            if "stdout" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdout"]:
                        job.stdout = u.file
                        break

            # set stderr
            if "stderr" in j:
                for u in job.uses:
                    if u.file.lfn == j["stderr"]:
                        job.stderr = u.file
                        break

            # add profiles
            if j.get("profiles"):
                job.profiles = defaultdict(dict, j.get("profiles"))

            # add metadata
            if j.get("metadata"):
                job.metadata = j.get("metadata")

            # add hooks
            if j.get("hooks"):
                job.hooks = defaultdict(list, j.get("hooks"))

            # add job to wf
            wf.add_jobs(job)

        # add dependencies
        if d.get("jobDependencies"):
            dependencies = defaultdict(_JobDependency)
            for item in d.get("jobDependencies"):
                dependencies[item["id"]] = _JobDependency(
                    item["id"], {child for child in item["children"]}
                )

            wf.dependencies = dependencies

        # add profiles
        if d.get("profiles"):
            wf.profiles = defaultdict(dict, d.get("profiles"))

        # add metadata
        if d.get("metadata"):
            wf.metadata = d.get("metadata")

        # add hooks
        if d.get("hooks"):
            wf.hooks = defaultdict(list, d.get("hooks"))

        return wf
    except (KeyError, ValueError):
        raise PegasusError("error parsing {}".format(d))
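# Usage sketch for ``_to_wf`` (illustrative; the dict below is a hypothetical
# minimal input of the shape the parser above expects):
#
#   wf_dict = {
#       "name": "test",
#       "jobs": [
#           {
#               "type": "job",
#               "name": "ls",
#               "id": "ID0000001",
#               "arguments": [],
#               "uses": [],
#           }
#       ],
#   }
#   wf = _to_wf(wf_dict)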
def test_get_job(self):
    wf = Workflow("wf")
    j1 = Job("t1", _id="j1")
    wf.add_jobs(j1)

    assert j1 == wf.get_job("j1")
def test_add_job(self, job):
    wf = Workflow("wf")
    wf.add_jobs(job)

    assert job == wf.get_job("job")
def test_add_duplicate_job(self):
    wf = Workflow("wf")
    with pytest.raises(DuplicateError):
        wf.add_jobs(Job("t1", _id="j1"), Job("t2", _id="j1"))
def test_get_invalid_job(self):
    wf = Workflow("wf")
    with pytest.raises(NotFoundError):
        wf.get_job("abc123")