def test_add_replica_str_as_lfn(self):
    """A plain-string lfn is stored under the (lfn, regex=False) key."""
    catalog = ReplicaCatalog()
    catalog.add_replica("local", "f.a", "/f.a")

    entry = _tojson(catalog.entries[("f.a", False)])
    assert entry == {"lfn": "f.a", "pfns": [{"site": "local", "pfn": "/f.a"}]}
def test_add_regex_replica(self):
    """A regex entry is keyed by (pattern, True) and serialized with regex=True."""
    catalog = ReplicaCatalog()
    catalog.add_regex_replica("local", "*.txt", "/path")

    assert _tojson(catalog.entries[("*.txt", True)]) == {
        "lfn": "*.txt",
        "pfns": [{"site": "local", "pfn": "/path"}],
        "regex": True,
    }
def dumps(obj: ReplicaCatalog, _format="yml", *args, **kwargs) -> str:
    """
    Serialize ``obj`` to a
    :py:class:`~Pegasus.api.replica_catalog.ReplicaCatalog` formatted ``str``.

    :param obj: ReplicaCatalog to serialize
    :type obj: ReplicaCatalog
    :param _format: format to write to if fp does not have an extension; can be one of ["yml" | "yaml" | "json"], defaults to "yml"
    :type _format: str
    :return: ReplicaCatalog serialized as a string
    :rtype: str
    """
    with StringIO() as buffer:
        obj.write(buffer, _format=_format)
        # getvalue() returns the full contents regardless of stream position
        return buffer.getvalue()
def test_dump(mocker):
    """dump() must delegate to Writable.write with the given fp and format."""
    mocker.patch("Pegasus.api.writable.Writable.write")
    catalog = ReplicaCatalog()

    with NamedTemporaryFile(mode="w") as f:
        dump(catalog, f, _format="yml")
        Pegasus.api.writable.Writable.write.assert_called_once_with(f, _format="yml")
def wf3():
    """Build a Workflow fixture carrying one job and all three catalogs."""
    workflow = Workflow("test")
    workflow.add_jobs(Job("ls"))
    workflow.add_site_catalog(SiteCatalog())
    workflow.add_transformation_catalog(TransformationCatalog())
    workflow.add_replica_catalog(ReplicaCatalog())
    return workflow
def test_add_replica_catalog(self):
    """Attaching a ReplicaCatalog to a Workflow must not raise."""
    rc = ReplicaCatalog()
    wf = Workflow("wf")
    try:
        wf.add_replica_catalog(rc)
    # was a bare `except:`, which also swallows SystemExit/KeyboardInterrupt;
    # only genuine errors should turn into a test failure
    except Exception:
        pytest.fail("should not have raised exception")
def dump(obj: ReplicaCatalog, fp: TextIO, _format="yml", *args, **kwargs) -> None:
    """
    Serialize ``obj`` as a
    :py:class:`~Pegasus.api.replica_catalog.ReplicaCatalog` formatted stream to
    ``fp`` (a ``.write()``-supporting file-like object).

    :param obj: ReplicaCatalog to serialize
    :type obj: ReplicaCatalog
    :param fp: file like object to serialize to
    :type fp: TextIO
    :param _format: format to write to if fp does not have an extension; can be one of ["yml" | "yaml" | "json"], defaults to "yml"
    :type _format: str
    :rtype: None
    """
    # docstring previously claimed ``NoReturn``, which means "never returns";
    # this function returns normally with None, matching the ``-> None`` hint
    obj.write(fp, _format=_format)
def test_write_default(self):
    """Calling write() with no arguments creates replicas.yml in the cwd."""
    out = Path("replicas.yml")
    ReplicaCatalog().write()
    try:
        out.unlink()
    except FileNotFoundError:
        pytest.fail("could not find {}".format(out))
def test_add_regex_replica_with_metadata(self):
    """Metadata supplied to add_regex_replica appears on the serialized entry."""
    catalog = ReplicaCatalog()
    catalog.add_regex_replica("local", "*.txt", "/path", metadata={"creator": "ryan"})

    assert _tojson(catalog.entries[("*.txt", True)]) == {
        "lfn": "*.txt",
        "pfns": [{"site": "local", "pfn": "/path"}],
        "metadata": {"creator": "ryan"},
        "regex": True,
    }
def test_add_duplicate_replica_catalog(self):
    """A second add_replica_catalog on the same Workflow raises DuplicateError."""
    catalog = ReplicaCatalog()
    workflow = Workflow("wf")
    workflow.add_replica_catalog(catalog)

    with pytest.raises(DuplicateError) as e:
        workflow.add_replica_catalog(catalog)

    assert "a ReplicaCatalog has already" in str(e)
def test_dumps(rc_as_dict):
    """dumps() output parses back to the expected replica list."""
    catalog = ReplicaCatalog()
    catalog.add_replica("local", "a", "/a", regex=True)
    catalog.add_replica("local", "b", "/b")
    catalog.add_replica(
        "local", "c", "/c", checksum_type="sha256", checksum_value="abc123"
    )

    # order of serialized replicas is not guaranteed; compare sorted by lfn
    rc_as_dict["replicas"] = sorted(rc_as_dict["replicas"], key=lambda r: r["lfn"])

    result = yaml.load(dumps(catalog))
    result["replicas"] = sorted(result["replicas"], key=lambda r: r["lfn"])

    assert result["replicas"] == rc_as_dict["replicas"]
def test_to_rc(rc_as_dict):
    """_to_rc rebuilds the same replicas that the dict was generated from."""
    expected = ReplicaCatalog()
    expected.add_replica("local", "a", "/a", regex=True)
    expected.add_replica("local", "b", "/b")
    expected.add_replica(
        "local", "c", "/c", checksum_type="sha256", checksum_value="abc123"
    )

    assert _to_rc(rc_as_dict).replicas == expected.replicas
def test_add_multiple_replicas(self):
    """Distinct lfns create distinct entries, each with its own pfn."""
    catalog = ReplicaCatalog()
    catalog.add_replica("local", "f.a", "/f.a")
    catalog.add_replica("local", "f.b", "/f.b")

    for lfn, pfn in (("f.a", "/f.a"), ("f.b", "/f.b")):
        assert _tojson(catalog.entries[(lfn, False)]) == {
            "lfn": lfn,
            "pfns": [{"site": "local", "pfn": pfn}],
        }

    assert len(catalog.entries) == 2
def test_write(self, _format, loader):
    """Round-trip: written catalog loads back to the expected regex replicas."""
    catalog = ReplicaCatalog()
    catalog.add_replica("site1", "lfn1", "pfn1", True).add_replica(
        "site2", "lfn2", "pfn2", True
    )

    by_lfn = lambda d: d["lfn"]
    expected = {
        "pegasus": PEGASUS_VERSION,
        "replicas": sorted(
            [
                {"lfn": "lfn1", "pfn": "pfn1", "site": "site1", "regex": True},
                {"lfn": "lfn2", "pfn": "pfn2", "site": "site2", "regex": True},
            ],
            key=by_lfn,
        ),
    }

    with NamedTemporaryFile(mode="r+") as f:
        catalog.write(f, _format=_format)
        f.seek(0)
        result = loader(f)

    result["replicas"] = sorted(result["replicas"], key=by_lfn)
    assert result == expected
def test_add_duplicate_regex_replica(self):
    """Adding the same regex pattern twice raises DuplicateError."""
    catalog = ReplicaCatalog()
    catalog.add_regex_replica("local", "*.txt", "/path")

    with pytest.raises(DuplicateError) as e:
        catalog.add_regex_replica("local", "*.txt", "/path")

    assert "Pattern: *.txt already exists" in str(e)
def _to_rc(d: dict) -> ReplicaCatalog:
    """Convert dict to ReplicaCatalog

    :param d: ReplicaCatalog represented as a dict
    :type d: dict
    :raises PegasusError: encountered error parsing
    :return: a ReplicaCatalog object based on d
    :rtype: ReplicaCatalog
    """
    rc = ReplicaCatalog()
    try:
        for replica in d["replicas"]:
            checksum = replica.get("checksum")
            # missing "type"/"value" keys raise KeyError -> PegasusError below
            checksum_type = checksum["type"] if checksum else None
            checksum_value = checksum["value"] if checksum else None

            rc.add_replica(
                replica["site"],
                replica["lfn"],
                replica["pfn"],
                # `or False` normalizes an absent or null regex flag
                regex=replica.get("regex") or False,
                checksum_type=checksum_type,
                checksum_value=checksum_value,
            )
    except KeyError:
        raise PegasusError("error parsing {}".format(d))
    return rc
def test_add_duplicate_replica(self):
    """The same (site, lfn, pfn, regex) added via str and File is a duplicate."""
    catalog = ReplicaCatalog()

    with pytest.raises(DuplicateError) as e:
        catalog.add_replica("site", "lfn", "pfn", True)
        catalog.add_replica("site", File("lfn"), "pfn", True)

    expected_fragment = (
        "entry: ReplicaEntry(site=site, lfn=lfn, pfn=pfn, "
        "regex=True, checksum_type=None, checksum_value=None)"
    )
    assert expected_fragment in str(e)
def rc():
    """Fixture: a catalog with two pfns for lfn "a" plus one regex entry."""
    catalog = ReplicaCatalog()
    catalog.add_replica(
        "local", "a", "/a", checksum={"sha256": "abc123"}, metadata={"key": "value"}
    )
    catalog.add_replica("condorpool", "a", "/a")
    catalog.add_regex_replica("local", "b*", "/b", metadata={"key": "value"})
    # the add_* calls are fluent (return the catalog), so this is the same
    # object the original chained expression returned
    return catalog
def test_tojson(self, convert_yaml_schemas_to_json, load_schema):
    """JSON round-trip through _CustomEncoder matches the rc-5.0 schema."""
    catalog = ReplicaCatalog()
    catalog.add_replica("site1", "lfn1", "pfn1")
    catalog.add_replica("site2", "lfn2", "pfn2", True)
    catalog.add_replica(
        "site3", "lfn3", "pfn3", checksum_type="sha256", checksum_value="abc123"
    )

    by_lfn = lambda d: d["lfn"]
    expected = {
        "pegasus": PEGASUS_VERSION,
        "replicas": sorted(
            [
                {"lfn": "lfn1", "pfn": "pfn1", "site": "site1"},
                {"lfn": "lfn2", "pfn": "pfn2", "site": "site2", "regex": True},
                {
                    "lfn": "lfn3",
                    "pfn": "pfn3",
                    "site": "site3",
                    "checksum": {"type": "sha256", "value": "abc123"},
                },
            ],
            key=by_lfn,
        ),
    }

    result = json.loads(json.dumps(catalog, cls=_CustomEncoder))
    result["replicas"] = sorted(result["replicas"], key=by_lfn)

    validate(instance=result, schema=load_schema("rc-5.0.json"))
    assert result == expected
def test_tojson(self, convert_yaml_schemas_to_json, load_schema):
    """_tojson output (including non-ASCII metadata) validates against the schema."""
    catalog = ReplicaCatalog()
    catalog.add_replica(
        "local",
        "f.a",
        "/f.a",
        checksum={"sha256": "123"},
        metadata={"size": 1024, "㐦": "㐦"},
    )
    catalog.add_regex_replica("local", "*.txt", "/path", metadata={"creator": "ryan"})

    expected = {
        "pegasus": "5.0",
        "replicas": [
            {
                "lfn": "f.a",
                "pfns": [{"site": "local", "pfn": "/f.a"}],
                "checksum": {"sha256": "123"},
                "metadata": {"size": 1024, "㐦": "㐦"},
            },
            {
                "lfn": "*.txt",
                "pfns": [{"site": "local", "pfn": "/path"}],
                "metadata": {"creator": "ryan"},
                "regex": True,
            },
        ],
    }

    result = _tojson(catalog)
    assert result == expected

    validate(instance=result, schema=load_schema("rc-5.0.json"))
def test_replica_catalog_ordering_on_yml_write(self):
    """Written yml must list keys in order: pegasus, then replicas."""
    ReplicaCatalog().write()

    out = Path("replicas.yml")
    with out.open() as f:
        # keep the raw text so key ordering is observable (a dict would lose it)
        content = f.read()
    out.unlink()

    assert re.match(r"pegasus: '5.0'[\w\W]+replicas:[\w\W]+", content) is not None
def test_add_replica_multiple_pfns(self):
    """Repeated pfns for one lfn merge into a single entry with a pfn set."""
    catalog = ReplicaCatalog()
    for site in ("local", "condorpool", "condorpool"):
        catalog.add_replica(site, "f.a", "/f.a")

    assert len(catalog.entries) == 1
    entry = catalog.entries[("f.a", False)]
    assert entry.pfns == {_PFN("local", "/f.a"), _PFN("condorpool", "/f.a")}
    assert entry.metadata == {}
def test_workflow_key_ordering_on_yml_write(self):
    """Written workflow.yml must emit its top-level keys in canonical order."""
    workflow = Workflow("wf")
    workflow.add_transformation_catalog(TransformationCatalog())
    workflow.add_replica_catalog(ReplicaCatalog())
    workflow.add_site_catalog(SiteCatalog())
    workflow.add_jobs(Job("t1", _id="a"))
    workflow.add_env(JAVA_HOME="/java/home")
    workflow.add_shell_hook(EventType.START, "/bin/echo hi")
    workflow.add_metadata(key="value")
    workflow.write()

    out = Path("workflow.yml")
    with out.open() as f:
        # keep the raw text so key ordering is observable (a dict would lose it)
        content = f.read()
    out.unlink()

    # expected top-level key order (nested keys ignored):
    # pegasus, name, hooks, profiles, metadata, siteCatalog,
    # replicaCatalog, transformationCatalog, jobs, jobDependencies
    pattern = re.compile(
        r"pegasus: '5.0'[\w\W]+name:[\w\W]+hooks:[\w\W]+profiles:[\w\W]+metadata:[\w\W]+siteCatalog:[\w\W]+replicaCatalog:[\w\W]+transformationCatalog:[\w\W]+jobs:[\w\W]+jobDependencies:[\w\W]+"
    )
    assert pattern.match(content) is not None
def test_write_wf_catalogs_included(self):
    """Inline catalogs appear as siteCatalog/replicaCatalog/transformationCatalog."""
    workflow = Workflow("test")
    workflow.add_jobs(Job("ls"))
    workflow.add_transformation_catalog(TransformationCatalog())
    workflow.add_site_catalog(SiteCatalog())
    workflow.add_replica_catalog(ReplicaCatalog())

    out = Path("workflow.yml")
    with out.open("w+") as f:
        workflow.write(f)
        f.seek(0)
        result = yaml.load(f)

    expected = {
        "pegasus": "5.0",
        "name": "test",
        "siteCatalog": {"sites": []},
        "replicaCatalog": {"replicas": []},
        "transformationCatalog": {"transformations": []},
        "jobs": [
            {
                "type": "job",
                "name": "ls",
                "id": "ID0000001",
                "arguments": [],
                "uses": [],
            }
        ],
        "jobDependencies": [],
    }

    assert expected == result
    out.unlink()
def test_add_replica_multiple_pfns_checksums_and_metadata(self):
    """Metadata from repeated add_replica calls is merged; checksum is kept."""
    catalog = ReplicaCatalog()
    catalog.add_replica(
        "local",
        "f.a",
        "/f.a",
        checksum={"sha256": "abc"},
        metadata={"creator": "ryan"},
    )
    catalog.add_replica("condorpool", "f.a", "/f.a", metadata={"size": 1024})

    entry = catalog.entries[("f.a", False)]
    assert entry.metadata == {"size": 1024, "creator": "ryan"}
    assert entry.checksum == {"sha256": "abc"}
def _to_rc(d: dict) -> ReplicaCatalog:
    """Convert dict to ReplicaCatalog

    :param d: ReplicaCatalog represented as a dict
    :type d: dict
    :raises PegasusError: encountered error parsing
    :return: a ReplicaCatalog object based on d
    :rtype: ReplicaCatalog
    """
    rc = ReplicaCatalog()
    try:
        for r in d["replicas"]:
            lfn = r["lfn"]
            pfns = {_PFN(i["site"], i["pfn"]) for i in r["pfns"]}

            # `x or {}` replaces the original's redundant double lookup
            # (`r.get(k) if r.get(k) else {}`) and still normalizes both a
            # missing key and an explicit null to an empty dict
            checksum = r.get("checksum") or {}
            metadata = r.get("metadata") or {}
            regex = r.get("regex")

            for pfn in pfns:
                if regex:
                    rc.add_regex_replica(pfn.site, lfn, pfn.pfn, metadata=metadata)
                else:
                    rc.add_replica(
                        pfn.site, lfn, pfn.pfn, metadata=metadata, checksum=checksum
                    )
    except KeyError:
        raise PegasusError("error parsing {}".format(d))

    return rc
def test_write(self, _format, loader):
    """Full catalog round-trip: pfns, checksum, merged metadata, regex, x-pegasus."""
    catalog = ReplicaCatalog()
    f_a = File("f.a", size=1024).add_metadata(creator="ryan")
    catalog.add_replica(
        "local",
        f_a,
        "/f.a",
        checksum={"sha256": "123"},
        metadata={"extra": "metadata"},
    )
    catalog.add_replica("condorpool", f_a, "/f.a")
    catalog.add_replica("local", "f.b", "/f.b")
    catalog.add_regex_replica("local", "*.txt", "/path", metadata={"creator": "ryan"})

    expected = {
        "pegasus": "5.0",
        "replicas": [
            {
                "lfn": "f.a",
                "pfns": [
                    {"site": "local", "pfn": "/f.a"},
                    {"site": "condorpool", "pfn": "/f.a"},
                ],
                "checksum": {"sha256": "123"},
                "metadata": {"extra": "metadata", "size": 1024, "creator": "ryan"},
            },
            {
                "lfn": "f.b",
                "pfns": [{"site": "local", "pfn": "/f.b"}],
                "metadata": {"size": 1024, "creator": "ryan"},
            },
            {
                "lfn": "*.txt",
                "pfns": [{"site": "local", "pfn": "/path"}],
                "metadata": {"creator": "ryan"},
                "regex": True,
            },
        ],
    }

    # pfn set ordering is nondeterministic; compare sorted by site
    by_site = lambda pfn: pfn["site"]
    expected["replicas"][0]["pfns"] = sorted(
        expected["replicas"][0]["pfns"], key=by_site
    )

    with NamedTemporaryFile(mode="r+") as f:
        catalog.write(f, _format=_format)
        f.seek(0)
        result = loader(f)

    result["replicas"][0]["pfns"] = sorted(result["replicas"][0]["pfns"], key=by_site)

    assert "createdOn" in result["x-pegasus"]
    assert result["x-pegasus"]["createdBy"] == getpass.getuser()
    assert result["x-pegasus"]["apiLang"] == "python"
    del result["x-pegasus"]

    assert result == expected
def test_add_replica_with_invalid_checksum(self):
    """Unsupported checksum algorithms are rejected with ValueError."""
    catalog = ReplicaCatalog()

    with pytest.raises(ValueError) as e:
        catalog.add_replica("local", "f.a", "/f.a", checksum={"md5": "123"})

    assert "Invalid checksum: md5" in str(e)
def test_add_replica_pfn_with_invalid_path_object(self):
    """A relative Path as pfn is rejected with ValueError."""
    catalog = ReplicaCatalog()

    with pytest.raises(ValueError) as e:
        catalog.add_replica("local", "f.a", Path("file"))

    assert "Invalid pfn: file" in str(e)
def test_add_replica_pfn_with_path_obj(self):
    """An absolute Path pfn is accepted and stored as its string form."""
    catalog = ReplicaCatalog()
    catalog.add_replica("local", "test_replica_catalog", Path("/file"))

    entry = catalog.entries[("test_replica_catalog", False)]
    assert entry.pfns.pop().pfn == "/file"