def test_special_cases(fuse_mntdir):
    snp_dir = Path(fuse_mntdir / "archive" / FAKE_SNP_SPECIAL_CASES_SWHID)
    snp_meta = get_data_from_web_archive(FAKE_SNP_SPECIAL_CASES_SWHID)
    for branch_name, branch_meta in snp_meta.items():
        curr = snp_dir / branch_name
        assert curr.is_symlink()
        if "expected_symlink" in branch_meta:
            assert os.readlink(curr) == branch_meta["expected_symlink"]


def test_list_parents(fuse_mntdir):
    rev_meta = get_data_from_web_archive(ROOT_REV)
    dir_path = fuse_mntdir / "archive" / ROOT_REV / "parents"
    for i, parent in enumerate(rev_meta["parents"]):
        parent_path = dir_path / str(i + 1)
        parent_swhid = f"swh:1:rev:{parent['id']}"
        assert parent_path.is_symlink()
        assert os.readlink(parent_path) == f"../../../archive/{parent_swhid}"


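# A hypothetical companion check (not in the original suite): resolve each
# relative "../../../archive/..." symlink against its directory and verify it
# lands back on the parent revision inside the mountpoint. The test name and
# extra assertion are an illustrative sketch, assuming fuse_mntdir itself
# contains no symlinks.
def test_list_parents_resolve(fuse_mntdir):
    rev_meta = get_data_from_web_archive(ROOT_REV)
    dir_path = fuse_mntdir / "archive" / ROOT_REV / "parents"
    for i, parent in enumerate(rev_meta["parents"]):
        parent_path = dir_path / str(i + 1)
        target = Path(os.readlink(parent_path))
        # Join the relative target with the link's directory, then normalize
        resolved = (dir_path / target).resolve()
        expected = (fuse_mntdir / "archive" / f"swh:1:rev:{parent['id']}").resolve()
        assert resolved == expected

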
def test_list_history(fuse_mntdir):
    dir_path = fuse_mntdir / "archive" / REV_SMALL_HISTORY / "history"
    assert os.listdir(dir_path) == ["by-date", "by-hash", "by-page"]

    history_meta = get_data_from_graph_archive(
        REV_SMALL_HISTORY, GRAPH_API_REQUEST.HISTORY
    )
    history = history_meta.strip()
    # Only keep the second node of each edge: the first node is either
    # redundant information or the root node (hence not an ancestor)
    expected = set(
        map(CoreSWHID.from_string, [edge.split(" ")[1] for edge in history.split("\n")])
    )

    dir_by_hash = dir_path / "by-hash"
    for swhid in expected:
        depth1 = hash_to_hex(swhid.object_id)[:2]
        depth2 = str(swhid)
        assert (dir_by_hash / depth1).exists()
        assert depth2 in os.listdir(dir_by_hash / depth1)

    dir_by_page = dir_path / "by-page"
    for idx, swhid in enumerate(expected):
        page_number = idx // RevisionHistoryShardByPage.PAGE_SIZE
        depth1 = RevisionHistoryShardByPage.PAGE_FMT.format(page_number=page_number)
        depth2 = str(swhid)
        assert (dir_by_page / depth1).exists()
        assert depth2 in os.listdir(dir_by_page / depth1)

    dir_by_date = dir_path / "by-date"
    # Wait up to 20 seconds (200 iterations x 0.1s) for by-date/ to be populated
    for _ in range(200):
        entries = os.listdir(dir_by_date)
        if entries and ".status" not in entries:
            break
        time.sleep(0.1)
    assert not (dir_by_date / ".status").exists()
    for swhid in expected:
        meta = get_data_from_web_archive(str(swhid))
        date = dateutil.parser.parse(meta["date"])
        depth1 = RevisionHistoryShardByDate.DATE_FMT.format(
            year=date.year, month=date.month, day=date.day
        )
        depth2 = str(swhid)
        assert (dir_by_date / depth1).exists()
        assert depth2 in os.listdir(dir_by_date / depth1)


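# The by-date/ wait loop above could be factored into a reusable polling
# helper; a minimal sketch (the wait_until name, timeout, and interval are
# assumptions, not part of the test suite). Callers would then write, e.g.,
# assert wait_until(
#     lambda: os.listdir(dir_by_date) and ".status" not in os.listdir(dir_by_date)
# )
def wait_until(predicate, timeout=20.0, interval=0.1):
    """Poll `predicate` until it returns truthy or `timeout` seconds elapse.

    Returns the last predicate result, so callers can `assert wait_until(...)`.
    """
    deadline = time.monotonic() + timeout
    while True:
        result = predicate()
        if result or time.monotonic() >= deadline:
            return result
        time.sleep(interval)

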
def test_access_meta(fuse_mntdir):
    file_path = fuse_mntdir / "archive" / ROOT_REV / "meta.json"
    expected = json.dumps(get_data_from_web_archive(ROOT_REV))
    assert file_path.read_text().strip() == expected.strip()


def test_access_cnt_target(fuse_mntdir):
    target_path = fuse_mntdir / "archive" / REL_TARGET_CNT / "target"
    expected = get_data_from_web_archive(TARGET_CNT, raw=True)
    assert target_path.read_text() == expected


def test_cat_file(fuse_mntdir):
    file_path = fuse_mntdir / "archive" / REGULAR_FILE
    expected = get_data_from_web_archive(REGULAR_FILE, raw=True)
    assert file_path.read_text() == expected


def test_list_branches(fuse_mntdir):
    snp_dir = Path(fuse_mntdir / "archive" / ROOT_SNP)
    snp_meta = get_data_from_web_archive(ROOT_SNP)
    for branch_name in snp_meta.keys():
        assert (snp_dir / branch_name).is_symlink()


def test_access_meta_file(fuse_mntdir):
    for swhid in ALL_ENTRIES:
        # On-the-fly mounting: the entry is fetched when its .json is first read
        file_path_meta = fuse_mntdir / f"archive/{swhid}.json"
        expected = json.dumps(get_data_from_web_archive(swhid))
        assert file_path_meta.read_text().strip() == expected.strip()


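# The same coverage could be expressed as one test per SWHID so that each
# failing entry is reported individually; a sketch using
# pytest.mark.parametrize, assuming ALL_ENTRIES is available at collection
# time (the test name is illustrative, and the import would normally live at
# the top of the module).
import pytest


@pytest.mark.parametrize("swhid", ALL_ENTRIES)
def test_access_meta_file_single(fuse_mntdir, swhid):
    # Reading archive/<swhid>.json mounts the entry on the fly
    file_path_meta = fuse_mntdir / f"archive/{swhid}.json"
    expected = json.dumps(get_data_from_web_archive(swhid))
    assert file_path_meta.read_text().strip() == expected.strip()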