def test_loader_hg_extid_filtering(swh_storage, datadir, tmp_path):
    """The first visit of a fork should filter already seen revisions (through extids)"""
    archive_name = "the-sandbox"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    # Initial, full load of the original repository
    loader = HgLoader(swh_storage, url=repo_url)
    assert loader.load() == {"status": "eventful"}

    expected_stats = {
        "content": 2,
        "directory": 3,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 58,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(loader.storage) == expected_stats

    visit_status = assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="hg",
    )

    # Make a fork of the first repository we ingested
    fork_url = prepare_repository_from_archive(
        archive_path, "the-sandbox-reloaded", tmp_path
    )
    fork_loader = HgLoader(
        swh_storage, url=fork_url, directory=str(tmp_path / archive_name)
    )
    # Everything was already seen via extids: nothing new to archive
    assert fork_loader.load() == {"status": "uneventful"}

    expected_stats_after_fork = dict(expected_stats)
    expected_stats_after_fork["origin"] = 1 + 1
    expected_stats_after_fork["origin_visit"] = 1 + 1
    assert get_stats(loader.storage) == expected_stats_after_fork

    visit_status2 = assert_last_visit_matches(
        loader.storage,
        fork_url,
        status="full",
        type="hg",
    )
    # The fork visit must point at the very same snapshot as the original
    assert visit_status.snapshot is not None
    assert visit_status2.snapshot == visit_status.snapshot
def test_load_repo_check_extids_write_version(swh_storage, datadir, tmp_path):
    """ExtIDs should be stored with a given version when loading is done"""
    archive_name = "hello"
    archive_path = Path(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    hg_strip(repo_url.replace("file://", ""), "tip")

    loader = HgLoader(swh_storage, repo_url)
    assert loader.load() == {"status": "eventful"}

    # Ensure we write ExtIDs to a specific version.
    snapshot = snapshot_get_latest(swh_storage, repo_url)

    # First, filter out revisions from that snapshot
    revision_ids = [
        branch.target
        for branch in snapshot.branches.values()
        if branch.target_type == TargetType.REVISION
    ]
    assert len(revision_ids) > 0

    # Those revisions should have their associated ExtID version set to EXTID_VERSION
    extids = swh_storage.extid_get_from_target(ObjectType.REVISION, revision_ids)
    assert len(extids) == len(revision_ids)
    assert all(extid.extid_version == EXTID_VERSION for extid in extids)
def test_load_new_extid_should_be_eventful(swh_storage, datadir, tmp_path):
    """Changing the extid version should make loaders ignore existing extids,
    and load the repo again."""
    archive_name = "hello"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    repo_path = repo_url.replace("file://", "")

    # Load once under an old extid version: first visit is eventful,
    # subsequent visits at the same version converge to uneventful.
    with unittest.mock.patch("swh.loader.mercurial.loader.EXTID_VERSION", 0):
        assert HgLoader(swh_storage, repo_path).load() == {"status": "eventful"}
        assert HgLoader(swh_storage, repo_path).load() == {"status": "eventful"}
        assert HgLoader(swh_storage, repo_path).load() == {"status": "uneventful"}

    # Bumping the extid version invalidates the old extids, so the repo
    # is loaded again (eventful), then converges once more.
    with unittest.mock.patch("swh.loader.mercurial.loader.EXTID_VERSION", 10000):
        assert HgLoader(swh_storage, repo_path).load() == {"status": "eventful"}
        assert HgLoader(swh_storage, repo_path).load() == {"status": "uneventful"}
def test_multiple_open_heads(swh_storage, datadir, tmp_path):
    archive_name = "multiple-heads"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = HgLoader(
        storage=swh_storage,
        url=repo_url,
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(swh_storage, repo_url, status="full", type="hg")

    # Every open head must show up as a branch in the snapshot
    snapshot = snapshot_get_latest(swh_storage, repo_url)
    expected_branches = [
        b"HEAD",
        b"branch-heads/default/0",
        b"branch-heads/default/1",
        b"branch-tip/default",
    ]
    assert sorted(snapshot.branches.keys()) == expected_branches

    # Check that we don't load anything the second time
    second_loader = HgLoader(
        storage=swh_storage,
        url=repo_url,
    )
    assert second_loader.load() == {"status": "uneventful"}
def test_closed_branch_incremental(swh_storage, datadir, tmp_path):
    """Test that a repository with a closed branch does not trip an incremental load"""
    archive_name = "example"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    repo_path = repo_url.replace("file://", "")

    loader = HgLoader(swh_storage, repo_path)

    # Test 3 loads: full, and two incremental.
    assert loader.load() == {"status": "eventful"}
    expected_stats = {
        "content": 7,
        "directory": 16,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 9,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(loader.storage) == expected_stats

    # Each incremental visit is uneventful and only adds one origin_visit
    for extra_visits in (1, 2):
        assert loader.load() == {"status": "uneventful"}
        expected = dict(expected_stats, origin_visit=1 + extra_visits)
        assert get_stats(loader.storage) == expected
def init(self, swh_storage, datadir, tmp_path, mocker):
    archive_name = "testrepo"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    tmp_path = str(tmp_path)
    self.repo_url = prepare_repository_from_archive(
        archive_path, archive_name, tmp_path=tmp_path
    )
    self.destination_path = os.path.join(tmp_path, archive_name)

    # Stub metadata fetcher: no origin metadata, one parent origin
    self.fetcher = MagicMock()
    self.fetcher.get_origin_metadata.return_value = []
    self.fetcher.get_parent_origins.return_value = [
        Origin(url=f"base://{self.repo_url}")
    ]
    self.fetcher_cls = MagicMock(return_value=self.fetcher)
    self.fetcher_cls.SUPPORTED_LISTERS = ["fake-lister"]
    mocker.patch(
        "swh.loader.core.metadata_fetchers._fetchers",
        return_value=[self.fetcher_cls],
    )

    # Wrap the storage so calls can be inspected by the tests
    self.loader = GitLoader(
        MagicMock(wraps=swh_storage),
        self.repo_url,
        lister_name="fake-lister",
        lister_instance_name="",
    )
    self.repo = dulwich.repo.Repo(self.destination_path)
def test_load_unchanged_repo__dangling_extid(swh_storage, datadir, tmp_path):
    """Checks the loader will load revisions targeted by an ExtID if the
    revisions are missing from the storage"""
    archive_name = "hello"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    repo_path = repo_url.replace("file://", "")

    first_loader = HgLoader(swh_storage, repo_path)
    assert first_loader.load() == {"status": "eventful"}

    full_stats = {
        "content": 3,
        "directory": 3,
        "origin": 1,
        "origin_visit": 1,
        "release": 1,
        "revision": 3,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(first_loader.storage) == full_stats

    old_storage = swh_storage

    # Create a new storage, and only copy ExtIDs or head revisions to it.
    # This should be enough for the loader to know revisions were already loaded
    new_storage = _partial_copy_storage(
        old_storage, repo_path, mechanism="extid", copy_revisions=False
    )

    # Create a new loader (to start with a clean slate, eg. remove the caches),
    # with the new, partial, storage
    loader = HgLoader(new_storage, repo_path)

    # The partial storage has the extids/snapshot but no actual objects
    assert get_stats(loader.storage) == {
        "content": 0,
        "directory": 0,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }

    # Despite the dangling extids, the missing revisions get re-loaded
    assert loader.load() == {"status": "eventful"}
    assert get_stats(loader.storage) == dict(full_stats, origin_visit=2)
def test_load_unchanged_repo_should_be_uneventful(
    swh_storage,
    datadir,
    tmp_path,
):
    """Checks the loader can find which revisions it already loaded, using ExtIDs."""
    archive_name = "hello"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    repo_path = repo_url.replace("file://", "")

    loader = HgLoader(swh_storage, repo_path)
    assert loader.load() == {"status": "eventful"}

    expected_stats = {
        "content": 3,
        "directory": 3,
        "origin": 1,
        "origin_visit": 1,
        "release": 1,
        "revision": 3,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(loader.storage) == expected_stats

    visit_status = assert_last_visit_matches(
        loader.storage,
        repo_path,
        type=RevisionType.MERCURIAL.value,
        status="full",
    )
    assert visit_status.snapshot is not None

    # Create a new loader (to start with a clean slate, eg. remove the caches),
    # with the new, partial, storage
    loader2 = HgLoader(swh_storage, repo_path)
    assert loader2.load() == {"status": "uneventful"}

    # Should have all the objects; only the visit count moved
    assert get_stats(loader.storage) == dict(expected_stats, origin_visit=2)

    visit_status2 = assert_last_visit_matches(
        loader2.storage,
        repo_path,
        type=RevisionType.MERCURIAL.value,
        status="full",
    )
    # Same snapshot as the first visit: nothing changed upstream
    assert visit_status2.snapshot == visit_status.snapshot
def init(self, swh_storage, datadir, tmp_path):
    archive_name = "testrepo"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    tmp_path = str(tmp_path)
    # Uncompress the fixture repository and keep both its URL and on-disk path
    self.repo_url = prepare_repository_from_archive(
        archive_path, archive_name, tmp_path=tmp_path
    )
    self.destination_path = os.path.join(tmp_path, archive_name)
    self.loader = GitLoader(swh_storage, self.repo_url)
    self.repo = dulwich.repo.Repo(self.destination_path)
def test_examples(swh_storage, datadir, tmp_path, archive_name):
    # Each example archive ships with a .json file of expected SWHIDs
    archive_path = Path(datadir, f"{archive_name}.tgz")
    json_path = Path(datadir, f"{archive_name}.json")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    checker = LoaderChecker(
        loader=HgLoader(swh_storage, repo_url),
        expected=ExpectedSwhids.load(json_path),
    )
    checker.check()
def test_load_repo_with_new_commits(swh_storage, datadir, tmp_path):
    archive_name = "hello"
    archive_path = Path(datadir, f"{archive_name}.tgz")
    json_path = Path(datadir, f"{archive_name}.json")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    # first load with missing commits
    hg_strip(repo_url.replace("file://", ""), "tip")
    first_loader = HgLoader(swh_storage, repo_url)
    assert first_loader.load() == {"status": "eventful"}
    assert get_stats(first_loader.storage) == {
        "content": 2,
        "directory": 2,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 2,
        "skipped_content": 0,
        "snapshot": 1,
    }

    # second load with all commits
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    second_loader = HgLoader(swh_storage, repo_url)
    LoaderChecker(
        loader=second_loader,
        expected=ExpectedSwhids.load(json_path),
    ).check()
    assert get_stats(second_loader.storage) == {
        "content": 3,
        "directory": 3,
        "origin": 1,
        "origin_visit": 2,
        "release": 1,
        "revision": 3,
        "skipped_content": 0,
        "snapshot": 2,
    }
def test_loader_repository_with_bookmark_information(swh_storage, datadir, tmp_path):
    """Repository with bookmark information should be ingested correctly"""
    archive_name = "anomad-d"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    # A single successful (eventful) load is the whole assertion here
    loader = HgLoader(swh_storage, url=repo_url)
    assert loader.load() == {"status": "eventful"}
def test_prepare_repository_from_archive(datadir, tmp_path):
    archive_name = "0805nexter-1.1.0"
    archive_path = os.path.join(str(datadir), f"{archive_name}.tar.gz")
    assert os.path.exists(archive_path) is True

    tmp_path = str(tmp_path)  # deals with path string
    repo_url = prepare_repository_from_archive(
        archive_path, filename=archive_name, tmp_path=tmp_path
    )

    # The archive must have been uncompressed under tmp_path and the
    # returned URL must be a file:// URL pointing at that directory
    expected_uncompressed_archive_path = os.path.join(tmp_path, archive_name)
    assert repo_url == f"file://{expected_uncompressed_archive_path}"
    assert os.path.exists(expected_uncompressed_archive_path)
def test_visit_repository_with_transplant_operations(swh_storage, datadir, tmp_path):
    """Visit a mercurial repository visit transplant operations within should yield a
    snapshot as well.

    """
    archive_name = "transplant"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    loader = HgLoader(
        swh_storage,
        url=repo_url,
        visit_date=VISIT_DATE,
    )

    # load hg repository
    assert loader.load() == {"status": "eventful"}

    # collect swh revisions
    assert_last_visit_matches(
        loader.storage, repo_url, type=RevisionType.MERCURIAL.value, status="full"
    )

    snapshot = snapshot_get_latest(loader.storage, repo_url)
    revisions = [
        branch.target
        for branch in snapshot.branches.values()
        if branch.target_type.value == "revision"
    ]

    # extract original changesets info and the transplant sources
    hg_changesets = set()
    transplant_sources = set()
    for rev in loader.storage.revision_log(revisions):
        extids = list(
            loader.storage.extid_get_from_target(ObjectType.REVISION, [rev["id"]])
        )
        assert len(extids) == 1
        hg_changesets.add(hash_to_hex(extids[0].extid))
        for k, v in rev["extra_headers"]:
            if k == b"transplant_source":
                transplant_sources.add(v.decode("ascii"))

    # check extracted data are valid
    assert len(hg_changesets) > 0
    assert len(transplant_sources) > 0
    # every transplant source must reference a known hg changeset
    assert transplant_sources <= hg_changesets
def test_single_revision(datadir: str, tmp_path: str):
    archive_name = "hello"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    directory = urlsplit(repo_url).path

    # Ask the CLI for the SWHID of one specific mercurial changeset
    result = CliRunner().invoke(
        main,
        [
            "-d",
            directory,
            "revision",
            "0a04b987be5ae354b710cefeba0e2d9de7ad41a9",
        ],
    )

    expected = (
        "swh:1:rev:93b48d515580522a05f389bec93227fc8e43d940"
        "\t0a04b987be5ae354b710cefeba0e2d9de7ad41a9\n"
    )
    assert result.output == expected
def test_prepare_repository_from_archive_no_filename(datadir, tmp_path):
    archive_name = "0805nexter-1.1.0"
    archive_path = os.path.join(str(datadir), f"{archive_name}.tar.gz")
    assert os.path.exists(archive_path) is True

    # deals with path as posix path (for tmp_path)
    repo_url = prepare_repository_from_archive(archive_path, tmp_path=tmp_path)

    tmp_path = str(tmp_path)
    expected_uncompressed_archive_path = os.path.join(tmp_path, archive_name)
    expected_repo_url = os.path.join(tmp_path, f"{archive_name}.tar.gz")
    # Without a filename, the URL keeps the archive's own name
    assert repo_url == f"file://{expected_repo_url}"

    # passing along the filename does not influence the on-disk extraction
    # just the repo-url computation
    assert os.path.exists(expected_uncompressed_archive_path)
def test_all_revisions(datadir: str, tmp_path: str):
    archive_name = "hello"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    directory = urlsplit(repo_url).path

    # Without an explicit changeset, the CLI maps every revision
    result = CliRunner().invoke(main, ["-d", directory, "revision"])

    expected = dedent(
        """
        swh:1:rev:93b48d515580522a05f389bec93227fc8e43d940\t0a04b987be5ae354b710cefeba0e2d9de7ad41a9
        swh:1:rev:8dd3db5d5519e4947f035d141581d304565372d2\t82e55d328c8ca4ee16520036c0aaace03a5beb65
        swh:1:rev:c3dbe4fbeaaa98dd961834e4007edb3efb0e2a27\tb985ae4a07e12ac662f45a171e2d42b13be5b50c
        """
    ).lstrip()
    assert result.output == expected
def init(self, swh_storage, datadir, tmp_path):
    archive_name = "testrepo"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    tmp_path = str(tmp_path)
    self.repo_url = prepare_repository_from_archive(
        archive_path, archive_name, tmp_path=tmp_path
    )
    self.destination_path = os.path.join(tmp_path, archive_name)
    # Load from the on-disk clone with a fixed visit date
    self.loader = GitLoaderFromDisk(
        swh_storage,
        url=self.repo_url,
        visit_date=datetime.datetime(
            2016, 5, 3, 15, 16, 32, tzinfo=datetime.timezone.utc
        ),
        directory=self.destination_path,
    )
    self.repo = dulwich.repo.Repo(self.destination_path)
def test_missing_filelog_should_not_crash(swh_storage, datadir, tmp_path):
    archive_name = "missing-filelog"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    directory = repo_url.replace("file://", "")

    loader = HgLoader(
        storage=swh_storage,
        url=repo_url,
        directory=directory,  # specify directory to avoid clone
        visit_date=VISIT_DATE,
    )

    # The load completes despite the missing filelog...
    assert loader.load() == {"status": "eventful"}
    # ...but the visit is only recorded as partial
    assert_last_visit_matches(swh_storage, repo_url, status="partial", type="hg")
def test_all(datadir: str, tmp_path: str):
    archive_name = "hello"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
    directory = urlsplit(repo_url).path

    # "all" lists directories, revisions, releases and the snapshot
    result = CliRunner().invoke(main, ["-d", directory, "all"])

    expected = dedent(
        f"""
        swh:1:dir:43d727f2f3f2f7cb3b098ddad1d7038464a4cee2\t0a04b987be5ae354b710cefeba0e2d9de7ad41a9
        swh:1:dir:b3f85f210ff86d334575f64cb01c5bf49895b63e\t82e55d328c8ca4ee16520036c0aaace03a5beb65
        swh:1:dir:8f2be433c945384c85920a8e60f2a68d2c0f20fb\tb985ae4a07e12ac662f45a171e2d42b13be5b50c
        swh:1:rev:93b48d515580522a05f389bec93227fc8e43d940\t0a04b987be5ae354b710cefeba0e2d9de7ad41a9
        swh:1:rev:8dd3db5d5519e4947f035d141581d304565372d2\t82e55d328c8ca4ee16520036c0aaace03a5beb65
        swh:1:rev:c3dbe4fbeaaa98dd961834e4007edb3efb0e2a27\tb985ae4a07e12ac662f45a171e2d42b13be5b50c
        swh:1:rel:515c4d72e089404356d0f4b39d60f948b8999140\t0.1
        swh:1:snp:d35668e02e2ba4321dc951cd308cf883786f918a\t{directory}
        """
    ).lstrip()
    assert result.output == expected
def test_loader_hg_new_visit_no_release(swh_storage, datadir, tmp_path):
    """Eventful visit should yield 1 snapshot"""
    archive_name = "the-sandbox"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = HgLoader(swh_storage, url=repo_url)
    assert loader.load() == {"status": "eventful"}

    # Open branch heads become branch-tip/* branches...
    tips = {
        b"branch-tip/default": "70e750bb046101fdced06f428e73fee471509c56",
        b"branch-tip/develop": "a9c4534552df370f43f0ef97146f393ef2f2a08c",
    }
    # ...while closed heads are recorded under branch-closed-heads/*/0
    closed = {
        b"feature/fun_time": "4d640e8064fe69b4c851dfd43915c431e80c7497",
        b"feature/green2_loader": "94be9abcf9558213ff301af0ecd8223451ce991d",
        b"feature/greenloader": "9f82d95bd3edfb7f18b1a21d6171170395ea44ce",
        b"feature/my_test": "dafa445964230e808148db043c126063ea1dc9b6",
        b"feature/read2_loader": "9e912851eb64e3a1e08fbb587de7a4c897ce5a0a",
        b"feature/readloader": "ddecbc16f4c916c39eacfcb2302e15a9e70a231e",
        b"feature/red": "cb36b894129ca7910bb81c457c72d69d5ff111bc",
        b"feature/split5_loader": "3ed4b85d30401fe32ae3b1d650f215a588293a9e",
        b"feature/split_causing": "c346f6ff7f42f2a8ff867f92ab83a6721057d86c",
        b"feature/split_loader": "5f4eba626c3f826820c4475d2d81410759ec911b",
        b"feature/split_loader5": "5017ce0b285351da09a2029ea2cf544f79b593c7",
        b"feature/split_loading": "4e2dc6d6073f0b6d348f84ded52f9143b10344b9",
        b"feature/split_redload": "2d4a801c9a9645fcd3a9f4c06418d8393206b1f3",
        b"feature/splitloading": "88b80615ed8561be74a700b92883ec0374ddacb0",
        b"feature/test": "61d762d65afb3150e2653d6735068241779c1fcf",
        b"feature/test_branch": "be44d5e6cc66580f59c108f8bff5911ee91a22e4",
        b"feature/test_branching": "d2164061453ecb03d4347a05a77db83f706b8e15",
        b"feature/test_dog": "2973e5dc9568ac491b198f6b7f10c44ddc04e0a3",
    }

    mapping = {b"branch-closed-heads/%s/0" % name: hg_id for name, hg_id in closed.items()}
    mapping.update(tips)

    expected_branches = {
        branch_name: SnapshotBranch(
            target=hash_to_bytes(hg_id), target_type=TargetType.REVISION
        )
        for branch_name, hg_id in mapping.items()
    }
    # HEAD is an alias onto the default branch tip
    expected_branches[b"HEAD"] = SnapshotBranch(
        target=b"branch-tip/default", target_type=TargetType.ALIAS
    )

    expected_snapshot = Snapshot(
        id=hash_to_bytes("cbc609dcdced34dbd9938fe81b555170f1abc96f"),
        branches=expected_branches,
    )

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="hg",
        snapshot=expected_snapshot.id,
    )
    check_snapshot(expected_snapshot, loader.storage)

    expected_stats = {
        "content": 2,
        "directory": 3,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 58,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(loader.storage) == expected_stats

    loader2 = HgLoader(swh_storage, url=repo_url)
    assert loader2.load() == {"status": "uneventful"}  # nothing new happened

    expected_stats2 = dict(expected_stats, origin_visit=2)  # one new visit recorded
    assert get_stats(loader2.storage) == expected_stats2
    assert_last_visit_matches(
        loader2.storage,
        repo_url,
        status="full",
        type="hg",
        snapshot=expected_snapshot.id,
    )  # but we got a snapshot nonetheless
def test_prepare_repository_from_archive_failure():
    # does not deal with inexistent archive so raise
    missing_archive = "unknown-archive"
    assert os.path.exists(missing_archive) is False
    with pytest.raises(subprocess.CalledProcessError, match="exit status 2"):
        prepare_repository_from_archive(missing_archive)
def test_loader_hg_new_visit_with_release(swh_storage, datadir, tmp_path):
    """Eventful visit with release should yield 1 snapshot"""
    archive_name = "hello"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)

    loader = HgLoader(
        swh_storage,
        url=repo_url,
        visit_date=VISIT_DATE,
    )
    assert loader.load() == {"status": "eventful"}

    # then
    assert get_stats(loader.storage) == {
        "content": 3,
        "directory": 3,
        "origin": 1,
        "origin_visit": 1,
        "release": 1,
        "revision": 3,
        "skipped_content": 0,
        "snapshot": 1,
    }

    # cf. test_loader.org for explaining from where those hashes
    tip_release = hash_to_bytes("515c4d72e089404356d0f4b39d60f948b8999140")
    assert loader.storage.release_get([tip_release])[0] is not None

    tip_revision_default = hash_to_bytes("c3dbe4fbeaaa98dd961834e4007edb3efb0e2a27")
    assert loader.storage.revision_get([tip_revision_default])[0] is not None

    expected_snapshot = Snapshot(
        id=hash_to_bytes("7ef082aa8b53136b1bed97f734504be32679bbec"),
        branches={
            b"branch-tip/default": SnapshotBranch(
                target=tip_revision_default,
                target_type=TargetType.REVISION,
            ),
            b"tags/0.1": SnapshotBranch(
                target=tip_release,
                target_type=TargetType.RELEASE,
            ),
            b"HEAD": SnapshotBranch(
                target=b"branch-tip/default",
                target_type=TargetType.ALIAS,
            ),
        },
    )

    check_snapshot(expected_snapshot, loader.storage)
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        type=RevisionType.MERCURIAL.value,
        status="full",
        snapshot=expected_snapshot.id,
    )
def _get_repo_url(archive_name, datadir, tmp_path):
    # Uncompress the named archive; the extracted repository is always
    # exposed under the "pkg-gourmet" directory name.
    return prepare_repository_from_archive(
        os.path.join(datadir, f"{archive_name}.tgz"), "pkg-gourmet", tmp_path
    )
def init(self, swh_storage, datadir, tmp_path):
    """Spawn a local HTTP server serving a git repository over the dumb
    protocol, and set up a GitLoader subclass targeting it.

    Depending on ``self.with_pack_files``, objects are served either from a
    bare clone (packed) or straight from the base repository's .git folder
    (loose). The fixture yields to the test, then shuts the server down and
    restores any proxy environment variables it removed.
    """
    # remove any proxy settings in order to successfully spawn a local HTTP server
    http_proxy = os.environ.get("http_proxy")
    https_proxy = os.environ.get("https_proxy")
    if http_proxy:
        del os.environ["http_proxy"]
    # BUGFIX: the guard previously tested http_proxy, raising KeyError when
    # only http_proxy was set and leaving https_proxy in place otherwise.
    if https_proxy:
        del os.environ["https_proxy"]

    # prepare test base repository using smart transfer protocol
    archive_name = "testrepo"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    tmp_path = str(tmp_path)
    base_repo_url = prepare_repository_from_archive(
        archive_path, archive_name, tmp_path=tmp_path
    )
    destination_path = os.path.join(tmp_path, archive_name)
    self.destination_path = destination_path
    with_pack_files = self.with_pack_files

    if with_pack_files:
        # create a bare clone of that repository in another folder,
        # all objects will be contained in one or two pack files in that case
        http_root_dir = tmp_path
        repo_name = archive_name + "_bare"
        bare_repo_path = os.path.join(http_root_dir, repo_name)
        subprocess.run(
            ["git", "clone", "--bare", base_repo_url, bare_repo_path],
            check=True,
        )
    else:
        # otherwise serve objects from the bare repository located in
        # the .git folder of the base repository
        http_root_dir = destination_path
        repo_name = ".git"
        bare_repo_path = os.path.join(http_root_dir, repo_name)

    # spawn local HTTP server that will serve the bare repository files
    hostname = "localhost"
    handler = partial(SimpleHTTPRequestHandler, directory=http_root_dir)
    httpd = HTTPServer((hostname, 0), handler, bind_and_activate=True)

    def serve_forever(httpd):
        with httpd:
            httpd.serve_forever()

    thread = Thread(target=serve_forever, args=(httpd,))
    thread.start()

    repo = dulwich.repo.Repo(self.destination_path)

    class DumbGitLoaderTest(GitLoader):
        def load(self):
            """
            Override load method to ensure the bare repository will be
            synchronized with the base one as tests can modify its content.
            """
            if with_pack_files:
                # ensure HEAD ref will be the same for both repositories
                with open(os.path.join(bare_repo_path, "HEAD"), "wb") as fw:
                    with open(
                        os.path.join(destination_path, ".git/HEAD"), "rb"
                    ) as fr:
                        head_ref = fr.read()
                        fw.write(head_ref)

                # push possibly modified refs in the base repository to the bare one
                for ref in repo.refs.allkeys():
                    if ref != b"HEAD" or head_ref in repo.refs:
                        push(
                            repo,
                            remote_location=f"file://{bare_repo_path}",
                            refspecs=ref,
                        )

            # generate or update the info/refs file used in dumb protocol
            subprocess.run(
                ["git", "-C", bare_repo_path, "update-server-info"],
                check=True,
            )

            return super().load()

    # bare repository with dumb protocol only URL
    self.repo_url = f"http://{httpd.server_name}:{httpd.server_port}/{repo_name}"
    self.loader = DumbGitLoaderTest(swh_storage, self.repo_url)
    self.repo = repo

    yield

    # shutdown HTTP server
    httpd.shutdown()
    thread.join()

    # restore HTTP proxy settings if any
    if http_proxy:
        os.environ["http_proxy"] = http_proxy
    if https_proxy:
        os.environ["https_proxy"] = https_proxy