def test_lister_cgit_run_populates_last_update(requests_mock_datadir, swh_scheduler): """cgit lister returns last updated date""" url = "https://git.tizen/cgit" urls_without_date = [ f"https://git.tizen.org/cgit/{suffix_url}" for suffix_url in [ "All-Projects", "All-Users", "Lock-Projects", ] ] lister_cgit = CGitLister(swh_scheduler, url=url) stats = lister_cgit.run() expected_nb_origins = 16 assert stats == ListerStats(pages=3, origins=expected_nb_origins) # test page parsing scheduler_origins = swh_scheduler.get_listed_origins( lister_cgit.lister_obj.id).results assert len(scheduler_origins) == expected_nb_origins # test listed repositories for listed_origin in scheduler_origins: if listed_origin.url in urls_without_date: assert listed_origin.last_update is None else: assert listed_origin.last_update is not None
def test_lister_cgit_with_base_git_url(url, base_git_url, expected_nb_origins, requests_mock_datadir, swh_scheduler): """With base git url provided, listed urls should be the computed origin urls """ lister_cgit = CGitLister( swh_scheduler, url=url, base_git_url=base_git_url, ) stats = lister_cgit.run() assert stats == ListerStats(pages=1, origins=expected_nb_origins) # test page parsing scheduler_origins = swh_scheduler.get_listed_origins( lister_cgit.lister_obj.id).results assert len(scheduler_origins) == expected_nb_origins # test listed repositories for listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" assert listed_origin.url.startswith(base_git_url) assert (listed_origin.url.startswith(url) is False), f"url should be mapped to {base_git_url}"
def test_lister_cgit_run_with_page(requests_mock_datadir, swh_scheduler): """cgit lister supports pagination""" url = "https://git.tizen/cgit/" lister_cgit = CGitLister(swh_scheduler, url=url) stats = lister_cgit.run() expected_nb_origins = 16 assert stats == ListerStats(pages=3, origins=expected_nb_origins) # test page parsing scheduler_origins = swh_scheduler.get_listed_origins( lister_cgit.lister_obj.id).results assert len(scheduler_origins) == expected_nb_origins # test listed repositories for listed_origin in scheduler_origins: assert listed_origin.visit_type == "git" assert listed_origin.url.startswith("https://git.tizen") # test user agent content assert len(requests_mock_datadir.request_history) != 0 for request in requests_mock_datadir.request_history: assert "User-Agent" in request.headers user_agent = request.headers["User-Agent"] assert "Software Heritage Lister" in user_agent assert __version__ in user_agent
def test_lister_cgit_get_origin_from_repo_failing( requests_mock_datadir_missing_url, swh_scheduler): url = "https://git.tizen/cgit/" lister_cgit = CGitLister(swh_scheduler, url=url) stats = lister_cgit.run() expected_nb_origins = 15 assert stats == ListerStats(pages=3, origins=expected_nb_origins)