def test_api_snapshot_paginated(api_client, archive_data, snapshot):
    """Walk a snapshot's branches page by page through the API and check
    that pagination (``branches_from``/``branches_count`` parameters plus
    the ``Link`` response header) reassembles the full snapshot.
    """
    branches_offset = 0
    branches_count = 2
    # Flatten the archive's branches into a sorted list so that pages can
    # be addressed by branch name (the API paginates by name).
    snapshot_branches = []
    for k, v in sorted(
            archive_data.snapshot_get(snapshot)["branches"].items()):
        snapshot_branches.append({
            "name": k,
            "target_type": v["target_type"],
            "target": v["target"]
        })
    # Accumulates every page's branches, to compare at the end against the
    # unpaginated response.
    whole_snapshot = {"id": snapshot, "branches": {}, "next_branch": None}
    while branches_offset < len(snapshot_branches):
        branches_from = snapshot_branches[branches_offset]["name"]
        url = reverse(
            "api-1-snapshot",
            url_args={"snapshot_id": snapshot},
            query_params={
                "branches_from": branches_from,
                "branches_count": branches_count,
            },
        )
        rv = check_api_get_responses(api_client, url, status_code=200)
        expected_data = archive_data.snapshot_get_branches(
            snapshot, branches_from, branches_count)
        expected_data = enrich_snapshot(expected_data, rv.wsgi_request)
        branches_offset += branches_count
        # next_branch is the name of the first branch of the NEXT page,
        # or None on the last page.
        if branches_offset < len(snapshot_branches):
            next_branch = snapshot_branches[branches_offset]["name"]
            expected_data["next_branch"] = next_branch
        else:
            expected_data["next_branch"] = None
        assert rv.data == expected_data
        whole_snapshot["branches"].update(expected_data["branches"])
        if branches_offset < len(snapshot_branches):
            # A non-final page must advertise the next page via the
            # Link header (absolute URL, rel="next").
            next_url = rv.wsgi_request.build_absolute_uri(
                reverse(
                    "api-1-snapshot",
                    url_args={"snapshot_id": snapshot},
                    query_params={
                        "branches_from": next_branch,
                        "branches_count": branches_count,
                    },
                ))
            assert rv["Link"] == '<%s>; rel="next"' % next_url
        else:
            # The last page carries no Link header.
            assert not rv.has_header("Link")
    # The concatenation of all pages must equal the unpaginated snapshot.
    url = reverse("api-1-snapshot", url_args={"snapshot_id": snapshot})
    rv = check_api_get_responses(api_client, url, status_code=200)
    assert rv.data == whole_snapshot
def test_apidoc_full_stack_fn(api_client):
    """A fully documented API route must resolve and respond with 200."""
    doc_route_url = reverse(
        "api-1-some-complete-doc-route",
        url_args={"myarg": 1, "myotherarg": 1},
    )
    check_api_get_responses(api_client, doc_route_url, status_code=200)
def test_api_origin_search_visit_type(api_client, mocker, backend):
    """Origin search filtered by visit type: matches for ``git``, empty
    result for an unknown visit type."""
    if backend != "swh-search":
        # equivalent to not configuring search in the config
        mocker.patch("swh.web.common.archive.search", None)
    expected_origins = {
        "https://github.com/wcoder/highlightjs-line-numbers.js",
        "https://github.com/memononen/libtess2",
    }

    def search_with_visit_type(visit_type):
        search_url = reverse(
            "api-1-origin-search",
            url_args={"url_pattern": "github com"},
            query_params={"visit_type": visit_type},
        )
        return check_api_get_responses(api_client, search_url, status_code=200)

    resp = search_with_visit_type("git")
    assert {origin["url"] for origin in resp.data} == expected_origins

    resp = search_with_visit_type("foo")
    assert resp.data == []
def check_save_request_status(
    api_client,
    mocker,
    origin_url,
    expected_request_status,
    expected_task_status,
    scheduler_task_status="next_run_not_scheduled",
    scheduler_task_run_status=None,
    visit_date=None,
):
    """Helper: query the save-origin API for ``origin_url`` with a mocked
    scheduler, and check both the save request status and the save task
    status reported in the response.

    Args:
        expected_request_status: value expected in ``save_request_status``
        expected_task_status: value expected in ``save_task_status``
        scheduler_task_status: status of the mocked scheduler task
        scheduler_task_run_status: status of the mocked task run, or None
            to simulate a task that has not run yet
        visit_date: visit date returned by the mocked visit-info helper
    """
    mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
    # One oneshot load-git task for the origin, in the given status.
    mock_scheduler.get_tasks.return_value = [
        {
            "priority": "high",
            "policy": "oneshot",
            "type": "load-git",
            "arguments": {"kwargs": {"repo_url": origin_url}, "args": []},
            "status": scheduler_task_status,
            "id": 1,
        }
    ]
    if scheduler_task_run_status is None:
        mock_scheduler.get_task_runs.return_value = []
    else:
        # Fabricate a single task run in the requested status.
        mock_scheduler.get_task_runs.return_value = [
            {
                "backend_id": "f00c712c-e820-41ce-a07c-9bf8df914205",
                "ended": datetime.now(tz=timezone.utc) + timedelta(minutes=5),
                "id": 1,
                "metadata": {},
                "scheduled": datetime.now(tz=timezone.utc),
                "started": None,
                "status": scheduler_task_run_status,
                "task": 1,
            }
        ]
    url = reverse(
        "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}
    )
    mock_visit_date = mocker.patch(
        ("swh.web.common.origin_save." "_get_visit_info_for_save_request")
    )
    mock_visit_date.return_value = (visit_date, None)
    response = check_api_get_responses(api_client, url, status_code=200)
    save_request_data = response.data[0]
    assert save_request_data["save_request_status"] == expected_request_status
    assert save_request_data["save_task_status"] == expected_task_status
    # Check that save task status is still available when
    # the scheduler task has been archived
    mock_scheduler.get_tasks.return_value = []
    response = check_api_get_responses(api_client, url, status_code=200)
    save_request_data = response.data[0]
    assert save_request_data["save_task_status"] == expected_task_status
def test_api_vault_cook_notfound(api_client, mocker, directory, revision,
                                 unknown_directory, unknown_revision):
    """Vault endpoints must return 404 (NotFoundExc) when the cooking was
    never requested, when the object to cook does not exist, and when the
    cooked archive is missing.
    """
    mock_vault = mocker.patch("swh.web.common.archive.vault")
    mock_vault.cook.side_effect = NotFoundExc("object not found")
    mock_vault.fetch.side_effect = NotFoundExc("cooked archive not found")
    mock_vault.progress.side_effect = NotFoundExc("cooking request not found")
    # Known objects whose cooking was never requested: GET on the cook
    # endpoint reports "was never requested".
    for obj_type, obj_id in (
        ("directory", directory),
        ("revision_gitfast", revision),
    ):
        # e.g. "revision_gitfast" -> "revision" for user-facing messages
        obj_name = obj_type.split("_")[0]
        url = reverse(
            f"api-1-vault-cook-{obj_type}",
            url_args={f"{obj_type[:3]}_id": obj_id},
        )
        rv = check_api_get_responses(api_client, url, status_code=404)
        assert rv.data["exception"] == "NotFoundExc"
        assert (rv.data["reason"] ==
                f"Cooking of {obj_name} '{obj_id}' was never requested.")
        mock_vault.progress.assert_called_with(obj_type,
                                               hashutil.hash_to_bytes(obj_id))
    # Unknown objects: both requesting a cooking (POST) and fetching the
    # cooked archive (GET) must 404.
    for obj_type, obj_id in (
        ("directory", unknown_directory),
        ("revision_gitfast", unknown_revision),
    ):
        obj_name = obj_type.split("_")[0]
        url = reverse(f"api-1-vault-cook-{obj_type}",
                      url_args={f"{obj_type[:3]}_id": obj_id})
        rv = check_api_post_responses(api_client, url, data=None,
                                      status_code=404)
        assert rv.data["exception"] == "NotFoundExc"
        assert rv.data["reason"] == f"{obj_name.title()} '{obj_id}' not found."
        mock_vault.cook.assert_called_with(obj_type,
                                           hashutil.hash_to_bytes(obj_id),
                                           email=None)
        fetch_url = reverse(
            f"api-1-vault-fetch-{obj_type}",
            url_args={f"{obj_type[:3]}_id": obj_id},
        )
        rv = check_api_get_responses(api_client, fetch_url, status_code=404)
        assert rv.data["exception"] == "NotFoundExc"
        assert (rv.data["reason"] ==
                f"Cooked archive for {obj_name} '{obj_id}' not found.")
        mock_vault.fetch.assert_called_with(obj_type,
                                            hashutil.hash_to_bytes(obj_id))
def test_api_snapshot_null_branch(api_client, archive_data, new_snapshot):
    """A snapshot containing a null (dangling) branch must still be served
    with a 200."""
    snp_dict = new_snapshot.to_dict()
    snp_id = hash_to_hex(snp_dict["id"])
    # Null out a single branch to simulate a dangling reference.
    for branch_name in snp_dict["branches"]:
        snp_dict["branches"][branch_name] = None
        break
    archive_data.snapshot_add([Snapshot.from_dict(snp_dict)])
    snapshot_url = reverse("api-1-snapshot", url_args={"snapshot_id": snp_id})
    check_api_get_responses(api_client, snapshot_url, status_code=200)
def test_api_snapshot_errors(api_client):
    """Malformed snapshot ids yield a 400; well-formed but unknown ids
    yield a 404."""
    unknown_snapshot_id = random_sha1()
    # Too short to be a valid sha1: rejected as bad input.
    bad_id_url = reverse("api-1-snapshot", url_args={"snapshot_id": "63ce369"})
    check_api_get_responses(api_client, bad_id_url, status_code=400)
    # Valid sha1 that is absent from the archive: not found.
    missing_url = reverse(
        "api-1-snapshot", url_args={"snapshot_id": unknown_snapshot_id}
    )
    check_api_get_responses(api_client, missing_url, status_code=404)
def test_invalid_visit_type(api_client):
    """Requesting a save with an unsupported visit type yields a 400."""
    save_url = reverse(
        "api-1-save-origin",
        url_args={
            "visit_type": "foo",
            "origin_url": "https://github.com/torvalds/linux",
        },
    )
    check_api_get_responses(api_client, save_url, status_code=400)
def test_api_content_symbol(api_client, indexer_data, contents_with_ctags):
    """Search contents by ctags symbol, checking both the enriched entries
    and the pagination ``Link`` header.

    Fix: the original wrapped ``next_url`` in an accidental one-element
    tuple (stray trailing comma); the final ``%``-format assertion only
    worked because ``%`` unpacks single-element tuples. The tuple wrapping
    is removed so ``next_url`` is the plain URL string.
    """
    # Index ctags for each content and remember the entry matching the
    # searched symbol name.
    expected_data = {}
    for content_sha1 in contents_with_ctags["sha1s"]:
        indexer_data.content_add_ctags(content_sha1)
        for ctag in indexer_data.content_get_ctags(content_sha1):
            if ctag["name"] == contents_with_ctags["symbol_name"]:
                expected_data[content_sha1] = ctag
                break
    url = reverse(
        "api-1-content-symbol",
        url_args={"q": contents_with_ctags["symbol_name"]},
        query_params={"per_page": 100},
    )
    rv = check_api_get_responses(api_client, url, status_code=200)
    for entry in rv.data:
        content_sha1 = entry["sha1"]
        expected_entry = expected_data[content_sha1]
        # The API enriches each entry with URLs to related endpoints.
        for key, view_name in (
            ("content_url", "api-1-content"),
            ("data_url", "api-1-content-raw"),
            ("license_url", "api-1-content-license"),
            ("language_url", "api-1-content-language"),
            ("filetype_url", "api-1-content-filetype"),
        ):
            expected_entry[key] = reverse(
                view_name,
                url_args={"q": "sha1:%s" % content_sha1},
                request=rv.wsgi_request,
            )
        expected_entry["sha1"] = content_sha1
        del expected_entry["id"]
        assert entry == expected_entry
    # All results fit on one page: no pagination header expected.
    assert "Link" not in rv
    url = reverse(
        "api-1-content-symbol",
        url_args={"q": contents_with_ctags["symbol_name"]},
        query_params={"per_page": 2},
    )
    rv = check_api_get_responses(api_client, url, status_code=200)
    next_url = reverse(
        "api-1-content-symbol",
        url_args={"q": contents_with_ctags["symbol_name"]},
        query_params={"last_sha1": rv.data[1]["sha1"], "per_page": 2},
        request=rv.wsgi_request,
    )
    assert rv["Link"] == '<%s>; rel="next"' % next_url
def test_api_origin_search(api_client, mocker, backend):
    """Origin URL-pattern search: check truncated, exact-size and
    oversized limits against the expected origin set."""
    if backend != "swh-search":
        # equivalent to not configuring search in the config
        mocker.patch("swh.web.common.archive.search", None)
    expected_origins = {
        "https://github.com/wcoder/highlightjs-line-numbers.js",
        "https://github.com/memononen/libtess2",
    }
    # limit=1 truncates, limit=2 returns all, limit=10 is more than available
    for limit in (1, 2, 10):
        search_url = reverse(
            "api-1-origin-search",
            url_args={"url_pattern": "github.com"},
            query_params={"limit": limit},
        )
        rv = check_api_get_responses(api_client, search_url, status_code=200)
        found_urls = {origin["url"] for origin in rv.data}
        if limit < len(expected_origins):
            assert len(rv.data) == limit
            assert found_urls <= expected_origins
        else:
            assert found_urls == expected_origins
        # Every returned entry must be the enriched form of its origin.
        assert rv.data == [
            enrich_origin({"url": origin["url"]}, request=rv.wsgi_request)
            for origin in rv.data
        ]
def test_api_origin_metadata_search(api_client, mocker, backend):
    """Full-text metadata search returns, per origin, the expected tool and
    revision info, with the raw metadata containing the searched value."""
    mock_config = mocker.patch("swh.web.common.archive.config")
    mock_config.get_config.return_value = {"metadata_search_backend": backend}
    url = reverse(
        "api-1-origin-metadata-search",
        query_params={"fulltext": ORIGIN_METADATA_VALUE},
    )
    rv = check_api_get_responses(api_client, url, status_code=200)
    expected_data = [
        {
            "url": origin_url,
            "metadata": {
                "from_revision": master_rev,
                "tool": {
                    "name": INDEXER_TOOL["tool_name"],
                    "version": INDEXER_TOOL["tool_version"],
                    "configuration": INDEXER_TOOL["tool_configuration"],
                    "id": INDEXER_TOOL["id"],
                },
                "mappings": [],
            },
        }
        for origin_url, master_rev in ORIGIN_MASTER_REVISION.items()
    ]
    for i, expected in enumerate(expected_data):
        response = rv.data[i]
        # The indexed metadata payload varies: pop it, just check it
        # contains the searched value somewhere, then compare the rest.
        metadata = response["metadata"].pop("metadata")
        assert any(
            ORIGIN_METADATA_VALUE in json.dumps(val)
            for val in metadata.values()
        )
        assert response == expected
def test_api_snapshot_filtered(api_client, archive_data, snapshot):
    """Filtering snapshot branches by target type matches the archive's own
    filtered listing."""
    branches = [
        {"name": name, "target_type": info["target_type"], "target": info["target"]}
        for name, info in sorted(
            archive_data.snapshot_get(snapshot)["branches"].items()
        )
    ]
    target_type = random.choice(branches)["target_type"]
    url = reverse(
        "api-1-snapshot",
        url_args={"snapshot_id": snapshot},
        query_params={"target_types": target_type},
    )
    rv = check_api_get_responses(api_client, url, status_code=200)
    expected_data = enrich_snapshot(
        archive_data.snapshot_get_branches(snapshot, target_types=target_type),
        rv.wsgi_request,
    )
    assert rv.data == expected_data
def test_api_origin_not_found(api_client, new_origin):
    """An unarchived origin URL yields a 404 with a NotFoundExc payload."""
    lookup_url = reverse("api-1-origin", url_args={"origin_url": new_origin.url})
    resp = check_api_get_responses(api_client, lookup_url, status_code=404)
    expected_error = {
        "exception": "NotFoundExc",
        "reason": "Origin with url %s not found!" % new_origin.url,
    }
    assert resp.data == expected_error
def test_api_origin_by_url(api_client, archive_data, origin):
    """Looking up an archived origin by URL returns its enriched record."""
    origin_url = origin["url"]
    lookup_url = reverse("api-1-origin", url_args={"origin_url": origin_url})
    resp = check_api_get_responses(api_client, lookup_url, status_code=200)
    expected_origin = enrich_origin(
        archive_data.origin_get([origin_url])[0], resp.wsgi_request
    )
    assert resp.data == expected_origin
def test_api_origin_intrinsic_metadata(api_client, origin):
    """The intrinsic-metadata endpoint returns the indexed key/value."""
    metadata_url = reverse(
        "api-origin-intrinsic-metadata", url_args={"origin_url": origin["url"]}
    )
    resp = check_api_get_responses(api_client, metadata_url, status_code=200)
    assert resp.data == {ORIGIN_METADATA_KEY: ORIGIN_METADATA_VALUE}
def test_api_snapshot_no_pull_request_branches_filtering(
    api_client, archive_data, origin
):
    """Pull request branches should not be filtered out when querying
    a snapshot with the Web API."""
    snapshot = archive_data.snapshot_get_latest(origin.url)
    snapshot_url = reverse(
        "api-1-snapshot", url_args={"snapshot_id": snapshot["id"]}
    )
    resp = check_api_get_responses(api_client, snapshot_url, status_code=200)
    assert any(
        branch.startswith("refs/pull/") for branch in resp.data["branches"]
    )
def test_api_revision_directory_ko_not_found(api_client):
    """Requesting the directory of an unknown revision returns a 404.

    Fix: pass ``url_args`` by keyword, as every other ``reverse`` call in
    this module does, instead of relying on positional argument order.
    """
    sha1_git = random_sha1()
    url = reverse("api-1-revision-directory", url_args={"sha1_git": sha1_git})
    rv = check_api_get_responses(api_client, url, status_code=404)
    assert rv.data == {
        "exception": "NotFoundExc",
        "reason": f"Revision with sha1_git {sha1_git} not found.",
    }
def test_api_content_symbol_not_found(api_client):
    """Searching an unknown symbol returns a 404 without a Link header."""
    search_url = reverse("api-1-content-symbol", url_args={"q": "bar"})
    resp = check_api_get_responses(api_client, search_url, status_code=404)
    expected_error = {
        "exception": "NotFoundExc",
        "reason": "No indexed raw content match expression 'bar'.",
    }
    assert resp.data == expected_error
    assert "Link" not in resp
def test_api_revision_not_found(api_client):
    """Looking up an unknown revision id yields a 404 payload."""
    missing_revision = random_sha1()
    lookup_url = reverse(
        "api-1-revision", url_args={"sha1_git": missing_revision}
    )
    resp = check_api_get_responses(api_client, lookup_url, status_code=404)
    assert resp.data == {
        "exception": "NotFoundExc",
        "reason": "Revision with sha1_git %s not found." % missing_revision,
    }
def test_api_directory_not_found(api_client):
    """Looking up an unknown directory id yields a 404 payload."""
    missing_directory = random_sha1()
    lookup_url = reverse(
        "api-1-directory", url_args={"sha1_git": missing_directory}
    )
    resp = check_api_get_responses(api_client, lookup_url, status_code=404)
    assert resp.data == {
        "exception": "NotFoundExc",
        "reason": "Directory with sha1_git %s not found" % missing_directory,
    }
def test_api_revision(api_client, archive_data, revision):
    """Fetching a known revision returns its enriched representation."""
    lookup_url = reverse("api-1-revision", url_args={"sha1_git": revision})
    resp = check_api_get_responses(api_client, lookup_url, status_code=200)
    expected_revision = archive_data.revision_get(revision)
    # enrich_revision mutates its argument in place.
    enrich_revision(expected_revision, resp.wsgi_request)
    assert resp.data == expected_revision
def test_api_snapshot(api_client, archive_data, snapshot):
    """Fetching a snapshot without pagination returns all its branches."""
    snapshot_url = reverse("api-1-snapshot", url_args={"snapshot_id": snapshot})
    resp = check_api_get_responses(api_client, snapshot_url, status_code=200)
    expected_data = dict(archive_data.snapshot_get(snapshot), next_branch=None)
    expected_data = enrich_snapshot(expected_data, resp.wsgi_request)
    assert resp.data == expected_data
def test_api_lookup_origin_visits_raise_error(api_client, mocker):
    """A BadInputExc raised inside the view maps to an HTTP 400."""
    err_msg = "voluntary error to check the bad request middleware."
    mocked_visits = mocker.patch(
        "swh.web.api.views.origin.get_origin_visits")
    mocked_visits.side_effect = BadInputExc(err_msg)
    visits_url = reverse(
        "api-1-origin-visits", url_args={"origin_url": "http://foo"}
    )
    resp = check_api_get_responses(api_client, visits_url, status_code=400)
    assert resp.data == {"exception": "BadInputExc", "reason": err_msg}
def test_api_directory_with_path_found(api_client, archive_data, directory):
    """Looking up a direct child entry by path returns the enriched entry."""
    entries = archive_data.directory_ls(directory)
    picked_entry = random.choice(entries)
    lookup_url = reverse(
        "api-1-directory",
        url_args={"sha1_git": directory, "path": picked_entry["name"]},
    )
    resp = check_api_get_responses(api_client, lookup_url, status_code=200)
    assert resp.data == enrich_directory_entry(picked_entry, resp.wsgi_request)
def test_api_directory(api_client, archive_data, directory):
    """Listing a directory returns each entry enriched with related URLs."""
    listing_url = reverse("api-1-directory", url_args={"sha1_git": directory})
    resp = check_api_get_responses(api_client, listing_url, status_code=200)
    expected_data = [
        enrich_directory_entry(entry, resp.wsgi_request)
        for entry in archive_data.directory_ls(directory)
    ]
    assert resp.data == expected_data
def test_api_origin_metadata_search_limit(api_client, mocker):
    """The metadata-search ``limit`` parameter defaults to 70 and is capped
    at 100 when a larger value is requested."""
    mock_idx_storage = mocker.patch("swh.web.common.archive.idx_storage")
    oimsft = mock_idx_storage.origin_intrinsic_metadata_search_fulltext
    oimsft.side_effect = lambda conjunction, limit: [
        OriginIntrinsicMetadataRow(
            id=origin_url,
            from_revision=hash_to_bytes(master_rev),
            indexer_configuration_id=INDEXER_TOOL["id"],
            metadata={ORIGIN_METADATA_KEY: ORIGIN_METADATA_VALUE},
            mappings=[],
        )
        for origin_url, master_rev in ORIGIN_MASTER_REVISION.items()
    ]
    # (query params, limit effectively passed to the indexer storage):
    # no limit -> default 70; 10 -> 10; 987 -> capped at 100.
    for query_params, effective_limit in (
        ({"fulltext": ORIGIN_METADATA_VALUE}, 70),
        ({"fulltext": ORIGIN_METADATA_VALUE, "limit": 10}, 10),
        ({"fulltext": ORIGIN_METADATA_VALUE, "limit": 987}, 100),
    ):
        url = reverse(
            "api-1-origin-metadata-search", query_params=query_params
        )
        rv = check_api_get_responses(api_client, url, status_code=200)
        assert len(rv.data) == len(ORIGIN_MASTER_REVISION)
        oimsft.assert_called_with(
            conjunction=[ORIGIN_METADATA_VALUE], limit=effective_limit
        )
def test_api_1_stat_counters_raise_error(api_client, mocker):
    """A BadInputExc from the archive layer maps to an HTTP 400."""
    err_msg = "voluntary error to check the bad request middleware."
    mock_archive = mocker.patch("swh.web.api.views.stat.archive")
    mock_archive.stat_counters.side_effect = BadInputExc(err_msg)
    counters_url = reverse("api-1-stat-counters")
    resp = check_api_get_responses(api_client, counters_url, status_code=400)
    assert resp.data == {"exception": "BadInputExc", "reason": err_msg}
def test_api_directory_with_path_not_found(api_client, directory):
    """A nonexistent sub-path under a known directory returns a 404."""
    path = "some/path/to/nonexistent/dir/"
    lookup_url = reverse(
        "api-1-directory", url_args={"sha1_git": directory, "path": path}
    )
    resp = check_api_get_responses(api_client, lookup_url, status_code=404)
    expected_reason = (
        f"Directory entry with path {path} from root directory {directory} not found"
    )
    assert resp.data == {"exception": "NotFoundExc", "reason": expected_reason}
def test_api_lookup_origin_visit_latest_no_visit(api_client, archive_data, new_origin):
    """Latest-visit lookup on an origin without any visit returns a 404."""
    archive_data.origin_add([new_origin])
    latest_url = reverse(
        "api-1-origin-visit-latest", url_args={"origin_url": new_origin.url}
    )
    resp = check_api_get_responses(api_client, latest_url, status_code=404)
    assert resp.data == {
        "exception": "NotFoundExc",
        "reason": "No visit for origin %s found" % new_origin.url,
    }
def test_api_origins_wrong_input(api_client, archive_data):
    """Should fail with 400 if the input is deprecated.
    """
    # The legacy origin_from parameter is rejected in favor of the
    # Link-header-based pagination.
    origins_url = reverse("api-1-origins", query_params={"origin_from": 1})
    resp = check_api_get_responses(api_client, origins_url, status_code=400)
    assert resp.data == {
        "exception": "BadInputExc",
        "reason": "Please use the Link header to browse through result",
    }