import json
from warnings import warn

import jsonschema
import pytest

# Helpers such as get_example_response, validate_against_schema, AnySupersetOf,
# clean_unstable_elements, extract_search_terms, _get_limit_offset_params and
# get_dataset_search_json_response are assumed to be provided elsewhere in this
# test suite, along with fixtures like rsession, base_url_3 and variables.


def test_unknown_state(endpoint):
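    # mutate the canned example so "state" takes an unrecognised value;
    # schema validation should reject it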
    example_response = get_example_response(f"{endpoint}.json")

    example_response["result"]["state"] = "lockdown"

    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(example_response, endpoint)
def test_missing_ids(endpoint):
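    # removing the required "id" field should make schema validation fail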
    example_response = get_example_response(f"{endpoint}.json")

    del example_response["result"]["id"]

    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(example_response, endpoint)
def test_search_datasets_stable_package_by_title_general_term(
        subtests, base_url_3, rsession, stable_pkg, variables):
    limit_param, offset_param = _get_limit_offset_params(base_url_3,
                                                         variables=variables)
    name_terms = extract_search_terms(stable_pkg["name"], 3)
    response = rsession.get(
        f"{base_url_3}/search/dataset?q=name:{stable_pkg['name']}&fl=name&{limit_param}=100"
    )
    assert response.status_code == 200
    rj = get_dataset_search_json_response(response,
                                          base_url_3,
                                          variables=variables)

    with subtests.test("response validity"):
        validate_against_schema(rj, "search_dataset")
        if variables.get("ckan_version") == "2.9":
            # in CKAN 2.9, v1 dataset search has been dropped so results come back as v3
            assert isinstance(rj["results"][0], dict)
        else:
            # check it's using the raw-string result format
            assert isinstance(rj["results"][0], str)
        assert len(rj["results"]) <= 100

    with subtests.test("desired result present"):
        if variables.get("ckan_version") == "2.9":
            assert stable_pkg["name"] in [n['name'] for n in rj["results"]]
        else:
            assert stable_pkg["name"] in rj["results"]
def test_search_dataset_pii_extra():
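    # the schema should reject an "author" extra, which could expose PII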
    example_response = get_example_response("search_dataset.all_fields.json")

    example_response["results"][1]["extras"]["author"] = "Rieux"

    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(example_response, "search_dataset")
def test_organization_list_all_fields_inc_optional(subtests, base_url_3,
                                                   rsession):
    response = rsession.get(
        f"{base_url_3}/action/organization_list?all_fields=1&include_extras=1&include_tags=1"
        "&include_groups=1&limit=5")
    assert response.status_code == 200
    rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "organization_list")
        assert rj["success"] is True
        # assert this is the correct variant of the response schema
        assert isinstance(rj["result"][0], dict)
        # extras are not available when creating the organisation
        # via the ckan static mock harvest source
        # assert "extras" in rj["result"][0]
        assert "tags" in rj["result"][0]
        assert "groups" in rj["result"][0]

    with subtests.test("consistency with organization_show"):
        os_response = rsession.get(
            f"{base_url_3}/action/organization_show?id={rj['result'][0]['id']}"
        )
        assert os_response.status_code == 200

        assert os_response.json()["result"] == AnySupersetOf(rj['result'][0])
def test_format_autocomplete_result_missing_format():
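    # every autocomplete result must include its "Format" field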
    example_response = get_example_response("format_autocomplete.json")

    del example_response["ResultSet"]["Result"][1]["Format"]

    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(example_response, "format_autocomplete")
def test_package_search_pii_field():
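    # the schema should reject a result carrying an "author" field (potential PII)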
    example_response = get_example_response("package_search.json")

    example_response["result"]["results"][1]["author"] = "Rieux"

    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(example_response, "package_search")
def test_package_search_by_full_slug_general_term(
    subtests,
    inc_sync_sensitive,
    base_url_3,
    rsession,
    stable_pkg_slug,
):
    response = rsession.get(
        f"{base_url_3}/action/package_search?q={stable_pkg_slug}&rows=100")
    assert response.status_code == 200
    rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "package_search")
        assert rj["success"] is True
        assert len(rj["result"]["results"]) <= 100

    if inc_sync_sensitive:
        desired_result = tuple(pkg for pkg in rj["result"]["results"]
                               if pkg["name"] == stable_pkg_slug)
        assert desired_result
        if len(desired_result) > 1:
            warn(
                f"Multiple results ({len(desired_result)}) with name = {stable_pkg_slug!r})"
            )

        with subtests.test("approx consistency with package_show"):
            ps_response = rsession.get(
                f"{base_url_3}/action/package_show?id={stable_pkg_slug}")
            assert ps_response.status_code == 200
            assert any(ps_response.json()["result"]["id"] == result["id"]
                       for result in desired_result)
def test_package_search_stable_package(subtests, base_url_3, rsession,
                                       stable_pkg_search):
    stable_pkg = stable_pkg_search
    response = rsession.get(
        f"{base_url_3}/action/package_search?q=name:{stable_pkg['name']}&rows=30"
    )
    assert response.status_code == 200
    rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "package_search")
        assert rj["success"] is True
        assert len(rj["result"]["results"]) <= 30

    desired_result = tuple(pkg for pkg in rj["result"]["results"]
                           if pkg["name"] == stable_pkg["name"])
    assert len(desired_result) == 1

    clean_unstable_elements(desired_result[0])
    clean_unstable_elements(stable_pkg)

    with subtests.test("desired result equality"):
        assert desired_result[0] == AnySupersetOf(stable_pkg,
                                                  recursive=True,
                                                  seq_norm_order=True)
def test_search_datasets_by_full_slug_general_term(subtests,
                                                   inc_sync_sensitive,
                                                   base_url_3, rsession,
                                                   random_pkg_slug, variables):
    limit_param, offset_param = _get_limit_offset_params(base_url_3,
                                                         variables=variables)
    response = rsession.get(
        f"{base_url_3}/search/dataset?q={random_pkg_slug}&{limit_param}=100")
    assert response.status_code == 200
    rj = get_dataset_search_json_response(response, base_url_3, variables)

    with subtests.test("response validity"):
        validate_against_schema(rj, "search_dataset")
        if variables.get('ckan_version') == "2.9":
            # in CKAN 2.9, v1 dataset search has been dropped so results come back as v3
            assert isinstance(rj["results"][0], dict)
        else:
            # check it's using the raw-string result format
            assert isinstance(rj["results"][0], str)
        assert len(rj["results"]) <= 100

    if inc_sync_sensitive:
        with subtests.test("desired result present"):
            if variables.get("ckan_version") == "2.9":
                results = [r["name"] for r in rj["results"]]
            else:
                results = rj["results"]
            desired_result = tuple(name for name in results
                                   if name == random_pkg_slug)
            assert desired_result
            if len(desired_result) > 1:
                warn(
                    f"Multiple results ({len(desired_result)}) with name = {random_pkg_slug!r})"
                )
def test_i18n(base_url, rsession, subtests):
    response = rsession.get(f"{base_url}/i18n/en_GB")
    assert response.status_code == 200
    rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "i18n")
def test_search_datasets_by_full_slug_specific_field_all_fields_response(
        subtests, inc_sync_sensitive, base_url_3, rsession, random_pkg,
        allfields_term, variables):
    if allfields_term.startswith("all_fields") and base_url_3.endswith("/3"):
        pytest.skip("all_fields parameter not supported in v3 endpoint")

    limit_param, offset_param = _get_limit_offset_params(base_url_3,
                                                         variables=variables)
    response = rsession.get(
        f"{base_url_3}/search/dataset?q=name:{random_pkg['name']}&{allfields_term}&{limit_param}=10"
    )
    assert response.status_code == 200
    rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "search_dataset")
        assert isinstance(rj["results"][0], dict)
        assert len(rj["results"]) <= 10

        _validate_embedded_keys(rj)

    if inc_sync_sensitive:
        with subtests.test("desired result present"):
            desired_result = tuple(dst for dst in rj["results"]
                                   if random_pkg["id"] == dst["id"])
            assert len(desired_result) == 1

            assert desired_result[0]["title"] == random_pkg["title"]
            assert desired_result[0]["state"] == random_pkg["state"]
            assert desired_result[0]["organization"] == random_pkg[
                "organization"]["name"]
def test_package_show(subtests, base_url_3, rsession, random_pkg_slug):
    response = rsession.get(
        f"{base_url_3}/action/package_show?id={random_pkg_slug}")
    assert response.status_code == 200
    rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "package_show")
        assert rj["success"] is True
        assert rj["result"]["name"] == random_pkg_slug
        assert all(res["package_id"] == rj['result']['id']
                   for res in rj["result"]["resources"])

    with subtests.test("uuid lookup consistency"):
        # we should be able to look up this same package by its uuid and get an identical response
        uuid_response = rsession.get(
            f"{base_url_3}/action/package_show?id={rj['result']['id']}")
        assert uuid_response.status_code == 200
        assert uuid_response.json() == rj

    with subtests.test("organization consistency"):
        org_response = rsession.get(
            f"{base_url_3}/action/organization_show?id={rj['result']['organization']['id']}"
        )
        assert org_response.status_code == 200
        assert org_response.json()["result"] == AnySupersetOf(
            rj['result']['organization'], recursive=True)
def test_search_datasets_by_full_slug_general_term_id_response(
        subtests, inc_sync_sensitive, base_url_3, rsession, random_pkg,
        variables):
    limit_param, offset_param = _get_limit_offset_params(base_url_3,
                                                         variables=variables)
    response = rsession.get(
        f"{base_url_3}/search/dataset?q={random_pkg['name']}&fl=id&{limit_param}=100"
    )
    assert response.status_code == 200
    rj = get_dataset_search_json_response(response, base_url_3, variables)

    with subtests.test("response validity"):
        validate_against_schema(rj, "search_dataset")

        if variables.get("ckan_version") == "2.9":
            # in CKAN 2.9, v1 dataset search has been dropped so results come back as v3
            assert isinstance(rj["results"][0], dict)
        else:
            # when "id" is chosen for the response, it is presented as raw strings
            assert isinstance(rj["results"][0], str)
        assert len(rj["results"]) <= 100

    if inc_sync_sensitive:
        with subtests.test("desired result present"):
            if variables.get("ckan_version") == "2.9":
                assert random_pkg["id"] in [r['id'] for r in rj["results"]]
            else:
                assert random_pkg["id"] in rj["results"]
def test_package_search_facets(subtests, inc_sync_sensitive, base_url_3,
                               rsession, random_pkg):
    notes_terms = extract_search_terms(random_pkg["notes"], 2)

    response = rsession.get(
        f"{base_url_3}/action/package_search?q={notes_terms}&rows=10"
        "&facet.field=[\"license_id\",\"organization\"]&facet.limit=-1")
    assert response.status_code == 200
    rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "package_search")
        assert rj["success"] is True
        assert len(rj["result"]["results"]) <= 10

    if inc_sync_sensitive:
        with subtests.test("facets include random_pkg's value"):
            assert random_pkg["organization"]["name"] in rj["result"][
                "facets"]["organization"]
            assert any(random_pkg["organization"]["name"] == val["name"]
                       for val in rj["result"]["search_facets"]["organization"]
                       ["items"])

            # not all packages have a license_id
            if random_pkg.get("license_id"):
                assert random_pkg["license_id"] in rj["result"]["facets"][
                    "license_id"]
                assert any(random_pkg["license_id"] == val["name"]
                           for val in rj["result"]["search_facets"]
                           ["license_id"]["items"])
def test_organization_show_inc_datasets_stable_pkg(
    subtests,
    base_url_3,
    rsession,
    stable_org_with_datasets,
):
    response = rsession.get(
        f"{base_url_3}/action/organization_show?id={stable_org_with_datasets['name']}"
        "&include_datasets=1")
    assert response.status_code == 200
    rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "organization_show")

    desired_result = [
        clean_unstable_elements(pkg) for pkg in rj["result"]["packages"]
        if pkg["organization"]["name"] == stable_org_with_datasets["name"]
    ]
    if rj["result"]["package_count"] > 1000 and not desired_result:
        # this view only shows the first 1000 packages - it may have missed the cut
        warn(
            f"Expected package name {stable_org_with_datasets['name']!r} not found in first 1000 listed packages"
        )
    else:
        # example publisher 1 should have 2 datasets attached
        assert len(desired_result) == 2

        with subtests.test("response equality"):
            clean_unstable_elements(stable_org_with_datasets["packages"][0])
            assert stable_org_with_datasets["packages"][0] in desired_result
def test_package_search_by_revision_id_specific_field(
    subtests,
    inc_sync_sensitive,
    base_url_3,
    rsession,
    stable_pkg,
):
    response = rsession.get(
        f"{base_url_3}/action/package_search?fq=revision_id:{stable_pkg['revision_id']}"
        "&rows=1000")

    assert response.status_code == 200
    rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "package_search")
        assert rj["success"] is True
        assert len(rj["result"]["results"]) <= 1000

    with subtests.test("all results match criteria"):
        assert all(stable_pkg["revision_id"] == pkg["revision_id"]
                   for pkg in rj["result"]["results"])

    if inc_sync_sensitive:
        desired_result = tuple(pkg for pkg in rj["result"]["results"]
                               if pkg["id"] == stable_pkg["id"])
        assert len(desired_result) == 1

        with subtests.test("approx consistency with package_show"):
            assert stable_pkg["name"] == desired_result[0]["name"]
            assert stable_pkg["organization"] == desired_result[0][
                "organization"]
def test_package_search_empty_tag():
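    # an empty tag object is invalid and should fail schema validation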
    example_response = get_example_response("package_search.json")

    example_response["result"]["results"][2]["tags"].append({})

    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(example_response, "package_search")
def test_search_datasets_by_full_slug_specific_field_all_fields_response(
        subtests, base_url_3, rsession, stable_dataset, allfields_term,
        variables):
    if allfields_term.startswith("all_fields") and (
            base_url_3.endswith("/3")
            or variables.get('ckan_version') == '2.9'):
        pytest.skip("all_fields parameter not supported in v3 endpoint")

    limit_param, offset_param = _get_limit_offset_params(base_url_3,
                                                         variables=variables)

    response = rsession.get(
        f"{base_url_3}/search/dataset?q=name:{stable_dataset['name']}"
        f"&{allfields_term}&{limit_param}=10")
    assert response.status_code == 200
    if variables.get('ckan_version') == '2.9' and base_url_3.endswith("/3"):
        rj = response.json().get('result')
    else:
        rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "search_dataset")
        assert isinstance(rj["results"][0], dict)
        assert len(rj["results"]) <= 10

        _validate_embedded_keys(rj)

    desired_result = tuple(dst for dst in rj["results"]
                           if stable_dataset["name"] == dst["name"])
    assert len(desired_result) == 1

    with subtests.test("desired result equality"):
        clean_unstable_elements(stable_dataset, is_key_value=False)
        clean_unstable_elements(desired_result[0], is_key_value=False)
        assert desired_result[0] == AnySupersetOf(stable_dataset,
                                                  recursive=True,
                                                  seq_norm_order=True)
def test_organization_show_is_organization():
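    # "is_organization" must be true in an organization_show result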
    example_response = get_example_response("organization_show.json")

    example_response["result"]["is_organization"] = False

    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(example_response, "organization_show")
def test_package_search_noninteger_facet():
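    # facet counts must be integers, not strings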
    example_response = get_example_response("package_search.json")

    example_response["result"]["facets"]["organization"][
        "natural-england"] = "2"

    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(example_response, "package_search")
def test_package_show_org_must_follow_org_schema():
    example_response = get_example_response("package_show.json")

    # note non-existent leap-day
    example_response["result"]["organization"][
        "created"] = "2019-02-29T16:00:00.123Z"

    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(example_response, "package_show")
def test_organization_list(base_url_3, rsession):
    response = rsession.get(f"{base_url_3}/action/organization_list")
    assert response.status_code == 200
    rj = response.json()
    validate_against_schema(rj, "organization_list")

    assert rj["success"] is True
    # assert this is the correct variant of the response
    assert isinstance(rj["result"][0], str)
def test_package_search_pii_extra():
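    # the schema should reject an "author" extra (potential PII) in package results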
    example_response = get_example_response("package_search.json")

    example_response["result"]["results"][1]["extras"].append({
        "key": "author",
        "value": "Rieux"
    })

    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(example_response, "package_search")
def test_no_results(base_url, rsession, subtests):
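    # "telegrams" is assumed not to match any resource format, so the
    # result set should come back empty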
    response = rsession.get(f"{base_url}/2/util/resource/format_autocomplete?incomplete=telegrams")
    assert response.status_code == 200
    rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "format_autocomplete")

    with subtests.test("no results"):
        assert rj["ResultSet"]["Result"] == []
def test_csv(base_url, rsession, subtests):
    response = rsession.get(f"{base_url}/2/util/resource/format_autocomplete?incomplete=cs")
    assert response.status_code == 200
    rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "format_autocomplete")

    with subtests.test("expected result present"):
        assert any(result["Format"].lower().strip() == "csv" for result in rj["ResultSet"]["Result"])
def test_package_show_default_schema(base_url_3, rsession, stable_pkg):
    # cannot use random slugs as they sometimes contain harvest packages which cannot be handled properly
    response = rsession.get(
        f"{base_url_3}/action/package_show?id={stable_pkg['name']}&use_default_schema=1"
    )
    assert response.status_code == 200
    rj = response.json()
    validate_against_schema(rj, "package_show")

    assert rj["success"] is True
def _validate_embedded_keys(response_json):
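    # "data_dict" / "validated_data_dict" contain a JSON-encoded copy of the
    # package; decode each one and validate it against the package_base schema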
    for result in response_json["results"]:
        for key in (
                "data_dict",
                "validated_data_dict",
        ):
            if key in result:
                # note this embedded json uses the "package" schema, despite being
                # in a "dataset".
                inner_package = json.loads(result[key])
                validate_against_schema(inner_package, "package_base")
def test_organization_show_stable_org(subtests, base_url_3, rsession,
                                      stable_org):
    response = rsession.get(
        f"{base_url_3}/action/organization_show?id={stable_org['name']}")
    assert response.status_code == 200
    rj = response.json()

    with subtests.test("response validity"):
        validate_against_schema(rj, "organization_show")

    with subtests.test("response equality"):
        assert rj["result"] == AnySupersetOf(stable_org,
                                             recursive=True,
                                             seq_norm_order=True)
def test_search_datasets_by_org_slug_specific_field_and_title_general_term(
        subtests, inc_sync_sensitive, base_url_3, rsession, stable_pkg,
        org_as_q, variables):
    if base_url_3.endswith("/3") and not org_as_q:
        pytest.skip(
            "field filtering as separate params not supported in v3 endpoint")

    limit_param, offset_param = _get_limit_offset_params(base_url_3,
                                                         variables=variables)
    name_terms = "name:" + stable_pkg["name"]

    # it's possible to query specific fields in two different ways
    # (ckan 2.9 is stricter with search params, so always use the q= form there)
    if org_as_q or variables.get("ckan_version") == "2.9":
        query_frag = f"q={name_terms}+organization:{stable_pkg['organization']['name']}"
    else:
        query_frag = f"q={name_terms}&organization={stable_pkg['organization']['name']}"
    response = rsession.get(f"{base_url_3}/search/dataset?{query_frag}"
                            f"&fl=id,organization,title&{limit_param}=1000")
    assert response.status_code == 200
    rj = get_dataset_search_json_response(response,
                                          base_url_3,
                                          variables=variables)

    with subtests.test("response validity"):
        validate_against_schema(rj, "search_dataset")
        assert isinstance(rj["results"][0], dict)
        assert len(rj["results"]) <= 1000

    with subtests.test("all results match criteria"):
        assert all(stable_pkg["organization"]["name"] == dst["organization"]
                   for dst in rj["results"])
        # we can't reliably test for the search terms because they may have been stemmed
        # and not correspond to exact matches

    if inc_sync_sensitive:
        with subtests.test("desired result present"):
            desired_result = tuple(dst for dst in rj["results"]
                                   if stable_pkg["id"] == dst["id"])
            if rj["count"] > 1000 and not desired_result:
                # we don't have all results - it may well be on a latter page
                warn(
                    f"Expected dataset id {stable_pkg['id']!r} not found on first page of results"
                )
            else:
                assert len(desired_result) == 1
                assert desired_result[0]["title"] == stable_pkg["title"]