def test_validate_package_errors_with_fields_993():
    """Expect a single package-error about "fields" set at the package level."""
    expected = [[
        "package-error",
        'The data package has an error: "fields" should be set as "resource.schema.fields" (not "package.fields").',
    ]]
    pkg = Package(descriptor="data/package-with-fields-993.json")
    result = pkg.validate()
    assert result.flatten(["code", "message"]) == expected
def test_validate_package_invalid_package_original():
    """Validate a name-less resource with ``original=True`` and expect one resource-error."""
    expected = [[
        "resource-error",
        "\"{'path': 'data/table.csv', 'stats': {}} is not valid under any of the given schemas\" at \"\" in metadata and at \"oneOf\" in profile",
    ]]
    pkg = Package({"resources": [{"path": "data/table.csv"}]})
    result = pkg.validate(original=True)
    assert result.flatten(["code", "note"]) == expected
def test_validate_package_invalid_json_issue_192():
    """Expect a single package-error whose note carries the JSON decoding failure."""
    expected = [[
        "package-error",
        'cannot extract metadata "data/invalid.json" because "Expecting property name enclosed in double quotes: line 2 column 5 (char 6)"',
    ]]
    pkg = Package("data/invalid.json")
    result = pkg.validate()
    assert result.flatten(["code", "note"]) == expected
def test_validate_package_single_resource_wrong_resource_name():
    """Request validation of the resource name "number-twoo" and expect a package-error."""
    expected = [[
        "package-error",
        'The data package has an error: resource "number-twoo" does not exist',
    ]]
    pkg = Package("data/datapackage.json")
    result = pkg.validate(resource_name="number-twoo")
    assert result.flatten(["code", "message"]) == expected
def test_validate_package_descriptor_type_package_invalid():
    """Validate the invalid package given via ``descriptor=`` and compare the flattened errors."""
    expected = [
        [1, 3, None, "blank-row"],
        [1, 3, None, "primary-key-error"],
        [2, 4, None, "blank-row"],
    ]
    pkg = Package(descriptor="data/invalid/datapackage.json")
    result = pkg.validate()
    # flatten() is called without a spec on purpose: the default columns are used.
    assert result.flatten() == expected
def test_validate_package_invalid_descriptor_path():
    """Expect exactly one package-error mentioning errno 2 and the bad path."""
    result = Package("bad/datapackage.json").validate()
    assert result["stats"]["errors"] == 1
    first_error = result["errors"][0]
    assert first_error["code"] == "package-error"
    note = first_error["note"]
    assert "[Errno 2]" in note
    assert "bad/datapackage.json" in note
def test_validate_package_with_schema_as_string():
    """A resource whose schema is given as the string "data/schema.json" validates."""
    resource = {"path": "data/table.csv", "schema": "data/schema.json"}
    pkg = Package({"resources": [resource]})
    result = pkg.validate()
    assert result.valid
def test_validate_package_from_zip_invalid():
    """Validate ``data/package-invalid.zip`` and compare the flattened errors."""
    columns = ["taskPosition", "rowPosition", "fieldPosition", "code"]
    expected = [
        [1, 3, None, "blank-row"],
        [1, 3, None, "primary-key-error"],
        [2, 4, None, "blank-row"],
    ]
    pkg = Package("data/package-invalid.zip")
    result = pkg.validate()
    assert result.flatten(columns) == expected
def test_validate_package_schema_foreign_key_internal_resource_violation():
    """Delete one row from the second resource and expect one foreign-key-error."""
    columns = ["rowPosition", "fieldPosition", "code", "cells"]
    expected = [
        [5, None, "foreign-key-error", ["4", "rio", ""]],
    ]
    # Work on a copy so the shared DESCRIPTOR_FK constant is not mutated.
    fk_descriptor = deepcopy(DESCRIPTOR_FK)
    del fk_descriptor["resources"][1]["data"][4]
    pkg = Package(fk_descriptor)
    result = pkg.validate()
    assert result.flatten(columns) == expected
# Example no. 10
# 0
def test_validate_package_with_parallel():
    """Validate the invalid package with ``parallel=True`` and compare the flattened errors."""
    columns = ["taskPosition", "rowPosition", "fieldPosition", "code"]
    expected = [
        [1, 3, None, "blank-row"],
        [1, 3, None, "primary-key-error"],
        [2, 4, None, "blank-row"],
    ]
    pkg = Package("data/invalid/datapackage.json")
    result = pkg.validate(parallel=True)
    assert result.flatten(columns) == expected
# Example no. 11
# 0
def test_validate_package_parallel_from_dict_invalid():
    """Validate an in-memory descriptor in parallel and compare the flattened errors.

    The descriptor is loaded from ``data/invalid/datapackage.json`` and handed
    to ``Package`` as a dict together with ``basepath="data/invalid"``.
    """
    # Read the JSON with an explicit encoding so the result does not depend on
    # the platform locale, and close the handle before validation starts.
    with open("data/invalid/datapackage.json", encoding="utf-8") as file:
        descriptor = json.load(file)
    package = Package(descriptor, basepath="data/invalid")
    report = package.validate(parallel=True)
    assert report.flatten(
        ["taskPosition", "rowPosition", "fieldPosition", "code"]) == [
            [1, 3, None, "blank-row"],
            [1, 3, None, "primary-key-error"],
            [2, 4, None, "blank-row"],
        ]
def test_validate_package_invalid_package():
    """A resource whose schema is the string "bad" yields exactly one schema-error."""
    resource = {"path": "data/table.csv", "schema": "bad"}
    result = Package({"resources": [resource]}).validate()
    assert result["stats"]["errors"] == 1
    first_error = result["errors"][0]
    assert first_error["code"] == "schema-error"
    note = first_error["note"]
    assert "[Errno 2]" in note
    assert "'bad'" in note
def test_validate_package_using_detector_schema_sync_issue_847():
    """Validate a package whose resources use ``Detector(schema_sync=True)``.

    The data has only the ``f1`` column while the schema declares ``f1`` and
    ``f2``; the test asserts that the report is valid once the schema-sync
    detector is attached to every resource.
    """
    package = Package(resources=[
        Resource(
            data=[["f1"], ["v1"], ["v2"], ["v3"]],
            schema=Schema(fields=[Field(
                name="f1"), Field(name="f2")]),
        ),
    ])
    for resource in package.resources:
        resource.detector = Detector(schema_sync=True)
    package = Package(package)
    # Validate only after the detectors are attached and the package is
    # re-wrapped; validating earlier would not exercise the schema_sync setup.
    report = package.validate()
    assert report.valid
def test_validate_package_invalid_table():
    """Validate ``data/invalid.csv`` inside a package and compare the flattened errors."""
    columns = ["rowPosition", "fieldPosition", "code"]
    expected = [
        [None, 3, "blank-label"],
        [None, 4, "duplicate-label"],
        [2, 3, "missing-cell"],
        [2, 4, "missing-cell"],
        [3, 3, "missing-cell"],
        [3, 4, "missing-cell"],
        [4, None, "blank-row"],
        [5, 5, "extra-cell"],
    ]
    pkg = Package({"resources": [{"path": "data/invalid.csv"}]})
    result = pkg.validate()
    assert result.flatten(columns) == expected
# Example no. 15
# 0
def test_validate_package_schema_foreign_key_internal_resource_violation_non_existent(
):
    """Replace the second resource's data and expect four foreign-key-errors."""
    columns = ["rowPosition", "fieldPosition", "code", "cells"]
    expected = [
        [2, None, "foreign-key-error", ["1", "london", "2"]],
        [3, None, "foreign-key-error", ["2", "paris", "3"]],
        [4, None, "foreign-key-error", ["3", "rome", "4"]],
        [5, None, "foreign-key-error", ["4", "rio", ""]],
    ]
    # Work on a copy so the shared DESCRIPTOR_FK constant is not mutated.
    fk_descriptor = deepcopy(DESCRIPTOR_FK)
    fk_descriptor["resources"][1]["data"] = [["label", "population"], [10, 10]]
    pkg = Package(fk_descriptor)
    result = pkg.validate()
    assert result.flatten(columns) == expected
def test_validate_package_with_non_tabular():
    """A package mixing ``data/table.csv`` with ``data/file.txt`` validates."""
    resources = [
        {"path": "data/table.csv"},
        {"path": "data/file.txt"},
    ]
    pkg = Package({"resources": resources})
    result = pkg.validate()
    assert result.valid
def test_validate_package_composite_primary_key_unique_issue_215():
    """Rows that differ in one part of the composite primary key validate."""
    schema = {
        "fields": [{"name": "id1"}, {"name": "id2"}],
        "primaryKey": ["id1", "id2"],
    }
    resource = {
        "name": "name",
        "data": [["id1", "id2"], ["a", "1"], ["a", "2"]],
        "schema": schema,
    }
    pkg = Package({"resources": [resource]})
    result = pkg.validate()
    assert result.valid
# Example no. 18
# 0
def test_validate_package_schema_multiple_foreign_key_resource_violation_non_existent(
):
    """Drop a referenced row, add the multi-key resource, expect one foreign-key-error."""
    columns = ["rowPosition", "fieldPosition", "code", "cells", "note"]
    expected = [
        [
            2,
            None,
            "foreign-key-error",
            ["1", "2", "1.5"],
            'for "from, to": values "1, 2" not found in the lookup table "cities" as "id, next_id"',
        ],
    ]
    # Work on a copy so the shared DESCRIPTOR_FK constant is not mutated.
    fk_descriptor = deepcopy(DESCRIPTOR_FK)
    # remove London
    del fk_descriptor["resources"][0]["data"][1]
    fk_descriptor["resources"].append(MULTI_FK_RESSOURCE)
    pkg = Package(fk_descriptor)
    result = pkg.validate()
    assert result.flatten(columns) == expected
def test_validate_package_composite_primary_key_not_unique_issue_215():
    """Two identical rows under a composite primary key give one primary-key-error.

    ``duplicate-row`` is passed in ``skip_errors`` and does not appear in the
    expected list.
    """
    schema = {
        "fields": [{"name": "id1"}, {"name": "id2"}],
        "primaryKey": ["id1", "id2"],
    }
    resource = {
        "name": "name",
        "data": [["id1", "id2"], ["a", "1"], ["a", "1"]],
        "schema": schema,
    }
    pkg = Package({"resources": [resource]})
    result = pkg.validate(skip_errors=["duplicate-row"])
    expected = [
        [3, None, "primary-key-error"],
    ]
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == expected
def test_validate_package_dialect_header_false():
    """Header-less inline data with ``layout.header`` set to False validates."""
    schema = {
        "fields": [
            {"name": "name"},
            {"name": "age", "type": "integer"},
        ]
    }
    resource = {
        "name": "name",
        "data": [["John", "22"], ["Alex", "33"], ["Paul", "44"]],
        "schema": schema,
        "layout": {"header": False},
    }
    pkg = Package({"resources": [resource]})
    result = pkg.validate()
    assert result.valid
def test_validate_package_with_schema_issue_348():
    """A schema declaring a fourth field absent from the data reports missing label and cells."""
    field_specs = (
        ("id", "string"),
        ("name", "string"),
        ("surname", "string"),
        ("dob", "date"),
    )
    schema = {
        "fields": [
            {"name": field_name, "type": field_type}
            for field_name, field_type in field_specs
        ]
    }
    resource = {
        "name": "people",
        "data": [
            ["id", "name", "surname"],
            ["p1", "Tom", "Hanks"],
            ["p2", "Meryl", "Streep"],
        ],
        "schema": schema,
    }
    pkg = Package({"resources": [resource]})
    result = pkg.validate()
    expected = [
        [None, 4, "missing-label"],
        [2, 4, "missing-cell"],
        [3, 4, "missing-cell"],
    ]
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == expected
# Example no. 22
# 0
def test_validate_package_schema_foreign_key_not_defined():
    """With ``foreignKeys`` removed from the first resource's schema, the package validates."""
    # Work on a copy so the shared DESCRIPTOR_FK constant is not mutated.
    fk_descriptor = deepcopy(DESCRIPTOR_FK)
    del fk_descriptor["resources"][0]["schema"]["foreignKeys"]
    result = Package(fk_descriptor).validate()
    assert result.valid
# Example no. 23
# 0
def test_validate_package_schema_foreign_key_error():
    """Validate an unmodified copy of DESCRIPTOR_FK and expect a valid report.

    NOTE(review): the name says "error" but the assertion checks validity;
    confirm the intended name or expectation.
    """
    fk_descriptor = deepcopy(DESCRIPTOR_FK)
    result = Package(fk_descriptor).validate()
    assert result.valid
# Example no. 24
# 0
def test_validate_package_schema_multiple_foreign_key():
    """DESCRIPTOR_FK extended with MULTI_FK_RESSOURCE validates."""
    # Work on a copy so the shared DESCRIPTOR_FK constant is not mutated.
    fk_descriptor = deepcopy(DESCRIPTOR_FK)
    fk_descriptor["resources"].append(MULTI_FK_RESSOURCE)
    result = Package(fk_descriptor).validate()
    assert result.valid
def test_validate_package_descriptor_type_package():
    """A package built with the ``descriptor=`` keyword from a path validates."""
    pkg = Package(descriptor="data/package/datapackage.json")
    result = pkg.validate()
    assert result.valid
def test_validate_package_from_path():
    """A package built from a positional descriptor path validates."""
    pkg = Package("data/package/datapackage.json")
    result = pkg.validate()
    assert result.valid
def test_validate_package_with_diacritic_symbol_issue_905():
    """Validating the issue-905 package reports three tasks in its stats."""
    pkg = Package(descriptor="data/issue-905/datapackage.json")
    result = pkg.validate()
    task_count = result.stats["tasks"]
    assert task_count == 3
def test_validate_package_with_resource_data_is_a_string_issue_977():
    """Validating ``data/issue-977.json`` yields one position-less package-error."""
    expected = [
        [None, None, None, "package-error"],
    ]
    pkg = Package(descriptor="data/issue-977.json")
    result = pkg.validate()
    # flatten() is called without a spec on purpose: the default columns are used.
    assert result.flatten() == expected
# Example no. 29
# 0
def test_validate_package_parallel_from_dict():
    """Validate an in-memory descriptor in parallel; expect a UserWarning and a valid report.

    The descriptor is loaded from ``data/package/datapackage.json`` and handed
    to ``Package`` as a dict together with ``basepath="data/package"``.
    """
    # Read the JSON with an explicit encoding so the result does not depend on
    # the platform locale, and close the handle before validation starts.
    with open("data/package/datapackage.json", encoding="utf-8") as file:
        descriptor = json.load(file)
    # Construction and validation both stay inside the warning check, as in the
    # original, because either call may be the one that emits the warning.
    with pytest.warns(UserWarning):
        package = Package(descriptor, basepath="data/package")
        report = package.validate(parallel=True)
    assert report.valid
def test_validate_package_from_zip():
    """A package loaded from ``data/package.zip`` validates."""
    pkg = Package("data/package.zip")
    result = pkg.validate()
    assert result.valid