コード例 #1
0
def test_validate_schema_primary_key_error_composite():
    """Check error reporting for a composite ``["id", "name"]`` primary key.

    The inline table repeats the ``[1, "John"]`` row and ends with a
    ``["", None]`` row. The flattened report is expected to hold a
    ``primary-key-error`` at row position 5, then a ``blank-row`` and a
    ``primary-key-error`` at row position 6.
    """
    rows = [
        ["id", "name"],
        [1, "Alex"],
        [1, "John"],
        ["", "Paul"],
        [1, "John"],
        ["", None],
    ]
    id_field = {"name": "id", "type": "integer"}
    name_field = {"name": "name", "type": "string"}
    descriptor = {
        "fields": [id_field, name_field],
        "primaryKey": ["id", "name"],
    }
    expected = [
        [5, None, "primary-key-error"],
        [6, None, "blank-row"],
        [6, None, "primary-key-error"],
    ]
    report = Resource(rows, schema=descriptor).validate()
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == expected
コード例 #2
0
def test_validate_defective_rows():
    """Validate ``data/defective-rows.csv`` and check the reported cell errors.

    Expects a ``missing-cell`` at row 2 / field 3 and an ``extra-cell`` at
    row 3 / field 4, in that order.
    """
    expected = [
        [2, 3, "missing-cell"],
        [3, 4, "extra-cell"],
    ]
    report = Resource("data/defective-rows.csv").validate()
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == expected
コード例 #3
0
def test_validate_schema_primary_key_error():
    """Validate ``data/unique-field.csv`` picking only ``primary-key-error``.

    The schema is loaded from ``data/unique-field.json``; a single
    ``primary-key-error`` at row position 10 is expected.
    """
    columns = ["rowPosition", "fieldPosition", "code"]
    resource = Resource(
        "data/unique-field.csv",
        schema="data/unique-field.json",
    )
    report = resource.validate(pick_errors=["primary-key-error"])
    assert report.flatten(columns) == [[10, None, "primary-key-error"]]
コード例 #4
0
def test_validate_schema_unique_error_and_type_error():
    """Check that type and uniqueness errors are reported together.

    ``unique_number`` is declared as a ``number`` field with a ``unique``
    constraint. The inline data has one non-numeric cell (``"bad"``) and
    repeats both ``100`` and ``0``; the report is expected to list one
    ``type-error`` and two ``unique-error`` entries with their cells.
    """
    rows = [
        ["id", "unique_number"],
        ["a1", 100],
        ["a2", "bad"],
        ["a3", 100],
        ["a4", 0],
        ["a5", 0],
    ]
    unique_number_field = {
        "name": "unique_number",
        "type": "number",
        "constraints": {"unique": True},
    }
    descriptor = {"fields": [{"name": "id"}, unique_number_field]}
    expected = [
        [3, 2, "type-error", ["a2", "bad"]],
        [4, 2, "unique-error", ["a3", "100"]],
        [6, 2, "unique-error", ["a5", "0"]],
    ]
    report = Resource(rows, schema=descriptor).validate()
    columns = ["rowPosition", "fieldPosition", "code", "cells"]
    assert report.flatten(columns) == expected
コード例 #5
0
def test_validate_schema_foreign_key_error_self_referencing_invalid():
    """Check a failing self-referencing foreign key (``cat`` -> ``id``).

    The foreign key references the empty resource name ``""``. Validating
    ``data/nested-invalid.csv`` is expected to yield one ``foreign-key-error``
    at row position 6 with cells ``["5", "6", "Rome"]``.
    """
    fields = [
        {"name": "id", "type": "integer"},
        {"name": "cat", "type": "integer"},
        {"name": "name", "type": "string"},
    ]
    self_reference = {
        "fields": "cat",
        "reference": {"resource": "", "fields": "id"},
    }
    descriptor = {
        "path": "data/nested-invalid.csv",
        "schema": {"fields": fields, "foreignKeys": [self_reference]},
    }
    expected = [[6, None, "foreign-key-error", ["5", "6", "Rome"]]]
    report = Resource(descriptor).validate()
    columns = ["rowPosition", "fieldPosition", "code", "cells"]
    assert report.flatten(columns) == expected
コード例 #6
0
def test_validate_invalid_source():
    """Validate the path ``bad.json`` and expect a single ``resource-error``.

    The error note must mention both ``[Errno 2]`` and ``bad.json``.
    """
    report = Resource("bad.json").validate()
    assert report["stats"]["errors"] == 1
    # Unpacking also asserts that exactly one (code, note) pair was reported.
    ((code, note),) = report.flatten(["code", "note"])
    assert code == "resource-error"
    assert "[Errno 2]" in note and "bad.json" in note
コード例 #7
0
def test_validate_resource_errors_with_fields_993():
    """Validate ``data/resource-with-fields-993.json`` and check the message.

    Exactly one ``resource-error`` is expected, with a message pointing to
    ``resource.schema.fields`` as the right place for ``fields``.
    """
    expected_message = 'The data resource has an error: "fields" should be set as "resource.schema.fields" (not "resource.fields").'
    report = Resource("data/resource-with-fields-993.json").validate()
    assert report.flatten(["code", "message"]) == [
        ["resource-error", expected_message],
    ]
コード例 #8
0
def test_validate_layout_limit_fields():
    """Validate ``data/matrix.csv`` with ``Layout(limit_fields=1)``.

    Expects the header to be just ``["f1"]``, 4 rows in the stats, and a
    valid task.
    """
    resource = Resource("data/matrix.csv", layout=Layout(limit_fields=1))
    task = resource.validate().task
    assert task.resource.header == ["f1"]
    assert task.resource.stats["rows"] == 4
    assert task.valid
コード例 #9
0
def test_validate_invalid_resource_original():
    """Validate a path-only descriptor with ``original=True``.

    Expects one ``resource-error`` whose note reports the descriptor as not
    valid under any of the given schemas.
    """
    expected_note = '"{\'path\': \'data/table.csv\'} is not valid under any of the given schemas" at "" in metadata and at "oneOf" in profile'
    resource = Resource({"path": "data/table.csv"})
    report = resource.validate(original=True)
    assert report.flatten(["code", "note"]) == [
        ["resource-error", expected_note],
    ]
コード例 #10
0
def test_validate_infer_fields_issue_223():
    """Validate inline data with a detector ``schema_patch`` (issue 223).

    The patch targets a field called ``name`` while the table's labels are
    ``name1`` and ``name2``; the report is expected to be valid.
    """
    rows = [
        ["name1", "name2"],
        ["123", "abc"],
        ["456", "def"],
        ["789", "ghi"],
    ]
    patch = {"fields": {"name": {"type": "string"}}}
    resource = Resource(rows, detector=Detector(schema_patch=patch))
    assert resource.validate().valid
コード例 #11
0
def test_validate_invalid_resource():
    """Validate a descriptor whose ``schema`` is the string ``"bad"``.

    Expects exactly one ``schema-error`` whose note mentions both
    ``[Errno 2]`` and ``bad``.
    """
    descriptor = {"path": "data/table.csv", "schema": "bad"}
    report = Resource(descriptor).validate()
    assert report["stats"]["errors"] == 1
    # Unpacking also asserts that exactly one (code, note) pair was reported.
    ((code, note),) = report.flatten(["code", "note"])
    assert code == "schema-error"
    assert "[Errno 2]" in note and "bad" in note
コード例 #12
0
def test_validate_detector_sync_schema():
    """Validate ``data/sync-schema.csv`` with ``Detector(schema_sync=True)``.

    The schema is supplied with fields ordered ``id``, ``name``; after
    validation the report must be valid and the resource schema is expected
    to list the fields as ``name``, ``id``.
    """
    id_field = {"name": "id", "type": "integer"}
    name_field = {"name": "name", "type": "string"}
    resource = Resource(
        "data/sync-schema.csv",
        schema={"fields": [id_field, name_field]},
        detector=Detector(schema_sync=True),
    )
    report = resource.validate()
    assert report.valid
    assert report.task.resource.schema == {
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "id", "type": "integer"},
        ],
    }
コード例 #13
0
def test_validate_layout_pick_fields():
    """Validate ``data/matrix.csv`` with ``pick_fields=[2, "f3"]``.

    Expects the header ``["f2", "f3"]``, 4 rows in the stats, and a valid
    task.
    """
    picking = Layout(pick_fields=[2, "f3"])
    task = Resource("data/matrix.csv", layout=picking).validate().task
    assert task.resource.header == ["f2", "f3"]
    assert task.resource.stats["rows"] == 4
    assert task.valid
コード例 #14
0
def test_validate_layout_structure_errors_with_limit_rows():
    """Validate ``data/structure-errors.csv`` with ``Layout(limit_rows=3)``.

    Only a ``blank-row`` error at row position 4 is expected in the report.
    """
    resource = Resource("data/structure-errors.csv", layout=Layout(limit_rows=3))
    report = resource.validate()
    columns = ["rowPosition", "fieldPosition", "code"]
    assert report.flatten(columns) == [[4, None, "blank-row"]]
コード例 #15
0
def test_validate_detector_headers_errors():
    """Validate inline data against a partly matching schema with sync on.

    The schema declares ``id``, a required ``language`` and ``country``,
    while the table has ``id``, ``last_name``, ``first_name`` and
    ``language``. With ``Detector(schema_sync=True)`` the only expected error
    is a ``constraint-error`` at row 4 / field 4, where ``language`` is
    ``None``.
    """
    rows = [
        ["id", "last_name", "first_name", "language"],
        [1, "Alex", "John", "English"],
        [2, "Peters", "John", "Afrikaans"],
        [3, "Smith", "Paul", None],
    ]
    descriptor = {
        "fields": [
            {"name": "id", "type": "number"},
            {"name": "language", "constraints": {"required": True}},
            {"name": "country"},
        ]
    }
    syncing = Detector(schema_sync=True)
    report = Resource(rows, schema=descriptor, detector=syncing).validate()
    columns = ["rowPosition", "fieldPosition", "code", "cells"]
    assert report.flatten(columns) == [
        [4, 4, "constraint-error", ["3", "Smith", "Paul", ""]],
    ]
コード例 #16
0
def test_validate_layout_skip_rows_and_fields():
    """Validate ``data/matrix.csv`` while skipping both rows and fields.

    Uses ``skip_rows=[2, "41"]`` and ``skip_fields=[1, "f4"]``; expects the
    header ``["f2", "f3"]``, 2 rows in the stats, and a valid task.
    """
    skipping = Layout(skip_rows=[2, "41"], skip_fields=[1, "f4"])
    task = Resource("data/matrix.csv", layout=skipping).validate().task
    assert task.resource.header == ["f2", "f3"]
    assert task.resource.stats["rows"] == 2
    assert task.valid
コード例 #17
0
def test_validate_layout_limit_and_offset_rows():
    """Validate ``data/matrix.csv`` with ``limit_rows=2`` and ``offset_rows=1``.

    Expects the full four-label header, 2 rows in the stats, and a valid
    task.
    """
    window = Layout(limit_rows=2, offset_rows=1)
    task = Resource("data/matrix.csv", layout=window).validate().task
    assert task.resource.header == ["f1", "f2", "f3", "f4"]
    assert task.resource.stats["rows"] == 2
    assert task.valid
コード例 #18
0
def test_validate_layout_pick_rows_regex():
    """Validate ``data/matrix.csv`` picking rows by ``"<regex>[f23]1"``.

    Expects the full four-label header, 2 rows in the stats, and a valid
    task.
    """
    picking = Layout(pick_rows=["<regex>[f23]1"])
    task = Resource("data/matrix.csv", layout=picking).validate().task
    assert task.resource.header == ["f1", "f2", "f3", "f4"]
    assert task.resource.stats["rows"] == 2
    assert task.valid
コード例 #19
0
def test_validate_layout_skip_rows_blank():
    """Validate ``data/blank-rows.csv`` with ``skip_rows=["<blank>"]``.

    Expects the header ``["id", "name", "age"]``, 2 rows in the stats, and a
    valid task.
    """
    skipping = Layout(skip_rows=["<blank>"])
    task = Resource("data/blank-rows.csv", layout=skipping).validate().task
    assert task.resource.header == ["id", "name", "age"]
    assert task.resource.stats["rows"] == 2
    assert task.valid
コード例 #20
0
def test_validate_layout_skip_fields_regex():
    """Validate ``data/matrix.csv`` skipping fields by ``"<regex>f[14]"``.

    Expects the header ``["f2", "f3"]``, 4 rows in the stats, and a valid
    task.
    """
    skipping = Layout(skip_fields=["<regex>f[14]"])
    task = Resource("data/matrix.csv", layout=skipping).validate().task
    assert task.resource.header == ["f2", "f3"]
    assert task.resource.stats["rows"] == 4
    assert task.valid
コード例 #21
0
def test_validate_duplicate_headers():
    """Validate ``data/duplicate-headers.csv`` and check label errors.

    Expects ``duplicate-label`` errors at field positions 3 and 5, each with
    no row position.
    """
    expected = [
        [None, 3, "duplicate-label"],
        [None, 5, "duplicate-label"],
    ]
    report = Resource("data/duplicate-headers.csv").validate()
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == expected
コード例 #22
0
def test_validate_resource_with_schema_as_string():
    """Validate a descriptor whose ``schema`` is the path ``data/schema.json``.

    The report is expected to be valid.
    """
    descriptor = {"path": "data/table.csv", "schema": "data/schema.json"}
    assert Resource(descriptor).validate().valid
コード例 #23
0
def test_validate_schema_foreign_key_error_self_referencing():
    """Check a passing self-referencing foreign key (``cat`` -> ``id``).

    The foreign key references the empty resource name ``""``. Validating
    ``data/nested.csv`` is expected to produce a valid report.
    """
    fields = [
        {"name": "id", "type": "integer"},
        {"name": "cat", "type": "integer"},
        {"name": "name", "type": "string"},
    ]
    self_reference = {
        "fields": "cat",
        "reference": {"resource": "", "fields": "id"},
    }
    descriptor = {
        "path": "data/nested.csv",
        "schema": {"fields": fields, "foreignKeys": [self_reference]},
    }
    assert Resource(descriptor).validate().valid
コード例 #24
0
def test_validate_missing_local_file_raises_scheme_error_issue_315():
    """Validate the path ``bad-path.csv`` and expect a ``scheme-error``.

    Exactly one error is expected, and its note must mention both
    ``[Errno 2]`` and ``bad-path.csv`` (issue 315).
    """
    report = Resource("bad-path.csv").validate()
    assert report["stats"]["errors"] == 1
    # Unpacking also asserts that exactly one (code, note) pair was reported.
    ((code, note),) = report.flatten(["code", "note"])
    assert code == "scheme-error"
    assert "[Errno 2]" in note and "bad-path.csv" in note
コード例 #25
0
def test_validate_layout_list_of_numbers_and_headers_join():
    """Validate ``data/matrix.csv`` with multi-row headers joined by ``"."``.

    Uses ``header_rows=[2, 3, 4]`` and ``header_join="."``; expects labels
    such as ``"11.21.31"`` and a valid report.
    """
    expected_header = ["11.21.31", "12.22.32", "13.23.33", "14.24.34"]
    joined = Layout(header_rows=[2, 3, 4], header_join=".")
    report = Resource("data/matrix.csv", layout=joined).validate()
    assert report.task.resource.header == expected_header
    assert report.valid
コード例 #26
0
def test_validate_layout_list_of_numbers():
    """Validate ``data/matrix.csv`` with ``header_rows=[2, 3, 4]``.

    No ``header_join`` is passed; the expected labels are space-separated,
    such as ``"11 21 31"``, and the report must be valid.
    """
    expected_header = ["11 21 31", "12 22 32", "13 23 33", "14 24 34"]
    multi_row = Layout(header_rows=[2, 3, 4])
    report = Resource("data/matrix.csv", layout=multi_row).validate()
    assert report.task.resource.header == expected_header
    assert report.valid
コード例 #27
0
def test_validate_pick_errors():
    """Validate ``data/invalid.csv`` restricted to two picked error codes.

    The task scope must equal the picked codes. The report is expected to
    hold a ``blank-label`` at field position 3 and a ``blank-row`` at row
    position 4.
    """
    picked = ["blank-label", "blank-row"]
    report = Resource("data/invalid.csv").validate(pick_errors=picked)
    assert report.task.scope == ["blank-label", "blank-row"]
    columns = ["rowPosition", "fieldPosition", "code"]
    assert report.flatten(columns) == [
        [None, 3, "blank-label"],
        [4, None, "blank-row"],
    ]
コード例 #28
0
def test_validate_infer_fields_issue_225():
    """Validate inline data whose last row is one cell short (issue 225).

    A detector ``schema_patch`` for a field called ``name`` is supplied; the
    only expected error is a ``missing-cell`` at row 4 / field 2.
    """
    rows = [
        ["name1", "name2"],
        ["123", None],
        ["456", None],
        ["789"],
    ]
    patch = {"fields": {"name": {"type": "string"}}}
    report = Resource(rows, detector=Detector(schema_patch=patch)).validate()
    columns = ["rowPosition", "fieldPosition", "code"]
    assert report.flatten(columns) == [[4, 2, "missing-cell"]]
コード例 #29
0
def test_validate_stats_hash_sha512():
    """Validate ``data/table.csv`` against a declared SHA-512 hash stat.

    The resource is created with ``hashing="sha512"`` and the expected digest
    passed via ``stats``. The validity assertion only runs when ``IS_UNIX``
    is truthy.
    """
    # Named ``expected_hash`` (not ``hash``) so the builtin ``hash()`` is not
    # shadowed inside this test.
    expected_hash = "d52e3f5f5693894282f023b9985967007d7984292e9abd29dca64454500f27fa45b980132d7b496bc84d336af33aeba6caf7730ec1075d6418d74fb8260de4fd"
    resource = Resource("data/table.csv",
                        hashing="sha512",
                        stats={"hash": expected_hash})
    report = resource.validate()
    # NOTE(review): presumably the digest is platform-dependent (e.g. line
    # endings on checkout) — confirm why the check is Unix-only.
    if IS_UNIX:
        assert report.task.valid
コード例 #30
0
def test_validate_source_invalid():
    """Validate inline data containing a non-row item (the string ``"bad"``).

    Expects a single ``source-error`` with neither a row nor a field
    position.
    """
    rows = [["h"], [1], "bad"]
    # A sample size of 1 makes the failure happen while iterating rows
    # rather than while opening the resource.
    small_sample = Detector(sample_size=1)
    report = Resource(rows, detector=small_sample).validate()
    columns = ["rowPosition", "fieldPosition", "code"]
    assert report.flatten(columns) == [[None, None, "source-error"]]