def test_validate_schema_primary_key_error_composite():
    """Check the errors reported for a composite ("id", "name") primary key."""
    rows = [
        ["id", "name"],
        [1, "Alex"],
        [1, "John"],
        ["", "Paul"],
        [1, "John"],
        ["", None],
    ]
    fields = [
        {"name": "id", "type": "integer"},
        {"name": "name", "type": "string"},
    ]
    descriptor = {"fields": fields, "primaryKey": ["id", "name"]}
    expected = [
        [5, None, "primary-key-error"],
        [6, None, "blank-row"],
        [6, None, "primary-key-error"],
    ]
    target = Resource(rows, schema=descriptor)
    report = target.validate()
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == expected
def test_validate_defective_rows():
    """Expect one missing-cell and one extra-cell error for defective-rows.csv."""
    expected = [
        [2, 3, "missing-cell"],
        [3, 4, "extra-cell"],
    ]
    target = Resource("data/defective-rows.csv")
    report = target.validate()
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == expected
def test_validate_schema_primary_key_error():
    """With only primary-key-error picked, a single error at row 10 is expected."""
    target = Resource("data/unique-field.csv", schema="data/unique-field.json")
    report = target.validate(pick_errors=["primary-key-error"])
    flattened = report.flatten(["rowPosition", "fieldPosition", "code"])
    assert flattened == [
        [10, None, "primary-key-error"],
    ]
def test_validate_schema_unique_error_and_type_error():
    """Expect a type-error plus two unique-errors on the "unique_number" column."""
    rows = [
        ["id", "unique_number"],
        ["a1", 100],
        ["a2", "bad"],
        ["a3", 100],
        ["a4", 0],
        ["a5", 0],
    ]
    unique_number_field = {
        "name": "unique_number",
        "type": "number",
        "constraints": {"unique": True},
    }
    descriptor = {"fields": [{"name": "id"}, unique_number_field]}
    expected = [
        [3, 2, "type-error", ["a2", "bad"]],
        [4, 2, "unique-error", ["a3", "100"]],
        [6, 2, "unique-error", ["a5", "0"]],
    ]
    target = Resource(rows, schema=descriptor)
    report = target.validate()
    flattened = report.flatten(["rowPosition", "fieldPosition", "code", "cells"])
    assert flattened == expected
def test_validate_schema_foreign_key_error_self_referencing_invalid():
    """Expect one foreign-key-error (row 6) for the self-referencing "cat" key."""
    foreign_key = {
        "fields": "cat",
        "reference": {"resource": "", "fields": "id"},
    }
    descriptor = {
        "path": "data/nested-invalid.csv",
        "schema": {
            "fields": [
                {"name": "id", "type": "integer"},
                {"name": "cat", "type": "integer"},
                {"name": "name", "type": "string"},
            ],
            "foreignKeys": [foreign_key],
        },
    }
    target = Resource(descriptor)
    report = target.validate()
    flattened = report.flatten(["rowPosition", "fieldPosition", "code", "cells"])
    assert flattened == [
        [6, None, "foreign-key-error", ["5", "6", "Rome"]],
    ]
def test_validate_invalid_source():
    """Validating "bad.json" is expected to give exactly one resource-error."""
    target = Resource("bad.json")
    report = target.validate()
    assert report["stats"]["errors"] == 1
    # The nested unpacking itself enforces "exactly one error with two values".
    flattened = report.flatten(["code", "note"])
    [[code, note]] = flattened
    assert code == "resource-error"
    assert note.count("[Errno 2]")
    assert note.count("bad.json")
def test_validate_resource_errors_with_fields_993():
    """Expect the resource-error message about misplaced "fields" (issue 993)."""
    expected_message = 'The data resource has an error: "fields" should be set as "resource.schema.fields" (not "resource.fields").'
    target = Resource("data/resource-with-fields-993.json")
    report = target.validate()
    flattened = report.flatten(["code", "message"])
    assert flattened == [["resource-error", expected_message]]
def test_validate_layout_limit_fields():
    """With limit_fields=1, expect header ["f1"], 4 rows and a valid task."""
    target = Resource("data/matrix.csv", layout=Layout(limit_fields=1))
    report = target.validate()
    validated = report.task.resource
    assert validated.header == ["f1"]
    assert validated.stats["rows"] == 4
    assert report.task.valid
def test_validate_invalid_resource_original():
    """With original=True, a path-only descriptor is expected to fail the profile."""
    expected_note = '"{\'path\': \'data/table.csv\'} is not valid under any of the given schemas" at "" in metadata and at "oneOf" in profile'
    target = Resource({"path": "data/table.csv"})
    report = target.validate(original=True)
    flattened = report.flatten(["code", "note"])
    assert flattened == [["resource-error", expected_note]]
def test_validate_infer_fields_issue_223():
    """Inline rows validated with a schema_patch detector should be valid."""
    rows = [
        ["name1", "name2"],
        ["123", "abc"],
        ["456", "def"],
        ["789", "ghi"],
    ]
    patch = {"fields": {"name": {"type": "string"}}}
    target = Resource(rows, detector=Detector(schema_patch=patch))
    report = target.validate()
    assert report.valid
def test_validate_invalid_resource():
    """A descriptor whose schema is "bad" should give exactly one schema-error."""
    descriptor = {"path": "data/table.csv", "schema": "bad"}
    target = Resource(descriptor)
    report = target.validate()
    assert report["stats"]["errors"] == 1
    # The nested unpacking itself enforces "exactly one error with two values".
    flattened = report.flatten(["code", "note"])
    [[code, note]] = flattened
    assert code == "schema-error"
    assert note.count("[Errno 2]")
    assert note.count("bad")
def test_validate_detector_sync_schema():
    """With schema_sync=True the resulting schema lists "name" before "id"."""
    id_field = {"name": "id", "type": "integer"}
    name_field = {"name": "name", "type": "string"}
    target = Resource(
        "data/sync-schema.csv",
        schema={"fields": [id_field, name_field]},
        detector=Detector(schema_sync=True),
    )
    report = target.validate()
    assert report.valid
    assert report.task.resource.schema == {
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "id", "type": "integer"},
        ],
    }
def test_validate_layout_pick_fields():
    """Picking fields [2, "f3"] should leave header ["f2", "f3"] and 4 rows."""
    picked = Layout(pick_fields=[2, "f3"])
    target = Resource("data/matrix.csv", layout=picked)
    report = target.validate()
    validated = report.task.resource
    assert validated.header == ["f2", "f3"]
    assert validated.stats["rows"] == 4
    assert report.task.valid
def test_validate_layout_structure_errors_with_limit_rows():
    """With limit_rows=3, only the blank-row error at row 4 is expected."""
    target = Resource("data/structure-errors.csv", layout=Layout(limit_rows=3))
    report = target.validate()
    flattened = report.flatten(["rowPosition", "fieldPosition", "code"])
    assert flattened == [
        [4, None, "blank-row"],
    ]
def test_validate_detector_headers_errors():
    """With schema_sync=True, expect only a constraint-error at row 4, field 4."""
    rows = [
        ["id", "last_name", "first_name", "language"],
        [1, "Alex", "John", "English"],
        [2, "Peters", "John", "Afrikaans"],
        [3, "Smith", "Paul", None],
    ]
    descriptor = {
        "fields": [
            {"name": "id", "type": "number"},
            {"name": "language", "constraints": {"required": True}},
            {"name": "country"},
        ]
    }
    target = Resource(rows, schema=descriptor, detector=Detector(schema_sync=True))
    report = target.validate()
    flattened = report.flatten(["rowPosition", "fieldPosition", "code", "cells"])
    assert flattened == [
        [4, 4, "constraint-error", ["3", "Smith", "Paul", ""]],
    ]
def test_validate_layout_skip_rows_and_fields():
    """Skipping rows and fields should leave header ["f2", "f3"] and 2 rows."""
    skipping = Layout(skip_rows=[2, "41"], skip_fields=[1, "f4"])
    target = Resource("data/matrix.csv", layout=skipping)
    report = target.validate()
    validated = report.task.resource
    assert validated.header == ["f2", "f3"]
    assert validated.stats["rows"] == 2
    assert report.task.valid
def test_validate_layout_limit_and_offset_rows():
    """With limit_rows=2 and offset_rows=1, expect the full header and 2 rows."""
    window = Layout(limit_rows=2, offset_rows=1)
    target = Resource("data/matrix.csv", layout=window)
    report = target.validate()
    validated = report.task.resource
    assert validated.header == ["f1", "f2", "f3", "f4"]
    assert validated.stats["rows"] == 2
    assert report.task.valid
def test_validate_layout_pick_rows_regex():
    """Picking rows with a "<regex>" pattern should keep the header and 2 rows."""
    picked = Layout(pick_rows=["<regex>[f23]1"])
    target = Resource("data/matrix.csv", layout=picked)
    report = target.validate()
    validated = report.task.resource
    assert validated.header == ["f1", "f2", "f3", "f4"]
    assert validated.stats["rows"] == 2
    assert report.task.valid
def test_validate_layout_skip_rows_blank():
    """Skipping "<blank>" rows in blank-rows.csv should leave 2 rows and be valid."""
    skipping = Layout(skip_rows=["<blank>"])
    target = Resource("data/blank-rows.csv", layout=skipping)
    report = target.validate()
    validated = report.task.resource
    assert validated.header == ["id", "name", "age"]
    assert validated.stats["rows"] == 2
    assert report.task.valid
def test_validate_layout_skip_fields_regex():
    """Skipping fields by "<regex>" should leave header ["f2", "f3"] and 4 rows."""
    skipping = Layout(skip_fields=["<regex>f[14]"])
    target = Resource("data/matrix.csv", layout=skipping)
    report = target.validate()
    validated = report.task.resource
    assert validated.header == ["f2", "f3"]
    assert validated.stats["rows"] == 4
    assert report.task.valid
def test_validate_duplicate_headers():
    """Expect duplicate-label errors at field positions 3 and 5."""
    expected = [
        [None, 3, "duplicate-label"],
        [None, 5, "duplicate-label"],
    ]
    target = Resource("data/duplicate-headers.csv")
    report = target.validate()
    assert report.flatten(["rowPosition", "fieldPosition", "code"]) == expected
def test_validate_resource_with_schema_as_string():
    """A descriptor whose "schema" is a path string should validate cleanly."""
    descriptor = {"path": "data/table.csv", "schema": "data/schema.json"}
    target = Resource(descriptor)
    report = target.validate()
    assert report.valid
def test_validate_schema_foreign_key_error_self_referencing():
    """The self-referencing "cat" foreign key on nested.csv should be valid."""
    foreign_key = {
        "fields": "cat",
        "reference": {"resource": "", "fields": "id"},
    }
    descriptor = {
        "path": "data/nested.csv",
        "schema": {
            "fields": [
                {"name": "id", "type": "integer"},
                {"name": "cat", "type": "integer"},
                {"name": "name", "type": "string"},
            ],
            "foreignKeys": [foreign_key],
        },
    }
    target = Resource(descriptor)
    report = target.validate()
    assert report.valid
def test_validate_missing_local_file_raises_scheme_error_issue_315():
    """Validating "bad-path.csv" is expected to give exactly one scheme-error."""
    target = Resource("bad-path.csv")
    report = target.validate()
    assert report["stats"]["errors"] == 1
    # The nested unpacking itself enforces "exactly one error with two values".
    flattened = report.flatten(["code", "note"])
    [[code, note]] = flattened
    assert code == "scheme-error"
    assert note.count("[Errno 2]")
    assert note.count("bad-path.csv")
def test_validate_layout_list_of_numbers_and_headers_join():
    """Header rows [2, 3, 4] with header_join="." give dot-joined labels."""
    expected_header = ["11.21.31", "12.22.32", "13.23.33", "14.24.34"]
    multi_row = Layout(header_rows=[2, 3, 4], header_join=".")
    target = Resource("data/matrix.csv", layout=multi_row)
    report = target.validate()
    assert report.task.resource.header == expected_header
    assert report.valid
def test_validate_layout_list_of_numbers():
    """Header rows [2, 3, 4] without header_join give space-separated labels."""
    expected_header = ["11 21 31", "12 22 32", "13 23 33", "14 24 34"]
    multi_row = Layout(header_rows=[2, 3, 4])
    target = Resource("data/matrix.csv", layout=multi_row)
    report = target.validate()
    assert report.task.resource.header == expected_header
    assert report.valid
def test_validate_pick_errors():
    """pick_errors should set the task scope and limit the reported errors."""
    target = Resource("data/invalid.csv")
    report = target.validate(pick_errors=["blank-label", "blank-row"])
    assert report.task.scope == ["blank-label", "blank-row"]
    flattened = report.flatten(["rowPosition", "fieldPosition", "code"])
    assert flattened == [
        [None, 3, "blank-label"],
        [4, None, "blank-row"],
    ]
def test_validate_infer_fields_issue_225():
    """A short last row under a schema_patch detector should give a missing-cell."""
    rows = [
        ["name1", "name2"],
        ["123", None],
        ["456", None],
        ["789"],
    ]
    patch = {"fields": {"name": {"type": "string"}}}
    target = Resource(rows, detector=Detector(schema_patch=patch))
    report = target.validate()
    flattened = report.flatten(["rowPosition", "fieldPosition", "code"])
    assert flattened == [
        [4, 2, "missing-cell"],
    ]
def test_validate_stats_hash_sha512():
    """Validate data/table.csv against an expected sha512 digest in stats."""
    expected_digest = "d52e3f5f5693894282f023b9985967007d7984292e9abd29dca64454500f27fa45b980132d7b496bc84d336af33aeba6caf7730ec1075d6418d74fb8260de4fd"
    target = Resource(
        "data/table.csv",
        hashing="sha512",
        stats={"hash": expected_digest},
    )
    report = target.validate()
    # The validity check only runs when IS_UNIX is truthy.
    if IS_UNIX:
        assert report.task.valid
def test_validate_source_invalid():
    """A non-list row ("bad") in inline data should give a single source-error."""
    # Reducing sample size to get raise on iter, not on open
    small_sample = Detector(sample_size=1)
    rows = [["h"], [1], "bad"]
    target = Resource(rows, detector=small_sample)
    report = target.validate()
    flattened = report.flatten(["rowPosition", "fieldPosition", "code"])
    assert flattened == [
        [None, None, "source-error"],
    ]