def test_validate_schema_json():
    """Check that validating ``data/schema.json`` yields a valid report.

    Renamed from ``test_validate``: a later function in this module carries
    the same name, and that later definition rebinds it at import time, so
    this check was silently never collected or run by pytest.
    """
    report = validate("data/schema.json")
    assert report.valid
def test_validate_duplicate_headers():
    """Expect ``duplicate-header`` errors at field positions 3 and 5."""
    expected = [
        [None, 3, "duplicate-header"],
        [None, 5, "duplicate-header"],
    ]
    result = validate("data/duplicate-headers.csv")
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == expected
def test_validate_skip_rows_and_fields():
    """Skip rows ``2``/``"41"`` and fields ``1``/``"f4"`` of ``data/matrix.csv``.

    Expects the header ``["f2", "f3"]``, a row count of 2 and a valid table.
    """
    result = validate(
        "data/matrix.csv",
        query=Query(skip_rows=[2, "41"], skip_fields=[1, "f4"]),
    )
    assert result.table["header"] == ["f2", "f3"]
    assert result.table.stats["rows"] == 2
    assert result.table.valid
def test_validate_offset_fields():
    """With ``offset_fields=3`` only ``f4`` is left in the header; 4 rows counted."""
    result = validate("data/matrix.csv", query=Query(offset_fields=3))
    assert result.table["header"] == ["f4"]
    assert result.table.stats["rows"] == 4
    assert result.table.valid
def test_validate_pick_rows():
    """With ``pick_rows=[1, 3, "31"]`` the full header is kept and 2 rows counted."""
    result = validate("data/matrix.csv", query=Query(pick_rows=[1, 3, "31"]))
    assert result.table["header"] == ["f1", "f2", "f3", "f4"]
    assert result.table.stats["rows"] == 2
    assert result.table.valid
def test_validate_dialect_delimiter():
    """Validate ``data/delimiter.csv`` with a ``;`` delimiter dialect: valid, 2 rows."""
    semicolon_dialect = {"delimiter": ";"}
    result = validate("data/delimiter.csv", dialect=semicolon_dialect)
    assert result.valid
    assert result.table.stats["rows"] == 2
def test_validate_blank_headers():
    """Expect a single ``blank-header`` error at field position 2."""
    expected = [
        [None, 2, "blank-header"],
    ]
    result = validate("data/blank-headers.csv")
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == expected
def test_validate_headers_none():
    """With ``headers=False`` the report is valid and has 3 rows and no header.

    Also checks that the table dialect records ``header`` as ``False``.
    """
    result = validate("data/without-headers.csv", headers=False)
    assert result.valid
    assert result.table.stats["rows"] == 3
    assert result.table["dialect"]["header"] is False
    assert result.table["header"] == []
def test_validate_inline_not_a_binary_issue_349():
    """A file opened in text mode is reported as a ``scheme-error`` (issue 349)."""
    expected = [
        ["scheme-error", "only byte streams are supported"],
    ]
    # Default open() mode is text, i.e. deliberately not a byte stream.
    with open("data/table.csv") as text_stream:
        result = validate(text_stream)
        assert result.flatten(["code", "note"]) == expected
def test_validate_infer_fields_issue_223():
    """Inline rows plus a ``patch_schema`` for one field still validate (issue 223)."""
    rows = [
        ["name1", "name2"],
        ["123", "abc"],
        ["456", "def"],
        ["789", "ghi"],
    ]
    schema_patch = {"fields": {"name": {"type": "string"}}}
    result = validate(rows, patch_schema=schema_patch)
    assert result.valid
def test_validate_fails_with_wrong_encoding_issue_274():
    """Passing ``encoding="utf-8"`` explicitly gives a valid report (issue 274)."""
    # For now, by default the encoding is detected incorrectly by chardet,
    # so it is supplied explicitly here.
    result = validate("data/encoding-issue-274.csv", encoding="utf-8")
    assert result.valid
def test_validate_source_pathlib_path_table():
    """A ``pathlib.Path`` pointing at ``data/table.csv`` is accepted as a source."""
    table_path = pathlib.Path("data/table.csv")
    result = validate(table_path)
    assert result.valid
def test_validate_source_invalid():
    """A malformed inline source (a bare string among the rows) gives ``source-error``."""
    malformed_rows = [["h"], [1], "bad"]
    expected = [
        [None, None, "source-error"],
    ]
    # A sample size of 1 makes the failure surface while iterating,
    # not while opening the source.
    result = validate(malformed_rows, infer_volume=1)
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == expected
def test_validate():
    """Baseline check: ``data/table.csv`` validates without errors."""
    result = validate("data/table.csv")
    assert result.valid
def test_validate_compression_explicit():
    """``data/table.csv.zip`` validates when ``compression="zip"`` is passed."""
    result = validate("data/table.csv.zip", compression="zip")
    assert result.valid
def test_validate_headers_number():
    """With ``headers=2`` the header of ``data/matrix.csv`` is ``11``..``14``."""
    expected_header = ["11", "12", "13", "14"]
    result = validate("data/matrix.csv", headers=2)
    assert result.table["header"] == expected_header
    assert result.valid
def test_validate_compression_invalid():
    """An unknown ``compression`` value is reported as a ``compression-error``."""
    expected = [
        ["compression-error", 'compression "bad" is not supported'],
    ]
    result = validate("data/table.csv.zip", compression="bad")
    assert result.flatten(["code", "note"]) == expected
def test_validate_headers_list_of_numbers():
    """With ``headers=[2, 3, 4]`` the header cells are space-joined across rows."""
    expected_header = ["11 21 31", "12 22 32", "13 23 33", "14 24 34"]
    result = validate("data/matrix.csv", headers=[2, 3, 4])
    assert result.table["header"] == expected_header
    assert result.valid
def test_validate_pick_fields():
    """With ``pick_fields=[2, "f3"]`` the header is ``f2``, ``f3``; 4 rows counted."""
    result = validate("data/matrix.csv", query=Query(pick_fields=[2, "f3"]))
    assert result.table["header"] == ["f2", "f3"]
    assert result.table.stats["rows"] == 4
    assert result.table.valid
def test_validate_headers_list_of_numbers_and_headers_join():
    """With ``headers=[[2, 3, 4], "."]`` the header cells are joined with dots."""
    expected_header = ["11.21.31", "12.22.32", "13.23.33", "14.24.34"]
    result = validate("data/matrix.csv", headers=[[2, 3, 4], "."])
    assert result.table["header"] == expected_header
    assert result.valid
def test_validate_skip_fields_regex():
    """Skipping fields via ``<regex>f[14]`` leaves ``f2``, ``f3``; 4 rows counted."""
    result = validate(
        "data/matrix.csv",
        query=Query(skip_fields=["<regex>f[14]"]),
    )
    assert result.table["header"] == ["f2", "f3"]
    assert result.table.stats["rows"] == 4
    assert result.table.valid
def test_validate_scheme():
    """``data/table.csv`` validates when ``scheme="file"`` is passed explicitly."""
    result = validate("data/table.csv", scheme="file")
    assert result.valid
def test_validate_limit_and_offset_fields():
    """``limit_fields=2`` with ``offset_fields=1`` keeps ``f2``, ``f3``; 4 rows."""
    result = validate(
        "data/matrix.csv",
        query=Query(limit_fields=2, offset_fields=1),
    )
    assert result.table["header"] == ["f2", "f3"]
    assert result.table.stats["rows"] == 4
    assert result.table.valid
def test_validate_format():
    """``data/table.csv`` validates when ``format="csv"`` is passed explicitly."""
    result = validate("data/table.csv", format="csv")
    assert result.valid
def test_validate_skip_rows_regex():
    """Skipping rows via ``<regex>[14]1`` keeps the full header; 2 rows counted."""
    result = validate(
        "data/matrix.csv",
        query=Query(skip_rows=["<regex>[14]1"]),
    )
    assert result.table["header"] == ["f1", "f2", "f3", "f4"]
    assert result.table.stats["rows"] == 2
    assert result.table.valid
def test_validate_encoding():
    """``data/table.csv`` validates when ``encoding="utf-8"`` is passed explicitly."""
    result = validate("data/table.csv", encoding="utf-8")
    assert result.valid
def test_validate_skip_rows_blank():
    """Skipping ``<blank>`` rows of ``data/blank-rows.csv`` leaves 2 rows."""
    result = validate(
        "data/blank-rows.csv",
        query=Query(skip_rows=["<blank>"]),
    )
    assert result.table["header"] == ["id", "name", "age"]
    assert result.table.stats["rows"] == 2
    assert result.table.valid
def test_validate_compression():
    """``data/table.csv.zip`` validates without an explicit ``compression`` argument."""
    result = validate("data/table.csv.zip")
    assert result.valid
def test_validate_limit_rows():
    """With ``limit_rows=1`` the full header is kept and exactly 1 row counted."""
    result = validate("data/matrix.csv", query=Query(limit_rows=1))
    assert result.table["header"] == ["f1", "f2", "f3", "f4"]
    assert result.table.stats["rows"] == 1
    assert result.table.valid
def test_validate_duplicate_row():
    """The ``duplicate_row`` check reports a ``duplicate-row`` error at row position 4."""
    expected = [
        [4, None, "duplicate-row"],
    ]
    enabled_checks = [checks.duplicate_row()]
    result = validate("data/duplicate-rows.csv", checks=enabled_checks)
    assert result.flatten(["rowPosition", "fieldPosition", "code"]) == expected