Beispiel #1
0
def test_describe_resource_with_stats():
    """Describe ``data/table.csv`` with ``stats=True`` and check the descriptor.

    Metadata validity is always asserted. The full descriptor comparison
    (including the hash, byte, field and row statistics) only runs when
    ``IS_UNIX`` is truthy.
    """
    expected = {
        "profile": "tabular-data-resource",
        "name": "table",
        "path": "data/table.csv",
        "scheme": "file",
        "format": "csv",
        "hashing": "md5",
        "encoding": "utf-8",
        "schema": {
            "fields": [
                {"name": "id", "type": "integer"},
                {"name": "name", "type": "string"},
            ]
        },
        "stats": {
            "hash": "6c2c61dd9b0e9c6876139a449ed87933",
            "bytes": 30,
            "fields": 2,
            "rows": 2,
        },
    }
    described = Resource.describe("data/table.csv", stats=True)
    assert described.metadata_valid
    # NOTE(review): presumably the hash/bytes differ on other platforms
    # (line endings) - confirm before widening this check.
    if not IS_UNIX:
        return
    assert described == expected
Beispiel #2
0
def test_describe_resource_schema_check_type_boolean_string_tie():
    """A headerless column holding "f" and "stringish" must be typed as string."""
    rows = [["f"], ["stringish"]]
    described = Resource.describe(
        rows,
        layout=Layout(header=False),
        detector=Detector(field_names=["field"]),
    )
    field = described.schema.get_field("field")
    assert field.type == "string"
Beispiel #3
0
def test_describe_resource_values_with_leading_zeros_issue_492():
    """Check schema and rows described from ``data/leading-zeros.csv``.

    The single ``value`` field is expected to be an integer and the rows are
    expected to read back as 1, 2 and 3.
    """
    described = Resource.describe("data/leading-zeros.csv")
    expected_schema = {"fields": [{"name": "value", "type": "integer"}]}
    assert described.schema == expected_schema
    expected_rows = [{"value": number} for number in (1, 2, 3)]
    assert described.read_rows() == expected_rows
Beispiel #4
0
def test_describe_resource_schema_summary():
    """The schema summary of ``data/countries.csv`` contains every expected row."""
    described = Resource.describe("data/countries.csv")
    described.infer()
    summary = described.schema.to_summary()
    expected_lines = (
        "| name        | type    | required   |",
        "| id          | integer |            |",
        "| neighbor_id | string  |            |",
        "| name        | string  |            |",
        "| population  | string  |            |",
    )
    # Each line must occur at least once; evaluation stops at the first miss.
    assert all(line in summary for line in expected_lines)
Beispiel #5
0
def test_describe_resource_schema_increase_limit_issue_212():
    """With ``sample_size=200`` field ``a`` is an integer and ``b`` a number."""
    expected_schema = {
        "fields": [
            {"name": "a", "type": "integer"},
            {"name": "b", "type": "number"},
        ],
    }
    described = Resource.describe(
        "data/table-infer-increase-limit.csv",
        detector=Detector(sample_size=200),
    )
    assert described.schema == expected_schema
Beispiel #6
0
def test_describe_resource_schema_xlsx_file_with_boolean_column_issue_203():
    """Describe ``data/table-infer-boolean.xlsx`` and check the three field types."""
    expected_schema = {
        "fields": [
            {"name": "number", "type": "integer"},
            {"name": "string", "type": "string"},
            {"name": "boolean", "type": "boolean"},
        ],
    }
    described = Resource.describe("data/table-infer-boolean.xlsx")
    assert described.schema == expected_schema
Beispiel #7
0
def test_describe_resource_schema_with_missing_values_default():
    """Describe ``data/table-infer-missing-values.csv`` without a custom detector.

    With no detector argument the ``id`` field is expected to be a string.
    """
    expected_schema = {
        "fields": [
            {"name": "id", "type": "string"},
            {"name": "age", "type": "integer"},
            {"name": "name", "type": "string"},
        ],
    }
    described = Resource.describe("data/table-infer-missing-values.csv")
    assert described.schema == expected_schema
Beispiel #8
0
def test_describe_resource_schema_utf8():
    """Describe ``data/table-infer-utf8.csv`` and check the inferred schema."""
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "age", "type": "integer"},
            {"name": "name", "type": "string"},
        ],
    }
    described = Resource.describe("data/table-infer-utf8.csv")
    assert described.schema == expected_schema
Beispiel #9
0
def test_describe_resource_schema_infer_volume():
    """Describe ``data/table-infer-row-limit.csv`` using ``sample_size=4``."""
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "age", "type": "integer"},
            {"name": "name", "type": "string"},
        ],
    }
    described = Resource.describe(
        "data/table-infer-row-limit.csv",
        detector=Detector(sample_size=4),
    )
    assert described.schema == expected_schema
Beispiel #10
0
def test_describe_resource_schema_with_missing_values_using_the_argument():
    """Describe the missing-values table with ``field_missing_values=["-"]``.

    The ``id`` field is then expected to be an integer and the schema is
    expected to carry ``missingValues`` equal to ``["-"]``.
    """
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "age", "type": "integer"},
            {"name": "name", "type": "string"},
        ],
        "missingValues": ["-"],
    }
    described = Resource.describe(
        "data/table-infer-missing-values.csv",
        detector=Detector(field_missing_values=["-"]),
    )
    assert described.schema == expected_schema
Beispiel #11
0
def test_describe_resource_schema_expand():
    """Describe ``data/table-infer.csv`` with ``expand=True``.

    The expected schema spells out ``format`` on every field, ``bareNumber``
    on the integer fields, and a ``missingValues`` list.
    """
    expected_schema = {
        "fields": [
            {
                "name": "id",
                "type": "integer",
                "format": "default",
                "bareNumber": True,
            },
            {
                "name": "age",
                "type": "integer",
                "format": "default",
                "bareNumber": True,
            },
            {"name": "name", "type": "string", "format": "default"},
        ],
        "missingValues": [""],
    }
    described = Resource.describe("data/table-infer.csv", expand=True)
    assert described.schema == expected_schema
Beispiel #12
0
def test_describe_resource():
    """Describe ``data/table.csv`` and compare the whole resource descriptor."""
    expected = {
        "profile": "tabular-data-resource",
        "name": "table",
        "path": "data/table.csv",
        "scheme": "file",
        "format": "csv",
        "hashing": "md5",
        "encoding": "utf-8",
        "schema": {
            "fields": [
                {"name": "id", "type": "integer"},
                {"name": "name", "type": "string"},
            ]
        },
    }
    described = Resource.describe("data/table.csv")
    assert described.metadata_valid
    assert described == expected
Beispiel #13
0
def test_describe_schema_proper_quote_issue_493():
    """``data/issue-493.csv`` is described with a double-quote char and 126 fields."""
    described = Resource.describe("data/issue-493.csv")
    quote_char = described.dialect.quote_char
    assert quote_char == '"'
    field_count = len(described.schema.fields)
    assert field_count == 126
Beispiel #14
0
def test_describe_file_with_different_characters_name_issue_600():
    """Resource names are derived from file names with unusual characters."""
    # (source path, expected resource name), checked in this order.
    cases = (
        ("data/table_with_data.csv", "table_with_data"),
        ("data/Table With Data.csv", "table-with-data"),
        ("data/Таблица.csv", "tablitsa"),
    )
    for source_path, expected_name in cases:
        assert Resource.describe(source_path).name == expected_name
Beispiel #15
0
def test_describe_resource_compression_gzip_issue_606():
    """Describe ``data/table.csv.gz`` with stats and check name, hash and bytes."""
    described = Resource.describe("data/table.csv.gz", stats=True)
    assert described.name == "table"
    expected_stats = {
        "hash": "edf56ce48e402d83eb08d5dac6aa2ad9",
        "bytes": 61,
    }
    for key, expected_value in expected_stats.items():
        assert described.stats[key] == expected_value
Beispiel #16
0
def test_describe_resource_with_years_in_the_header_issue_825():
    """Year-like header cells in ``data/issue-825.csv`` are kept as field names."""
    expected_names = ["Musei", "2011", "2010"]
    described = Resource.describe("data/issue-825.csv")
    assert described.schema.field_names == expected_names
Beispiel #17
0
def test_describe_resource_with_json_format_issue_827():
    """Describing ``data/table.json`` via the ``path`` keyword names it "table"."""
    source_path = "data/table.json"
    described = Resource.describe(path=source_path)
    assert described.name == "table"