Exemple #1
0
def test_pandas_parser_nan_with_field_type_information_1143():
    """Check frame dtypes for ``data/issue-1109.csv`` with a declared schema.

    The descriptor declares an integer, a number and a string field, and the
    converted frame is expected to expose ``Int64``, ``float`` and ``object``
    column dtypes in that order.
    """
    declared_fields = [
        {"name": "int", "type": "integer"},
        {"name": "number", "type": "number"},
        {"name": "string", "type": "string"},
    ]
    descriptor = {
        "dialect": {"delimiter": ","},
        "name": "issue-1109",
        "path": "data/issue-1109.csv",
        "schema": {"fields": declared_fields},
    }
    resource = Resource(descriptor)
    frame = resource.to_pandas()
    actual_dtypes = frame.dtypes.values
    expected_dtypes = pd.array([pd.Int64Dtype(), float, object])
    assert all(actual_dtypes == expected_dtypes)
Exemple #2
0
def test_pandas_parser_test_issue_sample_data_1138():
    """Check that the date field of the issue-1138 sample becomes datetime64[ns].

    The resource is a ``;``-delimited, ISO-8859-1 encoded CSV whose first
    field is a date in ``%d.%m.%Y`` format; only the dtype of that first
    column is asserted.
    """
    date_field = {
        "type": "date",
        "format": "%d.%m.%Y",
        "name": "Zeit [MEZ]",
    }
    level_field = {"type": "integer", "name": "Wasserstand"}
    status_field = {
        "type": "string",
        "name": "Status",
        "constraints": {
            "enum": ["qualitätsgesichert", "nicht qualitätsgesichert"],
        },
    }
    descriptor = {
        "path": "data/issue-1138.csv",
        "name": "pegeldaten-schleswig-holstein-114515",
        "profile": "tabular-data-resource",
        "format": "csv",
        "encoding": "iso8859-1",
        "dialect": {"delimiter": ";"},
        "schema": {"fields": [date_field, level_field, status_field]},
    }
    frame = Resource(descriptor).to_pandas()
    first_column_dtype = frame.dtypes.values[0]
    assert is_datetime64_ns_dtype(first_column_dtype)
Exemple #3
0
def test_pandas_parser_preserve_datetime_field_type_1138():
    """Check that the second column of an inline resource is datetime64[ns].

    The inline data has a header row followed by two records; only the dtype
    of the second column is asserted.
    """
    # NOTE(review): the "datetime" field is declared with type "date" although
    # the cell values include a time component - confirm this is intentional.
    schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "datetime", "type": "date"},
        ]
    }
    rows = [
        ["id", "datetime"],
        ["1", "2020-01-01 15:00:00"],
        ["2", "2020-01-01 15:00:00"],
    ]
    descriptor = {"name": "article", "schema": schema, "data": rows}
    frame = Resource(descriptor).to_pandas()
    second_column_dtype = frame.dtypes.values[1]
    assert is_datetime64_ns_dtype(second_column_dtype)
Exemple #4
0
def test_pandas_storage_write_resource_existent_error():
    """Check the error raised when writing a resource to its own storage.

    The storage is obtained from ``resource.to_pandas()`` and the same
    resource is then written to it. The call must raise a
    ``FrictionlessException`` whose error has code ``"storage-error"`` and a
    note mentioning ``"already exists"``.
    """
    resource = Resource(path="data/table.csv")
    storage = resource.to_pandas()
    try:
        with pytest.raises(FrictionlessException) as excinfo:
            storage.write_resource(resource)
        error = excinfo.value.error
        assert error.code == "storage-error"
        assert "already exists" in error.note
    finally:
        # Cleanup storage even when one of the checks above fails, so a
        # failing run does not leave the written resources behind.
        storage.delete_package(list(storage))
Exemple #5
0
def test_pandas_parser_nan_in_integer_resource_column():
    """Check frame dtypes when an inline integer column has an empty cell.

    See issue 1109. The second data row leaves the "int" cell empty; the
    columns are expected to come out as ``Int64``, ``float`` and ``object``.
    """
    rows = [
        ["int", "number", "string"],
        ["1", "2.3", "string"],
        ["", "4.3", "string"],
        ["3", "3.14", "string"],
    ]
    resource = Resource(rows)
    frame = resource.to_pandas()
    expected_dtypes = pd.array([pd.Int64Dtype(), float, object])
    assert all(frame.dtypes.values == expected_dtypes)
Exemple #6
0
def test_pandas_parser_nan_in_integer_csv_column():
    """Check frame dtypes for ``data/issue-1109.csv`` read without a schema.

    See issue 1109. The columns are expected to come out as ``Int64``,
    ``float`` and ``object``.
    """
    resource = Resource("data/issue-1109.csv")
    frame = resource.to_pandas()
    actual_dtypes = frame.dtypes.values
    expected_dtypes = pd.array([pd.Int64Dtype(), float, object])
    assert all(actual_dtypes == expected_dtypes)