def test_pandas_parser_nan_with_field_type_information_1143():
    """Check pandas dtypes when the schema declares every field type.

    Loads ``data/issue-1109.csv`` through a descriptor that explicitly types
    the three columns as integer, number and string, converts the resource to
    a DataFrame and expects the dtypes ``Int64``, ``float`` and ``object``
    in that order.
    """
    declared_fields = [
        {"name": "int", "type": "integer"},
        {"name": "number", "type": "number"},
        {"name": "string", "type": "string"},
    ]
    descriptor = {
        "dialect": {"delimiter": ","},
        "name": "issue-1109",
        "path": "data/issue-1109.csv",
        "schema": {"fields": declared_fields},
    }

    frame = Resource(descriptor).to_pandas()

    # The integer column is expected as the nullable Int64 extension dtype
    # (presumably because the CSV has an empty integer cell -- see issue 1109).
    expected_dtypes = pd.array([pd.Int64Dtype(), float, object])
    assert all(frame.dtypes.values == expected_dtypes)
def test_pandas_parser_test_issue_sample_data_1138():
    """Check that a custom-format date column becomes datetime64[ns].

    Uses the sample file ``data/issue-1138.csv`` (semicolon-delimited,
    ISO-8859-1 encoded) whose first field is a date in ``%d.%m.%Y`` format,
    and asserts that the first DataFrame column has a datetime64[ns] dtype.
    """
    status_field = {
        "type": "string",
        "name": "Status",
        "constraints": {
            "enum": ["qualitätsgesichert", "nicht qualitätsgesichert"],
        },
    }
    table_schema = {
        "fields": [
            {"type": "date", "format": "%d.%m.%Y", "name": "Zeit [MEZ]"},
            {"type": "integer", "name": "Wasserstand"},
            status_field,
        ]
    }
    descriptor = {
        "path": "data/issue-1138.csv",
        "name": "pegeldaten-schleswig-holstein-114515",
        "profile": "tabular-data-resource",
        "format": "csv",
        "encoding": "iso8859-1",
        "dialect": {"delimiter": ";"},
        "schema": table_schema,
    }

    frame = Resource(descriptor).to_pandas()

    # Column 0 is the "Zeit [MEZ]" date field.
    first_dtype = frame.dtypes.values[0]
    assert is_datetime64_ns_dtype(first_dtype)
def test_pandas_parser_preserve_datetime_field_type_1138():
    """Check that a ``datetime`` field is exported as datetime64[ns].

    Builds an inline resource with an integer ``id`` column and a ``datetime``
    column whose cells are full timestamps, converts it to a DataFrame and
    asserts that the second column has a datetime64[ns] dtype.
    """
    descriptor = {
        "name": "article",
        "schema": {
            "fields": [
                {"name": "id", "type": "integer"},
                # The cells carry a time-of-day component, so the field must
                # be declared as "datetime"; a "date" declaration does not
                # match values such as "2020-01-01 15:00:00" and would not
                # exercise the datetime code path this test is named for.
                {"name": "datetime", "type": "datetime"},
            ]
        },
        "data": [
            ["id", "datetime"],
            ["1", "2020-01-01 15:00:00"],
            ["2", "2020-01-01 15:00:00"],
        ],
    }

    resource = Resource(descriptor)
    df = resource.to_pandas()

    # Column 1 is the "datetime" field.
    assert is_datetime64_ns_dtype(df.dtypes.values[1])
def test_pandas_storage_write_resource_existent_error():
    """Check that re-writing an existing resource raises a storage error.

    The object returned by ``Resource.to_pandas()`` is used as a storage
    (presumably it already holds the resource that produced it). Writing the
    same resource into it again must raise ``FrictionlessException`` whose
    error has code ``storage-error`` and a note mentioning "already exists".
    """
    source = Resource(path="data/table.csv")
    pandas_storage = source.to_pandas()

    with pytest.raises(FrictionlessException) as raised:
        pandas_storage.write_resource(source)

    storage_error = raised.value.error
    assert storage_error.code == "storage-error"
    assert storage_error.note.count("already exists") > 0

    # Cleanup storage
    stored_names = list(pandas_storage)
    pandas_storage.delete_package(stored_names)
def test_pandas_parser_nan_in_integer_resource_column():
    """Check inferred pandas dtypes for inline data with an empty int cell.

    The inline table has an ``int`` column whose second row is an empty
    string. After conversion to a DataFrame the dtypes are expected to be
    ``Int64``, ``float`` and ``object`` in column order.
    """
    # see issue 1109
    header = ["int", "number", "string"]
    rows = [
        ["1", "2.3", "string"],
        ["", "4.3", "string"],  # empty cell in the integer column
        ["3", "3.14", "string"],
    ]

    frame = Resource([header, *rows]).to_pandas()

    expected_dtypes = pd.array([pd.Int64Dtype(), float, object])
    assert all(frame.dtypes.values == expected_dtypes)
def test_pandas_parser_nan_in_integer_csv_column():
    """Check inferred pandas dtypes for ``data/issue-1109.csv``.

    No schema is supplied; the resource is built from the file path alone and
    the resulting DataFrame is expected to have the dtypes ``Int64``,
    ``float`` and ``object`` in column order.
    """
    # see issue 1109
    frame = Resource("data/issue-1109.csv").to_pandas()

    expected_dtypes = pd.array([pd.Int64Dtype(), float, object])
    actual_dtypes = frame.dtypes.values
    assert all(actual_dtypes == expected_dtypes)