def test_load_data_from_storage():
    """Check descriptor, headers and rows of a resource backed by a mocked storage.

    The storage is a ``Mock`` specced on ``Storage`` that exposes one bucket
    named ``data``; inference is run with ``confidence=0.8``.
    """

    def describe_bucket(bucket):
        # Bucket description handed back by the fake storage.
        return {'fields': [{'name': 'id', 'type': 'integer'}]}

    def iter_bucket(bucket):
        # Rows handed back by the fake storage; a fresh list on every call.
        return [[1], [2], [3]]

    expected_schema = {
        'fields': [
            {'format': 'default', 'name': 'id', 'type': 'integer'},
        ],
        'missingValues': [''],
    }
    expected_descriptor = {
        'name': 'data',
        'path': 'data',
        'profile': 'tabular-data-resource',
        'schema': expected_schema,
    }

    fake_storage = Mock(
        spec=Storage,
        buckets=['data'],
        describe=describe_bucket,
        iter=iter_bucket,
    )
    resource = Resource({'path': 'data'}, storage=fake_storage)
    resource.infer(confidence=0.8)

    assert resource.descriptor == expected_descriptor
    assert resource.headers == ['id']
    assert resource.read() == [[1], [2], [3]]
def test_load_data_from_storage():
    """Check descriptor, headers and rows of a resource backed by a mocked storage.

    The storage is a ``Mock`` specced on ``Storage`` that exposes one bucket
    named ``data``; the expected descriptor includes a ``utf-8`` encoding.
    """
    # Attributes configured on the fake storage object.
    storage_attrs = {
        'buckets': ['data'],
        'describe': lambda bucket: {
            'fields': [{'name': 'id', 'type': 'integer'}],
        },
        'iter': lambda bucket: [[1], [2], [3]],
    }
    fake_storage = Mock(spec=Storage, **storage_attrs)

    resource = Resource({'path': 'data'}, storage=fake_storage)
    resource.infer()

    id_field = {'format': 'default', 'name': 'id', 'type': 'integer'}
    expected_descriptor = {
        'name': 'data',
        'path': 'data',
        'encoding': 'utf-8',
        'profile': 'tabular-data-resource',
        'schema': {
            'fields': [id_field],
            'missingValues': [''],
        },
    }

    assert resource.descriptor == expected_descriptor
    assert resource.headers == ['id']
    assert resource.read() == [[1], [2], [3]]
def test_preserve_resource_format_from_descriptor_on_infer_issue_188():
    """Check that ``infer`` keeps the ``format`` given in the descriptor.

    The path ends in ``.csvformat`` while the descriptor declares ``csv``;
    the descriptor returned by ``infer`` is expected to report ``csv``.
    """
    resource = Resource({'path': 'data/data.csvformat', 'format': 'csv'})
    inferred = resource.infer()

    # (name, type) pairs of the expected schema fields, in column order.
    columns = (('city', 'string'), ('population', 'integer'))
    expected_fields = [
        {'format': 'default', 'name': column_name, 'type': column_type}
        for column_name, column_type in columns
    ]

    assert inferred == {
        'encoding': 'utf-8',
        'format': 'csv',
        'mediatype': 'text/csv',
        'name': 'data',
        'path': 'data/data.csvformat',
        'profile': 'tabular-data-resource',
        'schema': {
            'fields': expected_fields,
            'missingValues': [''],
        },
    }
def test_source_multipart_local_infer():
    """Check the descriptor inferred for a resource whose path lists two local chunks."""
    chunk_paths = ('data/chunk1.csv', 'data/chunk2.csv')

    # The resource receives its own list, so the expected value below never
    # shares an object with the descriptor passed in.
    resource = Resource({'path': list(chunk_paths)})
    resource.infer()

    expected_schema = {
        'fields': [
            {'name': 'id', 'type': 'integer', 'format': 'default'},
            {'name': 'name', 'type': 'string', 'format': 'default'},
        ],
        'missingValues': [''],
    }
    expected_descriptor = {
        'name': 'chunk1',
        'profile': 'tabular-data-resource',
        'path': list(chunk_paths),
        'format': 'csv',
        'mediatype': 'text/csv',
        'encoding': 'utf-8',
        'schema': expected_schema,
    }

    assert resource.descriptor == expected_descriptor
def test_preserve_resource_format_from_descriptor_on_infer_issue_188():
    """Check that ``infer`` keeps the ``format`` given in the descriptor.

    The path ends in ``.csvformat`` while the descriptor declares ``csv``;
    the descriptor returned by ``infer`` is expected to report ``csv``.
    """
    source_descriptor = {'path': 'data/data.csvformat', 'format': 'csv'}
    resource = Resource(source_descriptor)
    actual = resource.infer()

    city_field = {'format': 'default', 'name': 'city', 'type': 'string'}
    population_field = {
        'format': 'default',
        'name': 'population',
        'type': 'integer',
    }
    expected_schema = {
        'fields': [city_field, population_field],
        'missingValues': [''],
    }
    expected = {
        'encoding': 'utf-8',
        'format': 'csv',
        'mediatype': 'text/csv',
        'name': 'data',
        'path': 'data/data.csvformat',
        'profile': 'tabular-data-resource',
        'schema': expected_schema,
    }

    assert actual == expected