예제 #1
0
def test_load_data_from_storage():
    """Infer a resource whose data is served by a mocked storage backend.

    The mock exposes one bucket named ``data`` that reports a single integer
    ``id`` field and yields three rows. After ``infer(confidence=0.8)`` the
    test checks the resulting descriptor, the headers and the rows returned
    by ``read()``.
    """

    def describe_bucket(bucket):
        # Schema as reported by the storage; note it carries no 'format' key.
        return {'fields': [{'name': 'id', 'type': 'integer'}]}

    def iter_bucket(bucket):
        return [[1], [2], [3]]

    storage = Mock(
        spec=Storage,
        buckets=['data'],
        describe=describe_bucket,
        iter=iter_bucket,
    )
    resource = Resource({'path': 'data'}, storage=storage)
    resource.infer(confidence=0.8)

    expected_schema = {
        'fields': [{'format': 'default', 'name': 'id', 'type': 'integer'}],
        'missingValues': [''],
    }
    expected_descriptor = {
        'name': 'data',
        'path': 'data',
        'profile': 'tabular-data-resource',
        'schema': expected_schema,
    }
    assert resource.descriptor == expected_descriptor
    assert resource.headers == ['id']
    assert resource.read() == [[1], [2], [3]]
예제 #2
0
def test_load_data_from_storage():
    """Infer a storage-backed resource using the default ``infer()`` options.

    A mocked storage provides the ``data`` bucket with one integer ``id``
    field and three rows. The test asserts the descriptor produced by
    ``infer()`` (including the ``encoding`` key), the headers and the rows
    returned by ``read()``.
    """

    def describe_bucket(bucket):
        # Field definition as the storage reports it (without 'format').
        id_field = {'name': 'id', 'type': 'integer'}
        return {'fields': [id_field]}

    def iter_bucket(bucket):
        return [[1], [2], [3]]

    mock_options = {
        'buckets': ['data'],
        'describe': describe_bucket,
        'iter': iter_bucket,
    }
    storage = Mock(spec=Storage, **mock_options)

    resource = Resource({'path': 'data'}, storage=storage)
    resource.infer()

    inferred_field = {'format': 'default', 'name': 'id', 'type': 'integer'}
    expected_descriptor = {
        'name': 'data',
        'path': 'data',
        'encoding': 'utf-8',
        'profile': 'tabular-data-resource',
        'schema': {
            'fields': [inferred_field],
            'missingValues': [''],
        },
    }
    assert resource.descriptor == expected_descriptor
    assert resource.headers == ['id']
    assert resource.read() == [[1], [2], [3]]
예제 #3
0
def test_preserve_resource_format_from_descriptor_on_infer_issue_188():
    """Check that ``infer()`` keeps the ``format`` given in the descriptor.

    The path ends in ``.csvformat`` while the descriptor explicitly declares
    ``'format': 'csv'``; the descriptor returned by ``infer()`` is expected
    to still report ``csv`` (issue 188).
    """
    descriptor = {'path': 'data/data.csvformat', 'format': 'csv'}
    resource = Resource(descriptor)

    inferred = resource.infer()

    expected_fields = [
        {'format': 'default', 'name': 'city', 'type': 'string'},
        {'format': 'default', 'name': 'population', 'type': 'integer'},
    ]
    expected = {
        'encoding': 'utf-8',
        'format': 'csv',
        'mediatype': 'text/csv',
        'name': 'data',
        'path': 'data/data.csvformat',
        'profile': 'tabular-data-resource',
        'schema': {'fields': expected_fields, 'missingValues': ['']},
    }
    assert inferred == expected
예제 #4
0
def test_source_multipart_local_infer():
    """Infer a resource whose ``path`` lists two local CSV chunks.

    After ``infer()`` the descriptor is expected to keep the multipart path
    list, take its name from the first chunk and describe an ``id``/``name``
    schema.
    """
    chunk_paths = ['data/chunk1.csv', 'data/chunk2.csv']
    resource = Resource({'path': chunk_paths})
    resource.infer()

    expected_schema = {
        'fields': [
            {'name': 'id', 'type': 'integer', 'format': 'default'},
            {'name': 'name', 'type': 'string', 'format': 'default'},
        ],
        'missingValues': [''],
    }
    expected_descriptor = {
        'name': 'chunk1',
        'profile': 'tabular-data-resource',
        'path': ['data/chunk1.csv', 'data/chunk2.csv'],
        'format': 'csv',
        'mediatype': 'text/csv',
        'encoding': 'utf-8',
        'schema': expected_schema,
    }
    assert resource.descriptor == expected_descriptor
예제 #5
0
def test_preserve_resource_format_from_descriptor_on_infer_issue_188():
    """Verify the descriptor's explicit ``format`` survives ``infer()``.

    The file path carries a ``.csvformat`` extension, but the descriptor
    declares ``'format': 'csv'``; the inferred descriptor must report that
    same format (issue 188).
    """
    resource = Resource({'path': 'data/data.csvformat', 'format': 'csv'})

    city_field = {'format': 'default', 'name': 'city', 'type': 'string'}
    population_field = {
        'format': 'default',
        'name': 'population',
        'type': 'integer',
    }
    expected_schema = {
        'fields': [city_field, population_field],
        'missingValues': [''],
    }
    expected = {
        'encoding': 'utf-8',
        'format': 'csv',
        'mediatype': 'text/csv',
        'name': 'data',
        'path': 'data/data.csvformat',
        'profile': 'tabular-data-resource',
        'schema': expected_schema,
    }

    actual = resource.infer()
    assert actual == expected