def test_save_data_to_storage():
    """Check that ``save`` calls ``create`` and ``write`` on the storage.

    The storage is a ``Mock`` restricted to the ``Storage`` interface, so
    only the calls made on it are verified.
    """
    expected_schema = {
        'fields': [{'format': 'default', 'name': 'id', 'type': 'integer'}],
        'missingValues': [''],
    }
    inline_rows = [['id'], [1], [2], [3]]
    mocked_storage = Mock(spec=Storage)

    Resource({'data': inline_rows}).save('data', storage=mocked_storage)

    mocked_storage.create.assert_called_with(
        'data', expected_schema, force=True)
    # The written rows are not inspected here; any payload is accepted.
    mocked_storage.write.assert_called_with('data', ANY)
def test_preserve_resource_format_from_descriptor_on_infer_issue_188():
    """Check that ``infer`` keeps the ``format`` given in the descriptor.

    The path ends in ``.csvformat`` while the descriptor states ``csv``;
    the inferred descriptor is expected to report ``csv``.
    """
    source_descriptor = {'path': 'data/data.csvformat', 'format': 'csv'}
    expected_fields = [
        {'format': 'default', 'name': 'city', 'type': 'string'},
        {'format': 'default', 'name': 'population', 'type': 'integer'},
    ]
    expected_descriptor = {
        'encoding': 'utf-8',
        'format': 'csv',
        'mediatype': 'text/csv',
        'name': 'data',
        'path': 'data/data.csvformat',
        'profile': 'tabular-data-resource',
        'schema': {
            'fields': expected_fields,
            'missingValues': [''],
        },
    }
    resource = Resource(source_descriptor)
    assert resource.infer() == expected_descriptor
def test_load_data_from_storage():
    """Check that a resource backed by a mocked storage can be inferred and read."""
    expected_schema = {
        'fields': [{'format': 'default', 'name': 'id', 'type': 'integer'}],
        'missingValues': [''],
    }

    def describe_bucket(bucket):
        # Schema the mocked storage reports for any bucket.
        return {'fields': [{'name': 'id', 'type': 'integer'}]}

    def iter_bucket(bucket):
        # Rows the mocked storage yields for any bucket.
        return [[1], [2], [3]]

    mocked_storage = Mock(
        spec=Storage,
        buckets=['data'],
        describe=describe_bucket,
        iter=iter_bucket,
    )

    resource = Resource({'path': 'data'}, storage=mocked_storage)
    resource.infer()

    expected_descriptor = {
        'name': 'data',
        'path': 'data',
        'encoding': 'utf-8',
        'profile': 'tabular-data-resource',
        'schema': expected_schema,
    }
    assert resource.descriptor == expected_descriptor
    assert resource.headers == ['id']
    assert resource.read() == [[1], [2], [3]]
def test_descriptor_table_tabular_local():
    """Check keyed rows read from a single local CSV path under ``base_path``."""
    expected_rows = [
        {'id': 1, 'name': 'english'},
        {'id': 2, 'name': '中国人'},
    ]
    resource = Resource(
        {
            'name': 'name',
            'profile': 'tabular-data-resource',
            'path': ['resource_data.csv'],
            'schema': 'resource_schema.json',
        },
        base_path='data',
    )
    assert resource.table.read(keyed=True) == expected_rows
def test_descriptor_table_tabular_dialect_custom():
    """Check keyed rows read from a CSV file using a custom dialect."""
    custom_dialect = {
        'delimiter': '|',
        'quoteChar': '#',
        'escapeChar': '-',
        'doubleQuote': False,
        'skipInitialSpace': False,
    }
    resource_descriptor = {
        'name': 'name',
        'profile': 'tabular-data-resource',
        'path': ['dialect.csv'],
        'schema': 'resource_schema.json',
        'dialect': custom_dialect,
    }
    expected_rows = [
        {'id': 1, 'name': 'english'},
        {'id': 2, 'name': ' |##'},
    ]
    resource = Resource(resource_descriptor, base_path='data')
    assert resource.table.read(keyed=True) == expected_rows
def test_descriptor_dereference():
    """Check the descriptor loaded from a JSON file that uses dereferencing.

    The loaded descriptor is compared with the expanded form of the
    expected, fully dereferenced descriptor.
    """
    dereferenced = {
        'name': 'name',
        'data': 'data',
        'schema': {'fields': [{'name': 'name'}]},
        'dialect': {'delimiter': ','},
        'dialects': {'main': {'delimiter': ','}},
    }
    resource = Resource('data/resource_with_dereferencing.json')
    assert resource.descriptor == expand(dereferenced)
def test_descriptor_dereference_remote(patch_get):
    """Check that a schema given as a remote URL is replaced by its content."""
    schema_url = 'http://example.com/schema'
    resource_descriptor = {
        'name': 'name',
        'data': 'data',
        'schema': schema_url,
    }

    # Mocks: serve the schema document for the remote URL.
    patch_get(schema_url, body='{"fields": [{"name": "name"}]}')

    # Tests
    expected_descriptor = expand({
        'name': 'name',
        'data': 'data',
        'schema': {'fields': [{'name': 'name'}]},
    })
    assert Resource(resource_descriptor).descriptor == expected_descriptor
def test_descriptor_expand_tabular_schema():
    """Check the expanded descriptor for a tabular resource with a minimal schema.

    The expected descriptor adds ``encoding``, field ``type``/``format``
    and ``missingValues`` to the input.
    """
    minimal_descriptor = {
        'name': 'name',
        'data': 'data',
        'profile': 'tabular-data-resource',
        'schema': {
            'fields': [{'name': 'name'}],
        },
    }
    expanded_schema = {
        'fields': [{'name': 'name', 'type': 'string', 'format': 'default'}],
        'missingValues': [''],
    }
    expected_descriptor = {
        'name': 'name',
        'data': 'data',
        'profile': 'tabular-data-resource',
        'encoding': 'utf-8',
        'schema': expanded_schema,
    }
    resource = Resource(minimal_descriptor)
    assert resource.descriptor == expected_descriptor
def test_descriptor_table_tabular_multipart_noheader_local():
    """Check a multipart local resource read with ``dialect.header`` set to False.

    The same chunk file is listed twice, so the expected result contains
    the same row twice.
    """
    chunk_paths = ['chunk2.csv', 'chunk2.csv']
    resource_descriptor = {
        'name': 'name',
        'profile': 'tabular-data-resource',
        'path': chunk_paths,
        'schema': 'resource_schema.json',
        'dialect': {'header': False},
    }
    repeated_row = {'id': 2, 'name': '中国人'}
    resource = Resource(resource_descriptor, base_path='data')
    rows = resource.table.read(keyed=True)
    assert rows == [repeated_row, dict(repeated_row)]
def test_descriptor_table_tabular_multipart_remote(patch_get):
    """Check keyed rows read from a resource split across three remote chunks."""
    chunk_bodies = [
        ('http://example.com/chunk1.csv', "id,name\n"),
        ('http://example.com/chunk2.csv', "1,english"),
        ('http://example.com/chunk3.csv', "2,中国人\n"),
    ]
    resource_descriptor = {
        'name': 'name',
        'profile': 'tabular-data-resource',
        'path': [url for url, _ in chunk_bodies],
        'schema': 'resource_schema.json',
    }

    # Mocks: register each chunk URL in order with its body.
    for url, body in chunk_bodies:
        patch_get(url, body=body)

    # Tests
    resource = Resource(resource_descriptor, base_path='data')
    expected_rows = [
        {'id': 1, 'name': 'english'},
        {'id': 2, 'name': '中国人'},
    ]
    assert resource.table.read(keyed=True) == expected_rows
def test_descriptor_table_tabular_multipart_mix_header_local():
    """Check a multipart local resource with ``dialect.header`` True emits a ``UserWarning``."""
    resource_descriptor = {
        'name': 'name',
        'profile': 'tabular-data-resource',
        'path': ['chunk1.csv', 'chunk2.csv'],
        'schema': 'resource_schema.json',
        'dialect': {'header': True},
    }
    expected_rows = [
        {'id': 1, 'name': 'english'},
        {'id': 2, 'name': '中国人'},
    ]
    with pytest.warns(UserWarning):
        resource = Resource(resource_descriptor, base_path='data')
        # The read stays inside the `warns` block so a warning emitted at
        # either construction or read time is captured.
        assert resource.table.read(keyed=True) == expected_rows
def test_data():
    """Check the first three entries of ``resource.data`` for a TSV file."""
    first_three = [
        {
            'Area': '1807.92',
            'Name': 'Acrelândia',
            'Population': '12538',
            'State': 'AC',
        },
        {
            'Area': '186.53',
            'Name': 'Boca da Mata',
            'Population': '25776',
            'State': 'AL',
        },
        {
            'Area': '242.62',
            'Name': 'Capela',
            'Population': '17077',
            'State': 'AL',
        },
    ]
    tsv_resource = Resource({'path': 'data/cities.tsv'})
    assert tsv_resource.data[0:3] == first_three
def test_resource_table_options(patch_get):
    """Check that the ``headers=3`` option passed to ``Resource`` is honoured when reading.

    The mocked body starts with two empty lines, so the header row is the
    third line.
    """
    data_url = 'http://example.com/resource_data.csv'
    resource_descriptor = {
        'name': 'name',
        'profile': 'tabular-data-resource',
        'path': [data_url],
        'schema': 'resource_schema.json',
    }

    # Mocks
    patch_get(data_url, body="\n\nid,name\n1,english\n2,中国人")

    # Tests
    resource = Resource(resource_descriptor, base_path='data', headers=3)
    expected_rows = [
        {'id': 1, 'name': 'english'},
        {'id': 2, 'name': '中国人'},
    ]
    assert resource.table.read(keyed=True) == expected_rows
def test_descriptor_table_tabular_inline():
    """Check keyed rows read from inline ``data`` with a schema file.

    The inline ids are strings; the expected rows carry integer ids.
    """
    inline_rows = [
        ['id', 'name'],
        ['1', 'english'],
        ['2', '中国人'],
    ]
    resource_descriptor = {
        'name': 'name',
        'profile': 'tabular-data-resource',
        'data': inline_rows,
        'schema': 'resource_schema.json',
    }
    expected_rows = [
        {'id': 1, 'name': 'english'},
        {'id': 2, 'name': '中国人'},
    ]
    resource = Resource(resource_descriptor, base_path='data')
    assert resource.table.read(keyed=True) == expected_rows
def test_descriptor_expand_tabular_dialect():
    """Check the expanded descriptor for a tabular resource with a partial dialect.

    The custom ``delimiter`` is expected to be kept while the remaining
    dialect keys appear with the values asserted below.
    """
    partial_descriptor = {
        'name': 'name',
        'data': 'data',
        'profile': 'tabular-data-resource',
        'dialect': {
            'delimiter': 'custom',
        },
    }
    expanded_dialect = {
        'delimiter': 'custom',
        'doubleQuote': True,
        'lineTerminator': '\r\n',
        'quoteChar': '"',
        'skipInitialSpace': True,
        'header': True,
        'caseSensitiveHeader': False,
    }
    expected_descriptor = {
        'name': 'name',
        'data': 'data',
        'profile': 'tabular-data-resource',
        'dialect': expanded_dialect,
    }
    resource = Resource(partial_descriptor)
    assert resource.descriptor == expected_descriptor
def test_descriptor_retrieve_path():
    """Check that a descriptor given as a local JSON path is loaded and expanded.

    The descriptor produced by ``Resource`` is compared with the result of
    loading the same JSON file directly and passing it through ``expand``.
    """
    descriptor = 'data/data-resource.json'
    actual = Resource(descriptor).descriptor
    # Open the fixture in a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with io.open(descriptor, encoding='utf-8') as file:
        expect = expand(json.load(file))
    assert actual == expect
def test_check_integrity():
    """Check that ``check_integrity`` returns a truthy value for ``DESCRIPTOR``."""
    # Work on a copy so the shared module-level descriptor is not mutated.
    descriptor_copy = deepcopy(DESCRIPTOR)
    integrity_result = Resource(descriptor_copy).check_integrity()
    assert integrity_result
def test_read_integrity_hash():
    """Check that ``read(integrity=True)`` does not raise when ``bytes`` is None."""
    descriptor_copy = deepcopy(DESCRIPTOR)
    descriptor_copy['bytes'] = None
    resource = Resource(descriptor_copy)
    resource.read(integrity=True)
    # Reaching this point without an exception is the success condition.
    assert True
def test_read_integrity():
    """Check that ``read(integrity=True)`` does not raise for ``DESCRIPTOR``."""
    descriptor_copy = deepcopy(DESCRIPTOR)
    resource = Resource(descriptor_copy)
    resource.read(integrity=True)
    # Reaching this point without an exception is the success condition.
    assert True
def test_raw_read():
    """Check that ``raw_read`` returns the file content as bytes."""
    text_resource = Resource({'path': 'data/foo.txt'})
    raw_bytes = text_resource.raw_read()
    assert raw_bytes == b'foo\n'
def test_raw_iter():
    """Check that ``raw_iter`` yields the file's byte lines inside a context manager."""
    text_resource = Resource({'path': 'data/foo.txt'})
    with text_resource.raw_iter() as byte_stream:
        chunks = list(byte_stream)
        assert chunks == [b'foo\n']
def test_descriptor_retrieve_path_bad():
    """Check that a non-existent descriptor path raises ``DataPackageException``."""
    missing_path = 'data/bad-path.json'
    with pytest.raises(exceptions.DataPackageException):
        # Both construction and attribute access are inside the block, so
        # the exception may come from either step.
        Resource(missing_path).descriptor