def process_resource(self, resource: ResourceWrapper):
    resource_name = resource.res.name
    if resource_name not in self.converted_resources:
        return resource
    else:
        converted_resource = self.converted_resources[resource_name]
        mode = converted_resource.get('mode', 'rewrite')
        table_name = converted_resource['table-name']
        storage = Storage(self.engine, prefix=table_name)

        # In rewrite mode, drop any existing table before re-creating it
        if mode == 'rewrite' and '' in storage.buckets:
            storage.delete('')

        schema_descriptor = resource.res.descriptor['schema']
        schema = self.normalize_schema_for_engine(self.engine.dialect.name,
                                                  schema_descriptor)
        if '' not in storage.buckets:
            logging.info('Creating DB table %s', table_name)
            try:
                storage.create('', schema)
            except ValidationError as e:
                logging.error('Error validating schema %r', schema_descriptor)
                for err in e.errors:
                    logging.error('Error validating schema: %s', err)
                raise
        else:
            # Table already exists: force the expected schema onto it
            storage.describe('', schema)

        # In update mode, fall back to the primary key when no explicit
        # update keys were configured
        update_keys = None
        if mode == 'update':
            update_keys = converted_resource.get('update_keys')
            if update_keys is None:
                update_keys = schema_descriptor.get('primaryKey', [])

        logging.info('Writing to DB %s -> %s (mode=%s, keys=%s)',
                     resource_name, table_name, mode, update_keys)
        return map(
            self.get_output_row,
            storage.write(
                '',
                self.normalize_for_engine(self.engine.dialect.name,
                                          resource, schema_descriptor),
                keyed=True,
                as_generator=True,
                update_keys=update_keys,
                buffer_size=self.batch_size,
                use_bloom_filter=self.use_bloom_filter,
            ))
def handle_resource(self, resource, spec, parameters, datapackage):
    resource_name = spec['name']
    if resource_name not in self.converted_resources:
        return resource
    else:
        converted_resource = self.converted_resources[resource_name]
        mode = converted_resource.get('mode', 'rewrite')
        table_name = converted_resource['table-name']
        storage = Storage(self.engine, prefix=table_name)
        if mode == 'rewrite' and '' in storage.buckets:
            storage.delete('')
        schema = self.normalise_schema_for_engine(self.engine.dialect.name,
                                                  spec['schema'])
        if '' not in storage.buckets:
            logging.info('Creating DB table %s', table_name)
            try:
                storage.create('', schema)
            except ValidationError as e:
                logging.error('Error validating schema %r', spec['schema'])
                for err in e.errors:
                    logging.error('Error validating schema: %s', err)
                raise
        else:
            storage.describe('', schema)
        update_keys = None
        if mode == 'update':
            update_keys = converted_resource.get('update_keys')
            if update_keys is None:
                update_keys = spec['schema'].get('primaryKey', [])
        logging.info('Writing to DB %s -> %s (mode=%s, keys=%s)',
                     resource_name, table_name, mode, update_keys)
        return map(
            self.get_output_row,
            storage.write('',
                          self.normalise_for_engine(self.engine.dialect.name,
                                                    resource, spec),
                          keyed=True,
                          as_generator=True,
                          update_keys=update_keys))
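In both variants the return value is lazy twice over: storage.write(..., as_generator=True) yields rows only as they are pulled, and map() defers get_output_row the same way, so nothing reaches the database until a downstream step iterates the result. A minimal sketch of that contract (the call at the bottom uses hypothetical stand-ins for the surrounding pipeline):

def drain(row_stream):
    # Exhausting the lazy stream is what actually flushes buffered
    # rows to the database and forwards them downstream.
    for _ in row_stream:
        pass

# e.g. drain(processor.process_resource(resource))  # names hypothetical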
def test_storage_limited_databases(dialect, database_url):

    # Create storage
    engine = create_engine(database_url)
    storage = Storage(engine=engine, prefix='test_storage_')

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create(
        ['articles', 'comments'],
        [remove_fk(ARTICLES['schema']), remove_fk(COMMENTS['schema'])],
        indexes_fields=[[['rating'], ['name']], []])
    storage.create('comments', remove_fk(COMMENTS['schema']), force=True)
    storage.create('temporal', TEMPORAL['schema'])
    storage.create('location', LOCATION['schema'])
    storage.create('compound', COMPOUND['schema'])

    # Write data
    storage.write('articles', ARTICLES['data'])
    storage.write('comments', COMMENTS['data'])
    storage.write('temporal', TEMPORAL['data'])
    storage.write('location', LOCATION['data'])
    storage.write('compound', COMPOUND['data'])

    # Create new storage to use reflection only
    storage = Storage(engine=engine, prefix='test_storage_')

    # Create existent bucket
    with pytest.raises(tableschema.exceptions.StorageError):
        storage.create('articles', ARTICLES['schema'])

    # Assert buckets
    assert storage.buckets == [
        'articles', 'comments', 'compound', 'location', 'temporal'
    ]

    # Assert schemas
    assert storage.describe('articles') == {
        'fields': [
            {'name': 'id', 'type': 'integer', 'constraints': {'required': True}},
            {'name': 'parent', 'type': 'integer'},
            {'name': 'name', 'type': 'string'},
            {'name': 'current', 'type': 'boolean' if dialect == 'sqlite' else 'integer'},
            {'name': 'rating', 'type': 'number'},
        ],
        'primaryKey': 'id',
        # foreignKeys not supported
    }
    assert storage.describe('comments') == {
        'fields': [
            {'name': 'entry_id', 'type': 'integer', 'constraints': {'required': True}},
            {'name': 'comment', 'type': 'string'},
            {'name': 'note', 'type': 'string'},  # type downgrade
        ],
        'primaryKey': 'entry_id',
        # foreignKeys not supported
    }
    assert storage.describe('temporal') == {
        'fields': [
            {'name': 'date', 'type': 'date'},
            {'name': 'date_year', 'type': 'date'},  # format removal
            {'name': 'datetime', 'type': 'datetime'},
            {'name': 'duration', 'type': 'string'},  # type fallback
            {'name': 'time', 'type': 'time'},
            {'name': 'year', 'type': 'integer'},  # type downgrade
            {'name': 'yearmonth', 'type': 'string'},  # type fallback
        ],
    }
    assert storage.describe('location') == {
        'fields': [
            {'name': 'location', 'type': 'string'},  # type fallback
            {'name': 'geopoint', 'type': 'string'},  # type fallback
        ],
    }
    assert storage.describe('compound') == {
        'fields': [
            {'name': 'stats', 'type': 'string'},  # type fallback
            {'name': 'persons', 'type': 'string'},  # type fallback
        ],
    }

    # Assert data
    assert storage.read('articles') == cast(ARTICLES)['data']
    assert storage.read('comments') == cast(COMMENTS)['data']
    assert storage.read('temporal') == cast(TEMPORAL, skip=['duration', 'yearmonth'])['data']
    assert storage.read('location') == cast(LOCATION, skip=['geojson', 'geopoint'])['data']
    assert storage.read('compound') == cast(COMPOUND, skip=['array', 'object'])['data']

    # Assert data with forced schema
    storage.describe('compound', COMPOUND['schema'])
    assert storage.read('compound') == cast(COMPOUND)['data']

    # Delete non-existent bucket
    with pytest.raises(tableschema.exceptions.StorageError):
        storage.delete('non_existent')

    # Delete buckets
    storage.delete()
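remove_fk is a fixture helper that is not defined in this excerpt. A plausible sketch, assuming it only strips the foreignKeys section from a copy of the descriptor (the limited dialects under test cannot enforce foreign keys):

import copy

def remove_fk(schema):
    # Assumed helper: copy the Table Schema descriptor and drop
    # foreignKeys so that limited databases can create the table.
    schema = copy.deepcopy(schema)
    schema.pop('foreignKeys', None)
    return schema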
articles_data = topen('data/articles.csv', with_headers=True).read()
comments_data = topen('data/comments.csv', with_headers=True).read()

# Engine
engine = create_engine(os.environ['POSTGRES_URL'])

# Storage
storage = Storage(engine=engine, prefix='prefix_')

# Delete tables
for table in reversed(storage.tables):
    storage.delete(table)

# Create tables
storage.create(['articles', 'comments'], [articles_schema, comments_schema])

# Write data to tables
storage.write('articles', articles_data)
storage.write('comments', comments_data)

# List tables
print(storage.tables)

# Describe tables
print(storage.describe('articles'))
print(storage.describe('comments'))

# Read data from tables
print(list(storage.read('articles')))
print(list(storage.read('comments')))
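The snippet assumes articles_schema and comments_schema were defined earlier as Table Schema descriptors. Their exact contents are not shown; an illustrative sketch of their likely shape:

# Illustrative only: the real descriptors are loaded before this snippet runs.
articles_schema = {
    'fields': [
        {'name': 'id', 'type': 'integer', 'constraints': {'required': True}},
        {'name': 'name', 'type': 'string'},
        {'name': 'rating', 'type': 'number'},
    ],
    'primaryKey': 'id',
}
comments_schema = {
    'fields': [
        {'name': 'entry_id', 'type': 'integer', 'constraints': {'required': True}},
        {'name': 'comment', 'type': 'string'},
    ],
    'primaryKey': 'entry_id',
}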
"name": "id", "type": "integer", "constraints": { "required": true } }, { "name": "name", "type": "string" }, { "name": "ssn", "type": "string", "protected": true } ] } """ storage.create(['records'], [json.loads(records_schema)], encrypted_definitions=encryptedDefintion) records_data = [ [ 1, "John", "123456789"] ] storage.write('records', records_data) print(storage.describe('records')) print(list(storage.read('records')))
def test_storage():

    # Create storage
    engine = create_engine(os.environ['POSTGRES_URL'])
    storage = Storage(engine=engine, prefix='test_storage_')

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create(
        ['articles', 'comments'],
        [ARTICLES['schema'], COMMENTS['schema']],
        indexes_fields=[[['rating'], ['name']], []])
    storage.create('comments', COMMENTS['schema'], force=True)
    storage.create('temporal', TEMPORAL['schema'])
    storage.create('location', LOCATION['schema'])
    storage.create('compound', COMPOUND['schema'])

    # Write data
    storage.write('articles', ARTICLES['data'])
    storage.write('comments', COMMENTS['data'])
    storage.write('temporal', TEMPORAL['data'])
    storage.write('location', LOCATION['data'])
    storage.write('compound', COMPOUND['data'])

    # Create new storage to use reflection only
    storage = Storage(engine=engine, prefix='test_storage_')

    # Create existent bucket
    with pytest.raises(tableschema.exceptions.StorageError):
        storage.create('articles', ARTICLES['schema'])

    # Assert buckets
    assert storage.buckets == [
        'articles', 'compound', 'location', 'temporal', 'comments'
    ]

    # Assert schemas
    assert storage.describe('articles') == ARTICLES['schema']
    assert storage.describe('comments') == {
        'fields': [
            {'name': 'entry_id', 'type': 'integer', 'constraints': {'required': True}},
            {'name': 'comment', 'type': 'string'},
            {'name': 'note', 'type': 'string'},  # type downgrade
        ],
        'primaryKey': 'entry_id',
        'foreignKeys': [
            {'fields': 'entry_id', 'reference': {'resource': 'articles', 'fields': 'id'}},
        ],
    }
    assert storage.describe('temporal') == {
        'fields': [
            {'name': 'date', 'type': 'date'},
            {'name': 'date_year', 'type': 'date'},  # format removal
            {'name': 'datetime', 'type': 'datetime'},
            {'name': 'duration', 'type': 'string'},  # type fallback
            {'name': 'time', 'type': 'time'},
            {'name': 'year', 'type': 'integer'},  # type downgrade
            {'name': 'yearmonth', 'type': 'string'},  # type fallback
        ],
    }
    assert storage.describe('location') == {
        'fields': [
            {'name': 'location', 'type': 'object'},  # type downgrade
            {'name': 'geopoint', 'type': 'string'},  # type fallback
        ],
    }
    assert storage.describe('compound') == {
        'fields': [
            {'name': 'stats', 'type': 'object'},
            {'name': 'persons', 'type': 'object'},  # type downgrade
        ],
    }

    # Assert data
    assert storage.read('articles') == cast(ARTICLES)['data']
    assert storage.read('comments') == cast(COMMENTS)['data']
    assert storage.read('temporal') == cast(TEMPORAL, skip=['duration', 'yearmonth'])['data']
    assert storage.read('location') == cast(LOCATION, skip=['geopoint'])['data']
    assert storage.read('compound') == cast(COMPOUND)['data']

    # Assert data with forced schema
    storage.describe('compound', COMPOUND['schema'])
    assert storage.read('compound') == cast(COMPOUND)['data']

    # Delete non-existent bucket
    with pytest.raises(tableschema.exceptions.StorageError):
        storage.delete('non_existent')

    # Delete buckets
    storage.delete()
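Both test functions compare database reads against a cast helper that lives in the shared fixtures rather than in this excerpt. A sketch of what it plausibly does, assuming it casts each raw row through the Table Schema while leaving the listed types uncast (to mirror what the target database cannot round-trip):

import copy
import tableschema

def cast(dataset, skip=()):
    # Assumed helper: cast every cell via its Table Schema field,
    # skipping fields whose type is listed in `skip`.
    dataset = copy.deepcopy(dataset)
    schema = tableschema.Schema(dataset['schema'])
    for row in dataset['data']:
        for index, field in enumerate(schema.fields):
            if field.type not in skip:
                row[index] = field.cast_value(row[index])
    return dataset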