def test_only_parameter():
    # Check the 'only' parameter

    # Get resources
    simple_descriptor = json.load(io.open('data/simple.json', encoding='utf-8'))

    # Engine
    engine = create_engine(os.environ['DATABASE_URL'], echo=True)

    # Storage
    storage = Storage(engine=engine, prefix='test_only_')

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create(
        'names', simple_descriptor,
        indexes_fields=[['person_id']])

    def only(table):
        ret = 'name' not in table
        return ret

    engine = create_engine(os.environ['DATABASE_URL'], echo=True)
    storage = Storage(
        engine=engine, prefix='test_only_', reflect_only=only)

    # Delete a bucket that reflect_only excluded from reflection
    with pytest.raises(RuntimeError):
        storage.delete('names')

def test_bad_type():
    # Engine
    engine = create_engine(os.environ['DATABASE_URL'])

    # Storage
    storage = Storage(engine=engine, prefix='test_bad_type_')

    with pytest.raises(TypeError):
        storage.create('bad_type', {
            'fields': [
                {'name': 'bad_field', 'type': 'any'},
            ]
        })

def handle_resource(self, resource, spec, parameters, datapackage):
    resource_name = spec['name']
    if resource_name not in self.converted_resources:
        # Pass through resources that are not configured for conversion
        return resource
    else:
        converted_resource = self.converted_resources[resource_name]
        mode = converted_resource.get('mode', 'rewrite')
        table_name = converted_resource['table-name']
        # The table name serves as the prefix; the bucket name is empty
        storage = Storage(self.engine, prefix=table_name)
        if mode == 'rewrite' and '' in storage.buckets:
            storage.delete('')
        if '' not in storage.buckets:
            logging.info('Creating DB table %s', table_name)
            storage.create('', spec['schema'])
        update_keys = None
        if mode == 'update':
            # Explicit update keys win; otherwise fall back to the primary key
            update_keys = converted_resource.get('update_keys')
            if update_keys is None:
                update_keys = spec['schema'].get('primaryKey', [])
        logging.info('Writing to DB %s -> %s (mode=%s, keys=%s)',
                     resource_name, table_name, mode, update_keys)
        return storage.write('', resource, keyed=True, as_generator=True,
                             update_keys=update_keys)

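# A hypothetical sketch (not from the source) of the configuration shape that
# handle_resource above expects: converted_resources maps resource names to a
# target table and write mode; only the keys the code reads are shown.
converted_resources = {
    'my-resource': {
        'table-name': 'my_table',   # used as the Storage prefix
        'mode': 'update',           # or 'rewrite' (the default)
        'update_keys': ['id'],      # optional; falls back to the schema primaryKey
    },
}
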
def create_storage_adaptor(connection_string, db_schema, geometry_support,
                           from_srid=None, to_srid=None):
    engine = create_engine(connection_string)
    storage = Storage(engine, dbschema=db_schema, geometry_support=geometry_support,
                      from_srid=from_srid, to_srid=to_srid, views=True)
    return engine, storage

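# A minimal usage sketch for create_storage_adaptor above; the connection
# string, schema name, and geometry value are illustrative assumptions, not
# taken from the source.
engine, storage = create_storage_adaptor(
    'postgresql://user:pass@localhost/mydb',  # assumed DSN
    db_schema='public',
    geometry_support='postgis')               # assumed setting
print(storage.buckets)
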
def test_storage_bigdata():
    # Generate schema/data
    descriptor = {'fields': [{'name': 'id', 'type': 'integer'}]}
    rows = [{'id': value} for value in range(0, 2500)]

    # Push rows
    engine = create_engine(os.environ['DATABASE_URL'])
    storage = Storage(engine=engine, prefix='test_storage_bigdata_')
    storage.create('bucket', descriptor, force=True)
    storage.write('bucket', rows, keyed=True)

    # Pull rows
    assert list(storage.read('bucket')) == list(map(lambda x: [x['id']], rows))

def test_storage_bigdata_rollback():
    # Generate schema/data
    descriptor = {'fields': [{'name': 'id', 'type': 'integer'}]}
    rows = [(value,) for value in range(0, 2500)] + [('bad-value',)]

    # Push rows
    engine = create_engine(os.environ['DATABASE_URL'])
    storage = Storage(engine=engine, prefix='test_storage_bigdata_rollback_')
    storage.create('bucket', descriptor, force=True)
    try:
        storage.write('bucket', rows)
    except Exception:
        pass

    # Pull rows; the failed write must have been rolled back in full
    assert list(storage.read('bucket')) == []

def test_storage_bigdata():
    # Generate schema/data
    descriptor = {'fields': [{'name': 'id', 'type': 'integer'}]}
    rows = [[value] for value in range(0, 2500)]

    # Push rows
    engine = create_engine(os.environ['DATABASE_URL'])
    storage = Storage(engine=engine, prefix='test_storage_bigdata_')
    storage.create('bucket', descriptor, force=True)
    storage.write('bucket', rows)

    # Pull rows
    assert list(storage.read('bucket')) == rows

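# A side-by-side note on the two bigdata variants above (a sketch, not source
# code): Storage.write accepts positional rows as lists/tuples matching the
# descriptor's field order, or keyed rows as dicts when keyed=True is passed.
rows_positional = [[7]]       # matches field order of the descriptor
rows_keyed = [{'id': 7}]      # requires storage.write(..., keyed=True)
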
def test_storage():
    # Get resources
    articles_descriptor = json.load(io.open('data/articles.json', encoding='utf-8'))
    comments_descriptor = json.load(io.open('data/comments.json', encoding='utf-8'))
    articles_rows = Stream('data/articles.csv', headers=1).open().read()
    comments_rows = Stream('data/comments.csv', headers=1).open().read()

    # Engine
    engine = create_engine(os.environ['DATABASE_URL'])

    # Storage
    storage = Storage(engine=engine, prefix='test_storage_')

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create(
        ['articles', 'comments'],
        [articles_descriptor, comments_descriptor],
        indexes_fields=[[['rating'], ['name'], ['created_datetime']], []])

    # Recreate bucket
    storage.create('comments', comments_descriptor, force=True)

    # Write data to buckets
    storage.write('articles', articles_rows)
    gen = storage.write('comments', comments_rows, as_generator=True)
    lst = list(gen)
    assert len(lst) == 1

    # Create new storage to use reflection only
    storage = Storage(engine=engine, prefix='test_storage_')

    # Create existing bucket (should raise)
    with pytest.raises(RuntimeError):
        storage.create('articles', articles_descriptor)

    # Assert representation
    assert repr(storage).startswith('Storage')

    # Assert buckets
    assert storage.buckets == ['articles', 'comments']

    # Assert descriptors
    assert storage.describe('articles') == sync_descriptor(articles_descriptor)
    assert storage.describe('comments') == sync_descriptor(comments_descriptor)

    # Assert rows
    assert list(storage.read('articles')) == sync_rows(articles_descriptor, articles_rows)
    assert list(storage.read('comments')) == sync_rows(comments_descriptor, comments_rows)

    # Delete non-existent bucket (should raise)
    with pytest.raises(RuntimeError):
        storage.delete('non_existent')

    # Delete buckets
    storage.delete()

import io
import json
import os

from sqlalchemy import create_engine
#from dotenv import load_dotenv; load_dotenv('.env')
from jsontableschema_sql import Storage
from tabulator import topen

# Get resources
articles_schema = json.load(io.open('data/articles.json', encoding='utf-8'))
comments_schema = json.load(io.open('data/comments.json', encoding='utf-8'))
articles_data = topen('data/articles.csv', with_headers=True).read()
comments_data = topen('data/comments.csv', with_headers=True).read()

# Engine
engine = create_engine(os.environ['DATABASE_URL'])

# Storage
storage = Storage(engine=engine, prefix='prefix_')

# Delete tables
for table in reversed(storage.buckets):
    storage.delete(table)

# Create tables
storage.create(['articles', 'comments'], [articles_schema, comments_schema])
print(articles_data)

# Write data to tables
storage.write('articles', articles_data)
storage.write('comments', comments_data)

# List tables
print(storage.buckets)

def load_fdp_to_db(package, engine=None, callback=None):
    """
    Load an FDP to the database, create a babbage model and save it as well
    :param package: URL for the datapackage.json
    :param engine: DB engine
    :param callback: callback to use to send progress updates
    """
    # Load and validate the datapackage
    if engine is None:
        engine = get_engine()
    if callback is None:
        callback = noop
    callback(status=STATUS_LOADING_DATAPACKAGE)
    dpo = DataPackage(package, schema='fiscal')
    callback(status=STATUS_VALIDATING_DATAPACKAGE)
    dpo.validate()
    callback(status=STATUS_LOADING_RESOURCE)
    resource = dpo.resources[0]
    schema = resource.metadata['schema']

    # Use the cube manager to get the table name
    registry = ModelRegistry()
    datapackage_name = dpo.metadata['name']
    datapackage_owner = dpo.metadata['owner']
    datapackage_author = dpo.metadata['author']

    # Get the full name from the author field, and rewrite it without the email
    fullname, email_addr = email.utils.parseaddr(datapackage_author)
    email_addr = email_addr.split('@')[0] + '@not.shown'
    dpo.metadata['author'] = '{0} <{1}>'.format(fullname, email_addr)

    model_name = "{0}:{1}".format(datapackage_owner, datapackage_name)
    table_name = registry.table_name_for_package(datapackage_owner, datapackage_name)

    all_fields = set()
    field_translation = {}
    field_order = []
    # Process schema - slugify field names
    for field in schema['fields']:
        name = database_name(field['name'], all_fields)
        all_fields.add(name)
        translated_field = {
            'name': name,
            'type': field['type']
        }
        field_translation[field['name']] = translated_field
        field_order.append(field['name'])

    storage_schema = {
        'fields': [
            {
                'type': f['type'],
                'name': field_translation[f['name']]['name'],
                'format': f.get('format', 'default')
            }
            for f in schema['fields']
        ],
        # Babbage likes just one primary key
        'primaryKey': '_id'
    }
    # Add Primary key to schema
    storage_schema['fields'].insert(0, {
        'name': '_id',
        'type': 'integer'
    })

    # Load 1st resource data into DB
    storage = Storage(engine)
    if storage.check(table_name):
        callback(status=STATUS_DELETING_TABLE)
        storage.delete(table_name)
    callback(status=STATUS_CREATING_TABLE)
    storage.create(table_name, storage_schema)
    callback(status=STATUS_LOADING_DATA_READY)
    storage.write(table_name, _translator_iterator(resource.iter(), field_order, callback))

    # Create Babbage Model
    callback(status=STATUS_CREATING_BABBAGE_MODEL)
    model = fdp_to_model(dpo, table_name, resource, field_translation)

    callback(status=STATUS_SAVING_METADATA)
    registry.save_model(model_name, package, dpo.metadata, model,
                        datapackage_name, fullname)
    return model_name, dpo.metadata, model

def test_storage():
    # Get resources
    articles_descriptor = json.load(io.open('data/articles.json', encoding='utf-8'))
    comments_descriptor = json.load(io.open('data/comments.json', encoding='utf-8'))
    articles_rows = Stream('data/articles.csv', headers=1).open().read()
    comments_rows = Stream('data/comments.csv', headers=1).open().read()

    # Engine
    engine = create_engine(os.environ['DATABASE_URL'])

    # Storage
    storage = Storage(engine=engine, prefix='test_storage_')

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create(
        ['articles', 'comments'],
        [articles_descriptor, comments_descriptor],
        indexes_fields=[[['rating'], ['name'], ['created_datetime']], []])

    # Recreate bucket
    storage.create('comments', comments_descriptor, force=True)

    # Write data to buckets
    storage.write('articles', articles_rows)
    storage.write('comments', comments_rows)

    # Create new storage to use reflection only
    storage = Storage(engine=engine, prefix='test_storage_')

    # Create existing bucket (should raise)
    with pytest.raises(RuntimeError):
        storage.create('articles', articles_descriptor)

    # Assert representation
    assert repr(storage).startswith('Storage')

    # Assert buckets
    assert storage.buckets == ['articles', 'comments']

    # Assert descriptors
    assert storage.describe('articles') == sync_descriptor(articles_descriptor)
    assert storage.describe('comments') == sync_descriptor(comments_descriptor)

    # Assert rows
    assert list(storage.read('articles')) == sync_rows(articles_descriptor, articles_rows)
    assert list(storage.read('comments')) == sync_rows(comments_descriptor, comments_rows)

    # Delete non-existent bucket (should raise)
    with pytest.raises(RuntimeError):
        storage.delete('non_existent')

    # Delete buckets
    storage.delete()

def test_update():
    # Get resources
    descriptor = json.load(io.open('data/original.json', encoding='utf-8'))
    original_rows = Stream('data/original.csv', headers=1).open().read()
    update_rows = Stream('data/update.csv', headers=1).open().read()
    update_keys = ['person_id', 'name']

    # Engine
    engine = create_engine(os.environ['DATABASE_URL'])

    # Storage
    storage = Storage(engine=engine, prefix='test_update_', autoincrement='__id')

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create('colors', descriptor)

    # Write data to buckets
    storage.write('colors', original_rows, update_keys=update_keys)
    gen = storage.write('colors', update_rows, update_keys=update_keys,
                        as_generator=True)
    gen = list(gen)
    assert len(gen) == 5
    assert len(list(filter(lambda i: i.updated, gen))) == 3
    assert list(map(lambda i: i.updated_id, gen)) == [5, 3, 6, 4, 5]

    storage = Storage(engine=engine, prefix='test_update_', autoincrement='__id')
    gen = storage.write('colors', update_rows, update_keys=update_keys,
                        as_generator=True)
    gen = list(gen)
    assert len(gen) == 5
    assert len(list(filter(lambda i: i.updated, gen))) == 5
    assert list(map(lambda i: i.updated_id, gen)) == [5, 3, 6, 4, 5]

    # Create new storage to use reflection only
    storage = Storage(engine=engine, prefix='test_update_')
    rows = list(storage.iter('colors'))
    assert len(rows) == 6
    color_by_person = dict((row[1], row[3]) for row in rows)
    assert color_by_person == {
        1: 'blue',
        2: 'green',
        3: 'magenta',
        4: 'sunshine',
        5: 'peach',
        6: 'grey'
    }

    # Storage without autoincrement
    storage = Storage(engine=engine, prefix='test_update_')
    storage.delete()
    storage.create('colors', descriptor)
    storage.write('colors', original_rows, update_keys=update_keys)
    gen = storage.write('colors', update_rows, update_keys=update_keys,
                        as_generator=True)
    gen = list(gen)
    assert len(gen) == 5
    assert len(list(filter(lambda i: i.updated, gen))) == 3
    assert list(map(lambda i: i.updated_id, gen)) == [None, None, None, None, None]

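# A hedged consumption sketch for the as_generator write results used in
# test_update above; it assumes the same storage, bucket, and update_rows as
# that test. The attribute names `updated` and `updated_id` are taken directly
# from the assertions above; everything else is illustrative.
gen = storage.write('colors', update_rows,
                    update_keys=['person_id', 'name'], as_generator=True)
for result in gen:
    if result.updated:
        print('merged into existing row', result.updated_id)
    else:
        print('inserted new row', result.updated_id)
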
def test_storage():
    # Get resources
    articles_schema = json.load(io.open('data/articles.json', encoding='utf-8'))
    comments_schema = json.load(io.open('data/comments.json', encoding='utf-8'))
    articles_data = topen('data/articles.csv', with_headers=True).read()
    comments_data = topen('data/comments.csv', with_headers=True).read()

    # Engine
    engine = create_engine(os.environ['DATABASE_URL'])

    # Storage
    storage = Storage(engine=engine, prefix='prefix_')

    # Delete tables
    for table in reversed(storage.tables):
        storage.delete(table)

    # Create tables
    storage.create(['articles', 'comments'], [articles_schema, comments_schema])

    # Write data to tables
    storage.write('articles', articles_data)
    storage.write('comments', comments_data)

    # Create new storage to use reflection only
    storage = Storage(engine=engine, prefix='prefix_')

    # Create existing table (should raise)
    with pytest.raises(RuntimeError):
        storage.create('articles', articles_schema)

    # Get table representation
    assert repr(storage).startswith('Storage')

    # Get tables list
    assert storage.tables == ['articles', 'comments']

    # Get table schemas
    assert storage.describe('articles') == convert_schema(articles_schema)
    assert storage.describe('comments') == convert_schema(comments_schema)

    # Get table data
    assert list(storage.read('articles')) == convert_data(articles_schema, articles_data)
    assert list(storage.read('comments')) == convert_data(comments_schema, comments_data)

    # Delete tables
    for table in reversed(storage.tables):
        storage.delete(table)

    # Delete non-existent table (should raise)
    with pytest.raises(RuntimeError):
        storage.delete('articles')

def load_fdp_to_db(package, engine=None, callback=None):
    """
    Load an FDP to the database, create a babbage model and save it as well
    :param package: URL for the datapackage.json
    :param engine: DB engine
    :param callback: callback to use to send progress updates
    """
    # Load and validate the datapackage
    if engine is None:
        engine = get_engine()
    if callback is None:
        callback = noop
    callback(status=STATUS_LOADING_DATAPACKAGE)
    dpo = DataPackage(package, schema='fiscal')
    callback(status=STATUS_VALIDATING_DATAPACKAGE)
    dpo.validate()
    callback(status=STATUS_LOADING_RESOURCE)
    resource = dpo.resources[0]
    schema = resource.descriptor['schema']

    # Use the cube manager to get the table name
    registry = ModelRegistry()
    datapackage_name = dpo.descriptor['name']
    datapackage_owner = dpo.descriptor['owner']
    datapackage_author = dpo.descriptor['author']

    # Get the full name from the author field, and rewrite it without the email
    fullname, email_addr = email.utils.parseaddr(datapackage_author)
    email_addr = email_addr.split('@')[0] + '@not.shown'
    dpo.descriptor['author'] = '{0} <{1}>'.format(fullname, email_addr)
    dpo.descriptor.setdefault('private', True)

    # Measure factors
    measures = dpo.descriptor.get('model', {}).get('measures', {})
    factors = {}
    for _, measure in measures.items():
        factor = measure.get('factor', 1)
        if factor != 1:
            factors[measure.get('source')] = factor

    model_name = "{0}:{1}".format(datapackage_owner, datapackage_name)
    table_name = table_name_for_package(datapackage_owner, datapackage_name)

    all_fields = set()
    field_translation = {}
    field_order = []
    # Process schema - slugify field names
    for field in schema['fields']:
        name = database_name(field['name'], all_fields)
        all_fields.add(name)
        translated_field = {
            'name': name,
            'type': field['type']
        }
        field_translation[field['name']] = translated_field
        field_order.append(field['name'])

    storage_schema = {
        'fields': [
            {
                'type': f['type'],
                'name': field_translation[f['name']]['name'],
                'format': f.get('format', 'default')
            }
            for f in schema['fields']
        ],
        # Babbage likes just one primary key
        'primaryKey': '_id'
    }
    # Add Primary key to schema
    storage_schema['fields'].insert(0, {
        'name': '_id',
        'type': 'integer'
    })

    # Create Babbage Model
    callback(status=STATUS_CREATING_BABBAGE_MODEL)
    model = fdp_to_model(dpo, table_name, resource, field_translation)

    # Create indexes
    indexes = []
    primary_keys = resource.descriptor['schema'].get('primaryKey', [])
    for dim in model['dimensions'].values():
        if dim['label'] in primary_keys:
            key_field = dim['attributes'][dim['key_attribute']]['label']
            key_field = field_translation[key_field]['name']
            indexes.append((key_field,))
            label_field = dim['attributes'].get(dim.get('label_attribute'), {}).get('label')
            if label_field is not None:
                label_field = field_translation[label_field]['name']
                if label_field != key_field:
                    indexes.append((key_field, label_field))

    # Load 1st resource data into DB
    storage = Storage(engine)
    if storage.check(table_name):
        callback(status=STATUS_DELETING_TABLE)
        storage.delete(table_name)
    callback(status=STATUS_CREATING_TABLE)
    storage.create(table_name, storage_schema, indexes)
    callback(status=STATUS_LOADING_DATA_READY)
    storage.write(table_name, _translator_iterator(resource.iter(), field_order,
                                                   factors, callback))

    callback(status=STATUS_SAVING_METADATA)
    registry.save_model(model_name, package, dpo.descriptor, model,
                        datapackage_name, fullname)
    return model_name, dpo.descriptor, model

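# For orientation, a hedged sketch of what the index-building loop above could
# yield for a dimension whose key field slugifies to 'activity_id' and whose
# label field slugifies to 'activity_label' (field names are illustrative):
indexes = [
    ('activity_id',),                    # key attribute alone
    ('activity_id', 'activity_label'),   # key plus label attribute
]
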
import io
import json
import os

from sqlalchemy import create_engine
from dotenv import load_dotenv; load_dotenv('.env')
from jsontableschema_sql import Storage
from tabulator import topen

# Get resources
articles_schema = json.load(io.open('data/articles.json', encoding='utf-8'))
comments_schema = json.load(io.open('data/comments.json', encoding='utf-8'))
articles_data = topen('data/articles.csv', with_headers=True).read()
comments_data = topen('data/comments.csv', with_headers=True).read()

# Engine
engine = create_engine(os.environ['DATABASE_URL'])

# Storage
storage = Storage(engine=engine, prefix='prefix_')

# Delete tables
for table in reversed(storage.tables):
    storage.delete(table)

# Create tables
storage.create(['articles', 'comments'], [articles_schema, comments_schema])

# Write data to tables
storage.write('articles', articles_data)
storage.write('comments', comments_data)

# List tables
print(storage.tables)