def test_only_parameter():
    # Check the 'only' parameter

    # Get resources
    simple_descriptor = json.load(io.open('data/simple.json', encoding='utf-8'))

    # Engine
    engine = create_engine(os.environ['DATABASE_URL'], echo=True)

    # Storage
    storage = Storage(engine=engine, prefix='test_only_')

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create(
            'names',
            simple_descriptor,
            indexes_fields=[['person_id']])

    def only(table):
        return 'name' not in table
    engine = create_engine(os.environ['DATABASE_URL'], echo=True)
    storage = Storage(engine=engine, prefix='test_only_', reflect_only=only)
    # The 'names' bucket exists in the database but is hidden by reflect_only, so delete raises
    with pytest.raises(RuntimeError):
        storage.delete('names')
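
A minimal sketch of the same reflect_only hook used above, assuming the filter receives the full (prefixed) table name; the whitelist and variable names below are hypothetical:

EXPECTED_TABLES = {'test_only_names'}  # hypothetical whitelist of prefixed table names

def reflect_expected(table):
    # Reflect a table only if it is one we explicitly expect under this prefix.
    return table in EXPECTED_TABLES

engine = create_engine(os.environ['DATABASE_URL'])
storage = Storage(engine=engine, prefix='test_only_', reflect_only=reflect_expected)
print(storage.buckets)  # only buckets whose underlying tables pass the filter are visible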
def test_bad_type():

    # Engine
    engine = create_engine(os.environ['DATABASE_URL'])

    # Storage
    storage = Storage(engine=engine, prefix='test_bad_type_')
    with pytest.raises(TypeError):
        storage.create('bad_type', {
            'fields': [
                {
                    'name': 'bad_field',
                    'type': 'any'
                }
            ]
        })
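
For contrast, a sketch of the same call with a type the SQL backend can map ('integer' is used by the other tests in this collection); with the same storage object this create should not raise:

storage = Storage(engine=engine, prefix='test_bad_type_')
storage.create('good_type', {
    'fields': [
        {'name': 'good_field', 'type': 'integer'}  # a mappable type, unlike 'any' above
    ]
})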
Example #4
 def handle_resource(self, resource, spec, parameters, datapackage):
     resource_name = spec['name']
     if resource_name not in self.converted_resources:
         return resource
     else:
         converted_resource = self.converted_resources[resource_name]
         mode = converted_resource.get('mode', 'rewrite')
         table_name = converted_resource['table-name']
         storage = Storage(self.engine, prefix=table_name)
         if mode == 'rewrite' and '' in storage.buckets:
             storage.delete('')
         if '' not in storage.buckets:
             logging.info('Creating DB table %s', table_name)
             storage.create('', spec['schema'])
         update_keys = None
         if mode == 'update':
             update_keys = converted_resource.get('update_keys')
             if update_keys is None:
                 update_keys = spec['schema'].get('primaryKey', [])
         logging.info('Writing to DB %s -> %s (mode=%s, keys=%s)',
                      resource_name, table_name, mode, update_keys)
         return storage.write('',
                              resource,
                              keyed=True,
                              as_generator=True,
                              update_keys=update_keys)
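
A hypothetical converted_resources entry illustrating the keys handle_resource() reads above; the resource name, table name and key column are made up for illustration:

converted_resources = {
    'sales': {
        'table-name': 'etl_sales',     # becomes the Storage prefix / target table
        'mode': 'update',              # 'rewrite' (the default) drops and recreates the table
        'update_keys': ['order_id'],   # if omitted, falls back to the schema's primaryKey
    },
}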
Example #5
def create_storage_adaptor(connection_string,
                           db_schema,
                           geometry_support,
                           from_srid=None,
                           to_srid=None):
    engine = create_engine(connection_string)
    storage = Storage(engine,
                      dbschema=db_schema,
                      geometry_support=geometry_support,
                      from_srid=from_srid,
                      to_srid=to_srid,
                      views=True)
    return engine, storage
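
A usage sketch for create_storage_adaptor(); the connection string, schema name and SRIDs are illustrative, and 'postgis' is assumed (not confirmed by this snippet) to be an accepted geometry_support value:

engine, storage = create_storage_adaptor(
    'postgresql://user:pass@localhost/geodata',  # hypothetical connection string
    db_schema='public',
    geometry_support='postgis',                  # assumed value for illustration
    from_srid=4326,
    to_srid=3857)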
def test_storage_bigdata():

    # Generate schema/data
    descriptor = {'fields': [{'name': 'id', 'type': 'integer'}]}
    rows = [{'id': value} for value in range(0, 2500)]

    # Push rows
    engine = create_engine(os.environ['DATABASE_URL'])
    storage = Storage(engine=engine, prefix='test_storage_bigdata_')
    storage.create('bucket', descriptor, force=True)
    storage.write('bucket', rows, keyed=True)

    # Pull rows
    assert list(storage.read('bucket')) == list(map(lambda x: [x['id']], rows))
def test_storage_bigdata_rollback():

    # Generate schema/data
    descriptor = {'fields': [{'name': 'id', 'type': 'integer'}]}
    rows = [(value,) for value in range(0, 2500)] + [('bad-value',)]

    # Push rows
    engine = create_engine(os.environ['DATABASE_URL'])
    storage = Storage(engine=engine, prefix='test_storage_bigdata_rollback_')
    storage.create('bucket', descriptor, force=True)
    try:
        storage.write('bucket', rows)
    except Exception:
        pass

    # Pull rows
    assert list(storage.read('bucket')) == []
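
The empty read at the end relies on write() rolling back the whole batch when the final 'bad-value' row fails to cast. A defensive variant (a sketch, not part of the test) filters uncastable rows before writing, reusing the rows and storage defined above:

def castable(row):
    # Keep only rows whose single field casts to an integer.
    try:
        int(row[0])
        return True
    except (TypeError, ValueError):
        return False

good_rows = [row for row in rows if castable(row)]
storage.write('bucket', good_rows)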
Example #8
def test_storage_bigdata():

    # Generate schema/data
    descriptor = {'fields': [{'name': 'id', 'type': 'integer'}]}
    rows = [[value,] for value in range(0, 2500)]

    # Push rows
    engine = create_engine(os.environ['DATABASE_URL'])
    storage = Storage(engine=engine, prefix='test_storage_bigdata_')
    storage.create('bucket', descriptor, force=True)
    storage.write('bucket', rows)

    # Pull rows
    assert list(storage.read('bucket')) == rows
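
The two bigdata tests differ only in row shape: the first passes keyed dicts with keyed=True, this one passes plain positional lists. Both call forms, shown side by side, come straight from the tests above:

storage.write('bucket', [{'id': 1}], keyed=True)  # keyed rows (dicts)
storage.write('bucket', [[1]])                    # positional rows (lists)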
def test_storage():

    # Get resources
    articles_descriptor = json.load(io.open('data/articles.json', encoding='utf-8'))
    comments_descriptor = json.load(io.open('data/comments.json', encoding='utf-8'))
    articles_rows = Stream('data/articles.csv', headers=1).open().read()
    comments_rows = Stream('data/comments.csv', headers=1).open().read()

    # Engine
    engine = create_engine(os.environ['DATABASE_URL'])

    # Storage
    storage = Storage(engine=engine, prefix='test_storage_')

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create(
            ['articles', 'comments'],
            [articles_descriptor, comments_descriptor],
            indexes_fields=[[['rating'], ['name'], ['created_datetime']], []])

    # Recreate bucket
    storage.create('comments', comments_descriptor, force=True)

    # Write data to buckets
    storage.write('articles', articles_rows)
    gen = storage.write('comments', comments_rows, as_generator=True)
    lst = list(gen)
    assert len(lst) == 1

    # Create new storage to use reflection only
    storage = Storage(engine=engine, prefix='test_storage_')

    # Create an already existing bucket
    with pytest.raises(RuntimeError):
        storage.create('articles', articles_descriptor)

    # Assert representation
    assert repr(storage).startswith('Storage')

    # Assert buckets
    assert storage.buckets == ['articles', 'comments']

    # Assert descriptors
    assert storage.describe('articles') == sync_descriptor(articles_descriptor)
    assert storage.describe('comments') == sync_descriptor(comments_descriptor)

    # Assert rows
    assert list(storage.read('articles')) == sync_rows(articles_descriptor, articles_rows)
    assert list(storage.read('comments')) == sync_rows(comments_descriptor, comments_rows)

    # Delete a non-existent bucket
    with pytest.raises(RuntimeError):
        storage.delete('non_existent')


    # Delete buckets
    storage.delete()
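
The indexes_fields argument used above nests three levels deep; spelled out (grounded in the same create call):

indexes_fields = [
    [['rating'], ['name'], ['created_datetime']],  # 'articles': three single-column indexes
    [],                                            # 'comments': no extra indexes
]
storage.create(['articles', 'comments'],
               [articles_descriptor, comments_descriptor],
               indexes_fields=indexes_fields)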
Example #11
import io
import json
import os

from sqlalchemy import create_engine
from tabulator import topen  # topen is the stream-opening helper from early tabulator releases
# from dotenv import load_dotenv; load_dotenv('.env')

from jsontableschema_sql import Storage

# Get resources
articles_schema = json.load(io.open('data/articles.json', encoding='utf-8'))
comments_schema = json.load(io.open('data/comments.json', encoding='utf-8'))
articles_data = topen('data/articles.csv', with_headers=True).read()
comments_data = topen('data/comments.csv', with_headers=True).read()

# Engine
engine = create_engine(os.environ['DATABASE_URL'])

# Storage
storage = Storage(engine=engine, prefix='prefix_')

# Delete tables
for table in reversed(storage.buckets):
    storage.delete(table)

# Create tables
storage.create(['articles', 'comments'], [articles_schema, comments_schema])

print(articles_data)

# Write data to tables
storage.write('articles', articles_data)
storage.write('comments', comments_data)

# List tables
print(storage.buckets)
    def load_fdp_to_db(package, engine=None, callback=None):
        """
        Load an FDP to the database, create a babbage model and save it as well
        :param package: URL for the datapackage.json
        :param engine: DB engine
        :param callback: callback to use to send progress updates
        """

        # Load and validate the datapackage
        if engine is None:
            engine = get_engine()
        if callback is None:
            callback = noop
        callback(status=STATUS_LOADING_DATAPACKAGE)
        dpo = DataPackage(package, schema='fiscal')
        callback(status=STATUS_VALIDATING_DATAPACKAGE)
        dpo.validate()
        callback(status=STATUS_LOADING_RESOURCE)
        resource = dpo.resources[0]
        schema = resource.metadata['schema']

        # Use the cube manager to get the table name
        registry = ModelRegistry()
        datapackage_name = dpo.metadata['name']
        datapackage_owner = dpo.metadata['owner']
        datapackage_author = dpo.metadata['author']

        # Get the full name from the author field, and rewrite it without the email
        fullname, email_addr = email.utils.parseaddr(datapackage_author)
        email_addr = email_addr.split('@')[0] + '@not.shown'
        dpo.metadata['author'] = '{0} <{1}>'.format(fullname, email_addr)

        model_name = "{0}:{1}".format(datapackage_owner, datapackage_name)
        table_name = registry.table_name_for_package(datapackage_owner, datapackage_name)

        all_fields = set()
        field_translation = {}
        field_order = []
        # Process schema - slugify field names
        for field in schema['fields']:
            name = database_name(field['name'], all_fields)
            all_fields.add(name)
            translated_field = {
                'name': name,
                'type': field['type']
            }
            field_translation[field['name']] = translated_field
            field_order.append(field['name'])

        storage_schema = {
            'fields': [
                {
                    'type': f['type'],
                    'name': field_translation[f['name']]['name'],
                    'format': f.get('format', 'default')
                }
                for f in schema['fields']
                ],
            # Babbage likes just one primary key
            'primaryKey': '_id'
        }

        # Add Primary key to schema
        storage_schema['fields'].insert(0, {
            'name': '_id',
            'type': 'integer'
        })

        # Load 1st resource data into DB
        storage = Storage(engine)
        if storage.check(table_name):
            callback(status=STATUS_DELETING_TABLE)
            storage.delete(table_name)
        callback(status=STATUS_CREATING_TABLE)
        storage.create(table_name, storage_schema)
        callback(status=STATUS_LOADING_DATA_READY)
        storage.write(table_name, _translator_iterator(resource.iter(), field_order, callback))

        # Create Babbage Model
        callback(status=STATUS_CREATING_BABBAGE_MODEL)
        model = fdp_to_model(dpo, table_name, resource, field_translation)
        callback(status=STATUS_SAVING_METADATA)
        registry.save_model(model_name, package, dpo.metadata,
                            model, datapackage_name, fullname)
        return model_name, dpo.metadata, model
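
A worked example of the schema transformation above, assuming database_name() slugifies 'Budget Line' to 'budget_line' and 'Amount' to 'amount' (the input fields are hypothetical):

# input:  [{'name': 'Budget Line', 'type': 'string'},
#          {'name': 'Amount',      'type': 'number'}]
# resulting storage_schema, after slugification and the '_id' insertion:
storage_schema = {
    'fields': [
        {'name': '_id',         'type': 'integer'},
        {'name': 'budget_line', 'type': 'string', 'format': 'default'},
        {'name': 'amount',      'type': 'number', 'format': 'default'},
    ],
    'primaryKey': '_id',
}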
Example #13
def test_storage():

    # Get resources
    articles_descriptor = json.load(io.open('data/articles.json', encoding='utf-8'))
    comments_descriptor = json.load(io.open('data/comments.json', encoding='utf-8'))
    articles_rows = Stream('data/articles.csv', headers=1).open().read()
    comments_rows = Stream('data/comments.csv', headers=1).open().read()

    # Engine
    engine = create_engine(os.environ['DATABASE_URL'])

    # Storage
    storage = Storage(engine=engine, prefix='test_storage_')

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create(
            ['articles', 'comments'],
            [articles_descriptor, comments_descriptor],
            indexes_fields=[[['rating'], ['name'], ['created_datetime']], []])

    # Recreate bucket
    storage.create('comments', comments_descriptor, force=True)

    # Write data to buckets
    storage.write('articles', articles_rows)
    storage.write('comments', comments_rows)

    # Create new storage to use reflection only
    storage = Storage(engine=engine, prefix='test_storage_')

    # Create an already existing bucket
    with pytest.raises(RuntimeError):
        storage.create('articles', articles_descriptor)

    # Assert representation
    assert repr(storage).startswith('Storage')

    # Assert buckets
    assert storage.buckets == ['articles', 'comments']

    # Assert descriptors
    assert storage.describe('articles') == sync_descriptor(articles_descriptor)
    assert storage.describe('comments') == sync_descriptor(comments_descriptor)

    # Assert rows
    assert list(storage.read('articles')) == sync_rows(articles_descriptor, articles_rows)
    assert list(storage.read('comments')) == sync_rows(comments_descriptor, comments_rows)

    # Delete a non-existent bucket
    with pytest.raises(RuntimeError):
        storage.delete('non_existent')


    # Delete buckets
    storage.delete()
def test_update():


    # Get resources
    descriptor = json.load(io.open('data/original.json', encoding='utf-8'))
    original_rows = Stream('data/original.csv', headers=1).open().read()
    update_rows = Stream('data/update.csv', headers=1).open().read()
    update_keys = ['person_id', 'name']

    # Engine
    engine = create_engine(os.environ['DATABASE_URL'])

    # Storage
    storage = Storage(engine=engine, prefix='test_update_', autoincrement='__id')

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create('colors', descriptor)


    # Write data to buckets
    storage.write('colors', original_rows, update_keys=update_keys)

    gen = storage.write('colors', update_rows, update_keys=update_keys, as_generator=True)
    gen = list(gen)
    assert len(gen) == 5
    assert len(list(filter(lambda i: i.updated, gen))) == 3
    assert list(map(lambda i: i.updated_id, gen)) == [5, 3, 6, 4, 5]

    storage = Storage(engine=engine, prefix='test_update_', autoincrement='__id')
    gen = storage.write('colors', update_rows, update_keys=update_keys, as_generator=True)
    gen = list(gen)
    assert len(gen) == 5
    assert len(list(filter(lambda i: i.updated, gen))) == 5
    assert list(map(lambda i: i.updated_id, gen)) == [5, 3, 6, 4, 5]

    # Create new storage to use reflection only
    storage = Storage(engine=engine, prefix='test_update_')

    rows = list(storage.iter('colors'))

    assert len(rows) == 6
    color_by_person = dict(
        (row[1], row[3])
        for row in rows
    )
    assert color_by_person == {
        1: 'blue',
        2: 'green',
        3: 'magenta',
        4: 'sunshine',
        5: 'peach',
        6: 'grey'
    }

    # Storage without autoincrement
    storage = Storage(engine=engine, prefix='test_update_')
    storage.delete()
    storage.create('colors', descriptor)

    storage.write('colors', original_rows, update_keys=update_keys)
    gen = storage.write('colors', update_rows, update_keys=update_keys, as_generator=True)
    gen = list(gen)
    assert len(gen) == 5
    assert len(list(filter(lambda i: i.updated, gen))) == 3
    assert list(map(lambda i: i.updated_id, gen)) == [None, None, None, None, None]
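
The generator returned with as_generator=True yields result objects whose .updated and .updated_id attributes the test inspects; a small sketch, reusing the names defined in the test above, that summarises one write:

results = list(storage.write('colors', update_rows,
                             update_keys=update_keys, as_generator=True))
updated = sum(1 for r in results if r.updated)
print('updated=%d inserted=%d' % (updated, len(results) - updated))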
Example #15
def test_storage():

    # Get resources
    articles_schema = json.load(io.open('data/articles.json', encoding='utf-8'))
    comments_schema = json.load(io.open('data/comments.json', encoding='utf-8'))
    articles_data = topen('data/articles.csv', with_headers=True).read()
    comments_data = topen('data/comments.csv', with_headers=True).read()

    # Engine
    engine = create_engine(os.environ['DATABASE_URL'])

    # Storage
    storage = Storage(engine=engine, prefix='prefix_')

    # Delete tables
    for table in reversed(storage.tables):
        storage.delete(table)

    # Create tables
    storage.create(['articles', 'comments'], [articles_schema, comments_schema])

    # Write data to tables
    storage.write('articles', articles_data)
    storage.write('comments', comments_data)

    # Create new storage to use reflection only
    storage = Storage(engine=engine, prefix='prefix_')

    # Create an already existing table
    with pytest.raises(RuntimeError):
        storage.create('articles', articles_schema)

    # Get table representation
    assert repr(storage).startswith('Storage')

    # Get tables list
    assert storage.tables == ['articles', 'comments']

    # Get table schemas
    assert storage.describe('articles') == convert_schema(articles_schema)
    assert storage.describe('comments') == convert_schema(comments_schema)

    # Get table data
    assert list(storage.read('articles')) == convert_data(articles_schema, articles_data)
    assert list(storage.read('comments')) == convert_data(comments_schema, comments_data)

    # Delete tables
    for table in reversed(storage.tables):
        storage.delete(table)

    # Delete a non-existent table
    with pytest.raises(RuntimeError):
        storage.delete('articles')
    def load_fdp_to_db(package, engine=None, callback=None):
        """
        Load an FDP to the database, create a babbage model and save it as well
        :param package: URL for the datapackage.json
        :param engine: DB engine
        :param callback: callback to use to send progress updates
        """

        # Load and validate the datapackage
        if engine is None:
            engine = get_engine()
        if callback is None:
            callback = noop
        callback(status=STATUS_LOADING_DATAPACKAGE)
        dpo = DataPackage(package, schema='fiscal')
        callback(status=STATUS_VALIDATING_DATAPACKAGE)
        dpo.validate()
        callback(status=STATUS_LOADING_RESOURCE)
        resource = dpo.resources[0]
        schema = resource.descriptor['schema']

        # Use the cube manager to get the table name
        registry = ModelRegistry()
        datapackage_name = dpo.descriptor['name']
        datapackage_owner = dpo.descriptor['owner']
        datapackage_author = dpo.descriptor['author']

        # Get the full name from the author field, and rewrite it without the email
        fullname, email_addr = email.utils.parseaddr(datapackage_author)
        email_addr = email_addr.split('@')[0] + '@not.shown'
        dpo.descriptor['author'] = '{0} <{1}>'.format(fullname, email_addr)
        dpo.descriptor.setdefault('private', True)

        # Measure factors
        measures = dpo.descriptor.get('model',{}).get('measures',{})
        factors = {}
        for _, measure in measures.items():
            factor = measure.get('factor',1)
            if factor != 1:
                factors[measure.get('source')] = factor

        model_name = "{0}:{1}".format(datapackage_owner, datapackage_name)
        table_name = table_name_for_package(datapackage_owner, datapackage_name)

        all_fields = set()
        field_translation = {}
        field_order = []
        # Process schema - slugify field names
        for field in schema['fields']:
            name = database_name(field['name'], all_fields)
            all_fields.add(name)
            translated_field = {
                'name': name,
                'type': field['type']
            }
            field_translation[field['name']] = translated_field
            field_order.append(field['name'])

        storage_schema = {
            'fields': [
                {
                    'type': f['type'],
                    'name': field_translation[f['name']]['name'],
                    'format': f.get('format', 'default')
                }
                for f in schema['fields']
                ],
            # Babbage likes just one primary key
            'primaryKey': '_id'
        }

        # Add Primary key to schema
        storage_schema['fields'].insert(0, {
            'name': '_id',
            'type': 'integer'
        })

        # Create Babbage Model
        callback(status=STATUS_CREATING_BABBAGE_MODEL)
        model = fdp_to_model(dpo, table_name, resource, field_translation)

        # Create indexes
        indexes = []
        primary_keys = resource.descriptor['schema'].get('primaryKey',[])
        for dim in model['dimensions'].values():
            if dim['label'] in primary_keys:
                key_field = dim['attributes'][dim['key_attribute']]['label']
                key_field = field_translation[key_field]['name']
                indexes.append((key_field,))

                label_field = dim['attributes'].get(dim.get('label_attribute'), {}).get('label')
                if label_field is not None:
                    label_field = field_translation[label_field]['name']
                    if label_field != key_field:
                        indexes.append((key_field, label_field))


        # Load 1st resource data into DB
        storage = Storage(engine)
        if storage.check(table_name):
            callback(status=STATUS_DELETING_TABLE)
            storage.delete(table_name)
        callback(status=STATUS_CREATING_TABLE)
        storage.create(table_name, storage_schema, indexes)

        callback(status=STATUS_LOADING_DATA_READY)
        storage.write(table_name, _translator_iterator(resource.iter(), field_order, factors, callback))

        callback(status=STATUS_SAVING_METADATA)
        registry.save_model(model_name, package, dpo.descriptor,
                            model, datapackage_name, fullname)
        return model_name, dpo.descriptor, model
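
A worked example of the measure-factor extraction above, with a hypothetical model block ('amount' stored in thousands):

measures = {
    'amount': {'source': 'amount', 'factor': 1000},  # factor != 1 -> recorded
    'count':  {'source': 'count'},                   # factor defaults to 1 -> skipped
}
factors = {}
for _, measure in measures.items():
    factor = measure.get('factor', 1)
    if factor != 1:
        factors[measure.get('source')] = factor
assert factors == {'amount': 1000}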
Example #17
import io
import json
import os

from sqlalchemy import create_engine
from tabulator import topen  # topen comes from early tabulator releases
from dotenv import load_dotenv; load_dotenv('.env')

from jsontableschema_sql import Storage

# Get resources
articles_schema = json.load(io.open('data/articles.json', encoding='utf-8'))
comments_schema = json.load(io.open('data/comments.json', encoding='utf-8'))
articles_data = topen('data/articles.csv', with_headers=True).read()
comments_data = topen('data/comments.csv', with_headers=True).read()

# Engine
engine = create_engine(os.environ['DATABASE_URL'])

# Storage
storage = Storage(engine=engine, prefix='prefix_')

# Delete tables
for table in reversed(storage.tables):
    storage.delete(table)

# Create tables
storage.create(['articles', 'comments'], [articles_schema, comments_schema])

# Write data to tables
storage.write('articles', articles_data)
storage.write('comments', comments_data)

# List tables
print(storage.tables)
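
To read the data back, the same reflection-based calls used in the test snippets above apply:

print(storage.describe('articles'))    # reflected schema
print(list(storage.read('articles')))  # rows as lists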