Example #1
class TestMetaData(unittest.TestCase):
    """Test case that provides setUp and tearDown
    of metadata separated from the camelot default
    metadata.  
    
    This can be used to setup and test various
    model configurations that dont interfer with 
    eachother.
    """
    def setUp(self):
        from sqlalchemy import MetaData
        from sqlalchemy.ext.declarative import declarative_base
        self.metadata = MetaData()
        self.class_registry = dict()
        self.Entity = declarative_base(cls=EntityBase,
                                       metadata=self.metadata,
                                       metaclass=EntityMeta,
                                       class_registry=self.class_registry,
                                       constructor=None,
                                       name='Entity')
        self.metadata.bind = 'sqlite://'
        self.session = Session()

    def create_all(self):
        from camelot.core.orm import process_deferred_properties
        process_deferred_properties(self.class_registry)
        self.metadata.create_all()

    def tearDown(self):
        self.metadata.drop_all()
        self.metadata.clear()
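
A minimal usage sketch of the fixture above; the Movie entity, its table name and its columns are hypothetical and only illustrate the isolated-metadata pattern:

class MovieCase(TestMetaData):

    def test_table_is_created_and_dropped(self):
        from sqlalchemy import Column, Integer, Unicode

        class Movie(self.Entity):
            __tablename__ = 'movie'
            id = Column(Integer, primary_key=True)
            title = Column(Unicode(60))

        self.create_all()
        # the table lives only in this test's metadata; tearDown drops and clears it
        self.assertIn('movie', self.metadata.tables)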
Example #2
class Recommend(object):

    def __init__(self, engine):
        self.engine = engine
        self.md = MetaData(self.engine)

    def select(self):
        rank_urls = []
        for rec in self._load_top():
            rank_urls.append((rec["title"], rec["url"], rec["id"]))
        return rank_urls

    def _load_top(self, num=5):
        """Load top recommend url"""
        self.md.clear()
        my_bookmark = Table('my_bookmark', self.md, Column('url_id'))
        bookmark = Table('bookmark', self.md, Column('url_id'))
        feed = Table('feed', self.md, Column('id'), Column('title'), Column('url'))
        notification = Table('notification', self.md, Column('url_id'))

        j1 = join(bookmark, feed, bookmark.c.url_id == feed.c.id)
        j2 = j1.join(my_bookmark, bookmark.c.url_id == my_bookmark.c.url_id, isouter=True)
        j3 = j2.join(notification, notification.c.url_id == bookmark.c.url_id, isouter=True)

        s = select(columns=[feed.c.id, feed.c.url, feed.c.title]).\
                select_from(j3).where(my_bookmark.c.url_id == None).\
                where(notification.c.url_id == None).\
                group_by(bookmark.c.url_id).\
                having(count(bookmark.c.url_id)).\
                order_by(count(bookmark.c.url_id).desc()).\
                limit(num)
        #print(s) ### For debug
        return s.execute()
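
The select(columns=[...]) / s.execute() calls above are the legacy, pre-1.4 SQLAlchemy calling style. A hedged sketch of the same query in the 1.4+ positional-column style, executed through an explicit connection (the table and join objects are the ones built above):

from sqlalchemy import select, func

stmt = (
    select(feed.c.id, feed.c.url, feed.c.title)
    .select_from(j3)
    .where(my_bookmark.c.url_id.is_(None))
    .where(notification.c.url_id.is_(None))
    .group_by(bookmark.c.url_id)
    .having(func.count(bookmark.c.url_id) > 0)   # explicit comparison; the original relied on a bare COUNT()
    .order_by(func.count(bookmark.c.url_id).desc())
    .limit(num)
)
with self.engine.connect() as conn:
    rows = conn.execute(stmt).fetchall()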
Example #4
def create_db_and_mapper():
    """Creates a mapper from desired model into SQLAlchemy"""
    from sqlalchemy.orm import (
        mapper,
        clear_mappers,
    )

    from sqlalchemy import (
        MetaData,
        Table,
        Column,
        Integer,
        String,
    )

    metadata = MetaData()
    metadata.clear()
    order_lines = Table(
        "order_lines",
        metadata,
        Column("id", Integer, primary_key=True, autoincrement=True),
        Column("sku", String),
        Column("qty", Integer),
        Column("ref", String),
    )
    metadata.create_all(get_engine())

    clear_mappers()
    mapper(models.OrderLine, order_lines)
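
A hedged usage sketch, assuming the surrounding project provides get_engine() and models.OrderLine as referenced above:

from sqlalchemy.orm import sessionmaker

create_db_and_mapper()                               # tables created, classical mapping registered
session = sessionmaker(bind=get_engine())()
order_lines = session.query(models.OrderLine).all()  # the mapped class is now queryable
session.close()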
Example #5
    def test_explicit_default_schema_metadata(self):
        engine = testing.db

        if testing.against("sqlite"):
            # Works for CREATE TABLE main.foo, SELECT FROM main.foo, etc.,
            # but fails on:
            #   FOREIGN KEY(col2) REFERENCES main.table1 (col1)
            schema = "main"
        else:
            schema = engine.dialect.default_schema_name

        assert bool(schema)

        metadata = MetaData(engine, schema=schema)
        table1 = Table("table1", metadata, Column("col1", sa.Integer, primary_key=True), test_needs_fk=True)
        table2 = Table(
            "table2",
            metadata,
            Column("col1", sa.Integer, primary_key=True),
            Column("col2", sa.Integer, sa.ForeignKey("table1.col1")),
            test_needs_fk=True,
        )
        try:
            metadata.create_all()
            metadata.create_all(checkfirst=True)
            assert len(metadata.tables) == 2
            metadata.clear()

            table1 = Table("table1", metadata, autoload=True)
            table2 = Table("table2", metadata, autoload=True)
            assert len(metadata.tables) == 2
        finally:
            metadata.drop_all()
Example #6
    def test_attached_as_schema(self):
        cx = testing.db.connect()
        try:
            cx.execute('ATTACH DATABASE ":memory:" AS  test_schema')
            dialect = cx.dialect
            assert dialect.get_table_names(cx, 'test_schema') == []
            meta = MetaData(cx)
            Table('created', meta, Column('id', Integer),
                  schema='test_schema')
            alt_master = Table('sqlite_master', meta, autoload=True,
                               schema='test_schema')
            meta.create_all(cx)
            eq_(dialect.get_table_names(cx, 'test_schema'), ['created'])
            assert len(alt_master.c) > 0
            meta.clear()
            reflected = Table('created', meta, autoload=True,
                              schema='test_schema')
            assert len(reflected.c) == 1
            cx.execute(reflected.insert(), dict(id=1))
            r = cx.execute(reflected.select()).fetchall()
            assert list(r) == [(1, )]
            cx.execute(reflected.update(), dict(id=2))
            r = cx.execute(reflected.select()).fetchall()
            assert list(r) == [(2, )]
            cx.execute(reflected.delete(reflected.c.id == 2))
            r = cx.execute(reflected.select()).fetchall()
            assert list(r) == []

            # note that sqlite_master is cleared, above

            meta.drop_all()
            assert dialect.get_table_names(cx, 'test_schema') == []
        finally:
            cx.execute('DETACH DATABASE test_schema')
Example #8
    def test_schema_collection_remove_all(self):
        metadata = MetaData()

        t1 = Table('t1', metadata, Column('x', Integer), schema='foo')
        t2 = Table('t2', metadata, Column('x', Integer), schema='bar')

        metadata.clear()
        eq_(metadata._schemas, set())
        eq_(len(metadata.tables), 0)
Example #9
def drop_all_tables(engine):
    """
    Reflect the existing schema so SQLAlchemy can drop every table, even ones it did not create itself.
    :param engine:
    :return:
    """
    meta = MetaData(engine)
    meta.reflect()
    meta.clear()
    meta.reflect()
    meta.drop_all()
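
A hedged usage sketch of the helper above; the connection URL is hypothetical:

from sqlalchemy import create_engine

engine = create_engine('sqlite:///example.db')  # hypothetical URL
drop_all_tables(engine)                         # reflects whatever exists in the database, then drops it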
Example #10
    def parse(self, engine):
        meta = MetaData(bind=engine)
        meta.clear()
        meta.reflect(schema=self.name)

        for table_ref in meta.sorted_tables:
            if table_ref.schema != self.name:
                # This is a table imported through a foreign key
                continue
            table_name = table_ref.name
            self.tables[table_name] = Table(table_name, self)
            self.tables[table_name].parse(table_ref, engine)
Example #11
class Notification(object):
    def __init__(self, engine):
        self.engine = engine
        self.md = MetaData(self.engine)

    def add_as_notified(self, url_id):
        self.md.clear()
        md = MetaData(self.engine)
        t = Table('notification', md, autoload=True)
        i = insert(t).values(url_id=url_id,
                             notified_date=datetime.now().strftime('%Y%m%d'))
        i.execute()
Example #13
def downgrade(migrate_engine):
    # Operations to reverse the above upgrade go here.
    meta = MetaData()
    meta.bind = migrate_engine

    new_quotas = quotas_table(meta)
    assert_new_quotas_have_no_active_duplicates(migrate_engine, new_quotas)

    old_quotas = old_style_quotas_table(meta, 'quotas_old')
    old_quotas.create()
    convert_backward(migrate_engine, old_quotas, new_quotas)
    new_quotas.drop()

    # clear metadata to work around this:
    # http://code.google.com/p/sqlalchemy-migrate/issues/detail?id=128
    meta.clear()
    old_quotas = quotas_table(meta, 'quotas_old')
    old_quotas.rename('quotas')
Example #15
    def _setup_db(self):
        Base = declarative_base()
        metadata = MetaData()
        engine = create_engine(os.environ.get('DATABASE_URL'))
        self.connection = engine.connect()
        metadata.bind = engine
        metadata.clear()

        self.upload_history = Table(
            'recording_upload_history',
            metadata,
            Column('id', Integer, primary_key=True),
            Column('topic', String(512)),
            Column('meeting_id', String(512)),
            Column('recording_id', String(512)),
            Column('meeting_uuid', String(512)),
            # Column('meeting_link', String(512)),
            Column('start_time', String(512)),
            Column('file_name', String(512)),
            Column('file_size', Integer),
            Column('cnt_files', Integer),
            Column('recording_link', String(512)),
            Column('folder_link', String(512)),
            Column('status', String(256)),
            Column('message', Text),
            Column('run_at', String(256)),
        )

        self.upload_status = Table(
            'meeting_upload_status',
            metadata,
            Column('id', Integer, primary_key=True),
            Column('topic', String(512)),
            Column('meeting_id', String(512)),
            Column('meeting_uuid', String(512)),
            # Column('meeting_link', String(512)),
            Column('start_time', String(512)),
            Column('folder_link', String(512)),
            Column('cnt_files', Integer),
            Column('status', Boolean),
            Column('is_deleted', Boolean),
            Column('run_at', String(256)),
        )
        metadata.create_all()
Example #16
def upgrade(migrate_engine):
    # Upgrade operations go here. Don't create your own engine;
    # bind migrate_engine to your metadata
    meta = MetaData()
    meta.bind = migrate_engine

    old_quotas = quotas_table(meta)
    assert_old_quotas_have_no_active_duplicates(migrate_engine, old_quotas)

    new_quotas = new_style_quotas_table(meta, 'quotas_new')
    new_quotas.create()
    convert_forward(migrate_engine, old_quotas, new_quotas)
    old_quotas.drop()

    # clear metadata to work around this:
    # http://code.google.com/p/sqlalchemy-migrate/issues/detail?id=128
    meta.clear()
    new_quotas = quotas_table(meta, 'quotas_new')
    new_quotas.rename('quotas')
Example #18
class User(object):
    """User class."""

    def __init__(self, engine, name):
        self.engine = engine 
        self.md = MetaData(self.engine)
        self.name = name

    @property
    def id(self):
        """Load id."""
        logging.debug('Fetch id')
        n = self._load_user_no()
        if n:
            return n
        return self._append_user()

    def _append_user(self):
        """Add new recommend user."""
        self.md.clear()
        t = Table('user', self.md, autoload=True)
        i = insert(t).values(name=self.name)
        i.execute()
        # TODO: Change logic.
        _id = self._load_user_no()
        logging.info('Add new user(id={}, name={}).'.format(_id, self.name))
        return _id

    def _load_user_no(self):
        """Load user_no."""
        self.md.clear()
        t = Table('user', self.md, autoload=True)
        c_name = column('name')
        s = select(columns=[column('id')], 
                   from_obj=t).where(c_name==self.name)
        r = s.execute().fetchone()
        _id = None
        if r:
            _id = r['id']
        logging.info('Load user id(name={}, id={}).'.format(self.name, _id))
        return _id
Example #20
class DatabaseCtx(object):
    def __init__(self, dbname):
        if dbname not in DATABASE:
            raise RuntimeError("Database name \"%s\" not known" % dbname)
        self.db_info = DATABASE[dbname]
        if truthy(self.db_info.get('sql_tracing')):
            dblog.setLevel(logging.INFO)
        self.eng  = create_engine(self.db_info['connect_str'])
        self.conn = self.eng.connect()
        self.meta = MetaData(self.conn)
        self.meta.reflect()

    def create_schema(self, tables = None, dryrun = False, force = False):
        if len(self.meta.tables) > 0:
            raise RuntimeError("Schema must be empty, create is aborted")

        schema.load_schema(self.meta)
        if not tables or tables == 'all':
            self.meta.create_all()
        else:
            to_create = [self.meta.tables[tab] for tab in tables.split(',')]
            self.meta.create_all(tables=to_create)

    def drop_schema(self, tables = None, dryrun = False, force = False):
        if len(self.meta.tables) == 0:
            raise RuntimeError("Schema is empty, nothing to drop")

        if not tables or tables == 'all':
            if not force:
                raise RuntimeError("Force must be specified if no list of tables given")
            self.meta.drop_all()
            self.meta.clear()
        else:
            to_drop = [self.meta.tables[tab] for tab in tables.split(',')]
            self.meta.drop_all(tables=to_drop)
            self.meta.clear()
            self.meta.reflect()

    def get_table(self, name):
        return self.meta.tables[name]
Example #21
    def test_explicit_default_schema(self):
        engine = testing.db

        if testing.against('mysql'):
            schema = testing.db.url.database
        elif testing.against('postgres'):
            schema = 'public'
        elif testing.against('sqlite'):
            # Works for CREATE TABLE main.foo, SELECT FROM main.foo, etc.,
            # but fails on:
            #   FOREIGN KEY(col2) REFERENCES main.table1 (col1)
            schema = 'main'
        else:
            schema = engine.dialect.get_default_schema_name(engine.connect())

        metadata = MetaData(engine)
        table1 = Table('table1',
                       metadata,
                       Column('col1', sa.Integer, primary_key=True),
                       test_needs_fk=True,
                       schema=schema)
        table2 = Table('table2',
                       metadata,
                       Column('col1', sa.Integer, primary_key=True),
                       Column('col2', sa.Integer,
                              sa.ForeignKey('%s.table1.col1' % schema)),
                       test_needs_fk=True,
                       schema=schema)
        try:
            metadata.create_all()
            metadata.create_all(checkfirst=True)
            assert len(metadata.tables) == 2
            metadata.clear()

            table1 = Table('table1', metadata, autoload=True, schema=schema)
            table2 = Table('table2', metadata, autoload=True, schema=schema)
            assert len(metadata.tables) == 2
        finally:
            metadata.drop_all()
Example #23
class MysqlTopicStorage(TopicStorageInterface):
    def __init__(self, client, storage_template):
        self.engine = client
        self.storage_template = storage_template
        self.insp = inspect(client)
        self.metadata = MetaData()
        self.lock = threading.RLock()
        log.info("mysql template initialized")

    def get_topic_table_by_name(self, table_name):
        self.lock.acquire()
        try:
            table = Table(table_name,
                          self.metadata,
                          extend_existing=False,
                          autoload=True,
                          autoload_with=self.engine)
            return table
        finally:
            self.lock.release()

    def build_mysql_where_expression(self, table, where):
        for key, value in where.items():
            if key == "and" or key == "or":
                result_filters = self.get_result_filters(table, value)
                if key == "and":
                    return and_(*result_filters)
                if key == "or":
                    return or_(*result_filters)
            else:
                if isinstance(value, dict):
                    for k, v in value.items():
                        if k == "=":
                            return table.c[key.lower()] == v
                        if k == "!=":
                            return operator.ne(table.c[key.lower()], v)
                        if k == "like":
                            if v != "" or v != '' or v is not None:
                                return table.c[key.lower()].like("%" + v + "%")
                        if k == "in":
                            if isinstance(table.c[key.lower()].type, JSON):
                                stmt = ""
                                if isinstance(v, list):
                                    # value_ = ",".join(v)
                                    for item in v:
                                        if stmt == "":
                                            stmt = "JSON_CONTAINS(" + key.lower(
                                            ) + ", '[\"" + item + "\"]', '$') = 1"
                                        else:
                                            stmt = stmt + " or JSON_CONTAINS(" + key.lower(
                                            ) + ", '[\"" + item + "\"]', '$') = 1 "
                                else:
                                    value_ = v
                                    stmt = "JSON_CONTAINS(" + key.lower(
                                    ) + ", '[\"" + value_ + "\"]', '$') = 1"
                                return text(stmt)
                            else:
                                if isinstance(v, list):
                                    return table.c[key.lower()].in_(v)
                                elif isinstance(v, str):
                                    v_list = v.split(",")
                                    return table.c[key.lower()].in_(v_list)
                                else:
                                    raise TypeError(
                                        "operator in, the value \"{0}\" is not list or str"
                                        .format(v))
                        if k == "not-in":
                            if isinstance(table.c[key.lower()].type, JSON):
                                if isinstance(v, list):
                                    value_ = ",".join(v)
                                else:
                                    value_ = v
                                stmt = "JSON_CONTAINS(" + key.lower(
                                ) + ", '[\"" + value_ + "\"]', '$') = 0"
                                return text(stmt)
                            else:
                                if isinstance(v, list):
                                    return table.c[key.lower()].notin_(v)
                                elif isinstance(v, str):
                                    v_list = ",".join(v)
                                    return table.c[key.lower()].notin_(v_list)
                                else:
                                    raise TypeError(
                                        "operator not_in, the value \"{0}\" is not list or str"
                                        .format(v))
                        if k == ">":
                            return table.c[key.lower()] > v
                        if k == ">=":
                            return table.c[key.lower()] >= v
                        if k == "<":
                            return table.c[key.lower()] < v
                        if k == "<=":
                            return table.c[key.lower()] <= v
                        if k == "between":
                            if (isinstance(v, tuple)) and len(v) == 2:
                                return table.c[key.lower()].between(v[0], v[1])
                else:
                    return table.c[key.lower()] == value

    def get_result_filters(self, table, value):
        if isinstance(value, list):
            result_filters = []
            for express in value:
                result = self.build_mysql_where_expression(table, express)
                result_filters.append(result)
            return result_filters
        else:
            return []

    # @staticmethod
    def build_mysql_updates_expression(self, table, updates,
                                       stmt_type: str) -> dict:
        if stmt_type == "insert":
            new_updates = {}
            for key in table.c.keys():
                if key == "id_":
                    new_updates[key] = get_int_surrogate_key()
                elif key == "version_":
                    new_updates[key] = 0
                else:
                    if isinstance(table.c[key].type, JSON):
                        if updates.get(key) is not None:
                            new_updates[key] = updates.get(key)
                        else:
                            new_updates[key] = None
                    else:
                        if updates.get(key) is not None:
                            value_ = updates.get(key)
                            if isinstance(value_, dict):
                                for k, v in value_.items():
                                    if k == "_sum":
                                        new_updates[key.lower()] = v
                                    elif k == "_count":
                                        new_updates[key.lower()] = v
                                    elif k == "_avg":
                                        pass  # todo
                            else:
                                new_updates[key] = value_
                        else:
                            default_value = self.get_table_column_default_value(
                                table.name, key)
                            if default_value is not None:
                                value_ = default_value.strip("'").strip(" ")
                                if value_.isdigit():
                                    new_updates[key] = Decimal(value_)
                                else:
                                    new_updates[key] = value_
                            else:
                                new_updates[key] = None
            return new_updates
        elif stmt_type == "update":
            new_updates = {}
            for key in table.c.keys():
                if key == "version_":
                    new_updates[key] = updates.get(key) + 1
                else:
                    if isinstance(table.c[key].type, JSON):
                        if updates.get(key) is not None:
                            new_updates[key] = updates.get(key)
                    else:
                        if updates.get(key) is not None:
                            value_ = updates.get(key)
                            if isinstance(value_, dict):
                                for k, v in value_.items():
                                    if k == "_sum":
                                        new_updates[key.lower()] = text(
                                            f'{key.lower()} + {v}')
                                    elif k == "_count":
                                        new_updates[key.lower()] = text(
                                            f'{key.lower()} + {v}')
                                    elif k == "_avg":
                                        pass  # todo
                            else:
                                new_updates[key] = value_
            return new_updates

    @staticmethod
    def build_mysql_order(table, order_: list):
        result = []
        if order_ is None:
            return result
        else:
            for item in order_:
                if isinstance(item, tuple):
                    if item[1] == "desc":
                        new_ = desc(table.c[item[0].lower()])
                        result.append(new_)
                    if item[1] == "asc":
                        new_ = asc(table.c[item[0].lower()])
                        result.append(new_)
            return result

    def clear_metadata(self):
        self.metadata.clear()

    '''
    topic data interface
    '''

    def drop_(self, topic_name):
        return self.drop_topic_data_table(topic_name)

    def drop_topic_data_table(self, topic_name):
        table_name = 'topic_' + topic_name
        try:
            table = self.get_topic_table_by_name(table_name)
            table.drop(self.engine)
        except NoSuchTableError:
            log.warning("drop table \"{0}\" not existed".format(table_name))

    def topic_data_delete_(self, where, topic_name):
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        if where is None:
            stmt = delete(table)
        else:
            stmt = delete(table).where(
                self.build_mysql_where_expression(table, where))
        with self.engine.connect() as conn:
            with conn.begin():
                conn.execute(stmt)

    @staticmethod
    def build_stmt(stmt_type, table_name, table):
        key = stmt_type + "-" + table_name
        result = cacheman[STMT].get(key)
        if result is not None:
            return result
        else:
            if stmt_type == "insert":
                stmt = insert(table)
                cacheman[STMT].set(key, stmt)
                return stmt
            elif stmt_type == "update":
                stmt = update(table)
                cacheman[STMT].set(key, stmt)
                return stmt
            elif stmt_type == "select":
                stmt = select(table)
                cacheman[STMT].set(key, stmt)
                return stmt

    def topic_data_insert_one(self, one, topic_name):
        table_name = f"topic_{topic_name}"
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("insert", table_name, table)
        one_dict: dict = capital_to_lower(convert_to_dict(one))
        value = self.build_mysql_updates_expression(table, one_dict, "insert")
        with self.engine.connect() as conn:
            with conn.begin():
                try:
                    result = conn.execute(stmt, value)
                except IntegrityError as e:
                    raise InsertConflictError("InsertConflict")
        return result.rowcount

    def topic_data_insert_(self, data, topic_name):
        table_name = f"topic_{topic_name}"
        table = self.get_topic_table_by_name(table_name)
        values = []
        for instance in data:
            instance_dict: dict = convert_to_dict(instance)
            instance_dict['id_'] = get_int_surrogate_key()
            value = {}
            for key in table.c.keys():
                value[key] = instance_dict.get(key)
            values.append(value)
        stmt = self.build_stmt("insert", table_name, table)
        with self.engine.connect() as conn:
            with conn.begin():
                conn.execute(stmt, values)

    def topic_data_update_one(self, id_: int, one: any, topic_name: str):
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("update", table_name, table)

        stmt = stmt.where(eq(table.c['id_'], id_))
        one_dict = convert_to_dict(one)
        values = self.build_mysql_updates_expression(
            table, capital_to_lower(one_dict), "update")
        stmt = stmt.values(values)
        with self.engine.begin() as conn:
            conn.execute(stmt)

    def topic_data_update_one_with_version(self, id_: int, version_: int,
                                           one: any, topic_name: str):
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("update", table_name, table)
        stmt = stmt.where(
            and_(eq(table.c['id_'], id_), eq(table.c['version_'], version_)))
        one_dict = convert_to_dict(one)
        one_dict['version_'] = version_
        values = self.build_mysql_updates_expression(
            table, capital_to_lower(one_dict), "update")
        stmt = stmt.values(values)
        with self.engine.begin() as conn:
            result = conn.execute(stmt)
        if result.rowcount == 0:
            raise OptimisticLockError("Optimistic lock error")

    def topic_data_update_(self, query_dict, instance, topic_name):
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("update", table_name, table)
        stmt = (stmt.where(self.build_mysql_where_expression(
            table, query_dict)))
        instance_dict: dict = convert_to_dict(instance)
        values = {}
        for key, value in instance_dict.items():
            if key != 'id_':
                if key.lower() in table.c.keys():
                    values[key.lower()] = value
        stmt = stmt.values(values)
        with self.engine.begin() as conn:
            # with conn.begin():
            conn.execute(stmt)

    def topic_data_find_by_id(self, id_: int, topic_name: str) -> any:
        return self.topic_data_find_one({"id_": id_}, topic_name)

    def topic_data_find_one(self, where, topic_name) -> any:
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("select", table_name, table)
        stmt = stmt.where(self.build_mysql_where_expression(table, where))
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            row = cursor.fetchone()
        if row is None:
            return None
        else:
            result = {}
            for index, name in enumerate(columns):
                if isinstance(table.c[name.lower()].type, JSON):
                    if row[index] is not None:
                        result[name] = json.loads(row[index])
                    else:
                        result[name] = None
                else:
                    result[name] = row[index]
            return self._convert_dict_key(result, topic_name)

    def topic_data_find_(self, where, topic_name):
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("select", table_name, table)
        stmt = stmt.where(self.build_mysql_where_expression(table, where))
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            res = cursor.fetchall()
        if res is None:
            return None
        else:
            results = []
            for row in res:
                result = {}
                for index, name in enumerate(columns):
                    if isinstance(table.c[name.lower()].type, JSON):
                        if row[index] is not None:
                            result[name] = json.loads(row[index])
                        else:
                            result[name] = None
                    else:
                        result[name] = row[index]
                results.append(result)
            return self._convert_list_elements_key(results, topic_name)

    def topic_data_find_with_aggregate(self, where, topic_name, aggregate):
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        for key, value in aggregate.items():
            if value == "sum":
                stmt = select(text(f'sum({key.lower()})'))
            elif value == "count":
                stmt = select(func.count())
            elif value == "avg":
                stmt = select(text(f'avg({key.lower()})'))
        stmt = stmt.select_from(table)
        stmt = stmt.where(self.build_mysql_where_expression(table, where))
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            res = cursor.fetchone()
        if res is None:
            return None
        else:
            return res[0]

    def topic_data_list_all(self, topic_name) -> list:
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        # stmt = select(table)
        stmt = self.build_stmt("select", table_name, table)
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            res = cursor.fetchall()
            if res is None:
                return None
            else:
                results = []
                for row in res:
                    result = {}
                    for index, name in enumerate(columns):
                        if isinstance(table.c[name.lower()].type, JSON):
                            if row[index] is not None:
                                result[name] = json.loads(row[index])
                            else:
                                result[name] = None
                        else:
                            result[name] = row[index]
                    if self.storage_template.check_topic_type(
                            topic_name) == "raw":
                        results.append(result['data_'])
                    else:
                        results.append(result)
                if self.storage_template.check_topic_type(topic_name) == "raw":
                    return results
                else:
                    return self._convert_list_elements_key(results, topic_name)

    def topic_data_page_(self, where, sort, pageable, model, name) -> DataPage:
        table_name = build_collection_name(name)
        count = self.count_topic_data_table(table_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("select", table_name, table)
        stmt = stmt.where(self.build_mysql_where_expression(table, where))
        orders = self.build_mysql_order(table, sort)
        for order in orders:
            stmt = stmt.order_by(order)
        offset = pageable.pageSize * (pageable.pageNumber - 1)
        stmt = stmt.offset(offset).limit(pageable.pageSize)
        results = []
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            res = cursor.fetchall()
        if self.storage_template.check_topic_type(name) == "raw":
            for row in res:
                result = {}
                for index, name in enumerate(columns):
                    if name == "data_":
                        result.update(json.loads(row[index]))
                results.append(result)
        else:
            for row in res:
                result = {}
                for index, name in enumerate(columns):
                    if isinstance(table.c[name.lower()].type, JSON):
                        if row[index] is not None:
                            result[name] = json.loads(row[index])
                        else:
                            result[name] = None
                    else:
                        result[name] = row[index]
                if model is not None:
                    results.append(parse_obj(model, result, table))
                else:
                    results.append(result)
        return build_data_pages(pageable, results, count)

    '''
        internal method
    '''

    def get_table_column_default_value(self, table_name, column_name):
        columns = self._get_table_columns(table_name)
        for column in columns:
            if column["name"] == column_name:
                return column["default"]

    def _get_table_columns(self, table_name):
        cached_columns = cacheman[COLUMNS_BY_TABLE_NAME].get(table_name)
        if cached_columns is not None:
            return cached_columns
        columns = self.insp.get_columns(table_name)
        if columns is not None:
            cacheman[COLUMNS_BY_TABLE_NAME].set(table_name, columns)
            return columns

    def _convert_list_elements_key(self, list_info, topic_name):
        if list_info is None:
            return None
        new_list = []
        factors = self.storage_template.get_topic_factors(topic_name)
        for item in list_info:
            new_dict = {}
            for factor in factors:
                new_dict[factor['name']] = item[factor['name'].lower()]
                new_dict['id_'] = item['id_']
                if 'tenant_id_' in item:
                    new_dict['tenant_id_'] = item.get("tenant_id_", 1)
                if "insert_time_" in item:
                    new_dict['insert_time_'] = item.get(
                        "insert_time_",
                        datetime.now().replace(tzinfo=None))
                if "update_time_" in item:
                    new_dict['update_time_'] = item.get(
                        "update_time_",
                        datetime.now().replace(tzinfo=None))
                if "version_" in item:
                    new_dict['version_'] = item.get("version_", 0)
                if "aggregate_assist_" in item:
                    new_dict['aggregate_assist_'] = item.get(
                        "aggregate_assist_")
            new_list.append(new_dict)
        return new_list

    def _convert_dict_key(self, dict_info, topic_name):
        if dict_info is None:
            return None
        new_dict = {}
        # print("topic_name",topic_name)
        factors = self.storage_template.get_topic_factors(topic_name)
        for factor in factors:
            new_dict[factor['name']] = dict_info[factor['name'].lower()]
        new_dict['id_'] = dict_info['id_']
        if 'tenant_id_' in dict_info:
            new_dict['tenant_id_'] = dict_info.get("tenant_id_", 1)
        if "insert_time_" in dict_info:
            new_dict['insert_time_'] = dict_info.get(
                "insert_time_",
                datetime.now().replace(tzinfo=None))
        if "update_time_" in dict_info:
            new_dict['update_time_'] = dict_info.get(
                "update_time_",
                datetime.now().replace(tzinfo=None))
        if "version_" in dict_info:
            new_dict['version_'] = dict_info.get("version_", None)
        if "aggregate_assist_" in dict_info:
            new_dict['aggregate_assist_'] = dict_info.get("aggregate_assist_")
        return new_dict

    def count_topic_data_table(self, table_name):
        stmt = 'SELECT count(%s) AS count FROM %s' % ('id_', table_name)
        with self.engine.connect() as conn:
            cursor = conn.execute(text(stmt)).cursor
            columns = [col[0] for col in cursor.description]
            result = cursor.fetchone()
        return result[0]
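
build_mysql_where_expression above turns a small dict DSL into SQLAlchemy expressions. A hedged sketch of a call site; the topic name and column names are hypothetical, and engine / storage_template come from the surrounding project:

storage = MysqlTopicStorage(engine, storage_template)
where = {
    "and": [
        {"status": {"=": "open"}},
        {"amount": {">=": 100}},
    ]
}
# SELECT ... WHERE status = 'open' AND amount >= 100
rows = storage.topic_data_find_(where, 'orders')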
Example #24
class CreateTablesFromCSVs(DataFrameConverter):
    """Infer a table schema from a CSV, and create a sql table from this definition"""
    def __init__(self, db_url):
        self.engine = create_engine(db_url)
        self.Base = None
        self.reflect_db_tables_to_sqlalchemy_classes()
        self.meta = MetaData(bind=self.engine)

    def create_and_fill_new_sql_table_from_df(self, table_name, data,
                                              if_exists):

        key_column = None
        print(f"Creating empty table named {table_name}")
        upload_id = SqlDataInventory.get_new_upload_id_for_table(table_name)
        upload_time = datetime.utcnow()
        data = self.append_metadata_to_data(
            data,
            upload_id=upload_id,
            key_columns=key_column,
        )
        data, schema = self.get_schema_from_df(data)
        # this creates an empty table of the correct schema using pandas to_sql
        self.create_new_table(table_name,
                              schema,
                              key_columns=key_column,
                              if_exists=if_exists)
        conn = connect_to_db_using_psycopg2()
        success = psycopg2_copy_from_stringio(conn, data, table_name)
        if not success:
            raise Exception
        return upload_id, upload_time, table_name

    def reflect_db_tables_to_sqlalchemy_classes(self):
        self.Base = automap_base()
        # reflect the tables present in the sql database as sqlalchemy models
        self.Base.prepare(self.engine, reflect=True)

    @staticmethod
    def get_data_from_csv(csv_data_file_path):
        """
        :param csv_data_file_path:
        :return: pandas dataframe
        """
        return pd.read_csv(csv_data_file_path, encoding="utf-8", comment="#")

    def create_new_table(
        self,
        table_name,
        schema,
        key_columns,
        if_exists,
    ):
        """
        Create an EMPTY table from CSV and generated schema.
        Empty table because the ORM copy function is very slow- we'll populate the table
        data using a lower-level interface to SQL
        """

        self.meta.reflect()
        table_object = self.meta.tables.get(table_name)
        if table_object is not None:
            if if_exists == REPLACE:
                self.meta.drop_all(tables=[table_object])
                print(f"Table {table_name} already exists- "
                      f"dropping and replacing as per argument")
                self.meta.clear()
            elif if_exists == FAIL:
                raise KeyError(
                    f"Table {table_name} already exists- failing as per argument"
                )
            elif if_exists == APPEND:
                f"Table {table_name} already exists- appending data as per argument"
                return
        # table doesn't exist- create it
        columns = []
        primary_keys = key_columns or [UPLOAD_ID, INDEX_COLUMN]
        for name, sqlalchemy_dtype in schema.items():
            if name in primary_keys:
                columns.append(Column(name, sqlalchemy_dtype,
                                      primary_key=True))

            else:
                columns.append(Column(name, sqlalchemy_dtype))
        _ = Table(table_name, self.meta, *columns)
        self.meta.create_all()
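
A hedged usage sketch of the class above; the database URL and CSV file are hypothetical, and REPLACE comes from the module's own constants:

converter = CreateTablesFromCSVs('postgresql://localhost/analytics')  # hypothetical URL
df = converter.get_data_from_csv('penguins.csv')                      # hypothetical file
upload_id, upload_time, table_name = converter.create_and_fill_new_sql_table_from_df(
    'penguins', df, if_exists=REPLACE)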
Example #25
class DB(Base):
    # Constants: connection level
    NONE = 0  # No connection; just set self.url
    CONNECT = 1  # Connect; no transaction
    TXN = 2  # Everything in a transaction

    level = TXN

    def _engineInfo(self, url=None):
        if url is None:
            url = self.url
        return url

    def _setup(self, url):
        self._connect(url)
        # make sure there are no tables lying around
        meta = MetaData(self.engine)
        meta.reflect()
        meta.drop_all()

    def _teardown(self):
        self._disconnect()

    def _connect(self, url):
        self.url = url
        # TODO: seems like 0.5.x branch does not work with engine.dispose and staticpool
        #self.engine = create_engine(url, echo=True, poolclass=StaticPool)
        self.engine = create_engine(url, echo=True)
        # silence the logger added by SA, nose adds its own!
        logging.getLogger('sqlalchemy').handlers = []
        self.meta = MetaData(bind=self.engine)
        if self.level < self.CONNECT:
            return
        #self.session = create_session(bind=self.engine)
        if self.level < self.TXN:
            return
        #self.txn = self.session.begin()

    def _disconnect(self):
        if hasattr(self, 'txn'):
            self.txn.rollback()
        if hasattr(self, 'session'):
            self.session.close()
        #if hasattr(self,'conn'):
        #    self.conn.close()
        self.engine.dispose()

    def _supported(self, url):
        db = url.split(':', 1)[0]
        func = getattr(self, self._TestCase__testMethodName)
        if hasattr(func, 'supported'):
            return db in func.supported
        if hasattr(func, 'not_supported'):
            return not (db in func.not_supported)
        # Neither list assigned; assume all are supported
        return True

    def _not_supported(self, url):
        return not self._supported(url)

    def _select_row(self):
        """Select rows, used in multiple tests"""
        return self.table.select().execution_options(
            autocommit=True).execute().fetchone()

    def refresh_table(self, name=None):
        """Reload the table from the database
        Assumes we're working with only a single table, self.table, and
        metadata self.meta

        Working w/ multiple tables is not possible, as tables can only be
        reloaded with meta.clear()
        """
        if name is None:
            name = self.table.name
        self.meta.clear()
        self.table = Table(name, self.meta, autoload=True)

    def compare_columns_equal(self, columns1, columns2, ignore=None):
        """Loop through all columns and compare them"""
        def key(column):
            return column.name

        for c1, c2 in zip(sorted(columns1, key=key), sorted(columns2,
                                                            key=key)):
            diffs = ColumnDelta(c1, c2).diffs
            if ignore:
                for key in ignore:
                    diffs.pop(key, None)
            if diffs:
                self.fail("Comparing %s to %s failed: %s" %
                          (columns1, columns2, diffs))
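
refresh_table above relies on MetaData.clear() because an autoloaded Table can only be re-reflected once its cached definition is removed. A hedged standalone sketch of that pattern (connection URL and column are hypothetical), using the same 0.x/1.x-era API as the class above:

from sqlalchemy import MetaData, Table, create_engine

engine = create_engine('sqlite:///example.db')   # hypothetical URL
meta = MetaData(bind=engine)
account = Table('account', meta, autoload=True)  # first reflection; assumes an existing 'account' table

engine.execute('ALTER TABLE account ADD COLUMN nickname VARCHAR(50)')

meta.clear()                                     # forget the cached definition
account = Table('account', meta, autoload=True)  # re-reflect; the new column is now visible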
Example #26
class Storage(object):
    """SQL Tabular Storage.

    It's an implementation of `jsontableschema.Storage`.

    Args:
        engine (object): SQLAlchemy engine
        dbschema (str): database schema name
        prefix (str): prefix for all buckets
        reflect_only (callable): a boolean predicate to filter
            the list of table names when reflecting
        geometry_support (str): Whether to use a geometry column for geojson type.
            Can be `postgis` or `sde`.
    """

    # Public

    def __init__(self,
                 engine,
                 dbschema=None,
                 prefix='',
                 reflect_only=None,
                 autoincrement=None,
                 geometry_support=None,
                 from_srid=None,
                 to_srid=None,
                 views=False):

        # Set attributes
        self.__connection = engine.connect()
        self.__dbschema = dbschema
        self.__prefix = prefix
        self.__descriptors = {}
        self.__autoincrement = autoincrement
        self.__geometry_support = geometry_support
        self.__views = views
        if reflect_only is not None:
            self.__only = reflect_only
        else:
            self.__only = lambda _: True

        # Load geometry support
        if self.__geometry_support == 'postgis':
            mappers.load_postgis_support()
        elif self.__geometry_support in ['sde', 'sde-char']:
            mappers.load_sde_support(self.__geometry_support, from_srid,
                                     to_srid)

        # Create metadata
        self.__metadata = MetaData(bind=self.__connection,
                                   schema=self.__dbschema)
        self.__reflect()

    def __repr__(self):

        # Template and format
        template = 'Storage <{engine}/{dbschema}>'
        text = template.format(engine=self.__connection.engine,
                               dbschema=self.__dbschema)

        return text

    @property
    def buckets(self):

        # Collect
        buckets = []
        for table in self.__metadata.sorted_tables:
            bucket = mappers.tablename_to_bucket(self.__prefix, table.name)
            if bucket is not None:
                buckets.append(bucket)

        return buckets

    def create(self, bucket, descriptor, force=False, indexes_fields=None):
        """Create table by schema.

        Parameters
        ----------
        table: str/list
            Table name or list of table names.
        schema: dict/list
            JSONTableSchema schema or list of schemas.
        indexes_fields: list
            list of tuples containing field names, or list of such lists

        Raises
        ------
        RuntimeError
            If table already exists.

        """

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]
        if indexes_fields is None or len(indexes_fields) == 0:
            indexes_fields = [()] * len(descriptors)
        elif type(indexes_fields[0][0]) not in {list, tuple}:
            indexes_fields = [indexes_fields]
        assert len(indexes_fields) == len(descriptors)
        assert len(buckets) == len(descriptors)

        # Check buckets for existence
        for bucket in reversed(self.buckets):
            if bucket in buckets:
                if not force:
                    message = 'Bucket "%s" already exists.' % bucket
                    raise RuntimeError(message)
                self.delete(bucket)

        # Define buckets
        for bucket, descriptor, index_fields in zip(buckets, descriptors,
                                                    indexes_fields):

            # Add to schemas
            self.__descriptors[bucket] = descriptor

            # Create table
            jsontableschema.validate(descriptor)
            tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
            columns, constraints, indexes = mappers.descriptor_to_columns_and_constraints(
                self.__prefix, bucket, descriptor, index_fields,
                self.__autoincrement)
            Table(tablename, self.__metadata,
                  *(columns + constraints + indexes))

        # Create tables, update metadata
        self.__metadata.create_all()

    def delete(self, bucket=None, ignore=False):

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        elif bucket is None:
            buckets = reversed(self.buckets)

        # Iterate over buckets
        tables = []
        for bucket in buckets:

            # Check existent
            if bucket not in self.buckets:
                if not ignore:
                    message = 'Bucket "%s" doesn\'t exist.' % bucket
                    raise RuntimeError(message)

            # Remove from buckets
            if bucket in self.__descriptors:
                del self.__descriptors[bucket]

            # Add table to tables
            table = self.__get_table(bucket)
            tables.append(table)

        # Drop tables, update metadata
        self.__metadata.drop_all(tables=tables)
        self.__metadata.clear()
        self.__reflect()

    def describe(self, bucket, descriptor=None):

        # Set descriptor
        if descriptor is not None:
            self.__descriptors[bucket] = descriptor

        # Get descriptor
        else:
            descriptor = self.__descriptors.get(bucket)
            if descriptor is None:
                table = self.__get_table(bucket)
                descriptor = mappers.columns_and_constraints_to_descriptor(
                    self.__prefix, table.name, table.columns,
                    table.constraints, self.__autoincrement)

        return descriptor

    def iter(self, bucket):

        # Get result
        table = self.__get_table(bucket)

        # Make sure we close the transaction after iterating,
        #   otherwise it is left hanging
        with self.__connection.begin():
            # Streaming may not work for some backends:
            # http://docs.sqlalchemy.org/en/latest/core/connections.html
            select = table.select().execution_options(stream_results=True)
            result = select.execute()

            # Yield data
            for row in result:
                yield list(row)

    def read(self, bucket):

        # Get rows
        rows = list(self.iter(bucket))

        return rows

    def write(self,
              bucket,
              rows,
              keyed=False,
              as_generator=False,
              update_keys=None):

        if update_keys is not None and len(update_keys) == 0:
            raise ValueError('update_keys cannot be an empty list')

        table = self.__get_table(bucket)
        descriptor = self.describe(bucket)

        writer = StorageWriter(table, descriptor, update_keys,
                               self.__autoincrement)

        with self.__connection.begin():
            gen = writer.write(rows, keyed)
            if as_generator:
                return gen
            else:
                collections.deque(gen, maxlen=0)

    # Private
    def __get_table(self, bucket):
        """Return SQLAlchemy table for the given bucket.
        """

        # Prepare name
        tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
        if self.__dbschema:
            tablename = '.'.join((self.__dbschema, tablename))

        return self.__metadata.tables[tablename]

    def __reflect(self):
        def only(name, _):
            ret = (self.__only(name) and mappers.tablename_to_bucket(
                self.__prefix, name) is not None)
            return ret

        self.__metadata.reflect(only=only, views=self.__views)
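A minimal usage sketch for the Storage class above, assuming its module-level dependencies (`mappers`, `jsontableschema`, `six`, `StorageWriter`) are available as in tableschema-sql-py; bucket names are mapped to prefixed table names:

from sqlalchemy import create_engine

engine = create_engine('sqlite://')
storage = Storage(engine, prefix='data_')

descriptor = {
    'fields': [
        {'name': 'id', 'type': 'integer'},
        {'name': 'name', 'type': 'string'},
    ],
    'primaryKey': 'id',
}

storage.create('articles', descriptor)                    # creates table "data_articles"
storage.write('articles', [[1, 'first'], [2, 'second']])  # plain (non-keyed) rows
print(storage.read('articles'))                           # [[1, 'first'], [2, 'second']]
storage.delete('articles')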
Example #27
class Database(object):

    def __init__(self, *tables, **kw):

        log.info("initialising database")
        self.status = "updating"

        self.kw = kw
        self.metadata = None

        self.connection_string = kw.get("connection_string", None)
        if self.connection_string:
            self.engine = create_engine(self.connection_string)
            self.metadata = MetaData()
            self.metadata.bind = self.engine
            self._Session = sessionmaker(bind=self.engine, autoflush = False)
            self.Session = sessionwrapper.SessionClass(self._Session, self)

        self.logging_tables = kw.get("logging_tables", None)
        self.quiet = kw.get("quiet", None)

        self.application = kw.get("application", None)
        if self.application:
            self.set_application(self.application)

        self.max_table_id = 0
        self.max_event_id = 0

        self.persisted = False
        self.graph = None
        self.relations = []

        self.tables = OrderedDict()

        self.search_actions = {}
        self.search_names = {}
        self.search_ids = {}

        for table in tables:
            self.add_table(table)

    def set_application(self, application):

        self.application = application
        if not self.connection_string:
            self.metadata = application.metadata
            self.engine = application.engine
            self._Session = application.Session
            self.Session = sessionwrapper.SessionClass(self._Session, self)

        if self.logging_tables is not None:
            self.logging_tables = self.application.logging_tables
        if self.quiet is not None:
            self.quiet = self.application.quiet

        self.application_folder = self.application.application_folder
        self.zodb = self.application.zodb
        self.zodb_tables_init()
        self.application.Session = self.Session

        with SchemaLock(self) as file_lock:
            self.load_from_persist()
        self.status = "active"


    def zodb_tables_init(self):
        zodb = self.application.aquire_zodb()

        connection = zodb.open()
        root = connection.root()
        if "tables" not in root:
            root["tables"] = PersistentMapping()
            root["table_count"] = 0
            root["event_count"] = 0
            transaction.commit()
        connection.close()

        zodb.close()
        self.application.get_zodb(True)

    def __getitem__(self, item):
        if isinstance(item, int):
            table = self.get_table_by_id(item)
            if not table:
                raise IndexError("table id %s does not exist" % item)
            return table
        else:
            return self.tables[item]

    def get_table_by_id(self, id):

        for table in self.tables.itervalues():
            if table.table_id == id:
                return table

    def add_table(self, table, ignore = False, drop = False):

        log.info("adding table %s" % table.name)

        if table.name in self.tables.iterkeys():
            if ignore:
                return
            elif drop:
                self.drop_table(table.name)
            else:
                raise custom_exceptions.DuplicateTableError("already a table named %s"
                                                            % table.name)


        self._add_table_no_persist(table)

    def rename_table(self, table, new_name, session = None):

        if isinstance(table, tables.Table):
            table_to_rename = table
        else:
            table_to_rename = self.tables[table]

        with SchemaLock(self) as file_lock:
            for relations in table_to_rename.tables_with_relations.values():
                for rel in relations:
                    if rel.other == table_to_rename.name:
                        field = rel.parent
                        field.args = [new_name] + list(field.args[1:])

            table_to_rename.name = new_name
            file_lock.export(uuid = True)
            table_to_rename.sa_table.rename(new_name)
            file_lock.export()
            self.load_from_persist(True)

        if table_to_rename.logged:
            self.rename_table("_log_%s" % table_to_rename.name, "_log_%s" % new_name, session)

    def drop_table(self, table):

        with SchemaLock(self) as file_lock:
            if isinstance(table, tables.Table):
                table_to_drop = table

            else:
                table_to_drop = self.tables[table]

            if table_to_drop.dependant_tables:
                raise custom_exceptions.DependencyError((
                    "cannot delete table %s as the following tables"
                    " depend on it %s" % (table.name, table.dependant_tables)))

            for relations in table_to_drop.tables_with_relations.itervalues():
                for relation in relations:
                    field = relation.parent
                    field.table.fields.pop(field.name)
                    field.table.field_list.remove(field)

            self.tables.pop(table_to_drop.name)

            file_lock.export(uuid = True)
            table_to_drop.sa_table.drop()
            file_lock.export()
            self.load_from_persist(True)

        if table_to_drop.logged:
            self.drop_table(self.tables["_log_" + table_to_drop.name])


    def add_relation_table(self, table):
        if "_core" not in self.tables:
            raise custom_exceptions.NoTableAddError("table %s cannot be added as there is"
                                                    "no _core table in the database"
                                                    % table.name)

        assert table.primary_entities
        assert table.secondary_entities

        table.relation = True
        table.kw["relation"] = True

        self.add_table(table)

        relation = ForeignKey("_core_id", "_core", backref = table.name)
        table._add_field_no_persist(relation)

        event = Event("delete",
                      actions.DeleteRows("_core"))

        table.add_event(event)

    def add_info_table(self, table):
        if "_core" not in self.tables:
            raise custom_exceptions.NoTableAddError("table %s cannot be added as there is"
                                                    "no _core table in the database"
                                                    % table.name)

        table.info_table = True
        table.kw["info_table"] = True

        self.add_table(table)

        relation = ForeignKey("_core_id", "_core", backref = table.name)
        table._add_field_no_persist(relation)

        event = Event("delete",
                      actions.DeleteRows("_core"))

        table.add_event(event)


    def add_entity(self, table):
        if "_core" not in self.tables:
            raise custom_exceptions.NoTableAddError("table %s cannot be added as there is"
                                                    "no _core table in the database"
                                                    % table.name)

        table.entity = True
        table.kw["entity"] = True
        self.add_table(table)

        #add relation
        relation = ForeignKey("_core_id", "_core", backref = table.name)
        table._add_field_no_persist(relation)



        ##add title events

        if table.title_field:
            title_field = table.title_field
        else:
            title_field = "name"

        event = Event("new change",
                      actions.CopyTextAfter("primary_entity._core_entity.title", title_field))

        table.add_event(event)


        if table.summary_fields:

            event = Event("new change",
                          actions.CopyTextAfterField("primary_entity._core_entity.summary", table.summary_fields))

            table.add_event(event)

        event = Event("delete",
                      actions.DeleteRows("primary_entity._core_entity"))

        table.add_event(event)


    def _add_table_no_persist(self, table):

        table._set_parent(self)


    def persist(self):

        self.status = "updating"

        for table in self.tables.values():
            if not self.logging_tables:
                ## FIXME should look at better place to set this
                table.kw["logged"] = False
                table.logged = False
            if table.logged and "_log_%s" % table.name not in self.tables.iterkeys() :
                self.add_table(self.logged_table(table))

        for table in self.tables.itervalues():
            table.add_foreign_key_columns()

        self.update_sa(True)

        with SchemaLock(self) as file_lock:
            file_lock.export(uuid = True)
            self.metadata.create_all(self.engine)
            self.persisted = True
            file_lock.export()
            self.load_from_persist(True)

    def get_file_path(self, uuid_name = False):

        uuid = datetime.datetime.now().isoformat().\
                replace(":", "-").replace(".", "-")
        if uuid_name:
            file_name = "generated_schema-%s.py" % uuid
        else:
            file_name = "generated_schema.py"

        file_path = os.path.join(
            self.application.application_folder,
            "_schema",
            file_name
        )
        return file_path

    def code_repr_load(self):

        import _schema.generated_schema as sch
        sch = reload(sch)
        database = sch.database
        database.clear_sa()
        for table in database.tables.values():
            table.database = self
            self.add_table(table)
            table.persisted = True

        self.max_table_id = database.max_table_id
        self.max_event_id = database.max_event_id

        self.persisted = True


    def code_repr_export(self, file_path):

        try:
            os.remove(file_path)
            os.remove(file_path+"c")
        except OSError:
            pass

        out_file = open(file_path, "w")

        output = [
            "from database.database import Database",
            "from database.tables import Table",
            "from database.fields import *",
            "from database.database import table, entity, relation",
            "from database.events import Event",
            "from database.actions import *",
            "",
            "",
            "database = Database(",
            "",
            "",
        ]

        for table in sorted(self.tables.values(),
                            key = lambda x:x.table_id):
            output.append(table.code_repr() + ",")

        kw_display = ""
        if self.kw:
            kw_list = ["%s = %s" % (i[0], repr(i[1])) for i in self.kw.items()]
            kw_display = ", ".join(sorted(kw_list))

        output.append(kw_display)
        output.append(")")

        out_file.write("\n".join(output))
        out_file.close()

        return file_path


    def load_from_persist(self, restart = False):

        self.clear_sa()
        self.tables = OrderedDict()
        try:
            self.code_repr_load()
        except ImportError:
            return
        self.add_relations()
        self.update_sa()
        self.validate_database()


    def add_relations(self):     #not property for optimisation
        self.relations = []
        for table_name, table_value in self.tables.iteritems():
            ## make sure fk columns are remade
            table_value.foriegn_key_columns_current = None
            table_value.add_relations()
            for rel_name, rel_value in table_value.relations.iteritems():
                self.relations.append(rel_value)



    def checkrelations(self):
        for relation in self.relations:
            if relation.other not in self.tables.iterkeys():
                raise custom_exceptions.RelationError(
                        "table %s does not exist" % relation.other)


    def update_sa(self, reload = False):
        if reload == True and self.status != "terminated":
            self.status = "updating"

        if reload:
            self.clear_sa()

        self.checkrelations()
        self.make_graph()
        try:
            for table in self.tables.itervalues():
                table.make_paths()
                table.make_sa_table()
                table.make_sa_class()
            for table in self.tables.itervalues():
                table.sa_mapper()
            sa.orm.compile_mappers()
            for table in self.tables.itervalues():
                for column in table.columns.iterkeys():
                    getattr(table.sa_class, column).impl.active_history = True
                table.columns_cache = table.columns
            for table in self.tables.itervalues():
                table.make_schema_dict()
            ## put valid_info tables into info_table
            for table in self.tables.itervalues():
                if table.relation or table.entity:
                    for valid_info_table in table.valid_info_tables:
                        info_table = self.tables[valid_info_table]
                        assert info_table.info_table
                        info_table.valid_core_types.append(table.name)
            self.collect_search_actions()


        except (custom_exceptions.NoDatabaseError,\
                custom_exceptions.RelationError):
            pass
        if reload == True and self.status != "terminated":
            self.status = "active"

    def clear_sa(self):
        sa.orm.clear_mappers()
        if self.metadata:
            self.metadata.clear()
        for table in self.tables.itervalues():
            table.foriegn_key_columns_current = None
            table.mapper = None
            table.sa_class = None
            table.sa_table = None
            table.paths = None
            table.local_tables = None
            table.one_to_many_tables = None
            table.events = dict(new = [],
                                delete = [],
                                change = [])
            table.schema_dict = None
            table.valid_core_types = []
            table.columns_cache = None

        self.graph = None
        self.search_actions = {}
        self.search_names = {}
        self.search_ids = {}


    def tables_with_relations(self, table):
        relations = defaultdict(list)
        for n, v in table.relations.iteritems():
            relations[(v.other, "here")].append(v)
        for v in self.relations:
            if v.other == table.name:
                relations[(v.table.name, "other")].append(v)
        return relations

    def result_set(self, search):

        return resultset.ResultSet(search)

    def search(self, table_name, where = "id>0", *args, **kw):
        ##FIXME id>0 should be changed to none once search is sorted

        """
        :param table_name: specifies the base table you will query from (required)

        :param where: either a parameterised or plain where clause; if parameterised,
        the values or params keyword has to be supplied as well (optional first arg;
        if missing, the query runs without a where clause)

        :param tables: an optional list of one-to-one or many-to-one tables to be
        extracted with the results

        :param keep_all: will keep the id, _core_entity_id, modified_by and modified_on fields

        :param fields: an optional explicit field list in the form 'field' for the base table
        and 'table.field' for other tables.  Overrides the tables option and keep_all.

        :param limit: the row limit

        :param offset: the offset

        :param internal: if True, date, boolean and decimal fields will not be converted

        :param values: a list of values to replace the ? placeholders in parameterised queries

        :param params: a dict whose keys fill the curly-bracket placeholders,
        i.e. key name replaces {name} in the query

        :param order_by: a string in the same form as a SQL ORDER BY,
        e.g. 'name desc, donkey.name, donkey.age desc' (plain names refer to the base table)
        """

        session = kw.pop("session", None)
        if session:
            external_session = True
        else:
            session = self.Session()
            external_session = False

        tables = kw.get("tables", [table_name])
        fields = kw.get("fields", None)

        join_tables = []

        if fields:
            join_tables = split_table_fields(fields, table_name).keys()
            if table_name in join_tables:
                join_tables.remove(table_name)
            tables = None
        if tables:
            join_tables.extend(tables)
            if table_name in tables:
                join_tables.remove(table_name)

        if "order_by" not in kw:
            kw["order_by"] = "id"

        if join_tables:
            kw["extra_outer"] = join_tables
            kw["distinct_many"] = False

        try:
            query = search.Search(self, table_name, session, where, *args, **kw)
            result = resultset.ResultSet(query, **kw)
            result.collect()
            return result

        except Exception:
            session.rollback()
            raise
        finally:
            # close sessions created locally; sessions passed in by the caller are left open
            if not external_session:
                session.close()
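The Database constructor above uses the classic SQLAlchemy 1.x setup: one Engine, a bound MetaData and a shared sessionmaker. Stripped of the framework specifics, that setup looks roughly like this (bound metadata is a 1.x feature, removed in SQLAlchemy 2.0):

from sqlalchemy import MetaData, create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine('sqlite://')
metadata = MetaData()
metadata.bind = engine                       # bound metadata, 1.x style
Session = sessionmaker(bind=engine, autoflush=False)

# Table objects defined against `metadata` can then be created in one call:
metadata.create_all(engine)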
Example #28
class SAProvider(BaseProvider):
    """Provider Implementation class for SQLAlchemy"""
    def __init__(self, *args, **kwargs):
        """Initialize and maintain Engine"""
        # Since SQLAlchemyProvider can cater to multiple databases, it is important
        #   that we know which database we are dealing with, to run database-specific
        #   statements like `PRAGMA` for SQLite.
        if "DATABASE" not in args[2]:
            logger.error(
                f"Missing `DATABASE` information in conn_info: {args[2]}")
            raise ConfigurationError(
                "Missing `DATABASE` attribute in Connection info")

        super().__init__(*args, **kwargs)

        kwargs = self._get_database_specific_engine_args()

        self._engine = create_engine(make_url(self.conn_info["DATABASE_URI"]),
                                     **kwargs)

        if self.conn_info["DATABASE"] == Database.POSTGRESQL.value:
            # Nest database tables under a schema, so that we have complete control
            #   on creating/dropping db structures. We cannot control structures in
            #   the default `public` schema.
            #
            # Use `SCHEMA` value if specified as part of the conn info. Otherwise,
            #   fall back to the default `public` schema.
            schema = (self.conn_info["SCHEMA"]
                      if "SCHEMA" in self.conn_info else "public")

            self._metadata = MetaData(bind=self._engine, schema=schema)
        else:
            self._metadata = MetaData(bind=self._engine)

        # A temporary cache of already constructed model classes
        self._model_classes = {}

    def _get_database_specific_engine_args(self):
        """Supplies additional database-specific arguments to SQLAlchemy Engine.

        Return: a dictionary with database-specific SQLAlchemy Engine arguments.
        """
        if self.conn_info["DATABASE"] == Database.POSTGRESQL.value:
            return {"isolation_level": "AUTOCOMMIT"}

        return {}

    def _get_database_specific_session_args(self):
        """Set Database specific session parameters.

        Depending on the database in use, this method supplies
        additional arguments while constructing sessions.

        Return: a dictionary with additional arguments and values.
        """
        if self.conn_info["DATABASE"] == Database.POSTGRESQL.value:
            return {"autocommit": True, "autoflush": False}

        return {}

    def get_session(self):
        """Establish a session to the Database"""
        # Create the session
        kwargs = self._get_database_specific_session_args()
        session_factory = orm.sessionmaker(bind=self._engine,
                                           expire_on_commit=False,
                                           **kwargs)
        session_cls = orm.scoped_session(session_factory)

        return session_cls

    def _execute_database_specific_connection_statements(self, conn):
        """Execute connection statements depending on the database in use.

        Each database has a unique set of commands and associated format to control
        connection-related parameters. Since we use SQLAlchemy, statements should
        be run dynamically based on the database in use.

        Arguments:
        * conn: An active connection object to the database

        Return: the (possibly configured) connection object
        """
        if self.conn_info["DATABASE"] == Database.SQLITE.value:
            conn.execute("PRAGMA case_sensitive_like = ON;")

        return conn

    def get_connection(self, session_cls=None):
        """Create the connection to the Database instance"""
        # If this connection has to be created within an existing session,
        #   ``session_cls`` will be provided as an argument.
        #   Otherwise, fetch a new ``session_cls`` from ``get_session()``
        if session_cls is None:
            session_cls = self.get_session()

        conn = session_cls()
        conn = self._execute_database_specific_connection_statements(conn)

        return conn

    def _data_reset(self):
        conn = self._engine.connect()

        transaction = conn.begin()

        if self.conn_info["DATABASE"] == Database.SQLITE.value:
            conn.execute("PRAGMA foreign_keys = OFF;")

        for table in self._metadata.sorted_tables:
            conn.execute(table.delete())

        if self.conn_info["DATABASE"] == Database.SQLITE.value:
            conn.execute("PRAGMA foreign_keys = ON;")

        transaction.commit()

        # Discard any active Unit of Work
        if current_uow and current_uow.in_progress:
            current_uow.rollback()

    def _create_database_artifacts(self):
        for _, aggregate_record in self.domain.registry.aggregates.items():
            self.domain.repository_for(aggregate_record.cls)._dao

        self._metadata.create_all()

    def _drop_database_artifacts(self):
        self._metadata.drop_all()
        self._metadata.clear()

    def decorate_model_class(self, entity_cls, model_cls):
        schema_name = derive_schema_name(model_cls)

        # Return the model class if it was already seen/decorated
        if schema_name in self._model_classes:
            return self._model_classes[schema_name]

        # If `model_cls` is already subclassed from SqlalchemyModel,
        #   this method call is a no-op
        if issubclass(model_cls, SqlalchemyModel):
            return model_cls
        else:
            custom_attrs = {
                key: value
                for (key, value) in vars(model_cls).items()
                if key not in ["Meta", "__module__", "__doc__", "__weakref__"]
            }

            from protean.core.model import ModelMeta

            meta_ = ModelMeta()
            meta_.entity_cls = entity_cls

            custom_attrs.update({"meta_": meta_, "metadata": self._metadata})
            # FIXME Ensure the custom model attributes are constructed properly
            decorated_model_cls = type(model_cls.__name__,
                                       (SqlalchemyModel, model_cls),
                                       custom_attrs)

            # Memoize the constructed model class
            self._model_classes[schema_name] = decorated_model_cls

            return decorated_model_cls

    def construct_model_class(self, entity_cls):
        """Return a fully-baked Model class for a given Entity class"""
        model_cls = None

        # Return the model class if it was already seen/decorated
        if entity_cls.meta_.schema_name in self._model_classes:
            model_cls = self._model_classes[entity_cls.meta_.schema_name]
        else:
            from protean.core.model import ModelMeta

            meta_ = ModelMeta()
            meta_.entity_cls = entity_cls

            attrs = {
                "meta_": meta_,
                "metadata": self._metadata,
            }
            # FIXME Ensure the custom model attributes are constructed properly
            model_cls = type(entity_cls.__name__ + "Model",
                             (SqlalchemyModel, ), attrs)

            # Memoize the constructed model class
            self._model_classes[entity_cls.meta_.schema_name] = model_cls

        # Set Entity Class as a class level attribute for the Model, to be able to reference later.
        return model_cls

    def get_dao(self, entity_cls, model_cls):
        """Return a DAO object configured with a live connection"""
        return SADAO(self.domain, self, entity_cls, model_cls)

    def raw(self, query: Any, data: Any = None):
        """Run raw query on Provider"""
        if data is None:
            data = {}
        assert isinstance(query, str)
        assert isinstance(data, dict)

        return self.get_connection().execute(query, data)
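The provider above switches its engine and session arguments on conn_info["DATABASE"]. The same dispatch, reduced to a standalone sketch (the conn_info values below are hypothetical string literals standing in for the Database enum; this is not the protean configuration format):

from sqlalchemy import create_engine

def engine_kwargs(conn_info):
    # AUTOCOMMIT isolation only for PostgreSQL; engine defaults everywhere else
    if conn_info.get("DATABASE") == "postgresql":
        return {"isolation_level": "AUTOCOMMIT"}
    return {}

conn_info = {"DATABASE": "sqlite", "DATABASE_URI": "sqlite://"}
engine = create_engine(conn_info["DATABASE_URI"], **engine_kwargs(conn_info))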
Example #29
from sqlalchemy import Table, Column, MetaData, Integer, String, ForeignKeyConstraint, DateTime

metadata = MetaData()
metadata.clear()

PostCategories = Table(
    'post_categories',
    metadata,
    Column('id', Integer, primary_key=True),
    Column('title', String(length=100), nullable=False, unique=True),
)

Post = Table(
    'post', metadata, Column('id', Integer, primary_key=True),
    Column('category_id', Integer, nullable=False),
    Column('title', String(length=100), nullable=False),
    Column('text', String, nullable=False),
    Column('main_img', String, nullable=False),
    Column('created_at', DateTime, nullable=False),
    Column('last_updated', DateTime, nullable=False),
    ForeignKeyConstraint(['category_id'], [PostCategories.c.id],
                         name='post_category_id_fkey',
                         ondelete='CASCADE'))
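
The module-level metadata above can be materialised against any engine; a small sketch using an in-memory SQLite database and SQLAlchemy 1.x implicit execution:

from sqlalchemy import create_engine

engine = create_engine('sqlite://')
metadata.create_all(engine)       # emits CREATE TABLE for post_categories and post

with engine.connect() as conn:
    conn.execute(PostCategories.insert().values(title='General'))
    print(conn.execute(PostCategories.select()).fetchall())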
Example #30
class Storage(object):
    """SQL Tabular Storage.

    It's an implementation of `jsontableschema.Storage`.

    Args:
        engine (object): SQLAlchemy engine
        dbschema (str): database schema name
        prefix (str): prefix for all buckets

    """

    # Public

    def __init__(self, engine, dbschema=None, prefix=''):

        # Set attributes
        self.__connection = engine.connect()
        self.__dbschema = dbschema
        self.__prefix = prefix
        self.__descriptors = {}

        # Create metadata
        self.__metadata = MetaData(bind=self.__connection,
                                   schema=self.__dbschema,
                                   reflect=True)

    def __repr__(self):

        # Template and format
        template = 'Storage <{engine}/{dbschema}>'
        text = template.format(engine=self.__connection.engine,
                               dbschema=self.__dbschema)

        return text

    @property
    def buckets(self):

        # Collect
        buckets = []
        for table in self.__metadata.sorted_tables:
            bucket = mappers.tablename_to_bucket(self.__prefix, table.name)
            if bucket is not None:
                buckets.append(bucket)

        return buckets

    def create(self, bucket, descriptor, force=False):

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]

        # Check buckets for existence
        for bucket in reversed(self.buckets):
            if bucket in buckets:
                if not force:
                    message = 'Bucket "%s" already exists.' % bucket
                    raise RuntimeError(message)
                self.delete(bucket)

        # Define buckets
        for bucket, descriptor in zip(buckets, descriptors):

            # Add to schemas
            self.__descriptors[bucket] = descriptor

            # Create table
            jsontableschema.validate(descriptor)
            tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
            columns, constraints = mappers.descriptor_to_columns_and_constraints(
                self.__prefix, bucket, descriptor)
            Table(tablename, self.__metadata, *(columns + constraints))

        # Create tables, update metadata
        self.__metadata.create_all()

    def delete(self, bucket=None, ignore=False):

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        elif bucket is None:
            buckets = reversed(self.buckets)

        # Iterate over buckets
        tables = []
        for bucket in buckets:

            # Check existent
            if bucket not in self.buckets:
                if not ignore:
                    message = 'Bucket "%s" doesn\'t exist.' % bucket
                    raise RuntimeError(message)

            # Remove from buckets
            if bucket in self.__descriptors:
                del self.__descriptors[bucket]

            # Add table to tables
            table = self.__get_table(bucket)
            tables.append(table)

        # Drop tables, update metadata
        self.__metadata.drop_all(tables=tables)
        self.__metadata.clear()
        self.__metadata.reflect()

    def describe(self, bucket, descriptor=None):

        # Set descriptor
        if descriptor is not None:
            self.__descriptors[bucket] = descriptor

        # Get descriptor
        else:
            descriptor = self.__descriptors.get(bucket)
            if descriptor is None:
                table = self.__get_table(bucket)
                descriptor = mappers.columns_and_constraints_to_descriptor(
                    self.__prefix, table.name, table.columns,
                    table.constraints)

        return descriptor

    def iter(self, bucket):

        # Get result
        table = self.__get_table(bucket)
        # Streaming may not work for some backends:
        # http://docs.sqlalchemy.org/en/latest/core/connections.html
        select = table.select().execution_options(stream_results=True)
        result = select.execute()

        # Yield data
        for row in result:
            yield list(row)

    def read(self, bucket):

        # Get rows
        rows = list(self.iter(bucket))

        return rows

    def write(self, bucket, rows):

        # Prepare
        BUFFER_SIZE = 1000
        descriptor = self.describe(bucket)
        schema = jsontableschema.Schema(descriptor)
        table = self.__get_table(bucket)

        # Write
        with self.__connection.begin():
            keyed_rows = []
            for row in rows:
                keyed_row = {}
                for index, field in enumerate(schema.fields):
                    value = row[index]
                    try:
                        value = field.cast_value(value)
                    except InvalidObjectType:
                        value = json.loads(value)
                    keyed_row[field.name] = value
                keyed_rows.append(keyed_row)
                if len(keyed_rows) > BUFFER_SIZE:
                    # Insert data
                    table.insert().execute(keyed_rows)
                    # Clean memory
                    keyed_rows = []
            if len(keyed_rows) > 0:
                # Insert data
                table.insert().execute(keyed_rows)

    # Private

    def __get_table(self, bucket):
        """Return SQLAlchemy table for the given bucket.
        """

        # Prepare name
        tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
        if self.__dbschema:
            tablename = '.'.join((self.__dbschema, tablename))

        return self.__metadata.tables[tablename]
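write() in the class above buffers rows and flushes them once the buffer exceeds BUFFER_SIZE, so that each flush becomes a single executemany-style insert. The batching itself can be factored out as a small helper (a generic sketch, not part of the original class):

def chunked(rows, size=1000):
    """Yield lists of at most `size` rows at a time."""
    buf = []
    for row in rows:
        buf.append(row)
        if len(buf) >= size:
            yield buf
            buf = []
    if buf:
        yield buf

# usage sketch:
#   for batch in chunked(keyed_rows, BUFFER_SIZE):
#       table.insert().execute(batch)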
Example #31
class Bookmark(object):

    def __init__(self, engine, user, start_no=0, end_no=100):
        self.engine = engine
        self.md = MetaData(self.engine)
        self.user = user
        self._feeds = []
        self.start_no = start_no
        self.end_no = end_no

    @property
    def feeds(self):
        """Return feed data."""
        if not self._feeds:
            self._load()
        return self._feeds

    def _load(self):
        """Load feed info."""
        interval = 20 # API default setting.

        for i in range(self.start_no,
                       self.end_no,
                       interval):
            url = self._make_feed_api_url(i)
            feed = self._request(url)
            if not feed["entries"]:
                break
            self._append_to_feeds(feed)
            time.sleep(2)

    def _make_feed_api_url(self, id):
        """Create api url of rss feed."""
        return HATENA_FEED_URL.format(user=self.user.name, no=str(id))

    def _request(self, url):
        """Request api.
        
        Request argument url and return result data as feedparser object..
        """
        return feedparser.parse(requests.get(url).text)

    def _append_to_feeds(self, feed):
        """Parse and append feed data."""
        for f in feed["entries"]:
            link = f["link"]
            title = f['title']
            self._feeds.append(Feed(self.engine, link, title))

    def save(self):
        """Save url."""
        if not self._feeds:
            self._load()
        # TODO: Load user no
        logging.info('SAVE BOOKMARK')
        for f in self._feeds:
            logging.info(f.url)
            if self._has_record(f.id):
                # TODO:
                #   Fix to return (instead of continue) when the feed is not new,
                #   to avoid duplicate access.
                logging.info('IGNORE')
                continue
            logging.info('ADD')
            self._register(f.id)
        logging.info('----------------------')
    
    def _register(self, url_id):
        """Register bookmark transaction."""
        self.md.clear()
        md = MetaData(self.engine)
        t = Table('bookmark', md, autoload=True)
        i = insert(t).values(url_id=url_id,
                             user_id=self.user.id,
                             registered_date=int(
                                 date.today().strftime("%Y%m%d")))
        i.execute()

    def _has_record(self, url_id):
        """Check bookmark url is already existing."""
        t = Table('bookmark', self.md)
        c_user = column('user_id')
        c_url = column('url_id')
        s = select(columns=[column('id')], from_obj=t).where(
                c_url==url_id).where(c_user==self.user.id)
        return s.execute().scalar()
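_has_record() and _register() above rely on the legacy select(columns=..., from_obj=...) and implicit-execution styles that were removed in SQLAlchemy 1.4/2.0. Roughly the same existence check in the modern style (a sketch; the database URL and values are placeholders):

from sqlalchemy import MetaData, Table, create_engine, select

engine = create_engine('sqlite:///bookmarks.db')      # placeholder URL
md = MetaData()
bookmark = Table('bookmark', md, autoload_with=engine)

stmt = (select(bookmark.c.id)
        .where(bookmark.c.url_id == 42)
        .where(bookmark.c.user_id == 7))

with engine.connect() as conn:
    exists = conn.execute(stmt).scalar()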
Example #32
class TestIdempotentOperations(TestCase):
    """Test database migration utilities.

    Unfortunately not all operations can be tested with an SQLite database
    since not all ALTER TABLE operations are supported, see
    http://www.sqlite.org/lang_altertable.html.

    Currently these untested operations are:
    - IdempotentOperations.drop_column
    - DeactivatedFKConstraint

    """
    def setUp(self):
        self.connection = create_engine('sqlite:///:memory:').connect()
        self.metadata = MetaData(self.connection)
        self.table = Table('thingy', self.metadata,
                           Column('thingy_id', Integer, primary_key=True))
        self.metadata.create_all()

        self.migration_context = MigrationContext.configure(self.connection)
        self.op = IdempotentOperations(self.migration_context, self)
        self.inspector = Inspector(self.connection)

    def tearDown(self):
        self.metadata.drop_all()
        self.connection.close()

    def refresh_metadata(self):
        self.metadata.clear()
        self.metadata.reflect()

    def test_add_column_works_with_valid_preconditions(self):
        self.assertEqual(['thingy_id'],
                         self.metadata.tables['thingy'].columns.keys())

        self.op.add_column('thingy', Column('foo', String))
        self.refresh_metadata()

        self.assertEqual(['thingy_id', 'foo'],
                         self.metadata.tables['thingy'].columns.keys())

    def test_add_column_skips_add_when_column_name_already_exists(self):
        self.assertEqual(['thingy_id'],
                         self.metadata.tables['thingy'].columns.keys())

        self.op.add_column('thingy', Column('thingy_id', String))
        self.refresh_metadata()

        self.assertEqual(['thingy_id'],
                         self.metadata.tables['thingy'].columns.keys())

    def test_create_tables_skips_create_when_table_already_exists(self):
        self.assertEqual(['thingy'], self.metadata.tables.keys())

        self.op.create_table('thingy')
        self.refresh_metadata()
        self.assertEqual(['thingy'], self.metadata.tables.keys())

    def test_create_table_works_with_valid_preconditions(self):
        self.assertEqual(['thingy'], self.metadata.tables.keys())
        self.op.create_table('xuq', Column('foo', Integer, primary_key=True))
        self.refresh_metadata()
        self.assertEqual(['thingy', 'xuq'], self.metadata.tables.keys())
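refresh_metadata() in the test case above shows the usual clear-then-reflect idiom for keeping a MetaData object in sync after DDL has been executed behind its back. The same idiom in isolation (SQLAlchemy 1.x, where Engine.execute and bound MetaData are still available):

from sqlalchemy import MetaData, create_engine

engine = create_engine('sqlite://')
engine.execute('CREATE TABLE thingy (thingy_id INTEGER PRIMARY KEY)')

metadata = MetaData(bind=engine)
metadata.reflect()

engine.execute('ALTER TABLE thingy ADD COLUMN foo TEXT')

metadata.clear()                  # drop the stale Table objects ...
metadata.reflect()                # ... and re-reflect the current schema
print(list(metadata.tables['thingy'].columns.keys()))   # ['thingy_id', 'foo']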
Example #33
class Feed(object):
    """Bookmark user class."""
    def __init__(self, engine, url, title=''):
        logging.basicConfig(level=20)
        self.engine = engine
        self.url = url
        self.title = title
        self.md = MetaData(self.engine)
        self.sleep_sec = 1

    @property
    def id(self):
        """Load feed id."""
        if not self._load_id():
            self._append()
        return self._load_id()

    def extract(self):
        """Extract bookmarked users from setted url."""
        users = []

        # TODO: Load id in __init__
        if not self._load_id():
            self._append()
        api_url = self._make_entry_api_url(self.url)
        result = self._request(api_url)
        if not result:
            return users
        for b in result.get('bookmarks', []):
            if "user" not in b:
                continue
            users.append(User(self.engine, b["user"]))
        time.sleep(self.sleep_sec)
        return users

    def _make_entry_api_url(self, url):
        """Create hatena bookmark entry api url."""
        e_url = quote(url, safe='')
        return HATENA_ENTRY_URL.format(url=e_url)

    def _request(self, url):
        """Request api.
        
        Request argument url and return result data as dict.
        """
        return requests.get(url).json()

    def _load_id(self):
        """Load feed id from database."""
        t = Table('feed', self.md)
        c_url = column('url')
        c_id = column('id')
        s = select(columns=[c_id], from_obj=t).where(c_url == self.url)
        r = s.execute().fetchone()
        if r:
            return r['id']
        return None

    def _append(self):
        """Add new feed url into database."""
        logging.info('SAVE MY FEED')
        logging.info(self.url)
        self.md.clear()
        md = MetaData(self.engine)
        t = Table('feed', md, autoload=True)
        i = insert(t).values(url=self.url, title=self.title)
        i.execute()
        logging.info('----------------------')
Example #34
class Storage(tableschema.Storage):

    # Public

    def __init__(self, engine, dbschema=None, prefix='', reflect_only=None, autoincrement=None):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """

        # Set attributes
        self.__connection = engine.connect()
        self.__dbschema = dbschema
        self.__prefix = prefix
        self.__descriptors = {}
        self.__fallbacks = {}
        self.__autoincrement = autoincrement
        self.__only = reflect_only or (lambda _: True)

        # Create mapper
        self.__mapper = Mapper(prefix=prefix, dialect=engine.dialect.name)

        # Create metadata and reflect
        self.__metadata = MetaData(bind=self.__connection, schema=self.__dbschema)
        self.__reflect()

    def __repr__(self):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """

        # Template and format
        template = 'Storage <{engine}/{dbschema}>'
        text = template.format(
            engine=self.__connection.engine,
            dbschema=self.__dbschema)

        return text

    @property
    def buckets(self):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """
        buckets = []
        for table in self.__metadata.sorted_tables:
            bucket = self.__mapper.restore_bucket(table.name)
            if bucket is not None:
                buckets.append(bucket)
        return buckets

    def create(self, bucket, descriptor, force=False, indexes_fields=None):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]
        if indexes_fields is None or len(indexes_fields) == 0:
            indexes_fields = [()] * len(descriptors)
        elif type(indexes_fields[0][0]) not in {list, tuple}:
            indexes_fields = [indexes_fields]

        # Check dimensions
        if not (len(buckets) == len(descriptors) == len(indexes_fields)):
            raise tableschema.exceptions.StorageError('Wrong argument dimensions')

        # Check buckets for existence
        for bucket in reversed(self.buckets):
            if bucket in buckets:
                if not force:
                    message = 'Bucket "%s" already exists.' % bucket
                    raise tableschema.exceptions.StorageError(message)
                self.delete(bucket)

        # Define buckets
        for bucket, descriptor, index_fields in zip(buckets, descriptors, indexes_fields):
            tableschema.validate(descriptor)
            table_name = self.__mapper.convert_bucket(bucket)
            columns, constraints, indexes, fallbacks, table_comment = self.__mapper \
                .convert_descriptor(bucket, descriptor, index_fields, self.__autoincrement)
            Table(table_name, self.__metadata, *(columns + constraints + indexes),
                  comment=table_comment)
            self.__descriptors[bucket] = descriptor
            self.__fallbacks[bucket] = fallbacks

        # Create tables, update metadata
        self.__metadata.create_all()

    def delete(self, bucket=None, ignore=False):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        elif bucket is None:
            buckets = reversed(self.buckets)

        # Iterate
        tables = []
        for bucket in buckets:

            # Check existent
            if bucket not in self.buckets:
                if not ignore:
                    message = 'Bucket "%s" doesn\'t exist.' % bucket
                    raise tableschema.exceptions.StorageError(message)
                return

            # Remove from buckets
            if bucket in self.__descriptors:
                del self.__descriptors[bucket]

            # Add table to tables
            table = self.__get_table(bucket)
            tables.append(table)

        # Drop tables, update metadata
        self.__metadata.drop_all(tables=tables)
        self.__metadata.clear()
        self.__reflect()

    def describe(self, bucket, descriptor=None):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """

        # Set descriptor
        if descriptor is not None:
            self.__descriptors[bucket] = descriptor

        # Get descriptor
        else:
            descriptor = self.__descriptors.get(bucket)
            if descriptor is None:
                table = self.__get_table(bucket)
                descriptor = self.__mapper.restore_descriptor(
                    table.name, table.columns, table.constraints, self.__autoincrement)

        return descriptor

    def iter(self, bucket):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """

        # Get table and fallbacks
        table = self.__get_table(bucket)
        schema = tableschema.Schema(self.describe(bucket))

        # Open and close transaction
        with self.__connection.begin():
            # Streaming may not work for some backends:
            # http://docs.sqlalchemy.org/en/latest/core/connections.html
            select = table.select().execution_options(stream_results=True)
            result = select.execute()
            for row in result:
                row = self.__mapper.restore_row(row, schema=schema)
                yield row

    def read(self, bucket):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """
        rows = list(self.iter(bucket))
        return rows

    def write(self, bucket, rows, keyed=False, as_generator=False, update_keys=None):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """

        # Check update keys
        if update_keys is not None and len(update_keys) == 0:
            message = 'Argument "update_keys" cannot be an empty list'
            raise tableschema.exceptions.StorageError(message)

        # Get table and description
        table = self.__get_table(bucket)
        schema = tableschema.Schema(self.describe(bucket))
        fallbacks = self.__fallbacks.get(bucket, [])

        # Write rows to table
        convert_row = partial(self.__mapper.convert_row, schema=schema, fallbacks=fallbacks)
        writer = Writer(table, schema, update_keys, self.__autoincrement, convert_row)
        with self.__connection.begin():
            gen = writer.write(rows, keyed=keyed)
            if as_generator:
                return gen
            collections.deque(gen, maxlen=0)

    # Private

    def __get_table(self, bucket):
        """Get table by bucket
        """
        table_name = self.__mapper.convert_bucket(bucket)
        if self.__dbschema:
            table_name = '.'.join((self.__dbschema, table_name))
        return self.__metadata.tables[table_name]

    def __reflect(self):
        """Reflect metadata
        """

        def only(name, _):
            return self.__only(name) and self.__mapper.restore_bucket(name) is not None

        self.__metadata.reflect(only=only)
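create() above accepts indexes_fields either as one list of field-name tuples (applied to a single bucket) or as a list of such lists, one per bucket. Illustrative call shapes (descriptors omitted; field names hypothetical):

# single bucket, two single-column indexes:
#   storage.create('articles', descriptor,
#                  indexes_fields=[('author',), ('created',)])
#
# two buckets, one index list per bucket:
#   storage.create(['articles', 'comments'], [descriptor1, descriptor2],
#                  indexes_fields=[[('author',)], [('article_id',)]])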
Example #35
class OracleTopicStorage(TopicStorageInterface):
    engine = None
    insp = None
    metadata = MetaData()

    def __init__(self, client, storage_template):
        self.engine = client
        self.storage_template = storage_template
        self.insp = inspect(client)
        self.metadata = MetaData()
        self.lock = threading.RLock()
        log.info("topic oracle template initialized")

    def get_topic_table_by_name(self, table_name):
        self.lock.acquire()
        try:
            table = Table(table_name,
                          self.metadata,
                          extend_existing=False,
                          autoload=True,
                          autoload_with=self.engine)
            return table
        finally:
            self.lock.release()

    def build_oracle_where_expression(self, table, where):
        for key, value in where.items():
            if key == "and" or key == "or":
                if isinstance(value, list):
                    filters = []
                    for express in value:
                        result = self.build_oracle_where_expression(
                            table, express)
                        filters.append(result)
                if key == "and":
                    return and_(*filters)
                if key == "or":
                    return or_(*filters)
            else:
                if isinstance(value, dict):
                    for k, v in value.items():
                        if k == "=":
                            return table.c[key.lower()] == v
                        if k == "!=":
                            return operator.ne(table.c[key.lower()], v)
                        if k == "like":
                            if v != "" or v != '' or v is not None:
                                return table.c[key.lower()].like("%" + v + "%")
                        if k == "in":
                            if isinstance(table.c[key.lower()].type, CLOB):
                                if isinstance(v, list):
                                    value_ = ",".join(v)
                                else:
                                    value_ = v
                                return text('json_exists(' + key.lower() +
                                            ', \'$?(@ in (\"' + value_ +
                                            '\"))\')')
                            else:
                                if isinstance(v, list):
                                    if len(v) != 0:
                                        return table.c[key.lower()].in_(v)
                                elif isinstance(v, str):
                                    v_list = v.split(",")
                                    return table.c[key.lower()].in_(v_list)
                                else:
                                    raise TypeError(
                                        "operator in, the value \"{0}\" is not list or str"
                                        .format(v))
                        if k == "not-in":
                            if isinstance(table.c[key.lower()].type, CLOB):
                                if isinstance(v, list):
                                    value_ = ",".join(v)
                                else:
                                    value_ = v
                                return text('json_exists(' + key.lower() +
                                            ', \'$?(@ not in (\"' + value_ +
                                            '\"))\')')
                            else:
                                if isinstance(v, list):
                                    if len(v) != 0:
                                        return table.c[key.lower()].notin_(v)
                                elif isinstance(v, str):
                                    v_list = v.split(",")
                                    return table.c[key.lower()].notin_(v_list)
                                else:
                                    raise TypeError(
                                        "operator not_in, the value \"{0}\" is not list or str"
                                        .format(v))
                        if k == ">":
                            return table.c[key.lower()] > v
                        if k == ">=":
                            return table.c[key.lower()] >= v
                        if k == "<":
                            return table.c[key.lower()] < v
                        if k == "<=":
                            return table.c[key.lower()] <= v
                        if k == "between":
                            if (isinstance(v, tuple)) and len(v) == 2:
                                return table.c[key.lower()].between(
                                    self._check_value_type(v[0]),
                                    self._check_value_type(v[1]))
                else:
                    return table.c[key.lower()] == value
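
    # Shape of the `where` dict accepted by build_oracle_where_expression above
    # (a sketch; column names are hypothetical):
    #
    #   {"and": [{"status": {"=": "active"}},
    #            {"or": [{"age": {">=": 18}},
    #                    {"name": {"like": "smith"}}]}]}
    #
    # which translates to roughly:
    #   and_(table.c.status == "active",
    #        or_(table.c.age >= 18, table.c.name.like("%smith%")))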

    def build_oracle_updates_expression(self, table, updates,
                                        stmt_type: str) -> dict:
        if stmt_type == "insert":
            new_updates = {}
            for key in table.c.keys():
                if key == "id_":
                    new_updates[key] = get_surrogate_key()
                elif key == "version_":
                    new_updates[key] = 0
                else:
                    if isinstance(table.c[key].type, CLOB):
                        if updates.get(key) is not None:
                            new_updates[key] = dumps(updates.get(key))
                        else:
                            new_updates[key] = None
                    else:
                        if updates.get(key) is not None:
                            value_ = updates.get(key)
                            if isinstance(value_, dict):
                                for k, v in value_.items():
                                    if k == "_sum":
                                        new_updates[key.lower()] = v
                                    elif k == "_count":
                                        new_updates[key.lower()] = v
                                    elif k == "_avg":
                                        new_updates[key.lower()] = v
                            else:
                                new_updates[key] = value_
                        else:
                            default_value = self._get_table_column_default_value(
                                table.name, key)
                            if default_value is not None:
                                value_ = default_value.strip("'").strip(" ")
                                if value_.isdigit():
                                    new_updates[key] = Decimal(value_)
                                else:
                                    new_updates[key] = value_
                            else:
                                new_updates[key] = None
            return new_updates
        elif stmt_type == "update":
            new_updates = {}
            for key in table.c.keys():
                if key == "version_":
                    new_updates[key] = updates.get(key) + 1
                else:
                    if isinstance(table.c[key].type, CLOB):
                        if updates.get(key) is not None:
                            new_updates[key] = dumps(updates.get(key))
                    else:
                        if updates.get(key) is not None:
                            value_ = updates.get(key)
                            if isinstance(value_, dict):
                                for k, v in value_.items():
                                    if k == "_sum":
                                        new_updates[key.lower()] = text(
                                            f'{key.lower()} + {v}')
                                    elif k == "_count":
                                        new_updates[key.lower()] = text(
                                            f'{key.lower()} + {v}')
                                    elif k == "_avg":
                                        pass  # todo
                            else:
                                new_updates[key] = value_
            return new_updates
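    # Illustrative sketch (not from the original source) of how an update document is
    # expected to be translated by build_oracle_updates_expression; the table and
    # values below are hypothetical:
    #
    #   updates = {"version_": 1, "amount": {"_sum": 10}, "status": "done"}
    #   values = self.build_oracle_updates_expression(orders_table, updates, "update")
    #   # values -> {"version_": 2, "amount": text("amount + 10"), "status": "done"}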

    def build_oracle_order(self, table, order_: list):
        result = []
        if order_ is None:
            return result
        else:
            for item in order_:
                if isinstance(item, tuple):
                    if item[1] == "desc":
                        new_ = desc(table.c[item[0].lower()])
                        result.append(new_)
                    if item[1] == "asc":
                        new_ = asc(table.c[item[0].lower()])
                        result.append(new_)
            return result

    '''
    topic data interface
    '''

    def drop_(self, topic_name):
        return self.drop_topic_data_table(topic_name)

    def drop_topic_data_table(self, topic_name):
        try:
            table_name = build_collection_name(topic_name)
            table = self.get_topic_table_by_name(table_name)
            table.drop(self.engine)
            self.clear_metadata()
        except NoSuchTableError as err:
            log.info("NoSuchTableError: {0}".format(table_name))

    def topic_data_delete_(self, where, topic_name):
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        if where is None:
            stmt = delete(table)
        else:
            stmt = delete(table).where(
                self.build_oracle_where_expression(table, where))
        with self.engine.connect() as conn:
            conn.execute(stmt)

    def topic_data_insert_one(self, one, topic_name):
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        one_dict: dict = capital_to_lower(convert_to_dict(one))
        value = self.build_oracle_updates_expression(table, one_dict, "insert")
        stmt = insert(table)
        with self.engine.connect() as conn:
            with conn.begin():
                try:
                    result = conn.execute(stmt, value)
                except IntegrityError as e:
                    raise InsertConflictError("InsertConflict")
        return result.rowcount

    def topic_data_insert_(self, data, topic_name):
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        values = []
        for instance in data:
            one_dict: dict = capital_to_lower(convert_to_dict(instance))
            value = self.build_oracle_updates_expression(
                table, one_dict, "insert")
            values.append(value)
        stmt = insert(table)
        with self.engine.connect() as conn:
            result = conn.execute(stmt, values)

    def topic_data_update_one(self, id_: str, one: any, topic_name: str):
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = update(table).where(eq(table.c['id_'], id_))
        one_dict = capital_to_lower(convert_to_dict(one))
        value = self.build_oracle_updates_expression(table, one_dict, "update")
        stmt = stmt.values(value)
        with self.engine.begin() as conn:
            result = conn.execute(stmt)
        return result.rowcount

    def topic_data_update_one_with_version(self, id_: str, version_: int,
                                           one: any, topic_name: str):
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = update(table).where(
            and_(eq(table.c['id_'], id_), eq(table.c['version_'], version_)))
        one_dict = capital_to_lower(convert_to_dict(one))
        value = self.build_oracle_updates_expression(table, one_dict, "update")
        stmt = stmt.values(value)
        with self.engine.begin() as conn:
            result = conn.execute(stmt)
        if result.rowcount == 0:
            raise OptimisticLockError("Optimistic lock error")

    def topic_data_update_(self, query_dict, instances: list, topic_name):
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = (update(table).where(
            self.build_oracle_where_expression(table, query_dict)))
        values = []
        for instance in instances:
            one_dict = capital_to_lower(convert_to_dict(instance))
            value = self.build_oracle_updates_expression(
                table, one_dict, "update")
            values.append(value)
        stmt = stmt.values(values)
        with self.engine.begin() as conn:
            result = conn.execute(stmt)

    def topic_data_find_by_id(self, id_: str, topic_name: str) -> any:
        return self.topic_data_find_one({"id_": id_}, topic_name)

    def topic_data_find_one(self, where, topic_name) -> any:
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = select(table).where(
            self.build_oracle_where_expression(table, where))
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            cursor.rowfactory = lambda *args: dict(zip(columns, args))
            row = cursor.fetchone()
        if row is None:
            return None
        else:
            result = {}
            for index, name in enumerate(columns):
                if isinstance(table.c[name.lower()].type, CLOB):
                    if row[name] is not None:
                        result[name] = json.loads(row[name])
                    else:
                        result[name] = None
                else:
                    result[name] = row[name]
            return self._convert_dict_key(result, topic_name)

    def topic_data_find_(self, where, topic_name):
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = select(table).where(
            self.build_oracle_where_expression(table, where))
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            cursor.rowfactory = lambda *args: dict(zip(columns, args))
            rows = cursor.fetchall()
        if rows is None:
            return None
        else:
            if isinstance(rows, list):
                results = []
                for row in rows:
                    result = {}
                    for index, name in enumerate(columns):
                        if isinstance(table.c[name.lower()].type, CLOB):
                            if row[name] is not None:
                                result[name] = json.loads(row[name])
                            else:
                                result[name] = None
                        else:
                            result[name] = row[name]
                    results.append(self._convert_dict_key(result, topic_name))
                return results
            else:
                result = {}
                for index, name in enumerate(columns):
                    if isinstance(table.c[name.lower()].type, CLOB):
                        result[name] = dumps(rows[index])
                    else:
                        result[name] = rows[index]
                return result

    def topic_data_find_with_aggregate(self, where, topic_name, aggregate):
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        return_column_name = None
        for key, value in aggregate.items():
            if value == "sum":
                stmt = select(text(f'sum({key.lower()}) as sum_{key.lower()}'))
                return_column_name = f'SUM_{key.upper()}'
            elif value == "count":
                stmt = select(text('count(*) as count'))
                return_column_name = 'COUNT'
            elif value == "avg":
                stmt = select(text(f'avg({key.lower()}) as avg_{key.lower()}'))
                return_column_name = f'AVG_{key.upper()}'
        stmt = stmt.select_from(table)
        stmt = stmt.where(self.build_oracle_where_expression(table, where))
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            cursor.rowfactory = lambda *args: dict(zip(columns, args))
            res = cursor.fetchone()
        if res is None:
            return None
        else:
            return res[return_column_name]

    def topic_data_list_all(self, topic_name) -> list:
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = select(table)
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            cursor.rowfactory = lambda *args: dict(zip(columns, args))
            rows = cursor.fetchall()

            if rows is None:
                return None
            else:
                results = []
                for row in rows:
                    result = {}
                    for index, name in enumerate(columns):
                        if isinstance(table.c[name.lower()].type, CLOB):
                            if row[name] is not None:
                                result[name] = json.loads(row[name])
                            else:
                                result[name] = None
                        else:
                            result[name] = row[name]
                    if self.storage_template.check_topic_type(topic_name) == "raw":
                        results.append(result['DATA_'])
                    else:
                        results.append(result)
                if self.storage_template.check_topic_type(topic_name) == "raw":
                    return results
                else:
                    return self._convert_list_elements_key(results, topic_name)

    def topic_data_page_(self, where, sort, pageable, model, name) -> DataPage:
        table_name = build_collection_name(name)
        count = self.count_topic_data_table(table_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = select(table).where(
            self.build_oracle_where_expression(table, where))
        orders = self.build_oracle_order(table, sort)
        for order in orders:
            stmt = stmt.order_by(order)
        offset = pageable.pageSize * (pageable.pageNumber - 1)
        stmt = text(
            str(stmt.compile(compile_kwargs={"literal_binds": True})) +
            " OFFSET :offset ROWS FETCH NEXT :maxnumrows ROWS ONLY")
        result = []
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt, {
                "offset": offset,
                "maxnumrows": pageable.pageSize
            }).cursor
            columns = [col[0] for col in cursor.description]
            cursor.rowfactory = lambda *args: dict(zip(columns, args))
            res = cursor.fetchall()
        if self.storage_template.check_topic_type(name) == "raw":
            for row in res:
                result.append(json.loads(row['DATA_']))
        else:
            for row in res:
                if model is not None:
                    result.append(parse_obj(model, row, table))
                else:
                    result.append(row)
        return build_data_pages(pageable, result, count)

    def clear_metadata(self):
        self.metadata.clear()

    '''
    protected methods, used internally by this class
    '''

    def _get_table_column_default_value(self, table_name, column_name):
        cached_columns = cacheman[COLUMNS_BY_TABLE_NAME].get(table_name)
        if cached_columns is not None:
            columns = cached_columns
        else:
            columns = self.insp.get_columns(table_name)
            cacheman[COLUMNS_BY_TABLE_NAME].set(table_name, columns)
        for column in columns:
            if column["name"] == column_name:
                return column["default"]

    def _convert_dict_key(self, dict_info, topic_name):
        if dict_info is None:
            return None
        new_dict = {}
        factors = self.storage_template.get_topic_factors(topic_name)
        for factor in factors:
            new_dict[factor['name']] = dict_info[factor['name'].upper()]
        new_dict['id_'] = dict_info['ID_']
        if 'TENANT_ID_' in dict_info:
            new_dict['tenant_id_'] = dict_info.get("TENANT_ID_", 1)
        if "INSERT_TIME_" in dict_info:
            new_dict['insert_time_'] = dict_info.get(
                "INSERT_TIME_",
                datetime.datetime.now().replace(tzinfo=None))
        if "UPDATE_TIME_" in dict_info:
            new_dict['update_time_'] = dict_info.get(
                "UPDATE_TIME_",
                datetime.datetime.now().replace(tzinfo=None))
        if "VERSION_" in dict_info:
            new_dict['version_'] = dict_info.get("VERSION_", 0)
        if "AGGREGATE_ASSIST_" in dict_info:
            new_dict['aggregate_assist_'] = dict_info.get("AGGREGATE_ASSIST_")
        return new_dict

    def _convert_list_elements_key(self, list_info, topic_name):
        if list_info is None:
            return None
        new_list = []
        factors = self.storage_template.get_topic_factors(topic_name)
        for item in list_info:
            new_dict = {}
            for factor in factors:
                new_dict[factor['name']] = item[factor['name'].upper()]
            new_dict['id_'] = item['ID_']
            if 'TENANT_ID_' in item:
                new_dict['tenant_id_'] = item.get("TENANT_ID_", 1)
            if "INSERT_TIME_" in item:
                new_dict['insert_time_'] = item.get(
                    "INSERT_TIME_",
                    datetime.datetime.now().replace(tzinfo=None))
            if "UPDATE_TIME_" in item:
                new_dict['update_time_'] = item.get(
                    "UPDATE_TIME_",
                    datetime.datetime.now().replace(tzinfo=None))
            if "VERSION_" in item:
                new_dict['version_'] = item.get("VERSION_", 0)
            if "AGGREGATE_ASSIST_" in item:
                new_dict['aggregate_assist_'] = item.get("AGGREGATE_ASSIST_")
            new_list.append(new_dict)
        return new_list

    @staticmethod
    def _check_value_type(value):
        if isinstance(value, datetime.datetime):
            return func.to_date(value, "yyyy-mm-dd hh24:mi:ss")
        elif isinstance(value, datetime.date):
            return func.to_date(value, "yyyy-mm-dd")
        else:
            return value

    def count_topic_data_table(self, table_name):
        stmt = 'SELECT count(%s) AS count FROM %s' % ('id_', table_name)
        with self.engine.connect() as conn:
            cursor = conn.execute(text(stmt)).cursor
            columns = [col[0] for col in cursor.description]
            cursor.rowfactory = lambda *args: dict(zip(columns, args))
            result = cursor.fetchone()
        return result['COUNT']
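    # A minimal usage sketch (not from the original source) for the topic-data
    # interface above; it assumes `storage` is an instance of this Oracle storage
    # class, a topic named "order" already has a backing table, and `pageable`
    # carries `pageSize`/`pageNumber` attributes. All names and values are
    # hypothetical:
    #
    #   storage.topic_data_insert_one({"amount": 10, "status": "open"}, "order")
    #   rows = storage.topic_data_find_({"status": {"not_in": ["void"]}}, "order")
    #   page = storage.topic_data_page_({"amount": {">=": 100}},
    #                                   [("amount", "desc")], pageable, None, "order")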
Example #36
class Storage(tableschema.Storage):
    """SQL storage

    Package implements
    [Tabular Storage](https://github.com/frictionlessdata/tableschema-py#storage)
    interface (see full documentation on the link):

    ![Storage](https://i.imgur.com/RQgrxqp.png)

    > Only additional API is documented

    # Arguments
        engine (object): `sqlalchemy` engine
        dbschema (str): name of database schema
        prefix (str): prefix for all buckets
        reflect_only (callable):
            a boolean predicate to filter the list of table names when reflecting
        autoincrement (str/dict):
            add autoincrement column at the beginning.
              - if a string it's an autoincrement column name
              - if a dict it's an autoincrements mapping with column
                names indexed by bucket names, for example,
                `{'bucket1'\\: 'id', 'bucket2'\\: 'other_id'}`

    """

    # Public

    def __init__(self, engine, dbschema=None, prefix='', reflect_only=None, autoincrement=None):

        # Set attributes
        self.__connection = engine.connect()
        self.__dbschema = dbschema
        self.__prefix = prefix
        self.__descriptors = {}
        self.__fallbacks = {}
        self.__autoincrement = autoincrement
        self.__only = reflect_only or (lambda _: True)
        self.__dialect = engine.dialect.name

        # Added regex support to sqlite
        if self.__dialect == 'sqlite':
            def regexp(expr, item):
                reg = re.compile(expr)
                return reg.search(item) is not None
            # It will fail silently if this function already exists
            self.__connection.connection.create_function('REGEXP', 2, regexp)

        # Create mapper
        self.__mapper = Mapper(prefix=prefix, dialect=self.__dialect)

        # Create metadata and reflect
        self.__metadata = MetaData(bind=self.__connection, schema=self.__dbschema)
        self.__reflect()

    def __repr__(self):

        # Template and format
        template = 'Storage <{engine}/{dbschema}>'
        text = template.format(
            engine=self.__connection.engine,
            dbschema=self.__dbschema)

        return text

    @property
    def buckets(self):
        buckets = []
        for table in self.__metadata.sorted_tables:
            bucket = self.__mapper.restore_bucket(table.name)
            if bucket is not None:
                buckets.append(bucket)
        return buckets

    def create(self, bucket, descriptor, force=False, indexes_fields=None):
        """Create bucket

        # Arguments
            indexes_fields (str[]):
                list of tuples containing field names, or list of such lists

        """

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]
        if indexes_fields is None or len(indexes_fields) == 0:
            indexes_fields = [()] * len(descriptors)
        elif type(indexes_fields[0][0]) not in {list, tuple}:
            indexes_fields = [indexes_fields]

        # Check dimensions
        if not (len(buckets) == len(descriptors) == len(indexes_fields)):
            raise tableschema.exceptions.StorageError('Wrong argument dimensions')

        # Check buckets for existence
        for bucket in reversed(self.buckets):
            if bucket in buckets:
                if not force:
                    message = 'Bucket "%s" already exists.' % bucket
                    raise tableschema.exceptions.StorageError(message)
                self.delete(bucket)

        # Define buckets
        for bucket, descriptor, index_fields in zip(buckets, descriptors, indexes_fields):
            tableschema.validate(descriptor)
            table_name = self.__mapper.convert_bucket(bucket)
            autoincrement = self.__get_autoincrement_for_bucket(bucket)
            columns, constraints, indexes, fallbacks, table_comment = self.__mapper \
                .convert_descriptor(bucket, descriptor, index_fields, autoincrement)
            Table(table_name, self.__metadata, *(columns + constraints + indexes),
                  comment=table_comment)
            self.__descriptors[bucket] = descriptor
            self.__fallbacks[bucket] = fallbacks

        # Create tables, update metadata
        try:
            self.__metadata.create_all()
        except sqlalchemy.exc.ProgrammingError as exception:
            if 'there is no unique constraint matching given keys' in str(exception):
                message = 'Foreign keys can only reference primary key or unique fields\n%s'
                six.raise_from(
                    tableschema.exceptions.ValidationError(message % str(exception)),
                    None)

    def delete(self, bucket=None, ignore=False):

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        elif bucket is None:
            buckets = reversed(self.buckets)

        # Iterate
        tables = []
        for bucket in buckets:

            # Check existent
            if bucket not in self.buckets:
                if not ignore:
                    message = 'Bucket "%s" doesn\'t exist.' % bucket
                    raise tableschema.exceptions.StorageError(message)
                return

            # Remove from buckets
            if bucket in self.__descriptors:
                del self.__descriptors[bucket]

            # Add table to tables
            table = self.__get_table(bucket)
            tables.append(table)

        # Drop tables, update metadata
        self.__metadata.drop_all(tables=tables)
        self.__metadata.clear()
        self.__reflect()

    def describe(self, bucket, descriptor=None):

        # Set descriptor
        if descriptor is not None:
            self.__descriptors[bucket] = descriptor

        # Get descriptor
        else:
            descriptor = self.__descriptors.get(bucket)
            if descriptor is None:
                table = self.__get_table(bucket)
                autoincrement = self.__get_autoincrement_for_bucket(bucket)
                descriptor = self.__mapper.restore_descriptor(
                    table.name, table.columns, table.constraints, autoincrement)

        return descriptor

    def iter(self, bucket):

        # Get table and fallbacks
        table = self.__get_table(bucket)
        schema = tableschema.Schema(self.describe(bucket))
        autoincrement = self.__get_autoincrement_for_bucket(bucket)

        # Open and close transaction
        with self.__connection.begin():
            # Streaming may not work for some backends:
            # http://docs.sqlalchemy.org/en/latest/core/connections.html
            select = table.select().execution_options(stream_results=True)
            result = select.execute()
            for row in result:
                row = self.__mapper.restore_row(
                    row, schema=schema, autoincrement=autoincrement)
                yield row

    def read(self, bucket):
        rows = list(self.iter(bucket))
        return rows

    def write(self, bucket, rows, keyed=False, as_generator=False, update_keys=None,
              buffer_size=1000, use_bloom_filter=True):
        """Write to bucket

        # Arguments
            keyed (bool):
                accept keyed rows
            as_generator (bool):
                returns generator to provide writing control to the client
            update_keys (str[]):
                update instead of inserting if key values match existent rows
            buffer_size (int=1000):
                maximum number of rows to try and write to the db in one batch
            use_bloom_filter (bool=True):
                should we use a bloom filter to optimize DB update performance
                (in exchange for some setup time)

        """

        # Check update keys
        if update_keys is not None and len(update_keys) == 0:
            message = 'Argument "update_keys" cannot be an empty list'
            raise tableschema.exceptions.StorageError(message)

        # Get table and description
        table = self.__get_table(bucket)
        schema = tableschema.Schema(self.describe(bucket))
        fallbacks = self.__fallbacks.get(bucket, [])

        # Write rows to table
        convert_row = partial(self.__mapper.convert_row, schema=schema, fallbacks=fallbacks)
        autoincrement = self.__get_autoincrement_for_bucket(bucket)
        writer = Writer(table, schema,
            # Only PostgreSQL supports "returning" so we don't use autoincrement for all
            autoincrement=autoincrement if self.__dialect in ['postgresql'] else None,
            update_keys=update_keys,
            convert_row=convert_row,
            buffer_size=buffer_size,
            use_bloom_filter=use_bloom_filter)
        with self.__connection.begin():
            gen = writer.write(rows, keyed=keyed)
            if as_generator:
                return gen
            collections.deque(gen, maxlen=0)

    # Private

    def __get_table(self, bucket):
        table_name = self.__mapper.convert_bucket(bucket)
        if self.__dbschema:
            table_name = '.'.join((self.__dbschema, table_name))
        return self.__metadata.tables[table_name]

    def __reflect(self):
        def only(name, _):
            return self.__only(name) and self.__mapper.restore_bucket(name) is not None
        self.__metadata.reflect(only=only)

    def __get_autoincrement_for_bucket(self, bucket):
        if isinstance(self.__autoincrement, dict):
            return self.__autoincrement.get(bucket)
        return self.__autoincrement
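
# A minimal usage sketch for the Storage class above (not part of the original
# source); the bucket name, descriptor and engine URL are hypothetical.
from sqlalchemy import create_engine

engine = create_engine('sqlite://')
storage = Storage(engine, prefix='datapackage_')
descriptor = {'fields': [{'name': 'id', 'type': 'integer'},
                         {'name': 'name', 'type': 'string'}]}
storage.create('articles', descriptor, force=True)
storage.write('articles', [[1, 'intro'], [2, 'details']])
print(storage.read('articles'))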
Example #37
class Feed(object):
    """Bookmark user class."""

    def __init__(self, engine, url, title=''):
        logging.basicConfig(level=20)
        self.engine = engine 
        self.url = url
        self.title = title
        self.md = MetaData(self.engine)
        self.sleep_sec = 1

    @property
    def id(self):
        """Load feed id."""
        if not self._load_id():
            self._append()
        return self._load_id()

    def extract(self):
        """Extract bookmarked users from setted url."""
        users = []

        # TODO: Load id in __init__
        if not self._load_id():
            self._append()
        api_url = self._make_entry_api_url(self.url)
        result = self._request(api_url)
        if not result:
            return users
        for b in result.get('bookmarks', []):
            if "user" not in b:
                continue
            users.append(User(self.engine, b["user"]))
        time.sleep(self.sleep_sec)
        return users

    def _make_entry_api_url(self, url):
        """Create hatena bookmark entry api url."""
        e_url = quote(url, safe='')
        return HATENA_ENTRY_URL.format(url=e_url)

    def _request(self, url):
        """Request api.
        
        Request argument url and return result data as dict.
        """
        return requests.get(url).json()

    def _load_id(self):
        """Load feed id from database."""
        t = Table('feed', self.md)
        c_url = column('url')
        c_id = column('id')
        s = select(columns=[c_id], from_obj=t).where(c_url==self.url)
        r = s.execute().fetchone()
        if r:
            return r['id']
        return None

    def _append(self):
        """Add new feed url into database."""
        logging.info('SAVE MY FEED')
        logging.info(self.url)
        self.md.clear()
        md = MetaData(self.engine)
        t = Table('feed', md, autoload=True)
        i = insert(t).values(url=self.url,
                             title=self.title)
        i.execute()
        logging.info('----------------------')
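
# A minimal usage sketch for the Feed class above (not part of the original source);
# the database URL and feed URL are hypothetical.
from sqlalchemy import create_engine

engine = create_engine('mysql+pymysql://user:password@localhost/bookmarks')
feed = Feed(engine, 'https://example.com/entry', title='Example entry')
print(feed.id)            # loads the feed id, inserting the url first if needed
for user in feed.extract():
    print(user)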
Example #38
class Storage(object):
    """SQL Tabular Storage.

    It's an implementation of `jsontableschema.Storage`.

    Args:
        engine (object): SQLAlchemy engine
        dbschema (str): database schema name
        prefix (str): prefix for all buckets
        reflect_only (callable): a boolean predicate to filter
            the list of table names when reflecting
    """

    # Public

    def __init__(self, engine, dbschema=None, prefix='', reflect_only=None):

        # Set attributes
        self.__connection = engine.connect()
        self.__dbschema = dbschema
        self.__prefix = prefix
        self.__descriptors = {}
        if reflect_only is not None:
            self.__only = reflect_only
        else:
            self.__only = lambda _: True

        # Create metadata
        self.__metadata = MetaData(
            bind=self.__connection,
            schema=self.__dbschema)
        self.__reflect()

    def __repr__(self):

        # Template and format
        template = 'Storage <{engine}/{dbschema}>'
        text = template.format(
            engine=self.__connection.engine,
            dbschema=self.__dbschema)

        return text

    @property
    def buckets(self):

        # Collect
        buckets = []
        for table in self.__metadata.sorted_tables:
            bucket = mappers.tablename_to_bucket(self.__prefix, table.name)
            if bucket is not None:
                buckets.append(bucket)

        return buckets

    def create(self, bucket, descriptor, force=False, indexes_fields=None):
        """Create table by schema.

        Parameters
        ----------
        table: str/list
            Table name or list of table names.
        schema: dict/list
            JSONTableSchema schema or list of schemas.
        indexes_fields: list
            list of tuples containing field names, or list of such lists

        Raises
        ------
        RuntimeError
            If a bucket already exists and force is not set.

        """

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]
        if indexes_fields is None or len(indexes_fields) == 0:
            indexes_fields = [()] * len(descriptors)
        elif type(indexes_fields[0][0]) not in {list, tuple}:
            indexes_fields = [indexes_fields]
        assert len(indexes_fields) == len(descriptors)
        assert len(buckets) == len(descriptors)

        # Check buckets for existence
        for bucket in reversed(self.buckets):
            if bucket in buckets:
                if not force:
                    message = 'Bucket "%s" already exists.' % bucket
                    raise RuntimeError(message)
                self.delete(bucket)

        # Define buckets
        for bucket, descriptor, index_fields in zip(buckets, descriptors, indexes_fields):

            # Add to schemas
            self.__descriptors[bucket] = descriptor

            # Create table
            jsontableschema.validate(descriptor)
            tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
            columns, constraints, indexes = mappers.descriptor_to_columns_and_constraints(
                self.__prefix, bucket, descriptor, index_fields)
            Table(tablename, self.__metadata, *(columns+constraints+indexes))

        # Create tables, update metadata
        self.__metadata.create_all()

    def delete(self, bucket=None, ignore=False):

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        elif bucket is None:
            buckets = reversed(self.buckets)

        # Iterate over buckets
        tables = []
        for bucket in buckets:

            # Check existent
            if bucket not in self.buckets:
                if not ignore:
                    message = 'Bucket "%s" doesn\'t exist.' % bucket
                    raise RuntimeError(message)

            # Remove from buckets
            if bucket in self.__descriptors:
                del self.__descriptors[bucket]

            # Add table to tables
            table = self.__get_table(bucket)
            tables.append(table)

        # Drop tables, update metadata
        self.__metadata.drop_all(tables=tables)
        self.__metadata.clear()
        self.__reflect()

    def describe(self, bucket, descriptor=None):

        # Set descriptor
        if descriptor is not None:
            self.__descriptors[bucket] = descriptor

        # Get descriptor
        else:
            descriptor = self.__descriptors.get(bucket)
            if descriptor is None:
                table = self.__get_table(bucket)
                descriptor = mappers.columns_and_constraints_to_descriptor(
                    self.__prefix, table.name, table.columns, table.constraints)

        return descriptor

    def iter(self, bucket):

        # Get result
        table = self.__get_table(bucket)
        # Streaming may not work for some backends:
        # http://docs.sqlalchemy.org/en/latest/core/connections.html
        select = table.select().execution_options(stream_results=True)
        result = select.execute()

        # Yield data
        for row in result:
            yield list(row)

    def read(self, bucket):

        # Get rows
        rows = list(self.iter(bucket))

        return rows

    def write(self, bucket, rows):

        # Prepare
        BUFFER_SIZE = 1000
        descriptor = self.describe(bucket)
        schema = jsontableschema.Schema(descriptor)
        table = self.__get_table(bucket)

        # Write
        with self.__connection.begin():
            keyed_rows = []
            for row in rows:
                keyed_row = {}
                for index, field in enumerate(schema.fields):
                    value = row[index]
                    try:
                        value = field.cast_value(value)
                    except InvalidObjectType:
                        value = json.loads(value)
                    keyed_row[field.name] = value
                keyed_rows.append(keyed_row)
                if len(keyed_rows) > BUFFER_SIZE:
                    # Insert data
                    table.insert().execute(keyed_rows)
                    # Clean memory
                    keyed_rows = []
            if len(keyed_rows) > 0:
                # Insert data
                table.insert().execute(keyed_rows)

    # Private

    def __get_table(self, bucket):
        """Return SQLAlchemy table for the given bucket.
        """

        # Prepare name
        tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
        if self.__dbschema:
            tablename = '.'.join((self.__dbschema, tablename))

        return self.__metadata.tables[tablename]

    def __reflect(self):
        def only(name, _):
            ret = (
                self.__only(name) and
                mappers.tablename_to_bucket(self.__prefix, name) is not None
            )
            return ret

        self.__metadata.reflect(only=only)
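
# A minimal sketch (not part of the original source) of the reflection side of the
# Storage class above; the prefix and any existing tables are hypothetical.
from sqlalchemy import create_engine

engine = create_engine('sqlite://')
storage = Storage(engine, prefix='data_')
for bucket in storage.buckets:          # buckets are reflected from existing tables
    print(bucket, storage.describe(bucket))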
Example #39
class ApdbSchema(object):
    """Class for management of APDB schema.

    Attributes
    ----------
    objects : `sqlalchemy.Table`
        DiaObject table instance
    objects_nightly : `sqlalchemy.Table`
        DiaObjectNightly table instance, may be None
    objects_last : `sqlalchemy.Table`
        DiaObjectLast table instance, may be None
    sources : `sqlalchemy.Table`
        DiaSource table instance
    forcedSources : `sqlalchemy.Table`
        DiaForcedSource table instance
    visits : `sqlalchemy.Table`
        ApdbProtoVisits table instance

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        SQLAlchemy engine instance
    dia_object_index : `str`
        Indexing mode for DiaObject table, see `ApdbConfig.dia_object_index`
        for details.
    dia_object_nightly : `bool`
        If `True` then create per-night DiaObject table as well.
    schema_file : `str`
        Name of the YAML schema file.
    extra_schema_file : `str`, optional
        Name of the YAML schema file with extra column definitions.
    column_map : `str`, optional
        Name of the YAML file with column mappings.
    afw_schemas : `dict`, optional
        Dictionary with table name for a key and `afw.table.Schema`
        for a value. Columns in schema will be added to standard APDB
        schema (only if standard schema does not have matching column).
    prefix : `str`, optional
        Prefix to add to all schema elements.
    """

    # map afw type names into cat type names
    _afw_type_map = {"I": "INT",
                     "L": "BIGINT",
                     "F": "FLOAT",
                     "D": "DOUBLE",
                     "Angle": "DOUBLE",
                     "String": "CHAR",
                     "Flag": "BOOL"}
    _afw_type_map_reverse = {"INT": "I",
                             "BIGINT": "L",
                             "FLOAT": "F",
                             "DOUBLE": "D",
                             "DATETIME": "L",
                             "CHAR": "String",
                             "BOOL": "Flag"}

    def __init__(self, engine, dia_object_index, dia_object_nightly,
                 schema_file, extra_schema_file=None, column_map=None,
                 afw_schemas=None, prefix=""):

        self._engine = engine
        self._dia_object_index = dia_object_index
        self._dia_object_nightly = dia_object_nightly
        self._prefix = prefix

        self._metadata = MetaData(self._engine)

        self.objects = None
        self.objects_nightly = None
        self.objects_last = None
        self.sources = None
        self.forcedSources = None
        self.visits = None

        if column_map:
            _LOG.debug("Reading column map file %s", column_map)
            with open(column_map) as yaml_stream:
                # maps cat column name to afw column name
                self._column_map = yaml.load(yaml_stream, Loader=yaml.SafeLoader)
                _LOG.debug("column map: %s", self._column_map)
        else:
            _LOG.debug("No column map file is given, initialize to empty")
            self._column_map = {}
        self._column_map_reverse = {}
        for table, cmap in self._column_map.items():
            # maps afw column name to cat column name
            self._column_map_reverse[table] = {v: k for k, v in cmap.items()}
        _LOG.debug("reverse column map: %s", self._column_map_reverse)

        # build complete table schema
        self._schemas = self._buildSchemas(schema_file, extra_schema_file,
                                           afw_schemas)

        # map cat column types to alchemy
        self._type_map = dict(DOUBLE=self._getDoubleType(),
                              FLOAT=sqlalchemy.types.Float,
                              DATETIME=sqlalchemy.types.TIMESTAMP,
                              BIGINT=sqlalchemy.types.BigInteger,
                              INTEGER=sqlalchemy.types.Integer,
                              INT=sqlalchemy.types.Integer,
                              TINYINT=sqlalchemy.types.Integer,
                              BLOB=sqlalchemy.types.LargeBinary,
                              CHAR=sqlalchemy.types.CHAR,
                              BOOL=sqlalchemy.types.Boolean)

        # generate schema for all tables, must be called last
        self._makeTables()

    def _makeTables(self, mysql_engine='InnoDB', oracle_tablespace=None, oracle_iot=False):
        """Generate schema for all tables.

        Parameters
        ----------
        mysql_engine : `str`, optional
            MySQL engine type to use for new tables.
        oracle_tablespace : `str`, optional
            Name of Oracle tablespace, only useful with oracle
        oracle_iot : `bool`, optional
            Make Index-organized DiaObjectLast table.
        """

        info = dict(oracle_tablespace=oracle_tablespace)

        if self._dia_object_index == 'pix_id_iov':
            # Special PK with HTM column in first position
            constraints = self._tableIndices('DiaObjectIndexHtmFirst', info)
        else:
            constraints = self._tableIndices('DiaObject', info)
        table = Table(self._prefix+'DiaObject', self._metadata,
                      *(self._tableColumns('DiaObject') + constraints),
                      mysql_engine=mysql_engine,
                      info=info)
        self.objects = table

        if self._dia_object_nightly:
            # Same as DiaObject but no index
            table = Table(self._prefix+'DiaObjectNightly', self._metadata,
                          *self._tableColumns('DiaObject'),
                          mysql_engine=mysql_engine,
                          info=info)
            self.objects_nightly = table

        if self._dia_object_index == 'last_object_table':
            # Same as DiaObject but with special index
            info2 = info.copy()
            info2.update(oracle_iot=oracle_iot)
            table = Table(self._prefix+'DiaObjectLast', self._metadata,
                          *(self._tableColumns('DiaObjectLast') +
                            self._tableIndices('DiaObjectLast', info)),
                          mysql_engine=mysql_engine,
                          info=info2)
            self.objects_last = table

        # for all other tables use index definitions in schema
        for table_name in ('DiaSource', 'SSObject', 'DiaForcedSource', 'DiaObject_To_Object_Match'):
            table = Table(self._prefix+table_name, self._metadata,
                          *(self._tableColumns(table_name) +
                            self._tableIndices(table_name, info)),
                          mysql_engine=mysql_engine,
                          info=info)
            if table_name == 'DiaSource':
                self.sources = table
            elif table_name == 'DiaForcedSource':
                self.forcedSources = table

        # special table to track visits, only used by prototype
        table = Table(self._prefix+'ApdbProtoVisits', self._metadata,
                      Column('visitId', sqlalchemy.types.BigInteger, nullable=False),
                      Column('visitTime', sqlalchemy.types.TIMESTAMP, nullable=False),
                      PrimaryKeyConstraint('visitId', name=self._prefix+'PK_ApdbProtoVisits'),
                      Index(self._prefix+'IDX_ApdbProtoVisits_vTime', 'visitTime', info=info),
                      mysql_engine=mysql_engine,
                      info=info)
        self.visits = table

    def makeSchema(self, drop=False, mysql_engine='InnoDB', oracle_tablespace=None, oracle_iot=False):
        """Create or re-create all tables.

        Parameters
        ----------
        drop : `bool`, optional
            If True then drop tables before creating new ones.
        mysql_engine : `str`, optional
            MySQL engine type to use for new tables.
        oracle_tablespace : `str`, optional
            Name of Oracle tablespace, only useful with oracle
        oracle_iot : `bool`, optional
            Make Index-organized DiaObjectLast table.
        """

        # re-make table schema for all needed tables with possibly different options
        _LOG.debug("clear metadata")
        self._metadata.clear()
        _LOG.debug("re-do schema mysql_engine=%r oracle_tablespace=%r",
                   mysql_engine, oracle_tablespace)
        self._makeTables(mysql_engine=mysql_engine, oracle_tablespace=oracle_tablespace,
                         oracle_iot=oracle_iot)

        # create all tables (optionally drop first)
        if drop:
            _LOG.info('dropping all tables')
            self._metadata.drop_all()
        _LOG.info('creating all tables')
        self._metadata.create_all()

    def getAfwSchema(self, table_name, columns=None):
        """Return afw schema for given table.

        Parameters
        ----------
        table_name : `str`
            One of known APDB table names.
        columns : `list` of `str`, optional
            Include only given table columns in schema, by default all columns
            are included.

        Returns
        -------
        schema : `lsst.afw.table.Schema`
        column_map : `dict`
            Mapping of the table/result column names into schema key.
        """

        table = self._schemas[table_name]
        col_map = self._column_map.get(table_name, {})

        # make a schema
        col2afw = {}
        schema = afwTable.SourceTable.makeMinimalSchema()
        for column in table.columns:
            if columns and column.name not in columns:
                continue
            afw_col = col_map.get(column.name, column.name)
            if afw_col in schema.getNames():
                # The column is already in the minimal schema; reuse its key.
                key = schema.find(afw_col).getKey()
            elif column.type in ("DOUBLE", "FLOAT") and column.unit == "deg":
                #
                # NOTE: degree to radian conversion is not supported (yet)
                #
                # angles in afw are radians and have special "Angle" type
                key = schema.addField(afw_col,
                                      type="Angle",
                                      doc=column.description or "",
                                      units="rad")
            elif column.type == "BLOB":
                # No BLOB support for now
                key = None
            else:
                units = column.unit or ""
                # some units in schema are not recognized by afw but we do not care
                if self._afw_type_map_reverse[column.type] == 'String':
                    key = schema.addField(afw_col,
                                          type=self._afw_type_map_reverse[column.type],
                                          doc=column.description or "",
                                          units=units,
                                          parse_strict="silent",
                                          size=10)
                elif units == "deg":
                    key = schema.addField(afw_col,
                                          type='Angle',
                                          doc=column.description or "",
                                          parse_strict="silent")
                else:
                    key = schema.addField(afw_col,
                                          type=self._afw_type_map_reverse[column.type],
                                          doc=column.description or "",
                                          units=units,
                                          parse_strict="silent")
            col2afw[column.name] = key

        return schema, col2afw

    def getAfwColumns(self, table_name):
        """Returns mapping of afw column names to Column definitions.

        Parameters
        ----------
        table_name : `str`
            One of known APDB table names.

        Returns
        -------
        column_map : `dict`
            Mapping of afw column names to `ColumnDef` instances.
        """
        table = self._schemas[table_name]
        col_map = self._column_map.get(table_name, {})

        cmap = {}
        for column in table.columns:
            afw_name = col_map.get(column.name, column.name)
            cmap[afw_name] = column
        return cmap

    def getColumnMap(self, table_name):
        """Returns mapping of column names to Column definitions.

        Parameters
        ----------
        table_name : `str`
            One of known APDB table names.

        Returns
        -------
        column_map : `dict`
            Mapping of column names to `ColumnDef` instances.
        """
        table = self._schemas[table_name]
        cmap = {column.name: column for column in table.columns}
        return cmap

    def _buildSchemas(self, schema_file, extra_schema_file=None, afw_schemas=None):
        """Create schema definitions for all tables.

        Reads YAML schemas and builds dictionary containing `TableDef`
        instances for each table.

        Parameters
        ----------
        schema_file : `str`
            Name of YAML file with standard cat schema.
        extra_schema_file : `str`, optional
            Name of YAML file with extra table information or `None`.
        afw_schemas : `dict`, optional
            Dictionary with table name for a key and `afw.table.Schema`
            for a value. Columns in schema will be added to standard APDB
            schema (only if standard schema does not have matching column).

        Returns
        -------
        schemas : `dict`
            Mapping of table names to `TableDef` instances.
        """

        _LOG.debug("Reading schema file %s", schema_file)
        with open(schema_file) as yaml_stream:
            tables = list(yaml.load_all(yaml_stream, Loader=yaml.SafeLoader))
            # index it by table name
        _LOG.debug("Read %d tables from schema", len(tables))

        if extra_schema_file:
            _LOG.debug("Reading extra schema file %s", extra_schema_file)
            with open(extra_schema_file) as yaml_stream:
                extras = list(yaml.load_all(yaml_stream, Loader=yaml.SafeLoader))
                # index it by table name
                schemas_extra = {table['table']: table for table in extras}
        else:
            schemas_extra = {}

        # merge extra schema into a regular schema, for now only columns are merged
        for table in tables:
            table_name = table['table']
            if table_name in schemas_extra:
                columns = table['columns']
                extra_columns = schemas_extra[table_name].get('columns', [])
                extra_columns = {col['name']: col for col in extra_columns}
                _LOG.debug("Extra columns for table %s: %s", table_name, extra_columns.keys())
                columns = []
                for col in table['columns']:
                    if col['name'] in extra_columns:
                        columns.append(extra_columns.pop(col['name']))
                    else:
                        columns.append(col)
                # add all remaining extra columns
                table['columns'] = columns + list(extra_columns.values())

                if 'indices' in schemas_extra[table_name]:
                    raise RuntimeError("Extra table definition contains indices, "
                                       "merging is not implemented")

                del schemas_extra[table_name]

        # Pure "extra" table definitions may contain indices
        tables += schemas_extra.values()

        # convert all dicts into named tuples
        schemas = {}
        for table in tables:

            columns = table.get('columns', [])

            table_name = table['table']
            afw_schema = afw_schemas and afw_schemas.get(table_name)
            if afw_schema:
                # use afw schema to create extra columns
                column_names = {col['name'] for col in columns}
                column_names_lower = {col.lower() for col in column_names}
                for _, field in afw_schema:
                    column = self._field2dict(field, table_name)
                    if column['name'] not in column_names:
                        # check that there is no column name that only differs in case
                        if column['name'].lower() in column_names_lower:
                            raise ValueError("afw.table column name case does not match schema column name")
                        columns.append(column)

            table_columns = []
            for col in columns:
                # For prototype set default to 0 even if columns don't specify it
                if "default" not in col:
                    default = None
                    if col['type'] not in ("BLOB", "DATETIME"):
                        default = 0
                else:
                    default = col["default"]

                column = ColumnDef(name=col['name'],
                                   type=col['type'],
                                   nullable=col.get("nullable"),
                                   default=default,
                                   description=col.get("description"),
                                   unit=col.get("unit"),
                                   ucd=col.get("ucd"))
                table_columns.append(column)

            table_indices = []
            for idx in table.get('indices', []):
                index = IndexDef(name=idx.get('name'),
                                 type=idx.get('type'),
                                 columns=idx.get('columns'))
                table_indices.append(index)

            schemas[table_name] = TableDef(name=table_name,
                                           description=table.get('description'),
                                           columns=table_columns,
                                           indices=table_indices)

        return schemas
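    # Illustrative sketch (an assumption inferred from the parsing code above, not
    # taken from the original schema files) of the YAML layout _buildSchemas expects:
    #
    #   table: DiaObject
    #   columns:
    #     - {name: diaObjectId, type: BIGINT, nullable: false, description: "Unique id"}
    #     - {name: ra, type: DOUBLE, unit: deg}
    #   indices:
    #     - {name: PK_DiaObject, type: PRIMARY, columns: [diaObjectId]}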

    def _tableColumns(self, table_name):
        """Return set of columns in a table

        Parameters
        ----------
        table_name : `str`
            Name of the table.

        Returns
        -------
        column_defs : `list`
            List of `Column` objects.
        """

        # get the list of columns in primary key, they are treated somewhat
        # specially below
        table_schema = self._schemas[table_name]
        pkey_columns = set()
        for index in table_schema.indices:
            if index.type == 'PRIMARY':
                pkey_columns = set(index.columns)
                break

        # convert all column dicts into alchemy Columns
        column_defs = []
        for column in table_schema.columns:
            kwargs = dict(nullable=column.nullable)
            if column.default is not None:
                kwargs.update(server_default=str(column.default))
            if column.name in pkey_columns:
                kwargs.update(autoincrement=False)
            ctype = self._type_map[column.type]
            column_defs.append(Column(column.name, ctype, **kwargs))

        return column_defs

    def _field2dict(self, field, table_name):
        """Convert afw schema field definition into a dict format.

        Parameters
        ----------
        field : `lsst.afw.table.Field`
            Field in afw table schema.
        table_name : `str`
            Name of the table.

        Returns
        -------
        field_dict : `dict`
            Field attributes for SQL schema:

            - ``name`` : field name (`str`)
            - ``type`` : type name in SQL, e.g. "INT", "FLOAT" (`str`)
            - ``nullable`` : `True` if column can be ``NULL`` (`bool`)
        """
        column = field.getName()
        column = self._column_map_reverse[table_name].get(column, column)
        ctype = self._afw_type_map[field.getTypeString()]
        return dict(name=column, type=ctype, nullable=True)

    def _tableIndices(self, table_name, info):
        """Return set of constraints/indices in a table

        Parameters
        ----------
        table_name : `str`
            Name of the table.
        info : `dict`
            Additional options passed to SQLAlchemy index constructor.

        Returns
        -------
        index_defs : `list`
            List of SQLAlchemy index/constraint objects.
        """

        table_schema = self._schemas[table_name]

        # convert all index dicts into alchemy Columns
        index_defs = []
        for index in table_schema.indices:
            if index.type == "INDEX":
                index_defs.append(Index(self._prefix+index.name, *index.columns, info=info))
            else:
                kwargs = {}
                if index.name:
                    kwargs['name'] = self._prefix+index.name
                if index.type == "PRIMARY":
                    index_defs.append(PrimaryKeyConstraint(*index.columns, **kwargs))
                elif index.type == "UNIQUE":
                    index_defs.append(UniqueConstraint(*index.columns, **kwargs))

        return index_defs

    def _getDoubleType(self):
        """DOUBLE type is database-specific, select one based on dialect.

        Returns
        -------
        type_object : `object`
            Database-specific type definition.
        """
        if self._engine.name == 'mysql':
            from sqlalchemy.dialects.mysql import DOUBLE
            return DOUBLE(asdecimal=False)
        elif self._engine.name == 'postgresql':
            from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION
            return DOUBLE_PRECISION
        elif self._engine.name == 'oracle':
            from sqlalchemy.dialects.oracle import DOUBLE_PRECISION
            return DOUBLE_PRECISION
        elif self._engine.name == 'sqlite':
            # all floats in sqlite are 8-byte
            from sqlalchemy.dialects.sqlite import REAL
            return REAL
        else:
            raise TypeError('cannot determine DOUBLE type, unexpected dialect: ' + self._engine.name)
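For context, a minimal standalone sketch of how column and index definitions such as those produced by `_tableColumns` and `_tableIndices` are typically assembled into a single `Table`; the engine URL, table name and column names below are illustrative only, not part of the class above.

from sqlalchemy import (Column, Index, Integer, MetaData, PrimaryKeyConstraint,
                        String, Table, create_engine)

engine = create_engine('sqlite://')
metadata = MetaData()

# hypothetical output of _tableColumns() / _tableIndices()
column_defs = [Column('id', Integer, autoincrement=False, nullable=False),
               Column('name', String(64), nullable=True)]
index_defs = [PrimaryKeyConstraint('id', name='pk_object'),
              Index('idx_object_name', 'name')]

# Table() accepts columns, constraints and indices in one argument list
object_table = Table('object', metadata, *(column_defs + index_defs))
metadata.create_all(engine)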
Example #40
0
File: db.py Project: zirmite/nimi
from sqlalchemy import create_engine, MetaData, sql, bindparam
from sqlalchemy.engine.url import URL
import pymongo as pym
from bson.objectid import ObjectId

client = pym.MongoClient()
dbm = client.names
htmltab = dbm.html
infotab = dbm.info
thmcoll = dbm.themes
assocoll = dbm.assoc

dbsql = URL(drivername="mysql+pymysql", database='babynames',
            query={'read_default_file': '/Users/azirm/.my.cnf',
                   'read_default_group': 'python',
                   'use_unicode': 1,
                   'charset': 'utf8'})
eng = create_engine(name_or_url=dbsql)
meta = MetaData(bind=eng)
meta.clear()
meta.reflect()

boynames = meta.tables['boynames']
girlnames = meta.tables['girlnames']
ntables = [boynames, girlnames]
sels = [sql.select([ntable.c.name, ntable.c.htmlid]).where(ntable.c.htmlid!=None) for ntable in ntables]

def mktable(tabledef):
    """Create the table described by ``tabledef`` if it is not already in the database."""
    meta.clear()
    meta.reflect()
    if tabledef.name not in meta.tables:
        # table is missing from the database; create just this one
        tabledef.create(bind=eng, checkfirst=True)
class DB(Base):
    # Constants: connection level
    NONE = 0  # No connection; just set self.url
    CONNECT = 1   # Connect; no transaction
    TXN = 2   # Everything in a transaction

    level = TXN

    def _engineInfo(self, url=None):
        if url is None: 
            url = self.url
        return url

    def _setup(self, url):
        self._connect(url)
        # make sure there are no tables lying around
        meta = MetaData(self.engine)
        meta.reflect()
        meta.drop_all()

    def _teardown(self):
        self._disconnect()

    def _connect(self, url):
        self.url = url
        # TODO: seems like 0.5.x branch does not work with engine.dispose and staticpool
        #self.engine = create_engine(url, echo=True, poolclass=StaticPool)
        self.engine = create_engine(url, echo=True)
        # silence the logger added by SA, nose adds its own!
        logging.getLogger('sqlalchemy').handlers=[]
        self.meta = MetaData(bind=self.engine)
        if self.level < self.CONNECT:
            return
        #self.session = create_session(bind=self.engine)
        if self.level < self.TXN: 
            return
        #self.txn = self.session.begin()

    def _disconnect(self):
        if hasattr(self, 'txn'):
            self.txn.rollback()
        if hasattr(self, 'session'):
            self.session.close()
        #if hasattr(self,'conn'):
        #    self.conn.close()
        self.engine.dispose()

    def _supported(self, url):
        db = url.split(':',1)[0]
        func = getattr(self, self._TestCase__testMethodName)
        if hasattr(func, 'supported'):
            return db in func.supported
        if hasattr(func, 'not_supported'):
            return not (db in func.not_supported)
        # Neither list assigned; assume all are supported
        return True

    def _not_supported(self, url):
        return not self._supported(url)

    def _select_row(self):
        """Select rows, used in multiple tests"""
        return self.table.select().execution_options(
            autocommit=True).execute().fetchone()

    def refresh_table(self, name=None):
        """Reload the table from the database
        Assumes we're working with only a single table, self.table, and
        metadata self.meta

        Working w/ multiple tables is not possible, as tables can only be
        reloaded with meta.clear()
        """
        if name is None:
            name = self.table.name
        self.meta.clear()
        self.table = Table(name, self.meta, autoload=True)

    def compare_columns_equal(self, columns1, columns2, ignore=None):
        """Loop through all columns and compare them"""
        def key(column):
            return column.name
        for c1, c2 in zip(sorted(columns1, key=key), sorted(columns2, key=key)):
            diffs = ColumnDelta(c1, c2).diffs
            if ignore:
                for key in ignore:
                    diffs.pop(key, None)
            if diffs:
                self.fail("Comparing %s to %s failed: %s" % (columns1, columns2, diffs))
Example #42
0
class Storage(base.Storage):
    """SQL Tabular Storage.

    Parameters
    ----------
    engine: object
        SQLAlchemy engine.
    dbschema: str
        Database schema name.
    prefix: str
        Prefix for all tables.

    """

    # Public

    def __init__(self, engine, dbschema=None, prefix=''):

        # Set attributes
        self.__engine = engine
        self.__dbschema = dbschema
        self.__prefix = prefix
        self.__schemas = {}

        # Create metadata
        self.__metadata = MetaData(
                bind=self.__engine,
                schema=self.__dbschema,
                reflect=True)

    def __repr__(self):

        # Template and format
        template = 'Storage <{engine}/{dbschema}>'
        text = template.format(
                engine=self.__engine,
                dbschema=self.__dbschema)

        return text

    @property
    def tables(self):
        """Return list of storage's table names.
        """

        # Collect
        tables = []
        for dbtable in self.__metadata.sorted_tables:
            table = dbtable.name
            table = mappers.restore_table(self.__prefix, table)
            if table is not None:
                tables.append(table)

        return tables

    def check(self, table):
        """Return if table exists.
        """

        # Check existence
        existence = table in self.tables

        return existence

    def create(self, table, schema):
        """Create table by schema.

        Parameters
        ----------
        table: str/list
            Table name or list of table names.
        schema: dict/list
            JSONTableSchema schema or list of schemas.

        Raises
        ------
        RuntimeError
            If table already exists.

        """

        # Make lists
        tables = table
        if isinstance(table, six.string_types):
            tables = [table]
        schemas = schema
        if isinstance(schema, dict):
            schemas = [schema]

        # Check tables for existence
        for table in tables:
            if self.check(table):
                message = 'Table "%s" already exists.' % table
                raise RuntimeError(message)

        # Define tables
        for table, schema in zip(tables, schemas):

            # Add to schemas
            self.__schemas[table] = schema

            # Create SQLAlchemy table
            table = mappers.convert_table(self.__prefix, table)
            jsontableschema.validate(schema)
            columns, constraints = mappers.convert_schema(
                    self.__prefix, table, schema)
            Table(table, self.__metadata, *(columns+constraints))

        # Create tables, update metadata
        self.__metadata.create_all()
        # the new tables are already registered in this metadata, no reflect needed

    def delete(self, table):
        """Delete table.

        Parameters
        ----------
        table: str/list
            Table name or list of table names.

        Raises
        ------
        RuntimeError
            If table doesn't exist.

        """

        # Make lists
        tables = table
        if isinstance(table, six.string_types):
            tables = [table]

        # Iterate over tables
        dbtables = []
        for table in tables:

            # Check existence
            if not self.check(table):
                message = 'Table "%s" doesn\'t exist.' % table
                raise RuntimeError(message)

            # Remove from schemas
            if table in self.__schemas:
                del self.__schemas[table]

            # Add table to dbtables
            dbtable = self.__get_dbtable(table)
            dbtables.append(dbtable)

        # Drop tables, update metadata
        self.__metadata.drop_all(tables=dbtables)
        self.__metadata.clear()
        self.__metadata.reflect()

    def describe(self, table):
        """Return table's JSONTableSchema schema.

        Parameters
        ----------
        table: str
            Table name.

        Returns
        -------
        dict
            JSONTableSchema schema.

        """

        # Get schema
        if table in self.__schemas:
            schema = self.__schemas[table]
        else:
            dbtable = self.__get_dbtable(table)
            table = mappers.convert_table(self.__prefix, table)
            schema = mappers.restore_schema(
                    self.__prefix, table, dbtable.columns, dbtable.constraints)

        return schema

    def read(self, table):
        """Read data from table.

        Parameters
        ----------
        table: str
            Table name.

        Returns
        -------
        generator
            Data tuples generator.

        """

        # Get result
        dbtable = self.__get_dbtable(table)
        result = dbtable.select().execute()

        # Yield data
        for row in result:
            yield row

    def write(self, table, data):
        """Write data to table.

        Parameters
        ----------
        table: str
            Table name.
        data: list
            List of data tuples.

        """

        # Process data
        schema = self.describe(table)
        model = SchemaModel(schema)
        cdata = []
        for row in data:
            rdata = {}
            for index, field in enumerate(model.fields):
                value = row[index]
                try:
                    value = model.cast(field['name'], value)
                except InvalidObjectType as exception:
                    value = json.loads(value)
                rdata[field['name']] = value
            cdata.append(rdata)

        # Insert data
        dbtable = self.__get_dbtable(table)
        dbtable.insert().execute(cdata)

    # Private

    def __get_dbtable(self, table):
        """Return dbtable instance from metadata.
        """

        # Prepare dict key
        key = mappers.convert_table(self.__prefix, table)
        if self.__dbschema:
            # TODO: Start to test dbschema parameter
            key = '.'.join((self.__dbschema, key))  # pragma: no cover

        return self.__metadata.tables[key]
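A hedged usage sketch for the `Storage` class above; it assumes the `mappers`/`jsontableschema` machinery the class imports is installed, and the table name, schema and rows are made up for illustration.

from sqlalchemy import create_engine

engine = create_engine('sqlite://')
storage = Storage(engine, prefix='demo_')

# minimal JSONTableSchema-style schema (illustrative)
schema = {'fields': [{'name': 'id', 'type': 'integer'},
                     {'name': 'title', 'type': 'string'}]}

storage.create('articles', schema)
storage.write('articles', [(1, 'first'), (2, 'second')])

for row in storage.read('articles'):
    print(row)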
Example #43
0
class Database():
    '''A wrapper around a bsddb database, acting as a dictionary.
    Can accept all Python datatypes as keys and values. All bencoding types
    are just dictionaries, lists, integers and strings, so this is enough!'''

    def __init__(self, dbname, flag='c'):
        '''Read the database given by dbname. '''
        dbPath = dbname
        engine = create_engine("sqlite:///%s" % dbPath, echo=True)
        self.data = MetaData(engine)
        print(self.data)

    def __contains__(self, key):
        '''Return true if database contains a key'''
        key = dumps(key)
        # has_key returns 1 or 0 (but shouldn't this just be ``key in data``?)
        boolean = self.data.has_key(key)
        return bool(boolean)

    def __getitem__(self, key):
        '''Return the value held by the key'''
        key = dumps(key)
        value = self.data[key]
        return loads(value)

    has_key = __contains__
    get = __getitem__

    def __setitem__(self, key, value):
        '''Set the value of key to the value given. '''
        key = dumps(key)
        value = dumps(value)
        self.data[key] = value

    def __repr__(self):
        '''represents the database'''
        keys = self.data.keys()
        items = [(loads(key), loads(self.data[key])) for key in keys]
        return str(dict(items))

    def clear(self):
        '''Remove all data in the database. '''
        self.data.clear()

    def items(self):
        '''Return a list of tuples of the keys and values '''
        keys = self.data.keys()
        items = [(loads(key), loads(self.data[key])) for key in keys]
        return items

    def values(self):
        '''Returns a list of values '''
        values = [loads(value) for value in self.data.values()]
        return values

    def pop(self, key):
        '''Remove the key from the database and return the value it held.'''
        key = dumps(key)
        value = self.data[key]
        del self.data[key]
        return loads(value)

    def setdefault(self, key, default):
        '''Return the value held by key, or default if it isn't in the database.'''
        key = dumps(key)
        try:
            value = self.data[key]
        except KeyError:
            return default
        return loads(value)

    def __del__(self):
        '''Syncs database'''
        self.data.sync()
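For comparison, a minimal hedged sketch of a dictionary-style store backed by an explicit SQLAlchemy key/value table rather than a `MetaData` object, using `pickle` for serialisation; all names here are illustrative and this is not the project's own code.

import pickle

from sqlalchemy import Column, LargeBinary, MetaData, Table, create_engine, select

engine = create_engine('sqlite://')
metadata = MetaData()
kv = Table('kv', metadata,
           Column('key', LargeBinary, primary_key=True),
           Column('value', LargeBinary))
metadata.create_all(engine)

def put(key, value):
    """Store a picklable key/value pair, overwriting any previous value."""
    # note: pickled keys must be byte-for-byte stable; fine for simple types
    with engine.begin() as conn:
        conn.execute(kv.delete().where(kv.c.key == pickle.dumps(key)))
        conn.execute(kv.insert().values(key=pickle.dumps(key),
                                        value=pickle.dumps(value)))

def get(key, default=None):
    """Return the stored value for ``key``, or ``default`` if it is absent."""
    with engine.begin() as conn:
        row = conn.execute(select([kv.c.value])
                           .where(kv.c.key == pickle.dumps(key))).fetchone()
    return pickle.loads(row[0]) if row else default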
class Analytics(object):
    """
    Analytics App
    """
    def __init__(self, app=None, db=None, blueprints=None):
        """
        Create a new instance of the analyzer.
        :param app: The Flask App instance
        :param db: A Sqlalchemy instance, e.g. db = Sqlalchemy(app)
        :param blueprints:  A whitelist for blueprints that should be tracked.
                            If this attribute is set, only views whose blueprint name matches one of the names in the
                            blueprints list will be tracked. To track all requests, set this attribute to None.
                            Example whitelist could be: blueprints = ['main', 'admin'].
        """
        self.app = None
        self.db = None
        self.table = None
        self.metadata = None
        self.table_name = 'analytics_data'
        self.engine = None
        self.blueprints = None
        if app and db:
            self.init_app(app, db, blueprints)

    def init_app(self, app, db, blueprints=None):
        """
        Initializes an existing instance. Useful when using the application factory pattern.
        :param app: The Flask App instance
        :param db: A Sqlalchemy instance, e.g. db = Sqlalchemy(app)
        :param blueprints:  A whitelist for blueprints that should be tracked.
                            If this attribute is set, only views whose blueprint name matches one of the names in the
                            blueprints list will be tracked. To track all requests, set this attribute to None.
                            Example whitelist could be: blueprints = ['main', 'admin'].
        """
        if not app or not db:
            raise ValueError(
                "Flask App instance and sqlalchemy db object are required")
        self.app = app
        self.db = db

        self.blueprints = blueprints

        # check if table exists on first startup or create it
        # NOTE: The table cannot be altered yet, so when the table layout
        # changes it is necessary to write a custom migration.

        with self.app.app_context():
            self.engine = db.engine
            self.metadata = MetaData(db.engine)
            if not self.engine.dialect.has_table(self.engine, self.table_name):
                self._create_table()
            else:
                self.metadata.reflect(bind=self.engine)
                self.table = self.metadata.tables[self.table_name]

        # register event hooks
        app.before_request(self.before_request)
        app.after_request(self.after_request)

    def _drop_table(self):
        """
        Drops the analytics table
        :return:
        """
        self.table.drop(checkfirst=True)

    def _create_table(self):
        self.table = Table(self.table_name, self.metadata,
                           Column('id', Integer, primary_key=True),
                           Column('url', String(128)),
                           Column('user_agent', String(256)),
                           Column('view_args', String(128)),
                           Column('status_code', Integer),
                           Column('path', String(64)),
                           Column('latency', Float),
                           Column('timestamp', DateTime),
                           Column('request', String(64)),
                           Column('url_args', String(64)),
                           Column('ua_browser', String(16)),
                           Column('ua_language', String(16)),
                           Column('ua_platform', String(16)),
                           Column('ua_version', String(16)),
                           Column('referer', String(64)),
                           Column('uuid', String(36), default='0'))
        self.table.create(bind=self.engine)

    def reinitialize_db(self):
        self._drop_table()
        self.metadata.clear()
        self._create_table()

    def store_record(self, record: AnalyticsRecord):
        """
        Store a record to the database.
        """
        with self.engine.begin() as conn:
            stmt = self.table.insert().values(
                url=str(record.url)[:128],
                uuid=record.uid,
                ua_browser=str(getattr(record.user_agent, 'browser',
                                       '-'))[:16],
                ua_language=str(getattr(record.user_agent, 'language',
                                        '-'))[:16],
                ua_platform=str(getattr(record.user_agent, 'platform',
                                        '-'))[:16],
                ua_version=str(getattr(record.user_agent, 'version',
                                       '-'))[:16],
                user_agent=str(record.user_agent),
                view_args=json.dumps(record.view_args)[:64],
                status_code=record.status_code,
                path=str(record.path)[:64],
                latency=record.latency,
                request=str(record.request)[:64],
                timestamp=record.timestamp,
                referer=str(record.referer)[:64])
            # catch errors in order to prevent an app crash and pass the message to the app's logger
            try:
                conn.execute(stmt)
            except Exception as e:
                self.app.logger.error(e)

    @property
    def query(self):
        """
        Query the analytics database table
        :return: A Sqlalchemy.BaseQuery instance that can be used just like a normal sqlalchemy query
        """
        return self.db.session.query(self.table)

    def before_request(self):
        """ Only used to store the time when a request is first issued to be able to measure latency"""
        g.start_time = dt.datetime.utcnow()

        # create a uuid to identify a client during its session
        # this is way faster than hashing the user_agent
        if 'UUID' not in session.keys():
            session['UUID'] = str(uuid.uuid4())

    def after_request(self, response):
        """
        Store all information about the request
        :param response: pass the response object without touching it
        :return: original Flask response
        """
        ctx = _request_ctx_stack.top

        if self.blueprints:
            if ctx.request.blueprint not in self.blueprints:
                return response

        t_0 = getattr(g, 'start_time', dt.datetime.utcnow())
        record = AnalyticsRecord(
            uid=session.get('UUID', 0),
            url=ctx.request.url,
            user_agent=ctx.request.user_agent,
            view_args=ctx.request.view_args,
            status_code=response.status_code,
            path=ctx.request.path,
            latency=(dt.datetime.utcnow() - t_0).total_seconds(),  # latency in seconds
            timestamp=t_0,
            content_length=response.content_length,
            request=f"{ctx.request.method} {ctx.request.url} {ctx.request.environ.get('SERVER_PROTOCOL')}",
            url_args=dict([(k, ctx.request.args[k])
                           for k in ctx.request.args]),
            referer=request.headers.get("Referer"))

        self.store_record(record)
        return response

    # SOME USEFUL PREDEFINED QUERIES

    def query_between(self, from_=None, until=None):
        """
        Query the analytics table. By using db.session.query(...) it is possible to use the built-in pagination.
        :param from_: datetime.datetime object specifying the latest date that should be included (defaults to utcnow)
        :param until: datetime.datetime object specifying the earliest date that should be included (defaults to 1970-01-01)
        :return: BaseQuery
        """
        if from_ is None:
            from_ = dt.datetime.utcnow()
        if until is None:
            until = dt.datetime(1970, 1, 1)

        return self.query.filter(self.table.c.timestamp.between(until, from_)) \
            .order_by(self.table.c.timestamp.desc())

    def total_unique_visits(self):
        return self.db.session.query(func.count(distinct(
            self.table.c.uuid))).scalar()

    def total_unique_visits_during(self, from_=None, until=None):
        if from_ is None:
            from_ = dt.datetime.utcnow()
        if until is None:
            until = dt.datetime(1970, 1, 1)

        return self.db.session.query(func.count(distinct(self.table.c.uuid))) \
            .filter(self.table.c.timestamp.between(until, from_)) \
            .scalar()

    def top_page(self):
        from sqlalchemy import desc
        return self.db.session.query(func.count(self.table.c.path)
                                     .label('count'), self.table.c.path) \
            .group_by(self.table.c.path) \
            .order_by(desc('count')).first()
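A hedged end-to-end usage sketch for the `Analytics` extension above, assuming Flask-SQLAlchemy and a Flask version where the extension's internals (e.g. `_request_ctx_stack`) still exist; the database URI, secret key and route below are illustrative.

from flask import Flask
from flask_sqlalchemy import SQLAlchemy

app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///analytics.db'
app.config['SECRET_KEY'] = 'dev'          # sessions are used to tag visitors

db = SQLAlchemy(app)
analytics = Analytics(app, db, blueprints=None)   # track every request

@app.route('/')
def index():
    return 'hello'

# exercise the before/after request hooks once
with app.test_client() as client:
    client.get('/')

with app.app_context():
    print(analytics.total_unique_visits())
    print(analytics.query_between().all())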