class TestMetaData(unittest.TestCase):
    """Test case with per-test metadata isolated from the camelot default.

    Each test builds its own ``MetaData``, class registry, declarative base
    and session, so model configurations under test cannot interfere with
    each other.
    """

    def setUp(self):
        from sqlalchemy import MetaData
        from sqlalchemy.ext.declarative import declarative_base

        self.metadata = MetaData()
        self.class_registry = {}
        # Dedicated declarative base bound to the isolated metadata.
        self.Entity = declarative_base(
            cls=EntityBase,
            metadata=self.metadata,
            metaclass=EntityMeta,
            class_registry=self.class_registry,
            constructor=None,
            name='Entity',
        )
        self.metadata.bind = 'sqlite://'
        self.session = Session()

    def create_all(self):
        """Resolve deferred properties, then create all mapped tables."""
        from camelot.core.orm import process_deferred_properties

        process_deferred_properties(self.class_registry)
        self.metadata.create_all()

    def tearDown(self):
        # Drop the tables and forget their definitions so the next test
        # starts from a clean slate.
        self.metadata.drop_all()
        self.metadata.clear()
class Recommend(object):
    """Builds a ranked list of recommended feed URLs from bookmark data.

    NOTE(review): uses the legacy SQLAlchemy implicit-execution API
    (``select(columns=...)``, ``statement.execute()``), which requires the
    MetaData to be bound to an engine — as done in ``__init__``.
    """

    def __init__(self, engine):
        # Bind a MetaData to the engine so statements built on its tables
        # can be executed implicitly in _load_top().
        self.engine = engine
        self.md = MetaData(self.engine)

    def select(self):
        """Return the top recommendations as (title, url, id) tuples."""
        rank_urls = []
        for rec in self._load_top():
            rank_urls.append((rec["title"], rec["url"], rec["id"]))
        return rank_urls

    def _load_top(self, num=5):
        """Load top recommend url"""
        # Drop any table definitions from a previous call so re-declaring
        # the tables below does not conflict.
        self.md.clear()
        # Partial table definitions: only the columns this query touches.
        my_bookmark = Table('my_bookmark', self.md, Column('url_id'))
        bookmark = Table('bookmark', self.md, Column('url_id'))
        feed = Table('feed', self.md, Column('id'), Column('title'), Column('url'))
        notification = Table('notification', self.md, Column('url_id'))
        # bookmark INNER JOIN feed, then LEFT OUTER JOINs against
        # my_bookmark and notification so the WHERE clauses below can keep
        # only rows with no match in either (anti-join pattern).
        j1 = join(bookmark, feed, bookmark.c.url_id == feed.c.id)
        j2 = j1.join(my_bookmark, bookmark.c.url_id == my_bookmark.c.url_id, isouter=True)
        j3 = j2.join(notification, notification.c.url_id == bookmark.c.url_id, isouter=True)
        # Most-bookmarked feeds not yet bookmarked by me and not yet
        # notified, ordered by bookmark count, limited to `num` rows.
        s = select(columns=[feed.c.id, feed.c.url, feed.c.title]).\
            select_from(j3).where(my_bookmark.c.url_id == None).\
            where(notification.c.url_id == None).\
            group_by(bookmark.c.url_id).\
            having(count(bookmark.c.url_id)).\
            order_by(count(bookmark.c.url_id).desc()).\
            limit(num)
        #print(s) ### For debug
        return s.execute()
class TestMetaData(unittest.TestCase):
    """Isolated-metadata test case for camelot model configurations.

    setUp/tearDown give every test its own MetaData and declarative base so
    that model definitions made in one test never leak into another.
    """

    def setUp(self):
        from sqlalchemy import MetaData
        from sqlalchemy.ext.declarative import declarative_base

        self.metadata = MetaData()
        self.class_registry = dict()
        base_kwargs = dict(
            cls=EntityBase,
            metadata=self.metadata,
            metaclass=EntityMeta,
            class_registry=self.class_registry,
            constructor=None,
            name='Entity',
        )
        self.Entity = declarative_base(**base_kwargs)
        self.metadata.bind = 'sqlite://'
        self.session = Session()

    def create_all(self):
        """Process deferred properties and emit CREATE TABLE for all models."""
        from camelot.core.orm import process_deferred_properties

        process_deferred_properties(self.class_registry)
        self.metadata.create_all()

    def tearDown(self):
        self.metadata.drop_all()
        self.metadata.clear()
def create_db_and_mapper():
    """Create the ``order_lines`` table and classically map ``models.OrderLine``.

    Builds the table definition on a fresh MetaData, issues CREATE TABLE on
    the engine returned by ``get_engine()``, clears any previously registered
    mappers, then maps ``models.OrderLine`` onto the table (imperative /
    classical SQLAlchemy mapping).
    """
    from sqlalchemy.orm import (
        mapper,
        clear_mappers,
    )
    from sqlalchemy import (
        MetaData,
        Table,
        Column,
        Integer,
        String,
    )

    metadata = MetaData()
    # The original also called metadata.clear() here; clearing a brand-new
    # MetaData is a no-op, so the call was removed.
    order_lines = Table(
        "order_lines",
        metadata,
        Column("id", Integer, primary_key=True, autoincrement=True),
        Column("sku", String),
        Column("qty", Integer),
        Column("ref", String),
    )
    metadata.create_all(get_engine())
    clear_mappers()
    mapper(models.OrderLine, order_lines)
def test_explicit_default_schema_metadata(self):
    """Round-trip create/reflect with an explicit default schema on MetaData."""
    engine = testing.db
    # SQLite: "main" works for CREATE TABLE main.foo / SELECT FROM main.foo,
    # but fails on FOREIGN KEY(col2) REFERENCES main.table1 (col1).
    if testing.against("sqlite"):
        schema = "main"
    else:
        schema = engine.dialect.default_schema_name
    assert bool(schema)

    metadata = MetaData(engine, schema=schema)
    table1 = Table(
        "table1",
        metadata,
        Column("col1", sa.Integer, primary_key=True),
        test_needs_fk=True,
    )
    table2 = Table(
        "table2",
        metadata,
        Column("col1", sa.Integer, primary_key=True),
        Column("col2", sa.Integer, sa.ForeignKey("table1.col1")),
        test_needs_fk=True,
    )
    try:
        metadata.create_all()
        # checkfirst must be a no-op the second time around.
        metadata.create_all(checkfirst=True)
        assert len(metadata.tables) == 2

        # Forget the Python-side definitions and reload both via reflection.
        metadata.clear()
        table1 = Table("table1", metadata, autoload=True)
        table2 = Table("table2", metadata, autoload=True)
        assert len(metadata.tables) == 2
    finally:
        metadata.drop_all()
def test_attached_as_schema(self):
    # Verify that an ATTACHed SQLite database behaves as a schema: tables
    # can be created into it, reflected from it, queried (full CRUD) and
    # dropped, all through the schema='test_schema' argument.
    cx = testing.db.connect()
    try:
        cx.execute('ATTACH DATABASE ":memory:" AS test_schema')
        dialect = cx.dialect
        # The freshly attached database must start out empty.
        assert dialect.get_table_names(cx, 'test_schema') == []
        meta = MetaData(cx)
        Table('created', meta, Column('id', Integer), schema='test_schema')
        # Reflect SQLite's own catalog table from the attached schema.
        alt_master = Table('sqlite_master', meta, autoload=True, schema='test_schema')
        meta.create_all(cx)
        eq_(dialect.get_table_names(cx, 'test_schema'), ['created'])
        assert len(alt_master.c) > 0
        # Drop the Python-side definitions and reflect 'created' back,
        # then exercise insert/select/update/delete on the reflected table.
        meta.clear()
        reflected = Table('created', meta, autoload=True, schema='test_schema')
        assert len(reflected.c) == 1
        cx.execute(reflected.insert(), dict(id=1))
        r = cx.execute(reflected.select()).fetchall()
        assert list(r) == [(1, )]
        cx.execute(reflected.update(), dict(id=2))
        r = cx.execute(reflected.select()).fetchall()
        assert list(r) == [(2, )]
        cx.execute(reflected.delete(reflected.c.id == 2))
        r = cx.execute(reflected.select()).fetchall()
        assert list(r) == []
        # note that sqlite_master is cleared, above
        meta.drop_all()
        assert dialect.get_table_names(cx, 'test_schema') == []
    finally:
        cx.execute('DETACH DATABASE test_schema')
def test_schema_collection_remove_all(self):
    """MetaData.clear() must forget tables and their schema names alike."""
    metadata = MetaData()
    for table_name, schema_name in (('t1', 'foo'), ('t2', 'bar')):
        Table(table_name, metadata, Column('x', Integer), schema=schema_name)
    metadata.clear()
    eq_(metadata._schemas, set())
    eq_(len(metadata.tables), 0)
def drop_all_tables(engine):
    """Drop every table in the database, including ones SQLAlchemy did not
    previously know about.

    The metadata is populated by reflection so ``drop_all`` sees the real,
    current set of tables on *engine* rather than only the tables declared
    in Python.

    :param engine: SQLAlchemy engine connected to the target database
    :return: None
    """
    meta = MetaData(engine)
    # Reflect once. The original reflect()/clear()/reflect() sequence was
    # redundant: clearing a just-reflected MetaData and reflecting again
    # yields the same state as a single reflect on a fresh MetaData.
    meta.reflect()
    meta.drop_all()
def parse(self, engine):
    """Reflect the schema named ``self.name`` from *engine* and wrap its tables.

    Tables pulled in through cross-schema foreign keys are skipped. Every
    table belonging to this schema is wrapped in a project-level ``Table``
    and stored in ``self.tables`` keyed by table name, then parsed.
    """
    meta = MetaData(bind=engine)
    # The original also called meta.clear() here; clearing a freshly
    # constructed MetaData is a no-op, so the call was removed.
    meta.reflect(schema=self.name)
    for table_ref in meta.sorted_tables:
        if table_ref.schema != self.name:
            # This is a table imported through a foreign key
            continue
        table_name = table_ref.name
        wrapper = Table(table_name, self)
        self.tables[table_name] = wrapper
        wrapper.parse(table_ref, engine)
class Notification(object):
    """Records that a URL has been notified, in the ``notification`` table."""

    def __init__(self, engine):
        self.engine = engine
        self.md = MetaData(self.engine)

    def add_as_notified(self, url_id):
        """Insert *url_id* into ``notification`` stamped with today's date."""
        # NOTE(review): self.md is cleared but never used afterwards — the
        # insert below reflects against a brand-new local MetaData instead.
        # Looks like leftover cruft; confirm against other callers of
        # self.md before removing.
        self.md.clear()
        md = MetaData(self.engine)
        t = Table('notification', md, autoload=True)
        # notified_date is stored as a 'YYYYMMDD' string.
        i = insert(t).values(url_id=url_id, notified_date=datetime.now().strftime('%Y%m%d'))
        i.execute()
def downgrade(migrate_engine):
    """Revert quotas to the old-style table layout.

    Copies rows from the new-style ``quotas`` table into a freshly created
    old-style table, drops the new one, and renames the old-style table
    back to ``quotas``.
    """
    # Operations to reverse the above upgrade go here.
    meta = MetaData()
    meta.bind = migrate_engine
    new_quotas = quotas_table(meta)
    # Refuse to downgrade if collapsing rows would lose active data.
    assert_new_quotas_have_no_active_duplicates(migrate_engine, new_quotas)
    old_quotas = old_style_quotas_table(meta, 'quotas_old')
    old_quotas.create()
    convert_backward(migrate_engine, old_quotas, new_quotas)
    new_quotas.drop()
    # clear metadata to work around this:
    # http://code.google.com/p/sqlalchemy-migrate/issues/detail?id=128
    meta.clear()
    # Re-declare the table under its temporary name, then rename in place.
    old_quotas = quotas_table(meta, 'quotas_old')
    old_quotas.rename('quotas')
def _setup_db(self):
    """Connect to the database at ``DATABASE_URL`` and define/create the
    upload-tracking tables.

    Defines ``recording_upload_history`` and ``meeting_upload_status`` on a
    bound MetaData and issues CREATE TABLE for any that do not yet exist.
    Stores the open connection on ``self.connection`` and the two Table
    objects on ``self.upload_history`` / ``self.upload_status``.
    """
    # Removed from the original: an unused `Base = declarative_base()`
    # local, and a `metadata.clear()` call that is a no-op on a fresh
    # MetaData.
    metadata = MetaData()
    engine = create_engine(os.environ.get('DATABASE_URL'))
    self.connection = engine.connect()
    metadata.bind = engine
    self.upload_history = Table(
        'recording_upload_history', metadata,
        Column('id', Integer, primary_key=True),
        Column('topic', String(512)),
        Column('meeting_id', String(512)),
        Column('recording_id', String(512)),
        Column('meeting_uuid', String(512)),
        # Column('meeting_link', String(512)),
        Column('start_time', String(512)),
        Column('file_name', String(512)),
        Column('file_size', Integer),
        Column('cnt_files', Integer),
        Column('recording_link', String(512)),
        Column('folder_link', String(512)),
        Column('status', String(256)),
        Column('message', Text),
        Column('run_at', String(256)),
    )
    self.upload_status = Table(
        'meeting_upload_status', metadata,
        Column('id', Integer, primary_key=True),
        Column('topic', String(512)),
        Column('meeting_id', String(512)),
        Column('meeting_uuid', String(512)),
        # Column('meeting_link', String(512)),
        Column('start_time', String(512)),
        Column('folder_link', String(512)),
        Column('cnt_files', Integer),
        Column('status', Boolean),
        Column('is_deleted', Boolean),
        Column('run_at', String(256)),
    )
    metadata.create_all()
def upgrade(migrate_engine):
    """Migrate quotas to the new-style table layout.

    Copies rows from the old-style ``quotas`` table into a freshly created
    new-style table, drops the old one, and renames the new-style table
    into place as ``quotas``.
    """
    # Upgrade operations go here. Don't create your own engine;
    # bind migrate_engine to your metadata
    meta = MetaData()
    meta.bind = migrate_engine
    old_quotas = quotas_table(meta)
    # Refuse to upgrade if expanding rows would produce conflicting data.
    assert_old_quotas_have_no_active_duplicates(migrate_engine, old_quotas)
    new_quotas = new_style_quotas_table(meta, 'quotas_new')
    new_quotas.create()
    convert_forward(migrate_engine, old_quotas, new_quotas)
    old_quotas.drop()
    # clear metadata to work around this:
    # http://code.google.com/p/sqlalchemy-migrate/issues/detail?id=128
    meta.clear()
    # Re-declare the table under its temporary name, then rename in place.
    new_quotas = quotas_table(meta, 'quotas_new')
    new_quotas.rename('quotas')
class User(object):
    """User class."""

    def __init__(self, engine, name):
        # The MetaData is bound to the engine so reflected tables can use
        # the legacy implicit-execution API below.
        self.engine = engine
        self.md = MetaData(self.engine)
        self.name = name

    @property
    def id(self):
        """Load id."""
        # Lazy lookup by name; the row is created on first access.
        logging.debug('Fetch id')
        n = self._load_user_no()
        if n:
            return n
        return self._append_user()

    def _append_user(self):
        """Add new recommend user."""
        # Clear before reflecting to avoid "table already defined"
        # conflicts on repeated reflection of 'user'.
        self.md.clear()
        t = Table('user', self.md, autoload=True)
        i = insert(t).values(name=self.name)
        i.execute()
        # TODO: Change logic.
        # Re-query for the id instead of using the insert result.
        _id = self._load_user_no()
        logging.info('Add new user(id={}, name={}).'.format(_id, self.name))
        return _id

    def _load_user_no(self):
        """Load user_no."""
        self.md.clear()
        t = Table('user', self.md, autoload=True)
        c_name = column('name')
        s = select(columns=[column('id')], from_obj=t).where(c_name==self.name)
        r = s.execute().fetchone()
        # None when no row matches this user's name.
        _id = None
        if r:
            _id = r['id']
        logging.info('Load user id(name={}, id={}).'.format(self.name, _id))
        return _id
class User(object):
    """Recommendation user backed by the reflected ``user`` table."""

    def __init__(self, engine, name):
        self.engine = engine
        self.md = MetaData(self.engine)
        self.name = name

    @property
    def id(self):
        """Return this user's id, inserting the row on first access."""
        logging.debug('Fetch id')
        existing = self._load_user_no()
        return existing if existing else self._append_user()

    def _append_user(self):
        """Insert a new user row and return its id."""
        self.md.clear()
        user_table = Table('user', self.md, autoload=True)
        insert(user_table).values(name=self.name).execute()
        # TODO: Change logic.
        new_id = self._load_user_no()
        logging.info('Add new user(id={}, name={}).'.format(new_id, self.name))
        return new_id

    def _load_user_no(self):
        """Look this user's id up by name; None when no row matches."""
        self.md.clear()
        user_table = Table('user', self.md, autoload=True)
        name_col = column('name')
        query = select(columns=[column('id')], from_obj=user_table).where(name_col == self.name)
        row = query.execute().fetchone()
        found_id = row['id'] if row else None
        logging.info('Load user id(name={}, id={}).'.format(self.name, found_id))
        return found_id
class DatabaseCtx(object):
    """Engine/connection/metadata context for one configured database.

    The database is looked up by name in the module-level ``DATABASE``
    mapping; its tables are reflected into ``self.meta`` on construction.
    """

    def __init__(self, dbname):
        if dbname not in DATABASE:
            raise RuntimeError("Database name \"%s\" not known" % dbname)
        self.db_info = DATABASE[dbname]
        # Optional per-database SQL tracing via the shared dblog logger.
        if truthy(self.db_info.get('sql_tracing')):
            dblog.setLevel(logging.INFO)
        self.eng = create_engine(self.db_info['connect_str'])
        self.conn = self.eng.connect()
        self.meta = MetaData(self.conn)
        self.meta.reflect()

    def create_schema(self, tables = None, dryrun = False, force = False):
        """Create all tables, or the comma-separated subset in *tables*.

        NOTE(review): `dryrun` and `force` are accepted but unused here —
        presumably for CLI-signature symmetry with drop_schema; confirm.
        """
        if len(self.meta.tables) > 0:
            raise RuntimeError("Schema must be empty, create is aborted")
        # Populate self.meta with the project's declared schema.
        schema.load_schema(self.meta)
        if not tables or tables == 'all':
            self.meta.create_all()
        else:
            to_create = [self.meta.tables[tab] for tab in tables.split(',')]
            self.meta.create_all(tables=to_create)

    def drop_schema(self, tables = None, dryrun = False, force = False):
        """Drop all tables (requires *force*), or the subset in *tables*.

        NOTE(review): `dryrun` is accepted but unused here — confirm intent.
        """
        if len(self.meta.tables) == 0:
            raise RuntimeError("Schema is empty, nothing to drop")
        if not tables or tables == 'all':
            if not force:
                raise RuntimeError("Force must be specified if no list of tables given")
            self.meta.drop_all()
            self.meta.clear()
        else:
            to_drop = [self.meta.tables[tab] for tab in tables.split(',')]
            self.meta.drop_all(tables=to_drop)
            # Resync the in-memory metadata with what is left in the DB.
            self.meta.clear()
            self.meta.reflect()

    def get_table(self, name):
        """Return the reflected Table object named *name*."""
        return self.meta.tables[name]
def test_explicit_default_schema(self):
    # Round-trip create/reflect using an explicit schema= on each Table,
    # where the schema is the backend's default schema name.
    engine = testing.db
    if testing.against('mysql'):
        schema = testing.db.url.database
    elif testing.against('postgres'):
        schema = 'public'
    elif testing.against('sqlite'):
        # Works for CREATE TABLE main.foo, SELECT FROM main.foo, etc.,
        # but fails on:
        # FOREIGN KEY(col2) REFERENCES main.table1 (col1)
        schema = 'main'
    else:
        schema = engine.dialect.get_default_schema_name(engine.connect())
    metadata = MetaData(engine)
    table1 = Table('table1', metadata,
                   Column('col1', sa.Integer, primary_key=True),
                   test_needs_fk=True,
                   schema=schema)
    table2 = Table('table2', metadata,
                   Column('col1', sa.Integer, primary_key=True),
                   Column('col2', sa.Integer,
                          sa.ForeignKey('%s.table1.col1' % schema)),
                   test_needs_fk=True,
                   schema=schema)
    try:
        metadata.create_all()
        # The second create with checkfirst must be a no-op.
        metadata.create_all(checkfirst=True)
        assert len(metadata.tables) == 2
        # Forget the Python-side definitions and reload both by reflection.
        metadata.clear()
        table1 = Table('table1', metadata, autoload=True, schema=schema)
        table2 = Table('table2', metadata, autoload=True, schema=schema)
        assert len(metadata.tables) == 2
    finally:
        metadata.drop_all()
class MysqlTopicStorage(TopicStorageInterface):
    """MySQL-backed storage for 'topic' data tables.

    Topic tables are reflected on demand (named ``topic_<name>``), WHERE
    clauses and update dictionaries are built from mongo-like operator
    dicts, and compiled statements are cached in ``cacheman[STMT]``.
    """

    def __init__(self, client, storage_template):
        # client: a SQLAlchemy engine; storage_template supplies topic
        # type/factor lookups used when converting rows back to dicts.
        self.engine = client
        self.storage_template = storage_template
        self.insp = inspect(client)
        self.metadata = MetaData()
        # Serializes reflection into the shared MetaData.
        self.lock = threading.RLock()
        log.info("mysql template initialized")

    def get_topic_table_by_name(self, table_name):
        """Reflect and return the Table for *table_name* (thread-safe)."""
        self.lock.acquire()
        try:
            table = Table(table_name, self.metadata, extend_existing=False,
                          autoload=True, autoload_with=self.engine)
            return table
        finally:
            self.lock.release()

    def build_mysql_where_expression(self, table, where):
        """Translate a mongo-style *where* dict into a SQLAlchemy clause.

        Supports "and"/"or" composites and per-column operator dicts
        (=, !=, like, in, not-in, >, >=, <, <=, between); a plain value
        means equality. NOTE(review): returns on the FIRST key processed —
        multiple top-level keys are silently ignored; confirm callers only
        pass single-key dicts or "and"/"or" composites.
        """
        for key, value in where.items():
            if key == "and" or key == "or":
                result_filters = self.get_result_filters(table, value)
                if key == "and":
                    return and_(*result_filters)
                if key == "or":
                    return or_(*result_filters)
            else:
                if isinstance(value, dict):
                    for k, v in value.items():
                        if k == "=":
                            return table.c[key.lower()] == v
                        if k == "!=":
                            return operator.ne(table.c[key.lower()], v)
                        if k == "like":
                            # NOTE(review): this condition is a tautology
                            # (always true for any v) — likely meant
                            # `v != "" and v is not None`.
                            if v != "" or v != '' or v is not None:
                                return table.c[key.lower()].like("%" + v + "%")
                        if k == "in":
                            if isinstance(table.c[key.lower()].type, JSON):
                                # JSON columns: emit raw JSON_CONTAINS()
                                # predicates OR-ed together per item.
                                stmt = ""
                                if isinstance(v, list):
                                    # value_ = ",".join(v)
                                    for item in v:
                                        if stmt == "":
                                            stmt = "JSON_CONTAINS(" + key.lower(
                                            ) + ", '[\"" + item + "\"]', '$') = 1"
                                        else:
                                            stmt = stmt + " or JSON_CONTAINS(" + key.lower(
                                            ) + ", '[\"" + item + "\"]', '$') = 1 "
                                else:
                                    value_ = v
                                    stmt = "JSON_CONTAINS(" + key.lower(
                                    ) + ", '[\"" + value_ + "\"]', '$') = 1"
                                return text(stmt)
                            else:
                                if isinstance(v, list):
                                    return table.c[key.lower()].in_(v)
                                elif isinstance(v, str):
                                    v_list = v.split(",")
                                    return table.c[key.lower()].in_(v_list)
                                else:
                                    raise TypeError(
                                        "operator in, the value \"{0}\" is not list or str"
                                        .format(v))
                        if k == "not-in":
                            if isinstance(table.c[key.lower()].type, JSON):
                                if isinstance(v, list):
                                    value_ = ",".join(v)
                                else:
                                    value_ = v
                                stmt = "JSON_CONTAINS(" + key.lower(
                                ) + ", '[\"" + value_ + "\"]', '$') = 0"
                                return text(stmt)
                            else:
                                if isinstance(v, list):
                                    return table.c[key.lower()].notin_(v)
                                elif isinstance(v, str):
                                    # NOTE(review): join on a str iterates
                                    # characters — probably meant
                                    # v.split(",") as in the "in" branch.
                                    v_list = ",".join(v)
                                    return table.c[key.lower()].notin_(v_list)
                                else:
                                    raise TypeError(
                                        "operator not_in, the value \"{0}\" is not list or str"
                                        .format(v))
                        if k == ">":
                            return table.c[key.lower()] > v
                        if k == ">=":
                            return table.c[key.lower()] >= v
                        if k == "<":
                            return table.c[key.lower()] < v
                        if k == "<=":
                            return table.c[key.lower()] <= v
                        if k == "between":
                            if (isinstance(v, tuple)) and len(v) == 2:
                                return table.c[key.lower()].between(v[0], v[1])
                else:
                    # Plain value: simple equality.
                    return table.c[key.lower()] == value

    def get_result_filters(self, table, value):
        """Build a clause per element of *value* (a list of where-dicts)."""
        if isinstance(value, list):
            result_filters = []
            for express in value:
                result = self.build_mysql_where_expression(table, express)
                result_filters.append(result)
            return result_filters
        else:
            return []

    # @staticmethod
    def build_mysql_updates_expression(self, table, updates, stmt_type: str) -> dict:
        """Build the values dict for an INSERT or UPDATE on *table*.

        insert: generates id_, zeroes version_, fills missing non-JSON
        columns from the column's DB default (numeric defaults become
        Decimal). update: bumps version_ and turns {_sum}/{_count} values
        into ``col + v`` SQL text for in-place arithmetic.
        """
        if stmt_type == "insert":
            new_updates = {}
            for key in table.c.keys():
                if key == "id_":
                    new_updates[key] = get_int_surrogate_key()
                elif key == "version_":
                    new_updates[key] = 0
                else:
                    if isinstance(table.c[key].type, JSON):
                        if updates.get(key) is not None:
                            new_updates[key] = updates.get(key)
                        else:
                            new_updates[key] = None
                    else:
                        if updates.get(key) is not None:
                            value_ = updates.get(key)
                            if isinstance(value_, dict):
                                for k, v in value_.items():
                                    if k == "_sum":
                                        new_updates[key.lower()] = v
                                    elif k == "_count":
                                        new_updates[key.lower()] = v
                                    elif k == "_avg":
                                        pass  # todo
                            else:
                                new_updates[key] = value_
                        else:
                            # Fall back to the column's server-side default.
                            default_value = self.get_table_column_default_value(
                                table.name, key)
                            if default_value is not None:
                                value_ = default_value.strip("'").strip(" ")
                                if value_.isdigit():
                                    new_updates[key] = Decimal(value_)
                                else:
                                    new_updates[key] = value_
                            else:
                                new_updates[key] = None
            return new_updates
        elif stmt_type == "update":
            new_updates = {}
            for key in table.c.keys():
                if key == "version_":
                    # Optimistic-lock counter: always incremented.
                    new_updates[key] = updates.get(key) + 1
                else:
                    if isinstance(table.c[key].type, JSON):
                        if updates.get(key) is not None:
                            new_updates[key] = updates.get(key)
                    else:
                        if updates.get(key) is not None:
                            value_ = updates.get(key)
                            if isinstance(value_, dict):
                                for k, v in value_.items():
                                    if k == "_sum":
                                        new_updates[key.lower()] = text(
                                            f'{key.lower()} + {v}')
                                    elif k == "_count":
                                        new_updates[key.lower()] = text(
                                            f'{key.lower()} + {v}')
                                    elif k == "_avg":
                                        pass  # todo
                            else:
                                new_updates[key] = value_
            return new_updates

    @staticmethod
    def build_mysql_order(table, order_: list):
        """Turn [(column, "asc"|"desc"), ...] into ORDER BY criteria."""
        result = []
        if order_ is None:
            return result
        else:
            for item in order_:
                if isinstance(item, tuple):
                    if item[1] == "desc":
                        new_ = desc(table.c[item[0].lower()])
                        result.append(new_)
                    if item[1] == "asc":
                        new_ = asc(table.c[item[0].lower()])
                        result.append(new_)
            return result

    def clear_metadata(self):
        # Forget all reflected table definitions.
        self.metadata.clear()

    '''
    topic data interface
    '''

    def drop_(self, topic_name):
        return self.drop_topic_data_table(topic_name)

    def drop_topic_data_table(self, topic_name):
        """Drop the topic table; log (don't raise) if it doesn't exist."""
        table_name = 'topic_' + topic_name
        try:
            table = self.get_topic_table_by_name(table_name)
            table.drop(self.engine)
        except NoSuchTableError:
            log.warning("drop table \"{0}\" not existed".format(table_name))

    def topic_data_delete_(self, where, topic_name):
        """Delete rows matching *where*; None deletes the whole table."""
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        if where is None:
            stmt = delete(table)
        else:
            stmt = delete(table).where(
                self.build_mysql_where_expression(table, where))
        with self.engine.connect() as conn:
            with conn.begin():
                conn.execute(stmt)

    @staticmethod
    def build_stmt(stmt_type, table_name, table):
        """Return a cached insert/update/select statement for *table*."""
        key = stmt_type + "-" + table_name
        result = cacheman[STMT].get(key)
        if result is not None:
            return result
        else:
            if stmt_type == "insert":
                stmt = insert(table)
                cacheman[STMT].set(key, stmt)
                return stmt
            elif stmt_type == "update":
                stmt = update(table)
                cacheman[STMT].set(key, stmt)
                return stmt
            elif stmt_type == "select":
                stmt = select(table)
                cacheman[STMT].set(key, stmt)
                return stmt

    def topic_data_insert_one(self, one, topic_name):
        """Insert a single record; raises InsertConflictError on dup keys."""
        table_name = f"topic_{topic_name}"
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("insert", table_name, table)
        one_dict: dict = capital_to_lower(convert_to_dict(one))
        value = self.build_mysql_updates_expression(table, one_dict, "insert")
        with self.engine.connect() as conn:
            with conn.begin():
                try:
                    result = conn.execute(stmt, value)
                except IntegrityError as e:
                    raise InsertConflictError("InsertConflict")
        return result.rowcount

    def topic_data_insert_(self, data, topic_name):
        """Bulk insert; each row gets a fresh surrogate id_."""
        table_name = f"topic_{topic_name}"
        table = self.get_topic_table_by_name(table_name)
        values = []
        for instance in data:
            instance_dict: dict = convert_to_dict(instance)
            instance_dict['id_'] = get_int_surrogate_key()
            value = {}
            for key in table.c.keys():
                value[key] = instance_dict.get(key)
            values.append(value)
        stmt = self.build_stmt("insert", table_name, table)
        with self.engine.connect() as conn:
            with conn.begin():
                conn.execute(stmt, values)

    def topic_data_update_one(self, id_: int, one: any, topic_name: str):
        """Update the row with primary key *id_* (no version check)."""
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("update", table_name, table)
        stmt = stmt.where(eq(table.c['id_'], id_))
        one_dict = convert_to_dict(one)
        values = self.build_mysql_updates_expression(
            table, capital_to_lower(one_dict), "update")
        stmt = stmt.values(values)
        with self.engine.begin() as conn:
            conn.execute(stmt)

    def topic_data_update_one_with_version(self, id_: int, version_: int,
                                           one: any, topic_name: str):
        """Optimistically-locked update: raises OptimisticLockError when the
        stored version_ no longer matches *version_*."""
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("update", table_name, table)
        stmt = stmt.where(
            and_(eq(table.c['id_'], id_), eq(table.c['version_'], version_)))
        one_dict = convert_to_dict(one)
        one_dict['version_'] = version_
        values = self.build_mysql_updates_expression(
            table, capital_to_lower(one_dict), "update")
        stmt = stmt.values(values)
        with self.engine.begin() as conn:
            result = conn.execute(stmt)
        if result.rowcount == 0:
            raise OptimisticLockError("Optimistic lock error")

    def topic_data_update_(self, query_dict, instance, topic_name):
        """Update all rows matching *query_dict* with *instance*'s fields
        (id_ excluded; only columns present on the table are written)."""
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("update", table_name, table)
        stmt = (stmt.where(self.build_mysql_where_expression(
            table, query_dict)))
        instance_dict: dict = convert_to_dict(instance)
        values = {}
        for key, value in instance_dict.items():
            if key != 'id_':
                if key.lower() in table.c.keys():
                    values[key.lower()] = value
        stmt = stmt.values(values)
        with self.engine.begin() as conn:
            # with conn.begin():
            conn.execute(stmt)

    def topic_data_find_by_id(self, id_: int, topic_name: str) -> any:
        return self.topic_data_find_one({"id_": id_}, topic_name)

    def topic_data_find_one(self, where, topic_name) -> any:
        """Fetch one matching row as a factor-keyed dict (JSON columns
        decoded); None when nothing matches."""
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("select", table_name, table)
        stmt = stmt.where(self.build_mysql_where_expression(table, where))
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            row = cursor.fetchone()
            if row is None:
                return None
            else:
                result = {}
                for index, name in enumerate(columns):
                    if isinstance(table.c[name.lower()].type, JSON):
                        if row[index] is not None:
                            result[name] = json.loads(row[index])
                        else:
                            result[name] = None
                    else:
                        result[name] = row[index]
                return self._convert_dict_key(result, topic_name)

    def topic_data_find_(self, where, topic_name):
        """Fetch all matching rows as factor-keyed dicts."""
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("select", table_name, table)
        stmt = stmt.where(self.build_mysql_where_expression(table, where))
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            res = cursor.fetchall()
            if res is None:
                return None
            else:
                results = []
                for row in res:
                    result = {}
                    for index, name in enumerate(columns):
                        if isinstance(table.c[name.lower()].type, JSON):
                            if row[index] is not None:
                                result[name] = json.loads(row[index])
                            else:
                                result[name] = None
                        else:
                            result[name] = row[index]
                    results.append(result)
                return self._convert_list_elements_key(results, topic_name)

    def topic_data_find_with_aggregate(self, where, topic_name, aggregate):
        """Return a single sum/count/avg scalar over matching rows.

        NOTE(review): only the last entry of *aggregate* takes effect —
        each iteration overwrites stmt.
        """
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        for key, value in aggregate.items():
            if value == "sum":
                stmt = select(text(f'sum({key.lower()})'))
            elif value == "count":
                stmt = select(func.count())
            elif value == "avg":
                stmt = select(text(f'avg({key.lower()})'))
        stmt = stmt.select_from(table)
        stmt = stmt.where(self.build_mysql_where_expression(table, where))
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            res = cursor.fetchone()
            if res is None:
                return None
            else:
                return res[0]

    def topic_data_list_all(self, topic_name) -> list:
        """Return every row of the topic; 'raw' topics yield data_ payloads."""
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        # stmt = select(table)
        stmt = self.build_stmt("select", table_name, table)
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            res = cursor.fetchall()
            if res is None:
                return None
            else:
                results = []
                for row in res:
                    result = {}
                    for index, name in enumerate(columns):
                        if isinstance(table.c[name.lower()].type, JSON):
                            if row[index] is not None:
                                result[name] = json.loads(row[index])
                            else:
                                result[name] = None
                        else:
                            result[name] = row[index]
                    if self.storage_template.check_topic_type(
                            topic_name) == "raw":
                        results.append(result['data_'])
                    else:
                        results.append(result)
                if self.storage_template.check_topic_type(topic_name) == "raw":
                    return results
                else:
                    return self._convert_list_elements_key(results, topic_name)

    def topic_data_page_(self, where, sort, pageable, model, name) -> DataPage:
        """Paged query: filter, order, offset/limit, then wrap in a DataPage.

        NOTE(review): the inner loops rebind the parameter `name` as the
        column name; the second storage_template call below therefore sees
        a column name, not the topic name — works here only because that
        branch doesn't consult `name` again. Worth renaming the loop var.
        """
        table_name = build_collection_name(name)
        count = self.count_topic_data_table(table_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = self.build_stmt("select", table_name, table)
        stmt = stmt.where(self.build_mysql_where_expression(table, where))
        orders = self.build_mysql_order(table, sort)
        for order in orders:
            stmt = stmt.order_by(order)
        # pageNumber is 1-based.
        offset = pageable.pageSize * (pageable.pageNumber - 1)
        stmt = stmt.offset(offset).limit(pageable.pageSize)
        results = []
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            res = cursor.fetchall()
            if self.storage_template.check_topic_type(name) == "raw":
                # Raw topics: flatten the JSON 'data_' payload per row.
                for row in res:
                    result = {}
                    for index, name in enumerate(columns):
                        if name == "data_":
                            result.update(json.loads(row[index]))
                    results.append(result)
            else:
                for row in res:
                    result = {}
                    for index, name in enumerate(columns):
                        if isinstance(table.c[name.lower()].type, JSON):
                            if row[index] is not None:
                                result[name] = json.loads(row[index])
                            else:
                                result[name] = None
                        else:
                            result[name] = row[index]
                    if model is not None:
                        results.append(parse_obj(model, result, table))
                    else:
                        results.append(result)
        return build_data_pages(pageable, results, count)

    '''
    internal method
    '''

    def get_table_column_default_value(self, table_name, column_name):
        """Return the DB-reported default for a column (None if absent)."""
        columns = self._get_table_columns(table_name)
        for column in columns:
            if column["name"] == column_name:
                return column["default"]

    def _get_table_columns(self, table_name):
        """Inspect (and cache) the column descriptions of *table_name*."""
        cached_columns = cacheman[COLUMNS_BY_TABLE_NAME].get(table_name)
        if cached_columns is not None:
            return cached_columns
        columns = self.insp.get_columns(table_name)
        if columns is not None:
            cacheman[COLUMNS_BY_TABLE_NAME].set(table_name, columns)
        return columns

    def _convert_list_elements_key(self, list_info, topic_name):
        """Map lowercase column names back to factor names for each row."""
        if list_info is None:
            return None
        new_list = []
        factors = self.storage_template.get_topic_factors(topic_name)
        for item in list_info:
            new_dict = {}
            for factor in factors:
                new_dict[factor['name']] = item[factor['name'].lower()]
            new_dict['id_'] = item['id_']
            # Carry through bookkeeping columns when present.
            if 'tenant_id_' in item:
                new_dict['tenant_id_'] = item.get("tenant_id_", 1)
            if "insert_time_" in item:
                new_dict['insert_time_'] = item.get(
                    "insert_time_", datetime.now().replace(tzinfo=None))
            if "update_time_" in item:
                new_dict['update_time_'] = item.get(
                    "update_time_", datetime.now().replace(tzinfo=None))
            if "version_" in item:
                new_dict['version_'] = item.get("version_", 0)
            if "aggregate_assist_" in item:
                new_dict['aggregate_assist_'] = item.get(
                    "aggregate_assist_")
            new_list.append(new_dict)
        return new_list

    def _convert_dict_key(self, dict_info, topic_name):
        """Map lowercase column names back to factor names for one row."""
        if dict_info is None:
            return None
        new_dict = {}
        # print("topic_name",topic_name)
        factors = self.storage_template.get_topic_factors(topic_name)
        for factor in factors:
            new_dict[factor['name']] = dict_info[factor['name'].lower()]
        new_dict['id_'] = dict_info['id_']
        # Carry through bookkeeping columns when present.
        if 'tenant_id_' in dict_info:
            new_dict['tenant_id_'] = dict_info.get("tenant_id_", 1)
        if "insert_time_" in dict_info:
            new_dict['insert_time_'] = dict_info.get(
                "insert_time_", datetime.now().replace(tzinfo=None))
        if "update_time_" in dict_info:
            new_dict['update_time_'] = dict_info.get(
                "update_time_", datetime.now().replace(tzinfo=None))
        if "version_" in dict_info:
            new_dict['version_'] = dict_info.get("version_", None)
        if "aggregate_assist_" in dict_info:
            new_dict['aggregate_assist_'] = dict_info.get("aggregate_assist_")
        return new_dict

    def count_topic_data_table(self, table_name):
        """Return COUNT(id_) for *table_name*.

        NOTE(review): the table name is interpolated into raw SQL; safe only
        as long as table_name is always derived internally, never from
        user input.
        """
        stmt = 'SELECT count(%s) AS count FROM %s' % ('id_', table_name)
        with self.engine.connect() as conn:
            cursor = conn.execute(text(stmt)).cursor
            columns = [col[0] for col in cursor.description]
            result = cursor.fetchone()
            return result[0]
class CreateTablesFromCSVs(DataFrameConverter):
    """Infer a table schema from a CSV, and create a sql table from this definition"""

    def __init__(self, db_url):
        self.engine = create_engine(db_url)
        # BUGFIX: declare self.Base BEFORE reflecting. The original set
        # self.Base = None *after* calling
        # reflect_db_tables_to_sqlalchemy_classes(), which clobbered the
        # automap base that method had just stored on self.Base.
        self.Base = None
        self.reflect_db_tables_to_sqlalchemy_classes()
        self.meta = MetaData(bind=self.engine)

    def create_and_fill_new_sql_table_from_df(self, table_name, data, if_exists):
        """Create the table for *data*'s inferred schema and bulk-load it.

        Returns (upload_id, upload_time, table_name). Raises on copy failure.
        """
        key_column = None
        print(f"Creating empty table named {table_name}")
        upload_id = SqlDataInventory.get_new_upload_id_for_table(table_name)
        upload_time = datetime.utcnow()
        data = self.append_metadata_to_data(
            data,
            upload_id=upload_id,
            key_columns=key_column,
        )
        data, schema = self.get_schema_from_df(data)
        # this creates an empty table of the correct schema using pandas to_sql
        self.create_new_table(table_name, schema, key_columns=key_column,
                              if_exists=if_exists)
        # Populate via a low-level COPY FROM; the ORM path is too slow.
        conn = connect_to_db_using_psycopg2()
        success = psycopg2_copy_from_stringio(conn, data, table_name)
        if not success:
            raise Exception(f"copy_from failed for table {table_name}")
        return upload_id, upload_time, table_name

    def reflect_db_tables_to_sqlalchemy_classes(self):
        self.Base = automap_base()
        # reflect the tables present in the sql database as sqlalchemy models
        self.Base.prepare(self.engine, reflect=True)

    @staticmethod
    def get_data_from_csv(csv_data_file_path):
        """
        :param csv_data_file_path: path to a CSV file ('#' lines ignored)
        :return: pandas dataframe
        """
        return pd.read_csv(csv_data_file_path, encoding="utf-8", comment="#")

    def create_new_table(
        self,
        table_name,
        schema,
        key_columns,
        if_exists,
    ):
        """
        Create an EMPTY table from CSV and generated schema.
        Empty table because the ORM copy function is very slow- we'll populate
        the table data using a lower-level interface to SQL.
        """
        self.meta.reflect()
        table_object = self.meta.tables.get(table_name)
        if table_object is not None:
            if if_exists == REPLACE:
                self.meta.drop_all(tables=[table_object])
                print(f"Table {table_name} already exists- "
                      f"dropping and replacing as per argument")
                # Forget the dropped definition so the re-declare below works.
                self.meta.clear()
            elif if_exists == FAIL:
                raise KeyError(
                    f"Table {table_name} already exists- failing as per argument"
                )
            elif if_exists == APPEND:
                # BUGFIX: the original left this message as a bare f-string
                # expression (a no-op); print it like the other branches.
                print(f"Table {table_name} already exists- "
                      f"appending data as per argument")
                return
        # table doesn't exist- create it
        columns = []
        primary_keys = key_columns or [UPLOAD_ID, INDEX_COLUMN]
        for name, sqlalchemy_dtype in schema.items():
            if name in primary_keys:
                columns.append(Column(name, sqlalchemy_dtype, primary_key=True))
            else:
                columns.append(Column(name, sqlalchemy_dtype))
        _ = Table(table_name, self.meta, *columns)
        self.meta.create_all()
class DB(Base):
    """Test-case base class that manages an engine/MetaData lifecycle.

    ``level`` selects how much connection state ``_connect`` sets up; the
    transaction/session layers are currently commented out below.
    """

    # Constants: connection level
    NONE = 0  # No connection; just set self.url
    CONNECT = 1  # Connect; no transaction
    TXN = 2  # Everything in a transaction

    # Default level for subclasses; compared against the constants above.
    level = TXN

    def _engineInfo(self, url=None):
        # Returns the URL describing this engine (defaults to the stored one).
        if url is None:
            url = self.url
        return url

    def _setup(self, url):
        self._connect(url)
        # make sure there are no tables lying around
        meta = MetaData(self.engine)
        meta.reflect()
        meta.drop_all()

    def _teardown(self):
        self._disconnect()

    def _connect(self, url):
        """Create the engine and MetaData for ``url``; honors ``self.level``."""
        self.url = url
        # TODO: seems like 0.5.x branch does not work with engine.dispose and staticpool
        #self.engine = create_engine(url, echo=True, poolclass=StaticPool)
        self.engine = create_engine(url, echo=True)
        # silence the logger added by SA, nose adds its own!
        logging.getLogger('sqlalchemy').handlers = []
        self.meta = MetaData(bind=self.engine)
        if self.level < self.CONNECT:
            return
        #self.session = create_session(bind=self.engine)
        if self.level < self.TXN:
            return
        #self.txn = self.session.begin()

    def _disconnect(self):
        # Roll back / close whatever _connect (or a test) happened to create;
        # hasattr guards because session/txn creation is currently disabled.
        if hasattr(self, 'txn'):
            self.txn.rollback()
        if hasattr(self, 'session'):
            self.session.close()
        #if hasattr(self,'conn'):
        #    self.conn.close()
        self.engine.dispose()

    def _supported(self, url):
        """Return True if the current test method supports the db in ``url``.

        Reads the ``supported``/``not_supported`` attribute lists from the
        test method, found via unittest's name-mangled test-name attribute.
        """
        db = url.split(':', 1)[0]
        func = getattr(self, self._TestCase__testMethodName)
        if hasattr(func, 'supported'):
            return db in func.supported
        if hasattr(func, 'not_supported'):
            return not (db in func.not_supported)
        # Neither list assigned; assume all are supported
        return True

    def _not_supported(self, url):
        return not self._supported(url)

    def _select_row(self):
        """Select rows, used in multiple tests"""
        return self.table.select().execution_options(
            autocommit=True).execute().fetchone()

    def refresh_table(self, name=None):
        """Reload the table from the database

        Assumes we're working with only a single table, self.table, and
        metadata self.meta

        Working w/ multiple tables is not possible, as tables can only be
        reloaded with meta.clear()
        """
        if name is None:
            name = self.table.name
        self.meta.clear()
        self.table = Table(name,
            self.meta, autoload=True)

    def compare_columns_equal(self, columns1, columns2, ignore=None):
        """Loop through all columns and compare them"""
        # Sort both sides by column name so positional zip pairs like with like.
        def key(column):
            return column.name
        for c1, c2 in zip(sorted(columns1, key=key), sorted(columns2, key=key)):
            diffs = ColumnDelta(c1, c2).diffs
            if ignore:
                # NOTE: this loop variable shadows the key() helper above,
                # which is harmless because key() is no longer needed here.
                for key in ignore:
                    diffs.pop(key, None)
            if diffs:
                self.fail("Comparing %s to %s failed: %s" % (columns1, columns2, diffs))
class Storage(object):
    """SQL Tabular Storage.

    It's an implementation of `jsontablescema.Storage`.

    Args:
        engine (object): SQLAlchemy engine
        dbschema (str): database schema name
        prefix (str): prefix for all buckets
        reflect_only (callable): a boolean predicate to filter
            the list of table names when reflecting
        geometry_support (str): Whether to use a geometry column for
            geojson type. Can be `postgis` or `sde`.
    """

    # Public

    def __init__(self, engine, dbschema=None, prefix='', reflect_only=None,
                 autoincrement=None, geometry_support=None, from_srid=None,
                 to_srid=None, views=False):
        # Set attributes
        self.__connection = engine.connect()
        self.__dbschema = dbschema
        self.__prefix = prefix
        self.__descriptors = {}
        self.__autoincrement = autoincrement
        self.__geometry_support = geometry_support
        self.__views = views
        if reflect_only is not None:
            self.__only = reflect_only
        else:
            self.__only = lambda _: True

        # Load geometry support
        if self.__geometry_support == 'postgis':
            mappers.load_postgis_support()
        elif self.__geometry_support in ['sde', 'sde-char']:
            mappers.load_sde_support(self.__geometry_support, from_srid, to_srid)

        # Create metadata
        self.__metadata = MetaData(bind=self.__connection,
                                   schema=self.__dbschema)
        self.__reflect()

    def __repr__(self):
        # Template and format
        template = 'Storage <{engine}/{dbschema}>'
        text = template.format(engine=self.__connection.engine,
                               dbschema=self.__dbschema)
        return text

    @property
    def buckets(self):
        """List of bucket names derived from the reflected table names."""
        buckets = []
        for table in self.__metadata.sorted_tables:
            bucket = mappers.tablename_to_bucket(self.__prefix, table.name)
            if bucket is not None:
                buckets.append(bucket)
        return buckets

    def create(self, bucket, descriptor, force=False, indexes_fields=None):
        """Create table by schema.

        Parameters
        ----------
        table: str/list
            Table name or list of table names.
        schema: dict/list
            JSONTableSchema schema or list of schemas.
        indexes_fields: list
            list of tuples containing field names, or list of such lists

        Raises
        ------
        RuntimeError
            If table already exists.
        """
        # Normalize single bucket/descriptor/index spec to lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]
        if indexes_fields is None or len(indexes_fields) == 0:
            indexes_fields = [()] * len(descriptors)
        elif not isinstance(indexes_fields[0][0], (list, tuple)):
            # A flat list of field-name tuples applies to the single descriptor
            indexes_fields = [indexes_fields]
        assert len(indexes_fields) == len(descriptors)
        assert len(buckets) == len(descriptors)

        # Check buckets for existence
        for bucket in reversed(self.buckets):
            if bucket in buckets:
                if not force:
                    message = 'Bucket "%s" already exists.' % bucket
                    raise RuntimeError(message)
                self.delete(bucket)

        # Define buckets
        for bucket, descriptor, index_fields in zip(buckets, descriptors, indexes_fields):
            # Add to schemas
            self.__descriptors[bucket] = descriptor
            # Create table
            jsontableschema.validate(descriptor)
            tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
            columns, constraints, indexes = mappers.descriptor_to_columns_and_constraints(
                self.__prefix, bucket, descriptor, index_fields,
                self.__autoincrement)
            Table(tablename, self.__metadata, *(columns + constraints + indexes))

        # Create tables, update metadata
        self.__metadata.create_all()

    def delete(self, bucket=None, ignore=False):
        """Drop one, several or (``bucket=None``) all buckets.

        :param ignore: when True, silently skip buckets that don't exist
        :raises RuntimeError: for a missing bucket when ``ignore`` is False
        """
        # Normalize to a list of bucket names
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        elif bucket is None:
            buckets = reversed(self.buckets)

        # Iterate over buckets
        tables = []
        for bucket in buckets:
            # Check existent
            if bucket not in self.buckets:
                if not ignore:
                    message = 'Bucket "%s" doesn\'t exist.' % bucket
                    raise RuntimeError(message)
                # BUG FIX: skip missing buckets when ignoring; the original
                # fell through to __get_table() and raised KeyError anyway.
                continue
            # Remove from buckets
            if bucket in self.__descriptors:
                del self.__descriptors[bucket]
            # Add table to tables
            table = self.__get_table(bucket)
            tables.append(table)

        # Drop tables, update metadata
        self.__metadata.drop_all(tables=tables)
        self.__metadata.clear()
        self.__reflect()

    def describe(self, bucket, descriptor=None):
        """Set (when ``descriptor`` given) or get a bucket's descriptor."""
        # Set descriptor
        if descriptor is not None:
            self.__descriptors[bucket] = descriptor
        # Get descriptor
        else:
            descriptor = self.__descriptors.get(bucket)
            if descriptor is None:
                table = self.__get_table(bucket)
                descriptor = mappers.columns_and_constraints_to_descriptor(
                    self.__prefix, table.name, table.columns,
                    table.constraints, self.__autoincrement)
        return descriptor

    def iter(self, bucket):
        """Yield rows of ``bucket`` as lists, streaming where supported."""
        table = self.__get_table(bucket)
        # Make sure we close the transaction after iterating,
        # otherwise it is left hanging
        with self.__connection.begin():
            # Streaming could be not working for some backends:
            # http://docs.sqlalchemy.org/en/latest/core/connections.html
            select = table.select().execution_options(stream_results=True)
            result = select.execute()
            for row in result:
                yield list(row)

    def read(self, bucket):
        """Return all rows of ``bucket`` as a list."""
        rows = list(self.iter(bucket))
        return rows

    def write(self, bucket, rows, keyed=False, as_generator=False,
              update_keys=None):
        """Write ``rows`` into ``bucket`` inside a single transaction.

        :raises ValueError: if ``update_keys`` is an empty list
        """
        if update_keys is not None and len(update_keys) == 0:
            raise ValueError('update_keys cannot be an empty list')
        table = self.__get_table(bucket)
        descriptor = self.describe(bucket)
        writer = StorageWriter(table, descriptor, update_keys,
                               self.__autoincrement)
        with self.__connection.begin():
            gen = writer.write(rows, keyed)
            if as_generator:
                return gen
            # Drain the generator without keeping results in memory
            collections.deque(gen, maxlen=0)

    # Private

    def __get_table(self, bucket):
        """Return SQLAlchemy table for the given bucket.
        """
        # Prepare name
        tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
        if self.__dbschema:
            tablename = '.'.join((self.__dbschema, tablename))
        return self.__metadata.tables[tablename]

    def __reflect(self):
        # Reflect only tables that pass the user predicate AND look like buckets
        def only(name, _):
            ret = (self.__only(name) and
                   mappers.tablename_to_bucket(self.__prefix, name) is not None)
            return ret
        self.__metadata.reflect(only=only, views=self.__views)
class Database(object): def __init__(self, *tables, **kw): log.info("initialising database") self.status = "updating" self.kw = kw self.metadata = None self.connection_string = kw.get("connection_string", None) if self.connection_string: self.engine = create_engine(self.connection_string) self.metadata = MetaData() self.metadata.bind = self.engine self._Session = sessionmaker(bind=self.engine, autoflush = False) self.Session = sessionwrapper.SessionClass(self._Session, self) self.logging_tables = kw.get("logging_tables", None) self.quiet = kw.get("quiet", None) self.application = kw.get("application", None) if self.application: self.set_application(self.application) self.max_table_id = 0 self.max_event_id = 0 self.persisted = False self.graph = None self.relations = [] self.tables = OrderedDict() self.search_actions = {} self.search_names = {} self.search_ids = {} for table in tables: self.add_table(table) def set_application(self, application): self.application = application if not self.connection_string: self.metadata = application.metadata self.engine = application.engine self._Session = application.Session self.Session = sessionwrapper.SessionClass(self._Session, self) if self.logging_tables is not None: self.logging_tables = self.application.logging_tables if self.quiet is not None: self.quiet = self.application.quiet self.application_folder = self.application.application_folder self.zodb = self.application.zodb self.zodb_tables_init() self.application.Session = self.Session with SchemaLock(self) as file_lock: self.load_from_persist() self.status = "active" def zodb_tables_init(self): zodb = self.application.aquire_zodb() connection = zodb.open() root = connection.root() if "tables" not in root: root["tables"] = PersistentMapping() root["table_count"] = 0 root["event_count"] = 0 transaction.commit() connection.close() zodb.close() self.application.get_zodb(True) def __getitem__(self, item): if isinstance(item, int): table = self.get_table_by_id(item) if not 
table: raise IndexError("table id %s does not exist" % item) return table else: return self.tables[item] def get_table_by_id(self, id): for table in self.tables.itervalues(): if table.table_id == id: return table def add_table(self, table, ignore = False, drop = False): log.info("adding table %s" % table.name) if table.name in self.tables.iterkeys(): if ignore: return elif drop: self.drop_table(table.name) else: raise custom_exceptions.DuplicateTableError("already a table named %s" % table.name) self._add_table_no_persist(table) def rename_table(self, table, new_name, session = None): if isinstance(table, tables.Table): table_to_rename = table else: table_to_rename = self.tables[table] with SchemaLock(self) as file_lock: for relations in table_to_rename.tables_with_relations.values(): for rel in relations: if rel.other == table_to_rename.name: field = rel.parent field.args = [new_name] + list(field.args[1:]) table_to_rename.name = new_name file_lock.export(uuid = True) table_to_rename.sa_table.rename(new_name) file_lock.export() self.load_from_persist(True) if table_to_rename.logged: self.rename_table("_log_%s" % table_to_rename.name, "_log_%s" % new_name, session) def drop_table(self, table): with SchemaLock(self) as file_lock: if isinstance(table, tables.Table): table_to_drop = table else: table_to_drop = self.tables[table] if table_to_drop.dependant_tables: raise custom_exceptions.DependencyError(( "cannot delete table %s as the following tables" " depend on it %s" % (table.name, table.dependant_tables))) for relations in table_to_drop.tables_with_relations.itervalues(): for relation in relations: field = relation.parent field.table.fields.pop(field.name) field.table.field_list.remove(field) self.tables.pop(table_to_drop.name) file_lock.export(uuid = True) table_to_drop.sa_table.drop() file_lock.export() self.load_from_persist(True) if table_to_drop.logged: self.drop_table(self.tables["_log_" + table_to_drop.name]) def add_relation_table(self, table): if "_core" 
not in self.tables: raise custom_exceptions.NoTableAddError("table %s cannot be added as there is" "no _core table in the database" % table.name) assert table.primary_entities assert table.secondary_entities table.relation = True table.kw["relation"] = True self.add_table(table) relation = ForeignKey("_core_id", "_core", backref = table.name) table._add_field_no_persist(relation) event = Event("delete", actions.DeleteRows("_core")) table.add_event(event) def add_info_table(self, table): if "_core" not in self.tables: raise custom_exceptions.NoTableAddError("table %s cannot be added as there is" "no _core table in the database" % table.name) table.info_table = True table.kw["info_table"] = True self.add_table(table) relation = ForeignKey("_core_id", "_core", backref = table.name) table._add_field_no_persist(relation) event = Event("delete", actions.DeleteRows("_core")) table.add_event(event) def add_entity(self, table): if "_core" not in self.tables: raise custom_exceptions.NoTableAddError("table %s cannot be added as there is" "no _core table in the database" % table.name) table.entity = True table.kw["entity"] = True self.add_table(table) #add relation relation = ForeignKey("_core_id", "_core", backref = table.name) table._add_field_no_persist(relation) ##add title events if table.title_field: title_field = table.title_field else: title_field = "name" event = Event("new change", actions.CopyTextAfter("primary_entity._core_entity.title", title_field)) table.add_event(event) if table.summary_fields: event = Event("new change", actions.CopyTextAfterField("primary_entity._core_entity.summary", table.summary_fields)) table.add_event(event) event = Event("delete", actions.DeleteRows("primary_entity._core_entity")) table.add_event(event) def _add_table_no_persist(self, table): table._set_parent(self) def persist(self): self.status = "updating" for table in self.tables.values(): if not self.logging_tables: ## FIXME should look at better place to set this 
table.kw["logged"] = False table.logged = False if table.logged and "_log_%s" % table.name not in self.tables.iterkeys() : self.add_table(self.logged_table(table)) for table in self.tables.itervalues(): table.add_foreign_key_columns() self.update_sa(True) with SchemaLock(self) as file_lock: file_lock.export(uuid = True) self.metadata.create_all(self.engine) self.persisted = True file_lock.export() self.load_from_persist(True) def get_file_path(self, uuid_name = False): uuid = datetime.datetime.now().isoformat().\ replace(":", "-").replace(".", "-") if uuid_name: file_name = "generated_schema-%s.py" % uuid else: file_name = "generated_schema.py" file_path = os.path.join( self.application.application_folder, "_schema", file_name ) return file_path def code_repr_load(self): import _schema.generated_schema as sch sch = reload(sch) database = sch.database database.clear_sa() for table in database.tables.values(): table.database = self self.add_table(table) table.persisted = True self.max_table_id = database.max_table_id self.max_event_id = database.max_event_id self.persisted = True def code_repr_export(self, file_path): try: os.remove(file_path) os.remove(file_path+"c") except OSError: pass out_file = open(file_path, "w") output = [ "from database.database import Database", "from database.tables import Table", "from database.fields import *", "from database.database import table, entity, relation", "from database.events import Event", "from database.actions import *", "", "", "database = Database(", "", "", ] for table in sorted(self.tables.values(), key = lambda x:x.table_id): output.append(table.code_repr() + ",") kw_display = "" if self.kw: kw_list = ["%s = %s" % (i[0], repr(i[1])) for i in self.kw.items()] kw_display = ", ".join(sorted(kw_list)) output.append(kw_display) output.append(")") out_file.write("\n".join(output)) out_file.close() return file_path def load_from_persist(self, restart = False): self.clear_sa() self.tables = OrderedDict() try: 
self.code_repr_load() except ImportError: return self.add_relations() self.update_sa() self.validate_database() def add_relations(self): #not property for optimisation self.relations = [] for table_name, table_value in self.tables.iteritems(): ## make sure fk columns are remade table_value.foriegn_key_columns_current = None table_value.add_relations() for rel_name, rel_value in table_value.relations.iteritems(): self.relations.append(rel_value) def checkrelations(self): for relation in self.relations: if relation.other not in self.tables.iterkeys(): raise custom_exceptions.RelationError,\ "table %s does not exits" % relation.other def update_sa(self, reload = False): if reload == True and self.status <> "terminated": self.status = "updating" if reload: self.clear_sa() self.checkrelations() self.make_graph() try: for table in self.tables.itervalues(): table.make_paths() table.make_sa_table() table.make_sa_class() for table in self.tables.itervalues(): table.sa_mapper() sa.orm.compile_mappers() for table in self.tables.itervalues(): for column in table.columns.iterkeys(): getattr(table.sa_class, column).impl.active_history = True table.columns_cache = table.columns for table in self.tables.itervalues(): table.make_schema_dict() ## put valid_info tables into info_table for table in self.tables.itervalues(): if table.relation or table.entity: for valid_info_table in table.valid_info_tables: info_table = self.tables[valid_info_table] assert info_table.info_table info_table.valid_core_types.append(table.name) self.collect_search_actions() except (custom_exceptions.NoDatabaseError,\ custom_exceptions.RelationError): pass if reload == True and self.status <> "terminated": self.status = "active" def clear_sa(self): sa.orm.clear_mappers() if self.metadata: self.metadata.clear() for table in self.tables.itervalues(): table.foriegn_key_columns_current = None table.mapper = None table.sa_class = None table.sa_table = None table.paths = None table.local_tables = None 
table.one_to_many_tables = None table.events = dict(new = [], delete = [], change = []) table.schema_dict = None table.valid_core_types = [] table.columns_cache = None self.graph = None self.search_actions = {} self.search_names = {} self.search_ids = {} def tables_with_relations(self, table): relations = defaultdict(list) for n, v in table.relations.iteritems(): relations[(v.other, "here")].append(v) for v in self.relations: if v.other == table.name: relations[(v.table.name, "other")].append(v) return relations def result_set(self, search): return resultset.ResultSet(search) def search(self, table_name, where = "id>0", *args, **kw): ##FIXME id>0 should be changed to none once search is sorted """ :param table_name: specifies the base table you will be query from (required) :param where: either a paramatarised or normal where clause, if paramitarised either values or params keywords have to be added. (optional first arg, if missing will query without where) :param tables: an optional list of onetoone or manytoone tables to be extracted with results :param keep_all: will keep id, _core_entity_id, modified_by and modified_on fields :param fields: an optional explicit field list in the form 'field' for base table and 'table.field' for other tables. Overwrites table option and keep all. :param limit: the row limit :param offset: the offset :param internal: if true will not convert date, boolean and decimal fields :param values: a list of values to replace the ? in the paramatarised queries :param params: a dict with the keys as the replacement to inside the curly brackets i.e key name will replace {name} in query. 
:param order_by: a string in the same form as a sql order by ie 'name desc, donkey.name, donkey.age desc' (name in base table) """ session = kw.pop("session", None) if session: external_session = True else: session = self.Session() external_session = False tables = kw.get("tables", [table_name]) fields = kw.get("fields", None) join_tables = [] if fields: join_tables = split_table_fields(fields, table_name).keys() if table_name in join_tables: join_tables.remove(table_name) tables = None if tables: join_tables.extend(tables) if table_name in tables: join_tables.remove(table_name) if "order_by" not in kw: kw["order_by"] = "id" if join_tables: kw["extra_outer"] = join_tables kw["distinct_many"] = False try: query = search.Search(self, table_name, session, where, *args, **kw) result = resultset.ResultSet(query, **kw) result.collect() return result except Exception, e: session.rollback() raise finally:
class SAProvider(BaseProvider):
    """Provider Implementation class for SQLAlchemy"""

    def __init__(self, *args, **kwargs):
        """Initialize and maintain Engine.

        :raises ConfigurationError: if conn_info (``args[2]``) lacks a
            ``DATABASE`` key identifying the backend.
        """
        # Since SQLAlchemyProvider can cater to multiple databases, it is important
        # that we know which database we are dealing with, to run database-specific
        # statements like `PRAGMA` for SQLite.
        if "DATABASE" not in args[2]:
            logger.error(
                f"Missing `DATABASE` information in conn_info: {args[2]}")
            raise ConfigurationError(
                "Missing `DATABASE` attribute in Connection info")

        super().__init__(*args, **kwargs)

        kwargs = self._get_database_specific_engine_args()
        self._engine = create_engine(make_url(self.conn_info["DATABASE_URI"]),
                                     **kwargs)

        if self.conn_info["DATABASE"] == Database.POSTGRESQL.value:
            # Nest database tables under a schema, so that we have complete control
            # on creating/dropping db structures. We cannot control structures in the
            # the default `public` schema.
            #
            # Use `SCHEMA` value if specified as part of the conn info. Otherwise, construct
            # and use default schema name as `DB`_schema.
            schema = (self.conn_info["SCHEMA"]
                      if "SCHEMA" in self.conn_info else "public")
            self._metadata = MetaData(bind=self._engine, schema=schema)
        else:
            self._metadata = MetaData(bind=self._engine)

        # A temporary cache of already constructed model classes
        self._model_classes = {}

    def _get_database_specific_engine_args(self):
        """Supplies additional database-specific arguments to SQLAlchemy Engine.

        Return: a dictionary with database-specific SQLAlchemy Engine arguments.
        """
        if self.conn_info["DATABASE"] == Database.POSTGRESQL.value:
            return {"isolation_level": "AUTOCOMMIT"}
        return {}

    def _get_database_specific_session_args(self):
        """Set Database specific session parameters.

        Depending on the database in use, this method supplies additional
        arguments while constructing sessions.

        Return: a dictionary with additional arguments and values.
        """
        if self.conn_info["DATABASE"] == Database.POSTGRESQL.value:
            return {"autocommit": True, "autoflush": False}
        return {}

    def get_session(self):
        """Establish a session to the Database"""
        # Create the session
        kwargs = self._get_database_specific_session_args()
        session_factory = orm.sessionmaker(bind=self._engine,
                                           expire_on_commit=False,
                                           **kwargs)
        session_cls = orm.scoped_session(session_factory)
        return session_cls

    def _execute_database_specific_connection_statements(self, conn):
        """Execute connection statements depending on the database in use.

        Each database has a unique set of commands and associated format to
        control connection-related parameters. Since we use SQLAlchemy,
        statements should be run dynamically based on the database in use.

        Arguments:
        * conn: An active connection object to the database

        Return: the same connection object
        """
        if self.conn_info["DATABASE"] == Database.SQLITE.value:
            conn.execute("PRAGMA case_sensitive_like = ON;")
        return conn

    def get_connection(self, session_cls=None):
        """Create the connection to the Database instance"""
        # If this connection has to be created within an existing session,
        # ``session_cls`` will be provided as an argument.
        # Otherwise, fetch a new ``session_cls`` from ``get_session()``
        if session_cls is None:
            session_cls = self.get_session()
        conn = session_cls()
        conn = self._execute_database_specific_connection_statements(conn)
        return conn

    def _data_reset(self):
        """Delete all rows from every known table inside one transaction."""
        conn = self._engine.connect()
        transaction = conn.begin()
        # Foreign keys are disabled on SQLite so delete order doesn't matter
        if self.conn_info["DATABASE"] == Database.SQLITE.value:
            conn.execute("PRAGMA foreign_keys = OFF;")
        for table in self._metadata.sorted_tables:
            conn.execute(table.delete())
        if self.conn_info["DATABASE"] == Database.SQLITE.value:
            conn.execute("PRAGMA foreign_keys = ON;")
        transaction.commit()

        # Discard any active Unit of Work
        if current_uow and current_uow.in_progress:
            current_uow.rollback()

    def _create_database_artifacts(self):
        # Touch each aggregate's DAO so its model/table is registered with
        # self._metadata before create_all() runs.
        for _, aggregate_record in self.domain.registry.aggregates.items():
            self.domain.repository_for(aggregate_record.cls)._dao
        self._metadata.create_all()

    def _drop_database_artifacts(self):
        self._metadata.drop_all()
        self._metadata.clear()

    def decorate_model_class(self, entity_cls, model_cls):
        """Subclass ``model_cls`` from SqlalchemyModel, memoizing the result."""
        schema_name = derive_schema_name(model_cls)

        # Return the model class if it was already seen/decorated
        if schema_name in self._model_classes:
            return self._model_classes[schema_name]

        # If `model_cls` is already subclassed from SqlAlchemyModel,
        # this method call is a no-op
        if issubclass(model_cls, SqlalchemyModel):
            return model_cls
        else:
            custom_attrs = {
                key: value
                for (key, value) in vars(model_cls).items()
                if key not in ["Meta", "__module__", "__doc__", "__weakref__"]
            }

            from protean.core.model import ModelMeta
            meta_ = ModelMeta()
            meta_.entity_cls = entity_cls

            custom_attrs.update({"meta_": meta_, "metadata": self._metadata})
            # FIXME Ensure the custom model attributes are constructed properly
            decorated_model_cls = type(model_cls.__name__,
                                       (SqlalchemyModel, model_cls),
                                       custom_attrs)

            # Memoize the constructed model class
            self._model_classes[schema_name] = decorated_model_cls
            return decorated_model_cls

    def construct_model_class(self, entity_cls):
        """Return a fully-baked Model class for a given Entity class"""
        model_cls = None

        # Return the model class if it was already seen/decorated
        if entity_cls.meta_.schema_name in self._model_classes:
            model_cls = self._model_classes[entity_cls.meta_.schema_name]
        else:
            from protean.core.model import ModelMeta
            meta_ = ModelMeta()
            meta_.entity_cls = entity_cls

            attrs = {
                "meta_": meta_,
                "metadata": self._metadata,
            }
            # FIXME Ensure the custom model attributes are constructed properly
            model_cls = type(entity_cls.__name__ + "Model",
                             (SqlalchemyModel, ), attrs)

            # Memoize the constructed model class
            self._model_classes[entity_cls.meta_.schema_name] = model_cls

        # Set Entity Class as a class level attribute for the Model, to be able to reference later.
        return model_cls

    def get_dao(self, entity_cls, model_cls):
        """Return a DAO object configured with a live connection"""
        return SADAO(self.domain, self, entity_cls, model_cls)

    def raw(self, query: Any, data: Any = None):
        """Run raw query on Provider"""
        if data is None:
            data = {}
        assert isinstance(query, str)
        # BUG FIX: the original asserted ``isinstance(data, (dict, None))``;
        # ``None`` is not a type, so that assert raised TypeError on every call.
        # By this point ``data`` must be a dict (None was replaced above).
        assert isinstance(data, dict)
        return self.get_connection().execute(query, data)
from sqlalchemy import Table, Column, MetaData, Integer, String, ForeignKeyConstraint, DateTime

# Shared metadata registry for the table definitions below.
# NOTE: the original called ``metadata.clear()`` immediately after
# constructing a fresh MetaData — a no-op that has been removed.
metadata = MetaData()

# Lookup table of post categories; titles must be unique.
PostCategories = Table(
    'post_categories', metadata,
    Column('id', Integer, primary_key=True),
    Column('title', String(length=100), nullable=False, unique=True),
)

# Blog posts. Each post belongs to exactly one category and is removed
# together with it (ON DELETE CASCADE on the foreign key).
Post = Table(
    'post', metadata,
    Column('id', Integer, primary_key=True),
    Column('category_id', Integer, nullable=False),
    Column('title', String(length=100), nullable=False),
    Column('text', String, nullable=False),
    Column('main_img', String, nullable=False),
    Column('created_at', DateTime, nullable=False),
    Column('last_updated', DateTime, nullable=False),
    ForeignKeyConstraint(['category_id'], [PostCategories.c.id],
                         name='post_category_id_fkey', ondelete='CASCADE'))
class Storage(object):
    """SQL Tabular Storage.

    It's an implementation of `jsontablescema.Storage`.

    Args:
        engine (object): SQLAlchemy engine
        dbschema (str): database schema name
        prefix (str): prefix for all buckets
    """

    # Public

    def __init__(self, engine, dbschema=None, prefix=''):
        # Set attributes
        self.__connection = engine.connect()
        self.__dbschema = dbschema
        self.__prefix = prefix
        self.__descriptors = {}

        # Create metadata (reflect=True loads existing tables immediately)
        self.__metadata = MetaData(bind=self.__connection,
                                   schema=self.__dbschema,
                                   reflect=True)

    def __repr__(self):
        # Template and format
        template = 'Storage <{engine}/{dbschema}>'
        text = template.format(engine=self.__connection.engine,
                               dbschema=self.__dbschema)
        return text

    @property
    def buckets(self):
        """List of bucket names derived from the reflected table names."""
        buckets = []
        for table in self.__metadata.sorted_tables:
            bucket = mappers.tablename_to_bucket(self.__prefix, table.name)
            if bucket is not None:
                buckets.append(bucket)
        return buckets

    def create(self, bucket, descriptor, force=False):
        """Create one or several buckets from JSONTableSchema descriptors.

        :raises RuntimeError: if a bucket exists and ``force`` is False
        """
        # Normalize single bucket/descriptor to lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]

        # Check buckets for existence
        for bucket in reversed(self.buckets):
            if bucket in buckets:
                if not force:
                    message = 'Bucket "%s" already exists.' % bucket
                    raise RuntimeError(message)
                self.delete(bucket)

        # Define buckets
        for bucket, descriptor in zip(buckets, descriptors):
            # Add to schemas
            self.__descriptors[bucket] = descriptor
            # Create table
            jsontableschema.validate(descriptor)
            tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
            columns, constraints = mappers.descriptor_to_columns_and_constraints(
                self.__prefix, bucket, descriptor)
            Table(tablename, self.__metadata, *(columns + constraints))

        # Create tables, update metadata
        self.__metadata.create_all()

    def delete(self, bucket=None, ignore=False):
        """Drop one, several or (``bucket=None``) all buckets.

        :param ignore: when True, silently skip buckets that don't exist
        :raises RuntimeError: for a missing bucket when ``ignore`` is False
        """
        # Normalize to a list of bucket names
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        elif bucket is None:
            buckets = reversed(self.buckets)

        # Iterate over buckets
        tables = []
        for bucket in buckets:
            # Check existent
            if bucket not in self.buckets:
                if not ignore:
                    message = 'Bucket "%s" doesn\'t exist.' % bucket
                    raise RuntimeError(message)
                # BUG FIX: skip missing buckets when ignoring; the original
                # fell through to __get_table() and raised KeyError anyway.
                continue
            # Remove from buckets
            if bucket in self.__descriptors:
                del self.__descriptors[bucket]
            # Add table to tables
            table = self.__get_table(bucket)
            tables.append(table)

        # Drop tables, update metadata
        self.__metadata.drop_all(tables=tables)
        self.__metadata.clear()
        self.__metadata.reflect()

    def describe(self, bucket, descriptor=None):
        """Set (when ``descriptor`` given) or get a bucket's descriptor."""
        # Set descriptor
        if descriptor is not None:
            self.__descriptors[bucket] = descriptor
        # Get descriptor
        else:
            descriptor = self.__descriptors.get(bucket)
            if descriptor is None:
                table = self.__get_table(bucket)
                descriptor = mappers.columns_and_constraints_to_descriptor(
                    self.__prefix, table.name, table.columns,
                    table.constraints)
        return descriptor

    def iter(self, bucket):
        """Yield rows of ``bucket`` as lists, streaming where supported."""
        table = self.__get_table(bucket)
        # Streaming could be not working for some backends:
        # http://docs.sqlalchemy.org/en/latest/core/connections.html
        select = table.select().execution_options(stream_results=True)
        result = select.execute()
        for row in result:
            yield list(row)

    def read(self, bucket):
        """Return all rows of ``bucket`` as a list."""
        rows = list(self.iter(bucket))
        return rows

    def write(self, bucket, rows):
        """Cast and insert ``rows`` into ``bucket`` in batches."""
        # Prepare
        BUFFER_SIZE = 1000
        descriptor = self.describe(bucket)
        schema = jsontableschema.Schema(descriptor)
        table = self.__get_table(bucket)

        # Write
        with self.__connection.begin():
            keyed_rows = []
            for row in rows:
                keyed_row = {}
                for index, field in enumerate(schema.fields):
                    value = row[index]
                    try:
                        value = field.cast_value(value)
                    except InvalidObjectType:
                        # Fall back to raw JSON decoding for object-typed fields
                        value = json.loads(value)
                    keyed_row[field.name] = value
                keyed_rows.append(keyed_row)
                if len(keyed_rows) > BUFFER_SIZE:
                    # Insert data
                    table.insert().execute(keyed_rows)
                    # Clean memory
                    keyed_rows = []
            if len(keyed_rows) > 0:
                # Insert data
                table.insert().execute(keyed_rows)

    # Private

    def __get_table(self, bucket):
        """Return SQLAlchemy table for the given bucket.
        """
        # Prepare name
        tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
        if self.__dbschema:
            # BUG FIX: str.join takes a single iterable; the original passed
            # two positional arguments and raised TypeError for any dbschema.
            tablename = '.'.join((self.__dbschema, tablename))
        return self.__metadata.tables[tablename]
class Bookmark(object):
    """Fetch a user's Hatena bookmark feed and register new entries."""

    def __init__(self, engine, user, start_no=0, end_no=100):
        """
        :param engine: SQLAlchemy engine for the bookmark database
        :param user: user object exposing ``name`` and ``id``
        :param start_no: first feed offset to request (inclusive)
        :param end_no: offset at which to stop requesting
        """
        self.engine = engine
        self.md = MetaData(self.engine)
        self.user = user
        self._feeds = []
        # BUG FIX: the original assigned the literals 0 and 100 here,
        # silently ignoring the start_no/end_no arguments.
        self.start_no = start_no
        self.end_no = end_no

    @property
    def feeds(self):
        """Return feed data."""
        if not self._feeds:
            self._load()
        return self._feeds

    def _load(self):
        """Load feed info."""
        interval = 20  # API default setting.
        for i in range(self.start_no, self.end_no, interval):
            url = self._make_feed_api_url(i)
            feed = self._request(url)
            if not feed["entries"]:
                break
            self._append_to_feeds(feed)
            time.sleep(2)

    def _make_feed_api_url(self, id):
        """Create api url of rss feed."""
        return HATENA_FEED_URL.format(user=self.user.name, no=str(id))

    def _request(self, url):
        """Request api.

        Request argument url and return result data as feedparser object..
        """
        return feedparser.parse(requests.get(url).text)

    def _append_to_feeds(self, feed):
        """Parse and append feed data."""
        for f in feed["entries"]:
            link = f["link"]
            title = f['title']
            self._feeds.append(Feed(self.engine, link, title))

    def save(self):
        """Save url."""
        if not self._feeds:
            self._load()
        # TODO: Load user no
        logging.info('SAVE BOOKMARK')
        for f in self._feeds:
            logging.info(f.url)
            if self._has_record(f.id):
                # TODO:
                # Fix to use return if not existing new feed.
                # To escape duplicate access.
                logging.info('IGNORE')
                continue
            logging.info('ADD')
            self._register(f.id)
        logging.info('----------------------')

    def _register(self, url_id):
        """Register bookmark transaction."""
        self.md.clear()
        md = MetaData(self.engine)
        t = Table('bookmark', md, autoload=True)
        i = insert(t).values(url_id=url_id,
                             user_id=self.user.id,
                             registered_date=int(
                                 date.today().strftime("%Y%m%d")))
        i.execute()

    def _has_record(self, url_id):
        """Check bookmark url is already existing."""
        t = Table('bookmark', self.md)
        c_user = column('user_id')
        c_url = column('url_id')
        s = select(columns=[column('id')], from_obj=t).where(
            c_url==url_id).where(c_user==self.user.id)
        return s.execute().scalar()
class TestIdempotentOperations(TestCase):
    """Test database migration utilities.

    Unfortunately not all operations can be tested with an SQLite database
    since not all ALTER TABLE operations are supported, see
    http://www.sqlite.org/lang_altertable.html. Currently these untested
    operations are:

     - IdempotentOperations.drop_column
     - DeactivatedFKConstraint
    """

    def setUp(self):
        # In-memory SQLite keeps every test fully isolated.
        self.connection = create_engine('sqlite:///:memory:').connect()
        self.metadata = MetaData(self.connection)
        self.table = Table('thingy', self.metadata,
                           Column('thingy_id', Integer, primary_key=True))
        self.metadata.create_all()
        self.migration_context = MigrationContext.configure(self.connection)
        # NOTE(review): the second argument is this TestCase instance —
        # presumably IdempotentOperations only uses it as a logger-like
        # collaborator; confirm against its signature.
        self.op = IdempotentOperations(self.migration_context, self)
        self.inspector = Inspector(self.connection)

    def tearDown(self):
        self.metadata.drop_all()
        self.connection.close()

    def refresh_metadata(self):
        # Drop cached Table objects and re-reflect so assertions observe
        # the DDL the operation just executed.
        self.metadata.clear()
        self.metadata.reflect()

    def test_add_column_works_with_valid_preconditions(self):
        self.assertEqual(['thingy_id'],
                         self.metadata.tables['thingy'].columns.keys())
        self.op.add_column('thingy', Column('foo', String))
        self.refresh_metadata()
        self.assertEqual(['thingy_id', 'foo'],
                         self.metadata.tables['thingy'].columns.keys())

    def test_add_column_skips_add_when_column_name_already_exists(self):
        # Adding a column whose name already exists must be a no-op.
        self.assertEqual(['thingy_id'],
                         self.metadata.tables['thingy'].columns.keys())
        self.op.add_column('thingy', Column('thingy_id', String))
        self.refresh_metadata()
        self.assertEqual(['thingy_id'],
                         self.metadata.tables['thingy'].columns.keys())

    def test_create_tables_skips_create_when_table_already_exists(self):
        self.assertEqual(['thingy'], self.metadata.tables.keys())
        self.op.create_table('thingy')
        self.refresh_metadata()
        self.assertEqual(['thingy'], self.metadata.tables.keys())

    def test_create_table_works_with_valid_preconditions(self):
        self.assertEqual(['thingy'], self.metadata.tables.keys())
        self.op.create_table('xuq', Column('foo', Integer, primary_key=True))
        self.refresh_metadata()
        self.assertEqual(['thingy', 'xuq'], self.metadata.tables.keys())
class Feed(object):
    """Bookmark user class."""

    def __init__(self, engine, url, title=''):
        logging.basicConfig(level=20)
        self.engine = engine
        self.url = url
        self.title = title
        self.md = MetaData(self.engine)
        self.sleep_sec = 1

    @property
    def id(self):
        """Feed id, inserting the feed row first when it is not stored yet."""
        if not self._load_id():
            self._append()
        return self._load_id()

    def extract(self):
        """Extract bookmarked users from setted url."""
        found = []
        # TODO: Load id in __init__
        if not self._load_id():
            self._append()
        payload = self._request(self._make_entry_api_url(self.url))
        if not payload:
            return found
        for bookmark in payload.get('bookmarks', []):
            if "user" in bookmark:
                found.append(User(self.engine, bookmark["user"]))
                time.sleep(self.sleep_sec)  # throttle per discovered user
        return found

    def _make_entry_api_url(self, url):
        """Create hatena bookmark entry api url."""
        return HATENA_ENTRY_URL.format(url=quote(url, safe=''))

    def _request(self, url):
        """Request argument url and return the decoded JSON body as a dict."""
        response = requests.get(url)
        return response.json()

    def _load_id(self):
        """Load feed id from database; None when the url is unknown."""
        feed_table = Table('feed', self.md)
        stmt = select(columns=[column('id')], from_obj=feed_table).where(
            column('url') == self.url)
        row = stmt.execute().fetchone()
        return row['id'] if row else None

    def _append(self):
        """Add new feed url into database."""
        logging.info('SAVE MY FEED')
        logging.info(self.url)
        self.md.clear()
        # Fresh MetaData so autoload reflects the current table definition.
        fresh_md = MetaData(self.engine)
        feed_table = Table('feed', fresh_md, autoload=True)
        insert(feed_table).values(url=self.url, title=self.title).execute()
        logging.info('----------------------')
class Storage(tableschema.Storage):
    """SQL implementation of the tableschema Storage interface.

    Buckets map to database tables through ``Mapper``; table metadata is
    reflected from the live connection at construction time.
    """

    # Public

    def __init__(self, engine, dbschema=None, prefix='', reflect_only=None,
                 autoincrement=None):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """
        # Set attributes
        self.__connection = engine.connect()
        self.__dbschema = dbschema
        self.__prefix = prefix
        self.__descriptors = {}
        self.__fallbacks = {}
        self.__autoincrement = autoincrement
        # Predicate used to filter table names during reflection.
        self.__only = reflect_only or (lambda _: True)
        # Create mapper
        self.__mapper = Mapper(prefix=prefix, dialect=engine.dialect.name)
        # Create metadata and reflect
        self.__metadata = MetaData(bind=self.__connection,
                                   schema=self.__dbschema)
        self.__reflect()

    def __repr__(self):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """
        # Template and format
        template = 'Storage <{engine}/{dbschema}>'
        text = template.format(
            engine=self.__connection.engine,
            dbschema=self.__dbschema)
        return text

    @property
    def buckets(self):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """
        buckets = []
        for table in self.__metadata.sorted_tables:
            # Tables that don't match the naming convention restore to None
            # and are skipped.
            bucket = self.__mapper.restore_bucket(table.name)
            if bucket is not None:
                buckets.append(bucket)
        return buckets

    def create(self, bucket, descriptor, force=False, indexes_fields=None):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """
        # Make lists — single bucket/descriptor calls are normalized to
        # the multi-bucket form.
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]
        if indexes_fields is None or len(indexes_fields) == 0:
            indexes_fields = [()] * len(descriptors)
        elif type(indexes_fields[0][0]) not in {list, tuple}:
            # A flat list of tuples was passed for a single bucket.
            indexes_fields = [indexes_fields]
        # Check dimensions
        if not (len(buckets) == len(descriptors) == len(indexes_fields)):
            raise tableschema.exceptions.StorageError('Wrong argument dimensions')
        # Check buckets for existence
        for bucket in reversed(self.buckets):
            if bucket in buckets:
                if not force:
                    message = 'Bucket "%s" already exists.' % bucket
                    raise tableschema.exceptions.StorageError(message)
                self.delete(bucket)
        # Define buckets
        for bucket, descriptor, index_fields in zip(buckets, descriptors, indexes_fields):
            tableschema.validate(descriptor)
            table_name = self.__mapper.convert_bucket(bucket)
            columns, constraints, indexes, fallbacks, table_comment = self.__mapper \
                .convert_descriptor(bucket, descriptor, index_fields, self.__autoincrement)
            # Registering the Table on self.__metadata is what makes
            # create_all() below pick it up.
            Table(table_name, self.__metadata, *(columns + constraints + indexes),
                  comment=table_comment)
            self.__descriptors[bucket] = descriptor
            self.__fallbacks[bucket] = fallbacks
        # Create tables, update metadata
        self.__metadata.create_all()

    def delete(self, bucket=None, ignore=False):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """
        # Make lists; bucket=None means "delete everything".
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        elif bucket is None:
            buckets = reversed(self.buckets)
        # Iterate
        tables = []
        for bucket in buckets:
            # Check existent
            if bucket not in self.buckets:
                if not ignore:
                    message = 'Bucket "%s" doesn\'t exist.' % bucket
                    raise tableschema.exceptions.StorageError(message)
                return
            # Remove from buckets
            if bucket in self.__descriptors:
                del self.__descriptors[bucket]
            # Add table to tables
            table = self.__get_table(bucket)
            tables.append(table)
        # Drop tables, update metadata
        self.__metadata.drop_all(tables=tables)
        self.__metadata.clear()
        self.__reflect()

    def describe(self, bucket, descriptor=None):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """
        # Set descriptor
        if descriptor is not None:
            self.__descriptors[bucket] = descriptor
        # Get descriptor — fall back to restoring it from the reflected table.
        else:
            descriptor = self.__descriptors.get(bucket)
            if descriptor is None:
                table = self.__get_table(bucket)
                descriptor = self.__mapper.restore_descriptor(
                    table.name, table.columns, table.constraints, self.__autoincrement)
        return descriptor

    def iter(self, bucket):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """
        # Get table and fallbacks
        table = self.__get_table(bucket)
        schema = tableschema.Schema(self.describe(bucket))
        # Open and close transaction
        with self.__connection.begin():
            # Streaming could be not working for some backends:
            # http://docs.sqlalchemy.org/en/latest/core/connections.html
            select = table.select().execution_options(stream_results=True)
            result = select.execute()
            for row in result:
                row = self.__mapper.restore_row(row, schema=schema)
                yield row

    def read(self, bucket):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """
        rows = list(self.iter(bucket))
        return rows

    def write(self, bucket, rows, keyed=False, as_generator=False, update_keys=None):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """
        # Check update keys
        if update_keys is not None and len(update_keys) == 0:
            message = 'Argument "update_keys" cannot be an empty list'
            raise tableschema.exceptions.StorageError(message)
        # Get table and description
        table = self.__get_table(bucket)
        schema = tableschema.Schema(self.describe(bucket))
        fallbacks = self.__fallbacks.get(bucket, [])
        # Write rows to table
        convert_row = partial(self.__mapper.convert_row, schema=schema, fallbacks=fallbacks)
        writer = Writer(table, schema, update_keys, self.__autoincrement, convert_row)
        with self.__connection.begin():
            gen = writer.write(rows, keyed=keyed)
            if as_generator:
                # Caller drives the generator (and thus the transaction).
                return gen
            # Exhaust the generator without keeping results in memory.
            collections.deque(gen, maxlen=0)

    # Private

    def __get_table(self, bucket):
        """Get table by bucket
        """
        table_name = self.__mapper.convert_bucket(bucket)
        if self.__dbschema:
            table_name = '.'.join((self.__dbschema, table_name))
        return self.__metadata.tables[table_name]

    def __reflect(self):
        """Reflect metadata
        """
        def only(name, _):
            # Respect the caller-supplied filter AND the bucket naming scheme.
            return self.__only(name) and self.__mapper.restore_bucket(name) is not None
        self.__metadata.reflect(only=only)
class OracleTopicStorage(TopicStorageInterface):
    """Oracle-backed implementation of the topic data storage interface.

    Table objects are reflected on demand into a shared ``MetaData`` that is
    guarded by an RLock, because reflection mutates shared state. Raw JSON
    payloads are stored in CLOB columns and (de)serialized transparently.
    """

    # Class-level placeholders; the real values are assigned in __init__.
    engine = None
    insp = None
    metadata = MetaData()

    def __init__(self, client, storage_template):
        self.engine = client
        self.storage_template = storage_template
        self.insp = inspect(client)
        self.metadata = MetaData()
        self.lock = threading.RLock()
        log.info("topic oracle template initialized")

    def get_topic_table_by_name(self, table_name):
        """Reflect and return the Table for *table_name* (thread-safe)."""
        with self.lock:
            return Table(table_name, self.metadata, extend_existing=False,
                         autoload=True, autoload_with=self.engine)

    def build_oracle_where_expression(self, table, where):
        """Translate a mongo-style *where* dict into a SQLAlchemy clause.

        Only the first item of *where* is effectively considered (the loop
        returns on its first match); "and"/"or" lists combine recursively.
        """
        for key, value in where.items():
            if key == "and" or key == "or":
                if isinstance(value, list):
                    filters = [self.build_oracle_where_expression(table, express)
                               for express in value]
                    if key == "and":
                        return and_(*filters)
                    if key == "or":
                        return or_(*filters)
            else:
                if isinstance(value, dict):
                    for k, v in value.items():
                        if k == "=":
                            return table.c[key.lower()] == v
                        if k == "!=":
                            return operator.ne(table.c[key.lower()], v)
                        if k == "like":
                            # BUG FIX: the original guard
                            # (v != "" or v != '' or v is not None) was a
                            # tautology; require a non-empty value instead.
                            if v:
                                return table.c[key.lower()].like("%" + v + "%")
                        if k == "in":
                            if isinstance(table.c[key.lower()].type, CLOB):
                                # CLOB columns hold JSON; use json_exists.
                                if isinstance(v, list):
                                    value_ = ",".join(v)
                                else:
                                    value_ = v
                                return text('json_exists(' + key.lower() +
                                            ', \'$?(@ in (\"' + value_ + '\"))\')')
                            else:
                                if isinstance(v, list):
                                    if len(v) != 0:
                                        return table.c[key.lower()].in_(v)
                                elif isinstance(v, str):
                                    return table.c[key.lower()].in_(v.split(","))
                                else:
                                    raise TypeError(
                                        "operator in, the value \"{0}\" is not list or str"
                                        .format(v))
                        if k == "not-in":
                            if isinstance(table.c[key.lower()].type, CLOB):
                                if isinstance(v, list):
                                    value_ = ",".join(v)
                                else:
                                    value_ = v
                                return text('json_exists(' + key.lower() +
                                            ', \'$?(@ not in (\"' + value_ + '\"))\')')
                            else:
                                if isinstance(v, list):
                                    if len(v) != 0:
                                        return table.c[key.lower()].notin_(v)
                                elif isinstance(v, str):
                                    return table.c[key.lower()].notin_(v.split(","))
                                else:
                                    raise TypeError(
                                        "operator not_in, the value \"{0}\" is not list or str"
                                        .format(v))
                        if k == ">":
                            return table.c[key.lower()] > v
                        if k == ">=":
                            return table.c[key.lower()] >= v
                        if k == "<":
                            return table.c[key.lower()] < v
                        if k == "<=":
                            return table.c[key.lower()] <= v
                        if k == "between":
                            if (isinstance(v, tuple)) and len(v) == 2:
                                return table.c[key.lower()].between(
                                    self._check_value_type(v[0]),
                                    self._check_value_type(v[1]))
                else:
                    # Plain value means equality.
                    return table.c[key.lower()] == value

    def build_oracle_updates_expression(self, table, updates, stmt_type: str) -> dict:
        """Build the VALUES/SET mapping for an "insert" or "update" statement.

        CLOB columns receive JSON-dumped payloads; ``id_``/``version_`` are
        managed by the storage, and ``{"_sum"/"_count"/"_avg": x}`` values
        express aggregate-style updates.
        """
        if stmt_type == "insert":
            new_updates = {}
            for key in table.c.keys():
                if key == "id_":
                    new_updates[key] = get_surrogate_key()
                elif key == "version_":
                    new_updates[key] = 0
                else:
                    if isinstance(table.c[key].type, CLOB):
                        if updates.get(key) is not None:
                            new_updates[key] = dumps(updates.get(key))
                        else:
                            new_updates[key] = None
                    else:
                        if updates.get(key) is not None:
                            value_ = updates.get(key)
                            if isinstance(value_, dict):
                                for k, v in value_.items():
                                    # On insert the aggregate seed is the
                                    # raw value regardless of the operator.
                                    if k in ("_sum", "_count", "_avg"):
                                        new_updates[key.lower()] = v
                            else:
                                new_updates[key] = value_
                        else:
                            # Missing value: fall back to the column default.
                            default_value = self._get_table_column_default_value(
                                table.name, key)
                            if default_value is not None:
                                value_ = default_value.strip("'").strip(" ")
                                if value_.isdigit():
                                    new_updates[key] = Decimal(value_)
                                else:
                                    new_updates[key] = value_
                            else:
                                new_updates[key] = None
            return new_updates
        elif stmt_type == "update":
            new_updates = {}
            for key in table.c.keys():
                if key == "version_":
                    # Optimistic-locking counter.
                    new_updates[key] = updates.get(key) + 1
                else:
                    if isinstance(table.c[key].type, CLOB):
                        if updates.get(key) is not None:
                            new_updates[key] = dumps(updates.get(key))
                    else:
                        if updates.get(key) is not None:
                            value_ = updates.get(key)
                            if isinstance(value_, dict):
                                for k, v in value_.items():
                                    if k == "_sum":
                                        new_updates[key.lower()] = text(
                                            f'{key.lower()} + {v}')
                                    elif k == "_count":
                                        new_updates[key.lower()] = text(
                                            f'{key.lower()} + {v}')
                                    elif k == "_avg":
                                        pass  # todo
                            else:
                                new_updates[key] = value_
            return new_updates

    def build_oracle_order(self, table, order_: list):
        """Convert [(column, "asc"/"desc"), ...] into ORDER BY clauses."""
        result = []
        if order_ is None:
            return result
        for item in order_:
            if isinstance(item, tuple):
                if item[1] == "desc":
                    result.append(desc(table.c[item[0].lower()]))
                if item[1] == "asc":
                    result.append(asc(table.c[item[0].lower()]))
        return result

    '''
    topic data interface
    '''

    def drop_(self, topic_name):
        return self.drop_topic_data_table(topic_name)

    def drop_topic_data_table(self, topic_name):
        # BUG FIX: table_name is computed before the try block so the
        # except handler can never hit an unbound local.
        table_name = build_collection_name(topic_name)
        try:
            table = self.get_topic_table_by_name(table_name)
            table.drop(self.engine)
            self.clear_metadata()
        except NoSuchTableError:
            log.info("NoSuchTableError: {0}".format(table_name))

    def topic_data_delete_(self, where, topic_name):
        """Delete rows matching *where*; delete all rows when where is None."""
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        if where is None:
            stmt = delete(table)
        else:
            stmt = delete(table).where(
                self.build_oracle_where_expression(table, where))
        with self.engine.connect() as conn:
            conn.execute(stmt)

    def topic_data_insert_one(self, one, topic_name):
        """Insert a single record; raises InsertConflictError on key clash."""
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        one_dict: dict = capital_to_lower(convert_to_dict(one))
        value = self.build_oracle_updates_expression(table, one_dict, "insert")
        stmt = insert(table)
        with self.engine.connect() as conn:
            with conn.begin():
                try:
                    result = conn.execute(stmt, value)
                except IntegrityError as e:
                    raise InsertConflictError("InsertConflict") from e
        return result.rowcount

    def topic_data_insert_(self, data, topic_name):
        """Bulk-insert a list of records."""
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        values = []
        for instance in data:
            one_dict: dict = capital_to_lower(convert_to_dict(instance))
            values.append(self.build_oracle_updates_expression(
                table, one_dict, "insert"))
        stmt = insert(table)
        with self.engine.connect() as conn:
            result = conn.execute(stmt, values)

    def topic_data_update_one(self, id_: str, one: any, topic_name: str):
        """Update the record with primary key *id_*."""
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = update(table).where(eq(table.c['id_'], id_))
        one_dict = capital_to_lower(convert_to_dict(one))
        value = self.build_oracle_updates_expression(table, one_dict, "update")
        stmt = stmt.values(value)
        with self.engine.begin() as conn:
            result = conn.execute(stmt)
        return result.rowcount

    def topic_data_update_one_with_version(self, id_: str, version_: int,
                                           one: any, topic_name: str):
        """Optimistically-locked update; raises when the version is stale."""
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = update(table).where(
            and_(eq(table.c['id_'], id_), eq(table.c['version_'], version_)))
        one_dict = capital_to_lower(convert_to_dict(one))
        value = self.build_oracle_updates_expression(table, one_dict, "update")
        stmt = stmt.values(value)
        with self.engine.begin() as conn:
            result = conn.execute(stmt)
        if result.rowcount == 0:
            raise OptimisticLockError("Optimistic lock error")

    def topic_data_update_(self, query_dict, instances: list, topic_name):
        """Update every row matching *query_dict* with the given values."""
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = (update(table).where(
            self.build_oracle_where_expression(table, query_dict)))
        values = []
        for instance in instances:
            one_dict = capital_to_lower(convert_to_dict(instance))
            values.append(self.build_oracle_updates_expression(
                table, one_dict, "update"))
        stmt = stmt.values(values)
        with self.engine.begin() as conn:
            result = conn.execute(stmt)

    def topic_data_find_by_id(self, id_: str, topic_name: str) -> any:
        return self.topic_data_find_one({"id_": id_}, topic_name)

    def topic_data_find_one(self, where, topic_name) -> any:
        """Fetch one row matching *where*, decoding CLOB columns as JSON."""
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = select(table).where(
            self.build_oracle_where_expression(table, where))
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            cursor.rowfactory = lambda *args: dict(zip(columns, args))
            row = cursor.fetchone()
        if row is None:
            return None
        result = {}
        for name in columns:
            if isinstance(table.c[name.lower()].type, CLOB):
                result[name] = json.loads(row[name]) if row[name] is not None else None
            else:
                result[name] = row[name]
        return self._convert_dict_key(result, topic_name)

    def topic_data_find_(self, where, topic_name):
        """Fetch all rows matching *where*, decoding CLOB columns as JSON."""
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = select(table).where(
            self.build_oracle_where_expression(table, where))
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            cursor.rowfactory = lambda *args: dict(zip(columns, args))
            rows = cursor.fetchall()
        if rows is None:
            return None
        if isinstance(rows, list):
            results = []
            for row in rows:
                result = {}
                for name in columns:
                    if isinstance(table.c[name.lower()].type, CLOB):
                        result[name] = (json.loads(row[name])
                                        if row[name] is not None else None)
                    else:
                        result[name] = row[name]
                results.append(self._convert_dict_key(result, topic_name))
            return results
        else:
            result = {}
            for index, name in enumerate(columns):
                if isinstance(table.c[name.lower()].type, CLOB):
                    result[name] = dumps(rows[index])
                else:
                    result[name] = rows[index]
            return result

    def topic_data_find_with_aggregate(self, where, topic_name, aggregate):
        """Run a single-column sum/count/avg aggregate over matching rows."""
        table_name = 'topic_' + topic_name
        table = self.get_topic_table_by_name(table_name)
        return_column_name = None
        stmt = None
        for key, value in aggregate.items():
            if value == "sum":
                stmt = select(text(f'sum({key.lower()}) as sum_{key.lower()}'))
                return_column_name = f'SUM_{key.upper()}'
            elif value == "count":
                # Consistency fix: wrap the raw SQL fragment in text() like
                # the other branches instead of passing a bare string.
                stmt = select(text('count(*) as count'))
                return_column_name = 'COUNT'
            elif value == "avg":
                stmt = select(text(f'avg({key.lower()}) as avg_{key.lower()}'))
                return_column_name = f'AVG_{key.upper()}'
        stmt = stmt.select_from(table)
        stmt = stmt.where(self.build_oracle_where_expression(table, where))
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            cursor.rowfactory = lambda *args: dict(zip(columns, args))
            res = cursor.fetchone()
        if res is None:
            return None
        return res[return_column_name]

    def topic_data_list_all(self, topic_name) -> list:
        """Return every row of the topic; raw topics yield the DATA_ payload."""
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = select(table)
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt).cursor
            columns = [col[0] for col in cursor.description]
            cursor.rowfactory = lambda *args: dict(zip(columns, args))
            rows = cursor.fetchall()
        if rows is None:
            return None
        # BUG FIX: the original called check_topic_type(name), where ``name``
        # was the last iterated *column* name, not the topic name.
        is_raw = self.storage_template.check_topic_type(topic_name) == "raw"
        results = []
        for row in rows:
            result = {}
            for name in columns:
                if isinstance(table.c[name.lower()].type, CLOB):
                    result[name] = (json.loads(row[name])
                                    if row[name] is not None else None)
                else:
                    result[name] = row[name]
            if is_raw:
                results.append(result['DATA_'])
            else:
                results.append(result)
        if is_raw:
            return results
        return self._convert_list_elements_key(results, topic_name)

    def topic_data_page_(self, where, sort, pageable, model, name) -> DataPage:
        """Return one page of topic data as a DataPage."""
        table_name = build_collection_name(name)
        count = self.count_topic_data_table(table_name)
        table = self.get_topic_table_by_name(table_name)
        stmt = select(table).where(
            self.build_oracle_where_expression(table, where))
        for order in self.build_oracle_order(table, sort):
            stmt = stmt.order_by(order)
        offset = pageable.pageSize * (pageable.pageNumber - 1)
        # Compile with literal binds and append Oracle's row-limiting
        # clause; offset/limit stay as bind parameters.
        stmt = text(
            str(stmt.compile(compile_kwargs={"literal_binds": True})) +
            " OFFSET :offset ROWS FETCH NEXT :maxnumrows ROWS ONLY")
        result = []
        with self.engine.connect() as conn:
            cursor = conn.execute(stmt, {
                "offset": offset,
                "maxnumrows": pageable.pageSize
            }).cursor
            columns = [col[0] for col in cursor.description]
            cursor.rowfactory = lambda *args: dict(zip(columns, args))
            res = cursor.fetchall()
        if self.storage_template.check_topic_type(name) == "raw":
            for row in res:
                result.append(json.loads(row['DATA_']))
        else:
            for row in res:
                if model is not None:
                    result.append(parse_obj(model, row, table))
                else:
                    result.append(row)
        return build_data_pages(pageable, result, count)

    def clear_metadata(self):
        self.metadata.clear()

    '''
    protected method, used by class own method
    '''

    def _get_table_column_default_value(self, table_name, column_name):
        """Return the column default, using the per-table column cache."""
        cached_columns = cacheman[COLUMNS_BY_TABLE_NAME].get(table_name)
        if cached_columns is not None:
            columns = cached_columns
        else:
            columns = self.insp.get_columns(table_name)
            cacheman[COLUMNS_BY_TABLE_NAME].set(table_name, columns)
        for column in columns:
            if column["name"] == column_name:
                return column["default"]

    def _convert_dict_key(self, dict_info, topic_name):
        """Map upper-cased DB column names back to factor/meta field names."""
        if dict_info is None:
            return None
        new_dict = {}
        factors = self.storage_template.get_topic_factors(topic_name)
        for factor in factors:
            new_dict[factor['name']] = dict_info[factor['name'].upper()]
        new_dict['id_'] = dict_info['ID_']
        if 'TENANT_ID_' in dict_info:
            new_dict['tenant_id_'] = dict_info.get("TENANT_ID_", 1)
        if "INSERT_TIME_" in dict_info:
            new_dict['insert_time_'] = dict_info.get(
                "INSERT_TIME_", datetime.datetime.now().replace(tzinfo=None))
        if "UPDATE_TIME_" in dict_info:
            new_dict['update_time_'] = dict_info.get(
                "UPDATE_TIME_", datetime.datetime.now().replace(tzinfo=None))
        if "VERSION_" in dict_info:
            new_dict['version_'] = dict_info.get("VERSION_", 0)
        if "AGGREGATE_ASSIST_" in dict_info:
            new_dict['aggregate_assist_'] = dict_info.get("AGGREGATE_ASSIST_")
        return new_dict

    def _convert_list_elements_key(self, list_info, topic_name):
        """List variant of _convert_dict_key."""
        if list_info is None:
            return None
        new_list = []
        factors = self.storage_template.get_topic_factors(topic_name)
        for item in list_info:
            new_dict = {}
            for factor in factors:
                new_dict[factor['name']] = item[factor['name'].upper()]
            new_dict['id_'] = item['ID_']
            if 'TENANT_ID_' in item:
                new_dict['tenant_id_'] = item.get("TENANT_ID_", 1)
            # BUG FIX: the original tested the bare string literal
            # (always truthy); mirror _convert_dict_key's membership check.
            if "INSERT_TIME_" in item:
                new_dict['insert_time_'] = item.get(
                    "INSERT_TIME_", datetime.datetime.now().replace(tzinfo=None))
            if "UPDATE_TIME_" in item:
                new_dict['update_time_'] = item.get(
                    "UPDATE_TIME_", datetime.datetime.now().replace(tzinfo=None))
            if "VERSION_" in item:
                new_dict['version_'] = item.get("VERSION_", 0)
            if "AGGREGATE_ASSIST_" in item:
                new_dict['aggregate_assist_'] = item.get("AGGREGATE_ASSIST_")
            new_list.append(new_dict)
        return new_list

    @staticmethod
    def _check_value_type(value):
        """Wrap date/datetime values in Oracle TO_DATE conversions."""
        if isinstance(value, datetime.datetime):
            return func.to_date(value, "yyyy-mm-dd hh24:mi:ss")
        elif isinstance(value, datetime.date):
            return func.to_date(value, "yyyy-mm-dd")
        else:
            return value

    def count_topic_data_table(self, table_name):
        """Return the row count of *table_name*."""
        stmt = 'SELECT count(%s) AS count FROM %s' % ('id_', table_name)
        with self.engine.connect() as conn:
            cursor = conn.execute(text(stmt)).cursor
            columns = [col[0] for col in cursor.description]
            cursor.rowfactory = lambda *args: dict(zip(columns, args))
            result = cursor.fetchone()
        return result['COUNT']
class Storage(tableschema.Storage):
    """SQL storage

    Package implements
    [Tabular Storage](https://github.com/frictionlessdata/tableschema-py#storage)
    interface (see full documentation on the link):

    ![Storage](https://i.imgur.com/RQgrxqp.png)

    > Only additional API is documented

    # Arguments
        engine (object): `sqlalchemy` engine
        dbschema (str): name of database schema
        prefix (str): prefix for all buckets
        reflect_only (callable):
            a boolean predicate to filter the list of table names when reflecting
        autoincrement (str/dict):
            add autoincrement column at the beginning.
            - if a string it's an autoincrement column name
            - if a dict it's an autoincrements mapping with column
              names indexed by bucket names, for example,
              `{'bucket1'\\: 'id', 'bucket2'\\: 'other_id'}`
    """

    # Public

    def __init__(self, engine, dbschema=None, prefix='', reflect_only=None,
                 autoincrement=None):
        # Set attributes
        self.__connection = engine.connect()
        self.__dbschema = dbschema
        self.__prefix = prefix
        self.__descriptors = {}
        self.__fallbacks = {}
        self.__autoincrement = autoincrement
        # Predicate used to filter table names during reflection.
        self.__only = reflect_only or (lambda _: True)
        self.__dialect = engine.dialect.name
        # Added regex support to sqlite
        if self.__dialect == 'sqlite':
            def regexp(expr, item):
                reg = re.compile(expr)
                return reg.search(item) is not None
            # It will fail silently if this function already exists
            self.__connection.connection.create_function('REGEXP', 2, regexp)
        # Create mapper
        self.__mapper = Mapper(prefix=prefix, dialect=self.__dialect)
        # Create metadata and reflect
        self.__metadata = MetaData(bind=self.__connection,
                                   schema=self.__dbschema)
        self.__reflect()

    def __repr__(self):
        # Template and format
        template = 'Storage <{engine}/{dbschema}>'
        text = template.format(
            engine=self.__connection.engine,
            dbschema=self.__dbschema)
        return text

    @property
    def buckets(self):
        buckets = []
        for table in self.__metadata.sorted_tables:
            # Tables that don't match the naming convention restore to None
            # and are skipped.
            bucket = self.__mapper.restore_bucket(table.name)
            if bucket is not None:
                buckets.append(bucket)
        return buckets

    def create(self, bucket, descriptor, force=False, indexes_fields=None):
        """Create bucket

        # Arguments
            indexes_fields (str[]):
                list of tuples containing field names, or list of such lists
        """
        # Make lists — single bucket/descriptor calls are normalized to
        # the multi-bucket form.
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]
        if indexes_fields is None or len(indexes_fields) == 0:
            indexes_fields = [()] * len(descriptors)
        elif type(indexes_fields[0][0]) not in {list, tuple}:
            # A flat list of tuples was passed for a single bucket.
            indexes_fields = [indexes_fields]
        # Check dimensions
        if not (len(buckets) == len(descriptors) == len(indexes_fields)):
            raise tableschema.exceptions.StorageError('Wrong argument dimensions')
        # Check buckets for existence
        for bucket in reversed(self.buckets):
            if bucket in buckets:
                if not force:
                    message = 'Bucket "%s" already exists.' % bucket
                    raise tableschema.exceptions.StorageError(message)
                self.delete(bucket)
        # Define buckets
        for bucket, descriptor, index_fields in zip(buckets, descriptors, indexes_fields):
            tableschema.validate(descriptor)
            table_name = self.__mapper.convert_bucket(bucket)
            autoincrement = self.__get_autoincrement_for_bucket(bucket)
            columns, constraints, indexes, fallbacks, table_comment = self.__mapper \
                .convert_descriptor(bucket, descriptor, index_fields, autoincrement)
            # Registering the Table on self.__metadata is what makes
            # create_all() below pick it up.
            Table(table_name, self.__metadata, *(columns + constraints + indexes),
                  comment=table_comment)
            self.__descriptors[bucket] = descriptor
            self.__fallbacks[bucket] = fallbacks
        # Create tables, update metadata
        try:
            self.__metadata.create_all()
        except sqlalchemy.exc.ProgrammingError as exception:
            # NOTE(review): exceptions whose message does not match the
            # string below are silently swallowed here — likely should
            # re-raise in an else branch; confirm intended behavior.
            if 'there is no unique constraint matching given keys' in str(exception):
                message = 'Foreign keys can only reference primary key or unique fields\n%s'
                six.raise_from(
                    tableschema.exceptions.ValidationError(message % str(exception)),
                    None)

    def delete(self, bucket=None, ignore=False):
        # Make lists; bucket=None means "delete everything".
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        elif bucket is None:
            buckets = reversed(self.buckets)
        # Iterate
        tables = []
        for bucket in buckets:
            # Check existent
            if bucket not in self.buckets:
                if not ignore:
                    message = 'Bucket "%s" doesn\'t exist.' % bucket
                    raise tableschema.exceptions.StorageError(message)
                return
            # Remove from buckets
            if bucket in self.__descriptors:
                del self.__descriptors[bucket]
            # Add table to tables
            table = self.__get_table(bucket)
            tables.append(table)
        # Drop tables, update metadata
        self.__metadata.drop_all(tables=tables)
        self.__metadata.clear()
        self.__reflect()

    def describe(self, bucket, descriptor=None):
        # Set descriptor
        if descriptor is not None:
            self.__descriptors[bucket] = descriptor
        # Get descriptor — fall back to restoring it from the reflected table.
        else:
            descriptor = self.__descriptors.get(bucket)
            if descriptor is None:
                table = self.__get_table(bucket)
                autoincrement = self.__get_autoincrement_for_bucket(bucket)
                descriptor = self.__mapper.restore_descriptor(
                    table.name, table.columns, table.constraints, autoincrement)
        return descriptor

    def iter(self, bucket):
        # Get table and fallbacks
        table = self.__get_table(bucket)
        schema = tableschema.Schema(self.describe(bucket))
        autoincrement = self.__get_autoincrement_for_bucket(bucket)
        # Open and close transaction
        with self.__connection.begin():
            # Streaming could be not working for some backends:
            # http://docs.sqlalchemy.org/en/latest/core/connections.html
            select = table.select().execution_options(stream_results=True)
            result = select.execute()
            for row in result:
                row = self.__mapper.restore_row(
                    row, schema=schema, autoincrement=autoincrement)
                yield row

    def read(self, bucket):
        rows = list(self.iter(bucket))
        return rows

    def write(self, bucket, rows, keyed=False, as_generator=False,
              update_keys=None, buffer_size=1000, use_bloom_filter=True):
        """Write to bucket

        # Arguments
            keyed (bool):
                accept keyed rows
            as_generator (bool):
                returns generator to provide writing control to the client
            update_keys (str[]):
                update instead of inserting if key values match existent rows
            buffer_size (int=1000):
                maximum number of rows to try and write to the db in one batch
            use_bloom_filter (bool=True):
                should we use a bloom filter to optimize DB update performance
                (in exchange for some setup time)
        """
        # Check update keys
        if update_keys is not None and len(update_keys) == 0:
            message = 'Argument "update_keys" cannot be an empty list'
            raise tableschema.exceptions.StorageError(message)
        # Get table and description
        table = self.__get_table(bucket)
        schema = tableschema.Schema(self.describe(bucket))
        fallbacks = self.__fallbacks.get(bucket, [])
        # Write rows to table
        convert_row = partial(self.__mapper.convert_row, schema=schema, fallbacks=fallbacks)
        autoincrement = self.__get_autoincrement_for_bucket(bucket)
        writer = Writer(table, schema,
                        # Only PostgreSQL supports "returning" so we don't use
                        # autoincrement for all
                        autoincrement=autoincrement if self.__dialect in ['postgresql'] else None,
                        update_keys=update_keys,
                        convert_row=convert_row,
                        buffer_size=buffer_size,
                        use_bloom_filter=use_bloom_filter)
        with self.__connection.begin():
            gen = writer.write(rows, keyed=keyed)
            if as_generator:
                # Caller drives the generator (and thus the transaction).
                return gen
            # Exhaust the generator without keeping results in memory.
            collections.deque(gen, maxlen=0)

    # Private

    def __get_table(self, bucket):
        # Metadata keys are schema-qualified when a dbschema is set.
        table_name = self.__mapper.convert_bucket(bucket)
        if self.__dbschema:
            table_name = '.'.join((self.__dbschema, table_name))
        return self.__metadata.tables[table_name]

    def __reflect(self):
        def only(name, _):
            # Respect the caller-supplied filter AND the bucket naming scheme.
            return self.__only(name) and self.__mapper.restore_bucket(name) is not None
        self.__metadata.reflect(only=only)

    def __get_autoincrement_for_bucket(self, bucket):
        # autoincrement may be a per-bucket mapping or a single column name.
        if isinstance(self.__autoincrement, dict):
            return self.__autoincrement.get(bucket)
        return self.__autoincrement
class Feed(object):
    """A bookmarked feed URL backed by the ``feed`` table.

    Wraps a single URL, lazily inserting it into the database on first
    use and extracting the users who bookmarked it via the Hatena
    bookmark entry API.
    """

    def __init__(self, engine, url, title=''):
        # level=20 is logging.INFO; use the named constant for clarity.
        logging.basicConfig(level=logging.INFO)
        self.engine = engine
        self.url = url
        self.title = title
        self.md = MetaData(self.engine)
        # Delay (seconds) inserted after each API extraction to be polite.
        self.sleep_sec = 1

    @property
    def id(self):
        """Feed id, inserting the feed row on first access if needed.

        The previous implementation always issued two SELECTs even when
        the row already existed; reuse the first result instead.
        """
        feed_id = self._load_id()
        if not feed_id:
            self._append()
            feed_id = self._load_id()
        return feed_id

    def extract(self):
        """Extract bookmarked users from setted url.

        Returns
        -------
        list
            ``User`` instances, empty when the API returns no result.
        """
        users = []
        # TODO: Load id in __init__
        if not self._load_id():
            self._append()
        api_url = self._make_entry_api_url(self.url)
        result = self._request(api_url)
        if not result:
            return users
        for b in result.get('bookmarks', []):
            if "user" not in b:
                continue
            users.append(User(self.engine, b["user"]))
        # NOTE(review): sleeping once per extract() call, i.e. per API
        # request — confirm this matches the intended rate limiting.
        time.sleep(self.sleep_sec)
        return users

    def _make_entry_api_url(self, url):
        """Create hatena bookmark entry api url for *url*."""
        # safe='' percent-encodes '/' as well, as the entry API expects.
        e_url = quote(url, safe='')
        return HATENA_ENTRY_URL.format(url=e_url)

    def _request(self, url):
        """Request api.

        Request argument url and return result data as dict.
        """
        return requests.get(url).json()

    def _load_id(self):
        """Load feed id from database, or None when the URL is unknown."""
        t = Table('feed', self.md)
        c_url = column('url')
        c_id = column('id')
        s = select(columns=[c_id], from_obj=t).where(c_url == self.url)
        r = s.execute().fetchone()
        if r:
            return r['id']
        return None

    def _append(self):
        """Add new feed url into database."""
        logging.info('SAVE MY FEED')
        logging.info(self.url)
        # Drop the stale (column-less) table definition, then reflect a
        # fresh one so the INSERT sees the real schema.
        self.md.clear()
        md = MetaData(self.engine)
        t = Table('feed', md, autoload=True)
        i = insert(t).values(url=self.url, title=self.title)
        i.execute()
        logging.info('----------------------')
class Storage(object):
    """SQL Tabular Storage.

    It's an implementation of `jsontableschema.Storage`.

    Args:
        engine (object): SQLAlchemy engine
        dbschema (str): database schema name
        prefix (str): prefix for all buckets
        reflect_only (callable): a boolean predicate to filter
            the list of table names when reflecting
    """

    # Public

    def __init__(self, engine, dbschema=None, prefix='', reflect_only=None):
        # Set attributes
        self.__connection = engine.connect()
        self.__dbschema = dbschema
        self.__prefix = prefix
        self.__descriptors = {}
        if reflect_only is not None:
            self.__only = reflect_only
        else:
            # By default reflect every table (bucket mapping filters later).
            self.__only = lambda _: True
        # Create metadata and load the current table definitions
        self.__metadata = MetaData(
            bind=self.__connection, schema=self.__dbschema)
        self.__reflect()

    def __repr__(self):
        # Template and format
        template = 'Storage <{engine}/{dbschema}>'
        text = template.format(
            engine=self.__connection.engine, dbschema=self.__dbschema)
        return text

    @property
    def buckets(self):
        """List of bucket names derived from reflected table names."""
        buckets = []
        for table in self.__metadata.sorted_tables:
            bucket = mappers.tablename_to_bucket(self.__prefix, table.name)
            if bucket is not None:
                buckets.append(bucket)
        return buckets

    def create(self, bucket, descriptor, force=False, indexes_fields=None):
        """Create table by schema.

        Parameters
        ----------
        bucket: str/list
            Bucket name or list of bucket names.
        descriptor: dict/list
            JSONTableSchema descriptor or list of descriptors.
        force: bool
            Delete and re-create the bucket if it already exists.
        indexes_fields: list
            list of tuples containing field names, or list of such lists

        Raises
        ------
        RuntimeError
            If table already exists and ``force`` is not set.
        """
        # Normalize all arguments to parallel lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]
        if indexes_fields is None or len(indexes_fields) == 0:
            indexes_fields = [()] * len(descriptors)
        elif type(indexes_fields[0][0]) not in {list, tuple}:
            indexes_fields = [indexes_fields]
        assert len(indexes_fields) == len(descriptors)
        assert len(buckets) == len(descriptors)

        # Check buckets for existence
        for bucket in reversed(self.buckets):
            if bucket in buckets:
                if not force:
                    message = 'Bucket "%s" already exists.' % bucket
                    raise RuntimeError(message)
                self.delete(bucket)

        # Define buckets
        for bucket, descriptor, index_fields in zip(buckets, descriptors, indexes_fields):
            # Remember descriptor for describe()
            self.__descriptors[bucket] = descriptor
            # Create table
            jsontableschema.validate(descriptor)
            tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
            columns, constraints, indexes = mappers.descriptor_to_columns_and_constraints(
                self.__prefix, bucket, descriptor, index_fields)
            Table(tablename, self.__metadata, *(columns + constraints + indexes))

        # Create tables, update metadata
        self.__metadata.create_all()

    def delete(self, bucket=None, ignore=False):
        """Delete bucket(s); all buckets when ``bucket`` is None.

        Raises
        ------
        RuntimeError
            If a bucket doesn't exist and ``ignore`` is not set.
        """
        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        elif bucket is None:
            buckets = reversed(self.buckets)

        # Iterate over buckets
        tables = []
        for bucket in buckets:
            # Check existent
            if bucket not in self.buckets:
                if not ignore:
                    message = 'Bucket "%s" doesn\'t exist.' % bucket
                    raise RuntimeError(message)
                # FIX: previously fell through and raised KeyError from
                # __get_table when ignore=True; skip the missing bucket.
                continue
            # Remove from buckets
            if bucket in self.__descriptors:
                del self.__descriptors[bucket]
            # Add table to tables
            table = self.__get_table(bucket)
            tables.append(table)

        # Drop tables, update metadata
        self.__metadata.drop_all(tables=tables)
        self.__metadata.clear()
        self.__reflect()

    def describe(self, bucket, descriptor=None):
        """Set (when ``descriptor`` given) or get a bucket's descriptor."""
        # Set descriptor
        if descriptor is not None:
            self.__descriptors[bucket] = descriptor
        # Get descriptor
        else:
            descriptor = self.__descriptors.get(bucket)
            if descriptor is None:
                # Restore the descriptor from the reflected table
                table = self.__get_table(bucket)
                descriptor = mappers.columns_and_constraints_to_descriptor(
                    self.__prefix, table.name, table.columns, table.constraints)
        return descriptor

    def iter(self, bucket):
        """Yield rows (as lists) from a bucket's table."""
        # Get result
        table = self.__get_table(bucket)
        # Streaming could be not working for some backends:
        # http://docs.sqlalchemy.org/en/latest/core/connections.html
        select = table.select().execution_options(stream_results=True)
        result = select.execute()
        # Yield data
        for row in result:
            yield list(row)

    def read(self, bucket):
        """Read all rows from a bucket into a list."""
        rows = list(self.iter(bucket))
        return rows

    def write(self, bucket, rows):
        """Write ``rows`` (index-based tuples) into a bucket's table.

        Values are cast through the bucket's JSONTableSchema; values the
        schema rejects with ``InvalidObjectType`` are parsed as JSON.
        """
        # Prepare
        BUFFER_SIZE = 1000
        descriptor = self.describe(bucket)
        schema = jsontableschema.Schema(descriptor)
        table = self.__get_table(bucket)

        # Write inside one transaction, flushing in batches
        with self.__connection.begin():
            keyed_rows = []
            for row in rows:
                keyed_row = {}
                for index, field in enumerate(schema.fields):
                    value = row[index]
                    try:
                        value = field.cast_value(value)
                    except InvalidObjectType:
                        value = json.loads(value)
                    keyed_row[field.name] = value
                keyed_rows.append(keyed_row)
                if len(keyed_rows) > BUFFER_SIZE:
                    # Insert data
                    table.insert().execute(keyed_rows)
                    # Clean memory
                    keyed_rows = []
            if len(keyed_rows) > 0:
                # Insert remaining data
                table.insert().execute(keyed_rows)

    # Private

    def __get_table(self, bucket):
        """Return SQLAlchemy table for the given bucket."""
        # Prepare name
        tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
        if self.__dbschema:
            # FIX: str.join takes one iterable argument; passing two
            # positional strings raised TypeError.
            tablename = '.'.join((self.__dbschema, tablename))
        return self.__metadata.tables[tablename]

    def __reflect(self):
        """Reflect only tables that map back to a bucket name."""
        def only(name, _):
            ret = (
                self.__only(name) and
                mappers.tablename_to_bucket(self.__prefix, name) is not None
            )
            return ret
        self.__metadata.reflect(only=only)
class ApdbSchema(object):
    """Class for management of APDB schema.

    Attributes
    ----------
    objects : `sqlalchemy.Table`
        DiaObject table instance
    objects_nightly : `sqlalchemy.Table`
        DiaObjectNightly table instance, may be None
    objects_last : `sqlalchemy.Table`
        DiaObjectLast table instance, may be None
    sources : `sqlalchemy.Table`
        DiaSource table instance
    forcedSources : `sqlalchemy.Table`
        DiaForcedSource table instance
    visits : `sqlalchemy.Table`
        ApdbProtoVisits table instance

    Parameters
    ----------
    engine : `sqlalchemy.engine.Engine`
        SQLAlchemy engine instance
    dia_object_index : `str`
        Indexing mode for DiaObject table, see `ApdbConfig.dia_object_index`
        for details.
    dia_object_nightly : `bool`
        If `True` then create per-night DiaObject table as well.
    schema_file : `str`
        Name of the YAML schema file.
    extra_schema_file : `str`, optional
        Name of the YAML schema file with extra column definitions.
    column_map : `str`, optional
        Name of the YAML file with column mappings.
    afw_schemas : `dict`, optional
        Dictionary with table name for a key and `afw.table.Schema`
        for a value. Columns in schema will be added to standard APDB
        schema (only if standard schema does not have matching column).
    prefix : `str`, optional
        Prefix to add to all scheam elements.
    """

    # map afw type names into cat type names
    _afw_type_map = {"I": "INT",
                     "L": "BIGINT",
                     "F": "FLOAT",
                     "D": "DOUBLE",
                     "Angle": "DOUBLE",
                     "String": "CHAR",
                     "Flag": "BOOL"}
    # reverse mapping: cat type name -> afw field type code
    _afw_type_map_reverse = {"INT": "I",
                             "BIGINT": "L",
                             "FLOAT": "F",
                             "DOUBLE": "D",
                             "DATETIME": "L",
                             "CHAR": "String",
                             "BOOL": "Flag"}

    def __init__(self, engine, dia_object_index, dia_object_nightly,
                 schema_file, extra_schema_file=None, column_map=None,
                 afw_schemas=None, prefix=""):

        self._engine = engine
        self._dia_object_index = dia_object_index
        self._dia_object_nightly = dia_object_nightly
        self._prefix = prefix

        self._metadata = MetaData(self._engine)

        # Table attributes are filled in by _makeTables() below.
        self.objects = None
        self.objects_nightly = None
        self.objects_last = None
        self.sources = None
        self.forcedSources = None
        self.visits = None

        if column_map:
            _LOG.debug("Reading column map file %s", column_map)
            with open(column_map) as yaml_stream:
                # maps cat column name to afw column name
                self._column_map = yaml.load(yaml_stream, Loader=yaml.SafeLoader)
                _LOG.debug("column map: %s", self._column_map)
        else:
            _LOG.debug("No column map file is given, initialize to empty")
            self._column_map = {}
        self._column_map_reverse = {}
        for table, cmap in self._column_map.items():
            # maps afw column name to cat column name
            self._column_map_reverse[table] = {v: k for k, v in cmap.items()}
        _LOG.debug("reverse column map: %s", self._column_map_reverse)

        # build complete table schema
        self._schemas = self._buildSchemas(schema_file, extra_schema_file,
                                           afw_schemas)

        # map cat column types to alchemy
        self._type_map = dict(DOUBLE=self._getDoubleType(),
                              FLOAT=sqlalchemy.types.Float,
                              DATETIME=sqlalchemy.types.TIMESTAMP,
                              BIGINT=sqlalchemy.types.BigInteger,
                              INTEGER=sqlalchemy.types.Integer,
                              INT=sqlalchemy.types.Integer,
                              TINYINT=sqlalchemy.types.Integer,
                              BLOB=sqlalchemy.types.LargeBinary,
                              CHAR=sqlalchemy.types.CHAR,
                              BOOL=sqlalchemy.types.Boolean)

        # generate schema for all tables, must be called last
        self._makeTables()

    def _makeTables(self, mysql_engine='InnoDB', oracle_tablespace=None,
                    oracle_iot=False):
        """Generate schema for all tables.

        Parameters
        ----------
        mysql_engine : `str`, optional
            MySQL engine type to use for new tables.
        oracle_tablespace : `str`, optional
            Name of Oracle tablespace, only useful with oracle
        oracle_iot : `bool`, optional
            Make Index-organized DiaObjectLast table.
        """

        info = dict(oracle_tablespace=oracle_tablespace)

        if self._dia_object_index == 'pix_id_iov':
            # Special PK with HTM column in first position
            constraints = self._tableIndices('DiaObjectIndexHtmFirst', info)
        else:
            constraints = self._tableIndices('DiaObject', info)
        table = Table(self._prefix+'DiaObject', self._metadata,
                      *(self._tableColumns('DiaObject') + constraints),
                      mysql_engine=mysql_engine,
                      info=info)
        self.objects = table

        if self._dia_object_nightly:
            # Same as DiaObject but no index
            table = Table(self._prefix+'DiaObjectNightly', self._metadata,
                          *self._tableColumns('DiaObject'),
                          mysql_engine=mysql_engine,
                          info=info)
            self.objects_nightly = table

        if self._dia_object_index == 'last_object_table':
            # Same as DiaObject but with special index
            info2 = info.copy()
            info2.update(oracle_iot=oracle_iot)
            table = Table(self._prefix+'DiaObjectLast', self._metadata,
                          *(self._tableColumns('DiaObjectLast') +
                            self._tableIndices('DiaObjectLast', info)),
                          mysql_engine=mysql_engine,
                          info=info2)
            self.objects_last = table

        # for all other tables use index definitions in schema
        for table_name in ('DiaSource', 'SSObject', 'DiaForcedSource',
                           'DiaObject_To_Object_Match'):
            table = Table(self._prefix+table_name, self._metadata,
                          *(self._tableColumns(table_name) +
                            self._tableIndices(table_name, info)),
                          mysql_engine=mysql_engine,
                          info=info)
            if table_name == 'DiaSource':
                self.sources = table
            elif table_name == 'DiaForcedSource':
                self.forcedSources = table

        # special table to track visits, only used by prototype
        table = Table(self._prefix+'ApdbProtoVisits', self._metadata,
                      Column('visitId', sqlalchemy.types.BigInteger, nullable=False),
                      Column('visitTime', sqlalchemy.types.TIMESTAMP, nullable=False),
                      PrimaryKeyConstraint('visitId', name=self._prefix+'PK_ApdbProtoVisits'),
                      Index(self._prefix+'IDX_ApdbProtoVisits_vTime', 'visitTime', info=info),
                      mysql_engine=mysql_engine,
                      info=info)
        self.visits = table

    def makeSchema(self, drop=False, mysql_engine='InnoDB',
                   oracle_tablespace=None, oracle_iot=False):
        """Create or re-create all tables.

        Parameters
        ----------
        drop : `bool`, optional
            If True then drop tables before creating new ones.
        mysql_engine : `str`, optional
            MySQL engine type to use for new tables.
        oracle_tablespace : `str`, optional
            Name of Oracle tablespace, only useful with oracle
        oracle_iot : `bool`, optional
            Make Index-organized DiaObjectLast table.
        """

        # re-make table schema for all needed tables with possibly different options
        _LOG.debug("clear metadata")
        self._metadata.clear()
        _LOG.debug("re-do schema mysql_engine=%r oracle_tablespace=%r",
                   mysql_engine, oracle_tablespace)
        self._makeTables(mysql_engine=mysql_engine,
                         oracle_tablespace=oracle_tablespace,
                         oracle_iot=oracle_iot)

        # create all tables (optionally drop first)
        if drop:
            _LOG.info('dropping all tables')
            self._metadata.drop_all()
        _LOG.info('creating all tables')
        self._metadata.create_all()

    def getAfwSchema(self, table_name, columns=None):
        """Return afw schema for given table.

        Parameters
        ----------
        table_name : `str`
            One of known APDB table names.
        columns : `list` of `str`, optional
            Include only given table columns in schema, by default all
            columns are included.

        Returns
        -------
        schema : `lsst.afw.table.Schema`
        column_map : `dict`
            Mapping of the table/result column names into schema key.
        """

        table = self._schemas[table_name]
        col_map = self._column_map.get(table_name, {})

        # make a schema
        col2afw = {}
        schema = afwTable.SourceTable.makeMinimalSchema()
        for column in table.columns:
            if columns and column.name not in columns:
                continue
            afw_col = col_map.get(column.name, column.name)
            if afw_col in schema.getNames():
                # Continue if the column is already in the minimal schema.
                key = schema.find(afw_col).getKey()
            elif column.type in ("DOUBLE", "FLOAT") and column.unit == "deg":
                #
                # NOTE: degree to radian conversion is not supported (yet)
                #
                # angles in afw are radians and have special "Angle" type
                key = schema.addField(afw_col,
                                      type="Angle",
                                      doc=column.description or "",
                                      units="rad")
            elif column.type == "BLOB":
                # No BLOB support for now
                key = None
            else:
                units = column.unit or ""
                # some units in schema are not recognized by afw but we do not care
                if self._afw_type_map_reverse[column.type] == 'String':
                    key = schema.addField(afw_col,
                                          type=self._afw_type_map_reverse[column.type],
                                          doc=column.description or "",
                                          units=units,
                                          parse_strict="silent",
                                          size=10)
                elif units == "deg":
                    key = schema.addField(afw_col,
                                          type='Angle',
                                          doc=column.description or "",
                                          parse_strict="silent")
                else:
                    key = schema.addField(afw_col,
                                          type=self._afw_type_map_reverse[column.type],
                                          doc=column.description or "",
                                          units=units,
                                          parse_strict="silent")
            col2afw[column.name] = key

        return schema, col2afw

    def getAfwColumns(self, table_name):
        """Returns mapping of afw column names to Column definitions.

        Parameters
        ----------
        table_name : `str`
            One of known APDB table names.

        Returns
        -------
        column_map : `dict`
            Mapping of afw column names to `ColumnDef` instances.
        """
        table = self._schemas[table_name]
        col_map = self._column_map.get(table_name, {})

        cmap = {}
        for column in table.columns:
            afw_name = col_map.get(column.name, column.name)
            cmap[afw_name] = column
        return cmap

    def getColumnMap(self, table_name):
        """Returns mapping of column names to Column definitions.

        Parameters
        ----------
        table_name : `str`
            One of known APDB table names.

        Returns
        -------
        column_map : `dict`
            Mapping of column names to `ColumnDef` instances.
        """
        table = self._schemas[table_name]
        cmap = {column.name: column for column in table.columns}
        return cmap

    def _buildSchemas(self, schema_file, extra_schema_file=None,
                      afw_schemas=None):
        """Create schema definitions for all tables.

        Reads YAML schemas and builds dictionary containing `TableDef`
        instances for each table.

        Parameters
        ----------
        schema_file : `str`
            Name of YAML file with standard cat schema.
        extra_schema_file : `str`, optional
            Name of YAML file with extra table information or `None`.
        afw_schemas : `dict`, optional
            Dictionary with table name for a key and `afw.table.Schema`
            for a value. Columns in schema will be added to standard APDB
            schema (only if standard schema does not have matching column).

        Returns
        -------
        schemas : `dict`
            Mapping of table names to `TableDef` instances.
        """

        _LOG.debug("Reading schema file %s", schema_file)
        with open(schema_file) as yaml_stream:
            tables = list(yaml.load_all(yaml_stream, Loader=yaml.SafeLoader))
            # index it by table name
        _LOG.debug("Read %d tables from schema", len(tables))

        if extra_schema_file:
            _LOG.debug("Reading extra schema file %s", extra_schema_file)
            with open(extra_schema_file) as yaml_stream:
                extras = list(yaml.load_all(yaml_stream, Loader=yaml.SafeLoader))
                # index it by table name
                schemas_extra = {table['table']: table for table in extras}
        else:
            schemas_extra = {}

        # merge extra schema into a regular schema, for now only columns are merged
        for table in tables:
            table_name = table['table']
            if table_name in schemas_extra:
                columns = table['columns']
                extra_columns = schemas_extra[table_name].get('columns', [])
                extra_columns = {col['name']: col for col in extra_columns}
                _LOG.debug("Extra columns for table %s: %s", table_name, extra_columns.keys())
                columns = []
                for col in table['columns']:
                    if col['name'] in extra_columns:
                        # extra definition overrides the standard one
                        columns.append(extra_columns.pop(col['name']))
                    else:
                        columns.append(col)
                # add all remaining extra columns
                table['columns'] = columns + list(extra_columns.values())

                if 'indices' in schemas_extra[table_name]:
                    raise RuntimeError("Extra table definition contains indices, "
                                       "merging is not implemented")

                del schemas_extra[table_name]

        # Pure "extra" table definitions may contain indices
        tables += schemas_extra.values()

        # convert all dicts into named tuples
        schemas = {}
        for table in tables:

            columns = table.get('columns', [])

            table_name = table['table']
            afw_schema = afw_schemas and afw_schemas.get(table_name)
            if afw_schema:
                # use afw schema to create extra columns
                column_names = {col['name'] for col in columns}
                column_names_lower = {col.lower() for col in column_names}
                for _, field in afw_schema:
                    column = self._field2dict(field, table_name)
                    if column['name'] not in column_names:
                        # check that there is no column name that only differs in case
                        if column['name'].lower() in column_names_lower:
                            raise ValueError("afw.table column name case does not match schema column name")
                        columns.append(column)

            table_columns = []
            for col in columns:
                # For prototype set default to 0 even if columns don't specify it
                if "default" not in col:
                    default = None
                    if col['type'] not in ("BLOB", "DATETIME"):
                        default = 0
                else:
                    default = col["default"]

                column = ColumnDef(name=col['name'],
                                   type=col['type'],
                                   nullable=col.get("nullable"),
                                   default=default,
                                   description=col.get("description"),
                                   unit=col.get("unit"),
                                   ucd=col.get("ucd"))
                table_columns.append(column)

            table_indices = []
            for idx in table.get('indices', []):
                index = IndexDef(name=idx.get('name'),
                                 type=idx.get('type'),
                                 columns=idx.get('columns'))
                table_indices.append(index)

            schemas[table_name] = TableDef(name=table_name,
                                           description=table.get('description'),
                                           columns=table_columns,
                                           indices=table_indices)

        return schemas

    def _tableColumns(self, table_name):
        """Return set of columns in a table

        Parameters
        ----------
        table_name : `str`
            Name of the table.

        Returns
        -------
        column_defs : `list`
            List of `Column` objects.
        """

        # get the list of columns in primary key, they are treated somewhat
        # specially below
        table_schema = self._schemas[table_name]
        pkey_columns = set()
        for index in table_schema.indices:
            if index.type == 'PRIMARY':
                pkey_columns = set(index.columns)
                break

        # convert all column dicts into alchemy Columns
        column_defs = []
        for column in table_schema.columns:
            kwargs = dict(nullable=column.nullable)
            if column.default is not None:
                kwargs.update(server_default=str(column.default))
            if column.name in pkey_columns:
                # PK columns must not auto-increment here
                kwargs.update(autoincrement=False)
            ctype = self._type_map[column.type]
            column_defs.append(Column(column.name, ctype, **kwargs))

        return column_defs

    def _field2dict(self, field, table_name):
        """Convert afw schema field definition into a dict format.

        Parameters
        ----------
        field : `lsst.afw.table.Field`
            Field in afw table schema.
        table_name : `str`
            Name of the table.

        Returns
        -------
        field_dict : `dict`
            Field attributes for SQL schema:

            - ``name`` : field name (`str`)
            - ``type`` : type name in SQL, e.g. "INT", "FLOAT" (`str`)
            - ``nullable`` : `True` if column can be ``NULL`` (`bool`)
        """
        column = field.getName()
        # map afw name back to cat name where a mapping exists
        column = self._column_map_reverse[table_name].get(column, column)
        ctype = self._afw_type_map[field.getTypeString()]
        return dict(name=column, type=ctype, nullable=True)

    def _tableIndices(self, table_name, info):
        """Return set of constraints/indices in a table

        Parameters
        ----------
        table_name : `str`
            Name of the table.
        info : `dict`
            Additional options passed to SQLAlchemy index constructor.

        Returns
        -------
        index_defs : `list`
            List of SQLAlchemy index/constraint objects.
        """

        table_schema = self._schemas[table_name]

        # convert all index dicts into alchemy Columns
        index_defs = []
        for index in table_schema.indices:
            if index.type == "INDEX":
                index_defs.append(Index(self._prefix+index.name, *index.columns, info=info))
            else:
                kwargs = {}
                if index.name:
                    kwargs['name'] = self._prefix+index.name
                if index.type == "PRIMARY":
                    index_defs.append(PrimaryKeyConstraint(*index.columns, **kwargs))
                elif index.type == "UNIQUE":
                    index_defs.append(UniqueConstraint(*index.columns, **kwargs))

        return index_defs

    def _getDoubleType(self):
        """DOUBLE type is database-specific, select one based on dialect.

        Returns
        -------
        type_object : `object`
            Database-specific type definition.
        """
        # need to cheat due to py2/py3 differences
        if self._engine.name == 'mysql':
            from sqlalchemy.dialects.mysql import DOUBLE
            return DOUBLE(asdecimal=False)
        elif self._engine.name == 'postgresql':
            from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION
            return DOUBLE_PRECISION
        elif self._engine.name == 'oracle':
            from sqlalchemy.dialects.oracle import DOUBLE_PRECISION
            return DOUBLE_PRECISION
        elif self._engine.name == 'sqlite':
            # all floats in sqlite are 8-byte
            from sqlalchemy.dialects.sqlite import REAL
            return REAL
        else:
            raise TypeError('cannot determine DOUBLE type, unexpected dialect: ' + self._engine.name)
from sqlalchemy.engine.url import URL
from sqlalchemy import bindparam
import pymongo as pym
from bson.objectid import ObjectId

# MongoDB side: collections of the 'names' database used elsewhere in the
# pipeline (html pages, page info, themes, name associations).
client = pym.MongoClient()
dbm = client.names
htmltab = dbm.html
infotab = dbm.info
thmcoll = dbm.themes
assocoll = dbm.assoc

# MySQL side: credentials come from the per-user .my.cnf option file.
dbsql = URL(drivername="mysql+pymysql", database='babynames',
            query={'read_default_file' : '/Users/azirm/.my.cnf',
                   'read_default_group' : 'python',
                   'use_unicode': 1,
                   'charset': 'utf8'})
eng = create_engine(name_or_url=dbsql)
meta = MetaData(bind=eng)
meta.clear()
meta.reflect()
boynames = meta.tables['boynames']
girlnames = meta.tables['girlnames']
ntables = [boynames, girlnames]
# "!= None" is intentional: SQLAlchemy overloads it into "IS NOT NULL".
sels = [sql.select([ntable.c.name, ntable.c.htmlid]).where(ntable.c.htmlid!=None) for ntable in ntables]

def mktable(tabledef):
    """Create *tabledef* in the database if it does not exist yet.

    NOTE(review): `tabi` and `evalstr` are assigned but never used, and
    `create_all()` only creates tables attached to the module-level
    `meta` — presumably `tabledef` was constructed against `meta`
    before the `clear()`/`reflect()` here; confirm this actually creates
    the intended table.
    """
    meta.clear()
    meta.reflect()
    if tabledef.name not in meta.tables:
        tabi = tabledef
        evalstr = 1
        meta.create_all()
class DB(Base):
    """Base test-case class managing an engine/metadata per connection level.

    Subclasses choose how much setup they need via ``level``: nothing,
    a connection, or a full transaction.
    """

    # Constants: connection level
    NONE = 0      # No connection; just set self.url
    CONNECT = 1   # Connect; no transaction
    TXN = 2       # Everything in a transaction

    # default level used by _connect()
    level = TXN

    def _engineInfo(self, url=None):
        """Return the url describing this engine (defaults to self.url)."""
        if url is None:
            url = self.url
        return url

    def _setup(self, url):
        """Connect and drop any leftover tables from previous runs."""
        self._connect(url)
        # make sure there are no tables lying around
        meta = MetaData(self.engine)
        meta.reflect()
        meta.drop_all()

    def _teardown(self):
        self._disconnect()

    def _connect(self, url):
        """Create engine/metadata for *url*; session/txn code is disabled."""
        self.url = url
        # TODO: seems like 0.5.x branch does not work with engine.dispose
        # and staticpool
        #self.engine = create_engine(url, echo=True, poolclass=StaticPool)
        self.engine = create_engine(url, echo=True)
        # silence the logger added by SA, nose adds its own!
        logging.getLogger('sqlalchemy').handlers=[]
        self.meta = MetaData(bind=self.engine)
        if self.level < self.CONNECT:
            return
        #self.session = create_session(bind=self.engine)
        if self.level < self.TXN:
            return
        #self.txn = self.session.begin()

    def _disconnect(self):
        """Roll back/close whatever _connect() created, then dispose engine."""
        if hasattr(self, 'txn'):
            self.txn.rollback()
        if hasattr(self, 'session'):
            self.session.close()
        #if hasattr(self,'conn'):
        #    self.conn.close()
        self.engine.dispose()

    def _supported(self, url):
        """Is the current test method supported for the url's dialect?

        Reads the ``supported``/``not_supported`` lists attached to the
        test method (via decorators, presumably).
        """
        db = url.split(':',1)[0]
        # name-mangled access to unittest.TestCase's private attribute;
        # fragile across Python/unittest versions.
        func = getattr(self, self._TestCase__testMethodName)
        if hasattr(func, 'supported'):
            return db in func.supported
        if hasattr(func, 'not_supported'):
            return not (db in func.not_supported)
        # Neither list assigned; assume all are supported
        return True

    def _not_supported(self, url):
        return not self._supported(url)

    def _select_row(self):
        """Select rows, used in multiple tests"""
        return self.table.select().execution_options(
            autocommit=True).execute().fetchone()

    def refresh_table(self, name=None):
        """Reload the table from the database

        Assumes we're working with only a single table, self.table, and
        metadata self.meta

        Working w/ multiple tables is not possible, as tables can only be
        reloaded with meta.clear()
        """
        if name is None:
            name = self.table.name
        self.meta.clear()
        self.table = Table(name, self.meta, autoload=True)

    def compare_columns_equal(self, columns1, columns2, ignore=None):
        """Loop through all columns and compare them"""
        def key(column):
            return column.name
        # NOTE: the loop variable below shadows the key() helper; harmless
        # here because sorted() has already been evaluated.
        for c1, c2 in zip(sorted(columns1, key=key), sorted(columns2, key=key)):
            diffs = ColumnDelta(c1, c2).diffs
            if ignore:
                for key in ignore:
                    diffs.pop(key, None)
            if diffs:
                self.fail("Comparing %s to %s failed: %s" % (columns1, columns2, diffs))
class Storage(base.Storage):
    """SQL Tabular Storage.

    Parameters
    ----------
    engine: object
        SQLAlchemy engine.
    dbschema: str
        Database schema name.
    prefix: str
        Prefix for all tables.
    """

    # Public

    def __init__(self, engine, dbschema=None, prefix=''):
        # Set attributes
        self.__engine = engine
        self.__dbschema = dbschema
        self.__prefix = prefix
        self.__schemas = {}
        # Create metadata; reflect=True loads existing table definitions
        self.__metadata = MetaData(
            bind=self.__engine, schema=self.__dbschema, reflect=True)

    def __repr__(self):
        # Template and format
        template = 'Storage <{engine}/{dbschema}>'
        text = template.format(
            engine=self.__engine, dbschema=self.__dbschema)
        return text

    @property
    def tables(self):
        """Return list of storage's table names."""
        tables = []
        for dbtable in self.__metadata.sorted_tables:
            table = dbtable.name
            table = mappers.restore_table(self.__prefix, table)
            if table is not None:
                tables.append(table)
        return tables

    def check(self, table):
        """Return if table exists."""
        existence = table in self.tables
        return existence

    def create(self, table, schema):
        """Create table by schema.

        Parameters
        ----------
        table: str/list
            Table name or list of table names.
        schema: dict/list
            JSONTableSchema schema or list of schemas.

        Raises
        ------
        RuntimeError
            If table already exists.
        """
        # Make lists
        tables = table
        if isinstance(table, six.string_types):
            tables = [table]
        schemas = schema
        if isinstance(schema, dict):
            schemas = [schema]

        # Check tables for existence
        for table in tables:
            if self.check(table):
                message = 'Table "%s" already exists.' % table
                raise RuntimeError(message)

        # Define tables
        for table, schema in zip(tables, schemas):
            # Add to schemas
            self.__schemas[table] = schema
            # Create sa table
            table = mappers.convert_table(self.__prefix, table)
            jsontableschema.validate(schema)
            columns, constraints = mappers.convert_schema(
                self.__prefix, table, schema)
            Table(table, self.__metadata, *(columns + constraints))

        # Create tables, update metadata
        self.__metadata.create_all()
        # Metadata reflect is auto

    def delete(self, table):
        """Delete table.

        Parameters
        ----------
        table: str/list
            Table name or list of table names.

        Raises
        ------
        RuntimeError
            If table doesn't exist.
        """
        # Make lists
        tables = table
        if isinstance(table, six.string_types):
            tables = [table]

        # Iterate over tables
        dbtables = []
        for table in tables:
            # Check existent
            if not self.check(table):
                # FIX: previously interpolated "% self" which printed the
                # Storage repr instead of the missing table's name.
                message = 'Table "%s" doesn\'t exist.' % table
                raise RuntimeError(message)
            # Remove from schemas
            if table in self.__schemas:
                del self.__schemas[table]
            # Add table to dbtables
            dbtable = self.__get_dbtable(table)
            dbtables.append(dbtable)

        # Drop tables, update metadata
        self.__metadata.drop_all(tables=dbtables)
        self.__metadata.clear()
        self.__metadata.reflect()

    def describe(self, table):
        """Return table's JSONTableSchema schema.

        Parameters
        ----------
        table: str
            Table name.

        Returns
        -------
        dict
            JSONTableSchema schema.
        """
        # Get schema
        if table in self.__schemas:
            schema = self.__schemas[table]
        else:
            # Restore the schema from the reflected table definition
            dbtable = self.__get_dbtable(table)
            table = mappers.convert_table(self.__prefix, table)
            schema = mappers.restore_schema(
                self.__prefix, table, dbtable.columns, dbtable.constraints)
        return schema

    def read(self, table):
        """Read data from table.

        Parameters
        ----------
        table: str
            Table name.

        Returns
        -------
        generator
            Data tuples generator.
        """
        # Get result
        dbtable = self.__get_dbtable(table)
        result = dbtable.select().execute()
        # Yield data
        for row in result:
            yield row

    def write(self, table, data):
        """Write data to table.

        Parameters
        ----------
        table: str
            Table name.
        data: list
            List of data tuples.
        """
        # Process data: cast through the schema; values rejected with
        # InvalidObjectType are treated as JSON-encoded objects.
        schema = self.describe(table)
        model = SchemaModel(schema)
        cdata = []
        for row in data:
            rdata = {}
            for index, field in enumerate(model.fields):
                value = row[index]
                try:
                    value = model.cast(field['name'], value)
                except InvalidObjectType:
                    value = json.loads(value)
                rdata[field['name']] = value
            cdata.append(rdata)

        # Insert data
        dbtable = self.__get_dbtable(table)
        dbtable.insert().execute(cdata)

    # Private

    def __get_dbtable(self, table):
        """Return dbtable instance from metadata."""
        # Prepare dict key
        key = mappers.convert_table(self.__prefix, table)
        if self.__dbschema:
            # TODO: Start to test dbschema parameter
            # FIX: str.join takes one iterable argument; passing two
            # positional strings raised TypeError.
            key = '.'.join((self.__dbschema, key))  # pragma: no cover
        return self.__metadata.tables[key]
class Database():
    '''A wrapper around a bsddb database, acting as a dictionary.
    Can accept all Python datatypes as keys and values.
    All bencoding types are just dictionaries, lists, integers and strings,
    so this is enough!

    NOTE(review): despite the docstring, self.data is assigned a SQLAlchemy
    ``MetaData`` object, which supports none of the mapping operations used
    below (``[]``, ``has_key``, ``keys``, ``values``, ``sync``). Every method
    except __init__ will raise at runtime. This looks like an incomplete
    port from bsddb — confirm the intended backing store.
    '''
    def __init__(self, dbname, flag='c'):
        '''Read the database given by dbname. '''
        dbPath = dbname
        engine = create_engine("sqlite:///%s" % dbPath, echo=True)
        # NOTE(review): MetaData is not a key/value store; see class note.
        self.data = MetaData(engine)
        print self.data

    def __contains__(self, key):
        '''Return true if database contains a key'''
        # keys are serialized before lookup (presumably bencode dumps)
        key = dumps(key)
        # has_key returns 1 or 0 (but shouldn't this just by key in data?)
        boolean = self.data.has_key(key)
        return bool(boolean)

    def __getitem__(self, key):
        '''Return the value held by the key'''
        key = dumps(key)
        value = self.data[key]
        return loads(value)

    # dict-style aliases
    has_key = __contains__
    get = __getitem__

    def __setitem__(self, key, value):
        '''Set the value of key to the value given. '''
        key = dumps(key)
        value = dumps(value)
        self.data[key] = value

    def __repr__(self):
        '''represents the database'''
        keys = self.data.keys()
        items = [(loads(key), loads(self.data[key])) for key in keys]
        return str(dict(items))

    def clear(self):
        '''Remove all data in the database. '''
        self.data.clear()

    def items(self):
        '''Return a list of tuples of the keys and values '''
        keys = self.data.keys()
        items = [(loads(key), loads(self.data[key])) for key in keys]
        return items

    def values(self):
        '''Returns a list of values '''
        values = [loads(value) for value in self.data.values()]
        return values

    def pop(self, key):
        '''Return the value held by key, or default if it isn't in the database '''
        key = dumps(key)
        value = self.data[key]
        del self.data[key]
        return loads(value)

    def setdefault(self, key, default):
        '''Return the value held by key, or default if isn't in the database'''
        key = dumps(key)
        try:
            value = self.data[key]
        except KeyError:
            return default
        return loads(value)

    def __del__(self):
        '''Syncs database'''
        self.data.sync()
class Analytics(object):
    """ Analytics App

    Records one row per tracked request (url, user agent, status code,
    latency, ...) into a dedicated database table via request hooks.
    """

    def __init__(self, app=None, db=None, blueprints=None):
        """ Create a new instance of the analyzer.

        :param app: The Flask App instance
        :param db: A Sqlalchemy instance, e.g. db = Sqlalchemy(app)
        :param blueprints: A whitelist for blueprints that should be tracked.
            If this attribute is set only views whose blueprint name matches
            any of the names inside the blueprints list will be tracked. To
            track all requests set this attribute to None. Example whitelist
            could be: blueprints = ['main', 'admin'].
        """
        self.app = None
        self.db = None
        self.table = None
        self.metadata = None
        self.table_name = 'analytics_data'
        self.engine = None
        self.blueprints = None

        if app and db:
            self.init_app(app, db, blueprints)

    def init_app(self, app, db, blueprints=None):
        """ Initializes an existing instance. Useful when using the
        application factory pattern.

        :param app: The Flask App instance
        :param db: A Sqlalchemy instance, e.g. db = Sqlalchemy(app)
        :param blueprints: A whitelist for blueprints that should be tracked
            (see __init__); None tracks every request.
        :raises ValueError: if either app or db is missing
        """
        if not app or not db:
            raise ValueError(
                "Flask App instance and sqlalchemy db object are required")
        self.app = app
        self.db = db
        self.blueprints = blueprints

        # check if table exists on first startup or create it
        # NOTE: The table cannot be altered yet. So when the table layout
        # changes it is necessary to write a custom migration.
        with self.app.app_context():
            self.engine = db.engine
            self.metadata = MetaData(db.engine)
            if not self.engine.dialect.has_table(self.engine, self.table_name):
                self._create_table()
            else:
                # table already exists -- load its definition by reflection
                self.metadata.reflect(bind=self.engine)
                self.table = self.metadata.tables[self.table_name]

        # register event hooks
        app.before_request(self.before_request)
        app.after_request(self.after_request)

    def _drop_table(self):
        """ Drop the analytics table (no-op when it does not exist).

        :return: None
        """
        self.table.drop(checkfirst=True)

    def _create_table(self):
        """ Define and create the analytics table in the database. """
        self.table = Table(self.table_name, self.metadata,
                           Column('id', Integer, primary_key=True),
                           Column('url', String(128)),
                           Column('user_agent', String(256)),
                           Column('view_args', String(128)),
                           Column('status_code', Integer),
                           Column('path', String(64)),
                           Column('latency', Float),
                           Column('timestamp', DateTime),
                           Column('request', String(64)),
                           Column('url_args', String(64)),
                           Column('ua_browser', String(16)),
                           Column('ua_language', String(16)),
                           Column('ua_platform', String(16)),
                           Column('ua_version', String(16)),
                           Column('referer', String(64)),
                           # NOTE(review): integer default on a String
                           # column -- confirm this is intended
                           Column('uuid', String, default=0))
        self.table.create(bind=self.engine)

    def reinitialize_db(self):
        """ Drop and re-create the analytics table. All stored data is
        lost. """
        self._drop_table()
        self.metadata.clear()
        self._create_table()

    def store_record(self, record: AnalyticsRecord):
        """ Store a record to the database.

        String fields are truncated to their column lengths. Database
        errors are logged instead of raised so the app keeps running.

        :param record: the AnalyticsRecord describing one request
        """
        with self.engine.begin() as conn:
            stmt = self.table.insert().values(
                url=str(record.url)[:128],
                uuid=record.uid,
                ua_browser=str(getattr(record.user_agent, 'browser', '-'))[:16],
                ua_language=str(getattr(record.user_agent, 'language', '-'))[:16],
                ua_platform=str(getattr(record.user_agent, 'platform', '-'))[:16],
                ua_version=str(getattr(record.user_agent, 'version', '-'))[:16],
                user_agent=str(record.user_agent),
                view_args=json.dumps(record.view_args)[:64],
                status_code=record.status_code,
                path=str(record.path)[:64],
                latency=record.latency,
                request=str(record.request)[:64],
                timestamp=record.timestamp,
                referer=str(record.referer)[:64])
            # catch errors in order to prevent an app crash and pass the
            # message to the app´s logger
            try:
                conn.execute(stmt)
            except Exception as e:
                self.app.logger.error(e)

    @property
    def query(self):
        """ Query the analytics database table

        :return: A Sqlalchemy.BaseQuery instance that can be used just a
            like a normal sqlalchemy query
        """
        return self.db.session.query(self.table)

    def before_request(self):
        """ Only used to store the time when a request is first issued
        to be able to measure latency"""
        g.start_time = dt.datetime.utcnow()
        # create a uuid to identify a client during it´s session
        # this is way faster than hashing the user_agent
        if 'UUID' not in session:
            session['UUID'] = str(uuid.uuid4())

    def after_request(self, response):
        """ Store all information about the request

        :param response: pass the response object without touching it
        :return: original Flask response
        """
        ctx = _request_ctx_stack.top
        if self.blueprints:
            # whitelist active: skip requests for untracked blueprints
            if ctx.request.blueprint not in self.blueprints:
                return response
        t_0 = getattr(g, 'start_time', dt.datetime.utcnow())
        record = AnalyticsRecord(
            # BUG FIX: dict.get() accepts no keyword arguments, so
            # session.get('UUID', default=0) raised TypeError at runtime.
            uid=session.get('UUID', 0),
            url=ctx.request.url,
            user_agent=ctx.request.user_agent,
            view_args=ctx.request.view_args,
            status_code=response.status_code,
            path=ctx.request.path,
            # BUG FIX: timedelta.microseconds holds only the sub-second
            # component, so any request longer than 1s reported a wrong
            # latency; total_seconds() covers the full duration.
            latency=(dt.datetime.utcnow() - t_0).total_seconds(),
            timestamp=t_0,
            content_length=response.content_length,
            request=
            f"{ctx.request.method}{ctx.request.url}{ctx.request.environ.get('SERVER_PROTOCOL')}",
            url_args=dict([(k, ctx.request.args[k]) for k in ctx.request.args]),
            referer=request.headers.get("Referer"))
        self.store_record(record)
        return response

    # SOME USEFUL PREDEFINED QUERIES

    def query_between(self, from_=None, until=None):
        """ Query the analytics table. By using db.session.query(...) it is
        possible to use the built in pagination!

        :param from_: datetime.datetime object specifying the earliest date
            that should be included (defaults to utcnow)
        :param until: datetime.datetime object specifying the latest date
            that should be included (defaults to the epoch)
        :return: BaseQuery
        """
        if from_ is None:
            from_ = dt.datetime.utcnow()
        if until is None:
            until = dt.datetime(1970, 1, 1)
        return self.query.filter(self.table.c.timestamp.between(until, from_)) \
            .order_by(self.table.c.timestamp.desc())

    def total_unique_visits(self):
        """ Count of distinct client uuids ever recorded.

        :return: int
        """
        return self.db.session.query(func.count(distinct(
            self.table.c.uuid))).scalar()

    def total_unique_visits_during(self, from_=None, until=None):
        """ Count of distinct client uuids recorded between until and
        from_ (same defaults as query_between).

        :return: int
        """
        if from_ is None:
            from_ = dt.datetime.utcnow()
        if until is None:
            until = dt.datetime(1970, 1, 1)
        return self.db.session.query(func.count(distinct(self.table.c.uuid))) \
            .filter(self.table.c.timestamp.between(until, from_)) \
            .scalar()

    def top_page(self):
        """ Return the (count, path) row of the most requested path.

        :return: tuple(count, path) or None when the table is empty
        """
        from sqlalchemy import desc
        return self.db.session.query(func.count(self.table.c.path)
                                     .label('count'), self.table.c.path) \
            .group_by(self.table.c.path) \
            .order_by(desc('count')).first()