def _drop_db(config):
    connection = get_db_conn(config, with_db=False)
    connection.autocommit = True
    try:
        with connection.cursor() as cursor:
            cursor.execute(f"DROP DATABASE {config.db_config.dbname};")
    finally:
        connection.close()
def test_archive_table():
    api = PgGraphApi(config_path='config.test.ini')
    api.archive_table('publisher', [1, 2])

    conn = get_db_conn(api.config)
    with conn.cursor() as cursor:
        cursor.execute('SELECT author_id, book_id FROM author_book;')
        ab_rows = [dict(row) for row in cursor.fetchall()]

        cursor.execute('SELECT author_id, book_id FROM author_book_archive;')
        ab_archive_rows = [dict(row) for row in cursor.fetchall()]

        cursor.execute('SELECT id FROM book;')
        book_rows = [dict(row) for row in cursor.fetchall()]

        cursor.execute('SELECT id FROM book_archive;')
        book_archive_rows = [dict(row) for row in cursor.fetchall()]

        cursor.execute('SELECT id FROM publisher;')
        pub_rows = [dict(row) for row in cursor.fetchall()]

        cursor.execute('SELECT id FROM publisher_archive;')
        pub_archive_rows = [dict(row) for row in cursor.fetchall()]
    conn.close()

    assert ab_rows == [
        {'author_id': 7, 'book_id': 4},
        {'author_id': 7, 'book_id': 5},
    ]
    assert ab_archive_rows == [
        {'author_id': 1, 'book_id': 1},
        {'author_id': 2, 'book_id': 1},
        {'author_id': 3, 'book_id': 2},
        {'author_id': 4, 'book_id': 2},
        {'author_id': 5, 'book_id': 3},
        {'author_id': 6, 'book_id': 3},
    ]
    assert book_rows == [{'id': 4}, {'id': 5}]
    assert book_archive_rows == [{'id': 1}, {'id': 2}, {'id': 3}]
    assert pub_rows == [{'id': 3}]
    assert pub_archive_rows == [{'id': 1}, {'id': 2}]
def _kill_connections(config):
    connection = get_db_conn(config)
    connection.autocommit = True
    try:
        with connection.cursor() as cursor:
            cursor.execute(
                "SELECT pg_terminate_backend(pid) "
                "FROM pg_stat_activity WHERE datname = %s;",
                (config.db_config.dbname,)
            )
    except Exception as err:
        print('error while killing connections:', err)
    finally:
        connection.close()
def get_rows_references(self, table_name: str, ids: List[int]):
    """
    Get a dictionary of references to the %ids% rows of the %table_name%
    table from other tables.

    Result (table_name = table_a, ids = [1, 5, 6]):
        {
            1: {
                'table_b': {'table_a_id': [1, 4, 6]},
                'table_c': {'a_id': [29]},
            },
            5: {
                'table_b': {'table_a_id': []},
                'table_c': {'a_id': [12, 13]},
            },
            6: {
                'table_b': {'table_a_id': []},
                'table_c': {'a_id': []},
            }
        }
    """
    if table_name not in self.references:
        raise KeyError(f'Table {table_name} not found')

    rows_refs = {id_: {} for id_ in ids}
    s_in = ', '.join('%s' for _ in ids)

    conn = get_db_conn(self.config)
    try:
        for ref_table_name, ref_table_data in self.references[table_name].items():
            # Prepare an empty bucket per referenced id and per FK column
            for ref_tables in rows_refs.values():
                ref_tables[ref_table_name] = {
                    fk.fk_ref: [] for fk in ref_table_data['references']
                }
            for fk in ref_table_data['references']:
                query = SQL(
                    f"SELECT {fk.pk_ref}, {fk.fk_ref} "
                    f"FROM {self.config.db_config.schema}.{ref_table_name} "
                    f"WHERE {fk.fk_ref} IN ({s_in})"
                )
                with conn.cursor(cursor_factory=DictCursor) as curs:
                    curs.execute(query, ids)
                    rows = [dict(row) for row in curs.fetchall()]
                # Group the referencing rows under the id they point to
                for row in rows:
                    rows_refs[row[fk.fk_ref]][ref_table_name][fk.fk_ref].append(row)
    finally:
        conn.close()
    return rows_refs
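# Usage sketch (not part of the library): assuming get_rows_references() is
# exposed on PgGraphApi alongside archive_table(), as the method above
# suggests, it can show which rows still reference the target ids before they
# are archived. The helper name below is hypothetical.
def _example_show_references():
    api = PgGraphApi(config_path='config.test.ini')
    refs = api.get_rows_references('publisher', [1, 2])
    # With the fixture data below, refs[1]['book']['publisher_id'] lists the
    # `book` rows that reference publisher 1, refs[2][...] those for publisher 2.
    for pub_id, ref_tables in refs.items():
        print(pub_id, ref_tables)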
def _create_db(config):
    connection = get_db_conn(config, with_db=False)
    connection.autocommit = True
    try:
        with connection.cursor() as cursor:
            cursor.execute(f"CREATE DATABASE {config.db_config.dbname};")
    except Exception as error:
        if not hasattr(error, "pgerror") or "already exists" not in error.pgerror:
            raise error
        print(f"Database '{config.db_config.dbname}' already exists.")
    finally:
        connection.close()
def _fill_db(config):
    connection = get_db_conn(config)
    connection.autocommit = True
    try:
        with connection.cursor() as cursor:
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS publisher (
                    id serial PRIMARY KEY,
                    name text NOT NULL
                );
                CREATE TABLE IF NOT EXISTS book (
                    id serial PRIMARY KEY,
                    name text NOT NULL,
                    publisher_id integer REFERENCES publisher (id)
                );
                CREATE TABLE IF NOT EXISTS author (
                    id serial PRIMARY KEY,
                    fio text NOT NULL
                );
                CREATE TABLE IF NOT EXISTS author_book (
                    author_id integer REFERENCES author (id),
                    book_id integer REFERENCES book (id),
                    PRIMARY KEY (author_id, book_id)
                );

                INSERT INTO publisher (id, name) VALUES
                    (1, 'O Reilly'),
                    (2, 'Packt'),
                    (3, 'Bloomsbury');
                INSERT INTO book (id, name, publisher_id) VALUES
                    (1, 'High Performance Python', 1),
                    (2, 'Kubernetes: Up and Running', 1),
                    (3, 'Python Machine Learning', 2),
                    (4, 'Harry Potter and the Philosophers Stone', 3),
                    (5, 'Harry Potter and the Chamber of Secrets', 3);
                INSERT INTO author (id, fio) VALUES
                    (1, 'Ian Ozsvald'),
                    (2, 'Micha Gorelick'),
                    (3, 'Brendan Burns'),
                    (4, 'Joe Beda'),
                    (5, 'Sebastian Raschka'),
                    (6, 'Vahid Mirjalili'),
                    (7, 'J.K. Rowling');
                INSERT INTO author_book (author_id, book_id) VALUES
                    (1, 1), (2, 1),
                    (3, 2), (4, 2),
                    (5, 3), (6, 3),
                    (7, 4), (7, 5);
            """)
    finally:
        connection.close()
def _clear_tables(config):
    connection = get_db_conn(config)
    connection.autocommit = True
    try:
        with connection.cursor() as cursor:
            cursor.execute("""
                DROP TABLE IF EXISTS publisher CASCADE;
                DROP TABLE IF EXISTS publisher_archive CASCADE;
                DROP TABLE IF EXISTS book CASCADE;
                DROP TABLE IF EXISTS book_archive CASCADE;
                DROP TABLE IF EXISTS author CASCADE;
                DROP TABLE IF EXISTS author_archive CASCADE;
                DROP TABLE IF EXISTS author_book CASCADE;
                DROP TABLE IF EXISTS author_book_archive CASCADE;
            """)
    except Exception as error:
        if not hasattr(error, "pgerror") or "does not exist" not in error.pgerror:
            raise error
        print(f"Database '{config.db_config.dbname}' does not exist.")
    finally:
        connection.close()
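# How the helpers above fit together (a sketch only; the function names and
# the exact call order are assumptions, not part of the test suite). A typical
# per-test setup recreates the database objects and loads the fixture rows
# before test_archive_table runs, and tears everything down afterwards.
def _example_prepare_test_db(config):
    _create_db(config)      # logged no-op if the database already exists
    _clear_tables(config)   # remove leftovers from a previous run
    _fill_db(config)        # recreate tables and insert fixture data


def _example_teardown_test_db(config):
    _kill_connections(config)   # terminate sessions still holding the test DB
    _drop_db(config)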
def archive_table(self, table_name, ids: List[int]):
    """
    Recursively archive / delete the rows with the given %ids% from the
    %table_name% table and from the tables that reference them.

    pk_column - primary key of %table_name%
    """
    conn = get_db_conn(self.config)
    try:
        logging.info(f'{table_name} - START')
        pk_column = self.primary_keys.get(table_name)
        if not pk_column:
            raise KeyError(f'Primary key for table {table_name} not found')

        archiver = Archiver(conn, self.references, self.config)
        rows = [{pk_column: id_} for id_ in ids]
        # Archive in chunks so large id lists do not produce oversized queries
        for rows_chunk in chunks(rows, self.config.archiver_config.chunk_size):
            archiver.archive_recursive(table_name, rows_chunk, pk_column)

        logging.info(f'{table_name} - END')
    finally:
        conn.close()
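# archive_table() relies on a chunks() helper that is not shown in this
# section. A minimal sketch of such a helper, assuming it simply yields
# successive slices of at most `size` items (the real implementation may
# differ):
def _chunks_sketch(items, size):
    for start in range(0, len(items), size):
        yield items[start:start + size]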
def build_references(config: Config, conn: connection = None) -> Dict[str, dict]:
    """
    Build a table dependency graph.

    Algorithm:
        1) Get all table names
        2) Get all foreign keys
        3) Build the dependency graph (references dict):
           for each table, for each child table, build its dependency
           graph recursively

    Result:
        {
            'references': {
                'table_a': {
                    'table_b': {
                        'references': [{'pk_ref': 'id', 'fk_ref': 'table_b_id'}],
                        'ref_tables': {
                            'table_c': {
                                'table_a': {},
                                'table_b': {}
                            },
                            ...
                        }
                    },
                    'table_c': {...}
                },
                'table_b': {...}
            },
            'primary_keys': {
                'table_a': 'id',
                'table_b': 'id',
                'table_c': 'id'
            }
        }
    """
    references = {}
    primary_keys = {}

    if not conn:
        conn = get_db_conn(config)
    try:
        tables = get_all_tables(conn, config.db_config)
        foreign_keys = get_all_fk(conn, config.db_config)

        for table in tables:
            references[table['table_name']] = {}

        for fk in foreign_keys:
            if fk['main_table'] not in references:
                references[fk['main_table']] = {}
            if fk['ref_table'] not in references[fk['main_table']]:
                references[fk['main_table']][fk['ref_table']] = {
                    'ref_tables': {},
                    'references': []
                }
            table_references = references[fk['main_table']][fk['ref_table']]['references']
            table_references.append(ForeignKey(
                pk_main=fk['main_table_column'],
                pk_ref=fk['ref_pk_columns'],
                fk_ref=fk['ref_fk_column'],
            ))
            primary_keys[fk['main_table']] = fk['main_table_column']

        if references:
            # Process the tables with the most direct children first
            references = OrderedDict(
                sorted(references.items(), key=lambda row: len(row[1]), reverse=True)
            )
            for parent, refs in references.items():
                for ref, ref_data in refs.items():
                    visited = {parent, ref}
                    ref_childs = ref_data['ref_tables']
                    recursive_build(ref, ref_childs, references, visited)
    finally:
        conn.close()

    result = {
        'references': references,
        'primary_keys': primary_keys
    }
    return result
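# Usage sketch (illustrative only): build_references() returns both parts of
# the graph in one dict, so callers split it into the references map and the
# primary-key map. PgGraphApi presumably does this when it is constructed;
# the standalone call below is just an example and not library code.
def _example_build_graph(config):
    result = build_references(config)
    references = result['references']      # table -> referencing tables -> FK info
    primary_keys = result['primary_keys']  # table -> primary key column
    for table, ref_tables in references.items():
        print(table, '<-', list(ref_tables), 'pk:', primary_keys.get(table))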