def tabulate(source_dbc, destination_dbc, tables):
    row_counts = list()
    source_conn = source_dbc.get_db_connection()
    dest_conn = destination_dbc.get_db_connection()
    db_helper = database_helper.get_specific_helper()
    try:
        for table in tables:
            # Estimated row count in the source database
            o = db_helper.get_table_count_estimate(table_name(table), schema_name(table), source_conn)
            # MySQL does not separate schemas from databases, so use the destination database name there
            dest_schema_name = dest_conn.db_name if isinstance(dest_conn, MySqlConnection) else schema_name(table)
            # Estimated row count in the destination database
            n = db_helper.get_table_count_estimate(table_name(table), dest_schema_name, dest_conn)
            row_counts.append((table, o, n))
    finally:
        source_conn.close()
        dest_conn.close()

    # Print CSV rows: table, source count, destination count, destination/source ratio
    print('\n'.join([
        '{}, {}, {}, {}'.format(x[0], x[1], x[2], x[2] / x[1] if x[1] > 0 else 0)
        for x in row_counts
    ]))
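# A minimal sketch of the table_name/schema_name helpers that tabulate relies on,
# assuming tables are passed around as 'schema.table' strings; the project's real
# helpers may differ, this only illustrates the split used above.
def schema_name(table):
    # Part before the first dot is treated as the schema
    return table.split('.', 1)[0]


def table_name(table):
    # Part after the first dot is treated as the table name
    return table.split('.', 1)[1]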
def teardown(self):
    user_schemas = database_helper.get_specific_helper().list_all_user_schemas(self.__source_db_connection)
    if len(user_schemas) == 0:
        raise Exception("Couldn't find any non system schemas.")

    drop_statements = ["DROP SCHEMA IF EXISTS \"{}\" CASCADE".format(s) for s in user_schemas if s != 'public']
    q = ';'.join(drop_statements)
    q += ";DROP SCHEMA IF EXISTS public CASCADE;CREATE SCHEMA IF NOT EXISTS public;"
    self.run_query(q)
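# A hedged sketch of what list_all_user_schemas could look like on Postgres,
# assuming a DB-API style connection (e.g. psycopg2); the helper is referenced
# above, but this query is illustrative, not the project's actual implementation.
def list_all_user_schemas(conn):
    with conn.cursor() as cur:
        # Filter out the built-in pg_* schemas and information_schema
        cur.execute(
            "SELECT schema_name FROM information_schema.schemata "
            "WHERE schema_name NOT LIKE 'pg\\_%' AND schema_name <> 'information_schema'"
        )
        return [row[0] for row in cur.fetchall()]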
def __init__(self, source_dbc, destination_dbc, all_tables, clean_previous=True):
    self.__source_dbc = source_dbc
    self.__destination_dbc = destination_dbc

    self.__source_conn = source_dbc.get_db_connection(read_repeatable=True)
    self.__destination_conn = destination_dbc.get_db_connection()

    self.__all_tables = all_tables

    self.__db_helper = database_helper.get_specific_helper()
    self.__db_helper.turn_off_constraints(self.__destination_conn)
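# A hedged sketch of what turn_off_constraints might do on Postgres so that rows
# can be copied into the destination without worrying about insert order; the
# statement below is a standard way to skip FK trigger enforcement for a session,
# but the project's helper may use a different mechanism (e.g. SET
# FOREIGN_KEY_CHECKS = 0 on MySQL).
def turn_off_constraints(connection):
    with connection.cursor() as cur:
        cur.execute("SET session_replication_role = 'replica'")
    connection.commit()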
def columns_to_copy(table, relationships, conn):
    # Target tables whose FK relationships from this table are configured as hard dependency breaks
    target_breaks = set()
    opportunists = config_reader.get_preserve_fk_opportunistically()
    for dep_break in config_reader.get_dependency_breaks():
        if dep_break.fk_table == table and dep_break not in opportunists:
            target_breaks.add(dep_break.target_table)

    # FK columns that must be NULLed out so broken references cannot dangle
    columns_to_null = set()
    for rel in relationships:
        if rel['fk_table'] == table and rel['target_table'] in target_breaks:
            columns_to_null.update(rel['fk_columns'])

    columns = database_helper.get_specific_helper().get_table_columns(
        table_name(table), schema_name(table), conn)
    return ','.join([
        '{}.{}'.format(quoter(table_name(table)), quoter(c))
        if c not in columns_to_null else 'NULL as {}'.format(quoter(c))
        for c in columns
    ])
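# A minimal sketch of the quoter helper assumed by columns_to_copy (double-quoted
# identifiers, as on Postgres); the real helper may also handle MySQL backticks.
def quoter(identifier):
    return '"{}"'.format(identifier)

# Illustrative output: for a hypothetical table 'public.orders' with columns
# (id, total, customer_id) where the customer_id relationship is a configured
# dependency break, columns_to_copy would return something like:
#   "orders"."id","orders"."total",NULL as "customer_id"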
    config_reader.initialize(sys.stdin)
else:
    config_reader.initialize()

db_type = config_reader.get_db_type()
source_dbc = DbConnect(db_type, config_reader.get_source_db_connection_info())
destination_dbc = DbConnect(db_type, config_reader.get_destination_db_connection_info())

database = db_creator(db_type, source_dbc, destination_dbc)
database.teardown()
database.create()

# Get list of tables to operate on
db_helper = database_helper.get_specific_helper()
all_tables = db_helper.list_all_tables(source_dbc)
all_tables = [x for x in all_tables if x not in config_reader.get_excluded_tables()]

subsetter = Subset(source_dbc, destination_dbc, all_tables)
try:
    subsetter.prep_temp_dbs()
    subsetter.run_middle_out()
    if "--no-constraints" not in sys.argv:
        database.add_constraints()
    print("Beginning post subset SQL calls")
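# Hypothetical invocation of this driver (the script name and the condition that
# selects stdin are not shown in this excerpt and are assumptions; only the
# --no-constraints flag appears in the code above):
#   python direct_subset.py < config.json
#   python direct_subset.py --no-constraints < config.json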