def copy_rows(source, destination, query, destination_table):
    """Stream the rows selected by ``query`` from ``source`` into
    ``destination_table`` on ``destination``, batch by batch.

    A server-side (named) cursor is used on the source connection so the
    result set is streamed instead of being materialized in client memory.
    Commits the destination connection once all batches are inserted.
    """
    datatypes = get_table_datatypes(table_name(destination_table),
                                    schema_name(destination_table),
                                    destination)

    def template_piece(dt):
        # Postgres json/jsonb array columns need an explicit cast in the
        # VALUES template; every other type passes through as plain %s.
        if dt == '_json':
            return '%s::json[]'
        elif dt == '_jsonb':
            return '%s::jsonb[]'
        else:
            return '%s'

    template = '(' + ','.join(template_piece(dt) for dt in datatypes) + ')'

    # Loop-invariant: build the INSERT statement once, not once per batch.
    insert_query = 'INSERT INTO {} VALUES %s'.format(
        fully_qualified_table(destination_table))

    cursor_name = 'table_cursor_' + str(uuid.uuid4()).replace('-', '')
    cursor = source.cursor(name=cursor_name)
    try:
        cursor.execute(query)

        fetch_row_count = 100000
        while True:
            rows = cursor.fetchmany(fetch_row_count)
            if not rows:
                break

            # we end up doing a lot of execute statements here, copying data.
            # using the inner_cursor means we don't log all the noise
            destination_cursor = destination.cursor().inner_cursor
            try:
                execute_values(destination_cursor, insert_query, rows,
                               template)
            finally:
                # Close the batch cursor even if the insert fails.
                destination_cursor.close()
    finally:
        # Always release the server-side cursor, including on error.
        cursor.close()

    destination.commit()
# Example #2
    def __subset_upstream(self, target, processed_tables, relationships):
        """Copy into *target* only the upstream rows that are referenced by
        tables we have already subset.

        Stages a full copy of *target* in a temporary table on the
        destination, then inserts only the rows whose foreign-key values
        appear in the already-processed referencing tables.

        Returns True when rows were copied; False when *target* is not
        referenced by any processed table, or was itself already processed.
        """

        redacted_relationships = redact_relationships(relationships)
        # FK constraints where *target* is the referencing (fk) side and the
        # referenced table has already been processed.
        relevant_key_constraints = list(
            filter(
                lambda r: r['target_table'] in processed_tables and r[
                    'fk_table'] == target, redacted_relationships))
        # this table isn't referenced by anything we've already processed, so let's leave it empty
        #  OR
        # table was already added, this only happens if the upstream table was also a direct target
        if len(relevant_key_constraints) == 0 or target in processed_tables:
            return False

        temp_target_name = 'subset_temp_' + table_name(target)

        try:
            # copy the whole table
            columns_query = columns_to_copy(target, relationships,
                                            self.__source_conn)
            # LIMIT 0 clones the destination table's column structure into an
            # empty temporary staging table (no rows copied here).
            self.__db_helper.run_query(
                'CREATE TEMPORARY TABLE {} AS SELECT * FROM {} LIMIT 0'.format(
                    quoter(temp_target_name),
                    fully_qualified_table(
                        mysql_db_name_hack(target, self.__destination_conn))),
                self.__destination_conn)
            query = 'SELECT {} FROM {}'.format(columns_query,
                                               fully_qualified_table(target))
            self.__db_helper.copy_rows(self.__source_conn,
                                       self.__destination_conn, query,
                                       temp_target_name)

            # filter it down in the target database
            table_columns = self.__db_helper.get_table_columns(
                table_name(target), schema_name(target), self.__source_conn)
            # One IN-subquery per relevant FK constraint: keep only rows whose
            # key tuple appears in the already-subset referencing table.
            clauses = [
                '{} IN (SELECT {} FROM {})'.format(
                    columns_tupled(kc['fk_columns']),
                    columns_joined(kc['target_columns']),
                    fully_qualified_table(
                        mysql_db_name_hack(kc['target_table'],
                                           self.__destination_conn)))
                for kc in relevant_key_constraints
            ]
            clauses.extend(upstream_filter_match(target, table_columns))

            select_query = 'SELECT * FROM {} WHERE TRUE AND {}'.format(
                quoter(temp_target_name), ' AND '.join(clauses))
            insert_query = 'INSERT INTO {} {}'.format(
                fully_qualified_table(
                    mysql_db_name_hack(target, self.__destination_conn)),
                select_query)
            self.__db_helper.run_query(insert_query, self.__destination_conn)
            self.__destination_conn.commit()

        finally:
            # delete temporary table
            # MySQL needs the TEMPORARY keyword in the DROP statement; other
            # backends use a plain DROP TABLE.
            mysql_temporary = 'TEMPORARY' if config_reader.get_db_type(
            ) == 'mysql' else ''
            self.__db_helper.run_query(
                'DROP {} TABLE IF EXISTS {}'.format(mysql_temporary,
                                                    quoter(temp_target_name)),
                self.__destination_conn)

        return True
def source_db_temp_table(target_table):
    """Build the ``tonic_subset_``-prefixed temp table name for *target_table*."""
    return f'tonic_subset_{schema_name(target_table)}_{table_name(target_table)}'
def source_db_temp_table(target_table):
    """Build the temp-database-qualified staging table name for *target_table*.

    NOTE(review): this re-definition shadows the earlier
    ``source_db_temp_table`` above; ``temp_db`` is presumably a module-level
    name — confirm it is defined before this is called.
    """
    qualified = '{}_{}'.format(schema_name(target_table),
                               table_name(target_table))
    return '{}.{}'.format(temp_db, qualified)