Esempio n. 1
0
    def _get_foreign_keys_metadata(conversion, table_name):
        """
        Retrieves foreign keys metadata.
        :param conversion: Conversion
        :param table_name: str
        :return: None
        """
        log_title = 'ForeignKeyProcessor::_get_foreign_keys_metadata'
        msg = '\t--[%s] Search foreign keys for table "%s"."%s"...' % (
            log_title, conversion.schema, table_name)
        FsOps.log(conversion, msg)
        sql = """
            SELECT 
                cols.COLUMN_NAME, refs.REFERENCED_TABLE_NAME, refs.REFERENCED_COLUMN_NAME,
                cRefs.UPDATE_RULE, cRefs.DELETE_RULE, cRefs.CONSTRAINT_NAME 
            FROM INFORMATION_SCHEMA.`COLUMNS` AS cols 
            INNER JOIN INFORMATION_SCHEMA.`KEY_COLUMN_USAGE` AS refs 
                ON refs.TABLE_SCHEMA = cols.TABLE_SCHEMA 
                    AND refs.REFERENCED_TABLE_SCHEMA = cols.TABLE_SCHEMA 
                    AND refs.TABLE_NAME = cols.TABLE_NAME 
                    AND refs.COLUMN_NAME = cols.COLUMN_NAME 
            LEFT JOIN INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS AS cRefs 
                ON cRefs.CONSTRAINT_SCHEMA = cols.TABLE_SCHEMA 
                    AND cRefs.CONSTRAINT_NAME = refs.CONSTRAINT_NAME 
            LEFT JOIN INFORMATION_SCHEMA.`KEY_COLUMN_USAGE` AS links 
                ON links.TABLE_SCHEMA = cols.TABLE_SCHEMA 
                    AND links.REFERENCED_TABLE_SCHEMA = cols.TABLE_SCHEMA 
                    AND links.REFERENCED_TABLE_NAME = cols.TABLE_NAME 
                    AND links.REFERENCED_COLUMN_NAME = cols.COLUMN_NAME 
            LEFT JOIN INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS AS cLinks 
                ON cLinks.CONSTRAINT_SCHEMA = cols.TABLE_SCHEMA 
                    AND cLinks.CONSTRAINT_NAME = links.CONSTRAINT_NAME 
            WHERE cols.TABLE_SCHEMA = '%s' AND cols.TABLE_NAME = '%s';
        """ % (conversion.mysql_db_name,
               ExtraConfigProcessor.get_table_name(
                   conversion, table_name, should_get_original=True))

        result = DBAccess.query(conversion=conversion,
                                caller=log_title,
                                sql=sql,
                                vendor=DBVendors.MYSQL,
                                process_exit_on_error=False,
                                should_return_client=False)

        if result.error:
            return

        extra_rows = ExtraConfigProcessor.parse_foreign_keys(
            conversion, table_name)
        full_rows = (result.data or []) + extra_rows
        ForeignKeyProcessor._set_foreign_keys_for_given_table(
            conversion, table_name, full_rows)
        msg = '\t--[%s] Foreign keys for table "%s"."%s" are set...' % (
            log_title, conversion.schema, table_name)
        FsOps.log(conversion, msg)
Esempio n. 2
0
    def prepare_data_chunks(conversion, table_name, have_data_chunks_processed):
        """
        Prepares a list of tables metadata.
        :param conversion: Conversion
        :param table_name: str
        :param have_data_chunks_processed: bool
        :return: None
        """
        if have_data_chunks_processed:
            return

        log_title = 'DataChunksProcessor::prepare_data_chunks'
        log_path = conversion.dic_tables[table_name].table_log_path
        original_table_name = ExtraConfigProcessor.get_table_name(conversion, table_name, True)

        select_field_list = ColumnsDataArranger.arrange_columns_data(
            conversion.dic_tables[table_name].table_columns,
            conversion.mysql_version
        )

        rows_cnt_result = DBAccess.query(
            conversion=conversion,
            caller=log_title,
            sql='SELECT COUNT(1) AS rows_count FROM `%s`;' % original_table_name,
            vendor=DBVendors.MYSQL,
            process_exit_on_error=True,
            should_return_client=False
        )

        rows_cnt = int(rows_cnt_result.data[0]['rows_count'])
        msg = '\t--[%s] Total rows to insert into "%s"."%s": %d' % (log_title, conversion.schema, table_name, rows_cnt)
        FsOps.log(conversion, msg, log_path)

        meta = {
            '_tableName': table_name,
            '_selectFieldList': select_field_list,
            '_rowsCnt': rows_cnt,
        }

        sql = 'INSERT INTO "{0}"."data_pool_{0}{1}"("metadata") VALUES (%(meta)s);' \
            .format(conversion.schema, conversion.mysql_db_name)

        DBAccess.query(
            conversion=conversion,
            caller=log_title,
            sql=sql,
            vendor=DBVendors.PG,
            process_exit_on_error=True,
            should_return_client=False,
            client=None,
            bindings={'meta': json.dumps(meta)}
        )
Esempio n. 3
0
    def create_indexes(conversion, table_name):
        """
        Creates indexes, including PK, on given table.
        :param conversion: Conversion
        :param table_name: str
        :return: None
        """
        log_title = 'IndexesProcessor::create_indexes'
        original_table_name = ExtraConfigProcessor.get_table_name(conversion, table_name, should_get_original=True)
        show_index_result = DBAccess.query(
            conversion=conversion,
            caller=log_title,
            sql='SHOW INDEX FROM `%s`;' % original_table_name,
            vendor=DBVendors.MYSQL,
            process_exit_on_error=False,
            should_return_client=False
        )

        if show_index_result.error:
            return

        pg_indexes = {}

        for index in show_index_result.data:
            pg_column_name = ExtraConfigProcessor.get_column_name(
                conversion=conversion,
                original_table_name=original_table_name,
                current_column_name=index['Column_name'],
                should_get_original=False
            )

            if index['Key_name'] in pg_indexes:
                pg_indexes[index['Key_name']]['column_name'].append('"%s"' % pg_column_name)
                continue

            pg_index_type = 'GIST' if index['Index_type'] == 'SPATIAL' else index['Index_type']
            pg_indexes[index['Key_name']] = {
                'is_unique': index['Non_unique'] == 0,
                'column_name': ['"%s"' % pg_column_name],
                'index_type': ' USING %s' % pg_index_type,
            }

        params = [
            [conversion, index_name, table_name, pg_indexes, idx]
            for idx, index_name in enumerate(pg_indexes.keys())
        ]

        ConcurrencyManager.run_in_parallel(conversion, IndexesProcessor._set_index, params)

        msg = '\t--[%s] "%s"."%s": PK/indices are successfully set...' % (log_title, conversion.schema, table_name)
        FsOps.log(conversion, msg, conversion.dic_tables[table_name].table_log_path)
Esempio n. 4
0
    def _process_columns_comments(conversion, table_name):
        """
        Creates columns comments.
        :param conversion: Conversion
        :param table_name: str
        :return: None
        """
        original_table_name = ExtraConfigProcessor.get_table_name(conversion, table_name, should_get_original=True)
        params = [
            [conversion, table_name, original_table_name, column]
            for column in conversion.dic_tables[table_name].table_columns
            if column['Comment'] != ''
        ]

        ConcurrencyManager.run_in_parallel(conversion, CommentsProcessor._set_column_comment, params)
Esempio n. 5
0
    def _process_table_comments(conversion, table_name):
        """
        Creates table comments.
        :param conversion: Conversion
        :param table_name: str
        :return: None
        """
        log_title = 'CommentsProcessor::_process_table_comments'
        sql_select_comment = '''
            SELECT table_comment AS table_comment FROM information_schema.tables 
            WHERE table_schema = '%s' AND table_name = '%s';
        ''' % (
            conversion.mysql_db_name,
            ExtraConfigProcessor.get_table_name(conversion, table_name, should_get_original=True)
        )

        select_comments_result = DBAccess.query(
            conversion=conversion,
            caller=log_title,
            sql=sql_select_comment,
            vendor=DBVendors.MYSQL,
            process_exit_on_error=False,
            should_return_client=False
        )

        if select_comments_result.error:
            return

        comment = CommentsProcessor._escape_quotes(select_comments_result.data[0]['table_comment'])
        sql_create_comment = 'COMMENT ON TABLE "%s"."%s" IS \'%s\';' % (conversion.schema, table_name, comment)
        create_comment_result = DBAccess.query(
            conversion=conversion,
            caller=log_title,
            sql=sql_create_comment,
            vendor=DBVendors.PG,
            process_exit_on_error=False,
            should_return_client=False
        )

        if create_comment_result.error:
            return

        msg = '\t--[%s] Successfully set comment for table "%s"."%s"' % (log_title, conversion.schema, table_name)
        FsOps.log(conversion, msg, conversion.dic_tables[table_name].table_log_path)
Esempio n. 6
0
    def process_enum(conversion, table_name):
        """
        Defines which columns of the given table are of type "enum".
        Sets an appropriate constraint, if appropriate.
        :param conversion: Conversion
        :param table_name: str
        :return: None
        """
        log_title = 'EnumProcessor::process_enum'
        msg = '\t--[%s] Defines "ENUMs" for table "%s"."%s"' % (log_title, conversion.schema, table_name)
        FsOps.log(conversion, msg, conversion.dic_tables[table_name].table_log_path)
        original_table_name = ExtraConfigProcessor.get_table_name(conversion, table_name, True)
        params = [
            [conversion, table_name, original_table_name, column]
            for column in conversion.dic_tables[table_name].table_columns
            if EnumProcessor._is_enum(column)
        ]

        ConcurrencyManager.run_in_parallel(conversion, EnumProcessor._set_enum, params)
Esempio n. 7
0
    def set_sequence_value(conversion, table_name):
        """
        Sets sequence value.
        :param conversion: Conversion
        :param table_name: str
        :return: None
        """
        original_table_name = ExtraConfigProcessor.get_table_name(
            conversion, table_name, True)
        table_columns_list = conversion.dic_tables[table_name].table_columns
        auto_increment_columns = [
            column for column in table_columns_list
            if column['Extra'] == 'auto_increment'
        ]

        if len(auto_increment_columns) == 0:
            return  # No auto-incremented column found.

        log_title = 'SequenceProcessor::set_sequence_value'
        auto_increment_column = auto_increment_columns[0]['Field']
        column_name = ExtraConfigProcessor.get_column_name(
            conversion=conversion,
            original_table_name=original_table_name,
            current_column_name=auto_increment_column,
            should_get_original=False)

        seq_name = '%s_%s_seq' % (table_name, column_name)
        sql = 'SELECT SETVAL(\'"%s"."%s"\',' % (conversion.schema, seq_name)
        sql += '(SELECT MAX("%s") FROM "%s"."%s"));' % (
            column_name, conversion.schema, table_name)
        result = DBAccess.query(conversion=conversion,
                                caller=log_title,
                                sql=sql,
                                vendor=DBVendors.PG,
                                process_exit_on_error=False,
                                should_return_client=False)

        if not result.error:
            msg = '\t--[%s] Sequence "%s"."%s" is created...' % (
                log_title, conversion.schema, seq_name)
            FsOps.log(conversion, msg,
                      conversion.dic_tables[table_name].table_log_path)
Esempio n. 8
0
    def _set_foreign_keys_for_given_table(conversion, table_name, rows):
        """
        Sets foreign keys for given table.
        :param conversion: Conversion
        :param table_name: str
        :param rows: list
        :return: None
        """
        constraints = {}
        original_table_name = ExtraConfigProcessor.get_table_name(
            conversion, table_name, should_get_original=True)

        for row in rows:
            current_column_name = ExtraConfigProcessor.get_column_name(
                conversion=conversion,
                original_table_name=original_table_name,
                current_column_name=row['COLUMN_NAME'],
                should_get_original=False)

            current_referenced_table_name = ExtraConfigProcessor.get_table_name(
                conversion=conversion,
                current_table_name=row['REFERENCED_TABLE_NAME'],
                should_get_original=False)

            original_referenced_table_name = ExtraConfigProcessor.get_table_name(
                conversion=conversion,
                current_table_name=row['REFERENCED_TABLE_NAME'],
                should_get_original=True)

            current_referenced_column_name = ExtraConfigProcessor.get_column_name(
                conversion=conversion,
                original_table_name=original_referenced_table_name,
                current_column_name=row['REFERENCED_COLUMN_NAME'],
                should_get_original=False)

            if row['CONSTRAINT_NAME'] in constraints:
                constraints[row['CONSTRAINT_NAME']]['column_name'].append(
                    '"%s"' % current_column_name)
                constraints[
                    row['CONSTRAINT_NAME']]['referenced_column_name'].append(
                        '"%s"' % current_referenced_column_name)

                return

            constraints[row['CONSTRAINT_NAME']] = {}
            constraints[row['CONSTRAINT_NAME']]['column_name'] = [
                '"%s"' % current_column_name
            ]
            constraints[row['CONSTRAINT_NAME']]['referenced_column_name'] = [
                '"%s"' % current_referenced_column_name
            ]
            constraints[row['CONSTRAINT_NAME']][
                'referenced_table_name'] = current_referenced_table_name
            constraints[
                row['CONSTRAINT_NAME']]['update_rule'] = row['UPDATE_RULE']
            constraints[
                row['CONSTRAINT_NAME']]['delete_rule'] = row['DELETE_RULE']

        params = [[conversion, constraints, foreign_key, table_name]
                  for foreign_key in constraints.keys()]
        ConcurrencyManager.run_in_parallel(
            conversion, ForeignKeyProcessor._set_single_foreign_key, params)
Esempio n. 9
0
    def populate_table_worker(
            conversion,
            table_name,
            str_select_field_list,
            rows_cnt,
            data_pool_id,
            connection_to_master
    ):
        """
        Loads a chunk of data using "PostgreSQL COPY".
        :param conversion: Conversion
        :param table_name: str
        :param str_select_field_list: str
        :param rows_cnt: int
        :param data_pool_id: int
        :param connection_to_master: multiprocessing.connection.Connection
        :return: None
        """
        log_title = 'DataLoader::populate_table_worker'
        original_table_name = ExtraConfigProcessor.get_table_name(conversion, table_name, True)
        sql = 'SELECT %s FROM `%s`;' % (str_select_field_list, original_table_name)
        original_session_replication_role = None
        text_stream = None
        pg_cursor = None
        pg_client = None
        mysql_cursor = None
        mysql_client = None

        try:
            mysql_client = DBAccess.get_mysql_unbuffered_client(conversion)
            mysql_cursor = mysql_client.cursor()
            mysql_cursor.execute(sql)
            number_of_inserted_rows = 0
            number_of_workers = min(conversion.max_each_db_connection_pool_size / 2, cpu_count())

            with ProcessPoolExecutor(max_workers=number_of_workers) as executor:
                while True:
                    batch_size = 25000  # TODO: think about batch size calculation.
                    batch = mysql_cursor.fetchmany(batch_size)
                    rows_to_insert = len(batch)

                    if rows_to_insert == 0:
                        break

                    executor.submit(
                        DataLoader._arrange_and_load_batch,
                        conversion.config,
                        table_name,
                        batch,
                        rows_cnt,
                        rows_to_insert,
                        number_of_inserted_rows
                    )
        except Exception as e:
            msg = 'Data retrieved by following MySQL query has been rejected by the target PostgreSQL server.'
            FsOps.generate_error(conversion, '\t--[{0}] {1}\n\t--[{0}] {2}'.format(log_title, e, msg), sql)
        finally:
            try:
                connection_to_master.send(table_name)
            except Exception as ex:
                msg = 'Failed to notify master that %s table\'s populating is finished.' % table_name
                FsOps.generate_error(conversion, '\t--[{0}] {1}\n\t--[{0}] {2}'.format(log_title, ex, msg))
            finally:
                for resource in (connection_to_master, text_stream, pg_cursor, mysql_cursor, mysql_client):
                    if resource:
                        resource.close()

                DataLoader.delete_data_pool_item(conversion, data_pool_id, pg_client, original_session_replication_role)
Esempio n. 10
0
    def create_sequence(conversion, table_name):
        """
        Defines which column in given table has the "auto_increment" attribute.
        Creates an appropriate sequence.
        :param conversion: Conversion
        :param table_name: str
        :return: None
        """
        original_table_name = ExtraConfigProcessor.get_table_name(
            conversion, table_name, True)
        table_columns_list = conversion.dic_tables[table_name].table_columns
        auto_increment_columns = [
            column for column in table_columns_list
            if column['Extra'] == 'auto_increment'
        ]

        if len(auto_increment_columns) == 0:
            return  # No auto-incremented column found.

        log_title = 'SequenceProcessor::create_sequence'
        auto_increment_column = auto_increment_columns[0]['Field']
        column_name = ExtraConfigProcessor.get_column_name(
            conversion=conversion,
            original_table_name=original_table_name,
            current_column_name=auto_increment_column,
            should_get_original=False)

        seq_name = '%s_%s_seq' % (table_name, column_name)
        create_sequence_result = DBAccess.query(
            conversion=conversion,
            caller=log_title,
            sql='CREATE SEQUENCE "%s"."%s";' % (conversion.schema, seq_name),
            vendor=DBVendors.PG,
            process_exit_on_error=False,
            should_return_client=True)

        if create_sequence_result.error:
            DBAccess.release_db_client(conversion,
                                       create_sequence_result.client)
            return

        sql_set_next_val = 'ALTER TABLE "%s"."%s" ALTER COLUMN "%s"' % (
            conversion.schema, table_name, column_name)
        sql_set_next_val += " SET DEFAULT NEXTVAL('%s.%s');" % (
            conversion.schema, seq_name)

        set_next_val_result = DBAccess.query(
            conversion=conversion,
            caller=log_title,
            sql=sql_set_next_val,
            vendor=DBVendors.PG,
            process_exit_on_error=False,
            should_return_client=True,
            client=create_sequence_result.client)

        if set_next_val_result.error:
            DBAccess.release_db_client(conversion, set_next_val_result.client)
            return

        sql_set_sequence_owner = 'ALTER SEQUENCE "{0}"."{1}" OWNED BY "{0}"."{2}"."{3}";'\
            .format(conversion.schema, seq_name, table_name, column_name)

        set_sequence_owner_result = DBAccess.query(
            conversion=conversion,
            caller=log_title,
            sql=sql_set_sequence_owner,
            vendor=DBVendors.PG,
            process_exit_on_error=False,
            should_return_client=True,
            client=set_next_val_result.client)

        if set_sequence_owner_result.error:
            DBAccess.release_db_client(conversion,
                                       set_sequence_owner_result.client)
            return

        sql_set_sequence_value = 'SELECT SETVAL(\'"{0}"."{1}"\', (SELECT MAX("{2}") FROM "{0}"."{3}"));'\
            .format(conversion.schema, seq_name, column_name, table_name)

        set_sequence_value_result = DBAccess.query(
            conversion=conversion,
            caller=log_title,
            sql=sql_set_sequence_value,
            vendor=DBVendors.PG,
            process_exit_on_error=False,
            should_return_client=False,
            client=set_sequence_owner_result.client)

        if not set_sequence_value_result.error:
            msg = '\t--[%s] Sequence "%s"."%s" is created...' % (
                log_title, conversion.schema, seq_name)
            FsOps.log(conversion, msg,
                      conversion.dic_tables[table_name].table_log_path)
Esempio n. 11
0
    def load_structure(conversion):
        """
        Loads source tables and views, that need to be migrated.
        :param conversion: Conversion, the configuration object.
        :return: None
        """
        log_title = 'StructureLoader::load_structure'
        StructureLoader._get_mysql_version(conversion)
        have_tables_loaded = MigrationStateManager.get(conversion,
                                                       'tables_loaded')
        sql = 'SHOW FULL TABLES IN `%s` WHERE 1 = 1' % conversion.mysql_db_name

        if len(conversion.include_tables) != 0:
            include_tables = ','.join([
                '"%s"' % table_name for table_name in conversion.include_tables
            ])
            sql += ' AND Tables_in_%s IN(%s)' % (conversion.mysql_db_name,
                                                 include_tables)

        if len(conversion.exclude_tables) != 0:
            exclude_tables = ','.join([
                '"%s"' % table_name for table_name in conversion.exclude_tables
            ])
            sql += ' AND Tables_in_%s NOT IN(%s)' % (conversion.mysql_db_name,
                                                     exclude_tables)

        sql += ';'

        result = DBAccess.query(conversion=conversion,
                                caller=log_title,
                                sql=sql,
                                vendor=DBVendors.MYSQL,
                                process_exit_on_error=True,
                                should_return_client=False)

        tables_cnt, views_cnt = 0, 0
        thread_pool_params = []

        for row in result.data:
            relation_name = row['Tables_in_' + conversion.mysql_db_name]

            if row['Table_type'] == 'BASE TABLE' and Utils.get_index_of(
                    relation_name, conversion.exclude_tables) == -1:
                relation_name = ExtraConfigProcessor.get_table_name(
                    conversion, relation_name, False)
                conversion.tables_to_migrate.append(relation_name)
                conversion.dic_tables[relation_name] = Table(
                    '%s/%s.log' % (conversion.logs_dir_path, relation_name))
                thread_pool_params.append(
                    [conversion, relation_name, have_tables_loaded])
                tables_cnt += 1
            elif row['Table_type'] == 'VIEW':
                conversion.views_to_migrate.append(relation_name)
                views_cnt += 1

        ConcurrencyManager.run_in_parallel(
            conversion, StructureLoader.process_table_before_data_loading,
            thread_pool_params)

        msg = '''\t--[{0}] Source DB structure is loaded...\n\t--[{0}] Tables to migrate: {1}\n
        --[{0}] Views to migrate: {2}'''.format(log_title, tables_cnt,
                                                views_cnt)
        FsOps.log(conversion, msg)
        MigrationStateManager.set(conversion, 'tables_loaded')