    def test_alter_table(self, containers, rbrsource, schematracker,
                         alter_table_query, table_name, gtid_enabled):
        if not gtid_enabled:
            increment_heartbeat(containers, rbrsource)
        execute_query_get_one_row(
            containers, rbrsource,
            alter_table_query.format(table_name=table_name))
        execute_query_get_one_row(
            containers, rbrsource,
            "ALTER TABLE {name} ROW_FORMAT=COMPRESSED".format(name=table_name))

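        # Give the replication handler a moment to replay the ALTER statements
        # onto the schema tracker before comparing schemas.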
        time.sleep(2)

        # Check that the schematracker db reflects the same altered schema.
        verify_describe_table_query = "DESCRIBE {table_name}".format(
            table_name=table_name)
        verify_alter_table_result = execute_query_get_all_rows(
            containers, schematracker, verify_describe_table_query)
        expected_alter_table_result = execute_query_get_all_rows(
            containers, rbrsource, verify_describe_table_query)

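        # The alter_table_query is expected to add an 'address' column; pick out
        # the row describing that column from each DESCRIBE result.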
        if 'address' in verify_alter_table_result[0].values():
            actual_result = verify_alter_table_result[0]
        elif 'address' in verify_alter_table_result[1].values():
            actual_result = verify_alter_table_result[1]
        else:
            raise AssertionError('The alter table query did not succeed')

        if 'address' in expected_alter_table_result[0].values():
            expected_result = expected_alter_table_result[0]
        else:
            expected_result = expected_alter_table_result[1]

        self.assert_expected_result(actual_result, expected_result)
    def test_create_table(self, containers, rbrsource, schematracker,
                          create_table_query, avro_schema, table_name,
                          namespace, schematizer, rbr_source_session,
                          gtid_enabled):
        if not gtid_enabled:
            increment_heartbeat(containers, rbrsource)
        execute_query_get_one_row(
            containers, rbrsource,
            create_table_query.format(table_name=table_name))

        # Need to poll for the creation of the table
        _wait_for_table(containers, schematracker, table_name)

        # Check the schematracker db also has the table.
        verify_create_table_query = "SHOW CREATE TABLE {table_name}".format(
            table_name=table_name)
        verify_create_table_result = execute_query_get_one_row(
            containers, schematracker, verify_create_table_query)
        expected_create_table_result = execute_query_get_one_row(
            containers, rbrsource, verify_create_table_query)
        self.assert_expected_result(verify_create_table_result,
                                    expected_create_table_result)

        # It's necessary to insert data for the topic to actually be created.
        Biz = _generate_basic_model(table_name)
        rbr_source_session.add(Biz(id=1, name='insert'))
        rbr_source_session.commit()

        _wait_for_schematizer_topic(schematizer, namespace, table_name)

        # Check schematizer.
        self.check_schematizer_has_correct_source_info(table_name=table_name,
                                                       avro_schema=avro_schema,
                                                       namespace=namespace,
                                                       schematizer=schematizer)
def test_change_log_messages(
    containers,
    rbrsource,
    create_table_query,
    schematizer,
    namespace,
    source,
    rbr_source_session,
    gtid_enabled
):

    if not gtid_enabled:
        increment_heartbeat(containers, rbrsource)

    execute_query_get_one_row(
        containers,
        rbrsource,
        create_table_query.format(table_name=source)
    )

    BasicModel = _generate_basic_model(source)
    model_1 = BasicModel(id=1, name='insert')
    model_2 = BasicModel(id=2, name='insert')
    rbr_source_session.add(model_1)
    rbr_source_session.add(model_2)
    rbr_source_session.commit()
    model_1.name = 'update'
    rbr_source_session.delete(model_2)
    rbr_source_session.commit()

    messages = _fetch_messages(
        containers,
        schematizer,
        namespace,
        source,
        4
    )

    expected_messages = [
        {
            'message_type': MessageType.create,
            'payload_data': {'id': 1, 'table_name': source, 'table_schema': 'yelp'}
        },
        {
            'message_type': MessageType.create,
            'payload_data': {'id': 2, 'table_name': source, 'table_schema': 'yelp'}
        },
        {
            'message_type': MessageType.update,
            'payload_data': {'id': 1, 'table_name': source, 'table_schema': 'yelp'},
            'previous_payload_data': {'id': 1, 'table_name': source, 'table_schema': 'yelp'}
        },
        {
            'message_type': MessageType.delete,
            'payload_data': {'id': 2, 'table_name': source, 'table_schema': 'yelp'}
        },
    ]
    _verify_messages(messages, expected_messages)
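

# A minimal sketch of the verification helper used throughout these tests; the
# real helper ships with the acceptance suite, so the exact comparison rules
# here are assumptions inferred from the expected_messages dicts built above
# (payload fields are compared as a subset of the published payload).
def _verify_messages(messages, expected_messages):
    assert len(messages) == len(expected_messages)
    for message, expected in zip(messages, expected_messages):
        assert message.message_type == expected['message_type']
        for payload_key in ('payload_data', 'previous_payload_data'):
            if payload_key in expected:
                actual_payload = getattr(message, payload_key)
                for field, value in expected[payload_key].items():
                    assert actual_payload[field] == value
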
    def test_basic_table(self, containers, replhandler, rbrsource,
                         create_table_query, namespace, schematizer,
                         rbr_source_session, gtid_enabled):
        if not gtid_enabled:
            increment_heartbeat(containers, rbrsource)

        source = "{0}_basic_table".format(replhandler)
        execute_query_get_one_row(containers, rbrsource,
                                  create_table_query.format(table_name=source))

        BasicModel = _generate_basic_model(source)
        model_1 = BasicModel(id=1, name='insert')
        model_2 = BasicModel(id=2, name='insert')
        rbr_source_session.add(model_1)
        rbr_source_session.add(model_2)
        rbr_source_session.commit()
        model_1.name = 'update'
        rbr_source_session.delete(model_2)
        rbr_source_session.commit()

        messages = _fetch_messages(containers, schematizer, namespace, source,
                                   4)
        expected_messages = [
            {
                'message_type': MessageType.create,
                'payload_data': {
                    'id': 1,
                    'name': 'insert'
                }
            },
            {
                'message_type': MessageType.create,
                'payload_data': {
                    'id': 2,
                    'name': 'insert'
                }
            },
            {
                'message_type': MessageType.update,
                'payload_data': {
                    'id': 1,
                    'name': 'update'
                },
                'previous_payload_data': {
                    'id': 1,
                    'name': 'insert'
                }
            },
            {
                'message_type': MessageType.delete,
                'payload_data': {
                    'id': 2,
                    'name': 'insert'
                }
            },
        ]
        _verify_messages(messages, expected_messages)
    def create_complex_table(self, containers, rbrsource, complex_table_name,
                             complex_table_create_query):
        if complex_table_create_query.strip():
            complex_table_create_query = ", {}".format(
                complex_table_create_query)
        query = """CREATE TABLE {complex_table_name}
        (
            `id` int(11) NOT NULL PRIMARY KEY
            {complex_table_create_query}
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8
        """.format(complex_table_name=complex_table_name,
                   complex_table_create_query=complex_table_create_query)

        execute_query_get_one_row(containers, rbrsource, query)
def _wait_for_table(containers, db_name, table_name):
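    # Poll until the table shows up in db_name; give up quietly once
    # timeout_seconds (a module-level constant) has elapsed.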
    poll_query = "SHOW TABLES LIKE '{table_name}'".format(table_name=table_name)
    end_time = time.time() + timeout_seconds
    while end_time > time.time():
        result = execute_query_get_one_row(containers, db_name, poll_query)
        if result is not None:
            break
        time.sleep(0.5)
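

# A companion polling helper in the same style as _wait_for_table; a minimal
# sketch only, since the real implementation is not shown here. It reuses the
# schematizer.get_topics_by_criteria call that appears later in these tests.
def _wait_for_schematizer_topic(schematizer, namespace, source):
    end_time = time.time() + timeout_seconds
    while end_time > time.time():
        topics = schematizer.get_topics_by_criteria(
            namespace_name=namespace,
            source_name=source
        )
        if topics:
            break
        time.sleep(0.5)
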
    def test_create_table_with_row_format(self, containers, rbrsource,
                                          schematracker, replhandler,
                                          gtid_enabled):
        table_name = '{0}_row_format_tester'.format(replhandler)
        create_table_stmt = """
        CREATE TABLE {name}
        ( id int(11) primary key) ROW_FORMAT=COMPRESSED ENGINE=InnoDB
        """.format(name=table_name)
        if not gtid_enabled:
            increment_heartbeat(containers, rbrsource)
        execute_query_get_one_row(containers, rbrsource, create_table_stmt)

        _wait_for_table(containers, schematracker, table_name)
        # Check the schematracker db also has the table.
        verify_create_table_query = "SHOW CREATE TABLE {table_name}".format(
            table_name=table_name)
        verify_create_table_result = execute_query_get_one_row(
            containers, schematracker, verify_create_table_query)
        expected_create_table_result = execute_query_get_one_row(
            containers, rbrsource, verify_create_table_query)
        self.assert_expected_result(verify_create_table_result,
                                    expected_create_table_result)
    def test_table_with_contains_pii(self, containers, replhandler, rbrsource,
                                     create_table_query, namespace,
                                     schematizer, rbr_source_session,
                                     gtid_enabled):
        with reconfigure(encryption_type='AES_MODE_CBC-1',
                         key_location='acceptance/configs/data_pipeline/'):
            if not gtid_enabled:
                increment_heartbeat(containers, rbrsource)

            source = "{}_secret_table".format(replhandler)
            execute_query_get_one_row(
                containers, rbrsource,
                create_table_query.format(table_name=source))

            BasicModel = _generate_basic_model(source)
            model_1 = BasicModel(id=1, name='insert')
            model_2 = BasicModel(id=2, name='insert')
            rbr_source_session.add(model_1)
            rbr_source_session.add(model_2)
            rbr_source_session.commit()

            messages = _fetch_messages(containers, schematizer, namespace,
                                       source, 2)
            expected_messages = [{
                'message_type': MessageType.create,
                'payload_data': {
                    'id': 1,
                    'name': 'insert'
                }
            }, {
                'message_type': MessageType.create,
                'payload_data': {
                    'id': 2,
                    'name': 'insert'
                }
            }]
            _verify_messages(messages, expected_messages)
    def test_processing_table_rename(
        self,
        containers_without_repl_handler,
        rbrsource,
        schematizer,
        schematracker,
        namespace,
        start_service,
        gtid_enabled
    ):
        """
        This test verifies that the service handles table renames.
        """
        table_name_one = "hogwarts_{r}".format(r=self.get_random_string())
        table_name_two = "durmstrang_{r}".format(r=self.get_random_string())
        create_table_query = """
        CREATE TABLE {table_name}
        (
            `name` VARCHAR(255) NOT NULL PRIMARY KEY,
            `house` VARCHAR(255) NOT NULL
        )"""
        insert_query = """
        INSERT INTO {table} VALUES ( "{one}", "{two}" )
        """
        change_table_name_query = """
        RENAME TABLE {old} TO {new}
        """
        if not gtid_enabled:
            increment_heartbeat(containers_without_repl_handler, rbrsource)
        create_query = create_table_query.format(table_name=table_name_one)
        rename_query = change_table_name_query.format(
            old=table_name_one,
            new=table_name_two
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            create_query
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            insert_query.format(
                table=table_name_one,
                one='Cedric Diggory',
                two='Hufflepuff'
            )
        )

        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            insert_query.format(
                table=table_name_one,
                one='Hannah Abbott',
                two='Hufflepuff'
            )
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            rename_query
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            insert_query.format(
                table=table_name_two,
                one='Viktor Krum',
                two='Durmstrang'
            )
        )

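        # Process all five queries above (CREATE TABLE, two INSERTs, the RENAME,
        # and the final INSERT) in a single run, then stop.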
        start_service(
            resume_stream=True,
            num_of_queries_to_process=5,
            end_time=30,
            is_schema_event_helper_enabled=False,
            num_of_schema_events=100,
        )
        show_query = "SHOW CREATE TABLE {name}"
        source_schema = execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            show_query.format(name=table_name_two)
        )
        tracker_schema = execute_query_get_one_row(
            containers_without_repl_handler,
            schematracker,
            show_query.format(name=table_name_two)
        )
        assert source_schema == tracker_schema

        messages_two = _fetch_messages(
            containers_without_repl_handler,
            schematizer,
            namespace,
            table_name_two,
            1
        )

        messages_one = _fetch_messages(
            containers_without_repl_handler,
            schematizer,
            namespace,
            table_name_one,
            2
        )

        expected_messages_one = [
            {
                'message_type': MessageType.create,
                'payload_data': {'name': 'Cedric Diggory',
                                 'house': 'Hufflepuff'}
            },
            {
                'message_type': MessageType.create,
                'payload_data': {'name': 'Hannah Abbott', 'house': 'Hufflepuff'}
            }
        ]
        _verify_messages(messages_one, expected_messages_one)

        expected_messages_two = [
            {
                'message_type': MessageType.create,
                'payload_data': {'name': 'Viktor Krum', 'house': 'Durmstrang'}
            }
        ]
        _verify_messages(messages_two, expected_messages_two)
    def test_unclean_shutdown_schema_event(
        self,
        containers_without_repl_handler,
        rbrsource,
        schematracker,
        schematizer,
        namespace,
        start_service,
        gtid_enabled
    ):
        """This tests the recovery of the service if it fails when executing an
        schema event.
        A failure is triggered intentionally when an alter table to add column
        event is being handled. The test asserts that after the service restarts
        it processes that event again and processes the schema event. It also
        asserts that it doesn't miss out on any event and doesn't process an
        event more than once.
        """
        table_name = "ministry_of_magic_{r}".format(r=self.get_random_string())
        create_query = """
        CREATE TABLE {table}
        (name VARCHAR(255) PRIMARY KEY, dept VARCHAR(255))
        """
        insert_query_one = """
        INSERT INTO {table} VALUES
        ("{one}", "{two}")
        """
        add_col_query = """
        ALTER TABLE {table}
        ADD floor INT(11)
        """
        insert_query_two = """
        INSERT INTO {table} VALUES
        ("{one}", "{two}", "{three}")
        """

        if not gtid_enabled:
            increment_heartbeat(containers_without_repl_handler, rbrsource)
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            create_query.format(table=table_name)
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            insert_query_one.format(
                table=table_name,
                one='Rufus Scrimgeour',
                two='Minister'
            )
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            add_col_query.format(
                table=table_name
            )
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            insert_query_two.format(
                table=table_name,
                one='Kingsley Shacklebolt',
                two='Auror',
                three=2
            )
        )

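        # As described in the docstring, this run is configured to fail while
        # handling the ALTER TABLE schema event (schema event helper enabled,
        # zero schema events allowed).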
        start_service(
            resume_stream=True,
            num_of_queries_to_process=3,
            end_time=30,
            is_schema_event_helper_enabled=True,
            num_of_schema_events=0,
        )

        execute_query_get_one_row(
            containers_without_repl_handler,
            schematracker,
            "DROP TABLE {table}".format(table=table_name)
        )

        execute_query_get_one_row(
            containers_without_repl_handler,
            schematracker,
            create_query.format(table=table_name)
        )

        tracker_create_table = execute_query_get_all_rows(
            containers_without_repl_handler,
            schematracker,
            "SHOW CREATE TABLE {table}".format(table=table_name)
        )[0]['Create Table']

        source_create_table = execute_query_get_all_rows(
            containers_without_repl_handler,
            rbrsource,
            "SHOW CREATE TABLE {table}".format(table=table_name)
        )[0]['Create Table']

        assert source_create_table != tracker_create_table

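        # Restart without the failure injection; the service should replay the
        # interrupted ALTER and bring the tracker back in sync with the source.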
        start_service(
            resume_stream=True,
            num_of_queries_to_process=2,
            end_time=30,
            is_schema_event_helper_enabled=False,
            num_of_schema_events=100,
        )

        tracker_create_table = execute_query_get_all_rows(
            containers_without_repl_handler,
            schematracker,
            "SHOW CREATE TABLE {table}".format(table=table_name)
        )[0]['Create Table']

        assert source_create_table == tracker_create_table

        _fetch_messages(
            containers_without_repl_handler,
            schematizer,
            namespace,
            table_name,
            2
        )
    def test_saving_topic_and_kafka_offset_info(
        self,
        containers_without_repl_handler,
        rbrsource,
        rbrstate,
        schematizer,
        namespace,
        start_service,
        gtid_enabled
    ):
        """This tests that the service saves the correct topic and offset
        information on failure in the global_event_state table.
        It also asserts that it doesn't miss out on any event and
        doesn't process an event more than once.
        """
        table_name = "animagi_{r}".format(r=self.get_random_string())
        create_query = """
        CREATE TABLE {table}
        (name VARCHAR(255) PRIMARY KEY, animal VARCHAR(255))
        """
        insert_query_one = """
        INSERT INTO {table} VALUES
        ("{one}", "{two}")
        """

        if not gtid_enabled:
            increment_heartbeat(containers_without_repl_handler, rbrsource)
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            create_query.format(table=table_name)
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            insert_query_one.format(
                table=table_name,
                one='James Potter',
                two='stag'
            )
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            insert_query_one.format(
                table=table_name,
                one='Sirius Black',
                two='dog'
            )
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            insert_query_one.format(
                table=table_name,
                one='Rita Skeeter',
                two='beetle'
            )
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            insert_query_one.format(
                table=table_name,
                one='Minerva McGonagall',
                two='cat'
            )
        )

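        # First run: process the CREATE TABLE and the first two INSERTs, then stop.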
        start_service(
            resume_stream=True,
            num_of_queries_to_process=3,
            end_time=30,
            is_schema_event_helper_enabled=False,
            num_of_schema_events=100,
        )

        _wait_for_schematizer_topic(schematizer, namespace, table_name)
        topics = schematizer.get_topics_by_criteria(
            namespace_name=namespace,
            source_name=table_name
        )
        topic_name = topics[0].name

        saved_data_state = execute_query_get_all_rows(
            containers_without_repl_handler,
            rbrstate,
            "SELECT * FROM {table} WHERE kafka_topic like \"%{t}%\"".format(
                table='data_event_checkpoint',
                t=table_name
            )
        )

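        # Two data events have been published so far, so the checkpoint should
        # record offset 2 for this topic.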
        assert saved_data_state[0]['kafka_offset'] == 2
        assert saved_data_state[0]['kafka_topic'] == topic_name

        if not gtid_enabled:
            saved_heartbeat_state = execute_query_get_all_rows(
                containers_without_repl_handler,
                rbrstate,
                "SELECT * FROM {table} WHERE table_name=\"{name}\"".format(
                    table='global_event_state',
                    name=table_name
                )
            )
            position_info = ast.literal_eval(saved_heartbeat_state[0]['position'])
            log_pos = position_info['log_pos']
            log_file = position_info['log_file']
            offset = position_info['offset']

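        # Second run: resume and process the remaining two INSERTs.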
        start_service(
            resume_stream=True,
            num_of_queries_to_process=2,
            end_time=30,
            is_schema_event_helper_enabled=False,
            num_of_schema_events=100,
        )

        saved_data_state = execute_query_get_all_rows(
            containers_without_repl_handler,
            rbrstate,
            "SELECT * FROM {table} WHERE kafka_topic like \"%{t}%\"".format(
                table='data_event_checkpoint',
                t=table_name
            )
        )
        assert saved_data_state[0]['kafka_offset'] == 4
        assert saved_data_state[0]['kafka_topic'] == topic_name

        if not gtid_enabled:
            saved_heartbeat_state = execute_query_get_all_rows(
                containers_without_repl_handler,
                rbrstate,
                "SELECT * FROM {table} WHERE table_name=\"{name}\"".format(
                    table='global_event_state',
                    name=table_name
                )
            )

            position_info = ast.literal_eval(saved_heartbeat_state[0]['position'])
            assert position_info['log_pos'] == log_pos
            assert position_info['log_file'] == log_file
            assert position_info['offset'] == offset + 4
    def test_shutdown_processing_events_publishes_events_only_once(
        self,
        containers_without_repl_handler,
        rbrsource,
        schematizer,
        namespace,
        start_service,
        gtid_enabled
    ):
        """Tests the guarantee that the service provides about publishing every
        MySQL change only once. A few queries are executed on the source
        database and the service is started. The service is asked to halt after
        processing a subset of events and then restart and pick up from where it
        left off.
        """
        table_name = "coursework_{r}".format(r=self.get_random_string())
        create_query = """
        CREATE TABLE {table}
        (name VARCHAR(255) PRIMARY KEY, teacher VARCHAR(255))
        """
        insert_query_one = """
        INSERT INTO {table} VALUES
        ("{one}", "{two}")
        """
        update_query = """
        UPDATE {table}
        SET teacher="{one}"
        WHERE name="{two}"
        """
        if not gtid_enabled:
            increment_heartbeat(containers_without_repl_handler, rbrsource)
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            create_query.format(table=table_name)
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            insert_query_one.format(
                table=table_name,
                one='potions',
                two='Severus Snape'
            )
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            insert_query_one.format(
                table=table_name,
                one='care of magical creatures',
                two='Rubeus Hagrid'
            )
        )
        execute_query_get_one_row(
            containers_without_repl_handler,
            rbrsource,
            update_query.format(
                table=table_name,
                one='Grubbly Plank',
                two='care of magical creatures'
            )
        )

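        # First run: process only the CREATE TABLE and the first INSERT, then halt.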
        start_service(
            resume_stream=False,
            num_of_queries_to_process=2,
            end_time=30,
            is_schema_event_helper_enabled=False,
            num_of_schema_events=100,
        )

        messages_one = _fetch_messages(
            containers_without_repl_handler,
            schematizer,
            namespace,
            table_name,
            1
        )

        expected_messages_one = [
            {
                'message_type': MessageType.create,
                'payload_data': {'name': 'potions', 'teacher': 'Severus Snape'}
            },
        ]
        _verify_messages(messages_one, expected_messages_one)

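        # Second run: resume from the saved position and process the remaining
        # INSERT and UPDATE.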
        start_service(
            resume_stream=True,
            num_of_queries_to_process=2,
            end_time=30,
            is_schema_event_helper_enabled=False,
            num_of_schema_events=100,
        )

        messages_two = _fetch_messages(
            containers_without_repl_handler,
            schematizer,
            namespace,
            table_name,
            3
        )
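        # The first fetched message was already verified after the first run;
        # only the last two messages are new.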
        messages_two = messages_two[1:]

        expected_messages_two = [
            {
                'message_type': MessageType.create,
                'payload_data': {'name': 'care of magical creatures',
                                 'teacher': 'Rubeus Hagrid'}
            },
            {
                'message_type': MessageType.update,
                'payload_data': {'name': 'care of magical creatures',
                                 'teacher': 'Grubbly Plank'},
                'previous_payload_data': {'name': 'care of magical creatures',
                                          'teacher': 'Rubeus Hagrid'}
            }
        ]

        _verify_messages(messages_two, expected_messages_two)