def test_setup_contains_pii_from_schematizer_once(self, message):
    """Check that ``contains_pii`` queries the schematizer only once.

    The first read of ``message.contains_pii`` is expected to call
    ``get_schema_by_id`` exactly one time; a second read is expected to
    make no call at all (presumably the value is cached on the message).
    """
    client = get_schematizer()

    # First access: exactly one schema lookup.
    with attach_spy_on_func(client, 'get_schema_by_id') as first_spy:
        message.contains_pii
        assert first_spy.call_count == 1

    # Second access: no further lookup.
    with attach_spy_on_func(client, 'get_schema_by_id') as second_spy:
        message.contains_pii
        assert second_spy.call_count == 0
Exemple #2
0
 def test_setup_contains_pii_from_schematizer_once(self, message):
     """Reading ``contains_pii`` twice should hit the schematizer once.

     A spy on ``get_schema_by_id`` must record one call during the first
     attribute read and zero calls during the second.
     """
     schematizer = get_schematizer()
     spied_name = 'get_schema_by_id'

     with attach_spy_on_func(schematizer, spied_name) as initial_spy:
         message.contains_pii  # first read triggers the lookup
         assert initial_spy.call_count == 1

     with attach_spy_on_func(schematizer, spied_name) as repeat_spy:
         message.contains_pii  # second read must not look up again
         assert repeat_spy.call_count == 0
Exemple #3
0
 def test_producer_registration_message_on_exit(self, producer_instance):
     """Check that leaving the producer context stops its registrar once.

     The context-manager protocol is driven by hand (``__enter__`` /
     ``__exit__``) so that the spy on ``registrar.stop`` is active at the
     moment the producer exits.
     """
     producer = producer_instance.__enter__()
     with attach_spy_on_func(producer.registrar, 'stop') as stop_spy:
         outgoing = CreateMessage(schema_id=1, payload=bytes("Test message"))
         producer.publish(outgoing)
         producer.__exit__(None, None, None)
         assert stop_spy.call_count == 1
    def test_skip_commit_offset_if_offset_unchanged(
        self, publish_messages, message, consumer_instance
    ):
        """Check that re-committing the same messages sends no new request.

        Four messages are published.  The first two are read and committed,
        which is expected to issue one ``send_offset_commit_request``.
        Committing those same two again is expected to issue none, and the
        next read is expected to return two more messages.
        """
        checker = ConsumerAsserter(
            consumer=consumer_instance,
            expected_message=message
        )
        with consumer_instance as consumer:
            publish_messages(message, 4)

            with attach_spy_on_func(
                consumer.kafka_client,
                'send_offset_commit_request'
            ) as commit_spy:
                first_batch = consumer.get_messages(
                    count=2, blocking=True, timeout=TIMEOUT
                )
                checker.assert_messages(first_batch, 2)

                # First commit of this batch: one request goes out.
                consumer.commit_messages(first_batch)
                assert commit_spy.call_count == 1

                commit_spy.reset_mock()

                # Same batch again: no new messages are committed, so the
                # call count must stay at zero.
                consumer.commit_messages(first_batch)
                assert commit_spy.call_count == 0

                # The following read should continue from the next offset.
                second_batch = consumer.get_messages(
                    count=2, blocking=True, timeout=TIMEOUT
                )
                checker.assert_messages(second_batch, 2)
    def test_consumer_periodic_registration_messages(
        self, publish_messages, input_compatible_message, consumer_instance
    ):
        """
        Check that a Consumer which has received a message from a topic keeps
        creating and sending registration messages periodically.

        With the registrar threshold set to 1, two calls to
        ``registrar.clog_writer.publish`` are expected after sleeping for
        2.5 seconds.

        Note: Test fails when threshold is set significantly below 1 second
        """
        read_timeout = 1.8
        consumer_instance.registrar.threshold = 1
        with consumer_instance as consumer:
            spy_context = attach_spy_on_func(
                consumer.registrar.clog_writer, 'publish'
            )
            with spy_context as publish_spy:
                publish_messages(input_compatible_message, count=1)
                consumer.get_message(blocking=True, timeout=read_timeout)

                # Restart the periodic registration with a known threshold.
                consumer.registrar.threshold = 1
                consumer.registrar.start()

                time.sleep(2.5)
                assert publish_spy.call_count == 2
                consumer.registrar.stop()
    def test_publish_to_new_topic(self, create_new_schema, producer):
        """Check that publishing to a topic that does not exist yet succeeds.

        A fresh schema (and therefore a fresh topic) is created, one message
        is published and flushed, and the test then verifies that:

        * ``send_produce_request`` was called at least twice (first attempt
          plus at least one retry),
        * the message can be read back from the start of the topic,
        * the producer's topic-to-offset map reflects one published message.
        """
        new_schema = create_new_schema(source='retry_source')
        message = CreateMessage(new_schema.schema_id, payload=str('1'))

        with attach_spy_on_func(
            producer._kafka_producer.kafka_client,
            'send_produce_request'
        ) as send_request_spy:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
            # Discard anything recorded during setup so only requests made by
            # the publish below are counted.  ``reset_mock`` is the mock API
            # for this (and what the sibling tests use); ``reset`` is not a
            # Mock method and on a plain Mock silently does nothing.
            send_request_spy.reset_mock()

            producer.publish(message)
            producer.flush()

            # it should fail at least the 1st time because the topic doesn't
            # exist. Depending on how fast the topic is created, it could retry
            # more than 2 times.
            assert send_request_spy.call_count >= 2

        messages = self.get_messages_from_start(message.topic)
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            orig_topic_to_offset_map,
            published_message_count=1
        )
Exemple #7
0
 def test_producer_initial_registration_messages(self, use_work_pool):
     """Check that starting a producer with three schema ids logs three lines.

     ``clog.log_line`` is spied on while a ``Producer`` configured with
     ``schema_id_list=[1, 2, 3]`` is entered; three calls are expected by
     the time the body of the producer context runs.
     """
     with attach_spy_on_func(clog, 'log_line') as log_spy, Producer(
         producer_name='producer_1',
         team_name='bam',
         expected_frequency_seconds=ExpectedFrequency.constantly,
         use_work_pool=use_work_pool,
         schema_id_list=[1, 2, 3]
     ):
         assert log_spy.call_count == 3
 def test_producer_registration_message_on_exit(self, producer_instance):
     """Exiting the producer should call ``registrar.stop`` exactly once.

     ``__enter__`` and ``__exit__`` are invoked explicitly so the exit
     happens while ``registrar.stop`` is being spied on.
     """
     producer = producer_instance.__enter__()
     with attach_spy_on_func(producer.registrar, 'stop') as registrar_stop_spy:
         payload = bytes("Test message")
         producer.publish(CreateMessage(schema_id=1, payload=payload))
         producer.__exit__(None, None, None)
         assert registrar_stop_spy.call_count == 1
Exemple #9
0
 def test_consumer_registration_message_on_exit(
     self, publish_messages, input_compatible_message, consumer_instance
 ):
     """Check that leaving the consumer context stops its registrar once.

     The consumer is entered by hand, reads one published message, and is
     then exited by hand while ``registrar.stop`` is being spied on.
     """
     read_timeout = 1.8
     consumer = consumer_instance.__enter__()
     with attach_spy_on_func(consumer.registrar, 'stop') as stop_spy:
         publish_messages(input_compatible_message, count=1)
         consumer.get_message(blocking=True, timeout=read_timeout)
         consumer.__exit__(None, None, None)
         assert stop_spy.call_count == 1
    def skip_test_offset_cache_cleared_at_rebalance(
        self,
        topic,
        pii_topic,
        publish_messages,
        consumer_instance,
        consumer_two_instance,
        message,
        pii_message
    ):
        """Check that a committed offset is sent again after a rebalance.

        Consumer one reads and commits one message.  A second consumer is
        then started in a separate process to trigger a rebalance, after
        which consumer one commits the same message again; exactly one
        ``send_offset_commit_request`` is expected for that repeat commit.

        NOTE(review): the ``skip_`` name prefix presumably keeps the test
        runner from collecting this method -- confirm against the runner
        configuration.
        """
        # TODO [DATAPIPE-249] previous version of test has an issue that
        # sometimes the consumer one doesn't get any message right after
        # consumer two starts.  It's unclear the cause and may be related
        # to how the tests are setup. Re-writting the test to bypass it
        # and defer addressing it in the DATAPIPE-249.
        consumer_one_rebalanced_event = Event()
        with consumer_instance as consumer_one:
            publish_messages(message, count=10)
            publish_messages(pii_message, count=10)

            consumer_one_message = consumer_one.get_message(
                blocking=True,
                timeout=TIMEOUT
            )
            consumer_one.commit_message(consumer_one_message)

            # trigger rebalancing by starting another consumer with same name
            consumer_two_process = Process(
                target=self._run_consumer_two,
                args=(consumer_two_instance, consumer_one_rebalanced_event)
            )
            consumer_two_process.start()
            # consumer one is rebalanced during `get_message`
            consumer_one.get_message(blocking=True, timeout=TIMEOUT)
            consumer_one_rebalanced_event.set()

            consumer_two_process.join(timeout=1)
            # NOTE(review): if Process is multiprocessing.Process, exitcode is
            # None while the child is still running, so this also passes when
            # the join above timed out -- confirm that is intended.
            assert not consumer_two_process.exitcode

            # force consumer rebalance again; consumer rebalance occurs when
            # get_message is called; set short timeout because we don't care
            # if there is any message left.
            consumer_one.get_message(blocking=True, timeout=0.1)

            # The same offset should be committed again because the rebalancing
            # will clear the internal offset cache.
            with attach_spy_on_func(
                consumer_one.kafka_client,
                'send_offset_commit_request'
            ) as func_spy:
                consumer_one.commit_message(consumer_one_message)
                assert func_spy.call_count == 1
 def test_producer_initial_registration_messages(self, use_work_pool):
     """A producer started with three schema ids should log three lines.

     The count of ``clog.log_line`` calls is checked inside the producer
     context, i.e. right after the producer has been entered.
     """
     with attach_spy_on_func(clog, 'log_line') as log_line_spy:
         started_producer = Producer(
             producer_name='producer_1',
             team_name='bam',
             expected_frequency_seconds=ExpectedFrequency.constantly,
             use_work_pool=use_work_pool,
             schema_id_list=[1, 2, 3]
         )
         with started_producer:
             assert log_line_spy.call_count == 3
Exemple #12
0
 def test_consumer_initial_registration_message(self, topic):
     """
     Check that starting a Consumer with a non-empty
     topic_to_consumer_topic_state_map sends one initial registration
     message, observed here as a single ``clog.log_line`` call.
     """
     with attach_spy_on_func(clog, 'log_line') as log_spy:
         topic_state = ConsumerTopicState({}, 23)
         with Consumer(
             consumer_name='test_consumer',
             team_name='bam',
             expected_frequency_seconds=ExpectedFrequency.constantly,
             topic_to_consumer_topic_state_map={topic: topic_state}
         ):
             assert log_spy.call_count == 1
Exemple #13
0
 def test_periodic_wake_calls(self, registrar):
     """
     Check that start() keeps publishing registration messages at the
     expected rate until stop() is called.

     With a threshold of 1 and a 3.5 second run, four calls to
     ``publish_registration_messages`` are expected in total.
     """
     with attach_spy_on_func(
         registrar, 'publish_registration_messages'
     ) as publish_spy:
         registrar.threshold = 1
         registrar.start()
         time.sleep(3.5)
         registrar.stop()
         # Short pause after stop() before the calls are counted.
         time.sleep(.5)
         # One call to publish_registration_messages happens on stop()
         assert publish_spy.call_count == 4
    def test_call_kafka_commit_offsets_when_offset_change(
        self, publish_messages, message, consumer_instance
    ):
        """Check that a commit request is sent whenever the offset differs.

        Three of four published messages are read and committed as a batch
        (one request expected).  Committing the first message of the batch
        and then the third one individually is each expected to issue one
        more request.  The next read should return the remaining message.
        """
        checker = ConsumerAsserter(consumer=consumer_instance,
                                   expected_message=message)
        with consumer_instance as consumer:
            publish_messages(message, 4)

            with attach_spy_on_func(consumer.kafka_client,
                                    'send_offset_commit_request') as commit_spy:
                batch = consumer.get_messages(count=3,
                                              blocking=True,
                                              timeout=TIMEOUT)
                checker.assert_messages(batch, 3)

                consumer.commit_messages(batch)
                assert commit_spy.call_count == 1

                commit_spy.reset_mock()

                # Committing an offset different from the last committed
                # one must produce a request.
                consumer.commit_message(batch[0])
                assert commit_spy.call_count == 1

                commit_spy.reset_mock()

                consumer.commit_message(batch[2])
                assert commit_spy.call_count == 1

                # The next read should pick up at the following offset.
                remaining = consumer.get_messages(count=1,
                                                  blocking=True,
                                                  timeout=TIMEOUT)
                assert len(remaining) == 1
                checker.assert_messages(remaining, 1)
 def test_periodic_wake_calls(self, registrar):
     """
     Verify that, once started, the registrar publishes registration
     messages periodically until it is stopped.

     The spy on ``publish_registration_messages`` is expected to have
     recorded four calls after a 3.5 second run with threshold 1.
     """
     spied_method = 'publish_registration_messages'
     with attach_spy_on_func(registrar, spied_method) as wake_spy:
         registrar.threshold = 1
         registrar.start()
         time.sleep(3.5)
         registrar.stop()
         time.sleep(.5)
         # One call to publish_registration_messages happens on stop()
         assert wake_spy.call_count == 4
 def test_consumer_registration_message_on_exit(self, publish_messages,
                                                input_compatible_message,
                                                consumer_instance):
     """Exiting the consumer should call ``registrar.stop`` exactly once.

     One message is published and read before the consumer is exited by
     calling ``__exit__`` directly inside the spy context.
     """
     get_message_timeout = 1.8
     consumer = consumer_instance.__enter__()
     with attach_spy_on_func(consumer.registrar, 'stop') as registrar_stop_spy:
         publish_messages(input_compatible_message, count=1)
         consumer.get_message(blocking=True, timeout=get_message_timeout)
         consumer.__exit__(None, None, None)
         assert registrar_stop_spy.call_count == 1
Exemple #17
0
 def test_producer_periodic_registration_messages(self, producer_instance):
     """
     Check that a producer keeps publishing registration messages
     periodically after it has published messages for two schema ids.

     Note: Test fails when threshold is set significantly below 1 second,
           presumably because of the nature of threading. Should be
           irrelevant if the threshold in registrar is set significantly
           higher.
     """
     producer_instance.registrar.threshold = 1
     with producer_instance as producer:
         with attach_spy_on_func(
             producer.registrar.clog_writer,
             'publish'
         ) as publish_spy:
             first = CreateMessage(schema_id=1, payload=bytes("FAKE MESSAGE"))
             producer.publish(first)
             second = CreateMessage(
                 schema_id=2,
                 payload=bytes("DIFFERENT FAKE MESSAGE")
             )
             producer.publish(second)
             time.sleep(2.5)
             assert publish_spy.call_count == 4
 def test_consumer_initial_registration_message(self, topic):
     """
     Assert that entering a Consumer whose
     topic_to_consumer_topic_state_map is non-empty results in exactly one
     ``clog.log_line`` call (the initial RegistrationMessage).
     """
     with attach_spy_on_func(clog, 'log_line') as log_line_spy:
         state_map = {topic: ConsumerTopicState({}, 23)}
         test_consumer = Consumer(
             consumer_name='test_consumer',
             team_name='bam',
             expected_frequency_seconds=ExpectedFrequency.constantly,
             topic_to_consumer_topic_state_map=state_map
         )
         with test_consumer:
             assert log_line_spy.call_count == 1
Exemple #19
0
    def test_ensure_messages_published_on_new_topic(
        self, create_new_schema, producer
    ):
        """When a topic doesn't exist, all of the messages on that topic
        should be published.

        ``ensure_messages_published`` is expected to call ``publish`` once
        for the single message, which must then be readable from the head
        of the new topic.
        """
        new_schema = create_new_schema(source='ensure_published_source_two')
        message = CreateMessage(new_schema.schema_id, payload=str('1'))
        topic_name = str(new_schema.topic.name)

        with attach_spy_on_func(producer, 'publish') as publish_spy:
            producer.ensure_messages_published([message], {})
            assert publish_spy.call_count == 1

        with setup_capture_new_messages_consumer(topic_name) as consumer:
            offset_from_head = 0
            consumer.seek(offset_from_head, 0)  # kafka_offset from head
            self._assert_all_messages_published(
                consumer,
                expected_payloads=[1]
            )
    def test_ensure_messages_published_on_new_topic(self, create_new_schema,
                                                    producer):
        """Publishing must go through when the target topic is brand new.

        A new schema is created, one message for it is handed to
        ``ensure_messages_published`` with an empty second argument, and
        the test checks that ``publish`` ran once and that the payload can
        be read back starting at offset 0 of the topic.
        """
        schema = create_new_schema(source='ensure_published_source_two')
        message = CreateMessage(schema.schema_id, payload=str('1'))
        topic = str(schema.topic.name)

        with attach_spy_on_func(producer, 'publish') as spy:
            producer.ensure_messages_published([message], {})
            assert spy.call_count == 1

        with setup_capture_new_messages_consumer(topic) as reader:
            start_offset = 0
            # Seek to start_offset relative to the head of the topic.
            reader.seek(start_offset, 0)
            self._assert_all_messages_published(reader,
                                                expected_payloads=[1])
Exemple #21
0
    def test_publish_succeeds_without_retry(self, topic, message, producer):
        """Check that publishing to an existing topic needs a single request.

        After one publish and flush, the captured messages must equal the
        published one, ``send_produce_request`` must have been called
        exactly once, and the topic-to-offset map must show one new message.
        """
        with attach_spy_on_func(
            producer._kafka_producer.kafka_client,
            'send_produce_request'
        ) as produce_spy:
            with capture_new_messages(topic) as fetch_new_messages:
                offsets_before = self.get_orig_topic_to_offset_map(producer)

                producer.publish(message)
                producer.flush()

                received = fetch_new_messages()
                self.assert_equal_msgs(
                    expected_msgs=[message],
                    actual_msgs=received
                )
                assert produce_spy.call_count == 1
                self.assert_new_topic_to_offset_map(
                    producer,
                    message.topic,
                    offsets_before,
                    published_message_count=1
                )
 def test_producer_periodic_registration_messages(self, producer_instance):
     """
     After publishing one message for each of two schema ids, the
     registrar's ``clog_writer.publish`` is expected to have been called
     four times once 2.5 seconds have passed.

     Note: Test fails when threshold is set significantly below 1 second,
           presumably because of the nature of threading. Should be
           irrelevant if the threshold in registrar is set significantly
           higher.
     """
     producer_instance.registrar.threshold = 1
     with producer_instance as producer:
         with attach_spy_on_func(producer.registrar.clog_writer,
                                 'publish') as clog_publish_spy:
             fake_messages = (
                 (1, "FAKE MESSAGE"),
                 (2, "DIFFERENT FAKE MESSAGE"),
             )
             # Build and publish one message at a time, in order.
             for schema_id, text in fake_messages:
                 producer.publish(
                     CreateMessage(schema_id=schema_id, payload=bytes(text))
                 )
             time.sleep(2.5)
             assert clog_publish_spy.call_count == 4
Exemple #23
0
    def skip_test_offset_cache_cleared_at_rebalance(self, topic, pii_topic,
                                                    publish_messages,
                                                    consumer_instance,
                                                    consumer_two_instance,
                                                    message, pii_message):
        """Check that a committed offset is sent again after a rebalance.

        Consumer one reads and commits one message.  A second consumer is
        then started in a separate process to trigger a rebalance, after
        which consumer one commits the same message again; exactly one
        ``send_offset_commit_request`` is expected for that repeat commit.

        NOTE(review): the ``skip_`` name prefix presumably keeps the test
        runner from collecting this method -- confirm against the runner
        configuration.
        """
        # TODO [DATAPIPE-249] previous version of test has an issue that
        # sometimes the consumer one doesn't get any message right after
        # consumer two starts.  It's unclear the cause and may be related
        # to how the tests are setup. Re-writting the test to bypass it
        # and defer addressing it in the DATAPIPE-249.
        consumer_one_rebalanced_event = Event()
        with consumer_instance as consumer_one:
            publish_messages(message, count=10)
            publish_messages(pii_message, count=10)

            consumer_one_message = consumer_one.get_message(blocking=True,
                                                            timeout=TIMEOUT)
            consumer_one.commit_message(consumer_one_message)

            # trigger rebalancing by starting another consumer with same name
            consumer_two_process = Process(
                target=self._run_consumer_two,
                args=(consumer_two_instance, consumer_one_rebalanced_event))
            consumer_two_process.start()
            # consumer one is rebalanced during `get_message`
            consumer_one.get_message(blocking=True, timeout=TIMEOUT)
            consumer_one_rebalanced_event.set()

            consumer_two_process.join(timeout=1)
            # NOTE(review): if Process is multiprocessing.Process, exitcode is
            # None while the child is still running, so this also passes when
            # the join above timed out -- confirm that is intended.
            assert not consumer_two_process.exitcode

            # force consumer rebalance again; consumer rebalance occurs when
            # get_message is called; set short timeout because we don't care
            # if there is any message left.
            consumer_one.get_message(blocking=True, timeout=0.1)

            # The same offset should be committed again because the rebalancing
            # will clear the internal offset cache.
            with attach_spy_on_func(consumer_one.kafka_client,
                                    'send_offset_commit_request') as func_spy:
                consumer_one.commit_message(consumer_one_message)
                assert func_spy.call_count == 1
    def test_skip_commit_offset_if_offset_unchanged(self, publish_messages,
                                                    message,
                                                    consumer_instance):
        """Committing an unchanged offset must not reach Kafka a second time.

        Two of four published messages are consumed and committed (one
        ``send_offset_commit_request`` expected); a repeat commit of the
        same two is expected to send nothing.  A final read checks that
        consumption continues with the next two messages.
        """
        message_checker = ConsumerAsserter(consumer=consumer_instance,
                                           expected_message=message)
        with consumer_instance as consumer:
            publish_messages(message, 4)

            with attach_spy_on_func(consumer.kafka_client,
                                    'send_offset_commit_request') as request_spy:
                consumed = consumer.get_messages(count=2,
                                                 blocking=True,
                                                 timeout=TIMEOUT)
                message_checker.assert_messages(consumed, 2)

                consumer.commit_messages(consumed)
                assert request_spy.call_count == 1

                request_spy.reset_mock()

                # call_count stays at zero when no new msgs are committed
                consumer.commit_messages(consumed)
                assert request_spy.call_count == 0

                # the next get_messages call should start at the next offset
                following = consumer.get_messages(count=2,
                                                  blocking=True,
                                                  timeout=TIMEOUT)
                message_checker.assert_messages(following, 2)
    def test_offset_cache_reset_on_topic_reset(
        self, publish_messages, message, consumer_instance
    ):
        """Check that ``reset_topics`` makes an already-committed offset
        committable again.

        Four messages are read and committed (one request expected).  The
        consumer's topics are then reset, with
        ``_get_topics_in_region_from_topic_name`` patched out, and the same
        messages are committed again; one request is expected for that
        repeat commit.
        """
        checker = ConsumerAsserter(
            consumer=consumer_instance, expected_message=message
        )
        with consumer_instance as consumer:
            publish_messages(message, 4)
            with attach_spy_on_func(
                consumer.kafka_client, 'send_offset_commit_request'
            ) as commit_spy:
                batch = consumer.get_messages(
                    count=4, blocking=True, timeout=TIMEOUT
                )
                assert len(batch) == 4
                checker.assert_messages(batch, 4)

                consumer.commit_messages(batch)
                assert commit_spy.call_count == 1

                # Map every topic the consumer currently has to None.
                reset_map = dict.fromkeys(consumer.topic_to_partition_map)
                # One single-element list per topic, returned in turn by
                # the patched lookup.
                lookup_results = [[name] for name in reset_map]

                with mock.patch.object(
                    consumer,
                    '_get_topics_in_region_from_topic_name',
                    side_effect=lookup_results
                ):
                    consumer.reset_topics(
                        topic_to_consumer_topic_state_map=reset_map
                    )

                commit_spy.reset_mock()

                # on committing messages with same offset
                # send_offset_commit_request should get called
                # because cache is reset on consumer.reset_topics
                consumer.commit_messages(batch)
                assert commit_spy.call_count == 1
    def test_publish_succeeds_without_retry(self, topic, message, producer):
        """A publish to an existing topic should take one produce request.

        The test spies on ``send_produce_request`` and captures new
        messages on ``topic`` while publishing and flushing one message,
        then checks the captured messages, the request count (exactly 1)
        and the producer's topic-to-offset map.
        """
        kafka_client = producer._kafka_producer.kafka_client
        with attach_spy_on_func(kafka_client, 'send_produce_request') as spy, \
                capture_new_messages(topic) as read_captured:
            starting_offsets = self.get_orig_topic_to_offset_map(producer)

            producer.publish(message)
            producer.flush()

            captured = read_captured()
            self.assert_equal_msgs(expected_msgs=[message],
                                   actual_msgs=captured)
            assert spy.call_count == 1
            self.assert_new_topic_to_offset_map(producer,
                                                message.topic,
                                                starting_offsets,
                                                published_message_count=1)
Exemple #27
0
    def test_consumer_periodic_registration_messages(
        self,
        publish_messages,
        input_compatible_message,
        consumer_instance
    ):
        """
        Verify that a Consumer periodically creates and sends registration
        messages once it has received a message from a topic it consumes.

        Two ``clog_writer.publish`` calls are expected 2.5 seconds after the
        registrar is started with a threshold of 1.

        Note: Test fails when threshold is set significantly below 1 second
        """
        get_message_timeout = 1.8
        consumer_instance.registrar.threshold = 1
        with consumer_instance as consumer:
            with attach_spy_on_func(
                consumer.registrar.clog_writer,
                'publish'
            ) as clog_publish_spy:
                publish_messages(input_compatible_message, count=1)
                consumer.get_message(
                    blocking=True,
                    timeout=get_message_timeout
                )
                consumer.registrar.threshold = 1
                consumer.registrar.start()
                time.sleep(2.5)
                assert clog_publish_spy.call_count == 2
                consumer.registrar.stop()
    def test_offset_cache_reset_on_topic_reset(
            self,
            publish_messages,
            message,
            consumer_instance
    ):
        """Resetting topics should allow the same offset to be committed
        again.

        The same four messages are committed before and after
        ``consumer.reset_topics``; each commit is expected to result in
        exactly one ``send_offset_commit_request``.
        """
        message_checker = ConsumerAsserter(
            consumer=consumer_instance,
            expected_message=message
        )
        with consumer_instance as consumer:
            publish_messages(message, 4)
            with attach_spy_on_func(
                consumer.kafka_client,
                'send_offset_commit_request'
            ) as request_spy:
                consumed = consumer.get_messages(
                    count=4,
                    blocking=True,
                    timeout=TIMEOUT
                )
                assert len(consumed) == 4
                message_checker.assert_messages(consumed, 4)

                consumer.commit_messages(consumed)
                assert request_spy.call_count == 1

                state_map = {
                    name: None for name in consumer.topic_to_partition_map
                }
                # The patched lookup returns each topic, wrapped in a list,
                # on successive calls.
                per_call_topics = [[name] for name in state_map]

                with mock.patch.object(
                    consumer,
                    '_get_topics_in_region_from_topic_name',
                    side_effect=per_call_topics
                ):
                    consumer.reset_topics(
                        topic_to_consumer_topic_state_map=state_map
                    )

                request_spy.reset_mock()

                # Committing the same offsets again must reach
                # send_offset_commit_request, because reset_topics clears
                # the offset cache.
                consumer.commit_messages(consumed)
                assert request_spy.call_count == 1
    def test_call_kafka_commit_offsets_when_offset_change(
        self,
        publish_messages,
        message,
        consumer_instance
    ):
        """Every commit with a changed offset should send one request.

        After a batch commit of three messages, committing the first and
        then the third message individually is each expected to trigger
        one ``send_offset_commit_request``.  One message is expected to be
        left for the final read.
        """
        message_checker = ConsumerAsserter(
            consumer=consumer_instance,
            expected_message=message
        )
        with consumer_instance as consumer:
            publish_messages(message, 4)

            with attach_spy_on_func(
                consumer.kafka_client,
                'send_offset_commit_request'
            ) as request_spy:
                consumed = consumer.get_messages(
                    count=3,
                    blocking=True,
                    timeout=TIMEOUT
                )
                message_checker.assert_messages(consumed, 3)

                consumer.commit_messages(consumed)
                assert request_spy.call_count == 1

                request_spy.reset_mock()

                # call_count increases when the offset differs from the
                # last committed offset
                earliest = consumed[0]
                consumer.commit_message(earliest)
                assert request_spy.call_count == 1

                request_spy.reset_mock()

                latest = consumed[2]
                consumer.commit_message(latest)
                assert request_spy.call_count == 1

                # the next get_messages call should return the message at
                # the next offset
                leftover = consumer.get_messages(
                    count=1,
                    blocking=True,
                    timeout=TIMEOUT
                )
                assert len(leftover) == 1
                message_checker.assert_messages(leftover, 1)
Exemple #30
0
    def test_publish_to_new_topic(self, create_new_schema, producer):
        """Check that publishing to a topic that does not exist yet succeeds.

        A fresh schema (and therefore a fresh topic) is created, one message
        is published and flushed, and the test then verifies that:

        * ``send_produce_request`` was called at least twice (first attempt
          plus at least one retry),
        * the message can be read back from the start of the topic,
        * the producer's topic-to-offset map reflects one published message.
        """
        new_schema = create_new_schema(source='retry_source')
        message = CreateMessage(new_schema.schema_id, payload=str('1'))

        with attach_spy_on_func(producer._kafka_producer.kafka_client,
                                'send_produce_request') as send_request_spy:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(
                producer)
            # Discard anything recorded during setup so only requests made by
            # the publish below are counted.  ``reset_mock`` is the mock API
            # for this (and what the sibling tests use); ``reset`` is not a
            # Mock method and on a plain Mock silently does nothing.
            send_request_spy.reset_mock()

            producer.publish(message)
            producer.flush()

            # it should fail at least the 1st time because the topic doesn't
            # exist. Depending on how fast the topic is created, it could retry
            # more than 2 times.
            assert send_request_spy.call_count >= 2

        messages = self.get_messages_from_start(message.topic)
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
        self.assert_new_topic_to_offset_map(producer,
                                            message.topic,
                                            orig_topic_to_offset_map,
                                            published_message_count=1)