Example #1
    def test_publish_messages_with_diff_topic_and_timestamp(
            self, registered_schema, another_schema, topic, another_topic,
            producer, create_message):
        messages_to_publish = [
            create_message(registered_schema, timeslot=0.5),
            create_message(another_schema, timeslot=0.8),
            create_message(registered_schema, timeslot=3.5),
            create_message(another_schema, timeslot=4),
            create_message(another_schema, timeslot=6),
        ]

        with setup_capture_new_messages_consumer(
                topic) as consumer, setup_capture_new_messages_consumer(
                    another_topic
                ) as another_consumer, setup_capture_new_messages_consumer(
                    producer.monitor.monitor_topic) as monitor_consumer:
            self.publish_messages(messages_to_publish, producer)

            assert len(consumer.get_messages(count=100)) == 2
            assert len(another_consumer.get_messages(count=100)) == 3

            monitor_messages = monitor_consumer.get_messages(count=100)
            assert len(monitor_messages) == 14

            self.assert_equal_monitor_messages(
                actual_raw_messages=monitor_messages,
                expected_topic=topic,
                expected_messages_counts=[1, 0, 0, 1, 0, 0, 0],
                expected_start_timestamp=producer.monitor.start_time)
            self.assert_equal_monitor_messages(
                actual_raw_messages=monitor_messages,
                expected_topic=another_topic,
                expected_messages_counts=[1, 0, 0, 0, 1, 0, 1],
                expected_start_timestamp=producer.monitor.start_time)
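
The expected counts here follow from the monitor's fixed-width time windows. Assuming a window size of 1 in the test's timeslot units (an inference from these fixtures, not documented behavior), the monitor emits one record per topic per window from window 0 through the last flushed window: seven windows for each of the two topics gives the 14 monitor messages asserted above. A minimal sketch of the presumed bucketing, using a hypothetical bucket_counts helper that is not part of data_pipeline:

    def bucket_counts(timeslots, num_windows, window_size=1.0):
        # A message at time t lands in window int(t // window_size).
        counts = [0] * num_windows
        for t in timeslots:
            counts[int(t // window_size)] += 1
        return counts

    assert bucket_counts([0.5, 3.5], 7) == [1, 0, 0, 1, 0, 0, 0]   # registered_schema -> topic
    assert bucket_counts([0.8, 4, 6], 7) == [1, 0, 0, 0, 1, 0, 1]  # another_schema -> another_topic
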
Example #2
    def test_publish_messages_with_diff_timestamps(self, producer,
                                                   create_message,
                                                   registered_schema):
        messages_to_publish = [
            create_message(registered_schema, timeslot=0.5),
            create_message(registered_schema, timeslot=1.5),
            create_message(registered_schema, timeslot=3.5)
        ]
        topic = messages_to_publish[0].topic

        with setup_capture_new_messages_consumer(
                topic) as consumer, setup_capture_new_messages_consumer(
                    producer.monitor.monitor_topic) as monitor_consumer:
            self.publish_messages(messages_to_publish, producer)

            messages = consumer.get_messages(count=100)
            assert len(messages) == len(messages_to_publish)

            monitor_messages = monitor_consumer.get_messages(count=100)
            assert len(monitor_messages) == 4

            self.assert_equal_monitor_messages(
                actual_raw_messages=monitor_messages,
                expected_topic=topic,
                expected_messages_counts=[1, 1, 0, 1],
                expected_start_timestamp=producer.monitor.start_time)
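
The same arithmetic applies here, reusing the hypothetical bucket_counts sketch from the note under Example #1: timeslots 0.5, 1.5, and 3.5 fall into windows 0, 1, and 3, so four windows are flushed and four monitor messages appear.

    assert bucket_counts([0.5, 1.5, 3.5], 4) == [1, 1, 0, 1]
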
Example #3
    def test_publish_messages_with_diff_timestamps(
        self, producer, create_message, registered_schema
    ):
        messages_to_publish = [
            create_message(registered_schema, timeslot=0.5),
            create_message(registered_schema, timeslot=1.5),
            create_message(registered_schema, timeslot=3.5)
        ]
        topic = messages_to_publish[0].topic

        with setup_capture_new_messages_consumer(
            topic
        ) as consumer, setup_capture_new_messages_consumer(
            producer.monitor.monitor_topic
        ) as monitor_consumer:
            self.publish_messages(messages_to_publish, producer)

            messages = consumer.get_messages(count=100)
            assert len(messages) == len(messages_to_publish)

            monitor_messages = monitor_consumer.get_messages(count=100)
            assert len(monitor_messages) == 4

            self.assert_equal_monitor_messages(
                actual_raw_messages=monitor_messages,
                expected_topic=topic,
                expected_messages_counts=[1, 1, 0, 1],
                expected_start_timestamp=producer.monitor.start_time
            )
Example #4
    def test_monitoring_message_basic(
        self, producer, create_message, registered_schema
    ):
        message = create_message(registered_schema, timeslot=2.5)
        messages_to_publish = [message] * 10
        topic = message.topic

        with setup_capture_new_messages_consumer(
            topic
        ) as consumer, setup_capture_new_messages_consumer(
            producer.monitor.monitor_topic
        ) as monitor_consumer:
            self.publish_messages(messages_to_publish, producer)

            messages = consumer.get_messages(count=100)
            assert len(messages) == len(messages_to_publish)

            monitor_messages = monitor_consumer.get_messages(count=100)
            assert len(monitor_messages) == 3

            self.assert_equal_monitor_messages(
                actual_raw_messages=monitor_messages,
                expected_topic=topic,
                expected_messages_counts=[0, 0, 10],
                expected_start_timestamp=producer.monitor.start_time
            )
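
Once more with the bucket_counts sketch: all ten copies of the message sit at timeslot 2.5, which lands in window 2, so three windows are flushed and three monitor messages appear.

    assert bucket_counts([2.5] * 10, 3) == [0, 0, 10]
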
Example #5
    def test_forced_recovery_when_overpublished(
        self, topic, messages, producer, topic_offsets
    ):
        for message in messages:
            producer.publish(message)
        producer.flush()

        with reconfigure(
            force_recovery_from_publication_unensurable_error=True
        ), setup_capture_new_messages_consumer(
            topic
        ) as consumer, mock.patch.object(
            data_pipeline.producer,
            'logger'
        ) as mock_logger:
            producer.ensure_messages_published(messages[:2], topic_offsets)

            self._assert_logged_info_correct(
                mock_logger,
                len(messages),
                topic,
                topic_offsets,
                message_count=len(messages[:2])
            )
            self._verify_position_and_highwatermarks(
                topics=[topic],
                producer=producer,
                message_count=len(messages[:2])
            )

            assert len(consumer.get_messages(10)) == 2
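
The test name suggests that asking the producer to ensure fewer messages than were already published would normally raise a publication-unensurable error, and that the flag passed to reconfigure forces recovery instead. reconfigure itself is presumably a context manager that overrides configuration for the duration of the block; a minimal sketch of that pattern, with a stand-in config object rather than data_pipeline's real helper:

    from contextlib import contextmanager
    from types import SimpleNamespace

    # Hypothetical stand-in for data_pipeline's configuration object.
    config = SimpleNamespace(force_recovery_from_publication_unensurable_error=False)

    @contextmanager
    def reconfigure(**overrides):
        # Apply the overrides for the duration of the block, then restore.
        previous = {key: getattr(config, key) for key in overrides}
        for key, value in overrides.items():
            setattr(config, key, value)
        try:
            yield
        finally:
            for key, value in previous.items():
                setattr(config, key, value)
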
Example #6
    def _test_success_ensure_messages_published(self, topic, messages,
                                                producer, topic_offsets,
                                                unpublished_count):
        messages_to_publish = len(messages) - unpublished_count
        messages_published_first = messages[:messages_to_publish]

        with setup_capture_new_messages_consumer(
                topic) as consumer, mock.patch.object(data_pipeline.producer,
                                                      'logger') as mock_logger:
            for message in messages_published_first:
                producer.publish(message)
            producer.flush()
            producer.position_data_callback = mock.Mock()

            producer.ensure_messages_published(messages, topic_offsets)

            if unpublished_count > 0:
                assert producer.position_data_callback.call_count == 1

            self._assert_all_messages_published(consumer)

            self._verify_position_and_highwatermarks(
                topics=[topic],
                producer=producer,
                message_count=self.number_of_messages)

            self._assert_logged_info_correct(
                mock_logger,
                messages_already_published=len(messages_published_first),
                topic=topic,
                topic_offsets=topic_offsets,
                message_count=len(messages))
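
A worked instance of the slicing that drives this helper: with five messages and an unpublished_count of 2, only the first three are published up front, and ensure_messages_published is expected to fill in the missing tail.

    messages = ['m0', 'm1', 'm2', 'm3', 'm4']  # stand-ins for Message objects
    unpublished_count = 2
    messages_published_first = messages[:len(messages) - unpublished_count]
    assert messages_published_first == ['m0', 'm1', 'm2']
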
Example #7
    def test_multitopic_offsets(self, topic, messages, secondary_topic,
                                secondary_messages, producer, topic_offsets,
                                containers):
        """Publishes a single message on the secondary_topic, and all
        messages on the primary topic, simulating the case where publishes for
        one topic fail, while the other succeeds, and the one that succeeds
        comes later in time.  The goal is that the position data still reflects
        the original message ordering, irrespective of failure.
        """
        containers.create_kafka_topic(secondary_topic)
        with setup_capture_new_messages_consumer(secondary_topic) as consumer:
            producer.publish(secondary_messages[0])
            for message in messages:
                producer.publish(message)
            producer.flush()

            producer.ensure_messages_published(secondary_messages + messages,
                                               topic_offsets)

            self._verify_position_and_highwatermarks(
                topics=[topic, secondary_topic],
                producer=producer,
                message_count=self.number_of_messages)

            assert len(consumer.get_messages(10)) == len(secondary_messages)
Example #8
    def _test_success_ensure_messages_published(self, topic, messages,
                                                producer, topic_offsets,
                                                unpublished_count):
        messages_to_publish = len(messages) - unpublished_count
        messages_published_first = messages[:messages_to_publish]

        with setup_capture_new_messages_consumer(
                topic) as consumer, mock.patch.object(data_pipeline.producer,
                                                      'logger') as mock_logger:
            for message in messages_published_first:
                producer.publish(message)
            producer.flush()
            producer.position_data_callback = mock.Mock()

            producer.ensure_messages_published(messages, topic_offsets)

            if unpublished_count > 0:
                assert producer.position_data_callback.call_count == 1

            self._assert_all_messages_published(consumer)

            position_info = producer.get_checkpoint_position_data()
            last_position = position_info.last_published_message_position_info
            assert last_position['position'] == self.number_of_messages

            self._assert_logged_info_correct(
                mock_logger,
                messages_already_published=len(messages_published_first),
                topic=topic,
                topic_offsets=topic_offsets,
                message_count=len(messages))
Example #9
    def test_publish_messages_with_diff_topic_and_timestamp(
        self,
        registered_schema,
        another_schema,
        topic,
        another_topic,
        producer,
        create_message
    ):
        messages_to_publish = [
            create_message(registered_schema, timeslot=0.5),
            create_message(another_schema, timeslot=0.8),
            create_message(registered_schema, timeslot=3.5),
            create_message(another_schema, timeslot=4),
            create_message(another_schema, timeslot=6),
        ]

        with setup_capture_new_messages_consumer(
            topic
        ) as consumer, setup_capture_new_messages_consumer(
            another_topic
        ) as another_consumer, setup_capture_new_messages_consumer(
            producer.monitor.monitor_topic
        ) as monitor_consumer:
            self.publish_messages(messages_to_publish, producer)

            assert len(consumer.get_messages(count=100)) == 2
            assert len(another_consumer.get_messages(count=100)) == 3

            monitor_messages = monitor_consumer.get_messages(count=100)
            assert len(monitor_messages) == 14

            self.assert_equal_monitor_messages(
                actual_raw_messages=monitor_messages,
                expected_topic=topic,
                expected_messages_counts=[1, 0, 0, 1, 0, 0, 0],
                expected_start_timestamp=producer.monitor.start_time
            )
            self.assert_equal_monitor_messages(
                actual_raw_messages=monitor_messages,
                expected_topic=another_topic,
                expected_messages_counts=[1, 0, 0, 0, 1, 0, 1],
                expected_start_timestamp=producer.monitor.start_time
            )
Example #10
    def test_get_position_data(self, create_message, producer):
        upstream_info = {'offset': 'fake'}
        message = create_message(upstream_position_info=upstream_info)
        with setup_capture_new_messages_consumer(message.topic) as consumer:
            producer.publish(message)
            producer.flush()
            position_data = producer.get_checkpoint_position_data()

            self._verify_position_data(position_data, upstream_info,
                                       message.topic)
            self._verify_topic_kafka_offset(position_data, message.topic,
                                            consumer, producer, create_message)
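
Judging from this test together with Example #8's direct read of last_published_message_position_info, the checkpoint data appears to echo back whatever upstream_position_info was attached to the last flushed message, letting callers checkpoint upstream offsets alongside Kafka offsets. A hedged restatement of that contract, reusing this test's fixtures (the attribute name comes from Example #8; the equality is an inference, not documented API):

    upstream_info = {'offset': 'fake'}
    message = create_message(upstream_position_info=upstream_info)
    producer.publish(message)
    producer.flush()
    position_data = producer.get_checkpoint_position_data()
    assert position_data.last_published_message_position_info == upstream_info
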
Example #11
    def test_monitoring_message_basic(self, producer, create_message,
                                      registered_schema):
        message = create_message(registered_schema, timeslot=2.5)
        messages_to_publish = [message] * 10
        topic = message.topic

        with setup_capture_new_messages_consumer(
                topic) as consumer, setup_capture_new_messages_consumer(
                    producer.monitor.monitor_topic) as monitor_consumer:
            self.publish_messages(messages_to_publish, producer)

            messages = consumer.get_messages(count=100)
            assert len(messages) == len(messages_to_publish)

            monitor_messages = monitor_consumer.get_messages(count=100)
            assert len(monitor_messages) == 3

            self.assert_equal_monitor_messages(
                actual_raw_messages=monitor_messages,
                expected_topic=topic,
                expected_messages_counts=[0, 0, 10],
                expected_start_timestamp=producer.monitor.start_time)
Example #12
    def test_get_position_data(self, create_message, producer):
        upstream_info = {'offset': 'fake'}
        message = create_message(upstream_position_info=upstream_info)
        with setup_capture_new_messages_consumer(message.topic) as consumer:
            producer.publish(message)
            producer.flush()
            position_data = producer.get_checkpoint_position_data()

            self._verify_position_data(position_data, upstream_info, message.topic)
            self._verify_topic_kafka_offset(
                position_data,
                message.topic,
                consumer,
                producer,
                create_message
            )
Example #13
    def test_ensure_messages_published_on_new_topic(
        self, create_new_schema, producer
    ):
        """When a topic doesn't exist, all of the messages on that topic should
        be published.
        """
        new_schema = create_new_schema(source='ensure_published_source_two')
        message = CreateMessage(new_schema.schema_id, payload=str('1'))
        topic = str(new_schema.topic.name)

        with attach_spy_on_func(producer, 'publish') as func_spy:
            producer.ensure_messages_published([message], {})
            assert func_spy.call_count == 1
        with setup_capture_new_messages_consumer(topic) as consumer:
            kafka_offset = 0
            consumer.seek(kafka_offset, 0)  # kafka_offset from head
            self._assert_all_messages_published(consumer, expected_payloads=[1])
Example #14
    def test_ensure_messages_published_on_new_topic(self, create_new_schema,
                                                    producer):
        """When a topic doesn't exist, all of the messages on that topic should
        be published.
        """
        new_schema = create_new_schema(source='ensure_published_source_two')
        message = CreateMessage(new_schema.schema_id, payload=str('1'))
        topic = str(new_schema.topic.name)

        with attach_spy_on_func(producer, 'publish') as func_spy:
            producer.ensure_messages_published([message], {})
            assert func_spy.call_count == 1
        with setup_capture_new_messages_consumer(topic) as consumer:
            kafka_offset = 0
            consumer.seek(kafka_offset, 0)  # kafka_offset from head
            self._assert_all_messages_published(consumer,
                                                expected_payloads=[1])
Example #15
    def test_position_data_callback(self, create_message, producer_name,
                                    team_name):
        callback = mock.Mock()
        producer = Producer(
            producer_name=producer_name,
            team_name=team_name,
            expected_frequency_seconds=ExpectedFrequency.constantly,
            position_data_callback=callback)
        upstream_info = {'offset': 'fake'}
        message = create_message(upstream_position_info=upstream_info)
        with setup_capture_new_messages_consumer(message.topic) as consumer:
            producer.publish(message)
            producer.flush()
            (position_data, ), _ = callback.call_args

            self._verify_position_data(position_data, upstream_info,
                                       message.topic)
            self._verify_topic_kafka_offset(position_data, message.topic,
                                            consumer, producer, create_message)
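
The (position_data, ), _ = callback.call_args line unpacks mock's call record: call_args behaves like an (args, kwargs) pair, so the test is asserting the callback was invoked with the position data as its only positional argument. A self-contained illustration of that unpacking, using unittest.mock from the standard library:

    from unittest import mock

    callback = mock.Mock()
    callback({'offset': 'fake'})  # stand-in for the PositionData payload
    (position_data, ), _ = callback.call_args
    assert position_data == {'offset': 'fake'}
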
Example #16
    def test_forced_recovery_when_overpublished(self, topic, messages,
                                                producer, topic_offsets):
        for message in messages:
            producer.publish(message)
        producer.flush()

        with reconfigure(
            force_recovery_from_publication_unensurable_error=True
        ), setup_capture_new_messages_consumer(
            topic
        ) as consumer, mock.patch.object(
            data_pipeline.producer, 'logger'
        ) as mock_logger:
            producer.ensure_messages_published(messages[:2], topic_offsets)

            self._assert_logged_info_correct(mock_logger,
                                             len(messages),
                                             topic,
                                             topic_offsets,
                                             message_count=len(messages[:2]))

            assert len(consumer.get_messages(10)) == 2
Example #17
    def test_position_data_callback(self, create_message, producer_name, team_name):
        callback = mock.Mock()
        producer = Producer(
            producer_name=producer_name,
            team_name=team_name,
            expected_frequency_seconds=ExpectedFrequency.constantly,
            position_data_callback=callback
        )
        upstream_info = {'offset': 'fake'}
        message = create_message(upstream_position_info=upstream_info)
        with setup_capture_new_messages_consumer(message.topic) as consumer:
            producer.publish(message)
            producer.flush()
            (position_data,), _ = callback.call_args

            self._verify_position_data(position_data, upstream_info, message.topic)
            self._verify_topic_kafka_offset(
                position_data,
                message.topic,
                consumer,
                producer,
                create_message
            )
Example #18
    def test_multitopic_offsets(
        self,
        topic,
        messages,
        secondary_topic,
        secondary_messages,
        producer,
        topic_offsets,
        containers
    ):
        """Publishes a single message on the secondary_topic, and all
        messages on the primary topic, simulating the case where publishes for
        one topic fail, while the other succeeds, and the one that succeeds
        comes later in time.  The goal is that the position data still reflects
        the original message ordering, irrespective of failure.
        """
        containers.create_kafka_topic(secondary_topic)
        with setup_capture_new_messages_consumer(
            secondary_topic
        ) as consumer:
            producer.publish(secondary_messages[0])
            for message in messages:
                producer.publish(message)
            producer.flush()

            producer.ensure_messages_published(
                secondary_messages + messages,
                topic_offsets
            )

            self._verify_position_and_highwatermarks(
                topics=[topic, secondary_topic],
                producer=producer,
                message_count=self.number_of_messages
            )

            assert len(consumer.get_messages(10)) == len(secondary_messages)
Example #19
    def _test_success_ensure_messages_published(
        self, topic, messages, producer, topic_offsets, unpublished_count
    ):
        messages_to_publish = len(messages) - unpublished_count
        messages_published_first = messages[:messages_to_publish]

        with setup_capture_new_messages_consumer(
            topic
        ) as consumer, mock.patch.object(
            data_pipeline.producer,
            'logger'
        ) as mock_logger:
            for message in messages_published_first:
                producer.publish(message)
            producer.flush()
            producer.position_data_callback = mock.Mock()

            producer.ensure_messages_published(messages, topic_offsets)

            if unpublished_count > 0:
                assert producer.position_data_callback.call_count == 1

            self._assert_all_messages_published(consumer)

            self._verify_position_and_highwatermarks(
                topics=[topic],
                producer=producer,
                message_count=self.number_of_messages
            )

            self._assert_logged_info_correct(
                mock_logger,
                messages_already_published=len(messages_published_first),
                topic=topic,
                topic_offsets=topic_offsets,
                message_count=len(messages)
            )
Example #20
 def get_messages_from_start(self, topic_name):
     with setup_capture_new_messages_consumer(topic_name) as consumer:
         consumer.seek(0, 0)  # set to the first message
         return consumer.get_messages()
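
Two details are worth noting here. First, consumer.seek(offset, whence) follows the kafka-python SimpleConsumer convention: whence 0 is relative to the beginning, 1 to the current position, and 2 to the end, so seek(0, 0) rewinds to the first message. Second, setup_capture_new_messages_consumer evidently positions the consumer at the topic's current tail, which is why the other examples only see messages published inside the with block. A plausible shape for that helper, offered as an assumption rather than data_pipeline's actual implementation (make_consumer is a hypothetical factory):

    from contextlib import contextmanager

    @contextmanager
    def setup_capture_new_messages_consumer(topic):
        consumer = make_consumer(topic)  # hypothetical consumer factory
        consumer.seek(0, 2)  # whence=2: start at the tail so only new messages are seen
        try:
            yield consumer
        finally:
            consumer.stop()
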
Example #21
 def test_ensure_messages_published_without_message(self, topic, producer,
                                                    topic_offsets):
     with setup_capture_new_messages_consumer(topic) as consumer:
         producer.ensure_messages_published([], topic_offsets)
         self._assert_all_messages_published(consumer, expected_payloads=[])
Example #22
 def test_ensure_messages_published_without_message(
     self, topic, producer, topic_offsets
 ):
     with setup_capture_new_messages_consumer(topic) as consumer:
         producer.ensure_messages_published([], topic_offsets)
         self._assert_all_messages_published(consumer, expected_payloads=[])