def _test_success_ensure_messages_published(
    self, topic, messages, producer, topic_offsets, unpublished_count
):
    messages_to_publish = len(messages) - unpublished_count
    messages_published_first = messages[:messages_to_publish]

    with setup_capture_new_messages_consumer(
        topic
    ) as consumer, mock.patch.object(
        data_pipeline.producer,
        'logger'
    ) as mock_logger:
        for message in messages_published_first:
            producer.publish(message)
        producer.flush()
        producer.position_data_callback = mock.Mock()

        producer.ensure_messages_published(messages, topic_offsets)

        # Recovering unpublished messages should report fresh position
        # data exactly once.
        if unpublished_count > 0:
            assert producer.position_data_callback.call_count == 1

        self._assert_all_messages_published(consumer)
        self._verify_position_and_highwatermarks(
            topics=[topic],
            producer=producer,
            message_count=self.number_of_messages
        )
        self._assert_logged_info_correct(
            mock_logger,
            messages_already_published=len(messages_published_first),
            topic=topic,
            topic_offsets=topic_offsets,
            message_count=len(messages)
        )
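# The tests in this class pin down the recovery contract of
# ensure_messages_published. As a rough mental model -- a minimal sketch
# with assumed names, not the producer's actual implementation -- the
# producer derives how many of the given messages are already in Kafka
# from the saved topic offsets, republishes only the unpublished tail,
# and treats an offset delta larger than the message list as
# unensurable:

def _sketch_ensure_messages_published(
    messages, saved_offset, current_offset, force_recovery=False
):
    """Illustrative sketch only; names and structure here are
    assumptions, not the real producer code.
    """
    already_published = current_offset - saved_offset
    if already_published > len(messages):
        if force_recovery:
            # Forced recovery gives up on reconciling offsets and
            # simply republishes everything it was handed (see
            # test_forced_recovery_when_overpublished below).
            return list(messages)
        raise PublicationUnensurableError()
    # Happy path: skip what the offsets say is already published and
    # republish only the remaining tail.
    return messages[already_published:]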
def test_multitopic_offsets(
    self,
    topic,
    messages,
    secondary_topic,
    secondary_messages,
    producer,
    topic_offsets,
    containers
):
    """Publishes a single message on the secondary_topic, and all
    messages on the primary topic, simulating the case where publishes
    for one topic fail, while the other succeeds, and the one that
    succeeds comes later in time.

    The goal is that the position data still reflects the original
    message ordering, irrespective of failure.
    """
    containers.create_kafka_topic(secondary_topic)
    with setup_capture_new_messages_consumer(secondary_topic) as consumer:
        producer.publish(secondary_messages[0])
        for message in messages:
            producer.publish(message)
        producer.flush()

        producer.ensure_messages_published(
            secondary_messages + messages,
            topic_offsets
        )

        self._verify_position_and_highwatermarks(
            topics=[topic, secondary_topic],
            producer=producer,
            message_count=self.number_of_messages
        )

        assert len(consumer.get_messages(10)) == len(secondary_messages)
def test_forced_recovery_when_overpublished(
    self, topic, messages, producer, topic_offsets
):
    for message in messages:
        producer.publish(message)
    producer.flush()

    with reconfigure(
        force_recovery_from_publication_unensurable_error=True
    ), setup_capture_new_messages_consumer(
        topic
    ) as consumer, mock.patch.object(
        data_pipeline.producer,
        'logger'
    ) as mock_logger:
        producer.ensure_messages_published(messages[:2], topic_offsets)

        self._assert_logged_info_correct(
            mock_logger,
            len(messages),
            topic,
            topic_offsets,
            message_count=len(messages[:2])
        )
        self._verify_position_and_highwatermarks(
            topics=[topic],
            producer=producer,
            message_count=len(messages[:2])
        )
        assert len(consumer.get_messages(10)) == 2
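# Note: the reconfigure context manager above temporarily enables
# force_recovery_from_publication_unensurable_error for the duration of
# the block. With it on, the same overpublished state that would raise
# PublicationUnensurableError (see the next test, and the force_recovery
# branch of the sketch above) is recovered from by republishing the
# given messages.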
def test_ensure_messages_published_fails_when_overpublished(
    self, topic, messages, producer, topic_offsets
):
    for message in messages:
        producer.publish(message)
    producer.flush()

    with pytest.raises(
        PublicationUnensurableError
    ), mock.patch.object(
        data_pipeline.producer,
        'logger'
    ) as mock_logger:
        producer.ensure_messages_published(messages[:2], topic_offsets)

    self._assert_logged_info_correct(
        mock_logger,
        len(messages),
        topic,
        topic_offsets,
        message_count=len(messages[:2])
    )
def test_ensure_messages_published_on_new_topic(
    self, create_new_schema, producer
):
    """When a topic doesn't exist, all of the messages on that topic
    should be published.
    """
    new_schema = create_new_schema(source='ensure_published_source_two')
    message = CreateMessage(new_schema.schema_id, payload=str('1'))
    topic = str(new_schema.topic.name)

    with attach_spy_on_func(producer, 'publish') as func_spy:
        producer.ensure_messages_published([message], {})
        assert func_spy.call_count == 1

    with setup_capture_new_messages_consumer(topic) as consumer:
        kafka_offset = 0
        consumer.seek(kafka_offset, 0)  # kafka_offset from head
        self._assert_all_messages_published(consumer, expected_payloads=[1])
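# A brand-new topic has no entry in topic_offsets; in terms of the
# sketch above that is a saved offset of zero, so nothing counts as
# already published and every message gets (re)published -- hence the
# single expected publish call.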
def test_ensure_messages_published_without_message(
    self, topic, producer, topic_offsets
):
    with setup_capture_new_messages_consumer(topic) as consumer:
        producer.ensure_messages_published([], topic_offsets)
        self._assert_all_messages_published(consumer, expected_payloads=[])