def test_producer_registration_message_on_exit(self, producer_instance):
    producer = producer_instance.__enter__()
    with attach_spy_on_func(producer.registrar, 'stop') as func_spy:
        producer.publish(
            CreateMessage(schema_id=1, payload=bytes("Test message"))
        )
        producer.__exit__(None, None, None)
        assert func_spy.call_count == 1

def _publish_and_assert_pii_message(self, message, producer):
    with capture_new_messages(message.topic) as get_messages:
        producer.publish(message)
        producer.flush()
        offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(offsets_and_messages[0])
        assert dp_message.payload == message.payload
        assert dp_message.payload_data == message.payload_data
        assert dp_message.schema_id == message.schema_id

        unpacked_message = Envelope().unpack(
            offsets_and_messages[0].message.value
        )
        unpacked_meta_attr = unpacked_message['meta'][0]
        encryption_helper = EncryptionHelper(
            dp_message.encryption_type,
            MetaAttribute(
                unpacked_meta_attr['schema_id'],
                unpacked_meta_attr['payload']
            )
        )
        encrypted_payload = encryption_helper.encrypt_payload(message.payload)
        assert unpacked_message['payload'] == encrypted_payload

def test_retry_false_failed_publish(self, message, producer):
    # TODO(DATAPIPE-606|clin) investigate better way than mocking response
    orig_func = producer._kafka_producer.kafka_client.send_produce_request

    def run_original_func_but_throw_exception(*args, **kwargs):
        orig_func(*args, **kwargs)
        raise RandomException()

    with mock.patch.object(
        producer._kafka_producer.kafka_client,
        'send_produce_request',
        side_effect=run_original_func_but_throw_exception
    ) as mock_send_request, capture_new_messages(
        message.topic
    ) as get_messages:
        orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
        mock_send_request.reset()

        producer.publish(message)
        producer.flush()
        messages = get_messages()

        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
        assert mock_send_request.call_count == 1  # should be no retry
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            orig_topic_to_offset_map,
            published_message_count=1
        )

def test_retry_failed_publish_without_highwatermark(self, message, producer):
    # TODO(DATAPIPE-606|clin) investigate better way than mocking response
    with mock.patch.object(
        producer._kafka_producer.kafka_client,
        'send_produce_request',
        side_effect=[FailedPayloadsError]
    ) as mock_send_request, mock.patch(
        'data_pipeline._kafka_util.get_topics_watermarks',
        side_effect=Exception
    ), capture_new_messages(
        message.topic
    ) as get_messages, pytest.raises(
        MaxRetryError
    ) as e:
        orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
        producer.publish(message)
        producer.flush()

    assert mock_send_request.call_count == 1  # should be no retry
    self.assert_last_retry_result(
        e.value.last_result,
        message,
        expected_published_msgs_count=0
    )
    messages = get_messages()
    assert len(messages) == 0
    self.assert_new_topic_to_offset_map(
        producer,
        message.topic,
        orig_topic_to_offset_map,
        published_message_count=0
    )

def test_child_processes_do_not_survive_an_exception(
    self, producer_instance, message
):
    with pytest.raises(RandomException), producer_instance as producer:
        producer.publish(message)
        producer.flush()
        producer.publish(message)
        raise RandomException()
    assert len(multiprocessing.active_children()) == 0

def test_forced_recovery_when_overpublished(
    self, topic, messages, producer, topic_offsets
):
    for message in messages:
        producer.publish(message)
    producer.flush()

    with reconfigure(
        force_recovery_from_publication_unensurable_error=True
    ), setup_capture_new_messages_consumer(
        topic
    ) as consumer, mock.patch.object(
        data_pipeline.producer, 'logger'
    ) as mock_logger:
        producer.ensure_messages_published(messages[:2], topic_offsets)
        self._assert_logged_info_correct(
            mock_logger,
            len(messages),
            topic,
            topic_offsets,
            message_count=len(messages[:2])
        )
        self._verify_position_and_highwatermarks(
            topics=[topic],
            producer=producer,
            message_count=len(messages[:2])
        )
        assert len(consumer.get_messages(10)) == 2

def _test_success_ensure_messages_published(
    self, topic, messages, producer, topic_offsets, unpublished_count
):
    messages_to_publish = len(messages) - unpublished_count
    messages_published_first = messages[:messages_to_publish]

    with setup_capture_new_messages_consumer(
        topic
    ) as consumer, mock.patch.object(
        data_pipeline.producer, 'logger'
    ) as mock_logger:
        for message in messages_published_first:
            producer.publish(message)
        producer.flush()
        producer.position_data_callback = mock.Mock()

        producer.ensure_messages_published(messages, topic_offsets)

        if unpublished_count > 0:
            assert producer.position_data_callback.call_count == 1

        self._assert_all_messages_published(consumer)
        self._verify_position_and_highwatermarks(
            topics=[topic],
            producer=producer,
            message_count=self.number_of_messages
        )
        self._assert_logged_info_correct(
            mock_logger,
            messages_already_published=len(messages_published_first),
            topic=topic,
            topic_offsets=topic_offsets,
            message_count=len(messages)
        )

def test_publish_one_msg_succeeds_one_fails_after_retry(
    self, message, another_message, topic, producer
):
    # TODO(DATAPIPE-606|clin) investigate better way than mocking response
    mock_response = ProduceResponse(topic, partition=0, error=0, offset=1)
    fail_response = FailedPayloadsError(payload=mock.Mock())
    side_effect = ([[mock_response, fail_response]] +
                   [[fail_response]] * self.max_retry_count)
    with mock.patch.object(
        producer._kafka_producer.kafka_client,
        'send_produce_request',
        side_effect=side_effect
    ), pytest.raises(MaxRetryError) as e:
        producer.publish(message)
        producer.publish(another_message)
        producer.flush()

    self.assert_last_retry_result(
        e.value.last_result,
        another_message,
        expected_published_msgs_count=1
    )

def test_publish_to_new_topic(self, create_new_schema, producer):
    new_schema = create_new_schema(source='retry_source')
    message = CreateMessage(new_schema.schema_id, payload=str('1'))

    with attach_spy_on_func(
        producer._kafka_producer.kafka_client,
        'send_produce_request'
    ) as send_request_spy:
        orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
        send_request_spy.reset()

        producer.publish(message)
        producer.flush()

        # it should fail at least the 1st time because the topic doesn't
        # exist. Depending on how fast the topic is created, it could retry
        # more than 2 times.
        assert send_request_spy.call_count >= 2

        messages = self.get_messages_from_start(message.topic)
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            orig_topic_to_offset_map,
            published_message_count=1
        )

def test_multitopic_offsets(
    self, topic, messages, secondary_topic, secondary_messages,
    producer, topic_offsets, containers
):
    """Publishes a single message on the secondary_topic, and all messages
    on the primary topic, simulating the case where publishes for one topic
    fail, while the other succeeds, and the one that succeeds comes later
    in time.

    The goal is that the position data still reflects the original message
    ordering, irrespective of failure.
    """
    containers.create_kafka_topic(secondary_topic)
    with setup_capture_new_messages_consumer(secondary_topic) as consumer:
        producer.publish(secondary_messages[0])
        for message in messages:
            producer.publish(message)
        producer.flush()

        producer.ensure_messages_published(
            secondary_messages + messages,
            topic_offsets
        )

        self._verify_position_and_highwatermarks(
            topics=[topic, secondary_topic],
            producer=producer,
            message_count=self.number_of_messages
        )

        assert len(consumer.get_messages(10)) == len(secondary_messages)

def test_messages_published_without_flush(self, message, producer_instance):
    with capture_new_messages(
        message.topic
    ) as get_messages, producer_instance as producer:
        producer.publish(message)
    assert len(multiprocessing.active_children()) == 0
    assert len(get_messages()) == 1

def test_messages_not_duplicated(self, message, producer_instance):
    with capture_new_messages(
        message.topic
    ) as get_messages, producer_instance as producer:
        producer.publish(message)
        producer.flush()
    assert len(multiprocessing.active_children()) == 0
    assert len(get_messages()) == 1

def test_publish_message_with_keys(self, message_with_pkeys, producer):
    expected_keys_avro_json = {
        "type": "record",
        "namespace": "yelp.data_pipeline",
        "name": "primary_keys",
        "doc": "Represents primary keys present in Message payload.",
        "fields": [
            {"type": "string", "name": "field2", "doc": "test", "pkey": 1},
            {"type": "int", "name": "field1", "doc": "test", "pkey": 2},
            {"type": "int", "name": "field3", "doc": "test", "pkey": 3},
        ]
    }
    expected_keys = {
        "field2": message_with_pkeys.payload_data["field2"],
        "field1": message_with_pkeys.payload_data["field1"],
        "field3": message_with_pkeys.payload_data["field3"]
    }
    with capture_new_messages(message_with_pkeys.topic) as get_messages:
        producer.publish(message_with_pkeys)
        producer.flush()
        offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(offsets_and_messages[0])
        assert dp_message.keys == expected_keys

        avro_string_writer = AvroStringWriter(schema=expected_keys_avro_json)
        expected_encoded_keys = avro_string_writer.encode(
            message_avro_representation=expected_keys
        )
        assert offsets_and_messages[0].message.key == expected_encoded_keys

        avro_string_reader = AvroStringReader(
            reader_schema=expected_keys_avro_json,
            writer_schema=expected_keys_avro_json
        )
        decoded_keys = avro_string_reader.decode(
            encoded_message=offsets_and_messages[0].message.key
        )
        assert decoded_keys == expected_keys

def test_publish_message_with_no_keys(self, message, producer):
    with capture_new_messages(message.topic) as get_messages:
        producer.publish(message)
        producer.flush()
        offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1
        dp_message = create_from_offset_and_message(offsets_and_messages[0])
        assert dp_message.keys == {}

def topic_offsets(self, request, producer, random_schema, containers):
    is_fresh_topic = request.param
    if is_fresh_topic:
        containers.create_kafka_topic(str(random_schema.topic.name))
        return {}

    message = CreateMessage(random_schema.schema_id, payload=str('-1'))
    producer.publish(message)
    producer.flush()
    return producer.get_checkpoint_position_data().topic_to_kafka_offset_map

def test_meteorite_on_off(
    self, create_message, registered_schema, producer,
    enable_meteorite, expected_call_count
):
    with mock.patch.object(
        data_pipeline.tools.meteorite_wrappers.StatsCounter,
        'process',
        autospec=True
    ) as mock_stats_counter:
        producer.enable_meteorite = enable_meteorite
        m = create_message(registered_schema, timeslot=1.0)
        producer.publish(m)
        assert mock_stats_counter.call_count == expected_call_count

def test_sensu_on_off(
    self, create_message, registered_schema, producer,
    enable_sensu, expected_call_count
):
    with mock.patch.object(
        data_pipeline.tools.sensu_ttl_alerter.SensuTTLAlerter,
        'process',
        autospec=True,
        return_value=None
    ) as mock_sensu_ttl_process:
        producer.enable_sensu = enable_sensu
        m = create_message(registered_schema, timeslot=1.0)
        producer.publish(m)
        assert mock_sensu_ttl_process.call_count == expected_call_count

def test_get_position_data(self, create_message, producer):
    upstream_info = {'offset': 'fake'}
    message = create_message(upstream_position_info=upstream_info)
    with setup_capture_new_messages_consumer(message.topic) as consumer:
        producer.publish(message)
        producer.flush()
        position_data = producer.get_checkpoint_position_data()

        self._verify_position_data(position_data, upstream_info, message.topic)
        self._verify_topic_kafka_offset(
            position_data,
            message.topic,
            consumer,
            producer,
            create_message
        )

def test_sensu_process_called_once_inside_window(
    self, create_message, registered_schema, producer, message_count
):
    with mock.patch.object(
        data_pipeline.tools.sensu_ttl_alerter.SensuTTLAlerter,
        'process',
        autospec=True,
        return_value=None
    ) as mock_sensu_ttl_process:
        producer.enable_sensu = True
        m1 = create_message(registered_schema, timeslot=1.0)
        for i in range(message_count):
            producer.publish(m1)
        assert mock_sensu_ttl_process.call_count == 1

def test_ensure_messages_published_fails_when_overpublished(
    self, topic, messages, producer, topic_offsets
):
    for message in messages:
        producer.publish(message)
    producer.flush()

    with pytest.raises(PublicationUnensurableError), mock.patch.object(
        data_pipeline.producer, 'logger'
    ) as mock_logger:
        producer.ensure_messages_published(messages[:2], topic_offsets)

    self._assert_logged_info_correct(
        mock_logger,
        len(messages),
        topic,
        topic_offsets,
        message_count=len(messages[:2])
    )

def _verify_topic_kafka_offset(
    self, position_data, topic, consumer, producer, create_message
):
    # The pointer is to the next offset where messages will be published.
    # There shouldn't be any messages there yet.
    kafka_offset = position_data.topic_to_kafka_offset_map[topic]
    consumer.seek(kafka_offset, 0)  # kafka_offset from head
    assert len(consumer.get_messages(count=10)) == 0

    # publish another message, so we can seek to it
    message = create_message(upstream_position_info={'offset': 'fake2'})
    producer.publish(message)
    producer.flush()

    # There should be a message now that we've published one
    consumer.seek(kafka_offset, 0)  # kafka_offset from head
    assert len(consumer.get_messages(count=10)) == 1

def test_producer_periodic_registration_messages(self, producer_instance):
    """
    Note: This test fails when the threshold is set significantly below
    1 second, presumably because of the nature of threading. This should
    be irrelevant as long as the threshold in the registrar is set
    significantly higher.
    """
    producer_instance.registrar.threshold = 1
    with producer_instance as producer:
        with attach_spy_on_func(
            producer.registrar.clog_writer, 'publish'
        ) as func_spy:
            producer.publish(
                CreateMessage(schema_id=1, payload=bytes("FAKE MESSAGE"))
            )
            producer.publish(
                CreateMessage(schema_id=2, payload=bytes("DIFFERENT FAKE MESSAGE"))
            )
            time.sleep(2.5)
            assert func_spy.call_count == 4

def test_publish_succeeds_without_retry(self, topic, message, producer):
    with attach_spy_on_func(
        producer._kafka_producer.kafka_client,
        'send_produce_request'
    ) as send_request_spy, capture_new_messages(topic) as get_messages:
        orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)

        producer.publish(message)
        producer.flush()
        messages = get_messages()

        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
        assert send_request_spy.call_count == 1
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            orig_topic_to_offset_map,
            published_message_count=1
        )

def test_position_data_callback(self, create_message, producer_name, team_name):
    callback = mock.Mock()
    producer = Producer(
        producer_name=producer_name,
        team_name=team_name,
        expected_frequency_seconds=ExpectedFrequency.constantly,
        position_data_callback=callback
    )
    upstream_info = {'offset': 'fake'}
    message = create_message(upstream_position_info=upstream_info)
    with setup_capture_new_messages_consumer(message.topic) as consumer:
        producer.publish(message)
        producer.flush()
        (position_data,), _ = callback.call_args

        self._verify_position_data(position_data, upstream_info, message.topic)
        self._verify_topic_kafka_offset(
            position_data,
            message.topic,
            consumer,
            producer,
            create_message
        )

def test_publish_fails_after_retry(self, message, producer):
    # TODO(DATAPIPE-606|clin) investigate better way than mocking response
    with mock.patch.object(
        producer._kafka_producer.kafka_client,
        'send_produce_request',
        side_effect=[FailedPayloadsError]
    ) as mock_send_request, capture_new_messages(
        message.topic
    ) as get_messages, pytest.raises(MaxRetryError):
        orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
        producer.publish(message)
        producer.flush()

    messages = get_messages()
    assert len(messages) == 0
    assert mock_send_request.call_count == self.max_retry_count
    self.assert_new_topic_to_offset_map(
        producer,
        message.topic,
        orig_topic_to_offset_map,
        published_message_count=0
    )

def set_topic_offsets_to_latest(self, producer, message, another_message):
    producer.publish(message)
    producer.publish(another_message)
    producer.flush()

def _publish_message(self, message, producer):
    with capture_new_data_pipeline_messages(message.topic) as get_messages:
        producer.publish(message)
        producer.flush()
        return get_messages()

def publish_messages(self, messages, producer):
    for message in messages:
        producer.publish(message)
    producer.flush()
    producer.monitor.flush_buffered_info()