def test_retry_failed_publish_without_highwatermark(self, message, producer):
    """When the produce request fails and the high-watermark lookup also
    raises, the producer must give up after a single attempt (no retry)
    and publish nothing.
    """
    # TODO(DATAPIPE-606|clin) investigate better way than mocking response
    patched_send = mock.patch.object(
        producer._kafka_producer.kafka_client,
        'send_produce_request',
        side_effect=[FailedPayloadsError]
    )
    patched_watermarks = mock.patch(
        'data_pipeline._kafka_util.get_topics_watermarks',
        side_effect=Exception
    )
    with patched_send as send_mock, patched_watermarks, \
            capture_new_messages(message.topic) as fetch_new_messages, \
            pytest.raises(MaxRetryError) as retry_error:
        starting_offsets = self.get_orig_topic_to_offset_map(producer)
        producer.publish(message)
        producer.flush()

    assert send_mock.call_count == 1  # should be no retry
    self.assert_last_retry_result(
        retry_error.value.last_result,
        message,
        expected_published_msgs_count=0
    )
    assert len(fetch_new_messages()) == 0
    self.assert_new_topic_to_offset_map(
        producer,
        message.topic,
        starting_offsets,
        published_message_count=0
    )
def test_retry_failed_publish_without_highwatermark(self, message, producer):
    """A failed produce request combined with a failing high-watermark
    fetch should stop the producer after exactly one attempt, raising
    MaxRetryError with nothing published.
    """
    # TODO(DATAPIPE-606|clin) investigate better way than mocking response
    with mock.patch.object(
        producer._kafka_producer.kafka_client,
        'send_produce_request',
        side_effect=[FailedPayloadsError]
    ) as send_request_mock, mock.patch(
        'data_pipeline._kafka_util.get_topics_watermarks',
        side_effect=Exception
    ), capture_new_messages(message.topic) as get_messages, \
            pytest.raises(MaxRetryError) as exc_info:
        offsets_before = self.get_orig_topic_to_offset_map(producer)
        producer.publish(message)
        producer.flush()

    # Exactly one send attempt -- no retry without the high watermark.
    assert send_request_mock.call_count == 1
    self.assert_last_retry_result(
        exc_info.value.last_result,
        message,
        expected_published_msgs_count=0
    )
    captured = get_messages()
    assert len(captured) == 0
    self.assert_new_topic_to_offset_map(
        producer,
        message.topic,
        offsets_before,
        published_message_count=0
    )
def _publish_and_assert_pii_message(self, message, producer):
    """Publish a PII message and verify the payload is encrypted inside
    the Kafka envelope while the consumer-facing Message still exposes
    the clear payload.
    """
    with capture_new_messages(message.topic) as get_messages:
        producer.publish(message)
        producer.flush()
        captured = get_messages()
        assert len(captured) == 1
        dp_message = create_from_offset_and_message(captured[0])
        # The reconstructed message exposes the decrypted payload.
        assert dp_message.payload == message.payload
        assert dp_message.payload_data == message.payload_data
        assert dp_message.schema_id == message.schema_id
        unpacked = Envelope().unpack(captured[0].message.value)
        encryption_meta = unpacked['meta'][0]
        helper = EncryptionHelper(
            dp_message.encryption_type,
            MetaAttribute(
                encryption_meta['schema_id'],
                encryption_meta['payload']
            )
        )
        # On the wire, the payload must be the encrypted form.
        assert unpacked['payload'] == helper.encrypt_payload(message.payload)
def test_retry_false_failed_publish(self, message, producer):
    """A send that actually succeeds but then raises ("false failure")
    must not be retried: the producer detects via topic offsets that the
    messages already went through.
    """
    # TODO(DATAPIPE-606|clin) investigate better way than mocking response
    orig_func = producer._kafka_producer.kafka_client.send_produce_request

    def run_original_func_but_throw_exception(*args, **kwargs):
        # Deliver the request for real, then pretend the call failed.
        orig_func(*args, **kwargs)
        raise RandomException()

    with mock.patch.object(
        producer._kafka_producer.kafka_client,
        'send_produce_request',
        side_effect=run_original_func_but_throw_exception
    ) as mock_send_request, capture_new_messages(
        message.topic
    ) as get_messages:
        orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
        # Bug fix: Mock has no `reset()` -- that attribute access merely
        # creates a child mock and silently does nothing. `reset_mock()`
        # is the real API for clearing the call counter.
        mock_send_request.reset_mock()
        producer.publish(message)
        producer.flush()
        messages = get_messages()
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
        assert mock_send_request.call_count == 1  # should be no retry
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            orig_topic_to_offset_map,
            published_message_count=1
        )
def test_retry_false_failed_publish(self, message, producer):
    """Verify no retry happens when a produce request succeeds on the
    broker but the client call raises afterwards: the producer should
    confirm delivery through the offset map instead of re-sending.
    """
    # TODO(DATAPIPE-606|clin) investigate better way than mocking response
    orig_func = producer._kafka_producer.kafka_client.send_produce_request

    def run_original_func_but_throw_exception(*args, **kwargs):
        # Perform the real send first so the message lands, then raise.
        orig_func(*args, **kwargs)
        raise RandomException()

    with mock.patch.object(
        producer._kafka_producer.kafka_client,
        'send_produce_request',
        side_effect=run_original_func_but_throw_exception
    ) as send_request_mock, capture_new_messages(
        message.topic
    ) as get_messages:
        offsets_before = self.get_orig_topic_to_offset_map(producer)
        # Bug fix: the original called `send_request_mock.reset()`, which
        # is not a Mock method -- it just creates a child mock and has no
        # effect. `reset_mock()` is the correct way to clear call state.
        send_request_mock.reset_mock()
        producer.publish(message)
        producer.flush()
        published = get_messages()
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=published)
        assert send_request_mock.call_count == 1  # should be no retry
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            offsets_before,
            published_message_count=1
        )
def test_messages_published_without_flush(self, message, producer_instance):
    """Exiting the producer context must deliver pending messages even
    though flush() was never called explicitly.
    """
    with capture_new_messages(message.topic) as get_messages, \
            producer_instance as producer:
        producer.publish(message)
    # No background worker may be left running after the context exits,
    # and the published message must be visible on the topic.
    assert len(multiprocessing.active_children()) == 0
    assert len(get_messages()) == 1
def test_messages_not_duplicated(self, message, producer_instance):
    """An explicit flush followed by context exit must publish the
    message exactly once -- no duplicate delivery on teardown.
    """
    with capture_new_messages(message.topic) as get_messages, \
            producer_instance as producer:
        producer.publish(message)
        producer.flush()
    # Context exit cleaned up workers and did not re-send the message.
    assert len(multiprocessing.active_children()) == 0
    assert len(get_messages()) == 1
def test_messages_published_without_flush(self, message, producer_instance):
    """Messages queued inside the producer context are delivered on exit
    without an explicit flush() call.
    """
    with capture_new_messages(message.topic) as fetch_messages, \
            producer_instance as active_producer:
        active_producer.publish(message)
    # After teardown: no leftover child processes, one message delivered.
    assert len(multiprocessing.active_children()) == 0
    assert len(fetch_messages()) == 1
def test_publish_message_with_keys(self, message_with_pkeys, producer):
    """Primary-key fields must be avro-encoded (in pkey order) into the
    Kafka message key and decodable back to the original values.
    """
    keys_schema = {
        "type": "record",
        "namespace": "yelp.data_pipeline",
        "name": "primary_keys",
        "doc": "Represents primary keys present in Message payload.",
        "fields": [
            {"type": "string", "name": "field2", "doc": "test", "pkey": 1},
            {"type": "int", "name": "field1", "doc": "test", "pkey": 2},
            {"type": "int", "name": "field3", "doc": "test", "pkey": 3},
        ]
    }
    expected_keys = {
        field: message_with_pkeys.payload_data[field]
        for field in ("field2", "field1", "field3")
    }
    with capture_new_messages(message_with_pkeys.topic) as get_messages:
        producer.publish(message_with_pkeys)
        producer.flush()
        captured = get_messages()
        assert len(captured) == 1
        dp_message = create_from_offset_and_message(captured[0])
        assert dp_message.keys == expected_keys
        # The raw Kafka key must equal the avro encoding of the pkeys.
        writer = AvroStringWriter(schema=keys_schema)
        expected_encoded_keys = writer.encode(
            message_avro_representation=expected_keys
        )
        assert captured[0].message.key == expected_encoded_keys
        # And decoding the key yields the original pkey values.
        reader = AvroStringReader(
            reader_schema=keys_schema,
            writer_schema=keys_schema
        )
        decoded_keys = reader.decode(
            encoded_message=captured[0].message.key
        )
        assert decoded_keys == expected_keys
def test_messages_not_duplicated(self, message, producer_instance):
    """Flushing and then leaving the producer context yields exactly one
    copy of the message on the topic.
    """
    with capture_new_messages(message.topic) as fetch_messages, \
            producer_instance as active_producer:
        active_producer.publish(message)
        active_producer.flush()
    # Teardown left no workers behind and did not duplicate the message.
    assert len(multiprocessing.active_children()) == 0
    assert len(fetch_messages()) == 1
def test_publish_message_with_no_keys(self, message, producer):
    """A message without primary keys round-trips with an empty key dict."""
    with capture_new_messages(message.topic) as get_messages:
        producer.publish(message)
        producer.flush()
        captured = get_messages()
        assert len(captured) == 1
        assert create_from_offset_and_message(captured[0]).keys == {}
def test_publish_message_with_keys(self, message_with_pkeys, producer):
    """Publishing a message with pkey fields encodes them, ordered by
    pkey number, as the avro Kafka message key; the key must decode back
    to the payload's pkey values.
    """
    expected_keys_avro_json = {
        "type": "record",
        "namespace": "yelp.data_pipeline",
        "name": "primary_keys",
        "doc": "Represents primary keys present in Message payload.",
        "fields": [
            {"type": "string", "name": "field2", "doc": "test", "pkey": 1},
            {"type": "int", "name": "field1", "doc": "test", "pkey": 2},
            {"type": "int", "name": "field3", "doc": "test", "pkey": 3},
        ]
    }
    expected_keys = {
        "field2": message_with_pkeys.payload_data["field2"],
        "field1": message_with_pkeys.payload_data["field1"],
        "field3": message_with_pkeys.payload_data["field3"]
    }
    with capture_new_messages(message_with_pkeys.topic) as get_messages:
        producer.publish(message_with_pkeys)
        producer.flush()
        offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1
        dp_message = create_from_offset_and_message(offsets_and_messages[0])
        assert dp_message.keys == expected_keys
        # Kafka key bytes == avro encoding of the expected pkey record.
        encoded = AvroStringWriter(schema=expected_keys_avro_json).encode(
            message_avro_representation=expected_keys
        )
        assert offsets_and_messages[0].message.key == encoded
        # Decoding the key restores the original pkey mapping.
        decoded = AvroStringReader(
            reader_schema=expected_keys_avro_json,
            writer_schema=expected_keys_avro_json
        ).decode(encoded_message=offsets_and_messages[0].message.key)
        assert decoded == expected_keys
def test_publish_message_with_no_keys(self, message, producer):
    """Messages lacking pkey fields surface an empty keys mapping."""
    with capture_new_messages(message.topic) as fetch_messages:
        producer.publish(message)
        producer.flush()
        offsets_and_messages = fetch_messages()
        assert len(offsets_and_messages) == 1
        dp_message = create_from_offset_and_message(offsets_and_messages[0])
        assert dp_message.keys == {}
def test_publish_succeeds_without_retry(self, topic, message, producer):
    """A healthy publish issues exactly one produce request and advances
    the topic offset by a single message.
    """
    with attach_spy_on_func(
        producer._kafka_producer.kafka_client,
        'send_produce_request'
    ) as send_request_spy, capture_new_messages(topic) as get_messages:
        offsets_before = self.get_orig_topic_to_offset_map(producer)
        producer.publish(message)
        producer.flush()
        published = get_messages()
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=published)
        # One send, no retries.
        assert send_request_spy.call_count == 1
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            offsets_before,
            published_message_count=1
        )
def test_publish_fails_after_retry(self, message, producer):
    """If every produce request fails, the producer retries up to
    max_retry_count times, then raises MaxRetryError with nothing
    published and offsets unchanged.
    """
    # TODO(DATAPIPE-606|clin) investigate better way than mocking response
    with mock.patch.object(
        producer._kafka_producer.kafka_client,
        'send_produce_request',
        # Bug fix: a one-element side_effect list raises StopIteration on
        # the second attempt instead of FailedPayloadsError; passing the
        # exception class directly makes every retry fail the same way,
        # matching the call_count == max_retry_count assertion below.
        side_effect=FailedPayloadsError
    ) as mock_send_request, capture_new_messages(
        message.topic
    ) as get_messages, pytest.raises(MaxRetryError):
        orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
        producer.publish(message)
        producer.flush()

    messages = get_messages()
    assert len(messages) == 0
    assert mock_send_request.call_count == self.max_retry_count
    self.assert_new_topic_to_offset_map(
        producer,
        message.topic,
        orig_topic_to_offset_map,
        published_message_count=0
    )
def test_publish_succeeds_without_retry(self, topic, message, producer):
    """A successful publish should result in exactly one
    send_produce_request call and one new message on the topic.
    """
    with attach_spy_on_func(
        producer._kafka_producer.kafka_client,
        'send_produce_request'
    ) as send_request_spy, capture_new_messages(
        topic
    ) as fetch_messages:
        orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
        producer.publish(message)
        producer.flush()
        actual = fetch_messages()
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=actual)
        # No retry path was taken.
        assert send_request_spy.call_count == 1
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            orig_topic_to_offset_map,
            published_message_count=1
        )
def _publish_and_assert_pii_message(self, message, producer):
    """Round-trip a PII message: the Message object decrypts the payload
    for the caller, while the envelope on the topic carries only the
    encrypted bytes.
    """
    with capture_new_messages(message.topic) as get_messages:
        producer.publish(message)
        producer.flush()
        offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1
        dp_message = create_from_offset_and_message(offsets_and_messages[0])
        # Decrypted view matches the original message.
        assert dp_message.payload == message.payload
        assert dp_message.payload_data == message.payload_data
        assert dp_message.schema_id == message.schema_id
        unpacked_message = Envelope().unpack(
            offsets_and_messages[0].message.value
        )
        unpacked_meta_attr = unpacked_message['meta'][0]
        encryption_helper = EncryptionHelper(
            dp_message.encryption_type,
            MetaAttribute(
                unpacked_meta_attr['schema_id'],
                unpacked_meta_attr['payload']
            )
        )
        encrypted_payload = encryption_helper.encrypt_payload(message.payload)
        # Envelope payload is the encrypted form, never the clear payload.
        assert unpacked_message['payload'] == encrypted_payload
def test_publish_fails_after_retry(self, message, producer):
    """Exhausting all retries on a persistently failing produce request
    must raise MaxRetryError, publish no messages, and leave the topic
    offsets untouched.
    """
    # TODO(DATAPIPE-606|clin) investigate better way than mocking response
    with mock.patch.object(
        producer._kafka_producer.kafka_client,
        'send_produce_request',
        # Bug fix: the original one-element list side_effect exhausts after
        # the first call, so subsequent retries raise StopIteration rather
        # than FailedPayloadsError. Using the exception class makes the
        # mock raise FailedPayloadsError on every attempt, which is what
        # the max_retry_count assertion below requires.
        side_effect=FailedPayloadsError
    ) as send_request_mock, capture_new_messages(
        message.topic
    ) as fetch_messages, pytest.raises(MaxRetryError):
        offsets_before = self.get_orig_topic_to_offset_map(producer)
        producer.publish(message)
        producer.flush()

    assert len(fetch_messages()) == 0
    assert send_request_mock.call_count == self.max_retry_count
    self.assert_new_topic_to_offset_map(
        producer,
        message.topic,
        offsets_before,
        published_message_count=0
    )