def assert_equal_msgs(self, expected_msgs, actual_msgs):
    """Assert the unpacked payload of each actual kafka message matches
    the payload of the corresponding expected message, pairwise and in order.
    """
    env = Envelope()
    assert len(expected_msgs) == len(actual_msgs)
    for got, want in zip(actual_msgs, expected_msgs):
        unpacked = env.unpack(got.message.value)
        assert unpacked['payload'] == want.payload
def _assert_all_messages_published(self, consumer, expected_payloads=None):
    """Consume messages and assert their payloads equal ``expected_payloads``.

    Args:
        consumer: consumer to read published messages from.
        expected_payloads (Optional[iterable of int]): expected payload
            values in order.  Defaults to ``range(self.number_of_messages)``.

    Fix: the unpacked payloads are collected into a list, so the expected
    side is materialized as a list as well.  Previously the default was
    compared as a bare ``range(...)`` (and a caller-supplied tuple would
    also fail), which makes ``payloads == expected_payloads`` always False
    on Python 3 regardless of contents.
    """
    # Ask for twice as many messages as expected so extras would surface
    # as an assertion failure rather than being silently truncated.
    messages = consumer.get_messages(count=self.number_of_messages * 2)
    if expected_payloads is None:
        expected_payloads = range(self.number_of_messages)
    expected_payloads = list(expected_payloads)
    envelope = Envelope()
    payloads = [
        int(envelope.unpack(message.message.value)['payload'])
        for message in messages
    ]
    assert payloads == expected_payloads
def assert_equal_monitor_messages(
    self,
    actual_raw_messages,
    expected_topic,
    expected_messages_counts,
    expected_start_timestamp=0
):
    """Assert every raw message is a monitor message, and that the ones for
    ``expected_topic`` carry the expected per-window message counts, with
    each successive window starting ``self.monitor_window_in_sec`` later.
    Messages for other topics are skipped (but still must be monitor-typed).
    """
    env = Envelope()
    next_count_idx = 0
    window_start = expected_start_timestamp
    for raw in actual_raw_messages:
        message = self._get_actual_message(raw.message.value, env)
        assert message.message_type == _ProtectedMessageType.monitor.name
        data = message.payload_data
        if data['topic'] != expected_topic:
            continue
        self.assert_equal_monitor_message(
            actual_payload_data=data,
            expected_topic=expected_topic,
            expected_message_count=expected_messages_counts[next_count_idx],
            expected_start_timestamp=window_start
        )
        window_start += self.monitor_window_in_sec
        next_count_idx += 1
def create_from_offset_and_message(
    offset_and_message,
    force_payload_decoding=True,
    reader_schema_id=None,
    envelope=None
):
    """ Build a data_pipeline.message.Message from a
    kafka.common.OffsetAndMessage.

    If no reader schema id is provided, the schema used for encoding will
    be used for decoding.

    Args:
        offset_and_message (kafka.common.OffsetAndMessage): a namedtuple
            containing the offset and message. Message contains magic,
            attributes, keys and values.
        force_payload_decoding (Optional[boolean]): If this is set to `True`
            then we will decode the payload/previous_payload immediately.
            Otherwise the decoding will happen whenever the lazy *_data
            properties are accessed.
        reader_schema_id (Optional[int]): Schema id used to decode the
            incoming kafka message and build data_pipeline.message.Message
            message. Defaults to None.
        envelope (Optional[:class:data_pipeline.envelope.Envelope]): Envelope
            instance that unpacks the data pipeline messages.

    Returns (data_pipeline.message.Message):
        The message object
    """
    # Fall back to a fresh Envelope when the caller does not supply one.
    unpacker = envelope if envelope is not None else Envelope()
    return _create_message_from_packed_message(
        packed_message=offset_and_message.message,
        envelope=unpacker,
        force_payload_decoding=force_payload_decoding,
        reader_schema_id=reader_schema_id
    )
def _get_update_info_from_simple_consumer_response(response):
    """Extract (offset, partition, timestamp) from a simple-consumer
    response tuple of shape ``(partition, (offset, raw_message))``, where
    raw_message is a kafka.common.Message.
    """
    partition, (offset, raw_message) = response
    unpacked = Envelope().unpack(packed_message=raw_message.value)
    return offset, partition, unpacked['timestamp']
class ClogWriter(object):
    """Writes packed data pipeline messages to scribe via clog.

    Publishing is best-effort: failures are logged, never raised.
    """

    def __init__(self):
        # One Envelope per writer, reused for every publish.
        self.envelope = Envelope()

    def publish(self, message):
        """Pack ``message`` and log it to scribe under ``message.topic``.

        Fix: the bare ``except:`` also swallowed SystemExit and
        KeyboardInterrupt; narrowed to ``except Exception`` and switched to
        ``logger.exception`` so the traceback is preserved in the log.
        """
        try:
            clog.log_line(
                message.topic,
                self.envelope.pack(message, ascii_encoded=True)
            )
        except Exception:
            logger.exception(
                "Failed to scribe message - {}".format(str(message))
            )
def assert_last_retry_result(
    self, last_retry_result, message, expected_published_msgs_count
):
    """Assert the retry result still holds exactly one unpublished request
    (for ``message`` on partition 0) and the expected published count.
    """
    packed = _prepare(_EnvelopeAndMessage(Envelope(), message))
    expected_requests = [
        ProduceRequest(
            topic=message.topic,
            partition=0,
            messages=[packed]
        )
    ]
    assert last_retry_result.unpublished_requests == expected_requests
    assert (last_retry_result.total_published_message_count ==
            expected_published_msgs_count)
def __init__(
    self,
    consumer_name,
    team_name,
    expected_frequency_seconds,
    topic_to_consumer_topic_state_map=None,
    consumer_source=None,
    force_payload_decode=True,
    auto_offset_reset='smallest',
    # NOTE: these get_config() defaults are evaluated once, when the class
    # body is executed — later config changes won't affect them.
    partitioner_cooldown=get_config().consumer_partitioner_cooldown_default,
    use_group_sha=get_config().consumer_use_group_sha_default,
    topic_refresh_frequency_seconds=get_config().topic_refresh_frequency_seconds,
    pre_rebalance_callback=None,
    post_rebalance_callback=None,
    fetch_offsets_for_topics=None,
    pre_topic_refresh_callback=None,
    cluster_name=None
):
    """Initialize the consumer.

    Exactly one of ``topic_to_consumer_topic_state_map`` or
    ``consumer_source`` must be provided; supplying both or neither raises
    ``ValueError``.

    Args:
        consumer_name: passed through to the base client.
        team_name: passed through to the base client.
        expected_frequency_seconds: passed through to the base client.
        topic_to_consumer_topic_state_map (Optional[dict]): topics (keys)
            this consumer reads; also used to determine the cluster type.
        consumer_source: alternative topic source; also used to build the
            topic -> reader-schema map.
        force_payload_decode (Optional[bool]): whether payloads are decoded
            eagerly.
        auto_offset_reset (Optional[str]): kafka offset-reset policy;
            defaults to 'smallest'.
        partitioner_cooldown / use_group_sha /
        topic_refresh_frequency_seconds: tunables defaulted from config.
        pre_rebalance_callback / post_rebalance_callback /
        fetch_offsets_for_topics / pre_topic_refresh_callback
            (Optional[callable]): hooks stored for later use.
        cluster_name (Optional[str]): normalized via
            ``self._set_cluster_name``.
    """
    # Monitoring is explicitly disabled for consumers at the base level.
    super(BaseConsumer, self).__init__(
        consumer_name,
        team_name,
        expected_frequency_seconds,
        monitoring_enabled=False
    )
    # Enforce mutual exclusivity of the two topic sources.
    if ((topic_to_consumer_topic_state_map and consumer_source) or
            (not topic_to_consumer_topic_state_map and not consumer_source)):
        raise ValueError(
            "Exactly one of topic_to_consumer_topic_state_map "
            "or consumer_source must be specified"
        )
    self.consumer_source = consumer_source
    self.topic_to_consumer_topic_state_map = topic_to_consumer_topic_state_map
    self.force_payload_decode = force_payload_decode
    self.auto_offset_reset = auto_offset_reset
    self.partitioner_cooldown = partitioner_cooldown
    self.use_group_sha = use_group_sha
    # Lifecycle state: not running and no consumer group until started.
    self.running = False
    self.consumer_group = None
    self.pre_rebalance_callback = pre_rebalance_callback
    self.post_rebalance_callback = post_rebalance_callback
    self.fetch_offsets_for_topics = fetch_offsets_for_topics
    self.pre_topic_refresh_callback = pre_topic_refresh_callback
    self.cluster_name = self._set_cluster_name(cluster_name)
    # Periodic timer driving topic list refreshes.
    self._refresh_timer = _ConsumerTick(
        refresh_time_seconds=topic_refresh_frequency_seconds
    )
    self._topic_to_reader_schema_map = self._get_topic_to_reader_schema_map(
        consumer_source
    )
    # Exponential backoff with jitter for offset-fetch retries.
    self._consumer_retry_policy = RetryPolicy(
        ExpBackoffPolicy(with_jitter=True),
        max_retry_count=get_config().consumer_max_offset_retry_count
    )
    self._envelope = Envelope()
    # Cluster type can only be derived up-front when an explicit topic map
    # was given; presumably it is derived later for consumer_source — TODO
    # confirm against the rest of the class.
    if self.topic_to_consumer_topic_state_map:
        self.cluster_type = self._determine_cluster_type_from_topics(
            self.topic_to_consumer_topic_state_map.keys())
def test_create_from_offset_and_message_with_no_reader_schema_specified(
    self, registered_schema, payload, example_payload_data
):
    """When reader_schema_id is None, the writer schema must be used for
    decoding, so the extracted message's reader_schema_id equals the
    registered schema's id and the payload decodes correctly.
    """
    source_message = CreateMessage(
        schema_id=registered_schema.schema_id,
        payload=payload,
        timestamp=1500,
    )
    packed = create_message(Envelope().pack(source_message))
    extracted_message = create_from_offset_and_message(
        offset_and_message=OffsetAndMessage(0, packed),
        reader_schema_id=None
    )
    expected_schema_id = registered_schema.schema_id
    assert extracted_message.schema_id == expected_schema_id
    assert extracted_message.topic == registered_schema.topic.name
    assert extracted_message.reader_schema_id == expected_schema_id
    assert extracted_message.payload_data == example_payload_data
def create_from_kafka_message(
    kafka_message,
    envelope=None,
    force_payload_decoding=True,
    reader_schema_id=None
):
    """ Build a data_pipeline.message.Message from a yelp_kafka message.

    If no reader schema id is provided, the schema used for encoding will
    be used for decoding.

    Args:
        kafka_message (kafka.common.KafkaMessage): The message info which
            has the topic, partition, offset, key, and value(payload) of
            the received message.
        envelope (Optional[:class:data_pipeline.envelope.Envelope]): Envelope
            instance that unpacks the data pipeline messages.
        force_payload_decoding (Optional[boolean]): If this is set to `True`
            then we will decode the payload/previous_payload immediately.
            Otherwise the decoding will happen whenever the lazy *_data
            properties are accessed.
        reader_schema_id (Optional[int]): Schema id used to decode the
            kafka_message and build data_pipeline.message.Message message.
            Defaults to None.

    Returns (class:`data_pipeline.message.Message`):
        The message object
    """
    # Capture where in kafka this message came from.
    position_info = KafkaPositionInfo(
        offset=kafka_message.offset,
        partition=kafka_message.partition,
        key=kafka_message.key,
    )
    unpacker = envelope if envelope is not None else Envelope()
    return _create_message_from_packed_message(
        packed_message=kafka_message,
        envelope=unpacker,
        force_payload_decoding=force_payload_decoding,
        kafka_position_info=position_info,
        reader_schema_id=reader_schema_id
    )
def _publish_and_assert_pii_message(self, message, producer):
    """Publish a PII message and verify the round trip: the rebuilt
    Message exposes the decrypted payload, while the raw envelope on the
    wire carries the encrypted payload produced with the published
    encryption meta attribute.
    """
    with capture_new_messages(message.topic) as get_messages:
        producer.publish(message)
        producer.flush()
        captured = get_messages()

    assert len(captured) == 1
    first = captured[0]

    # The high-level message API must hand back decrypted data.
    dp_message = create_from_offset_and_message(first)
    assert dp_message.payload == message.payload
    assert dp_message.payload_data == message.payload_data
    assert dp_message.schema_id == message.schema_id

    # The raw envelope payload must be the encrypted form, reproducible
    # from the meta attribute that was published alongside it.
    raw = Envelope().unpack(first.message.value)
    meta_attr = raw['meta'][0]
    helper = EncryptionHelper(
        dp_message.encryption_type,
        MetaAttribute(meta_attr['schema_id'], meta_attr['payload'])
    )
    assert raw['payload'] == helper.encrypt_payload(message.payload)
def prepare_message(self, message):
    """Pack ``message`` into an envelope and return the raw kafka value
    bytes of the resulting message.
    """
    packed = Envelope().pack(message)
    return create_message(packed).value
def envelope(self):
    # Return a fresh Envelope for packing/unpacking data pipeline messages.
    return Envelope()
def offset_and_message(self, message):
    """Pack ``message`` and wrap it as an OffsetAndMessage at offset 0."""
    packed = Envelope().pack(message)
    return OffsetAndMessage(0, create_message(packed))
def __init__(self):
    # Single Envelope instance reused for all pack/unpack operations.
    self.envelope = Envelope()