def output_consumed(self, message: Message):
    """
    Write the message to the writer responsible for its partition.

    When an output directory is configured (and the default output
    directory has not been initialized), a partition-specific writer is
    created lazily on first sight of the partition. Otherwise the
    message goes to an existing writer, falling back to the default
    writer stored under key ``-1``.

    :param message: Message to output
    :return: This method returns no values
    """
    partition = message.partition()
    needs_new_writer = (
        self.output_directory
        and not self._initialize_default_output_directory
        and partition not in self.writers
    )
    if needs_new_writer:
        partition_writer = PlainTextFileWriter(self.output_directory / f"partition_{partition}")
        partition_writer.init_destination_directory()
        self.writers[partition] = partition_writer
    else:
        partition_writer = self.writers.get(partition, self.writers[-1])
    partition_writer.write_message(message)
def decode_message_from_avro(self, message: Message):
    """
    Decode an Avro-encoded Kafka message into its constituent parts.

    Key and value bytes are decoded via ``self.decode_bytes``; header
    values are decoded as UTF-8 (empty/missing values map to ``None``).

    :param message: Consumed message to decode.
    :return: Tuple ``(key_schema_id, value_schema_id, decoded_message,
        serializable_message)`` where the last element is a plain dict.
    """
    key_schema_id, decoded_key = self.decode_bytes(message.key())
    value_schema_id, decoded_value = self.decode_bytes(message.value())

    headers = []
    raw_headers = message.headers()
    if raw_headers:
        headers = [
            MessageHeader(key=name, value=raw.decode("utf-8") if raw else None)
            for name, raw in raw_headers
        ]

    decoded_message = DecodedAvroMessage(
        decoded_key,
        decoded_value,
        message.partition(),
        key_schema_id,
        value_schema_id,
        headers=headers,
    )

    # NOTE(review): the counter is recreated on every call, so next() always
    # yields 1 and the directory prefix is always "0001" — confirm whether a
    # persistent (instance-level) counter was intended, as in the file-writer
    # variant that keeps self.schema_version.
    schema_version = it.count(1)
    serializable_message = {
        "key": decoded_message.key,
        "value": decoded_message.value,
        "partition": decoded_message.partition,
        "schema_directory_name": f"{next(schema_version):04}_{key_schema_id}_{value_schema_id}",
    }
    return key_schema_id, value_schema_id, decoded_message, serializable_message
def _process_message(self, msg: KafkaMessage):
    """
    Validate a consumed Kafka message and normalize it for processing.

    :param msg: Raw message polled from the consumer.
    :return: The message itself when it carries a non-empty value (with
        ``None`` headers replaced by an empty list); ``None`` for
        end-of-partition events or empty payloads.
    :raises KafkaException: For any consumer error other than
        partition EOF, after bumping the monitoring counter and logging.
    """
    error = msg.error()
    if error:
        # Reaching the end of a partition is expected, not a failure.
        if error.code() == KafkaError._PARTITION_EOF:
            return None
        monitoring.got_counter("kafka_consumer_exception")
        log_params = {
            "code": error.code(),
            "pid": os.getpid(),
            "topic": msg.topic(),
            "partition": msg.partition(),
            "offset": msg.offset(),
            log_const.KEY_NAME: log_const.EXCEPTION_VALUE,
        }
        log(
            "KafkaConsumer Error %(code)s at pid %(pid)s: topic=%(topic)s partition=[%(partition)s] "
            "reached end at offset %(offset)s\n",
            params=log_params,
            level="WARNING",
        )
        raise KafkaException(error)

    if not msg.value():
        return None
    if msg.headers() is None:
        msg.set_headers([])
    return msg
def delivery_success_callback(msg: Message):
    """Pretty-print the delivery metadata of a successfully produced message."""
    delivery_info = {
        'topic': msg.topic(),
        'partition': msg.partition(),
        'timestamp': msg.timestamp(),
        'key': msg.key(),
        'value': msg.value(),
    }
    pprint(delivery_info)
def output_consumed(self, message: Message):
    """
    Outputs the message to a destination determined by the implementation
    of the inheriting class.

    :param message: Message to output
    :return: This method returns no values
    """
    # The writer registered under -1 acts as the default destination for
    # partitions without a dedicated writer.
    fallback_writer = self.writers[-1]
    self.writers.get(message.partition(), fallback_writer).write_message(message)
def update_callback(self, err: Optional[cimpl.KafkaError], msg: cimpl.Message):
    """
    Capture the raw coordinates of a delivered/consumed message on this
    instance: binary key and value, partition, offset, and the second
    element of ``msg.timestamp()`` (presumably the timestamp value of a
    ``(timestamp_type, timestamp)`` pair — confirm against confluent-kafka).

    :raises AssertionError: If a KafkaError was passed in.
    """
    assert err is None, f"Received KafkaError {err}."
    self.binary_key = msg.key()
    self.binary_value = msg.value()
    self.offset = msg.offset()
    self.partition = msg.partition()
    self.timestamp = msg.timestamp()[1]
def delivery_report(err: str, msg: Message):
    """Called once for each message produced to indicate delivery result.
    Triggered by poll() or flush()."""
    if err is None:
        print("Message delivered to {} [{}]".format(msg.topic(), msg.partition()))
    else:
        print("Message delivery failed: {}".format(err))
def decode_message(message: Message) -> DecodedMessage:
    """
    Decode a consumed Kafka message into a ``DecodedMessage``.

    The key may be absent (``None``); the value is assumed to be present
    and UTF-8 encoded. Header values are decoded as UTF-8, with empty or
    missing header values mapped to ``None``.

    :param message: Consumed message to decode.
    :return: Fully decoded message including partition, offset,
        stringified timestamp and headers.
    """
    raw_key = message.key()
    decoded_key = None if raw_key is None else raw_key.decode("utf-8")

    raw_headers = message.headers()
    headers = (
        [
            MessageHeader(key=name, value=raw.decode("utf-8") if raw else None)
            for name, raw in raw_headers
        ]
        if raw_headers
        else []
    )

    return DecodedMessage(
        key=decoded_key,
        value=message.value().decode("utf-8"),
        partition=message.partition(),
        offset=message.offset(),
        timestamp=str(message.timestamp()),
        headers=headers,
    )
def write_message_to_file(self, message: Message):
    """
    Append one decoded Avro message to the open pickle file.

    Whenever the message's schema differs from the previously seen one
    (or no schema has been seen yet), a new versioned schema directory
    name is generated and the schemata are dumped before the record is
    written.

    :param message: Consumed Avro message to persist.
    :return: This method returns no values
    """
    key_schema_id, decoded_key = self.decode_bytes(message.key())
    value_schema_id, decoded_value = self.decode_bytes(message.value())
    decoded_message = DecodedAvroMessage(
        decoded_key, decoded_value, message.partition(), key_schema_id, value_schema_id
    )

    # A schema change (or the very first message) starts a fresh schema
    # directory and refreshes the cached schema ids.
    if self.schema_changed(decoded_message) or self.schema_dir_name is None:
        self.schema_dir_name = f"{next(self.schema_version):04}_{key_schema_id}_{value_schema_id}"
        self.current_key_schema_id = key_schema_id
        self.current_value_schema_id = value_schema_id
        self._dump_schemata(key_schema_id, value_schema_id)

    record = {
        "key": decoded_message.key,
        "value": decoded_message.value,
        "partition": decoded_message.partition,
        "schema_directory_name": self.schema_dir_name,
    }
    pickle.dump(record, self.file)
def decode_message(message: Message) -> DecodedMessage:
    """
    Decode a consumed Kafka message's key and value as UTF-8 text.

    Kafka messages may legitimately carry no key (``message.key()`` is
    ``None``); in that case the decoded key is ``None`` instead of
    raising ``AttributeError``, matching the richer ``decode_message``
    variant elsewhere in this codebase. The value is assumed present
    and UTF-8 encoded.

    :param message: Consumed message to decode.
    :return: ``DecodedMessage`` built from the decoded key, decoded
        value and the message's partition.
    """
    raw_key = message.key()
    decoded_key = raw_key.decode("utf-8") if raw_key is not None else None
    decoded_value = message.value().decode("utf-8")
    return DecodedMessage(decoded_key, decoded_value, message.partition())