def _unpack_records(self):
    # NOTE: if the batch is not compressed it's equal to 1 record in
    # v0 and v1.
    tp = self._tp
    records = self._records
    while records.has_next():
        next_batch = records.next_batch()
        if self._check_crcs and not next_batch.validate_crc():
            # This iterator will be closed after the exception, so we don't
            # try to drain other batches here. They will be refetched.
            raise Errors.CorruptRecordException(f"Invalid CRC - {tp}")

        # Aborted-transaction handling only applies under READ_COMMITTED,
        # so the abort-marker and skip logic is nested inside this branch.
        if self._isolation_level == READ_COMMITTED and \
                next_batch.producer_id is not None:
            self._consume_aborted_up_to(next_batch.base_offset)

            if next_batch.is_control_batch:
                if self._contains_abort_marker(next_batch):
                    # Using `discard` instead of `remove`, because Kafka
                    # may return an abort marker for an otherwise empty
                    # topic-partition.
                    self._aborted_producers.discard(next_batch.producer_id)

            if next_batch.is_transactional and \
                    next_batch.producer_id in self._aborted_producers:
                log.debug(
                    "Skipping aborted record batch from partition %s with"
                    " producer_id %s and offsets %s to %s",
                    tp, next_batch.producer_id,
                    next_batch.base_offset, next_batch.next_offset - 1)
                self.next_fetch_offset = next_batch.next_offset
                continue

        # We skip control batches no matter the isolation level
        if next_batch.is_control_batch:
            self.next_fetch_offset = next_batch.next_offset
            continue

        for record in next_batch:
            # It's OK for the offset to be larger than the current fetch
            # offset. It will happen in compacted topics.
            if record.offset < self.next_fetch_offset:
                # Probably just a compressed messageset, it's ok to skip.
                continue
            consumer_record = self._consumer_record(tp, record)
            self.next_fetch_offset = record.offset + 1
            yield consumer_record

        # Message format v2 preserves the last offset in a batch even if
        # the last record is removed through compaction. By using the next
        # offset computed from the last offset in the batch, we ensure that
        # the offset of the next fetch will point to the next batch, which
        # avoids unnecessary re-fetching of the same batch (in the worst
        # case, the consumer could get stuck fetching the same batch
        # repeatedly).
        self.next_fetch_offset = next_batch.next_offset
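# The helpers used above (_consume_aborted_up_to, _contains_abort_marker,
# _aborted_producers) are not shown in this excerpt. Below is a minimal
# sketch of that bookkeeping, assuming the FetchResponse supplies
# (producer_id, first_offset) pairs for aborted transactions. The class
# and method names here are illustrative, not the library's actual code.

import heapq

class AbortedTransactionTracker:
    """Tracks which producer_ids are currently inside an aborted txn."""

    def __init__(self, aborted_transactions):
        # Min-heap keyed by the first offset of each aborted transaction,
        # so transactions can be activated in offset order as batches
        # are consumed.
        self._pending = [
            (first_offset, producer_id)
            for producer_id, first_offset in aborted_transactions
        ]
        heapq.heapify(self._pending)
        self._aborted_producers = set()

    def consume_aborted_up_to(self, batch_offset):
        # Activate every aborted transaction that starts at or before
        # this batch; records from those producers must be dropped.
        while self._pending and self._pending[0][0] <= batch_offset:
            _, producer_id = heapq.heappop(self._pending)
            self._aborted_producers.add(producer_id)

    def is_aborted(self, producer_id):
        return producer_id in self._aborted_producers

    def on_abort_marker(self, producer_id):
        # An ABORT control record closes the producer's aborted range.
        # `discard` tolerates markers for producers never activated.
        self._aborted_producers.discard(producer_id)

# For comparison, the simpler variant below takes tp and records as
# arguments and predates the transaction (READ_COMMITTED) handling.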
def _unpack_records(self, tp, records):
    # NOTE: if the batch is not compressed it's equal to 1 record in
    # v0 and v1.
    deserialize = self._deserialize
    check_crcs = self._check_crcs
    while records.has_next():
        next_batch = records.next_batch()
        if check_crcs and not next_batch.validate_crc():
            # This iterator will be closed after the exception, so we don't
            # try to drain other batches here. They will be refetched.
            raise Errors.CorruptRecordException("Invalid CRC")
        for record in next_batch:
            # Save encoded sizes before deserialization; -1 marks a
            # missing (None) key or value.
            key_size = len(record.key) if record.key is not None else -1
            value_size = \
                len(record.value) if record.value is not None else -1
            key, value = deserialize(record)
            yield ConsumerRecord(
                tp.topic, tp.partition, record.offset,
                record.timestamp, record.timestamp_type,
                key, value, record.checksum,
                key_size, value_size)
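# `self._deserialize` is not shown in this excerpt. A hypothetical factory
# matching the `key, value = deserialize(record)` call above — in practice
# the consumer wires in user-supplied key/value deserializers:

def make_deserializer(key_deserializer=None, value_deserializer=None):
    def deserialize(record):
        # Fall back to the raw bytes when no deserializer is configured;
        # None keys/values pass through untouched.
        if key_deserializer is not None and record.key is not None:
            key = key_deserializer(record.key)
        else:
            key = record.key
        if value_deserializer is not None and record.value is not None:
            value = value_deserializer(record.value)
        else:
            value = record.value
        return key, value
    return deserialize

# Example usage:
#     deserialize = make_deserializer(
#         value_deserializer=lambda v: v.decode("utf-8"))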