Example #1
 def __next__(self):
     if self._next_record_index >= self._num_records:
         # All records consumed; any leftover bytes indicate a corrupt batch
         if self._pos != len(self._buffer):
             raise CorruptRecordException(
                 "{} unconsumed bytes after all records consumed".format(
                     len(self._buffer) - self._pos))
         raise StopIteration
     # Wrap low-level parsing errors in a protocol-level exception
     try:
         msg = self._read_msg()
     except (ValueError, IndexError) as err:
         raise CorruptRecordException(
             "Found invalid record structure: {!r}".format(err))
     else:
         self._next_record_index += 1
     return msg
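
The pattern above is an index-bounded iterator that, once every record is consumed, also verifies that no bytes are left in the buffer before raising StopIteration. A toy standalone sketch of the same pattern, assuming fixed-size big-endian int32 records; `ToyBatch` and `ToyCorruptError` are illustrative names, not part of any real client:

    import struct

    class ToyCorruptError(Exception):
        pass

    class ToyBatch:
        def __init__(self, buffer, num_records):
            self._buffer = buffer
            self._num_records = num_records
            self._next_record_index = 0
            self._pos = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self._next_record_index >= self._num_records:
                # Same invariant as above: leftover bytes mean corruption
                if self._pos != len(self._buffer):
                    raise ToyCorruptError("unconsumed bytes after all records")
                raise StopIteration
            try:
                msg, = struct.unpack_from(">i", self._buffer, self._pos)
            except struct.error as err:
                raise ToyCorruptError(
                    "invalid record structure: {!r}".format(err))
            self._pos += 4
            self._next_record_index += 1
            return msg

    print(list(ToyBatch(struct.pack(">ii", 10, 20), 2)))  # [10, 20]
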
Example #2
    def _decompress(self, key_offset):
        # Copy of `_read_key_value`, but uses memoryview
        pos = key_offset
        key_size = struct.unpack_from(">i", self._buffer, pos)[0]
        pos += self.KEY_LENGTH
        if key_size != -1:
            pos += key_size  # skip the key; only the value holds compressed data
        value_size = struct.unpack_from(">i", self._buffer, pos)[0]
        pos += self.VALUE_LENGTH
        if value_size == -1:
            raise CorruptRecordException("Value of compressed message is None")
        else:
            data = self._buffer[pos:pos + value_size]

        compression_type = self.compression_type
        # Verify the required codec library is available before decoding
        self._assert_has_codec(compression_type)
        if compression_type == self.CODEC_GZIP:
            uncompressed = gzip_decode(data)
        elif compression_type == self.CODEC_SNAPPY:
            uncompressed = snappy_decode(data.tobytes())
        elif compression_type == self.CODEC_LZ4:
            if self._magic == 0:
                uncompressed = lz4_decode_old_kafka(data.tobytes())
            else:
                uncompressed = lz4_decode(data.tobytes())
        return uncompressed
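
The framing that `_decompress` walks over, an int32 big-endian size prefix where -1 encodes null followed by that many payload bytes, can be exercised with nothing but the standard library. A minimal sketch, substituting stdlib gzip for the client's codec helpers; `read_nullable_bytes` is a hypothetical helper, not the real API:

    import gzip
    import struct

    def read_nullable_bytes(buf, pos):
        # Int32 size prefix; -1 encodes a null value
        size, = struct.unpack_from(">i", buf, pos)
        pos += 4
        if size == -1:
            return None, pos
        return buf[pos:pos + size], pos + size

    payload = gzip.compress(b"hello kafka")
    frame = struct.pack(">i", len(payload)) + payload
    data, _ = read_nullable_bytes(memoryview(frame), 0)
    print(gzip.decompress(bytes(data)))  # b'hello kafka'
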
Example #3
 def next_batch(self, _min_slice=MIN_SLICE, _magic_offset=MAGIC_OFFSET):
     next_slice = self._next_slice
     if next_slice is None:
         return None
     if len(next_slice) < _min_slice:
         raise CorruptRecordException(
             "Record size is less than the minimum record overhead "
             "({})".format(_min_slice - self.LOG_OVERHEAD))
     self._cache_next()
     magic, = struct.unpack_from(">b", next_slice, _magic_offset)
     if magic <= 1:
         # magic 0 and 1 are the legacy message-set formats
         return LegacyRecordBatch(next_slice, magic)
     else:
         # magic 2 is the modern record-batch format
         return DefaultRecordBatch(next_slice)
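
`next_batch` commits to a parser by peeking one signed byte at a fixed offset: in both on-disk layouts the magic byte sits 16 bytes in (8 bytes of offset, 4 of length, then 4 of CRC for legacy messages or partition leader epoch for v2 batches). A standalone sketch of the same peek-then-dispatch idea, with illustrative names:

    import struct

    MAGIC_OFFSET = 16  # offset(8) + length(4) + crc / leader-epoch(4)

    def pick_parser(frame):
        # Peek the signed magic byte without consuming anything
        magic, = struct.unpack_from(">b", frame, MAGIC_OFFSET)
        return "LegacyRecordBatch" if magic <= 1 else "DefaultRecordBatch"

    frame = bytearray(20)
    frame[MAGIC_OFFSET] = 2
    print(pick_parser(frame))  # DefaultRecordBatch
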
Example #4
    def _read_msg(
            self,
            decode_varint=decode_varint):
        # Record =>
        #   Length => Varint
        #   Attributes => Int8
        #   TimestampDelta => Varlong
        #   OffsetDelta => Varint
        #   Key => Bytes
        #   Value => Bytes
        #   Headers => [HeaderKey HeaderValue]
        #     HeaderKey => String
        #     HeaderValue => Bytes

        buffer = self._buffer
        pos = self._pos
        length, pos = decode_varint(buffer, pos)
        start_pos = pos
        _, pos = decode_varint(buffer, pos)  # attrs can be skipped for now

        ts_delta, pos = decode_varint(buffer, pos)
        if self.timestamp_type == self.LOG_APPEND_TIME:
            timestamp = self.max_timestamp
        else:
            timestamp = self.first_timestamp + ts_delta

        offset_delta, pos = decode_varint(buffer, pos)
        offset = self.base_offset + offset_delta

        key_len, pos = decode_varint(buffer, pos)
        if key_len >= 0:
            key = bytes(buffer[pos: pos + key_len])
            pos += key_len
        else:
            key = None

        value_len, pos = decode_varint(buffer, pos)
        if value_len >= 0:
            value = bytes(buffer[pos: pos + value_len])
            pos += value_len
        else:
            value = None

        header_count, pos = decode_varint(buffer, pos)
        if header_count < 0:
            raise CorruptRecordException("Found invalid number of record "
                                         "headers {}".format(header_count))
        headers = []
        while header_count:
            # Header key is of type String, that can't be None
            h_key_len, pos = decode_varint(buffer, pos)
            if h_key_len < 0:
                raise CorruptRecordException(
                    "Invalid negative header key size {}".format(h_key_len))
            h_key = buffer[pos: pos + h_key_len].decode("utf-8")
            pos += h_key_len

            # Value is of type NULLABLE_BYTES, so it can be None
            h_value_len, pos = decode_varint(buffer, pos)
            if h_value_len >= 0:
                h_value = bytes(buffer[pos: pos + h_value_len])
                pos += h_value_len
            else:
                h_value = None

            headers.append((h_key, h_value))
            header_count -= 1

        # Validate that the declared record length matches the bytes consumed
        if pos - start_pos != length:
            raise CorruptRecordException(
                "Invalid record size: expected to read {} bytes in record "
                "payload, but instead read {}".format(length, pos - start_pos))
        self._pos = pos

        return DefaultRecord(
            offset, timestamp, self.timestamp_type, key, value, headers)
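
Everything in this layout hinges on `decode_varint`, bound as a default argument (a common CPython micro-optimization that turns a global lookup into a local one). Kafka's v2 record fields use zigzag-encoded varints, protobuf-style, so small negative deltas stay one byte long. A minimal compatible decoder, assuming that encoding:

    def decode_varint(buffer, pos):
        # Accumulate 7 bits per byte, least-significant group first
        result = 0
        shift = 0
        while True:
            b = buffer[pos]
            pos += 1
            result |= (b & 0x7F) << shift
            if not (b & 0x80):  # high bit clear: this was the last byte
                break
            shift += 7
        # Undo zigzag: 0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...
        return (result >> 1) ^ -(result & 1), pos

    print(decode_varint(b"\x02", 0))  # (1, 1)
    print(decode_varint(b"\x03", 0))  # (-2, 1)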