def decompress(self): codec = self.attributes & self.CODEC_MASK assert codec in ( self.CODEC_GZIP, self.CODEC_SNAPPY, self.CODEC_LZ4, self.CODEC_ZSTD, ) if codec == self.CODEC_GZIP: assert has_gzip(), "Gzip decompression unsupported" raw_bytes = gzip_decode(self.value) elif codec == self.CODEC_SNAPPY: assert has_snappy(), "Snappy decompression unsupported" raw_bytes = snappy_decode(self.value) elif codec == self.CODEC_LZ4: assert has_lz4(), "LZ4 decompression unsupported" if self.magic == 0: raw_bytes = lz4_decode_old_kafka(self.value) else: raw_bytes = lz4_decode(self.value) elif codec == self.CODEC_ZSTD: assert has_zstd(), "ZSTD decompression unsupported" raw_bytes = zstd_decode(self.value) else: raise Exception("This should be impossible") return MessageSet.decode(raw_bytes, bytes_to_read=len(raw_bytes))
def test_snappy(self): if not has_snappy(): return for i in xrange(ITERATIONS): s1 = random_string() s2 = snappy_decode(snappy_encode(s1)) self.assertEquals(s1, s2)
def test_create_snappy(self): payloads = [(b"v1", None), (b"v2", None)] msg = create_snappy_message(payloads) self.assertEqual(msg.magic, 0) self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY) self.assertEqual(msg.key, None) decoded = snappy_decode(msg.value) expect = b"".join([ struct.pack(">q", 0), # MsgSet offset struct.pack(">i", 16), # MsgSet size struct.pack(">i", 1285512130), # CRC struct.pack(">bb", 0, 0), # Magic, flags struct.pack(">i", -1), # -1 indicates a null key struct.pack(">i", 2), # Msg length (bytes) b"v1", # Message contents struct.pack(">q", 0), # MsgSet offset struct.pack(">i", 16), # MsgSet size struct.pack(">i", -711587208), # CRC struct.pack(">bb", 0, 0), # Magic, flags struct.pack(">i", -1), # -1 indicates a null key struct.pack(">i", 2), # Msg length (bytes) b"v2", # Message contents ]) self.assertEqual(decoded, expect)
def _decompress(self, key_offset): # Copy of `_read_key_value`, but uses memoryview pos = key_offset key_size = struct.unpack_from(">i", self._buffer, pos)[0] pos += self.KEY_LENGTH if key_size != -1: pos += key_size value_size = struct.unpack_from(">i", self._buffer, pos)[0] pos += self.VALUE_LENGTH if value_size == -1: raise CorruptRecordException("Value of compressed message is None") else: data = self._buffer[pos:pos + value_size] compression_type = self.compression_type if compression_type == self.CODEC_GZIP: uncompressed = gzip_decode(data) elif compression_type == self.CODEC_SNAPPY: uncompressed = snappy_decode(data.tobytes()) elif compression_type == self.CODEC_LZ4: if self._magic == 0: uncompressed = lz4_decode_old_kafka(data.tobytes()) else: uncompressed = lz4_decode(data.tobytes()) return uncompressed
def test_create_snappy(self): payloads = ["v1", "v2"] msg = create_snappy_message(payloads) self.assertEqual(msg.magic, 0) self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY) self.assertEqual(msg.key, None) decoded = snappy_decode(msg.value) expect = "".join([ struct.pack(">q", 0), # MsgSet offset struct.pack(">i", 16), # MsgSet size struct.pack(">i", 1285512130), # CRC struct.pack(">bb", 0, 0), # Magic, flags struct.pack(">i", -1), # -1 indicates a null key struct.pack(">i", 2), # Msg length (bytes) "v1", # Message contents struct.pack(">q", 0), # MsgSet offset struct.pack(">i", 16), # MsgSet size struct.pack(">i", -711587208), # CRC struct.pack(">bb", 0, 0), # Magic, flags struct.pack(">i", -1), # -1 indicates a null key struct.pack(">i", 2), # Msg length (bytes) "v2", # Message contents ]) self.assertEqual(decoded, expect)
def _decode_message(cls, data, offset): """ Decode a single Message The only caller of this method is decode_message_set_iter. They are decoupled to support nested messages (compressed MessageSets). The offset is actually read from decode_message_set_iter (it is part of the MessageSet payload). """ ((crc, magic, att), cur) = relative_unpack('>IBB', data, 0) if crc != crc32(data[4:]): raise ChecksumError("Message checksum failed") (key, cur) = read_int_string(data, cur) (value, cur) = read_int_string(data, cur) codec = att & ATTRIBUTE_CODEC_MASK if codec == CODEC_NONE: yield (offset, Message(magic, att, key, value)) elif codec == CODEC_GZIP: gz = gzip_decode(value) for (offset, msg) in KafkaProtocol._decode_message_set_iter(gz): yield (offset, msg) elif codec == CODEC_SNAPPY: snp = snappy_decode(value) for (offset, msg) in KafkaProtocol._decode_message_set_iter(snp): yield (offset, msg)
def test_create_snappy_keyed(self): payloads = [(b"v1", b"k1"), (b"v2", b"k2")] msg = create_snappy_message(payloads) self.assertEqual(msg.magic, 0) self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY) self.assertEqual(msg.key, None) decoded = snappy_decode(msg.value) expect = b"".join([ struct.pack(">q", 0), # MsgSet Offset struct.pack(">i", 18), # Msg Size struct.pack(">i", 1474775406), # CRC struct.pack(">bb", 0, 0), # Magic, flags struct.pack(">i", 2), # Length of key b"k1", # Key struct.pack(">i", 2), # Length of value b"v1", # Value struct.pack(">q", 0), # MsgSet Offset struct.pack(">i", 18), # Msg Size struct.pack(">i", -16383415), # CRC struct.pack(">bb", 0, 0), # Magic, flags struct.pack(">i", 2), # Length of key b"k2", # Key struct.pack(">i", 2), # Length of value b"v2", # Value ]) self.assertEqual(decoded, expect)
def _decode_message(cls, data, offset): """ Decode a single Message The only caller of this method is decode_message_set_iter. They are decoupled to support nested messages (compressed MessageSets). The offset is actually read from decode_message_set_iter (it is part of the MessageSet payload). """ ((crc, magic, att), cur) = relative_unpack('>iBB', data, 0) if crc != zlib.crc32(data[4:]): raise ChecksumError("Message checksum failed") (key, cur) = read_int_string(data, cur) (value, cur) = read_int_string(data, cur) codec = att & KafkaProtocol.ATTRIBUTE_CODEC_MASK if codec == KafkaProtocol.CODEC_NONE: yield (offset, Message(magic, att, key, value)) elif codec == KafkaProtocol.CODEC_GZIP: gz = gzip_decode(value) for (offset, msg) in KafkaProtocol._decode_message_set_iter(gz): yield (offset, msg) elif codec == KafkaProtocol.CODEC_SNAPPY: snp = snappy_decode(value) for (offset, msg) in KafkaProtocol._decode_message_set_iter(snp): yield (offset, msg)
def test_snappy_decode_xerial(self): header = b"\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01" random_snappy = snappy_encode(b"SNAPPY" * 50) block_len = len(random_snappy) random_snappy2 = snappy_encode(b"XERIAL" * 50) block_len2 = len(random_snappy2) to_test = header + struct.pack("!i", block_len) + random_snappy + struct.pack("!i", block_len2) + random_snappy2 self.assertEquals(snappy_decode(to_test), (b"SNAPPY" * 50) + (b"XERIAL" * 50))
def test_create_snappy(self): msg = KafkaClient.create_snappy_message("testing") self.assertEquals(msg.magic, 1) self.assertEquals(msg.attributes, 2) self.assertEquals(msg.crc, -62350868) (messages, _) = KafkaClient.read_message_set(snappy_decode(msg.payload)) inner = messages[0] self.assertEquals(inner.magic, 1) self.assertEquals(inner.attributes, 0) self.assertEquals(inner.payload, "testing") self.assertEquals(inner.crc, -386704890)
def test_snappy_decode_xerial(): header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01' random_snappy = snappy_encode(b'SNAPPY' * 50, xerial_compatible=False) block_len = len(random_snappy) random_snappy2 = snappy_encode(b'XERIAL' * 50, xerial_compatible=False) block_len2 = len(random_snappy2) to_test = header \ + struct.pack('!i', block_len) + random_snappy \ + struct.pack('!i', block_len2) + random_snappy2 \ assert snappy_decode(to_test) == (b'SNAPPY' * 50) + (b'XERIAL' * 50)
def test_snappy_decode_xerial(self): header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01' random_snappy = snappy_encode('SNAPPY' * 50) block_len = len(random_snappy) random_snappy2 = snappy_encode('XERIAL' * 50) block_len2 = len(random_snappy2) to_test = header \ + struct.pack('!i', block_len) + random_snappy \ + struct.pack('!i', block_len2) + random_snappy2 \ self.assertEquals(snappy_decode(to_test), ('SNAPPY' * 50) + ('XERIAL' * 50))
def test_snappy_decode_xerial(self): header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01' random_snappy = snappy_encode(b'SNAPPY' * 50) block_len = len(random_snappy) random_snappy2 = snappy_encode(b'XERIAL' * 50) block_len2 = len(random_snappy2) to_test = header \ + struct.pack('!i', block_len) + random_snappy \ + struct.pack('!i', block_len2) + random_snappy2 \ self.assertEquals(snappy_decode(to_test), (b'SNAPPY' * 50) + (b'XERIAL' * 50))
def _maybe_uncompress(self): if not self._decompressed: compression_type = self.compression_type if compression_type != self.CODEC_NONE: data = memoryview(self._buffer)[self._pos:] if compression_type == self.CODEC_GZIP: uncompressed = gzip_decode(data) if compression_type == self.CODEC_SNAPPY: uncompressed = snappy_decode(data.tobytes()) if compression_type == self.CODEC_LZ4: uncompressed = lz4_decode(data.tobytes()) self._buffer = bytearray(uncompressed) self._pos = 0 self._decompressed = True
def decompress(self): codec = self.attributes & self.CODEC_MASK assert codec in (self.CODEC_GZIP, self.CODEC_SNAPPY, self.CODEC_LZ4) if codec == self.CODEC_GZIP: assert has_gzip(), 'Gzip decompression unsupported' raw_bytes = gzip_decode(self.value) elif codec == self.CODEC_SNAPPY: assert has_snappy(), 'Snappy decompression unsupported' raw_bytes = snappy_decode(self.value) elif codec == self.CODEC_LZ4: assert has_lz4(), 'LZ4 decompression unsupported' if self.magic == 0: raw_bytes = lz4_decode_old_kafka(self.value) else: raw_bytes = lz4_decode(self.value) else: raise Exception('This should be impossible') return MessageSet.decode(raw_bytes, bytes_to_read=len(raw_bytes))
def test_snappy(self): for i in xrange(1000): b1 = random_string(100).encode('utf-8') b2 = snappy_decode(snappy_encode(b1)) self.assertEqual(b1, b2)
def test_snappy(self): for i in compat.xrange(1000): s1 = compat.bytes(random_string(100)) s2 = snappy_decode(snappy_encode(s1)) self.assertEquals(s1, s2)
def test_snappy(): for i in xrange(1000): b1 = random_string(100).encode('utf-8') b2 = snappy_decode(snappy_encode(b1)) assert b1 == b2
def test_snappy(): for i in range(1000): b1 = random_string(100).encode('utf-8') b2 = snappy_decode(snappy_encode(b1)) assert b1 == b2
def test_snappy(self): for i in xrange(1000): s1 = random_string(100) s2 = snappy_decode(snappy_encode(s1)) self.assertEquals(s1, s2)
def test_snappy(self): for i in xrange(1000): s1 = random_string(100) s2 = snappy_decode(snappy_encode(s1)) self.assertEqual(s1, s2)