예제 #1
0
    def decompress(self):
        """Decompress this message's value and decode it as a MessageSet.

        The codec is read from ``self.attributes`` masked with
        ``self.CODEC_MASK``. LZ4 values on magic-0 messages are decoded with
        ``lz4_decode_old_kafka``; all other LZ4 values use ``lz4_decode``.

        Returns:
            The result of ``MessageSet.decode`` over the decompressed bytes.

        Raises:
            AssertionError: if the codec is not gzip/snappy/lz4/zstd, or the
                matching ``has_*()`` check returns a falsy value.
        """
        codec = self.attributes & self.CODEC_MASK
        known_codecs = (
            self.CODEC_GZIP,
            self.CODEC_SNAPPY,
            self.CODEC_LZ4,
            self.CODEC_ZSTD,
        )
        assert codec in known_codecs

        # Select the decoder first; the value is decompressed once below.
        if codec == self.CODEC_GZIP:
            assert has_gzip(), "Gzip decompression unsupported"
            decoder = gzip_decode
        elif codec == self.CODEC_SNAPPY:
            assert has_snappy(), "Snappy decompression unsupported"
            decoder = snappy_decode
        elif codec == self.CODEC_LZ4:
            assert has_lz4(), "LZ4 decompression unsupported"
            decoder = lz4_decode_old_kafka if self.magic == 0 else lz4_decode
        elif codec == self.CODEC_ZSTD:
            assert has_zstd(), "ZSTD decompression unsupported"
            decoder = zstd_decode
        else:
            # Only reachable when assertions are disabled.
            raise Exception("This should be impossible")

        payload = decoder(self.value)
        return MessageSet.decode(payload, bytes_to_read=len(payload))
예제 #2
0
 def test_snappy(self):
     """Round-trip random strings through snappy_encode/snappy_decode.

     The test is reported as skipped, instead of silently passing, when
     has_snappy() returns a falsy value.
     """
     if not has_snappy():
         self.skipTest("snappy is not available")
     for _ in xrange(ITERATIONS):
         original = random_string()
         round_tripped = snappy_decode(snappy_encode(original))
         # assertEquals is a deprecated alias that newer unittest drops.
         self.assertEqual(original, round_tripped)
예제 #3
0
    def test_create_snappy(self):
        """Two unkeyed payloads are wrapped into one magic-0 snappy message.

        After snappy-decoding the wrapper's value, the bytes must equal the
        two inner messages laid out back to back.
        """
        msg = create_snappy_message([(b"v1", None), (b"v2", None)])

        self.assertEqual(msg.magic, 0)
        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
        self.assertEqual(msg.key, None)
        decoded = snappy_decode(msg.value)

        # Big-endian layout per inner message: offset (q), size (i), CRC (i),
        # magic and flags (bb), key length (i, -1 indicates a null key),
        # value length (i), followed by the value bytes.
        entry_header = struct.Struct(">qiibbii")
        expect = b""
        for crc, body in ((1285512130, b"v1"), (-711587208, b"v2")):
            expect += entry_header.pack(0, 16, crc, 0, 0, -1, 2) + body

        self.assertEqual(decoded, expect)
예제 #4
0
    def _decompress(self, key_offset):
        """Return the decompressed value of the message at ``key_offset``.

        Starting at ``key_offset`` in ``self._buffer``, skips the key (a
        big-endian int32 length, ``-1`` meaning no key bytes follow), reads
        the value length, slices the value out and passes it to the decoder
        selected by ``self.compression_type``.

        Raises:
            CorruptRecordException: if the value length is ``-1``.
            ValueError: if ``self.compression_type`` is not gzip, snappy or
                LZ4. Previously this case fell through and failed with an
                ``UnboundLocalError`` on the return statement.
        """
        # Copy of `_read_key_value`, but uses memoryview
        pos = key_offset
        key_size = struct.unpack_from(">i", self._buffer, pos)[0]
        pos += self.KEY_LENGTH
        if key_size != -1:
            pos += key_size
        value_size = struct.unpack_from(">i", self._buffer, pos)[0]
        pos += self.VALUE_LENGTH
        if value_size == -1:
            raise CorruptRecordException("Value of compressed message is None")
        data = self._buffer[pos:pos + value_size]

        compression_type = self.compression_type
        if compression_type == self.CODEC_GZIP:
            uncompressed = gzip_decode(data)
        elif compression_type == self.CODEC_SNAPPY:
            uncompressed = snappy_decode(data.tobytes())
        elif compression_type == self.CODEC_LZ4:
            # Magic-0 messages go through the dedicated old-Kafka LZ4 decoder.
            if self._magic == 0:
                uncompressed = lz4_decode_old_kafka(data.tobytes())
            else:
                uncompressed = lz4_decode(data.tobytes())
        else:
            raise ValueError(
                "Unsupported compression type: %r" % (compression_type,))
        return uncompressed
예제 #5
0
    def test_create_snappy(self):
        """Two payloads are wrapped into one magic-0 snappy message.

        Payloads and the expected buffer are bytes literals: ``struct.pack``
        returns bytes, so joining its output with text strings only works on
        Python 2. Bytes literals behave identically there and also work on
        Python 3.
        """
        payloads = [b"v1", b"v2"]
        msg = create_snappy_message(payloads)
        self.assertEqual(msg.magic, 0)
        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
        self.assertEqual(msg.key, None)
        decoded = snappy_decode(msg.value)
        expect = b"".join([
            struct.pack(">q", 0),  # MsgSet offset
            struct.pack(">i", 16),  # MsgSet size
            struct.pack(">i", 1285512130),  # CRC
            struct.pack(">bb", 0, 0),  # Magic, flags
            struct.pack(">i", -1),  # -1 indicates a null key
            struct.pack(">i", 2),  # Msg length (bytes)
            b"v1",  # Message contents
            struct.pack(">q", 0),  # MsgSet offset
            struct.pack(">i", 16),  # MsgSet size
            struct.pack(">i", -711587208),  # CRC
            struct.pack(">bb", 0, 0),  # Magic, flags
            struct.pack(">i", -1),  # -1 indicates a null key
            struct.pack(">i", 2),  # Msg length (bytes)
            b"v2",  # Message contents
        ])

        self.assertEqual(decoded, expect)
예제 #6
0
    def _decompress(self, key_offset):
        """Decompress and return the value stored at ``key_offset``.

        Reads the key length at ``key_offset`` in ``self._buffer`` (big-endian
        int32; ``-1`` means no key bytes follow), skips the key, reads the
        value length and slices out the value. The slice is then decoded
        according to ``self.compression_type``.

        Raises:
            CorruptRecordException: if the value length is ``-1``.
            ValueError: if ``self.compression_type`` matches none of gzip,
                snappy or LZ4. Before this check the method failed with an
                ``UnboundLocalError`` when returning.
        """
        # Copy of `_read_key_value`, but uses memoryview
        pos = key_offset
        key_size = struct.unpack_from(">i", self._buffer, pos)[0]
        pos += self.KEY_LENGTH
        if key_size != -1:
            pos += key_size
        value_size = struct.unpack_from(">i", self._buffer, pos)[0]
        pos += self.VALUE_LENGTH
        if value_size == -1:
            raise CorruptRecordException("Value of compressed message is None")
        data = self._buffer[pos:pos + value_size]

        compression_type = self.compression_type
        if compression_type == self.CODEC_GZIP:
            uncompressed = gzip_decode(data)
        elif compression_type == self.CODEC_SNAPPY:
            uncompressed = snappy_decode(data.tobytes())
        elif compression_type == self.CODEC_LZ4:
            # Magic-0 messages use the separate old-Kafka LZ4 decoder.
            if self._magic == 0:
                uncompressed = lz4_decode_old_kafka(data.tobytes())
            else:
                uncompressed = lz4_decode(data.tobytes())
        else:
            raise ValueError(
                "Unsupported compression type: %r" % (compression_type,))
        return uncompressed
예제 #7
0
    def _decode_message(cls, data, offset):
        """
        Decode one Message, expanding compressed payloads.

        Generator. The leading CRC is checked against ``crc32`` of everything
        after the first four bytes, then key and value are read. For an
        uncompressed message a single ``(offset, Message)`` pair is yielded
        using the caller-supplied offset. For gzip or snappy the value is
        decompressed and every ``(offset, message)`` pair produced by
        ``KafkaProtocol._decode_message_set_iter`` over the result is yielded.
        Any other codec value yields nothing.

        Raises ChecksumError when the CRC does not match.
        """
        ((crc, magic, att), cur) = relative_unpack('>IBB', data, 0)
        if crc != crc32(data[4:]):
            raise ChecksumError("Message checksum failed")

        (key, cur) = read_int_string(data, cur)
        (value, cur) = read_int_string(data, cur)

        codec = att & ATTRIBUTE_CODEC_MASK

        if codec == CODEC_NONE:
            yield (offset, Message(magic, att, key, value))
            return

        if codec == CODEC_GZIP:
            inner_set = gzip_decode(value)
        elif codec == CODEC_SNAPPY:
            inner_set = snappy_decode(value)
        else:
            return

        # Nested messages carry their own offsets inside the inner set.
        for (inner_offset, inner_msg) in KafkaProtocol._decode_message_set_iter(inner_set):
            yield (inner_offset, inner_msg)
예제 #8
0
 def test_snappy(self):
     """Check that snappy_decode undoes snappy_encode on random strings.

     When has_snappy() is falsy the test is marked as skipped rather than
     returning early and being counted as a pass.
     """
     if not has_snappy():
         self.skipTest("snappy is not available")
     for _ in xrange(ITERATIONS):
         source = random_string()
         restored = snappy_decode(snappy_encode(source))
         # assertEqual replaces the deprecated assertEquals alias.
         self.assertEqual(source, restored)
예제 #9
0
    def test_create_snappy_keyed(self):
        """Two keyed payloads are wrapped into one magic-0 snappy message.

        The wrapper itself has no key; the snappy-decoded value must equal
        the two keyed inner messages laid out back to back.
        """
        msg = create_snappy_message([(b"v1", b"k1"), (b"v2", b"k2")])

        self.assertEqual(msg.magic, 0)
        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
        self.assertEqual(msg.key, None)
        decoded = snappy_decode(msg.value)

        # Big-endian layout per inner message: offset (q), size (i), CRC (i),
        # magic and flags (bb), key length (i), key, value length (i), value.
        fixed_part = struct.Struct(">qiibbi")
        value_length = struct.pack(">i", 2)
        expect = b""
        for crc, key, value in (
            (1474775406, b"k1", b"v1"),
            (-16383415, b"k2", b"v2"),
        ):
            expect += fixed_part.pack(0, 18, crc, 0, 0, 2) + key
            expect += value_length + value

        self.assertEqual(decoded, expect)
예제 #10
0
    def _decode_message(cls, data, offset):
        """
        Decode a single Message

        The only caller of this method is decode_message_set_iter.
        They are decoupled to support nested messages (compressed MessageSets).
        The offset is actually read from decode_message_set_iter (it is part
        of the MessageSet payload).

        Generator: yields one (offset, Message) pair for an uncompressed
        message, or every pair decoded from the decompressed value for gzip
        and snappy messages. Any other codec value yields nothing.

        Raises ChecksumError when the stored CRC does not match the CRC of
        the bytes following it.
        """
        ((crc, magic, att), cur) = relative_unpack('>iBB', data, 0)
        # '>i' always yields a signed value, while zlib.crc32 is signed on
        # Python 2 and unsigned on Python 3. Masking both sides to 32 bits
        # makes the comparison independent of the interpreter version.
        if (crc & 0xffffffff) != (zlib.crc32(data[4:]) & 0xffffffff):
            raise ChecksumError("Message checksum failed")

        (key, cur) = read_int_string(data, cur)
        (value, cur) = read_int_string(data, cur)

        codec = att & KafkaProtocol.ATTRIBUTE_CODEC_MASK

        if codec == KafkaProtocol.CODEC_NONE:
            yield (offset, Message(magic, att, key, value))

        elif codec == KafkaProtocol.CODEC_GZIP:
            gz = gzip_decode(value)
            # Nested messages carry their own offsets inside the inner set.
            for (offset, msg) in KafkaProtocol._decode_message_set_iter(gz):
                yield (offset, msg)

        elif codec == KafkaProtocol.CODEC_SNAPPY:
            snp = snappy_decode(value)
            for (offset, msg) in KafkaProtocol._decode_message_set_iter(snp):
                yield (offset, msg)
예제 #11
0
    def test_snappy_decode_xerial(self):
        """snappy_decode must join two length-prefixed blocks after the header."""
        header = b"\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01"
        random_snappy = snappy_encode(b"SNAPPY" * 50)
        block_len = len(random_snappy)
        random_snappy2 = snappy_encode(b"XERIAL" * 50)
        block_len2 = len(random_snappy2)

        # Each block is preceded by its length as a network-order int32.
        to_test = (
            header
            + struct.pack("!i", block_len) + random_snappy
            + struct.pack("!i", block_len2) + random_snappy2
        )
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            snappy_decode(to_test), (b"SNAPPY" * 50) + (b"XERIAL" * 50))
예제 #12
0
 def test_create_snappy(self):
     """Check the snappy wrapper message and the single message inside it.

     Uses assertEqual throughout; assertEquals is a deprecated alias that
     newer unittest versions no longer provide.
     """
     msg = KafkaClient.create_snappy_message("testing")
     self.assertEqual(msg.magic, 1)
     self.assertEqual(msg.attributes, 2)
     self.assertEqual(msg.crc, -62350868)
     # The wrapper payload is a snappy-compressed message set.
     (messages, _) = KafkaClient.read_message_set(snappy_decode(msg.payload))
     inner = messages[0]
     self.assertEqual(inner.magic, 1)
     self.assertEqual(inner.attributes, 0)
     self.assertEqual(inner.payload, "testing")
     self.assertEqual(inner.crc, -386704890)
예제 #13
0
 def test_create_snappy(self):
     """Verify the outer snappy message and the first message it contains.

     assertEqual is used in place of the deprecated assertEquals alias.
     """
     msg = KafkaClient.create_snappy_message("testing")
     self.assertEqual(msg.magic, 1)
     self.assertEqual(msg.attributes, 2)
     self.assertEqual(msg.crc, -62350868)
     # Decompress the payload, then parse it as a message set.
     decompressed = snappy_decode(msg.payload)
     (messages, _) = KafkaClient.read_message_set(decompressed)
     inner = messages[0]
     self.assertEqual(inner.magic, 1)
     self.assertEqual(inner.attributes, 0)
     self.assertEqual(inner.payload, "testing")
     self.assertEqual(inner.crc, -386704890)
예제 #14
0
def test_snappy_decode_xerial():
    """snappy_decode must join two length-prefixed blocks after the header."""
    header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'
    first_block = snappy_encode(b'SNAPPY' * 50, xerial_compatible=False)
    first_len = len(first_block)
    second_block = snappy_encode(b'XERIAL' * 50, xerial_compatible=False)
    second_len = len(second_block)

    # Each block is preceded by its length as a network-order int32.
    to_test = (
        header
        + struct.pack('!i', first_len) + first_block
        + struct.pack('!i', second_len) + second_block
    )

    assert snappy_decode(to_test) == (b'SNAPPY' * 50) + (b'XERIAL' * 50)
예제 #15
0
    def test_snappy_decode_xerial(self):
        """snappy_decode must join two length-prefixed blocks after the header.

        Payloads are bytes literals so they can be concatenated with the
        bytes header on Python 3; on Python 2 they are identical to the
        plain string literals used before.
        """
        header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'
        random_snappy = snappy_encode(b'SNAPPY' * 50)
        block_len = len(random_snappy)
        random_snappy2 = snappy_encode(b'XERIAL' * 50)
        block_len2 = len(random_snappy2)

        # Parentheses replace the backslash continuations, the last of which
        # dangled onto a blank line.
        to_test = (
            header
            + struct.pack('!i', block_len) + random_snappy
            + struct.pack('!i', block_len2) + random_snappy2
        )

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            snappy_decode(to_test), (b'SNAPPY' * 50) + (b'XERIAL' * 50))
예제 #16
0
def test_snappy_decode_xerial():
    """Decode a header plus two length-prefixed snappy blocks in one call."""
    header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'
    block_a = snappy_encode(b'SNAPPY' * 50, xerial_compatible=False)
    size_a = len(block_a)
    block_b = snappy_encode(b'XERIAL' * 50, xerial_compatible=False)
    size_b = len(block_b)

    # Every block is prefixed with its length packed as a network-order int32.
    to_test = (
        header
        + struct.pack('!i', size_a) + block_a
        + struct.pack('!i', size_b) + block_b
    )

    assert snappy_decode(to_test) == (b'SNAPPY' * 50) + (b'XERIAL' * 50)
예제 #17
0
    def test_snappy_decode_xerial(self):
        """snappy_decode must join two length-prefixed blocks after the header."""
        header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'
        random_snappy = snappy_encode(b'SNAPPY' * 50)
        block_len = len(random_snappy)
        random_snappy2 = snappy_encode(b'XERIAL' * 50)
        block_len2 = len(random_snappy2)

        # Parentheses replace the backslash continuations, the last of which
        # dangled onto a blank line.
        to_test = (
            header
            + struct.pack('!i', block_len) + random_snappy
            + struct.pack('!i', block_len2) + random_snappy2
        )

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(snappy_decode(to_test),
                         (b'SNAPPY' * 50) + (b'XERIAL' * 50))
예제 #18
0
 def _maybe_uncompress(self):
     """Decompress the unread part of the buffer in place, at most once.

     When ``self._decompressed`` is false and ``self.compression_type`` is
     not ``CODEC_NONE``, the bytes from ``self._pos`` onward are decoded
     with the matching codec, ``self._buffer`` is replaced by a bytearray
     of the result and ``self._pos`` is reset to 0. ``self._decompressed``
     is set to True on every successful return.

     Raises:
         ValueError: if the compression type is neither ``CODEC_NONE`` nor
             gzip, snappy or LZ4. Previously this case failed with an
             ``UnboundLocalError``.
     """
     if not self._decompressed:
         compression_type = self.compression_type
         if compression_type != self.CODEC_NONE:
             data = memoryview(self._buffer)[self._pos:]
             if compression_type == self.CODEC_GZIP:
                 uncompressed = gzip_decode(data)
             elif compression_type == self.CODEC_SNAPPY:
                 uncompressed = snappy_decode(data.tobytes())
             elif compression_type == self.CODEC_LZ4:
                 uncompressed = lz4_decode(data.tobytes())
             else:
                 raise ValueError(
                     "Unsupported compression type: %r" % (compression_type,))
             self._buffer = bytearray(uncompressed)
             self._pos = 0
     self._decompressed = True
예제 #19
0
    def decompress(self):
        """Decompress ``self.value`` and decode the result as a MessageSet.

        The codec comes from ``self.attributes`` masked with
        ``self.CODEC_MASK``. For LZ4, magic-0 messages are decoded with
        ``lz4_decode_old_kafka`` and all others with ``lz4_decode``.

        Returns:
            Whatever ``MessageSet.decode`` returns for the decompressed bytes.

        Raises:
            AssertionError: if the codec is not gzip/snappy/lz4, or the
                matching ``has_*()`` check returns a falsy value.
        """
        codec = self.attributes & self.CODEC_MASK
        supported = (self.CODEC_GZIP, self.CODEC_SNAPPY, self.CODEC_LZ4)
        assert codec in supported

        # Resolve the decoder per codec, then decompress once below.
        if codec == self.CODEC_GZIP:
            assert has_gzip(), 'Gzip decompression unsupported'
            decode = gzip_decode
        elif codec == self.CODEC_SNAPPY:
            assert has_snappy(), 'Snappy decompression unsupported'
            decode = snappy_decode
        elif codec == self.CODEC_LZ4:
            assert has_lz4(), 'LZ4 decompression unsupported'
            if self.magic == 0:
                decode = lz4_decode_old_kafka
            else:
                decode = lz4_decode
        else:
            # Only reachable when assertions are disabled.
            raise Exception('This should be impossible')

        raw_bytes = decode(self.value)
        return MessageSet.decode(raw_bytes, bytes_to_read=len(raw_bytes))
예제 #20
0
 def test_snappy(self):
     """Round-trip 1000 random UTF-8 payloads through snappy."""
     # range works on both Python 2 and 3; xrange is a Python 2 builtin.
     for _ in range(1000):
         original = random_string(100).encode('utf-8')
         round_tripped = snappy_decode(snappy_encode(original))
         self.assertEqual(original, round_tripped)
예제 #21
0
 def test_snappy(self):
     """Check snappy_decode(snappy_encode(x)) == x for 1000 random payloads."""
     # xrange only exists as a builtin on Python 2; range behaves the same
     # for this loop on either version.
     for _ in range(1000):
         payload = random_string(100).encode('utf-8')
         restored = snappy_decode(snappy_encode(payload))
         self.assertEqual(payload, restored)
예제 #22
0
 def test_snappy(self):
     """Round-trip 1000 random byte strings through snappy."""
     for _ in compat.xrange(1000):
         original = compat.bytes(random_string(100))
         round_tripped = snappy_decode(snappy_encode(original))
         # assertEqual replaces the deprecated assertEquals alias.
         self.assertEqual(original, round_tripped)
예제 #23
0
def test_snappy():
    """Round-trip 1000 random UTF-8 payloads through snappy."""
    # range works on both Python 2 and 3; xrange is a Python 2 builtin.
    for _ in range(1000):
        original = random_string(100).encode('utf-8')
        round_tripped = snappy_decode(snappy_encode(original))
        assert original == round_tripped
예제 #24
0
def test_snappy():
    """Check that snappy_decode inverts snappy_encode on 1000 random payloads."""
    for _ in range(1000):
        payload = random_string(100).encode('utf-8')
        assert payload == snappy_decode(snappy_encode(payload))
예제 #25
0
 def test_snappy(self):
     """Round-trip 1000 random strings through snappy."""
     # range works on both Python 2 and 3; xrange is a Python 2 builtin.
     for _ in range(1000):
         original = random_string(100)
         round_tripped = snappy_decode(snappy_encode(original))
         # assertEqual replaces the deprecated assertEquals alias.
         self.assertEqual(original, round_tripped)
예제 #26
0
 def test_snappy(self):
     """Check snappy_decode(snappy_encode(x)) == x for 1000 random strings."""
     # xrange only exists as a builtin on Python 2; range behaves the same
     # for this loop on either version.
     for _ in range(1000):
         source = random_string(100)
         restored = snappy_decode(snappy_encode(source))
         self.assertEqual(source, restored)