Ejemplo n.º 1
0
 def _maybe_compress(self):
     """Compress the accumulated buffer in place if a codec is configured.

     Returns True when the buffer now holds a single compressed wrapper
     message, False when no compression type is set.
     """
     if not self._compression_type:
         return False
     self._assert_has_codec(self._compression_type)
     raw = bytes(self._buffer)
     if self._compression_type == self.CODEC_GZIP:
         payload = gzip_encode(raw)
     elif self._compression_type == self.CODEC_SNAPPY:
         payload = snappy_encode(raw)
     elif self._compression_type == self.CODEC_LZ4:
         # Magic v0 requires the legacy Kafka-specific LZ4 framing.
         encoder = lz4_encode_old_kafka if self._magic == 0 else lz4_encode
         payload = encoder(raw)
     needed = self.size_in_bytes(0, timestamp=0, key=None, value=payload)
     # Reuse the existing buffer when it already has enough room.
     if needed > len(self._buffer):
         self._buffer = bytearray(needed)
     else:
         del self._buffer[needed:]
     self._encode_msg(start_pos=0,
                      offset=0,
                      timestamp=0,
                      key=None,
                      value=payload,
                      attributes=self._compression_type)
     return True
Ejemplo n.º 2
0
def test__unpack_message_set_compressed_v1(fetcher):
    """Relative offsets inside a v1 compressed wrapper must resolve
    against the wrapper message's absolute offset."""
    fetcher.config['check_crcs'] = False
    tp = TopicPartition('foo', 0)
    inner = [
        (0, None, Message(b'a')),
        (1, None, Message(b'b')),
        (2, None, Message(b'c')),
    ]
    chunks = []
    for offset, _, msg in inner:
        payload = msg.encode()
        chunks.append(
            Int64.encode(offset) + Int32.encode(len(payload)) + payload)
    wrapper_value = gzip_encode(b''.join(chunks))
    base_offset = 10
    wrapper = [(base_offset, None,
                Message(wrapper_value,
                        magic=1,
                        attributes=Message.CODEC_GZIP))]
    records = list(fetcher._unpack_message_set(tp, wrapper))
    assert len(records) == 3
    for rec in records:
        assert isinstance(rec, ConsumerRecord)
    assert [rec.value for rec in records] == [b'a', b'b', b'c']
    assert [rec.offset for rec in records] == [8, 9, 10]
Ejemplo n.º 3
0
 def _maybe_compress(self):
     """Replace the buffer contents with one compressed wrapper message.

     Returns True if compression was applied, False otherwise.
     """
     if not self._compression_type:
         return False
     raw = self._buffer
     if self._compression_type == self.CODEC_GZIP:
         body = gzip_encode(raw)
     elif self._compression_type == self.CODEC_SNAPPY:
         body = snappy_encode(raw)
     elif self._compression_type == self.CODEC_LZ4:
         # v0 messages require the old Kafka-specific LZ4 framing.
         if self._magic == 0:
             body = lz4_encode_old_kafka(bytes(raw))
         else:
             body = lz4_encode(bytes(raw))
     body_size = len(body)
     total = self._size_in_bytes(key_size=0, value_size=body_size)
     # Grow the buffer only when necessary; otherwise trim it down.
     if total > len(self._buffer):
         self._buffer = bytearray(total)
     else:
         del self._buffer[total:]
     self._encode_msg(self._buffer,
                      offset=0,
                      timestamp=0,
                      key_size=0,
                      key=None,
                      value_size=body_size,
                      value=body,
                      attributes=self._compression_type)
     self._pos = total
     return True
Ejemplo n.º 4
0
 def test_gzip(self):
     """Round-trip random strings through gzip encode/decode."""
     if not has_gzip():
         return
     for _ in xrange(ITERATIONS):
         original = random_string()
         self.assertEquals(original, gzip_decode(gzip_encode(original)))
Ejemplo n.º 5
0
 def test_gzip(self):
     """Verify gzip encode/decode round-trips arbitrary strings."""
     if not has_gzip():
         return
     for _ in xrange(ITERATIONS):
         sample = random_string()
         decoded = gzip_decode(gzip_encode(sample))
         self.assertEquals(sample, decoded)
 def _maybe_compress(self):
     """Compress the message payload (everything after the batch header)
     in place.

     Returns True when the buffer was rewritten with compressed data;
     False when compression is off or gave no size reduction.
     """
     if self._compression_type == self.CODEC_NONE:
         return False
     self._assert_has_codec(self._compression_type)
     header_size = self.HEADER_STRUCT.size
     body = bytes(self._buffer[header_size:])
     if self._compression_type == self.CODEC_GZIP:
         packed = gzip_encode(body)
     elif self._compression_type == self.CODEC_SNAPPY:
         packed = snappy_encode(body)
     elif self._compression_type == self.CODEC_LZ4:
         packed = lz4_encode(body)
     elif self._compression_type == self.CODEC_ZSTD:
         packed = zstd_encode(body)
     # Keep the compressed form only when it is strictly smaller.
     if len(body) <= len(packed):
         return False
     end = header_size + len(packed)
     del self._buffer[end:]
     self._buffer[header_size:end] = packed
     return True
Ejemplo n.º 7
0
 def _maybe_compress(self):
     """If a compression codec is configured, rewrite the buffer as one
     compressed wrapper message and return True; otherwise return False."""
     if self._compression_type:
         self._assert_has_codec(self._compression_type)
         uncompressed = bytes(self._buffer)
         if self._compression_type == self.CODEC_GZIP:
             wrapped = gzip_encode(uncompressed)
         elif self._compression_type == self.CODEC_SNAPPY:
             wrapped = snappy_encode(uncompressed)
         elif self._compression_type == self.CODEC_LZ4:
             if self._magic == 0:
                 # Legacy framing for magic v0 messages.
                 wrapped = lz4_encode_old_kafka(uncompressed)
             else:
                 wrapped = lz4_encode(uncompressed)
         total_size = self.size_in_bytes(
             0, timestamp=0, key=None, value=wrapped)
         # Reuse the current buffer when it already has enough space.
         if total_size <= len(self._buffer):
             del self._buffer[total_size:]
         else:
             self._buffer = bytearray(total_size)
         self._encode_msg(
             start_pos=0, offset=0, timestamp=0,
             key=None, value=wrapped,
             attributes=self._compression_type)
         return True
     return False
Ejemplo n.º 8
0
 def _maybe_compress(self):
     """Compress the whole buffer into a single wrapper message in place.

     Returns True when compression was applied, False when disabled."""
     if self._compression_type:
         source = self._buffer
         if self._compression_type == self.CODEC_GZIP:
             value = gzip_encode(source)
         elif self._compression_type == self.CODEC_SNAPPY:
             value = snappy_encode(source)
         elif self._compression_type == self.CODEC_LZ4:
             # Old Kafka LZ4 framing is required for magic v0.
             raw = bytes(source)
             value = (lz4_encode_old_kafka(raw)
                      if self._magic == 0 else lz4_encode(raw))
         value_size = len(value)
         new_size = self._size_in_bytes(key_size=0, value_size=value_size)
         if new_size <= len(self._buffer):
             del self._buffer[new_size:]
         else:
             self._buffer = bytearray(new_size)
         self._encode_msg(
             self._buffer, offset=0, timestamp=0,
             key_size=0, key=None,
             value_size=value_size, value=value,
             attributes=self._compression_type)
         self._pos = new_size
         return True
     return False
Ejemplo n.º 9
0
def create_gzip_message(payloads, key=None):
    """
    Construct a gzip-compressed Message wrapping multiple Messages.

    Each payload is encoded as an inner message; the resulting message
    set is gzipped and sent as a single atomic message to Kafka.

    Params
    ======
    payloads: list(bytes), a list of payload to send be sent to Kafka
    key: bytes, a key used for partition routing (optional)
    """
    inner = [create_message(p) for p in payloads]
    message_set = KafkaProtocol._encode_message_set(inner)
    codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP
    return Message(0, 0x00 | codec, key, gzip_encode(message_set))
Ejemplo n.º 10
0
def create_gzip_message(payloads, key=None, compresslevel=None):
    """
    Construct a Gzipped Message containing multiple Messages

    The given payloads will be encoded, compressed, and sent as a single atomic
    message to Kafka.

    Arguments:
        payloads: list of (payload, key) tuples -- each entry is unpacked as
            (payload, pl_key) and encoded as an inner message with its own
            per-message key (which may be None)
        key: bytes, a key used for partition routing of the wrapper message
            (optional)
        compresslevel: int, gzip compression level forwarded to gzip_encode
            (optional; None uses the codec's default level)

    """
    message_set = KafkaProtocol._encode_message_set(
        [create_message(payload, pl_key) for payload, pl_key in payloads])

    gzipped = gzip_encode(message_set, compresslevel=compresslevel)
    # Mask the codec bits into the attributes byte.
    codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP

    return kafka.common.Message(0, 0x00 | codec, key, gzipped)
Ejemplo n.º 11
0
def create_gzip_message(payloads, key=None, compresslevel=None):
    """
    Construct a Gzipped Message containing multiple Messages

    The given payloads will be encoded, compressed, and sent as a single atomic
    message to Kafka.

    Arguments:
        payloads: list of (payload, key) tuples -- each entry is unpacked as
            (payload, pl_key) and encoded as an inner message with its own
            per-message key (which may be None)
        key: bytes, a key used for partition routing of the wrapper message
            (optional)
        compresslevel: int, gzip compression level forwarded to gzip_encode
            (optional; None uses the codec's default level)

    """
    message_set = KafkaProtocol._encode_message_set(
        [create_message(payload, pl_key) for payload, pl_key in payloads])

    gzipped = gzip_encode(message_set, compresslevel=compresslevel)
    # Mask the codec bits into the attributes byte.
    codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP

    return kafka.structs.Message(0, 0x00 | codec, key, gzipped)
Ejemplo n.º 12
0
def create_gzip_message(payloads, key=None):
    """
    Construct a gzip-compressed Message wrapping multiple Messages.

    The payloads are encoded into a message set, gzipped, and returned
    as one atomic Kafka message.

    Params
    ======
    payloads: list(bytes), a list of payload to send be sent to Kafka
    key: bytes, a key used for partition routing (optional)
    """
    inner_messages = [create_message(p) for p in payloads]
    message_set = KafkaProtocol._encode_message_set(inner_messages)

    compressed = gzip_encode(message_set)
    attrs = 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK
                    & KafkaProtocol.CODEC_GZIP)

    return Message(0, attrs, key, compressed)
Ejemplo n.º 13
0
 def _maybe_compress(self):
     """Compress everything after the batch header in place.

     Returns True only when the compressed payload is smaller than the
     original; otherwise the buffer is left untouched."""
     if self._compression_type == self.CODEC_NONE:
         return False
     header_len = self.HEADER_STRUCT.size
     payload = bytes(self._buffer[header_len:])
     if self._compression_type == self.CODEC_GZIP:
         squeezed = gzip_encode(payload)
     elif self._compression_type == self.CODEC_SNAPPY:
         squeezed = snappy_encode(payload)
     elif self._compression_type == self.CODEC_LZ4:
         squeezed = lz4_encode(payload)
     if len(payload) <= len(squeezed):
         # No size benefit from compression; send uncompressed.
         return False
     end = header_len + len(squeezed)
     del self._buffer[end:]
     self._buffer[header_len:end] = squeezed
     return True
Ejemplo n.º 14
0
 def test_gzip(self):
     """Gzip round-trip preserves random byte strings."""
     for _ in compat.xrange(1000):
         original = compat.bytes(random_string(100))
         round_tripped = gzip_decode(gzip_encode(original))
         self.assertEquals(original, round_tripped)
Ejemplo n.º 15
0
 def test_gzip(self):
     """Encoding then decoding with gzip returns the original bytes."""
     for _ in xrange(1000):
         data = random_string(100).encode('utf-8')
         self.assertEqual(data, gzip_decode(gzip_encode(data)))
Ejemplo n.º 16
0
def test_gzip():
    """Gzip encode/decode must round-trip arbitrary utf-8 bytes."""
    for _ in range(1000):
        payload = random_string(100).encode('utf-8')
        restored = gzip_decode(gzip_encode(payload))
        assert restored == payload
Ejemplo n.º 17
0
 def test_gzip(self):
     """Round-trip random strings through the gzip codec."""
     for _ in xrange(1000):
         sample = random_string(100)
         self.assertEqual(sample, gzip_decode(gzip_encode(sample)))
Ejemplo n.º 18
0
 def test_gzip(self):
     """gzip_decode(gzip_encode(x)) must equal x for random strings."""
     for _ in xrange(1000):
         data = random_string(100)
         result = gzip_decode(gzip_encode(data))
         self.assertEquals(data, result)
Ejemplo n.º 19
0
def test_gzip():
    """Gzip round-trip returns the exact original bytes."""
    for _ in xrange(1000):
        source = random_string(100).encode('utf-8')
        assert gzip_decode(gzip_encode(source)) == source
Ejemplo n.º 20
0
 def test_gzip(self):
     """Compressing then decompressing yields the original bytes."""
     for _ in xrange(1000):
         blob = random_string(100).encode('utf-8')
         self.assertEqual(blob, gzip_decode(gzip_encode(blob)))