def _maybe_compress(self):
    """Compress the accumulated buffer in place, if compression is enabled.

    Returns:
        True when the buffer was replaced with a single compressed wrapper
        message, False when no compression type is configured.
    """
    if not self._compression_type:
        return False
    self._assert_has_codec(self._compression_type)
    raw = bytes(self._buffer)
    if self._compression_type == self.CODEC_GZIP:
        payload = gzip_encode(raw)
    elif self._compression_type == self.CODEC_SNAPPY:
        payload = snappy_encode(raw)
    elif self._compression_type == self.CODEC_LZ4:
        # Kafka message format v0 used a non-standard LZ4 framing.
        if self._magic == 0:
            payload = lz4_encode_old_kafka(raw)
        else:
            payload = lz4_encode(raw)
    needed = self.size_in_bytes(0, timestamp=0, key=None, value=payload)
    # Reuse the existing buffer when it is already large enough;
    # otherwise allocate a fresh one of the exact size.
    if needed > len(self._buffer):
        self._buffer = bytearray(needed)
    else:
        del self._buffer[needed:]
    self._encode_msg(
        start_pos=0, offset=0, timestamp=0, key=None,
        value=payload, attributes=self._compression_type)
    return True
def test__unpack_message_set_compressed_v1(fetcher):
    """Relative offsets inside a compressed v1 wrapper resolve against the
    wrapper's (last-message) absolute offset."""
    fetcher.config['check_crcs'] = False
    tp = TopicPartition('foo', 0)
    inner = [
        (0, None, Message(b'a')),
        (1, None, Message(b'b')),
        (2, None, Message(b'c')),
    ]
    chunks = []
    for rel_offset, _, msg in inner:
        payload = msg.encode()
        chunks.append(
            Int64.encode(rel_offset) + Int32.encode(len(payload)) + payload)
    wrapper_offset = 10
    wrapper = Message(gzip_encode(b''.join(chunks)),
                      magic=1, attributes=Message.CODEC_GZIP)
    records = list(
        fetcher._unpack_message_set(tp, [(wrapper_offset, None, wrapper)]))
    assert len(records) == 3
    assert all(isinstance(rec, ConsumerRecord) for rec in records)
    assert [rec.value for rec in records] == [b'a', b'b', b'c']
    # Wrapper offset 10 names the LAST inner message, so offsets are 8..10.
    assert [rec.offset for rec in records] == [8, 9, 10]
def _maybe_compress(self):
    """Replace the buffered messages with one compressed wrapper message.

    Returns:
        True if compression was applied, False when compression is disabled.
    """
    if not self._compression_type:
        return False
    buf = self._buffer
    if self._compression_type == self.CODEC_GZIP:
        payload = gzip_encode(buf)
    elif self._compression_type == self.CODEC_SNAPPY:
        payload = snappy_encode(buf)
    elif self._compression_type == self.CODEC_LZ4:
        # The lz4 helpers need a bytes object; magic v0 uses the old
        # (broken) Kafka LZ4 framing.
        if self._magic == 0:
            payload = lz4_encode_old_kafka(bytes(buf))
        else:
            payload = lz4_encode(bytes(buf))
    payload_size = len(payload)
    total = self._size_in_bytes(key_size=0, value_size=payload_size)
    # Grow the buffer if needed, otherwise trim it to the exact size.
    if total > len(self._buffer):
        self._buffer = bytearray(total)
    else:
        del self._buffer[total:]
    self._encode_msg(
        self._buffer, offset=0, timestamp=0, key_size=0, key=None,
        value_size=payload_size, value=payload,
        attributes=self._compression_type)
    self._pos = total
    return True
def test_gzip(self):
    """Round-trip random strings through gzip encode/decode."""
    if not has_gzip():
        # Report an explicit skip instead of silently passing when the
        # gzip codec is unavailable.
        self.skipTest('gzip codec unavailable')
    for _ in xrange(ITERATIONS):
        original = random_string()
        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(original, gzip_decode(gzip_encode(original)))
def _maybe_compress(self):
    """Compress the record-batch body in place (header left untouched).

    Returns:
        True when the buffer now holds compressed data, False when
        compression is disabled or did not shrink the payload.
    """
    if self._compression_type == self.CODEC_NONE:
        return False
    self._assert_has_codec(self._compression_type)
    header_size = self.HEADER_STRUCT.size
    body = bytes(self._buffer[header_size:])
    if self._compression_type == self.CODEC_GZIP:
        packed = gzip_encode(body)
    elif self._compression_type == self.CODEC_SNAPPY:
        packed = snappy_encode(body)
    elif self._compression_type == self.CODEC_LZ4:
        packed = lz4_encode(body)
    elif self._compression_type == self.CODEC_ZSTD:
        packed = zstd_encode(body)
    if len(body) <= len(packed):
        # Compression brought no size benefit; send uncompressed.
        return False
    # Splice the compressed body in right after the header, trim the tail.
    end = header_size + len(packed)
    del self._buffer[end:]
    self._buffer[header_size:end] = packed
    return True
def _maybe_compress(self):
    """Compress the whole buffer into a single wrapper message, in place.

    Returns:
        True when compression was applied, False when no compression
        type is configured.
    """
    if self._compression_type:
        # Raises if the required codec library is not installed.
        self._assert_has_codec(self._compression_type)
        data = bytes(self._buffer)
        if self._compression_type == self.CODEC_GZIP:
            compressed = gzip_encode(data)
        elif self._compression_type == self.CODEC_SNAPPY:
            compressed = snappy_encode(data)
        elif self._compression_type == self.CODEC_LZ4:
            # Magic v0 used a non-standard LZ4 framing.
            if self._magic == 0:
                compressed = lz4_encode_old_kafka(data)
            else:
                compressed = lz4_encode(data)
        size = self.size_in_bytes(
            0, timestamp=0, key=None, value=compressed)
        # We will try to reuse the same buffer if we have enough space
        if size > len(self._buffer):
            self._buffer = bytearray(size)
        else:
            del self._buffer[size:]
        self._encode_msg(
            start_pos=0, offset=0, timestamp=0, key=None,
            value=compressed, attributes=self._compression_type)
        return True
    return False
def _maybe_compress(self):
    """Compress the buffered messages into one wrapper message, in place.

    Returns:
        True when compression was applied, False when no compression
        type is configured.
    """
    if self._compression_type:
        buf = self._buffer
        if self._compression_type == self.CODEC_GZIP:
            compressed = gzip_encode(buf)
        elif self._compression_type == self.CODEC_SNAPPY:
            compressed = snappy_encode(buf)
        elif self._compression_type == self.CODEC_LZ4:
            # lz4 helpers require bytes; magic v0 uses the old Kafka framing.
            if self._magic == 0:
                compressed = lz4_encode_old_kafka(bytes(buf))
            else:
                compressed = lz4_encode(bytes(buf))
        compressed_size = len(compressed)
        size = self._size_in_bytes(key_size=0, value_size=compressed_size)
        # Reuse the existing buffer when it is large enough, else reallocate.
        if size > len(self._buffer):
            self._buffer = bytearray(size)
        else:
            del self._buffer[size:]
        self._encode_msg(
            self._buffer, offset=0, timestamp=0, key_size=0, key=None,
            value_size=compressed_size, value=compressed,
            attributes=self._compression_type)
        # Track the write position so later appends/closes know the end.
        self._pos = size
        return True
    return False
def create_gzip_message(payloads, key=None):
    """Build a single gzip-compressed Message wrapping multiple payloads.

    The given payloads are each encoded as a message, concatenated into
    a message set, compressed, and wrapped in one atomic Kafka message.

    Params
    ======
    payloads: list(bytes), a list of payload to send be sent to Kafka
    key: bytes, a key used for partition routing (optional)
    """
    inner = [create_message(payload) for payload in payloads]
    compressed = gzip_encode(KafkaProtocol._encode_message_set(inner))
    attrs = 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK &
                    KafkaProtocol.CODEC_GZIP)
    return Message(0, attrs, key, compressed)
def create_gzip_message(payloads, key=None, compresslevel=None):
    """
    Construct a Gzipped Message containing multiple Messages

    The given payloads will be encoded, compressed, and sent as a single
    atomic message to Kafka.

    Arguments:
        payloads: list of (payload, key) tuples, where payload is bytes;
            each pair is encoded as one inner message
        key: bytes, a key used for partition routing (optional)
        compresslevel: int, gzip compression level forwarded to
            gzip_encode (optional; None uses the codec default)
    """
    message_set = KafkaProtocol._encode_message_set(
        [create_message(payload, pl_key) for payload, pl_key in payloads])
    gzipped = gzip_encode(message_set, compresslevel=compresslevel)
    codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP
    return kafka.common.Message(0, 0x00 | codec, key, gzipped)
def create_gzip_message(payloads, key=None, compresslevel=None):
    """
    Construct a Gzipped Message containing multiple Messages

    The given payloads will be encoded, compressed, and sent as a single
    atomic message to Kafka.

    Arguments:
        payloads: list of (payload, key) tuples, where payload is bytes;
            each pair is encoded as one inner message
        key: bytes, a key used for partition routing (optional)
        compresslevel: int, gzip compression level forwarded to
            gzip_encode (optional; None uses the codec default)
    """
    message_set = KafkaProtocol._encode_message_set(
        [create_message(payload, pl_key) for payload, pl_key in payloads])
    gzipped = gzip_encode(message_set, compresslevel=compresslevel)
    codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP
    return kafka.structs.Message(0, 0x00 | codec, key, gzipped)
def create_gzip_message(payloads, key=None):
    """Build one gzip-compressed Message wrapping multiple payloads.

    Each payload is encoded as an inner message; the resulting message
    set is compressed and sent as a single atomic message to Kafka.

    Params
    ======
    payloads: list(bytes), a list of payload to send be sent to Kafka
    key: bytes, a key used for partition routing (optional)
    """
    inner = [create_message(payload) for payload in payloads]
    compressed = gzip_encode(KafkaProtocol._encode_message_set(inner))
    codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP
    return Message(0, 0x00 | codec, key, compressed)
def _maybe_compress(self):
    """Compress the batch body in place, leaving the header intact.

    Returns:
        True when the buffer now holds compressed data, False when
        compression is disabled or produced no size benefit.
    """
    if self._compression_type == self.CODEC_NONE:
        return False
    header_size = self.HEADER_STRUCT.size
    body = bytes(self._buffer[header_size:])
    if self._compression_type == self.CODEC_GZIP:
        packed = gzip_encode(body)
    elif self._compression_type == self.CODEC_SNAPPY:
        packed = snappy_encode(body)
    elif self._compression_type == self.CODEC_LZ4:
        packed = lz4_encode(body)
    if len(body) <= len(packed):
        # No win from compression; keep the uncompressed buffer as-is.
        return False
    # Splice the compressed body in after the header and trim the tail.
    end = header_size + len(packed)
    del self._buffer[end:]
    self._buffer[header_size:end] = packed
    return True
def test_gzip(self):
    """Gzip round-trips arbitrary byte strings unchanged."""
    for _ in compat.xrange(1000):
        original = compat.bytes(random_string(100))
        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(original, gzip_decode(gzip_encode(original)))
def test_gzip(self):
    """Encoding then decoding with gzip is the identity on bytes."""
    for _ in xrange(1000):
        payload = random_string(100).encode('utf-8')
        self.assertEqual(payload, gzip_decode(gzip_encode(payload)))
def test_gzip():
    """gzip_decode inverts gzip_encode for random byte payloads."""
    for _ in range(1000):
        payload = random_string(100).encode('utf-8')
        assert gzip_decode(gzip_encode(payload)) == payload
def test_gzip(self):
    """Round-trip random strings through the gzip codec."""
    for _ in xrange(1000):
        original = random_string(100)
        self.assertEqual(original, gzip_decode(gzip_encode(original)))
def test_gzip(self):
    """Round-trip random strings through the gzip codec."""
    for _ in xrange(1000):
        original = random_string(100)
        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(original, gzip_decode(gzip_encode(original)))
def test_gzip():
    """gzip_decode inverts gzip_encode for random byte payloads."""
    # range instead of xrange: portable across Python 2/3 without a compat
    # shim, identical behavior when only iterated.
    for _ in range(1000):
        payload = random_string(100).encode('utf-8')
        assert gzip_decode(gzip_encode(payload)) == payload