def test_snappy_decode_xerial(self):
    """Check that ``snappy_decode`` reassembles a hand-framed two-block stream.

    The input is a fixed 16-byte header followed by two compressed blocks,
    each preceded by its length packed as a big-endian 32-bit signed
    integer (``struct.pack("!i", ...)``). The decoded result must equal the
    two original payloads concatenated in order.
    """
    header = b"\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01"

    random_snappy = snappy_encode(b"SNAPPY" * 50)
    block_len = len(random_snappy)
    random_snappy2 = snappy_encode(b"XERIAL" * 50)
    block_len2 = len(random_snappy2)

    # header | len(block 1) | block 1 | len(block 2) | block 2
    to_test = (
        header
        + struct.pack("!i", block_len) + random_snappy
        + struct.pack("!i", block_len2) + random_snappy2
    )

    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists on unittest.TestCase from Python 3.12 onwards.
    self.assertEqual(snappy_decode(to_test), (b"SNAPPY" * 50) + (b"XERIAL" * 50))
def test_snappy_decode_xerial():
    """Decode a hand-built framed stream made of two compressed blocks.

    Each payload is compressed with ``xerial_compatible=False`` and then
    framed manually: the fixed header comes first, and every block is
    preceded by its length as a big-endian 32-bit signed integer.
    ``snappy_decode`` must give back the payloads concatenated in order.
    """
    payloads = (b'SNAPPY' * 50, b'XERIAL' * 50)
    header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'

    compressed_blocks = [
        snappy_encode(payload, xerial_compatible=False) for payload in payloads
    ]
    framed_blocks = b''.join(
        struct.pack('!i', len(block)) + block for block in compressed_blocks
    )

    assert snappy_decode(header + framed_blocks) == b''.join(payloads)
def test_snappy_decode_xerial(self):
    """Check that ``snappy_decode`` reassembles a hand-framed two-block stream.

    The stream is the fixed header followed by two compressed blocks, each
    preceded by its length packed as a big-endian 32-bit signed integer.
    The decoded result must equal both payloads concatenated in order.
    """
    header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'

    # Payloads are bytes literals: they are concatenated with the bytes
    # header and struct.pack() output, which raises TypeError with text
    # literals wherever str and bytes are distinct types.
    random_snappy = snappy_encode(b'SNAPPY' * 50)
    block_len = len(random_snappy)
    random_snappy2 = snappy_encode(b'XERIAL' * 50)
    block_len2 = len(random_snappy2)

    # header | len(block 1) | block 1 | len(block 2) | block 2
    to_test = (
        header
        + struct.pack('!i', block_len) + random_snappy
        + struct.pack('!i', block_len2) + random_snappy2
    )

    # assertEqual replaces the deprecated assertEquals alias (removed from
    # unittest in Python 3.12).
    self.assertEqual(snappy_decode(to_test), (b'SNAPPY' * 50) + (b'XERIAL' * 50))
def test_snappy_decode_xerial(self):
    """Check that ``snappy_decode`` reassembles a hand-framed two-block stream.

    The stream is the fixed header followed by two compressed blocks, each
    preceded by its length packed with ``struct.pack('!i', ...)`` (big-endian
    32-bit signed integer). Decoding must return both payloads concatenated.
    """
    header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'

    random_snappy = snappy_encode(b'SNAPPY' * 50)
    block_len = len(random_snappy)
    random_snappy2 = snappy_encode(b'XERIAL' * 50)
    block_len2 = len(random_snappy2)

    # Parentheses replace the backslash continuations, the last of which
    # dangled in front of the assertion.
    to_test = (
        header
        + struct.pack('!i', block_len) + random_snappy
        + struct.pack('!i', block_len2) + random_snappy2
    )

    # assertEqual replaces the deprecated assertEquals alias (removed from
    # unittest in Python 3.12).
    self.assertEqual(snappy_decode(to_test), (b'SNAPPY' * 50) + (b'XERIAL' * 50))
def _maybe_compress(self):
    """Compress the buffer and re-encode it as a single wrapper message.

    Returns:
        bool: ``False`` when ``self._compression_type`` is falsy (nothing is
        touched); ``True`` after ``self._buffer`` has been rewritten to hold
        one message whose value is the compressed former buffer contents.
    """
    codec = self._compression_type
    if not codec:
        return False

    raw = self._buffer
    if codec == self.CODEC_GZIP:
        compressed = gzip_encode(raw)
    elif codec == self.CODEC_SNAPPY:
        compressed = snappy_encode(raw)
    elif codec == self.CODEC_LZ4:
        # The LZ4 encoders are handed an immutable bytes copy; magic 0
        # selects the "old kafka" variant of the encoder.
        lz4_codec = lz4_encode_old_kafka if self._magic == 0 else lz4_encode
        compressed = lz4_codec(bytes(raw))
    # NOTE: any other truthy codec leaves `compressed` unbound, so the
    # len() call below raises.

    compressed_size = len(compressed)
    size = self._size_in_bytes(key_size=0, value_size=compressed_size)

    # Keep the existing bytearray when it is large enough, otherwise
    # allocate a new one of exactly the required size.
    if size > len(self._buffer):
        self._buffer = bytearray(size)
    else:
        del self._buffer[size:]

    self._encode_msg(
        self._buffer,
        offset=0,
        timestamp=0,
        key_size=0,
        key=None,
        value_size=compressed_size,
        value=compressed,
        attributes=codec,
    )
    self._pos = size
    return True
def _maybe_compress(self):
    """Compress the buffer and re-encode it as a single wrapper message.

    Returns:
        bool: ``False`` when ``self._compression_type`` is falsy; ``True``
        once ``self._buffer`` holds one message whose value is the
        compressed snapshot of the previous buffer contents.
    """
    codec = self._compression_type
    if not codec:
        return False

    self._assert_has_codec(codec)
    # Snapshot the buffer: it is resized and overwritten further down.
    snapshot = bytes(self._buffer)

    if codec == self.CODEC_GZIP:
        compressed = gzip_encode(snapshot)
    elif codec == self.CODEC_SNAPPY:
        compressed = snappy_encode(snapshot)
    elif codec == self.CODEC_LZ4:
        # Magic 0 selects the "old kafka" variant of the LZ4 encoder.
        lz4_codec = lz4_encode_old_kafka if self._magic == 0 else lz4_encode
        compressed = lz4_codec(snapshot)

    wrapper_size = self.size_in_bytes(
        0, timestamp=0, key=None, value=compressed)

    # Reuse the current bytearray when it already has enough room.
    if wrapper_size > len(self._buffer):
        self._buffer = bytearray(wrapper_size)
    else:
        del self._buffer[wrapper_size:]

    self._encode_msg(
        start_pos=0,
        offset=0,
        timestamp=0,
        key=None,
        value=compressed,
        attributes=codec,
    )
    return True
def test_snappy(self):
    """Round-trip random strings through ``snappy_encode``/``snappy_decode``.

    The test returns immediately, without asserting anything, when
    ``has_snappy()`` reports that snappy is unavailable.
    """
    if not has_snappy():
        return

    for _ in xrange(ITERATIONS):
        s1 = random_string()
        s2 = snappy_decode(snappy_encode(s1))
        # assertEqual replaces the deprecated assertEquals alias (removed
        # from unittest in Python 3.12).
        self.assertEqual(s1, s2)
def _maybe_compress(self):
    """Compress everything after the header in place, if that saves space.

    Returns:
        bool: ``True`` when the payload following the header was replaced by
        its compressed form; ``False`` when the codec is ``CODEC_NONE`` or
        the compressed form is not strictly smaller than the original.
    """
    codec = self._compression_type
    if codec == self.CODEC_NONE:
        return False

    self._assert_has_codec(codec)
    header_size = self.HEADER_STRUCT.size
    # Only the bytes after the fixed-size header are compressed.
    payload = bytes(self._buffer[header_size:])

    if codec == self.CODEC_GZIP:
        compressed = gzip_encode(payload)
    elif codec == self.CODEC_SNAPPY:
        compressed = snappy_encode(payload)
    elif codec == self.CODEC_LZ4:
        compressed = lz4_encode(payload)
    elif codec == self.CODEC_ZSTD:
        compressed = zstd_encode(payload)

    compressed_size = len(compressed)
    if len(payload) <= compressed_size:
        # Compression brought no benefit; keep the buffer uncompressed.
        return False

    # Shrink the bytearray to header + compressed payload, then overwrite
    # the payload region with the compressed bytes.
    needed_size = header_size + compressed_size
    del self._buffer[needed_size:]
    self._buffer[header_size:needed_size] = compressed
    return True
def _maybe_compress(self):
    """Replace the buffer with one compressed wrapper message when enabled.

    Returns:
        bool: ``True`` if a compression type is set and the buffer was
        rewritten as a single message carrying the compressed data;
        ``False`` if no compression type is set.
    """
    if not self._compression_type:
        return False

    compression = self._compression_type
    self._assert_has_codec(compression)

    # Work from an immutable copy, since the buffer is rewritten below.
    uncompressed = bytes(self._buffer)
    if compression == self.CODEC_GZIP:
        compressed = gzip_encode(uncompressed)
    elif compression == self.CODEC_SNAPPY:
        compressed = snappy_encode(uncompressed)
    elif compression == self.CODEC_LZ4:
        if self._magic == 0:
            # Magic 0 uses the "old kafka" variant of the LZ4 encoder.
            compressed = lz4_encode_old_kafka(uncompressed)
        else:
            compressed = lz4_encode(uncompressed)

    required = self.size_in_bytes(0, timestamp=0, key=None, value=compressed)

    # Try to reuse the same buffer when it has enough space.
    if required > len(self._buffer):
        self._buffer = bytearray(required)
    else:
        del self._buffer[required:]

    self._encode_msg(start_pos=0, offset=0, timestamp=0, key=None,
                     value=compressed, attributes=compression)
    return True
def _maybe_compress(self):
    """Replace the buffer with one compressed wrapper message when enabled.

    Returns:
        bool: ``True`` if a compression type is set and ``self._buffer`` was
        rewritten (``self._pos`` is then set to the new message size);
        ``False`` if no compression type is set.
    """
    if not self._compression_type:
        return False

    compression = self._compression_type
    current = self._buffer

    if compression == self.CODEC_GZIP:
        compressed = gzip_encode(current)
    elif compression == self.CODEC_SNAPPY:
        compressed = snappy_encode(current)
    elif compression == self.CODEC_LZ4:
        # Both LZ4 encoders are given a bytes copy rather than the bytearray.
        if self._magic == 0:
            compressed = lz4_encode_old_kafka(bytes(current))
        else:
            compressed = lz4_encode(bytes(current))

    value_size = len(compressed)
    total_size = self._size_in_bytes(key_size=0, value_size=value_size)

    # Grow by reallocating, shrink by truncating in place.
    if total_size > len(self._buffer):
        self._buffer = bytearray(total_size)
    else:
        del self._buffer[total_size:]

    self._encode_msg(
        self._buffer,
        offset=0, timestamp=0,
        key_size=0, key=None,
        value_size=value_size, value=compressed,
        attributes=compression)
    self._pos = total_size
    return True
def test_snappy_detect_xerial():
    """Check ``_detect_xerial_stream`` against framed and unframed inputs.

    Inputs starting with the full header and the output of ``snappy_encode``
    with default arguments must be detected (``True``). Empty or short data,
    a header with a different first byte, and output produced with
    ``xerial_compatible=False`` must not (``False``).
    """
    import kafka as kafka1
    detect = kafka1.codec._detect_xerial_stream

    header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01Some extra bytes'
    false_header = b'\x01SNAPPY\x00\x00\x00\x01\x00\x00\x00\x01'
    default_snappy = snappy_encode(b'foobar' * 50)
    random_snappy = snappy_encode(b'SNAPPY' * 50, xerial_compatible=False)
    short_data = b'\x01\x02\x03\x04'

    # (input stream, expected detection result), checked in this order.
    expectations = [
        (header, True),
        (b'', False),
        (b'\x00', False),
        (false_header, False),
        (default_snappy, True),
        (random_snappy, False),
        (short_data, False),
    ]
    for stream, expected in expectations:
        assert detect(stream) is expected
def test_snappy_encode_xerial(self):
    """Check the exact byte output of xerial-compatible ``snappy_encode``.

    A 600-byte input encoded with ``xerial_blocksize=300`` must produce the
    16-byte header followed by two blocks, each preceded by a 4-byte length
    of ``0x18`` (24, the size of each expected block literal).
    """
    # Every fragment is a bytes literal: the original concatenated a bytes
    # header with text literals, which raises TypeError wherever str and
    # bytes are distinct types.
    to_ensure = (
        b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'
        b'\x00\x00\x00\x18'
        b'\xac\x02\x14SNAPPY\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\x96\x06\x00'
        b'\x00\x00\x00\x18'
        b'\xac\x02\x14XERIAL\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\x96\x06\x00'
    )

    to_test = (b'SNAPPY' * 50) + (b'XERIAL' * 50)

    compressed = snappy_encode(to_test, xerial_compatible=True, xerial_blocksize=300)
    # assertEqual replaces the deprecated assertEquals alias (removed from
    # unittest in Python 3.12).
    self.assertEqual(compressed, to_ensure)
def test_snappy_encode_xerial():
    """Check the exact byte output of xerial-compatible ``snappy_encode``.

    A 600-byte input encoded with ``xerial_blocksize=300`` is expected to
    come out as the header followed by two length-prefixed blocks.
    """
    header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'
    # Both expected blocks are 24 (0x18) bytes long, so they share a prefix.
    block_length = b'\x00\x00\x00\x18'
    snappy_block = (
        b'\xac\x02\x14SNAPPY'
        b'\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\x96\x06\x00'
    )
    xerial_block = (
        b'\xac\x02\x14XERIAL'
        b'\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\x96\x06\x00'
    )
    expected = b''.join(
        [header, block_length, snappy_block, block_length, xerial_block]
    )

    payload = b''.join([b'SNAPPY' * 50, b'XERIAL' * 50])

    assert snappy_encode(
        payload, xerial_compatible=True, xerial_blocksize=300
    ) == expected
def test_snappy_encode_xerial(self):
    """Check the exact byte output of xerial-compatible ``snappy_encode``.

    A 600-byte input encoded with ``xerial_blocksize=300`` must produce the
    16-byte header followed by two blocks, each preceded by a 4-byte length
    of ``0x18`` (24, the size of each expected block literal).
    """
    to_ensure = (
        b"\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01"
        b"\x00\x00\x00\x18"
        b"\xac\x02\x14SNAPPY\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\x96\x06\x00"
        b"\x00\x00\x00\x18"
        b"\xac\x02\x14XERIAL\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\x96\x06\x00"
    )

    to_test = (b"SNAPPY" * 50) + (b"XERIAL" * 50)

    compressed = snappy_encode(to_test, xerial_compatible=True, xerial_blocksize=300)
    # assertEqual replaces the deprecated assertEquals alias (removed from
    # unittest in Python 3.12).
    self.assertEqual(compressed, to_ensure)
def test_snappy_encode_xerial(self):
    """Check the exact byte output of xerial-compatible ``snappy_encode``.

    A 600-byte input encoded with ``xerial_blocksize=300`` must produce the
    16-byte header followed by two blocks, each preceded by a 4-byte length
    of ``0x18`` (24, the size of each expected block literal).
    """
    # Adjacent bytes literals inside parentheses replace the chain of
    # `+ \` continuations; the resulting value is the same.
    to_ensure = (
        b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'
        b'\x00\x00\x00\x18'
        b'\xac\x02\x14SNAPPY\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\x96\x06\x00'
        b'\x00\x00\x00\x18'
        b'\xac\x02\x14XERIAL\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\xfe\x06\x00\x96\x06\x00'
    )

    to_test = (b'SNAPPY' * 50) + (b'XERIAL' * 50)

    compressed = snappy_encode(to_test, xerial_compatible=True, xerial_blocksize=300)
    # assertEqual replaces the deprecated assertEquals alias (removed from
    # unittest in Python 3.12).
    self.assertEqual(compressed, to_ensure)
def test_snappy_detect_xerial(self):
    """Check ``_detect_xerial_stream`` against framed and unframed inputs.

    Only data starting with the full header must be detected. Empty or
    short data, a header with a different first byte, and the output of
    ``snappy_encode`` must not be.
    """
    import kafka as kafka1
    _detect_xerial_stream = kafka1.codec._detect_xerial_stream

    header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01Some extra bytes'
    false_header = b'\x01SNAPPY\x00\x00\x00\x01\x00\x00\x00\x01'
    # Bytes literal, like every other input in this test: the codec
    # operates on binary data, and a text literal is a different type
    # wherever str and bytes are distinct.
    random_snappy = snappy_encode(b'SNAPPY' * 50)
    short_data = b'\x01\x02\x03\x04'

    self.assertTrue(_detect_xerial_stream(header))
    self.assertFalse(_detect_xerial_stream(b''))
    self.assertFalse(_detect_xerial_stream(b'\x00'))
    self.assertFalse(_detect_xerial_stream(false_header))
    self.assertFalse(_detect_xerial_stream(random_snappy))
    self.assertFalse(_detect_xerial_stream(short_data))
def test_snappy_detect_xerial(self):
    """Check ``_detect_xerial_stream`` against framed and unframed inputs.

    Data starting with the full header must be detected; every other
    sample must not be.
    """
    import kafka as kafka1
    detect = kafka1.codec._detect_xerial_stream

    header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01Some extra bytes'
    false_header = b'\x01SNAPPY\x00\x00\x00\x01\x00\x00\x00\x01'
    random_snappy = snappy_encode(b'SNAPPY' * 50)
    short_data = b'\x01\x02\x03\x04'

    self.assertTrue(detect(header))

    # Samples that must NOT be detected, checked in this order: empty
    # input, a single byte, a header with a different first byte,
    # snappy_encode output, and four arbitrary bytes.
    rejected = (b'', b'\x00', false_header, random_snappy, short_data)
    for sample in rejected:
        self.assertFalse(detect(sample))
def create_snappy_message(payloads, key=None):
    """
    Build one Snappy-compressed Message that wraps multiple Messages

    Every payload is turned into a message, the messages are encoded
    together as a message set, and the Snappy-compressed set becomes the
    value of the single Message that is returned, so the payloads travel
    to Kafka as one atomic message.

    Params
    ======
    payloads: list(bytes), the payloads to be sent to Kafka
    key: bytes, a key used for partition routing (optional)
    """
    inner_messages = [create_message(payload) for payload in payloads]
    encoded_set = KafkaProtocol._encode_message_set(inner_messages)
    snapped = snappy_encode(encoded_set)

    # Attribute bits: the Snappy codec id restricted to the codec mask.
    attributes = 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY)
    return Message(0, attributes, key, snapped)
def test_snappy_detect_xerial(self):
    """Check ``_detect_xerial_stream`` against framed and unframed inputs.

    Data starting with the full header must be detected; every other
    sample must not be.
    """
    import kafka as kafka1
    is_xerial = kafka1.codec._detect_xerial_stream

    # Positive case: the full header followed by trailing bytes.
    self.assertTrue(
        is_xerial(b"\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01Some extra bytes")
    )

    false_header = b"\x01SNAPPY\x00\x00\x00\x01\x00\x00\x00\x01"
    random_snappy = snappy_encode(b"SNAPPY" * 50)
    short_data = b"\x01\x02\x03\x04"

    # Negative cases, checked in this order.
    for candidate in (b"", b"\x00", false_header, random_snappy, short_data):
        self.assertFalse(is_xerial(candidate))
def create_snappy_message(payloads, key=None):
    """
    Wrap several payloads in a single Snappy-compressed Message

    Each payload becomes its own message; the resulting message set is
    encoded, compressed with Snappy and stored as the value of the one
    Message returned, so everything is sent to Kafka as a single atomic
    message.

    Params
    ======
    payloads: list(bytes), the payloads to be sent to Kafka
    key: bytes, a key used for partition routing (optional)
    """
    wrapped = []
    for payload in payloads:
        wrapped.append(create_message(payload))

    snapped = snappy_encode(KafkaProtocol._encode_message_set(wrapped))

    # Attribute bits: the Snappy codec id restricted to the codec mask.
    return Message(
        0,
        0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY),
        key,
        snapped,
    )
def create_snappy_message(payloads, key=None):
    """
    Build one Snappy-compressed Message that wraps multiple Messages

    Each ``(payload, key)`` pair becomes its own message; the resulting
    message set is encoded, compressed with Snappy and stored as the value
    of the single Message returned, so everything is sent to Kafka as one
    atomic message.

    Arguments:
        payloads: iterable of ``(payload, key)`` pairs; each pair is
            unpacked and passed to ``create_message``
        key: bytes, a key used for partition routing (optional)
    """
    inner_messages = [
        create_message(payload, pl_key) for payload, pl_key in payloads
    ]
    encoded_set = KafkaProtocol._encode_message_set(inner_messages)
    snapped = snappy_encode(encoded_set)

    # Attribute bits: the Snappy codec id restricted to the codec mask.
    attributes = 0x00 | (ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
    return kafka.structs.Message(0, attributes, key, snapped)
def _maybe_compress(self):
    """Compress everything after the header in place, if that saves space.

    Returns:
        bool: ``True`` when the payload following the header was replaced by
        its compressed form; ``False`` when the codec is ``CODEC_NONE`` or
        the compressed form is not strictly smaller than the original.
    """
    codec = self._compression_type
    if codec == self.CODEC_NONE:
        return False

    header_size = self.HEADER_STRUCT.size
    # Only the bytes after the fixed-size header are compressed.
    payload = bytes(self._buffer[header_size:])

    if codec == self.CODEC_GZIP:
        compressed = gzip_encode(payload)
    elif codec == self.CODEC_SNAPPY:
        compressed = snappy_encode(payload)
    elif codec == self.CODEC_LZ4:
        compressed = lz4_encode(payload)

    compressed_size = len(compressed)
    if len(payload) <= compressed_size:
        # Compression brought no benefit; keep the buffer uncompressed.
        return False

    # Shrink the bytearray to header + compressed payload, then overwrite
    # the payload region with the compressed bytes.
    needed_size = header_size + compressed_size
    del self._buffer[needed_size:]
    self._buffer[header_size:needed_size] = compressed
    return True
def test_snappy(self):
    """Round-trip 1000 random 100-character strings through snappy.

    Each string is UTF-8 encoded, compressed with ``snappy_encode`` and
    decompressed with ``snappy_decode``; the result must equal the input.
    """
    for _ in xrange(1000):
        original = random_string(100).encode('utf-8')
        compressed = snappy_encode(original)
        restored = snappy_decode(compressed)
        self.assertEqual(original, restored)
def test_snappy():
    """Round-trip 1000 random 100-character strings through snappy.

    Each string is UTF-8 encoded, compressed with ``snappy_encode`` and
    decompressed with ``snappy_decode``; the result must equal the input.
    """
    for _ in xrange(1000):
        original = random_string(100).encode('utf-8')
        compressed = snappy_encode(original)
        restored = snappy_decode(compressed)
        assert original == restored
def test_snappy(self):
    """Round-trip 1000 random strings through ``snappy_encode``/``snappy_decode``.

    Each value from ``random_string(100)`` is compressed and decompressed,
    and the result must equal the original.
    """
    for _ in xrange(1000):
        s1 = random_string(100)
        s2 = snappy_decode(snappy_encode(s1))
        # assertEqual replaces the deprecated assertEquals alias (removed
        # from unittest in Python 3.12).
        self.assertEqual(s1, s2)
def test_snappy(self):
    """Round-trip 1000 random strings through ``snappy_encode``/``snappy_decode``.

    Each value from ``random_string(100)`` is converted with
    ``compat.bytes`` before being compressed and decompressed; the result
    must equal the converted input.
    """
    for _ in compat.xrange(1000):
        s1 = compat.bytes(random_string(100))
        s2 = snappy_decode(snappy_encode(s1))
        # assertEqual replaces the deprecated assertEquals alias (removed
        # from unittest in Python 3.12).
        self.assertEqual(s1, s2)
def test_snappy(self):
    """Round-trip 1000 random strings through ``snappy_encode``/``snappy_decode``.

    Each value from ``random_string(100)`` is compressed and decompressed,
    and the result must equal the original.
    """
    for _ in xrange(1000):
        original = random_string(100)
        compressed = snappy_encode(original)
        restored = snappy_decode(compressed)
        self.assertEqual(original, restored)
def test_snappy():
    """Round-trip 1000 random 100-character strings through snappy.

    Each string is UTF-8 encoded, compressed with ``snappy_encode`` and
    decompressed with ``snappy_decode``; the result must equal the input.
    """
    for _ in range(1000):
        original = random_string(100).encode('utf-8')
        compressed = snappy_encode(original)
        restored = snappy_decode(compressed)
        assert original == restored