def _decompress(cls, data: typing.Iterable[bytes]) -> typing.Iterable[bytes]:
    """Lazily decompress a stream of LZ4-frame chunks.

    Every non-empty chunk taken from *data* is fed to one shared
    ``LZ4FrameDecompressor`` and the bytes it returns for that chunk are
    yielded straight away. The first empty (falsy) chunk stops the
    iteration.

    Args:
        data: Iterable of compressed byte chunks.

    Yields:
        The result of ``decompress()`` for each consumed chunk.

    Raises:
        AssertionError: When the input is exhausted (or an empty chunk is
            met) before the decompressor reports end-of-frame. Because this
            is a generator, the error surfaces while iterating, not when
            the function is called.
    """
    with LZ4FrameDecompressor() as decomp:
        for chunk in data:
            if not chunk:
                break
            yield decomp.decompress(chunk)
        # Verified while the decompressor is still open. An explicit raise is
        # used instead of ``assert`` so the truncation check is not stripped
        # under ``python -O``; the exception type is kept for existing callers.
        if not decomp.eof:
            raise AssertionError("LZ4 stream ended before the end of the frame was reached")
def process_compressed_mdblob(self, compressed_data, **kwargs):
    """Decompress an LZ4-compressed mdblob and hand it to ``process_squashed_mdblob``.

    Any bytes the decompressor leaves in ``unused_data`` are parsed with
    ``HealthItemsPayload.unpack`` and forwarded as ``health_info``. A failure
    to parse them is only logged; processing then continues with
    ``health_info=None``.

    Args:
        compressed_data: The compressed blob.
        **kwargs: Forwarded unchanged to ``process_squashed_mdblob``.

    Returns:
        An empty list if decompression raises ``RuntimeError``; otherwise
        whatever ``process_squashed_mdblob`` returns.
    """
    try:
        with LZ4FrameDecompressor() as lz4_frame:
            payload = lz4_frame.decompress(compressed_data)
            # Must be read before the context manager exits.
            trailer = lz4_frame.unused_data
    except RuntimeError as e:
        self._logger.warning(f"Unable to decompress mdblob: {str(e)}")
        return []

    # Nothing left over after decompression: there is no health payload to parse.
    if not trailer:
        return self.process_squashed_mdblob(payload, health_info=None, **kwargs)

    try:
        parsed_health = HealthItemsPayload.unpack(trailer)
    except Exception as e:  # pylint: disable=broad-except  # pragma: no cover
        # Best effort: broken health data must not prevent metadata processing.
        self._logger.warning(f"Unable to parse health information: {type(e).__name__}: {str(e)}")
        parsed_health = None

    return self.process_squashed_mdblob(payload, health_info=parsed_health, **kwargs)
def Decompress(buffer: bytes, method, decompressSize=0) -> bytes:
    """Decompress *buffer* with the named method and verify the output size.

    The backend for each method is imported inside its own branch, so a
    backend that is not requested is never imported.

    Args:
        buffer: The compressed bytes.
        method: One of ``"Oodle"``, ``"Gzip"``, ``"Zlib"`` or ``"LZ4"``.
        decompressSize: Exact length the decompressed result must have. It is
            also passed to the backends: as ``decompressLength`` for Oodle,
            ``bufsize`` for zlib and ``max_length`` for LZ4.

    Returns:
        The decompressed bytes, always exactly ``decompressSize`` long.

    Raises:
        NotImplementedError: If *method* is not one of the supported names.
        AssertionError: If the decompressed length differs from
            ``decompressSize``.
    """
    if method == "Oodle":
        from ..Oodle import Decompress as OoDecompress

        result = OoDecompress(buffer=buffer, decompressLength=decompressSize)
    elif method == "Gzip":
        from gzip import decompress as gDecompress

        result = gDecompress(buffer)
    elif method == "Zlib":
        from zlib import decompress as zDecompress

        result = zDecompress(buffer, bufsize=decompressSize)
    elif method == "LZ4":
        from lz4.frame import LZ4FrameDecompressor

        result = LZ4FrameDecompressor().decompress(buffer, max_length=decompressSize)
    else:
        raise NotImplementedError("Unknown Compression Method " + str(method))

    # One shared size check for every backend. It is an explicit raise rather
    # than ``assert`` so it still runs under ``python -O``; AssertionError is
    # kept so callers see the same exception type as before.
    if len(result) != decompressSize:
        raise AssertionError(
            f"{method} decompression produced {len(result)} bytes, expected {decompressSize}"
        )
    return result
def test_metadata_compressor():
    """Check MetadataCompressor with and without trailing health information.

    Three scenarios are covered: a normal chunk without health info, a chunk
    size too small for even one item, and a chunk with health info appended
    after the LZ4 frame.
    """
    SERIALIZED_METADATA = f"<{'S' * 1000}>".encode('ascii')
    SERIALIZED_DELETE = f"<{'D' * 100}>".encode('ascii')
    SERIALIZED_HEALTH = "1,2,1234567890;".encode('ascii')

    metadata = Mock()
    metadata.status = NEW
    metadata.serialized = Mock(return_value=SERIALIZED_METADATA)
    metadata.serialized_delete = Mock(return_value=SERIALIZED_DELETE)
    metadata.serialized_health = Mock(return_value=SERIALIZED_HEALTH)

    def add_items(mc: MetadataCompressor, expected_items_count: int):
        """Fill *mc* until it rejects an item, close it and return the result."""
        prev_size = 0
        for i in range(1, 1000):
            item_was_added = mc.put(metadata)
            if not item_was_added:
                assert mc.count == i - 1  # last item was not added
                # the compressor accepted exactly the expected number of items
                assert mc.count == expected_items_count
                break
            assert mc.count == i  # after the element was successfully added, the count should increase
            assert mc.size > prev_size  # with each item the total size should become bigger
            prev_size = mc.size
        else:
            # the loop was never interrupted by a rejected item
            raise AssertionError("too many items were added, something is wrong")

        assert prev_size < mc.chunk_size  # total size should fit into the chunk
        assert not mc.closed
        result = mc.close()
        assert mc.closed
        assert isinstance(result, bytes)
        assert len(result) == prev_size
        # our test data should be easy to compress
        assert len(result) < len(SERIALIZED_METADATA) * expected_items_count
        return result

    # compressing normal data without health info
    mc = MetadataCompressor(200)
    assert mc.chunk_size == 200
    assert not mc.include_health  # include_health is False by default
    assert mc.count == 0  # no items added yet

    expected_items_count = 10  # chunk of size 200 should be enough to put 10 test items
    data = add_items(mc, expected_items_count)

    d = LZ4FrameDecompressor()
    decompressed = d.decompress(data)
    assert decompressed == SERIALIZED_METADATA * expected_items_count  # check the correctness of the decompressed data
    unused_data = d.unused_data
    assert not unused_data  # if health info is not included, no unused_data should be placed after the LZ4 frame

    # These must be *called*: ``assert mock.assert_not_called`` only tests the
    # truthiness of a bound method and can never fail.
    metadata.serialized_health.assert_not_called()
    metadata.serialized_delete.assert_not_called()

    # cannot operate on closed MetadataCompressor
    with pytest.raises(TypeError, match='^Compressor is already closed$'):
        mc.put(metadata)
    with pytest.raises(TypeError, match='^Compressor is already closed$'):
        mc.close()

    # chunk size is not enough even for a single item
    mc = MetadataCompressor(10)
    added = mc.put(metadata)
    # first item should be added successfully even if the size of compressed item is bigger than the chunk size
    assert added
    size = mc.size
    assert size > mc.chunk_size

    added = mc.put(metadata)
    assert not added  # second item was not added
    assert mc.count == 1
    assert mc.size == size  # size was not changed

    data = mc.close()
    d = LZ4FrameDecompressor()
    decompressed = d.decompress(data)
    assert decompressed == SERIALIZED_METADATA

    # include health info
    mc = MetadataCompressor(200, True)
    assert mc.include_health

    expected_items_count = 5  # with health info we can put at most 5 test items into the chunk of size 200
    data = add_items(mc, expected_items_count)

    d = LZ4FrameDecompressor()
    decompressed = d.decompress(data)
    assert decompressed == SERIALIZED_METADATA * expected_items_count  # check the correctness of the decompressed data
    unused_data = d.unused_data

    metadata.serialized_health.assert_called()
    metadata.serialized_delete.assert_not_called()

    health_items = HealthItemsPayload.unpack(unused_data)
    assert len(health_items) == expected_items_count
    for health_item in health_items:
        assert health_item == (1, 2, 1234567890)