def process(self, data):
    """
    Decompress `data` and return the result as a bytearray.

    The input is a sequence of groups. Each group starts with one control
    byte whose bits are examined from least to most significant. A clear
    bit means the next input byte is copied to the output verbatim. A set
    bit means a match token follows: a 6-bit length (to which _MATCH_MIN
    is added) and a 10-bit distance, both read while the reader is in its
    `be` mode. The distance counts backwards from the end of the output.

    Raises ValueError when a match is requested before any output exists,
    and RuntimeError when the distance is zero or reaches before the start
    of the output.
    """
    output = bytearray()
    src = StructReader(data)
    while not src.eof:
        control = src.read_byte()
        for bit in range(8):
            # The input may end in the middle of a group.
            if src.eof:
                break
            if (control & (1 << bit)) == 0:
                output.append(src.read_byte())
                continue
            if not output:
                raise ValueError('copy requested against empty buffer')
            with src.be:
                pending = src.read_integer(6) + _MATCH_MIN
                distance = src.read_integer(10)
            if distance == 0 or len(output) < distance:
                raise RuntimeError(F'invalid match offset at position {src.tell()}')
            cursor = len(output) - distance
            # A match may be longer than its distance; in that case the slice
            # is clipped at the current end of the output and the loop keeps
            # going, now also reading the bytes that were just appended.
            while pending > 0:
                chunk = output[cursor:cursor + pending]
                output.extend(chunk)
                copied = len(chunk)
                cursor += copied
                pending -= copied
    return output
def test_bitreader_le(self):
    """
    Read a mix of bit fields, a 64-bit integer, flags and single bits from
    a buffer serialized in little-endian byte order, using the reader in
    its default mode, and check that the reader ends up at end of input.
    """
    # The underscore-separated groups are consumed from right to left.
    packed = 0b10010100111010100100001111101_11_00000000_0101010101010010010111100000101001010101100000001110010111110100_111_000_100
    byte_count, leftover_bits = divmod(packed.bit_length(), 8)
    self.assertEqual(leftover_bits, 0)
    view = memoryview(packed.to_bytes(byte_count, 'little'))
    sr = StructReader(view)
    for expected in (0b100, 0b000, 0b111):
        self.assertEqual(sr.read_integer(3), expected)
    self.assertEqual(
        sr.u64(),
        0b0101010101010010010111100000101001010101100000001110010111110100,
    )
    self.assertFalse(any(sr.read_flags(8, reverse=True)))
    self.assertEqual(sr.read_bit(), 1)
    # NOTE(review): presumably rejected because the reader is not at a byte
    # boundary at this point; confirm against StructReader.read_struct.
    with self.assertRaises(ValueError):
        sr.read_struct('')
    self.assertEqual(sr.read_bit(), 1)
    self.assertEqual(sr.read_integer(29), 0b10010100111010100100001111101)
    self.assertTrue(sr.eof)
def test_bitreader_be(self):
    """
    Read single bits, a byte, bit fields and flags from a buffer that was
    serialized in big-endian byte order, with the reader switched to its
    `be` mode, then check that a 16-bit read fails with EOF.
    """
    # The underscore-separated groups are consumed from left to right. The
    # literal has a leading zero bit, so its bit length is one short of a
    # whole number of bytes.
    packed = 0b01010_10011101_0100100001_1111_0111101010000101010101010010010111100000101001010101100000001110010111110100111000_101
    whole_bytes, leftover_bits = divmod(packed.bit_length(), 8)
    self.assertEqual(leftover_bits, 7)
    view = memoryview(packed.to_bytes(whole_bytes + 1, 'big'))
    sr = StructReader(view)
    with sr.be:
        for expected_bit in (0, 1, 0, 1, 0):
            self.assertEqual(sr.read_bit(), expected_bit)
        self.assertEqual(sr.read_byte(), 0b10011101)
        self.assertEqual(sr.read_integer(10), 0b100100001)
        self.assertTrue(all(sr.read_flags(4)))
        self.assertEqual(
            sr.read_integer(82),
            0b0111101010000101010101010010010111100000101001010101100000001110010111110100111000,
        )
        # Only the three trailing bits of the literal are left unread here.
        with self.assertRaises(EOF):
            sr.u16()
def _decompress_xpress_huffman(
    self,
    reader: StructReader,
    writer: MemoryFile,
    target: Optional[int] = None,
    max_chunk_size: int = 0x10000
) -> None:
    """
    Decode Huffman-coded chunks from `reader` and write the output to `writer`.

    Each chunk begins with a table of XPRESS_NUM_SYMBOLS 4-bit integers, which
    is turned into a decode table. Symbols are then decoded until the writer
    has advanced by `max_chunk_size` bytes, after which the next chunk (with a
    new table) is read.

    Args:
        reader: The compressed input.
        writer: The output; decoding starts at its current position.
        target: Optional number of bytes to produce, counted from the writer
            position at entry. The method returns as soon as the writer
            reaches exactly that position.
        max_chunk_size: Number of output bytes after which a chunk ends.

    Raises:
        IndexError: If fewer than XPRESS_NUM_SYMBOLS // 2 bytes remain in the
            input when a new table has to be read.
    """
    limit = writer.tell()
    # Convert the relative target into an absolute writer position.
    if target is not None:
        target += limit
    while not reader.eof:
        # The table has XPRESS_NUM_SYMBOLS entries of 4 bits each, i.e. it
        # occupies XPRESS_NUM_SYMBOLS // 2 bytes.
        if reader.remaining_bytes < XPRESS_NUM_SYMBOLS // 2:
            raise IndexError(
                F'There are only {reader.remaining_bytes} bytes reamining in the input buffer,'
                F' but at least {XPRESS_NUM_SYMBOLS//2} are required to read a Huffman table.'
            )
        table = bytearray(
            reader.read_integer(4) for _ in range(XPRESS_NUM_SYMBOLS))
        table = make_huffman_decode_table(table, XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN)
        # The end of this chunk, as an absolute writer position.
        limit = limit + max_chunk_size
        flags = BitBufferedReader(reader, 16)
        while True:
            position = writer.tell()
            if position == target:
                if reader.remaining_bytes:
                    self.log_info(
                        F'chunk decompressed with {reader.remaining_bytes} bytes remaining in input buffer'
                    )
                return
            if position >= limit:
                if position > limit:
                    # The last match overshot the chunk boundary; move the
                    # boundary so the next chunk is measured from here.
                    limit = position
                    self.log_info(
                        F'decompression of one chunk generated more than the limit of {max_chunk_size} bytes'
                    )
                # NOTE(review): presumably prepares the bit buffer for the
                # direct reads of the next table; confirm against
                # BitBufferedReader.collect.
                flags.collect()
                break
            try:
                sym = flags.huffman_symbol(table, XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN)
            except EOFError:
                self.log_debug('end of file while reading huffman symbol')
                break
            # Symbols below XPRESS_NUM_CHARS are literal bytes.
            if sym < XPRESS_NUM_CHARS:
                writer.write_byte(sym)
                continue
            # Otherwise the symbol describes a match: the low nibble is the
            # base length, the next nibble the number of extra offset bits.
            length = sym & 0xF
            offsetlog = (sym >> 4) & 0xF
            flags.collect()
            if reader.eof:
                break
            # The offset has an implicit leading one bit followed by
            # `offsetlog` bits taken from the bit stream.
            offset = (1 << offsetlog) | flags.read(offsetlog)
            if length == 0xF:
                # A base length of 0xF is extended by one more byte read
                # directly from the input.
                nudge = reader.read_byte()
                if nudge < 0xFF:
                    length += nudge
                else:
                    # A byte of 0xFF is followed by a 16-bit length; if that
                    # is zero, a 32-bit length is read instead.
                    length = reader.u16() or reader.u32()
            length += XPRESS_MIN_MATCH_LEN
            # NOTE(review): presumably copies `length` bytes starting `offset`
            # bytes back in the output; confirm against MemoryFile.replay.
            writer.replay(offset, length)