def _read_strings(self, reader: StructReader, size: int, offset: int) -> Generator[str, None, None]:
    """
    Yield the strings of a DEX string table: `size` is the number of entries in the
    string_ids list located at `offset`; each entry is a u32 pointing at a string_data
    item (a ULEB128 length prefix followed by a NUL-terminated MUTF-8 string).
    Raises RuntimeError when a decoded string does not match its declared length.
    """
    def uleb128():
        # ULEB128 via the bit-level reader: 7 data bits per limb (LSB first),
        # followed by one continuation bit. At most 5 limbs (35 bits).
        value = 0
        more = True
        for k in range(0, 35, 7):
            limb = reader.read_integer(7)
            more = reader.read_bit()
            value |= limb << k
            if not more:
                break
        # A still-set continuation bit after 5 limbs means malformed input.
        assert not more
        return value
    with StreamDetour(reader, offset):
        offsets = [reader.u32() for _ in range(size)]
        # NOTE: `offset` and `size` are deliberately reused below as loop-local names;
        # the original parameter values are no longer needed at this point.
        for offset in offsets:
            reader.seek(offset)
            size = uleb128()
            if not size:
                continue
            data = reader.read_c_string()
            string = JvClassFile.decode_utf8m(data)
            # The ULEB128 prefix is the string length; presumably this is intended to
            # match the decoded character count -- TODO confirm against the DEX spec,
            # which defines it as the UTF-16 code unit count.
            if len(string) != size:
                raise RuntimeError(
                    F'Read string of length {len(string)}, expected length {size}.'
                )
            yield string
def __init__(self, reader: StructReader):
    """
    Parse a single PyInstaller TOC entry from `reader` (fields are big-endian).
    Raises RuntimeError for implausibly long or non-printable entry names; entries
    with an empty or undecodable name receive a random UUID name instead.
    """
    reader.bigendian = True
    entry_start_offset = reader.tell()
    self.size_of_entry = reader.i32()
    self.offset = reader.i32()
    self.size_of_compressed_data = reader.i32()
    # Keep the historically misspelled attribute for backward compatibility with
    # existing callers, but also expose a correctly spelled alias.
    self.size_od_uncompressed_data = self.size_of_uncompressed_data = reader.i32()
    self.is_compressed = bool(reader.read_byte())
    entry_type = bytes(reader.read(1))
    # The name occupies whatever remains of the entry after the fixed fields.
    name_length = self.size_of_entry - reader.tell() + entry_start_offset
    if name_length > 0x1000:
        raise RuntimeError(
            F'Refusing to process TOC entry with name of size {name_length}.'
        )
    name, *_ = bytes(reader.read(name_length)).partition(B'\0')
    try:
        name = name.decode('utf8', 'backslashreplace')
    except Exception:
        name = None
    # Bugfix: `name` can be None here after a decode failure; previously it was
    # passed to re.split unconditionally, raising TypeError. Only validate a string.
    if name is not None and not all(part.isprintable() for part in re.split('\\s*', name)):
        raise RuntimeError(
            'Refusing to process TOC entry with non-printable name.')
    name = name or str(uuid.uuid4())
    # 'Z' and 'z' both denote a zlib archive; normalize to lowercase.
    if entry_type == B'Z':
        entry_type = B'z'
    try:
        self.type = PiType(entry_type)
    except ValueError:
        xtpyi.logger.error(F'unknown type {entry_type!r} in field {name}')
        self.type = PiType.UNKNOWN
    self.name = name
def _decompress_mszip(self, reader: StructReader, writer: MemoryFile, target: Optional[int] = None):
    """
    Decompress one MSZIP chunk from `reader` into `writer`. The chunk must start
    with the two-byte 'CK' signature; the remainder is a raw DEFLATE stream that
    uses all previously written output as its dictionary.
    """
    magic = bytes(reader.read(2))
    if magic != B'CK':
        raise ValueError(
            F'chunk did not begin with CK header, got {magic!r} instead')
    inflate = zlib.decompressobj(-zlib.MAX_WBITS, zdict=writer.getbuffer())
    writer.write(inflate.decompress(reader.read()))
    writer.write(inflate.flush())
def _extract_ole(self, data: bytearray) -> str:
    """
    Extract the text from a legacy binary Word document contained in the given
    OLE compound file data.
    """
    with self._olefile.OleFileIO(MemoryFile(data)) as ole:
        doc = ole.openstream('WordDocument').read()
        # Bit 1 of byte 11 of the FIB selects whether the piece table lives in
        # the stream named "0Table" or "1Table".
        table_name = F'{(doc[11]>>1)&1}Table'
        with StructReader(doc) as reader:
            # Presumably fcClx/lcbClx in the FIB -- verify against [MS-DOC].
            reader.seek(0x1A2)
            offset = reader.u32()
            length = reader.u32()
        with StructReader(ole.openstream(table_name).read()) as reader:
            reader.seek(offset)
            table = reader.read(length)
        piece_table = self._load_piece_table(table)
        return self._get_text(doc, piece_table)
def process(self, data: bytearray):
    """
    Decompress data with an optional 'JC' header carrying the expected output
    size and a checksum. Without the header (or with --ignore-header), the whole
    input is decompressed as a raw stream.
    """
    with MemoryFile() as output, StructReader(data) as reader:
        if reader.read(2) != B'JC':
            self.log_warn(
                'data does not begin with magic sequence, assuming that header is missing'
            )
            reader.seek(0)
            size = checksum = None
        else:
            size = reader.u32()
            checksum = reader.u32()
        if self.args.ignore_header:
            size = None
        self._decompress(output, reader, size)
        if size is not None:
            if len(output) > size:
                # Typo fix in the log message: "tuncating" -> "truncating".
                self.log_info(F'truncating to size {size}')
                output.truncate(size)
            elif len(output) < size:
                # Bugfix: report the number of decompressed bytes; the previous
                # message interpolated len(data), the size of the *input* buffer.
                self.log_warn(
                    F'header size was {size}, but only {len(output)} bytes were decompressed'
                )
        data = output.getvalue()
        if checksum:
            c = self._checksum(data)
            if c != checksum:
                self.log_warn(
                    F'header checksum was {checksum:08X}, computed value is {c:08X}'
                )
        return data
def __init__(self, reader: StructReader):
    """
    Parse a ZIP end-of-central-directory record. Raises ValueError when the
    signature is missing; a truncated or absent comment yields comment=None.
    """
    if reader.read(4) != self.SIGNATURE:
        raise ValueError
    self.disk_number = reader.u16()
    self.start_disk_number = reader.u16()
    self.entries_on_disk = reader.u16()
    self.entries_in_directory = reader.u16()
    self.directory_size = reader.u32()
    self.directory_offset = reader.u32()
    try:
        length = reader.u32()
        # An empty read (or a zero length) is normalized to None.
        self.comment = (reader.read(length) or None) if length else None
    except EOFError:
        self.comment = None
def __init__(self, buffer: Union[bytearray, StructReader], bits_per_read: int = 32):
    """
    Wrap `buffer` for bit-granular reads, refilling the internal bit buffer
    `bits_per_read` bits at a time. Plain byte buffers are wrapped in a
    little-endian StructReader.
    """
    if isinstance(buffer, StructReader):
        reader = buffer
    else:
        reader = StructReader(memoryview(buffer), bigendian=False)
    self._reader: StructReader[memoryview] = reader
    self._bit_buffer_data: int = 0  # pending bits, packed into an integer
    self._bit_buffer_size: int = 0  # number of pending bits
    self._bits_per_read = bits_per_read
def test_bitreader_le(self):
    """
    Exercise little-endian (LSB-first) bit reads: the bits of the literal below
    are consumed right to left once the value is serialized little-endian.
    """
    data = 0b10010100111010100100001111101_11_00000000_0101010101010010010111100000101001010101100000001110010111110100_111_000_100
    size, remainder = divmod(data.bit_length(), 8)
    # The literal is sized to a whole number of bytes.
    self.assertEqual(remainder, 0)
    data = memoryview(data.to_bytes(size, 'little'))
    sr = StructReader(data)
    # The three lowest bit groups come out first, in order.
    self.assertEqual(sr.read_integer(3), 0b100)
    self.assertEqual(sr.read_integer(3), 0b000)
    self.assertEqual(sr.read_integer(3), 0b111)
    self.assertEqual(
        sr.u64(), 0b0101010101010010010111100000101001010101100000001110010111110100)
    self.assertFalse(any(sr.read_flags(8, reverse=True)))
    self.assertEqual(sr.read_bit(), 1)
    # An empty struct format is invalid regardless of stream state.
    self.assertRaises(ValueError, lambda: sr.read_struct(''))
    self.assertEqual(sr.read_bit(), 1)
    self.assertEqual(sr.read_integer(29), 0b10010100111010100100001111101)
    self.assertTrue(sr.eof)
def unpack(self, data):
    """
    Iterate the members of a CPIO archive, yielding packed results until the
    trailer entry or the end of the input is reached.
    """
    reader = StructReader(memoryview(data))

    def next_entry():
        # Returns None at end of input, which terminates iter() below.
        with suppress(EOF):
            return CPIOEntry(reader)

    for entry in iter(next_entry, None):
        if entry.name == 'TRAILER!!!':
            break
        yield self._pack(entry.name, entry.mtime, entry.data)
def __init__(self, reader: StructReader, version: str):
    """
    Parse the header of a PYZ archive embedded in a PyInstaller package and
    reconstruct a full pyc magic header for the detected Python version.
    """
    reader.bigendian = True
    self.base = reader.tell()
    if reader.read(4) != self.MagicSignature:
        raise ValueError('invalid magic')
    magic = bytes(reader.read(4))
    # If xdis knows this magic number, prefer its version over the caller's.
    with contextlib.suppress(KeyError):
        version = xtpyi._xdis.magics.versions[magic]
    vtuple = version2tuple(version)
    # The pyc header after the magic grew by 4 bytes in 3.3 and again in 3.7;
    # pad accordingly so the reconstructed header has the right size.
    padding_size = 4
    for threshold in ((3, 3), (3, 7)):
        if vtuple >= threshold:
            padding_size += 4
    self.version = version
    self.magic = magic + b'\0' * padding_size
    self.toc_offset = reader.i32()
    self.reader = reader
    self.entries: List[PiMeta] = []
def _load_piece_table(self, table: bytes) -> bytes:
    """
    Walk the clx structure in `table` and return the raw piece table bytes.
    Entries of type 1 (property data) are skipped; type 2 holds the piece table.
    """
    with StructReader(table) as reader:
        while not reader.eof:
            kind = reader.read_byte()
            if kind == 1:
                # Skip a property-data block; its single-byte length follows.
                reader.seekrel(reader.read_byte())
            elif kind == 2:
                # The piece table proper, prefixed with a 32-bit length.
                return reader.read(reader.u32())
            else:
                raise NotImplementedError(
                    F'Unsupported table entry type value 0x{kind:X}.')
def process(self, data):
    """
    Decompress LZSS-style data: each control byte selects, bit by bit (LSB
    first), between a literal byte and a big-endian (length, distance) match
    that replays previously produced output.
    """
    out = bytearray()
    src = StructReader(data)
    while not src.eof:
        control = src.read_byte()
        for k in range(8):
            if src.eof:
                break
            if not control >> k & 1:
                # Clear bit: copy one literal byte through.
                out.append(src.read_byte())
                continue
            if not out:
                raise ValueError('copy requested against empty buffer')
            with src.be:
                length = src.read_integer(6) + _MATCH_MIN
                distance = src.read_integer(10)
            if not distance or distance > len(out):
                raise RuntimeError(F'invalid match offset at position {src.tell()}')
            position = len(out) - distance
            # Overlapping copies are legal: replay in chunks of what exists.
            while length > 0:
                chunk = out[position:position + length]
                out.extend(chunk)
                position += len(chunk)
                length -= len(chunk)
    return out
def __init__(self, reader: StructReader):
    """
    Parse a Java class file: magic, version, constant pool, access flags,
    class hierarchy indices, interfaces, fields, methods, and attributes.
    Raises ValueError on a missing magic or a malformed constant pool index.
    """
    reader.bigendian = True
    if reader.read(4).hex() != 'cafebabe':
        raise ValueError('class file magic missing.')
    minor = reader.u16()
    major = reader.u16()
    self.version = (major, minor)
    self.pool: List[Union[Struct, int, float, str]] = []
    self._read_pool(reader)
    # Annotation fix: the right-hand side is a set comprehension, so the
    # attribute holds a set of the string constants from the pool.
    self.strings: Set[str] = {
        s.value for s in self.pool if isinstance(s, Struct) and s.tag == JvConstType.String}
    self.access = JvAccessFlags(reader)
    self.this = reader.u16()
    self.parent = reader.u16()
    # Each section is prefixed with a u16 count; pool indices out of range
    # are reported as parse failures for the respective section.
    try:
        self.interfaces = [self.pool[reader.u16()] for _ in range(reader.u16())]
    except IndexError:
        raise ValueError('Failed parsing Interfaces.')
    try:
        self.fields = [JvClassMember(reader, pool=self.pool) for _ in range(reader.u16())]
    except IndexError:
        raise ValueError('Failed parsing Fields.')
    try:
        self.methods = [JvClassMember(reader, pool=self.pool) for _ in range(reader.u16())]
    except IndexError:
        raise ValueError('Failed parsing Methods.')
    try:
        self.attributes = [JvAttribute(reader, pool=self.pool) for _ in range(reader.u16())]
    except IndexError:
        raise ValueError('Failed parsing Attributes.')
def __init__(self, reader: StructReader):
    """
    Parse a JVM Code attribute: stack/local limits, the disassembled bytecode,
    then exception handlers and nested attributes.
    """
    reader.bigendian = True
    self.max_stack = reader.u16()
    self.max_locals = reader.u16()
    disassembly: List[JvOpCode] = []
    # The bytecode body is length-prefixed; disassemble it in its own reader.
    with StructReader(reader.read(reader.u32())) as code:
        code.bigendian = True
        while not code.eof:
            disassembly.append(JvOpCode(code, pool=self.pool))
    self.disassembly = disassembly
    self.exceptions = [JvException(reader) for _ in range(reader.u16())]
    self.attributes = [JvAttribute(reader) for _ in range(reader.u16())]
def test_bitreader_be(self):
    """
    Exercise big-endian (MSB-first) bit reads inside the `sr.be` context:
    bits are consumed left to right from the big-endian serialization.
    """
    data = 0b01010_10011101_0100100001_1111_0111101010000101010101010010010111100000101001010101100000001110010111110100111000_101
    size, remainder = divmod(data.bit_length(), 8)
    # The literal deliberately does not fill the last byte; 7 bits remain.
    self.assertEqual(remainder, 7)
    data = memoryview(data.to_bytes(size + 1, 'big'))
    sr = StructReader(data)
    with sr.be:
        # The leading 0 of the 5-bit group is part of the padding to a full byte.
        self.assertEqual(sr.read_bit(), 0)
        self.assertEqual(sr.read_bit(), 1)
        self.assertEqual(sr.read_bit(), 0)
        self.assertEqual(sr.read_bit(), 1)
        self.assertEqual(sr.read_bit(), 0)
        self.assertEqual(sr.read_byte(), 0b10011101)
        self.assertEqual(sr.read_integer(10), 0b100100001)
        self.assertTrue(all(sr.read_flags(4)))
        self.assertEqual(
            sr.read_integer(82),
            0b0111101010000101010101010010010111100000101001010101100000001110010111110100111000
        )
        # Only 3 bits remain; a 16-bit read must fail.
        self.assertRaises(EOF, sr.u16)
def __init__(self, reader: StructReader):
    """
    Decode a 16-bit JVM access-flags word into boolean attributes. The tuple
    unpack below is ordered from the most significant bit (0x8000) down to the
    least significant (0x0001); reserved positions are discarded.
    """
    (
        self.MODULE,      # 0x8000
        self.ENUM,        # 0x4000
        self.ANNOTATION,  # 0x2000
        self.SYNTHETIC,   # 0x1000
        _,                # ...
        self.ABSTRACT,    # 0x0400
        self.INTERFACE,   # 0x0200
        _, _, _,          # ...
        self.SUPER,       # 0x0020
        self.FINAL,       # 0x0010
        _, _, _,          # ...
        self.PUBLIC,      # 0x0001
    ) = reader.read_flags(16)
def _read_libname(self, reader: StructReader) -> Optional[str]:
    """
    Try to read the 64-byte python library name field at the current position.
    On any failure (short read, bad encoding, missing terminator, junk after
    the terminator, or non-printable characters) the reader is rewound and
    None is returned.
    """
    anchor = reader.tell()

    def bail():
        reader.seekset(anchor)
        return None

    try:
        raw, terminator, tail = reader.read_bytes(64).partition(B'\0')
    except EOF:
        return bail()
    try:
        text = raw.decode('utf8')
    except Exception:
        return bail()
    # Require a NUL terminator, an all-zero tail of reasonable size, and a
    # name consisting only of whitespace and printable ASCII.
    if not terminator or any(tail) or len(tail) < 10 or not re.fullmatch(
            R'[\s!-~]+', text):
        return bail()
    return text
def process(self, data):
    """
    Expand an SZDD (MS-DOS COMPRESS / EXPAND) archive: LZSS with a 4096-byte
    sliding window initialized to spaces, 12-bit match positions and match
    lengths of 3..18 bytes.
    """
    with StructReader(data) as archive:
        if archive.read(8) != b'SZDD\x88\xF0\x27\x33':
            if not self.args.lenient:
                raise ValueError('signature missing')
            self.log_fail(
                'the header signature is invalid, this is likely not an SZDD archive'
            )
        # 0x41 ('A') is the only documented compression mode.
        if archive.read_byte() != 0x41:
            raise ValueError('Unsupported compression mode')
        # ignore the missing file extension letter:
        archive.seekrel(1)
        output_len = archive.u32()
        # The window write pointer starts 16 bytes before the wrap point.
        window_pos = 0x1000 - 0x10
        output_pos = 0
        output = bytearray(output_len)
        window = bytearray(0x1000)
        # The window is initialized with spaces (0x20).
        for k in range(len(window)):
            window[k] = 0x20
        while not archive.eof:
            control = archive.read_byte()
            # Each control byte governs the next 8 items, LSB first:
            # set bit = literal byte, clear bit = (position, length) match.
            for cb in (0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80):
                if archive.eof:
                    break
                if control & cb:
                    output[output_pos] = window[
                        window_pos] = archive.read_byte()
                    output_pos += 1
                    window_pos += 1
                    window_pos &= 0xFFF
                else:
                    # 12-bit match position: low 8 bits plus the high nibble
                    # of the length byte; length is the low nibble plus 3.
                    match_pos = archive.read_byte()
                    match_len = archive.read_byte()
                    match_pos |= (match_len & 0xF0) << 4
                    match_len = (match_len & 0x0F) + 3
                    match_pos &= 0xFFF
                    for _ in range(match_len):
                        window[window_pos] = window[match_pos]
                        output[output_pos] = window[window_pos]
                        output_pos += 1
                        window_pos += 1
                        match_pos += 1
                        window_pos &= 0xFFF
                        match_pos &= 0xFFF
        return output
def process(self, data: bytearray):
    """
    Decompress one or more concatenated LZIP streams. Header and trailer
    inconsistencies after the first stream are logged rather than fatal;
    trailing non-LZIP data after at least one valid stream is ignored.
    """
    view = memoryview(data)
    with MemoryFile() as output, StructReader(view) as reader:
        for k in count(1):
            if reader.eof:
                break
            trailing_size = len(data) - reader.tell()
            try:
                ID, VN, DS = reader.read_struct('4sBB')
                if ID != B'LZIP':
                    if k > 1:
                        # Not a stream header; treat the rest as trailing data.
                        raise EOF
                    else:
                        self.log_warn(F'ignoring invalid LZIP signature: {ID.hex()}')
                if VN != 1:
                    self.log_warn(F'ignoring invalid LZIP version: {VN}')
                # The coded dictionary size: a power of two, reduced by up to
                # 7/16 of itself via the high 3 bits.
                dict_size = 1 << (DS & 0x1F)
                dict_size -= (dict_size // 16) * ((DS >> 5) & 7)
                if dict_size not in range(_MIN_DICT_SIZE, _MAX_DICT_SIZE + 1):
                    raise ValueError(
                        F'The dictionary size {dict_size} is out of the valid range '
                        F'[{_MIN_DICT_SIZE}, {_MAX_DICT_SIZE}]; unable to proceed.'
                    )
                decoder = MemberDecoder(dict_size, reader, output)
                if not decoder():
                    raise ValueError(F'Data error in stream {k}.')
                crc32, data_size, member_size = reader.read_struct('<LQQ')
                if crc32 != decoder.crc32:
                    # Bugfix: this message previously interpolated decoder.crc,
                    # an attribute that does not exist (the comparison above
                    # uses decoder.crc32), so the warning itself raised
                    # AttributeError exactly when a mismatch occurred.
                    self.log_warn(F'checksum in stream {k} was {decoder.crc32:08X}, should have been {crc32:08X}.')
                if member_size - 20 != decoder.member_position:
                    self.log_warn(F'member size in stream {k} was {decoder.member_position}, should have been {member_size}.')
                if data_size != decoder.data_position:
                    self.log_warn(F'data size in stream {k} was {decoder.data_position}, should have been {data_size}.')
            except EOF:
                if k <= 1:
                    raise
                self.log_info(F'silently ignoring {trailing_size} bytes of trailing data')
                break
        return output.getvalue()
def _get_text(self, doc: bytes, piece_table: bytes) -> str:
    """
    Decode the document text described by a Word piece table: character
    positions first, then one 8-byte piece descriptor per text run, each
    selecting cp1252 or UTF-16 encoded bytes from the WordDocument stream.
    """
    piece_count: int = 1 + (len(piece_table) - 4) // 12
    chunks = []
    with StructReader(piece_table) as reader:
        positions = [reader.u32() for _ in range(piece_count)]
        for cp_start, cp_end in zip(positions, positions[1:]):
            # The fc dword sits between two 2-byte fields of the descriptor.
            fc_value = reader.read_struct('xxLxx', unwrap=True)
            # Bit 30 marks a "compressed" (cp1252) piece; the remaining bits
            # encode the file offset.
            is_ansi = bool((fc_value >> 30) & 1)
            fc = fc_value & 0xBFFFFFFF
            cb = cp_end - cp_start
            if is_ansi:
                encoding = 'cp1252'
                fc = fc // 2
            else:
                encoding = 'utf16'
                cb *= 2
            raw = doc[fc:fc + cb]
            chunks.append(raw.decode(encoding).replace('\r', '\n'))
    return ''.join(chunks)
def __init__(self, reader: StructReader):
    """
    Parse one newc-format ('070701') CPIO archive member: a fixed header of
    ASCII-hex fields, the NUL-terminated name, then the 4-byte-aligned data.
    """
    def hexfield(length: int):
        # Header numbers are ASCII hex digits, two characters per byte.
        return int(bytes(reader.read(length * 2)), 16)

    self.signature = reader.read(6)
    if self.signature != b'070701':
        raise ValueError('invalid CPIO header signature')
    self.inode = hexfield(4)
    self.mode = hexfield(4)
    self.uid = hexfield(4)
    self.gid = hexfield(4)
    self.nlinks = hexfield(4)
    self.mtime = datetime.utcfromtimestamp(hexfield(4))
    self.size = hexfield(4)
    self.dev = hexfield(4), hexfield(4)
    self.rdev = hexfield(4), hexfield(4)
    name_size = hexfield(4)
    self.checksum = hexfield(4)
    self.name = bytes(reader.read(name_size)).decode('ascii').rstrip('\0')
    # Both the name and the payload are padded to 4-byte boundaries.
    reader.byte_align(4)
    self.data = reader.read(self.size)
    reader.byte_align(4)
def _decompress_xpress(self, reader: StructReader, writer: MemoryFile, target: Optional[int] = None) -> bytearray:
    """
    Decompress a plain (non-Huffman) XPRESS/LZ77 stream from `reader` into
    `writer`, optionally stopping once `target` additional bytes were written.
    One flag bit per token selects literal vs. match; matches use staged
    length extension fields per the MS-XCA plain LZ77 layout.
    """
    if target is not None:
        target += writer.tell()
    flags = BitBufferedReader(reader)
    # Extended length nibbles are emitted in pairs; remember the unused half.
    nibble_cache = None
    while not reader.eof:
        if target is not None and writer.tell() >= target:
            return
        if not flags.next():
            # Clear flag bit: a single literal byte.
            writer.write(reader.read(1))
            continue
        # Match token: 13 bits of offset, 3 bits of length.
        offset, length = divmod(reader.u16(), 8)
        offset += 1
        if length == 7:
            # Length 7 escapes to a nibble, then a byte, then a word/dword.
            length = nibble_cache
            if length is None:
                length_pair = reader.u8()
                nibble_cache = length_pair >> 4
                length = length_pair & 0xF
            else:
                nibble_cache = None
            if length == 15:
                length = reader.u8()
                if length == 0xFF:
                    # A zero u16 indicates that a u32 length follows.
                    length = reader.u16() or reader.u32()
                    # The staged additions below re-add 22 (= 15 + 7), so the
                    # raw value is pre-adjusted here.
                    length -= 22
                    if length < 0:
                        raise RuntimeError(
                            F'Invalid match length of {length} for long delta sequence'
                        )
                length += 15
            length += 7
        length += 3
        writer.replay(offset, length)
def _decompress_xpress_huffman(self, reader: StructReader, writer: MemoryFile, target: Optional[int] = None,
        max_chunk_size: int = 0x10000) -> None:
    """
    Decompress an XPRESS-Huffman stream chunk by chunk: each chunk starts with
    a packed table of 4-bit codeword lengths from which a Huffman decode table
    is built; symbols below XPRESS_NUM_CHARS are literals, the rest encode
    (length, offset-log) match tokens. Stops at `target` output bytes if given.
    """
    limit = writer.tell()
    if target is not None:
        target += limit
    while not reader.eof:
        if reader.remaining_bytes < XPRESS_NUM_SYMBOLS // 2:
            # Typo fix in the error message: "reamining" -> "remaining".
            raise IndexError(
                F'There are only {reader.remaining_bytes} bytes remaining in the input buffer,'
                F' but at least {XPRESS_NUM_SYMBOLS//2} are required to read a Huffman table.'
            )
        # Two 4-bit code lengths per byte, one per symbol.
        table = bytearray(
            reader.read_integer(4) for _ in range(XPRESS_NUM_SYMBOLS))
        table = make_huffman_decode_table(table, XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN)
        limit = limit + max_chunk_size
        flags = BitBufferedReader(reader, 16)
        while True:
            position = writer.tell()
            if position == target:
                if reader.remaining_bytes:
                    self.log_info(
                        F'chunk decompressed with {reader.remaining_bytes} bytes remaining in input buffer'
                    )
                return
            if position >= limit:
                if position > limit:
                    limit = position
                    self.log_info(
                        F'decompression of one chunk generated more than the limit of {max_chunk_size} bytes'
                    )
                flags.collect()
                break
            try:
                sym = flags.huffman_symbol(table, XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN)
            except EOFError:
                self.log_debug('end of file while reading huffman symbol')
                break
            if sym < XPRESS_NUM_CHARS:
                writer.write_byte(sym)
                continue
            # Match token: low nibble is the base length, next nibble the
            # number of explicit offset bits.
            length = sym & 0xF
            offsetlog = (sym >> 4) & 0xF
            flags.collect()
            if reader.eof:
                break
            offset = (1 << offsetlog) | flags.read(offsetlog)
            if length == 0xF:
                # Extended length: byte, then word, then dword escape.
                nudge = reader.read_byte()
                if nudge < 0xFF:
                    length += nudge
                else:
                    length = reader.u16() or reader.u32()
            length += XPRESS_MIN_MATCH_LEN
            writer.replay(offset, length)
def test_bitreader_structured(self):
    """
    Exercise typed reads (i16/i32/u32/i64/float/double) and the alignment
    guard against a little-endian struct-packed fixture.
    """
    items = (
        0b1100101,      # noqa
        -0x1337,        # noqa
        0xDEFACED,      # noqa
        0xC0CAC01A,     # noqa
        -0o1337,        # noqa
        2076.171875,    # noqa
        math.pi         # noqa
    )
    data = struct.pack('<bhiLqfd', *items)
    sr = StructReader(data)
    # Low nibble of the first byte (0b1100101) is 0b101.
    self.assertEqual(sr.read_nibble(), 0b101)
    # A byte-level read on an unaligned stream must fail.
    self.assertRaises(sr.Unaligned, lambda: sr.read_exactly(2))
    sr.seek(0)
    self.assertEqual(sr.read_byte(), 0b1100101)
    self.assertEqual(sr.i16(), -0x1337)
    self.assertEqual(sr.i32(), 0xDEFACED)
    self.assertEqual(sr.u32(), 0xC0CAC01A)
    self.assertEqual(sr.i64(), -0o1337)
    self.assertAlmostEqual(sr.read_struct('f', True), 2076.171875)
    self.assertAlmostEqual(sr.read_struct('d', True), math.pi)
    self.assertTrue(sr.eof)
class blz(Unit):
    """
    BriefLZ compression and decompression. The compression algorithm uses a pure Python suffix tree
    implementation: It requires a lot of time & memory.
    """
    def _begin(self, data):
        # Set up the input reader and output buffer for one operation.
        self._src = StructReader(memoryview(data))
        self._dst = MemoryFile(bytearray())
        return self

    def _reset(self):
        # Rewind the input and clear the output so decompression can restart.
        self._src.seek(0)
        self._dst.seek(0)
        self._dst.truncate()
        return self

    def _decompress(self):
        """
        Decompress a standard BriefLZ stream: a 24-byte big-endian header
        (signature, version, source/destination sizes and CRCs) followed by
        one compressed chunk. CRC mismatches are logged, not fatal.
        """
        (
            signature,
            version,
            src_count,
            src_crc32,
            dst_count,
            dst_crc32,
        ) = self._src.read_struct('>6L')
        if signature != 0x626C7A1A:
            raise ValueError(F'Invalid BriefLZ signature: {signature:08X}, should be 626C7A1A.')
        if version > 10:
            raise ValueError(F'Invalid version number {version}, should be less than 10.')
        self.log_debug(F'signature: 0x{signature:08X} V{version}')
        self.log_debug(F'src count: 0x{src_count:08X}')
        self.log_debug(F'src crc32: 0x{src_crc32:08X}')
        self.log_debug(F'dst count: 0x{dst_count:08X}')
        self.log_debug(F'dst crc32: 0x{dst_crc32:08X}')
        src = self._src.getbuffer()
        src = src[24:24 + src_count]
        if len(src) < src_count:
            self.log_warn(F'Only {len(src)} bytes in buffer, but header annoucned a length of {src_count}.')
        if src_crc32:
            check = zlib.crc32(src)
            if check != src_crc32:
                self.log_warn(F'Invalid source data CRC {check:08X}, should be {src_crc32:08X}.')
        dst = self._decompress_chunk(dst_count)
        if not dst_crc32:
            return dst
        check = zlib.crc32(dst)
        if check != dst_crc32:
            self.log_warn(F'Invalid result data CRC {check:08X}, should be {dst_crc32:08X}.')
        return dst

    def _decompress_modded(self):
        """
        Decompress a modified (headerless-CRC) BriefLZ variant: after 8 bytes,
        a total size and chunk size are read and chunks decompressed in turn.
        """
        self._src.seekrel(8)
        total_size = self._src.u64()
        chunk_size = self._src.u64()
        remaining = total_size
        self.log_debug(F'total size: 0x{total_size:016X}')
        self.log_debug(F'chunk size: 0x{chunk_size:016X}')
        while remaining > chunk_size:
            self._decompress_chunk(chunk_size)
            remaining -= chunk_size
        return self._decompress_chunk(remaining)

    def _decompress_chunk(self, size=None):
        """
        Decompress one BriefLZ chunk into self._dst, up to `size` output bytes
        when given. Raises RefineryPartialResult with the partial output when
        the input ends early or a match rewinds past the buffer start.
        """
        bitcount = 0
        bitstore = 0
        decompressed = 1

        def readbit():
            # Bits are consumed MSB-first from 16-bit little-endian words.
            nonlocal bitcount, bitstore
            if not bitcount:
                bitstore = int.from_bytes(self._src.read_exactly(2), 'little')
                bitcount = 0xF
            else:
                bitcount = bitcount - 1
            return (bitstore >> bitcount) & 1

        def readint():
            # Gamma-style encoding: implicit leading bit, then (continue, data)
            # bit pairs; minimum decoded value is 2.
            result = 2 + readbit()
            while readbit():
                result <<= 1
                result += readbit()
            return result

        # The first output byte is always stored verbatim.
        self._dst.write(self._src.read_exactly(1))
        try:
            while not size or decompressed < size:
                if readbit():
                    # Match token: length and sector from the bit stream, low
                    # offset byte from the byte stream.
                    length = readint() + 2
                    sector = readint() - 2
                    offset = self._src.read_byte() + 1
                    delta = offset + 0x100 * sector
                    available = self._dst.tell()
                    if delta not in range(available + 1):
                        raise RefineryPartialResult(
                            F'Requested rewind by 0x{delta:08X} bytes with only 0x{available:08X} bytes in output buffer.',
                            partial=self._dst.getvalue())
                    # Overlapping matches are replayed by repeating the window.
                    quotient, remainder = divmod(length, delta)
                    replay = memoryview(self._dst.getbuffer())
                    replay = bytes(replay[-delta:] if quotient else replay[-delta:length - delta])
                    replay = quotient * replay + replay[:remainder]
                    self._dst.write(replay)
                    decompressed += length
                else:
                    # Literal token: copy one byte through.
                    self._dst.write(self._src.read_exactly(1))
                    decompressed += 1
        except EOF as E:
            raise RefineryPartialResult(str(E), partial=self._dst.getbuffer())
        dst = self._dst.getbuffer()
        if decompressed < size:
            raise RefineryPartialResult(
                F'Attempted to decompress {size} bytes, got only {len(dst)}.',
                dst)
        if decompressed > size:
            raise RuntimeError('Decompressed buffer contained more bytes than expected.')
        return dst

    def _compress(self):
        """
        Compress self._src into a standard BriefLZ stream. Longest matches are
        found via a suffix tree; token bits are accumulated in `bitstore` and
        flushed as little-endian 16-bit words interleaved with the byte data.
        """
        from refinery.lib.suffixtree import SuffixTree
        try:
            self.log_info('computing suffix tree')
            tree = SuffixTree(self._src.getbuffer())
        except Exception:
            raise
        bitstore = 0  # The bit stream to be written
        bitcount = 0  # The number of bits in the bit stream
        buffer = MemoryFile(bytearray())
        # Write empty header and first byte of source
        self._dst.write(bytearray(24))
        self._dst.write(self._src.read_exactly(1))

        def writeint(n: int) -> None:
            """
            Write an integer to the bit stream.
            """
            nonlocal bitstore, bitcount
            nbits = n.bit_length()
            if nbits < 2:
                raise ValueError
            # The highest bit is implicitly assumed:
            n ^= 1 << (nbits - 1)
            remaining = nbits - 2
            while remaining:
                remaining -= 1
                bitstore <<= 2
                bitcount += 2
                bitstore |= ((n >> remaining) & 3) | 1
            bitstore <<= 2
            bitcount += 2
            bitstore |= (n & 1) << 1

        src = self._src.getbuffer()
        remaining = len(src) - 1
        self.log_info('compressing data')
        while True:
            cursor = len(src) - remaining
            rest = src[cursor:]
            if bitcount >= 0x10:
                # Flush complete 16-bit info blocks, keeping byte data aligned.
                block_count, bitcount = divmod(bitcount, 0x10)
                info_channel = bitstore >> bitcount
                bitstore = info_channel << bitcount ^ bitstore
                # The decompressor will read bits from top to bottom, and each 16 bit block has to be
                # little-endian encoded. The bit stream is encoded top to bottom bit in the bitstore
                # variable, and by encoding it as a big endian integer, the stream is in the correct
                # order. However, we need to swap adjacent bytes to achieve little endian encoding for
                # each of the blocks:
                info_channel = bytearray(info_channel.to_bytes(block_count * 2, 'big'))
                for k in range(block_count):
                    k0 = 2 * k + 0
                    k1 = 2 * k + 1
                    info_channel[k0], info_channel[k1] = info_channel[k1], info_channel[k0]
                info_channel = memoryview(info_channel)
                data_channel = memoryview(buffer.getbuffer())
                self._dst.write(info_channel[:2])
                self._dst.write(data_channel[:-1])
                self._dst.write(info_channel[2:])
                data_channel = bytes(data_channel[-1:])
                buffer.truncate(0)
                store = buffer if bitcount else self._dst
                store.write(data_channel)
            if remaining + bitcount < 0x10:
                # Not enough input left to fill another info block; flush and stop.
                buffer = buffer.getbuffer()
                if rest or buffer:
                    bitstore <<= 0x10 - bitcount
                    self._dst.write(bitstore.to_bytes(2, 'little'))
                    self._dst.write(buffer)
                    self._dst.write(rest)
                elif bitcount:
                    raise RuntimeError('Bitbuffer Overflow')
                break
            # Walk the suffix tree to find the longest match starting at cursor.
            node = tree.root
            length = 0
            offset = 0
            sector = None
            while node.children and length < len(rest):
                for child in node.children.values():
                    if tree.data[child.start] == rest[length]:
                        node = child
                        break
                if node.start >= cursor:
                    break
                offset = node.start - length
                length = node.end + 1 - offset
            length = min(remaining, length)
            if length >= 4:
                # Encode the match distance as (sector, offset) base-256 pair.
                sector, offset = divmod(cursor - offset - 1, 0x100)
            bitcount += 1
            bitstore <<= 1
            if sector is None:
                # Literal: flag bit 0, raw byte in the data channel.
                buffer.write(rest[:1])
                remaining -= 1
                continue
            # Match: flag bit 1, offset byte plus two bit-stream integers.
            bitstore |= 1
            buffer.write(bytes((offset,)))
            writeint(length - 2)
            writeint(sector + 2)
            remaining -= length
        # Go back and fill in the header now that sizes and CRCs are known.
        self._dst.seek(24)
        dst = self._dst.peek()
        self._dst.seek(0)
        self._dst.write(struct.pack('>6L', 0x626C7A1A, 1, len(dst), zlib.crc32(dst), len(src), zlib.crc32(src)))
        return self._dst.getbuffer()

    def process(self, data):
        # Try standard BriefLZ first; on failure, fall back to the modified
        # variant, preferring the original partial result if both fail.
        self._begin(data)
        partial = None
        try:
            return self._decompress()
        except ValueError as error:
            if isinstance(error, RefineryPartialResult):
                partial = error
            self.log_warn(F'Reverting to modified BriefLZ after decompression error: {error!s}')
            self._reset()
        try:
            return self._decompress_modded()
        except RefineryPartialResult:
            raise
        except Exception as error:
            if not partial:
                raise
            raise partial from error

    def reverse(self, data):
        # Compression entry point.
        return self._begin(data)._compress()
def _begin(self, data):
    """Initialize the input reader and output buffer for processing `data`."""
    self._dst = MemoryFile(bytearray())
    self._src = StructReader(memoryview(data))
    return self
def decompress_stream(self, data: ByteString, LZOv1: bool = False) -> bytearray:
    """
    An implementation of LZO decompression. We use the article
    "[LZO stream format as understood by Linux's LZO decompressor](https://www.kernel.org/doc/html/latest/staging/lzo.html)"
    as a reference since no proper specification is available.
    """
    def integer() -> int:
        # Run-length integer: each zero byte adds 255; a nonzero byte ends it.
        length = 0
        while True:
            byte = src.read_byte()
            if byte:
                return length + byte
            length += 0xFF
            if length > 0x100000:
                raise LZOError('Too many zeros in integer encoding.')

    def literal(count):
        dst.write(src.read_bytes(count))

    def copy(distance: int, length: int):
        # Replay `length` bytes from `distance` bytes back in the output;
        # overlapping copies repeat the window as needed.
        if distance > len(dst):
            raise LZOError(F'Distance {distance} > bufsize {len(dst)}')
        buffer = dst.getbuffer()
        if distance > length:
            start = len(buffer) - distance
            end = start + length
            dst.write(buffer[start:end])
        else:
            block = buffer[-distance:]
            while len(block) < length:
                block += block[:length - len(block)]
            if len(block) > length:
                block[length:] = ()
            dst.write(block)

    src = StructReader(memoryview(data))
    dst = MemoryFile()
    state = 0
    first = src.read_byte()
    # The first byte may encode an initial literal run.
    if first == 0x10:
        raise LZOError('Invalid first stream byte 0x10.')
    elif first <= 0x12:
        src.seekrel(-1)
    elif first <= 0x15:
        state = first - 0x11
        literal(state)
    else:
        state = 4
        literal(first - 0x11)
    while True:
        instruction = src.read_byte()
        if instruction < 0x10:
            if state == 0:
                # Long literal run follows.
                length = instruction or integer() + 15
                state = length + 3
                if state < 4:
                    raise LZOError('Literal encoding is too short.')
            else:
                # Bugfix: the match layout depends on the *previous* state.
                # Previously, `state` was reassigned from the instruction
                # before the `>= 4` test, so the state-4 branch (distance
                # offset by 0x800 and length 3) was unreachable and those
                # matches decoded incorrectly.
                previous_state = state
                state = instruction & 0b0011
                D = (instruction & 0b1100) >> 2
                H = src.read_byte()
                distance = (H << 2) + D + 1
                if previous_state >= 4:
                    distance += 0x800
                    length = 3
                else:
                    length = 2
                copy(distance, length)
        elif instruction < 0x20:
            # Large-distance match (up to 48K behind, offset by 16K).
            L = instruction & 0b0111
            H = instruction & 0b1000
            length = L or integer() + 7
            argument = src.u16()
            state = argument & 3
            distance = (H << 11) + (argument >> 2)
            if not distance:
                # Zero distance marks the end of the stream.
                return dst.getbuffer()
            if LZOv1 and distance & 0x803F == 0x803F and length in range(261, 265):
                raise LZOError('Compressed data contains sequence that is banned in LZOv1.')
            if LZOv1 and distance == 0xBFFF:
                # Version 1 only: an encoded run of zero bytes.
                X = src.read_byte()
                count = ((X << 3) | L) + 4
                self.log_debug(F'Writing run of {X} zero bytes according to LZOv1.')
                dst.write(B'\0' * count)
            else:
                copy(distance + 0x4000, length + 2)
        elif instruction < 0x40:
            # Medium match: distance up to 16K.
            L = instruction & 0b11111
            length = L or integer() + 31
            argument = src.u16()
            state = argument & 3
            distance = (argument >> 2) + 1
            copy(distance, length + 2)
        else:
            # Short match with distance encoded in the instruction + one byte.
            if instruction < 0x80:
                length = 3 + ((instruction >> 5) & 1)
            else:
                length = 5 + ((instruction >> 5) & 3)
            H = src.read_byte()
            D = (instruction & 0b11100) >> 2
            state = instruction & 3
            distance = (H << 3) + D + 1
            copy(distance, length)
        if state:
            # Up to 3 trailing literals follow most instructions.
            literal(state)
def __init__(self, reader: StructReader, offset: int, unmarshal: Unmarshal = Unmarshal.No):
    """
    Parse the PyInstaller CArchive epilogue at `offset`: header, TOC, embedded
    PYZ archives (to recover the pyc magic), and the contained files. When
    `unmarshal` is enabled, PYZ members are also unpacked, optionally using a
    crypto key recovered from a decompiled `*crypto_key` module.
    """
    reader.bigendian = True
    reader.seekset(offset)
    self.reader = reader
    signature = reader.read_bytes(8)
    if signature != self.MagicSignature:
        raise ValueError(
            F'offset 0x{offset:X} has invalid signature {signature.hex().upper()}; '
            F'should be {self.MagicSignature.hex().upper()}')
    self.size = reader.i32()
    toc_offset = reader.i32()
    toc_length = reader.i32()
    # NOTE(review): this joins the digits of a single integer with dots, e.g.
    # 27 -> "2.7"; this would render 3.10+ ambiguously -- confirm intended.
    self.py_version = '.'.join(str(reader.u32()))
    self.py_libname = self._read_libname(reader)
    # The archive body starts `self.size` bytes before the end of the header.
    self.offset = reader.tell() - self.size
    self.toc: Dict[str, PiTOCEntry] = {}
    toc_end = self.offset + toc_offset + toc_length
    reader.seekset(self.offset + toc_offset)
    while reader.tell() < toc_end:
        try:
            entry = PiTOCEntry(reader)
        except EOF:
            xtpyi.logger.warning('end of file while reading TOC')
            break
        except Exception as error:
            xtpyi.logger.warning(
                F'unexpected error while reading TOC: {error!s}')
            break
        if entry.name in self.toc:
            raise KeyError(F'duplicate name {entry.name}')
        self.toc[entry.name] = entry
    self.files: Dict[str, PiMeta] = {}
    no_pyz_found = True
    pyz_entries: Dict[str, PYZ] = {}
    # First pass: locate embedded PYZ archives and normalize their names.
    for entry in list(self.toc.values()):
        if entry.type is not PiType.PYZ:
            continue
        no_pyz_found = False
        name, xt = os.path.splitext(entry.name)
        name_pyz = F'{name}.pyz'
        if name == entry.name:
            # Rekey entries that lack the .pyz extension.
            del self.toc[name]
            self.toc[name_pyz] = entry
            entry.name = name_pyz
        reader.seekset(self.offset + entry.offset)
        if entry.is_compressed:
            data = self.extract(entry.name).unpack()
        else:
            data = reader
        pyz_entries[name] = PYZ(data, self.py_version)
    magics = {pyz.magic for pyz in pyz_entries.values()}
    if not magics:
        if not no_pyz_found:
            xtpyi.logger.warning(
                'no magic signature could be recovered from embedded pyzip archives; this is '
                'unsual and means that there is no way to guess the missing magic for source '
                'file entries and it will likely not be possible to decompile them.'
            )
        return
    elif len(magics) > 1:
        xtpyi.logger.warning(
            'more than one magic signature was recovered; this is unusual.'
        )
    magics = list(magics)
    keys: Set[bytes] = set()
    # Second pass: extract every TOC member; source/module entries are renamed
    # to .pyc and decompiled.
    for entry in self.toc.values():
        extracted = self.extract(entry.name)
        if entry.type not in (PiType.SOURCE, PiType.MODULE):
            self.files[entry.name] = extracted
            continue
        data = extracted.unpack()
        name, _ = os.path.splitext(extracted.name)
        # NOTE(review): this assumes self.extract already registered the item
        # under extracted.name in self.files; otherwise this raises KeyError
        # for SOURCE/MODULE entries -- verify against the extract method.
        del self.files[extracted.name]
        extracted.name = F'{name}.pyc'
        self.files[extracted.name] = extracted
        # Prepend the recovered magic if the entry lacks one.
        if len(magics) == 1 and data[:4] != magics[0]:
            extracted.data = magics[0] + data
        decompiled = make_decompiled_item(name, data, *magics)
        if entry.type is PiType.SOURCE:
            decompiled.type = PiType.USERCODE
        self.files[F'{name}.py'] = decompiled
        # PyInstaller crypto keys are 16-byte strings in a *crypto_key module.
        if name.endswith('crypto_key'):
            for key in decompiled.unpack() | carve('string', decode=True):
                if len(key) != 0x10:
                    continue
                xtpyi.logger.info(F'found key: {key.decode(xtpyi.codec)}')
                keys.add(key)
    if unmarshal is Unmarshal.No:
        return
    if not keys:
        key = None
    else:
        key = next(iter(keys))
    # Finally, unpack the PYZ members themselves.
    for name, pyz in pyz_entries.items():
        pyz.unpack(unmarshal is Unmarshal.YesAndDecompile, key)
        for unpacked in pyz.entries:
            unpacked.name = path = F'{name}/{unpacked.name}'
            if path in self.files:
                raise ValueError(F'duplicate file name: {path}')
            self.files[path] = unpacked
def __init__(self, reader: StructReader, calculate_checks=False):
    """
    Parse a DEX file header. When `calculate_checks` is set, the Adler-32
    checksum and SHA-1 signature are recomputed over the remaining data for
    later comparison with the stored values.
    """
    if reader.read(4) != b'dex\n':
        raise ValueError('Invalid Signature')
    # Peek at the endian tag at offset 0x28 to configure the reader before
    # parsing any multi-byte fields.
    with StreamDetour(reader, 0x28):
        endian_test_data = reader.u32()
        if endian_test_data == 0x78563412:
            reader.bigendian = True
    self.version = reader.read(4).rstrip(b'\0')
    self.checksum = reader.u32()
    if calculate_checks:
        # Adler-32 over everything after the checksum field.
        with StreamDetour(reader):
            self.calculated_checksum = zlib.adler32(reader.read())
    else:
        self.calculated_checksum = None
    self.signature = reader.read(20)
    if calculate_checks:
        # SHA-1 over everything after the signature field.
        with StreamDetour(reader):
            self.calculated_signature = hashlib.sha1(
                reader.read()).digest()
    else:
        self.calculated_signature = None
    self.size_of_file = reader.u32()
    self.size_of_header = reader.u32()
    # With the endianness configured above, the tag always reads 0x12345678.
    if reader.u32() != 0x12345678:
        raise ValueError('Invalid Endian Tag')
    self.link_size = reader.u32()
    self.link_offset = reader.u32()
    self.map_offset = reader.u32()
    # Arguments evaluate left to right: string_ids size, then offset.
    self.strings: List[str] = list(
        self._read_strings(reader, reader.u32(), reader.u32()))
def process(self, data):
    """
    Decompress a chunked stream with an optional 24-byte signed header that
    specifies the algorithm, final size, and chunk size. Without the header
    (or without a recognizable signature), the input is treated as one raw
    stream using the algorithm selected via the unit's mode argument.
    """
    mode: MODE = self.args.mode
    with StructReader(memoryview(data)) as reader, MemoryFile() as writer:
        reader: StructReader[memoryview]
        # CRC32 of the first 6 bytes feeds the header check byte below.
        check = zlib.crc32(reader.peek(6))
        magic = reader.read(4)
        if magic != self._SIGNATURE:
            if mode is None:
                self.log_warn(
                    F'data starts with {magic.hex().upper()} rather than the expected sequence '
                    F'{self._SIGNATURE.hex().upper()}; this could be a raw stream.'
                )
            else:
                # No header: decompress the whole input as a single raw chunk.
                reader.seek(0)
                handler = self._get_handler(mode)
                handler(reader, writer, None)
                return writer.getbuffer()
        header_size = reader.u16()
        if header_size != 24:
            self.log_warn(
                F'the header size {header_size} was not equal to 24')
        crc32byte = reader.u8()
        # The check byte covers the 0x11 bytes following it.
        check = zlib.crc32(reader.peek(0x11), check) & 0xFF
        if check != crc32byte:
            self.log_warn(
                F'the CRC32 check byte was {crc32byte}, computed value was {check}'
            )
        _mode_code = reader.u8()
        try:
            _mode = MODE(_mode_code)
        except ValueError:
            msg = F'header contains unknown compression type code {_mode_code}'
            if mode is None:
                raise ValueError(msg)
            else:
                self.log_warn(msg)
        else:
            # Prefer the header's algorithm; warn if it contradicts the user.
            if mode is not None and mode != _mode:
                logger = self.log_warn
            else:
                logger = self.log_info
            mode = _mode
            logger(F'header specifies algorithm {_mode.name}')
        self.log_info(F'using algorithm {mode.name}')
        decompress = self._get_handler(mode)
        final_size = reader.u32()
        _unknown_1 = reader.u32()
        chunk_size = reader.u32()
        _unknown_2 = reader.u32()
        if _unknown_1 != 0:
            self.log_warn(
                F'unknown value 1 was unexpectedly nonzero: 0x{_unknown_1:08X}'
            )
        if _unknown_2 != 0:
            self.log_warn(
                F'unknown value 2 was unexpectedly nonzero: 0x{_unknown_2:08X}'
            )
        self.log_debug(F'final size: 0x{final_size:08X}')
        self.log_debug(F'chunk size: 0x{chunk_size:08X}')
        if chunk_size > COMPRESS_MAX_CHUNK:
            raise ValueError(
                'the header chunk size is greater than the maximum value')
        # Each chunk is a length-prefixed blob; a chunk that would complete
        # the output exactly is stored uncompressed.
        while len(writer) < final_size:
            src_size = reader.u32()
            src_data = reader.read(src_size)
            if len(src_data) != src_size:
                raise IndexError(
                    F'Attempted to read {src_size} bytes, but got only {len(src_data)}.'
                )
            if src_size + len(writer) == final_size:
                self.log_debug(
                    F'final chunk is uncompressed, appending {src_size} raw bytes to output'
                )
                writer.write(src_data)
                break
            self.log_debug(F'reading chunk of size {src_size}')
            start = writer.tell()
            chunk = StructReader(src_data)
            target = min(chunk_size, final_size - len(writer))
            decompress(chunk, writer, target)
            writer.flush()
            written = writer.tell() - start
            if written != target:
                raise RuntimeError(
                    F'decompressed output had unexpected size {written} instead of {chunk_size}'
                )
        if not reader.eof:
            self.log_info(
                F'compression complete with {reader.remaining_bytes} bytes remaining in input'
            )
        return writer.getbuffer()