Esempio n. 1
0
    def _read_strings(self, reader: StructReader, size: int,
                      offset: int) -> Generator[str, None, None]:
        """
        Yield the strings of a string table: at `offset`, `size` 32-bit
        offsets are read; each points to a length-prefixed, null-terminated
        string decoded via JvClassFile.decode_utf8m.
        """
        def uleb128():
            # Variable-length integer: up to five 7-bit limbs, least
            # significant limb first; after each limb a single bit signals
            # whether another limb follows.
            value = 0
            more = True
            for k in range(0, 35, 7):
                limb = reader.read_integer(7)
                more = reader.read_bit()
                value |= limb << k
                if not more:
                    break
            assert not more
            return value

        with StreamDetour(reader, offset):
            offsets = [reader.u32() for _ in range(size)]
            for offset in offsets:
                reader.seek(offset)
                size = uleb128()
                # A zero length marks an empty slot; skip it.
                if not size:
                    continue
                data = reader.read_c_string()
                string = JvClassFile.decode_utf8m(data)
                # The decoded character count must match the length prefix.
                if len(string) != size:
                    raise RuntimeError(
                        F'Read string of length {len(string)}, expected length {size}.'
                    )
                yield string
Esempio n. 2
0
 def __init__(self, reader: StructReader):
     """Parse one big-endian PyInstaller TOC entry from reader."""
     reader.bigendian = True
     entry_start_offset = reader.tell()
     self.size_of_entry = reader.i32()
     self.offset = reader.i32()
     self.size_of_compressed_data = reader.i32()
     # NOTE(review): attribute name contains a typo ("od"); kept as-is
     # because external code may rely on it.
     self.size_od_uncompressed_data = reader.i32()
     self.is_compressed = bool(reader.read_byte())
     entry_type = bytes(reader.read(1))
     # The name occupies whatever remains of the declared entry size.
     name_length = self.size_of_entry - reader.tell() + entry_start_offset
     if name_length > 0x1000:
         raise RuntimeError(
             F'Refusing to process TOC entry with name of size {name_length}.'
         )
     # Truncate the name at the first null byte.
     name, *_ = bytes(reader.read(name_length)).partition(B'\0')
     try:
         name = name.decode('utf8', 'backslashreplace')
     except Exception:
         # NOTE(review): with errors='backslashreplace' the decode should
         # not raise; this is a defensive fallback.
         name = None
     if not all(part.isprintable() for part in re.split('\\s*', name)):
         raise RuntimeError(
             'Refusing to process TOC entry with non-printable name.')
     # Fall back to a random name when the decoded name is empty.
     name = name or str(uuid.uuid4())
     # Normalize the uppercase type marker to its lowercase variant.
     if entry_type == B'Z':
         entry_type = B'z'
     try:
         self.type = PiType(entry_type)
     except ValueError:
         xtpyi.logger.error(F'unknown type {entry_type!r} in field {name}')
         self.type = PiType.UNKNOWN
     self.name = name
Esempio n. 3
0
 def _decompress_mszip(self,
                       reader: StructReader,
                       writer: MemoryFile,
                       target: Optional[int] = None):
     """
     Inflate a single MSZIP chunk from reader into writer. The previously
     decompressed output serves as the preset dictionary.
     """
     header = bytes(reader.read(2))
     if header != B'CK':
         raise ValueError(
             F'chunk did not begin with CK header, got {header!r} instead')
     # Raw DEFLATE stream (negative wbits), seeded with prior output.
     engine = zlib.decompressobj(-zlib.MAX_WBITS, zdict=writer.getbuffer())
     inflated = engine.decompress(reader.read())
     writer.write(inflated)
     writer.write(engine.flush())
Esempio n. 4
0
 def _extract_ole(self, data: bytearray) -> str:
     """Extract the document text from an OLE-based (legacy .doc) file."""
     stream = MemoryFile(data)
     with self._olefile.OleFileIO(stream) as ole:
         doc = ole.openstream('WordDocument').read()
         with StructReader(doc) as reader:
             # Bit 1 of byte 11 selects which table stream (0Table or
             # 1Table) holds the piece table.
             table_name = F'{(doc[11]>>1)&1}Table'
             # Offset and length of the piece table descriptor live at
             # fixed position 0x1A2 in the WordDocument stream.
             reader.seek(0x1A2)
             offset = reader.u32()
             length = reader.u32()
         with StructReader(ole.openstream(table_name).read()) as reader:
             reader.seek(offset)
             table = reader.read(length)
         piece_table = self._load_piece_table(table)
         return self._get_text(doc, piece_table)
Esempio n. 5
0
 def process(self, data: bytearray):
     """
     Decompress the input. An optional header carries the magic B'JC',
     the expected decompressed size, and a checksum; both are verified
     when present.

     Fixes: the under-run warning previously reported len(data) (the
     input size) instead of len(output) (the decompressed size), and the
     truncation message had a typo ('tuncating').
     """
     with MemoryFile() as output, StructReader(data) as reader:
         if reader.read(2) != B'JC':
             self.log_warn(
                 'data does not begin with magic sequence, assuming that header is missing'
             )
             reader.seek(0)
             size = checksum = None
         else:
             size = reader.u32()
             checksum = reader.u32()
         if self.args.ignore_header:
             size = None
         self._decompress(output, reader, size)
         if size is not None:
             if len(output) > size:
                 self.log_info(F'truncating to size {size}')
                 output.truncate(size)
             elif len(output) < size:
                 self.log_warn(
                     F'header size was {size}, but only {len(output)} bytes were decompressed'
                 )
         data = output.getvalue()
         if checksum:
             c = self._checksum(data)
             if c != checksum:
                 self.log_warn(
                     F'header checksum was {checksum:08X}, computed value is {c:08X}'
                 )
         return data
Esempio n. 6
0
 def __init__(self, reader: StructReader):
     """Parse a ZIP end-of-central-directory record from reader."""
     if reader.read(4) != self.SIGNATURE:
         raise ValueError
     self.disk_number = reader.u16()
     self.start_disk_number = reader.u16()
     self.entries_on_disk = reader.u16()
     self.entries_in_directory = reader.u16()
     self.directory_size = reader.u32()
     self.directory_offset = reader.u32()
     # The trailing comment is optional; truncated records simply lack it.
     try:
         length = reader.u32()
         comment = reader.read(length) if length else None
     except EOFError:
         comment = None
     # Normalize an empty comment to None as well.
     self.comment = comment or None
Esempio n. 7
0
 def __init__(self,
              buffer: Union[bytearray, StructReader],
              bits_per_read: int = 32):
     """
     Wrap the given buffer in a little-endian StructReader (unless it
     already is one) and reset the internal bit cache.
     """
     if isinstance(buffer, StructReader):
         reader = buffer
     else:
         reader = StructReader(memoryview(buffer), bigendian=False)
     self._reader: StructReader[memoryview] = reader
     # Bits pending consumption and how many of them are valid.
     self._bit_buffer_data: int = 0
     self._bit_buffer_size: int = 0
     self._bits_per_read = bits_per_read
Esempio n. 8
0
 def test_bitreader_le(self):
     """Exercise little-endian bit-level reads against a known pattern."""
     # Underscores delimit the groups that the assertions below read back;
     # in little-endian bit order the last group is consumed first.
     data = 0b10010100111010100100001111101_11_00000000_0101010101010010010111100000101001010101100000001110010111110100_111_000_100
     size, remainder = divmod(data.bit_length(), 8)
     self.assertEqual(remainder, 0)
     data = memoryview(data.to_bytes(size, 'little'))
     sr = StructReader(data)
     self.assertEqual(sr.read_integer(3), 0b100)
     self.assertEqual(sr.read_integer(3), 0b000)
     self.assertEqual(sr.read_integer(3), 0b111)
     self.assertEqual(
         sr.u64(),
         0b0101010101010010010111100000101001010101100000001110010111110100)
     self.assertFalse(any(sr.read_flags(8, reverse=True)))
     self.assertEqual(sr.read_bit(), 1)
     # An empty struct format is invalid and must raise.
     self.assertRaises(ValueError, lambda: sr.read_struct(''))
     self.assertEqual(sr.read_bit(), 1)
     self.assertEqual(sr.read_integer(29), 0b10010100111010100100001111101)
     self.assertTrue(sr.eof)
Esempio n. 9
0
    def unpack(self, data):
        """Yield packed results for each CPIO entry until the trailer."""
        reader = StructReader(memoryview(data))
        while True:
            # A truncated archive simply ends the iteration.
            try:
                entry = CPIOEntry(reader)
            except EOF:
                break
            if entry.name == 'TRAILER!!!':
                break
            yield self._pack(entry.name, entry.mtime, entry.data)
Esempio n. 10
0
 def __init__(self, reader: StructReader, version: str):
     """Parse a PYZ archive header and determine the Python version."""
     reader.bigendian = True
     self.base = reader.tell()
     signature = reader.read(4)
     if signature != self.MagicSignature:
         raise ValueError('invalid magic')
     magic = bytes(reader.read(4))
     # Prefer the version implied by the pyc magic when it is known.
     with contextlib.suppress(KeyError):
         version = xtpyi._xdis.magics.versions[magic]
     vtuple = version2tuple(version)
     # The pyc header padding grows by 4 bytes at Python 3.3 and again
     # at 3.7.
     padding_size = 4
     if vtuple >= (3, 3):
         padding_size += 4
     if vtuple >= (3, 7):
         padding_size += 4
     self.version = version
     self.magic = magic + padding_size * b'\0'
     self.toc_offset = reader.i32()
     self.reader = reader
     self.entries: List[PiMeta] = []
Esempio n. 11
0
 def _load_piece_table(self, table: bytes) -> bytes:
     """Scan the table stream and return the raw piece table bytes."""
     with StructReader(table) as reader:
         while not reader.eof:
             entry_type = reader.read_byte()
             if entry_type == 2:
                 # A type-2 entry carries the piece table itself,
                 # prefixed with a 32-bit length.
                 size = reader.u32()
                 return reader.read(size)
             elif entry_type == 1:
                 # Type-1 entries are skipped using their length prefix.
                 skip = reader.read_byte()
                 reader.seekrel(skip)
             else:
                 raise NotImplementedError(
                     F'Unsupported table entry type value 0x{entry_type:X}.')
Esempio n. 12
0
 def process(self, data):
     """
     LZSS-style decompression: each control byte governs the next eight
     items; a clear bit copies one literal byte, a set bit encodes a
     back-reference with a 6-bit length and a 10-bit distance.
     """
     dst = bytearray()
     src = StructReader(data)
     while not src.eof:
         copy = src.read_byte()
         for mask in (0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80):
             if src.eof:
                 break
             if not copy & mask:
                 # Literal byte.
                 dst.append(src.read_byte())
                 continue
             elif not dst:
                 raise ValueError('copy requested against empty buffer')
             # Match fields are stored big-endian within the bit stream.
             with src.be:
                 match_len = src.read_integer(6) + _MATCH_MIN
                 match_pos = src.read_integer(10)
             if not match_pos or match_pos > len(dst):
                 raise RuntimeError(F'invalid match offset at position {src.tell()}')
             match_pos = len(dst) - match_pos
             # Copy in rounds so that overlapping matches replay the bytes
             # written by earlier rounds.
             while match_len > 0:
                 match = dst[match_pos:match_pos + match_len]
                 dst.extend(match)
                 match_pos += len(match)
                 match_len -= len(match)
     return dst
Esempio n. 13
0
    def __init__(self, reader: StructReader):
        """Parse a Java class file: constant pool, flags, and members."""
        reader.bigendian = True
        if reader.read(4).hex() != 'cafebabe':
            raise ValueError('class file magic missing.')
        minor = reader.u16()
        major = reader.u16()
        self.version = (major, minor)

        self.pool: List[Union[Struct, int, float, str]] = []
        self._read_pool(reader)

        # NOTE(review): this is a set literal (deduplicated), despite the
        # List[str] annotation; callers appear to rely only on iteration
        # and membership — confirm before changing either side.
        self.strings: List[str] = {
            s.value for s in self.pool if isinstance(s, Struct) and s.tag == JvConstType.String}

        self.access = JvAccessFlags(reader)

        # Constant-pool indices of this class and its superclass.
        self.this = reader.u16()
        self.parent = reader.u16()

        # Each section is a u16 count followed by that many records; an
        # out-of-range pool index indicates a corrupt file.
        try:
            self.interfaces = [self.pool[reader.u16()]
                for _ in range(reader.u16())]
        except IndexError:
            raise ValueError('Failed parsing Interfaces.')
        try:
            self.fields = [JvClassMember(reader, pool=self.pool)
                for _ in range(reader.u16())]
        except IndexError:
            raise ValueError('Failed parsing Fields.')
        try:
            self.methods = [JvClassMember(reader, pool=self.pool)
                for _ in range(reader.u16())]
        except IndexError:
            raise ValueError('Failed parsing Methods.')
        try:
            self.attributes = [JvAttribute(reader, pool=self.pool)
                for _ in range(reader.u16())]
        except IndexError:
            raise ValueError('Failed parsing Attributes.')
Esempio n. 14
0
 def __init__(self, reader: StructReader):
     """Parse a Code attribute: stack sizes, bytecode, exception table."""
     reader.bigendian = True
     self.max_stack = reader.u16()
     self.max_locals = reader.u16()
     ops: List[JvOpCode] = []
     # The bytecode is length-prefixed; disassemble it from a sub-reader.
     code_size = reader.u32()
     with StructReader(reader.read(code_size)) as code:
         code.bigendian = True
         while not code.eof:
             ops.append(JvOpCode(code, pool=self.pool))
     self.disassembly: List[JvOpCode] = ops
     self.exceptions = [JvException(reader) for _ in range(reader.u16())]
     self.attributes = [JvAttribute(reader) for _ in range(reader.u16())]
Esempio n. 15
0
 def test_bitreader_be(self):
     """Exercise big-endian bit-level reads against a known pattern."""
     # Underscores delimit the groups read back below; in big-endian bit
     # order the first group is consumed first.
     data = 0b01010_10011101_0100100001_1111_0111101010000101010101010010010111100000101001010101100000001110010111110100111000_101
     size, remainder = divmod(data.bit_length(), 8)
     self.assertEqual(remainder, 7)
     data = memoryview(data.to_bytes(size + 1, 'big'))
     sr = StructReader(data)
     with sr.be:
         self.assertEqual(sr.read_bit(), 0)
         self.assertEqual(sr.read_bit(), 1)
         self.assertEqual(sr.read_bit(), 0)
         self.assertEqual(sr.read_bit(), 1)
         self.assertEqual(sr.read_bit(), 0)
         self.assertEqual(sr.read_byte(), 0b10011101)
         self.assertEqual(sr.read_integer(10), 0b100100001)
         self.assertTrue(all(sr.read_flags(4)))
         self.assertEqual(
             sr.read_integer(82),
             0b0111101010000101010101010010010111100000101001010101100000001110010111110100111000
         )
         # Only 3 bits remain; a 16-bit read must hit end of file.
         self.assertRaises(EOF, sr.u16)
Esempio n. 16
0
 def __init__(self, reader: StructReader):
     """Decode the 16 access-flag bits of a Java class file entry."""
     # Flags are unpacked most-significant bit first; unused positions are
     # discarded into throwaway names.
     (
         self.MODULE,      # 0x8000
         self.ENUM,        # 0x4000
         self.ANNOTATION,  # 0x2000
         self.SYNTHETIC,   # 0x1000
         _,                # ...
         self.ABSTRACT,    # 0x0400
         self.INTERFACE,   # 0x0200
         _, _, _,          # ...
         self.SUPER,       # 0x0020
         self.FINAL,       # 0x0010
         _, _, _,          # ...
         self.PUBLIC,      # 0x0001
     ) = reader.read_flags(16)
Esempio n. 17
0
 def _read_libname(self, reader: StructReader) -> Optional[str]:
     """
     Try to read a null-terminated library name from the current position.
     On any validation failure the reader is rewound and None is returned.
     """
     position = reader.tell()
     try:
         libname, t, rest = reader.read_bytes(64).partition(B'\0')
     except EOF:
         reader.seekset(position)
         return None
     try:
         libname = libname.decode('utf8')
     except Exception:
         reader.seekset(position)
         return None
     # Require a null terminator, at least 10 trailing bytes that are all
     # zero, and a name consisting of printable ASCII / whitespace.
     if not t or any(rest) or len(rest) < 10 or not re.fullmatch(
             R'[\s!-~]+', libname):
         reader.seekset(position)
         return None
     return libname
Esempio n. 18
0
 def process(self, data):
     """
     Decompress an SZDD (MS-DOS COMPRESS/EXPAND) archive: an LZSS variant
     with a 4096-byte ring buffer that starts out filled with spaces.

     Improvement: the ring buffer is created pre-filled instead of being
     initialized byte-by-byte in a loop.
     """
     with StructReader(data) as archive:
         if archive.read(8) != b'SZDD\x88\xF0\x27\x33':
             if not self.args.lenient:
                 raise ValueError('signature missing')
             self.log_fail(
                 'the header signature is invalid, this is likely not an SZDD archive'
             )
         if archive.read_byte() != 0x41:
             raise ValueError('Unsupported compression mode')
         # ignore the missing file extension letter:
         archive.seekrel(1)
         output_len = archive.u32()
         window_pos = 0x1000 - 0x10
         output_pos = 0
         output = bytearray(output_len)
         # The ring buffer is initialized to spaces (0x20).
         window = bytearray(B'\x20' * 0x1000)
         while not archive.eof:
             control = archive.read_byte()
             for cb in (0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80):
                 if archive.eof:
                     break
                 if control & cb:
                     # Literal byte: copied through the window.
                     output[output_pos] = window[
                         window_pos] = archive.read_byte()
                     output_pos += 1
                     window_pos += 1
                     window_pos &= 0xFFF
                 else:
                     # Back-reference: 12-bit window position, 4-bit
                     # length with an implicit minimum of 3.
                     match_pos = archive.read_byte()
                     match_len = archive.read_byte()
                     match_pos |= (match_len & 0xF0) << 4
                     match_len = (match_len & 0x0F) + 3
                     match_pos &= 0xFFF
                     for _ in range(match_len):
                         window[window_pos] = window[match_pos]
                         output[output_pos] = window[window_pos]
                         output_pos += 1
                         window_pos += 1
                         match_pos += 1
                         window_pos &= 0xFFF
                         match_pos &= 0xFFF
         return output
Esempio n. 19
0
    def process(self, data: bytearray):
        """
        Decompress a possibly multi-member LZIP stream. Trailing garbage
        after the first valid member is tolerated and logged.

        Fixes: the checksum-mismatch warning read the nonexistent attribute
        `decoder.crc` (the comparison two lines above uses `decoder.crc32`),
        so a mismatch raised AttributeError instead of logging.
        """
        view = memoryview(data)
        with MemoryFile() as output, StructReader(view) as reader:
            for k in count(1):
                if reader.eof:
                    break
                trailing_size = len(data) - reader.tell()
                try:
                    ID, VN, DS = reader.read_struct('4sBB')
                    if ID != B'LZIP':
                        if k > 1:
                            raise EOF
                        else:
                            self.log_warn(F'ignoring invalid LZIP signature: {ID.hex()}')
                    if VN != 1:
                        self.log_warn(F'ignoring invalid LZIP version: {VN}')
                    # DS: low 5 bits give the base-2 size, the next 3 bits a
                    # fractional reduction in sixteenths.
                    dict_size = 1 << (DS & 0x1F)
                    dict_size -= (dict_size // 16) * ((DS >> 5) & 7)
                    if dict_size not in range(_MIN_DICT_SIZE, _MAX_DICT_SIZE + 1):
                        raise ValueError(
                            F'The dictionary size {dict_size} is out of the valid range '
                            F'[{_MIN_DICT_SIZE}, {_MAX_DICT_SIZE}]; unable to proceed.'
                        )
                    decoder = MemberDecoder(dict_size, reader, output)
                    if not decoder():
                        raise ValueError(F'Data error in stream {k}.')
                    # Each member ends with its CRC32, data size, and size.
                    crc32, data_size, member_size = reader.read_struct('<LQQ')
                    if crc32 != decoder.crc32:
                        self.log_warn(F'checksum in stream {k} was {decoder.crc32:08X}, should have been {crc32:08X}.')
                    if member_size - 20 != decoder.member_position:
                        self.log_warn(F'member size in stream {k} was {decoder.member_position}, should have been {member_size}.')
                    if data_size != decoder.data_position:
                        self.log_warn(F'data size in stream {k} was {decoder.data_position}, should have been {data_size}.')
                except EOF:
                    if k <= 1:
                        raise
                    self.log_info(F'silently ignoring {trailing_size} bytes of trailing data')
                    break

            return output.getvalue()
Esempio n. 20
0
 def _get_text(self, doc: bytes, piece_table: bytes) -> str:
     """
     Reassemble document text from the piece table: each piece maps a
     range of character positions to a file offset stored as either ANSI
     (cp1252) or UTF-16 text.
     """
     # n+1 character positions followed by n 8-byte piece descriptors.
     piece_count: int = 1 + (len(piece_table) - 4) // 12
     with StringIO() as text:
         with StructReader(piece_table) as reader:
             character_positions = [
                 reader.u32() for _ in range(piece_count)
             ]
             for i in range(piece_count - 1):
                 cp_start = character_positions[i]
                 cp_end = character_positions[i + 1]
                 # Skip 2 bytes either side of the 32-bit fc descriptor.
                 fc_value = reader.read_struct('xxLxx', unwrap=True)
                 # Bit 30 set marks the piece as compressed ANSI text.
                 is_ansi = bool((fc_value >> 30) & 1)
                 fc = fc_value & 0xBFFFFFFF
                 cb = cp_end - cp_start
                 if is_ansi:
                     encoding = 'cp1252'
                     # ANSI offsets are stored doubled.
                     fc = fc // 2
                 else:
                     encoding = 'utf16'
                     cb *= 2
                 raw = doc[fc:fc + cb]
                 text.write(raw.decode(encoding).replace('\r', '\n'))
         return text.getvalue()
Esempio n. 21
0
    def __init__(self, reader: StructReader):
        """Parse one new-ASCII (070701) CPIO header plus its file data."""
        def readint(length: int):
            # Fields are ASCII hex digits, two characters per value byte.
            return int(bytes(reader.read(length * 2)), 16)

        self.signature = reader.read(6)
        if self.signature != b'070701':
            raise ValueError('invalid CPIO header signature')
        self.inode = readint(4)
        self.mode = readint(4)
        self.uid = readint(4)
        self.gid = readint(4)
        self.nlinks = readint(4)
        mtime = readint(4)
        # NOTE(review): utcfromtimestamp is deprecated since Python 3.12
        # and yields a naive datetime; confirm before modernizing.
        self.mtime = datetime.utcfromtimestamp(mtime)
        self.size = readint(4)
        self.dev = readint(4), readint(4)
        self.rdev = readint(4), readint(4)
        namesize = readint(4)
        self.checksum = readint(4)
        self.name = bytes(reader.read(namesize)).decode('ascii').rstrip('\0')
        # Name and data are each padded to 4-byte alignment.
        reader.byte_align(4)
        self.data = reader.read(self.size)
        reader.byte_align(4)
Esempio n. 22
0
 def _decompress_xpress(self,
                        reader: StructReader,
                        writer: MemoryFile,
                        target: Optional[int] = None) -> bytearray:
     """
     Decompress a plain (non-Huffman) XPRESS/LZ77 stream from reader into
     writer, stopping once `target` additional bytes have been produced.
     """
     if target is not None:
         target += writer.tell()
     flags = BitBufferedReader(reader)
     # Extended lengths are stored as nibble pairs; the unused nibble of
     # a pair is cached here for the next extended-length match.
     nibble_cache = None
     while not reader.eof:
         if target is not None and writer.tell() >= target:
             return
         if not flags.next():
             # Clear flag bit: literal byte.
             writer.write(reader.read(1))
             continue
         # Set flag bit: 16-bit field holding a 13-bit offset and a
         # 3-bit length.
         offset, length = divmod(reader.u16(), 8)
         offset += 1
         if length == 7:
             # Saturated length field: extend via nibble, then byte,
             # then word/dword encodings.
             length = nibble_cache
             if length is None:
                 length_pair = reader.u8()
                 nibble_cache = length_pair >> 4
                 length = length_pair & 0xF
             else:
                 nibble_cache = None
             if length == 15:
                 length = reader.u8()
                 if length == 0xFF:
                     # A zero u16 escapes to a u32 length.
                     length = reader.u16() or reader.u32()
                     length -= 22
                     if length < 0:
                         raise RuntimeError(
                             F'Invalid match length of {length} for long delta sequence'
                         )
                 length += 15
             length += 7
         length += 3
         writer.replay(offset, length)
Esempio n. 23
0
    def _decompress_xpress_huffman(self,
                                   reader: StructReader,
                                   writer: MemoryFile,
                                   target: Optional[int] = None,
                                   max_chunk_size: int = 0x10000) -> None:
        """
        Decompress an XPRESS Huffman stream: each chunk begins with a table
        of nibble-sized code lengths, followed by bit-packed symbols that
        encode literals and matches.

        Fixes a typo in the buffer-underrun error message ('reamining').
        """
        limit = writer.tell()
        if target is not None:
            target += limit

        while not reader.eof:

            # Each chunk starts with XPRESS_NUM_SYMBOLS code lengths packed
            # two per byte.
            if reader.remaining_bytes < XPRESS_NUM_SYMBOLS // 2:
                raise IndexError(
                    F'There are only {reader.remaining_bytes} bytes remaining in the input buffer,'
                    F' but at least {XPRESS_NUM_SYMBOLS//2} are required to read a Huffman table.'
                )

            table = bytearray(
                reader.read_integer(4) for _ in range(XPRESS_NUM_SYMBOLS))
            table = make_huffman_decode_table(table, XPRESS_TABLEBITS,
                                              XPRESS_MAX_CODEWORD_LEN)
            limit = limit + max_chunk_size
            flags = BitBufferedReader(reader, 16)

            while True:
                position = writer.tell()
                if position == target:
                    if reader.remaining_bytes:
                        self.log_info(
                            F'chunk decompressed with {reader.remaining_bytes} bytes remaining in input buffer'
                        )
                    return
                if position >= limit:
                    if position > limit:
                        limit = position
                        self.log_info(
                            F'decompression of one chunk generated more than the limit of {max_chunk_size} bytes'
                        )
                    flags.collect()
                    break
                try:
                    sym = flags.huffman_symbol(table, XPRESS_TABLEBITS,
                                               XPRESS_MAX_CODEWORD_LEN)
                except EOFError:
                    self.log_debug('end of file while reading huffman symbol')
                    break
                if sym < XPRESS_NUM_CHARS:
                    # Symbols below the character range are literal bytes.
                    writer.write_byte(sym)
                    continue
                # Match symbol: low nibble is the base length, the next
                # nibble the number of offset bits.
                length = sym & 0xF
                offsetlog = (sym >> 4) & 0xF
                flags.collect()
                if reader.eof:
                    break
                offset = (1 << offsetlog) | flags.read(offsetlog)
                if length == 0xF:
                    nudge = reader.read_byte()
                    if nudge < 0xFF:
                        length += nudge
                    else:
                        # A zero u16 escapes to a u32 length.
                        length = reader.u16() or reader.u32()
                length += XPRESS_MIN_MATCH_LEN
                writer.replay(offset, length)
Esempio n. 24
0
 def test_bitreader_structured(self):
     """Check struct-style reads and alignment errors on packed data."""
     items = (
         0b1100101,  # noqa
         -0x1337,  # noqa
         0xDEFACED,  # noqa
         0xC0CAC01A,  # noqa
         -0o1337,  # noqa
         2076.171875,  # noqa
         math.pi  # noqa
     )
     data = struct.pack('<bhiLqfd', *items)
     sr = StructReader(data)
     # The low nibble of the first byte comes out first.
     self.assertEqual(sr.read_nibble(), 0b101)
     # After a nibble read the stream is unaligned; byte reads must fail.
     self.assertRaises(sr.Unaligned, lambda: sr.read_exactly(2))
     sr.seek(0)
     self.assertEqual(sr.read_byte(), 0b1100101)
     self.assertEqual(sr.i16(), -0x1337)
     self.assertEqual(sr.i32(), 0xDEFACED)
     self.assertEqual(sr.u32(), 0xC0CAC01A)
     self.assertEqual(sr.i64(), -0o1337)
     self.assertAlmostEqual(sr.read_struct('f', True), 2076.171875)
     self.assertAlmostEqual(sr.read_struct('d', True), math.pi)
     self.assertTrue(sr.eof)
Esempio n. 25
0
class blz(Unit):
    """
    BriefLZ compression and decompression. The compression algorithm uses a pure Python suffix tree
    implementation: It requires a lot of time & memory.
    """
    def _begin(self, data):
        """Attach a fresh source reader and output buffer; return self."""
        self._dst = MemoryFile(bytearray())
        self._src = StructReader(memoryview(data))
        return self

    def _reset(self):
        """Rewind the source and clear the output buffer; return self."""
        self._dst.seek(0)
        self._dst.truncate()
        self._src.seek(0)
        return self

    def _decompress(self):
        """
        Decompress a standard BriefLZ stream: parse and validate the
        24-byte header, then decompress and CRC-check the payload.

        Fixes a typo in the short-buffer warning ('annoucned').
        """
        (
            signature,
            version,
            src_count,
            src_crc32,
            dst_count,
            dst_crc32,
        ) = self._src.read_struct('>6L')
        if signature != 0x626C7A1A:
            raise ValueError(F'Invalid BriefLZ signature: {signature:08X}, should be 626C7A1A.')
        if version > 10:
            raise ValueError(F'Invalid version number {version}, should be less than 10.')
        self.log_debug(F'signature: 0x{signature:08X} V{version}')
        self.log_debug(F'src count: 0x{src_count:08X}')
        self.log_debug(F'src crc32: 0x{src_crc32:08X}')
        self.log_debug(F'dst count: 0x{dst_count:08X}')
        self.log_debug(F'dst crc32: 0x{dst_crc32:08X}')
        # The compressed payload follows the 24-byte header.
        src = self._src.getbuffer()
        src = src[24:24 + src_count]
        if len(src) < src_count:
            self.log_warn(F'Only {len(src)} bytes in buffer, but header announced a length of {src_count}.')
        # A zero CRC in the header means the check is skipped.
        if src_crc32:
            check = zlib.crc32(src)
            if check != src_crc32:
                self.log_warn(F'Invalid source data CRC {check:08X}, should be {src_crc32:08X}.')
        dst = self._decompress_chunk(dst_count)
        if not dst_crc32:
            return dst
        check = zlib.crc32(dst)
        if check != dst_crc32:
            self.log_warn(F'Invalid result data CRC {check:08X}, should be {dst_crc32:08X}.')
        return dst

    def _decompress_modded(self):
        """Decompress the modified BriefLZ format (chunked, 64-bit sizes)."""
        self._src.seekrel(8)
        total_size = self._src.u64()
        chunk_size = self._src.u64()
        self.log_debug(F'total size: 0x{total_size:016X}')
        self.log_debug(F'chunk size: 0x{chunk_size:016X}')
        # Decompress full chunks, then whatever remains as a final chunk.
        remaining = total_size
        while remaining > chunk_size:
            self._decompress_chunk(chunk_size)
            remaining -= chunk_size
        return self._decompress_chunk(remaining)

    def _decompress_chunk(self, size=None):
        """
        Decompress a single BriefLZ chunk into self._dst; when `size` is
        given, exactly that many bytes must be produced. With a falsy
        size, decompression runs until the input is exhausted (and always
        exits via the EOF handler below).
        """
        bitcount = 0
        bitstore = 0
        decompressed = 1

        def readbit():
            # Bits are consumed MSB-first from 16-bit little-endian blocks.
            nonlocal bitcount, bitstore
            if not bitcount:
                bitstore = int.from_bytes(self._src.read_exactly(2), 'little')
                bitcount = 0xF
            else:
                bitcount = bitcount - 1
            return (bitstore >> bitcount) & 1

        def readint():
            # Gamma-style code: implicit leading bits, then (continue, data)
            # bit pairs until the continue bit is clear.
            result = 2 + readbit()
            while readbit():
                result <<= 1
                result += readbit()
            return result

        # The first output byte of a chunk is always stored verbatim.
        self._dst.write(self._src.read_exactly(1))

        try:
            while not size or decompressed < size:
                if readbit():
                    # Match: length, high offset byte ("sector"), low byte.
                    length = readint() + 2
                    sector = readint() - 2
                    offset = self._src.read_byte() + 1
                    delta = offset + 0x100 * sector
                    available = self._dst.tell()
                    if delta not in range(available + 1):
                        raise RefineryPartialResult(
                            F'Requested rewind by 0x{delta:08X} bytes with only 0x{available:08X} bytes in output buffer.',
                            partial=self._dst.getvalue())
                    # Replay the match, repeating the window whenever the
                    # match length exceeds the rewind distance.
                    quotient, remainder = divmod(length, delta)
                    replay = memoryview(self._dst.getbuffer())
                    replay = bytes(replay[-delta:] if quotient else replay[-delta:length - delta])
                    replay = quotient * replay + replay[:remainder]
                    self._dst.write(replay)
                    decompressed += length
                else:
                    # Literal byte.
                    self._dst.write(self._src.read_exactly(1))
                    decompressed += 1
        except EOF as E:
            raise RefineryPartialResult(str(E), partial=self._dst.getbuffer())
        dst = self._dst.getbuffer()
        # size is an integer whenever execution reaches this point: with a
        # falsy size the loop above can only end via the EOF exception.
        if decompressed < size:
            raise RefineryPartialResult(
                F'Attempted to decompress {size} bytes, got only {len(dst)}.', dst)
        if decompressed > size:
            raise RuntimeError('Decompressed buffer contained more bytes than expected.')
        return dst

    def _compress(self):
        """
        Compress self._src into BriefLZ format. Matches are found with a
        suffix tree; literals and match descriptors go to a data channel
        while control bits are flushed in 16-bit little-endian blocks
        interleaved with the data. Finally the 24-byte header is written.
        """
        from refinery.lib.suffixtree import SuffixTree

        try:
            self.log_info('computing suffix tree')
            tree = SuffixTree(self._src.getbuffer())
        except Exception:
            raise

        bitstore = 0  # The bit stream to be written
        bitcount = 0  # The number of bits in the bit stream
        buffer = MemoryFile(bytearray())

        # Write empty header and first byte of source
        self._dst.write(bytearray(24))
        self._dst.write(self._src.read_exactly(1))

        def writeint(n: int) -> None:
            """
            Write an integer to the bit stream.
            """
            nonlocal bitstore, bitcount
            nbits = n.bit_length()
            if nbits < 2:
                raise ValueError
            # The highest bit is implicitly assumed:
            n ^= 1 << (nbits - 1)
            remaining = nbits - 2
            # Emit (continue=1, data) bit pairs, then a final pair with the
            # continue bit clear.
            while remaining:
                remaining -= 1
                bitstore <<= 2
                bitcount += 2
                bitstore |= ((n >> remaining) & 3) | 1
            bitstore <<= 2
            bitcount += 2
            bitstore |= (n & 1) << 1

        src = self._src.getbuffer()
        remaining = len(src) - 1
        self.log_info('compressing data')

        while True:
            cursor = len(src) - remaining
            rest = src[cursor:]
            if bitcount >= 0x10:
                # Flush complete 16-bit control blocks, interleaved with the
                # buffered data channel.
                block_count, bitcount = divmod(bitcount, 0x10)
                info_channel = bitstore >> bitcount
                bitstore = info_channel << bitcount ^ bitstore
                # The decompressor will read bits from top to bottom, and each 16 bit block has to be
                # little-endian encoded. The bit stream is encoded top to bottom bit in the bitstore
                # variable, and by encoding it as a big endian integer, the stream is in the correct
                # order. However, we need to swap adjacent bytes to achieve little endian encoding for
                # each of the blocks:
                info_channel = bytearray(info_channel.to_bytes(block_count * 2, 'big'))
                for k in range(block_count):
                    k0 = 2 * k + 0
                    k1 = 2 * k + 1
                    info_channel[k0], info_channel[k1] = info_channel[k1], info_channel[k0]
                info_channel = memoryview(info_channel)
                data_channel = memoryview(buffer.getbuffer())
                self._dst.write(info_channel[:2])
                self._dst.write(data_channel[:-1])
                self._dst.write(info_channel[2:])
                data_channel = bytes(data_channel[-1:])
                buffer.truncate(0)
                store = buffer if bitcount else self._dst
                store.write(data_channel)
            if remaining + bitcount < 0x10:
                # Not enough input left to fill another control block: pad
                # the bit stream and flush everything.
                buffer = buffer.getbuffer()
                if rest or buffer:
                    bitstore <<= 0x10 - bitcount
                    self._dst.write(bitstore.to_bytes(2, 'little'))
                    self._dst.write(buffer)
                    self._dst.write(rest)
                elif bitcount:
                    raise RuntimeError('Bitbuffer Overflow')
                break
            # Walk the suffix tree to find the longest match that starts
            # before the current cursor.
            node = tree.root
            length = 0
            offset = 0
            sector = None
            while node.children and length < len(rest):
                for child in node.children.values():
                    if tree.data[child.start] == rest[length]:
                        node = child
                        break
                if node.start >= cursor:
                    break
                offset = node.start - length
                length = node.end + 1 - offset
            length = min(remaining, length)
            # Matches shorter than 4 bytes are emitted as literals.
            if length >= 4:
                sector, offset = divmod(cursor - offset - 1, 0x100)
            bitcount += 1
            bitstore <<= 1
            if sector is None:
                buffer.write(rest[:1])
                remaining -= 1
                continue
            bitstore |= 1
            buffer.write(bytes((offset,)))
            writeint(length - 2)
            writeint(sector + 2)
            remaining -= length

        # Fill in the header now that the compressed size is known.
        self._dst.seek(24)
        dst = self._dst.peek()
        self._dst.seek(0)
        self._dst.write(struct.pack('>6L', 0x626C7A1A, 1, len(dst), zlib.crc32(dst), len(src), zlib.crc32(src)))
        return self._dst.getbuffer()

    def process(self, data):
        """
        Decompress the input. Regular BriefLZ decompression is attempted first;
        if it fails with a ValueError, the modified BriefLZ variant is tried. A
        partial result from the first attempt is preferred over a hard failure
        of the fallback.
        """
        self._begin(data)
        try:
            return self._decompress()
        except ValueError as error:
            # Remember the error only when it carries a partial result that can
            # be surfaced if the fallback fails as well.
            partial = error if isinstance(error, RefineryPartialResult) else None
            self.log_warn(F'Reverting to modified BriefLZ after decompression error: {error!s}')
            self._reset()
        try:
            return self._decompress_modded()
        except RefineryPartialResult:
            raise
        except Exception as error:
            if partial is None:
                raise
            raise partial from error

    def reverse(self, data):
        """Compress the input: initialize the I/O buffers, then run the compressor."""
        unit = self._begin(data)
        return unit._compress()
Esempio n. 26
0
 def _begin(self, data):
     """Prepare the unit for a run: wrap the input in a reader and allocate an
     empty output buffer. Returns self so calls can be chained."""
     self._dst = MemoryFile(bytearray())
     self._src = StructReader(memoryview(data))
     return self
Esempio n. 27
0
    def decompress_stream(self, data: ByteString, LZOv1: bool = False) -> bytearray:
        """
        An implementation of LZO decompression. We use the article
        "[LZO stream format as understood by Linux's LZO decompressor](https://www.kernel.org/doc/html/latest/staging/lzo.html)"
        as a reference since no proper specification is available.

        When `LZOv1` is set, two legacy quirks are honored: a banned instruction
        sequence raises an error, and the special distance 0xBFFF encodes a run
        of zero bytes.
        """
        def integer() -> int:
            # Read an LZO length extension: each zero byte adds 0xFF; the first
            # nonzero byte terminates the encoding and is added to the total.
            length = 0
            while True:
                byte = src.read_byte()
                if byte:
                    return length + byte
                length += 0xFF
                if length > 0x100000:
                    raise LZOError('Too many zeros in integer encoding.')

        def literal(count):
            # Copy `count` bytes verbatim from the input to the output.
            dst.write(src.read_bytes(count))

        def copy(distance: int, length: int):
            # Copy `length` bytes starting `distance` bytes back in the output.
            # When length exceeds distance the region overlaps its own source,
            # so the available block is repeated until the length is reached.
            if distance > len(dst):
                raise LZOError(F'Distance {distance} > bufsize {len(dst)}')
            buffer = dst.getbuffer()
            if distance > length:
                start = len(buffer) - distance
                end = start + length
                dst.write(buffer[start:end])
            else:
                block = buffer[-distance:]
                while len(block) < length:
                    block += block[:length - len(block)]
                if len(block) > length:
                    block[length:] = ()
                dst.write(block)

        src = StructReader(memoryview(data))
        dst = MemoryFile()

        # `state` tracks the number of trailing literals implied by the previous
        # instruction; it steers how an instruction byte < 0x10 is interpreted.
        state = 0
        first = src.read_byte()

        if first == 0x10:
            raise LZOError('Invalid first stream byte 0x10.')
        elif first <= 0x12:
            # Not a literal-run prefix; rewind and treat it as an instruction.
            src.seekrel(-1)
        elif first <= 0x15:
            # Short initial literal run of (first - 0x11) bytes.
            state = first - 0x11
            literal(state)
        else:
            # Long initial literal run; subsequent state is pinned to 4.
            state = 4
            literal(first - 0x11)

        while True:
            instruction = src.read_byte()
            if instruction < 0x10:
                if state == 0:
                    # NOTE: by operator precedence this evaluates as
                    # `instruction or (integer() + 15)`.
                    length = instruction or integer() + 15
                    state = length + 3
                    if state < 4:
                        raise LZOError('Literal encoding is too short.')
                else:
                    # Short match: 2 low bits are the next state, the 2 middle
                    # bits and the following byte form the distance.
                    state = instruction & 0b0011
                    D = (instruction & 0b1100) >> 2
                    H = src.read_byte()
                    distance = (H << 2) + D + 1
                    if state >= 4:
                        distance += 0x800
                        length = 3
                    else:
                        length = 2
                    copy(distance, length)
            elif instruction < 0x20:
                # Long-distance match; a zero distance terminates the stream.
                L = instruction & 0b0111
                H = instruction & 0b1000
                # NOTE: evaluates as `L or (integer() + 7)`.
                length = L or integer() + 7
                argument = src.u16()
                state = argument & 3
                distance = (H << 11) + (argument >> 2)
                if not distance:
                    return dst.getbuffer()
                if LZOv1 and distance & 0x803F == 0x803F and length in range(261, 265):
                    raise LZOError('Compressed data contains sequence that is banned in LZOv1.')
                if LZOv1 and distance == 0xBFFF:
                    # LZOv1 quirk: this distance encodes a run of zero bytes.
                    X = src.read_byte()
                    count = ((X << 3) | L) + 4
                    self.log_debug(F'Writing run of {X} zero bytes according to LZOv1.')
                    dst.write(B'\0' * count)
                else:
                    copy(distance + 0x4000, length + 2)
            elif instruction < 0x40:
                # Medium match with 5-bit length field.
                L = instruction & 0b11111
                # NOTE: evaluates as `L or (integer() + 31)`.
                length = L or integer() + 31
                argument = src.u16()
                state = argument & 3
                distance = (argument >> 2) + 1
                copy(distance, length + 2)
            else:
                # Short match with length encoded in the instruction itself.
                if instruction < 0x80:
                    length = 3 + ((instruction >> 5) & 1)
                else:
                    length = 5 + ((instruction >> 5) & 3)
                H = src.read_byte()
                D = (instruction & 0b11100) >> 2
                state = instruction & 3
                distance = (H << 3) + D + 1
                copy(distance, length)
            if state:
                # Emit the trailing literals announced by the instruction.
                literal(state)
Esempio n. 28
0
    def __init__(self,
                 reader: StructReader,
                 offset: int,
                 unmarshal: Unmarshal = Unmarshal.No):
        """
        Parse a PyInstaller archive located at `offset` in the given reader.
        The table of contents is read, embedded PYZ archives are collected, and
        source/module entries are registered as .pyc files alongside decompiled
        counterparts. When `unmarshal` is enabled, the PYZ archives are unpacked
        as well.
        """
        reader.bigendian = True
        reader.seekset(offset)
        self.reader = reader
        signature = reader.read_bytes(8)
        if signature != self.MagicSignature:
            raise ValueError(
                F'offset 0x{offset:X} has invalid signature {signature.hex().upper()}; '
                F'should be {self.MagicSignature.hex().upper()}')
        self.size = reader.i32()
        toc_offset = reader.i32()
        toc_length = reader.i32()
        # The version is stored as a decimal integer such as 39; joining the
        # digits of its string representation yields e.g. '3.9'.
        self.py_version = '.'.join(str(reader.u32()))
        self.py_libname = self._read_libname(reader)
        # Archive data begins self.size bytes before the current position.
        self.offset = reader.tell() - self.size

        self.toc: Dict[str, PiTOCEntry] = {}
        toc_end = self.offset + toc_offset + toc_length
        reader.seekset(self.offset + toc_offset)
        # Read TOC entries until the advertised end; parse errors terminate the
        # loop with a warning rather than aborting the whole archive.
        while reader.tell() < toc_end:
            try:
                entry = PiTOCEntry(reader)
            except EOF:
                xtpyi.logger.warning('end of file while reading TOC')
                break
            except Exception as error:
                xtpyi.logger.warning(
                    F'unexpected error while reading TOC: {error!s}')
                break
            if entry.name in self.toc:
                raise KeyError(F'duplicate name {entry.name}')
            self.toc[entry.name] = entry

        self.files: Dict[str, PiMeta] = {}
        no_pyz_found = True
        pyz_entries: Dict[str, PYZ] = {}

        # Collect embedded PYZ archives; entries without an extension are
        # renamed in the TOC so that they carry a .pyz suffix.
        for entry in list(self.toc.values()):
            if entry.type is not PiType.PYZ:
                continue
            no_pyz_found = False
            name, xt = os.path.splitext(entry.name)
            name_pyz = F'{name}.pyz'
            if name == entry.name:
                del self.toc[name]
                self.toc[name_pyz] = entry
                entry.name = name_pyz
            reader.seekset(self.offset + entry.offset)
            if entry.is_compressed:
                data = self.extract(entry.name).unpack()
            else:
                data = reader
            pyz_entries[name] = PYZ(data, self.py_version)

        # The bytecode magic signatures recovered from the PYZ archives; needed
        # to repair source/module entries that were stored without one.
        magics = {pyz.magic for pyz in pyz_entries.values()}

        if not magics:
            if not no_pyz_found:
                xtpyi.logger.warning(
                    'no magic signature could be recovered from embedded pyzip archives; this is '
                    'unsual and means that there is no way to guess the missing magic for source '
                    'file entries and it will likely not be possible to decompile them.'
                )
            return
        elif len(magics) > 1:
            xtpyi.logger.warning(
                'more than one magic signature was recovered; this is unusual.'
            )

        magics = list(magics)
        keys: Set[bytes] = set()

        for entry in self.toc.values():
            extracted = self.extract(entry.name)
            if entry.type not in (PiType.SOURCE, PiType.MODULE):
                self.files[entry.name] = extracted
                continue
            # Source/module entries are re-registered under a .pyc name and a
            # decompiled .py companion is produced.
            data = extracted.unpack()
            name, _ = os.path.splitext(extracted.name)
            del self.files[extracted.name]
            extracted.name = F'{name}.pyc'
            self.files[extracted.name] = extracted

            # Prepend the recovered magic when the entry does not start with one.
            if len(magics) == 1 and data[:4] != magics[0]:
                extracted.data = magics[0] + data
            decompiled = make_decompiled_item(name, data, *magics)
            if entry.type is PiType.SOURCE:
                decompiled.type = PiType.USERCODE
            self.files[F'{name}.py'] = decompiled
            # Entries named *crypto_key are scanned for 16-byte string
            # constants — presumably the archive encryption key.
            if name.endswith('crypto_key'):
                for key in decompiled.unpack() | carve('string', decode=True):
                    if len(key) != 0x10:
                        continue
                    xtpyi.logger.info(F'found key: {key.decode(xtpyi.codec)}')
                    keys.add(key)

        if unmarshal is Unmarshal.No:
            return

        # Use the first recovered key (if any) to unpack the PYZ archives.
        if not keys:
            key = None
        else:
            key = next(iter(keys))

        for name, pyz in pyz_entries.items():
            pyz.unpack(unmarshal is Unmarshal.YesAndDecompile, key)
            for unpacked in pyz.entries:
                # Prefix every unpacked name with its parent archive's name.
                unpacked.name = path = F'{name}/{unpacked.name}'
                if path in self.files:
                    raise ValueError(F'duplicate file name: {path}')
                self.files[path] = unpacked
Esempio n. 29
0
    def __init__(self, reader: StructReader, calculate_checks=False):
        """
        Parse a DEX file header from the given reader. When `calculate_checks`
        is true, an adler32 checksum and a SHA-1 digest are computed over the
        remainder of the stream for comparison against the stored values;
        otherwise the calculated fields are left as None.
        """
        if reader.read(4) != b'dex\n':
            raise ValueError('Invalid Signature')
        # Peek at the endian tag at offset 0x28 before parsing any multi-byte
        # fields; 0x78563412 is the byte-swapped form of the canonical tag.
        with StreamDetour(reader, 0x28):
            endian_test_data = reader.u32()
            if endian_test_data == 0x78563412:
                reader.bigendian = True
        # Version string, e.g. b'035', with trailing NUL padding removed.
        self.version = reader.read(4).rstrip(b'\0')

        self.checksum = reader.u32()
        if calculate_checks:
            # adler32 over the rest of the stream from the current position.
            with StreamDetour(reader):
                self.calculated_checksum = zlib.adler32(reader.read())
        else:
            self.calculated_checksum = None

        self.signature = reader.read(20)
        if calculate_checks:
            # SHA-1 over the rest of the stream from the current position.
            with StreamDetour(reader):
                self.calculated_signature = hashlib.sha1(
                    reader.read()).digest()
        else:
            self.calculated_signature = None

        self.size_of_file = reader.u32()
        self.size_of_header = reader.u32()

        # With endianness configured above, the tag must now decode to the
        # canonical constant.
        if reader.u32() != 0x12345678:
            raise ValueError('Invalid Endian Tag')

        self.link_size = reader.u32()
        self.link_offset = reader.u32()
        self.map_offset = reader.u32()

        # The next two u32 values are the string count and the offset of the
        # string id list, consumed by _read_strings.
        self.strings: List[str] = list(
            self._read_strings(reader, reader.u32(), reader.u32()))
Esempio n. 30
0
    def process(self, data):
        """
        Decompress the input. When the data carries the expected 24-byte
        header, the algorithm and chunking parameters are read from it and the
        payload is decompressed chunk by chunk; otherwise, if an explicit mode
        argument was given, the input is treated as a single raw stream.
        """
        mode: MODE = self.args.mode
        with StructReader(memoryview(data)) as reader, MemoryFile() as writer:
            reader: StructReader[memoryview]
            # The header CRC byte covers the 6 bytes preceding it and the 0x11
            # bytes following it; seed the checksum with the leading 6 bytes.
            check = zlib.crc32(reader.peek(6))
            magic = reader.read(4)
            if magic != self._SIGNATURE:
                if mode is None:
                    self.log_warn(
                        F'data starts with {magic.hex().upper()} rather than the expected sequence '
                        F'{self._SIGNATURE.hex().upper()}; this could be a raw stream.'
                    )
                else:
                    # No recognizable header, but an explicit algorithm was
                    # given: decompress the entire input as one raw stream.
                    reader.seek(0)
                    handler = self._get_handler(mode)
                    handler(reader, writer, None)
                    return writer.getbuffer()

            header_size = reader.u16()
            if header_size != 24:
                self.log_warn(
                    F'the header size {header_size} was not equal to 24')

            crc32byte = reader.u8()
            check = zlib.crc32(reader.peek(0x11), check) & 0xFF
            if check != crc32byte:
                self.log_warn(
                    F'the CRC32 check byte was {crc32byte}, computed value was {check}'
                )

            _mode_code = reader.u8()

            try:
                _mode = MODE(_mode_code)
            except ValueError:
                msg = F'header contains unknown compression type code {_mode_code}'
                if mode is None:
                    raise ValueError(msg)
                else:
                    self.log_warn(msg)
            else:
                # Prefer the algorithm declared in the header; warn when it
                # contradicts an explicitly requested mode.
                if mode is not None and mode != _mode:
                    logger = self.log_warn
                else:
                    logger = self.log_info
                    mode = _mode
                logger(F'header specifies algorithm {_mode.name}')

            self.log_info(F'using algorithm {mode.name}')
            decompress = self._get_handler(mode)

            final_size = reader.u32()
            _unknown_1 = reader.u32()
            chunk_size = reader.u32()
            _unknown_2 = reader.u32()

            if _unknown_1 != 0:
                self.log_warn(
                    F'unknown value 1 was unexpectedly nonzero: 0x{_unknown_1:08X}'
                )
            if _unknown_2 != 0:
                self.log_warn(
                    F'unknown value 2 was unexpectedly nonzero: 0x{_unknown_2:08X}'
                )

            self.log_debug(F'final size: 0x{final_size:08X}')
            self.log_debug(F'chunk size: 0x{chunk_size:08X}')

            if chunk_size > COMPRESS_MAX_CHUNK:
                raise ValueError(
                    'the header chunk size is greater than the maximum value')

            # Each chunk is a 32-bit size prefix followed by that many bytes of
            # data; chunks are decompressed until the advertised final size is
            # reached. A last chunk that exactly completes the output is raw.
            while len(writer) < final_size:
                src_size = reader.u32()
                src_data = reader.read(src_size)
                if len(src_data) != src_size:
                    raise IndexError(
                        F'Attempted to read {src_size} bytes, but got only {len(src_data)}.'
                    )
                if src_size + len(writer) == final_size:
                    self.log_debug(
                        F'final chunk is uncompressed, appending {src_size} raw bytes to output'
                    )
                    writer.write(src_data)
                    break
                self.log_debug(F'reading chunk of size {src_size}')
                start = writer.tell()
                chunk = StructReader(src_data)
                target = min(chunk_size, final_size - len(writer))
                decompress(chunk, writer, target)
                writer.flush()
                written = writer.tell() - start
                if written != target:
                    # Fixed: the message previously reported chunk_size even
                    # though the comparison is made against target.
                    raise RuntimeError(
                        F'decompressed output had unexpected size {written} instead of {target}'
                    )

            if not reader.eof:
                # Fixed: this is the decompression direction of the unit.
                self.log_info(
                    F'decompression complete with {reader.remaining_bytes} bytes remaining in input'
                )
            return writer.getbuffer()