Beispiel #1
0
 def __init__(self, reader: StructReader):
     reader.bigendian = True
     entry_start_offset = reader.tell()
     self.size_of_entry = reader.i32()
     self.offset = reader.i32()
     self.size_of_compressed_data = reader.i32()
     self.size_od_uncompressed_data = reader.i32()
     self.is_compressed = bool(reader.read_byte())
     entry_type = bytes(reader.read(1))
     name_length = self.size_of_entry - reader.tell() + entry_start_offset
     if name_length > 0x1000:
         raise RuntimeError(
             F'Refusing to process TOC entry with name of size {name_length}.'
         )
     name, *_ = bytes(reader.read(name_length)).partition(B'\0')
     try:
         name = name.decode('utf8', 'backslashreplace')
     except Exception:
         name = None
     if not all(part.isprintable() for part in re.split('\\s*', name)):
         raise RuntimeError(
             'Refusing to process TOC entry with non-printable name.')
     name = name or str(uuid.uuid4())
     if entry_type == B'Z':
         entry_type = B'z'
     try:
         self.type = PiType(entry_type)
     except ValueError:
         xtpyi.logger.error(F'unknown type {entry_type!r} in field {name}')
         self.type = PiType.UNKNOWN
     self.name = name
Beispiel #2
0
 def __init__(self, reader: StructReader):
     with StreamDetour(reader):
         self.code = opc(reader.read_byte())
         self.table: Optional[Dict[int, int]] = None
         try:
             fmt = self.OPC_ARGMAP[self.code]
         except KeyError:
             self.arguments = []
         else:
             self.arguments = list(reader.read_struct(fmt))
         if self.code == opc.newarray:
             self.arguments = [JvBaseType(reader.read_byte())]
         elif self.code in self.OPC_CONSTPOOL:
             try:
                 self.arguments[0] = self.pool[self.arguments[0] - 1]
             except (AttributeError, IndexError):
                 pass
         elif self.code == opc.lookupswitch:
             reader.byte_align(blocksize=4)
             default, npairs = reader.read_struct('LL')
             pairs = reader.read_struct(F'{npairs*2}L')
             self.table = dict(zip(*([iter(pairs)] * 2)))
             self.table[None] = default
         elif self.code == opc.tableswitch:
             reader.byte_align(blocksize=4)
             default, low, high = reader.read_struct('LLL')
             assert low <= high
             offsets = reader.read_struct(F'{high-low+1}L')
             self.table = {k + low: offset for k, offset in enumerate(offsets)}
             self.table[None] = default
         elif self.code == opc.wide:
             argop = opc(reader.get_byte())
             self.arguments = (argop, reader.u16())
             if argop == opc.iinc:
                 self.arguments += reader.i16(),
             else:
                 assert argop in (
                     opc.iload, opc.istore,
                     opc.fload, opc.fstore,
                     opc.aload, opc.astore,
                     opc.lload, opc.lstore,
                     opc.dload, opc.dstore,
                     opc.ret)
         offset = reader.tell()
     self.raw = bytes(reader.read(offset - reader.tell()))
Beispiel #3
0
 def _read_libname(self, reader: StructReader) -> Optional[str]:
     position = reader.tell()
     try:
         libname, t, rest = reader.read_bytes(64).partition(B'\0')
     except EOF:
         reader.seekset(position)
         return None
     try:
         libname = libname.decode('utf8')
     except Exception:
         reader.seekset(position)
         return None
     if not t or any(rest) or len(rest) < 10 or not re.fullmatch(
             R'[\s!-~]+', libname):
         reader.seekset(position)
         return None
     return libname
Beispiel #4
0
 def __init__(self, reader: StructReader, version: str):
     reader.bigendian = True
     self.base = reader.tell()
     signature = reader.read(4)
     if signature != self.MagicSignature:
         raise ValueError('invalid magic')
     magic = bytes(reader.read(4))
     with contextlib.suppress(KeyError):
         version = xtpyi._xdis.magics.versions[magic]
     vtuple = version2tuple(version)
     padding_size = 4
     if vtuple >= (3, 3):
         padding_size += 4
     if vtuple >= (3, 7):
         padding_size += 4
     self.version = version
     self.magic = magic + padding_size * b'\0'
     self.toc_offset = reader.i32()
     self.reader = reader
     self.entries: List[PiMeta] = []
Beispiel #5
0
    def __init__(self,
                 reader: StructReader,
                 offset: int,
                 unmarshal: Unmarshal = Unmarshal.No):
        reader.bigendian = True
        reader.seekset(offset)
        self.reader = reader
        signature = reader.read_bytes(8)
        if signature != self.MagicSignature:
            raise ValueError(
                F'offset 0x{offset:X} has invalid signature {signature.hex().upper()}; '
                F'should be {self.MagicSignature.hex().upper()}')
        self.size = reader.i32()
        toc_offset = reader.i32()
        toc_length = reader.i32()
        self.py_version = '.'.join(str(reader.u32()))
        self.py_libname = self._read_libname(reader)
        self.offset = reader.tell() - self.size

        self.toc: Dict[str, PiTOCEntry] = {}
        toc_end = self.offset + toc_offset + toc_length
        reader.seekset(self.offset + toc_offset)
        while reader.tell() < toc_end:
            try:
                entry = PiTOCEntry(reader)
            except EOF:
                xtpyi.logger.warning('end of file while reading TOC')
                break
            except Exception as error:
                xtpyi.logger.warning(
                    F'unexpected error while reading TOC: {error!s}')
                break
            if entry.name in self.toc:
                raise KeyError(F'duplicate name {entry.name}')
            self.toc[entry.name] = entry

        self.files: Dict[str, PiMeta] = {}
        no_pyz_found = True
        pyz_entries: Dict[str, PYZ] = {}

        for entry in list(self.toc.values()):
            if entry.type is not PiType.PYZ:
                continue
            no_pyz_found = False
            name, xt = os.path.splitext(entry.name)
            name_pyz = F'{name}.pyz'
            if name == entry.name:
                del self.toc[name]
                self.toc[name_pyz] = entry
                entry.name = name_pyz
            reader.seekset(self.offset + entry.offset)
            if entry.is_compressed:
                data = self.extract(entry.name).unpack()
            else:
                data = reader
            pyz_entries[name] = PYZ(data, self.py_version)

        magics = {pyz.magic for pyz in pyz_entries.values()}

        if not magics:
            if not no_pyz_found:
                xtpyi.logger.warning(
                    'no magic signature could be recovered from embedded pyzip archives; this is '
                    'unsual and means that there is no way to guess the missing magic for source '
                    'file entries and it will likely not be possible to decompile them.'
                )
            return
        elif len(magics) > 1:
            xtpyi.logger.warning(
                'more than one magic signature was recovered; this is unusual.'
            )

        magics = list(magics)
        keys: Set[bytes] = set()

        for entry in self.toc.values():
            extracted = self.extract(entry.name)
            if entry.type not in (PiType.SOURCE, PiType.MODULE):
                self.files[entry.name] = extracted
                continue
            data = extracted.unpack()
            name, _ = os.path.splitext(extracted.name)
            del self.files[extracted.name]
            extracted.name = F'{name}.pyc'
            self.files[extracted.name] = extracted

            if len(magics) == 1 and data[:4] != magics[0]:
                extracted.data = magics[0] + data
            decompiled = make_decompiled_item(name, data, *magics)
            if entry.type is PiType.SOURCE:
                decompiled.type = PiType.USERCODE
            self.files[F'{name}.py'] = decompiled
            if name.endswith('crypto_key'):
                for key in decompiled.unpack() | carve('string', decode=True):
                    if len(key) != 0x10:
                        continue
                    xtpyi.logger.info(F'found key: {key.decode(xtpyi.codec)}')
                    keys.add(key)

        if unmarshal is Unmarshal.No:
            return

        if not keys:
            key = None
        else:
            key = next(iter(keys))

        for name, pyz in pyz_entries.items():
            pyz.unpack(unmarshal is Unmarshal.YesAndDecompile, key)
            for unpacked in pyz.entries:
                unpacked.name = path = F'{name}/{unpacked.name}'
                if path in self.files:
                    raise ValueError(F'duplicate file name: {path}')
                self.files[path] = unpacked
Beispiel #6
0
    def process(self, data: bytearray):
        formatter = string.Formatter()
        until = self.args.until
        until = until and PythonExpression(until, all_variables_allowed=True)
        reader = StructReader(memoryview(data))
        mainspec = self.args.spec
        byteorder = mainspec[:1]
        if byteorder in '<!=@>':
            mainspec = mainspec[1:]
        else:
            byteorder = '='

        def fixorder(spec):
            if spec[0] not in '<!=@>':
                spec = byteorder + spec
            return spec

        it = itertools.count() if self.args.multi else (0, )
        for index in it:

            if reader.eof:
                break
            if index >= self.args.count:
                break

            meta = metavars(data, ghost=True)
            meta['index'] = index
            args = []
            last = None
            checkpoint = reader.tell()

            try:
                for prefix, name, spec, conversion in formatter.parse(
                        mainspec):
                    if prefix:
                        args.extend(reader.read_struct(fixorder(prefix)))
                    if name is None:
                        continue
                    if conversion:
                        reader.byte_align(
                            PythonExpression.evaluate(conversion, meta))
                    if spec:
                        spec = meta.format_str(spec, self.codec, args)
                    if spec != '':
                        try:
                            spec = PythonExpression.evaluate(spec, meta)
                        except ParserError:
                            pass
                    if spec == '':
                        last = value = reader.read()
                    elif isinstance(spec, int):
                        last = value = reader.read_bytes(spec)
                    else:
                        value = reader.read_struct(fixorder(spec))
                        if not value:
                            self.log_warn(F'field {name} was empty, ignoring.')
                            continue
                        if len(value) > 1:
                            self.log_info(
                                F'parsing field {name} produced {len(value)} items reading a tuple'
                            )
                        else:
                            value = value[0]

                    args.append(value)

                    if name == _SHARP:
                        raise ValueError(
                            'Extracting a field with name # is forbidden.')
                    elif name.isdecimal():
                        index = int(name)
                        limit = len(args) - 1
                        if index > limit:
                            self.log_warn(
                                F'cannot assign index field {name}, the highest index is {limit}'
                            )
                        else:
                            args[index] = value
                        continue
                    elif name:
                        meta[name] = value

                if until and not until(meta):
                    self.log_info(
                        F'the expression ({until}) evaluated to zero; aborting.'
                    )
                    break

                with StreamDetour(reader, checkpoint) as detour:
                    full = reader.read(detour.cursor - checkpoint)
                if last is None:
                    last = full

                outputs = []

                for template in self.args.outputs:
                    used = set()
                    outputs.append(
                        meta.format(template,
                                    self.codec, [full, *args], {_SHARP: last},
                                    True,
                                    used=used))
                    for key in used:
                        meta.pop(key, None)

                for output in outputs:
                    chunk = self.labelled(output, **meta)
                    chunk.set_next_batch(index)
                    yield chunk

            except EOF:
                leftover = repr(SizeInt(len(reader) - checkpoint)).strip()
                self.log_info(F'discarding {leftover} left in buffer')
                break