Esempio n. 1
0
    def decrypt_file(self, data) -> Union[bytes, None]:
        """Decrypt an AES-CFB-encrypted blob from a PyInstaller archive.

        The first 16 bytes of ``data`` are the initialization vector; the
        remainder is ciphertext. If ``self.encryption_key`` is not yet known,
        each candidate in ``self.potential_keys`` is tried in turn and
        verified by zlib-decompressing the plaintext, because AES-CFB with a
        wrong key yields garbage rather than an error.

        Parameters
        ----------
        data : bytes
            IV-prefixed ciphertext read from the archive.

        Returns
        -------
        Union[bytes, None]
            The decrypted (still zlib-compressed) payload, or None when no
            key could decrypt the data.
        """
        CRYPT_BLOCK_SIZE = 16
        initialization_vector = data[:CRYPT_BLOCK_SIZE]

        if not self.encryption_key:
            # Consume candidates destructively so a failed key is never
            # retried on subsequent files.
            while self.potential_keys:
                encryption_key = self.potential_keys.pop(0)
                try:
                    cipher: AES.AESCipher = AES.new(encryption_key.encode(),
                                                    AES.MODE_CFB,
                                                    initialization_vector)
                    decrypted_data = cipher.decrypt(
                        data[CRYPT_BLOCK_SIZE:]
                    )  # will silently fail if password is wrong
                    _ = zlib.decompress(
                        decrypted_data)  # ensures the password is correct
                except (zlib.error, ValueError):
                    # zlib.error: wrong key produced undecompressable bytes.
                    # ValueError: the candidate did not encode to a valid AES
                    # key length, which AES.new rejects — previously this
                    # escaped the loop and crashed key trials.
                    logger.debug(
                        f"[!] Decryption of .pyc failed with password {encryption_key}. Discarding key."
                    )
                else:
                    self.encryption_key = encryption_key
                    logger.debug(
                        f"[!] Verified ZlibArchive password is {self.encryption_key}."
                    )
                    return decrypted_data
            # Every candidate key failed verification.
            return None
        else:
            try:
                cipher: AES.AESCipher = AES.new(self.encryption_key.encode(),
                                                AES.MODE_CFB,
                                                initialization_vector)
                return cipher.decrypt(data[CRYPT_BLOCK_SIZE:])
            except Exception as e:
                # The original caught zlib.error here, which nothing in this
                # branch raises; catch broadly so AES setup/decrypt failures
                # actually reach the intended log-and-return-None path.
                logger.error(f"[!] Failed to decrypt .pyc with error: {e}")
                return None
Esempio n. 2
0
    def check_for_password_file(self):
        """Locate PyInstaller's crypto key file and collect candidate keys.

        Looks for ``pyimod00_crypto_key.pyc`` next to the archive (or in the
        current working directory when no archive path is set). If found,
        marks the archive as encrypted and fills ``self.potential_keys`` with
        16-character candidate strings — first from the disassembled module's
        constants, and failing that from a sliding-window scan over printable
        strings in the raw file bytes.
        """
        self.potential_keys = []
        if hasattr(self, "archive_path"):
            dir_of_pyz = self.archive_path.parent
        else:
            dir_of_pyz = Path.cwd()

        key_file = dir_of_pyz / "pyimod00_crypto_key.pyc"
        if key_file.exists():
            self.encrypted = True
            logger.debug(
                f"[+] Found ZlibArchive encryption key file at path {key_file}"
            )
            crypto_key_filename: str  # full path of
            try:
                (
                    crypto_key_filename,
                    crypto_key_co,
                    crypto_key_python_version,
                    crypto_key_compilation_timestamp,
                    crypto_key_magic_int,
                    crypto_key_is_pypy,
                    crypto_key_source_size,
                    crypto_key_sip_hash,
                ) = disassemble_file(str(key_file),
                                     outstream=open(os.devnull, "w"))
            except Exception as e:
                logger.warning(
                    f"[!] Could not disassemble file {key_file}. Received error: {e}"
                )
            else:
                self.compilation_time = datetime.fromtimestamp(
                    crypto_key_compilation_timestamp)
                for const_string in crypto_key_co.co_consts:
                    # co_consts can contain ints, None, or code objects; the
                    # previous truthiness check let a truthy int through and
                    # len() raised TypeError. Only a 16-char str can be a
                    # PyInstaller key (candidates are .encode()d later).
                    if isinstance(const_string, str) and len(const_string) == 16:
                        self.potential_keys.append(const_string)
            # If we couldn't decompile the file to see the consts, lets just search the raw bytes of the file
            # for the password
            if not self.potential_keys:
                with key_file.open("rb") as file_ptr:
                    file_strings = utils.parse_for_strings(file_ptr.read())
                s: str
                for s in file_strings:
                    if len(s) >= 16 and "pyimod00_crypto_key" not in s:
                        # Slide a 16-character window across the string: the
                        # key could start at any offset.
                        while len(s) >= 16:
                            self.potential_keys.append(s[0:16])
                            s = s[1:]

            logger.info(
                f"[*] Found these potential PyInstaller PYZ Archive encryption keys: {self.potential_keys}"
            )

            if not self.potential_keys:
                logger.error(
                    "[*] Encryption key file detected, however no password was able to be retrieved."
                )
Esempio n. 3
0
def megafile_remap(
    reference_megafile: pathlib.Path, remapped_bytecode_path: pathlib.Path
) -> Tuple[Dict[int, Dict[int, int]], str]:
    """Calculate the remapped opcodes and version of a megafile.

    This takes in the standard-compiled version of the megafile, as well as
    the custom-interpreter version. It returns the Python version and the
    dictionary of opcodes to possible remapped opcodes.

    Parameters
    ----------
    reference_megafile: pathlib.Path
        The standard-compiled version of the megafile.
    remapped_bytecode_path: pathlib.Path
        The custom-interpreter version of the megafile.

    Returns
    -------
     Tuple[Dict[int, Dict[int, int]], str]
        A tuple containing a dictionary of original_opcode to
        Dict[replacement_opcode:replacement_count] and the opmap's Python
        version. replacement_opcode is an opcode that was seen in place of
        original_opcode, and the replacement_count is the amount of times it was
        seen replacing the original_opcode throughout all the bytecode that was
        analyzed.
    """
    reference_filename: str
    reference_co: CodeType  # can also be xdis codetypes
    reference_version: float
    reference_timestamp: int
    reference_magic_int: int
    reference_is_pypy: bool
    reference_source_size: int
    reference_sip_hash: str
    # Disassemble the known-good reference megafile; the textual listing is
    # discarded by streaming it to os.devnull. Only the code object and
    # version metadata are used afterwards.
    # NOTE(review): the os.devnull stream opened here is never closed.
    (
        reference_filename,
        reference_co,
        reference_version,
        reference_timestamp,
        reference_magic_int,
        reference_is_pypy,
        reference_source_size,
        reference_sip_hash,
    ) = xdis.disasm.disassemble_file(str(reference_megafile),
                                     outstream=open(os.devnull, "w"))

    fixed_megafile_file: pathlib.Path
    # If the remapped pyc has header problems, check_and_fix_pyc writes a
    # corrected copy and returns it (falsy when no fix was needed).
    if fixed_megafile_file := artifact_types.pyc.Pyc.check_and_fix_pyc(
            remapped_bytecode_path, provided_version=str(reference_version)):
        # NOTE(review): logged at error level although "[+]" suggests an
        # informational message — confirm intended severity.
        logger.error(
            f"[+] Duplicated megafile file {str(remapped_bytecode_path)} to correct issues with the pyc. New filepath:"
            f" {fixed_megafile_file.name}")
        # NOTE(review): .name yields a plain str here, though the variable is
        # annotated pathlib.Path above — confirm downstream code accepts str.
        remapped_bytecode_path = fixed_megafile_file.name
Esempio n. 4
0
def opcode_constants_remap(
        opcode_file: pathlib.Path,
        provided_version: str = None) -> Tuple[Dict[int, Dict[int, int]], str]:
    """Parse code object constants to try and recreate opcode mappings.

    This method walks the constants attribute of the opcode.pyc code object.
    See the remap documentation for more information on this method.

    Parameters
    ----------
    opcode_file: pathlib.Path
        The path on disk to the opcode.pyc file.
    provided_version: str, optional
        The version of Python that this opcode file corresponds to.

    Returns
    -------
     Tuple[Dict[int, Dict[int, int]], str]
        A tuple containing a dictionary of original_opcode to
        Dict[replacement_opcode:replacement_count] and the opmap's Python
        version. replacement_opcode is an opcode that was seen in place of
        original_opcode, and the replacement_count is the amount of times it was
        seen replacing the original_opcode throughout all the bytecode that was
        analyzed.
    """
    def get_nearest_opcode(opname: str, unused_opcodes: List[int],
                           version: str) -> int:
        """Return the unused opcode numerically closest to ``opname``'s value
        in the standard opmap for ``version``; fall back to the first unused
        opcode when the standard opmap (or the opname) can't be resolved."""
        xdis_opcode: ModuleType
        try:
            xdis_opcode = xdis.disasm.get_opcode(version, is_pypy=False)
            actual_opcode = getattr(xdis_opcode, opname)
        except Exception:
            return unused_opcodes[0]

        # Linear scan for the minimum absolute distance to the real opcode.
        smallest_distance: int = 999
        closest_opcode: int = -1
        for opcode in unused_opcodes:
            if abs(actual_opcode - opcode) < smallest_distance:
                closest_opcode = opcode
                smallest_distance = abs(actual_opcode - opcode)
        return closest_opcode

    logger.debug(
        f"[*] Checking opcode.pyc file at {str(opcode_file)} to determine if opcode map is normal."
    )
    fixed_pyc_file: tempfile.NamedTemporaryFile
    # check_and_fix_pyc returns a corrected copy of the pyc when its header
    # needed repair (falsy otherwise).
    if fixed_pyc_file := artifact_types.pyc.Pyc.check_and_fix_pyc(
            opcode_file, provided_version=provided_version):
        # NOTE(review): logged at error level although "[+]" suggests an
        # informational message — confirm intended severity.
        logger.error(
            f"[+] Duplicated opcode file {str(opcode_file)} to correct issues with the pyc. New filepath:"
            f" {fixed_pyc_file.name}")
        # NOTE(review): .name is a str filename here, replacing the
        # pathlib.Path parameter value — confirm later code tolerates str.
        opcode_file = fixed_pyc_file.name
Esempio n. 5
0
def check_for_our_xdis() -> None:
    """Check that the pydecipher fork of xdis is installed.

    The fork is detected by the presence of the ``remap_opcodes`` attribute
    on ``xdis.op_imports``. Exits the process with status 1 if it's not.
    """
    if hasattr(xdis.op_imports, "remap_opcodes"):
        logger.debug("[*] Custom version of xdis detected. All clear to proceed.")
    else:
        # The implicitly concatenated literals previously rendered as
        # "...documentationon how..." — a space was missing at the join.
        logger.error(
            "[!] It seems that the public/normal version of xdis has been installed. Please see the documentation "
            "on how to download the pydecipher-customized fork of xdis."
        )
        sys.exit(1)
Esempio n. 6
0
    def disassemble_and_dump(self, brute_force: bool = False):
        """Unmarshal the embedded code objects and write them out as pyc files.

        Loads the marshalled list of code objects that follows
        ``self.marshalled_obj_start_idx`` in ``self.resource_contents`` using
        ``self.magic_num``, then writes each code object to
        ``self.output_dir`` as a bytecode file.

        Parameters
        ----------
        brute_force : bool, optional
            When True, output files are nested under a per-Python-version
            subdirectory (``output_dir/<version>/<name>``) so multiple magic
            numbers can be attempted without collisions.
        """
        code_bytes = self.resource_contents[self.marshalled_obj_start_idx:]
        # Suppress noisy stderr output from the unmarshalling machinery.
        hijacked_stderr = io.StringIO()
        with redirect_stderr(hijacked_stderr):
            try:  # TODO make this more specific error catching
                code_objects = load_code(code_bytes, self.magic_num)
                if not isinstance(code_objects, list):
                    # TODO make this a non-generic error
                    raise RuntimeError(
                        "Py2Exe should return a marshalled list of code objects"
                    )
                if not all(code_objects):
                    raise RuntimeError("NoneType code objects returned")
            except Exception:
                logger.debug(
                    f"[!] Failed to produce disassembly of bytecode with magic num {self.magic_num} "
                    f"(Python version {magicint2version[self.magic_num]})")
                # Sentinel: mark this magic number as a failed guess.
                self.magic_num = -1
                return
            else:
                logger.info(
                    f"[+] Successfully disassembled bytecode with magic number {self.magic_num}, "
                    f"corresponding to Python version {magicint2version[self.magic_num]}"
                )

        for co in code_objects:
            new_filename: str = self._clean_filename(co.co_filename)
            self.output_dir.mkdir(parents=True, exist_ok=True)
            if brute_force:
                # NOTE(review): annotated str, but the / operator on
                # output_dir actually yields a pathlib.Path here.
                bytecode_filepath: str = self.output_dir / magicint2version[
                    self.magic_num] / new_filename
                bytecode_filepath.parent.mkdir(exist_ok=True)
            else:
                bytecode_filepath: str = str(
                    self.output_dir.joinpath(new_filename))

            try:
                xdis.load.write_bytecode_file(bytecode_filepath, co,
                                              self.magic_num)
            except Exception as e:
                # NOTE(review): in the non-brute-force branch
                # bytecode_filepath is a str, which has no .name attribute —
                # this error log would itself raise AttributeError; confirm.
                logger.error(
                    f"[!] Could not write file {bytecode_filepath.name} with error: {e}"
                )
            else:
                logger.info(
                    f"[+] Successfully wrote file {new_filename} to {self.output_dir}"
                )
Esempio n. 7
0
    def unpack(self) -> None:
        """Recursively search this artifact for frozen Python artifacts."""
        buffer: io.BytesIO
        with io.BytesIO(self.archive_contents) as buffer:
            self.output_dir.mkdir(parents=True, exist_ok=True)
            try:
                archive: zipfile.PyZipFile = zipfile.PyZipFile(
                    buffer, "r", zipfile.ZIP_DEFLATED)
                archive.extractall(self.output_dir)
            except (zipfile.BadZipfile, zlib.error):
                # Not a readable zip archive — nothing to recurse into.
                return

            # Collect every extracted file (walk order preserved) so each can
            # be handed back to pydecipher for recursive unpacking.
            extracted_files: List[os.PathLike] = [
                Path(dirpath).joinpath(name)
                for dirpath, _dirnames, filenames in os.walk(self.output_dir)
                for name in filenames
            ]

            logger.info(
                f"[*] Unpacking {len(extracted_files)} files found in this zip file..."
            )
            errors_seen: List[str] = []
            entry: pathlib.Path
            for entry in extracted_files:
                try:
                    pydecipher.unpack(entry, **self.kwargs)
                except RuntimeError as exc:
                    # Deduplicate error messages; blanks are dropped.
                    message = str(exc)
                    if message and message not in errors_seen:
                        errors_seen.append(message)

            if not errors_seen:
                return
            logger.error(
                f"[!] The following {len(errors_seen)} errors were encountered during the unpacking"
                " of this zip file.")
            for problem in errors_seen:
                logger.error(problem)
Esempio n. 8
0
def run(_args: List[str] = None) -> None:
    """Orchestrate the flow of the remap command.

    This is the entry-point of the remap command. It calls out to other routines
    and attempts to follow this high-level flow:

        1.  Check that program is running in sufficiently new Python
            environment, and parse any arguments
        2.  Determine what type of input was passed to program, which will
            ultimately decide what method remap uses to recover the opmap.
        3.  Attempt one of the opmap recovery methods (see documentation for
            more on these methods)
        4.  If the opmap was successfully recovered, validate it, then write
            it to a file.

    Parameters
    ----------
    _args : List[str]
        If this function is being called from other Python code, remap
        flags and other command-line options can be passed in as a list.
    """
    if sys.version_info < (3, 8):
        logger.critical(
            "[!] This tool can only be run in Python 3.8 or later.")
        sys.exit(1)
    utils.check_for_our_xdis()

    args: argparse.Namespace = _parse_args(_args)

    logging_options: Dict[str, Union[bool, os.PathLike]] = {
        "verbose": args.verbose,
        "quiet": args.quiet
    }
    pydecipher.set_logging_options(**logging_options)

    remapped_bytecode_path: pathlib.Path = pathlib.Path(
        args.remapped_bytecode_path).resolve()

    if args.output:
        output_dir: pathlib.Path = pathlib.Path(args.output.strip()).resolve()
    else:
        output_dir: pathlib.Path = pathlib.Path.cwd()
    output_dir = output_dir / f"remap_output_{utils.slugify(remapped_bytecode_path.name)}"

    # The following block sets up logging to a stringIO stream, which will
    # eventually be placed in a file. We don't immediately log to a file because
    # we don't want to leave a log file on disk unless the program succeeds.
    log_stream: io.StringIO = io.StringIO()
    log_stream__handler: logging.StreamHandler = logging.StreamHandler(
        log_stream)
    log_stream__handler.setFormatter(pydecipher.log_format)
    log_stream__handler.setLevel(logging.DEBUG)
    logger.addHandler(log_stream__handler)

    remappings: Dict[int, Dict[int, int]] = {}
    version: str = ""
    remapping_method: str = ""
    cli: str = " ".join(sys.argv) if not _args else " ".join(_args)
    if args.version:
        version = args.version
    if args.megafile:
        # Determine if argument is a version or a path.
        # Bind up front: previously this name was only assigned inside the
        # branches below, so an unrecognized version string (or no matching
        # reference megafile) raised NameError at the check further down
        # instead of producing the intended error message.
        standard_bytecode_path = None
        if pathlib.Path(args.megafile).exists():
            standard_bytecode_path: pathlib.Path = pathlib.Path(args.megafile)
        else:
            potential_version: str = args.megafile
            magic_num: int = bytecode.version_str_to_magic_num_int(
                potential_version)
            if magic_num:
                # Scan the bundled reference megafiles for one whose magic
                # number matches the requested version.
                compiled_file: str
                for compiled_file in os.listdir(
                        pathlib.Path(__file__).parent / "reference_files" /
                        "compiled"):
                    full_path_obj: pathlib.Path = (
                        pathlib.Path(__file__).parent / "reference_files" /
                        "compiled" / compiled_file)
                    infile: BinaryIO
                    with full_path_obj.open("rb") as infile:
                        if xdis.magics.magic2int(infile.read(4)) == magic_num:
                            logger.info(
                                f"[*] Found matching megafile for version {potential_version}"
                            )
                            standard_bytecode_path: pathlib.Path = full_path_obj
                            break
            if not standard_bytecode_path:
                logger.error(
                    "[!] Something went wrong. remap could not find a standard compiled version of this megafile."
                )  # Next, find the path of the reference file
                sys.exit(1)
        remappings, version = megafile_remap(standard_bytecode_path,
                                             remapped_bytecode_path)
        remapping_method = "Megafile"
    elif args.opcode_file:
        remappings, version = opcode_constants_remap(remapped_bytecode_path,
                                                     provided_version=version)
        remapping_method = "opcode.pyc constants-walking"
    elif args.standard_bytecode_path:
        standard_bytecode_path: pathlib.Path = pathlib.Path(
            args.standard_bytecode_path).resolve()
        utils.check_read_access(standard_bytecode_path)
        utils.check_read_access(remapped_bytecode_path)
        utils.check_write_access(output_dir)
        if not remapped_bytecode_path.is_dir():
            raise ValueError(
                "The standard/default remapping method requires a directory containing Python bytecode files"
            )
        if not standard_bytecode_path.is_dir():
            raise ValueError(
                "If you are going to provide your own reference opcode set, it must be a directory of "
                "Python bytecode files")
        remappings, version = standard_pyc_remap(standard_bytecode_path,
                                                 remapped_bytecode_path,
                                                 version=version)
        remapping_method = "Diff'ing against standard library bytecode"
    elif args.check_remapping:
        # Here, remapped_bytecode_path is not actually bytecode, its a remapping
        # file.
        utils.check_read_access(remapped_bytecode_path)
        remapping_file: TextIO
        with remapped_bytecode_path.open() as remapping_file:
            try:
                remapping_json: Dict["str", Union[str, int]] = json.loads(
                    remapping_file.read())
            except json.decoder.JSONDecodeError as e:
                e: json.decoder.JSONDecodeError
                logger.error(f"Could not read remapping file with error: {e}")
                sys.exit(1)
            version = remapping_json["python_version"]
            # NOTE(review): annotated as Dict but "remapped_opcodes" is
            # iterated as a list of dicts below — confirm the schema.
            remappings_list: Dict[str, Union[
                bool, str, int]] = remapping_json["remapped_opcodes"]
            remapping_dict: Dict[str, int] = {
                d["opname"]: d["remapped_value"]
                for d in remappings_list
            }
            if bytecode.validate_opmap(version, remapping_dict):
                logger.info("[*] This opmap is valid.")
                return
            else:
                msg: str = "This opmap is not valid."
                if not logging_options["verbose"]:
                    msg += " Run with --verbose flag for more information."
                logger.warning(f"[!] {msg}")
                sys.exit(1)

    if remappings:
        # Resolve conflicting replacement candidates, then fill any holes in
        # the opmap before writing it out.
        remappings: Dict[int, int] = fix_remapping_conflicts(remappings)
        remappings: Dict[int,
                         Tuple[int,
                               bool]] = fill_opmap_gaps(remappings, version)
        output_file_path: pathlib.Path = write_remapping_file(
            remappings, version, remapping_method, cli, output_dir=output_dir)
        logger.info(
            f"[*] Remapping file {output_file_path.name} written to {output_file_path.parent}."
        )

        # If we successfully produced the remapping file, we want to also
        # include the logged output of remap.
        log_name: str = datetime.datetime.now().strftime(
            "log_%H_%M_%S_%b_%d_%Y.txt")
        log_file_ptr: TextIO
        with output_dir.joinpath(log_name).open("w") as log_file_ptr:
            log_file_ptr.write(log_stream.getvalue())
        logging_options: Dict[str, Union[bool, os.PathLike]] = {
            "log_path": output_dir.joinpath(log_name)
        }
        pydecipher.set_logging_options(**logging_options)
    else:
        logger.warning(
            "[!] Remap couldn't produce the new opmap. Run with --verbose for more information."
        )
        sys.exit(1)
Esempio n. 9
0
    version: float
    timestamp: int  # seconds since epoch
    magic_int: int
    is_pypy: bool
    source_size: int
    sip_hash: str
    try:
        (filename, co, version, timestamp, magic_int, is_pypy, source_size,
         sip_hash) = xdis.disasm.disassemble_file(str(opcode_file),
                                                  header=True,
                                                  outstream=open(
                                                      os.devnull, "w"))
    except Exception as e:
        e: Exception
        logger.error(
            f"[!] Couldn't disassemble opcode file {opcode_file} with error: {e}"
        )
        raise e

    built_opmap: Dict[str, int] = {}

    unused_opnames: List[str] = [
    ]  # opnames seen in the co_consts list not next to an integer
    unused_opcodes: List[int] = [
    ]  # opcodes seen in the co_consts list not after a string (opname)
    ignore_list: List[Union[str, int]] = [
        "HAVE_ARGUMENT",
        "BAD",
        256,
    ]  # known constants that are not part of the opmap yet appear in the co_consts attribute
Esempio n. 10
0
def decompile_pyc(
    arg_tuple: Tuple[pathlib.Path, Dict[str, int],
                     Dict[str, Union[bool, os.PathLike]]]
) -> str:
    """Decompile a single Python bytecode file.

    Parameters
    ----------
    arg_tuple: Tuple[pathlib.Path, Dict[str, int], Dict[str, Union[bool, os.PathLike]]]
        A tuple containing the arguments for this function. This is a tuple because pebble's
        Pool.map() function couldn't pass multiple arguments to a subprocessed function call.
        The tuple entries correspond to the following arguments:

            pyc_file : pathlib.Path
                The path to the compiled Python file
            alternate_opmap : Dict[str, int], optional
                If this bytecode file was produced by an interpreter with remapped
                opcodes, you must provide the opmap as a OPNAME: OPCODE dictionary
            logging_options: Dict[str, Union[bool, os.PathLike], optional
                A dictionary of logging options. This is only needed when pydecipher is
                performing multi-processed decompilation. The keys can be the following
                strings:

                    verbose: bool
                        True will enable verbose logging.
                    quiet: bool
                        True will silence all console logging.
                    log_path: pathlib.Path
                        If a path object is passed in as the log_path, the running
                        instance of pydecipher will continue logging to that file.

    Returns
    -------
    str
        There are several different return values:

            * **no_action**: This file was not decompiled.
            * **success**: This file was successfully decompiled.
            * **error**: This file could not be decompiled 100% successfully.
            * **opcode_error**: The error message returned by uncompyle6
              indicates this file may have remapped opcodes
    """
    pyc_file: pathlib.Path = arg_tuple[0]
    # NOTE(review): `or None` normalizes an empty dict to None, sending falsy
    # opmaps down the standard (non-remapped) decompilation path — confirm
    # that is the intended behavior.
    alternate_opmap: Dict[str, int] = arg_tuple[1] or None
    logging_options: Dict[str, Union[bool, os.PathLike]] = arg_tuple[2] or None

    if not pyc_file.is_file():
        return "no_action"

    # Because this function runs in a new pydecipher process entirely, logging
    # options set during runtime (from command-line flags) do not carry over
    # automatically. We must pass these through manually, and reset the options
    # for this specific process.
    if logging_options and not pydecipher.log_path:
        pydecipher.set_logging_options(**logging_options)

    # Capture everything the decompiler prints so it can be written to the
    # output .py file instead of the console.
    hijacked_stdout: io.StringIO = io.StringIO()
    hijacked_stderr: io.StringIO = io.StringIO()
    with redirect_stdout(hijacked_stdout), redirect_stderr(hijacked_stderr):
        # Chop off c in pyc
        new_file_name: pathlib.Path = pathlib.Path(
            str(pyc_file.resolve())[:-1])

        # This prohibits the overwriting of existing files.
        # if new_file_name.exists() and new_file_name.stat().st_size:
        #     return "no_action"

        logger.debug(
            f"[*] Decompiling file {pyc_file} of size {pyc_file.stat().st_size}"
        )
        if not alternate_opmap:
            # Standard path: let uncompyle6 handle the pyc end-to-end,
            # writing the source into the redirected stdout.
            try:
                uncompyle6.decompile_file(str(pyc_file), outstream=sys.stdout)
            except uncompyle6.semantics.parser_error.ParserError as e:
                logger.warning(f"[!] Failed to decompile file {pyc_file}")
                if REMAPPED_OPCODE_ERROR_REGEX.match(str(e.error)):
                    logger.error(
                        f"[!] {pyc_file.name} failed to decompile with an error that indicate its opcode "
                        "mappings may have been remapped to prevent analysis.")
                    return "opcode_error"
                return "error"
            except Exception as e:
                e: Exception
                logger.error(
                    f"[!] Failed to decompile file {pyc_file} with error: {e}")
                # Preserve whatever partial source was produced before the
                # failure, if any.
                stdout_val: str = hijacked_stdout.getvalue()
                if stdout_val:
                    with new_file_name.open("w") as file_ptr:
                        file_ptr.write(stdout_val)
                return "error"
            else:
                with new_file_name.open("w") as file_ptr:
                    file_ptr.write(hijacked_stdout.getvalue())
                logger.info(f"[+] Successfully decompiled {pyc_file}")
                return "success"
        else:
            # Remapped path: disassemble with the alternate opmap first, then
            # hand the recovered code object to uncompyle6 directly.
            filename: str
            co: CodeType  # can also be xdis.Code* objects
            version: float
            timestamp: int  # seconds since epoch
            magic_int: int
            is_pypy: bool
            source_size: int
            sip_hash: str
            try:
                (
                    filename,
                    co,
                    version,
                    timestamp,
                    magic_int,
                    is_pypy,
                    source_size,
                    sip_hash,
                ) = xdis.main.disassemble_file(str(pyc_file),
                                               outstream=open(os.devnull, "w"),
                                               alternate_opmap=alternate_opmap)
                output_file: TextIO
                with new_file_name.open(mode="w") as output_file:
                    uncompyle6.main.decompile(
                        version,
                        co,
                        timestamp=timestamp,
                        source_size=source_size,
                        magic_int=magic_int,
                        is_pypy=is_pypy,
                        out=output_file,
                    )
            except Exception as e:
                e: Exception
                logger.info(
                    f"[!] Failed to decompile file {pyc_file} with error: {e}")
                return "error"
            else:
                logger.info(f"[+] Successfully decompiled {pyc_file}")
            # Reached only on the success path (the except branch returns).
            return "success"
Esempio n. 11
0
def process_pycs(pyc_iterable: Iterable[os.PathLike],
                 alternate_opmap: Dict[str, int] = None) -> None:
    """Multi-processed decompilation orchestration of compiled Python files.

    Currently, pydecipher uses `uncompyle6`_ as its decompiler. It works well
    with `xdis`_ (same author) and allows for the decompilation of Code objects
    using alternate opmaps (with our extension of xdis).

    This function will start up CPU count * 2 pydecipher processes to decompile
    the given Python. Attempts to check for debugger, in which case the
    decompilation will be single-threaded to make debugging easier.

    .. _uncompyle6: https://github.com/rocky/python-uncompyle6/
    .. _xdis: https://github.com/rocky/python-xdis

    Parameters
    ----------
    pyc_iterable : Iterable[os.PathLike]
        An iterable of pathlib.Path objects, referencing compiled Python files
        to decompile.
    alternate_opmap : Dict[str, int], optional
        An opcode map of OPNAME: OPCODE (i.e. 'POP_TOP': 1). This should be a
        complete opmap for the Python version of the files being decompiled.
        Even if only two opcodes were swapped, the opcode map passed in should
        contain all 100+ Python bytecode operations.
    """
    # sys.gettrace() is non-None when a debugger (e.g. PyCharm's) is attached.
    if sys.gettrace():
        # Single-threaded for easier debugging.
        logger.debug(
            "[!] Debugger detected, not using multiprocessing for decompilation of pyc files."
        )
        return_status_codes: List[str] = [
            decompile_pyc((pyc_file, alternate_opmap,
                           pydecipher.get_logging_options()))
            for pyc_file in pyc_iterable
        ]
    else:
        return_status_codes: List[str] = []
        pool: pebble.ProcessPool
        with pebble.ProcessPool(os.cpu_count() * 2) as pool:
            iterables = [(pyc, alternate_opmap,
                          pydecipher.get_logging_options())
                         for pyc in pyc_iterable]
            future: pebble.ProcessMapFuture = pool.map(decompile_pyc,
                                                       iterables,
                                                       timeout=300)
            iterator: Iterable = future.result()
            index: int = 0
            while True:
                # Resolve the current file's path up front so that *every*
                # error handler below can reference it. (Previously only the
                # TimeoutError handler assigned failed_pyc_path, so the other
                # handlers raised NameError when they fired.)
                failed_pyc_path: str = (str(iterables[index][0])
                                        if index < len(iterables) else
                                        "<unknown>")
                try:
                    result: Any = next(iterator)
                    return_status_codes.append(result)
                except StopIteration:
                    break
                except TimeoutError as e:
                    # pebble stores the timeout value in e.args[1].
                    logger.error(
                        f"[!] Timed out ({e.args[1]}s) trying to decompile {failed_pyc_path}."
                    )
                    return_status_codes.append("error")
                except pebble.ProcessExpired as e:
                    logger.error(
                        f"[!] Failed to decompile {failed_pyc_path} (process expired with status code {e.exitcode})."
                    )
                    return_status_codes.append("error")
                except Exception as e:
                    logger.error(
                        f"[!] Failed to decompile {failed_pyc_path} with unknown error: {e}"
                    )
                    return_status_codes.append("error")
                finally:
                    # Advance even on failure so the path lookup stays in sync
                    # with the results iterator.
                    index += 1

    # Summarize the decompilation run for the user.
    successes: int = return_status_codes.count("success")
    opcode_errors: int = return_status_codes.count("opcode_error")
    errors: int = return_status_codes.count("error") + opcode_errors
    if opcode_errors:
        logger.warning(
            f"[!] {opcode_errors} file(s) failed to decompile with an error "
            "that indicates its opcode mappings may have been remapped. Try using "
            "`remap` on this set of bytecode.")
    if successes and not errors:
        logger.info(f"[+] Successfully decompiled {successes} .pyc files.")
    elif successes and errors:
        logger.warning(
            f"[!] Successfully decompiled {successes} .pyc files. Failed to decompile {errors} files. "
            "See log for more information.")
    elif not successes and errors:
        logger.error(
            f"[!] Failed to decompile all {errors} .pyc files. See log for more information."
        )
    else:
        logger.warning(
            "[!] No pyc files were decompiled. See log for more information.")
Esempio n. 12
0
    def check_and_fix_pyc(
        pyc_file: pathlib.Path,
        provided_version: str = None
    ) -> Union[None, tempfile.NamedTemporaryFile]:
        """Fix a given pyc file so it can be properly disassembled by xdis.

        This function combats the following common obfuscations that may be
        applied to pyc files that would prevent them from easily being disassembled

            1. Missing the header entirely
            2. Missing only the magic bytes
            3. Magic bytes are there, but they don't match a known version
            4. Filename doesn't end in .pyc

        Parameters
        ----------
        pyc_file: pathlib.Path
            The path to the pyc file
        provided_version: str, optional
            The version of the Python that compiled the pyc, if known.

        Raises
        ------
        RuntimeError
            The pyc file is malformed and couldn't be corrected, likely due to
            a version not being given.

        Returns
        -------
        Union[None, tempfile.NamedTemporaryFile]
            If the pyc file is fine as is, this function returns None. If it
            needs to be fixed in some way, the temporary file object
            with the fixes is returned.
        """
        utils.check_read_access(pyc_file)
        infile: BinaryIO
        with pyc_file.open("rb") as infile:
            # Grab the (up to) first 24 bytes for header inspection, then
            # rewind and read the whole file.
            first_24_bytes: bytes = infile.read(
                min(24,
                    pyc_file.stat().st_size))
            infile.seek(0)
            all_bytes: bytes = infile.read()

        # Even a headerless pyc must contain the leading bytes of a marshalled
        # code object near the start of the file; otherwise it isn't a pyc.
        if not any(p in first_24_bytes
                   for p in Pyc.MARSHALLED_CODE_OBJECT_LEADING_BYTES):
            raise RuntimeError(f"This file {str(pyc_file)} isn't a pyc file!")

        if provided_version:
            # Build a replacement header from the user-supplied version.
            correct_magic_num: int = bytecode.version_str_to_magic_num_int(
                provided_version)
            header: bytes = bytecode.create_pyc_header(correct_magic_num)

        corrected_file_contents: bytes = b""
        if Pyc.is_headerless(first_24_bytes[:8]):
            # Case 1: the pyc is completely missing its header.
            if provided_version:
                corrected_file_contents = header + all_bytes
            else:
                logger.error(
                    "[!] The pyc file provided does not have a header. For remap to decompile this, please provide a"
                    " version with the --version flag")
                raise RuntimeError(
                    "Cannot reconstruct a pyc header without a provided version"
                )
        elif first_24_bytes[0:4] not in by_magic:
            # Cases 2/3: there is a header of sorts, but the magic numbers are
            # unrecognized. A user-provided version is required to proceed.
            if not provided_version:
                logger.error(
                    "[!] This version has a header, but we can't recognize the magic number"
                    f" {struct.unpack('<H', first_24_bytes[0:2])[0]}. No version was provided to fix the header."
                )
                raise RuntimeError(
                    "Cannot fix an unrecognized magic number without a provided version"
                )
            logger.debug(
                "[*] This version has a header, but we can't recognize the magic number"
                f" {struct.unpack('<H', first_24_bytes[0:2])[0]}. Using magic num {correct_magic_num} (from"
                f" provided version {provided_version}) to fix the header."
            )
            # Locate where the marshalled code object begins so everything
            # before it (the bad header) can be replaced. The leading-bytes
            # check above guarantees a pattern will be found.
            code_object_begin_index: int = -1
            pattern: bytes
            for pattern in Pyc.MARSHALLED_CODE_OBJECT_LEADING_BYTES:
                if pattern in all_bytes:
                    code_object_begin_index = all_bytes.index(pattern)
                    break
            corrected_file_contents = header + all_bytes[code_object_begin_index:]

        if corrected_file_contents:
            bytes_to_write_out: bytes = corrected_file_contents
        elif pyc_file.suffix != ".pyc":
            # Case 4: there was nothing to correct except the filename, so we
            # just duplicate the file under a .pyc-suffixed temporary name.
            bytes_to_write_out = all_bytes
        else:
            # There was nothing to do with this pyc file. It is seemingly valid.
            return None

        # Return the NamedTemporaryFile object itself (not just its path) so
        # the caller keeps it alive; the file is removed once it is closed.
        temp_file: tempfile.NamedTemporaryFile = tempfile.NamedTemporaryFile(
            suffix=".pyc")
        pathlib.Path(temp_file.name).write_bytes(bytes_to_write_out)
        return temp_file