コード例 #1
0
    def decrypt_file(self, data) -> Union[bytes, None]:
        """Decrypt an AES-CFB-encrypted PYZ archive entry.

        The first 16 bytes of ``data`` are the initialization vector; the
        remainder is the ciphertext. If no key has been verified yet, each
        candidate in ``self.potential_keys`` is tried until one yields
        zlib-decompressible plaintext (AES-CFB decryption itself never fails
        on a wrong key, so zlib acts as the correctness oracle).

        Parameters
        ----------
        data : bytes
            IV-prefixed ciphertext of a single archive entry.

        Returns
        -------
        Union[bytes, None]
            The decrypted (still zlib-compressed) data, or None on failure.
        """
        CRYPT_BLOCK_SIZE = 16  # AES block size; PyInstaller prepends the IV
        initialization_vector = data[:CRYPT_BLOCK_SIZE]

        if not self.encryption_key:
            while self.potential_keys:
                encryption_key = self.potential_keys.pop(0)
                try:
                    cipher: AES.AESCipher = AES.new(encryption_key.encode(),
                                                    AES.MODE_CFB,
                                                    initialization_vector)
                    decrypted_data = cipher.decrypt(
                        data[CRYPT_BLOCK_SIZE:]
                    )  # will silently fail if password is wrong
                    _ = zlib.decompress(
                        decrypted_data)  # ensures the password is correct
                # BUG FIX: AES.new raises ValueError (e.g. bad key length),
                # not zlib.error -- catch both so a malformed candidate key
                # is discarded instead of crashing the loop.
                except (ValueError, zlib.error):
                    logger.debug(
                        f"[!] Decryption of .pyc failed with password {encryption_key}. Discarding key."
                    )
                else:
                    self.encryption_key = encryption_key
                    logger.debug(
                        f"[!] Verified ZlibArchive password is {self.encryption_key}."
                    )
                    return decrypted_data
            # Every candidate key was exhausted without verification.
            return None
        else:
            try:
                cipher: AES.AESCipher = AES.new(self.encryption_key.encode(),
                                                AES.MODE_CFB,
                                                initialization_vector)
                return cipher.decrypt(data[CRYPT_BLOCK_SIZE:])
            # BUG FIX: the original caught zlib.error here, which AES can
            # never raise -- the handler was dead code. ValueError is what
            # AES.new/decrypt actually raise.
            except (ValueError, zlib.error) as e:
                logger.error(f"[!] Failed to decrypt .pyc with error: {e}")
                return None
コード例 #2
0
    def check_for_password_file(self):
        """Locate PyInstaller's crypto-key module and harvest candidate keys.

        Encrypted PyInstaller builds ship the PYZ key inside a
        ``pyimod00_crypto_key.pyc`` file next to the archive. If that file
        exists, mark this archive encrypted and fill ``self.potential_keys``
        with every plausible 16-byte key, first by disassembling the pyc and
        reading its constants, and -- failing that -- by string-scanning the
        raw file bytes.
        """
        self.potential_keys = []
        if hasattr(self, "archive_path"):
            dir_of_pyz = self.archive_path.parent
        else:
            dir_of_pyz = Path.cwd()

        key_file = dir_of_pyz / "pyimod00_crypto_key.pyc"
        if key_file.exists():
            self.encrypted = True
            logger.debug(
                f"[+] Found ZlibArchive encryption key file at path {key_file}"
            )
            crypto_key_filename: str  # full path of
            try:
                # FIX: use a context manager so the os.devnull handle is
                # closed -- the original open() leaked a file descriptor.
                with open(os.devnull, "w") as devnull:
                    (
                        crypto_key_filename,
                        crypto_key_co,
                        crypto_key_python_version,
                        crypto_key_compilation_timestamp,
                        crypto_key_magic_int,
                        crypto_key_is_pypy,
                        crypto_key_source_size,
                        crypto_key_sip_hash,
                    ) = disassemble_file(str(key_file), outstream=devnull)
            except Exception as e:
                logger.warning(
                    f"[!] Could not disassemble file {key_file}. Received error: {e}"
                )
            else:
                self.compilation_time = datetime.fromtimestamp(
                    crypto_key_compilation_timestamp)
                # The key is stored as a 16-character constant. Guard with
                # isinstance: co_consts may contain ints or None, for which
                # len() would raise TypeError.
                for const_string in crypto_key_co.co_consts:
                    if isinstance(const_string,
                                  (str, bytes)) and len(const_string) == 16:
                        self.potential_keys.append(const_string)
            # If we couldn't decompile the file to see the consts, lets just search the raw bytes of the file
            # for the password
            if not self.potential_keys:
                with key_file.open("rb") as file_ptr:
                    file_strings = utils.parse_for_strings(file_ptr.read())
                s: str
                for s in file_strings:
                    if len(s) >= 16 and "pyimod00_crypto_key" not in s:
                        # Slide a 16-char window across the string so every
                        # possible key substring is collected.
                        while len(s) >= 16:
                            self.potential_keys.append(s[0:16])
                            s = s[1:]

            logger.info(
                f"[*] Found these potential PyInstaller PYZ Archive encryption keys: {self.potential_keys}"
            )

            if not self.potential_keys:
                logger.error(
                    "[*] Encryption key file detected, however no password was able to be retrieved."
                )
コード例 #3
0
    def parse_toc(self) -> None:
        """Read this PYZ archive's table of contents into ``self.toc``.

        Bytes 4-8 of the archive hold the Python magic number; bytes 8-12
        hold the big-endian offset of the marshalled TOC.
        """
        self.magic_int = magic2int(self.archive_contents[4:8])
        toc_offset = struct.unpack("!i", self.archive_contents[8:12])[0]
        # TODO wrap this in try block?
        self.toc = xdis.unmarshal.load_code(
            self.archive_contents[toc_offset:], self.magic_int)
        logger.debug(f"[*] Found {len(self.toc)} entries in this PYZ archive")

        # From PyInstaller 3.1+ toc is a list of tuples; normalize to a dict.
        if isinstance(self.toc, list):
            self.toc = dict(self.toc)
コード例 #4
0
    def unpack(self) -> None:
        """Dump any interesting aspects of this PE for further investigation.

        This will log the PEs version info resource for manual inspection,
        dump any Authenticode certificates, and look for frozen Python artifacts
        within the PE's resources and overlay.
        """
        self.load_version_info()
        self.dump_certificates()

        unpack_me: List[pathlib.Path] = []
        overlay_path: pathlib.Path = self.dump_overlay()
        if overlay_path:
            unpack_me.append(overlay_path)

        version_strings: List[str] = utils.parse_for_version_strings(
            self.file_contents)
        if version_strings:
            logger.debug(
                "[*] Found the following strings (and their surrounding bytes, for context) in this PE, which may "
                "indicate the version of Python used to freeze the executable: \n"
                f"{pprint.pformat(version_strings, width=120)}")

        # Index of the PYTHONSCRIPT resource within unpack_me, if seen.
        pythonscript_idx = None
        if hasattr(self.pe, "DIRECTORY_ENTRY_RESOURCE"):
            entry: pefile.ResourceDirEntryData
            for entry in self.pe.DIRECTORY_ENTRY_RESOURCE.entries:
                if entry.name is None:
                    continue
                resource_name: str = entry.name.string.decode()
                if any(
                        re.match(pattern, resource_name, re.I)
                        for pattern in self.INTERESTING_RESOURCES):
                    resource_path: pathlib.Path = self.dump_resource(
                        resource_name)
                    if resource_name == "PYTHONSCRIPT":
                        pythonscript_idx = len(unpack_me)
                    unpack_me.append(resource_path)

        # BUG FIX: the original tested `if pythonscript_idx:`, which skipped
        # the reorder whenever PYTHONSCRIPT was the first item (index 0).
        if pythonscript_idx is not None:
            # We want to unpack Py2Exe PYTHONSCRIPT last to give it highest chance of successfully determining version.
            unpack_me.append(unpack_me.pop(pythonscript_idx))

        artifact_path: pathlib.Path
        for artifact_path in unpack_me:
            output_dir_name: str = utils.slugify(
                str(artifact_path.name) + "_output")
            pydecipher.unpack(
                artifact_path,
                output_dir=self.output_dir.joinpath(output_dir_name),
                **self.kwargs,
            )
コード例 #5
0
ファイル: remap.py プロジェクト: jon1scr/pydecipher
def opcode_constants_remap(
        opcode_file: pathlib.Path,
        provided_version: str = None) -> Tuple[Dict[int, Dict[int, int]], str]:
    """Parse code object constants to try and recreate opcode mappings.

    This method walks the constants attribute of the opcode.pyc code object.
    See the remap documentation for more information on this method.

    Parameters
    ----------
    opcode_file: pathlib.Path
        The path on disk to the opcode.pyc file.
    provided_version: str, optional
        The version of Python that this opcode file corresponds to.

    Returns
    -------
     Tuple[Dict[int, Dict[int, int]], str]
        A tuple containing a dictionary of original_opcode to
        Dict[replacement_opcode:replacement_count] and the opmap's Python
        version. replacement_opcode is an opcode that was seen in place of
        original_opcode, and the replacement_count is the amount of times it was
        seen replacing the original_opcode throughout all the bytecode that was
        analyzed.
    """
    def get_nearest_opcode(opname: str, unused_opcodes: List[int],
                           version: str) -> int:
        # Resolve `opname` to the not-yet-claimed opcode value numerically
        # closest to its standard value for `version`. Falls back to the
        # first unused opcode when xdis has no table for this version or
        # doesn't know the name.
        xdis_opcode: ModuleType
        try:
            xdis_opcode = xdis.disasm.get_opcode(version, is_pypy=False)
            actual_opcode = getattr(xdis_opcode, opname)
        except Exception:
            return unused_opcodes[0]

        smallest_distance: int = 999  # larger than any possible delta (opcodes < 256)
        closest_opcode: int = -1
        for opcode in unused_opcodes:
            if abs(actual_opcode - opcode) < smallest_distance:
                closest_opcode = opcode
                smallest_distance = abs(actual_opcode - opcode)
        return closest_opcode

    logger.debug(
        f"[*] Checking opcode.pyc file at {str(opcode_file)} to determine if opcode map is normal."
    )
    fixed_pyc_file: tempfile.NamedTemporaryFile
    # check_and_fix_pyc returns a NamedTemporaryFile holding a repaired copy
    # when the pyc needed fixing, or a falsy value when it was fine as-is.
    if fixed_pyc_file := artifact_types.pyc.Pyc.check_and_fix_pyc(
            opcode_file, provided_version=provided_version):
        # NOTE(review): a success-style "[+]" message logged at error level
        # -- looks like it should be debug/info; confirm intent.
        logger.error(
            f"[+] Duplicated opcode file {str(opcode_file)} to correct issues with the pyc. New filepath:"
            f" {fixed_pyc_file.name}")
        opcode_file = fixed_pyc_file.name
コード例 #6
0
ファイル: utils.py プロジェクト: neil-orans/pydecipher
def check_for_our_xdis() -> None:
    """Check that the pydecipher fork of xdis is installed.

    The fork exposes ``remap_opcodes`` on ``xdis.op_imports``; the stock
    release does not. Exits the process with status 1 if the fork is absent.
    """
    if hasattr(xdis.op_imports, "remap_opcodes"):
        logger.debug("[*] Custom version of xdis detected. All clear to proceed.")
    else:
        # BUG FIX: the two adjacent string literals previously joined as
        # "...documentationon how..." -- a space was missing.
        logger.error(
            "[!] It seems that the public/normal version of xdis has been installed. Please see the documentation"
            " on how to download the pydecipher-customized fork of xdis."
        )
        sys.exit(1)
コード例 #7
0
 def validate_pyinstaller_carchive(self):
     """Look for the PyInstaller CArchive magic cookie and infer its version.

     Sets ``self.magic_index`` and ``self.pyinstaller_version`` as side
     effects.

     Returns
     -------
     bool
         True if the archive looks like a PyInstaller CArchive.
     """
     self.magic_index = self.archive_contents.find(self.MAGIC)
     # The cookie runs from the magic to the end of the archive.
     cookie_size = len(self.archive_contents) - self.magic_index
     # BUG FIX: find() returns -1 on failure; the original `> 0` test wrongly
     # rejected a magic located at offset 0.
     if self.magic_index != -1:
         if cookie_size == self.PYINST20_COOKIE_SIZE:
             self.pyinstaller_version = 2.0
             logger.debug("[*] PyInstaller version: 2.0")
             return True
         elif cookie_size == self.PYINST21_COOKIE_SIZE:
             self.pyinstaller_version = 2.1  # or greater
             # BUG FIX: this debug line was placed after `return True` in the
             # original and therefore never executed.
             logger.debug("[*] PyInstaller version: 2.1")
             return True
         else:
             logger.debug(
                 f"[!] PyInstaller cookie size is {cookie_size}, which does not correspond to a known "
                 "version of PyInstaller.")
             if cookie_size < 100:
                 # Some valid cookies were seen with size 94
                 self.pyinstaller_version = "unknown"
                 return True
             else:
                 return False
     else:
         logger.debug(
             "[!] Could not find PyInstaller magic within this archive.")
     return False
コード例 #8
0
def unpack(python_artifact: os.PathLike,
           output_dir: str = None,
           **kwargs) -> None:
    """Recursively extract interesting resources from the Python artifact.

    This function will cycle through all the registered ARTIFACT_TYPES. See
    usages of :py:meth:`pydecipher.__init__.register` for the creation of this
    list.


    ARTIFACT_TYPES consists of the different 'unpackable', registered
    (via decorator) Python artifact classes in a dictionary of the format
    <Artifact_Name : Class Instance of Artifact_Name>. A class's constructor
    should raise a TypeError if is being instantiated with something that
    isn't the correct type (i.e. Py2Exe resource being passed to a
    PyInstaller archive constructor).

    Parameters
    ----------
    python_artifact : pathlib.Path or io.IOBase (file-like object)
        The path to the Python artifact
    output_dir : str, optional
        Where to dump the extracted output of artifact parsers. If no
        directory is specified, a directory will be created in the
        current working directory.
    **kwargs
        Arbitrary keyword arguments. Including, but not limited to:

            version_hint: str
                The (potential) Python version of the artifact. If you know
                the version, you should pass it in. Otherwise, pydecipher
                will try to automatically figure out what version was used
                through string-analysis (and possibly brute-force decompilation).
                If no hint is given and automatic detection fails, unpacking
                may be incomplete.
    """
    if output_dir:
        output_dir: pathlib.Path = pathlib.Path(output_dir).resolve()
    # Holds the first artifact-class instance whose constructor accepted the
    # input; stays None until a magic check succeeds. (The original annotated
    # this `type`, but it holds an *instance*, not a class.)
    type_instance = None
    logger.info(f"[*] Unpacking {python_artifact}")
    for type_, class_ in pydecipher.ARTIFACT_TYPES.items():
        logger.debug(
            f"[*] Checking {type_} magic for file {python_artifact.name}")
        try:
            type_instance = class_(python_artifact,
                                   output_dir=output_dir,
                                   **kwargs)
            logger.debug(
                f"[*] Determined {python_artifact.name} type to be {type_}")
            break
        except TypeError:
            logger.debug(f"[*] Magic incorrect for type {type_}")
    else:
        # for/else: runs when the loop completed without `break`, i.e. no
        # registered artifact type accepted this input.
        logger.debug(
            "[!] No artifact types found! Something went wrong. Please submit a bug report."
        )

    if type_instance:
        type_instance.unpack()
コード例 #9
0
    def disassemble_and_dump(self, brute_force: bool = False):
        """Unmarshal this resource's code objects and write each out as a pyc.

        Parameters
        ----------
        brute_force : bool, optional
            When True, output files are nested under a per-Python-version
            directory so multiple magic-number guesses can coexist.
        """
        code_bytes = self.resource_contents[self.marshalled_obj_start_idx:]
        hijacked_stderr = io.StringIO()  # swallow xdis noise printed to stderr
        with redirect_stderr(hijacked_stderr):
            try:  # TODO make this more specific error catching
                code_objects = load_code(code_bytes, self.magic_num)
                if not isinstance(code_objects, list):
                    # TODO make this a non-generic error
                    raise RuntimeError(
                        "Py2Exe should return a marshalled list of code objects"
                    )
                if not all(code_objects):
                    raise RuntimeError("NoneType code objects returned")
            except Exception:
                logger.debug(
                    f"[!] Failed to produce disassembly of bytecode with magic num {self.magic_num} "
                    f"(Python version {magicint2version[self.magic_num]})")
                self.magic_num = -1  # sentinel: disassembly failed
                return
            else:
                logger.info(
                    f"[+] Successfully disassembled bytecode with magic number {self.magic_num}, "
                    f"corresponding to Python version {magicint2version[self.magic_num]}"
                )

        for co in code_objects:
            new_filename: str = self._clean_filename(co.co_filename)
            self.output_dir.mkdir(parents=True, exist_ok=True)
            # BUG FIX: keep bytecode_filepath a Path in both branches. The
            # original's non-brute-force branch produced a plain str, which
            # made `bytecode_filepath.name` in the error handler below raise
            # AttributeError instead of logging the failure.
            if brute_force:
                bytecode_filepath = self.output_dir / magicint2version[
                    self.magic_num] / new_filename
                bytecode_filepath.parent.mkdir(exist_ok=True)
            else:
                bytecode_filepath = self.output_dir / new_filename

            try:
                xdis.load.write_bytecode_file(str(bytecode_filepath), co,
                                              self.magic_num)
            except Exception as e:
                logger.error(
                    f"[!] Could not write file {bytecode_filepath.name} with error: {e}"
                )
            else:
                logger.info(
                    f"[+] Successfully wrote file {new_filename} to {self.output_dir}"
                )
コード例 #10
0
    def load_version_info(self, quiet: bool = False) -> None:
        """Extract the VersionInfo dictionary from the pefile.PE object.

        If pydecipher is running in anything but 'quiet' mode, it will print
        the version info to the log. Additionally, it will search for Python
        version strings within the version info.

        Parameters
        ----------
        quiet : bool, optional
            Whether or not to print the version info dictionary to the log.
        """
        if not hasattr(self.pe, "FileInfo"):
            return
        structure: pefile.Structure
        for structure in self.pe.FileInfo:
            sub_structure: pefile.Structure
            for sub_structure in structure:
                if sub_structure.Key != b"StringFileInfo":
                    continue
                if hasattr(sub_structure, "StringTable"):
                    string_table: pefile.Structure
                    for string_table in sub_structure.StringTable:
                        if string_table.entries:
                            self.version_info = {
                                x.decode("utf-8"): y.decode("utf-8")
                                for x, y in string_table.entries.items()
                            }
        # FIX: json.dumps returns a str; the original annotation of
        # Dict[str, str] was incorrect.
        formatted_version_info: str = json.dumps(self.version_info,
                                                 indent=4,
                                                 separators=(",", ": "))
        if not quiet:
            logger.debug(
                f"[*] This PE had the following VersionInfo resource: {formatted_version_info}"
            )

        if "python" in str(self.version_info).lower():
            if "FileVersion" in self.version_info:
                self.python_version = self.version_info["FileVersion"]
            if "ProductVersion" in self.version_info:
                product_version = self.version_info["ProductVersion"]
                # assume longer string means more detailed version info (we'd rather know it was 2.7.14 vs just 2.7)
                # BUG FIX: the original `if self.python_version and ...`
                # ignored ProductVersion entirely whenever python_version was
                # still unset (e.g. no FileVersion field present).
                if not self.python_version or len(
                        self.python_version) < len(product_version):
                    self.python_version = product_version
コード例 #11
0
    def extract_files(self) -> None:
        """Decompress every TOC entry and write it out as a .pyc file.

        Each entry gets a synthetic pyc header built from this archive's
        magic number; encrypted archives are decrypted first via
        ``self.decrypt_file``. Failure and success counts are logged at the
        end.
        """
        error_count = 0
        written_count = 0
        # compilation_time is only set when the key file was disassembled.
        timestamp = getattr(self, "compilation_time", None)
        for entry_name, (type_code, offset, entry_size) in self.toc.items():
            header_bytes = pydecipher.bytecode.create_pyc_header(
                self.magic_int, compilation_ts=timestamp, file_size=0)

            blob = self.archive_contents[offset:offset + entry_size]
            if self.encrypted:
                blob = self.decrypt_file(blob)
            if blob is None:
                # decrypt_file returns None on failure
                error_count += 1
                continue

            try:
                raw_bytecode = zlib.decompress(blob)
            except zlib.error as e:
                error_count += 1
                logger.debug(
                    f"[!] PYZ zlib decompression failed with error: {e}")
                continue

            self.output_dir.mkdir(parents=True, exist_ok=True)
            out_path = self.output_dir / str(entry_name + ".pyc")
            with out_path.open("wb") as out_fh:
                out_fh.write(header_bytes + raw_bytecode)
            written_count += 1

        if error_count:
            logger.debug(
                f"[!] Failed to write {error_count} files due to decompression errors."
            )
        if written_count:
            logger.info(
                f"[+] Successfully extracted {written_count} files from this ZlibArchive."
            )
コード例 #12
0
ファイル: pyc.py プロジェクト: neil-orans/pydecipher
    def check_and_fix_pyc(
        pyc_file: pathlib.Path,
        provided_version: str = None
    ) -> Union[None, tempfile.NamedTemporaryFile]:
        """Fix a given pyc file so it can be properly disassembled by xdis.

        This function combats the following common obfuscations that may be
        applied to pyc files that would prevent them from easily being disassembled

            1. Missing the header entirely
            2. Missing only the magic bytes
            3. Magic bytes are there, but they don't match a known version
            4. Filename doesn't end in .pyc

        Parameters
        ----------
        pyc_file: pathlib.Path
            The path to the pyc file
        provided_version: str, optional
            The version of the Python that compiled the pyc, if known.

        Raises
        ------
        RuntimeError
            The pyc file is malformed and couldn't be corrected, likely due to
            a version not being given.

        Returns
        -------
        Union[None, tempfile.NamedTemporaryFile]
            If the pyc file is fine as is, this function returns None. If it
            needs to be fixed in some way, the temporary file object
            with the fixes is returned.
        """
        corrected_file_contents: bytes = b""
        all_bytes: bytes = b""
        utils.check_read_access(pyc_file)
        infile: BinaryIO
        with pyc_file.open("rb") as infile:
            # Read at most the first 24 bytes (file may be shorter); the
            # header + start of the marshalled code object lives here.
            first_24_bytes: bytes = infile.read(
                min(24,
                    pyc_file.stat().st_size))
            infile.seek(0)
            all_bytes = infile.read()

        # A marshalled code object's leading byte signature must appear near
        # the start of any real pyc -- otherwise bail out entirely.
        if not any(True for p in Pyc.MARSHALLED_CODE_OBJECT_LEADING_BYTES
                   if p in first_24_bytes):
            raise RuntimeError(f"This file {str(pyc_file)} isn't pyc file!")

        if provided_version:
            # Precompute a replacement header from the user-supplied version;
            # used by both repair branches below.
            correct_magic_num = bytecode.version_str_to_magic_num_int(
                provided_version)
            header = bytecode.create_pyc_header(correct_magic_num)
        if Pyc.is_headerless(first_24_bytes[:8]):
            # Is this pyc completely missing a header?
            if provided_version:
                corrected_file_contents = header
                corrected_file_contents += all_bytes
            else:
                logger.error(
                    "[!] The pyc file provided does not have a header. For remap to decompile this, please provide a"
                    " version with the --version flag")
                raise RuntimeError

        elif first_24_bytes[0:4] not in by_magic:
            # Does have a header of sorts, but can't recognize magic numbers.
            # We'll need a version from the user to proceed
            if not provided_version:
                logger.error(
                    "[!] This version has a header, but we can't recognize the magic number"
                    f" {struct.unpack('<H', first_24_bytes[0:2])[0]}. No version was provided to fix the header."
                )
                raise RuntimeError
            else:
                logger.debug(
                    "[*] This version has a header, but we can't recognize the magic number"
                    f" {struct.unpack('<H', first_24_bytes[0:2])[0]}. Using magic num {correct_magic_num} (from"
                    f" provided version {provided_version}) to fix the header."
                )
            # Find where the marshalled code object begins so the bogus
            # header can be stripped. The guard above guarantees one of the
            # patterns exists, so the index will be found.
            code_object_begin_index: int = -1
            pattern: bytes
            for pattern in Pyc.MARSHALLED_CODE_OBJECT_LEADING_BYTES:
                if pattern in all_bytes:
                    code_object_begin_index = all_bytes.index(pattern)
                    break
            corrected_file_contents: bytes = header
            corrected_file_contents += all_bytes[code_object_begin_index:]

        # Decide what (if anything) needs to be written to a new file.
        bytes_to_write_out: bytes = b""
        if corrected_file_contents:
            bytes_to_write_out = corrected_file_contents
        elif pyc_file.suffix != ".pyc":
            # There was nothing to correct except the filename, so we just duplicate the file.
            bytes_to_write_out = all_bytes
        else:
            # There was nothing to do with this pyc file. It is seemingly valid.
            return

        # The NamedTemporaryFile object is returned (not just its path) so
        # the file stays alive for the caller; it is deleted when the object
        # is closed or garbage-collected.
        temp_file: tempfile.NamedTemporaryFile = tempfile.NamedTemporaryFile(
            suffix=".pyc")
        pyc_fixed_file: pathlib.Path = pathlib.Path(temp_file.name)
        outfile: BinaryIO
        with pyc_fixed_file.open("wb") as outfile:
            outfile.write(bytes_to_write_out)
        return temp_file
コード例 #13
0
    def _determine_python_version(self):
        """Attempt to determine the Python version used when this py2exe PE
        was compiled.

        xdis requires knowledge of the Python version to unmarshal the
        bytecode correctly.

        Returns
        -------
        set or None
            A set of candidate magic numbers, or None if no hints were found
            (the caller should then brute-force the magic number).
        """
        potential_magic_nums = set()
        logger.debug("[*] Attempting to discover version for PYTHONSCRIPT resource")

        # Method 1: Looking for PythonXY.DLL resource in the same directory as the PYTHONSCRIPT resource. If there,
        # check to see if it has a VERSIONINFO resource with a FileVersion or ProductVersion field,
        # as these typically contain the python version. See https://github.com/erocarrera/pefile for more info on
        # the structures used below
        if hasattr(self, "archive_path"):
            parent_dir = self.archive_path.parents[0]
        else:
            parent_dir = pathlib.Path.cwd()
        for python_dll in os.listdir(parent_dir):
            if re.match(r"python[0-9]{0,2}\.dll", python_dll, re.I):
                logger.debug(f"[*] Found python DLL resource {str(python_dll)} in directory {parent_dir}")
                try:
                    dll_class_inst = PortableExecutable(parent_dir.joinpath(python_dll))
                except TypeError:
                    logger.debug(f"[!] PyDecipher could not create a PE/DLL class instance for {str(python_dll)}")
                else:
                    dll_class_inst.load_version_info(quiet=True)
                    if dll_class_inst.python_version:
                        potential_magic_nums.add(version_str_to_magic_num_int(dll_class_inst.python_version))
                finally:
                    # Only the first matching DLL is inspected.
                    break

        # Method 2: Check to see if there are pyc files in the same directory with magic numbers
        for pyc_file in parent_dir.rglob("*.pyc"):
            with pyc_file.open("rb") as pyc_file_ptr:
                try:
                    magic_bytes = pyc_file_ptr.read(4)
                    magic_num = magic2int(magic_bytes)
                # FIX: was a bare `except:`, which also swallows SystemExit/
                # KeyboardInterrupt. TODO make more specific error catching
                except Exception:
                    pass
                else:
                    potential_magic_nums.add(magic_num)
            # NOTE(review): only the *first* pyc found is sampled --
            # presumably one file suffices as a version hint; confirm.
            break

        # Searching the PYTHONSCRIPT resource for strings like c:\python24\lib\site-packages\py2exe\boot_common.py
        b_python_regex = re.compile(b"(python)([0-9]{2})", re.I)
        script_re_obj = b_python_regex.search(self.resource_contents)
        if script_re_obj:
            version_str = script_re_obj.group(2).decode("utf-8")
            logger.info(
                "[*] Detected potential version string in PYTHONSCRIPT resource: {}".format(
                    script_re_obj.group().decode("utf-8")
                )
            )
            potential_magic_nums.add(version_str_to_magic_num_int(version_str[0] + "." + version_str[1]))

        if potential_magic_nums:
            logger.info(f"[*] Will attempt to unmarshal using these python magic numbers: {potential_magic_nums}")
            return potential_magic_nums
        else:
            logger.info(
                "[!] Couldn't find any python magic numbers to hint at the python version of this resource. "
                "Will attempt to brute-force determine the correct magic number."
            )
            return
コード例 #14
0
ファイル: remap.py プロジェクト: jon1scr/pydecipher
def standard_pyc_remap(
        standard_bytecode_path: pathlib.Path,
        remapped_bytecode_path: pathlib.Path,
        version: str = None) -> Tuple[Dict[int, Dict[int, int]], str]:
    """Diff compiled code objects from standard library and modified interpreter to try and recreate opcode mappings.

    This method is similar to the megafile method, but at a larger scale.
    See the remap documentation for more information on this method.

    Parameters
    ----------
    standard_bytecode_path: pathlib.Path
        The path on disk to the reference set of standard-compiled bytecode. The version of Python for the reference set
        must correspond to the version of Python used as a base for the modified interpreter.
    remapped_bytecode_path: pathlib.Path
        The path on disk to the set of bytecode compiled by the modified interpreter.
    version: str, optional
        The version of Python that this opcode file corresponds to.

    Returns
    -------
    Tuple[Dict[int, Dict[int, int]], str]
        A tuple containing a dictionary of original_opcode to
        Dict[replacement_opcode: replacement_count] and the opmap's Python
        version. replacement_opcode is an opcode that was seen in place of
        original_opcode, and replacement_count is the number of times it was
        seen replacing the original_opcode throughout all the bytecode that
        was analyzed.
    """
    # Index the reference pycs by bare module name (e.g. "os" for
    # "os.cpython-38.pyc") so they can be matched against similarly named
    # files produced by the modified interpreter.
    reference_files: Dict[str, List[pathlib.Path]] = {}
    determined_version: str = ""
    pyc_file: pathlib.Path
    for pyc_file in standard_bytecode_path.rglob("*.pyc"):
        pyc_file_name: str = pyc_file.name.split(".")[0]
        if pyc_file_name == "__init__":
            continue
        if not determined_version:
            # Derive the Python version of the reference set from the first
            # readable pyc's 4-byte magic number.
            try:
                infile: BinaryIO
                with pyc_file.open("rb") as infile:
                    pyc_magic_bytes: bytes = infile.read(4)
                    version_set: Set[str] = copy.deepcopy(
                        xdis.magics.by_magic[pyc_magic_bytes])
                    determined_version = version_set.pop()
            except Exception:
                # Unreadable or unknown pyc; try the next file for a version.
                pass
            else:
                logger.debug(
                    f"Determined version {determined_version} from reference bytecode."
                )
                if version and bytecode.version_str_to_magic_num_int(
                        determined_version
                ) != bytecode.version_str_to_magic_num_int(version):
                    logger.warning(
                        f"Provided version {version} does not equal the version determined in the reference pyc "
                        f"set ({determined_version}). We will proceed with the version you provided."
                    )
        if pyc_file_name in reference_files:
            reference_files[pyc_file_name].append(pyc_file)
        else:
            reference_files[pyc_file_name] = [pyc_file]

    if not version:
        version = determined_version

    # Index the modified interpreter's bytecode the same way. Files that
    # don't parse as pycs (the Pyc constructor raises TypeError) are skipped.
    remapped_files: Dict[str, List[pathlib.Path]] = {}
    for pyc_file in remapped_bytecode_path.rglob("*"):
        if not pyc_file.is_file():
            continue
        try:
            kwargs: Dict[str, str] = {"version_hint": version}
            artifact_types.pyc.Pyc(pyc_file, **kwargs)
        except TypeError:
            continue
        pyc_file_name: str = pyc_file.name.split(".")[0]
        if pyc_file_name == "__init__":
            # Too common a filename, causes more problems than its worth to try to include these
            # since they are usually empty anyway.
            continue
        if pyc_file_name in remapped_files:
            remapped_files[pyc_file_name].append(pyc_file)
        else:
            remapped_files[pyc_file_name] = [pyc_file]

    master_remapping_counts: Dict[int, Dict[int, int]] = {}
    pyc_filename: str
    list_of_filepaths: List[pathlib.Path]
    for pyc_filename, list_of_filepaths in remapped_files.items():
        if pyc_filename not in reference_files:
            continue
        pyc_filepath: pathlib.Path
        for pyc_filepath in list_of_filepaths:
            # Pick the reference file whose relative path is most similar to
            # this remapped file's relative path (longest common substring).
            reference_file: pathlib.Path = None
            highest_similarity: float = 0
            ref_pyc_filepath: pathlib.Path
            for ref_pyc_filepath in reference_files[pyc_filename]:
                relative_reference_filepath: str = str(
                    ref_pyc_filepath.relative_to(standard_bytecode_path))
                relative_remapped_filepath: str = str(
                    pyc_filepath.relative_to(remapped_bytecode_path))
                path_similarity: float = textdistance.lcsstr.normalized_similarity(
                    relative_reference_filepath, relative_remapped_filepath)
                if path_similarity > highest_similarity:
                    highest_similarity = path_similarity
                    reference_file = ref_pyc_filepath
            if not reference_file:
                continue

            fixed_pyc_file: tempfile.NamedTemporaryFile
            if fixed_pyc_file := artifact_types.pyc.Pyc.check_and_fix_pyc(
                    pyc_filepath, provided_version=version):
                logger.debug(
                    f"[+] Duplicated file {str(pyc_filepath)} to correct issues with the pyc. New filepath:"
                    f" {fixed_pyc_file.name}")
                # NamedTemporaryFile.name is a str; downstream use only ever
                # stringifies this value, so the type change is harmless.
                pyc_filepath = fixed_pyc_file.name

            try:
                # The disassembly listing itself is irrelevant here — route it
                # to os.devnull via a context manager so the handle is closed
                # promptly (the original leaked one handle per call).
                with open(os.devnull, "w") as devnull:
                    remapped_filename: str
                    remapped_co: CodeType  # can also be xdis codetypes
                    remapped_version: float
                    remapped_timestamp: int
                    remapped_magic_int: int
                    remapped_is_pypy: bool
                    remapped_source_size: int
                    remapped_sip_hash: str
                    (
                        remapped_filename,
                        remapped_co,
                        remapped_version,
                        remapped_timestamp,
                        remapped_magic_int,
                        remapped_is_pypy,
                        remapped_source_size,
                        remapped_sip_hash,
                    ) = xdis.disasm.disassemble_file(str(pyc_filepath),
                                                     header=True,
                                                     outstream=devnull)

                    reference_filename: str
                    reference_co: CodeType  # can also be xdis codetypes
                    reference_version: float
                    reference_timestamp: int
                    reference_magic_int: int
                    reference_is_pypy: bool
                    reference_source_size: int
                    reference_sip_hash: str
                    (
                        reference_filename,
                        reference_co,
                        reference_version,
                        reference_timestamp,
                        reference_magic_int,
                        reference_is_pypy,
                        reference_source_size,
                        reference_sip_hash,
                    ) = xdis.disasm.disassemble_file(str(reference_file),
                                                     outstream=devnull)
            except Exception:
                # Either file failing to disassemble means this pair can't be
                # diffed; move on to the next candidate.
                continue

            version = str(reference_version)

            try:
                remappings: Dict[int, int] = bytecode.diff_opcode(
                    reference_co, remapped_co, version)
            except RuntimeError:
                continue

            # Merge these remappings into the larger dictionary.
            opcode_val: int
            remap_options: Dict[int, int]
            for opcode_val, remap_options in remappings.items():
                if opcode_val in master_remapping_counts:
                    remap_option: int
                    count: int
                    for remap_option, count in remap_options.items():
                        if remap_option in master_remapping_counts[opcode_val]:
                            master_remapping_counts[opcode_val][
                                remap_option] += count
                        else:
                            master_remapping_counts[opcode_val][
                                remap_option] = count
                else:
                    master_remapping_counts[opcode_val] = remap_options

    # The docstring promises a (remappings, version) tuple; the original fell
    # off the end of the function and implicitly returned None.
    return master_remapping_counts, version
コード例 #15
0
    def extract_files(self):
        """Extract every TOC entry of this CArchive into ``self.output_dir``.

        Decompresses zlib-compressed entries, rebuilds pyc headers for
        marshalled PYSOURCE/PYMODULE entries, and recursively unpacks any
        embedded PYZ/ZIPFILE entries. Reads ``self.toc``,
        ``self.archive_contents`` and ``self.output_dir``.
        """
        magic_nums: set = set()          # python magic numbers seen in PYMODULE entries
        decompression_errors = 0
        successfully_extracted = 0
        entry: CTOCEntry
        for entry in self.toc:
            # Slice this entry's raw bytes out of the archive blob.
            data = self.archive_contents[entry.
                                         entry_offset:entry.entry_offset +
                                         entry.compressed_data_size]

            if entry.compression_flag:
                try:
                    data = zlib.decompress(data)
                except zlib.error as e:
                    decompression_errors += 1
                    logger.debug(
                        f"[!] PyInstaller CArchive decompression failed with error: {e}"
                    )
                    continue
                else:
                    if len(data) != entry.uncompressed_data_size:
                        logger.warning(
                            f"[!] {entry.name} entry in CArchive listed its uncompressed data size as"
                            f" {entry.uncompressed_data_size}, however in actuality, uncompressed to be {len(data)}"
                            " bytes. This may be a sign that the CArchive was manually altered."
                        )

            # Archives built on Windows store backslash-separated names.
            if "\\" in entry.name:
                tmp: PureWindowsPath = pathlib.PureWindowsPath(entry.name)
            else:
                tmp: Path = Path(entry.name)
            file_path = pathlib.Path(self.output_dir).joinpath(tmp)
            if len(file_path.parents) > 1:  # every path has '.' as a parent
                file_path.parent.mkdir(parents=True, exist_ok=True)

            if entry.type_code == self.ArchiveItem.PYSOURCE:
                # A marshalled code object (rather than plain source) starts
                # with xdis's TYPE_CODE byte, optionally OR'd with FLAG_REF.
                if ord(data[:1]) == ord(xdis.marsh.TYPE_CODE) or ord(
                        data[:1]) == (ord(xdis.marsh.TYPE_CODE)
                                      | xdis.unmarshal.FLAG_REF):
                    file_path = file_path.parent / (file_path.name + ".pyc")
                    if len(magic_nums) > 1:
                        magic_num = next(iter(magic_nums))
                        logger.warning(
                            "[!] More than one magic number found within this CArchive. Using magic number"
                            f" {magic_num}, but also found numbers: {magic_nums}"
                        )
                    elif len(magic_nums) == 0:
                        logger.warning(
                            f"[!] No magic numbers have been found yet, queueing this file for later."
                        )
                        # TODO: add this file to a do-later list, when you know the magic num  #TODO does this actually happen? dig deeper...
                    # Guard against an empty set: the original called
                    # next(iter(magic_nums)) unconditionally and raised
                    # StopIteration when no magic numbers had been seen yet.
                    if magic_nums:
                        data = pydecipher.bytecode.create_pyc_header(
                            next(iter(magic_nums))) + data
                else:
                    file_path = file_path.parent / (file_path.name + ".py")
                if "pyi" not in entry.name:
                    logger.info(
                        f"[!] Potential entrypoint found at script {entry.name}.py"
                    )
            elif entry.type_code == self.ArchiveItem.PYMODULE:
                magic_bytes = data[:4]  # Python magic value
                magic_nums.add(magic2int(magic_bytes))
                file_path = file_path.parent / (file_path.name + ".pyc")

            if entry.type_code != self.ArchiveItem.RUNTIME_OPTION:
                self.output_dir.mkdir(parents=True, exist_ok=True)
                with file_path.open(mode="wb") as f:
                    f.write(data)
                    successfully_extracted += 1

            # Embedded archives get unpacked recursively into their own dir.
            if entry.type_code in (self.ArchiveItem.PYZ,
                                   self.ArchiveItem.ZIPFILE):
                output_dir_name = (str(
                    file_path.parent.joinpath(
                        utils.slugify(file_path.name.split(".")[0]))) +
                                   "_output")
                pydecipher.unpack(file_path, output_dir=output_dir_name)

        if decompression_errors:
            logger.debug(
                f"[!] Failed to write {decompression_errors} files due to decompression errors."
            )
        if successfully_extracted:
            logger.info(
                f"[+] Successfully extracted {successfully_extracted} files from this CArchive."
            )
コード例 #16
0
ファイル: remap.py プロジェクト: jon1scr/pydecipher
def fill_opmap_gaps(remappings: Dict[int, int],
                    version: str) -> Dict[int, Tuple[int, bool]]:
    """Fill the opmap with any missing opcodes for a specific version.

    Since pydecipher can only take in a valid opmap, we must make sure remap
    dumps opmaps that contain complete sets of opcodes. Very rarely will an
    opcode remapping method be able to cover 100% of opcodes in use for a
    particular Python version, so we need to fill the gaps with some guesses.

    Parameters
    ----------
    remappings: Dict[int, int]
        A dictionary of original opcode to remapped opcode.
    version: str
        A version string `accepted by xdis`_.

        .. _accepted by xdis:
            https://github.com/rocky/python-xdis/blob/master/xdis/magics.py

    Returns
    -------
    Dict[int, Tuple[int, bool]]
        A dictionary of original opcode to remapped opcode and a boolean
        indicating whether this remapping was guessed (True) or observed
        (False).

    Raises
    ------
    KeyError
        If `version` is not supported by xdis.
    """
    # Observed remappings are carried over, flagged as not-guessed.
    filled_remappings: Dict[int, Tuple[int, bool]] = {
        k: (v, False)
        for k, v in remappings.items()
    }
    is_pypy: bool = "pypy" in version
    try:
        opcode_obj: ModuleType = xdis.disasm.get_opcode(version, is_pypy)
    except KeyError:
        raise KeyError(
            f"[!] The version specified, {version}, is not supported by xdis.")
    xdis_opcode_map: Dict[str, int] = opcode_obj.opmap
    xdis_opcode_vals: Set[int] = set(xdis_opcode_map.values())
    # Standard opcodes not yet used as a remap target — the pool of guesses.
    remaining_options: List[int] = list(
        xdis_opcode_vals.difference(set(remappings.values())))
    logger.debug(
        f"[*] Set of opcodes available to assign from standard opmap: {remaining_options}"
    )
    # Standard opcodes that never appeared as an original opcode.
    missing_opcodes = list(xdis_opcode_vals.difference(set(remappings.keys())))
    logger.debug(
        f"[*] Set of opcodes that need an assignment in the modified opmap: {missing_opcodes}"
    )

    missing_opcode: int
    for missing_opcode in missing_opcodes:
        if not remaining_options:
            # Nothing left to assign. The original code would have crashed
            # here (list.remove on a -1 sentinel); stop guessing instead.
            logger.debug(
                "[!] Ran out of unassigned opcodes while filling opmap gaps.")
            break
        # Guess the numerically closest unused opcode; ties resolve to the
        # first candidate in list order, matching the original strict-< scan.
        best_option: int = min(
            remaining_options,
            key=lambda option: abs(option - missing_opcode))
        filled_remappings[missing_opcode] = (best_option, True)
        remaining_options.remove(best_option)

    return filled_remappings
コード例 #17
0
ファイル: remap.py プロジェクト: jon1scr/pydecipher
def write_remapping_file(
    remappings: Dict[int, Tuple[int, bool]],
    version: str,
    method: str,
    cli: str,
    output_dir: Union[str, pathlib.Path] = ".",
) -> pathlib.Path:
    """Write the remappings dict to a JSON file that can be used by pydecipher.

    It is assumed that by this point `remappings` is a bijection of original
    opcodes and replacement opcodes.

    Parameters
    ----------
    remappings: Dict[int, Tuple[int, bool]]
        A dictionary of original_opcode to (replacement_opcode, guess).
        replacement_opcode is the remapped value of original_opcode, and the
        guess boolean is whether or not remap actually observed this remapping
        or had to 'guess' it in order to produce a complete set of opcodes.
    version: str
        A version string `accepted by xdis`_.
    method: str
        A text description of the remapping method used.
    cli: str
        The command line for the remap command that produced this file.
    output_dir: Union[str, os.PathLike]
        The path where the remapping file should be written.

        .. _accepted by xdis:
            https://github.com/rocky/python-xdis/blob/master/xdis/magics.py

    Returns
    -------
    pathlib.Path
        The path to the remapping JSON file.
    """
    # NOTE: the original annotated this as Dict[str:Union[...]] — slice
    # syntax, not a valid typing subscription. Corrected to comma form.
    output_dict: Dict[str, Union[str, List[Dict[str, Union[str, int, bool]]]]] = {
        "python_version": str(version),
        "remapped_opcodes": [],
        "method": method,
        "command_line": json.dumps(cli),
    }

    # xdis is only needed to decorate entries with opnames; failure to look
    # up the version is non-fatal.
    xdis_opcode: ModuleType = None
    try:
        xdis_opcode = xdis.disasm.get_opcode(version, is_pypy=False)
    except Exception:
        logger.debug(
            f"[!] Couldn't retrieve version {version} from xdis! Continuing anyway..."
        )

    opcode_val: int
    remap_val: Tuple[int, bool]
    for opcode_val, remap_val in remappings.items():
        output_subdict: Dict[str, Union[int, bool, str]] = {
            "opcode": opcode_val,
            "remapped_value": remap_val[0],
            "guess": bool(remap_val[1]),
        }
        if xdis_opcode:
            # Some xdis opnames contain '+' (e.g. SLICE+1); normalize to '_'
            # so the name is usable as an identifier downstream.
            opname: str = xdis_opcode.opname[opcode_val]
            output_subdict["opname"] = opname.replace("+", "_")
        output_dict["remapped_opcodes"].append(output_subdict)

    # We sort based on the original opcode value because it seems like the most
    # natural way to sort this, and it is useful to have a standardized
    # output for comparison purposes.
    output_dict["remapped_opcodes"] = sorted(output_dict["remapped_opcodes"],
                                             key=lambda i: i["opcode"])
    output_dir: pathlib.Path = pathlib.Path(output_dir).resolve()
    output_filepath: pathlib.Path = output_dir / "remapping.txt"
    if output_filepath.exists():
        logger.debug(
            f"[!] {str(output_filepath)} already exists. Incrementing filename until an available name is found."
        )
        counter: int = 1
        while True:
            new_filepath: pathlib.Path = output_dir / f"remapping-{counter}.txt"
            if not new_filepath.exists():
                break
            counter += 1
        output_filepath = new_filepath
    output_dir.mkdir(parents=True, exist_ok=True)
    with output_filepath.open("w") as output_file_ptr:
        output_file_ptr.write(json.dumps(output_dict, sort_keys=True,
                                         indent=4))
        logger.info(f"[+] {str(output_filepath)} successfully written")
    return output_filepath
コード例 #18
0
ファイル: remap.py プロジェクト: jon1scr/pydecipher
        remapped_source_size: int
        remapped_sip_hash: str
        (
            remapped_filename,
            remapped_co,
            remapped_version,
            remapped_timestamp,
            remapped_magic_int,
            remapped_is_pypy,
            remapped_source_size,
            remapped_sip_hash,
        ) = xdis.disasm.disassemble_file(str(remapped_bytecode_path),
                                         outstream=open(os.devnull, "w"))
    except Exception as e:
        e: Exception
        logger.debug(f"Error disassembling remap megafile: {e}")
        logger.debug(
            "It is possible that this custom interpreter has tampered with the Python code compilation process in such"
            " a way that xdis cannot disassemble it. You can try manually inspecting the file to learn more."
        )
        raise RuntimeError

    remappings: Dict[int,
                     Dict[int,
                          int]] = bytecode.diff_opcode(reference_co,
                                                       remapped_co,
                                                       str(reference_version))
    return remappings, str(reference_version)


def opcode_constants_remap(
コード例 #19
0
def validate_opmap(version: str, opmap: Dict[str, int]) -> bool:
    """Validate whether opmap is correct/well-formed for the given version.

    A well-formed opcode map should not have any duplicate values, nor any
    missing or extraneous opnames or opcodes.

    Parameters
    ----------
    version : str
        Typically a string like '2.7' or '3.8.1'. However, the version string
        can be `any version accepted by xdis`_, including some weird alternate
        Python implementations like 2.7.1b3Jython or 3.5pypy.

        .. _any version accepted by xdis:
            https://github.com/rocky/python-xdis/blob/master/xdis/magics.py
    opmap : Dict[str, int]
        A dictionary of OPERATION NAME: OPCODE VALUE.

    Returns
    -------
    bool
        Whether or not this opcode map is valid and well-formed.

    Raises
    ------
    KeyError
        If `version` is not supported by xdis.
    """
    # Local import keeps the module's top-level import block untouched.
    from collections import Counter

    is_pypy: bool = "pypy" in version
    try:
        opcode_obj: ModuleType = xdis.main.get_opcode(version, is_pypy)
    except KeyError:
        raise KeyError(
            f"[!] The version specified, {version}, is not supported by xdis.")
    xdis_opcode_map: Dict[str, int] = opcode_obj.opmap
    validity: bool = True

    # Hoisted out of the loop: the original recomputed
    # list(opmap.values()).count(...) per entry, making validation quadratic.
    # NOTE: a duplicate-opname check was dropped — dict keys are unique by
    # construction, so it could never fire.
    opcode_counts: Counter = Counter(opmap.values())

    opname: str
    opcode: int
    for opname, opcode in opmap.items():
        if opname not in xdis_opcode_map.keys():
            logger.debug(
                f"[!] This opcode map contains the opname {opname}, which doesn't appear to be a valid "
                f"operation for Python {version}.")
            validity = False
        if opcode_counts[opcode] > 1:
            logger.debug(
                f"[!] This opcode map contains {opcode_counts[opcode]} entries for the opcode {opcode}."
            )
            validity = False

    for opname, opcode in xdis_opcode_map.items():
        if opname not in opmap.keys():
            logger.debug(
                f"[!] This opcode map does not have an entry for the opname {opname}. In standard Python "
                f"{version}, this value is {opcode}.")
            validity = False

    if len(opmap.keys()) != len(xdis_opcode_map.keys()):
        logger.debug(
            f"[!] This opcode map has a size of {len(opmap.keys())}, when it should have a size of "
            f"{len(xdis_opcode_map.keys())} for Python version {version}.")
        validity = False

    return validity
コード例 #20
0
def decompile_pyc(
    arg_tuple: Tuple[pathlib.Path, Dict[str, int],
                     Dict[str, Union[bool, os.PathLike]]]
) -> str:
    """Decompile a single Python bytecode file.

    Parameters
    ----------
    arg_tuple: Tuple[pathlib.Path, Dict[str, int], Dict[str, Union[bool, os.PathLike]]]
        A tuple containing the arguments for this function. This is a tuple because pebble's
        Pool.map() function couldn't pass multiple arguments to a subprocessed function call.
        The tuple entries correspond to the following arguments:

            pyc_file : pathlib.Path
                The path to the compiled Python file
            alternate_opmap : Dict[str, int], optional
                If this bytecode file was produced by an interpreter with remapped
                opcodes, you must provide the opmap as a OPNAME: OPCODE dictionary
            logging_options: Dict[str, Union[bool, os.PathLike], optional
                A dictionary of logging options. This is only needed when pydecipher is
                performing multi-processed decompilation. The keys can be the following
                strings:

                    verbose: bool
                        True will enable verbose logging.
                    quiet: bool
                        True will silence all console logging.
                    log_path: pathlib.Path
                        If a path object is passed in as the log_path, the running
                        instance of pydecipher will continue logging to that file.

    Returns
    -------
    str
        There are several different return values:

            * **no_action**: This file was not decompiled.
            * **success**: This file was successfully decompiled.
            * **error**: This file could not be decompiled 100% successfully.
            * **opcode_error**: The error message returned by uncompyle6
              indicates this file may have remapped opcodes
    """
    pyc_file: pathlib.Path = arg_tuple[0]
    alternate_opmap: Dict[str, int] = arg_tuple[1] or None
    logging_options: Dict[str, Union[bool, os.PathLike]] = arg_tuple[2] or None

    if not pyc_file.is_file():
        return "no_action"

    # Because this function runs in a new pydecipher process entirely, logging
    # options set during runtime (from command-line flags) do not carry over
    # automatically. We must pass these through manually, and reset the options
    # for this specific process.
    if logging_options and not pydecipher.log_path:
        pydecipher.set_logging_options(**logging_options)

    # uncompyle6 writes decompiled source to stdout; capture it (and stderr)
    # so it can be written to a file instead of the console.
    hijacked_stdout: io.StringIO = io.StringIO()
    hijacked_stderr: io.StringIO = io.StringIO()
    with redirect_stdout(hijacked_stdout), redirect_stderr(hijacked_stderr):
        # Chop off c in pyc
        new_file_name: pathlib.Path = pathlib.Path(
            str(pyc_file.resolve())[:-1])

        # This prohibits the overwriting of existing files.
        # if new_file_name.exists() and new_file_name.stat().st_size:
        #     return "no_action"

        logger.debug(
            f"[*] Decompiling file {pyc_file} of size {pyc_file.stat().st_size}"
        )
        if not alternate_opmap:
            try:
                uncompyle6.decompile_file(str(pyc_file), outstream=sys.stdout)
            except uncompyle6.semantics.parser_error.ParserError as e:
                logger.warning(f"[!] Failed to decompile file {pyc_file}")
                if REMAPPED_OPCODE_ERROR_REGEX.match(str(e.error)):
                    logger.error(
                        f"[!] {pyc_file.name} failed to decompile with an error that indicate its opcode "
                        "mappings may have been remapped to prevent analysis.")
                    return "opcode_error"
                return "error"
            except Exception as e:
                e: Exception
                logger.error(
                    f"[!] Failed to decompile file {pyc_file} with error: {e}")
                # Save whatever partial output was produced before the failure.
                stdout_val: str = hijacked_stdout.getvalue()
                if stdout_val:
                    with new_file_name.open("w") as file_ptr:
                        file_ptr.write(stdout_val)
                return "error"
            else:
                with new_file_name.open("w") as file_ptr:
                    file_ptr.write(hijacked_stdout.getvalue())
                logger.info(f"[+] Successfully decompiled {pyc_file}")
                return "success"
        else:
            # Remapped opcodes: disassemble with the alternate opmap first,
            # then hand the recovered code object to uncompyle6.
            filename: str
            co: CodeType  # can also be xdis.Code* objects
            version: float
            timestamp: int  # seconds since epoch
            magic_int: int
            is_pypy: bool
            source_size: int
            sip_hash: str
            try:
                # Route the (unwanted) disassembly listing to os.devnull; the
                # context manager closes the handle, which the original leaked.
                with open(os.devnull, "w") as devnull:
                    (
                        filename,
                        co,
                        version,
                        timestamp,
                        magic_int,
                        is_pypy,
                        source_size,
                        sip_hash,
                    ) = xdis.main.disassemble_file(
                        str(pyc_file),
                        outstream=devnull,
                        alternate_opmap=alternate_opmap)
                output_file: TextIO
                with new_file_name.open(mode="w") as output_file:
                    uncompyle6.main.decompile(
                        version,
                        co,
                        timestamp=timestamp,
                        source_size=source_size,
                        magic_int=magic_int,
                        is_pypy=is_pypy,
                        out=output_file,
                    )
            except Exception as e:
                e: Exception
                logger.info(
                    f"[!] Failed to decompile file {pyc_file} with error: {e}")
                return "error"
            else:
                logger.info(f"[+] Successfully decompiled {pyc_file}")
            return "success"
コード例 #21
0
def process_pycs(pyc_iterable: Iterable[os.PathLike],
                 alternate_opmap: Dict[str, int] = None) -> None:
    """Multi-processed decompilation orchestration of compiled Python files.

    Currently, pydecipher uses `uncompyle6`_ as its decompiler. It works well
    with `xdis`_ (same author) and allows for the decompilation of Code objects
    using alternate opmaps (with our extension of xdis).

    This function will start up CPU count * 2 pydecipher processes to decompile
    the given Python. Attempts to check for debugger, in which case the
    decompilation will be single-threaded to make debugging easier.

    .. _uncompyle6: https://github.com/rocky/python-uncompyle6/
    .. _xdis: https://github.com/rocky/python-xdis

    Parameters
    ----------
    pyc_iterable : Iterable[os.PathLike]
        An iterable of pathlib.Path objects, referencing compiled Python files
        to decompile.
    alternate_opmap : Dict[str, int], optional
        An opcode map of OPNAME: OPCODE (i.e. 'POP_TOP': 1). This should be a
        complete opmap for the Python version of the files being decompiled.
        Even if only two opcodes were swapped, the opcode map passed in should
        contain all 100+ Python bytecode operations.
    """
    return_status_codes: List[str] = []
    # sys.gettrace() returns a non-None trace function when a debugger (such
    # as the PyCharm debugger) is attached.
    if sys.gettrace():
        # Single-threaded for easier debugging.
        logger.debug(
            "[!] Debugger detected, not using multiprocessing for decompilation of pyc files."
        )
        pyc_file: pathlib.Path
        for pyc_file in pyc_iterable:
            return_status_codes.append(
                decompile_pyc((pyc_file, alternate_opmap,
                               pydecipher.get_logging_options())))
    else:
        pool: pebble.ProcessPool
        with pebble.ProcessPool(os.cpu_count() * 2) as pool:
            iterables = [(pyc, alternate_opmap,
                          pydecipher.get_logging_options())
                         for pyc in pyc_iterable]
            future: pebble.ProcessMapFuture = pool.map(decompile_pyc,
                                                       iterables,
                                                       timeout=300)
            iterator: Iterable = future.result()
            index: int = 0
            while True:
                # Resolve which pyc this iteration corresponds to BEFORE
                # collecting its result, so every error handler below can name
                # the failing file. (Previously this was only assigned in the
                # TimeoutError branch, raising NameError in the other two
                # handlers when they fired first.)
                failed_pyc_path: str = (str(iterables[index][0])
                                        if index < len(iterables) else "")
                try:
                    result: Any = next(iterator)
                    return_status_codes.append(result)
                except StopIteration:
                    break
                except TimeoutError as e:
                    # pebble stores the per-task timeout (300s here) in
                    # e.args[1] when a single decompilation exceeds it.
                    logger.error(
                        f"[!] Timed out ({e.args[1]}s) trying to decompile {failed_pyc_path}."
                    )
                    return_status_codes.append("error")
                except pebble.ProcessExpired as e:
                    logger.error(
                        f"[!] Failed to decompile {failed_pyc_path} (process expired with status code {e.exitcode})."
                    )
                    return_status_codes.append("error")
                except Exception as e:
                    logger.error(
                        f"[!] Failed to decompile {failed_pyc_path} with unknown error: {e}"
                    )
                    return_status_codes.append("error")
                finally:
                    index += 1

    # Summarize how the batch went. Opcode errors count as errors too, but
    # get their own hint about remapped opcode tables.
    successes: int = return_status_codes.count("success")
    opcode_errors: int = return_status_codes.count("opcode_error")
    errors: int = return_status_codes.count("error") + opcode_errors
    if opcode_errors:
        logger.warning(
            f"[!] {opcode_errors} file(s) failed to decompile with an error "
            "that indicates its opcode mappings may have been remapped. Try "
            "using `remap` on this set of bytecode.")
    if successes and not errors:
        logger.info(f"[+] Successfully decompiled {successes} .pyc files.")
    elif successes and errors:
        logger.warning(
            f"[!] Successfully decompiled {successes} .pyc files. Failed to decompile {errors} files. "
            "See log for more information.")
    elif not successes and errors:
        logger.error(
            f"[!] Failed to decompile all {errors} .pyc files. See log for more information."
        )
    else:
        logger.warning(
            "[!] No pyc files were decompiled. See log for more information.")
コード例 #22
0
    def parse_toc(self):
        """Parse this CArchive's table of contents into ``self.toc``.

        First reads the CArchive cookie to recover the package length, TOC
        offset/size, and Python version — trying the PyInstaller 2.0 layout
        and then the 2.1+ layout when ``self.pyinstaller_version`` is
        "unknown". Then walks the TOC bytes, appending one ``CTOCEntry`` per
        packed file. Logs a warning and returns early if neither cookie
        layout matches.
        """
        # Read CArchive cookie. The 2.0 cookie is "!8siiii"; 2.1+ appends a
        # 64-byte NUL-padded Python dynamic-library name ("!8siiii64s").
        if self.pyinstaller_version == 2.0 or self.pyinstaller_version == "unknown":
            try:
                (
                    magic,
                    self.length_of_package,
                    self.toc_offset,
                    self.toc_size,
                    self.python_version,
                ) = struct.unpack(
                    "!8siiii",
                    self.archive_contents[self.magic_index:self.magic_index +
                                          self.PYINST20_COOKIE_SIZE],
                )
            except struct.error:
                # Buffer too short or wrong layout: not a 2.0 cookie.
                # Narrowed from a bare except so unrelated errors (and
                # KeyboardInterrupt/SystemExit) are no longer swallowed.
                pass
            else:
                self.pyinstaller_version = 2.0
        if self.pyinstaller_version == 2.1 or self.pyinstaller_version == "unknown":
            try:
                (
                    magic,
                    self.length_of_package,
                    self.toc_offset,
                    self.toc_size,
                    self.python_version,
                    self.python_dynamic_lib,
                ) = struct.unpack(
                    "!8siiii64s",
                    self.archive_contents[self.magic_index:self.magic_index +
                                          self.PYINST21_COOKIE_SIZE],
                )
            except struct.error:
                # Not a valid 2.1 cookie either.
                pass
            else:
                self.pyinstaller_version = 2.1
                if self.python_dynamic_lib:
                    # Stored as NUL-padded ASCII bytes; strip the padding.
                    self.python_dynamic_lib = self.python_dynamic_lib.decode(
                        "ascii").rstrip("\x00")

        if self.pyinstaller_version == "unknown":
            logger.warning(
                "[!] Could not parse CArchive because PyInstaller version is unknown."
            )
            return

        # The cookie stores the version as an int such as 27 or 36; convert
        # to the familiar 2.7 / 3.6 form.
        self.python_version = float(self.python_version) / 10
        logger.info(
            f"[*] This CArchive was built with Python {self.python_version}")
        logger.debug(f"[*] CArchive Package Size: {self.length_of_package}")
        logger.debug(f"[*] CArchive Python Version: {self.python_version}")
        if self.pyinstaller_version == 2.1:
            logger.debug(
                f"[*] CArchive Python Dynamic Library Name: {self.python_dynamic_lib}"
            )

        self.toc = []
        toc_bytes = self.archive_contents[self.toc_offset:self.toc_offset +
                                          self.toc_size]
        while toc_bytes:
            # Each TOC entry is variable-length: a 4-byte total entry size,
            # then the fixed fields, then a NUL-padded name filling the rest.
            (entry_size, ) = struct.unpack("!i", toc_bytes[0:4])
            name_length = entry_size - self.CTOCEntry.ENTRYLEN
            (
                entry_offset,
                compressed_data_size,
                uncompressed_data_size,
                compression_flag,
                type_code,
                name,
            ) = struct.unpack(f"!iiiBB{name_length}s", toc_bytes[4:entry_size])

            name = name.decode("utf-8").rstrip("\0")
            if name == "":
                # PyInstaller permits nameless entries; synthesize a unique
                # name so the file can still be written out.
                name = str(uniquename())
                logger.debug(
                    f"[!] Warning: Found an unnamed file in CArchive. Using random name {name}"
                )

            type_code = chr(type_code)
            self.toc.append(
                self.CTOCEntry(
                    entry_offset,
                    compressed_data_size,
                    uncompressed_data_size,
                    compression_flag,
                    type_code,
                    name,
                ))

            toc_bytes = toc_bytes[entry_size:]
        logger.debug(
            f"[*] Found {len(self.toc)} entries in this PyInstaller CArchive")
コード例 #23
0
    def dump_certificates(self, output_dir: pathlib.Path = None) -> None:
        """Dump Authenticode certificates from the PE's certificate attribute table.

        Walks the IMAGE_DIRECTORY_ENTRY_SECURITY data directory, parses each
        entry as a PKCS#7 Authenticode blob via signify, and writes every
        embedded X.509 certificate to disk as a PEM file. Files are named
        after the certificate's organizational unit name, organization name,
        or common name (first available, in that order), falling back to a
        numeric index. Sets ``self.certificates_dumped`` to True on completion.

        Parameters
        ----------
        output_dir: pathlib.Path, optional
            An optional alternative output directory to dump the certificates, besides
            the class's output directory.
        """
        # Locate the certificate table entry in the optional header's data
        # directory; stays None if the PE is unsigned or headers are missing.
        certificate_table_entry: pefile.Structure = None
        if hasattr(self.pe, "OPTIONAL_HEADER") and hasattr(
                self.pe.OPTIONAL_HEADER, "DATA_DIRECTORY"):
            idx: int
            for idx in range(len(self.pe.OPTIONAL_HEADER.DATA_DIRECTORY)):
                directory: pefile.Structure = self.pe.OPTIONAL_HEADER.DATA_DIRECTORY[
                    idx]
                if directory.name == "IMAGE_DIRECTORY_ENTRY_SECURITY" and directory.Size:
                    logger.debug("[*] This PE has a certificate table.")
                    certificate_table_entry = directory
                    break

        # No certificate table: nothing to dump (certificates_dumped stays unset).
        if certificate_table_entry is None:
            return

        if output_dir is None:
            certificate_extraction_dir: pathlib.Path = self.output_dir.joinpath(
                "Authenticode_Certificates")
        else:
            certificate_extraction_dir: pathlib.Path = output_dir
        certificate_extraction_dir.mkdir(parents=True, exist_ok=True)

        # For the security directory, VirtualAddress is a raw file offset,
        # hence the slice into the PE's raw bytes rather than an RVA mapping.
        certificate_table_data: bytes = self.pe.__data__[
            certificate_table_entry.VirtualAddress:]
        while certificate_table_data:
            # https://docs.microsoft.com/en-us/windows/desktop/Debug/pe-format#the-attribute-certificate-table-image-only
            # WIN_CERTIFICATE header: dwLength (4), wRevision (2),
            # wCertificateType (2), then the certificate payload.
            # NOTE(review): per the PE spec dwLength includes the 8-byte
            # header and entries are padded to 8-byte alignment; this loop
            # treats cert_length as payload-only and does not round the
            # stride. Works for the common single-certificate case, but may
            # misparse tables holding multiple certificates — TODO confirm.
            cert_length: int = int.from_bytes(certificate_table_data[0:4],
                                              byteorder="little")
            cert_version: bytes = certificate_table_data[4:6]  # noqa
            cert_type = certificate_table_data[6:8]  # noqa
            cert: bytes = certificate_table_data[8:8 + cert_length]
            certificate_table_data: bytes = certificate_table_data[
                8 + cert_length:]

            # Extract all the X509 certificates from the PKCS#7 structure
            authenticode_structure: signify.authenticode.AuthenticodeSignedData = AuthenticodeSignedData.from_envelope(
                cert)
            cert_obj: signify.certificates.Certificate
            for cert_obj in authenticode_structure.certificates:
                # Pick a human-readable filename from the subject's name
                # fields, preferring the most specific field available.
                cert_name_obj: asn1crypto.x509.Name = cert_obj.to_asn1crypto.subject
                preferred_name_fields: List[str] = [
                    "organizational_unit_name",
                    "organization_name",
                    "common_name",
                ]
                name_selected: bool = False
                preferred_field_name: str
                for preferred_field_name in preferred_name_fields:
                    name_tuple: Tuple[str, str]
                    for name_tuple in cert_name_obj.native.items():
                        field: str = name_tuple[0]
                        value: str = name_tuple[1]
                        if field == preferred_field_name:
                            name_selected = True
                            cert_name: str = value
                            break
                    if name_selected:
                        break
                if not name_selected:
                    # No usable subject field: fall back to an index based on
                    # how many certificates were already extracted.
                    cert_name: str = f"{len(os.listdir(certificate_extraction_dir))}"
                # Slugify so the subject name is safe as a filename.
                cert_name: str = utils.slugify(cert_name,
                                               allow_unicode=True) + ".pem"

                logger.debug(
                    f"[+] Extracting Authenticode certificate {cert_name}.")
                f: BinaryIO
                with certificate_extraction_dir.joinpath(cert_name).open(
                        "wb") as f:
                    # Re-encode DER to PEM before writing to disk.
                    der_bytes: bytes = cert_obj.to_asn1crypto.dump()
                    pem_bytes: bytes = pem.armor("CERTIFICATE", der_bytes)
                    f.write(pem_bytes)
        self.certificates_dumped = True