Example #1
    def dump_resource(self, resource_name: str) -> pathlib.Path:
        """Dump the specified resource to the output directory on disk.

        Parameters
        ----------
        resource_name
            The name of the resource within the PE's resources to extract.

        Returns
        -------
        pathlib.Path
            The path to the dumped resource, or None if no matching resource
            was found.
        """
        entry: pefile.ResourceDirEntryData
        for entry in self.pe.DIRECTORY_ENTRY_RESOURCE.entries:
            # Some resource entries are ID-based and have no name set.
            if entry.name and entry.name.string.decode() == resource_name:
                first_entry = entry.directory.entries[0].directory.entries[0]
                rva: int = first_entry.data.struct.OffsetToData
                size: int = first_entry.data.struct.Size

                self.output_dir.mkdir(parents=True, exist_ok=True)
                resource_dump: pathlib.Path = self.output_dir / resource_name
                outfile_ptr: BinaryIO
                with resource_dump.open("wb") as outfile_ptr:
                    outfile_ptr.write(self.pe.get_data(rva, size))
                logger.info(
                    f"[+] Successfully dumped PE resource {resource_name} to disk at {self.output_dir}"
                )
                return resource_dump
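
A minimal usage sketch for this method, assuming it lives on the PortableExecutable artifact class seen in Example #11 (the input path and resource name are illustrative):

import pathlib

pe_artifact = PortableExecutable(pathlib.Path("frozen_app.exe"))  # hypothetical sample
dumped = pe_artifact.dump_resource("PYTHONSCRIPT")  # py2exe's script resource
print(f"Resource written to {dumped}")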
Example #2
    def unpack(self):
        """Validate, as best we can, that this is a well-formed compiled Python file.

        If any obfuscations are detected, we will write a new, corrected file to disk. Does
        not overwrite the original file.
        """
        if not hasattr(self, "file_path"):
            # The file was passed in as bytes; stage it in a temporary file.
            # (Keep temp_file referenced so the file isn't deleted early.)
            temp_file = tempfile.NamedTemporaryFile(suffix=".pyc")
            pyc_file: pathlib.Path = pathlib.Path(temp_file.name)
            with pyc_file.open("wb") as outfile:
                outfile.write(self.file_contents)

            counter: int = 0
            while True:
                new_filename: pathlib.Path = self.output_dir.joinpath(
                    f"pyc_{counter}.pyc")
                if not new_filename.exists():
                    break
                counter += 1
        else:
            pyc_file = self.file_path
            new_filename: pathlib.Path = self.file_path.with_suffix(
                ".corrected.pyc")

        if fixed_pyc_tempfile := self.check_and_fix_pyc(
                pyc_file, provided_version=self.version_hint):
            logger.info(
                f"[+] Writing fixed pyc file {new_filename.name} to {new_filename.parent}"
            )
            copyfile(fixed_pyc_tempfile.name, new_filename)
Example #3
def unpack(python_artifact: os.PathLike,
           output_dir: str = None,
           **kwargs) -> None:
    """Recursively extract interesting resources from the Python artifact.

    This function will cycle through all the registered ARTIFACT_TYPES. See
    usages of :py:meth:`pydecipher.__init__.register` for the creation of this
    list.

    ARTIFACT_TYPES consists of the different 'unpackable', registered
    (via decorator) Python artifact classes in a dictionary of the format
    <Artifact_Name : Class Instance of Artifact_Name>. A class's constructor
    should raise a TypeError if it is being instantiated with something that
    isn't the correct type (e.g. a Py2Exe resource being passed to a
    PyInstaller archive constructor).

    Parameters
    ----------
    python_artifact : pathlib.Path or io.IOBase (file-like object)
        The path to the Python artifact
    output_dir : str, optional
        Where to dump the extracted output of artifact parsers. If no
        directory is specified, a directory will be created in the
        current working directory.
    **kwargs
        Arbitrary keyword arguments. Including, but not limited to:

            version_hint: str
                The (potential) Python version of the artifact. If you know
                the version, you should pass it in. Otherwise, pydecipher
                will try to automatically figure out what version was used
                through string-analysis (and possibly brute-force decompilation).
    """
    if output_dir:
        output_dir: pathlib.Path = pathlib.Path(output_dir).resolve()
    type_instance = None  # will hold the first artifact class that accepts the input
    logger.info(f"[*] Unpacking {python_artifact}")
    for type_, class_ in pydecipher.ARTIFACT_TYPES.items():
        logger.debug(
            f"[*] Checking {type_} magic for file {python_artifact.name}")
        try:
            type_instance = class_(python_artifact,
                                   output_dir=output_dir,
                                   **kwargs)
            logger.debug(
                f"[*] Determined {python_artifact.name} type to be {type_}")
            break
        except TypeError:
            logger.debug(f"[*] Magic incorrect for type {type_}")
    else:
        # Reached only when no registered artifact type matched the input.
        logger.debug(
            "[!] No matching artifact type found for this input. If you believe this is a"
            " supported format, please submit a bug report."
        )

    if type_instance:
        type_instance.unpack()
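
The ARTIFACT_TYPES registry described in the docstring above is populated by a registration decorator. A minimal, self-contained sketch of that pattern (the toy class and its magic check are invented for illustration; see pydecipher.__init__.register for the real implementation):

from typing import Dict, Type

ARTIFACT_TYPES: Dict[str, Type] = {}


def register(cls: Type) -> Type:
    """Class decorator that records an unpackable artifact type by name."""
    ARTIFACT_TYPES[cls.__name__] = cls
    return cls


@register
class ToyPEArtifact:
    """Toy artifact: the constructor raises TypeError when the magic check fails."""

    def __init__(self, artifact: bytes, output_dir=None, **kwargs):
        if not artifact.startswith(b"MZ"):  # stand-in for a real magic check
            raise TypeError("not a PE file")
        self.artifact = artifact

    def unpack(self):
        print(f"unpacking {len(self.artifact)} bytes...")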
Example #4
    def check_for_password_file(self):
        """Search for PyInstaller's crypto key file and collect potential encryption keys."""
        self.potential_keys = []
        if hasattr(self, "archive_path"):
            dir_of_pyz = self.archive_path.parent
        else:
            dir_of_pyz = Path.cwd()

        key_file = dir_of_pyz / "pyimod00_crypto_key.pyc"
        if key_file.exists():
            self.encrypted = True
            logger.debug(
                f"[+] Found ZlibArchive encryption key file at path {key_file}"
            )
            crypto_key_filename: str  # full path of the crypto key pyc file
            try:
                (
                    crypto_key_filename,
                    crypto_key_co,
                    crypto_key_python_version,
                    crypto_key_compilation_timestamp,
                    crypto_key_magic_int,
                    crypto_key_is_pypy,
                    crypto_key_source_size,
                    crypto_key_sip_hash,
                ) = disassemble_file(str(key_file),
                                     outstream=open(os.devnull, "w"))
            except Exception as e:
                logger.warning(
                    f"[!] Could not disassemble file {key_file}. Received error: {e}"
                )
            else:
                self.compilation_time = datetime.fromtimestamp(
                    crypto_key_compilation_timestamp)
                for const_string in crypto_key_co.co_consts:
                    # co_consts may hold ints or None; only 16-character
                    # strings are viable key candidates.
                    if isinstance(const_string, (str, bytes)) and len(const_string) == 16:
                        self.potential_keys.append(const_string)
            # If we couldn't disassemble the file to see the consts, let's just search the raw bytes of the file
            # for the password
            if not self.potential_keys:
                with key_file.open("rb") as file_ptr:
                    file_strings = utils.parse_for_strings(file_ptr.read())
                s: str
                for s in file_strings:
                    if len(s) >= 16 and "pyimod00_crypto_key" not in s:
                        while len(s) >= 16:
                            self.potential_keys.append(s[0:16])
                            s = s[1:]

            if self.potential_keys:
                logger.info(
                    f"[*] Found these potential PyInstaller PYZ Archive encryption keys: {self.potential_keys}"
                )
            else:
                logger.error(
                    "[*] Encryption key file detected, however no password could be retrieved."
                )
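
The raw-bytes fallback above slides a 16-character window across each candidate string, since the key's offset within the string is unknown. The same idea as a self-contained helper:

from typing import List


def sixteen_char_windows(s: str) -> List[str]:
    """Return every 16-character window of s, mirroring the fallback above."""
    return [s[i:i + 16] for i in range(len(s) - 15)]


print(sixteen_char_windows("0123456789abcdefXY"))
# ['0123456789abcdef', '123456789abcdefX', '23456789abcdefXY']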
Example #5
    def dump_overlay(self) -> pathlib.Path:
        """
        Check to see if this binary has data appended, and if so, dump it for further analysis.

        Python's pefile library puts the certificate table in the overlay section even
        though it's not really traditional overlay data.

        Relevant links:
        https://github.com/erocarrera/pefile/issues/104#issuecomment-429037686
        https://www.cs.auckland.ac.nz/~pgut001/pubs/authenticode.txt
        https://blog.barthe.ph/2009/02/22/change-signed-executable/

        Returns
        -------
        pathlib.Path
            The path to the dumped overlay on disk.
        """
        certificate_table_entry: pefile.Structure = None
        if hasattr(self.pe, "OPTIONAL_HEADER") and hasattr(
                self.pe.OPTIONAL_HEADER, "DATA_DIRECTORY"):
            directory: pefile.Structure
            for directory in self.pe.OPTIONAL_HEADER.DATA_DIRECTORY:
                if directory.name == "IMAGE_DIRECTORY_ENTRY_SECURITY" and directory.Size:
                    certificate_table_entry = directory
                    break

        # Get overlay data, excluding the certificate table if it's there
        if certificate_table_entry:
            overlay_start: int = self.pe.get_overlay_data_start_offset()
            certificate_start: int = certificate_table_entry.VirtualAddress
            self.overlay = self.pe.__data__[overlay_start:certificate_start]
        else:
            self.overlay = self.pe.get_overlay()

        if self.overlay:
            overlay_path: pathlib.Path = self.output_dir.joinpath(
                "overlay_data")
            self.output_dir.mkdir(parents=True, exist_ok=True)
            overlay_file_ptr: BinaryIO
            with overlay_path.open("wb") as overlay_file_ptr:
                overlay_file_ptr.write(self.overlay)
            logger.info(
                f"[+] Dumped this PE's overlay data to {overlay_path.relative_to(self.output_dir.parent)}"
            )
            return overlay_path
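
For context, a standalone sketch of the pefile overlay helpers used above (the sample path is illustrative):

import pefile

pe = pefile.PE("sample.exe")  # illustrative path
overlay_offset = pe.get_overlay_data_start_offset()  # None when nothing is appended
if overlay_offset is not None:
    print(f"Overlay begins at file offset {overlay_offset:#x}")
    print(f"Overlay is {len(pe.get_overlay())} bytes")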
Example #6
    def disassemble_and_dump(self, brute_force: bool = False):
        """Unmarshal this resource's code objects and write each one to disk as bytecode."""
        code_bytes = self.resource_contents[self.marshalled_obj_start_idx:]
        hijacked_stderr = io.StringIO()
        with redirect_stderr(hijacked_stderr):
            try:  # TODO make this more specific error catching
                code_objects = load_code(code_bytes, self.magic_num)
                if not isinstance(code_objects, list):
                    # TODO make this a non-generic error
                    raise RuntimeError(
                        "Py2Exe should return a marshalled list of code objects"
                    )
                if not all(code_objects):
                    raise RuntimeError("NoneType code objects returned")
            except Exception:
                logger.debug(
                    f"[!] Failed to produce disassembly of bytecode with magic num {self.magic_num} "
                    f"(Python version {magicint2version[self.magic_num]})")
                self.magic_num = -1
                return
            else:
                logger.info(
                    f"[+] Successfully disassembled bytecode with magic number {self.magic_num}, "
                    f"corresponding to Python version {magicint2version[self.magic_num]}"
                )

        for co in code_objects:
            new_filename: str = self._clean_filename(co.co_filename)
            self.output_dir.mkdir(parents=True, exist_ok=True)
            if brute_force:
                bytecode_filepath: pathlib.Path = self.output_dir / magicint2version[
                    self.magic_num] / new_filename
                bytecode_filepath.parent.mkdir(exist_ok=True)
            else:
                # Keep this a Path (not a str) so .name works in the error log below.
                bytecode_filepath: pathlib.Path = self.output_dir.joinpath(
                    new_filename)

            try:
                xdis.load.write_bytecode_file(bytecode_filepath, co,
                                              self.magic_num)
            except Exception as e:
                logger.error(
                    f"[!] Could not write file {bytecode_filepath.name} with error: {e}"
                )
            else:
                logger.info(
                    f"[+] Successfully wrote file {new_filename} to {self.output_dir}"
                )
Example #7
    def unpack(self):
        """Dump the pyc file from the Py2Exe object."""
        if self.archive_name:
            logger.info(f"[*] Archive name: {self.archive_name}")

        if self.magic_num == -1:
            # Guard against _determine_python_version returning nothing.
            potential_magic_nums = self._determine_python_version() or set()
            for magic_num in potential_magic_nums:
                self.magic_num = magic_num
                self.disassemble_and_dump()
        else:
            self.disassemble_and_dump()

        if self.magic_num == -1:
            # Brute force disassembly because we still don't know what version was used
            all_magic_nums = list(magicint2version)
            for magic_num in all_magic_nums:
                self.magic_num = magic_num
                self.disassemble_and_dump(brute_force=True)
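
The brute-force fallback walks every magic number xdis knows about. A quick sketch of that mapping (the exact contents depend on your xdis version):

from xdis.magics import magicint2version

# Maps a pyc magic integer to a version string, e.g. 3394 -> '3.7'.
for magic_int, version in sorted(magicint2version.items())[:5]:
    print(magic_int, version)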
Example #8
    def extract_files(self) -> None:
        """Decompress every TOC entry and write it to disk as a pyc file."""
        decompression_errors = 0
        successfully_extracted = 0
        for key, (type_code, position, compressed_data_size) in self.toc.items():
            timestamp = getattr(self, "compilation_time", None)
            header_bytes = pydecipher.bytecode.create_pyc_header(
                self.magic_int, compilation_ts=timestamp, file_size=0)

            compressed_data = self.archive_contents[position:position +
                                                    compressed_data_size]
            if self.encrypted:
                compressed_data = self.decrypt_file(compressed_data)
                if compressed_data is None:
                    # decrypt_file returns None on failure
                    decompression_errors += 1
                    continue

            try:
                uncompressed_data = zlib.decompress(compressed_data)
            except zlib.error as e:
                decompression_errors += 1
                logger.debug(
                    f"[!] PYZ zlib decompression failed with error: {e}")
            else:
                pyc_file = self.output_dir / str(key + ".pyc")
                self.output_dir.mkdir(parents=True, exist_ok=True)
                with pyc_file.open("wb") as pyc_file_ptr:
                    pyc_file_ptr.write(header_bytes + uncompressed_data)
                successfully_extracted += 1

        if decompression_errors:
            logger.debug(
                f"[!] Failed to write {decompression_errors} files due to decompression errors."
            )
        if successfully_extracted:
            logger.info(
                f"[+] Successfully extracted {successfully_extracted} files from this ZlibArchive."
            )
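
create_pyc_header has to synthesize the header a real interpreter would have written in front of the marshalled code. A sketch of the modern (PEP 552, Python 3.7+) 16-byte layout; older versions omit the bit field and, before 3.3, the source-size field:

import struct


def sketch_pyc_header(magic_int: int, timestamp: int = 0, file_size: int = 0) -> bytes:
    """Build a 3.7+-style pyc header: magic word, bit field, mtime, source size."""
    magic_word = struct.pack("<H", magic_int) + b"\r\n"  # e.g. 3413 for CPython 3.8
    return magic_word + struct.pack("<III", 0, timestamp, file_size)


print(sketch_pyc_header(3413).hex())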
Example #9
    def unpack(self) -> None:
        """Recursively search this artifact for frozen Python artifacts."""
        zip_bytes: io.BytesIO
        with io.BytesIO(self.archive_contents) as zip_bytes:
            self.output_dir.mkdir(parents=True, exist_ok=True)
            try:
                f: zipfile.PyZipFile = zipfile.PyZipFile(
                    zip_bytes, "r", zipfile.ZIP_DEFLATED)
                f.extractall(self.output_dir)
            except (zipfile.BadZipFile, zlib.error):
                # Not a readable zip archive; nothing to extract here.
                pass
            else:
                seen_errors: List[str] = []
                list_of_files: List[os.PathLike] = []
                for (dirpath, dirnames, filenames) in os.walk(self.output_dir):
                    for filename in filenames:
                        full_path: pathlib.Path = Path(dirpath).joinpath(
                            filename)
                        list_of_files.append(full_path)

                logger.info(
                    f"[*] Unpacking {len(list_of_files)} files found in this zip file..."
                )
                fp: pathlib.Path
                for fp in list_of_files:
                    try:
                        pydecipher.unpack(fp, **self.kwargs)
                    except RuntimeError as e:
                        if str(e) and str(e) not in seen_errors:
                            seen_errors.append(str(e))

                if seen_errors:
                    logger.error(
                        f"[!] The following {len(seen_errors)} errors were encountered during the unpacking"
                        " of this zip file.")
                    err: str
                    for err in seen_errors:
                        logger.error(err)
Example #10
    def parse_toc(self):
        """Parse the CArchive cookie and table of contents."""
        # Read CArchive cookie
        if self.pyinstaller_version == 2.0 or self.pyinstaller_version == "unknown":
            try:
                (
                    magic,
                    self.length_of_package,
                    self.toc_offset,
                    self.toc_size,
                    self.python_version,
                ) = struct.unpack(
                    "!8siiii",
                    self.archive_contents[self.magic_index:self.magic_index +
                                          self.PYINST20_COOKIE_SIZE],
                )
            except struct.error:
                # Not enough bytes for a 2.0-style cookie; try the next layout.
                pass
            else:
                self.pyinstaller_version = 2.0
        if self.pyinstaller_version == 2.1 or self.pyinstaller_version == "unknown":
            try:
                (
                    magic,
                    self.length_of_package,
                    self.toc_offset,
                    self.toc_size,
                    self.python_version,
                    self.python_dynamic_lib,
                ) = struct.unpack(
                    "!8siiii64s",
                    self.archive_contents[self.magic_index:self.magic_index +
                                          self.PYINST21_COOKIE_SIZE],
                )
            except struct.error:
                # Not a 2.1-style cookie either.
                pass
            else:
                self.pyinstaller_version = 2.1
                if self.python_dynamic_lib:
                    self.python_dynamic_lib = self.python_dynamic_lib.decode(
                        "ascii").rstrip("\x00")

        if self.pyinstaller_version == "unknown":
            logger.warning(
                "[!] Could not parse CArchive because PyInstaller version is unknown."
            )
            return

        self.python_version = float(self.python_version) / 10
        logger.info(
            f"[*] This CArchive was built with Python {self.python_version}")
        logger.debug(f"[*] CArchive Package Size: {self.length_of_package}")
        logger.debug(f"[*] CArchive Python Version: {self.python_version}")
        if self.pyinstaller_version == 2.1:
            logger.debug(
                f"[*] CArchive Python Dynamic Library Name: {self.python_dynamic_lib}"
            )

        self.toc = []
        toc_bytes = self.archive_contents[self.toc_offset:self.toc_offset +
                                          self.toc_size]
        while toc_bytes:
            (entry_size, ) = struct.unpack("!i", toc_bytes[0:4])
            name_length = entry_size - self.CTOCEntry.ENTRYLEN
            (
                entry_offset,
                compressed_data_size,
                uncompressed_data_size,
                compression_flag,
                type_code,
                name,
            ) = struct.unpack(f"!iiiBB{name_length}s", toc_bytes[4:entry_size])

            name = name.decode("utf-8").rstrip("\0")
            if name == "":
                name = str(uniquename())
                logger.debug(
                    f"[!] Warning: Found an unnamed file in CArchive. Using random name {name}"
                )

            type_code = chr(type_code)
            self.toc.append(
                self.CTOCEntry(
                    entry_offset,
                    compressed_data_size,
                    uncompressed_data_size,
                    compression_flag,
                    type_code,
                    name,
                ))

            toc_bytes = toc_bytes[entry_size:]
        logger.debug(
            f"[*] Found {len(self.toc)} entries in this PyInstaller CArchive")
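
To make the `!8siiii` cookie format above concrete, here is a round trip over a synthetic PyInstaller 2.0-style cookie (the field values are made up):

import struct

MAGIC = b"MEI\014\013\012\013\016"  # PyInstaller's CArchive cookie magic
cookie = struct.pack("!8siiii", MAGIC, 1000000, 999000, 1000, 27)
magic, pkg_len, toc_offset, toc_size, py_version = struct.unpack("!8siiii", cookie)
print(pkg_len, toc_offset, toc_size, py_version / 10)  # 1000000 999000 1000 2.7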
Example #11
    def _determine_python_version(self):
        """Attempt to determine the Python version used to compile this py2exe PE.

        We need to know this because xdis requires knowledge of the Python
        version to unmarshal the bytecode correctly.
        """
        potential_magic_nums = set()
        logger.debug("[*] Attempting to discover version for PYTHONSCRIPT resource")

        # Method 1: Looking for PythonXY.DLL resource in the same directory as the PYTHONSCRIPT resource. If there,
        # check to see if it has a VERSIONINFO resource with a FileVersion or ProductVersion field,
        # as these typically contain the python version. See https://github.com/erocarrera/pefile for more info on
        # the structures used below
        if hasattr(self, "archive_path"):
            parent_dir = self.archive_path.parents[0]
        else:
            parent_dir = pathlib.Path.cwd()
        for python_dll in os.listdir(parent_dir):
            if re.match(r"python[0-9]{0,2}\.dll", python_dll, re.I):
                logger.debug(f"[*] Found python DLL resource {str(python_dll)} in directory {parent_dir}")
                try:
                    dll_class_inst = PortableExecutable(parent_dir.joinpath(python_dll))
                except TypeError:
                    logger.debug(f"[!] PyDecipher could not create a PE/DLL class instance for {str(python_dll)}")
                else:
                    dll_class_inst.load_version_info(quiet=True)
                    if dll_class_inst.python_version:
                        potential_magic_nums.add(version_str_to_magic_num_int(dll_class_inst.python_version))
                finally:
                    break

        # Method 2: Check to see if there are pyc files in the same directory with magic numbers
        for pyc_file in parent_dir.rglob("*.pyc"):
            with pyc_file.open("rb") as pyc_file_ptr:
                try:
                    magic_bytes = pyc_file_ptr.read(4)
                    magic_num = magic2int(magic_bytes)
                except Exception:  # TODO make more specific error catching
                    pass
                else:
                    potential_magic_nums.add(magic_num)
            break

        # Searching the PYTHONSCRIPT resource for strings like c:\python24\lib\site-packages\py2exe\boot_common.py
        b_python_regex = re.compile(b"(python)([0-9]{2})", re.I)
        script_re_obj = b_python_regex.search(self.resource_contents)
        if script_re_obj:
            version_str = script_re_obj.group(2).decode("utf-8")
            logger.info(
                "[*] Detected potential version string in PYTHONSCRIPT resource: {}".format(
                    script_re_obj.group().decode("utf-8")
                )
            )
            potential_magic_nums.add(version_str_to_magic_num_int(version_str[0] + "." + version_str[1]))

        if potential_magic_nums:
            logger.info(f"[*] Will attempt to unmarshal using these python magic numbers: {potential_magic_nums}")
            return potential_magic_nums
        else:
            logger.info(
                "[!] Couldn't find any python magic numbers to hint at the python version of this resource. "
                "Will attempt to brute-force determine the correct magic number."
            )
            return set()
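
Method 2 above relies on the first four bytes of any pyc file being the compiler's magic word. The sniffing step in isolation (the path is illustrative):

import pathlib

from xdis.magics import magic2int, magicint2version

pyc_path = pathlib.Path("extracted/os.pyc")  # illustrative
with pyc_path.open("rb") as fh:
    magic_int = magic2int(fh.read(4))
print(magic_int, magicint2version.get(magic_int, "unknown"))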
Example #12
def run(_args: List[str] = None) -> None:
    """Orchestrate the flow of the remap command.

    This is the entry-point of the remap command. It calls out to other routines
    and attempts to follow this high-level flow:

        1.  Check that the program is running in a sufficiently new Python
            environment, and parse any arguments.
        2.  Determine what type of input was passed to the program, which will
            ultimately decide what method remap uses to recover the opmap.
        3.  Attempt one of the opmap recovery methods (see documentation for
            more on these methods).
        4.  If the opmap was successfully recovered, validate it, then write
            it to a file.

    Parameters
    ----------
    _args : List[str]
        If this function is being called from other Python code, remap
        flags and other command-line options can be passed in as a list.
    """
    if sys.version_info < (3, 8):
        logger.critical(
            "[!] This tool can only be run in Python 3.8 or later.")
        sys.exit(1)
    utils.check_for_our_xdis()

    args: argparse.Namespace = _parse_args(_args)

    logging_options: Dict[str, Union[bool, os.PathLike]] = {
        "verbose": args.verbose,
        "quiet": args.quiet
    }
    pydecipher.set_logging_options(**logging_options)

    remapped_bytecode_path: pathlib.Path = pathlib.Path(
        args.remapped_bytecode_path).resolve()

    if args.output:
        output_dir: pathlib.Path = pathlib.Path(args.output.strip()).resolve()
    else:
        output_dir: pathlib.Path = pathlib.Path.cwd()
    output_dir = output_dir / f"remap_output_{utils.slugify(remapped_bytecode_path.name)}"

    # The following block sets up logging to a stringIO stream, which will
    # eventually be placed in a file. We don't immediately log to a file because
    # we don't want to leave a log file on disk unless the program succeeds.
    log_stream: io.StringIO = io.StringIO()
    log_stream__handler: logging.StreamHandler = logging.StreamHandler(
        log_stream)
    log_stream__handler.setFormatter(pydecipher.log_format)
    log_stream__handler.setLevel(logging.DEBUG)
    logger.addHandler(log_stream__handler)

    remappings: Dict[int, Dict[int, int]] = {}
    version: str = ""
    remapping_method: str = ""
    cli: str = " ".join(sys.argv) if not _args else " ".join(_args)
    if args.version:
        version = args.version
    if args.megafile:
        # Determine if argument is a version or a path
        if pathlib.Path(args.megafile).exists():
            standard_bytecode_path: pathlib.Path = pathlib.Path(args.megafile)
        else:
            potential_version: str = args.megafile
            # Next, find the path of the reference file for this version.
            standard_bytecode_path: pathlib.Path = None
            magic_num: int = bytecode.version_str_to_magic_num_int(
                potential_version)
            if magic_num:
                compiled_file: str
                for compiled_file in os.listdir(
                        pathlib.Path(__file__).parent / "reference_files" /
                        "compiled"):
                    full_path_obj: pathlib.Path = (
                        pathlib.Path(__file__).parent / "reference_files" /
                        "compiled" / compiled_file)
                    infile: BinaryIO
                    with full_path_obj.open("rb") as infile:
                        if xdis.magics.magic2int(infile.read(4)) == magic_num:
                            logger.info(
                                f"[*] Found matching megafile for version {potential_version}"
                            )
                            standard_bytecode_path: pathlib.Path = full_path_obj
                            break
            if not standard_bytecode_path:
                logger.error(
                    "[!] Something went wrong. remap could not find a standard compiled version of this megafile."
                )
                sys.exit(1)
        remappings, version = megafile_remap(standard_bytecode_path,
                                             remapped_bytecode_path)
        remapping_method = "Megafile"
    elif args.opcode_file:
        remappings, version = opcode_constants_remap(remapped_bytecode_path,
                                                     provided_version=version)
        remapping_method = "opcode.pyc constants-walking"
    elif args.standard_bytecode_path:
        standard_bytecode_path: pathlib.Path = pathlib.Path(
            args.standard_bytecode_path).resolve()
        utils.check_read_access(standard_bytecode_path)
        utils.check_read_access(remapped_bytecode_path)
        utils.check_write_access(output_dir)
        if not remapped_bytecode_path.is_dir():
            raise ValueError(
                "The standard/default remapping method requires a directory containing Python bytecode files"
            )
        if not standard_bytecode_path.is_dir():
            raise ValueError(
                "If you are going to provide your own reference opcode set, it must be a directory of "
                "Python bytecode files")
        remappings, version = standard_pyc_remap(standard_bytecode_path,
                                                 remapped_bytecode_path,
                                                 version=version)
        remapping_method = "Diff'ing against standard library bytecode"
    elif args.check_remapping:
        # Here, remapped_bytecode_path is not actually bytecode; it's a
        # remapping file.
        utils.check_read_access(remapped_bytecode_path)
        remapping_file: TextIO
        with remapped_bytecode_path.open() as remapping_file:
            try:
                remapping_json: Dict[str, Union[str, list]] = json.loads(
                    remapping_file.read())
            except json.decoder.JSONDecodeError as e:
                e: json.decoder.JSONDecodeError
                logger.error(f"Could not read remapping file with error: {e}")
                sys.exit(1)
            version = remapping_json["python_version"]
            remappings_list: List[Dict[str, Union[
                bool, str, int]]] = remapping_json["remapped_opcodes"]
            remapping_dict: Dict[str, int] = {
                d["opname"]: d["remapped_value"]
                for d in remappings_list
            }
            if bytecode.validate_opmap(version, remapping_dict):
                logger.info("[*] This opmap is valid.")
                return
            else:
                msg: str = "This opmap is not valid."
                if not logging_options["verbose"]:
                    msg += " Run with --verbose flag for more information."
                logger.warning(f"[!] {msg}")
                sys.exit(1)

    if remappings:
        remappings: Dict[int, int] = fix_remapping_conflicts(remappings)
        remappings: Dict[int,
                         Tuple[int,
                               bool]] = fill_opmap_gaps(remappings, version)
        output_file_path: pathlib.Path = write_remapping_file(
            remappings, version, remapping_method, cli, output_dir=output_dir)
        logger.info(
            f"[*] Remapping file {output_file_path.name} written to {output_file_path.parent}."
        )

        # If we successfully produced the remapping file, we want to also
        # include the logged output of remap.
        log_name: str = datetime.datetime.now().strftime(
            "log_%H_%M_%S_%b_%d_%Y.txt")
        log_file_ptr: TextIO
        with output_dir.joinpath(log_name).open("w") as log_file_ptr:
            log_file_ptr.write(log_stream.getvalue())
        logging_options: Dict[str, Union[bool, os.PathLike]] = {
            "log_path": output_dir.joinpath(log_name)
        }
        pydecipher.set_logging_options(**logging_options)
    else:
        logger.warning(
            "[!] Remap couldn't produce the new opmap. Run with --verbose for more information."
        )
        sys.exit(1)
Example #13
def write_remapping_file(
    remappings: Dict[int, Tuple[int, bool]],
    version: str,
    method: str,
    cli: str,
    output_dir: Union[str, pathlib.Path] = ".",
) -> pathlib.Path:
    """Write the remappings dict to a JSON file that can be used by pydecipher.

    It is assumed that by this point `remappings` is a bijection of original
    opcodes and replacement opcodes.

    Parameters
    ----------
    remappings: Dict[int, Tuple[int, bool]]
        A dictionary of original_opcode to (replacement_opcode, guess).
        replacement_opcode is the remapped value of original_opcode, and the
        guess boolean is whether or not remap actually observed this remapping
        or had to 'guess' it in order to produce a complete set of opcodes.
    version: str
        A version string `accepted by xdis`_.
    method: str
        A text description of the remapping method used.
    cli: str
        The command line for the remap command that produced this file.
    output_dir: Union[str, os.PathLike]
        The path where the remapping file should be written.

    .. _accepted by xdis:
        https://github.com/rocky/python-xdis/blob/master/xdis/magics.py

    Returns
    -------
    pathlib.Path
        The path to the remapping JSON file.
    """
    output_dict: Dict[str, Union[str, List[Dict[str, Union[int, bool, str]]]]] = {
        "python_version": str(version),
        "remapped_opcodes": [],
        "method": method,
        "command_line": json.dumps(cli),
    }

    xdis_opcode: ModuleType = None
    try:
        xdis_opcode = xdis.disasm.get_opcode(version, is_pypy=False)
    except Exception:
        logger.debug(
            f"[!] Couldn't retrieve version {version} from xdis! Continuing anyway..."
        )

    opcode_val: int
    remap_val: Tuple[int, bool]
    for opcode_val, remap_val in remappings.items():
        output_subdict: Dict[str, Union[int, bool, str]] = {
            "opcode": opcode_val,
            "remapped_value": remap_val[0],
            "guess": bool(remap_val[1]),
        }
        if xdis_opcode:
            opname: str = xdis_opcode.opname[opcode_val]
            output_subdict["opname"] = opname.replace("+", "_")
        output_dict["remapped_opcodes"].append(output_subdict)

    # We sort based on the original opcode value because it seems like the most
    # natural way to sort this, and it is useful to have a standardized
    # output for comparison purposes.
    output_dict["remapped_opcodes"] = sorted(output_dict["remapped_opcodes"],
                                             key=lambda i: i["opcode"])
    output_dir: pathlib.Path = pathlib.Path(output_dir).resolve()
    output_filepath: pathlib.Path = output_dir / "remapping.txt"
    if output_filepath.exists():
        logger.debug(
            f"[!] {str(output_filepath)} already exists. Incrementing filename until an available name is found."
        )
        counter: int = 1
        while True:
            new_filepath: pathlib.Path = output_dir / f"remapping-{counter}.txt"
            if not new_filepath.exists():
                break
            counter += 1
        output_filepath = new_filepath
    output_dir.mkdir(parents=True, exist_ok=True)
    with output_filepath.open("w") as output_file_ptr:
        output_file_ptr.write(json.dumps(output_dict, sort_keys=True,
                                         indent=4))
        logger.info(f"[+] {str(output_filepath)} successfully written")
    return output_filepath
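
For reference, a remapping file produced by this function looks roughly like the following (opcode values are invented for illustration; note that command_line is JSON-encoded twice, once by json.dumps(cli) and once by the final dump):

{
    "command_line": "\"remap --megafile 3.8 suspicious_dir/\"",
    "method": "Megafile",
    "python_version": "3.8",
    "remapped_opcodes": [
        {"guess": false, "opcode": 1, "opname": "POP_TOP", "remapped_value": 5},
        {"guess": true, "opcode": 4, "opname": "DUP_TOP", "remapped_value": 9}
    ]
}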
Example #14
def decompile_pyc(
    arg_tuple: Tuple[pathlib.Path, Dict[str, int],
                     Dict[str, Union[bool, os.PathLike]]]
) -> str:
    """Decompile a single Python bytecode file.

    Parameters
    ----------
    arg_tuple: Tuple[pathlib.Path, Dict[str, int], Dict[str, Union[bool, os.PathLike]]]
        A tuple containing the arguments for this function. This is a tuple because pebble's
        Pool.map() function cannot pass multiple arguments to a function run in a subprocess.
        The tuple entries correspond to the following arguments:

            pyc_file : pathlib.Path
                The path to the compiled Python file
            alternate_opmap : Dict[str, int], optional
                If this bytecode file was produced by an interpreter with remapped
                opcodes, you must provide the opmap as a OPNAME: OPCODE dictionary
            logging_options: Dict[str, Union[bool, os.PathLike]], optional
                A dictionary of logging options. This is only needed when pydecipher is
                performing multi-processed decompilation. The keys can be the following
                strings:

                    verbose: bool
                        True will enable verbose logging.
                    quiet: bool
                        True will silence all console logging.
                    log_path: pathlib.Path
                        If a path object is passed in as the log_path, the running
                        instance of pydecipher will continue logging to that file.

    Returns
    -------
    str
        There are several different return values:

            * **no_action**: This file was not decompiled.
            * **success**: This file was successfully decompiled.
            * **error**: This file could not be decompiled 100% successfully.
            * **opcode_error**: The error message returned by uncompyle6
              indicates this file may have remapped opcodes.
    """
    pyc_file: pathlib.Path = arg_tuple[0]
    alternate_opmap: Dict[str, int] = arg_tuple[1] or None
    logging_options: Dict[str, Union[bool, os.PathLike]] = arg_tuple[2] or None

    if not pyc_file.is_file():
        return "no_action"

    # Because this function runs in a new pydecipher process entirely, logging
    # options set during runtime (from command-line flags) do not carry over
    # automatically. We must pass these through manually, and reset the options
    # for this specific process.
    if logging_options and not pydecipher.log_path:
        pydecipher.set_logging_options(**logging_options)

    hijacked_stdout: io.StringIO = io.StringIO()
    hijacked_stderr: io.StringIO = io.StringIO()
    with redirect_stdout(hijacked_stdout), redirect_stderr(hijacked_stderr):
        # Chop off c in pyc
        new_file_name: pathlib.Path = pathlib.Path(
            str(pyc_file.resolve())[:-1])

        # Uncomment to prohibit the overwriting of existing files:
        # if new_file_name.exists() and new_file_name.stat().st_size:
        #     return "no_action"

        logger.debug(
            f"[*] Decompiling file {pyc_file} of size {pyc_file.stat().st_size}"
        )
        if not alternate_opmap:
            try:
                uncompyle6.decompile_file(str(pyc_file), outstream=sys.stdout)
            except uncompyle6.semantics.parser_error.ParserError as e:
                logger.warning(f"[!] Failed to decompile file {pyc_file}")
                if REMAPPED_OPCODE_ERROR_REGEX.match(str(e.error)):
                    logger.error(
                        f"[!] {pyc_file.name} failed to decompile with an error that indicates its opcode "
                        "mappings may have been remapped to prevent analysis.")
                    return "opcode_error"
                return "error"
            except Exception as e:
                e: Exception
                logger.error(
                    f"[!] Failed to decompile file {pyc_file} with error: {e}")
                stdout_val: str = hijacked_stdout.getvalue()
                if stdout_val:
                    with new_file_name.open("w") as file_ptr:
                        file_ptr.write(stdout_val)
                return "error"
            else:
                with new_file_name.open("w") as file_ptr:
                    file_ptr.write(hijacked_stdout.getvalue())
                logger.info(f"[+] Successfully decompiled {pyc_file}")
                return "success"
        else:
            filename: str
            co: CodeType  # can also be xdis.Code* objects
            version: float
            timestamp: int  # seconds since epoch
            magic_int: int
            is_pypy: bool
            source_size: int
            sip_hash: str
            try:
                (
                    filename,
                    co,
                    version,
                    timestamp,
                    magic_int,
                    is_pypy,
                    source_size,
                    sip_hash,
                ) = xdis.main.disassemble_file(str(pyc_file),
                                               outstream=open(os.devnull, "w"),
                                               alternate_opmap=alternate_opmap)
                output_file: TextIO
                with new_file_name.open(mode="w") as output_file:
                    uncompyle6.main.decompile(
                        version,
                        co,
                        timestamp=timestamp,
                        source_size=source_size,
                        magic_int=magic_int,
                        is_pypy=is_pypy,
                        out=output_file,
                    )
            except Exception as e:
                e: Exception
                logger.info(
                    f"[!] Failed to decompile file {pyc_file} with error: {e}")
                return "error"
            else:
                logger.info(f"[+] Successfully decompiled {pyc_file}")
            return "success"
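
A minimal invocation sketch (the path is illustrative; the second and third tuple slots are the optional opmap and logging options described above):

import pathlib

status = decompile_pyc((pathlib.Path("sample.pyc"), None, None))
print(status)  # one of: "no_action", "success", "error", "opcode_error"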
Example #15
def process_pycs(pyc_iterable: Iterable[os.PathLike],
                 alternate_opmap: Dict[str, int] = None) -> None:
    """Multi-processed decompilation orchestration of compiled Python files.

    Currently, pydecipher uses `uncompyle6`_ as its decompiler. It works well
    with `xdis`_ (same author) and allows for the decompilation of Code objects
    using alternate opmaps (with our extension of xdis).

    This function will start up CPU count * 2 pydecipher processes to decompile
    the given Python files. It also checks whether a debugger is attached, in
    which case decompilation runs single-threaded to make debugging easier.

    .. _uncompyle6: https://github.com/rocky/python-uncompyle6/
    .. _xdis: https://github.com/rocky/python-xdis

    Parameters
    ----------
    pyc_iterable : Iterable[os.PathLike]
        An iterable of pathlib.Path objects, referencing compiled Python files
        to decompile.
    alternate_opmap : Dict[str, int], optional
        An opcode map of OPNAME: OPCODE (i.e. 'POP_TOP': 1). This should be a
        complete opmap for the Python version of the files being decompiled.
        Even if only two opcodes were swapped, the opcode map passed in should
        contain all 100+ Python bytecode operations.
    """
    # This checks whether a debugger (such as PyCharm's) is attached.
    if sys.gettrace():
        # Single-threaded for easier debugging.
        logger.debug(
            "[!] Debugger detected, not using multiprocessing for decompilation of pyc files."
        )
        return_status_codes: List[str] = []
        pyc_file: pathlib.Path
        for pyc_file in pyc_iterable:
            return_status_codes.append(
                decompile_pyc((pyc_file, alternate_opmap,
                               pydecipher.get_logging_options())))
    else:
        return_status_codes: List[str] = []
        pool: pebble.ProcessPool
        with pebble.ProcessPool(os.cpu_count() * 2) as pool:
            iterables = [(pyc, alternate_opmap,
                          pydecipher.get_logging_options())
                         for pyc in pyc_iterable]
            future: pebble.ProcessMapFuture = pool.map(decompile_pyc,
                                                       iterables,
                                                       timeout=300)
            iterator: Iterable = future.result()
            index: int = 0
            while True:
                try:
                    result: Any = next(iterator)
                    return_status_codes.append(result)
                except StopIteration:
                    break
                except TimeoutError as e:
                    e: TimeoutError
                    failed_pyc_path: str = str(iterables[index][0])
                    logger.error(
                        f"[!] Timed out ({e.args[1]}s) trying to decompile {failed_pyc_path}."
                    )
                    return_status_codes.append("error")
                except pebble.ProcessExpired as e:
                    e: pebble.ProcessExpired
                    failed_pyc_path: str = str(iterables[index][0])
                    logger.error(
                        f"[!] Failed to decompile {failed_pyc_path} (process expired with status code {e.exitcode})."
                    )
                    return_status_codes.append("error")
                except Exception as e:
                    e: Exception
                    failed_pyc_path: str = str(iterables[index][0])
                    logger.error(
                        f"[!] Failed to decompile {failed_pyc_path} with unknown error: {e}"
                    )
                    return_status_codes.append("error")
                finally:
                    index += 1

    successes: int = return_status_codes.count("success")
    opcode_errors: int = return_status_codes.count("opcode_error")
    errors: int = return_status_codes.count("error") + opcode_errors
    if opcode_errors:
        logger.warning(
            f"[!] {opcode_errors} file(s) failed to decompile with an error "
            "that indicates their opcode mappings may have been remapped. Try using "
            "`remap` on this set of bytecode.")
    if successes and not errors:
        logger.info(f"[+] Successfully decompiled {successes} .pyc files.")
    elif successes and errors:
        logger.warning(
            f"[!] Successfully decompiled {successes} .pyc files. Failed to decompile {errors} files. "
            "See log for more information.")
    elif not successes and errors:
        logger.error(
            f"[!] Failed to decompile all {errors} .pyc files. See log for more information."
        )
    else:
        logger.warning(
            "[!] No pyc files were decompiled. See log for more information.")
Example #16
def run(args_in: List[str] = None) -> None:
    """Orchestrate the flow of the pydecipher command.

    This function is the entry-point of the pydecipher command.  It calls out to
    other routines and generally attempts to follow this high-level flow:

        1.  Parse program arguments.
        2.  Check that input files are readable and output locations are
            writeable, including that the program is running in a
            sufficiently new Python environment (3.8+).
        3.  Recursively call unpack on the artifact until all items of
            interest are extracted.
        4.  Decompile any Python bytecode found through the unpacking
            process.

    Parameters
    ----------
    args_in : List[str]
        If this function is being called from other Python code, pydecipher
        flags and other command-line options can be passed in as a list.
    """
    if sys.version_info < (3, 8):
        logger.critical(
            "[!] This tool can only be run in Python 3.8 or later.")
        sys.exit(1)
    utils.check_for_our_xdis()

    args: argparse.Namespace = _parse_args(args_in)

    logging_options: Dict[str, Union[bool, os.PathLike]] = {
        "verbose": args.verbose,
        "quiet": args.quiet
    }
    pydecipher.set_logging_options(**logging_options)

    artifact_path: pathlib.Path = pathlib.Path(args.artifact_path).resolve()
    utils.check_read_access(artifact_path)

    relocate_pys: bool = False
    pyc_files: Iterable[os.PathLike] = []
    if args.output:
        output_dir: pathlib.Path = pathlib.Path(args.output.strip()).resolve()
        if artifact_path.is_dir():
            relocate_pys = True
    elif artifact_path.is_dir():
        output_dir = artifact_path
        relocate_pys = True
    else:
        output_dir: pathlib.Path = (
            pathlib.Path.cwd() /
            f"pydecipher_output_{utils.slugify(artifact_path.name.split('.')[0])}"
        )

    if artifact_path.is_file() and os.path.splitext(
            artifact_path)[1].lower() in (".pyc", ".pyo"):
        relocate_pys = True
        pyc_files = [artifact_path]

    # The following block sets up logging to a stringIO stream, which will
    # eventually be placed in a file. We don't immediately log to a file
    # because we don't want to leave a log file on disk unless the program
    # succeeds, at least past the 'unpack' call.
    log_stream: io.StringIO = io.StringIO()
    log_stream__handler: logging.StreamHandler = logging.StreamHandler(
        log_stream)
    log_stream__handler.setFormatter(pydecipher.log_format)
    log_stream__handler.setLevel(logging.DEBUG)
    logger.addHandler(log_stream__handler)

    version_hint: str = args.version_hint

    alternate_opmap: Dict[str, int] = None
    if args.remapping_file:
        remap_file: pathlib.Path = pathlib.Path(args.remapping_file).resolve()
        logger.info(f"[*] Using remap file {remap_file}")
        utils.check_read_access(remap_file)
        alternate_opmap: Dict[str, int] = bytecode.create_opmap_from_file(
            remap_file)

        with remap_file.open("r") as remapping_file:
            file_json = json.loads(remapping_file.read())
            remap_file_version: str = file_json["python_version"]
            version_hint = remap_file_version

    utils.check_write_access(output_dir)
    # Dump all pyc files
    if artifact_path.is_dir():
        kwargs: Dict[str, str] = {"version_hint": version_hint}
        dirpath: str
        dirnames: List[str]
        filenames: List[str]
        for (dirpath, dirnames, filenames) in os.walk(artifact_path):
            filename: str
            for filename in filenames:
                if os.path.splitext(filename)[1].lower() in (".pyc", ".pyo"):
                    full_path: pathlib.Path = pathlib.Path(dirpath).joinpath(
                        filename)
                    try:
                        pyc_class_obj: artifact_types.pyc.Pyc = artifact_types.pyc.Pyc(
                            full_path, output_dir=full_path.parent, **kwargs)
                    except TypeError:
                        pass
                    else:
                        pyc_class_obj.unpack()
        pyc_files: List[pathlib.Path] = list(
            artifact_path.rglob("*.[pP][yY][cCoO]"))
    else:
        unpack(artifact_path,
               output_dir=str(output_dir),
               version_hint=version_hint)

    # If we produced files, we want to also include the logged output of
    # pydecipher. If we didn't produce anything, we can assume the program
    # failed/had uninteresting output that doesn't need to be kept. The one
    # exception to this is when we pass in a single pyc file, or a directory of
    # pyc files, to be decompiled.
    if (output_dir.exists() and os.listdir(output_dir)) or pyc_files:
        output_dir.mkdir(parents=True, exist_ok=True)
        log_name: str = datetime.datetime.now().strftime(
            "log_%H_%M_%S_%b_%d_%Y.txt")
        with output_dir.joinpath(log_name).open("w") as log_file_ptr:
            log_file_ptr.write(log_stream.getvalue())
        logging_options: Dict[str, pathlib.Path] = {
            "log_path": output_dir.joinpath(log_name)
        }
        pydecipher.set_logging_options(**logging_options)
    else:
        logger.warning("[!] This artifact produced no additional output.")
        return

    # Determine which pyc files to decompile
    if not pyc_files:
        pyc_files: Generator[os.PathLike, None,
                             None] = output_dir.rglob("*.[pP][yY][cCoO]")
        if not args.decompile_all:
            max_depth: int = 10
            # Search output directory with increasing recursive depth to find
            # first level of directories with .pyc files
            depth: int
            for depth in range(max_depth):
                tmp: List[os.PathLike] = list(
                    pydecipher.utils.rglob_limit_depth(output_dir,
                                                       "*.[pP][yY][cCoO]",
                                                       depth))
                if tmp:
                    pyc_files = tmp
                    break

    # Dispatch a pool of processes to decompile the specified group of pyc files
    bytecode.process_pycs(pyc_files, alternate_opmap=alternate_opmap)

    # If any decompiled python needs to be moved to the output directory, do
    # that now. This will only happen if the user passed in a pyc artifact
    # (single file or dir). We decompile the .pyc file into a .py file alongside
    # the .pyc file on disk, then move it to the designated output directory.
    if artifact_path.is_file():
        relative_root: pathlib.Path = artifact_path.parent
    else:
        relative_root: pathlib.Path = artifact_path
    if relocate_pys:
        pyc_file: pathlib.Path
        for pyc_file in pyc_files:
            py_file: pathlib.Path = pathlib.Path(str(pyc_file)[:-1])
            if not py_file.exists():
                continue
            rel_path: pathlib.Path = py_file.relative_to(relative_root)
            new_filepath: pathlib.Path = output_dir.joinpath(rel_path)
            py_file.rename(new_filepath)

    # Perform any cleanup functions on output of decompilation
    pydecipher.artifact_types.py2exe.PYTHONSCRIPT.cleanup(output_dir)
Example #17
    def extract_files(self):
        """Extract every TOC entry from this CArchive and write it to disk."""
        magic_nums: set = set()
        decompression_errors = 0
        successfully_extracted = 0
        entry: CTOCEntry
        for entry in self.toc:
            data = self.archive_contents[
                entry.entry_offset:entry.entry_offset + entry.compressed_data_size]

            if entry.compression_flag:
                try:
                    data = zlib.decompress(data)
                except zlib.error as e:
                    decompression_errors += 1
                    logger.debug(
                        f"[!] PyInstaller CArchive decompression failed with error: {e}"
                    )
                    continue
                else:
                    if len(data) != entry.uncompressed_data_size:
                        logger.warning(
                            f"[!] {entry.name} entry in CArchive listed its uncompressed data size as"
                            f" {entry.uncompressed_data_size}, however it actually uncompressed to {len(data)}"
                            " bytes. This may be a sign that the CArchive was manually altered."
                        )

            if "\\" in entry.name:
                tmp: PureWindowsPath = pathlib.PureWindowsPath(entry.name)
            else:
                tmp: Path = Path(entry.name)
            file_path = pathlib.Path(self.output_dir).joinpath(tmp)
            if len(file_path.parents) > 1:  # every path has '.' as a parent
                file_path.parent.mkdir(parents=True, exist_ok=True)

            if entry.type_code == self.ArchiveItem.PYSOURCE:
                if ord(data[:1]) == ord(xdis.marsh.TYPE_CODE) or ord(
                        data[:1]) == (ord(xdis.marsh.TYPE_CODE)
                                      | xdis.unmarshal.FLAG_REF):
                    file_path = file_path.parent / (file_path.name + ".pyc")
                    if len(magic_nums) > 1:
                        magic_num = next(iter(magic_nums))
                        logger.warning(
                            "[!] More than one magic number found within this CArchive. Using magic number"
                            f" {magic_num}, but also found numbers: {magic_nums}"
                        )
                    if magic_nums:
                        data = pydecipher.bytecode.create_pyc_header(
                            next(iter(magic_nums))) + data
                    else:
                        # Guard: next() on an empty set would raise StopIteration.
                        logger.warning(
                            "[!] No magic numbers have been found yet; cannot prepend a pyc header to this file."
                        )
                        # TODO: add this file to a do-later list, when you know the magic num  #TODO does this actually happen? dig deeper...
                else:
                    file_path = file_path.parent / (file_path.name + ".py")
                if "pyi" not in entry.name:
                    logger.info(
                        f"[!] Potential entrypoint found at script {entry.name}.py"
                    )
            elif entry.type_code == self.ArchiveItem.PYMODULE:
                magic_bytes = data[:4]  # Python magic value
                magic_nums.add(magic2int(magic_bytes))
                file_path = file_path.parent / (file_path.name + ".pyc")

            if entry.type_code != self.ArchiveItem.RUNTIME_OPTION:
                self.output_dir.mkdir(parents=True, exist_ok=True)
                with file_path.open(mode="wb") as f:
                    f.write(data)
                    successfully_extracted += 1

            if entry.type_code in (self.ArchiveItem.PYZ,
                                   self.ArchiveItem.ZIPFILE):
                output_dir_name = (str(
                    file_path.parent.joinpath(
                        utils.slugify(file_path.name.split(".")[0]))) +
                                   "_output")
                pydecipher.unpack(file_path, output_dir=output_dir_name)

        if decompression_errors:
            logger.debug(
                f"[!] Failed to write {decompression_errors} files due to decompression errors."
            )
        if successfully_extracted:
            logger.info(
                f"[+] Successfully extracted {successfully_extracted} files from this CArchive."
            )