예제 #1
0
    def __init__(
        self,
        pe_path_or_bytes: Union[str, os.PathLike, BinaryIO],
        output_dir: os.PathLike = None,
        **kwargs,
    ) -> None:
        """Construct a Portable Executable artifact.

        Parameters
        ----------
        pe_path_or_bytes : Union[str, os.PathLike, BinaryIO]
            The path to the PE file, or a buffered binary stream whose whole
            contents are read into memory.
        output_dir : os.PathLike, optional
            Where any output extracted from this artifact should get dumped.
            When omitted, a slugified ``<file name>_output`` directory next to
            the input file is used, or the current working directory when the
            input was a stream.
        **kwargs
            Extra keyword arguments; stored unchanged on ``self.kwargs``.

        Raises
        ------
        TypeError
            If the input is neither a path nor a buffered binary stream, or if
            pefile rejects the data as a PE.
        """
        # Normalise every path-like spelling (str, os.PathLike) to pathlib.Path,
        # leaving objects that already are a pathlib.Path untouched.
        if isinstance(pe_path_or_bytes, (str, os.PathLike)) and not isinstance(
                pe_path_or_bytes, pathlib.Path):
            pe_path_or_bytes = pathlib.Path(pe_path_or_bytes)

        if isinstance(pe_path_or_bytes, pathlib.Path):
            # NOTE(review): check_read_access presumably raises when the path is
            # missing or unreadable -- confirm against utils.
            utils.check_read_access(pe_path_or_bytes)
            self.file_path = pe_path_or_bytes
            with self.file_path.open("rb") as input_file:
                self.file_contents = input_file.read()
        elif isinstance(pe_path_or_bytes, io.BufferedIOBase):
            self.file_contents = pe_path_or_bytes.read()
        else:
            # Fail here with a clear message instead of an AttributeError on
            # self.file_contents a few lines further down.
            raise TypeError(
                "[!] Expected a path or a buffered binary stream, got "
                f"{type(pe_path_or_bytes).__name__}.")

        try:
            self.pe = pefile.PE(data=self.file_contents)
        except pefile.PEFormatError as e:
            # Keep the TypeError contract but preserve the original cause.
            raise TypeError(e) from e

        if output_dir:
            self.output_dir = output_dir
        elif hasattr(self, "file_path"):
            self.output_dir = self.file_path.parent / utils.slugify(
                self.file_path.name + "_output")
        else:
            # Stream input has no location on disk to derive a directory from.
            self.output_dir = pathlib.Path.cwd()
        utils.check_write_access(self.output_dir)
        self.kwargs = kwargs
예제 #2
0
    def unpack(self) -> None:
        """Dump any interesting aspects of this PE for further investigation.

        This will log the PEs version info resource for manual inspection,
        dump any Authenticode certificates, and look for frozen Python artifacts
        within the PE's resources and overlay.

        Every dumped artifact (the overlay, plus each named resource matching
        one of ``self.INTERESTING_RESOURCES``) is handed to ``pydecipher.unpack``
        with its own ``<artifact name>_output`` sub-directory of
        ``self.output_dir`` and the keyword arguments stored on ``self.kwargs``.
        """
        self.load_version_info()
        self.dump_certificates()

        unpack_me: List[pathlib.Path] = []
        overlay_path: pathlib.Path = self.dump_overlay()
        if overlay_path:
            unpack_me.append(overlay_path)

        version_strings: List[str] = utils.parse_for_version_strings(
            self.file_contents)
        if version_strings:
            logger.debug(
                "[*] Found the following strings (and their surrounding bytes, for context) in this PE, which may "
                "indicate the version of Python used to freeze the executable: \n"
                f"{pprint.pformat(version_strings, width=120)}")

        pythonscript_idx: int = None
        if hasattr(self.pe, "DIRECTORY_ENTRY_RESOURCE"):
            entry: pefile.ResourceDirEntryData
            for entry in self.pe.DIRECTORY_ENTRY_RESOURCE.entries:
                # Only named resources can match the name patterns.
                if entry.name is None:
                    continue
                resource_name: str = entry.name.string.decode()
                if not any(
                        re.match(pattern, resource_name, re.I)
                        for pattern in self.INTERESTING_RESOURCES):
                    continue
                resource_path: pathlib.Path = self.dump_resource(resource_name)
                if resource_name == "PYTHONSCRIPT":
                    pythonscript_idx = len(unpack_me)
                unpack_me.append(resource_path)

        # Compare against None explicitly: index 0 is a valid position (no
        # overlay and PYTHONSCRIPT is the first matching resource) and a plain
        # truthiness test would skip the reordering in that case.
        if pythonscript_idx is not None:
            # We want to unpack Py2Exe PYTHONSCRIPT last to give it highest chance of successfully determining version.
            unpack_me.append(unpack_me.pop(pythonscript_idx))

        artifact_path: pathlib.Path
        for artifact_path in unpack_me:
            output_dir_name: str = utils.slugify(
                str(artifact_path.name) + "_output")
            pydecipher.unpack(
                artifact_path,
                output_dir=self.output_dir.joinpath(output_dir_name),
                **self.kwargs,
            )
예제 #3
0
    def __init__(
        self,
        zip_path_or_bytes: Union[str, pathlib.Path, BinaryIO],
        output_dir: pathlib.Path = None,
        **kwargs,
    ) -> None:
        """Construct a zip file artifact.

        Parameters
        ----------
        zip_path_or_bytes : Union[str, pathlib.Path, BinaryIO]
            The path to the zip file, or a buffered binary stream holding a zip
            file; either way the whole archive is read into memory.
        output_dir : pathlib.Path, optional
            Where any output extracted from this artifact should get dumped.
            When omitted, a slugified ``<archive name>_output`` directory next
            to the archive is used, or the current working directory when the
            input was a stream.
        **kwargs
            Any keyword arguments needed for the parsing of this artifact, or for
            parsing nested artifacts. Stored unchanged on ``self.kwargs``.

        Raises
        ------
        TypeError
            If zip_path_or_bytes is neither a path nor a buffered binary stream,
            or if its contents do not validate as a zip archive.
        """
        if isinstance(zip_path_or_bytes, str):
            zip_path_or_bytes = Path(zip_path_or_bytes)

        if isinstance(zip_path_or_bytes, Path):
            # NOTE(review): check_read_access presumably raises when the path is
            # missing or unreadable -- confirm against utils.
            utils.check_read_access(zip_path_or_bytes)
            self.archive_path = zip_path_or_bytes
            input_file: BinaryIO
            with self.archive_path.open("rb") as input_file:
                self.archive_contents = input_file.read()
        elif isinstance(zip_path_or_bytes, io.BufferedIOBase):
            self.archive_contents = zip_path_or_bytes.read()
        else:
            # Without this guard archive_contents would never be set and the
            # failure would only surface later as an AttributeError.
            raise TypeError(
                "[!] Expected a path or a buffered binary stream, got "
                f"{type(zip_path_or_bytes).__name__}.")

        if output_dir:
            self.output_dir = output_dir
        elif hasattr(self, "archive_path"):
            self.output_dir = self.archive_path.parent / utils.slugify(
                self.archive_path.name + "_output")
        else:
            # Stream input has no location on disk to derive a directory from.
            self.output_dir = Path.cwd()

        self.kwargs = kwargs
        # The output directory itself may not exist yet, so it is its parent
        # that gets checked for write access.
        utils.check_write_access(self.output_dir.parent)
        if not self.validate_zip_archive():
            raise TypeError("[!] This is not a zip archive.")
예제 #4
0
    def __init__(
        self,
        pythonscript_path_or_bytes: Union[str, os.PathLike, BinaryIO],
        output_dir: os.PathLike = None,
        **kwargs,
    ):
        """Construct a Py2Exe PYTHONSCRIPT resource artifact.

        Parameters
        ----------
        pythonscript_path_or_bytes : Union[str, os.PathLike, BinaryIO]
            The path to the dumped resource, or a buffered binary stream
            holding it; either way the whole resource is read into memory.
        output_dir : os.PathLike, optional
            Where any output extracted from this artifact should get dumped.
            When omitted, a slugified ``<file name>_output`` directory next to
            the input file is used, or the current working directory when the
            input was a stream.
        **kwargs
            Extra keyword arguments, stored unchanged on ``self.kwargs``. The
            optional ``version_hint`` entry is converted to ``self.magic_num``
            on a best-effort basis.

        Raises
        ------
        FileNotFoundError
            If a path was given and it does not exist.
        PermissionError
            If the input path is not readable, or the parent of the output
            directory is not writable.
        TypeError
            If the input is neither a path nor a buffered binary stream, or if
            it does not validate as a PYTHONSCRIPT resource.
        """
        if isinstance(pythonscript_path_or_bytes, str):
            pythonscript_path_or_bytes = Path(pythonscript_path_or_bytes)
            # TODO try a path resolve here and fail if not working

        if isinstance(pythonscript_path_or_bytes, Path):
            if not pythonscript_path_or_bytes.exists():
                msg = f"[!] Could not find the provided path: {str(pythonscript_path_or_bytes)}."
                raise FileNotFoundError(msg)
            if not os.access(pythonscript_path_or_bytes, os.R_OK):
                msg = f"[!] Lacking read permissions on: {str(pythonscript_path_or_bytes)}."
                raise PermissionError(msg)
            self.archive_path = pythonscript_path_or_bytes
            with self.archive_path.open("rb") as input_file:
                self.resource_contents = input_file.read()
        elif isinstance(pythonscript_path_or_bytes, io.BufferedIOBase):
            self.resource_contents = pythonscript_path_or_bytes.read()
        else:
            # Without this guard resource_contents would never be set and the
            # failure would only surface later as an AttributeError.
            raise TypeError(
                "[!] Expected a path or a buffered binary stream, got "
                f"{type(pythonscript_path_or_bytes).__name__}.")

        # The input path is stored as archive_path above; the previous lookup of
        # a never-assigned file_path attribute meant the sibling "_output"
        # directory was never chosen. file_path is still honoured as a fallback
        # in case it is provided elsewhere on the class.
        source_path = getattr(self, "archive_path", None) or getattr(
            self, "file_path", None)
        if output_dir:
            self.output_dir = output_dir
        elif source_path is not None:
            self.output_dir = source_path.parent / utils.slugify(
                source_path.name + "_output")
        else:
            self.output_dir = Path.cwd()
        # The output directory itself may not exist yet, so its parent is the
        # directory that has to be writable.
        if not os.access(self.output_dir.parent, os.W_OK):
            msg = f"[!] Cannot write output directory to dir: {str(self.output_dir)}."
            raise PermissionError(msg)

        if not self.validate_pythonscript_resource():
            raise TypeError("[!] This is not a Py2Exe PYTHONSCRIPT resource.")

        self.version_hint = kwargs.get("version_hint", None)
        if self.version_hint:
            try:
                self.magic_num = bytecode.version_str_to_magic_num_int(
                    self.version_hint)
            except Exception:
                # Deliberately best-effort: an unusable hint leaves magic_num
                # unset. Narrowed from a bare except so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                pass  # TODO improve this error handling
        self.kwargs = kwargs
예제 #5
0
    def __init__(
        self,
        zlibarchive_path_or_bytes: Union[str, os.PathLike, BinaryIO],
        output_dir: os.PathLike = None,
        **kwargs,
    ):
        """Construct a PyInstaller ZlibArchive artifact.

        Parameters
        ----------
        zlibarchive_path_or_bytes : Union[str, os.PathLike, BinaryIO]
            The path to the archive, or a buffered binary stream holding it;
            either way the whole archive is read into memory.
        output_dir : os.PathLike, optional
            Where any output extracted from this artifact should get dumped.
            When omitted, a slugified ``<archive name>_output`` directory next
            to the archive is used, or the current working directory when the
            input was a stream. The directory is not created here.
        **kwargs
            Extra keyword arguments, stored unchanged on ``self.kwargs``.

        Raises
        ------
        FileNotFoundError
            If a path was given and it does not exist.
        PermissionError
            If the input path is not readable, or the parent of the output
            directory is not writable.
        TypeError
            If the input is neither a path nor a buffered binary stream, or if
            it does not validate as a ZlibArchive.
        """
        if isinstance(zlibarchive_path_or_bytes, str):
            zlibarchive_path_or_bytes = Path(zlibarchive_path_or_bytes)

        if isinstance(zlibarchive_path_or_bytes, Path):
            if not zlibarchive_path_or_bytes.exists():
                msg = f"[!] Could not find the provided path: {str(zlibarchive_path_or_bytes)}."
                raise FileNotFoundError(msg)
            if not os.access(zlibarchive_path_or_bytes, os.R_OK):
                msg = f"[!] Lacking read permissions on: {str(zlibarchive_path_or_bytes)}."
                raise PermissionError(msg)
            self.archive_path = zlibarchive_path_or_bytes
            with self.archive_path.open("rb") as input_file:
                self.archive_contents = input_file.read()
        elif isinstance(zlibarchive_path_or_bytes, io.BufferedIOBase):
            self.archive_contents = zlibarchive_path_or_bytes.read()
        else:
            # Without this guard archive_contents would never be set and the
            # failure would only surface later as an AttributeError.
            raise TypeError(
                "[!] Expected a path or a buffered binary stream, got "
                f"{type(zlibarchive_path_or_bytes).__name__}.")

        # The input path is stored as archive_path above; the previous lookup of
        # a never-assigned file_path attribute meant the sibling "_output"
        # directory was never chosen. file_path is still honoured as a fallback
        # in case it is provided elsewhere on the class.
        source_path = getattr(self, "archive_path", None) or getattr(
            self, "file_path", None)
        if output_dir:
            self.output_dir = output_dir
        elif source_path is not None:
            self.output_dir = source_path.parent / utils.slugify(
                source_path.name + "_output")
        else:
            self.output_dir = Path.cwd()
        # The output directory itself may not exist yet, so its parent is the
        # directory that has to be writable.
        if not os.access(self.output_dir.parent, os.W_OK):
            msg = f"[!] Cannot write output directory to dir: {str(self.output_dir)}."
            raise PermissionError(msg)

        # Keep the keyword arguments instead of silently discarding them.
        self.kwargs = kwargs

        if not self.validate_zlibarchive():
            raise TypeError(
                "[!] This is not a PyInstaller ZlibArchive (or is an archive of an unsupported PyInstaller version)."
            )
예제 #6
0
    def __init__(
        self,
        file_path_or_bytes: Union[str, pathlib.Path, BinaryIO],
        output_dir: pathlib.Path = None,
        **kwargs,
    ) -> None:
        """Construct a compiled Python (pyc) file artifact.

        Parameters
        ----------
        file_path_or_bytes : Union[str, pathlib.Path, BinaryIO]
            The path to the compiled file, or a buffered binary stream holding
            it; either way the whole file is read into memory.
        output_dir : pathlib.Path, optional
            Where any output extracted from this artifact should get dumped.
            When omitted, a slugified ``<file name>_output`` directory next to
            the input file is used, or the current working directory when the
            input was a stream.
        **kwargs
            Extra keyword arguments. Only ``version_hint`` is read here; when
            given it is converted to ``self.magic_num``.

        Raises
        ------
        TypeError
            If the input is neither a path nor a buffered binary stream, or if
            it does not validate as a compiled Python file.
        RuntimeError
            If ``version_hint`` cannot be converted to a magic number.
        """
        if isinstance(file_path_or_bytes, str):
            file_path_or_bytes = pathlib.Path(file_path_or_bytes)

        if isinstance(file_path_or_bytes, pathlib.Path):
            # NOTE(review): check_read_access presumably raises when the path is
            # missing or unreadable -- confirm against utils.
            utils.check_read_access(file_path_or_bytes)
            self.file_path = file_path_or_bytes
            input_file: BinaryIO
            with self.file_path.open("rb") as input_file:
                self.file_contents = input_file.read()
        elif isinstance(file_path_or_bytes, io.BufferedIOBase):
            self.file_contents = file_path_or_bytes.read()
        else:
            # Without this guard file_contents would never be set and the
            # failure would only surface later as an AttributeError.
            raise TypeError(
                "[!] Expected a path or a buffered binary stream, got "
                f"{type(file_path_or_bytes).__name__}.")

        if output_dir:
            self.output_dir = output_dir
        elif hasattr(self, "file_path"):
            self.output_dir = self.file_path.parent / utils.slugify(
                self.file_path.name + "_output")
        else:
            # Stream input has no location on disk to derive a directory from.
            self.output_dir = pathlib.Path.cwd()
        utils.check_write_access(self.output_dir)

        if not self.validate_pyc_file():
            raise TypeError("[!] This is not a compiled Python file.")
        self.version_hint = kwargs.get("version_hint", None)
        if self.version_hint:
            try:
                self.magic_num = bytecode.version_str_to_magic_num_int(
                    self.version_hint)
            except Exception as err:
                # Chain the cause so the reason the hint was rejected stays
                # visible in the traceback.
                raise RuntimeError(
                    f"Failed to produce magic number from version hint {self.version_hint}. Please try a different"
                    " version.") from err
예제 #7
0
    def dump_certificates(self, output_dir: pathlib.Path = None) -> None:
        """Dump Authenticode certificates from the PE's certificate attribute table.

        Each X509 certificate found in every attribute certificate entry is
        written as a PEM file, named after the first available of the subject's
        organizational unit, organization or common name (slugified). When none
        of those is present, the current number of files in the extraction
        directory is used as the name.

        Does nothing when the PE has no non-empty security data directory.
        ``self.certificates_dumped`` is set to True once the table was processed.

        Parameters
        ----------
        output_dir: pathlib.Path, optional
            An optional alternative output directory to dump the certificates, besides
            the class's output directory.
        """
        certificate_table_entry: pefile.Structure = None
        optional_header = getattr(self.pe, "OPTIONAL_HEADER", None)
        directory: pefile.Structure
        for directory in getattr(optional_header, "DATA_DIRECTORY", None) or ():
            if directory.name == "IMAGE_DIRECTORY_ENTRY_SECURITY" and directory.Size:
                logger.debug("[*] This PE has a certificate table.")
                certificate_table_entry = directory
                break

        if certificate_table_entry is None:
            return

        if output_dir is None:
            certificate_extraction_dir: pathlib.Path = self.output_dir.joinpath(
                "Authenticode_Certificates")
        else:
            certificate_extraction_dir: pathlib.Path = output_dir
        certificate_extraction_dir.mkdir(parents=True, exist_ok=True)

        # For the security directory, VirtualAddress is used as an offset into
        # the raw file data. Bound the slice by the directory's Size so nothing
        # past the table is interpreted as a certificate entry.
        table_start: int = certificate_table_entry.VirtualAddress
        table_end: int = table_start + certificate_table_entry.Size
        certificate_table_data: bytes = self.pe.__data__[table_start:table_end]

        # Subject fields to name the output file after, in order of preference.
        preferred_name_fields: List[str] = [
            "organizational_unit_name",
            "organization_name",
            "common_name",
        ]
        # Each entry starts with dwLength (4 bytes), wRevision (2), wCertificateType (2).
        entry_header_size: int = 8

        while len(certificate_table_data) >= entry_header_size:
            # https://docs.microsoft.com/en-us/windows/desktop/Debug/pe-format#the-attribute-certificate-table-image-only
            # dwLength is the length of the whole entry, header included.
            entry_length: int = int.from_bytes(certificate_table_data[0:4],
                                               byteorder="little")
            if entry_length <= entry_header_size:
                logger.debug(
                    f"[!] Malformed attribute certificate entry (length {entry_length}); stopping."
                )
                break
            cert: bytes = certificate_table_data[entry_header_size:entry_length]
            # Entries are aligned on 8-byte boundaries, so the next one starts
            # at dwLength rounded up to a multiple of 8.
            next_entry_offset: int = (entry_length + 7) & ~7
            certificate_table_data = certificate_table_data[next_entry_offset:]

            # Extract all the X509 certificates from the PKCS#7 structure
            authenticode_structure: signify.authenticode.AuthenticodeSignedData = AuthenticodeSignedData.from_envelope(
                cert)
            cert_obj: signify.certificates.Certificate
            for cert_obj in authenticode_structure.certificates:
                cert_name_obj: asn1crypto.x509.Name = cert_obj.to_asn1crypto.subject
                subject_fields = cert_name_obj.native

                cert_name: str = None
                preferred_field_name: str
                for preferred_field_name in preferred_name_fields:
                    if subject_fields.get(preferred_field_name):
                        cert_name = subject_fields[preferred_field_name]
                        break
                if cert_name is None:
                    # No usable subject field: fall back to a running number.
                    cert_name = f"{len(os.listdir(certificate_extraction_dir))}"
                cert_name = utils.slugify(cert_name,
                                          allow_unicode=True) + ".pem"

                logger.debug(
                    f"[+] Extracting Authenticode certificate {cert_name}.")
                f: BinaryIO
                with certificate_extraction_dir.joinpath(cert_name).open(
                        "wb") as f:
                    der_bytes: bytes = cert_obj.to_asn1crypto.dump()
                    pem_bytes: bytes = pem.armor("CERTIFICATE", der_bytes)
                    f.write(pem_bytes)
        self.certificates_dumped = True
예제 #8
0
    def extract_files(self):
        """Extract every entry listed in ``self.toc`` into ``self.output_dir``.

        For each table-of-contents entry the payload is sliced out of
        ``self.archive_contents`` and zlib-decompressed when the entry's
        compression flag is set. Handling then depends on the entry type:

        * PYSOURCE entries that start with a marshalled code object are written
          as ``.pyc`` with a header built from a magic number collected from
          the PYMODULE entries; other PYSOURCE entries are written as ``.py``.
          Code objects met before any magic number is known are queued and
          written after all entries have been seen.
        * PYMODULE entries contribute their first four bytes as a magic number
          and are written as ``.pyc``.
        * RUNTIME_OPTION entries are not written to disk.
        * PYZ and ZIPFILE entries are written and then passed to
          ``pydecipher.unpack`` with a sibling ``<name>_output`` directory.

        Entries whose decompression fails are skipped, as are entries whose
        name would place the file outside the output directory.
        """
        magic_nums: set = set()
        decompression_errors = 0
        successfully_extracted = 0
        # (file_path, data) pairs of code objects waiting for a magic number.
        deferred_code_files: list = []
        output_root = pathlib.Path(self.output_dir)
        resolved_root = output_root.resolve()

        entry: CTOCEntry
        for entry in self.toc:
            data_start = entry.entry_offset
            data = self.archive_contents[data_start:data_start +
                                         entry.compressed_data_size]

            if entry.compression_flag:
                try:
                    data = zlib.decompress(data)
                except zlib.error as e:
                    decompression_errors += 1
                    logger.debug(
                        f"[!] PyInstaller CArchive decompression failed with error: {e}"
                    )
                    continue
                if len(data) != entry.uncompressed_data_size:
                    logger.warning(
                        f"[!] {entry.name} entry in CArchive listed its uncompressed data size as"
                        f" {entry.uncompressed_data_size}, however in actuality, uncompressed to be {len(data)}"
                        " bytes. This may be a sign that the CArchive was manually altered."
                    )

            if "\\" in entry.name:
                relative_path = pathlib.PureWindowsPath(entry.name)
            else:
                relative_path = Path(entry.name)
            file_path = output_root.joinpath(relative_path)

            # Entry names come from the archive being analysed and must not be
            # trusted: an absolute name or one containing ".." would otherwise
            # write outside the output directory.
            if resolved_root not in file_path.resolve().parents:
                logger.warning(
                    f"[!] Skipping CArchive entry {entry.name}: it does not resolve to a file inside the output"
                    " directory.")
                continue

            if len(file_path.parents) > 1:  # every path has '.' as a parent
                file_path.parent.mkdir(parents=True, exist_ok=True)

            write_now = entry.type_code != self.ArchiveItem.RUNTIME_OPTION

            if entry.type_code == self.ArchiveItem.PYSOURCE:
                code_marker = ord(xdis.marsh.TYPE_CODE)
                # Empty payloads have no first byte and are treated as source.
                first_byte = data[0] if data else None
                if first_byte in (code_marker,
                                  code_marker | xdis.unmarshal.FLAG_REF):
                    file_path = file_path.parent / (file_path.name + ".pyc")
                    if magic_nums:
                        magic_num = next(iter(magic_nums))
                        if len(magic_nums) > 1:
                            logger.warning(
                                "[!] More than one magic number found within this CArchive. Using magic number"
                                f" {magic_num}, but also found numbers: {magic_nums}"
                            )
                        data = pydecipher.bytecode.create_pyc_header(
                            magic_num) + data
                    else:
                        # No PYMODULE entry has been seen yet, so the header
                        # cannot be built; write this file after the loop.
                        logger.warning(
                            "[!] No magic numbers have been found yet, queueing this file for later."
                        )
                        deferred_code_files.append((file_path, data))
                        write_now = False
                else:
                    file_path = file_path.parent / (file_path.name + ".py")
                if "pyi" not in entry.name:
                    logger.info(
                        f"[!] Potential entrypoint found at script {entry.name}.py"
                    )
            elif entry.type_code == self.ArchiveItem.PYMODULE:
                magic_bytes = data[:4]  # Python magic value
                magic_nums.add(magic2int(magic_bytes))
                file_path = file_path.parent / (file_path.name + ".pyc")

            if write_now:
                output_root.mkdir(parents=True, exist_ok=True)
                with file_path.open(mode="wb") as f:
                    f.write(data)
                successfully_extracted += 1

            if entry.type_code in (self.ArchiveItem.PYZ,
                                   self.ArchiveItem.ZIPFILE):
                output_dir_name = (str(
                    file_path.parent.joinpath(
                        utils.slugify(file_path.name.split(".")[0]))) +
                                   "_output")
                pydecipher.unpack(file_path, output_dir=output_dir_name)

        if deferred_code_files:
            if magic_nums:
                pyc_header = pydecipher.bytecode.create_pyc_header(
                    next(iter(magic_nums)))
            else:
                # Still nothing to build a header from: keep the raw code
                # objects so they can be inspected manually.
                logger.warning(
                    "[!] No magic numbers were found in this CArchive; writing queued code objects without a"
                    " pyc header.")
                pyc_header = b""
            output_root.mkdir(parents=True, exist_ok=True)
            for deferred_path, deferred_data in deferred_code_files:
                with deferred_path.open(mode="wb") as f:
                    f.write(pyc_header + deferred_data)
                successfully_extracted += 1

        if decompression_errors:
            logger.debug(
                f"[!] Failed to write {decompression_errors} files due to decompression errors."
            )
        if successfully_extracted:
            logger.info(
                f"[+] Successfully extracted {successfully_extracted} files from this CArchive."
            )