def __init__(
    self,
    pe_path_or_bytes: Union[str, os.PathLike, BinaryIO],
    output_dir: os.PathLike = None,
    **kwargs,
) -> None:
    """Load a PE from a path or a binary stream and choose an output directory.

    Parameters
    ----------
    pe_path_or_bytes : Union[str, os.PathLike, BinaryIO]
        A string or ``pathlib.Path`` locating the PE on disk, or a buffered
        binary stream (``io.BufferedIOBase``) whose contents are the PE.
    output_dir : os.PathLike, optional
        Where output extracted from this artifact should be written. When
        omitted, a slugified ``<file name>_output`` directory next to the
        input file is used, or the current working directory if the PE was
        supplied as a stream.
    **kwargs
        Stored unchanged on the instance as ``self.kwargs``.

    Raises
    ------
    TypeError
        If pefile rejects the contents with a ``PEFormatError``.
    """
    source = pe_path_or_bytes
    if isinstance(source, str):
        source = pathlib.Path(source)

    if isinstance(source, pathlib.Path):
        utils.check_read_access(source)
        self.file_path = source
        self.file_contents = source.read_bytes()
    if isinstance(source, io.BufferedIOBase):
        self.file_contents = source.read()

    try:
        self.pe = pefile.PE(data=self.file_contents)
    except pefile.PEFormatError as err:
        raise TypeError(err)

    if output_dir:
        self.output_dir = output_dir
    elif hasattr(self, "file_path"):
        # Only set when the PE came from disk (see above).
        default_name = utils.slugify(self.file_path.name + "_output")
        self.output_dir = self.file_path.parent / default_name
    else:
        self.output_dir = pathlib.Path.cwd()

    utils.check_write_access(self.output_dir)
    self.kwargs = kwargs
def unpack(self) -> None:
    """Dump any interesting aspects of this PE for further investigation.

    This will log the PEs version info resource for manual inspection, dump
    any Authenticode certificates, and look for frozen Python artifacts
    within the PE's resources and overlay. Every dumped artifact is then
    handed to ``pydecipher.unpack`` with its own ``<name>_output``
    sub-directory of ``self.output_dir`` and the stored ``self.kwargs``.
    """
    self.load_version_info()
    self.dump_certificates()

    unpack_me: List[pathlib.Path] = []
    overlay_path: pathlib.Path = self.dump_overlay()
    if overlay_path:
        unpack_me.append(overlay_path)

    version_strings: List[str] = utils.parse_for_version_strings(self.file_contents)
    if version_strings:
        logger.debug(
            "[*] Found the following strings (and their surrounding bytes, for context) in this PE, which may "
            "indicate the version of Python used to freeze the executable: \n"
            f"{pprint.pformat(version_strings, width=120)}")

    # Position of the PYTHONSCRIPT resource inside unpack_me; None means "not seen".
    pythonscript_idx = None
    if hasattr(self.pe, "DIRECTORY_ENTRY_RESOURCE"):
        for entry in self.pe.DIRECTORY_ENTRY_RESOURCE.entries:
            if entry.name is None:
                continue
            resource_name: str = entry.name.string.decode()
            if any(
                    re.match(pattern, resource_name, re.I)
                    for pattern in self.INTERESTING_RESOURCES):
                resource_path: pathlib.Path = self.dump_resource(resource_name)
                if resource_name == "PYTHONSCRIPT":
                    pythonscript_idx = len(unpack_me)
                unpack_me.append(resource_path)

    # Compare against None explicitly: index 0 is a valid position (e.g. when
    # there is no overlay) and must still be moved to the end.
    if pythonscript_idx is not None:
        # We want to unpack Py2Exe PYTHONSCRIPT last to give it highest chance of successfully determining version.
        unpack_me.append(unpack_me.pop(pythonscript_idx))

    for artifact_path in unpack_me:
        output_dir_name: str = utils.slugify(str(artifact_path.name) + "_output")
        pydecipher.unpack(
            artifact_path,
            output_dir=self.output_dir.joinpath(output_dir_name),
            **self.kwargs,
        )
def __init__(
    self,
    zip_path_or_bytes: Union[str, pathlib.Path, BinaryIO],
    output_dir: pathlib.Path = None,
    **kwargs,
) -> None:
    """Build a zip archive artifact from a path or an in-memory stream.

    Parameters
    ----------
    zip_path_or_bytes : Union[str, os.PathLike, BinaryIO]
        Location of the zip file on disk, or a buffered binary stream
        holding the archive.
    output_dir : os.PathLike, optional
        Destination for anything extracted from this artifact. Defaults to
        a slugified ``<archive name>_output`` directory beside the archive,
        or the current working directory for in-memory archives.
    **kwargs
        Keyword arguments kept on ``self.kwargs`` for parsing this artifact
        or any nested artifacts.

    Raises
    ------
    TypeError
        If ``validate_zip_archive`` reports the contents are not a zip
        archive.
    """
    source = zip_path_or_bytes
    if isinstance(source, str):
        source = Path(source)

    if isinstance(source, Path):
        utils.check_read_access(source)
        self.archive_path = source
        self.archive_contents = source.read_bytes()
    if isinstance(source, io.BufferedIOBase):
        self.archive_contents = source.read()

    if output_dir:
        self.output_dir = output_dir
    elif hasattr(self, "archive_path"):
        default_name = utils.slugify(self.archive_path.name + "_output")
        self.output_dir = self.archive_path.parent / default_name
    else:
        self.output_dir = Path.cwd()

    self.kwargs = kwargs
    # The check is made on the parent of the output directory.
    utils.check_write_access(self.output_dir.parent)

    if not self.validate_zip_archive():
        raise TypeError("[!] This is not a zip archive.")
def __init__(
    self,
    pythonscript_path_or_bytes: Union[str, os.PathLike, BinaryIO],
    output_dir: os.PathLike = None,
    **kwargs,
):
    """Construct a Py2Exe PYTHONSCRIPT resource artifact.

    Parameters
    ----------
    pythonscript_path_or_bytes : Union[str, os.PathLike, BinaryIO]
        A string or ``Path`` to the dumped resource, or a buffered binary
        stream holding its contents.
    output_dir : os.PathLike, optional
        Where output extracted from this artifact should be written. When
        omitted, a slugified ``<resource name>_output`` directory next to
        the resource file is used, or the current working directory if the
        resource was supplied as a stream.
    **kwargs
        Stored on ``self.kwargs``. A ``version_hint`` entry, if present, is
        converted into ``self.magic_num`` on a best-effort basis.

    Raises
    ------
    FileNotFoundError
        If a path was given and it does not exist.
    PermissionError
        If the input is not readable, or the parent of the output directory
        is not writable.
    TypeError
        If ``validate_pythonscript_resource`` rejects the contents.
    """
    if isinstance(pythonscript_path_or_bytes, str):
        pythonscript_path_or_bytes = Path(pythonscript_path_or_bytes)
    # TODO try a path resolve here and fail if not working

    if isinstance(pythonscript_path_or_bytes, Path):
        if not pythonscript_path_or_bytes.exists():
            msg = f"[!] Could not find the provided path: {str(pythonscript_path_or_bytes)}."
            raise FileNotFoundError(msg)
        if not os.access(pythonscript_path_or_bytes, os.R_OK):
            msg = f"[!] Lacking read permissions on: {str(pythonscript_path_or_bytes)}."
            raise PermissionError(msg)
        self.archive_path = pythonscript_path_or_bytes
        with self.archive_path.open("rb") as input_file:
            self.resource_contents = input_file.read()
    if isinstance(pythonscript_path_or_bytes, io.BufferedIOBase):
        self.resource_contents = pythonscript_path_or_bytes.read()

    if output_dir:
        self.output_dir = output_dir
    elif hasattr(self, "archive_path"):
        # The on-disk location is stored as ``archive_path`` above, so that
        # is the attribute the default output directory must be derived from.
        self.output_dir = self.archive_path.parent / utils.slugify(
            self.archive_path.name + "_output")
    else:
        self.output_dir = Path.cwd()

    if not os.access(self.output_dir.parent, os.W_OK):
        msg = f"[!] Cannot write output directory to dir: {str(self.output_dir)}."
        raise PermissionError(msg)

    if not self.validate_pythonscript_resource():
        raise TypeError("[!] This is not a Py2Exe PYTHONSCRIPT resource.")

    self.version_hint = kwargs.get("version_hint", None)
    if self.version_hint:
        try:
            self.magic_num = bytecode.version_str_to_magic_num_int(
                self.version_hint)
        except Exception:
            # Deliberately best-effort: an unusable hint leaves ``magic_num``
            # unset. Catching Exception (not a bare except) keeps
            # KeyboardInterrupt/SystemExit propagating.
            pass  # TODO improve this error handling
    self.kwargs = kwargs
def __init__(
    self,
    zlibarchive_path_or_bytes: Union[str, os.PathLike, BinaryIO],
    output_dir: os.PathLike = None,
    **kwargs,
):
    """Construct a PyInstaller ZlibArchive artifact.

    Parameters
    ----------
    zlibarchive_path_or_bytes : Union[str, os.PathLike, BinaryIO]
        A string or ``Path`` to the archive, or a buffered binary stream
        holding its contents.
    output_dir : os.PathLike, optional
        Where output extracted from this artifact should be written. When
        omitted, a slugified ``<archive name>_output`` directory next to
        the archive is used, or the current working directory if the
        archive was supplied as a stream.
    **kwargs
        Accepted for interface parity; not stored by this constructor.

    Raises
    ------
    FileNotFoundError
        If a path was given and it does not exist.
    PermissionError
        If the input is not readable, or the parent of the output directory
        is not writable.
    TypeError
        If ``validate_zlibarchive`` rejects the contents.
    """
    if isinstance(zlibarchive_path_or_bytes, str):
        zlibarchive_path_or_bytes = Path(zlibarchive_path_or_bytes)

    if isinstance(zlibarchive_path_or_bytes, Path):
        if not zlibarchive_path_or_bytes.exists():
            msg = f"[!] Could not find the provided path: {str(zlibarchive_path_or_bytes)}."
            raise FileNotFoundError(msg)
        if not os.access(zlibarchive_path_or_bytes, os.R_OK):
            msg = f"[!] Lacking read permissions on: {str(zlibarchive_path_or_bytes)}."
            raise PermissionError(msg)
        self.archive_path = zlibarchive_path_or_bytes
        with self.archive_path.open("rb") as input_file:
            self.archive_contents = input_file.read()
    if isinstance(zlibarchive_path_or_bytes, io.BufferedIOBase):
        self.archive_contents = zlibarchive_path_or_bytes.read()

    if output_dir:
        self.output_dir = output_dir
    elif hasattr(self, "archive_path"):
        # The on-disk location is stored as ``archive_path`` above, so that
        # is the attribute the default output directory must be derived from.
        self.output_dir = self.archive_path.parent / utils.slugify(
            self.archive_path.name + "_output")
    else:
        self.output_dir = Path.cwd()

    if not os.access(self.output_dir.parent, os.W_OK):
        msg = f"[!] Cannot write output directory to dir: {str(self.output_dir)}."
        raise PermissionError(msg)

    if not self.validate_zlibarchive():
        raise TypeError(
            "[!] This is not a PyInstaller ZlibArchive (or is an archive of an unsupported PyInstaller version"
        )
def __init__(
    self,
    file_path_or_bytes: Union[str, pathlib.Path, BinaryIO],
    output_dir: pathlib.Path = None,
    **kwargs,
) -> None:
    """Build a compiled-Python-file artifact from a path or a binary stream.

    Parameters
    ----------
    file_path_or_bytes : Union[str, pathlib.Path, BinaryIO]
        Location of the file on disk, or a buffered binary stream holding
        its contents.
    output_dir : pathlib.Path, optional
        Destination for output produced from this artifact. Defaults to a
        slugified ``<file name>_output`` directory beside the input file,
        or the current working directory for in-memory input.
    **kwargs
        Only ``version_hint`` is consulted here; when truthy it is turned
        into ``self.magic_num``.

    Raises
    ------
    TypeError
        If ``validate_pyc_file`` rejects the contents.
    RuntimeError
        If the version hint cannot be converted into a magic number.
    """
    source = file_path_or_bytes
    if isinstance(source, str):
        source = pathlib.Path(source)

    if isinstance(source, pathlib.Path):
        utils.check_read_access(source)
        self.file_path = source
        self.file_contents = source.read_bytes()
    if isinstance(source, io.BufferedIOBase):
        self.file_contents = source.read()

    if output_dir:
        self.output_dir = output_dir
    elif hasattr(self, "file_path"):
        default_name = utils.slugify(self.file_path.name + "_output")
        self.output_dir = self.file_path.parent / default_name
    else:
        self.output_dir = pathlib.Path.cwd()
    utils.check_write_access(self.output_dir)

    if not self.validate_pyc_file():
        raise TypeError("[!] This is not a compiled Python file.")

    hint = kwargs.get("version_hint")
    self.version_hint = hint
    if hint:
        try:
            self.magic_num = bytecode.version_str_to_magic_num_int(hint)
        except Exception:
            raise RuntimeError(
                f"Failed to produce magic number from version hint {self.version_hint}. Please try a different"
                " version.")
def dump_certificates(self, output_dir: pathlib.Path = None) -> None:
    """Dump Authenticode certificates from the PE's certificate attribute table.

    Each entry of the attribute certificate table is parsed as a PKCS#7
    envelope, and every X509 certificate it contains is written as a PEM
    file. Files are named after the first available subject field among
    organizational unit, organization and common name, falling back to the
    number of files already in the extraction directory. Sets
    ``self.certificates_dumped`` to True once the table has been processed.

    Parameters
    ----------
    output_dir: pathlib.Path, optional
        An optional alternative output directory to dump the certificates, besides the class's output directory.
    """
    certificate_table_entry = None
    if hasattr(self.pe, "OPTIONAL_HEADER") and hasattr(
            self.pe.OPTIONAL_HEADER, "DATA_DIRECTORY"):
        for directory in self.pe.OPTIONAL_HEADER.DATA_DIRECTORY:
            if directory.name == "IMAGE_DIRECTORY_ENTRY_SECURITY" and directory.Size:
                logger.debug("[*] This PE has a certificate table.")
                certificate_table_entry = directory
                break
    if certificate_table_entry is None:
        return

    if output_dir is None:
        certificate_extraction_dir = self.output_dir.joinpath(
            "Authenticode_Certificates")
    else:
        certificate_extraction_dir = output_dir
    certificate_extraction_dir.mkdir(parents=True, exist_ok=True)

    # https://docs.microsoft.com/en-us/windows/desktop/Debug/pe-format#the-attribute-certificate-table-image-only
    # For the security directory, VirtualAddress is a file offset and Size is
    # the table length; only that window is parsed, not the rest of the file.
    table_start = certificate_table_entry.VirtualAddress
    table_end = table_start + certificate_table_entry.Size
    certificate_table_data: bytes = self.pe.__data__[table_start:table_end]

    header_size = 8  # dwLength (4) + wRevision (2) + wCertificateType (2)
    preferred_name_fields: List[str] = [
        "organizational_unit_name",
        "organization_name",
        "common_name",
    ]

    while len(certificate_table_data) > header_size:
        # dwLength covers the whole entry, header included.
        entry_length = int.from_bytes(certificate_table_data[0:4],
                                      byteorder="little")
        if entry_length <= header_size:
            # Zero padding or a malformed entry: nothing more to parse.
            break
        cert: bytes = certificate_table_data[header_size:entry_length]
        # Entries start on 8-byte boundaries, so skip the alignment padding.
        next_entry_offset = entry_length + (-entry_length % 8)
        certificate_table_data = certificate_table_data[next_entry_offset:]

        # Extract all the X509 certificates from the PKCS#7 structure
        authenticode_structure = AuthenticodeSignedData.from_envelope(cert)
        for cert_obj in authenticode_structure.certificates:
            subject_fields = cert_obj.to_asn1crypto.subject.native
            for preferred_field_name in preferred_name_fields:
                if preferred_field_name in subject_fields:
                    cert_name = subject_fields[preferred_field_name]
                    break
            else:
                # No preferred field present: number the file instead.
                cert_name = str(len(os.listdir(certificate_extraction_dir)))
            cert_name = utils.slugify(cert_name, allow_unicode=True) + ".pem"
            logger.debug(
                f"[+] Extracting Authenticode certificate {cert_name}.")
            with certificate_extraction_dir.joinpath(cert_name).open(
                    "wb") as f:
                der_bytes: bytes = cert_obj.to_asn1crypto.dump()
                pem_bytes: bytes = pem.armor("CERTIFICATE", der_bytes)
                f.write(pem_bytes)
    self.certificates_dumped = True
def extract_files(self):
    """Write every entry of this CArchive's table of contents to disk.

    Compressed entries are zlib-decompressed first; entries that fail to
    decompress are counted and skipped. PYSOURCE entries that hold a
    marshalled code object get a ``.pyc`` suffix and a pyc header built from
    a magic number seen in a PYMODULE entry; other PYSOURCE entries get a
    ``.py`` suffix. Code objects seen before any magic number is known are
    queued and written after the loop. PYZ and ZIPFILE entries are passed
    to ``pydecipher.unpack`` after being written.
    """
    magic_nums: set = set()
    decompression_errors = 0
    successfully_extracted = 0
    # (destination, headerless code bytes) waiting for a magic number.
    deferred_code: list = []

    type_code_marker = ord(xdis.marsh.TYPE_CODE)
    code_markers = (type_code_marker,
                    type_code_marker | xdis.unmarshal.FLAG_REF)

    entry: CTOCEntry
    for entry in self.toc:
        data = self.archive_contents[entry.entry_offset:entry.entry_offset +
                                     entry.compressed_data_size]
        if entry.compression_flag:
            try:
                data = zlib.decompress(data)
            except zlib.error as e:
                decompression_errors += 1
                logger.debug(
                    f"[!] PyInstaller CArchive decompression failed with error: {e}"
                )
                continue
            if len(data) != entry.uncompressed_data_size:
                logger.warning(
                    f"[!] {entry.name} entry in CArchive listed its uncompressed data size as"
                    f" {entry.uncompressed_data_size}, however in actuality, uncompressed to be {len(data)}"
                    " bytes. This may be a sign that the CArchive was manually altered."
                )

        # NOTE(review): entry.name comes from the archive and is joined to the
        # output directory unchecked; consider rejecting names that escape it.
        if "\\" in entry.name:
            relative_path = pathlib.PureWindowsPath(entry.name)
        else:
            relative_path = Path(entry.name)
        file_path = pathlib.Path(self.output_dir).joinpath(relative_path)
        if len(file_path.parents) > 1:  # every path has '.' as a parent
            file_path.parent.mkdir(parents=True, exist_ok=True)

        if entry.type_code == self.ArchiveItem.PYSOURCE:
            # Empty data cannot be a code object; treat it as plain source.
            is_code_object = bool(data) and data[0] in code_markers
            suffix = ".pyc" if is_code_object else ".py"
            file_path = file_path.parent / (file_path.name + suffix)
            if "pyi" not in entry.name:
                logger.info(
                    f"[!] Potential entrypoint found at script {entry.name}.py"
                )
            if is_code_object:
                if not magic_nums:
                    # The header needs a magic number; none is known yet, so
                    # hold this entry back instead of failing.
                    logger.warning(
                        "[!] No magic numbers have been found yet, queueing this file for later."
                    )
                    deferred_code.append((file_path, data))
                    continue
                magic_num = next(iter(magic_nums))
                if len(magic_nums) > 1:
                    logger.warning(
                        "[!] More than one magic number found within this CArchive. Using magic number"
                        f" {magic_num}, but also found numbers: {magic_nums}"
                    )
                data = pydecipher.bytecode.create_pyc_header(magic_num) + data
        elif entry.type_code == self.ArchiveItem.PYMODULE:
            magic_bytes = data[:4]  # Python magic value
            magic_nums.add(magic2int(magic_bytes))
            file_path = file_path.parent / (file_path.name + ".pyc")

        if entry.type_code != self.ArchiveItem.RUNTIME_OPTION:
            self.output_dir.mkdir(parents=True, exist_ok=True)
            with file_path.open(mode="wb") as f:
                f.write(data)
                successfully_extracted += 1

        if entry.type_code in (self.ArchiveItem.PYZ,
                               self.ArchiveItem.ZIPFILE):
            output_dir_name = (str(
                file_path.parent.joinpath(
                    utils.slugify(file_path.name.split(".")[0]))) + "_output")
            pydecipher.unpack(file_path, output_dir=output_dir_name)

    if deferred_code:
        if magic_nums:
            header = pydecipher.bytecode.create_pyc_header(
                next(iter(magic_nums)))
        else:
            logger.warning(
                "[!] No magic number was found in this CArchive; writing queued code objects without a"
                " pyc header.")
            header = b""
        self.output_dir.mkdir(parents=True, exist_ok=True)
        for deferred_path, code_bytes in deferred_code:
            with deferred_path.open(mode="wb") as f:
                f.write(header + code_bytes)
                successfully_extracted += 1

    if decompression_errors:
        logger.debug(
            f"[!] Failed to write {decompression_errors} files due to decompression errors."
        )
    if successfully_extracted:
        logger.info(
            f"[+] Successfully extracted {successfully_extracted} files from this CArchive."
        )