def process(self, data):
    """
    Compute the import hash of the PE image in ``data``.

    Only the import directory is parsed. The hex digest produced by pefile
    is returned encoded with ``self.codec`` when ``self.args.text`` is set,
    and as the raw digest bytes otherwise.

    Raises:
        ValueError: pefile produced an empty import hash.
    """
    image = PE(data=data, fast_load=True)
    image.parse_data_directories(directories=[IMAGE_DIRECTORY_ENTRY_IMPORT])
    digest = image.get_imphash()
    if not digest:
        raise ValueError('no import directory.')
    if self.args.text:
        return digest.encode(self.codec)
    return bytes.fromhex(digest)
def pdb_guid(file):
    """
    Read the PDB file name and GUID/age string from a PE file.

    Args:
        file: path of the PE file, passed straight to ``PE``.

    Returns:
        A dict with the keys ``"filename"`` (the ``Filename`` field of the
        parsed CodeView RSDS header) and ``"GUID"`` (GUID fields followed by
        the age, formatted as hex).

    Raises:
        RuntimeError: the image has no CodeView debug entry. This now also
            covers images without any debug directory, which previously
            escaped as an ``AttributeError``.
    """
    pe = PE(file, fast_load=True)
    pe.parse_data_directories()

    codeview_type = DEBUG_TYPE["IMAGE_DEBUG_TYPE_CODEVIEW"]
    # pefile only sets DIRECTORY_ENTRY_DEBUG when the directory is present.
    debug_entries = getattr(pe, "DIRECTORY_ENTRY_DEBUG", ())
    codeview = next(
        (entry for entry in debug_entries
         if entry.struct.Type == codeview_type),
        None,
    )
    if codeview is None:
        print("Failed to find CodeView in pdb")
        raise RuntimeError("Failed to find GUID age")

    offset = codeview.struct.PointerToRawData
    size = codeview.struct.SizeOfData
    tmp = CV_RSDS_HEADER.parse(pe.__data__[offset:offset + size])
    guidstr = "%08x%04x%04x%s%x" % (
        tmp.GUID.Data1,
        tmp.GUID.Data2,
        tmp.GUID.Data3,
        hexlify(tmp.GUID.Data4).decode("ascii"),
        tmp.Age,
    )
    return {"filename": tmp.Filename, "GUID": guidstr}
def parse_file_info(cls, pe: PE) -> dict:
    """
    Build a JSON-serializable, human readable dictionary describing the
    version resource of a PE file.

    The first string table that can be read is returned. ``None`` is
    returned when the parsed image exposes no ``FileInfo`` attribute, or
    when no string table is found.
    """
    try:
        resource_dir = DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']
        pe.parse_data_directories(directories=[resource_dir])
        file_info_list = pe.FileInfo
    except AttributeError:
        return None

    for file_info in file_info_list:
        for info_entry in file_info:
            # Entries without a StringTable attribute are skipped.
            with suppress(AttributeError):
                for table in info_entry.StringTable:
                    parsed = cls._parse_pedict(table.entries)
                    # Language details are optional; the plain entries are
                    # still returned when no LangID can be obtained.
                    with suppress(AttributeError):
                        lang_id = (
                            table.entries.get('LangID', None)
                            or table.LangID
                        )
                        if not isinstance(lang_id, int):
                            lang_id = int(lang_id, 0x10)
                        # high word selects the language, low word the charset
                        language = cls._LCID.get(
                            lang_id >> 0x10, 'Language Neutral')
                        charset = cls._CHARSET.get(
                            lang_id & 0xFFFF, 'Unknown Charset')
                        parsed.update(
                            LangID=F'{lang_id:08X}',
                            Charset=charset,
                            Language=language,
                        )
                    return parsed
def _algorithm(self, data):
    """
    Return the import hash of the PE image in ``data`` as raw digest bytes.

    Only the import directory is parsed.

    Raises:
        ValueError: pefile produced an empty import hash.
    """
    image = PE(data=data, fast_load=True)
    image.parse_data_directories(directories=[IMAGE_DIRECTORY_ENTRY_IMPORT])
    digest = image.get_imphash()
    if digest:
        return bytes.fromhex(digest)
    raise ValueError('no import directory.')
def parse_time_stamps(cls, pe: PE, raw_time_stamps: bool) -> dict:
    """
    Collect the time stamps stored in a PE file: the link time from the
    file header plus the stamps of the import, export, debug, and resource
    directories. The resource stamp is additionally interpreted as a DOS
    time stamp and reported under the key "Delphi".

    With ``raw_time_stamps`` set, the integer values are returned as found;
    otherwise each one is converted to a naive UTC datetime and rendered as
    a string. Missing directories are skipped silently.
    """
    def as_naive_utc(ts):
        # parse as UTC but then forget time zone information
        return datetime.fromtimestamp(ts, tz=timezone.utc).replace(tzinfo=None)

    def unchanged(ts):
        return ts

    dt = unchanged if raw_time_stamps else as_naive_utc

    directory_names = (
        'IMAGE_DIRECTORY_ENTRY_IMPORT',
        'IMAGE_DIRECTORY_ENTRY_EXPORT',
        'IMAGE_DIRECTORY_ENTRY_DEBUG',
        'IMAGE_DIRECTORY_ENTRY_RESOURCE',
    )
    pe.parse_data_directories(
        directories=[DIRECTORY_ENTRY[name] for name in directory_names])

    info = {}

    with suppress(AttributeError):
        info['Linker'] = dt(pe.FILE_HEADER.TimeDateStamp)

    # NOTE(review): pefile import entries appear to keep the descriptor
    # under `entry.struct`; if so, `entry.TimeDateStamp()` always raises a
    # suppressed AttributeError and "Import" is never set - confirm.
    with suppress(AttributeError):
        for entry in pe.DIRECTORY_ENTRY_IMPORT:
            info['Import'] = dt(entry.TimeDateStamp())

    with suppress(AttributeError):
        for entry in pe.DIRECTORY_ENTRY_DEBUG:
            info['DbgDir'] = dt(entry.struct.TimeDateStamp)

    with suppress(AttributeError):
        export_stamp = pe.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp
        if export_stamp:
            info['Export'] = dt(export_stamp)

    with suppress(AttributeError):
        res_timestamp = pe.DIRECTORY_ENTRY_RESOURCE.struct.TimeDateStamp
        if res_timestamp:
            with suppress(ValueError):
                from ...misc.datefix import datefix
                info['Delphi'] = datefix.dostime(res_timestamp)
                info['RsrcTS'] = dt(res_timestamp)

    # integers pass through, everything else is stringified
    return {
        key: value if isinstance(value, int) else str(value)
        for key, value in info.items()
    }
def parse_pe_fetch_pdb(symbol_server, file_path):
    '''
    Attempt to fetch the symbol file that belongs to a PE file.

    The file must have a valid IMAGE_DEBUG_DIRECTORY containing an
    IMAGE_DEBUG_TYPE_CODEVIEW entry in either the RSDS or the NB10 format.

    Args:
        symbol_server: forwarded unchanged to ``__fetch__``.
        file_path: path of the PE file to inspect.

    Returns:
        ``(symbol, pdb_basename)`` on success and ``(None, None)`` on any
        failure. Failures are logged and never raised.
    '''
    from binascii import hexlify

    try:
        pe = PE(file_path, fast_load=True)
        pe.parse_data_directories(
            directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG']])

        code_view_entry = None
        # pefile only sets DIRECTORY_ENTRY_DEBUG when the directory exists.
        for debug_entry in getattr(pe, 'DIRECTORY_ENTRY_DEBUG', ()):
            # .get(): an unknown debug type must not abort the search
            type_name = DEBUG_TYPE.get(debug_entry.struct.Type)
            if type_name == "IMAGE_DEBUG_TYPE_CODEVIEW":
                code_view_entry = debug_entry
                break

        if code_view_entry is None:
            logger.warning("%s doesn't have symbol information",
                           basename(file_path))
            return None, None

        symbol_type_offset = code_view_entry.struct.PointerToRawData
        symbol_type_size = code_view_entry.struct.SizeOfData
        symbol_type_data = pe.__data__[
            symbol_type_offset:symbol_type_offset + symbol_type_size]

        # The image data is bytes, so the magic values must be bytes
        # literals; a str literal never compares equal on Python 3.
        magic = symbol_type_data[:4]
        if magic == b"RSDS":
            rsds = CV_RSDS_HEADER.parse(symbol_type_data)
            guid = "%08x%04x%04x%s%x" % (
                rsds.GUID.Data1,
                rsds.GUID.Data2,
                rsds.GUID.Data3,
                hexlify(rsds.GUID.Data4).decode('ascii'),
                rsds.Age,
            )
            pdb_filename = ntbasename(rsds.Filename)
        elif magic == b"NB10":
            nb10 = CV_NB10_HEADER.parse(symbol_type_data)
            guid = "%x%x" % (nb10.Timestamp, nb10.Age)
            pdb_filename = ntbasename(nb10.Filename)
        else:
            logger.error("%s unsupported symbol type", magic)
            return None, None

        # Explicit check instead of `assert`, which `python -O` strips.
        if not guid or not pdb_filename:
            logger.error("%s has incomplete CodeView information",
                         basename(file_path))
            return None, None

        symbol = __fetch__(symbol_server, guid, file_path, pdb_filename)

        # NOTE(review): the payload type of __fetch__ is not visible here,
        # so both a bytes and a str cabinet signature are accepted.
        if symbol[:4] not in (b'MSCF', 'MSCF'):
            logger.error("Excpected symbol server to return a cabinet file")
            return None, None

        # TODO, unpack cabinet
        return symbol, basename(pdb_filename)
    except Exception:
        # Best-effort boundary: any failure is logged with its traceback.
        logger.error(format_exc())
        return None, None
def map_and_load(self, path, execute_now=False):
    """
    Map the PE module at ``path`` into emulated memory.

    Loading starts at the image's preferred base, or at ``next_image_base``
    when that is higher. If the candidate address is already mapped, the
    image is relocated upwards in 0x10000 steps until a free address is
    found or the heap base address would be reached.

    Args:
        path: path of the module binary to load.
        execute_now: run the module's entry point immediately; otherwise
            the module is appended to ``self.modules``.

    Returns:
        True when the module was mapped, False when no free address below
        the heap base address was found.
    """
    ql = self.ql
    pe = PE(path, fast_load=True)

    # Make sure no module will occupy the NULL page
    preferred_base = pe.OPTIONAL_HEADER.ImageBase
    if self.next_image_base > preferred_base:
        base = self.next_image_base
        pe.relocate_image(base)
    else:
        base = preferred_base

    size = ql.mem.align(pe.OPTIONAL_HEADER.SizeOfImage, 0x1000)

    while base + size < self.heap_base_address:
        if ql.mem.is_mapped(base, 1):
            # address is taken: try the next slot
            base += 0x10000
            pe.relocate_image(base)
            continue

        self.next_image_base = base + 0x10000
        ql.mem.map(base, size)
        pe.parse_data_directories()
        image = bytearray(pe.get_memory_mapped_image())
        ql.mem.write(base, bytes(image))
        logging.info("[+] Loading %s to 0x%x" % (path, base))

        entry_point = base + pe.OPTIONAL_HEADER.AddressOfEntryPoint
        if self.entry_point == 0:
            # Setting entry point to the first loaded module entry point,
            # so the debugger can break.
            self.entry_point = entry_point
        logging.info("[+] PE entry point at 0x%x" % entry_point)

        self.install_loaded_image_protocol(base, size)
        image_end = base + pe.NT_HEADERS.OPTIONAL_HEADER.SizeOfImage
        self.images.append(self.coverage_image(base, image_end, path))

        if not execute_now:
            self.modules.append((path, base, entry_point, pe))
            return True

        logging.info(f'[+] Running from 0x{entry_point:x} of {path}')
        assembler = self.ql.create_assembler()
        code = f"""
            mov rcx, {base}
            mov rdx, {self.gST}
            mov rax, {entry_point}
            call rax
        """
        runcode, _ = assembler.asm(code)
        ptr = ql.os.heap.alloc(len(runcode))
        ql.mem.write(ptr, bytes(runcode))
        ql.os.exec_arbitrary(ptr, ptr + len(runcode))
        return True

    return False
class PEDebugData(object):
    """
    Access to the debug (CodeView) information of a PE file.

    Args:
        path: path of the PE file, passed to ``PE``.
        filename: name used to build identifiers; defaults to the base
            name of ``path``.
    """

    def __init__(self, path, filename=None):
        self.pe = PE(path, fast_load=True)
        self.path = path
        self.filename = filename
        if filename is None:
            self.filename = os.path.basename(path)

    @property
    def symbol_id(self):
        """The ``symbol_id`` of the parsed CodeView record."""
        return self.codeview_info().symbol_id

    @property
    def executable_id(self):
        """
        ``<lower-case file name>/<TimeDateStamp><SizeOfImage>`` with both
        numbers in upper-case hex, or None when no file name is set.
        """
        if self.filename is None:
            return None
        return '%s/%X%X' % (self.filename.lower(),
                            self.pe.FILE_HEADER.TimeDateStamp,
                            self.pe.OPTIONAL_HEADER.SizeOfImage)

    def codeview_info(self):
        """
        Parse the debug data into a CodeView record object.

        Raises:
            PEMissingDebugDataError: no debug data was found.
            PEUnknownDebugDataError: the data is neither RSDS nor NB10.
        """
        data = self.debug_data()
        if data is None:
            raise PEMissingDebugDataError()
        # The image data is bytes; comparing it with a str literal never
        # matches on Python 3, so bytes literals are required here.
        magic = data[:4]
        if magic == b'RSDS':
            return CodeViewRSDS(data)
        if magic == b'NB10':
            return CodeViewNB10(data)
        raise PEUnknownDebugDataError('Unknown CodeView type: %s' % data[:4])

    def debug_data(self):
        """
        Return the raw bytes of the first CODEVIEW or MISC debug entry, or
        None when the image has no such entry.
        """
        if not hasattr(self.pe, 'DIRECTORY_ENTRY_DEBUG'):
            self.pe.parse_data_directories(
                directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG']])

        # Still missing after parsing means there is no debug directory.
        entries = getattr(self.pe, 'DIRECTORY_ENTRY_DEBUG', None)
        if not entries:
            return None

        wanted_types = (DEBUG_TYPE['IMAGE_DEBUG_TYPE_CODEVIEW'],
                        DEBUG_TYPE['IMAGE_DEBUG_TYPE_MISC'])
        for entry in entries:
            if entry.struct.Type in wanted_types:
                off = entry.struct.PointerToRawData
                return self.pe.__data__[off:off + entry.struct.SizeOfData]
        return None
def process(self, data: bytearray) -> bytearray:
    """
    Extract the certificate data referenced by the security data directory
    of a PE file.

    The directory entry points at an 8-byte header (length, revision,
    certificate type) that is followed by the signature itself.

    Raises:
        ValueError: the security directory entry is empty. Without this
            check the header would be read from the start of the file.
        RefineryPartialResult: the number of bytes found does not match
            the length stored in the header; the bytes that were found are
            attached as the partial result.
    """
    pe = PE(data=data, fast_load=True)
    pe.parse_data_directories(directories=[self._SECDIRID])
    security = pe.OPTIONAL_HEADER.DATA_DIRECTORY[self._SECDIRID]

    if security.VirtualAddress == 0 or security.Size == 0:
        raise ValueError(
            F'IMAGE_DIRECTORY_ENTRY_SECURITY ({self._SECDIRID}) is corrupt.')

    sgnoff = security.VirtualAddress + 8
    sgnend = sgnoff + security.Size
    length, revision, certtype = unpack('<IHH', data[sgnoff - 8:sgnoff])
    signature = data[sgnoff:sgnend]

    # the stored length counts the 8 header bytes as well
    found = len(signature) + 8
    if found != length:
        raise RefineryPartialResult(
            F'Found {found} bytes of signature, but length should be {length}.',
            partial=signature)
    return signature
def _locate_extra_dependencies(dll_filepath, search_path_dll_map, extra_dependencies, package_dir): dll_filepath = Path(dll_filepath) header = ModuleHeader(dll_filepath) architecture = header.getArchitecture() pe = PE(dll_filepath) pe.parse_data_directories(import_dllnames_only=True) if dll_filepath.name in extra_dependencies: return extra_dependencies[dll_filepath.name] = dll_filepath for dependency in pe.DIRECTORY_ENTRY_IMPORT: dependency_dll_name = dependency.dll.decode('utf-8') try: result = WindowsApi.loadModule(dependency_dll_name, cwd=dll_filepath.parent, architecture=architecture) assert result in (0, 126) except AssertionError: print( f"Encountered error {result} for dependency {dependency_dll_name}. Aborting ..." ) exit(1) if result == 126: try: search_path = search_path_dll_map[dependency_dll_name] if dependency_dll_name not in extra_dependencies: dependency_dll_filepath = Path(search_path, dependency_dll_name) dependency_dll_copy_filepath = Path( package_dir, dependency_dll_filepath.name) copyfile(dependency_dll_filepath, dependency_dll_copy_filepath) _locate_extra_dependencies(dependency_dll_copy_filepath, search_path_dll_map, extra_dependencies, package_dir) except KeyError: print( f"Dependency {dependency_dll_name} was not found in any search path. Aborting ..." ) exit(1)
def get_import_table_string(file_path):
    """
    Collect the names of all functions imported by a PE file.

    The names are written, one per line, to ``<file_path>.log`` and are
    returned concatenated as a single bytes object.

    Parsing is best effort: an error while reading the PE file is printed
    and whatever was collected up to that point is still written and
    returned. Imports without a name are skipped.

    Args:
        file_path: path of the PE file, given as a string.

    Returns:
        The concatenated import names as bytes (empty if none were read).
    """
    name_chunks = []
    text_lines = []
    try:
        pe = PE(file_path, fast_load=True)
        pe.parse_data_directories()
        for entry in pe.DIRECTORY_ENTRY_IMPORT:
            for imp in entry.imports:
                name = imp.name
                # imports by ordinal carry no name
                if not isinstance(name, bytes):
                    continue
                name_chunks.append(name)
                try:
                    text_lines.append(name.decode("utf-8") + '\n')
                except UnicodeDecodeError:
                    # keep the raw bytes, leave the name out of the log
                    pass
    except Exception as e:
        print(e)

    # `with` closes the log file; the original never closed the handle.
    # The names were decoded as UTF-8, so the log is written as UTF-8 too.
    with open(file_path + ".log", "w", encoding="utf-8") as f:
        f.write("".join(text_lines))
    return b"".join(name_chunks)
def process(self, data: bytearray) -> bytearray:
    """
    Extract the certificate data referenced by the security data directory
    of a PE file.

    Offset and size of the directory are logged. The directory entry points
    at an 8-byte header (length, revision, certificate type) that is
    followed by the signature itself.

    Raises:
        ValueError: the security directory entry has a zero offset or size.
        RefineryPartialResult: the number of bytes found does not match
            the length stored in the header; the bytes that were found are
            attached as the partial result.
    """
    parsed = PE(data=data, fast_load=True)
    parsed.parse_data_directories(directories=[self._SECDIRID])
    entry = parsed.OPTIONAL_HEADER.DATA_DIRECTORY[self._SECDIRID]

    self.log_info(F'signature offset: 0x{entry.VirtualAddress:08X}')
    self.log_info(F'signature length: 0x{entry.Size:08X}')

    if entry.VirtualAddress == 0 or entry.Size == 0:
        raise ValueError(
            F'IMAGE_DIRECTORY_ENTRY_SECURITY ({self._SECDIRID}) is corrupt.'
        )

    header_start = entry.VirtualAddress
    body_start = header_start + 8
    body_end = body_start + entry.Size

    length, revision, certtype = unpack('<IHH', data[header_start:body_start])
    signature = data[body_start:body_end]

    # the stored length counts the 8 header bytes as well
    if length == len(signature) + 8:
        return signature
    raise RefineryPartialResult(
        F'Found {len(signature) + 8} bytes of signature, but length should be {length}.',
        partial=signature)
def map_and_load(self, path: str, exec_now: bool = False):
    """Map and load a module into memory.

    The module is placed at its preferred image base; only when that base
    is NULL is the address stored in the `next_image_base` member used
    instead. It is the caller's responsibility to make sure that the
    memory is available. On success, `next_image_base` is updated to point
    right behind the loaded image.

    Args:
        path     : path of the module binary to load
        exec_now : execute the module right away; it is enqueued otherwise

    Raises:
        QlMemoryMappedError : when the target address is not available
    """
    ql = self.ql
    pe = PE(path, fast_load=True)

    # use image base only if it does not point to NULL
    preferred_base = pe.OPTIONAL_HEADER.ImageBase
    image_base = preferred_base if preferred_base else self.next_image_base
    image_size = ql.mem.align(pe.OPTIONAL_HEADER.SizeOfImage, 0x1000)

    assert (image_base % 0x1000) == 0, 'image base is expected to be page-aligned'

    if image_base != preferred_base:
        pe.relocate_image(image_base)

    pe.parse_data_directories()
    image = bytes(pe.get_memory_mapped_image())

    ql.mem.map(image_base, image_size, info="[module]")
    ql.mem.write(image_base, image)
    ql.log.info(f'Module {path} loaded to {image_base:#x}')

    entry_point = image_base + pe.OPTIONAL_HEADER.AddressOfEntryPoint
    ql.log.info(f'Module entry point at {entry_point:#x}')

    # the debugger breaks on the 'entry_point' member; default it to the
    # entry point of the first module that gets loaded
    if self.entry_point == 0:
        self.entry_point = entry_point

    self.install_loaded_image_protocol(image_base, image_size)

    # recorded for later use by os.find_containing_image
    image_end = image_base + image_size
    self.images.append(self.coverage_image(image_base, image_end, path))

    # advance to the next memory slot to allow sequential loading; whether
    # that slot is actually available is unknown at this point
    self.next_image_base = image_end

    module_info = (path, image_base, entry_point)

    if not exec_now:
        # enqueue the module for later execution
        self.modules.append(module_info)
    else:
        # call entry point while retaining the current return address
        self.execute_module(*module_info, eoe_trap=None)
class PEInfo:
    """
    Summary of a PE file: sections, imports, exports, and printable strings.

    After construction, ``strings`` is a list of ``(hex address, text)``
    tuples and ``stringAddrs`` holds the integer addresses of the strings
    found outside of data sections.
    """

    # Printable-string matcher shared by findStrings() and strings().
    # NOTE(review): inside the character class `)-_` forms a range
    # (0x29-0x5F) rather than three literal characters; left unchanged so
    # the set of reported strings stays the same.
    _STRING_PATTERN = re.compile(
        b"([a-zA-Z0-9` \n~!@#$%^&*()-_=+|';\":.,?><*-]{2,})")

    def __init__(self, path):
        self.path = path
        self.type = 'PE'
        self.parser = PE(path, fast_load=True)
        self.parser.parse_data_directories()
        self.imageBase = self.parser.OPTIONAL_HEADER.ImageBase
        self.entryPoint = self.parser.OPTIONAL_HEADER.AddressOfEntryPoint
        self.sections = []
        self.stringAddrs = []
        # From here on `strings` is the result list; the instance
        # attribute shadows the method of the same name.
        self.strings = self.strings()

        for section in self.parser.sections:
            self.sections.append(
                Section(self._section_name(section).replace('\x00', ''),
                        section.VirtualAddress + self.imageBase,
                        section.Misc_VirtualSize))

        self.imports = []
        # pefile sets DIRECTORY_ENTRY_IMPORT only if the image has imports.
        for entry in getattr(self.parser, 'DIRECTORY_ENTRY_IMPORT', ()):
            dll_name = entry.dll.decode()
            for imp in entry.imports:
                self.imports.append(
                    ImportFunction(self._symbol_name(imp), imp.address,
                                   dll_name))

        self.exports = []
        if hasattr(self.parser, "DIRECTORY_ENTRY_EXPORT"):
            for exp in self.parser.DIRECTORY_ENTRY_EXPORT.symbols:
                self.exports.append(
                    ExportFunction(self.imageBase + exp.address,
                                   self._symbol_name(exp)))

    @staticmethod
    def _section_name(section):
        """Decode a section name without failing on non-UTF-8 bytes."""
        return section.Name.decode('utf-8', 'replace')

    @staticmethod
    def _symbol_name(symbol):
        """Name of an import/export; ``ordinal_<n>`` if it has no name."""
        if symbol.name:
            return symbol.name.decode()
        return 'ordinal_%s' % symbol.ordinal

    def getOffsetAtAddress(self, address):
        """Return the file offset of an RVA, or None if no section holds it."""
        for section in self.parser.sections:
            if section.contains_rva(address):
                return section.get_offset_from_rva(address)
        return None

    @property
    def codeRange(self):
        """Address interval of the first section whose name contains '.text'
        (None if there is no such section)."""
        for section in self.parser.sections:
            if '.text' in self._section_name(section):
                start = section.VirtualAddress + self.imageBase
                return interval([(start, start + section.Misc_VirtualSize)])

    @property
    def dataRange(self):
        """(start, end) addresses of every section whose name contains 'data'."""
        return [
            (section.VirtualAddress + self.imageBase,
             section.VirtualAddress + self.imageBase + section.Misc_VirtualSize)
            for section in self.parser.sections
            if 'data' in self._section_name(section)
        ]

    def inDataSection(self, address):
        """Return True if ``address`` lies inside any data section."""
        # every range is checked, not just the first one
        return any(start <= address < end for start, end in self.dataRange)

    def getData(self, start, size):
        """Read ``size`` bytes at file offset ``start`` from the PE file."""
        with open(self.path, 'rb') as f:
            f.seek(start)
            return f.read(size)

    def _scan_section(self, section):
        """Yield (address, text) for each printable string in a section."""
        base = self.imageBase + section.VirtualAddress
        data = self.getData(section.PointerToRawData, section.SizeOfRawData)
        for match in self._STRING_PATTERN.finditer(data):
            # str(bytes)[2:-1] keeps the escaped repr without the b'' wrapper
            yield base + match.start(0), str(match.group(0))[2:-1]

    def findStrings(self):
        """Return {address: text} for the strings found in data sections."""
        strings = {}
        for section in self.parser.sections:
            if 'data' in self._section_name(section):
                for address, text in self._scan_section(section):
                    strings[address] = text
        return strings

    def strings(self):
        """
        Return (hex address, text) tuples for the strings of all sections.

        Data-section strings come first. Strings of the other sections
        follow, and their addresses are also appended to ``stringAddrs``.
        """
        strings = [(hex(address), text)
                   for address, text in self.findStrings().items()]
        for section in self.parser.sections:
            # `section.Name` is the section's own name; lower-case `name`
            # is pefile's structure label and never contains 'data'.
            if 'data' not in self._section_name(section):
                for address, text in self._scan_section(section):
                    strings.append((hex(address), text))
                    self.stringAddrs.append(address)
        return strings

    def info(self):
        """Return an HTML snippet with name, type, image base, entry point."""
        text = 'File name: <b>' + os.path.basename(self.path) + '</b><br/>'
        text += 'Type: <b>' + self.type + '</b><br/>'
        text += 'Imagebase: <b>' + hex(self.imageBase) + '</b><br/>'
        text += 'Entrypoint: <b>' + hex(self.entryPoint) + '</b>'
        return text
def parse_resources(pe: pefile.PE):
    """
    Parse only the resource directory of ``pe`` and return the result of
    ``has_resources`` for the parsed image.
    """
    resource_directory = pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']
    pe.parse_data_directories(directories=[resource_directory])
    return has_resources(pe)