def __init__(self, path, fn):
    """Load a 32-bit PE file and set up the state used by later analysis.

    @param path: path of the PE file, passed to pefile.PE
    @param fn: file name stored on the instance; printed when the binary
               is rejected as 64-bit
    @raise pefile.PEFormatError: when the optional-header magic is 0x20b
           (reported as 'x64 binary'), or when pefile cannot parse the file
    """
    self.fn = fn
    # Parse headers only at first so 64-bit images are rejected cheaply.
    self.pe = pefile.PE(path, fast_load=True)
    if self.pe.OPTIONAL_HEADER.Magic == 0x20b:
        print(fn)
        raise pefile.PEFormatError('x64 binary')

    self.neg_history = list()
    self.ep = self.pe.OPTIONAL_HEADER.AddressOfEntryPoint
    self.imageBase = self.pe.OPTIONAL_HEADER.ImageBase
    # 32-bit x86 disassembler, matching the 64-bit rejection above.
    self.md = Cs(CS_ARCH_X86, CS_MODE_32)
    self.data = self.pe.get_memory_mapped_image()
    self.offset = 100
    self.importTable = list()
    self.pe.full_load()

    # Sequences of instruction shapes.
    # NOTE(review): presumably matched against disassembly to locate the
    # call into main; the meaning of the 'r' / 'i' / 'n' operand tags is
    # defined by the matcher, which is not visible here - confirm there.
    self.main_func_patterns = [
        ['push r', 'push i', 'push r', 'push r', 'call', 'push r', 'call'],
        ['push r', 'push r', 'push r', 'push r', 'call', 'push r', 'call'],
        ['push r', 'push r', 'push n', 'push n', 'call', 'push r', 'call'],
        ['push r', 'push r', 'push n', 'push n', 'call'],
        ['push r', 'push r', 'push r', 'push n', 'call'],
        ['push i', 'push i', 'push i', 'call'],
        ['push r', 'push r', 'push i', 'call'],
    ]
    self.exit_proc_funcs = [
        'abort',
        'exitprocess',
        'exit',
        'thunrtmain',
        'afxwinmain',
        'terminateprocess',
    ]
    self.not_exit_proc_funcs = ['msg', 'exitwindow']

    self.pe.parse_data_directories()
    # pefile only sets DIRECTORY_ENTRY_IMPORT when the image has an import
    # directory, so catch exactly the missing-attribute case instead of
    # hiding every possible error behind a bare except.
    try:
        import_entries = self.pe.DIRECTORY_ENTRY_IMPORT
    except AttributeError:
        print('[*] no IMP')
    else:
        self.importTable.extend(
            (imp.address, imp.name)
            for entry in import_entries
            for imp in entry.imports
        )
def process_pe_section(self, pe, section):
    """
    Retrieve desired PE section

    A request of the form '<name>:header' returns the packed section
    header; a plain '<name>' returns the section data.

    @param pe: PE object
    @param section: PE section to search, optionally suffixed ':header'

    @return a dict with the requested 'section' string and its 'data'
    @raise pefile.PEFormatError: if no section has the requested name
    """
    search_header = re.search(r'^(.+)(:header)$', section)
    wanted_header = search_header.group(1) if search_header else None

    # Iterate through all existing PE sections
    for sec in pe.sections:
        # Section names are NUL-padded and are bytes on Python 3, so
        # normalise them to text before comparing with the request.
        sec_name = sec.Name
        if not isinstance(sec_name, str):
            sec_name = sec_name.decode('utf-8', 'replace')
        sec_name = sec_name.replace('\x00', '')

        if search_header and wanted_header == sec_name:
            # Get section header
            return {'section': section, 'data': sec.__pack__()}
        elif section == sec_name:
            # Get section data
            return {'section': section, 'data': sec.get_data()}

    header = wanted_header if search_header else section
    raise pefile.PEFormatError('Section {0} not found'.format(header))
def process_pe_section(self, pe, section):
    """
    Retrieve a PE section together with its offset and size

    A request of the form '<name>:header' returns the packed section
    header (offset 0, size of the packed header); a plain '<name>'
    returns the section data with its virtual address and raw size.

    @param pe: PE object
    @param section: PE section to search, optionally suffixed ':header'

    @return a dict with keys 'section', 'data', 'offset' and 'size'
    @raise pefile.PEFormatError: if no section has the requested name
    """
    search_header = re.search(r'^(.+)(:header)$', section)
    wanted_header = search_header.group(1) if search_header else None

    # Iterate through all existing PE sections
    for sec in pe.sections:
        # Section names are NUL-padded and are bytes on Python 3, so
        # normalise them to text before comparing with the request.
        sec_name = sec.Name
        if not isinstance(sec_name, str):
            sec_name = sec_name.decode('utf-8', 'replace')
        sec_name = sec_name.replace('\x00', '')

        if search_header and wanted_header == sec_name:
            # Get section header (packed once, reused for the size)
            packed = sec.__pack__()
            return {
                'section': section,
                'data': packed,
                'offset': 0,
                'size': len(packed)
            }
        elif section == sec_name:
            # Get section data; 'section' deliberately stays the raw,
            # still-padded name, as callers received it before.
            # NOTE(review): relies on the section object exposing a
            # `data` attribute - confirm against the pefile version used.
            return {
                'section': sec.Name,
                'data': sec.data,
                'offset': sec.VirtualAddress,
                'size': sec.SizeOfRawData
            }

    header = wanted_header if search_header else section
    raise pefile.PEFormatError('Section {0} not found'.format(header))
def __init__(self, file_name):
    """Remember the file path and parse the file with pefile.

    @param file_name: path of the PE file to load
    @raise pefile.PEFormatError: '__EmptyFile__' when the parsed object
           evaluates as false; pefile.PE may also raise while parsing
    """
    self.filepath = file_name
    parsed = pefile.PE(self.filepath)
    self.pe = parsed
    if parsed:
        return
    raise pefile.PEFormatError('__EmptyFile__')
def run(self):
    """Parse the file at self.filepath with pefile and summarise it.

    Returns a dict with the import hash, parser warnings, file type,
    sections, machine/architecture, OS version, entry point, image base,
    compilation timestamp, import table, export table (first 100 names)
    and flags.

    If pefile raises PEFormatError, the error is logged, appended to
    self.report.errors, the report is marked FAILED and saved, and
    whatever was collected so far (possibly an empty dict) is returned.
    """
    results = {}
    try:
        pe = pefile.PE(self.filepath)
        if not pe:
            raise pefile.PEFormatError("Empty file?")
        full_dump = pe.dump_dict()

        results["imphash"] = pe.get_imphash()
        results["warnings"] = pe.get_warnings()

        if pe.is_dll():
            results["type"] = "DLL"
        elif pe.is_driver():
            results["type"] = "DRIVER"
        elif pe.is_exe():
            results["type"] = "EXE"

        sections = []
        for section in pe.sections:
            try:
                name = section.Name.decode()
            except UnicodeDecodeError as e:
                name = "UnableToDecode"
                logger.warning(
                    f"Unable to decode section {section.Name} exception {e}"
                )
            section_item = {
                "name": name,
                "address": hex(section.VirtualAddress),
                "virtual_size": hex(section.Misc_VirtualSize),
                "size": section.SizeOfRawData,
                "entropy": section.get_entropy(),
            }
            sections.append(section_item)
        results["sections"] = sections

        machine_value = pe.FILE_HEADER.Machine
        results["machine"] = machine_value
        # Keyed by the integer machine value. The previous string table
        # used "0x0200" for Itanium, which hex(0x200) == "0x200" could
        # never match, so Itanium images were reported as unknown.
        mt = {0x14C: "x86", 0x200: "Itanium", 0x8664: "x64"}
        architecture = ""
        if isinstance(machine_value, int):
            architecture = mt.get(machine_value, "")
        if not architecture:
            architecture = str(machine_value) + " => Not x86/64 or Itanium"
        results["architecture"] = architecture

        results["os"] = "{}.{}".format(
            pe.OPTIONAL_HEADER.MajorOperatingSystemVersion,
            pe.OPTIONAL_HEADER.MinorOperatingSystemVersion,
        )
        results["entrypoint"] = hex(pe.OPTIONAL_HEADER.AddressOfEntryPoint)
        results["imagebase"] = hex(pe.OPTIONAL_HEADER.ImageBase)

        timestamp = pe.FILE_HEADER.TimeDateStamp
        results["compilation_timestamp"] = datetime.utcfromtimestamp(
            timestamp).strftime("%Y-%m-%d %H:%M:%S")

        import_table = []
        # The attribute is absent when the image has no import directory.
        directory_entry_import = getattr(pe, "DIRECTORY_ENTRY_IMPORT", [])
        for entry in directory_entry_import:
            imp = {
                "entryname": entry.dll.decode() if entry.dll else None,
                "symbols": [],
            }
            for symbol in entry.imports:
                # Imports without a name are skipped.
                if symbol.name:
                    imp["symbols"].append(symbol.name.decode())
            import_table.append(imp)
        results["import_table"] = import_table

        export_table = []
        for entry in full_dump.get("Exported symbols", []):
            symbol_name = entry.get("Name", None)
            # in case it is a dictionary, we do not mind it
            try:
                export_table.append(symbol_name.decode())
            except (UnicodeDecodeError, AttributeError) as e:
                logger.debug(
                    f"PE info error while decoding export table symbols: {e}"
                )
        # this is to reduce the output
        export_table = export_table[:100]
        results["export_table"] = export_table

        results["flags"] = full_dump.get("Flags", [])

    except pefile.PEFormatError as e:
        warning_message = (
            "job_id:{} analyzer:{} md5:{} filename: {} PEFormatError {}"
            "".format(self.job_id, self.analyzer_name, self.md5,
                      self.filename, e))
        logger.warning(warning_message)
        self.report.errors.append(warning_message)
        self.report.status = self.report.Status.FAILED
        self.report.save()

    return results
def collect(filename):
    """
    Collects the data from the PE format of the specified filename. It assumes that the file already exists in the
    working directory.

    :param filename: String representing the file's name
    :return: Dictionary containing all collected data
    :raises pefile.PEFormatError: if the DOS header magic is not 'MZ' (pefile may also raise it while parsing)
    """
    data = {}
    pe = pefile.PE(filename, fast_load=False)

    # 23117 == 0x5A4D, the little-endian 'MZ' DOS signature. The earlier
    # check raised only when the signature *was* 'MZ' and rebound the PE
    # object to a closed file handle, breaking every access below.
    if pe.DOS_HEADER.e_magic != 23117:
        raise pefile.PEFormatError(
            "File: %s doesn't contain 'MZ' magic value" % filename)

    data.update({'Section Count': pe.FILE_HEADER.NumberOfSections})
    data.update({'Page Count': pe.DOS_HEADER.e_cp})
    data.update({
        'Time':
        str(datetime.datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp))
    })

    # TODO: Add more machines (MIPS, RISC-V, Hitachi)
    # Sets the machine by comparing returned integer from FILE_HEADER to a predetermined dictionary of machines
    machines_dict = {
        34404: "x86-64",
        332: "Intel i386",
        448: "ARM",
        43620: "ARM64",
        3772: "EFI"
    }
    data.update(
        {'Machine': machines_dict.get(pe.FILE_HEADER.Machine, "Unknown")})

    data.update({'Entry Point': pe.OPTIONAL_HEADER.AddressOfEntryPoint})

    # Sets the subsystem by comparing returned integer from OPTIONAL_HEADER to a predetermined dictionary of subsystems
    subsystems_dict = {
        1: "Native",
        2: "Windows GUI",
        3: "Windows CUI",
        5: "OS/2 CUI",
        7: "POSIX CUI",
        8: "Native Windows",
        9: "Windows CE",
        10: "EFI Application",
        16: "Windows Boot Application"
    }
    data.update({
        'Subsystem':
        subsystems_dict.get(pe.OPTIONAL_HEADER.Subsystem, "Unknown")
    })

    # Gets information on each section including name, entropy, address, virtual size, and raw size
    sections_dict = {}
    for section in pe.sections:
        # Names are NUL-padded bytes; undecodable bytes are replaced so a
        # single odd section name does not abort the whole collection.
        section_name = section.Name.decode("utf-8",
                                           errors="replace").rstrip('\x00')
        sections_dict.update({
            section_name: {
                'name': section_name,
                'entropy': round(get_entropy(section), 5),
                'address': hex(section.VirtualAddress),
                'Virtual Size': hex(section.Misc_VirtualSize),
                'Raw Size': hex(section.SizeOfRawData)
            }
        })

    # Updates the data dictionary with a sections dictionary
    data.update({'Sections': sections_dict})
    return data