class ParsePE32(object):
    """List the DLLs named in the import directory of a PE image.

    The hard-coded header offsets target the PE32+ (64-bit) optional header
    layout; see ``get_data_directory_array_offs``.

    Attributes:
        bin_contents: Raw contents of the binary as returned by
            ``FileUtils.read_file`` (expected to be a byte string).
        section_headers_info: Iterable of dicts with at least the keys
            ``'Virtual Address'``, ``'Virtual Size'`` and
            ``'Pointer to Raw Data'``, as returned by
            ``PESectionUtils.get_sections_info``.
        pe_section_utils: ``PESectionUtils`` helper built from
            ``bin_contents``.
    """

    # sizeof(IMAGE_IMPORT_DESCRIPTOR): five DWORD fields.
    _IMPORT_DESCRIPTOR_SIZE = 0x14
    # Offset of the Name RVA field inside IMAGE_IMPORT_DESCRIPTOR.
    _IMPORT_DESCRIPTOR_NAME_OFFS = 0xC
    # sizeof(IMAGE_DATA_DIRECTORY): VirtualAddress + Size, two DWORDs.
    _IMAGE_DATA_DIRECTORY_SIZE = 0x8

    def __init__(self):
        self.bin_contents = None
        self.section_headers_info = None
        self.pe_section_utils = None

    def execute(self, bin_path):
        """Read the file at ``bin_path`` and parse it if it is not empty."""
        self.bin_contents = FileUtils.read_file(bin_path)
        if self.bin_contents:
            self.parse_pe32()

    def parse_pe32(self):
        """Load the section table and log the imported DLL names."""
        self.pe_section_utils = PESectionUtils(self.bin_contents)
        self.section_headers_info = self.pe_section_utils.get_sections_info()
        imported_dlls = self.get_imported_dlls()
        logging.info('PE Imported DLLs #: {}'.format(len(imported_dlls)))
        logging.info(
            'PE Imported DLLs: \n- {}'.format('\n- '.join(imported_dlls)))

    def get_imported_dlls(self):
        """Return the DLL names of every import descriptor, in file order.

        The walk stops at the first descriptor whose name cannot be read,
        which covers the all-zero descriptor terminating the table as well
        as a missing or truncated import directory.
        """
        dll_names = []
        descriptor_foff = self.get_first_import_descriptor_foff()
        # The stride is sizeof(IMAGE_IMPORT_DESCRIPTOR). The Size field of the
        # data directory entry (see get_import_descriptor_size) is the size of
        # the whole table, so it cannot be used as the step.
        while True:
            name = self.get_dll_name(descriptor_foff)
            if not name:
                break
            dll_names.append(name)
            descriptor_foff += self._IMPORT_DESCRIPTOR_SIZE
        return dll_names

    def get_data_directory_array_offs(self):
        """Return the file offset of ``DataDirectory[16]`` (OptionalHeader)."""
        optional_header_offs = self.pe_section_utils.get_optional_header_offs()
        # PE32+: the offsets in Ero's poster are off starting with the second
        # QWORD (SizeOfStackReserve); the extra 0x10 compensates for that.
        return optional_header_offs + 0x60 + 0x10

    def get_import_directory_offs(self):
        """Return the file offset of IMAGE_DIRECTORY_ENTRY_IMPORT.

        That entry is ``DataDirectory[1]``.
        """
        data_directory_array_offs = self.get_data_directory_array_offs()
        return data_directory_array_offs + self._IMAGE_DATA_DIRECTORY_SIZE * 1

    def get_import_descriptor_size(self):
        """Return the ``Size`` field of the import data directory entry.

        ``Size`` is a DWORD located 4 bytes into the entry. Despite the method
        name, the PE format defines it as the byte size of the whole import
        table, not of a single descriptor.
        """
        size_offs = 0x4
        return self._unpack_dword(
            self.bin_contents, self.get_import_directory_offs() + size_offs)

    def get_first_import_descriptor_rva(self):
        """Return the ``VirtualAddress`` (RVA) of the import directory.

        The field is a DWORD; reading only 16 bits would truncate any RVA
        above 0xFFFF.
        """
        virtual_address_offs = 0x0
        return self._unpack_dword(
            self.bin_contents,
            self.get_import_directory_offs() + virtual_address_offs)

    def get_first_import_descriptor_foff(self):
        """Return the file offset of the first import descriptor.

        Returns -1 when the RVA does not fall inside any section.
        """
        return self._rva_to_file_offset(self.get_first_import_descriptor_rva())

    def get_dll_name(self, import_descriptor_addr):
        """Return the DLL name referenced by an import descriptor.

        Args:
            import_descriptor_addr: File offset of an IMAGE_IMPORT_DESCRIPTOR.

        Returns:
            The name as a string, or ``''`` when the offset is invalid, the
            descriptor does not fit in the file, or its Name RVA does not map
            to any section (as for the all-zero terminating descriptor).
        """
        if import_descriptor_addr < 0:
            return ''
        name_field_foff = (
            import_descriptor_addr + self._IMPORT_DESCRIPTOR_NAME_OFFS)
        if name_field_foff + 4 > len(self.bin_contents):
            return ''
        name_rva = self._unpack_dword(self.bin_contents, name_field_foff)
        name_foff = self._rva_to_file_offset(name_rva)
        if name_foff == -1:
            # Unmapped RVA: never hand the -1 sentinel to the string reader,
            # it would be taken as "last byte of the file".
            return ''
        return self._read_null_terminated_ascii_string(name_foff)

    def _read_null_terminated_ascii_string(self, starting_addr):
        """Read the NUL-terminated string starting at a file offset.

        Returns ``''`` for an offset outside the file. A string that runs to
        the end of the file without a terminator is returned up to EOF.
        Byte input is decoded as latin-1 so that every byte value maps to
        exactly one character.
        """
        contents = self.bin_contents
        if starting_addr < 0 or starting_addr >= len(contents):
            return ''
        # Python 2 byte strings are ``str``; Python 3 file contents are bytes.
        terminator = '\x00' if isinstance(contents, str) else b'\x00'
        end = contents.find(terminator, starting_addr)
        if end == -1:
            end = len(contents)
        raw = contents[starting_addr:end]
        if isinstance(raw, str):
            return raw
        return bytes(raw).decode('latin-1')

    def _rva_to_file_offset(self, rva):
        """Translate an RVA to a file offset, or -1 if no section holds it."""
        for section_info in self.section_headers_info:
            if self._rva_belongs_to_section(rva, section_info):
                return (rva - section_info['Virtual Address']
                        + section_info['Pointer to Raw Data'])
        return -1

    @staticmethod
    def _rva_belongs_to_section(rva, section_info):
        """Return True when ``rva`` lies in the section's virtual range."""
        start = section_info['Virtual Address']
        return start <= rva < start + section_info['Virtual Size']

    @staticmethod
    def _unpack_dword(contents, offs):
        """Read a little-endian unsigned 32-bit value at ``offs``."""
        return struct.unpack_from('<L', contents, offs)[0]

    @staticmethod
    def _unpack_word(contents, offs):
        """Read a little-endian unsigned 16-bit value at ``offs``."""
        return struct.unpack_from('<H', contents, offs)[0]
class ParsePE32(object):
    """List the DLLs and functions imported by a PE image.

    The hard-coded header offsets and the 8-byte thunk size target the PE32+
    (64-bit) layout; see ``get_data_directory_array_offs``.

    Attributes:
        bin_contents: Raw contents of the binary as returned by
            ``FileUtils.read_file`` (expected to be a byte string).
        section_headers_info: Iterable of dicts with at least the keys
            ``'Virtual Address'``, ``'Virtual Size'`` and
            ``'Pointer to Raw Data'``, as returned by
            ``PESectionUtils.get_sections_info``.
        pe_section_utils: ``PESectionUtils`` helper built from
            ``bin_contents``.
    """

    # sizeof(IMAGE_IMPORT_DESCRIPTOR): five DWORD fields.
    _IMPORT_DESCRIPTOR_SIZE = 0x14
    # Offsets of the Name and FirstThunk RVAs inside the descriptor.
    _IMPORT_DESCRIPTOR_NAME_OFFS = 0xC
    _IMPORT_DESCRIPTOR_FIRST_THUNK_OFFS = 0x10
    # sizeof(IMAGE_DATA_DIRECTORY): VirtualAddress + Size, two DWORDs.
    _IMAGE_DATA_DIRECTORY_SIZE = 0x8
    # PE32+: thunk entries are QWORDs instead of DWORDs.
    _THUNK_SIZE = 0x8
    # PE32+: bit 63 of a thunk marks an import by ordinal (low 16 bits).
    _ORDINAL_FLAG_64 = 1 << 63
    _ORDINAL_MASK = 0xFFFF
    # Offset of the name inside IMAGE_IMPORT_BY_NAME (after the WORD hint).
    _IMPORT_BY_NAME_NAME_OFFS = 0x2

    def __init__(self):
        self.bin_contents = None
        self.section_headers_info = None
        self.pe_section_utils = None

    def execute(self, bin_path):
        """Read the file at ``bin_path`` and parse it if it is not empty."""
        self.bin_contents = FileUtils.read_file(bin_path)
        if self.bin_contents:
            self.parse_pe32()

    def parse_pe32(self):
        """Load the section table and log each DLL with its functions."""
        self.pe_section_utils = PESectionUtils(self.bin_contents)
        self.section_headers_info = self.pe_section_utils.get_sections_info()
        imported_dlls_functions = self.get_pe_dll_imported_functions()
        logging.info('PE Imported DLLs #: {}'.format(
            len(imported_dlls_functions)))
        for imported_dll_functions in imported_dlls_functions:
            logging.info('- {}: {}'.format(
                imported_dll_functions['dll_name'],
                ', '.join(imported_dll_functions['imported_functions'])))

    def get_pe_dll_imported_functions(self):
        """Return one entry per import descriptor, in file order.

        Returns:
            A list of dicts ``{'dll_name': str, 'imported_functions': list}``.
            The walk stops at the first descriptor whose name cannot be read,
            which covers the all-zero descriptor terminating the table as
            well as a missing or truncated import directory.
        """
        imported_dll_functions = []
        descriptor_foff = self.get_first_import_descriptor_foff()
        while True:
            dll_name = self.get_dll_name(descriptor_foff)
            if not dll_name:
                break
            imported_dll_functions.append({
                'dll_name': dll_name,
                'imported_functions':
                    self.get_dll_imported_functions(descriptor_foff),
            })
            descriptor_foff += self._IMPORT_DESCRIPTOR_SIZE
        return imported_dll_functions

    def get_first_import_descriptor_foff(self):
        """Return the file offset of the first import descriptor.

        Returns -1 when the RVA does not fall inside any section.
        """
        return self._rva_to_file_offset(self.get_first_import_descriptor_rva())

    def get_first_import_descriptor_rva(self):
        """Return the ``VirtualAddress`` (RVA) of the import directory.

        The field is a DWORD; reading only 16 bits would truncate any RVA
        above 0xFFFF.
        """
        virtual_address_offs = 0x0
        return self._unpack_dword(
            self.bin_contents,
            self.get_import_directory_offs() + virtual_address_offs)

    def get_import_directory_offs(self):
        """Return the file offset of IMAGE_DIRECTORY_ENTRY_IMPORT.

        That entry is ``DataDirectory[1]``.
        """
        data_directory_array_offs = self.get_data_directory_array_offs()
        return data_directory_array_offs + self._IMAGE_DATA_DIRECTORY_SIZE * 1

    def get_data_directory_array_offs(self):
        """Return the file offset of ``DataDirectory[16]`` (OptionalHeader)."""
        optional_header_offs = self.pe_section_utils.get_optional_header_offs()
        # PE32+: the offsets in Ero's poster are off starting with the second
        # QWORD (SizeOfStackReserve); the extra 0x10 compensates for that.
        return optional_header_offs + 0x60 + 0x10

    def get_dll_name(self, import_descriptor_addr):
        """Return the DLL name referenced by an import descriptor.

        Args:
            import_descriptor_addr: File offset of an IMAGE_IMPORT_DESCRIPTOR.

        Returns:
            The name as a string, or ``''`` when the offset is invalid, the
            descriptor does not fit in the file, or its Name RVA does not map
            to any section (as for the all-zero terminating descriptor).
        """
        if import_descriptor_addr < 0:
            return ''
        name_field_foff = (
            import_descriptor_addr + self._IMPORT_DESCRIPTOR_NAME_OFFS)
        if name_field_foff + 4 > len(self.bin_contents):
            return ''
        name_rva = self._unpack_dword(self.bin_contents, name_field_foff)
        name_foff = self._rva_to_file_offset(name_rva)
        if name_foff == -1:
            # Unmapped RVA: never hand the -1 sentinel to the string reader,
            # it would be taken as "last byte of the file".
            return ''
        return self._read_null_terminated_ascii_string(name_foff)

    def get_dll_imported_functions(self, import_descriptor_addr):
        """Return the functions imported through one import descriptor.

        Walks the FirstThunk array until the zero terminator (or an entry
        that cannot be resolved). Imports by ordinal are reported as
        ``'Ordinal N'`` instead of ending the walk early.
        """
        function_names = []
        thunk_foff = self.get_first_thunk_foff(import_descriptor_addr)
        while True:
            function_name = self.get_imported_function_name(thunk_foff)
            if not function_name:
                break
            function_names.append(function_name)
            thunk_foff += self._THUNK_SIZE
        return function_names

    def get_first_thunk_foff(self, import_descriptor_addr):
        """Return the file offset of the descriptor's FirstThunk array.

        Returns -1 when the descriptor offset is invalid or the FirstThunk
        RVA does not map to any section.
        """
        if import_descriptor_addr < 0:
            return -1
        field_foff = (
            import_descriptor_addr + self._IMPORT_DESCRIPTOR_FIRST_THUNK_OFFS)
        if field_foff + 4 > len(self.bin_contents):
            return -1
        first_thunk_rva = self._unpack_dword(self.bin_contents, field_foff)
        return self._rva_to_file_offset(first_thunk_rva)

    def get_imported_function_name(self, first_thunk_foff):
        """Return the import name stored behind one thunk entry.

        Args:
            first_thunk_foff: File offset of a single (QWORD) thunk entry.

        Returns:
            The function name, ``'Ordinal N'`` for an import by ordinal, or
            ``None`` for the zero terminator, an out-of-file offset, or an
            RVA that does not map to any section.
        """
        thunk = self._read_thunk(first_thunk_foff)
        if not thunk:
            return None
        if thunk & self._ORDINAL_FLAG_64:
            # No IMAGE_IMPORT_BY_NAME exists for ordinal imports; the low
            # 16 bits of the thunk hold the ordinal itself.
            return 'Ordinal {}'.format(thunk & self._ORDINAL_MASK)
        import_by_name_foff = self._rva_to_file_offset(thunk)
        if import_by_name_foff == -1:
            return None
        return self._read_null_terminated_ascii_string(
            import_by_name_foff + self._IMPORT_BY_NAME_NAME_OFFS)

    def get_import_by_name_foff(self, first_thunk_foff):
        """Return the file offset of the thunk's IMAGE_IMPORT_BY_NAME.

        Returns -1 when the thunk cannot be read or its value does not map
        to any section (zero terminator, import by ordinal).
        """
        thunk = self._read_thunk(first_thunk_foff)
        if thunk is None:
            return -1
        return self._rva_to_file_offset(thunk)

    def _read_thunk(self, thunk_foff):
        """Read one QWORD thunk entry, or None if it is outside the file."""
        if thunk_foff < 0:
            return None
        if thunk_foff + self._THUNK_SIZE > len(self.bin_contents):
            return None
        return self._unpack_qword(self.bin_contents, thunk_foff)

    def _read_null_terminated_ascii_string(self, starting_addr):
        """Read the NUL-terminated string starting at a file offset.

        Returns ``''`` for an offset outside the file. A string that runs to
        the end of the file without a terminator is returned up to EOF.
        Byte input is decoded as latin-1 so that every byte value maps to
        exactly one character.
        """
        contents = self.bin_contents
        if starting_addr < 0 or starting_addr >= len(contents):
            return ''
        # Python 2 byte strings are ``str``; Python 3 file contents are bytes.
        terminator = '\x00' if isinstance(contents, str) else b'\x00'
        end = contents.find(terminator, starting_addr)
        if end == -1:
            end = len(contents)
        raw = contents[starting_addr:end]
        if isinstance(raw, str):
            return raw
        return bytes(raw).decode('latin-1')

    def _rva_to_file_offset(self, rva):
        """Translate an RVA to a file offset, or -1 if no section holds it."""
        for section_info in self.section_headers_info:
            if self._rva_belongs_to_section(rva, section_info):
                return (rva - section_info['Virtual Address']
                        + section_info['Pointer to Raw Data'])
        return -1

    @staticmethod
    def _rva_belongs_to_section(rva, section_info):
        """Return True when ``rva`` lies in the section's virtual range."""
        start = section_info['Virtual Address']
        return start <= rva < start + section_info['Virtual Size']

    @staticmethod
    def _unpack_qword(contents, offs):
        """Read a little-endian unsigned 64-bit value at ``offs``."""
        return struct.unpack_from('<Q', contents, offs)[0]

    @staticmethod
    def _unpack_dword(contents, offs):
        """Read a little-endian unsigned 32-bit value at ``offs``."""
        return struct.unpack_from('<L', contents, offs)[0]

    @staticmethod
    def _unpack_word(contents, offs):
        """Read a little-endian unsigned 16-bit value at ``offs``."""
        return struct.unpack_from('<H', contents, offs)[0]