def __init__(self, f):
    """Parse an x86/x64 ELF stream and index its PLT stubs and ``main``.

    Populates:

    * ``self.bit`` -- 64 for ``'x64'``, 32 for ``'x86'``.
    * ``self.text`` / ``self.text_addr`` -- bytes and address of ``.text``.
    * ``self.funcs`` -- maps each 16-byte ``.plt`` slot address to the
      dynamic symbol name taken from the PLT relocation table.
    * ``self.ep`` -- immediate operand of the instruction that precedes
      the call to ``__libc_start_main`` (only set when such a call is found).
    * ``self.rodata`` / ``self.rodata_data`` -- ``.rodata`` section and bytes.

    :param f: open binary stream of the ELF file; it is closed once
        parsing has completed successfully.
    """
    elf = ELFFile(f)
    arch = elf.get_machine_arch()
    if arch == 'x64':
        self.bit = 64
    elif arch == 'x86':
        self.bit = 32

    text = elf.get_section_by_name('.text')
    self.text = text.data()
    self.text_addr = text.header['sh_addr']

    dynsym = elf.get_section_by_name('.dynsym')
    if self.bit == 64:
        relplt = elf.get_section_by_name('.rela.plt')
    elif self.bit == 32:
        relplt = elf.get_section_by_name('.rel.plt')

    # Slot 0 of the PLT is the resolver stub, hence the leading placeholder.
    dynsym_list = [0]
    relpltdata = relplt.data()
    entsize = relplt.header['sh_entsize']
    for x in range(0, len(relpltdata), entsize):
        tmp = relpltdata[x:x + entsize]
        if self.bit == 64:
            # Unpacked as four fields; only the third (symbol index) is used.
            _, _, num, _ = unpack('QIIQ', tmp)
        elif self.bit == 32:
            # The symbol index is the second word shifted right by 8 bits.
            _, num = unpack('II', tmp)
            num = num >> 8
        dynsym_list.append(num)

    # Translate symbol indices to names in one pass. Indices that have no
    # matching symbol are left untouched.
    names = {}
    for index, sym in enumerate(dynsym.iter_symbols()):
        names[index] = sym.name
    dynsym_list = [names.get(index, index) for index in dynsym_list]

    plt = elf.get_section_by_name('.plt')
    plt_data = plt.data()
    plt_addr = plt.header['sh_addr']
    self.funcs = {}
    for n in range(0, len(plt_data), 16):
        # Floor division: a plain "/" yields a float index on Python 3.
        self.funcs[n + plt_addr] = dynsym_list[n // 16]

    # The value of interest is operand 0 of the preceding instruction on
    # 32-bit and operand 1 on 64-bit.
    if self.bit == 32:
        disassembler, operand_index = md32, 0
    elif self.bit == 64:
        disassembler, operand_index = md64, 1
    else:
        disassembler = None

    if disassembler is not None:
        before = None
        for i in disassembler.disasm(self.text, self.text_addr):
            if (i and i.mnemonic == 'call'
                    and i.operands[0].imm in self.funcs
                    and self.funcs[i.operands[0].imm] == '__libc_start_main'):
                if before is not None:
                    self.ep = before.operands[operand_index].imm
                break
            before = i

    self.rodata = elf.get_section_by_name('.rodata')
    self.rodata_data = self.rodata.data()
    f.close()
def process_elf(elf: ELFFile):
    """Yield global function/object symbols of an ARM ELF, ordered by value.

    For every section returned by ``find_section(elf, SymbolTableSection)``
    the symbols whose type is ``STT_FUNC`` or ``STT_OBJECT`` and whose
    binding is ``STB_GLOBAL`` are converted with
    ``SimpleSymbol.from_symbol`` and yielded sorted by ``st_value``
    (sorting is per symbol table, not across tables).

    :param elf: parsed ELF file.
    :raises Exception: on first iteration, if the machine arch is not ARM.
    """
    arch = elf.get_machine_arch()
    if arch != "ARM":
        raise Exception("Invalid machine arch {} (not ARM)".format(arch))

    wanted_types = ("STT_FUNC", "STT_OBJECT")
    for symbol_table in find_section(elf, SymbolTableSection):
        selected = []
        for entry in symbol_table.iter_symbols():
            info = entry['st_info']
            if info['type'] in wanted_types and info['bind'] == "STB_GLOBAL":
                selected.append(SimpleSymbol.from_symbol(entry))
        selected.sort(key=lambda sym: sym.st_value)
        yield from selected
def get_frame_base(filename, pc, rebased_addr):
    """Look up the CFA offset that applies at a function's start address.

    The EH call-frame information of *filename* is scanned and, among all
    decoded FDE table rows whose ``pc`` is at or after
    ``pc - rebased_addr``, the row with the smallest ``pc`` wins.

    :param filename: name of the executable file
    :param pc: the address of the beginning of the function
    :param rebased_addr: load base subtracted from *pc* (the original notes
        say this should be ``project.loader.memory.min_addr``)
    :return: the ``cfa.offset`` of the selected row, ``0`` when no row
        qualifies, or ``None`` when the file has no DWARF info
    """
    target_loc = pc - rebased_addr

    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)
        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # DWARFInfo is the entry point for all DWARF processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # Needed so the descriptions module decodes register names correctly.
        set_global_machine_arch(elffile.get_machine_arch())

        best_pc = 1000000000000000000000  # sentinel above any candidate pc
        offset = 0
        for cfi_entry in dwarfinfo.EH_CFI_entries():
            if not isinstance(cfi_entry, FDE):
                continue
            for row in cfi_entry.get_decoded().table:
                if target_loc <= row['pc'] < best_pc:
                    offset = row['cfa'].offset
                    best_pc = row['pc']
        return offset
def library_to_sqlalchemy(filepath, filename=None):
    """Build the ``Library`` entity and its ``Symbol`` entities for an ELF.

    The file is read fully into memory as bytes, hashed with ``hash_algo``
    and parsed from an in-memory stream. Symbols are taken from ``.symtab``
    followed by ``.dynsym`` (whichever exist); symbols with an empty name,
    a zero/empty ``st_value``, or a name that was already seen are skipped.

    :param filepath: path of the ELF file on disk.
    :param filename: name stored on the library; defaults to the basename
        of *filepath*.
    :return: tuple ``(library, symbol_entities)``.
    :raises Exception: if the file has neither ``.symtab`` nor ``.dynsym``.
    """
    import io

    # ELF files are binary: text mode corrupts or fails to decode the data
    # on platforms where text and binary I/O differ.
    with open(filepath, "rb") as fileobj:
        elf_data = fileobj.read()

    checksum = hash_algo(elf_data).hexdigest()
    if filename is None:
        filename = os.path.basename(filepath)

    elf = ELFFile(io.BytesIO(elf_data))
    library = Library(name=filename,
                      checksum=checksum,
                      filepath=filepath,
                      elfclass=elf.elfclass,
                      machine_arch=elf.get_machine_arch())

    symtab = elf.get_section_by_name(".symtab")
    dynsym = elf.get_section_by_name(".dynsym")
    if not symtab and not dynsym:
        raise Exception("No symbol table found")
    elif symtab and dynsym:
        symbols = chain(symtab.iter_symbols(), dynsym.iter_symbols())
    elif symtab:
        symbols = symtab.iter_symbols()
    else:
        symbols = dynsym.iter_symbols()

    seen_symbols = set()
    symbol_entities = []
    for symbol in symbols:
        address = symbol.entry["st_value"]
        if not symbol.name or not address or symbol.name in seen_symbols:
            continue
        symbol_entities.append(
            Symbol(name=symbol.name, addr=address, library=library))
        seen_symbols.add(symbol.name)

    return library, symbol_entities
def _validate_elf(firmware_path: pathlib.Path, platform: Platform) -> None:
    """Validate that an ELF firmware matches the running system and platform.

    Two checks are performed:

    1. The ELF machine architecture must equal
       ``get_correspondent_elf_arch(system_platform.machine())``.
    2. The decoder platform derived from *platform* must equal the
       firmware's board type or board subtype as reported by ``Decoder``.

    :param firmware_path: path of the firmware file to validate.
    :param platform: platform the firmware is meant to run on.
    :raises InvalidFirmwareFile: if the file is not a valid ELF, if the
        architecture or platform does not match, or if the firmware cannot
        be decoded.
    """
    # Check if firmware's architecture matches system's architecture
    with open(firmware_path, "rb") as file:
        try:
            elf_file = ELFFile(file)
            firm_arch = elf_file.get_machine_arch()
        except Exception as error:
            raise InvalidFirmwareFile(
                f"Given file is not a valid ELF: {error}") from error

    running_arch = system_platform.machine()
    if firm_arch != get_correspondent_elf_arch(running_arch):
        raise InvalidFirmwareFile(
            f"Firmware's architecture ({firm_arch}) does not match system's ({running_arch})."
        )

    # Check if firmware's platform matches system platform
    try:
        firm_decoder = Decoder()
        firm_decoder.process(firmware_path)
        firm_board = firm_decoder.fwversion.board_type
        firm_sub_board = firm_decoder.fwversion.board_subtype
        current_decoder_platform = get_correspondent_decoder_platform(
            platform)
    except Exception as error:
        raise InvalidFirmwareFile(
            "Given firmware is not a supported version.") from error

    # The mismatch error used to be constructed but never raised, so any
    # decodable firmware was accepted. It is raised outside the ``try`` so
    # the specific message is not replaced by the generic one above.
    if current_decoder_platform not in [firm_board, firm_sub_board]:
        raise InvalidFirmwareFile(
            f"Firmware's platform ({firm_board}/{firm_sub_board}) does not match system's ({platform})."
        )
def library_to_sqlalchemy(filepath, filename=None):
    """Create the ``Library`` row and ``Symbol`` rows describing an ELF file.

    The raw bytes of *filepath* are hashed with ``hash_algo`` and parsed
    from memory. Symbols come from ``.symtab`` first and ``.dynsym`` second
    (whichever are present). A symbol is skipped when its name is empty,
    its ``st_value`` is falsy, or its name has already been emitted.

    :param filepath: path of the ELF file on disk.
    :param filename: library name; the basename of *filepath* when omitted.
    :return: ``(library, symbol_entities)``.
    :raises Exception: when neither symbol table exists.
    """
    import io

    # Read as bytes: an ELF is binary data, and text mode may translate or
    # fail to decode it.
    with open(filepath, "rb") as fileobj:
        elf_data = fileobj.read()

    checksum = hash_algo(elf_data).hexdigest()
    if filename is None:
        filename = os.path.basename(filepath)

    elf = ELFFile(io.BytesIO(elf_data))
    library = Library(name=filename,
                      checksum=checksum,
                      filepath=filepath,
                      elfclass=elf.elfclass,
                      machine_arch=elf.get_machine_arch())

    symtab = elf.get_section_by_name(".symtab")
    dynsym = elf.get_section_by_name(".dynsym")
    if not symtab and not dynsym:
        raise Exception("No symbol table found")
    elif symtab and dynsym:
        symbols = chain(symtab.iter_symbols(), dynsym.iter_symbols())
    elif symtab:
        symbols = symtab.iter_symbols()
    else:
        symbols = dynsym.iter_symbols()

    seen_symbols = set()
    symbol_entities = []
    for symbol in symbols:
        address = symbol.entry["st_value"]
        if not symbol.name or not address or symbol.name in seen_symbols:
            continue
        symbol_entities.append(
            Symbol(name=symbol.name, addr=address, library=library))
        seen_symbols.add(symbol.name)

    return library, symbol_entities
def open(io):
    """Parse *io* as an ELF file and resolve its architecture.

    Logs the section and segment counts and the resolved architecture
    through ``info``.

    :param io: stream handed to ``ELFFile``.
    :return: tuple ``(elf_o, arch)`` where *arch* is the result of
        ``sefi.arch.from_elf_machine_arch`` for the file's machine arch.
    """
    elf_o = ELFFile(io)

    section_count = elf_o.num_sections()
    segment_count = elf_o.num_segments()
    info('parsed elf file with %s sections and %s segments'
         % (section_count, segment_count))

    machine = elf_o.get_machine_arch()
    arch = sefi.arch.from_elf_machine_arch(machine)
    info(' elf file arch is %s' % arch)

    return (elf_o, arch)
def process_file(filename):
    """Print the ELF class, machine arch and ``.text`` section of a file.

    :param filename: path of the ELF file to inspect.
    """
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        # elfclass is a public attribute of ELFFile, read from its header
        print('%s: elfclass is %s' % (filename, elffile.elfclass))
        print(elffile.get_machine_arch())

        ss = elffile.get_section_by_name('.text')
        # Call form instead of the Python 2 print statement: a single
        # parenthesised argument prints the same under both, and the
        # statement form is a SyntaxError on Python 3.
        print(ss)
def process_file(filename):
    """Print the location information of every DIE attribute that has one.

    For each compile unit the offset and length are printed; for each
    attribute that ``LocationParser.attribute_has_location`` accepts, the
    owning DIE's name, tag and the decoded location (a single expression
    or a location list) are printed.

    :param filename: path of the ELF file to inspect.
    :return: ``None``; returns early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            # CU is a CompileUnit object; header elements are accessed via
            # item lookup.
            print(' Found a compile unit at offset %s, length %s' %
                  (CU.cu_offset, CU['unit_length']))

            for DIE in CU.iter_DIEs():
                # Each attribute is an AttributeValue object (from
                # elftools.dwarf.die).
                for attr in itervalues(DIE.attributes):
                    if not loc_parser.attribute_has_location(
                            attr, CU['version']):
                        continue

                    # Not every DIE with a location has DW_AT_name; an
                    # unconditional lookup aborts the whole dump with
                    # KeyError on the first unnamed one.
                    name_attr = DIE.attributes.get('DW_AT_name')
                    if name_attr is not None:
                        var_name = name_attr.value
                    else:
                        var_name = '<unnamed>'
                    print(' Varname:%s' % (var_name))
                    print(' DIE %s. attr %s.' % (DIE.tag, attr.name))

                    loc = loc_parser.parse_from_attribute(
                        attr, CU['version'])
                    # We either get a list (in case the attribute is a
                    # reference to the .debug_loc section) or a LocationExpr
                    # object (in case the attribute itself contains location
                    # information).
                    if isinstance(loc, LocationExpr):
                        print(' %s' % (describe_DWARF_expr(
                            loc.loc_expr, dwarfinfo.structs)))
                    elif isinstance(loc, list):
                        print(show_loclist(loc, dwarfinfo, indent=' '))
def META_ELF(s, buff):
    """Collect ELF metadata from an in-memory buffer.

    :param s: not used by this function.
    :param buff: raw contents of the ELF file.
    :return: dict with the keys ``'Arch'``, ``'Debug Entries'``,
        ``'Section Names'`` and ``'Symbol Names'``.
    """
    elffile = ELFFile(StringIO(buff))

    metadata = {}
    metadata['Arch'] = elffile.get_machine_arch()
    metadata['Debug Entries'] = get_die_entries(elffile)

    # get_section_names yields both name collections in one call.
    section_names, symbol_names = get_section_names(elffile)
    metadata['Section Names'] = section_names
    metadata['Symbol Names'] = symbol_names

    return metadata
def test_hello(self):
    """Check the basic header properties of the ARM sample ELF."""
    sample_path = os.path.join('test', 'testfiles', 'simple_gcc.elf.arm')
    with open(sample_path, 'rb') as stream:
        elf = ELFFile(stream)

        machine = elf.get_machine_arch()
        self.assertEqual(machine, 'ARM')

        # Check some other properties of this ELF file derived from readelf
        entry_point = elf['e_entry']
        self.assertEqual(entry_point, 0x8018)
        section_count = elf.num_sections()
        self.assertEqual(section_count, 14)
        segment_count = elf.num_segments()
        self.assertEqual(segment_count, 2)
def open(io):
    """Wrap *io* in an ``ELFFile`` and determine its architecture.

    The section/segment counts and the architecture are reported via
    ``info``.

    :param io: stream passed to ``ELFFile``.
    :return: ``(elf_o, arch)``; *arch* comes from
        ``sefi.arch.from_elf_machine_arch``.
    """
    elf_o = ELFFile(io)

    counts = (elf_o.num_sections(), elf_o.num_segments())
    info('parsed elf file with %s sections and %s segments' % counts)

    elf_machine = elf_o.get_machine_arch()
    arch = sefi.arch.from_elf_machine_arch(elf_machine)
    info(' elf file arch is %s' % arch)

    return (elf_o, arch)
def print_basic_info(filename: str) -> None:
    """Print a "Basic Information" table for the ELF file *filename*.

    The table lists the file name and size, ELF machine arch and type,
    MD5/SHA1/SHA256/ssdeep digests, a VirusTotal link (shortened through
    ``tinyurl`` when that returns something), whether DWARF info exists,
    entropy, the named sections with their sizes, and the entry point.

    :param filename: path of the ELF file to describe.
    """
    with open(filename, "rb") as f:
        elffile = ELFFile(f)  # ELF object

        # Digests, computed in the order the table shows them.
        fileMD5 = file_MD5sum(filename)
        filesha1 = file_sha1sum(filename)
        filesha256 = file_sha256sum(filename)
        fileSSDEEP = file_ssdeepsum(filename)

        # Fall back to the full URL when no short link is available.
        full_vt_url = "https://www.virustotal.com/gui/file/" + filesha256
        vtlink = tinyurl(full_vt_url) or full_vt_url

        # One "name (size)" chunk per named section, with a line break
        # after every fourth section index.
        pieces = []
        for index in range(elffile.num_sections()):
            section = elffile.get_section(index)
            if len(section.name) > 0:
                pieces.append("{}{} {}({}) ".format(
                    GREEN, section.name, RESET, hex(section.data_size)))
            if index % 4 == 0 and index > 0:
                pieces.append("\n")
        sections = "".join(pieces)
        if not sections:
            sections = RED + "No sections found" + RESET

        # has debug info?
        if elffile.has_dwarf_info():
            debug = GREEN + "Yes" + RESET
        else:
            debug = RED + "No" + RESET

        info_table = [
            ["Filename:", filename],
            ["Filesize:", file_size(filename)],
            ["Filetype:",
             GREEN + "ELF " + str(elffile.get_machine_arch()) + RESET],
            ["Subsystem:",
             GREEN + describe_e_type(elffile.header['e_type']) + RESET],
            ["MD5: ", fileMD5],
            ["SHA1: ", filesha1],
            ["SHA256: ", filesha256],
            ["SSDEEP:", fileSSDEEP],
            ["VT link:", vtlink],
            ["Symbols:", debug],
            ["Entropy:", str(file_entropy(filename))],
            ["Sections:\n(with size)", sections],
            ["Entrypoint:", "{}".format(hex(elffile.header["e_entry"]))],
        ]

        table = AsciiTable(title="Basic Information", table_data=info_table)
        print("")
        print(table.table)
        print("")
def test_hello(self):
    """Verify arch, entry point and section/segment counts of the ARM sample."""
    sample_path = os.path.join(
        'test', 'testfiles_for_unittests', 'simple_gcc.elf.arm')
    with open(sample_path, 'rb') as stream:
        elf = ELFFile(stream)

        machine = elf.get_machine_arch()
        self.assertEqual(machine, 'ARM')

        # Check some other properties of this ELF file derived from readelf
        entry_point = elf['e_entry']
        self.assertEqual(entry_point, 0x8018)
        section_count = elf.num_sections()
        self.assertEqual(section_count, 14)
        segment_count = elf.num_segments()
        self.assertEqual(segment_count, 2)
def test_hello(self):
    """Verify arch, entry point and section/segment counts of the MIPS sample."""
    sample_path = os.path.join(
        'test', 'testfiles_for_unittests', 'simple_gcc.elf.mips')
    with open(sample_path, 'rb') as stream:
        elf = ELFFile(stream)

        machine = elf.get_machine_arch()
        self.assertEqual(machine, 'MIPS')

        # Check some other properties of this ELF file derived from readelf
        entry_point = elf['e_entry']
        self.assertEqual(entry_point, 0x0)
        section_count = elf.num_sections()
        self.assertEqual(section_count, 25)
        segment_count = elf.num_segments()
        self.assertEqual(segment_count, 0)
class Elf(Binary):
    """ELF binary wrapper exposing memory maps, interpreter and entry point.

    Only ``x86`` and ``x64`` machine architectures are supported (mapped
    to ``"i386"`` and ``"amd64"``); any other value raises ``KeyError``.
    """

    def __init__(self, filename):
        """Open and parse *filename*, then load its interpreter, if any.

        :param filename: path of the ELF file.
        """
        super().__init__(filename)

        # Defined up front so that __del__ is safe even if opening or
        # parsing fails below.
        self.elf = None
        stream = open(filename, "rb")
        try:
            self.elf = ELFFile(stream)
        except Exception:
            # Do not leak the handle when the file is not a valid ELF.
            stream.close()
            raise

        self.arch = {
            "x86": "i386",
            "x64": "amd64"
        }[self.elf.get_machine_arch()]
        assert self.elf.header.e_type in ["ET_DYN", "ET_EXEC", "ET_CORE"]

        # Get interpreter elf
        self.interpreter = None
        for elf_segment in self.elf.iter_segments():
            if elf_segment.header.p_type != "PT_INTERP":
                continue
            # The segment data is the interpreter path; its last byte is
            # dropped (presumably the terminating NUL).
            self.interpreter = Elf(elf_segment.data()[:-1])
            break

        if self.interpreter is not None:
            assert self.interpreter.arch == self.arch
            assert self.interpreter.elf.header.e_type in ["ET_DYN", "ET_EXEC"]

    def __del__(self):
        """Close the underlying file stream, if one was opened."""
        # getattr: the attribute is missing when __init__ raised early.
        elf = getattr(self, "elf", None)
        if elf is not None:
            elf.stream.close()

    def maps(self):
        """Yield one tuple per non-empty ``PT_LOAD`` segment.

        Each tuple is ``(vaddr, memsz, perms, stream name, file offset,
        filesz)``.

        :raises BinaryException: if a loadable segment is not readable.
        """
        for elf_segment in self.elf.iter_segments():
            if elf_segment.header.p_type != "PT_LOAD" or elf_segment.header.p_memsz == 0:
                continue

            flags = elf_segment.header.p_flags
            # PF_X 0x1 Execute - PF_W 0x2 Write - PF_R 0x4 Read
            perms = [" ", " x", " w ", " wx", "r ", "r x", "rw ", "rwx"][flags & 7]
            if "r" not in perms:
                raise BinaryException(
                    "Not readable map from cgc elf not supported")

            # CGCMAP--
            assert elf_segment.header.p_filesz != 0 or elf_segment.header.p_memsz != 0
            yield ((
                elf_segment.header.p_vaddr,
                elf_segment.header.p_memsz,
                perms,
                elf_segment.stream.name,
                elf_segment.header.p_offset,
                elf_segment.header.p_filesz,
            ))

    def getInterpreter(self):
        """Return the interpreter as an ``Elf``, or ``None`` if there is none."""
        return self.interpreter

    def threads(self):
        """Yield a single running thread positioned at the ELF entry point."""
        yield (("Running", {"EIP": self.elf.header.e_entry}))
def __init__(self, elf_file: ELFFile, debug_root: str = None):
    """Cache the DWARF data of *elf_file* and build the index.

    :param elf_file: parsed ELF file to read DWARF information from.
    :param debug_root: stored as given; not interpreted here.
    """
    self._elf_file = elf_file
    self._debug_root = debug_root
    self._arch = elf_file.get_machine_arch()

    # Range and location lists are both derived from the DWARF info object.
    dwarf_info = elf_file.get_dwarf_info()
    self._dwarf_info = dwarf_info
    self._range_lists = dwarf_info.range_lists()
    self._location_lists = dwarf_info.location_lists()

    # Start with empty lookup tables.
    self._die_map = {}
    self._line_programs = {}
    self._debug_str = None

    self._logger = logging.getLogger('DWARFData')
    self._index()
def process_dwarf_info(in_file, out_file):
    ''' Main function processing the dwarf information from debug sections.

    Types, frames and every compile unit of *in_file* are processed; each
    entry of ``GLOBAL_VARIABLES`` with a positive size is then added to a
    ``CFG_pb2.Module`` that is serialized to *out_file*.

    :param in_file: path of the ELF file to read.
    :param out_file: path the serialized module is written to.
    :return: ``False`` when the file has no debug information, otherwise
        ``None``.
    '''
    DEBUG('Processing file: {0}'.format(in_file))

    with open(in_file, 'rb') as f:
        f_elf = ELFFile(f)
        if not f_elf.has_dwarf_info():
            # Report the input path; this used to format the ``file`` name,
            # which is a builtin on Python 2 and undefined on Python 3.
            DEBUG("{0} has no debug informations!".format(in_file))
            return False

        M = CFG_pb2.Module()
        M.name = "GlobalVariable"

        set_global_machine_arch(f_elf.get_machine_arch())
        dwarf_info = f_elf.get_dwarf_info()
        process_types(dwarf_info, TYPES_MAP)
        process_frames(dwarf_info, EH_FRAMES)
        section_offset = dwarf_info.debug_info_sec.global_offset

        # Iterate through all the compile units
        for CU in dwarf_info.iter_CUs():
            DEBUG('Found a compile unit at offset {0}, length {1}'.format(
                CU.cu_offset, CU['unit_length']))
            top_DIE = CU.get_top_DIE()
            c_unit = CUnit(top_DIE, CU['unit_length'], CU.cu_offset,
                           section_offset)
            c_unit.decode_control_unit(M, GLOBAL_VARIABLES)

    # ``values()`` exists on both Python 2 and 3 (``iteritems`` does not),
    # and the keys were never used.
    for value in GLOBAL_VARIABLES.values():
        if value["size"] > 0:
            gvar = M.global_vars.add()
            gvar.name = value["name"]
            gvar.ea = value["addr"]
            gvar.size = value["size"]
        else:
            DEBUG("Look for {}".format(pprint.pformat(value)))

    # SerializeToString returns bytes, so the output must be binary.
    with open(out_file, "wb") as outf:
        outf.write(M.SerializeToString())

    DEBUG("Global Vars\n")
    DEBUG('Number of Global Vars: {0}'.format(len(GLOBAL_VARIABLES)))
    DEBUG("{}".format(pprint.pformat(GLOBAL_VARIABLES)))
    DEBUG("End Global Vars\n")
class CGCElf(Binary):
    """CGC binary wrapper that parses the file through the ELF parser."""

    @staticmethod
    def _cgc2elf(filename):
        """Return an in-memory copy of *filename* that starts with ELF magic.

        The first four bytes of the copy are overwritten with ``\\x7fELF``
        so the upstream ELF parser accepts it; the stream keeps the
        original file name in ``name``.
        """
        # hack begin so we can use upstream Elftool
        with open(filename, "rb") as fd:
            contents = fd.read()
            source_name = fd.name
        stream = io.BytesIO(contents)
        stream.write(b"\x7fELF")
        stream.name = source_name
        return stream

    def __init__(self, filename):
        """Parse *filename*; only 32-bit x86 ``ET_EXEC`` files pass the checks."""
        super().__init__(filename)
        self.elf = ELFFile(self._cgc2elf(filename))

        arch_names = {"x86": "i386", "x64": "amd64"}
        self.arch = arch_names[self.elf.get_machine_arch()]

        assert "i386" == self.arch
        assert self.elf.header.e_type in ["ET_EXEC"]

    def maps(self):
        """Yield ``(vaddr, memsz, perms, name, offset, filesz)`` per mapping.

        Only non-empty ``PT_LOAD`` segments are yielded.

        :raises BinaryException: on an unexpected segment type or a
            loadable segment without read permission.
        """
        supported_types = ("PT_LOAD", "PT_NULL", "PT_PHDR", "PT_CGCPOV2")
        # Indexed by the low three flag bits.
        # PF_X 0x1 Execute - PF_W 0x2 Write - PF_R 0x4 Read
        perm_table = [" ", " x", " w ", " wx", "r ", "r x", "rw ", "rwx"]

        for elf_segment in self.elf.iter_segments():
            header = elf_segment.header
            if header.p_type not in supported_types:
                raise BinaryException("Not Supported Section")

            if header.p_type != "PT_LOAD" or header.p_memsz == 0:
                continue

            perms = perm_table[header.p_flags & 7]
            if "r" not in perms:
                raise BinaryException(
                    "Not readable map from cgc elf not supported")

            # CGCMAP--
            assert header.p_filesz != 0 or header.p_memsz != 0
            mapping = (
                header.p_vaddr,
                header.p_memsz,
                perms,
                elf_segment.stream.name,
                header.p_offset,
                header.p_filesz,
            )
            yield mapping

    def threads(self):
        """Yield a single running thread positioned at the entry point."""
        registers = {"EIP": self.elf.header.e_entry}
        yield ("Running", registers)
def _postprocessing_candidates(src_dir: str) -> PostprocessingCandidates:
    """Search for binaries that need to be post-processed.

    Every file below *src_dir* is opened as an ELF. ARM files with a
    ``.symtab`` are queued for ARM stripping; ARM files with both
    ``.dynamic`` and ``.rodata`` whose read-only data contains
    ``/dev/fb0`` are queued for rm2fb patching; x86/x64 files with a
    ``.symtab`` are queued for x86 stripping.
    """
    strip_arm = []
    strip_x86 = []
    patch_rm2fb = []

    for directory, _, files in os.walk(src_dir):
        for file_name in files:
            file_path = os.path.join(directory, file_name)

            try:
                with open(file_path, "rb") as file:
                    info = ELFFile(file)
                    symtab = info.get_section_by_name(".symtab")
                    arch = info.get_machine_arch()

                    if arch == "ARM":
                        if symtab:
                            strip_arm.append(file_path)

                        dynamic = info.get_section_by_name(".dynamic")
                        rodata = info.get_section_by_name(".rodata")
                        if dynamic and rodata and b"/dev/fb0" in rodata.data():
                            patch_rm2fb.append(file_path)
                    elif arch in ("x86", "x64") and symtab:
                        strip_x86.append(file_path)
            except (ELFError, IsADirectoryError):
                # Ignore non-ELF files and directories
                continue

    return PostprocessingCandidates(
        strip_arm=strip_arm,
        strip_x86=strip_x86,
        patch_rm2fb=patch_rm2fb,
    )
def process_dwarf_info(in_file, out_file):
    ''' Main function processing the dwarf information from debug sections.

    Processes the types, frames and compile units of *in_file*, copies
    every ``GLOBAL_VARIABLES`` entry with a positive size into a
    ``CFG_pb2.Module`` and writes the serialized module to *out_file*.

    :param in_file: path of the ELF file to read.
    :param out_file: path the serialized module is written to.
    :return: ``False`` when the file has no debug information, otherwise
        ``None``.
    '''
    DEBUG('Processing file: {0}'.format(in_file))

    with open(in_file, 'rb') as f:
        f_elf = ELFFile(f)
        if not f_elf.has_dwarf_info():
            # Name the input path; the previous code formatted ``file``,
            # a builtin on Python 2 and an undefined name on Python 3.
            DEBUG("{0} has no debug informations!".format(in_file))
            return False

        M = CFG_pb2.Module()
        M.name = "GlobalVariable"

        set_global_machine_arch(f_elf.get_machine_arch())
        dwarf_info = f_elf.get_dwarf_info()
        process_types(dwarf_info, TYPES_MAP)
        process_frames(dwarf_info, EH_FRAMES)
        section_offset = dwarf_info.debug_info_sec.global_offset

        # Iterate through all the compile units
        for CU in dwarf_info.iter_CUs():
            DEBUG('Found a compile unit at offset {0}, length {1}'.format(
                CU.cu_offset, CU['unit_length']))
            top_DIE = CU.get_top_DIE()
            c_unit = CUnit(top_DIE, CU['unit_length'], CU.cu_offset,
                           section_offset)
            c_unit.decode_control_unit(M, GLOBAL_VARIABLES)

    # Only the values are needed; ``values()`` works on Python 2 and 3,
    # unlike ``iteritems()``.
    for value in GLOBAL_VARIABLES.values():
        if value["size"] > 0:
            gvar = M.global_vars.add()
            gvar.name = value["name"]
            gvar.ea = value["addr"]
            gvar.size = value["size"]
        else:
            DEBUG("Look for {}".format(pprint.pformat(value)))

    # The serialized module is bytes, so write it in binary mode.
    with open(out_file, "wb") as outf:
        outf.write(M.SerializeToString())

    DEBUG("Global Vars\n")
    DEBUG('Number of Global Vars: {0}'.format(len(GLOBAL_VARIABLES)))
    DEBUG("{}".format(pprint.pformat(GLOBAL_VARIABLES)))
    DEBUG("End Global Vars\n")
def loadELF(self, filename):
    """Load an ELF file and populate the loader state from it.

    Sets ``self.arch``, ``self.entry``, ``self.memory``, ``self.symtab``,
    ``self.thumbtab`` (sorted by first tuple element) and
    ``self.code_addrs`` (sorted by ``'address'``).

    :param filename: path of the ELF file.
    :raises Exception: if the file cannot be opened or parsed as ELF.
    """
    stream = None
    try:
        stream = open(filename, 'rb')
        elf = ELFFile(stream)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not turned into a "not an ELF" error; the
        # handle is closed so a failed parse does not leak it.
        if stream is not None:
            stream.close()
        raise Exception("[-] This file is not an ELF file: %s" % filename)

    # The stream stays open: segments and sections are read through it.
    self.arch = elf.get_machine_arch()
    self.entry = elf.header.e_entry
    self.memory = self.load_code_segments(elf.iter_segments(), filename)
    self.symtab, self.thumbtab, self.code_addrs = self.load_section_info(
        elf.iter_sections())
    self.thumbtab.sort(key=lambda tup: tup[0])
    self.code_addrs = sorted(self.code_addrs, key=lambda k: k['address'])
def __init__(self, image, trace=False, syms=False, timeout=None,
             preformatted_image=os.path.join('..', 'floppy.img.zip'),
             argv=None, keep_temps=False, qemu_opts=None):
    """Prepare an emulator model and a bootable floppy image for a kernel.

    :param image: path of the kernel ELF image; must exist.
    :param trace: stored on the instance.
    :param syms: when true, build ``self.symbols`` mapping symbol value to
        name from every symbol table section.
    :param timeout: stored on the instance.
    :param preformatted_image: base image handed to ``Image``.
    :param argv: arguments passed to ``create_grub_conf``.
    :param keep_temps: stored on the instance.
    :param qemu_opts: extra options for ``Qemu``; defaults to an empty list.
    :raises RuntimeError: if the image is not an x86 ELF.
    """
    self.image = image
    self.trace = trace
    self.syms = syms
    self.timeout = timeout
    self.argv = argv
    self.keep_temps = keep_temps
    # A fresh list per instance instead of a shared mutable default.
    self.qemu_opts = [] if qemu_opts is None else qemu_opts

    assert os.path.exists(self.image)

    with open(self.image, 'rb') as fd:
        elffile = ELFFile(fd)
        machine_arch = elffile.get_machine_arch()
        if machine_arch == 'x86':
            self.arch = 'X86'
        else:
            # Previously referenced an undefined name here, which raised
            # NameError instead of the intended RuntimeError.
            raise RuntimeError("Unknown architecture: %s" % machine_arch)

        if syms:
            # Get the symbols in the file.
            self.symbols = {}
            for section in elffile.iter_sections():
                if not isinstance(section, SymbolTableSection):
                    continue
                for sym in section.iter_symbols():
                    self.symbols[sym['st_value']] = sym.name

    if self.arch == 'X86':
        # The MODEL environment variable selects Bochs over QEMU.
        if os.environ.get('MODEL', '').lower() == 'bochs':
            self.model = Bochs('bochs', [])
        else:
            self.model = Qemu('qemu-system-i386', self.qemu_opts)
    else:
        raise RuntimeError("Unknown architecture: %s" % self.arch)

    # mkstemp is only used to reserve a path; the descriptor is not needed.
    fd, self.tmpimage = tempfile.mkstemp()
    os.close(fd)

    self.floppy_image = Image(self.tmpimage, preformatted_image)
    self.floppy_image.create_grub_conf(args=self.argv)
    self.floppy_image.copy(self.image, '/kernel')
class Elf(Binary):
    """ELF binary wrapper exposing memory maps, interpreter and entry point."""

    def __init__(self, filename):
        """Open and parse *filename*, then load its interpreter, if any."""
        super().__init__(filename)
        self.elf = ELFFile(open(filename, 'rb'))

        arch_names = {'x86': 'i386', 'x64': 'amd64'}
        self.arch = arch_names[self.elf.get_machine_arch()]
        assert self.elf.header.e_type in ['ET_DYN', 'ET_EXEC', 'ET_CORE']

        # Get interpreter elf: the first PT_INTERP segment holds its path,
        # used here without its last byte.
        self.interpreter = None
        for elf_segment in self.elf.iter_segments():
            if elf_segment.header.p_type == 'PT_INTERP':
                self.interpreter = Elf(elf_segment.data()[:-1])
                break

        if self.interpreter is None:
            return
        assert self.interpreter.arch == self.arch
        assert self.interpreter.elf.header.e_type in ['ET_DYN', 'ET_EXEC']

    def maps(self):
        """Yield ``(vaddr, memsz, perms, name, offset, filesz)`` per mapping.

        Only non-empty ``PT_LOAD`` segments are yielded.

        :raises Exception: if a loadable segment is not readable.
        """
        # Indexed by the low three flag bits.
        # PF_X 0x1 Execute - PF_W 0x2 Write - PF_R 0x4 Read
        perm_table = [' ', ' x', ' w ', ' wx', 'r ', 'r x', 'rw ', 'rwx']

        for elf_segment in self.elf.iter_segments():
            header = elf_segment.header
            if header.p_type != 'PT_LOAD' or header.p_memsz == 0:
                continue

            perms = perm_table[header.p_flags & 7]
            if 'r' not in perms:
                raise Exception("Not readable map from cgc elf not supported")

            # CGCMAP--
            assert header.p_filesz != 0 or header.p_memsz != 0
            mapping = (header.p_vaddr,
                       header.p_memsz,
                       perms,
                       elf_segment.stream.name,
                       header.p_offset,
                       header.p_filesz)
            yield mapping

    def getInterpreter(self):
        """Get the dynamic linker.

        Returns the dynamic linker (if one is specified) as an :obj:`Elf`
        object, otherwise ``None``.

        :rtype: :obj:`Elf` or None
        """
        return self.interpreter

    def threads(self):
        """Yield a single running thread positioned at the entry point."""
        registers = {'EIP': self.elf.header.e_entry}
        yield ('Running', registers)
def loadELF(self, filename):
    """Parse *filename* as ELF and fill in the loader's state.

    Sets ``self.arch``, ``self.entry``, ``self.memory``, ``self.symtab``,
    ``self.thumbtab`` (sorted by first tuple element) and
    ``self.code_addrs`` (sorted by ``'address'``).

    :param filename: path of the ELF file.
    :raises Exception: if the file cannot be opened or parsed as ELF.
    """
    stream = None
    try:
        stream = open(filename, 'rb')
        elf = ELFFile(stream)
    except Exception:
        # Catch ``Exception`` only, so KeyboardInterrupt/SystemExit still
        # propagate, and release the handle when parsing fails.
        if stream is not None:
            stream.close()
        raise Exception("[-] This file is not an ELF file: %s" % filename)

    # The stream is left open because segments and sections are read
    # through it below.
    self.arch = elf.get_machine_arch()
    self.entry = elf.header.e_entry
    self.memory = self.load_code_segments(elf.iter_segments(), filename)
    self.symtab, self.thumbtab, self.code_addrs = self.load_section_info(
        elf.iter_sections())
    self.thumbtab.sort(key=lambda tup: tup[0])
    self.code_addrs = sorted(self.code_addrs, key=lambda k: k['address'])
def parse_elf(self):
    """Open the binary named by ``self.f`` and parse its PLT and symbols.

    ``self.f`` is replaced by the opened file object, which is closed
    before returning. On a supported architecture this sets ``self.md``
    (capstone handle), ``self.align`` and, when a ``.plt`` section exists,
    ``self.plt_start``, ``self.plt_end`` and ``self.plti``.

    :return: ``None`` for an unsupported architecture; the result of
        ``parse_symbols`` on ``.dynsym`` when ``self.to_look`` is set and
        that section exists; otherwise ``self.plts``.
    """
    self.f = open(self.f, 'rb')  # read binary from command line
    try:
        elff = ELFFile(self.f)

        arch = elff.get_machine_arch()
        if arch == "x64":
            cs_arch = CS_ARCH_X86
            cs_mode = CS_MODE_64
        elif arch == "x86":
            cs_arch = CS_ARCH_X86
            cs_mode = CS_MODE_32
        else:
            print("ELF architecture '%s' currently not supported" % arch)
            return

        # Initialize capstone
        self.md = Cs(cs_arch, cs_mode)

        # Difference between the address and file offset of section 1.
        s = elff.get_section(1)
        self.align = s['sh_addr'] - s['sh_offset']

        s = elff.get_section_by_name('.plt')
        if s:
            print('.plt')
            self.plt_start = s['sh_addr']
            self.plt_end = s['sh_addr'] + s['sh_size']
            # Skips the first 16 bytes of the section.
            self.plti = self.plt_start + 16
            print('0x%x 0x%x' % (self.plt_start, self.plt_end))

            s = elff.get_section_by_name('.dynsym')
            if s:
                print(s.name)
                syms = self.parse_symbols(s)
                if self.to_look:
                    return syms
            else:
                print('No Dynamic Symbols table (.dynsym)')

            s = elff.get_section_by_name('.symtab')
            if s:
                print(s.name)
                self.parse_symbols(s)
            else:
                print('No Symbols Table (.symtab)')
        else:
            print('No plt table (.plt)')

        return self.plts
    finally:
        # Close on every exit path, including the unsupported-architecture
        # return and any parsing error, which used to leak the handle.
        self.f.close()
def get_func_bounds(filename, function_name):
    """Return the ``(low_pc, high_pc)`` bounds of a function from DWARF.

    The first ``DW_TAG_subprogram`` DIE whose ``DW_AT_name`` equals
    *function_name* is used. A ``DW_AT_high_pc`` smaller than
    ``DW_AT_low_pc`` is treated as an offset from the low address.

    :param filename: path of the ELF file.
    :param function_name: name compared against ``DW_AT_name``.
    :return: ``(low_addr, high_addr)``, or ``None`` when the file has no
        DWARF info or no matching function exists.
    """
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # The location lists and LocationParser that used to be built here
        # were never consulted, so they are no longer created.
        for CU in dwarfinfo.iter_CUs():
            for DIE in CU.iter_DIEs():
                # Find the function
                if DIE.tag != "DW_TAG_subprogram":
                    continue

                fname = ""
                low_addr = 0
                high_addr = 0
                for attr in itervalues(DIE.attributes):
                    if attr.name == "DW_AT_name":
                        fname = attr.value
                    elif attr.name == "DW_AT_low_pc":
                        low_addr = attr.value
                    elif attr.name == "DW_AT_high_pc":
                        high_addr = attr.value

                # high_pc below low_pc is taken to be a size, not an address.
                if high_addr < low_addr:
                    high_addr = low_addr + high_addr

                if fname == function_name:
                    return (low_addr, high_addr)
def test_basic(self):
    """Check header properties and a MIPS-specific section type."""
    sample_path = os.path.join(
        'test', 'testfiles_for_unittests', 'simple_gcc.elf.mips')
    with open(sample_path, 'rb') as stream:
        elf = ELFFile(stream)

        machine = elf.get_machine_arch()
        self.assertEqual(machine, 'MIPS')

        # Check some other properties of this ELF file derived from readelf
        entry_point = elf['e_entry']
        self.assertEqual(entry_point, 0x0)
        section_count = elf.num_sections()
        self.assertEqual(section_count, 25)
        segment_count = elf.num_segments()
        self.assertEqual(segment_count, 0)

        # Test that Mips-specific section types work; these types are
        # available only when the file is identified as MIPS in the
        # e_machine header field.
        section_type = elf.get_section(9)['sh_type']
        self.assertEqual(section_type, 'SHT_MIPS_DWARF')
def process_file(filename):
    """Print every location list referenced by DIE attributes of a file.

    For each compile unit its offset and length are printed, followed by
    one entry per attribute for which ``attribute_has_location_list`` is
    true.

    :param filename: path of the ELF file to inspect.
    :return: ``None``; returns early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # DWARFInfo is the starting point for all DWARF-based processing
        # in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # LocationLists object extracted from the .debug_loc section.
        location_lists = dwarfinfo.location_lists()

        # Needed so the descriptions module decodes register names in
        # DWARF expressions correctly.
        set_global_machine_arch(elffile.get_machine_arch())

        for unit in dwarfinfo.iter_CUs():
            # Header elements of a compile unit are read by item lookup.
            print(' Found a compile unit at offset %s, length %s' % (
                unit.cu_offset, unit['unit_length']))

            for die in unit.iter_DIEs():
                # Each attribute is an AttributeValue object.
                for attr in itervalues(die.attributes):
                    if not attribute_has_location_list(attr):
                        continue
                    # The attribute value is an offset into .debug_loc,
                    # decoded through the location lists object.
                    loclist = location_lists.get_location_list_at_offset(
                        attr.value)
                    rendered = show_loclist(loclist, dwarfinfo, indent=' ')
                    print(' DIE %s. attr %s.\n%s' % (
                        die.tag, attr.name, rendered))
class CGCElf(Binary):
    """CGC binary wrapper that parses the file through the ELF parser."""

    @staticmethod
    def _cgc2elf(filename):
        """Return an in-memory copy of *filename* that starts with ELF magic.

        The first four bytes of the copy are overwritten with ``\\x7fELF``
        so the upstream ELF parser accepts it; the stream keeps the
        original file name in ``name``.
        """
        # hack begin so we can use upstream Elftool
        with open(filename, 'rb') as fd:
            contents = fd.read()
            source_name = fd.name
        stream = io.BytesIO(contents)
        stream.write(b'\x7fELF')
        stream.name = source_name
        return stream

    def __init__(self, filename):
        """Parse *filename*; only 32-bit x86 ``ET_EXEC`` files pass the checks."""
        super().__init__(filename)
        self.elf = ELFFile(self._cgc2elf(filename))

        arch_names = {'x86': 'i386', 'x64': 'amd64'}
        self.arch = arch_names[self.elf.get_machine_arch()]

        assert 'i386' == self.arch
        assert self.elf.header.e_type in ['ET_EXEC']

    def maps(self):
        """Yield ``(vaddr, memsz, perms, name, offset, filesz)`` per mapping.

        Only non-empty ``PT_LOAD`` segments are yielded.

        :raises Exception: on an unexpected segment type or a loadable
            segment without read permission.
        """
        supported_types = ('PT_LOAD', 'PT_NULL', 'PT_PHDR', 'PT_CGCPOV2')
        # Indexed by the low three flag bits.
        # PF_X 0x1 Execute - PF_W 0x2 Write - PF_R 0x4 Read
        perm_table = [' ', ' x', ' w ', ' wx', 'r ', 'r x', 'rw ', 'rwx']

        for elf_segment in self.elf.iter_segments():
            header = elf_segment.header
            if header.p_type not in supported_types:
                raise Exception("Not Supported Section")

            if header.p_type != 'PT_LOAD' or header.p_memsz == 0:
                continue

            perms = perm_table[header.p_flags & 7]
            if 'r' not in perms:
                raise Exception("Not readable map from cgc elf not supported")

            # CGCMAP--
            assert header.p_filesz != 0 or header.p_memsz != 0
            mapping = (header.p_vaddr,
                       header.p_memsz,
                       perms,
                       elf_segment.stream.name,
                       header.p_offset,
                       header.p_filesz)
            yield mapping

    def threads(self):
        """Yield a single running thread positioned at the entry point."""
        registers = {'EIP': self.elf.header.e_entry}
        yield ('Running', registers)
def __init__(
    self,
    image,
    trace=False,
    syms=False,
    timeout=None,
    preformatted_image=os.path.join("..", "floppy.img.zip"),
    argv=None,
    keep_temps=False,
):
    """Prepare a QEMU model and a bootable floppy image for a kernel.

    :param image: path of the kernel ELF image; must exist.
    :param trace: stored on the instance.
    :param syms: when true, build ``self.symbols`` mapping symbol value to
        name from every symbol table section.
    :param timeout: stored on the instance.
    :param preformatted_image: base image handed to ``Image``.
    :param argv: arguments passed to ``create_grub_conf``.
    :param keep_temps: stored on the instance.
    :raises RuntimeError: if the image is not an x86 ELF.
    """
    self.image = image
    self.trace = trace
    self.syms = syms
    self.timeout = timeout
    self.argv = argv
    self.keep_temps = keep_temps

    assert os.path.exists(self.image)

    with open(self.image, "rb") as fd:
        elffile = ELFFile(fd)
        machine_arch = elffile.get_machine_arch()
        if machine_arch == "x86":
            self.arch = "X86"
        else:
            # Previously referenced an undefined name here, which raised
            # NameError instead of the intended RuntimeError.
            raise RuntimeError("Unknown architecture: %s" % machine_arch)

        if syms:
            # Get the symbols in the file.
            self.symbols = {}
            for section in elffile.iter_sections():
                if not isinstance(section, SymbolTableSection):
                    continue
                for sym in section.iter_symbols():
                    self.symbols[sym["st_value"]] = sym.name

    if self.arch == "X86":
        self.model = Qemu("qemu-system-i386", [])
    else:
        raise RuntimeError("Unknown architecture: %s" % self.arch)

    # mkstemp is only used to reserve a path; the descriptor is not needed.
    fd, self.tmpimage = tempfile.mkstemp()
    os.close(fd)

    self.floppy_image = Image(self.tmpimage, preformatted_image)
    self.floppy_image.create_grub_conf(args=self.argv)
    self.floppy_image.copy(self.image, "/kernel")
def get_executable_arch(path):
    """
    Return the machine architecture of an ELF executable

    Parameters
    ----------
    path : str
        path to the binary to inspect (converted with ``str`` before opening)

    Returns
    -------
    str
        Architecture as reported by ``ELFFile.get_machine_arch``
    """
    binary_path = str(path)
    with open(binary_path, "rb") as stream:
        return ELFFile(stream).get_machine_arch()
def test_basic(self):
    """Check header properties and a MIPS-specific section type."""
    sample_path = os.path.join(
        'test', 'testfiles_for_unittests', 'simple_gcc.elf.mips')
    with open(sample_path, 'rb') as stream:
        elf = ELFFile(stream)

        machine = elf.get_machine_arch()
        self.assertEqual(machine, 'MIPS')

        # Check some other properties of this ELF file derived from readelf
        entry_point = elf['e_entry']
        self.assertEqual(entry_point, 0x0)
        section_count = elf.num_sections()
        self.assertEqual(section_count, 25)
        segment_count = elf.num_segments()
        self.assertEqual(segment_count, 0)

        # Test that Mips-specific section types work; these types are
        # available only when the file is identified as MIPS in the
        # e_machine header field.
        section_type = elf.get_section(9)['sh_type']
        self.assertEqual(section_type, 'SHT_MIPS_DWARF')
def process_file(filename):
    """Return every DIE of *filename*, keyed by its offset.

    :param filename: path of the ELF file to read.
    :return: ``OrderedDict`` mapping ``DIE.offset`` to the DIE, in
        compile-unit iteration order.
    :raises ValueError: if the file has no DWARF info.
    """
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)

        if not elffile.has_dwarf_info():
            raise ValueError(filename + ' has no DWARF info')

        # DWARFInfo is the starting point for all DWARF-based processing
        # in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # Needed so the descriptions module decodes register names in
        # DWARF expressions correctly.
        set_global_machine_arch(elffile.get_machine_arch())

        # Built while the file is still open, since iteration reads from it.
        return OrderedDict(
            (die.offset, die)
            for unit in dwarfinfo.iter_CUs()
            for die in unit.iter_DIEs()
        )
def __init__(self, image, trace=False, syms=False, timeout=None,
             preformatted_image=os.path.join('..', 'floppy.img.zip'),
             argv=None, keep_temps=False, qemu_opts=None):
    """Set up an emulator run for the kernel ELF file ``image``.

    The constructor records the run options, checks that ``image`` is an
    x86 ELF file, optionally harvests its symbol table, selects the
    emulator model and builds a temporary floppy image with the kernel.

    Parameters:
        image: path to the kernel ELF file; it must exist.
        trace, timeout, keep_temps: stored unchanged on the instance.
        syms: when true, ``self.symbols`` is filled with a mapping of
            symbol value (``st_value``) to symbol name.
        preformatted_image: passed to ``Image`` together with the
            temporary file name.
        argv: passed to ``create_grub_conf`` as ``args``.
        qemu_opts: extra options handed to ``Qemu``; ``None`` (the
            default) means an empty list.

    Raises:
        AssertionError: if ``image`` does not exist.
        RuntimeError: if the ELF machine architecture is not x86.
    """
    self.image = image
    self.trace = trace
    self.syms = syms
    self.timeout = timeout
    self.argv = argv
    self.keep_temps = keep_temps
    # A fresh list per instance: a literal [] default would be shared by
    # every instance created without an explicit qemu_opts.
    self.qemu_opts = [] if qemu_opts is None else qemu_opts

    assert os.path.exists(self.image)

    with open(self.image, 'rb') as fd:
        elffile = ELFFile(fd)
        machine = elffile.get_machine_arch()
        if machine == 'x86':
            self.arch = 'X86'
        else:
            # Build the message from the value we actually read; the
            # previous code referenced an undefined name here.
            raise RuntimeError("Unknown architecture: %s" % machine)

        if syms:
            # Get the symbols in the file.
            self.symbols = {}
            for section in elffile.iter_sections():
                if not isinstance(section, SymbolTableSection):
                    continue
                for sym in section.iter_symbols():
                    self.symbols[sym['st_value']] = sym.name

    if self.arch == 'X86':
        # The MODEL environment variable selects Bochs; anything else
        # (including unset) falls back to Qemu.
        if os.environ.get('MODEL', '').lower() == 'bochs':
            self.model = Bochs('bochs', [])
        else:
            self.model = Qemu('qemu-system-i386', self.qemu_opts)
    else:
        raise RuntimeError("Unknown architecture: %s" % self.arch)

    # mkstemp() hands back an open descriptor; only the path is needed.
    fd, self.tmpimage = tempfile.mkstemp()
    os.close(fd)

    self.floppy_image = Image(self.tmpimage, preformatted_image)
    self.floppy_image.create_grub_conf(args=self.argv)
    self.floppy_image.copy(self.image, '/kernel')
class Elf(Binary):
    """Binary loader for x86 / x86-64 ELF files (executables, shared
    objects and core files), including the program interpreter if the
    file names one."""

    def __init__(self, filename):
        """Parse ``filename`` and, recursively, its PT_INTERP interpreter.

        Raises KeyError for a machine architecture other than x86/x64 and
        AssertionError for an unexpected ELF type or an interpreter whose
        architecture differs from the main file.
        """
        super(Elf, self).__init__(filename)
        # ELF is a binary format, so open in 'rb'. The stream is left open
        # on purpose: maps() still goes through self.elf after __init__.
        self.elf = ELFFile(open(filename, 'rb'))
        self.arch = {'x86': 'i386', 'x64': 'amd64'}[self.elf.get_machine_arch()]
        assert self.elf.header.e_type in ['ET_DYN', 'ET_EXEC', 'ET_CORE']

        # Get interpreter elf
        self.interpreter = None
        for elf_segment in self.elf.iter_segments():
            if elf_segment.header.p_type != 'PT_INTERP':
                continue
            # The segment data is the interpreter path; drop its last
            # byte (the terminator) before using it as a file name.
            self.interpreter = Elf(elf_segment.data()[:-1])
            break

        if self.interpreter is not None:
            assert self.interpreter.arch == self.arch
            assert self.interpreter.elf.header.e_type in ['ET_DYN', 'ET_EXEC']

    def maps(self):
        """Yield one tuple per non-empty PT_LOAD segment.

        Each tuple is ``(vaddr, memsz, perms, stream_name, offset, filesz)``
        where ``perms`` is a three-character ``rwx`` string with blanks for
        missing permissions.

        Raises Exception for a loadable segment that is not readable.
        """
        # Indexed by the low three flag bits:
        # PF_X 0x1 Execute - PF_W 0x2 Write - PF_R 0x4 Read
        perm_table = ['   ', '  x', ' w ', ' wx', 'r  ', 'r x', 'rw ', 'rwx']
        for elf_segment in self.elf.iter_segments():
            if elf_segment.header.p_type != 'PT_LOAD' or elf_segment.header.p_memsz == 0:
                continue

            flags = elf_segment.header.p_flags
            perms = perm_table[flags & 7]
            if 'r' not in perms:
                raise Exception("Not readable map from cgc elf not supported")

            # CGCMAP--
            assert elf_segment.header.p_filesz != 0 or elf_segment.header.p_memsz != 0
            yield ((elf_segment.header.p_vaddr,
                    elf_segment.header.p_memsz,
                    perms,
                    elf_segment.stream.name,
                    elf_segment.header.p_offset,
                    elf_segment.header.p_filesz))

    def getInterpreter(self):
        """Return the interpreter ``Elf`` or None if the file names none."""
        return self.interpreter

    def threads(self):
        """Yield the single initial thread, positioned at the entry point."""
        yield (('Running', {'EIP': self.elf.header.e_entry}))
class CGCElf(Binary):
    """Binary loader for CGC executables, parsed through the regular ELF
    reader after their magic bytes are rewritten in memory."""

    @staticmethod
    def _cgc2elf(filename):
        """Return an in-memory copy of ``filename`` with an ELF magic.

        The first four bytes of the copy are overwritten with ``\\x7fELF``
        so the upstream ELF parser accepts the file; the on-disk file is
        not modified. The returned stream carries the original file name
        in its ``name`` attribute.
        """
        # hack begin so we can use upstream Elftool
        with open(filename, 'rb') as fd:
            stream = StringIO.StringIO(fd.read())
            # A new StringIO starts at position 0, so this replaces the
            # leading magic rather than appending.
            stream.write('\x7fELF')
            stream.name = fd.name
            return stream

    def __init__(self, filename):
        """Parse ``filename``; only 32-bit x86 ET_EXEC files are accepted."""
        super(CGCElf, self).__init__(filename)
        stream = self._cgc2elf(filename)
        self.elf = ELFFile(stream)
        self.arch = {'x86': 'i386', 'x64': 'amd64'}[self.elf.get_machine_arch()]

        assert 'i386' == self.arch
        assert self.elf.header.e_type in ['ET_EXEC']

    def maps(self):
        """Yield one tuple per non-empty PT_LOAD segment.

        Each tuple is ``(vaddr, memsz, perms, stream_name, offset, filesz)``
        where ``perms`` is a three-character ``rwx`` string with blanks for
        missing permissions.

        Raises Exception for an unsupported segment type or for a loadable
        segment that is not readable.
        """
        # Indexed by the low three flag bits:
        # PF_X 0x1 Execute - PF_W 0x2 Write - PF_R 0x4 Read
        perm_table = ['   ', '  x', ' w ', ' wx', 'r  ', 'r x', 'rw ', 'rwx']
        for elf_segment in self.elf.iter_segments():
            if elf_segment.header.p_type not in ['PT_LOAD', 'PT_NULL', 'PT_PHDR', 'PT_CGCPOV2']:
                raise Exception("Not Supported Section")

            if elf_segment.header.p_type != 'PT_LOAD' or elf_segment.header.p_memsz == 0:
                continue

            flags = elf_segment.header.p_flags
            perms = perm_table[flags & 7]
            if 'r' not in perms:
                raise Exception("Not readable map from cgc elf not supported")

            # CGCMAP--
            assert elf_segment.header.p_filesz != 0 or elf_segment.header.p_memsz != 0
            yield ((elf_segment.header.p_vaddr,
                    elf_segment.header.p_memsz,
                    perms,
                    elf_segment.stream.name,
                    elf_segment.header.p_offset,
                    elf_segment.header.p_filesz))

    def threads(self):
        """Yield the single initial thread, positioned at the entry point."""
        yield (('Running', {'EIP': self.elf.header.e_entry}))
def main(argv):
    """Parse the command line, inspect the ELF file and print a payload.

    Recognised options:
        -b / --elf_file PATH   ELF file to load
        -m / --mode MODE       '10' for x86 payloads, '20' for x64 payloads
        -h / --help            print the help text and exit

    The help text is printed and the process exits with status 2 when the
    arguments are empty or cannot be parsed. When the mode does not match
    the binary's architecture, "[!] not supported" and the help text are
    printed.
    """
    elf_file = ""
    mode = ""

    try:
        opts, _ = getopt.getopt(argv, "hb:m:", ["help", "elf_file=", "mode="])
    except getopt.GetoptError:
        help_msg()
        sys.exit(2)

    if not argv:
        help_msg()
        sys.exit(2)

    for opt, arg in opts:
        # Compare against the exact option names: substring tests such as
        # '"-b" in opt' never match "--elf_file".
        if opt in ("-h", "--help"):
            help_msg()
            sys.exit(2)
        elif opt in ("-b", "--elf_file"):
            elf_file = arg
        elif opt in ("-m", "--mode"):
            mode = arg

    print("[+] load elf : " + elf_file)
    print("[+] using mode : " + str(mode))

    with open(elf_file, 'rb') as f:
        binarch = ELFFile(f).get_machine_arch()

    if mode == '10' and binarch == "x86":
        payload = load_elfx86(elf_file)
        print("\n\n")
        print(payload)
    elif mode == "20" and binarch == "x64":
        payload = load_elfx64(elf_file)
        print("\n\n")
        print(payload)
    else:
        print("[!] not supported")
        help_msg()
def main(path_to_sample):
    """Process the DWARF data of ``path_to_sample`` and dump funcs.json.

    The module-level globals ``dwarf_info``, ``location_lists`` and
    ``call_frame_information_entries`` are (re)assigned from the file,
    every compile unit is passed to ``process_compile_unit``, and the
    module-level ``functions`` list is then written to ``funcs.json`` as a
    JSON object keyed by each entry's ``'address'``.

    Raises AssertionError when the file has no DWARF information.
    """
    global dwarf_info
    global location_lists
    global call_frame_information_entries

    with open(path_to_sample, 'rb') as f:
        pyelftools_elf_file = ELFFile(f)
        assert pyelftools_elf_file.has_dwarf_info(), 'file has no DWARF info'

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarf_info = pyelftools_elf_file.get_dwarf_info()
        call_frame_information_entries = dwarf_info.CFI_entries()
        location_lists = dwarf_info.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(pyelftools_elf_file.get_machine_arch())

        # DWARFInfo allows to iterate over the compile units contained in
        # the .debug_info section. CU is a CompileUnit object, with some
        # computed attributes (such as its offset in the section) and
        # a header which conforms to the DWARF standard. The access to
        # header elements is, as usual, via item-lookup.
        for compile_unit in dwarf_info.iter_CUs():
            process_compile_unit(dwarf_info, pyelftools_elf_file, compile_unit)

    # Remove dw_op_call_frame_cfa, as it is not serializable. An explicit
    # loop is used because map() does nothing until consumed where it is
    # lazy; entries that never had the key are left as they are.
    for func in functions:
        func.pop('dw_op_call_frame_cfa', None)

    # Convert into a dict keyed by address.
    funcs = {func['address']: func for func in functions}

    # json.dumps() returns text, so the output file is opened in text mode.
    with open('funcs.json', 'w') as out:
        out.write(json.dumps(funcs))
def main():
    # Builds the snippet database for source_dir and, in the code after the
    # early return below, benchmarks the search engine on random .text
    # snippets of the x64 ELF files found under that directory.
    source_dir = "/mnt/mnt/libcdb/libc"
    snippet_size = 32
    test_amount = 100
    random_seed = 'looks_like_someone_fucked_with_your_RNG'
    db = kiss.DatabaseInstance(source_dir, "result")
    db.generate_database_to_file()
    db.construct_from_file()
    # print("DONE constructing database")
    # NOTE(review): this bare return ends main() right after the database is
    # built, so everything below is currently unreachable - confirm that this
    # is intended and not a leftover from debugging.
    return
    random.seed(random_seed)
    start_time = time.time()
    search_engine = kiss.SearchEngine("result.cdb", "result.ofst")
    database_load_time = time.time() - start_time
    print("AAAAAAAA")
    # Go through files recursively and pick a snippet in their .text segment at random
    file_list = []
    data_sizes = []
    for file_name in glob.glob(source_dir + '/**/*', recursive=True):
        if os.path.islink(file_name):
            continue
        try:
            with open(file_name, 'rb') as f:
                elffile = ELFFile(f)
                # Only 64-bit x64 binaries take part in the test base.
                if (elffile.get_machine_arch() != 'x64'):
                    continue
                if (elffile.elfclass != 64):
                    continue
                data_size = len(elffile.get_section_by_name('.text').data())
                # print(file_name)
                # print("text_offset: " + hex(elffile.get_section_by_name('.text')['sh_offset']))
                # return
                file_list.append([file_name, data_size])
                data_sizes.append(data_size)
        except Exception:
            # Anything that cannot be opened or parsed (including files
            # without a .text section) is silently left out.
            continue
    print("test base has {} files".format(len(file_list)))
    # Make random selections in the testbase
    # NOTE(review): the code below reads tests_amount and SNIPPET_SIZE, while
    # this function only assigns test_amount and snippet_size - presumably the
    # upper-case / plural names are module-level; verify, otherwise these are
    # NameErrors.
    test_locations = {}
    for _ in range(0, tests_amount):
        # Choose with relative weights
        chosen = random.choices(population=file_list, weights=data_sizes)[0]
        position = random.randint(0, chosen[1] - SNIPPET_SIZE)
        if (chosen[0] in test_locations):
            test_locations[chosen[0]].append(position)
        else:
            test_locations[chosen[0]] = [position]
    # Statistics
    # The counters live in a Manager dict so the worker processes started
    # below can be handed the same object.
    mp.set_start_method('fork')
    manager = mp.Manager()
    stats = manager.dict()
    stats["unidentified"] = 0
    stats["no_fun_name"] = 0
    stats["wrong_fun_name"] = 0
    stats["time_sum"] = 0
    stats["wrong_hit_count"] = manager.dict()
    # stats["counter"] = 0
    # fork off tests
    processes_num = min(NUM_PROCESSES, len(test_locations))
    splitted_test_locations = chunks(test_locations, int(tests_amount / processes_num))
    jobs = []
    for l in splitted_test_locations:
        p = mp.Process(target=forked_test, args=(search_engine, l, stats))
        jobs.append(p)
        p.start()
    # wait for them all to finish
    for proc in jobs:
        proc.join()
    time_avg = stats["time_sum"] / tests_amount
    print("\n\nTested {} snippets of size {} and had".format(
        tests_amount, SNIPPET_SIZE))
    print("\t\t{} succesful identifications".format(tests_amount - stats["unidentified"]))
    # print("\t\t{} false positives".format(sum(stats["wrong_hit_count"])))
    # print("\t\t{} average amount of wrong hits per function".format(statistics.mean(stats["wrong_hit_count"])))
    # print("\t\t{} stdev of amount of wrong hits per function".format(statistics.stdev(stats["wrong_hit_count"])))
    print("\t\t{} wrong function names".format(stats["wrong_fun_name"]))
    print("\t\tthe average query time was {} seconds".format(time_avg))
    print("\t\tthe library was loaded in {}".format(database_load_time))
    # convert wrong_hit_count dict to list
    # wrong_hit_count_list = []
    # print(wrong_hit_count_list)
    # Dump the per-key wrong-hit counters as a two-column CSV file.
    with open('false_positives.csv', 'w') as f:  # Just use 'w' mode in 3.x
        writer = csv.writer(f, delimiter=',')
        writer.writerow(["false_positives", "amount"])
        for k, v in stats["wrong_hit_count"].items():
            # wrong_hit_count_list.append([k, v])
            writer.writerow([k, v])
if __name__ == "__main__":
    # Script entry point: copy libcoldstart.so to a new path and run the
    # TLS 1.3 patcher against it.
    # Validate command line args
    # argv[1] is the input library (default: ./libcoldstart.so) and argv[2]
    # the output path (default: libcoldstart-patched.so next to the input).
    try:
        libcoldstart_path = sys.argv[1]
    except IndexError:
        libcoldstart_path = os.path.join(os.getcwd(), "libcoldstart.so")
    try:
        new_path = sys.argv[2]
    except IndexError:
        new_path = os.path.join(os.path.dirname(libcoldstart_path), "libcoldstart-patched.so")
    # The handle is passed on to the patcher together with the parsed ELF
    # object, so it is not closed here.
    f = open(libcoldstart_path, "rb")
    # Validate input file
    elf = ELFFile(f)
    arch = elf.get_machine_arch()
    # NOTE(review): an unsupported architecture is only reported; nothing
    # visible here stops the script afterwards - confirm this is intended.
    # The message also lists only ARM and x86 although AArch64 is accepted.
    if arch not in ("ARM", "x86", "AArch64"):
        print(
            "[!] ERROR: Unknown architecture in libcoldstart.so, this script only supports ARM and x86!"
        )
    # The copy is created before patching.
    # presumably the patcher writes into new_path - verify against TLS13Patcher
    shutil.copyfile(libcoldstart_path, new_path)
    patched = False
    patcher13 = TLS13Patcher(f, elf, arch, new_path)
    if patcher13.find_error_strings():
        print("[+] Patching TLS1.3 stack!")
        patcher13.patch()
        patched = True
    else:
class ELF:
    """ELF back end for a ``classbinary`` object.

    Symbols found in the file are published through
    ``classbinary.symbols`` (name -> address) and
    ``classbinary.reverse_symbols`` (address -> name). Section lookups are
    served from the parsed section headers.
    """

    def __init__(self, classbinary, filename):
        """Open ``filename`` and prepare the capstone lookup tables."""
        import capstone as CAPSTONE

        # The file stays open: section_stream_read() reads from the
        # section streams after construction.
        fd = open(filename, "rb")
        self.elf = ELFFile(fd)
        self.classbinary = classbinary

        self.__data_sections = []
        self.__data_sections_content = []
        # Sections already resolved by __get_section(), searched first.
        self.__exec_sections = []

        self.arch_lookup = {
            "x86": CAPSTONE.CS_ARCH_X86,
            "x64": CAPSTONE.CS_ARCH_X86,
            "ARM": CAPSTONE.CS_ARCH_ARM,
        }

        # Values here are capstone *modes*; ARM therefore maps to
        # CS_MODE_ARM, not to the architecture constant.
        self.arch_mode_lookup = {
            "x86": CAPSTONE.CS_MODE_32,
            "x64": CAPSTONE.CS_MODE_64,
            "ARM": CAPSTONE.CS_MODE_ARM,
        }

    def load_static_sym(self):
        """Publish every named, non-zero symbol of ``.symtab``."""
        symtab = self.elf.get_section_by_name(b".symtab")
        if symtab is None:
            return
        for sy in symtab.iter_symbols():
            if sy.entry.st_value != 0 and sy.name != b"":
                self.classbinary.reverse_symbols[sy.entry.st_value] = sy.name.decode()
                self.classbinary.symbols[sy.name.decode()] = sy.entry.st_value
                # print("%x\t%s" % (sy.entry.st_value, sy.name.decode()))

    def load_dyn_sym(self):
        """Publish ``name@plt`` symbols for the entries of ``.plt``.

        PLT slots are matched, in order, with the relocations of
        ``.rela.plt`` / ``.rel.plt``. Nothing happens when one of the
        required sections is missing.
        """
        rel = (self.elf.get_section_by_name(b".rela.plt") or
               self.elf.get_section_by_name(b".rel.plt"))
        dyn = self.elf.get_section_by_name(b".dynsym")
        plt = self.elf.get_section_by_name(b".plt")

        if rel is None or dyn is None or plt is None:
            return

        # TODO : are constants ?
        PLT_SIZE = {
            "x86": 16,
            "x64": 16,
            "ARM": 12,
        }

        PLT_FIRST_ENTRY_OFF = {
            "x86": 16,
            "x64": 16,
            "ARM": 20,
        }

        arch = self.elf.get_machine_arch()
        relitems = list(rel.iter_relocations())
        dynsym = list(dyn.iter_symbols())

        plt_entry_size = PLT_SIZE[arch]
        off = plt.header.sh_addr + PLT_FIRST_ENTRY_OFF[arch]
        plt_end = plt.header.sh_addr + plt.header.sh_size
        k = 0

        # Stop at the end of .plt or when the relocations run out,
        # whichever comes first, so a padded .plt cannot overrun the list.
        while off < plt_end and k < len(relitems):
            idx = relitems[k].entry.r_info_sym
            name = dynsym[idx].name.decode()
            self.classbinary.reverse_symbols[off] = name + "@plt"
            self.classbinary.symbols[name + "@plt"] = off
            off += plt_entry_size
            k += 1

    def load_data_sections(self):
        """Cache every data section together with its contents."""
        for s in self.elf.iter_sections():
            if self.__section_is_data(s):
                self.__data_sections.append(s)
                self.__data_sections_content.append(s.data())

    def __get_data_section_idx(self, addr):
        """Return the index of the cached data section holding ``addr``,
        or -1."""
        for i, s in enumerate(self.__data_sections):
            start = s.header.sh_addr
            end = start + s.header.sh_size
            if start <= addr < end:
                return i
        return -1

    def __section_is_data(self, s):
        """Truthy when ``s`` is writable or allocated and not executable."""
        mask = SH_FLAGS.SHF_WRITE | SH_FLAGS.SHF_ALLOC
        return s.header.sh_flags & mask and not self.__section_is_exec(s)

    def is_address(self, imm):
        """Return ``(section_name, is_data)`` for the section containing
        ``imm``, or ``(None, False)``. Sections at address 0 are ignored."""
        for s in self.elf.iter_sections():
            start = s.header.sh_addr
            if start == 0:
                continue
            end = start + s.header.sh_size
            if start <= imm < end:
                return s.name.decode(), self.__section_is_data(s)
        return None, False

    def __get_cached_exec_section(self, addr):
        """Look ``addr`` up among the sections resolved earlier."""
        for s in self.__exec_sections:
            start = s.header.sh_addr
            end = start + s.header.sh_size
            if start <= addr < end:
                return s
        return None

    def __find_section(self, addr):
        """Scan all section headers for the one containing ``addr``."""
        for s in self.elf.iter_sections():
            start = s.header.sh_addr
            end = start + s.header.sh_size
            if start <= addr < end:
                return s
        return None

    def __get_section(self, addr):
        """Return the section containing ``addr`` (cached after the first
        hit), or None."""
        s = self.__get_cached_exec_section(addr)
        if s is not None:
            return s
        s = self.__find_section(addr)
        if s is None:
            return None
        self.__exec_sections.append(s)
        return s

    def check_addr(self, addr):
        """Return ``(is_mapped, is_executable)`` for ``addr``.

        An address outside every section yields ``(False, False)``.
        """
        s = self.__get_section(addr)
        if s is None:
            return (False, False)
        return (True, self.__section_is_exec(s))

    def get_section_start(self, addr):
        """Return the start address of the section containing ``addr``,
        or 0 when there is none."""
        s = self.__get_section(addr)
        if s is None:
            return 0
        return s.header.sh_addr

    def section_stream_read(self, addr, size):
        """Read ``size`` bytes at virtual address ``addr`` from the file.

        ``addr`` must lie inside a section.
        """
        s = self.__get_section(addr)
        off = addr - s.header.sh_addr
        s.stream.seek(s.header.sh_offset + off)
        return s.stream.read(size)

    def __section_is_exec(self, s):
        """Truthy when ``s`` carries SHF_EXECINSTR."""
        return s.header.sh_flags & SH_FLAGS.SHF_EXECINSTR

    def get_string(self, addr, max_data_size):
        """Return the quoted string stored at ``addr`` in a cached data
        section.

        At most ``max_data_size`` bytes are rendered; "..." is appended
        when the string was cut before its terminating zero byte. An
        address outside the cached data sections yields "".
        """
        i = self.__get_data_section_idx(addr)
        if i == -1:
            return ""

        s = self.__data_sections[i]
        data = self.__data_sections_content[i]
        off = addr - s.header.sh_addr
        txt = ['"']

        # Pre-set so the truncation test below is defined even when the
        # loop body never runs (max_data_size <= 0).
        c = 0
        count = 0
        while count < max_data_size and \
              off < s.header.sh_size:
            c = data[off]
            if c == 0:
                break
            txt.append(lib.utils.get_char(c))
            off += 1
            count += 1

        if c != 0 and off != s.header.sh_size:
            txt.append("...")

        return ''.join(txt) + '"'

    def get_arch(self):
        """Return the capstone ``(arch, mode)`` pair; unknown machines
        give ``(None, None)``."""
        machine = self.elf.get_machine_arch()
        return self.arch_lookup.get(machine, None), \
               self.arch_mode_lookup.get(machine, None)

    def get_arch_string(self):
        """Return the machine architecture name reported by the parser."""
        return self.elf.get_machine_arch()

    def get_entry_point(self):
        """Return the ``e_entry`` field of the ELF header."""
        return self.elf.header['e_entry']

    def iter_sections(self):
        """Yield ``(name, start, end)`` for every named section."""
        for s in self.elf.iter_sections():
            start = s.header.sh_addr
            end = start + s.header.sh_size
            if s.name != b"":
                yield (s.name.decode(), start, end)
class ELFExecutable(BaseExecutable):
    """ELF flavour of ``BaseExecutable``.

    Wraps the in-memory binary (``self.binary``) in an ``ELFFile`` helper,
    extracts sections and symbols from it, and can grow the last section of
    the executable LOAD segment to inject extra code.
    """

    def __init__(self, file_path):
        """Parse the binary loaded by the base class.

        Raises Exception when the machine architecture is not recognised
        and IndexError when the file has no executable PT_LOAD segment.
        """
        super(ELFExecutable, self).__init__(file_path)

        self.helper = ELFFile(self.binary)

        self.architecture = self._identify_arch()

        if self.architecture is None:
            raise Exception('Architecture is not recognized')

        logging.debug('Initialized {} {} with file \'{}\''.format(self.architecture, type(self).__name__, file_path))

        # struct-style format characters for packing addresses.
        self.pack_endianness = '<' if self.helper.little_endian else '>'
        self.address_pack_type = 'I' if self.helper.elfclass == 32 else 'Q'

        self.sections = [section_from_elf_section(s) for s in self.helper.iter_sections()]

        # First PT_LOAD segment with the execute bit (0x1) set.
        self.executable_segment = [s for s in self.helper.iter_segments()
                                   if s['p_type'] == 'PT_LOAD' and s['p_flags'] & 0x1][0]

        dyn = self.helper.get_section_by_name('.dynamic')
        if dyn:
            self.libraries = [t.needed for t in dyn.iter_tags() if t['d_tag'] == 'DT_NEEDED']

        # Both stay None until prepare_for_injection() has run.
        self.next_injection_offset = None
        self.next_injection_vaddr = None

    def _identify_arch(self):
        """Map the ELF machine name to an ``ARCHITECTURE`` value, or None."""
        machine = self.helper.get_machine_arch()
        if machine == 'x86':
            return ARCHITECTURE.X86
        elif machine == 'x64':
            return ARCHITECTURE.X86_64
        elif machine == 'ARM':
            return ARCHITECTURE.ARM
        elif machine == 'AArch64':
            return ARCHITECTURE.ARM_64
        else:
            return None

    def entry_point(self):
        """Return the ``e_entry`` field of the ELF header."""
        return self.helper['e_entry']

    def executable_segment_vaddr(self):
        """Return the virtual address of the executable LOAD segment."""
        return self.executable_segment['p_vaddr']

    def executable_segment_size(self):
        """Return the in-memory size of the executable LOAD segment."""
        # TODO: Maybe limit this because we use this as part of our injection method?
        return self.executable_segment['p_memsz']

    def iter_string_sections(self):
        """Yield the sections named .rodata, .data or .bss."""
        STRING_SECTIONS = ['.rodata', '.data', '.bss']
        for s in self.sections:
            if s.name in STRING_SECTIONS:
                yield s

    def _extract_symbol_table(self):
        """Populate ``self.functions`` from the PLT relocations and from
        the function symbols of ``.symtab``."""
        # Add in symbols from the PLT/rela.plt
        # .rela.plt contains indexes to reference both .dynsym (symbol names) and .plt (jumps to GOT)
        if self.is_64_bit():
            reloc_section = self.helper.get_section_by_name('.rela.plt')
        else:
            reloc_section = self.helper.get_section_by_name('.rel.plt')

        if reloc_section:
            dynsym = self.helper.get_section(reloc_section['sh_link'])  # .dynsym
            if isinstance(dynsym, SymbolTableSection):
                plt = self.helper.get_section_by_name('.plt')
                for idx, reloc in enumerate(reloc_section.iter_relocations()):
                    # Get the symbol's name from dynsym
                    symbol_name = dynsym.get_symbol(reloc['r_info_sym']).name

                    # The address of this function in the PLT is the base PLT offset + the index of the relocation.
                    # However, since there is the extra "trampoline" entity at the top of the PLT, we need to add one to the
                    # index to account for it.

                    # While sh_entsize is sometimes defined, it appears to be incorrect in some cases so we just ignore that
                    # and calculate it based off of the total size / num_relocations (plus the trampoline entity)
                    # Floor division keeps the entry size, and therefore the
                    # PLT address used as dictionary key, an integer.
                    entsize = (plt['sh_size'] // (reloc_section.num_relocations() + 1))

                    plt_addr = plt['sh_addr'] + ((idx+1) * entsize)

                    logging.debug('Directly adding PLT function {} at vaddr {}'.format(symbol_name, hex(plt_addr)))

                    f = Function(plt_addr,
                                 entsize,
                                 symbol_name + '@PLT',
                                 self,
                                 type=Function.DYNAMIC_FUNC)
                    self.functions[plt_addr] = f
            else:
                logging.debug('Relocation section had sh_link to {}. Not parsing symbols...'.format(dynsym))

        # Some things in the symtab have st_size = 0 which confuses analysis later on. To solve this, we keep track of
        # where each address is in the `function_vaddrs` set and go back after all symbols have been iterated to compute
        # size by taking the difference between the current address and the next recorded address.

        # We do this for each executable section so that the produced functions cannot span multiple sections.

        for section in self.helper.iter_sections():
            if self.executable_segment.section_in_segment(section):
                name_for_addr = {}

                # Seeded with the section end so the last function gets a size too.
                function_vaddrs = set([section['sh_addr'] + section['sh_size']])

                symbol_table = self.helper.get_section_by_name('.symtab')
                if symbol_table:
                    for symbol in symbol_table.iter_symbols():
                        if symbol['st_info']['type'] == 'STT_FUNC' and symbol['st_shndx'] != 'SHN_UNDEF':
                            if section['sh_addr'] <= symbol['st_value'] < section['sh_addr'] + section['sh_size']:
                                name_for_addr[symbol['st_value']] = symbol.name
                                function_vaddrs.add(symbol['st_value'])

                                if symbol['st_size']:
                                    logging.debug('Eagerly adding function {} from .symtab at vaddr {} with size {}'
                                                  .format(symbol.name, hex(symbol['st_value']), hex(symbol['st_size'])))
                                    f = Function(symbol['st_value'],
                                                 symbol['st_size'],
                                                 symbol.name,
                                                 self)
                                    self.functions[symbol['st_value']] = f

                function_vaddrs = sorted(list(function_vaddrs))

                for cur_addr, next_addr in zip(function_vaddrs[:-1], function_vaddrs[1:]):
                    # If st_size was set, we already added the function above, so don't add it again.
                    if cur_addr not in self.functions:
                        func_name = name_for_addr[cur_addr]
                        size = next_addr - cur_addr
                        logging.debug('Lazily adding function {} from .symtab at vaddr {} with size {}'
                                      .format(func_name, hex(cur_addr), hex(size)))
                        f = Function(cur_addr,
                                     size,
                                     func_name,
                                     self,
                                     type=Function.DYNAMIC_FUNC)
                        self.functions[cur_addr] = f

        # TODO: Automatically find and label main from call to libc_start_main

    def prepare_for_injection(self):
        """
        Derived from http://vxheavens.com/lib/vsc01.html
        """
        # Grows the last section of the executable LOAD segment by
        # INJECTION_SIZE bytes of 0xCC padding, fixes up the ELF, program and
        # section headers accordingly, and reloads self.binary / self.helper.
        # Returns True on success, False when no executable LOAD segment exists.
        modified = StringIO(self.binary.getvalue())

        # Add INJECTION_SIZE to the section header list offset to make room for our injected code
        elf_hdr = self.helper.header.copy()
        elf_hdr.e_shoff += INJECTION_SIZE
        logging.debug('Changing e_shoff to {}'.format(elf_hdr.e_shoff))

        modified.seek(0)
        modified.write(self.helper.structs.Elf_Ehdr.build(elf_hdr))

        # Find the main RX LOAD segment and also adjust other segment offsets along the way
        executable_segment = None

        for segment_idx, segment in enumerate(self.helper.iter_segments()):
            segment_hdr = segment.header.copy()
            segment_hdr_offset = self.helper._segment_offset(segment_idx)

            if executable_segment is not None:
                # Already past the executable segment, so just update the offset if needed (i.e. don't update things
                # that come before the expanded section)
                if segment_hdr.p_offset > last_exec_section['sh_offset']:
                    segment_hdr.p_offset += INJECTION_SIZE

            elif segment['p_type'] == 'PT_LOAD' and segment['p_flags'] & P_FLAGS.PF_X:
                # Found the executable LOAD segment.
                # Make room for our injected code.

                logging.debug('Found executable LOAD segment at index {}'.format(segment_idx))
                executable_segment = segment

                last_exec_section_idx = max([idx for idx in range(self.helper.num_sections()) if
                                             executable_segment.section_in_segment(self.helper.get_section(idx))])
                last_exec_section = self.helper.get_section(last_exec_section_idx)

                segment_hdr.p_filesz += INJECTION_SIZE
                segment_hdr.p_memsz += INJECTION_SIZE

                logging.debug('Rewriting segment filesize and memsize to {} and {}'.format(
                    segment_hdr.p_filesz, segment_hdr.p_memsz)
                )

            # Every program header is written back, changed or not.
            modified.seek(segment_hdr_offset)
            modified.write(self.helper.structs.Elf_Phdr.build(segment_hdr))

        if executable_segment is None:
            logging.error("Could not locate an executable LOAD segment. Cannot continue injection.")
            return False

        logging.debug('Last section in executable LOAD segment is at index {} ({})'.format(last_exec_section_idx,
                                                                                          last_exec_section.name))

        # New code goes right after the current end of that section.
        self.next_injection_offset = last_exec_section['sh_offset'] + last_exec_section['sh_size']
        self.next_injection_vaddr = last_exec_section['sh_addr'] + last_exec_section['sh_size']

        # Update sh_size for the section we grew
        section_header_offset = self.helper._section_offset(last_exec_section_idx)
        section_header = last_exec_section.header.copy()

        section_header.sh_size += INJECTION_SIZE

        modified.seek(section_header_offset)
        modified.write(self.helper.structs.Elf_Shdr.build(section_header))

        # Update sh_offset for each section past the last section in the executable segment
        for section_idx in range(last_exec_section_idx + 1, self.helper.num_sections()):
            section_header_offset = self.helper._section_offset(section_idx)
            section_header = self.helper.get_section(section_idx).header.copy()

            section_header.sh_offset += INJECTION_SIZE
            logging.debug('Rewriting section {}\'s offset to {}'.format(section_idx, section_header.sh_offset))

            modified.seek(section_header_offset)
            modified.write(self.helper.structs.Elf_Shdr.build(section_header))

        # TODO: Architecture-specific padding
        # Should be something that won't immediately crash, but can be caught (e.g. SIGTRAP on x86)
        modified = StringIO(modified.getvalue()[:self.next_injection_offset] +
                            '\xCC'*INJECTION_SIZE +
                            modified.getvalue()[self.next_injection_offset:])

        self.binary = modified
        self.helper = ELFFile(self.binary)

        return True

    def inject(self, asm, update_entry=False):
        """Write ``asm`` at the next injection position.

        The injection area is created or enlarged first when needed. With
        ``update_entry`` the ELF entry point is redirected to the injected
        code. Returns the virtual address at which ``asm`` was placed.
        """
        if self.next_injection_offset is None or self.next_injection_vaddr is None:
            logging.warning(
                'prepare_for_injection() was not called before inject(). This may cause unexpected behavior')
            self.prepare_for_injection()

        # Locate the last section of the executable LOAD segment.
        for segment in self.helper.iter_segments():
            if segment['p_type'] == 'PT_LOAD' and segment['p_flags'] & P_FLAGS.PF_X:
                injection_section_idx = max(i for i in range(self.helper.num_sections()) if
                                            segment.section_in_segment(self.helper.get_section(i)))
                break

        injection_section = self.helper.get_section(injection_section_idx)

        # If we haven't injected code before or need to expand the section again for this injection, go ahead and
        # shift stuff around.
        if injection_section['sh_size'] < INJECTION_SIZE or \
                injection_section['sh_offset'] + injection_section['sh_size'] < self.next_injection_offset + len(asm):
            logging.debug('Automatically expanding injection section to accommodate for assembly')

            # NOTE: Could this change the destination address for the code that gets injected?
            self.prepare_for_injection()
        elif self.next_injection_offset == 0:
            # Resume after whatever precedes the trailing 0xCC padding.
            used_code_len = len(injection_section.data().rstrip('\xCC'))
            self.next_injection_offset = injection_section['sh_offset'] + used_code_len
            self.next_injection_vaddr = injection_section['sh_addr'] + used_code_len

        # "Inject" the assembly
        logging.debug('Injecting {} bytes of assembly at offset {}'.format(len(asm), self.next_injection_offset))
        self.binary.seek(self.next_injection_offset)
        self.binary.write(asm)

        # Update e_entry if requested
        if update_entry:
            logging.debug('Rewriting ELF entry address to {}'.format(self.next_injection_vaddr))
            elf_hdr = self.helper.header
            elf_hdr.e_entry = self.next_injection_vaddr

            self.binary.seek(0)
            self.binary.write(self.helper.structs.Elf_Ehdr.build(elf_hdr))

        # Re-parse so the helper reflects the modified binary.
        self.helper = ELFFile(self.binary)

        self.next_injection_vaddr += len(asm)
        self.next_injection_offset += len(asm)

        return self.next_injection_vaddr - len(asm)
class ELF(object):
    """Wrapper around a parsed ELF file used to derive CapDL objects."""

    def __init__(self, elf, name=''):
        """
        This constructor is overloaded and can accept either a string as the
        parameter 'elf', or a stream to ELF data. 'name' is only used when
        generating CapDL from the ELF file.
        """
        if isinstance(elf, str):
            # Opened here and left open for the lifetime of the object.
            f = open(elf, 'rb')
        else:
            f = elf
        self._elf = ELFFile(f)
        self.name = name
        # Cache of symbols seen so far, keyed by symbol name.
        self.symtab = {}

    def get_entry_point(self):
        """Return the ``e_entry`` field of the ELF header."""
        return self._elf['e_entry']

    def _get_symbol(self, symbol):
        """Return the symbol-table entry named ``symbol``, or None.

        Symbols visited during the search are cached in ``self.symtab``.
        Raises Exception when the file has no ``.symtab`` section.
        """
        if symbol in self.symtab:
            return self.symtab[symbol]

        table = self._elf.get_section_by_name('.symtab')
        if not table:
            # This ELF file has been stripped.
            raise Exception('No symbol table available')

        for s in table.iter_symbols():
            self.symtab[s.name] = s
            if s.name == symbol:
                return s

        return None

    def get_symbol_vaddr(self, symbol):
        """Return the ``st_value`` of ``symbol``, or None if not found."""
        sym = self._get_symbol(symbol)
        if sym:
            return sym['st_value']
        return None

    def get_symbol_size(self, symbol):
        """Return the ``st_size`` of ``symbol``, or None if not found."""
        sym = self._get_symbol(symbol)
        if sym:
            return sym['st_size']
        return None

    def _safe_name(self):
        """
        Replace characters that the CapDL tools parse differently.
        """
        return re.sub(r'[^A-Za-z0-9]', '_', self.name)

    def get_arch(self):
        """Return the machine architecture name reported by the parser."""
        return self._elf.get_machine_arch()

    def get_pages(self, infer_asid=True, pd=None):
        """
        Returns a dictionary of pages keyed on base virtual address, that are
        required to ELF load this file. Each dictionary entry is a dictionary
        containing booleans 'read', 'write' and 'execute' for the permissions
        of the page.
        """
        pages = PageCollection(self._safe_name(), self.get_arch(), infer_asid, pd)

        for seg in self._elf.iter_segments():
            if not seg['p_type'] == 'PT_LOAD':
                continue
            if seg['p_memsz'] == 0:
                continue

            r = (seg['p_flags'] & P_FLAGS.PF_R) > 0
            w = (seg['p_flags'] & P_FLAGS.PF_W) > 0
            x = (seg['p_flags'] & P_FLAGS.PF_X) > 0

            # Add one page per PAGE_SIZE step from the rounded-down start of
            # the segment up to (not including) its end. A plain loop is
            # used so the pages are added regardless of whether map() is
            # eager, and so large addresses are not limited by xrange().
            page = round_down(int(seg['p_vaddr']))
            seg_end = int(seg['p_vaddr']) + int(seg['p_memsz'])
            while page < seg_end:
                pages.add_page(page, r, w, x)
                page += PAGE_SIZE

        return pages

    def get_spec(self, infer_tcb=True, infer_asid=True, pd=None):
        """
        Return a CapDL spec with as much information as can be derived from the
        ELF file in isolation.
        """
        pages = self.get_pages(infer_asid, pd)
        spec = pages.get_spec()

        if infer_tcb:
            # Create a single TCB.
            tcb = TCB('tcb_%s' % self._safe_name(), ip=self.get_entry_point(),
                      elf=self.name)
            spec.add_object(tcb)
            tcb['vspace'] = pages.get_page_directory()[1]

        return spec

    def __repr__(self):
        return str(self._elf)
class ELF:
    """ELF back end for a ``classbinary`` object.

    Symbols found in the file are published through
    ``classbinary.symbols`` (name -> address) and
    ``classbinary.reverse_symbols`` (address -> name).
    """

    def __init__(self, classbinary, filename):
        """Open ``filename`` and prepare the architecture lookup table."""
        # The file stays open for the lifetime of the object because
        # sections are read through self.elf after construction.
        fd = open(filename, "rb")
        self.elf = ELFFile(fd)
        self.classbinary = classbinary

        self.__data_sections = []
        self.__data_sections_data = []

        self.arch_lookup = {
            "x86": lib.fileformat.binary.ARCH_x86,
            "x64": lib.fileformat.binary.ARCH_x64
        }

    def load_static_sym(self):
        """Publish every named, non-zero symbol of ``.symtab``."""
        symtab = self.elf.get_section_by_name(b".symtab")
        if symtab is None:
            return
        for sy in symtab.iter_symbols():
            if sy.entry.st_value != 0 and sy.name != b"":
                self.classbinary.reverse_symbols[sy.entry.st_value] = sy.name.decode()
                self.classbinary.symbols[sy.name.decode()] = sy.entry.st_value
                # print("%x\t%s" % (sy.entry.st_value, sy.name.decode()))

    def load_dyn_sym(self):
        """Publish ``name@plt`` symbols for the entries of ``.plt``.

        PLT slots are matched, in order, with the relocations of
        ``.rela.plt`` / ``.rel.plt``. Nothing happens when one of the
        required sections is missing (e.g. a static binary).
        """
        rel = (self.elf.get_section_by_name(b".rela.plt") or
               self.elf.get_section_by_name(b".rel.plt"))
        dyn = self.elf.get_section_by_name(b".dynsym")
        plt = self.elf.get_section_by_name(b".plt")

        if rel is None or dyn is None or plt is None:
            return

        relitems = list(rel.iter_relocations())
        dynsym = list(dyn.iter_symbols())

        plt_entry_size = 16  # TODO

        # The first slot of .plt is skipped; named entries start after it.
        off = plt.header.sh_addr + plt_entry_size
        plt_end = plt.header.sh_addr + plt.header.sh_size
        k = 0

        # Stop at the end of .plt or when the relocations run out,
        # whichever comes first, so a padded .plt cannot overrun the list.
        while off < plt_end and k < len(relitems):
            idx = relitems[k].entry.r_info_sym
            name = dynsym[idx].name.decode()
            self.classbinary.reverse_symbols[off] = name + "@plt"
            self.classbinary.symbols[name + "@plt"] = off
            off += plt_entry_size
            k += 1

    def load_data_sections(self):
        """Cache every data section together with its contents."""
        for s in self.elf.iter_sections():
            if self.__section_is_data(s):
                self.__data_sections.append(s)
                self.__data_sections_data.append(s.data())

    def __get_data_section_idx(self, addr):
        """Return the index of the cached data section holding ``addr``,
        or -1."""
        for i, s in enumerate(self.__data_sections):
            start = s.header.sh_addr
            end = start + s.header.sh_size
            if start <= addr < end:
                return i
        return -1

    def __section_is_data(self, s):
        """Truthy when ``s`` is writable or allocated and not executable."""
        mask = SH_FLAGS.SHF_WRITE | SH_FLAGS.SHF_ALLOC
        return s.header.sh_flags & mask and not self.__section_is_exec(s)

    def is_address(self, imm):
        """Return ``(section_name, is_data)`` for the section containing
        ``imm``, or ``(None, False)``. Sections at address 0 are ignored."""
        for s in self.elf.iter_sections():
            start = s.header.sh_addr
            if start == 0:
                continue
            end = start + s.header.sh_size
            if start <= imm < end:
                return s.name.decode(), self.__section_is_data(s)
        return None, False

    def __find_section(self, addr):
        """Scan all section headers for the one containing ``addr``."""
        for s in self.elf.iter_sections():
            start = s.header.sh_addr
            end = start + s.header.sh_size
            if start <= addr < end:
                return s
        return None

    def get_section(self, addr):
        """Return ``(data, start_address, flags)`` for the section holding
        ``addr``; ``flags`` has the single key ``"exec"``.

        ``addr`` must lie inside a section.
        """
        s = self.__find_section(addr)
        flags = {
            "exec": self.__section_is_exec(s)
        }
        return (s.data(), s.header.sh_addr, flags)

    def __section_is_exec(self, s):
        """Truthy when ``s`` carries SHF_EXECINSTR."""
        return s.header.sh_flags & SH_FLAGS.SHF_EXECINSTR

    def get_string(self, addr):
        """Return the quoted string stored at ``addr`` in a cached data
        section.

        At most ``lib.fileformat.binary.MAX_STRING_DATA`` bytes are
        rendered; "..." is appended when the string was cut before its
        terminating zero byte. An address outside the cached data sections
        yields "".
        """
        i = self.__get_data_section_idx(addr)
        if i == -1:
            return ""

        s = self.__data_sections[i]
        data = self.__data_sections_data[i]
        off = addr - s.header.sh_addr
        txt = ['"']

        # Pre-set so the truncation test below is defined even when the
        # loop body never runs.
        c = 0
        count = 0
        while count < lib.fileformat.binary.MAX_STRING_DATA and \
              off < s.header.sh_size:
            c = data[off]
            if c == 0:
                break
            txt.append(lib.utils.get_char(c))
            off += 1
            count += 1

        if c != 0 and off != s.header.sh_size:
            txt.append("...")

        return ''.join(txt) + '"'

    def get_arch(self):
        """Return the project architecture constant for this file, or
        ``ARCH_INVALID`` for an unsupported machine."""
        return self.arch_lookup.get(self.elf.get_machine_arch(),
                                    lib.fileformat.binary.ARCH_INVALID)

    def get_entry_point(self):
        """Return the ``e_entry`` field of the ELF header."""
        return self.elf.header['e_entry']
class ELF(Binary):
    """ELF loader built on ``ELFFile``.

    Sections and PT_LOAD segments are registered at construction time;
    symbols come from ``load_static_sym`` (``.symtab``) and ``load_dyn_sym``
    (the PT_DYNAMIC segment, so files without section headers still work).
    """

    def __init__(self, db, filename):
        """Parse *filename* and register its sections and segments.

        db: object whose ``functions`` dict receives function addresses.
        filename: path of the ELF file to open in binary mode.
        """
        Binary.__init__(self)

        fd = open(filename, "rb")
        try:
            self.elf = ELFFile(fd)
        except Exception:
            # Do not leak the handle when the file cannot be parsed.
            fd.close()
            raise
        # On success the handle stays open: self.elf.stream is read again
        # in load_dyn_sym.
        self.db = db

        # sh_offset of every relocation table already handled.
        self.__parsed_reloc_tables = set()
        self.dtags = {}
        self.jmprel = []
        self.dynamic_seg = None

        self.set_arch_name()

        # Numeric dynamic tags -> symbolic names, for the MIPS-specific tags.
        if self.arch == "MIPS32":
            self.dynamic_tag_translation = {
                0x70000001: "DT_MIPS_RLD_VERSION",
                0x70000005: "DT_MIPS_FLAGS",
                0x70000006: "DT_MIPS_BASE_ADDRESS",
                0x7000000a: "DT_MIPS_LOCAL_GOTNO",
                0x70000011: "DT_MIPS_SYMTABNO",
                0x70000012: "DT_MIPS_UNREFEXTNO",
                0x70000013: "DT_MIPS_GOTSYM",
                0x70000016: "DT_MIPS_RLD_MAP",
                0x70000032: "DT_MIPS_PLTGOT"
            }
        elif self.arch == "MIPS64":
            self.dynamic_tag_translation = {
                0x70000001: "DT_MIPS_RLD_VERSION",
                0x70000005: "DT_MIPS_FLAGS",
                0x70000006: "DT_MIPS_BASE_ADDRESS",
                0x7000000a: "DT_MIPS_LOCAL_GOTNO",
                0x70000011: "DT_MIPS_SYMTABNO",
                0x70000012: "DT_MIPS_UNREFEXTNO",
                0x70000013: "DT_MIPS_GOTSYM",
                0x70000016: "DT_MIPS_RLD_MAP"
            }
        else:
            self.dynamic_tag_translation = {}

        # Running address handed to sections whose sh_addr is 0.
        reloc = 0

        # Load sections
        for s in self.elf.iter_sections():
            if not s.name:
                continue

            # Keep only sections R|W|X
            # TODO : is it sufficient ?
            if s.header.sh_flags & 0xf == 0:
                continue

            name = s.name.decode()
            start = s.header.sh_addr

            if start == 0:
                start = reloc
                reloc += s.header.sh_size

            data = s.data()

            self.add_section(
                start,
                name,
                s.header.sh_size,
                len(data),
                self.__section_is_exec(s),
                self.__section_is_data(s),
                data)

        # Load segments
        rename_counter = 1
        seen = set()
        for seg in self.elf.iter_segments():
            if seg.header.p_type == "PT_DYNAMIC":
                self.dynamic_seg = seg

            if seg.header.p_type != "PT_LOAD":
                continue

            # Repeated segment types get a numeric suffix.
            name = seg.header.p_type
            if name in seen:
                name += "_%d" % rename_counter
                rename_counter += 1

            seen.add(name)
            start = seg.header.p_vaddr
            bisect.insort_left(self._sorted_segments, start)

            is_data = self.__segment_is_data(seg)
            is_exec = self.__segment_is_exec(seg)
            data = seg.data()

            self._abs_segments[start] = SegmentAbs(
                name,
                start,
                seg.header.p_memsz,
                len(data),
                is_exec,
                is_data,
                data,
                seg.header.p_offset,
                not self.elf.little_endian)

        # No section headers, we add segments in sections
        if len(self._abs_sections) == 0:
            self._abs_sections = self._abs_segments
            self._sorted_sections = self._sorted_segments

    def read_addr_at(self, ad):
        """Read one address-sized value at *ad*: a dword when
        ``self.wordsize`` is 4, a qword otherwise."""
        seg = self.get_segment(ad)
        if self.wordsize == 4:
            return seg.read_dword(ad)
        else:
            return seg.read_qword(ad)

    def __translate_dynamic_tag(self, tag):
        """Return the symbolic name of a numeric dynamic tag.

        Non-integer tags are returned unchanged.  Integer tags missing from
        ``dynamic_tag_translation`` are also returned unchanged instead of
        raising KeyError, so an unrecognised tag no longer aborts
        load_dyn_sym.
        """
        if isinstance(tag, int):
            return self.dynamic_tag_translation.get(tag, tag)
        return tag

    def __get_offset(self, ad):
        """Convert the virtual address *ad* to a file offset using the
        segment that contains it."""
        seg = self.get_segment(ad)
        return seg.file_offset + ad - seg.start

    def load_dyn_sym(self):
        """Load dynamic symbols and relocations from the PT_DYNAMIC segment.

        Returns early when there is no dynamic segment, no DT_STRTAB, no
        DT_SYMTAB/DT_SYMENT, or no relocation table.  Raises ExcElf when
        DT_PLTREL is present but is neither 7 (RELA) nor 17 (REL).
        """
        if self.dynamic_seg is None:
            return

        self.dtags = {}

        for tag in self.dynamic_seg.iter_tags():
            # Create a dictionary, mapping DT_* strings to their values
            tagstr = self.__translate_dynamic_tag(tag.entry.d_tag)
            self.dtags[tagstr] = tag.entry.d_val

        # None of the following things make sense without a string table
        if "DT_STRTAB" not in self.dtags:
            return

        # To handle binaries without section headers, we need to work around
        # the ELF library's assumptions and make our own string table
        fakestrtabheader = {
            "sh_offset": self.__get_offset(self.dtags["DT_STRTAB"]),
        }
        strtab = StringTableSection(
            fakestrtabheader, "strtab_plasma", self.elf.stream)

        # ...
        # Here in CLE was checked the DT_SONAME
        # ...

        # None of the following structures can be used without a symbol table
        if "DT_SYMTAB" not in self.dtags or "DT_SYMENT" not in self.dtags:
            return

        # Construct our own symbol table to work around the ELF library
        # assuming section headers are around
        fakesymtabheader = {
            "sh_offset": self.__get_offset(self.dtags["DT_SYMTAB"]),
            "sh_entsize": self.dtags["DT_SYMENT"],
            "sh_size": 0
        }  # bogus size: no iteration allowed

        self.dynsym = SymbolTableSection(
            fakesymtabheader, "symtab_plasma", self.elf.stream,
            self.elf, strtab)

        # MIPS relocations need special handling; do some of it here.
        self.__relocate_mips()

        # perform a lot of checks to figure out what kind of relocation
        # tables are around
        rela_type = None
        if "DT_PLTREL" in self.dtags:
            if self.dtags["DT_PLTREL"] == 7:
                rela_type = "RELA"
                relentsz = self.elf.structs.Elf_Rela.sizeof()
            elif self.dtags["DT_PLTREL"] == 17:
                rela_type = "REL"
                relentsz = self.elf.structs.Elf_Rel.sizeof()
            else:
                raise ExcElf("DT_PLTREL is not REL or RELA?")
        else:
            if "DT_RELA" in self.dtags:
                rela_type = "RELA"
                relentsz = self.elf.structs.Elf_Rela.sizeof()
            elif "DT_REL" in self.dtags:
                rela_type = "REL"
                relentsz = self.elf.structs.Elf_Rel.sizeof()
            else:
                return

        # try to parse relocations out of a table of type DT_REL{,A}
        if "DT_" + rela_type in self.dtags:
            reloffset = self.dtags["DT_" + rela_type]
            relsz = self.dtags["DT_" + rela_type + "SZ"]
            fakerelheader = {
                "sh_offset": self.__get_offset(reloffset),
                "sh_type": "SHT_" + rela_type,
                "sh_entsize": relentsz,
                "sh_size": relsz
            }
            reloc_sec = RelocationSection(
                fakerelheader, "reloc_plasma",
                self.elf.stream, self.elf)
            self.__register_relocs(reloc_sec)

        # try to parse relocations out of a table of type DT_JMPREL
        if "DT_JMPREL" in self.dtags:
            jmpreloffset = self.dtags["DT_JMPREL"]
            jmprelsz = self.dtags["DT_PLTRELSZ"]
            fakejmprelheader = {
                "sh_offset": self.__get_offset(jmpreloffset),
                "sh_type": "SHT_" + rela_type,
                "sh_entsize": relentsz,
                "sh_size": jmprelsz
            }
            jmprel_sec = RelocationSection(
                fakejmprelheader, "jmprel_plasma",
                self.elf.stream, self.elf)

            self.jmprel = self.__register_relocs(jmprel_sec)

        self.__resolve_plt()

    def __relocate_mips(self):
        """Create relocations for the MIPS GOT entries.

        Does nothing when DT_MIPS_BASE_ADDRESS is not among the dynamic
        tags.
        """
        if 'DT_MIPS_BASE_ADDRESS' not in self.dtags:
            return

        # The MIPS GOT is an array of addresses, simple as that.

        # number of local GOT entries
        got_local_num = self.dtags['DT_MIPS_LOCAL_GOTNO']

        # a.k.a the index of the first global GOT entry
        # index of first symbol w/ GOT entry
        symtab_got_idx = self.dtags['DT_MIPS_GOTSYM']

        symbol_count = self.dtags['DT_MIPS_SYMTABNO']
        gotaddr = self.dtags['DT_PLTGOT']

        # Local entries; the first two GOT slots are skipped.
        for i in range(2, got_local_num):
            symbol = self.dynsym.get_symbol(i)
            reloc = MipsLocalReloc(self, symbol, gotaddr + i * self.wordsize)
            self.__save_symbol(reloc, reloc.symbol.entry.st_value)

        # Global entries follow the local ones in the GOT.
        for i in range(symbol_count - symtab_got_idx):
            symbol = self.dynsym.get_symbol(i + symtab_got_idx)
            reloc = MipsGlobalReloc(
                self, symbol,
                gotaddr + (i + got_local_num) * self.wordsize)
            self.__save_symbol(reloc, reloc.symbol.entry.st_value)
            self.jmprel.append(reloc)

    def __resolve_plt(self):
        """Save a symbol for each relocation in ``self.jmprel``, at the
        address read from its GOT slot.  Other architectures are left
        untouched."""
        # For PPC32 and PPC64 the address to save is 'got'
        if self.arch in ('x86', 'x64'):
            for rel in self.jmprel:
                got = rel.addr
                # 0x6 is the size of the plt's jmpq instruction in x86_64
                ad = self.read_addr_at(got) - 6
                self.__save_symbol(rel, ad)

        elif self.arch in ('ARM', 'AARCH64', 'MIPS32', 'MIPS64'):
            for rel in self.jmprel:
                got = rel.addr
                ad = self.read_addr_at(got)
                self.__save_symbol(rel, ad)

    def __save_symbol(self, rel, ad):
        """Register the symbol of relocation *rel* at address *ad*.

        Address 0 is ignored.  A name already present in ``self.symbols``
        is passed through ``rename_sym`` first.
        """
        if ad == 0:
            return

        name = rel.symbol.name.decode()

        if name in self.symbols:
            name = self.rename_sym(name)

        if rel.is_import:
            self.imports[ad] = True

        if self.is_function(rel.symbol):
            self.db.functions[ad] = None

        self.reverse_symbols[ad] = name
        self.symbols[name] = ad

    def __register_relocs(self, section):
        """Parse the relocation table *section* and save its symbols.

        Returns the list of relocation objects that were created.  A table
        whose ``sh_offset`` was already parsed yields an empty list (never
        None) so the caller can assign the result to ``self.jmprel`` and
        iterate over it.
        """
        if section.header["sh_offset"] in self.__parsed_reloc_tables:
            return []
        self.__parsed_reloc_tables.add(section.header["sh_offset"])

        relocs = []
        for r in section.iter_relocations():
            # MIPS64 packs up to three relocation types into r_info
            # https://www.sourceware.org/ml/libc-alpha/2003-03/msg00153.html
            if self.arch == "MIPS64":
                # Little endian additionally needs one of its fields
                # byte-swapped
                if self.elf.little_endian:
                    r.entry.r_info_sym = r.entry.r_info & 0xFFFFFFFF
                    r.entry.r_info = struct.unpack(
                        ">Q", struct.pack("<Q", r.entry.r_info))[0]

                type_1 = r.entry.r_info & 0xFF
                type_2 = r.entry.r_info >> 8 & 0xFF
                type_3 = r.entry.r_info >> 16 & 0xFF
                extra_sym = r.entry.r_info >> 24 & 0xFF
                if extra_sym != 0:
                    die("r_info_extra_sym is nonzero??? PLEASE SEND HELP")
                sym = self.dynsym.get_symbol(r.entry.r_info_sym)

                # Handle each non-zero type in order, as one relocation each.
                for reloc_type in (type_1, type_2, type_3):
                    if reloc_type == 0:
                        continue
                    r.entry.r_info_type = reloc_type
                    reloc = self._make_reloc(r, sym)
                    if reloc is not None:
                        relocs.append(reloc)
                        self.__save_symbol(
                            reloc, reloc.symbol.entry.st_value)
            else:
                if "sh_link" in section.header:
                    # NOTE(review): self.reader is not assigned anywhere in
                    # this class; confirm the base class provides it (the
                    # ELFFile is stored as self.elf here).
                    symtab = self.reader.get_section(section.header["sh_link"])
                    sym = symtab.get_symbol(r.entry.r_info_sym)
                else:
                    sym = self.dynsym.get_symbol(r.entry.r_info_sym)

                reloc = self._make_reloc(r, sym)
                if reloc is not None:
                    relocs.append(reloc)
                    self.__save_symbol(reloc, reloc.symbol.entry.st_value)
        return relocs

    def _make_reloc(self, reloc_sec, symbol):
        """Build the relocation object for one relocation entry.

        Returns None when ``get_relocation`` has no class for the entry's
        type.  The addend is None for entries that are not RELA.
        """
        addend = reloc_sec.entry.r_addend if reloc_sec.is_RELA() else None
        RelocClass = get_relocation(self.arch, reloc_sec.entry.r_info_type)
        if RelocClass is None:
            return None
        return RelocClass(self, symbol, reloc_sec.entry.r_offset, addend)

    def load_static_sym(self):
        """Register the symbols of ``.symtab`` whose value is a known
        address.

        Does nothing when there is no ``.symtab``.  On ARM the symbols
        ``$a``, ``$t`` and ``$d`` are skipped.
        """
        symtab = self.elf.get_section_by_name(b".symtab")
        if symtab is None:
            return

        dont_save = [b"$a", b"$t", b"$d"]
        is_arm = self.arch == "ARM"

        for sy in symtab.iter_symbols():
            if is_arm and sy.name in dont_save:
                continue

            ad = sy.entry.st_value
            if ad != 0 and sy.name != b"":
                name = sy.name.decode()

                if self.is_address(ad):
                    if name in self.symbols:
                        name = self.rename_sym(name)

                    self.reverse_symbols[ad] = name
                    self.symbols[name] = ad

                    if self.is_function(sy):
                        self.db.functions[ad] = None

    def __section_is_data(self, s):
        """Truthy when *s* has SHF_WRITE or SHF_ALLOC set and is not
        executable."""
        mask = SH_FLAGS.SHF_WRITE | SH_FLAGS.SHF_ALLOC
        return s.header.sh_flags & mask and not self.__section_is_exec(s)

    def __section_is_exec(self, s):
        """Return the SHF_EXECINSTR bit of section *s*."""
        return s.header.sh_flags & SH_FLAGS.SHF_EXECINSTR

    def __segment_is_data(self, s):
        """Truthy when segment *s* has PF_W or PF_R set and is not
        executable."""
        mask = P_FLAGS.PF_W | P_FLAGS.PF_R
        return s.header.p_flags & mask and not self.__segment_is_exec(s)

    def __segment_is_exec(self, s):
        """Return the PF_X bit of segment *s*."""
        return s.header.p_flags & P_FLAGS.PF_X

    def is_function(self, sy):
        """Return True when symbol *sy* has type STT_FUNC."""
        return sy.entry.st_info.type == "STT_FUNC"

    def set_arch_name(self):
        """Store the machine arch in ``self.arch``; "MIPS" gets the ELF
        class appended ("MIPS32" / "MIPS64")."""
        arch = self.elf.get_machine_arch()

        if arch == "MIPS":
            if self.elf.elfclass == 32:
                arch += "32"
            elif self.elf.elfclass == 64:
                arch += "64"

        self.arch = arch

    def is_big_endian(self):
        """Return True when the file is not little-endian."""
        return not self.elf.little_endian

    def get_entry_point(self):
        """Return the ``e_entry`` field of the ELF header."""
        return self.elf.header['e_entry']
class ReadElf(object):
    """ Text dumper for an ELF file, written to mimic readelf's output.

        display_* methods are used to emit output into the output stream;
        the _dump_* and other underscore-prefixed methods are their helpers.
    """
    def __init__(self, file, output):
        """ file:
                stream object with the ELF file to read

            output:
                output stream to write to
        """
        self.elffile = ELFFile(file)
        self.output = output

        # Lazily initialized if a debug dump is requested
        self._dwarfinfo = None

    def display_file_header(self):
        """ Display the ELF file header
        """
        self._emitline('ELF Header:')
        self._emit(' Magic: ')
        # Raw identification bytes, printed as two-digit hex values
        self._emitline(' '.join('%2.2x' % byte2int(b)
                                for b in self.elffile.e_ident_raw))
        header = self.elffile.header
        e_ident = header['e_ident']
        self._emitline(' Class: %s' %
                describe_ei_class(e_ident['EI_CLASS']))
        self._emitline(' Data: %s' %
                describe_ei_data(e_ident['EI_DATA']))
        self._emitline(' Version: %s' %
                describe_ei_version(e_ident['EI_VERSION']))
        self._emitline(' OS/ABI: %s' %
                describe_ei_osabi(e_ident['EI_OSABI']))
        self._emitline(' ABI Version: %d' %
                e_ident['EI_ABIVERSION'])
        self._emitline(' Type: %s' %
                describe_e_type(header['e_type']))
        self._emitline(' Machine: %s' %
                describe_e_machine(header['e_machine']))
        self._emitline(' Version: %s' %
                describe_e_version_numeric(header['e_version']))
        self._emitline(' Entry point address: %s' %
                self._format_hex(header['e_entry']))
        self._emit(' Start of program headers: %s' %
                header['e_phoff'])
        self._emitline(' (bytes into file)')
        self._emit(' Start of section headers: %s' %
                header['e_shoff'])
        self._emitline(' (bytes into file)')
        self._emitline(' Flags: %s' %
                self._format_hex(header['e_flags']))
        self._emitline(' Size of this header: %s (bytes)' %
                header['e_ehsize'])
        self._emitline(' Size of program headers: %s (bytes)' %
                header['e_phentsize'])
        self._emitline(' Number of program headers: %s' %
                header['e_phnum'])
        self._emitline(' Size of section headers: %s (bytes)' %
                header['e_shentsize'])
        self._emitline(' Number of section headers: %s' %
                header['e_shnum'])
        self._emitline(' Section header string table index: %s' %
                header['e_shstrndx'])

    def display_program_headers(self, show_heading=True):
        """ Display the ELF program headers.
            If show_heading is True, displays the heading for this information
            (Elf file type is...)
        """
        self._emitline()
        if self.elffile.num_segments() == 0:
            self._emitline('There are no program headers in this file.')
            return

        elfheader = self.elffile.header
        if show_heading:
            self._emitline('Elf file type is %s' %
                describe_e_type(elfheader['e_type']))
            self._emitline('Entry point is %s' %
                self._format_hex(elfheader['e_entry']))
            # readelf weirdness - why isn't e_phoff printed as hex? (for section
            # headers, it is...)
            self._emitline('There are %s program headers, starting at offset %s' % (
                elfheader['e_phnum'], elfheader['e_phoff']))
            self._emitline()

        self._emitline('Program Headers:')

        # Now comes the table of program headers with their attributes. Note
        # that due to different formatting constraints of 32-bit and 64-bit
        # addresses, there are some conditions on elfclass here.
        #
        # First comes the table heading
        #
        if self.elffile.elfclass == 32:
            self._emitline(' Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align')
        else:
            self._emitline(' Type Offset VirtAddr PhysAddr')
            self._emitline(' FileSiz MemSiz Flags Align')

        # Now the entries
        #
        for segment in self.elffile.iter_segments():
            self._emit(' %-14s ' % describe_p_type(segment['p_type']))

            if self.elffile.elfclass == 32:
                self._emitline('%s %s %s %s %s %-3s %s' % (
                    self._format_hex(segment['p_offset'], fieldsize=6),
                    self._format_hex(segment['p_vaddr'], fullhex=True),
                    self._format_hex(segment['p_paddr'], fullhex=True),
                    self._format_hex(segment['p_filesz'], fieldsize=5),
                    self._format_hex(segment['p_memsz'], fieldsize=5),
                    describe_p_flags(segment['p_flags']),
                    self._format_hex(segment['p_align'])))
            else: # 64
                self._emitline('%s %s %s' % (
                    self._format_hex(segment['p_offset'], fullhex=True),
                    self._format_hex(segment['p_vaddr'], fullhex=True),
                    self._format_hex(segment['p_paddr'], fullhex=True)))
                self._emitline(' %s %s %-3s %s' % (
                    self._format_hex(segment['p_filesz'], fullhex=True),
                    self._format_hex(segment['p_memsz'], fullhex=True),
                    describe_p_flags(segment['p_flags']),
                    # lead0x set to False for p_align, to mimic readelf.
                    # No idea why the difference from 32-bit mode :-|
                    self._format_hex(segment['p_align'], lead0x=False)))

            # Interpreter segments get an extra line naming the interpreter
            if isinstance(segment, InterpSegment):
                self._emitline(' [Requesting program interpreter: %s]' %
                    bytes2str(segment.get_interp_name()))

        # Sections to segments mapping
        #
        if self.elffile.num_sections() == 0:
            # No sections? We're done
            return

        self._emitline('\n Section to Segment mapping:')
        self._emitline(' Segment Sections...')

        for nseg, segment in enumerate(self.elffile.iter_segments()):
            self._emit(' %2.2d ' % nseg)

            for section in self.elffile.iter_sections():
                if ( not section.is_null() and
                        segment.section_in_segment(section)):
                    self._emit('%s ' % bytes2str(section.name))

            self._emitline('')

    def display_section_headers(self, show_heading=True):
        """ Display the ELF section headers.
            If show_heading is True, a line with the section header count and
            offset is printed first.
        """
        elfheader = self.elffile.header
        if show_heading:
            self._emitline('There are %s section headers, starting at offset %s' % (
                elfheader['e_shnum'], self._format_hex(elfheader['e_shoff'])))

        # Plural "Headers" only when there is more than one
        self._emitline('\nSection Header%s:' % (
            's' if elfheader['e_shnum'] > 1 else ''))

        # Different formatting constraints of 32-bit and 64-bit addresses
        #
        if self.elffile.elfclass == 32:
            self._emitline(' [Nr] Name Type Addr Off Size ES Flg Lk Inf Al')
        else:
            self._emitline(' [Nr] Name Type Address Offset')
            self._emitline(' Size EntSize Flags Link Info Align')

        # Now the entries
        #
        for nsec, section in enumerate(self.elffile.iter_sections()):
            self._emit(' [%2u] %-17.17s %-15.15s ' % (
                nsec, bytes2str(section.name), describe_sh_type(section['sh_type'])))

            if self.elffile.elfclass == 32:
                self._emitline('%s %s %s %s %3s %2s %3s %2s' % (
                    self._format_hex(section['sh_addr'], fieldsize=8, lead0x=False),
                    self._format_hex(section['sh_offset'], fieldsize=6, lead0x=False),
                    self._format_hex(section['sh_size'], fieldsize=6, lead0x=False),
                    self._format_hex(section['sh_entsize'], fieldsize=2, lead0x=False),
                    describe_sh_flags(section['sh_flags']),
                    section['sh_link'], section['sh_info'],
                    section['sh_addralign']))
            else: # 64
                self._emitline(' %s %s' % (
                    self._format_hex(section['sh_addr'], fullhex=True, lead0x=False),
                    # The offset field widens to 16 digits only when the
                    # value does not fit in 32 bits
                    self._format_hex(section['sh_offset'],
                        fieldsize=16 if section['sh_offset'] > 0xffffffff else 8,
                        lead0x=False)))
                self._emitline(' %s %s %3s %2s %3s %s' % (
                    self._format_hex(section['sh_size'], fullhex=True, lead0x=False),
                    self._format_hex(section['sh_entsize'], fullhex=True, lead0x=False),
                    describe_sh_flags(section['sh_flags']),
                    section['sh_link'], section['sh_info'],
                    section['sh_addralign']))

        self._emitline('Key to Flags:')
        self._emit(' W (write), A (alloc), X (execute), M (merge), S (strings)')
        # The "large" flag is listed only for these two machine types
        if self.elffile['e_machine'] in ('EM_X86_64', 'EM_L10M'):
            self._emitline(', l (large)')
        else:
            self._emitline()
        self._emitline(' I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)')
        self._emitline(' O (extra OS processing required) o (OS specific), p (processor specific)')

    def display_symbol_tables(self):
        """ Display the symbol tables contained in the file
        """
        for section in self.elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue

            # A table with zero-sized entries is reported and skipped
            if section['sh_entsize'] == 0:
                self._emitline("\nSymbol table '%s' has a sh_entsize of zero!" % (
                    bytes2str(section.name)))
                continue

            self._emitline("\nSymbol table '%s' contains %s entries:" % (
                bytes2str(section.name), section.num_symbols()))

            if self.elffile.elfclass == 32:
                self._emitline(' Num: Value Size Type Bind Vis Ndx Name')
            else: # 64
                self._emitline(' Num: Value Size Type Bind Vis Ndx Name')

            for nsym, symbol in enumerate(section.iter_symbols()):
                # symbol names are truncated to 25 chars, similarly to readelf
                self._emitline('%6d: %s %5d %-7s %-6s %-7s %4s %.25s' % (
                    nsym,
                    self._format_hex(symbol['st_value'], fullhex=True, lead0x=False),
                    symbol['st_size'],
                    describe_symbol_type(symbol['st_info']['type']),
                    describe_symbol_bind(symbol['st_info']['bind']),
                    describe_symbol_visibility(symbol['st_other']['visibility']),
                    describe_symbol_shndx(symbol['st_shndx']),
                    bytes2str(symbol.name)))

    def display_relocations(self):
        """ Display the relocations contained in the file.
            Emits a "no relocations" message when the file has no relocation
            section at all.
        """
        has_relocation_sections = False
        for section in self.elffile.iter_sections():
            if not isinstance(section, RelocationSection):
                continue

            has_relocation_sections = True
            self._emitline("\nRelocation section '%s' at offset %s contains %s entries:" % (
                bytes2str(section.name),
                self._format_hex(section['sh_offset']),
                section.num_relocations()))
            if section.is_RELA():
                self._emitline(" Offset Info Type Sym. Value Sym. Name + Addend")
            else:
                self._emitline(" Offset Info Type Sym.Value Sym. Name")

            # The symbol table section pointed to in sh_link
            symtable = self.elffile.get_section(section['sh_link'])

            for rel in section.iter_relocations():
                hexwidth = 8 if self.elffile.elfclass == 32 else 12
                self._emit('%s %s %-17.17s' % (
                    self._format_hex(rel['r_offset'],
                        fieldsize=hexwidth, lead0x=False),
                    self._format_hex(rel['r_info'],
                        fieldsize=hexwidth, lead0x=False),
                    describe_reloc_type(
                        rel['r_info_type'], self.elffile)))

                # Symbol index 0: nothing more to print for this entry
                if rel['r_info_sym'] == 0:
                    self._emitline()
                    continue

                symbol = symtable.get_symbol(rel['r_info_sym'])
                # Some symbols have zero 'st_name', so instead what's used is
                # the name of the section they point at
                if symbol['st_name'] == 0:
                    symsec = self.elffile.get_section(symbol['st_shndx'])
                    symbol_name = symsec.name
                else:
                    symbol_name = symbol.name
                self._emit(' %s %s%22.22s' % (
                    self._format_hex(
                        symbol['st_value'],
                        fullhex=True, lead0x=False),
                    ' ' if self.elffile.elfclass == 32 else '',
                    bytes2str(symbol_name)))
                if section.is_RELA():
                    # Sign and magnitude of the addend are printed separately
                    self._emit(' %s %x' % (
                        '+' if rel['r_addend'] >= 0 else '-',
                        abs(rel['r_addend'])))
                self._emitline()

        if not has_relocation_sections:
            self._emitline('\nThere are no relocations in this file.')

    def display_hex_dump(self, section_spec):
        """ Display a hex dump of a section. section_spec is either a section
            number or a name.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" % (
                section_spec))
            return

        self._emitline("\nHex dump of section '%s':" % bytes2str(section.name))
        self._note_relocs_for_section(section)
        addr = section['sh_addr']
        data = section.data()
        dataptr = 0

        while dataptr < len(data):
            bytesleft = len(data) - dataptr
            # chunks of 16 bytes per line
            linebytes = 16 if bytesleft > 16 else bytesleft

            self._emit(' %s ' % self._format_hex(addr, fieldsize=8))
            # Hex column: always 16 slots, padded past the end of the data,
            # with a separator after every 4th slot
            for i in range(16):
                if i < linebytes:
                    self._emit('%2.2x' % byte2int(data[dataptr + i]))
                else:
                    self._emit(' ')
                if i % 4 == 3:
                    self._emit(' ')

            # Text column: bytes in the range 32..0x7e as-is, others as '.'
            for i in range(linebytes):
                c = data[dataptr + i : dataptr + i + 1]
                if byte2int(c[0]) >= 32 and byte2int(c[0]) < 0x7f:
                    self._emit(bytes2str(c))
                else:
                    self._emit(bytes2str(b'.'))

            self._emitline()
            addr += linebytes
            dataptr += linebytes

        self._emitline()

    def display_string_dump(self, section_spec):
        """ Display a strings dump of a section. section_spec is either a
            section number or a name.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" % (
                section_spec))
            return

        self._emitline("\nString dump of section '%s':" % bytes2str(section.name))

        found = False
        data = section.data()
        dataptr = 0

        while dataptr < len(data):
            # Skip bytes outside the range 32..127
            while ( dataptr < len(data) and
                    not (32 <= byte2int(data[dataptr]) <= 127)):
                dataptr += 1

            if dataptr >= len(data):
                break

            # The string runs up to the next NUL byte (or the end of data)
            endptr = dataptr
            while endptr < len(data) and byte2int(data[endptr]) != 0:
                endptr += 1

            found = True
            self._emitline(' [%6x] %s' % (
                dataptr, bytes2str(data[dataptr:endptr])))

            dataptr = endptr

        if not found:
            self._emitline(' No strings found in this section.')
        else:
            self._emitline()

    def display_debug_dump(self, dump_what):
        """ Dump a DWARF section.
            dump_what selects the dump: 'info', 'decodedline', 'frames' or
            'frames-interp'; any other value emits a "not yet supported"
            message. Nothing is emitted when the file has no DWARF info.
        """
        self._init_dwarfinfo()
        if self._dwarfinfo is None:
            return

        set_global_machine_arch(self.elffile.get_machine_arch())

        if dump_what == 'info':
            self._dump_debug_info()
        elif dump_what == 'decodedline':
            self._dump_debug_line_programs()
        elif dump_what == 'frames':
            self._dump_debug_frames()
        elif dump_what == 'frames-interp':
            self._dump_debug_frames_interp()
        else:
            self._emitline('debug dump not yet supported for "%s"' % dump_what)

    def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True):
        """ Format an address into a hexadecimal string.

            fieldsize:
                Size of the hexadecimal field (with leading zeros to fit the
                address into. For example with fieldsize=8, the format will
                be %08x
                If None, the minimal required field size will be used.

            fullhex:
                If True, override fieldsize to set it to the maximal size
                needed for the elfclass

            lead0x:
                If True, leading 0x is added
        """
        s = '0x' if lead0x else ''
        if fullhex:
            fieldsize = 8 if self.elffile.elfclass == 32 else 16
        if fieldsize is None:
            field = '%x'
        else:
            field = '%' + '0%sx' % fieldsize
        return s + field % addr

    def _section_from_spec(self, spec):
        """ Retrieve a section given a "spec" (either number or name).
            Return None if no such section exists in the file.
        """
        try:
            num = int(spec)
            if num < self.elffile.num_sections():
                return self.elffile.get_section(num)
            else:
                return None
        except ValueError:
            # Not a number. Must be a name then
            return self.elffile.get_section_by_name(str2bytes(spec))

    def _note_relocs_for_section(self, section):
        """ If there are relocation sections pointing to the given section,
            emit a note about it.
        """
        for relsec in self.elffile.iter_sections():
            if isinstance(relsec, RelocationSection):
                info_idx = relsec['sh_info']
                if self.elffile.get_section(info_idx) == section:
                    self._emitline(' Note: This section has relocations against it, but these have NOT been applied to this dump.')
                    # One note is enough, whatever the number of matches
                    return

    def _init_dwarfinfo(self):
        """ Initialize the DWARF info contained in the file and assign it to
            self._dwarfinfo.
            Leave self._dwarfinfo at None if no DWARF info was found in the file
        """
        if self._dwarfinfo is not None:
            return

        if self.elffile.has_dwarf_info():
            self._dwarfinfo = self.elffile.get_dwarf_info()
        else:
            self._dwarfinfo = None

    def _dump_debug_info(self):
        """ Dump the debugging info section.
        """
        self._emitline('Contents of the .debug_info section:\n')

        # Offset of the .debug_info section in the stream
        section_offset = self._dwarfinfo.debug_info_sec.global_offset

        for cu in self._dwarfinfo.iter_CUs():
            self._emitline(' Compilation Unit @ offset %s:' %
                self._format_hex(cu.cu_offset))
            self._emitline(' Length: %s (%s)' % (
                self._format_hex(cu['unit_length']),
                '%s-bit' % cu.dwarf_format()))
            self._emitline(' Version: %s' % cu['version']),
            self._emitline(' Abbrev Offset: %s' % cu['debug_abbrev_offset']),
            self._emitline(' Pointer Size: %s' % cu['address_size'])

            # The nesting depth of each DIE within the tree of DIEs must be
            # displayed. To implement this, a counter is incremented each time
            # the current DIE has children, and decremented when a null die is
            # encountered. Due to the way the DIE tree is serialized, this will
            # correctly reflect the nesting depth
            #
            die_depth = 0
            for die in cu.iter_DIEs():
                if die.is_null():
                    die_depth -= 1
                    continue
                self._emitline(' <%s><%x>: Abbrev Number: %s (%s)' % (
                    die_depth,
                    die.offset,
                    die.abbrev_code,
                    die.tag))

                for attr in itervalues(die.attributes):
                    name = attr.name
                    # Unknown attribute values are passed-through as integers
                    if isinstance(name, int):
                        name = 'Unknown AT value: %x' % name
                    self._emitline(' <%2x> %-18s: %s' % (
                        attr.offset,
                        name,
                        describe_attr_value(
                            attr, die, section_offset)))

                if die.has_children:
                    die_depth += 1

        self._emitline()

    def _dump_debug_line_programs(self):
        """ Dump the (decoded) line programs from .debug_line
            The programs are dumped in the order of the CUs they belong to.
        """
        self._emitline('Decoded dump of debug contents of section .debug_line:\n')

        for cu in self._dwarfinfo.iter_CUs():
            lineprogram = self._dwarfinfo.line_program_for_CU(cu)

            # The CU name is the first file entry, prefixed with the first
            # include directory when there is one
            cu_filename = ''
            if len(lineprogram['include_directory']) > 0:
                cu_filename = '%s/%s' % (
                    bytes2str(lineprogram['include_directory'][0]),
                    bytes2str(lineprogram['file_entry'][0].name))
            else:
                cu_filename = bytes2str(lineprogram['file_entry'][0].name)

            self._emitline('CU: %s:' % cu_filename)
            self._emitline('File name Line number Starting address')

            # Print each state's file, line and address information. For some
            # instructions other output is needed to be compatible with
            # readelf.
            for entry in lineprogram.get_entries():
                state = entry.state
                if state is None:
                    # Special handling for commands that don't set a new state
                    if entry.command == DW_LNS_set_file:
                        file_entry = lineprogram['file_entry'][entry.args[0] - 1]
                        if file_entry.dir_index == 0:
                            # current directory
                            self._emitline('\n./%s:[++]' % (
                                bytes2str(file_entry.name)))
                        else:
                            self._emitline('\n%s/%s:' % (
                                bytes2str(lineprogram['include_directory'][file_entry.dir_index - 1]),
                                bytes2str(file_entry.name)))
                    elif entry.command == DW_LNE_define_file:
                        self._emitline('%s:' % (
                            bytes2str(lineprogram['include_directory'][entry.args[0].dir_index])))
                elif not state.end_sequence:
                    # readelf doesn't print the state after end_sequence
                    # instructions. I think it's a bug but to be compatible
                    # I don't print them too.
                    self._emitline('%-35s %11d %18s' % (
                        bytes2str(lineprogram['file_entry'][state.file - 1].name),
                        state.line,
                        '0' if state.address == 0 else
                            self._format_hex(state.address)))
                if entry.command == DW_LNS_copy:
                    # Another readelf oddity...
                    self._emitline()

    def _dump_debug_frames(self):
        """ Dump the raw frame information from .debug_frame.
            Emits nothing when the DWARF info has no CFI.
        """
        if not self._dwarfinfo.has_CFI():
            return
        self._emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                self._emitline('\n%08x %08x %08x CIE' % (
                    entry.offset, entry['length'], entry['CIE_id']))
                self._emitline(' Version: %d' % entry['version'])
                self._emitline(' Augmentation: "%s"' % bytes2str(entry['augmentation']))
                self._emitline(' Code alignment factor: %u' % entry['code_alignment_factor'])
                self._emitline(' Data alignment factor: %d' % entry['data_alignment_factor'])
                self._emitline(' Return address column: %d' % entry['return_address_register'])
                self._emitline()
            else: # FDE
                self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_pointer'],
                    entry.cie.offset,
                    entry['initial_location'],
                    entry['initial_location'] + entry['address_range']))

            # The instruction listing is printed for CIEs and FDEs alike
            self._emit(describe_CFI_instructions(entry))
        self._emitline()

    def _dump_debug_frames_interp(self):
        """ Dump the interpreted (decoded) frame information from .debug_frame.
            Emits nothing when the DWARF info has no CFI.
        """
        if not self._dwarfinfo.has_CFI():
            return

        self._emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                self._emitline('\n%08x %08x %08x CIE "%s" cf=%d df=%d ra=%d' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_id'],
                    bytes2str(entry['augmentation']),
                    entry['code_alignment_factor'],
                    entry['data_alignment_factor'],
                    entry['return_address_register']))
                ra_regnum = entry['return_address_register']
            else: # FDE
                self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_pointer'],
                    entry.cie.offset,
                    entry['initial_location'],
                    entry['initial_location'] + entry['address_range']))
                # An FDE takes the return address register from its CIE
                ra_regnum = entry.cie['return_address_register']

            # Print the heading row for the decoded table
            self._emit(' LOC')
            self._emit(' ' if entry.structs.address_size == 4 else ' ')
            self._emit(' CFA ')

            # Decode the table and look at the registers it describes.
            # We build reg_order here to match readelf's order. In particular,
            # registers are sorted by their number, and the register matching
            # ra_regnum is always listed last with a special heading.
            decoded_table = entry.get_decoded()
            reg_order = sorted(ifilter(
                lambda r: r != ra_regnum,
                decoded_table.reg_order))

            # Headings for the registers
            for regnum in reg_order:
                self._emit('%-6s' % describe_reg_name(regnum))
            self._emitline('ra ')

            # Now include ra_regnum in reg_order to print its values similarly
            # to the other registers.
            reg_order.append(ra_regnum)
            for line in decoded_table.table:
                self._emit(self._format_hex(
                    line['pc'], fullhex=True, lead0x=False))
                self._emit(' %-9s' % describe_CFI_CFA_rule(line['cfa']))

                for regnum in reg_order:
                    # Registers without a rule on this row are shown as 'u'
                    if regnum in line:
                        s = describe_CFI_register_rule(line[regnum])
                    else:
                        s = 'u'
                    self._emit('%-6s' % s)
                self._emitline()
        self._emitline()

    def _emit(self, s=''):
        """ Emit an object to output (converted with str, no newline added)
        """
        self.output.write(str(s))

    def _emitline(self, s=''):
        """ Emit an object to output, followed by a newline
        """
        self.output.write(str(s) + '\n')
class ReadElf(object):
    """ Emit a readelf-style textual description of an ELF file.

        display_* methods are used to emit output into the output stream;
        the underscore-prefixed methods are formatting and lookup helpers.
    """
    def __init__(self, file, output):
        """ file:
                stream object with the ELF file to read

            output:
                output stream to write to
        """
        self.elffile = ELFFile(file)
        self.output = output

        # Lazily initialized if a debug dump is requested
        self._dwarfinfo = None

        # Lazily initialized by _init_versioninfo
        self._versioninfo = None

    def display_file_header(self):
        """ Display the ELF file header
        """
        self._emitline('ELF Header:')
        self._emit(' Magic: ')
        self._emitline(' '.join('%2.2x' % byte2int(b)
                                for b in self.elffile.e_ident_raw))
        header = self.elffile.header
        e_ident = header['e_ident']
        self._emitline(' Class: %s' %
                describe_ei_class(e_ident['EI_CLASS']))
        self._emitline(' Data: %s' %
                describe_ei_data(e_ident['EI_DATA']))
        self._emitline(' Version: %s' %
                describe_ei_version(e_ident['EI_VERSION']))
        self._emitline(' OS/ABI: %s' %
                describe_ei_osabi(e_ident['EI_OSABI']))
        self._emitline(' ABI Version: %d' %
                e_ident['EI_ABIVERSION'])
        self._emitline(' Type: %s' %
                describe_e_type(header['e_type']))
        self._emitline(' Machine: %s' %
                describe_e_machine(header['e_machine']))
        self._emitline(' Version: %s' %
                describe_e_version_numeric(header['e_version']))
        self._emitline(' Entry point address: %s' %
                self._format_hex(header['e_entry']))
        self._emit(' Start of program headers: %s' %
                header['e_phoff'])
        self._emitline(' (bytes into file)')
        self._emit(' Start of section headers: %s' %
                header['e_shoff'])
        self._emitline(' (bytes into file)')
        self._emitline(' Flags: %s%s' %
                (self._format_hex(header['e_flags']),
                 self.decode_flags(header['e_flags'])))
        self._emitline(' Size of this header: %s (bytes)' %
                header['e_ehsize'])
        self._emitline(' Size of program headers: %s (bytes)' %
                header['e_phentsize'])
        self._emitline(' Number of program headers: %s' %
                header['e_phnum'])
        self._emitline(' Size of section headers: %s (bytes)' %
                header['e_shentsize'])
        self._emitline(' Number of section headers: %s' %
                header['e_shnum'])
        self._emitline(' Section header string table index: %s' %
                header['e_shstrndx'])

    def decode_flags(self, flags):
        """ Return a textual suffix describing the e_flags value 'flags'.

            Only EM_ARM files get a description; for every other machine the
            returned string is empty.
        """
        description = ""
        if self.elffile['e_machine'] == "EM_ARM":
            if flags & E_FLAGS.EF_ARM_HASENTRY:
                description += ", has entry point"

            version = flags & E_FLAGS.EF_ARM_EABIMASK
            if version == E_FLAGS.EF_ARM_EABI_VER5:
                description += ", Version5 EABI"
        return description

    def display_program_headers(self, show_heading=True):
        """ Display the ELF program headers.
            If show_heading is True, displays the heading for this information
            (Elf file type is...)
        """
        self._emitline()
        if self.elffile.num_segments() == 0:
            self._emitline('There are no program headers in this file.')
            return

        elfheader = self.elffile.header
        if show_heading:
            self._emitline('Elf file type is %s' %
                describe_e_type(elfheader['e_type']))
            self._emitline('Entry point is %s' %
                self._format_hex(elfheader['e_entry']))
            # readelf weirdness - why isn't e_phoff printed as hex? (for
            # section headers, it is...)
            self._emitline('There are %s program headers, starting at offset %s' % (
                elfheader['e_phnum'], elfheader['e_phoff']))
            self._emitline()

        self._emitline('Program Headers:')

        # Now comes the table of program headers with their attributes. Note
        # that due to different formatting constraints of 32-bit and 64-bit
        # addresses, there are some conditions on elfclass here.
        #
        # First comes the table heading
        #
        if self.elffile.elfclass == 32:
            self._emitline(' Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align')
        else:
            self._emitline(' Type Offset VirtAddr PhysAddr')
            self._emitline(' FileSiz MemSiz Flags Align')

        # Now the entries
        #
        for segment in self.elffile.iter_segments():
            self._emit(' %-14s ' % describe_p_type(segment['p_type']))

            if self.elffile.elfclass == 32:
                self._emitline('%s %s %s %s %s %-3s %s' % (
                    self._format_hex(segment['p_offset'], fieldsize=6),
                    self._format_hex(segment['p_vaddr'], fullhex=True),
                    self._format_hex(segment['p_paddr'], fullhex=True),
                    self._format_hex(segment['p_filesz'], fieldsize=5),
                    self._format_hex(segment['p_memsz'], fieldsize=5),
                    describe_p_flags(segment['p_flags']),
                    self._format_hex(segment['p_align'])))
            else: # 64
                self._emitline('%s %s %s' % (
                    self._format_hex(segment['p_offset'], fullhex=True),
                    self._format_hex(segment['p_vaddr'], fullhex=True),
                    self._format_hex(segment['p_paddr'], fullhex=True)))
                self._emitline(' %s %s %-3s %s' % (
                    self._format_hex(segment['p_filesz'], fullhex=True),
                    self._format_hex(segment['p_memsz'], fullhex=True),
                    describe_p_flags(segment['p_flags']),
                    # lead0x set to False for p_align, to mimic readelf.
                    # No idea why the difference from 32-bit mode :-|
                    self._format_hex(segment['p_align'], lead0x=False)))

            if isinstance(segment, InterpSegment):
                self._emitline(' [Requesting program interpreter: %s]' %
                    bytes2str(segment.get_interp_name()))

        # Sections to segments mapping
        #
        if self.elffile.num_sections() == 0:
            # No sections? We're done
            return

        self._emitline('\n Section to Segment mapping:')
        self._emitline(' Segment Sections...')

        for nseg, segment in enumerate(self.elffile.iter_segments()):
            self._emit(' %2.2d ' % nseg)

            for section in self.elffile.iter_sections():
                if (    not section.is_null() and
                        segment.section_in_segment(section)):
                    self._emit('%s ' % bytes2str(section.name))

            self._emitline('')

    def display_section_headers(self, show_heading=True):
        """ Display the ELF section headers.
            If show_heading is True, first emit the line stating how many
            section headers there are and where they start.
        """
        elfheader = self.elffile.header
        if show_heading:
            self._emitline('There are %s section headers, starting at offset %s' % (
                elfheader['e_shnum'], self._format_hex(elfheader['e_shoff'])))

        self._emitline('\nSection Header%s:' % (
            's' if elfheader['e_shnum'] > 1 else ''))

        # Different formatting constraints of 32-bit and 64-bit addresses
        #
        if self.elffile.elfclass == 32:
            self._emitline(' [Nr] Name Type Addr Off Size ES Flg Lk Inf Al')
        else:
            self._emitline(' [Nr] Name Type Address Offset')
            self._emitline(' Size EntSize Flags Link Info Align')

        # Now the entries
        #
        for nsec, section in enumerate(self.elffile.iter_sections()):
            self._emit(' [%2u] %-17.17s %-15.15s ' % (
                nsec, bytes2str(section.name), describe_sh_type(section['sh_type'])))

            if self.elffile.elfclass == 32:
                self._emitline('%s %s %s %s %3s %2s %3s %2s' % (
                    self._format_hex(section['sh_addr'], fieldsize=8, lead0x=False),
                    self._format_hex(section['sh_offset'], fieldsize=6, lead0x=False),
                    self._format_hex(section['sh_size'], fieldsize=6, lead0x=False),
                    self._format_hex(section['sh_entsize'], fieldsize=2, lead0x=False),
                    describe_sh_flags(section['sh_flags']),
                    section['sh_link'], section['sh_info'],
                    section['sh_addralign']))
            else: # 64
                self._emitline(' %s %s' % (
                    self._format_hex(section['sh_addr'], fullhex=True, lead0x=False),
                    self._format_hex(section['sh_offset'],
                        fieldsize=16 if section['sh_offset'] > 0xffffffff else 8,
                        lead0x=False)))
                self._emitline(' %s %s %3s %2s %3s %s' % (
                    self._format_hex(section['sh_size'], fullhex=True, lead0x=False),
                    self._format_hex(section['sh_entsize'], fullhex=True, lead0x=False),
                    describe_sh_flags(section['sh_flags']),
                    section['sh_link'], section['sh_info'],
                    section['sh_addralign']))

        self._emitline('Key to Flags:')
        self._emit(' W (write), A (alloc), X (execute), M (merge), S (strings)')
        if self.elffile['e_machine'] in ('EM_X86_64', 'EM_L10M'):
            self._emitline(', l (large)')
        else:
            self._emitline()
        self._emitline(' I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)')
        self._emitline(' O (extra OS processing required) o (OS specific), p (processor specific)')

    def display_symbol_tables(self):
        """ Display the symbol tables contained in the file
        """
        self._init_versioninfo()

        for section in self.elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue

            if section['sh_entsize'] == 0:
                self._emitline("\nSymbol table '%s' has a sh_entsize of zero!" % (
                    bytes2str(section.name)))
                continue

            self._emitline("\nSymbol table '%s' contains %s entries:" % (
                bytes2str(section.name), section.num_symbols()))

            if self.elffile.elfclass == 32:
                self._emitline(' Num: Value Size Type Bind Vis Ndx Name')
            else: # 64
                self._emitline(' Num: Value Size Type Bind Vis Ndx Name')

            for nsym, symbol in enumerate(section.iter_symbols()):

                version_info = ''
                # readelf doesn't display version info for Solaris versioning
                if (section['sh_type'] == 'SHT_DYNSYM' and
                        self._versioninfo['type'] == 'GNU'):
                    version = self._symbol_version(nsym)
                    # _symbol_version returns None when there is no versym
                    # entry for this symbol; such symbols get no suffix.
                    if (version is not None and
                            version['name'] != bytes2str(symbol.name) and
                            version['index'] not in ('VER_NDX_LOCAL',
                                                     'VER_NDX_GLOBAL')):
                        if version['filename']:
                            # external symbol
                            version_info = '@%(name)s (%(index)i)' % version
                        else:
                            # internal symbol
                            if version['hidden']:
                                version_info = '@%(name)s' % version
                            else:
                                version_info = '@@%(name)s' % version

                # symbol names are truncated to 25 chars, similarly to readelf
                self._emitline('%6d: %s %5d %-7s %-6s %-7s %4s %.25s%s' % (
                    nsym,
                    self._format_hex(
                        symbol['st_value'], fullhex=True, lead0x=False),
                    symbol['st_size'],
                    describe_symbol_type(symbol['st_info']['type']),
                    describe_symbol_bind(symbol['st_info']['bind']),
                    describe_symbol_visibility(symbol['st_other']['visibility']),
                    describe_symbol_shndx(symbol['st_shndx']),
                    bytes2str(symbol.name),
                    version_info))

    def display_dynamic_tags(self):
        """ Display the dynamic tags contained in the file
        """
        has_dynamic_sections = False
        for section in self.elffile.iter_sections():
            if not isinstance(section, DynamicSection):
                continue

            has_dynamic_sections = True
            self._emitline("\nDynamic section at offset %s contains %s entries:" % (
                self._format_hex(section['sh_offset']),
                section.num_tags()))
            self._emitline(" Tag Type Name/Value")

            padding = 20 + (8 if self.elffile.elfclass == 32 else 0)
            for tag in section.iter_tags():
                if tag.entry.d_tag == 'DT_NEEDED':
                    parsed = 'Shared library: [%s]' % bytes2str(tag.needed)
                elif tag.entry.d_tag == 'DT_RPATH':
                    parsed = 'Library rpath: [%s]' % bytes2str(tag.rpath)
                elif tag.entry.d_tag == 'DT_RUNPATH':
                    parsed = 'Library runpath: [%s]' % bytes2str(tag.runpath)
                elif tag.entry.d_tag == 'DT_SONAME':
                    parsed = 'Library soname: [%s]' % bytes2str(tag.soname)
                elif (tag.entry.d_tag.endswith('SZ') or
                      tag.entry.d_tag.endswith('ENT')):
                    parsed = '%i (bytes)' % tag['d_val']
                elif (tag.entry.d_tag.endswith('NUM') or
                      tag.entry.d_tag.endswith('COUNT')):
                    parsed = '%i' % tag['d_val']
                elif tag.entry.d_tag == 'DT_PLTREL':
                    s = describe_dyn_tag(tag.entry.d_val)
                    if s.startswith('DT_'):
                        s = s[3:]
                    parsed = '%s' % s
                else:
                    parsed = '%#x' % tag['d_val']

                self._emitline(" %s %-*s %s" % (
                    self._format_hex(ENUM_D_TAG.get(tag.entry.d_tag, tag.entry.d_tag),
                        fullhex=True, lead0x=True),
                    padding,
                    '(%s)' % (tag.entry.d_tag[3:],),
                    parsed))

        if not has_dynamic_sections:
            # readelf only prints this if there is at least one segment
            if self.elffile.num_segments():
                self._emitline("\nThere is no dynamic section in this file.")

    def display_relocations(self):
        """ Display the relocations contained in the file
        """
        has_relocation_sections = False
        # Width of the offset/info columns depends only on the ELF class
        hexwidth = 8 if self.elffile.elfclass == 32 else 12

        for section in self.elffile.iter_sections():
            if not isinstance(section, RelocationSection):
                continue

            has_relocation_sections = True
            self._emitline("\nRelocation section '%s' at offset %s contains %s entries:" % (
                bytes2str(section.name),
                self._format_hex(section['sh_offset']),
                section.num_relocations()))
            if section.is_RELA():
                self._emitline(" Offset Info Type Sym. Value Sym. Name + Addend")
            else:
                self._emitline(" Offset Info Type Sym.Value Sym. Name")

            # The symbol table section pointed to in sh_link
            symtable = self.elffile.get_section(section['sh_link'])

            for rel in section.iter_relocations():
                self._emit('%s %s %-17.17s' % (
                    self._format_hex(rel['r_offset'],
                        fieldsize=hexwidth, lead0x=False),
                    self._format_hex(rel['r_info'],
                        fieldsize=hexwidth, lead0x=False),
                    describe_reloc_type(
                        rel['r_info_type'], self.elffile)))

                if rel['r_info_sym'] == 0:
                    self._emitline()
                    continue

                symbol = symtable.get_symbol(rel['r_info_sym'])
                # Some symbols have zero 'st_name', so instead what's used is
                # the name of the section they point at
                if symbol['st_name'] == 0:
                    symsec = self.elffile.get_section(symbol['st_shndx'])
                    symbol_name = symsec.name
                else:
                    symbol_name = symbol.name
                self._emit(' %s %s%22.22s' % (
                    self._format_hex(
                        symbol['st_value'],
                        fullhex=True, lead0x=False),
                    ' ' if self.elffile.elfclass == 32 else '',
                    bytes2str(symbol_name)))
                if section.is_RELA():
                    self._emit(' %s %x' % (
                        '+' if rel['r_addend'] >= 0 else '-',
                        abs(rel['r_addend'])))
                self._emitline()

        if not has_relocation_sections:
            self._emitline('\nThere are no relocations in this file.')

    def display_version_info(self):
        """ Display the version info contained in the file
        """
        self._init_versioninfo()

        if not self._versioninfo['type']:
            self._emitline("\nNo version information found in this file.")
            return

        for section in self.elffile.iter_sections():
            if isinstance(section, GNUVerSymSection):
                self._print_version_section_header(
                    section, 'Version symbols', lead0x=False)

                num_symbols = section.num_symbols()

                # Symbol version info are printed four by four entries
                for idx_by_4 in range(0, num_symbols, 4):

                    self._emit(' %03x:' % idx_by_4)

                    for idx in range(idx_by_4, min(idx_by_4 + 4, num_symbols)):

                        symbol_version = self._symbol_version(idx)
                        if symbol_version['index'] == 'VER_NDX_LOCAL':
                            version_index = 0
                            version_name = '(*local*)'
                        elif symbol_version['index'] == 'VER_NDX_GLOBAL':
                            version_index = 1
                            version_name = '(*global*)'
                        else:
                            version_index = symbol_version['index']
                            version_name = '(%(name)s)' % symbol_version

                        visibility = 'h' if symbol_version['hidden'] else ' '

                        self._emit('%4x%s%-13s' % (
                            version_index, visibility, version_name))

                    self._emitline()

            elif isinstance(section, GNUVerDefSection):
                self._print_version_section_header(
                    section, 'Version definition', indent=2)

                offset = 0
                for verdef, verdaux_iter in section.iter_versions():
                    verdaux = next(verdaux_iter)

                    name = verdaux.name
                    if verdef['vd_flags']:
                        flags = describe_ver_flags(verdef['vd_flags'])
                        # Mimic exactly the readelf output
                        flags += ' '
                    else:
                        flags = 'none'

                    self._emitline(' %s: Rev: %i Flags: %s Index: %i'
                                   ' Cnt: %i Name: %s' % (
                            self._format_hex(offset, fieldsize=6,
                                             alternate=True),
                            verdef['vd_version'], flags, verdef['vd_ndx'],
                            verdef['vd_cnt'], bytes2str(name)))

                    verdaux_offset = (
                            offset + verdef['vd_aux'] + verdaux['vda_next'])
                    for idx, verdaux in enumerate(verdaux_iter, start=1):
                        self._emitline(' %s: Parent %i: %s' %
                            (self._format_hex(verdaux_offset, fieldsize=4),
                                              idx, bytes2str(verdaux.name)))
                        verdaux_offset += verdaux['vda_next']

                    offset += verdef['vd_next']

            elif isinstance(section, GNUVerNeedSection):
                self._print_version_section_header(section, 'Version needs')

                offset = 0
                for verneed, verneed_iter in section.iter_versions():

                    self._emitline(' %s: Version: %i File: %s Cnt: %i' % (
                            self._format_hex(offset, fieldsize=6,
                                             alternate=True),
                            verneed['vn_version'], bytes2str(verneed.name),
                            verneed['vn_cnt']))

                    vernaux_offset = offset + verneed['vn_aux']
                    for idx, vernaux in enumerate(verneed_iter, start=1):
                        if vernaux['vna_flags']:
                            flags = describe_ver_flags(vernaux['vna_flags'])
                            # Mimic exactly the readelf output
                            flags += ' '
                        else:
                            flags = 'none'

                        self._emitline(
                            ' %s: Name: %s Flags: %s Version: %i' % (
                                self._format_hex(vernaux_offset, fieldsize=4),
                                bytes2str(vernaux.name), flags,
                                vernaux['vna_other']))

                        vernaux_offset += vernaux['vna_next']

                    offset += verneed['vn_next']

    def display_hex_dump(self, section_spec):
        """ Display a hex dump of a section. section_spec is either a section
            number or a name.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" % (
                section_spec))
            return

        self._emitline("\nHex dump of section '%s':" % bytes2str(section.name))
        self._note_relocs_for_section(section)
        addr = section['sh_addr']
        data = section.data()
        dataptr = 0

        while dataptr < len(data):
            bytesleft = len(data) - dataptr
            # chunks of 16 bytes per line
            linebytes = 16 if bytesleft > 16 else bytesleft

            self._emit(' %s ' % self._format_hex(addr, fieldsize=8))
            for i in range(16):
                if i < linebytes:
                    self._emit('%2.2x' % byte2int(data[dataptr + i]))
                else:
                    self._emit(' ')
                if i % 4 == 3:
                    self._emit(' ')

            for i in range(linebytes):
                # Slice (rather than index) so that 'c' is a bytes object
                c = data[dataptr + i : dataptr + i + 1]
                if byte2int(c[0]) >= 32 and byte2int(c[0]) < 0x7f:
                    self._emit(bytes2str(c))
                else:
                    self._emit(bytes2str(b'.'))

            self._emitline()
            addr += linebytes
            dataptr += linebytes

        self._emitline()

    def display_string_dump(self, section_spec):
        """ Display a strings dump of a section. section_spec is either a
            section number or a name.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" % (
                section_spec))
            return

        self._emitline("\nString dump of section '%s':" % bytes2str(section.name))

        found = False
        data = section.data()
        dataptr = 0

        while dataptr < len(data):
            # Skip bytes outside the 32..127 range to find a string start
            while ( dataptr < len(data) and
                    not (32 <= byte2int(data[dataptr]) <= 127)):
                dataptr += 1

            if dataptr >= len(data):
                break

            # The string runs up to (not including) the next NUL byte
            endptr = dataptr
            while endptr < len(data) and byte2int(data[endptr]) != 0:
                endptr += 1

            found = True
            self._emitline(' [%6x] %s' % (
                dataptr, bytes2str(data[dataptr:endptr])))

            dataptr = endptr

        if not found:
            self._emitline(' No strings found in this section.')
        else:
            self._emitline()

    def display_debug_dump(self, dump_what):
        """ Dump a DWARF section. dump_what selects the dump: 'info',
            'decodedline', 'frames' or 'frames-interp'. Nothing is emitted
            when the file has no DWARF info.
        """
        self._init_dwarfinfo()
        if self._dwarfinfo is None:
            return

        set_global_machine_arch(self.elffile.get_machine_arch())

        if dump_what == 'info':
            self._dump_debug_info()
        elif dump_what == 'decodedline':
            self._dump_debug_line_programs()
        elif dump_what == 'frames':
            self._dump_debug_frames()
        elif dump_what == 'frames-interp':
            self._dump_debug_frames_interp()
        else:
            self._emitline('debug dump not yet supported for "%s"' % dump_what)

    def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True,
                    alternate=False):
        """ Format an address into a hexadecimal string.

            fieldsize:
                Size of the hexadecimal field (with leading zeros to fit the
                address into. For example with fieldsize=8, the format will
                be %08x
                If None, the minimal required field size will be used.

            fullhex:
                If True, override fieldsize to set it to the maximal size
                needed for the elfclass

            lead0x:
                If True, leading 0x is added

            alternate:
                If True, override lead0x to emulate the alternate
                hexadecimal form specified in format string with the #
                character: only non-zero values are prefixed with 0x.
                This form is used by readelf.
        """
        if alternate:
            if addr == 0:
                lead0x = False
            else:
                lead0x = True
                # The 0x prefix takes two columns out of a fixed-size field.
                # With no fixed size there is nothing to shrink.
                if fieldsize is not None:
                    fieldsize -= 2

        s = '0x' if lead0x else ''
        if fullhex:
            fieldsize = 8 if self.elffile.elfclass == 32 else 16
        if fieldsize is None:
            field = '%x'
        else:
            field = '%' + '0%sx' % fieldsize
        return s + field % addr

    def _print_version_section_header(self, version_section, name, lead0x=True,
                                      indent=1):
        """ Print a section header of one version related section (versym,
            verneed or verdef) with some options to accommodate readelf
            little differences between each header (e.g. indentation
            and 0x prefixing).
        """
        if hasattr(version_section, 'num_versions'):
            num_entries = version_section.num_versions()
        else:
            num_entries = version_section.num_symbols()

        self._emitline("\n%s section '%s' contains %s entries:" %
            (name, bytes2str(version_section.name), num_entries))
        self._emitline('%sAddr: %s Offset: %s Link: %i (%s)' % (
            ' ' * indent,
            self._format_hex(
                version_section['sh_addr'], fieldsize=16, lead0x=lead0x),
            self._format_hex(
                version_section['sh_offset'], fieldsize=6, lead0x=True),
            version_section['sh_link'],
            bytes2str(
                self.elffile.get_section(version_section['sh_link']).name)
            )
        )

    def _init_versioninfo(self):
        """ Search and initialize information about version related sections
            and the kind of versioning used (GNU or Solaris).
        """
        if self._versioninfo is not None:
            return

        self._versioninfo = {'versym': None, 'verdef': None,
                             'verneed': None, 'type': None}

        for section in self.elffile.iter_sections():
            if isinstance(section, GNUVerSymSection):
                self._versioninfo['versym'] = section
            elif isinstance(section, GNUVerDefSection):
                self._versioninfo['verdef'] = section
            elif isinstance(section, GNUVerNeedSection):
                self._versioninfo['verneed'] = section
            elif isinstance(section, DynamicSection):
                for tag in section.iter_tags():
                    if tag['d_tag'] == 'DT_VERSYM':
                        self._versioninfo['type'] = 'GNU'
                        break

        if not self._versioninfo['type'] and (
                self._versioninfo['verneed'] or self._versioninfo['verdef']):
            self._versioninfo['type'] = 'Solaris'

    def _symbol_version(self, nsym):
        """ Return a dict containing information on the version of symbol
            number nsym (keys: 'index', 'name', 'filename', 'hidden'), or
            None if no version information is available for it.
        """
        self._init_versioninfo()

        symbol_version = dict.fromkeys(('index', 'name', 'filename', 'hidden'))

        if (not self._versioninfo['versym'] or
                nsym >= self._versioninfo['versym'].num_symbols()):
            return None

        symbol = self._versioninfo['versym'].get_symbol(nsym)
        index = symbol.entry['ndx']
        if index not in ('VER_NDX_LOCAL', 'VER_NDX_GLOBAL'):
            index = int(index)

            if self._versioninfo['type'] == 'GNU':
                # In GNU versioning mode, the highest bit is used to
                # store whether the symbol is hidden or not
                if index & 0x8000:
                    index &= ~0x8000
                    symbol_version['hidden'] = True

            if (self._versioninfo['verdef'] and
                    index <= self._versioninfo['verdef'].num_versions()):
                _, verdaux_iter = \
                        self._versioninfo['verdef'].get_version(index)
                symbol_version['name'] = bytes2str(next(verdaux_iter).name)
            else:
                verneed, vernaux = \
                        self._versioninfo['verneed'].get_version(index)
                symbol_version['name'] = bytes2str(vernaux.name)
                symbol_version['filename'] = bytes2str(verneed.name)

        symbol_version['index'] = index
        return symbol_version

    def _section_from_spec(self, spec):
        """ Retrieve a section given a "spec" (either number or name).
            Return None if no such section exists in the file.
        """
        try:
            num = int(spec)
        except ValueError:
            # Not a number. Must be a name then
            return self.elffile.get_section_by_name(str2bytes(spec))

        # A negative number can never be a valid section index
        if 0 <= num < self.elffile.num_sections():
            return self.elffile.get_section(num)
        return None

    def _note_relocs_for_section(self, section):
        """ If there are relocation sections pointing to the given section,
            emit a note about it.
        """
        for relsec in self.elffile.iter_sections():
            if isinstance(relsec, RelocationSection):
                info_idx = relsec['sh_info']
                if self.elffile.get_section(info_idx) == section:
                    self._emitline(' Note: This section has relocations against it, but these have NOT been applied to this dump.')
                    return

    def _init_dwarfinfo(self):
        """ Initialize the DWARF info contained in the file and assign it to
            self._dwarfinfo.
            Leave self._dwarfinfo at None if no DWARF info was found in the file
        """
        if self._dwarfinfo is not None:
            return

        if self.elffile.has_dwarf_info():
            self._dwarfinfo = self.elffile.get_dwarf_info()
        else:
            self._dwarfinfo = None

    def _dump_debug_info(self):
        """ Dump the debugging info section.
        """
        self._emitline('Contents of the .debug_info section:\n')

        # Offset of the .debug_info section in the stream
        section_offset = self._dwarfinfo.debug_info_sec.global_offset

        for cu in self._dwarfinfo.iter_CUs():
            self._emitline(' Compilation Unit @ offset %s:' %
                self._format_hex(cu.cu_offset))
            self._emitline(' Length: %s (%s)' % (
                self._format_hex(cu['unit_length']),
                '%s-bit' % cu.dwarf_format()))
            self._emitline(' Version: %s' % cu['version'])
            self._emitline(' Abbrev Offset: %s' % (
                self._format_hex(cu['debug_abbrev_offset'])))
            self._emitline(' Pointer Size: %s' % cu['address_size'])

            # The nesting depth of each DIE within the tree of DIEs must be
            # displayed. To implement this, a counter is incremented each time
            # the current DIE has children, and decremented when a null die is
            # encountered. Due to the way the DIE tree is serialized, this will
            # correctly reflect the nesting depth
            #
            die_depth = 0
            for die in cu.iter_DIEs():
                self._emitline(' <%s><%x>: Abbrev Number: %s%s' % (
                    die_depth,
                    die.offset,
                    die.abbrev_code,
                    (' (%s)' % die.tag) if not die.is_null() else ''))
                if die.is_null():
                    die_depth -= 1
                    continue

                for attr in itervalues(die.attributes):
                    name = attr.name
                    # Unknown attribute values are passed-through as integers
                    if isinstance(name, int):
                        name = 'Unknown AT value: %x' % name
                    self._emitline(' <%2x> %-18s: %s' % (
                        attr.offset,
                        name,
                        describe_attr_value(
                            attr, die, section_offset)))

                if die.has_children:
                    die_depth += 1

        self._emitline()

    def _dump_debug_line_programs(self):
        """ Dump the (decoded) line programs from .debug_line
            The programs are dumped in the order of the CUs they belong to.
        """
        self._emitline('Decoded dump of debug contents of section .debug_line:\n')

        for cu in self._dwarfinfo.iter_CUs():
            lineprogram = self._dwarfinfo.line_program_for_CU(cu)

            cu_filename = bytes2str(lineprogram['file_entry'][0].name)
            if len(lineprogram['include_directory']) > 0:
                dir_index = lineprogram['file_entry'][0].dir_index
                if dir_index > 0:
                    dir_name = lineprogram['include_directory'][dir_index - 1]
                else:
                    dir_name = b'.'
                cu_filename = '%s/%s' % (bytes2str(dir_name), cu_filename)

            self._emitline('CU: %s:' % cu_filename)
            self._emitline('File name Line number Starting address')

            # Print each state's file, line and address information. For some
            # instructions other output is needed to be compatible with
            # readelf.
            for entry in lineprogram.get_entries():
                state = entry.state
                if state is None:
                    # Special handling for commands that don't set a new state
                    if entry.command == DW_LNS_set_file:
                        file_entry = lineprogram['file_entry'][entry.args[0] - 1]
                        if file_entry.dir_index == 0:
                            # current directory
                            self._emitline('\n./%s:[++]' % (
                                bytes2str(file_entry.name)))
                        else:
                            self._emitline('\n%s/%s:' % (
                                bytes2str(lineprogram['include_directory'][file_entry.dir_index - 1]),
                                bytes2str(file_entry.name)))
                    elif entry.command == DW_LNE_define_file:
                        self._emitline('%s:' % (
                            bytes2str(lineprogram['include_directory'][entry.args[0].dir_index])))
                elif not state.end_sequence:
                    # readelf doesn't print the state after end_sequence
                    # instructions. I think it's a bug but to be compatible
                    # I don't print them too.
                    self._emitline('%-35s %11d %18s' % (
                        bytes2str(lineprogram['file_entry'][state.file - 1].name),
                        state.line,
                        '0' if state.address == 0 else
                               self._format_hex(state.address)))
                if entry.command == DW_LNS_copy:
                    # Another readelf oddity...
                    self._emitline()

    def _dump_debug_frames(self):
        """ Dump the raw frame information from .debug_frame
        """
        if not self._dwarfinfo.has_CFI():
            return
        self._emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                self._emitline('\n%08x %08x %08x CIE' % (
                    entry.offset, entry['length'], entry['CIE_id']))
                self._emitline(' Version: %d' % entry['version'])
                self._emitline(' Augmentation: "%s"' % bytes2str(entry['augmentation']))
                self._emitline(' Code alignment factor: %u' % entry['code_alignment_factor'])
                self._emitline(' Data alignment factor: %d' % entry['data_alignment_factor'])
                self._emitline(' Return address column: %d' % entry['return_address_register'])
                self._emitline()
            else: # FDE
                self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_pointer'],
                    entry.cie.offset,
                    entry['initial_location'],
                    entry['initial_location'] + entry['address_range']))

            self._emit(describe_CFI_instructions(entry))
        self._emitline()

    def _dump_debug_frames_interp(self):
        """ Dump the interpreted (decoded) frame information from .debug_frame
        """
        if not self._dwarfinfo.has_CFI():
            return

        self._emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                self._emitline('\n%08x %08x %08x CIE "%s" cf=%d df=%d ra=%d' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_id'],
                    bytes2str(entry['augmentation']),
                    entry['code_alignment_factor'],
                    entry['data_alignment_factor'],
                    entry['return_address_register']))
                ra_regnum = entry['return_address_register']
            else: # FDE
                self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_pointer'],
                    entry.cie.offset,
                    entry['initial_location'],
                    entry['initial_location'] + entry['address_range']))
                ra_regnum = entry.cie['return_address_register']

            # Print the heading row for the decoded table
            self._emit(' LOC')
            self._emit(' ' if entry.structs.address_size == 4
                       else ' ')
            self._emit(' CFA ')

            # Decode the table and look at the registers it describes.
            # We build reg_order here to match readelf's order. In particular,
            # registers are sorted by their number, and the register matching
            # ra_regnum is always listed last with a special heading.
            decoded_table = entry.get_decoded()
            reg_order = sorted(ifilter(
                lambda r: r != ra_regnum,
                decoded_table.reg_order))

            # Headings for the registers
            for regnum in reg_order:
                self._emit('%-6s' % describe_reg_name(regnum))
            self._emitline('ra ')

            # Now include ra_regnum in reg_order to print its values similarly
            # to the other registers.
            reg_order.append(ra_regnum)
            for line in decoded_table.table:
                self._emit(self._format_hex(
                    line['pc'], fullhex=True, lead0x=False))
                self._emit(' %-9s' % describe_CFI_CFA_rule(line['cfa']))

                for regnum in reg_order:
                    if regnum in line:
                        s = describe_CFI_register_rule(line[regnum])
                    else:
                        s = 'u'
                    self._emit('%-6s' % s)
                self._emitline()
        self._emitline()

    def _emit(self, s=''):
        """ Emit an object to output
        """
        self.output.write(str(s))

    def _emitline(self, s=''):
        """ Emit an object to output, followed by a newline
        """
        self.output.write(str(s) + '\n')
class Image(object):
    """ An ELF image together with lookup tables derived from its DWARF info.

        When the file carries DWARF info, construction builds:
          _compile_units         CU name -> dict with "line_program", "lpc",
                                 "hpc", "comp_dir", "functions", "variables"
                                 and "lines" (line number -> [addresses])
          _addresses             address -> "<CU name>+<line>"
          _lowest_known_address  smallest CU low_pc seen, or None
          _cfa_rule              pc -> decoded CFI table row (only when the
                                 file has CFI entries)
    """
    def __init__(self, fname):
        """ fname: path of the ELF file to load
        """
        # ELF files are binary: always open them in binary mode so that no
        # newline translation is applied to the contents.
        if platform.system() == "Windows":
            # The stream is handed to ELFFile as-is and stays open.
            elf_data = open(fname, "rb")
        else:
            with open(fname, "rb") as f:
                elf_data = StringIO(f.read())
        self.elf = ELFFile(elf_data)

        # Start with empty tables so that lookups on an image without DWARF
        # info report "unknown" instead of failing on missing attributes.
        self._compile_units = {}
        self._addresses = {}
        self._lowest_known_address = None

        if self.elf.has_dwarf_info():
            self.dwarf = self.elf.get_dwarf_info()
            set_global_machine_arch(self.elf.get_machine_arch())
            self.__tame_dwarf()
        self.get_expr_evaluator = lambda: ExprLiveEval(self)

    @property
    def executable(self):
        """ (base_addr, img) tuple for the loadable image; built on first
            access and cached in self._exe.
        """
        try:
            return self._exe
        except AttributeError:
            # Not built yet
            self._exe = self._build_executable()
            return self._exe

    def _build_executable(self):
        """ Concatenate the data of section 1 with that of sections 2 and 3
            when they are allocated SHT_PROGBITS sections.

            Returns (base_addr, img) where base_addr is the sh_addr of
            section 1. Raises Exception if an appended section does not start
            exactly where the image built so far ends.
        """
        s = self.elf.get_section(1)
        # 2 is the SHF_ALLOC bit of sh_flags
        assert s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS"
        base_addr = s.header["sh_addr"]
        img = s.data()

        s = self.elf.get_section(2)
        if s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS":
            if s.header["sh_addr"] != base_addr + len(img):
                raise Exception("bad section vaddr - #2 should follow #1")
            img += s.data()

        s = self.elf.get_section(3)
        print("%s" % str(s.header))
        if s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS":
            if s.header["sh_addr"] != base_addr + len(img):
                raise Exception("bad section vaddr - #3 should follow #2")
            img += s.data()

        return (base_addr, img)

    @staticmethod
    def _die_location(die, location_lists):
        """ Return the location of a parameter/variable DIE: the location
            list when DW_AT_location is an offset form, the raw attribute
            value otherwise, and [] when it cannot be obtained.
        """
        try:
            attr = die.attributes['DW_AT_location']
            if attr.form in ['DW_FORM_sec_offset', 'DW_FORM_data4']:
                return location_lists.get_location_list_at_offset(attr.value)
            return attr.value
        except Exception:
            # Best effort: a missing or undecodable location is recorded as
            # an empty one rather than aborting the whole scan.
            return []

    def __tame_dwarf(self):
        """ Walk the DWARF info in self.dwarf and fill the lookup tables
            (_cfa_rule, _compile_units, _addresses, _lowest_known_address).

            Raises Exception if one address maps to two different lines.
        """
        dw = self.dwarf
        self._compile_units = {}
        self._addresses = {}
        self._lowest_known_address = None

        location_lists = dw.location_lists()

        cfi = None
        if dw.has_EH_CFI():
            cfi = dw.EH_CFI_entries()
            print("we have EH CFI entries")
        elif dw.has_CFI():
            cfi = dw.CFI_entries()
            print("we have CFI entries")
        else:
            print("no (EH) CFI")

        if cfi is not None:
            self._cfa_rule = {}
            for c in cfi:
                try:
                    decoded = c.get_decoded()
                except Exception:
                    # Stop at the first entry that cannot be decoded
                    print("CFI decoding exception")
                    break
                for entry in decoded.table:
                    if entry["pc"] in self._cfa_rule:
                        print("duplicate cfa rule found at pc %x" % entry["pc"])
                        print("\t%s" % str(self._cfa_rule[entry["pc"]]))
                        print("\t%s" % str(entry))
                        print("")
                    # The last rule seen for a pc wins
                    self._cfa_rule[entry["pc"]] = entry

        for c in dw.iter_CUs():
            functions = {}
            variables = {}
            td = c.get_top_DIE()
            for d in td.iter_children():
                if d.tag == 'DW_TAG_subprogram':
                    if 'DW_AT_declaration' in d.attributes:
                        continue
                    lpc = d.attributes['DW_AT_low_pc'].value
                    hpc = d.attributes['DW_AT_high_pc'].value
                    # A high_pc below low_pc is treated as an offset from it
                    if hpc < lpc:
                        hpc += lpc
                    function_name = d.attributes['DW_AT_name'].value
                    f = {"lpc": lpc, "hpc": hpc, "args": {}, "vars": {}}
                    if 'DW_AT_frame_base' in d.attributes:
                        a = d.attributes['DW_AT_frame_base']
                        if a.form == 'DW_FORM_data4' or a.form == 'DW_FORM_sec_offset':
                            f["fb"] = location_lists.get_location_list_at_offset(a.value)
                        else:
                            f["fb"] = a.value
                    for child in d.iter_children():
                        if child.tag == "DW_TAG_formal_parameter":
                            name = child.attributes['DW_AT_name'].value
                            f["args"][name] = {
                                "location": self._die_location(child, location_lists)}
                        elif child.tag == "DW_TAG_variable":
                            name = child.attributes['DW_AT_name'].value
                            f["vars"][name] = {
                                "location": self._die_location(child, location_lists)}
                    functions[function_name] = f
                elif d.tag == 'DW_TAG_variable':
                    if d.attributes['DW_AT_decl_file'].value == 1:
                        try:
                            name = d.attributes['DW_AT_name'].value
                        except Exception:
                            name = '(%s)' % str(d.attributes['DW_AT_name'])
                        v = {}
                        try:
                            v["location"] = d.attributes['DW_AT_location'].value
                        except Exception:
                            # Variables without a usable location get []
                            v["location"] = []
                        variables[name] = v

            x = {}
            fname = td.attributes['DW_AT_name'].value
            x["line_program"] = dw.line_program_for_CU(c).get_entries()
            x["lpc"] = td.attributes['DW_AT_low_pc'].value
            x["hpc"] = td.attributes['DW_AT_high_pc'].value
            x["comp_dir"] = td.attributes['DW_AT_comp_dir'].value
            x["functions"] = functions
            x["variables"] = variables
            self._compile_units[fname] = x

            if ((self._lowest_known_address is None) or
                    (self._lowest_known_address > x["lpc"])):
                self._lowest_known_address = x["lpc"]

        for c in self._compile_units:
            lines = self._compile_units[c]["lines"] = {}
            for line in self._compile_units[c]["line_program"]:
                state = line.state
                if state is not None and not (state.end_sequence or
                                              state.basic_block or
                                              state.epilogue_begin or
                                              state.prologue_end):
                    cl = "%s+%d" % (c, state.line)
                    if state.address in self._addresses and self._addresses[state.address] != cl:
                        raise Exception("addr %x is both \"%s\" and \"%s+%d\"" % (
                            state.address, self._addresses[state.address],
                            c, state.line))
                    self._addresses[state.address] = cl
                    lines.setdefault(state.line, []).append(state.address)

        if cfi is not None:
            print("CFA table:")
            for pc in sorted(self._cfa_rule):
                print("%x: %s\t\t(%s)" % (pc, str(self._cfa_rule[pc]),
                                          self.addr2line(pc)))

    def addr2line(self, addr):
        """ Return "<CU name>+<line>" for addr, or '' when addr is unknown.
        """
        try:
            return self._addresses[addr]
        except (KeyError, AttributeError, TypeError):
            # Unknown address, no table, or unhashable key: all "unknown"
            return ''

    def loc_at(self, addr):
        """ Return (function name, CU name, line, comp_dir) for addr.

            When addr itself has no line entry, the address is stepped down
            4 at a time until one is found or the lowest known address is
            passed. The line is returned as a string. For an unresolvable
            address the result is ("unknown", "", 0, "").
        """
        line = self.addr2line(addr)
        lowest = getattr(self, "_lowest_known_address", None)
        # Without a lowest known address there is no bound for the backwards
        # search, so it is not attempted at all.
        if lowest is not None:
            while '' == line and addr >= lowest:
                addr -= 4
                line = self.addr2line(addr)
        if '' == line:
            return ("unknown", "", 0, "")

        # Split on the last '+' only: the CU name may itself contain '+'
        cuname, culine = line.rsplit("+", 1)
        fname = ""
        c = self._compile_units[cuname]
        for name, func in c["functions"].items():
            if func["lpc"] <= addr and func["hpc"] >= addr:
                fname = name
                break
        return (fname, cuname, culine, c["comp_dir"])

    def line2addr(self, fname, line):
        """ Return the list of addresses recorded for 'line' of compile unit
            'fname'. Raises KeyError when either is unknown.
        """
        return self._compile_units[fname]["lines"][line]
class ELF:
    """ELF front-end: registers sections and symbols of a binary.

    The ELF file is parsed with pyelftools. Sections are recorded on
    `classbinary` (``_sorted_sections``, ``_abs_sections``,
    ``section_names``), symbols on ``classbinary.symbols`` /
    ``reverse_symbols`` / ``imports``, and every symbol address is also
    announced to `mem` through ``mem.add``.
    """

    def __init__(self, mem, classbinary, filename):
        """Open `filename`, parse it and record all named sections.

        mem         -- object receiving ``add(address, size, type)`` calls
        classbinary -- container that is filled with sections and symbols
        filename    -- path of the ELF file on disk
        """
        import capstone as CAPSTONE

        # The stream is intentionally left open: pyelftools reads lazily and
        # section_stream_read() seeks in it later on.
        fd = open(filename, "rb")
        self.elf = ELFFile(fd)
        self.classbinary = classbinary
        self.mem = mem

        self.arch_lookup = {
            "x86": CAPSTONE.CS_ARCH_X86,
            "x64": CAPSTONE.CS_ARCH_X86,
            "ARM": CAPSTONE.CS_ARCH_ARM,
            "MIPS": CAPSTONE.CS_ARCH_MIPS,
        }

        self.arch_mode_lookup = {
            "x86": CAPSTONE.CS_MODE_32,
            "x64": CAPSTONE.CS_MODE_64,
            # This table holds capstone *modes*; the previous entry used the
            # architecture constant CS_ARCH_ARM here.
            "ARM": CAPSTONE.CS_MODE_ARM,
            "MIPS": {
                32: CAPSTONE.CS_MODE_MIPS32,
                64: CAPSTONE.CS_MODE_MIPS64,
            },
        }

        self.sym_type_lookup = {
            "STT_FUNC": MEM_FUNC,
        }

        self.__sections = {}  # start address -> elf section

        for s in self.elf.iter_sections():
            if not s.name:
                continue

            start = s.header.sh_addr

            # Only sections with one of the low four flag bits set are added
            # to the sorted address list.
            if s.header.sh_flags & 0xf != 0:
                bisect.insort_left(classbinary._sorted_sections, start)

            self.__sections[start] = s
            is_data = self.__section_is_data(s)
            is_exec = self.__section_is_exec(s)
            data = s.data()

            classbinary._abs_sections[start] = SectionAbs(
                s.name.decode(),
                start,
                s.header.sh_size,
                len(data),
                is_exec,
                is_data,
                data)

    def load_section_names(self):
        """Fill ``classbinary.section_names`` (name -> address)."""
        # Used for the auto-completion
        for s in self.elf.iter_sections():
            if s.header.sh_flags & 0xf != 0:
                ad = s.header.sh_addr
                name = s.name.decode()
                self.classbinary.section_names[name] = ad

    def load_static_sym(self):
        """Register every named, non-zero symbol of ``.symtab``.

        Does nothing when the section is absent. On ARM the mapping symbols
        ``$a``, ``$t`` and ``$d`` are ignored. A name that is already known
        is passed through ``classbinary.rename_sym`` first.
        """
        symtab = self.elf.get_section_by_name(b".symtab")
        if symtab is None:
            return

        dont_save = [b"$a", b"$t", b"$d"]
        arch = self.elf.get_machine_arch()
        is_arm = arch == "ARM"

        for sy in symtab.iter_symbols():
            if is_arm and sy.name in dont_save:
                continue

            ad = sy.entry.st_value
            if ad != 0 and sy.name != b"":
                name = sy.name.decode()

                if name in self.classbinary.symbols:
                    name = self.classbinary.rename_sym(name)

                self.classbinary.reverse_symbols[ad] = name
                self.classbinary.symbols[name] = ad

                ty = self.sym_type_lookup.get(sy.entry.st_info.type, MEM_UNK)
                self.mem.add(ad, 1, ty)

    def __x86_resolve_reloc(self, rel, symtab, plt, got_plt, addr_size):
        """Name the x86/x64 PLT stubs using one relocation section.

        rel       -- relocation section to walk
        symtab    -- symbol table the relocations refer to
        plt       -- the ``.plt`` section
        got_plt   -- the ``.got.plt`` section
        addr_size -- size in bytes of one GOT slot (4 or 8)
        """
        # Save all got offsets with the corresponding symbol
        got_off = {}
        for r in rel.iter_relocations():
            sym = symtab.get_symbol(r.entry.r_info_sym)
            name = sym.name.decode()
            ad = r.entry.r_offset
            if name and ad:
                ty = self.sym_type_lookup.get(sym.entry.st_info.type, MEM_UNK)
                got_off[ad] = [name + "@plt", ty]

        data = got_plt.data()

        unpack_str = "<" if self.elf.little_endian else ">"
        unpack_str += str(len(data) // addr_size)
        unpack_str += "Q" if addr_size == 8 else "I"

        got_values = struct.unpack(unpack_str, data)
        plt_data = plt.data()
        wrong_jump_opcode = False
        got_base = got_plt.header.sh_addr

        # Read the .got.plt and, for each address pointing into the plt,
        # subtract 6 to get back to the beginning of the plt entry.
        opcode_jmp = [b"\xff\x25", b"\xff\xa3"]

        for idx, jump_in_plt in enumerate(got_values):
            # The slot address is derived from the index so that skipping an
            # entry can never shift the slots that follow it.
            off = got_base + idx * addr_size
            if off not in got_off:
                continue

            plt_start = jump_in_plt - 6
            plt_off = plt_start - plt.header.sh_addr

            # Check "jmp *(ADDR)" opcode. A negative offset would make the
            # slice wrap around to the end of the section, so reject it.
            if plt_off < 0 or plt_data[plt_off:plt_off+2] not in opcode_jmp:
                wrong_jump_opcode = True
                continue

            name, ty = got_off[off]

            if name in self.classbinary.symbols:
                continue

            self.classbinary.imports[plt_start] = True
            self.classbinary.reverse_symbols[plt_start] = name
            self.classbinary.symbols[name] = plt_start
            self.mem.add(plt_start, 1, ty)

        if wrong_jump_opcode:
            warning("I'm expecting to see a jmp *(ADDR) on each plt entry")
            warning("opcode \\xff\\x25 was not found, please report")

    def __resolve_symtab(self, rel, symtab, arch):
        """Register every symbol of `symtab` with a non-zero value as import.

        `rel` is accepted for symmetry with the x86 resolver but is not read.
        On ARM the registered name gets an ``@plt`` suffix.
        """
        # TODO: don't know why st_value is not 0 like x86
        # In some executables I've tested, it seems that st_value
        # is the address of the plt entry

        # TODO: really useful to iter on relocations and get the symbol
        # from the symtab ?
        # for r in rel.iter_relocations():
        #     sym = symtab.get_symbol(r.entry.r_info_sym)

        for sym in symtab.iter_symbols():
            ad = sym.entry.st_value
            if ad != 0:
                name = sym.name.decode()
                if arch == "ARM":
                    name += "@plt"

                if name in self.classbinary.symbols:
                    continue

                self.classbinary.imports[ad] = True
                self.classbinary.reverse_symbols[ad] = name
                self.classbinary.symbols[name] = ad

                ty = self.sym_type_lookup.get(sym.entry.st_info.type, MEM_UNK)
                self.mem.add(ad, 1, ty)

    def __iter_reloc(self):
        """Yield ``(relocation section, linked symbol table)`` pairs."""
        for rel in self.elf.iter_sections():
            if rel.header.sh_type in ["SHT_RELA", "SHT_REL"]:
                symtab = self.elf.get_section(rel.header.sh_link)
                if symtab is None:
                    continue
                yield (rel, symtab)

    def load_dyn_sym(self):
        """Register dynamic (imported) symbols for the current architecture."""
        arch = self.elf.get_machine_arch()

        if arch == "ARM" or arch == "MIPS":
            for (rel, symtab) in self.__iter_reloc():
                self.__resolve_symtab(rel, symtab, arch)
            return

        # x86/x64

        # TODO: .plt can be renamed ?
        plt = self.elf.get_section_by_name(b".plt")

        if plt is None:
            warning(".plt section not found")
            return

        # TODO: .got.plt can be renamed or may be removed ?
        got_plt = self.elf.get_section_by_name(b".got.plt")
        addr_size = 8 if arch == "x64" else 4

        if got_plt is None:
            warning(".got.plt section not found")
            return

        for (rel, symtab) in self.__iter_reloc():
            self.__x86_resolve_reloc(rel, symtab, plt, got_plt, addr_size)

    def __section_is_data(self, s):
        """Truthy when `s` has SHF_WRITE or SHF_ALLOC set and is not executable."""
        mask = SH_FLAGS.SHF_WRITE | SH_FLAGS.SHF_ALLOC
        return s.header.sh_flags & mask and not self.__section_is_exec(s)

    def __section_is_exec(self, s):
        """Return the SHF_EXECINSTR bit of `s` (0 when `s` is None)."""
        if s is None:
            return 0
        return s.header.sh_flags & SH_FLAGS.SHF_EXECINSTR

    def section_stream_read(self, addr, size):
        """Read up to `size` bytes at virtual address `addr` from the file.

        Returns ``b""`` when no section contains `addr`; the read never goes
        past the end of the containing section.
        """
        s = self.classbinary.get_section(addr)
        if s is None:
            return b""
        s = self.__sections[s.start]
        off = addr - s.header.sh_addr
        end = s.header.sh_addr + s.header.sh_size
        s.stream.seek(s.header.sh_offset + off)
        return s.stream.read(min(size, end - addr))

    def get_arch(self):
        """Return the capstone ``(arch, mode)`` pair, or ``(None, None)``."""
        import capstone as CAPSTONE

        arch = self.arch_lookup.get(self.elf.get_machine_arch(), None)
        mode = self.arch_mode_lookup.get(self.elf.get_machine_arch(), None)

        if arch is None:
            return None, None

        # If one arch name has multiple "word size"
        if isinstance(mode, dict):
            mode = mode[self.elf.elfclass]

        if self.elf.little_endian:
            mode |= CAPSTONE.CS_MODE_LITTLE_ENDIAN
        else:
            mode |= CAPSTONE.CS_MODE_BIG_ENDIAN

        return arch, mode

    def get_arch_string(self):
        """Return the machine architecture name reported by pyelftools."""
        return self.elf.get_machine_arch()

    def get_entry_point(self):
        """Return the ``e_entry`` field of the ELF header."""
        return self.elf.header['e_entry']
def exe(self, filename, argv=[], envp=[], stdin='stdin', stdout='stdout', stderr='stderr'):
    '''
    Map the PT_LOAD segments of an ELF program and zero its bss.

    The standard file descriptors are (re)created, the ELF headers are
    validated against the emulated cpu/memory, every PT_LOAD segment is
    file-mapped through self.mem.mmapFile and the range between the end of
    the file-backed data (bss start) and the program break is zero-filled.

    @param filename: pathname of the file to be executed.
    @param argv: list of parameters for the program (only logged here).
    @param envp: list of environment variables (only logged here).
    @param stdin: name handed to File() for descriptor 0.
    @param stdout: name handed to File() for descriptor 1.
    @param stderr: name handed to File() for descriptor 2.
    @raise AssertionError:
        - 'Not matching cpu': the program targets a different architecture
        - 'Not matching memory': the program targets a different address size
        - the ELF type is neither ET_DYN nor ET_EXEC
    @todo: define va_randomize and read_implies_exec personality
    '''
    # Set standard file descriptors
    self.files = [File(stdin, 'rb'), File(stdout, 'wb'), File(stderr, 'wb')]

    # load elf See binfmt_elf.c
    # read the ELF object file (binary mode; the stream must stay open
    # because segments are read lazily and its name is handed to mmapFile)
    elf = ELFFile(open(filename, 'rb'))
    arch = {'x86': 'i386', 'x64': 'amd64'}[elf.get_machine_arch()]
    addressbitsize = {'x86': 32, 'x64': 64}[elf.get_machine_arch()]
    logger.info("Loading %s as a %s elf" % (filename, arch))
    logger.info("\tArguments: %s" % repr(argv))
    logger.debug("\tEnvironmen:")
    for e in envp:
        logger.debug("\t\t%s" % repr(e))

    assert self.cpu.machine == arch, "Not matching cpu"
    assert self.mem.addressbitsize == addressbitsize, "Not matching memory"
    assert elf.header.e_type in ['ET_DYN', 'ET_EXEC']

    # NOTE(review): cpu, interpreter, executable_stack, end_code, end_data,
    # entry and real_elf_brk are computed but not read again in this
    # function; they are kept as loader state - confirm whether a later
    # stage is expected to consume them.
    cpu = self.cpu

    # Get interpreter elf
    interpreter = None
    for elf_segment in elf.iter_segments():
        if elf_segment.header.p_type != 'PT_INTERP':
            continue
        # The segment holds a NUL terminated path: drop the terminator.
        interpreter = ELFFile(open(elf_segment.data()[:-1], 'rb'))
        break
    if interpreter is not None:
        assert interpreter.get_machine_arch() == elf.get_machine_arch()
        assert interpreter.header.e_type in ['ET_DYN', 'ET_EXEC']

    # Stack Executability
    executable_stack = False
    for elf_segment in elf.iter_segments():
        if elf_segment.header.p_type != 'PT_GNU_STACK':
            continue
        executable_stack = bool(elf_segment.header.p_flags & 0x01)
        break

    base = 0
    elf_bss = 0
    end_code = 0
    end_data = 0
    elf_brk = 0
    load_addr = 0

    for elf_segment in elf.iter_segments():
        if elf_segment.header.p_type != 'PT_LOAD':
            continue

        align = 0x1000  # elf_segment.header.p_align

        # Extend the segment downwards to the start of its page.
        ELF_PAGEOFFSET = elf_segment.header.p_vaddr & (align-1)

        flags = elf_segment.header.p_flags
        memsz = elf_segment.header.p_memsz + ELF_PAGEOFFSET
        offset = elf_segment.header.p_offset - ELF_PAGEOFFSET
        filesz = elf_segment.header.p_filesz + ELF_PAGEOFFSET
        vaddr = elf_segment.header.p_vaddr - ELF_PAGEOFFSET
        memsz = self.mem._ceil(memsz+1)  # (memsz + align ) & ~(align-1)

        if base == 0 and elf.header.e_type == 'ET_DYN':
            assert vaddr == 0
            if addressbitsize == 32:
                base = 0x56555000
            else:
                base = 0x555555554000

        # PF_X   0x1 Execute
        # PF_W   0x2 Write
        # PF_R   0x4 Read
        # base = cpu.mem.mmap(base+vaddr,memsz,flags&0x4,flags&0x2,flags&0x1,data) - vaddr
        # Always a three character 'rwx' string with blanks for missing bits.
        perms = ''.join([
            'r' if flags & 0x4 else ' ',
            'w' if flags & 0x2 else ' ',
            'x' if flags & 0x1 else ' ',
        ])
        hint = base+vaddr
        if hint == 0:
            hint = None
        base = self.mem.mmapFile(hint, memsz, perms, elf_segment.stream.name, offset) - vaddr
        logger.debug("Loading elf offset: %08x addr:%08x %08x %s" % (offset, base+vaddr, base+vaddr+memsz, perms))

        if load_addr == 0:
            load_addr = base + vaddr

        k = base + vaddr + filesz
        if k > elf_bss:
            elf_bss = k
        # end_code follows executable segments only (PF_X is bit 0x1).
        if (flags & 0x1) and end_code < k:
            end_code = k
        if end_data < k:
            end_data = k
        k = base + vaddr + memsz
        if k > elf_brk:
            elf_brk = k

    elf_entry = elf.header.e_entry
    if elf.header.e_type == 'ET_DYN':
        elf_entry += load_addr
    entry = elf_entry
    real_elf_brk = elf_brk

    # We need to explicitly zero any fractional pages
    # after the data section (i.e. bss). This would
    # contain the junk from the file that should not
    # be in memory
    # TODO:
    # cpu.write(elf_bss, '\x00'*((elf_bss | (align-1))-elf_bss))
    logger.debug("Zeroing main elf fractional pages. From %x to %x.", elf_bss, elf_brk)
    logger.debug("Main elf bss:%x" % elf_bss)
    logger.debug("Main elf brk %x:" % elf_brk)

    self.mem.mprotect(self.mem._floor(elf_bss), elf_brk-elf_bss, 'rw')
    for i in xrange(elf_bss, elf_brk):
        try:
            self.mem.putchar(i, '\x00')
        except Exception as e:
            # Best effort: a byte that cannot be written is only logged.
            logger.debug("Exception zeroing main elf fractional pages: %s" % str(e))
class User:
    """Load an ELF or Mach-O executable and run it under a Unicorn engine.

    The constructor parses the file and picks the architecture description
    from ARCH_INFO; run() creates the engine, maps the segments, builds an
    initial stack and starts emulation at the entry point.
    """

    def __init__(self, exe):
        """Parse `exe` and record its architecture, entry point and symbols.

        Raises NotImplementedError for an unknown file magic, for a Mach-O
        without a little-endian header and for an architecture that has no
        ARCH_INFO entry.
        """
        # uses StringIO so we don't burn the file descriptor
        with open(exe, 'rb') as f:
            self.fp = StringIO(f.read())

        magic = self.fp.read(4).encode('hex')
        self.fp.seek(0)

        self.elf = None
        self.macho = None
        self.arch = None
        self.info = None
        self.symtab = None
        self.entry = None

        if magic == '7f454c46':
            self.elf = ELFFile(self.fp)
            self.arch = self.elf.get_machine_arch()
            self.entry = self.elf['e_entry']
            self.symtab = self.elf.get_section_by_name('.symtab')
            self.info = ARCH_INFO.get(self.arch)
        elif magic in ('cafebabe', 'feedface', 'feedfacf', 'cefaedfe', 'cffaedfe'):
            macho = FileMachO(exe, self.fp)
            for header in macho.headers:
                if header.endian == '<':
                    self.macho = header
                    self.arch = mach_o.CPU_TYPE_NAMES.get(header.header.cputype)
                    self.arch = ARCH_MAP.get(self.arch, self.arch)
                    self.info = ARCH_INFO.get(self.arch)
                    for lc, cmd, data in header.commands:
                        # entry point
                        if lc.cmd == mach_o.LC_MAIN or lc.cmd == mach_o.LC_UNIXTHREAD:
                            if self.info['bits'] == 64:
                                ip = 2 * 4 + 16 * 8
                                self.entry = struct.unpack(header.endian + 'Q', data[ip:ip+8])[0]
                            else:
                                ip = 2 * 4 + 10 * 4
                                self.entry = struct.unpack(header.endian + 'L', data[ip:ip+4])[0]
                    # The first little-endian header wins.
                    break
            else:
                raise NotImplementedError('Could not find suitable MachO arch.')
        else:
            raise NotImplementedError('Unrecognized file magic: %s' % magic)

        if not self.info:
            raise NotImplementedError('Unsupported Unicorn arch: %s' % self.arch)

        self.bits = self.info['bits']
        self.bsz = self.bits // 8
        self.sp = self.info['sp']
        self.regs = REG_MAP.get(self.arch, [])
        self.memory = []       # list of (start, size) tuples mapped so far
        self.saved_regs = {}   # register enum -> last printed value

    # start Unicorn helpers
    def mapped(self, addr, size):
        """Return the ``(start, end)`` of a tracked mapping that overlaps
        ``[addr, addr + size)``, or False when there is none."""
        for a, b in self.memory:
            b += a
            if addr < a and addr + size > a:
                return (a, b)
            if addr >= a and addr < b:
                return (a, b)
        return False

    def mem_map(self, addr, size):
        """Map ``[addr, addr + size)``, trimming it against one overlapping
        mapping; returns None without mapping when fully covered."""
        # TODO: this tracking could be replaced by a Unicorn api to get memory map
        # FIXME: if you overlap with the end of an existing map it will silently fail
        overlap = self.mapped(addr, size)
        if overlap:
            a, b = overlap
            if addr < a:
                size = a - addr
            elif addr < b and addr + size > b:
                right = addr + size
                addr = b
                size = right - addr
            else:
                return
        addr, size = align(addr, size, grow=True)
        self.memory.append((addr, size))
        return self.mu.mem_map(addr, size)

    def mmap(self, size, addr_hint=0):
        """Map `size` bytes at the first free aligned address at or after
        `addr_hint` (BASE when falsy) and return that address.

        Raises MemoryError when no address below 2**32 is free.
        """
        if not addr_hint:
            addr_hint = BASE
        _, size = align(0, size, grow=True)
        addr_hint, size = align(addr_hint, size)
        for addr in xrange(addr_hint, 2 ** 32, UC_MEM_ALIGN):
            if not self.mapped(addr, size):
                # FIXME: why is this broken without size + 1
                self.mem_map(addr, size + 1)
                return addr
        raise MemoryError('could not allocate %d bytes' % size)

    def push(self, n):
        """Decrement the stack pointer by one word and store `n` there."""
        sp = self.reg_read(self.sp)
        self.reg_write(self.sp, sp - self.bsz)
        self.mem_write(sp - self.bsz, self.pack_addr(n))

    def pop(self):
        """Load one word from the top of the stack, increment the stack
        pointer by one word and return the value."""
        # The stack pointer has to be read into a local first: it is needed
        # both for the load and for the write-back.
        sp = self.reg_read(self.sp)
        data = self.mem_read(sp, self.bsz)
        self.reg_write(self.sp, sp + self.bsz)
        return self.unpack_addr(data)

    def mem_write(self, addr, data):
        """Forward to ``Uc.mem_write``."""
        return self.mu.mem_write(addr, data)

    def mem_read(self, addr, size):
        """Forward to ``Uc.mem_read``."""
        return self.mu.mem_read(addr, size)

    def mem_read_cstr(self, addr):
        """Read memory four bytes at a time until a NUL byte shows up and
        return everything before it."""
        # FIXME: this might be buggy
        s = ''
        while not '\0' in s:
            s += self.mu.mem_read(addr, 4)
            addr += 4
        return str(s.split('\0', 1)[0])

    def reg_write(self, reg, n):
        """Forward to ``Uc.reg_write``."""
        return self.mu.reg_write(reg, n)

    def reg_read(self, reg):
        """Forward to ``Uc.reg_read``."""
        return self.mu.reg_read(reg)

    def mem_hex(self, addr, size):
        """Return the hex dump of `size` bytes at `addr`, grouped per word
        by the ``spaces`` helper."""
        data = binascii.hexlify(self.mem_read(addr, size))
        return spaces(data, self.bsz * 2)

    def read_regs(self):
        """Return ``(enum, name, value)`` for every register in self.regs."""
        return [(enum, name, self.reg_read(enum)) for enum, name in self.regs]

    def print_regs(self, regs=None):
        """Print registers four per line as ``name=0xvalue``; always ends
        with a newline, even for an empty list."""
        if regs is None:
            regs = self.read_regs()
        cells = ['%3s=0x%08x' % (name, val) for _, name, val in regs]
        rows = [' '.join(cells[i:i + 4]) for i in range(0, len(cells), 4)]
        sys.stdout.write('\n'.join(rows or ['']) + '\n')

    def print_changed_regs(self):
        """Print the registers whose value differs from the last call and
        remember the new values."""
        regs = self.read_regs()
        changed = [(enum, name, val) for enum, name, val in regs
                   if self.saved_regs.get(enum) != val]
        self.print_regs(changed)
        for enum, name, val in changed:
            self.saved_regs[enum] = val

    def print_dis(self, addr, size):
        """Print the disassembly of `size` bytes at `addr`."""
        mem = self.mem_read(addr, size)
        print(disas(mem, addr, self.info))

    def pack_addr(self, n):
        """Pack `n` as a little-endian word of the target's width."""
        if self.bits == 64:
            return struct.pack('<Q', n)
        else:
            return struct.pack('<I', n)

    def unpack_addr(self, data):
        """Inverse of pack_addr()."""
        if self.bits == 64:
            n, = struct.unpack('<Q', data)
        else:
            n, = struct.unpack('<I', data)
        return n
    # end Unicorn helpers

    def symbolicate(self, addr):
        """Return ``name+0xoff`` for the function symbol containing `addr`
        that starts closest below it, or the bare hex address."""
        if self.symtab:
            matches = defaultdict(list)
            for sym in self.symtab.iter_symbols():
                val = sym['st_value']
                size = sym['st_size']
                if sym['st_info']['type'] == 'STT_FUNC' and val <= addr and val + size > addr:
                    matches[addr - val].append(sym)

            if matches:
                # TODO pick the smallest matching symbol?
                # or indicate when you're inside multiple symbols?
                dist = min(matches)
                sym = matches[dist][0]
                return '%s+0x%02x' % (sym.name, dist)
        return '0x%x' % addr

    def map_segments(self):
        """Copy every non-empty segment into emulator memory, then allocate
        the stack and point the stack register near its top."""
        if self.elf:
            for s in self.elf.iter_segments():
                addr, size = s['p_paddr'], s['p_memsz']
                if not size:
                    continue
                self.mem_map(addr, size)
                self.mem_write(addr, s.data())
        elif self.macho:
            for lc, cmd, data in self.macho.commands:
                if lc.cmd in (mach_o.LC_SEGMENT, mach_o.LC_SEGMENT_64):
                    # Remember the file position; it is restored afterwards.
                    c = self.fp.tell()
                    for seg in data:
                        self.fp.seek(seg.offset)
                        sd = self.fp.read(seg.size)
                        self.mem_map(seg.addr, seg.size)
                        self.mem_write(seg.addr, sd)
                    self.fp.seek(c)
        self.stack = self.mmap(STACK_SIZE, STACK_BASE)
        self.reg_write(self.sp, self.stack + STACK_SIZE - self.bsz)

    def write_argv(self, argv):
        """Store the argument strings in a fresh mapping, push a 0 word and
        then their addresses; return the mapping address."""
        size = sum([len(a) + 1 for a in argv])
        argv_addr = self.mmap(size)
        pos = argv_addr + size
        addrs = []
        for arg in reversed(argv):
            # One extra byte per string is reserved after the text.
            asz = len(arg) + 1
            self.mem_write(pos - asz, arg)
            pos -= asz
            addrs.append(pos)
        for addr in [0] + addrs:
            self.push(addr)
        return argv_addr

    # hooks
    def hook_mem_invalid(self, uc, access, address, size, value, user_data):
        """On an invalid write, map 2 MiB at the address and continue;
        any other invalid access stops emulation."""
        if access == UC_MEM_WRITE:
            print(">>> Memory fault on WRITE at 0x%x, data size = %u, data value = 0x%x" % (address, size, value))
            self.mem_map(address, 2 * 1024 * 1024)
            return True
        else:
            # stop emulation
            return False

    def hook_intr(self, mu, intno, user_data):
        """Emulate a handful of x64 syscalls on interrupt 80; the result is
        written to RAX. Unsupported syscalls or architectures exit."""
        if intno == 80:
            if self.arch == 'x64':
                regs = [X86_REG_RAX, X86_REG_RDI, X86_REG_RSI, X86_REG_RDX,
                        X86_REG_R10, X86_REG_R8, X86_REG_R9]
                num, a1, a2, a3, a4, a5, a6 = [self.reg_read(r) for r in regs]
                ret = 0
                if num == 0:  # SYS_read
                    tmp = os.read(a1, a3)
                    self.mem_write(a2, tmp + '\0')
                    ret = len(tmp)
                elif num == 1:  # SYS_write
                    ret = os.write(a1, self.mem_read(a2, a3))
                elif num == 2:  # SYS_open
                    ret = os.open(self.mem_read_cstr(a1), a2, a3)
                elif num == 3:  # SYS_close
                    os.close(a1)
                elif num == 8:  # SYS_lseek
                    ret = os.lseek(a1, a2, a3)
                elif num == 9:  # SYS_mmap
                    ret = self.mmap(a2, addr_hint=a1)
                elif num == 11:  # SYS_munmap
                    pass
                elif num == 60:  # SYS_exit
                    sys.exit(a1)
                else:
                    print('Unsupported syscall: %s' % (num,))
                    sys.exit(1)
                self.reg_write(X86_REG_RAX, ret)
            else:
                print('Arch not supported.')
                sys.exit(1)

    def hook_block(self, uc, address, size, user_data):
        """Print the block address (symbolicated) and the changed registers."""
        name = self.symbolicate(address)
        print(">>> Basic block at %s, block size = 0x%x <<<" % (name, size))
        self.print_changed_regs()

    def hook_code(self, uc, addr, size, user_data):
        """Print the disassembly of the instruction prefixed with '>'."""
        if size > 128:
            print('Makeshift SIGILL')
            sys.exit(1)
        # Format the text directly; print_dis() returns None and must not
        # be handed to print.
        print('> %s' % (disas(self.mem_read(addr, size), addr, self.info),))

    def hook_mem_access(self, uc, access, addr, size, value, user_data):
        """Print one line per memory read or write."""
        if access == UC_MEM_WRITE:
            print('W @0x%x 0x%x = 0x%x' % (addr, size, value))
        else:
            print('%s %s' % ('R @0x%x 0x%x =' % (addr, size), self.mem_hex(addr, size)))

    def run(self, *argv):
        """Create the engine, load the program, build the initial stack from
        `argv` and start emulation at the entry point."""
        self.mu = Uc(self.info['ucarch'], self.info['ucbits'])
        self.map_segments()
        # self.mu.hook_add(UC_HOOK_BLOCK, self.hook_block)
        # self.mu.hook_add(UC_HOOK_CODE, self.hook_code)
        self.mu.hook_add(UC_HOOK_INTR, self.hook_intr)
        self.mu.hook_add(UC_HOOK_MEM_INVALID, self.hook_mem_invalid)
        # self.mu.hook_add(UC_HOOK_MEM_READ_WRITE, self.hook_mem_access)

        # put argv into target memory
        self.push(0)  # envp
        argv_addr = self.write_argv(argv)
        self.push(len(argv))  # argc
        argv_size = sum([len(a) + 1 for a in argv]) + self.bsz * (len(argv) + 1)

        print('%s %s' % ('[argv]', self.mem_hex(argv_addr, argv_size)))
        print('[entry point]')
        self.print_dis(self.entry, 64)
        print('%s %s' % ('[initial stack]', self.mem_hex(self.reg_read(self.sp), 64)))

        print('=====================================')
        print('==== Program output begins here. ====')
        print('=====================================')
        self.mu.emu_start(self.entry, 0)
class ELF(object):
    """Wrapper around a pyelftools ``ELFFile`` that can describe the pages
    needed to load the image and emit a CapDL spec for it."""

    def __init__(self, elf, name='', arch=None):
        """
        `elf` is either a path (string) or an already opened stream of ELF
        data. `name` is only used when generating CapDL from the ELF file.
        `arch` overrides the architecture read from the ELF header.
        """
        stream = open(elf, 'rb') if isinstance(elf, six.string_types) else elf
        self._elf = ELFFile(stream)
        self.name = name
        self._symtab = None
        self.arch = arch or self.get_arch()

    def get_entry_point(self):
        """Entry address stored in the ELF header."""
        return self._elf['e_entry']

    def _get_symbol(self, symbol):
        """Look `symbol` up by name; None when it does not exist.

        Raises Exception when the fallback path finds no ``.symtab``.
        """
        # Preferred path: elftools does the lookup itself (46ae4bd onwards).
        if hasattr(self._elf, 'get_symbol_by_name'):
            found = self._elf.get_symbol_by_name(symbol)
            # From 9da4c45 the lookup returns a list of matches.
            return found[0] if isinstance(found, list) else found

        # Fallback: index the symbol table once and cache the result.
        if self._symtab is None:
            symtab_section = self._elf.get_section_by_name('.symtab')
            if not symtab_section:
                # This ELF file has been stripped.
                raise Exception('No symbol table available')
            self._symtab = {entry.name: entry
                            for entry in symtab_section.iter_symbols()}
        return self._symtab.get(symbol)

    def get_symbol_vaddr(self, symbol):
        """``st_value`` of `symbol`, or None when it is unknown."""
        entry = self._get_symbol(symbol)
        return entry['st_value'] if entry else None

    def get_symbol_size(self, symbol):
        """``st_size`` of `symbol`, or None when it is unknown."""
        entry = self._get_symbol(symbol)
        return entry['st_size'] if entry else None

    def _safe_name(self):
        """
        Name with every non-alphanumeric character replaced by '_', because
        the CapDL tools parse those characters differently.
        """
        return re.sub(r'[^A-Za-z0-9]', '_', self.name)

    def get_arch(self):
        """Machine architecture as reported by elftools."""
        return self._elf.get_machine_arch()

    def get_pages(self, infer_asid=True, pd=None, use_large_frames=True):
        """
        Build the PageCollection required to ELF load this file.

        Every PT_LOAD segment with a non-zero memory size is cut into
        regions around the special CAmkES sections it contains; pages are
        added with the read/write/execute permissions of the segment.
        """
        collection = PageCollection(self._safe_name(), self.arch, infer_asid, pd)

        # Various CAmkES output sections we are expecting to see in the ELF.
        TYPE = {"ignore": 1, "shared": 2, "persistent": 3, "guarded": 4}
        special = re.compile("^(ignore_|shared_|persistent|guarded)")
        special_sections = [sec for sec in self._elf.iter_sections()
                            if special.match(_decode(sec.name))]

        for seg in self._elf.iter_segments():
            if seg['p_type'] != 'PT_LOAD' or seg['p_memsz'] == 0:
                continue

            regions = [{'addr': seg['p_vaddr'],
                        'size': seg['p_memsz'],
                        'type': 0}]

            for sec in special_sections:
                if not seg.section_in_segment(sec):
                    continue
                start = sec['sh_addr']
                holders = [reg for reg in regions
                           if reg['addr'] <= start < reg['addr'] + reg['size']]
                assert len(holders) == 1
                holder = holders[0]
                previous_size = holder['size']

                # The holding region now ends where the section begins.
                holder['size'] = start - holder['addr']

                # One region for the section itself, one for what follows it.
                regions.append({'addr': start,
                                'size': sec['sh_size'],
                                'type': TYPE[_decode(sec.name).split('_')[0]]})
                regions.append({'addr': start + sec['sh_size'],
                                'size': previous_size - holder['size'] - sec['sh_size'],
                                'type': 0})

            # Remove empty regions.
            regions = [reg for reg in regions if reg['size'] != 0]

            flags = seg['p_flags']
            readable = (flags & P_FLAGS.PF_R) > 0
            writable = (flags & P_FLAGS.PF_W) > 0
            executable = (flags & P_FLAGS.PF_X) > 0

            # Allocate pages
            for reg in regions:
                limit = reg['addr'] + reg['size']
                cursor = round_down(reg['addr'])

                if reg['type'] in [1, 2, 3, 4]:
                    # A range that must be backed by small pages.
                    while cursor < limit:
                        collection.add_page(cursor, readable, writable, executable)
                        cursor += PAGE_SIZE
                    continue

                # A range that is eligible for promotion.
                candidates = list(reversed(page_sizes(self.arch)))
                remaining = limit - cursor
                while cursor < limit:
                    frame = PAGE_SIZE
                    if use_large_frames:
                        # First candidate that fits and is aligned wins.
                        frame = next((p for p in candidates
                                      if remaining >= p and cursor % p == 0),
                                     PAGE_SIZE)
                    collection.add_page(cursor, readable, writable, executable, frame)
                    cursor += frame
                    remaining -= frame

        return collection

    def get_spec(self, infer_tcb=True, infer_asid=True, pd=None,
                 use_large_frames=True):
        """
        Return a CapDL spec with as much information as can be derived from
        the ELF file in isolation.
        """
        collection = self.get_pages(infer_asid, pd, use_large_frames)
        spec = collection.get_spec()

        if not infer_tcb:
            return spec

        # Create a single TCB.
        thread = TCB('tcb_%s' % self._safe_name(),
                     ip=self.get_entry_point(),
                     elf=self.name)
        spec.add_object(thread)
        thread['vspace'] = collection.get_page_directory()[1]
        return spec

    def __repr__(self):
        return str(self._elf)
def parse(srcfile):
    """Locate a conditional branch inside an encrypted segment of `srcfile`
    and patch one byte of it in place.

    Outline of what the code below does:
      1. find the DT_INIT tag, the section whose file range contains its
         d_ptr, and the SHT_LOUSER section holding the encrypted data;
      2. read offset/size words from the init section and decrypt a first
         segment with decryptSegment();
      3. use the segment table of that first segment to find offset, size
         and entry of a second segment and decrypt it as well;
      4. disassemble from the entry (x86 or ARM), follow the call found
         there and search the callee for a compare followed by a
         conditional branch;
      5. ask getEncryptedFixByte() for the replacement byte and write it
         to the file after checking the byte currently stored there.

    NOTE(review): `inittag`, `entrysection`, `datasection` and `cs` stay
    unbound when the expected tag/section/architecture is missing, which
    surfaces as a NameError further down. The handle `f` is never closed.
    """
    f = open(srcfile,'rb')
    e = ELFFile(f)
    print "Current so is under ",e.get_machine_arch()
    dynamic_table = e.get_section_by_name(".dynamic")
    # Keep the first DT_INIT entry of the dynamic table.
    for tags in dynamic_table.iter_tags():
        if tags["d_tag"] == 'DT_INIT':
            inittag = tags
            break
    ''' union { dptr dval } '''
    print "Entry Point : 0x%x"%(inittag["d_ptr"])
    # d_ptr is compared against *file offsets* of the sections here; the last
    # matching section and the last SHT_LOUSER section win.
    for sections in e.iter_sections():
        if sections.header["sh_offset"] <= inittag["d_ptr"] <= sections.header["sh_offset"]+ sections.header["sh_size"]:
            entrysection = sections
        if sections.header.sh_type== 'SHT_LOUSER':
            datasection = sections
    entrysectiondata = entrysection.data()
    encrypteddata = datasection.data()
    print "LOUSER offset:0x%x"%(datasection.header.sh_offset)
    # Open question from the original author: search for the first push/move
    # and read the 4 bytes before it to find the start of the info instead?
    # Fixed offsets into the init section: entry at 0x10, offset at 0x1c,
    # size at 0x20 (all little-endian 32 bit).
    secondsegraw_offset = struct.unpack("<I",entrysectiondata[0x1c:0x1c+0x4])[0]
    secondsegraw_size = struct.unpack("<I",entrysectiondata[0x1c+0x4:0x1c+0x8])[0]
    secondsegraw_entry = struct.unpack("<I",entrysectiondata[0x10:0x10+0x4])[0]
    print hex(secondsegraw_offset +datasection.header.sh_offset )
    print hex(secondsegraw_size)
    secondsegraw = encrypteddata[secondsegraw_offset :secondsegraw_offset+secondsegraw_size ]
    secondsegdecrypted = "".join(decryptSegment(secondsegraw))
    # First decrypted byte is used as the size of an extra header that has
    # to be added to offsets inside the decrypted data.
    jmpsize = ord(secondsegdecrypted[0])
    # print "Jump additional header size:%x" %(jmpsize)
    segs = struct.unpack("<I",secondsegdecrypted[3*0x4:3*0x4+0x4])[0]
    #eachseginfosize = 0xc # src 0x4 dst 0x4 srcsize 0x2 dstsize 0x2
    siginfostart = struct.unpack("<I",secondsegdecrypted[2*0x4:2*0x4+0x4])[0]
    #x86 data in the [1] seg, code in the [2] seg
    #arm data in the [0] seg, code in the [1] seg
    #but we can just find data seg [-3] for offset data seg [-2] for size
    # dataseg[-6] is next entry with memoff(next's memoff)
    if e.get_machine_arch() == 'x86':
        (src,dst,srcsize,dstsize) = struct.unpack("<IIHH",secondsegdecrypted[siginfostart+ 1*0xc:siginfostart+(1+1)*0xc])
        pass
    elif e.get_machine_arch() == 'ARM':
        (src,dst,srcsize,dstsize) = struct.unpack("<IIHH",secondsegdecrypted[siginfostart+ 0*0xc:siginfostart+(0+1)*0xc])
        pass
    # The words at -3 and -2 (counted in 4 byte units from the end of the data
    # segment) give offset and size of the next segment, the word at -6 its
    # entry address.
    (todo_offset , todo_size)= struct.unpack("<II",secondsegdecrypted[jmpsize+src+srcsize - 3*0x4:jmpsize+src+srcsize - 0x4])
    todo_entry = struct.unpack("<I",secondsegdecrypted[jmpsize+src+srcsize - 6*0x4:jmpsize+src+srcsize - (6-1 )* 0x4])[0]
    print "Find val:0x%x"%(todo_offset)
    print "Find val:0x%x"%(todo_size)
    print "Entry :0x%x"%(todo_entry)
    todo_raw = encrypteddata[todo_offset :todo_offset+todo_size ]
    todo_decrypted = "".join(decryptSegment(todo_raw))
    todojmpsize = jmpsize = ord(todo_decrypted[0])
    # Walk the segment table of the second blob until the entry whose
    # destination range contains todo_entry; src/dst keep that entry's values
    # after the break (or the last entry's values when nothing matches).
    for i in range(struct.unpack("<I",todo_decrypted[3*0x4:3*0x4+0x4])[0]):
        (src,dst,srcsize,dstsize) = struct.unpack("<IIHH",todo_decrypted[struct.unpack("<I",todo_decrypted[2*0x4:2*0x4+0x4])[0]+ i*0xc:struct.unpack("<I",todo_decrypted[2*0x4:2*0x4+0x4])[0]+(i+1)*0xc])
        if dst <= todo_entry <= dst+dstsize:
            print "src:0x%x dst:0x%x srcsize:0x%x dstsize:0x%x"%(src,dst,srcsize,dstsize)
            print "code in this seg"
            break
    if e.get_machine_arch() == 'x86':
        cs = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_32)
    elif e.get_machine_arch() == 'ARM':
        cs = capstone.Cs(capstone.CS_ARCH_ARM,capstone.CS_MODE_ARM)
    if e.get_machine_arch() == 'x86':
        # Byte pattern: 55 89 e5 ... e8 <4 bytes> ... c9 [c3]; only the part up
        # to and including the e8 instruction is captured.
        tmp1 = re.findall('(\x55\x89\xe5.*\xe8....).*\xc9\xc3?', todo_decrypted[todojmpsize+todo_entry - (dst-src): src+srcsize+todojmpsize])[0]
        for i in cs.disasm(tmp1,todo_entry ):
            print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))
        # `i` is the last instruction disassembled above; its operand is
        # parsed as the address of the next function.
        next_func_entry = int(i.op_str,16)
        next_func_end = todo_decrypted[next_func_entry+todojmpsize:].find('\xc3')
        print "next func entry :%x ends :%d"%(next_func_entry,next_func_end)
        next_func_text = todo_decrypted[next_func_entry - (dst-src)+todojmpsize:todojmpsize+next_func_entry + next_func_end+1]
        prev2 = None
        prev1 = None
        # Stop at the first JE that directly follows MOV, CMP.
        # NOTE(review): prev2/prev1 are None for the first instructions, so a
        # JE at the very start would raise AttributeError.
        for i in cs.disasm("".join(next_func_text),next_func_entry):
            print("0x%x:\t%s\t%s\t" %(i.address, i.mnemonic, i.op_str))
            if i.id == capstone.x86.X86_INS_JE and prev2.id ==capstone.x86.X86_INS_MOV and prev1.id == capstone.x86.X86_INS_CMP:
                print "Found"
                break
            prev2 = prev1
            prev1 = i
        # Offset of the branch instruction inside the decrypted segment.
        modifytosegoffset = todojmpsize+ i.address - (dst-src)
        print "MemAddress:%x"%(i.address)
        print "Segoffset: %x"%( modifytosegoffset)
        # 0xeb is the plain value wanted at that position (presumably the
        # short-JMP opcode - TODO confirm); the helper returns the encrypted
        # replacement byte and the byte expected in the file right now.
        (newbyte,oldbyte) = getEncryptedFixByte(todo_raw, modifytosegoffset, 0xeb)
        # write the patched byte back to the file
        modifyrawfilepos = datasection.header.sh_offset + todo_offset+modifytosegoffset
        print "Modify raw file @0x%x = 0x%x + 0x%x + 0x%x" %(modifyrawfilepos, datasection.header.sh_offset , todo_offset, modifytosegoffset)
        with open(srcfile,'r+b') as wf:
            wf.seek(modifyrawfilepos)
            cmpbyte = wf.read(1)
            # Refuse to patch when the file does not hold the expected byte.
            assert cmpbyte == oldbyte
            wf.seek(modifyrawfilepos)
            wf.write(newbyte)
        print "DONE!!!"
    elif e.get_machine_arch() == 'ARM':
        #push {fp, lr} --> bl xxx --> pop {fp, pc}
        tmp1 = re.findall('(\x00\x48\x2d\xe9.*...\xeb).*\x00\x88\xbd\xe8?', todo_decrypted[todojmpsize+todo_entry - (dst-src): src+srcsize+todojmpsize])[0]
        for i in cs.disasm(tmp1,todo_entry ):
            print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))
        next_func_entry = int(i.op_str[1:],16) # drop the first character ('#') of the operand
        next_func_end = todo_decrypted[next_func_entry+todojmpsize:].find('\x00\x88\xbd\xe8')
        print "next func entry :%x ends :%d"%(next_func_entry,next_func_end)
        next_func_text = todo_decrypted[next_func_entry - (dst-src)+todojmpsize:todojmpsize+next_func_entry + next_func_end+4]
        prev1 = None
        # Stop at the first branch that directly follows a CMP.
        # NOTE(review): prev1 is None on the first instruction.
        for i in cs.disasm("".join(next_func_text),next_func_entry):
            print("0x%x:\t%s\t%s\t" %(i.address, i.mnemonic, i.op_str))
            if i.id == capstone.arm.ARM_INS_B and prev1.id == capstone.arm.ARM_INS_CMP:
                print "Found"
                break
            prev1 = i
        modifytosegoffset = todojmpsize+ i.address - (dst-src) +0x3 # the byte to change is the last of the 4 byte instruction
        print "MemAddress:%x"%(i.address)
        print "Segoffset: %x"%( modifytosegoffset)
        (newbyte,oldbyte) = getEncryptedFixByte(todo_raw, modifytosegoffset, 0xea) # beq(0a) -> b(ea)
        # write the patched byte back to the file
        modifyrawfilepos = datasection.header.sh_offset + todo_offset+modifytosegoffset
        print "Modify raw file @0x%x = 0x%x + 0x%x + 0x%x" %(modifyrawfilepos, datasection.header.sh_offset , todo_offset, modifytosegoffset)
        with open(srcfile,'r+b') as wf:
            wf.seek(modifyrawfilepos)
            cmpbyte = wf.read(1)
            # Refuse to patch when the file does not hold the expected byte.
            assert cmpbyte == oldbyte
            wf.seek(modifyrawfilepos)
            wf.write(newbyte)
        print "DONE!!!"
def main():
    """Pack selected sections of an x64 object into one relocatable blob and
    emit it as C source in ``bot_opt.cc``.

    The object named by ``sys.argv[1]`` is read; all sections whose names
    start with .text, .data, .rodata or .bss are concatenated (each followed
    by zero padding), RELA relocations between them are applied, and the
    result is written as a byte array together with a list of offsets that
    ``load_code()`` must rebase at run time and the assignments for a fixed
    list of exported symbols.
    """
    filename = sys.argv[1]
    # Binary mode, and the stream stays open for the whole run because
    # pyelftools reads sections lazily.
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        print('[II] Object %s is a %s_%s elf' % (filename, elf.get_machine_arch(), elf.elfclass))
        assert elf.elfclass == 64 and elf.get_machine_arch() == 'x64'
        print("[II] Elf has %d sections." % elf.num_sections())

        # Group sections by prefix so that all .text* come first and all
        # .bss* come last.
        selected_sections = []
        for section_prefix in ['.text', '.data', '.rodata', '.bss']:
            for section in elf.iter_sections():
                if section.name.startswith(section_prefix):
                    selected_sections.append(section.name)

        offsets = {}  # section name -> offset of the section inside the blob
        shellcode = StringIO('')
        for section_name in selected_sections:
            offsets[section_name] = shellcode.len
            try:
                s = elf.get_section_by_name(section_name)
                if s['sh_type'] == 'SHT_NOBITS':
                    # No file content: materialise the zero bytes.
                    data = chr(0) * s['sh_size']
                else:
                    data = elf.get_section_by_name(section_name).data()
                print("[II] Section %s is %d bytes offset %d" % (section_name, len(data), offsets[section_name]))
            except Exception:
                # Best effort: an unreadable section contributes no bytes.
                data = ''
                print('[WW] No %s section' % section_name)
            shellcode.write(data)
            # padding to 16 (a full 16 bytes when already aligned)
            shellcode.write(chr(0) * (16-shellcode.len % 16))

        print("[II] Total packed data size %d" % shellcode.len)

        relocs = []  # blob offsets holding absolute 32 bit addresses
        for section_name in selected_sections:
            reloc_section = find_relocations_for_section(elf, section_name)
            if reloc_section is None:
                continue
            symtab = elf.get_section(reloc_section['sh_link'])
            for reloc in reloc_section.iter_relocations():
                assert elf.get_machine_arch() == 'x64' and reloc.is_RELA()
                reloc_base = offsets[section_name]
                reloc_offset = reloc['r_offset']
                reloc_type = reloc['r_info_type']
                target_symbol = symtab.get_symbol(reloc['r_info_sym'])
                target_name = elf.get_section(target_symbol['st_shndx']).name
                target_base = offsets[target_name]
                target_offset = target_symbol['st_value']

                patch_at = reloc_base + reloc_offset
                shellcode.seek(patch_at)
                value = struct.unpack("<l", shellcode.read(4))[0]
                if reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_32']:
                    value = target_base + target_offset + value + reloc['r_addend']
                    relocs.append(patch_at)
                    print("[II] Offset  %s added to reloc list" % (patch_at,))
                elif reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_PC32']:
                    # pc-relative: stays valid wherever the blob is loaded
                    value = (target_base + target_offset) - patch_at + value + reloc['r_addend']
                elif reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_32S']:
                    value = target_base + target_offset + value + reloc['r_addend']
                    relocs.append(patch_at)
                else:
                    assert reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_NONE']
                shellcode.seek(patch_at)
                shellcode.write(struct.pack("<L", value & 0xffffffff))
        shellcode.seek(shellcode.len)

        def to_c_array(s):
            """Render the bytes of `s` as C initialiser rows of 8 values,
            after zero padding the length to a multiple of 4."""
            if len(s) % 4 != 0:
                s += chr(0) * (4 - len(s) % 4)
            bs = map(ord, s)
            result = ''
            for i in range(0, len(bs), 8):
                result += ' ' + ''.join(' 0x%02x,' % b for b in bs[i:i+8]) + '\n'
            return result

        def to_c_array2(arr):
            """Render the integers of `arr` as C initialiser rows of 10."""
            result = ''
            for i in range(0, len(arr), 10):
                result += ' ' + ''.join(' %d,' % x for x in arr[i:i+10]) + '\n'
            return result

        with open('bot_opt.cc', 'w') as fp:
            blob = shellcode.getvalue()
            bss = elf.get_section_by_name('.bss')
            bss_size = bss['sh_size'] if bss is not None else 0
            # Split with an explicit index: negative slicing selects the
            # wrong part of the blob when bss_size is 0.
            cut = len(blob) - bss_size
            assert blob[cut:] == chr(0) * bss_size

            pagesize = 4096
            # Preprocessor directives need a line of their own, so the
            # template is laid out line by line.
            fp.write('''#include <sys/mman.h>
#include "bot_opt.h"

static unsigned char code[%d] __attribute__((aligned(4096))) = {
%s};

static int patch[] = {
%s};
''' % (
                (shellcode.len + pagesize-1) // pagesize * pagesize,
                to_c_array(blob[:cut]),
                to_c_array2(relocs),
            ))
            fp.write('''
void load_code() {
 if ((uintptr_t)code > 0xffffffffull)
  return;
 mprotect(code, sizeof(code), PROT_READ|PROT_WRITE|PROT_EXEC);
 for (unsigned int i = 0; i < sizeof(patch)/sizeof(patch[0]); i++) {
  *(uint32_t*)(void*)(code + patch[i]) += (uintptr_t)code;
 }
''')

            # Index the symbol table once; for duplicate names the last
            # entry wins.
            symbols_by_name = {}
            for s in elf.get_section_by_name('.symtab').iter_symbols():
                symbols_by_name[s.name] = s

            # export symbols
            for entry in (
                    'root_search_move',
                    'init_bot',
                    'max_lookahead',
                    'maybe_dead_threshold',
                    'search_threshold',
                    'cache1_clear',
            ):
                symbol = symbols_by_name.get(entry)
                assert symbol
                section = elf.get_section(symbol['st_shndx']).name
                base = offsets[section]
                offset = symbol['st_value']
                start = base + offset
                print('%s %s %s' % (section, entry, start))
                if section == '.text':
                    fp.write(' %s_func = (%s_func_t)((char*)code + %d);\n' % (
                        entry, entry, start))
                else:
                    fp.write(' %s_ptr = (%s_ptr_t)((char*)code + %d);\n' % (
                        entry, entry, start))

            fp.write('''}
''')
class Disassembler():
    """Load an ELF image into memory segments and disassemble it with capstone.

    Each entry of ``self.memory`` is a tuple
    ``(vaddr, file_offset, mem_size, data)`` describing one page-aligned
    ``PT_LOAD`` segment as built by :meth:`loadELF`.

    NOTE(review): ``self.isThumb`` and ``self.get_tag_cpu_arch`` are called by
    :meth:`loadELF` but are not defined in this part of the file -- presumably
    they are provided elsewhere; verify.
    """

    def __init__(self, filename):
        """Remember *filename* and load it right away."""
        self.filename = filename
        # Per-instance list: a class-level list would be shared (and keep
        # growing) across every Disassembler that is created.
        self.memory = []
        self.loadELF(filename)

    def readMemory(self, address, size):
        """Return up to *size* bytes stored at virtual *address*.

        A falsy *size* (``None`` or ``0``) returns everything from *address*
        to the end of the segment's data.  An empty string is returned when
        no loaded segment contains *address*.
        """
        for vaddr, foffset, memsize, mem in self.memory:
            # Half-open range: the byte at vaddr + memsize belongs to
            # whatever segment follows, not to this one.
            if vaddr <= address < vaddr + memsize:
                start = address - vaddr
                if size:
                    return mem[start:start + size]
                return mem[start:]
        return b""

    def writeMemory(self, address, data):
        """Overwrite the bytes at virtual *address* with *data*.

        The address is translated to a file offset first and every segment
        covering that file offset is patched (page-aligned segments can
        cover the same file range).  Bytes that fall past the end of a
        segment's stored data are appended to it and its size grows by the
        number of appended bytes.
        """
        offset = self.addr2offset(address)
        for idx, (vaddr, foffset, memsize, mem) in enumerate(self.memory):
            if foffset <= offset < foffset + memsize:
                start = offset - foffset
                buf = bytearray(mem)
                # Number of bytes of `data` that land on already stored bytes.
                overlap = max(0, min(len(data), len(buf) - start))
                if overlap:
                    buf[start:start + overlap] = bytearray(data[:overlap])
                buf += bytearray(data[overlap:])
                memsize += len(data) - overlap
                self.memory[idx] = (vaddr, foffset, memsize, bytes(buf))

    def addr2offset(self, address):
        """Translate virtual *address* to a file offset (``-1`` if unmapped)."""
        for vaddr, foffset, memsize, mem in self.memory:
            if vaddr <= address < vaddr + memsize:
                return address - vaddr + foffset
        return -1

    def loadELF(self, filename):
        """Parse *filename* as ELF and fill segments, symbols and engines.

        Prints a message and exits the process when the file cannot be
        opened or is not a valid ELF file.
        """
        stream = None
        try:
            # The stream stays open on success: ELFFile reads from it lazily.
            stream = open(filename, 'rb')
            self.elf = ELFFile(stream)
        except Exception:
            if stream is not None:
                stream.close()
            print("[-] It is not ELF file: " + str(filename))
            sys.exit()

        self.arch = self.elf.get_machine_arch()
        if self.arch == 'ARM':
            self.arm_arch = self.get_tag_cpu_arch()

        # Load code segments, widened to page boundaries.
        align = 0x1000
        with open(filename, 'rb') as f:
            for elf_segment in self.elf.iter_segments():
                if elf_segment.header.p_type != 'PT_LOAD':
                    continue

                page_offset = elf_segment.header.p_vaddr & (align - 1)

                memsz = elf_segment.header.p_memsz + page_offset
                offset = elf_segment.header.p_offset - page_offset
                filesz = elf_segment.header.p_filesz + page_offset
                vaddr = elf_segment.header.p_vaddr - page_offset
                memsz = (memsz + align) & ~(align - 1)

                f.seek(offset, 0)
                data = f.read(filesz)
                self.memory.append((vaddr, offset, memsz, data))

        self.entry = self.elf.header.e_entry

        # Load symbol table
        self.symtab = dict()
        self.thumbtab = list()
        for section in self.elf.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            for symbol in section.iter_symbols():
                sym_type = symbol['st_info']['type']
                if sym_type == 'STT_FUNC':
                    if self.isThumb(symbol['st_value']):
                        # Key the symbol by its address with the low bit dropped.
                        self.symtab[symbol['st_value'] - 1] = symbol.name
                    else:
                        self.symtab[symbol['st_value']] = symbol.name
                elif self.arch == 'ARM' and sym_type == 'STT_NOTYPE':
                    if symbol.name == '$t':    # Thumb
                        self.thumbtab.append((symbol['st_value'], True))
                    elif symbol.name == '$a':  # ARM
                        self.thumbtab.append((symbol['st_value'], False))
        self.thumbtab.sort(key=lambda tup: tup[0])

        text_section = self.elf.get_section_by_name('.text')
        self.text = text_section.data()
        self.text_addr = text_section['sh_addr']
        self.text_size = text_section['sh_size']

        arch = {'x86': CS_ARCH_X86, 'x64': CS_ARCH_X86, 'ARM': CS_ARCH_ARM}[self.arch]
        mode = {'x86': CS_MODE_32, 'x64': CS_MODE_64, 'ARM': CS_MODE_ARM}[self.arch]
        self.md = Cs(arch, mode)
        if self.arch == 'ARM':
            self.t_md = Cs(arch, CS_MODE_THUMB)

    def disasm(self, address, size=None):
        """Disassemble from *address*; return ``(instruction, is_thumb)`` pairs.

        For non-ARM images *size* bytes (or the rest of the segment) are
        decoded.  For ARM images *size* is ignored: decoding runs from
        *address* through each later ``$a``/``$t`` mapping symbol, switching
        engine at every marker; bit 0 of *address* selects Thumb for the
        first stretch.
        """
        if self.arch != 'ARM':
            return [(i, False) for i in self.md.disasm(self.readMemory(address, size), address)]

        disasms = []
        thumb = (address & 1) == 1
        if thumb:
            address = address & -2
        for addr, isThumb in self.thumbtab:
            if address < addr:
                engine = self.t_md if thumb else self.md
                code = self.readMemory(address, addr - address)
                disasms.extend([(i, thumb) for i in engine.disasm(code, address)])
                address = addr
                thumb = isThumb
        return disasms

    def save(self):
        """Prompt for a file name and write the (patched) binary to it.

        The original file is copied and every loaded segment is written
        back at its file offset; the result is made executable.
        """
        def saveBinary(filename):
            def saveBinaryYes(yn, filename):
                if yn == 'y':
                    try:
                        with open(self.filename, 'rb') as src:
                            original_binary = src.read()
                        with open(filename, 'wb') as f:
                            f.write(original_binary)
                            for vaddr, foffset, memsize, mem in self.memory:
                                f.seek(foffset, 0)
                                f.write(mem)
                        os.chmod(filename, 0o755)
                        return "Successfully save to '%s'" % filename
                    except Exception as e:
                        return "Fail to save binary: " + str(e)

                return "Fail to save binary"

            if os.path.exists(filename):
                return (filename + " already exists, Overwrite?", saveBinaryYes, filename)
            else:
                return saveBinaryYes('y', filename)

        signals.set_prompt.send(self, text="Save to (filename): ", callback=saveBinary)
##
# Static information about one ELF image: sections, segments, strings,
# symbols and the disassembled .text / .plt instructions.
#
class ImageInfo(object):

    ##
    # Initialize internals.
    #
    # @param ImagePath file path to image
    #
    def __init__(self, ImagePath):
        # internals
        self._Path = ImagePath
        self._Handle = None
        self._SizeBytes = 0
        self._Elf = None
        self._IsExecutable = False
        self._Sections = {}
        self._SectionsFast = None
        self._Segments = []
        self._Strings = {}
        self._Symbols = {}
        self._TextInstructions = {}
        self._PLTInstructions = {}
        self._SymbolsFast = None
        self._TextSection = None
        self._PLTSection = None
        self._SymbolTable = None
        self._StringTable = None

    ##
    # Split a raw string table into its NUL-terminated entries.
    #
    # The table is searched as bytes so that the recorded offsets are byte
    # offsets even when a name contains multi-byte UTF-8 characters.
    #
    # @param RawTable string table content (bytes)
    # @return dict mapping byte offset -> decoded string (offset 0 -> '')
    #
    @staticmethod
    def _splitStringTable(RawTable):
        strings = {}
        start = 0
        for match in re.finditer(b'\x00', RawTable):
            entry = RawTable[start:match.start()].decode("utf-8")
            if entry != "":
                strings[start] = entry
            start = match.start() + 1
        strings[0] = ''
        return strings

    ##
    # Build the sorted (Start, Size) lookup array for a table.
    #
    # @param Entries dict mapping address -> object with _Addr and _Size
    # @return structured numpy array ordered by address
    #
    @staticmethod
    def _buildFastTable(Entries):
        # plain (name, type) fields: the (name, type, 1) spelling is
        # deprecated by numpy and is announced to mean shape (1,) later
        table = numpy.zeros(len(Entries),
                            dtype=numpy.dtype([('Start', numpy.uintp),
                                               ('Size', numpy.uintp)]))
        for i, key in enumerate(sorted(Entries)):
            table[i]['Start'] = Entries[key]._Addr
            table[i]['Size'] = Entries[key]._Size
        return table

    ##
    # Merge symbols that start inside their predecessor and end at the same
    # address into that predecessor.
    #
    # A run of several such symbols is folded into the first one of the run;
    # the pruned symbol's name and its offset from the surviving symbol are
    # appended to the survivor's name.
    #
    # @return none
    #
    def _pruneOverlaySymbols(self):
        ksort = sorted(self._Symbols.keys())
        krem = []
        for i in range(0, len(ksort) - 1):
            cur = self._Symbols[ksort[i]]
            nxt = self._Symbols[ksort[i + 1]]
            curend = cur._Addr + cur._Size
            if (curend > nxt._Addr) and (curend == nxt._Addr + nxt._Size):
                krem.append((ksort[i], ksort[i + 1]))

        # pruned address -> address of the symbol it was merged into, so a
        # pair whose first member is already gone still finds its target
        merged = {}
        for (first, second) in krem:
            keep = merged.get(first, first)
            if sys.stdout.isatty():
                print ("[INFO] Pruning overlay function %s." % self._Symbols[second]._Name)
            self._Symbols[keep]._Name += ("+%s(%d)" %
                                          (self._Symbols[second]._Name, second - keep))
            self._Symbols.pop(second)
            merged[second] = keep

    ##
    # Find the table entry whose [Start, Start+Size) range holds an address.
    #
    # @param Fast sorted (Start, Size) array of the table
    # @param Table dict mapping start address -> entry
    # @param Address address within image
    # @return the matching entry (None if no entry contains the address)
    #
    @staticmethod
    def _lookup(Fast, Table, Address):
        # find
        idx = numpy.argwhere(Fast[:]['Start'] <= Address).flatten()
        if len(idx) == 0:
            return None

        # check
        last = Fast[idx[-1]]
        if Address < last['Start'] + last['Size']:
            return Table[last['Start']]
        return None

    ##
    # Get infos about the given image.
    #
    # @return none
    #
    def parseImage(self):
        # basic stats
        self._SizeBytes = os.path.getsize(self._Path)

        # open
        self._Handle = open(self._Path, 'rb')
        self._Elf = ELFFile(self._Handle)

        # executable
        if self._Elf['e_type'] == 'ET_EXEC':
            self._IsExecutable = True

        # header string table
        hdstrtbl = None
        for cnt, sec in enumerate(self._Elf.iter_sections()):
            if (sec['sh_type'] == 'SHT_STRTAB') and (self._Elf['e_shstrndx'] == cnt):
                hdstrtbl = sec
                break
        if hdstrtbl is None:
            raise Exception("[ERROR] Could not find header string table!")

        # register segments
        for seg in self._Elf.iter_segments():
            self._Segments.append(seg)

        # register sections (only those with an address)
        for sec in self._Elf.iter_sections():
            curname = hdstrtbl.get_string(sec['sh_name'])
            if (sec['sh_addr'] != 0):
                cursec = SectionInfo()
                cursec._Name = curname
                cursec._Addr = sec['sh_addr']
                cursec._Size = sec['sh_size']
                cursec._Obj = sec
                self._Sections[cursec._Addr] = cursec

        # special sections
        secnames = []
        for sec in self._Elf.iter_sections():
            if sec['sh_size'] > 0:
                secnames.append(hdstrtbl.get_string(sec['sh_name']))
        if ('.text' not in secnames):
            raise Exception("[ERROR] No text section found!")
        if (('.symtab' not in secnames) and ('.dynsym' not in secnames)):
            raise Exception("[ERROR] No symbol table found!")
        if (('.strtab' not in secnames) and ('.dynstr' not in secnames)):
            raise Exception("[ERROR] No string table found!")
        # prefer .symtab/.strtab; fall back to .dynsym/.dynstr otherwise
        usedebugtables = (('.symtab' in secnames) and ('.strtab' in secnames))

        # register special sections
        for sec in self._Elf.iter_sections():
            if sec['sh_size'] > 0:
                cursec = SectionInfo()
                cursec._Name = hdstrtbl.get_string(sec['sh_name'])
                cursec._Addr = sec['sh_addr']
                cursec._Size = sec['sh_size']
                cursec._Obj = sec
                if cursec._Name == '.text':
                    self._TextSection = cursec
                elif cursec._Name == '.plt':
                    self._PLTSection = cursec
                elif (cursec._Name == '.symtab') and usedebugtables:
                    self._SymbolTable = cursec
                elif (cursec._Name == '.strtab') and usedebugtables:
                    self._StringTable = cursec
                elif (cursec._Name == '.dynsym') and not usedebugtables:
                    self._SymbolTable = cursec
                elif (cursec._Name == '.dynstr') and not usedebugtables:
                    self._StringTable = cursec

        # sanity check
        if (self._TextSection is None):
            raise Exception("[ERROR] Could not assign text section!")
        if (self._PLTSection is None):
            raise Exception("[ERROR] Could not assign plt section!")
        if (self._SymbolTable is None):
            raise Exception("[ERROR] Could not assign symbol table!")
        if (self._StringTable is None):
            raise Exception("[ERROR] Could not assign string table!")

        # parse strings
        self._Strings.update(self._splitStringTable(self._StringTable._Obj.data()))

        # register symbols
        for symb in self._SymbolTable._Obj.iter_symbols():
            if (symb['st_value'] != 0) and \
               (symb['st_info']['type'] != 'STT_SECTION') and \
               (symb['st_info']['type'] != 'STT_FILE') and \
               (symb['st_info']['type'] != 'STT_NOTYPE') and \
               (symb['st_info']['bind'] != 'STB_LOCAL'):
                # new symbol
                cursymb = SymbolInfo()
                cursymb._Name = symb.name
                cursymb._Addr = symb['st_value']
                cursymb._Size = symb['st_size']
                cursymb._Type = symb['st_info']['type']
                cursymb._Obj = symb
                # fix name
                if cursymb._Name == '':
                    cursymb._Name = '0x%08x' % cursymb._Addr
                # safe add
                if cursymb._Addr in self._Symbols:
                    oldsymb = self._Symbols[cursymb._Addr]
                    if sys.stdout.isatty():
                        print ("[INFO] Symbols with same start addr: new=%s and old=%s" \
                               % (cursymb._Name, oldsymb._Name))
                    if cursymb._Size == oldsymb._Size:
                        oldsymb._Name += ("+%s" % cursymb._Name)
                    elif cursymb._Size > oldsymb._Size:
                        # the larger symbol wins the slot
                        cursymb._Name += ("+%s(len=%d)" % \
                                          (oldsymb._Name, oldsymb._Size))
                        self._Symbols[cursymb._Addr] = cursymb
                    elif cursymb._Size < oldsymb._Size:
                        oldsymb._Name += ("+%s(len=%d)" % \
                                          (cursymb._Name, cursymb._Size))
                else:
                    self._Symbols[cursymb._Addr] = cursymb

        # prune overlay functions
        self._pruneOverlaySymbols()

        # fast access
        self._SectionsFast = self._buildFastTable(self._Sections)
        self._SymbolsFast = self._buildFastTable(self._Symbols)

        # consistency check
        for i in range(0, len(self._SectionsFast) - 1):
            if self._SectionsFast[i]['Start'] + self._SectionsFast[i]['Size'] > \
               self._SectionsFast[i + 1]['Start']:
                raise Exception('[ERROR] Inconsistent section placement!')
        for i in range(0, len(self._SymbolsFast) - 1):
            if self._SymbolsFast[i]['Start'] + self._SymbolsFast[i]['Size'] > \
               self._SymbolsFast[i + 1]['Start']:
                raise Exception('[ERROR] Inconsistent symbol placement: %s -> %s!' % \
                                (self._Symbols[self._SymbolsFast[i]['Start']]._Name, \
                                 self._Symbols[self._SymbolsFast[i + 1]['Start']]._Name))

        # set up disassembler
        archname = self._Elf.get_machine_arch().lower()
        if 'x64' in archname:
            md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
        elif 'x86' in archname:
            md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_32)
        elif 'arm' in archname:
            md = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM)
        elif 'aarch64' in archname:
            md = capstone.Cs(capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM + \
                             capstone.CS_MODE_V8)
        else:
            raise Exception("[ERROR] Image architecture currently not supported!")
        md.skipdata = True

        # parse .text section
        instructions = md.disasm_lite(self._TextSection._Obj.data(), \
                                      self._TextSection._Addr)
        for (address, size, mnemonic, op_str) in instructions:
            self._TextInstructions[address] = (size, "%s\t%s" % (mnemonic, op_str))

        # parse .plt instructions
        instructions = md.disasm_lite(self._PLTSection._Obj.data(), \
                                      self._PLTSection._Addr)
        for (address, size, mnemonic, op_str) in instructions:
            self._PLTInstructions[address] = (size, "%s\t%s" % (mnemonic, op_str))

    ##
    # Get section from given address.
    #
    # @param Address address within image
    # @return the section of the address (None if error)
    #
    def getSection(self, Address):
        return self._lookup(self._SectionsFast, self._Sections, Address)

    ##
    # Get symbol from given address.
    #
    # @param Address address within image
    # @return the symbol of the address (None if error)
    #
    def getSymbol(self, Address):
        return self._lookup(self._SymbolsFast, self._Symbols, Address)

    ##
    # Get instruction from given address.
    #
    # @param Address address within image
    # @return size of instr. and assembly code (None if error)
    #
    def getInstruction(self, Address):
        # get section
        sec = self.getSection(Address)
        if sec is None:
            return None

        # search (only .text and .plt are disassembled)
        if sec._Name == '.text':
            return self._TextInstructions.get(Address)
        elif sec._Name == '.plt':
            return self._PLTInstructions.get(Address)

        # error
        return None