def verify_symbol(self, file_path, symbol):
    """
    Check whether an ELF file's ``.symtab`` contains a given symbol name.

    Every failure path (no symbol given, missing file, missing symbol
    table, symbol absent) is logged and reported as False.

    @note - Primarily used to validate SP0 executables

    @param file_path: Path to executable file to check
    @param symbol: Name of symbol to verify within executable file
    @return: True if the symbol exists, otherwise False
    """
    if self.is_param_none(symbol):
        log.error("Symbol not specified.")
        return False

    if not os.path.isfile(file_path):
        log.error("File {} does not exist.".format(file_path))
        return False

    with open(file_path, 'rb') as stream:
        elf = ELFFile(stream)
        log.debug('{} sections in file {}'.format(
            elf.num_sections(), file_path))

        symtab = elf.get_section_by_name('.symtab')
        if not symtab:
            log.error("Invalid ELF file no symbol table found.")
            return False

        # A '.symtab' that is not a real symbol table is treated as
        # "not found" without logging anything.
        if not isinstance(symtab, SymbolTableSection):
            return False

        for entry in symtab.iter_symbols():
            if entry.name == symbol:
                return True

    log.error("Section does not have the symbol: {}".format(symbol))
    return False
def init_mem(self):
    """Copy the signature region of the software image into ``self.mem``.

    The ELF named by the ``SW_IMAGE`` plusarg is opened, the addresses of
    the ``begin_signature`` and ``end_signature`` symbols are read from
    ``.symtab``, and the bytes between them are packed into little-endian
    32-bit words. Each word is stored at ``self.mem[(addr & 0xFFFF) >> 2]``.

    Raises:
        ValueError: if no section contains ``begin_signature`` although the
            region is non-empty.
    """
    print("init_mem")
    sw_image = cocotb.plusargs["SW_IMAGE"]

    with open(sw_image, "rb") as f:
        elffile = ELFFile(f)

        symtab = elffile.get_section_by_name('.symtab')
        begin_signature = symtab.get_symbol_by_name(
            "begin_signature")[0]["st_value"]
        end_signature = symtab.get_symbol_by_name(
            "end_signature")[0]["st_value"]

        # Nothing to copy: avoid demanding a containing section.
        if begin_signature >= end_signature:
            return

        # Find the section that contains the data we need. The upper bound
        # is exclusive: an address equal to sh_addr + sh_size belongs to
        # the *next* section, not this one.
        section = None
        begin_signature_offset = 0
        for i in range(elffile.num_sections()):
            shdr = elffile._get_section_header(i)
            base = shdr['sh_addr']
            if base <= begin_signature < base + shdr['sh_size']:
                section = elffile.get_section(i)
                begin_signature_offset = begin_signature - base
                break

        if section is None:
            raise ValueError(
                "no section of %s contains begin_signature (0x%x)"
                % (sw_image, begin_signature))

        data = section.data()

        for addr in range(begin_signature, end_signature, 4):
            # Assemble one little-endian 32-bit word from four bytes.
            word = (
                (data[begin_signature_offset + 0] << (8 * 0))
                | (data[begin_signature_offset + 1] << (8 * 1))
                | (data[begin_signature_offset + 2] << (8 * 2))
                | (data[begin_signature_offset + 3] << (8 * 3))
            )
            self.mem[(addr & 0xFFFF) >> 2] = word
            begin_signature_offset += 4
def section_info_highlevel(stream):
    """Print a short summary of the ``.symtab`` section of an ELF stream.

    Only public ELFFile methods are used. Names are passed through
    ``bytes2str`` before printing so the output is the same on Python 2
    and Python 3 (section names are bytes objects here).
    """
    print('High level API...')
    elf = ELFFile(stream)

    print(' %s sections' % elf.num_sections())
    symtab = elf.get_section_by_name(b'.symtab')

    if not symtab:
        print(' No symbol table found. Perhaps this ELF has been stripped?')
        return

    # The type lives in the section header; the name is a decoded public
    # attribute of the section object.
    readable_name = bytes2str(symtab.name)
    print(' Section name: %s, type: %s' % (readable_name, symtab['sh_type']))

    # Only a genuine symbol table can tell us more.
    if not isinstance(symtab, SymbolTableSection):
        return

    count = symtab.num_symbols()
    print(" It's a symbol section with %s symbols" % count)
    last_symbol = symtab.get_symbol(count - 1)
    print(" The name of the last symbol in the section is: %s" % (
        bytes2str(last_symbol.name)))
def collect_needed(self, sofile):
    """Record ``sofile`` as a user of every library in its DT_NEEDED tags.

    Files that are not ELF are silently skipped; an unreadable file only
    produces a warning.
    """
    try:
        with open(sofile, 'rb') as stream:
            try:
                elf = ELFFile(stream)

                # Walk raw section headers lazily instead of calling
                # iter_sections(), so no Section objects are built for
                # sections we do not care about. The search stops at the
                # first SHT_DYNAMIC header (there should only be one).
                headers = (elf._get_section_header(idx)
                           for idx in range(elf.num_sections()))
                dyn_header = next(
                    (hdr for hdr in headers
                     if hdr['sh_type'] == 'SHT_DYNAMIC'),
                    None)

                if dyn_header is not None:
                    dyn_name = elf._get_section_name(dyn_header)
                    dynamic = DynamicSection(dyn_header, dyn_name,
                                             elf.stream, elf)
                    for tag in dynamic.iter_tags('DT_NEEDED'):
                        self.lib2required_by[tag.needed].append(sofile)
            except ELFError:
                pass  # not an ELF file
    except PermissionError:
        warn("Could not open {}; please check permissions".format(sofile))
class ElfParser(object):
    """Expose the ``.text`` bytes of an ELF file and the functions in it.

    On construction the ``.text`` section is read into memory and every
    non-empty ``STT_FUNC`` symbol that lives in ``.text`` is appended to a
    ``FunctionsList`` with its offset relative to the start of ``.text``.
    """

    def __init__(self, f):
        """Parse the ELF file at path ``f``.

        Raises:
            Exception: if ``.symtab`` is missing or is not a symbol table.
        """
        # NOTE(review): the handle is never closed; presumably it is kept
        # open because self.elffile keeps reading from it - confirm.
        self.elffile = ELFFile(open(f, "rb"))
        self.text_section = self.elffile.get_section_by_name(".text")
        self.code = self.text_section.data()
        self.code_len = len(self.code)
        self.text_offset = self.text_section.header.sh_addr
        self.funcs = FunctionsList()
        self.init_functions_list()

    def get_functions_list(self):
        """Return the collected functions container."""
        return self.funcs

    def get_code_and_funcs(self):
        """Return ``(text_bytes, functions)``."""
        return self.get_binary_code(), self.get_functions_list()

    def get_functions_num(self):
        """Return the number of collected functions."""
        # Was `len(funcs)`: an undefined name that raised NameError.
        return len(self.funcs)

    def get_code_len(self):
        """Return the size of the ``.text`` data in bytes."""
        return self.code_len

    def get_binary_code(self):
        """Return the raw ``.text`` bytes."""
        return self.code

    def get_section_idx(self, section):
        """Return the index of ``section`` in the file, or None if absent."""
        # range() instead of xrange() so this also runs on Python 3.
        for i in range(self.elffile.num_sections()):
            if self.elffile.get_section(i) == section:
                return i
        return None

    def va_to_offset(self, va):
        """Convert a virtual address into an offset inside ``.text``."""
        return va - self.text_offset

    def offset_to_va(self, offset):
        """Convert an offset inside ``.text`` into a virtual address."""
        return offset + self.text_offset

    @staticmethod
    def is_function_symbol(symbol, section_idx):
        """Return True for a non-empty STT_FUNC symbol in ``section_idx``."""
        entry = symbol.entry
        return (entry.st_info.type == "STT_FUNC"
                and entry.st_shndx == section_idx
                and entry.st_size > 0)

    def init_functions_list(self):
        """Fill ``self.funcs`` from the ``.symtab`` section.

        Raises:
            Exception: if ``.symtab`` is missing or is not a symbol table.
        """
        symtab = self.elffile.get_section_by_name(".symtab")
        text_section_idx = self.get_section_idx(self.text_section)

        if not isinstance(symtab, SymbolTableSection):
            raise Exception(".symtab section is missing or not a symbol table")

        for symbol in symtab.iter_symbols():
            if self.is_function_symbol(symbol, text_section_idx):
                sym_offset = self.va_to_offset(symbol.entry.st_value)
                self.funcs.append(symbol.name, sym_offset,
                                  symbol.entry.st_size)

    def print_functions_list(self):
        """Print a table header followed by every collected function."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%-30s\t%8s\t%8s" % ("Name", "Offset", "Size"))
        print("-" * 58)
        for func in self.funcs:
            print(func)
def dump_sections(self):
    """Return a list holding every section object of ``self.filename``.

    Sections are listed in index order. The file is closed again before
    the list is handed back to the caller.
    """
    with open(self.filename, "rb") as stream:
        image = ELFFile(stream)
        return [image.get_section(idx)
                for idx in range(image.num_sections())]
def open(io):
    """Parse an ELF stream and resolve its architecture.

    Logs the section/segment counts and the detected architecture, then
    returns ``(elf_object, arch)``.
    """
    parsed = ELFFile(io)

    section_count = parsed.num_sections()
    segment_count = parsed.num_segments()
    info('parsed elf file with %s sections and %s segments' %
         (section_count, segment_count))

    machine = parsed.get_machine_arch()
    arch = sefi.arch.from_elf_machine_arch(machine)
    info(' elf file arch is %s' % (arch))

    return (parsed, arch)
def print_basic_info(filename: str) -> None:
    """Print an ASCII table with basic facts about the ELF file ``filename``.

    The table lists size, architecture, ELF type, hashes, a VirusTotal
    link, whether DWARF info is present, entropy, the named sections with
    their sizes, and the entry point.
    """
    with open(filename, "rb") as handle:
        elf = ELFFile(handle)

        # Hashes first; the VirusTotal link is derived from the SHA-256.
        md5_digest = file_MD5sum(filename)
        sha1_digest = file_sha1sum(filename)
        sha256_digest = file_sha256sum(filename)
        ssdeep_digest = file_ssdeepsum(filename)

        full_vt_url = "https://www.virustotal.com/gui/file/" + sha256_digest
        # Fall back to the long URL when the shortener returns nothing.
        vtlink = tinyurl(full_vt_url) or full_vt_url

        # One "name (size)" entry per named section, with a line break
        # after the entry of every fourth section index.
        pieces = []
        for index in range(elf.num_sections()):
            current = elf.get_section(index)
            if len(current.name) > 0:
                pieces.append("{}{} {}({}) ".format(
                    GREEN, current.name, RESET, hex(current.data_size)))
                if index % 4 == 0 and index > 0:
                    pieces.append("\n")
        sections = "".join(pieces) or RED + "No sections found" + RESET

        if elf.has_dwarf_info():
            debug = GREEN + "Yes" + RESET
        else:
            debug = RED + "No" + RESET

        rows = [
            ["Filename:", filename],
            ["Filesize:", file_size(filename)],
            ["Filetype:",
             GREEN + "ELF " + str(elf.get_machine_arch()) + RESET],
            ["Subsystem:",
             GREEN + describe_e_type(elf.header['e_type']) + RESET],
            ["MD5: ", md5_digest],
            ["SHA1: ", sha1_digest],
            ["SHA256: ", sha256_digest],
            ["SSDEEP:", ssdeep_digest],
            ["VT link:", vtlink],
            ["Symbols:", debug],
            ["Entropy:", str(file_entropy(filename))],
            ["Sections:\n(with size)", sections],
            ["Entrypoint:", "{}".format(hex(elf.header["e_entry"]))],
        ]

        report = AsciiTable(title="Basic Information", table_data=rows)
        print("")
        print(report.table)
        print("")
def test_hello(self):
    """The ARM sample binary reports the expected header properties."""
    sample = os.path.join('test', 'testfiles_for_unittests',
                          'simple_gcc.elf.arm')
    with open(sample, 'rb') as stream:
        parsed = ELFFile(stream)
        self.assertEqual(parsed.get_machine_arch(), 'ARM')

        # Reference values taken from readelf output for this file.
        self.assertEqual(parsed['e_entry'], 0x8018)
        self.assertEqual(parsed.num_sections(), 14)
        self.assertEqual(parsed.num_segments(), 2)
async def test(top):
    """Load the software image into BRAM, then wait for ``main`` to exit.

    Every non-empty section with flag bit 0x2 set is written to the BRAM
    BFM as little-endian 32-bit words; a short tail is zero-padded.
    """
    await pybfms.init()

    u_bram = pybfms.find_bfm(".*u_bram")
    u_dbg_bfm: RiscvDebugBfm = pybfms.find_bfm(".*u_dbg_bfm")
    u_uart_bfm: UartBfm = pybfms.find_bfm(".*u_uart_bfm")
    uart_bfm_sw: UartBfmSwAPI = UartBfmSwAPI([u_uart_bfm])

    sw_image = cocotb.plusargs["sw.image"]
    u_dbg_bfm.load_elf(sw_image)
    u_dbg_bfm.register_export_api(UartBfmSwAPI)
    u_dbg_bfm.set_export_impl(UartBfmSwAPI, uart_bfm_sw)

    print("Note: loading image " + sw_image)
    with open(sw_image, "rb") as image:
        elf = ELFFile(image)
        symtab = elf.get_section_by_name('.symtab')

        for index in range(elf.num_sections()):
            header = elf._get_section_header(index)

            # Skip empty sections and those without flag bit 0x2
            # (SHF_ALLOC in the ELF specification).
            if header['sh_size'] == 0 or (header['sh_flags'] & 0x2) != 0x2:
                continue

            payload = elf.get_section(index).data()
            total = len(payload)
            addr = header['sh_addr']

            for start in range(0, total, 4):
                # Pack up to four bytes, least significant byte first.
                word = 0
                for lane in range(4):
                    if start + lane < total:
                        word |= payload[start + lane] << (8 * lane)
                u_bram.write_nb((addr & 0xFFFFF) // 4, word, 0xF)
                addr += 4

    # Wait for the main function to exit
    print("--> wait main")
    await u_dbg_bfm.on_exit("main")
    print("<-- wait main")

    # Wait for all objections to be dropped
    await pybfms.objection.inst().wait()
def check(self):
    """Compare recorded register writes against the image's expectations.

    The ELF named by the ``SW_IMAGE`` plusarg holds a table between the
    ``start_expected`` and ``end_expected`` symbols. Each 8-byte entry is
    a little-endian 32-bit register number followed by a little-endian
    32-bit expected value. ``self.regs[n]`` is read as
    ``(value, written_flag)``.

    Returns:
        True when every listed register was written with the expected
        value, otherwise False.

    Raises:
        ValueError: if no section contains the expectation table.
    """
    status = True
    sw_image = hpi.get_plusarg("SW_IMAGE")
    print("SW_IMAGE=" + sw_image)

    with open(sw_image, "rb") as f:
        elffile = ELFFile(f)

        symtab = elffile.get_section_by_name('.symtab')
        start_expected = symtab.get_symbol_by_name(
            "start_expected")[0]["st_value"]
        end_expected = symtab.get_symbol_by_name(
            "end_expected")[0]["st_value"]

        # Locate the section holding the table and turn both addresses
        # into offsets relative to that section's data.
        section = None
        for i in range(elffile.num_sections()):
            shdr = elffile._get_section_header(i)
            if (start_expected >= shdr['sh_addr']) and (
                    end_expected <= (shdr['sh_addr'] + shdr['sh_size'])):
                start_expected -= shdr['sh_addr']
                end_expected -= shdr['sh_addr']
                section = elffile.get_section(i)
                break

        if section is None:
            raise ValueError(
                "no section of %s contains start_expected..end_expected"
                % sw_image)

        data = section.data()

        exp_l = []
        for i in range(start_expected, end_expected, 8):
            reg = data[i + 0] | (data[i + 1] << 8) | (
                data[i + 2] << 16) | (data[i + 3] << 24)
            exp = data[i + 4] | (data[i + 5] << 8) | (
                data[i + 6] << 16) | (data[i + 7] << 24)
            exp_l.append([reg, exp])

    # Now, check results
    for exp in exp_l:
        print("Expect: R[" + str(exp[0]) + "] = " + str(exp[1]))

    for exp in exp_l:
        if not self.regs[exp[0]][1]:
            print("Error: R[" + str(exp[0]) + "] not written")
            status = False

        if self.regs[exp[0]][0] != exp[1]:
            print("Error: R[" + str(exp[0]) + "] has unexpected value")
            # Previously this error was printed but the check still passed.
            status = False

    return status
def process(self, data):
    """Return a slice of ``data`` addressed through its ELF structure.

    Without a section name in ``self.args.offset`` the address is mapped
    through ``self._data_offset`` and the raw input is sliced. With a
    section name, the slice is taken from that section's data.

    Raises:
        ValueError: if the input is not an ELF file or the named section
            does not exist.
    """
    try:
        elf = ELFFile(io.BytesIO(data))
    except Exception:
        elf = None
    if not elf:
        raise ValueError('unable to parse input as ELF file')

    target = self.args.offset

    if not target.section:
        start = self._data_offset(elf, target.address)
        return data[self._slice(start)]

    candidates = (elf.get_section(index)
                  for index in range(elf.num_sections()))
    match = next(
        (sec for sec in candidates if target.section == sec.name), None)
    if match is None:
        raise ValueError(F'unable to find section {self.args.offset.section}')

    # NOTE(review): other pyelftools callers read section bytes with
    # data(); confirm get_data() exists on the section type used here.
    section_data = match.get_data()
    return section_data[self._slice(target.address)]
def check(self):
    """Read the expectation table, poll 64 registers, print PASS/FAIL.

    This is a generator: each register query is yielded and the value
    sent back is collected. The verdict depends only on ``self.complete``.
    """
    reg_data = []
    sw_image = cocotb.plusargs["SW_IMAGE"]
    testname = cocotb.plusargs["TESTNAME"]
    print("SW_IMAGE=" + sw_image)

    with open(sw_image, "rb") as image:
        elf = ELFFile(image)
        symbols = elf.get_section_by_name('.symtab')
        first = symbols.get_symbol_by_name("start_expected")[0]["st_value"]
        last = symbols.get_symbol_by_name("end_expected")[0]["st_value"]

        # Find the section spanning the table; rebase both bounds onto it.
        holder = None
        for index in range(elf.num_sections()):
            header = elf._get_section_header(index)
            base = header['sh_addr']
            if first >= base and last <= base + header['sh_size']:
                first -= base
                last -= base
                holder = elf.get_section(index)
                break

        blob = holder.data()

        def le32(pos):
            # Little-endian 32-bit value starting at byte offset pos.
            return (blob[pos] | (blob[pos + 1] << 8)
                    | (blob[pos + 2] << 16) | (blob[pos + 3] << 24))

        # [register, expected value] pairs, 8 bytes per entry.
        exp_l = [[le32(pos), le32(pos + 4)] for pos in range(first, last, 8)]

    for reg_index in range(64):
        info = yield self.tracer_bfm.get_reg_info(reg_index)
        reg_data.append(info)

    verdict = "PASS: " if self.complete else "FAIL: "
    print(verdict + testname)
def test_basic(self):
    """The MIPS sample binary parses and exposes MIPS section types."""
    sample = os.path.join('test', 'testfiles_for_unittests',
                          'simple_gcc.elf.mips')
    with open(sample, 'rb') as stream:
        parsed = ELFFile(stream)
        self.assertEqual(parsed.get_machine_arch(), 'MIPS')

        # Reference values taken from readelf output for this file.
        self.assertEqual(parsed['e_entry'], 0x0)
        self.assertEqual(parsed.num_sections(), 25)
        self.assertEqual(parsed.num_segments(), 0)

        # MIPS-specific section types are only decoded when e_machine
        # identifies the file as MIPS.
        ninth = parsed.get_section(9)
        self.assertEqual(ninth['sh_type'], 'SHT_MIPS_DWARF')
async def test(top):
    """Load the software image into BRAM, then wait for ``done`` to exit.

    Every non-empty section with flag bit 0x2 set is written to the BRAM
    BFM as little-endian 32-bit words; a short tail is zero-padded. Each
    section header and each write is echoed to stdout.
    """
    await pybfms.init()

    u_bram = pybfms.find_bfm(".*u_bram")
    u_dbg_bfm: RiscvDebugBfm = pybfms.find_bfm(".*u_dbg_bfm")

    sw_image = cocotb.plusargs["sw.image"]
    u_dbg_bfm.load_elf(sw_image)

    print("Note: loading image " + sw_image)
    with open(sw_image, "rb") as image:
        elf = ELFFile(image)

        for index in range(elf.num_sections()):
            header = elf._get_section_header(index)
            print("sh_size=" + hex(header['sh_size']) + " sh_flags=" +
                  hex(header['sh_flags']))

            # Skip empty sections and those without flag bit 0x2
            # (SHF_ALLOC in the ELF specification).
            if header['sh_size'] == 0 or (header['sh_flags'] & 0x2) != 0x2:
                continue

            payload = elf.get_section(index).data()
            total = len(payload)
            addr = header['sh_addr']

            for start in range(0, total, 4):
                # Pack up to four bytes, least significant byte first.
                word = 0
                for lane in range(4):
                    if start + lane < total:
                        word |= payload[start + lane] << (8 * lane)

                slot = (addr & 0xFFFFF) // 4
                print("Write: " + hex(addr) + "(" + hex(slot) + ") " +
                      hex(word))
                u_bram.write_nb(slot, word, 0xF)
                addr += 4

    print("Hello")
    print("--> wait main")
    await u_dbg_bfm.on_exit("done")
    print("<-- wait main")
def get_debug_symbols(filename):
    """Extract symbols from the ``.gnu_debugdata`` section of an ELF file.

    The section holds an XZ-compressed ELF image. It is decompressed and
    parsed entirely in memory, so nothing is written next to ``filename``
    and no temporary file can be left behind when decoding fails.

    Args:
        filename: path of the ELF file to inspect.

    Returns:
        A list of ``(st_value, name)`` tuples for every named symbol in
        the embedded image, or None when the file has no
        ``.gnu_debugdata`` section.
    """
    # Local import keeps this block self-contained.
    import io

    section_name = '.gnu_debugdata'

    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)
        debugdata = elffile.get_section_by_name(section_name)
        if not isinstance(debugdata, Section):
            print(' The file has no %s section' % section_name)
            return
        print(' Found %s section' % section_name)

        print(' Extracting...')
        decompressor = LZMADecompressor(FORMAT_XZ)
        # Must run while the file is open: the section data is read here.
        data = decompressor.decompress(debugdata.data())

    symbols = []
    embedded = ELFFile(io.BytesIO(data))
    symbol_tables = [
        s for s in embedded.iter_sections()
        if isinstance(s, SymbolTableSection)
    ]
    if not symbol_tables and embedded.num_sections() == 0:
        print(' INFO: No debug symbols.')
    for section in symbol_tables:
        symbols.extend(
            (symbol['st_value'], symbol.name)
            for symbol in section.iter_symbols()
            if len(symbol.name) != 0)
    return symbols
def verify_symbol(self, file, symbol):
    """Return True if ``symbol`` is named in the ``.symtab`` of ``file``.

    A missing symbol argument, a missing file and a missing symbol table
    are logged as errors; each of them yields False.
    """
    if self.is_param_none(symbol):
        log.error("Symbol not specified.")
        return False

    if not os.path.isfile(file):
        log.error("File {} does not exist.".format(file))
        return False

    with open(file, 'rb') as stream:
        elf = ELFFile(stream)
        log.debug(' %s sections' % elf.num_sections())

        symtab = elf.get_section_by_name('.symtab')
        if not symtab:
            log.error("Invalid ELF file no symbol table found.")
            return False

        if not isinstance(symtab, SymbolTableSection):
            return False

        # Stops at the first matching entry.
        return any(
            symtab.get_symbol(index).name == symbol
            for index in range(0, symtab.num_symbols()))
def section_info_highlevel(stream):
    """Collect the names of ``cw_``-prefixed functions from ``.symtab``.

    Only ``STT_FUNC`` symbols are considered, and names containing
    ``localalias`` are ignored. Progress is printed along the way. An
    empty set is returned when there is no usable symbol table.
    """
    found = set()
    elf = ELFFile(stream)

    # Public ELFFile methods are enough here; section names are strings.
    print(" {} sections".format(elf.num_sections()))
    symtab = elf.get_section_by_name('.symtab')

    if not symtab:
        print(' No symbol table found. Perhaps this ELF has been stripped?')
        return found

    # The type comes from the header, the name from a public attribute.
    print(' Section name: %s, type: %s' % (symtab.name, symtab['sh_type']))

    if not isinstance(symtab, SymbolTableSection):
        return found

    total = symtab.num_symbols()
    print(" It's a symbol section with %s symbols" % total)

    for index in range(0, total):
        entry = symtab.get_symbol(index)
        if entry['st_info']['type'] != 'STT_FUNC':
            continue

        name = entry.name
        # Skip compiler-generated aliases such as
        # cw_send_representation_partial.localalias.2
        if name[0:3] == "cw_" and "localalias" not in name:
            found.add(name)

    return found
class ElfFile:
    """Convenience wrapper that converts ELF structures to project records.

    ``syms`` and ``dynsyms`` are parsed on first access and cached
    independently of each other.
    """

    def __init__(self, stream):
        """Wrap an open binary ``stream`` containing an ELF image."""
        self.stream = stream
        self.elf = ELFFile(self.stream)
        self.symbols = None          # cache for .symtab
        self.dynamic_symbols = None  # cache for .dynsym

    @property
    def syms(self):
        """Symbols of ``.symtab`` keyed by index (None if absent)."""
        if self.symbols is None:
            self.symbols = self.__section_symbols__('.symtab')
        return self.symbols

    @property
    def dynsyms(self):
        """Symbols of ``.dynsym`` keyed by index (None if absent)."""
        # Previously this stored into and returned self.symbols, which
        # overwrote the .symtab cache and never filled this one.
        if self.dynamic_symbols is None:
            self.dynamic_symbols = self.__section_symbols__('.dynsym')
        return self.dynamic_symbols

    def get_header(self):
        """Return an ElfHeader built from ``e_type`` and ``e_machine``."""
        return ElfHeader(self.elf['e_type'], self.elf['e_machine'])

    def __get_section__(self, name):
        """Return the section called ``name``, or None when it is missing.

        A section of type SHT_NOBITS terminates the process with exit
        status 1 after printing a diagnostic.
        """
        section = self.elf.get_section_by_name(name)
        if section and section['sh_type'] == "SHT_NOBITS":
            print("section '%s' type is NOBITS" % name)
            print("Perhabs this ELF was stripped")
            sys.exit(1)
        return section

    def __section_symbols__(self, section_name):
        """Return ``{index: ElfSym}`` for a symbol section, or None."""
        section = self.__get_section__(section_name)
        if section is None:
            return None

        symbols = {}
        for num in range(0, section.num_symbols()):
            s = section.get_symbol(num)
            symbols[num] = ElfSym(num, s['st_value'], s['st_size'],
                                  s['st_info'].type, s['st_info'].bind,
                                  s['st_other'].visibility, s['st_shndx'],
                                  s.name)
        return symbols

    def get_symbols(self):
        """Return a freshly parsed ``.symtab`` mapping (cache not used)."""
        return self.__section_symbols__('.symtab')

    def get_sections(self):
        """Return ``{section_name: ElfSection}`` for every section."""
        sections = {}
        for i in range(self.elf.num_sections()):
            s = self.elf.get_section(i)
            sections[s.name] = ElfSection(s['sh_offset'], s['sh_addr'],
                                          s['sh_size'])
        return sections

    def get_segments(self):
        """Return a list with one ElfSegment per program header."""
        return [
            ElfSegment(s['p_type'], s['p_offset'], s['p_vaddr'],
                       s['p_paddr'], s['p_memsz'], s['p_flags'],
                       s['p_align'], s['p_filesz'])
            for s in self.elf.iter_segments()
        ]

    def build_id(self):
        """Return the GNU build-id note descriptor, or None if unavailable.

        A diagnostic is printed when the section is missing, cannot list
        notes, or holds no NT_GNU_BUILD_ID note.
        """
        section = '.note.gnu.build-id'
        n_type = 'NT_GNU_BUILD_ID'

        bid = self.__get_section__(section)
        # The original tested the name string `section`, which is never
        # None; the object that can be missing is `bid`.
        if bid is None:
            print("ELF file doesn't have %s section" % section)
            return None

        try:
            for note in bid.iter_notes():
                if note['n_type'] == n_type:
                    return note['n_desc']
            print("ELF section %s doesn't have %s descriptor" %
                  (section, n_type))
        except AttributeError:
            # The section exists but does not offer iter_notes().
            print("ELF file doesn't have %s section" % section)
        return None
print( "You must provide this script with an elf binary file you want to examine" ) exit(1) print(f"Mapping between segments and sections in the file {sys.argv[1]}") elffile = ELFFile(open(sys.argv[1], 'rb')) segments = list() for segment_idx in range(elffile.num_segments()): segments.insert(segment_idx, dict()) segments[segment_idx]['segment'] = elffile.get_segment(segment_idx) segments[segment_idx]['sections'] = list() for section_idx in range(elffile.num_sections()): section = elffile.get_section(section_idx) for segment in segments: if segment['segment'].section_in_segment(section): segment['sections'].append(section) for segment in segments: seg_head = segment['segment'].header print("Segment:") print( f"Type: {seg_head.p_type}\nOffset: {hex(seg_head.p_offset)}\nVirtual address: {hex(seg_head.p_vaddr)}\nPhysical address: {(seg_head.p_paddr)}\nSize in file: {hex(seg_head.p_filesz)}\nSize in memory: {hex(seg_head.p_memsz)}\n" ) if segment['sections']: print("Segment's sections:") print([(section.name, hex(section['sh_addr']))
class ReadElf(object):
    """ display_* methods are used to emit output into the output stream
    """

    def __init__(self, file, output):
        """ file:
                stream object with the ELF file to read

            output:
                output stream to write to
        """
        self.elffile = ELFFile(file)
        self.output = output

        # Lazily initialized if a debug dump is requested
        self._dwarfinfo = None

        self._versioninfo = None

    def _section_from_spec(self, spec):
        """ Retrieve a section given a "spec" (either number or name).
            Return None if no such section exists in the file.
        """
        try:
            num = int(spec)
            if num < self.elffile.num_sections():
                return self.elffile.get_section(num)
            return None
        except ValueError:
            # Not a number. Must be a name then
            section = self.elffile.get_section_by_name(force_unicode(spec))
            if section is None:
                # No match with a unicode name.
                # Some versions of pyelftools (<= 0.23) store internal strings
                # as bytes. Try again with the name encoded as bytes.
                section = self.elffile.get_section_by_name(force_bytes(spec))
            return section

    @staticmethod
    def _iter_strings(data):
        """ Yield every string found in a section's raw data.

            A string starts at the first byte whose value lies in 32..127
            and runs up to (not including) the next NUL byte or the end
            of the data. Each string is decoded with force_unicode,
            because pyelftools may return byte-strings.
        """
        size = len(data)
        dataptr = 0
        while dataptr < size:
            # Skip forward to the next byte in the accepted range
            while (dataptr < size and
                    not 32 <= byte2int(data[dataptr]) <= 127):
                dataptr += 1

            if dataptr >= size:
                break

            endptr = dataptr
            while endptr < size and byte2int(data[endptr]) != 0:
                endptr += 1

            yield force_unicode(data[dataptr:endptr])
            dataptr = endptr

    def pretty_print_pmdinfo(self, pmdinfo):
        """ Print vendor/device/sub-device names for each PCI id entry,
            resolved through the global pcidb.
        """
        global pcidb

        for i in pmdinfo["pci_ids"]:
            vendor = pcidb.find_vendor(i[0])
            device = vendor.find_device(i[1])
            subdev = device.find_subid(i[2], i[3])
            print(
                "%s (%s) : %s (%s) %s" %
                (vendor.name, vendor.ID, device.name, device.ID, subdev.name))

    def parse_pmd_info_string(self, mystring):
        """ Decode and print one PMD info record.

            The JSON payload starts two characters after the first "=".
            With the global raw_output set, the JSON is re-dumped as-is
            and nothing else is printed.
        """
        global raw_output
        global pcidb

        optional_pmd_info = [{
            'id': 'params',
            'tag': 'PMD PARAMETERS'
        }, {
            'id': 'kmod',
            'tag': 'PMD KMOD DEPENDENCIES'
        }]

        eq_pos = mystring.index("=")
        pmdinfo = json.loads(mystring[eq_pos + 2:])

        if raw_output:
            print(json.dumps(pmdinfo))
            return

        print("PMD NAME: " + pmdinfo["name"])
        for opt in optional_pmd_info:
            try:
                print("%s: %s" % (opt['tag'], pmdinfo[opt['id']]))
            except KeyError:
                # Optional field not present in this record
                continue

        if pmdinfo["pci_ids"]:
            print("PMD HW SUPPORT:")
            if pcidb is not None:
                self.pretty_print_pmdinfo(pmdinfo)
            else:
                print("VENDOR\t DEVICE\t SUBVENDOR\t SUBDEVICE")
                for ids in pmdinfo["pci_ids"]:
                    print("0x%04x\t 0x%04x\t 0x%04x\t\t 0x%04x" %
                          (ids[0], ids[1], ids[2], ids[3]))

        print("")

    def display_pmd_info_strings(self, section_spec):
        """ Display a strings dump of a section. section_spec is either a
            section number or a name.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            return

        for mystring in self._iter_strings(section.data()):
            rc = mystring.find("PMD_INFO_STRING")
            if rc != -1:
                self.parse_pmd_info_string(mystring[rc:])

    def find_librte_eal(self, section):
        """ Return the DT_NEEDED entry naming librte_eal, or None. """
        for tag in section.iter_tags():
            # pyelftools may return byte-strings, force decode them
            if force_unicode(tag.entry.d_tag) == 'DT_NEEDED':
                if "librte_eal" in force_unicode(tag.needed):
                    return force_unicode(tag.needed)
        return None

    def search_for_autoload_path(self):
        """ Look for a DPDK_PLUGIN_PATH string.

            When this binary needs librte_eal and that library can be
            located, the library's .rodata is searched; otherwise this
            file's own .rodata is. Returns (path, library), or
            (None, None) when nothing is found. A library opened here is
            always closed again before returning.
        """
        scanelf = self
        scanfile = None
        library = None

        section = self._section_from_spec(".dynamic")
        try:
            try:
                eallib = self.find_librte_eal(section)
                if eallib is not None:
                    ldlibpath = os.environ.get('LD_LIBRARY_PATH')
                    if ldlibpath is None:
                        ldlibpath = ""
                    dtr = self.get_dt_runpath(section)
                    library = search_file(
                        eallib, dtr + ":" + ldlibpath +
                        ":/usr/lib64:/lib64:/usr/lib:/lib")
                    if library is None:
                        return (None, None)
                    if not raw_output:
                        print("Scanning for autoload path in %s" % library)
                    scanfile = open(library, 'rb')
                    scanelf = ReadElf(scanfile, sys.stdout)
            except AttributeError:
                # Not a dynamic binary
                pass
            except ELFError:
                return (None, None)

            section = scanelf._section_from_spec(".rodata")
            if section is None:
                return (None, None)

            for mystring in self._iter_strings(section.data()):
                if mystring.find("DPDK_PLUGIN_PATH") != -1:
                    rc = mystring.find("=")
                    return (mystring[rc + 1:], library)

            return (None, None)
        finally:
            # Single cleanup point: the successful return used to leak the
            # handle, and the ELFError path could hit scanfile being None.
            if scanfile is not None:
                scanfile.close()

    def get_dt_runpath(self, dynsec):
        """ Return the DT_RUNPATH value of dynsec, or "" if there is none. """
        for tag in dynsec.iter_tags():
            # pyelftools may return byte-strings, force decode them
            if force_unicode(tag.entry.d_tag) == 'DT_RUNPATH':
                return force_unicode(tag.runpath)
        return ""

    def process_dt_needed_entries(self):
        """ Look to see if there are any DT_NEEDED entries in the binary
            And process those if there are
        """
        runpath = ""
        ldlibpath = os.environ.get('LD_LIBRARY_PATH')
        if ldlibpath is None:
            ldlibpath = ""

        dynsec = self._section_from_spec(".dynamic")
        try:
            runpath = self.get_dt_runpath(dynsec)
        except AttributeError:
            # dynsec is None, just return
            return

        for tag in dynsec.iter_tags():
            # pyelftools may return byte-strings, force decode them
            if force_unicode(tag.entry.d_tag) != 'DT_NEEDED':
                continue
            if 'librte_' not in force_unicode(tag.needed):
                continue

            library = search_file(
                force_unicode(tag.needed), runpath + ":" + ldlibpath +
                ":/usr/lib64:/lib64:/usr/lib:/lib")
            if library is None:
                continue

            # The with-block closes the stream; no explicit close needed.
            with open(library, 'rb') as lib_stream:
                try:
                    libelf = ReadElf(lib_stream, sys.stdout)
                except ELFError:
                    print("%s is no an ELF file" % library)
                    continue
                libelf.process_dt_needed_entries()
                libelf.display_pmd_info_strings(".rodata")
def compile_function(code,
                     compiler_flags="",
                     bits=32,
                     little_endian=False,
                     entry=0x0,
                     symbols=None,
                     data_only=False,
                     prefix=""):
    """Compile C source for PowerPC and return machine code or data patches.

    The source is compiled with clang, relinked with ld.lld using a
    generated linker script, patched in place (symbol table and
    relocation entries), and finally loaded with cle. All intermediate
    files live in a temporary directory from ``utils.tempdir()``.

    Args:
        code: C source text, written to ``code.c``.
        compiler_flags: text appended verbatim to the clang command line.
        bits: 32 selects a ``powerpc``/``powerpcle`` target; any other
            value selects ``powerpc64``/``powerpc64le``.
        little_endian: selects the ``le`` flavour of the target triple.
        entry: address subtracted from each symbol value when the linker
            script is generated.
        symbols: optional mapping of symbol name to address; each entry
            becomes an assignment inside the linker script.
        data_only: when true, return patches for ``.rodata`` instead of
            the compiled code.
        prefix: prepended to the section name to form the patch name.

    Returns:
        With ``data_only`` a list of ``AddRODataPatch`` objects (empty if
        there is no ``.rodata`` section); otherwise whatever
        ``ld.memory.load`` returns for the ``.text`` range.

    Raises:
        CLangException: clang exited with a non-zero status.
        Exception: ld.lld exited with a non-zero status.
        ObjcopyException: objcopy exited with a non-zero status.
    """
    with utils.tempdir() as td:
        c_fname = os.path.join(td, "code.c")
        object_fname = os.path.join(td, "code.o")
        object2_fname = os.path.join(td, "code.2.o")
        linker_script_fname = os.path.join(td, "code.lds")
        data_fname = os.path.join(td, "data")
        # -1 means "not found" for all three indices.
        rodata_sec_index = rodata_sym_index_old = rodata_sym_index_new = -1

        # C -> Object File
        with open(c_fname, 'w') as fp:
            fp.write(code)
        target = ("powerpcle-linux-gnu" if little_endian else "powerpc-linux-gnu") if bits == 32 else (
            "powerpc64le-linux-gnu" if little_endian else "powerpc64-linux-gnu")
        # NOTE(review): the command is built by string interpolation and
        # run with shell=True, so compiler_flags must come from a trusted
        # source.
        res = utils.exec_cmd("clang -target %s -o %s -c %s %s" \
                             % (target, object_fname, c_fname, compiler_flags), shell=True)
        # res[2] is compared against 0 as an exit status; res[0] + res[1]
        # is decoded as UTF-8 for the error text.
        if res[2] != 0:
            raise CLangException("CLang error: " + str(res[0] + res[1], 'utf-8'))

        # Setup Linker Script
        linker_script = "SECTIONS { .text : { *(.text) "
        if symbols:
            for i in symbols:
                if i == ".rodata":
                    # .rodata gets its own base: the entry address minus
                    # 0x10700000 with the low 16 bits cleared.
                    # NOTE(review): the origin of 0x10700000 is not
                    # visible in this block - confirm.
                    linker_script += i + " = " + hex(symbols[i] - (
                        (entry - 0x10700000) & ~0xFFFF)) + ";"
                else:
                    linker_script += i + " = " + hex(symbols[i] - entry) + ";"
        linker_script += "} .rodata : { *(.rodata*) } }"
        with open(linker_script_fname, 'w') as fp:
            fp.write(linker_script)

        # Object File --LinkerScript--> Object File
        res = utils.exec_cmd(
            "ld.lld -relocatable %s -T %s -o %s" %
            (object_fname, linker_script_fname, object2_fname),
            shell=True)
        if res[2] != 0:
            raise Exception("Linking Error: " + str(res[0] + res[1], 'utf-8'))

        # Load Object File
        ld = cle.Loader(object2_fname,
                        main_opts={"base_addr": 0x0},
                        perform_relocations=False)

        # Figure Out .text Section Size
        # NOTE(review): text_section_size stays unbound when no section is
        # named ".text"; the non-data_only path would then fail.
        for section in ld.all_objects[0].sections:
            if section.name == ".text":
                text_section_size = section.filesize
                break

        # Modify Symbols in Object File to Trick Loader
        # The file is opened read/write and rewritten in place while the
        # ELFFile is still attached to the same handle.
        with open(object2_fname, "rb+") as f:
            elf = ELFFile(f)

            # Find the Index of .rodata Section
            for i in range(elf.num_sections()):
                if elf.get_section(i).name == ".rodata":
                    rodata_sec_index = i
                    break

            # Find the Index of the src and dest Symbol
            # "old": the STT_SECTION symbol that points at .rodata.
            # "new": the symbol literally named ".rodata".
            # No break here, so the last match of each kind wins.
            symtab_section = elf.get_section_by_name(".symtab")
            for i in range(symtab_section.num_symbols()):
                if symtab_section.get_symbol(
                        i
                )['st_shndx'] == rodata_sec_index and symtab_section.get_symbol(
                        i)['st_info']['type'] == 'STT_SECTION':
                    rodata_sym_index_old = i
                if symtab_section.get_symbol(i).name == ".rodata":
                    rodata_sym_index_new = i

            # Rewrite the Symbol
            # Copies the raw "new" symbol entry over the "old" one, then
            # writes the whole file back.
            if rodata_sym_index_new != -1 and rodata_sec_index != -1 and rodata_sym_index_old != -1:
                for i in range(elf.num_sections()):
                    if elf.get_section(i).header[
                            'sh_name'] == symtab_section.header['sh_name']:
                        f.seek(0)
                        content = f.read()
                        f.seek(symtab_section['sh_offset'] +
                               rodata_sym_index_new *
                               symtab_section['sh_entsize'])
                        rodata_sym_new = f.read(
                            symtab_section['sh_entsize'])
                        content = utils.bytes_overwrite(
                            content, rodata_sym_new,
                            symtab_section['sh_offset'] +
                            rodata_sym_index_old *
                            symtab_section['sh_entsize'])
                        f.seek(0)
                        f.write(content)
                        f.truncate()
                        break

            # Replace all R_PPC_PLTREL24 to R_PPC_REL24
            # Entries with type 18 get r_info reduced by 8 (18 - 8 = 10)
            # and are rebuilt and written back one at a time.
            rela_section = elf.get_section_by_name(".rela.text")
            if rela_section is not None:
                for i in range(rela_section.num_relocations()):
                    if rela_section.get_relocation(i)['r_info_type'] == 18:
                        reloc = rela_section.get_relocation(i).entry
                        reloc['r_info'] -= 8

                        for j in range(elf.num_sections()):
                            if elf.get_section(j).header[
                                    'sh_name'] == rela_section.header[
                                        'sh_name']:
                                f.seek(0)
                                content = f.read()
                                content = utils.bytes_overwrite(
                                    content,
                                    elf.structs.Elf_Rela.build(reloc),
                                    rela_section['sh_offset'] +
                                    i * rela_section['sh_entsize'])
                                f.seek(0)
                                f.write(content)
                                f.truncate()
                                break

        # Load the Modified Object File and Return compiled Data or Code
        ld = cle.Loader(object2_fname,
                        main_opts={
                            "base_addr": 0x0,
                            "entry_point": 0x0
                        })
        if data_only:
            patches = []
            for section in ld.all_objects[0].sections:
                if section.name == ".rodata":
                    # Dump the raw .rodata bytes to data_fname via objcopy.
                    res = utils.exec_cmd(
                        "objcopy -B i386 -O binary -j %s %s %s" %
                        (section.name, object2_fname, data_fname),
                        shell=True)
                    if res[2] != 0:
                        raise ObjcopyException("Objcopy Error: " +
                                               str(res[0] + res[1], 'utf-8'))
                    with open(data_fname, "rb") as fp:
                        patches.append(
                            AddRODataPatch(fp.read(),
                                           name=prefix + section.name))
                    break
            return patches
        else:
            compiled = ld.memory.load(ld.all_objects[0].entry,
                                      text_section_size)
    return compiled
def process_elf_file(filename):
    """Print the symbols of an ELF file and the symbol size per section.

    Every symbol-table section is passed to ``process_symbol_section``,
    which collects symbols into one list. The symbols are then listed and
    their sizes are summed per section index. Symbols whose ``st_shndx``
    is not a usable section index (pyelftools reports special indices
    such as ``SHN_UNDEF`` or ``SHN_ABS`` as strings) are listed but left
    out of the totals.

    The process exits with an error message when the file cannot be
    opened or parsed.
    """
    try:
        f = open(filename, 'rb')
    except IOError as e:
        sys.exit("Error opening file: " + str(e))

    # The with-block guarantees the handle is released; it was never
    # closed before.
    with f:
        try:
            elffile = ELFFile(f)
        except ELFError as e:
            sys.exit("Error parsing ELF file: " + str(e))
        except IOError as e:
            sys.exit("Error opening file: " + str(e))

        all_syms = []
        for secnum, s in enumerate(elffile.iter_sections()):
            if isinstance(s, SymbolTableSection):
                print("SECTION: %d %s %s" % (secnum, s.name, s['sh_size']))
                process_symbol_section(s, all_syms)

        num_sections = elffile.num_sections()
        sym_size = [0] * (num_sections + 1)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("SEC\tBIND\t\tSIZE\tVALUE\t")
        for sym in all_syms:
            info = sym['st_info']
            shndx = sym['st_shndx']
            print("%s\t%s\t%s\t%x\t%s" % (shndx, info['bind'],
                                          sym['st_size'],
                                          int(sym['st_value']), sym.name))
            # Only real section indices can be used to index the totals.
            if isinstance(shndx, int) and shndx < len(sym_size):
                sym_size[shndx] += sym['st_size']

        for i in range(0, num_sections):
            if sym_size[i] > 0:
                print("total: %s %d" % (elffile.get_section(i).name,
                                        sym_size[i]))
class ReadElf(object):
    """ display_* methods are used to emit output into the output stream.

        The class wraps an ELFFile and writes a textual, readelf-like
        description of its parts to a caller-supplied stream.
    """

    def __init__(self, file, output):
        """ file:
                stream object with the ELF file to read

            output:
                output stream to write to
        """
        self.elffile = ELFFile(file)
        self.output = output

        # Lazily initialized if a debug dump is requested
        self._dwarfinfo = None

    def display_file_header(self):
        """ Display the ELF file header
        """
        self._emitline('ELF Header:')
        self._emit(' Magic: ')
        self._emitline(' '.join('%2.2x' % byte2int(b)
                                for b in self.elffile.e_ident_raw))
        header = self.elffile.header
        e_ident = header['e_ident']
        self._emitline(' Class: %s' %
                       describe_ei_class(e_ident['EI_CLASS']))
        self._emitline(' Data: %s' %
                       describe_ei_data(e_ident['EI_DATA']))
        self._emitline(' Version: %s' %
                       describe_ei_version(e_ident['EI_VERSION']))
        self._emitline(' OS/ABI: %s' %
                       describe_ei_osabi(e_ident['EI_OSABI']))
        self._emitline(' ABI Version: %d' % e_ident['EI_ABIVERSION'])
        self._emitline(' Type: %s' % describe_e_type(header['e_type']))
        self._emitline(' Machine: %s' %
                       describe_e_machine(header['e_machine']))
        self._emitline(' Version: %s' %
                       describe_e_version_numeric(header['e_version']))
        self._emitline(' Entry point address: %s' %
                       self._format_hex(header['e_entry']))
        self._emit(' Start of program headers: %s' % header['e_phoff'])
        self._emitline(' (bytes into file)')
        self._emit(' Start of section headers: %s' % header['e_shoff'])
        self._emitline(' (bytes into file)')
        self._emitline(' Flags: %s' % self._format_hex(header['e_flags']))
        self._emitline(' Size of this header: %s (bytes)' %
                       header['e_ehsize'])
        self._emitline(' Size of program headers: %s (bytes)' %
                       header['e_phentsize'])
        self._emitline(' Number of program headers: %s' % header['e_phnum'])
        self._emitline(' Size of section headers: %s (bytes)' %
                       header['e_shentsize'])
        self._emitline(' Number of section headers: %s' % header['e_shnum'])
        self._emitline(' Section header string table index: %s' %
                       header['e_shstrndx'])

    def display_program_headers(self, show_heading=True):
        """ Display the ELF program headers.
            If show_heading is True, displays the heading for this information
            (Elf file type is...)
        """
        self._emitline()
        if self.elffile.num_segments() == 0:
            self._emitline('There are no program headers in this file.')
            return

        elfheader = self.elffile.header
        if show_heading:
            self._emitline('Elf file type is %s' %
                           describe_e_type(elfheader['e_type']))
            self._emitline('Entry point is %s' %
                           self._format_hex(elfheader['e_entry']))
            # readelf weirdness - why isn't e_phoff printed as hex? (for
            # section headers, it is...)
            self._emitline(
                'There are %s program headers, starting at offset %s' %
                (elfheader['e_phnum'], elfheader['e_phoff']))
            self._emitline()

        self._emitline('Program Headers:')

        # Now comes the table of program headers with their attributes. Note
        # that due to different formatting constraints of 32-bit and 64-bit
        # addresses, there are some conditions on elfclass here.
        #
        # First comes the table heading
        #
        if self.elffile.elfclass == 32:
            self._emitline(
                ' Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align')
        else:
            self._emitline(' Type Offset VirtAddr PhysAddr')
            self._emitline(' FileSiz MemSiz Flags Align')

        # Now the entries
        #
        for segment in self.elffile.iter_segments():
            self._emit(' %-14s ' % describe_p_type(segment['p_type']))

            if self.elffile.elfclass == 32:
                self._emitline('%s %s %s %s %s %-3s %s' % (
                    self._format_hex(segment['p_offset'], fieldsize=6),
                    self._format_hex(segment['p_vaddr'], fullhex=True),
                    self._format_hex(segment['p_paddr'], fullhex=True),
                    self._format_hex(segment['p_filesz'], fieldsize=5),
                    self._format_hex(segment['p_memsz'], fieldsize=5),
                    describe_p_flags(segment['p_flags']),
                    self._format_hex(segment['p_align'])))
            else:  # 64
                self._emitline('%s %s %s' % (
                    self._format_hex(segment['p_offset'], fullhex=True),
                    self._format_hex(segment['p_vaddr'], fullhex=True),
                    self._format_hex(segment['p_paddr'], fullhex=True)))
                self._emitline(' %s %s %-3s %s' % (
                    self._format_hex(segment['p_filesz'], fullhex=True),
                    self._format_hex(segment['p_memsz'], fullhex=True),
                    describe_p_flags(segment['p_flags']),
                    # lead0x set to False for p_align, to mimic readelf.
                    # No idea why the difference from 32-bit mode :-|
                    self._format_hex(segment['p_align'], lead0x=False)))

            if isinstance(segment, InterpSegment):
                self._emitline(' [Requesting program interpreter: %s]' %
                               bytes2str(segment.get_interp_name()))

        # Sections to segments mapping
        #
        if self.elffile.num_sections() == 0:
            # No sections? We're done
            return

        self._emitline('\n Section to Segment mapping:')
        self._emitline(' Segment Sections...')

        for nseg, segment in enumerate(self.elffile.iter_segments()):
            self._emit(' %2.2d ' % nseg)

            for section in self.elffile.iter_sections():
                if (not section.is_null() and
                        segment.section_in_segment(section)):
                    self._emit('%s ' % bytes2str(section.name))

            self._emitline('')

    def display_section_headers(self, show_heading=True):
        """ Display the ELF section headers
        """
        elfheader = self.elffile.header
        if show_heading:
            self._emitline(
                'There are %s section headers, starting at offset %s' %
                (elfheader['e_shnum'],
                 self._format_hex(elfheader['e_shoff'])))

        self._emitline('\nSection Header%s:' %
                       ('s' if elfheader['e_shnum'] > 1 else ''))

        # Different formatting constraints of 32-bit and 64-bit addresses
        #
        if self.elffile.elfclass == 32:
            self._emitline(
                ' [Nr] Name Type Addr Off Size ES Flg Lk Inf Al')
        else:
            self._emitline(' [Nr] Name Type Address Offset')
            self._emitline(' Size EntSize Flags Link Info Align')

        # Now the entries
        #
        for nsec, section in enumerate(self.elffile.iter_sections()):
            self._emit(' [%2u] %-17.17s %-15.15s ' % (
                nsec, bytes2str(section.name),
                describe_sh_type(section['sh_type'])))

            if self.elffile.elfclass == 32:
                self._emitline('%s %s %s %s %3s %2s %3s %2s' % (
                    self._format_hex(section['sh_addr'],
                                     fieldsize=8, lead0x=False),
                    self._format_hex(section['sh_offset'],
                                     fieldsize=6, lead0x=False),
                    self._format_hex(section['sh_size'],
                                     fieldsize=6, lead0x=False),
                    self._format_hex(section['sh_entsize'],
                                     fieldsize=2, lead0x=False),
                    describe_sh_flags(section['sh_flags']),
                    section['sh_link'], section['sh_info'],
                    section['sh_addralign']))
            else:  # 64
                self._emitline(' %s %s' % (
                    self._format_hex(section['sh_addr'],
                                     fullhex=True, lead0x=False),
                    # Offsets that do not fit in 32 bits get the wide field.
                    self._format_hex(
                        section['sh_offset'],
                        fieldsize=(16 if section['sh_offset'] > 0xffffffff
                                   else 8),
                        lead0x=False)))
                self._emitline(' %s %s %3s %2s %3s %s' % (
                    self._format_hex(section['sh_size'],
                                     fullhex=True, lead0x=False),
                    self._format_hex(section['sh_entsize'],
                                     fullhex=True, lead0x=False),
                    describe_sh_flags(section['sh_flags']),
                    section['sh_link'], section['sh_info'],
                    section['sh_addralign']))

        self._emitline('Key to Flags:')
        self._emit(
            ' W (write), A (alloc), X (execute), M (merge), S (strings)')
        if self.elffile['e_machine'] in ('EM_X86_64', 'EM_L10M'):
            self._emitline(', l (large)')
        else:
            self._emitline()
        self._emitline(
            ' I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)'
        )
        self._emitline(
            ' O (extra OS processing required) o (OS specific), p (processor specific)'
        )

    def display_symbol_tables(self):
        """ Display the symbol tables contained in the file
        """
        for section in self.elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue

            if section['sh_entsize'] == 0:
                self._emitline(
                    "\nSymbol table '%s' has a sh_entsize of zero!" %
                    (bytes2str(section.name)))
                continue

            self._emitline("\nSymbol table '%s' contains %s entries:" %
                           (bytes2str(section.name), section.num_symbols()))

            if self.elffile.elfclass == 32:
                self._emitline(' Num: Value Size Type Bind Vis Ndx Name')
            else:  # 64
                self._emitline(' Num: Value Size Type Bind Vis Ndx Name')

            for nsym, symbol in enumerate(section.iter_symbols()):
                # symbol names are truncated to 25 chars, similarly to readelf
                self._emitline('%6d: %s %5d %-7s %-6s %-7s %4s %.25s' % (
                    nsym,
                    self._format_hex(symbol['st_value'],
                                     fullhex=True, lead0x=False),
                    symbol['st_size'],
                    describe_symbol_type(symbol['st_info']['type']),
                    describe_symbol_bind(symbol['st_info']['bind']),
                    describe_symbol_visibility(
                        symbol['st_other']['visibility']),
                    describe_symbol_shndx(symbol['st_shndx']),
                    bytes2str(symbol.name)))

    def display_relocations(self):
        """ Display the relocations contained in the file
        """
        has_relocation_sections = False
        for section in self.elffile.iter_sections():
            if not isinstance(section, RelocationSection):
                continue

            has_relocation_sections = True
            self._emitline(
                "\nRelocation section '%s' at offset %s contains %s entries:"
                % (bytes2str(section.name),
                   self._format_hex(section['sh_offset']),
                   section.num_relocations()))
            if section.is_RELA():
                self._emitline(
                    " Offset Info Type Sym. Value Sym. Name + Addend")
            else:
                self._emitline(" Offset Info Type Sym.Value Sym. Name")

            # The symbol table section pointed to in sh_link
            symtable = self.elffile.get_section(section['sh_link'])

            for rel in section.iter_relocations():
                hexwidth = 8 if self.elffile.elfclass == 32 else 12
                self._emit('%s %s %-17.17s' % (
                    self._format_hex(rel['r_offset'],
                                     fieldsize=hexwidth, lead0x=False),
                    self._format_hex(rel['r_info'],
                                     fieldsize=hexwidth, lead0x=False),
                    describe_reloc_type(rel['r_info_type'], self.elffile)))

                if rel['r_info_sym'] == 0:
                    self._emitline()
                    continue

                symbol = symtable.get_symbol(rel['r_info_sym'])
                # Some symbols have zero 'st_name', so instead what's used is
                # the name of the section they point at
                if symbol['st_name'] == 0:
                    symsec = self.elffile.get_section(symbol['st_shndx'])
                    symbol_name = symsec.name
                else:
                    symbol_name = symbol.name
                self._emit(' %s %s%22.22s' % (
                    self._format_hex(symbol['st_value'],
                                     fullhex=True, lead0x=False),
                    ' ' if self.elffile.elfclass == 32 else '',
                    bytes2str(symbol_name)))
                if section.is_RELA():
                    self._emit(' %s %x' % (
                        '+' if rel['r_addend'] >= 0 else '-',
                        abs(rel['r_addend'])))
                self._emitline()

        if not has_relocation_sections:
            self._emitline('\nThere are no relocations in this file.')

    def display_hex_dump(self, section_spec):
        """ Display a hex dump of a section. section_spec is either a section
            number or a name.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" %
                           (section_spec))
            return

        self._emitline("\nHex dump of section '%s':" %
                       bytes2str(section.name))
        self._note_relocs_for_section(section)
        addr = section['sh_addr']
        data = section.data()
        dataptr = 0

        while dataptr < len(data):
            bytesleft = len(data) - dataptr
            # chunks of 16 bytes per line
            linebytes = 16 if bytesleft > 16 else bytesleft

            self._emit(' %s ' % self._format_hex(addr, fieldsize=8))
            # Hex column: always 16 slots, blank-padded on a short last line.
            for i in range(16):
                if i < linebytes:
                    self._emit('%2.2x' % byte2int(data[dataptr + i]))
                else:
                    self._emit(' ')
                if i % 4 == 3:
                    self._emit(' ')

            # Character column: bytes outside 32..0x7e are shown as '.'.
            for i in range(linebytes):
                c = data[dataptr + i:dataptr + i + 1]
                if byte2int(c[0]) >= 32 and byte2int(c[0]) < 0x7f:
                    self._emit(bytes2str(c))
                else:
                    self._emit(bytes2str(b'.'))

            self._emitline()
            addr += linebytes
            dataptr += linebytes

        self._emitline()

    def display_string_dump(self, section_spec):
        """ Display a strings dump of a section. section_spec is either a
            section number or a name.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" %
                           (section_spec))
            return

        self._emitline("\nString dump of section '%s':" %
                       bytes2str(section.name))

        found = False
        data = section.data()
        dataptr = 0

        while dataptr < len(data):
            # Skip ahead to the next byte in the 32..127 range.
            while (dataptr < len(data) and
                    not (32 <= byte2int(data[dataptr]) <= 127)):
                dataptr += 1

            if dataptr >= len(data):
                break

            # The string runs up to the next NUL byte (or the end of data).
            endptr = dataptr
            while endptr < len(data) and byte2int(data[endptr]) != 0:
                endptr += 1

            found = True
            self._emitline(' [%6x] %s' % (
                dataptr, bytes2str(data[dataptr:endptr])))

            dataptr = endptr

        if not found:
            self._emitline(' No strings found in this section.')
        else:
            self._emitline()

    def display_debug_dump(self, dump_what):
        """ Dump a DWARF section
        """
        self._init_dwarfinfo()
        if self._dwarfinfo is None:
            return

        set_global_machine_arch(self.elffile.get_machine_arch())

        if dump_what == 'info':
            self._dump_debug_info()
        elif dump_what == 'decodedline':
            self._dump_debug_line_programs()
        elif dump_what == 'frames':
            self._dump_debug_frames()
        elif dump_what == 'frames-interp':
            self._dump_debug_frames_interp()
        else:
            self._emitline('debug dump not yet supported for "%s"' %
                           dump_what)

    def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True):
        """ Format an address into a hexadecimal string.

            fieldsize:
                Size of the hexadecimal field (with leading zeros to fit the
                address into). For example with fieldsize=8, the format will
                be %08x
                If None, the minimal required field size will be used.

            fullhex:
                If True, override fieldsize to set it to the maximal size
                needed for the elfclass

            lead0x:
                If True, leading 0x is added
        """
        s = '0x' if lead0x else ''
        if fullhex:
            fieldsize = 8 if self.elffile.elfclass == 32 else 16
        if fieldsize is None:
            field = '%x'
        else:
            field = '%' + '0%sx' % fieldsize
        return s + field % addr

    def _section_from_spec(self, spec):
        """ Retrieve a section given a "spec" (either number or name).
            Return None if no such section exists in the file.
        """
        try:
            num = int(spec)
        except ValueError:
            # Not a number. Must be a name then
            return self.elffile.get_section_by_name(str2bytes(spec))

        # A negative number is not a section index either; without the lower
        # bound it would be passed straight to get_section().
        if 0 <= num < self.elffile.num_sections():
            return self.elffile.get_section(num)
        return None

    def _note_relocs_for_section(self, section):
        """ If there are relocation sections pointing to the given section,
            emit a note about it.
        """
        for relsec in self.elffile.iter_sections():
            if isinstance(relsec, RelocationSection):
                info_idx = relsec['sh_info']
                if self.elffile.get_section(info_idx) == section:
                    self._emitline(
                        ' Note: This section has relocations against it, but these have NOT been applied to this dump.'
                    )
                    return

    def _init_dwarfinfo(self):
        """ Initialize the DWARF info contained in the file and assign it to
            self._dwarfinfo.
            Leave self._dwarfinfo at None if no DWARF info was found in the
            file
        """
        if self._dwarfinfo is not None:
            return

        if self.elffile.has_dwarf_info():
            self._dwarfinfo = self.elffile.get_dwarf_info()
        else:
            self._dwarfinfo = None

    def _dump_debug_info(self):
        """ Dump the debugging info section.
        """
        self._emitline('Contents of the .debug_info section:\n')

        # Offset of the .debug_info section in the stream
        section_offset = self._dwarfinfo.debug_info_sec.global_offset

        for cu in self._dwarfinfo.iter_CUs():
            self._emitline(' Compilation Unit @ offset %s:' %
                           self._format_hex(cu.cu_offset))
            self._emitline(' Length: %s (%s)' % (
                self._format_hex(cu['unit_length']),
                '%s-bit' % cu.dwarf_format()))
            self._emitline(' Version: %s' % cu['version'])
            self._emitline(' Abbrev Offset: %s' % cu['debug_abbrev_offset'])
            self._emitline(' Pointer Size: %s' % cu['address_size'])

            # The nesting depth of each DIE within the tree of DIEs must be
            # displayed. To implement this, a counter is incremented each time
            # the current DIE has children, and decremented when a null die is
            # encountered. Due to the way the DIE tree is serialized, this
            # will correctly reflect the nesting depth
            #
            die_depth = 0
            for die in cu.iter_DIEs():
                if die.is_null():
                    die_depth -= 1
                    continue
                self._emitline(' <%s><%x>: Abbrev Number: %s (%s)' % (
                    die_depth, die.offset, die.abbrev_code, die.tag))

                for attr in itervalues(die.attributes):
                    name = attr.name
                    # Unknown attribute values are passed-through as integers
                    if isinstance(name, int):
                        name = 'Unknown AT value: %x' % name
                    self._emitline(' <%2x> %-18s: %s' % (
                        attr.offset, name,
                        describe_attr_value(attr, die, section_offset)))

                if die.has_children:
                    die_depth += 1

        self._emitline()

    def _dump_debug_line_programs(self):
        """ Dump the (decoded) line programs from .debug_line
            The programs are dumped in the order of the CUs they belong to.
        """
        self._emitline(
            'Decoded dump of debug contents of section .debug_line:\n')

        for cu in self._dwarfinfo.iter_CUs():
            lineprogram = self._dwarfinfo.line_program_for_CU(cu)

            if len(lineprogram['include_directory']) > 0:
                cu_filename = '%s/%s' % (
                    bytes2str(lineprogram['include_directory'][0]),
                    bytes2str(lineprogram['file_entry'][0].name))
            else:
                cu_filename = bytes2str(lineprogram['file_entry'][0].name)

            self._emitline('CU: %s:' % cu_filename)
            self._emitline('File name Line number Starting address')

            # Print each state's file, line and address information. For some
            # instructions other output is needed to be compatible with
            # readelf.
            for entry in lineprogram.get_entries():
                state = entry.state
                if state is None:
                    # Special handling for commands that don't set a new state
                    if entry.command == DW_LNS_set_file:
                        file_entry = lineprogram['file_entry'][
                            entry.args[0] - 1]
                        if file_entry.dir_index == 0:
                            # current directory
                            self._emitline('\n./%s:[++]' %
                                           (bytes2str(file_entry.name)))
                        else:
                            self._emitline('\n%s/%s:' % (
                                bytes2str(lineprogram['include_directory'][
                                    file_entry.dir_index - 1]),
                                bytes2str(file_entry.name)))
                    elif entry.command == DW_LNE_define_file:
                        self._emitline('%s:' % (
                            bytes2str(lineprogram['include_directory'][
                                entry.args[0].dir_index])))
                elif not state.end_sequence:
                    # readelf doesn't print the state after end_sequence
                    # instructions. I think it's a bug but to be compatible
                    # I don't print them too.
                    self._emitline('%-35s %11d %18s' % (
                        bytes2str(
                            lineprogram['file_entry'][state.file - 1].name),
                        state.line,
                        '0' if state.address == 0 else
                        self._format_hex(state.address)))
                if entry.command == DW_LNS_copy:
                    # Another readelf oddity...
                    self._emitline()

    def _dump_debug_frames(self):
        """ Dump the raw frame information from .debug_frame
        """
        if not self._dwarfinfo.has_CFI():
            return
        self._emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                self._emitline('\n%08x %08x %08x CIE' % (
                    entry.offset, entry['length'], entry['CIE_id']))
                self._emitline(' Version: %d' % entry['version'])
                self._emitline(' Augmentation: "%s"' %
                               bytes2str(entry['augmentation']))
                self._emitline(' Code alignment factor: %u' %
                               entry['code_alignment_factor'])
                self._emitline(' Data alignment factor: %d' %
                               entry['data_alignment_factor'])
                self._emitline(' Return address column: %d' %
                               entry['return_address_register'])
                self._emitline()
            else:  # FDE
                self._emitline(
                    '\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                        entry.offset,
                        entry['length'],
                        entry['CIE_pointer'],
                        entry.cie.offset,
                        entry['initial_location'],
                        entry['initial_location'] + entry['address_range']))

            self._emit(describe_CFI_instructions(entry))
        self._emitline()

    def _dump_debug_frames_interp(self):
        """ Dump the interpreted (decoded) frame information from .debug_frame
        """
        if not self._dwarfinfo.has_CFI():
            return

        self._emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                self._emitline(
                    '\n%08x %08x %08x CIE "%s" cf=%d df=%d ra=%d' % (
                        entry.offset,
                        entry['length'],
                        entry['CIE_id'],
                        bytes2str(entry['augmentation']),
                        entry['code_alignment_factor'],
                        entry['data_alignment_factor'],
                        entry['return_address_register']))
                ra_regnum = entry['return_address_register']
            else:  # FDE
                self._emitline(
                    '\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                        entry.offset,
                        entry['length'],
                        entry['CIE_pointer'],
                        entry.cie.offset,
                        entry['initial_location'],
                        entry['initial_location'] + entry['address_range']))
                ra_regnum = entry.cie['return_address_register']

            # Print the heading row for the decoded table
            self._emit(' LOC')
            self._emit(' ' if entry.structs.address_size == 4 else ' ')
            self._emit(' CFA ')

            # Decode the table and look at the registers it describes.
            # We build reg_order here to match readelf's order. In particular,
            # registers are sorted by their number, and the register matching
            # ra_regnum is always listed last with a special heading.
            decoded_table = entry.get_decoded()
            reg_order = sorted(
                ifilter(lambda r: r != ra_regnum, decoded_table.reg_order))

            # Headings for the registers
            for regnum in reg_order:
                self._emit('%-6s' % describe_reg_name(regnum))
            self._emitline('ra ')

            # Now include ra_regnum in reg_order to print its values similarly
            # to the other registers.
            reg_order.append(ra_regnum)
            for line in decoded_table.table:
                self._emit(self._format_hex(
                    line['pc'], fullhex=True, lead0x=False))
                self._emit(' %-9s' % describe_CFI_CFA_rule(line['cfa']))

                for regnum in reg_order:
                    if regnum in line:
                        s = describe_CFI_register_rule(line[regnum])
                    else:
                        s = 'u'
                    self._emit('%-6s' % s)
                self._emitline()
        self._emitline()

    def _emit(self, s=''):
        """ Emit an object to output
        """
        self.output.write(str(s))

    def _emitline(self, s=''):
        """ Emit an object to output, followed by a newline
        """
        self.output.write(str(s) + '\n')
def GetObjectData(Filename, AllSymbols):
    """Summarise the symbols and section sizes of an ELF object file.

    The object is parsed in-process with ELFFile and its ".symtab" section is
    walked once; a second pass over the sections adds up code and data sizes.

    @param Filename: Path of the ELF object file to read.
    @param AllSymbols: Container of already-known symbol names (only tested
                       with ``in``).
    @return: dict with the keys
        "NeededData"      - undefined symbols that are present in AllSymbols
        "ExternalSymbols" - undefined global symbols not in AllSymbols
        "DefinedSymbols"  - names of hook, placed and other global symbols
        "PatchFuncs"      - one dict per hook function / ".text.sub." section
                            (type, name, size, address, kallsyms, alignment)
        "TextSize"        - summed size of ".text*" sections not already
                            accounted for by an entry in "PatchFuncs"
        "RODataSize"      - summed size of ".rodata*" sections
        "DataSize"        - summed size of ".data*" sections
        "BSSSize"         - summed size of global SHN_COMMON symbols
    """
    ElfData = dict()
    ElfData["NeededData"] = []
    ElfData["PatchFuncs"] = []
    ElfData["ExternalSymbols"] = []
    ElfData["DefinedSymbols"] = []
    ElfData["TextSize"] = 0
    ElfData["RODataSize"] = 0
    ElfData["DataSize"] = 0
    ElfData["BSSSize"] = 0

    #names of sections that already have their own PatchFuncs entry
    AddedSections = set()

    #the with block guarantees the file is closed even if parsing fails
    with open(Filename, "rb") as f:
        elf = ELFFile(f)
        SymbolSection = elf.get_section_by_name(".symtab")

        #go through each entry and find the undefined entries, see if they are in our list
        for Symbol in SymbolSection.iter_symbols():
            SectionIndex = Symbol.entry.st_shndx
            Bind = Symbol.entry.st_info.bind

            #if undefined then see if we have a symbol for it
            if (SectionIndex == "SHN_UNDEF") and (Symbol.name in AllSymbols):
                ElfData["NeededData"].append(Symbol.name)

            elif (Bind == "STB_GLOBAL") and (SectionIndex == "SHN_UNDEF"):
                #unknown symbol, add it to our external list
                ElfData["ExternalSymbols"].append(Symbol.name)

            elif (Bind == "STB_GLOBAL") and (SectionIndex == "SHN_COMMON"):
                #symbol is part of the BSS itself
                ElfData["BSSSize"] += Symbol.entry.st_size

            elif Symbol.name.split("_")[0] in ["HOOKBEFORE", "HOOKAFTER"]:
                #add a known symbol
                ElfData["DefinedSymbols"].append(Symbol.name)
                AddedSections.add(elf.get_section(SectionIndex).name)

                #it is one of our special entries, add it
                #note the size of the function may not match the section due to some architectures adding extra data
                #so look up the section size
                FuncName = Symbol.name.split("_")
                PatchFuncEntry = dict()
                PatchFuncEntry["type"] = FuncName[0]
                PatchFuncEntry["name"] = "_".join(FuncName[1:])
                PatchFuncEntry["address"] = -1
                PatchFuncEntry["kallsyms"] = 1

                #generate the section name and look it up to get the proper size
                #e.g. HOOKBEFORE_foo is looked up as .text.before.foo
                if FuncName[0][0:4] == "HOOK":
                    FuncName[0] = FuncName[0][4:]
                SymbolName = FuncName[0] + "." + PatchFuncEntry["name"]
                PatchSection = elf.get_section_by_name(".text." + SymbolName.lower())
                PatchFuncEntry["size"] = PatchSection.data_size
                PatchFuncEntry["alignment"] = 0
                ElfData["PatchFuncs"].append(PatchFuncEntry)

            elif isinstance(SectionIndex, int) and elf.get_section(
                    SectionIndex).name.startswith(
                        ".text.sub.") and Symbol.entry.st_size:
                #this is a special entry, it isn't a function being hooked but is used to break up where code is placed
                #add a known symbol
                ElfData["DefinedSymbols"].append(Symbol.name)

                #get the section, if it isn't already in the list then add it
                Section = elf.get_section(SectionIndex)
                if Section.name not in AddedSections:
                    AddedSections.add(Section.name)

                    #add in an entry for this section
                    PatchFuncEntry = dict()
                    PatchFuncEntry["type"] = "PLACED"
                    PatchFuncEntry["name"] = Section.name
                    PatchFuncEntry["size"] = Section.data_size
                    PatchFuncEntry["address"] = -1
                    PatchFuncEntry["kallsyms"] = 0
                    PatchFuncEntry["alignment"] = 0
                    ElfData["PatchFuncs"].append(PatchFuncEntry)

            elif isinstance(SectionIndex, int) and len(
                    Symbol.name) and (Bind == "STB_GLOBAL"):
                #add a known symbol
                ElfData["DefinedSymbols"].append(Symbol.name)

        #symbols are handled, calculate how much space is required for data and normal .text
        for Section in elf.iter_sections():
            if Section.name.startswith(".text"):
                if Section.name not in AddedSections:
                    ElfData["TextSize"] += Section.data_size
            elif Section.name.startswith(".rodata"):
                ElfData["RODataSize"] += Section.data_size
            elif Section.name.startswith(".data"):
                ElfData["DataSize"] += Section.data_size

    #return what we require
    return ElfData
class ELFParser:
    """ Class to help ELF parsing

    Opens an ELF file, records the virtual-address range of every section
    that has a non-zero address, and offers helpers that print the ELF
    header, the sections (with their symbols) and the relocation tables.
    """

    def __init__(self, filename):
        """ Open `filename`, parse it and index its section address ranges.

        :param filename: path of the ELF binary to inspect
        """
        self.fn = filename
        self.f = open(self.fn, 'rb')
        try:
            self.bin = self.f.read()
            # Rewind so the parser starts at the ELF header.
            self.f.seek(0)
            self.elf = ELFFile(self.f)
        except Exception:
            # Do not leak the handle when the file is not a valid ELF.
            self.f.close()
            raise

        # ELF header field -> human readable description
        self.struct_elf = {
            'e_type': "Object File Type",
            'e_machine': "Architecture",
            'e_entry': "Entry Point VA",
            'e_phoff': "Program header table file offset",
            'e_shoff': "Section header table file offset",
            'e_ehsize': "ELF header size in bytes",
            'e_phentsize': "Program header table entry size",
            'e_phnum': "Program header table entry count",
            'e_shentsize': "Section header table entry size",
            'e_shnum': "Section header table entry count",
            'e_shstrndx': "Section header string table index"
        }

        # Section header field -> human readable description
        self.struct_section = {
            'sh_type': "Section Type",
            'sh_addralign': "Section Address Align",
            'sh_offset': "Section Offset",
            'sh_entsize': "Section Entry Size",
            'sh_name': "Section Name",
            'sh_flags': "Section Flags",
            'sh_size': "Section Size",
            'sh_addr': "Section VA",
            'sh_link': "Section Link",
            'sh_info': "Section Info"
        }

        # Relocation Types: Value, Name, Field and Calculation from linux64-ABI
        self.struct_relocation = {
            0: "R_X86_64_NONE",  # none, none
            1: "R_X86_64_64",  # word64, S + A
            2: "R_X86_64_PC32",  # word32, S + A - P
            3: "R_X86_64_GOT32",  # word32, G + A
            4: "R_X86_64_PLT32",  # word32, L + A - P
            5: "R_X86_64_COPY",  # none, none
            6: "R_X86_64_GLOB_DAT",  # wordclass, S
            7: "R_X86_64_JUMP_SLOT",  # wordclass, S
            8: "R_X86_64_RELATIVE",  # wordclass, B + A
            9: "R_X86_64_GOTPCREL",  # word32, G + GOT + A - P
            10: "R_X86_64_32",  # word32, S + A
            11: "R_X86_64_32S",  # word32, S + A
            12: "R_X86_64_16",  # word16, S + A
            13: "R_X86_64_PC16",  # word16, S + A - P
            14: "R_X86_64_8",  # word8, S + A
            15: "R_X86_64_PC8",  # word8, S + A - P
            16: "R_X86_64_DTPMOD64",  # word64
            17: "R_X86_64_DTPOFF64",  # word64
            18: "R_X86_64_TPOFF64",  # word64
            19: "R_X86_64_TLSGD",  # word32
            20: "R_X86_64_TLSLD",  # word32
            21: "R_X86_64_DTPOFF32",  # word32
            22: "R_X86_64_GOTTPOFF",  # word32
            23: "R_X86_64_TPOFF32",  # word32
            24: "R_X86_64_PC64",  # word64, S + A - P (only for LP64)
            25: "R_X86_64_GOTOFF64",  # word64, S + A - GOT (only for LP64)
            26: "R_X86_64_GOTPC32",  # word32, GOT + A - P
            32: "R_X86_64_SIZE32",  # word32, Z + A
            33: "R_X86_64_SIZE64",  # word64, Z + A (only for LP64)
            34: "R_X86_64_GOTPC32_TLSDESC",  # word32
            35: "R_X86_64_TLSDESC_CALL",  # none
            36: "R_X86_64_TLSDESC",  # word64 * 2
            37: "R_X86_64_IRELATIVE",  # wordclass, indirect (B + A)
            38: "R_X86_64_RELATIVE64"  # word64, B + A (only for ILP32 executable or shared objects)
        }

        # section name -> (start VA, end VA), end exclusive
        self.section_ranges = {}
        self.extractSectionVAs()

    def close(self):
        """ Close the underlying file; the parser must not be used afterwards. """
        self.f.close()

    def readElfHdr(self):
        """ Print the ELF header fields listed in self.struct_elf. """
        print('ELF Header (%s)' % self.fn)
        elf_info = self.elf.header
        for field in sorted(self.struct_elf.keys()):
            elf_decr = self.struct_elf[field].ljust(35)
            if isinstance(elf_info[field], int):
                # Numeric fields are shown both in decimal and in hex.
                val = '(' + hex(elf_info[field]) + ')'
                print(" %s: %s%s" % (elf_decr, elf_info[field],
                                     val.rjust(15)))
            else:
                print(" %s: %s" % (elf_decr, elf_info[field]))

    def readRelocations(self):
        """
        Read the relocation sections in a given ELF binary
        :return:
        """
        # There are several different sections for relocation:
        #   '.rela.plt', '.rela.dyn', '.rel.plt', '.rel.dyn'
        #   The postfix .dyn represents the table for dynamic linker
        reloc_section_names = [
            '.rela.plt', '.rela.dyn', '.rel.plt', '.rel.dyn'
        ]

        for reloc_name in reloc_section_names:
            rel = self.elf.get_section_by_name(reloc_name)
            if isinstance(rel, RelocationSection):
                print('Relocation Section: %s (%d)' %
                      (reloc_name, rel.num_relocations()))

                # Lookup all entry attributes
                for i, r in enumerate(rel.iter_relocations()):
                    # The pieces are joined with single spaces and printed as
                    # one line per relocation.
                    parts = ['\t[%3d] Offset + Addend: %s +' %
                             (i + 1, hex(r['r_offset']))]
                    if 'rela' in reloc_name:
                        parts.append(str(r['r_addend']))
                    rel_type = r['r_info_type']
                    # Types missing from the table are shown by number
                    # instead of aborting the listing with a KeyError.
                    parts.append('\tInfo (Type, Symbol): %s (%s, %s)' % (
                        hex(r['r_info']),
                        self.struct_relocation.get(rel_type, rel_type),
                        r['r_info_sym']))
                    print(' '.join(parts))

    def readSections(self):
        """
        Read all sections in a given ELF binary
        """
        def sectionInfo(index):
            section = self.elf.get_section(index)
            # A section type is in its header, but the name was decoded and placed in a public attribute.
            print(' [%2d] Section %s' % (index, section.name))
            for field in sorted(self.struct_section.keys()):
                sec_desc = self.struct_section[field].ljust(25)
                print('\t%s : %s' % (sec_desc, section[field]))

            # Case: a section table contains a symbol table section
            if isinstance(section, SymbolTableSection):
                for sym_no in range(section.num_symbols()):
                    symbol = section.get_symbol(sym_no)
                    print(" [%2d] Symbol: %s (Ty=%-7s, Bind=%-6s, Sym_Other=%-7s, Shndx=%4s, Val=0x%x, Sz=0x%x)" %
                          (sym_no, symbol.name, symbol['st_info']['type'],
                           symbol['st_info']['bind'],
                           symbol['st_other']['visibility'],
                           symbol['st_shndx'], symbol['st_value'],
                           symbol['st_size']))

        sec_cnt = self.elf.num_sections()
        print('Found %s sections' % sec_cnt)
        for index in range(sec_cnt):
            sectionInfo(index)

    def extractSectionVAs(self):
        """ Fill self.section_ranges with (start, end) for every section,
        other than section 0, whose sh_addr is greater than zero. """
        for index in range(1, self.elf.num_sections()):
            sec = self.elf.get_section(index)
            va = sec['sh_addr']
            if va > 0:
                self.section_ranges[sec.name] = (va, va + sec['sh_size'])

    def getSectionVA(self, sn):
        """ Return the start VA of section `sn` (KeyError if not recorded). """
        return self.section_ranges[sn][0]

    def getSectionByVA(self, va):
        """ Return the name of the first recorded section containing `va`,
        or None when no recorded section does. """
        for sn, (start, end) in self.section_ranges.items():
            if start <= va < end:
                return sn
        return None

    def isVAinSection(self, kind, va):
        """ Return True if `va` lies inside the recorded range of section
        `kind`, False otherwise (including when `kind` was not recorded). """
        # Each entry is ONE (start, end) pair; it must be unpacked, not
        # iterated over.
        bounds = self.section_ranges.get(kind)
        if bounds is None:
            return False
        start, end = bounds
        return start <= va < end
def _pack_sections(elf, section_names):
    """Concatenate the named sections into one zero-padded byte image.

    Returns ``(image, offsets)`` where ``image`` is a bytearray and
    ``offsets`` maps each section name to its start offset inside it.
    """
    offsets = {}
    image = bytearray()
    for section_name in section_names:
        offsets[section_name] = len(image)
        section = elf.get_section_by_name(section_name)
        if section is None:
            data = b''
            print('[WW] No %s section' % section_name)
        else:
            if section['sh_type'] == 'SHT_NOBITS':
                # .bss-like sections have no file contents; reserve zeros.
                data = b'\0' * section['sh_size']
            else:
                data = section.data()
            print("[II] Section %s is %d bytes offset %d" %
                  (section_name, len(data), offsets[section_name]))
        image += data
        # padding to 16 (a full 16 bytes is added when already aligned,
        # which keeps the layout identical to the previous implementation)
        image += b'\0' * (16 - len(image) % 16)
    return image, offsets


def _apply_relocations(elf, section_names, offsets, image):
    """Patch `image` in place for every relocation of the packed sections.

    Returns the image offsets of the 32-bit absolute slots
    (R_X86_64_32 / R_X86_64_32S).
    """
    relocs = []
    for section_name in section_names:
        reloc_section = find_relocations_for_section(elf, section_name)
        if reloc_section is None:
            continue
        symtab = elf.get_section(reloc_section['sh_link'])
        for reloc in reloc_section.iter_relocations():
            assert elf.get_machine_arch() == 'x64' and reloc.is_RELA()
            reloc_base = offsets[section_name]
            reloc_offset = reloc['r_offset']
            reloc_type = reloc['r_info_type']
            target_symbol = symtab.get_symbol(reloc['r_info_sym'])
            target_name = elf.get_section(target_symbol['st_shndx']).name
            target_base = offsets[target_name]
            target_offset = target_symbol['st_value']

            patch_at = reloc_base + reloc_offset
            # Current contents of the 32-bit slot, read as signed.
            value = struct.unpack_from("<l", image, patch_at)[0]

            if reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_32']:
                value = target_base + target_offset + value + reloc['r_addend']
                relocs.append(patch_at)
                print("[II] Offset  %s added to reloc list" % patch_at)
            elif reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_PC32']:
                # PC-relative: independent of the final load address, so it
                # is not added to the reloc list.
                value = ((target_base + target_offset) - patch_at
                         + value + reloc['r_addend'])
            elif reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_32S']:
                value = target_base + target_offset + value + reloc['r_addend']
                relocs.append(patch_at)
            else:
                assert reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_NONE']

            struct.pack_into("<L", image, patch_at, value & 0xffffffff)
    return relocs


def _bytes_to_c_array(data):
    """Format bytes as C initializer rows of eight, padded to a multiple of 4."""
    data = bytearray(data)
    if len(data) % 4 != 0:
        data += b'\0' * (4 - len(data) % 4)
    return ''.join(
        ' ' + ''.join(' 0x%02x,' % b for b in data[i:i + 8]) + '\n'
        for i in range(0, len(data), 8))


def _ints_to_c_array(arr):
    """Format integers as C initializer rows of ten."""
    return ''.join(
        ' ' + ''.join(' %d,' % x for x in arr[i:i + 10]) + '\n'
        for i in range(0, len(arr), 10))


def _write_loader_source(elf, image, offsets, relocs, path='bot_opt.cc'):
    """Write the C++ file that embeds the image and resolves the entry points."""
    bss = elf.get_section_by_name('.bss')
    bss_size = bss['sh_size'] if bss is not None else 0
    # Explicit split index: a negative slice with bss_size == 0 would select
    # the whole image / nothing instead of nothing / the whole image.
    split = len(image) - bss_size
    assert image[split:] == bytearray(bss_size)
    pagesize = 4096

    symbols_by_name = {}
    for s in elf.get_section_by_name('.symtab').iter_symbols():
        # Later duplicates win, as with the previous linear scan.
        symbols_by_name[s.name] = s

    with open(path, 'w') as fp:
        fp.write('''#include <sys/mman.h>
#include "bot_opt.h"

static unsigned char code[%d] __attribute__((aligned(4096))) = {
%s};

static int patch[] = {
%s};
''' % (
            (len(image) + pagesize - 1) // pagesize * pagesize,
            _bytes_to_c_array(image[:split]),
            _ints_to_c_array(relocs),
        ))
        fp.write('''
void load_code() {
  if ((uintptr_t)code > 0xffffffffull) return;
  mprotect(code, sizeof(code), PROT_READ|PROT_WRITE|PROT_EXEC);
  for (unsigned int i = 0; i < sizeof(patch)/sizeof(patch[0]); i++) {
    *(uint32_t*)(void*)(code + patch[i]) += (uintptr_t)code;
  }
''')
        # export symbols
        for entry in (
                'root_search_move',
                'init_bot',
                'max_lookahead',
                'maybe_dead_threshold',
                'search_threshold',
                'cache1_clear',
        ):
            symbol = symbols_by_name.get(entry)
            assert symbol is not None
            section = elf.get_section(symbol['st_shndx']).name
            base = offsets[section]
            offset = symbol['st_value']
            start = base + offset
            print('%s %s %s' % (section, entry, start))
            if section == '.text':
                fp.write(' %s_func = (%s_func_t)((char*)code + %d);\n' %
                         (entry, entry, start))
            else:
                fp.write(' %s_ptr = (%s_ptr_t)((char*)code + %d);\n' %
                         (entry, entry, start))
        fp.write('}\n')


def main():
    """Pack an x86-64 ELF object into a self-relocating C array.

    Reads the object named by ``sys.argv[1]``, concatenates its .text*,
    .data*, .rodata* and .bss* sections (in that order), applies the
    relocations against the packed layout and writes ``bot_opt.cc``: the
    code bytes, the list of 32-bit absolute slots to rebase at load time,
    and a ``load_code()`` function that resolves the exported entry points.
    """
    filename = sys.argv[1]
    # Binary mode, and closed again once the image has been written.
    with open(filename, 'rb') as elf_stream:
        elf = ELFFile(elf_stream)
        print('[II] Object %s is a %s_%s elf' %
              (filename, elf.get_machine_arch(), elf.elfclass))
        assert elf.elfclass == 64 and elf.get_machine_arch() == 'x64'
        print("[II] Elf has %d sections." % elf.num_sections())

        selected_sections = []
        for section_prefix in ['.text', '.data', '.rodata', '.bss']:
            for section in elf.iter_sections():
                if section.name.startswith(section_prefix):
                    selected_sections.append(section.name)

        image, offsets = _pack_sections(elf, selected_sections)
        print("[II] Total packed data size %d" % len(image))

        relocs = _apply_relocations(elf, selected_sections, offsets, image)
        _write_loader_source(elf, image, offsets, relocs)
class QlReadELF(object):
    """Expose readelf-style views of an ELF image as plain Python containers.

    Each ``elf_*`` method returns dicts (or lists of dicts) built from the
    parsed ``ELFFile`` instead of printing formatted text.
    """

    def __init__(self, ql: Qiling, elf_stream):
        """Parse ``elf_stream`` with ``ELFFile`` and keep a reference to ``ql``.

        @param ql: owning Qiling instance (stored, not otherwise used here)
        @param elf_stream: stream handed to ``ELFFile``
        """
        self.ql = ql
        self.elffile = ELFFile(elf_stream)
        # Filled lazily by _init_versioninfo().
        self._versioninfo = None

    def elf_file_header(self):
        """Return the ELF file header as a dict keyed by readelf-like labels."""
        header = self.elffile.header
        e_ident = header['e_ident']

        return {
            'Magic': ' '.join('%2.2x' % byte2int(b)
                              for b in self.elffile.e_ident_raw),
            'Class': describe_ei_class(e_ident['EI_CLASS']),
            'Data': describe_ei_data(e_ident['EI_DATA']),
            'Version': e_ident['EI_VERSION'],
            'OS/ABI': describe_ei_osabi(e_ident['EI_OSABI']),
            'ABI Version': e_ident['EI_ABIVERSION'],
            'Type': describe_e_type(header['e_type']),
            'Machine': describe_e_machine(header['e_machine']),
            'Version_e': describe_e_version_numeric(header['e_version']),
            'Entry point address': self._format_hex(header['e_entry']),
            'Start of program headers': header['e_phoff'],
            'Start of section headers': header['e_shoff'],
            'Flags': [
                self._format_hex(header['e_flags']),
                self.decode_flags(header['e_flags']),
            ],
            'Size of this header': header['e_ehsize'],
            'Size of program headers': header['e_phentsize'],
            'Number of program headers': header['e_phnum'],
            'Size of section headers': header['e_shentsize'],
            'Number of section headers': header['e_shnum'],
            'Section header string table index': header['e_shstrndx'],
        }

    def elf_program_headers(self):
        """Return one dict per program header, or None if there are no segments."""
        if self.elffile.num_segments() == 0:
            return None

        program_headers = []
        for segment in self.elffile.iter_segments():
            program_headers.append({
                'Type': describe_p_type(segment['p_type']),
                'Offset': self._format_hex(segment['p_offset'], fieldsize=6),
                'VirtAddr': self._format_hex(segment['p_vaddr'], fullhex=True),
                'PhysAddr': self._format_hex(segment['p_paddr'], fullhex=True),
                'FileSiz': self._format_hex(segment['p_filesz'], fieldsize=5),
                'MemSiz': self._format_hex(segment['p_memsz'], fieldsize=5),
                'Flg': describe_p_flags(segment['p_flags']),
                'Align': self._format_hex(segment['p_align']),
            })
        return program_headers

    def elf_section_headers(self):
        """Return one dict per section header, or None if there are no sections."""
        if self.elffile.num_sections() == 0:
            return None

        section_headers = []
        for nsec, section in enumerate(self.elffile.iter_sections()):
            section_headers.append({
                'index': nsec,
                'Name': section.name,
                'Type': describe_sh_type(section['sh_type']),
                'Addr': self._format_hex(section['sh_addr'],
                                         fieldsize=8, lead0x=False),
                'Offset': self._format_hex(section['sh_offset'],
                                           fieldsize=6, lead0x=False),
                'Size': self._format_hex(section['sh_size'],
                                         fieldsize=6, lead0x=False),
                'ES': self._format_hex(section['sh_entsize'],
                                       fieldsize=2, lead0x=False),
                'Flag': describe_sh_flags(section['sh_flags']),
                'Lk': section['sh_link'],
                'Inf': section['sh_info'],
                'Al': section['sh_addralign'],
            })
        return section_headers

    def elf_symbol_tables(self):
        """Return one dict per symbol found in any symbol table section.

        Returns None when the file has no sections at all; otherwise a
        (possibly empty) list. Symbol tables whose ``sh_entsize`` is zero are
        skipped. For ``SHT_DYNSYM`` sections in GNU versioning mode the
        ``version_info`` entry carries an ``@name`` / ``@@name`` suffix.
        """
        self._init_versioninfo()

        symbol_tables = [
            s for s in self.elffile.iter_sections()
            if isinstance(s, SymbolTableSection)
        ]
        if not symbol_tables and self.elffile.num_sections() == 0:
            return None

        symbol_tables_list = []
        for section in symbol_tables:
            if section['sh_entsize'] == 0:
                continue

            # Version suffixes only apply to the dynamic symbol table when
            # GNU versioning is in use.
            versioned = (section['sh_type'] == 'SHT_DYNSYM'
                         and self._versioninfo['type'] == 'GNU')

            for nsym, symbol in enumerate(section.iter_symbols()):
                version_info = ''
                if versioned:
                    version = self._symbol_version(nsym)
                    # _symbol_version() returns None when no version entry
                    # exists for this index; treat that as "no suffix"
                    # instead of failing on the subscript.
                    if (version is not None
                            and version['name'] != symbol.name
                            and version['index'] not in ('VER_NDX_LOCAL',
                                                         'VER_NDX_GLOBAL')):
                        if version['filename']:
                            # external symbol
                            version_info = '@%(name)s (%(index)i)' % version
                        elif version['hidden']:
                            # internal, hidden symbol
                            version_info = '@%(name)s' % version
                        else:
                            # internal symbol
                            version_info = '@@%(name)s' % version

                symbol_tables_list.append({
                    'index': nsym,
                    'Value': self._format_hex(symbol['st_value'],
                                              fullhex=True, lead0x=False),
                    'Size': symbol['st_size'],
                    'Type': describe_symbol_type(symbol['st_info']['type']),
                    'Bind': describe_symbol_bind(symbol['st_info']['bind']),
                    'Vis': describe_symbol_visibility(
                        symbol['st_other']['visibility']),
                    'Ndx': describe_symbol_shndx(symbol['st_shndx']),
                    'Name': symbol.name,
                    'version_info': version_info,
                })
        return symbol_tables_list

    def decode_flags(self, flags):
        """Return a textual description of ``e_flags`` for ARM and MIPS files.

        Every recognised flag contributes a ``", <label>"`` fragment; for any
        other machine type the result is an empty string.
        """
        description = ""
        machine = self.elffile['e_machine']

        if machine == "EM_ARM":
            eabi = flags & E_FLAGS.EF_ARM_EABIMASK
            flags &= ~E_FLAGS.EF_ARM_EABIMASK

            if flags & E_FLAGS.EF_ARM_RELEXEC:
                # NOTE(review): the label looks truncated ("executabl"); it is
                # left untouched because callers may compare against it.
                description += ', relocatable executabl'
                flags &= ~E_FLAGS.EF_ARM_RELEXEC

            if eabi == E_FLAGS.EF_ARM_EABI_VER5:
                EF_ARM_KNOWN_FLAGS = (E_FLAGS.EF_ARM_ABI_FLOAT_SOFT
                                      | E_FLAGS.EF_ARM_ABI_FLOAT_HARD
                                      | E_FLAGS.EF_ARM_LE8
                                      | E_FLAGS.EF_ARM_BE8)
                description += ', Version5 EABI'
                if flags & E_FLAGS.EF_ARM_ABI_FLOAT_SOFT:
                    description += ", soft-float ABI"
                elif flags & E_FLAGS.EF_ARM_ABI_FLOAT_HARD:
                    description += ", hard-float ABI"

                if flags & E_FLAGS.EF_ARM_BE8:
                    description += ", BE8"
                elif flags & E_FLAGS.EF_ARM_LE8:
                    description += ", LE8"

                if flags & ~EF_ARM_KNOWN_FLAGS:
                    description += ', <unknown>'
            else:
                description += ', <unrecognized EABI>'

        elif machine == "EM_MIPS":
            if flags & E_FLAGS.EF_MIPS_NOREORDER:
                description += ", noreorder"
            if flags & E_FLAGS.EF_MIPS_PIC:
                description += ", pic"
            if flags & E_FLAGS.EF_MIPS_CPIC:
                description += ", cpic"
            if flags & E_FLAGS.EF_MIPS_ABI2:
                description += ", abi2"
            if flags & E_FLAGS.EF_MIPS_32BITMODE:
                description += ", 32bitmode"

            if flags & E_FLAGS_MASKS.EFM_MIPS_ABI_O32:
                description += ", o32"
            elif flags & E_FLAGS_MASKS.EFM_MIPS_ABI_O64:
                description += ", o64"
            elif flags & E_FLAGS_MASKS.EFM_MIPS_ABI_EABI32:
                description += ", eabi32"
            elif flags & E_FLAGS_MASKS.EFM_MIPS_ABI_EABI64:
                description += ", eabi64"

            # The architecture field is compared against each known value in
            # the same order as the original chain of independent checks.
            arch = flags & E_FLAGS.EF_MIPS_ARCH
            for arch_value, label in (
                    (E_FLAGS.EF_MIPS_ARCH_1, ", mips1"),
                    (E_FLAGS.EF_MIPS_ARCH_2, ", mips2"),
                    (E_FLAGS.EF_MIPS_ARCH_3, ", mips3"),
                    (E_FLAGS.EF_MIPS_ARCH_4, ", mips4"),
                    (E_FLAGS.EF_MIPS_ARCH_5, ", mips5"),
                    (E_FLAGS.EF_MIPS_ARCH_32R2, ", mips32r2"),
                    (E_FLAGS.EF_MIPS_ARCH_64R2, ", mips64r2"),
                    (E_FLAGS.EF_MIPS_ARCH_32, ", mips32"),
                    (E_FLAGS.EF_MIPS_ARCH_64, ", mips64"),
            ):
                if arch == arch_value:
                    description += label

        return description

    def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True,
                    alternate=False):
        """ Format an address into a hexadecimal string.

            fieldsize:
                Size of the hexadecimal field (with leading zeros to fit the
                address into). For example with fieldsize=8, the format will
                be %08x. If None, the minimal required field size is used.

            fullhex:
                If True, override fieldsize to set it to the maximal size
                needed for the elfclass (8 digits for 32-bit, 16 otherwise).

            lead0x:
                If True, leading 0x is added.

            alternate:
                If True, override lead0x to emulate the alternate hexadecimal
                form specified in format string with the # character: only
                non-zero values are prefixed with 0x, and the prefix counts
                towards fieldsize. This form is used by readelf.
        """
        if alternate:
            if addr == 0:
                lead0x = False
            else:
                lead0x = True
                # The "0x" prefix consumes two columns of the field. With no
                # explicit field size there is nothing to shrink (previously
                # this raised TypeError on ``None -= 2``).
                if fieldsize is not None:
                    fieldsize -= 2

        prefix = '0x' if lead0x else ''
        if fullhex:
            fieldsize = 8 if self.elffile.elfclass == 32 else 16
        if fieldsize is None:
            field = '%x'
        else:
            field = '%' + '0%sx' % fieldsize
        return prefix + field % addr

    def _init_versioninfo(self):
        """ Search and initialize information about version related sections
            and the kind of versioning used (GNU or Solaris).

            The result is cached in self._versioninfo; later calls return
            immediately.
        """
        if self._versioninfo is not None:
            return

        self._versioninfo = {
            'versym': None,
            'verdef': None,
            'verneed': None,
            'type': None
        }

        for section in self.elffile.iter_sections():
            if isinstance(section, GNUVerSymSection):
                self._versioninfo['versym'] = section
            elif isinstance(section, GNUVerDefSection):
                self._versioninfo['verdef'] = section
            elif isinstance(section, GNUVerNeedSection):
                self._versioninfo['verneed'] = section
            elif isinstance(section, DynamicSection):
                # A DT_VERSYM dynamic tag marks GNU-style versioning.
                for tag in section.iter_tags():
                    if tag['d_tag'] == 'DT_VERSYM':
                        self._versioninfo['type'] = 'GNU'
                        break

        # Version sections without DT_VERSYM are classified as Solaris style.
        if not self._versioninfo['type'] and (
                self._versioninfo['verneed'] or self._versioninfo['verdef']):
            self._versioninfo['type'] = 'Solaris'

    def _symbol_version(self, nsym):
        """ Return a dict containing version information on the symbol with
            index nsym, or None if no version information is available.

            The dict has the keys 'index', 'name', 'filename' and 'hidden';
            entries that do not apply are left as None.
        """
        self._init_versioninfo()

        symbol_version = dict.fromkeys(('index', 'name', 'filename', 'hidden'))

        versym = self._versioninfo['versym']
        if not versym or nsym >= versym.num_symbols():
            return None

        symbol = versym.get_symbol(nsym)
        index = symbol.entry['ndx']
        if index not in ('VER_NDX_LOCAL', 'VER_NDX_GLOBAL'):
            index = int(index)

            if self._versioninfo['type'] == 'GNU':
                # In GNU versioning mode, the highest bit is used to
                # store whether the symbol is hidden or not
                if index & 0x8000:
                    index &= ~0x8000
                    symbol_version['hidden'] = True

            verdef = self._versioninfo['verdef']
            if verdef and index <= verdef.num_versions():
                _, verdaux_iter = verdef.get_version(index)
                symbol_version['name'] = next(verdaux_iter).name
            else:
                verneed, vernaux = \
                    self._versioninfo['verneed'].get_version(index)
                symbol_version['name'] = vernaux.name
                symbol_version['filename'] = verneed.name

        symbol_version['index'] = index
        return symbol_version
class ELFExecutable(BaseExecutable):
    """ELF-backed executable: symbol extraction, code injection and patching.

    Parsing is delegated to an ``ELFFile`` kept in ``self.helper``; it is
    rebuilt from ``self.binary`` whenever the image layout is rewritten.
    """

    def __init__(self, file_path):
        """Parse the ELF at ``file_path`` and cache frequently used facts.

        Raises ``Exception`` when the machine type is not one of the
        architectures known to ``_identify_arch``. Indexing ``[0]`` below
        raises ``IndexError`` when the file has no executable PT_LOAD segment.
        """
        super(ELFExecutable, self).__init__(file_path)

        self.helper = ELFFile(self.binary)

        self.architecture = self._identify_arch()
        if self.architecture is None:
            raise Exception('Architecture is not recognized')

        logging.debug('Initialized {} {} with file \'{}\''.format(
            self.architecture, type(self).__name__, file_path))

        self.pack_endianness = '<' if self.helper.little_endian else '>'
        self.address_pack_type = 'I' if self.helper.elfclass == 32 else 'Q'

        self.sections = [
            section_from_elf_section(s) for s in self.helper.iter_sections()
        ]

        # First PT_LOAD segment with the execute bit (0x1) set.
        self.executable_segment = [
            s for s in self.helper.iter_segments()
            if s['p_type'] == 'PT_LOAD' and s['p_flags'] & 0x1
        ][0]

        # NOTE(review): self.libraries is only assigned here when a .dynamic
        # section exists - confirm the base class provides a default.
        dyn = self.helper.get_section_by_name('.dynamic')
        if dyn:
            self.libraries = [
                t.needed for t in dyn.iter_tags()
                if t['d_tag'] == 'DT_NEEDED'
            ]

        self.next_injection_offset = 0
        self.next_injection_vaddr = 0

    def _identify_arch(self):
        """Map the ELF machine name to an ARCHITECTURE constant, or None."""
        machine = self.helper.get_machine_arch()
        if machine == 'x86':
            return ARCHITECTURE.X86
        elif machine == 'x64':
            return ARCHITECTURE.X86_64
        elif machine == 'ARM':
            return ARCHITECTURE.ARM
        elif machine == 'AArch64':
            return ARCHITECTURE.ARM_64
        else:
            return None

    def entry_point(self):
        """Return the ``e_entry`` field of the ELF header."""
        return self.helper['e_entry']

    def executable_segment_vaddr(self):
        """Return ``p_vaddr`` of the executable LOAD segment."""
        return self.executable_segment['p_vaddr']

    def executable_segment_size(self):
        """Return ``p_memsz`` of the executable LOAD segment."""
        # TODO: Maybe limit this because we use this as part of our injection method?
        return self.executable_segment['p_memsz']

    def iter_string_sections(self):
        """Yield the wrapped sections named .rodata, .data or .bss."""
        STRING_SECTIONS = ['.rodata', '.data', '.bss']
        for s in self.sections:
            if s.name in STRING_SECTIONS:
                yield s

    def _extract_symbol_table(self):
        """Populate ``self.functions`` from the PLT relocations and .symtab."""
        # Add in symbols from the PLT/rela.plt
        # .rela.plt contains indexes to reference both .dynsym (symbol names) and .plt (jumps to GOT)
        if self.is_64_bit():
            reloc_section = self.helper.get_section_by_name('.rela.plt')
        else:
            reloc_section = self.helper.get_section_by_name('.rel.plt')

        if reloc_section:
            dynsym = self.helper.get_section(reloc_section['sh_link'])  # .dynsym
            plt = self.helper.get_section_by_name('.plt')
            if not isinstance(dynsym, SymbolTableSection):
                logging.debug(
                    'Relocation section had sh_link to {}. Not parsing symbols...'
                    .format(dynsym))
            elif plt is None:
                # Without a .plt section there is no address to attach the
                # symbols to (previously this failed subscripting None).
                logging.debug(
                    'No .plt section found. Not adding PLT functions...')
            else:
                # The address of a function in the PLT is the base PLT offset + the index of the relocation.
                # However, since there is the extra "trampoline" entity at the top of the PLT, we need to add one to
                # the index to account for it.
                # While sh_entsize is sometimes defined, it appears to be incorrect in some cases so we just ignore
                # that and calculate it based off of the total size / num_relocations (plus the trampoline entity).
                # Floor division keeps the entry size (and thus every PLT
                # address) an integer on Python 3 as well as Python 2.
                entsize = plt['sh_size'] // (reloc_section.num_relocations() + 1)

                for idx, reloc in enumerate(reloc_section.iter_relocations()):
                    # Get the symbol's name from dynsym
                    symbol_name = dynsym.get_symbol(reloc['r_info_sym']).name
                    plt_addr = plt['sh_addr'] + ((idx + 1) * entsize)

                    logging.debug(
                        'Directly adding PLT function {} at vaddr {}'.format(
                            symbol_name, hex(plt_addr)))
                    f = Function(plt_addr,
                                 entsize,
                                 symbol_name + '@PLT',
                                 self,
                                 type=Function.DYNAMIC_FUNC)
                    self.functions[plt_addr] = f

        # Some things in the symtab have st_size = 0 which confuses analysis later on. To solve this, we keep track of
        # where each address is in the `function_vaddrs` set and go back after all symbols have been iterated to compute
        # size by taking the difference between the current address and the next recorded address.
        # We do this for each executable section so that the produced functions cannot span multiple sections.
        symbol_table = self.helper.get_section_by_name('.symtab')
        if not symbol_table:
            # Nothing else to add when the binary is stripped.
            return

        for section in self.helper.iter_sections():
            if not self.executable_segment.section_in_segment(section):
                continue

            section_start = section['sh_addr']
            section_end = section_start + section['sh_size']

            name_for_addr = {}
            # Seeding with the section end gives the last function an upper
            # bound for its size.
            function_vaddrs = {section_end}

            for symbol in symbol_table.iter_symbols():
                if symbol['st_info']['type'] != 'STT_FUNC' or \
                        symbol['st_shndx'] == 'SHN_UNDEF':
                    continue
                if not section_start <= symbol['st_value'] < section_end:
                    continue

                name_for_addr[symbol['st_value']] = symbol.name
                function_vaddrs.add(symbol['st_value'])

                if symbol['st_size']:
                    logging.debug(
                        'Eagerly adding function {} from .symtab at vaddr {} with size {}'
                        .format(symbol.name, hex(symbol['st_value']),
                                hex(symbol['st_size'])))
                    f = Function(symbol['st_value'], symbol['st_size'],
                                 symbol.name, self)
                    self.functions[symbol['st_value']] = f

            function_vaddrs = sorted(function_vaddrs)

            for cur_addr, next_addr in zip(function_vaddrs[:-1],
                                           function_vaddrs[1:]):
                # If st_size was set, we already added the function above, so don't add it again.
                if cur_addr not in self.functions:
                    func_name = name_for_addr[cur_addr]
                    size = next_addr - cur_addr
                    logging.debug(
                        'Lazily adding function {} from .symtab at vaddr {} with size {}'
                        .format(func_name, hex(cur_addr), hex(size)))
                    f = Function(cur_addr,
                                 size,
                                 func_name,
                                 self,
                                 type=Function.DYNAMIC_FUNC)
                    self.functions[cur_addr] = f

        # TODO: Automatically find and label main from call to libc_start_main

    def _prepare_for_injection(self):
        """
        Derived from http://vxheavens.com/lib/vsc01.html

        Grow the last section of the executable LOAD segment by
        INJECTION_SIZE bytes of 0xCC filler and shift every header offset
        located after it. Returns True on success, False when no executable
        LOAD segment is found.
        """
        modified = StringIO(self.binary.getvalue())

        # Add INJECTION_SIZE to the section header list offset to make room for our injected code
        elf_hdr = self.helper.header.copy()
        elf_hdr.e_shoff += INJECTION_SIZE
        logging.debug('Changing e_shoff to {}'.format(elf_hdr.e_shoff))

        modified.seek(0)
        modified.write(self.helper.structs.Elf_Ehdr.build(elf_hdr))

        # Find the main RX LOAD segment and also adjust other segment offsets along the way
        executable_segment = None

        for segment_idx, segment in enumerate(self.helper.iter_segments()):
            segment_hdr = segment.header.copy()
            segment_hdr_offset = self.helper._segment_offset(segment_idx)

            if executable_segment is not None:
                # Already past the executable segment, so just update the offset if needed (i.e. don't update things
                # that come before the expanded section)
                if segment_hdr.p_offset > last_exec_section['sh_offset']:
                    segment_hdr.p_offset += INJECTION_SIZE

            elif segment['p_type'] == 'PT_LOAD' and \
                    segment['p_flags'] & P_FLAGS.PF_X:
                # Found the executable LOAD segment.
                # Make room for our injected code.
                logging.debug(
                    'Found executable LOAD segment at index {}'.format(
                        segment_idx))
                executable_segment = segment

                last_exec_section_idx = max([
                    idx for idx in range(self.helper.num_sections())
                    if executable_segment.section_in_segment(
                        self.helper.get_section(idx))
                ])
                last_exec_section = self.helper.get_section(
                    last_exec_section_idx)

                segment_hdr.p_filesz += INJECTION_SIZE
                segment_hdr.p_memsz += INJECTION_SIZE

                logging.debug(
                    'Rewriting segment filesize and memsize to {} and {}'.
                    format(segment_hdr.p_filesz, segment_hdr.p_memsz))

            # Every program header is written back, changed or not.
            modified.seek(segment_hdr_offset)
            modified.write(self.helper.structs.Elf_Phdr.build(segment_hdr))

        if executable_segment is None:
            logging.error(
                "Could not locate an executable LOAD segment. Cannot continue injection."
            )
            return False

        logging.debug(
            'Last section in executable LOAD segment is at index {} ({})'.
            format(last_exec_section_idx, last_exec_section.name))

        self.next_injection_offset = last_exec_section['sh_offset'] + \
            last_exec_section['sh_size']
        self.next_injection_vaddr = last_exec_section['sh_addr'] + \
            last_exec_section['sh_size']

        # Update sh_size for the section we grew
        section_header_offset = self.helper._section_offset(
            last_exec_section_idx)
        section_header = last_exec_section.header.copy()
        section_header.sh_size += INJECTION_SIZE
        modified.seek(section_header_offset)
        modified.write(self.helper.structs.Elf_Shdr.build(section_header))

        # Update sh_offset for each section past the last section in the executable segment
        for section_idx in range(last_exec_section_idx + 1,
                                 self.helper.num_sections()):
            section_header_offset = self.helper._section_offset(section_idx)
            section_header = self.helper.get_section(section_idx).header.copy()

            section_header.sh_offset += INJECTION_SIZE

            logging.debug('Rewriting section {}\'s offset to {}'.format(
                section_idx, section_header.sh_offset))

            modified.seek(section_header_offset)
            modified.write(self.helper.structs.Elf_Shdr.build(section_header))

        # Splice the filler in at the injection point and re-parse the image.
        modified = StringIO(modified.getvalue()[:self.next_injection_offset] +
                            '\xCC' * INJECTION_SIZE +
                            modified.getvalue()[self.next_injection_offset:])

        self.binary = modified
        self.helper = ELFFile(self.binary)

        return True

    def inject(self, asm, update_entry=False):
        """Write ``asm`` into the injection area and return its virtual address.

        @param asm: assembled bytes to write
        @param update_entry: when True, ``e_entry`` is rewritten to point at
                             the injected code
        """
        for segment in self.helper.iter_segments():
            if segment['p_type'] == 'PT_LOAD' and \
                    segment['p_flags'] & P_FLAGS.PF_X:
                injection_section_idx = max(
                    i for i in range(self.helper.num_sections())
                    if segment.section_in_segment(self.helper.get_section(i)))
                break

        injection_section = self.helper.get_section(injection_section_idx)

        # If we haven't injected code before or need to expand the section again for this injection, go ahead and
        # shift stuff around.
        section_end = injection_section['sh_offset'] + \
            injection_section['sh_size']
        if injection_section['sh_size'] < INJECTION_SIZE or \
                section_end < self.next_injection_offset + len(asm):
            self._prepare_for_injection()
        elif self.next_injection_offset == 0:
            # Room already exists: resume right after the bytes that are not
            # trailing 0xCC filler.
            used_code_len = len(injection_section.data().rstrip('\xCC'))
            self.next_injection_offset = injection_section['sh_offset'] + \
                used_code_len
            self.next_injection_vaddr = injection_section['sh_addr'] + \
                used_code_len

        # "Inject" the assembly
        self.binary.seek(self.next_injection_offset)
        self.binary.write(asm)

        # Update e_entry if requested
        if update_entry:
            logging.debug('Rewriting ELF entry address to {}'.format(
                self.next_injection_vaddr))
            elf_hdr = self.helper.header
            elf_hdr.e_entry = self.next_injection_vaddr

            self.binary.seek(0)
            self.binary.write(self.helper.structs.Elf_Ehdr.build(elf_hdr))

        self.helper = ELFFile(self.binary)

        injected_vaddr = self.next_injection_vaddr
        self.next_injection_vaddr += len(asm)
        self.next_injection_offset += len(asm)

        return injected_vaddr

    def replace_instruction(self, vaddr, new_asm):
        """Overwrite the instruction(s) at ``vaddr`` with ``new_asm``.

        Raises ``Exception`` when ``vaddr`` is not the start of a known
        instruction. The containing function's instruction list and the
        analyzer's instruction map are updated to match.
        """
        if not self.analyzer.ins_map[vaddr]:
            raise Exception(
                'Starting virtual address to replace must be an existing instruction'
            )

        overwritten_insns = self.analyzer.ins_map[vaddr:vaddr + len(new_asm)]

        for ins in overwritten_insns:
            if ins.address in self.xrefs:
                logging.warning(
                    '{} will be overwritten but there are xrefs to it: {}'.
                    format(ins, self.xrefs[ins.address]))

        self.binary.seek(self.vaddr_binary_offset(vaddr))
        self.binary.write(new_asm)

        # NOTE(review): the padding length is derived from vaddr rather than
        # from the size of the overwritten instructions, which looks
        # unintended - confirm before relying on it. Floor division keeps the
        # repeat count an integer on Python 3.
        nop = self.analyzer.NOP_INSTRUCTION
        self.binary.write(nop * ((vaddr - len(new_asm)) // len(nop)))

        # The end of the range is an address, so it is vaddr plus the byte
        # length of the new code (adding the bytes themselves is a TypeError).
        new_instructions = self.analyzer.disassemble_range(
            vaddr, vaddr + len(new_asm))

        func = self.function_containing_vaddr(vaddr)

        insert_point = func.instructions.index(overwritten_insns[0])

        # Remove the old instructions from the function
        for ins in overwritten_insns:
            func.instructions.remove(ins)

        # Insert the new instructions where we just removed the old ones
        func.instructions = (func.instructions[:insert_point] +
                             new_instructions +
                             func.instructions[insert_point:])

        # Re-analyze the function for BBs
        func.do_bb_analysis()

        # Finally clear the instructions out from the global instruction map
        for ins in overwritten_insns:
            del self.analyzer.ins_map[ins.address]

        for ins in new_instructions:
            self.analyzer.ins_map[ins.address] = ins
async def test(top):
    """Load the software image into the BRAM model and wait for it to finish.

    The ELF named by the ``sw.image`` plusarg is handed to the debug BFM and
    every non-empty section with flag bit 0x2 (SHF_ALLOC in the ELF
    specification) is copied word by word into the BRAM BFM. If the image
    defines ``ram_console``, a ``RamConsole`` is attached as a memory-write
    callback. The coroutine then waits for ``main`` to exit, for ``idle`` to
    be entered, and for all objections to drop.
    """
    await pybfms.init()

    u_bram = pybfms.find_bfm(".*u_bram")
    u_dbg_bfm: RiscvDebugBfm = pybfms.find_bfm(".*u_dbg_bfm")

    sw_image = cocotb.plusargs["sw.image"]
    u_dbg_bfm.load_elf(sw_image)
    u_dbg_bfm.set_trace_level(RiscvDebugTraceLevel.All)

    # Both stay 0 when the image has no RAM console.
    ram_console = 0
    ram_console_sz = 0

    print("Note: loading image " + sw_image)
    with open(sw_image, "rb") as f:
        elffile = ELFFile(f)

        symtab = elffile.get_section_by_name('.symtab')
        console_syms = symtab.get_symbol_by_name("ram_console")
        if console_syms is not None:
            ram_console = console_syms[0]["st_value"]
            size_syms = symtab.get_symbol_by_name(
                "CONFIG_RAM_CONSOLE_BUFFER_SIZE")
            ram_console_sz = size_syms[0]["st_value"]

        # Copy each loadable section into the memory model.
        for idx in range(elffile.num_sections()):
            shdr = elffile._get_section_header(idx)
            if shdr['sh_size'] == 0 or (shdr['sh_flags'] & 0x2) != 0x2:
                continue

            payload = elffile.get_section(idx).data()
            base_addr = shdr['sh_addr']

            for offset in range(0, len(payload), 4):
                # Little-endian packing; a short final chunk is zero-extended.
                word = int.from_bytes(payload[offset:offset + 4], "little")
                word_index = ((base_addr + offset) & 0xFFFFF) >> 2
                u_bram.write_nb(word_index, word, 0xF)

    if ram_console != 0:
        console = RamConsole(ram_console, ram_console_sz)
        u_dbg_bfm.add_memwrite_cb(console.memwrite)

    # Wait for the main function to exit
    print("--> wait main")
    await u_dbg_bfm.on_exit("main")
    print("<-- wait main")

    # Wait for the OS to go idle
    print("--> wait idle")
    await u_dbg_bfm.on_entry("idle")
    print("<-- wait idle")

    # Wait for all objections to be dropped
    await pybfms.objection.inst().wait()