def main():
    """Report structure padding for the object file named by ``sys.argv[1]``.

    Every DIE whose tag is a structure, class, base type, typedef or array
    is wrapped in the matching ``Struct``/``Primitive``/``Typedef``/``Array``
    object, keyed by its DIE offset. After all compile units are read, each
    wrapper is finalized, a table of all types is printed, and every type
    that reports padding is listed together with its padding entries.

    Exits with status 1 when the file carries no DWARF info.
    """
    # DIE tag -> wrapper class. Built once rather than once per DIE.
    common_types = {
        'DW_TAG_structure_type': Struct,
        'DW_TAG_class_type': Struct,
        'DW_TAG_base_type': Primitive,
        'DW_TAG_typedef': Typedef,
        'DW_TAG_array_type': Array,
    }

    with open(sys.argv[1], 'rb') as f:
        elf = ELFFile(f)
        if not elf.has_dwarf_info():
            print("Object file has no dwarf info!")
            sys.exit(1)

        types = {}
        # DIE offset -> name of the compile unit the type was found in, so
        # the padding report names the right CU instead of whichever CU
        # happened to be processed last.
        cu_of_type = {}

        for cu in elf.get_dwarf_info().iter_CUs():
            cu_name = cu.get_top_DIE().attributes['DW_AT_name'].value.decode(
                'utf-8')
            print('\x1b[32m\x1b[1mProcessing %s\x1b[0m' % cu_name)

            # First, map top level types. The DIE list is materialized
            # before any wrapper is constructed, as in the original flow.
            for current in list(cu.iter_DIEs()):
                wrapper = common_types.get(current.tag)
                if wrapper is None:
                    # Tags outside the table are deliberately skipped.
                    continue
                offset = current.offset
                assert offset not in types
                types[offset] = wrapper(current)
                cu_of_type[offset] = cu_name

        for t in types.values():
            t.finalize(types)

        header = '%-4s |\t%-100s |\t%s' % ('#', 'type', 'size')
        rule = '-' * len(header.expandtabs())
        print(header)
        print(rule)
        for o, t in types.items():
            color = '\x1b[31m\x1b[31m' if t.has_padding() else ''
            print('{:04x} |\t{color}{:100}\x1b[0m |\t{}'.format(
                o, repr(t), t.byte_size, color=color))
        print(rule)
        print()

        for o, t in types.items():
            if not t.has_padding():
                continue
            print(
                "Found padded type '%s' at %s:%u" %
                (t, cu_of_type[o],
                 t.source_object.attributes['DW_AT_decl_line'].value))
            for p in t.get_padding_list():
                print('\t%s' % p)
def process_file(filename):
    """Collect DWARF-derived maps from every compile unit of an ELF file.

    For each compile unit the top DIE is handed first to
    ``die_info_rec_struct`` and then to ``die_info_rec``; those helpers fill
    the maps passed to them (their exact contents are defined by the
    helpers, not here).

    Returns:
        ``(func_map, global_map, type_map, global_access_map)``. When the
        file has no DWARF info, four empty dicts are returned so callers can
        always unpack four values.
    """
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            # Same arity as the normal return below; the previous 2-tuple
            # broke any caller that unpacked four values.
            return {}, {}, {}, {}

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        func_map, global_map, type_map, struct_map, global_access_map = \
            {}, {}, {}, {}, {}

        for CU in dwarfinfo.iter_CUs():
            # DWARFInfo allows to iterate over the compile units contained in
            # the .debug_info section. CU is a CompileUnit object, with some
            # computed attributes (such as its offset in the section) and
            # a header which conforms to the DWARF standard.
            top_DIE = CU.get_top_DIE()

            # Fresh per-CU scratch dicts handed to the helpers.
            variables = {}
            members = {}

            die_info_rec_struct(top_DIE, struct_map, members,
                                global_access_map)

            # Walk the DIEs recursively starting with top_DIE
            die_info_rec(top_DIE, func_map, global_map, type_map, struct_map,
                         variables, global_access_map)

        return func_map, global_map, type_map, global_access_map
def resolve_addr(fn, ip):
    """Resolve address ``ip`` inside file ``fn`` to a symbol and source position.

    Parsed ELF objects, line tables and symbol tables are cached per file
    name in the module-level ``open_files``, ``lines`` and ``symtables``.

    Returns:
        ``(loc, offset, src)``: ``loc``/``offset`` come from the symbol entry
        returned by ``find_le`` (its third field, and ``ip`` minus its first
        field); ``src`` is ``"<field 2>:<field 3>"`` of the line-table entry.
        Each part is ``None`` when the corresponding lookup finds nothing.
    """
    if fn not in open_files:
        # The stream is intentionally left open: the cached ELFFile keeps
        # reading from it on later calls.
        open_files[fn] = ELFFile(open(fn, 'rb'))
    elffile = open_files[fn]

    if fn not in lines and elffile.has_dwarf_info():
        lines[fn] = build_line_table(elffile.get_dwarf_info())
    if fn not in symtables:
        symtables[fn] = build_symtab(elffile)

    loc = offset = src = None

    sym = find_le(symtables[fn], ip) if fn in symtables else None
    if sym:
        loc = sym[2]
        offset = ip - sym[0]

    pos = find_le(lines[fn], ip) if fn in lines else None
    if pos:
        src = "%s:%d" % (pos[2], pos[3])

    return loc, offset, src
def get_hex():
    """Get byte encodings corresponding to each source code line.

    Reads ``a.out`` from the current directory, walks the line program of
    its first compile unit and reads bytes from the file as directed by the
    line-program entries.

    Returns:
        A list of byte strings. The list has ``last_line + 1`` slots, where
        ``last_line`` is the line recorded in the final line-program entry;
        slots that never receive bytes stay ``b''``.
    """
    with open('a.out', 'rb') as f:
        info = ELFFile(f)
        dwarf = info.get_dwarf_info()
        cu = next(dwarf.iter_CUs())
        entries = dwarf.line_program_for_CU(cu).get_entries()

        textsec = info.get_section_by_name('.text')
        # Difference between the section's load address and file offset.
        voff = textsec.header['sh_addr'] - textsec.header['sh_offset']

        curr_line = 0
        # Bytes accumulators: f.read() on a binary file returns bytes, which
        # cannot be appended to a text string on Python 3.
        res = [b''] * (entries[-1].state.line + 1)

        # The first entry's argument positions the file at the start of the
        # code; slice rather than pop so the list returned by the library is
        # left untouched.
        f.seek(entries[0].args[0] - voff, os.SEEK_SET)

        for e in entries[1:]:
            argc = len(e.args)
            if argc == 0:
                curr_line = e.state.line - 1
            elif argc == 1:
                # presumably an address advance of args[0] bytes -- TODO
                # confirm against the opcodes this is run on
                res[curr_line] += f.read(e.args[0])
            else:
                if e.args[0] == 0:
                    # No line delta: skip args[1] bytes and resync the line
                    # from the entry's state.
                    f.seek(e.args[1], os.SEEK_CUR)
                    curr_line = e.state.line - 1
                else:
                    res[curr_line] += f.read(e.args[1])
                    curr_line += e.args[0]
    return res
def _get_impalad_dwarf_info(self):
    """
    Inspect the DWARF of the ELF binary at self.impalad_path to learn how it
    was compiled.

    Returns a 2-tuple (DW_AT_name, DW_AT_producer) taken from the top DIE of
    the first compile unit. On any failure a warning is logged and whatever
    was gathered so far is returned (None for each missing value).
    """
    # Some useful references:
    # - be/CMakeLists.txt
    # - gcc(1), especially -grecord-gcc-switches, -g, -ggdb, -gdwarf-2
    # - readelf(1)
    # - general reading about DWARF
    # A useful command for exploration without having to wade through many bytes is:
    # readelf --debug-dump=info --dwarf-depth=1 impalad
    # The DWARF lines are long, raw, and nasty; I'm hesitant to paste them here, so
    # curious readers are highly encouraged to try the above, or read IMPALA-3501.
    die_name = die_producer = None
    try:
        with open(self.impalad_path, 'rb') as fh:
            impalad_elf = ELFFile(fh)
            if impalad_elf.has_dwarf_info():
                # Only the first CU matters, so pull a single item off the
                # iterator instead of looping.
                first_cu = next(impalad_elf.get_dwarf_info().iter_CUs())
                attributes = first_cu.get_top_DIE().attributes
                die_name = attributes['DW_AT_name'].value
                die_producer = attributes['DW_AT_producer'].value
    except Exception as e:
        LOG.warn('Failure to read DWARF info from {0}: {1}'.format(self.impalad_path, str(e)))
    return die_name, die_producer
def process_file(filename):
    """Print offset, length, top-DIE tag and full path of each compile unit.

    Prints a notice and returns early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo object returned by get_dwarf_info is the entry point
        # for all DWARF processing in pyelftools; iter_CUs walks the compile
        # units found in .debug_info.
        for unit in elf.get_dwarf_info().iter_CUs():
            # Header fields of a compile unit are read by item lookup.
            unit_length = unit['unit_length']
            print(' Found a compile unit at offset {0!s}, length {1!s}'.format(
                unit.cu_offset, unit_length))

            # The first DIE in each compile unit describes it.
            root = unit.get_top_DIE()
            print(' Top DIE with tag={0!s}'.format(root.tag))

            # We're interested in the filename...
            print(' name={0!s}'.format(root.get_full_path()))
def process_file(filename):
    """List the compile units of an ELF file with their top DIE tag and path.

    Prints a notice and returns early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as elf_stream:
        image = ELFFile(elf_stream)
        if not image.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info yields the DWARFInfo context object from which all
        # DWARF data is reached.
        debug_info = image.get_dwarf_info()

        for compile_unit in debug_info.iter_CUs():
            # A compile unit exposes computed attributes (cu_offset) and its
            # standard header fields via item lookup.
            summary = (compile_unit.cu_offset, compile_unit['unit_length'])
            print(' Found a compile unit at offset %s, length %s' % summary)

            # The first DIE in each compile unit describes it.
            top = compile_unit.get_top_DIE()
            print(' Top DIE with tag=%s' % top.tag)

            # We're interested in the filename...
            print(' name=%s' % top.get_full_path())
class DwarfInfo:
    """Resolve code addresses to (file name, line) via DWARF line tables."""

    def __init__(self, exe):
        """Open ``exe`` and load its DWARF info.

        Raises:
            ValueError: if the binary has no DWARF info. (The previous code
                tried to ``raise`` a plain string, which is itself a
                TypeError on Python 3.)
        """
        self.binary = exe
        self.fd = open(exe, 'rb')
        try:
            self.elf_file = ELFFile(self.fd)
            if not self.elf_file.has_dwarf_info():
                raise ValueError('Binary contains no dwarf info section.')
            self.dwarf_info = self.elf_file.get_dwarf_info()
        except Exception:
            # Do not leave the descriptor open when construction fails.
            self.fd.close()
            raise

    def __del__(self):
        # ``fd`` is missing when open() itself failed in __init__.
        fd = getattr(self, 'fd', None)
        if fd is not None:
            fd.close()

    def lookup(self, address):
        """Return ``(file_name, line)`` of the line-table row covering ``address``.

        A row covers the half-open range from its own address up to the
        address of the next entry in the same sequence.

        Raises:
            LookupError: if no row of any compile unit covers ``address``.
        """
        # iterate over the compile units(CUs)
        for CU in self.dwarf_info.iter_CUs():
            line_progs = self.dwarf_info.line_program_for_CU(CU)
            if line_progs is None:
                # This CU has no line program; nothing to search.
                continue

            prev_state = None
            # debug-line parse the table like `dwarfdump -debug-line ./main`
            for entry in line_progs.get_entries():
                state = entry.state
                if state is None:
                    continue

                # Test the range first: the end_sequence entry supplies the
                # upper bound of the final row, so resetting before the test
                # made that row unreachable.
                if prev_state and prev_state.address <= address < state.address:
                    file_name = line_progs['file_entry'][prev_state.file - 1].name
                    return file_name, prev_state.line

                # Ranges never span two sequences.
                prev_state = None if state.end_sequence else state

        raise LookupError('Could not find address')
def process_file(filename):
    """Print each compile unit's header summary, then dump its DIE tree.

    The tree dump is delegated to ``die_info_rec``, called on the top DIE of
    every compile unit. Prints a notice and returns early when the file has
    no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as handle:
        elf = ELFFile(handle)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # DWARFInfo (from get_dwarf_info) is the starting point for all
        # DWARF-based processing in pyelftools.
        dwarf = elf.get_dwarf_info()

        for cu in dwarf.iter_CUs():
            # Header elements of the compile unit are read via item lookup.
            offset, length = cu.cu_offset, cu['unit_length']
            print(' Found a compile unit at offset %s, length %s' % (
                offset, length))

            # Start with the top DIE, the root for this CU's DIE tree
            root_die = cu.get_top_DIE()
            print(' Top DIE with tag=%s' % root_die.tag)

            # We're interested in the filename...
            print(' name=%s' % root_die.get_full_path())

            # Display DIEs recursively starting with the root
            die_info_rec(root_die)
def retrieve_pub_functions(binary):
    """Dump the ``.debug_pubnames`` table of ``binary`` and return its names.

    Returns:
        The list of names in table order, or ``-1`` when the file has no
        DWARF info or no ``.debug_pubnames`` section.
    """
    found = []
    rule = '-' * 66

    with open(binary, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return -1

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarf = elf.get_dwarf_info()

        # get .debug_pubnames section.
        pubnames = dwarf.get_pubnames()
        if pubnames is None:
            print('ERROR: No .debug_pubnames section found in ELF.')
            return -1
        print('%d entries found in .debug_pubnames' % len(pubnames))

        # dump all entries in .debug_pubnames section.
        print('Dumping .debug_pubnames table ...')
        print(rule)
        print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS'))
        print(rule)
        for symbol_name, record in pubnames.items():
            found.append(symbol_name)
            print('%50s%8d%8d' % (symbol_name, record.cu_ofs, record.die_ofs))
        print(rule)

    print(found)
    return found
def run(self):
    """Build the DWARF model step by step, reporting progress to the window.

    Schedules ``window.display_error`` when the file has no DWARF info.
    Otherwise it pulls values from ``builder.build_step()`` until one is
    truthy, scheduling ``window.load_progress`` before each further pull,
    and finally schedules ``window.done_loading`` with that value. Returns
    silently as soon as ``self.stop_requested`` is seen.
    """
    elf = ELFFile(self.f)
    if not elf.has_dwarf_info():
        GLib.idle_add(self.window.display_error,
                      "This file has no DWARF info.")
        return

    builder = DwarfModelBuilder(elf.get_dwarf_info(), self.verbose)
    total = builder.num_cus()
    steps = builder.build_step()

    completed = 0
    file_elem = next(steps)
    while not file_elem:
        if self.stop_requested:
            return
        GLib.idle_add(self.window.load_progress, float(completed) / total)
        completed += 1
        file_elem = next(steps)

    if self.stop_requested:
        return
    GLib.idle_add(self.window.done_loading, file_elem)
def __init__(self, filename):
    """Remember ``filename`` and parse the address ranges from its DWARF info.

    Asserts that the file carries DWARF information.
    """
    self.filename = filename
    with open(filename, 'rb') as elf_stream:
        image = ELFFile(elf_stream)
        assert image.has_dwarf_info(), \
            "No DWARF information for '{}'".format(filename)
        dwarf = image.get_dwarf_info()
        self.parseAddressRanges(dwarf)
def __init__(self, binary: str):
    """Load symbols, compile units and ``__metal_serial_`` markers from ``binary``.

    Populates:
        self.symbols: non-empty-named symbols of the last symbol table
            section found (empty list when the ELF has none).
        self.compile_units: one ``CompileUnitInput`` per ``DW_TAG_compile_unit``
            DIE, with its name, compilation directory and file list.
        self.markers: one ``Marker`` per symbol whose name starts with
            ``__metal_serial_``, located via the line programs.

    Raises:
        Exception: if the binary has no DWARF info, or if a marker symbol's
            address has no matching line-program entry.
    """
    # Start empty so a binary without any symbol table section yields no
    # markers instead of an AttributeError further down.
    self.symbols = []

    with open(binary, "rb") as b:
        elffile = ELFFile(b)

        # Symbol table
        for section in elffile.iter_sections():
            if isinstance(section, SymbolTableSection):
                self.symbols = [
                    Symbol(sym.name, sym['st_value'], sym['st_info']['type'])
                    for sym in section.iter_symbols()
                    if len(sym.name) > 0
                ]

        if not elffile.has_dwarf_info():
            raise Exception("This tool needs gdb info.")
        dbg = elffile.get_dwarf_info()

        def file_entry_to_abs(file_entry, linep: LineProgram) -> str:
            """Join a line-program file entry with its include directory."""
            di = file_entry.dir_index
            if di > 0:
                return path.join(linep['include_directory'][di - 1].decode(),
                                 file_entry.name.decode())
            # dir_index 0: no include directory entry is used.
            return path.join('.', file_entry.name.decode())

        cu_helper = [(cu, dbg.line_program_for_CU(cu)) for cu in dbg.iter_CUs()]

        # find compile units
        self.compile_units = [
            CompileUnitInput(
                die.attributes['DW_AT_name'].value.decode(),
                die.attributes['DW_AT_comp_dir'].value.decode(),
                [file_entry_to_abs(fe, linep) for fe in linep['file_entry']])
            for cu, linep in cu_helper
            for die in cu.iter_DIEs()
            if die.tag == 'DW_TAG_compile_unit'
        ]

        self.markers = []
        marker_symbols = (sym for sym in self.symbols
                          if sym.name.startswith('__metal_serial_'))
        for msym in marker_symbols:
            # First line-program entry whose address equals the symbol's.
            # A sentinel is used instead of try/except so that only "no
            # match" is reported as a missing location.
            hit = next(
                ((entry, linep)
                 for (cu, linep) in cu_helper
                 for entry in linep.get_entries()
                 if entry.state is not None
                 and entry.state.address == msym.address),
                None)
            if hit is None:
                raise Exception(
                    'Could not find code location for {} at 0x{:x} - this is most likely due to missing gdb symbols.'.format(
                        msym.name, msym.address))

            loc, linep = hit
            abs_file_entry = file_entry_to_abs(
                linep['file_entry'][loc.state.file - 1], linep)

            # NOTE: several markers at the same file/line/column are
            # accepted; no duplicate check is performed.
            self.markers.append(Marker(
                msym.name,
                msym.address,
                msym.symbol_type,
                abs_file_entry,
                loc.state.line,
                loc.state.column,
            ))
def process_file(filename, root, pkgroot):
    """Collect the line-entry mapping of every line program in ``filename``.

    Returns:
        A set updated with ``line_entry_mapping(line_program)`` for each
        compile unit that has a line program. The set is empty when
        ``filename`` is a directory, is unreadable, has no DWARF info or
        cannot be opened. If parsing raises ``ELFError`` the result of
        ``find_in_source_root(filename, root, pkgroot)`` is returned instead.
    """
    collected = set()

    readable_file = (not os.path.isdir(filename)
                     and os.access(filename, os.R_OK))
    if not readable_file:
        return collected

    try:
        with open(filename, 'rb') as stream:
            try:
                elf = ELFFile(stream)
                if not elf.has_dwarf_info():
                    return collected

                dwarf = elf.get_dwarf_info()
                for cu in dwarf.iter_CUs():
                    # Every compilation unit in the DWARF information may or
                    # may not have a corresponding line program in
                    # .debug_line.
                    program = dwarf.line_program_for_CU(cu)
                    if program is not None:
                        # Merge the reverse mapping of filename -> #entries
                        collected.update(line_entry_mapping(program))
            except ELFError:
                return find_in_source_root(filename, root, pkgroot)
    except OSError:
        pass

    return collected
def generate_header(self, data_out_filename, glob_data_out, namesp_out):
    """Generate a header from the compile units of every object file.

    Writes the fixed preamble to ``glob_data_out`` and ``namesp_out``, then
    delegates each compile unit of each file in ``self._objfiles`` to a
    ``HeaderGenCU`` instance.

    Raises:
        NoDwarfInfoError: if an object file carries no DWARF info.
    """
    banner = "/* generated by userspace-header-gen.py */\n"
    glob_data_out.write(banner)
    glob_data_out.write("#include <rtems/linkersets.h>\n")
    namesp_out.write(banner)

    for objfile in self._objfiles:
        elf = ELFFile(objfile)
        if not elf.has_dwarf_info():
            raise NoDwarfInfoError()

        # Don't relocate DWARF sections. This is not necessary for us but
        # makes problems on ARM with current pyelftools (version 0.24)
        dwarf = elf.get_dwarf_info(relocate_dwarf_sections=False)

        for unit in dwarf.iter_CUs():
            if self._verbose >= VERBOSE_SOME:
                message = 'Found a CU at offset %s, length %s\n' % \
                    (unit.cu_offset, unit['unit_length'])
                self._err.write(message)

            line_program = dwarf.line_program_for_CU(unit)
            cu_generator = HeaderGenCU(unit, self._progname, line_program,
                                       self._err, self._verbose,
                                       self._filterre)
            cu_generator.generate_header(data_out_filename, glob_data_out,
                                         namesp_out)
def get_all_offsets_from_ELF(filename, structs):
    """Yield ``(struct, field, offset)`` for each requested struct in an ELF file.

    Arguments are validated immediately, when this function is called; the
    ELF file is only opened and parsed once the returned iterator is first
    advanced.

    Args:
        filename: path of the ELF file.
        structs: iterable of struct descriptions accepted by
            ``_validate_struct``.

    Returns:
        An iterator of ``(struct, field, offset)`` tuples, in the order of
        ``structs``.
    """
    # Snapshot so any iterable (including one-shot iterators) works: the
    # items are needed for validation now and again while yielding.
    structs = list(structs)

    # Do argument validation at the beginning, so that if there's a problem,
    # we don't have to wait for the file to parse first. This has to happen
    # outside the generator below, because a generator body does not run
    # until it is first iterated.
    names = []
    for struct in structs:
        kind, name = _validate_struct(struct)
        names.append((KIND2TAG[kind], name.encode('ascii')))

    def _iter_offsets():
        with open(filename, 'rb') as f:
            elffile = ELFFile(f)
            dwarf = elffile.get_dwarf_info()
            items = get_items_from_DWARF(dwarf, names=set(names))

            # Index the DIEs of every involved compile unit by offset.
            cus = {cu for cu, item in items.values()}
            cu2offset2die = {
                cu: {die.offset: die for die in cu.iter_DIEs()}
                for cu in cus
            }

            for struct, (kind, value) in zip(structs, names):
                cu, item = items[kind, value]
                offset2die = cu2offset2die[cu]
                # NOTE(review): ``kind`` here is the KIND2TAG value, so this
                # branch only fires if KIND2TAG maps to the literal
                # 'typedef' -- confirm against KIND2TAG.
                if kind == 'typedef':
                    item = offset2die[item.attributes['DW_AT_type'].value]
                for field, offset in get_offsets_from_DIE(item, offset2die):
                    yield struct, field, offset

    return _iter_offsets()
class struct_parser(object):
    """Locate structure DIEs by name in the DWARF info of an ELF file."""

    def __init__(self, path):
        """Parse ``path``; ``self.dw`` is the DWARF info, or 0 if there is none."""
        with open(path, 'rb') as f:
            self.elffile = ELFFile(f)
            # check if we have debug info
            if not has_debug_info(self.elffile):
                print("File %s not have debug information!" % path)
                self.dw = 0
            else:
                self.dw = self.elffile.get_dwarf_info()

    def _iter_cus(self):
        """Return the compile units, or nothing when no DWARF info was loaded."""
        if self.dw == 0:
            return iter(())
        return self.dw.iter_CUs()

    @staticmethod
    def _find_struct_in(top_die, sname):
        """Return the first structure child of ``top_die`` named ``sname``, else None."""
        for die in top_die.iter_children():
            if die.tag != "DW_TAG_structure_type":
                continue
            name_attr = die.attributes.get("DW_AT_name")
            if name_attr is not None and name_attr.value == sname:
                return die
        return None

    def find_struct_die(self, sname):
        """Find the top-level structure named ``sname`` in any compile unit.

        Returns:
            ``(die, cu)`` for the first match, or ``None`` when nothing
            matches or the file had no debug information.
        """
        sname = bytes(sname, encoding="UTF-8")
        for cu in self._iter_cus():
            die = self._find_struct_in(cu.get_top_DIE(), sname)
            if die is not None:
                return die, cu
        return None

    def find_struct_die_with_filename(self, fname, sname):
        """Find structure ``sname`` in compile units whose DW_AT_name is ``fname``.

        Compile units without a DW_AT_name attribute are skipped.

        Returns:
            ``(die, cu)`` for the first match, or ``None`` when nothing
            matches or the file had no debug information.
        """
        fname = bytes(fname, encoding="UTF-8")
        sname = bytes(sname, encoding="UTF-8")
        for cu in self._iter_cus():
            top_die = cu.get_top_DIE()
            cu_name = top_die.attributes.get("DW_AT_name")
            if cu_name is None or cu_name.value != fname:
                continue
            die = self._find_struct_in(top_die, sname)
            if die is not None:
                return die, cu
        return None
def get_frame_base(filename, pc, rebased_addr):
    """
    Call to get frame base

    Scans the decoded table of every FDE in the EH CFI entries and keeps the
    CFA offset of the row with the smallest ``pc`` that is not below the
    un-rebased target address.

    :param filename: name of the executable file
    :param pc: The address of the beginning of the function
    :param rebased_addr: Should be project.loader.memory.min_addr
    :return: the frame base for the function (0 if no row qualifies);
        None when the file has no DWARF info
    """
    target_loc = pc - rebased_addr

    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarf = elf.get_dwarf_info()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elf.get_machine_arch())

        best_pc = 1000000000000000000000
        best_offset = 0
        for cfi_entry in dwarf.EH_CFI_entries():
            if not isinstance(cfi_entry, FDE):
                continue
            for row in cfi_entry.get_decoded().table:
                row_pc = row['pc']
                if target_loc <= row_pc < best_pc:
                    best_offset = row['cfa'].offset
                    best_pc = row_pc
        return best_offset
def get_subprograms(self):
    """
    Generate the subprograms according to the dwarf info debugging information

    Every DW_TAG_subprogram DIE that has DW_AT_low_pc, DW_AT_high_pc and
    either DW_AT_name or DW_AT_specification is appended to
    ``self.subprograms`` as a ``Function`` (created with an empty name), and
    its low pc is appended to ``self.func_starts``.
    """
    # DW_AT_high_pc encoded with one of these (constant-class) forms is an
    # offset from DW_AT_low_pc rather than an absolute address.
    offset_forms = (
        "DW_FORM_data1", "DW_FORM_data2", "DW_FORM_data4", "DW_FORM_data8",
        "DW_FORM_udata", "DW_FORM_sdata",
    )

    # The context manager closes the file once the walk is done.
    with open(self.path, 'rb') as f:
        dwarfinfo = ELFFile(f).get_dwarf_info()
        for CU in dwarfinfo.iter_CUs():
            for DIE in CU.iter_DIEs():
                if DIE.tag != "DW_TAG_subprogram":
                    continue

                attrs = DIE.attributes
                if "DW_AT_low_pc" not in attrs or "DW_AT_high_pc" not in attrs:
                    continue
                if ("DW_AT_name" not in attrs
                        and "DW_AT_specification" not in attrs):
                    continue

                lowpc = attrs["DW_AT_low_pc"].value
                high_attr = attrs["DW_AT_high_pc"]
                highpc = high_attr.value
                if high_attr.form in offset_forms:
                    highpc = lowpc + highpc

                name = ""
                ret = 0
                num_args = 0
                subprogram = Function(name, lowpc, highpc, "", "", ret,
                                      num_args, self)
                self.subprograms.append(subprogram)
                self.func_starts.append(subprogram.low_pc)
def load_dwarf_info(mmap):
    """
    Load or reload all dwarf info from mmap.

    For every file name in ``mmap`` that does not start with ``[`` and that
    has DWARF info, stores in ``all_dwarf_info[filename]`` a dict with:

    * ``"dwarfinfo"``: the DWARFInfo object;
    * ``"units"``: one dict per compile unit with its ``"lineprog"`` (may be
      None), the non-end-sequence line ``"states"``, their ``"addresses"``,
      and ``"entries"`` describing each DW_TAG_subprogram DIE (``"entry"``,
      ``"bounds"`` from ``die_bounds`` and ``"name"``, which is None for
      subprograms without DW_AT_name).
    """
    for filename in mmap:
        if filename.startswith("["):
            continue

        # Close the file once parsed so repeated reloads do not accumulate
        # open descriptors.
        # NOTE(review): relies on pyelftools copying the debug sections into
        # memory in get_dwarf_info() -- confirm for the pinned version.
        with open(filename, "rb") as stream:
            elffile = ELFFile(stream)
            if not elffile.has_dwarf_info():
                continue
            dwarfinfo = elffile.get_dwarf_info()

            # Information from Compilation Units (CUs)
            cus = []
            for cu in dwarfinfo.iter_CUs():
                lineprog = dwarfinfo.line_program_for_CU(cu)
                # A CU may come without a line program.
                line_entries = lineprog.get_entries() if lineprog is not None else []
                states = [
                    entry.state for entry in line_entries
                    if entry.state and not entry.state.end_sequence
                ]
                addresses = [state.address for state in states]

                dies = []
                for die in cu.iter_DIEs():
                    if die.tag != 'DW_TAG_subprogram':
                        continue
                    # Not every subprogram DIE carries a name of its own.
                    name_attr = die.attributes.get('DW_AT_name')
                    dies.append({
                        "entry": die,
                        "bounds": die_bounds(die),
                        "name": name_attr.value if name_attr is not None else None
                    })

                cus.append({
                    "lineprog": lineprog,
                    "states": states,
                    "addresses": addresses,
                    "entries": dies
                })

        all_dwarf_info[filename] = {"dwarfinfo": dwarfinfo, "units": cus}
def test_range_list_absence(self):
    """range_lists() is None for the arm_with_form_indirect.elf test file."""
    elf_path = os.path.join('test', 'testfiles_for_unittests',
                            'arm_with_form_indirect.elf')
    with open(elf_path, 'rb') as stream:
        elf = ELFFile(stream)
        self.assertTrue(elf.has_dwarf_info())
        range_lists = elf.get_dwarf_info().range_lists()
        self.assertIsNone(range_lists)
def get_cfi(path):
    ''' Get the CFI entries from the ELF at the provided path.

    Uses CFI_entries() when has_CFI() is true, otherwise EH_CFI_entries()
    when has_EH_CFI() is true. Returns None, after printing a short reason,
    when neither is available, when there is no DWARF info, or when an
    ELFError, DWARFError, PermissionError or KeyError is raised.
    '''
    # Checked in this order; the first matching type picks the message.
    handled = (
        (ELFError, "ELF Error"),
        (DWARFError, "DWARF Error"),
        (PermissionError, "Permission Error"),
        (KeyError, "Key Error"),
    )

    try:
        with open(path, 'rb') as file_handle:
            elf_file = ELFFile(file_handle)
            if not elf_file.has_dwarf_info():
                print("No DWARF")
                return None

            dw_info = elf_file.get_dwarf_info()
            if dw_info.has_CFI():
                cfis = dw_info.CFI_entries()
            elif dw_info.has_EH_CFI():
                cfis = dw_info.EH_CFI_entries()
            else:
                print("No CFI")
                return None
    except (ELFError, DWARFError, PermissionError, KeyError) as err:
        for exc_type, message in handled:
            if isinstance(err, exc_type):
                print(message)
                break
        return None

    return cfis
def run(self):
    """Load the DWARF model incrementally and hand the result to the window.

    If the file lacks DWARF info, ``window.display_error`` is scheduled and
    the method returns. Otherwise values are drawn from
    ``builder.build_step()`` until a truthy one appears; before each extra
    draw ``window.load_progress`` is scheduled with the fraction done.
    ``window.done_loading`` is scheduled with the final value. A set
    ``self.stop_requested`` aborts without further callbacks.
    """
    elf_image = ELFFile(self.f)
    if not elf_image.has_dwarf_info():
        GLib.idle_add(self.window.display_error,
                      "This file has no DWARF info.")
        return

    dwarf = elf_image.get_dwarf_info()
    model_builder = DwarfModelBuilder(dwarf, self.verbose)
    cu_count = model_builder.num_cus()
    step_iter = model_builder.build_step()

    step_no = 0
    result = next(step_iter)
    while not result:
        if self.stop_requested:
            return
        fraction = float(step_no) / cu_count
        GLib.idle_add(self.window.load_progress, fraction)
        step_no = step_no + 1
        result = next(step_iter)

    if self.stop_requested:
        return
    GLib.idle_add(self.window.done_loading, result)
def translate_callstacks(callstack_to_count, executable_path, address_to_translation=None):
    """Translate the addresses of each callstack into function names.

    All but the last element of every callstack are passed through
    ``decode_address`` (results are cached in ``address_to_translation``,
    including failed lookups stored as None). Untranslated addresses are
    dropped, the untouched last element is placed in front, and the
    sequence is reversed and stored as a tuple key.

    Returns:
        ``(translated_callstack_to_count, address_to_translation)``.
    """
    translated_callstack_to_count = {}

    with open(executable_path, "rb") as executable_infile:
        # Load in the executable and get its debug info
        dwarf_info = ELFFile(executable_infile).get_dwarf_info()

        if address_to_translation is None:
            address_to_translation = {}

        for callstack, count in callstack_to_count.items():
            names = []
            for address in callstack[:-1]:
                # Fill the cache on first sight of an address ...
                if address not in address_to_translation:
                    decoded = decode_address(dwarf_info, address)
                    if decoded is not None:
                        decoded = str(decoded, encoding="ascii")
                    address_to_translation[address] = decoded
                # ... then always answer from the cache.
                names.append(address_to_translation[address])

            # Drop frames that could not be translated, tack the final
            # (MPI function) element back on in front.
            frames = [callstack[-1]] + [n for n in names if n is not None]

            # Tuples are hashable, so they can serve as dict keys.
            translated_callstack_to_count[tuple(reversed(frames))] = count

    return translated_callstack_to_count, address_to_translation
def test_range_list_presence(self):
    """range_lists() is not None for the sample_exe64.elf test file."""
    elf_path = os.path.join('test', 'testfiles_for_unittests',
                            'sample_exe64.elf')
    with open(elf_path, 'rb') as stream:
        elf = ELFFile(stream)
        self.assertTrue(elf.has_dwarf_info())
        range_lists = elf.get_dwarf_info().range_lists()
        self.assertIsNotNone(range_lists)
def process_file(filename):
    """Print every compile unit's offset, length, top-DIE tag and DW_AT_name.

    Prints a notice and returns early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo context object is the starting point for all
        # DWARF-based processing in pyelftools.
        dwarf = elf.get_dwarf_info()

        for unit in dwarf.iter_CUs():
            # Compile-unit header elements are read via item lookup.
            print(' Found a compile unit at offset %s, length %s' % (
                unit.cu_offset, unit['unit_length']))

            # The first DIE in each compile unit describes it.
            root = unit.get_top_DIE()
            print(' Top DIE with tag=%s' % root.tag)

            # Each DIE holds an OrderedDict of attributes, mapping names to
            # AttributeValue objects (elftools/dwarf/die.py). DW_AT_name is
            # usually a string from .debug_str, which the library resolves
            # transparently.
            unit_name = root.attributes['DW_AT_name'].value
            print(' name=%s' % bytes2str(unit_name))
def test(self, file):
    """Checks if file contains DWARF debugging data.

    Returns a falsy value when the file has no DWARF info or cannot be
    parsed as ELF; otherwise the DWARFInfo object's ``has_debug_info``.
    """
    try:
        elf_file = ELFFile(file)
        has_dwarf = elf_file.has_dwarf_info()
        if not has_dwarf:
            return has_dwarf
        return elf_file.get_dwarf_info().has_debug_info
    except ELFError:
        return False
def _get_dwarf_info(binary_path, relocate_dwarf_sections=True):
    """Return the DWARFInfo of ``binary_path``.

    ``relocate_dwarf_sections`` is forwarded to ``ELFFile.get_dwarf_info``.

    Raises:
        RuntimeError: if the binary has no DWARF info.
    """
    with open(binary_path, 'rb') as stream:
        elf = ELFFile(stream)
        if elf.has_dwarf_info():
            return elf.get_dwarf_info(
                relocate_dwarf_sections=relocate_dwarf_sections)
        raise RuntimeError(f'{binary_path} has no DWARF info')
def process_file(filename):
    """Print the location information attached to the DIEs of an ELF file.

    For every attribute that ``LocationParser`` reports as carrying a
    location, prints the DIE's name (``None`` when the DIE has no
    DW_AT_name), its tag, the attribute name and the decoded location
    expression or location list.

    Prints a notice and returns early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            # DWARFInfo allows to iterate over the compile units contained in
            # the .debug_info section. CU is a CompileUnit object, with some
            # computed attributes (such as its offset in the section) and
            # a header which conforms to the DWARF standard. The access to
            # header elements is, as usual, via item-lookup.
            print(' Found a compile unit at offset %s, length %s' %
                  (CU.cu_offset, CU['unit_length']))

            # A CU provides a simple API to iterate over all the DIEs in it.
            for DIE in CU.iter_DIEs():
                # A DIE can carry a location without a name of its own, so
                # look the name up defensively.
                name_attr = DIE.attributes.get('DW_AT_name')
                var_name = name_attr.value if name_attr is not None else None

                # Go over all attributes of the DIE. Each attribute is an
                # AttributeValue object (from elftools.dwarf.die), which we
                # can examine.
                for attr in DIE.attributes.values():
                    # Check if this attribute contains location information
                    if not loc_parser.attribute_has_location(attr, CU['version']):
                        continue

                    print(' Varname:%s' % (var_name))
                    print(' DIE %s. attr %s.' % (DIE.tag, attr.name))
                    loc = loc_parser.parse_from_attribute(attr, CU['version'])

                    # We either get a list (in case the attribute is a
                    # reference to the .debug_loc section) or a LocationExpr
                    # object (in case the attribute itself contains location
                    # information).
                    if isinstance(loc, LocationExpr):
                        print(' %s' % (describe_DWARF_expr(
                            loc.loc_expr, dwarfinfo.structs)))
                    elif isinstance(loc, list):
                        print(show_loclist(loc, dwarfinfo, indent=' '))
def test_empty_pubtypes(self):
    """get_pubtypes() yields zero entries for the empty_pubtypes test binary."""
    binary = os.path.join('test', 'testfiles_for_unittests',
                          'empty_pubtypes', 'main')
    with open(binary, 'rb') as stream:
        elf = ELFFile(stream)
        # This test targets `ELFParseError` caused by buggy handling
        # of ".debug_pubtypes" section which only has zero terminator
        # entry.
        pubtypes = elf.get_dwarf_info().get_pubtypes()
        self.assertEqual(len(pubtypes), 0)
def test_empty_pubtypes(self):
    """A .debug_pubtypes section holding only the terminator parses as empty."""
    fixture_dir = os.path.join('test', 'testfiles_for_unittests')
    fixture = os.path.join(fixture_dir, 'empty_pubtypes', 'main')
    with open(fixture, 'rb') as fixture_stream:
        parsed = ELFFile(fixture_stream)
        # This test targets `ELFParseError` caused by buggy handling
        # of ".debug_pubtypes" section which only has zero terminator
        # entry.
        entry_count = len(parsed.get_dwarf_info().get_pubtypes())
        self.assertEqual(entry_count, 0)
def __init__(self, stream: TraceStream, elf_file: io.IOBase, basedir: str=''):
    """Store the trace stream and base directory and load DWARF from ``elf_file``.

    Raises:
        ValueError: if the ELF file has no DWARF info.
    """
    self.stream = stream
    self.basedir = basedir
    self.cache = {}

    elf = ELFFile(elf_file)
    if elf.has_dwarf_info():
        self.dwarf = elf.get_dwarf_info()
    else:
        raise ValueError(f'ELF file {elf} has no DWARF info')
def test_dwarfv5_parses(self):
    """DWARF info is detected and can be fetched for dwarfv5_basic.elf."""
    fixture = os.path.join('test', 'testfiles_for_unittests',
                           'dwarfv5_basic.elf')
    with open(fixture, 'rb') as stream:
        parsed = ELFFile(stream)

        # DWARFv5 debugging information is detected.
        self.assertTrue(parsed.has_dwarf_info())

        # Fetching DWARFInfo for DWARFv5 doesn't completely explode.
        self.assertIsNotNone(parsed.get_dwarf_info())
def test_die_size(self):
    """Every child of each top DIE in trailing_null_dies.elf has size 3."""
    fixture = os.path.join('test', 'testfiles_for_unittests',
                           'trailing_null_dies.elf')
    with open(fixture, 'rb') as stream:
        parsed = ELFFile(stream)
        self.assertTrue(parsed.has_dwarf_info())

        for unit in parsed.get_dwarf_info().iter_CUs():
            top = unit.get_top_DIE()
            for die in top.iter_children():
                self.assertEqual(die.size, 3)
def get_type_from_file(filename, name):
    """Return ``get_die_type`` for the first DIE named ``name`` in the first CU.

    Only the first compile unit of the file is searched, and the search
    stops at the first DIE whose DW_AT_name equals ``name`` (UTF-8 encoded).

    Raises:
        IndexError: if the file has no compile unit, or no DIE of the first
            compile unit carries that name.
    """
    # Encode once instead of once per DIE.
    wanted = name.encode('utf-8')

    with open(str(filename), 'rb') as f:
        elffile = ELFFile(f)
        dwarf_info = elffile.get_dwarf_info()

        # Take the first CU / first match lazily rather than materializing
        # every CU and every matching DIE. IndexError is kept as the failure
        # type for callers that relied on the old list indexing.
        cu = next(iter(dwarf_info.iter_CUs()), None)
        if cu is None:
            raise IndexError('no compile unit found in %s' % filename)

        die = next(
            (candidate for candidate in cu.iter_DIEs()
             if 'DW_AT_name' in candidate.attributes
             and candidate.attributes['DW_AT_name'].value == wanted),
            None)
        if die is None:
            raise IndexError('no DIE named %r found in %s' % (name, filename))

        return get_die_type(filename, cu, die)
def fetch_lineno(bin_name, func_addrs):
    """Return the result of ``decode_file_line`` for ``func_addrs`` in ``bin_name``.

    When the binary has no DWARF info a message is printed and an empty
    dict is returned.
    """
    with open(bin_name, "rb") as stream:
        elf = ELFFile(stream)
        if elf.has_dwarf_info():
            return decode_file_line(elf.get_dwarf_info(), func_addrs)
        print("No Dwarf Found in ", bin_name)
    return {}
def get_producer(path):
    """Return the DW_AT_producer string of the first compile unit that yields one.

    Compile units are tried in order. A unit is skipped when it has no
    DW_AT_producer, when the attribute uses a form other than
    ``DW_FORM_strp`` / ``DW_FORM_GNU_strp_alt``, or when reading it fails.

    Returns:
        The producer value; for ``DW_FORM_GNU_strp_alt`` the result of
        ``get_dwz`` on the alternate debug file. ``None`` if no compile unit
        yields a producer.
    """
    with open(path, "rb") as f:
        elffile = ELFFile(f)
        dwarfinfo = elffile.get_dwarf_info()

        for CU in dwarfinfo.iter_CUs():
            # Start with the top DIE, the root for this CU's DIE tree
            top_DIE = CU.get_top_DIE()
            try:
                attrs = top_DIE.attributes.get('DW_AT_producer')
                if attrs is None:
                    continue

                if attrs.form == 'DW_FORM_strp':
                    # lucky ;)
                    return attrs.value

                if attrs.form != 'DW_FORM_GNU_strp_alt':
                    # Unsupported form: try the next compile unit.
                    continue

                # DWARF extensions elfutils recognizes/supports are
                # described at,
                #
                # https://fedorahosted.org/elfutils/wiki/DwarfExtensions
                #
                # You can find the alt dwz file by reading the
                # .gnu_debugaltlink section. Which contains a file name
                # followed by the build-id of the dwz file. The build-id
                # symlink will point at the /usr/lib/debug/.dwz/ file.
                #
                # DW_FORM_GNU_strp_alt is like DW_FORM_strp, but it refers
                # to a string in the .dwz file, not in the main file.
                for section in elffile.iter_sections():
                    name = section.name
                    if isinstance(name, bytes):
                        name = name.decode('latin-1')
                    if name != ".gnu_debugaltlink":
                        continue

                    data = section.data()
                    fdata = data[0:data.find(b"\x00")]
                    # Section data is bytes, so the needle must be bytes
                    # too; a str needle raises TypeError on Python 3.
                    i = fdata.find(b".dwz/")
                    if i < 0:
                        # No ".dwz/" component to build a path from.
                        continue
                    rpath = os.path.join("/usr/lib/debug/",
                                         fdata[i:].decode("utf-8"))
                    # offset in alternate (.dwz/...)'s .debug_str"
                    return get_dwz(rpath, offset=attrs.value)
            except Exception:
                # Best effort: a unit that cannot be read is skipped, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                continue
    return None
class LocateUndef(object):
    """ Locate symbol in the compilation unit.

    For each symbol (by a name string) records a (compile unit, DIE) pair,
    where the DIE is the pyelftools/dwarf/die object whose DW_AT_name
    matches.

    Public methods:
    findDies -- look up the DIEs for a list of symbol names
    getDies -- return the dict of found DIEs, keyed by symbol name
    """

    def __init__(self, fname):
        """ fname: file name of object file

        When the file has no DWARF info, ``self.dwarfinfo`` stays None and
        findDies finds nothing.
        """
        self.dies = {}
        # Defined up front so findDies never hits a missing attribute.
        self.dwarfinfo = None
        # open() raises on failure, so no separate check is needed.
        self.fh = open(fname, 'rb')
        self.elffile = ELFFile(self.fh)
        if self.elffile.has_dwarf_info():
            self.dwarfinfo = self.elffile.get_dwarf_info()

    def __del__(self):
        # ``fh`` is missing when open() failed in __init__.
        fh = getattr(self, 'fh', None)
        if fh:
            fh.close()

    def findDies(self, namesList):
        """ namesList -- list of symbol names (strings). Each symbol is
        expected to be once in the namesList.

        Found names are removed from namesList (the caller's list is
        modified); the search stops once the list is empty.
        """
        dwarfinfo = getattr(self, 'dwarfinfo', None)
        if dwarfinfo is None or len(namesList) < 1:
            return

        for cu in dwarfinfo.iter_CUs():
            for die in cu.iter_DIEs():
                if die.is_null():
                    continue
                if 'DW_AT_name' not in die.attributes:
                    continue
                name = die.attributes['DW_AT_name'].value.decode('ascii')
                if name not in namesList:
                    continue
                # NOTE(review): the first DIE with a matching name wins;
                # the original author was unsure this is always the
                # definition.
                self.dies[name] = (cu, die)
                namesList.remove(name)
                if len(namesList) < 1:
                    return

    def getDies(self):
        """ returns the dict mapping each found symbol name to its
        (compile unit, DIE) pair """
        return self.dies
def read(self, view):
    """Report every sized structure type found in self.file to *view*.

    view -- object with an add(name, kind, size, members) method.

    For each DW_TAG_structure_type DIE that has a DW_AT_byte_size,
    view.add(name, "Struct", size, members) is called, where:

    * name is the DW_AT_name, or "<decl_file>:<decl_line>" for an unnamed
      structure;
    * members is the list of names of its DW_TAG_member children.
      Unnamed members (e.g. anonymous unions) have no DW_AT_name and are
      left out instead of raising KeyError.

    Structures without a byte size are skipped.  Nothing is returned.
    """
    self.log.info('Reading file %s', self.file)
    with open(self.file, "rb") as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        for CU in dwarfinfo.iter_CUs():
            print(' Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            for die in CU.iter_DIEs():
                if die.tag != 'DW_TAG_structure_type':
                    continue
                attrs = die.attributes

                # Check the size first: structures without one are skipped
                # before any name lookup that could fail.
                if 'DW_AT_byte_size' not in attrs:
                    continue
                size = attrs['DW_AT_byte_size'].value

                if 'DW_AT_name' in attrs:
                    name = attrs['DW_AT_name'].value.decode()
                else:
                    name = "{}:{}".format(
                        attrs['DW_AT_decl_file'].value,
                        attrs['DW_AT_decl_line'].value
                    )

                members = []
                if die.has_children:
                    members = [
                        child.attributes['DW_AT_name'].value.decode()
                        for child in die.iter_children()
                        if child.tag == 'DW_TAG_member'
                        and 'DW_AT_name' in child.attributes
                    ]
                view.add(name, "Struct", size, members)
def process_dwarf_info(in_file, out_file):
    """Process the DWARF debug sections of in_file and serialize the result.

    in_file  -- path of the ELF file to read.
    out_file -- path the serialized CFG_pb2.Module is written to.

    Types and frames are collected into TYPES_MAP / EH_FRAMES, every
    compile unit is decoded into the module and GLOBAL_VARIABLES, and each
    global variable with a positive size is added to the module.

    Returns False when in_file has no DWARF info; otherwise returns None
    after writing out_file.
    """
    DEBUG('Processing file: {0}'.format(in_file))

    with open(in_file, 'rb') as f:
        f_elf = ELFFile(f)
        if not f_elf.has_dwarf_info():
            # Report the actual input path (the original formatted the
            # builtin `file` here).
            DEBUG("{0} has no debug informations!".format(in_file))
            return False

        M = CFG_pb2.Module()
        M.name = "GlobalVariable"

        set_global_machine_arch(f_elf.get_machine_arch())
        dwarf_info = f_elf.get_dwarf_info()
        process_types(dwarf_info, TYPES_MAP)
        process_frames(dwarf_info, EH_FRAMES)
        section_offset = dwarf_info.debug_info_sec.global_offset

        # Iterate through all the compile units
        for CU in dwarf_info.iter_CUs():
            DEBUG('Found a compile unit at offset {0}, length {1}'.format(
                CU.cu_offset, CU['unit_length']))
            top_DIE = CU.get_top_DIE()
            c_unit = CUnit(top_DIE, CU['unit_length'], CU.cu_offset,
                           section_offset)
            c_unit.decode_control_unit(M, GLOBAL_VARIABLES)

        # .values() works on both Python 2 and 3; the keys are not needed.
        for value in GLOBAL_VARIABLES.values():
            if value["size"] > 0:
                gvar = M.global_vars.add()
                gvar.name = value["name"]
                gvar.ea = value["addr"]
                gvar.size = value["size"]
            else:
                DEBUG("Look for {}".format(pprint.pformat(value)))

        # SerializeToString() yields bytes, so the file must be binary.
        with open(out_file, "wb") as outf:
            outf.write(M.SerializeToString())

    DEBUG("Global Vars\n")
    DEBUG('Number of Global Vars: {0}'.format(len(GLOBAL_VARIABLES)))
    DEBUG("{}".format(pprint.pformat(GLOBAL_VARIABLES)))
    DEBUG("End Global Vars\n")
def process_file(filename):
    """Print the ELF class of *filename* and basic data on each compile unit.

    The ELF class line is always printed.  When the file has DWARF info,
    one line per compile unit follows with its offset and address size.
    """
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        # elfclass is a public attribute of ELFFile, read from its header
        print('%s: elfclass is %s' % (filename, elf.elfclass))

        if not elf.has_dwarf_info():
            return

        for unit in elf.get_dwarf_info().iter_CUs():
            # cu_offset is a public attribute of the CU; address_size is
            # part of the CU header.
            summary = (unit.cu_offset, unit['address_size'])
            print(' CU at offset 0x%x. address_size is %s' % summary)
def test_DWARF_indirect_forms(self):
    """Sanity check: the DW_FORM_indirect ARM sample parses into 9 CUs.

    The sample file uses a lot of DW_FORM_indirect and is also an ARM ELF
    with non-trivial DWARF info, so parsing it successfully and getting
    the expected number of compile units is a useful smoke test.
    """
    sample = os.path.join('test', 'testfiles_for_unittests',
                          'arm_with_form_indirect.elf')
    with open(sample, 'rb') as stream:
        elf = ELFFile(stream)
        self.assertTrue(elf.has_dwarf_info())

        compile_units = [unit for unit in elf.get_dwarf_info().iter_CUs()]
        self.assertEqual(len(compile_units), 9)
def process_file(filename):
    """Print each compile unit of *filename*, its source name and DIE tree.

    For every compile unit the offset/length, the tag of the top DIE and
    the source file name are printed, then die_info_rec() is called on the
    top DIE.  Returns early (after a message) when there is no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, the starting
        # point for all DWARF-based processing in pyelftools.
        for unit in elf.get_dwarf_info().iter_CUs():
            # Header elements of a compile unit are read via item lookup.
            print(' Found a compile unit at offset %s, length %s' % (
                unit.cu_offset, unit['unit_length']))

            # The top DIE is the root of this CU's DIE tree.
            root = unit.get_top_DIE()
            print(' Top DIE with tag=%s' % root.tag)

            # The file name is DW_AT_comp_dir joined with DW_AT_name;
            # either of the two may be missing in practice.
            attrs = root.attributes
            if 'DW_AT_comp_dir' in attrs:
                name = bytes2str(attrs['DW_AT_comp_dir'].value)
                if 'DW_AT_name' in attrs:
                    name = os.path.join(
                        name, bytes2str(attrs['DW_AT_name'].value))
            else:
                # Raises KeyError when both attributes are absent.
                name = bytes2str(attrs['DW_AT_name'].value)
            print(' name=%s' % name)

            # Display DIEs recursively starting with the top DIE
            die_info_rec(root)
def generate_docs_from_stream(filename, fd):
    """Build one doc per compile unit of the ELF stream *fd*.

    filename -- name used only in the "no DWARF info" message.
    fd       -- open binary stream of the ELF file.

    Returns sequence(docs), where each doc is mdoc((cu, top_DIE)), or an
    empty list when the stream has no DWARF info.
    """
    elf = ELFFile(fd)
    if elf.has_dwarf_info():
        # cu: compilation unit; DIE: debug information entry
        units = elf.get_dwarf_info().iter_CUs()
        per_unit = [mdoc((unit, unit.get_top_DIE())) for unit in units]
        return sequence(per_unit)

    print(' ' + filename + ' has no DWARF info')
    return []
def extract(self, binary):
    """Collect prototypes from every DIE of the ELF file *binary*.

    Each DIE of each compile unit is handed to self.__extract_DIE together
    with the shared result dict.  Returns that dict; it is empty when the
    file has no DWARF info (a message is printed in that case).
    """
    protos = {}
    with open(binary, 'rb') as stream:
        elf = ELFFile(stream)
        if elf.has_dwarf_info():
            for unit in elf.get_dwarf_info().iter_CUs():
                for entry in unit.iter_DIEs():
                    self.__extract_DIE(unit, entry, protos)
        else:
            print(' File has no debug info (DWARF format expected) !')
    return protos
class SharedObjectInfo():
    """Address range and lazily loaded DWARF info of one loaded ELF object.

    Attributes set by the constructor:
      path      -- path of the ELF file
      elf_file  -- ELFFile parsed from that path
      low_addr  -- base address the object is loaded at
      high_addr -- low_addr plus the summed, alignment-rounded size of all
                   PT_LOAD segments
      is_pic    -- True when the ELF header type is ET_DYN
    """

    def __init__(self, path, baddr):
        """Parse the ELF file at *path*, assumed loaded at address *baddr*.

        Exits the process with status -1 when the file cannot be read.
        """
        self.path = path
        self._set_elf_file()
        self.low_addr = baddr
        self.high_addr = baddr + self._get_mem_size()
        # Check whether the ELF file is position independent code
        self.is_pic = self.elf_file.header['e_type'] == 'ET_DYN'
        # Don't set the so info's dwarf_info initially, only when
        # symbol lookup is first required
        self._dwarf_info = None

    @property
    def dwarf_info(self):
        """DWARF info of the object, loaded on first access.

        Exits the process with status -1 when the binary has none.
        """
        if self._dwarf_info is None:
            self._set_dwarf_info()
        return self._dwarf_info

    def _set_elf_file(self):
        """Open self.path and parse it into self.elf_file."""
        binary_file = None
        try:
            # The handle is deliberately kept open on success: it is
            # handed to ELFFile, which this class keeps using afterwards.
            binary_file = open(self.path, 'rb')
            self.elf_file = ELFFile(binary_file)
        except IOError:
            if binary_file is not None:
                binary_file.close()
            print('Failed to open ' + self.path, file=sys.stderr)
            sys.exit(-1)

    def _set_dwarf_info(self):
        """Load the DWARF info of the ELF file into self._dwarf_info."""
        if not self.elf_file.has_dwarf_info():
            print('Binary ' + self.path + ' has no DWARF info',
                  file=sys.stderr)
            sys.exit(-1)
        self._dwarf_info = self.elf_file.get_dwarf_info()

    def _get_mem_size(self):
        """Return the summed size of all PT_LOAD segments.

        Each segment's p_memsz is rounded up to a multiple of its p_align
        before being added.  A p_align of 0 or 1 means "no alignment", so
        the size is taken as is (a 0 used to raise ZeroDivisionError).
        The result is always an int: the rounding uses integer arithmetic
        only.
        """
        mem_size = 0
        for segment in self.elf_file.iter_segments():
            if segment['p_type'] != 'PT_LOAD':
                continue
            alignment = segment['p_align']
            segment_size = segment['p_memsz']
            if alignment > 1:
                # Integer ceiling division, then back to a byte count.
                segment_size = -(-segment_size // alignment) * alignment
            mem_size += segment_size
        return mem_size
def process_file(filename):
    """Print every location list referenced by a DIE attribute in *filename*.

    Returns early (after a message) when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, the starting
        # point for all DWARF-based processing in pyelftools.
        info = elf.get_dwarf_info()

        # Location lists are extracted by DWARFInfo from the .debug_loc
        # section and returned as a LocationLists object.
        loc_lists = info.location_lists()

        # Required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elf.get_machine_arch())

        for unit in info.iter_CUs():
            # Header elements of a compile unit are read via item lookup.
            print(' Found a compile unit at offset %s, length %s' % (
                unit.cu_offset, unit['unit_length']))

            # Walk all attributes of all DIEs of this compile unit.
            for entry in unit.iter_DIEs():
                for attr in itervalues(entry.attributes):
                    if not attribute_has_location_list(attr):
                        continue
                    # The attribute value is an offset into .debug_loc,
                    # which the location lists object can decode.
                    decoded = loc_lists.get_location_list_at_offset(
                        attr.value)
                    rendered = show_loclist(decoded, info, indent=' ')
                    print(' DIE %s. attr %s.\n%s' % (
                        entry.tag, attr.name, rendered))
def process_file(filename):
    """Print every range list referenced by a DIE attribute in *filename*.

    Returns early (after a message) when the file has no DWARF info or no
    .debug_ranges section.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, the starting
        # point for all DWARF-based processing in pyelftools.
        info = elf.get_dwarf_info()

        # Range lists are extracted by DWARFInfo from the .debug_ranges
        # section and returned as a RangeLists object.
        ranges = info.range_lists()
        if ranges is None:
            print(' file has no .debug_ranges section')
            return

        for unit in info.iter_CUs():
            # Header elements of a compile unit are read via item lookup.
            print(' Found a compile unit at offset {0!s}, length {1!s}'.format(
                unit.cu_offset, unit['unit_length']))

            # Walk all attributes of all DIEs of this compile unit.
            for entry in unit.iter_DIEs():
                for attr in itervalues(entry.attributes):
                    if not attribute_has_range_list(attr):
                        continue
                    # The attribute value is an offset into .debug_ranges,
                    # which the range lists object can decode.
                    decoded = ranges.get_range_list_at_offset(attr.value)
                    print(' DIE {0!s}. attr {1!s}.\n{2!s}'.format(
                        entry.tag, attr.name, decoded))
def process_file(filename, outfile):
    """Emit serializers for "object_t" of every compile unit into *outfile*.

    filename -- ELF file to read.
    outfile  -- text file the serializers are written to.

    Raises IOError when *filename* has no DWARF info; *outfile* is not
    created in that case.
    """
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            raise IOError("ERROR: {} has no DWARF info".format(filename))

        # get_dwarf_info returns a DWARFInfo context object, the starting
        # point for all DWARF-based processing in pyelftools.
        info = elf.get_dwarf_info()

        with open(outfile, 'w') as sink:
            for unit in info.iter_CUs():
                # NOTE(review): the result is never used; the call is kept
                # in case get_dies_by_offset has side effects.
                get_dies_by_offset(unit)
                wanted = get_serialization_types(["object_t"], unit)
                emit.emit_serializers(sink, wanted)
def main():
    """List the inlined functions recorded in an ELF file's DWARF info.

    Command line:
      filename       ELF file to inspect (default: a.out)
      --ignore P     skip functions whose file name starts with P
                     (may be given several times)
      --demangle     print the unmangled linkage name
      --declaration  prefix each name with "<file>:<line> "

    Functions are de-duplicated and printed in sorted order.  Exits with
    a usage error when the file has no DWARF info.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', nargs='?', default='a.out')
    parser.add_argument('--ignore', action='append', default=[])
    parser.add_argument('--demangle', action='store_true')
    parser.add_argument('--declaration', action='store_true')
    args = parser.parse_args()

    filename = args.filename
    # str.startswith accepts a tuple of prefixes.
    ignore = tuple(args.ignore)

    with open(filename, 'rb') as input_file:
        elf = ELFFile(input_file)
        if not elf.has_dwarf_info():
            parser.error('%s has no DWARF info' % filename)
        dwarf = elf.get_dwarf_info()

        def iter_inlined():
            """Yield the named, inlined, non-ignored functions of all CUs."""
            for cu in dwarf.iter_CUs():
                for func in iter_functions(cu):
                    # only include inlined functions
                    if not func.is_inlined:
                        continue
                    # skip functions with no filename assigned
                    if not func.filename:
                        continue
                    # skip functions from ignored files
                    if ignore and func.filename.startswith(ignore):
                        continue
                    # skip unnamed functions
                    if not func.linkage_name:
                        continue
                    yield func

        # Materialize while the file is still open: the DWARF data is
        # read from the stream while iterating.
        functions = sorted(set(iter_inlined()))

    for func in functions:
        if args.demangle:
            name = func.linkage_name_unmangled
        else:
            name = func.linkage_name
        if args.declaration:
            line = '%s:%i %s' % (func.filename, func.line, name)
        else:
            line = name
        # A single parenthesized argument prints the same on Python 2
        # and Python 3.
        print(line)
def process_file(filename, address):
    """Print the function, source file and line that contain *address*.

    filename -- ELF file to inspect.
    address  -- address passed on to decode_funcname / decode_file_line.

    Returns early (after a message) when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, the starting
        # point for all DWARF-based processing in pyelftools.
        info = elf.get_dwarf_info()

        func_name = decode_funcname(info, address)
        src_file, src_line = decode_file_line(info, address)

        print('Function:', bytes2str(func_name))
        print('File:', bytes2str(src_file))
        print('Line:', src_line)
def test_files(fns, quiet=False, profile=False):
    """Compare the functions each static engine finds with the DWARF truth.

    fns     -- iterable of file names to test.
    quiet   -- when true, do not report skipped files (non-ELF files and
               files without DWARF info).
    profile -- when true, run each engine under pycallgraph and write the
               call graph to 'prof.png' (needs pycallgraph).

    For every file and every engine in ENGINES one line is printed:
    either that all functions were found, or the list of missed ones.
    """
    for fn in fns:
        # Binary mode is required to parse ELF data, and the context
        # manager closes the handle once the DWARF data has been read.
        with open(fn, 'rb') as stream:
            try:
                elf = ELFFile(stream)
            except ELFError:
                if not quiet:
                    print("Skipping non-ELF file: {}".format(fn))
                continue

            if not elf.has_dwarf_info():
                if not quiet:
                    print("No dwarf info for {}.".format(fn))
                continue

            dwarfinfo = elf.get_dwarf_info()
            dwarf_functions = get_functions(dwarfinfo)

        engine_functions = {}
        for engine in ENGINES:
            this_engine = Static(fn, debug=0, static_engine=engine)  # no debug output
            # Honour the `profile` parameter (the original read the
            # global `args` here and ignored it).
            if profile:
                # needs pycallgraph
                from pycallgraph import PyCallGraph
                from pycallgraph.output import GraphvizOutput
                graphviz = GraphvizOutput()
                graphviz.output_file = 'prof.png'
                with PyCallGraph(output=graphviz):
                    this_engine.process()
            else:
                this_engine.process()
            engine_functions[engine] = {x.start for x in this_engine['functions']}

        total_fxns = len(dwarf_functions)
        short_fn = fn.split("/")[-1]
        for engine, functions in engine_functions.items():
            missed = dwarf_functions - functions
            if not missed:
                print("{} {}: {} found all {} function(s).".format(
                    ok_green, short_fn, engine, total_fxns))
            else:
                fmt = "{} {}: {} missed {}/{} functions: {}."
                print(fmt.format(warn, short_fn, engine, len(missed),
                                 total_fxns,
                                 ", ".join(hex(fxn) for fxn in missed)))
def get_files_from_executable(filename):
    """Return the source file paths recorded in an executable's DWARF info.

    One path is collected per compile unit, taken from the full path of
    its top DIE.

    Always returns a list.  It is empty when *filename* is not a valid
    ELF file or carries no DWARF info; both cases are logged.  (The
    no-DWARF case used to return None.)
    """
    with open(filename, 'rb') as f:
        # ELFFile looks for the magic number; if there's none, ELFError
        # is raised.
        try:
            elffile = ELFFile(f)
        except ELFError:
            logging.info("%s is invalid elf file", filename)
            return []

        if not elffile.has_dwarf_info():
            logging.info("File does not have dwarf info, no sources in the project file")
            return []

        dwarfinfo = elffile.get_dwarf_info()
        # Go over all compile units and take the source path of each one
        # from its top DIE.
        return [CU.get_top_DIE().get_full_path()
                for CU in dwarfinfo.iter_CUs()]
def get_executable_src_files(exec_path):
    """Return the absolute, symlink-resolved source paths of an executable.

    exec_path -- absolute path of the executable (asserted).

    One path is produced per compile unit from the DW_AT_name of its top
    DIE.  A relative name is made absolute with DW_AT_comp_dir (asserted
    to be present in that case).

    Returns an empty list, after printing a message, when the file is
    not an ELF file or has no DWARF information.
    """
    assert (os.path.isabs(exec_path))

    exec_src_paths = []
    with open(exec_path, 'rb') as elf_file_handle:
        try:
            elf_file = ELFFile(elf_file_handle)
        except Exception:
            # Still best effort for any parse failure, but no longer
            # swallows KeyboardInterrupt / SystemExit like a bare except.
            print ('-- Executable \'' + exec_path + '\' is not an ELF file')
            return []

        if not elf_file.has_dwarf_info():
            print ('-- Executable \'' + exec_path + '\' has no DWARF information')
            return []

        dwarf_info = elf_file.get_dwarf_info()
        for CU in dwarf_info.iter_CUs():
            attributes = CU.get_top_DIE().attributes

            name = ''
            comp_dir = ''
            if 'DW_AT_name' in attributes:
                name = attributes['DW_AT_name'].value
            if 'DW_AT_comp_dir' in attributes:
                comp_dir = attributes['DW_AT_comp_dir'].value

            # If the source path in the executable is not an absolute
            # path then use the DW_AT_comp_dir attribute to get the
            # build directory to make it absolute
            #
            # Once we have an absolute path, use realpath to resolve any
            # symbolic links
            src_path = name
            if not os.path.isabs(name):
                assert (comp_dir != '')
                src_path = os.path.join(comp_dir, name)
            assert(os.path.isabs(src_path))
            src_path = os.path.realpath(src_path)
            exec_src_paths.append(src_path.decode())

    return exec_src_paths
def get_producer(debugfile, dwzfile, fast):
    """Return the set of DW_AT_producer values found in *debugfile*.

    debugfile -- open binary stream of the ELF debug file.
    dwzfile   -- alternate (dwz) file handed to get_dwz() for producers
                 stored with DW_FORM_GNU_strp_alt.
    fast      -- when true, stop after the first producer found.

    Compile units without a DW_AT_producer attribute, with an unsupported
    attribute form, or whose dwz lookup fails are skipped.
    """
    elffile = ELFFile(debugfile)
    dwarfinfo = elffile.get_dwarf_info()
    producers = set()

    for CU in dwarfinfo.iter_CUs():
        # Start with the top DIE, the root for this CU's DIE tree
        top_DIE = CU.get_top_DIE()
        attrs = top_DIE.attributes.get('DW_AT_producer')
        if attrs is None:
            continue

        if attrs.form == 'DW_FORM_GNU_strp_alt':
            try:
                producers.add(get_dwz(dwzfile, offset=attrs.value))
            except Exception:
                # Best effort, as before: a failed dwz lookup only means
                # this compile unit contributes nothing.
                continue
        elif attrs.form == 'DW_FORM_strp':
            # lucky ;) the string lives in this file
            producers.add(attrs.value)
        else:
            # Unsupported form: ignore this compile unit.
            continue

        if fast:
            # one producer is enough ;(
            break

    return producers
def process_file(filename):
    """Write the function table of *filename* into the file itself.

    The file is opened for in-place update.  Its symbol table and the
    address of the FTABLE symbol are looked up, DWARF types and functions
    are processed, and one entry per named function is written with
    write_func(), in order of symbol offset, at the file position
    computed from the FTABLE address and the .rodata address/offset.

    Exits with status 1 when the symbol table or the FTABLE symbol is
    missing.  The file is closed on every exit path.
    """
    with open(filename, 'r+b') as f:
        elffile = ELFFile(f)

        symtab, ftable_addr = get_symtab(elffile)
        if symtab is None:
            print("Cannot find symbol table. Compiled without debug symbols?")
            sys.exit(1)
        if ftable_addr is None:
            print("The provided file does not contain symbol `%s'" % FTABLE)
            print("Please ensure there is a reference to `%s' in traceback.c" % FTABLE)
            sys.exit(1)

        rodata_addr, rodata_off = find_rodata(elffile)

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        typemap = dict()
        process_types(dwarfinfo, typemap)
        process_funcs(dwarfinfo, symtab, typemap)

        i = 0
        # Translate the table's address into an offset within the file.
        f.seek(ftable_addr - rodata_addr + rodata_off)
        for func in sorted(symtab, key=lambda x: symtab[x].offset):
            if len(func) == 0:
                continue
            # NOTE(review): with `>` this writes up to FUNCTS_MAX_NUM + 1
            # entries; confirm the table really has room for that many.
            if i > FUNCTS_MAX_NUM:
                break
            write_func(f, func, symtab[func])
            i += 1
class Image(object):
    """ELF image with DWARF data indexed for address <-> source line lookups.

    When the file has DWARF info, construction builds:
      _compile_units -- per compile unit name: line program entries, low /
                        high pc, comp dir, functions, variables and a
                        "lines" map (line number -> list of addresses)
      _addresses     -- address -> "<cu name>+<line>" string
      _cfa_rule      -- pc -> decoded CFI table entry (only when CFI
                        entries exist)
    This is Python 2 code (print statements).
    """
    def __init__(self, fname):
        """Load the ELF file *fname* and index its DWARF info, if present."""
        # NOTE(review): both branches open the file in text mode ("r")
        # although ELF data is binary, and the Windows branch never
        # closes the handle -- confirm this is intended.
        if platform.system() == "Windows":
            elf_data = open(fname, "r")
        else:
            with open(fname, "r") as f:
                elf_data = StringIO(f.read())
        self.elf = ELFFile(elf_data)
        # NOTE(review): without DWARF info none of the lookup tables nor
        # get_expr_evaluator are created.
        if self.elf.has_dwarf_info():
            self.dwarf = self.elf.get_dwarf_info()
            set_global_machine_arch(self.elf.get_machine_arch())
            self.__tame_dwarf()
            self.get_expr_evaluator = lambda: ExprLiveEval(self)

    @property
    def executable(self):
        """(base_addr, image bytes) tuple, built on first access and cached."""
        try:
            return self._exe
        except:
            # First access: _exe does not exist yet.
            self._exe = self._build_executable()
            return self._exe

    def _build_executable(self):
        """Concatenate the data of sections 1..3 into one image.

        Section 1 must have flag bit 2 set and be SHT_PROGBITS (asserted).
        Sections 2 and 3 are appended only when they satisfy the same
        test, and must then start exactly where the image built so far
        ends; otherwise an Exception is raised.

        Returns (sh_addr of section 1, concatenated data).
        """
        s = self.elf.get_section(1)
        assert s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS"
        base_addr = s.header["sh_addr"]
        img = s.data()
        s = self.elf.get_section(2)
        if s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS":
            if s.header["sh_addr"] != base_addr + len(img):
                raise Exception("bad section vaddr - #2 should follow #1")
            img += s.data()
        s = self.elf.get_section(3)
        print "%s" % str(s.header)
        if s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS":
            if s.header["sh_addr"] != base_addr + len(img):
                raise Exception("bad section vaddr - #3 should follow #2")
            img += s.data()
        return (base_addr, img)

    def __tame_dwarf(self):
        """Build the lookup tables described in the class docstring."""
        dw = self.dwarf
        self._compile_units = {}
        self._addresses = {}
        self._lowest_known_address = None

        location_lists = dw.location_lists()

        # Prefer EH CFI entries, fall back to plain CFI entries.
        cfi = None
        if dw.has_EH_CFI():
            cfi = dw.EH_CFI_entries()
            print "we have EH CFI entries"
        elif dw.has_CFI():
            cfi = dw.CFI_entries()
            print "we have CFI entries"
        else:
            print "no (EH) CFI"

        if None is not cfi:
            self._cfa_rule = {}
            for c in cfi:
                try:
                    decoded = c.get_decoded()
                except:
                    # Decoding stops at the first entry that fails.
                    print "CFI decoding exception"
                    break
                for entry in decoded.table:
                    if entry["pc"] in self._cfa_rule:
                        print "duplicate cfa rule found at pc %x" % entry["pc"]
                        print "\t%s" % str(self._cfa_rule[entry["pc"]])
                        print "\t%s" % str(entry)
                        print
                    #assert (not entry["pc"] in self._cfa_rule) or (self._cfa_rule[entry["pc"]] == entry)
                    # A duplicate pc overwrites the earlier rule.
                    self._cfa_rule[entry["pc"]] = entry

        for c in dw.iter_CUs():
            functions = {}
            variables = {}
            td = c.get_top_DIE()
            # Only direct children of the top DIE are examined.
            for d in td.iter_children():
                if d.tag == 'DW_TAG_subprogram':
                    if 'DW_AT_declaration' in d.attributes:
                        continue
                    lpc = d.attributes['DW_AT_low_pc'].value
                    hpc = d.attributes['DW_AT_high_pc'].value
                    # presumably high_pc is then an offset from low_pc
                    # rather than an address -- TODO confirm
                    if hpc < lpc:
                        hpc += lpc
                    function_name = d.attributes['DW_AT_name'].value
                    f = {}
                    f["lpc"] = lpc
                    f["hpc"] = hpc
                    f["args"] = {}
                    f["vars"] = {}
                    if 'DW_AT_frame_base' in d.attributes:
                        a = d.attributes['DW_AT_frame_base']
                        # These two forms are treated as an offset of a
                        # location list; any other value is stored as is.
                        if a.form == 'DW_FORM_data4' or a.form == 'DW_FORM_sec_offset':
                            f["fb"] = location_lists.get_location_list_at_offset(a.value)
                        else:
                            f["fb"] = a.value
                    for child in d.iter_children():
                        if child.tag == "DW_TAG_formal_parameter":
                            name = child.attributes['DW_AT_name'].value
                            v = {}
                            try:
                                if child.attributes['DW_AT_location'].form in ['DW_FORM_sec_offset', 'DW_FORM_data4']:
                                    v["location"] = location_lists.get_location_list_at_offset(child.attributes['DW_AT_location'].value)
                                else:
                                    v["location"] = child.attributes['DW_AT_location'].value
                            except:
                                # Any failure (e.g. no DW_AT_location)
                                # yields an empty location.
                                v["location"] = []
                            f["args"][name] = v
                        if child.tag == "DW_TAG_variable":
                            name = child.attributes['DW_AT_name'].value
                            v = {}
                            try:
                                if child.attributes['DW_AT_location'].form in ['DW_FORM_sec_offset', 'DW_FORM_data4']:
                                    v["location"] = location_lists.get_location_list_at_offset(child.attributes['DW_AT_location'].value)
                                else:
                                    v["location"] = child.attributes['DW_AT_location'].value
                            except:
                                v["location"] = []
                            f["vars"][name] = v
                    functions[function_name] = f
                elif d.tag == 'DW_TAG_variable':
                    # Only variables whose DW_AT_decl_file value is 1 are
                    # kept -- presumably the CU's primary source file;
                    # verify.
                    if d.attributes['DW_AT_decl_file'].value == 1:
                        try:
                            name = d.attributes['DW_AT_name'].value
                        except:
                            # NOTE(review): this fallback reads the same
                            # attribute again, so a missing DW_AT_name
                            # raises KeyError here.
                            name = '(%s)' % str(d.attributes['DW_AT_name'])
                        v = {}
                        try:
                            v["location"] = d.attributes['DW_AT_location'].value
                        except:
                            v["location"] = []
                        variables[name] = v

            x = {}
            fname = td.attributes['DW_AT_name'].value
            x["line_program"] = dw.line_program_for_CU(c).get_entries()
            x["lpc"] = td.attributes['DW_AT_low_pc'].value
            x["hpc"] = td.attributes['DW_AT_high_pc'].value
            x["comp_dir"] = td.attributes['DW_AT_comp_dir'].value
            x["functions"] = functions
            x["variables"] = variables
            self._compile_units[fname] = x
            # Track the smallest low pc over all compile units.
            if ((self._lowest_known_address is None) or (self._lowest_known_address > x["lpc"])):
                self._lowest_known_address = x["lpc"]

        for c in self._compile_units:
            self._compile_units[c]["lines"] = {}
            for line in self._compile_units[c]["line_program"]:
                state = line.state
                # Entries without a state, and rows flagged end_sequence,
                # basic_block, epilogue_begin or prologue_end, are skipped.
                if state is not None and not (state.end_sequence or state.basic_block or state.epilogue_begin or state.prologue_end):
                    cl = "%s+%d" % (c, state.line)
                    if state.address in self._addresses and self._addresses[state.address] != cl:
                        raise Exception("addr %x is both \"%s\" and \"%s+%d\"" % (state.address, self._addresses[state.address], c, state.line))
                    self._addresses[state.address] = cl
                    try:
                        self._compile_units[c]["lines"][state.line] += [state.address]
                    except:
                        # First address seen for this line.
                        self._compile_units[c]["lines"][state.line] = [state.address]

        if not cfi is None:
            print "CFA table:"
            for pc in sorted(self._cfa_rule.keys()):
                print "%x: %s\t\t(%s)" % (pc, str(self._cfa_rule[pc]), self.addr2line(pc))

    def addr2line(self, addr):
        """Return "<cu name>+<line>" for *addr*, or '' when it is unknown."""
        try:
            return self._addresses[addr]
        except:
            return ''

    def loc_at(self, addr):
        """Return (function, cu name, line, comp dir) for *addr*.

        When *addr* itself has no line entry, it is decreased in steps of
        4 until an entry is found or it drops below the lowest known
        address.  Returns ("unknown", "", 0, "") when nothing is found.
        The line is returned as the string taken from the "<cu>+<line>"
        entry; the function name is '' when no function range of that
        compile unit contains the (possibly decreased) address.
        """
        line = self.addr2line(addr)
        # NOTE(review): _lowest_known_address stays None when there are no
        # compile units; on Python 2 `addr >= None` is always true, so
        # this loop would not terminate -- confirm.
        while '' == line and addr >= self._lowest_known_address:
            addr -= 4
            line = self.addr2line(addr)
        if '' == line:
            return ("unknown", "", 0, "")
        cuname, culine = line.split("+")
        fname = ""
        c = self._compile_units[cuname]
        for f in c["functions"]:
            if ((c["functions"][f]["lpc"] <= addr) and (c["functions"][f]["hpc"] >= addr)):
                fname = f
                break
        return (fname, cuname, culine, c["comp_dir"])

    def line2addr(self, fname, line):
        """Return the list of addresses recorded for *line* of unit *fname*.

        Raises KeyError when the compile unit or the line is unknown.
        """
        return self._compile_units[fname]["lines"][line]
# please ignore it!
#
# Scratch script: print basic facts about a sample ELF file and dump every
# location list found in its .debug_loc section.
from __future__ import print_function
import sys
import pprint

from elftools.elf.structs import ELFStructs
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import *
from elftools.elf.relocation import *
from elftools.dwarf.locationlists import LocationLists
from elftools.dwarf.descriptions import describe_DWARF_expr

# The context manager closes the stream once everything has been printed
# (it used to stay open for the rest of the process).
with open('test/testfiles/exe_simple64.elf', 'rb') as stream:
    efile = ELFFile(stream)
    print('elfclass', efile.elfclass)
    print('===> %s sections!' % efile.num_sections())
    print(efile.header)

    dinfo = efile.get_dwarf_info()

    # Decode the location lists straight from the .debug_loc stream.
    llists = LocationLists(dinfo.debug_loc_sec.stream, dinfo.structs)
    for loclist in llists.iter_location_lists():
        print('----> loclist!')
        for li in loclist:
            print(li)
            print(describe_DWARF_expr(li.loc_expr, dinfo.structs))