def process_file(filename):
    """Print offset, length, top-DIE tag and name of every compile unit.

    ``filename`` is opened as an ELF file.  When it carries no DWARF info a
    notice is printed and the function returns without further output.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo object is the entry point for DWARF processing;
        # it lets us walk the compile units of .debug_info.
        for unit in elf.get_dwarf_info().iter_CUs():
            # Header fields are read by item lookup, cu_offset is a plain
            # attribute of the compile unit.
            unit_line = ' Found a compile unit at offset %s, length %s' % (
                unit.cu_offset, unit['unit_length'])
            print(unit_line)

            # The first DIE of a compile unit describes the unit itself.
            root_die = unit.get_top_DIE()
            print(' Top DIE with tag=%s' % root_die.tag)

            # DW_AT_name is fetched from the DIE's attribute mapping and
            # converted with bytes2str before printing.
            unit_name = root_die.attributes['DW_AT_name']
            print(' name=%s' % bytes2str(unit_name.value))
def process_file(filename):
    """Collect function, global, type and global-access maps from an ELF file.

    Each compile unit's top DIE is handed first to ``die_info_rec_struct``
    (which receives the struct map) and then to ``die_info_rec``; both
    helpers fill the shared maps in place.

    Returns:
        A 4-tuple ``(func_map, global_map, type_map, global_access_map)``.
        When the file has no DWARF info, four empty dicts are returned so
        that callers can always unpack the same number of values.
    """
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            # Same arity as the normal return below (was a 2-tuple).
            return {}, {}, {}, {}

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        func_map, global_map, type_map, struct_map, global_access_map = \
            {}, {}, {}, {}, {}
        for CU in dwarfinfo.iter_CUs():
            # DWARFInfo allows iterating over the compile units contained
            # in the .debug_info section.
            top_DIE = CU.get_top_DIE()

            # Fresh per-CU scratch dicts handed to the recursive helpers.
            variables = {}
            members = {}
            die_info_rec_struct(top_DIE, struct_map, members,
                                global_access_map)

            # Walk the DIE tree recursively starting with top_DIE.
            die_info_rec(top_DIE, func_map, global_map, type_map,
                         struct_map, variables, global_access_map)

        return func_map, global_map, type_map, global_access_map
def __init__(self, filename):
    """Remember ``filename`` and parse address ranges from its DWARF data.

    Raises:
        AssertionError: if the ELF file contains no DWARF information.
    """
    self.filename = filename
    with open(filename, 'rb') as fp:
        elf = ELFFile(fp)
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type is kept
        # for callers that already catch AssertionError.
        if not elf.has_dwarf_info():
            raise AssertionError(
                "No DWARF information for '{}'".format(filename))
        self.parseAddressRanges(elf.get_dwarf_info())
def load_dwarf_info(mmap):
    """
    Load or reload all dwarf info from mmap.

    Every file name in ``mmap`` that does not start with "[" and that has
    DWARF info gets an entry in ``all_dwarf_info`` holding the DWARFInfo
    object and, per compile unit, its line program, the usable line-program
    states, their addresses and the subprogram DIEs.
    """
    for path in mmap:
        if path.startswith("["):
            continue

        # NOTE(review): this file object is never closed; presumably it is
        # kept open on purpose because the stored objects are used later -
        # confirm before changing.
        elf = ELFFile(open(path, "rb"))
        if not elf.has_dwarf_info():
            continue
        info = elf.get_dwarf_info()

        # Information from Compilation Units (CUs)
        units = []
        for unit in info.iter_CUs():
            program = info.line_program_for_CU(unit)

            # Keep only rows that carry a state and do not end a sequence.
            row_states = []
            for row in program.get_entries():
                if row.state and not row.state.end_sequence:
                    row_states.append(row.state)
            row_addresses = [st.address for st in row_states]

            subprograms = []
            for die in unit.iter_DIEs():
                if die.tag != 'DW_TAG_subprogram':
                    continue
                subprograms.append({
                    "entry": die,
                    "bounds": die_bounds(die),
                    "name": die.attributes['DW_AT_name'].value
                })

            units.append({
                "lineprog": program,
                "states": row_states,
                "addresses": row_addresses,
                "entries": subprograms
            })

        all_dwarf_info[path] = {"dwarfinfo": info, "units": units}
def test_range_list_absence(self):
    # The ARM sample has DWARF info, but range_lists() must be None for it.
    sample = os.path.join(
        'test', 'testfiles_for_unittests', 'arm_with_form_indirect.elf')
    with open(sample, 'rb') as stream:
        elf = ELFFile(stream)
        self.assertTrue(elf.has_dwarf_info())
        range_lists = elf.get_dwarf_info().range_lists()
        self.assertIsNone(range_lists)
def _get_impalad_dwarf_info(self):
    """
    Read the impalad_path ELF binary, which is supposed to contain DWARF, and read the
    DWARF to understand the compiler options. Return a 2-tuple of the two useful DIE
    attributes of the first compile unit: the DW_AT_name and DW_AT_producer. If
    something goes wrong doing this, log a warning; any value that was not read
    successfully is returned as None (so a total failure yields (None, None)).
    """
    # Some useful references:
    # - be/CMakeLists.txt
    # - gcc(1), especially -grecord-gcc-switches, -g, -ggdb, -gdwarf-2
    # - readelf(1)
    # - general reading about DWARF
    # A useful command for exploration without having to wade through many bytes is:
    # readelf --debug-dump=info --dwarf-depth=1 impalad
    # The DWARF lines are long, raw, and nasty; I'm hesitant to paste them here, so
    # curious readers are highly encouraged to try the above, or read IMPALA-3501.
    die_name = None
    die_producer = None
    try:
        with open(self.impalad_path, 'rb') as fh:
            impalad_elf = ELFFile(fh)
            if impalad_elf.has_dwarf_info():
                dwarf_info = impalad_elf.get_dwarf_info()
                # We only need the first CU, hence the unconventional use of the
                # iterator protocol. An empty iterator raises StopIteration, which
                # the handler below reports like any other failure.
                cu_iterator = dwarf_info.iter_CUs()
                first_cu = next(cu_iterator)
                top_die = first_cu.get_top_DIE()
                die_name = top_die.attributes['DW_AT_name'].value
                die_producer = top_die.attributes['DW_AT_producer'].value
    except Exception as e:
        # Deliberate best-effort: report and fall through to the return below.
        # Logger.warn() is a deprecated alias; warning() is the supported spelling.
        LOG.warning('Failure to read DWARF info from {0}: {1}'.format(
            self.impalad_path, str(e)))
    return die_name, die_producer
def generate_header(self, data_out_filename, glob_data_out, namesp_out):
    """Find all top level (global) variables in the ELF file and generate
    a header.

    A banner is written to both output streams, then every compile unit of
    every object file in ``self._objfiles`` is handed to a ``HeaderGenCU``.

    Raises:
        NoDwarfInfoError: if an object file has no DWARF info.
    """
    banner = "/* generated by userspace-header-gen.py */\n"
    glob_data_out.write(banner)
    glob_data_out.write("#include <rtems/linkersets.h>\n")
    namesp_out.write(banner)

    for objfile in self._objfiles:
        elf = ELFFile(objfile)
        if not elf.has_dwarf_info():
            raise NoDwarfInfoError()

        # Don't relocate DWARF sections. This is not necessary for us but
        # makes problems on ARM with current pyelftools (version 0.24)
        dwarf = elf.get_dwarf_info(relocate_dwarf_sections=False)

        for unit in dwarf.iter_CUs():
            if self._verbose >= VERBOSE_SOME:
                found_msg = 'Found a CU at offset %s, length %s\n' % (
                    unit.cu_offset, unit['unit_length'])
                self._err.write(found_msg)

            line_program = dwarf.line_program_for_CU(unit)
            unit_gen = HeaderGenCU(unit, self._progname, line_program,
                                   self._err, self._verbose,
                                   self._filterre)
            unit_gen.generate_header(data_out_filename, glob_data_out,
                                     namesp_out)
def resolve_addr(fn, ip):
    """Look up ``ip`` in the symbol and line tables cached for file ``fn``.

    The parsed ELF file, its line table (only when DWARF info exists) and
    its symbol table are built on first use and cached in the module-level
    ``open_files``, ``lines`` and ``symtables`` mappings.

    Returns:
        ``(loc, offset, src)``: ``loc`` is element 2 of the symbol-table hit
        and ``offset`` is ``ip`` minus element 0 of that hit; ``src`` is
        ``"<element 2>:<element 3>"`` of the line-table hit.  Each value is
        None when the corresponding lookup finds nothing.
    """
    if fn not in open_files:
        # The handle stays open for as long as the cached ELFFile lives.
        open_files[fn] = ELFFile(open(fn, 'rb'))
    elf = open_files[fn]

    if fn not in lines and elf.has_dwarf_info():
        lines[fn] = build_line_table(elf.get_dwarf_info())

    if fn not in symtables:
        symtables[fn] = build_symtab(elf)

    loc, offset = None, None
    if fn in symtables:
        symbol_hit = find_le(symtables[fn], ip)
        if symbol_hit:
            loc = symbol_hit[2]
            offset = ip - symbol_hit[0]

    src = None
    if fn in lines:
        line_hit = find_le(lines[fn], ip)
        if line_hit:
            src = "%s:%d" % (line_hit[2], line_hit[3])

    return loc, offset, src
def process_file(filename, root, pkgroot):
    """Collect the line-entry mapping of every line program in an ELF file.

    Returns:
        A set updated with ``line_entry_mapping`` of each compile unit's
        line program.  The set is empty for directories, unreadable paths,
        files without DWARF info and when an OSError occurs.  If the file
        is not valid ELF, the result of ``find_in_source_root`` is returned
        instead.
    """
    found = set()
    is_readable_file = (not os.path.isdir(filename)
                        and os.access(filename, os.R_OK))
    if not is_readable_file:
        return found

    try:
        with open(filename, 'rb') as stream:
            try:
                elf = ELFFile(stream)
                if elf.has_dwarf_info():
                    dwarf = elf.get_dwarf_info()
                    for unit in dwarf.iter_CUs():
                        # A compilation unit may or may not have a
                        # corresponding line program in .debug_line.
                        program = dwarf.line_program_for_CU(unit)
                        if program is not None:
                            found.update(line_entry_mapping(program))
            except ELFError:
                return find_in_source_root(filename, root, pkgroot)
    except OSError:
        pass
    return found
def get_functions(lib_path):
    """Scan the symbol tables of ``lib_path`` and grab its .text section.

    ``scan_section`` is invoked for ``.symtab`` and ``.dynsym`` with the
    ``functions`` dict.  A missing DWARF section is only reported, it does
    not stop the scan.

    Returns:
        ``(functions, offset, data)`` where ``offset`` and ``data`` are the
        ``sh_offset`` and contents of ``.text`` (both None when the section
        is absent).  If anything raises, a diagnostic is printed and the
        function returns None.
    """
    functions = {}
    data = None
    offset = None
    try:
        from elftools.elf.elffile import ELFFile

        with open(lib_path, 'rb') as stream:
            elffile = ELFFile(stream)
            if not elffile.has_dwarf_info():
                print('file has no DWARF info')

            scan_section(functions, elffile, lib_path,
                         elffile.get_section_by_name('.symtab'))
            scan_section(functions, elffile, lib_path,
                         elffile.get_section_by_name('.dynsym'))

            # .text section hex dump
            section = elffile.get_section_by_name('.text')
            if section:
                data = section.data()
                offset = section['sh_offset']

        return functions, offset, data
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        # Parenthesised single-argument print works on Python 2 and 3
        # (the former print statement was a SyntaxError on Python 3).
        print("[%s, %s, %s] Error extracting functions: %s" % (
            exc_type, fname, exc_tb.tb_lineno, str(e)))
def process_file(filename):
    """Print offset, length, top-DIE tag and full path of each compile unit.

    Prints a notice and returns early when the ELF file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo object is the starting point for DWARF processing.
        dwarf = elf.get_dwarf_info()
        for unit in dwarf.iter_CUs():
            # Header elements are accessed via item lookup.
            unit_offset, unit_length = unit.cu_offset, unit['unit_length']
            print(' Found a compile unit at offset {0!s}, length {1!s}'.format(
                unit_offset, unit_length))

            # The first DIE in each compile unit describes it.
            root_die = unit.get_top_DIE()
            print(' Top DIE with tag={0!s}'.format(root_die.tag))

            # We're interested in the filename...
            print(' name={0!s}'.format(root_die.get_full_path()))
class DwarfInfo:
    """Resolve instruction addresses to (file name, line) via DWARF line tables."""

    def __init__(self, exe):
        """Open ``exe`` and load its DWARF info.

        Raises:
            ValueError: if the binary has no DWARF info.
        """
        self.binary = exe
        self.fd = open(exe, 'rb')
        try:
            self.elf_file = ELFFile(self.fd)
            if not self.elf_file.has_dwarf_info():
                # Raising a bare string is itself a TypeError; use a real
                # exception class carrying the same message.
                raise ValueError('Binary contains no dwarf info section.')
            self.dwarf_info = self.elf_file.get_dwarf_info()
        except Exception:
            # Do not keep the handle open when construction fails.
            self.fd.close()
            raise

    def __del__(self):
        # ``fd`` is missing when open() itself failed in __init__.
        fd = getattr(self, 'fd', None)
        if fd is not None:
            fd.close()

    def lookup(self, address):
        """Return ``(file_name, line)`` of the line-table row covering ``address``.

        A row covers the address when its own address is <= ``address`` and
        the next row of the same sequence has a greater address.

        Raises:
            LookupError: if no compile unit has a matching row.
        """
        # iterate over the compile units (CUs)
        for CU in self.dwarf_info.iter_CUs():
            line_progs = self.dwarf_info.line_program_for_CU(CU)
            if line_progs is None:
                # No line program for this CU, so nothing to search.
                continue
            prev_state = None
            # debug-line: parse the table like `dwarfdump -debug-line ./main`
            for entry in line_progs.get_entries():
                if entry.state is None:
                    continue
                if entry.state.end_sequence:
                    # Rows of different sequences must not be paired up.
                    prev_state = None
                    continue
                if prev_state and prev_state.address <= address < entry.state.address:
                    file_name = line_progs['file_entry'][prev_state.file - 1].name
                    line = prev_state.line
                    return file_name, line
                prev_state = entry.state
        raise LookupError('Could not find address')
def process_file(filename):
    """Print a summary of each compile unit, then dump its DIE tree.

    For every compile unit the offset, length, top-DIE tag and full path
    are printed before ``die_info_rec`` is called on the top DIE.  Prints
    a notice and returns early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo object is the starting point for DWARF processing.
        for unit in elf.get_dwarf_info().iter_CUs():
            # Header elements are accessed via item lookup.
            summary = ' Found a compile unit at offset %s, length %s' % (
                unit.cu_offset, unit['unit_length'])
            print(summary)

            # Start with the top DIE, the root for this CU's DIE tree
            root_die = unit.get_top_DIE()
            print(' Top DIE with tag=%s' % root_die.tag)

            # We're interested in the filename...
            print(' name=%s' % root_die.get_full_path())

            # Display DIEs recursively starting with the root
            die_info_rec(root_die)
def retrieve_pub_functions(binary):
    """Dump the .debug_pubnames table of ``binary`` and return its names.

    Returns:
        The list of names found in .debug_pubnames, or -1 when the file has
        no DWARF info or no .debug_pubnames section.
    """
    with open(binary, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return -1

        # The DWARFInfo object is the starting point for DWARF processing;
        # ask it for the .debug_pubnames section.
        pubnames = elf.get_dwarf_info().get_pubnames()
        if pubnames is None:
            print('ERROR: No .debug_pubnames section found in ELF.')
            return -1
        print('%d entries found in .debug_pubnames' % len(pubnames))

        # dump all entries in .debug_pubnames section.
        rule = '-' * 66
        print('Dumping .debug_pubnames table ...')
        print(rule)
        print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS'))
        print(rule)

        symbols = []
        for name, entry in pubnames.items():
            symbols.append(name)
            print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs))
        print(rule)

        print(symbols)
        return symbols
def __init__(self, binary: str):
    """Load symbols, compile units and code markers from an ELF binary.

    Sets ``self.symbols`` (named symbols of a symbol-table section),
    ``self.compile_units`` (one ``CompileUnitInput`` per DW_TAG_compile_unit
    DIE) and ``self.markers`` (one ``Marker`` per symbol whose name starts
    with ``__metal_serial_``).

    Raises:
        Exception: if the binary has no DWARF info, or if no line-program
            entry has the address of a marker symbol.
    """
    with open(binary, "rb") as b:
        elffile = ELFFile(b)

        #Symbol table
        # NOTE(review): every SymbolTableSection overwrites self.symbols, so
        # the last one wins; if there is none, self.symbols stays unset.
        for section in elffile.iter_sections():
            if isinstance(section, SymbolTableSection):
                self.symbols = [Symbol(sym.name, sym['st_value'], sym['st_info']['type']) for sym in section.iter_symbols() if len(sym.name) > 0]
                continue

        if not elffile.has_dwarf_info():
            raise Exception("This tool needs gdb info.")

        dbg = elffile.get_dwarf_info()

        # Join a line-program file entry with its include directory; a
        # dir_index of 0 (or less) is mapped to '.'.
        def file_entry_to_abs(file_entry, linep: LineProgram) -> str:
            di = file_entry.dir_index
            if di > 0:
                return path.join(linep['include_directory'][di-1].decode(), file_entry.name.decode())
            else:
                return path.join('.', file_entry.name.decode())

        # Pair every compile unit with its line program once; both the
        # compile-unit list and the marker search below reuse the pairs.
        cu_helper = [(cu, dbg.line_program_for_CU(cu)) for cu in dbg.iter_CUs()]
        self.compile_units = [
            CompileUnitInput(die.attributes['DW_AT_name'].value.decode(),
                             die.attributes['DW_AT_comp_dir'].value.decode(),
                             [file_entry_to_abs(fe, linep) for fe in linep['file_entry']])
            for cu, linep in cu_helper
            for die in cu.iter_DIEs() if die.tag == 'DW_TAG_compile_unit'
        ]

        # Locate the source position of every '__metal_serial_' marker symbol.
        self.markers = []
        for msym in (sym for sym in self.symbols if sym.name.startswith('__metal_serial_')):
            try:
                # First line-program entry (across all CUs) whose state
                # address equals the symbol address; next() raises
                # StopIteration when there is none.
                nx : Tuple[LineProgramEntry, LineProgram] = next((entry, linep) for (cu, linep) in cu_helper for entry in linep.get_entries() if entry.state is not None and entry.state.address == msym.address)
                (loc, linep) = nx
                abs_file_entry = file_entry_to_abs(linep['file_entry'][loc.state.file - 1], linep)
                # check if marker already exists -
                #for existing_marker in self.markers:
                #    if loc.state.line == existing_marker.line and loc.state.column == existing_marker.column and existing_marker.file == abs_file_entry:
                #        print(msym.name, existing_marker.name)
                #        raise Exception("Duplicate code markers found at {}({})".format(existing_marker.file, existing_marker.line))

                self.markers.append(Marker(
                    msym.name,
                    msym.address,
                    msym.symbol_type,
                    abs_file_entry,
                    loc.state.line,
                    loc.state.column
                ))
            except StopIteration:
                raise Exception('Could not find code location for {} at 0x{:x} - this is most likely due to missing gdb symbols.'.format(msym.name, msym.address))
def process_file(filename):
    """Print offset, length, top-DIE tag and full path of each compile unit.

    Prints a notice and returns early when the ELF file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as elf_stream:
        parsed = ELFFile(elf_stream)
        if not parsed.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo object is the starting point for DWARF processing.
        debug_info = parsed.get_dwarf_info()
        for comp_unit in debug_info.iter_CUs():
            # Header elements are accessed via item lookup.
            header_fields = (comp_unit.cu_offset, comp_unit['unit_length'])
            print(' Found a compile unit at offset %s, length %s'
                  % header_fields)

            # The first DIE in each compile unit describes it.
            first_die = comp_unit.get_top_DIE()
            print(' Top DIE with tag=%s' % first_die.tag)

            # We're interested in the filename...
            print(' name=%s' % first_die.get_full_path())
def run(self):
    """Build the DWARF model of ``self.f`` step by step, reporting progress.

    All window updates are scheduled through ``GLib.idle_add``: an error
    when the file has no DWARF info, a progress fraction after each falsy
    build step, and ``done_loading`` with the first truthy step result.
    Returns early, without scheduling anything further, once
    ``self.stop_requested`` is set.
    """
    elf = ELFFile(self.f)
    if not elf.has_dwarf_info():
        GLib.idle_add(self.window.display_error,
                      "This file has no DWARF info.")
        return

    builder = DwarfModelBuilder(elf.get_dwarf_info(), self.verbose)
    total = builder.num_cus()
    steps = builder.build_step()

    completed = 0
    while True:
        # A falsy step result means the build is not finished yet.
        file_elem = next(steps)
        if file_elem:
            break
        if self.stop_requested:
            return
        GLib.idle_add(self.window.load_progress, float(completed) / total)
        completed += 1

    if self.stop_requested:
        return
    GLib.idle_add(self.window.done_loading, file_elem)
def test_range_list_presence(self):
    # The 64-bit sample executable has DWARF info and a range-lists object.
    sample = os.path.join(
        'test', 'testfiles_for_unittests', 'sample_exe64.elf')
    with open(sample, 'rb') as stream:
        elf = ELFFile(stream)
        self.assertTrue(elf.has_dwarf_info())
        range_lists = elf.get_dwarf_info().range_lists()
        self.assertIsNotNone(range_lists)
def run(self):
    """Drive the incremental DWARF model build for ``self.f``.

    Schedules, via ``GLib.idle_add``, either an error (no DWARF info), a
    progress update after every build step that yields a falsy value, or
    ``done_loading`` with the first truthy value.  Nothing more is
    scheduled once ``self.stop_requested`` is observed.
    """
    elf = ELFFile(self.f)
    if not elf.has_dwarf_info():
        GLib.idle_add(self.window.display_error,
                      "This file has no DWARF info.")
        return

    model_builder = DwarfModelBuilder(elf.get_dwarf_info(), self.verbose)
    cu_total = model_builder.num_cus()
    step_iter = model_builder.build_step()

    steps_done = 0
    while True:
        # Keep stepping until the builder hands back a truthy element.
        file_elem = next(step_iter)
        if file_elem:
            break
        if self.stop_requested:
            return
        GLib.idle_add(self.window.load_progress,
                      float(steps_done) / cu_total)
        steps_done += 1

    if self.stop_requested:
        return
    GLib.idle_add(self.window.done_loading, file_elem)
def get_frame_base(filename, pc, rebased_addr):
    """
    Call to get frame base

    Among all decoded FDE table rows from the EH CFI entries, the row with
    the smallest ``pc`` that is >= ``pc - rebased_addr`` is selected and
    the offset of its ``cfa`` rule is returned.

    :param filename: name of the executable file
    :param pc: The address of the beginning of the function
    :param rebased_addr: Should be project.loader.memory.min_addr
    :return: the frame base for the function (0 when no row qualifies,
        None when the file has no DWARF info)
    """
    target_loc = pc - rebased_addr
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo object is the starting point for DWARF processing.
        dwarf = elf.get_dwarf_info()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elf.get_machine_arch())

        # Upper bound used as "no candidate yet".
        closest_pc = 1000000000000000000000
        frame_offset = 0
        for cfi_entry in dwarf.EH_CFI_entries():
            if not isinstance(cfi_entry, FDE):
                continue
            for row in cfi_entry.get_decoded().table:
                row_pc = row['pc']
                if target_loc <= row_pc < closest_pc:
                    frame_offset = row['cfa'].offset
                    closest_pc = row_pc
        return frame_offset
def get_cfi(path):
    '''
    Get the CFI entries from the ELF at the provided path

    The regular CFI entries are preferred, the EH CFI entries are the
    fallback.  None is returned (after printing a short reason) when the
    file has no DWARF info, no CFI of either kind, or when an ELFError,
    DWARFError, PermissionError or KeyError is raised.
    '''
    try:
        with open(path, 'rb') as stream:
            elf = ELFFile(stream)
            if not elf.has_dwarf_info():
                print("No DWARF")
                return None

            dwarf = elf.get_dwarf_info()
            if dwarf.has_CFI():
                return dwarf.CFI_entries()
            if dwarf.has_EH_CFI():
                return dwarf.EH_CFI_entries()

            print("No CFI")
            return None
    except ELFError:
        print("ELF Error")
    except DWARFError:
        print("DWARF Error")
    except PermissionError:
        print("Permission Error")
    except KeyError:
        print("Key Error")
    return None
def main():
    """Report the padding of every type found in an object file's DWARF info.

    The ELF file named by ``sys.argv[1]`` is scanned for structure, class,
    base, typedef and array DIEs.  The wrapped types are finalized, printed
    as a table (padded types highlighted) and each padded type is listed
    with its padding entries.  Exits with status 1 if there is no DWARF info.
    """
    # NOTE(review): the original indentation was lost; the reporting section
    # is assumed to run once after all compile units were collected -
    # confirm against the original layout.
    with open(sys.argv[1], 'rb') as f:
        elf = ELFFile(f)

        if not elf.has_dwarf_info():
            print("Object file has no dwarf info!")
            sys.exit(1)

        # Maps DIE offset -> wrapper object (Struct/Primitive/Typedef/Array).
        types = {}

        # NOTE(review): global_offset is assigned but not used below.
        global_offset = elf.get_dwarf_info().debug_info_sec.global_offset

        for cu in elf.get_dwarf_info().iter_CUs():
            cu_name = cu.get_top_DIE().attributes['DW_AT_name'].value.decode(
                'utf-8')

            print('\x1b[32m\x1b[1mProcessing %s\x1b[0m' % cu_name)

            # First, map top level types
            dies = list(cu.iter_DIEs())
            i = 0
            while i < len(dies):
                offset = dies[i].offset
                current = dies[i]
                i += 1

                # DIE tag -> wrapper class used to model that kind of type.
                common_types = {
                    'DW_TAG_structure_type': Struct,
                    'DW_TAG_class_type': Struct,
                    'DW_TAG_base_type': Primitive,
                    'DW_TAG_typedef': Typedef,
                    'DW_TAG_array_type': Array,
                }

                if current.tag in common_types:
                    assert offset not in types
                    types[offset] = common_types[current.tag](current)
                else:
                    pass  # print("Skipping processing of '%s'" % current.tag)

        # Each wrapper gets the complete offset -> type map.
        for t in types.values():
            t.finalize(types)

        header = '%-4s |\t%-100s |\t%s' % ('#', 'type', 'size')
        print(header)
        print('-' * len(header.expandtabs()))
        for o, t in types.items():
            # Padded types are printed with an ANSI colour prefix.
            color = '\x1b[31m\x1b[31m' if t.has_padding() else ''
            print('{:04x} |\t{color}{:100}\x1b[0m |\t{}'.format(
                o, repr(t), t.byte_size, color=color))
        print('-' * len(header.expandtabs()))
        print()

        for o, t in types.items():
            if t.has_padding():
                # NOTE(review): cu_name is the name of the last compile unit
                # processed above - confirm this is the intended file name.
                print(
                    "Found padded type '%s' at %s:%u" %
                    (t, cu_name,
                     t.source_object.attributes['DW_AT_decl_line'].value))
                for p in t.get_padding_list():
                    print('\t%s' % p)
def process_file(debugfile, dwzfile, fast=False):
    """Return ``get_producer(debugfile, dwzfile, fast)`` for a DWARF-bearing file.

    ``debugfile`` is handed to ``ELFFile`` to check for DWARF info first.

    Raises:
        AssertionError: if ``debugfile`` has no DWARF info.
    """
    elffile = ELFFile(debugfile)
    if not elffile.has_dwarf_info():
        # Raised explicitly: ``assert 0`` is stripped under -O, which would
        # make this function silently return None.
        raise AssertionError('debug file has no DWARF info')
    # this file itself has the DWARF information, must be my lucky day!
    return get_producer(debugfile, dwzfile, fast)
def test(self, file):
    """Checks if file contains DWARF debugging data

    Returns False when ``file`` cannot be parsed as ELF (ELFError).
    """
    try:
        parsed = ELFFile(file)
        has_dwarf = parsed.has_dwarf_info()
        # has_debug_info is only consulted when DWARF info is present.
        return has_dwarf and parsed.get_dwarf_info().has_debug_info
    except ELFError:
        return False
def create_module_from_ELF_DWARF_file(file: Union[str, bytes, int], query = dict()) -> Optional[Module]:
    """Build a ``Module`` from the DWARF info of the ELF file at ``file``.

    Args:
        file: anything accepted by ``open()`` (path or file descriptor).
        query: not used by this function; kept for interface compatibility.

    Returns:
        A ``Module`` populated by ``import_ELF_DWARF_into_module``, or None
        when the ELF file has no DWARF info.
    """
    # The context manager closes the handle on every path (it was leaked).
    # NOTE(review): assumes import_ELF_DWARF_into_module has finished reading
    # from the ELF stream by the time it returns - confirm.
    with open(file, 'rb') as stream:
        elf_file = ELFFile(stream)
        if not elf_file.has_dwarf_info():
            return None
        module = Module()
        import_ELF_DWARF_into_module(elf_file, module)
        return module
def _get_dwarf_info(binary_path, relocate_dwarf_sections=True):
    """Return the DWARFInfo object of the ELF file at ``binary_path``.

    ``relocate_dwarf_sections`` is forwarded to ``get_dwarf_info``.

    Raises:
        RuntimeError: if the file has no DWARF info.
    """
    with open(binary_path, 'rb') as stream:
        parsed = ELFFile(stream)
        if parsed.has_dwarf_info():
            return parsed.get_dwarf_info(
                relocate_dwarf_sections=relocate_dwarf_sections)
        raise RuntimeError(f'{binary_path} has no DWARF info')
def process_file(filename):
    """Print the location information of every DIE attribute that has one.

    For each compile unit its offset and length are printed; for each
    attribute that the LocationParser recognises as a location, the DIE's
    DW_AT_name, tag, the attribute name and the decoded location (single
    expression or location list) are printed.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo object is the starting point for DWARF processing.
        dwarf = elf.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        loc_lists = dwarf.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elf.get_machine_arch())

        # The parser turns DIE attributes into location objects.
        parser = LocationParser(loc_lists)

        for unit in dwarf.iter_CUs():
            # Header elements are accessed via item lookup.
            print(' Found a compile unit at offset %s, length %s' % (
                unit.cu_offset, unit['unit_length']))

            for die in unit.iter_DIEs():
                # Each attribute is an AttributeValue object.
                for attr in itervalues(die.attributes):
                    if not parser.attribute_has_location(attr,
                                                         unit['version']):
                        continue

                    var_name = die.attributes['DW_AT_name'].value
                    print(' Varname:%s' % (var_name))
                    print(' DIE %s. attr %s.' % (die.tag, attr.name))

                    # Either a list (reference into .debug_loc) or a
                    # LocationExpr (location stored in the attribute).
                    location = parser.parse_from_attribute(
                        attr, unit['version'])
                    if isinstance(location, LocationExpr):
                        print(' %s' % (describe_DWARF_expr(
                            location.loc_expr, dwarf.structs)))
                    elif isinstance(location, list):
                        print(show_loclist(location, dwarf, indent=' '))
def print_basic_info(filename: str) -> None:
    """Print a "Basic Information" table for the ELF file at ``filename``.

    The table lists hashes, a VirusTotal link, architecture, file type,
    whether DWARF info is present, entropy, the named sections with their
    sizes and the entry point.
    """
    with open(filename, "rb") as f:
        elffile = ELFFile(f)  # ELF object

        # variables
        sections = ""
        debug = RED + "No" + RESET
        fileMD5 = file_MD5sum(filename)
        filesha1 = file_sha1sum(filename)
        filesha256 = file_sha256sum(filename)
        fileSSDEEP = file_ssdeepsum(filename)
        vtlink = tinyurl("https://www.virustotal.com/gui/file/" + filesha256)

        # logic
        # Fall back to the full URL when tinyurl() returns a falsy value.
        if not vtlink:
            vtlink = "https://www.virustotal.com/gui/file/" + filesha256

        for x in range(elffile.num_sections()):
            # Only sections with a non-empty name are listed.
            if len(elffile.get_section(x).name) > 0:
                sections += "{}{} {}({}) ".format(
                    GREEN,
                    elffile.get_section(x).name, RESET,
                    hex(elffile.get_section(x).data_size))
            # NOTE(review): original indentation was lost; the line break
            # after every fourth section index is assumed to be independent
            # of the name check above - confirm.
            if x % 4 == 0 and x > 0:
                sections += "\n"

        if not sections:
            sections = RED + "No sections found" + RESET

        # has debug info?
        if elffile.has_dwarf_info():
            debug = GREEN + "Yes" + RESET

        info_table = [
            ["Filename:", filename],
            ["Filesize:", file_size(filename)],
            [
                "Filetype:",
                GREEN + "ELF " + str(elffile.get_machine_arch()) + RESET
            ],
            [
                "Subsystem:",
                GREEN + describe_e_type(elffile.header['e_type']) + RESET
            ],
            ["MD5: ", fileMD5],
            ["SHA1: ", filesha1],
            ["SHA256: ", filesha256],
            ["SSDEEP:", fileSSDEEP],
            ["VT link:", vtlink],
            ["Symbols:", debug],
            ["Entropy:", str(file_entropy(filename))],
            ["Sections:\n(with size)", sections],
            ["Entrypoint:", "{}".format(hex(elffile.header["e_entry"]))]
        ]
        print("")
        print(
            AsciiTable(
                title="Basic Information",
                table_data=info_table,
            ).table)
        print("")
def __init__(self, stream: TraceStream, elf_file: io.IOBase, basedir: str=''):
    """Store the trace stream and load DWARF info from ``elf_file``.

    ``self.cache`` starts out as an empty dict and ``self.dwarf`` holds the
    DWARFInfo object of the ELF file.

    Raises:
        ValueError: if the ELF file has no DWARF info.
    """
    self.stream, self.basedir, self.cache = stream, basedir, {}

    parsed = ELFFile(elf_file)
    if parsed.has_dwarf_info():
        self.dwarf = parsed.get_dwarf_info()
        return
    raise ValueError(f'ELF file {parsed} has no DWARF info')
def test_die_size(self):
    # Every child of every top DIE in the sample must report a size of 3.
    sample = os.path.join(
        'test', 'testfiles_for_unittests', 'trailing_null_dies.elf')
    with open(sample, 'rb') as stream:
        elf = ELFFile(stream)
        self.assertTrue(elf.has_dwarf_info())
        for unit in elf.get_dwarf_info().iter_CUs():
            root_die = unit.get_top_DIE()
            for child_die in root_die.iter_children():
                self.assertEqual(child_die.size, 3)
def test_dwarfv5_parses(self):
    sample = os.path.join(
        'test', 'testfiles_for_unittests', 'dwarfv5_basic.elf')
    with open(sample, 'rb') as stream:
        parsed = ELFFile(stream)

        # DWARFv5 debugging information is detected.
        self.assertTrue(parsed.has_dwarf_info())

        # Fetching DWARFInfo for DWARFv5 doesn't completely explode.
        self.assertIsNotNone(parsed.get_dwarf_info())
def get_file_line_coverage(target_path, addr_counts):
    """Return ``_parse_info`` of the target's DWARF info and ``addr_counts``.

    Raises:
        CommandError: if ``target_path`` has no DWARF info.
    """
    # ELF is a binary format: open in 'rb' so the parser receives bytes
    # (text mode decodes the stream on Python 3 and may translate newlines).
    with open(target_path, 'rb') as f:
        elf = ELFFile(f)
        if not elf.has_dwarf_info():
            raise CommandError(
                '%s has no DWARF info. Please recompile with ``-g``' %
                target_path)
        dwarf_info = elf.get_dwarf_info()
        return _parse_info(dwarf_info, addr_counts)
def fetch_lineno(bin_name, func_addrs):
    """Return ``decode_file_line`` of the binary's DWARF info and ``func_addrs``.

    When ``bin_name`` has no DWARF info a message is printed and an empty
    dict is returned.
    """
    with open(bin_name, "rb") as stream:
        parsed = ELFFile(stream)
        if parsed.has_dwarf_info():
            return decode_file_line(parsed.get_dwarf_info(), func_addrs)
        print("No Dwarf Found in ", bin_name)
    return {}
class LocateUndef(object):
    """
    Locate symbols in the compilation units.
    For each symbol (given by its name string) stores a tuple of the
    compilation unit and the pyelftools DIE that carries that name.
    Public methods:
    findDies -- for a list of symbol names, collect the matching DIEs
    getDies -- return the dict of found DIEs
    """

    def __init__(self, fname):
        """
        fname: file name of object file
        """
        self.dies = {}
        # Defaults first, so that findDies() and __del__ behave even when
        # opening fails or the file carries no DWARF info.
        self.fh = None
        self.elffile = None
        self.dwarfinfo = None

        self.fh = open(fname, 'rb')
        self.elffile = ELFFile(self.fh)
        if self.elffile.has_dwarf_info():
            self.dwarfinfo = self.elffile.get_dwarf_info()

    def __del__(self):
        fh = getattr(self, 'fh', None)
        if fh:
            fh.close()

    def findDies(self, namesList):
        """ namesList -- list of symbol names (strings). Each symbol is
        expected to be in the list once.  Names that are found are removed
        from ``namesList`` in place; the search stops as soon as the list
        is empty.  Nothing is done when the file has no DWARF info. """
        if self.dwarfinfo is None:
            return
        for cu in self.dwarfinfo.iter_CUs():
            for die in cu.iter_DIEs():
                if die.is_null():
                    continue
                if 'DW_AT_name' not in die.attributes:
                    continue
                name = die.attributes['DW_AT_name'].value.decode('ascii')
                if name in namesList:
                    self.dies[name] = (cu, die)
                    # NOTE(review): original author was unsure whether
                    # removing here is right; false definitions are not
                    # checked.
                    namesList.remove(name)
                    if not namesList:
                        return

    def getDies(self):
        """ returns the dict mapping each found name to its (cu, DIE) tuple """
        return self.dies
def read(self, view):
    """Add every sized structure type found in ``self.file`` to ``view``.

    For each DW_TAG_structure_type DIE that has a DW_AT_byte_size,
    ``view.add(name, "Struct", size, members)`` is called.  ``name`` is the
    DW_AT_name, or "<decl_file>:<decl_line>" for unnamed structures;
    ``members`` lists the names of the DW_TAG_member children.
    """
    self.log.info('Reading file %s', self.file)
    with open(self.file, "rb") as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo object is the starting point for DWARF processing.
        for unit in elf.get_dwarf_info().iter_CUs():
            # Header elements are accessed via item lookup.
            print(' Found a compile unit at offset %s, length %s' % (
                unit.cu_offset, unit['unit_length']))

            for die in unit.iter_DIEs():
                if die.tag != 'DW_TAG_structure_type':
                    continue

                attrs = die.attributes
                if 'DW_AT_name' in attrs:
                    name = attrs['DW_AT_name'].value.decode()
                else:
                    name = "{}:{}".format(
                        attrs['DW_AT_decl_file'].value,
                        attrs['DW_AT_decl_line'].value
                    )

                # Structures without a byte size are skipped.
                if 'DW_AT_byte_size' not in attrs:
                    continue
                size = attrs['DW_AT_byte_size'].value

                members = []
                if die.has_children:
                    members = [
                        child.attributes['DW_AT_name'].value.decode()
                        for child in die.iter_children()
                        if child.tag == 'DW_TAG_member'
                    ]

                view.add(name, "Struct", size, members)
def load_dwarfinfo(filename):
    """Return the DWARFInfo object of an ELF file, or exit with status 1.

    ``filename`` is passed directly to ``ELFFile`` and its ``.name`` is used
    in the error message, so despite the parameter name it has to be an
    open file object.
    """
    parsed = ELFFile(filename)
    if parsed.has_dwarf_info():
        # The DWARFInfo object is the starting point for DWARF processing.
        return parsed.get_dwarf_info()

    print(filename.name + ': ELF file has no DWARF info!')
    sys.exit(1)
def load_binary(self, file):
    """Load the ELF binary at ``file`` and set up program info and runner.

    On success ``self.loaded_file`` (absolute path), ``self.program_info``
    and ``self.runner`` are set.

    Raises:
        AssertionError: if ``file`` is not a regular file or the binary has
            no DWARF info.
    """
    file = os.path.abspath(file)
    # Explicit raises instead of ``assert`` so the checks survive -O; the
    # exception type is unchanged for existing callers.
    if not os.path.isfile(file):
        raise AssertionError("not a regular file: %s" % file)

    # ELF is binary data: open in 'rb' so the parser receives bytes.
    with open(file, "rb") as binary_file:
        elffile = ELFFile(binary_file)
        if not elffile.has_dwarf_info():
            raise AssertionError("no DWARF info in %s" % file)

        self.loaded_file = file
        self.program_info = ProgramInfo(elffile)
        self.runner = ProgramRunner(self, file)
def process_dwarf_info(in_file, out_file):
    '''
    Main function processing the DWARF information from the debug sections.

    Types and frames are processed into TYPES_MAP and EH_FRAMES, every
    compile unit is decoded into GLOBAL_VARIABLES, and each global variable
    with a positive size is added to a CFG_pb2 Module that is serialized to
    ``out_file``.

    Returns False when ``in_file`` has no DWARF info, otherwise None.
    '''
    DEBUG('Processing file: {0}'.format(in_file))

    with open(in_file, 'rb') as f:
        f_elf = ELFFile(f)
        if not f_elf.has_dwarf_info():
            # Report the input path; this used to format the unrelated
            # name `file` instead.
            DEBUG("{0} has no debug informations!".format(in_file))
            return False

        M = CFG_pb2.Module()
        M.name = "GlobalVariable"

        set_global_machine_arch(f_elf.get_machine_arch())
        dwarf_info = f_elf.get_dwarf_info()
        process_types(dwarf_info, TYPES_MAP)
        process_frames(dwarf_info, EH_FRAMES)
        section_offset = dwarf_info.debug_info_sec.global_offset

        # Iterate through all the compile units
        for CU in dwarf_info.iter_CUs():
            DEBUG('Found a compile unit at offset {0}, length {1}'.format(
                CU.cu_offset, CU['unit_length']))
            top_DIE = CU.get_top_DIE()
            c_unit = CUnit(top_DIE, CU['unit_length'], CU.cu_offset,
                           section_offset)
            c_unit.decode_control_unit(M, GLOBAL_VARIABLES)

        # Only variables with a known, positive size are exported.
        for value in GLOBAL_VARIABLES.values():
            if value["size"] > 0:
                gvar = M.global_vars.add()
                gvar.name = value["name"]
                gvar.ea = value["addr"]
                gvar.size = value["size"]
            else:
                DEBUG("Look for {}".format(pprint.pformat(value)))

        # SerializeToString() produces bytes, so write in binary mode.
        with open(out_file, "wb") as outf:
            outf.write(M.SerializeToString())

        DEBUG("Global Vars\n")
        DEBUG('Number of Global Vars: {0}'.format(len(GLOBAL_VARIABLES)))
        DEBUG("{}".format(pprint.pformat(GLOBAL_VARIABLES)))
        DEBUG("End Global Vars\n")
def process_dwarf_info(in_file, out_file):
    '''
    Main function processing the DWARF information from the debug sections.

    Types and frames are processed into TYPES_MAP and EH_FRAMES, every
    compile unit is decoded into GLOBAL_VARIABLES, and each global variable
    with a positive size is added to a CFG_pb2 Module that is serialized to
    ``out_file``.

    Returns False when ``in_file`` has no DWARF info, otherwise None.
    '''
    DEBUG('Processing file: {0}'.format(in_file))

    with open(in_file, 'rb') as f:
        f_elf = ELFFile(f)
        if not f_elf.has_dwarf_info():
            # Report the input path; this used to format the unrelated
            # name `file` instead.
            DEBUG("{0} has no debug informations!".format(in_file))
            return False

        M = CFG_pb2.Module()
        M.name = "GlobalVariable"

        set_global_machine_arch(f_elf.get_machine_arch())
        dwarf_info = f_elf.get_dwarf_info()
        process_types(dwarf_info, TYPES_MAP)
        process_frames(dwarf_info, EH_FRAMES)
        section_offset = dwarf_info.debug_info_sec.global_offset

        # Iterate through all the compile units
        for CU in dwarf_info.iter_CUs():
            DEBUG('Found a compile unit at offset {0}, length {1}'.format(
                CU.cu_offset, CU['unit_length']))
            top_DIE = CU.get_top_DIE()
            c_unit = CUnit(top_DIE, CU['unit_length'], CU.cu_offset,
                           section_offset)
            c_unit.decode_control_unit(M, GLOBAL_VARIABLES)

        # Only variables with a known, positive size are exported.
        for value in GLOBAL_VARIABLES.values():
            if value["size"] > 0:
                gvar = M.global_vars.add()
                gvar.name = value["name"]
                gvar.ea = value["addr"]
                gvar.size = value["size"]
            else:
                DEBUG("Look for {}".format(pprint.pformat(value)))

        # SerializeToString() produces bytes, so write in binary mode.
        with open(out_file, "wb") as outf:
            outf.write(M.SerializeToString())

        DEBUG("Global Vars\n")
        DEBUG('Number of Global Vars: {0}'.format(len(GLOBAL_VARIABLES)))
        DEBUG("{}".format(pprint.pformat(GLOBAL_VARIABLES)))
        DEBUG("End Global Vars\n")
def test_DWARF_indirect_forms(self):
    # Sanity check on an ARM ELF with non-trivial DWARF info that makes
    # heavy use of DW_FORM_indirect: parsing has to succeed and yield the
    # expected number of compile units.
    elf_path = os.path.join(
        'test', 'testfiles_for_unittests', 'arm_with_form_indirect.elf')

    with open(elf_path, 'rb') as stream:
        elf = ELFFile(stream)
        self.assertTrue(elf.has_dwarf_info())

        # Walk the whole CU iterator and count what it produced.
        cu_count = sum(1 for _ in elf.get_dwarf_info().iter_CUs())
        self.assertEqual(cu_count, 9)
def process_file(filename):
    """Print the ELF class of `filename` and, if it has DWARF info, the
    offset and address size of each compile unit it contains."""
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        # elfclass is a public ELFFile attribute, read from the file header.
        print('%s: elfclass is %s' % (filename, elf.elfclass))

        if not elf.has_dwarf_info():
            return

        for cu in elf.get_dwarf_info().iter_CUs():
            # cu_offset is a public attribute of the CU object, whereas
            # address_size is looked up in the CU header.
            offset = cu.cu_offset
            address_size = cu['address_size']
            print(' CU at offset 0x%x. address_size is %s' % (
                offset, address_size))
def process_file(filename):
    """Print every compile unit of `filename` with its source file name,
    then display the unit's DIE tree recursively via die_info_rec."""
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo object is the entry point for all DWARF processing.
        dwarf = elf.get_dwarf_info()

        for cu in dwarf.iter_CUs():
            # Header fields of a compile unit are read by item lookup;
            # cu_offset is a computed attribute of the CU object.
            print(' Found a compile unit at offset %s, length %s' % (
                cu.cu_offset, cu['unit_length']))

            # The top DIE is the root of this CU's DIE tree.
            root = cu.get_top_DIE()
            print(' Top DIE with tag=%s' % root.tag)

            # The source file name is DW_AT_comp_dir joined with
            # DW_AT_name. Either may be absent: with only one of them
            # present that one is used on its own; with neither, the
            # DW_AT_name lookup below raises KeyError.
            attrs = root.attributes
            if 'DW_AT_comp_dir' in attrs:
                comp_dir = bytes2str(attrs['DW_AT_comp_dir'].value)
                if 'DW_AT_name' in attrs:
                    name = bytes2str(attrs['DW_AT_name'].value)
                    name = os.path.join(comp_dir, name)
                else:
                    name = comp_dir
            else:
                name = bytes2str(attrs['DW_AT_name'].value)
            print(' name=%s' % name)

            # Display DIEs recursively, starting from the root.
            die_info_rec(root)
def generate_docs_from_stream(filename, fd):
    """Build one document per compile unit found in the ELF stream `fd`.

    `filename` is only used in the message printed when the stream has
    no DWARF info, in which case an empty list is returned. Otherwise the
    per-unit documents are combined with sequence() and returned.
    """
    elf = ELFFile(fd)
    if not elf.has_dwarf_info():
        print(' ' + filename + ' has no DWARF info')
        return []

    # Each compilation unit (CU) is paired with its top-level debug
    # information entry (DIE) and handed to mdoc.
    docs = []
    for cu in elf.get_dwarf_info().iter_CUs():
        docs.append(mdoc((cu, cu.get_top_DIE())))
    return sequence(docs)
def extract(self, binary):
    """Run __extract_DIE over every DIE of every compile unit in `binary`.

    Returns the dict that __extract_DIE was given to fill in; it is
    empty when the file has no DWARF info.
    """
    collected = dict()
    with open(binary, 'rb') as stream:
        elf = ELFFile(stream)
        if elf.has_dwarf_info():
            for cu in elf.get_dwarf_info().iter_CUs():
                for die in cu.iter_DIEs():
                    self.__extract_DIE(cu, die, collected)
        else:
            print(' File has no debug info (DWARF format expected) !')
    return collected
class SharedObjectInfo():
    """Address range and lazily loaded DWARF info of a shared object.

    path:  location of the ELF file on disk.
    baddr: base address; the object is taken to occupy the range
           [baddr, baddr + summed size of its PT_LOAD segments).
    """

    def __init__(self, path, baddr):
        self.path = path
        self._set_elf_file()
        self.low_addr = baddr
        self.high_addr = baddr + self._get_mem_size()
        # Check whether the ELF file is position independent code
        self.is_pic = self.elf_file.header['e_type'] == 'ET_DYN'
        # The DWARF info is not loaded here, only when it is first
        # requested through the dwarf_info property.
        self._dwarf_info = None

    @property
    def dwarf_info(self):
        """DWARF info of the ELF file, loaded on first access."""
        if self._dwarf_info is None:
            self._set_dwarf_info()
        return self._dwarf_info

    def _set_elf_file(self):
        """Open self.path and wrap it in an ELFFile; exit if it cannot be
        opened."""
        try:
            # The stream is deliberately left open: self.elf_file is
            # queried again later (segments, header, DWARF info).
            binary_file = open(self.path, 'rb')
            self.elf_file = ELFFile(binary_file)
        except IOError:
            print('Failed to open ' + self.path, file=sys.stderr)
            sys.exit(-1)

    def _set_dwarf_info(self):
        """Load the DWARF info; exit if the binary has none."""
        if not self.elf_file.has_dwarf_info():
            print('Binary ' + self.path + ' has no DWARF info',
                  file=sys.stderr)
            sys.exit(-1)
        self._dwarf_info = self.elf_file.get_dwarf_info()

    def _get_mem_size(self):
        """Return the sum of the PT_LOAD segment sizes, each rounded up
        to a multiple of the segment's alignment."""
        mem_size = 0
        for segment in self.elf_file.iter_segments():
            if segment['p_type'] != 'PT_LOAD':
                continue
            size = segment['p_memsz']
            alignment = segment['p_align']
            # p_align values of 0 and 1 mean "no alignment required", so
            # the size is used as is; dividing by 0 would raise.
            if alignment > 1:
                # Integer ceiling division keeps the result exact and an
                # int, without going through float arithmetic.
                size = -(-size // alignment) * alignment
            mem_size += size
        return mem_size
def process_file(filename):
    """Print every location list referenced from the DIE attributes of
    `filename`, grouped under the compile unit that contains them."""
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo object is the entry point for all DWARF processing.
        dwarf = elf.get_dwarf_info()

        # LocationLists object built by DWARFInfo from the .debug_loc
        # section.
        loc_lists = dwarf.location_lists()

        # The descriptions module needs the machine architecture in order
        # to decode register names inside DWARF expressions.
        set_global_machine_arch(elf.get_machine_arch())

        for cu in dwarf.iter_CUs():
            # Header fields of a compile unit are read by item lookup;
            # cu_offset is a computed attribute of the CU object.
            print(' Found a compile unit at offset %s, length %s' % (
                cu.cu_offset, cu['unit_length']))

            for die in cu.iter_DIEs():
                # Every attribute is an AttributeValue; only those that
                # refer to a location list are of interest.
                for attr in itervalues(die.attributes):
                    if not attribute_has_location_list(attr):
                        continue
                    # The attribute value is an offset into .debug_loc.
                    loclist = loc_lists.get_location_list_at_offset(
                        attr.value)
                    rendered = show_loclist(loclist, dwarf, indent=' ')
                    print(' DIE %s. attr %s.\n%s' % (
                        die.tag, attr.name, rendered))
def process_file(filename, outfile):
    """Emit serializers for the "object_t" types of every compile unit in
    `filename`, writing them to `outfile`.

    Raises IOError when `filename` has no DWARF info; `outfile` is not
    opened in that case.
    """
    with open(filename, 'rb') as elf_stream:
        elf = ELFFile(elf_stream)
        if not elf.has_dwarf_info():
            raise IOError("ERROR: {} has no DWARF info".format(filename))

        # The DWARFInfo object is the entry point for all DWARF processing.
        dwarf = elf.get_dwarf_info()

        with open(outfile, 'w') as sink:
            for cu in dwarf.iter_CUs():
                # NOTE(review): the result of get_dies_by_offset is never
                # used; the call is retained in case it has side effects.
                get_dies_by_offset(cu)
                wanted = get_serialization_types(["object_t"], cu)
                emit.emit_serializers(sink, wanted)
def process_file(filename):
    """Print every range list referenced from the DIE attributes of
    `filename`, grouped under the compile unit that contains them."""
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo object is the entry point for all DWARF processing.
        dwarf = elf.get_dwarf_info()

        # RangeLists object built by DWARFInfo from the .debug_ranges
        # section; None means there is nothing to decode.
        ranges = dwarf.range_lists()
        if ranges is None:
            print(' file has no .debug_ranges section')
            return

        for cu in dwarf.iter_CUs():
            # Header fields of a compile unit are read by item lookup;
            # cu_offset is a computed attribute of the CU object.
            print(' Found a compile unit at offset {0!s}, length {1!s}'.format(
                cu.cu_offset, cu['unit_length']))

            for die in cu.iter_DIEs():
                # Every attribute is an AttributeValue; only those that
                # refer to a range list are of interest.
                for attr in itervalues(die.attributes):
                    if not attribute_has_range_list(attr):
                        continue
                    # The attribute value is an offset into .debug_ranges.
                    rangelist = ranges.get_range_list_at_offset(attr.value)
                    print(' DIE {0!s}. attr {1!s}.\n{2!s}'.format(
                        die.tag, attr.name, rangelist))
def process_file(filename, address):
    """Print the function name, source file and line that the DWARF info
    of `filename` associates with `address`."""
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo object is the entry point for all DWARF processing.
        dwarf = elf.get_dwarf_info()

        func_name = decode_funcname(dwarf, address)
        src_file, src_line = decode_file_line(dwarf, address)

        print('Function:', bytes2str(func_name))
        print('File:', bytes2str(src_file))
        print('Line:', src_line)
def test_files(fns, quiet=False, profile=False):
    """Compare the functions found by each static engine with those
    recorded in the DWARF info of every file in `fns`.

    fns:     iterable of paths to check.
    quiet:   when true, do not report files that are skipped because
             they are not ELF or carry no DWARF info.
    profile: when true, run each engine under PyCallGraph (requires the
             pycallgraph package) and write the graph to 'prof.png'.

    One line is printed per (file, engine) pair, saying either that all
    functions were found or which DWARF functions the engine missed.
    """
    for fn in fns:
        # ELF files are binary, and the stream only needs to stay open
        # until the DWARF function set has been collected.
        with open(fn, 'rb') as stream:
            try:
                elf = ELFFile(stream)
            except ELFError:
                if not quiet:
                    print("Skipping non-ELF file: {}".format(fn))
                continue

            if not elf.has_dwarf_info():
                if not quiet:
                    print("No dwarf info for {}.".format(fn))
                continue

            dwarf_functions = get_functions(elf.get_dwarf_info())

        engine_functions = {}
        for engine in ENGINES:
            this_engine = Static(fn, debug=0, static_engine=engine)  # no debug output
            # Honour the `profile` argument of this function rather than
            # reaching for a module-level `args` object.
            if profile:
                # needs pycallgraph
                from pycallgraph import PyCallGraph
                from pycallgraph.output import GraphvizOutput
                graphviz = GraphvizOutput()
                graphviz.output_file = 'prof.png'
                with PyCallGraph(output=graphviz):
                    this_engine.process()
            else:
                this_engine.process()
            engine_functions[engine] = {
                x.start for x in this_engine['functions']}

        total_fxns = len(dwarf_functions)
        # str.split returns the whole string when there is no "/", so no
        # separate check for a separator is needed.
        short_fn = fn.split("/")[-1]
        for engine, functions in engine_functions.items():
            missed = dwarf_functions - functions
            if not missed:
                print("{} {}: {} found all {} function(s).".format(
                    ok_green, short_fn, engine, total_fxns))
            else:
                fmt = "{} {}: {} missed {}/{} functions: {}."
                print(fmt.format(
                    warn, short_fn, engine, len(missed), total_fxns,
                    ", ".join(hex(fxn) for fxn in missed)))
def get_files_from_executable(filename):
    """Return the source path recorded for each compile unit of
    `filename`.

    An empty list is returned when the file is not a valid ELF file or
    carries no DWARF info; both cases are logged at INFO level.
    """
    with open(filename, 'rb') as f:
        # ELFFile looks for the magic number; if there is none, ELFError
        # is raised.
        try:
            elffile = ELFFile(f)
        except ELFError:
            logging.info("%s is invalid elf file", filename)
            return []

        if not elffile.has_dwarf_info():
            logging.info("File does not have dwarf info, no sources in the project file")
            # Return a list on every path so callers can always iterate
            # over the result.
            return []

        dwarfinfo = elffile.get_dwarf_info()

        # Go over all the compile units in the DWARF information and
        # collect the full path reported by each unit's top DIE.
        return [CU.get_top_DIE().get_full_path()
                for CU in dwarfinfo.iter_CUs()]
def get_executable_src_files(exec_path):
    """Return the resolved absolute source path of every compile unit in
    the executable at `exec_path` (which must be an absolute path).

    An empty list is returned, after printing a message, when the file
    cannot be parsed as ELF or has no DWARF information.
    """
    assert (os.path.isabs(exec_path))
    exec_src_paths = []
    with open(exec_path, 'rb') as elf_file_handle:
        try:
            elf_file = ELFFile(elf_file_handle)
        except Exception:
            # Any parsing failure is reported as "not an ELF file", but
            # KeyboardInterrupt and SystemExit are allowed to propagate.
            print ('-- Executable \'' + exec_path + '\' is not an ELF file')
            return []

        if not elf_file.has_dwarf_info():
            print ('-- Executable \'' + exec_path + '\' has no DWARF information')
            return []

        dwarf_info = elf_file.get_dwarf_info()
        for CU in dwarf_info.iter_CUs():
            DIE = CU.get_top_DIE()

            # Pick the source name and the build directory out of the
            # top DIE's attributes; either stays '' when it is absent.
            name = ''
            comp_dir = ''
            for attr in itervalues(DIE.attributes):
                if attr.name == 'DW_AT_name':
                    name = attr.value
                if attr.name == 'DW_AT_comp_dir':
                    comp_dir = attr.value

            # If the source path in the executable is not an absolute
            # path then use the DW_AT_comp_dir attribute to get the
            # build directory to make it absolute.
            #
            # Once we have an absolute path, use realpath to resolve any
            # symbolic links.
            src_path = name
            if not os.path.isabs(name):
                assert (comp_dir != '')
                src_path = os.path.join(comp_dir, name)
                assert(os.path.isabs(src_path))

            src_path = os.path.realpath(src_path)
            exec_src_paths.append(src_path.decode())

    return exec_src_paths
class ReadElf(object): """ display_* methods are used to emit output into the output stream """ def __init__(self, file, output): """ file: stream object with the ELF file to read output: output stream to write to """ self.elffile = ELFFile(file) self.output = output # Lazily initialized if a debug dump is requested self._dwarfinfo = None self._versioninfo = None def display_file_header(self): """ Display the ELF file header """ self._emitline('ELF Header:') self._emit(' Magic: ') self._emitline(' '.join('%2.2x' % byte2int(b) for b in self.elffile.e_ident_raw)) header = self.elffile.header e_ident = header['e_ident'] self._emitline(' Class: %s' % describe_ei_class(e_ident['EI_CLASS'])) self._emitline(' Data: %s' % describe_ei_data(e_ident['EI_DATA'])) self._emitline(' Version: %s' % describe_ei_version(e_ident['EI_VERSION'])) self._emitline(' OS/ABI: %s' % describe_ei_osabi(e_ident['EI_OSABI'])) self._emitline(' ABI Version: %d' % e_ident['EI_ABIVERSION']) self._emitline(' Type: %s' % describe_e_type(header['e_type'])) self._emitline(' Machine: %s' % describe_e_machine(header['e_machine'])) self._emitline(' Version: %s' % describe_e_version_numeric(header['e_version'])) self._emitline(' Entry point address: %s' % self._format_hex(header['e_entry'])) self._emit(' Start of program headers: %s' % header['e_phoff']) self._emitline(' (bytes into file)') self._emit(' Start of section headers: %s' % header['e_shoff']) self._emitline(' (bytes into file)') self._emitline(' Flags: %s%s' % (self._format_hex(header['e_flags']), self.decode_flags(header['e_flags']))) self._emitline(' Size of this header: %s (bytes)' % header['e_ehsize']) self._emitline(' Size of program headers: %s (bytes)' % header['e_phentsize']) self._emitline(' Number of program headers: %s' % header['e_phnum']) self._emitline(' Size of section headers: %s (bytes)' % header['e_shentsize']) self._emitline(' Number of section headers: %s' % header['e_shnum']) self._emitline(' Section header string table 
index: %s' % header['e_shstrndx']) def decode_flags(self, flags): description = "" if self.elffile['e_machine'] == "EM_ARM": if flags & E_FLAGS.EF_ARM_HASENTRY: description += ", has entry point" version = flags & E_FLAGS.EF_ARM_EABIMASK if version == E_FLAGS.EF_ARM_EABI_VER5: description += ", Version5 EABI" return description def display_program_headers(self, show_heading=True): """ Display the ELF program headers. If show_heading is True, displays the heading for this information (Elf file type is...) """ self._emitline() if self.elffile.num_segments() == 0: self._emitline('There are no program headers in this file.') return elfheader = self.elffile.header if show_heading: self._emitline('Elf file type is %s' % describe_e_type(elfheader['e_type'])) self._emitline('Entry point is %s' % self._format_hex(elfheader['e_entry'])) # readelf weirness - why isn't e_phoff printed as hex? (for section # headers, it is...) self._emitline('There are %s program headers, starting at offset %s' % ( elfheader['e_phnum'], elfheader['e_phoff'])) self._emitline() self._emitline('Program Headers:') # Now comes the table of program headers with their attributes. Note # that due to different formatting constraints of 32-bit and 64-bit # addresses, there are some conditions on elfclass here. 
# # First comes the table heading # if self.elffile.elfclass == 32: self._emitline(' Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align') else: self._emitline(' Type Offset VirtAddr PhysAddr') self._emitline(' FileSiz MemSiz Flags Align') # Now the entries # for segment in self.elffile.iter_segments(): self._emit(' %-14s ' % describe_p_type(segment['p_type'])) if self.elffile.elfclass == 32: self._emitline('%s %s %s %s %s %-3s %s' % ( self._format_hex(segment['p_offset'], fieldsize=6), self._format_hex(segment['p_vaddr'], fullhex=True), self._format_hex(segment['p_paddr'], fullhex=True), self._format_hex(segment['p_filesz'], fieldsize=5), self._format_hex(segment['p_memsz'], fieldsize=5), describe_p_flags(segment['p_flags']), self._format_hex(segment['p_align']))) else: # 64 self._emitline('%s %s %s' % ( self._format_hex(segment['p_offset'], fullhex=True), self._format_hex(segment['p_vaddr'], fullhex=True), self._format_hex(segment['p_paddr'], fullhex=True))) self._emitline(' %s %s %-3s %s' % ( self._format_hex(segment['p_filesz'], fullhex=True), self._format_hex(segment['p_memsz'], fullhex=True), describe_p_flags(segment['p_flags']), # lead0x set to False for p_align, to mimic readelf. # No idea why the difference from 32-bit mode :-| self._format_hex(segment['p_align'], lead0x=False))) if isinstance(segment, InterpSegment): self._emitline(' [Requesting program interpreter: %s]' % bytes2str(segment.get_interp_name())) # Sections to segments mapping # if self.elffile.num_sections() == 0: # No sections? 
We're done return self._emitline('\n Section to Segment mapping:') self._emitline(' Segment Sections...') for nseg, segment in enumerate(self.elffile.iter_segments()): self._emit(' %2.2d ' % nseg) for section in self.elffile.iter_sections(): if ( not section.is_null() and segment.section_in_segment(section)): self._emit('%s ' % bytes2str(section.name)) self._emitline('') def display_section_headers(self, show_heading=True): """ Display the ELF section headers """ elfheader = self.elffile.header if show_heading: self._emitline('There are %s section headers, starting at offset %s' % ( elfheader['e_shnum'], self._format_hex(elfheader['e_shoff']))) self._emitline('\nSection Header%s:' % ( 's' if elfheader['e_shnum'] > 1 else '')) # Different formatting constraints of 32-bit and 64-bit addresses # if self.elffile.elfclass == 32: self._emitline(' [Nr] Name Type Addr Off Size ES Flg Lk Inf Al') else: self._emitline(' [Nr] Name Type Address Offset') self._emitline(' Size EntSize Flags Link Info Align') # Now the entries # for nsec, section in enumerate(self.elffile.iter_sections()): self._emit(' [%2u] %-17.17s %-15.15s ' % ( nsec, bytes2str(section.name), describe_sh_type(section['sh_type']))) if self.elffile.elfclass == 32: self._emitline('%s %s %s %s %3s %2s %3s %2s' % ( self._format_hex(section['sh_addr'], fieldsize=8, lead0x=False), self._format_hex(section['sh_offset'], fieldsize=6, lead0x=False), self._format_hex(section['sh_size'], fieldsize=6, lead0x=False), self._format_hex(section['sh_entsize'], fieldsize=2, lead0x=False), describe_sh_flags(section['sh_flags']), section['sh_link'], section['sh_info'], section['sh_addralign'])) else: # 64 self._emitline(' %s %s' % ( self._format_hex(section['sh_addr'], fullhex=True, lead0x=False), self._format_hex(section['sh_offset'], fieldsize=16 if section['sh_offset'] > 0xffffffff else 8, lead0x=False))) self._emitline(' %s %s %3s %2s %3s %s' % ( self._format_hex(section['sh_size'], fullhex=True, lead0x=False), 
self._format_hex(section['sh_entsize'], fullhex=True, lead0x=False), describe_sh_flags(section['sh_flags']), section['sh_link'], section['sh_info'], section['sh_addralign'])) self._emitline('Key to Flags:') self._emit(' W (write), A (alloc), X (execute), M (merge), S (strings)') if self.elffile['e_machine'] in ('EM_X86_64', 'EM_L10M'): self._emitline(', l (large)') else: self._emitline() self._emitline(' I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)') self._emitline(' O (extra OS processing required) o (OS specific), p (processor specific)') def display_symbol_tables(self): """ Display the symbol tables contained in the file """ self._init_versioninfo() for section in self.elffile.iter_sections(): if not isinstance(section, SymbolTableSection): continue if section['sh_entsize'] == 0: self._emitline("\nSymbol table '%s' has a sh_entsize of zero!" % ( bytes2str(section.name))) continue self._emitline("\nSymbol table '%s' contains %s entries:" % ( bytes2str(section.name), section.num_symbols())) if self.elffile.elfclass == 32: self._emitline(' Num: Value Size Type Bind Vis Ndx Name') else: # 64 self._emitline(' Num: Value Size Type Bind Vis Ndx Name') for nsym, symbol in enumerate(section.iter_symbols()): version_info = '' # readelf doesn't display version info for Solaris versioning if (section['sh_type'] == 'SHT_DYNSYM' and self._versioninfo['type'] == 'GNU'): version = self._symbol_version(nsym) if (version['name'] != bytes2str(symbol.name) and version['index'] not in ('VER_NDX_LOCAL', 'VER_NDX_GLOBAL')): if version['filename']: # external symbol version_info = '@%(name)s (%(index)i)' % version else: # internal symbol if version['hidden']: version_info = '@%(name)s' % version else: version_info = '@@%(name)s' % version # symbol names are truncated to 25 chars, similarly to readelf self._emitline('%6d: %s %5d %-7s %-6s %-7s %4s %.25s%s' % ( nsym, self._format_hex( symbol['st_value'], fullhex=True, lead0x=False), symbol['st_size'], 
describe_symbol_type(symbol['st_info']['type']), describe_symbol_bind(symbol['st_info']['bind']), describe_symbol_visibility(symbol['st_other']['visibility']), describe_symbol_shndx(symbol['st_shndx']), bytes2str(symbol.name), version_info)) def display_dynamic_tags(self): """ Display the dynamic tags contained in the file """ has_dynamic_sections = False for section in self.elffile.iter_sections(): if not isinstance(section, DynamicSection): continue has_dynamic_sections = True self._emitline("\nDynamic section at offset %s contains %s entries:" % ( self._format_hex(section['sh_offset']), section.num_tags())) self._emitline(" Tag Type Name/Value") padding = 20 + (8 if self.elffile.elfclass == 32 else 0) for tag in section.iter_tags(): if tag.entry.d_tag == 'DT_NEEDED': parsed = 'Shared library: [%s]' % bytes2str(tag.needed) elif tag.entry.d_tag == 'DT_RPATH': parsed = 'Library rpath: [%s]' % bytes2str(tag.rpath) elif tag.entry.d_tag == 'DT_RUNPATH': parsed = 'Library runpath: [%s]' % bytes2str(tag.runpath) elif tag.entry.d_tag == 'DT_SONAME': parsed = 'Library soname: [%s]' % bytes2str(tag.soname) elif (tag.entry.d_tag.endswith('SZ') or tag.entry.d_tag.endswith('ENT')): parsed = '%i (bytes)' % tag['d_val'] elif (tag.entry.d_tag.endswith('NUM') or tag.entry.d_tag.endswith('COUNT')): parsed = '%i' % tag['d_val'] elif tag.entry.d_tag == 'DT_PLTREL': s = describe_dyn_tag(tag.entry.d_val) if s.startswith('DT_'): s = s[3:] parsed = '%s' % s else: parsed = '%#x' % tag['d_val'] self._emitline(" %s %-*s %s" % ( self._format_hex(ENUM_D_TAG.get(tag.entry.d_tag, tag.entry.d_tag), fullhex=True, lead0x=True), padding, '(%s)' % (tag.entry.d_tag[3:],), parsed)) if not has_dynamic_sections: # readelf only prints this if there is at least one segment if self.elffile.num_segments(): self._emitline("\nThere is no dynamic section in this file.") def display_relocations(self): """ Display the relocations contained in the file """ has_relocation_sections = False for section in 
self.elffile.iter_sections(): if not isinstance(section, RelocationSection): continue has_relocation_sections = True self._emitline("\nRelocation section '%s' at offset %s contains %s entries:" % ( bytes2str(section.name), self._format_hex(section['sh_offset']), section.num_relocations())) if section.is_RELA(): self._emitline(" Offset Info Type Sym. Value Sym. Name + Addend") else: self._emitline(" Offset Info Type Sym.Value Sym. Name") # The symbol table section pointed to in sh_link symtable = self.elffile.get_section(section['sh_link']) for rel in section.iter_relocations(): hexwidth = 8 if self.elffile.elfclass == 32 else 12 self._emit('%s %s %-17.17s' % ( self._format_hex(rel['r_offset'], fieldsize=hexwidth, lead0x=False), self._format_hex(rel['r_info'], fieldsize=hexwidth, lead0x=False), describe_reloc_type( rel['r_info_type'], self.elffile))) if rel['r_info_sym'] == 0: self._emitline() continue symbol = symtable.get_symbol(rel['r_info_sym']) # Some symbols have zero 'st_name', so instead what's used is # the name of the section they point at if symbol['st_name'] == 0: symsec = self.elffile.get_section(symbol['st_shndx']) symbol_name = symsec.name else: symbol_name = symbol.name self._emit(' %s %s%22.22s' % ( self._format_hex( symbol['st_value'], fullhex=True, lead0x=False), ' ' if self.elffile.elfclass == 32 else '', bytes2str(symbol_name))) if section.is_RELA(): self._emit(' %s %x' % ( '+' if rel['r_addend'] >= 0 else '-', abs(rel['r_addend']))) self._emitline() if not has_relocation_sections: self._emitline('\nThere are no relocations in this file.') def display_version_info(self): """ Display the version info contained in the file """ self._init_versioninfo() if not self._versioninfo['type']: self._emitline("\nNo version information found in this file.") return for section in self.elffile.iter_sections(): if isinstance(section, GNUVerSymSection): self._print_version_section_header( section, 'Version symbols', lead0x=False) num_symbols = 
section.num_symbols() # Symbol version info are printed four by four entries for idx_by_4 in range(0, num_symbols, 4): self._emit(' %03x:' % idx_by_4) for idx in range(idx_by_4, min(idx_by_4 + 4, num_symbols)): symbol_version = self._symbol_version(idx) if symbol_version['index'] == 'VER_NDX_LOCAL': version_index = 0 version_name = '(*local*)' elif symbol_version['index'] == 'VER_NDX_GLOBAL': version_index = 1 version_name = '(*global*)' else: version_index = symbol_version['index'] version_name = '(%(name)s)' % symbol_version visibility = 'h' if symbol_version['hidden'] else ' ' self._emit('%4x%s%-13s' % ( version_index, visibility, version_name)) self._emitline() elif isinstance(section, GNUVerDefSection): self._print_version_section_header( section, 'Version definition', indent=2) offset = 0 for verdef, verdaux_iter in section.iter_versions(): verdaux = next(verdaux_iter) name = verdaux.name if verdef['vd_flags']: flags = describe_ver_flags(verdef['vd_flags']) # Mimic exactly the readelf output flags += ' ' else: flags = 'none' self._emitline(' %s: Rev: %i Flags: %s Index: %i' ' Cnt: %i Name: %s' % ( self._format_hex(offset, fieldsize=6, alternate=True), verdef['vd_version'], flags, verdef['vd_ndx'], verdef['vd_cnt'], bytes2str(name))) verdaux_offset = ( offset + verdef['vd_aux'] + verdaux['vda_next']) for idx, verdaux in enumerate(verdaux_iter, start=1): self._emitline(' %s: Parent %i: %s' % (self._format_hex(verdaux_offset, fieldsize=4), idx, bytes2str(verdaux.name))) verdaux_offset += verdaux['vda_next'] offset += verdef['vd_next'] elif isinstance(section, GNUVerNeedSection): self._print_version_section_header(section, 'Version needs') offset = 0 for verneed, verneed_iter in section.iter_versions(): self._emitline(' %s: Version: %i File: %s Cnt: %i' % ( self._format_hex(offset, fieldsize=6, alternate=True), verneed['vn_version'], bytes2str(verneed.name), verneed['vn_cnt'])) vernaux_offset = offset + verneed['vn_aux'] for idx, vernaux in 
enumerate(verneed_iter, start=1): if vernaux['vna_flags']: flags = describe_ver_flags(vernaux['vna_flags']) # Mimic exactly the readelf output flags += ' ' else: flags = 'none' self._emitline( ' %s: Name: %s Flags: %s Version: %i' % ( self._format_hex(vernaux_offset, fieldsize=4), bytes2str(vernaux.name), flags, vernaux['vna_other'])) vernaux_offset += vernaux['vna_next'] offset += verneed['vn_next'] def display_hex_dump(self, section_spec): """ Display a hex dump of a section. section_spec is either a section number or a name. """ section = self._section_from_spec(section_spec) if section is None: self._emitline("Section '%s' does not exist in the file!" % ( section_spec)) return self._emitline("\nHex dump of section '%s':" % bytes2str(section.name)) self._note_relocs_for_section(section) addr = section['sh_addr'] data = section.data() dataptr = 0 while dataptr < len(data): bytesleft = len(data) - dataptr # chunks of 16 bytes per line linebytes = 16 if bytesleft > 16 else bytesleft self._emit(' %s ' % self._format_hex(addr, fieldsize=8)) for i in range(16): if i < linebytes: self._emit('%2.2x' % byte2int(data[dataptr + i])) else: self._emit(' ') if i % 4 == 3: self._emit(' ') for i in range(linebytes): c = data[dataptr + i : dataptr + i + 1] if byte2int(c[0]) >= 32 and byte2int(c[0]) < 0x7f: self._emit(bytes2str(c)) else: self._emit(bytes2str(b'.')) self._emitline() addr += linebytes dataptr += linebytes self._emitline() def display_string_dump(self, section_spec): """ Display a strings dump of a section. section_spec is either a section number or a name. """ section = self._section_from_spec(section_spec) if section is None: self._emitline("Section '%s' does not exist in the file!" 
% ( section_spec)) return self._emitline("\nString dump of section '%s':" % bytes2str(section.name)) found = False data = section.data() dataptr = 0 while dataptr < len(data): while ( dataptr < len(data) and not (32 <= byte2int(data[dataptr]) <= 127)): dataptr += 1 if dataptr >= len(data): break endptr = dataptr while endptr < len(data) and byte2int(data[endptr]) != 0: endptr += 1 found = True self._emitline(' [%6x] %s' % ( dataptr, bytes2str(data[dataptr:endptr]))) dataptr = endptr if not found: self._emitline(' No strings found in this section.') else: self._emitline() def display_debug_dump(self, dump_what): """ Dump a DWARF section """ self._init_dwarfinfo() if self._dwarfinfo is None: return set_global_machine_arch(self.elffile.get_machine_arch()) if dump_what == 'info': self._dump_debug_info() elif dump_what == 'decodedline': self._dump_debug_line_programs() elif dump_what == 'frames': self._dump_debug_frames() elif dump_what == 'frames-interp': self._dump_debug_frames_interp() else: self._emitline('debug dump not yet supported for "%s"' % dump_what) def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True, alternate=False): """ Format an address into a hexadecimal string. fieldsize: Size of the hexadecimal field (with leading zeros to fit the address into. For example with fieldsize=8, the format will be %08x If None, the minimal required field size will be used. fullhex: If True, override fieldsize to set it to the maximal size needed for the elfclass lead0x: If True, leading 0x is added alternate: If True, override lead0x to emulate the alternate hexadecimal form specified in format string with the # character: only non-zero values are prefixed with 0x. This form is used by readelf. 
""" if alternate: if addr == 0: lead0x = False else: lead0x = True fieldsize -= 2 s = '0x' if lead0x else '' if fullhex: fieldsize = 8 if self.elffile.elfclass == 32 else 16 if fieldsize is None: field = '%x' else: field = '%' + '0%sx' % fieldsize return s + field % addr def _print_version_section_header(self, version_section, name, lead0x=True, indent=1): """ Print a section header of one version related section (versym, verneed or verdef) with some options to accomodate readelf little differences between each header (e.g. indentation and 0x prefixing). """ if hasattr(version_section, 'num_versions'): num_entries = version_section.num_versions() else: num_entries = version_section.num_symbols() self._emitline("\n%s section '%s' contains %s entries:" % (name, bytes2str(version_section.name), num_entries)) self._emitline('%sAddr: %s Offset: %s Link: %i (%s)' % ( ' ' * indent, self._format_hex( version_section['sh_addr'], fieldsize=16, lead0x=lead0x), self._format_hex( version_section['sh_offset'], fieldsize=6, lead0x=True), version_section['sh_link'], bytes2str( self.elffile.get_section(version_section['sh_link']).name) ) ) def _init_versioninfo(self): """ Search and initialize informations about version related sections and the kind of versioning used (GNU or Solaris). 
""" if self._versioninfo is not None: return self._versioninfo = {'versym': None, 'verdef': None, 'verneed': None, 'type': None} for section in self.elffile.iter_sections(): if isinstance(section, GNUVerSymSection): self._versioninfo['versym'] = section elif isinstance(section, GNUVerDefSection): self._versioninfo['verdef'] = section elif isinstance(section, GNUVerNeedSection): self._versioninfo['verneed'] = section elif isinstance(section, DynamicSection): for tag in section.iter_tags(): if tag['d_tag'] == 'DT_VERSYM': self._versioninfo['type'] = 'GNU' break if not self._versioninfo['type'] and ( self._versioninfo['verneed'] or self._versioninfo['verdef']): self._versioninfo['type'] = 'Solaris' def _symbol_version(self, nsym): """ Return a dict containing information on the or None if no version information is available """ self._init_versioninfo() symbol_version = dict.fromkeys(('index', 'name', 'filename', 'hidden')) if (not self._versioninfo['versym'] or nsym >= self._versioninfo['versym'].num_symbols()): return None symbol = self._versioninfo['versym'].get_symbol(nsym) index = symbol.entry['ndx'] if not index in ('VER_NDX_LOCAL', 'VER_NDX_GLOBAL'): index = int(index) if self._versioninfo['type'] == 'GNU': # In GNU versioning mode, the highest bit is used to # store wether the symbol is hidden or not if index & 0x8000: index &= ~0x8000 symbol_version['hidden'] = True if (self._versioninfo['verdef'] and index <= self._versioninfo['verdef'].num_versions()): _, verdaux_iter = \ self._versioninfo['verdef'].get_version(index) symbol_version['name'] = bytes2str(next(verdaux_iter).name) else: verneed, vernaux = \ self._versioninfo['verneed'].get_version(index) symbol_version['name'] = bytes2str(vernaux.name) symbol_version['filename'] = bytes2str(verneed.name) symbol_version['index'] = index return symbol_version def _section_from_spec(self, spec): """ Retrieve a section given a "spec" (either number or name). Return None if no such section exists in the file. 
""" try: num = int(spec) if num < self.elffile.num_sections(): return self.elffile.get_section(num) else: return None except ValueError: # Not a number. Must be a name then return self.elffile.get_section_by_name(str2bytes(spec)) def _note_relocs_for_section(self, section): """ If there are relocation sections pointing to the givne section, emit a note about it. """ for relsec in self.elffile.iter_sections(): if isinstance(relsec, RelocationSection): info_idx = relsec['sh_info'] if self.elffile.get_section(info_idx) == section: self._emitline(' Note: This section has relocations against it, but these have NOT been applied to this dump.') return def _init_dwarfinfo(self): """ Initialize the DWARF info contained in the file and assign it to self._dwarfinfo. Leave self._dwarfinfo at None if no DWARF info was found in the file """ if self._dwarfinfo is not None: return if self.elffile.has_dwarf_info(): self._dwarfinfo = self.elffile.get_dwarf_info() else: self._dwarfinfo = None def _dump_debug_info(self): """ Dump the debugging info section. """ self._emitline('Contents of the .debug_info section:\n') # Offset of the .debug_info section in the stream section_offset = self._dwarfinfo.debug_info_sec.global_offset for cu in self._dwarfinfo.iter_CUs(): self._emitline(' Compilation Unit @ offset %s:' % self._format_hex(cu.cu_offset)) self._emitline(' Length: %s (%s)' % ( self._format_hex(cu['unit_length']), '%s-bit' % cu.dwarf_format())) self._emitline(' Version: %s' % cu['version']), self._emitline(' Abbrev Offset: %s' % ( self._format_hex(cu['debug_abbrev_offset']))), self._emitline(' Pointer Size: %s' % cu['address_size']) # The nesting depth of each DIE within the tree of DIEs must be # displayed. To implement this, a counter is incremented each time # the current DIE has children, and decremented when a null die is # encountered. 
Due to the way the DIE tree is serialized, this will # correctly reflect the nesting depth # die_depth = 0 for die in cu.iter_DIEs(): self._emitline(' <%s><%x>: Abbrev Number: %s%s' % ( die_depth, die.offset, die.abbrev_code, (' (%s)' % die.tag) if not die.is_null() else '')) if die.is_null(): die_depth -= 1 continue for attr in itervalues(die.attributes): name = attr.name # Unknown attribute values are passed-through as integers if isinstance(name, int): name = 'Unknown AT value: %x' % name self._emitline(' <%2x> %-18s: %s' % ( attr.offset, name, describe_attr_value( attr, die, section_offset))) if die.has_children: die_depth += 1 self._emitline() def _dump_debug_line_programs(self): """ Dump the (decoded) line programs from .debug_line The programs are dumped in the order of the CUs they belong to. """ self._emitline('Decoded dump of debug contents of section .debug_line:\n') for cu in self._dwarfinfo.iter_CUs(): lineprogram = self._dwarfinfo.line_program_for_CU(cu) cu_filename = bytes2str(lineprogram['file_entry'][0].name) if len(lineprogram['include_directory']) > 0: dir_index = lineprogram['file_entry'][0].dir_index if dir_index > 0: dir = lineprogram['include_directory'][dir_index - 1] else: dir = b'.' cu_filename = '%s/%s' % (bytes2str(dir), cu_filename) self._emitline('CU: %s:' % cu_filename) self._emitline('File name Line number Starting address') # Print each state's file, line and address information. For some # instructions other output is needed to be compatible with # readelf. 
for entry in lineprogram.get_entries(): state = entry.state if state is None: # Special handling for commands that don't set a new state if entry.command == DW_LNS_set_file: file_entry = lineprogram['file_entry'][entry.args[0] - 1] if file_entry.dir_index == 0: # current directory self._emitline('\n./%s:[++]' % ( bytes2str(file_entry.name))) else: self._emitline('\n%s/%s:' % ( bytes2str(lineprogram['include_directory'][file_entry.dir_index - 1]), bytes2str(file_entry.name))) elif entry.command == DW_LNE_define_file: self._emitline('%s:' % ( bytes2str(lineprogram['include_directory'][entry.args[0].dir_index]))) elif not state.end_sequence: # readelf doesn't print the state after end_sequence # instructions. I think it's a bug but to be compatible # I don't print them too. self._emitline('%-35s %11d %18s' % ( bytes2str(lineprogram['file_entry'][state.file - 1].name), state.line, '0' if state.address == 0 else self._format_hex(state.address))) if entry.command == DW_LNS_copy: # Another readelf oddity... 
self._emitline() def _dump_debug_frames(self): """ Dump the raw frame information from .debug_frame """ if not self._dwarfinfo.has_CFI(): return self._emitline('Contents of the .debug_frame section:') for entry in self._dwarfinfo.CFI_entries(): if isinstance(entry, CIE): self._emitline('\n%08x %08x %08x CIE' % ( entry.offset, entry['length'], entry['CIE_id'])) self._emitline(' Version: %d' % entry['version']) self._emitline(' Augmentation: "%s"' % bytes2str(entry['augmentation'])) self._emitline(' Code alignment factor: %u' % entry['code_alignment_factor']) self._emitline(' Data alignment factor: %d' % entry['data_alignment_factor']) self._emitline(' Return address column: %d' % entry['return_address_register']) self._emitline() else: # FDE self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % ( entry.offset, entry['length'], entry['CIE_pointer'], entry.cie.offset, entry['initial_location'], entry['initial_location'] + entry['address_range'])) self._emit(describe_CFI_instructions(entry)) self._emitline() def _dump_debug_frames_interp(self): """ Dump the interpreted (decoded) frame information from .debug_frame """ if not self._dwarfinfo.has_CFI(): return self._emitline('Contents of the .debug_frame section:') for entry in self._dwarfinfo.CFI_entries(): if isinstance(entry, CIE): self._emitline('\n%08x %08x %08x CIE "%s" cf=%d df=%d ra=%d' % ( entry.offset, entry['length'], entry['CIE_id'], bytes2str(entry['augmentation']), entry['code_alignment_factor'], entry['data_alignment_factor'], entry['return_address_register'])) ra_regnum = entry['return_address_register'] else: # FDE self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % ( entry.offset, entry['length'], entry['CIE_pointer'], entry.cie.offset, entry['initial_location'], entry['initial_location'] + entry['address_range'])) ra_regnum = entry.cie['return_address_register'] # Print the heading row for the decoded table self._emit(' LOC') self._emit(' ' if entry.structs.address_size == 4 else ' 
') self._emit(' CFA ') # Decode the table nad look at the registers it describes. # We build reg_order here to match readelf's order. In particular, # registers are sorted by their number, and the register matching # ra_regnum is always listed last with a special heading. decoded_table = entry.get_decoded() reg_order = sorted(ifilter( lambda r: r != ra_regnum, decoded_table.reg_order)) # Headings for the registers for regnum in reg_order: self._emit('%-6s' % describe_reg_name(regnum)) self._emitline('ra ') # Now include ra_regnum in reg_order to print its values similarly # to the other registers. reg_order.append(ra_regnum) for line in decoded_table.table: self._emit(self._format_hex( line['pc'], fullhex=True, lead0x=False)) self._emit(' %-9s' % describe_CFI_CFA_rule(line['cfa'])) for regnum in reg_order: if regnum in line: s = describe_CFI_register_rule(line[regnum]) else: s = 'u' self._emit('%-6s' % s) self._emitline() self._emitline() def _emit(self, s=''): """ Emit an object to output """ self.output.write(str(s)) def _emitline(self, s=''): """ Emit an object to output, followed by a newline """ self.output.write(str(s) + '\n')
# NOTE(review): ``parser`` is not defined in this part of the file; presumably
# it is an argument parser constructed earlier -- confirm.
args = parser.parse_args()


def die(fmt, *args):
    """
    Write a 'struct2java.py: '-prefixed message to stderr and exit with
    status 1.

    fmt:  %-style format string
    args: values interpolated into fmt
    """
    message = ('struct2java.py: ' + fmt + '\n') % args
    sys.stderr.write(message)
    sys.exit(1)


def DIE_to_name(DIE):
    """
    Return the DW_AT_name attribute of DIE decoded as ASCII, or None when the
    DIE carries no such attribute.
    """
    name_attr = DIE.attributes.get('DW_AT_name')
    if name_attr is None:
        return None
    return name_attr.value.decode('ascii')


with open(args.elffile, 'rb') as f:
    elffile = ELFFile(f)
    if not elffile.has_dwarf_info():
        die('file does not contain debug information')
    dwarfinfo = elffile.get_dwarf_info()

    structs_by_offset = {}
    structs_by_name = {}
    typedefs_by_name = {}

    # Index every named typedef DIE by its name, across all compile units.
    for CU in dwarfinfo.iter_CUs():
        for DIE in CU.iter_DIEs():
            if DIE.tag != 'DW_TAG_typedef':
                continue
            name = DIE_to_name(DIE)
            if name is not None:
                typedefs_by_name[name] = DIE
class Image(object):
    """
    An ELF image together with lookup tables derived from its DWARF info:
    compile units (functions, variables, line programs), an address -> source
    line map, a source line -> addresses map and, when call frame information
    is present, a table of CFA rules keyed by pc.

    print calls use the parenthesised single-argument form, which produces
    the same output under the Python 2 print statement.
    """

    def __init__(self, fname):
        """
        Load the ELF file at fname and build the DWARF lookup tables.

        fname: path of the ELF file to read
        """
        # ELF is a binary format: open in binary mode so that no newline
        # translation or text decoding is applied to the contents.
        if platform.system() == "Windows":
            # The stream is handed to ELFFile and is not closed here.
            elf_data = open(fname, "rb")
        else:
            with open(fname, "rb") as f:
                elf_data = StringIO(f.read())
        self.elf = ELFFile(elf_data)
        # NOTE(review): the tables are only built when debug info is present;
        # confirm this guard matches the intended flow.
        if self.elf.has_dwarf_info():
            self.dwarf = self.elf.get_dwarf_info()
            set_global_machine_arch(self.elf.get_machine_arch())
            self.__tame_dwarf()
        self.get_expr_evaluator = lambda: ExprLiveEval(self)

    @property
    def executable(self):
        """
        The (base_addr, img) pair from _build_executable(), computed on first
        access and cached in self._exe afterwards.
        """
        try:
            return self._exe
        except AttributeError:
            self._exe = self._build_executable()
            return self._exe

    def _build_executable(self):
        """
        Concatenate the data of section 1 with sections 2 and 3, for each of
        those that is an allocated SHT_PROGBITS section.

        Returns a (base_addr, img) tuple where base_addr is sh_addr of
        section 1. Raises Exception when an appended section does not start
        right after the data collected so far.
        """
        # sh_flags & 2 tests the SHF_ALLOC bit (value 0x2 in the ELF spec).
        s = self.elf.get_section(1)
        assert s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS"
        base_addr = s.header["sh_addr"]
        img = s.data()

        s = self.elf.get_section(2)
        if s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS":
            if s.header["sh_addr"] != base_addr + len(img):
                raise Exception("bad section vaddr - #2 should follow #1")
            img += s.data()

        s = self.elf.get_section(3)
        print("%s" % str(s.header))
        if s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS":
            if s.header["sh_addr"] != base_addr + len(img):
                raise Exception("bad section vaddr - #3 should follow #2")
            img += s.data()
        return (base_addr, img)

    @staticmethod
    def _die_location(die, location_lists):
        """
        Return the location of a parameter/variable DIE: the location list
        looked up in location_lists when DW_AT_location has form
        DW_FORM_sec_offset or DW_FORM_data4, the raw attribute value
        otherwise, and [] when it cannot be determined.
        """
        try:
            attr = die.attributes['DW_AT_location']
            if attr.form in ('DW_FORM_sec_offset', 'DW_FORM_data4'):
                return location_lists.get_location_list_at_offset(attr.value)
            return attr.value
        except Exception:
            # Best effort: a missing attribute or a failed lookup yields an
            # empty location instead of aborting the whole scan.
            return []

    def __tame_dwarf(self):
        """
        Walk the DWARF info in self.dwarf and fill self._compile_units,
        self._addresses, self._lowest_known_address and, when CFI entries
        exist, self._cfa_rule.

        Raises Exception when one address maps to two different source lines.
        """
        dw = self.dwarf
        self._compile_units = {}
        self._addresses = {}
        self._lowest_known_address = None
        location_lists = dw.location_lists()

        # Prefer EH CFI entries, fall back to plain CFI entries.
        cfi = None
        if dw.has_EH_CFI():
            cfi = dw.EH_CFI_entries()
            print("we have EH CFI entries")
        elif dw.has_CFI():
            cfi = dw.CFI_entries()
            print("we have CFI entries")
        else:
            print("no (EH) CFI")

        if cfi is not None:
            self._cfa_rule = {}
            for c in cfi:
                try:
                    decoded = c.get_decoded()
                except Exception:
                    print("CFI decoding exception")
                    break
                for entry in decoded.table:
                    pc = entry["pc"]
                    if pc in self._cfa_rule:
                        print("duplicate cfa rule found at pc %x" % pc)
                        print("\t%s" % str(self._cfa_rule[pc]))
                        print("\t%s" % str(entry))
                        print("")
                    # A later rule for the same pc replaces the earlier one.
                    self._cfa_rule[pc] = entry

        for c in dw.iter_CUs():
            functions = {}
            variables = {}
            td = c.get_top_DIE()
            for d in td.iter_children():
                if d.tag == 'DW_TAG_subprogram':
                    if 'DW_AT_declaration' in d.attributes:
                        continue
                    lpc = d.attributes['DW_AT_low_pc'].value
                    hpc = d.attributes['DW_AT_high_pc'].value
                    # NOTE(review): presumably handles DW_AT_high_pc encoded
                    # as an offset from low_pc -- confirm.
                    if hpc < lpc:
                        hpc += lpc
                    function_name = d.attributes['DW_AT_name'].value
                    f = {"lpc": lpc, "hpc": hpc, "args": {}, "vars": {}}
                    if 'DW_AT_frame_base' in d.attributes:
                        a = d.attributes['DW_AT_frame_base']
                        if a.form in ('DW_FORM_data4', 'DW_FORM_sec_offset'):
                            f["fb"] = location_lists.get_location_list_at_offset(a.value)
                        else:
                            f["fb"] = a.value
                    for child in d.iter_children():
                        if child.tag == "DW_TAG_formal_parameter":
                            kind = "args"
                        elif child.tag == "DW_TAG_variable":
                            kind = "vars"
                        else:
                            continue
                        name = child.attributes['DW_AT_name'].value
                        f[kind][name] = {
                            "location": self._die_location(child, location_lists)}
                    functions[function_name] = f
                elif d.tag == 'DW_TAG_variable':
                    # NOTE(review): only variables whose DW_AT_decl_file is 1
                    # are kept; presumably the CU's primary source -- confirm.
                    if d.attributes['DW_AT_decl_file'].value == 1:
                        try:
                            name = d.attributes['DW_AT_name'].value
                        except Exception:
                            name = '(%s)' % str(d.attributes['DW_AT_name'])
                        v = {}
                        try:
                            v["location"] = d.attributes['DW_AT_location'].value
                        except Exception:
                            v["location"] = []
                        variables[name] = v

            x = {}
            fname = td.attributes['DW_AT_name'].value
            x["line_program"] = dw.line_program_for_CU(c).get_entries()
            x["lpc"] = td.attributes['DW_AT_low_pc'].value
            x["hpc"] = td.attributes['DW_AT_high_pc'].value
            x["comp_dir"] = td.attributes['DW_AT_comp_dir'].value
            x["functions"] = functions
            x["variables"] = variables
            self._compile_units[fname] = x
            if ((self._lowest_known_address is None) or
                    (self._lowest_known_address > x["lpc"])):
                self._lowest_known_address = x["lpc"]

        for c in self._compile_units:
            lines = self._compile_units[c]["lines"] = {}
            for line in self._compile_units[c]["line_program"]:
                state = line.state
                if state is None or (state.end_sequence or state.basic_block or
                                     state.epilogue_begin or state.prologue_end):
                    continue
                cl = "%s+%d" % (c, state.line)
                if state.address in self._addresses and self._addresses[state.address] != cl:
                    raise Exception("addr %x is both \"%s\" and \"%s+%d\"" % (
                        state.address, self._addresses[state.address], c, state.line))
                self._addresses[state.address] = cl
                lines.setdefault(state.line, []).append(state.address)

        if cfi is not None:
            print("CFA table:")
            for pc in sorted(self._cfa_rule):
                print("%x: %s\t\t(%s)" % (pc, str(self._cfa_rule[pc]), self.addr2line(pc)))

    def addr2line(self, addr):
        """
        Return the "<compile unit>+<line>" string recorded for addr, or ''
        when nothing is recorded for it.
        """
        try:
            return self._addresses[addr]
        except (AttributeError, KeyError, TypeError):
            return ''

    def loc_at(self, addr):
        """
        Locate addr in the source.

        Steps back 4 bytes at a time, down to the lowest known address, until
        an address with line information is found.

        Returns (function name, compile unit name, line, comp_dir); the line
        is returned as a string. Returns ("unknown", "", 0, "") when no line
        information is found.
        """
        line = self.addr2line(addr)
        lowest = self._lowest_known_address
        # lowest is None when no compile unit was recorded; comparing against
        # None would never terminate on Python 2 and raise on Python 3.
        while '' == line and lowest is not None and addr >= lowest:
            addr -= 4
            line = self.addr2line(addr)
        if '' == line:
            return ("unknown", "", 0, "")
        # Split on the LAST '+': the compile unit name may itself contain '+'.
        cuname, culine = line.rsplit("+", 1)
        fname = ""
        c = self._compile_units[cuname]
        for f, info in c["functions"].items():
            if info["lpc"] <= addr <= info["hpc"]:
                fname = f
                break
        return (fname, cuname, culine, c["comp_dir"])

    def line2addr(self, fname, line):
        """
        Return the list of addresses recorded for line of compile unit fname.
        Raises KeyError when the compile unit or the line is unknown.
        """
        return self._compile_units[fname]["lines"][line]
def test_files(fns,quiet=False,profile=False,runtime=False): for fn in fns: short_fn = fn.split("/")[-1] if "/" in fn else fn if os.path.isdir(fn): if not quiet: print("{} {}: skipping directory".format(notice, short_fn)) continue try: elf = ELFFile(open(fn, "rb")) except ELFError: if not quiet: print("{} {}: skipping non-ELF file".format(notice, short_fn)) continue arch = elf['e_machine'] if arch not in SUPPORTED: if not quiet: print("{} {}: skipping ELF with unsupported architecture `{}`".format(notice, short_fn, arch)) continue engine_functions = {} engine = "builtin" try: this_engine = Static(fn, debug=0) #no debug output if args.profile: #needs pycallgraph from pycallgraph import PyCallGraph from pycallgraph.output import GraphvizOutput graphviz = GraphvizOutput() graphviz.output_file = 'prof.png' with PyCallGraph(output=graphviz): this_engine.process() else: this_engine.process() engine_functions[engine] = {x.start for x in this_engine['functions']} except KeyboardInterrupt: print("{} User stopped processing test cases.".format(notice)) sys.exit() except MemoryError: #print("{} {}: bap encountered a memory error.".format(fail, short_fn, engine) continue except Exception as e: print("{} {}: {} engine failed to process file with `{}'".format(fail, short_fn, engine, e)) continue if runtime: if not quiet: print("{} {}: {} ran without exceptions".format(ok_green, short_fn, engine)) continue if runtime: continue if elf.has_dwarf_info(): dwarfinfo = elf.get_dwarf_info() dwarf_functions = get_functions(dwarfinfo) for engine,functions in engine_functions.items(): missed = dwarf_functions - functions total_fxns = len(dwarf_functions) if len(missed) == 0: print("{} {}: {} engine found all {} function(s)".format(ok_green, short_fn, engine, total_fxns)) else: status = fail if len(missed) == total_fxns else warn if args.verbose: fmt = "{} {}: {} engine missed {}/{} function(s): {}" missed_s = ", ".join(hex(fxn) for fxn in missed) print(fmt.format(status, short_fn, engine, 
len(missed), total_fxns, missed_s)) else: fmt = "{} {}: {} engine missed {}/{} function(s)" print(fmt.format(status, short_fn, engine, len(missed), total_fxns)) else: for engine,functions in engine_functions.items(): status = fail if len(functions) == 0 else ok_blue print("{} {}: {} engine found {} function(s). (dwarf info unavailable)".format(status, short_fn, engine, len(functions)))
class ReadElf(object): """ display_* methods are used to emit output into the output stream """ def __init__(self, file, output): """ file: stream object with the ELF file to read output: output stream to write to """ self.elffile = ELFFile(file) self.output = output # Lazily initialized if a debug dump is requested self._dwarfinfo = None def display_file_header(self): """ Display the ELF file header """ self._emitline('ELF Header:') self._emit(' Magic: ') self._emitline(' '.join('%2.2x' % byte2int(b) for b in self.elffile.e_ident_raw)) header = self.elffile.header e_ident = header['e_ident'] self._emitline(' Class: %s' % describe_ei_class(e_ident['EI_CLASS'])) self._emitline(' Data: %s' % describe_ei_data(e_ident['EI_DATA'])) self._emitline(' Version: %s' % describe_ei_version(e_ident['EI_VERSION'])) self._emitline(' OS/ABI: %s' % describe_ei_osabi(e_ident['EI_OSABI'])) self._emitline(' ABI Version: %d' % e_ident['EI_ABIVERSION']) self._emitline(' Type: %s' % describe_e_type(header['e_type'])) self._emitline(' Machine: %s' % describe_e_machine(header['e_machine'])) self._emitline(' Version: %s' % describe_e_version_numeric(header['e_version'])) self._emitline(' Entry point address: %s' % self._format_hex(header['e_entry'])) self._emit(' Start of program headers: %s' % header['e_phoff']) self._emitline(' (bytes into file)') self._emit(' Start of section headers: %s' % header['e_shoff']) self._emitline(' (bytes into file)') self._emitline(' Flags: %s' % self._format_hex(header['e_flags'])) self._emitline(' Size of this header: %s (bytes)' % header['e_ehsize']) self._emitline(' Size of program headers: %s (bytes)' % header['e_phentsize']) self._emitline(' Number of program headers: %s' % header['e_phnum']) self._emitline(' Size of section headers: %s (bytes)' % header['e_shentsize']) self._emitline(' Number of section headers: %s' % header['e_shnum']) self._emitline(' Section header string table index: %s' % header['e_shstrndx']) def display_program_headers(self, 
show_heading=True): """ Display the ELF program headers. If show_heading is True, displays the heading for this information (Elf file type is...) """ self._emitline() if self.elffile.num_segments() == 0: self._emitline('There are no program headers in this file.') return elfheader = self.elffile.header if show_heading: self._emitline('Elf file type is %s' % describe_e_type(elfheader['e_type'])) self._emitline('Entry point is %s' % self._format_hex(elfheader['e_entry'])) # readelf weirness - why isn't e_phoff printed as hex? (for section # headers, it is...) self._emitline('There are %s program headers, starting at offset %s' % ( elfheader['e_phnum'], elfheader['e_phoff'])) self._emitline() self._emitline('Program Headers:') # Now comes the table of program headers with their attributes. Note # that due to different formatting constraints of 32-bit and 64-bit # addresses, there are some conditions on elfclass here. # # First comes the table heading # if self.elffile.elfclass == 32: self._emitline(' Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align') else: self._emitline(' Type Offset VirtAddr PhysAddr') self._emitline(' FileSiz MemSiz Flags Align') # Now the entries # for segment in self.elffile.iter_segments(): self._emit(' %-14s ' % describe_p_type(segment['p_type'])) if self.elffile.elfclass == 32: self._emitline('%s %s %s %s %s %-3s %s' % ( self._format_hex(segment['p_offset'], fieldsize=6), self._format_hex(segment['p_vaddr'], fullhex=True), self._format_hex(segment['p_paddr'], fullhex=True), self._format_hex(segment['p_filesz'], fieldsize=5), self._format_hex(segment['p_memsz'], fieldsize=5), describe_p_flags(segment['p_flags']), self._format_hex(segment['p_align']))) else: # 64 self._emitline('%s %s %s' % ( self._format_hex(segment['p_offset'], fullhex=True), self._format_hex(segment['p_vaddr'], fullhex=True), self._format_hex(segment['p_paddr'], fullhex=True))) self._emitline(' %s %s %-3s %s' % ( self._format_hex(segment['p_filesz'], fullhex=True), 
self._format_hex(segment['p_memsz'], fullhex=True), describe_p_flags(segment['p_flags']), # lead0x set to False for p_align, to mimic readelf. # No idea why the difference from 32-bit mode :-| self._format_hex(segment['p_align'], lead0x=False))) if isinstance(segment, InterpSegment): self._emitline(' [Requesting program interpreter: %s]' % bytes2str(segment.get_interp_name())) # Sections to segments mapping # if self.elffile.num_sections() == 0: # No sections? We're done return self._emitline('\n Section to Segment mapping:') self._emitline(' Segment Sections...') for nseg, segment in enumerate(self.elffile.iter_segments()): self._emit(' %2.2d ' % nseg) for section in self.elffile.iter_sections(): if ( not section.is_null() and segment.section_in_segment(section)): self._emit('%s ' % bytes2str(section.name)) self._emitline('') def display_section_headers(self, show_heading=True): """ Display the ELF section headers """ elfheader = self.elffile.header if show_heading: self._emitline('There are %s section headers, starting at offset %s' % ( elfheader['e_shnum'], self._format_hex(elfheader['e_shoff']))) self._emitline('\nSection Header%s:' % ( 's' if elfheader['e_shnum'] > 1 else '')) # Different formatting constraints of 32-bit and 64-bit addresses # if self.elffile.elfclass == 32: self._emitline(' [Nr] Name Type Addr Off Size ES Flg Lk Inf Al') else: self._emitline(' [Nr] Name Type Address Offset') self._emitline(' Size EntSize Flags Link Info Align') # Now the entries # for nsec, section in enumerate(self.elffile.iter_sections()): self._emit(' [%2u] %-17.17s %-15.15s ' % ( nsec, bytes2str(section.name), describe_sh_type(section['sh_type']))) if self.elffile.elfclass == 32: self._emitline('%s %s %s %s %3s %2s %3s %2s' % ( self._format_hex(section['sh_addr'], fieldsize=8, lead0x=False), self._format_hex(section['sh_offset'], fieldsize=6, lead0x=False), self._format_hex(section['sh_size'], fieldsize=6, lead0x=False), self._format_hex(section['sh_entsize'], 
fieldsize=2, lead0x=False), describe_sh_flags(section['sh_flags']), section['sh_link'], section['sh_info'], section['sh_addralign'])) else: # 64 self._emitline(' %s %s' % ( self._format_hex(section['sh_addr'], fullhex=True, lead0x=False), self._format_hex(section['sh_offset'], fieldsize=16 if section['sh_offset'] > 0xffffffff else 8, lead0x=False))) self._emitline(' %s %s %3s %2s %3s %s' % ( self._format_hex(section['sh_size'], fullhex=True, lead0x=False), self._format_hex(section['sh_entsize'], fullhex=True, lead0x=False), describe_sh_flags(section['sh_flags']), section['sh_link'], section['sh_info'], section['sh_addralign'])) self._emitline('Key to Flags:') self._emit(' W (write), A (alloc), X (execute), M (merge), S (strings)') if self.elffile['e_machine'] in ('EM_X86_64', 'EM_L10M'): self._emitline(', l (large)') else: self._emitline() self._emitline(' I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)') self._emitline(' O (extra OS processing required) o (OS specific), p (processor specific)') def display_symbol_tables(self): """ Display the symbol tables contained in the file """ for section in self.elffile.iter_sections(): if not isinstance(section, SymbolTableSection): continue if section['sh_entsize'] == 0: self._emitline("\nSymbol table '%s' has a sh_entsize of zero!" 
% ( bytes2str(section.name))) continue self._emitline("\nSymbol table '%s' contains %s entries:" % ( bytes2str(section.name), section.num_symbols())) if self.elffile.elfclass == 32: self._emitline(' Num: Value Size Type Bind Vis Ndx Name') else: # 64 self._emitline(' Num: Value Size Type Bind Vis Ndx Name') for nsym, symbol in enumerate(section.iter_symbols()): # symbol names are truncated to 25 chars, similarly to readelf self._emitline('%6d: %s %5d %-7s %-6s %-7s %4s %.25s' % ( nsym, self._format_hex(symbol['st_value'], fullhex=True, lead0x=False), symbol['st_size'], describe_symbol_type(symbol['st_info']['type']), describe_symbol_bind(symbol['st_info']['bind']), describe_symbol_visibility(symbol['st_other']['visibility']), describe_symbol_shndx(symbol['st_shndx']), bytes2str(symbol.name))) def display_relocations(self): """ Display the relocations contained in the file """ has_relocation_sections = False for section in self.elffile.iter_sections(): if not isinstance(section, RelocationSection): continue has_relocation_sections = True self._emitline("\nRelocation section '%s' at offset %s contains %s entries:" % ( bytes2str(section.name), self._format_hex(section['sh_offset']), section.num_relocations())) if section.is_RELA(): self._emitline(" Offset Info Type Sym. Value Sym. Name + Addend") else: self._emitline(" Offset Info Type Sym.Value Sym. 
Name") # The symbol table section pointed to in sh_link symtable = self.elffile.get_section(section['sh_link']) for rel in section.iter_relocations(): hexwidth = 8 if self.elffile.elfclass == 32 else 12 self._emit('%s %s %-17.17s' % ( self._format_hex(rel['r_offset'], fieldsize=hexwidth, lead0x=False), self._format_hex(rel['r_info'], fieldsize=hexwidth, lead0x=False), describe_reloc_type( rel['r_info_type'], self.elffile))) if rel['r_info_sym'] == 0: self._emitline() continue symbol = symtable.get_symbol(rel['r_info_sym']) # Some symbols have zero 'st_name', so instead what's used is # the name of the section they point at if symbol['st_name'] == 0: symsec = self.elffile.get_section(symbol['st_shndx']) symbol_name = symsec.name else: symbol_name = symbol.name self._emit(' %s %s%22.22s' % ( self._format_hex( symbol['st_value'], fullhex=True, lead0x=False), ' ' if self.elffile.elfclass == 32 else '', bytes2str(symbol_name))) if section.is_RELA(): self._emit(' %s %x' % ( '+' if rel['r_addend'] >= 0 else '-', abs(rel['r_addend']))) self._emitline() if not has_relocation_sections: self._emitline('\nThere are no relocations in this file.') def display_hex_dump(self, section_spec): """ Display a hex dump of a section. section_spec is either a section number or a name. """ section = self._section_from_spec(section_spec) if section is None: self._emitline("Section '%s' does not exist in the file!" 
% ( section_spec)) return self._emitline("\nHex dump of section '%s':" % bytes2str(section.name)) self._note_relocs_for_section(section) addr = section['sh_addr'] data = section.data() dataptr = 0 while dataptr < len(data): bytesleft = len(data) - dataptr # chunks of 16 bytes per line linebytes = 16 if bytesleft > 16 else bytesleft self._emit(' %s ' % self._format_hex(addr, fieldsize=8)) for i in range(16): if i < linebytes: self._emit('%2.2x' % byte2int(data[dataptr + i])) else: self._emit(' ') if i % 4 == 3: self._emit(' ') for i in range(linebytes): c = data[dataptr + i : dataptr + i + 1] if byte2int(c[0]) >= 32 and byte2int(c[0]) < 0x7f: self._emit(bytes2str(c)) else: self._emit(bytes2str(b'.')) self._emitline() addr += linebytes dataptr += linebytes self._emitline() def display_string_dump(self, section_spec): """ Display a strings dump of a section. section_spec is either a section number or a name. """ section = self._section_from_spec(section_spec) if section is None: self._emitline("Section '%s' does not exist in the file!" 
% ( section_spec)) return self._emitline("\nString dump of section '%s':" % bytes2str(section.name)) found = False data = section.data() dataptr = 0 while dataptr < len(data): while ( dataptr < len(data) and not (32 <= byte2int(data[dataptr]) <= 127)): dataptr += 1 if dataptr >= len(data): break endptr = dataptr while endptr < len(data) and byte2int(data[endptr]) != 0: endptr += 1 found = True self._emitline(' [%6x] %s' % ( dataptr, bytes2str(data[dataptr:endptr]))) dataptr = endptr if not found: self._emitline(' No strings found in this section.') else: self._emitline() def display_debug_dump(self, dump_what): """ Dump a DWARF section """ self._init_dwarfinfo() if self._dwarfinfo is None: return set_global_machine_arch(self.elffile.get_machine_arch()) if dump_what == 'info': self._dump_debug_info() elif dump_what == 'decodedline': self._dump_debug_line_programs() elif dump_what == 'frames': self._dump_debug_frames() elif dump_what == 'frames-interp': self._dump_debug_frames_interp() else: self._emitline('debug dump not yet supported for "%s"' % dump_what) def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True): """ Format an address into a hexadecimal string. fieldsize: Size of the hexadecimal field (with leading zeros to fit the address into. For example with fieldsize=8, the format will be %08x If None, the minimal required field size will be used. fullhex: If True, override fieldsize to set it to the maximal size needed for the elfclass lead0x: If True, leading 0x is added """ s = '0x' if lead0x else '' if fullhex: fieldsize = 8 if self.elffile.elfclass == 32 else 16 if fieldsize is None: field = '%x' else: field = '%' + '0%sx' % fieldsize return s + field % addr def _section_from_spec(self, spec): """ Retrieve a section given a "spec" (either number or name). Return None if no such section exists in the file. 
""" try: num = int(spec) if num < self.elffile.num_sections(): return self.elffile.get_section(num) else: return None except ValueError: # Not a number. Must be a name then return self.elffile.get_section_by_name(str2bytes(spec)) def _note_relocs_for_section(self, section): """ If there are relocation sections pointing to the givne section, emit a note about it. """ for relsec in self.elffile.iter_sections(): if isinstance(relsec, RelocationSection): info_idx = relsec['sh_info'] if self.elffile.get_section(info_idx) == section: self._emitline(' Note: This section has relocations against it, but these have NOT been applied to this dump.') return def _init_dwarfinfo(self): """ Initialize the DWARF info contained in the file and assign it to self._dwarfinfo. Leave self._dwarfinfo at None if no DWARF info was found in the file """ if self._dwarfinfo is not None: return if self.elffile.has_dwarf_info(): self._dwarfinfo = self.elffile.get_dwarf_info() else: self._dwarfinfo = None def _dump_debug_info(self): """ Dump the debugging info section. """ self._emitline('Contents of the .debug_info section:\n') # Offset of the .debug_info section in the stream section_offset = self._dwarfinfo.debug_info_sec.global_offset for cu in self._dwarfinfo.iter_CUs(): self._emitline(' Compilation Unit @ offset %s:' % self._format_hex(cu.cu_offset)) self._emitline(' Length: %s (%s)' % ( self._format_hex(cu['unit_length']), '%s-bit' % cu.dwarf_format())) self._emitline(' Version: %s' % cu['version']), self._emitline(' Abbrev Offset: %s' % cu['debug_abbrev_offset']), self._emitline(' Pointer Size: %s' % cu['address_size']) # The nesting depth of each DIE within the tree of DIEs must be # displayed. To implement this, a counter is incremented each time # the current DIE has children, and decremented when a null die is # encountered. 
Due to the way the DIE tree is serialized, this will # correctly reflect the nesting depth # die_depth = 0 for die in cu.iter_DIEs(): if die.is_null(): die_depth -= 1 continue self._emitline(' <%s><%x>: Abbrev Number: %s (%s)' % ( die_depth, die.offset, die.abbrev_code, die.tag)) for attr in itervalues(die.attributes): name = attr.name # Unknown attribute values are passed-through as integers if isinstance(name, int): name = 'Unknown AT value: %x' % name self._emitline(' <%2x> %-18s: %s' % ( attr.offset, name, describe_attr_value( attr, die, section_offset))) if die.has_children: die_depth += 1 self._emitline() def _dump_debug_line_programs(self): """ Dump the (decoded) line programs from .debug_line The programs are dumped in the order of the CUs they belong to. """ self._emitline('Decoded dump of debug contents of section .debug_line:\n') for cu in self._dwarfinfo.iter_CUs(): lineprogram = self._dwarfinfo.line_program_for_CU(cu) cu_filename = '' if len(lineprogram['include_directory']) > 0: cu_filename = '%s/%s' % ( bytes2str(lineprogram['include_directory'][0]), bytes2str(lineprogram['file_entry'][0].name)) else: cu_filename = bytes2str(lineprogram['file_entry'][0].name) self._emitline('CU: %s:' % cu_filename) self._emitline('File name Line number Starting address') # Print each state's file, line and address information. For some # instructions other output is needed to be compatible with # readelf. 
for entry in lineprogram.get_entries(): state = entry.state if state is None: # Special handling for commands that don't set a new state if entry.command == DW_LNS_set_file: file_entry = lineprogram['file_entry'][entry.args[0] - 1] if file_entry.dir_index == 0: # current directory self._emitline('\n./%s:[++]' % ( bytes2str(file_entry.name))) else: self._emitline('\n%s/%s:' % ( bytes2str(lineprogram['include_directory'][file_entry.dir_index - 1]), bytes2str(file_entry.name))) elif entry.command == DW_LNE_define_file: self._emitline('%s:' % ( bytes2str(lineprogram['include_directory'][entry.args[0].dir_index]))) elif not state.end_sequence: # readelf doesn't print the state after end_sequence # instructions. I think it's a bug but to be compatible # I don't print them too. self._emitline('%-35s %11d %18s' % ( bytes2str(lineprogram['file_entry'][state.file - 1].name), state.line, '0' if state.address == 0 else self._format_hex(state.address))) if entry.command == DW_LNS_copy: # Another readelf oddity... 
self._emitline() def _dump_debug_frames(self): """ Dump the raw frame information from .debug_frame """ if not self._dwarfinfo.has_CFI(): return self._emitline('Contents of the .debug_frame section:') for entry in self._dwarfinfo.CFI_entries(): if isinstance(entry, CIE): self._emitline('\n%08x %08x %08x CIE' % ( entry.offset, entry['length'], entry['CIE_id'])) self._emitline(' Version: %d' % entry['version']) self._emitline(' Augmentation: "%s"' % bytes2str(entry['augmentation'])) self._emitline(' Code alignment factor: %u' % entry['code_alignment_factor']) self._emitline(' Data alignment factor: %d' % entry['data_alignment_factor']) self._emitline(' Return address column: %d' % entry['return_address_register']) self._emitline() else: # FDE self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % ( entry.offset, entry['length'], entry['CIE_pointer'], entry.cie.offset, entry['initial_location'], entry['initial_location'] + entry['address_range'])) self._emit(describe_CFI_instructions(entry)) self._emitline() def _dump_debug_frames_interp(self): """ Dump the interpreted (decoded) frame information from .debug_frame """ if not self._dwarfinfo.has_CFI(): return self._emitline('Contents of the .debug_frame section:') for entry in self._dwarfinfo.CFI_entries(): if isinstance(entry, CIE): self._emitline('\n%08x %08x %08x CIE "%s" cf=%d df=%d ra=%d' % ( entry.offset, entry['length'], entry['CIE_id'], bytes2str(entry['augmentation']), entry['code_alignment_factor'], entry['data_alignment_factor'], entry['return_address_register'])) ra_regnum = entry['return_address_register'] else: # FDE self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % ( entry.offset, entry['length'], entry['CIE_pointer'], entry.cie.offset, entry['initial_location'], entry['initial_location'] + entry['address_range'])) ra_regnum = entry.cie['return_address_register'] # Print the heading row for the decoded table self._emit(' LOC') self._emit(' ' if entry.structs.address_size == 4 else ' 
') self._emit(' CFA ') # Decode the table nad look at the registers it describes. # We build reg_order here to match readelf's order. In particular, # registers are sorted by their number, and the register matching # ra_regnum is always listed last with a special heading. decoded_table = entry.get_decoded() reg_order = sorted(ifilter( lambda r: r != ra_regnum, decoded_table.reg_order)) # Headings for the registers for regnum in reg_order: self._emit('%-6s' % describe_reg_name(regnum)) self._emitline('ra ') # Now include ra_regnum in reg_order to print its values similarly # to the other registers. reg_order.append(ra_regnum) for line in decoded_table.table: self._emit(self._format_hex( line['pc'], fullhex=True, lead0x=False)) self._emit(' %-9s' % describe_CFI_CFA_rule(line['cfa'])) for regnum in reg_order: if regnum in line: s = describe_CFI_register_rule(line[regnum]) else: s = 'u' self._emit('%-6s' % s) self._emitline() self._emitline() def _emit(self, s=''): """ Emit an object to output """ self.output.write(str(s)) def _emitline(self, s=''): """ Emit an object to output, followed by a newline """ self.output.write(str(s) + '\n')