def load(aspace, fname):
    """Load an ELF binary into *aspace* and return its entry point.

    PT_LOAD segments are mapped into the address space and their file
    contents copied in; symbols found in the PT_DYNAMIC segment are
    registered as labels, and function symbols are queued for analysis.

    Args:
        aspace: target address space (project type providing add_area,
            load_content, set_label, analisys_stack_push).
        fname: path of the ELF file to load.

    Returns:
        The ELF header's e_entry value.
    """
    # The original leaked the file handle; `with` is safe here because
    # every read from seg.stream happens before the function returns.
    with open(fname, "rb") as f:
        elffile = ELFFile(f)
        for seg in elffile.iter_segments():
            if seg["p_type"] == "PT_LOAD":
                # Reserve the full in-memory span: p_memsz may exceed
                # p_filesz (zero-filled tail, e.g. .bss).
                aspace.add_area(seg["p_vaddr"], seg["p_vaddr"] + seg["p_memsz"] - 1, "TODO")
                seg.stream.seek(seg["p_offset"])
                aspace.load_content(seg.stream, seg["p_vaddr"], seg["p_filesz"])
            elif seg["p_type"] == "PT_DYNAMIC":
                for s in seg.iter_symbols():
                    # Undefined symbols are imports — nothing to label.
                    if s["st_shndx"] == "SHN_UNDEF":
                        continue
                    aspace.set_label(s["st_value"], str(s.name, "utf-8"))
                    if s["st_info"]["type"] == "STT_FUNC":
                        # NOTE: project API spelling ("analisys") kept as-is.
                        aspace.analisys_stack_push(s["st_value"])
                    if s["st_info"]["type"] == "STT_OBJECT":
                        # TODO: mark as data of size s["st_size"]
                        pass
        return elffile["e_entry"]
def load(aspace, fname):
    """Load an ELF binary into *aspace*, preferring section headers.

    Dispatches to load_sections() when the file has a section header
    table, otherwise to load_segments(), and returns that helper's
    result (the entry point).

    Raises:
        ValueError: if the file contains neither sections nor segments.
    """
    with open(fname, "rb") as f:  # was leaked; helpers finish before close
        elffile = ELFFile(f)
        if elffile.num_sections():
            return load_sections(aspace, elffile)
        if elffile.num_segments():
            return load_segments(aspace, elffile)
        # Was `assert False, ...`, which is stripped under `python -O`
        # and would then silently return None; raise explicitly instead.
        raise ValueError("No ELF sections or segments found")
def main():
    """CLI entry point: parse DWARF info from an ELF binary and emit
    C headers (and a JSON DIE dump) per compilation unit."""
    arg_parser = argparse.ArgumentParser(
        description='Parse DWARF information and generate C header files')
    arg_parser.add_argument('file', help='path of input ELF binary',
                            action=ValidFile)
    arg_parser.add_argument('-o', '--out',
                            help='output directory for header files (optional)',
                            action=ValidDir)
    arg_parser.add_argument('-s', '--suppress', dest='suppr',
                            action='store_true',
                            help='do not generate output headers')
    args = arg_parser.parse_args()

    with open(args.file, 'rb') as binary:
        elf = ELFFile(binary)
        if not elf.has_dwarf_info():
            print('The specified file does not contain DWARF information.')
            return

        dwarf_info = elf.get_dwarf_info()
        for cu in dwarf_info.iter_CUs():
            # JSON-ready view of every DIE in this CU, keyed by offset.
            die_items = {}
            for die in cu.iter_DIEs():
                entry = parse_die_json(die)
                die_items[entry['offset']] = entry

            print('Found compile unit at offset {}'.format(cu.cu_offset))
            print('Reading DWARF information...')
            result = parse_cu(cu)
            print('Found {} nodes in compilation unit.'.format(
                len(result['items'])))

            if args.out:
                header_path = os.path.join(
                    args.out, 'cu_{}.h'.format(cu.cu_offset))
                with open(header_path, 'w') as output:
                    print_all(result, output, gen_headers=not args.suppr)
                json_path = os.path.join(
                    args.out, 'cu_{}.json'.format(cu.cu_offset))
                with open(json_path, 'w') as output:
                    print(json.dumps(die_items, indent=2), file=output)
            else:
                print_all(result, gen_headers=not args.suppr)
        print('Done.')
def detect(fname):
    """Probe *fname* and return an architecture id such as "x86_64".

    Returns:
        "<machine>_<bitness>" built from MACH_MAP and the ELF class,
        or None if the file is not valid ELF.

    Raises:
        ValueError: on an EI_CLASS other than ELFCLASS32/ELFCLASS64.
    """
    with open(fname, "rb") as f:  # was leaked; header is read inside the block
        try:
            elffile = ELFFile(f)
        except ELFError:
            return None
        ei_class = elffile["e_ident"]["EI_CLASS"]
        # Map the class explicitly rather than assuming "not 32-bit means
        # 64-bit" — ELFCLASSNONE/invalid values would otherwise slip
        # through as 64. Mirrors the stricter detect() variant elsewhere.
        if ei_class == "ELFCLASS32":
            bitness = 32
        elif ei_class == "ELFCLASS64":
            bitness = 64
        else:
            raise ValueError("Unknown ELF bitness: %s" % ei_class)
        return "%s_%s" % (MACH_MAP[elffile["e_machine"]], bitness)
def load(aspace, fname):
    """Map every PT_LOAD segment of an ELF file into *aspace*.

    Args:
        aspace: target address space (project type providing add_area
            and load_content).
        fname: path of the ELF file to load.

    Returns:
        The ELF header's e_entry value.
    """
    # The original leaked the file handle; all stream reads happen
    # before return, so a `with` block closes it deterministically.
    with open(fname, "rb") as f:
        elffile = ELFFile(f)
        for seg in elffile.iter_segments():
            if seg["p_type"] != "PT_LOAD":
                continue
            # Reserve the full in-memory span: p_memsz may exceed
            # p_filesz (zero-filled tail, e.g. .bss).
            aspace.add_area(seg["p_vaddr"], seg["p_vaddr"] + seg["p_memsz"] - 1, "TODO")
            seg.stream.seek(seg["p_offset"])
            aspace.load_content(seg.stream, seg["p_vaddr"], seg["p_filesz"])
        return elffile["e_entry"]
def detect(fname):
    """Probe *fname* and return an architecture id such as "x86_64".

    Returns:
        "<machine>_<bitness>" built from MACH_MAP and the ELF class,
        or None if the file is not valid ELF.

    Raises:
        ValueError: on an EI_CLASS other than ELFCLASS32/ELFCLASS64.
    """
    with open(fname, "rb") as f:  # was leaked; header is read inside the block
        try:
            elffile = ELFFile(f)
        except ELFError:
            return None
        ei_class = elffile["e_ident"]["EI_CLASS"]
        if ei_class == "ELFCLASS32":
            variant = 32
        elif ei_class == "ELFCLASS64":
            variant = 64
        else:
            # Was `assert 0, ...`, which is stripped under `python -O`
            # and would fall through with `variant` unbound.
            raise ValueError("Unknown ELF bitness: %s" % ei_class)
        return "%s_%s" % (MACH_MAP[elffile["e_machine"]], variant)
def __init__(self, filepath):
    """Open *filepath* as an ELF file and index its symbols.

    The handle is kept on the instance (self._fd) and deliberately NOT
    closed here — the ELFFile_ base presumably reads from the stream
    lazily, so the fd must outlive __init__ (TODO confirm a close/
    __del__ exists elsewhere in the class).
    """
    self._fd = open(filepath, 'rb')
    # Explicit base-class call (not super()) — kept as written.
    ELFFile_.__init__(self, self._fd)
    # Interval tree over symbol address ranges, built by a project
    # helper; used for address -> symbol lookups (exact structure
    # defined in _build_symbol_tree, not visible here).
    self._symbol_interval_tree = self._build_symbol_tree()
def process_file(filename, xml_root):
    """Parse the DWARF info of one ELF binary into an XML tree.

    Appends a <file> element under *xml_root* containing one element per
    compilation unit, which in turn holds the CU's source-file table and
    a recursive dump of all DIEs.
    NOTE(review): relies on module-level `args` (for --quiet) and a
    `refmap` dict defined outside this view — confirm both exist at
    module scope.
    """
    with open(filename, 'rb') as f:
        # read the binary
        elffile = ELFFile(f)
        if not elffile.has_dwarf_info():
            print('{} has no DWARF info'.format(filename), file=sys.stderr)
            return
        # extract dwarf info
        dwarfinfo = elffile.get_dwarf_info()
        # set global state info about elf
        set_global_machine_arch(elffile.get_machine_arch())
        # For optimized code, the machine code doesn't exactly correspond
        # to the source lines. In such cases, the compiler adds a
        # .debug_loc section in the ELF to aid with this source location
        # matching. We parse that section here. It would usually be empty
        # in absence of -O flags passed to the compiler.
        location_lists = dwarfinfo.location_lists()
        loc_parser = LocationParser(location_lists)
        # Add path to binary
        file_xml_root = XMLSubElement(xml_root, 'file')
        file_xml_root.set('name', str(filename))
        # Each source file (.cpp/.c) with all its included headers etc.
        # after preprocessing is called a translation unit (TU). The
        # compiler compiles it into object code referred to as a
        # compilation unit (CU), so a .so/.a built of many .o's contains
        # many CUs. Each DWARF record is a Debugging Information Entry
        # (DIE) whose offset is its byte offset from the debug section
        # start. We iterate over the dwarf info for all CUs.
        for CU in dwarfinfo.iter_CUs():
            top_DIE = CU.get_top_DIE()
            # cu_xml_root is the xml root holding info for this CU
            cu_xml_root = XMLSubElement(file_xml_root, str(top_DIE.tag))
            for key, value in CU.header.items() :
                cu_xml_root.set(key, str(value))
            # path of the TU that generated this CU
            cu_xml_root.set('path', str(top_DIE.get_full_path()))
            cu_xml_root.set('offset', str(CU.cu_offset))

            # This function adds the file table for each CU to
            # cu_xml_root. Note that a CU may be composed of multiple
            # files, since the TU pulls in included files as well.
            def add_source_file_info(xml_parent) :
                # Add source file name index from the debug_line section
                filepath = os.path.abspath(filename)
                line_prog = dwarfinfo.line_program_for_CU(CU)
                # The include directories are held in dirs; index 0 is
                # the binary's own directory, then the CU's
                # include_directory entries (decoded from bytes).
                dirs = [ os.path.dirname(filepath) ] + \
                    list(map(lambda x: x.decode('utf-8'),
                             line_prog['include_directory']))
                file_table_node = XMLSubElement(xml_parent, str("sources"))
                # file_entry indices are 1-based in DWARF, hence
                # enumerate(..., 1).
                for i, file_entry in enumerate(line_prog['file_entry'], 1) :
                    idx = file_entry['dir_index']
                    source_file = file_entry['name'].decode('utf-8')
                    entry = os.path.join(dirs[idx], source_file)
                    # adding file table entry to xml
                    xml_node = XMLSubElement(file_table_node, 'entry')
                    xml_node.set('id', str(i))
                    xml_node.set('file', entry)

            add_source_file_info(cu_xml_root)

            def add_die_info(DIE, xml_parent) :
                """Recursively convert a DIE (and its children) to XML."""
                xml_node = XMLSubElement(xml_parent, DIE.tag)
                xml_node.set('offset', str(DIE.offset))
                for attr_name, attr_values in DIE.attributes.items() :
                    # FIXME: update pyelftools/elftools/dwarf/enums.py —
                    # unknown attributes arrive with a non-str name.
                    if not isinstance(attr_name, str) :
                        if not args.quiet :
                            print("{} warning : {}:".format(sys.argv[0],
                                                            filename),
                                  " unknown attribute :", attr_values,
                                  file=sys.stderr)
                        continue
                    xml_attr = attr_values.value
                    # Location-class attributes are rendered as a DWARF
                    # expression string instead of the raw value.
                    if loc_parser.attribute_has_location(
                            attr_values, CU['version']) :
                        loc = loc_parser.parse_from_attribute(
                            attr_values, CU['version'])
                        if isinstance(loc, LocationExpr) :
                            # location lists not yet supported
                            xml_attr = describe_DWARF_expr(
                                loc.loc_expr, dwarfinfo.structs)
                    # CU-relative references are rebased to absolute
                    # debug-section offsets.
                    if 'DW_FORM_ref' in attr_values.form:
                        xml_attr += CU.cu_offset
                    if isinstance(xml_attr, bytes) :
                        xml_node.set(attr_name, xml_attr.decode('utf-8'))
                    else :
                        xml_node.set(attr_name, str(xml_attr))
                # NOTE: There is the implicit belief that the
                # specification comes after the referred DIE. If that
                # turns out not to be the case later, which is quite
                # unlikely, we will need to do this linking in two
                # passes.
                # add node to map
                if 'offset' in xml_node.attrib:
                    refmap[xml_node.attrib['offset']] = xml_node
                # after node construction, if it contains specification,
                # add info onto refed node
                if 'DW_AT_specification' in xml_node.attrib:
                    refed_node = refmap[xml_node.attrib['DW_AT_specification']]
                    for k, v in xml_node.attrib.items():
                        if k not in refed_node.attrib and k != 'DW_AT_specification':
                            refed_node.set(k, v)
                for child_DIE in DIE.iter_children() :
                    add_die_info(child_DIE, xml_node)

            # Process DIEs recursively starting with top_DIE
            for child in top_DIE.iter_children() :
                add_die_info(child, cu_xml_root)