def process_file(filename):
    """Print the location information of every DIE attribute in an ELF file.

    For each compile unit the offset and length are printed; for each
    attribute that carries location information the owning DIE's name, tag
    and the decoded location (single expression or location list) follow.

    Args:
        filename: Path of the ELF file to inspect.

    Returns:
        None. Output goes to stdout. If the file has no DWARF info a notice
        is printed and the function returns early.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            # CU is a CompileUnit object; header elements are accessed via
            # item-lookup.
            print(' Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            for DIE in CU.iter_DIEs():
                # Each attribute is an AttributeValue object.
                for attr in itervalues(DIE.attributes):
                    if not loc_parser.attribute_has_location(
                            attr, CU['version']):
                        continue

                    # Not every DIE that owns a location has a name, so a
                    # missing DW_AT_name must not abort the whole dump.
                    if 'DW_AT_name' in DIE.attributes:
                        var_name = DIE.attributes['DW_AT_name'].value
                    else:
                        var_name = None
                    print(' Varname:%s' % (var_name))
                    print(' DIE %s. attr %s.' % (DIE.tag, attr.name))

                    loc = loc_parser.parse_from_attribute(
                        attr, CU['version'])
                    # We either get a list (in case the attribute is a
                    # reference to the .debug_loc section) or a LocationExpr
                    # object (in case the attribute itself contains location
                    # information).
                    if isinstance(loc, LocationExpr):
                        print(' %s' % (describe_DWARF_expr(
                            loc.loc_expr, dwarfinfo.structs)))
                    elif isinstance(loc, list):
                        print(show_loclist(loc, dwarfinfo, indent=' '))
def _test_file(self, filename):
    """Parse every location-bearing attribute of a test ELF file.

    The file is looked up under ``test/testfiles_for_unittests``. The check
    passes when no call to ``parse_from_attribute`` raises.
    """
    filepath = os.path.join('test', 'testfiles_for_unittests', filename)
    with open(filepath, 'rb') as stream:
        dwarfinfo = ELFFile(stream).get_dwarf_info()
        locparser = LocationParser(dwarfinfo.location_lists())

        for CU in dwarfinfo.iter_CUs():
            version = CU['version']
            for DIE in CU.iter_DIEs():
                for attr in DIE.attributes.values():
                    if not LocationParser.attribute_has_location(
                            attr, version):
                        continue
                    # This will crash on unpatched library on DIE at 0x9f
                    locparser.parse_from_attribute(attr, version)
def extract_string_variables(elf):
    """
    Collect the string (char) variables described by the DWARF data of an
    ELF file, walking every Compilation Unit and every Debug Information
    Entry (DIE).

    Returns a list of dicts with keys ``name``, ``addr`` and ``die``, one
    per variable whose location description matches DT_LOCATION_REGEX and
    resolves to a non-zero address.
    """
    dwarf_info = elf.get_dwarf_info()
    loc_parser = LocationParser(dwarf_info.location_lists())

    strings = []

    for compile_unit in dwarf_info.iter_CUs():
        for die in compile_unit.iter_DIEs():
            # Only variables with a type, a location and a "char" type
            # are of interest.
            if die.tag != 'DW_TAG_variable':
                continue
            attrs = die.attributes
            if 'DW_AT_type' not in attrs or 'DW_AT_location' not in attrs:
                continue
            if not is_die_var_const_char(compile_unit, die):
                continue

            # The location attribute holds the variable's address in memory.
            loc_attr = attrs['DW_AT_location']
            version = die.cu['version']
            if not loc_parser.attribute_has_location(loc_attr, version):
                continue

            loc = loc_parser.parse_from_attribute(loc_attr, version)
            if not isinstance(loc, LocationExpr):
                continue

            try:
                described = describe_DWARF_expr(loc.loc_expr,
                                                dwarf_info.structs)
                matcher = DT_LOCATION_REGEX.match(described)
                if not matcher:
                    continue

                addr = int(matcher.group(1), 16)
                if addr > 0:
                    strings.append({
                        'name': attrs['DW_AT_name'].value,
                        'addr': addr,
                        'die': die
                    })
            except KeyError:
                # Best effort: entries that raise KeyError are skipped.
                pass

    return strings
def test_dwarfinfo(di):
    """Exercise the DIE table model against every DIE of a DWARFInfo.

    For each non-null DIE the model is (re)pointed at it, then every
    attribute row is checked: key/form naming, location detection for
    ``DW_FORM_exprloc``, cell rendering, and attribute details.
    """
    # Some global cache setup in line with the app proper
    di._ranges = None
    di._CUs = [cu for cu in di.iter_CUs()]
    di._locparser = None

    model = False
    dummy_index = QModelIndex()

    for CU in di._CUs:
        top_die = CU.get_top_DIE()
        if 'DW_AT_name' in top_die.attributes:
            raw_name = top_die.attributes['DW_AT_name'].value
            print("%s" % strip_path(raw_name.decode('utf-8', errors='ignore')))
        else:
            print("(no name)")

        CU._lineprogram = None
        CU._exprparser = None

        for die in CU.iter_DIEs():
            if die.is_null():
                continue

            assert die.tag.startswith('DW_TAG_')

            if model:
                model.display_DIE(die)
            else:
                # With prefix, with low level data, decimal
                model = DIETableModel(die, True, True, False, True)

            row_count = model.rowCount(dummy_index)
            col_count = model.columnCount(dummy_index)
            keys = list(die.attributes.keys())

            # Assuming rows correspond to attributes;
            # if we introduce non-attribute metadata into the DIE table,
            # this will break
            for row in range(model.meta_count, row_count):
                key = keys[row - model.meta_count]
                attr = die.attributes[key]
                form = attr.form

                # Check the elftools' results first:
                # the key and form must be interpreted properly
                assert str(key).startswith('DW_AT_')
                assert str(form).startswith('DW_FORM_')

                # Check if attributes with locations are all found.
                # The converse is not true; on DWARF2, location expressions
                # can have form DW_FORM_block1
                if form == 'DW_FORM_exprloc':
                    assert LocationParser.attribute_has_location(
                        attr, CU['version'])

                # Now check the spell out logic
                for col in range(0, col_count):
                    model.data(model.index(row, col, dummy_index),
                               Qt.DisplayRole)

                details = model.get_attribute_details(
                    model.index(row, 0, dummy_index))
                if form == 'DW_FORM_section_offset':
                    assert details is not None
def get_attribute_details(self, index):
    """Build a detail table for the attribute shown at *index*.

    Rows below ``self.meta_count`` yield ``None``. For attribute rows a
    ``GenericTableModel`` is returned for ``DW_AT_ranges`` (address ranges),
    for attributes with location information (expression commands or
    location-list entries) and for ``DW_AT_stmt_list`` (line program
    states); any other attribute yields ``None``.
    """
    row = index.row()
    if row < self.meta_count:
        return None

    key = self.keys[row - self.meta_count]
    attr = self.attributes[key]

    if key == "DW_AT_ranges":
        di = self.die.dwarfinfo
        if not di._ranges:
            di._ranges = di.range_lists()
        if not di._ranges:
            # Absent in the DWARF file
            return None
        ranges = di._ranges.get_range_list_at_offset(attr.value)
        # TODO: handle base addresses. Never seen those so far...
        cu_base = get_cu_base(self.die)
        range_rows = ((hex(cu_base + rng.begin_offset),
                       hex(cu_base + rng.end_offset))
                      for rng in ranges)
        return GenericTableModel(("Start offset", "End offset"), range_rows)

    if LocationParser.attribute_has_location(attr, self.die.cu['version']):
        # Expression is a list of ints
        ll = self.parse_location(attr)
        if isinstance(ll, LocationExpr):
            commands = ((cmd,) for cmd in self.dump_expr(ll.loc_expr))
            return GenericTableModel(("Command",), commands)

        cu_base = get_cu_base(self.die)
        if self.lowlevel:
            headers = ("Start offset", "End offset",
                       "Expr bytes", "Expression")
            values = ((hex(cu_base + entry.begin_offset),
                       hex(cu_base + entry.end_offset),
                       ' '.join("%02x" % b for b in entry.loc_expr),
                       '; '.join(self.dump_expr(entry.loc_expr)))
                      for entry in ll)
        else:
            headers = ("Start offset", "End offset", "Expression")
            values = ((hex(cu_base + entry.begin_offset),
                       hex(cu_base + entry.end_offset),
                       '; '.join(self.dump_expr(entry.loc_expr)))
                      for entry in ll)
        return GenericTableModel(headers, values)

    if key == 'DW_AT_stmt_list':
        cu = self.die.cu
        if cu._lineprogram is None:
            cu._lineprogram = self.die.dwarfinfo.line_program_for_CU(cu)
        entries = cu._lineprogram.get_entries()
        files = cu._lineprogram.header.file_entry

        def flag(value):
            return 'Y' if value else ''

        def format_state(state):
            if state.file > 0:
                file_name = files[state.file-1].name.decode(
                    'utf-8', errors='ignore')
            else:
                file_name = '(N/A)'
            return (hex(state.address),
                    file_name,
                    state.line,
                    flag(state.is_stmt),
                    flag(state.basic_block),
                    flag(state.end_sequence),
                    flag(state.prologue_end),
                    flag(state.epilogue_begin))

        states = [format_state(e.state) for e in entries if e.state]
        # TODO: low level flavor with extra details
        # TODO: commands vs states
        return GenericTableModel(
            ('Address', 'File', 'Line', 'Stmt', 'Basic block',
             'End seq', 'End prologue', 'Begin epilogue'),
            states)

    return None
def get_func_bounds(filename, function_name):
    """Return the address bounds of a function described in DWARF info.

    Every ``DW_TAG_subprogram`` DIE of every compile unit is examined; the
    first one whose ``DW_AT_name`` equals *function_name* determines the
    result.

    Args:
        filename: Path of the ELF file to inspect.
        function_name: Name to compare against the raw ``DW_AT_name`` value
            (same type as pyelftools stores it).

    Returns:
        ``(low_addr, high_addr)`` for the first match, where missing
        ``DW_AT_low_pc`` counts as 0 and missing ``DW_AT_high_pc`` yields
        ``high_addr == low_addr``. ``None`` if the file has no DWARF info
        or no subprogram matches.
    """
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return None

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions. Kept because it
        # sets global state, even though this function decodes none itself.
        set_global_machine_arch(elffile.get_machine_arch())

        for CU in dwarfinfo.iter_CUs():
            for DIE in CU.iter_DIEs():
                if DIE.tag != "DW_TAG_subprogram":
                    continue

                attrs = DIE.attributes
                fname = attrs["DW_AT_name"].value if "DW_AT_name" in attrs else ""
                if fname != function_name:
                    continue

                low_addr = 0
                if "DW_AT_low_pc" in attrs:
                    low_addr = attrs["DW_AT_low_pc"].value

                high_addr = low_addr
                if "DW_AT_high_pc" in attrs:
                    high_attr = attrs["DW_AT_high_pc"]
                    if high_attr.form == "DW_FORM_addr":
                        # Address class: the value is the absolute end.
                        high_addr = high_attr.value
                    else:
                        # Constant class: the value is an offset from
                        # DW_AT_low_pc. Comparing magnitudes is not
                        # reliable: a size can exceed the start address.
                        high_addr = low_addr + high_attr.value

                return (low_addr, high_addr)

    return None
def _test_file(self, filename):
    """Assert that every ``DW_FORM_exprloc`` attribute is seen as a location.

    The file is looked up under ``test/testfiles_for_unittests``; each
    attribute of each DIE with form ``DW_FORM_exprloc`` must be accepted by
    ``LocationParser.attribute_has_location``.
    """
    filepath = os.path.join('test', 'testfiles_for_unittests', filename)
    with open(filepath, 'rb') as stream:
        dwarfinfo = ELFFile(stream).get_dwarf_info()
        for CU in dwarfinfo.iter_CUs():
            ver = CU['version']
            for DIE in CU.iter_DIEs():
                for key, attr in DIE.attributes.items():
                    if attr.form != 'DW_FORM_exprloc':
                        continue
                    recognized = LocationParser.attribute_has_location(
                        attr, ver)
                    self.assertTrue(
                        recognized,
                        "Attribute %s not recognized as a location" % key)
def format_value(self, attr):
    """Render the value of a DIE attribute for display.

    The form is examined first (addresses, present-flags, references),
    then location-bearing attributes, then a handful of well-known
    attribute names, and finally the Python type of the value.
    """
    key, val, form = attr.name, attr.value, attr.form

    def with_description(table):
        # "<number> <symbolic name>" when known, the bare value otherwise.
        if val in table:
            return "%d %s" % (val, table[val])
        return val

    def as_hex_bytes(seq):
        # Something like "01 ff 33 55"
        return ' '.join("%02x" % b for b in seq)

    if form == 'DW_FORM_addr' and isinstance(val, int):
        return hex(val)
    if form == 'DW_FORM_flag_present':
        return ''
    if form in ('DW_FORM_ref0', 'DW_FORM_ref1', 'DW_FORM_ref2',
                'DW_FORM_ref4', 'DW_FORM_ref8', 'DW_FORM_ref_addr'):
        # There are several other reference forms in the spec
        return "Ref: 0x%x" % val

    if LocationParser.attribute_has_location(attr, self.die.cu['version']):
        ll = self.parse_location(attr)
        if isinstance(ll, LocationExpr):
            return '; '.join(self.dump_expr(ll.loc_expr))
        return "Loc list: 0x%x" % attr.value

    if key == 'DW_AT_language':
        return with_description(_DESCR_DW_LANG)
    if key == 'DW_AT_encoding':
        return with_description(_DESCR_DW_ATE)
    if key == 'DW_AT_accessibility':
        return with_description(_DESCR_DW_ACCESS)
    if key == 'DW_AT_inline':
        return with_description(_DESCR_DW_INL)

    if key == 'DW_AT_decl_file':
        cu = self.die.cu
        if cu._lineprogram is None:
            cu._lineprogram = self.die.dwarfinfo.line_program_for_CU(cu)
        if val > 0:
            entry = cu._lineprogram.header.file_entry[val-1]
            return "%d: %s" % (val,
                               entry.name.decode('utf-8', errors='ignore'))
        return "0: (N/A)"

    if key == 'DW_AT_stmt_list':
        return 'LNP at 0x%x' % val

    if isinstance(val, bytes):
        if form in ('DW_FORM_strp', 'DW_FORM_string'):
            return val.decode('utf-8', errors='ignore')
        if val == b'':
            # What's a good value for a blank blob?
            return '[]'
        return as_hex_bytes(val)

    if isinstance(val, list):
        # block1 comes across as this
        if val == []:
            return '[]'
        if isinstance(val[0], int):
            # Assuming it's a byte array disguised as int array
            return as_hex_bytes(val)
        # List of something else
        return str(val)

    if self.hex and isinstance(val, int):
        return hex(val)
    return str(val)
def get_var_offset(filename, function_name, var_name):
    """Return the ``DW_OP_fbreg`` offset of a variable inside a function.

    The subprogram DIEs named *function_name* are searched for a child
    variable or formal parameter named *var_name*. The result is the first
    argument of the last ``DW_OP_fbreg`` operation in that child's
    ``DW_AT_location`` expression, or 0 when it has none. ``None`` is
    returned when the file has no DWARF info or nothing matches.
    """
    variable_tags = ("DW_TAG_variable", "DW_TAG_formal_parameter")

    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            for DIE in CU.iter_DIEs():
                # Find the function
                if DIE.tag != "DW_TAG_subprogram":
                    continue

                fname = ""
                for attr in itervalues(DIE.attributes):
                    if attr.name == "DW_AT_name":
                        fname = attr.value
                if fname != function_name:
                    continue

                for CHILD in DIE.iter_children():
                    if CHILD.tag not in variable_tags:
                        continue

                    right_name = False
                    location = 0
                    for attr in itervalues(CHILD.attributes):
                        if attr.name == "DW_AT_name" and attr.value == var_name:
                            right_name = True

                        # Only the location attribute is decoded
                        if attr.name != "DW_AT_location":
                            continue
                        loc = loc_parser.parse_from_attribute(
                            attr, CU['version'])
                        if not isinstance(loc, LocationExpr):
                            continue

                        expr_parser = DWARFExprParser(dwarfinfo.structs)
                        for op in expr_parser.parse_expr(loc.loc_expr):
                            if op.op_name == 'DW_OP_fbreg':
                                location = op.args[0]

                    if right_name:
                        return location
def parse_location(self, attr):
    """Parse the location held by *attr* with a per-DWARFInfo parser.

    The ``LocationParser`` is created on first use and cached on the
    DWARFInfo object as ``_locparser``.
    """
    dwarfinfo = self.die.dwarfinfo
    parser = dwarfinfo._locparser
    if parser is None:
        parser = LocationParser(dwarfinfo.location_lists())
        dwarfinfo._locparser = parser
    return parser.parse_from_attribute(attr, self.die.cu['version'])
def do_address_range_matching(elf, symbol_dict, processed):
    """
    Match symbols indirectly using address ranges.

    The address range of each still-unmapped subprogram DIE is used to
    claim the symbols whose address falls inside it. This covers symbol
    names that differ from the DIE name, e.g. "<func>" in the DIE but
    "<func>.constprop.*" in the symbol list, as well as mangled C++ names,
    since DIEs carry the source-level name.

    Does nothing when ``processed`` has no "unmapped_dies" entry; otherwise
    updates "mapped_symbols", "mapped_addr" and "unmapped_symbols" in
    ``processed``.
    """
    if "unmapped_dies" not in processed:
        return

    mapped_symbols = processed["mapped_symbols"]
    mapped_addresses = processed["mapped_addr"]
    unmapped_symbols = processed["unmapped_symbols"]
    newly_mapped_syms = set()

    dwarfinfo = elf.get_dwarf_info()
    location_parser = LocationParser(dwarfinfo.location_lists())

    # Group DIEs by compile units
    dies_by_cu = dict()
    for die in processed["unmapped_dies"]:
        dies_by_cu.setdefault(die.cu, set()).add(die)

    reference_attrs = ("DW_AT_abstract_origin", "DW_AT_specification")

    # Loop through all compile units
    for cu, cu_dies in dies_by_cu.items():
        lineprog = dwarfinfo.line_program_for_CU(cu)

        # Map offsets from DIEs
        offset_map = dict()
        for cu_die in cu.iter_DIEs():
            offset_map[cu_die.offset] = cu_die

        for die in cu_dies:
            if die.tag != "DW_TAG_subprogram":
                continue

            path = None

            # Has direct reference to file, so use it
            if "DW_AT_decl_file" in die.attributes:
                path = get_die_filename(die, lineprog)

            # Follow indirect references until a DIE with a direct
            # reference to a file is found
            die_ptr = die
            while path is None:
                if die_ptr.tag != "DW_TAG_subprogram":
                    break

                ofname = None
                for candidate in reference_attrs:
                    if candidate in die_ptr.attributes:
                        ofname = candidate
                        break
                if ofname is None:
                    break

                offset = die_ptr.attributes[ofname].value
                offset += die_ptr.cu.cu_offset

                # There is nothing to reference so no need to continue
                if offset not in offset_map:
                    break

                die_ptr = offset_map[offset]
                if "DW_AT_decl_file" in die_ptr.attributes:
                    path = get_die_filename(die_ptr, lineprog)

            # Nothing to map
            if path is None:
                continue

            low, high = get_die_mapped_address(die, location_parser,
                                               dwarfinfo)
            if low is None:
                continue

            for ums in unmapped_symbols:
                for one_sym in symbol_dict[ums]:
                    symaddr = one_sym["symbol"]["st_value"]
                    if symaddr in mapped_addresses:
                        continue
                    if low <= symaddr < high:
                        one_sym["mapped_files"].add(path)
                        mapped_addresses.add(symaddr)
                        newly_mapped_syms.add(ums)

    processed["mapped_symbols"] = mapped_symbols.union(newly_mapped_syms)
    processed["mapped_addr"] = mapped_addresses
    processed["unmapped_symbols"] = unmapped_symbols.difference(
        newly_mapped_syms)
def do_simple_name_matching(elf, symbol_dict, processed):
    """
    Walk the DIEs of every compile unit in order and match DIE names
    against symbol names, for DIEs that carry a direct file reference.

    Updates "mapped_symbols", "mapped_addr" and "unmapped_symbols" in
    ``processed`` and stores the DIEs that could not be mapped under
    "unmapped_dies".
    """
    mapped_symbols = processed["mapped_symbols"]
    mapped_addresses = processed["mapped_addr"]
    unmapped_symbols = processed["unmapped_symbols"]
    newly_mapped_syms = set()

    dwarfinfo = elf.get_dwarf_info()
    location_parser = LocationParser(dwarfinfo.location_lists())

    unmapped_dies = set()

    # Loop through all compile units
    for compile_unit in dwarfinfo.iter_CUs():
        lineprog = dwarfinfo.line_program_for_CU(compile_unit)
        if lineprog is None:
            continue

        # Look at each DIE for variables and subprograms (i.e. functions)
        for die in compile_unit.iter_DIEs():
            attrs = die.attributes
            sym_name = None

            if die.tag == "DW_TAG_variable":
                # having "DW_AT_location" means this maps
                # to an actual address (e.g. not an extern)
                if "DW_AT_location" in attrs:
                    sym_name = die.get_full_path()

            elif die.tag == "DW_TAG_subprogram":
                refers_elsewhere = ("DW_AT_abstract_origin" in attrs
                                    or "DW_AT_specification" in attrs)
                if refers_elsewhere:
                    # Refer to another DIE for name
                    unmapped_dies.add(die)
                elif "DW_AT_low_pc" in attrs:
                    # having "DW_AT_low_pc" means it maps to an actual
                    # address; DW_AT_low_pc == 0 is a weak function
                    # which has been overridden
                    if attrs["DW_AT_low_pc"].value != 0:
                        sym_name = die.get_full_path()

                    # For mangled function names, the linkage name
                    # is what appears in the symbol list
                    if "DW_AT_linkage_name" in attrs:
                        sym_name = attrs["DW_AT_linkage_name"].value.decode()

            if sym_name is None:
                continue

            # Skip DIE with no reference back to a file
            if "DW_AT_decl_file" not in attrs:
                continue

            is_die_mapped = False
            if sym_name in symbol_dict:
                mapped_symbols.add(sym_name)
                symbol = match_symbol_address(symbol_dict[sym_name], die,
                                              location_parser, dwarfinfo)

                if symbol is not None:
                    symaddr = symbol["symbol"]["st_value"]
                    if symaddr not in mapped_addresses:
                        is_die_mapped = True
                        symbol["mapped_files"].add(
                            get_die_filename(die, lineprog))
                        mapped_addresses.add(symaddr)
                        newly_mapped_syms.add(sym_name)

            if not is_die_mapped:
                unmapped_dies.add(die)

    processed["mapped_symbols"] = mapped_symbols.union(newly_mapped_syms)
    processed["mapped_addr"] = mapped_addresses
    processed["unmapped_symbols"] = unmapped_symbols.difference(
        newly_mapped_syms)
    processed["unmapped_dies"] = unmapped_dies
def __init__(self, elf_file_path, inputfile, resultdir):
    """Collect DWARF information from an ELF file, then trace it under pin.

    Args:
        elf_file_path: Path of the ELF file (a ``str``); it is both parsed
            and executed under pin.
        inputfile: Path of a file whose content is fed to the traced
            program's stdin, or ``None`` to run without input.
        resultdir: Directory in which ``<elf basename>.out`` is written.

    If the ELF file has no DWARF info a notice is printed and nothing
    else happens (pin is not run).
    """
    self.elf_file_path = elf_file_path
    self.result_file_path = self.elf_file_path + ".type"
    self.inputfile = inputfile
    self.resultdir = resultdir

    # To save the basic information.
    self.base_type_map = {}
    self.addr2type_map = {}
    self.CU_TYPE = None

    self.functions = []
    self.global_var = []

    # To support extract the dwarf
    self.loc_parser = None
    self.CU = None
    self.dwarfinfo = None

    with open(self.elf_file_path, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        self.dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = self.dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        self.loc_parser = LocationParser(location_lists)

        for CU in self.dwarfinfo.iter_CUs():
            print(' Found a compile unit at offset %s, length %s' %
                  (CU.cu_offset, CU['unit_length']))
            self.CU = CU
            self.CU_TYPE = get_compile_unit_types(self.CU)

            for die in CU.iter_DIEs():
                if die.tag == 'DW_TAG_subprogram':
                    print("")
                    self.process_subprogram(die)
                elif (die.tag == 'DW_TAG_variable'
                      and 'DW_AT_external' in die.attributes):
                    print("")
                    self.process_global_var(die)

    pincmd = [
        '../pin/pin', '-t', '../TaintAnalysisWithPin/obj-ia32/taint.so',
        '--', elf_file_path
    ]
    print(pincmd)

    # Split on a str separator: splitting a str path on b'/' raises
    # TypeError on Python 3.
    result = self.resultdir + '/' + elf_file_path.split('/')[-1] + ".out"
    print(result)

    try:
        if inputfile is not None:
            print("fead input")
            # Read the input before spawning so a missing file does not
            # leave a stray child process behind.
            with open(self.inputfile, 'rb') as stdin_source:
                stdin_lines = stdin_source.readlines()
            for line in stdin_lines:
                print("Give Std Input:%s" % line)
            process = subprocess.Popen(pincmd,
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE)
            # communicate() feeds stdin and drains stdout together, which
            # avoids a deadlock when either pipe fills up.
            trace = process.communicate(b"".join(stdin_lines))[0]
        else:
            print("running without input")
            trace = subprocess.check_output(pincmd)

        tracelist = loadtrace(trace)
        extractfromtrace(tracelist, self.global_var, self.functions,
                         result)
    except subprocess.CalledProcessError as e:
        print("run pin error(%s)" % e)
class ElfDwarf:
    """
    :elf_file_path(must): the path of elf file
    :addr2type_map: map address to type
    :base_type_map: map id to type
    """

    def __init__(self, elf_file_path, inputfile, resultdir):
        """Collect DWARF information from an ELF file, then trace it under pin.

        Args:
            elf_file_path: Path of the ELF file (a ``str``); it is both
                parsed and executed under pin.
            inputfile: Path of a file whose content is fed to the traced
                program's stdin, or ``None`` to run without input.
            resultdir: Directory in which ``<elf basename>.out`` is written.

        If the ELF file has no DWARF info a notice is printed and nothing
        else happens (pin is not run).
        """
        self.elf_file_path = elf_file_path
        self.result_file_path = self.elf_file_path + ".type"
        self.inputfile = inputfile
        self.resultdir = resultdir

        # To save the basic information.
        self.base_type_map = {}
        self.addr2type_map = {}
        self.CU_TYPE = None

        self.functions = []
        self.global_var = []

        # To support extract the dwarf
        self.loc_parser = None
        self.CU = None
        self.dwarfinfo = None

        with open(self.elf_file_path, 'rb') as f:
            elffile = ELFFile(f)

            if not elffile.has_dwarf_info():
                print(' file has no DWARF info')
                return

            # get_dwarf_info returns a DWARFInfo context object, which is
            # the starting point for all DWARF-based processing in
            # pyelftools.
            self.dwarfinfo = elffile.get_dwarf_info()

            # The location lists are extracted by DWARFInfo from the
            # .debug_loc section, and returned here as a LocationLists
            # object.
            location_lists = self.dwarfinfo.location_lists()

            # This is required for the descriptions module to correctly
            # decode register names contained in DWARF expressions.
            set_global_machine_arch(elffile.get_machine_arch())

            # Create a LocationParser object that parses the DIE attributes
            # and creates objects representing the actual location
            # information.
            self.loc_parser = LocationParser(location_lists)

            for CU in self.dwarfinfo.iter_CUs():
                print(' Found a compile unit at offset %s, length %s' %
                      (CU.cu_offset, CU['unit_length']))
                self.CU = CU
                self.CU_TYPE = get_compile_unit_types(self.CU)

                for die in CU.iter_DIEs():
                    if die.tag == 'DW_TAG_subprogram':
                        print("")
                        self.process_subprogram(die)
                    elif (die.tag == 'DW_TAG_variable'
                          and 'DW_AT_external' in die.attributes):
                        print("")
                        self.process_global_var(die)

        pincmd = [
            '../pin/pin', '-t', '../TaintAnalysisWithPin/obj-ia32/taint.so',
            '--', elf_file_path
        ]
        print(pincmd)

        # Split on a str separator: splitting a str path on b'/' raises
        # TypeError on Python 3.
        result = (self.resultdir + '/' + elf_file_path.split('/')[-1] +
                  ".out")
        print(result)

        try:
            if inputfile is not None:
                print("fead input")
                # Read the input before spawning so a missing file does
                # not leave a stray child process behind.
                with open(self.inputfile, 'rb') as stdin_source:
                    stdin_lines = stdin_source.readlines()
                for line in stdin_lines:
                    print("Give Std Input:%s" % line)
                process = subprocess.Popen(pincmd,
                                           stdin=subprocess.PIPE,
                                           stdout=subprocess.PIPE)
                # communicate() feeds stdin and drains stdout together,
                # which avoids a deadlock when either pipe fills up.
                trace = process.communicate(b"".join(stdin_lines))[0]
            else:
                print("running without input")
                trace = subprocess.check_output(pincmd)

            tracelist = loadtrace(trace)
            extractfromtrace(tracelist, self.global_var, self.functions,
                             result)
        except subprocess.CalledProcessError as e:
            print("run pin error(%s)" % e)

    def show_loclist(self, loclist, dwarfinfo, indent):
        """ Display a location list nicely, decoding the DWARF expressions
            contained within.

            Returns one line per entry, each prefixed with *indent* and
            joined with newlines (an empty string for an empty list).
        """
        d = []
        for loc_entity in loclist:
            if isinstance(loc_entity, LocationEntry):
                d.append('%s <<%s>>' % (
                    loc_entity,
                    describe_DWARF_expr(loc_entity.loc_expr,
                                        dwarfinfo.structs)))
            else:
                d.append(str(loc_entity))
        return '\n'.join(indent + s for s in d)

    def process_global_var(self, DIE):
        """Record an external variable DIE in ``self.global_var``.

        An entry with ``name``, ``size``, ``type_name``, ``offset`` and
        ``breg`` is appended only when the type can be resolved and a
        location expression part of the form ``<op>: <hex>`` is found;
        otherwise nothing is recorded.
        """
        var = {}
        if 'DW_AT_name' in DIE.attributes:
            var["name"] = DIE.attributes['DW_AT_name'].value
        else:
            var["name"] = None

        variable_size, variable_type_name = get_variable_size_and_name(
            DIE, self.CU, self.CU_TYPE)
        if variable_size is None or variable_type_name is None:
            return
        var["size"] = variable_size
        var["type_name"] = variable_type_name

        for attr in itervalues(DIE.attributes):
            # Check if this attribute contains location information
            if not self.loc_parser.attribute_has_location(
                    attr, self.CU['version']):
                continue
            loc = self.loc_parser.parse_from_attribute(
                attr, self.CU['version'])
            # We either get a list (in case the attribute is a
            # reference to the .debug_loc section) or a LocationExpr
            # object (in case the attribute itself contains location
            # information). Only the latter is handled.
            if not isinstance(loc, LocationExpr):
                continue

            dwarf_expr_dumper = extract_DWARF_expr(loc.loc_expr,
                                                   self.dwarfinfo.structs)
            for item in dwarf_expr_dumper._str_parts:
                sep = item.find(':')
                if sep == -1:
                    # No operand to decode; same guard as for stack
                    # variables.
                    continue
                baseregister = item[:sep]
                # Skip the ": " separator; the operand is hexadecimal.
                offset = int(item[sep + 2:], 16)
                print("%s:%s:%d:%s" % (var["name"], baseregister, offset,
                                       var["type_name"]))
                var["offset"] = offset
                var["breg"] = baseregister
                # The first decodable part settles the variable.
                self.global_var.append(var)
                return

    def process_subprogram(self, subprogram_die):
        """Record a subprogram DIE and its stack variables.

        Appends a dict to ``self.functions`` holding the function ``name``
        (``None`` when the DIE has no ``DW_AT_name``) and, when the DIE has
        children, a ``stack_variables`` list filled from its variable and
        formal-parameter children.
        """
        func = {}
        self.functions.append(func)

        if 'DW_AT_name' in subprogram_die.attributes:
            func["name"] = subprogram_die.attributes['DW_AT_name'].value
        else:
            print("Does not find function name")
            func["name"] = None
        print("function name")
        print(func["name"])

        if subprogram_die.has_children:
            func["stack_variables"] = []
            # Walk the variables that are children of the current DIE
            # (the current function)
            for child in subprogram_die.iter_children():
                if child.tag in ('DW_TAG_variable',
                                 'DW_TAG_formal_parameter'):
                    self.process_subprogram_variable(child)

    def process_subprogram_variable(self, DIE):
        """Record a variable DIE under the most recent function.

        The variable is appended to the current function's
        ``stack_variables`` only when its type can be resolved and at least
        one location expression part of the form ``<op>:<number>`` is
        decoded; the last decoded part determines ``offset`` and ``breg``.
        Does nothing when the current function has no ``stack_variables``.
        """
        func = self.functions[-1]
        variables = func.get("stack_variables")
        if variables is None:
            return

        var = {}
        if 'DW_AT_name' in DIE.attributes:
            var["name"] = DIE.attributes['DW_AT_name'].value
        else:
            var["name"] = None

        variable_size, variable_type_name = get_variable_size_and_name(
            DIE, self.CU, self.CU_TYPE)
        if variable_size is None or variable_type_name is None:
            return
        var["size"] = variable_size
        var["type_name"] = variable_type_name

        for attr in itervalues(DIE.attributes):
            # Check if this attribute contains location information
            if not self.loc_parser.attribute_has_location(
                    attr, self.CU['version']):
                continue
            loc = self.loc_parser.parse_from_attribute(
                attr, self.CU['version'])
            # Location lists (references into .debug_loc) are ignored;
            # only inline location expressions are decoded.
            if not isinstance(loc, LocationExpr):
                continue

            dwarf_expr_dumper = extract_DWARF_expr(loc.loc_expr,
                                                   self.dwarfinfo.structs)
            for item in dwarf_expr_dumper._str_parts:
                print(item)
                sep = item.find(':')
                if sep == -1:
                    continue
                baseregister = item[:sep]
                # DW_OP_addr operands are hexadecimal, all others decimal.
                radix = 16 if baseregister == "DW_OP_addr" else 10
                offset = int(item[sep + 1:], radix)
                print("%s:%s:%s:%d:%s" % (func["name"], var["name"],
                                          baseregister, offset,
                                          var["type_name"]))
                var["offset"] = offset
                var["breg"] = baseregister

        # Variables without a decoded location are not recorded.
        if "breg" in var:
            variables.append(var)