def die_info_rec(die, func_map, global_map, type_map, struct_map, variables, global_access_map):
    """ Recursively collect function, variable and base-type info from a DIE tree.

    What is recorded depends on ``die.tag``:

    * ``DW_TAG_subprogram``: a fresh ``variables`` dict is started for the
      function; once the children have been visited it is stored in
      ``func_map`` under the function's ``DW_AT_name`` ('' when absent).
    * ``DW_TAG_variable`` / ``DW_TAG_formal_parameter``: registered through
      ``addVariableInMap``.  When the variable's type offset is a key of
      ``struct_map``, one entry per struct member is registered instead,
      named ``<variable>.<member>``.
    * ``DW_TAG_base_type``: ``type_map[die.offset] = (name, byte_size)``.

    Attributes a DIE does not carry fall back to '' (names, type) or 0
    (line, size) instead of raising UnboundLocalError.

    :param die: the DIE to process; its children are visited recursively
    :param func_map: dict filled with ``function name -> variables``
    :param global_map: forwarded unchanged to ``addVariableInMap``
    :param type_map: dict filled with ``DIE offset -> (type name, size)``
    :param struct_map: ``type offset -> {member: (name, offset, ...)}``
    :param variables: variable container of the enclosing function
    :param global_access_map: forwarded unchanged to ``addVariableInMap``
    """
    name = ''
    tag = die.tag

    if tag == "DW_TAG_subprogram":
        variables = {}
        for attr in die.attributes.values():
            if attr.name == 'DW_AT_name':
                name = bytes2str(attr.value)

    elif tag in ("DW_TAG_variable", "DW_TAG_formal_parameter"):
        global_flag = 0
        # Defaults for DIEs that lack the corresponding attribute.
        var_name, line, type_val = '', 0, ''
        offset_var = ''
        for attr in die.attributes.values():
            if attr.name == 'DW_AT_name':
                var_name = bytes2str(attr.value)
            elif attr.name == 'DW_AT_location':
                val = _location_list_extra(attr, die, ' ')
                # Keep the text between the first ':' and the first ')'.
                offset_var = val[val.find(':') + 1:val.find(')')].strip()
            elif attr.name == 'DW_AT_decl_line':
                line = attr.value
            elif attr.name == 'DW_AT_type':
                # The attribute value is CU-relative; make it absolute.
                type_val = attr.value + die.cu.cu_offset
            elif attr.name == 'DW_AT_external':
                global_flag = 1

        if type_val in struct_map:
            # Struct-typed variable: one entry per member.
            for member in struct_map[type_val].values():
                addVariableInMap(global_flag, global_map, variables,
                                 offset_var, var_name + "." + member[0],
                                 type_val, line, global_access_map,
                                 member[1])
        else:
            addVariableInMap(global_flag, global_map, variables, offset_var,
                             var_name, type_val, line, global_access_map, 0)

    elif tag == 'DW_TAG_base_type':
        type_name, size = '', 0
        for attr in die.attributes.values():
            if attr.name == "DW_AT_name":
                type_name = bytes2str(attr.value)
            elif attr.name == 'DW_AT_byte_size':
                size = attr.value
        type_map[die.offset] = (type_name, size)

    for child in die.iter_children():
        die_info_rec(child, func_map, global_map, type_map, struct_map,
                     variables, global_access_map)

    if tag == "DW_TAG_subprogram":
        func_map[name] = variables
def get_func_with_params(self):
    """ Count the formal parameters of every named function.

    Walks the DIEs of all compilation units.  Each ``DW_TAG_subprogram``
    that has a ``DW_AT_name`` opens a record; ``DW_TAG_formal_parameter``
    DIEs seen before the record is closed are counted.  A record is closed
    by the next null DIE, by the next subprogram, or by the end of the
    compilation unit, so functions without children are kept as well.

    :return: dict mapping ``(name, low_pc text)`` to ``[parameter count]``
             covering all compilation units (``low_pc text`` is '' when
             the subprogram has no ``DW_AT_low_pc``); ``None`` when no
             DWARF info is available.
    """
    self._init_dwarfinfo()
    if self._dwarfinfo is None:
        return

    # Offset of the .debug_info section in the stream
    section_offset = self._dwarfinfo.debug_info_sec.global_offset

    # One dict for the whole file: creating it per CU would drop the
    # functions of every CU but one.
    func_dic = {}
    for cu in self._dwarfinfo.iter_CUs():
        para_count = 0
        func_name = ""
        func_addr = ""
        for die in cu.iter_DIEs():
            starts_function = die.tag == 'DW_TAG_subprogram'
            if (die.abbrev_code == 0 or starts_function) and func_name != "":
                func_dic[(func_name, func_addr)] = [para_count]
                func_name = ""

            if starts_function:
                para_count = 0
                # Do not inherit the address of the previous function.
                func_addr = ""
                for attr in itervalues(die.attributes):
                    if 'DW_AT_low_pc' in attr.name:
                        func_addr = describe_attr_value(
                            attr, die, section_offset).split()[0]
                    if 'DW_AT_name' in attr.name:
                        func_name = describe_attr_value(
                            attr, die, section_offset).split()[-1]
            elif die.tag == 'DW_TAG_formal_parameter':
                para_count += 1

        # A function still open at the end of the CU.
        if func_name != "":
            func_dic[(func_name, func_addr)] = [para_count]

    return func_dic
def get_func_with_params(self):
    """ Count the formal parameters of every named function.

    Walks the DIEs of all compilation units.  Each ``DW_TAG_subprogram``
    that has a ``DW_AT_name`` opens a record; ``DW_TAG_formal_parameter``
    DIEs seen before the record is closed are counted.  A record is closed
    by the next null DIE, by the next subprogram, or by the end of the
    compilation unit, so functions without children are kept as well.

    :return: dict mapping ``(name, low_pc text)`` to ``[parameter count]``
             covering all compilation units (``low_pc text`` is '' when
             the subprogram has no ``DW_AT_low_pc``); ``None`` when no
             DWARF info is available.
    """
    self._init_dwarfinfo()
    if self._dwarfinfo is None:
        return

    # Offset of the .debug_info section in the stream
    section_offset = self._dwarfinfo.debug_info_sec.global_offset

    # One dict for the whole file: creating it per CU would drop the
    # functions of every CU but one.
    func_dic = {}
    for cu in self._dwarfinfo.iter_CUs():
        para_count = 0
        func_name = ""
        func_addr = ""
        for die in cu.iter_DIEs():
            starts_function = die.tag == 'DW_TAG_subprogram'
            if (die.abbrev_code == 0 or starts_function) and func_name != "":
                func_dic[(func_name, func_addr)] = [para_count]
                func_name = ""

            if starts_function:
                para_count = 0
                # Do not inherit the address of the previous function.
                func_addr = ""
                for attr in itervalues(die.attributes):
                    if 'DW_AT_low_pc' in attr.name:
                        func_addr = describe_attr_value(
                            attr, die, section_offset).split()[0]
                    if 'DW_AT_name' in attr.name:
                        func_name = describe_attr_value(
                            attr, die, section_offset).split()[-1]
            elif die.tag == 'DW_TAG_formal_parameter':
                para_count += 1

        # A function still open at the end of the CU.
        if func_name != "":
            func_dic[(func_name, func_addr)] = [para_count]

    return func_dic
def parse_dwarf_info(self):
    """ Build (once) and return the DWARF info table.

    The table maps a DIE offset to an OrderedDict with the keys ``depth``,
    ``offset``, ``code``, ``tag`` and ``attr``; ``attr`` is a list of
    OrderedDicts (``offset``, ``name``, ``desc``), one per attribute for
    which ``self._get_attribute_description`` returned something other
    than None.  DIEs that end up with no such attribute are left out.

    The result is cached in ``self.dwarf_info`` only after parsing has
    succeeded, so a failed call is not mistaken for an empty table by the
    next one.

    :return: OrderedDict
    :raises ValueError: when the ELF file carries no debug information
    """
    if self.dwarf_info is not None:
        return self.dwarf_info

    logging.debug('Parsing DWARF Info...')
    dwarf_info = self._elf.get_dwarf_info()
    if not dwarf_info.has_debug_info:
        raise ValueError("Debug information not available in ELF file. "
                         "Symbol table will be empty")

    parsed = OrderedDict()
    for cu in dwarf_info.iter_CUs():
        die_depth = 0
        for die in cu.iter_DIEs():
            if die.is_null():
                # A null DIE closes the current sibling list.
                die_depth -= 1
                continue

            # abbreviation property of interest
            abbreviation = OrderedDict()
            abbreviation["depth"] = die_depth
            abbreviation["offset"] = die.offset
            abbreviation["code"] = die.abbrev_code
            abbreviation["tag"] = die.tag
            abbreviation["attr"] = []
            logging.debug(" <%s><%s>: Abbrev Number: %s (%s)",
                          die_depth, hex(die.offset), die.abbrev_code,
                          die.tag)

            for attr in die.attributes.values():
                description = self._get_attribute_description(attr, die)
                if description is None:
                    continue
                attr_dict = OrderedDict()
                attr_dict["offset"] = attr.offset
                attr_dict["name"] = attr.name
                attr_dict["desc"] = description
                abbreviation["attr"].append(attr_dict)

                log_description = (hex(description)
                                   if isinstance(description, int)
                                   else description)
                logging.debug(" <%s> %s: %s", hex(attr.offset), attr.name,
                              log_description)

            if abbreviation["attr"]:
                parsed[die.offset] = abbreviation
            if die.has_children:
                die_depth += 1

    self.dwarf_info = parsed
    return self.dwarf_info
def process_file(filename):
    """ Print the location information found in an ELF file's DWARF data.

    For every DIE attribute that the LocationParser reports as carrying
    location information, the DIE's name, tag and attribute name are
    printed, followed by the decoded location expression or location list.
    DIEs without a ``DW_AT_name`` attribute are reported with the name
    ``None`` instead of aborting the whole run with a KeyError.

    :param filename: path of the ELF file to inspect
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            print(' Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            for DIE in CU.iter_DIEs():
                # Looked up once per DIE; not every DIE that has a location
                # also has a name.
                name_attr = DIE.attributes.get('DW_AT_name')
                var_name = name_attr.value if name_attr is not None else None

                for attr in itervalues(DIE.attributes):
                    if not loc_parser.attribute_has_location(
                            attr, CU['version']):
                        continue

                    print(' Varname:%s' % (var_name,))
                    print(' DIE %s. attr %s.' % (DIE.tag, attr.name))
                    loc = loc_parser.parse_from_attribute(
                        attr, CU['version'])
                    # We either get a list (in case the attribute is a
                    # reference to the .debug_loc section) or a LocationExpr
                    # object (in case the attribute itself contains location
                    # information).
                    if isinstance(loc, LocationExpr):
                        print(' %s' % (describe_DWARF_expr(
                            loc.loc_expr, dwarfinfo.structs)))
                    elif isinstance(loc, list):
                        print(show_loclist(loc, dwarfinfo, indent=' '))
def _print_die(die, section_offset):
    """ Write a DIE and each of its attributes to the DEBUG log.

    :param die: the DIE to log
    :param section_offset: passed on to ``describe_attr_value`` together
                           with each attribute
    """
    DEBUG("Processing DIE: {}".format(str(die)))
    for attr in itervalues(die.attributes):
        name = attr.name
        # Attribute names that arrive as integers are shown in hex.
        if isinstance(name, int):
            name = 'Unknown AT value: %x' % name
        DEBUG(' <%x> %-18s: %s' % (
            attr.offset, name,
            describe_attr_value(attr, die, section_offset)))
def _print_die(die, section_offset):
    """ Write a DIE and each of its attributes to the DEBUG log.

    :param die: the DIE to log
    :param section_offset: passed on to ``describe_attr_value`` together
                           with each attribute
    """
    DEBUG("Processing DIE: {}".format(str(die)))
    for attr in itervalues(die.attributes):
        name = attr.name
        # Attribute names that arrive as integers are shown in hex.
        if isinstance(name, int):
            name = 'Unknown AT value: %x' % name
        DEBUG(' <%x> %-18s: %s' % (
            attr.offset, name,
            describe_attr_value(attr, die, section_offset)))
def die_info_rec_struct(die, struct_map, members, global_access_map):
    """ Recursively collect the members of every structure type.

    A ``DW_TAG_structure_type`` DIE starts a fresh member dict, which is
    stored in ``struct_map`` under the DIE's offset after its children have
    been visited.  A ``DW_TAG_member`` DIE adds ``name -> (name, location)``
    to the member dict it was handed.  A member without ``DW_AT_name`` is
    stored under '' and one without ``DW_AT_data_member_location`` gets
    location 0, instead of raising UnboundLocalError.

    :param die: the DIE to process; its children are visited recursively
    :param struct_map: dict filled with ``struct DIE offset -> members``
    :param members: member dict of the enclosing structure
    :param global_access_map: only handed down to the recursive calls
    """
    is_struct = die.tag == "DW_TAG_structure_type"

    if is_struct:
        members = {}
    elif die.tag == "DW_TAG_member":
        var_name, loc = '', 0
        for attr in die.attributes.values():
            if attr.name == 'DW_AT_name':
                var_name = bytes2str(attr.value)
            elif attr.name == 'DW_AT_data_member_location':
                loc = attr.value
        members[var_name] = (var_name, loc)

    for child in die.iter_children():
        die_info_rec_struct(child, struct_map, members, global_access_map)

    if is_struct:
        struct_map[die.offset] = members
def _dump_debug_info(self):
    """ Emit the contents of the .debug_info section.

    For each compilation unit a header is emitted, followed by one line
    per DIE (null DIEs included) and one line per attribute of every
    non-null DIE.
    """
    emit = self._emitline
    as_hex = self._format_hex

    emit('Contents of the .debug_info section:\n')

    # Offset of the .debug_info section in the stream
    section_offset = self._dwarfinfo.debug_info_sec.global_offset

    for cu in self._dwarfinfo.iter_CUs():
        emit(' Compilation Unit @ offset %s:' % as_hex(cu.cu_offset))
        length_text = as_hex(cu['unit_length'])
        format_text = '%s-bit' % cu.dwarf_format()
        emit(' Length: %s (%s)' % (length_text, format_text))
        emit(' Version: %s' % cu['version'])
        emit(' Abbrev Offset: %s' % (as_hex(cu['debug_abbrev_offset'])))
        emit(' Pointer Size: %s' % cu['address_size'])

        # The nesting depth is tracked with a counter: it goes up after a
        # DIE that has children and down at every null DIE, which mirrors
        # the way the DIE tree is serialized.
        die_depth = 0
        for die in cu.iter_DIEs():
            null_die = die.is_null()
            tag_suffix = '' if null_die else (' (%s)' % die.tag)
            emit(' <%s><%x>: Abbrev Number: %s%s' % (
                die_depth, die.offset, die.abbrev_code, tag_suffix))
            if null_die:
                die_depth -= 1
                continue

            for attr in itervalues(die.attributes):
                label = attr.name
                # Unknown attribute values are passed-through as integers
                if isinstance(label, int):
                    label = 'Unknown AT value: %x' % label
                described = describe_attr_value(attr, die, section_offset)
                emit(' <%2x> %-18s: %s' % (attr.offset, label, described))

            if die.has_children:
                die_depth += 1

    emit()
def compile(self):
    """Compile the vtypes from the dwarf information."""
    # All compilation units are dumped into the same vtype. The same symbol
    # may technically be defined differently in different compilation
    # units, but rekall has no CU resolution right now, so they are assumed
    # to be the same.
    parents = []
    section_offset = self._dwarfinfo.debug_info_sec.global_offset
    log = self.logging

    for cu in self._dwarfinfo.iter_CUs():
        parents.append(cu)
        die_depth = 0

        for die in cu.iter_DIEs():
            indent = "\t" * die_depth
            null_die = die.is_null()
            tag_text = "" if null_die else ("%s" % die.tag)
            log.debug("%d %s<%x>: %s" % (
                die_depth, indent, die.offset, tag_text))

            if null_die:
                # A null DIE closes the children of the current parent.
                die_depth -= 1
                parents = parents[:-1]
                continue

            # The attribute dump is produced only when DEBUG is enabled.
            if log.isEnabledFor(logging.DEBUG):
                for attr in itervalues(die.attributes):
                    label = attr.name
                    # Unknown attribute values are passed-through as
                    # integers
                    if isinstance(label, int):
                        label = "Unknown AT value: %x" % label
                    try:
                        log.debug("%d %s <%2x> %-18s: %s" % (
                            die_depth, indent, attr.offset, label,
                            describe_attr_value(attr, die, section_offset)))
                    except Exception:
                        # Failing to render a debug line is ignored.
                        pass

            # Record the type in this DIE.
            t = DIEFactory(die, self.types, parents)
            self.types[die.offset] = t

            if die.has_children:
                parents.append(t)
                die_depth += 1
def get_func_bounds(filename, function_name):
    """ Return the address bounds of a function described in DWARF info.

    Every ``DW_TAG_subprogram`` DIE of the file is examined and the bounds
    of the first one whose ``DW_AT_name`` equals *function_name* are
    returned.

    ``DW_AT_high_pc`` is interpreted by its form: a ``DW_FORM_addr*`` value
    is taken as an absolute address, any other form as an offset from
    ``DW_AT_low_pc``.

    :param filename: path of the ELF file to read
    :param function_name: name to look for, as ``str`` or ``bytes``
    :return: ``(low_addr, high_addr)``, where a bound that is missing from
             the DIE is taken as 0; ``None`` when the file has no DWARF
             info or no subprogram has that name
    """
    def _text(value):
        # Names may come as bytes on one side and str on the other;
        # compare them as text.
        if isinstance(value, bytes):
            return value.decode('utf-8', 'replace')
        return value

    wanted = _text(function_name)

    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # Not needed for the lookup itself; kept so that the module-wide
        # architecture setting is still made for callers relying on it.
        set_global_machine_arch(elffile.get_machine_arch())

        for CU in dwarfinfo.iter_CUs():
            for DIE in CU.iter_DIEs():
                if DIE.tag != "DW_TAG_subprogram":
                    continue

                fname = ""
                low_addr = 0
                high_addr = 0
                high_is_offset = False
                for attr in itervalues(DIE.attributes):
                    if attr.name == "DW_AT_name":
                        fname = _text(attr.value)
                    elif attr.name == "DW_AT_low_pc":
                        low_addr = attr.value
                    elif attr.name == "DW_AT_high_pc":
                        high_addr = attr.value
                        high_is_offset = not attr.form.startswith(
                            'DW_FORM_addr')

                if fname != wanted:
                    continue
                if high_is_offset:
                    high_addr += low_addr
                return (low_addr, high_addr)
def compile(self):
    """Compile the vtypes from the dwarf information."""
    # All compilation units are dumped into the same vtype. The same symbol
    # may technically be defined differently in different compilation
    # units, but rekall has no CU resolution right now, so they are assumed
    # to be the same.
    parents = []
    section_offset = self._dwarfinfo.debug_info_sec.global_offset
    log = self.logging

    for cu in self._dwarfinfo.iter_CUs():
        parents.append(cu)
        die_depth = 0

        for die in cu.iter_DIEs():
            indent = "\t" * die_depth
            null_die = die.is_null()
            tag_text = '' if null_die else ('%s' % die.tag)
            log.debug('%d %s<%x>: %s' % (
                die_depth, indent, die.offset, tag_text))

            if null_die:
                # A null DIE closes the children of the current parent.
                die_depth -= 1
                parents = parents[:-1]
                continue

            # The attribute dump is produced only when DEBUG is enabled.
            if log.isEnabledFor(logging.DEBUG):
                for attr in itervalues(die.attributes):
                    label = attr.name
                    # Unknown attribute values are passed-through as
                    # integers
                    if isinstance(label, int):
                        label = 'Unknown AT value: %x' % label
                    try:
                        log.debug('%d %s <%2x> %-18s: %s' % (
                            die_depth, indent, attr.offset, label,
                            describe_attr_value(attr, die, section_offset)))
                    except Exception:
                        # Failing to render a debug line is ignored.
                        pass

            # Record the type in this DIE.
            t = DIEFactory(die, self.types, parents)
            self.types[die.offset] = t

            if die.has_children:
                parents.append(t)
                die_depth += 1
def process_file(filename):
    """ Print every location list referenced from an ELF file's DWARF info.

    Each DIE attribute for which ``attribute_has_location_list`` is true is
    looked up in the file's location lists and printed together with the
    DIE's tag and the attribute's name.

    :param filename: path of the ELF file to inspect
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo context object is the entry point for all
        # DWARF-based processing in pyelftools.
        dwarfinfo = elf.get_dwarf_info()

        # LocationLists object built by DWARFInfo from the .debug_loc
        # section.
        location_lists = dwarfinfo.location_lists()

        # Needed by the descriptions module to decode the register names
        # that appear in DWARF expressions.
        set_global_machine_arch(elf.get_machine_arch())

        for CU in dwarfinfo.iter_CUs():
            # A CompileUnit exposes computed attributes (such as its offset
            # in the section) and its header fields via item lookup.
            print(' Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            for DIE in CU.iter_DIEs():
                loclist_attrs = (
                    candidate
                    for candidate in itervalues(DIE.attributes)
                    if attribute_has_location_list(candidate))
                for attr in loclist_attrs:
                    # The attribute value is an offset into the .debug_loc
                    # section; the location lists object decodes it.
                    loclist = location_lists.get_location_list_at_offset(
                        attr.value)
                    rendered = show_loclist(loclist, dwarfinfo, indent=' ')
                    print(' DIE %s. attr %s.\n%s' % (
                        DIE.tag, attr.name, rendered))
def process_global_var(self, DIE):
    """ Record a global variable DIE in ``self.global_var``.

    A new dict is appended to ``self.global_var`` and filled with ``name``,
    ``size`` and ``type_name``.  The first usable part of the first
    location expression then supplies ``breg`` (the text before ':') and
    ``offset`` (the hexadecimal number after it), which are also printed.
    The entry is removed again when the size or type name cannot be
    determined, or when no location part could be decoded.

    Expression parts that contain no ':' are skipped rather than fed to
    ``int()``.

    :param DIE: the variable DIE to record
    """
    entry = {}
    self.global_var.append(entry)

    name_attr = DIE.attributes.get('DW_AT_name')
    entry["name"] = name_attr.value if name_attr is not None else None

    variable_size, variable_type_name = get_variable_size_and_name(
        DIE, self.CU, self.CU_TYPE)
    if variable_size is None or variable_type_name is None:
        self.global_var.pop()
        return
    entry["size"] = variable_size
    entry["type_name"] = variable_type_name

    for attr in itervalues(DIE.attributes):
        # Check if this attribute contains location information
        if not self.loc_parser.attribute_has_location(
                attr, self.CU['version']):
            continue

        loc = self.loc_parser.parse_from_attribute(attr, self.CU['version'])
        # We either get a list (in case the attribute is a reference to
        # the .debug_loc section) or a LocationExpr object (in case the
        # attribute itself contains location information).  Only the
        # latter is handled here.
        if not isinstance(loc, LocationExpr):
            continue

        dwarf_expr_dumper = extract_DWARF_expr(
            loc.loc_expr, self.dwarfinfo.structs)
        for item in dwarf_expr_dumper._str_parts:
            index = item.find(':')
            if index == -1:
                continue
            baseregister = item[:index]
            # index + 2 also skips the character that follows the ':'.
            offset = int(item[index + 2:], 16)
            print("%s:%s:%d:%s" % (entry["name"], baseregister, offset,
                                   entry["type_name"]))
            entry["offset"] = offset
            entry["breg"] = baseregister
            return

    # No location could be decoded: discard the entry.
    self.global_var.pop()
    return
def process_file(filename):
    """ Print every range list referenced from an ELF file's DWARF info.

    Each DIE attribute for which ``attribute_has_range_list`` is true is
    looked up in the file's range lists and printed together with the DIE's
    tag and the attribute's name.

    :param filename: path of the ELF file to inspect
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo context object is the entry point for all
        # DWARF-based processing in pyelftools.
        dwarfinfo = elf.get_dwarf_info()

        # RangeLists object built by DWARFInfo from the .debug_ranges
        # section.
        range_lists = dwarfinfo.range_lists()
        if range_lists is None:
            print(' file has no .debug_ranges section')
            return

        for CU in dwarfinfo.iter_CUs():
            # A CompileUnit exposes computed attributes (such as its offset
            # in the section) and its header fields via item lookup.
            print(' Found a compile unit at offset {0!s}, length {1!s}'.format(
                CU.cu_offset, CU['unit_length']))

            for DIE in CU.iter_DIEs():
                range_attrs = (
                    candidate
                    for candidate in itervalues(DIE.attributes)
                    if attribute_has_range_list(candidate))
                for attr in range_attrs:
                    # The attribute value is an offset into the
                    # .debug_ranges section; the range lists object decodes
                    # it.
                    rangelist = range_lists.get_range_list_at_offset(
                        attr.value)
                    print(' DIE {0!s}. attr {1!s}.\n{2!s}'.format(
                        DIE.tag, attr.name, rangelist))
def process_file(filename):
    """ Print every range list referenced from an ELF file's DWARF info.

    Each DIE attribute for which ``attribute_has_range_list`` is true is
    looked up in the file's range lists and printed together with the DIE's
    tag and the attribute's name.

    :param filename: path of the ELF file to inspect
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # The DWARFInfo context object is the entry point for all
        # DWARF-based processing in pyelftools.
        dwarfinfo = elf.get_dwarf_info()

        # RangeLists object built by DWARFInfo from the .debug_ranges
        # section.
        range_lists = dwarfinfo.range_lists()
        if range_lists is None:
            print(' file has no .debug_ranges section')
            return

        for CU in dwarfinfo.iter_CUs():
            # A CompileUnit exposes computed attributes (such as its offset
            # in the section) and its header fields via item lookup.
            print(' Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            for DIE in CU.iter_DIEs():
                range_attrs = (
                    candidate
                    for candidate in itervalues(DIE.attributes)
                    if attribute_has_range_list(candidate))
                for attr in range_attrs:
                    # The attribute value is an offset into the
                    # .debug_ranges section; the range lists object decodes
                    # it.
                    rangelist = range_lists.get_range_list_at_offset(
                        attr.value)
                    print(' DIE %s. attr %s.\n%s' % (
                        DIE.tag, attr.name, rangelist))
def _dump_debug_info(self):
    """ Emit the contents of the .debug_info section.

    For each compilation unit a header is emitted, followed by one line
    per non-null DIE and one line per attribute of that DIE.  Null DIEs
    only lower the nesting depth.
    """
    emit = self._emitline

    emit('Contents of the .debug_info section:\n')

    # Offset of the .debug_info section in the stream
    section_offset = self._dwarfinfo.debug_info_sec.global_offset

    for cu in self._dwarfinfo.iter_CUs():
        emit(' Compilation Unit @ offset %s:' %
             self._format_hex(cu.cu_offset))
        length_text = self._format_hex(cu['unit_length'])
        format_text = '%s-bit' % cu.dwarf_format()
        emit(' Length: %s (%s)' % (length_text, format_text))
        emit(' Version: %s' % cu['version'])
        emit(' Abbrev Offset: %s' % cu['debug_abbrev_offset'])
        emit(' Pointer Size: %s' % cu['address_size'])

        # The nesting depth is tracked with a counter: it goes up after a
        # DIE that has children and down at every null DIE, which mirrors
        # the way the DIE tree is serialized.
        die_depth = 0
        for die in cu.iter_DIEs():
            if die.is_null():
                die_depth -= 1
                continue

            emit(' <%s><%x>: Abbrev Number: %s (%s)' % (
                die_depth, die.offset, die.abbrev_code, die.tag))

            for attr in itervalues(die.attributes):
                label = attr.name
                # Unknown attribute values are passed-through as integers
                if isinstance(label, int):
                    label = 'Unknown AT value: %x' % label
                described = describe_attr_value(attr, die, section_offset)
                emit(' <%2x> %-18s: %s' % (attr.offset, label, described))

            if die.has_children:
                die_depth += 1

    emit()
def get_executable_src_files(exec_path):
    """ List the source files named by an executable's compilation units.

    The top DIE of every compilation unit supplies ``DW_AT_name`` (the
    source path) and ``DW_AT_comp_dir`` (the build directory).  Relative
    source paths are made absolute with the build directory, then symbolic
    links are resolved with ``os.path.realpath``.  Compilation units
    without a ``DW_AT_name`` are skipped.

    :param exec_path: absolute path of the executable
    :return: list of resolved source paths as text; empty when the file is
             not an ELF file or has no DWARF information
    :raises AssertionError: when ``exec_path`` is not absolute, or when a
             relative source path has no build directory to resolve against
    """
    def _text(value):
        # Attribute values may be bytes; do the path handling in text so
        # that defaults and values are of the same type.
        return value.decode() if isinstance(value, bytes) else value

    assert (os.path.isabs(exec_path))

    exec_src_paths = []
    with open(exec_path, 'rb') as elf_file_handle:
        try:
            elf_file = ELFFile(elf_file_handle)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # not be reported as "not an ELF file".
            print('-- Executable \'' + exec_path + '\' is not an ELF file')
            return []

        if not elf_file.has_dwarf_info():
            print('-- Executable \'' + exec_path +
                  '\' has no DWARF information')
            return []

        dwarf_info = elf_file.get_dwarf_info()
        for CU in dwarf_info.iter_CUs():
            DIE = CU.get_top_DIE()
            name = ''
            comp_dir = ''
            for attr in itervalues(DIE.attributes):
                if attr.name == 'DW_AT_name':
                    name = _text(attr.value)
                elif attr.name == 'DW_AT_comp_dir':
                    comp_dir = _text(attr.value)

            if not name:
                # Nothing to report for a unit that names no source file.
                continue

            # If the source path in the executable is not an absolute
            # path then use the DW_AT_comp_dir attribute to get the
            # build directory to make it absolute
            src_path = name
            if not os.path.isabs(name):
                assert (comp_dir != '')
                src_path = os.path.join(comp_dir, name)
            assert (os.path.isabs(src_path))

            # Once we have an absolute path, use realpath to resolve any
            # symbolic links
            exec_src_paths.append(os.path.realpath(src_path))

    return exec_src_paths
def get_executable_src_files(exec_path):
    """ List the source files named by an executable's compilation units.

    The top DIE of every compilation unit supplies ``DW_AT_name`` (the
    source path) and ``DW_AT_comp_dir`` (the build directory).  Relative
    source paths are made absolute with the build directory, then symbolic
    links are resolved with ``os.path.realpath``.  Compilation units
    without a ``DW_AT_name`` are skipped.

    :param exec_path: absolute path of the executable
    :return: list of resolved source paths as text; empty when the file is
             not an ELF file or has no DWARF information
    :raises AssertionError: when ``exec_path`` is not absolute, or when a
             relative source path has no build directory to resolve against
    """
    def _text(value):
        # Attribute values may be bytes; do the path handling in text so
        # that defaults and values are of the same type.
        return value.decode() if isinstance(value, bytes) else value

    assert (os.path.isabs(exec_path))

    exec_src_paths = []
    with open(exec_path, 'rb') as elf_file_handle:
        try:
            elf_file = ELFFile(elf_file_handle)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # not be reported as "not an ELF file".
            print('-- Executable \'' + exec_path + '\' is not an ELF file')
            return []

        if not elf_file.has_dwarf_info():
            print('-- Executable \'' + exec_path +
                  '\' has no DWARF information')
            return []

        dwarf_info = elf_file.get_dwarf_info()
        for CU in dwarf_info.iter_CUs():
            DIE = CU.get_top_DIE()
            name = ''
            comp_dir = ''
            for attr in itervalues(DIE.attributes):
                if attr.name == 'DW_AT_name':
                    name = _text(attr.value)
                elif attr.name == 'DW_AT_comp_dir':
                    comp_dir = _text(attr.value)

            if not name:
                # Nothing to report for a unit that names no source file.
                continue

            # If the source path in the executable is not an absolute
            # path then use the DW_AT_comp_dir attribute to get the
            # build directory to make it absolute
            src_path = name
            if not os.path.isabs(name):
                assert (comp_dir != '')
                src_path = os.path.join(comp_dir, name)
            assert (os.path.isabs(src_path))

            # Once we have an absolute path, use realpath to resolve any
            # symbolic links
            exec_src_paths.append(os.path.realpath(src_path))

    return exec_src_paths
def getAttr(DIE):
    """ Summarize the attributes of a DIE in an OrderedDict.

    The result always has ``offset`` (the DIE's offset).  The following
    keys are added when the matching attribute is present:

    * ``name``     -- ``DW_AT_name``, decoded to text
    * ``type_num`` -- ``DW_AT_type``
    * ``loc``      -- ``DW_AT_data_member_location``
    * ``bytes``    -- ``DW_AT_byte_size``

    ``array`` and ``ptr`` are set to True for ``DW_TAG_array_type`` and
    ``DW_TAG_pointer_type`` DIEs.  These are properties of the DIE's tag;
    an attribute name never equals a tag name, so a test against
    ``attr.name`` can never match.
    NOTE(review): the value to store under ``array`` is an assumption
    (a plain flag, like ``ptr``) -- confirm against the callers.

    :param DIE: the DIE to summarize
    :return: OrderedDict as described above
    """
    copied = {
        'DW_AT_type': 'type_num',
        'DW_AT_data_member_location': 'loc',
        'DW_AT_byte_size': 'bytes',
    }

    res = OrderedDict()
    res['offset'] = DIE.offset
    for attr in DIE.attributes.values():
        if attr.name == 'DW_AT_name':
            res['name'] = attr.value.decode()
        elif attr.name in copied:
            res[copied[attr.name]] = attr.value

    if DIE.tag == 'DW_TAG_array_type':
        res['array'] = True
    if DIE.tag == 'DW_TAG_pointer_type':
        res['ptr'] = True
    return res
def _attr_to_dict(self, die):
    """ Convert a DIE into an ``objdict`` of described attribute values.

    Every attribute is stored under its name with the stripped text that
    ``describe_attr_value`` produces for it.  In addition:

    * ``tag``, ``raw_die`` and ``offset`` are taken from the DIE itself;
    * when the ``self.DW_AT_TYPE`` attribute is present, ``DW_AT_type`` is
      replaced by the integer parsed (base 16) from the described text
      with its first and last character removed;
    * a ``DW_AT_name`` starting with "(indirect string, offset:" is
      reduced to the stripped text after the second ':'.

    :param die: the DIE to convert
    :return: the populated ``objdict``
    """
    # name -> (name, offset, described text), in attribute order
    described = {}
    for attr in itervalues(die.attributes):
        text = describe_attr_value(attr, die, self.section_offset).strip()
        described[attr.name] = (attr.name, attr.offset, text)

    attrs = objdict()
    attrs.tag = die.tag
    for key, record in described.items():
        attrs[key] = record[2].strip()

    if self.DW_AT_TYPE in described:
        typestr = described[self.DW_AT_TYPE][2].strip()
        # Drop the first and last character before parsing the hex number.
        attrs.DW_AT_type = int(typestr[1:-1], 16)

    has_name = self.DW_AT_NAME in attrs
    if has_name and attrs.DW_AT_name.startswith("(indirect string, offset:"):
        attrs.DW_AT_name = attrs.DW_AT_name.split(":")[2].strip()

    attrs.raw_die = die
    attrs.offset = die.offset
    return attrs
def process_subprogram_variable(self, DIE):
    """ Record a variable DIE of the function currently being processed.

    Nothing happens when the last entry of ``self.functions`` has no
    ``stack_variables`` list.  Otherwise a new dict is appended to that
    list and filled with ``name``, ``size`` and ``type_name``.  Each part
    of each location expression that contains a ':' then supplies ``breg``
    (the text before the ':') and ``offset`` (the number after it, read as
    hexadecimal for ``DW_OP_addr`` and as decimal otherwise); both are
    printed and stored, later parts overwriting earlier ones.  The entry is
    removed again when the size or type name cannot be determined, or when
    no ``breg`` was found.

    :param DIE: the variable DIE to record
    """
    function = self.functions[-1]
    stack_variables = function.get("stack_variables")
    if stack_variables is None:
        return

    variable = {}
    stack_variables.append(variable)

    name_attr = DIE.attributes.get('DW_AT_name')
    variable["name"] = name_attr.value if name_attr is not None else None

    variable_size, variable_type_name = get_variable_size_and_name(
        DIE, self.CU, self.CU_TYPE)
    if variable_size is None or variable_type_name is None:
        stack_variables.pop()
        return
    variable["size"] = variable_size
    variable["type_name"] = variable_type_name

    for attr in itervalues(DIE.attributes):
        # Check if this attribute contains location information
        if not self.loc_parser.attribute_has_location(
                attr, self.CU['version']):
            continue

        loc = self.loc_parser.parse_from_attribute(attr, self.CU['version'])
        # We either get a list (in case the attribute is a reference to
        # the .debug_loc section) or a LocationExpr object (in case the
        # attribute itself contains location information).  Only the
        # latter is handled here.
        if not isinstance(loc, LocationExpr):
            continue

        dwarf_expr_dumper = extract_DWARF_expr(
            loc.loc_expr, self.dwarfinfo.structs)
        for item in dwarf_expr_dumper._str_parts:
            print(item)
            index = item.find(':')
            if index == -1:
                continue
            baseregister = item[:index]
            # DW_OP_addr operands are hexadecimal, the others decimal.
            base = 16 if baseregister == "DW_OP_addr" else 10
            offset = int(item[index + 1:], base)
            print("%s:%s:%s:%d:%s" % (
                function["name"], variable["name"], baseregister, offset,
                variable["type_name"]))
            variable["offset"] = offset
            variable["breg"] = baseregister

    if "breg" not in variable:
        stack_variables.pop()
def test_iterdict(self):
    """ iterkeys/itervalues/iteritems yield a dict's keys, values, items. """
    sample = {1: 'foo', 2: 'bar'}
    expected_keys = [1, 2]
    expected_values = ['bar', 'foo']
    expected_items = [(1, 'foo'), (2, 'bar')]

    # The results are sorted so the checks do not depend on iteration
    # order.
    self.assertEqual(sorted(iterkeys(sample)), expected_keys)
    self.assertEqual(sorted(itervalues(sample)), expected_values)
    self.assertEqual(sorted(iteritems(sample)), expected_items)
def get_var_offset(filename, function_name, var_name):
    """ Return the ``DW_OP_fbreg`` operand of a function's variable.

    The first ``DW_TAG_subprogram`` DIE named *function_name* that has a
    direct ``DW_TAG_variable`` or ``DW_TAG_formal_parameter`` child named
    *var_name* decides the result: the operand of the last ``DW_OP_fbreg``
    operation in that child's ``DW_AT_location`` expression.

    :param filename: path of the ELF file to read
    :param function_name: function to look in, as ``str`` or ``bytes``
    :param var_name: variable or parameter to look for, as ``str`` or
                     ``bytes``
    :return: the ``DW_OP_fbreg`` operand; 0 when the variable is found but
             its location has no such operation; ``None`` when the file has
             no DWARF info or nothing matches
    """
    def _text(value):
        # Names may come as bytes on one side and str on the other;
        # compare them as text.
        if isinstance(value, bytes):
            return value.decode('utf-8', 'replace')
        return value

    wanted_function = _text(function_name)
    wanted_variable = _text(var_name)

    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        # One expression parser serves every attribute.
        expr_parser = DWARFExprParser(dwarfinfo.structs)

        for CU in dwarfinfo.iter_CUs():
            for DIE in CU.iter_DIEs():
                # Find the function
                if DIE.tag != "DW_TAG_subprogram":
                    continue
                fname = ""
                for attr in itervalues(DIE.attributes):
                    if attr.name == "DW_AT_name":
                        fname = _text(attr.value)
                if fname != wanted_function:
                    continue

                for CHILD in DIE.iter_children():
                    if CHILD.tag not in ("DW_TAG_variable",
                                         "DW_TAG_formal_parameter"):
                        continue
                    right_name = False
                    location = 0
                    for attr in itervalues(CHILD.attributes):
                        if attr.name == "DW_AT_name":
                            if _text(attr.value) == wanted_variable:
                                right_name = True
                        elif attr.name == "DW_AT_location":
                            loc = loc_parser.parse_from_attribute(
                                attr, CU['version'])
                            if isinstance(loc, LocationExpr):
                                for op in expr_parser.parse_expr(
                                        loc.loc_expr):
                                    if op.op_name == 'DW_OP_fbreg':
                                        location = op.args[0]
                    if right_name:
                        return location
def patchDebugInfo(self, secName, sectionChunk):
    """
    .debug_info section update

    Collects the DW_AT_low_pc attribute of every DW_TAG_subprogram DIE and
    appends a copy of ``sectionChunk`` to ``self.instBin`` in which each of
    those address fields is replaced by the ``newVA`` of the basic block
    returned by ``self.EI.getBBlByVA``.  When ``self.EI.base`` is not
    positive, or no basic block is found for an address, the original
    address is kept.

    The following code is based on a dwarf example of an elftools library.
    From the official DWARF documentation (http://dwarfstd.org/doc/DWARF4.pdf):
        a) DW_AT_low_pc and DW_AT_high_pc pair or a DW_AT_ranges attribute
           encode the contiguous or non-contiguous address ranges,
           respectively, of the machine instructions generated for the
           compilation unit
        b) DW_AT_name attribute
           a null-terminated string containing the path name of the primary
           source
        c) DW_AT_stmt_list attribute
           a section offset to the line number information for compilation
           unit
        d) DW_AT_macro_info attribute
           a section offset to the macro information for compilation unit

    :param secName: section name, used only in log messages.
    :param sectionChunk: raw contents of the section; it is sliced with the
        attribute offsets reported by pyelftools.
    :return: None. The patched bytes are appended to ``self.instBin``.
    """
    from elftools.common.py3compat import bytes2str
    from elftools.dwarf.descriptions import describe_form_class

    # Width of a patched address field.
    # NOTE(review): assumes FMT.ULONG packs to 8 bytes and that DW_AT_low_pc
    # is an 8-byte address (64-bit target) -- confirm for other targets.
    addr_size = 8

    def _decode_funcname(dwarfinfo, address):
        # Go over all DIEs in the DWARF information, looking for a subprogram
        # entry with an address range that includes the given address. Note
        # that this simplifies things by disregarding subprograms that may
        # have split address ranges.
        for CU in dwarfinfo.iter_CUs():
            for DIE in CU.iter_DIEs():
                if DIE.tag != 'DW_TAG_subprogram':
                    continue
                try:
                    lowpc = DIE.attributes['DW_AT_low_pc'].value

                    # DWARF v4 in section 2.17 describes how to interpret the
                    # DW_AT_high_pc attribute based on the class of its form.
                    # For class 'address' it's taken as an absolute address
                    # (similarly to DW_AT_low_pc); for class 'constant', it's
                    # an offset from DW_AT_low_pc.
                    highpc_attr = DIE.attributes['DW_AT_high_pc']
                    highpc_attr_class = describe_form_class(highpc_attr.form)
                    if highpc_attr_class == 'address':
                        highpc = highpc_attr.value
                    elif highpc_attr_class == 'constant':
                        highpc = lowpc + highpc_attr.value
                    else:
                        print('Error: invalid DW_AT_high_pc class:',
                              highpc_attr_class)
                        continue

                    # high_pc is the first address past the subprogram, so
                    # the upper bound is exclusive.
                    if lowpc <= address < highpc:
                        return DIE.attributes['DW_AT_name'].value
                except KeyError:
                    continue
        return None

    def _decode_file_line(dwarfinfo, address):
        '''
        Go over all the line programs in the DWARF information, looking for
        one that describes the given address.
        '''
        for CU in dwarfinfo.iter_CUs():
            # First, look at line programs to find the file/line for the
            # address. A CU may come without a line program.
            lineprog = dwarfinfo.line_program_for_CU(CU)
            if lineprog is None:
                continue
            prevstate = None
            for entry in lineprog.get_entries():
                # We're interested in those entries where a new state is
                # assigned
                if entry.state is None:
                    continue
                # Looking for a range of addresses in two consecutive states
                # that contain the required address.
                if prevstate and prevstate.address <= address < entry.state.address:
                    filename = lineprog['file_entry'][prevstate.file - 1].name
                    line = prevstate.line
                    return filename, line
                if entry.state.end_sequence:
                    # The address of an end_sequence state closes the range
                    # of the previous state (checked above); it must not
                    # open a range that reaches into the next sequence.
                    prevstate = None
                else:
                    prevstate = entry.state
        return None, None

    def _decode_addr(address):
        # Debugging aid: log the function/file/line that own an address.
        funcname = _decode_funcname(dwarfinfo, address)
        src_file, line = _decode_file_line(dwarfinfo, address)
        logging.debug('\tFunction: %s',
                      bytes2str(funcname) if funcname is not None else None)
        logging.debug('\tFile: %s',
                      bytes2str(src_file) if src_file is not None else None)
        logging.debug('\tLine: %s', line)

    dwarfinfo = self.EP.elf.get_dwarf_info()

    # Maps the offset of each DW_AT_low_pc attribute to its current value.
    update_pos_vals = {}
    for CU in dwarfinfo.iter_CUs():
        # DWARFInfo allows to iterate over the compile units contained in
        # the .debug_info section. CU is a CompileUnit object, with some
        # computed attributes (such as its offset in the section) and
        # a header which conforms to the DWARF standard. The access to
        # header elements is, as usual, via item-lookup.
        logging.debug(' Found a compile unit at offset %s, length %s',
                      CU.cu_offset, CU['unit_length'])

        # A CU provides a simple API to iterate over all the DIEs in it.
        for DIE in CU.iter_DIEs():
            # Here we only care the addresses for the subprograms
            # TODO: check it out for DW_TAG_inlined_subroutine (DWARF4)!
            if DIE.tag != 'DW_TAG_subprogram':
                continue
            attr = DIE.attributes.get('DW_AT_low_pc')
            if attr is None:
                continue
            logging.debug(' DIE %s. attr %s. Off: 0x%x, Addr:0x%x',
                          DIE.tag, attr.name, attr.offset, attr.value)
            # _decode_addr(attr.value)
            update_pos_vals[attr.offset] = attr.value

    relocating = self.EI.base > 0
    pos = 0
    for off in sorted(update_pos_vals):
        # Everything between two address fields is copied verbatim.
        self.instBin += sectionChunk[pos:off]
        va = update_pos_vals[off]
        if relocating:
            try:
                updated_va = self.EI.getBBlByVA(va).newVA
            except AttributeError:
                # No basic block for this address: keep the old value.
                self.instBin += self.PK(FMT.ULONG, va)
                logging.warning("\t Could not find a proper BBL in a debugging section (Ignored)")
            else:
                self.instBin += self.PK(FMT.ULONG, updated_va)
                logging.debug('[%s] 0x%08x -> 0x%08x',
                              secName, va, updated_va)
        else:
            # Nothing to relocate: pass the original address bytes through
            # so the emitted section keeps its size and layout.
            self.instBin += sectionChunk[off:off + addr_size]
        pos = off + addr_size

    self.instBin += sectionChunk[pos:]
# correctly reflect the nesting depth # die_depth = 0 for die in cu.iter_DIEs(): rr = collections.OrderedDict() rr['die_depth'] = die_depth rr['offset'] = die.offset rr['abbrev_code'] = die.abbrev_code rr['die_null'] = die.is_null() rr['section_offset'] = section_offset if die.is_null(): die_depth -= 1 continue for attr in itervalues(die.attributes): r = collections.OrderedDict() name = attr.name # Unknown attribute values are passed-through as integers if isinstance(name, int): raise ValueError("Unknown attribute") r['name'] = name r['attr_offset'] = attr.offset r['attr_value'] = describe_attr_value(attr, die, section_offset) r['has_child'] = die.has_children r.update(rr) if die.has_children: die_depth += 1