def __getitem__(self, name):
    # djb2-style GNU hash of the symbol name.
    hash = 5381
    for c in name:
        hash = hash * 33 + c & bitmask(32)

    # Bloom filter check: both derived bits must be set in the mask word,
    # otherwise the symbol is definitely not in the table.
    word = hash // self.maskword_bits % self.header['maskwords']
    self.stream.seek(self.filter + word * self.maskword_size)
    word = struct_parse(self.Maskword, self.stream)
    mask = 1 << hash % self.maskword_bits
    mask |= 1 << (hash >> self.header['shift2']) % self.maskword_bits
    if ~word & mask:
        raise KeyError(name)

    self.stream.seek(self.buckets + hash % self.header['nbuckets'] * 4)
    bucket = struct_parse(self.Elf_word(None), self.stream)
    if not bucket:
        raise KeyError(name)

    # Walk the hash chain starting at the bucket's first symbol index; the
    # low bit of a chain value marks the end of the chain.
    while True:
        offset = self.values + (bucket - self.header['symndx']) * 4
        value = struct_parse(self.Elf_word(None), self.stream,
                             stream_pos=offset)
        if value & ~1 == hash & ~1:
            sym = self.symtab[bucket]
            if sym.name == name:
                return sym
        if value & 1:
            raise KeyError(name)
        bucket += 1
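# A hedged, standalone sketch of the GNU hash function that the lookup above
# inlines (djb2 with multiplier 33, truncated to 32 bits). gnu_hash() is an
# illustrative helper name, not part of the code above; it expects the symbol
# name as bytes.
def gnu_hash(name):
    h = 5381
    for c in name:
        h = (h * 33 + c) & 0xffffffff
    return h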
def parse_macro_section(elf, dwarf, section):
    header_struct = create_header_struct(elf, dwarf)
    section.stream.seek(section['sh_offset'])
    header = struct_parse(header_struct, section.stream)
    yield header
    entry_struct = create_entry_struct(elf, dwarf, header.flags)
    while section.stream.tell() - section['sh_offset'] < section['sh_size']:
        yield struct_parse(entry_struct, section.stream)
def line_program_for_CU(self, cu):
    top_DIE = cu.get_top_DIE()
    if 'DW_AT_stmt_list' in top_DIE.attributes:
        stm = self.linestream
        stm.seek(top_DIE.attributes['DW_AT_stmt_list'].value, 0)
        structs = self.structs
        len = struct_parse(structs.Dwarf_uint32(''), stm)
        pc = struct_parse(structs.Dwarf_target_addr(''), stm)
        return LineTableV1(stm, structs, len, pc)
    else:
        return None
def __iter__(self):
    for offset in range(self.buckets, self.values, 4):
        self.stream.seek(offset)
        bucket = struct_parse(self.Elf_word(None), self.stream)
        if not bucket:
            continue
        sym = self.symtab[bucket]
        while True:
            offset = self.values + (bucket - self.header['symndx']) * 4
            value = struct_parse(self.Elf_word(None), self.stream,
                                 stream_pos=offset)
            yield self.symtab[bucket]
            if value & 1:
                break
            bucket += 1
def __iter__(self):
    end = self.chain + self.header['nchain'] * 4
    for offset in range(self.buckets, end, 4):
        self.stream.seek(offset)
        sym = struct_parse(self.Elf_word(None), self.stream)
        if sym:
            yield self.symtab[sym]
def compute_relocation(elffile, reloc, symtab):
    if reloc['r_info_sym'] >= symtab.num_symbols():
        raise ELFRelocationError(
            'Invalid symbol reference in relocation: index %s' % (
                reloc['r_info_sym']))
    sym_value = symtab.get_symbol(reloc['r_info_sym'])['st_value']

    reloc_type = reloc['r_info_type']
    recipe = None

    if elffile.get_machine_arch() == 'x86':
        if reloc.is_RELA():
            raise ELFRelocationError(
                'Unexpected RELA relocation for x86: %s' % reloc)
        recipe = _RELOCATION_RECIPES_X86.get(reloc_type, None)
    elif elffile.get_machine_arch() == 'x64':
        if not reloc.is_RELA():
            raise ELFRelocationError(
                'Unexpected REL relocation for x64: %s' % reloc)
        recipe = _RELOCATION_RECIPES_X64.get(reloc_type, None)
    elif elffile.get_machine_arch() == 'MIPS':
        if reloc.is_RELA():
            raise ELFRelocationError(
                'Unexpected RELA relocation for MIPS: %s' % reloc)
        recipe = _RELOCATION_RECIPES_MIPS.get(reloc_type, None)

    if recipe is None:
        raise ELFRelocationError(
            'Unsupported relocation type: %s' % reloc_type)

    # So now we have everything we need to actually perform the relocation.
    # Let's get to it:

    # 0. Find out which struct we're going to be using to read this value
    #    from the stream and write it back.
    if recipe.bytesize == 4:
        value_struct = elffile.structs.Elf_word('')
    elif recipe.bytesize == 8:
        value_struct = elffile.structs.Elf_word64('')
    else:
        raise ELFRelocationError('Invalid bytesize %s for relocation' %
                                 recipe.bytesize)

    # 1. Read the value from the stream (with correct size and endianness)
    original_value = struct_parse(value_struct, elffile.stream,
                                  stream_pos=reloc['r_offset'])
    # 2. Apply the relocation to the value, acting according to the recipe
    relocated_value = recipe.calc_func(
        value=original_value,
        sym_value=sym_value,
        offset=reloc['r_offset'],
        addend=reloc['r_addend'] if recipe.has_addend else 0)
    # 3. Return relocation as a tuple
    # Make sure the relocated value fits back by wrapping it around. This
    # looks like a problem, but it seems to be the way this is done in
    # binutils too.
    relocated_value = relocated_value % (2 ** (recipe.bytesize * 8))
    return (reloc['r_offset'], relocated_value)
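# A hedged sketch (not taken from the module above) of what one of the
# _RELOCATION_RECIPES_* tables consumed by compute_relocation() might look
# like. The namedtuple layout mirrors the attributes used above (bytesize,
# has_addend, calc_func); the key values and helper names are assumptions for
# illustration, with only the standard formulas S + A and S + A - P taken as
# given.
from collections import namedtuple

_RELOCATION_RECIPE_TYPE = namedtuple(
    '_RELOCATION_RECIPE_TYPE', 'bytesize has_addend calc_func')

def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0):
    # S + A
    return sym_value + addend

def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0):
    # S + A - P
    return sym_value + addend - offset

_RELOCATION_RECIPES_X64_SKETCH = {
    1: _RELOCATION_RECIPE_TYPE(              # R_X86_64_64
        bytesize=8, has_addend=True,
        calc_func=_reloc_calc_sym_plus_addend),
    2: _RELOCATION_RECIPE_TYPE(              # R_X86_64_PC32
        bytesize=4, has_addend=True,
        calc_func=_reloc_calc_sym_plus_addend_pcrel),
}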
def _parse_call_site_entry(self, encoding):
    base_encoding = encoding & 0x0f
    modifier = encoding & 0xf0

    # header
    s = struct_parse(
        Struct('CallSiteEntry',
               self._formats[base_encoding]('cs_start'),
               self._formats[base_encoding]('cs_len'),
               self._formats[base_encoding]('cs_lp'),
               self.entry_structs.Dwarf_uleb128('cs_action'),
               ),
        self.stream)
    cs_start = s['cs_start']
    cs_len = s['cs_len']
    cs_lp = s['cs_lp']
    cs_action = s['cs_action']

    if modifier != 0:
        raise NotImplementedError(
            "Unsupported modifier for CallSiteEntry: %#x." % modifier)

    return CallSiteEntry(cs_start, cs_len, cs_lp, cs_action)
def parse_expr(self, expr):
    stm = BytesIO(bytelist2string(expr))
    parsed = []

    while True:
        # Get the next opcode from the stream. If nothing is left in the
        # stream, we're done.
        byte = stm.read(1)
        if len(byte) == 0:
            break

        # Decode the opcode and its name.
        op = ord(byte)
        op_name = DW_OP_opcode2name.get(op, 'OP:0x%x' % op)

        if op <= 4 or op == 0x80:
            args = [
                struct_parse(self.structs.Dwarf_target_addr(''), stm),
            ]
        else:
            args = []

        parsed.append(DWARFExprOp(op=op, op_name=op_name, args=args))

    return parsed
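# The DWARFExprOp record built above is presumably a plain namedtuple; a
# minimal sketch matching the three keyword arguments used by parse_expr()
# (the real class may carry extra fields, such as the op's offset within the
# expression):
from collections import namedtuple

DWARFExprOp = namedtuple('DWARFExprOp', 'op op_name args')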
def __init__(self, stm, cu, di):
    self.cu = cu
    self.dwarfinfo = di
    self.stream = stm
    self.offset = stm.tell()
    self.attributes = OrderedDict()
    self.tag = None
    self.has_children = None
    self.abbrev_code = None
    self.size = 0
    # Null DIE terminator. It can be used to obtain offset range occupied
    # by this DIE including its whole subtree.
    self._terminator = None
    self._parent = None

    structs = self.dwarfinfo.structs
    self.size = struct_parse(structs.Dwarf_uint32(''), stm)
    if self.size < 8:
        self.tag = 'DW_TAG_padding'
        self.has_children = False
    else:
        tag_code = struct_parse(structs.Dwarf_uint16(''), stm)
        if tag_code not in TAG_reverse:
            raise ValueError("%d not a known tag" % (tag_code))
        self.tag = TAG_reverse[tag_code]
        while stm.tell() < self.offset + self.size:
            attr_offset = self.stream.tell()
            attr = struct_parse(structs.Dwarf_uint16(''), stm)
            form = FORM_reverse[attr & 0xf]
            attr >>= 4
            if attr in ATTR_reverse:
                name = ATTR_reverse[attr]
            elif 0x200 <= attr <= 0x3ff:
                # DW_AT_MIPS represented as 0x204???
                name = 'DW_AT_user_0x%x' % attr
            else:
                raise ValueError("%d not a known attribute" % (attr))
            raw_value = struct_parse(structs.Dwarf_dw_form[form], stm)
            value = raw_value
            self.attributes[name] = AttributeValue(
                name=name, form=form, value=value,
                raw_value=raw_value, offset=attr_offset)
        self.has_children = self.attributes[
            'DW_AT_sibling'].value >= self.offset + self.size + 8
def _parse_lsda_header(self):
    # lpstart
    lpstart_encoding = self.stream.read(1)[0]
    if lpstart_encoding != DW_EH_encoding_flags['DW_EH_PE_omit']:
        base_encoding = lpstart_encoding & 0x0f
        modifier = lpstart_encoding & 0xf0

        lpstart = struct_parse(
            Struct('dummy', self._formats[base_encoding]('LPStart')),
            self.stream)['LPStart']

        if modifier == 0:
            pass
        elif modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']:
            lpstart += self.address + (self.stream.tell() - self.base_offset)
        else:
            raise NotImplementedError("Unsupported modifier %#x." % modifier)
    else:
        lpstart = None

    # ttype
    ttype_encoding = self.stream.read(1)[0]
    if ttype_encoding != DW_EH_encoding_flags['DW_EH_PE_omit']:
        ttype_offset = struct_parse(
            Struct('dummy', self.entry_structs.Dwarf_uleb128('TType')),
            self.stream)['TType']
    else:
        ttype_offset = None

    # call site table length
    cstable_encoding = self.stream.read(1)[0]
    cstable_length = struct_parse(
        Struct('dummy', self.entry_structs.Dwarf_uleb128('CSTable')),
        self.stream)['CSTable']

    return ExceptionTableHeader(
        lpstart,
        ttype_encoding,
        ttype_offset,
        cstable_encoding,
        cstable_length,
    )
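# A hedged sketch of the DW_EH_PE encoding split that the LSDA parsing above
# relies on: the low nibble selects the value format (how self._formats is
# indexed) and the high nibble selects how the decoded value is applied. The
# constants below follow the usual Linux Standard Base eh_frame values; treat
# them as illustrative rather than as the exact DW_EH_encoding_flags table
# used by the code above.
DW_EH_PE_absptr = 0x00   # format: native-size pointer
DW_EH_PE_uleb128 = 0x01  # format: unsigned LEB128
DW_EH_PE_udata4 = 0x03   # format: unsigned 4-byte value
DW_EH_PE_pcrel = 0x10    # modifier: relative to the current position
DW_EH_PE_datarel = 0x30  # modifier: relative to the section/data base
DW_EH_PE_omit = 0xff     # the value is not present at all

def split_encoding(encoding):
    # Return (base_encoding, modifier) the way the parsers above split it.
    return encoding & 0x0f, encoding & 0xf0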
def __init__(self, elf, *pos, **kw):
    BaseHash.__init__(self, elf, *pos, **kw)
    self.Elf_word = elf.structs.Elf_word
    self.header = Struct('Hash table header',
                         self.Elf_word('nbucket'),
                         self.Elf_word('nchain'))
    self.header = struct_parse(self.header, self.stream)
    self.buckets = self.stream.tell()
    self.chain = self.buckets + self.header['nbucket'] * 4
def _iter_verneed(self, offset):
    [num] = self.entries["DT_VERNEEDNUM"]
    Elf_Verneed = self.elf.structs.Elf_Verneed
    for _ in range(num):
        entry = struct_parse(Elf_Verneed, self.elf.stream, offset)
        aux_offset = offset + entry["vn_aux"]
        versions = self._file_versions(aux_offset, entry["vn_cnt"])
        yield (self.strtab[entry["vn_file"]], versions)
        offset += entry["vn_next"]
def __getitem__(self, sym):
    """Get Symbol() object for given table index"""
    self.stream.seek(self.offset + sym * self.entsize)
    entry = struct_parse(self.Elf_Sym, self.stream)
    name = entry['st_name']
    if name:
        name = self.stringtable[name]
    else:
        name = None
    return Symbol(entry, name)
def __parse_files_info(self, dwarf_info, structs, offset=0):
    files = {}
    lineprog_header = struct_parse(structs.Dwarf_lineprog_header,
                                   dwarf_info.debug_line_sec.stream,
                                   offset)
    for i, entry in enumerate(lineprog_header.file_entry):
        files[i + 1] = File(
            id=i + 1,
            name=entry.name,
            directory=lineprog_header.include_directory[entry.dir_index - 1])
    return files
def get_entries(self):
    if self._decoded_entries is None:
        stm = self.stm
        offset = stm.tell()
        end_offset = offset + self.len
        structs = self.structs
        entries = []
        pc = self.pc
        # Each record is (line: uint32, col: uint16, pc_delta: uint32);
        # a line number of 0 terminates the table.
        while stm.tell() < end_offset:
            line = struct_parse(structs.Dwarf_uint32(''), stm)
            col = struct_parse(structs.Dwarf_uint16(''), stm)
            pc_delta = struct_parse(structs.Dwarf_uint32(''), stm)
            if line == 0:
                break
            state = LineState(True)
            state.file = 0
            state.line = line
            state.column = col if col != 0xffff else None
            state.address = pc
            entries.append(LineProgramEntry(0, False, [], state))
            pc += pc_delta
        self._decoded_entries = entries
    return self._decoded_entries
def version_definitions(self):
    offset = self.entries["DT_VERDEF"]
    if not offset:
        return
    [offset] = offset
    offset = self.segments.map(offset)
    [num] = self.entries["DT_VERDEFNUM"]
    Elf_Verdef = self.elf.structs.Elf_Verdef
    for _ in range(num):
        entry = struct_parse(Elf_Verdef, self.elf.stream, offset)
        aux_offset = offset + entry["vd_aux"]
        names = self._version_names(aux_offset, entry["vd_cnt"])
        definition = Version(entry, next(names))
        yield (definition, names)
        offset += entry["vd_next"]
def __getitem__(self, name):
    hash = 0
    for c in name:
        hash = (hash << 4) + c
        hash = (hash ^ hash >> 24 & 0xF0) & bitmask(28)
    self.stream.seek(self.buckets + hash % self.header['nbucket'] * 4)
    while True:
        index = struct_parse(self.Elf_word(None), self.stream)
        if not index:
            raise KeyError(name)
        sym = self.symtab[index]
        if sym.name == name:
            return sym
        self.stream.seek(self.chain + index * 4)
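# A hedged, standalone sketch of the classic SysV ELF hash in its canonical
# spec form, for comparison with the shortened variant inlined in __getitem__
# above. elf_hash_sysv() is an illustrative helper name, not part of the code
# above; it expects the symbol name as bytes.
def elf_hash_sysv(name):
    h = 0
    for c in name:
        h = (h << 4) + c
        g = h & 0xf0000000
        if g:
            h ^= g >> 24
        h &= ~g
    return h & 0xffffffff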
def __init__(self, elf, *pos, **kw):
    BaseHash.__init__(self, elf, *pos, **kw)
    self.Elf_word = elf.structs.Elf_word
    self.Maskword = elf.structs.Elf_xword("Maskword")
    self.header = Struct('GNU hash table header',
                         self.Elf_word('nbuckets'),
                         self.Elf_word('symndx'),
                         self.Elf_word('maskwords'),
                         self.Elf_word('shift2'),
                         )
    self.header = struct_parse(self.header, self.stream)
    self.filter = self.stream.tell()
    self.maskword_size = self.Maskword.sizeof()
    self.maskword_bits = self.maskword_size * 8
    self.buckets = (self.filter +
                    self.header['maskwords'] * self.maskword_size)
    self.values = self.buckets + self.header['nbuckets'] * 4
def iter_notes(self):
    """ Iterates the list of notes in the segment. """
    offset = self['p_offset']
    end = self['p_offset'] + self['p_filesz']
    while offset < end:
        note = struct_parse(self.elffile.structs.Elf_Nhdr, self.stream,
                            stream_pos=offset)
        note['n_offset'] = offset
        offset += self.elffile.structs.Elf_Nhdr.sizeof()
        self.stream.seek(offset)
        # n_namesz is 4-byte aligned.
        disk_namesz = roundup(note['n_namesz'], 2)
        note['n_name'] = bytes2str(
            CString('').parse(self.stream.read(disk_namesz)))
        offset += disk_namesz

        desc_data = bytes2str(self.stream.read(note['n_descsz']))
        note['n_desc'] = desc_data
        offset += roundup(note['n_descsz'], 2)
        note['n_size'] = offset - note['n_offset']
        yield note
def iter_notes(self):
    """ Iterates the list of notes in the segment. """
    offset = self['p_offset']
    end = self['p_offset'] + self['p_filesz']
    while offset < end:
        note = struct_parse(self._elfstructs.Elf_Nhdr, self.stream,
                            stream_pos=offset)
        note['n_offset'] = offset
        offset += self._elfstructs.Elf_Nhdr.sizeof()
        self.stream.seek(offset)
        # n_namesz is 4-byte aligned.
        disk_namesz = roundup(note['n_namesz'], 2)
        note['n_name'] = bytes2str(
            CString('').parse(self.stream.read(disk_namesz)))
        offset += disk_namesz

        desc_data = bytes2str(self.stream.read(note['n_descsz']))
        note['n_desc'] = desc_data
        offset += roundup(note['n_descsz'], 2)
        note['n_size'] = offset - note['n_offset']
        yield note
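# The roundup() helper used by both iter_notes() variants above rounds an
# offset up to a 2**bits boundary (bits=2 gives the 4-byte alignment of note
# names and descriptors). A minimal sketch of such a helper, assuming that
# contract:
def roundup(num, bits):
    # Round num up to the next multiple of 2**bits.
    mask = (1 << bits) - 1
    return (num + mask) & ~mask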
def rel_table_entries(self, entries, size, type):
    (entsize, Struct) = {
        "DT_RELA": ("DT_RELAENT", self.elf.structs.Elf_Rela),
        "DT_REL": ("DT_RELENT", self.elf.structs.Elf_Rel),
    }[type]
    (table,) = entries
    (size,) = self.entries[size]
    (entsize,) = self.entries[entsize]
    table = self.segments.map(table, size)

    if entsize < Struct.sizeof():
        msg = "{} entry size too small: {}"
        raise NotImplementedError(msg.format(Struct.name, entsize))
    if size % entsize:
        msg = "{} table size {} is not a multiple of its entry size"
        raise NotImplementedError(msg.format(Struct.name, size))

    # TODO: mmap
    # TODO: read rel table in one go
    for offset in range(table, table + size, entsize):
        self.elf.stream.seek(offset)
        entry = struct_parse(Struct, self.elf.stream)
        yield Relocation(entry, self.elf)
def _parse_lsda_header(self):
    # https://www.airs.com/blog/archives/464
    self.elf.seek(self.lsda_offset)

    lpstart_raw = self.elf.read(1)[0]
    lpstart = None
    if lpstart_raw != DW_EH_encoding_flags['DW_EH_PE_omit']:
        # See https://www.airs.com/blog/archives/460, it should be omit in
        # practice
        raise Exception("We do not handle this case for now")

        base_encoding = lpstart_raw & 0x0F
        modifier = lpstart_raw & 0xF0

        lpstart = struct_parse(
            Struct('dummy', self._formats[base_encoding]('LPStart')),
            self.elf)['LPStart']

        if modifier == 0:
            pass
        elif modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']:
            lpstart += self.address + (self.elf.tell() - self.base_offset)
        else:
            raise Exception("what")

    typetable_encoding = self.elf.read(1)[0]
    typetable_offset = None

    # NOW TODO : the encoding is the right one + 1, which is weird
    self.typetable_offset = 0
    if typetable_encoding != DW_EH_encoding_flags['DW_EH_PE_omit']:
        self.typetable_offset = struct_parse(
            Struct('dummy', self.entry_structs.Dwarf_uleb128('TType')),
            self.elf)['TType']
        self.typetable_offset_present = True
    else:
        self.typetable_offset_present = False

    call_site_table_encoding = self.elf.read(1)[0]
    call_site_table_len = struct_parse(
        Struct('dummy', self.entry_structs.Dwarf_uleb128('CSTable')),
        self.elf)['CSTable']

    if not self.typetable_offset_present:
        self.typetable_offset = call_site_table_len

    print("lpstart", lpstart_raw)
    print("lpstart_pcrel", lpstart)
    print("typetable_encoding", typetable_encoding)
    print("call_site_table_encoding", call_site_table_encoding)
    print("CALL SITE TABLE LENGTH %d" % call_site_table_len)
    print("TYPETABLE OFFSET %d" % self.typetable_offset)

    self.end_label = ".LLSDATT%x" % self.fstart
    self.table_label = ".LLSDATTD%x" % self.fstart
    self.action_label = ".LLSDACSE%x" % self.fstart
    self.callsite_label = ".LLSDACSB%x" % self.fstart
    self.ttable_prefix_label = ".LLSDATYP%x" % self.fstart

    # Need to construct some representation here.
    self.header = {
        "lpstart": lpstart_raw,
        "encoding": call_site_table_encoding,
        "typetable_encoding": typetable_encoding,
        "call_site_table_len": call_site_table_len,
        # Don't forget to check typetable_offset_present
        "cs_act_tt_total_len": self.typetable_offset,
    }
def ParseELFSymbols(elf):
    """Parses list of symbols in an ELF file.

    Args:
        elf: An elffile.ELFFile instance.

    Returns:
        A 2-tuple of (imported, exported) symbols. |imported| is a set of
        strings of undefined symbols. |exported| is a dict where the keys are
        defined symbols and the values are 3-tuples (st_info_bind, st_size,
        st_shndx) with the details of the corresponding exported symbol. Note
        that for imported symbols this information is always
        ('STB_GLOBAL', 0, 'SHN_UNDEF') and thus not included in the result.
    """
    imp = set()
    exp = dict()

    if elf.header.e_type not in ('ET_DYN', 'ET_EXEC'):
        return imp, exp

    for segment in elf.iter_segments():
        if segment.header.p_type != 'PT_DYNAMIC':
            continue

        # Find strtab and symtab virtual addresses.
        strtab_ptr = None
        symtab_ptr = None
        symbol_size = elf.structs.Elf_Sym.sizeof()
        for tag in segment.iter_tags():
            if tag.entry.d_tag == 'DT_SYMTAB':
                symtab_ptr = tag.entry.d_ptr
            if tag.entry.d_tag == 'DT_STRTAB':
                strtab_ptr = tag.entry.d_ptr
            if tag.entry.d_tag == 'DT_SYMENT':
                assert symbol_size == tag.entry.d_val

        stringtable = segment._get_stringtable()  # pylint: disable=W0212

        symtab_offset = next(elf.address_offsets(symtab_ptr))
        # Assume that symtab ends right before strtab.
        # This is the same assumption that glibc makes in dl-addr.c.
        # The first symbol is always local undefined, unnamed so we ignore it.
        for i in range(1, (strtab_ptr - symtab_ptr) // symbol_size):
            symbol_offset = symtab_offset + (i * symbol_size)
            symbol = utils.struct_parse(elf.structs.Elf_Sym, elf.stream,
                                        symbol_offset)
            if symbol['st_info']['bind'] == 'STB_LOCAL':
                # Ignore local symbols.
                continue
            symbol_name = stringtable.get_string(symbol.st_name)
            if symbol['st_shndx'] == 'SHN_UNDEF':
                if symbol['st_info']['bind'] == 'STB_GLOBAL':
                    # Global undefined --> required symbols.
                    # We ignore weak undefined symbols.
                    imp.add(symbol_name)
            elif symbol['st_other']['visibility'] == 'STV_DEFAULT':
                # Exported symbols must have default visibility.
                st_shndx = SH_TYPE_VALUES.get(symbol['st_shndx'],
                                              symbol['st_shndx'])
                exp[symbol_name] = (symbol['st_info']['bind'],
                                    symbol['st_size'], st_shndx)

    return imp, exp
def ParseELFSymbols(elf):
    """Parses list of symbols in an ELF file.

    Args:
        elf: An elffile.ELFFile instance.

    Returns:
        A 2-tuple of (imported, exported) symbols, each of which is a set.
    """
    imp = set()
    exp = set()

    if elf.header.e_type not in ('ET_DYN', 'ET_EXEC'):
        return imp, exp

    for segment in elf.iter_segments():
        if segment.header.p_type != 'PT_DYNAMIC':
            continue

        # Find strtab and symtab virtual addresses.
        symtab_ptr = None
        dthash_ptr = None
        symbol_size = elf.structs.Elf_Sym.sizeof()
        for tag in segment.iter_tags():
            if tag.entry.d_tag == 'DT_SYMTAB':
                symtab_ptr = tag.entry.d_ptr
            if tag.entry.d_tag == 'DT_SYMENT':
                assert symbol_size == tag.entry.d_val
            if tag.entry.d_tag == 'DT_HASH':
                dthash_ptr = tag.entry.d_ptr

        stringtable = segment._get_stringtable()  # pylint: disable=protected-access

        symtab_offset = next(elf.address_offsets(symtab_ptr))

        if dthash_ptr:
            # DT_SYMTAB provides no information on the number of symbol table
            # entries. Instead, we use DT_HASH's nchain value, which according
            # to the spec, "should equal the number of symbol table entries".
            # nchain is the second 32-bit integer at the address pointed by
            # DT_HASH, both for ELF and ELF64 formats.
            fmt = '<I' if elf.little_endian else '>I'
            elf.stream.seek(dthash_ptr + 4)
            nsymbols = struct.unpack(fmt, elf.stream.read(4))[0]
        else:
            # Get the size of DYNSYM section from section header.
            symtab_size = int(GetSymbolTableSize(elf))
            nsymbols = symtab_size // symbol_size

        # The first symbol is always local undefined, unnamed so we ignore it.
        for i in range(1, nsymbols):
            symbol_offset = symtab_offset + (i * symbol_size)
            symbol = utils.struct_parse(elf.structs.Elf_Sym, elf.stream,
                                        symbol_offset)
            if symbol['st_info']['bind'] == 'STB_LOCAL':
                # Ignore local symbols.
                continue
            symbol_name = stringtable.get_string(symbol.st_name)
            if symbol['st_shndx'] == 'SHN_UNDEF':
                if symbol['st_info']['bind'] == 'STB_GLOBAL':
                    # Global undefined --> required symbols.
                    # We ignore weak undefined symbols.
                    imp.add(symbol_name)
            elif symbol['st_other']['visibility'] == 'STV_DEFAULT':
                # Exported symbols must have default visibility.
                exp.add(symbol_name)

    return imp, exp
def _parse_lsda_entries(self):
    start_cs_offset = self.elf.tell()
    action_count = 0
    while (self.elf.tell() - start_cs_offset <
           self.header["call_site_table_len"]):
        base_encoding = self.header["encoding"] & 0x0f
        modifier = self.header["encoding"] & 0xf0

        # Maybe we need to store the offset in the entry ?

        # header
        s = struct_parse(
            Struct('CallSiteEntry',
                   self._formats[base_encoding]('cs_start'),
                   self._formats[base_encoding]('cs_len'),
                   self._formats[base_encoding]('cs_lp'),
                   self.entry_structs.Dwarf_uleb128('cs_action'),
                   ),
            self.elf)

        cs_action = s['cs_action']
        if cs_action != 0:
            action_offset_bytes = (cs_action + 1) >> 1
            action_count = max(action_count, action_offset_bytes)

        self.entries.append(s)

        processed_bytes = self.elf.tell() - start_cs_offset
        print("+++++ %d bytes read, %d to go, %d actions" %
              (processed_bytes, self.typetable_offset - processed_bytes,
               action_count))

    idx = action_count
    processed_action_count = 0
    while idx > 0:
        action = struct_parse(
            Struct("ActionEntry",
                   self.entry_structs.Dwarf_uint8('act_filter'),
                   self.entry_structs.Dwarf_uint8('act_next')),
            self.elf)
        print(">>>> ACTION ", action)
        self.actions.append(action)
        processed_action_count = processed_action_count + 1
        idx -= 1

    ttendloc = self.lsda_offset + self.typetable_offset + 3
    print("TT %x" % (ttendloc))
    for action in self.actions:
        type_bytes_offset = (action.act_filter * -8)
        ttentryloc = ttendloc + type_bytes_offset
        print("****** TT LOC: %x" % (ttentryloc))
        self.elf.seek(ttentryloc, io.SEEK_SET)
        ptrbytes = self.elf.read(8)
        ptr = struct.unpack("<Q", ptrbytes)[0]
        print("****** TT PTR: %x" % ptr)
        symbolized_target = 0
        if ptr != 0:
            symbolized_target = ptr + ttentryloc
        typeentry = {'address': symbolized_target}
        self.ttentries[action.act_filter] = typeentry
        print("****** TT x: %x" % (symbolized_target))
def _file_versions(self, offset, cnt):
    Elf_Vernaux = self.elf.structs.Elf_Vernaux
    for _ in range(cnt):
        entry = struct_parse(Elf_Vernaux, self.elf.stream, offset)
        yield VersionAuxiliary(entry, self.strtab[entry["vna_name"]])
        offset += entry["vna_next"]
def _version_names(self, offset, cnt):
    Elf_Verdaux = self.elf.structs.Elf_Verdaux
    for _ in range(cnt):
        entry = struct_parse(Elf_Verdaux, self.elf.stream, offset)
        yield self.strtab[entry["vda_name"]]
        offset += entry["vda_next"]