class ElfParser(object):
    """Expose the ``.text`` bytes and the function symbols of an ELF file.

    The file is opened in ``__init__`` and is not closed by this class: the
    ``ELFFile`` wrapping the stream is stored in ``self.elffile``.
    """

    def __init__(self, f):
        """Load ``.text`` from the ELF file at path *f* and index its functions."""
        # The stream is deliberately left open; self.elffile keeps using it.
        self.elffile = ELFFile(open(f, "rb"))
        self.text_section = self.elffile.get_section_by_name(".text")
        self.code = self.text_section.data()
        self.code_len = len(self.code)
        self.text_offset = self.text_section.header.sh_addr
        self.funcs = FunctionsList()
        self.init_functions_list()

    def get_functions_list(self):
        """Return the list of functions collected from the symbol table."""
        return self.funcs

    def get_code_and_funcs(self):
        """Return the pair ``(code bytes, functions list)``."""
        return self.get_binary_code(), self.get_functions_list()

    def get_functions_num(self):
        """Return the number of collected functions."""
        # Fixed: the original referenced the undefined global ``funcs``.
        return len(self.funcs)

    def get_code_len(self):
        """Return the length in bytes of the ``.text`` contents."""
        return self.code_len

    def get_binary_code(self):
        """Return the raw contents of the ``.text`` section."""
        return self.code

    def get_section_idx(self, section):
        """Return the index of *section* in the section table, or None."""
        for idx, candidate in enumerate(self.elffile.iter_sections()):
            if candidate == section:
                return idx
        return None

    def va_to_offset(self, va):
        """Convert a virtual address to an offset relative to ``.text``."""
        return va - self.text_offset

    def offset_to_va(self, offset):
        """Convert an offset relative to ``.text`` to a virtual address."""
        return offset + self.text_offset

    @staticmethod
    def is_function_symbol(symbol, section_idx):
        """Return True for a non-empty STT_FUNC symbol defined in *section_idx*."""
        entry = symbol.entry
        return (entry.st_info.type == "STT_FUNC"
                and entry.st_shndx == section_idx
                and entry.st_size > 0)

    def init_functions_list(self):
        """Fill ``self.funcs`` with every function symbol located in ``.text``.

        Raises:
            Exception: if ``.symtab`` is missing or is not a symbol table.
        """
        symtab = self.elffile.get_section_by_name(".symtab")
        text_section_idx = self.get_section_idx(self.text_section)
        if not isinstance(symtab, SymbolTableSection):
            raise Exception("ELF file has no usable .symtab section")
        for symbol in symtab.iter_symbols():
            if self.is_function_symbol(symbol, text_section_idx):
                sym_offset = self.va_to_offset(symbol.entry.st_value)
                self.funcs.append(symbol.name, sym_offset,
                                  symbol.entry.st_size)

    def print_functions_list(self):
        """Print a table header followed by one line per collected function."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%-30s\t%8s\t%8s" % ("Name", "Offset", "Size"))
        print("-" * 58)
        for func in self.funcs:
            print(func)
def test_stab(self):
    """Check the type and the decoded entries of the .stab section."""
    # Golden tuples: (string, n_type, n_other, n_desc, n_value).
    golden_entries = [
        ("obj_stabs.S", 0, 0, 0x2, 33),  # generated by compiler
        ("label", 0x95, 0xc8, 0x4072, 0xdeadbeef),
        ("another label", 0x41, 0x66, 0xf9b1, 0xcafebabe),
    ]
    elf_path = os.path.join('test', 'testfiles_for_unittests',
                            'obj_stabs.elf')

    with open(elf_path, 'rb') as stream:
        elf = ELFFile(stream)

        # The section must be parsed with the dedicated section class.
        for section in elf.iter_sections():
            if section.name != '.stab':
                continue
            self.assertIsInstance(section, StabSection)

        # Compare each decoded entry with its golden tuple, field by field.
        stab_section = elf.get_section_by_name('.stab')
        string_table = elf.get_section_by_name('.stabstr')
        for stab, wanted in zip(stab_section.iter_stabs(), golden_entries):
            text, n_type, n_other, n_desc, n_value = wanted
            self.assertEqual(string_table.get_string(stab.n_strx), text)
            self.assertEqual(stab.n_type, n_type)
            self.assertEqual(stab.n_other, n_other)
            self.assertEqual(stab.n_desc, n_desc)
            self.assertEqual(stab.n_value, n_value)
def calculate_elf_filesizes(elf_object_file):
    """Estimate the size of a minimal ELF image for *elf_object_file*.

    Only the .text, .rodata, .data and .bss sections are considered. Each
    one that exists adds its ``sh_size``, one section header and its name in
    the section-header string table. Progress is printed to stdout.

    Returns:
        list: ``[elf_file_size, text + rodata size, data + bss size,
        file header + program headers size]``.
    """
    # Fixed sizes; the values equal the ELF32 header structure sizes.
    elf_fileheader_size = 52
    section_header_size = 40
    program_header_size = 32

    # NULL section header + shstrtab section header.
    elf_sectionheaders_size = 2 * section_header_size
    # Leading NUL byte plus ".shstrtab" and its terminator.
    elf_shstrtab_size = 11
    elf_programheaders_size = 0
    alignement_overhead = 0
    has_rom_program_header = False
    has_ram_program_header = False

    # (section name, label used in the log line, counts as ROM?)
    tracked_sections = (
        ('.text', 'rom', True),
        ('.rodata', 'rodata', True),
        ('.data', 'data', False),
        ('.bss', 'bss', False),
    )
    sizes = dict((name, 0) for name, _, _ in tracked_sections)

    with open(elf_object_file, 'rb') as f:
        elffile = ELFFile(f)
        print("elf_fileheader_size {}".format(elf_fileheader_size))

        for name, label, is_rom in tracked_sections:
            section = elffile.get_section_by_name(name)
            if section is None:
                continue
            sizes[name] += section['sh_size']
            elf_sectionheaders_size += section_header_size
            # The name plus its NUL terminator goes into shstrtab.
            elf_shstrtab_size += len(name) + 1
            if is_rom:
                has_rom_program_header = True
            else:
                has_ram_program_header = True
            print("found {} section size {}".format(label, sizes[name]))

    text_size = sizes['.text']
    rodata_size = sizes['.rodata']
    data_size = sizes['.data']
    bss_size = sizes['.bss']

    if has_rom_program_header:
        elf_programheaders_size += program_header_size
    if has_ram_program_header:
        elf_programheaders_size += program_header_size
    print("elf_programheaders_size {}".format(elf_programheaders_size))

    # .bss is not part of the alignment and file-size sums below.
    for chunk in (text_size, rodata_size, data_size, elf_shstrtab_size):
        alignement_overhead += __calculate_alignment_overhead(chunk)
    print("section_alignment overhead {}".format(alignement_overhead))
    print("elf_shstrtab_size {}".format(elf_shstrtab_size))
    print("elf_sectionheaders_size {}".format(elf_sectionheaders_size))

    elf_file_size = (elf_fileheader_size + elf_programheaders_size +
                     text_size + rodata_size + data_size +
                     alignement_overhead + elf_shstrtab_size +
                     elf_sectionheaders_size)
    # Fixed: this line used to print elf_sectionheaders_size instead.
    print("elf_file_size {}".format(elf_file_size))

    return [
        elf_file_size,
        text_size + rodata_size,
        data_size + bss_size,
        elf_fileheader_size + elf_programheaders_size,
    ]
def __init__(self, filename):
    """Disassemble .text of *filename* and index .rodata and the symbols.

    Sets ``self.ins`` (offset -> instruction), ``self.sym`` (name -> value
    relative to the .text address), ``self.code``, ``self.offset``,
    ``self.md`` and ``self.rodata`` (offset relative to .text -> byte).
    If there is no .text section, only ``ins`` and ``sym`` are set (both
    empty) and "No code found" is printed.
    """
    self.ins = {}
    self.sym = {}
    # Everything needed is copied out of the file here, so the stream can
    # be closed on exit instead of being leaked.
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        data = elf.get_section_by_name(b'.rodata')
        s = elf.get_section_by_name(b'.text')
        if not s:
            print("No code found")
            return
        self.code = s.data()
        self.offset = s['sh_addr']
        self.md = Cs(CS_ARCH_X86, CS_MODE_64)
        self.md.detail = True

        if data is not None:
            # Read the section once; the original re-read it for every byte.
            rodata_bytes = data.data()
            delta = data.header.sh_addr - s.header.sh_addr
            self.rodata = {k + delta: value
                           for k, value in enumerate(rodata_bytes)}
        else:
            # A binary without .rodata simply has no read-only data to map.
            self.rodata = {}

        # Some bug if we dont disassemble from 0
        for i in self.md.disasm(self.code, 0):
            self.ins[i.address] = i

        symtab = elf.get_section_by_name(b'.symtab')
        if symtab and isinstance(symtab, SymbolTableSection):
            for sym in symtab.iter_symbols():
                self.sym[sym.name] = sym['st_value'] - self.offset
def extract(self, binary, logfile):
    """Write ``name:entry:ret`` lines for the functions of *binary*.

    Collects every symbol of ``.symtab`` whose type contains "FUNC", plus
    one ``<name>@plt`` entry per ``.rela.plt`` relocation, and writes them
    to *logfile*.

    Returns:
        The (empty) dict when ``.symtab`` is missing; in that case nothing
        is written. Otherwise None.
    """
    protos = dict()
    with open(binary, 'rb') as f:
        e = ELFFile(f)
        symtable = e.get_section_by_name(".symtab")
        if not symtable:
            Log.stderr("symbole table not found")
            return protos

        for entry in symtable.iter_symbols():
            if "FUNC" in entry['st_info'].type:
                addr = entry["st_value"]
                # Entry and "ret" addresses are both the symbol value.
                protos[addr] = (entry.name, addr, addr)

        dynsym = e.get_section_by_name(".dynsym")
        reloc = e.get_section_by_name(".rela.plt")
        plt = e.get_section_by_name(".plt")

        # Additionally add plt entries. Binaries without a PLT (e.g.
        # statically linked ones) lack these sections; skip instead of
        # crashing on None.
        if dynsym is not None and reloc is not None and plt is not None:
            plt_base = plt['sh_addr']
            for idx, entry in enumerate(reloc.iter_relocations()):
                name = dynsym.get_symbol(entry['r_info_sym']).name + "@plt"
                # NOTE(review): assumes 16-byte PLT slots with slot 0
                # reserved (x86-64 layout) -- confirm for other targets.
                addr = plt_base + 16 * (idx + 1)
                protos[addr] = (name, addr, addr)

    with open(logfile, "w") as f:
        for (name, entry, ret) in protos.values():
            f.write("{}:{}:{}\n".format(name, entry, ret))
def process_file(filename):
    """Disassemble .text of *filename* and run each instruction on an Emulator.

    For every decoded instruction the emulator method named after its
    mnemonic is called; missing methods are reported on stdout.

    Returns:
        The decoded instructions as returned by ``distorm3.Decompose``.
    """
    print("Processing file: ", filename)
    with open(filename, "rb") as stream:
        # The stream must stay open until all section data has been read.
        elf = ELFFile(stream)

        # The machine code is taken from the .text section.
        code_section = elf.get_section_by_name(".text")
        load_address = code_section.header["sh_addr"]
        disassembly = distorm3.Decompose(load_address, code_section.data())

        # Symbol table, meant to map addresses to names (not filled yet).
        symbol_table_section = elf.get_section_by_name(".symtab")
        symbol_table = {}
        # TODO: Fill in the symbol table...

        # Create an LLVM emulator
        emulator = Emulator("module", symbol_table)
        for instruction in disassembly:
            if not hasattr(emulator, instruction.mnemonic):
                print(instruction.mnemonic +
                      " not implemented yet. Please implement it!")
                continue
            handler = getattr(emulator, instruction.mnemonic)
            handler(instruction)

        return disassembly
def __init__(self, name):
    """Read entry point, .text address, symbols and source map of an ELF.

    Sets ``self.entry``, ``self.workarea`` (the .text ``sh_addr``),
    ``self.symbols`` (name -> ``st_value``) and ``self.src_map``.

    Raises:
        ValueError: if the file has no .text or no .symtab section.
    """
    self.name = name
    self.fcache = FCache()
    # ELF is a binary format. The original opened it in text mode ('r'),
    # which fails or corrupts the data wherever text and bytes differ.
    with open(self.name, 'rb') as stream:
        elffile = ELFFile(stream)

        # get entry point
        self.entry = elffile.header.e_entry

        # get text seg address
        section = elffile.get_section_by_name(b'.text')
        if not section:
            raise ValueError('No text segment found.')
        self.workarea = section.header['sh_addr']

        # init symbols
        section = elffile.get_section_by_name(b'.symtab')
        if not section:
            raise ValueError('No symbol table found. Perhaps this ELF has been stripped?')

        res = {}
        if isinstance(section, SymbolTableSection):
            # One pass over the table replaces xrange plus two
            # get_symbol(i) lookups per symbol.
            for symbol in section.iter_symbols():
                res[symbol.name] = symbol.entry.st_value
        self.symbols = res

        # Called while the stream is still open: it receives the ELFFile.
        self.src_map = self.get_src_map(elffile)
def extract_ioregs(elfFile: Path):
    """Extract the required I/O registers and constants from an ELF file.

    Logs a critical message and exits with status 1 when the .data or the
    .symtab section is missing.

    Returns:
        dict with the keys "regs" and "constants", each holding the result
        of ``extract_group`` for the corresponding required-name list.
    """
    with open(elfFile, 'rb') as stream:
        elf = ELFFile(stream)
        data_section = elf.get_section_by_name(".data")
        symtab = elf.get_section_by_name(".symtab")

        if data_section is None:
            logger.critical(".data section missing")
            sys.exit(1)
        if symtab is None:
            logger.critical(".symtab section missing")
            sys.exit(1)

        payload = data_section.data()
        base_address = data_section['sh_addr']

        regs = extract_group("ioreg", REQUIRED_IOREGS,
                             payload, base_address, symtab)
        constants = extract_group("constant", REQUIRED_CONSTANTS,
                                  payload, base_address, symtab)
        return {"regs": regs, "constants": constants}
def printsizes(stream, flash_size=32768, ram_size=4096):
    """Print the Flash and RAM usage of the ELF image read from *stream*.

    Args:
        stream: binary file object positioned on an ELF image.
        flash_size: Flash capacity in bytes used for the percentage
            (default 32768, the previously hard-coded value).
        ram_size: RAM capacity in bytes used for the percentage
            (default 4096, the previously hard-coded value).

    A section that does not exist counts as 0 bytes. Previously only
    .noinit was treated that way and any other missing section crashed.
    """
    elffile = ELFFile(stream)

    def section_len(name):
        # Length of the section contents, or 0 when the section is absent.
        section = elffile.get_section_by_name(name)
        return 0 if section is None else len(section.data())

    textlen = section_len('.text')
    rodatalen = section_len('.rodata')
    codeperc = (textlen + rodatalen) * 100.0 / flash_size
    print("\t%d bytes of .text, %d bytes of .rodata (%d%% Flash)" %
          (textlen, rodatalen, codeperc))
    if codeperc > 90:
        print("WARNING: %d percent of Flash is used!" % codeperc)

    datalen = section_len('.data')
    bsslen = section_len('.bss')
    noinitlen = section_len('.noinit')
    ram_used = datalen + bsslen + noinitlen
    dataperc = ram_used * 100.0 / ram_size
    if ram_used > 0:
        print(
            "\t%d bytes of .data, %d bytes of .bss, %d bytes of .noinit (%d%% RAM)"
            % (datalen, bsslen, noinitlen, dataperc))
    if dataperc > 80:
        print("WARNING: %d percent of data is used!" % dataperc)
def main():
    """Print the ROM/RAM usage of an ELF file as a small JSON object.

    The sections that count as RAM and ROM are given on the command line
    with ``--ram-section`` / ``--rom-section`` (both repeatable and
    required). An unknown section name is reported and exits with status 1.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--ram-section", action="append", required=True)
    cli.add_argument("--rom-section", action="append", required=True)
    cli.add_argument("elffile")
    options = cli.parse_args()

    with open(options.elffile, 'rb') as stream:
        elf = ELFFile(stream)

        def total_size(section_names):
            # Sum sh_size over the named sections; abort on an unknown name.
            total = 0
            for section_name in section_names:
                found = elf.get_section_by_name(section_name)
                if found is None:
                    print("Bad section name " + section_name)
                    sys.exit(1)
                total += found["sh_size"]
            return total

        # RAM sections are checked first, then ROM sections.
        ram_usage = total_size(options.ram_section)
        rom_usage = total_size(options.rom_section)

        print('{{\"program\": {}, \"data\": {}}}'.format(rom_usage, ram_usage))
def get_functions(lib_path):
    """Collect the functions of the ELF library at *lib_path*.

    ``scan_section`` is run on ``.symtab`` and ``.dynsym`` to fill the
    functions dict; the raw ``.text`` contents and file offset are returned
    along with it.

    Returns:
        tuple ``(functions, offset, data)`` on success; *offset* and *data*
        are None when there is no .text section. On any error a diagnostic
        line is printed and None is returned.
    """
    functions = {}
    data = None
    offset = None
    try:
        from elftools.elf.elffile import ELFFile

        with open(lib_path, 'rb') as stream:
            elffile = ELFFile(stream)
            if not elffile.has_dwarf_info():
                print('file has no DWARF info')
            scan_section(functions, elffile, lib_path,
                         elffile.get_section_by_name('.symtab'))
            scan_section(functions, elffile, lib_path,
                         elffile.get_section_by_name('.dynsym'))

            # .text section hex dump
            section = elffile.get_section_by_name('.text')
            if section:
                data = section.data()
                offset = section['sh_offset']
        return functions, offset, data
    except Exception as e:
        # Best effort by design: report where the failure happened and let
        # the caller see None.
        exc_type, _, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        # Single-argument print(...) is valid on Python 2 and 3; the
        # original print statement is a SyntaxError on Python 3.
        print("[%s, %s, %s] Error extracting functions: %s" % (
            exc_type, fname, exc_tb.tb_lineno, str(e)))
        return None
def getELFRelInfo(binname):
    """Collect the relocations of every ``.rel*`` section of *binname*.

    Returns:
        dict mapping each relocation section name to a list of
        ``(symbol name, relocation, 8 bytes at r_offset in the target
        section)`` tuples. The target section is the one whose name is the
        relocation section name without its ``.rel``/``.rela`` prefix.
        Relocations whose target section does not exist are reported and
        skipped.
    """
    RelSecDict = {}
    with open(binname, 'rb') as fin:
        ef = ELFFile(fin)
        sym_section = ef.get_section_by_name('.symtab')
        for sec in ef.iter_sections():
            if not sec.name.startswith('.rel'):
                continue

            # The target section depends only on the relocation section, so
            # look it up once instead of once per relocation. Raw string:
            # '\.' in a normal literal is an invalid escape sequence.
            mname = re.sub(r'^\.rela?', '', sec.name)
            secmain = ef.get_section_by_name(mname)
            secdata = None  # target contents, read on first use

            rels = []
            for rel in sec.iter_relocations():
                symname = sym_section.get_symbol(rel.entry['r_info_sym']).name
                offset = rel['r_offset']
                if secmain is None:
                    print('Unmatched section %s' % sec.name)
                    continue
                if secdata is None:
                    secdata = secmain.data()
                val = secdata[offset:offset + 8]
                rels.append((symname, rel, val))
            RelSecDict[sec.name] = rels
    return RelSecDict
def wrap_sm_text_sections(file, output_path, key):
    """Copy *file* to *output_path* and wrap every SM text section in place.

    For each section whose name starts with ``.text.sm.<name>`` the section
    contents are replaced in the copy by the output of ``wrap``, and
    ``nonce + tag`` is written at the file offset of the matching
    ``.data.sm.<name>.wrapinfo`` section.

    Args:
        file: open binary file object of the input ELF (``file.name`` is
            used as the copy source).
        output_path: path of the wrapped copy.
        key: key passed to ``wrap``.

    Raises:
        Error: if a wrapinfo section is missing.
    """
    elf_file = ELFFile(file)
    shutil.copy(file.name, output_path)

    # Dots are escaped so they match literally; the original pattern let
    # '.' match any character. Compiled once, outside the loop.
    sm_text_re = re.compile(r'\.text\.sm\.(\w+)')

    with open(output_path, 'rb+') as out_file:
        for section in elf_file.iter_sections():
            match = sm_text_re.match(section.name)
            if not match:
                continue

            section_name, sm_name = match.group(0, 1)
            logging.info('Wrapping text section of SM %s', sm_name)

            section = elf_file.get_section_by_name(section_name)
            section_data = section.data()  # read once, used twice
            nonce = _get_sm_wrap_nonce(sm_name, section_data)
            wrapped_section_data, tag = wrap(key, nonce, section_data)

            # Write wrapped section to output file
            out_file.seek(section['sh_offset'])
            out_file.write(wrapped_section_data)

            # Write [nonce, tag] to the wrapinfo section.
            # FIXME use % formatting when bumping Python version to 3.5
            wrapinfo_name = '.data.sm.{}.wrapinfo'.format(sm_name)
            wrapinfo_section = elf_file.get_section_by_name(wrapinfo_name)

            if wrapinfo_section is None:
                raise Error('No wrapinfo section found. Did you links with '
                            '--prepare-for-sm-text-section-wrapping?')

            wrapinfo_data = nonce + tag
            out_file.seek(wrapinfo_section['sh_offset'])
            out_file.write(wrapinfo_data)
def __init__(self, filename):
    """Disassemble .text of *filename* and index .rodata and the symbols.

    Sets ``self.ins`` (offset -> instruction), ``self.sym`` (name -> value
    relative to the .text address), ``self.code``, ``self.offset``,
    ``self.md`` and ``self.rodata`` (offset relative to .text -> byte).
    If there is no .text section, only ``ins`` and ``sym`` are set (both
    empty) and "No code found" is printed.
    """
    self.ins = {}
    self.sym = {}
    # Everything needed is copied out of the file here, so the stream can
    # be closed on exit instead of being leaked.
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        data = elf.get_section_by_name(b'.rodata')
        s = elf.get_section_by_name(b'.text')
        if not s:
            print("No code found")
            return
        self.code = s.data()
        self.offset = s['sh_addr']
        self.md = Cs(CS_ARCH_X86, CS_MODE_64)
        self.md.detail = True

        if data is not None:
            # Read the section once; the original re-read it for every byte.
            rodata_bytes = data.data()
            delta = data.header.sh_addr - s.header.sh_addr
            self.rodata = {k + delta: value
                           for k, value in enumerate(rodata_bytes)}
        else:
            # A binary without .rodata simply has no read-only data to map.
            self.rodata = {}

        # Some bug if we dont disassemble from 0
        for i in self.md.disasm(self.code, 0):
            self.ins[i.address] = i

        symtab = elf.get_section_by_name(b'.symtab')
        if symtab and isinstance(symtab, SymbolTableSection):
            for sym in symtab.iter_symbols():
                self.sym[sym.name] = sym['st_value'] - self.offset
def extract_symbols(pathname, load_address, use_vaddr=False):
    """Yield a ``Symbol`` for every function symbol of the ELF *pathname*.

    Args:
        pathname: path of the ELF file. A file that cannot be opened or
            parsed yields nothing.
        load_address: value added to every symbol address.
        use_vaddr: when true, the ``p_vaddr`` of the first PT_LOAD segment
            is subtracted from every symbol address.

    Symbols come from ``.symtab`` or, if that is absent, ``.dynsym``.

    Raises:
        PointBreakException: if *use_vaddr* is set and the file has no
            PT_LOAD segment.
    """
    try:
        stream = open(pathname, 'rb')
    except (IOError, OSError):
        return

    # ``with`` closes the file once the generator finishes or is closed;
    # the original never closed it.
    with stream:
        try:
            elf = ELFFile(stream)
        except Exception:
            # Unparsable file: keep the best-effort behaviour, but no
            # longer swallow KeyboardInterrupt/SystemExit as a bare
            # ``except:`` did.
            return

        if use_vaddr:
            for seg in elf.iter_segments():
                if seg.header.p_type == "PT_LOAD":
                    vaddr = seg.header.p_vaddr
                    break
            else:
                raise PointBreakException(
                    'Could not find p_vaddr in PT_LOAD segment in file {!r}'.
                    format(pathname))
        else:
            vaddr = 0

        section = elf.get_section_by_name('.symtab')
        if section is None:
            section = elf.get_section_by_name('.dynsym')
        if section is None:
            return

        for symbol in section.iter_symbols():
            if symbol.entry.st_info.type != 'STT_FUNC':
                continue
            low_addr = symbol.entry.st_value + load_address - vaddr
            high_addr = low_addr + symbol.entry.st_size
            yield Symbol(name=symbol.name,
                         obj_file=pathname,
                         low_addr=low_addr,
                         high_addr=high_addr,
                         is_code=True)
class PltEvaler(object):
    """Resolve PLT stubs and GOT slots of an ELF file to symbol names."""

    def __init__(self, filename, addend=0):
        """Load *filename* and index its ``.rela.plt`` relocations.

        Args:
            filename: path of the ELF file.
            addend: value added to every computed GOT offset.
        """
        self.filename = filename
        with open(filename, 'rb') as f:
            self.raw = bytearray(f.read())
        # This second handle is left open: self.elffile and self.symtab are
        # kept and used by later resolve_* calls.
        self.elffile = ELFFile(open(filename, 'rb'))
        self.reladyn = self.elffile.get_section_by_name('.rela.plt')
        self.symtab = self.elffile.get_section_by_name('.dynsym')
        # r_offset -> relocation
        self.off_d = {reloc['r_offset']: reloc
                      for reloc in self.reladyn.iter_relocations()}
        self.addend = addend
        self.got_itable = {}  # symbol name -> offset, filled by resolve_got

    def resolve_got(self, off):
        """Return the symbol name relocated at offset *off*, or None."""
        if off not in self.off_d:
            return None
        nm = self.symtab.get_symbol(self.off_d[off]['r_info_sym']).name
        self.got_itable[nm] = off
        return nm

    def resolve_plt(self, off):
        """Return the symbol behind the ``ff 25`` stub at *off*, or None.

        The stub must start with the two bytes ``ff 25``. The 4 bytes after
        them are decoded with ``b2i`` and added to the end of the 6-byte
        instruction (plus ``self.addend``) to get the GOT offset.
        """
        # Fixed: reject unless BOTH opcode bytes match. The original used
        # ``and``, which rejected only when both bytes were wrong.
        if self.raw[off] != 0xff or self.raw[off + 1] != 0x25:
            return None
        ret = self.resolve_got(
            off + 6 + b2i(self.raw[off + 2:off + 6]) + self.addend)
        if ret is None:
            # Single-argument print(...) is valid on Python 2 and 3.
            print('[!?] GOT address not found: %s' % hex(off))
        return ret
def __init__(self, name):
    """Read entry point, .text address, symbols and source map of an ELF.

    Sets ``self.entry``, ``self.workarea`` (the .text ``sh_addr``),
    ``self.symbols`` (name -> ``st_value``) and ``self.src_map``.

    Raises:
        ValueError: if the file has no .text or no .symtab section.
    """
    self.name = name
    self.fcache = FCache()
    # ELF is a binary format. The original opened it in text mode ("r"),
    # which fails or corrupts the data wherever text and bytes differ.
    with open(self.name, "rb") as stream:
        elffile = ELFFile(stream)

        # get entry point
        self.entry = elffile.header.e_entry

        # get text seg address
        section = elffile.get_section_by_name(b".text")
        if not section:
            raise ValueError("No text segment found.")
        self.workarea = section.header["sh_addr"]

        # init symbols
        section = elffile.get_section_by_name(b".symtab")
        if not section:
            raise ValueError("No symbol table found. Perhaps this ELF has been stripped?")

        res = {}
        if isinstance(section, SymbolTableSection):
            # One pass over the table replaces xrange plus two
            # get_symbol(i) lookups per symbol.
            for symbol in section.iter_symbols():
                res[symbol.name] = symbol.entry.st_value
        self.symbols = res

        # Called while the stream is still open: it receives the ELFFile.
        self.src_map = self.get_src_map(elffile)
def process_file(filename):
    """Disassemble .text of *filename* and run each instruction on an Emulator.

    For every decoded instruction the emulator method named after its
    mnemonic is called; missing methods are reported on stdout.

    Returns:
        The decoded instructions as returned by ``distorm3.Decompose``.
    """
    print('Processing file: ', filename)
    with open(filename, 'rb') as stream:
        # The stream must stay open until all section data has been read.
        elf = ELFFile(stream)

        # The machine code is taken from the .text section.
        code_section = elf.get_section_by_name(".text")
        load_address = code_section.header['sh_addr']
        disassembly = distorm3.Decompose(load_address, code_section.data())

        # Symbol table, meant to map addresses to names (not filled yet).
        symbol_table_section = elf.get_section_by_name(".symtab")
        symbol_table = {}
        # TODO: Fill in the symbol table...

        # Create an LLVM emulator
        emulator = Emulator("module", symbol_table)
        for instruction in disassembly:
            if not hasattr(emulator, instruction.mnemonic):
                print(instruction.mnemonic +
                      " not implemented yet. Please implement it!")
                continue
            handler = getattr(emulator, instruction.mnemonic)
            handler(instruction)

        return disassembly
def parse_module_ko(module_path):
    """Build a call graph of the functions in .text of a kernel module.

    Every STT_FUNC symbol accepted by ``is_in_section`` is disassembled as
    x86-64 code. A ``call`` whose operand offset has an entry in
    ``.rela.text`` is recorded under the calling function; other calls are
    reported on stdout.

    Returns:
        dict mapping a function name to a list of
        ``(call offset inside the function, target symbol name, 0)``.
    """
    call_graph = dict()
    with open(module_path, 'rb') as f:
        elf_file = ELFFile(f)
        code = elf_file.get_section_by_name('.text')
        symtab = elf_file.get_section_by_name('.symtab')
        relocs = elf_file.get_section_by_name('.rela.text')

        # relocation offset inside .text -> name of the referenced symbol
        relmap = dict()
        if relocs is not None:  # no .rela.text means nothing to map
            for relocation in relocs.iter_relocations():
                symbol = symtab.get_symbol(relocation['r_info_sym'])
                relmap[relocation['r_offset']] = symbol.name

        code_instructions = code.data()
        # One disassembler for all functions instead of one per symbol.
        md = Cs(CS_ARCH_X86, CS_MODE_64)

        for symbol in symtab.iter_symbols():
            if symbol['st_info']['type'] != "STT_FUNC":
                continue
            if not is_in_section(symbol['st_value'], symbol['st_shndx'],
                                 code, elf_file):
                continue

            start = symbol['st_value']
            end = start + symbol['st_size']
            for i in md.disasm(code_instructions[start:end], offset=0):
                if i.mnemonic != "call":
                    continue
                # NOTE(review): the +1 presumably skips a one-byte call
                # opcode so the key is the operand's offset -- confirm.
                call_site = i.address + 1 + start
                if call_site in relmap:
                    call_graph.setdefault(symbol.name, []).append(
                        (i.address, relmap[call_site], 0))
                else:
                    print("WARNING MISSING SOME CALLSITES")
                    print("call site not in relocation table")
    return call_graph
class ElfParser(object):
    """Expose the ``.text`` bytes and the function symbols of an ELF file.

    The file is opened in ``__init__`` and is not closed by this class: the
    ``ELFFile`` wrapping the stream is stored in ``self.elffile``.
    """

    def __init__(self, f):
        """Load ``.text`` from the ELF file at path *f* and index its functions."""
        # The stream is deliberately left open; self.elffile keeps using it.
        self.elffile = ELFFile(open(f, "rb"))
        self.text_section = self.elffile.get_section_by_name(".text")
        self.code = self.text_section.data()
        self.code_len = len(self.code)
        self.text_offset = self.text_section.header.sh_addr
        self.funcs = FunctionsList()
        self.init_functions_list()

    def get_functions_list(self):
        """Return the list of functions collected from the symbol table."""
        return self.funcs

    def get_code_and_funcs(self):
        """Return the pair ``(code bytes, functions list)``."""
        return self.get_binary_code(), self.get_functions_list()

    def get_functions_num(self):
        """Return the number of collected functions."""
        # Fixed: the original referenced the undefined global ``funcs``.
        return len(self.funcs)

    def get_code_len(self):
        """Return the length in bytes of the ``.text`` contents."""
        return self.code_len

    def get_binary_code(self):
        """Return the raw contents of the ``.text`` section."""
        return self.code

    def get_section_idx(self, section):
        """Return the index of *section* in the section table, or None."""
        for idx, candidate in enumerate(self.elffile.iter_sections()):
            if candidate == section:
                return idx
        return None

    def va_to_offset(self, va):
        """Convert a virtual address to an offset relative to ``.text``."""
        return va - self.text_offset

    def offset_to_va(self, offset):
        """Convert an offset relative to ``.text`` to a virtual address."""
        return offset + self.text_offset

    @staticmethod
    def is_function_symbol(symbol, section_idx):
        """Return True for a non-empty STT_FUNC symbol defined in *section_idx*."""
        entry = symbol.entry
        return (entry.st_info.type == "STT_FUNC"
                and entry.st_shndx == section_idx
                and entry.st_size > 0)

    def init_functions_list(self):
        """Fill ``self.funcs`` with every function symbol located in ``.text``.

        Raises:
            Exception: if ``.symtab`` is missing or is not a symbol table.
        """
        symtab = self.elffile.get_section_by_name(".symtab")
        text_section_idx = self.get_section_idx(self.text_section)
        if not isinstance(symtab, SymbolTableSection):
            raise Exception("ELF file has no usable .symtab section")
        for symbol in symtab.iter_symbols():
            if self.is_function_symbol(symbol, text_section_idx):
                sym_offset = self.va_to_offset(symbol.entry.st_value)
                self.funcs.append(symbol.name, sym_offset,
                                  symbol.entry.st_size)

    def print_functions_list(self):
        """Print a table header followed by one line per collected function."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%-30s\t%8s\t%8s" % ("Name", "Offset", "Size"))
        print("-" * 58)
        for func in self.funcs:
            print(func)
def find_rw_section(binary, section_name=".got.plt"):
    """Return the ``sh_addr`` of *section_name*, falling back to ``.data``.

    Args:
        binary: path of the ELF file.
        section_name: preferred section; ``.data`` is used when it is not
            found.
    """
    with open(binary, "rb") as stream:
        image = ELFFile(stream)
        section = (image.get_section_by_name(section_name)
                   or image.get_section_by_name(".data"))
        return section['sh_addr']
def symbols_set(filename):
    """
    Parse given elf file and return its exported symbols as a set.

    # https://lists.debian.org/lsb-spec/1999/12/msg00017.html

    For each public symbol:
    1. find its version index in the version table read from the
       .gnu.version section
    2.1 (only if the library's symbols are versioned) the index from (1.)
        points into the list of symbol versions given in section
        .gnu.version_d
    2.2 use that second index to recover the version string
    3. use the version and the name of the symbol to create the full
       versioned name

    A directory or a file that is not a valid ELF yields an empty set, as
    does a file without a .dynsym section.
    """
    try:
        stream = open(filename, 'rb')
    except IsADirectoryError:
        # Fixed: the original returned ``{}``, which is a dict, not a set.
        return set()

    # ``with`` closes the file; the original leaked the handle.
    with stream:
        try:
            elffile = ELFFile(stream)
        except ELFError:
            return set()

        symbols_versions = dict()

        # 1. get a table of version indexes
        version_table = _get_version_table(elffile)

        # 2.1 try to get a table of the possible versions
        ver_def = elffile.get_section_by_name('.gnu.version_d')
        if ver_def:
            for version, aux_iter in ver_def.iter_versions():
                # Only the first aux entry names the version; a following
                # parent entry (if any) is ignored. A definition without
                # aux entries is skipped rather than reusing a stale name.
                first_aux = next(iter(aux_iter), None)
                if first_aux is not None:
                    symbols_versions[version['vd_ndx']] = first_aux.name

        symbols = set()
        dyn = elffile.get_section_by_name('.dynsym')
        if dyn is None:
            return symbols

        for nsym, sym in enumerate(dyn.iter_symbols()):
            exported = (
                sym['st_info']['bind'] == 'STB_GLOBAL'
                and sym['st_size'] != 0
                and sym['st_other']['visibility'] in ('STV_PROTECTED',
                                                      'STV_DEFAULT')
                and sym['st_shndx'] != 'SHN_UNDEF')
            if not exported:
                continue

            # try 2.2 if possible
            version = ''
            if nsym in version_table:
                version = symbols_versions.get(version_table[nsym], '')

            if version:
                # 3. create the full version string
                symbols.add('{}@{}'.format(sym.name, version))
            else:
                symbols.add(sym.name)

        return symbols
def calculate_elf_filesizes(elf_object_file):
    """Estimate the size of a minimal ELF image for *elf_object_file*.

    Only the .text, .rodata, .data and .bss sections are considered. Each
    one that exists adds its ``sh_size``, one section header and its name in
    the section-header string table. Progress is printed to stdout.

    Returns:
        list: ``[elf_file_size, text + rodata size, data + bss size,
        file header + program headers size]``.
    """
    # Fixed sizes; the values equal the ELF32 header structure sizes.
    elf_fileheader_size = 52
    section_header_size = 40
    program_header_size = 32

    # NULL section header + shstrtab section header.
    elf_sectionheaders_size = 2 * section_header_size
    # Leading NUL byte plus ".shstrtab" and its terminator.
    elf_shstrtab_size = 11
    elf_programheaders_size = 0
    alignement_overhead = 0
    has_rom_program_header = False
    has_ram_program_header = False

    # (section name, label used in the log line, counts as ROM?)
    tracked_sections = (
        ('.text', 'rom', True),
        ('.rodata', 'rodata', True),
        ('.data', 'data', False),
        ('.bss', 'bss', False),
    )
    sizes = dict((name, 0) for name, _, _ in tracked_sections)

    with open(elf_object_file, 'rb') as f:
        elffile = ELFFile(f)
        print("elf_fileheader_size {}".format(elf_fileheader_size))

        for name, label, is_rom in tracked_sections:
            section = elffile.get_section_by_name(name)
            if section is None:
                continue
            sizes[name] += section['sh_size']
            elf_sectionheaders_size += section_header_size
            # The name plus its NUL terminator goes into shstrtab.
            elf_shstrtab_size += len(name) + 1
            if is_rom:
                has_rom_program_header = True
            else:
                has_ram_program_header = True
            print("found {} section size {}".format(label, sizes[name]))

    text_size = sizes['.text']
    rodata_size = sizes['.rodata']
    data_size = sizes['.data']
    bss_size = sizes['.bss']

    if has_rom_program_header:
        elf_programheaders_size += program_header_size
    if has_ram_program_header:
        elf_programheaders_size += program_header_size
    print("elf_programheaders_size {}".format(elf_programheaders_size))

    # .bss is not part of the alignment and file-size sums below.
    for chunk in (text_size, rodata_size, data_size, elf_shstrtab_size):
        alignement_overhead += __calculate_alignment_overhead(chunk)
    print("section_alignment overhead {}".format(alignement_overhead))
    print("elf_shstrtab_size {}".format(elf_shstrtab_size))
    print("elf_sectionheaders_size {}".format(elf_sectionheaders_size))

    elf_file_size = (elf_fileheader_size + elf_programheaders_size +
                     text_size + rodata_size + data_size +
                     alignement_overhead + elf_shstrtab_size +
                     elf_sectionheaders_size)
    # Fixed: this line used to print elf_sectionheaders_size instead.
    print("elf_file_size {}".format(elf_file_size))

    return [
        elf_file_size,
        text_size + rodata_size,
        data_size + bss_size,
        elf_fileheader_size + elf_programheaders_size,
    ]
def process_file(do_write, syms, filename):
    """Locate the symbols named in *syms* and optionally patch them in place.

    Args:
        do_write: when false, only scan and report.
        syms: mapping of symbol name to the bytes written at that symbol.
        filename: path of the ELF file to scan and patch.

    The file offset of a symbol is its ``st_value`` moved by the difference
    between the .text ``sh_offset`` and ``sh_addr``. Nothing is written
    unless every requested symbol was found.
    """
    addrs = dict()
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)
        print('%s: elfclass is %s' % (filename, elffile.elfclass))

        text_name = b'.text'
        sect_text = elffile.get_section_by_name(text_name)
        if not sect_text:
            print(' The file has no %s section' % bytes2str(text_name))
            return
        print(' %s section, sh_offset=%s sh_addr=%s' %
              (bytes2str(text_name), sect_text['sh_offset'],
               sect_text['sh_addr']))

        sect_st = elffile.get_section_by_name(b'.symtab')
        if not sect_st:
            print(
                ' No symbol table found. Perhaps this ELF has been stripped?')
            return
        if not isinstance(sect_st, SymbolTableSection):
            print(' Not a valid symbol table')
            return

        # Distance between file offsets and virtual addresses in .text.
        file_delta = sect_text['sh_offset'] - sect_text['sh_addr']
        for symbol in sect_st.iter_symbols():
            if symbol.name not in syms:
                continue
            file_offset = file_delta + symbol['st_value']
            print('found %s at virtual address %s, offset in file = %s' %
                  (symbol.name, hex(symbol['st_value']), hex(file_offset)))
            addrs[symbol.name] = file_offset

        if len(syms) > len(addrs):
            for wanted in syms:
                if wanted not in addrs:
                    print(' Failed to find symbol %s' % (wanted))
            print(' Not all symbols found, aborting')
            return
        print(' All required symbols found')

    if not do_write:
        print(' Scan-only mode, not writing any changes')
        return

    with open(filename, 'r+b') as target:
        print(' Writing patches to file...')
        for name, file_offset in addrs.items():
            target.seek(file_offset)
            target.write(syms[name])
def process_file(do_write, syms, filename):
    """Locate the symbols named in *syms* and optionally patch them in place.

    Args:
        do_write: when false, only scan and report.
        syms: mapping of symbol name to the bytes written at that symbol.
        filename: path of the ELF file to scan and patch.

    The file offset of a symbol is its ``st_value`` moved by the difference
    between the .text ``sh_offset`` and ``sh_addr``. Nothing is written
    unless every requested symbol was found.
    """
    addrs = dict()
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)
        print('%s: elfclass is %s' % (filename, elffile.elfclass))

        text_name = b'.text'
        sect_text = elffile.get_section_by_name(text_name)
        if not sect_text:
            print(' The file has no %s section' % bytes2str(text_name))
            return
        print(' %s section, sh_offset=%s sh_addr=%s' %
              (bytes2str(text_name), sect_text['sh_offset'],
               sect_text['sh_addr']))

        sect_st = elffile.get_section_by_name(b'.symtab')
        if not sect_st:
            print(' No symbol table found. Perhaps this ELF has been stripped?')
            return
        if not isinstance(sect_st, SymbolTableSection):
            print(' Not a valid symbol table')
            return

        # Distance between file offsets and virtual addresses in .text.
        file_delta = sect_text['sh_offset'] - sect_text['sh_addr']
        for symbol in sect_st.iter_symbols():
            if symbol.name not in syms:
                continue
            file_offset = file_delta + symbol['st_value']
            print('found %s at virtual address %s, offset in file = %s' %
                  (symbol.name, hex(symbol['st_value']), hex(file_offset)))
            addrs[symbol.name] = file_offset

        if len(syms) > len(addrs):
            for wanted in syms:
                if wanted not in addrs:
                    print(' Failed to find symbol %s' % (wanted))
            print(' Not all symbols found, aborting')
            return
        print(' All required symbols found')

    if not do_write:
        print(' Scan-only mode, not writing any changes')
        return

    with open(filename, 'r+b') as target:
        print(' Writing patches to file...')
        for name, file_offset in addrs.items():
            target.seek(file_offset)
            target.write(syms[name])
def process_file(filename):
    """Return the .interp file offset and the summed section sizes of an ELF.

    Returns:
        dict ``{'value': <sh_offset of .interp>, 'size': <sum of sh_size of
        sections 1 .. e_shnum-1>}``, or None (after printing a message) when
        the .text or the .fini section is missing.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        # Layout of every PT_LOAD segment; collected but not used below.
        load_headers = [seg.header for seg in elf.iter_segments()
                        if seg.header.p_type == 'PT_LOAD']
        p_vaddr = [header.p_vaddr for header in load_headers]
        p_offset = [header.p_offset for header in load_headers]
        p_memsz = [header.p_memsz for header in load_headers]
        p_filesz = [header.p_filesz for header in load_headers]

        interp_section = elf.get_section_by_name('.interp')
        text_section = elf.get_section_by_name('.text')
        fini_section = elf.get_section_by_name('.fini')

        # Total size of all sections except the first (index 0).
        section_count = elf['e_shnum']
        print(section_count)
        total_size = 0
        for index in range(1, section_count):
            total_size += elf.get_section(index).header.sh_size

        if not text_section:
            print('Is it rightA?')
            return
        if not fini_section:
            print('Is it right B?')
            return

        return {'value': interp_section.header.sh_offset, 'size': total_size}


import pandas as pd
def read_settings(opts):
    """Build the ``Settings`` for the ELF binary named by ``opts.file``.

    Chooses the capstone mode and the ``Runtime`` description from the ELF
    class (32 or 64 bit), records the offset-minus-address delta of .text,
    .data and .rodata, loads the symbol table into the two lookup dicts and
    replaces the default version with the one ``read_version`` finds, if
    any.

    Raises:
        ValueError: if the ELF class is neither 32 nor 64.
    """
    # Both handles are scoped with ``with``; the original leaked them.
    with open(opts.file, "rb") as elf_stream:
        elffile = ELFFile(elf_stream)

        if elffile.elfclass == 32:
            capstone_mode = capstone.CS_MODE_32
            runtime = Runtime(
                halfword=WordDesc(size=2, lg_size=1, struct='<H'),
                word=WordDesc(size=4, lg_size=2, struct='<I'),
                stack_register=capstone.x86.X86_REG_RBP,
                heap_register=capstone.x86.X86_REG_RDI,
                main_register=capstone.x86.X86_REG_RSI,
                arg_registers=[],
            )
        elif elffile.elfclass == 64:
            capstone_mode = capstone.CS_MODE_64
            runtime = Runtime(
                halfword=WordDesc(size=4, lg_size=2, struct='<I'),
                word=WordDesc(size=8, lg_size=3, struct='<Q'),
                stack_register=capstone.x86.X86_REG_RBP,
                heap_register=capstone.x86.X86_REG_R12,
                main_register=capstone.x86.X86_REG_RBX,
                arg_registers=[
                    capstone.x86.X86_REG_R14,
                    capstone.x86.X86_REG_RSI,
                    capstone.x86.X86_REG_RDI,
                    capstone.x86.X86_REG_R8,
                    capstone.x86.X86_REG_R9,
                ],
            )
        else:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError(
                "unsupported ELF class: %r" % (elffile.elfclass,))

        with open(opts.file, "rb") as binary_stream:
            binary = binary_stream.read()

        def file_offset_delta(section_name):
            # sh_offset - sh_addr of the named section (one lookup, not two).
            section = elffile.get_section_by_name(section_name)
            return section['sh_offset'] - section['sh_addr']

        settings = Settings(
            opts=opts,
            rt=runtime,
            version=(7, 10, 3),
            name_to_address={},
            address_to_name={},
            binary=binary,
            capstone=capstone.Cs(capstone.CS_ARCH_X86, capstone_mode),
            text_offset=file_offset_delta(b'.text'),
            data_offset=file_offset_delta(b'.data'),
            rodata_offset=file_offset_delta(b'.rodata'),
        )

        symtab = elffile.get_section_by_name(b'.symtab')
        for sym in symtab.iter_symbols():
            # Best effort: a symbol that cannot be converted is skipped.
            # Narrowed from a bare ``except: pass`` so that interrupts and
            # unrelated bugs are no longer hidden.
            try:
                name = str(sym.name)
                offset = sym['st_value']
            except (TypeError, ValueError, KeyError):
                continue
            settings.name_to_address[name] = offset
            settings.address_to_name[offset] = name

    settings.capstone.detail = True

    parsed_version = read_version(settings)
    if parsed_version is not None:
        settings = settings._replace(version=parsed_version)
    return settings
def read_settings(opts):
    """Build the ``Settings`` for the ELF binary named by ``opts.file``.

    Chooses the capstone mode and the ``Runtime`` description from the ELF
    class (32 or 64 bit), records the offset-minus-address delta of .text,
    .data and .rodata, loads the symbol table into the two lookup dicts and
    replaces the default version with the one ``read_version`` finds, if
    any.

    Raises:
        ValueError: if the ELF class is neither 32 nor 64.
    """
    # Both handles are scoped with ``with``; the original leaked them.
    with open(opts.file, "rb") as elf_stream:
        elffile = ELFFile(elf_stream)

        if elffile.elfclass == 32:
            capstone_mode = capstone.CS_MODE_32
            runtime = Runtime(
                halfword=WordDesc(size=2, lg_size=1, struct='<H'),
                word=WordDesc(size=4, lg_size=2, struct='<I'),
                stack_register=capstone.x86.X86_REG_RBP,
                heap_register=capstone.x86.X86_REG_RDI,
                main_register=capstone.x86.X86_REG_RSI,
                arg_registers=[],
            )
        elif elffile.elfclass == 64:
            capstone_mode = capstone.CS_MODE_64
            runtime = Runtime(
                halfword=WordDesc(size=4, lg_size=2, struct='<I'),
                word=WordDesc(size=8, lg_size=3, struct='<Q'),
                stack_register=capstone.x86.X86_REG_RBP,
                heap_register=capstone.x86.X86_REG_R12,
                main_register=capstone.x86.X86_REG_RBX,
                arg_registers=[
                    capstone.x86.X86_REG_R14,
                    capstone.x86.X86_REG_RSI,
                    capstone.x86.X86_REG_RDI,
                    capstone.x86.X86_REG_R8,
                    capstone.x86.X86_REG_R9,
                ],
            )
        else:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError(
                "unsupported ELF class: %r" % (elffile.elfclass,))

        with open(opts.file, "rb") as binary_stream:
            binary = binary_stream.read()

        def file_offset_delta(section_name):
            # sh_offset - sh_addr of the named section (one lookup, not two).
            section = elffile.get_section_by_name(section_name)
            return section['sh_offset'] - section['sh_addr']

        settings = Settings(
            opts=opts,
            rt=runtime,
            version=(7, 10, 3),
            name_to_address={},
            address_to_name={},
            binary=binary,
            capstone=capstone.Cs(capstone.CS_ARCH_X86, capstone_mode),
            text_offset=file_offset_delta(b'.text'),
            data_offset=file_offset_delta(b'.data'),
            rodata_offset=file_offset_delta(b'.rodata'),
        )

        symtab = elffile.get_section_by_name(b'.symtab')
        for sym in symtab.iter_symbols():
            # Best effort: a name that cannot be decoded as ASCII (or is
            # not bytes) is skipped. Narrowed from a bare ``except: pass``
            # so that interrupts and unrelated bugs are no longer hidden.
            try:
                name = str(sym.name, 'ascii')
                offset = sym['st_value']
            except (TypeError, ValueError, KeyError):
                continue
            settings.name_to_address[name] = offset
            settings.address_to_name[offset] = name

    settings.capstone.detail = True

    parsed_version = read_version(settings)
    if parsed_version is not None:
        settings = settings._replace(version=parsed_version)
    return settings
def get_hex():
    """ Get byte encodings corresponding to each source code line

    Reads ``a.out`` from the current directory, walks the DWARF line program
    of its first compilation unit and accumulates the bytes read from the file
    into a list with one entry per source line (entry ``i`` holds the bytes
    attributed to line ``i + 1``).

    Returns:
        A list of ``lp[-1].state.line + 1`` accumulated strings.
    """
    f = open('a.out', 'rb')
    info = ELFFile(f)
    dwarf = info.get_dwarf_info()
    # Only the first compilation unit is considered.
    cu = next(dwarf.iter_CUs())
    lp = dwarf.line_program_for_CU(cu).get_entries()
    textsec = info.get_section_by_name('.text')
    # Addresses of all symbols named '$d'
    # (presumably ARM "data in code" mapping symbols - TODO confirm).
    datas = filter(lambda s: s.name == '$d', info.get_section_by_name('.symtab').iter_symbols())
    datas = sorted(map(lambda s: s.entry['st_value'], datas))
    # Keep only addresses strictly inside the range covered by the line program.
    datas = filter(lambda s: lp[0].args[0] < s < lp[-1].state.address, datas)
    # NOTE(review): .append() requires filter() to return a list (Python 2
    # semantics).  The trailing 0 is a sentinel: `pc < 0` is never true, so
    # the range checks below stop matching once the real entries are used up.
    datas.append(0)
    # Virtual address minus file offset for .text.
    voff = textsec.header['sh_addr'] - textsec.header['sh_offset']
    curr_line = 0; curr_data = 0; update_line = False
    res = [''] * (lp[-1].state.line + 1)
    # The first entry's argument is used as the start address; position the
    # file there and drop that entry from the list.
    f.seek(lp.pop(0).args[0] - voff, os.SEEK_SET)
    for e in lp:
        if update_line:
            # Deferred line update requested by the previous entry.
            curr_line = e.state.line - 1
            update_line = False
        if len(e.args) == 0:
            # No operands: just resynchronise the current line.
            curr_line = e.state.line - 1
        elif len(e.args) == 1:
            # Single operand: treated as the number of bytes to consume.
            pc = voff + f.tell()
            if pc < datas[curr_data] <= pc + e.args[0]:
                # A '$d' address falls inside this range: keep only the bytes
                # before it and skip the rest of the range.
                size = datas[curr_data] - pc
                res[curr_line] += f.read(size)
                f.seek(e.args[0] - size, os.SEEK_CUR)
                while pc < datas[curr_data] < pc + e.args[0]:
                    curr_data += 1
                update_line = True
            else:
                res[curr_line] += f.read(e.args[0])
        elif len(e.args) > 1:
            # Two operands: args[0] is used as a line delta and args[1] as a
            # byte count (see the final else branch).
            pc = voff + f.tell()
            if pc < datas[curr_data] < pc + e.args[1]:
                size = datas[curr_data] - pc
                res[curr_line] += f.read(size)
                f.seek(e.args[1] - size, os.SEEK_CUR)
                curr_line = e.state.line - 1
                while pc < datas[curr_data] < pc + e.args[1]:
                    curr_data += 1
            elif e.args[0] == 0:
                # Zero line delta: the bytes are skipped, not recorded.
                f.seek(e.args[1], os.SEEK_CUR)
                curr_line = e.state.line - 1
            else:
                res[curr_line] += f.read(e.args[1])
                curr_line += e.args[0]
    f.close()
    return res
def get_func_addr(self, binary, function_name):
    """Return the ``st_value`` of *function_name* in the ELF file *binary*.

    The symbol is looked up in ``.symtab``; ``.dynsym`` is used only when the
    file has no ``.symtab``.  When several symbols share the name, the first
    one returned by the table is used.

    If the symbol (or any symbol table at all) is missing, the error is
    reported through ``self.error`` and the process exits with status 1.
    """
    with open(binary, "rb") as bin_data:
        elf = ELFFile(bin_data)
        sec = elf.get_section_by_name(".symtab")
        if sec is None:
            sec = elf.get_section_by_name(".dynsym")

        # A file with neither table used to crash with AttributeError on
        # None; treat it the same way as a missing symbol.
        symbols_list = None
        if sec is not None:
            symbols_list = sec.get_symbol_by_name(function_name)

        if symbols_list is None:
            self.error("{} not found".format(function_name))
            exit(1)

        addr = symbols_list[0].entry['st_value']
        self.debug("{} addr: {}".format(function_name, hex(addr)))
        return addr
def __init__(self, f):
    """Parse the ELF stream *f* and collect what later analysis needs.

    Sets ``self.bit`` (32/64), ``self.text`` / ``self.text_addr``,
    ``self.funcs`` (PLT slot address -> imported symbol name), ``self.ep``
    (the immediate operand of the instruction preceding the call to
    ``__libc_start_main``, when such a call is found) and ``self.rodata`` /
    ``self.rodata_data``.  The stream is closed before returning.

    Raises:
        ValueError: if the machine architecture is neither x86 nor x64.
    """
    elf = ELFFile(f)
    arch = elf.get_machine_arch()
    if arch == 'x64':
        self.bit = 64
    elif arch == 'x86':
        self.bit = 32
    else:
        # Previously self.bit was left unset and the failure surfaced later
        # as an unrelated AttributeError.
        raise ValueError('unsupported ELF architecture: %s' % arch)
    is64 = self.bit == 64

    text = elf.get_section_by_name('.text')
    self.text = text.data()
    self.text_addr = text.header['sh_addr']

    dynsym = elf.get_section_by_name('.dynsym')
    relplt = elf.get_section_by_name('.rela.plt' if is64 else '.rel.plt')
    relpltdata = relplt.data()
    entsize = relplt.header['sh_entsize']

    # Entry 0 is a placeholder; the following entries are the dynamic symbol
    # indices taken from the PLT relocations, in order.
    dynsym_list = [0]
    for x in range(0, len(relpltdata), entsize):
        entry = relpltdata[x:x + entsize]
        if is64:
            # presumably Elf64_Rela: r_offset, r_info (type, sym), r_addend
            _, _, num, _ = unpack('QIIQ', entry)
        else:
            # presumably Elf32_Rel: r_offset, r_info; symbol index = r_info >> 8
            _, num = unpack('II', entry)
            num = num >> 8
        dynsym_list.append(num)

    # Replace each symbol index by its name (first occurrence only, exactly
    # as before).
    for c, sym in enumerate(dynsym.iter_symbols()):
        if c in dynsym_list:
            dynsym_list[dynsym_list.index(c)] = sym.name

    plt = elf.get_section_by_name('.plt')
    plt_data = plt.data()
    plt_addr = plt.header['sh_addr']
    self.funcs = {}
    for n in range(0, len(plt_data), 16):
        # Floor division keeps the index an int on Python 3 as well.
        self.funcs[n + plt_addr] = dynsym_list[n // 16]

    # Operand 0 of the preceding instruction is read on 32-bit and operand 1
    # on 64-bit.
    if is64:
        disassembler, operand_index = md64, 1
    else:
        disassembler, operand_index = md32, 0
    before = None
    for i in disassembler.disasm(self.text, self.text_addr):
        if (i and i.mnemonic == 'call'
                and i.operands[0].imm in self.funcs
                and self.funcs[i.operands[0].imm] == '__libc_start_main'):
            self.ep = before.operands[operand_index].imm
            break
        before = i

    self.rodata = elf.get_section_by_name('.rodata')
    self.rodata_data = self.rodata.data()
    f.close()
def parse_elf(self):
    """Open the binary named by ``self.f`` and collect its PLT information.

    ``self.f`` is rebound from the path to the open file object while
    parsing; the file is always closed before this method returns.

    Returns:
        ``None`` for an unsupported architecture, the result of
        ``self.parse_symbols`` on ``.dynsym`` when ``self.to_look`` is set,
        and ``self.plts`` otherwise.
    """
    self.f = open(self.f, 'rb')  # read binary from command line
    try:
        elff = ELFFile(self.f)
        arch = elff.get_machine_arch()
        if arch == "x64":
            cs_arch = CS_ARCH_X86
            cs_mode = CS_MODE_64
        elif arch == "x86":
            cs_arch = CS_ARCH_X86
            cs_mode = CS_MODE_32
        else:
            # This early return used to leave the file open.
            print("ELF architecture '%s' currently not supported" % arch)
            return

        # Initialize capstone
        self.md = Cs(cs_arch, cs_mode)

        # Address/offset delta, taken from section index 1.
        s = elff.get_section(1)
        self.align = s['sh_addr'] - s['sh_offset']

        s = elff.get_section_by_name('.plt')
        if s:
            print('.plt')
            self.plt_start, self.plt_end = s['sh_addr'], s['sh_addr'] + s['sh_size']
            self.plti = self.plt_start + 16
            print('0x%x 0x%x' % (self.plt_start, self.plt_end))

            s = elff.get_section_by_name('.dynsym')
            if s:
                print(s.name)
                syms = self.parse_symbols(s)
                if self.to_look:
                    return syms
            else:
                print('No Dynamic Symbols table (.dynsym)')

            s = elff.get_section_by_name('.symtab')
            if s:
                print(s.name)
                self.parse_symbols(s)
            else:
                print('No Symbols Table (.symtab)')
        else:
            print('No plt table (.plt)')
        return self.plts
    finally:
        # Single cleanup point for every exit path, including exceptions.
        self.f.close()
def encrypt(file_path, key):
    """XOR the contents of the ``.encrypted`` section of *file_path* in place.

    Args:
        file_path: path of the ELF file to patch.
        key: key passed to ``xor`` together with the section bytes.

    Returns:
        ``-1`` when the section is shorter than the key (the file is left
        untouched), ``True`` after the patched file has been written.
    """
    with open(file_path, 'rb') as f:
        section = ELFFile(f).get_section_by_name('.encrypted')
        # sh_offset is the position inside the file; sh_addr is the load
        # address and only matches the file position by coincidence.
        offset = section['sh_offset']
        size = section['sh_size']
        f.seek(0)
        data = f.read()

    encrypted = data[offset:offset + size]
    if len(encrypted) < len(key):
        return -1

    # Splice the transformed bytes back at the section's position instead of
    # replace(), which would also rewrite any identical byte run elsewhere.
    patched = data[:offset] + xor(encrypted, key) + data[offset + size:]

    with open(file_path, 'wb') as f:
        f.write(patched)
    return True
def main():
    """Disassemble the ELF named on the command line and annotate the listing.

    Runs ``objdump`` into ``<basename>.asm``, parses that listing with
    ``ASMFile`` and writes ``<basename>.annotated.asm`` after ``adddbgstr``
    has annotated the lines that reference section data.
    """
    import subprocess  # local: only this entry point needs it

    if len(sys.argv) != 2:
        usage()
        return

    elffname = sys.argv[1]
    if not os.path.exists(elffname):
        print("Error: file '%s' does not exist" % elffname)
        return

    # name + ext
    basename = os.path.basename(elffname)
    asmfname = basename + '.asm'

    # dump source
    print("creating assembler output")
    # disassembles Intel style, adds relocation info and uses 'wide' printing.
    # An argument list is used instead of a shell string so file names with
    # spaces or shell metacharacters are passed through untouched.
    with open(asmfname, 'w') as asmout:
        subprocess.call(["objdump", "-M", "intel", "-drw", elffname], stdout=asmout)

    with open(asmfname, 'r') as asmfh:
        asm = ASMFile(asmfh)
    lines = asm.asmlines[:]  # shallow copy

    outputfname = basename + '.annotated.asm'
    print("writing merged asm")
    with open(outputfname, 'w') as fw:
        with open(elffname, 'rb') as fh:
            elffile = ELFFile(fh)
            if len(asm.roreloclines):
                # we have relocation information
                for secname, addr, idx in asm.roreloclines:
                    sec = elffile.get_section_by_name(secname)
                    if not sec:
                        print("warning: assembler file states offset from symbol '%s' which is not a valid ELF section" % secname)
                    else:
                        adddbgstr(sec, addr, idx, lines)
            elif asm.loadaddrlines:
                # no relocation information present, so try to find
                # information in load opcodes; the section is looked up once
                # and its absence reported once.
                sec = elffile.get_section_by_name('.rodata')
                if not sec:
                    print("unable to find a section named '.rodata' in elf file, probably different name or no read-only data present")
                else:
                    for addr, idx in asm.loadaddrlines:
                        adddbgstr(sec, addr, idx, lines)
        for l in lines:
            fw.write(l + '\n')
def process_file(stream, isNorm):
    """Record the addresses of labelled symbols found in an ELF stream.

    Every ``.symtab`` symbol whose value is non-zero and whose name contains
    ``divLabel`` is stored as ``name -> st_value`` in the module-level
    ``normAddresses`` dict when *isNorm* is true, otherwise in
    ``divAddresses``.

    Prints an error and exits the process (status 0, as before) when the file
    has no ``.symtab`` section.
    """
    elffile = ELFFile(stream)
    section = elffile.get_section_by_name(b'.symtab')
    if not section:
        print('ERROR: No symbol table found. Perhaps this ELF has been stripped?')
        sys.exit(0)

    if not isinstance(section, SymbolTableSection):
        return

    # The dicts are only mutated, never rebound, so no `global` is needed.
    addresses = normAddresses if isNorm else divAddresses

    # TODO UNIQUE ADDRESSES...
    # Each symbol is parsed once per iteration instead of up to four times.
    for symbol in section.iter_symbols():
        value = symbol.entry['st_value']
        if value == 0:
            continue
        name = symbol.name
        if name.find(divLabel) == -1:
            continue  # only valid labels
        addresses[name] = value
def main():
    """Extract the ``.text`` section of an ELF file as shellcode.

    The section bytes are handed to ``write_js`` / ``write_raw`` /
    ``write_c`` for each output file requested on the command line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="The input ELF file", type=argparse.FileType("rb"))
    parser.add_argument("-j", "--javascript", help="Save shellcode to javascript file", type=argparse.FileType("w+"))
    parser.add_argument("-r", "--raw", help="Save shellcode to raw binary file", type=argparse.FileType("wb+"))
    parser.add_argument("-c", "--cstub", help="Save shellcode to a C stub test file", type=argparse.FileType("wb+"))
    args = parser.parse_args()

    # Check for requested outputs before doing any parsing work.
    if args.javascript is None and args.raw is None and args.cstub is None:
        sys.stderr.write("No output file(s) specified, nothing to do.\n")
        return

    elffile = ELFFile(args.input)
    text = elffile.get_section_by_name(".text")
    if text is None:
        # Previously this crashed with AttributeError on None.
        sys.stderr.write("Input ELF has no .text section, nothing to extract.\n")
        return
    data = text.data()

    if args.javascript is not None:
        write_js(args.javascript, data)
    if args.raw is not None:
        write_raw(args.raw, data)
    if args.cstub is not None:
        write_c(args.cstub, data)
def assertImageContainsSection(self, image, elf, section_name):
    """
    Check that every byte of ELF section `section_name` (read from the
    file `elf`) is present in the segments of the binary image `image`.
    """
    def by_addr(segment):
        return segment.addr

    with open(elf, "rb") as f:
        section = ELFFile(f).get_section_by_name(section_name)
        self.assertTrue(section, "%s should be in the ELF" % section_name)
        next_addr = section.header.sh_addr
        remaining = section.data()

        # The section may be split over several image segments.  Walk the
        # segments in address order and strip each matching piece off the
        # front of 'remaining'; nothing should be left at the end.
        for seg in sorted(image.segments, key=by_addr):
            print("comparing seg 0x%x sec 0x%x len 0x%x" % (seg.addr, next_addr, len(remaining)))
            if seg.addr != next_addr:
                continue
            overlap_len = min(len(seg.data), len(remaining))
            self.assertEqual(
                remaining[:overlap_len], seg.data[:overlap_len],
                "ELF '%s' section has mis-matching binary image data" % section_name)
            next_addr += overlap_len
            remaining = remaining[overlap_len:]

    # every byte of the section must have been matched
    self.assertEqual(
        0, len(remaining),
        "ELF %s section '%s' has no encompassing segment(s) in binary image (image segments: %s)"
        % (elf, section_name, image.segments))
def _test_elf2image(self, elfpath, binpath):
    """Run elf2image (esp8266, version 2) on `elfpath` and validate `binpath`.

    Checks the segment count, that .data/.text/.rodata are contained in the
    image, the size and address of the IROM segment, and the stored CRC.
    The generated binary is always deleted afterwards.
    """
    try:
        self.run_elf2image("esp8266", elfpath, 2)
        image = esptool.LoadFirmwareImage("esp8266", binpath)
        self.assertEqual(4, len(image.segments))
        for section_name in (".data", ".text", ".rodata"):
            self.assertImageContainsSection(image, elfpath, section_name)

        irom_segment = image.segments[0]
        self.assertEqual(0, irom_segment.addr,
                         "IROM segment 'load address' should be zero")
        with open(elfpath, "rb") as elf_fh:
            irom_header = ELFFile(elf_fh).get_section_by_name(".irom0.text").header
            # round the section size up to a multiple of 16
            sh_size = (irom_header.sh_size + 15) & ~15
            actual_len = len(irom_segment.data)
            self.assertEqual(
                actual_len, sh_size,
                "irom segment (0x%x) should be same size (16 padded) as .irom0.text section (0x%x)"
                % (actual_len, sh_size))

        # check V2 CRC (for ESP8266 SDK bootloader): the last four bytes of
        # the file hold the CRC of everything before them
        with open(binpath, "rb") as bin_fh:
            bin_fh.seek(-4, os.SEEK_END)
            image_len = bin_fh.tell()
            (crc_stored,) = struct.unpack("<I", bin_fh.read(4))
            bin_fh.seek(0)
            crc_calc = esptool.esp8266_crc32(bin_fh.read(image_len))
            self.assertEqual(crc_stored, crc_calc)

        # test imageinfo doesn't fail
        self.assertImageInfo(binpath)
    finally:
        try_delete(binpath)
def _test_elf2image(self, elfpath, binpath):
    """Convert `elfpath` with elf2image (esp8266, version 2) and verify `binpath`.

    Verifies the number of segments, the presence of the .data/.text/.rodata
    contents, the IROM segment address and padded size, and the trailing CRC.
    `binpath` is removed when the check finishes, whatever the outcome.
    """
    try:
        self.run_elf2image("esp8266", elfpath, 2)
        image = esptool.LoadFirmwareImage("esp8266", binpath)
        self.assertEqual(4, len(image.segments))
        for name in (".data", ".text", ".rodata"):
            self.assertImageContainsSection(image, elfpath, name)

        irom_segment = image.segments[0]
        self.assertEqual(0, irom_segment.addr,
                         "IROM segment 'load address' should be zero")

        with open(elfpath, "rb") as elf_stream:
            elf = ELFFile(elf_stream)
            unpadded = elf.get_section_by_name(".irom0.text").header.sh_size
            sh_size = (unpadded + 15) & ~15  # pad to a 16-byte boundary
            segment_len = len(irom_segment.data)
            message = ("irom segment (0x%x) should be same size (16 padded) as .irom0.text section (0x%x)"
                       % (segment_len, sh_size))
            self.assertEqual(segment_len, sh_size, message)

        # check V2 CRC (for ESP8266 SDK bootloader)
        with open(binpath, "rb") as bin_stream:
            bin_stream.seek(-4, os.SEEK_END)
            image_len = bin_stream.tell()  # everything except the CRC word
            crc_bytes = bin_stream.read(4)
            crc_stored = struct.unpack("<I", crc_bytes)[0]
            bin_stream.seek(0)
            payload = bin_stream.read(image_len)
            crc_calc = esptool.esp8266_crc32(payload)
            self.assertEqual(crc_stored, crc_calc)

        # test imageinfo doesn't fail
        self.assertImageInfo(binpath)
    finally:
        try_delete(binpath)
def assertImageContainsSection(self, image, elf, section_name):
    """
    Verify that the binary image `image` holds all of the data of section
    `section_name` from the ELF file `elf`.
    """
    with open(elf, "rb") as f:
        e = ELFFile(f)
        section = e.get_section_by_name(section_name)
        self.assertTrue(section, "%s should be in the ELF" % section_name)
        expected_addr = section.header.sh_addr
        unmatched = section.data()

        # A section's contents can span more than one image segment, so the
        # segments are visited in ascending address order and each matching
        # chunk is removed from 'unmatched' once it has been compared.
        ordered_segments = sorted(image.segments, key=lambda s: s.addr)
        for seg in ordered_segments:
            print("comparing seg 0x%x sec 0x%x len 0x%x"
                  % (seg.addr, expected_addr, len(unmatched)))
            if seg.addr == expected_addr:
                overlap_len = min(len(seg.data), len(unmatched))
                mismatch_msg = "ELF '%s' section has mis-matching binary image data" % section_name
                self.assertEqual(unmatched[:overlap_len], seg.data[:overlap_len], mismatch_msg)
                expected_addr += overlap_len
                unmatched = unmatched[overlap_len:]

    # all section bytes must have been found in some segment
    leftover_msg = ("ELF %s section '%s' has no encompassing segment(s) in binary image (image segments: %s)"
                    % (elf, section_name, image.segments))
    self.assertEqual(0, len(unmatched), leftover_msg)
class FreezingBear(object):
    """Offset-indexed cache of objects parsed out of an ELF stream.

    Objects are created on demand by ``lookup`` and remembered both by file
    offset (``by_offset``) and by the class that built them (``by_class``).
    """

    def __init__(self, stream):
        """Wrap *stream* in an ``ELFFile`` and load the class list."""
        self.stream = stream
        self.elf = ELFFile(stream)
        self.by_offset = {}
        self.by_class = defaultdict(list)
        self.get_classlist()

    def lookup(self, offset, cls=None):
        """Return the object cached for *offset*, creating it when needed.

        Args:
            offset: key of the object (a file offset).
            cls: callable invoked as ``cls(self, offset)`` to build the
                object when it is not cached yet.

        Raises:
            KeyError: if *offset* is not cached and no *cls* is given.
        """
        if offset not in self.by_offset:
            if cls is None:
                # Call form of raise: valid on both Python 2 and Python 3.
                raise KeyError(offset)
            obj = cls(self, offset)
            self.by_offset[offset] = obj
            self.by_class[cls].append(obj)
        return self.by_offset[offset]

    def read(self, n=1):
        """Read *n* native-order unsigned 32-bit ints from the current position."""
        return struct.unpack('I' * n, self.stream.read(4 * n))

    def read_at(self, offset, n=1):
        """Seek to *offset*, then read *n* unsigned 32-bit ints."""
        self.stream.seek(offset)
        return self.read(n)

    def get_classlist(self):
        """Return the ``ClassList`` located at the start of the class-list section."""
        section = self.elf.get_section_by_name(classlist_section_name)
        return self.lookup(section.header.sh_offset, ClassList)
def test_irom_bin(self):
    """The raw IROM binary must be exactly as long as the .irom0.text section."""
    with open(self.ELF, "rb") as elf_fh:
        section_size = ELFFile(elf_fh).get_section_by_name(".irom0.text").header.sh_size
        file_size = os.stat(self.BIN_IROM).st_size
        self.assertEqual(
            section_size, file_size,
            "IROM raw binary file should be same length as .irom0.text section")
def section_info_highlevel(stream):
    """Print section information for an ELF stream using ELFFile's public API.

    Reports the number of sections, then the name and type of ``.symtab``
    and, when it is a symbol table section, its symbol count and the name of
    its last symbol.  Returns early if there is no ``.symtab``.
    """
    print('High level API...')
    elf = ELFFile(stream)

    # Everything read from the file, section names included, is a bytes
    # object; bytes2str keeps the printed output identical on Python 2 and 3.
    print(' %s sections' % elf.num_sections())
    symtab = elf.get_section_by_name(b'.symtab')

    if not symtab:
        print(' No symbol table found. Perhaps this ELF has been stripped?')
        return

    # The type comes from the section header; the name is a decoded public
    # attribute of the section object.
    printable_name = bytes2str(symtab.name)
    print(' Section name: %s, type: %s' % (printable_name, symtab['sh_type']))

    # Symbol table sections expose extra symbol-level accessors.
    if not isinstance(symtab, SymbolTableSection):
        return
    count = symtab.num_symbols()
    print(" It's a symbol section with %s symbols" % count)
    last_symbol = symtab.get_symbol(count - 1)
    print(" The name of the last symbol in the section is: %s" % (
        bytes2str(last_symbol.name)))
def parse(self, binary, libclang_path, srcdir=None):
    """Collect the dependencies of *binary* and the prototypes known for it.

    Fills ``self.deps`` with the ``DT_NEEDED`` entries of the binary,
    ``self.protos`` with the prototypes loaded for each dependency, and
    ``self.protos_without_libs`` with the prototypes extracted from the
    sources in *srcdir* (when given); the latter are merged into
    ``self.protos`` last.

    Args:
        binary: path of the ELF file to inspect.
        libclang_path: passed to ``ClangExtractor``.
        srcdir: optional source directory; no source extraction when None.
    """
    print(" * Checking dependencies")
    self.deps = []
    self.protos = {}
    with open(binary, "rb") as f:
        elf_file = ELFFile(f)
        dynamic = elf_file.get_section_by_name(".dynamic")
        # A file without a .dynamic section has no DT_NEEDED tags; this used
        # to crash with AttributeError on None.
        if dynamic is not None:
            for tag in dynamic.iter_tags("DT_NEEDED"):
                print(" Found dependency {}".format(tag.needed))
                self.deps.append(tag.needed)

    for dep in self.deps:
        dep_data = Data(self.dstdir, dep)
        dep_data.load(False)
        self.protos.update(dep_data.protos)

    self.protos_without_libs = {}
    if srcdir is not None:
        print(" * Extracting data from source code")
        extractor = ClangExtractor(libclang_path, srcdir)
        self.protos_without_libs.update(extractor.extract())

    print(" * Extracting data from binary debug informations")
    # The extractor is still constructed, but its result is currently unused.
    extractor = DwarfExtractor()
    # self.protos_without_libs.update(extractor.extract(binary))

    self.protos.update(self.protos_without_libs)
def library_to_sqlalchemy(filepath, filename=None):
    """Build the ``Library`` entity and its ``Symbol`` entities for an ELF file.

    Args:
        filepath: path of the ELF file.
        filename: name stored on the library; defaults to the basename of
            *filepath*.

    Returns:
        ``(library, symbol_entities)``: one ``Symbol`` per distinct symbol
        name that has a non-empty name and a non-zero value, taken from
        ``.symtab`` first and ``.dynsym`` second.

    Raises:
        Exception: if the file has neither ``.symtab`` nor ``.dynsym``.
    """
    # ELF files are binary: text mode may translate or fail to decode bytes.
    with open(filepath, "rb") as fileobj:
        elf_data = fileobj.read()

    checksum = hash_algo(elf_data).hexdigest()
    if filename is None:
        filename = os.path.basename(filepath)

    elf = ELFFile(StringIO(elf_data))
    library = Library(name=filename, checksum=checksum, filepath=filepath,
                      elfclass=elf.elfclass, machine_arch=elf.get_machine_arch())

    tables = [table for table in (elf.get_section_by_name(".symtab"),
                                  elf.get_section_by_name(".dynsym")) if table]
    if not tables:
        raise Exception("No symbol table found")
    symbols = chain(*[table.iter_symbols() for table in tables])

    seen_symbols = set()
    symbol_entities = []
    for symbol in symbols:
        name = symbol.name
        if not name or not symbol.entry["st_value"] or name in seen_symbols:
            continue
        symbol_entities.append(Symbol(name=name,
                                      addr=symbol.entry["st_value"],
                                      library=library))
        seen_symbols.add(name)
    return library, symbol_entities
def get_otbn_syms(elf_path: str) -> List[Tuple[str, int]]:
    '''Return the globally-bound symbols of an ELF file.

    The result is a list of pairs: (name, address). The ELF is first run
    through objcopy, which removes the .scratchpad section and extracts only
    the symbol data; symbols whose binding is not STB_GLOBAL are then
    dropped. An empty list is returned when the resulting file has no usable
    symbol table.

    '''
    objcopy_args = [
        '-O', 'elf32-littleriscv',
        '--remove-section=.scratchpad',
        '--extract-symbol',
        elf_path,
    ]
    with tempfile.TemporaryDirectory() as tmpdir:
        # objcopy writes a reduced copy of the ELF into the scratch directory.
        syms_path = os.path.join(tmpdir, 'syms.elf')
        call_rv32_objcopy(objcopy_args + [syms_path])

        # Parse the reduced file with elftools and pick up its symbol table.
        with open(syms_path, 'rb') as syms_fd:
            symtab = ELFFile(syms_fd).get_section_by_name('.symtab')
            if not isinstance(symtab, SymbolTableSection):
                # Either there is no .symtab at all (None) or the section of
                # that name is not really a symbol table. Nothing to report.
                return []

            ret = []
            for sym in symtab.iter_symbols():
                if sym['st_info']['bind'] == 'STB_GLOBAL':
                    addr = sym['st_value']
                    assert isinstance(addr, int)
                    ret.append((sym.name, addr))
            return ret
def disasm_elf(self, seg_name='.text'):
    """Disassemble section *seg_name* of the emulated binary from memory.

    Only handled when ``self.ql._archtype`` is ``QL_ARCH.X86``; for any other
    architecture an empty list is returned.

    Args:
        seg_name: name of the ELF section to disassemble.

    Returns:
        A list of the instructions produced by the disassembler.
    """
    def disasm(ql, address, size):
        md = ql.create_disassembler()
        md.detail = True
        return md.disasm(ql.mem.read(address, size), address)

    disasm_result = []
    if self.ql._archtype == QL_ARCH.X86:
        BASE = int(self.ql.profile.get("OS32", "load_address"), 16)
        seg_start = 0x0
        seg_end = 0x0

        # The file is only needed to compute the section bounds, so it is
        # closed again before disassembling (it used to be left open).
        with open(self.ql.path, 'rb') as f:
            elffile = ELFFile(f)
            e_type = elffile.header['e_type']
            section = elffile.get_section_by_name(seg_name)

            # No PIE
            if e_type == 'ET_EXEC':
                seg_start = section.header.sh_addr
                seg_end = seg_start + section.data_size
            # PIE: section addresses are relative to the load address
            elif e_type == 'ET_DYN':
                seg_start = BASE + section.header.sh_addr
                seg_end = seg_start + section.data_size

        # The instance's own emulator is passed here; the bare name `ql`
        # used before is not defined anywhere in this method.
        disasm_result.extend(disasm(self.ql, seg_start, seg_end - seg_start))

    return disasm_result
def calculate_symbol_versions(libraries, symbol_versions, arch):
    """Collect the versions each library defines for the given symbol prefixes.

    Every version-definition name found in the ``.gnu.version_d`` section of
    each library is split once at the first underscore into
    ``(name, version)``.  Versions whose name is a key of *symbol_versions*
    are collected, except the version ``"PRIVATE"``.

    Args:
        libraries: library names, resolved to paths with ``find_library``.
        symbol_versions: iterable of the names to collect versions for.
        arch: unused; kept for interface compatibility.

    Returns:
        A dict mapping each name in *symbol_versions* to its versions, sorted
        with ``versionify`` as the key.
    """
    calculated_symbol_versions = {k: set() for k in symbol_versions}

    for library in libraries:
        library_path = find_library(library)
        with open(library_path, "rb") as f:
            section = ELFFile(f).get_section_by_name(".gnu.version_d")
            if not section:
                continue
            for _, verdef_iter in section.iter_versions():
                for vernaux in verdef_iter:
                    try:
                        name, version = vernaux.name.split("_", 1)
                    except ValueError:
                        # No underscore in the name: skip it instead of
                        # falling through with stale (or unbound) values.
                        continue
                    if name in calculated_symbol_versions and version != "PRIVATE":
                        calculated_symbol_versions[name].add(version)

    return {k: sorted(v, key=versionify)
            for k, v in calculated_symbol_versions.items()}
def section_info_highlevel(stream):
    """Print section information for an ELF stream using ELFFile's public API.

    Reports the number of sections, then the name and type of ``.symtab``
    and, when it is a symbol table section, its symbol count and the name of
    its last symbol.  Returns early if there is no ``.symtab``.
    """
    print('High level API...')
    elf = ELFFile(stream)

    # Section names are plain strings, so they can be printed directly.
    section_count = elf.num_sections()
    print(' {0!s} sections'.format(section_count))

    symtab = elf.get_section_by_name('.symtab')
    if not symtab:
        print(' No symbol table found. Perhaps this ELF has been stripped?')
        return

    # The type comes from the section header; the name is a decoded public
    # attribute of the section object.
    print(' Section name: {0!s}, type: {1!s}'.format(symtab.name, symtab['sh_type']))

    # Symbol table sections expose extra symbol-level accessors.
    if not isinstance(symtab, SymbolTableSection):
        return
    count = symtab.num_symbols()
    print(" It's a symbol section with {0!s} symbols".format(count))
    last_symbol = symtab.get_symbol(count - 1)
    print(" The name of the last symbol in the section is: {0!s}".format(last_symbol.name))
def process_file(self, filename):
    """Load the .text section and the symbol map of the ELF file `filename`.

    Sets self.entry_point / self.entry_end to the address range of .text,
    fills self.addr_to_sym from .symtab (symbol index 0 is skipped) and
    stores the raw bytes of .text in self.code.
    """
    with open(filename, 'rb') as f:
        elf = ELFFile(f)

        # .text is used directly rather than the section containing the
        # header's e_entry: with gcc -O2/-O3 the entry point and the text
        # section don't match.
        text = elf.get_section_by_name('.text')
        text_addr = text['sh_addr']
        self.entry_point = text_addr
        self.entry_end = text_addr + text['sh_size']

        # find symtab and create address to symbol dictionary
        symtab = elf.get_section_by_name('.symtab')
        if not symtab:
            print('No symbol table found. Perhaps binary stripped')
        if isinstance(symtab, SymbolTableSection):
            for index in range(1, symtab.num_symbols()):
                symbol = symtab.get_symbol(index)
                self.addr_to_sym[symbol['st_value']] = symbol.name

        # copy out the entry section
        f.seek(text['sh_offset'])
        self.code = f.read(text['sh_size'])
def process_file(filename):
    """Print the ELF class, machine architecture and .text section of a file.

    Args:
        filename: path of the ELF file to inspect.
    """
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)
        # elfclass is a public attribute of ELFFile, read from its header
        print('%s: elfclass is %s' % (filename, elffile.elfclass))
        print(elffile.get_machine_arch())
        ss = elffile.get_section_by_name('.text')
        # Call form: same output on Python 2, and valid syntax on Python 3.
        print(ss)
def get_section_bounds(section_name):
    """Return ``(start, end)`` addresses of a section of the debugged executable.

    ``start`` is the section's ``sh_addr`` and ``end`` is ``start`` plus its
    ``sh_size``.  The name is ASCII-encoded before the lookup.
    """
    encoded_name = section_name.encode('ascii')
    with open(pwndbg.proc.exe, 'rb') as exe:
        section = ELFFile(exe).get_section_by_name(encoded_name)
        begin = section['sh_addr']
        end = begin + section['sh_size']
    return (begin, end)
def elf_find_ucs2_symbols(elf: ELFFile) -> Iterator[str]:
    """Yield the names of undefined UCS2 function symbols in ``.dynsym``.

    A symbol is reported when its name contains ``PyUnicodeUCS2_``, its
    section index is ``SHN_UNDEF`` (externally referenced) and its type is
    ``STT_FUNC``.  Nothing is yielded when there is no ``.dynsym`` section.
    """
    dynsym = elf.get_section_by_name('.dynsym')
    if dynsym is None:
        return
    for symbol in dynsym.iter_symbols():
        if 'PyUnicodeUCS2_' not in symbol.name:
            continue
        if symbol['st_shndx'] != 'SHN_UNDEF':
            continue
        if symbol['st_info']['type'] != 'STT_FUNC':
            continue
        yield symbol.name
def getRawShellcode(f, outFile, section=".text"):
    """Write the raw bytes of an ELF section to ``<outFile>.raw``.

    Args:
        f: open binary stream of the ELF file.
        outFile: output path without the ``.raw`` suffix.
        section: name of the section to dump.

    Any error is printed and the process exits.
    NOTE(review): the exit status on failure is 0, as before - confirm that
    callers do not expect a non-zero status.
    """
    try:
        e = ELFFile(f)
        s = e.get_section_by_name(section)
        fileName = outFile + ".raw"
        # Binary mode, and closed (hence flushed) deterministically.
        with open(fileName, 'wb') as out:
            out.write(s.data())
    except Exception as err:
        print(err)
        exit(0)
def test_reloc(self):
    """Relocating the unrelocated object must reproduce the linked .text data."""
    test_dir = os.path.join('test', 'testfiles_for_unittests')
    with open(os.path.join(test_dir, 'arm_reloc_unrelocated.o'), 'rb') as rel_f, \
            open(os.path.join(test_dir, 'arm_reloc_relocated.elf'), 'rb') as f:
        rel_elf = ELFFile(rel_f)
        elf = ELFFile(f)

        # Comparison of '.text' section data.  assertEqual replaces the
        # deprecated assertEquals alias, which newer Pythons no longer have.
        self.assertEqual(do_relocation(rel_elf),
                         elf.get_section_by_name('.text').data())
def elf_find_versioned_symbols(elf: ELFFile) -> Iterator[Tuple[str, str]]:
    """Yield ``(library name, version name)`` pairs from ``.gnu.version_r``.

    Entries whose library name starts with ``ld-linux`` are skipped.  Both
    names are decoded as UTF-8.  Nothing is yielded when the section is
    absent.
    """
    version_r = elf.get_section_by_name(b'.gnu.version_r')
    if version_r is not None:
        for needed, aux_entries in version_r.iter_versions():
            if not needed.name.decode('utf-8').startswith('ld-linux'):
                for aux in aux_entries:
                    library_name = needed.name.decode('utf-8')
                    version_name = aux.name.decode('utf-8')
                    yield (library_name, version_name)
def elf_references_PyFPE_jbuf(elf: ELFFile) -> bool:
    """Tell whether ``.dynsym`` references any of the PyFPE symbols.

    Returns True when a symbol named ``PyFPE_jbuf``, ``PyFPE_dummy`` or
    ``PyFPE_counter`` is undefined (``SHN_UNDEF``, i.e. externally
    referenced) and has type ``STT_FUNC`` or ``STT_NOTYPE``; False otherwise,
    including when there is no ``.dynsym`` section.
    """
    dynsym = elf.get_section_by_name('.dynsym')
    if dynsym is None:
        return False

    offending_symbol_names = ('PyFPE_jbuf', 'PyFPE_dummy', 'PyFPE_counter')
    accepted_types = ('STT_FUNC', 'STT_NOTYPE')
    return any(
        symbol.name in offending_symbol_names
        and symbol['st_shndx'] == 'SHN_UNDEF'
        and symbol['st_info']['type'] in accepted_types
        for symbol in dynsym.iter_symbols()
    )
def _test_SUNW_ldynsym_section_generic(self, testfile, reference_data):
    """Compare the symbol names of .SUNW_ldynsym in `testfile` with `reference_data`.

    The section must exist; its symbols are compared pairwise, in order,
    with the reference names.
    """
    path = os.path.join('test', 'testfiles_for_unittests', testfile)
    with open(path, 'rb') as f:
        ldynsym = ELFFile(f).get_section_by_name(b'.SUNW_ldynsym')
        self.assertIsNotNone(ldynsym)
        pairs = zip(ldynsym.iter_symbols(), reference_data)
        for symbol, expected_name in pairs:
            self.assertEqual(symbol.name, expected_name)
def fragment_sizes(fp):
    """Yield the sizes of the runs of .text that lie between relocations.

    Relocation offsets from .rela.text are visited in ascending order; each
    gap of at least one byte before an offset is yielded and scanning resumes
    one byte past that offset.  A final size of ``text_size - 1 - start`` is
    yielded when it is at least one.  Without a .rela.text section the size
    of the whole .text section is the only value produced.
    """
    elf = ELFFile(fp)
    text_size = elf.get_section_by_name(b'.text').header.sh_size
    rela = elf.get_section_by_name(b'.rela.text')
    if rela is None:
        # no relocations for .text
        yield text_size
        return

    cursor = 0
    for reloc_offset in sorted(r.entry.r_offset for r in rela.iter_relocations()):
        gap = reloc_offset - cursor
        if gap >= 1:
            yield gap
        cursor = reloc_offset + 1

    tail = text_size - 1 - cursor
    if tail >= 1:
        yield tail