class ReadElf(object):
    """Extract selected config symbols and the DT_SONAME entry from an ELF."""

    def __init__(self, file):
        # `file`: binary stream positioned at the start of the ELF image.
        self.file = ELFFile(file)

    def get_symbol_tables(self):
        """Return {name: (st_value, st_size)} for the symbols of interest.

        On non-32-bit ELFs only the low 16 bits of st_value are kept
        (preserved from the original implementation; reason not documented).
        """
        wanted = ('KEY_XOR', 'C_CC_HOST', 'C_CC_URI')
        found = {}
        for section in self.file.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            if not section['sh_entsize']:
                continue
            for symbol in section.iter_symbols():
                name = bytes2str(symbol.name)
                if name not in wanted:
                    continue
                value = symbol['st_value']
                if self.file.elfclass != 32:
                    value %= 65536
                found[name] = (value, symbol['st_size'])
        return found

    def get_soname(self):
        """Return the last DT_SONAME string found in any dynamic section,
        or None when the binary carries none."""
        soname = None
        for section in self.file.iter_sections():
            if not isinstance(section, DynamicSection):
                continue
            for tag in section.iter_tags():
                if tag.entry.d_tag == 'DT_SONAME':
                    soname = bytes2str(tag.soname)
        return soname
def sendElfFile(progAddress, typeProg): #takes information from elf file and send through USB print "Reading data from ELF file", fileAddr[currentLoc] with open(fileAddr[currentLoc], 'r') as f: # get the data elffile = ELFFile(f) print 'SECTIONS FOUND IN FILE:' #prints all the sections found for s in elffile.iter_sections(): print "[%d] %s %s start addr:0x%06x size:%d offs:%d" %( s.header['sh_name'], s.name, s.header['sh_type'], s.header['sh_addr'], s.header['sh_size'], s.header['sh_offset']) print "" # prepares the memory #flashMemory = bytearray(flashsize) flashMemory = bytearray([]) #fills the bytearray with 0xff for i in range(0, flashsize): flashMemory.append(0xff) for s in elffile.iter_sections(): if (s.header['sh_type'] == 'SHT_PROGBITS' and s.header['sh_addr'] < 0xf80000): addr = s.header['sh_addr'] if s.name == '.const': addr = addr - 0x8000 #eliminates an error, I don't know why size = s.header['sh_size'] val = s.data() print '%s section from 0x%06x to 0x%06x appended to file' %(s.name, addr, (addr - 1 + size/2)) flashMemory[addr*2 : addr*2 + size] = val #creates an array with proper format progMemory = [[0xffff for x in xrange(RowSize*2)] for x in xrange(171)] #171 #512*2*171=175104-->0x2ac00 for i in xrange(0, 171): a = i * 0x400 * 2 for j in xrange (0, RowSize*2): progMemory[i][j] = (flashMemory[a + j*2] << 8) | flashMemory[a + j*2 + 1] #sends data sendData(progMemory, progAddress, typeProg)
def load_binary(static): elf = ELFFile(open(static.path)) # TODO: replace with elf['e_machine'] progdat = open(static.path).read(0x20) fb = struct.unpack("H", progdat[0x12:0x14])[0] # e_machine static['arch'] = get_arch(fb) static['entry'] = elf['e_entry'] ncount = 0 for section in elf.iter_sections(): addr = section['sh_addr'] slen = section['sh_size'] if addr != 0 and slen > 0: static.add_memory_chunk(addr, section.data()) if isinstance(section, RelocationSection): symtable = elf.get_section(section['sh_link']) for rel in section.iter_relocations(): symbol = symtable.get_symbol(rel['r_info_sym']) if static.debug >= 1: #suppress output for testing print "Relocation",rel, symbol.name if rel['r_offset'] != 0 and symbol.name != "": static[rel['r_offset']]['name'] = "__"+symbol.name ncount += 1 if isinstance(section, SymbolTableSection): for nsym, symbol in enumerate(section.iter_symbols()): if symbol['st_value'] != 0 and symbol.name != "" and symbol['st_info']['type'] == "STT_FUNC": if static.debug >= 1: print "Symbol",symbol['st_value'], symbol.name static[symbol['st_value']]['name'] = symbol.name ncount += 1 if static.debug >= 1: print "** found %d names" % ncount
def fill_hmac_sections(file):
    """Copy args.in_file to args.out_file and patch every
    .data.spm.<caller>.hmac.<callee> section with the callee's HMAC,
    keyed by the caller's SPM key (keys are cached per caller).
    """
    elf_file = ELFFile(file)
    keys = {}
    shutil.copy(args.in_file, args.out_file)
    # Bug fix: HMACs are raw bytes patched at byte offsets, so the output
    # must be opened in binary mode ('r+b', was 'r+').
    with open(args.out_file, 'r+b') as out_file:
        for section in elf_file.iter_sections():
            # Bug fix: dots are now escaped -- the previous pattern
            # '.data.spm...' let '.' match any character.
            match = re.match(r'\.data\.spm\.(\w+)\.hmac\.(\w+)', section.name)
            if not match:
                continue
            caller = match.group(1)
            callee = match.group(2)
            if not caller in keys:
                keys[caller] = get_spm_key(file, caller, args.key, False)
                info('Key used for SPM {}: {}'
                     .format(caller, keys[caller].encode('hex')))
            try:
                hmac = get_spm_hmac(file, callee, keys[caller], False)
                info('HMAC of {} used by {}: {}'
                     .format(callee, caller, hmac.encode('hex')))
                out_file.seek(section['sh_offset'])
                out_file.write(hmac)
            except ValueError:
                # FIXME: this is a compiler bug workaround
                warning('Not adding HMAC for call to unknown SPM {}'
                        .format(callee))
def _find_deps(self, filename, recursive):
    """Return [(name, [libs])] pairs describing the DT_NEEDED dependencies
    of `filename`, optionally recursing into each dependency.

    `name` is the absolute path when self._with_full_path is set, otherwise
    the basename. ELFError from a non-ELF input propagates to the caller;
    the previous `except ELFError: raise` was a no-op and has been removed.
    """
    libs = []
    filename = self._get_abs_path(filename)
    with open(filename, 'rb') as f:
        elf = ELFFile(f)
        for section in elf.iter_sections():
            if not isinstance(section, DynamicSection):
                continue
            for tag in section.iter_tags():
                if tag.entry.d_tag == 'DT_NEEDED':
                    libs.append(bytes2str(tag.needed))
    if self._with_full_path:
        libs = [self._get_abs_path(l) for l in libs]
        deps = [(filename, libs)]
    else:
        deps = [(os.path.basename(filename), libs)]
    if recursive:
        for lib in libs:
            deps += self._find_deps(lib, recursive)
    return deps
def _dump_elf(cls, buf):
    """
    Dump the symbol table of an ELF image held in `buf`, sorted by name.
    Needs pyelftools (https://github.com/eliben/pyelftools)
    """
    from elftools.elf.elffile import ELFFile
    from elftools.elf import descriptions
    from io import BytesIO

    elf = ELFFile(BytesIO(buf))
    print("ELF file:")
    for section in elf.iter_sections():
        if section['sh_type'] != 'SHT_SYMTAB':
            continue
        print(" symbols:")
        for sym in sorted(section.iter_symbols(), key=lambda s: s.name):
            if not sym.name:
                continue  # skip anonymous entries
            info = sym['st_info']
            print(" - %r: size=%d, value=0x%x, type=%s, bind=%s"
                  % (sym.name.decode(),
                     sym['st_size'],
                     sym['st_value'],
                     descriptions.describe_symbol_type(info['type']),
                     descriptions.describe_symbol_bind(info['bind']),
                     ))
    print()
def test_verneed_section(self):
    """Check the parsed GNU version-needs section of a versioned shared
    object against the stored reference data."""
    reference_data = TestSymbolVersioning.verneed_reference_data
    with open(os.path.join('test', 'testfiles_for_unittests',
                           'lib_versioned64.so.1.elf'), 'rb') as f:
        elf = ELFFile(f)
        verneed_section = None
        for section in elf.iter_sections():
            if isinstance(section, GNUVerNeedSection):
                verneed_section = section
                break
        self.assertIsNotNone(verneed_section)
        # Bug fix: iterate the section we actually found, not the loop
        # variable `section` (the two coincided only because of the
        # `break` above).
        for (verneed, vernaux_iter), ref_verneed in zip(
                verneed_section.iter_versions(), reference_data):
            self.assertEqual(verneed.name, ref_verneed['name'])
            self.assertEqual(verneed['vn_cnt'], ref_verneed['vn_cnt'])
            self.assertEqual(verneed['vn_version'],
                             ref_verneed['vn_version'])
            for vernaux, ref_vernaux in zip(
                    vernaux_iter, ref_verneed['vernaux']):
                self.assertEqual(vernaux.name, ref_vernaux['name'])
                self.assertEqual(vernaux['vna_flags'],
                                 ref_vernaux['vna_flags'])
                self.assertEqual(vernaux['vna_other'],
                                 ref_vernaux['vna_other'])
def runCode(self, fileName, parameters=None, stackSize=2048): message = LedgerWalletProxyRequest() f = open(fileName, 'rb') elffile = ELFFile(f) for section in elffile.iter_sections(): if section.name == '.ledger': message.startCode.signature = section.data()[0:ord(section.data()[1]) + 2] break if len(message.startCode.signature) == 0: raise Exception("Missing code signature") message.startCode.stackSize = stackSize message.startCode.entryPoint = elffile.header['e_entry'] message.startCode.parameters = parameters for segment in elffile.iter_segments(): if segment['p_type'] == 'PT_LOAD': codeRange = message.startCode.code.add() flags = 0 if ((segment['p_flags'] & P_FLAGS.PF_W) == 0): flags = flags | 0x01 codeRange.flags = flags codeRange.start = segment['p_vaddr'] codeRange.end = segment['p_vaddr'] + segment['p_memsz'] codeRange.dataLength = segment['p_filesz'] codeRange.data = segment.data() response = self.transport.exchange(message) while response.HasField('logAck'): print response.logAck.message message = LedgerWalletProxyRequest() message.resumeCode.CopyFrom(ResumeCode()) response = self.transport.exchange(message) if response.HasField('startCodeResponseAck'): return response.startCodeResponseAck.response else: raise ProxyException("Unexpected response", response)
class ReadElf(object):
    """Small ELF inspector: exposes the ELF class and DT_NEEDED entries."""

    def __init__(self, file):
        """ file: stream object with the ELF file to read """
        self.elffile = ELFFile(file)

    def elf_class(self):
        """ Return the ELF Class """
        ident = self.elffile.header['e_ident']
        return describe_ei_class(ident['EI_CLASS'])

    def dynamic_dt_needed(self):
        """ Return a list of the DT_NEEDED """
        needed = []
        for section in self.elffile.iter_sections():
            if not isinstance(section, DynamicSection):
                continue
            needed.extend(bytes2str(tag.needed)
                          for tag in section.iter_tags()
                          if tag.entry.d_tag == 'DT_NEEDED')
        return needed
def get_elf_info(filepath):
    """
    Parse and return ELFInfo.

    Adds various calculated properties to the ELF header, segments and
    sections. Such added properties are those with prefix 'x_' in the
    returned dicts.
    """
    local_path = pwndbg.file.get_file(filepath)
    with open(local_path, 'rb') as f:
        elffile = ELFFile(f)
        header = dict(elffile.header)

        perm_table = ((PF_R, 'read'), (PF_W, 'write'), (PF_X, 'execute'))
        segments = []
        for seg in elffile.iter_segments():
            info = dict(seg.header)
            info['x_perms'] = [label for bit, label in perm_table
                               if info['p_flags'] & bit != 0]
            # end of memory backing
            info['x_vaddr_mem_end'] = info['p_vaddr'] + info['p_memsz']
            # end of file backing
            info['x_vaddr_file_end'] = info['p_vaddr'] + info['p_filesz']
            segments.append(info)

        sections = []
        for sec in elffile.iter_sections():
            info = dict(sec.header)
            info['x_name'] = sec.name
            info['x_addr_mem_end'] = info['x_addr_file_end'] = \
                info['sh_addr'] + info['sh_size']
            sections.append(info)

    return ELFInfo(header, sections, segments)
def __init__(self, elf):
    """Index the symbols of the ELF file at path `elf`.

    Builds three structures:
      __glb: {type: {name: address}} for STB_GLOBAL symbols
      __loc: {source file: {type: {name: address}}} for STB_LOCAL symbols
      __map: [address, size, scoped-name] entries sorted by address,
             with __idx as the parallel address list (for bisect lookups).
    """
    self.__glb = defaultdict(dict)
    self.__loc = defaultdict(lambda : defaultdict(dict))
    self.__map = []
    # STT_FILE entries name the compilation unit; the local symbols that
    # follow one belong to that file.
    cur_file = ''
    with open(elf, 'rb') as f:
        ef = ELFFile(f)
        for section in ef.iter_sections():
            if isinstance(section, SymbolTableSection):
                for symbol in section.iter_symbols():
                    if symbol['st_other']['visibility'] == 'STV_DEFAULT':
                        # e.g. 'STT_FUNC' -> 'func'
                        stype = symbol['st_info']['type'][4:].lower()
                        sbind = symbol['st_info']['bind']
                        if stype == 'file':
                            cur_file = symbol.name
                        elif stype != 'notype':
                            # Local symbols are prefixed 'file:' in the map;
                            # other binds (e.g. STB_WEAK) are mapped unscoped
                            # but not recorded in __loc/__glb.
                            scope = ''
                            if sbind == 'STB_LOCAL':
                                scope = cur_file + ':'
                                self.__loc[cur_file][stype][symbol.name] = symbol['st_value']
                            elif sbind == 'STB_GLOBAL':
                                self.__glb[stype][symbol.name] = symbol['st_value']
                            self.__map.append([
                                symbol['st_value'], symbol['st_size'],
                                '{0}{1}'.format(scope, symbol.name)])
    self.__map.sort(key=itemgetter(0))
    # Parallel list of start addresses for binary search.
    self.__idx = [i[0] for i in self.__map]
def load_symbols_elf(filename):
    """Load the symbol tables contained in the file.

    Returns {address: (name, size, True)} for every defined (non-UND)
    STT_FUNC symbol found in any symbol table section.
    """
    symbols = []
    # Bug fix: the file handle was opened/closed manually and leaked if
    # parsing raised; a with-block now guarantees closure.
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)
        for section in elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                # logger.warn is a deprecated alias; use warning().
                logger.warning("Symbol table {} has a sh_entsize of zero.".format(section.name))
                continue
            logger.info("Symbol table {} contains {} entries.".format(
                section.name, section.num_symbols()))
            for symbol in section.iter_symbols():
                if describe_symbol_shndx(symbol['st_shndx']) != "UND" and \
                   describe_symbol_type(symbol['st_info']['type']) == "FUNC":
                    symbols.append((symbol['st_value'], symbol['st_size'],
                                    symbol.name))
    return {addr: (name, size, True) for addr, size, name in symbols}
def load_file(self, filename=None):
    """Load the allocatable SHT_PROGBITS sections of an ELF file.

    The load memory address (LMA) of each section is computed by packing
    sections back to back from the p_paddr of the segment containing them.
    Raises FormatError on a missing/unreadable file, a section contained
    in no segment, or an ELF with no loadable data.
    """
    if filename is None:
        raise FormatError("Filename not specified.")
    try:
        elffile = ELFFile(open(filename, 'rb'))
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit).
        raise FormatError("Could not open ELF file \"%s\"." % filename)
    # Running LMA cursor per segment.
    section_lma_map = dict()
    for section in elffile.iter_sections():
        if section["sh_type"] != "SHT_PROGBITS":
            continue
        if section["sh_flags"] & SH_FLAGS.SHF_ALLOC:
            # Bug fix: reset per section. Previously a section contained
            # in no segment silently reused the previous section's offset
            # (or raised NameError for the first section).
            segment_offset = None
            for segment in elffile.iter_segments():
                if segment.section_in_segment(section):
                    if not segment in section_lma_map:
                        section_lma_map[segment] = segment["p_paddr"]
                    segment_offset = section_lma_map[segment]
                    section_lma_map[segment] += section["sh_size"]
                    break
            if segment_offset is None:
                raise FormatError(
                    "Section \"%s\" is not contained in any segment."
                    % section.name)
            new_section = FormatELF_Section(section, segment_offset)
            # Strip the leading '.' from the section name.
            section_name = section.name[1:]
            self.sections[section_name] = new_section
    if len(self.sections) == 0:
        raise FormatError("ELF file \"%s\" contains no data." % filename)
def get_relocations(fd):
    """ Return a dict with the relocations contained in a file """
    elffile = ELFFile(fd)
    relocations = {}
    has_relocation_sections = False
    for section in elffile.iter_sections():
        if not isinstance(section, RelocationSection):
            continue
        has_relocation_sections = True
        # The symbol table associated with this relocation section is the
        # one its sh_link field points at.
        symtable = elffile.get_section(section['sh_link'])
        for reloc in section.iter_relocations():
            symbol = symtable.get_symbol(reloc['r_info_sym'])
            if symbol['st_name'] == 0:
                # Nameless symbols refer to a section; use that section's
                # name instead.
                name = elffile.get_section(symbol['st_shndx']).name
            else:
                name = symbol.name
            relocations[reloc['r_offset']] = bytes2str(name)
    return relocations
def parse(self, section_name):
    """Record the sh_size of `section_name` from the ELF at self.path."""
    with open(self.path, 'rb') as stream:
        for section in ELFFile(stream).iter_sections():
            # Section names are bytes in this pyelftools version.
            if section.name.decode('utf-8') == section_name:
                self.section_size = section['sh_size']
def process_file(filename):
    """Print the name of every .debug* section in `filename`."""
    print('In file:', filename)
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)
        debug_names = (s.name for s in elffile.iter_sections()
                       if s.name.startswith('.debug'))
        for name in debug_names:
            print(' ' + name)
def get_dwz(dwz, offset):
    """Return the NUL-terminated byte string found at `offset` inside the
    .debug_str section of the already-open ELF stream `dwz`."""
    for section in ELFFile(dwz).iter_sections():
        if bytes2str(section.name) != ".debug_str":
            continue
        data = section.data()
        # Slice up to the terminating NUL byte.
        end = data[offset:].find(b"\x00")
        return data[offset:offset + end]
def process_file(filename):
    """Print the name of every debug-info section in `filename`."""
    print("In file:", filename)
    with open(filename, "rb") as stream:
        for section in ELFFile(stream).iter_sections():
            # Section names are bytes objects
            if not section.name.startswith(b".debug"):
                continue
            print(" " + bytes2str(section.name))
def load_elf(self, f):
    """Record the entry point and load every allocatable SHT_PROGBITS
    section of the ELF stream `f` into this object's memory map."""
    image = ELFFile(f)
    self.entrypoint = image.header['e_entry']
    for section in image.iter_sections():
        is_progbits = section['sh_type'] == 'SHT_PROGBITS'
        is_alloc = section['sh_flags'] & SH_FLAGS.SHF_ALLOC
        if is_progbits and is_alloc:
            self.add_section(section['sh_addr'], section.data())
def get_dwz(path, offset):
    """Return the NUL-terminated byte string at `offset` within the
    .debug_str section of the ELF file at `path`."""
    with open(path, "rb") as stream:
        for section in ELFFile(stream).iter_sections():
            if bytes2str(section.name) != ".debug_str":
                continue
            data = section.data()
            # Slice up to the terminating NUL byte.
            end = data[offset:].find(b"\x00")
            return data[offset:offset + end]
def get_producer(path):
    """Return the DW_AT_producer string of the first compile unit that
    yields one, following .gnu_debugaltlink into the dwz alternate file
    when the attribute uses DW_FORM_GNU_strp_alt.

    Returns None when no CU produces a usable attribute.
    """
    with open(path, "rb") as f:
        elffile = ELFFile(f)
        dwarfinfo = elffile.get_dwarf_info()
        for CU in dwarfinfo.iter_CUs():
            # Start with the top DIE, the root for this CU's DIE tree.
            top_DIE = CU.get_top_DIE()
            try:
                attrs = top_DIE.attributes['DW_AT_producer']
                if attrs.form == 'DW_FORM_GNU_strp_alt':
                    # DWARF extension (see elfutils DwarfExtensions wiki):
                    # DW_FORM_GNU_strp_alt refers to a string in the .dwz
                    # alternate file, which is named by the
                    # .gnu_debugaltlink section (file name followed by the
                    # dwz file's build-id).
                    for section in elffile.iter_sections():
                        name = bytes2str(section.name)
                        if name == ".gnu_debugaltlink":
                            data = section.data()
                            fdata = data[0:data.find(b"\x00")]
                            # Bug fix: fdata is bytes, so the search
                            # pattern must be bytes too -- the old
                            # str pattern raised TypeError on Python 3,
                            # silently swallowed by the except below.
                            i = fdata.find(b".dwz/")
                            rpath = os.path.join(
                                "/usr/lib/debug/",
                                fdata[i:].decode("utf-8"))
                            # offset in alternate (.dwz/...)'s .debug_str
                            return get_dwz(rpath, offset=attrs.value)
                elif attrs.form == 'DW_FORM_strp':
                    # The string lives in this file's own .debug_str.
                    return attrs.value
                else:
                    assert 0
            except Exception:
                # Narrowed from a bare `except:`; still deliberately
                # best-effort -- CUs without a producer (KeyError) or with
                # unexpected forms are skipped.
                pass
def test_dynamic_section_solaris(self):
    """Verify that we can parse relocations from the .dynamic section"""
    path = os.path.join('test', 'testfiles_for_unittests',
                        'exe_solaris32_cc.elf')
    with open(path, 'rb') as f:
        for sect in ELFFile(f).iter_sections():
            if not isinstance(sect, DynamicSection):
                continue
            tables = sect.get_relocation_tables()
            self.assertEqual(set(tables), {'JMPREL', 'REL'})
def main():
    """Print the DT_NEEDED entries of each ELF file named on the command
    line; fail if a file has no dynamic section."""
    for name in argv[1:]:
        with open(name, 'rb') as stream:
            dynamic = None
            for section in ELFFile(stream).iter_sections():
                if isinstance(section, DynamicSection):
                    dynamic = section
                    break
            if dynamic is None:
                raise RuntimeError('Dynamic section not found.')
            for tag in dynamic.iter_tags(type='DT_NEEDED'):
                print(tag.entry, tag.needed)
def loadELF(self, filename):
    """Parse `filename` and populate arch, entry point, memory image and
    symbol/thumb/code-address tables."""
    try:
        elf = ELFFile(open(filename, 'rb'))
    except:
        raise Exception("[-] This file is not an ELF file: %s" % filename)
    self.arch = elf.get_machine_arch()
    self.entry = elf.header.e_entry
    self.memory = self.load_code_segments(elf.iter_segments(), filename)
    (self.symtab,
     self.thumbtab,
     self.code_addrs) = self.load_section_info(elf.iter_sections())
    # Thumb-mode ranges and code addresses must be ordered by start address.
    self.thumbtab.sort(key=lambda entry: entry[0])
    self.code_addrs = sorted(self.code_addrs,
                             key=lambda entry: entry['address'])
def _test_double_dynstr_section_generic(self, testfile):
    """Assert that the handled dynamic tags of `testfile` match the
    stored reference data; fail if no dynamic section exists."""
    path = os.path.join('test', 'testfiles_for_unittests', testfile)
    with open(path, 'rb') as f:
        for section in ELFFile(f).iter_sections():
            if not isinstance(section, DynamicSection):
                continue
            d_tags = [getattr(tag, tag.entry.d_tag[3:].lower())
                      for tag in section.iter_tags()
                      if tag.entry.d_tag in DynamicTag._HANDLED_TAGS]
            self.assertListEqual(
                TestDoubleDynstrSections.reference_data, d_tags)
            return
    self.fail('No dynamic section found !!')
def get_rpaths(path):
    """ Get all the RPATH and RUNPATH entries for an ELF binary """
    entries = []
    with open(path, 'rb') as fp:
        for section in ELFFile(fp).iter_sections():
            if not isinstance(section, DynamicSection):
                continue
            for tag in section.iter_tags():
                kind = tag.entry.d_tag
                if kind == 'DT_RPATH':
                    entries += tag.rpath.split(':')
                elif kind == 'DT_RUNPATH':
                    entries += tag.runpath.split(':')
    return entries
def get_func_address(self, name):
    """Look up `name` in the ELF symbol tables at self.path and return its
    address rebased onto the loaded libc image.

    Raises RuntimeError if no symbol table yields a non-zero address.
    """
    address = None
    with open(self.path, 'rb') as stream:
        for table in ELFFile(stream).iter_sections():
            if not isinstance(table, SymbolTableSection):
                continue
            for symbol in table.iter_symbols():
                if symbol.name == name:
                    self.logger.debug('%s', symbol.entry)
                    address = symbol.entry['st_value']
                    break
            # A zero address keeps the search going in later tables,
            # matching the original for/else behaviour.
            if address:
                break
    if not address:
        raise RuntimeError('Failed to find {}'.format(name))
    return self.handle_address(address + self.libc.imageBase)
def __init__(
    self,
    image,
    trace=False,
    syms=False,
    timeout=None,
    preformatted_image=os.path.join("..", "floppy.img.zip"),
    argv=None,
    keep_temps=False,
):
    """Boot-test harness: inspect the kernel ELF `image`, optionally
    collect its symbol table, select an emulator model and build a
    bootable floppy image carrying the kernel.
    """
    self.image = image
    self.trace = trace
    self.syms = syms
    self.timeout = timeout
    self.argv = argv
    self.keep_temps = keep_temps

    assert os.path.exists(self.image)

    with open(self.image, "rb") as fd:
        elffile = ELFFile(fd)
        arch = elffile.get_machine_arch()
        if arch == "x86":
            self.arch = "X86"
        else:
            # Bug fix: this branch referenced the undefined name `elf`,
            # raising NameError instead of the intended RuntimeError.
            raise RuntimeError("Unknown architecture: %s" % arch)

        if syms:
            # Get the symbols in the file.
            self.symbols = {}
            for section in elffile.iter_sections():
                if not isinstance(section, SymbolTableSection):
                    continue
                for sym in section.iter_symbols():
                    self.symbols[sym["st_value"]] = sym.name

    if self.arch == "X86":
        self.model = Qemu("qemu-system-i386", [])
    else:
        raise RuntimeError("Unknown architecture: %s" % self.arch)

    # Temp floppy image: mkstemp's descriptor is closed immediately; only
    # the path is used.
    fd, self.tmpimage = tempfile.mkstemp()
    os.close(fd)
    self.floppy_image = Image(self.tmpimage, preformatted_image)
    self.floppy_image.create_grub_conf(args=self.argv)
    self.floppy_image.copy(self.image, "/kernel")
def process_binary(bv, binfile):
    """Record the size of every symbol whose address falls inside a
    data-variable section of `bv`.

    Populates the module-level `dynamic_symbols` mapping {address: size}.
    """
    with open(binfile, 'rb') as f:
        elffile = ELFFile(f)
        symbol_tables = [s for s in elffile.iter_sections()
                         if isinstance(s, SymbolTableSection)]
        for section in symbol_tables:
            # (the redundant per-section isinstance re-check was removed;
            # the comprehension above already filters for it)
            if section['sh_entsize'] == 0:
                continue
            for symbol in section.iter_symbols():
                sym_addr = symbol['st_value']
                if is_data_variable_section(bv, sym_addr):
                    dynamic_symbols[sym_addr] = symbol['st_size']
def get_relocations(filename): """ Return a dict with the relocations contained in a file Taken and modified from https://github.com/eliben/pyelftools/blob/master/scripts/readelf.py """ #print '[*] Getting relocations' relocations = {} if not filename: print '[*] Relocations not found' return relocations try: pe = pefile.PE(filename) for entry in pe.DIRECTORY_ENTRY_IMPORT: for imp in entry.imports: relocations[imp.address] = imp.name except pefile.PEFormatError: with file(filename,'r') as fd: elffile = ELFFile(fd) has_relocation_sections = False for section in elffile.iter_sections(): if not isinstance(section, RelocationSection): continue has_relocation_sections = True # The symbol table section pointed to in sh_link symtable = elffile.get_section(section['sh_link']) for rel in section.iter_relocations(): offset = rel['r_offset'] symbol = symtable.get_symbol(rel['r_info_sym']) # Some symbols have zero 'st_name', so instead what's used is # the name of the section they point at if symbol['st_name'] == 0: symsec = elffile.get_section(symbol['st_shndx']) symbol_name = symsec.name else: symbol_name = symbol.name relocations[offset] = bytes2str(symbol_name) #print '[*] Getting relocations DONE' return relocations
class QlReadELF(object):
    """readelf-style introspection of an ELF image for Qiling: exposes the
    file header, program headers, section headers and symbol tables as
    plain dict structures (mirrors pyelftools' scripts/readelf.py)."""

    def __init__(self, ql: Qiling, elf_stream):
        self.ql = ql
        self.elffile = ELFFile(elf_stream)
        # Lazily populated by _init_versioninfo().
        self._versioninfo = None

    def elf_file_header(self):
        """Return the ELF file header as a {label: value} dict."""
        elf_header = {}
        def add_info(key, value):
            elf_header[key] = value
        header = self.elffile.header
        e_ident = header['e_ident']
        add_info('Magic',
                 ' '.join('%2.2x' % byte2int(b)
                          for b in self.elffile.e_ident_raw))
        add_info('Class', describe_ei_class(e_ident['EI_CLASS']))
        add_info('Data', describe_ei_data(e_ident['EI_DATA']))
        add_info('Version', e_ident['EI_VERSION'])
        add_info('OS/ABI', describe_ei_osabi(e_ident['EI_OSABI']))
        add_info('ABI Version', e_ident['EI_ABIVERSION'])
        add_info('Type', describe_e_type(header['e_type']))
        add_info('Machine', describe_e_machine(header['e_machine']))
        add_info('Version_e', describe_e_version_numeric(header['e_version']))
        add_info('Entry point address', self._format_hex(header['e_entry']))
        add_info('Start of program headers', header['e_phoff'])
        add_info('Start of section headers', header['e_shoff'])
        add_info('Flags', [
            self._format_hex(header['e_flags']),
            self.decode_flags(header['e_flags'])
        ])
        add_info('Size of this header', header['e_ehsize'])
        add_info('Size of program headers', header['e_phentsize'])
        add_info('Number of program headers', header['e_phnum'])
        add_info('Size of section headers', header['e_shentsize'])
        add_info('Number of section headers', header['e_shnum'])
        add_info('Section header string table index', header['e_shstrndx'])
        return elf_header

    def elf_program_headers(self):
        """Return a list of program-header dicts, or None when the file
        has no segments."""
        program_headers = []
        def add_info(dic):
            program_headers.append(dic)
        if self.elffile.num_segments() == 0:
            return None
        for segment in self.elffile.iter_segments():
            program_hdr = {}
            program_hdr['Type'] = describe_p_type(segment['p_type'])
            program_hdr['Offset'] = self._format_hex(segment['p_offset'], fieldsize=6)
            program_hdr['VirtAddr'] = self._format_hex(segment['p_vaddr'], fullhex=True)
            program_hdr['PhysAddr'] = self._format_hex(segment['p_paddr'], fullhex=True)
            program_hdr['FileSiz'] = self._format_hex(segment['p_filesz'], fieldsize=5)
            program_hdr['MemSiz'] = self._format_hex(segment['p_memsz'], fieldsize=5)
            program_hdr['Flg'] = describe_p_flags(segment['p_flags'])
            program_hdr['Align'] = self._format_hex(segment['p_align'])
            add_info(program_hdr)
        return program_headers

    def elf_section_headers(self):
        """Return a list of section-header dicts, or None when the file
        has no sections."""
        section_headers = []
        def add_info(dic):
            section_headers.append(dic)
        if self.elffile.num_sections() == 0:
            return None
        for nsec, section in enumerate(self.elffile.iter_sections()):
            section_hdr = {}
            section_hdr['index'] = nsec
            section_hdr['Name'] = section.name
            section_hdr['Type'] = describe_sh_type(section['sh_type'])
            section_hdr['Addr'] = self._format_hex(section['sh_addr'], fieldsize=8, lead0x=False)
            section_hdr['Offset'] = self._format_hex(section['sh_offset'], fieldsize=6, lead0x=False)
            section_hdr['Size'] = self._format_hex(section['sh_size'], fieldsize=6, lead0x=False)
            section_hdr['ES'] = self._format_hex(section['sh_entsize'], fieldsize=2, lead0x=False)
            section_hdr['Flag'] = describe_sh_flags(section['sh_flags'])
            section_hdr['Lk'] = section['sh_link']
            section_hdr['Inf'] = section['sh_info']
            section_hdr['Al'] = section['sh_addralign']
            add_info(section_hdr)
        return section_headers

    def elf_symbol_tables(self):
        """Return a list of symbol dicts from every symbol-table section
        (including GNU symbol-version annotations for SHT_DYNSYM), or
        None when the file has no sections at all."""
        symbol_tables_list = []
        def add_info(dic):
            symbol_tables_list.append(dic)
        self._init_versioninfo()
        symbol_tables = [
            s for s in self.elffile.iter_sections()
            if isinstance(s, SymbolTableSection)
        ]
        if not symbol_tables and self.elffile.num_sections() == 0:
            return None
        for section in symbol_tables:
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                # Nothing to iterate in a table without an entry size.
                continue
            for nsym, symbol in enumerate(section.iter_symbols()):
                version_info = ''
                if (section['sh_type'] == 'SHT_DYNSYM'
                        and self._versioninfo['type'] == 'GNU'):
                    version = self._symbol_version(nsym)
                    if (version['name'] != symbol.name and
                            version['index'] not in ('VER_NDX_LOCAL',
                                                     'VER_NDX_GLOBAL')):
                        if version['filename']:
                            # external symbol
                            version_info = '@%(name)s (%(index)i)' % version
                        else:
                            # internal symbol
                            if version['hidden']:
                                version_info = '@%(name)s' % version
                            else:
                                version_info = '@@%(name)s' % version
                symbol_info = {}
                symbol_info['index'] = nsym
                symbol_info['Value'] = self._format_hex(symbol['st_value'], fullhex=True, lead0x=False)
                symbol_info['Size'] = symbol['st_size']
                symbol_info['Type'] = describe_symbol_type(
                    symbol['st_info']['type'])
                symbol_info['Bind'] = describe_symbol_bind(
                    symbol['st_info']['bind'])
                symbol_info['Vis'] = describe_symbol_visibility(
                    symbol['st_other']['visibility'])
                symbol_info['Ndx'] = describe_symbol_shndx(symbol['st_shndx'])
                symbol_info['Name'] = symbol.name
                symbol_info['version_info'] = version_info
                add_info(symbol_info)
        return symbol_tables_list

    def decode_flags(self, flags):
        """Decode machine-specific e_flags (ARM and MIPS only) into a
        human-readable suffix string, mirroring readelf output."""
        description = ""
        if self.elffile['e_machine'] == "EM_ARM":
            eabi = flags & E_FLAGS.EF_ARM_EABIMASK
            flags &= ~E_FLAGS.EF_ARM_EABIMASK
            if flags & E_FLAGS.EF_ARM_RELEXEC:
                description += ', relocatable executabl'
                flags &= ~E_FLAGS.EF_ARM_RELEXEC
            if eabi == E_FLAGS.EF_ARM_EABI_VER5:
                EF_ARM_KNOWN_FLAGS = E_FLAGS.EF_ARM_ABI_FLOAT_SOFT | E_FLAGS.EF_ARM_ABI_FLOAT_HARD | E_FLAGS.EF_ARM_LE8 | E_FLAGS.EF_ARM_BE8
                description += ', Version5 EABI'
                if flags & E_FLAGS.EF_ARM_ABI_FLOAT_SOFT:
                    description += ", soft-float ABI"
                elif flags & E_FLAGS.EF_ARM_ABI_FLOAT_HARD:
                    description += ", hard-float ABI"
                if flags & E_FLAGS.EF_ARM_BE8:
                    description += ", BE8"
                elif flags & E_FLAGS.EF_ARM_LE8:
                    description += ", LE8"
                if flags & ~EF_ARM_KNOWN_FLAGS:
                    description += ', <unknown>'
            else:
                description += ', <unrecognized EABI>'
        elif self.elffile['e_machine'] == "EM_MIPS":
            if flags & E_FLAGS.EF_MIPS_NOREORDER:
                description += ", noreorder"
            if flags & E_FLAGS.EF_MIPS_PIC:
                description += ", pic"
            if flags & E_FLAGS.EF_MIPS_CPIC:
                description += ", cpic"
            if (flags & E_FLAGS.EF_MIPS_ABI2):
                description += ", abi2"
            if (flags & E_FLAGS.EF_MIPS_32BITMODE):
                description += ", 32bitmode"
            if (flags & E_FLAGS_MASKS.EFM_MIPS_ABI_O32):
                description += ", o32"
            elif (flags & E_FLAGS_MASKS.EFM_MIPS_ABI_O64):
                description += ", o64"
            elif (flags & E_FLAGS_MASKS.EFM_MIPS_ABI_EABI32):
                description += ", eabi32"
            elif (flags & E_FLAGS_MASKS.EFM_MIPS_ABI_EABI64):
                description += ", eabi64"
            if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_1:
                description += ", mips1"
            if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_2:
                description += ", mips2"
            if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_3:
                description += ", mips3"
            if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_4:
                description += ", mips4"
            if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_5:
                description += ", mips5"
            if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_32R2:
                description += ", mips32r2"
            if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_64R2:
                description += ", mips64r2"
            if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_32:
                description += ", mips32"
            if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_64:
                description += ", mips64"
        return description

    def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True,
                    alternate=False):
        """ Format an address into a hexadecimal string.

            fieldsize:
                Size of the hexadecimal field (with leading zeros to fit the
                address into. For example with fieldsize=8, the format will
                be %08x. If None, the minimal required field size will be
                used.

            fullhex:
                If True, override fieldsize to set it to the maximal size
                needed for the elfclass.

            lead0x:
                If True, leading 0x is added.

            alternate:
                If True, override lead0x to emulate the alternate
                hexadecimal form specified in format string with the #
                character: only non-zero values are prefixed with 0x.
                This form is used by readelf.
        """
        if alternate:
            if addr == 0:
                lead0x = False
            else:
                lead0x = True
                fieldsize -= 2
        s = '0x' if lead0x else ''
        if fullhex:
            fieldsize = 8 if self.elffile.elfclass == 32 else 16
        if fieldsize is None:
            field = '%x'
        else:
            field = '%' + '0%sx' % fieldsize
        return s + field % addr

    def _init_versioninfo(self):
        """ Search and initialize informations about version related
        sections and the kind of versioning used (GNU or Solaris).
        """
        if self._versioninfo is not None:
            return
        self._versioninfo = {
            'versym': None,
            'verdef': None,
            'verneed': None,
            'type': None
        }
        for section in self.elffile.iter_sections():
            if isinstance(section, GNUVerSymSection):
                self._versioninfo['versym'] = section
            elif isinstance(section, GNUVerDefSection):
                self._versioninfo['verdef'] = section
            elif isinstance(section, GNUVerNeedSection):
                self._versioninfo['verneed'] = section
            elif isinstance(section, DynamicSection):
                for tag in section.iter_tags():
                    # DT_VERSYM marks GNU-style versioning.
                    if tag['d_tag'] == 'DT_VERSYM':
                        self._versioninfo['type'] = 'GNU'
                        break
        if not self._versioninfo['type'] and (self._versioninfo['verneed']
                                              or self._versioninfo['verdef']):
            self._versioninfo['type'] = 'Solaris'

    def _symbol_version(self, nsym):
        """ Return a dict containing information on the symbol version of
        the nsym-th symbol, or None if no version information is available.
        """
        self._init_versioninfo()
        symbol_version = dict.fromkeys(('index', 'name', 'filename', 'hidden'))
        if (not self._versioninfo['versym']
                or nsym >= self._versioninfo['versym'].num_symbols()):
            return None
        symbol = self._versioninfo['versym'].get_symbol(nsym)
        index = symbol.entry['ndx']
        if not index in ('VER_NDX_LOCAL', 'VER_NDX_GLOBAL'):
            index = int(index)
            if self._versioninfo['type'] == 'GNU':
                # In GNU versioning mode, the highest bit is used to
                # store wether the symbol is hidden or not
                if index & 0x8000:
                    index &= ~0x8000
                    symbol_version['hidden'] = True
            if (self._versioninfo['verdef']
                    and index <= self._versioninfo['verdef'].num_versions()):
                # Version defined in this file.
                _, verdaux_iter = \
                    self._versioninfo['verdef'].get_version(index)
                symbol_version['name'] = next(verdaux_iter).name
            else:
                # Version needed from another file.
                verneed, vernaux = \
                    self._versioninfo['verneed'].get_version(index)
                symbol_version['name'] = vernaux.name
                symbol_version['filename'] = verneed.name
        symbol_version['index'] = index
        return symbol_version
class Loader():
    """Loads an ELF binary into a Container: functions, data sections,
    relocations, symbol aliases and TLS symbols.

    Wraps pyelftools' ELFFile; results are accumulated in self.container.
    """

    def __init__(self, fname):
        # NOTE(review): the file handle is kept open for the Loader's
        # lifetime because ELFFile reads sections lazily from it.
        self.fd = open(fname, 'rb')
        self.elffile = ELFFile(self.fd)
        self.container = Container()
        # Maps symbol name -> set of st_value addresses seen for that name
        # (used to disambiguate duplicate local symbols).
        self.dup_symbols = dict()

    def is_pie(self):
        """Return True if the binary is position independent (ET_DYN with
        the first PT_LOAD segment based at vaddr 0)."""
        base_address = next(seg for seg in self.elffile.iter_segments()
                            if seg['p_type'] == "PT_LOAD")['p_vaddr']
        return self.elffile['e_type'] == 'ET_DYN' and base_address == 0

    def load_functions(self, fnlist):
        """Create a Function for every entry in fnlist (addr -> metadata)
        by slicing its bytes out of .text, and add it to the container."""
        section = self.elffile.get_section_by_name(".text")
        data = section.data()
        base = section['sh_addr']
        for faddr, fvalue in fnlist.items():
            # Function bytes are located by their offset from .text's base.
            section_offset = faddr - base
            bytes = data[section_offset:section_offset + fvalue["sz"]]
            function = Function(fvalue["name"], faddr, fvalue["sz"], bytes,
                                fvalue["bind"])
            self.container.add_function(function)

    def load_data_sections(self, seclist, section_filter=lambda x: True):
        """Load data sections selected by section_filter into the container,
        zero-padding each to its declared size, then record .plt/.plt.got
        bookkeeping."""
        for sec in [sec for sec in seclist if section_filter(sec)]:
            sval = seclist[sec]
            section = self.elffile.get_section_by_name(sec)
            data = section.data()
            more = bytearray()
            # well, let's hope the first entry in init_array and fini_array
            # are always "frame_dummy" and "__do_global_dtors_aux":
            # the first 8-byte pointer is dropped here and the section base
            # is shifted by 8 below to compensate.
            if sec == ".init_array" or sec == ".fini_array":
                if len(data) > 8:
                    data = data[8:]
                else:
                    data = b''
                more.extend(data)
            else:
                more.extend(data)
            # Pad with zeroes up to the size recorded in the section list
            # (covers .bss-like tails not present in the file).
            if len(more) < sval['sz']:
                more.extend(
                    [0x0 for _ in range(0, sval['sz'] - len(more))])
            bytes = more
            if sec == ".init_array" or sec == ".fini_array":
                # Base/size shifted by the 8 bytes dropped above.
                ds = DataSection(sec, sval["base"] + 8, sval["sz"] - 8,
                                 bytes, sval['align'])
            else:
                ds = DataSection(sec, sval["base"], sval["sz"], bytes,
                                 sval['align'])
            self.container.add_section(ds)

        # Find if there is a plt section
        for sec in seclist:
            if sec == '.plt':
                self.container.plt_base = seclist[sec]['base']
            if sec == ".plt.got":
                section = self.elffile.get_section_by_name(sec)
                data = section.data()
                # Pre-disassemble .plt.got entries for later PLT resolution.
                entries = list(
                    disasm_bytes(section.data(), seclist[sec]['base']))
                self.container.gotplt_base = seclist[sec]['base']
                self.container.gotplt_sz = seclist[sec]['sz']
                self.container.gotplt_entries = entries

    def load_relocations(self, relocs):
        """Attach relocations (keyed by relocation-section name, e.g.
        ".rela.text") to the target section, or to the container when the
        target section was not loaded."""
        for reloc_section, relocations in relocs.items():
            # Strip the ".rela" prefix to get the target section name.
            section = reloc_section[5:]
            if reloc_section == ".rela.plt":
                self.container.add_plt_information(relocations)
            if section in self.container.sections:
                self.container.sections[section].add_relocations(relocations)
            else:
                print("[*] Relocations for a section that's not loaded:",
                      reloc_section)
                self.container.add_relocations(section, relocations)

    # added by JX
    def adjust_sym_name(self, symbol):
        """Return a unique name for a symbol: duplicate non-global local
        symbols get their address appended (name_0x...)."""
        if symbol['st_size'] == 0:
            return symbol.name
        if symbol.name in Rewriter.GCC_FUNCTIONS:
            return symbol.name
        if symbol['st_info']['bind'] == "STB_GLOBAL":
            return symbol.name
        if symbol.name not in self.dup_symbols:
            return symbol.name
        if len(self.dup_symbols[symbol.name]) > 1:
            # Same local name defined at several addresses: disambiguate.
            return symbol.name + "_" + str(hex(symbol['st_value']))
        else:
            return symbol.name

    def load_dup_symbols(self):
        """Populate self.dup_symbols with every visible symbol's set of
        addresses (used by adjust_sym_name)."""
        symbol_tables = [
            sec for sec in self.elffile.iter_sections()
            if isinstance(sec, SymbolTableSection)
        ]
        # NOTE(review): function_list is assigned but never used here.
        function_list = dict()
        for section in symbol_tables:
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                continue
            for symbol in section.iter_symbols():
                if symbol['st_other']['visibility'] == "STV_HIDDEN":
                    continue
                if symbol.name not in self.dup_symbols:
                    self.dup_symbols[symbol.name] = set()
                self.dup_symbols[symbol.name].add(symbol['st_value'])
    # end by JX

    def reloc_list_from_symtab(self):
        """Collect all relocations in the file, resolved against their
        linked symbol tables.  Returns dict: section name -> list of
        relocation dicts."""
        relocs = defaultdict(list)
        for section in self.elffile.iter_sections():
            if not isinstance(section, RelocationSection):
                continue
            # sh_link points at the symbol table this section relocates
            # against.
            symtable = self.elffile.get_section(section['sh_link'])
            for rel in section.iter_relocations():
                symbol = None
                if rel['r_info_sym'] != 0:
                    symbol = symtable.get_symbol(rel['r_info_sym'])
                if symbol:
                    if symbol['st_name'] == 0:
                        # Nameless symbol: fall back to the name of the
                        # section it points at.
                        symsec = self.elffile.get_section(symbol['st_shndx'])
                        symbol_name = symsec.name
                    else:
                        symbol_name = self.adjust_sym_name(symbol)
                else:
                    # No symbol: use a stand-in so the dict below can still
                    # read st_value uniformly.
                    symbol = dict(st_value=None)
                    symbol_name = None
                reloc_i = {
                    'name': symbol_name,
                    'st_value': symbol['st_value'],
                    'offset': rel['r_offset'],
                    'addend': rel['r_addend'],
                    'type': rel['r_info_type'],
                    'r_info_sym': rel['r_info_sym'],
                }
                relocs[section.name].append(reloc_i)
        return relocs

    def flist_from_symtab(self):
        """Return dict addr -> {name, sz, visibility, bind} for every
        visible, defined STT_FUNC symbol."""
        symbol_tables = [
            sec for sec in self.elffile.iter_sections()
            if isinstance(sec, SymbolTableSection)
        ]
        function_list = dict()
        for section in symbol_tables:
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                continue
            for symbol in section.iter_symbols():
                if symbol['st_other']['visibility'] == "STV_HIDDEN":
                    continue
                if (symbol['st_info']['type'] == 'STT_FUNC'
                        and symbol['st_shndx'] != 'SHN_UNDEF'):
                    function_list[symbol['st_value']] = {
                        # JX changed the name; needs to make sure the symbols
                        # and relocations are consistent
                        'name': self.adjust_sym_name(symbol),
                        'sz': symbol['st_size'],
                        'visibility': symbol['st_other']['visibility'],
                        'bind': symbol['st_info']['bind'],
                    }
        return function_list

    # keep track of the list of alias symbols at the same address
    # added by JX
    def aliaslist_from_symtab(self):
        """Return dict addr -> set of (adjusted) symbol names defined at
        that address."""
        symbol_tables = [
            sec for sec in self.elffile.iter_sections()
            if isinstance(sec, SymbolTableSection)
        ]
        alias_list = dict()
        for section in symbol_tables:
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                continue
            # let's aggressively consider all aliases
            for symbol in section.iter_symbols():
                # if symbol['st_other']['visibility'] == "STV_HIDDEN":
                #     continue
                # if symbol['st_shndx'] == 'SHN_UNDEF':
                #     continue
                if symbol['st_value'] not in alias_list:
                    alias_list[symbol['st_value']] = set()
                if len(symbol.name):
                    # JX changed the name; needs to make sure the symbols
                    # and relocations are consistent
                    alias_list[symbol['st_value']].add(
                        self.adjust_sym_name(symbol))
        return alias_list

    # obtain the list of TLS symbols (they are very special)
    def tlslist_from_symtable(self):
        """Return dict name -> symbol for every STT_TLS symbol."""
        symbol_tables = [
            sec for sec in self.elffile.iter_sections()
            if isinstance(sec, SymbolTableSection)
        ]
        tlslist = dict()
        for section in symbol_tables:
            if section['sh_entsize'] == 0:
                continue
            # let's aggressively consider all aliases
            for symbol in section.iter_symbols():
                if symbol['st_info']['type'] == "STT_TLS":
                    tlslist[symbol.name] = symbol
        return tlslist

    def load_aliases(self, alist):
        """Hand the alias map to the container."""
        self.container.add_aliases(alist)

    def load_tls(self, tls_list):
        """Hand the TLS symbol map to the container."""
        self.container.add_tlslist(tls_list)
    # end by JX

    def slist_from_symtab(self):
        """Return dict section name -> {base, sz, offset, align} for every
        section in the file."""
        sections = dict()
        for section in self.elffile.iter_sections():
            sections[section.name] = {
                'base': section['sh_addr'],
                'sz': section['sh_size'],
                'offset': section['sh_offset'],
                'align': section['sh_addralign'],
            }
        return sections

    def load_globals_from_glist(self, glist):
        """Hand the global-data map to the container."""
        self.container.add_globals(glist)

    def global_data_list_from_symtab(self):
        """Return dict addr -> list of {name, sz, visibility, bind} for
        every visible, defined, non-empty STT_OBJECT symbol."""
        symbol_tables = [
            sec for sec in self.elffile.iter_sections()
            if isinstance(sec, SymbolTableSection)
        ]
        global_list = defaultdict(list)
        for section in symbol_tables:
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                continue
            for symbol in section.iter_symbols():
                # XXX: HACK -- skip versioned glibc symbols entirely.
                if "@@GLIBC" in symbol.name:
                    continue
                if symbol['st_other']['visibility'] == "STV_HIDDEN":
                    continue
                if symbol['st_size'] == 0:
                    continue
                if (symbol['st_info']['type'] == 'STT_OBJECT'
                        and symbol['st_shndx'] != 'SHN_UNDEF'):
                    global_list[symbol['st_value']].append({
                        # JX changed the name; needs to make sure the symbols
                        # and relocations are consistent
                        'name': self.adjust_sym_name(symbol),
                        'sz': symbol['st_size'],
                        'visibility': symbol['st_other']['visibility'],
                        'bind': symbol['st_info']['bind'],
                    })
        return global_list
def sections(file):
    """Print the load address (hex) and name of every section in an ELF file."""
    with open(file, 'rb') as stream:
        elf = ELFFile(stream)
        for sec in elf.iter_sections():
            print(hex(sec['sh_addr']), sec.name)
class ELF(object):
    """Static analyzer that summarizes an ELF file into a plain dict.

    Usage: ELF(path).run() -> dict with keys file_header, section_headers,
    program_headers, dynamic_tags, symbol_tables, relocations, notes.
    Non-ELF input (bad magic) yields an empty dict instead of raising.
    """

    def __init__(self, file_path):
        self.file_path = file_path
        self.elf = None
        self.result = {}

    def run(self):
        """Parse the target file and return the accumulated result dict.

        Raises ELFError for malformed (but magic-matching) ELF input;
        returns whatever was collected so far on a magic mismatch.
        """
        try:
            # BUGFIX: use a context manager so the descriptor is closed when
            # parsing finishes (the original leaked the open handle).
            with open(self.file_path, "rb") as f:
                self.elf = ELFFile(f)
                self.result["file_header"] = self._get_file_header()
                self.result["section_headers"] = self._get_section_headers()
                self.result["program_headers"] = self._get_program_headers()
                self.result["dynamic_tags"] = self._get_dynamic_tags()
                self.result["symbol_tables"] = self._get_symbol_tables()
                self.result["relocations"] = self._get_relocations()
                self.result["notes"] = self._get_notes()
                # TODO: add library name per import (see #807)
        except ELFError as e:
            # BUGFIX: `e.message` is Python-2 only (AttributeError on Py3);
            # str(e) is equivalent on both.
            if str(e) != "Magic number does not match":
                raise
        return self.result

    def _get_file_header(self):
        """Return a readelf-style description of the ELF header."""
        return {
            "magic": convert_to_printable(self.elf.e_ident_raw[:4]),
            "class": describe_ei_class(self.elf.header.e_ident["EI_CLASS"]),
            "data": describe_ei_data(self.elf.header.e_ident["EI_DATA"]),
            "ei_version":
                describe_ei_version(self.elf.header.e_ident["EI_VERSION"]),
            "os_abi": describe_ei_osabi(self.elf.header.e_ident["EI_OSABI"]),
            "abi_version": self.elf.header.e_ident["EI_ABIVERSION"],
            "type": describe_e_type(self.elf.header["e_type"]),
            "machine": describe_e_machine(self.elf.header["e_machine"]),
            "version": describe_e_version_numeric(self.elf.header["e_version"]),
            "entry_point_address": self._print_addr(self.elf.header["e_entry"]),
            "start_of_program_headers": self.elf.header["e_phoff"],
            "start_of_section_headers": self.elf.header["e_shoff"],
            "flags": "{}{}".format(
                self._print_addr(self.elf.header["e_flags"]),
                self._decode_flags(self.elf.header["e_flags"])),
            "size_of_this_header": self.elf.header["e_ehsize"],
            "size_of_program_headers": self.elf.header["e_phentsize"],
            "number_of_program_headers": self.elf.header["e_phnum"],
            "size_of_section_headers": self.elf.header["e_shentsize"],
            "number_of_section_headers": self.elf.header["e_shnum"],
            "section_header_string_table_index": self.elf.header["e_shstrndx"],
        }

    def _get_section_headers(self):
        """Return name/type/addr/size for every section."""
        section_headers = []
        for section in self.elf.iter_sections():
            section_headers.append({
                "name": section.name,
                "type": describe_sh_type(section["sh_type"]),
                "addr": self._print_addr(section["sh_addr"]),
                "size": section["sh_size"],
            })
        return section_headers

    def _get_program_headers(self):
        """Return type/addr/flags/size for every segment."""
        program_headers = []
        for segment in self.elf.iter_segments():
            program_headers.append({
                "type": describe_p_type(segment["p_type"]),
                "addr": self._print_addr(segment["p_vaddr"]),
                "flags": describe_p_flags(segment["p_flags"]).strip(),
                "size": segment["p_memsz"],
            })
        return program_headers

    def _get_dynamic_tags(self):
        """Return all tags from every PT_DYNAMIC section, parsed for
        human-readable display."""
        dynamic_tags = []
        for section in self.elf.iter_sections():
            if not isinstance(section, DynamicSection):
                continue
            for tag in section.iter_tags():
                dynamic_tags.append({
                    "tag": self._print_addr(
                        ENUM_D_TAG.get(tag.entry.d_tag, tag.entry.d_tag)),
                    # Strip the "DT_" prefix.
                    "type": tag.entry.d_tag[3:],
                    "value": self._parse_tag(tag),
                })
        return dynamic_tags

    def _get_symbol_tables(self):
        """Return value/type/bind/name for every symbol in every symbol
        table."""
        symbol_tables = []
        for section in self.elf.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            # (dropped an unused enumerate() counter from the original)
            for symbol in section.iter_symbols():
                symbol_tables.append({
                    "value": self._print_addr(symbol["st_value"]),
                    "type": describe_symbol_type(symbol["st_info"]["type"]),
                    "bind": describe_symbol_bind(symbol["st_info"]["bind"]),
                    "ndx_name": symbol.name,
                })
        return symbol_tables

    def _get_relocations(self):
        """Return, per relocation section, its deduplicated entries with
        resolved symbol names."""
        relocations = []
        for section in self.elf.iter_sections():
            if not isinstance(section, RelocationSection):
                continue
            section_relocations = []
            for rel in section.iter_relocations():
                relocation = {
                    "offset": self._print_addr(rel["r_offset"]),
                    "info": self._print_addr(rel["r_info"]),
                    "type": describe_reloc_type(rel["r_info_type"], self.elf),
                    "value": "",
                    "name": ""
                }
                if rel["r_info_sym"] != 0:
                    symtable = self.elf.get_section(section["sh_link"])
                    symbol = symtable.get_symbol(rel["r_info_sym"])
                    # Some symbols have zero "st_name", so instead use
                    # the name of the section they point at
                    if symbol["st_name"] == 0:
                        symsec = self.elf.get_section(symbol["st_shndx"])
                        symbol_name = symsec.name
                    else:
                        symbol_name = symbol.name
                    relocation["value"] = self._print_addr(symbol["st_value"])
                    relocation["name"] = symbol_name
                if relocation not in section_relocations:
                    section_relocations.append(relocation)
            relocations.append({
                "name": section.name,
                "entries": section_relocations,
            })
        return relocations

    def _get_notes(self):
        """Return owner/size/description for every note in NOTE segments."""
        notes = []
        for segment in self.elf.iter_segments():
            if not isinstance(segment, NoteSegment):
                continue
            for note in segment.iter_notes():
                notes.append({
                    "owner": note["n_name"],
                    "size": self._print_addr(note["n_descsz"]),
                    "note": describe_note(note),
                    "name": note["n_name"],
                })
        return notes

    def _print_addr(self, addr):
        """Format addr as zero-padded hex, width chosen by ELF class."""
        fmt = "0x{0:08x}" if self.elf.elfclass == 32 else "0x{0:016x}"
        return fmt.format(addr)

    def _decode_flags(self, flags):
        """Describe machine-specific e_flags bits (ARM and MIPS only)."""
        description = ""
        if self.elf["e_machine"] == "EM_ARM":
            if flags & E_FLAGS.EF_ARM_HASENTRY:
                description = ", has entry point"
            version = flags & E_FLAGS.EF_ARM_EABIMASK
            if version == E_FLAGS.EF_ARM_EABI_VER5:
                description = ", Version5 EABI"
        elif self.elf["e_machine"] == "EM_MIPS":
            if flags & E_FLAGS.EF_MIPS_NOREORDER:
                description = ", noreorder"
            if flags & E_FLAGS.EF_MIPS_CPIC:
                description = ", cpic"
            if not (flags & E_FLAGS.EF_MIPS_ABI2) and not (
                    flags & E_FLAGS.EF_MIPS_ABI_ON32):
                description = ", o32"
            if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_1:
                description = ", mips1"
        return description

    def _parse_tag(self, tag):
        """Render a dynamic tag's value the way readelf does."""
        if tag.entry.d_tag == "DT_NEEDED":
            parsed = "Shared library: [%s]" % tag.needed
        elif tag.entry.d_tag == "DT_RPATH":
            parsed = "Library rpath: [%s]" % tag.rpath
        elif tag.entry.d_tag == "DT_RUNPATH":
            parsed = "Library runpath: [%s]" % tag.runpath
        elif tag.entry.d_tag == "DT_SONAME":
            parsed = "Library soname: [%s]" % tag.soname
        elif tag.entry.d_tag.endswith(("SZ", "ENT")):
            parsed = "%i (bytes)" % tag["d_val"]
        elif tag.entry.d_tag.endswith(("NUM", "COUNT")):
            parsed = "%i" % tag["d_val"]
        elif tag.entry.d_tag == "DT_PLTREL":
            s = describe_dyn_tag(tag.entry.d_val)
            if s.startswith("DT_"):
                s = s[3:]
            parsed = "%s" % s
        else:
            parsed = self._print_addr(tag["d_val"])
        return parsed
def process_file(filename):
    """Locate DWARF debug information for `filename` and print its producer.

    If the binary carries DWARF itself, it is inspected directly.  Otherwise,
    candidate companion-file paths are collected from the
    .note.gnu.build-id and .gnu_debuglink sections and probed in the same
    locations gdb searches.
    """
    debug_paths = []
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)
        if elffile.has_dwarf_info():
            # This file itself has the DWARF information -- lucky day.
            get_producer(filename)
        else:
            for section in elffile.iter_sections():
                name = bytes2str(section.name)
                if name == ".note.gnu.build-id":
                    # Note layout (see `objdump -s -j .note.gnu.build-id`):
                    #   uint32 name_size; uint32 hash_size;
                    #   uint32 identifier (NT_GNU_BUILD_ID == 0x3);
                    #   char name[name_size];   /* "GNU" */
                    #   char hash[hash_size];
                    # Header + "GNU\0" occupy the first 16 bytes.
                    digest = section.data()[16:]
                    value = binascii.hexlify(digest).decode("ascii")
                    # "0834ce...43e16" maps to the path
                    # <debug-root>/.build-id/08/34ce...43e16.debug
                    debug_paths.append(
                        os.path.join(value[0:2], value[2:] + ".debug"))
                if name == ".gnu_debuglink":
                    # Section holds a NUL-terminated basename, padding to a
                    # 4-byte boundary, then a CRC32 we do not need.
                    raw = section.data()
                    debug_paths.append(
                        raw[0:raw.find(b"\x00")].decode("utf-8"))

    # Probe the directories gdb consults, in order, e.g. for /usr/bin/ls:
    #   /usr/lib/debug/.build-id/ab/cdef1234.debug
    #   /usr/bin/ls.debug
    #   /usr/bin/.debug/ls.debug
    #   /usr/lib/debug/usr/bin/ls.debug
    for path in debug_paths:
        rpath = os.path.join("/usr/lib/debug/.build-id", path)
        if os.path.isfile(rpath):
            producer = get_producer(rpath)
            # got producer, is one enough?
            if producer:
                print(producer)
                continue
        # `cwd` + "/usr/lib/debug/.build-id" is our hack ;)
        debug_prefixes = ["/usr/lib/debug/.build-id/", "/usr/bin/",
                          "/usr/lib/debug/usr/bin/"]
        for prefix in debug_prefixes:
            rpath = os.path.join(prefix, path)
            if os.path.isfile(rpath):
                get_producer(rpath)
# --- Debug helper: dump the top of the emulated stack --------------------
# NOTE(review): `uc` is read here but only (re)assigned further below; this
# chunk presumably follows an earlier Uc() setup in the original script --
# confirm ordering upstream.
sp = uc.reg_read(UC_MIPS_REG_SP)
print("\nStack")
# Walk 0x50 bytes of stack, one 32-bit word at a time.
for addr in range(sp, sp + 0x50, 0x4):
    comment = ""
    # Words are read as little-endian 32-bit values.
    value = struct.unpack('<L', uc.mem_read(addr, 0x4))[0]
    # Annotate values that fall inside a known symbol as name+offset.
    symbol = get_symbol(value)
    if symbol != None:
        offset = value - symbol[0]
        comment = f" | {symbol[1]}{'+' + hex(offset) if offset != 0 else ''}"
    print(f"{addr:08X}: {value:08X}{comment}")

# --- Load the firmware ELF and set up the MIPS32 emulator ----------------
print("FILELOAD firm_0_COACH_kern.elf")
# NOTE(review): file handle intentionally (?) left open -- ELFFile reads
# section data lazily from it.
file_handle = open('./firm_0_COACH_kern.elf', 'rb')
elf = ELFFile(file_handle)
sections = list(elf.iter_sections())
entry = elf['e_entry']

uc = Uc(UC_ARCH_MIPS, UC_MODE_32)

# Needed memory mappings
# 80000180 - 8009FCB8 | r-x | .exception through .spc0.972___COUNTER__
# 8009FCB8 - 800CA4C8 | rw- | .eh_frame through .bss
# 90008000 - 90009C00 | -w- | .spd0 through .spd4
# BFC08000 - BFC09C00 | rwx | .spc0 through .spc4

# Needed hardware mappings
# 0xb0400840 | some sort of LED?

uc.mem_map(0x80000000, 0xD0000)
# We have to map r-x and rw- together because unicorn :(
class ElfAnalyzer: """ This class has some supporting functionalities to analyze ELFs file. In particular it provides the functionality for: [ X ] Detect if a ELF is of type Executable (in opposition to relocatable, None, core dump. [ X ] Detect if a ELF has any section marked both W and X (violates W^X principle). [ X ] Detect if a ELF has the stack segment marked as executable (violates DEP) [ ] Detect if a ELF has any data section marked as executable. [ X ] Detect if a ELF is stripped. [ X ] Detect if a ELF uses stack protector. (By verifying the presence of __stack_check_fail) [ ] Detect if a ELF uses unsafe functions (puts, gets, etc.) """ elf_file = None def __init__(self): pass def set_elf(self, filename): self.elf_file = ELFFile(open(filename)) def is_executable(self): """ :return: True if the file is of type executable, False otherwise """ elf_type = self.elf_file.header['e_type'] if elf_type == "ET_EXEC": return True return False def has_sections_wx(self): """ :return: True if the file contains W+X sections, False otherwise. """ for section in self.elf_file.iter_sections(): name = section.name flags = section.header['sh_flags'] if (flags & 0x0001) > 0 & (flags & 0x0004) > 0: logging.warning( "The section %s is both Writeable and Executable." 
% name) return True return False def has_stack_executable(self): """ :return: True if the stack segment is marked Executable, False otherwise """ for segment in self.elf_file.iter_segments(): if segment.header['p_type'] == 'PT_GNU_STACK': flags = segment.header['p_flags'] if (flags & 0x0001) > 0: logging.warning("The stack is marked executable.") return True return False def get_functions(self): """ :return: A list of functions used in the binary """ dynstr_section = self.elf_file.get_section_by_name(".dynstr") if dynstr_section is None: logging.warning( "The file does not contain dynamic string information.") return None functions_used = [] hex_data = dynstr_section.data().encode('hex').replace("00", " ") hex_functions = hex_data.split() for f in hex_functions: functions_used.append(f.decode('hex')) return functions_used def is_stripped(self): """ :return: True if the binary is stripped, False otherwise """ has_debug_section = False has_symbol_table = False for section in self.elf_file.iter_sections(): if "debug" in section.name: has_debug_section = True print "[+] Section %s found. Binary contains debug information." % section.name if section.name == ".symtab": has_symbol_table = True print "[+] Symbol table found. Binary is likely not stripped." return has_debug_section or has_symbol_table def uses_ssp(self): """ :return: True if the binary contains __stack_chk_fail, function used to check canary value inserted by SSP. """ functions = self.get_functions() if "__stack_chk_fail" in functions: return True return False def section_to_segment_mapping(self): """ :return: A dictionary containing segment indexes and the sections mapped in that segment. 
""" mapping = {} sec_to_seg = {} segment_idx = 0 for segment in self.elf_file.iter_segments(): mapping[segment_idx] = { 'begin': segment.header['p_paddr'], 'end': segment.header['p_paddr'] + segment.header['p_memsz'] } sec_to_seg[segment_idx] = { 'flags': segment.header['p_flags'], 'sections': [] } segment_idx += 1 for section in self.elf_file.iter_sections(): for segment in mapping.keys(): section_begin = section.header['sh_addr'] section_end = section.header['sh_addr'] + section.header[ 'sh_size'] if section_end - section_begin == 0: # NULL section. Skip. continue if mapping[segment][ 'begin'] <= section_begin and section_end <= mapping[ segment]['end']: sec_to_seg[segment]['sections'].append(section.name) return sec_to_seg # for item in sec_to_seg.keys(): # sections = "" # for s in sec_to_seg[item]: # sections += s+" " # print "Segment %s : %s" % (str(item), sections) def hard_wx_check(self): """ This functions checks for every segment if this segment is Writeable or eXecutable. For each writeable segment, it checks whether there is one section mapped in this segment which is eXecutable. For each executable segment, it checks wheterh there is one section mapped in this segment which is Writeable. :return: True if W^X is respected, False if it is violated. """ sec_to_seg = self.section_to_segment_mapping() for segment in sec_to_seg.keys(): if len(sec_to_seg[segment]['sections']) == 0: # Empty segment. Skip. continue seg_x = (sec_to_seg[segment]['flags'] & 0x0001) > 0 seg_w = (sec_to_seg[segment]['flags'] & 0x0004) > 0 if seg_x: print "Segment %s is executable." % str(segment) for section in sec_to_seg[segment]['sections']: section_flags = self.elf_file.get_section_by_name( section).header['sh_flags'] if section_flags & 0x0001 > 0: return False print "\t[+] Section %s is not Writeable." % section elif seg_w: print "Segment %s is writeable." 
% str(segment) for section in sec_to_seg[segment]['sections']: section_flags = self.elf_file.get_section_by_name( section).header['sh_flags'] if section_flags & 0x0004 > 0: return False print "\t[+] Section %s is not eXecutable." % section return True
def process_file(filename, modulename, language, outputname):
    """Generate C or Python bindings for functions placed in the .lf.funcs
    section of `filename`, using the binary's DWARF info for signatures.

    Exits with status 1 when the file has no DWARF data or the language is
    unknown; exits 0 (after emitting an empty output file) when there is no
    .lf.funcs section.  Returns 1 on a dangling DW_AT_type reference.
    """
    functions = []
    with open(filename, "rb") as f:
        elffile = ELFFile(f)
        if not elffile.has_dwarf_info():
            print("File has no DWARF info. Compile with -g.")
            sys.exit(1)
        dwarfinfo = elffile.get_dwarf_info()

        # Locate the address ranges of the .lf.funcs / .lf.vars sections.
        funcs_addr = 0
        funcs_size = 0
        # NOTE(review): vars_addr/vars_size are collected but unused here.
        vars_addr = 0
        vars_size = 0
        for section in elffile.iter_sections():
            if section.name == ".lf.funcs":
                funcs_addr = section["sh_addr"]
                funcs_size = section["sh_size"]
            if section.name == ".lf.vars":
                vars_addr = section["sh_addr"]
                vars_size = section["sh_size"]

        if funcs_addr == 0:
            # No exported-function section: emit an empty output file and
            # stop.  BUGFIX: the original left the handle open
            # (`outc = open(outputname, "w")` with no close).
            open(outputname, "w").close()
            sys.exit(0)

        # This iterates through all CUs, even the ones without .lf.funcs
        # section.
        for cu in dwarfinfo.iter_CUs():
            top = cu.get_top_DIE()
            for child in top.iter_children():
                if (child.tag == "DW_TAG_subprogram"
                        and "DW_AT_low_pc" in child.attributes):
                    address = child.attributes["DW_AT_low_pc"].value
                    # Only functions placed inside .lf.funcs are exported.
                    if funcs_addr <= address < funcs_addr + funcs_size:
                        name = child.attributes["DW_AT_name"].value
                        # Defaults cover both "no DW_AT_type" (void return)
                        # and an unnamed/sizeless type DIE; overridden below
                        # when better information is available.
                        rtype = "void"
                        ret = 0x2
                        if "DW_AT_type" in child.attributes:
                            tdie = get_die_at_offset(
                                cu, child.attributes["DW_AT_type"].value)
                            if tdie is None:
                                print(
                                    "Failed to get die at offset: " +
                                    hex(child.attributes["DW_AT_type"].value))
                                return 1
                            # Peel typedefs until the underlying type.
                            while tdie.tag == "DW_TAG_typedef":
                                tdie = get_die_at_offset(
                                    cu, tdie.attributes["DW_AT_type"].value)
                            if "DW_AT_name" in tdie.attributes:
                                rtype = tdie.attributes["DW_AT_name"].value
                                ret = tdie.attributes["DW_AT_byte_size"].value
                            elif "DW_AT_byte_size" in tdie.attributes:
                                # Anonymous type with a size: treat as an
                                # opaque pointer.
                                rtype = "void *"
                                ret = tdie.attributes["DW_AT_byte_size"].value
                        params = get_parameters_from_die(cu, child)
                        functions.append(Function(rtype, name, ret, params))

    if language.lower() == "c":
        generate_c(modulename, outputname, functions)
    elif language.lower() == "python":
        generate_py(modulename, outputname, functions)
    else:
        print("Invalid language: " + language)
        sys.exit(1)
def loadElf(self, elfStream, parameters="", stackSize=100000,
            maxResponseSize=4096, continuation=None, signature=None):
    """Upload an ELF program to the device over the link and run (or
    resume) it.

    elfStream       -- file-like object containing the ELF image
    parameters      -- raw parameter blob passed to the program
    stackSize       -- stack bytes to reserve in the allocation
    maxResponseSize -- maximum size accepted for the final exchange
    continuation    -- dict with 'slot' and 'blob' to resume a suspended
                       run instead of starting fresh
    signature       -- code signature; extracted from the '.ledger'
                       section when not supplied

    Returns a dict: 'suspended' flag, 'response' payload, and (when
    suspended) 'slot' and 'blob' for a later resume.
    Raises Exception on a missing signature or any bad device status.
    """
    if parameters == None:
        parameters = ""
    elffile = ELFFile(elfStream)
    # Locate signature if not passed: the '.ledger' section stores it with
    # its length encoded in the second byte (total = length + 2 bytes).
    if signature == None:
        for section in elffile.iter_sections():
            if section.name == '.ledger':
                signature = section.data()[0:ord(section.data()[1]) + 2]
                break
    if signature == None:
        raise Exception("Missing code signature")

    # Allocate session: stack plus the in-memory size of each loadable
    # segment.  All protocol integers are big-endian.
    allocateSize = stackSize
    for segment in elffile.iter_segments():
        if segment['p_type'] == 'PT_LOAD':
            allocateSize = allocateSize + segment['p_memsz']
    cmd = struct.pack(">H", self.CMD_CODE_INIT)
    cmd = cmd + struct.pack(">I", allocateSize)
    response = self.link.exchange(cmd)
    if response[0] != self.STATUS_CODE_EXEC_OK:
        raise Exception("Unexpected status on CODE_INIT %.2x" % response[0])

    # Load each component: one CODE_LOAD_SECTION message per PT_LOAD
    # segment (flags byte, start vaddr, end vaddr, file size, data).
    for segment in elffile.iter_segments():
        if segment['p_type'] == 'PT_LOAD':
            flags = 0
            # Segments without PF_W are marked read-only on the device.
            if ((segment['p_flags'] & P_FLAGS.PF_W) == 0):
                flags = flags | self.MSG_LOAD_SECTION_FLAG_READ_ONLY
            cmd = struct.pack(">H", self.CMD_CODE_LOAD_SECTION)
            cmd = cmd + chr(flags)
            cmd = cmd + struct.pack(">I", segment['p_vaddr'])
            cmd = cmd + struct.pack(
                ">I", segment['p_vaddr'] + segment['p_memsz'])
            cmd = cmd + struct.pack(">I", segment['p_filesz'])
            cmd = cmd + segment.data()
            response = self.link.exchange(cmd)
            if response[0] != self.STATUS_CODE_EXEC_OK:
                raise Exception(
                    "Unexpected status on CODE_LOAD_SECTION %.2x"
                    % response[0])

    # Run or resume
    if continuation == None:
        # Fresh run: entry point, stack size, parameters and signature.
        cmd = struct.pack(">H", self.CMD_CODE_RUN)
        cmd = cmd + struct.pack(">I", elffile.header['e_entry'])
        cmd = cmd + struct.pack(">I", stackSize)
        cmd = cmd + struct.pack(">I", 0)
        cmd = cmd + struct.pack(">I", len(parameters))
        cmd = cmd + struct.pack(">I", len(signature))
        cmd = cmd + str(parameters)
        cmd = cmd + signature
    else:
        # Resume a previously suspended run from its slot/blob state.
        cmd = struct.pack(">H", self.CMD_CODE_RESUME)
        cmd = cmd + struct.pack(">I", continuation['slot'])
        cmd = cmd + struct.pack(">I", len(continuation['blob']))
        cmd = cmd + struct.pack(">I", 0)
        cmd = cmd + struct.pack(">I", len(parameters))
        cmd = cmd + str(continuation['blob'])
        cmd = cmd + str(parameters)
    response = self.link.exchange(cmd, maxResponseSize)

    result = {}
    if response[0] == self.STATUS_CODE_EXEC_OK:
        result['suspended'] = False
        result['response'] = response[1:]
    elif response[0] == self.STATUS_CODE_EXEC_SUSPENDED:
        # Suspended: payload is slot, blob size, app-data size (3 x uint32)
        # followed by the blob then the application data.
        result['suspended'] = True
        slot, blobSize, appDataSize = struct.unpack(
            ">III", str(response[1:1 + 12]))
        result['slot'] = slot
        result['blob'] = response[1 + 12:1 + 12 + blobSize]
        result['response'] = response[1 + 12 + blobSize:1 + 12 + blobSize +
                                      appDataSize]
    else:
        raise Exception("Application error reported %.2x" % response[0])
    return result
import sys
from elftools.elf.elffile import ELFFile

# Usage: <script> <objcopy> <elf-file> <cached-region> <uncached-region>
objcopy_bin = sys.argv[1]
elffile = sys.argv[2]
cached_reg = int(sys.argv[3])
uncached_reg = int(sys.argv[4])

# Address regions are 512 MiB windows selected by the top address bits
# (bit 29 granularity); compute the uncached window's bounds and the
# byte offset from the uncached to the cached window.
uc_min = uncached_reg << 29
uc_max = uc_min | 0x1fffffff
cache_off = "0x%x" % ((cached_reg - uncached_reg) << 29)

# Collect the names of all sections placed in the uncached window.
fixup = []
with open(elffile, "rb") as fd:
    elf = ELFFile(fd)
    for s in elf.iter_sections():
        addr = s.header.sh_addr
        if uc_min <= addr <= uc_max:
            print(
                f"fix_elf_addrs.py: Moving section {s.name} to cached SRAM region"
            )
            fixup.append(s.name)

for s in fixup:
    # Note redirect: the sof-derived linker scripts currently emit
    # some zero-length sections at address zero. This is benign, and
    # the linker is happy, but objcopy will emit an unsilenceable
    # error (no --quiet option, no -Werror=no-whatever, nothing).
    # Just swallow the error stream for now pending rework to the
    # linker framework.
    # NOTE(review): `cmd` is only constructed here; the statement that
    # executes it (e.g. os.system(cmd)) is outside this chunk -- confirm
    # upstream that each command is actually run.
    cmd = f"{objcopy_bin} --change-section-address {s}+{cache_off} {elffile} 2>/dev/null"
panda.record_cmd(cmd, copy_directory=bin_dir, recording_name=recording_name) panda.stop_run() if not path.isfile(recording_name + "-rr-snp"): panda.queue_async(take_recording) panda.run() mappings = {} # Read symbols from bin into mappings with open(path.join(bin_dir, bin_name), 'rb') as f: our_elf = ELFFile(f) for section in our_elf.iter_sections(): if not isinstance(section, SymbolTableSection): continue for symbol in section.iter_symbols(): if len(symbol.name): # Sometimes empty mappings[symbol['st_value']] = symbol.name @panda.cb_after_block_exec( procname=bin_name ) # After we've executed the block applying taint, make sure everything is tainted as expected def abe(cpu, tb, exit): if tb.pc in mappings: print(hex(tb.pc), mappings[tb.pc]) panda.disable_tb_chaining()
class ELF(Binary):
    """Binary loader for ELF files (plasma-style).

    Loads sections and PT_LOAD segments into the Binary base structures and
    can reconstruct dynamic-symbol/relocation tables for binaries without
    section headers by faking pyelftools section headers from PT_DYNAMIC
    tags.
    """

    def __init__(self, db, filename):
        Binary.__init__(self)
        # NOTE(review): handle kept open for the object's lifetime --
        # ELFFile reads lazily from it.
        fd = open(filename, "rb")
        self.elf = ELFFile(fd)
        self.db = db
        self.__parsed_reloc_tables = set()
        # DT_* tag name -> value, filled by load_dyn_sym().
        self.dtags = {}
        self.jmprel = []
        self.dynamic_seg = None

        self.set_arch_name()

        # MIPS uses machine-specific dynamic tag numbers; map them to names.
        if self.arch == "MIPS32":
            self.dynamic_tag_translation = {
                0x70000001: "DT_MIPS_RLD_VERSION",
                0x70000005: "DT_MIPS_FLAGS",
                0x70000006: "DT_MIPS_BASE_ADDRESS",
                0x7000000a: "DT_MIPS_LOCAL_GOTNO",
                0x70000011: "DT_MIPS_SYMTABNO",
                0x70000012: "DT_MIPS_UNREFEXTNO",
                0x70000013: "DT_MIPS_GOTSYM",
                0x70000016: "DT_MIPS_RLD_MAP",
                0x70000032: "DT_MIPS_PLTGOT"
            }
        elif self.arch == "MIPS64":
            self.dynamic_tag_translation = {
                0x70000001: "DT_MIPS_RLD_VERSION",
                0x70000005: "DT_MIPS_FLAGS",
                0x70000006: "DT_MIPS_BASE_ADDRESS",
                0x7000000a: "DT_MIPS_LOCAL_GOTNO",
                0x70000011: "DT_MIPS_SYMTABNO",
                0x70000012: "DT_MIPS_UNREFEXTNO",
                0x70000013: "DT_MIPS_GOTSYM",
                0x70000016: "DT_MIPS_RLD_MAP"
            }
        else:
            self.dynamic_tag_translation = {}

        # Running base for sections whose sh_addr is 0 (relocatables).
        reloc = 0

        # Load sections
        for s in self.elf.iter_sections():
            start = s.header.sh_addr
            if not s.name:
                s.name = "unk_%x" % start
            # Keep only sections R|W|X
            # TODO : is it sufficiant ?
            if s.header.sh_flags & 0xf == 0:
                continue
            name = s.name
            if isinstance(name, bytes):
                name = name.decode()
            if start == 0:
                # Unplaced section: lay it out at the next free offset.
                start = reloc
                reloc += s.header.sh_size
            data = s.data()
            self.add_section(
                start, name, s.header.sh_size, len(data),
                self.__section_is_exec(s), self.__section_is_data(s),
                name == ".bss", data)

        # Load segments
        rename_counter = 1
        seen = set()
        for seg in self.elf.iter_segments():
            if seg.header.p_type == "PT_DYNAMIC":
                self.dynamic_seg = seg
            if seg.header.p_type != "PT_LOAD":
                continue
            # Multiple PT_LOAD segments share the type as name; make the
            # names unique.
            name = seg.header.p_type
            if name in seen:
                name += "_%d" % rename_counter
                rename_counter += 1
            seen.add(name)
            start = seg.header.p_vaddr
            bisect.insort_left(self._sorted_segments, start)
            is_data = self.__segment_is_data(seg)
            is_exec = self.__segment_is_exec(seg)
            data = seg.data()
            self._abs_segments[start] = SegmentAbs(
                name, start, seg.header.p_memsz, len(data), is_exec, is_data,
                data, seg.header.p_offset, not self.elf.little_endian)

        # No section headers, we add segments in sections
        if len(self._abs_sections) == 0:
            self._abs_sections = self._abs_segments
            self._sorted_sections = self._sorted_segments

    def read_addr_at(self, ad):
        """Read a pointer-sized value at address `ad`."""
        seg = self.get_segment(ad)
        if self.wordsize == 4:
            return seg.read_dword(ad)
        else:
            return seg.read_qword(ad)

    def __translate_dynamic_tag(self, tag):
        """Map a numeric machine-specific dynamic tag to its DT_* name;
        pass through tags pyelftools already named."""
        if isinstance(tag, int):
            return self.dynamic_tag_translation[tag]
        return tag

    def __get_offset(self, ad):
        """Convert a virtual address to its file offset via the containing
        segment."""
        seg = self.get_segment(ad)
        return seg.file_offset + ad - seg.start

    def load_dyn_sym(self):
        """Reconstruct dynamic symbols and relocations from PT_DYNAMIC,
        without relying on section headers (they may be absent)."""
        if self.dynamic_seg is None:
            return

        # Create a dictionary, mapping DT_* strings to their values
        self.dtags = {}
        for tag in self.dynamic_seg.iter_tags():
            tagstr = self.__translate_dynamic_tag(tag.entry.d_tag)
            self.dtags[tagstr] = tag.entry.d_val

        # None of the following things make sense without a string table
        if "DT_STRTAB" not in self.dtags:
            return

        # To handle binaries without section headers, we need to hack around
        # pyreadelf's assumptions make our own string table
        fakestrtabheader = {
            "sh_offset": self.__get_offset(self.dtags["DT_STRTAB"]),
            "sh_size": 0,
            "sh_flags": 0,
            "sh_addralign": 0,
        }
        strtab = StringTableSection(
            fakestrtabheader, "strtab_plasma", self.elf)

        # ...
        # Here in CLE was checked the DT_SONAME
        # ...

        # None of the following structures can be used without a symbol table
        if "DT_SYMTAB" not in self.dtags or "DT_SYMENT" not in self.dtags:
            return

        # Construct our own symbol table to hack around pyreadelf
        # assuming section headers are around
        fakesymtabheader = {
            "sh_offset": self.__get_offset(self.dtags["DT_SYMTAB"]),
            "sh_entsize": self.dtags["DT_SYMENT"],
            "sh_size": 0,  # bogus size: no iteration allowed
            "sh_flags": 0,
            "sh_addralign": 0,
        }

        # ...
        # Here in CLE : creation of hash section
        # ...

        self.dynsym = SymbolTableSection(
            fakesymtabheader, "symtab_plasma", self.elf, strtab)

        # mips' relocations are absolutely screwed up, handle some of them
        # here.
        self.__relocate_mips()

        # perform a lot of checks to figure out what kind of relocation
        # tables are around
        rela_type = None
        if "DT_PLTREL" in self.dtags:
            # DT_PLTREL holds the numeric tag of the table kind:
            # 7 == DT_RELA, 17 == DT_REL.
            if self.dtags["DT_PLTREL"] == 7:
                rela_type = "RELA"
                relentsz = self.elf.structs.Elf_Rela.sizeof()
            elif self.dtags["DT_PLTREL"] == 17:
                rela_type = "REL"
                relentsz = self.elf.structs.Elf_Rel.sizeof()
            else:
                raise ExcElf("DT_PLTREL is not REL or RELA?")
        else:
            if "DT_RELA" in self.dtags:
                rela_type = "RELA"
                relentsz = self.elf.structs.Elf_Rela.sizeof()
            elif "DT_REL" in self.dtags:
                rela_type = "REL"
                relentsz = self.elf.structs.Elf_Rel.sizeof()
            else:
                return

        # try to parse relocations out of a table of type DT_REL{,A}
        if "DT_" + rela_type in self.dtags:
            reloffset = self.dtags["DT_" + rela_type]
            relsz = self.dtags["DT_" + rela_type + "SZ"]
            # BUGFIX: this dict was syntactically broken in the original
            # ('"sh_addralign":0 "sh_flags": 2048,' -- missing comma and a
            # duplicate "sh_flags" key).  Use sh_flags 0, consistent with
            # the other fake headers built in this method.
            fakerelheader = {
                "sh_offset": self.__get_offset(reloffset),
                "sh_type": "SHT_" + rela_type,
                "sh_entsize": relentsz,
                "sh_size": relsz,
                "sh_flags": 0,
                "sh_addralign": 0,
            }
            reloc_sec = RelocationSection(
                fakerelheader, "reloc_plasma", self.elf)
            self.__register_relocs(reloc_sec)
class Loader():
    """Load an ELF relocatable object (kernel module) into a Container
    for rewriting: functions, data sections, relocations, globals."""

    def __init__(self, fname):
        self.fd = open(fname, 'rb')
        self.elffile = ELFFile(self.fd)
        self.container = Container()

    # this function is checking if the module is suited for retrowrite
    # rewriting (PIE/PIC) — kernel modules are relocatable objects (ET_REL)
    def is_pie(self):
        return self.elffile['e_type'] == 'ET_REL'

    def is_stripped(self):
        """Return True when the file lacks the symbols needed for rewriting."""
        # Get the symbol table entry for the respective symbol
        symtab = self.elffile.get_section_by_name('.symtab')
        if not symtab:
            print('No symbol table available, this file is probably stripped!')
            return True

        # FIX(review): get_symbol_by_name returns None when the symbol is
        # absent; the original indexed [0] before checking, raising TypeError.
        # The message was also printed without .format().
        symbols = symtab.get_symbol_by_name("init_module")
        if not symbols:
            print('Symbol {} not found'.format("init_module"))
            return True
        return False

    # Create a function object for each function in fnlist and add it to the
    # container
    def load_functions(self, fnlist):
        for fn in fnlist:
            section = fn['address'].section
            offset = fn['address'].offset

            function_start = offset
            function_end = offset + fn['sz']

            # renamed from `bytes` to avoid shadowing the builtin
            code = section.data()[function_start:function_end]
            function = Function(fn['name'], fn['address'], fn['sz'], code,
                                self.container, fn['bind'])
            print('Added function %s' % fn['name'])
            self.container.add_function(function)

    # Load all the data sections of the executable
    def load_data_sections(self, seclist,
                           section_filter=lambda sname, sval, container: True):
        section_names_list = [sname for sname, sval in seclist.items()
                              if section_filter(sname, sval, self.container)]
        for sname in section_names_list:
            sval = seclist[sname]
            section = self.elffile.get_section_by_name(sname)
            data = section.data()
            more = bytearray()
            if sname == ".init_array":
                # Skip the first entry (8 bytes) of .init_array.
                if len(data) > 8:
                    data = data[8:]
                else:
                    data = b''
                more.extend(data)
            else:
                more.extend(data)

            # Zero-pad up to the declared section size.
            if len(more) < sval['sz']:
                more.extend(
                    [0x0 for _ in range(0, sval['sz'] - len(more))])

            ds = DataSection(sname, sval["base"], sval["sz"], more,
                             sval['flags'], sval['type'], sval['align'])
            print('Loaded data section %s' % sname)
            self.container.add_section(ds)

    # Load all the relocations
    def load_relocations(self, relocs):
        for reloc_section, relocations in relocs.items():
            # strip the ".rela"/".rel " prefix to get the target section name
            section = reloc_section[5:]

            if section in self.container.sections:
                # Data section relocation
                self.container.sections[section].add_relocations(relocations)
            else:
                # Code section relocation
                self.container.add_code_relocations(section, relocations)

    def reloc_list_from_symtab(self):
        relocs = defaultdict(list)

        # Find all the relocation sections in the file
        # relocation_section is a section containing the relocations for
        # another section (the target section)
        for relocation_section in self.elffile.iter_sections():
            if not isinstance(relocation_section, RelocationSection):
                continue

            # symtable is the symbol table section associated with this
            # relocation section
            symtable = self.elffile.get_section(relocation_section['sh_link'])

            # target_section is the section that the relocation affects (i.e.
            # the section where the linker/loader will write the value
            # computed by the relocation)
            target_section = self.elffile.get_section_by_name(
                relocation_section.name[5:])

            for relocation in relocation_section.iter_relocations():
                if relocation_section.name == '.rela.dyn':
                    # .rela.dyn has no single target; find the section that
                    # contains the relocated offset.
                    for s in self.elffile.iter_sections():
                        section_start = s['sh_offset']
                        section_end = section_start + s.data_size
                        if section_start <= relocation['r_offset'] < section_end:
                            target_section = s
                            break
                    else:
                        assert False

                # symbol is the symbol that the relocation refers to
                symbol = None
                # symbol_section is the section that contains the symbol
                symbol_section = None
                if relocation['r_info_sym'] != 0:
                    symbol = symtable.get_symbol(relocation['r_info_sym'])

                if symbol:
                    # This relocation points to a symbol
                    symbol_section = (
                        self.elffile.get_section(symbol['st_shndx'])
                        if symbol['st_shndx'] != 'SHN_UNDEF' else None)

                    # Symbols can have a name or no name
                    if symbol['st_name'] == 0:
                        # The symbol doesn't have a name, we will use the name
                        # of the section that contains it instead. Symbols
                        # that don't have a name always have a section
                        assert symbol_section
                        symbol_name = symbol_section.name
                    else:
                        # The symbol has a name
                        symbol_name = symbol.name

                    # relocation_address is the address at which the
                    # relocation will be applied
                    # symbol_address is the address of the symbol, or None if
                    # the symbol is external/imported
                    if self.elffile['e_type'] == 'ET_REL':
                        relocation_address = Address(
                            target_section, relocation['r_offset'])
                        symbol_address = (
                            Address(symbol_section, symbol['st_value'])
                            if symbol_section else None)
                    else:
                        relocation_address = Address(
                            target_section,
                            relocation['r_offset'] - target_section['sh_addr'])
                        symbol_address = (
                            Address(symbol_section,
                                    symbol['st_value'] - symbol_section['sh_addr'])
                            if symbol_section else None)
                else:
                    symbol_name = None
                    # This relocation doesn't point to a symbol, we have to
                    # use 0 as the symbol's value, which basically means that
                    # this points to some place in the executable. This can
                    # only work for non-relocatable files because relocatable
                    # files could have any layout in memory
                    absolute_address = relocation['r_addend']
                    assert self.elffile['e_type'] != 'ET_REL'

                    for section in self.elffile.iter_sections():
                        if (section['sh_addr'] <= absolute_address
                                < section['sh_addr'] + section.data_size):
                            symbol_address = Address(
                                section,
                                absolute_address - section['sh_addr'])
                            break
                    else:
                        assert False, \
                            'Relocation with no symbol outside of all sections'

                    relocation_address = Address(
                        target_section,
                        relocation['r_offset'] - target_section['sh_addr'])

                reloc_i = {
                    'name': symbol_name,
                    'address': relocation_address,
                    'addend': relocation['r_addend'],
                    'type': relocation['r_info_type'],
                    'symbol_address': symbol_address,
                }
                relocs[relocation_section.name].append(reloc_i)

        return relocs

    def flist_from_symtab(self):
        """Collect {name, size, visibility, bind, address} for every defined,
        non-hidden STT_FUNC symbol."""
        symbol_tables = [
            sec for sec in self.elffile.iter_sections()
            if isinstance(sec, SymbolTableSection)
        ]

        function_list = []

        for section in symbol_tables:
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                continue

            for symbol in section.iter_symbols():
                if symbol['st_other']['visibility'] == "STV_HIDDEN":
                    continue

                if (symbol['st_info']['type'] == 'STT_FUNC'
                        and symbol['st_shndx'] != 'SHN_UNDEF'):
                    fn_section = self.elffile.get_section(symbol['st_shndx'])
                    # FIX(review): pyelftools reports e_type as the string
                    # 'ET_REL', not the ENUM_E_TYPE integer; the original
                    # comparison against ENUM_E_TYPE['ET_REL'] was always
                    # false. Made consistent with is_pie() and
                    # reloc_list_from_symtab().
                    if self.elffile['e_type'] == 'ET_REL':
                        fn_offset = symbol['st_value']
                    else:
                        fn_offset = symbol['st_value'] - fn_section['sh_addr']
                    function_list.append({
                        'name': symbol.name,
                        'sz': symbol['st_size'],
                        'visibility': symbol['st_other']['visibility'],
                        'bind': symbol['st_info']['bind'],
                        'address': Address(fn_section, fn_offset),
                    })

        return function_list

    def slist_from_symtab(self):
        """Return {section_name: header fields} for every section."""
        sections = dict()
        for section in self.elffile.iter_sections():
            sections[section.name] = {
                'base': section['sh_addr'],
                'sz': section['sh_size'],
                'offset': section['sh_offset'],
                'align': section['sh_addralign'],
                'flags': section['sh_flags'],
                'type': section['sh_type'],
            }
        return sections

    def load_globals_from_glist(self, glist):
        self.container.add_globals(glist)

    def global_data_list_from_symtab(self):
        """Collect defined, non-hidden, non-empty STT_OBJECT symbols."""
        symbol_tables = [
            sec for sec in self.elffile.iter_sections()
            if isinstance(sec, SymbolTableSection)
        ]

        global_list = []

        for section in symbol_tables:
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                continue

            for symbol in section.iter_symbols():
                # XXX: HACK
                if "@@GLIBC" in symbol.name:
                    continue
                if symbol['st_other']['visibility'] == "STV_HIDDEN":
                    continue
                if symbol['st_size'] == 0:
                    continue

                if (symbol['st_info']['type'] == 'STT_OBJECT'
                        and symbol['st_shndx'] != 'SHN_UNDEF'):
                    global_section = self.elffile.get_section(
                        symbol['st_shndx'])
                    # Same e_type string comparison as flist_from_symtab.
                    if self.elffile['e_type'] == 'ET_REL':
                        global_offset = symbol['st_value']
                    else:
                        global_offset = (symbol['st_value']
                                         - global_section['sh_addr'])
                    global_list.append({
                        'name': "{}_{:x}".format(symbol.name,
                                                 symbol['st_value']),
                        'sz': symbol['st_size'],
                        'section': global_section,
                        'offset': global_offset,
                    })

        return global_list
class Manticore(Eventful):
    '''
    The central analysis object.

    This should generally not be invoked directly; the various class method
    constructors should be preferred:
    :meth:`~manticore.Manticore.linux`, :meth:`~manticore.Manticore.decree`,
    :meth:`~manticore.Manticore.evm`.

    :param path_or_state: Path to a binary to analyze (**deprecated**) or `State` object
    :type path_or_state: str or State
    :param argv: Arguments to provide to binary (**deprecated**)
    :type argv: list[str]
    :ivar dict context: Global context for arbitrary data storage
    '''

    _published_events = {'start_run', 'finish_run'}

    def __init__(self, path_or_state, argv=None, workspace_url=None,
                 policy='random', **kwargs):
        super(Manticore, self).__init__()

        if isinstance(workspace_url, str):
            if ':' not in workspace_url:
                ws_path = 'fs:' + workspace_url
            else:
                ws_path = workspace_url
        else:
            if workspace_url is not None:
                raise Exception('Invalid workspace')
            ws_path = None

        self._output = ManticoreOutput(ws_path)
        self._context = {}

        # sugar for 'will_execute_instruction'
        self._hooks = {}
        self._executor = Executor(store=self._output.store, policy=policy)
        self._workers = []

        # Link Executor events to default callbacks in manticore object
        self.forward_events_from(self._executor)

        if isinstance(path_or_state, str):
            if not os.path.isfile(path_or_state):
                raise Exception(
                    '{} is not an existing regular file'.format(path_or_state))
            self._initial_state = make_initial_state(
                path_or_state, argv=argv, **kwargs)
        elif isinstance(path_or_state, State):
            self._initial_state = path_or_state
        else:
            raise TypeError(
                'path_or_state must be either a str or State, not {}'.format(
                    type(path_or_state).__name__))

        if not isinstance(self._initial_state, State):
            raise TypeError("Manticore must be intialized with either a State or a path to a binary")

        self.plugins = set()

        # Move the following into a plugin
        self._assertions = {}
        self._coverage_file = None
        self.trace = None

        # FIXME move the following to a plugin
        self.subscribe('will_generate_testcase',
                       self._generate_testcase_callback)
        self.subscribe('did_finish_run', self._did_finish_run_callback)

        # Default plugins for now.. FIXME?
        self.register_plugin(InstructionCounter())
        self.register_plugin(Visited())
        self.register_plugin(Tracer())
        self.register_plugin(RecordSymbolicBranches())

    def register_plugin(self, plugin):
        # Global enumeration of valid events
        assert isinstance(plugin, Plugin)
        assert plugin not in self.plugins, "Plugin instance already registered"
        assert plugin.manticore is None, "Plugin instance already owned"

        plugin.manticore = self
        self.plugins.add(plugin)

        events = Eventful.all_events()
        prefix = Eventful.prefixes
        all_events = [x + y for x, y in itertools.product(prefix, events)]
        for event_name in all_events:
            callback_name = '{}_callback'.format(event_name)
            callback = getattr(plugin, callback_name, None)
            if callback is not None:
                self.subscribe(event_name, callback)

        # Safety checks: warn on plugin methods that look like callbacks but
        # will never be invoked because of naming mistakes.
        for callback_name in dir(plugin):
            if callback_name.endswith('_callback'):
                event_name = callback_name[:-9]
                if event_name not in all_events:
                    logger.warning("There is no event named %s for callback on plugin %s",
                                   event_name, type(plugin).__name__)

        for event_name in all_events:
            for plugin_method_name in dir(plugin):
                if event_name in plugin_method_name:
                    if not plugin_method_name.endswith('_callback'):
                        if plugin_method_name.startswith('on_') or \
                                plugin_method_name.startswith('will_') or \
                                plugin_method_name.startswith('did_'):
                            logger.warning("Plugin methods named '%s()' should end with '_callback' on plugin %s",
                                           plugin_method_name, type(plugin).__name__)
                    if plugin_method_name.endswith('_callback') and \
                            not plugin_method_name.startswith('on_') and \
                            not plugin_method_name.startswith('will_') and \
                            not plugin_method_name.startswith('did_'):
                        logger.warning("Plugin methods named '%s()' should start with 'on_', 'will_' or 'did_' on plugin %s",
                                       plugin_method_name, type(plugin).__name__)

    def unregister_plugin(self, plugin):
        assert plugin in self.plugins, "Plugin instance not registered"
        self.plugins.remove(plugin)
        plugin.manticore = None

    @classmethod
    def linux(cls, path, argv=None, envp=None, symbolic_files=None,
              concrete_start='', **kwargs):
        """
        Constructor for Linux binary analysis.

        :param str path: Path to binary to analyze
        :param argv: Arguments to provide to the binary
        :type argv: list[str]
        :param envp: Environment to provide to the binary
        :type envp: dict[str, str]
        :param symbolic_files: Filenames to mark as having symbolic input
        :type symbolic_files: list[str]
        :param str concrete_start: Concrete stdin to use before symbolic input
        :param kwargs: Forwarded to the Manticore constructor
        :return: Manticore instance, initialized with a Linux State
        :rtype: Manticore
        """
        try:
            return cls(make_linux(path, argv, envp, symbolic_files,
                                  concrete_start), **kwargs)
        except elftools.common.exceptions.ELFError:
            raise Exception('Invalid binary: {}'.format(path))

    @classmethod
    def decree(cls, path, concrete_start='', **kwargs):
        """
        Constructor for Decree binary analysis.

        :param str path: Path to binary to analyze
        :param str concrete_start: Concrete stdin to use before symbolic input
        :param kwargs: Forwarded to the Manticore constructor
        :return: Manticore instance, initialized with a Decree State
        :rtype: Manticore
        """
        try:
            return cls(make_decree(path, concrete_start), **kwargs)
        except KeyError:  # FIXME(mark) magic parsing for DECREE should raise better error
            raise Exception('Invalid binary: {}'.format(path))

    @classmethod
    def evm(cls, **kwargs):
        """
        Constructor for Ethereum virtual machine bytecode analysis.

        :param kwargs: Forwarded to the Manticore constructor
        :return: Manticore instance, initialized with a EVM State
        :rtype: Manticore
        """
        # Make the constraint store
        constraints = ConstraintSet()
        # make the ethereum world state
        world = evm.EVMWorld(constraints)
        return cls(State(constraints, world), **kwargs)

    @property
    def initial_state(self):
        return self._initial_state

    def subscribe(self, name, callback):
        # Bind plain functions to self so plugins can use unbound callbacks.
        from types import MethodType
        if not isinstance(callback, MethodType):
            callback = MethodType(callback, self)
        super(Manticore, self).subscribe(name, callback)

    @property
    def context(self):
        ''' Convenient access to shared context '''
        if not self.running:
            return self._context
        else:
            logger.warning("Using shared context without a lock")
            return self._executor._shared_context

    @contextmanager
    def locked_context(self, key=None, value_type=list):
        """
        A context manager that provides safe parallel access to the global
        Manticore context. This should be used to access the global Manticore
        context when parallel analysis is activated. Code within the `with`
        block is executed atomically, so access of shared variables should
        occur within.

        Example use::

            with m.locked_context() as context:
                visited = context['visited']
                visited.append(state.cpu.PC)
                context['visited'] = visited

        Optionally, parameters can specify a key and type for the object
        paired to this key.::

            with m.locked_context('feature_list', list) as feature_list:
                feature_list.append(1)

        :param object key: Storage key
        :param value_type: type of value associated with key
        :type value_type: list or dict or set
        """

        @contextmanager
        def _real_context():
            if not self.running:
                yield self._context
            else:
                with self._executor.locked_context() as context:
                    yield context

        with _real_context() as context:
            if key is None:
                yield context
            else:
                assert value_type in (list, dict, set)
                ctx = context.get(key, value_type())
                yield ctx
                context[key] = ctx

    @staticmethod
    def verbosity(level):
        """Convenience interface for setting logging verbosity to one of
        several predefined logging presets. Valid values: 0-5.
        """
        log.set_verbosity(level)

    # FIX(review): `running` was defined twice back to back; the first
    # definition (`self._executor._running.value`) was silently shadowed by
    # the second. The dead duplicate is removed; the effective definition
    # (last one wins at class-creation time) is kept.
    @property
    def running(self):
        return self._executor.running

    def enqueue(self, state):
        ''' Dynamically enqueue states. Users should typically not need to
        do this '''
        assert not self.running, "Can't add state where running. Can we?"
        self._executor.enqueue(state)

    ###########################################################################
    # Workers                                                                 #
    ###########################################################################

    def _start_workers(self, num_processes, profiling=False):
        assert num_processes > 0, "Must have more than 0 worker processes"

        logger.debug("Starting %d processes.", num_processes)

        if profiling:
            def profile_this(func):
                @functools.wraps(func)
                def wrapper(*args, **kwargs):
                    profile = cProfile.Profile()
                    profile.enable()
                    result = func(*args, **kwargs)
                    profile.disable()
                    profile.create_stats()
                    with self.locked_context('profiling_stats', list) as profiling_stats:
                        profiling_stats.append(profile.stats.items())
                    return result
                return wrapper

            target = profile_this(self._executor.run)
        else:
            target = self._executor.run

        if num_processes == 1:
            target()
        else:
            for _ in range(num_processes):
                p = Process(target=target, args=())
                self._workers.append(p)
                p.start()

    def _join_workers(self):
        with WithKeyboardInterruptAs(self._executor.shutdown):
            while len(self._workers) > 0:
                self._workers.pop().join()

    ############################################################################
    # Common hooks + callback
    ############################################################################

    def init(self, f):
        '''
        A decorator used to register a hook function to run before analysis
        begins. Hook function takes one :class:`~manticore.core.state.State`
        argument.
        '''
        def callback(manticore_obj, state):
            f(state)
        self.subscribe('will_start_run', types.MethodType(callback, self))
        return f

    def hook(self, pc):
        '''
        A decorator used to register a hook function for a given instruction
        address. Equivalent to calling :func:`~add_hook`.

        :param pc: Address of instruction to hook
        :type pc: int or None
        '''
        def decorator(f):
            self.add_hook(pc, f)
            return f
        return decorator

    def add_hook(self, pc, callback):
        '''
        Add a callback to be invoked on executing a program counter. Pass
        `None` for pc to invoke callback on every instruction. `callback`
        should be a callable that takes one
        :class:`~manticore.core.state.State` argument.

        :param pc: Address of instruction to hook
        :type pc: int or None
        :param callable callback: Hook function
        '''
        if not (isinstance(pc, (int, long)) or pc is None):
            raise TypeError(
                "pc must be either an int or None, not {}".format(
                    pc.__class__.__name__))
        else:
            self._hooks.setdefault(pc, set()).add(callback)
            if self._hooks:
                self._executor.subscribe('will_execute_instruction',
                                         self._hook_callback)

    def _hook_callback(self, state, pc, instruction):
        'Invoke all registered generic hooks'

        # Ignore symbolic pc.
        # TODO(yan): Should we ask the solver if any of the hooks are
        # possible, and execute those that are?
        if not isinstance(pc, (int, long)):
            return

        # Invoke all pc-specific hooks
        for cb in self._hooks.get(pc, []):
            cb(state)

        # Invoke all pc-agnostic hooks
        for cb in self._hooks.get(None, []):
            cb(state)

    ############################################################################
    # Model hooks + callback
    ############################################################################

    def apply_model_hooks(self, path):
        # TODO(yan): Simplify the partial function application
        # Imported straight from __main__.py; this will be re-written once
        # the new event code is in place.
        # NOTE(review): self._model_hooks is never initialized in __init__ —
        # first call will AttributeError; confirm against callers.
        import core.cpu
        import importlib
        import platforms

        with open(path, 'r') as fnames:
            for line in fnames.readlines():
                address, cc_name, name = line.strip().split(' ')
                fmodel = platforms
                name_parts = name.split('.')
                importlib.import_module(
                    ".platforms.{}".format(name_parts[0]), 'manticore')
                for n in name_parts:
                    fmodel = getattr(fmodel, n)
                assert fmodel != platforms

                def cb_function(state):
                    state.platform.invoke_model(
                        fmodel, prefix_args=(state.platform,))
                self._model_hooks.setdefault(
                    int(address, 0), set()).add(cb_function)
                self._executor.subscribe('will_execute_instruction',
                                         self._model_hook_callback)

    def _model_hook_callback(self, state, instruction):
        pc = state.cpu.PC
        if pc not in self._model_hooks:
            return

        for cb in self._model_hooks[pc]:
            cb(state)

    ############################################################################
    # Assertion hooks + callback
    ############################################################################

    def load_assertions(self, path):
        with open(path, 'r') as f:
            for line in f.readlines():
                pc = int(line.split(' ')[0], 16)
                if pc in self._assertions:
                    logger.debug("Repeated PC in assertions file %s", path)
                self._assertions[pc] = ' '.join(line.split(' ')[1:])
                self.subscribe('will_execute_instruction',
                               self._assertions_callback)

    def _assertions_callback(self, state, pc, instruction):
        if pc not in self._assertions:
            return

        from .core.parser.parser import parse

        program = self._assertions[pc]

        # This will interpret the buffer specification written in INTEL ASM.
        # (It may dereference pointers)
        assertion = parse(program, state.cpu.read_int,
                          state.cpu.read_register)
        if not solver.can_be_true(state.constraints, assertion):
            logger.info(str(state.cpu))
            logger.info("Assertion %x -> {%s} does not hold. Aborting state.",
                        state.cpu.pc, program)
            raise TerminateState()

        # Everything is good add it.
        state.constraints.add(assertion)

    ##########################################################################
    # Some are placeholders Remove FIXME
    # Any platform specific callback should go to a plugin

    def _generate_testcase_callback(self, state, name, message):
        '''
        Create a serialized description of a given state.

        :param state: The state to generate information about
        :param message: Accompanying message
        '''
        testcase_id = self._output.save_testcase(state, name, message)
        logger.info("Generated testcase No. {} - {}".format(
            testcase_id, message))

    def _produce_profiling_data(self):
        class PstatsFormatted:
            def __init__(self, d):
                self.stats = dict(d)

            def create_stats(self):
                pass

        with self.locked_context('profiling_stats') as profiling_stats:
            with self._output.save_stream('profiling.bin', binary=True) as s:
                ps = None
                for item in profiling_stats:
                    try:
                        stat = PstatsFormatted(item)
                        if ps is None:
                            ps = pstats.Stats(stat, stream=s)
                        else:
                            ps.add(stat)
                    except TypeError:
                        logger.info("Incorrectly formatted profiling information in _stats, skipping")

                if ps is None:
                    logger.info("Profiling failed")
                else:
                    # XXX(yan): pstats does not support dumping to a file
                    # stream, only to a file name. Below is essentially the
                    # implementation of pstats.dump_stats() without the
                    # extra open().
                    import marshal
                    marshal.dump(ps.stats, s)

    def _start_run(self):
        assert not self.running
        if self._initial_state is not None:
            self._publish('will_start_run', self._initial_state)
            self.enqueue(self._initial_state)
            self._initial_state = None

        # Copy the local main context to the shared context
        self._executor._shared_context.update(self._context)

    def _finish_run(self, profiling=False):
        assert not self.running
        # Copy back the shared context
        self._context = dict(self._executor._shared_context)

        if profiling:
            self._produce_profiling_data()

        self._publish('did_finish_run')

    def run(self, procs=1, timeout=0, should_profile=False):
        '''
        Runs analysis.

        :param int procs: Number of parallel worker processes
        :param timeout: Analysis timeout, in seconds
        '''
        assert not self.running, "Manticore is already running."
        self._start_run()

        self._time_started = time.time()
        if timeout > 0:
            t = Timer(timeout, self.terminate)
            t.start()
        try:
            self._start_workers(procs, profiling=should_profile)
            self._join_workers()
        finally:
            if timeout > 0:
                t.cancel()
        self._finish_run(profiling=should_profile)

    def terminate(self):
        '''
        Gracefully terminate the currently-executing run. Typically called
        from within a :func:`~hook`.
        '''
        self._executor.shutdown()

    #############################################################################
    #############################################################################
    #############################################################################
    # Move all the following elsewhere Not all manticores have this

    def _get_symbol_address(self, symbol):
        '''
        Return the address of |symbol| within the binary
        '''

        # XXX(yan) This is a bit obtuse; once PE support is updated this
        # should be refactored out
        if self._binary_type == 'ELF':
            self._binary_obj = ELFFile(file(self._binary))

        # NOTE(review): this returns (not raises) NotImplementedError, and
        # _binary_obj was assigned just above, so the branch is dead —
        # kept as-is to preserve behavior; confirm intent before raising.
        if self._binary_obj is None:
            return NotImplementedError("Symbols aren't supported")

        for section in self._binary_obj.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue

            symbols = section.get_symbol_by_name(symbol)
            if not symbols:
                continue

            return symbols[0].entry['st_value']

    @property
    def coverage_file(self):
        return self._coverage_file

    @property
    def workspace(self):
        return self._output.store.uri

    @coverage_file.setter
    def coverage_file(self, path):
        assert not self.running, "Can't set coverage file if Manticore is running."
        self._coverage_file = path

    def _did_finish_run_callback(self):
        _shared_context = self.context

        with self._output.save_stream('command.sh') as f:
            f.write(' '.join(sys.argv))

        elapsed = time.time() - self._time_started
        logger.info('Results in %s', self._output.store.uri)
        logger.info('Total time: %s', elapsed)
def load_module(self, filename, do_init=True):
    """Load an ET_DYN ELF into emulated memory, linker-style: map PT_LOAD
    segments, load DT_NEEDED dependencies recursively, resolve symbols,
    apply ARM/AArch64 relocations, and optionally run init functions.

    Returns the (possibly already cached) Module object.
    """
    m = self.find_module_by_name(filename)
    # FIX(review): was `if (m != None)` — use identity comparison for None.
    if m is not None:
        return m
    # logger.debug("Loading module '%s'." % filename)
    # do sth like linker
    with open(filename, 'rb') as fstream:
        # TODO: load elf without Section Header, pyelftools do not support.
        elf = ELFFile(fstream)

        dynamic = elf.header.e_type == 'ET_DYN'
        if not dynamic:
            raise NotImplementedError(
                "Only ET_DYN is supported at the moment.")

        # Parse program header (Execution view).
        # - LOAD (determinate what parts of the ELF file get mapped into
        #   memory)
        load_segments = [
            x for x in elf.iter_segments() if x.header.p_type == 'PT_LOAD'
        ]

        # Find bounds of the load segments.
        bound_low = 0
        bound_high = 0

        for segment in load_segments:
            if segment.header.p_memsz == 0:
                continue

            if bound_low > segment.header.p_vaddr:
                bound_low = segment.header.p_vaddr

            high = segment.header.p_vaddr + segment.header.p_memsz

            if bound_high < high:
                bound_high = high

        '''
        // Segment addresses in memory.
        Elf32_Addr seg_start = phdr->p_vaddr + load_bias_;
        Elf32_Addr seg_end   = seg_start + phdr->p_memsz;

        Elf32_Addr seg_page_start = PAGE_START(seg_start);
        Elf32_Addr seg_page_end   = PAGE_END(seg_end);

        // File offsets.
        Elf32_Addr file_start = phdr->p_offset;
        Elf32_Addr file_end   = file_start + phdr->p_filesz;

        Elf32_Addr seg_file_end   = seg_start + phdr->p_filesz;
        Elf32_Addr file_page_start = PAGE_START(file_start);
        Elf32_Addr file_length = file_end - file_page_start;

        if (file_length != 0) {
            void* seg_addr = mmap((void*)seg_page_start,
                                file_length,
                                PFLAGS_TO_PROT(phdr->p_flags),
                                MAP_FIXED|MAP_PRIVATE,
                                fd_,
                                file_page_start);
        '''
        # Retrieve a base address for this module.
        load_base = self.mem_reserve(bound_low, bound_high)

        vf = VirtualFile(
            misc_utils.system_path_to_vfs_path(self.__vfs_root, filename),
            misc_utils.my_open(filename, os.O_RDONLY), filename)

        for segment in load_segments:
            prot = get_segment_protection(segment.header.p_flags)
            # FIX(review): was `prot is not 0` — identity comparison with an
            # int literal only works by CPython small-int caching accident.
            prot = prot if prot != 0 else UC_PROT_ALL

            # self.emu.memory.map(load_base + segment.header.p_vaddr,
            #                     segment.header.p_memsz, prot)
            # self.emu.mu.mem_write(load_base + segment.header.p_vaddr,
            #                       segment.data())
            seg_start = load_base + segment.header.p_vaddr
            seg_page_start = page_start(seg_start)
            file_start = segment.header.p_offset
            file_end = file_start + segment.header.p_filesz
            file_page_start = page_start(file_start)
            file_length = file_end - file_page_start
            # NOTE(review): this assert makes the `if` below redundant —
            # a zero-length mapping would abort here first.
            assert (file_length > 0)
            if (file_length > 0):
                self.emu.memory.map(seg_page_start, file_length, prot, vf,
                                    file_page_start)
            #
            seg_end = seg_start + segment.header.p_memsz
            seg_page_end = page_end(seg_end)

            seg_file_end = seg_start + segment.header.p_filesz

            seg_file_end = page_end(seg_file_end)
            '''
            void* zeromap = mmap((void*)seg_file_end,
                        seg_page_end - seg_file_end,
                        PFLAGS_TO_PROT(phdr->p_flags),
                        MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE,
                        -1,
                        0);
            '''
            # Zero-fill region between file end and segment end (bss).
            self.emu.memory.map(seg_file_end,
                                seg_page_end - seg_file_end, prot)
        #

        # Find init array.
        init_array_size = 0
        init_array_offset = 0
        init_array = []
        init_addr = 0
        dynstr_addr = 0
        dt_needed = []
        for x in elf.iter_segments():
            if x.header.p_type == "PT_DYNAMIC":
                for tag in x.iter_tags():
                    if tag.entry.d_tag == "DT_INIT_ARRAYSZ":
                        init_array_size = tag.entry.d_val
                    elif tag.entry.d_tag == "DT_INIT_ARRAY":
                        init_array_offset = tag.entry.d_val
                    elif tag.entry.d_tag == "DT_INIT":
                        init_addr = tag.entry.d_val + load_base
                    elif tag.entry.d_tag == "DT_STRTAB":
                        dynstr_addr = tag.entry.d_val + load_base
                    elif tag.entry.d_tag == "DT_NEEDED":
                        dt_needed.append(tag.entry.d_val)
                    #
                #
                break
            #
        #
        # Resolve DT_NEEDED name offsets through the (already mapped)
        # dynamic string table.
        so_needed = []
        for str_off in dt_needed:
            str_addr = dynstr_addr + str_off
            so_name = memory_helpers.read_utf8(self.emu.mu, str_addr)
            so_needed.append(so_name)
        #
        # Recursively load dependencies that exist in the virtual fs.
        for so_name in so_needed:
            path = misc_utils.vfs_path_to_system_path(
                self.__vfs_root, so_name)
            if (not os.path.exists(path)):
                logger.warn("%s needed by %s do not exist in vfs %s"
                            % (so_name, filename, self.__vfs_root))
                continue
            #
            libmod = self.load_module(path)
        #
        # NOTE(review): only the FIRST relocation section is kept; if the
        # init-array lookup below runs with rel_section still None it will
        # AttributeError — confirm all inputs have a reloc section.
        rel_section = None
        for section in elf.iter_sections():
            if not isinstance(section, RelocationSection):
                continue
            rel_section = section
            break
        #
        # Parse section header (Linking view).
        dynsym = elf.get_section_by_name(".dynsym")

        for _ in range(int(init_array_size / 4)):
            b = self.emu.mu.mem_read(load_base + init_array_offset, 4)
            fun_ptr = int.from_bytes(b, byteorder='little', signed=False)
            if fun_ptr != 0:
                # fun_ptr += load_base
                init_array.append(fun_ptr + load_base)
                # print ("find init array for :%s %x" % (filename, fun_ptr))
            else:
                # search in reloc
                for rel in rel_section.iter_relocations():
                    rel_info_type = rel['r_info_type']
                    rel_addr = rel['r_offset']
                    if (rel_info_type == arm.R_ARM_ABS32
                            and rel_addr == init_array_offset):
                        sym = dynsym.get_symbol(rel['r_info_sym'])
                        sym_value = sym['st_value']
                        init_array.append(load_base + sym_value)
                        # print ("find init array for :%s %x" % (filename, sym_value))
                        break
                    #
                #
            init_array_offset += 4

        # Resolve all symbols.
        symbols_resolved = dict()

        for section in elf.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue

            itersymbols = section.iter_symbols()
            next(itersymbols)  # Skip first symbol which is always NULL.
            for symbol in itersymbols:
                symbol_address = self._elf_get_symval(
                    elf, load_base, symbol)
                if symbol_address is not None:
                    symbols_resolved[symbol.name] = SymbolResolved(
                        symbol_address, symbol)

        # Relocate.
        for section in elf.iter_sections():
            if not isinstance(section, RelocationSection):
                continue

            for rel in section.iter_relocations():
                sym = dynsym.get_symbol(rel['r_info_sym'])
                sym_value = sym['st_value']

                rel_addr = load_base + rel['r_offset']  # Location where relocation should happen
                rel_info_type = rel['r_info_type']

                # print(filename)
                # print("%x" % rel_addr)

                # Relocation table for ARM
                if rel_info_type == arm.R_ARM_ABS32:
                    if sym.name in symbols_resolved:
                        sym_addr = symbols_resolved[sym.name].address

                        value_orig_bytes = self.emu.mu.mem_read(rel_addr, 4)
                        value_orig = int.from_bytes(value_orig_bytes,
                                                    byteorder='little')

                        # R_ARM_ABS32: see the android linker sources —
                        # *reinterpret_cast<Elf32_Addr*>(reloc) += sym_addr;
                        value = sym_addr + value_orig

                        # Write the new value
                        # print(value)
                        self.emu.mu.mem_write(
                            rel_addr, value.to_bytes(4, byteorder='little'))
                    #
                #
                elif rel_info_type in (arm.R_ARM_GLOB_DAT,
                                       arm.R_ARM_JUMP_SLOT,
                                       arm.R_AARCH64_GLOB_DAT,
                                       arm.R_AARCH64_JUMP_SLOT):
                    # Resolve the symbol.
                    # R_ARM_GLOB_DAT / R_ARM_JUMP_SLOT: see linker sources —
                    # *reinterpret_cast<Elf32_Addr*>(reloc) = sym_addr;
                    if sym.name in symbols_resolved:
                        value = symbols_resolved[sym.name].address

                        # Write the new value
                        # print(value)
                        self.emu.mu.mem_write(
                            rel_addr, value.to_bytes(4, byteorder='little'))
                    #
                #
                elif rel_info_type in (arm.R_ARM_RELATIVE,
                                       arm.R_AARCH64_RELATIVE):
                    if sym_value == 0:
                        # Load address at which it was linked originally.
                        value_orig_bytes = self.emu.mu.mem_read(rel_addr, 4)
                        value_orig = int.from_bytes(value_orig_bytes,
                                                    byteorder='little')

                        # Create the new value
                        value = load_base + value_orig
                        # print(value)
                        # Write the new value
                        self.emu.mu.mem_write(
                            rel_addr, value.to_bytes(4, byteorder='little'))
                    else:
                        raise NotImplementedError()
                else:
                    logger.error("Unhandled relocation type %i."
                                 % rel_info_type)

        #
        # Store information about loaded module.
        module = Module(filename, load_base, bound_high - bound_low,
                        symbols_resolved, init_addr, init_array)
        self.modules.append(module)

        # TODO init tls like linker
        '''
        void __libc_init_tls(KernelArgumentBlock& args) {
            __libc_auxv = args.auxv;
            unsigned stack_top = (__get_sp() & ~(PAGE_SIZE - 1)) + PAGE_SIZE;
            unsigned stack_size = 128 * 1024;
            unsigned stack_bottom = stack_top - stack_size;
            static void* tls[BIONIC_TLS_SLOTS];
            static pthread_internal_t thread;
            thread.tid = gettid();
            thread.tls = tls;
            pthread_attr_init(&thread.attr);
            pthread_attr_setstack(&thread.attr, (void*) stack_bottom, stack_size);
            _init_thread(&thread, false);
            __init_tls(&thread);
            tls[TLS_SLOT_BIONIC_PREINIT] = &args;
        }
        '''
        if do_init:
            '''
            for r in self.emu.mu.mem_regions():
                print("region begin :0x%08X end:0x%08X, prot:%d"
                      % (r[0], r[1], r[2]))
            #
            '''
            module.call_init(self.emu)
        #
        logger.info("finish load lib %s" % filename)
        return module
def __main():
    """Link an application against a running OS image and flash it to the
    target over a serial bootloader protocol.

    Command line:
        appname hexpath stacklen ldexe ldscript ldoself ldmap ldobjelf ldobj*
    (stacklen may be decimal or 0x-prefixed hex).

    Raises ValueError on bad arguments and RuntimeError on protocol or
    IHEX-format errors.
    """
    # Get the LD parameters.
    # BUGFIX: argv[1]..argv[8] are accessed unconditionally below, so at
    # least 9 entries are required (the old check tested < 7).
    if len(sys.argv) < 9:
        raise ValueError(
            'Command line:\n\tappname hexpath stacklen ldexe ldscript ldoself ldmap ldobjelf ldobj*\nGiven:\n\t'
            + str(sys.argv))
    appname = sys.argv[1].strip()[:MAX_NAME_LENGTH]
    hexpath = sys.argv[2]
    # BUGFIX: hex digits start right after the '0x' prefix; the old code
    # sliced [3:] and silently dropped the first digit. Strip first so the
    # slice matches the prefix test.
    stackarg = sys.argv[3].strip()
    stacklen = int(stackarg[2:], 16) if stackarg.lower()[0:2] == '0x' else int(stackarg)
    ldexe = sys.argv[4]
    ldscript = sys.argv[5]
    ldoself = sys.argv[6]
    ldmap = sys.argv[7]
    ldobjelf = sys.argv[8]
    ldobjs = sys.argv[9:]

    # Get the length of each section
    pgmlen, bsslen, datalen = 0, 0, 0
    with open(ldobjelf, 'rb') as f:
        elffile = ELFFile(f)
        for section in elffile.iter_sections():
            if section.name == '.text':
                pgmlen = int(section['sh_size'])
            elif section.name == '.bss':
                bsslen = int(section['sh_size'])
            elif section.name == '.data':
                datalen = int(section['sh_size'])

    # Open the serial port (ASCII text wrapper with CRLF line endings).
    ser = serial.Serial(SERIAL_PORT, SERIAL_BAUD)
    sio = io.TextIOWrapper(buffer=io.BufferedRWPair(ser, ser, 1),
                           newline='\r\n',
                           line_buffering=True,
                           encoding='ascii')
    try:
        # Wake the target and drain any pending input.
        # NOTE(review): unicode() makes this a Python 2 runtime dependency.
        sio.write(unicode('\n\n'))
        time.sleep(0.1)
        while ser.inWaiting():
            ser.flushInput()
            time.sleep(0.1)
        sio.write(unicode('app_install\n'))
        print('"' + sio.readline() + '"')

        # Send the section sizes and app name
        sizestr = '%0.8X,%0.8X,%0.8X,%0.8X,%0.2X%s' % (
            pgmlen, bsslen, datalen, stacklen, len(appname), appname)
        print(sizestr)
        sio.write(unicode(sizestr + '\n'))

        # Receive the allocated addresses
        addrs = sio.readline().split(',')
        pgmadr = int(addrs[0].strip(), 16)
        bssadr = int(addrs[1].strip(), 16)
        dataadr = int(addrs[2].strip(), 16)
        datapgmadr = int(addrs[3].strip(), 16)  # .data load image address (reported but unused here)

        # Link to the OS symbols at the addresses the target allocated.
        sectopt = [
            '--section-start', '.text=0x%0.8X' % pgmadr,
            '--section-start', '.bss=0x%0.8X' % bssadr,
            '--section-start', '.data=0x%0.8X' % dataadr
        ]
        args = [
            ldexe, '--script', ldscript, '--just-symbols', ldoself,
            '-Map', ldmap, '-o', ldobjelf
        ] + sectopt + ldobjs
        print(args)
        subprocess.call(args)
        subprocess.call(['make'])

        # Find the entry-point thread in the relinked ELF.
        with open(ldobjelf, 'rb') as f:
            elffile = ELFFile(f)
            threadadr = __find_address(elffile, ENTRY_THREAD_NAME)
        print('app_thread = 0x%0.8X' % threadadr)

        # Read the generated IHEX file and remove unused records
        # (type 05 = start linear address, 03 = start segment address).
        with open(hexpath, 'r') as f:
            hexdata = f.readlines()
        hexdata = [
            line.strip() for line in hexdata
            if not line[7:9] in ['05', '03'] and len(line) >= 11
        ]
        hexdata = [line for line in hexdata if len(line) > 0]
        # BUGFIX: either violation (not exactly one EOF record, or the EOF
        # record not being last) must abort; the old 'and' only caught the
        # case where both held at once.
        if len([None for line in hexdata if line[7:9] == '01'
                ]) != 1 or hexdata[-1][7:9] != '01':
            raise RuntimeError(
                'The IHEX must contain a single EOF record, as last record')

        # Insert the entry point thread record (IHEX type 05). The byte
        # folding plus the final '& 0xFF' yields the standard
        # two's-complement record checksum mod 256.
        chks = threadadr & 0xFFFFFFFF
        chks = (chks >> 24) + (chks >> 16) + (chks >> 8) + (chks & 0xFF)
        chks = 0x100 - (0x04 + 0x00 + 0x00 + 0x05 + chks) & 0xFF
        hexdata[0:0] = [':04000005%0.8X%0.2X' % (threadadr, chks)]

        # Send IHEX records; the target acknowledges each line with ','
        # and the whole transfer with '$'.
        for i in range(len(hexdata)):
            line = sio.readline().strip()
            print(line)
            if line != ',':
                raise RuntimeError(
                    'Error while loading line %d ("%s", received "%s")' %
                    (i, hexdata[i], line))
            sio.write(unicode(hexdata[i] + '\n'))
            print(hexdata[i])
        line = sio.readline().strip()
        print(line)
        if line != '$':
            raise RuntimeError(
                'Error while terminating programming (received "%s")' % line)
        ser.close()
    except:
        # Always release the serial port, then propagate the error.
        ser.close()
        raise
def load_module(self, filename):
    """Load an ET_DYN ELF module into the emulator's memory.

    Maps all PT_LOAD segments at a freshly reserved base address,
    resolves the module's symbols, applies ARM/AArch64 relocations,
    and registers the result in self.modules.

    :param filename: path to the shared object to load.
    :return: the Module object describing the loaded image.
    :raises NotImplementedError: for non-ET_DYN files or relocations
        this loader does not support.
    """
    logger.debug("Loading module '%s'." % filename)
    with open(filename, 'rb') as fstream:
        elf = ELFFile(fstream)

        if elf.header.e_type != 'ET_DYN':
            raise NotImplementedError(
                "Only ET_DYN is supported at the moment.")

        # Parse program header (Execution view):
        # PT_LOAD segments determine what gets mapped into memory.
        load_segments = [
            x for x in elf.iter_segments() if x.header.p_type == 'PT_LOAD'
        ]

        # Find bounds of the load segments.
        bound_low = 0
        bound_high = 0
        for segment in load_segments:
            if segment.header.p_memsz == 0:
                continue
            if bound_low > segment.header.p_vaddr:
                bound_low = segment.header.p_vaddr
            high = segment.header.p_vaddr + segment.header.p_memsz
            if bound_high < high:
                bound_high = high

        # Retrieve a base address for this module and map the segments.
        load_base = self.emu.memory.mem_reserve(bound_high - bound_low)

        for segment in load_segments:
            prot = get_segment_protection(segment.header.p_flags)
            # BUGFIX: was 'prot is not 0' — identity comparison with an
            # int literal; use value equality.
            prot = prot if prot != 0 else UC_PROT_ALL

            self.emu.memory.mem_map(load_base + segment.header.p_vaddr,
                                    segment.header.p_memsz, prot)
            self.emu.memory.mem_write(load_base + segment.header.p_vaddr,
                                      segment.data())

        # Linking view: .dynsym is needed to look up relocation symbols.
        dynsym = elf.get_section_by_name(".dynsym")

        # Resolve all symbols.
        symbols_resolved = dict()
        for section in elf.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            itersymbols = section.iter_symbols()
            next(itersymbols)  # Skip first symbol which is always NULL.
            for symbol in itersymbols:
                symbol_address = self._elf_get_symval(elf, load_base, symbol)
                if symbol_address is not None:
                    symbols_resolved[symbol.name] = SymbolResolved(
                        symbol_address, symbol)

        # Relocate.
        for section in elf.iter_sections():
            if not isinstance(section, RelocationSection):
                continue
            for rel in section.iter_relocations():
                sym = dynsym.get_symbol(rel['r_info_sym'])
                sym_value = sym['st_value']
                # Location where the relocation should be applied.
                rel_addr = load_base + rel['r_offset']
                rel_info_type = rel['r_info_type']

                if rel_info_type == arm.R_ARM_ABS32:
                    # *reloc = load_base + sym_value
                    value = load_base + sym_value
                    self.emu.mu.mem_write(
                        rel_addr, value.to_bytes(4, byteorder='little'))
                elif rel_info_type in (arm.R_ARM_GLOB_DAT,
                                       arm.R_ARM_JUMP_SLOT,
                                       arm.R_AARCH64_GLOB_DAT,
                                       arm.R_AARCH64_JUMP_SLOT):
                    # *reloc = resolved symbol address (GOT/PLT entries).
                    if sym.name in symbols_resolved:
                        value = symbols_resolved[sym.name].address
                        self.emu.mu.mem_write(
                            rel_addr, value.to_bytes(4, byteorder='little'))
                elif rel_info_type in (arm.R_ARM_RELATIVE,
                                       arm.R_AARCH64_RELATIVE):
                    if sym_value == 0:
                        # *reloc += load_base (value at reloc is the address
                        # at which the module was linked originally).
                        value_orig_bytes = self.emu.mu.mem_read(rel_addr, 4)
                        value_orig = int.from_bytes(value_orig_bytes,
                                                    byteorder='little')
                        value = load_base + value_orig
                        self.emu.mu.mem_write(
                            rel_addr, value.to_bytes(4, byteorder='little'))
                    else:
                        raise NotImplementedError()
                else:
                    logger.error("Unhandled relocation type %i." %
                                 rel_info_type)

        # Store information about the loaded module.
        module = Module(filename, load_base, bound_high - bound_low,
                        symbols_resolved)
        self.modules.append(module)
        return module
class CoredumpElfFile():
    """
    Parses an ELF file for memory content in various sections.

    Read-only sections (e.g. text and rodata) do not need to be part of a
    coredump since their content can be retrieved from the ELF file itself;
    this class extracts exactly those regions.
    """

    def __init__(self, elffile):
        self.elffile = elffile          # path to the ELF file on disk
        self.fd = None                  # backing file handle (opened lazily)
        self.elf = None                 # parsed ELFFile (opened lazily)
        self.memory_regions = list()

    def open(self):
        """Open the backing file and parse the ELF container."""
        self.fd = open(self.elffile, "rb")
        self.elf = ELFFile(self.fd)

    def close(self):
        """Close the backing file handle."""
        self.fd.close()

    def get_memory_regions(self):
        """Return the regions collected by parse()."""
        return self.memory_regions

    def parse(self):
        """Walk the plain sections and record text/read-only-data regions."""
        if self.fd is None:
            self.open()

        for section in self.elf.iter_sections():
            # REALLY NEED to match exact type as all other sections
            # (debug, text, etc.) are descendants where
            # isinstance() would match.
            if type(section) is not elftools.elf.sections.Section: # pylint: disable=unidiomatic-typecheck
                continue

            length = section['sh_size']
            flags = section['sh_flags']
            begin = section['sh_addr']
            finish = begin + length - 1

            keep = False
            kind = "?"

            if section['sh_type'] == 'SHT_PROGBITS':
                if (flags & SHF_ALLOC_EXEC) == SHF_ALLOC_EXEC:
                    # Text section
                    keep = True
                    kind = "text"
                elif (flags & SHF_WRITE_ALLOC) == SHF_WRITE_ALLOC:
                    # Data section: the running app changes the content,
                    # so there is no need to store it.
                    pass
                elif (flags & SHF_ALLOC) == SHF_ALLOC:
                    # Read only data section
                    keep = True
                    kind = "read-only data"

            if keep:
                mem_region = {
                    "start": begin,
                    "end": finish,
                    "data": section.data()
                }
                logger.info("ELF Section: 0x%x to 0x%x of size %d (%s)" %
                            (mem_region["start"], mem_region["end"],
                             len(mem_region["data"]), kind))
                self.memory_regions.append(mem_region)

        return True
def do_elf(path_to_elf, out_dir='/tmp'):
    """Read information from an ELF file and translate it to the
    appropriate config structure.

    :param path_to_elf: path to an ARM ELF image.
    :param out_dir: directory where segment chunks (and optionally the
        thumb-bit JSON file) are written.
    :return: tuple (config_dict, thumb_json_path_or_None).
    :raises Exception: if the image is not an ARM binary.
    """
    ret = {}
    # Keep the stream open for the whole parse: pyelftools reads lazily.
    with open(path_to_elf, 'rb') as elf_stream:
        elf = ELFFile(elf_stream)
        if 'ARM' != elf.get_machine_arch():
            raise Exception("Architecture not supported")
        ret['architecture'] = 'arm'
        ret['cpu_model'] = 'arm926'
        ret['endianness'] = 'little' if elf.little_endian else 'big'
        ret['entry_address'] = [elf.header.e_entry]

        thumb_targets = []
        targets = []
        for sec in elf.iter_sections():
            # NOTE(review): the bytes prefix assumes pyelftools returns
            # section names as bytes (older releases) — confirm version.
            if sec.name.startswith(b'.symtab'):
                log.info("[Translator] binary contains symbols! Using those instead of the single entry")
                ret['entry_address'] = []
                # nm can run on any type of elf binary
                p = subprocess.Popen(['nm', path_to_elf],
                                     stdout=subprocess.PIPE)
                out, _ = p.communicate()
                for l in out.split('\n'):
                    try:
                        addr, t, name = l.split(' ')
                    except ValueError:
                        # Line does not have exactly three fields.
                        continue
                    if (t == 't' or t == 'T') and not name.startswith('$'):
                        targets.append(int(addr, 16))
                # call readelf -s for getting the thumb bit
                # somehow, the $a and $t are not always generated?
                p = subprocess.Popen(['readelf', '-s', path_to_elf],
                                     stdout=subprocess.PIPE)
                out, _ = p.communicate()
                for l in out.split('\n'):
                    try:
                        _, addr, _, t, _, _, _, name = l.split()
                    except ValueError:
                        continue
                    if t != 'FUNC':
                        continue
                    jumpPC = int(addr, 16)
                    # Odd value means a Thumb entry; store with bit 0 cleared.
                    # (Parenthesized: '==' binds tighter than '&'.)
                    if (jumpPC & 1) == 0x1:
                        thumb_targets.append(jumpPC & -2)

        segments = []
        cnt = 0
        mapped_targets = []
        mapped_thumb_targets = []
        for i in range(elf.num_segments()):
            seg = elf.get_segment(i)
            if seg.header.p_type != 'PT_LOAD':
                continue
            assert (seg.header.p_paddr == seg.header.p_vaddr)
            # Distance from the previous 4k page boundary.
            padding = seg.header.p_paddr % 4096
            s = max(seg.header.p_memsz, seg.header.p_filesz)
            # round up to 4k
            if s % 4096 != 0:
                s = 4096 * int((s + 4096) / 4096)
            s += padding
            # round up to 4k again after adding the alignment padding
            if s % 4096 != 0:
                s = 4096 * int((s + 4096) / 4096)
            # build segment info
            segm_name = 'seg-' + str(cnt) + '.bin'
            segm_file = os.path.join(out_dir, segm_name)
            offset = seg.header.p_offset - padding
            assert (offset >= 0)
            segm_desc = {}
            segm_desc['file'] = segm_file
            segm_desc['size'] = s
            segm_desc['address'] = seg.header.p_paddr - padding
            segm_desc['name'] = segm_name
            # save chunk
            save_chunk(segm_file, path_to_elf, offset, s)
            cnt += 1
            segments.append(segm_desc)
            log.debug("[Translator] loaded %s%08x@%08x" %
                      (seg.header.p_type, seg.header.p_paddr,
                       seg.header.p_offset))

            def inside_segment(e):
                return e >= segm_desc['address'] and \
                       e < (segm_desc['address'] + segm_desc['size'])

            # BUGFIX: the original used map() purely for its side effect,
            # which is a no-op on Python 3 (map is lazy); extend() behaves
            # identically on both Python 2 and 3.
            mapped_targets.extend(filter(inside_segment, targets))
            mapped_thumb_targets.extend(filter(inside_segment, thumb_targets))

    ret['segments'] = segments
    # unique + deterministic ordering
    mapped_thumb_targets = sorted(list(set(mapped_thumb_targets)))
    mapped_targets = sorted(list(set(mapped_targets)))
    log.debug("[Translator] elf: %d entries and %d thumb bits" %
              (len(mapped_targets), len(mapped_thumb_targets)))
    if len(mapped_thumb_targets) > 0:
        fout = os.path.join(out_dir, "is-thumb-initial.json")
        with open(fout, 'wt') as f:
            f.write(json.dumps(mapped_thumb_targets))
    else:
        fout = None
    ret['entry_address'].extend(mapped_targets)
    return ret, fout
class CrashKernel(CrashBaseClass):
    """Represents a crashed kernel (vmlinux + vmcore) being inspected in
    gdb: sets the gdb architecture from the ELF header, loads and
    relocates kernel/module symbols, and materializes kernel tasks as
    gdb threads."""

    # Declarative members resolved by the CrashBaseClass machinery
    # (provides self.module_type and self.modules).
    __types__ = ['struct module']
    __symvals__ = ['modules']

    def __init__(self, vmlinux_filename, searchpath=None):
        """
        Args:
            vmlinux_filename: path to the vmlinux image to debug.
            searchpath: iterable of directories scanned for module .ko
                and .debug files.
        """
        self.findmap = {}  # per-path cache: normalized filename -> full path
        self.vmlinux_filename = vmlinux_filename
        self.searchpath = searchpath
        # NOTE(review): the handle stays open for the object's lifetime
        # (pyelftools reads lazily); it is never explicitly closed.
        f = open(self.vmlinux_filename, 'rb')
        self.elffile = ELFFile(f)
        self.set_gdb_arch()

    def set_gdb_arch(self):
        """Map the ELF machine/class pair to a gdb architecture name and
        apply it via 'set arch'. Raises RuntimeError for unknown pairs."""
        mach = self.elffile['e_machine']
        e_class = self.elffile['e_ident']['EI_CLASS']
        elf_to_gdb = {
            ('EM_X86_64', 'ELFCLASS64'): 'i386:x86-64',
            ('EM_386', 'ELFCLASS32'): 'i386',
            ('EM_S390', 'ELFCLASS64'): 's390:64-bit'
        }
        try:
            gdbarch = elf_to_gdb[(mach, e_class)]
        except KeyError as e:
            raise RuntimeError(
                "no mapping for {}:{} to gdb architecture found.".format(
                    mach, e_class))
        gdb.execute("set arch {}".format(gdbarch), to_string=True)

    def open_kernel(self):
        """Load the vmlinux sections into gdb, pulling in separate
        debuginfo if type information is missing.

        NOTE(review): self.base_offset is only created by attach_vmcore();
        calling this first raises AttributeError rather than the
        RuntimeError below — TODO confirm intended call order.
        """
        if self.base_offset is None:
            raise RuntimeError("Base offset is unconfigured.")
        self.load_sections()
        try:
            # Probe for debug info by looking up a basic kernel type.
            list_type = gdb.lookup_type('struct list_head')
        except gdb.error as e:
            self.load_debuginfo(gdb.objfiles()[0], None)
            try:
                list_type = gdb.lookup_type('struct list_head')
            except gdb.error as e:
                raise RuntimeError("Couldn't locate debuginfo for {}".format(
                    self.vmlinux_filename))
        self.target.setup_arch()

    def get_sections(self):
        """Return {section_name: sh_addr} for sections at/above .text
        (plus .data..percpu, which sits below it)."""
        sections = {}
        text = self.elffile.get_section_by_name('.text')
        for section in self.elffile.iter_sections():
            if (section['sh_addr'] < text['sh_addr'] and
                    section.name != '.data..percpu'):
                continue
            sections[section.name] = section['sh_addr']
        return sections

    def load_sections(self):
        """Feed the vmlinux symbols to gdb, relocated by base_offset."""
        sections = self.get_sections()
        line = ""
        # .data..percpu shouldn't have relocation applied but it does.
        # Perhaps it's due to the address being 0 and it being handled
        # as unspecified in the parameter list.
        # for section, addr in sections.items():
        #     if addr == 0:
        #         line += " -s {} {:#x}".format(section, addr)
        # The gdb internals are subtle WRT how symbols are mapped.
        # Minimal symbols are mapped using the offset for the section
        # that contains them. That means that providing an address
        # for .text here gives a base address with no offset and minimal
        # symbols in .text (like __switch_to_asm) will not have the correct
        # addresses after relocation.
        cmd = "add-symbol-file {} -o {:#x} {} ".format(self.vmlinux_filename,
                                                       self.base_offset,
                                                       line)
        gdb.execute(cmd, to_string=True)

    def attach_vmcore(self, vmcore_filename, debug=False):
        """Open the dump file and derive the KASLR base offset from the
        vmcoreinfo KERNELOFFSET note (0 if unavailable)."""
        self.vmcore_filename = vmcore_filename
        self.vmcore = kdumpfile(vmcore_filename)
        self.target = crash.kdump.target.Target(self.vmcore, debug)
        self.base_offset = 0
        try:
            KERNELOFFSET = "linux.vmcoreinfo.lines.KERNELOFFSET"
            attr = self.vmcore.attr.get(KERNELOFFSET, "0")
            self.base_offset = long(attr, base=16)
        except Exception as e:
            # Best-effort: fall back to offset 0 and report the problem.
            print(e)

    def for_each_module(self):
        """Yield each 'struct module' on the kernel's modules list."""
        for module in list_for_each_entry(self.modules, self.module_type,
                                          'list'):
            yield module

    def get_module_sections(self, module):
        """Build the '-s name addr' argument string for a module's
        sections (excluding .text, which is passed separately)."""
        attrs = module['sect_attrs']
        out = []
        for sec in range(0, attrs['nsections']):
            attr = attrs['attrs'][sec]
            name = attr['name'].string()
            if name == '.text':
                continue
            out.append("-s {} {:#x}".format(name, long(attr['address'])))
        return " ".join(out)

    def load_modules(self, verbose=False):
        """Locate each loaded module's .ko on the search path and load
        its symbols into gdb at the module's runtime addresses."""
        print("Loading modules...", end='')
        sys.stdout.flush()
        failed = 0
        loaded = 0
        for module in self.for_each_module():
            modname = "{}".format(module['name'].string())
            modfname = "{}.ko".format(modname)
            found = False
            for path in self.searchpath:
                modpath = self.find_module_file(modfname, path)
                if not modpath:
                    continue
                found = True
                # Older kernels keep the base in module_core; newer ones
                # in core_layout.base.
                if 'module_core' in module.type:
                    addr = long(module['module_core'])
                else:
                    addr = long(module['core_layout']['base'])
                if verbose:
                    print("Loading {} at {:#x}".format(modname, addr))
                sections = self.get_module_sections(module)
                gdb.execute("add-symbol-file {} {:#x} {}".format(
                    modpath, addr, sections), to_string=True)
                sal = gdb.find_pc_line(addr)
                if sal.symtab is None:
                    # No line info yet: try separate debuginfo.
                    objfile = gdb.lookup_objfile(modpath)
                    self.load_debuginfo(objfile, modpath)
                # We really should check the version, but GDB doesn't export
                # a way to lookup sections.
                break
            if not found:
                if failed == 0:
                    print()
                print("Couldn't find module file for {}".format(modname))
                failed += 1
            else:
                loaded += 1
            # NOTE(review): 'x % 10 == 10' can never be true, so these
            # progress dots are never printed — likely meant '== 0'.
            if (loaded + failed) % 10 == 10:
                print(".", end='')
                sys.stdout.flush()
        print(" done. ({} loaded".format(loaded), end='')
        if failed:
            print(", {} failed)".format(failed))
        else:
            print(")")
        # We shouldn't need this again, so why keep it around?
        del self.findmap
        self.findmap = {}

    def find_module_file(self, name, path):
        """Find file 'name' under directory 'path', treating '-' and '_'
        as equivalent; results are cached per path. Returns None if not
        found."""
        if not path in self.findmap:
            self.findmap[path] = {}
            for root, dirs, files in os.walk(path):
                for filename in files:
                    nname = filename.replace('-', '_')
                    self.findmap[path][nname] = os.path.join(root, filename)
        try:
            nname = name.replace('-', '_')
            return self.findmap[path][nname]
        except KeyError:
            return None

    def load_debuginfo(self, objfile, name=None, verbose=False):
        """Attach a separate '<basename>.debug' file to objfile, looking
        in the current directory and then the search path."""
        if name is None:
            name = objfile.filename
        if ".gz" in name:
            name = name.replace(".gz", "")
        filename = "{}.debug".format(os.path.basename(name))
        filepath = None
        # Check current directory first
        if os.path.exists(filename):
            filepath = filename
        else:
            for path in self.searchpath:
                filepath = self.find_module_file(filename, path)
                if filepath:
                    break
        if filepath:
            objfile.add_separate_debug_file(filepath)
        else:
            print("Could not locate debuginfo for {}".format(name))

    def setup_tasks(self):
        """Walk the kernel task lists and expose every task as a gdb
        thread; tasks currently on a runqueue get their dumped register
        state attached."""
        gdb.execute('set print thread-events 0')
        init_task = gdb.lookup_global_symbol('init_task')
        task_list = init_task.value()['tasks']
        runqueues = gdb.lookup_global_symbol('runqueues')
        rqs = get_percpu_var(runqueues)
        # Map task_struct address of each CPU's current task -> cpu number.
        rqscurrs = {long(x["curr"]): k for (k, x) in rqs.items()}
        self.pid_to_task_struct = {}
        print("Loading tasks...", end='')
        sys.stdout.flush()
        task_count = 0
        tasks = []
        for taskg in list_for_each_entry(task_list, init_task.type, 'tasks'):
            tasks.append(taskg)
            # Also collect the non-leader threads of each thread group.
            for task in list_for_each_entry(taskg['thread_group'],
                                            init_task.type, 'thread_group'):
                tasks.append(task)
        for task in tasks:
            cpu = None
            regs = None
            active = long(task.address) in rqscurrs
            if active:
                cpu = rqscurrs[long(task.address)]
                regs = self.vmcore.attr.cpu[cpu].reg
            ltask = LinuxTask(task, active, cpu, regs)
            ptid = (LINUX_KERNEL_PID, task['pid'], 0)
            try:
                thread = gdb.selected_inferior().new_thread(ptid, ltask)
            except gdb.error as e:
                print("Failed to setup task @{:#x}".format(
                    long(task.address)))
                continue
            thread.name = task['comm'].string()
            self.target.arch.setup_thread_info(thread)
            ltask.attach_thread(thread)
            ltask.set_get_stack_pointer(self.target.arch.get_stack_pointer)
            crash.cache.tasks.cache_task(ltask)
            task_count += 1
            if task_count % 100 == 0:
                print(".", end='')
                sys.stdout.flush()
        print(" done. ({} tasks total)".format(task_count))
        gdb.selected_inferior().executing = False
class ElfInfo(object):
    """Collects facts about an ELF binary (architecture class, imported and
    exported functions, needed shared libraries, section/segment entropies)
    into the JSON-serializable dict self.data."""

    def __init__(self, file, verbose=False):
        self.elffile = ELFFile(file)
        self._versioninfo = None
        self.data = {}
        self.data["arch"] = self.elffile.elfclass  # 32 or 64
        self._verbose = verbose

    def __str__(self):
        return json.dumps(self.data)

    def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True,
                    alternate=False):
        """Format an address as hex, readelf-style.

        fieldsize: zero-pad to this many digits (None = no padding).
        fullhex: pad to the binary's word width (8 or 16 digits).
        lead0x: prefix the value with '0x'.
        alternate: printf '#'-style -- prefix unless the value is 0.
        """
        if alternate:
            if addr == 0:
                lead0x = False
            else:
                lead0x = True
                fieldsize -= 2
        s = '0x' if lead0x else ''
        if fullhex:
            fieldsize = 8 if self.elffile.elfclass == 32 else 16
        if fieldsize is None:
            field = '%x'
        else:
            field = '%' + '0%sx' % fieldsize
        return s + field % addr

    def _init_versioninfo(self):
        """Lazily locate GNU/Solaris symbol-versioning sections; safe to
        call repeatedly."""
        if self._versioninfo is not None:
            return
        self._versioninfo = {
            'versym': None,
            'verdef': None,
            'verneed': None,
            'type': None
        }
        for section in self.elffile.iter_sections():
            if isinstance(section, GNUVerSymSection):
                self._versioninfo['versym'] = section
            elif isinstance(section, GNUVerDefSection):
                self._versioninfo['verdef'] = section
            elif isinstance(section, GNUVerNeedSection):
                self._versioninfo['verneed'] = section
            elif isinstance(section, DynamicSection):
                for tag in section.iter_tags():
                    if tag['d_tag'] == 'DT_VERSYM':
                        self._versioninfo['type'] = 'GNU'
                        break
        if not self._versioninfo['type'] and (self._versioninfo['verneed'] or
                                              self._versioninfo['verdef']):
            self._versioninfo['type'] = 'Solaris'

    def display_symbol_tables(self):
        """Populate self.data['imports'] / self.data['exports'] with FUNC
        symbols from all symbol tables.

        NOTE(review): every FUNC symbol (including undefined ones) is also
        added to exports -- preserved as-is to keep existing behavior.
        """
        self._init_versioninfo()
        symbol_tables = [
            s for s in self.elffile.iter_sections()
            if isinstance(s, SymbolTableSection)
        ]
        imports = set()
        exports = set()
        for section in symbol_tables:
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                # Symbol table has a sh_entsize of zero -- nothing to read.
                continue
            for nsym, symbol in enumerate(section.iter_symbols()):
                sym_type = describe_symbol_type(symbol['st_info']['type'])
                if sym_type == "FUNC":
                    desc = describe_symbol_shndx(symbol['st_shndx'])
                    if desc == "UND":
                        imports.add(str(symbol.name))
                    try:
                        exports.add(str(symbol.name))
                    except ValueError:
                        pass
        self.data["imports"] = list(imports)
        self.data["exports"] = list(exports)

    def _symbol_version(self, nsym):
        """Return version info for symbol number nsym as a dict with keys
        index/name/filename/hidden, or None when no versym data exists."""
        self._init_versioninfo()
        symbol_version = dict.fromkeys(('index', 'name', 'filename',
                                        'hidden'))
        if (not self._versioninfo['versym'] or
                nsym >= self._versioninfo['versym'].num_symbols()):
            return None
        symbol = self._versioninfo['versym'].get_symbol(nsym)
        index = symbol.entry['ndx']
        if not index in ('VER_NDX_LOCAL', 'VER_NDX_GLOBAL'):
            index = int(index)
            if self._versioninfo['type'] == 'GNU':
                # GNU versioning: the top bit marks a hidden symbol.
                if index & 0x8000:
                    index &= ~0x8000
                    symbol_version['hidden'] = True
            if self._versioninfo['verdef'] and index <= self._versioninfo[
                    'verdef'].num_versions():
                verdaux_iter = self._versioninfo['verdef'].get_version(index)
                symbol_version['name'] = next(verdaux_iter).name
            else:
                verneed, vernaux = self._versioninfo['verneed'].get_version(
                    index)
                symbol_version['name'] = vernaux.name
                symbol_version['filename'] = verneed.name
        symbol_version['index'] = index
        # BUGFIX: the result dict was built but never returned, so callers
        # always received None.
        return symbol_version

    def collect_sharedlib(self):
        """Populate self.data['shlib'] with the DT_NEEDED entries."""
        for section in self.elffile.iter_sections():
            if not isinstance(section, DynamicSection):
                continue
            shlib = set()
            for tag in section.iter_tags():
                if tag.entry.d_tag == 'DT_NEEDED':
                    shlib.add(tag.needed)
            self.data["shlib"] = list(shlib)

    def collect_sections_segments(self):
        """Record name/size/flags/entropy for every named section and
        every segment."""
        elf = self.elffile
        sections = []
        for section in elf.iter_sections():
            if section.name != '':
                entropy = self.compute_entropy(section.data())
                s = {
                    "name": section.name,
                    "size": int(section.header['sh_size']),
                    "flags": int(section.header['sh_flags']),
                    "entro": float(entropy)
                }
                sections.append(s)
        self.data["sections"] = sections
        segments = []
        for segment in elf.iter_segments():
            entropy = self.compute_entropy(segment.data())
            s = {
                "name": segment.header["p_type"],
                "size": int(segment.header["p_memsz"]),
                "flags": int(segment.header['p_flags']),
                "entro": float(entropy)
            }
            segments.append(s)
        self.data["segments"] = segments

    @staticmethod
    def compute_entropy(text):
        """Return the Shannon entropy of text in bits per byte (0..8).

        Accepts bytes or str; empty input yields 0. Rewritten from the
        Python-2-only struct/has_key version: byte values are counted
        directly, which is equivalent for every input the old code
        accepted.
        """
        counts = dict()
        for byte in text:
            # Iterating bytes yields ints on Py3 and 1-char strs on Py2.
            key = byte if isinstance(byte, int) else ord(byte)
            counts[key] = counts.get(key, 0) + 1
        entropy = 0
        for key in counts:
            p = float(counts[key]) / float(len(text))
            if p > 0:
                entropy -= p * math.log(p, 2)
        # We obtain an entropy value in range 0, 8
        return entropy

    def get_infos(self):
        """Run all collectors, filling self.data."""
        self.collect_sections_segments()
        self.collect_sharedlib()
        self.display_symbol_tables()
class ELFBinaryFile(object):
    """Wraps an ELF image and exposes its allocatable sections together
    with the used/unused address ranges of a memory map."""

    def __init__(self, elf, memory_map):
        # Accept either a filesystem path or an already-open file object.
        if isinstance(elf, six.string_types):
            self._file = open(elf, 'rb')
            self._owns_file = True
        else:
            self._file = elf
            self._owns_file = False
        self._elf = ELFFile(self._file)
        self._memory_map = memory_map or MemoryMap()
        self._symbol_decoder = None
        self._address_decoder = None
        self._extract_sections()
        self._compute_regions()

    ## @brief Close the ELF file if it is owned by this instance.
    def __del__(self):
        if self._owns_file:
            self.close()

    def _extract_sections(self):
        """Collect allocatable PROGBITS/NOBITS sections, sorted by start."""
        wanted_flags = (SH_FLAGS.SHF_WRITE | SH_FLAGS.SHF_ALLOC |
                        SH_FLAGS.SHF_EXECINSTR)
        picked = []
        for sect in self._elf.iter_sections():
            # Only sections of these two types are interesting.
            if sect['sh_type'] not in ('SHT_PROGBITS', 'SHT_NOBITS'):
                continue
            # ...and only when at least one of the flags is present.
            if sect['sh_flags'] & wanted_flags == 0:
                continue
            picked.append(ELFSection(self, sect))
        picked.sort(key=lambda sec: sec.start)
        self._sections = picked

    def _dump_sections(self):
        """Debug helper: print one line per extracted section."""
        for sect in self._sections:
            print("{0:<20} {1:<25} {2:<10} {3:<10}".format(
                sect.name, sect.flags_description, hex(sect.start),
                hex(sect.length)))

    def _compute_regions(self):
        """Partition every memory-map region into used ranges (covered by
        a section) and the unused gaps between/after them."""
        used = []
        unused = []
        for region in self._memory_map:
            cursor = region.start
            for sect in self._sections:
                begin = sect.start
                span = sect.length
                # Skip sections lying outside this memory region.
                if not region.contains_range(begin, length=span):
                    continue
                # Record the section itself as used...
                used.append(
                    MemoryRange(start=begin, length=span, region=region))
                # ...and any gap before it as unused.
                if begin > cursor:
                    unused.append(
                        MemoryRange(start=cursor, length=(begin - cursor),
                                    region=region))
                cursor = begin + span
            # Whatever is left at the end of the region is unused.
            if region.end > cursor:
                unused.append(
                    MemoryRange(start=cursor, end=region.end, region=region))
        self._used = used
        self._unused = unused

    def close(self):
        self._file.close()
        self._owns_file = False

    ##
    # @brief Access the list of sections in the ELF file.
    # @return A list of ELFSection objects sorted by start address.
    @property
    def sections(self):
        return self._sections

    ##
    # @brief Access the list of used ranges of memory in the ELF file.
    # @return A list of MemoryRange objects sorted by start address.
    @property
    def used_ranges(self):
        return self._used

    ##
    # @brief Access the list of unused ranges of memory in the ELF file.
    # @return A list of MemoryRange objects sorted by start address.
    @property
    def unused_ranges(self):
        return self._unused

    @property
    def symbol_decoder(self):
        if self._symbol_decoder is None:
            self._symbol_decoder = ElfSymbolDecoder(self._elf)
        return self._symbol_decoder

    @property
    def address_decoder(self):
        if self._address_decoder is None:
            self._address_decoder = DwarfAddressDecoder(self._elf)
        return self._address_decoder
class ImageInfo(object):
    """Parses an ELF image (via pyelftools), indexes its sections, strings
    and symbols into numpy arrays for fast address lookup, and disassembles
    .text/.plt with capstone for per-address instruction queries."""

    ##
    # Initialize internals.
    #
    # @param ImagePath file path to image
    #
    def __init__(self, ImagePath):
        # internals
        self._Path = ImagePath
        self._Handle = None              # open file handle (set by parseImage)
        self._SizeBytes = 0              # file size on disk
        self._Elf = None                 # pyelftools ELFFile
        self._IsExecutable = False       # True for ET_EXEC images
        self._Sections = {}              # start addr -> SectionInfo
        self._SectionsFast = None        # numpy (Start, Size) records
        self._Segments = []              # raw pyelftools segments
        self._Strings = {}               # string-table offset -> string
        self._Symbols = {}               # start addr -> SymbolInfo
        self._TextInstructions = {}      # addr -> (size, "mnemonic\top_str")
        self._PLTInstructions = {}       # addr -> (size, "mnemonic\top_str")
        self._SymbolsFast = None         # numpy (Start, Size) records
        self._TextSection = None
        self._PLTSection = None
        self._SymbolTable = None         # .symtab, or .dynsym as fallback
        self._StringTable = None         # .strtab, or .dynstr as fallback

    ##
    # Get infos about the given image.
    #
    # @return none
    #
    def parseImage(self):
        # basic stats
        self._SizeBytes = os.path.getsize(self._Path)
        # open (handle must stay open: pyelftools reads lazily)
        self._Handle = open(self._Path, 'rb')
        self._Elf = ELFFile(self._Handle)
        # executable
        if self._Elf['e_type'] == 'ET_EXEC':
            self._IsExecutable = True
        # header string table: the STRTAB section whose index matches
        # e_shstrndx holds the section names.
        hdstrtbl = None
        cnt = 0
        for sec in self._Elf.iter_sections():
            if (sec['sh_type'] == 'SHT_STRTAB') and (self._Elf['e_shstrndx'] == cnt):
                hdstrtbl = sec
                break
            cnt += 1
        if hdstrtbl is None:
            raise Exception("[ERROR] Could not find header string table!")
        # register segments
        for seg in self._Elf.iter_segments():
            self._Segments.append(seg)
        # register sections (only those with a non-zero load address)
        for sec in self._Elf.iter_sections():
            curname = hdstrtbl.get_string(sec['sh_name'])
            if (sec['sh_addr'] != 0):
                cursec = SectionInfo()
                cursec._Name = curname
                cursec._Addr = sec['sh_addr']
                cursec._Size = sec['sh_size']
                cursec._Obj = sec
                self._Sections[cursec._Addr] = cursec
        # special sections: validate that text plus some symbol/string
        # table combination exists before committing to a parse.
        secnames = []
        for sec in self._Elf.iter_sections():
            if sec['sh_size'] > 0:
                secnames.append(hdstrtbl.get_string(sec['sh_name']))
        if ('.text' not in secnames):
            raise Exception("[ERROR] No text section found!")
        if (('.symtab' not in secnames) and ('.dynsym' not in secnames)):
            raise Exception("[ERROR] No symbol table found!")
        if (('.strtab' not in secnames) and ('.dynstr' not in secnames)):
            raise Exception("[ERROR] No string table found!")
        # Prefer the full debug tables when both are available.
        usedebugtables = (('.symtab' in secnames) and ('.strtab' in secnames))
        # register special sections
        for sec in self._Elf.iter_sections():
            if sec['sh_size'] > 0:
                cursec = SectionInfo()
                cursec._Name = hdstrtbl.get_string(sec['sh_name'])
                cursec._Addr = sec['sh_addr']
                cursec._Size = sec['sh_size']
                cursec._Obj = sec
                if cursec._Name == '.text':
                    self._TextSection = cursec
                elif cursec._Name == '.plt':
                    self._PLTSection = cursec
                elif (cursec._Name == '.symtab') and usedebugtables:
                    self._SymbolTable = cursec
                elif (cursec._Name == '.strtab') and usedebugtables:
                    self._StringTable = cursec
                elif (cursec._Name == '.dynsym') and not usedebugtables:
                    self._SymbolTable = cursec
                elif (cursec._Name == '.dynstr') and not usedebugtables:
                    self._StringTable = cursec
        # sanity check
        if (self._TextSection is None):
            raise Exception("[ERROR] Could not assign text section!")
        if (self._PLTSection is None):
            raise Exception("[ERROR] Could not assign plt section!")
        if (self._SymbolTable is None):
            raise Exception("[ERROR] Could not assign symbol table!")
        if (self._StringTable is None):
            raise Exception("[ERROR] Could not assign string table!")
        # parse strings: split the string table on NUL separators,
        # remembering each string's offset.
        binstr = self._StringTable._Obj.data()
        binstrdec = binstr.decode()
        curstart = 0
        for cmatch in re.finditer('\x00', binstrdec):
            curstr = binstr[curstart:cmatch.start()].decode("utf-8")
            if curstr != "":
                self._Strings[curstart] = curstr
            curstart = cmatch.start() + 1
        self._Strings[0] = ''
        # register symbols (skip sections/files/untyped/local entries)
        for symb in self._SymbolTable._Obj.iter_symbols():
            if (symb['st_value'] != 0) and \
               (symb['st_info']['type'] != 'STT_SECTION') and \
               (symb['st_info']['type'] != 'STT_FILE') and \
               (symb['st_info']['type'] != 'STT_NOTYPE') and \
               (symb['st_info']['bind'] != 'STB_LOCAL'):
                # new symbol
                cursymb = SymbolInfo()
                cursymb._Name = symb.name
                cursymb._Addr = symb['st_value']
                cursymb._Size = symb['st_size']
                cursymb._Type = symb['st_info']['type']
                cursymb._Obj = symb
                # fix name: nameless symbols get their address as name
                if cursymb._Name == '':
                    cursymb._Name = '0x%08x' % cursymb._Addr
                # safe add: merge aliases at the same address, keeping the
                # larger symbol as the primary entry.
                if cursymb._Addr in self._Symbols.keys():
                    if sys.stdout.isatty():
                        print ("[INFO] Symbols with same start addr: new=%s and old=%s" \
                            % (cursymb._Name, self._Symbols[cursymb._Addr]._Name))
                    if cursymb._Size == self._Symbols[cursymb._Addr]._Size:
                        self._Symbols[cursymb._Addr]._Name += ("+%s" % cursymb._Name)
                    elif cursymb._Size > self._Symbols[cursymb._Addr]._Size:
                        cursymb._Name += ("+%s(len=%d)" % \
                            (self._Symbols[cursymb._Addr]._Name, \
                             self._Symbols[cursymb._Addr]._Size))
                        self._Symbols[cursymb._Addr] = cursymb
                    elif cursymb._Size < self._Symbols[cursymb._Addr]._Size:
                        self._Symbols[cursymb._Addr]._Name += ("+%s(len=%d)" % \
                            (cursymb._Name, \
                             cursymb._Size))
                else:
                    self._Symbols[cursymb._Addr] = cursymb
        # prune overlay functions: drop a symbol fully contained in its
        # predecessor when both end at the same address.
        ksort = sorted(self._Symbols.keys())
        krem = []
        for i in range(0, len(ksort) - 1):
            if ((self._Symbols[ksort[i]]._Addr + self._Symbols[ksort[i]]._Size) > \
                self._Symbols[ksort[i+1]]._Addr) and \
               ((self._Symbols[ksort[i]]._Addr + self._Symbols[ksort[i]]._Size) == \
                (self._Symbols[ksort[i+1]]._Addr + self._Symbols[ksort[i+1]]._Size)):
                krem.append((ksort[i], ksort[i + 1]))
        for k in krem:
            if sys.stdout.isatty():
                print("[INFO] Pruning overlay function %s." % self._Symbols[k[1]]._Name)
            self._Symbols[k[0]]._Name += ("+%s(%d)" % \
                (self._Symbols[k[1]]._Name, k[1]-k[0]))
            self._Symbols.pop(k[1])
        # fast access: mirror sections/symbols into numpy record arrays
        # sorted by start address for binary-search-style lookups.
        # NOTE(review): the trailing '1' shape in the dtype tuples is
        # deprecated in recent numpy releases.
        self._SectionsFast = numpy.zeros(len(self._Sections), \
            dtype=numpy.dtype([('Start', numpy.uintp, 1), \
                               ('Size', numpy.uintp, 1)]))
        ksort = sorted(self._Sections.keys())
        for i in range(0, len(self._Sections)):
            self._SectionsFast[i]['Start'] = self._Sections[ksort[i]]._Addr
            self._SectionsFast[i]['Size'] = self._Sections[ksort[i]]._Size
        self._SymbolsFast = numpy.zeros(len(self._Symbols), \
            dtype=numpy.dtype([('Start', numpy.uintp, 1), \
                               ('Size', numpy.uintp, 1)]))
        ksort = sorted(self._Symbols.keys())
        for i in range(0, len(self._Symbols)):
            self._SymbolsFast[i]['Start'] = self._Symbols[ksort[i]]._Addr
            self._SymbolsFast[i]['Size'] = self._Symbols[ksort[i]]._Size
        # consistency check: entries must not overlap their successor.
        for i in range(0, len(self._SectionsFast) - 1):
            if self._SectionsFast[i]['Start'] + self._SectionsFast[i]['Size'] > \
               self._SectionsFast[i+1]['Start']:
                raise Exception('[ERROR] Inconsistent section placement!')
        for i in range(0, len(self._SymbolsFast) - 1):
            if self._SymbolsFast[i]['Start'] + self._SymbolsFast[i]['Size'] > \
               self._SymbolsFast[i+1]['Start']:
                raise Exception('[ERROR] Inconsistent symbol placement: %s -> %s!' % \
                    (self._Symbols[self._SymbolsFast[i]['Start']]._Name, \
                     self._Symbols[self._SymbolsFast[i+1]['Start']]._Name))
        # set up disassembler for the image's architecture
        if 'x64' in self._Elf.get_machine_arch().lower():
            md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
        elif 'x86' in self._Elf.get_machine_arch().lower():
            md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_32)
        elif 'arm' in self._Elf.get_machine_arch().lower():
            md = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM)
        elif 'aarch64' in self._Elf.get_machine_arch().lower():
            md = capstone.Cs(capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM + \
                capstone.CS_MODE_V8)
        else:
            raise Exception(
                "[ERROR] Image architecture currently not supported!")
        # continue past data embedded in code instead of stopping
        md.skipdata = True
        # parse .text section
        instructions = md.disasm_lite(self._TextSection._Obj.data(), \
            self._TextSection._Addr)
        for (address, size, mnemonic, op_str) in instructions:
            self._TextInstructions[address] = (size, "%s\t%s" % (mnemonic, op_str))
        # parse .plt instructions
        instructions = md.disasm_lite(self._PLTSection._Obj.data(), \
            self._PLTSection._Addr)
        for (address, size, mnemonic, op_str) in instructions:
            self._PLTInstructions[address] = (size, "%s\t%s" % (mnemonic, op_str))

    ##
    # Get section from given address.
    #
    # @param Address address within image
    # @return the section of the address (None if error)
    #
    def getSection(self, Address):
        # find: last entry whose start is <= Address
        idx = numpy.argwhere(
            self._SectionsFast[:]['Start'] <= Address).flatten()
        if len(idx) == 0:
            return None
        # check: Address must also fall before that entry's end
        if Address < self._SectionsFast[idx[-1]]['Start'] + \
           self._SectionsFast[idx[-1]]['Size']:
            return (self._Sections[self._SectionsFast[idx[-1]]['Start']])
        else:
            return None

    ##
    # Get symbol from given address.
    #
    # @param Address address within image
    # @return the symbol of the address (None if error)
    #
    def getSymbol(self, Address):
        # find: last entry whose start is <= Address
        idx = numpy.argwhere(
            self._SymbolsFast[:]['Start'] <= Address).flatten()
        if len(idx) == 0:
            return None
        # check: Address must also fall before that entry's end
        if Address < self._SymbolsFast[idx[-1]]['Start'] + \
           self._SymbolsFast[idx[-1]]['Size']:
            return (self._Symbols[self._SymbolsFast[idx[-1]]['Start']])
        else:
            return None

    ##
    # Get instruction from given address.
    #
    # @param Address address within image
    # @return size of instr. and assembly code (None if error)
    #
    def getInstruction(self, Address):
        # get section (only .text and .plt were disassembled)
        sec = self.getSection(Address)
        if sec is None:
            return None
        # search
        if sec._Name == '.text':
            if Address in self._TextInstructions.keys():
                return (self._TextInstructions[Address])
        elif sec._Name == '.plt':
            if Address in self._PLTInstructions.keys():
                return (self._PLTInstructions[Address])
        # error
        return None
class BuildIdInspectorAndPatcher:
    """Inspects an ELF for a GNU or Memfault build id and can patch a
    generated Memfault build id into the file in place."""

    def __init__(self, elf_file):
        """
        :param elf_file: file object with the ELF to inspect and/or patch
        """
        self.elf_file = elf_file
        self.elf = ELFFile(elf_file)

    @staticmethod
    def _section_in_binary(section):
        # Only allocated sections make it into the actual binary
        sh_flags = section["sh_flags"]
        return sh_flags & SH_FLAGS.SHF_ALLOC != 0

    def _get_section_type(self, section):
        # Classify a section as TEXT / DATA / BSS / UNALLOCATED based on its
        # flags and type, mirroring how the linker lays out the image.
        if not self._section_in_binary(section):
            return SectionType.UNALLOCATED
        sh_flags = section["sh_flags"]
        # executable, or read-only (non-writable) allocated data counts as text
        is_text = sh_flags & SH_FLAGS.SHF_EXECINSTR != 0 or sh_flags & SH_FLAGS.SHF_WRITE == 0
        if is_text:
            return SectionType.TEXT
        if section["sh_type"] != "SHT_NOBITS":
            return SectionType.DATA
        return SectionType.BSS

    def _find_section_for_address_range(self, addr_range):
        # Return the first allocated section fully containing addr_range
        # (inclusive start, end may equal the section end), else None.
        for section in self.elf.iter_sections():
            if not self._section_in_binary(section):
                continue
            sec_start = section["sh_addr"]
            sh_size = section["sh_size"]
            sec_end = sec_start + sh_size
            addr_start, addr_end = addr_range
            range_in_section = (sec_start <= addr_start < sec_end) and (sec_start <= addr_end <= sec_end)
            if not range_in_section:
                continue
            return section
        return None

    def _find_symbol_and_section(self, symbol_name):
        # Look up symbol_name in .symtab and the section holding its bytes.
        # Returns (None, None) when the symbol table / symbol is absent;
        # raises BuildIdException when the symbol maps to no section.
        symtab = self.elf.get_section_by_name(".symtab")
        if symtab is None:
            return None, None
        symbol = symtab.get_symbol_by_name(symbol_name)
        if not symbol:
            return None, None
        symbol = symbol[0]
        symbol_start = symbol["st_value"]
        symbol_size = symbol["st_size"]
        section = self._find_section_for_address_range(
            (symbol_start, symbol_start + symbol_size))
        if section is None:
            raise BuildIdException(
                "Could not locate a section with symbol {}".format(
                    symbol_name))
        return (symbol, section)

    def _generate_build_id(self):
        # Derive a deterministic sha1 over every allocated section:
        # each section contributes its start address plus its contents
        # (or, for BSS, just its length — see below).
        build_id = hashlib.sha1()
        for section in self.elf.iter_sections():
            if not self._section_in_binary(section):
                continue
            sec_start = section["sh_addr"]
            build_id.update(struct.pack("<I", sec_start))
            sec_type = self._get_section_type(section)
            if sec_type == SectionType.BSS:
                # All zeros so just include the length of the section in our
                # sha1
                length = section["sh_size"]
                build_id.update(struct.pack("<I", length))
            else:
                build_id.update(section.data())
        return build_id

    @staticmethod
    def _get_symbol_offset_in_sector(symbol, section):
        # Byte offset of the symbol from the start of its section.
        return symbol["st_value"] - section["sh_addr"]

    def _get_symbol_data(self, symbol, section):
        # Slice the symbol's bytes out of its section's contents.
        offset_in_section = self._get_symbol_offset_in_sector(symbol, section)
        symbol_size = symbol["st_size"]
        data = section.data()[offset_in_section:offset_in_section + symbol_size]
        # On Python 2, section.data() is a str; normalize to bytearray so
        # indexing yields ints on both major versions.
        if isinstance(data, str):
            return bytearray(data)
        return data

    def _get_build_id(self):
        # Return the GNU build id note payload, if the ELF carries one.
        def _get_note_sections(elf):
            for section in elf.iter_sections():
                if not isinstance(section, NoteSection):
                    continue
                for note in section.iter_notes():
                    yield note

        for note in _get_note_sections(self.elf):
            if note.n_type == "NT_GNU_BUILD_ID":
                return note.n_desc
        return None

    def _write_and_return_build_info(self, dump_only):
        """Resolve (and, unless dump_only, write back) the build id.

        Returns a (MemfaultBuildIdTypes member, build id string or None,
        short length or None) tuple.  Raises BuildIdException when the
        required SDK symbols are missing or the stored type is unknown.
        """
        sdk_build_id_sym_name = "g_memfault_build_id"
        symbol, section = self._find_symbol_and_section(sdk_build_id_sym_name)
        if symbol is None:
            raise BuildIdException(
                "Could not locate '{}' symbol in provided ELF".format(
                    sdk_build_id_sym_name))

        gnu_build_id = self._get_build_id()

        # Maps to sMemfaultBuildIdStorage from "core/src/memfault_build_id_private.h"
        data = self._get_symbol_data(symbol, section)

        # FW SDK's <= 0.20.1 did not encode the configured short length in the
        # "sMemfaultBuildIdStorage". In this situation the byte was a zero-initialized padding
        # byte. In this scenario we report "None" to signify we do not know the short len
        short_len = data[2] or None

        build_id_type = data[0]
        if build_id_type == MemfaultBuildIdTypes.GNU_BUILD_ID_SHA1.value:
            if gnu_build_id is None:
                raise BuildIdException(
                    "Couldn't locate GNU Build ID but 'MEMFAULT_USE_GNU_BUILD_ID' is in use"
                )
            return MemfaultBuildIdTypes.GNU_BUILD_ID_SHA1, gnu_build_id, short_len

        derived_sym_name = "g_memfault_sdk_derived_build_id"
        sdk_build_id, sdk_build_id_section = self._find_symbol_and_section(
            derived_sym_name)
        if sdk_build_id is None:
            raise BuildIdException(
                "Could not locate '{}' symbol in provided elf".format(
                    derived_sym_name))

        data = self._get_symbol_data(sdk_build_id, sdk_build_id_section)
        if build_id_type == MemfaultBuildIdTypes.MEMFAULT_BUILD_ID_SHA1.value:
            # py3: bytes.hex(); py2: str.encode("hex") — same hex string
            build_id = data.hex() if isinstance(
                data, bytes) else bytes(data).encode("hex")
            return MemfaultBuildIdTypes.MEMFAULT_BUILD_ID_SHA1, build_id, short_len

        if gnu_build_id is not None:
            print(
                "WARNING: Located a GNU build id but it's not being used by the Memfault SDK"
            )

        if build_id_type != MemfaultBuildIdTypes.NONE.value:
            raise BuildIdException(
                "Unrecognized Build Id Type '{}'".format(build_id_type))

        if dump_only:
            return MemfaultBuildIdTypes.NONE, None, None

        # No build id present: generate one and patch it into the file.
        build_id = self._generate_build_id()
        with open(self.elf_file.name, "r+b") as fh:
            # Overwrite the type byte in sMemfaultBuildIdStorage ...
            build_id_type_patch_offset = section[
                "sh_offset"] + self._get_symbol_offset_in_sector(
                    symbol, section)
            fh.seek(build_id_type_patch_offset)
            fh.write(
                struct.pack("B",
                            MemfaultBuildIdTypes.MEMFAULT_BUILD_ID_SHA1.value))
            # ... and write the sha1 digest into the derived build id symbol.
            derived_id_patch_offset = sdk_build_id_section[
                "sh_offset"] + self._get_symbol_offset_in_sector(
                    sdk_build_id, sdk_build_id_section)
            fh.seek(derived_id_patch_offset)
            fh.write(build_id.digest())
        build_id = build_id.hexdigest()
        print("Added Memfault Generated Build ID to ELF: {}".format(build_id))
        return MemfaultBuildIdTypes.MEMFAULT_BUILD_ID_SHA1, build_id, short_len

    def check_or_update_build_id(self):
        """Report the build id, generating and patching one in if absent."""
        build_type, build_id, _ = self._write_and_return_build_info(
            dump_only=False)
        if build_type == MemfaultBuildIdTypes.GNU_BUILD_ID_SHA1:
            print("Found GNU Build ID: {}".format(build_id))
        elif build_type == MemfaultBuildIdTypes.MEMFAULT_BUILD_ID_SHA1:
            print("Found Memfault Build Id: {}".format(build_id))

    def dump_build_info(self, num_chars):
        """Print the first num_chars characters of the build id (read-only)."""
        build_type, build_id, _ = self._write_and_return_build_info(
            dump_only=True)
        if build_type is None or build_id is None:
            raise BuildIdException("No Build ID Found")
        print(build_id[:num_chars])

    def get_build_info(self):
        """Return (type, build_id, short_len), or (None, None, None) on error."""
        try:
            return self._write_and_return_build_info(dump_only=True)
        except BuildIdException:
            return None, None, None
def config_from_elf(self, path): """Load all the necessary information about the program parsing the ELF headers. Furthermore, check some pre-requisites for the exploit to be successful.""" executable_file = open(path, "r") elf = ELFFile(executable_file) get_section = lambda name: first_or_none( filter(lambda section: section.name == name, elf.iter_sections())) get_section_address = lambda section: None if (get_section( section) is None) else get_section(section).header.sh_addr # Checks if elf.header.e_type == ENUM_E_TYPE["ET_EXEC"]: raise Exception("Only non-PIE executables are supported") # Binary type self.arch = elf.header.e_machine self.little = elf.little_endian self.pointer_size = elf.elfclass / 8 self.pointer_format = ("0x%." + str(self.pointer_size * 2) + "x") self.structs = elftools.elf.structs.ELFStructs(self.little, self.pointer_size * 8) # Useful sections self.sections = { section.name: (section.header.sh_addr, section.header.sh_addr + section.header.sh_size) for section in elf.iter_sections() } self.plt = get_section_address(".plt") self.got = get_section_address(".got") self.gotplt = get_section_address(".got.plt") # Dynamic section dynamic_section = get_section(".dynamic") self.writable_dynamic = dynamic_section.header.sh_flags & SH_FLAGS.SHF_WRITE self.dynamic = dynamic_section.header.sh_addr dynamic_entries = [ self.structs.Elf_Dyn.parse(dynamic_entry) for dynamic_entry in chunks(dynamic_section.data(), self.structs.Elf_Dyn.sizeof()) ] # Dynamic symbols # TODO: we're relying on section names here symbol_table = elf.get_section_by_name(".dynsym") has_name = lambda name: lambda symbol: symbol.name == name attribute_or_default = lambda default, attribute, x: getattr( x, attribute) if x is not None else default memcpy_symbol = first_or_none( filter(has_name("memcpy"), symbol_table.iter_symbols())) self.memcpy_plt = 0 if memcpy_symbol is None else memcpy_symbol.entry.st_value # We try not to rely on section names get_dynamic = lambda name: 
first_or_none( map(lambda entry: entry.d_val, filter(lambda entry: entry.d_tag == name, dynamic_entries))) get_dynamic_index = lambda name: filter( lambda entry: entry[1].d_tag == name, enumerate(dynamic_entries))[ 0][0] self.dynstr = get_dynamic("DT_STRTAB") self.dynsym = get_dynamic("DT_SYMTAB") self.versym = get_dynamic("DT_VERSYM") self.verneed = get_dynamic("DT_VERNEED") self.relplt = get_dynamic("DT_JMPREL") self.addend = get_dynamic("DT_RELA") is not None self.dt_debug = self.dynamic + get_dynamic_index( "DT_DEBUG") * self.structs.Elf_Dyn.sizeof() + self.pointer_size self.full_relro = (get_dynamic("DT_FLAGS") is not None) and \ ((get_dynamic("DT_FLAGS") & DF_BIND_NOW) != 0) self.full_relro = self.full_relro or ((get_dynamic("DT_FLAGS_1") is not None) and \ ((get_dynamic("DT_FLAGS_1") & DF_1_NOW) != 0)) # Choose between Elf_Rel and Elf_Rela depending on the architecture self.rel_struct = self.structs.Elf_Rela if self.addend else self.structs.Elf_Rel # Looks like 64-bit and 32-bit have different alignment for the call to _dl_fixup self.reloc_alignment = 1 if self.pointer_size == 4 else self.rel_struct.sizeof( ) self.reloc_index_multiplier = self.rel_struct.sizeof( ) if self.pointer_size == 4 else 1 # # Find candidate writeable areas # # Collect PT_LOAD segments (what gets mapped) loaded_segments = filter( lambda segment: segment.header.p_type == "PT_LOAD", elf.iter_segments()) # Collect the segments which are writeable writeable_segments = filter( lambda segment: segment.header.p_flags & P_FLAGS.PF_W, loaded_segments) # Get their memory ranges (start, end) writeable_ranges = RangeSet.mutual_union(*map( lambda segment: (segment.header.p_vaddr, segment.header.p_vaddr + segment.header.p_memsz), writeable_segments)) # List of sections we don't want to write to dont_overwrite_sections = filter_none([ self.dynstr, self.dynsym, self.versym, self.relplt, self.dynamic, self.got, self.gotplt ]) # Memory ranges of the sections we don't want to write to 
dont_overwrite_ranges = RangeSet.mutual_union(*[ self.sections[self.section_from_address(start)] for start in dont_overwrite_sections ]) # Handle RELRO segment, we don't want to write there relro_segment = first_or_none( filter(lambda segment: segment.header.p_type == "PT_GNU_RELRO", elf.iter_segments())) if relro_segment is not None: dont_overwrite_ranges = dont_overwrite_ranges | RangeSet( relro_segment.header.p_vaddr, relro_segment.header.p_vaddr + relro_segment.header.p_memsz) # Compute the set of candidate memory ranges self.writeable_ranges = writeable_ranges - dont_overwrite_ranges # Save the index of the DT_FINI entry fini = filter(lambda (i, entry): entry.d_tag == "DT_FINI", enumerate(dynamic_entries)) if len(fini) > 0: self.fini = self.dynamic + self.structs.Elf_Dyn.sizeof( ) * fini[0][0] # Gadgets if self.gadgets.has_key(self.arch): executable_segments = filter( lambda segment: segment.header.p_flags & P_FLAGS.PF_X, elf.iter_segments()) for name, (info, gadget) in self.gadgets[self.arch].iteritems(): locations = find_all_strings(executable_segments, hex_bytes(gadget)) locations = map(self.ptr2str, locations) location = first_or_none( filter( lambda address: not reduce( lambda accumulate, badchar: badchar in address or accumulate, self.badchars, False), locations)) if location is None: self.gadgets[self.arch][name] = None else: self.gadgets[self.arch][name] = (info, gadget, location) # Find all '\x00\x00' in non-writeable segments self.non_writeable_segments = filter( lambda segment: not (segment.header.p_flags & P_FLAGS.PF_W), loaded_segments) self.zero_or_one_addresses = find_all_strings(self.non_writeable_segments, "\x00\x00") + \ find_all_strings(self.non_writeable_segments, "\x01\x00" if self.little else "\x00\x01") self.filler = self.ptr2str( reduce(lambda x, y: (x << 32) | 0xdeadb00b, xrange(1 + (self.pointer_size % 4)), 0)) self.relocation_type = relocation_types[self.arch] # # Find the reloc pointing to the symbol whose name is the earliest in 
.dynstr # relplt_section = elf.get_section_by_name( self.section_from_address(self.relplt)) dynsym_section = elf.get_section_by_name( self.section_from_address(self.dynsym)) if not (isinstance(relplt_section, RelocationSection) and \ isinstance(dynsym_section, SymbolTableSection)): raise Exception("Unexpect type for dynamic sections: " + str(relplt_section) + " " + str(dynsym_section)) # Grab .got.plt relocs symbol indexes symbol_indexes = [ reloc.entry.r_info_sym if reloc.entry.r_info_type == self.relocation_type else None for reloc in relplt_section.iter_relocations() ] # Get offsets in .dynstr names_offsets = [ dynsym_section.get_symbol(index).entry.st_name if index is not None else None for index in symbol_indexes ] # Filter out unamed offsets names_offsets = [ offset if offset > 0 else None for offset in names_offsets ] # Get the minimum value self.min_reloc_index, self.min_string_offset = min( enumerate(names_offsets), key=operator.itemgetter(1)) self.min_symbol_index = symbol_indexes[self.min_reloc_index] log(self.dump())
class ELF:
    """pyelftools-backed loader: fills the generic binary model
    (classbinary/mem) with sections, symbols and plt entries."""

    def __init__(self, mem, classbinary, filename):
        import capstone as CAPSTONE

        fd = open(filename, "rb")
        self.elf = ELFFile(fd)
        self.classbinary = classbinary
        self.mem = mem

        # machine arch string -> capstone architecture constant
        self.arch_lookup = {
            "x86": CAPSTONE.CS_ARCH_X86,
            "x64": CAPSTONE.CS_ARCH_X86,
            "ARM": CAPSTONE.CS_ARCH_ARM,
            "MIPS": CAPSTONE.CS_ARCH_MIPS,
        }

        # machine arch string -> capstone mode (dict when it depends on elfclass)
        self.arch_mode_lookup = {
            "x86": CAPSTONE.CS_MODE_32,
            "x64": CAPSTONE.CS_MODE_64,
            # NOTE(review): this is CS_ARCH_ARM in a *mode* table — probably
            # meant CS_MODE_ARM (both happen to be 0); confirm before changing.
            "ARM": CAPSTONE.CS_ARCH_ARM,
            "MIPS": {
                32: CAPSTONE.CS_MODE_MIPS32,
                64: CAPSTONE.CS_MODE_MIPS64,
            }
        }

        self.sym_type_lookup = {
            "STT_FUNC": MEM_FUNC,
        }

        self.__sections = {}  # start address -> elf section

        # Register every named section with the binary model; flagged
        # sections (sh_flags & 0xf) also go in the sorted address index.
        for s in self.elf.iter_sections():
            if not s.name:
                continue
            start = s.header.sh_addr
            if s.header.sh_flags & 0xf != 0:
                bisect.insort_left(classbinary._sorted_sections, start)
            self.__sections[start] = s
            is_data = self.__section_is_data(s)
            is_exec = self.__section_is_exec(s)
            data = s.data()
            classbinary._abs_sections[start] = SectionAbs(
                s.name.decode(), start, s.header.sh_size, len(data), is_exec,
                is_data, data)

    def load_section_names(self):
        # Used for the auto-completion
        for s in self.elf.iter_sections():
            if s.header.sh_flags & 0xf != 0:
                ad = s.header.sh_addr
                name = s.name.decode()
                self.classbinary.section_names[name] = ad

    def load_static_sym(self):
        """Load symbols from .symtab into the binary's symbol maps."""
        symtab = self.elf.get_section_by_name(b".symtab")
        if symtab is None:
            return
        # ARM mapping symbols ($a/$t/$d) are markers, not real symbols
        dont_save = [b"$a", b"$t", b"$d"]
        arch = self.elf.get_machine_arch()
        is_arm = arch == "ARM"
        for sy in symtab.iter_symbols():
            if is_arm and sy.name in dont_save:
                continue
            ad = sy.entry.st_value
            if ad != 0 and sy.name != b"":
                name = sy.name.decode()
                # duplicate names get a disambiguating rename
                if name in self.classbinary.symbols:
                    name = self.classbinary.rename_sym(name)
                self.classbinary.reverse_symbols[ad] = name
                self.classbinary.symbols[name] = ad
                ty = self.sym_type_lookup.get(sy.entry.st_info.type, MEM_UNK)
                self.mem.add(ad, 1, ty)

    def __x86_resolve_reloc(self, rel, symtab, plt, got_plt, addr_size):
        """Map plt entries to `name@plt` symbols by walking .got.plt."""
        # Save all got offsets with the corresponding symbol
        got_off = {}
        for r in rel.iter_relocations():
            sym = symtab.get_symbol(r.entry.r_info_sym)
            name = sym.name.decode()
            ad = r.entry.r_offset
            if name and ad:
                ty = self.sym_type_lookup.get(sym.entry.st_info.type, MEM_UNK)
                got_off[ad] = [name + "@plt", ty]

        # Unpack the whole .got.plt as an array of pointers
        data = got_plt.data()
        unpack_str = "<" if self.elf.little_endian else ">"
        unpack_str += str(int(len(data) / addr_size))
        unpack_str += "Q" if addr_size == 8 else "I"
        got_values = struct.unpack(unpack_str, data)
        plt_data = plt.data()
        wrong_jump_opcode = False
        off = got_plt.header.sh_addr

        # Read the .got.plt and for each address in the plt, substract 6
        # to go at the begining of the plt entry.
        opcode_jmp = [b"\xff\x25", b"\xff\xa3"]

        for jump_in_plt in got_values:
            if off in got_off:
                plt_start = jump_in_plt - 6
                plt_off = plt_start - plt.header.sh_addr
                # Check "jmp *(ADDR)" opcode.
                if plt_data[plt_off:plt_off + 2] not in opcode_jmp:
                    wrong_jump_opcode = True
                    continue
                name, ty = got_off[off]
                if name in self.classbinary.symbols:
                    name = self.classbinary.rename_sym(name)
                self.classbinary.reverse_symbols[plt_start] = name
                self.classbinary.symbols[name] = plt_start
                self.mem.add(plt_start, 1, ty)
            off += addr_size

        if wrong_jump_opcode:
            warning("I'm expecting to see a jmp *(ADDR) on each plt entry")
            warning("opcode \\xff\\x25 was not found, please report")

    def __resolve_symtab(self, rel, symtab):
        # TODO: don't know why st_value is not 0 like x86
        # In some executables I've tested, it seems that st_value
        # is the address of the plt entry
        # TODO: really useful to iter on relocations and get the symbol
        # from the symtab ?
        # for r in rel.iter_relocations():
        #     sym = symtab.get_symbol(r.entry.r_info_sym)
        for sym in symtab.iter_symbols():
            ad = sym.entry.st_value
            if ad != 0:
                name = sym.name.decode()
                if name in self.classbinary.symbols:
                    name = self.classbinary.rename_sym(name)
                self.classbinary.reverse_symbols[ad] = name
                self.classbinary.symbols[name] = ad
                ty = self.sym_type_lookup.get(sym.entry.st_info.type, MEM_UNK)
                self.mem.add(ad, 1, ty)

    def __iter_reloc(self):
        # Yield (relocation section, its linked symbol table) pairs.
        for rel in self.elf.iter_sections():
            if rel.header.sh_type in ["SHT_RELA", "SHT_REL"]:
                symtab = self.elf.get_section(rel.header.sh_link)
                if symtab is None:
                    continue
                yield (rel, symtab)

    def load_dyn_sym(self):
        """Load dynamic symbols; plt resolution is x86/x64-specific."""
        arch = self.elf.get_machine_arch()
        if arch == "ARM" or arch == "MIPS":
            for (rel, symtab) in self.__iter_reloc():
                self.__resolve_symtab(rel, symtab)
            return

        # x86/x64
        # TODO: .plt can be renamed ?
        plt = self.elf.get_section_by_name(b".plt")
        if plt is None:
            warning(".plt section not found")
            return

        # TODO: .got.plt can be renamed or may be removed ?
        got_plt = self.elf.get_section_by_name(b".got.plt")
        addr_size = 8 if arch == "x64" else 4
        if got_plt is None:
            warning(".got.plt section not found")
            return

        for (rel, symtab) in self.__iter_reloc():
            self.__x86_resolve_reloc(rel, symtab, plt, got_plt, addr_size)

    def __section_is_data(self, s):
        # writable or allocated, and not executable
        mask = SH_FLAGS.SHF_WRITE | SH_FLAGS.SHF_ALLOC
        return s.header.sh_flags & mask and not self.__section_is_exec(s)

    def __section_is_exec(self, s):
        if s is None:
            return 0
        return s.header.sh_flags & SH_FLAGS.SHF_EXECINSTR

    def section_stream_read(self, addr, size):
        """Read up to `size` bytes at virtual address `addr` straight from
        the file stream (clamped to the end of the owning section)."""
        s = self.classbinary.get_section(addr)
        if s is None:
            return b""
        s = self.__sections[s.start]
        off = addr - s.header.sh_addr
        end = s.header.sh_addr + s.header.sh_size
        s.stream.seek(s.header.sh_offset + off)
        return s.stream.read(min(size, end - addr))

    def get_arch(self):
        """Return (capstone arch, capstone mode) or (None, None)."""
        import capstone as CAPSTONE
        arch = self.arch_lookup.get(self.elf.get_machine_arch(), None)
        mode = self.arch_mode_lookup.get(self.elf.get_machine_arch(), None)
        if arch is None:
            return None, None
        # If one arch name has multiple "word size"
        if isinstance(mode, dict):
            mode = mode[self.elf.elfclass]
        if self.elf.little_endian:
            mode |= CAPSTONE.CS_MODE_LITTLE_ENDIAN
        else:
            mode |= CAPSTONE.CS_MODE_BIG_ENDIAN
        return arch, mode

    def get_arch_string(self):
        return self.elf.get_machine_arch()

    def get_entry_point(self):
        return self.elf.header['e_entry']
def main(input, output, format='nro'):
    """Convert an AArch64 ELF into an NRO or NSO container.

    Applies NORELOC_* relocations to the section contents, pads/concatenates
    the segments, and writes the chosen container header plus segments.

    :param input: path of the source ELF file
    :param output: path of the NRO/NSO file to write
    :param format: 'nro' (default) or 'nso' (lz4-compressed segments)

    (Parameter names `input`/`format` shadow builtins but are kept for
    backward compatibility with existing callers.)
    """
    format = format.lower()
    assert format in ('nro', 'nso')
    with open(input, 'rb') as f:
        elffile = ELFFile(f)
        # renamed lambda parameter: `type` shadowed the builtin
        elffile.iter_sections_by_type = lambda sec_cls: (
            x for x in elffile.iter_sections() if isinstance(x, sec_cls))

        # Build name -> absolute address map; index in symbolList matches the
        # symbol table index used by relocation entries.
        symbols = {}
        symbolList = []
        for x in elffile.iter_sections_by_type(SymbolTableSection):
            for sym in x.iter_symbols():  # dropped unused enumerate index
                sectaddr = elffile.get_section(sym['st_shndx'])['sh_addr'] if isinstance(sym['st_shndx'], int) else 0
                symbols[sym.name] = sectaddr + sym['st_value']
                symbolList.append(sym.name)

        textCont, rodataCont, relaDynCont, dataCont, dynamicCont, dynstrCont, dynsymCont = [
            elffile.get_section_by_name(x).data() for x in (
                '.text', '.rodata', '.rela.dyn', '.data', '.dynamic',
                '.dynstr', '.dynsym')]
        csec = dict(text=textCont, rodata=rodataCont, relaDyn=relaDynCont,
                    data=dataCont, dynamic=dynamicCont, dynstr=dynstrCont,
                    dynsym=dynsymCont)

        def replace(tgt, offset, data):
            # splice `data` over csec[tgt] at `offset` (immutable bytes)
            orig = csec[tgt]
            csec[tgt] = orig[:offset] + data + orig[offset + len(data):]

        # Resolve NORELOC_* relocations in place.
        for x in elffile.iter_sections_by_type(RelocationSection):
            tgtsect = elffile.get_section(x['sh_info'])
            tgt = tgtsect.name[1:]  # strip leading '.' to match csec keys
            if tgt not in csec:
                continue
            for reloc in x.iter_relocations():  # renamed: `iter` shadowed builtin
                symname = symbolList[reloc['r_info_sym']]
                if not symname.startswith('NORELOC_'):
                    continue
                reloc_type = reloc['r_info_type']
                if reloc_type == R_AARCH64_PREL32:
                    # PC-relative 32-bit: S + A - P
                    replace(tgt, reloc['r_offset'], struct.pack(
                        '<i', symbols[symname] + reloc['r_addend'] -
                        (tgtsect['sh_addr'] + reloc['r_offset'])))
                elif reloc_type == R_AARCH64_ABS32:
                    # Absolute 32-bit: S + A
                    replace(tgt, reloc['r_offset'], struct.pack(
                        '<I', symbols[symname] + reloc['r_addend']))
                else:
                    print('Unknown relocation type!', reloc_type)
                    assert False

        # Assemble the three output segments with the required padding.
        text, rodata, data = csec['text'], csec['rodata'], csec['data']
        if len(rodata) & 0x7:
            rodata += '\0'.encode() * (0x8 - (len(rodata) & 0x7))
        rodata += csec['relaDyn']
        if len(data) & 0x7:
            data += '\0'.encode() * (0x8 - (len(data) & 0x7))
        if len(text) & 0xFFF:
            text += '\0'.encode() * (0x1000 - (len(text) & 0xFFF))
        if len(rodata) & 0xFFF:
            rodata += '\0'.encode() * (0x1000 - (len(rodata) & 0xFFF))
        data += csec['dynamic']
        if len(data) & 0xFFF:
            data += '\0'.encode() * (0x1000 - (len(data) & 0xFFF))
        data += csec['dynsym']
        if len(data) & 0x7:
            data += '\0'.encode() * (0x8 - (len(data) & 0x7))
        data += csec['dynstr']
        if len(data) & 0xFFF:
            data += '\0'.encode() * (0x1000 - (len(data) & 0xFFF))
        bssSize = elffile.get_section_by_name('.bss')['sh_size']
        if bssSize & 0xFFF:
            bssSize += 0x1000 - (bssSize & 0xFFF)

        if format == 'nro':
            # text = text[0x80:]
            with open(output, 'wb') as fp:
                fp.write(text[:0x4])  # first branch instruction
                fp.write(struct.pack('<III', len(text) + len(rodata) + 8, 0, 0))
                fp.write('NRO0'.encode())
                fp.write(struct.pack('<III', 0,
                                     len(text) + len(rodata) + len(data), 0))
                fp.write(struct.pack('<II', 0, len(text)))  # exec segment
                fp.write(struct.pack('<II', len(text), len(rodata)))  # read only segment
                fp.write(struct.pack('<II', len(text) + len(rodata), len(data)))  # rw segment
                fp.write(struct.pack('<II', bssSize, 0))
                fp.write('\0'.encode() * 0x40)
                fp.write(text[0x80:])
                fp.write(rodata)
                fp.write(data)
        else:
            with open(output, 'wb') as fp:
                # NSO segments are individually lz4-compressed
                ctext, crodata, cdata = [
                    lz4.block.compress(x, store_size=False)
                    for x in (text, rodata, data)]
                fp.write('NSO0'.encode())
                fp.write('\0'.encode() * 0xC)
                off = 0x101
                fp.write(struct.pack('<IIII', off, 0, len(text), 0))
                off += len(ctext)
                fp.write(struct.pack('<IIII', off, len(text), len(rodata), 0))
                off += len(crodata)
                fp.write(struct.pack('<IIII', off, len(text) + len(rodata),
                                     len(data),
                                     symbols['NORELOC_BSS_END_'] -
                                     symbols['NORELOC_BSS_START_']))
                fp.write('\0'.encode() * 0x20)
                fp.write(struct.pack('<IIII', len(ctext), len(crodata),
                                     len(cdata), 0))
                fp.write('\0'.encode() * 0x91)
                fp.write(ctext)
                fp.write(crodata)
                fp.write(cdata)
class ElfHelper:
    """Scans a Zephyr kernel ELF's DWARF info for kernel object instances."""

    def __init__(self, filename, verbose, kobjs, subs):
        self.verbose = verbose
        self.fp = open(filename, "rb")
        self.elf = ELFFile(self.fp)
        self.little_endian = self.elf.little_endian
        # module-level tables consumed by the analyze_die_* helpers
        global kobjects
        global subsystems
        kobjects = kobjs
        subsystems = subs

    def find_kobjects(self, syms):
        """Walk the DWARF info and return an OrderedDict mapping the memory
        address of each kernel object instance to its type descriptor.

        :param syms: symbol-name -> address map (see get_symbols) providing
                     the kernel RAM/ROM boundary symbols.
        """
        if not self.elf.has_dwarf_info():
            sys.stderr.write("ELF file has no DWARF information\n")
            sys.exit(1)

        kram_start = syms["__kernel_ram_start"]
        kram_end = syms["__kernel_ram_end"]
        krom_start = syms["_image_rom_start"]
        krom_end = syms["_image_rom_end"]

        di = self.elf.get_dwarf_info()

        variables = []

        # Step 1: collect all type information.
        for CU in di.iter_CUs():
            for die in CU.iter_DIEs():
                # Unions are disregarded, kernel objects should never be union
                # members since the memory is not dedicated to that object and
                # could be something else
                if die.tag == "DW_TAG_structure_type":
                    analyze_die_struct(die)
                elif die.tag == "DW_TAG_const_type":
                    analyze_die_const(die)
                elif die.tag == "DW_TAG_array_type":
                    analyze_die_array(die)
                elif die.tag == "DW_TAG_variable":
                    variables.append(die)

        # Step 2: filter type_env to only contain kernel objects, or structs
        # and arrays of kernel objects
        bad_offsets = []
        for offset, type_object in type_env.items():
            if not type_object.has_kobject():
                bad_offsets.append(offset)
        for offset in bad_offsets:
            del type_env[offset]

        # Step 3: Now that we know all the types we are looking for, examine
        # all variables
        all_objs = {}
        for die in variables:
            name = die_get_name(die)
            if not name:
                continue
            if name.startswith("__device_sys_init"):
                # Boot-time initialization function; not an actual device
                continue
            type_offset = die_get_type_offset(die)
            # Is this a kernel object, or a structure containing kernel
            # objects?
            if type_offset not in type_env:
                continue
            if "DW_AT_declaration" in die.attributes:
                # Extern declaration, only used indirectly
                extern_env[die.offset] = die
                continue
            if "DW_AT_location" not in die.attributes:
                self.debug_die(
                    die, "No location information for object '%s'; possibly"
                    " stack allocated" % name)
                continue
            loc = die.attributes["DW_AT_location"]
            if loc.form != "DW_FORM_exprloc" and \
               loc.form != "DW_FORM_block1":
                self.debug_die(
                    die,
                    "kernel object '%s' unexpected location format" % name)
                continue
            opcode = loc.value[0]
            if opcode != DW_OP_addr:
                # Check if frame pointer offset DW_OP_fbreg
                if opcode == DW_OP_fbreg:
                    self.debug_die(die,
                                   "kernel object '%s' found on stack" % name)
                else:
                    self.debug_die(
                        die,
                        "kernel object '%s' unexpected exprloc opcode %s" %
                        (name, hex(opcode)))
                continue
            # Little-endian 32-bit address encoded in the exprloc bytes
            addr = (loc.value[1] | (loc.value[2] << 8) |
                    (loc.value[3] << 16) | (loc.value[4] << 24))
            if addr == 0:
                # Never linked; gc-sections deleted it
                continue
            # Objects must live in kernel RAM or ROM to be valid
            if ((addr < kram_start or addr >= kram_end) and
                    (addr < krom_start or addr >= krom_end)):
                self.debug_die(die,
                               "object '%s' found in invalid location %s" %
                               (name, hex(addr)))
                continue
            type_obj = type_env[type_offset]
            objs = type_obj.get_kobjects(addr)
            all_objs.update(objs)
            self.debug("symbol '%s' at %s contains %d object(s)" %
                       (name, hex(addr), len(objs)))

        # Step 4: objs is a dictionary mapping variable memory addresses to
        # their associated type objects. Now that we have seen all variables
        # and can properly look up API structs, convert this into a dictionary
        # mapping variables to the C enumeration of what kernel object type it
        # is.
        ret = {}
        for addr, ko in all_objs.items():
            # API structs don't get into the gperf table
            if ko.type_obj.api:
                continue
            if ko.type_obj.name != "device":
                # Not a device struct so we immediately know its type
                ko.type_name = kobject_to_enum(ko.type_obj.name)
                ret[addr] = ko
                continue
            # Device struct. Need to get the address of its API struct,
            # if it has one.
            apiaddr = device_get_api_addr(self.elf, addr)
            if apiaddr not in all_objs:
                if apiaddr == 0:
                    self.debug("device instance at 0x%x has no associated subsystem"
                               % addr)
                else:
                    self.debug("device instance at 0x%x has unknown API 0x%x"
                               % (addr, apiaddr))
                # API struct does not correspond to a known subsystem, skip it
                continue
            apiobj = all_objs[apiaddr]
            ko.type_name = subsystem_to_enum(apiobj.type_obj.name)
            ret[addr] = ko

        self.debug("found %d kernel object instances total" % len(ret))

        # 1. Before python 3.7 dict order is not guaranteed. With Python
        #    3.5 it doesn't seem random with *integer* keys but can't
        #    rely on that.
        # 2. OrderedDict means _insertion_ order, so not enough because
        #    built from other (random!) dicts: need to _sort_ first.
        # 3. Sorting memory address looks good.
        return OrderedDict(sorted(ret.items()))

    def get_symbols(self):
        """Return {symbol name: address} from the first symbol table found."""
        for section in self.elf.iter_sections():
            if isinstance(section, SymbolTableSection):
                return {sym.name: sym.entry.st_value
                        for sym in section.iter_symbols()}
        raise LookupError("Could not find symbol table")

    def debug(self, text):
        # verbose-only diagnostic output
        if not self.verbose:
            return
        sys.stdout.write(scr + ": " + text + "\n")

    def error(self, text):
        sys.stderr.write("%s ERROR: %s\n" % (scr, text))
        sys.exit(1)

    def debug_die(self, die, text):
        # diagnostic with source file/line context for a DWARF DIE
        fn, ln = get_filename_lineno(die)
        self.debug(str(die))
        self.debug("File '%s', line %d:" % (fn, ln))
        self.debug("    %s" % text)

    def get_thread_counter(self):
        return thread_counter
class ZephyrElf:
    """
    Represents information about devices in an elf file.
    """

    def __init__(self, kernel, edt, device_start_symbol):
        self.elf = ELFFile(open(kernel, "rb"))
        self.edt = edt
        self.devices = []
        # resolve addresses of the linker-defined constants the Device /
        # DevicePM parsers need
        self.ld_consts = self._symbols_find_value(
            set([
                device_start_symbol, *Device.required_ld_consts,
                *DevicePM.required_ld_consts
            ]))
        self._device_parse_and_link()

    @property
    def little_endian(self):
        """
        True if the elf file is for a little-endian architecture.
        """
        return self.elf.little_endian

    @property
    def native_struct_format(self):
        """
        Get the struct format specifier and byte size of the native machine type.
        """
        format = "<" if self.little_endian else ">"
        if self.elf.elfclass == 32:
            format += "I"
            size = 4
        else:
            format += "Q"
            size = 8
        return (format, size)

    def symbol_data(self, sym):
        """
        Retrieve the raw bytes associated with a symbol from the elf file.
        """
        # Returns None implicitly if no section contains the symbol.
        addr = sym.entry.st_value
        len = sym.entry.st_size
        for section in self.elf.iter_sections():
            start = section['sh_addr']
            end = start + section['sh_size']
            if (start <= addr) and (addr + len) <= end:
                offset = addr - section['sh_addr']
                return bytes(section.data()[offset:offset + len])

    def _symbols_find_value(self, names):
        # Return {name: st_value} for every requested symbol name found
        # in any symbol table.
        symbols = {}
        for section in self.elf.iter_sections():
            if isinstance(section, SymbolTableSection):
                for sym in section.iter_symbols():
                    if sym.name in names:
                        symbols[sym.name] = sym.entry.st_value
        return symbols

    def _object_find_named(self, prefix, cb):
        # Invoke cb(sym) for every STT_OBJECT symbol whose name starts
        # with `prefix`.
        for section in self.elf.iter_sections():
            if isinstance(section, SymbolTableSection):
                for sym in section.iter_symbols():
                    if sym.entry.st_info.type != 'STT_OBJECT':
                        continue
                    if sym.name.startswith(prefix):
                        cb(sym)

    def _link_devices(self, devices):
        # Compute the dependency graph induced from the full graph restricted to the
        # the nodes that exist in the application. Note that the edges in the
        # induced graph correspond to paths in the full graph.
        root = self.edt.dep_ord2node[0]
        for ord, dev in devices.items():
            n = self.edt.dep_ord2node[ord]

            deps = set(n.depends_on)
            while len(deps) > 0:
                dn = deps.pop()
                if dn.dep_ordinal in devices:
                    # this is used
                    dev.devs_depends_on.add(devices[dn.dep_ordinal])
                elif dn != root:
                    # forward the dependency up one level
                    for ddn in dn.depends_on:
                        deps.add(ddn)

            sups = set(n.required_by)
            while len(sups) > 0:
                sn = sups.pop()
                if sn.dep_ordinal in devices:
                    dev.devs_supports.add(devices[sn.dep_ordinal])
                else:
                    # forward the support down one level
                    for ssn in sn.required_by:
                        sups.add(ssn)

    def _link_injected(self, devices):
        # Wire up dependencies declared via injected ordinals
        # (second group in the device's ordinals array).
        for dev in devices.values():
            injected = dev.ordinals.ordinals[1]
            for inj in injected:
                if inj in devices:
                    dev.devs_depends_on_injected.add(devices[inj])
                    devices[inj].devs_supports.add(dev)

    def _device_parse_and_link(self):
        # Find all PM structs
        pm_structs = {}
        def _on_pm(sym):
            pm_structs[sym.entry.st_value] = DevicePM(self, sym)
        self._object_find_named('__pm_device_', _on_pm)

        # Find all ordinal arrays
        ordinal_arrays = {}
        def _on_ordinal(sym):
            ordinal_arrays[sym.entry.st_value] = DeviceOrdinals(self, sym)
        self._object_find_named('__devicehdl_', _on_ordinal)

        # Find all device structs
        def _on_device(sym):
            self.devices.append(Device(self, sym))
        self._object_find_named('__device_', _on_device)

        # Sort the device array by address for handle calculation
        self.devices = sorted(self.devices, key=lambda k: k.sym.entry.st_value)

        # Assign handles to the devices (handles are 1-based)
        for idx, dev in enumerate(self.devices):
            dev.handle = 1 + idx

        # Link devices structs with PM and ordinals
        for dev in self.devices:
            if dev.obj_pm in pm_structs:
                dev.pm = pm_structs[dev.obj_pm]
            if dev.obj_ordinals in ordinal_arrays:
                dev.ordinals = ordinal_arrays[dev.obj_ordinals]
                if dev.ordinal != DeviceOrdinals.DEVICE_HANDLE_NULL:
                    dev.edt_node = self.edt.dep_ord2node[dev.ordinal]

        # Create mapping of ordinals to devices
        devices_by_ord = {d.ordinal: d for d in self.devices if d.edt_node}

        # Link devices to each other based on the EDT tree
        self._link_devices(devices_by_ord)

        # Link injected devices to each other
        self._link_injected(devices_by_ord)

    def device_dependency_graph(self, title, comment):
        """
        Construct a graphviz Digraph of the relationships between devices.
        """
        import graphviz
        dot = graphviz.Digraph(title, comment=comment)
        # Split iteration so nodes and edges are grouped in source
        for dev in self.devices:
            if dev.ordinal == DeviceOrdinals.DEVICE_HANDLE_NULL:
                text = '{:s}\\nHandle: {:d}'.format(dev.sym.name, dev.handle)
            else:
                n = self.edt.dep_ord2node[dev.ordinal]
                text = '{:s}\\nOrdinal: {:d} | Handle: {:d}\\n{:s}'.format(
                    n.name, dev.ordinal, dev.handle, n.path)
            dot.node(str(dev.ordinal), text)
        for dev in self.devices:
            for sup in dev.devs_supports:
                dot.edge(str(dev.ordinal), str(sup.ordinal))
        return dot
class Elf(object):
    """Inspect security-relevant properties of an ELF binary.

    Wraps a pyelftools ``ELFFile`` and reports on network usage, /tmp
    references, FORTIFY_SOURCE, stack canaries, RELRO, PIE and shared
    library dependencies.  Most checks return the strings "Enabled",
    "Disabled", "NA" or "None" rather than booleans.
    """

    def __init__(self, fileobj):
        self.elffile = ELFFile(fileobj)
        self.output = sys.stdout

    # our code starts here :-)

    def network(self):
        """Classify network usage from symbol names.

        Returns "network-ip" as soon as a symbol matches IP_PATTERNS,
        "network-local" if only LOCAL_PATTERNS match, "None" otherwise.
        """
        ret = "None"
        for section in self.elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                print("\nSymbol table '%s' has a sh_entsize "
                      "of zero!" % (bytes2str(section.name)),
                      file=sys.stderr)
                continue
            # NOTE: the index from enumerate() was never used; iterate
            # the symbols directly.
            for symbol in section.iter_symbols():
                # first match IP_PATTERNS
                for pattern in IP_PATTERNS:
                    if re.match(pattern, bytes2str(symbol.name)):
                        return "network-ip"
                # then match LOCAL_PATTERNS
                for pattern in LOCAL_PATTERNS:
                    if re.match(pattern, bytes2str(symbol.name)):
                        ret = "network-local"
                        break
        return ret

    def _strings(self):
        """Return /tmp-referencing strings found in the binary, as str.

        Reads the whole underlying stream (restoring the seek position),
        pipes it through eu-strings, and keeps lines that start with
        "/tmp/" and do not contain "XXX".
        """
        stream = self.elffile.stream
        epos = stream.tell()
        stream.seek(0, 0)
        data = stream.read()
        stream.seek(epos, 0)

        ret = []
        # XXX avoid calling eu-strings
        import subprocess
        # fixed command string, no user input -- shell=True is not an
        # injection risk here
        p = subprocess.Popen("eu-strings", shell=True,
                             stdin=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             stdout=subprocess.PIPE)
        out = p.communicate(input=data)[0]
        for line in out.splitlines():
            # BUG FIX: 'line' is bytes; the original tested the str "XXX"
            # against it, which raises TypeError on Python 3.  Compare
            # bytes with bytes and decode the kept lines so callers can
            # treat the result as text.
            if re.match(b"^/tmp/.+", line) and b"XXX" not in line:
                ret.append(bytes2str(line))
        return ret

    def tempstuff(self):
        """Report hardcoded /tmp paths unless tmpfile-style APIs are used.

        Returns "None" when no /tmp strings exist or when a TMP_FUNCTIONS
        symbol is present; otherwise a "$"-joined list of the /tmp strings.
        """
        tmp_strings = self._strings()

        # if there are no /tmp references, just return
        if len(tmp_strings) == 0:
            return "None"

        for section in self.elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                print("\nSymbol table '%s' has a sh_entsize "
                      "of zero!" % (bytes2str(section.name)),
                      file=sys.stderr)
                continue
            for symbol in section.iter_symbols():
                for pattern in TMP_FUNCTIONS:
                    if re.match(pattern, bytes2str(symbol.name)):
                        return "None"
        # _strings() now returns str items, so joining with a str
        # separator is safe (the original joined bytes with "$").
        return "$".join(tmp_strings)

    # XXX implement this
    def chroot_without_chdir(self):
        """
        Check for apps that use chroot(2) without using chdir(2).

        Inspired by http://people.redhat.com/sgrubb/security/find-chroot
        """
        pass

    def fortify(self):
        """
        Check if source code was compiled with FORTIFY_SOURCE.

        NA       : FORTIFY_SOURCE was not applicable
        Enabled  : unsafe and _chk functions were found
        Disabled : only unsafe functions were found (_chk functions missing)
        """
        ret = "NA"
        for section in self.elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                print("\nSymbol table '%s' has a sh_entsize "
                      "of zero!" % (bytes2str(section.name)),
                      file=sys.stderr)
                continue
            for symbol in section.iter_symbols():
                for pattern in UNSAFE_FUNCTIONS:
                    if re.match(pattern, bytes2str(symbol.name)):
                        if ret == "NA":
                            ret = "Disabled"
                            break
                if ret == "Disabled":
                    # look for a corresponding __*_chk symbol or the glibc
                    # __chk_fail failure handler
                    for chk_symbol in section.iter_symbols():
                        symbolstr = bytes2str(chk_symbol.name)
                        # BUG FIX: the original matched " __chk_fail" with
                        # a leading space; no ELF symbol name contains a
                        # space, so the __chk_fail branch could never fire.
                        if (symbolstr.startswith("__") and
                                symbolstr.endswith("_chk")) or \
                                symbolstr.endswith("__chk_fail"):
                            ret = "Enabled"
                            break
        return ret

    def canary(self):
        """Return "Enabled" if a stack-protector (STACK_CHK) symbol exists."""
        for section in self.elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                print("\nSymbol table '%s' has a sh_entsize "
                      "of zero!" % (bytes2str(section.name)),
                      file=sys.stderr)
                continue
            for symbol in section.iter_symbols():
                if bytes2str(symbol.name) in STACK_CHK:
                    return "Enabled"
        return "Disabled"

    def dynamic_tags(self, key="DT_RPATH"):
        """Return "Enabled" if the dynamic section carries tag *key*."""
        for section in self.elffile.iter_sections():
            if not isinstance(section, DynamicSection):
                continue
            for tag in section.iter_tags():
                if tag.entry.d_tag == key:
                    return "Enabled"
        return "Disabled"

    def program_headers(self):
        """Check the GNU_STACK program header.

        Returns "Enabled" when a GNU_STACK segment exists and is not
        executable, "Disabled" otherwise, or None when the file has no
        program headers at all.
        """
        pflags = P_FLAGS()
        if self.elffile.num_segments() == 0:
            # no program headers in this file
            return

        found = False
        for segment in self.elffile.iter_segments():
            if re.search("GNU_STACK", str(segment['p_type'])):
                found = True
                if segment['p_flags'] & pflags.PF_X:
                    return "Disabled"
        if found:
            return "Enabled"
        return "Disabled"

    def relro(self):
        """Check RELRO: "Enabled" (full), "Partial", "Disabled", or None
        when the file has no program headers."""
        if self.elffile.num_segments() == 0:
            # no program headers in this file
            return

        have_relro = False
        for segment in self.elffile.iter_segments():
            if re.search("GNU_RELRO", str(segment['p_type'])):
                have_relro = True
                break

        # full RELRO additionally requires DT_BIND_NOW
        if self.dynamic_tags("DT_BIND_NOW") == "Enabled" and have_relro:
            return "Enabled"
        if have_relro:
            return "Partial"
        return "Disabled"

    def pie(self):
        """Detect position-independent executables.

        Returns "Enabled" for ET_DYN with DT_DEBUG, "DSO" for a plain
        shared object, "Disabled" otherwise.
        """
        header = self.elffile.header
        # NOTE(review): "EXEC" is not a valid d_tag value, so this lookup
        # always yields "Disabled" and the early return is dead code --
        # kept as-is to preserve behavior; confirm the intended tag.
        if self.dynamic_tags("EXEC") == "Enabled":
            return "Disabled"
        if "ET_DYN" in header['e_type']:
            if self.dynamic_tags("DT_DEBUG") == "Enabled":
                return "Enabled"
            return "DSO"
        return "Disabled"

    def getdeps(self):
        """Return the DT_NEEDED shared-library dependencies as a list."""
        deps = []
        if self.elffile.num_segments() == 0:
            return deps

        for segment in self.elffile.iter_segments():
            if re.search("PT_DYNAMIC", str(segment['p_type'])):
                # this file uses dynamic linking, so read the dynamic
                # section and collect the DT_NEEDED tags
                for section in self.elffile.iter_sections():
                    if not isinstance(section, DynamicSection):
                        continue
                    for tag in section.iter_tags():
                        if tag.entry.d_tag == 'DT_NEEDED':
                            deps.append(bytes2str(tag.needed))
                break
        return deps