def read_program(filename):
    '''
    Identifies the program format and returns the matching parser instance.

    The supported parsers are tried in a fixed order (ELF, PE, HEX, SREC);
    the first one that accepts the input wins. After each rejected attempt
    the input stream is rewound before the next parser is tried.

    Args:
        filename (str): the program to read. If it cannot be opened as a
            file, the argument itself is converted with bytes() and used
            as the program content.

    Returns:
        an instance of the first format that accepts the input (elf.Elf,
        pe.PE, utils.HEX or utils.SREC), or the underlying DataIO object
        when no format matches (a warning is logged in that case).
    '''
    try:
        data = open(filename, 'rb')
    except (ValueError, TypeError, IOError):
        # Not an openable path: treat the argument as raw program content.
        # ValueError is what open() raises on Python 3 when the argument
        # contains NUL bytes, which is the norm for a binary buffer.
        data = bytes(filename)
    f = DataIO(data)
    # (parser, rejection error, success message, rejection message),
    # listed in the order in which the formats are probed:
    formats = (
        (elf.Elf, elf.ElfError,
         "ELF format detected", 'ElfError raised for %s'),
        (pe.PE, pe.PEError,
         "PE format detected", 'PEError raised for %s'),
        (utils.HEX, utils.FormatError,
         "HEX format detected", ' HEX FormatError raised for %s'),
        (utils.SREC, utils.FormatError,
         "SREC format detected", ' SREC FormatError raised for %s'),
    )
    for parser, rejected, found_msg, rejected_msg in formats:
        try:
            p = parser(f)
        except rejected:
            # rewind so that the next parser starts from the beginning
            f.seek(0)
            logger.debug(rejected_msg % f.name)
        else:
            logger.info(found_msg)
            return p
    logger.warning('unknown format')
    return f
class Elf32(object):
    '''
    Parser for a 32-bit ELF program.

    The constructor reads the ELF header, the program header table and the
    section header table, resolves section names, and builds the
    ``functions`` and ``variables`` symbol dictionaries.

    NOTE(review): padding and string splitting use plain str literals, i.e.
    the code relies on Python 2 "str is bytes" semantics; this class has not
    been ported to Python 3 bytes handling.

    Args:
        filename: path of the file to parse. If it cannot be opened, the
            argument is wrapped in a DataIO object and used as the content.
    '''

    # p_vaddr of the first PT_LOAD segment seen while basemap was still unset
    basemap = None
    # last table returned by a SHT_SYMTAB/SHT_DYNSYM section read
    symtab = None
    # last table returned by a SHT_STRTAB section read
    strtab = None
    # last table returned by a SHT_REL/SHT_RELA section read
    reltab = None
    # {address: (name, size, info, section index)} for STT_FUNC symbols,
    # plus {relocation offset: name} entries when the program is dynamic
    functions = None
    # {address: (name, size, info, section index)} for STT_OBJECT symbols
    variables = None

    @property
    def entrypoints(self):
        '''List holding the entry point address found in the ELF header.'''
        return [self.Ehdr.e_entry]

    @property
    def filename(self):
        '''Name of the underlying file object.'''
        return self.__file.name

    def __init__(self, filename):
        try:
            self.__file = open(filename, 'rb')
        except (ValueError, TypeError, IOError):
            # not an openable path: use the argument as the raw content.
            from amoco.system.core import DataIO
            self.__file = DataIO(filename)
        data = self.__file.read(52)
        if len(data) < 52:
            # pad truncated input so that the header can still be unpacked
            data = data.ljust(52, '\x00')
        self.Ehdr = Elf32_Ehdr(data)
        self.dynamic = False
        self.__parse_phdrs()
        self.__parse_shdrs()
        self.__name_sections()
        self.functions = self.__functions()
        self.variables = self.__variables()

    def __parse_phdrs(self):
        '''Read the program header table into self.Phdr.'''
        # read program header table: should not raise any errors
        self.Phdr = []
        if not self.Ehdr.e_phoff:
            return
        self.__file.seek(self.Ehdr.e_phoff)
        n, l = self.Ehdr.e_phnum, self.Ehdr.e_phentsize
        data = self.__file.read(n * l)
        for pht in range(n):
            logger.progress(pht, n, 'parsing Phdrs ')
            P = Elf32_Phdr(data[pht * l:])
            if P.p_type == PT_LOAD:
                if not self.basemap:
                    self.basemap = P.p_vaddr
            elif P.p_type == PT_DYNAMIC:
                self.dynamic = True
            elif P.p_type not in ELF_CONSTS['p_type']:
                # unknown segment types are dropped from the table
                logger.verbose('invalid segment detected (removed)')
                continue
            self.Phdr.append(P)

    def __parse_shdrs(self):
        '''Read the section header table into self.Shdr.

        Any failure, including a section of unknown type, empties the
        whole table rather than aborting the load.
        '''
        # read section header table: unused by loader, can raise error
        self.Shdr = []
        if not self.Ehdr.e_shoff:
            return
        try:
            self.__file.seek(self.Ehdr.e_shoff)
            n, l = self.Ehdr.e_shnum, self.Ehdr.e_shentsize
            data = self.__file.read(n * l)
            for sht in range(n):
                logger.progress(sht, n, 'parsing Shdrs ')
                S = Elf32_Shdr(data[sht * l:])
                if S.sh_type not in ELF_CONSTS['sh_type']:
                    raise ValueError('invalid section type')
                self.Shdr.append(S)
        except Exception:
            # best effort: sections are optional, so give up on all of them
            # (KeyboardInterrupt/SystemExit are deliberately not swallowed)
            logger.verbose('invalid section detected (all Shdr removed)')
            self.Shdr = []

    def __name_sections(self):
        '''Set the name attribute of every section from the string table
        designated by e_shstrndx (names are empty if it is not a string
        table).'''
        # read section's name string table:
        n = self.Ehdr.e_shstrndx
        if n == SHN_UNDEF or not (0 <= n < len(self.Shdr)):
            return
        S = self.Shdr[n]
        self.__file.seek(S.sh_offset)
        data = self.__file.read(S.sh_size)
        if S.sh_type != SHT_STRTAB:
            logger.verbose('section names not a string table')
            for s in self.Shdr:
                s.name = ''
        else:
            for s in self.Shdr:
                s.name = data[s.sh_name:].split('\0')[0]

    ##
    def getsize(self):
        '''Return the sum of p_filesz over all kept program headers.'''
        return sum(s.p_filesz for s in self.Phdr)

    # allows to get info about target :
    # - the section or segment header that contains it (None if not found)
    # - offset into that section/segment (0 if not found)
    # - base virtual address of that section/segment (0 if not found)
    # target can be a virtual address in hex string format or integer,
    # or a symbol string searched in the functions dictionary.
    def getinfo(self, target):
        '''
        Locate target inside the program.

        Args:
            target: an integer address, a hexadecimal address string, or a
                symbol name looked up in self.functions.

        Returns:
            a (header, offset, base) tuple, or (None, 0, 0) when the target
            cannot be resolved or is not contained in any header.
        '''
        from numbers import Integral
        addr = None
        if isinstance(target, str):
            try:
                addr = int(target, 16)
            except ValueError:
                for a, f in self.functions.items():
                    # static symbols map to a tuple whose first item is the
                    # name, dynamic ones map directly to the name:
                    name = f if isinstance(f, str) else f[0]
                    if name == target:
                        # keys are the symbol values themselves; only parse
                        # them when they are given as hex strings
                        addr = a if isinstance(a, Integral) else int(a, 16)
                        break
        elif isinstance(target, Integral):
            addr = target
        if addr is None:
            # target is probably a symbol not found in functions
            return None, 0, 0
        # now we have addr so we can see in which section/segment it is...
        # sections are smaller than segments so we try first with Shdr
        # but this may lead to errors because what really matters are segments
        # loaded by the kernel binfmt_elf.c loader.
        if self.Shdr:
            for s in self.Shdr[::-1]:
                if s.sh_type == SHT_NULL:
                    continue
                if s.sh_addr <= addr < s.sh_addr + s.sh_size:
                    return s, addr - s.sh_addr, s.sh_addr
        ##
        elif self.Phdr:
            # segments are only searched when there is no section table
            for s in self.Phdr[::-1]:
                if s.p_type != PT_LOAD:
                    continue
                if s.p_vaddr <= addr < s.p_vaddr + s.p_filesz:
                    return s, addr - s.p_vaddr, s.p_vaddr
        return None, 0, 0

    ##
    def data(self, target, size):
        '''Return only the content part of readcode(target, size).'''
        return self.readcode(target, size)[0]

    def readcode(self, target, size=None):
        '''
        Read the content found at target.

        Args:
            target: anything accepted by getinfo.
            size: number of items to return; None means up to the end of
                the containing section/segment.

        Returns:
            a (content, 0, address) tuple; content is '' when the target is
            not found or its container has no content.
        '''
        s, offset, base = self.getinfo(target)
        data = ''
        if s:
            if isinstance(s, Elf32_Phdr):
                c = self.readsegment(s)
            else:
                c = self.readsection(s)
            if c:
                if size is not None:
                    if isinstance(c, Elf32_Str):
                        c = c.data
                    data = c[offset:offset + size]
                else:
                    data = c[offset:]
        return data, 0, base + offset

    def readsegment(self, S):
        '''Return the content of PT_LOAD segment S, zero-padded from
        p_filesz up to p_memsz, or None for any other segment.'''
        if S and S.p_type == PT_LOAD:
            self.__file.seek(S.p_offset)
            return self.__file.read(S.p_filesz).ljust(S.p_memsz, '\x00')
        return None

    ##
    def loadsegment(self, S, pagesize=None):
        '''
        Build the memory image of a PT_LOAD segment.

        Args:
            S: the program header to load.
            pagesize: if given, the content is padded (never truncated) to
                at least this length.

        Returns:
            a {p_vaddr: content} dict, or None if S is not a PT_LOAD
            segment.
        '''
        if S and S.p_type == PT_LOAD:
            self.__file.seek(S.p_offset)
            # p_vaddr and p_offset must be congruent modulo p_align;
            # p_align values 0 and 1 mean that no alignment is required.
            if S.p_align > 1 and (S.p_offset - S.p_vaddr) % S.p_align:
                logger.verbose('wrong p_vaddr/p_align [%08x/%0d]' % (S.p_vaddr, S.p_align))
            base = S.p_vaddr
            content = self.__file.read(S.p_filesz).ljust(S.p_memsz, '\x00')
            if pagesize:
                # note: bytes are not truncated, only extended if needed...
                content = content.ljust(pagesize, '\x00')
            return {base: content}
        return None

    ##
    def readsection(self, sect):
        '''
        Read and decode a section.

        Args:
            sect: a section name, an index in self.Shdr, or a section
                header object.

        Returns:
            the parsed table for symbol, string, relocation and dynamic
            sections, the raw content for SHT_PROGBITS sections, and None
            for unknown names or any other section type.
        '''
        from numbers import Integral
        S = None
        if isinstance(sect, str):
            for st in self.Shdr:
                if st.name == sect:
                    S = st
                    break
        elif isinstance(sect, Integral):
            S = self.Shdr[sect]
        else:
            S = sect
        if S:
            if S.sh_type in (SHT_SYMTAB, SHT_DYNSYM):
                return self.__read_symtab(S)
            elif S.sh_type == SHT_STRTAB:
                return self.__read_strtab(S)
            elif S.sh_type in (SHT_REL, SHT_RELA):
                return self.__read_relocs(S)
            elif S.sh_type == SHT_DYNAMIC:
                return self.__read_dynamic(S)
            elif S.sh_type == SHT_PROGBITS:
                self.__file.seek(S.sh_offset)
                return self.__file.read(S.sh_size)
        return None

    def __read_entries(self, section, entry, mismatch):
        '''Read section and parse it as a list of fixed-size entry objects.

        Raises ElfError(mismatch) if sh_entsize is zero or does not divide
        sh_size.
        '''
        self.__file.seek(section.sh_offset)
        data = self.__file.read(section.sh_size)
        l = section.sh_entsize
        if not l or section.sh_size % l:
            raise ElfError(mismatch)
        return [entry(data[i * l:]) for i in range(section.sh_size // l)]

    ##
    def __read_symtab(self, section):
        '''Parse a symbol table section into Elf32_Sym objects and keep the
        result in self.symtab.'''
        if section.sh_type not in (SHT_SYMTAB, SHT_DYNSYM):
            logger.warning('not a symbol table section')
            return None
        symtab = self.__read_entries(section, Elf32_Sym,
                                     'symbol table size mismatch')
        self.symtab = symtab
        return symtab

    ##
    def __read_strtab(self, section):
        '''Wrap a string table section in an Elf32_Str object and keep the
        result in self.strtab.'''
        if section.sh_type != SHT_STRTAB:
            raise ElfError('not a string table section')
        self.__file.seek(section.sh_offset)
        data = self.__file.read(section.sh_size)
        strtab = Elf32_Str(data)
        self.strtab = strtab
        return strtab

    ##
    def __read_relocs(self, section):
        '''Parse a relocation section into Elf32_Rel/Elf32_Rela objects and
        keep the result in self.reltab.'''
        if section.sh_type not in (SHT_REL, SHT_RELA):
            logger.warning('not a relocation table section')
            return None
        entry = Elf32_Rel if section.sh_type == SHT_REL else Elf32_Rela
        reltab = self.__read_entries(section, entry,
                                     'relocation table size mismatch')
        self.reltab = reltab
        return reltab

    def __read_dynamic(self, section):
        '''Parse a dynamic section into Elf32_Dyn objects and keep the
        result in self.dyntab.'''
        if section.sh_type != SHT_DYNAMIC:
            logger.warning('not a dynamic linking section')
            return None
        dyntab = self.__read_entries(section, Elf32_Dyn,
                                     'dynamic linking size mismatch')
        self.dyntab = dyntab
        return dyntab

    ##
    def __read_note(self, section):
        '''Wrap a note section in an Elf32_Note object and keep the result
        in self.note.'''
        if section.sh_type != SHT_NOTE:
            logger.warning('not a note section')
            return None
        self.__file.seek(section.sh_offset)
        data = self.__file.read(section.sh_size)
        note = Elf32_Note(data)
        self.note = note
        return note

    def __in_section(self, symbols, name):
        '''Return the entries of symbols whose section is called name.

        A new dict is built so that nothing is removed from a dict while it
        is being iterated. The section index is the last item of each entry
        (st_shndx); reserved indices that do not designate an entry of
        self.Shdr never match.
        '''
        count = len(self.Shdr)
        return dict(
            (k, v) for k, v in symbols.items()
            if 0 <= v[3] < count and self.Shdr[v[3]].name == name
        )

    ##
    def __functions(self, fltr=None):
        '''Return the STT_FUNC symbols, optionally restricted to the section
        named fltr, extended with the dynamic entries if any.'''
        D = self.__symbols(STT_FUNC)
        # fltr applies to section name only :
        if fltr:
            D = self.__in_section(D, fltr)
        if self.dynamic:
            D.update(self.__dynamic(STT_FUNC))
        return D

    def __variables(self, fltr=None):
        '''Return the STT_OBJECT symbols, optionally restricted to the
        section named fltr.'''
        D = self.__symbols(STT_OBJECT)
        # fltr applies also to section name :
        if fltr:
            D = self.__in_section(D, fltr)
        return D

    def __symbols(self, t):
        '''Return {value: (name, size, info, section index)} for the symbols
        of '.symtab' that have type t and a non-zero value.'''
        if not self.readsection('.symtab'):
            return {}
        D = {}
        if self.readsection('.strtab'):
            for sym in self.symtab:
                if sym.st_type == t and sym.st_value:
                    D[sym.st_value] = (self.strtab[sym.st_name],
                                       sym.st_size,
                                       sym.st_info,
                                       sym.st_shndx)
        else:
            # need to build a fake strtab with our own symbol names:
            pass  # TODO
        return D

    def __dynamic(self, type=STT_FUNC):
        '''Return {relocation offset: symbol name} for every relocation
        entry with a non-zero offset.

        Reading '.dynsym' and '.dynstr' replaces self.symtab and
        self.strtab. The type argument is currently not used.
        '''
        if not self.readsection('.dynsym'):
            return {}
        D = {}
        if self.readsection('.dynstr'):
            for i, s in enumerate(self.Shdr):
                if s.sh_type not in (SHT_REL, SHT_RELA):
                    continue
                if self.readsection(i):
                    for r in self.reltab:
                        if r.r_offset:
                            sym = self.symtab[r.r_sym]
                            D[r.r_offset] = self.strtab[sym.st_name]
        else:
            # need to build a fake strtab with our own symbol names:
            pass  # TODO
        return D

    @staticmethod
    def __tabbed(hdr):
        '''Return str(hdr) rendered with a tab prefix; the original prefix
        is restored even if the rendering fails.'''
        saved = hdr.pfx
        hdr.pfx = '\t'
        try:
            return hdr.__str__()
        finally:
            hdr.pfx = saved

    def __str__(self):
        '''Return a listing of the ELF header, sections and segments.'''
        ss = ['ELF header:', self.__tabbed(self.Ehdr)]
        ss.append('\nSections:')
        for s in self.Shdr:
            ss.append(self.__tabbed(s))
            ss.append('---')
        ss.append('\nSegments:')
        for s in self.Phdr:
            ss.append(self.__tabbed(s))
            ss.append('---')
        return '\n'.join(ss)