Exemple #1
0
def read_program(filename):
    '''
    Detect the format of a program and wrap it in the matching parser.

    The input is wrapped in a DataIO stream and offered, in this order, to
    the ELF, PE, HEX and SREC parsers. The first parser that accepts it
    wins; after each rejection the stream is rewound to offset 0 before the
    next parser is tried.

    Args:
        filename (str): the program to read. If it cannot be opened as a
            file (TypeError or IOError), the argument itself is converted
            with bytes() and used as the raw content.

    Returns:
        the instance built by the first parser that accepted the input
        (elf.Elf, pe.PE, utils.HEX or utils.SREC), or the DataIO stream
        itself when no parser recognised the format.

    '''

    try:
        source = open(filename, 'rb')
    except (TypeError, IOError):
        # not a usable path: treat the argument as the content itself.
        source = bytes(filename)

    stream = DataIO(source)

    # ELF attempt:
    try:
        program = elf.Elf(stream)
    except elf.ElfError:
        stream.seek(0)
        logger.debug('ElfError raised for %s' % stream.name)
    else:
        logger.info("ELF format detected")
        return program

    # PE attempt:
    try:
        program = pe.PE(stream)
    except pe.PEError:
        stream.seek(0)
        logger.debug('PEError raised for %s' % stream.name)
    else:
        logger.info("PE format detected")
        return program

    # HEX attempt:
    try:
        program = utils.HEX(stream)
    except utils.FormatError:
        stream.seek(0)
        logger.debug(' HEX FormatError raised for %s' % stream.name)
    else:
        logger.info("HEX format detected")
        return program

    # SREC attempt:
    try:
        program = utils.SREC(stream)
    except utils.FormatError:
        stream.seek(0)
        logger.debug(' SREC FormatError raised for %s' % stream.name)
    else:
        logger.info("SREC format detected")
        return program

    # nothing matched: hand back the raw stream.
    logger.warning('unknown format')
    return stream
Exemple #2
0
class Elf32(object):
    """View of a 32-bit ELF file: header, segments, sections and symbols.

    The constructor parses the ELF header, the program header table and,
    on a best-effort basis, the section header table. It then builds the
    ``functions`` and ``variables`` dictionaries from the symbol tables.

    Attributes:
        Ehdr: the parsed Elf32_Ehdr.
        Phdr: list of parsed Elf32_Phdr whose p_type is a known constant.
        Shdr: list of parsed Elf32_Shdr; empty if the table is absent or if
            any of its entries is invalid.
        dynamic: True if a PT_DYNAMIC segment was found.
        basemap: p_vaddr taken from a PT_LOAD segment, or None.
        symtab, strtab, reltab: the table most recently read through
            readsection() for the corresponding section types.
        functions: dict keyed by address. Values coming from '.symtab' are
            tuples (name, st_size, st_info, st_shndx); values coming from
            relocations (dynamic files only) are plain names.
        variables: dict keyed by address with (name, st_size, st_info,
            st_shndx) tuples for STT_OBJECT symbols.
    """

    basemap = None
    symtab = None
    strtab = None
    reltab = None
    functions = None
    variables = None

    @property
    def entrypoints(self):
        """List holding the single entry address found in the ELF header."""
        return [self.Ehdr.e_entry]

    @property
    def filename(self):
        """Name attribute of the underlying file (or DataIO) object."""
        return self.__file.name

    def __init__(self, filename):
        """Open and parse the ELF file.

        Args:
            filename: path of the file to open. If opening fails with
                TypeError or IOError, the argument is wrapped in a DataIO
                instead.
        """
        try:
            self.__file = open(filename, 'rb')
        except (TypeError, IOError):
            from amoco.system.core import DataIO
            self.__file = DataIO(filename)
        # 52 bytes are handed to Elf32_Ehdr; short inputs are NUL-padded so
        # that the header parser always gets a full-size buffer.
        data = self.__file.read(52)
        if len(data) < 52:
            data = data.ljust(52, '\x00')
        self.Ehdr = Elf32_Ehdr(data)

        self.dynamic = False

        # read program header table: should not raise any errors
        self.Phdr = []
        if self.Ehdr.e_phoff:
            self.__file.seek(self.Ehdr.e_phoff)
            count, entsize = self.Ehdr.e_phnum, self.Ehdr.e_phentsize
            data = self.__file.read(count * entsize)
            for idx in range(count):
                logger.progress(idx, count, 'parsing Phdrs ')
                phdr = Elf32_Phdr(data[idx * entsize:])
                if phdr.p_type == PT_LOAD:
                    # NOTE(review): the falsy test means a PT_LOAD at
                    # vaddr 0 gets replaced by the next PT_LOAD's vaddr;
                    # kept as-is, confirm whether this is intended.
                    if not self.basemap:
                        self.basemap = phdr.p_vaddr
                elif phdr.p_type == PT_DYNAMIC:
                    self.dynamic = True
                elif phdr.p_type not in ELF_CONSTS['p_type']:
                    logger.verbose('invalid segment detected (removed)')
                    continue
                self.Phdr.append(phdr)

        # read section header table: unused by loader, can raise error
        self.Shdr = []
        if self.Ehdr.e_shoff:
            try:
                self.__file.seek(self.Ehdr.e_shoff)
                count, entsize = self.Ehdr.e_shnum, self.Ehdr.e_shentsize
                data = self.__file.read(count * entsize)
                for idx in range(count):
                    logger.progress(idx, count, 'parsing Shdrs ')
                    shdr = Elf32_Shdr(data[idx * entsize:])
                    if shdr.sh_type not in ELF_CONSTS['sh_type']:
                        raise ValueError('unknown sh_type')
                    self.Shdr.append(shdr)
            except Exception:
                # best effort: one bad entry invalidates the whole table,
                # but KeyboardInterrupt/SystemExit are no longer swallowed.
                logger.verbose('invalid section detected (all Shdr removed)')
                self.Shdr = []

        # read section's name string table:
        n = self.Ehdr.e_shstrndx
        if n != SHN_UNDEF and 0 <= n < len(self.Shdr):
            S = self.Shdr[n]
            self.__file.seek(S.sh_offset)
            data = self.__file.read(S.sh_size)
            if S.sh_type != SHT_STRTAB:
                logger.verbose('section names not a string table')
                for s in self.Shdr:
                    s.name = ''
            else:
                for s in self.Shdr:
                    s.name = data[s.sh_name:].split('\0')[0]

        self.functions = self.__functions()
        self.variables = self.__variables()

    ##

    def getsize(self):
        """Return the sum of p_filesz over all kept program headers."""
        return sum(s.p_filesz for s in self.Phdr)

    def getinfo(self, target):
        """Locate the section (or segment) that contains a target.

        Args:
            target: an integer virtual address, a string holding the
                address in hexadecimal, or a symbol name looked up in
                ``self.functions``. A string that parses as hexadecimal is
                always taken as an address, never as a symbol name.

        Returns:
            a tuple (container, offset, base) where container is the
            matching Elf32_Shdr or Elf32_Phdr, base is its virtual address
            and offset is ``address - base``. (None, 0, 0) is returned if
            the target cannot be resolved or no container holds it.
        """
        from numbers import Integral
        addr = None
        if isinstance(target, str):
            try:
                addr = int(target, 16)
            except ValueError:
                for a, f in self.functions.items():
                    # symtab entries are tuples starting with the name,
                    # relocation-derived entries are the bare name.
                    name = f[0] if isinstance(f, tuple) else f
                    if name == target:
                        # keys are already numeric addresses; the string
                        # case is only kept for backward compatibility.
                        addr = a if isinstance(a, Integral) else int(a, 16)
                        break
        elif isinstance(target, Integral):
            addr = target
        if addr is None:
            # target is probably a symbol not found in functions
            return None, 0, 0

        # now we have addr so we can see in which section/segment it is...
        # sections are smaller than segments so we try first with Shdr
        # but this may lead to errors because what really matters are segments
        # loaded by the kernel binfmt_elf.c loader.
        # NOTE(review): segments are only searched when there are no
        # sections at all; there is no fallback when no section matches.
        if self.Shdr:
            for s in reversed(self.Shdr):
                if s.sh_type == SHT_NULL:
                    continue
                if s.sh_addr <= addr < s.sh_addr + s.sh_size:
                    return s, addr - s.sh_addr, s.sh_addr
        elif self.Phdr:
            for s in reversed(self.Phdr):
                if s.p_type != PT_LOAD:
                    continue
                if s.p_vaddr <= addr < s.p_vaddr + s.p_filesz:
                    return s, addr - s.p_vaddr, s.p_vaddr
        return None, 0, 0

    ##

    def data(self, target, size):
        """Return only the data part of ``readcode(target, size)``."""
        return self.readcode(target, size)[0]

    def readcode(self, target, size=None):
        """Read the content found at a target.

        Args:
            target: anything accepted by getinfo().
            size: number of items to return; None means everything from
                the target up to the end of its section/segment.

        Returns:
            a tuple (data, 0, address) where address is the base of the
            container plus the offset of the target. data is '' if the
            target is not resolved or its container has no content.
        """
        s, offset, base = self.getinfo(target)
        data = ''
        if s:
            if isinstance(s, Elf32_Phdr):
                c = self.readsegment(s)
            else:
                c = self.readsection(s)
            if c:
                if size is not None:
                    # NOTE(review): string tables are unwrapped to their
                    # raw data only on this (sized) path.
                    if isinstance(c, Elf32_Str):
                        c = c.data
                    data = c[offset:offset + size]
                else:
                    data = c[offset:]
        return data, 0, base + offset

    def readsegment(self, S):
        """Return the content of a PT_LOAD segment, NUL-padded to p_memsz.

        Returns None if S is falsy or is not a PT_LOAD segment.
        """
        if S and S.p_type == PT_LOAD:
            self.__file.seek(S.p_offset)
            return self.__file.read(S.p_filesz).ljust(S.p_memsz, '\x00')
        return None

    ##
    def loadsegment(self, S, pagesize=None):
        """Return a PT_LOAD segment as a ``{p_vaddr: content}`` mapping.

        Args:
            S: the program header to load.
            pagesize: if set, content is NUL-padded up to this length
                (content longer than pagesize is not truncated).

        Returns:
            a one-entry dict keyed by the segment virtual address, or None
            if S is falsy or is not a PT_LOAD segment.
        """
        if S and S.p_type == PT_LOAD:
            self.__file.seek(S.p_offset)
            # ELF requires p_vaddr and p_offset to be congruent modulo
            # p_align; p_align values 0 and 1 mean "no alignment", which
            # also avoids a division by zero here.
            if S.p_align > 1 and \
               (S.p_offset % S.p_align) != (S.p_vaddr % S.p_align):
                logger.verbose('wrong p_vaddr/p_align [%08x/%0d]' %
                               (S.p_vaddr, S.p_align))
            base = S.p_vaddr
            content = self.__file.read(S.p_filesz).ljust(S.p_memsz, '\x00')
            if pagesize:
                # note: content is not truncated, only extended if needed...
                content = content.ljust(pagesize, '\x00')
            return {base: content}
        return None

    ##

    def readsection(self, sect):
        """Read and decode a section according to its sh_type.

        Args:
            sect: a section name, an index into ``self.Shdr``, or a
                section header object.

        Returns:
            a list of symbols, relocations or dynamic entries, an
            Elf32_Str, or the raw content for SHT_PROGBITS. None is
            returned if the section is not found or its type is not
            handled here.
        """
        from numbers import Integral
        S = None
        if isinstance(sect, str):
            for st in self.Shdr:
                if st.name == sect:
                    S = st
                    break
        elif isinstance(sect, Integral):
            S = self.Shdr[sect]
        else:
            S = sect
        if not S:
            return None
        kind = S.sh_type
        if kind in (SHT_SYMTAB, SHT_DYNSYM):
            return self.__read_symtab(S)
        if kind == SHT_STRTAB:
            return self.__read_strtab(S)
        if kind in (SHT_REL, SHT_RELA):
            return self.__read_relocs(S)
        if kind == SHT_DYNAMIC:
            return self.__read_dynamic(S)
        if kind == SHT_PROGBITS:
            self.__file.seek(S.sh_offset)
            return self.__file.read(S.sh_size)
        return None

    ##

    def __read_table(self, section, entry_cls, mismatch):
        """Read a section made of fixed-size entries and parse each one.

        Raises ElfError(mismatch) if sh_entsize is zero or does not divide
        sh_size.
        """
        self.__file.seek(section.sh_offset)
        data = self.__file.read(section.sh_size)
        entsize = section.sh_entsize
        # a zero entry size would otherwise raise ZeroDivisionError.
        if entsize == 0 or (section.sh_size % entsize) != 0:
            raise ElfError(mismatch)
        count = section.sh_size // entsize
        return [entry_cls(data[i * entsize:]) for i in range(count)]

    def __read_symtab(self, section):
        """Parse a symbol table section into a list of Elf32_Sym.

        The result is also stored in ``self.symtab``. Returns None (after
        a warning) if the section is not SHT_SYMTAB or SHT_DYNSYM.
        """
        if section.sh_type not in (SHT_SYMTAB, SHT_DYNSYM):
            logger.warning('not a symbol table section')
            return None
        symtab = self.__read_table(section, Elf32_Sym,
                                   'symbol table size mismatch')
        self.symtab = symtab
        return symtab

    ##

    def __read_strtab(self, section):
        """Wrap a string table section in an Elf32_Str.

        The result is also stored in ``self.strtab``. Raises ElfError if
        the section is not SHT_STRTAB.
        """
        if section.sh_type != SHT_STRTAB:
            raise ElfError('not a string table section')
        self.__file.seek(section.sh_offset)
        data = self.__file.read(section.sh_size)
        strtab = Elf32_Str(data)
        self.strtab = strtab
        return strtab

    ##

    def __read_relocs(self, section):
        """Parse a relocation section into Elf32_Rel / Elf32_Rela entries.

        The result is also stored in ``self.reltab``. Returns None (after
        a warning) if the section is not SHT_REL or SHT_RELA.
        """
        if section.sh_type not in (SHT_REL, SHT_RELA):
            logger.warning('not a relocation table section')
            return None
        if section.sh_type == SHT_REL:
            entry_cls = Elf32_Rel
        else:
            entry_cls = Elf32_Rela
        reltab = self.__read_table(section, entry_cls,
                                   'relocation table size mismatch')
        self.reltab = reltab
        return reltab

    def __read_dynamic(self, section):
        """Parse a dynamic section into a list of Elf32_Dyn.

        The result is also stored in ``self.dyntab``. Returns None (after
        a warning) if the section is not SHT_DYNAMIC.
        """
        if section.sh_type != SHT_DYNAMIC:
            logger.warning('not a dynamic linking section')
            return None
        dyntab = self.__read_table(section, Elf32_Dyn,
                                   'dynamic linking size mismatch')
        self.dyntab = dyntab
        return dyntab

    ##

    def __read_note(self, section):
        """Wrap a note section in an Elf32_Note, stored in ``self.note``.

        Returns None (after a warning) if the section is not SHT_NOTE.
        """
        if section.sh_type != SHT_NOTE:
            logger.warning('not a note section')
            return None
        self.__file.seek(section.sh_offset)
        data = self.__file.read(section.sh_size)
        note = Elf32_Note(data)
        self.note = note
        return note

    ##

    def __filter_by_section(self, D, fltr):
        """Drop from D (in place) symbols not defined in section ``fltr``.

        Values of D must be the (name, size, info, shndx) tuples built by
        __symbols; the section is identified by shndx.
        """
        nsections = len(self.Shdr)
        # iterate over a snapshot: entries are removed while scanning.
        for k, v in list(D.items()):
            shndx = v[3]
            # indexes outside the table cannot belong to a named section.
            if not (0 <= shndx < nsections) or self.Shdr[shndx].name != fltr:
                D.pop(k)
        return D

    def __functions(self, fltr=None):
        """Build the address -> function symbol dictionary.

        Args:
            fltr: optional section name; only symbols defined in that
                section are kept. It does not apply to the entries added
                from relocations for dynamic files.
        """
        D = self.__symbols(STT_FUNC)
        # fltr applies to section name only :
        if fltr:
            self.__filter_by_section(D, fltr)
        if self.dynamic:
            D.update(self.__dynamic(STT_FUNC))
        return D

    def __variables(self, fltr=None):
        """Build the address -> object symbol dictionary.

        Args:
            fltr: optional section name; only symbols defined in that
                section are kept.
        """
        D = self.__symbols(STT_OBJECT)
        # fltr applies also to section name :
        if fltr:
            self.__filter_by_section(D, fltr)
        return D

    def __symbols(self, t):
        """Collect '.symtab' symbols of type ``t`` with a non-zero value.

        Returns a dict mapping st_value to the tuple
        (name, st_size, st_info, st_shndx), with names resolved through
        '.strtab'. The dict is empty if either section is missing or empty.
        """
        if not self.readsection('.symtab'):
            return {}
        D = {}
        if self.readsection('.strtab'):
            for sym in self.symtab:
                if sym.st_type == t and sym.st_value:
                    D[sym.st_value] = (self.strtab[sym.st_name], sym.st_size,
                                       sym.st_info, sym.st_shndx)
        else:
            # need to build a fake strtab with our own symbol names:
            pass  #TODO
        return D

    def __dynamic(self, type=STT_FUNC):
        """Map relocation offsets to the names of the symbols they target.

        Reads '.dynsym' and '.dynstr' (which replace ``self.symtab`` and
        ``self.strtab``), then walks every SHT_REL / SHT_RELA section.
        The ``type`` argument is currently not used.
        """
        if not self.readsection('.dynsym'):
            return {}
        D = {}
        if self.readsection('.dynstr'):
            for s in self.Shdr:
                if s.sh_type not in (SHT_REL, SHT_RELA):
                    continue
                relocs = self.readsection(s)
                if not relocs:
                    continue
                for r in relocs:
                    if r.r_offset:
                        sym = self.symtab[r.r_sym]
                        D[r.r_offset] = self.strtab[sym.st_name]
        else:
            # need to build a fake strtab with our own symbol names:
            pass  #TODO
        return D

    def __str__(self):
        """Return a text dump of the header, the sections and the segments."""

        def render(item):
            # print the item with a tab prefix, then restore its own prefix.
            saved = item.pfx
            item.pfx = '\t'
            text = str(item)
            item.pfx = saved
            return text

        ss = ['ELF header:', render(self.Ehdr)]
        ss.append('\nSections:')
        for s in self.Shdr:
            ss.append(render(s))
            ss.append('---')
        ss.append('\nSegments:')
        for s in self.Phdr:
            ss.append(render(s))
            ss.append('---')
        return '\n'.join(ss)