def __init__(self, manager):
     """
     Attach the instance to its manager and start with empty bookkeeping.

     :param manager: The owning manager object. It is kept on the instance, and
                     its ``_header`` attribute is also exposed as ``self._header``.
     """
     self._manager = manager
     self._header = self._manager._header

     # Bookkeeping containers, all empty to begin with. What is stored in them
     # is decided by code outside this method.
     self.fqueue, self.fqueue_sucess, self.new_fb_list = {}, {}, {}
     self.new_bb_list = []

     # This is a call on the pyvex module, not on this object, so it presumably
     # affects every later lift in the process -- TODO confirm.
     pyvex.set_iropt_level(1)
Exemple #2
0
    def _load_plt(self):
        """
        Try to find the address of the PLT stub for every entry in ``self.jmprel``.

        Several architecture-specific heuristics are tried in turn; each candidate is
        registered through ``self._add_plt_stub``. If stubs are still missing afterwards,
        blocks are lifted with ``self._block`` and scanned for one whose constants include
        the GOT slot of the wanted symbol. The remaining stubs are then guessed from the
        nearest known stub and the measured stub size.

        The method returns ``None`` and gives up silently when nothing can be found.
        """
        # The main problem here is that there's literally no good way to do this.
        # like, I read through the binutils source and they have a hacked up solution for each arch
        # that performs actual comparisons against the machine code in the plt section.
        # it's pretty bad.
        # we sanity-check all our attempts by requiring that the block lifted at the given address
        # references the GOT slot for the symbol.

        pyvex.set_iropt_level(1)

        # Look the PLT-like section up once, for every architecture. It is needed both by
        # ATTEMPT 1 and by the "resolve the very first entry" step near the end, which
        # used to crash on architectures that never bound this name.
        plt_sec = None
        if '.plt' in self.sections_map:
            plt_sec = self.sections_map['.plt']
        if '.MIPS.stubs' in self.sections_map:
            plt_sec = self.sections_map['.MIPS.stubs']

        # ATTEMPT 1: some arches will just leave the plt stub addr in the import symbol
        if self.arch.name in ('ARM', 'ARMEL', 'ARMHF', 'AARCH64', 'MIPS32',
                              'MIPS64'):
            for name, reloc in self.jmprel.iteritems():
                if plt_sec is None or plt_sec.contains_addr(reloc.symbol.addr):
                    # without a section to bound the address, fall back to the sanity check
                    self._add_plt_stub(name,
                                       reloc.symbol.addr,
                                       sanity_check=plt_sec is None)

        # ATTEMPT 2: on intel chips the data in the got slot pre-relocation points to a lazy-resolver
        # stub immediately after the plt stub
        if self.arch.name in ('X86', 'AMD64'):
            for name, reloc in self.jmprel.iteritems():
                try:
                    self._add_plt_stub(name,
                                       self.memory.read_addr_at(reloc.addr) -
                                       6,
                                       sanity_check=not self.pic)
                except KeyError:
                    pass

            # do another sanity check: two symbols sharing one stub address means the
            # guesses are wrong, so throw all of them away
            if len(set(self._plt.itervalues())) != len(self._plt):
                self._plt = {}

        # ATTEMPT 3: one ppc scheme I've seen is that there are 16-byte stubs packed together
        # right before the resolution stubs.
        if self.arch.name in ('PPC32', ):
            resolver_stubs = sorted(
                (self.memory.read_addr_at(reloc.addr), name)
                for name, reloc in self.jmprel.iteritems())
            # nothing to index into when there are no jump relocations at all
            if resolver_stubs:
                stubs_table = resolver_stubs[0][0] - 16 * len(resolver_stubs)
                for i, (_, name) in enumerate(resolver_stubs):
                    self._add_plt_stub(name, stubs_table + i * 16)

        if len(self._plt) == len(self.jmprel):
            # real quick, bail out before shit hits the fan
            return

        # ATTEMPT 4:
        # ok. time to go in on this.
        # try to find a single plt stub, anywhere. if we already have one, use that, otherwise
        # try to scan forward from _start to __libc_start_main to find that one.
        # then, scan forward and backward from that stub to find the rest of them. yikes!

        # keep a timer so we don't get stuck. keep this short and sweet.
        def tick():
            tick.bailout_timer -= 1
            # Raised explicitly instead of through `assert`, so the limit still applies
            # when assertions are stripped (python -O). The exception type is unchanged.
            if tick.bailout_timer <= 0:
                raise AssertionError("PLT scan bailout timer expired")

        tick.bailout_timer = 5

        def scan_forward(addr, name, push=False):
            # Walk forward from `addr` until a lifted block mentions the GOT slot of `name`,
            # then register that address as the stub. Returns False when the search fails.
            gotslot = self.jmprel[name].addr
            try:
                while True:
                    tick()
                    bb = self._block(addr)
                    if gotslot in [c.value for c in bb.all_constants]:
                        break
                    if bb.jumpkind == 'Ijk_NoDecode':
                        addr += self.arch.instruction_alignment
                    else:
                        addr += bb.size

                # with push=True, keep advancing while the next aligned address still
                # yields a block that references the slot
                while push and gotslot in [
                        c.value for c in self._block(
                            addr +
                            self.arch.instruction_alignment).all_constants
                ]:
                    addr += self.arch.instruction_alignment
                return self._add_plt_stub(name, addr)
            except (AssertionError, KeyError, pyvex.PyVEXError):
                return False

        if len(
                self._plt
        ) == 0 and '__libc_start_main' in self.jmprel and self.entry != 0:
            # try to scan forward through control flow to find __libc_start_main!
            try:
                last_jk = None
                addr = self.entry
                bb = self._block(addr)
                target = bb._get_defaultexit_target()
                while target is not None:
                    tick()
                    last_jk = bb.jumpkind
                    addr = target
                    bb = self._block(addr)
                    target = bb._get_defaultexit_target()

                if last_jk == 'Ijk_Call':
                    self._add_plt_stub('__libc_start_main', addr)
            except (AssertionError, KeyError, pyvex.PyVEXError):
                pass

        # if self.jmprel.keys()[0] not in self._plt:
        if not set(self.jmprel.keys()).intersection(self._plt.keys()):
            # LAST TRY: check if we have a .plt section
            if '.plt' not in self.sections_map:
                # WAHP WAHP
                return

            # try to find a block that references the first GOT slot
            tick.bailout_timer = 5
            scan_forward(self.sections_map['.plt'].vaddr,
                         self.jmprel.keys()[0],
                         push=True)

        if len(self._plt) == 0:
            # \(_^^)/
            return

        # if we've gotten this far there is at least one plt slot address known, guaranteed.
        plt_hitlist = [(name, self._plt.get(name)) for name in self.jmprel]
        last_good_idx = None
        stub_size = None

        name, addr = plt_hitlist[0]
        if addr is None and plt_sec is not None:
            # try to resolve the very first entry, starting at the beginning of the section.
            # without a known section there is no sensible starting point, so skip it.
            tick.bailout_timer = 5
            guessed_addr = plt_sec.vaddr
            scan_forward(guessed_addr, name)
            if name in self._plt:
                # resolved :-)
                plt_hitlist[0] = (name, self._plt[name])

        for i, (name, addr) in enumerate(plt_hitlist):
            if addr is not None:
                last_good_idx = i
                if stub_size is None:
                    # measure the stub once: the first block, plus the following block
                    # when the first one simply falls through into it
                    b0 = self._block(addr)
                    stub_size = b0.size
                    if isinstance(b0.next, pyvex.expr.Const
                                  ) and b0.next.con.value == addr + b0.size:
                        b1 = self._block(addr + b0.size)
                        stub_size += b1.size
                continue

            if last_good_idx is None:
                continue

            # extrapolate from the closest known stub before this one
            tick.bailout_timer = 5
            guess_addr = plt_hitlist[last_good_idx][1] + (
                i - last_good_idx) * stub_size
            scan_forward(guess_addr, name)
Exemple #3
0
    def lift(self,
             addr,
             arch=None,
             insn_bytes=None,
             max_size=None,
             num_inst=None,
             traceflags=0,
             thumb=False,
             backup_state=None,
             opt_level=None):
        """
        Returns a pyvex block starting at address addr

        @param addr: the address at which to start the block

        The below parameters are optional:
        @param thumb: whether the block should be lifted in ARM's THUMB mode
        @param backup_state: a state to read bytes from instead of using project memory
        @param opt_level: the VEX optimization level to use
        @param insn_bytes: a string of bytes to use for the block instead of the project
        @param max_size: the maximum size of the block, in bytes
        @param num_inst: the maximum number of instructions
        @param traceflags: traceflags to be passed to VEX. Default: 0
        """
        passed_max_size = max_size is not None
        passed_num_inst = num_inst is not None
        max_size = VEX_IRSB_MAX_SIZE if max_size is None else max_size
        num_inst = VEX_IRSB_MAX_INST if num_inst is None else num_inst
        opt_level = VEX_DEFAULT_OPT_LEVEL if opt_level is None else opt_level

        if self._thumbable and addr % 2 == 1:
            thumb = True
        elif not self._thumbable and thumb:
            l.warning(
                "Why did you pass in thumb=True on a non-ARM architecture")
            thumb = False

        if thumb:
            addr &= ~1

        # TODO: FIXME: figure out what to do if we're about to exhaust the memory
        # (we can probably figure out how many instructions we have left by talking to IDA)

        if insn_bytes is not None:
            buff, size = insn_bytes, len(insn_bytes)
            max_size = min(max_size, size)
            passed_max_size = True
        else:
            buff, size = "", 0

            if backup_state:
                buff, size = self._bytes_from_state(backup_state, addr,
                                                    max_size)
                max_size = min(max_size, size)
            else:
                try:
                    buff, size = self._project.loader.memory.read_bytes_c(addr)
                except KeyError:
                    pass

            if not buff or size == 0:
                raise AngrMemoryError(
                    "No bytes in memory for block starting at 0x%x." % addr)

        # deal with thumb mode in ARM, sending an odd address and an offset
        # into the string
        byte_offset = 0
        real_addr = addr
        if thumb:
            byte_offset = 1
            addr += 1

        l.debug("Creating pyvex.IRSB of arch %s at 0x%x",
                self._project.arch.name, addr)

        arch = arch or self._project.arch

        pyvex.set_iropt_level(opt_level)
        try:
            if passed_max_size and not passed_num_inst:
                irsb = pyvex.IRSB(bytes=buff,
                                  mem_addr=addr,
                                  num_bytes=max_size,
                                  arch=arch,
                                  bytes_offset=byte_offset,
                                  traceflags=traceflags)
            elif not passed_max_size and passed_num_inst:
                irsb = pyvex.IRSB(bytes=buff,
                                  mem_addr=addr,
                                  num_bytes=VEX_IRSB_MAX_SIZE,
                                  num_inst=num_inst,
                                  arch=arch,
                                  bytes_offset=byte_offset,
                                  traceflags=traceflags)
            elif passed_max_size and passed_num_inst:
                irsb = pyvex.IRSB(bytes=buff,
                                  mem_addr=addr,
                                  num_bytes=min(size, max_size),
                                  num_inst=num_inst,
                                  arch=arch,
                                  bytes_offset=byte_offset,
                                  traceflags=traceflags)
            else:
                irsb = pyvex.IRSB(bytes=buff,
                                  mem_addr=addr,
                                  num_bytes=min(size, max_size),
                                  arch=arch,
                                  bytes_offset=byte_offset,
                                  traceflags=traceflags)
        except pyvex.PyVEXError:
            l.debug("VEX translation error at %#x", addr)
            if isinstance(buff, str):
                l.debug('Using bytes: ' + buff)
            else:
                l.debug("Using bytes: " +
                        str(pyvex.ffi.buffer(buff, size)).encode('hex'))
            e_type, value, traceback = sys.exc_info()
            raise AngrTranslationError, ("Translation error", e_type,
                                         value), traceback

        if insn_bytes is None:
            for stmt in irsb.statements:
                if stmt.tag != 'Ist_IMark' or stmt.addr == real_addr:
                    continue
                if self._project.is_hooked(stmt.addr):
                    size = stmt.addr - real_addr
                    irsb = pyvex.IRSB(bytes=buff,
                                      mem_addr=addr,
                                      num_bytes=size,
                                      arch=arch,
                                      bytes_offset=byte_offset,
                                      traceflags=traceflags)
                    break

        irsb = self._post_process(irsb)
        return Block(buff, irsb, thumb)
Exemple #4
0
    def lift(self, addr, arch=None, insn_bytes=None, max_size=None, num_inst=None,
             traceflags=0, thumb=False, backup_state=None, opt_level=None):
        """
        Returns a pyvex block starting at address `addr`.

        :param addr:    The address at which to start the block.

        The following parameters are optional:

        :param thumb:           Whether the block should be lifted in ARM's THUMB mode.
        :param backup_state:    A state to read bytes from instead of using project memory.
        :param opt_level:       The VEX optimization level to use.
        :param insn_bytes:      A string of bytes to use for the block instead of the project.
        :param max_size:        The maximum size of the block, in bytes.
        :param num_inst:        The maximum number of instructions.
        :param traceflags:      traceflags to be passed to VEX. (default: 0)
        """

        passed_max_size = max_size is not None
        passed_num_inst = num_inst is not None
        max_size = VEX_IRSB_MAX_SIZE if max_size is None else max_size
        num_inst = VEX_IRSB_MAX_INST if num_inst is None else num_inst
        opt_level = VEX_DEFAULT_OPT_LEVEL if opt_level is None else opt_level

        if self._thumbable and addr % 2 == 1:
            thumb = True
        elif not self._thumbable and thumb:
            l.warning("Why did you pass in thumb=True on a non-ARM architecture")
            thumb = False

        if thumb:
            addr &= ~1

        cache_key = (addr, insn_bytes, max_size, num_inst, thumb, opt_level)
        if self._cache_enabled and cache_key in self._block_cache:
            return self._block_cache[cache_key]

        # TODO: FIXME: figure out what to do if we're about to exhaust the memory
        # (we can probably figure out how many instructions we have left by talking to IDA)

        if insn_bytes is not None:
            buff, size = insn_bytes, len(insn_bytes)
            max_size = min(max_size, size)
            passed_max_size = True
        else:
            buff, size = "", 0

            if backup_state:
                buff, size = self._bytes_from_state(backup_state, addr, max_size)
                max_size = min(max_size, size)
            else:
                try:
                    buff, size = self._project.loader.memory.read_bytes_c(addr)
                except KeyError:
                    pass

            if not buff or size == 0:
                raise AngrMemoryError("No bytes in memory for block starting at 0x%x." % addr)

        # deal with thumb mode in ARM, sending an odd address and an offset
        # into the string
        byte_offset = 0
        real_addr = addr
        if thumb:
            byte_offset = 1
            addr += 1

        l.debug("Creating pyvex.IRSB of arch %s at 0x%x", self._project.arch.name, addr)

        arch = arch or self._project.arch

        pyvex.set_iropt_level(opt_level)
        try:
            if passed_max_size and not passed_num_inst:
                irsb = pyvex.IRSB(buff, addr, arch,
                                  num_bytes=max_size,
                                  bytes_offset=byte_offset,
                                  traceflags=traceflags)
            elif not passed_max_size and passed_num_inst:
                irsb = pyvex.IRSB(buff, addr, arch,
                                  num_bytes=VEX_IRSB_MAX_SIZE,
                                  num_inst=num_inst,
                                  bytes_offset=byte_offset,
                                  traceflags=traceflags)
            elif passed_max_size and passed_num_inst:
                irsb = pyvex.IRSB(buff, addr, arch,
                                  num_bytes=min(size, max_size),
                                  num_inst=num_inst,
                                  bytes_offset=byte_offset,
                                  traceflags=traceflags)
            else:
                irsb = pyvex.IRSB(buff, addr, arch,
                                  num_bytes=min(size, max_size),
                                  bytes_offset=byte_offset,
                                  traceflags=traceflags)
        except pyvex.PyVEXError:
            l.debug("VEX translation error at %#x", addr)
            if isinstance(buff, str):
                l.debug('Using bytes: ' + buff)
            else:
                l.debug("Using bytes: " + str(pyvex.ffi.buffer(buff, size)).encode('hex'))
            e_type, value, traceback = sys.exc_info()
            raise AngrTranslationError, ("Translation error", e_type, value), traceback

        if insn_bytes is None:
            for stmt in irsb.statements:
                if stmt.tag != 'Ist_IMark' or stmt.addr == real_addr:
                    continue
                if self._project.is_hooked(stmt.addr):
                    size = stmt.addr - real_addr
                    irsb = pyvex.IRSB(buff, addr, arch,
                                      num_bytes=size,
                                      bytes_offset=byte_offset,
                                      traceflags=traceflags)
                    break

        irsb = self._post_process(irsb)
        b = Block(buff, irsb, thumb)
        if self._cache_enabled:
            self._block_cache[cache_key] = b
        return b
Exemple #5
0
    def lift(self,
             addr,
             arch=None,
             insn_bytes=None,
             max_size=None,
             num_inst=None,
             traceflags=0,
             thumb=False,
             backup_state=None,
             opt_level=None):
        """
        Returns a pyvex block starting at address `addr`.

        :param addr:    The address at which to start the block.

        The following parameters are optional:

        :param arch:            The architecture to lift for (normalized by `_normalize_options`).
        :param thumb:           Whether the block should be lifted in ARM's THUMB mode.
        :param backup_state:    A state to read bytes from instead of using project memory.
        :param opt_level:       The VEX optimization level to use. Forced to 0 when the project
                                supports self-modifying code.
        :param insn_bytes:      A string of bytes to use for the block instead of the project.
                                Required when the lifter has no associated project.
        :param max_size:        The maximum size of the block, in bytes.
        :param num_inst:        The maximum number of instructions.
        :param traceflags:      traceflags to be passed to VEX. (default: 0)

        :raises AngrLifterError:        No project is attached and `insn_bytes` was not given.
        :raises AngrMemoryError:        No bytes are available at `addr`.
        :raises AngrTranslationError:   pyvex failed to translate the bytes.
        """

        # Remember what the caller really supplied before the defaults are filled in.
        passed_max_size = max_size is not None
        passed_num_inst = num_inst is not None
        max_size = VEX_IRSB_MAX_SIZE if max_size is None else max_size
        num_inst = VEX_IRSB_MAX_INST if num_inst is None else num_inst

        # A lifter may have no project at all (see the insn_bytes handling below), so the
        # project must not be dereferenced unconditionally here.
        if self._project is not None and self._project._support_selfmodifying_code:
            # Only warn when the caller explicitly asked for optimization.
            if opt_level is not None and opt_level > 0:
                l.warning(
                    "Self-modifying code is not always correctly optimized by PyVEX. To guarantee correctness, VEX optimizations have been disabled."
                )
            opt_level = 0

        opt_level = VEX_DEFAULT_OPT_LEVEL if opt_level is None else opt_level

        addr, arch, thumb = self._normalize_options(addr, arch, thumb)

        # A cached block only counts as a hit when it still carries its VEX IR.
        cache_key = (addr, insn_bytes, max_size, num_inst, thumb, opt_level)
        if self._cache_enabled and cache_key in self._block_cache and self._block_cache[
                cache_key].vex is not None:
            self._cache_hit_count += 1
            return self._block_cache[cache_key]
        else:
            self._cache_miss_count += 1

        if insn_bytes is not None:
            # explicit bytes always act as a size limit
            buff, size = insn_bytes, len(insn_bytes)
            passed_max_size = True
        else:
            if self._project is None:
                raise AngrLifterError(
                    "Lifter does not have an associated angr Project. "
                    "You must specify \"insn_bytes\".")
            buff, size = self._load_bytes(addr, max_size, state=backup_state)

        if not buff or size == 0:
            raise AngrMemoryError(
                "No bytes in memory for block starting at %#x." % addr)

        # deal with thumb mode in ARM, sending an odd address and an offset
        # into the string
        byte_offset = 0
        real_addr = addr
        if thumb:
            byte_offset = 1
            addr += 1

        l.debug("Creating pyvex.IRSB of arch %s at %#x", arch.name, addr)

        # Keyword arguments shared by every pyvex.IRSB call in this method.
        shared_kwargs = {'bytes_offset': byte_offset, 'traceflags': traceflags}

        # The instruction limit is forwarded only when the caller supplied one. With an
        # instruction limit but no size limit, the byte limit is the VEX maximum rather
        # than the number of bytes loaded; in every other case it is `size`.
        first_kwargs = dict(shared_kwargs)
        if passed_num_inst:
            first_kwargs['num_inst'] = num_inst
        if passed_num_inst and not passed_max_size:
            first_kwargs['num_bytes'] = VEX_IRSB_MAX_SIZE
        else:
            first_kwargs['num_bytes'] = size

        pyvex.set_iropt_level(opt_level)
        try:
            irsb = pyvex.IRSB(buff, addr, arch, **first_kwargs)
        except pyvex.PyVEXError:
            l.debug("VEX translation error at %#x", addr)
            if isinstance(buff, str):
                l.debug('Using bytes: ' + buff)
            else:
                l.debug("Using bytes: " +
                        str(pyvex.ffi.buffer(buff, size)).encode('hex'))
            # re-raise as the angr error type while keeping the original traceback
            e_type, value, traceback = sys.exc_info()
            raise AngrTranslationError, ("Translation error", e_type,
                                         value), traceback

        if insn_bytes is None and self._project is not None:
            # Cut the block short in front of the first hooked instruction
            # (the very first instruction of the block does not count).
            for stmt in irsb.statements:
                if stmt.tag != 'Ist_IMark' or stmt.addr == real_addr:
                    continue
                if self._project.is_hooked(stmt.addr):
                    size = stmt.addr - real_addr
                    irsb = pyvex.IRSB(buff,
                                      addr,
                                      arch,
                                      num_bytes=size,
                                      **shared_kwargs)
                    break

        irsb = self._post_process(irsb, arch)
        b = Block(buff, arch=arch, addr=addr, vex=irsb, thumb=thumb)
        if self._cache_enabled:
            self._block_cache[cache_key] = b
        return b
Exemple #6
0
    def lift(self,
            state=None,
            clemory=None,
            insn_bytes=None,
            arch=None,
            addr=None,
            size=None,
            num_inst=None,
            traceflags=0,
            thumb=False,
            opt_level=None):

        """
        Lift an IRSB.

        There are many possible valid sets of parameters. You at the very least must pass some
        source of data, some source of an architecture, and some source of an address.

        Sources of data in order of priority: insn_bytes, clemory, state

        Sources of an address, in order of priority: addr, state

        Sources of an architecture, in order of priority: arch, clemory, state

        :param state:           A state to use as a data source.
        :param clemory:         A cle.memory.Clemory object to use as a data source.
        :param addr:            The address at which to start the block.
        :param thumb:           Whether the block should be lifted in ARM's THUMB mode.
        :param opt_level:       The VEX optimization level to use. The final IR optimization level is determined by
                                (ordered by priority):
                                - Argument opt_level
                                - opt_level is set to 1 if OPTIMIZE_IR exists in state options
                                - self._default_opt_level
        :param insn_bytes:      A string of bytes to use as a data source.
        :param size:            The maximum size of the block, in bytes.
        :param num_inst:        The maximum number of instructions.
        :param traceflags:      traceflags to be passed to VEX. (default: 0)
        """
        # phase 0: sanity check
        if not state and not clemory and not insn_bytes:
            raise ValueError("Must provide state or clemory or insn_bytes!")
        if not state and not clemory and not arch:
            raise ValueError("Must provide state or clemory or arch!")
        if addr is None and not state:
            raise ValueError("Must provide state or addr!")
        if arch is None:
            arch = clemory._arch if clemory else state.arch
        if arch.name.startswith("MIPS") and self._single_step:
            l.error("Cannot specify single-stepping on MIPS.")
            self._single_step = False

        # phase 1: parameter defaults
        if addr is None:
            addr = state.se.any_int(state._ip)
        if size is not None:
            size = min(size, VEX_IRSB_MAX_SIZE)
        if size is None:
            size = VEX_IRSB_MAX_SIZE
        if num_inst is not None:
            num_inst = min(num_inst, VEX_IRSB_MAX_INST)
        if num_inst is None and self._single_step:
            num_inst = 1
        if opt_level is None:
            if state and o.OPTIMIZE_IR in state.options:
                opt_level = 1
            else:
                opt_level = self._default_opt_level
        if self._support_selfmodifying_code:
            if opt_level > 0:
                l.warning("Self-modifying code is not always correctly optimized by PyVEX. To guarantee correctness, VEX optimizations have been disabled.")
                opt_level = 0
                if state and o.OPTIMIZE_IR in state.options:
                    state.options.remove(o.OPTIMIZE_IR)

        # phase 2: permissions
        if state and o.STRICT_PAGE_ACCESS in state.options:
            try:
                perms = state.memory.permissions(addr)
            except (KeyError, SimMemoryError):  # TODO: can this still raise KeyError?
                raise SimSegfaultError(addr, 'exec-miss')
            else:
                if not perms.symbolic:
                    perms = perms.args[0]
                    if not perms & 4:
                        raise SimSegfaultError(addr, 'non-executable')

        # phase 3: thumb normalization
        thumb = int(thumb)
        if isinstance(arch, ArchARM):
            if addr % 2 == 1:
                thumb = 1
            if thumb:
                addr &= ~1
        elif thumb:
            l.error("thumb=True passed on non-arm architecture!")
            thumb = 0

        # phase 4: check cache
        cache_key = (addr, insn_bytes, size, num_inst, thumb, opt_level)
        if self._use_cache and cache_key in self._block_cache:
            self._cache_hit_count += 1
            irsb = self._block_cache[cache_key]
            stop_point = self._first_stoppoint(irsb)
            if stop_point is None:
                return irsb
            else:
                size = stop_point - addr
                # check the cache again
                cache_key = (addr, insn_bytes, size, num_inst, thumb, opt_level)
                if cache_key in self._block_cache:
                    self._cache_hit_count += 1
                    return self._block_cache[cache_key]
                else:
                    self._cache_miss_count += 1
        else:
            self._cache_miss_count += 1

        # phase 5: get bytes
        if insn_bytes is not None:
            buff, size = insn_bytes, len(insn_bytes)
        else:
            buff, size = self._load_bytes(addr, size, state, clemory)

        if not buff or size == 0:
            raise SimEngineError("No bytes in memory for block starting at %#x." % addr)

        # phase 6: call into pyvex
        l.debug("Creating pyvex.IRSB of arch %s at %#x", arch.name, addr)
        pyvex.set_iropt_level(opt_level)

        try:
            for subphase in xrange(2):
                irsb = pyvex.IRSB(buff, addr + thumb, arch,
                                  num_bytes=size,
                                  num_inst=num_inst,
                                  bytes_offset=thumb,
                                  traceflags=traceflags)

                if subphase == 0:
                    # check for possible stop points
                    stop_point = self._first_stoppoint(irsb)
                    if stop_point is not None:
                        size = stop_point - addr
                        continue

                if self._use_cache:
                    self._block_cache[cache_key] = irsb
                return irsb

        # phase x: error handling
        except pyvex.PyVEXError:
            l.debug("VEX translation error at %#x", addr)
            if isinstance(buff, str):
                l.debug('Using bytes: ' + buff)
            else:
                l.debug("Using bytes: " + str(pyvex.ffi.buffer(buff, size)).encode('hex'))
            e_type, value, traceback = sys.exc_info()
            raise SimTranslationError, ("Translation error", e_type, value), traceback
Exemple #7
0
    def lift(self, addr, arch=None, insn_bytes=None, max_size=None, num_inst=None,
             traceflags=0, thumb=False, backup_state=None, opt_level=None):
        """
        Returns a pyvex block starting at address `addr`.

        :param addr:    The address at which to start the block.

        The following parameters are optional:

        :param thumb:           Whether the block should be lifted in ARM's THUMB mode.
        :param backup_state:    A state to read bytes from instead of using project memory.
        :param opt_level:       The VEX optimization level to use.
        :param insn_bytes:      A string of bytes to use for the block instead of the project.
        :param max_size:        The maximum size of the block, in bytes.
        :param num_inst:        The maximum number of instructions.
        :param traceflags:      traceflags to be passed to VEX. (default: 0)
        """

        passed_max_size = max_size is not None
        passed_num_inst = num_inst is not None
        max_size = VEX_IRSB_MAX_SIZE if max_size is None else max_size
        num_inst = VEX_IRSB_MAX_INST if num_inst is None else num_inst
        opt_level = VEX_DEFAULT_OPT_LEVEL if opt_level is None else opt_level

        addr, arch, thumb = self._normalize_options(addr, arch, thumb)

        cache_key = (addr, insn_bytes, max_size, num_inst, thumb, opt_level)
        if self._cache_enabled and cache_key in self._block_cache and self._block_cache[cache_key].vex is not None:
            self._cache_hit_count += 1
            return self._block_cache[cache_key]
        else:
            self._cache_miss_count += 1

        if insn_bytes is not None:
            buff, size = insn_bytes, len(insn_bytes)
            passed_max_size = True
        else:
            if self._project is None:
                raise AngrLifterError("Lifter does not have an associated angr Project. "
                                      "You must specify \"insn_bytes\".")
            buff, size = self._load_bytes(addr, max_size, state=backup_state)

        if not buff or size == 0:
            raise AngrMemoryError("No bytes in memory for block starting at %#x." % addr)

        # deal with thumb mode in ARM, sending an odd address and an offset
        # into the string
        byte_offset = 0
        real_addr = addr
        if thumb:
            byte_offset = 1
            addr += 1

        l.debug("Creating pyvex.IRSB of arch %s at %#x", arch.name, addr)

        pyvex.set_iropt_level(opt_level)
        try:
            if passed_max_size and not passed_num_inst:
                irsb = pyvex.IRSB(buff, addr, arch,
                                  num_bytes=size,
                                  bytes_offset=byte_offset,
                                  traceflags=traceflags)
            elif not passed_max_size and passed_num_inst:
                irsb = pyvex.IRSB(buff, addr, arch,
                                  num_bytes=VEX_IRSB_MAX_SIZE,
                                  num_inst=num_inst,
                                  bytes_offset=byte_offset,
                                  traceflags=traceflags)
            elif passed_max_size and passed_num_inst:
                irsb = pyvex.IRSB(buff, addr, arch,
                                  num_bytes=size,
                                  num_inst=num_inst,
                                  bytes_offset=byte_offset,
                                  traceflags=traceflags)
            else:
                irsb = pyvex.IRSB(buff, addr, arch,
                                  num_bytes=size,
                                  bytes_offset=byte_offset,
                                  traceflags=traceflags)
        except pyvex.PyVEXError:
            l.debug("VEX translation error at %#x", addr)
            if isinstance(buff, str):
                l.debug('Using bytes: ' + buff)
            else:
                l.debug("Using bytes: " + str(pyvex.ffi.buffer(buff, size)).encode('hex'))
            e_type, value, traceback = sys.exc_info()
            raise AngrTranslationError, ("Translation error", e_type, value), traceback

        if insn_bytes is None and self._project is not None:
            for stmt in irsb.statements:
                if stmt.tag != 'Ist_IMark' or stmt.addr == real_addr:
                    continue
                if self._project.is_hooked(stmt.addr):
                    size = stmt.addr - real_addr
                    irsb = pyvex.IRSB(buff, addr, arch,
                                      num_bytes=size,
                                      bytes_offset=byte_offset,
                                      traceflags=traceflags)
                    break

        irsb = self._post_process(irsb, arch)
        b = Block(buff, arch=arch, addr=addr, vex=irsb, thumb=thumb)
        if self._cache_enabled:
            self._block_cache[cache_key] = b
        return b
Exemple #8
0
    def _load_plt(self):
        """
        Try to locate the PLT stub for every symbol in `self.jmprel` and register each one
        through `self._add_plt_stub`.

        Several architecture-specific heuristics are tried in turn; if they do not account for
        every jump relocation, a bounded scan over lifted blocks is used to find the rest.
        The method returns nothing: its results are whatever `_add_plt_stub` records.
        """
        # The main problem here is that there's literally no good way to do this.
        # like, I read through the binutils source and they have a hacked up solution for each arch
        # that performs actual comparisons against the machine code in the plt section.
        # it's pretty bad.
        # we sanity-check all our attempts by requiring that the block lifted at the given address
        # references the GOT slot for the symbol.

        pyvex.set_iropt_level(1)

        # Resolve the stub section once, for every architecture. The fallback near the end of
        # this method reads it regardless of which attempt ran, so it must always be bound.
        plt_sec = None
        if '.plt' in self.sections_map:
            plt_sec = self.sections_map['.plt']
        if '.MIPS.stubs' in self.sections_map:
            plt_sec = self.sections_map['.MIPS.stubs']

        # ATTEMPT 1: some arches will just leave the plt stub addr in the import symbol
        if self.arch.name in ('ARM', 'ARMEL', 'ARMHF', 'AARCH64', 'MIPS32', 'MIPS64'):
            for name, reloc in self.jmprel.iteritems():
                if plt_sec is None or plt_sec.contains_addr(reloc.symbol.addr):
                    self._add_plt_stub(name, reloc.symbol.addr, sanity_check=plt_sec is None)

        # ATTEMPT 2: on intel chips the data in the got slot pre-relocation points to a lazy-resolver
        # stub immediately after the plt stub
        if self.arch.name in ('X86', 'AMD64'):
            for name, reloc in self.jmprel.iteritems():
                try:
                    self._add_plt_stub(name, self.memory.read_addr_at(reloc.addr) - 6, sanity_check=not self.pic)
                except KeyError:
                    pass

            # do another sanity check: two symbols sharing one stub address means the guess was bad
            if len(set(self._plt.itervalues())) != len(self._plt):
                self._plt = {}

        # ATTEMPT 3: one ppc scheme I've seen is that there are 16-byte stubs packed together
        # right before the resolution stubs.
        if self.arch.name in ('PPC32',):
            resolver_stubs = sorted((self.memory.read_addr_at(reloc.addr), name) for name, reloc in self.jmprel.iteritems())
            # nothing to index into when there are no jump relocations at all
            if resolver_stubs:
                stubs_table = resolver_stubs[0][0] - 16 * len(resolver_stubs)
                for i, (_, name) in enumerate(resolver_stubs):
                    self._add_plt_stub(name, stubs_table + i*16)

        if len(self._plt) == len(self.jmprel):
            # every relocation is accounted for, no need for the expensive heuristics below
            return

        # ATTEMPT 4:
        # ok. time to go in on this.
        # try to find a single plt stub, anywhere. if we already have one, use that, otherwise
        # try to scan forward from _start to __libc_start_main to find that one.
        # then, scan forward and backward from that stub to find the rest of them. yikes!

        # keep a timer so we don't get stuck. keep this short and sweet.
        def tick():
            tick.bailout_timer -= 1
            if tick.bailout_timer <= 0:
                # raised explicitly instead of via `assert` so the bailout still works when
                # assertions are stripped (python -O); the handlers below catch AssertionError
                raise AssertionError("PLT scan bailout timer expired")
        tick.bailout_timer = 5

        def scan_forward(addr, name, push=False):
            """
            Walk forward block by block from `addr` until a block references the GOT slot of
            `name`, then register that address as the stub. With `push`, keep advancing by the
            instruction alignment while the following block still references the slot.

            Returns the result of `_add_plt_stub`, or False if the scan gave up or failed.
            """
            gotslot = self.jmprel[name].addr
            try:
                while True:
                    tick()
                    bb = self._block(addr)
                    if gotslot in [c.value for c in bb.all_constants]:
                        break
                    if bb.jumpkind == 'Ijk_NoDecode':
                        addr += self.arch.instruction_alignment
                    else:
                        addr += bb.size

                while push and gotslot in [c.value for c in self._block(addr + self.arch.instruction_alignment).all_constants]:
                    addr += self.arch.instruction_alignment
                return self._add_plt_stub(name, addr)
            except (AssertionError, KeyError, pyvex.PyVEXError):
                return False

        if not self._plt and '__libc_start_main' in self.jmprel and self.entry != 0:
            # try to scan forward through control flow to find __libc_start_main!
            try:
                last_jk = None
                addr = self.entry
                bb = self._block(addr)
                target = bb._get_defaultexit_target()
                while target is not None:
                    tick()
                    last_jk = bb.jumpkind
                    addr = target
                    bb = self._block(addr)
                    target = bb._get_defaultexit_target()

                if last_jk == 'Ijk_Call':
                    self._add_plt_stub('__libc_start_main', addr)
            except (AssertionError, KeyError, pyvex.PyVEXError):
                pass

        # if self.jmprel.keys()[0] not in self._plt:
        if not set(self.jmprel.keys()).intersection(self._plt.keys()):
            # LAST TRY: check if we have a .plt section
            if '.plt' not in self.sections_map:
                # WAHP WAHP
                return

            # try to find a block that references the first GOT slot
            tick.bailout_timer = 5
            scan_forward(self.sections_map['.plt'].vaddr, self.jmprel.keys()[0], push=True)

        if not self._plt:
            # \(_^^)/
            return

        # if we've gotten this far there is at least one plt slot address known, guaranteed.
        plt_hitlist = [(name, self._plt.get(name)) for name in self.jmprel]
        last_good_idx = None
        stub_size = None

        name, addr = plt_hitlist[0]
        if addr is None and plt_sec is not None:
            # try to resolve the very first entry, starting from the beginning of the stub
            # section (only possible when the binary actually has one)
            tick.bailout_timer = 5
            scan_forward(plt_sec.vaddr, name)
            if name in self._plt:
                # resolved :-)
                plt_hitlist[0] = (name, self._plt[name])

        for i, (name, addr) in enumerate(plt_hitlist):
            if addr is not None:
                last_good_idx = i
                if stub_size is None:
                    # measure a stub from the first known one; if its block simply falls
                    # through into the next block, count that block as part of the stub too
                    b0 = self._block(addr)
                    stub_size = b0.size
                    if isinstance(b0.next, pyvex.expr.Const) and b0.next.con.value == addr + b0.size:
                        b1 = self._block(addr + b0.size)
                        stub_size += b1.size
                continue

            if last_good_idx is None:
                continue

            # extrapolate from the closest preceding known stub, assuming evenly sized stubs
            tick.bailout_timer = 5
            guess_addr = plt_hitlist[last_good_idx][1] + (i - last_good_idx) * stub_size
            scan_forward(guess_addr, name)