Ejemplo n.º 1
0
class DSEEngine(object):
    """Dynamic Symbolic Execution Engine

    Drives a symbolic execution engine alongside a concrete execution (the
    jitter): before each concrete instruction, the symbolic state is checked
    against the concrete one, user callbacks are run, and the instruction is
    emulated symbolically.

    This class is meant to be overridden for each specific purpose
    """
    # Class of the symbolic engine instantiated in `prepare`
    SYMB_ENGINE = ESETrackModif

    def __init__(self, machine):
        """Create a detached engine; call `attach` to bind it to a jitter
        @machine: object providing `ir()` and `dis_engine(...)`
        """
        self.machine = machine
        self.handler = {} # addr -> callback(DSEEngine instance)
        self.instrumentation = {} # addr -> callback(DSEEngine instance)
        self.addr_to_cacheblocks = {} # addr -> {label -> IRBlock}
        self.ir_arch = self.machine.ir() # corresponding IR

        # Defined after attachment
        self.jitter = None # Jitload (concrete execution)
        self.symb = None # SymbolicExecutionEngine
        self.symb_concrete = None # Concrete SymbExec for path disambiguation
        self.mdis = None # DisasmEngine

    def prepare(self):
        """Prepare the environment for attachment with a jitter

        `self.jitter` must already be set (see `attach`)
        """
        # Disassembler
        self.mdis = self.machine.dis_engine(bin_stream_vm(self.jitter.vm),
                                            lines_wd=1)

        # Symbexec engine
        ## Prepare symbexec engines
        self.symb = self.SYMB_ENGINE(self.jitter.cpu, self.jitter.vm,
                                     self.ir_arch, {})
        self.symb.enable_emulated_simplifications()
        self.symb_concrete = EmulatedSymbExec(self.jitter.cpu, self.jitter.vm,
                                              self.ir_arch, {})

        ## Update registers value
        self.symb.symbols[self.ir_arch.IRDst] = ExprInt(getattr(self.jitter.cpu,
                                                                self.ir_arch.pc.name),
                                                        self.ir_arch.IRDst.size)

        # Avoid memory write
        self.symb.func_write = None

        # Activate callback on each instr
        self.jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
        self.jitter.exec_cb = self.callback

    def attach(self, emulator):
        """Attach the DSE to @emulator
        @emulator: jitload (or API equivalent) instance"""
        self.jitter = emulator
        self.prepare()

    def handle(self, cur_addr):
        r"""Handle destination
        @cur_addr: Expr of the next address in concrete execution
        /!\ cur_addr may be a lbl_gen

        In this method, self.symb is in the "just before branching" state.
        The default implementation does nothing; it is a hook for subclasses.
        """
        pass

    def add_handler(self, addr, callback):
        """Add a @callback for address @addr before any state update.
        The state IS NOT updated after returning from the callback
        @addr: int
        @callback: func(dse instance)"""
        self.handler[addr] = callback

    def add_lib_handler(self, libimp, namespace):
        """Add search for handler based on a @libimp libimp instance

        Known functions will be looked by {name}_symb in the @namespace.
        Addresses without a matching stub get a handler raising RuntimeError
        when reached.
        @libimp: object exposing `fad2cname` (addr -> function name)
        @namespace: dict-like, name -> callable
        """

        # lambda cannot contain statement
        def default_func(dse):
            fname = "%s_symb" % libimp.fad2cname[dse.jitter.pc]
            raise RuntimeError("Symbolic stub '%s' not found" % fname)

        for addr, fname in libimp.fad2cname.iteritems():
            fname = "%s_symb" % fname
            func = namespace.get(fname, None)
            if func is not None:
                self.add_handler(addr, func)
            else:
                self.add_handler(addr, default_func)

    def add_instrumentation(self, addr, callback):
        """Add a @callback for address @addr before any state update.
        The state IS updated after returning from the callback
        @addr: int
        @callback: func(dse instance)"""
        self.instrumentation[addr] = callback

    def _check_state(self):
        """Check the current state against the concrete one

        Only the expressions reported as modified by the symbolic engine, and
        whose symbolic value is a concrete integer, are compared.
        Raise DriftException (with the list of DriftInfo) on any mismatch
        """
        errors = [] # List of DriftInfo
        ignored = (self.ir_arch.pc, self.ir_arch.IRDst)

        for symbol in self.symb.modified_expr:
            # Do not consider PC
            if symbol in ignored:
                continue

            # Consider only concrete values
            symb_value = self.eval_expr(symbol)
            if not symb_value.is_int():
                continue
            symb_value = int(symb_value)

            # Check computed values against real ones
            if symbol.is_id():
                if hasattr(self.jitter.cpu, symbol.name):
                    value = getattr(self.jitter.cpu, symbol.name)
                    if value != symb_value:
                        errors.append(DriftInfo(symbol, symb_value, value))
            elif symbol.is_mem() and symbol.arg.is_int():
                # Size is in bits; explicit floor division gives a byte count
                value_chr = self.jitter.vm.get_mem(int(symbol.arg),
                                                   symbol.size // 8)
                # Bytes are reversed before being read as one hex number
                exp_value = int(value_chr[::-1].encode("hex"), 16)
                if exp_value != symb_value:
                    errors.append(DriftInfo(symbol, symb_value, exp_value))

        # Check for drift, and act accordingly
        if errors:
            raise DriftException(errors)

    def callback(self, _):
        """Called before each instruction

        Check the synchronization, run the callbacks registered on the current
        address, then symbolically emulate the instruction at this address.
        @_: unused
        Always return True
        """
        # Assert synchronization with concrete execution
        self._check_state()

        # Call callbacks associated to the current address
        cur_addr = self.jitter.pc

        if cur_addr in self.handler:
            # Handlers replace the emulation: no state update afterwards
            self.handler[cur_addr](self)
            return True

        if cur_addr in self.instrumentation:
            self.instrumentation[cur_addr](self)

        # Handle current address
        self.handle(ExprInt(cur_addr, self.ir_arch.IRDst.size))

        # Avoid memory issue in ExpressionSimplifier
        if len(self.symb.expr_simp.simplified_exprs) > 100000:
            self.symb.expr_simp.simplified_exprs.clear()

        # Get IR blocks
        if cur_addr in self.addr_to_cacheblocks:
            self.ir_arch.blocks.clear()
            self.ir_arch.blocks.update(self.addr_to_cacheblocks[cur_addr])
        else:

            ## Reset cache structures
            self.mdis.job_done.clear()
            self.ir_arch.blocks.clear()# = {}

            ## Update current state
            asm_block = self.mdis.dis_bloc(cur_addr)
            self.ir_arch.add_bloc(asm_block)
            self.addr_to_cacheblocks[cur_addr] = dict(self.ir_arch.blocks)

        # Emulate the current instruction
        self.symb.reset_modified()

        # Is the symbolic execution going (potentially) to jump on a lbl_gen?
        if len(self.ir_arch.blocks) == 1:
            # The returned destination is not needed here
            self.symb.emul_ir_blocks(cur_addr)
        else:
            # Emulation could stuck in generated IR blocks
            # But concrete execution callback is not enough precise to obtain
            # the full IR blocks path
            # -> Use a fully concrete execution to get back path

            # Update the concrete execution
            self._update_state_from_concrete_symb(self.symb_concrete)
            while True:
                next_addr_concrete = self.symb_concrete.emul_ir_block(cur_addr)
                self.symb.emul_ir_block(cur_addr)

                if not(expr_is_label(next_addr_concrete) and
                       next_addr_concrete.name.offset is None):
                    # Not a lbl_gen, exit
                    break

                # Call handle with lbl_gen state
                self.handle(next_addr_concrete)
                cur_addr = next_addr_concrete

        # At this stage, symbolic engine is one instruction after the concrete
        # engine

        return True

    def take_snapshot(self):
        """Return a snapshot of the current state (including jitter state)

        The snapshot is a dict with the keys "mem", "regs" and "symb"
        """
        snapshot = {
            "mem": self.jitter.vm.get_all_memory(),
            "regs": self.jitter.cpu.get_gpreg(),
            "symb": self.symb.symbols.copy()
        }
        return snapshot

    def restore_snapshot(self, snapshot, memory=True):
        """Restore a @snapshot taken with .take_snapshot
        @snapshot: .take_snapshot output
        @memory: (optional) if set, also restore the memory
        """
        # Restore memory
        if memory:
            self.jitter.vm.reset_memory_page_pool()
            self.jitter.vm.reset_code_bloc_pool()
            for addr, metadata in snapshot["mem"].iteritems():
                self.jitter.vm.add_memory_page(addr,
                                               metadata["access"],
                                               metadata["data"])

        # Restore registers
        self.jitter.pc = snapshot["regs"][self.ir_arch.pc.name]
        self.jitter.cpu.set_gpreg(snapshot["regs"])

        # Reset intern elements
        self.jitter.vm.set_exception(0)
        self.jitter.cpu.set_exception(0)
        self.jitter.bs._atomic_mode = False

        # Reset symb exec
        # Iterate over a copy: entries are deleted during the loop
        for key, _ in list(self.symb.symbols.items()):
            del self.symb.symbols[key]
        for expr, value in snapshot["symb"].items():
            self.symb.symbols[expr] = value

    def update_state(self, assignblk):
        """From this point, assume @assignblk in the symbolic execution
        @assignblk: AssignBlock/{dst -> src}
        """
        for dst, src in assignblk.iteritems():
            self.symb.apply_change(dst, src)

    def _update_state_from_concrete_symb(self, symbexec, cpu=True, mem=False):
        """Update @symbexec with values from the concrete engine
        @symbexec: symbolic execution engine to update
        @cpu: (optional) if set, assign to each register of the current
        attrib exposed by the jitter cpu its concrete value
        @mem: (optional) if set, drop every memory symbol of @symbexec
        """
        if mem:
            # Values will be retrieved from the concrete execution if they are
            # not present
            for symbol in symbexec.symbols.symbols_mem.copy():
                del symbexec.symbols[symbol]
        if cpu:
            regs = self.ir_arch.arch.regs.attrib_to_regs[self.ir_arch.attrib]
            for reg in regs:
                if hasattr(self.jitter.cpu, reg.name):
                    value = ExprInt(getattr(self.jitter.cpu, reg.name),
                                    size=reg.size)
                    symbexec.symbols[reg] = value

    def update_state_from_concrete(self, cpu=True, mem=False):
        r"""Update the symbolic state with concrete values from the concrete
        engine

        @cpu: (optional) if set, update registers' value
        @mem: (optional) if set, update memory value

        /!\ all current states will be lost.
        This function is usually called when states are no longer synchronized
        (at the beginning, returning from an unstubbed syscall, ...)
        """
        self._update_state_from_concrete_symb(self.symb, cpu, mem)

    def eval_expr(self, expr):
        """Return the evaluation of @expr by the symbolic engine
        @expr: Expr instance"""
        return self.symb.eval_expr(expr)

    @staticmethod
    def memory_to_expr(addr):
        """Translate an address to its corresponding symbolic ID (8bits)
        @addr: int"""
        return ExprId("MEM_0x%x" % int(addr), 8)

    def symbolize_memory(self, memory_range):
        """Register a range of memory addresses to symbolize
        @memory_range: object supporting the `in` operator (intervals, list,
        ...)
        """
        self.symb.dse_memory_range = memory_range
        self.symb.dse_memory_to_expr = self.memory_to_expr
Ejemplo n.º 2
0
class DSEEngine(object):
    """Dynamic Symbolic Execution Engine

    Drives a symbolic execution engine alongside a concrete execution (the
    jitter): before each concrete instruction, the symbolic state is checked
    against the concrete one, user callbacks are run, and the instruction is
    emulated symbolically.

    This class is meant to be overridden for each specific purpose
    """
    # Class of the symbolic engine instantiated in `prepare`
    SYMB_ENGINE = ESETrackModif

    def __init__(self, machine):
        """Create a detached engine; call `attach` to bind it to a jitter
        @machine: object providing `ir()` and `dis_engine(...)`
        """
        self.machine = machine
        self.handler = {}  # addr -> callback(DSEEngine instance)
        self.instrumentation = {}  # addr -> callback(DSEEngine instance)
        self.addr_to_cacheblocks = {}  # addr -> {label -> IRBlock}
        self.ir_arch = self.machine.ir()  # corresponding IR

        # Defined after attachment
        self.jitter = None  # Jitload (concrete execution)
        self.symb = None  # SymbolicExecutionEngine
        self.symb_concrete = None  # Concrete SymbExec for path disambiguation
        self.mdis = None  # DisasmEngine

    def prepare(self):
        """Prepare the environment for attachment with a jitter

        `self.jitter` must already be set (see `attach`)
        """
        # Disassembler
        self.mdis = self.machine.dis_engine(bin_stream_vm(self.jitter.vm),
                                            lines_wd=1)

        # Symbexec engine
        ## Prepare symbexec engines
        self.symb = self.SYMB_ENGINE(self.jitter.cpu, self.jitter.vm,
                                     self.ir_arch, {})
        self.symb.enable_emulated_simplifications()
        self.symb_concrete = EmulatedSymbExec(self.jitter.cpu, self.jitter.vm,
                                              self.ir_arch, {})

        ## Update registers value
        self.symb.symbols[self.ir_arch.IRDst] = ExprInt(
            getattr(self.jitter.cpu, self.ir_arch.pc.name),
            self.ir_arch.IRDst.size)

        # Avoid memory write
        self.symb.func_write = None

        # Activate callback on each instr
        self.jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
        self.jitter.exec_cb = self.callback

        # Clean jit cache to avoid multi-line basic blocks already jitted
        self.jitter.jit.lbl2jitbloc.clear()

    def attach(self, emulator):
        """Attach the DSE to @emulator
        @emulator: jitload (or API equivalent) instance"""
        self.jitter = emulator
        self.prepare()

    def handle(self, cur_addr):
        r"""Handle destination
        @cur_addr: Expr of the next address in concrete execution
        /!\ cur_addr may be a lbl_gen

        In this method, self.symb is in the "just before branching" state.
        The default implementation does nothing; it is a hook for subclasses.
        """
        pass

    def add_handler(self, addr, callback):
        """Add a @callback for address @addr before any state update.
        The state IS NOT updated after returning from the callback
        @addr: int
        @callback: func(dse instance)"""
        self.handler[addr] = callback

    def add_lib_handler(self, libimp, namespace):
        """Add search for handler based on a @libimp libimp instance

        Known functions will be looked by {name}_symb in the @namespace.
        Addresses without a matching stub get a handler raising RuntimeError
        when reached.
        @libimp: object exposing `fad2cname` (addr -> function name)
        @namespace: dict-like, name -> callable
        """

        # lambda cannot contain statement
        def default_func(dse):
            fname = "%s_symb" % libimp.fad2cname[dse.jitter.pc]
            raise RuntimeError("Symbolic stub '%s' not found" % fname)

        for addr, fname in libimp.fad2cname.iteritems():
            fname = "%s_symb" % fname
            func = namespace.get(fname, None)
            if func is not None:
                self.add_handler(addr, func)
            else:
                self.add_handler(addr, default_func)

    def add_instrumentation(self, addr, callback):
        """Add a @callback for address @addr before any state update.
        The state IS updated after returning from the callback
        @addr: int
        @callback: func(dse instance)"""
        self.instrumentation[addr] = callback

    def _check_state(self):
        """Check the current state against the concrete one

        Only the expressions reported as modified by the symbolic engine, and
        whose symbolic value is a concrete integer, are compared.
        Raise DriftException (with the list of DriftInfo) on any mismatch
        """
        errors = []  # List of DriftInfo
        ignored = (self.ir_arch.pc, self.ir_arch.IRDst)

        for symbol in self.symb.modified_expr:
            # Do not consider PC
            if symbol in ignored:
                continue

            # Consider only concrete values
            symb_value = self.eval_expr(symbol)
            if not symb_value.is_int():
                continue
            symb_value = int(symb_value)

            # Check computed values against real ones
            if symbol.is_id():
                if hasattr(self.jitter.cpu, symbol.name):
                    value = getattr(self.jitter.cpu, symbol.name)
                    if value != symb_value:
                        errors.append(DriftInfo(symbol, symb_value, value))
            elif symbol.is_mem() and symbol.arg.is_int():
                # Size is in bits; explicit floor division gives a byte count
                value_chr = self.jitter.vm.get_mem(int(symbol.arg),
                                                   symbol.size // 8)
                # Bytes are reversed before being read as one hex number
                exp_value = int(value_chr[::-1].encode("hex"), 16)
                if exp_value != symb_value:
                    errors.append(DriftInfo(symbol, symb_value, exp_value))

        # Check for drift, and act accordingly
        if errors:
            raise DriftException(errors)

    def callback(self, _):
        """Called before each instruction

        Check the synchronization, run the callbacks registered on the current
        address, then symbolically emulate the instruction at this address.
        @_: unused
        Always return True
        """
        # Assert synchronization with concrete execution
        self._check_state()

        # Call callbacks associated to the current address
        cur_addr = self.jitter.pc

        if cur_addr in self.handler:
            # Handlers replace the emulation: no state update afterwards
            self.handler[cur_addr](self)
            return True

        if cur_addr in self.instrumentation:
            self.instrumentation[cur_addr](self)

        # Handle current address
        self.handle(ExprInt(cur_addr, self.ir_arch.IRDst.size))

        # Avoid memory issue in ExpressionSimplifier
        if len(self.symb.expr_simp.simplified_exprs) > 100000:
            self.symb.expr_simp.simplified_exprs.clear()

        # Get IR blocks
        if cur_addr in self.addr_to_cacheblocks:
            self.ir_arch.blocks.clear()
            self.ir_arch.blocks.update(self.addr_to_cacheblocks[cur_addr])
        else:

            ## Reset cache structures
            self.ir_arch.blocks.clear()  # = {}

            ## Update current state
            asm_block = self.mdis.dis_block(cur_addr)
            self.ir_arch.add_block(asm_block)
            self.addr_to_cacheblocks[cur_addr] = dict(self.ir_arch.blocks)

        # Emulate the current instruction
        self.symb.reset_modified()

        # Is the symbolic execution going (potentially) to jump on a lbl_gen?
        if len(self.ir_arch.blocks) == 1:
            # The returned destination is not needed here
            self.symb.emul_ir_blocks(cur_addr)
        else:
            # Emulation could stuck in generated IR blocks
            # But concrete execution callback is not enough precise to obtain
            # the full IR blocks path
            # -> Use a fully concrete execution to get back path

            # Update the concrete execution
            self._update_state_from_concrete_symb(self.symb_concrete)
            while True:
                next_addr_concrete = self.symb_concrete.emul_ir_block(cur_addr)
                self.symb.emul_ir_block(cur_addr)

                if not (expr_is_label(next_addr_concrete)
                        and next_addr_concrete.name.offset is None):
                    # Not a lbl_gen, exit
                    break

                # Call handle with lbl_gen state
                self.handle(next_addr_concrete)
                cur_addr = next_addr_concrete

        # At this stage, symbolic engine is one instruction after the concrete
        # engine

        return True

    def _get_gpregs(self):
        """Return a dict of regs: value from the jitter
        This version uses the regs associated to the attrib
        (!= cpu.get_gpreg()); registers the jitter cpu does not expose are
        skipped
        """
        out = {}
        regs = self.ir_arch.arch.regs.attrib_to_regs[self.ir_arch.attrib]
        for reg in regs:
            if hasattr(self.jitter.cpu, reg.name):
                out[reg.name] = getattr(self.jitter.cpu, reg.name)
        return out

    def take_snapshot(self):
        """Return a snapshot of the current state (including jitter state)

        The snapshot is a dict with the keys "mem", "regs" and "symb"
        """
        snapshot = {
            "mem": self.jitter.vm.get_all_memory(),
            "regs": self._get_gpregs(),
            "symb": self.symb.symbols.copy(),
        }
        return snapshot

    def restore_snapshot(self, snapshot, memory=True):
        """Restore a @snapshot taken with .take_snapshot
        @snapshot: .take_snapshot output
        @memory: (optional) if set, also restore the memory
        """
        # Restore memory
        if memory:
            self.jitter.vm.reset_memory_page_pool()
            self.jitter.vm.reset_code_bloc_pool()
            for addr, metadata in snapshot["mem"].iteritems():
                self.jitter.vm.add_memory_page(addr, metadata["access"],
                                               metadata["data"])

        # Restore registers
        self.jitter.pc = snapshot["regs"][self.ir_arch.pc.name]
        for reg, value in snapshot["regs"].iteritems():
            setattr(self.jitter.cpu, reg, value)

        # Reset intern elements
        self.jitter.vm.set_exception(0)
        self.jitter.cpu.set_exception(0)
        self.jitter.bs._atomic_mode = False

        # Reset symb exec
        # Iterate over a copy: entries are deleted during the loop
        for key, _ in list(self.symb.symbols.items()):
            del self.symb.symbols[key]
        for expr, value in snapshot["symb"].items():
            self.symb.symbols[expr] = value

    def update_state(self, assignblk):
        """From this point, assume @assignblk in the symbolic execution
        @assignblk: AssignBlock/{dst -> src}
        """
        for dst, src in assignblk.iteritems():
            self.symb.apply_change(dst, src)

    def _update_state_from_concrete_symb(self, symbexec, cpu=True, mem=False):
        """Update @symbexec with values from the concrete engine
        @symbexec: symbolic execution engine to update
        @cpu: (optional) if set, assign to each register of the current
        attrib exposed by the jitter cpu its concrete value
        @mem: (optional) if set, drop every memory symbol of @symbexec
        """
        if mem:
            # Values will be retrieved from the concrete execution if they are
            # not present
            for symbol in symbexec.symbols.symbols_mem.copy():
                del symbexec.symbols[symbol]
        if cpu:
            regs = self.ir_arch.arch.regs.attrib_to_regs[self.ir_arch.attrib]
            for reg in regs:
                if hasattr(self.jitter.cpu, reg.name):
                    value = ExprInt(getattr(self.jitter.cpu, reg.name),
                                    size=reg.size)
                    symbexec.symbols[reg] = value

    def update_state_from_concrete(self, cpu=True, mem=False):
        r"""Update the symbolic state with concrete values from the concrete
        engine

        @cpu: (optional) if set, update registers' value
        @mem: (optional) if set, update memory value

        /!\ all current states will be lost.
        This function is usually called when states are no longer synchronized
        (at the beginning, returning from an unstubbed syscall, ...)
        """
        self._update_state_from_concrete_symb(self.symb, cpu, mem)

    def eval_expr(self, expr):
        """Return the evaluation of @expr by the symbolic engine
        @expr: Expr instance"""
        return self.symb.eval_expr(expr)

    @staticmethod
    def memory_to_expr(addr):
        """Translate an address to its corresponding symbolic ID (8bits)
        @addr: int"""
        return ExprId("MEM_0x%x" % int(addr), 8)

    def symbolize_memory(self, memory_range):
        """Register a range of memory addresses to symbolize
        @memory_range: object supporting the `in` operator (intervals, list,
        ...)
        """
        self.symb.dse_memory_range = memory_range
        self.symb.dse_memory_to_expr = self.memory_to_expr