Beispiel #1
0
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    """Explore pending states by symbolically executing one IR block each.

    A state is an ``(addr, symbols, conds)`` tuple.  States are popped from
    *states_todo* until it is empty, and every executed state is recorded in
    *states_done* so that it is not executed a second time.  Both collections
    are mutated in place (they are used through ``pop``/``add``).

    When the destination of a block is an ``ExprCond``, the state is forked
    in two: one fork per value (0, then 1) forced on the condition.  The
    forced value is appended to the conditions carried by the forked state.
    A path stops being followed once its destination equals the module-level
    ``ret_addr``.

    Raises:
        ValueError: if a destination (or, for a conditional destination, any
            of its two simplified branches) is neither an integer nor a
            location.
    """
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))

        # Start from a copy of the saved symbols, without any stale PC value.
        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            forks = []
            for forced_value in (0, 1):
                cond_group = {addr.cond: ExprInt(forced_value, addr.cond.size)}
                target = expr_simp(
                    symbexec.eval_expr(addr.replace_expr(cond_group), {}))
                forks.append((target, cond_group))

            # Every branch must be usable as a block address.  The previous
            # unparenthesised "or ... and ... or" test accepted forks where
            # only one of the two branches was supported.
            if not all(target.is_int() or target.is_loc()
                       for target, _ in forks):
                print(*(str(target) for target, _ in forks))
                raise ValueError("Unsupported condition")

            for target, cond_group in forks:
                if isinstance(target, ExprInt):
                    target = int(target.arg)
                states_todo.add(
                    (target, symbexec.symbols.copy(),
                     tuple(list(conds) + list(viewitems(cond_group)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
Beispiel #2
0
def intra_block_flow_symb(ir_arch, _, flow_graph, irblock, in_nodes,
                          out_nodes):
    """Add the data-flow nodes and edges of one IR block to *flow_graph*.

    The block is symbolically executed from the architecture's initial
    register values, then the elements reported by ``sb.modified(mems=False)``
    are used to link the values that are read to the values that are written.

    *in_nodes* and *out_nodes* are updated in place: *in_nodes* receives a
    node name for every read element not already present, *out_nodes*
    receives the node name of every written element.  The second positional
    parameter is unused.
    """
    symbols_init = ir_arch.arch.regs.regs_init.copy()
    sb = SymbolicExecutionEngine(ir_arch, symbols_init)
    sb.eval_updt_irblock(irblock)
    print('*' * 40)
    print(irblock)

    out = sb.modified(mems=False)
    # Never filled in this function, so the "in current_nodes" lookups below
    # always take the get_node_name() branch.
    current_nodes = {}
    # Gen mem arg to mem node links
    for dst, src in out:
        # The value yielded with dst is replaced by a fresh evaluation of dst.
        src = sb.eval_expr(dst)
        for n in [dst, src]:

            # NOTE(review): all_mems is re-created on every pass, so after
            # this loop it only holds the memory expressions found in src
            # (the last element) -- confirm this is intended.
            all_mems = set()
            all_mems.update(get_expr_mem(n))

        for n in all_mems:
            node_n_w = get_node_name(irblock.loc_key, 0, n)
            # Only a memory expression that is the whole of src is linked.
            if not n == src:
                continue
            # Link every element read by the memory pointer to the memory node.
            o_r = n.ptr.get_r(mem_read=False, cst_read=True)
            for i, n_r in enumerate(o_r):
                if n_r in current_nodes:
                    node_n_r = current_nodes[n_r]
                else:
                    node_n_r = get_node_name(irblock.loc_key, i, n_r)
                if not n_r in in_nodes:
                    in_nodes[n_r] = node_n_r
                flow_graph.add_uniq_edge(node_n_r, node_n_w)

    # Gen data flow links
    # NOTE(review): `out` is iterated a second time here, and each item is
    # bound to dst as a whole whereas the loop above unpacks items as
    # (dst, src) pairs.  If sb.modified() returns a one-shot iterator this
    # loop body never runs -- confirm against the miasm API.
    for dst in out:
        src = sb.eval_expr(dst)
        nodes_r = src.get_r(mem_read=False, cst_read=True)
        nodes_w = set([dst])
        for n_r in nodes_r:
            if n_r in current_nodes:
                node_n_r = current_nodes[n_r]
            else:
                node_n_r = get_node_name(irblock.loc_key, 0, n_r)
            if not n_r in in_nodes:
                in_nodes[n_r] = node_n_r

            flow_graph.add_node(node_n_r)
            for n_w in nodes_w:
                # Written elements use index 1 in their node name, read
                # elements use index 0.
                node_n_w = get_node_name(irblock.loc_key, 1, n_w)
                out_nodes[n_w] = node_n_w

                flow_graph.add_node(node_n_w)
                flow_graph.add_uniq_edge(node_n_r, node_n_w)
Beispiel #3
0
def exec_instruction(mn_str, init_values, results, index=0, offset=0):
    """Symbolically execute an instruction and check the expected results.

    Args:
        mn_str: assembly text of the instruction, parsed with ``mn_mep``.
        init_values: iterable of ``(expr_id, expr_value)`` pairs written into
            the symbolic state before the instruction is executed.
        results: sized collection of ``(expr_id, expr_value)`` pairs that
            must hold after the instruction is executed.
        index: position, in the list returned by ``mn_mep.asm``, of the
            encoding to disassemble and execute.
        offset: value assigned to the ``offset`` of the instruction.

    Raises:
        AssertionError: if the assembled bytes cannot be disassembled, or if
            at least one expected result is not observed.
    """

    # Assemble and disassemble the instruction
    instr = mn_mep.fromstring(mn_str, "b")
    instr.mode = "b"
    mn_bin = mn_mep.asm(instr)[index]
    try:
        instr = mn_mep.dis(mn_bin, "b")
    except Disasm_Exception:
        # Raised explicitly: a bare `assert False` disappears under -O.
        raise AssertionError("miasm cannot disassemble %r" % (mn_str,))

    # Specify the instruction offset and compute the destination label
    instr.offset = offset
    loc_db = LocationDB()
    if instr.dstflow():
        instr.dstflow2label(loc_db)

    # Get the IR (the second returned value is not needed here)
    im = Lifter_MEPb(loc_db)
    iir, _ = im.get_ir(instr)

    # Filter out IRDst
    iir = [
        ir for ir in iir
        if not (isinstance(ir, ExprAssign) and isinstance(ir.dst, ExprId)
                and ir.dst.name == "IRDst")
    ]

    # Prepare symbolic execution
    sb = SymbolicExecutionEngine(LifterModelCallMepb(loc_db), regs_init)

    # Assign int values before symbolic evaluation
    for expr_id, expr_value in init_values:
        sb.symbols[expr_id] = expr_value

    # Execute the IR
    sb.eval_updt_assignblk(AssignBlock(iir))

    # Count the expected results that are observed in the final state
    matched_results = 0
    for expr_id, expr_value in results:
        result = sb.eval_expr(expr_id)
        if isinstance(result, ExprLoc):
            # A location is compared through the offset it is bound to
            addr = loc_db.get_location_offset(result.loc_key)
            matched = expr_value.arg == addr
        else:
            matched = result == expr_value
        if matched:
            matched_results += 1

    # Ensure that all expected results were verified.  Compare by value:
    # `is not` on integers relies on an interpreter caching detail.
    if len(results) != matched_results:
        print("Expected:", results)
        print("Modified:", list(sb.modified(mems=False)))
        raise AssertionError(
            "%d of %d expected results matched for %r"
            % (matched_results, len(results), mn_str))
Beispiel #4
0
def exec_instruction(mn_str, init_values, results, index=0, offset=0):
    """Symbolically execute an instruction and check the expected results.

    Args:
        mn_str: assembly text of the instruction, parsed with ``mn_mep``.
        init_values: iterable of ``(expr_id, expr_value)`` pairs written into
            the symbolic state before the instruction is executed.
        results: sized collection of ``(expr_id, expr_value)`` pairs that
            must hold after the instruction is executed.
        index: position, in the list returned by ``mn_mep.asm``, of the
            encoding to disassemble and execute.
        offset: value assigned to the ``offset`` of the instruction.

    Raises:
        AssertionError: if the assembled bytes cannot be disassembled, or if
            at least one expected result is not observed.
    """

    # Assemble and disassemble the instruction
    instr = mn_mep.fromstring(mn_str, "b")
    instr.mode = "b"
    mn_bin = mn_mep.asm(instr)[index]
    try:
        instr = mn_mep.dis(mn_bin, "b")
    except Disasm_Exception:
        # Raised explicitly: a bare `assert False` disappears under -O.
        raise AssertionError("miasm cannot disassemble %r" % (mn_str,))

    # Specify the instruction offset and compute the destination label
    instr.offset = offset
    loc_db = LocationDB()
    if instr.dstflow():
        instr.dstflow2label(loc_db)

    # Get the IR (the second returned value is not needed here)
    im = ir_mepb(loc_db)
    iir, _ = im.get_ir(instr)

    # Filter out IRDst
    iir = [ir for ir in iir if not (isinstance(ir, ExprAssign) and
                                    isinstance(ir.dst, ExprId) and
                                    ir.dst.name == "IRDst")]

    # Prepare symbolic execution
    sb = SymbolicExecutionEngine(ir_a_mepb(loc_db), regs_init)

    # Assign int values before symbolic evaluation
    for expr_id, expr_value in init_values:
        sb.symbols[expr_id] = expr_value

    # Execute the IR
    sb.eval_updt_assignblk(AssignBlock(iir))

    # Count the expected results that are observed in the final state
    matched_results = 0
    for expr_id, expr_value in results:
        result = sb.eval_expr(expr_id)
        if isinstance(result, ExprLoc):
            # A location is compared through the offset it is bound to
            addr = loc_db.get_location_offset(result.loc_key)
            matched = expr_value.arg == addr
        else:
            matched = result == expr_value
        if matched:
            matched_results += 1

    # Ensure that all expected results were verified.  Compare by value:
    # `is not` on integers relies on an interpreter caching detail.
    if len(results) != matched_results:
        print("Expected:", results)
        print("Modified:", list(sb.modified(mems=False)))
        raise AssertionError(
            "%d of %d expected results matched for %r"
            % (matched_results, len(results), mn_str))
Beispiel #5
0
    def emul(self, lifter, ctx=None, step=False):
        """Symbolically replay the recorded history and return the inputs.

        The blocks listed in ``self.history`` are executed in reverse order
        of that list.  After each block except the last one, a constraint
        built by ``self._gen_path_constraints`` from the block destination
        and the next expected location is added to a fresh z3 solver, which
        is then stored in ``self._solver``.

        Args:
            lifter: lifter handed to the symbolic execution engine.
            ctx: optional initial symbolic context; it is copied, not
                modified.
            step: forwarded to ``eval_updt_irblock``.

        Returns:
            A dict mapping each element of ``self.inputs`` to its value in
            the final symbolic state.
        """
        # Work on a private copy of the caller's context
        start_ctx = {}
        if ctx is not None:
            start_ctx.update(ctx)

        constraints = z3.Solver()
        engine = SymbolicExecutionEngine(lifter, start_ctx)
        path = self.history[::-1]
        path_len = len(path)
        to_z3 = Translator.to_language("z3")
        dst_size = self._ircfg.IRDst.size

        for position, loc_key in enumerate(path, 1):
            is_last = position == path_len

            # Only the final block may be cut at the initial state's line
            line_nb = None
            if is_last and loc_key == self.initial_state.loc_key:
                line_nb = self.initial_state.line_nb
            block = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb)

            # Run the block; its destination feeds the path constraint
            dst = engine.eval_updt_irblock(block, step=step)
            if is_last:
                continue

            # `position` is 1-based, so it indexes the following location
            following = path[position]
            expected = engine.eval_expr(ExprLoc(following, dst_size))
            constraints.add(
                self._gen_path_constraints(to_z3, dst, expected))

        # Keep the solver around for later queries
        self._solver = constraints

        # Only the input values are reported
        inputs_values = {}
        for element in self.inputs:
            inputs_values[element] = engine.eval_expr(element)
        return inputs_values
Beispiel #6
0
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    """Explore pending states by symbolically executing one IR block each.

    A state is an ``(addr, symbols, conds)`` tuple.  States are popped from
    *states_todo* until it is empty, and every executed state is recorded in
    *states_done* so that it is not executed a second time.  Both collections
    are mutated in place (they are used through ``pop``/``add``).

    When the destination of a block is an ``ExprCond``, the state is forked
    in two: one fork per value (0, then 1) forced on the condition.  The
    forced value is appended to the conditions carried by the forked state.
    A path stops being followed once its destination equals the module-level
    ``ret_addr``.

    Raises:
        ValueError: if a destination (or, for a conditional destination, any
            of its two simplified branches) is neither an integer nor a
            location.
    """
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))

        # Start from a copy of the saved symbols, without any stale PC value.
        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            forks = []
            for forced_value in (0, 1):
                cond_group = {addr.cond: ExprInt(forced_value, addr.cond.size)}
                target = expr_simp(
                    symbexec.eval_expr(addr.replace_expr(cond_group), {}))
                forks.append((target, cond_group))

            # Every branch must be usable as a block address.  The previous
            # unparenthesised "or ... and ... or" test accepted forks where
            # only one of the two branches was supported.
            if not all(target.is_int() or target.is_loc()
                       for target, _ in forks):
                print(*(str(target) for target, _ in forks))
                raise ValueError("Unsupported condition")

            for target, cond_group in forks:
                if isinstance(target, ExprInt):
                    target = int(target.arg)
                states_todo.add(
                    (target, symbexec.symbols.copy(),
                     tuple(list(conds) + list(viewitems(cond_group)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
Beispiel #7
0
    def _normalize_ircfg(self, conn):
        """Rewrite every block of ``self.ircfg`` with stack accesses un-aliased.

        Blocks are visited breadth-first from ``LocKey(0)``.  In each
        assignment the stack pointer is replaced by its initial value plus
        the stack delta obtained from ``conn.modules.idc.get_spd``, registers
        that were assigned the stack pointer (tracked in ``bp``) are replaced
        the same way, and some compose expressions are resized.  Each visited
        block is then stored back into ``self.ircfg.blocks``.

        Args:
            conn: object exposing ``modules.idc.get_spd(offset)``
                (presumably a remote connection to IDA -- TODO confirm).

        Raises:
            RuntimeError: when a register receives a stack pointer value that
                differs from every value already recorded in ``bp``, outside
                of the ``LocKey(0)`` block.
        """
        # unalias stack miasm.re/blog/2017/02/03/data_flow_analysis_depgraph.html , but involve base pointer too
        # TODO remove manual *BP propagation in normalize_ircfg and use standard Miasm propagation when it is fixed
        # remove composes from bigger to smaller, they are not important for us
        # bp: registers known to hold a stack-pointer-derived value -> that value
        bp = {}
        # Last instruction offset seen; carried across assignblks and blocks
        prev_offset = None
        for irb_loc_key in self.ircfg.walk_breadth_first_forward(LocKey(0)):
            irs = []
            if irb_loc_key not in self.ircfg.blocks:
                continue
            irb = self.ircfg.blocks[irb_loc_key]
            if irb.dst.is_cond() and irb.dst.cond.is_op() and irb.dst.cond.op == 'CC_EQ':
                # TODO propagate cmp ..., arb_int too
                # propagate known zeroes to process test    eax, eax; jnz ...; lea     edi, [eax+4]
                symb_exec = SymbolicExecutionEngine(self.ir_arch)
                dst = symb_exec.eval_updt_irblock(irb)
                # Only handle a condition that is a plain identifier which
                # still evaluates to itself after the block was executed.
                if dst.is_cond() and dst.cond.is_id() and not is_bad_expr(dst.cond) and \
                        symb_exec.eval_expr(dst.cond) == dst.cond:
                    # add explicit mov ID, 0 to given irb
                    target_loc = dst.src2
                    if target_loc.is_int():
                        target_loc = self.asmcfg.loc_db.get_offset_location(int(target_loc))
                    elif target_loc.is_loc():
                        target_loc = target_loc.loc_key
                    else:
                        # NOTE(review): this `continue` (and the one below)
                        # leaves the outer loop iteration, so the current
                        # block is not rewritten at all -- confirm intended.
                        continue
                    # The zero is only valid if the target has no other way in
                    if len(self.ircfg.predecessors(target_loc)) > 1:
                        continue
                    target_irb = self.ircfg.blocks[target_loc]
                    asign_blk = AssignBlock([ExprAssign(dst.cond, ExprInt(0, dst.cond.size))])
                    assignblks = tuple([asign_blk, *target_irb.assignblks])
                    new_irb = IRBlock(target_loc, assignblks)
                    self.ircfg.blocks[target_loc] = new_irb
            # Replacement map applied to the expressions of this block
            fix_dct = {}
            for assignblk in irb:
                # Fall back on the previous offset when the instruction has none
                offset = prev_offset
                if assignblk.instr and assignblk.instr.offset:
                    offset = assignblk.instr.offset
                prev_offset = offset
                spd = conn.modules.idc.get_spd(offset)
                if spd is not None:
                    stk_high = ExprInt(spd, self.ir_arch.sp.size)
                    fix_dct = {self.ir_arch.sp: self.mn.regs.regs_init[self.ir_arch.sp] + stk_high}
                    fix_dct.update(bp)
                else:
                    # fix_dct keeps whatever it held before in this case.
                    # 0x0BADF00D is only a placeholder for a missing offset.
                    logger.warning("Couldn't acquire stack depth at 0x%x" % (offset or 0x0BADF00D))

                new_assignblk = {}
                for dst, src in assignblk.items():
                    if src.is_compose():
                        slc_arg = None
                        arg = None
                        # Keep the last slice argument and the last non-slice one
                        for tmp_arg in src.args:
                            if not tmp_arg.is_slice():
                                arg = tmp_arg
                            else:
                                # we're interested only in bigger to smaller
                                slc_arg = tmp_arg
                        if slc_arg and arg and len(arg.get_r()) == 1:
                            # Replace the compose by its non-slice argument,
                            # passed through _resize_top_expr with the size
                            # of the original source.
                            top_to_bottom_visitor = ExprVisitorCallbackTopToBottom(
                                lambda x: self._resize_top_expr(x, src.size))
                            src = top_to_bottom_visitor.visit(arg)
                    if dst == src:
                        # special compiler anomalies such as lea     esp, [esp+0]
                        continue
                    if src == self.ir_arch.sp:
                        # dst receives the stack pointer: remember it in bp
                        src = expr_simp(src.replace_expr(fix_dct))
                        if bp and src not in bp.values() and irb_loc_key != LocKey(0):
                            raise RuntimeError("Ambiguous base pointer")
                        bp.update({dst: src})
                        fix_dct.update(bp)
                    else:
                        src = expr_simp(src.replace_expr(fix_dct))
                        # Never rewrite the stack pointer or a tracked
                        # register when it is the destination itself.
                        if dst != self.ir_arch.sp and dst not in bp.keys():
                            dst = dst.replace_expr(fix_dct)

                    dst, src = expr_simp(dst), expr_simp(src)
                    new_assignblk[dst] = src
                irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
            self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)
Beispiel #8
0
# Lift the first assembly block into a fresh IR control-flow graph
ircfg = lifter.new_ircfg()
first_block = list(asmcfg.blocks)[0]
lifter.add_asmblock_to_ircfg(first_block, ircfg)

# --- Symbolic execution --- #

from miasm.ir.symbexec import SymbolicExecutionEngine
from miasm.expression.expression import *

# The engine starts from the initial register values of the architecture
symb = SymbolicExecutionEngine(lifter, machine.mn.regs.regs_init)

# Run from the entry address; irDst is the destination of the executed IR,
# i.e. where execution would continue
irDst = symb.run_at(ircfg, entry_addr, step=False)
print("IR Dest = ", irDst)

# Give a readable name to the 32-bit memory word at EBP_init - 4 and
# substitute it in the destination before evaluating it again
expr_flag = ExprId("flag", 32)
_flag_slot = expr_simp(
    ExprMem(machine.mn.regs.EBP_init - ExprInt(0x4, 32), 32))
_named_dst = irDst.replace_expr({_flag_slot: expr_flag})
result = symb.eval_expr(expr_simp(_named_dst))
print("IR Dest Semantics = ", result)

# The final symbolic state can be dumped with:
# symb.dump()