Example #1
0
    def emul(self, lifter, ctx=None, step=False):
        """Symbolic execution of relevant nodes according to the history
        Return the values of inputs nodes' elements
        @lifter: Lifter instance
        @ctx: (optional) Initial context as dictionary
        @step: (optional) Verbose execution
        Warning: The emulation is not sound if the inputs nodes depend on loop
        variant.
        """
        # Init
        ctx_init = {}
        if ctx is not None:
            ctx_init.update(ctx)
        assignblks = []

        # Build a single assignment block according to history
        last_index = len(self.relevant_loc_keys)
        for index, loc_key in enumerate(reversed(self.relevant_loc_keys), 1):
            if index == last_index and loc_key == self.initial_state.loc_key:
                line_nb = self.initial_state.line_nb
            else:
                line_nb = None
            assignblks += self.irblock_slice(self._ircfg.blocks[loc_key],
                                             line_nb).assignblks

        # Eval the block
        loc_db = lifter.loc_db
        temp_loc = loc_db.get_or_create_name_location("Temp")
        symb_exec = SymbolicExecutionEngine(lifter, ctx_init)
        symb_exec.eval_updt_irblock(IRBlock(loc_db, temp_loc, assignblks),
                                    step=step)

        # Return only inputs values (others could be wrongs)
        return {element: symb_exec.symbols[element] for element in self.inputs}
Example #2
0
def compute(asm, inputstate={}, debug=False):
    loc_db = LocationDB()
    sympool = dict(regs_init)
    sympool.update({k: ExprInt(v, k.size) for k, v in viewitems(inputstate)})
    ir_tmp = ir_arch(loc_db)
    ircfg = ir_tmp.new_ircfg()
    symexec = SymbolicExecutionEngine(ir_tmp, sympool)
    instr = mn.fromstring(asm, loc_db, "b")
    code = mn.asm(instr)[0]
    instr = mn.dis(code, "b")
    instr.offset = inputstate.get(PC, 0)
    lbl = ir_tmp.add_instr_to_ircfg(instr, ircfg)
    symexec.run_at(ircfg, lbl)
    if debug:
        for k, v in viewitems(symexec.symbols):
            if regs_init.get(k, None) != v:
                print(k, v)
    out = {}
    for k, v in viewitems(symexec.symbols):
        if k in EXCLUDE_REGS:
            continue
        elif regs_init.get(k, None) == v:
            continue
        elif isinstance(v, ExprInt):
            out[k] = int(v)
        else:
            out[k] = v
    return out
Example #3
0
def ExecuteSymbolicSingleStep(addr, state=INIT_REG):
    size = idc.ItemSize(addr)
    code = idc.GetManyBytes(addr, size)
    loc_db = LocationDB()

    base = addr
    try:
        ins = mn_x86.dis(bin_stream_str(code, base_address=base), 64, base)
    except:
        return state.copy()

    ira = machine.ira(loc_db)
    ircfg = ira.new_ircfg()
    try:
        ira.add_instr_to_ircfg(ins, ircfg)
        sb = SymbolicExecutionEngine(ira, state)
        symbolic_pc = sb.run_at(ircfg, base)
    except:
        return state.copy()
    ret = state.copy()
    for key, value in sb.modified():
        if isinstance(value, ExprOp) and value.op == "call_func_ret":
            value = ExprInt(0, 64)
        ret[key] = value
    return ret
Example #4
0
File: sem.py Project: cea-sec/miasm
def compute(asm, inputstate={}, debug=False):
    loc_db = LocationDB()
    sympool = dict(regs_init)
    sympool.update({k: ExprInt(v, k.size) for k, v in viewitems(inputstate)})
    ir_tmp = ir_arch(loc_db)
    ircfg = ir_tmp.new_ircfg()
    symexec = SymbolicExecutionEngine(ir_tmp, sympool)
    instr = mn.fromstring(asm, loc_db, "l")
    code = mn.asm(instr)[0]
    instr = mn.dis(code, "l")
    instr.offset = inputstate.get(PC, 0)
    lbl = ir_tmp.add_instr_to_ircfg(instr, ircfg)
    symexec.run_at(ircfg, lbl)
    if debug:
        for k, v in viewitems(symexec.symbols):
            if regs_init.get(k, None) != v:
                print(k, v)
    out = {}
    for k, v in viewitems(symexec.symbols):
        if k in EXCLUDE_REGS:
            continue
        elif regs_init.get(k, None) == v:
            continue
        elif isinstance(v, ExprInt):
            out[k] = int(v)
        else:
            out[k] = v
    return out
def sym(data, addr, status):
    cont = Container.from_string(data, loc_db=loc_db, addr=addr)
    mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)
    asm_block = mdis.dis_block(addr)

    # Translate ASM -> IR
    ircfg = ira.new_ircfg()
    try:
        ira.add_asmblock_to_ircfg(asm_block, ircfg)
    except NotImplementedError:
        return None

    # Instantiate a Symbolic Execution engine with default value for registers
    regs_init = regs.regs_init
    sympool = copy.deepcopy(regs_init)
    sympool.update(status)
    symb = SymbolicExecutionEngine(ira, sympool)

    # Emulate one IR basic block
    ## Emulation of several basic blocks can be done through .emul_ir_blocks
    cur_addr = symb.run_at(ircfg, addr)
    IRDst = symb.symbols[ira.IRDst]

    expr = expr_simp_explicit(IRDst)
    #if isinstance(expr, ExprMem):
    #    expr = expr.ptr
    return expr
Example #6
0
    def _deobfuscate_cff_loops(self, source_block, symbols):
        """

        :param symbols: initial symbols of symbolic execution engine to be created
        :param source_block: head of the graph to be deobfuscated
        :return:
        """
        symb_exec = SymbolicExecutionEngine(self.ir_arch)
        flat_block = self.flat_loops.get_block(source_block.loc_key, symb_exec, None)
        # maps flattening blocks to their respective loc_keys
        new_head = LocKey(0)
        flat_block_to_loc_key = {flat_block: new_head}
        todo = [FlattenState(flat_block, symbols)]
        counter = {}
        while todo:
            state = todo.pop()
            block_loc_key = state.flat_block.block_loc_key
            self.relevant_nodes.add(block_loc_key)
            counter[block_loc_key] = counter.get(block_loc_key, 0) + 1
            logger.debug("Processing block at 0x%x as %s; in all affected: %d; loops_id: %s; the jtc_vars are:" %
                         (self.asmcfg.loc_db.get_location_offset(block_loc_key) or 0xBAD, str(block_loc_key),
                          block_loc_key in self.all_affected_lines,
                          self.flat_loops[block_loc_key].loc_key))
            if counter[block_loc_key] > 500:
                raise Exception("Couldn't deobfuscate cff loop, either fell into an infinite loop or processing very "
                                "big function")
            symb_exec.set_state(state.symbols)
            # evaluate all affected lines
            self._eval_updt_lines(symb_exec, block_loc_key)
            for flat_block in self._insert_flat_block(state.flat_block, symb_exec, flat_block_to_loc_key):
                todo.append(FlattenState(flat_block, symb_exec.get_state()))
        return new_head
Example #7
0
 def symbolicExecution(self, ira, ircfg, address, state):
     symbolicEngine = SymbolicExecutionEngine(ira, state)
     nextTarget = symbolicEngine.run_block_at(ircfg, address)
     while not isinstance(nextTarget, ExprCond) and not isinstance(
             nextTarget, ExprMem):
         nextTarget = symbolicEngine.run_block_at(ircfg,
                                                  nextTarget,
                                                  step=False)
     return nextTarget, symbolicEngine.symbols
Example #8
0
        def exec_instruction(hex_asm, init_values):
            """Symbolically execute an instruction"""

            print("Hex:", hex_asm)

            # Disassemble an instruction
            mn = mn_mep.dis(decode_hex(hex_asm), "b")
            print("Dis:", mn)

            loc_db = LocationDB()

            # Get the IR
            im = ir_mepb(loc_db)
            iir, eiir, = im.get_ir(mn)
            print("\nInternal representation:", iir)

            # Symbolic execution
            sb = SymbolicExecutionEngine(ir_a_mepb(loc_db), regs_init)

            # Assign register values before symbolic evaluation
            for reg_expr_id, reg_expr_value in init_values:
                sb.symbols[reg_expr_id] = reg_expr_value

            print("\nModified registers:", [reg for reg in sb.modified(mems=False)])
            print("Modified memories:", [mem for mem in sb.modified()])

            print("\nFinal registers:")
            sb.dump(mems=False)

            print("\nFinal mems:")
            sb.dump()
Example #9
0
File: sem.py Project: cea-sec/miasm
def symb_exec(lbl, ir_arch, ircfg, inputstate, debug):
    sympool = dict(regs_init)
    sympool.update(inputstate)
    symexec = SymbolicExecutionEngine(ir_arch, sympool)
    symexec.run_at(ircfg, lbl)
    if debug:
        for k, v in viewitems(symexec.symbols):
            if regs_init.get(k, None) != v:
                print(k, v)
    return {
        k: v for k, v in viewitems(symexec.symbols)
        if k not in EXCLUDE_REGS and regs_init.get(k, None) != v
    }
Example #10
0
def symb_exec(lbl, lifter, ircfg, inputstate, debug):
    sympool = dict(regs_init)
    sympool.update(inputstate)
    symexec = SymbolicExecutionEngine(lifter, sympool)
    symexec.run_at(ircfg, lbl)
    if debug:
        for k, v in viewitems(symexec.symbols):
            if regs_init.get(k, None) != v:
                print(k, v)
    return {
        k: v
        for k, v in viewitems(symexec.symbols)
        if k not in EXCLUDE_REGS and regs_init.get(k, None) != v
    }
Example #11
0
def get_assignblock_for_state(ircfg, ir_arch, symbols_init, state_register,
                              state):
    referenced_blocks = []

    for cfgnode in ircfg.nodes():
        irblock = ircfg.get_block(cfgnode)
        if not irblock:
            print('[!] Could not get IRBLOCK!')
            sys.exit()
        if len(irblock.assignblks) == 1:
            _next_addr = irblock.dst
        else:
            _symbolic_engine = SymbolicExecutionEngine(ir_arch, symbols_init)
            _next_addr = _symbolic_engine.run_block_at(
                ircfg, get_address(ircfg.loc_db, cfgnode))
            if _next_addr == None:
                continue
            _next_addr = expr_simp(_next_addr)

        if isinstance(_next_addr, ExprCond) and \
                isinstance(_next_addr.cond, ExprOp) and \
                _next_addr.cond.op == '==':
            args = _next_addr.cond

            while not isinstance(args.args[0], ExprId):
                if hasattr(args, 'args'):
                    args = args.args[0]

                    if not isinstance(args, ExprOp):
                        break

            if hasattr(args, 'args') and \
                    args.args[0] in (state_register, symbols_init[state_register]) and \
                    args.args[1] == state:

                block = ircfg.get_block(cfgnode)
                if hasattr(block.dst.cond,
                           'op') and block.dst.cond.op in ('CC_S>'):
                    dst = get_address(ircfg.loc_db, block.dst.src2.loc_key)
                    next_block = ircfg.get_block(dst)
                    dst = get_address(ircfg.loc_db,
                                      next_block.dst.src1.loc_key)
                else:
                    dst = get_address(ircfg.loc_db, block.dst.src1.loc_key)

                referenced_block = ircfg.get_block(dst)
                referenced_blocks.append(referenced_block)
    return referenced_blocks
Example #12
0
def exec_instruction(mn_str, init_values, results, index=0, offset=0):
    """Symbolically execute an instruction and check the expected results."""

    # Assemble and disassemble the instruction
    instr = mn_mep.fromstring(mn_str, "b")
    instr.mode = "b"
    mn_bin = mn_mep.asm(instr)[index]
    try:
        instr = mn_mep.dis(mn_bin, "b")
    except Disasm_Exception:
        assert (False)  # miasm don't know what to do

    # Specify the instruction offset and compute the destination label
    instr.offset = offset
    loc_db = LocationDB()
    if instr.dstflow():
        instr.dstflow2label(loc_db)

    # Get the IR
    im = Lifter_MEPb(loc_db)
    iir, eiir = im.get_ir(instr)

    # Filter out IRDst
    iir = [
        ir for ir in iir
        if not (isinstance(ir, ExprAssign) and isinstance(ir.dst, ExprId)
                and ir.dst.name == "IRDst")
    ]

    # Prepare symbolic execution
    sb = SymbolicExecutionEngine(LifterModelCallMepb(loc_db), regs_init)

    # Assign int values before symbolic evaluation
    for expr_id, expr_value in init_values:
        sb.symbols[expr_id] = expr_value

    # Execute the IR
    ab = AssignBlock(iir)
    sb.eval_updt_assignblk(ab)

    # Check if expected expr_id were modified
    matched_results = 0
    for expr_id, expr_value in results:

        result = sb.eval_expr(expr_id)
        if isinstance(result, ExprLoc):
            addr = loc_db.get_location_offset(result.loc_key)
            if expr_value.arg == addr:
                matched_results += 1
                continue
        elif result == expr_value:
            matched_results += 1
            continue

    # Ensure that all expected results were verified
    if len(results) is not matched_results:
        print("Expected:", results)
        print("Modified:", [r for r in sb.modified(mems=False)])
        assert (False)
Example #13
0
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))
        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))
            if not (addr_a.is_int() or addr_a.is_loc() and addr_b.is_int()
                    or addr_b.is_loc()):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")
            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)
            states_todo.add(
                (addr_a, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_a)))))
            states_todo.add(
                (addr_b, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_b)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
def main(file_path: Path, start_addr: int, oracle_path: Path) -> None:
    # symbol table
    loc_db = LocationDB()

    # open the binary for analysis
    container = Container.from_stream(open(file_path, 'rb'), loc_db)

    # cpu abstraction
    machine = Machine(container.arch)

    # init disassemble engine
    mdis = machine.dis_engine(container.bin_stream, loc_db=loc_db)

    # initialize intermediate representation
    lifter = machine.lifter_model_call(mdis.loc_db)

    # disassemble the function at address
    asm_block = mdis.dis_block(start_addr)

    # lift to Miasm IR
    ira_cfg = lifter.new_ircfg()
    lifter.add_asmblock_to_ircfg(asm_block, ira_cfg)

    # init symbolic execution engine
    sb = SymbolicExecutionEngine(lifter)

    # symbolically execute basic block
    sb.run_block_at(ira_cfg, start_addr)

    # initialize simplifier
    simplifier = Simplifier(oracle_path)

    for k, v in sb.modified():

        if v.is_int() or v.is_id() or v.is_loc():
            continue

        print(f"before: {v}")
        simplified = simplifier.simplify(v)
        print(f"simplified: {simplified}")
        print("\n\n")
Example #15
0
File: sem.py Project: cea-sec/miasm
def compute(asm, inputstate={}, debug=False):
    loc_db = LocationDB()
    sympool = dict(regs_init)
    sympool.update({k: ExprInt(v, k.size) for k, v in viewitems(inputstate)})
    ir_tmp = ir_arch(loc_db)
    ircfg = ir_tmp.new_ircfg()
    symexec = SymbolicExecutionEngine(ir_tmp, sympool)
    instr = mn.fromstring(asm, mode)
    code = mn.asm(instr)[0]
    instr = mn.dis(code, mode)
    instr.offset = inputstate.get(PC, 0)
    loc_key = ir_tmp.add_instr_to_ircfg(instr, ircfg)
    symexec.run_at(ircfg, loc_key)
    if debug:
        for k, v in viewitems(symexec.symbols):
            if regs_init.get(k, None) != v:
                print(k, v)
    return {
        k: v.arg.arg for k, v in viewitems(symexec.symbols)
        if k not in EXCLUDE_REGS and regs_init.get(k, None) != v
    }
Example #16
0
def symbolic_exec():
    from miasm.ir.symbexec import SymbolicExecutionEngine
    from miasm.core.bin_stream_ida import bin_stream_ida

    from utils import guess_machine

    start, end = idc.read_selection_start(), idc.read_selection_end()
    loc_db = LocationDB()

    bs = bin_stream_ida()
    machine = guess_machine(addr=start)

    mdis = machine.dis_engine(bs, loc_db=loc_db)

    if start == idc.BADADDR and end == idc.BADADDR:
        start = idc.get_screen_ea()
        end = idc.next_head(start)  # Get next instruction address

    mdis.dont_dis = [end]
    asmcfg = mdis.dis_multiblock(start)
    ira = machine.ira(loc_db=loc_db)
    ircfg = ira.new_ircfg_from_asmcfg(asmcfg)

    print("Run symbolic execution...")
    sb = SymbolicExecutionEngine(ira, machine.mn.regs.regs_init)
    sb.run_at(ircfg, start)
    modified = {}

    for dst, src in sb.modified(init_state=machine.mn.regs.regs_init):
        modified[dst] = src

    view = symbolicexec_t()
    all_views.append(view)
    if not view.Create(
            modified, machine, loc_db, "Symbolic Execution - 0x%x to 0x%x" %
        (start, idc.prev_head(end))):
        return

    view.Show()
Example #17
0
File: sem.py Project: zyc1314/miasm
def compute(asm, inputstate={}, debug=False):
    loc_db = LocationDB()
    sympool = dict(regs_init)
    sympool.update({k: ExprInt(v, k.size) for k, v in viewitems(inputstate)})
    ir_tmp = ir_arch(loc_db)
    ircfg = ir_tmp.new_ircfg()
    symexec = SymbolicExecutionEngine(ir_tmp, sympool)
    instr = mn.fromstring(asm, mode)
    code = mn.asm(instr)[0]
    instr = mn.dis(code, mode)
    instr.offset = inputstate.get(PC, 0)
    loc_key = ir_tmp.add_instr_to_ircfg(instr, ircfg)
    symexec.run_at(ircfg, loc_key)
    if debug:
        for k, v in viewitems(symexec.symbols):
            if regs_init.get(k, None) != v:
                print(k, v)
    return {
        k: v.arg.arg
        for k, v in viewitems(symexec.symbols)
        if k not in EXCLUDE_REGS and regs_init.get(k, None) != v
    }
Example #18
0
def symbolic_exec():
    from miasm.ir.symbexec import SymbolicExecutionEngine
    from miasm.core.bin_stream_ida import bin_stream_ida

    from utils import guess_machine

    start, end = idc.SelStart(), idc.SelEnd()

    bs = bin_stream_ida()
    machine = guess_machine(addr=start)

    mdis = machine.dis_engine(bs)

    if start == idc.BADADDR and end == idc.BADADDR:
        start = idc.ScreenEA()
        end = idc.next_head(start) # Get next instruction address

    mdis.dont_dis = [end]
    asmcfg = mdis.dis_multiblock(start)
    ira = machine.ira(loc_db=mdis.loc_db)
    ircfg = ira.new_ircfg_from_asmcfg(asmcfg)

    print("Run symbolic execution...")
    sb = SymbolicExecutionEngine(ira, machine.mn.regs.regs_init)
    sb.run_at(ircfg, start)
    modified = {}

    for dst, src in sb.modified(init_state=machine.mn.regs.regs_init):
        modified[dst] = src

    view = symbolicexec_t()
    all_views.append(view)
    if not view.Create(modified, machine, mdis.loc_db,
                       "Symbolic Execution - 0x%x to 0x%x"
                       % (start, idc.prev_head(end))):
        return

    view.Show()
Example #19
0
def exec_instruction(mn_str, init_values, results, index=0, offset=0):
    """Symbolically execute an instruction and check the expected results."""

    # Assemble and disassemble the instruction
    instr = mn_mep.fromstring(mn_str, "b")
    instr.mode = "b"
    mn_bin = mn_mep.asm(instr)[index]
    try:
        instr = mn_mep.dis(mn_bin, "b")
    except Disasm_Exception:
        assert(False)  # miasm don't know what to do

    # Specify the instruction offset and compute the destination label
    instr.offset = offset
    loc_db = LocationDB()
    if instr.dstflow():
        instr.dstflow2label(loc_db)

    # Get the IR
    im = ir_mepb(loc_db)
    iir, eiir = im.get_ir(instr)

    # Filter out IRDst
    iir = [ir for ir in iir if not (isinstance(ir, ExprAssign) and
                                    isinstance(ir.dst, ExprId) and
                                    ir.dst.name == "IRDst")]

    # Prepare symbolic execution
    sb = SymbolicExecutionEngine(ir_a_mepb(loc_db), regs_init)

    # Assign int values before symbolic evaluation
    for expr_id, expr_value in init_values:
        sb.symbols[expr_id] = expr_value

    # Execute the IR
    ab = AssignBlock(iir)
    sb.eval_updt_assignblk(ab)

    # Check if expected expr_id were modified
    matched_results = 0
    for expr_id, expr_value in results:

        result = sb.eval_expr(expr_id)
        if isinstance(result, ExprLoc):
            addr = loc_db.get_location_offset(result.loc_key)
            if expr_value.arg == addr:
                matched_results += 1
                continue
        elif result == expr_value:
            matched_results += 1
            continue

    # Ensure that all expected results were verified
    if len(results) is not matched_results:
        print("Expected:", results)
        print("Modified:", [r for r in sb.modified(mems=False)])
        assert(False)
Example #20
0
def intra_block_flow_symb(ir_arch, _, flow_graph, irblock, in_nodes,
                          out_nodes):
    symbols_init = ir_arch.arch.regs.regs_init.copy()
    sb = SymbolicExecutionEngine(ir_arch, symbols_init)
    sb.eval_updt_irblock(irblock)
    print('*' * 40)
    print(irblock)

    out = sb.modified(mems=False)
    current_nodes = {}
    # Gen mem arg to mem node links
    for dst, src in out:
        src = sb.eval_expr(dst)
        for n in [dst, src]:

            all_mems = set()
            all_mems.update(get_expr_mem(n))

        for n in all_mems:
            node_n_w = get_node_name(irblock.loc_key, 0, n)
            if not n == src:
                continue
            o_r = n.ptr.get_r(mem_read=False, cst_read=True)
            for i, n_r in enumerate(o_r):
                if n_r in current_nodes:
                    node_n_r = current_nodes[n_r]
                else:
                    node_n_r = get_node_name(irblock.loc_key, i, n_r)
                if not n_r in in_nodes:
                    in_nodes[n_r] = node_n_r
                flow_graph.add_uniq_edge(node_n_r, node_n_w)

    # Gen data flow links
    for dst in out:
        src = sb.eval_expr(dst)
        nodes_r = src.get_r(mem_read=False, cst_read=True)
        nodes_w = set([dst])
        for n_r in nodes_r:
            if n_r in current_nodes:
                node_n_r = current_nodes[n_r]
            else:
                node_n_r = get_node_name(irblock.loc_key, 0, n_r)
            if not n_r in in_nodes:
                in_nodes[n_r] = node_n_r

            flow_graph.add_node(node_n_r)
            for n_w in nodes_w:
                node_n_w = get_node_name(irblock.loc_key, 1, n_w)
                out_nodes[n_w] = node_n_w

                flow_graph.add_node(node_n_w)
                flow_graph.add_uniq_edge(node_n_r, node_n_w)
Example #21
0
    def emul(self, lifter, ctx=None, step=False):
        # Init
        ctx_init = {}
        if ctx is not None:
            ctx_init.update(ctx)
        solver = z3.Solver()
        symb_exec = SymbolicExecutionEngine(lifter, ctx_init)
        history = self.history[::-1]
        history_size = len(history)
        translator = Translator.to_language("z3")
        size = self._ircfg.IRDst.size

        for hist_nb, loc_key in enumerate(history, 1):
            if hist_nb == history_size and loc_key == self.initial_state.loc_key:
                line_nb = self.initial_state.line_nb
            else:
                line_nb = None
            irb = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb)

            # Emul the block and get back destination
            dst = symb_exec.eval_updt_irblock(irb, step=step)

            # Add constraint
            if hist_nb < history_size:
                next_loc_key = history[hist_nb]
                expected = symb_exec.eval_expr(ExprLoc(next_loc_key, size))
                solver.add(
                    self._gen_path_constraints(translator, dst, expected))
        # Save the solver
        self._solver = solver

        # Return only inputs values (others could be wrongs)
        return {
            element: symb_exec.eval_expr(element)
            for element in self.inputs
        }
Example #22
0
    def do_step(self):
        if len(self.todo) == 0:
            return None
        if self.total_done > 600:
            print("symbexec watchdog!")
            return None
        self.total_done += 1
        print('CPT', self.total_done)
        while self.todo:
            state = self.get_next_state()
            parent, ad, s = state
            self.states_done.add(state)
            self.states_var_done.add(state)

            sb = SymbolicExecutionEngine(self.ir_arch, dict(s))

            return parent, ad, sb
        return None
Example #23
0
    def _recognize(self, max_loop_num):
        symb_engine = SymbolicExecutionEngine(self.ir_arch, regs.regs_init)
        todo = [(LocKey(0), symb_engine.get_state())]
        done_loc = set()
        if not max_loop_num:
            max_loop_num = float('inf')
        found_loops_num = 0
        while todo:
            loc_key, symb_state = todo.pop()
            if loc_key in done_loc or loc_key not in self.ircfg.blocks:
                continue
            done_loc.add(loc_key)
            ir_block = self.ircfg.blocks[loc_key]
            symb_engine.set_state(symb_state)
            for ind, assignblk in enumerate(ir_block.assignblks):
                for dst, src in assignblk.items():
                    if max_loop_num < found_loops_num:
                        return
                    if src.is_int() and int(src) in self.func_addresses:
                        assignblk_node = AssignblkNode(ir_block.loc_key, ind,
                                                       dst)
                        # no uses
                        if assignblk_node not in self.analyses.defuse_edges or not \
                                self.analyses.defuse_edges[assignblk_node]:
                            # possible virtual table initialization
                            self.possible_merge_funcs.add(
                                (int(src), frozenset(), loc_key))
                    elif src.is_op("call_func_stack"):
                        self._process_call(src, dst, symb_engine, assignblk,
                                           loc_key)
                    elif (expr_simp(src).is_int() and not is_bad_expr(dst)) \
                            or (ir_block.loc_key == LocKey(0) and dst == src and
                                (not self._merging_var_candidates or dst in self._merging_var_candidates)):
                        if self._process_assignment(ir_block, ind, dst):
                            self._merging_var_candidates = None
                            found_loops_num += 1
                symb_engine.eval_updt_assignblk(assignblk)

            for succ in self.ircfg.successors(loc_key):
                todo.append((succ, symb_engine.get_state()))
Example #24
0
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))
        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))
            if not (addr_a.is_int() or addr_a.is_loc() and
                    addr_b.is_int() or addr_b.is_loc()):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")
            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)
            states_todo.add((addr_a, symbexec.symbols.copy(), tuple(list(conds) + list(viewitems(cond_group_a)))))
            states_todo.add((addr_b, symbexec.symbols.copy(), tuple(list(conds) + list(viewitems(cond_group_b)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
Example #25
0
line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32)
asm = machine.mn.asm(line)[0]

# Get back block
cont = Container.from_string(asm, loc_db=loc_db)
mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.loc_db)
ircfg = ira.new_ircfg()
ira.add_asmblock_to_ircfg(asm_block, ircfg)

# Instantiate a Symbolic Execution engine with default value for registers
symb = SymbolicExecutionEngine(ira)

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
cur_addr = symb.run_at(ircfg, START_ADDR)

# Modified elements
print('Modified registers:')
symb.dump(mems=False)
print('Modified memory (should be empty):')
symb.dump(ids=False)

# Check final status
eax, ebx = ira.arch.regs.EAX, ira.arch.regs.EBX
assert symb.symbols[eax] == ebx
assert eax in symb.symbols
#!/usr/bin/python3
from miasm.analysis.binary import Container
from miasm.analysis.machine import Machine
from miasm.core.locationdb import LocationDB
from miasm.ir.symbexec import SymbolicExecutionEngine

start_addr = 0x402300
loc_db = LocationDB()
target_file = open("hello_world.exe", 'rb')
container = Container.from_stream(target_file, loc_db)

machine = Machine(container.arch)
mdis = machine.dis_engine(container.bin_stream, loc_db=loc_db)
ira = machine.ira(mdis.loc_db)
asm_cfg = mdis.dis_multiblock(start_addr)
ira_cfg = ira.new_ircfg_from_asmcfg(asm_cfg)
symbex = SymbolicExecutionEngine(ira)
symbex_state = symbex.run_block_at(ira_cfg, start_addr)
print(symbex_state)
Example #27
0
def resolve_offsets(state_register, asmcfg, ircfg, ir_arch):
    patches = set()
    nodes_to_walk = list(ircfg.nodes())

    symbols_init = dict()
    for i, r in enumerate(all_regs_ids):
        symbols_init[r] = all_regs_ids_init[i]

    expr_simp.enable_passes({ExprOp: [ignore_call_results]})

    for node in nodes_to_walk:
        irblock = ircfg.get_block(node)
        if not irblock:
            print('[-] Could not get IRBLOCK!')
            sys.exit()

        if len(irblock.assignblks) == 1:
            if irblock.assignblks[
                    0].instr.name == "CMOVNZ" and irblock.assignblks[
                        0].instr.args[0] == state_register:
                temp_reg1 = irblock.assignblks[0].instr.args[0]
                temp_reg2 = irblock.assignblks[0].instr.args[1]

                state1 = None
                state2 = None

                previous_block = ircfg.get_block(ircfg.predecessors(node)[0])
                for line in previous_block.assignblks:
                    if line.instr.name == 'MOV' and \
                            line.instr.args[0] in (temp_reg1, temp_reg2) and isinstance(line.instr.args[1], ExprInt):
                        if line.instr.args[0] == state_register:
                            state1 = line.instr.args[1]
                        else:
                            state2 = line.instr.args[1]
                    if state1 and state2:
                        break

                # compiler shenanigans. state missing is not initialised in current bblk. search function for it
                if not state1:
                    state1 = scan_function_for_state(asmcfg, state_register,
                                                     temp_reg1)
                elif not state2:
                    state2 = scan_function_for_state(asmcfg, state_register,
                                                     temp_reg2)

                blocks1 = get_assignblock_for_state(ircfg, ir_arch,
                                                    symbols_init,
                                                    state_register, state2)
                blocks2 = get_assignblock_for_state(ircfg, ir_arch,
                                                    symbols_init,
                                                    state_register, state1)

                dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
                src1 = irblock.assignblks[0].instr.offset
                patches.add((src1, dst1, CNDP1))

                dst2 = get_address(ircfg.loc_db, blocks2[0].loc_key)
                src2 = src1
                patches.add((src2, dst2, CNDP0))

            elif irblock.assignblks[0].instr.name == "CMOVZ":
                state1 = None
                state2 = None

                temp_reg1 = irblock.assignblks[0].instr.args[0]
                temp_reg2 = irblock.assignblks[0].instr.args[1]

                if temp_reg1 == state_register:
                    previous_block = ircfg.get_block(
                        ircfg.predecessors(node)[0])

                    for line in previous_block.assignblks:
                        if line.instr.name == 'MOV' and \
                                line.instr.args[0] in (temp_reg1, temp_reg2):
                            if line.instr.args[0] == state_register:
                                state1 = line.instr.args[1]
                            else:
                                state2 = line.instr.args[1]

                    if state1 and state2:
                        blocks1 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state1)
                        blocks2 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state2)

                        dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
                        src1 = irblock.assignblks[0].instr.offset
                        patches.add((src1, dst1, CNDP1))

                        dst2 = get_address(ircfg.loc_db, blocks2[0].loc_key)
                        src2 = src1
                        patches.add((src2, dst2, CNDP0))

                    else:
                        found_state = state1 if state1 else state2
                        missing_state = state1 if not state1 else state2
                        subject_reg = temp_reg1 if not state1 else temp_reg2

                        def get_imm_write_for_reg(asmcfg, subject_reg):
                            for node in asmcfg.nodes():
                                asmblock = asmcfg.loc_key_to_block(node)
                                for line in asmblock.lines:
                                    if line.name == 'MOV' and line.args[0] == subject_reg and \
                                            isinstance(line.args[1], ExprInt):
                                        return line.args[1]
                            return None

                        missing_state = get_imm_write_for_reg(
                            asmcfg, subject_reg)
                        if not missing_state:
                            print(
                                "[-] Something went wrong. could not find mising state!"
                            )
                            continue

                        state1 = state1 if state1 == found_state else missing_state
                        state2 = missing_state if state1 == found_state else state2

                        blocks1 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state1)
                        blocks2 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state2)

                        dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
                        src1 = irblock.assignblks[0].instr.offset
                        patches.add((src1, dst1, CNDP1))

                        dst2 = get_address(ircfg.loc_db, blocks2[0].loc_key)
                        src2 = src1
                        patches.add((src2, dst2, CNDP0))

                else:
                    next_block = ircfg.get_block(ircfg.successors(node)[0])
                    for line in next_block.assignblks:
                        if line.instr.name == 'MOV' and line.instr.args[
                                0] == state_register:
                            state1 = line.instr.args[1]
                            break

                    if state1:
                        blocks1 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state1)
                        src = None
                        for assignblk in next_block.assignblks:
                            if assignblk.instr.name == 'JMP':
                                src = assignblk.instr.offset

                        dst_block = ircfg.get_block(blocks1[0].loc_key)
                        if isinstance(dst_block.dst, ExprCond) and len(
                                dst_block.assignblks):
                            if hasattr(dst_block.dst.cond,
                                       'op') and dst_block.dst.cond.op in (
                                           'CC_S>'):
                                dst = get_address(ircfg.loc_db,
                                                  dst_block.dst.src2.loc_key)
                                next_block = ircfg.get_block(dst)
                                dst = get_address(ircfg.loc_db,
                                                  next_block.dst.src1.loc_key)
                            else:
                                dst = get_address(ircfg.loc_db,
                                                  dst_block.dst.src1.loc_key)
                        else:
                            dst = get_address(ircfg.loc_db, blocks1[0].loc_key)

                        patches.add((src, dst, STDP))

        else:
            symbolic_engine = SymbolicExecutionEngine(ir_arch, symbols_init)
            next_addr = symbolic_engine.run_block_at(
                ircfg, get_address(ircfg.loc_db, node))
            next_addr = expr_simp(next_addr)

            updated_state = symbolic_engine.symbols[state_register]

            if isinstance(updated_state, ExprOp):
                updated_state = expr_simp(updated_state)

            if updated_state != symbols_init[state_register] and \
                isinstance(updated_state, ExprOp):

                irblock = ircfg.get_block(node)
                if not irblock:
                    print('[-] Could not get IRBLOCK!')
                    sys.exit()

                if len(irblock.assignblks) > 3:
                    neg_inst = False
                    for i in range(len(irblock.assignblks)):
                        if irblock.assignblks[i].instr.name == 'NEG':
                            neg_inst = True
                        if irblock.assignblks[i].instr.name == 'SBB' and \
                                irblock.assignblks[i + 1].instr.name == 'AND' and \
                                irblock.assignblks[i + 2].instr.name == 'ADD':

                            expr = symbolic_engine.symbols[
                                state_register].copy()

                            if neg_inst:
                                state1 = expr_simp(
                                    expr.replace_expr(
                                        {EAX_init: ExprInt(0, 32)}))
                                state2 = expr_simp(
                                    expr.replace_expr(
                                        {EAX_init: ExprInt(1, 32)}))

                            elif irblock.assignblks[i-1].instr.name == 'CMP' and \
                                irblock.assignblks[i-2].instr.name == 'ADD' and \
                                    isinstance(irblock.assignblks[i-2].instr.args[1], ExprInt):
                                id = irblock.assignblks[i - 2].instr.args[0]
                                imm = irblock.assignblks[i - 2].instr.args[1]

                                state1 = expr_simp(
                                    expr.replace_expr({
                                        EAX_init: imm
                                    }).replace_expr({
                                        symbolic_engine.symbols[id].args[0]:
                                        imm
                                    }))
                                state2 = expr_simp(
                                    expr.replace_expr({
                                        EAX_init: ExprInt(-1, 32)
                                    }).replace_expr({
                                        symbolic_engine.symbols[id].args[0]:
                                        imm
                                    }))

                            blocks1 = get_assignblock_for_state(
                                ircfg, ir_arch, symbols_init, state_register,
                                state1)
                            blocks2 = get_assignblock_for_state(
                                ircfg, ir_arch, symbols_init, state_register,
                                state2)

                            process_blocks_for_patches(node, blocks1, ircfg,
                                                       patches, nodes_to_walk,
                                                       state1, True)
                            process_blocks_for_patches(node, blocks2, ircfg,
                                                       patches, nodes_to_walk,
                                                       state2, False)
                            break

            elif updated_state != symbols_init[state_register] and \
                isinstance(updated_state, ExprInt) and \
                updated_state._get_int() > 0xff:

                #print("[*] Looking for state %s" % hex(updated_state._get_int()))

                referenced_blocks = get_assignblock_for_state(
                    ircfg, ir_arch, symbols_init, state_register,
                    updated_state)
                # for block in referenced_blocks:
                #     print("\t[+] Found reference at %s" % hex(get_address(ircfg.loc_db, block.loc_key)))
                process_blocks_for_patches(node, referenced_blocks, ircfg,
                                           patches, nodes_to_walk)

            elif isinstance(next_addr, ExprCond):
                if not hasattr(next_addr.cond, 'args'):
                    if isinstance(next_addr.src1, ExprLoc):
                        dest1 = next_addr.src1.loc_key
                    else:
                        dest1 = get_loc_key_at(ircfg.loc_db,
                                               next_addr.src1._get_int())

                    if isinstance(next_addr.src2, ExprLoc):
                        dest2 = next_addr.src2.loc_key
                    else:
                        dest2 = get_loc_key_at(ircfg.loc_db,
                                               next_addr.src2._get_int())

                    if dest1 not in nodes_to_walk:
                        nodes_to_walk.append(dest1)

                    if dest2 not in nodes_to_walk:
                        nodes_to_walk.append(dest2)

                    dst2block = ircfg.get_block(dest2)
                    if dst2block.assignblks[0].instr.name == 'CMP' and \
                        dst2block.assignblks[0].instr.args[0] == state_register and \
                            len(ircfg.get_block(node).assignblks) > 1:

                        ref_block = node
                        while True:
                            irblock = ircfg.get_block(
                                ircfg.predecessors(ref_block)[0])
                            if irblock.assignblks[0].instr.name == 'CMP' and \
                                    dst2block.assignblks[0].instr.args[0] == state_register:
                                break
                            ref_block = ircfg.predecessors(ref_block)[0]

                        asmblock = asmcfg.loc_key_to_block(node)
                        for line in asmblock.lines:
                            if line.name == 'JZ':
                                patches.add(
                                    (line.offset,
                                     get_address(asmcfg.loc_db,
                                                 ref_block), CNDP2))
                                true_block = ircfg.get_block(
                                    ircfg.get_block(node).dst.src2.loc_key)
                                symbolic_engine.run_block_at(
                                    ircfg, true_block.loc_key)

                                if isinstance(
                                        symbolic_engine.
                                        symbols[state_register], ExprInt):
                                    referenced_block = get_assignblock_for_state(
                                        ircfg, ir_arch, symbols_init,
                                        state_register, symbolic_engine.
                                        symbols[state_register])[0]
                                    patches.add(
                                        (line.offset,
                                         get_address(ircfg.loc_db,
                                                     referenced_block.loc_key),
                                         CNDP3))
                                break

            elif isinstance(next_addr, ExprInt):
                dest = get_loc_key_at(ircfg.loc_db, next_addr._get_int())
                if dest not in nodes_to_walk:
                    nodes_to_walk.append(
                        get_loc_key_at(ircfg.loc_db, next_addr._get_int()))
    return list(patches)
Example #28
0
# translate asm_cfg into ira_cfg
ira_cfg = ira.new_ircfg_from_asmcfg(asm_cfg)

# set opaque predicate counter
opaque_counter = 0

# dictionary of byte patches
patches = {}

# walk over all basic blocks
for basic_block in asm_cfg.blocks:
    # get address of first basic block instruction
    address = basic_block.lines[0].offset

    # init symbolic execution engine
    sb = SymbolicExecutionEngine(ira)

    # symbolically execute basic block
    e = sb.run_block_at(ira_cfg, address)

    # skip if no conditional jump
    if not e.is_cond():
        continue

    # cond ? src1 : src2

    # check if opaque predicate -- jump
    if branch_cannot_be_taken(e, e.src1):
        print(f"opaque predicate at {hex(address)} (jump is never taken)")
        opaque_counter += 1
Example #29
0
loc_db = LocationDB()
s = '\x8dI\x04\x8d[\x01\x80\xf9\x01t\x05\x8d[\xff\xeb\x03\x8d[\x01\x89\xd8\xc3'
s = '\x55\x8b\xec\x83\xec\x08\xc7\x45\xf8\xcc\xcc\xcc\xcc\xc7\x45\xfc\xcc\xcc\xcc\xcc\xc7\x45\xfc\x03\x00\x00\x00\xc7\x45\xf8\x05\x00\x00\x00\x83\x7d\xfc\x05\x7e\x07\x8b\x45\xfc\xeb\x09\xeb\x05\x8b\x45\xf8\xeb\x02\x33\xc0\x8b\xe5\x5d\xc3'
c = Container.from_string(s)
machine = Machine('x86_32')
mdis = machine.dis_engine(c.bin_stream)
asmcfg = mdis.dis_multiblock(0)
for block in asmcfg.blocks:
    print(block.to_string(asmcfg.loc_db))
ira = machine.ira(loc_db)
ircfg = ira.new_ircfg_from_asmcfg(asmcfg)
# print(ircfg)
# ircfg = ira.new_ircfg(asmcfg)
# print(loc_db._offset_to_loc_key.keys()[0])
sb = SymbolicExecutionEngine(ira)
# symbolic_pc = sb.run_at(ircfg, loc_db._offset_to_loc_key.keys()[0])
# for index, info in enumerate(sb.info_ids):
#     print('###### step', index+1)
#     print('\t', info[0])
#     for reg in info[1]:
#         print('\t\t', reg, ':', info[1][reg])

# dse = DSEEngine(machine)
# sb.symbols[machine.mn.regs.ECX] = ExprInt(253, 32)
# sb.symbols[machine.mn.regs.EBP] = ExprOp('+', ExprId('ESP', 32), ExprInt(0xFFFFFFFC, 32))
# memory_expr = dse.memory_to_expr(0x40000004)
# sb.symbols[machine.mn.regs.EBP] = dse.eval_expr(memory_expr)
symbolic_pc = sb.run_at(ircfg, loc_db._offset_to_loc_key.keys()[0])
print('###### modified state step by step')
for step, info in enumerate(sb.info_ids):
Example #30
0
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")


if __name__ == '__main__':
    loc_db = LocationDB()
    translator_smt2 = Translator.to_language("smt2")

    addr = int(options.address, 16)

    cont = Container.from_stream(open(args[0], 'rb'), loc_db)
    mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)
    lifter = machine.lifter(mdis.loc_db)
    ircfg = lifter.new_ircfg()
    symbexec = SymbolicExecutionEngine(lifter)

    asmcfg = parse_asm.parse_txt(
        machine.mn, 32, '''
    init:
    PUSH argv
    PUSH argc
    PUSH ret_addr
    ''',
        loc_db
    )


    argc_lbl = loc_db.get_name_location('argc')
    argv_lbl = loc_db.get_name_location('argv')
    ret_addr_lbl = loc_db.get_name_location('ret_addr')
Example #31
0
    def test_ClassDef(self):
        from miasm.expression.expression import ExprInt, ExprId, ExprMem, \
            ExprCompose, ExprAssign
        from miasm.arch.x86.sem import ir_x86_32
        from miasm.core.locationdb import LocationDB
        from miasm.ir.symbexec import SymbolicExecutionEngine
        from miasm.ir.ir import AssignBlock


        loc_db = LocationDB()
        ira = ir_x86_32(loc_db)
        ircfg = ira.new_ircfg()

        id_x = ExprId('x', 32)
        id_a = ExprId('a', 32)
        id_b = ExprId('b', 32)
        id_c = ExprId('c', 32)
        id_d = ExprId('d', 32)
        id_e = ExprId('e', 64)

        class CustomSymbExec(SymbolicExecutionEngine):
            def mem_read(self, expr):
                if expr == ExprMem(ExprInt(0x1000, 32), 32):
                    return id_x
                return super(CustomSymbExec, self).mem_read(expr)

        sb = CustomSymbExec(ira,
                            {
                                ExprMem(ExprInt(0x4, 32), 8): ExprInt(0x44, 8),
                                ExprMem(ExprInt(0x5, 32), 8): ExprInt(0x33, 8),
                                ExprMem(ExprInt(0x6, 32), 8): ExprInt(0x22, 8),
                                ExprMem(ExprInt(0x7, 32), 8): ExprInt(0x11, 8),

                                ExprMem(ExprInt(0x20, 32), 32): id_x,

                                ExprMem(ExprInt(0x40, 32), 32): id_x,
                                ExprMem(ExprInt(0x44, 32), 32): id_a,

                                ExprMem(ExprInt(0x54, 32), 32): ExprInt(0x11223344, 32),

                                ExprMem(id_a, 32): ExprInt(0x11223344, 32),
                                id_a: ExprInt(0, 32),
                                id_b: ExprInt(0, 32),

                                ExprMem(id_c, 32): ExprMem(id_d + ExprInt(0x4, 32), 32),
                                ExprMem(id_c + ExprInt(0x4, 32), 32): ExprMem(id_d + ExprInt(0x8, 32), 32),

                            })


        self.assertEqual(sb.eval_expr(ExprInt(1, 32)-ExprInt(1, 32)), ExprInt(0, 32))

        ## Test with unknown mem + integer
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0, 32), 32)), ExprMem(ExprInt(0, 32), 32))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(1, 32), 32)), ExprCompose(ExprMem(ExprInt(1, 32), 24), ExprInt(0x44, 8)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(2, 32), 32)), ExprCompose(ExprMem(ExprInt(2, 32), 16), ExprInt(0x3344, 16)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(3, 32), 32)), ExprCompose(ExprMem(ExprInt(3, 32), 8), ExprInt(0x223344, 24)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(4, 32), 32)), ExprInt(0x11223344, 32))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(5, 32), 32)), ExprCompose(ExprInt(0x112233, 24), ExprMem(ExprInt(8, 32), 8)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(6, 32), 32)), ExprCompose(ExprInt(0x1122, 16), ExprMem(ExprInt(8, 32), 16)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(7, 32), 32)), ExprCompose(ExprInt(0x11, 8), ExprMem(ExprInt(8, 32), 24)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(8, 32), 32)), ExprMem(ExprInt(8, 32), 32))

        ## Test with unknown mem + integer
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x50, 32), 32)), ExprMem(ExprInt(0x50, 32), 32))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x51, 32), 32)), ExprCompose(ExprMem(ExprInt(0x51, 32), 24), ExprInt(0x44, 8)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x52, 32), 32)), ExprCompose(ExprMem(ExprInt(0x52, 32), 16), ExprInt(0x3344, 16)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x53, 32), 32)), ExprCompose(ExprMem(ExprInt(0x53, 32), 8), ExprInt(0x223344, 24)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x54, 32), 32)), ExprInt(0x11223344, 32))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x55, 32), 32)), ExprCompose(ExprInt(0x112233, 24), ExprMem(ExprInt(0x58, 32), 8)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x56, 32), 32)), ExprCompose(ExprInt(0x1122, 16), ExprMem(ExprInt(0x58, 32), 16)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x57, 32), 32)), ExprCompose(ExprInt(0x11, 8), ExprMem(ExprInt(0x58, 32), 24)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x58, 32), 32)), ExprMem(ExprInt(0x58, 32), 32))



        ## Test with unknown mem + id
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x1D, 32), 32)), ExprCompose(ExprMem(ExprInt(0x1D, 32), 24), id_x[:8]))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x1E, 32), 32)), ExprCompose(ExprMem(ExprInt(0x1E, 32), 16), id_x[:16]))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x1F, 32), 32)), ExprCompose(ExprMem(ExprInt(0x1F, 32), 8), id_x[:24]))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x20, 32), 32)), id_x)
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x21, 32), 32)), ExprCompose(id_x[8:], ExprMem(ExprInt(0x24, 32), 8)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x22, 32), 32)), ExprCompose(id_x[16:], ExprMem(ExprInt(0x24, 32), 16)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x23, 32), 32)), ExprCompose(id_x[24:], ExprMem(ExprInt(0x24, 32), 24)))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x24, 32), 32)), ExprMem(ExprInt(0x24, 32), 32))


        ## Partial read
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(4, 32), 8)), ExprInt(0x44, 8))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x20, 32), 8)), id_x[:8])
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x23, 32), 8)), id_x[24:])


        ## Merge
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x40, 32), 64)), ExprCompose(id_x, id_a))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x42, 32), 32)), ExprCompose(id_x[16:], id_a[:16]))

        # Merge memory
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x100, 32), 32)), ExprMem(ExprInt(0x100, 32), 32))
        self.assertEqual(sb.eval_expr(ExprMem(id_c + ExprInt(0x2, 32), 32)), ExprMem(id_d  + ExprInt(0x6, 32), 32))

        ## Unmodified read
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(4, 32), 8)), ExprInt(0x44, 8))

        ## Modified read
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x1000, 32), 32)), id_x)


        ## Apply_change / eval_ir / apply_expr

        ## x = a (with a = 0x0)
        assignblk = AssignBlock({id_x:id_a})
        sb.eval_updt_assignblk(assignblk)
        self.assertEqual(sb.eval_expr(id_x), ExprInt(0, 32))

        ## x = a (without replacing 'a' with 0x0)
        sb.apply_change(id_x, id_a)
        self.assertEqual(sb.eval_expr(id_x), id_a)

        ## x = a (with a = 0x0)
        self.assertEqual(sb.eval_updt_expr(assignblk.dst2ExprAssign(id_x)), ExprInt(0, 32))
        self.assertEqual(sb.eval_expr(id_x), ExprInt(0, 32))
        self.assertEqual(sb.eval_updt_expr(id_x), ExprInt(0, 32))

        sb.dump()

        ## state
        reads = set()
        for dst, src in sb.modified():
            reads.update(ExprAssign(dst, src).get_r())

        self.assertEqual(reads, set([
            id_x, id_a,
            ExprMem(id_d + ExprInt(0x4, 32), 32),
            ExprMem(id_d + ExprInt(0x8, 32), 32),
        ]))

        # Erase low id_x byte with 0xFF
        sb.apply_change(ExprMem(ExprInt(0x20, 32), 8), ExprInt(0xFF, 8))
        state = dict(sb.modified(ids=False))
        self.assertEqual(state[ExprMem(ExprInt(0x20, 32), 8)], ExprInt(0xFF, 8))
        self.assertEqual(state[ExprMem(ExprInt(0x21, 32), 24)], id_x[8:32])

        # Erase high id_x byte with 0xEE
        sb.apply_change(ExprMem(ExprInt(0x23, 32), 8), ExprInt(0xEE, 8))

        state = dict(sb.modified(ids=False))
        self.assertEqual(state[ExprMem(ExprInt(0x20, 32), 8)], ExprInt(0xFF, 8))
        self.assertEqual(state[ExprMem(ExprInt(0x21, 32), 16)], id_x[8:24])
        self.assertEqual(state[ExprMem(ExprInt(0x23, 32), 8)], ExprInt(0xEE, 8))

        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x22, 32), 32)), ExprCompose(id_x[16:24], ExprInt(0xEE, 8), ExprMem(ExprInt(0x24, 32), 16)))

        # Erase low byte of 0x11223344 with 0xFF at 0x54
        sb.apply_change(ExprMem(ExprInt(0x54, 32), 8), ExprInt(0xFF, 8))

        # Erase low byte of 0x11223344 with 0xFF at id_a
        sb.apply_change(ExprMem(id_a + ExprInt(0x1, 32), 8), ExprInt(0xFF, 8))
        state = dict(sb.modified(ids=False))
        self.assertEqual(state[ExprMem(id_a + ExprInt(0x1, 32), 8)], ExprInt(0xFF, 8))
        self.assertEqual(state[ExprMem(id_a + ExprInt(0x2, 32), 16)], ExprInt(0x1122, 16))

        # Write uint32_t at 0xFFFFFFFE
        sb.apply_change(ExprMem(ExprInt(0xFFFFFFFE, 32), 32), ExprInt(0x11223344, 32))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0, 32), 16)), ExprInt(0x1122, 16))

        # Revert memory to original value at 0x42
        sb.apply_change(ExprMem(ExprInt(0x42, 32), 32), ExprMem(ExprInt(0x42, 32), 32))
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x42, 32), 32)), ExprMem(ExprInt(0x42, 32), 32))

        # Revert memory to original value at c + 0x2
        sb.apply_change(ExprMem(id_c + ExprInt(0x2, 32), 32), ExprMem(id_c + ExprInt(0x2, 32), 32))
        self.assertEqual(sb.eval_expr(ExprMem(id_c + ExprInt(0x2, 32), 32)), ExprMem(id_c + ExprInt(0x2, 32), 32))

        # Test del symbol
        del sb.symbols[id_a]
        sb.dump()
        del sb.symbols[ExprMem(id_a, 8)]
        print("*"*40, 'Orig:')
        sb.dump()

        sb_cp = sb.symbols.copy()
        print("*"*40, 'Copy:')
        sb_cp.dump()

        # Add symbol at address limit
        sb.apply_change(ExprMem(ExprInt(0xFFFFFFFE, 32), 32), id_c)
        sb.dump()
        found = False
        for dst, src in viewitems(sb.symbols):
            if dst == ExprMem(ExprInt(0xFFFFFFFE, 32), 32) and src == id_c:
                found = True
        assert found


        # Add symbol at address limit
        sb.apply_change(ExprMem(ExprInt(0x7FFFFFFE, 32), 32), id_c)
        sb.dump()
        found = False
        for dst, src in viewitems(sb.symbols):
            if dst == ExprMem(ExprInt(0x7FFFFFFE, 32), 32) and src == id_c:
                found = True
        assert found



        # Add truncated symbol at address limit
        sb.apply_change(ExprMem(ExprInt(0xFFFFFFFC, 32), 64), id_e)
        # Revert parts of memory
        sb.apply_change(ExprMem(ExprInt(0xFFFFFFFC, 32), 16), ExprMem(ExprInt(0xFFFFFFFC, 32), 16))
        sb.apply_change(ExprMem(ExprInt(0x2, 32), 16), ExprMem(ExprInt(0x2, 32), 16))
        sb.dump()
        found = False
        for dst, src in viewitems(sb.symbols):
            if dst == ExprMem(ExprInt(0xFFFFFFFE, 32), 32) and src == id_e[16:48]:
                found = True
        assert found


        sb_empty = SymbolicExecutionEngine(ira)
        sb_empty.dump()


        # Test memory full
        print('full')
        arch_addr8 = ir_x86_32(loc_db)
        ircfg = arch_addr8.new_ircfg()
        # Hack to obtain tiny address space
        arch_addr8.addrsize = 5
        sb_addr8 = SymbolicExecutionEngine(arch_addr8)
        sb_addr8.dump()
        # Fulfill memory
        sb_addr8.apply_change(ExprMem(ExprInt(0, 5), 256), ExprInt(0, 256))
        sb_addr8.dump()
        variables = list(viewitems(sb_addr8.symbols))
        assert variables == [(ExprMem(ExprInt(0, 5), 256), ExprInt(0, 256))]

        print(sb_addr8.symbols.symbols_mem)

        sb_addr8.apply_change(ExprMem(ExprInt(0x5, 5), 256), ExprInt(0x123, 256))
        sb_addr8.dump()
        variables = list(viewitems(sb_addr8.symbols))
        assert variables == [(ExprMem(ExprInt(0x5, 5), 256), ExprInt(0x123, 256))]
        print(sb_addr8.symbols.symbols_mem)

        print('dump')
        sb_addr8.symbols.symbols_mem.dump()


        sb.dump()
        try:
            del sb.symbols.symbols_mem[ExprMem(ExprInt(0xFFFFFFFF, 32), 32)]
        except KeyError:
            # ok
            pass
        else:
            raise RuntimeError("Should raise error!")


        del sb.symbols.symbols_mem[ExprMem(ExprInt(0xFFFFFFFF, 32), 16)]
        sb.dump()
        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0xFFFFFFFE, 32), 32)),
                         ExprCompose(id_e[16:24], ExprMem(ExprInt(0xFFFFFFFF, 32), 16), id_e[40:48]))
        sb.symbols.symbols_mem.delete_partial(ExprMem(ExprInt(0xFFFFFFFF, 32), 32))

        self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0xFFFFFFFE, 32), 32)),
                         ExprCompose(id_e[16:24], ExprMem(ExprInt(0xFFFFFFFF, 32), 24)))

        sb.dump()

        assert ExprMem(ExprInt(0xFFFFFFFE, 32), 8) in sb.symbols
        assert ExprMem(ExprInt(0xFFFFFFFE, 32), 32) not in sb.symbols
        assert sb.symbols.symbols_mem.contains_partial(ExprMem(ExprInt(0xFFFFFFFE, 32), 32))
        assert not sb.symbols.symbols_mem.contains_partial(ExprMem(ExprInt(0xFFFFFFFF, 32), 8))

        assert list(sb_addr8.symbols) == [ExprMem(ExprInt(0x5, 5), 256)]
Example #32
0
line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32)
asm = machine.mn.asm(line)[0]

# Get back block
cont = Container.from_string(asm, loc_db = loc_db)
mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.loc_db)
ircfg = ira.new_ircfg()
ira.add_asmblock_to_ircfg(asm_block, ircfg)

# Instantiate a Symbolic Execution engine with default value for registers
symb = SymbolicExecutionEngine(ira)

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
cur_addr = symb.run_at(ircfg, START_ADDR)

# Modified elements
print('Modified registers:')
symb.dump(mems=False)
print('Modified memory (should be empty):')
symb.dump(ids=False)

# Check final status
eax, ebx = ira.arch.regs.EAX, ira.arch.regs.EBX
assert symb.symbols[eax] == ebx
assert eax in symb.symbols
Example #33
0
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")


if __name__ == '__main__':

    translator_smt2 = Translator.to_language("smt2")

    addr = int(options.address, 16)

    cont = Container.from_stream(open(args[0], 'rb'))
    mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)
    ir_arch = machine.ir(mdis.loc_db)
    ircfg = ir_arch.new_ircfg()
    symbexec = SymbolicExecutionEngine(ir_arch)

    asmcfg, loc_db = parse_asm.parse_txt(machine.mn, 32, '''
    init:
    PUSH argv
    PUSH argc
    PUSH ret_addr
    ''',
    loc_db=mdis.loc_db)


    argc_lbl = loc_db.get_name_location('argc')
    argv_lbl = loc_db.get_name_location('argv')
    ret_addr_lbl = loc_db.get_name_location('ret_addr')
    init_lbl = loc_db.get_name_location('init')
Example #34
0
open("cfg.dot", "w").write(asmcfg.dot())

# --- Get IR --- #

lifter = LifterModelCall_x86_32(loc_db)
ircfg = lifter.new_ircfg()

first_block = list(asmcfg.blocks)[0]
lifter.add_asmblock_to_ircfg(first_block, ircfg)

# --- Symbolic execution --- #

from miasm.ir.symbexec import SymbolicExecutionEngine
from miasm.expression.expression import *

symb = SymbolicExecutionEngine(lifter, machine.mn.regs.regs_init)

# irDst contains the offset of next IR basic block to execute
irDst = symb.run_at(ircfg, entry_addr, step=False)
print("IR Dest = ", irDst)

# Provide symbolic context to irDst
expr_flag = ExprId("flag", 32)
result = symb.eval_expr(
    expr_simp(
        irDst.replace_expr(
            {
                expr_simp(
                    ExprMem(machine.mn.regs.EBP_init - ExprInt(0x4, 32), 32)):
                expr_flag,
            })))
Example #35
0
    def _normalize_ircfg(self, conn):
        # unalias stack miasm.re/blog/2017/02/03/data_flow_analysis_depgraph.html , but involve base pointer too
        # TODO remove manual *BP propagation in normalize_ircfg and use standrad Miasm propagation when it is fixed
        # remove composes from bigger to smaller, they are not important for us
        bp = {}
        prev_offset = None
        for irb_loc_key in self.ircfg.walk_breadth_first_forward(LocKey(0)):
            irs = []
            if irb_loc_key not in self.ircfg.blocks:
                continue
            irb = self.ircfg.blocks[irb_loc_key]
            if irb.dst.is_cond() and irb.dst.cond.is_op() and irb.dst.cond.op == 'CC_EQ':
                # TODO propagate cmp ..., arb_int too
                # propagate known zeroes to process test    eax, eax; jnz ...; lea     edi, [eax+4]
                symb_exec = SymbolicExecutionEngine(self.ir_arch)
                dst = symb_exec.eval_updt_irblock(irb)
                if dst.is_cond() and dst.cond.is_id() and not is_bad_expr(dst.cond) and \
                        symb_exec.eval_expr(dst.cond) == dst.cond:
                    # add explicit mov ID, 0 to given irb
                    target_loc = dst.src2
                    if target_loc.is_int():
                        target_loc = self.asmcfg.loc_db.get_offset_location(int(target_loc))
                    elif target_loc.is_loc():
                        target_loc = target_loc.loc_key
                    else:
                        continue
                    if len(self.ircfg.predecessors(target_loc)) > 1:
                        continue
                    target_irb = self.ircfg.blocks[target_loc]
                    asign_blk = AssignBlock([ExprAssign(dst.cond, ExprInt(0, dst.cond.size))])
                    assignblks = tuple([asign_blk, *target_irb.assignblks])
                    new_irb = IRBlock(target_loc, assignblks)
                    self.ircfg.blocks[target_loc] = new_irb
            fix_dct = {}
            for assignblk in irb:
                offset = prev_offset
                if assignblk.instr and assignblk.instr.offset:
                    offset = assignblk.instr.offset
                prev_offset = offset
                spd = conn.modules.idc.get_spd(offset)
                if spd is not None:
                    stk_high = ExprInt(spd, self.ir_arch.sp.size)
                    fix_dct = {self.ir_arch.sp: self.mn.regs.regs_init[self.ir_arch.sp] + stk_high}
                    fix_dct.update(bp)
                else:
                    logger.warning("Couldn't acquire stack depth at 0x%x" % (offset or 0x0BADF00D))

                new_assignblk = {}
                for dst, src in assignblk.items():
                    if src.is_compose():
                        slc_arg = None
                        arg = None
                        for tmp_arg in src.args:
                            if not tmp_arg.is_slice():
                                arg = tmp_arg
                            else:
                                # we're interested only in bigger to smaller
                                slc_arg = tmp_arg
                        if slc_arg and arg and len(arg.get_r()) == 1:
                            top_to_bottom_visitor = ExprVisitorCallbackTopToBottom(
                                lambda x: self._resize_top_expr(x, src.size))
                            src = top_to_bottom_visitor.visit(arg)
                    if dst == src:
                        # special compiler anomalies such as lea     esp, [esp+0]
                        continue
                    if src == self.ir_arch.sp:
                        src = expr_simp(src.replace_expr(fix_dct))
                        if bp and src not in bp.values() and irb_loc_key != LocKey(0):
                            raise RuntimeError("Ambiguous base pointer")
                        bp.update({dst: src})
                        fix_dct.update(bp)
                    else:
                        src = expr_simp(src.replace_expr(fix_dct))
                        if dst != self.ir_arch.sp and dst not in bp.keys():
                            dst = dst.replace_expr(fix_dct)

                    dst, src = expr_simp(dst), expr_simp(src)
                    new_assignblk[dst] = src
                irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
            self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)
Example #36
0
line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32)
asm = machine.mn.asm(line)[0]

# Get back block
cont = Container.from_string(asm, loc_db = loc_db)
mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
lifter_model_call = machine.lifter_model_call(mdis.loc_db)
ircfg = lifter_model_call.new_ircfg()
lifter_model_call.add_asmblock_to_ircfg(asm_block, ircfg)

# Instantiate a Symbolic Execution engine with default value for registers
symb = SymbolicExecutionEngine(lifter_model_call)

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
cur_addr = symb.run_at(ircfg, START_ADDR)

# Modified elements
print('Modified registers:')
symb.dump(mems=False)
print('Modified memory (should be empty):')
symb.dump(ids=False)

# Check final status
eax, ebx = lifter_model_call.arch.regs.EAX, lifter_model_call.arch.regs.EBX
assert symb.symbols[eax] == ebx
assert eax in symb.symbols