Beispiel #1
0
    def emul(self, lifter, ctx=None, step=False):
        """Symbolic execution of relevant nodes according to the history
        Return the values of inputs nodes' elements
        @lifter: Lifter instance
        @ctx: (optional) Initial context as dictionary
        @step: (optional) Verbose execution
        Warning: The emulation is not sound if the inputs nodes depend on loop
        variant.
        """
        # Init
        ctx_init = {}
        if ctx is not None:
            ctx_init.update(ctx)
        assignblks = []

        # Build a single assignment block according to history
        last_index = len(self.relevant_loc_keys)
        for index, loc_key in enumerate(reversed(self.relevant_loc_keys), 1):
            if index == last_index and loc_key == self.initial_state.loc_key:
                line_nb = self.initial_state.line_nb
            else:
                line_nb = None
            assignblks += self.irblock_slice(self._ircfg.blocks[loc_key],
                                             line_nb).assignblks

        # Eval the block
        loc_db = lifter.loc_db
        temp_loc = loc_db.get_or_create_name_location("Temp")
        symb_exec = SymbolicExecutionEngine(lifter, ctx_init)
        symb_exec.eval_updt_irblock(IRBlock(loc_db, temp_loc, assignblks),
                                    step=step)

        # Return only inputs values (others could be wrongs)
        return {element: symb_exec.symbols[element] for element in self.inputs}
Beispiel #2
0
def intra_block_flow_symb(ir_arch, _, flow_graph, irblock, in_nodes,
                          out_nodes):
    symbols_init = ir_arch.arch.regs.regs_init.copy()
    sb = SymbolicExecutionEngine(ir_arch, symbols_init)
    sb.eval_updt_irblock(irblock)
    print('*' * 40)
    print(irblock)

    out = sb.modified(mems=False)
    current_nodes = {}
    # Gen mem arg to mem node links
    for dst, src in out:
        src = sb.eval_expr(dst)
        for n in [dst, src]:

            all_mems = set()
            all_mems.update(get_expr_mem(n))

        for n in all_mems:
            node_n_w = get_node_name(irblock.loc_key, 0, n)
            if not n == src:
                continue
            o_r = n.ptr.get_r(mem_read=False, cst_read=True)
            for i, n_r in enumerate(o_r):
                if n_r in current_nodes:
                    node_n_r = current_nodes[n_r]
                else:
                    node_n_r = get_node_name(irblock.loc_key, i, n_r)
                if not n_r in in_nodes:
                    in_nodes[n_r] = node_n_r
                flow_graph.add_uniq_edge(node_n_r, node_n_w)

    # Gen data flow links
    for dst in out:
        src = sb.eval_expr(dst)
        nodes_r = src.get_r(mem_read=False, cst_read=True)
        nodes_w = set([dst])
        for n_r in nodes_r:
            if n_r in current_nodes:
                node_n_r = current_nodes[n_r]
            else:
                node_n_r = get_node_name(irblock.loc_key, 0, n_r)
            if not n_r in in_nodes:
                in_nodes[n_r] = node_n_r

            flow_graph.add_node(node_n_r)
            for n_w in nodes_w:
                node_n_w = get_node_name(irblock.loc_key, 1, n_w)
                out_nodes[n_w] = node_n_w

                flow_graph.add_node(node_n_w)
                flow_graph.add_uniq_edge(node_n_r, node_n_w)
Beispiel #3
0
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))
        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))
            if not (addr_a.is_int() or addr_a.is_loc() and addr_b.is_int()
                    or addr_b.is_loc()):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")
            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)
            states_todo.add(
                (addr_a, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_a)))))
            states_todo.add(
                (addr_b, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_b)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
Beispiel #4
0
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))
        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))
            if not (addr_a.is_int() or addr_a.is_loc() and
                    addr_b.is_int() or addr_b.is_loc()):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")
            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)
            states_todo.add((addr_a, symbexec.symbols.copy(), tuple(list(conds) + list(viewitems(cond_group_a)))))
            states_todo.add((addr_b, symbexec.symbols.copy(), tuple(list(conds) + list(viewitems(cond_group_b)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
Beispiel #5
0
    def emul(self, lifter, ctx=None, step=False):
        # Init
        ctx_init = {}
        if ctx is not None:
            ctx_init.update(ctx)
        solver = z3.Solver()
        symb_exec = SymbolicExecutionEngine(lifter, ctx_init)
        history = self.history[::-1]
        history_size = len(history)
        translator = Translator.to_language("z3")
        size = self._ircfg.IRDst.size

        for hist_nb, loc_key in enumerate(history, 1):
            if hist_nb == history_size and loc_key == self.initial_state.loc_key:
                line_nb = self.initial_state.line_nb
            else:
                line_nb = None
            irb = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb)

            # Emul the block and get back destination
            dst = symb_exec.eval_updt_irblock(irb, step=step)

            # Add constraint
            if hist_nb < history_size:
                next_loc_key = history[hist_nb]
                expected = symb_exec.eval_expr(ExprLoc(next_loc_key, size))
                solver.add(
                    self._gen_path_constraints(translator, dst, expected))
        # Save the solver
        self._solver = solver

        # Return only inputs values (others could be wrongs)
        return {
            element: symb_exec.eval_expr(element)
            for element in self.inputs
        }
Beispiel #6
0
        argv_loc: ExprId("argv", argv_loc.size),
        ret_addr_loc: ret_addr,
    }



    block = asmcfg.loc_key_to_block(init_lbl)
    for instr in block.lines:
        for i, arg in enumerate(instr.args):
            instr.args[i]= arg.replace_expr(fix_args)
    print(block)

    # add fake address and len to parsed instructions
    lifter.add_asmblock_to_ircfg(block, ircfg)
    irb = ircfg.blocks[init_lbl]
    symbexec.eval_updt_irblock(irb)
    symbexec.dump(ids=False)
    # reset lifter blocks
    lifter.blocks = {}

    states_todo = set()
    states_done = set()
    states_todo.add((addr, symbexec.symbols, ()))

    # emul blocks, propagate states
    emul_symb(lifter, ircfg, mdis, states_todo, states_done)

    all_info = []

    print('*' * 40, 'conditions to match', '*' * 40)
    for addr, symbols, conds in sorted(states_done, key=str):
Beispiel #7
0
        argv_loc: ExprId("argv", argv_loc.size),
        ret_addr_loc: ret_addr,
    }



    block = asmcfg.loc_key_to_block(init_lbl)
    for instr in block.lines:
        for i, arg in enumerate(instr.args):
            instr.args[i]= arg.replace_expr(fix_args)
    print(block)

    # add fake address and len to parsed instructions
    ir_arch.add_asmblock_to_ircfg(block, ircfg)
    irb = ircfg.blocks[init_lbl]
    symbexec.eval_updt_irblock(irb)
    symbexec.dump(ids=False)
    # reset ir_arch blocks
    ir_arch.blocks = {}

    states_todo = set()
    states_done = set()
    states_todo.add((addr, symbexec.symbols, ()))

    # emul blocks, propagate states
    emul_symb(ir_arch, ircfg, mdis, states_todo, states_done)

    all_info = []

    print('*' * 40, 'conditions to match', '*' * 40)
    for addr, symbols, conds in sorted(states_done, key=str):
Beispiel #8
0
    def _normalize_ircfg(self, conn):
        # unalias stack miasm.re/blog/2017/02/03/data_flow_analysis_depgraph.html , but involve base pointer too
        # TODO remove manual *BP propagation in normalize_ircfg and use standrad Miasm propagation when it is fixed
        # remove composes from bigger to smaller, they are not important for us
        bp = {}
        prev_offset = None
        for irb_loc_key in self.ircfg.walk_breadth_first_forward(LocKey(0)):
            irs = []
            if irb_loc_key not in self.ircfg.blocks:
                continue
            irb = self.ircfg.blocks[irb_loc_key]
            if irb.dst.is_cond() and irb.dst.cond.is_op() and irb.dst.cond.op == 'CC_EQ':
                # TODO propagate cmp ..., arb_int too
                # propagate known zeroes to process test    eax, eax; jnz ...; lea     edi, [eax+4]
                symb_exec = SymbolicExecutionEngine(self.ir_arch)
                dst = symb_exec.eval_updt_irblock(irb)
                if dst.is_cond() and dst.cond.is_id() and not is_bad_expr(dst.cond) and \
                        symb_exec.eval_expr(dst.cond) == dst.cond:
                    # add explicit mov ID, 0 to given irb
                    target_loc = dst.src2
                    if target_loc.is_int():
                        target_loc = self.asmcfg.loc_db.get_offset_location(int(target_loc))
                    elif target_loc.is_loc():
                        target_loc = target_loc.loc_key
                    else:
                        continue
                    if len(self.ircfg.predecessors(target_loc)) > 1:
                        continue
                    target_irb = self.ircfg.blocks[target_loc]
                    asign_blk = AssignBlock([ExprAssign(dst.cond, ExprInt(0, dst.cond.size))])
                    assignblks = tuple([asign_blk, *target_irb.assignblks])
                    new_irb = IRBlock(target_loc, assignblks)
                    self.ircfg.blocks[target_loc] = new_irb
            fix_dct = {}
            for assignblk in irb:
                offset = prev_offset
                if assignblk.instr and assignblk.instr.offset:
                    offset = assignblk.instr.offset
                prev_offset = offset
                spd = conn.modules.idc.get_spd(offset)
                if spd is not None:
                    stk_high = ExprInt(spd, self.ir_arch.sp.size)
                    fix_dct = {self.ir_arch.sp: self.mn.regs.regs_init[self.ir_arch.sp] + stk_high}
                    fix_dct.update(bp)
                else:
                    logger.warning("Couldn't acquire stack depth at 0x%x" % (offset or 0x0BADF00D))

                new_assignblk = {}
                for dst, src in assignblk.items():
                    if src.is_compose():
                        slc_arg = None
                        arg = None
                        for tmp_arg in src.args:
                            if not tmp_arg.is_slice():
                                arg = tmp_arg
                            else:
                                # we're interested only in bigger to smaller
                                slc_arg = tmp_arg
                        if slc_arg and arg and len(arg.get_r()) == 1:
                            top_to_bottom_visitor = ExprVisitorCallbackTopToBottom(
                                lambda x: self._resize_top_expr(x, src.size))
                            src = top_to_bottom_visitor.visit(arg)
                    if dst == src:
                        # special compiler anomalies such as lea     esp, [esp+0]
                        continue
                    if src == self.ir_arch.sp:
                        src = expr_simp(src.replace_expr(fix_dct))
                        if bp and src not in bp.values() and irb_loc_key != LocKey(0):
                            raise RuntimeError("Ambiguous base pointer")
                        bp.update({dst: src})
                        fix_dct.update(bp)
                    else:
                        src = expr_simp(src.replace_expr(fix_dct))
                        if dst != self.ir_arch.sp and dst not in bp.keys():
                            dst = dst.replace_expr(fix_dct)

                    dst, src = expr_simp(dst), expr_simp(src)
                    new_assignblk[dst] = src
                irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
            self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)