def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    """Symbolically execute blocks from a worklist, forking on conditional jumps.

    Each state is an ``(addr, symbols, conds)`` triple popped from
    ``states_todo``; a state already present in ``states_done`` is skipped.
    The block at ``addr`` is obtained with ``get_block`` and run in a fresh
    ``SymbolicExecutionEngine`` seeded with a copy of ``symbols`` (with the
    program counter entry removed).  Depending on the resulting destination:

    * ``ExprCond``: two successor states are queued, one per value (0 / 1)
      of the condition, each appending the assumed condition to ``conds``;
    * equal to the module-level ``ret_addr``: the path stops;
    * integer or location: one successor state is queued.

    :param ir_arch: IR architecture given to the engine and to ``get_block``.
    :param ircfg: forwarded to ``get_block``.
    :param mdis: forwarded to ``get_block``.
    :param states_todo: set of pending states; popped from and added to.
    :param states_done: set of visited states; added to in place.
    :raises AssertionError: if executing a block yields no destination.
    :raises ValueError: if a destination (or either branch of a conditional
        destination) is neither an integer nor a location.
    """
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))

        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        # Drop any stale program counter value inherited from the parent state.
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        # Explicit raise instead of ``assert`` so the check survives ``python -O``.
        if addr is None:
            raise AssertionError("Symbolic execution returned no destination")

        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))

            # BOTH branches must be resolvable.  The parentheses matter:
            # ``and`` binds tighter than ``or``, so without them one
            # unsupported branch slipped through when the other was fine.
            a_supported = addr_a.is_int() or addr_a.is_loc()
            b_supported = addr_b.is_int() or addr_b.is_loc()
            if not (a_supported and b_supported):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")

            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)

            states_todo.add((
                addr_a,
                symbexec.symbols.copy(),
                tuple(conds) + tuple(viewitems(cond_group_a)),
            ))
            states_todo.add((
                addr_b,
                symbexec.symbols.copy(),
                tuple(conds) + tuple(viewitems(cond_group_b)),
            ))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
def intra_block_flow_symb(ir_arch, _, flow_graph, irblock, in_nodes, out_nodes):
    """Add the intra-block data-flow of ``irblock`` to ``flow_graph``.

    The block is executed symbolically starting from the architecture's
    initial register values; every modified (non-memory) destination is then
    linked to the elements read by its final symbolic value.

    :param ir_arch: IR architecture used to build the symbolic engine.
    :param _: unused; kept so the signature stays call-compatible.
    :param flow_graph: graph receiving nodes (``add_node``) and edges
        (``add_uniq_edge``).
    :param irblock: IR block to analyse.
    :param in_nodes: dict updated in place, mapping each read expression to
        the node name that represents it on entry.
    :param out_nodes: dict updated in place, mapping each written expression
        to the node name that represents it on exit.
    """
    symbols_init = ir_arch.arch.regs.regs_init.copy()
    sb = SymbolicExecutionEngine(ir_arch, symbols_init)
    sb.eval_updt_irblock(irblock)
    print('*' * 40)
    print(irblock)

    # The (dst, src) pairs are walked twice below.  Materialise them once so
    # the second pass still sees them if ``modified`` is a one-shot iterator.
    out = list(sb.modified(mems=False))
    # Never filled in this function, so lookups below always take the
    # ``get_node_name`` branch; kept to preserve the original structure.
    current_nodes = {}

    # Gen mem arg to mem node links
    for dst, src in out:
        src = sb.eval_expr(dst)
        for n in [dst, src]:
            # NOTE(review): the set is rebuilt on every pass, so only the
            # memory sub-expressions of ``src`` survive this loop -- confirm
            # whether those of ``dst`` were meant to be kept as well.
            all_mems = set()
            all_mems.update(get_expr_mem(n))

        for n in all_mems:
            node_n_w = get_node_name(irblock.loc_key, 0, n)
            if not n == src:
                continue
            o_r = n.ptr.get_r(mem_read=False, cst_read=True)
            for i, n_r in enumerate(o_r):
                if n_r in current_nodes:
                    node_n_r = current_nodes[n_r]
                else:
                    node_n_r = get_node_name(irblock.loc_key, i, n_r)
                if n_r not in in_nodes:
                    in_nodes[n_r] = node_n_r
                flow_graph.add_uniq_edge(node_n_r, node_n_w)

    # Gen data flow links
    # Unpack the pair here too: the first loop shows ``out`` holds
    # (dst, src) tuples, and ``eval_expr`` needs the destination alone.
    for dst, _value in out:
        src = sb.eval_expr(dst)
        nodes_r = src.get_r(mem_read=False, cst_read=True)
        nodes_w = {dst}
        for n_r in nodes_r:
            if n_r in current_nodes:
                node_n_r = current_nodes[n_r]
            else:
                node_n_r = get_node_name(irblock.loc_key, 0, n_r)
            if n_r not in in_nodes:
                in_nodes[n_r] = node_n_r

            flow_graph.add_node(node_n_r)
            for n_w in nodes_w:
                node_n_w = get_node_name(irblock.loc_key, 1, n_w)
                out_nodes[n_w] = node_n_w

                flow_graph.add_node(node_n_w)
                flow_graph.add_uniq_edge(node_n_r, node_n_w)
def exec_instruction(mn_str, init_values, results, index=0, offset=0):
    """Symbolically execute an instruction and check the expected results.

    :param mn_str: assembly text of the instruction, parsed by
        ``mn_mep.fromstring``.
    :param init_values: iterable of ``(expr_id, expr_value)`` pairs written
        into the engine's symbols before execution.
    :param results: sequence of ``(expr_id, expr_value)`` pairs that must hold
        after execution.  When an evaluated result is an ``ExprLoc``, its
        location offset is compared against ``expr_value.arg``.
    :param index: which of the encodings returned by ``mn_mep.asm`` to use.
    :param offset: value assigned to ``instr.offset`` before the destination
        label is computed.
    :raises AssertionError: if the assembled bytes cannot be disassembled, or
        if not every expected result is matched.
    """
    # Assemble and disassemble the instruction
    instr = mn_mep.fromstring(mn_str, "b")
    instr.mode = "b"
    mn_bin = mn_mep.asm(instr)[index]
    try:
        instr = mn_mep.dis(mn_bin, "b")
    except Disasm_Exception:
        # miasm doesn't know what to do with the bytes it just produced.
        # Explicit raise: a bare ``assert`` would vanish under ``python -O``.
        raise AssertionError("Unable to disassemble %r" % (mn_str,))

    # Specify the instruction offset and compute the destination label
    instr.offset = offset
    loc_db = LocationDB()
    if instr.dstflow():
        instr.dstflow2label(loc_db)

    # Get the IR (the second element of the returned pair is not needed here)
    im = Lifter_MEPb(loc_db)
    iir, _eiir = im.get_ir(instr)

    # Filter out IRDst
    iir = [
        ir for ir in iir
        if not (isinstance(ir, ExprAssign)
                and isinstance(ir.dst, ExprId)
                and ir.dst.name == "IRDst")
    ]

    # Prepare symbolic execution
    sb = SymbolicExecutionEngine(LifterModelCallMepb(loc_db), regs_init)

    # Assign int values before symbolic evaluation
    for expr_id, expr_value in init_values:
        sb.symbols[expr_id] = expr_value

    # Execute the IR
    ab = AssignBlock(iir)
    sb.eval_updt_assignblk(ab)

    # Check if expected expr_id were modified
    matched_results = 0
    for expr_id, expr_value in results:
        result = sb.eval_expr(expr_id)
        if isinstance(result, ExprLoc):
            # Locations are compared through their numeric offset
            addr = loc_db.get_location_offset(result.loc_key)
            if expr_value.arg == addr:
                matched_results += 1
        elif result == expr_value:
            matched_results += 1

    # Ensure that all expected results were verified.  Value comparison is
    # required here: ``is not`` on ints tests object identity, not equality.
    if len(results) != matched_results:
        print("Expected:", results)
        print("Modified:", list(sb.modified(mems=False)))
        raise AssertionError(
            "Only %d of %d expected results matched"
            % (matched_results, len(results)))
def exec_instruction(mn_str, init_values, results, index=0, offset=0):
    """Symbolically execute an instruction and check the expected results.

    :param mn_str: assembly text of the instruction, parsed by
        ``mn_mep.fromstring``.
    :param init_values: iterable of ``(expr_id, expr_value)`` pairs written
        into the engine's symbols before execution.
    :param results: sequence of ``(expr_id, expr_value)`` pairs that must hold
        after execution.  When an evaluated result is an ``ExprLoc``, its
        location offset is compared against ``expr_value.arg``.
    :param index: which of the encodings returned by ``mn_mep.asm`` to use.
    :param offset: value assigned to ``instr.offset`` before the destination
        label is computed.
    :raises AssertionError: if the assembled bytes cannot be disassembled, or
        if not every expected result is matched.
    """
    # Assemble and disassemble the instruction
    instr = mn_mep.fromstring(mn_str, "b")
    instr.mode = "b"
    mn_bin = mn_mep.asm(instr)[index]
    try:
        instr = mn_mep.dis(mn_bin, "b")
    except Disasm_Exception:
        # miasm doesn't know what to do with the bytes it just produced.
        # Explicit raise: a bare ``assert`` would vanish under ``python -O``.
        raise AssertionError("Unable to disassemble %r" % (mn_str,))

    # Specify the instruction offset and compute the destination label
    instr.offset = offset
    loc_db = LocationDB()
    if instr.dstflow():
        instr.dstflow2label(loc_db)

    # Get the IR (the second element of the returned pair is not needed here)
    im = ir_mepb(loc_db)
    iir, _eiir = im.get_ir(instr)

    # Filter out IRDst
    iir = [
        ir for ir in iir
        if not (isinstance(ir, ExprAssign)
                and isinstance(ir.dst, ExprId)
                and ir.dst.name == "IRDst")
    ]

    # Prepare symbolic execution
    sb = SymbolicExecutionEngine(ir_a_mepb(loc_db), regs_init)

    # Assign int values before symbolic evaluation
    for expr_id, expr_value in init_values:
        sb.symbols[expr_id] = expr_value

    # Execute the IR
    ab = AssignBlock(iir)
    sb.eval_updt_assignblk(ab)

    # Check if expected expr_id were modified
    matched_results = 0
    for expr_id, expr_value in results:
        result = sb.eval_expr(expr_id)
        if isinstance(result, ExprLoc):
            # Locations are compared through their numeric offset
            addr = loc_db.get_location_offset(result.loc_key)
            if expr_value.arg == addr:
                matched_results += 1
        elif result == expr_value:
            matched_results += 1

    # Ensure that all expected results were verified.  Value comparison is
    # required here: ``is not`` on ints tests object identity, not equality.
    if len(results) != matched_results:
        print("Expected:", results)
        print("Modified:", list(sb.modified(mems=False)))
        raise AssertionError(
            "Only %d of %d expected results matched"
            % (matched_results, len(results)))
def emul(self, lifter, ctx=None, step=False):
    """Replay the recorded history symbolically and return the input values.

    The blocks listed in ``self.history`` are executed in reverse order in a
    single ``SymbolicExecutionEngine``.  After each block except the last,
    a constraint built by ``self._gen_path_constraints`` from the block's
    destination and the next expected location is added to a fresh z3
    solver, which is stored in ``self._solver``.

    :param lifter: lifter given to the symbolic execution engine.
    :param ctx: optional initial symbolic context; its content is copied, the
        argument itself is not modified.
    :param step: forwarded to ``eval_updt_irblock``.
    :return: dict mapping each element of ``self.inputs`` to its evaluated
        value in the final symbolic state.
    """
    # Init
    initial_ctx = {}
    if ctx is not None:
        initial_ctx.update(ctx)
    z3_solver = z3.Solver()
    engine = SymbolicExecutionEngine(lifter, initial_ctx)
    path = self.history[::-1]
    path_len = len(path)
    to_z3 = Translator.to_language("z3")
    dst_size = self._ircfg.IRDst.size

    for position, loc_key in enumerate(path, 1):
        is_last = position == path_len

        # Only the final block, when it is the initial state's block, is
        # sliced at the initial state's line number.
        line_nb = None
        if is_last and loc_key == self.initial_state.loc_key:
            line_nb = self.initial_state.line_nb
        sliced_block = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb)

        # Emul the block and get back destination
        dst = engine.eval_updt_irblock(sliced_block, step=step)

        # No successor after the last block: nothing to constrain
        if is_last:
            continue

        # Add constraint: the destination must match the next history entry
        following_loc_key = path[position]
        expected = engine.eval_expr(ExprLoc(following_loc_key, dst_size))
        path_constraint = self._gen_path_constraints(to_z3, dst, expected)
        z3_solver.add(path_constraint)

    # Save the solver
    self._solver = z3_solver

    # Return only inputs values (others could be wrong)
    input_values = {}
    for element in self.inputs:
        input_values[element] = engine.eval_expr(element)
    return input_values
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    """Symbolically execute blocks from a worklist, forking on conditional jumps.

    Each state is an ``(addr, symbols, conds)`` triple popped from
    ``states_todo``; a state already present in ``states_done`` is skipped.
    The block at ``addr`` is obtained with ``get_block`` and run in a fresh
    ``SymbolicExecutionEngine`` seeded with a copy of ``symbols`` (with the
    program counter entry removed).  Depending on the resulting destination:

    * ``ExprCond``: two successor states are queued, one per value (0 / 1)
      of the condition, each appending the assumed condition to ``conds``;
    * equal to the module-level ``ret_addr``: the path stops;
    * integer or location: one successor state is queued.

    :param ir_arch: IR architecture given to the engine and to ``get_block``.
    :param ircfg: forwarded to ``get_block``.
    :param mdis: forwarded to ``get_block``.
    :param states_todo: set of pending states; popped from and added to.
    :param states_done: set of visited states; added to in place.
    :raises AssertionError: if executing a block yields no destination.
    :raises ValueError: if a destination (or either branch of a conditional
        destination) is neither an integer nor a location.
    """
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))

        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        # Drop any stale program counter value inherited from the parent state.
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        # Explicit raise instead of ``assert`` so the check survives ``python -O``.
        if addr is None:
            raise AssertionError("Symbolic execution returned no destination")

        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))

            # BOTH branches must be resolvable.  The parentheses matter:
            # ``and`` binds tighter than ``or``, so without them one
            # unsupported branch slipped through when the other was fine.
            a_supported = addr_a.is_int() or addr_a.is_loc()
            b_supported = addr_b.is_int() or addr_b.is_loc()
            if not (a_supported and b_supported):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")

            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)

            states_todo.add((
                addr_a,
                symbexec.symbols.copy(),
                tuple(conds) + tuple(viewitems(cond_group_a)),
            ))
            states_todo.add((
                addr_b,
                symbexec.symbols.copy(),
                tuple(conds) + tuple(viewitems(cond_group_b)),
            ))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
def _normalize_ircfg(self, conn):
    """Rewrite the blocks of ``self.ircfg`` with stack/base-pointer expressions resolved.

    Blocks are visited breadth-first starting from ``LocKey(0)``.  For every
    assignment, the stack pointer is substituted by its initial value plus
    the stack delta returned by ``conn.modules.idc.get_spd`` for the
    instruction offset; destinations that were assigned the stack pointer
    are recorded in ``bp`` and substituted the same way afterwards.  Each
    fully processed block is stored back into ``self.ircfg.blocks``.

    Before that, when a block ends on a ``CC_EQ`` condition that evaluates to
    a plain identifier left untouched by the block, an explicit
    ``<identifier> = 0`` assignment is prepended to the ``src2`` target
    block, provided that target has at most one predecessor.

    :param conn: object exposing ``conn.modules.idc.get_spd(offset)``
        (presumably a remote connection to IDA -- confirm against the caller).
    :raises RuntimeError: ("Ambiguous base pointer") when, outside of block
        ``LocKey(0)``, a stack-pointer copy resolves to a value that is not
        among those already recorded in ``bp``.
    """
    # unalias stack miasm.re/blog/2017/02/03/data_flow_analysis_depgraph.html , but involve base pointer too
    # TODO remove manual *BP propagation in normalize_ircfg and use standard Miasm propagation when it is fixed
    # remove composes from bigger to smaller, they are not important for us
    bp = {}  # destination expression -> resolved stack-pointer-based value
    prev_offset = None  # last known instruction offset, reused when one is missing
    for irb_loc_key in self.ircfg.walk_breadth_first_forward(LocKey(0)):
        irs = []
        if irb_loc_key not in self.ircfg.blocks:
            continue
        irb = self.ircfg.blocks[irb_loc_key]
        if irb.dst.is_cond() and irb.dst.cond.is_op() and irb.dst.cond.op == 'CC_EQ':
            # TODO propagate cmp ..., arb_int too
            # propagate known zeroes to process test eax, eax; jnz ...; lea edi, [eax+4]
            symb_exec = SymbolicExecutionEngine(self.ir_arch)
            dst = symb_exec.eval_updt_irblock(irb)
            # Only handle a condition that is a bare identifier which still
            # evaluates to itself after the block has been executed.
            if dst.is_cond() and dst.cond.is_id() and not is_bad_expr(dst.cond) and \
                    symb_exec.eval_expr(dst.cond) == dst.cond:
                # add explicit mov ID, 0 to given irb
                target_loc = dst.src2
                if target_loc.is_int():
                    target_loc = self.asmcfg.loc_db.get_offset_location(int(target_loc))
                elif target_loc.is_loc():
                    target_loc = target_loc.loc_key
                else:
                    # NOTE(review): this `continue` (and the one below) moves on
                    # to the next block, so the current block is not rewritten
                    # by the loop further down -- confirm this is intended.
                    continue
                if len(self.ircfg.predecessors(target_loc)) > 1:
                    continue
                target_irb = self.ircfg.blocks[target_loc]
                asign_blk = AssignBlock([ExprAssign(dst.cond, ExprInt(0, dst.cond.size))])
                assignblks = tuple([asign_blk, *target_irb.assignblks])
                new_irb = IRBlock(target_loc, assignblks)
                self.ircfg.blocks[target_loc] = new_irb
        fix_dct = {}  # substitutions applied to the expressions of this block
        for assignblk in irb:
            # Fall back to the previous offset when the instruction has none
            offset = prev_offset
            if assignblk.instr and assignblk.instr.offset:
                offset = assignblk.instr.offset
            prev_offset = offset
            spd = conn.modules.idc.get_spd(offset)
            if spd is not None:
                stk_high = ExprInt(spd, self.ir_arch.sp.size)
                # sp -> initial sp + stack delta, plus every recorded bp entry
                fix_dct = {self.ir_arch.sp: self.mn.regs.regs_init[self.ir_arch.sp] + stk_high}
                fix_dct.update(bp)
            else:
                # fix_dct keeps whatever value it had from the previous assignblk
                logger.warning("Couldn't acquire stack depth at 0x%x" % (offset or 0x0BADF00D))
            new_assignblk = {}
            for dst, src in assignblk.items():
                if src.is_compose():
                    slc_arg = None
                    arg = None
                    # Keep the last slice argument and the last non-slice argument
                    for tmp_arg in src.args:
                        if not tmp_arg.is_slice():
                            arg = tmp_arg
                        else:
                            # we're interested only in bigger to smaller
                            slc_arg = tmp_arg
                    if slc_arg and arg and len(arg.get_r()) == 1:
                        # `src` inside the lambda is looked up when the callback
                        # runs, i.e. during visit() below, while it still names
                        # the original compose expression.
                        top_to_bottom_visitor = ExprVisitorCallbackTopToBottom(
                            lambda x: self._resize_top_expr(x, src.size))
                        src = top_to_bottom_visitor.visit(arg)
                if dst == src:
                    # special compiler anomalies such as lea esp, [esp+0]
                    continue
                if src == self.ir_arch.sp:
                    # dst receives a copy of the stack pointer: record it in bp
                    src = expr_simp(src.replace_expr(fix_dct))
                    if bp and src not in bp.values() and irb_loc_key != LocKey(0):
                        raise RuntimeError("Ambiguous base pointer")
                    bp.update({dst: src})
                    fix_dct.update(bp)
                else:
                    src = expr_simp(src.replace_expr(fix_dct))
                    # Leave the stack pointer and recorded bp entries intact
                    # when they are the destination being written
                    if dst != self.ir_arch.sp and dst not in bp.keys():
                        dst = dst.replace_expr(fix_dct)
                dst, src = expr_simp(dst), expr_simp(src)
                new_assignblk[dst] = src
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)
# Lift only the first disassembled block into a fresh IR control-flow graph
ircfg = lifter.new_ircfg()
first_block = list(asmcfg.blocks)[0]
lifter.add_asmblock_to_ircfg(first_block, ircfg)

# --- Symbolic execution --- #
from miasm.ir.symbexec import SymbolicExecutionEngine
from miasm.expression.expression import *

# Start from the architecture's initial register values
symb = SymbolicExecutionEngine(lifter, machine.mn.regs.regs_init)

# irDst contains the offset of next IR basic block to execute
irDst = symb.run_at(ircfg, entry_addr, step=False)
print("IR Dest = ", irDst)

# Provide symbolic context to irDst: name the 32-bit memory cell located at
# (initial EBP - 4) "flag", then re-evaluate the destination with it.
expr_flag = ExprId("flag", 32)
flag_mem_cell = expr_simp(
    ExprMem(machine.mn.regs.EBP_init - ExprInt(0x4, 32), 32))
irDst_with_flag = expr_simp(irDst.replace_expr({flag_mem_cell: expr_flag}))
result = symb.eval_expr(irDst_with_flag)
print("IR Dest Semantics = ", result)

# Dump the final state of symbolic execution
# symb.dump()