def emul(self, lifter, ctx=None, step=False):
    """Symbolically execute the relevant blocks as one merged IR block.

    The sliced assignment blocks of every relevant location are gathered,
    walking `self.relevant_loc_keys` in reverse, into a single temporary
    IRBlock which is then evaluated in one pass.

    @lifter: Lifter instance
    @ctx: (optional) Initial context as dictionary
    @step: (optional) Verbose execution
    Return a dictionary mapping each element of `self.inputs` to its value
    in the final symbolic state.

    Warning: The emulation is not sound if the inputs nodes depend on loop
    variant.
    """
    # Private copy of the caller's context, so it is never mutated here
    initial_context = dict(ctx) if ctx is not None else {}

    # Gather the assignments of every relevant block, in reversed order
    loc_keys = self.relevant_loc_keys
    total = len(loc_keys)
    merged_assignblks = []
    for position, loc_key in enumerate(reversed(loc_keys), 1):
        # Only the final block is sliced at the initial state's line, and
        # only when it actually is the initial state's block
        if position == total and loc_key == self.initial_state.loc_key:
            slice_line = self.initial_state.line_nb
        else:
            slice_line = None
        sliced = self.irblock_slice(self._ircfg.blocks[loc_key], slice_line)
        merged_assignblks.extend(sliced.assignblks)

    # Evaluate everything as one block anchored on a "Temp" location
    loc_db = lifter.loc_db
    temp_loc = loc_db.get_or_create_name_location("Temp")
    engine = SymbolicExecutionEngine(lifter, initial_context)
    merged_block = IRBlock(loc_db, temp_loc, merged_assignblks)
    engine.eval_updt_irblock(merged_block, step=step)

    # Only the inputs are reported (other values could be wrong)
    result = {}
    for element in self.inputs:
        result[element] = engine.symbols[element]
    return result
def intra_block_flow_symb(ir_arch, _, flow_graph, irblock, in_nodes, out_nodes):
    """Add the data-flow nodes and edges of one IR block to `flow_graph`.

    The block is symbolically executed starting from the architecture's
    initial register values; the modified (non-memory) destinations are then
    used to link every value read to the value written.

    @ir_arch: lifter-like object; `ir_arch.arch.regs.regs_init` seeds the engine
    @_: unused (kept for signature compatibility)
    @flow_graph: graph receiving nodes and edges (`add_node`,
                 `add_uniq_edge`)
    @irblock: IR block to analyse; its `loc_key` is part of every node name
    @in_nodes: dictionary updated in place, read expression -> node
    @out_nodes: dictionary updated in place, written expression -> node
    """
    symbols_init = ir_arch.arch.regs.regs_init.copy()
    sb = SymbolicExecutionEngine(ir_arch, symbols_init)
    sb.eval_updt_irblock(irblock)
    print('*' * 40)
    print(irblock)

    # Materialise the (dst, src) pairs once: they are walked twice below, and
    # a one-shot iterator would be exhausted after the first loop.
    out = list(sb.modified(mems=False))
    # NOTE(review): never populated in this function, so the lookups below
    # always fall through to get_node_name(); kept to preserve the structure.
    current_nodes = {}

    # Gen mem arg to mem node links
    for dst, _src in out:
        src = sb.eval_expr(dst)

        # Collect the memory expressions of both sides (the set used to be
        # re-created for each side, silently dropping those found in dst)
        all_mems = set()
        for n in (dst, src):
            all_mems.update(get_expr_mem(n))

        for n in all_mems:
            node_n_w = get_node_name(irblock.loc_key, 0, n)
            # Only a memory access which is the whole source value is linked
            if n != src:
                continue
            o_r = n.ptr.get_r(mem_read=False, cst_read=True)
            for i, n_r in enumerate(o_r):
                if n_r in current_nodes:
                    node_n_r = current_nodes[n_r]
                else:
                    node_n_r = get_node_name(irblock.loc_key, i, n_r)
                if n_r not in in_nodes:
                    in_nodes[n_r] = node_n_r
                flow_graph.add_uniq_edge(node_n_r, node_n_w)

    # Gen data flow links
    # `out` holds (dst, src) pairs: unpack them here as well, instead of
    # handing the whole pair to eval_expr().
    for dst, _src in out:
        src = sb.eval_expr(dst)
        nodes_r = src.get_r(mem_read=False, cst_read=True)
        for n_r in nodes_r:
            if n_r in current_nodes:
                node_n_r = current_nodes[n_r]
            else:
                node_n_r = get_node_name(irblock.loc_key, 0, n_r)
            if n_r not in in_nodes:
                in_nodes[n_r] = node_n_r
            flow_graph.add_node(node_n_r)

            # The written node is only registered when something is read,
            # as in the original implementation
            node_n_w = get_node_name(irblock.loc_key, 1, dst)
            out_nodes[dst] = node_n_w
            flow_graph.add_node(node_n_w)
            flow_graph.add_uniq_edge(node_n_r, node_n_w)
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    """Symbolically execute blocks until the worklist `states_todo` is empty.

    A state is a tuple `(addr, symbols, conds)`. Each popped state is
    executed on the block found at `addr`; the resulting destination decides
    which successor states are queued:
    - conditional destination: two states, one per value (0 and 1) of the
      condition, each recording the assumed condition in `conds`;
    - destination equal to the module-level `ret_addr`: nothing is queued;
    - integer or location destination: one state for that destination.

    @ir_arch: lifter-like object handed to SymbolicExecutionEngine; its `pc`
              is dropped from the symbols before each block
    @ircfg, @mdis: forwarded to get_block() to obtain the IR block at `addr`
    @states_todo: set of states to process (consumed and extended in place)
    @states_done: set of already processed states (extended in place)

    Raise ValueError if a destination is neither an integer, a location nor
    a condition whose both outcomes are integers or locations.
    """
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))

        # Work on a copy of the state, without any stale program counter
        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))
            # Both outcomes must be concrete. The parentheses matter: `and`
            # binds tighter than `or`, so the unparenthesised form accepted a
            # pair as soon as one side alone was an integer or a location.
            if not ((addr_a.is_int() or addr_a.is_loc()) and
                    (addr_b.is_int() or addr_b.is_loc())):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")
            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)
            states_todo.add(
                (addr_a, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_a)))))
            states_todo.add(
                (addr_b, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_b)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    """Symbolically execute blocks until the worklist `states_todo` is empty.

    A state is a tuple `(addr, symbols, conds)`. Each popped state is
    executed on the block found at `addr`; the resulting destination decides
    which successor states are queued:
    - conditional destination: two states, one per value (0 and 1) of the
      condition, each recording the assumed condition in `conds`;
    - destination equal to the module-level `ret_addr`: nothing is queued;
    - integer or location destination: one state for that destination.

    @ir_arch: lifter-like object handed to SymbolicExecutionEngine; its `pc`
              is dropped from the symbols before each block
    @ircfg, @mdis: forwarded to get_block() to obtain the IR block at `addr`
    @states_todo: set of states to process (consumed and extended in place)
    @states_done: set of already processed states (extended in place)

    Raise ValueError if a destination is neither an integer, a location nor
    a condition whose both outcomes are integers or locations.
    """
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))

        # Work on a copy of the state, without any stale program counter
        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))
            # Both outcomes must be concrete. The parentheses matter: `and`
            # binds tighter than `or`, so the unparenthesised form accepted a
            # pair as soon as one side alone was an integer or a location.
            if not ((addr_a.is_int() or addr_a.is_loc()) and
                    (addr_b.is_int() or addr_b.is_loc())):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")
            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)
            states_todo.add(
                (addr_a, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_a)))))
            states_todo.add(
                (addr_b, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_b)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
def emul(self, lifter, ctx=None, step=False):
    """Symbolically execute the history block by block, collecting path
    constraints in a z3 solver.

    The blocks of `self.history` are executed in reversed order. After each
    block but the last, the constraint built by `_gen_path_constraints` from
    the computed destination and the next location of the history is added
    to the solver, which is finally stored in `self._solver`.

    @lifter: Lifter instance
    @ctx: (optional) Initial context as dictionary
    @step: (optional) Verbose execution
    Return a dictionary mapping each element of `self.inputs` to its
    evaluated value.
    """
    # Private copy of the caller's context
    initial_context = {} if ctx is None else dict(ctx)

    path_solver = z3.Solver()
    engine = SymbolicExecutionEngine(lifter, initial_context)
    ordered_history = self.history[::-1]
    total = len(ordered_history)
    z3_translator = Translator.to_language("z3")
    dst_size = self._ircfg.IRDst.size

    for position, loc_key in enumerate(ordered_history, 1):
        is_last = position == total

        # Only the last block may be sliced at the initial state's line
        if is_last and loc_key == self.initial_state.loc_key:
            slice_line = self.initial_state.line_nb
        else:
            slice_line = None
        sliced_block = self.irblock_slice(
            self._ircfg.blocks[loc_key], slice_line
        )

        # Run the block; the engine returns its destination
        destination = engine.eval_updt_irblock(sliced_block, step=step)

        # The last block has no successor in the history to constrain
        if is_last:
            continue
        following_loc_key = ordered_history[position]
        expected = engine.eval_expr(ExprLoc(following_loc_key, dst_size))
        constraint = self._gen_path_constraints(
            z3_translator, destination, expected
        )
        path_solver.add(constraint)

    # Keep the solver available on the instance
    self._solver = path_solver

    # Only the inputs are reported (other values could be wrong)
    values = {}
    for element in self.inputs:
        values[element] = engine.eval_expr(element)
    return values
argv_loc: ExprId("argv", argv_loc.size), ret_addr_loc: ret_addr, } block = asmcfg.loc_key_to_block(init_lbl) for instr in block.lines: for i, arg in enumerate(instr.args): instr.args[i]= arg.replace_expr(fix_args) print(block) # add fake address and len to parsed instructions lifter.add_asmblock_to_ircfg(block, ircfg) irb = ircfg.blocks[init_lbl] symbexec.eval_updt_irblock(irb) symbexec.dump(ids=False) # reset lifter blocks lifter.blocks = {} states_todo = set() states_done = set() states_todo.add((addr, symbexec.symbols, ())) # emul blocks, propagate states emul_symb(lifter, ircfg, mdis, states_todo, states_done) all_info = [] print('*' * 40, 'conditions to match', '*' * 40) for addr, symbols, conds in sorted(states_done, key=str):
argv_loc: ExprId("argv", argv_loc.size), ret_addr_loc: ret_addr, } block = asmcfg.loc_key_to_block(init_lbl) for instr in block.lines: for i, arg in enumerate(instr.args): instr.args[i]= arg.replace_expr(fix_args) print(block) # add fake address and len to parsed instructions ir_arch.add_asmblock_to_ircfg(block, ircfg) irb = ircfg.blocks[init_lbl] symbexec.eval_updt_irblock(irb) symbexec.dump(ids=False) # reset ir_arch blocks ir_arch.blocks = {} states_todo = set() states_done = set() states_todo.add((addr, symbexec.symbols, ())) # emul blocks, propagate states emul_symb(ir_arch, ircfg, mdis, states_todo, states_done) all_info = [] print('*' * 40, 'conditions to match', '*' * 40) for addr, symbols, conds in sorted(states_done, key=str):
def _normalize_ircfg(self, conn):
    """Rewrite every reachable block of `self.ircfg` in a normalized form.

    Blocks are visited breadth-first starting from `LocKey(0)`. For each one:

    1. If its destination is a condition whose operator is 'CC_EQ', the block
       is symbolically executed. When the resulting destination is
       conditional on an identifier that the block leaves unchanged, an
       explicit `identifier = 0` assignment is prepended to the `src2` target
       block, unless that target has more than one predecessor.
    2. Each assignment is rewritten: the stack pointer is replaced by its
       initial value plus the stack delta returned by
       `conn.modules.idc.get_spd`, registers found to copy the stack pointer
       (collected in `bp`) are replaced likewise, and qualifying composes are
       replaced by a resized version of their non-slice argument.

    The rewritten block replaces the original one in `self.ircfg.blocks`.

    @conn: object exposing `modules.idc.get_spd(offset)`
           NOTE(review): presumably a remote IDA connection - confirm
    Raise RuntimeError if a second, different base pointer value is found
    outside of the `LocKey(0)` block.
    """
    # unalias stack miasm.re/blog/2017/02/03/data_flow_analysis_depgraph.html , but involve base pointer too
    # TODO remove manual *BP propagation in normalize_ircfg and use standard Miasm propagation when it is fixed
    # remove composes from bigger to smaller, they are not important for us
    # bp: destination -> stack-relative value, shared across all blocks
    bp = {}
    # Last known instruction offset, reused for assignments lacking one
    prev_offset = None
    for irb_loc_key in self.ircfg.walk_breadth_first_forward(LocKey(0)):
        irs = []
        if irb_loc_key not in self.ircfg.blocks:
            continue
        irb = self.ircfg.blocks[irb_loc_key]
        if irb.dst.is_cond() and irb.dst.cond.is_op() and irb.dst.cond.op == 'CC_EQ':
            # TODO propagate cmp ..., arb_int too
            # propagate known zeroes to process test eax, eax; jnz ...; lea edi, [eax+4]
            symb_exec = SymbolicExecutionEngine(self.ir_arch)
            dst = symb_exec.eval_updt_irblock(irb)
            # The condition must be a plain identifier which still evaluates
            # to itself after the block has been executed
            if dst.is_cond() and dst.cond.is_id() and not is_bad_expr(dst.cond) and \
                    symb_exec.eval_expr(dst.cond) == dst.cond:
                # add explicit mov ID, 0 to given irb
                target_loc = dst.src2
                if target_loc.is_int():
                    target_loc = self.asmcfg.loc_db.get_offset_location(int(target_loc))
                elif target_loc.is_loc():
                    target_loc = target_loc.loc_key
                else:
                    continue
                # Skip targets with several predecessors
                if len(self.ircfg.predecessors(target_loc)) > 1:
                    continue
                target_irb = self.ircfg.blocks[target_loc]
                asign_blk = AssignBlock([ExprAssign(dst.cond, ExprInt(0, dst.cond.size))])
                assignblks = tuple([asign_blk, *target_irb.assignblks])
                new_irb = IRBlock(target_loc, assignblks)
                self.ircfg.blocks[target_loc] = new_irb
        # Replacement map applied to the expressions of the current block
        fix_dct = {}
        for assignblk in irb:
            offset = prev_offset
            if assignblk.instr and assignblk.instr.offset:
                offset = assignblk.instr.offset
            prev_offset = offset
            spd = conn.modules.idc.get_spd(offset)
            if spd is not None:
                # sp is replaced by its initial value plus the stack delta
                stk_high = ExprInt(spd, self.ir_arch.sp.size)
                fix_dct = {self.ir_arch.sp: self.mn.regs.regs_init[self.ir_arch.sp] + stk_high}
                fix_dct.update(bp)
            else:
                # The previous fix_dct is kept as is in this case
                logger.warning("Couldn't acquire stack depth at 0x%x" % (offset or 0x0BADF00D))
            new_assignblk = {}
            for dst, src in assignblk.items():
                if src.is_compose():
                    slc_arg = None
                    arg = None
                    # Keep the last slice and the last non-slice argument
                    for tmp_arg in src.args:
                        if not tmp_arg.is_slice():
                            arg = tmp_arg
                        else:
                            # we're interested only in bigger to smaller
                            slc_arg = tmp_arg
                    if slc_arg and arg and len(arg.get_r()) == 1:
                        top_to_bottom_visitor = ExprVisitorCallbackTopToBottom(
                            lambda x: self._resize_top_expr(x, src.size))
                        src = top_to_bottom_visitor.visit(arg)
                if dst == src:
                    # special compiler anomalies such as lea esp, [esp+0]
                    continue
                if src == self.ir_arch.sp:
                    # dst receives the stack pointer: record it in bp
                    src = expr_simp(src.replace_expr(fix_dct))
                    if bp and src not in bp.values() and irb_loc_key != LocKey(0):
                        raise RuntimeError("Ambiguous base pointer")
                    bp.update({dst: src})
                    fix_dct.update(bp)
                else:
                    src = expr_simp(src.replace_expr(fix_dct))
                    # Destinations that are sp itself or a key of bp are not
                    # rewritten
                    if dst != self.ir_arch.sp and dst not in bp.keys():
                        dst = dst.replace_expr(fix_dct)
                dst, src = expr_simp(dst), expr_simp(src)
                new_assignblk[dst] = src
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)