def emul(self, lifter, ctx=None, step=False):
    """Symbolically replay the relevant blocks as one merged IR block.

    The assignment blocks of every relevant location are collected, walking
    ``self.relevant_loc_keys`` in reverse order, into a single temporary
    IRBlock that is then evaluated in one pass.

    @lifter: Lifter instance
    @ctx: (optional) Initial context as dictionary
    @step: (optional) Verbose execution

    Return a dictionary mapping each element of ``self.inputs`` to the value
    held by the engine after the evaluation.

    Warning: The emulation is not sound if the inputs nodes depend on loop
    variant.
    """
    # Never hand the caller's dictionary to the engine: work on a copy
    initial_symbols = dict(ctx) if ctx is not None else {}

    # Only the very last visited block may be sliced, and only when it is the
    # block of the initial state
    total = len(self.relevant_loc_keys)
    collected = []
    for position, loc_key in enumerate(reversed(self.relevant_loc_keys), 1):
        is_initial = (position == total
                      and loc_key == self.initial_state.loc_key)
        line_nb = self.initial_state.line_nb if is_initial else None
        sliced = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb)
        collected.extend(sliced.assignblks)

    # Evaluate everything as one block anchored at a scratch location
    loc_db = lifter.loc_db
    scratch_loc = loc_db.get_or_create_name_location("Temp")
    engine = SymbolicExecutionEngine(lifter, initial_symbols)
    merged_block = IRBlock(loc_db, scratch_loc, collected)
    engine.eval_updt_irblock(merged_block, step=step)

    # Only the inputs are reported (other values could be wrong)
    return {element: engine.symbols[element] for element in self.inputs}
def compute(asm, inputstate={}, debug=False):
    """Run one assembled instruction symbolically and report what changed.

    The instruction text is parsed, assembled (first encoding), disassembled
    again with mode "b" and lifted into a fresh IR CFG before being executed.

    :param asm: textual instruction handed to ``mn.fromstring``
    :param inputstate: mapping of expression -> integer value; each value is
        wrapped in an ``ExprInt`` of the key's size. ``inputstate.get(PC, 0)``
        is used as the instruction offset.
    :param debug: when true, print every symbol differing from ``regs_init``
    :return: dictionary of changed symbols (entries in ``EXCLUDE_REGS`` are
        skipped); ``ExprInt`` values are converted to ``int``
    """
    loc_db = LocationDB()

    # Default register state, overridden by the caller-supplied integers
    initial = dict(regs_init)
    for reg, value in viewitems(inputstate):
        initial[reg] = ExprInt(value, reg.size)

    lifter = ir_arch(loc_db)
    ircfg = lifter.new_ircfg()
    engine = SymbolicExecutionEngine(lifter, initial)

    # Round-trip through the assembler to get a decoded instruction
    parsed = mn.fromstring(asm, loc_db, "b")
    encoded = mn.asm(parsed)[0]
    instr = mn.dis(encoded, "b")
    instr.offset = inputstate.get(PC, 0)

    entry = lifter.add_instr_to_ircfg(instr, ircfg)
    engine.run_at(ircfg, entry)

    if debug:
        for reg, value in viewitems(engine.symbols):
            if regs_init.get(reg, None) != value:
                print(reg, value)

    changed = {}
    for reg, value in viewitems(engine.symbols):
        if reg in EXCLUDE_REGS or regs_init.get(reg, None) == value:
            continue
        changed[reg] = int(value) if isinstance(value, ExprInt) else value
    return changed
def ExecuteSymbolicSingleStep(addr, state=INIT_REG):
    """Symbolically execute the single instruction located at ``addr``.

    The instruction bytes are read through ``idc``, disassembled with
    ``mn_x86`` (mode 64), lifted, and run from ``state``.

    :param addr: address of the instruction to execute
    :param state: initial symbolic state; it is copied, never returned as-is
    :return: a copy of ``state`` updated with every symbol reported by
        ``sb.modified()``. Values that are an ``ExprOp`` with op
        ``"call_func_ret"`` are replaced by ``ExprInt(0, 64)``. When the
        instruction cannot be disassembled, lifted or executed, an unmodified
        copy of ``state`` is returned (best effort).
    """
    size = idc.ItemSize(addr)
    code = idc.GetManyBytes(addr, size)
    loc_db = LocationDB()
    base = addr

    # Best effort: a failure at any stage leaves the state untouched.
    # ``Exception`` (rather than a bare ``except``) keeps KeyboardInterrupt
    # and SystemExit propagating to the caller.
    try:
        ins = mn_x86.dis(bin_stream_str(code, base_address=base), 64, base)
    except Exception:
        return state.copy()

    ira = machine.ira(loc_db)
    ircfg = ira.new_ircfg()
    try:
        ira.add_instr_to_ircfg(ins, ircfg)
        sb = SymbolicExecutionEngine(ira, state)
        sb.run_at(ircfg, base)
    except Exception:
        return state.copy()

    ret = state.copy()
    for key, value in sb.modified():
        # Call results are replaced by a constant zero
        if isinstance(value, ExprOp) and value.op == "call_func_ret":
            value = ExprInt(0, 64)
        ret[key] = value
    return ret
def compute(asm, inputstate={}, debug=False):
    """Symbolically execute one instruction (mode "l") and return its effects.

    :param asm: textual instruction handed to ``mn.fromstring``
    :param inputstate: mapping of expression -> integer value, turned into
        ``ExprInt`` of the key's size; ``inputstate.get(PC, 0)`` gives the
        instruction offset
    :param debug: when true, print every symbol differing from ``regs_init``
    :return: dictionary of symbols whose value differs from ``regs_init``,
        excluding ``EXCLUDE_REGS``; ``ExprInt`` values are returned as ``int``
    """
    loc_db = LocationDB()

    start_state = dict(regs_init)
    overrides = {
        reg: ExprInt(value, reg.size)
        for reg, value in viewitems(inputstate)
    }
    start_state.update(overrides)

    lifter = ir_arch(loc_db)
    ircfg = lifter.new_ircfg()
    engine = SymbolicExecutionEngine(lifter, start_state)

    # Assemble then disassemble so the engine works on a decoded instruction
    encoded = mn.asm(mn.fromstring(asm, loc_db, "l"))[0]
    instr = mn.dis(encoded, "l")
    instr.offset = inputstate.get(PC, 0)
    entry = lifter.add_instr_to_ircfg(instr, ircfg)
    engine.run_at(ircfg, entry)

    if debug:
        for reg, value in viewitems(engine.symbols):
            if regs_init.get(reg, None) != value:
                print(reg, value)

    effects = {}
    for reg, value in viewitems(engine.symbols):
        if reg in EXCLUDE_REGS:
            continue
        if regs_init.get(reg, None) == value:
            continue
        if isinstance(value, ExprInt):
            value = int(value)
        effects[reg] = value
    return effects
def sym(data, addr, status):
    """Run one basic block of ``data`` symbolically and return its IRDst.

    :param data: raw bytes given to ``Container.from_string``
    :param addr: load address, also used as the block start address
    :param status: symbol overrides applied on top of a deep copy of
        ``regs.regs_init``
    :return: ``expr_simp_explicit`` applied to the engine's ``IRDst`` symbol,
        or ``None`` when lifting raises ``NotImplementedError``
    """
    container = Container.from_string(data, loc_db=loc_db, addr=addr)
    disassembler = machine.dis_engine(container.bin_stream, loc_db=loc_db)
    block = disassembler.dis_block(addr)

    # ASM -> IR; instructions without an IR implementation abort the run
    ircfg = ira.new_ircfg()
    try:
        ira.add_asmblock_to_ircfg(block, ircfg)
    except NotImplementedError:
        return None

    # Start from the default register values, then apply the caller's status
    start_state = copy.deepcopy(regs.regs_init)
    start_state.update(status)
    engine = SymbolicExecutionEngine(ira, start_state)

    # Emulate a single IR basic block
    engine.run_at(ircfg, addr)

    return expr_simp_explicit(engine.symbols[ira.IRDst])
def _deobfuscate_cff_loops(self, source_block, symbols):
    """
    Process the flattened blocks reachable from ``source_block`` with a
    depth-first worklist, one symbolic state per pending block.

    :param symbols: initial symbols of symbolic execution engine to be created
    :param source_block: head of the graph to be deobfuscated
    :return: the ``LocKey(0)`` associated with the head flat block
    :raises Exception: when a single block location is processed more than
        500 times
    """
    engine = SymbolicExecutionEngine(self.ir_arch)
    head = self.flat_loops.get_block(source_block.loc_key, engine, None)
    new_head = LocKey(0)
    # maps flattening blocks to their respective loc_keys
    flat_block_to_loc_key = {head: new_head}
    pending = [FlattenState(head, symbols)]
    visits = {}

    while pending:
        current = pending.pop()
        block_loc_key = current.flat_block.block_loc_key
        self.relevant_nodes.add(block_loc_key)
        visits[block_loc_key] = visits.get(block_loc_key, 0) + 1

        offset = self.asmcfg.loc_db.get_location_offset(block_loc_key) or 0xBAD
        loc_key_text = str(block_loc_key)
        is_affected = block_loc_key in self.all_affected_lines
        loops_id = self.flat_loops[block_loc_key].loc_key
        logger.debug("Processing block at 0x%x as %s; in all affected: %d; loops_id: %s; the jtc_vars are:" %
                     (offset, loc_key_text, is_affected, loops_id))

        # Watchdog against endless re-processing of the same location
        if visits[block_loc_key] > 500:
            raise Exception("Couldn't deobfuscate cff loop, either fell into an infinite loop or processing very "
                            "big function")

        engine.set_state(current.symbols)
        # evaluate all affected lines
        self._eval_updt_lines(engine, block_loc_key)
        successors = self._insert_flat_block(current.flat_block, engine,
                                             flat_block_to_loc_key)
        for successor in successors:
            pending.append(FlattenState(successor, engine.get_state()))

    return new_head
def symbolicExecution(self, ira, ircfg, address, state):
    """Run blocks from ``address`` until the destination is conditional or memory.

    :param ira: IR architecture handed to the engine
    :param ircfg: IR control-flow graph containing the blocks to run
    :param address: address of the first block
    :param state: initial symbolic state of the engine
    :return: tuple ``(destination, symbols)`` where ``destination`` is the
        first ``ExprCond`` or ``ExprMem`` returned by ``run_block_at``
    """
    engine = SymbolicExecutionEngine(ira, state)
    target = engine.run_block_at(ircfg, address)
    # Keep following the destination while it is neither conditional nor memory
    while True:
        if isinstance(target, (ExprCond, ExprMem)):
            break
        target = engine.run_block_at(ircfg, target, step=False)
    return target, engine.symbols
def exec_instruction(hex_asm, init_values):
    """Disassemble a hex-encoded MeP instruction and print the engine state.

    :param hex_asm: hexadecimal string of the instruction bytes
    :param init_values: iterable of ``(expr_id, expr_value)`` pairs written
        into the engine symbols before the state is printed
    """
    print("Hex:", hex_asm)

    # Decode the bytes into an instruction (mode "b")
    instr = mn_mep.dis(decode_hex(hex_asm), "b")
    print("Dis:", instr)

    loc_db = LocationDB()

    # Lift the instruction; only the first element of the pair is displayed
    lifter = ir_mepb(loc_db)
    iir, _eiir = lifter.get_ir(instr)
    print("\nInternal representation:", iir)

    # Engine seeded with the default register values
    engine = SymbolicExecutionEngine(ir_a_mepb(loc_db), regs_init)

    # Caller-provided values override the defaults
    for reg_expr_id, reg_expr_value in init_values:
        engine.symbols[reg_expr_id] = reg_expr_value

    # NOTE(review): `iir` is printed above but never evaluated by the engine
    # in this function, so the report below only reflects `init_values`.
    # Confirm whether an evaluation step is missing.
    print("\nModified registers:", list(engine.modified(mems=False)))
    print("Modified memories:", list(engine.modified()))

    print("\nFinal registers:")
    engine.dump(mems=False)

    print("\nFinal mems:")
    engine.dump()
def symb_exec(lbl, ir_arch, ircfg, inputstate, debug):
    """Run the IR at ``lbl`` symbolically and return the changed symbols.

    :param lbl: location of the block to start from
    :param ir_arch: IR architecture handed to the engine
    :param ircfg: IR control-flow graph to execute
    :param inputstate: symbol overrides applied on top of ``regs_init``
    :param debug: when true, print every symbol differing from ``regs_init``
    :return: dictionary of symbols whose final value differs from
        ``regs_init``, excluding those listed in ``EXCLUDE_REGS``
    """
    start_state = dict(regs_init)
    start_state.update(inputstate)
    engine = SymbolicExecutionEngine(ir_arch, start_state)
    engine.run_at(ircfg, lbl)

    def differs(reg, value):
        # A symbol absent from regs_init compares against None
        return regs_init.get(reg, None) != value

    if debug:
        for reg, value in viewitems(engine.symbols):
            if differs(reg, value):
                print(reg, value)

    changed = {}
    for reg, value in viewitems(engine.symbols):
        if reg in EXCLUDE_REGS:
            continue
        if differs(reg, value):
            changed[reg] = value
    return changed
def symb_exec(lbl, lifter, ircfg, inputstate, debug):
    """Execute the IR at ``lbl`` symbolically and collect the modified symbols.

    :param lbl: location of the block to start from
    :param lifter: lifter instance handed to the engine
    :param ircfg: IR control-flow graph to execute
    :param inputstate: symbol overrides applied on top of ``regs_init``
    :param debug: when true, print every symbol differing from ``regs_init``
    :return: dictionary of symbols whose final value differs from
        ``regs_init``, excluding those listed in ``EXCLUDE_REGS``
    """
    pool = dict(regs_init)
    pool.update(inputstate)
    engine = SymbolicExecutionEngine(lifter, pool)
    engine.run_at(ircfg, lbl)

    if debug:
        for name, expr in viewitems(engine.symbols):
            if regs_init.get(name, None) != expr:
                print(name, expr)

    modified = {}
    for name, expr in viewitems(engine.symbols):
        # Keep only non-excluded symbols that moved away from their default
        if name not in EXCLUDE_REGS and regs_init.get(name, None) != expr:
            modified[name] = expr
    return modified
def get_assignblock_for_state(ircfg, ir_arch, symbols_init, state_register, state):
    """Collect the IR blocks reached when ``state_register`` equals ``state``.

    Every node of ``ircfg`` is inspected. A node matches when its simplified
    destination is an ``ExprCond`` whose condition is an ``==`` operation
    comparing ``state_register`` (or its initial symbol) with ``state``.
    For each match the block found at the branch target is appended to the
    result.

    :param ircfg: IR control-flow graph to scan
    :param ir_arch: IR architecture used to build the symbolic engines
    :param symbols_init: initial symbols; must contain ``state_register``
    :param state_register: expression holding the dispatcher state
    :param state: state value to look for
    :return: list of blocks returned by ``ircfg.get_block`` for the targets

    Exits the process through ``sys.exit()`` when a node has no IR block.

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original file.
    """
    referenced_blocks = []
    for cfgnode in ircfg.nodes():
        irblock = ircfg.get_block(cfgnode)
        if not irblock:
            print('[!] Could not get IRBLOCK!')
            sys.exit()
        if len(irblock.assignblks) == 1:
            # Single assignment block: use the block destination directly
            _next_addr = irblock.dst
        else:
            # Otherwise run the block symbolically to obtain its destination
            _symbolic_engine = SymbolicExecutionEngine(ir_arch, symbols_init)
            _next_addr = _symbolic_engine.run_block_at(
                ircfg, get_address(ircfg.loc_db, cfgnode))
        # NOTE(review): `== None` goes through the expression's __eq__;
        # `is None` is the usual idiom -- confirm before changing.
        if _next_addr == None:
            continue
        _next_addr = expr_simp(_next_addr)
        if isinstance(_next_addr, ExprCond) and \
                isinstance(_next_addr.cond, ExprOp) and \
                _next_addr.cond.op == '==':
            args = _next_addr.cond
            # Descend through the first argument until it is an ExprId, or
            # until the current node is no longer an ExprOp
            while not isinstance(args.args[0], ExprId):
                if hasattr(args, 'args'):
                    args = args.args[0]
                if not isinstance(args, ExprOp):
                    break
            if hasattr(args, 'args') and \
                    args.args[0] in (state_register, symbols_init[state_register]) and \
                    args.args[1] == state:
                block = ircfg.get_block(cfgnode)
                # NOTE(review): ('CC_S>') is a parenthesised string, not a
                # tuple, so `in` performs a substring test here.
                if hasattr(block.dst.cond, 'op') and block.dst.cond.op in ('CC_S>'):
                    # Follow src2, then the src1 destination of that block
                    dst = get_address(ircfg.loc_db, block.dst.src2.loc_key)
                    next_block = ircfg.get_block(dst)
                    dst = get_address(ircfg.loc_db, next_block.dst.src1.loc_key)
                else:
                    dst = get_address(ircfg.loc_db, block.dst.src1.loc_key)
                referenced_block = ircfg.get_block(dst)
                referenced_blocks.append(referenced_block)
    return referenced_blocks
def exec_instruction(mn_str, init_values, results, index=0, offset=0):
    """Symbolically execute an instruction and check the expected results.

    :param mn_str: textual MeP instruction to assemble
    :param init_values: iterable of ``(expr_id, expr_value)`` pairs assigned
        to the engine symbols before the evaluation
    :param results: sequence of ``(expr_id, expr_value)`` pairs that must hold
        after the evaluation; when the evaluated value is an ``ExprLoc``, its
        offset is compared with ``expr_value.arg``
    :param index: index of the encoding to pick among the assembler candidates
    :param offset: offset given to the instruction
    :raises AssertionError: when the instruction cannot be disassembled or
        when at least one expected result is not matched
    """
    # Assemble and disassemble the instruction
    instr = mn_mep.fromstring(mn_str, "b")
    instr.mode = "b"
    mn_bin = mn_mep.asm(instr)[index]
    try:
        instr = mn_mep.dis(mn_bin, "b")
    except Disasm_Exception as error:
        # Explicit raise: a bare `assert` would vanish under `python -O`
        raise AssertionError(
            "miasm cannot disassemble %r: %s" % (mn_bin, error))

    # Specify the instruction offset and compute the destination label
    instr.offset = offset
    loc_db = LocationDB()
    if instr.dstflow():
        instr.dstflow2label(loc_db)

    # Get the IR
    im = Lifter_MEPb(loc_db)
    iir, eiir = im.get_ir(instr)

    # Filter out IRDst
    iir = [
        ir for ir in iir
        if not (isinstance(ir, ExprAssign)
                and isinstance(ir.dst, ExprId)
                and ir.dst.name == "IRDst")
    ]

    # Prepare symbolic execution
    sb = SymbolicExecutionEngine(LifterModelCallMepb(loc_db), regs_init)

    # Assign int values before symbolic evaluation
    for expr_id, expr_value in init_values:
        sb.symbols[expr_id] = expr_value

    # Execute the IR
    ab = AssignBlock(iir)
    sb.eval_updt_assignblk(ab)

    # Check if expected expr_id were modified
    matched_results = 0
    for expr_id, expr_value in results:
        result = sb.eval_expr(expr_id)
        if isinstance(result, ExprLoc):
            # Locations are compared through their offset
            addr = loc_db.get_location_offset(result.loc_key)
            if expr_value.arg == addr:
                matched_results += 1
        elif result == expr_value:
            matched_results += 1

    # Ensure that all expected results were verified.
    # Value comparison: `is not` on integers only works by accident.
    if len(results) != matched_results:
        print("Expected:", results)
        print("Modified:", [r for r in sb.modified(mems=False)])
        raise AssertionError(
            "%d expected result(s), %d matched" % (len(results), matched_results))
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    """Explore the reachable states, forking on conditional destinations.

    Each state is a tuple ``(addr, symbols, conds)``. States are popped from
    ``states_todo`` until it is empty; already seen states are skipped.

    :param ir_arch: IR architecture used by the engine and by ``get_block``
    :param ircfg: IR control-flow graph handed to ``get_block``
    :param mdis: disassembly engine handed to ``get_block``
    :param states_todo: set of states still to explore (mutated in place)
    :param states_done: set of states already explored (mutated in place)
    :raises ValueError: when a destination is not an integer, a location or
        a condition whose two outcomes are integers or locations
    """
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))

        # Fresh engine seeded with the state's symbols, without the PC
        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))

            # BOTH outcomes must be resolved. The previous
            # `a or b and c or d` form parsed as `a or (b and c) or d`
            # and accepted a single resolved outcome.
            a_resolved = addr_a.is_int() or addr_a.is_loc()
            b_resolved = addr_b.is_int() or addr_b.is_loc()
            if not (a_resolved and b_resolved):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")

            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)
            states_todo.add(
                (addr_a, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_a)))))
            states_todo.add(
                (addr_b, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_b)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
def main(file_path: Path, start_addr: int, oracle_path: Path) -> None:
    """Symbolically execute one basic block and print simplified expressions.

    :param file_path: path of the binary to analyse
    :param start_addr: address of the basic block to execute
    :param oracle_path: path handed to ``Simplifier``
    """
    # symbol table
    loc_db = LocationDB()

    # open the binary for analysis; the handle is released as soon as the
    # container has been built
    with open(file_path, 'rb') as binary:
        container = Container.from_stream(binary, loc_db)

    # cpu abstraction
    machine = Machine(container.arch)

    # init disassemble engine
    mdis = machine.dis_engine(container.bin_stream, loc_db=loc_db)

    # initialize intermediate representation
    lifter = machine.lifter_model_call(mdis.loc_db)

    # disassemble the basic block at address
    asm_block = mdis.dis_block(start_addr)

    # lift to Miasm IR
    ira_cfg = lifter.new_ircfg()
    lifter.add_asmblock_to_ircfg(asm_block, ira_cfg)

    # init symbolic execution engine
    sb = SymbolicExecutionEngine(lifter)

    # symbolically execute basic block
    sb.run_block_at(ira_cfg, start_addr)

    # initialize simplifier
    simplifier = Simplifier(oracle_path)

    for _, v in sb.modified():
        # integers, identifiers and locations are printed by nobody: skip them
        if v.is_int() or v.is_id() or v.is_loc():
            continue

        print(f"before: {v}")
        simplified = simplifier.simplify(v)
        print(f"simplified: {simplified}")
        print("\n\n")
def compute(asm, inputstate={}, debug=False):
    """Symbolically execute one instruction and return the changed values.

    :param asm: textual instruction handed to ``mn.fromstring``
    :param inputstate: mapping of expression -> integer value, turned into
        ``ExprInt`` of the key's size; ``inputstate.get(PC, 0)`` gives the
        instruction offset
    :param debug: when true, print every symbol differing from ``regs_init``
    :return: dictionary mapping each changed symbol (``EXCLUDE_REGS`` are
        skipped) to ``value.arg.arg``
    """
    loc_db = LocationDB()

    start_state = dict(regs_init)
    for reg, value in viewitems(inputstate):
        start_state[reg] = ExprInt(value, reg.size)

    lifter = ir_arch(loc_db)
    ircfg = lifter.new_ircfg()
    engine = SymbolicExecutionEngine(lifter, start_state)

    # Assemble, keep the first encoding, then decode it again
    encoded = mn.asm(mn.fromstring(asm, mode))[0]
    instr = mn.dis(encoded, mode)
    instr.offset = inputstate.get(PC, 0)
    entry = lifter.add_instr_to_ircfg(instr, ircfg)
    engine.run_at(ircfg, entry)

    if debug:
        for reg, value in viewitems(engine.symbols):
            if regs_init.get(reg, None) != value:
                print(reg, value)

    changed = {}
    for reg, value in viewitems(engine.symbols):
        if reg in EXCLUDE_REGS:
            continue
        if regs_init.get(reg, None) != value:
            # Every changed value is accessed through `.arg.arg`
            changed[reg] = value.arg.arg
    return changed
def symbolic_exec():
    """Symbolically execute the selected range and show the modified state.

    When both selection bounds are ``idc.BADADDR``, the instruction under the
    cursor is used instead. The result is displayed in a ``symbolicexec_t``
    view, which is also registered in ``all_views``.
    """
    from miasm.ir.symbexec import SymbolicExecutionEngine
    from miasm.core.bin_stream_ida import bin_stream_ida

    from utils import guess_machine

    start = idc.read_selection_start()
    end = idc.read_selection_end()
    loc_db = LocationDB()

    stream = bin_stream_ida()
    machine = guess_machine(addr=start)
    disassembler = machine.dis_engine(stream, loc_db=loc_db)

    no_selection = start == idc.BADADDR and end == idc.BADADDR
    if no_selection:
        start = idc.get_screen_ea()
        # Get next instruction address
        end = idc.next_head(start)

    # Stop the disassembly at the end of the range
    disassembler.dont_dis = [end]
    asmcfg = disassembler.dis_multiblock(start)
    ira = machine.ira(loc_db=loc_db)
    ircfg = ira.new_ircfg_from_asmcfg(asmcfg)

    print("Run symbolic execution...")
    default_regs = machine.mn.regs.regs_init
    engine = SymbolicExecutionEngine(ira, default_regs)
    engine.run_at(ircfg, start)

    modified = dict(engine.modified(init_state=machine.mn.regs.regs_init))

    view = symbolicexec_t()
    all_views.append(view)
    title = "Symbolic Execution - 0x%x to 0x%x" % (start, idc.prev_head(end))
    if view.Create(modified, machine, loc_db, title):
        view.Show()
def symbolic_exec():
    """Symbolically execute the selected range and show the modified state.

    When both selection bounds are ``idc.BADADDR``, the instruction under the
    cursor is used instead. The result is displayed in a ``symbolicexec_t``
    view, which is also registered in ``all_views``.
    """
    from miasm.ir.symbexec import SymbolicExecutionEngine
    from miasm.core.bin_stream_ida import bin_stream_ida

    from utils import guess_machine

    start = idc.SelStart()
    end = idc.SelEnd()

    stream = bin_stream_ida()
    machine = guess_machine(addr=start)
    disassembler = machine.dis_engine(stream)

    no_selection = start == idc.BADADDR and end == idc.BADADDR
    if no_selection:
        start = idc.ScreenEA()
        # Get next instruction address
        end = idc.next_head(start)

    # Stop the disassembly at the end of the range
    disassembler.dont_dis = [end]
    asmcfg = disassembler.dis_multiblock(start)
    # The lifter shares the location database created by the disassembler
    ira = machine.ira(loc_db=disassembler.loc_db)
    ircfg = ira.new_ircfg_from_asmcfg(asmcfg)

    print("Run symbolic execution...")
    default_regs = machine.mn.regs.regs_init
    engine = SymbolicExecutionEngine(ira, default_regs)
    engine.run_at(ircfg, start)

    modified = dict(engine.modified(init_state=machine.mn.regs.regs_init))

    view = symbolicexec_t()
    all_views.append(view)
    title = "Symbolic Execution - 0x%x to 0x%x" % (start, idc.prev_head(end))
    if view.Create(modified, machine, disassembler.loc_db, title):
        view.Show()
def exec_instruction(mn_str, init_values, results, index=0, offset=0):
    """Symbolically execute an instruction and check the expected results.

    :param mn_str: textual MeP instruction to assemble
    :param init_values: iterable of ``(expr_id, expr_value)`` pairs assigned
        to the engine symbols before the evaluation
    :param results: sequence of ``(expr_id, expr_value)`` pairs that must hold
        after the evaluation; when the evaluated value is an ``ExprLoc``, its
        offset is compared with ``expr_value.arg``
    :param index: index of the encoding to pick among the assembler candidates
    :param offset: offset given to the instruction
    :raises AssertionError: when the instruction cannot be disassembled or
        when at least one expected result is not matched
    """
    # Assemble and disassemble the instruction
    instr = mn_mep.fromstring(mn_str, "b")
    instr.mode = "b"
    mn_bin = mn_mep.asm(instr)[index]
    try:
        instr = mn_mep.dis(mn_bin, "b")
    except Disasm_Exception as error:
        # Explicit raise: a bare `assert` would vanish under `python -O`
        raise AssertionError(
            "miasm cannot disassemble %r: %s" % (mn_bin, error))

    # Specify the instruction offset and compute the destination label
    instr.offset = offset
    loc_db = LocationDB()
    if instr.dstflow():
        instr.dstflow2label(loc_db)

    # Get the IR
    im = ir_mepb(loc_db)
    iir, eiir = im.get_ir(instr)

    # Filter out IRDst
    iir = [ir for ir in iir
           if not (isinstance(ir, ExprAssign)
                   and isinstance(ir.dst, ExprId)
                   and ir.dst.name == "IRDst")]

    # Prepare symbolic execution
    sb = SymbolicExecutionEngine(ir_a_mepb(loc_db), regs_init)

    # Assign int values before symbolic evaluation
    for expr_id, expr_value in init_values:
        sb.symbols[expr_id] = expr_value

    # Execute the IR
    ab = AssignBlock(iir)
    sb.eval_updt_assignblk(ab)

    # Check if expected expr_id were modified
    matched_results = 0
    for expr_id, expr_value in results:
        result = sb.eval_expr(expr_id)
        if isinstance(result, ExprLoc):
            # Locations are compared through their offset
            addr = loc_db.get_location_offset(result.loc_key)
            if expr_value.arg == addr:
                matched_results += 1
        elif result == expr_value:
            matched_results += 1

    # Ensure that all expected results were verified.
    # Value comparison: `is not` on integers only works by accident.
    if len(results) != matched_results:
        print("Expected:", results)
        print("Modified:", [r for r in sb.modified(mems=False)])
        raise AssertionError(
            "%d expected result(s), %d matched" % (len(results), matched_results))
def intra_block_flow_symb(ir_arch, _, flow_graph, irblock, in_nodes, out_nodes):
    """Add the data-flow nodes and edges of one IR block to ``flow_graph``.

    The block is executed symbolically from a copy of the architecture's
    ``regs_init``; the modified elements reported by the engine drive the
    creation of graph nodes and edges.

    :param ir_arch: IR architecture (gives the initial registers and is
        handed to the engine)
    :param _: unused positional argument
    :param flow_graph: graph receiving nodes and unique edges
    :param irblock: IR block to analyse
    :param in_nodes: dictionary filled with read element -> node name
    :param out_nodes: dictionary filled with written element -> node name

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original file.
    """
    symbols_init = ir_arch.arch.regs.regs_init.copy()
    sb = SymbolicExecutionEngine(ir_arch, symbols_init)
    sb.eval_updt_irblock(irblock)
    print('*' * 40)
    print(irblock)
    # NOTE(review): `out` is iterated twice below. If `modified` returns a
    # one-shot iterator, the second loop sees nothing; the second loop also
    # binds a whole item to `dst` instead of unpacking a pair -- confirm.
    out = sb.modified(mems=False)
    # Never filled in this function, so the `in current_nodes` tests below
    # always take the `else` branch
    current_nodes = {}
    # Gen mem arg to mem node links
    for dst, src in out:
        # Re-evaluate the destination to get its current value
        src = sb.eval_expr(dst)
        for n in [dst, src]:
            # `all_mems` is rebuilt on each iteration, so only the memories
            # of the last element (`src`) survive the loop
            all_mems = set()
            all_mems.update(get_expr_mem(n))
        for n in all_mems:
            node_n_w = get_node_name(irblock.loc_key, 0, n)
            # Only a memory expression equal to the value itself is linked
            if not n == src:
                continue
            o_r = n.ptr.get_r(mem_read=False, cst_read=True)
            for i, n_r in enumerate(o_r):
                if n_r in current_nodes:
                    node_n_r = current_nodes[n_r]
                else:
                    node_n_r = get_node_name(irblock.loc_key, i, n_r)
                if not n_r in in_nodes:
                    in_nodes[n_r] = node_n_r
                flow_graph.add_uniq_edge(node_n_r, node_n_w)
    # Gen data flow links
    for dst in out:
        src = sb.eval_expr(dst)
        nodes_r = src.get_r(mem_read=False, cst_read=True)
        nodes_w = set([dst])
        for n_r in nodes_r:
            if n_r in current_nodes:
                node_n_r = current_nodes[n_r]
            else:
                node_n_r = get_node_name(irblock.loc_key, 0, n_r)
            if not n_r in in_nodes:
                in_nodes[n_r] = node_n_r
            flow_graph.add_node(node_n_r)
            # Link every read element to every written element
            for n_w in nodes_w:
                node_n_w = get_node_name(irblock.loc_key, 1, n_w)
                out_nodes[n_w] = node_n_w
                flow_graph.add_node(node_n_w)
                flow_graph.add_uniq_edge(node_n_r, node_n_w)
def emul(self, lifter, ctx=None, step=False):
    """Replay the history block by block and record the path constraints.

    The blocks of ``self.history`` are executed in reverse order. After each
    block except the last one, a constraint tying the block destination to
    the next location of the history is added to a z3 solver, which is
    finally stored in ``self._solver``.

    @lifter: Lifter instance
    @ctx: (optional) Initial context as dictionary
    @step: (optional) Verbose execution

    Return a dictionary mapping each element of ``self.inputs`` to its
    evaluated value.
    """
    # Work on a copy so the caller's context is left untouched
    initial_ctx = dict(ctx) if ctx is not None else {}

    solver = z3.Solver()
    engine = SymbolicExecutionEngine(lifter, initial_ctx)
    history = self.history[::-1]
    history_size = len(history)
    translator = Translator.to_language("z3")
    size = self._ircfg.IRDst.size

    for position, loc_key in enumerate(history, 1):
        is_last = position == history_size

        # Only the last block may be sliced at the initial state's line
        if is_last and loc_key == self.initial_state.loc_key:
            line_nb = self.initial_state.line_nb
        else:
            line_nb = None
        irb = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb)

        # Emul the block and get back destination
        dst = engine.eval_updt_irblock(irb, step=step)

        if is_last:
            continue

        # `position` starts at 1, so history[position] is the next location
        expected = engine.eval_expr(ExprLoc(history[position], size))
        constraint = self._gen_path_constraints(translator, dst, expected)
        solver.add(constraint)

    # Save the solver
    self._solver = solver

    # Only the inputs are reported (other values could be wrong)
    return {element: engine.eval_expr(element) for element in self.inputs}
def do_step(self):
    """Take the next pending state and wrap it in a fresh symbolic engine.

    :return: ``None`` when nothing is pending or when more than 600 steps
        were already done; otherwise a tuple ``(parent, ad, engine)`` where
        ``engine`` is seeded with a dict built from the state's symbols
    """
    if len(self.todo) == 0:
        return None

    # Watchdog: give up after too many steps
    if self.total_done > 600:
        print("symbexec watchdog!")
        return None

    self.total_done += 1
    print('CPT', self.total_done)

    if not self.todo:
        return None

    state = self.get_next_state()
    parent, ad, symbols = state
    # Remember the state in both bookkeeping sets
    self.states_done.add(state)
    self.states_var_done.add(state)
    engine = SymbolicExecutionEngine(self.ir_arch, dict(symbols))
    return parent, ad, engine
def _recognize(self, max_loop_num):
    """Walk the IR CFG from ``LocKey(0)`` and classify every assignment.

    Each block is visited once, with the symbolic state inherited from the
    predecessor that queued it. Assignments are dispatched to
    ``possible_merge_funcs``, ``_process_call`` or ``_process_assignment``.

    :param max_loop_num: the walk returns once ``found_loops_num`` exceeds
        this value; a falsy value means no limit

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original file.
    """
    symb_engine = SymbolicExecutionEngine(self.ir_arch, regs.regs_init)
    # Worklist of (location, symbolic state at block entry)
    todo = [(LocKey(0), symb_engine.get_state())]
    done_loc = set()
    if not max_loop_num:
        max_loop_num = float('inf')
    found_loops_num = 0
    while todo:
        loc_key, symb_state = todo.pop()
        # Skip already visited locations and locations without IR block
        if loc_key in done_loc or loc_key not in self.ircfg.blocks:
            continue
        done_loc.add(loc_key)
        ir_block = self.ircfg.blocks[loc_key]
        symb_engine.set_state(symb_state)
        for ind, assignblk in enumerate(ir_block.assignblks):
            for dst, src in assignblk.items():
                if max_loop_num < found_loops_num:
                    return
                # Constant source that is one of the known function addresses
                if src.is_int() and int(src) in self.func_addresses:
                    assignblk_node = AssignblkNode(ir_block.loc_key, ind, dst)
                    # no uses
                    if assignblk_node not in self.analyses.defuse_edges or not \
                            self.analyses.defuse_edges[assignblk_node]:
                        # possible virtual table initialization
                        self.possible_merge_funcs.add(
                            (int(src), frozenset(), loc_key))
                elif src.is_op("call_func_stack"):
                    self._process_call(src, dst, symb_engine, assignblk,
                                       loc_key)
                # Either a constant assignment to an acceptable destination,
                # or a self-assignment in the entry block restricted to the
                # merging candidates when some are set
                elif (expr_simp(src).is_int() and not is_bad_expr(dst)) \
                        or (ir_block.loc_key == LocKey(0) and dst == src and
                            (not self._merging_var_candidates or dst in self._merging_var_candidates)):
                    if self._process_assignment(ir_block, ind, dst):
                        self._merging_var_candidates = None
                        found_loops_num += 1
            # Update the symbolic state once the assignment block is analysed
            symb_engine.eval_updt_assignblk(assignblk)
        # Successors inherit the state reached at the end of this block
        for succ in self.ircfg.successors(loc_key):
            todo.append((succ, symb_engine.get_state()))
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    """Explore the reachable states, forking on conditional destinations.

    Each state is a tuple ``(addr, symbols, conds)``. States are popped from
    ``states_todo`` until it is empty; already seen states are skipped.

    :param ir_arch: IR architecture used by the engine and by ``get_block``
    :param ircfg: IR control-flow graph handed to ``get_block``
    :param mdis: disassembly engine handed to ``get_block``
    :param states_todo: set of states still to explore (mutated in place)
    :param states_done: set of states already explored (mutated in place)
    :raises ValueError: when a destination is not an integer, a location or
        a condition whose two outcomes are integers or locations
    """
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))

        # Fresh engine seeded with the state's symbols, without the PC
        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))

            # BOTH outcomes must be resolved. The previous
            # `a or b and c or d` form parsed as `a or (b and c) or d`
            # and accepted a single resolved outcome.
            outcomes_resolved = all(
                outcome.is_int() or outcome.is_loc()
                for outcome in (addr_a, addr_b)
            )
            if not outcomes_resolved:
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")

            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)
            states_todo.add((addr_a, symbexec.symbols.copy(), tuple(list(conds) + list(viewitems(cond_group_a)))))
            states_todo.add((addr_b, symbexec.symbols.copy(), tuple(list(conds) + list(viewitems(cond_group_b)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
# Assemble a single "MOV EAX, EBX" instruction (mode 32) and keep the first
# candidate encoding
line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32)
asm = machine.mn.asm(line)[0]

# Get back block: wrap the raw bytes in a container and disassemble them
cont = Container.from_string(asm, loc_db=loc_db)
mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)
# presumably limits the disassembly to a single line per block -- TODO confirm
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.loc_db)
ircfg = ira.new_ircfg()
ira.add_asmblock_to_ircfg(asm_block, ircfg)

# Instantiate a Symbolic Execution engine (no explicit initial state is given)
symb = SymbolicExecutionEngine(ira)

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
cur_addr = symb.run_at(ircfg, START_ADDR)

# Modified elements
print('Modified registers:')
symb.dump(mems=False)
print('Modified memory (should be empty):')
symb.dump(ids=False)

# Check final status: EAX must now hold the symbolic value of EBX
eax, ebx = ira.arch.regs.EAX, ira.arch.regs.EBX
assert symb.symbols[eax] == ebx
assert eax in symb.symbols
#!/usr/bin/python3
"""Symbolically execute the first basic block of a function of hello_world.exe.

The binary is loaded from the current directory, the function starting at
``start_addr`` is disassembled and lifted, and the value returned by the
symbolic execution of its first block is printed.
"""
from miasm.analysis.binary import Container
from miasm.analysis.machine import Machine
from miasm.core.locationdb import LocationDB
from miasm.ir.symbexec import SymbolicExecutionEngine

# Address of the function to analyse
start_addr = 0x402300
loc_db = LocationDB()

# The handle is only needed while the container is being built; the `with`
# block guarantees it is closed afterwards
with open("hello_world.exe", 'rb') as target_file:
    container = Container.from_stream(target_file, loc_db)

machine = Machine(container.arch)
mdis = machine.dis_engine(container.bin_stream, loc_db=loc_db)
ira = machine.ira(mdis.loc_db)

# Disassemble the whole function, then lift it to IR
asm_cfg = mdis.dis_multiblock(start_addr)
ira_cfg = ira.new_ircfg_from_asmcfg(asm_cfg)

# Run only the block located at start_addr and print what the engine returns
symbex = SymbolicExecutionEngine(ira)
symbex_state = symbex.run_block_at(ira_cfg, start_addr)
print(symbex_state)
def resolve_offsets(state_register, asmcfg, ircfg, ir_arch):
    """Compute the patches that link each block to the block(s) it dispatches to.

    Every node of ``ircfg`` is inspected. Depending on the shape of the block
    (a single CMOVNZ/CMOVZ, an SBB/AND/ADD sequence, a constant written to
    ``state_register``, a conditional or constant destination) the state
    values it produces are recovered, the blocks handling these states are
    located with ``get_assignblock_for_state`` and patches are recorded.

    :param state_register: expression holding the dispatcher state
    :param asmcfg: assembly control-flow graph of the function
    :param ircfg: IR control-flow graph of the function
    :param ir_arch: IR architecture used to build the symbolic engines
    :return: list of patch tuples; most are ``(source offset, destination
        address, patch kind)`` with kinds ``CNDP0``..``CNDP3`` and ``STDP``

    Side effects: enables the ``ignore_call_results`` pass on ``expr_simp``
    and calls ``sys.exit()`` when a node has no IR block.

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original file, in
    particular which ``if`` the ``else:`` reading the successor block
    belongs to.
    """
    patches = set()
    # Worklist: it is appended to while being iterated, so nodes discovered
    # along the way are visited as well
    nodes_to_walk = list(ircfg.nodes())
    # Initial symbolic value of every register
    symbols_init = dict()
    for i, r in enumerate(all_regs_ids):
        symbols_init[r] = all_regs_ids_init[i]
    expr_simp.enable_passes({ExprOp: [ignore_call_results]})
    for node in nodes_to_walk:
        irblock = ircfg.get_block(node)
        if not irblock:
            print('[-] Could not get IRBLOCK!')
            sys.exit()
        if len(irblock.assignblks) == 1:
            if irblock.assignblks[0].instr.name == "CMOVNZ" and irblock.assignblks[0].instr.args[0] == state_register:
                temp_reg1 = irblock.assignblks[0].instr.args[0]
                temp_reg2 = irblock.assignblks[0].instr.args[1]
                state1 = None
                state2 = None
                # Look for the constants moved into both registers in the
                # first predecessor
                previous_block = ircfg.get_block(ircfg.predecessors(node)[0])
                for line in previous_block.assignblks:
                    if line.instr.name == 'MOV' and \
                            line.instr.args[0] in (temp_reg1, temp_reg2) and isinstance(line.instr.args[1], ExprInt):
                        if line.instr.args[0] == state_register:
                            state1 = line.instr.args[1]
                        else:
                            state2 = line.instr.args[1]
                    if state1 and state2:
                        break
                # Compiler shenanigans: a missing state is not initialised in
                # the current basic block, so search the whole function for it
                if not state1:
                    state1 = scan_function_for_state(asmcfg, state_register, temp_reg1)
                elif not state2:
                    state2 = scan_function_for_state(asmcfg, state_register, temp_reg2)
                # For CMOVNZ, `blocks1` is looked up with state2 and
                # `blocks2` with state1
                blocks1 = get_assignblock_for_state(ircfg, ir_arch, symbols_init, state_register, state2)
                blocks2 = get_assignblock_for_state(ircfg, ir_arch, symbols_init, state_register, state1)
                dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
                src1 = irblock.assignblks[0].instr.offset
                patches.add((src1, dst1, CNDP1))
                dst2 = get_address(ircfg.loc_db, blocks2[0].loc_key)
                src2 = src1
                patches.add((src2, dst2, CNDP0))
            elif irblock.assignblks[0].instr.name == "CMOVZ":
                state1 = None
                state2 = None
                temp_reg1 = irblock.assignblks[0].instr.args[0]
                temp_reg2 = irblock.assignblks[0].instr.args[1]
                if temp_reg1 == state_register:
                    previous_block = ircfg.get_block(
                        ircfg.predecessors(node)[0])
                    for line in previous_block.assignblks:
                        if line.instr.name == 'MOV' and \
                                line.instr.args[0] in (temp_reg1, temp_reg2):
                            if line.instr.args[0] == state_register:
                                state1 = line.instr.args[1]
                            else:
                                state2 = line.instr.args[1]
                    if state1 and state2:
                        blocks1 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register, state1)
                        blocks2 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register, state2)
                        dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
                        src1 = irblock.assignblks[0].instr.offset
                        patches.add((src1, dst1, CNDP1))
                        dst2 = get_address(ircfg.loc_db, blocks2[0].loc_key)
                        src2 = src1
                        patches.add((src2, dst2, CNDP0))
                    else:
                        # One of the two states is unknown: find which one
                        # and the register that should hold it
                        found_state = state1 if state1 else state2
                        missing_state = state1 if not state1 else state2
                        subject_reg = temp_reg1 if not state1 else temp_reg2

                        def get_imm_write_for_reg(asmcfg, subject_reg):
                            # Return the first immediate moved into
                            # `subject_reg` anywhere in the function, or None
                            for node in asmcfg.nodes():
                                asmblock = asmcfg.loc_key_to_block(node)
                                for line in asmblock.lines:
                                    if line.name == 'MOV' and line.args[0] == subject_reg and \
                                            isinstance(line.args[1], ExprInt):
                                        return line.args[1]
                            return None

                        missing_state = get_imm_write_for_reg(
                            asmcfg, subject_reg)
                        if not missing_state:
                            print(
                                "[-] Something went wrong. could not find mising state!"
                            )
                            continue
                        state1 = state1 if state1 == found_state else missing_state
                        state2 = missing_state if state1 == found_state else state2
                        blocks1 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register, state1)
                        blocks2 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register, state2)
                        dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
                        src1 = irblock.assignblks[0].instr.offset
                        patches.add((src1, dst1, CNDP1))
                        dst2 = get_address(ircfg.loc_db, blocks2[0].loc_key)
                        src2 = src1
                        patches.add((src2, dst2, CNDP0))
                else:
                    # The state is read from a MOV in the first successor
                    next_block = ircfg.get_block(ircfg.successors(node)[0])
                    for line in next_block.assignblks:
                        if line.instr.name == 'MOV' and line.instr.args[
                                0] == state_register:
                            state1 = line.instr.args[1]
                            break
                    if state1:
                        blocks1 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register, state1)
                        # The patch source is the last JMP of the successor
                        # (stays None when there is none)
                        src = None
                        for assignblk in next_block.assignblks:
                            if assignblk.instr.name == 'JMP':
                                src = assignblk.instr.offset
                        dst_block = ircfg.get_block(blocks1[0].loc_key)
                        if isinstance(dst_block.dst, ExprCond) and len(
                                dst_block.assignblks):
                            # NOTE(review): ('CC_S>') is a parenthesised
                            # string, not a tuple, so `in` performs a
                            # substring test here.
                            if hasattr(dst_block.dst.cond,
                                       'op') and dst_block.dst.cond.op in (
                                           'CC_S>'):
                                dst = get_address(ircfg.loc_db,
                                                  dst_block.dst.src2.loc_key)
                                next_block = ircfg.get_block(dst)
                                dst = get_address(ircfg.loc_db,
                                                  next_block.dst.src1.loc_key)
                            else:
                                dst = get_address(ircfg.loc_db,
                                                  dst_block.dst.src1.loc_key)
                        else:
                            dst = get_address(ircfg.loc_db, blocks1[0].loc_key)
                        patches.add((src, dst, STDP))
        else:
            # Several assignment blocks: execute the block symbolically and
            # look at the resulting state register and destination
            symbolic_engine = SymbolicExecutionEngine(ir_arch, symbols_init)
            next_addr = symbolic_engine.run_block_at(
                ircfg, get_address(ircfg.loc_db, node))
            next_addr = expr_simp(next_addr)
            updated_state = symbolic_engine.symbols[state_register]
            if isinstance(updated_state, ExprOp):
                updated_state = expr_simp(updated_state)
            if updated_state != symbols_init[state_register] and \
                    isinstance(updated_state, ExprOp):
                # The new state is still an operation: look for the
                # SBB / AND / ADD sequence and resolve both outcomes
                irblock = ircfg.get_block(node)
                if not irblock:
                    print('[-] Could not get IRBLOCK!')
                    sys.exit()
                if len(irblock.assignblks) > 3:
                    neg_inst = False
                    # NOTE(review): `i + 1` / `i + 2` are not bounds-checked
                    # and `i - 1` / `i - 2` wrap around for small `i`.
                    for i in range(len(irblock.assignblks)):
                        if irblock.assignblks[i].instr.name == 'NEG':
                            neg_inst = True
                        if irblock.assignblks[i].instr.name == 'SBB' and \
                                irblock.assignblks[i + 1].instr.name == 'AND' and \
                                irblock.assignblks[i + 2].instr.name == 'ADD':
                            expr = symbolic_engine.symbols[
                                state_register].copy()
                            if neg_inst:
                                # Substitute EAX_init with 0 and with 1
                                state1 = expr_simp(
                                    expr.replace_expr(
                                        {EAX_init: ExprInt(0, 32)}))
                                state2 = expr_simp(
                                    expr.replace_expr(
                                        {EAX_init: ExprInt(1, 32)}))
                            elif irblock.assignblks[i-1].instr.name == 'CMP' and \
                                    irblock.assignblks[i-2].instr.name == 'ADD' and \
                                    isinstance(irblock.assignblks[i-2].instr.args[1], ExprInt):
                                # NOTE(review): `id` shadows the builtin
                                id = irblock.assignblks[i - 2].instr.args[0]
                                imm = irblock.assignblks[i - 2].instr.args[1]
                                state1 = expr_simp(
                                    expr.replace_expr({
                                        EAX_init: imm
                                    }).replace_expr({
                                        symbolic_engine.symbols[id].args[0]:
                                        imm
                                    }))
                                state2 = expr_simp(
                                    expr.replace_expr({
                                        EAX_init: ExprInt(-1, 32)
                                    }).replace_expr({
                                        symbolic_engine.symbols[id].args[0]:
                                        imm
                                    }))
                            # NOTE(review): when neither branch above runs,
                            # state1/state2 keep whatever value an earlier
                            # iteration left (or are undefined).
                            blocks1 = get_assignblock_for_state(
                                ircfg, ir_arch, symbols_init, state_register,
                                state1)
                            blocks2 = get_assignblock_for_state(
                                ircfg, ir_arch, symbols_init, state_register,
                                state2)
                            process_blocks_for_patches(node, blocks1, ircfg,
                                                       patches, nodes_to_walk,
                                                       state1, True)
                            process_blocks_for_patches(node, blocks2, ircfg,
                                                       patches, nodes_to_walk,
                                                       state2, False)
                            break
            elif updated_state != symbols_init[state_register] and \
                    isinstance(updated_state, ExprInt) and \
                    updated_state._get_int() > 0xff:
                # The block writes a constant above 0xff to the state register
                #print("[*] Looking for state %s" % hex(updated_state._get_int()))
                referenced_blocks = get_assignblock_for_state(
                    ircfg, ir_arch, symbols_init, state_register,
                    updated_state)
                # for block in referenced_blocks:
                #     print("\t[+] Found reference at %s" % hex(get_address(ircfg.loc_db, block.loc_key)))
                process_blocks_for_patches(node, referenced_blocks, ircfg,
                                           patches, nodes_to_walk)
            elif isinstance(next_addr, ExprCond):
                if not hasattr(next_addr.cond, 'args'):
                    # Resolve both branch targets to location keys
                    if isinstance(next_addr.src1, ExprLoc):
                        dest1 = next_addr.src1.loc_key
                    else:
                        dest1 = get_loc_key_at(ircfg.loc_db,
                                               next_addr.src1._get_int())
                    if isinstance(next_addr.src2, ExprLoc):
                        dest2 = next_addr.src2.loc_key
                    else:
                        dest2 = get_loc_key_at(ircfg.loc_db,
                                               next_addr.src2._get_int())
                    # Make sure both targets get visited
                    if dest1 not in nodes_to_walk:
                        nodes_to_walk.append(dest1)
                    if dest2 not in nodes_to_walk:
                        nodes_to_walk.append(dest2)
                    dst2block = ircfg.get_block(dest2)
                    if dst2block.assignblks[0].instr.name == 'CMP' and \
                            dst2block.assignblks[0].instr.args[0] == state_register and \
                            len(ircfg.get_block(node).assignblks) > 1:
                        # Walk up the first predecessors until one starts
                        # with a CMP
                        # NOTE(review): the second half of the condition
                        # tests `dst2block`, not `irblock` -- confirm.
                        ref_block = node
                        while True:
                            irblock = ircfg.get_block(
                                ircfg.predecessors(ref_block)[0])
                            if irblock.assignblks[0].instr.name == 'CMP' and \
                                    dst2block.assignblks[0].instr.args[0] == state_register:
                                break
                            ref_block = ircfg.predecessors(ref_block)[0]
                        asmblock = asmcfg.loc_key_to_block(node)
                        for line in asmblock.lines:
                            if line.name == 'JZ':
                                patches.add(
                                    (line.offset,
                                     get_address(asmcfg.loc_db,
                                                 ref_block), CNDP2))
                                # Continue the execution in the src2 block to
                                # learn the state it sets
                                true_block = ircfg.get_block(
                                    ircfg.get_block(node).dst.src2.loc_key)
                                symbolic_engine.run_block_at(
                                    ircfg, true_block.loc_key)
                                if isinstance(
                                        symbolic_engine.
                                        symbols[state_register], ExprInt):
                                    referenced_block = get_assignblock_for_state(
                                        ircfg, ir_arch, symbols_init,
                                        state_register, symbolic_engine.
                                        symbols[state_register])[0]
                                    patches.add(
                                        (line.offset,
                                         get_address(ircfg.loc_db,
                                                     referenced_block.loc_key),
                                         CNDP3))
                                break
            elif isinstance(next_addr, ExprInt):
                # Constant destination: only queue it for a later visit
                dest = get_loc_key_at(ircfg.loc_db, next_addr._get_int())
                if dest not in nodes_to_walk:
                    nodes_to_walk.append(
                        get_loc_key_at(ircfg.loc_db, next_addr._get_int()))
    return list(patches)
# translate asm_cfg into ira_cfg ira_cfg = ira.new_ircfg_from_asmcfg(asm_cfg) # set opaque predicate counter opaque_counter = 0 # dictionary of byte patches patches = {} # walk over all basic blocks for basic_block in asm_cfg.blocks: # get address of first basic block instruction address = basic_block.lines[0].offset # init symbolic execution engine sb = SymbolicExecutionEngine(ira) # symbolically execute basic block e = sb.run_block_at(ira_cfg, address) # skip if no conditional jump if not e.is_cond(): continue # cond ? src1 : src2 # check if opaque predicate -- jump if branch_cannot_be_taken(e, e.src1): print(f"opaque predicate at {hex(address)} (jump is never taken)") opaque_counter += 1
loc_db = LocationDB() s = '\x8dI\x04\x8d[\x01\x80\xf9\x01t\x05\x8d[\xff\xeb\x03\x8d[\x01\x89\xd8\xc3' s = '\x55\x8b\xec\x83\xec\x08\xc7\x45\xf8\xcc\xcc\xcc\xcc\xc7\x45\xfc\xcc\xcc\xcc\xcc\xc7\x45\xfc\x03\x00\x00\x00\xc7\x45\xf8\x05\x00\x00\x00\x83\x7d\xfc\x05\x7e\x07\x8b\x45\xfc\xeb\x09\xeb\x05\x8b\x45\xf8\xeb\x02\x33\xc0\x8b\xe5\x5d\xc3' c = Container.from_string(s) machine = Machine('x86_32') mdis = machine.dis_engine(c.bin_stream) asmcfg = mdis.dis_multiblock(0) for block in asmcfg.blocks: print(block.to_string(asmcfg.loc_db)) ira = machine.ira(loc_db) ircfg = ira.new_ircfg_from_asmcfg(asmcfg) # print(ircfg) # ircfg = ira.new_ircfg(asmcfg) # print(loc_db._offset_to_loc_key.keys()[0]) sb = SymbolicExecutionEngine(ira) # symbolic_pc = sb.run_at(ircfg, loc_db._offset_to_loc_key.keys()[0]) # for index, info in enumerate(sb.info_ids): # print('###### step', index+1) # print('\t', info[0]) # for reg in info[1]: # print('\t\t', reg, ':', info[1][reg]) # dse = DSEEngine(machine) # sb.symbols[machine.mn.regs.ECX] = ExprInt(253, 32) # sb.symbols[machine.mn.regs.EBP] = ExprOp('+', ExprId('ESP', 32), ExprInt(0xFFFFFFFC, 32)) # memory_expr = dse.memory_to_expr(0x40000004) # sb.symbols[machine.mn.regs.EBP] = dse.eval_expr(memory_expr) symbolic_pc = sb.run_at(ircfg, loc_db._offset_to_loc_key.keys()[0]) print('###### modified state step by step') for step, info in enumerate(sb.info_ids):
states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) else: raise ValueError("Unsupported destination") if __name__ == '__main__': loc_db = LocationDB() translator_smt2 = Translator.to_language("smt2") addr = int(options.address, 16) cont = Container.from_stream(open(args[0], 'rb'), loc_db) mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db) lifter = machine.lifter(mdis.loc_db) ircfg = lifter.new_ircfg() symbexec = SymbolicExecutionEngine(lifter) asmcfg = parse_asm.parse_txt( machine.mn, 32, ''' init: PUSH argv PUSH argc PUSH ret_addr ''', loc_db ) argc_lbl = loc_db.get_name_location('argc') argv_lbl = loc_db.get_name_location('argv') ret_addr_lbl = loc_db.get_name_location('ret_addr')
def test_ClassDef(self):
    """Exercise the symbolic state of SymbolicExecutionEngine.

    The test seeds an engine with byte-sized, word-sized and symbolic-pointer
    memory cells, then checks:
      * overlapping / partial / merged memory reads,
      * a subclass hook on ``mem_read``,
      * assignments through eval_updt_assignblk / apply_change / eval_updt_expr,
      * partial overwrites, reverts and deletions of memory cells,
      * writes that wrap around the end of the address space,
      * an engine whose address space is artificially reduced to 5 bits.
    """
    from miasm.expression.expression import ExprInt, ExprId, ExprMem, \
        ExprCompose, ExprAssign
    from miasm.arch.x86.sem import ir_x86_32
    from miasm.core.locationdb import LocationDB
    from miasm.ir.symbexec import SymbolicExecutionEngine
    from miasm.ir.ir import AssignBlock

    def mem_at(addr, size=32):
        # `size`-bit memory access at the constant 32-bit address `addr`
        return ExprMem(ExprInt(addr, 32), size)

    loc_db = LocationDB()
    ira = ir_x86_32(loc_db)
    ircfg = ira.new_ircfg()

    id_x = ExprId('x', 32)
    id_a = ExprId('a', 32)
    id_b = ExprId('b', 32)
    id_c = ExprId('c', 32)
    id_d = ExprId('d', 32)
    id_e = ExprId('e', 64)

    class CustomSymbExec(SymbolicExecutionEngine):
        # Engine whose read of the 32-bit cell at 0x1000 is forced to id_x

        def mem_read(self, expr):
            if expr != ExprMem(ExprInt(0x1000, 32), 32):
                return super(CustomSymbExec, self).mem_read(expr)
            return id_x

    initial_state = {
        mem_at(0x4, 8): ExprInt(0x44, 8),
        mem_at(0x5, 8): ExprInt(0x33, 8),
        mem_at(0x6, 8): ExprInt(0x22, 8),
        mem_at(0x7, 8): ExprInt(0x11, 8),

        mem_at(0x20): id_x,

        mem_at(0x40): id_x,
        mem_at(0x44): id_a,

        mem_at(0x54): ExprInt(0x11223344, 32),

        ExprMem(id_a, 32): ExprInt(0x11223344, 32),
        id_a: ExprInt(0, 32),
        id_b: ExprInt(0, 32),

        ExprMem(id_c, 32): ExprMem(id_d + ExprInt(0x4, 32), 32),
        ExprMem(id_c + ExprInt(0x4, 32), 32): ExprMem(id_d + ExprInt(0x8, 32), 32),
    }
    sb = CustomSymbExec(ira, initial_state)

    # (expression to evaluate, expected result) -- none of these alter the state
    read_checks = [
        (ExprInt(1, 32) - ExprInt(1, 32), ExprInt(0, 32)),

        # Four known bytes at 0x4..0x7 surrounded by unknown memory
        (mem_at(0), mem_at(0)),
        (mem_at(1), ExprCompose(mem_at(1, 24), ExprInt(0x44, 8))),
        (mem_at(2), ExprCompose(mem_at(2, 16), ExprInt(0x3344, 16))),
        (mem_at(3), ExprCompose(mem_at(3, 8), ExprInt(0x223344, 24))),
        (mem_at(4), ExprInt(0x11223344, 32)),
        (mem_at(5), ExprCompose(ExprInt(0x112233, 24), mem_at(8, 8))),
        (mem_at(6), ExprCompose(ExprInt(0x1122, 16), mem_at(8, 16))),
        (mem_at(7), ExprCompose(ExprInt(0x11, 8), mem_at(8, 24))),
        (mem_at(8), mem_at(8)),

        # One known 32-bit integer at 0x54 surrounded by unknown memory
        (mem_at(0x50), mem_at(0x50)),
        (mem_at(0x51), ExprCompose(mem_at(0x51, 24), ExprInt(0x44, 8))),
        (mem_at(0x52), ExprCompose(mem_at(0x52, 16), ExprInt(0x3344, 16))),
        (mem_at(0x53), ExprCompose(mem_at(0x53, 8), ExprInt(0x223344, 24))),
        (mem_at(0x54), ExprInt(0x11223344, 32)),
        (mem_at(0x55), ExprCompose(ExprInt(0x112233, 24), mem_at(0x58, 8))),
        (mem_at(0x56), ExprCompose(ExprInt(0x1122, 16), mem_at(0x58, 16))),
        (mem_at(0x57), ExprCompose(ExprInt(0x11, 8), mem_at(0x58, 24))),
        (mem_at(0x58), mem_at(0x58)),

        # One symbolic 32-bit value (id_x) at 0x20 surrounded by unknown memory
        (mem_at(0x1D), ExprCompose(mem_at(0x1D, 24), id_x[:8])),
        (mem_at(0x1E), ExprCompose(mem_at(0x1E, 16), id_x[:16])),
        (mem_at(0x1F), ExprCompose(mem_at(0x1F, 8), id_x[:24])),
        (mem_at(0x20), id_x),
        (mem_at(0x21), ExprCompose(id_x[8:], mem_at(0x24, 8))),
        (mem_at(0x22), ExprCompose(id_x[16:], mem_at(0x24, 16))),
        (mem_at(0x23), ExprCompose(id_x[24:], mem_at(0x24, 24))),
        (mem_at(0x24), mem_at(0x24)),

        # Partial reads inside a known cell
        (mem_at(4, 8), ExprInt(0x44, 8)),
        (mem_at(0x20, 8), id_x[:8]),
        (mem_at(0x23, 8), id_x[24:]),

        # Reads spanning two adjacent known cells
        (mem_at(0x40, 64), ExprCompose(id_x, id_a)),
        (mem_at(0x42), ExprCompose(id_x[16:], id_a[:16])),

        # Reads whose stored values are themselves memory expressions
        (mem_at(0x100), mem_at(0x100)),
        (ExprMem(id_c + ExprInt(0x2, 32), 32),
         ExprMem(id_d + ExprInt(0x6, 32), 32)),

        # Unmodified read
        (mem_at(4, 8), ExprInt(0x44, 8)),

        # Read intercepted by CustomSymbExec.mem_read
        (mem_at(0x1000), id_x),
    ]
    for expr, expected in read_checks:
        self.assertEqual(sb.eval_expr(expr), expected)

    # --- Assignments: eval_updt_assignblk / apply_change / eval_updt_expr ---
    zero32 = ExprInt(0, 32)

    # x = a, evaluated first (a is 0x0 in the state)
    assignblk = AssignBlock({id_x: id_a})
    sb.eval_updt_assignblk(assignblk)
    self.assertEqual(sb.eval_expr(id_x), zero32)

    # x = a, stored verbatim ('a' is not replaced by 0x0)
    sb.apply_change(id_x, id_a)
    self.assertEqual(sb.eval_expr(id_x), id_a)

    # x = a, evaluated again (a is 0x0 in the state)
    self.assertEqual(sb.eval_updt_expr(assignblk.dst2ExprAssign(id_x)), zero32)
    self.assertEqual(sb.eval_expr(id_x), zero32)
    self.assertEqual(sb.eval_updt_expr(id_x), zero32)

    sb.dump()

    # Every element read by the modified state
    reads = {
        element
        for dst, src in sb.modified()
        for element in ExprAssign(dst, src).get_r()
    }
    expected_reads = {
        id_x, id_a,
        ExprMem(id_d + ExprInt(0x4, 32), 32),
        ExprMem(id_d + ExprInt(0x8, 32), 32),
    }
    self.assertEqual(reads, expected_reads)

    # --- Partial overwrites ---
    byte_ff = ExprInt(0xFF, 8)
    byte_ee = ExprInt(0xEE, 8)

    # Overwrite the lowest byte of the id_x cell with 0xFF
    sb.apply_change(mem_at(0x20, 8), byte_ff)
    mem_state = dict(sb.modified(ids=False))
    self.assertEqual(mem_state[mem_at(0x20, 8)], byte_ff)
    self.assertEqual(mem_state[mem_at(0x21, 24)], id_x[8:32])

    # Overwrite the highest byte of the id_x cell with 0xEE
    sb.apply_change(mem_at(0x23, 8), byte_ee)
    mem_state = dict(sb.modified(ids=False))
    self.assertEqual(mem_state[mem_at(0x20, 8)], byte_ff)
    self.assertEqual(mem_state[mem_at(0x21, 16)], id_x[8:24])
    self.assertEqual(mem_state[mem_at(0x23, 8)], byte_ee)

    self.assertEqual(
        sb.eval_expr(mem_at(0x22)),
        ExprCompose(id_x[16:24], byte_ee, mem_at(0x24, 16))
    )

    # Overwrite the lowest byte of the 0x11223344 cell at 0x54 with 0xFF
    sb.apply_change(mem_at(0x54, 8), byte_ff)

    # Overwrite the byte at id_a + 1 of the 0x11223344 cell with 0xFF
    sb.apply_change(ExprMem(id_a + ExprInt(0x1, 32), 8), byte_ff)
    mem_state = dict(sb.modified(ids=False))
    self.assertEqual(mem_state[ExprMem(id_a + ExprInt(0x1, 32), 8)], byte_ff)
    self.assertEqual(mem_state[ExprMem(id_a + ExprInt(0x2, 32), 16)],
                     ExprInt(0x1122, 16))

    # A 32-bit write at 0xFFFFFFFE wraps around to address 0
    top_cell = mem_at(0xFFFFFFFE)
    sb.apply_change(top_cell, ExprInt(0x11223344, 32))
    self.assertEqual(sb.eval_expr(mem_at(0, 16)), ExprInt(0x1122, 16))

    # --- Reverts: writing a cell's own expression restores the original ---
    cell_42 = mem_at(0x42)
    sb.apply_change(cell_42, cell_42)
    self.assertEqual(sb.eval_expr(cell_42), cell_42)

    cell_c2 = ExprMem(id_c + ExprInt(0x2, 32), 32)
    sb.apply_change(cell_c2, cell_c2)
    self.assertEqual(sb.eval_expr(cell_c2), cell_c2)

    # --- Symbol deletion and state copy ---
    banner = "*"*40
    del sb.symbols[id_a]
    sb.dump()
    del sb.symbols[ExprMem(id_a, 8)]
    print(banner, 'Orig:')
    sb.dump()

    symbols_copy = sb.symbols.copy()
    print(banner, 'Copy:')
    symbols_copy.dump()

    # --- Symbols stored at the address-space limits ---
    sb.apply_change(top_cell, id_c)
    sb.dump()
    assert any(
        dst == top_cell and src == id_c
        for dst, src in viewitems(sb.symbols)
    )

    mid_cell = mem_at(0x7FFFFFFE)
    sb.apply_change(mid_cell, id_c)
    sb.dump()
    assert any(
        dst == mid_cell and src == id_c
        for dst, src in viewitems(sb.symbols)
    )

    # 64-bit symbol written at 0xFFFFFFFC, then both of its ends reverted
    sb.apply_change(mem_at(0xFFFFFFFC, 64), id_e)
    sb.apply_change(mem_at(0xFFFFFFFC, 16), mem_at(0xFFFFFFFC, 16))
    sb.apply_change(mem_at(0x2, 16), mem_at(0x2, 16))
    sb.dump()
    assert any(
        dst == top_cell and src == id_e[16:48]
        for dst, src in viewitems(sb.symbols)
    )

    SymbolicExecutionEngine(ira).dump()

    # --- Completely filled memory ---
    print('full')
    arch_addr8 = ir_x86_32(loc_db)
    ircfg = arch_addr8.new_ircfg()
    # Hack to obtain tiny address space
    arch_addr8.addrsize = 5
    sb_addr8 = SymbolicExecutionEngine(arch_addr8)
    sb_addr8.dump()

    # A 256-bit cell occupies the whole 5-bit address space
    full_cell = ExprMem(ExprInt(0, 5), 256)
    sb_addr8.apply_change(full_cell, ExprInt(0, 256))
    sb_addr8.dump()
    assert list(viewitems(sb_addr8.symbols)) == [(full_cell, ExprInt(0, 256))]

    print(sb_addr8.symbols.symbols_mem)

    # A second full-size write at another base replaces the first one
    shifted_cell = ExprMem(ExprInt(0x5, 5), 256)
    sb_addr8.apply_change(shifted_cell, ExprInt(0x123, 256))
    sb_addr8.dump()
    assert list(viewitems(sb_addr8.symbols)) == [(shifted_cell, ExprInt(0x123, 256))]
    print(sb_addr8.symbols.symbols_mem)

    print('dump')
    sb_addr8.symbols.symbols_mem.dump()

    # --- Deleting memory cells ---
    sb.dump()
    key_error_raised = False
    try:
        del sb.symbols.symbols_mem[mem_at(0xFFFFFFFF)]
    except KeyError:
        key_error_raised = True
    if not key_error_raised:
        raise RuntimeError("Should raise error!")

    del sb.symbols.symbols_mem[mem_at(0xFFFFFFFF, 16)]
    sb.dump()
    self.assertEqual(
        sb.eval_expr(top_cell),
        ExprCompose(id_e[16:24], mem_at(0xFFFFFFFF, 16), id_e[40:48])
    )

    sb.symbols.symbols_mem.delete_partial(mem_at(0xFFFFFFFF))
    self.assertEqual(
        sb.eval_expr(top_cell),
        ExprCompose(id_e[16:24], mem_at(0xFFFFFFFF, 24))
    )

    sb.dump()

    assert mem_at(0xFFFFFFFE, 8) in sb.symbols
    assert top_cell not in sb.symbols
    assert sb.symbols.symbols_mem.contains_partial(top_cell)
    assert not sb.symbols.symbols_mem.contains_partial(mem_at(0xFFFFFFFF, 8))

    assert list(sb_addr8.symbols) == [shifted_cell]
# Minimal symbolic-execution walkthrough: assemble "MOV EAX, EBX", disassemble
# it back into a block, lift the block to IR, run it symbolically and check
# that EAX ends up holding EBX.

# Assemble the instruction; asm() returns a list, the first entry is used
line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32)
asm = machine.mn.asm(line)[0]

# Get back block
cont = Container.from_string(asm, loc_db = loc_db)
mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)
# presumably caps the number of instructions disassembled per block -- TODO confirm
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.loc_db)
ircfg = ira.new_ircfg()
ira.add_asmblock_to_ircfg(asm_block, ircfg)

# Instantiate a Symbolic Execution engine with default value for registers
symb = SymbolicExecutionEngine(ira)

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
cur_addr = symb.run_at(ircfg, START_ADDR)

# Modified elements
print('Modified registers:')
symb.dump(mems=False)
print('Modified memory (should be empty):')
symb.dump(ids=False)

# Check final status: the symbolic value of EAX is the EBX register itself
eax, ebx = ira.arch.regs.EAX, ira.arch.regs.EBX
assert symb.symbols[eax] == ebx
assert eax in symb.symbols
states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) else: raise ValueError("Unsupported destination") if __name__ == '__main__': translator_smt2 = Translator.to_language("smt2") addr = int(options.address, 16) cont = Container.from_stream(open(args[0], 'rb')) mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db) ir_arch = machine.ir(mdis.loc_db) ircfg = ir_arch.new_ircfg() symbexec = SymbolicExecutionEngine(ir_arch) asmcfg, loc_db = parse_asm.parse_txt(machine.mn, 32, ''' init: PUSH argv PUSH argc PUSH ret_addr ''', loc_db=mdis.loc_db) argc_lbl = loc_db.get_name_location('argc') argv_lbl = loc_db.get_name_location('argv') ret_addr_lbl = loc_db.get_name_location('ret_addr') init_lbl = loc_db.get_name_location('init')
# Write the DOT rendering of the disassembled CFG to "cfg.dot".
# The context manager guarantees the data is flushed and the handle closed,
# instead of leaving that to garbage collection of an anonymous file object.
with open("cfg.dot", "w") as dot_file:
    dot_file.write(asmcfg.dot())

# --- Get IR --- #
lifter = LifterModelCall_x86_32(loc_db)
ircfg = lifter.new_ircfg()
# Only the first block yielded by the CFG is lifted
first_block = list(asmcfg.blocks)[0]
lifter.add_asmblock_to_ircfg(first_block, ircfg)

# --- Symbolic execution --- #
from miasm.ir.symbexec import SymbolicExecutionEngine
# NOTE(review): wildcard import kept as-is because later code may rely on the
# names it brings in (ExprId, ExprMem, ExprInt are used below)
from miasm.expression.expression import *

# Start from the architecture's initial register values (regs_init)
symb = SymbolicExecutionEngine(lifter, machine.mn.regs.regs_init)

# irDst contains the offset of next IR basic block to execute
irDst = symb.run_at(ircfg, entry_addr, step=False)
print("IR Dest = ", irDst)

# Provide symbolic context to irDst:
# substitute the 32-bit memory cell at [EBP_init - 4] with the symbol "flag",
# simplify, then evaluate the result in the current symbolic state
expr_flag = ExprId("flag", 32)
result = symb.eval_expr(
    expr_simp(
        irDst.replace_expr(
            {
                expr_simp(
                    ExprMem(machine.mn.regs.EBP_init - ExprInt(0x4, 32), 32)): expr_flag,
            })))
def _normalize_ircfg(self, conn):
    """Rewrite every IR block of self.ircfg into a stack-normalized form.

    Blocks are visited breadth-first from LocKey(0). For each assignment:
      * the stack pointer is replaced by ``initial SP + spd``, where ``spd``
        is the value returned by ``conn.modules.idc.get_spd`` for the
        instruction offset;
      * registers that were assigned the stack pointer are recorded in a
        local ``bp`` mapping and substituted the same way;
      * a compose source made of one slice argument and one non-slice
        argument reading a single element is replaced by that non-slice
        argument, resized through ``self._resize_top_expr``.
    In addition, when a block ends on a 'CC_EQ' condition, an explicit
    ``ID = 0`` assignment may be prepended to one of its successors.

    Approach follows miasm.re/blog/2017/02/03/data_flow_analysis_depgraph.html
    (stack un-aliasing), extended to the base pointer.

    @conn: object exposing ``modules.idc.get_spd(offset)``
           (presumably a remote connection to IDA -- TODO confirm)

    Raises RuntimeError("Ambiguous base pointer") when a block other than
    LocKey(0) copies a stack pointer value that is not already among the
    recorded ones.
    """
    # unalias stack miasm.re/blog/2017/02/03/data_flow_analysis_depgraph.html , but involve base pointer too
    # TODO remove manual *BP propagation in normalize_ircfg and use standard Miasm propagation when it is fixed
    # remove composes from bigger to smaller, they are not important for us
    # bp: destination expression -> normalized stack pointer value copied into it
    bp = {}
    # last known instruction offset, reused for assignblks that carry none
    prev_offset = None
    for irb_loc_key in self.ircfg.walk_breadth_first_forward(LocKey(0)):
        # rewritten assignblks of the current block
        irs = []
        if irb_loc_key not in self.ircfg.blocks:
            continue
        irb = self.ircfg.blocks[irb_loc_key]
        if irb.dst.is_cond() and irb.dst.cond.is_op() and irb.dst.cond.op == 'CC_EQ':
            # TODO propagate cmp ..., arb_int too
            # propagate known zeroes to process test eax, eax; jnz ...; lea edi, [eax+4]
            symb_exec = SymbolicExecutionEngine(self.ir_arch)
            dst = symb_exec.eval_updt_irblock(irb)
            # only handle a destination whose condition is a plain identifier
            # that the block leaves unchanged
            if dst.is_cond() and dst.cond.is_id() and not is_bad_expr(dst.cond) and \
                    symb_exec.eval_expr(dst.cond) == dst.cond:
                # add explicit mov ID, 0 to given irb
                # (the src2 successor of the conditional destination)
                target_loc = dst.src2
                if target_loc.is_int():
                    target_loc = self.asmcfg.loc_db.get_offset_location(int(target_loc))
                elif target_loc.is_loc():
                    target_loc = target_loc.loc_key
                else:
                    # NOTE(review): this also skips the normalization below
                    # for the current block -- confirm that is intended
                    continue
                # the zero is only known when this block is the sole predecessor
                if len(self.ircfg.predecessors(target_loc)) > 1:
                    continue
                target_irb = self.ircfg.blocks[target_loc]
                asign_blk = AssignBlock([ExprAssign(dst.cond, ExprInt(0, dst.cond.size))])
                assignblks = tuple([asign_blk, *target_irb.assignblks])
                new_irb = IRBlock(target_loc, assignblks)
                self.ircfg.blocks[target_loc] = new_irb
        # substitutions applied to the expressions of the current block
        fix_dct = {}
        for assignblk in irb:
            # fall back to the previous offset when the assignblk has no
            # instruction (or its offset is falsy)
            offset = prev_offset
            if assignblk.instr and assignblk.instr.offset:
                offset = assignblk.instr.offset
            prev_offset = offset
            spd = conn.modules.idc.get_spd(offset)
            if spd is not None:
                # SP -> initial SP + spd, plus every recorded base pointer
                stk_high = ExprInt(spd, self.ir_arch.sp.size)
                fix_dct = {self.ir_arch.sp: self.mn.regs.regs_init[self.ir_arch.sp] + stk_high}
                fix_dct.update(bp)
            else:
                # fix_dct keeps its previous content in this case
                logger.warning("Couldn't acquire stack depth at 0x%x" % (offset or 0x0BADF00D))
            new_assignblk = {}
            for dst, src in assignblk.items():
                if src.is_compose():
                    slc_arg = None
                    arg = None
                    # keep the last slice and the last non-slice argument
                    for tmp_arg in src.args:
                        if not tmp_arg.is_slice():
                            arg = tmp_arg
                        else:
                            # we're interested only in bigger to smaller
                            slc_arg = tmp_arg
                    if slc_arg and arg and len(arg.get_r()) == 1:
                        # replace the compose by its non-slice argument,
                        # resized to the size of the original compose
                        top_to_bottom_visitor = ExprVisitorCallbackTopToBottom(
                            lambda x: self._resize_top_expr(x, src.size))
                        src = top_to_bottom_visitor.visit(arg)
                if dst == src:
                    # special compiler anomalies such as lea esp, [esp+0]
                    continue
                if src == self.ir_arch.sp:
                    # the stack pointer is copied into dst: record dst in bp
                    src = expr_simp(src.replace_expr(fix_dct))
                    if bp and src not in bp.values() and irb_loc_key != LocKey(0):
                        raise RuntimeError("Ambiguous base pointer")
                    bp.update({dst: src})
                    fix_dct.update(bp)
                else:
                    src = expr_simp(src.replace_expr(fix_dct))
                    # SP and recorded base pointers stay untouched as destinations
                    if dst != self.ir_arch.sp and dst not in bp.keys():
                        dst = dst.replace_expr(fix_dct)
                dst, src = expr_simp(dst), expr_simp(src)
                new_assignblk[dst] = src
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        # replace the block by its normalized version
        self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)
# Minimal symbolic-execution walkthrough: assemble "MOV EAX, EBX", disassemble
# it back into a block, lift the block to IR, run it symbolically and check
# that EAX ends up holding EBX.

# Assemble the instruction; asm() returns a list, the first entry is used
line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32)
asm = machine.mn.asm(line)[0]

# Get back block
cont = Container.from_string(asm, loc_db = loc_db)
mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)
# presumably caps the number of instructions disassembled per block -- TODO confirm
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
lifter_model_call = machine.lifter_model_call(mdis.loc_db)
ircfg = lifter_model_call.new_ircfg()
lifter_model_call.add_asmblock_to_ircfg(asm_block, ircfg)

# Instantiate a Symbolic Execution engine with default value for registers
symb = SymbolicExecutionEngine(lifter_model_call)

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
cur_addr = symb.run_at(ircfg, START_ADDR)

# Modified elements
print('Modified registers:')
symb.dump(mems=False)
print('Modified memory (should be empty):')
symb.dump(ids=False)

# Check final status: the symbolic value of EAX is the EBX register itself
eax, ebx = lifter_model_call.arch.regs.EAX, lifter_model_call.arch.regs.EBX
assert symb.symbols[eax] == ebx
assert eax in symb.symbols