def emul(self, ctx=None, step=False):
    """Symbolically execute the relevant assignments according to the history.

    The sliced assignments of every relevant block are concatenated, walking
    self.relevant_labels backwards, into one temporary block which is then
    emulated in a single pass.

    @ctx: (optional) initial context as a dictionary; its entries override
          the architecture's initial register values
    @step: (optional) verbose execution, forwarded to the emulation

    Return a dictionary associating each element of self.inputs with its
    value in the symbolic state after the emulation.

    Warning: the emulation is not sound if the input nodes depend on a loop
    variant.
    """
    # Work on a private copy: updating the architecture-wide regs_init in
    # place would leak @ctx into every later user of that shared dictionary.
    ctx_init = dict(self._ira.arch.regs.regs_init)
    if ctx is not None:
        ctx_init.update(ctx)

    # Build a single assignment block according to the history
    assignblks = []
    for label in self.relevant_labels[::-1]:
        assignblks += self.irblock_slice(self._ira.blocs[label]).irs

    # Evaluate the block
    temp_label = asm_label("Temp")
    symb_exec = symbexec(self._ira, ctx_init)
    symb_exec.emulbloc(irbloc(temp_label, assignblks), step=step)

    # Return only the input values (the other ones could be wrong)
    return {element: symb_exec.symbols[element]
            for element in self.inputs}
def compute(asm, inputstate={}, debug=False):
    """Symbolically execute one assembled instruction and report its effects.

    @asm: textual form of the instruction
    @inputstate: dictionary of register -> integer value; the values are
                 wrapped with ExprInt_from() and override regs_init
    @debug: when true, print every symbol whose value differs from regs_init

    Return a dictionary of the symbols that are not in EXCLUDE_REGS and whose
    final value differs from regs_init. ExprInt results are unwrapped through
    .arg.arg, other expressions are returned as they are.
    """
    initial_state = dict(regs_init)
    initial_state.update(
        {reg: ExprInt_from(reg, value)
         for reg, value in inputstate.iteritems()})
    interm = ir_arch()
    engine = symbexec(interm, initial_state)

    # Assemble then disassemble the instruction to get a decoded object
    parsed = mn.fromstring(asm, "l")
    encoded = mn.asm(parsed)[0]
    instr = mn.dis(encoded, "l")
    instr.offset = inputstate.get(PC, 0)
    interm.add_instr(instr)
    engine.emul_ir_blocs(interm, instr.offset)

    if debug:
        for reg, value in engine.symbols.items():
            if regs_init.get(reg, None) != value:
                print("%s %s" % (reg, value))

    out = {}
    for reg, value in engine.symbols.items():
        if reg in EXCLUDE_REGS or regs_init.get(reg, None) == value:
            continue
        out[reg] = value.arg.arg if isinstance(value, ExprInt) else value
    return out
def emul(self, ctx=None, step=False):
    """Symbolically execute the relevant assignments according to the history.

    The sliced assignments of every relevant block are concatenated, walking
    self.relevant_labels backwards, into one temporary block which is then
    emulated in a single pass. The last block visited is sliced at
    self.initial_state.line_nb when its label is the initial state's label.

    @ctx: (optional) initial context as a dictionary; its entries override
          the architecture's initial register values
    @step: (optional) verbose execution, forwarded to the emulation

    Return a dictionary associating each element of self.inputs with its
    value in the symbolic state after the emulation.

    Warning: the emulation is not sound if the input nodes depend on a loop
    variant.
    """
    # Work on a private copy: updating the architecture-wide regs_init in
    # place would leak @ctx into every later user of that shared dictionary.
    ctx_init = dict(self._ira.arch.regs.regs_init)
    if ctx is not None:
        ctx_init.update(ctx)

    # Build a single assignment block according to the history
    assignblks = []
    last_index = len(self.relevant_labels)
    for index, label in enumerate(reversed(self.relevant_labels), 1):
        if index == last_index and label == self.initial_state.label:
            line_nb = self.initial_state.line_nb
        else:
            line_nb = None
        assignblks += self.irblock_slice(self._ira.blocs[label],
                                         line_nb).irs

    # Evaluate the block
    temp_label = asm_label("Temp")
    symb_exec = symbexec(self._ira, ctx_init)
    symb_exec.emulbloc(irbloc(temp_label, assignblks), step=step)

    # Return only the input values (the other ones could be wrong)
    return {element: symb_exec.symbols[element]
            for element in self.inputs}
def emul(self, ctx=None, step=False):
    """Emulate the history block by block and collect path constraints.

    Blocks are taken from self.history in reverse order, sliced with
    self.irblock_slice() and emulated one after the other on the same
    symbolic state. For every block but the last one, the constraint built
    by self._gen_path_constraints() from the block destination and the next
    label is added to a z3 solver, which is saved in self._solver.

    @ctx: (optional) initial context as a dictionary; its entries override
          the architecture's initial register values
    @step: (optional) verbose execution, forwarded to the emulation

    Return a dictionary associating each element of self.inputs with its
    evaluation in the final symbolic state.
    """
    # Work on a private copy: updating the architecture-wide regs_init in
    # place would leak @ctx into every later user of that shared dictionary.
    ctx_init = dict(self._ira.arch.regs.regs_init)
    if ctx is not None:
        ctx_init.update(ctx)

    solver = z3.Solver()
    symb_exec = symbexec(self._ira, ctx_init)
    history = self.history[::-1]
    history_size = len(history)
    translator = Translator.to_language("z3")
    size = self._ira.IRDst.size

    for hist_nb, label in enumerate(history, 1):
        # Only the last block may be cut at the initial state's line
        if hist_nb == history_size and label == self.initial_state.label:
            line_nb = self.initial_state.line_nb
        else:
            line_nb = None
        irb = self.irblock_slice(self._ira.blocs[label], line_nb)

        # Emulate the block and get back its destination
        dst = symb_exec.emulbloc(irb, step=step)

        # Add the constraint tying this block to the next one.
        # hist_nb is 1-based, so history[hist_nb] is the following label.
        if hist_nb < history_size:
            next_label = history[hist_nb]
            expected = symb_exec.eval_expr(m2_expr.ExprId(next_label, size))
            solver.add(
                self._gen_path_constraints(translator, dst, expected))

    # Save the solver
    self._solver = solver

    # Return only the input values (the other ones could be wrong)
    return {element: symb_exec.eval_expr(element)
            for element in self.inputs}
def codepath_walk(addr, symbols, conds, depth):
    """Search recursively for a path on which is_goal() holds.

    @addr: address to emulate from
    @symbols: symbolic state used to start the emulation
    @conds: sequence of (condition, value) pairs assumed so far
    @depth: number of conditional branches already taken

    Return the (condition, value) pairs of the first path found, trying the
    "condition is 1" side before the "condition is 0" side, or None when no
    path is found or when depth reaches cond_limit.
    """
    if depth >= cond_limit:
        return None

    for _ in range(uncond_limit):
        engine = symbexec(ir, symbols)
        dest = engine.emul_ir_blocs(ir, addr)
        if is_goal(engine.symbols) == True:
            return conds

        # Every path through this body leaves the loop: a destination which
        # is not conditional stops the walk right here.
        if not isinstance(dest, ExprCond):
            break

        taken = {dest.cond: ExprInt_from(dest.cond, 1)}
        not_taken = {dest.cond: ExprInt_from(dest.cond, 0)}
        addr_taken = expr_simp(
            engine.eval_expr(dest.replace_expr(taken), {}))
        addr_not_taken = expr_simp(
            engine.eval_expr(dest.replace_expr(not_taken), {}))
        branches = (
            (addr_taken, list(conds) + list(taken.items())),
            (addr_not_taken, list(conds) + list(not_taken.items())),
        )

        for target, path_conds in branches:
            found = codepath_walk(target, engine.symbols.copy(),
                                  path_conds, depth + 1)
            if found is not None:
                return found
        break

    return None
def codepath_walk(addr, symbols, conds, depth):
    """Walk the code paths depth-first until is_goal() accepts a state.

    @addr: address to emulate from
    @symbols: symbolic state used to start the emulation
    @conds: sequence of (condition, value) pairs assumed so far
    @depth: number of conditional branches already taken

    Return the (condition, value) pairs leading to the goal, or None if the
    goal is not reached (including when depth reaches cond_limit). The branch
    where the condition equals 1 is explored first.
    """
    if depth >= cond_limit:
        return None

    for _ in range(uncond_limit):
        sb = symbexec(ir, symbols)
        next_pc = sb.emul_ir_blocs(ir, addr)
        if is_goal(sb.symbols) == True:
            return conds

        if isinstance(next_pc, ExprCond):
            assume_true = {next_pc.cond: ExprInt_from(next_pc.cond, 1)}
            assume_false = {next_pc.cond: ExprInt_from(next_pc.cond, 0)}
            # Both targets are resolved before any recursion takes place
            target_true = expr_simp(
                sb.eval_expr(next_pc.replace_expr(assume_true), {}))
            target_false = expr_simp(
                sb.eval_expr(next_pc.replace_expr(assume_false), {}))
            path_true = list(conds) + list(assume_true.items())
            path_false = list(conds) + list(assume_false.items())

            result = codepath_walk(
                target_true, sb.symbols.copy(), path_true, depth + 1)
            if result is None:
                result = codepath_walk(
                    target_false, sb.symbols.copy(), path_false, depth + 1)
            if result is not None:
                return result

        # Conditional or not, the walk from this address stops here
        break

    return None
def emul(self, ctx=None, step=False):
    """Symbolically execute the relevant nodes according to the history.

    For each label of self.relevant_labels, taken in reverse order, the IR
    lines referenced by the relevant dependency nodes are collected (sorted
    by line number) into one temporary block, which is then emulated.

    @ctx: (optional) initial context as a dictionary; its entries override
          the architecture's initial register values
    @step: (optional) verbose execution, forwarded to the emulation

    Return a dictionary associating the element of each node of self.input
    with its value in the symbolic state after the emulation.

    Warning: the emulation is not sound if the input nodes depend on a loop
    variant.
    """
    # Work on a private copy: updating the architecture-wide regs_init in
    # place would leak @ctx into every later user of that shared dictionary.
    ctx_init = dict(self._ira.arch.regs.regs_init)
    if ctx is not None:
        ctx_init.update(ctx)

    depnodes = self.relevant_nodes
    affects = []

    # Build a single assignment block according to the history
    for label in self.relevant_labels[::-1]:
        affected_lines = set(depnode.line_nb for depnode in depnodes
                             if depnode.label == label)
        irs = self._ira.blocs[label].irs
        for line_nb in sorted(affected_lines):
            affects.append(irs[line_nb])

    # Evaluate the block
    temp_label = asm_label("Temp")
    symb_exec = symbexec(self._ira, ctx_init)
    symb_exec.emulbloc(irbloc(temp_label, affects), step=step)

    # Return only the input values (the other ones could be wrong)
    return {depnode.element: symb_exec.symbols[depnode.element]
            for depnode in self.input}
def emul(self, ctx=None, step=False):
    """Emulate the history block by block and collect path constraints.

    Blocks are taken from self.history in reverse order, sliced with
    self.irblock_slice() and emulated one after the other on the same
    symbolic state. For every block but the last one, the constraint built
    by self._gen_path_constraints() from the block destination and the next
    label is added to a z3 solver, which is saved in self._solver.

    @ctx: (optional) initial context as a dictionary; its entries override
          the architecture's initial register values
    @step: (optional) verbose execution, forwarded to the emulation

    Return a dictionary associating each element of self.inputs with its
    evaluation in the final symbolic state.
    """
    # Work on a private copy: updating the architecture-wide regs_init in
    # place would leak @ctx into every later user of that shared dictionary.
    ctx_init = dict(self._ira.arch.regs.regs_init)
    if ctx is not None:
        ctx_init.update(ctx)

    solver = z3.Solver()
    symb_exec = symbexec(self._ira, ctx_init)
    history = self.history[::-1]
    history_size = len(history)
    translator = Translator.to_language("z3")
    size = self._ira.IRDst.size

    for hist_nb, label in enumerate(history):
        irb = self.irblock_slice(self._ira.blocs[label])

        # Emulate the block and get back its destination
        dst = symb_exec.emulbloc(irb, step=step)

        # Add the constraint tying this block to the next one
        if hist_nb + 1 < history_size:
            next_label = history[hist_nb + 1]
            expected = symb_exec.eval_expr(m2_expr.ExprId(next_label, size))
            solver.add(
                self._gen_path_constraints(translator, dst, expected))

    # Save the solver
    self._solver = solver

    # Return only the input values (the other ones could be wrong)
    return {element: symb_exec.eval_expr(element)
            for element in self.inputs}
def emul(self, ctx=None, step=False):
    """Symbolically execute the relevant nodes according to the history.

    For each label of self.relevant_labels, taken in reverse order, the IR
    lines referenced by the relevant dependency nodes are collected (sorted
    by line number) into one temporary block, which is then emulated.

    @ctx: (optional) initial context as a dictionary; its entries override
          the architecture's initial register values
    @step: (optional) verbose execution, forwarded to the emulation

    Return a dictionary associating the element of each node of self.input
    with its value in the symbolic state after the emulation.

    Warning: the emulation is not safe if there is a loop in the relevant
    labels.
    """
    # Work on a private copy: updating the architecture-wide regs_init in
    # place would leak @ctx into every later user of that shared dictionary.
    ctx_init = dict(self._ira.arch.regs.regs_init)
    if ctx is not None:
        ctx_init.update(ctx)

    depnodes = self.relevant_nodes
    affects = []

    # Build a single assignment block according to the history
    for label in self.relevant_labels[::-1]:
        affected_lines = set(depnode.line_nb for depnode in depnodes
                             if depnode.label == label)
        irs = self._ira.blocs[label].irs
        for line_nb in sorted(affected_lines):
            affects.append(irs[line_nb])

    # Evaluate the block
    temp_label = asm_label("Temp")
    sb = symbexec(self._ira, ctx_init)
    sb.emulbloc(irbloc(temp_label, affects), step=step)

    # Return only the input values (the other ones could be wrong)
    return {depnode.element: sb.symbols[depnode.element]
            for depnode in self.input}
def analyse_bb(begin, end):
    """Symbolically execute one basic block and return its memory write.

    @begin: address where the disassembly starts
    @end: address the disassembler must not go through (dont_dis)

    The block is expected to modify exactly one memory location (checked
    with assertions). Return the list obtained by applying my_simplify() to
    every expression stored for that location.
    """
    # Disassemble
    disassembler = dis_engine_cls(bs=bi.bs)
    disassembler.dont_dis = [end]
    asm_bloc = disassembler.dis_bloc(begin)

    # Transform to IR
    ira = ira_cls()
    ir_bloc = ira.add_bloc(asm_bloc)[0]

    # Perform the symbolic execution
    engine = symbexec(ira, symbols_init)
    engine.emulbloc(ir_bloc)

    # Find out what has been modified during the symbolic execution;
    # a single memory entry is expected here
    modified_mem = engine.symbols.symbols_mem
    assert len(modified_mem) == 1
    simplified = [[my_simplify(expr) for expr in values]
                  for _, values in modified_mem.iteritems()]
    assert len(simplified) == 1
    return simplified[0]
def emul(self, step=False):
    """Symbolically execute the relevant nodes according to the history.

    @step: (optional) forwarded to the emulation of the temporary block

    Return a dictionary associating the element of each node of self.input
    with its value in the symbolic state after the emulation.

    Warning: the emulation is not safe if there is a loop in the relevant
    labels.
    """
    relevant = self.relevant_nodes

    # Build a single assignment block according to the history: for each
    # label, keep the IR lines referenced by a relevant node, in line order
    selected = []
    for label in self.relevant_labels[::-1]:
        line_numbers = {node.line_nb for node in relevant
                        if node.label == label}
        block_irs = self._ira.blocs[label].irs
        selected.extend(block_irs[nb] for nb in sorted(line_numbers))

    # Evaluate the block from the architecture's initial register state
    engine = symbexec(self._ira, self._ira.arch.regs.regs_init)
    engine.emulbloc(irbloc(asm_label("Temp"), selected), step=step)

    # Return only the input values (the other ones could be wrong)
    values = {}
    for node in self.input:
        values[node.element] = engine.symbols[node.element]
    return values
def test_ClassDef(self):
    """Check memory lookups and evaluations of a pre-loaded symbexec."""
    from miasm2.expression.expression import ExprInt32, ExprId, ExprMem, ExprCompose
    from miasm2.arch.x86.sem import ir_x86_32
    from miasm2.ir.symbexec import symbexec

    # Addresses
    addrX = ExprInt32(-1)
    addr0, addr1, addr8, addr9 = [ExprInt32(value) for value in (0, 1, 8, 9)]
    addr20, addr40, addr50 = [ExprInt32(value) for value in (20, 40, 50)]

    # Memory accesses (default size unless stated otherwise)
    mem0, mem1, mem8, mem9, mem20 = [
        ExprMem(addr) for addr in (addr0, addr1, addr8, addr9, addr20)]
    mem40v = ExprMem(addr40, 8)
    mem40w = ExprMem(addr40, 16)
    mem50v = ExprMem(addr50, 8)
    mem50w = ExprMem(addr50, 16)

    # Identifiers
    id_x = ExprId('x')
    id_y = ExprId('y', 8)
    id_a = ExprId('a')
    id_eax = ExprId('eax_init')

    engine = symbexec(ir_x86_32(), {
        mem0: id_x,
        mem1: id_y,
        mem9: id_x,
        mem40w: id_x,
        mem50v: id_y,
        id_a: addr0,
        id_eax: addr0,
    })
    check = self.assertEqual

    # Lookup by address
    check(engine.find_mem_by_addr(addr0), mem0)
    check(engine.find_mem_by_addr(addrX), None)

    # Memory evaluation, including partial overlaps
    check(engine.eval_ExprMem(ExprMem(addr1 - addr1)), id_x)
    check(engine.eval_ExprMem(ExprMem(addr1, 8)), id_y)
    check(engine.eval_ExprMem(ExprMem(addr1 + addr1)),
          ExprCompose([(id_x[16:32], 0, 16),
                       (ExprMem(ExprInt32(4), 16), 16, 32)]))
    check(engine.eval_ExprMem(mem8),
          ExprCompose([(id_x[0:24], 0, 24),
                       (ExprMem(ExprInt32(11), 8), 24, 32)]))
    check(engine.eval_ExprMem(mem40v), id_x[:8])
    check(engine.eval_ExprMem(mem50w),
          ExprCompose([(id_y, 0, 8),
                       (ExprMem(ExprInt32(51), 8), 8, 16)]))

    # Unknown memory, without and then with a read callback
    check(engine.eval_ExprMem(mem20), mem20)
    engine.func_read = lambda x: x
    check(engine.eval_ExprMem(mem20), mem20)

    check(set(engine.modified()), set(engine.symbols))
    self.assertRaises(KeyError, engine.symbols.__getitem__,
                      ExprMem(ExprInt32(100)))
def load(self):
    """Preload symbols according to the current architecture.

    Every register of all_regs_ids_no_alias starts as a zero integer of the
    register's size; the resulting engine is stored in self.symbexec.
    """
    registers = self.ir_arch.arch.regs.all_regs_ids_no_alias
    symbols_init = {}
    for reg in registers:
        symbols_init[reg] = m2_expr.ExprInt(0, size=reg.size)

    self.symbexec = symbexec(self.ir_arch, symbols_init,
                             func_read=self.func_read,
                             func_write=self.func_write)
def intra_bloc_flow_symb(ir_arch, flow_graph, irbloc):
    """Add the data-flow nodes and edges of one IR block to flow_graph.

    The block is symbolically executed from the initial register state; the
    symbols reported by get_modified_symbols() are then turned into graph
    nodes and edges. The input and output node dictionaries are stored on
    the block as irbloc.in_nodes and irbloc.out_nodes.
    """
    symbols_init = {}
    # `i` stays bound after this loop and is read again below, when naming
    # the nodes read by a memory address.
    for i, r in enumerate(all_regs_ids):
        symbols_init[r] = all_regs_ids_init[i]

    engine = symbexec(ir_arch, symbols_init)
    engine.emulbloc(irbloc)
    print('*' * 40)
    print(irbloc)

    in_nodes = {}
    out_nodes = {}
    out = get_modified_symbols(engine)
    current_nodes = {}

    # gen mem arg to mem node links
    for dst, src in out.items():
        # The set is rebuilt for each of the two expressions, so only the
        # memories of `src` are left once this inner loop is over.
        for expr in (dst, src):
            all_mems = set(get_expr_mem(expr))

        for mem in all_mems:
            node_n_w = get_node_name(irbloc.label, 0, mem)
            if not mem == src:
                continue
            for n_r in mem.arg.get_r(mem_read=False, cst_read=True):
                if n_r in current_nodes:
                    node_n_r = current_nodes[n_r]
                else:
                    node_n_r = get_node_name(irbloc.label, i, n_r)
                in_nodes.setdefault(n_r, node_n_r)
                flow_graph.add_uniq_edge(node_n_r, node_n_w)

    # gen data flow links
    for dst, src in out.items():
        read_exprs = src.get_r(mem_read=False, cst_read=True)
        written_exprs = {dst}
        for n_r in read_exprs:
            if n_r in current_nodes:
                node_n_r = current_nodes[n_r]
            else:
                node_n_r = get_node_name(irbloc.label, 0, n_r)
            in_nodes.setdefault(n_r, node_n_r)

            flow_graph.add_node(node_n_r)
            for n_w in written_exprs:
                node_n_w = get_node_name(irbloc.label, 1, n_w)
                out_nodes[n_w] = node_n_w

                flow_graph.add_node(node_n_w)
                flow_graph.add_uniq_edge(node_n_r, node_n_w)

    irbloc.in_nodes = in_nodes
    irbloc.out_nodes = out_nodes
def execc(self, code):
    """Disassemble code from offset 0 as x86_32 and emulate it symbolically.

    Return the symbolic execution engine once the emulation is over.
    """
    machine = Machine('x86_32')
    asm_blocs = machine.dis_engine(code).dis_multibloc(0)

    # Lift every disassembled block to IR
    ira = machine.ira()
    for asm_bloc in asm_blocs:
        ira.add_bloc(asm_bloc)

    engine = symbexec(ira, machine.mn.regs.regs_init)
    engine.emul_ir_blocs(ira, 0)
    return engine
def symb_exec(interm, inputstate, debug):
    """Emulate the IR held by interm from address 0 and report the changes.

    @interm: IR container handed to the symbolic execution engine
    @inputstate: dictionary of initial values overriding regs_init
    @debug: when true, print every symbol whose value differs from regs_init

    Return a dictionary of the symbols that are not in EXCLUDE_REGS and whose
    final value differs from regs_init.
    """
    initial_state = dict(regs_init)
    initial_state.update(inputstate)
    engine = symbexec(interm, initial_state)
    engine.emul_ir_blocks(0)

    if debug:
        for reg, value in engine.symbols.items():
            if regs_init.get(reg, None) != value:
                print("%s %s" % (reg, value))

    changed = {}
    for reg, value in engine.symbols.items():
        if reg not in EXCLUDE_REGS and regs_init.get(reg, None) != value:
            changed[reg] = value
    return changed
def load(self):
    """Preload symbols according to the current architecture.

    Every register of all_regs_ids_no_alias starts with the value that
    regs_init holds for it; the resulting engine is stored in self.symbexec.
    """
    regs = self.ir_arch.arch.regs
    symbols_init = {reg: regs.regs_init[reg]
                    for reg in regs.all_regs_ids_no_alias}

    self.symbexec = symbexec(self.ir_arch, symbols_init,
                             func_read=self.func_read,
                             func_write=self.func_write)
def load(self):
    """Preload symbols according to the current architecture.

    Each non-aliased register is initialised to the integer 0 of its own
    size, then a symbolic execution engine wired to this object's memory
    callbacks is created and kept in self.symbexec.
    """
    symbols_init = dict(
        (reg, m2_expr.ExprInt(0, size=reg.size))
        for reg in self.ir_arch.arch.regs.all_regs_ids_no_alias
    )
    callbacks = {
        "func_read": self.func_read,
        "func_write": self.func_write,
    }
    self.symbexec = symbexec(self.ir_arch, symbols_init, **callbacks)
def load(self):
    """Preload symbols according to the current architecture.

    Each non-aliased register is bound to its entry of regs_init, then a
    symbolic execution engine wired to this object's memory callbacks is
    created and kept in self.symbexec.
    """
    arch_regs = self.ir_arch.arch.regs
    symbols_init = dict(
        (reg, arch_regs.regs_init[reg])
        for reg in arch_regs.all_regs_ids_no_alias
    )
    self.symbexec = symbexec(
        self.ir_arch,
        symbols_init,
        func_read=self.func_read,
        func_write=self.func_write,
    )
def test_ClassDef(self):
    """Check lookups, evaluations and assignments on a pre-loaded symbexec."""
    from miasm2.expression.expression import ExprInt32, ExprId, ExprMem, \
        ExprCompose, ExprAff
    from miasm2.arch.x86.sem import ir_x86_32
    from miasm2.ir.symbexec import symbexec

    # Addresses
    addrX = ExprInt32(-1)
    addr0, addr1, addr8, addr9 = [ExprInt32(value) for value in (0, 1, 8, 9)]
    addr20, addr40, addr50 = [ExprInt32(value) for value in (20, 40, 50)]

    # Memory accesses
    mem0 = ExprMem(addr0)
    mem1 = ExprMem(addr1, 8)
    mem8, mem9, mem20 = [ExprMem(addr) for addr in (addr8, addr9, addr20)]
    mem40v = ExprMem(addr40, 8)
    mem40w = ExprMem(addr40, 16)
    mem50v = ExprMem(addr50, 8)
    mem50w = ExprMem(addr50, 16)

    # Identifiers
    id_x = ExprId('x')
    id_y = ExprId('y', 8)
    id_a = ExprId('a')
    id_eax = ExprId('eax_init')

    engine = symbexec(ir_x86_32(), {
        mem0: id_x,
        mem1: id_y,
        mem9: id_x,
        mem40w: id_x[:16],
        mem50v: id_y,
        id_a: addr0,
        id_eax: addr0,
    })
    check = self.assertEqual

    # Lookup by address
    check(engine.find_mem_by_addr(addr0), mem0)
    check(engine.find_mem_by_addr(addrX), None)

    # Memory evaluation, including partial overlaps
    check(engine.eval_expr(ExprMem(addr1 - addr1)), id_x)
    check(engine.eval_expr(ExprMem(addr1, 8)), id_y)
    check(engine.eval_expr(ExprMem(addr1 + addr1)),
          ExprCompose(id_x[16:32], ExprMem(ExprInt32(4), 16)))
    check(engine.eval_expr(mem8),
          ExprCompose(id_x[0:24], ExprMem(ExprInt32(11), 8)))
    check(engine.eval_expr(mem40v), id_x[:8])
    check(engine.eval_expr(mem50w),
          ExprCompose(id_y, ExprMem(ExprInt32(51), 8)))

    # Unknown memory, without and then with a read callback
    check(engine.eval_expr(mem20), mem20)
    engine.func_read = lambda x: x
    check(engine.eval_expr(mem20), mem20)

    check(set(engine.modified()), set(engine.symbols))
    self.assertRaises(KeyError, engine.symbols.__getitem__,
                      ExprMem(ExprInt32(100)))

    # Applying an assignment updates the state
    check(engine.apply_expr(id_eax), addr0)
    check(engine.apply_expr(ExprAff(id_eax, addr9)), addr9)
    check(engine.apply_expr(id_eax), addr9)
def gen_equations(self):
    """Replace the IR of each block by the equations of its modified symbols.

    Each block of self.blocs is symbolically executed from a fresh initial
    state; every symbol whose final value differs from its initial one gives
    an ExprAff. The block's irs become a single list of these assignments
    and its lines become [None]. The block is printed along the way.
    """
    for irb in self.blocs.values():
        symbols_init = dict(self.arch.regs.all_regs_ids_init)
        engine = symbexec(self, dict(symbols_init))
        engine.emulbloc(irb)

        equations = []
        for dst in engine.symbols:
            value = engine.symbols[dst]
            unchanged = dst in symbols_init and symbols_init[dst] == value
            if not unchanged:
                equations.append(ExprAff(dst, value))

        print('*' * 40)
        print(irb)
        irb.irs = [equations]
        irb.lines = [None]
def compute(asm, inputstate={}, debug=False):
    """Symbolically execute one assembled instruction and report its effects.

    @asm: textual form of the instruction
    @inputstate: dictionary of register -> integer value; the values are
                 wrapped with ExprInt_from() and override regs_init
    @debug: when true, print every symbol whose value differs from regs_init

    Return a dictionary of the symbols that are not in EXCLUDE_REGS and whose
    final value differs from regs_init, each value being unwrapped through
    .arg.arg.
    """
    initial_state = dict(regs_init)
    initial_state.update(
        {reg: ExprInt_from(reg, value)
         for reg, value in inputstate.iteritems()})
    interm = ir_arch()
    engine = symbexec(interm, initial_state)

    # Assemble then disassemble the instruction to get a decoded object
    parsed = mn.fromstring(asm, mode)
    encoded = mn.asm(parsed)[0]
    instr = mn.dis(encoded, mode)
    instr.offset = inputstate.get(PC, 0)
    interm.add_instr(instr)
    engine.emul_ir_blocs(interm, instr.offset)

    if debug:
        for reg, value in engine.symbols.items():
            if regs_init.get(reg, None) != value:
                print("%s %s" % (reg, value))

    results = {}
    for reg, value in engine.symbols.items():
        if reg not in EXCLUDE_REGS and regs_init.get(reg, None) != value:
            results[reg] = value.arg.arg
    return results
def compute(asm, inputstate={}, debug=False):
    """Symbolically execute one assembled instruction and report its effects.

    @asm: textual form of the instruction
    @inputstate: dictionary of register -> integer value; each value becomes
                 an ExprInt of the register's size and overrides regs_init
    @debug: when true, print every symbol whose value differs from regs_init

    Return a dictionary of the symbols that are not in EXCLUDE_REGS and whose
    final value differs from regs_init, each value being unwrapped through
    .arg.arg.
    """
    initial_state = dict(regs_init)
    initial_state.update(
        {reg: ExprInt(value, reg.size)
         for reg, value in inputstate.iteritems()})
    interm = ir_arch()
    engine = symbexec(interm, initial_state)

    # Assemble then disassemble the instruction to get a decoded object
    parsed = mn.fromstring(asm, mode)
    encoded = mn.asm(parsed)[0]
    instr = mn.dis(encoded, mode)
    instr.offset = inputstate.get(PC, 0)
    interm.add_instr(instr)
    engine.emul_ir_blocks(instr.offset)

    if debug:
        for reg, value in engine.symbols.items():
            if regs_init.get(reg, None) != value:
                print("%s %s" % (reg, value))

    results = {}
    for reg, value in engine.symbols.items():
        if reg not in EXCLUDE_REGS and regs_init.get(reg, None) != value:
            results[reg] = value.arg.arg
    return results
def intra_bloc_flow_symbexec(ir_arch, flow_graph, irb):
    """Create the data flow of an irbloc using symbolic execution.

    The block is emulated from a state where every register is bound to a
    fresh ExprId flagged is_term. For each symbol whose final value differs
    from its initial one, an edge is added to flow_graph from every
    expression read by the value to the written symbol. The node
    dictionaries are stored on the block as irb.in_nodes and irb.out_nodes.
    """
    in_nodes = {}
    out_nodes = {}
    current_nodes = {}

    # Initial state: one terminal identifier per register
    symbols_init = {}
    for reg in ir_arch.arch.regs.all_regs_ids:
        initial = ExprId(reg.name, reg.size)
        initial.is_term = True
        symbols_init[reg] = initial

    engine = symbexec(ir_arch, dict(symbols_init))
    engine.emulbloc(irb)

    for n_w in engine.symbols:
        value = engine.symbols[n_w]
        if n_w in symbols_init and symbols_init[n_w] == value:
            # Untouched symbol
            continue

        node_n_w = get_node_name(irb.label, len(irb.lines), n_w)
        for n_r in value.get_r(cst_read=True):
            # The first time an expression is read, it becomes an input node
            if n_r not in current_nodes:
                created = get_node_name(irb.label, 0, n_r)
                current_nodes[n_r] = created
                in_nodes[n_r] = created
            node_n_r = current_nodes[n_r]

            out_nodes[n_w] = node_n_w
            flow_graph.add_uniq_edge(node_n_r, node_n_w)

    irb.in_nodes = in_nodes
    irb.out_nodes = out_nodes
def gen_equations(self):
    """Replace the IR of each block by the equations of its modified symbols.

    Each block of self.blocs is symbolically executed from a state where
    every register is bound to a fresh ExprId flagged is_term; every symbol
    whose final value differs from its initial one gives an ExprAff. The
    block's irs become a single list of these assignments and its lines
    become [None]. The block is printed along the way.
    """
    for irb in self.blocs.values():
        # Initial state: one terminal identifier per register
        symbols_init = {}
        for reg in self.arch.regs.all_regs_ids:
            initial = ExprId(reg.name, reg.size)
            initial.is_term = True
            symbols_init[reg] = initial

        engine = symbexec(self, dict(symbols_init))
        engine.emulbloc(irb)

        equations = []
        for dst in engine.symbols:
            value = engine.symbols[dst]
            unchanged = dst in symbols_init and symbols_init[dst] == value
            if not unchanged:
                equations.append(ExprAff(dst, value))

        print("*" * 40)
        print(irb)
        irb.irs = [equations]
        irb.lines = [None]
def gen_equations(self):
    """Replace the IR of each block by the equations of its modified symbols.

    Each block of self.blocs is symbolically executed from a state where
    every register is bound to a fresh ExprId flagged is_term; every symbol
    whose final value differs from its initial one gives an ExprAff. The
    block's irs become a single list of these assignments and its lines
    become [None]. The block is printed along the way.
    """
    for irb in self.blocs.values():
        # Initial state: one terminal identifier per register
        symbols_init = {}
        for r in self.arch.regs.all_regs_ids:
            x = ExprId(r.name, r.size)
            x.is_term = True
            symbols_init[r] = x

        sb = symbexec(self, dict(symbols_init))
        sb.emulbloc(irb)

        eqs = []
        for n_w in sb.symbols:
            v = sb.symbols[n_w]
            if n_w in symbols_init and symbols_init[n_w] == v:
                # Untouched symbol: no equation to emit
                continue
            eqs.append(ExprAff(n_w, v))

        print('*' * 40)
        print(irb)
        # The former "for eq in eqs: eq" loop evaluated each equation and
        # discarded it, so it has been dropped.
        irb.irs = [eqs]
        irb.lines = [None]
def emul(self, ctx=None, step=False):
    """Emulate the history block by block and collect path constraints.

    For each label of self.relevant_labels, taken in reverse order, a
    temporary block holding only the IR lines referenced by the relevant
    dependency nodes is emulated on a shared symbolic state. For every block
    but the last one, the assignment "destination = next label" (as a
    32-bit identifier) is translated to z3 and added to a solver, which is
    saved in self._solver.

    @ctx: (optional) initial context as a dictionary; its entries override
          the architecture's initial register values
    @step: (optional) verbose execution, forwarded to the emulation

    Return a dictionary associating the element of each node of self.input
    with its value in the final symbolic state.
    """
    # Work on a private copy: updating the architecture-wide regs_init in
    # place would leak @ctx into every later user of that shared dictionary.
    ctx_init = dict(self._ira.arch.regs.regs_init)
    if ctx is not None:
        ctx_init.update(ctx)

    depnodes = self.relevant_nodes
    solver = z3.Solver()
    symb_exec = symbexec(self._ira, ctx_init)
    temp_label = asm_label("Temp")
    history = self.relevant_labels[::-1]
    history_size = len(history)

    for hist_nb, label in enumerate(history):
        # Build a block with the relevant lines only
        affected_lines = set(depnode.line_nb for depnode in depnodes
                             if depnode.label == label)
        irs = self._ira.blocs[label].irs
        affects = []
        for line_nb in sorted(affected_lines):
            affects.append(irs[line_nb])

        # Emulate the block and get back its destination
        dst = symb_exec.emulbloc(irbloc(temp_label, affects), step=step)

        # Add the constraint tying this block to the next one
        if hist_nb + 1 < history_size:
            next_label = history[hist_nb + 1]
            expected = symb_exec.eval_expr(m2_expr.ExprId(next_label, 32))
            constraint = m2_expr.ExprAff(dst, expected)
            solver.add(Translator.to_language("z3").from_expr(constraint))

    # Save the solver
    self._solver = solver

    # Return only the input values (the other ones could be wrong)
    return {depnode.element: symb_exec.symbols[depnode.element]
            for depnode in self.input}
def symbolic_exec():
    """Symbolically execute the current selection and display the result.

    The code between SelStart() and SelEnd() is disassembled, lifted to IR
    and emulated from the architecture's initial register state. Identifiers
    whose value differs from regs_init, plus every memory entry, are handed
    to a symbolicexec_t view which is shown if it can be created.
    """
    from miasm2.ir.symbexec import symbexec
    from miasm2.core.bin_stream_ida import bin_stream_ida
    from utils import guess_machine

    bs = bin_stream_ida()
    machine = guess_machine()

    # Disassemble the selection, stopping at its end
    mdis = machine.dis_engine(bs)
    start, end = SelStart(), SelEnd()
    mdis.dont_dis = [end]
    asm_blocs = mdis.dis_multibloc(start)

    ira = machine.ira()
    for asm_bloc in asm_blocs:
        ira.add_bloc(asm_bloc)

    print("Run symbolic execution...")
    sb = symbexec(ira, machine.mn.regs.regs_init)
    sb.emul_ir_blocs(ira, start)

    # Keep the identifiers that moved away from their initial value
    modified = {}
    regs_init = sb.ir_arch.arch.regs.regs_init
    symbols_id = sb.symbols.symbols_id
    for ident in symbols_id:
        value = symbols_id[ident]
        if ident in regs_init and value == regs_init[ident]:
            continue
        modified[ident] = value

    # Memory entries are stored as (memory expression, value) sequences
    symbols_mem = sb.symbols.symbols_mem
    for key in symbols_mem:
        entry = symbols_mem[key]
        modified[entry[0]] = entry[1]

    view = symbolicexec_t()
    title = "Symbolic Execution - 0x%x to 0x%x" % (start, end)
    if view.Create(modified, machine, title):
        view.Show()
def symbolic_exec():
    """Symbolically execute the current selection and display the result.

    The code between SelStart() and SelEnd() is disassembled, lifted to IR
    and emulated from the architecture's initial register state. Identifiers
    whose value differs from regs_init, plus every memory entry, are handed
    to a symbolicexec_t view which is shown if it can be created.
    """
    from miasm2.ir.symbexec import symbexec
    from miasm2.core.bin_stream_ida import bin_stream_ida
    from utils import guess_machine

    bs = bin_stream_ida()
    machine = guess_machine()

    # Disassemble the selection, stopping at its end
    mdis = machine.dis_engine(bs)
    start, end = SelStart(), SelEnd()
    mdis.dont_dis = [end]
    asm_blocs = mdis.dis_multibloc(start)

    ira = machine.ira()
    for asm_bloc in asm_blocs:
        ira.add_bloc(asm_bloc)

    print("Run symbolic execution...")
    sb = symbexec(ira, machine.mn.regs.regs_init)
    sb.emul_ir_blocks(start)

    # Keep the identifiers that moved away from their initial value
    modified = {}
    regs_init = sb.ir_arch.arch.regs.regs_init
    symbols_id = sb.symbols.symbols_id
    for ident in symbols_id:
        value = symbols_id[ident]
        if ident in regs_init and value == regs_init[ident]:
            continue
        modified[ident] = value

    # Memory entries are stored as (memory expression, value) sequences
    symbols_mem = sb.symbols.symbols_mem
    for key in symbols_mem:
        entry = symbols_mem[key]
        modified[entry[0]] = entry[1]

    view = symbolicexec_t()
    title = "Symbolic Execution - 0x%x to 0x%x" % (start, end)
    if view.Create(modified, machine, title):
        view.Show()
def emul(self, ctx=None, step=False):
    """Emulate the history block by block and collect path constraints.

    For each label of self.relevant_labels, taken in reverse order, a
    temporary block holding only the IR lines referenced by the relevant
    dependency nodes is emulated on a shared symbolic state. For every block
    but the last one, the assignment "destination = next label" (as a
    32-bit identifier) is translated to z3 and added to a solver, which is
    saved in self._solver.

    @ctx: (optional) initial context as a dictionary; its entries override
          the architecture's initial register values
    @step: (optional) verbose execution, forwarded to the emulation

    Return a dictionary associating the element of each node of self.input
    with its value in the final symbolic state.
    """
    # Work on a private copy: updating the architecture-wide regs_init in
    # place would leak @ctx into every later user of that shared dictionary.
    ctx_init = dict(self._ira.arch.regs.regs_init)
    if ctx is not None:
        ctx_init.update(ctx)

    depnodes = self.relevant_nodes
    solver = z3.Solver()
    sb = symbexec(self._ira, ctx_init)
    temp_label = asm_label("Temp")
    history = self.relevant_labels[::-1]
    history_size = len(history)

    for hist_nb, label in enumerate(history):
        # Build a block with the relevant lines only
        affected_lines = set(depnode.line_nb for depnode in depnodes
                             if depnode.label == label)
        irs = self._ira.blocs[label].irs
        affects = []
        for line_nb in sorted(affected_lines):
            affects.append(irs[line_nb])

        # Emulate the block and get back its destination
        dst = sb.emulbloc(irbloc(temp_label, affects), step=step)

        # Add the constraint tying this block to the next one
        if hist_nb + 1 < history_size:
            next_label = history[hist_nb + 1]
            expected = sb.eval_expr(m2_expr.ExprId(next_label, 32))
            constraint = m2_expr.ExprAff(dst, expected)
            solver.add(Translator.to_language("z3").from_expr(constraint))

    # Save the solver
    self._solver = solver

    # Return only the input values (the other ones could be wrong)
    return {depnode.element: sb.symbols[depnode.element]
            for depnode in self.input}
def do_step(self):
    """Pick the next pending state and build an engine to run it.

    Return None when self.todo is empty or when more than 600 steps have
    already been done (watchdog). Otherwise the state returned by
    self.get_next_state() is recorded in states_done and states_var_done,
    and a (parent, address, symbexec) tuple is returned, the engine being
    initialised with a dict built from the state's symbols.
    """
    if len(self.todo) == 0:
        return None

    if self.total_done > 600:
        print("symbexec watchdog!")
        return None

    self.total_done += 1
    print("CPT %s" % (self.total_done,))

    # A single state is consumed per call
    if self.todo:
        state = self.get_next_state()
        parent, ad, s = state
        self.states_done.add(state)
        self.states_var_done.add(state)

        sb = symbexec(self.ir_arch, dict(s))
        return parent, ad, sb

    return None
base_expr.setParseAction(my_var_parser)

# Symbols standing for the program arguments and the return address
argc = ExprId('argc', 32)
argv = ExprId('argv', 32)
ret_addr = ExprId('ret_addr')
for sym in (argc, argv, ret_addr):
    reg_and_id[sym.name] = sym

# Name -> expression mapping, completed with the x86 registers
my_symbols = [argc, argv, ret_addr]
my_symbols = {x.name: x for x in my_symbols}
my_symbols.update(mn_x86.regs.all_regs_ids_byname)

ir_arch = ir_x86_32(mdis.symbol_pool)
sb = symbexec(ir_arch, symbols_init)

# NOTE(review): the listing below is reproduced exactly as it appears in
# this file; confirm its line breaks against what the parser expects.
asm_text = ''' PUSH argv PUSH argc PUSH ret_addr '''
blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, asm_text)

b = list(blocs)[0]
print(b)

# add a fake address and length to the parsed instructions
for i, l in enumerate(b.lines):
    l.offset = i
    l.l = 1
ir_arch.add_bloc(b)
irb = get_bloc(ir_arch, mdis, 0)
# Stop disassembler after the XOR mdis.dont_dis = [0x1C] # Disassemble one basic block block = mdis.dis_bloc(0) # Instantiate an IR analysis ir_arch = ira(mdis.symbol_pool) # Translate asm basic block to an IR basic block ir_arch.add_bloc(block) # Store IR graph open('ir_graph.dot', 'w').write(ir_arch.graph.dot()) # Initiate the symbolic execution engine # regs_init associates EAX to EAX_init and so on sb = symbexec(ir_arch, machine.mn.regs.regs_init) # sb.dump_id() # Start execution at address 0 # IRDst represents the label of the next IR basic block to execute irdst = sb.emul_ir_blocs(ir_arch, 0, step=True) print 'ECX =', sb.symbols[machine.mn.regs.ECX] print 'ESP =', sb.symbols[machine.mn.regs.ESP] print 'EAX =', sb.symbols[machine.mn.regs.EAX] except lite.Error, e: if c: c.rollback() print "Error %s:" % e.args[0] sys.exit(1) finally: if c: cur.close()
ira = ira_cls()
irabloc = ira.add_bloc(bloc)[0]
# Show the first expression of each IR line
print('\n'.join(str(line[0]) for line in irabloc.irs))

from miasm2.expression.expression import *
from miasm2.ir.symbexec import symbexec
from miasm2.expression.simplifications import expr_simp

# Prepare the symbolic execution: each register starts at its initial value
symbols_init = {}
for r, r_init in zip(mn.regs.all_regs_ids, mn.regs.all_regs_ids_init):
    symbols_init[r] = r_init

# Perform the symbolic execution
sb = symbexec(ira, symbols_init)
sb.emulbloc(irabloc)

# Report the first memory entry of the resulting state
mem, exprs = sb.symbols.symbols_mem.items()[0]
print("Memory changed at %s :" % mem)
print(" before: %s" % (exprs[0],))
print(" after: %s" % (exprs[1],))

# Simplifications: patterns and the identifiers standing for them
fp_init = ExprId('FP_init', 32)
zero_init = ExprId('ZERO_init', 32)
e_i_pattern = expr_simp(ExprMem(fp_init + ExprInt32(0x38), 32))
e_i = ExprId('i', 32)
e_pass_i_pattern = expr_simp(
    ExprMem(fp_init + (e_i << ExprInt32(2)) + ExprInt32(0x20), 32))
e_pass_i = ExprId("pwd[i]", 32)
def emul_symb(ir_arch, mdis, states_todo, states_done):
    """Explore the code symbolically, forking on conditional destinations.

    @ir_arch: IR container; its pc symbol is removed from each state before
              the emulation
    @mdis: disassembly engine, handed to get_bloc()
    @states_todo: set of pending (address, symbols, conditions) states; it
                  is consumed, and successor states are added to it
    @states_done: set of states already handled, updated in place

    A conditional destination creates two successor states, one per value
    (0 and 1) of the condition. The exploration of a path stops when the
    destination equals ret_addr. Raise ValueError when a destination can not
    be followed.
    """

    def is_static_dst(expr):
        # A destination can be followed when it is a constant address or an
        # identifier wrapping an assembly label
        return isinstance(expr, ExprInt) or (
            isinstance(expr, ExprId) and
            isinstance(expr.name, asmbloc.asm_label))

    while states_todo:
        ad, symbols, conds = states_todo.pop()
        print("%s addr %s %s" % ('*' * 40, ad, '*' * 40))
        if (ad, symbols, conds) in states_done:
            print("skip %s" % (ad,))
            continue
        states_done.add((ad, symbols, conds))

        sb = symbexec(ir_arch, {})
        sb.symbols = symbols.copy()
        if ir_arch.pc in sb.symbols:
            del sb.symbols[ir_arch.pc]
        b = get_bloc(ir_arch, mdis, ad)

        print('run bloc')
        print(b)
        ad = sb.emulbloc(b)
        print('final state')
        sb.dump_id()
        print('dataflow')
        assert ad is not None
        print("DST %s" % (ad,))

        if isinstance(ad, ExprCond):
            # Create 2 states, each including complementary conditions
            p1 = sb.symbols.copy()
            p2 = sb.symbols.copy()
            c1 = {ad.cond: ExprInt(0, ad.cond.size)}
            c2 = {ad.cond: ExprInt(1, ad.cond.size)}
            print(ad.cond)
            p1[ad.cond] = ExprInt(0, ad.cond.size)
            p2[ad.cond] = ExprInt(1, ad.cond.size)
            ad1 = expr_simp(sb.eval_expr(ad.replace_expr(c1), {}))
            ad2 = expr_simp(sb.eval_expr(ad.replace_expr(c2), {}))

            # BOTH destinations must be followable. The former test mixed
            # "and" / "or" without grouping, so it accepted any second
            # destination as soon as the first one was an integer.
            if not (is_static_dst(ad1) and is_static_dst(ad2)):
                print("%s %s" % (str(ad1), str(ad2)))
                raise ValueError("zarb condition")

            conds1 = list(conds) + list(c1.items())
            conds2 = list(conds) + list(c2.items())
            if isinstance(ad1, ExprId):
                ad1 = ad1.name
            if isinstance(ad2, ExprId):
                ad2 = ad2.name
            if isinstance(ad1, ExprInt):
                ad1 = ad1.arg
            if isinstance(ad2, ExprInt):
                ad2 = ad2.arg
            states_todo.add((ad1, p1, tuple(conds1)))
            states_todo.add((ad2, p2, tuple(conds2)))
        elif isinstance(ad, ExprInt):
            ad = int(ad.arg)
            states_todo.add((ad, sb.symbols.copy(), tuple(conds)))
        elif isinstance(ad, ExprId) and isinstance(ad.name,
                                                   asmbloc.asm_label):
            ad = ad.name
            states_todo.add((ad, sb.symbols.copy(), tuple(conds)))
        elif ad == ret_addr:
            print('ret reached')
            continue
        else:
            raise ValueError("zarb eip")
from miasm2.core.bin_stream import bin_stream_str
from miasm2.arch.x86.arch import mn_x86
from miasm2.arch.x86.ira import ir_a_x86_32
from miasm2.arch.x86.regs import all_regs_ids, all_regs_ids_init
from miasm2.ir.symbexec import symbexec
from miasm2.arch.x86.disasm import dis_x86_32 as dis_engine
import miasm2.expression.expression as m2_expr

# Assemble a single instruction and disassemble it back
l = mn_x86.fromstring("MOV EAX, EBX", 32)
asm = mn_x86.asm(l)[0]

bin_stream = bin_stream_str(asm)

mdis = dis_engine(bin_stream)
disasm = mdis.dis_multibloc(0)

# Lift every disassembled block to IR
ir = ir_a_x86_32(mdis.symbol_pool)
for bbl in disasm:
    ir.add_bloc(bbl)

# Each register starts at its initial value
symbols_init = {}
for r, r_init in zip(all_regs_ids, all_regs_ids_init):
    symbols_init[r] = r_init

symb = symbexec(ir, symbols_init)

block = ir.get_bloc(0)

cur_addr = symb.emulbloc(block)

# After "MOV EAX, EBX", EAX must hold the initial value of EBX
eax_value = symb.symbols[m2_expr.ExprId("EAX")]
assert eax_value == symbols_init[m2_expr.ExprId("EBX")]

print('modified registers:')
symb.dump_id()
def test_ClassDef(self):
    """Check lookups, evaluations and state updates of a pre-loaded symbexec."""
    from miasm2.expression.expression import ExprInt32, ExprId, ExprMem, \
        ExprCompose, ExprAff
    from miasm2.arch.x86.sem import ir_x86_32
    from miasm2.ir.symbexec import symbexec
    from miasm2.ir.ir import AssignBlock

    # Addresses
    addrX = ExprInt32(-1)
    addr0, addr1, addr8, addr9 = [ExprInt32(value) for value in (0, 1, 8, 9)]
    addr20, addr40, addr50 = [ExprInt32(value) for value in (20, 40, 50)]

    # Memory accesses
    mem0 = ExprMem(addr0)
    mem1 = ExprMem(addr1, 8)
    mem8, mem9, mem20 = [ExprMem(addr) for addr in (addr8, addr9, addr20)]
    mem40v = ExprMem(addr40, 8)
    mem40w = ExprMem(addr40, 16)
    mem50v = ExprMem(addr50, 8)
    mem50w = ExprMem(addr50, 16)

    # Identifiers
    id_x = ExprId('x')
    id_y = ExprId('y', 8)
    id_a = ExprId('a')
    id_eax = ExprId('eax_init')

    engine = symbexec(ir_x86_32(), {
        mem0: id_x,
        mem1: id_y,
        mem9: id_x,
        mem40w: id_x[:16],
        mem50v: id_y,
        id_a: addr0,
        id_eax: addr0,
    })
    check = self.assertEqual

    # Lookup by address
    check(engine.find_mem_by_addr(addr0), mem0)
    check(engine.find_mem_by_addr(addrX), None)

    # Memory evaluation, including partial overlaps
    check(engine.eval_expr(ExprMem(addr1 - addr1)), id_x)
    check(engine.eval_expr(ExprMem(addr1, 8)), id_y)
    check(engine.eval_expr(ExprMem(addr1 + addr1)),
          ExprCompose(id_x[16:32], ExprMem(ExprInt32(4), 16)))
    check(engine.eval_expr(mem8),
          ExprCompose(id_x[0:24], ExprMem(ExprInt32(11), 8)))
    check(engine.eval_expr(mem40v), id_x[:8])
    check(engine.eval_expr(mem50w),
          ExprCompose(id_y, ExprMem(ExprInt32(51), 8)))

    # Unknown memory, without and then with a read callback
    check(engine.eval_expr(mem20), mem20)
    engine.func_read = lambda x: x
    check(engine.eval_expr(mem20), mem20)

    check(set(engine.modified()), set(engine.symbols))
    self.assertRaises(KeyError, engine.symbols.__getitem__,
                      ExprMem(ExprInt32(100)))

    # Applying an assignment updates the state
    check(engine.apply_expr(id_eax), addr0)
    check(engine.apply_expr(ExprAff(id_eax, addr9)), addr9)
    check(engine.apply_expr(id_eax), addr9)

    # apply_change / eval_ir / apply_expr

    ## x = a (with a = 0x0)
    assignblk = AssignBlock()
    assignblk[id_x] = id_a
    engine.eval_ir(assignblk)
    check(engine.apply_expr(id_x), addr0)

    ## x = a (without replacing 'a' with 0x0)
    engine.apply_change(id_x, id_a)
    check(engine.apply_expr(id_x), id_a)

    ## x = a (with a = 0x0)
    check(engine.apply_expr(assignblk.dst2ExprAff(id_x)), addr0)
    check(engine.apply_expr(id_x), addr0)
base_expr.setParseAction(my_var_parser)

# Symbols standing for the program arguments and the return address
argc = ExprId('argc', 32)
argv = ExprId('argv', 32)
ret_addr = ExprId('ret_addr')

reg_and_id.update({
    argc.name: argc,
    argv.name: argv,
    ret_addr.name: ret_addr,
})

# Name -> expression mapping, completed with the x86 registers
my_symbols = [argc, argv, ret_addr]
my_symbols = dict((x.name, x) for x in my_symbols)
my_symbols.update(mn_x86.regs.all_regs_ids_byname)

ir_arch = ir_x86_32(mdis.symbol_pool)
sb = symbexec(ir_arch, symbols_init)

# NOTE(review): the listing below is reproduced exactly as it appears in
# this file; confirm its line breaks against what the parser expects.
blocs, symbol_pool = parse_asm.parse_txt(
    mn_x86, 32, ''' PUSH argv PUSH argc PUSH ret_addr ''')

b = list(blocs)[0]
print(b)

# add a fake address and length to the parsed instructions
for i, l in enumerate(b.lines):
    l.offset, l.l = i, 1
ir_arch.add_bloc(b)
irb = get_bloc(ir_arch, mdis, 0)
def emul_symb(ir_arch, mdis, states_todo, states_done):
    """Explore the code symbolically, forking on conditional destinations.

    @ir_arch: IR container; its pc symbol is removed from each state before
              the emulation
    @mdis: disassembly engine, handed to get_bloc()
    @states_todo: set of pending (address, symbols, conditions) states; it
                  is consumed, and successor states are added to it
    @states_done: set of states already handled, updated in place

    A conditional destination creates two successor states, one per value
    (0 and 1) of the condition. The exploration of a path stops when the
    destination equals ret_addr. Raise ValueError when a destination can not
    be followed.
    """

    def is_static_dst(expr):
        # A destination can be followed when it is a constant address or an
        # identifier wrapping an assembly label
        return isinstance(expr, ExprInt) or (
            isinstance(expr, ExprId) and
            isinstance(expr.name, asmbloc.asm_label))

    while states_todo:
        ad, symbols, conds = states_todo.pop()
        print("%s addr %s %s" % ('*' * 40, ad, '*' * 40))
        if (ad, symbols, conds) in states_done:
            print("skip %s" % (ad,))
            continue
        states_done.add((ad, symbols, conds))

        sb = symbexec(ir_arch, {})
        sb.symbols = symbols.copy()
        if ir_arch.pc in sb.symbols:
            del sb.symbols[ir_arch.pc]
        b = get_bloc(ir_arch, mdis, ad)

        print('run bloc')
        print(b)
        ad = sb.emulbloc(b)
        print('final state')
        sb.dump_id()
        print('dataflow')
        assert ad is not None
        print("DST %s" % (ad,))

        if isinstance(ad, ExprCond):
            # Create 2 states, each including complementary conditions
            p1 = sb.symbols.copy()
            p2 = sb.symbols.copy()
            c1 = {ad.cond: ExprInt_from(ad.cond, 0)}
            c2 = {ad.cond: ExprInt_from(ad.cond, 1)}
            print(ad.cond)
            p1[ad.cond] = ExprInt_from(ad.cond, 0)
            p2[ad.cond] = ExprInt_from(ad.cond, 1)
            ad1 = expr_simp(sb.eval_expr(ad.replace_expr(c1), {}))
            ad2 = expr_simp(sb.eval_expr(ad.replace_expr(c2), {}))

            # BOTH destinations must be followable. The former test mixed
            # "and" / "or" without grouping, so it accepted any second
            # destination as soon as the first one was an integer.
            if not (is_static_dst(ad1) and is_static_dst(ad2)):
                print("%s %s" % (str(ad1), str(ad2)))
                raise ValueError("zarb condition")

            conds1 = list(conds) + list(c1.items())
            conds2 = list(conds) + list(c2.items())
            if isinstance(ad1, ExprId):
                ad1 = ad1.name
            if isinstance(ad2, ExprId):
                ad2 = ad2.name
            if isinstance(ad1, ExprInt):
                ad1 = ad1.arg
            if isinstance(ad2, ExprInt):
                ad2 = ad2.arg
            states_todo.add((ad1, p1, tuple(conds1)))
            states_todo.add((ad2, p2, tuple(conds2)))
        elif isinstance(ad, ExprInt):
            ad = int(ad.arg)
            states_todo.add((ad, sb.symbols.copy(), tuple(conds)))
        elif isinstance(ad, ExprId) and isinstance(ad.name,
                                                   asmbloc.asm_label):
            ad = ad.name
            states_todo.add((ad, sb.symbols.copy(), tuple(conds)))
        elif ad == ret_addr:
            print('ret reached')
            continue
        else:
            raise ValueError("zarb eip")
# Stop disassembler after the XOR mdis.dont_dis = [0x1C] # Disassemble one basic block block = mdis.dis_bloc(0) # Instantiate an IR analysis ir_arch = ira(mdis.symbol_pool) # Translate asm basic block to an IR basic block ir_arch.add_bloc(block) # Store IR graph open('ir_graph.dot', 'w').write(ir_arch.graph.dot()) # Initiate the symbolic execution engine # regs_init associates EAX to EAX_init and so on sb = symbexec(ir_arch, machine.mn.regs.regs_init) # sb.dump_id() # Start execution at address 0 # IRDst represents the label of the next IR basic block to execute irdst = sb.emul_ir_blocs(ir_arch, 0,step=True) print 'ECX =', sb.symbols[machine.mn.regs.ECX] print 'ESP =', sb.symbols[machine.mn.regs.ESP] print 'EAX =', sb.symbols[machine.mn.regs.EAX] except lite.Error, e: if c: c.rollback() print "Error %s:" % e.args[0] sys.exit(1) finally: if c: cur.close()