def inject_info(self, info):
    """Build a new symbols() container with @info substituted in @self

    Each key and each value yielded by self.items() goes through
    replace_expr(@info) and is then simplified with expr_simp.
    @info: replacement mapping handed to replace_expr
    Return the newly created symbols() instance
    """
    injected = symbols()
    for key, val in self.items():
        new_key = expr_simp(key.replace_expr(info))
        new_val = expr_simp(val.replace_expr(info))
        injected[new_key] = new_val
    return injected
def inject_info(self, info):
    """Return a fresh symbols() in which @info has been injected

    @info: replacement mapping handed to replace_expr
    Keys and values are both rewritten, then simplified with expr_simp.
    """
    def rewrite(item):
        # Substitute then simplify a single expression
        return expr_simp(item.replace_expr(info))

    result = symbols()
    for src_expr, src_value in self.items():
        dst_expr = rewrite(src_expr)
        result[dst_expr] = rewrite(src_value)
    return result
def codepath_walk(addr, symbols, conds, depth):
    """Search recursively for a path reaching a state accepted by is_goal

    @addr: address to emulate from
    @symbols: symbolic state used to build the execution engine
    @conds: (condition, value) pairs taken so far
    @depth: current number of conditional branches followed
    Return the list of conditions leading to the goal, or None when the
    conditional depth limit (cond_limit) is hit or no branch succeeds.
    """
    if depth >= cond_limit:
        return None

    for _ in range(uncond_limit):
        engine = symbexec(ir, symbols)
        dst = engine.emul_ir_blocs(ir, addr)
        if is_goal(engine.symbols) == True:
            return conds

        if not isinstance(dst, ExprCond):
            break

        # Fork on the branch condition: first forced to 1, then to 0
        taken = {dst.cond: ExprInt_from(dst.cond, 1)}
        not_taken = {dst.cond: ExprInt_from(dst.cond, 0)}
        addr_taken = expr_simp(
            engine.eval_expr(dst.replace_expr(taken), {}))
        addr_not_taken = expr_simp(
            engine.eval_expr(dst.replace_expr(not_taken), {}))
        conds_taken = list(conds) + taken.items()
        conds_not_taken = list(conds) + not_taken.items()

        for nxt_addr, nxt_conds in ((addr_taken, conds_taken),
                                    (addr_not_taken, conds_not_taken)):
            found = codepath_walk(
                nxt_addr, engine.symbols.copy(), nxt_conds, depth + 1)
            if found is not None:
                return found
        break

    return None
def del_mem_above_stack(self, sp):
    """Forget memory symbols located at a negative offset from @sp's value

    For every entry of self.symbols.symbols_mem, the difference between its
    address and the current value of @sp is evaluated and simplified; when
    it is a constant whose most significant bit is set, the memory
    expression is deleted from self.symbols.
    @sp: stack pointer expression, used as a key of self.symbols
    """
    stack_top = self.symbols[sp]
    for address, (cell, _) in self.symbols.symbols_mem.items():
        delta = expr_simp(self.eval_expr(address - stack_top, {}))
        if isinstance(delta, ExprInt):
            sign = expr_simp(delta.msb())
            if sign.arg == 1:
                del self.symbols[cell]
def check_expr_below_stack(ir_arch_a, expr):
    """
    Classify the pointer of @expr relatively to the stack pointer
    @ir_arch_a: ira instance
    @expr: Expression instance

    Return True when the offset (pointer - sp) is not a constant, or is a
    non-zero constant with its most significant bit set; return False when
    the offset is zero or has its most significant bit clear.
    """
    offset = expr_simp(expr.ptr - ir_arch_a.sp)
    if not offset.is_int():
        return True
    return int(offset) != 0 and int(expr_simp(offset.msb())) != 0
def retrieve_stack_accesses(ir_arch_a, ssa):
    """
    Walk the ssa graph and find stack based variables.
    Return a dictionary linking stack base address to its size/name
    @ir_arch_a: ira instance
    @ssa: SSADiGraph instance

    Raise AssertionError if the memory intervals of two distinct bases
    overlap.
    """
    # Collect every stack access found in destinations and sources
    stack_vars = set()
    for block in ssa.graph.blocks.itervalues():
        for assignblk in block:
            for dst, src in assignblk.iteritems():
                stack_vars.update(get_stack_accesses(ir_arch_a, dst))
                stack_vars.update(get_stack_accesses(ir_arch_a, src))
    stack_vars = [
        expr for expr in stack_vars
        if check_expr_below_stack(ir_arch_a, expr)
    ]

    # Group accesses by base pointer
    base_to_var = {}
    for var in stack_vars:
        base_to_var.setdefault(var.ptr, set()).add(var)

    # Compute the memory interval covered by each base
    base_to_interval = {}
    for addr, accesses in base_to_var.iteritems():
        var_interval = interval()
        # The offset only depends on the base: compute it once per base
        offset = expr_simp(addr - ir_arch_a.sp)
        # Non linear stack offsets are skipped (the interval stays empty)
        if offset.is_int():
            start = int(offset)
            for var in accesses:
                stop = int(
                    expr_simp(offset + ExprInt(var.size // 8, offset.size))
                )
                var_interval += interval([(start, stop - 1)])
        base_to_interval[addr] = var_interval
    if not base_to_interval:
        return {}

    # Check that intervals do not overlap
    _, covered = base_to_interval.popitem()
    while base_to_interval:
        addr, mem = base_to_interval.popitem()
        assert (covered & mem).empty
        covered += mem

    # Name each base and keep the size of its widest access
    base_to_info = {}
    for addr, accesses in base_to_var.iteritems():
        name = "var_%d" % (len(base_to_info))
        size = max(var.size for var in accesses)
        base_to_info[addr] = size, name
    return base_to_info
def resolve_args_with_symbols(self, symbols=None):
    """Return the arguments of the instruction with known symbols resolved

    @symbols: (optional) mapping name -> object exposing an `offset`
    Each identifier of each argument is replaced by an ExprInt built from
    the offset of its symbol, then the argument is simplified.
    Raise ValueError for a label missing from @symbols, or for a symbol
    whose offset is None.
    """
    if symbols is None:
        symbols = {}
    resolved = []
    for arg in self.args:
        replacements = {}
        for ident in m2_expr.get_expr_ids(arg):
            if isinstance(ident.name, asmbloc.asm_label):
                name = ident.name.name
                # special symbol $
                if name == "$":
                    replacements[ident] = self.get_asm_offset(ident)
                    continue
                if name not in symbols:
                    raise ValueError("unresolved symbol! %r" % ident)
            else:
                name = ident.name
            if name not in symbols:
                # Identifiers unknown to @symbols are left untouched
                continue
            if symbols[name].offset is None:
                raise ValueError('The offset of label "%s" cannot be '
                                 "determined" % name)
            size = ident.size
            if size is None:
                size = self.get_symbol_size(ident, symbols)
            replacements[ident] = m2_expr.ExprInt(symbols[name].offset, size)
        resolved.append(expr_simp(arg.replace_expr(replacements)))
    return resolved
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done): while states_todo: addr, symbols, conds = states_todo.pop() print '*' * 40, "addr", addr, '*' * 40 if (addr, symbols, conds) in states_done: print 'Known state, skipping', addr continue states_done.add((addr, symbols, conds)) symbexec = SymbolicExecutionEngine(ir_arch) symbexec.symbols = symbols.copy() if ir_arch.pc in symbexec.symbols: del symbexec.symbols[ir_arch.pc] irblock = get_block(ir_arch, ircfg, mdis, addr) print 'Run block:' print irblock addr = symbexec.eval_updt_irblock(irblock) print 'Final state:' symbexec.dump(mems=False) assert addr is not None if isinstance(addr, ExprCond): # Create 2 states, each including complementary conditions cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)} cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)} addr_a = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_a), {})) addr_b = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_b), {})) if not (addr_a.is_int() or addr_a.is_loc() and addr_b.is_int() or addr_b.is_loc()): print str(addr_a), str(addr_b) raise ValueError("Unsupported condition") if isinstance(addr_a, ExprInt): addr_a = int(addr_a.arg) if isinstance(addr_b, ExprInt): addr_b = int(addr_b.arg) states_todo.add((addr_a, symbexec.symbols.copy(), tuple(list(conds) + cond_group_a.items()))) states_todo.add((addr_b, symbexec.symbols.copy(), tuple(list(conds) + cond_group_b.items()))) elif addr == ret_addr: print 'Return address reached' continue elif addr.is_int(): addr = int(addr.arg) states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) elif addr.is_loc(): states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) else: raise ValueError("Unsupported destination")
def _follow_simp_expr(exprs):
    """Simplify expressions to avoid tracking useless elements, as:
    XOR EAX, EAX

    @exprs: iterable of expressions
    Return a couple (set of simplified expressions, empty set)
    """
    return set(expr_simp(expr) for expr in exprs), set()
def is_stack_access(ir_arch_a, expr):
    """Tell whether @expr is a memory access at a constant offset from sp

    @ir_arch_a: ira instance (its `sp` attribute is used)
    @expr: Expression instance
    Return @expr itself when it is a memory expression whose pointer minus
    sp simplifies to an integer, False otherwise.
    """
    if expr.is_mem():
        offset = expr_simp(expr.ptr - ir_arch_a.sp)
        if offset.is_int():
            return expr
    return False
def fix_expr_val(e, symbols):
    """Replace the identifiers of @e by the offset of their symbol

    @e: expression to resolve
    @symbols: object whose `s` mapping links a name to a symbol exposing
    an `offset`
    Return the simplified resulting expression
    """
    def id_to_int(node):
        # Only identifiers are rewritten, other nodes are kept as is
        if not isinstance(node, m2_expr.ExprId):
            return node
        symbol = symbols.s[node.name]
        return m2_expr.ExprInt_from(node, symbol.offset)

    return expr_simp(e.visit(id_to_int))
def arm_guess_jump_table( mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): ira = get_ira(mnemo, attrib) jra = ExprId('jra') jrb = ExprId('jrb') sp = AsmSymbolPool() ir_arch = ira(sp) ir_arch.add_bloc(cur_bloc) ir_blocks = ir_arch.blocks.values() for irblock in ir_blocks: # print 'X'*40 # print irblock pc_val = None # lr_val = None for exprs in irblock.irs: for e in exprs: if e.dst == ir_arch.pc: pc_val = e.src # if e.dst == mnemo.regs.LR: # lr_val = e.src if pc_val is None: continue if not isinstance(pc_val, ExprMem): continue assert(pc_val.size == 32) print pc_val ad = pc_val.arg ad = expr_simp(ad) print ad res = match_expr(ad, jra + jrb, set([jra, jrb])) if res is False: raise NotImplementedError('not fully functional') print res if not isinstance(res[jrb], ExprInt): raise NotImplementedError('not fully functional') base_ad = int(res[jrb]) print base_ad addrs = set() i = -1 max_table_entry = 10000 max_diff_addr = 0x100000 # heuristic while i < max_table_entry: i += 1 try: ad = upck32(pool_bin.getbytes(base_ad + 4 * i, 4)) except: break if abs(ad - base_ad) > max_diff_addr: break addrs.add(ad) print [hex(x) for x in addrs]
def fix_expr_val(expr, symbols):
    """Resolve an expression @expr using @symbols

    Every identifier is replaced by the offset of the label found in
    symbols._name2label, then the expression is simplified.
    Raise RuntimeError if the result is not an ExprInt.
    """
    def id_to_int(node):
        # Only identifiers are rewritten, other nodes are kept as is
        if not isinstance(node, m2_expr.ExprId):
            return node
        label = symbols._name2label[node.name]
        return m2_expr.ExprInt_from(node, label.offset)

    resolved = expr_simp(expr.visit(id_to_int))
    if isinstance(resolved, m2_expr.ExprInt):
        return resolved
    raise RuntimeError('Cannot resolve symbol %s' % expr)
def propag_expr_cst(self, expr):
    """Propagate constant expressions in @expr
    @expr: Expression to update
    Return the simplified expression, where each read identifier whose
    evaluation is accepted by is_expr_cst has been replaced by its value
    """
    replacements = {}
    for element in expr.get_r(mem_read=True):
        # Only ExprId can be safely propagated
        if element.is_id():
            value = self.eval_expr(element)
            if self.is_expr_cst(self.ir_arch, value):
                replacements[element] = value
    return expr_simp(expr.replace_expr(replacements))
def field_addr(self, base, Clike, is_ptr=False):
    """Return the integer offset of the access @Clike relative to @base

    @base: C-like string translated with self.trad to get the base
    @Clike: C-like string designating the accessed element
    @is_ptr: if True, @Clike is translated as is; otherwise its address
    "&(@Clike)" is translated
    Results are memoized in self.cache_field_addr.
    """
    key = (base, Clike, is_ptr)
    cached = self.cache_field_addr.get(key, None)
    if cached is not None:
        return cached

    base_expr = self.trad(base)
    if is_ptr:
        access_expr = self.trad(Clike)
    else:
        access_expr = self.trad("&(%s)" % Clike)
    cached = int(expr_simp(access_expr - base_expr))
    self.cache_field_addr[key] = cached
    return cached
def elements(self):
    """Return a set holding the single expression selected by self.cbReg

    - if the selected value is a key of self.stk_args, the element is the
      simplified memory expression of the matching stack argument, and
      stack unaliasing is forced (self.stk_unalias_force = True);
    - otherwise the value is looked up as a register name.
    Raise ValueError if the value is empty or is not a known register name
    (previously an unknown name silently produced set([None])).
    """
    value = self.cbReg.value
    if value in self.stk_args:
        line = self.ircfg.blocks[self.loc_key][self.line_nb].instr
        arg_num = self.stk_args[value]
        stk_high = m2_expr.ExprInt(idc.GetSpd(line.offset), ir_arch.sp.size)
        stk_off = m2_expr.ExprInt(self.ira.sp.size // 8 * arg_num,
                                  ir_arch.sp.size)
        element = m2_expr.ExprMem(
            mn.regs.regs_init[ir_arch.sp] + stk_high + stk_off,
            self.ira.sp.size)
        element = expr_simp(element)
        # Force stack unaliasing
        self.stk_unalias_force = True
    elif value:
        element = self.ira.arch.regs.all_regs_ids_byname.get(value, None)
        if element is None:
            raise ValueError("Unknown element '%s'!" % value)
    else:
        raise ValueError("Unknown element '%s'!" % value)
    return set([element])
def resolve_args_with_symbols(self, symbols=None):
    """Return the arguments of the instruction with locations resolved

    @symbols: (optional) LocationDB instance
    Each location of each argument is replaced by:
    - the current instruction offset for the special name '$';
    - the next instruction offset for the special name '_';
    - an ExprInt of its offset when @symbols knows one.
    Raise ValueError for a location without name nor offset ("Unresolved
    symbol"), or with names but no offset.
    """
    if symbols is None:
        symbols = LocationDB()
    args_out = []
    for expr in self.args:
        # try to resolve symbols using symbols
        fixed_expr = {}
        for exprloc in m2_expr.get_expr_locs(expr):
            loc_key = exprloc.loc_key
            names = symbols.get_location_names(loc_key)
            # special symbols
            if '$' in names:
                fixed_expr[exprloc] = self.get_asm_offset(exprloc)
                continue
            if '_' in names:
                fixed_expr[exprloc] = self.get_asm_next_offset(exprloc)
                continue
            offset = symbols.get_location_offset(loc_key)
            if offset is not None:
                fixed_expr[exprloc] = m2_expr.ExprInt(offset, exprloc.size)
                continue
            if not names:
                raise ValueError('Unresolved symbol: %r' % exprloc)
            # From here the offset is known to be None: the former second
            # lookup and its "offset available" branch were unreachable.
            raise ValueError(
                'The offset of loc_key "%s" cannot be determined' % (names,)
            )
        expr = expr.replace_expr(fixed_expr)
        expr = expr_simp(expr)
        args_out.append(expr)
    return args_out
def eval_updt_irblock(self, irb, step=False): """ Symbolic execution of the @irb on the current state @irb: irblock instance @step: display intermediate steps """ offset2cmt = {} for index, assignblk in enumerate(irb): if set(assignblk) == set([self.ir_arch.IRDst, self.ir_arch.pc]): # Don't display on jxx continue instr = assignblk.instr tmp_r = assignblk.get_r() tmp_w = assignblk.get_w() todo = set() # Replace PC with value to match IR args pc_fixed = {self.ir_arch.pc: m2_expr.ExprInt(instr.offset + instr.l, self.ir_arch.pc.size)} inputs = tmp_r inputs.update(arg for arg in tmp_w if arg.is_mem()) for arg in inputs: arg = expr_simp(arg.replace_expr(pc_fixed)) if arg in tmp_w and not arg.is_mem(): continue todo.add(arg) for expr in todo: if expr.is_int(): continue for c_str, c_type in self.chandler.expr_to_c_and_types(expr, self.symbols): expr = self.cst_propag_link.get((irb.loc_key, index), {}).get(expr, expr) offset2cmt.setdefault(instr.offset, set()).add( "\n%s: %s\n%s" % (expr, c_str, c_type)) self.eval_updt_assignblk(assignblk) for offset, value in offset2cmt.iteritems(): idc.MakeComm(offset, '\n'.join(value)) print "%x\n" % offset, '\n'.join(value) return self.eval_expr(self.ir_arch.IRDst)
def func_write(self, symb_exec, dest, data):
    """Memory write wrapper for symbolic execution
    @symb_exec: symbexec instance
    @dest: ExprMem instance
    @data: Expr instance

    @data must simplify to an ExprInt, otherwise NotImplementedError is
    raised. Its value is written to the memory of self.cpu with the byte
    order reversed relative to its hexadecimal representation.
    """
    # Get the content to write
    data = expr_simp(data)
    if not isinstance(data, m2_expr.ExprInt):
        raise NotImplementedError("A simplification is missing: %s" % data)
    to_write = data.arg.arg

    # Format information
    addr = dest.arg.arg.arg
    size = data.size // 8
    content = hex(to_write).replace("0x", "").replace("L", "")
    # Left pad with zeros up to 2 hex digits per byte
    content = "0" * (size * 2 - len(content)) + content
    content = content.decode("hex")[::-1]

    # Write in VmMngr context
    self.cpu.set_mem(addr, content)
def resolve_args_with_symbols(self, symbols=None):
    """Return the arguments of the instruction with known symbols resolved

    @symbols: (optional) mapping name -> object exposing an `offset`
    Each identifier of each argument is replaced by an integer built from
    the offset of its symbol (0 when this offset is None), then the
    argument is simplified. The special symbol '$' is replaced by the
    result of get_asm_offset.
    Raise ValueError for a label (other than '$') missing from @symbols.
    """
    if symbols is None:
        symbols = {}
    args_out = []
    for arg in self.args:
        fixed_ids = {}
        for ident in m2_expr.get_expr_ids(arg):
            is_label = isinstance(ident.name, asmbloc.asm_label)
            name = ident.name.name if is_label else ident.name
            # special symbol: handled before the lookup in @symbols, so a
            # '$' label absent from @symbols is no longer rejected
            if name == '$':
                fixed_ids[ident] = self.get_asm_offset(ident)
                continue
            if name not in symbols:
                if is_label:
                    raise ValueError('unresolved symbol! %r' % ident)
                continue
            if symbols[name].offset is None:
                default_size = self.get_symbol_size(ident, symbols)
                # default value
                value = m2_expr.ExprInt_fromsize(default_size, 0)
            else:
                size = ident.size
                if size is None:
                    size = self.get_symbol_size(ident, symbols)
                value = m2_expr.ExprInt_fromsize(size, symbols[name].offset)
            fixed_ids[ident] = value
        args_out.append(expr_simp(arg.replace_expr(fixed_ids)))
    return args_out
# print current pool for lbl, b in ira.blocs.items(): print b # Init our symbols with all architecture known registers symbols_init = {} for i, r in enumerate(all_regs_ids): symbols_init[r] = all_regs_ids_init[i] # Create symbolic execution engine symb = symbexec(ir, symbols_init) # Get the block we want and emulate it # We obtain the address of the next block to execute block = ir.get_bloc(offset) nxt_addr = symb.emulbloc(block) # Run the Miasm's simplification engine on the next # address to be sure to have the simplest expression simp_addr = expr_simp(nxt_addr) # The simp_addr variable is an integer expression (next basic block offset) if isinstance(simp_addr, ExprInt): print("Jump on next basic block: %s" % simp_addr) # The simp_addr variable is a condition expression elif isinstance(simp_addr, ExprCond): branch1 = simp_addr.src1 branch2 = simp_addr.src2 print("Condition: %s or %s" % (branch1, branch2))
def myfunc(cpu, vmmngr):
    """Execute the function according to cpu and vmmngr states
    @cpu: JitCpu instance
    @vmmngr: VmMngr instance
    Return the offset of the instruction being executed when an exception
    is reported by @vmmngr, else the integer destination of the last
    executed IR bloc.
    """
    # Get exec engine
    engine = self.symbexec

    # Keep current location in irblocs
    current = label

    # Required to detect new instructions
    seen_offsets = set()

    # For each irbloc inside irblocs
    found = True
    while found is True:

        # Get the current bloc
        found = False
        for irb in irblocs:
            if irb.label == current:
                found = True
                break

        # Irblocs must end with returning an ExprInt instance
        assert found is not False

        # Refresh CPU values according to @cpu instance
        update_engine_from_cpu(cpu, engine)

        # Execute current ir bloc
        for assignments, instr in zip(irb.irs, irb.lines):

            # For each new instruction (in assembly)
            if instr.offset not in seen_offsets:
                seen_offsets.add(instr.offset)

                # Log registers values
                if self.log_regs:
                    update_cpu_from_engine(cpu, engine)
                    cpu.dump_gpregs()

                # Log instruction
                if self.log_mn:
                    print("%08x %s" % (instr.offset, instr))

                # Check for memory exception
                if vmmngr.get_exception() != 0:
                    update_cpu_from_engine(cpu, engine)
                    return instr.offset

            # Eval current instruction (in IR)
            engine.eval_ir(assignments)

            # Check for memory exception which do not update PC
            if (vmmngr.get_exception() & csts.EXCEPT_DO_NOT_UPDATE_PC) != 0:
                update_cpu_from_engine(cpu, engine)
                return instr.offset

        # Get next bloc address
        ad = expr_simp(engine.eval_expr(self.ir_arch.IRDst))

        # Updates @cpu instance according to new CPU values
        update_cpu_from_engine(cpu, engine)

        # Manage resulting address
        if isinstance(ad, m2_expr.ExprInt):
            return ad.arg.arg
        if not isinstance(ad, m2_expr.ExprId):
            raise NotImplementedError("Type not handled: %s" % ad)
        current = ad.name
def dis(cls, bs_o, mode_o = None, offset=0):
    """Disassemble one instruction of @bs_o located at @offset

    @bs_o: bin_stream instance (any other object is wrapped in a
    bin_stream_str)
    @mode_o: (optional) mode forwarded to pre_dis
    @offset: offset of the instruction in the stream
    Return the decoded instruction. When two candidates match, the one
    flagged as alias is returned.
    Raise Disasm_Exception if no candidate matches.
    """
    if not isinstance(bs_o, bin_stream):
        bs_o = bin_stream_str(bs_o)

    offset_o = offset
    pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis(
        bs_o, mode_o, offset)
    candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset)
    out = []
    out_c = []
    if hasattr(bs, 'getlen'):
        bs_l = bs.getlen()
    else:
        bs_l = len(bs)

    alias = False
    for c in candidates:
        # The message needs one placeholder per argument, otherwise the
        # logging module fails to format the record
        log.debug("%s %s %s", "*" * 40, mode, c.mode)
        log.debug(c.fields)

        c = cls.all_mn_inst[c][0]

        c.reset_class()
        c.mode = mode

        if not c.add_pre_dis_info(pre_dis_info):
            continue

        todo = {}
        getok = True
        fname_values = dict(pre_dis_info)
        offset_b = offset * 8

        # Extract the bits of each present field
        total_l = 0
        for i, f in enumerate(c.fields_order):
            if f.flen is not None:
                l = f.flen(mode, fname_values)
            else:
                l = f.l
            if l is not None:
                total_l += l
                f.l = l
                f.is_present = True
                log.debug("FIELD %s %s %s %s", f.__class__, f.fname,
                          offset_b, l)
                if bs_l * 8 - offset_b < l:
                    # Not enough bits left in the stream
                    getok = False
                    break
                bv = cls.getbits(bs, mode, offset_b, l)
                offset_b += l
                if f.fname not in fname_values:
                    fname_values[f.fname] = bv
                todo[i] = bv
            else:
                f.is_present = False
                todo[i] = None

        if not getok:
            continue

        c.l = prefix_len + total_l // 8
        # Start from a success state: with nothing to decode, the result
        # of a previous candidate (or an unbound name) must not be used
        ret = True
        for i in c.to_decode:
            f = c.fields_order[i]
            if f.is_present:
                ret = f.decode(todo[i])
                if not ret:
                    log.debug("cannot decode %r", f)
                    break

        if not ret:
            continue
        for a in c.args:
            a.expr = expr_simp(a.expr)

        c.b = cls.getbytes(bs, offset_o, c.l)
        c.offset = offset_o
        c = c.post_dis()
        if c is None:
            continue
        c_args = [a.expr for a in c.args]
        instr = cls.instruction(c.name, mode, c_args,
                                additional_info=c.additional_info())
        instr.l = prefix_len + total_l // 8
        instr.b = cls.getbytes(bs, offset_o, instr.l)
        instr.offset = offset_o
        instr.get_info(c)
        if c.alias:
            alias = True
        out.append(instr)
        out_c.append(c)
    if not out:
        raise Disasm_Exception('cannot disasm at %X' % offset_o)
    if len(out) != 1:
        if not alias:
            log.warning('dis multiple args ret default')

        assert(len(out) == 2)
        for i, o in enumerate(out_c):
            if o.alias:
                return out[i]
        raise NotImplementedError('not fully functional')
    return out[0]
def myfunc(cpu, vmmngr):
    """Execute the function according to cpu and vmmngr states
    @cpu: JitCpu instance
    @vmmngr: VmMngr instance
    Return the offset of the instruction being executed when an exception
    is reported by @vmmngr, else the integer destination of the last
    executed IR bloc.
    """
    # Get exec engine
    engine = self.symbexec

    # Keep current location in irblocs
    current = label

    # Required to detect new instructions
    seen_offsets = set()

    # For each irbloc inside irblocs
    found = True
    while found is True:

        # Get the current bloc
        found = False
        for irb in irblocs:
            if irb.label == current:
                found = True
                break

        # Irblocs must end with returning an ExprInt instance
        assert found is not False

        # Refresh CPU values according to @cpu instance
        update_engine_from_cpu(cpu, engine)

        # Execute current ir bloc
        for assignments, instr in zip(irb.irs, irb.lines):

            # For each new instruction (in assembly)
            if instr.offset not in seen_offsets:
                seen_offsets.add(instr.offset)

                # Log registers values
                if self.log_regs:
                    update_cpu_from_engine(cpu, engine)
                    cpu.dump_gpregs()

                # Log instruction
                if self.log_mn:
                    print("%08x %s" % (instr.offset, instr))

                # Check for memory exception
                if vmmngr.get_exception() != 0:
                    update_cpu_from_engine(cpu, engine)
                    return instr.offset

            # Eval current instruction (in IR)
            engine.eval_ir(assignments)

            # Check for memory exception which do not update PC
            if (vmmngr.get_exception() & csts.EXCEPT_DO_NOT_UPDATE_PC) != 0:
                update_cpu_from_engine(cpu, engine)
                return instr.offset

        # Get next bloc address
        ad = expr_simp(engine.eval_expr(self.ir_arch.IRDst))

        # Updates @cpu instance according to new CPU values
        update_cpu_from_engine(cpu, engine)

        # Manage resulting address
        if isinstance(ad, m2_expr.ExprInt):
            return ad.arg.arg
        if not isinstance(ad, m2_expr.ExprId):
            raise NotImplementedError("Type not handled: %s" % ad)
        current = ad.name
print "generating graph" open('asm_flow.dot', 'w').write(ab.dot()) print "generating IR... %x" % ad for block in ab: print 'ADD' print block ir_arch.add_bloc(block) print "IR ok... %x" % ad for irb in ir_arch.blocks.itervalues(): for i, assignblk in enumerate(irb.irs): new_assignblk = { expr_simp(dst): expr_simp(src) for dst, src in assignblk.iteritems() } irb.irs[i] = AssignBlock(new_assignblk, instr=assignblk.instr) out = ir_arch.graph.dot() open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'wb').write(out) # dead_simp(ir_arch) g = GraphMiasmIR(ir_arch, "Miasm IR graph", None) def mycb(*test): print test raise NotImplementedError('not fully functional')
def build_graph(verbose=False, simplify=False): machine = guess_machine() mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira if verbose: print "Arch", dis_engine fname = idc.GetInputFile() if verbose: print fname bs = bin_stream_ida() mdis = dis_engine(bs) ir_arch = ira(mdis.symbol_pool) # populate symbols with ida names for addr, name in idautils.Names(): # print hex(ad), repr(name) if name is None: continue mdis.symbol_pool.add_label(name, addr) if verbose: print "start disasm" addr = idc.ScreenEA() if verbose: print hex(addr) blocks = mdis.dis_multibloc(addr) if verbose: print "generating graph" open('asm_flow.dot', 'w').write(blocks.dot()) print "generating IR... %x" % addr for block in blocks: if verbose: print 'ADD' print block ir_arch.add_bloc(block) if verbose: print "IR ok... %x" % addr for irb in ir_arch.blocks.itervalues(): irs = [] for assignblk in irb.irs: new_assignblk = { expr_simp(dst): expr_simp(src) for dst, src in assignblk.iteritems() } irs.append(AssignBlock(new_assignblk, instr=assignblk.instr)) ir_arch.blocks[irb.label] = IRBlock(irb.label, irs) if verbose: out = ir_arch.graph.dot() open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'wb').write(out) title = "Miasm IR graph" if simplify: dead_simp(ir_arch) ir_arch.simplify(expr_simp) modified = True while modified: modified = False modified |= dead_simp(ir_arch) modified |= ir_arch.remove_empty_assignblks() modified |= ir_arch.remove_jmp_blocks() modified |= ir_arch.merge_blocks() title += " (simplified)" g = GraphMiasmIR(ir_arch, title, None) g.cmd_a = g.AddCommand("cmd a", "x") g.cmd_b = g.AddCommand("cmd b", "y") g.Show()
def emul_symb(ir_arch, mdis, states_todo, states_done): while states_todo: ad, symbols, conds = states_todo.pop() print '*' * 40, "addr", ad, '*' * 40 if (ad, symbols, conds) in states_done: print 'skip', ad continue states_done.add((ad, symbols, conds)) sb = symbexec(ir_arch, {}) sb.symbols = symbols.copy() if ir_arch.pc in sb.symbols: del (sb.symbols[ir_arch.pc]) b = get_bloc(ir_arch, mdis, ad) print 'run bloc' print b # print blocs[ad] ad = sb.emulbloc(b) print 'final state' sb.dump_id() print 'dataflow' # data_flow_graph_from_expr(sb) assert (ad is not None) print "DST", ad if isinstance(ad, ExprCond): # Create 2 states, each including complementary conditions p1 = sb.symbols.copy() p2 = sb.symbols.copy() c1 = {ad.cond: ExprInt_from(ad.cond, 0)} c2 = {ad.cond: ExprInt_from(ad.cond, 1)} print ad.cond p1[ad.cond] = ExprInt_from(ad.cond, 0) p2[ad.cond] = ExprInt_from(ad.cond, 1) ad1 = expr_simp(sb.eval_expr(ad.replace_expr(c1), {})) ad2 = expr_simp(sb.eval_expr(ad.replace_expr(c2), {})) if not (isinstance(ad1, ExprInt) or (isinstance(ad1, ExprId) and isinstance(ad1.name, asmbloc.asm_label)) and isinstance(ad2, ExprInt) or (isinstance(ad2, ExprId) and isinstance(ad2.name, asmbloc.asm_label))): print str(ad1), str(ad2) raise ValueError("zarb condition") conds1 = list(conds) + c1.items() conds2 = list(conds) + c2.items() if isinstance(ad1, ExprId): ad1 = ad1.name if isinstance(ad2, ExprId): ad2 = ad2.name if isinstance(ad1, ExprInt): ad1 = ad1.arg if isinstance(ad2, ExprInt): ad2 = ad2.arg states_todo.add((ad1, p1, tuple(conds1))) states_todo.add((ad2, p2, tuple(conds2))) elif isinstance(ad, ExprInt): ad = int(ad.arg) states_todo.add((ad, sb.symbols.copy(), tuple(conds))) elif isinstance(ad, ExprId) and isinstance(ad.name, asmbloc.asm_label): if isinstance(ad, ExprId): ad = ad.name states_todo.add((ad, sb.symbols.copy(), tuple(conds))) elif ad == ret_addr: print 'ret reached' continue else: raise ValueError("zarb eip")
def build_graph(verbose=False, simplify=False): start_addr = idc.ScreenEA() machine = guess_machine(addr=start_addr) mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira if verbose: print "Arch", dis_engine fname = idc.GetInputFile() if verbose: print fname bs = bin_stream_ida() mdis = dis_engine(bs) ir_arch = ira(mdis.loc_db) # populate symbols with ida names for addr, name in idautils.Names(): if name is None: continue if (mdis.loc_db.get_offset_location(addr) or mdis.loc_db.get_name_location(name)): # Symbol alias continue mdis.loc_db.add_location(name, addr) if verbose: print "start disasm" if verbose: print hex(addr) asmcfg = mdis.dis_multiblock(start_addr) if verbose: print "generating graph" open('asm_flow.dot', 'w').write(asmcfg.dot()) print "generating IR... %x" % start_addr for block in asmcfg.blocks: if verbose: print 'ADD' print block ir_arch.add_block(block) if verbose: print "IR ok... %x" % start_addr for irb in ir_arch.blocks.itervalues(): irs = [] for assignblk in irb: new_assignblk = { expr_simp(dst): expr_simp(src) for dst, src in assignblk.iteritems() } irs.append(AssignBlock(new_assignblk, instr=assignblk.instr)) ir_arch.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs) if verbose: out = ir_arch.graph.dot() open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'wb').write(out) title = "Miasm IR graph" if simplify: dead_simp(ir_arch) ir_arch.simplify(expr_simp) modified = True while modified: modified = False modified |= dead_simp(ir_arch) modified |= ir_arch.remove_empty_assignblks() modified |= ir_arch.remove_jmp_blocks() modified |= ir_arch.merge_blocks() title += " (simplified)" g = GraphMiasmIR(ir_arch, title, None) g.Show()
def sanitize_memory_accesses(self, memories, c_handler, expr_type_from_C):
    """Modify memory accesses to consider only access on "full final element"
    Example:
    struct T{
        int a;
        int b;
        int *c;
    }

    @8[T + 2] = X -> @32[T] = 00 X 00 00
    @32[T + 2] = WW XX YY ZZ -> @32[T] = 00 00 WW XX, @32[T + 4] = YY ZZ 00 00

    @memories: AssignBlock
    @c_handler: CHandler with argument types
    @expr_type_from_C: Name -> ObjC dict, for C -> Expr generation
    Return sanitized access, filled memory cases {Full access -> [offset filled]}
    """
    # Pass 1: split each write in bytes and find the fields they belong to
    byte_values = {}
    touched_fields = set()
    for dst, value in memories.iteritems():
        assert isinstance(dst, ExprMem)
        base = dst.ptr
        for idx in xrange(dst.size / 8):
            # Atomic (byte) access number @idx of the destination
            byte_addr = expr_simp(base + ExprInt(idx, base.size))
            byte_mem = ExprMem(byte_addr, 8)

            # Keep atomic value
            byte_values[byte_mem] = expr_simp(value[idx * 8:(idx + 1) * 8])

            # Convert atomic access -> fields access -> Expr access on the
            # full field
            c_forms = list(c_handler.expr_to_c(byte_mem))
            assert len(c_forms) == 1
            if "__PAD__" in c_forms[0]:
                # This is a field used for padding, ignore it
                continue

            # Conserve the involved field
            touched_fields.add(
                expr_simp(c_handler.c_to_expr(c_forms[0], expr_type_from_C))
            )

    # Pass 2: rebuild the value of each field from its bytes
    filled_memory = {}
    rebuilt = {}
    for field in touched_fields:
        assert isinstance(field, ExprMem)
        base = field.ptr
        parts = []
        for idx in reversed(xrange(field.size / 8)):
            offset = ExprInt(idx, base.size)
            byte_mem = ExprMem(expr_simp(base + offset), 8)

            # Get the value, or complete with 0
            if byte_mem in byte_values:
                part = byte_values[byte_mem]
            else:
                part = ExprInt(0, 8)
                filled_memory.setdefault(field, []).append(offset)
            parts.append(part)

        # Save the computed value
        rebuilt[field] = expr_simp(ExprCompose(*reversed(parts)))

    out = AssignBlock(rebuilt)
    if memories != out:
        self.logger.debug("SANITIZE: %s", memories)
        self.logger.debug("OUT SANITIZE: %s", out)
    return out, filled_memory
# Match the expected form ## isinstance(expr, m2_expr.ExprOp) is not needed: simplifications are ## attached to expression types if expr.op == "+" and \ len(expr.args) == 3 and \ expr.args.count(expr.args[0]) == len(expr.args): # Effective simplification return m2_expr.ExprOp("*", expr.args[0], m2_expr.ExprInt(3, expr.args[0].size)) else: # Do not simplify return expr a = m2_expr.ExprId('a', 32) base_expr = a + a + a print "Without adding the simplification:" print "\t%s = %s" % (base_expr, expr_simp(base_expr)) # Enable pass expr_simp.enable_passes({m2_expr.ExprOp: [simp_add_mul]}) print "After adding the simplification:" print "\t%s = %s" % (base_expr, expr_simp(base_expr)) # Automatic fail assert (expr_simp(base_expr) == m2_expr.ExprOp("*", a, m2_expr.ExprInt(3, a.size)))
& ExprInt(uint8(0x1L)), 0, 8), (ExprInt(uint56(0x0L)), 8, 64))), ExprCompose(((a[:8] & ExprInt8(1), 0, 8), (ExprInt(uint56(0)), 8, 64)))), (ExprCompose( ((ExprCompose(((a[:8], 0, 8), (ExprInt(uint56(0x0L)), 8, 64)))[:32] & ExprInt(uint32(0x1L)), 0, 32), (ExprInt(uint32(0x0L)), 32, 64))), ExprCompose( ((ExprCompose( ((ExprSlice(a, 0, 8), 0, 8), (ExprInt(uint24(0x0L)), 8, 32))) & ExprInt(uint32(0x1L)), 0, 32), (ExprInt(uint32(0x0L)), 32, 64)))), ] for e, e_check in to_test[:]: # print "#" * 80 # print str(e), str(e_check) e_new = expr_simp(e) print "original: ", str(e), "new: ", str(e_new) rez = e_new == e_check if not rez: raise ValueError('bug in expr_simp simp(%s) is %s and should be %s' % (e, e_new, e_check)) # Test conds to_test = [ (((a - b) ^ ((a ^ b) & ((a - b) ^ a))).msb(), ExprOp_inf_signed(a, b)), ((((a - b) ^ ((a ^ b) & ((a - b) ^ a))) ^ a ^ b).msb(), ExprOp_inf_unsigned(a, b)), (ExprOp_inf_unsigned(ExprInt32(-1), ExprInt32(3)), ExprInt1(0)), (ExprOp_inf_signed(ExprInt32(-1), ExprInt32(3)), ExprInt1(1)), (ExprOp_inf_unsigned(a, b) ^ (a ^ b).msb(), ExprOp_inf_signed(a, b)),
def dis(cls, bs_o, mode_o=None, offset=0):
    """Disassemble one instruction of @bs_o located at @offset

    @bs_o: bin_stream instance (any other object is wrapped in a
    bin_stream_str)
    @mode_o: (optional) mode forwarded to pre_dis
    @offset: offset of the instruction in the stream
    Return the decoded instruction. When two candidates match, the one
    flagged as alias is returned.
    Raise Disasm_Exception if no candidate matches.
    """
    if not isinstance(bs_o, bin_stream):
        bs_o = bin_stream_str(bs_o)

    offset_o = offset
    pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis(
        bs_o, mode_o, offset)
    candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset)
    out = []
    out_c = []
    if hasattr(bs, 'getlen'):
        bs_l = bs.getlen()
    else:
        bs_l = len(bs)

    alias = False
    for c in candidates:
        # The message needs one placeholder per argument, otherwise the
        # logging module fails to format the record
        log.debug("%s %s %s", "*" * 40, mode, c.mode)
        log.debug(c.fields)

        c = cls.all_mn_inst[c][0]

        c.reset_class()
        c.mode = mode

        if not c.add_pre_dis_info(pre_dis_info):
            continue

        todo = {}
        getok = True
        fname_values = dict(pre_dis_info)
        offset_b = offset * 8

        # Extract the bits of each present field
        total_l = 0
        for i, f in enumerate(c.fields_order):
            if f.flen is not None:
                l = f.flen(mode, fname_values)
            else:
                l = f.l
            if l is not None:
                total_l += l
                f.l = l
                f.is_present = True
                log.debug("FIELD %s %s %s %s", f.__class__, f.fname,
                          offset_b, l)
                if bs_l * 8 - offset_b < l:
                    # Not enough bits left in the stream
                    getok = False
                    break
                bv = cls.getbits(bs, mode, offset_b, l)
                offset_b += l
                if f.fname not in fname_values:
                    fname_values[f.fname] = bv
                todo[i] = bv
            else:
                f.is_present = False
                todo[i] = None

        if not getok:
            continue

        c.l = prefix_len + total_l // 8
        # Start from a success state: with nothing to decode, the result
        # of a previous candidate (or an unbound name) must not be used
        ret = True
        for i in c.to_decode:
            f = c.fields_order[i]
            if f.is_present:
                ret = f.decode(todo[i])
                if not ret:
                    log.debug("cannot decode %r", f)
                    break

        if not ret:
            continue
        for a in c.args:
            a.expr = expr_simp(a.expr)

        c.b = cls.getbytes(bs, offset_o, c.l)
        c.offset = offset_o
        c = c.post_dis()
        if c is None:
            continue
        c_args = [a.expr for a in c.args]
        instr = cls.instruction(c.name, mode, c_args,
                                additional_info=c.additional_info())
        instr.l = prefix_len + total_l // 8
        instr.b = cls.getbytes(bs, offset_o, instr.l)
        instr.offset = offset_o
        instr.get_info(c)
        if c.alias:
            alias = True
        out.append(instr)
        out_c.append(c)
    if not out:
        raise Disasm_Exception('cannot disasm at %X' % offset_o)
    if len(out) != 1:
        if not alias:
            log.warning('dis multiple args ret default')

        assert (len(out) == 2)
        for i, o in enumerate(out_c):
            if o.alias:
                return out[i]
        raise NotImplementedError('not fully functional')
    return out[0]
def fromstring(cls, s, mode=None):
    """Build an instruction from its textual form @s

    @s: instruction text: a mnemonic followed by comma separated arguments
    @mode: mode forwarded to cls.get_cls_instance and cls.instruction

    Return the instruction built from the first candidate class whose
    arguments consume the whole argument string.
    Raise ValueError if @s holds no mnemonic, if the mnemonic is unknown or
    if no candidate parses the arguments.
    """
    global total_scans

    # re.search returns None on a blank string: check it before reading the
    # group, so that the documented ValueError is actually raised
    match = re.search(r'(\S+)', s)
    if match is None:
        raise ValueError('cannot find name', s)
    name = match.group(1)
    if name not in cls.all_mn_name:
        raise ValueError('unknown name', name)
    clist = list(cls.all_mn_name[name])

    out = []
    out_args = []
    # Scan results cache: (argument index, position in the string) -> parser
    # -> (value, start, stop)
    parsers = defaultdict(dict)

    for cc in clist:
        for c in cls.get_cls_instance(cc, mode):
            args_expr = []
            args_str = s[len(name):].strip(' ')
            cannot_parse = False
            len_o = len(args_str)

            for i, f in enumerate(c.args):
                start_i = len_o - len(args_str)
                if isinstance(f.parser, tuple):
                    parser = f.parser
                else:
                    parser = (f.parser,)
                for p in parser:
                    if p in parsers[(i, start_i)]:
                        continue
                    try:
                        total_scans += 1
                        v, start, stop = next(p.scanString(args_str))
                    except StopIteration:
                        v, start, stop = [None], None, None
                    if start != 0:
                        # Only a match anchored at the current position counts
                        v, start, stop = [None], None, None
                    parsers[(i, start_i)][p] = v[0], start, stop

                start, stop = f.fromstring(args_str, parsers[(i, start_i)])
                if start != 0:
                    log.debug("cannot fromstring %r", args_str)
                    cannot_parse = True
                    break
                if f.expr is None:
                    raise NotImplementedError('not fully functional')
                f.expr = expr_simp(f.expr)
                args_expr.append(f.expr)

                # Drop the consumed argument and its trailing separator
                args_str = args_str[stop:].strip(' ')
                if args_str.startswith(','):
                    args_str = args_str[1:]
                args_str = args_str.strip(' ')

            if args_str:
                # Unparsed text is left: this candidate does not match
                cannot_parse = True
            if cannot_parse:
                continue

            out.append(c)
            out_args.append(args_expr)
            break

    if len(out) == 0:
        raise ValueError('cannot fromstring %r' % s)
    if len(out) != 1:
        log.debug('fromstring multiple args ret default')
    c = out[0]
    c_args = out_args[0]

    instr = cls.instruction(c.name, mode, c_args,
                            additional_info=c.additional_info())
    return instr
label, elements, line_nb = settings.label, settings.elements, settings.line_nb # Simplify affectations for irb in ir_arch.blocks.values(): fix_stack = irb.label.offset is not None and settings.unalias_stack for i, assignblk in enumerate(irb.irs): if fix_stack: stk_high = m2_expr.ExprInt(GetSpd(irb.irs[i].instr.offset), ir_arch.sp.size) fix_dct = {ir_arch.sp: mn.regs.regs_init[ir_arch.sp] + stk_high} new_assignblk = {} for dst, src in assignblk.iteritems(): if fix_stack: src = src.replace_expr(fix_dct) if dst != ir_arch.sp: dst = dst.replace_expr(fix_dct) dst, src = expr_simp(dst), expr_simp(src) new_assignblk[dst] = src irb.irs[i] = AssignBlock(new_assignblk, instr=assignblk.instr) # Get dependency graphs dg = settings.depgraph graphs = dg.get(label, elements, line_nb, set([ir_arch.symbol_pool.getby_offset(func.startEA)])) # Display the result comments = {} sol_nb = 0 def clean_lines(): "Remove previous comments" global comments
# Lift every disassembled block to IR
print("generating IR... %x" % ad)

for b in ab:
    print('ADD')
    print(b)
    ir_arch.add_bloc(b)

print("IR ok... %x" % ad)

# Simplify in place the destination and source of each IR assignment
for irb in ir_arch.blocs.values():
    for irs in irb.irs:
        for e in irs:
            e.dst, e.src = expr_simp(e.dst), expr_simp(e.src)

ir_arch.gen_graph()
out = ir_arch.graph()
# Context manager: the dump is flushed and closed before the viewer is built
with open('/tmp/graph.txt', 'w') as graph_file:
    graph_file.write(out)

# ir_arch.dead_simp()

g = GraphMiasmIR(ir_arch, "Miasm IR graph", None)


def mycb(*test):
    """Print the received arguments, then fail: this callback is a stub"""
    print(test)
    raise NotImplementedError('not fully functional')
def build_graph(start_addr, type_graph, simplify=False, dontmodstack=True, loadint=False, verbose=False):
    """Disassemble the code at @start_addr, lift it to IR and display it

    @start_addr: address the disassembly starts from
    @type_graph: kind of graph to show; compared against TYPE_GRAPH_IR,
                 TYPE_GRAPH_IRSSA and TYPE_GRAPH_IRSSAUNSSA
    @simplify: if set, run the simplification passes before displaying
    @dontmodstack: if set, assignments to the stack pointer produced by call
                   effects are dropped
    @loadint: NOTE(review): not read anywhere in this body -- load_from_int
              runs whenever @simplify is set; confirm the intent
    @verbose: if set, print progress and dump the graphs to dot files

    The function returns None; its result is the displayed GraphMiasmIR.
    """
    machine = guess_machine(addr=start_addr)
    dis_engine, ira = machine.dis_engine, machine.ira

    class IRADelModCallStack(ira):
        # IR analysis variant whose call effects can leave the stack pointer
        # untouched

        def call_effects(self, addr, instr):
            assignblks, extra = super(IRADelModCallStack, self).call_effects(addr, instr)
            if not dontmodstack:
                return assignblks, extra
            out = []
            for assignblk in assignblks:
                dct = dict(assignblk)
                # Keep every assignment except the ones writing self.sp
                dct = {
                    dst: src for (dst, src) in dct.iteritems() if dst != self.sp
                }
                out.append(AssignBlock(dct, assignblk.instr))
            return out, extra

    if verbose:
        print "Arch", dis_engine

    fname = idc.GetInputFile()
    if verbose:
        print fname

    bs = bin_stream_ida()
    mdis = dis_engine(bs)
    ir_arch = IRADelModCallStack(mdis.loc_db)

    # populate symbols with ida names
    for addr, name in idautils.Names():
        if name is None:
            continue
        if (mdis.loc_db.get_offset_location(addr) or
                mdis.loc_db.get_name_location(name)):
            # Symbol alias
            continue
        mdis.loc_db.add_location(name, addr)

    if verbose:
        print "start disasm"
    if verbose:
        print hex(start_addr)

    asmcfg = mdis.dis_multiblock(start_addr)
    entry_points = set([mdis.loc_db.get_offset_location(start_addr)])
    if verbose:
        print "generating graph"
        # NOTE(review): the file object is never closed explicitly
        open('asm_flow.dot', 'w').write(asmcfg.dot())
        print "generating IR... %x" % start_addr

    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    if verbose:
        print "IR ok... %x" % start_addr

    # Rebuild each IR block with simplified destinations and sources
    for irb in ircfg.blocks.itervalues():
        irs = []
        for assignblk in irb:
            new_assignblk = {
                expr_simp(dst): expr_simp(src)
                for dst, src in assignblk.iteritems()
            }
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    if verbose:
        out = ircfg.dot()
        open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'wb').write(out)
    title = "Miasm IR graph"

    if simplify:
        dead_simp(ir_arch, ircfg)
        ircfg.simplify(expr_simp)
        # Repeat the passes until none of them reports a modification
        modified = True
        while modified:
            modified = False
            modified |= dead_simp(ir_arch, ircfg)
            modified |= remove_empty_assignblks(ircfg)
            modified |= merge_blocks(ircfg, entry_points)
        title += " (simplified)"

    # Plain IR requested: display it and stop here
    if type_graph == TYPE_GRAPH_IR:
        graph = GraphMiasmIR(ircfg, title, None)
        graph.Show()
        return

    head = list(entry_points)[0]

    class IRAOutRegs(ira):
        # IR analysis variant reporting output registers as the SSA variables
        # that are mapped to them through self.ssa_var

        def get_out_regs(self, block):
            regs_todo = super(IRAOutRegs, self).get_out_regs(block)
            out = {}
            for assignblk in block:
                for dst in assignblk:
                    reg = self.ssa_var.get(dst, None)
                    if reg is None:
                        continue
                    if reg in regs_todo:
                        # A later assignment to the same register replaces
                        # the earlier one
                        out[reg] = dst
            return set(out.values())

    # Add dummy dependency to uncover out regs assignment
    for loc in ircfg.leaves():
        irblock = ircfg.blocks.get(loc)
        if irblock is None:
            continue
        regs = {}
        for reg in ir_arch.get_out_regs(irblock):
            regs[reg] = reg
        assignblks = list(irblock)
        # The extra "reg = reg" block reuses the instruction of the last
        # assignment block of the leaf
        new_assiblk = AssignBlock(regs, assignblks[-1].instr)
        assignblks.append(new_assiblk)
        new_irblock = IRBlock(irblock.loc_key, assignblks)
        ircfg.blocks[loc] = new_irblock

    ir_arch = IRAOutRegs(mdis.loc_db)
    ir_arch.ssa_var = {}
    modified = True
    # Identifiers the SSA transformation must not rename
    ssa_forbidden_regs = set(
        [ir_arch.pc, ir_arch.IRDst, ir_arch.arch.regs.exception_flags])

    head = list(entry_points)[0]
    heads = set([head])
    all_ssa_vars = {}

    propagate_expr = PropagateExpr()

    # First SSA transformation; its variable mapping feeds get_out_regs
    ssa = SSADiGraph(ircfg)
    ssa.immutable_ids.update(ssa_forbidden_regs)
    ssa.ssa_variable_to_expr.update(all_ssa_vars)
    ssa.transform(head)
    all_ssa_vars.update(ssa.ssa_variable_to_expr)

    ir_arch.ssa_var.update(ssa.ssa_variable_to_expr)

    if simplify:
        while modified:
            ssa = SSADiGraph(ircfg)
            ssa.immutable_ids.update(ssa_forbidden_regs)
            ssa.ssa_variable_to_expr.update(all_ssa_vars)
            ssa.transform(head)
            all_ssa_vars.update(ssa.ssa_variable_to_expr)

            ir_arch.ssa_var.update(ssa.ssa_variable_to_expr)

            # Propagate and simplify until a full round changes nothing
            while modified:
                modified = False
                modified |= propagate_expr.propagate(ssa, head)
                modified |= ircfg.simplify(expr_simp)
                simp_modified = True
                while simp_modified:
                    simp_modified = False
                    simp_modified |= dead_simp(ir_arch, ircfg)
                    simp_modified |= remove_empty_assignblks(ircfg)
                    simp_modified |= load_from_int(ircfg, bs, is_addr_ro_variable)
                    modified |= simp_modified

    # SSA form of the final IR
    ssa = SSADiGraph(ircfg)
    ssa.immutable_ids.update(ssa_forbidden_regs)
    ssa.ssa_variable_to_expr.update(all_ssa_vars)
    ssa.transform(head)
    all_ssa_vars.update(ssa.ssa_variable_to_expr)

    if type_graph == TYPE_GRAPH_IRSSA:
        graph = GraphMiasmIR(ssa.graph, title, None)
        graph.Show()
        return

    if type_graph == TYPE_GRAPH_IRSSAUNSSA:
        # Liveness information is required by the out-of-SSA transformation
        cfg_liveness = DiGraphLivenessSSA(ssa.graph)
        cfg_liveness.init_var_info(ir_arch)
        cfg_liveness.compute_liveness()

        UnSSADiGraph(ssa, head, cfg_liveness)
        if simplify:
            modified = True
            while modified:
                modified = False
                modified |= ssa.graph.simplify(expr_simp)
                simp_modified = True
                while simp_modified:
                    simp_modified = False
                    simp_modified |= dead_simp(ir_arch, ssa.graph)
                    simp_modified |= remove_empty_assignblks(ssa.graph)
                    simp_modified |= merge_blocks(ssa.graph, heads)
                    modified |= simp_modified
        graph = GraphMiasmIR(ssa.graph, title, None)
        graph.Show()
( ExprCond( ExprOp(TOK_INF_EQUAL_SIGNED, a8.zeroExtend(32), ExprInt(-1, 32) ), a, b ), b ), (a8.zeroExtend(32)[2:5], a8[2:5]), ] for e_input, e_check in to_test: print "#" * 80 e_check = expr_simp(e_check) e_new = expr_simp(e_input) print "original: ", str(e_input), "new: ", str(e_new) rez = e_new == e_check if not rez: raise ValueError( 'bug in expr_simp simp(%s) is %s and should be %s' % (e_input, e_new, e_check) ) # Test conds to_test = [ (((a - b) ^ ((a ^ b) & ((a - b) ^ a))).msb(), ExprOp_inf_signed(a, b)), ((((a - b) ^ ((a ^ b) & ((a - b) ^ a))) ^ a ^ b).msb(),
mdis.symbol_pool.add_label(name, ad) # Get the current function addr = ScreenEA() func = idaapi.get_func(addr) blocs = mdis.dis_multibloc(func.startEA) # Generate IR for bloc in blocs: ir_arch.add_bloc(bloc) # Simplify affectations for irb in ir_arch.blocs.values(): for irs in irb.irs: for i, expr in enumerate(irs): irs[i] = m2_expr.ExprAff(expr_simp(expr.dst), expr_simp(expr.src)) # Build the IRA Graph ir_arch.gen_graph() # Get settings settings = depGraphSettingsForm(ir_arch) settings.Execute() # Get dependency graphs dg = settings.depgraph graphs = dg.get(settings.label, settings.elements, settings.line_nb, set([ir_arch.symbol_pool.getby_offset(func.startEA)])) # Display the result comments = {}
from miasm2.expression.expression import * from miasm2.expression.simplifications import expr_simp print """ Simple expression simplification demo """ a = ExprId('eax', 32) b = ExprId('ebx', 32) exprs = [a + b - a, ExprInt(0x12, 32) + ExprInt(0x30, 32) - a, ExprCompose(a[:8], a[8:16])] for e in exprs: print '*' * 40 print 'original expression:', e print "simplified:", expr_simp(e)
def sanitize_memory_accesses(self, memories, c_handler, expr_type_from_C):
    """Rewrite memory writes so that each one covers a full final element

    Example, with struct T {int a; int b; int *c;}:
        @8[T + 2] = X            -> @32[T] = 00 X 00 00
        @32[T + 2] = WW XX YY ZZ -> @32[T] = 00 00 WW XX,
                                    @32[T + 4] = YY ZZ 00 00

    @memories: AssignBlock whose destinations are ExprMem
    @c_handler: CHandler with argument types
    @expr_type_from_C: Name -> ObjC dict, for C -> Expr generation
    Return the sanitized accesses and the filled memory cases
    {Full access -> [offset filled]}
    """

    def byte_cell(base, index):
        # Offset expression and 8-bit memory cell @index bytes after @base
        shift = ExprInt(index, base.size)
        return shift, ExprMem(expr_simp(base + shift), 8)

    # Pass 1: split every write into bytes and collect the touched fields
    touched_fields = set()
    byte_values = {}
    for dst, value in memories.iteritems():
        assert isinstance(dst, ExprMem)
        for index in xrange(dst.size / 8):
            _, cell = byte_cell(dst.ptr, index)
            low_bit = index * 8
            byte_values[cell] = expr_simp(value[low_bit:low_bit + 8])

            # Byte access -> C field access -> access on the full field
            c_accesses = list(c_handler.expr_to_c(cell))
            assert len(c_accesses) == 1
            c_access = c_accesses[0]
            if "__PAD__" in c_access:
                # Padding fields are not rebuilt
                continue
            touched_fields.add(
                expr_simp(c_handler.c_to_expr(c_access, expr_type_from_C)))

    # Pass 2: rebuild each field value, byte per byte
    filled_memory = {}
    rebuilt = {}
    for field in touched_fields:
        assert isinstance(field, ExprMem)
        high_to_low = []
        for index in reversed(xrange(field.size / 8)):
            offset, cell = byte_cell(field.ptr, index)
            if cell in byte_values:
                high_to_low.append(byte_values[cell])
            else:
                # Byte never written: complete with 0 and record its offset
                filled_memory.setdefault(field, []).append(offset)
                high_to_low.append(ExprInt(0, 8))
        rebuilt[field] = expr_simp(ExprCompose(*reversed(high_to_low)))

    sanitized = AssignBlock(rebuilt)
    if memories != sanitized:
        self.logger.debug("SANITIZE: %s", memories)
        self.logger.debug("OUT SANITIZE: %s", sanitized)
    return sanitized, filled_memory
def simplify_blocs(self):
    """Replace each assignment of each block by its simplified version

    Every entry of the `irs` lists of self.blocs is overwritten in place by a
    new ExprAff built from the simplified destination and source.
    """
    for block in self.blocs.values():
        for assignments in block.irs:
            for position, assignment in enumerate(assignments):
                simplified_dst = expr_simp(assignment.dst)
                simplified_src = expr_simp(assignment.src)
                assignments[position] = m2_expr.ExprAff(simplified_dst,
                                                        simplified_src)
def fromstring(cls, s, mode=None):
    """Parse the textual instruction @s into an instruction object

    @s: instruction text: a mnemonic followed by comma separated arguments
    @mode: mode forwarded to cls.get_cls_instance and cls.instruction

    The first candidate class able to consume the whole argument string is
    used to build the returned instruction.
    Raise ValueError when @s contains no mnemonic, when the mnemonic is not
    known, or when none of the candidates parses the arguments.
    """
    global total_scans

    # A blank string gives no match at all: test the match object itself,
    # otherwise the failure surfaces as an AttributeError on None
    found = re.search(r'(\S+)', s)
    if found is None:
        raise ValueError('cannot find name', s)
    name = found.group(1)
    if name not in cls.all_mn_name:
        raise ValueError('unknown name', name)
    clist = list(cls.all_mn_name[name])

    out = []
    out_args = []
    # (argument index, position in the string) -> parser -> scan result
    parsers = defaultdict(dict)

    for cc in clist:
        for c in cls.get_cls_instance(cc, mode):
            args_expr = []
            args_str = s[len(name):].strip(' ')
            cannot_parse = False
            len_o = len(args_str)

            for i, f in enumerate(c.args):
                start_i = len_o - len(args_str)
                parser = f.parser if isinstance(f.parser, tuple) else (f.parser,)
                cache = parsers[(i, start_i)]
                for p in parser:
                    if p in cache:
                        continue
                    try:
                        total_scans += 1
                        v, start, stop = next(p.scanString(args_str))
                    except StopIteration:
                        v, start, stop = [None], None, None
                    if start != 0:
                        # A match which does not start here is not usable
                        v, start, stop = [None], None, None
                    cache[p] = v[0], start, stop

                start, stop = f.fromstring(args_str, cache)
                if start != 0:
                    log.debug("cannot fromstring %r", args_str)
                    cannot_parse = True
                    break
                if f.expr is None:
                    raise NotImplementedError('not fully functional')
                f.expr = expr_simp(f.expr)
                args_expr.append(f.expr)

                # Skip the parsed argument and the separator following it
                args_str = args_str[stop:].strip(' ')
                if args_str.startswith(','):
                    args_str = args_str[1:]
                args_str = args_str.strip(' ')

            if args_str:
                # Remaining text means the candidate did not parse everything
                cannot_parse = True
            if cannot_parse:
                continue

            out.append(c)
            out_args.append(args_expr)
            break

    if len(out) == 0:
        raise ValueError('cannot fromstring %r' % s)
    if len(out) != 1:
        log.debug('fromstring multiple args ret default')
    c = out[0]
    c_args = out_args[0]

    instr = cls.instruction(c.name, mode, c_args,
                            additional_info=c.additional_info())
    return instr
def Expr2C(ir_arch, l, exprs, gen_exception_code=False):
    """Translate the assignments @exprs of the instruction @l to C code

    @ir_arch: IR architecture; provides IRDst, arch.pc, attrib and the
              floating point register list
    @l: instruction being translated; its offset, its length (l.l) and
        l.additional_info.except_on_instr are read
    @exprs: list of ExprAff; destinations must not be ExprOp
    @gen_exception_code: if set, also emit the exception check code

    Return a tuple (out, post_instr, post_instr + out_pc), each a list of C
    source strings: the instruction body, the checks to run after it, and
    the same checks followed by the return emitted when the PC is written.
    """
    id_to_update = []
    out = ["// %s" % (l)]
    out_pc = []

    dst_dict = {}
    src_mem = {}

    # Next free prefetch variable, per access size
    prefect_index = {8: 0, 16: 0, 32: 0, 64: 0}
    new_expr = []

    # NOTE(review): this value is overwritten by the loop below before any
    # use; only the commented-out line consumed it
    e = set_pc(ir_arch, l.offset & mask_int)
    #out.append("%s;" % patch_c_id(ir_arch.arch, e)))

    pc_is_dst = False
    fetch_mem = False
    set_exception_flags = False
    for e in exprs:
        assert isinstance(e, m2_expr.ExprAff)
        assert not isinstance(e.dst, m2_expr.ExprOp)
        # Assignments to identifiers are grouped per destination; the other
        # ones are kept in their original order
        if isinstance(e.dst, m2_expr.ExprId):
            if not e.dst in dst_dict:
                dst_dict[e.dst] = []
            dst_dict[e.dst].append(e)
        else:
            new_expr.append(e)
        # test exception flags
        ops = m2_expr.get_expr_ops(e)
        if set(['umod', 'udiv']).intersection(ops):
            set_exception_flags = True
        if e.dst == exception_flags:
            set_exception_flags = True
            # TODO XXX test function whose set exception_flags

        # search mem lookup for generate mem read prefetch
        rs = e.src.get_r(mem_read=True)
        for r in rs:
            if (not isinstance(r, m2_expr.ExprMem)) or r in src_mem:
                continue
            fetch_mem = True
            # Bind each distinct memory read to the next prefetch variable
            # of the matching size
            index = prefect_index[r.size]
            prefect_index[r.size] += 1
            pfmem = prefetch_id_size[r.size][index]
            src_mem[r] = pfmem

    for dst, exs in dst_dict.items():
        if len(exs) == 1:
            new_expr += exs
            continue
        # Several assignments target the same identifier: simplify them,
        # log the situation and emit all of them
        exs = [expr_simp(x) for x in exs]
        log_to_c_h.debug('warning: detected multi dst to same id')
        log_to_c_h.debug('\t'.join([str(x) for x in exs]))
        new_expr += exs
    out_mem = []

    # first, generate mem prefetch
    mem_k = src_mem.keys()
    mem_k.sort()
    for k in mem_k:
        str_src = translator.from_expr(patch_c_id(ir_arch.arch, k))
        str_dst = translator.from_expr(patch_c_id(ir_arch.arch, src_mem[k]))
        out.append('%s = %s;' % (str_dst, str_src))
    # Replacement table: memory read -> prefetch variable
    src_w_len = {}
    for k, v in src_mem.items():
        src_w_len[k] = v
    for e in new_expr:
        src, dst = e.src, e.dst
        # reload src using prefetch
        src = src.replace_expr(src_w_len)
        if dst is ir_arch.IRDst:
            out += gen_irdst(ir_arch, src)
            continue

        str_src = translator.from_expr(patch_c_id(ir_arch.arch, src))
        str_dst = translator.from_expr(patch_c_id(ir_arch.arch, dst))
        if isinstance(dst, m2_expr.ExprId):
            # The value goes to the "new" variant of the identifier; it is
            # copied to the identifier itself further down
            id_to_update.append(dst)
            str_dst = patch_c_new_id(ir_arch.arch, dst)
            if dst in ir_arch.arch.regs.regs_flt_expr:
                # do not mask float assignment
                out.append('%s = (%s);' % (str_dst, str_src))
            else:
                out.append('%s = (%s)&0x%X;' % (str_dst, str_src,
                                                my_size_mask[src.size]))
        elif isinstance(dst, m2_expr.ExprMem):
            fetch_mem = True
            # Turn the translated lookup into a write: swap the macro name,
            # drop the last character and append the value as extra argument
            str_dst = str_dst.replace('MEM_LOOKUP', 'MEM_WRITE')
            out_mem.append('%s, %s);' % (str_dst[:-1], str_src))

        if e.dst == ir_arch.arch.pc[ir_arch.attrib]:
            pc_is_dst = True
            out_pc += ["return JIT_RET_NO_EXCEPTION;"]

    # if len(id_to_update) != len(set(id_to_update)):
    # raise ValueError('Not implemented: multi dst to same id!', str([str(x)
    # for x in exprs]))

    # Memory writes are emitted after the identifier assignments
    out += out_mem

    if gen_exception_code:
        if fetch_mem:
            e = set_pc(ir_arch, l.offset & mask_int)
            s1 = "%s" % translator.from_expr(patch_c_id(ir_arch.arch, e))
            s1 += ';\n Resolve_dst(BlockDst, 0x%X, 0)' % (l.offset & mask_int)
            out.append(code_exception_fetch_mem_at_instr_noautomod % s1)
        if set_exception_flags:
            e = set_pc(ir_arch, l.offset & mask_int)
            s1 = "%s" % translator.from_expr(patch_c_id(ir_arch.arch, e))
            s1 += ';\n Resolve_dst(BlockDst, 0x%X, 0)' % (l.offset & mask_int)
            out.append(code_exception_at_instr_noautomod % s1)

    # Copy the "new" values into the identifiers
    for i in id_to_update:
        if i is ir_arch.IRDst:
            continue
        out.append(
            '%s = %s;' % (patch_c_id(ir_arch.arch, i),
                          patch_c_new_id(ir_arch.arch, i)))

    post_instr = []
    # test stop exec ####
    if gen_exception_code:
        if set_exception_flags:
            if pc_is_dst:
                # "%" binds tighter than "+": only the second literal, which
                # holds the placeholder, is formatted
                post_instr.append(
                    "if (VM_exception_flag) { " +
                    "/*pc = 0x%X; */return JIT_RET_EXCEPTION; }" % (l.offset))
            else:
                # s1 targets the current instruction, s2 the following one
                e = set_pc(ir_arch, l.offset & mask_int)
                s1 = "%s" % translator.from_expr(patch_c_id(ir_arch.arch, e))
                s1 += ';\n Resolve_dst(BlockDst, 0x%X, 0)' % (l.offset & mask_int)
                e = set_pc(ir_arch, (l.offset + l.l) & mask_int)
                s2 = "%s" % translator.from_expr(patch_c_id(ir_arch.arch, e))
                s2 += ';\n Resolve_dst(BlockDst, 0x%X, 0)' % (
                    (l.offset + l.l) & mask_int)
                post_instr.append(code_exception_post_instr_noautomod % (s1,
                                                                         s2))

        if fetch_mem:
            # The reported address is either this instruction or the next
            # one, depending on except_on_instr
            if l.additional_info.except_on_instr:
                offset = l.offset
            else:
                offset = l.offset + l.l

            e = set_pc(ir_arch, offset & mask_int)
            s1 = "%s" % translator.from_expr(patch_c_id(ir_arch.arch, e))
            s1 += ';\n Resolve_dst(BlockDst, 0x%X, 0)' % (offset & mask_int)
            post_instr.append(code_exception_fetch_mem_post_instr_noautomod % (s1))

    # pc manip after all modifications
    return out, post_instr, post_instr + out_pc
def Expr2C(ir_arch, l, exprs, gen_exception_code=False):
    """Generate the C statements implementing @exprs for the instruction @l

    @ir_arch: IR architecture; its IRDst, arch.pc, attrib and floating point
              register list are consulted
    @l: source instruction; l.offset, l.l and
        l.additional_info.except_on_instr are read
    @exprs: list of ExprAff whose destinations are not ExprOp
    @gen_exception_code: if set, exception checks are generated as well

    Return three lists of C source strings: the statements of the
    instruction, the post-instruction checks, and those checks followed by
    the return statement emitted when the PC is assigned.
    """
    id_to_update = []
    out = ["// %s" % (l)]
    out_pc = []

    dst_dict = {}
    src_mem = {}

    # Per size counter selecting the prefetch variable to use next
    prefect_index = {8: 0, 16: 0, 32: 0, 64: 0}
    new_expr = []

    # NOTE(review): unused result -- `e` is rebound by the loop below
    e = set_pc(ir_arch, l.offset & mask_int)
    #out.append("%s;" % patch_c_id(ir_arch.arch, e)))

    pc_is_dst = False
    fetch_mem = False
    set_exception_flags = False
    for e in exprs:
        assert isinstance(e, m2_expr.ExprAff)
        assert not isinstance(e.dst, m2_expr.ExprOp)
        # Identifier destinations are collected per identifier, the rest is
        # queued directly
        if isinstance(e.dst, m2_expr.ExprId):
            if not e.dst in dst_dict:
                dst_dict[e.dst] = []
            dst_dict[e.dst].append(e)
        else:
            new_expr.append(e)
        # test exception flags
        ops = m2_expr.get_expr_ops(e)
        if set(['umod', 'udiv']).intersection(ops):
            set_exception_flags = True
        if e.dst == exception_flags:
            set_exception_flags = True
            # TODO XXX test function whose set exception_flags

        # search mem lookup for generate mem read prefetch
        rs = e.src.get_r(mem_read=True)
        for r in rs:
            if (not isinstance(r, m2_expr.ExprMem)) or r in src_mem:
                continue
            fetch_mem = True
            # First time this memory read is seen: reserve a prefetch
            # variable of its size
            index = prefect_index[r.size]
            prefect_index[r.size] += 1
            pfmem = prefetch_id_size[r.size][index]
            src_mem[r] = pfmem

    for dst, exs in dst_dict.items():
        if len(exs) == 1:
            new_expr += exs
            continue
        # Same identifier assigned more than once: the assignments are
        # simplified and logged, then all queued
        exs = [expr_simp(x) for x in exs]
        log_to_c_h.debug('warning: detected multi dst to same id')
        log_to_c_h.debug('\t'.join([str(x) for x in exs]))
        new_expr += exs
    out_mem = []

    # first, generate mem prefetch
    mem_k = src_mem.keys()
    mem_k.sort()
    for k in mem_k:
        str_src = translator.from_expr(patch_c_id(ir_arch.arch, k))
        str_dst = translator.from_expr(patch_c_id(ir_arch.arch, src_mem[k]))
        out.append('%s = %s;' % (str_dst, str_src))
    # Copy of src_mem used to substitute the reads in the sources
    src_w_len = {}
    for k, v in src_mem.items():
        src_w_len[k] = v
    for e in new_expr:
        src, dst = e.src, e.dst
        # reload src using prefetch
        src = src.replace_expr(src_w_len)
        if dst is ir_arch.IRDst:
            out += gen_irdst(ir_arch, src)
            continue

        str_src = translator.from_expr(patch_c_id(ir_arch.arch, src))
        str_dst = translator.from_expr(patch_c_id(ir_arch.arch, dst))
        if isinstance(dst, m2_expr.ExprId):
            # Written through the "new" variant of the identifier, which is
            # copied back once every statement has been emitted
            id_to_update.append(dst)
            str_dst = patch_c_new_id(ir_arch.arch, dst)
            if dst in ir_arch.arch.regs.regs_flt_expr:
                # do not mask float assignment
                out.append('%s = (%s);' % (str_dst, str_src))
            else:
                out.append('%s = (%s)&0x%X;' % (str_dst, str_src,
                                                my_size_mask[src.size]))
        elif isinstance(dst, m2_expr.ExprMem):
            fetch_mem = True
            # The lookup text becomes a write call: macro renamed, last
            # character removed, value appended as a further argument
            str_dst = str_dst.replace('MEM_LOOKUP', 'MEM_WRITE')
            out_mem.append('%s, %s);' % (str_dst[:-1], str_src))

        if e.dst == ir_arch.arch.pc[ir_arch.attrib]:
            pc_is_dst = True
            out_pc += ["return JIT_RET_NO_EXCEPTION;"]

    # if len(id_to_update) != len(set(id_to_update)):
    # raise ValueError('Not implemented: multi dst to same id!', str([str(x)
    # for x in exprs]))

    # Memory writes come after the identifier assignments
    out += out_mem

    if gen_exception_code:
        if fetch_mem:
            e = set_pc(ir_arch, l.offset & mask_int)
            s1 = "%s" % translator.from_expr(patch_c_id(ir_arch.arch, e))
            s1 += ';\n Resolve_dst(BlockDst, 0x%X, 0)'%(l.offset & mask_int)
            out.append(code_exception_fetch_mem_at_instr_noautomod % s1)
        if set_exception_flags:
            e = set_pc(ir_arch, l.offset & mask_int)
            s1 = "%s" % translator.from_expr(patch_c_id(ir_arch.arch, e))
            s1 += ';\n Resolve_dst(BlockDst, 0x%X, 0)'%(l.offset & mask_int)
            out.append(code_exception_at_instr_noautomod % s1)

    # Commit the "new" values to the identifiers
    for i in id_to_update:
        if i is ir_arch.IRDst:
            continue
        out.append('%s = %s;' % (patch_c_id(ir_arch.arch, i),
                                 patch_c_new_id(ir_arch.arch, i)))

    post_instr = []
    # test stop exec ####
    if gen_exception_code:
        if set_exception_flags:
            if pc_is_dst:
                # Only the second literal is formatted: "%" has priority
                # over "+"
                post_instr.append("if (VM_exception_flag) { " +
                                  "/*pc = 0x%X; */return JIT_RET_EXCEPTION; }" % (l.offset))
            else:
                # s1 designates this instruction, s2 the next one
                e = set_pc(ir_arch, l.offset & mask_int)
                s1 = "%s" % translator.from_expr(patch_c_id(ir_arch.arch, e))
                s1 += ';\n Resolve_dst(BlockDst, 0x%X, 0)'%(l.offset & mask_int)
                e = set_pc(ir_arch, (l.offset + l.l) & mask_int)
                s2 = "%s" % translator.from_expr(patch_c_id(ir_arch.arch, e))
                s2 += ';\n Resolve_dst(BlockDst, 0x%X, 0)'%((l.offset + l.l) & mask_int)
                post_instr.append(
                    code_exception_post_instr_noautomod % (s1, s2))

        if fetch_mem:
            # except_on_instr selects the current instruction address,
            # otherwise the address of the following instruction is used
            if l.additional_info.except_on_instr:
                offset = l.offset
            else:
                offset = l.offset + l.l

            e = set_pc(ir_arch, offset & mask_int)
            s1 = "%s" % translator.from_expr(patch_c_id(ir_arch.arch, e))
            s1 += ';\n Resolve_dst(BlockDst, 0x%X, 0)'%(offset & mask_int)
            post_instr.append(
                code_exception_fetch_mem_post_instr_noautomod % (s1))

    # pc manip after all modifications
    return out, post_instr, post_instr + out_pc
def simplify_blocs(self):
    """Simplify in place each assignment of each block

    For every assignment block found in the `irs` of self.blocks, each
    (destination, source) entry is replaced by its simplified counterpart.
    """
    for irblock in self.blocks.values():
        for assignblk in irblock.irs:
            # Work on a snapshot of the pairs: keys are deleted and inserted
            # in the loop, which must not happen while iterating a live view
            # of the mapping
            for dst, src in list(assignblk.items()):
                del assignblk[dst]
                assignblk[expr_simp(dst)] = expr_simp(src)
def launch_depgraph():
    """Compute and display the dependency graphs of the current function

    The function under the IDA cursor is disassembled and lifted to IR, the
    settings form is shown, the IR is simplified (optionally with the stack
    pointer unaliased) and the dependency graphs are computed. Results are
    stored in module globals, then the "Shift-N" hotkey is registered and
    the first element is displayed.
    """
    global graphs, comments, sol_nb, settings, addr, ir_arch, ircfg

    # Get the current function
    addr = idc.ScreenEA()
    func = ida_funcs.get_func(addr)

    # Init
    machine = guess_machine(addr=func.startEA)
    mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira

    bs = bin_stream_ida()
    mdis = dis_engine(bs, dont_dis_nulstart_bloc=True)
    ir_arch = ira(mdis.loc_db)

    # Populate symbols with ida names
    for ad, name in idautils.Names():
        if name is None:
            continue
        if (mdis.loc_db.get_offset_location(ad) or
                mdis.loc_db.get_name_location(name)):
            # Symbol alias: the offset or the name is already registered
            continue
        mdis.loc_db.add_location(name, ad)

    asmcfg = mdis.dis_multiblock(func.startEA)

    # Generate IR
    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    # Get settings
    settings = depGraphSettingsForm(ir_arch, ircfg)
    settings.Execute()

    loc_key, elements, line_nb = settings.loc_key, settings.elements, settings.line_nb

    # Simplify assignments
    for irb in ircfg.blocks.values():
        irs = []
        offset = ir_arch.loc_db.get_location_offset(irb.loc_key)
        fix_stack = offset is not None and settings.unalias_stack
        for assignblk in irb:
            fix_dct = None
            if fix_stack:
                # Express the stack pointer as its initial value plus the
                # stack delta reported by IDA at this instruction
                stk_high = m2_expr.ExprInt(idc.GetSpd(assignblk.instr.offset),
                                           ir_arch.sp.size)
                fix_dct = {ir_arch.sp: mn.regs.regs_init[ir_arch.sp] + stk_high}

            new_assignblk = {}
            for dst, src in assignblk.iteritems():
                if fix_dct is not None:
                    src = src.replace_expr(fix_dct)
                    # The stack pointer itself stays a valid destination
                    if dst != ir_arch.sp:
                        dst = dst.replace_expr(fix_dct)
                dst, src = expr_simp(dst), expr_simp(src)
                new_assignblk[dst] = src
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    # Get dependency graphs
    dg = settings.depgraph
    graphs = dg.get(loc_key, elements, line_nb,
                    set([ir_arch.loc_db.get_offset_location(func.startEA)]))

    # Display the result
    comments = {}
    sol_nb = 0

    # Register and launch
    ida_kernwin.add_hotkey("Shift-N", next_element)
    treat_element()
from miasm2.expression.expression import * from miasm2.expression.simplifications import expr_simp print """ Simple expression simplification demo """ a = ExprId('eax') b = ExprId('ebx') exprs = [a + b - a, ExprInt(0x12, 32) + ExprInt(0x30, 32) - a, ExprCompose(a[:8], a[8:16])] for e in exprs: print '*' * 40 print 'original expression:', e print "simplified:", expr_simp(e)
(ExprOp(">>>c_rez", icustom, i1, i0), ExprInt(0x91A2B3C, 32)), (ExprOp(">>>c_rez", icustom, i1, i1), ExprInt(0x891A2B3C, 32)), (ExprOp("idiv", ExprInt(0x0123, 16), ExprInt(0xfffb, 16))[:8], ExprInt(0xc6, 8)), (ExprOp("imod", ExprInt(0x0123, 16), ExprInt(0xfffb, 16))[:8], ExprInt(0x01, 8)), ] for e, e_check in to_test[:]: # print "#" * 80 # print str(e), str(e_check) e_new = expr_simp(e) print "original: ", str(e), "new: ", str(e_new) rez = e_new == e_check if not rez: raise ValueError( 'bug in expr_simp simp(%s) is %s and should be %s' % (e, e_new, e_check)) # Test conds to_test = [ (((a - b) ^ ((a ^ b) & ((a - b) ^ a))).msb(), ExprOp_inf_signed(a, b)), ((((a - b) ^ ((a ^ b) & ((a - b) ^ a))) ^ a ^ b).msb(), ExprOp_inf_unsigned(a, b)), (ExprOp_inf_unsigned(ExprInt(-1, 32), ExprInt(3, 32)), ExprInt(0, 1)), (ExprOp_inf_signed(ExprInt(-1, 32), ExprInt(3, 32)), ExprInt(1, 1)),
def launch_depgraph():
    """Build the dependency graphs of the function under the cursor

    Steps: disassemble the current IDA function, lift it to IR, ask the user
    for the settings, simplify the IR (replacing the stack pointer by its
    initial value plus IDA's stack delta when requested), compute the
    dependency graphs. The results are published through module globals;
    "Shift-N" is then bound to next_element and the first element is shown.
    """
    global graphs, comments, sol_nb, settings, addr, ir_arch, ircfg

    # Get the current function
    addr = idc.ScreenEA()
    func = ida_funcs.get_func(addr)

    # Init
    machine = guess_machine(addr=func.startEA)
    mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira

    bs = bin_stream_ida()
    mdis = dis_engine(bs, dont_dis_nulstart_bloc=True)
    ir_arch = ira(mdis.loc_db)

    # Populate symbols with ida names
    loc_db = mdis.loc_db
    for ad, name in idautils.Names():
        if name is None:
            continue
        if loc_db.get_offset_location(ad) or loc_db.get_name_location(name):
            # Symbol alias: skip names or offsets the database already holds
            continue
        loc_db.add_location(name, ad)

    asmcfg = mdis.dis_multiblock(func.startEA)

    # Generate IR
    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    # Get settings
    settings = depGraphSettingsForm(ir_arch, ircfg)
    settings.Execute()

    loc_key, elements, line_nb = settings.loc_key, settings.elements, settings.line_nb

    # Simplify assignments
    for irb in ircfg.blocks.values():
        irs = []
        offset = ir_arch.loc_db.get_location_offset(irb.loc_key)
        fix_stack = offset is not None and settings.unalias_stack
        for assignblk in irb:
            fix_dct = None
            if fix_stack:
                # Stack pointer = initial stack pointer + IDA stack delta
                stk_high = m2_expr.ExprInt(idc.GetSpd(assignblk.instr.offset),
                                           ir_arch.sp.size)
                fix_dct = {
                    ir_arch.sp: mn.regs.regs_init[ir_arch.sp] + stk_high
                }

            new_assignblk = {}
            for dst, src in assignblk.iteritems():
                if fix_dct is not None:
                    src = src.replace_expr(fix_dct)
                    # Assignments to the stack pointer keep their destination
                    if dst != ir_arch.sp:
                        dst = dst.replace_expr(fix_dct)
                dst, src = expr_simp(dst), expr_simp(src)
                new_assignblk[dst] = src
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    # Get dependency graphs
    dg = settings.depgraph
    graphs = dg.get(loc_key, elements, line_nb,
                    set([ir_arch.loc_db.get_offset_location(func.startEA)]))

    # Display the result
    comments = {}
    sol_nb = 0

    # Register and launch
    ida_kernwin.add_hotkey("Shift-N", next_element)
    treat_element()
def simplify_blocs(self):
    """Simplify the source, then the destination, of every assignment

    The assignment objects stored in the `irs` of self.blocs are kept; only
    their `src` and `dst` attributes are overwritten.
    """
    for block in self.blocs.values():
        for assignments in block.irs:
            for assignment in assignments:
                assignment.src = expr_simp(assignment.src)
                assignment.dst = expr_simp(assignment.dst)