def extract_ast_core(v, my_id2expr, my_int2expr):
    """Build an expression from the AST tokens extracted from @v

    @v: object handed to _extract_ast_core to obtain the raw AST tokens
    @my_id2expr: callable turning an identifier into an expression
    @my_int2expr: callable turning an integer into an expression; it is
        replaced when all identifiers share a single size, so that integers
        are created with that size

    Raise ValueError when the identifiers have more than one size.
    """
    tokens = _extract_ast_core(v)
    id_sizes = set(my_id2expr(ident).size for ident in ast_get_ids(tokens))

    if len(id_sizes) > 1:
        raise ValueError('multiple sizes in ids')

    if id_sizes:
        common_size = id_sizes.pop()

        # Integers inherit the size shared by every identifier
        def my_int2expr(value):
            return m2_expr.ExprInt(value, common_size)

    return ast_raw2expr(tokens, my_id2expr, my_int2expr)
def mod_pc(self, instr, instr_ir, extra_ir):
    """Replace PC by the instruction's offset

    @instr: instruction providing the offset
    @instr_ir: list of assignments, updated in place
    @extra_ir: list of irblocks, each entry replaced by its fixed version
    """
    pc_fixed = {self.pc: m2_expr.ExprInt(instr.offset, 64)}

    def fix_src(expr):
        return expr.replace_expr(pc_fixed)

    def fix_dst(expr):
        # A destination which is PC itself must stay PC
        if expr != self.pc:
            return expr.replace_expr(pc_fixed)
        return expr

    for index, assignment in enumerate(instr_ir):
        instr_ir[index] = m2_expr.ExprAff(fix_dst(assignment.dst),
                                          fix_src(assignment.src))

    for index, irblock in enumerate(extra_ir):
        extra_ir[index] = irblock.modify_exprs(fix_dst, fix_src)
def _func_read(self, expr_mem): """Memory read wrapper for symbolic execution @expr_mem: ExprMem""" addr = expr_mem.arg if not addr.is_int(): return expr_mem addr = int(addr) size = expr_mem.size / 8 value = self.cpu.get_mem(addr, size) if self.vm.is_little_endian(): value = value[::-1] self.vm.add_mem_read(addr, size) return m2_expr.ExprInt(int(value.encode("hex"), 16), expr_mem.size)
def extend_arg(dst, arg):
    """Extend a shifted/extended operand to the size of @dst

    @dst: destination expression; only its size is used
    @arg: operand. Anything which is not an ExprOp is returned untouched;
        an ExprOp is expected to carry exactly two arguments (reg, shift)

    'SXTW' sign extends the register and is turned into a '<<'; the other
    supported shifters zero extend it. The shift amount is zero extended
    and masked with (dst.size - 1).
    Raise NotImplementedError on any other operator.
    """
    if not isinstance(arg, m2_expr.ExprOp):
        return arg

    op, (reg, shift) = arg.op, arg.args
    if op == 'SXTW':
        base = reg.signExtend(dst.size)
        op = "<<"
    elif op in ['<<', '>>', '<<a', 'a>>', '<<<', '>>>']:
        base = reg.zeroExtend(dst.size)
    else:
        raise NotImplementedError('Unknown shifter operator')

    shift_mask = m2_expr.ExprInt(dst.size - 1, dst.size)
    # Build the result through m2_expr like the rest of this function: the
    # bare ExprOp name is not guaranteed to be in scope
    return m2_expr.ExprOp(op, base, shift.zeroExtend(dst.size) & shift_mask)
def simp_add_mul(expr_simp, expr):
    """Naive Simplification: a + a + a == a * 3

    @expr_simp: simplifier instance (not used here)
    @expr: expression to simplify; no isinstance check is done since
        simplifications are attached to expression types
    Return the simplified expression, or @expr when the form is not matched.
    """
    # Not an addition: do not simplify
    if expr.op != "+":
        return expr

    operands = expr.args
    # Not exactly three operands: do not simplify
    if len(operands) != 3:
        return expr

    first = operands[0]
    # Operands are not all the same: do not simplify
    if operands.count(first) != len(operands):
        return expr

    # Effective simplification
    return m2_expr.ExprOp("*", first, m2_expr.ExprInt(3, first.size))
def emulbloc(self, irb, step=False): """ Symbolic execution of the @irb on the current state @irb: irblock instance @step: display intermediate steps """ offset2cmt = {} for index, assignblk in enumerate(irb.irs): if set(assignblk) == set([self.ir_arch.IRDst, self.ir_arch.pc]): # Don't display on jxx continue instr = assignblk.instr tmp_r = assignblk.get_r() tmp_w = assignblk.get_w() todo = set() # Replace PC with value to match IR args pc_fixed = { self.ir_arch.pc: m2_expr.ExprInt(instr.offset + instr.l, self.ir_arch.pc.size) } inputs = tmp_r inputs.update(arg for arg in tmp_w if arg.is_mem()) for arg in inputs: arg = expr_simp(arg.replace_expr(pc_fixed)) if arg in tmp_w and not arg.is_mem(): continue todo.add(arg) for expr in todo: if expr.is_int(): continue for c_str, c_type in self.chandler.expr_to_c_and_types( expr, self.symbols): expr = self.cst_propag_link.get((irb.label, index), {}).get(expr, expr) offset2cmt.setdefault(instr.offset, set()).add( "\n%s: %s\n%s" % (expr, c_str, c_type)) self.eval_ir(assignblk) for offset, value in offset2cmt.iteritems(): idc.MakeComm(offset, '\n'.join(value)) print "%x\n" % offset, '\n'.join(value) return self.eval_expr(self.ir_arch.IRDst)
def get_ir(self, instr):
    """Return the IR of @instr with PC replaced in every source

    @instr: instruction to translate
    PC is replaced by the 32 bit integer (instr.offset + 4) in the sources
    of both the instruction IR and the extra irblocks; destinations are
    left untouched.
    Return a (instr_ir, extra_ir) couple of new lists.
    """
    instr_ir, extra_ir = get_mnemo_expr(self, instr, *instr.args)

    next_pc = m2_expr.ExprInt(instr.offset + 4, 32)
    pc_fixed = {self.pc: next_pc}

    def fix_src(expr):
        return expr.replace_expr(pc_fixed)

    fixed_ir = []
    for assignment in instr_ir:
        fixed_ir.append(m2_expr.ExprAff(assignment.dst,
                                        fix_src(assignment.src)))

    fixed_extra = [irblock.modify_exprs(mod_src=fix_src)
                   for irblock in extra_ir]
    return fixed_ir, fixed_extra
def mod_pc(self, instr, instr_ir, extra_ir):
    """Replace PC by the instruction's offset

    @instr: instruction providing the offset
    @instr_ir: list of assignments, updated in place
    @extra_ir: irblocks whose assignment lists are updated in place
    """
    pc_fixed = {self.pc: m2_expr.ExprInt(instr.offset, 64)}

    def fix_in_place(assignments):
        for index, assignment in enumerate(assignments):
            dst = assignment.dst
            # A destination which is PC itself must stay PC
            if dst != self.pc:
                dst = dst.replace_expr(pc_fixed)
            src = assignment.src.replace_expr(pc_fixed)
            assignments[index] = m2_expr.ExprAff(dst, src)

    fix_in_place(instr_ir)
    for irblock in extra_ir:
        for irs in irblock.irs:
            fix_in_place(irs)
def block2assignblks(self, block):
    """Patch the assignment blocks of flow breaking instructions

    @block: block whose lines are translated by the parent class
    For each assignment block writing PC in a flow breaking instruction,
    the PC value is saved in delay_slot_dst, delay_slot_set is set to 1 and
    IRDst (as well as the irblock dst) targets the next instruction.
    Return the list given by the parent class, modified in place.
    """
    irblocks_list = super(mipsCGen, self).block2assignblks(block)

    for instr, irblocks in zip(block.lines, irblocks_list):
        if not instr.breakflow():
            continue

        for irblock in irblocks:
            for assignblock in irblock.irs:
                if self.ir_arch.pc in assignblock:
                    # Add internal branch destination
                    branch_dst = assignblock[self.ir_arch.pc]
                    assignblock[self.delay_slot_dst] = branch_dst
                    assignblock[self.delay_slot_set] = m2_expr.ExprInt(1, 32)

                    # Replace IRDst with next instruction
                    next_dst = m2_expr.ExprId(
                        self.ir_arch.get_next_instr(instr))
                    assignblock[self.ir_arch.IRDst] = next_dst
                    irblock.dst = m2_expr.ExprId(
                        self.ir_arch.get_next_instr(instr))

    return irblocks_list
def merge_sliceto_slice(expr):
    """
    Factorise the consecutive sub components of an ExprCompose
    @expr: ExprCompose

    Adjacent integers are merged into one integer, and adjacent slices of a
    same expression into one slice. Return the resulting argument list.
    """
    merged = []
    prev_start = 0
    for start, current in expr.iter_args():
        # Init
        if not merged:
            merged.append(current)
            continue

        previous = merged[-1]
        if prev_start + previous.size == start:
            # The current component directly follows the previous one
            both_int = (isinstance(current, m2_expr.ExprInt) and
                        isinstance(previous, m2_expr.ExprInt))
            if both_int:
                # Merge consecutive integers: the current one supplies the
                # high bits
                total_size = previous.size + current.size
                merged_value = (int(current) << previous.size) | int(previous)
                merged[-1] = m2_expr.ExprInt(merged_value, size=total_size)
                continue

            both_slice = (isinstance(current, m2_expr.ExprSlice) and
                          isinstance(previous, m2_expr.ExprSlice))
            if both_slice:
                # Merge consecutive slices of the same source
                source = current.arg
                if previous.arg == source and previous.stop == current.start:
                    merged[-1] = source[previous.start:current.stop]
                    continue

        # Unmergeable
        prev_start = start
        merged.append(current)

    return merged
def resolve_args_with_symbols(self, symbols=None):
    """Return the arguments with their locations replaced by offsets

    @symbols: object providing get_location_names(loc_key) and
        get_location_offset(loc_key); defaults to an empty dict, which is
        only valid when no argument contains a location

    The special names '$' and '_' are resolved with get_asm_offset and
    get_asm_next_offset. Each resulting argument is simplified.
    Raise ValueError on a location without name or without known offset.
    """
    if symbols is None:
        symbols = {}
    args_out = []
    for expr in self.args:
        fixed_expr = {}
        for exprloc in m2_expr.get_expr_locs(expr):
            loc_key = exprloc.loc_key
            names = symbols.get_location_names(loc_key)

            # special symbols
            if '$' in names:
                fixed_expr[exprloc] = self.get_asm_offset(exprloc)
                continue
            if '_' in names:
                fixed_expr[exprloc] = self.get_asm_next_offset(exprloc)
                continue

            if not names:
                raise ValueError('Unresolved symbol: %r' % exprloc)

            offset = symbols.get_location_offset(loc_key)
            if offset is None:
                # Report the location names: the former message used an
                # undefined variable and failed with a NameError
                raise ValueError(
                    'The offset of loc_key "%s" cannot be determined'
                    % (names,))

            # Fix symbol with its offset
            size = exprloc.size
            if size is None:
                size = self.get_symbol_size(exprloc, symbols)
            fixed_expr[exprloc] = m2_expr.ExprInt(offset, size)

        expr = expr.replace_expr(fixed_expr)
        expr = expr_simp(expr)
        args_out.append(expr)
    return args_out
def resolve_args_with_symbols(self, symbols=None):
    """Return the arguments with their known symbols replaced by offsets

    @symbols: mapping from a name to an object with an 'offset' attribute
        (default: empty)

    Identifiers named by an asm_label must be resolved: '$' and '_' use
    get_asm_offset / get_asm_next_offset, any other label has to be in
    @symbols. Other identifiers are left as is when unknown.
    Raise ValueError on an unknown label or on a symbol without offset.
    """
    if symbols is None:
        symbols = {}

    resolved_args = []
    for arg in self.args:
        replacements = {}
        for ident in m2_expr.get_expr_ids(arg):
            name = ident.name
            if isinstance(name, asmbloc.asm_label):
                name = name.name
                # special symbols
                if name == '$':
                    replacements[ident] = self.get_asm_offset(ident)
                    continue
                if name == '_':
                    replacements[ident] = self.get_asm_next_offset(ident)
                    continue
                if name not in symbols:
                    raise ValueError('unresolved symbol! %r' % ident)

            if name not in symbols:
                continue

            symbol = symbols[name]
            if symbol.offset is None:
                raise ValueError('The offset of label "%s" cannot be '
                                 'determined' % name)

            size = ident.size
            if size is None:
                size = self.get_symbol_size(ident, symbols)
            replacements[ident] = m2_expr.ExprInt(symbol.offset, size)

        resolved_args.append(expr_simp(arg.replace_expr(replacements)))
    return resolved_args
def substract_mems(self, arg1, arg2):
    """
    Compute the memory areas of @arg1 which are not covered by @arg2
    @arg1, @arg2: ExprMem

    Return a list of (ExprMem, value) couples, the value being the matching
    slice of the symbol currently bound to @arg1.
    """
    # Byte distance from arg1 to arg2, as a signed 32 bit value
    delta = self.expr_simp(arg2.arg - arg1.arg)
    delta = int(int32(delta.arg))

    covered_by_arg1 = interval([(0, arg1.size/8-1)])
    covered_by_arg2 = interval([(delta, delta + arg2.size/8-1)])

    remaining = []
    for first_byte, last_byte in covered_by_arg1 - covered_by_arg2:
        address = self.expr_simp(
            arg1.arg + m2_expr.ExprInt(first_byte, arg1.arg.size))
        value = self.expr_simp(
            self.symbols[arg1][first_byte*8:(last_byte+1)*8])
        mem = m2_expr.ExprMem(address, (last_byte - first_byte + 1)*8)
        assert mem.size == value.size
        remaining.append((mem, value))
    return remaining
def teq(ir, instr, arg1, arg2):
    """Semantic of teq: branch to an exception block depending on
    @arg1 - @arg2

    @ir: ir instance providing IRDst and the location helpers
    @instr: current instruction
    @arg1, @arg2: compared operands

    The exception block sets exception_flags to EXCEPT_DIV_BY_ZERO then
    resumes at the next instruction.
    Return the assignment list and the list holding the exception block.
    """
    loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size)
    loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr),
                                    ir.IRDst.size)

    raise_flag = m2_expr.ExprAssign(
        exception_flags,
        m2_expr.ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))
    resume = m2_expr.ExprAssign(ir.IRDst, loc_next_expr)
    blk_except = IRBlock(loc_except.index,
                         [AssignBlock([raise_flag, resume], instr)])

    # The condition is the difference of the operands: the next location is
    # the first choice, the exception block the second one
    branch = m2_expr.ExprCond(arg1 - arg2, loc_next_expr, loc_except_expr)
    return [m2_expr.ExprAssign(ir.IRDst, branch)], [blk_except]
def get_mem_overlapping(self, expr):
    """
    Look for the stored memories overlapping @expr
    @expr: target memory

    Every byte offset from 7 bytes before the target up to its last byte
    is probed in symbols_mem.
    Return a list of (byte offset, memory) couples.
    """
    base_ptr = self.expr_simp(expr.arg)
    found = []
    for delta in xrange(-7, expr.size / 8):
        candidate = self.expr_simp(
            base_ptr + m2_expr.ExprInt(delta, expr.arg.size))
        mem, origin = self.symbols.symbols_mem.get(candidate, (None, None))
        if mem is None:
            continue
        # Keep the access only if it is long enough to reach the target;
        # the others are too small to overlap it
        if -delta < origin.size / 8:
            found.append((delta, mem))
    return found
def ccmp(ir, instr, arg1, arg2, arg3, arg4):
    """Semantic of ccmp: conditionally update nf, zf, cf and of

    @ir: ir instance (not used here)
    @instr: current instruction (not used here)
    @arg1: first compared operand
    @arg2: second compared operand; an integer is rebuilt with the size
        of @arg1
    @arg3: expression whose bits 0, 1, 2 and 3 are the values given to
        nf, zf, cf and of
    @arg4: condition; its name is looked up in cond2expr

    Each flag is assigned an ExprCond on the condition, choosing between
    the value computed from @arg1 - @arg2 and the matching bit of @arg3.
    Return the assignment list and an empty list of extra blocks.
    """
    e = []
    # is_int is a method: without the call the test was always true and
    # non integer operands were handled as immediates
    if arg2.is_int():
        arg2 = m2_expr.ExprInt(arg2.arg.arg, arg1.size)

    default_nf = arg3[0:1]
    default_zf = arg3[1:2]
    default_cf = arg3[2:3]
    default_of = arg3[3:4]

    cond_expr = cond2expr[arg4.name]
    res = arg1 - arg2

    # NOTE(review): unlike the three other flags, nf is not computed from
    # res but keeps its current value - confirm this is intended
    new_nf = nf
    new_zf = update_flag_zf(res)[0].src
    new_cf = update_flag_sub_cf(arg1, arg2, res).src
    new_of = update_flag_sub_of(arg1, arg2, res).src

    e.append(
        m2_expr.ExprAff(nf, m2_expr.ExprCond(cond_expr, new_nf, default_nf)))
    e.append(
        m2_expr.ExprAff(zf, m2_expr.ExprCond(cond_expr, new_zf, default_zf)))
    e.append(
        m2_expr.ExprAff(cf, m2_expr.ExprCond(cond_expr, new_cf, default_cf)))
    e.append(
        m2_expr.ExprAff(of, m2_expr.ExprCond(cond_expr, new_of, default_of)))
    return e, []
def teq(ir, instr, arg1, arg2):
    """Semantic of teq: branch to an exception block depending on
    @arg1 - @arg2

    @ir: ir instance providing IRDst and the label helpers
    @instr: current instruction
    @arg1, @arg2: compared operands

    The exception block sets exception_flags to EXCEPT_DIV_BY_ZERO then
    resumes at the next instruction.
    Return the assignment list and the list holding the exception block.
    """
    lbl_except, lbl_except_expr = ir.gen_label_and_expr(ir.IRDst.size)
    lbl_next_expr = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size)

    raise_flag = m2_expr.ExprAff(
        exception_flags,
        m2_expr.ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))
    resume = m2_expr.ExprAff(ir.IRDst, lbl_next_expr)
    blk_except = IRBlock(lbl_except,
                         [AssignBlock([raise_flag, resume], instr)])

    # The condition is the difference of the operands: the next label is
    # the first choice, the exception block the second one
    branch = m2_expr.ExprCond(arg1 - arg2, lbl_next_expr, lbl_except_expr)
    return [m2_expr.ExprAff(ir.IRDst, branch)], [blk_except]
def reset_regs(self):
    """Bind every register of all_regs_ids_no_alias (aliases are not part
    of it) to a zero of its own size"""
    symbols_id = self.symbols.symbols_id
    registers = self.ir_arch.arch.regs.all_regs_ids_no_alias
    for register in registers:
        zero = m2_expr.ExprInt(0, size=register.size)
        symbols_id[register] = zero
def gen_pc_update(self, c, l):
    """Append to @c an assignment block setting PC to the offset of @l,
    then register @l in the lines of @c

    @c: object with 'irs' and 'lines' lists
    @l: instruction providing the offset
    """
    pc_value = m2_expr.ExprInt(l.offset, self.pc.size)
    pc_assign = m2_expr.ExprAff(self.pc, pc_value)
    c.irs.append(AssignBlock([pc_assign]))
    c.lines.append(l)
def get_mem_state(self, expr):
    """
    Evaluate the @expr memory in the current state
    @expr: the memory key

    Three cases are handled when a memory is known at the address of @expr:
    same or bigger stored memory (part lookup), smaller stored memory
    (bigger lookup, rebuilt chunk by chunk). When nothing is known at this
    address, the value is composed from the overlapping memories, the
    missing parts being read through func_read when possible. Without any
    overlap, @expr (or its func_read value) is returned.
    """
    ptr, size = expr.arg, expr.size
    ret = self.find_mem_by_addr(ptr)
    if not ret:
        overlaps = self.get_mem_overlapping(expr)
        if not overlaps:
            if self.func_read and ptr.is_int():
                expr = self.func_read(expr)
            return expr

        out = []
        off_base = 0
        for off, mem in overlaps:
            if off >= 0:
                new_size = min(size - off * 8, mem.size)
                tmp = self.expr_simp(self.symbols[mem][0:new_size])
                out.append((tmp, off_base, off_base + new_size))
                off_base += new_size
            else:
                # The stored memory starts before the target: skip its
                # first -off bytes
                new_size = min(size - off * 8, mem.size)
                tmp = self.expr_simp(self.symbols[mem][-off * 8:new_size])
                new_off_base = off_base + new_size + off * 8
                out.append((tmp, off_base, new_off_base))
                off_base = new_off_base

        missing_slice = self.rest_slice(out, 0, size)
        for slice_start, slice_stop in missing_slice:
            # Each missing part is addressed from the base pointer: the
            # pointer must not accumulate the offsets of previous parts
            slice_ptr = self.expr_simp(
                ptr + m2_expr.ExprInt(slice_start / 8, ptr.size))
            mem = m2_expr.ExprMem(slice_ptr, slice_stop - slice_start)
            if self.func_read and slice_ptr.is_int():
                mem = self.func_read(mem)
            out.append((mem, slice_start, slice_stop))

        out.sort(key=lambda x: x[1])
        args = [chunk for (chunk, _, _) in out]
        ret = self.expr_simp(m2_expr.ExprCompose(*args)[:size])
        return ret

    # bigger lookup
    if size > ret.size:
        rest = size
        out = []
        while rest:
            mem = self.find_mem_by_addr(ptr)
            if mem is None:
                # Unknown byte: read it alone
                mem = m2_expr.ExprMem(ptr, 8)
                if self.func_read and ptr.is_int():
                    value = self.func_read(mem)
                else:
                    value = mem
            elif rest >= mem.size:
                value = self.symbols[mem]
            else:
                value = self.symbols[mem][:rest]
            out.append(value)
            rest -= value.size
            # Here the pointer walks through the memory on purpose
            ptr = self.expr_simp(
                ptr + m2_expr.ExprInt(mem.size / 8, ptr.size))
        ret = self.expr_simp(m2_expr.ExprCompose(*out))
        return ret

    # part lookup
    ret = self.expr_simp(self.symbols[ret][:size])
    return ret
def gen_pc_update(self, irblock, instr):
    """Append to @irblock an assignment block setting PC to the offset of
    @instr

    @irblock: object with an 'irs' list
    @instr: instruction providing the offset, attached to the new block
    """
    pc_value = m2_expr.ExprInt(instr.offset, self.pc.size)
    pc_update = AssignBlock({self.pc: pc_value}, instr)
    irblock.irs.append(pc_update)
def gen_pc_update(self, assignments, instr):
    """Append to @assignments an assignment block setting PC to the offset
    of @instr

    @assignments: list receiving the new AssignBlock
    @instr: instruction providing the offset, attached to the new block
    """
    pc_update = AssignBlock(
        {self.pc: m2_expr.ExprInt(instr.offset, self.pc.size)},
        instr)
    assignments.append(pc_update)
def launch_depgraph(): global graphs, comments, sol_nb, settings, addr, ir_arch # Init machine = guess_machine() mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira bs = bin_stream_ida() mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) ir_arch = ira(mdis.symbol_pool) # Populate symbols with ida names for ad, name in idautils.Names(): if name is None: continue mdis.symbol_pool.add_label(name, ad) # Get the current function addr = idc.ScreenEA() func = ida_funcs.get_func(addr) blocks = mdis.dis_multiblock(func.startEA) # Generate IR for block in blocks: ir_arch.add_block(block) # Get settings settings = depGraphSettingsForm(ir_arch) settings.Execute() label, elements, line_nb = settings.label, settings.elements, settings.line_nb # Simplify affectations for irb in ir_arch.blocks.values(): irs = [] fix_stack = irb.label.offset is not None and settings.unalias_stack for assignblk in irb.irs: if fix_stack: stk_high = m2_expr.ExprInt(idc.GetSpd(assignblk.instr.offset), ir_arch.sp.size) fix_dct = {ir_arch.sp: mn.regs.regs_init[ir_arch.sp] + stk_high} new_assignblk = {} for dst, src in assignblk.iteritems(): if fix_stack: src = src.replace_expr(fix_dct) if dst != ir_arch.sp: dst = dst.replace_expr(fix_dct) dst, src = expr_simp(dst), expr_simp(src) new_assignblk[dst] = src irs.append(AssignBlock(new_assignblk, instr=assignblk.instr)) ir_arch.blocks[irb.label] = IRBlock(irb.label, irs) # Get dependency graphs dg = settings.depgraph graphs = dg.get(label, elements, line_nb, set([ir_arch.symbol_pool.getby_offset(func.startEA)])) # Display the result comments = {} sol_nb = 0 # Register and launch ida_kernwin.add_hotkey("Shift-N", next_element) treat_element()
def to_constraint(self):
    """Return the assignment of a zero, of the size of self.expr, to
    self.expr"""
    zero = m2_expr.ExprInt(0, self.expr.size)
    return m2_expr.ExprAff(self.expr, zero)
class DependencyResultImplicit(DependencyResult):

    """Stand for a result of a DependencyGraph with implicit option

    Provide path constraints using the z3 solver"""
    # Z3 Solver instance, set by 'emul'
    _solver = None

    # Assignment of 1 to the 1 bit constant 0: used as a constraint which
    # can never hold
    unsat_expr = m2_expr.ExprAff(m2_expr.ExprInt(0, 1),
                                 m2_expr.ExprInt(1, 1))

    def _gen_path_constraints(self, translator, expr, expected):
        """Generate path constraint from @expr. Handle special case with
        generated labels

        @translator: translator used to turn expressions into z3 ones
        @expr: destination expression to constrain
        @expected: value @expr has to take
        Return a z3 condition: the disjunction of the constraints of each
        compatible possible value, or the translation of unsat_expr when
        there is none.
        """
        out = []
        expected_is_label = expr_is_label(expected)
        for consval in possible_values(expr):
            # A label can only match the very same label ...
            if (expected_is_label and
                    consval.value != expected):
                continue
            # ... and a non label value can never match a label
            if (not expected_is_label and
                    expr_is_label(consval.value)):
                continue

            conds = z3.And(*[translator.from_expr(cond.to_constraint())
                             for cond in consval.constraints])
            if expected != consval.value:
                conds = z3.And(conds,
                               translator.from_expr(
                                   m2_expr.ExprAff(consval.value,
                                                   expected)))
            out.append(conds)

        if out:
            conds = z3.Or(*out)
        else:
            # Ex: expr: lblgen1, expected: 0x1234
            # -> Avoid inconsistent solution lblgen1 = 0x1234
            conds = translator.from_expr(self.unsat_expr)
        return conds

    def emul(self, ctx=None, step=False):
        """Symbolically execute the history and collect path constraints

        @ctx: (optional) initial context, applied over the architecture's
            initial register values
        @step: forwarded to eval_updt_irblock
        The solver holding the constraints is saved for 'is_satisfiable'
        and 'constraints'.
        Return a dictionary mapping each input to its evaluated value.
        """
        # Init
        # Work on a copy: updating regs_init itself would leak @ctx into
        # every later user of the architecture's initial state
        ctx_init = dict(self._ira.arch.regs.regs_init)
        if ctx is not None:
            ctx_init.update(ctx)
        solver = z3.Solver()
        symb_exec = SymbolicExecutionEngine(self._ira, ctx_init)
        history = self.history[::-1]
        history_size = len(history)
        translator = Translator.to_language("z3")
        size = self._ira.IRDst.size

        for hist_nb, label in enumerate(history, 1):
            # Only the last block of the history, when it is the initial
            # state's one, is limited to a line number
            if hist_nb == history_size and label == self.initial_state.label:
                line_nb = self.initial_state.line_nb
            else:
                line_nb = None
            irb = self.irblock_slice(self._ira.blocks[label], line_nb)

            # Emul the block and get back destination
            dst = symb_exec.eval_updt_irblock(irb, step=step)

            # Add constraint: the destination has to be the next block
            if hist_nb < history_size:
                next_label = history[hist_nb]
                expected = symb_exec.eval_expr(m2_expr.ExprId(next_label,
                                                              size))
                solver.add(
                    self._gen_path_constraints(translator, dst, expected))

        # Save the solver
        self._solver = solver

        # Return only inputs values (others could be wrong)
        return {element: symb_exec.eval_expr(element)
                for element in self.inputs}

    @property
    def is_satisfiable(self):
        """Return True iff the solution path admits at least one solution
        PRE: 'emul'
        """
        return self._solver.check() == z3.sat

    @property
    def constraints(self):
        """If satisfiable, return a valid solution as a Z3 Model instance
        Raise ValueError otherwise"""
        if not self.is_satisfiable:
            raise ValueError("Unsatisfiable")
        return self._solver.model()
def to_constraint(self):
    """Return the assignment, to the 1 bit constant 0, of a condition on
    self.expr choosing between the 1 bit constants 0 and 1"""
    cst1 = m2_expr.ExprInt(0, 1)
    cst2 = m2_expr.ExprInt(1, 1)
    selected = m2_expr.ExprCond(self.expr, cst1, cst2)
    return m2_expr.ExprAff(cst1, selected)
# Match the expected form ## isinstance(expr, m2_expr.ExprOp) is not needed: simplifications are ## attached to expression types if expr.op == "+" and \ len(expr.args) == 3 and \ expr.args.count(expr.args[0]) == len(expr.args): # Effective simplification return m2_expr.ExprOp("*", expr.args[0], m2_expr.ExprInt(3, expr.args[0].size)) else: # Do not simplify return expr a = m2_expr.ExprId('a') base_expr = a + a + a print "Without adding the simplification:" print "\t%s = %s" % (base_expr, expr_simp(base_expr)) # Enable pass expr_simp.enable_passes({m2_expr.ExprOp: [simp_add_mul]}) print "After adding the simplification:" print "\t%s = %s" % (base_expr, expr_simp(base_expr)) # Automatic fail assert (expr_simp(base_expr) == m2_expr.ExprOp("*", a, m2_expr.ExprInt(3, a.size)))
def dst_to_c(self, src):
    """Translate @src into C code

    @src: Expr, or a value turned into an ExprInt of the size of PC
    """
    if isinstance(src, m2_expr.Expr):
        return self.id_to_c(src)
    return self.id_to_c(m2_expr.ExprInt(src, self.PC.size))
def int2expr(self, v):
    """Return @v as an ExprInt of self.intsize bits, or None when @v has
    bits set outside of self.intmask"""
    out_of_mask = (v & ~self.intmask) != 0
    if not out_of_mask:
        return m2_expr.ExprInt(v, self.intsize)
    return None
def merge_sliceto_slice(args):
    """Merge the adjacent components of a composition

    @args: iterable of sequences indexed as (expression, start, stop)

    Integer components are masked to their width and merged when they are
    adjacent; slices of a same source are merged when both their positions
    and their slice bounds are adjacent; other components are kept as is.
    Return the resulting components, sorted.
    """
    sources = {}
    non_slice = {}
    sources_int = {}
    # Dispatch the components: integers (by start), slices (by sliced
    # expression), others (by start)
    for a in args:
        if isinstance(a[0], m2_expr.ExprInt):
            # sources_int[a.start] = a
            # copy ExprInt because we will inplace modify arg just below
            # /!\ TODO XXX never ever modify inplace args...
            sources_int[a[1]] = (m2_expr.ExprInt_fromsize(a[2] - a[1],
                                                          a[0].arg.__class__(
                                                              a[0].arg)),
                                 a[1],
                                 a[2])
        elif isinstance(a[0], m2_expr.ExprSlice):
            if not a[0].arg in sources:
                sources[a[0].arg] = []
            sources[a[0].arg].append(a)
        else:
            non_slice[a[1]] = a
    # find max stop to determine size
    # NOTE(review): max_size is not read again in this function
    max_size = None
    for a in args:
        if max_size is None or max_size < a[2]:
            max_size = a[2]

    # first simplify all num slices
    final_sources = []
    sorted_s = []
    for x in sources_int.values():
        x = list(x)
        # mask int
        v = x[0].arg & ((1 << (x[2] - x[1])) - 1)
        x[0] = m2_expr.ExprInt_from(x[0], v)
        x = tuple(x)
        sorted_s.append((x[1], x))
    sorted_s.sort()
    # Integers are popped from the highest start; each one absorbs the
    # integers ending exactly where it starts
    while sorted_s:
        start, v = sorted_s.pop()
        out = [m2_expr.ExprInt(v[0].arg), v[1], v[2]]
        size = v[2] - v[1]
        while sorted_s:
            if sorted_s[-1][1][2] != start:
                break
            s_start, s_stop = sorted_s[-1][1][1], sorted_s[-1][1][2]
            size += s_stop - s_start
            # The accumulated value is shifted above the lower integer
            a = m2_expr.mod_size2uint[size](
                (int(out[0].arg) << (out[1] - s_start)) +
                int(sorted_s[-1][1][0].arg))
            out[0] = m2_expr.ExprInt(a)
            sorted_s.pop()
            out[1] = s_start
        out[0] = m2_expr.ExprInt_fromsize(size, out[0].arg)
        final_sources.append((start, out))

    final_sources_int = final_sources
    # check if same sources have corresponding start/stop
    # is slice AND is sliceto
    simp_sources = []
    for args in sources.values():
        final_sources = []
        sorted_s = []
        for x in args:
            sorted_s.append((x[1], x))
        sorted_s.sort()
        # Same walk as for the integers, from the highest start
        while sorted_s:
            start, v = sorted_s.pop()
            ee = v[0].arg[v[0].start:v[0].stop]
            out = ee, v[1], v[2]
            while sorted_s:
                # The lower component has to end where this one starts ...
                if sorted_s[-1][1][2] != start:
                    break
                # ... and its slice has to end where this slice starts
                if sorted_s[-1][1][0].stop != out[0].start:
                    break

                start = sorted_s[-1][1][1]
                # out[0].start = sorted_s[-1][1][0].start
                o_e, _, o_stop = out
                o1, o2 = sorted_s[-1][1][0].start, o_e.stop
                o_e = o_e.arg[o1:o2]
                out = o_e, start, o_stop
                # update _size
                # out[0]._size = out[0].stop-out[0].start
                sorted_s.pop()
            out = out[0], start, out[2]

            final_sources.append((start, out))

        simp_sources += final_sources

    simp_sources += final_sources_int

    for i, v in non_slice.items():
        simp_sources.append((i, v))

    # Sort the (start, component) couples, then drop the sort key
    simp_sources.sort()
    simp_sources = [x[1] for x in simp_sources]
    return simp_sources