def is_local_variable(expr, ir_arch_a, mn):
    """Tell whether @expr is a memory access below the initial stack pointer.

    @expr: expression to test
    @ir_arch_a: object exposing the stack pointer register as `.sp`
    @mn: object exposing the initial register values as `.regs.regs_init`

    Returns True when @expr is a memory expression whose pointer, once the
    initial stack pointer value is subtracted and the result simplified, is a
    constant that compares signed-lower than zero. Returns None otherwise.
    """
    if expr.is_mem():
        initial_sp = mn.regs.regs_init[ir_arch_a.sp]
        offset = expr_simp(expr.ptr - initial_sp)
        if offset.is_int():
            zero = ExprInt(0, offset.size)
            below_zero = expr_simp(expr_is_signed_lower(offset, zero))
            if int(below_zero):
                return True
    return None
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    """Symbolically execute pending states until the work list is empty.

    @ir_arch: IR architecture handed to SymbolicExecutionEngine and get_block
    @ircfg: IR control flow graph handed to get_block
    @mdis: disassembly engine handed to get_block
    @states_todo: set of (addr, symbols, conds) tuples still to explore;
                  consumed with pop() and extended with add()
    @states_done: set of (addr, symbols, conds) tuples already explored

    For each state the block at `addr` is executed symbolically. A conditional
    destination is split into two new states, one per value (0 / 1) of the
    condition. Exploration of a path stops when the destination equals the
    module-level `ret_addr`.

    Raises ValueError when a destination is neither an integer nor a location.
    """
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))

        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        # The program counter is recomputed by the block itself
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))

            # Both branches must be resolvable. The previous unparenthesised
            # "a or b and c or d" accepted an unsupported branch as soon as
            # the other one was an integer or a location.
            supported_a = addr_a.is_int() or addr_a.is_loc()
            supported_b = addr_b.is_int() or addr_b.is_loc()
            if not (supported_a and supported_b):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")

            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)
            states_todo.add(
                (addr_a, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_a)))))
            states_todo.add(
                (addr_b, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_b)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
def evaluate_expression(expr: Expr, inputs_array: List[int]) -> int:
    """
    Evaluates an expression for an array of random values.

    Each input variable p0, p1, ..., pn is associated with an
    entry in the array of inputs [i0, i1, ..., in]. In the given
    expression, we replace p0 with i0, p1 with i1 etc. and evaluate
    the expression. As a result, the expression results in a
    final constant in form of ExprInt.

    Variables whose name is not exactly "p" followed by one or more
    digits are left untouched.

    Args:
        expr: Expression to evaluate
        inputs_array: List of random values.

    Returns:
        Int that is the return value of the evaluated expression.

    Raises:
        IndexError: If a variable index exceeds the size of `inputs_array`.
    """
    # dictionary of replacements
    replacements = {}
    # walk over unique variables in the expression
    for v in get_unique_variables(expr):
        # Only names of the exact form p<digits> are input placeholders.
        # The former pattern "^p[0-9]*" also matched names such as "pf",
        # which then failed (or mis-indexed) in the integer conversion.
        if not re.fullmatch(r"p[0-9]+", v.name):
            continue
        # calculate index for p
        index = int(v.name[1:])
        # insert into replacements dictionary
        replacements[v] = ExprInt(inputs_array[index], v.size)

    return int(expr_simp(expr.replace_expr(replacements)))
def _get_strings_from_dse(self, dse):
    """Collect strings from the memory modified in @dse.

    Every modified memory entry whose address and content both evaluate to
    integers is kept, ordered by address. Contents stored at consecutive
    addresses are concatenated (little-endian) and each resulting byte
    sequence is handed to self._update_strings_from_sequence together with
    the result set.

    @dse: engine exposing `symb.modified(...)` and `eval_expr(...)`
    Returns the set filled by self._update_strings_from_sequence.
    """
    writes = SortedList(key=lambda x: int(x[0]))
    for mem_expr, _ in dse.symb.modified(ids=False, mems=True):
        try:
            content = dse.eval_expr(mem_expr)
            location = dse.eval_expr(mem_expr.ptr)
        except RuntimeError:
            continue
        # Only fully concrete writes can be turned into bytes
        if location.is_int() and content.is_int():
            writes.add((location, content))

    strings = set()
    pending = b""
    expected_next = None
    for address, value in writes:
        width = value.size // 8
        if expected_next == address:
            # Contiguous with the previous write: extend the current run
            pending += int(value).to_bytes(width, "little")
        else:
            # Gap: flush the current run and start a new one
            self._update_strings_from_sequence(pending, strings)
            pending = int(value).to_bytes(width, "little")
        expected_next = expr_simp(address + ExprInt(width, address.size))

    self._update_strings_from_sequence(pending, strings)
    return strings
def _expr_str_to_equiv_class(self, expr_str: str) -> Tuple[str, Expr]:
    """
    Determines the equivalence class of a given Miasm IR expression
    (passed as string).

    Used as part of the parallel computation in `gen_oracle_map`.

    Args:
        expr_str: String containing a Miasm IR expression from the
                  pre-computed library.

    Returns:
        Tuple of equivalence class and expression (as abstract syntax tree).
    """
    # init AST translator
    translator = AbstractSyntaxTreeTranslator()
    # NOTE(review): eval() executes whatever Python the string contains;
    # only feed it strings from a trusted pre-computed library.
    parsed = eval(expr_str)
    # simplify and transform into abstract syntax tree
    ast = translator.from_expr(expr_simp(parsed))
    # calculate output behavior, then derive the equivalence class from it
    outputs = self.get_outputs(ast)
    return self.determine_equiv_class(ast, outputs), ast
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    """Symbolically execute pending states until the work list is empty.

    @ir_arch: IR architecture handed to SymbolicExecutionEngine and get_block
    @ircfg: IR control flow graph handed to get_block
    @mdis: disassembly engine handed to get_block
    @states_todo: set of (addr, symbols, conds) tuples still to explore;
                  consumed with pop() and extended with add()
    @states_done: set of (addr, symbols, conds) tuples already explored

    For each state the block at `addr` is executed symbolically. A conditional
    destination is split into two new states, one per value (0 / 1) of the
    condition. Exploration of a path stops when the destination equals the
    module-level `ret_addr`.

    Raises ValueError when a destination is neither an integer nor a location.
    """
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))

        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        # The program counter is recomputed by the block itself
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))

            # Both branches must be resolvable. The previous unparenthesised
            # "a or b and c or d" accepted an unsupported branch as soon as
            # the other one was an integer or a location.
            supported_a = addr_a.is_int() or addr_a.is_loc()
            supported_b = addr_b.is_int() or addr_b.is_loc()
            if not (supported_a and supported_b):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")

            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)
            states_todo.add((addr_a, symbexec.symbols.copy(), tuple(list(conds) + list(viewitems(cond_group_a)))))
            states_todo.add((addr_b, symbexec.symbols.copy(), tuple(list(conds) + list(viewitems(cond_group_b)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
def arm_guess_jump_table(
        mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db):
    """Heuristically recover jump table targets for @cur_bloc.

    @mnemo, @attrib: forwarded to get_ira() to obtain the IR class
    @pool_bin: binary stream read with getbytes(offset, 4)
    @cur_bloc: asm block to lift; receives an AsmConstraintTo per target
    @offsets_to_dis: set receiving every recovered target offset
    @loc_db: location database used to create the target locations

    The block is lifted to IR. When the program counter is loaded from a
    32-bit memory expression whose address matches `jra + jrb` with `jrb`
    constant, consecutive 4-byte entries are read starting at that constant
    until a read fails or an entry lies more than `max_diff_addr` away from
    the table base.

    Raises NotImplementedError when the address does not match the pattern.
    """
    ira = get_ira(mnemo, attrib)

    jra = ExprId('jra')
    jrb = ExprId('jrb')

    sp = LocationDB()
    ir_arch = ira(sp)
    # NOTE(review): new_ircfg is invoked on the class returned by get_ira,
    # not on the `ir_arch` instance - confirm this is intended.
    ircfg = ira.new_ircfg()
    ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg)

    max_table_entry = 10000
    max_diff_addr = 0x100000  # heuristic

    for irblock in viewvalues(ircfg.blocks):
        # Keep the last value assigned to the program counter in this block
        pc_val = None
        for exprs in irblock:
            for e in exprs:
                if e.dst == ir_arch.pc:
                    pc_val = e.src
        # Also covers the "no PC assignment" case (None is not an ExprMem)
        if not isinstance(pc_val, ExprMem):
            continue

        assert(pc_val.size == 32)
        print(pc_val)
        ad = expr_simp(pc_val.arg)
        print(ad)
        res = match_expr(ad, jra + jrb, {jra, jrb})
        if res is False:
            raise NotImplementedError('not fully functional')
        print(res)
        if not isinstance(res[jrb], ExprInt):
            raise NotImplementedError('not fully functional')
        base_ad = int(res[jrb])
        print(base_ad)

        addrs = set()
        # Entries 0..max_table_entry inclusive, as in the original counter loop
        for i in range(max_table_entry + 1):
            try:
                ad = upck32(pool_bin.getbytes(base_ad + 4 * i, 4))
            except Exception:
                # End of readable data: stop scanning. A bare "except:"
                # would also have swallowed KeyboardInterrupt / SystemExit.
                break
            if abs(ad - base_ad) > max_diff_addr:
                break
            addrs.add(ad)
        print([hex(x) for x in addrs])

        for ad in addrs:
            offsets_to_dis.add(ad)
            target = loc_db.get_or_create_offset_location(ad)
            cur_bloc.addto(AsmConstraintTo(target))
def arm_guess_jump_table(dis_engine, cur_block, offsets_to_dis):
    """Heuristically recover jump table targets for @cur_block.

    @dis_engine: disassembly engine; its `arch`, `attrib`, `loc_db` and
                 `bin_stream` attributes are used
    @cur_block: asm block to lift; receives an AsmConstraintTo per target
    @offsets_to_dis: set receiving every recovered target offset

    The block is lifted to IR. When the program counter is loaded from a
    32-bit memory expression whose address matches `jra + jrb` with `jrb`
    constant, consecutive 4-byte entries are read starting at that constant
    until a read fails or an entry lies more than `max_diff_addr` away from
    the table base.

    Raises NotImplementedError when the address does not match the pattern.
    """
    arch = dis_engine.arch
    loc_db = dis_engine.loc_db
    ira = get_ira(arch, dis_engine.attrib)

    jra = ExprId('jra')
    jrb = ExprId('jrb')

    ir_arch = ira(loc_db)
    # NOTE(review): new_ircfg is invoked on the class returned by get_ira,
    # not on the `ir_arch` instance - confirm this is intended.
    ircfg = ira.new_ircfg()
    ir_arch.add_asmblock_to_ircfg(cur_block, ircfg)

    max_table_entry = 10000
    max_diff_addr = 0x100000  # heuristic

    for irblock in viewvalues(ircfg.blocks):
        # Keep the last value assigned to the program counter in this block
        pc_val = None
        for exprs in irblock:
            for e in exprs:
                if e.dst == ir_arch.pc:
                    pc_val = e.src
        # Also covers the "no PC assignment" case (None is not an ExprMem)
        if not isinstance(pc_val, ExprMem):
            continue

        assert (pc_val.size == 32)
        print(pc_val)
        ad = expr_simp(pc_val.arg)
        print(ad)
        res = match_expr(ad, jra + jrb, {jra, jrb})
        if res is False:
            raise NotImplementedError('not fully functional')
        print(res)
        if not isinstance(res[jrb], ExprInt):
            raise NotImplementedError('not fully functional')
        base_ad = int(res[jrb])
        print(base_ad)

        addrs = set()
        # Entries 0..max_table_entry inclusive, as in the original counter loop
        for i in range(max_table_entry + 1):
            try:
                ad = upck32(dis_engine.bin_stream.getbytes(base_ad + 4 * i, 4))
            except Exception:
                # End of readable data: stop scanning. A bare "except:"
                # would also have swallowed KeyboardInterrupt / SystemExit.
                break
            if abs(ad - base_ad) > max_diff_addr:
                break
            addrs.add(ad)
        print([hex(x) for x in addrs])

        for ad in addrs:
            offsets_to_dis.add(ad)
            target = loc_db.get_or_create_offset_location(ad)
            cur_block.addto(AsmConstraintTo(target))
def propag_expr_cst(self, expr):
    """Replace the identifiers of @expr that evaluate to a constant.

    @expr: Expression to update

    Every ExprId read by @expr (memory reads included in the scan) is
    evaluated in the current state; those for which self.is_expr_cst() holds
    are substituted by their value. The simplified result is returned.
    """
    constants = {}
    for candidate in expr.get_r(mem_read=True):
        # Only ExprId can be safely propagated
        if candidate.is_id():
            evaluated = self.eval_expr(candidate)
            if self.is_expr_cst(self.ir_arch, evaluated):
                constants[candidate] = evaluated
    propagated = expr.replace_expr(constants)
    return expr_simp(propagated)
def propag_expr_cst(self, expr):
    """Replace the identifiers of @expr that evaluate to a constant.

    @expr: Expression to update

    Every ExprId read by @expr (memory reads included in the scan) is
    evaluated in the current state; those for which self.is_expr_cst() holds
    are substituted by their value. The simplified result is returned.
    """
    constants = {}
    for candidate in expr.get_r(mem_read=True):
        # Only ExprId can be safely propagated
        if candidate.is_id():
            evaluated = self.eval_expr(candidate)
            if self.is_expr_cst(self.ir_arch, evaluated):
                constants[candidate] = evaluated
    propagated = expr.replace_expr(constants)
    return expr_simp(propagated)
def get_assignblock_for_state(ircfg, ir_arch, symbols_init, state_register, state):
    """Return the IR blocks reached when `state_register` equals @state.

    @ircfg: IR control flow graph to scan
    @ir_arch: IR architecture used for symbolic execution
    @symbols_init: initial symbolic state (register -> initial value)
    @state_register: register compared by the dispatching blocks
    @state: value the register is compared against

    Each node's destination is taken directly from the block when it holds a
    single assignment block, and computed by symbolic execution otherwise.
    Nodes whose simplified destination is an ExprCond on an '==' operation
    comparing `state_register` (or its initial value) with @state contribute
    one referenced block.

    The process exits through sys.exit() if a node has no IR block.
    """
    referenced_blocks = []

    for cfgnode in ircfg.nodes():
        irblock = ircfg.get_block(cfgnode)
        if not irblock:
            print('[!] Could not get IRBLOCK!')
            sys.exit()

        if len(irblock.assignblks) == 1:
            next_addr = irblock.dst
        else:
            engine = SymbolicExecutionEngine(ir_arch, symbols_init)
            next_addr = engine.run_block_at(
                ircfg, get_address(ircfg.loc_db, cfgnode))

        if next_addr is None:
            continue

        next_addr = expr_simp(next_addr)
        is_equality_test = (
            isinstance(next_addr, ExprCond)
            and isinstance(next_addr.cond, ExprOp)
            and next_addr.cond.op == '=='
        )
        if not is_equality_test:
            continue

        # Descend through first operands until one of them is an ExprId
        # (or until the current node is no longer an operation).
        args = next_addr.cond
        while not isinstance(args.args[0], ExprId):
            args = args.args[0]
            if not isinstance(args, ExprOp):
                break

        compares_state = (
            hasattr(args, 'args')
            and args.args[0] in (state_register, symbols_init[state_register])
            and args.args[1] == state
        )
        if not compares_state:
            continue

        # The original test was `op in ('CC_S>')`: without a trailing comma
        # that is a substring check on the string 'CC_S>', not a membership
        # test, so partial names (or an empty op) matched as well.
        if hasattr(irblock.dst.cond, 'op') and irblock.dst.cond.op == 'CC_S>':
            dst = get_address(ircfg.loc_db, irblock.dst.src2.loc_key)
            next_block = ircfg.get_block(dst)
            dst = get_address(ircfg.loc_db, next_block.dst.src1.loc_key)
        else:
            dst = get_address(ircfg.loc_db, irblock.dst.src1.loc_key)

        referenced_blocks.append(ircfg.get_block(dst))

    return referenced_blocks
def gen_from_expression(expr: Expr, variables: List[Expr], num_samples: int) -> SynthesisOracle:
    """
    Builds a SynthesisOracle instance from a given expression.

    `num_samples` I/O pairs are produced. For each sample:

    1. One random value per variable is drawn with `get_rand_input()` and
       wrapped in an ExprInt of the variable's size.
    2. The variables are substituted in the expression, which is then
       simplified; the result must be an ExprInt.
    3. The tuple of inputs is mapped to that result.

    Args:
        expr (Expr): Expression representing a function f(x0, ..., xi).
        variables (List[Expr]): List of variables contained in `expr`.
        num_samples (int): Number of I/O samples to evaluate.

    Returns:
        SynthesisOracle: Generated SynthesisOracle instance.
    """
    io_pairs = {}

    for _ in range(num_samples):
        # one random draw per variable, in the order of `variables`
        draws = [(var, get_rand_input()) for var in variables]
        # substitution applied to the expression
        substitution = {var: ExprInt(val, var.size) for var, val in draws}
        # inputs used as key of the I/O map
        inputs = tuple(ExprInt(val, var.size) for var, val in draws)

        # evaluate expression to obtain output
        output = expr_simp(expr.replace_expr(substitution))
        # output should be an ExprInt
        assert output.is_int()

        io_pairs[inputs] = output

    return SynthesisOracle(io_pairs)
def elements(self):
    """Return a one-element set holding the expression selected in the form.

    The selection is read from self.cbReg.value:
    - a key of self.stk_args yields the simplified memory expression of the
      corresponding stack argument and sets self.stk_unalias_force;
    - any other non-empty value is looked up among the architecture's
      registers by name (the element is None when the name is unknown);
    - an empty value raises ValueError.
    """
    selected = self.cbReg.value

    if selected not in self.stk_args:
        if not selected:
            raise ValueError("Unknown element '%s'!" % selected)
        register = self.ira.arch.regs.all_regs_ids_byname.get(selected)
        return {register}

    instr = self.ircfg.blocks[self.loc_key][self.line_nb].instr
    arg_index = self.stk_args[selected]
    sp_width = ir_arch.sp.size
    stk_high = m2_expr.ExprInt(idc.get_spd(instr.offset), sp_width)
    stk_off = m2_expr.ExprInt(self.ira.sp.size // 8 * arg_index, sp_width)
    address = self.mn.regs.regs_init[ir_arch.sp] + stk_high + stk_off
    element = expr_simp(m2_expr.ExprMem(address, self.ira.sp.size))
    # Force stack unaliasing
    self.stk_unalias_force = True
    return {element}
def eval_updt_irblock(self, irb, step=False):
    """
    Symbolic execution of the @irb on the current state, annotating each
    instruction with the C form and type of the expressions it uses.

    @irb: irblock instance
    @step: display intermediate steps (not used by this implementation)

    Returns the evaluation of the lifter's IRDst after the block.
    """
    comments = {}

    for index, assignblk in enumerate(irb):
        if set(assignblk) == {self.lifter.IRDst, self.lifter.pc}:
            # Don't display on jxx
            continue

        instr = assignblk.instr
        reads = assignblk.get_r()
        writes = assignblk.get_w()

        # Replace PC with value to match IR args
        next_pc = m2_expr.ExprInt(instr.offset + instr.l,
                                  self.lifter.pc.size)
        pc_fixed = {self.lifter.pc: next_pc}

        # Written memory locations are inspected along with the reads
        reads.update(arg for arg in writes if arg.is_mem())

        todo = set()
        for arg in reads:
            fixed = expr_simp(arg.replace_expr(pc_fixed))
            # Written non-memory destinations are not annotated
            if fixed not in writes or fixed.is_mem():
                todo.add(fixed)

        for expr in todo:
            if expr.is_int():
                continue
            shown = expr
            for c_str, c_type in self.chandler.expr_to_c_and_types(
                    expr, self.symbols):
                links = self.cst_propag_link.get((irb.loc_key, index), {})
                shown = links.get(shown, shown)
                comments.setdefault(instr.offset, set()).add(
                    "\n%s: %s\n%s" % (shown, c_str, c_type))

        self.eval_updt_assignblk(assignblk)

    for offset, value in viewitems(comments):
        text = '\n'.join(value)
        idc.set_cmt(offset, text, 0)
        print("%x\n" % offset, text)

    return self.eval_expr(self.lifter.IRDst)
def elements(self):
    """Return a one-element set holding the expression selected in the form.

    The selection is read from self.cbReg.value:
    - a key of self.stk_args yields the simplified memory expression of the
      corresponding stack argument and sets self.stk_unalias_force;
    - any other non-empty value is looked up among the architecture's
      registers by name (the element is None when the name is unknown);
    - an empty value raises ValueError.
    """
    selected = self.cbReg.value

    if selected not in self.stk_args:
        if not selected:
            raise ValueError("Unknown element '%s'!" % selected)
        register = self.ira.arch.regs.all_regs_ids_byname.get(selected)
        return {register}

    instr = self.ircfg.blocks[self.loc_key][self.line_nb].instr
    arg_index = self.stk_args[selected]
    sp_width = ir_arch.sp.size
    stk_high = m2_expr.ExprInt(idc.GetSpd(instr.offset), sp_width)
    stk_off = m2_expr.ExprInt(self.ira.sp.size // 8 * arg_index, sp_width)
    address = mn.regs.regs_init[ir_arch.sp] + stk_high + stk_off
    element = expr_simp(m2_expr.ExprMem(address, self.ira.sp.size))
    # Force stack unaliasing
    self.stk_unalias_force = True
    return {element}
def resolve_args_with_symbols(self, symbols=None):
    """Return self.args with every location replaced by a concrete value.

    @symbols: location database used for the resolution; a new LocationDB()
              is created when omitted

    For each location found in an argument:
    - a location named b'$' is replaced by self.get_asm_offset(),
    - a location named b'_' is replaced by self.get_asm_next_offset(),
    - a location with a known offset is replaced by that offset as an
      ExprInt of the location's size.
    Each argument is simplified after substitution.

    Raises ValueError when a location has neither a name nor an offset, or
    has names but no offset.
    """
    if symbols is None:
        symbols = LocationDB()

    args_out = []
    for expr in self.args:
        # try to resolve symbols using symbols (0 for default value)
        fixed_expr = {}
        for exprloc in m2_expr.get_expr_locs(expr):
            loc_key = exprloc.loc_key
            names = symbols.get_location_names(loc_key)

            # special symbols
            if b'$' in names:
                fixed_expr[exprloc] = self.get_asm_offset(exprloc)
                continue
            if b'_' in names:
                fixed_expr[exprloc] = self.get_asm_next_offset(exprloc)
                continue

            offset = symbols.get_location_offset(loc_key)
            if offset is not None:
                fixed_expr[exprloc] = m2_expr.ExprInt(offset, exprloc.size)
                continue

            # From here on the offset is known to be None. The former second
            # lookup and its "offset found" branch could never be reached.
            if not names:
                raise ValueError('Unresolved symbol: %r' % exprloc)
            raise ValueError(
                'The offset of loc_key "%s" cannot be determined' % names)

        args_out.append(expr_simp(expr.replace_expr(fixed_expr)))
    return args_out
def resolve_args_with_symbols(self, symbols=None):
    """Return self.args with every location replaced by a concrete value.

    @symbols: location database used for the resolution; a new LocationDB()
              is created when omitted

    For each location found in an argument:
    - a location named b'$' is replaced by self.get_asm_offset(),
    - a location named b'_' is replaced by self.get_asm_next_offset(),
    - a location with a known offset is replaced by that offset as an
      ExprInt of the location's size.
    Each argument is simplified after substitution.

    Raises ValueError when a location has neither a name nor an offset, or
    has names but no offset.
    """
    if symbols is None:
        symbols = LocationDB()

    args_out = []
    for expr in self.args:
        # try to resolve symbols using symbols (0 for default value)
        fixed_expr = {}
        for exprloc in m2_expr.get_expr_locs(expr):
            loc_key = exprloc.loc_key
            names = symbols.get_location_names(loc_key)

            # special symbols
            if b'$' in names:
                fixed_expr[exprloc] = self.get_asm_offset(exprloc)
                continue
            if b'_' in names:
                fixed_expr[exprloc] = self.get_asm_next_offset(exprloc)
                continue

            offset = symbols.get_location_offset(loc_key)
            if offset is not None:
                fixed_expr[exprloc] = m2_expr.ExprInt(offset, exprloc.size)
                continue

            # From here on the offset is known to be None. The former second
            # lookup and its "offset found" branch could never be reached.
            if not names:
                raise ValueError('Unresolved symbol: %r' % exprloc)
            raise ValueError(
                'The offset of loc_key "%s" cannot be determined' % names
            )

        args_out.append(expr_simp(expr.replace_expr(fixed_expr)))
    return args_out
def eval_updt_irblock(self, irb, step=False):
    """
    Symbolic execution of the @irb on the current state, annotating each
    instruction with the C form and type of the expressions it uses.

    @irb: irblock instance
    @step: display intermediate steps (not used by this implementation)

    Returns the evaluation of the IR architecture's IRDst after the block.
    """
    comments = {}

    for index, assignblk in enumerate(irb):
        if set(assignblk) == {self.ir_arch.IRDst, self.ir_arch.pc}:
            # Don't display on jxx
            continue

        instr = assignblk.instr
        reads = assignblk.get_r()
        writes = assignblk.get_w()

        # Replace PC with value to match IR args
        next_pc = m2_expr.ExprInt(instr.offset + instr.l,
                                  self.ir_arch.pc.size)
        pc_fixed = {self.ir_arch.pc: next_pc}

        # Written memory locations are inspected along with the reads
        reads.update(arg for arg in writes if arg.is_mem())

        todo = set()
        for arg in reads:
            fixed = expr_simp(arg.replace_expr(pc_fixed))
            # Written non-memory destinations are not annotated
            if fixed not in writes or fixed.is_mem():
                todo.add(fixed)

        for expr in todo:
            if expr.is_int():
                continue
            shown = expr
            for c_str, c_type in self.chandler.expr_to_c_and_types(
                    expr, self.symbols):
                links = self.cst_propag_link.get((irb.loc_key, index), {})
                shown = links.get(shown, shown)
                comments.setdefault(instr.offset, set()).add(
                    "\n%s: %s\n%s" % (shown, c_str, c_type))

        self.eval_updt_assignblk(assignblk)

    for offset, value in viewitems(comments):
        text = '\n'.join(value)
        idc.MakeComm(offset, text)
        print("%x\n" % offset, text)

    return self.eval_expr(self.ir_arch.IRDst)
def dis(cls, bs_o, mode_o = None, offset=0):
    """Disassemble one instruction from @bs_o at @offset.

    @bs_o: bin_stream instance, or raw data wrapped into a bin_stream_str
    @mode_o: disassembly mode forwarded to cls.pre_dis()
    @offset: offset of the instruction in the stream

    Every candidate returned by cls.guess_mnemo() is decoded field by field;
    candidates that do not fit in the stream, fail to decode or are rejected
    by post_dis() are dropped. The stream is kept in atomic mode for the
    whole decoding and always leaves it, whatever the outcome.

    Returns the decoded instruction. When several candidates decode, the
    first one flagged as alias is returned.

    Raises Disasm_Exception when no candidate is guessed or none decodes,
    and NotImplementedError when several non-alias candidates decode.
    """
    if not isinstance(bs_o, bin_stream):
        bs_o = bin_stream_str(bs_o)

    offset_o = offset
    out = []
    out_c = []
    alias = False

    bs_o.enter_atomic_mode()
    # try/finally guarantees atomic mode is left exactly once on every path,
    # including exceptions raised by helpers that were not wrapped before.
    try:
        pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis(
            bs_o, mode_o, offset)
        candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset)
        if not candidates:
            raise Disasm_Exception('cannot disasm (guess) at %X' % offset)

        if hasattr(bs, 'getlen'):
            bs_l = bs.getlen()
        else:
            bs_l = len(bs)

        for c in candidates:
            # Placeholders are required: extra arguments without them make
            # the logging module report a formatting error.
            log.debug("%s %s %s", "*" * 40, mode, c.mode)
            log.debug(c.fields)

            c = cls.all_mn_inst[c][0]

            c.reset_class()
            c.mode = mode

            if not c.add_pre_dis_info(pre_dis_info):
                continue

            todo = {}
            getok = True
            fname_values = dict(pre_dis_info)
            offset_b = offset * 8

            total_l = 0
            for i, f in enumerate(c.fields_order):
                if f.flen is not None:
                    l = f.flen(mode, fname_values)
                else:
                    l = f.l
                if l is not None:
                    total_l += l
                    f.l = l
                    f.is_present = True
                    log.debug("FIELD %s %s %s %s",
                              f.__class__, f.fname, offset_b, l)
                    # Not enough bits left in the stream for this field
                    if bs_l * 8 - offset_b < l:
                        getok = False
                        break
                    bv = cls.getbits(bs, mode, offset_b, l)
                    offset_b += l
                    if not f.fname in fname_values:
                        fname_values[f.fname] = bv
                    todo[i] = bv
                else:
                    f.is_present = False
                    todo[i] = None

            if not getok:
                continue

            c.l = prefix_len + total_l // 8

            # Start from a known value: previously `ret` was unbound (or
            # stale from the previous candidate) when nothing was decoded.
            ret = True
            for i in c.to_decode:
                f = c.fields_order[i]
                if f.is_present:
                    ret = f.decode(todo[i])
                    if not ret:
                        log.debug("cannot decode %r", f)
                        break

            if not ret:
                continue

            for a in c.args:
                a.expr = expr_simp(a.expr)

            c.b = cls.getbytes(bs, offset_o, c.l)
            c.offset = offset_o
            c = c.post_dis()
            if c is None:
                continue

            c_args = [a.expr for a in c.args]
            instr = cls.instruction(c.name, mode, c_args,
                                    additional_info=c.additional_info())
            instr.l = prefix_len + total_l // 8
            instr.b = cls.getbytes(bs, offset_o, instr.l)
            instr.offset = offset_o
            instr.get_info(c)
            if c.alias:
                alias = True
            out.append(instr)
            out_c.append(c)
    finally:
        bs_o.leave_atomic_mode()

    if not out:
        raise Disasm_Exception('cannot disasm at %X' % offset_o)
    if len(out) != 1:
        if not alias:
            log.warning('dis multiple args ret default')

        for i, o in enumerate(out_c):
            if o.alias:
                return out[i]
        raise NotImplementedError(
            'Multiple disas: \n' +
            "\n".join(str(x) for x in out)
        )
    return out[0]
def _is_suitable_simplification_candidate(self, expr: Expr, simplified: Expr) -> bool:
    """
    Checks if a simplification candidate is suitable.

    This check ensures the semantical correctness of the simplification.

    The candidate is rejected

    1. If it still contains any unification variable (named p0, p1, ...).
       In this case, not every variable of the simplification candidate
       could be matched to a terminal expression in the original one.

    2. If the expression graph of the original expression has no more
       nodes than the one of the candidate. In this case, the
       simplification would not make the expression smaller.

    3. If Miasm's expression simplification results in the same expression
       for the original and the simplified one. In this case, the lookup
       in the simplification oracle is not required.

    4. If the original expression is not semantically equivalent to the
       simplified one. By default only candidates for which the SMT solver
       finds a counterexample are rejected; a solver result that is neither
       a proof nor a counterexample is accepted. When the
       `enforce_equivalence` flag is set, the candidate is accepted only if
       the solver proves the equivalence.

    Args:
        expr: Original expression.
        simplified: Simplified expression candidate.

    Returns:
        True if the candidate is suitable, False if it has to be skipped.
    """
    # contains placeholder variables; the name has to be exactly p<digits>
    # (the former pattern "^p[0-9]*" matched every name starting with "p")
    if any(
            re.fullmatch(r"p[0-9]+", v.name)
            for v in get_unique_variables(simplified)
    ):
        logger.debug(
            f"{expr} <==> {simplified} (incorrect variable replacement)")
        return False

    # checks if original is smaller to simplified
    if len(expr.graph().nodes()) <= len(simplified.graph().nodes()):
        return False

    # same normalized expression
    if expr_simp(expr) == expr_simp(simplified):
        return False

    # query the solver once; the result serves both checks below
    result = self.check_semantical_equivalence(expr, simplified)

    # SMT solver proves non-equivalence or timeouts
    if self.enforce_equivalence and result != z3.unsat:
        logger.debug(
            f"{expr} <==> {simplified} (not semantically equivalent)")
        return False

    # SMT solver finds a counter example
    if result == z3.sat:
        logger.debug(
            f"{expr} <==> {simplified} (not semantically equivalent, counterexample found)"
        )
        return False

    return True
(b0, expr_is_signed_lower_or_equal, int_1, int_m1), (b1, expr_is_signed_greater_or_equal, int_m1, int_m1), (b1, expr_is_signed_lower_or_equal, int_m1, int_m1), (b1, expr_is_signed_greater, int_m1, int_m2), (b1, expr_is_signed_lower, int_m2, int_m1), (b0, expr_is_signed_greater, int_m2, int_m1), (b0, expr_is_signed_lower, int_m1, int_m2), (b1, expr_is_signed_greater_or_equal, int_m1, int_m2), (b1, expr_is_signed_lower_or_equal, int_m2, int_m1), (b0, expr_is_signed_greater_or_equal, int_m2, int_m1), (b0, expr_is_signed_lower_or_equal, int_m1, int_m2), # eq/neq (b1, expr_is_equal, int_1, int_1), (b1, expr_is_not_equal, int_0, int_1), (b0, expr_is_equal, int_1, int_0), (b0, expr_is_not_equal, int_0, int_0), ] for result, func, arg1, arg2 in tests: assert result == expr_simp(func(arg1, arg2))
def dis(cls, bs_o, mode_o = None, offset=0):
    """Disassemble one instruction from @bs_o at @offset.

    @bs_o: bin_stream instance, or raw data wrapped into a bin_stream_str
    @mode_o: disassembly mode forwarded to cls.pre_dis()
    @offset: offset of the instruction in the stream

    Every candidate returned by cls.guess_mnemo() is decoded field by field;
    candidates that do not fit in the stream, fail to decode or are rejected
    by post_dis() are dropped. The stream is kept in atomic mode for the
    whole decoding and always leaves it, whatever the outcome.

    Returns the decoded instruction. When several candidates decode, the
    first one flagged as alias is returned.

    Raises Disasm_Exception when no candidate is guessed or none decodes,
    and NotImplementedError when several non-alias candidates decode.
    """
    if not isinstance(bs_o, bin_stream):
        bs_o = bin_stream_str(bs_o)

    offset_o = offset
    out = []
    out_c = []
    alias = False

    bs_o.enter_atomic_mode()
    # try/finally guarantees atomic mode is left exactly once on every path,
    # including exceptions raised by helpers that were not wrapped before.
    try:
        pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis(
            bs_o, mode_o, offset)
        candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset)
        if not candidates:
            raise Disasm_Exception('cannot disasm (guess) at %X' % offset)

        if hasattr(bs, 'getlen'):
            bs_l = bs.getlen()
        else:
            bs_l = len(bs)

        for c in candidates:
            # Placeholders are required: extra arguments without them make
            # the logging module report a formatting error.
            log.debug("%s %s %s", "*" * 40, mode, c.mode)
            log.debug(c.fields)

            c = cls.all_mn_inst[c][0]

            c.reset_class()
            c.mode = mode

            if not c.add_pre_dis_info(pre_dis_info):
                continue

            todo = {}
            getok = True
            fname_values = dict(pre_dis_info)
            offset_b = offset * 8

            total_l = 0
            for i, f in enumerate(c.fields_order):
                if f.flen is not None:
                    l = f.flen(mode, fname_values)
                else:
                    l = f.l
                if l is not None:
                    total_l += l
                    f.l = l
                    f.is_present = True
                    log.debug("FIELD %s %s %s %s",
                              f.__class__, f.fname, offset_b, l)
                    # Not enough bits left in the stream for this field
                    if bs_l * 8 - offset_b < l:
                        getok = False
                        break
                    bv = cls.getbits(bs, mode, offset_b, l)
                    offset_b += l
                    if not f.fname in fname_values:
                        fname_values[f.fname] = bv
                    todo[i] = bv
                else:
                    f.is_present = False
                    todo[i] = None

            if not getok:
                continue

            c.l = prefix_len + total_l // 8

            # Start from a known value: previously `ret` was unbound (or
            # stale from the previous candidate) when nothing was decoded.
            ret = True
            for i in c.to_decode:
                f = c.fields_order[i]
                if f.is_present:
                    ret = f.decode(todo[i])
                    if not ret:
                        log.debug("cannot decode %r", f)
                        break

            if not ret:
                continue

            for a in c.args:
                a.expr = expr_simp(a.expr)

            c.b = cls.getbytes(bs, offset_o, c.l)
            c.offset = offset_o
            c = c.post_dis()
            if c is None:
                continue

            c_args = [a.expr for a in c.args]
            instr = cls.instruction(c.name, mode, c_args,
                                    additional_info=c.additional_info())
            instr.l = prefix_len + total_l // 8
            instr.b = cls.getbytes(bs, offset_o, instr.l)
            instr.offset = offset_o
            instr.get_info(c)
            if c.alias:
                alias = True
            out.append(instr)
            out_c.append(c)
    finally:
        bs_o.leave_atomic_mode()

    if not out:
        raise Disasm_Exception('cannot disasm at %X' % offset_o)
    if len(out) != 1:
        if not alias:
            log.warning('dis multiple args ret default')

        for i, o in enumerate(out_c):
            if o.alias:
                return out[i]
        raise NotImplementedError(
            'Multiple disas: \n' +
            "\n".join(str(x) for x in out)
        )
    return out[0]
def simplify(self, expr: Expr) -> Expr:
    """
    High-level algorithm to simplify an expression.

    The expression is turned into an abstract syntax tree (AST) which is
    rewritten in a fixpoint iteration:

    1. The subexpressions of the AST are visited in the order given by
       `get_subexpressions`; subtrees rejected by `_skip_subtree` are
       ignored.

    2. Each remaining subtree is unified (terminal nodes replaced by
       placeholder variables) and its equivalence class is looked up in
       the pre-computed oracle.

    3. When the oracle knows the class and a suitable simplification is
       found, the subtree is replaced in the AST by a fresh global
       placeholder variable which is mapped to the simplified subtree;
       the walk then restarts on the updated AST.

    4. Once a full walk leaves the AST unchanged, the global placeholder
       variables are replaced by their simplified subtrees.

    Args:
        expr: Expression to simplify

    Returns:
        Simplified expression
    """
    # transform expr to abstract syntax tree
    ast = self._translator_ast.from_expr(expr)
    # maps global placeholder variables to simplified subtrees
    global_unification_dict: Dict[Expr, Expr] = {}
    # number of global placeholder variables handed out so far
    global_ctr = 0

    logger.info(f"initial ast: {ast}")

    reached_fixpoint = False
    while not reached_fixpoint:
        snapshot = ast.copy()

        for subtree in get_subexpressions(ast):
            if self._skip_subtree(subtree):
                continue

            # unify the subtree and determine its equivalence class
            unification_dict = gen_unification_dict(subtree)
            unified = subtree.replace_expr(unification_dict)
            equiv_class = self.determine_equivalence_class(unified)

            # nothing to look up if the oracle does not know the class
            if not self.oracle.contains_equiv_class(equiv_class):
                continue

            # look for a simpler member of the equivalence class
            success, simplified = self._find_suitable_simplification(
                equiv_class, subtree, unification_dict)
            if not success:
                continue

            # stand-in for the simplified subtree inside the AST
            placeholder = self._gen_global_variable_replacement(
                global_ctr, subtree.size)
            global_ctr += 1
            global_unification_dict[placeholder] = simplified
            ast = ast.replace_expr({subtree: placeholder})
            # the AST changed: restart the walk
            break

        reached_fixpoint = snapshot == ast

    # replace global placeholder variables with simplified subtrees in ast
    ast = self._reverse_global_unification(ast, global_unification_dict)

    return expr_simp(ast)
def m2expr_to_r2esil(iir, loc_db):
    """Convert a miasm expression to a radare2 ESIL

    @iir: miasm expression, assignment, or an already converted string
    @loc_db: location database used to resolve ExprLoc offsets

    The conversion is recursive and dispatches on the type of @iir.
    Unsupported conditions yield "TODO_Cond" and unknown types "TODO_UNK".

    Note: this function uses Python 2 only constructs (`xrange`,
    `print >> sys.stderr`).
    """

    # Leaf expressions
    if isinstance(iir, ExprId):
        return iir.name.lower()

    if isinstance(iir, ExprLoc):
        # Returned as-is, i.e. whatever get_location_offset() yields
        return loc_db.get_location_offset(iir.loc_key)

    if isinstance(iir, ExprInt):
        return hex(iir.arg)

    if isinstance(iir, ExprMem):
        # "<address>,[<size in bytes>]"
        ret = "%s,[%d]" % (m2expr_to_r2esil(iir.arg, loc_db), iir.size/8)
        return ret.lower()

    elif isAssignation(iir):
        if not isinstance(iir.dst, ExprMem):
            # "<src>,<dst>,="
            esil_dst = m2expr_to_r2esil(iir.dst, loc_db)
            return "%s,%s,=" % (m2expr_to_r2esil(iir.src, loc_db), esil_dst)
        else:
            # Memory destination: "<src>,<address>,=[]"
            esrc = m2expr_to_r2esil(iir.src, loc_db)
            edst = m2expr_to_r2esil(iir.dst.arg, loc_db)
            return "%s,%s,=[]" % (esrc, edst)

    elif isinstance(iir, ExprOp):
        if len(iir.args) == 2:
            # Binary operation: second operand first, then the first one
            arg_1 = m2expr_to_r2esil(iir.args[1], loc_db)
            arg_0 = m2expr_to_r2esil(iir.args[0], loc_db)
            if iir.op == "FLAG_SIGN_SUB":
                # Subtract, then shift right by (size - 1)
                shift = iir.args[1].size - 1
                return "%s,%s,-,%d,>>" % (arg_1, arg_0, shift)
            return "%s,%s,%s" % (arg_1, arg_0, iir.op)
        elif iir.op == "parity":
            arg = m2expr_to_r2esil(iir.args[0], loc_db)
            return "%s,1,&,?{,0,}{,1,}" % arg
        elif iir.op.startswith("signExt_") and isinstance(iir.args[0], ExprMem):
            argsize = iir.args[0].size
            # Target width is encoded in the operator name: signExt_<bits>
            bits = int(iir.op.split("_")[1])
            test = 1 << (argsize - 1)
            # "-" binds tighter than "^":
            # this is (2**bits - 1) ^ (2**argsize - 1)
            mask = 2**bits-1 ^ 2**argsize-1
            tmp = m2expr_to_r2esil(iir.args[0], loc_db)
            sign_extension = "%s,0x%x,&,1,?{,%s,0x%x,+,}{,%s,}"
            return sign_extension % (tmp, test, tmp, mask, tmp)
        elif iir.op.startswith("zeroExt_"):
            # The operand is emitted unchanged
            return m2expr_to_r2esil(iir.args[0], loc_db)
        elif iir.op == "CC_EQ":
            return m2expr_to_r2esil(iir.args[0], loc_db)
        else:
            # Any other operation with a number of operands different from 2
            return "%s,0,%s" % (m2expr_to_r2esil(iir.args[0], loc_db), iir.op)

    elif isinstance(iir, ExprCompose):
        # Each part is masked with the bit range it occupies
        esil_strings = []
        for start, expr in iir.iter_args():
            stop = start + expr.size
            mask = (2**stop - 1) - (2**start - 1)
            esil_tmp = "%s,%s,&" % (m2expr_to_r2esil(expr, loc_db), hex(mask))
            esil_strings.append(esil_tmp)

        l = esil_strings
        if len(l) == 2:
            ret_string = "%s,%s,+" % (l[0], l[1])
            return ret_string
        else:
            # Parts are grouped two by two and the groups joined with ",+,"
            tmp_list = [",".join(l[i:i+2]) for i in xrange(0, len(l), 2)]
            ret_string = ",+,".join(tmp_list)
            return ret_string

    elif isinstance(iir, ExprSlice):
        # Mask covering bits [start, stop)
        mask = (2**iir.stop - 1) - (2**iir.start - 1)
        return "%s,%s,&" % (m2expr_to_r2esil(iir.arg, loc_db), hex(mask))

    elif isinstance(iir, ExprCond):

        if isinstance(iir.cond, ExprSlice):
            # Attempt to evaluate the expression
            result = expr_simp(iir.cond)
            if isinstance(result, ExprInt):
                # Constant condition: keep only the selected source
                if result.arg != 0:
                    tmp_src = iir.src1
                else:
                    tmp_src = iir.src2
            else:
                tmp = m2expr_to_r2esil(iir.cond, loc_db)
                # NOTE(review): src1/src2 are formatted directly here instead
                # of being converted recursively - confirm this is intended.
                esil_string = "%s,?{,%s,},?{,%s,}" % (tmp, iir.src1, iir.src2)
                return esil_string
            return m2expr_to_r2esil(tmp_src, loc_db)

        elif (isinstance(iir.cond, ExprOp) or isinstance(iir.cond, ExprId)
              or isinstance(iir.cond, ExprCond)):
            condition = m2expr_to_r2esil(iir.cond, loc_db)
            if_clause = m2expr_to_r2esil(iir.src1, loc_db)
            then_clause = m2expr_to_r2esil(iir.src2, loc_db)
            return "%s,?{,%s,}{,%s,}" % (condition, if_clause, then_clause)

        elif isinstance(iir.cond, ExprInt):
            # Constant condition: keep only the selected source
            if int(iir.cond.arg):
                return m2expr_to_r2esil(iir.src1, loc_db)
            else:
                return m2expr_to_r2esil(iir.src2, loc_db)

        # Condition type not handled above
        return "TODO_Cond"  # GV: use a r2m2 exception ?

    elif isinstance(iir, str):
        # Already a string: returned unchanged
        return iir

    else:
        print >> sys.stderr, "Unknown type:", type(iir), iir
        return "TODO_UNK"
from __future__ import print_function

from miasm.expression.expression import *
from miasm.expression.simplifications import expr_simp

# Banner of the demo
print(""" Simple expression simplification demo """)

# Two 32 bit registers used to build the sample expressions
a = ExprId('eax', 32)
b = ExprId('ebx', 32)

# Expressions to simplify: a cancelling addition, a constant folding and a
# composition of two adjacent slices
exprs = [
    a + b - a,
    ExprInt(0x12, 32) + ExprInt(0x30, 32) - a,
    ExprCompose(a[:8], a[8:16]),
]

separator = '*' * 40
for e in exprs:
    simplified = expr_simp(e)
    print(separator)
    print('original expression:', e)
    print("simplified:", simplified)
(ExprCond(ExprOp(TOK_INF_SIGNED, a8.zeroExtend(32), ExprInt(-1, 32)), a, b), b), (ExprCond(ExprOp(TOK_INF_EQUAL_SIGNED, a8.zeroExtend(32), ExprInt(-1, 32)), a, b), b), (a8.zeroExtend(32)[2:5], a8[2:5]), (ExprCond(a + b, a, b), ExprCond(ExprOp(TOK_EQUAL, a, -b), b, a)), (ExprCond(a + i1, a, b), ExprCond(ExprOp(TOK_EQUAL, a, im1), b, a)), (ExprCond(ExprOp(TOK_EQUAL, a, i1), bi1, bi0), ExprOp(TOK_EQUAL, a, i1)), (ExprCond(ExprOp(TOK_INF_SIGNED, a, i1), bi1, bi0), ExprOp(TOK_INF_SIGNED, a, i1)), (ExprOp(TOK_INF_EQUAL_UNSIGNED, a, i0), ExprOp(TOK_EQUAL, a, i0)), ] for e_input, e_check in to_test: print("#" * 80) e_check = expr_simp(e_check) e_new = expr_simp(e_input) print("original: ", str(e_input), "new: ", str(e_new)) rez = e_new == e_check if not rez: raise ValueError('bug in expr_simp simp(%s) is %s and should be %s' % (e_input, e_new, e_check)) # Test conds to_test = [ (((a - b) ^ ((a ^ b) & ((a - b) ^ a))).msb(), ExprOp_inf_signed(a, b)), ((((a - b) ^ ((a ^ b) & ((a - b) ^ a))) ^ a ^ b).msb(), ExprOp_inf_unsigned(a, b)), (ExprOp_inf_unsigned(ExprInt(-1, 32), ExprInt(3, 32)), ExprInt(0, 1)), (ExprOp_inf_signed(ExprInt(-1, 32), ExprInt(3, 32)), ExprInt(1, 1)),
def resolve_offsets(state_register, asmcfg, ircfg, ir_arch):
    """Walk the IR graph and collect the patches to apply.

    Every node of `ircfg` is inspected (more nodes may be appended to the work
    list while walking). Depending on the shape of the block, the values
    written to `state_register` are recovered and turned into patch tuples.

    :param state_register: expression of the register compared against in the
        blocks being analysed
    :param asmcfg: assembly control flow graph of the function
    :param ircfg: IR control flow graph of the function
    :param ir_arch: IR architecture, handed to the symbolic execution engine
    :return: list of `(source offset, destination address, patch kind)`
        tuples, where the patch kind is one of CNDP0, CNDP1, CNDP2, CNDP3 or
        STDP. The set is converted to a list, so the order is not defined.

    The process exits through `sys.exit()` when a node has no IR block.
    """
    patches = set()
    # Work list: grows while new destinations are discovered
    nodes_to_walk = list(ircfg.nodes())

    # Initial symbolic state: each register is mapped to its init value
    symbols_init = dict()
    for i, r in enumerate(all_regs_ids):
        symbols_init[r] = all_regs_ids_init[i]

    # Extra simplification pass registered on ExprOp
    expr_simp.enable_passes({ExprOp: [ignore_call_results]})

    for node in nodes_to_walk:
        irblock = ircfg.get_block(node)
        if not irblock:
            print('[-] Could not get IRBLOCK!')
            sys.exit()
        if len(irblock.assignblks) == 1:
            # Block made of a single instruction
            if irblock.assignblks[0].instr.name == "CMOVNZ" and irblock.assignblks[0].instr.args[0] == state_register:
                temp_reg1 = irblock.assignblks[0].instr.args[0]
                temp_reg2 = irblock.assignblks[0].instr.args[1]
                state1 = None
                state2 = None
                # Look for the immediates moved in both registers by the
                # first predecessor
                previous_block = ircfg.get_block(ircfg.predecessors(node)[0])
                for line in previous_block.assignblks:
                    if line.instr.name == 'MOV' and \
                            line.instr.args[0] in (temp_reg1, temp_reg2) and isinstance(line.instr.args[1], ExprInt):
                        if line.instr.args[0] == state_register:
                            state1 = line.instr.args[1]
                        else:
                            state2 = line.instr.args[1]
                    if state1 and state2:
                        break
                # compiler shenanigans. state missing is not initialised in current bblk. search function for it
                if not state1:
                    state1 = scan_function_for_state(asmcfg, state_register, temp_reg1)
                elif not state2:
                    state2 = scan_function_for_state(asmcfg, state_register, temp_reg2)
                # NOTE(review): blocks1 is looked up with state2 and blocks2
                # with state1 here, the opposite of the CMOVZ case below -
                # presumably because the condition is inverted; confirm.
                blocks1 = get_assignblock_for_state(ircfg, ir_arch, symbols_init, state_register, state2)
                blocks2 = get_assignblock_for_state(ircfg, ir_arch, symbols_init, state_register, state1)
                dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
                src1 = irblock.assignblks[0].instr.offset
                patches.add((src1, dst1, CNDP1))
                # Both patches share the offset of the conditional move
                dst2 = get_address(ircfg.loc_db, blocks2[0].loc_key)
                src2 = src1
                patches.add((src2, dst2, CNDP0))
            elif irblock.assignblks[0].instr.name == "CMOVZ":
                state1 = None
                state2 = None
                temp_reg1 = irblock.assignblks[0].instr.args[0]
                temp_reg2 = irblock.assignblks[0].instr.args[1]
                if temp_reg1 == state_register:
                    # The conditional move writes the state register directly
                    previous_block = ircfg.get_block(
                        ircfg.predecessors(node)[0])
                    for line in previous_block.assignblks:
                        if line.instr.name == 'MOV' and \
                                line.instr.args[0] in (temp_reg1, temp_reg2):
                            if line.instr.args[0] == state_register:
                                state1 = line.instr.args[1]
                            else:
                                state2 = line.instr.args[1]
                    if state1 and state2:
                        blocks1 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state1)
                        blocks2 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state2)
                        dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
                        src1 = irblock.assignblks[0].instr.offset
                        patches.add((src1, dst1, CNDP1))
                        dst2 = get_address(ircfg.loc_db, blocks2[0].loc_key)
                        src2 = src1
                        patches.add((src2, dst2, CNDP0))
                    else:
                        # Only one of the two states was found in the
                        # predecessor: search the whole function for the other
                        found_state = state1 if state1 else state2
                        missing_state = state1 if not state1 else state2
                        subject_reg = temp_reg1 if not state1 else temp_reg2

                        def get_imm_write_for_reg(asmcfg, subject_reg):
                            # Return the first immediate moved to subject_reg
                            # in the assembly graph, None when there is none
                            for node in asmcfg.nodes():
                                asmblock = asmcfg.loc_key_to_block(node)
                                for line in asmblock.lines:
                                    if line.name == 'MOV' and line.args[0] == subject_reg and \
                                            isinstance(line.args[1], ExprInt):
                                        return line.args[1]
                            return None

                        missing_state = get_imm_write_for_reg(
                            asmcfg, subject_reg)
                        if not missing_state:
                            print(
                                "[-] Something went wrong. could not find mising state!"
                            )
                            continue
                        # Fill whichever of state1 / state2 was missing.
                        # state2 is computed after state1 has been updated.
                        state1 = state1 if state1 == found_state else missing_state
                        state2 = missing_state if state1 == found_state else state2
                        blocks1 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state1)
                        blocks2 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state2)
                        dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
                        src1 = irblock.assignblks[0].instr.offset
                        patches.add((src1, dst1, CNDP1))
                        dst2 = get_address(ircfg.loc_db, blocks2[0].loc_key)
                        src2 = src1
                        patches.add((src2, dst2, CNDP0))
                else:
                    # The state register is written by the first successor
                    next_block = ircfg.get_block(ircfg.successors(node)[0])
                    for line in next_block.assignblks:
                        if line.instr.name == 'MOV' and line.instr.args[0] == state_register:
                            state1 = line.instr.args[1]
                            break
                    if state1:
                        blocks1 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state1)
                        # The patch source is the offset of the last JMP of
                        # the successor; it stays None when there is no JMP
                        src = None
                        for assignblk in next_block.assignblks:
                            if assignblk.instr.name == 'JMP':
                                src = assignblk.instr.offset
                        dst_block = ircfg.get_block(blocks1[0].loc_key)
                        if isinstance(dst_block.dst, ExprCond) and len(
                                dst_block.assignblks):
                            # NOTE(review): ('CC_S>') is a plain string, not a
                            # tuple, so `in` is a substring test here.
                            if hasattr(dst_block.dst.cond, 'op') and dst_block.dst.cond.op in (
                                    'CC_S>'):
                                dst = get_address(ircfg.loc_db,
                                                  dst_block.dst.src2.loc_key)
                                next_block = ircfg.get_block(dst)
                                dst = get_address(ircfg.loc_db,
                                                  next_block.dst.src1.loc_key)
                            else:
                                dst = get_address(ircfg.loc_db,
                                                  dst_block.dst.src1.loc_key)
                        else:
                            dst = get_address(ircfg.loc_db,
                                              blocks1[0].loc_key)
                        patches.add((src, dst, STDP))
        else:
            # Block with several instructions: run it symbolically from the
            # initial state and look at the resulting state register
            symbolic_engine = SymbolicExecutionEngine(ir_arch, symbols_init)
            next_addr = symbolic_engine.run_block_at(
                ircfg, get_address(ircfg.loc_db, node))
            next_addr = expr_simp(next_addr)
            updated_state = symbolic_engine.symbols[state_register]
            if isinstance(updated_state, ExprOp):
                updated_state = expr_simp(updated_state)
            if updated_state != symbols_init[state_register] and \
                    isinstance(updated_state, ExprOp):
                # The new state is still an operation: two values are possible
                irblock = ircfg.get_block(node)
                if not irblock:
                    print('[-] Could not get IRBLOCK!')
                    sys.exit()
                if len(irblock.assignblks) > 3:
                    neg_inst = False
                    for i in range(len(irblock.assignblks)):
                        if irblock.assignblks[i].instr.name == 'NEG':
                            neg_inst = True
                        # Look for a SBB / AND / ADD instruction sequence
                        if irblock.assignblks[i].instr.name == 'SBB' and \
                                irblock.assignblks[i + 1].instr.name == 'AND' and \
                                irblock.assignblks[i + 2].instr.name == 'ADD':
                            expr = symbolic_engine.symbols[
                                state_register].copy()
                            if neg_inst:
                                # Evaluate the state for both values of the
                                # initial EAX
                                state1 = expr_simp(
                                    expr.replace_expr(
                                        {EAX_init: ExprInt(0, 32)}))
                                state2 = expr_simp(
                                    expr.replace_expr(
                                        {EAX_init: ExprInt(1, 32)}))
                            elif irblock.assignblks[i-1].instr.name == 'CMP' and \
                                    irblock.assignblks[i-2].instr.name == 'ADD' and \
                                    isinstance(irblock.assignblks[i-2].instr.args[1], ExprInt):
                                id = irblock.assignblks[i - 2].instr.args[0]
                                imm = irblock.assignblks[i - 2].instr.args[1]
                                state1 = expr_simp(
                                    expr.replace_expr({
                                        EAX_init: imm
                                    }).replace_expr({
                                        symbolic_engine.symbols[id].args[0]: imm
                                    }))
                                state2 = expr_simp(
                                    expr.replace_expr({
                                        EAX_init: ExprInt(-1, 32)
                                    }).replace_expr({
                                        symbolic_engine.symbols[id].args[0]: imm
                                    }))
                            # NOTE(review): state1 / state2 are only assigned
                            # by the two branches above - confirm one of them
                            # always applies when this point is reached.
                            blocks1 = get_assignblock_for_state(
                                ircfg, ir_arch, symbols_init, state_register,
                                state1)
                            blocks2 = get_assignblock_for_state(
                                ircfg, ir_arch, symbols_init, state_register,
                                state2)
                            process_blocks_for_patches(node, blocks1, ircfg,
                                                       patches, nodes_to_walk,
                                                       state1, True)
                            process_blocks_for_patches(node, blocks2, ircfg,
                                                       patches, nodes_to_walk,
                                                       state2, False)
                            break
            elif updated_state != symbols_init[state_register] and \
                    isinstance(updated_state, ExprInt) and \
                    updated_state._get_int() > 0xff:
                # The new state is a constant greater than 0xff
                #print("[*] Looking for state %s" % hex(updated_state._get_int()))
                referenced_blocks = get_assignblock_for_state(
                    ircfg, ir_arch, symbols_init, state_register,
                    updated_state)
                # for block in referenced_blocks:
                #     print("\t[+] Found reference at %s" % hex(get_address(ircfg.loc_db, block.loc_key)))
                process_blocks_for_patches(node, referenced_blocks, ircfg,
                                           patches, nodes_to_walk)
            elif isinstance(next_addr, ExprCond):
                if not hasattr(next_addr.cond, 'args'):
                    # Resolve both destinations to location keys
                    if isinstance(next_addr.src1, ExprLoc):
                        dest1 = next_addr.src1.loc_key
                    else:
                        dest1 = get_loc_key_at(ircfg.loc_db,
                                               next_addr.src1._get_int())
                    if isinstance(next_addr.src2, ExprLoc):
                        dest2 = next_addr.src2.loc_key
                    else:
                        dest2 = get_loc_key_at(ircfg.loc_db,
                                               next_addr.src2._get_int())
                    # Make sure both destinations get walked as well
                    if dest1 not in nodes_to_walk:
                        nodes_to_walk.append(dest1)
                    if dest2 not in nodes_to_walk:
                        nodes_to_walk.append(dest2)
                    dst2block = ircfg.get_block(dest2)
                    if dst2block.assignblks[0].instr.name == 'CMP' and \
                            dst2block.assignblks[0].instr.args[0] == state_register and \
                            len(ircfg.get_block(node).assignblks) > 1:
                        # Walk up the first predecessors until one starts
                        # with a CMP
                        ref_block = node
                        while True:
                            irblock = ircfg.get_block(
                                ircfg.predecessors(ref_block)[0])
                            # NOTE(review): the second test reads dst2block,
                            # not irblock - confirm this is intended.
                            if irblock.assignblks[0].instr.name == 'CMP' and \
                                    dst2block.assignblks[0].instr.args[0] == state_register:
                                break
                            ref_block = ircfg.predecessors(ref_block)[0]
                        asmblock = asmcfg.loc_key_to_block(node)
                        for line in asmblock.lines:
                            if line.name == 'JZ':
                                patches.add(
                                    (line.offset,
                                     get_address(asmcfg.loc_db,
                                                 ref_block), CNDP2))
                                true_block = ircfg.get_block(
                                    ircfg.get_block(node).dst.src2.loc_key)
                                # Continue the symbolic execution in the src2
                                # destination of the current block
                                symbolic_engine.run_block_at(
                                    ircfg, true_block.loc_key)
                                if isinstance(
                                        symbolic_engine.
                                        symbols[state_register], ExprInt):
                                    referenced_block = get_assignblock_for_state(
                                        ircfg, ir_arch, symbols_init,
                                        state_register, symbolic_engine.
                                        symbols[state_register])[0]
                                    patches.add(
                                        (line.offset,
                                         get_address(ircfg.loc_db,
                                                     referenced_block.loc_key),
                                         CNDP3))
                                break
            elif isinstance(next_addr, ExprInt):
                # Constant destination: only schedule it for walking
                dest = get_loc_key_at(ircfg.loc_db, next_addr._get_int())
                if dest not in nodes_to_walk:
                    nodes_to_walk.append(
                        get_loc_key_at(ircfg.loc_db, next_addr._get_int()))
    return list(patches)
def launch_depgraph():
    """Compute the dependency graphs of the function under the cursor.

    The function is disassembled and lifted to IR, the user settings are
    requested through the settings form, every assignment is simplified
    (optionally with the stack pointer unaliased) and the resulting graphs
    are published through the module globals before the first one is shown.
    """
    global graphs, comments, sol_nb, settings, addr, ir_arch, ircfg

    # Function currently displayed
    addr = idc.get_screen_ea()
    current_func = ida_funcs.get_func(addr)
    func_start = current_func.start_ea

    # Engines matching the architecture of the function
    machine = guess_machine(addr=func_start)
    mn = machine.mn
    mdis = machine.dis_engine(bin_stream_ida(), dont_dis_nulstart_bloc=True)
    ir_arch = machine.ira(mdis.loc_db)

    # Names known by IDA are registered as locations
    for name_addr, name in idautils.Names():
        if name is not None:
            mdis.loc_db.add_location(name, name_addr)

    # Disassemble, then lift to IR
    asmcfg = mdis.dis_multiblock(func_start)
    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    # Ask the user what has to be tracked
    settings = depGraphSettingsForm(ir_arch, ircfg, mn)
    settings.Execute()
    loc_key = settings.loc_key
    elements = settings.elements
    line_nb = settings.line_nb

    # Rebuild every block with simplified assignments
    stack_ptr = ir_arch.sp
    for block in list(viewvalues(ircfg.blocks)):
        block_offset = ir_arch.loc_db.get_location_offset(block.loc_key)
        unalias = block_offset is not None and settings.unalias_stack
        rebuilt = []
        for assignblk in block:
            replacements = None
            if unalias:
                # Express the stack pointer relatively to its initial value
                delta = m2_expr.ExprInt(idc.get_spd(assignblk.instr.offset),
                                        stack_ptr.size)
                replacements = {stack_ptr: mn.regs.regs_init[stack_ptr] + delta}

            simplified = {}
            for dst, src in viewitems(assignblk):
                if unalias:
                    src = src.replace_expr(replacements)
                    # The stack pointer itself is kept as a destination
                    if dst != stack_ptr:
                        dst = dst.replace_expr(replacements)
                new_dst = expr_simp(dst)
                simplified[new_dst] = expr_simp(src)
            rebuilt.append(AssignBlock(simplified, instr=assignblk.instr))
        ircfg.blocks[block.loc_key] = IRBlock(block.loc_key, rebuilt)

    # Dependency graphs, starting from the head of the function
    heads = set([ir_arch.loc_db.get_offset_location(func_start)])
    graphs = settings.depgraph.get(loc_key, elements, line_nb, heads)

    # State shared with the display callbacks
    comments = {}
    sol_nb = 0

    # Shift-N moves to the next solution
    ida_kernwin.add_hotkey("Shift-N", next_element)
    treat_element()
def build_graph(start_addr, type_graph, simplify=False, dontmodstack=True, loadint=False, verbose=False):
    """Build and display the miasm IR graph of the function at `start_addr`.

    :param start_addr: address where the disassembly starts
    :param type_graph: TYPE_GRAPH_IR shows the IR graph as is;
        TYPE_GRAPH_IRSSA and TYPE_GRAPH_IRSSAUNSSA go through the SSA
        simplifier
    :param simplify: run IRCFGSimplifierCommon on the IR graph first
    :param dontmodstack: drop the stack pointer updates from call effects
    :param loadint: let the SSA simplifier run `load_from_int`
    :param verbose: print progress and dump intermediate .dot files
    :raises ValueError: ("Unknown option") when `type_graph` is none of the
        three known values
    """
    machine = guess_machine(addr=start_addr)
    dis_engine, ira = machine.dis_engine, machine.ira

    class IRADelModCallStack(ira):
        def call_effects(self, addr, instr):
            assignblks, extra = super(IRADelModCallStack, self).call_effects(addr, instr)
            if not dontmodstack:
                return assignblks, extra
            # Remove the assignments to the stack pointer
            out = []
            for assignblk in assignblks:
                dct = dict(assignblk)
                dct = {
                    dst: src for (dst, src) in viewitems(dct) if dst != self.sp
                }
                out.append(AssignBlock(dct, assignblk.instr))
            return out, extra

    if verbose:
        print("Arch", dis_engine)

    fname = idc.get_root_filename()
    if verbose:
        print(fname)

    bs = bin_stream_ida()
    mdis = dis_engine(bs)
    ir_arch = IRADelModCallStack(mdis.loc_db)

    # populate symbols with ida names
    for addr, name in idautils.Names():
        if name is None:
            continue
        if (mdis.loc_db.get_offset_location(addr) or
                mdis.loc_db.get_name_location(name)):
            # Symbol alias
            continue
        mdis.loc_db.add_location(name, addr)

    if verbose:
        print("start disasm")
        print(hex(start_addr))

    asmcfg = mdis.dis_multiblock(start_addr)
    entry_points = set([mdis.loc_db.get_offset_location(start_addr)])
    if verbose:
        print("generating graph")
        # The file is closed as soon as the dump is written
        with open('asm_flow.dot', 'w') as fdesc:
            fdesc.write(asmcfg.dot())
        print("generating IR... %x" % start_addr)

    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    if verbose:
        print("IR ok... %x" % start_addr)

    # Simplify every assignment of every block
    for irb in list(viewvalues(ircfg.blocks)):
        irs = []
        for assignblk in irb:
            new_assignblk = {
                expr_simp(dst): expr_simp(src)
                for dst, src in viewitems(assignblk)
            }
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    if verbose:
        out = ircfg.dot()
        # Text mode, as for asm_flow.dot: the dump is a string, which a
        # binary mode file rejects on Python 3
        with open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'w') as fdesc:
            fdesc.write(out)
    title = "Miasm IR graph"

    head = list(entry_points)[0]

    if simplify:
        ircfg_simplifier = IRCFGSimplifierCommon(ir_arch)
        ircfg_simplifier.simplify(ircfg, head)
        title += " (simplified)"

    if type_graph == TYPE_GRAPH_IR:
        graph = GraphMiasmIR(ircfg, title, None)
        graph.Show()
        return

    # Kept from the original code; it is not instantiated in this function
    class IRAOutRegs(ira):
        def get_out_regs(self, block):
            regs_todo = super(IRAOutRegs, self).get_out_regs(block)
            out = {}
            for assignblk in block:
                for dst in assignblk:
                    reg = self.ssa_var.get(dst, None)
                    if reg is None:
                        continue
                    if reg in regs_todo:
                        out[reg] = dst
            return set(viewvalues(out))

    # Add dummy dependency to uncover out regs affectation
    for loc in ircfg.leaves():
        irblock = ircfg.blocks.get(loc)
        if irblock is None:
            continue
        regs = {}
        for reg in ir_arch.get_out_regs(irblock):
            regs[reg] = reg
        assignblks = list(irblock)
        new_assiblk = AssignBlock(regs, assignblks[-1].instr)
        assignblks.append(new_assiblk)
        new_irblock = IRBlock(irblock.loc_key, assignblks)
        ircfg.blocks[loc] = new_irblock

    class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA):
        def do_simplify(self, ssa, head):
            modified = super(CustomIRCFGSimplifierSSA, self).do_simplify(ssa, head)
            if loadint:
                modified |= load_from_int(ssa.graph, bs, is_addr_ro_variable)
            return modified

        def simplify(self, ircfg, head):
            ssa = self.ircfg_to_ssa(ircfg, head)
            ssa = self.do_simplify_loop(ssa, head)

            if type_graph == TYPE_GRAPH_IRSSA:
                ret = ssa.graph
            elif type_graph == TYPE_GRAPH_IRSSAUNSSA:
                ircfg = self.ssa_to_unssa(ssa, head)
                ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch)
                ircfg_simplifier.simplify(ircfg, head)
                ret = ircfg
            else:
                raise ValueError("Unknown option")
            return ret

    head = list(entry_points)[0]
    simplifier = CustomIRCFGSimplifierSSA(ir_arch)
    ircfg = simplifier.simplify(ircfg, head)
    with open('final.dot', 'w') as fdesc:
        fdesc.write(ircfg.dot())

    graph = GraphMiasmIR(ircfg, title, None)
    graph.Show()
def build_graph(start_addr, type_graph, simplify=False, dontmodstack=True, loadint=False, verbose=False):
    """Build and display the miasm IR graph of the function at `start_addr`.

    :param start_addr: address where the disassembly starts
    :param type_graph: TYPE_GRAPH_IR shows the IR graph as is;
        TYPE_GRAPH_IRSSA and TYPE_GRAPH_IRSSAUNSSA go through the SSA
        simplifier
    :param simplify: run IRCFGSimplifierCommon on the IR graph first
    :param dontmodstack: drop the stack pointer updates from call effects
    :param loadint: let the SSA simplifier run `load_from_int`
    :param verbose: print progress and dump intermediate .dot files
    :raises ValueError: ("Unknown option") when `type_graph` is none of the
        three known values
    """
    machine = guess_machine(addr=start_addr)
    dis_engine, ira = machine.dis_engine, machine.ira

    class IRADelModCallStack(ira):
        def call_effects(self, addr, instr):
            assignblks, extra = super(IRADelModCallStack, self).call_effects(addr, instr)
            if not dontmodstack:
                return assignblks, extra
            # Remove the assignments to the stack pointer
            out = []
            for assignblk in assignblks:
                dct = dict(assignblk)
                dct = {
                    dst: src for (dst, src) in viewitems(dct) if dst != self.sp
                }
                out.append(AssignBlock(dct, assignblk.instr))
            return out, extra

    if verbose:
        print("Arch", dis_engine)

    # idc.GetInputFile() is the IDA 6 name of this call
    fname = idc.get_root_filename()
    if verbose:
        print(fname)

    bs = bin_stream_ida()
    mdis = dis_engine(bs)
    ir_arch = IRADelModCallStack(mdis.loc_db)

    # populate symbols with ida names
    for addr, name in idautils.Names():
        if name is None:
            continue
        if (mdis.loc_db.get_offset_location(addr) or
                mdis.loc_db.get_name_location(name)):
            # Symbol alias
            continue
        mdis.loc_db.add_location(name, addr)

    if verbose:
        print("start disasm")
        print(hex(start_addr))

    asmcfg = mdis.dis_multiblock(start_addr)
    entry_points = set([mdis.loc_db.get_offset_location(start_addr)])
    if verbose:
        print("generating graph")
        # The file is closed as soon as the dump is written
        with open('asm_flow.dot', 'w') as fdesc:
            fdesc.write(asmcfg.dot())
        print("generating IR... %x" % start_addr)

    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    if verbose:
        print("IR ok... %x" % start_addr)

    # Simplify every assignment of every block
    for irb in list(viewvalues(ircfg.blocks)):
        irs = []
        for assignblk in irb:
            new_assignblk = {
                expr_simp(dst): expr_simp(src)
                for dst, src in viewitems(assignblk)
            }
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    if verbose:
        out = ircfg.dot()
        # Text mode, as for asm_flow.dot: the dump is a string, which a
        # binary mode file rejects on Python 3
        with open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'w') as fdesc:
            fdesc.write(out)
    title = "Miasm IR graph"

    head = list(entry_points)[0]

    if simplify:
        ircfg_simplifier = IRCFGSimplifierCommon(ir_arch)
        ircfg_simplifier.simplify(ircfg, head)
        title += " (simplified)"

    if type_graph == TYPE_GRAPH_IR:
        graph = GraphMiasmIR(ircfg, title, None)
        graph.Show()
        return

    # Kept from the original code; it is not instantiated in this function
    class IRAOutRegs(ira):
        def get_out_regs(self, block):
            regs_todo = super(IRAOutRegs, self).get_out_regs(block)
            out = {}
            for assignblk in block:
                for dst in assignblk:
                    reg = self.ssa_var.get(dst, None)
                    if reg is None:
                        continue
                    if reg in regs_todo:
                        out[reg] = dst
            return set(viewvalues(out))

    # Add dummy dependency to uncover out regs affectation
    for loc in ircfg.leaves():
        irblock = ircfg.blocks.get(loc)
        if irblock is None:
            continue
        regs = {}
        for reg in ir_arch.get_out_regs(irblock):
            regs[reg] = reg
        assignblks = list(irblock)
        new_assiblk = AssignBlock(regs, assignblks[-1].instr)
        assignblks.append(new_assiblk)
        new_irblock = IRBlock(irblock.loc_key, assignblks)
        ircfg.blocks[loc] = new_irblock

    class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA):
        def do_simplify(self, ssa, head):
            modified = super(CustomIRCFGSimplifierSSA, self).do_simplify(ssa, head)
            if loadint:
                modified |= load_from_int(ssa.graph, bs, is_addr_ro_variable)
            return modified

        def simplify(self, ircfg, head):
            ssa = self.ircfg_to_ssa(ircfg, head)
            ssa = self.do_simplify_loop(ssa, head)

            if type_graph == TYPE_GRAPH_IRSSA:
                ret = ssa.graph
            elif type_graph == TYPE_GRAPH_IRSSAUNSSA:
                ircfg = self.ssa_to_unssa(ssa, head)
                ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch)
                ircfg_simplifier.simplify(ircfg, head)
                ret = ircfg
            else:
                raise ValueError("Unknown option")
            return ret

    head = list(entry_points)[0]
    simplifier = CustomIRCFGSimplifierSSA(ir_arch)
    ircfg = simplifier.simplify(ircfg, head)
    with open('final.dot', 'w') as fdesc:
        fdesc.write(ircfg.dot())

    graph = GraphMiasmIR(ircfg, title, None)
    graph.Show()
"Naive Simplification: a + a + a == a * 3" # Match the expected form ## isinstance(expr, m2_expr.ExprOp) is not needed: simplifications are ## attached to expression types if expr.op == "+" and \ len(expr.args) == 3 and \ expr.args.count(expr.args[0]) == len(expr.args): # Effective simplification return m2_expr.ExprOp("*", expr.args[0], m2_expr.ExprInt(3, expr.args[0].size)) else: # Do not simplify return expr a = m2_expr.ExprId('a', 32) base_expr = a + a + a print("Without adding the simplification:") print("\t%s = %s" % (base_expr, expr_simp(base_expr))) # Enable pass expr_simp.enable_passes({m2_expr.ExprOp: [simp_add_mul]}) print("After adding the simplification:") print("\t%s = %s" % (base_expr, expr_simp(base_expr))) # Automatic fail assert(expr_simp(base_expr) == m2_expr.ExprOp("*", a, m2_expr.ExprInt(3, a.size)))
def get_graph(self):
    """Build the miasm IR graph of `self.function`.

    The segments of `self.data` are concatenated into one buffer, the
    function is disassembled from it and lifted to IR, then the graph is
    simplified according to `self.simplify`, `self.dontmodstack`,
    `self.loadmemint` and `self.type_graph`.

    :return: a MiasmIRGraph built from the (named) resulting graph
    :raises ValueError: ("Unknown option") when `self.type_graph` is none of
        TYPE_GRAPH_IR, TYPE_GRAPH_IRSSA, TYPE_GRAPH_IRSSAUNSSA
    """
    simplify = self.simplify
    dontmodstack = self.dontmodstack
    loadmemint = self.loadmemint
    type_graph = self.type_graph

    # Chunks are collected as bytes and joined once: this avoids mixing text
    # with the data returned by read() and repeated string concatenation.
    chunks = []
    for s in self.data.segments:
        chunks.append(self.data.read(s.start, len(s)))
        # add padding between each segment
        if s.end != self.data.end:
            # Pad with zeroes up to the next 0x1000 boundary above s.end
            chunks.append(b'\x00' * (((s.end | 0xfff) + 1) - s.end))
    bin_str = b"".join(chunks)

    bs = bin_stream_str(input_str=bin_str, base_address=self.data.start)
    machine = Machine(archs[self.data.arch.name])
    mdis = machine.dis_engine(bs)

    asmcfg = mdis.dis_multiblock(self.function.start)
    entry_points = set(
        [mdis.loc_db.get_offset_location(self.function.start)])

    class IRADelModCallStack(machine.ira):
        def call_effects(self, addr, instr):
            assignblks, extra = super(IRADelModCallStack,
                                      self).call_effects(addr, instr)
            if not dontmodstack:
                return assignblks, extra
            # Remove the assignments to the stack pointer
            out = []
            for assignblk in assignblks:
                dct = dict(assignblk)
                dct = {
                    dst: src
                    for (dst, src) in viewitems(dct) if dst != self.sp
                }
                out.append(AssignBlock(dct, assignblk.instr))
            return out, extra

    ir_arch = IRADelModCallStack(mdis.loc_db)
    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    # Simplify every assignment of every block
    for irb in list(viewvalues(ircfg.blocks)):
        irs = []
        for assignblk in irb:
            new_assignblk = {
                expr_simp(dst): expr_simp(src)
                for dst, src in viewitems(assignblk)
            }
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    head = list(entry_points)[0]

    if simplify:
        ircfg_simplifier = IRCFGSimplifierCommon(ir_arch)
        ircfg_simplifier.simplify(ircfg, head)

    if type_graph == TYPE_GRAPH_IR:
        return MiasmIRGraph(self.add_names(ircfg))

    # Kept from the original code; it is not instantiated in this method
    class IRAOutRegs(machine.ira):
        def get_out_regs(self, block):
            regs_todo = super(IRAOutRegs, self).get_out_regs(block)
            out = {}
            for assignblk in block:
                for dst in assignblk:
                    reg = self.ssa_var.get(dst, None)
                    if reg is None:
                        continue
                    if reg in regs_todo:
                        out[reg] = dst
            return set(viewvalues(out))

    # Add dummy dependency to uncover out regs affectation
    for loc in ircfg.leaves():
        irblock = ircfg.blocks.get(loc)
        if irblock is None:
            continue
        regs = {}
        for reg in ir_arch.get_out_regs(irblock):
            regs[reg] = reg
        assignblks = list(irblock)
        new_assiblk = AssignBlock(regs, assignblks[-1].instr)
        assignblks.append(new_assiblk)
        new_irblock = IRBlock(irblock.loc_key, assignblks)
        ircfg.blocks[loc] = new_irblock

    class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA):
        def do_simplify(self, ssa, head):
            modified = super(CustomIRCFGSimplifierSSA,
                             self).do_simplify(ssa, head)
            if loadmemint:
                modified |= load_from_int(ssa.graph, bs,
                                          is_addr_ro_variable)
            return modified

        def simplify(self, ircfg, head):
            ssa = self.ircfg_to_ssa(ircfg, head)
            ssa = self.do_simplify_loop(ssa, head)

            if type_graph == TYPE_GRAPH_IRSSA:
                ret = ssa.graph
            elif type_graph == TYPE_GRAPH_IRSSAUNSSA:
                ircfg = self.ssa_to_unssa(ssa, head)
                ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch)
                ircfg_simplifier.simplify(ircfg, head)
                ret = ircfg
            else:
                raise ValueError("Unknown option")
            return ret

    # dirty patch to synchronize nodes and blocks lists in ircfg
    nodes_to_del = [
        node for node in ircfg.nodes() if node not in ircfg.blocks
    ]
    for node in nodes_to_del:
        ircfg.del_node(node)

    head = list(entry_points)[0]
    simplifier = CustomIRCFGSimplifierSSA(ir_arch)
    ircfg = simplifier.simplify(ircfg, head)
    return MiasmIRGraph(self.add_names(ircfg))
def launch_depgraph():
    """Compute the dependency graphs of the function under the cursor.

    The function is disassembled and lifted to IR, the user settings are
    requested through the settings form, every assignment is simplified
    (optionally with the stack pointer unaliased) and the resulting graphs
    are stored in the module globals before the first one is displayed.

    Only IDA 7 API names are used (`idc.get_screen_ea`, `func_t.start_ea`,
    `idc.get_spd`), in line with the `ida_funcs` / `ida_kernwin` modules this
    function already relies on.
    """
    global graphs, comments, sol_nb, settings, addr, ir_arch, ircfg
    # Get the current function
    addr = idc.get_screen_ea()
    func = ida_funcs.get_func(addr)

    # Init
    machine = guess_machine(addr=func.start_ea)
    mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira

    bs = bin_stream_ida()
    mdis = dis_engine(bs, dont_dis_nulstart_bloc=True)
    ir_arch = ira(mdis.loc_db)

    # Populate symbols with ida names
    for ad, name in idautils.Names():
        if name is None:
            continue
        mdis.loc_db.add_location(name, ad)

    asmcfg = mdis.dis_multiblock(func.start_ea)

    # Generate IR
    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    # Get settings
    settings = depGraphSettingsForm(ir_arch, ircfg)
    settings.Execute()

    loc_key, elements, line_nb = settings.loc_key, settings.elements, settings.line_nb
    # Simplify assignments
    for irb in list(viewvalues(ircfg.blocks)):
        irs = []
        offset = ir_arch.loc_db.get_location_offset(irb.loc_key)
        # The stack is only unaliased for blocks which have an offset
        fix_stack = offset is not None and settings.unalias_stack
        for assignblk in irb:
            if fix_stack:
                # Express the stack pointer relatively to its initial value
                stk_high = m2_expr.ExprInt(idc.get_spd(assignblk.instr.offset), ir_arch.sp.size)
                fix_dct = {ir_arch.sp: mn.regs.regs_init[ir_arch.sp] + stk_high}

            new_assignblk = {}
            for dst, src in viewitems(assignblk):
                if fix_stack:
                    src = src.replace_expr(fix_dct)
                    # The stack pointer itself is kept as a destination
                    if dst != ir_arch.sp:
                        dst = dst.replace_expr(fix_dct)
                dst, src = expr_simp(dst), expr_simp(src)
                new_assignblk[dst] = src
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    # Get dependency graphs
    dg = settings.depgraph
    graphs = dg.get(loc_key, elements, line_nb,
                    set([ir_arch.loc_db.get_offset_location(func.start_ea)]))

    # Display the result
    comments = {}
    sol_nb = 0

    # Register and launch
    ida_kernwin.add_hotkey("Shift-N", next_element)
    treat_element()
def test(left, right): """Launch tests on left OP right""" global size, mask for left_i in left: left_i = ModularIntervals(size, left_i) left_values = list(interval_elements(left_i)) # Check operations without other arguments ## Check NEG result = -left_i for x in left_values: rez = (-x) & mask assert rez in result # Check operations on intervals for right_i in right: right_i = ModularIntervals(size, right_i) right_values = list(interval_elements(right_i)) # Check operations available only on integer if len(right_values) == 1: # Check mod value = right_values[0] # Avoid division by zero if value != 0: result = left_i % value for x in left_values: rez = (x % value) & mask assert rez in result # Check ADD result = left_i + right_i for x in left_values: for y in right_values: rez = (x + y) & mask assert rez in result # Check OR result = left_i | right_i for x in left_values: for y in right_values: rez = (x | y) & mask assert rez in result # Check AND result = left_i & right_i for x in left_values: for y in right_values: rez = (x & y) & mask assert rez in result # Check XOR result = left_i ^ right_i for x in left_values: for y in right_values: rez = (x ^ y) & mask assert rez in result # Check MUL result = left_i * right_i for x in left_values: for y in right_values: rez = (x * y) & mask assert rez in result # Check >> result = left_i >> right_i for x in left_values: for y in right_values: rez = (x >> y) & mask assert rez in result # Check << result = left_i << right_i for x in left_values: for y in right_values: rez = (x << y) & mask assert rez in result # Check a>> result = left_i.arithmetic_shift_right(right_i) for x in left_values: x = ExprInt(x, size) for y in right_values: y = ExprInt(y, size) rez = int(expr_simp(ExprOp('a>>', x, y))) assert rez in result # Check >>> result = left_i.rotation_right(right_i) for x in left_values: x = ExprInt(x, size) for y in right_values: y = ExprInt(y, size) rez = int(expr_simp(ExprOp('>>>', x, y))) assert rez in result # Check 
<<< result = left_i.rotation_left(right_i) for x in left_values: x = ExprInt(x, size) for y in right_values: y = ExprInt(y, size) rez = int(expr_simp(ExprOp('<<<', x, y))) assert rez in result
# Match the expected form ## isinstance(expr, m2_expr.ExprOp) is not needed: simplifications are ## attached to expression types if expr.op == "+" and \ len(expr.args) == 3 and \ expr.args.count(expr.args[0]) == len(expr.args): # Effective simplification return m2_expr.ExprOp("*", expr.args[0], m2_expr.ExprInt(3, expr.args[0].size)) else: # Do not simplify return expr a = m2_expr.ExprId('a', 32) base_expr = a + a + a print("Without adding the simplification:") print("\t%s = %s" % (base_expr, expr_simp(base_expr))) # Enable pass expr_simp.enable_passes({m2_expr.ExprOp: [simp_add_mul]}) print("After adding the simplification:") print("\t%s = %s" % (base_expr, expr_simp(base_expr))) # Automatic fail assert (expr_simp(base_expr) == m2_expr.ExprOp("*", a, m2_expr.ExprInt(3, a.size)))
def fromstring(cls, text, loc_db, mode = None):
    """Build an instruction from its text form.

    The first run of non blank characters of `text` is the mnemonic name;
    the remaining text is parsed argument by argument against each candidate
    returned by `cls.get_cls_instance`, and the first candidate able to
    consume the whole text is used.

    :param text: assembly text of the instruction
    :param loc_db: location database, forwarded to the argument parsers
    :param mode: forwarded to `cls.get_cls_instance` and `cls.instruction`
    :return: the object returned by `cls.instruction`
    :raises ValueError: when no name is found in `text`, when the name is
        unknown, or when no candidate can parse the arguments
    :raises NotImplementedError: when an argument parses but has no
        expression
    """
    global total_scans
    # An empty or blank text has no mnemonic: report it as a ValueError
    # instead of failing on the missing match object
    match = re.search(r'(\S+)', text)
    if match is None:
        raise ValueError('cannot find name', text)
    name = match.group(1)

    if name not in cls.all_mn_name:
        raise ValueError('unknown name', name)
    clist = list(cls.all_mn_name[name])
    out = []
    out_args = []
    # Scan results, shared by all candidates: they are keyed by
    # (argument index, position in the arguments text), then by parser
    parsers = defaultdict(dict)

    for cc in clist:
        for c in cls.get_cls_instance(cc, mode):
            args_expr = []
            args_str = text[len(name):].strip(' ')

            start = 0
            cannot_parse = False
            len_o = len(args_str)

            for i, f in enumerate(c.args):
                # Number of characters already consumed
                start_i = len_o - len(args_str)
                if isinstance(f.parser, tuple):
                    parser = f.parser
                else:
                    parser = (f.parser,)
                for p in parser:
                    if p in parsers[(i, start_i)]:
                        # Already scanned at this position
                        continue
                    try:
                        total_scans += 1
                        v, start, stop = next(p.scanString(args_str))
                    except StopIteration:
                        v, start, stop = [None], None, None
                    if start != 0:
                        # Only a match at the very beginning is accepted
                        v, start, stop = [None], None, None
                    if v != [None]:
                        v = f.asm_ast_to_expr(v[0], loc_db)
                    if v is None:
                        v, start, stop = [None], None, None
                    parsers[(i, start_i)][p] = v, start, stop

                start, stop = f.fromstring(args_str, loc_db, parsers[(i, start_i)])
                if start != 0:
                    log.debug("cannot fromstring %r", args_str)
                    cannot_parse = True
                    break
                if f.expr is None:
                    raise NotImplementedError('not fully functional')
                f.expr = expr_simp(f.expr)
                args_expr.append(f.expr)
                # Drop the parsed argument and the following separator
                args_str = args_str[stop:].strip(' ')
                if args_str.startswith(','):
                    args_str = args_str[1:]
                args_str = args_str.strip(' ')
            if args_str:
                # Some text is left once all the arguments are parsed
                cannot_parse = True
            if cannot_parse:
                continue

            out.append(c)
            out_args.append(args_expr)
            break

    if len(out) == 0:
        raise ValueError('cannot fromstring %r' % text)
    if len(out) != 1:
        log.debug('fromstring multiple args ret default')
    # The first candidate is used when several of them match
    c = out[0]
    c_args = out_args[0]

    instr = cls.instruction(c.name, mode, c_args,
                            additional_info=c.additional_info())
    return instr
for c_str, ctype in mychandler.expr_to_c_and_types(expr): print(c_str, ctype) computed.add((str(ctype), c_str)) assert computed == result for out_type, out_str in computed: parsed_expr = mychandler.c_to_expr(out_str) parsed_type = mychandler.c_to_type(out_str) print("Access expr:", parsed_expr) print("Access type:", parsed_type) ast = parse_access(out_str) access_c = ast_get_c_access_expr(ast, c_context) print("Generated access:", access_c) parsed_expr_bis, parsed_type_bis = mychandler.exprc2expr.get_expr(access_c, c_context) assert parsed_expr_bis is not None assert parsed_expr == parsed_expr_bis assert parsed_type == parsed_type_bis parsed_expr_3, parsed_type_3 = mychandler.c_to_expr_and_type(out_str) assert parsed_expr_3 is not None assert parsed_expr == parsed_expr_3 assert parsed_type == parsed_type_3 expr_new1 = expr_simp(parsed_expr) expr_new2 = expr_simp(expr) print("\t", expr_new1) assert expr_new1 == expr_new2
def test(left, right):
    """Check ModularIntervals operators against element-wise results.

    Every item of `left` and of `right` is wrapped in a ModularIntervals
    built with the module-level `size`. For each operator, the result
    computed on the intervals must contain the result computed on every
    concrete element (or pair of elements) of the operands. Plain integer
    results are reduced with the module-level `mask`; the last three
    operators are evaluated through simplified ExprOp expressions instead.

    Raises AssertionError as soon as one concrete result is missing from the
    corresponding interval result.
    """
    global size, mask

    # Operators usable unchanged on intervals and on Python integers, in the
    # order they are verified.
    shared_operators = (
        lambda a, b: a + b,   # ADD
        lambda a, b: a | b,   # OR
        lambda a, b: a & b,   # AND
        lambda a, b: a ^ b,   # XOR
        lambda a, b: a * b,   # MUL
        lambda a, b: a >> b,  # >>
        lambda a, b: a << b,  # <<
    )

    # Operators with no Python integer equivalent: the reference value is
    # obtained by simplifying an ExprOp with the given operator name.
    expr_operators = (
        ('a>>', lambda itv, other: itv.arithmetic_shift_right(other)),
        ('>>>', lambda itv, other: itv.rotation_right(other)),
        ('<<<', lambda itv, other: itv.rotation_left(other)),
    )

    for left_spec in left:
        left_itv = ModularIntervals(size, left_spec)
        left_values = list(interval_elements(left_itv))

        # NEG is the only check that needs no right-hand operand
        negated = -left_itv
        assert all(((-x) & mask) in negated for x in left_values)

        for right_spec in right:
            right_itv = ModularIntervals(size, right_spec)
            right_values = list(interval_elements(right_itv))

            # Modulo is only checked against a single integer; zero is
            # skipped to avoid a division by zero
            if len(right_values) == 1 and right_values[0] != 0:
                modulus = right_values[0]
                remainder = left_itv % modulus
                assert all(
                    ((x % modulus) & mask) in remainder
                    for x in left_values
                )

            for operator in shared_operators:
                expected = operator(left_itv, right_itv)
                assert all(
                    (operator(x, y) & mask) in expected
                    for x in left_values
                    for y in right_values
                )

            for op_name, interval_operator in expr_operators:
                expected = interval_operator(left_itv, right_itv)
                for x in left_values:
                    x_expr = ExprInt(x, size)
                    for y in right_values:
                        y_expr = ExprInt(y, size)
                        simplified = expr_simp(ExprOp(op_name, x_expr, y_expr))
                        assert int(simplified) in expected
# Lift the first assembly block into a fresh IR control-flow graph
ircfg = lifter.new_ircfg()
first_block = list(asmcfg.blocks)[0]
lifter.add_asmblock_to_ircfg(first_block, ircfg)

# --- Symbolic execution --- #
from miasm.ir.symbexec import SymbolicExecutionEngine
from miasm.expression.expression import *

symb = SymbolicExecutionEngine(lifter, machine.mn.regs.regs_init)

# irDst contains the offset of next IR basic block to execute
irDst = symb.run_at(ircfg, entry_addr, step=False)
print("IR Dest = ", irDst)

# Provide symbolic context to irDst: the 32-bit memory cell at EBP_init - 4
# is replaced by the symbol "flag" before irDst is evaluated again
expr_flag = ExprId("flag", 32)
flag_cell = expr_simp(
    ExprMem(machine.mn.regs.EBP_init - ExprInt(0x4, 32), 32)
)
irdst_with_flag = expr_simp(irDst.replace_expr({flag_cell: expr_flag}))
result = symb.eval_expr(irdst_with_flag)
print("IR Dest Semantics = ", result)

# Dump the final state of symbolic execution
# symb.dump()
# Record every (type string, C string) pair yielded for `expr` in `computed`,
# then require it to match the expected `result` exactly.
# NOTE(review): `expr`, `result` and `computed` are bound outside this
# fragment -- presumably by an enclosing loop over test cases; confirm.
for c_str, ctype in mychandler.expr_to_c_and_types(expr):
    print(c_str, ctype)
    computed.add((str(ctype), c_str))
assert computed == result

# Parse each generated C string back through three different entry points and
# check that they all yield the same expression and type.
for out_type, out_str in computed:
    # Entry point 1: expression and type parsed by two separate calls
    parsed_expr = mychandler.c_to_expr(out_str)
    parsed_type = mychandler.c_to_type(out_str)
    print("Access expr:", parsed_expr)
    print("Access type:", parsed_type)

    # Entry point 2: parse the string into an AST, convert that AST with
    # ast_get_c_access_expr, then resolve the converted value via exprc2expr
    ast = parse_access(out_str)
    access_c = ast_get_c_access_expr(ast, c_context)
    print("Generated access:", access_c)

    parsed_expr_bis, parsed_type_bis = mychandler.exprc2expr.get_expr(
        access_c, c_context)
    assert parsed_expr_bis is not None
    assert parsed_expr == parsed_expr_bis
    assert parsed_type == parsed_type_bis

    # Entry point 3: expression and type parsed by a single combined call
    parsed_expr_3, parsed_type_3 = mychandler.c_to_expr_and_type(out_str)
    assert parsed_expr_3 is not None
    assert parsed_expr == parsed_expr_3
    assert parsed_type == parsed_type_3

    # Once both sides are simplified, the re-parsed expression must be equal
    # to the expression the C string was generated from
    expr_new1 = expr_simp(parsed_expr)
    expr_new2 = expr_simp(expr)
    print("\t", expr_new1)
    assert expr_new1 == expr_new2
def fromstring(cls, text, loc_db, mode=None):
    """Build an instruction from its textual assembly form.

    The first run of non-whitespace characters in `text` is the mnemonic
    name; what follows it is the argument string. Every candidate registered
    under that name in `cls.all_mn_name` is expanded with
    `cls.get_cls_instance(candidate, mode)`, and the arguments of each
    instance are parsed in order. For each candidate, the first instance that
    consumes the whole argument string is kept. The first kept instance is
    used to build the returned instruction.

    Args:
        text: assembly line, e.g. a mnemonic followed by comma-separated
            arguments. Leading whitespace before the mnemonic is accepted.
        loc_db: forwarded to the argument parsers (`asm_ast_to_expr` and
            the argument's own `fromstring`).
        mode: forwarded to `cls.get_cls_instance` and `cls.instruction`.

    Returns:
        The object built by `cls.instruction(name, mode, args,
        additional_info=...)`.

    Raises:
        ValueError: `text` holds no mnemonic, the mnemonic is not in
            `cls.all_mn_name`, or no candidate parses the arguments.
        NotImplementedError: an argument was accepted but its `expr`
            attribute is still None.
    """
    global total_scans

    # re.search returns None when `text` is empty or only whitespace; report
    # that as a ValueError like every other parsing failure.
    name_match = re.search(r'(\S+)', text)
    if name_match is None:
        raise ValueError('cannot find name', text)
    name = name_match.group(1)

    if name not in cls.all_mn_name:
        raise ValueError('unknown name', name)

    # The arguments start where the mnemonic match ends. Slicing at
    # len(name) would be wrong as soon as `text` has leading whitespace.
    raw_args = text[name_match.end():].strip(' ')

    out = []
    out_args = []
    # Scan results, stored per (argument index, offset in the argument
    # string) and then per parser object, so a parser already tried at that
    # position is not run again for another candidate.
    parsers = defaultdict(dict)

    for cc in list(cls.all_mn_name[name]):
        for c in cls.get_cls_instance(cc, mode):
            args_expr = []
            args_str = raw_args
            cannot_parse = False
            len_o = len(args_str)

            for i, f in enumerate(c.args):
                start_i = len_o - len(args_str)
                # An argument may declare one parser or a tuple of them
                if isinstance(f.parser, tuple):
                    parser = f.parser
                else:
                    parser = (f.parser,)

                cached = parsers[(i, start_i)]
                for p in parser:
                    if p in cached:
                        continue
                    total_scans += 1
                    try:
                        v, start, stop = next(p.scanString(args_str))
                    except StopIteration:
                        v, start, stop = [None], None, None
                    # Only a match anchored at the current position counts
                    if start != 0:
                        v, start, stop = [None], None, None
                    if v != [None]:
                        v = f.asm_ast_to_expr(v[0], loc_db)
                    if v is None:
                        v, start, stop = [None], None, None
                    cached[p] = v, start, stop

                start, stop = f.fromstring(args_str, loc_db, cached)
                if start != 0:
                    log.debug("cannot fromstring %r", args_str)
                    cannot_parse = True
                    break
                if f.expr is None:
                    raise NotImplementedError('not fully functional')
                f.expr = expr_simp(f.expr)
                args_expr.append(f.expr)

                # Drop the consumed text and one optional separating comma
                args_str = args_str[stop:].strip(' ')
                if args_str.startswith(','):
                    args_str = args_str[1:]
                args_str = args_str.strip(' ')

            # Leftover text means this instance does not describe the line
            if args_str:
                cannot_parse = True
            if cannot_parse:
                continue

            out.append(c)
            out_args.append(args_expr)
            break

    if not out:
        raise ValueError('cannot fromstring %r' % text)
    if len(out) != 1:
        log.debug('fromstring multiple args ret default')

    # When several candidates match, the first one wins
    c = out[0]
    c_args = out_args[0]

    instr = cls.instruction(c.name, mode, c_args,
                            additional_info=c.additional_info())
    return instr
# greater lesser, neg (b1, expr_is_signed_greater, int_1, int_m1), (b1, expr_is_signed_lower, int_m1, int_1), (b0, expr_is_signed_greater, int_m1, int_1), (b0, expr_is_signed_lower, int_1, int_m1), (b1, expr_is_signed_greater_or_equal, int_1, int_m1), (b1, expr_is_signed_lower_or_equal, int_m1, int_1), (b0, expr_is_signed_greater_or_equal, int_m1, int_1), (b0, expr_is_signed_lower_or_equal, int_1, int_m1), (b1, expr_is_signed_greater_or_equal, int_m1, int_m1), (b1, expr_is_signed_lower_or_equal, int_m1, int_m1), (b1, expr_is_signed_greater, int_m1, int_m2), (b1, expr_is_signed_lower, int_m2, int_m1), (b0, expr_is_signed_greater, int_m2, int_m1), (b0, expr_is_signed_lower, int_m1, int_m2), (b1, expr_is_signed_greater_or_equal, int_m1, int_m2), (b1, expr_is_signed_lower_or_equal, int_m2, int_m1), (b0, expr_is_signed_greater_or_equal, int_m2, int_m1), (b0, expr_is_signed_lower_or_equal, int_m1, int_m2), # eq/neq (b1, expr_is_equal, int_1, int_1), (b1, expr_is_not_equal, int_0, int_1), (b0, expr_is_equal, int_1, int_0), (b0, expr_is_not_equal, int_0, int_0), ] for result, func, arg1, arg2 in tests: assert result == expr_simp(func(arg1, arg2))