def __init__(self, expr):
    """Build the translation form for an expression
    @expr: Expr instance"""

    # Languages offered in the drop-down; the first one is selected initially
    self.languages = list(Translator.available_languages())
    self.expr = expr

    # Text displayed when the form opens: translation in the first language
    first_language = self.languages[0]
    initial_text = Translator.to_language(first_language).from_expr(self.expr)

    # Controls bound to the {placeholders} of the form template
    controls = {
        'result': idaapi.Form.MultiLineTextControl(
            text=initial_text,
            flags=translatorForm.flags,
        ),
        'cbLanguage': idaapi.Form.DropdownListControl(
            items=self.languages,
            readonly=True,
            selval=0,
        ),
        'FormChangeCb': idaapi.Form.FormChangeCb(self.OnFormChange),
    }

    # NOTE(review): the template is reproduced exactly as it appears here (on
    # a single line) -- confirm that no line breaks were lost from it.
    template = r"""STARTITEM 0 Python Expression {FormChangeCb} <Language:{cbLanguage}> <Translation:{result}> """

    # Create the Form
    idaapi.Form.__init__(self, template, controls)
def __init__(self, expr):
    """Build the translation form for an expression
    @expr: Expr instance"""

    # Languages offered in the drop-down; the first one is selected initially
    self.languages = list(Translator.available_languages())
    self.expr = expr

    # Text displayed when the form opens: translation in the first language
    first_language = self.languages[0]
    initial_text = Translator.to_language(first_language).from_expr(self.expr)

    # Controls bound to the {placeholders} of the form template
    controls = {
        'result': idaapi.Form.MultiLineTextControl(
            text=initial_text,
            flags=translatorForm.flags,
        ),
        'cbLanguage': idaapi.Form.DropdownListControl(
            items=self.languages,
            readonly=True,
            selval=0,
        ),
        'FormChangeCb': idaapi.Form.FormChangeCb(self.OnFormChange),
    }

    # NOTE(review): the template is reproduced exactly as it appears here (on
    # a single line) -- confirm that no line breaks were lost from it.
    template = r"""STARTITEM 0 Python Expression {FormChangeCb} <Language:{cbLanguage}> <Translation:{result}> """

    # Create the Form
    idaapi.Form.__init__(self, template, controls)
def test_ExprOp_toC(self):
    """Check the C translation of ExprOp with one, two and three operands,
    and that an unknown operator is rejected for each operand count"""
    from miasm.expression.expression import ExprInt, ExprOp
    from miasm.ir.translators.C import Translator

    args = [ExprInt(i, 32) for i in range(9)]
    translator = Translator.to_language("C")
    one, two, three = args[:1], args[:2], args[:3]

    def expect_unsupported(operands):
        # The unknown operator 'X' must raise NotImplementedError
        self.assertRaises(NotImplementedError,
                          translator.from_expr,
                          ExprOp('X', *operands))

    # Unary operators
    unary_cases = (
        ('parity', r'parity(0x0&0xffffffff)'),
        ('!', r'(~ 0x0)&0xffffffff'),
        ('hex2bcd', r'hex2bcd_32(0x0)'),
        ('fabs', r'fabs(0x0)'),
    )
    for op, expected in unary_cases:
        self.translationTest(ExprOp(op, *one), expected)
    expect_unsupported(one)

    # Binary operators ('cntleadzeros' takes a single operand but is checked
    # at this position)
    binary_cases = (
        (TOK_EQUAL, two, r'(((0x0&0xffffffff) == (0x1&0xffffffff))?1:0)'),
        ('%', two, r'(((0x0&0xffffffff)%(0x1&0xffffffff))&0xffffffff)'),
        ('-', two, r'(((0x0&0xffffffff) - (0x1&0xffffffff))&0xffffffff)'),
        ('cntleadzeros', one, r'cntleadzeros(0x0, 0x20)'),
        ('x86_cpuid', two, r'x86_cpuid(0x0, 0x1)'),
        ('fcom0', two, r'fcom0(0x0, 0x1)'),
        ('fadd', two, r'fadd(0x0, 0x1)'),
        ('segm', two, r'segm2addr(jitcpu, 0x0, 0x1)'),
        ('imod', two, r'imod32((struct vm_cpu*)jitcpu->cpu, 0x0, 0x1)'),
        ('bcdadd', two, r'bcdadd_32(0x0, 0x1)'),
    )
    for op, operands, expected in binary_cases:
        self.translationTest(ExprOp(op, *operands), expected)
    expect_unsupported(two)

    # Other cases
    self.translationTest(
        ExprOp('+', *three),
        r'(((0x0&0xffffffff)+(0x1&0xffffffff)+(0x2&0xffffffff))&0xffffffff)')
    expect_unsupported(three)
def test_ExprOp_toC(self):
    """Check the C translation of ExprOp with one, two and three operands,
    and that an unknown operator is rejected for each operand count"""
    from miasm.expression.expression import ExprInt, ExprOp
    from miasm.ir.translators.C import Translator

    args = [ExprInt(i, 32) for i in range(9)]
    translator = Translator.to_language("C")
    one, two, three = args[:1], args[:2], args[:3]

    def expect_unsupported(operands):
        # The unknown operator 'X' must raise NotImplementedError
        self.assertRaises(NotImplementedError,
                          translator.from_expr,
                          ExprOp('X', *operands))

    # Unary operators
    unary_cases = (
        ('parity', r'parity(0x0&0xffffffff)'),
        ('!', r'(~ 0x0)&0xffffffff'),
        ('hex2bcd', r'hex2bcd_32(0x0)'),
        ('fabs', r'fabs(0x0)'),
    )
    for op, expected in unary_cases:
        self.translationTest(ExprOp(op, *one), expected)
    expect_unsupported(one)

    # Binary operators ('cntleadzeros' takes a single operand but is checked
    # at this position)
    binary_cases = (
        (TOK_EQUAL, two, r'(((0x0&0xffffffff) == (0x1&0xffffffff))?1:0)'),
        ('%', two, r'(((0x0&0xffffffff)%(0x1&0xffffffff))&0xffffffff)'),
        ('-', two, r'(((0x0&0xffffffff) - (0x1&0xffffffff))&0xffffffff)'),
        ('cntleadzeros', one, r'cntleadzeros(0x0, 0x20)'),
        ('x86_cpuid', two, r'x86_cpuid(0x0, 0x1)'),
        ('fcom0', two, r'fcom0(0x0, 0x1)'),
        ('fadd', two, r'fadd(0x0, 0x1)'),
        ('segm', two, r'segm2addr(jitcpu, 0x0, 0x1)'),
        ('imod', two, r'imod32((vm_cpu_t*)jitcpu->cpu, 0x0, 0x1)'),
        ('bcdadd', two, r'bcdadd_32(0x0, 0x1)'),
    )
    for op, operands, expected in binary_cases:
        self.translationTest(ExprOp(op, *operands), expected)
    expect_unsupported(two)

    # Other cases
    self.translationTest(
        ExprOp('+', *three),
        r'(((0x0&0xffffffff)+(0x1&0xffffffff)+(0x2&0xffffffff))&0xffffffff)')
    expect_unsupported(three)
def OnFormChange(self, fid):
    """Form callback: refresh the translation when the language changes
    @fid: identifier of the control which triggered the callback
    Return -1 if the translation raised, 1 otherwise"""
    # Only the language drop-down requires a refresh
    if fid != self.cbLanguage.id:
        return 1

    # Make the result field visible again (a previous failed translation
    # hides it)
    self.ShowField(self.result, True)

    # Translate the expression into the selected language
    selected_index = self.GetControlValue(self.cbLanguage)
    dest_lang = self.languages[selected_index]
    try:
        translated = Translator.to_language(dest_lang).from_expr(self.expr)
    except Exception:
        # Translation failed: hide the result field and report the failure
        self.ShowField(self.result, False)
        return -1

    # Update the form
    new_content = idaapi.textctrl_info_t(text=str(translated),
                                         flags=translatorForm.flags)
    self.SetControlValue(self.result, new_content)
    return 1
def OnFormChange(self, fid):
    """Form callback: refresh the translation when the language changes
    @fid: identifier of the control which triggered the callback
    Return -1 if the translation raised, 1 otherwise"""
    # Only the language drop-down requires a refresh
    if fid != self.cbLanguage.id:
        return 1

    # Make the result field visible again (a previous failed translation
    # hides it)
    self.ShowField(self.result, True)

    # Translate the expression into the selected language
    selected_index = self.GetControlValue(self.cbLanguage)
    dest_lang = self.languages[selected_index]
    try:
        translated = Translator.to_language(dest_lang).from_expr(self.expr)
    except Exception:
        # Translation failed: hide the result field and report the failure
        self.ShowField(self.result, False)
        return -1

    # Update the form
    new_content = idaapi.textctrl_info_t(text=str(translated),
                                         flags=translatorForm.flags)
    self.SetControlValue(self.result, new_content)
    return 1
def __init__(self, machine, produce_solution=PRODUCE_SOLUTION_CODE_COV,
             known_solutions=None, **kwargs):
    """Init a DSEPathConstraint
    @machine: Machine of the targeted architecture instance
    @produce_solution: (optional) if set, new solutions will be computed
    @known_solutions: (optional) iterable of solution identifiers which are
    already known; they are used to seed the set of known solutions
    @kwargs: forwarded to the parent constructor"""
    super(DSEPathConstraint, self).__init__(machine, **kwargs)

    # Dependency check
    assert z3 is not None

    # Init PathConstraint specifics structures
    self.cur_solver = z3.Solver()
    self.new_solutions = {}  # solution identifier -> solution's model
    # Set of solution identifiers. Copy the caller's collection instead of
    # discarding it, so that the argument is actually taken into account and
    # the caller's object is never mutated.
    if known_solutions is None:
        self._known_solutions = set()
    else:
        self._known_solutions = set(known_solutions)
    self.z3_trans = Translator.to_language("z3")
    self._produce_solution_strategy = produce_solution
    self._previous_addr = None
    # The path history is only tracked for the path coverage strategy
    self._history = None
    if produce_solution == self.PRODUCE_SOLUTION_PATH_COV:
        self._history = []  # List of addresses in the current path
def emul(self, lifter, ctx=None, step=False):
    """Symbolically execute the recorded history and return the value of
    each input. The solver holding the path constraints is saved in
    self._solver
    @lifter: lifter instance given to the symbolic execution engine
    @ctx: (optional) initial symbolic context
    @step: (optional) forwarded to the engine's eval_updt_irblock"""
    # Work on a private copy of the initial context
    initial_ctx = dict(ctx) if ctx is not None else {}

    solver = z3.Solver()
    engine = SymbolicExecutionEngine(lifter, initial_ctx)

    # The recorded history is walked in reverse order
    path = self.history[::-1]
    path_len = len(path)
    translator = Translator.to_language("z3")
    dst_size = self._ircfg.IRDst.size

    for position, loc_key in enumerate(path, 1):
        is_last = position == path_len

        # Only the last block may be limited to the line of the initial state
        if is_last and loc_key == self.initial_state.loc_key:
            line_nb = self.initial_state.line_nb
        else:
            line_nb = None
        irb = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb)

        # Emul the block and get back destination
        dst = engine.eval_updt_irblock(irb, step=step)

        # Constrain the destination to the next block of the path
        if not is_last:
            next_loc_key = path[position]
            expected = engine.eval_expr(ExprLoc(next_loc_key, dst_size))
            constraint = self._gen_path_constraints(translator, dst, expected)
            solver.add(constraint)

    # Save the solver
    self._solver = solver

    # Return only the input values (the other ones could be wrong)
    return {
        element: engine.eval_expr(element)
        for element in self.inputs
    }
action="store_true", help="Enable check against z3") parser.add_argument("-v", "--verbose", action="store_true", help="Verbose simplify") args = parser.parse_args() if args.verbose: log_exprsimp.setLevel(logging.DEBUG) # Additional imports and definitions if args.z3: import z3 from miasm.ir.translators import Translator trans = Translator.to_language("z3") def check(expr_in, expr_out): """Check that expr_in is always equals to expr_out""" print("Ensure %s = %s" % (expr_in, expr_out)) solver = z3.Solver() solver.add(trans.from_expr(expr_in) != trans.from_expr(expr_out)) result = solver.check() if result != z3.unsat: print("ERROR: a counter-example has been founded:") model = solver.model() print(model) print("Reinjecting in the simplifier:")
def _process_jmp_table(self, cur_bloc, mn, attrib, loc_db, pool_bin, offsets_to_dis):
    """Try to resolve the indirect jump ending @cur_bloc as a jump table and
    replace it with a chain of compare-and-jump blocks.

    The possible values of the jump table control variable are enumerated
    with z3 (at most 500 of them); every resolved destination is added to
    @offsets_to_dis and the generated blocks are added to this graph.
    @cur_bloc.lines is replaced and @cur_bloc.loc_key is recorded in
    self.jmp_table_loc_keys.

    @cur_bloc: asm block suspected to end with a jump table dispatch
    @mn, @attrib: forwarded to get_ira to select the IR analysis class
    @loc_db: location database used for offsets and new locations
    @pool_bin: forwarded to MySymbolicExecutionEngine
    @offsets_to_dis: collection receiving the resolved destinations (its
    `add` method is called)

    Return None in every case; the function gives up silently (log only)
    when the pattern is not recognized.
    Raise RuntimeError if 500 control variable values have been enumerated
    and the solver still has solutions.
    """
    # TODO add support for jump tables with "AND cntrl_var, range" boundary check; such jmp tables were present only
    # in library functions in Stantinko samples
    # add current block to the asmcfg to make it accessible in the ircfg edges, add_block is called anyway right
    # after this callback, it will notice that the block has been already added
    self.add_block(cur_bloc)
    dst_address = loc_db.get_location_offset(cur_bloc.loc_key)
    logger.info("Possible jump table addr: 0x%x" % dst_address)
    ira = get_ira(mn, attrib)
    ir_arch = ira(loc_db)
    ircfg = ir_arch.new_ircfg_from_asmcfg(self)
    # the previous blocks should have exactly 1 predecessor dictating range
    predecessors = self.predecessors(cur_bloc.loc_key)
    if len(predecessors) != 1:
        logger.info("Expected exactly one predecessor")
        return
    predecessor = ircfg.blocks[predecessors.pop()]
    irdst_block = ircfg.blocks[cur_bloc.loc_key]
    # The IR block and the asm block differ in length: walk the IR successors until a block whose IRDst is a
    # memory expression is reached (presumably the asm block has been split into several IR blocks -- TODO confirm)
    # NOTE(review): todo.pop() raises KeyError if the set is exhausted before such a block is found
    if len(irdst_block.assignblks) != len(cur_bloc.lines):
        processed = set()
        todo = {irdst_block.loc_key}
        while not irdst_block.dst.is_mem():
            loc_key = todo.pop()
            if loc_key in processed:
                continue
            processed.add(loc_key)
            irdst_block = ircfg.blocks[loc_key]
            todo.update(ircfg.successors(loc_key))
    # we shouldn't stumble upon crashing segm and call operators even though implicit is required to process
    # initial IRDst(mentioned operators cause crashes of the engine behind implicit) since we operate only on the
    # 2 crucial basic blocks. The predecessor contains range of the jump table, we use it to determine constructs
    # of the jump table and track back base code segment address assignment to target the msvc compiler and x64
    # architecture, other compilers use directly RIP related addressing to get the address.
    # get real predecessor
    asm_block = self.loc_key_to_block(predecessor.loc_key)
    # Same walk as above, this time looking for the IR block which has cur_bloc among its successors
    if len(predecessor.assignblks) != len(asm_block.lines):
        processed = set()
        todo = {predecessor.loc_key}
        while cur_bloc.loc_key not in ircfg.successors(predecessor.loc_key):
            loc_key = todo.pop()
            if loc_key in processed:
                continue
            processed.add(loc_key)
            predecessor = ircfg.blocks[loc_key]
            todo.update(ircfg.successors(loc_key))
    # get jump_table_control_variable from predecessor
    dg = DependencyGraph(ircfg, implicit=True, apply_simp=True, follow_mem=True, follow_call=False)
    jtcdg = JTCVariableDependencyGraph(predecessor.loc_key, ircfg, implicit=True, apply_simp=True,
                                       follow_mem=False, follow_call=False)
    dependency_result_iter = iter(jtcdg.get(irdst_block.loc_key, {ircfg.IRDst}, len(predecessor.assignblks),
                                            {predecessor.loc_key}))
    # NOTE(review): next() raises StopIteration when the dependency graph yields no solution
    solution_predecessor = next(dependency_result_iter)
    # jump table control variable
    jtc_var = jtcdg.jtc_var
    if not jtc_var:
        logger.info("couldn't determine single jump table control variable")
        return
    # get symbolic execution engine to be used in both predecessor and jmp table block
    symb_exec_both = MySymbolicExecutionEngine(pool_bin, jtc_var, ir_arch)
    try:
        # symbolically evaluate lines influencing IRDst of the predecessor leading to jtc_var
        for line_nb in sorted({node.line_nb for node in solution_predecessor.relevant_nodes
                               if node.loc_key == predecessor.loc_key}):
            assign_blk = predecessor.assignblks[line_nb]
            symb_exec_both.eval_updt_assignblk(assign_blk)
    except (KeyError, TypeError):
        logger.error(
            "Couldn't symbolically eval predecessor of 0x%x" % loc_db.get_location_offset(cur_bloc.loc_key))
        # stantinko contains illegal unreachable dereferences prior jmp tables, such as
        # xor eax, eax; movsx eax, byte ptr [eax]
        return
    # get symbolic execution engine supporting binary memory dereference
    # NOTE(review): the positional arguments differ from the construction of symb_exec_both above
    # (ir_arch is given where jtc_var was) -- confirm against the constructor signature
    symb_exec_minimal = MySymbolicExecutionEngine(pool_bin, ir_arch, symb_exec_both.symbols.copy())
    predecessor_irdst_equation = symb_exec_both.symbols[ircfg.IRDst]
    # get equation whose solutions solve the indirect jump
    irdst_block = ircfg.blocks[cur_bloc.loc_key]
    # Same walk as the first one, but every visited block is also symbolically executed
    if len(irdst_block.assignblks) != len(cur_bloc.lines):
        processed = set()
        todo = {irdst_block.loc_key}
        while not irdst_block.dst.is_mem():
            symb_exec_both.eval_updt_irblock(irdst_block)
            loc_key = todo.pop()
            if loc_key in processed:
                continue
            processed.add(loc_key)
            irdst_block = ircfg.blocks[loc_key]
            todo.update(ircfg.successors(loc_key))
    irdst_equation = symb_exec_both.eval_updt_irblock(irdst_block)
    # Filled by the visitor callback below
    sizes = set()
    # prevent mem processing via raw arrays by using var ID instead
    # we also want to set a maximum boundary so slices don't cause the sat solver generate a huge number of results
    visitor = ExprVisitorCallbackTopToBottom(lambda x: self._eliminate_jtc_var_slice_cb(x, sizes, jtc_var))
    irdst_equation = visitor.visit(irdst_equation)
    predecessor_irdst_equation = visitor.visit(predecessor_irdst_equation)
    # The boundary defaults to the size of the control variable, and is lowered to the smallest collected size
    # greater than 1 when there is one
    size_boundary = jtc_var.size
    sizes = sorted(filter(lambda x: x > 1, sizes))
    if sizes:
        size_boundary = sizes[0]
    jtc_var_id = ExprId("jtc_var", jtc_var.size)
    irdst_equation = irdst_equation.replace_expr({jtc_var: jtc_var_id})
    predecessor_irdst_equation = predecessor_irdst_equation.replace_expr({jtc_var: jtc_var_id})
    # track possible CS base address dependency, ignore control variable from predecessor
    eliminated_jtc_var_equation = irdst_equation.replace_expr({jtc_var_id: ExprInt(0, jtc_var_id.size)})
    evaluated_ejtc_var_equation = symb_exec_both.eval_expr(eliminated_jtc_var_equation)
    if not evaluated_ejtc_var_equation.is_int():
        # we need to determine code base
        dependencies = dg._follow_apply_cb(evaluated_ejtc_var_equation)
        expr_deps = {fexpr.element for fexpr in dependencies if fexpr.follow}
        dg_base = DependencyGraph(ircfg, implicit=False, apply_simp=True, follow_mem=True, follow_call=False)
        dependency_result_iter = iter(dg_base.get(cur_bloc.loc_key, expr_deps, len(cur_bloc.lines),
                                                  {self.heads()[0]}))
        solution = next(dependency_result_iter)
        # NOTE(review): solution.emul(ir_arch) is called once per expression of expr_deps
        code_base_dict = {expr: solution.emul(ir_arch)[expr] for expr in expr_deps}
        irdst_equation = irdst_equation.replace_expr(code_base_dict)
        predecessor_irdst_equation = predecessor_irdst_equation.replace_expr(code_base_dict)
    # we need backward slice of the jump table destination dependencies to retain the other independent assignments
    # during cmp chain assembling
    dependency_result = dg.get(cur_bloc.loc_key, {ircfg.IRDst}, len(cur_bloc.lines), {cur_bloc.loc_key})
    dependent_line_nbs = {}
    for solution in dependency_result:
        dependent_line_nbs.setdefault(solution.loc_key, set()).update(
            {dn.line_nb for dn in solution.relevant_nodes})
    # Keep (and evaluate) only the assignments the jump destination does not depend on
    cur_bloc_new_lines = []
    for loc_key, lines in dependent_line_nbs.items():
        for line_nb, assignblk in enumerate(ircfg.blocks[loc_key].assignblks):
            if line_nb not in lines:
                symb_exec_minimal.eval_assignblk(assignblk)
                cur_bloc_new_lines.append(assignblk.instr)
    # Select the register (or memory) compared by the generated cmp chain, and its value expressed from jtc_var
    comparison_reg_id = None
    comparison_reg_value = None
    if jtc_var not in symb_exec_minimal.symbols.symbols_id:
        comparison_reg_id = jtc_var
        comparison_reg_value = jtc_var
    else:
        # The loop variable doubles as the result: after a `break`, comparison_reg_value is the matching value
        for symbol, comparison_reg_value in symb_exec_minimal.symbols.symbols_id.items():
            if jtc_var in comparison_reg_value and (symbol.is_mem() or
                                                    (symbol.is_id() and symbol.name not in
                                                     ["RIP", "EIP", "zf", "nf", "pf", "of", "cf", "af", "df",
                                                      ircfg.IRDst.name])):
                replaced_jtcv = comparison_reg_value.replace_expr({jtc_var: ExprInt(0, jtc_var.size)})
                if isinstance(symb_exec_minimal.eval_expr(replaced_jtcv), ExprInt):
                    comparison_reg_id = symbol
                    break
    if not comparison_reg_id or not comparison_reg_value:
        logger.debug("Couldn't find any candidate for comparison register at 0x%x" %
                     loc_db.get_location_offset(cur_bloc.loc_key))
        return
    from miasm.ir.translators import Translator
    import z3
    translator = Translator.to_language("z3")
    solver = z3.Solver()
    logger.debug("predecessor_irdst_equation: %s" % str(predecessor_irdst_equation))
    logger.debug(("dst_address: 0x%x" % dst_address))
    logger.debug(("jump_table_control_variable: %s" % str(jtc_var)))
    # The predecessor has to jump to the current block, and the control variable is bounded
    solver.add(translator.from_expr(predecessor_irdst_equation) == dst_address)
    translated_jtc_var = translator.from_expr(jtc_var_id)
    solver.add(translated_jtc_var >= 0)
    solver.add(translated_jtc_var < 2 ** (size_boundary - 1) - 1)
    if solver.check() != z3.sat:
        logger.debug("Couldn't find at least one jump table control variable")
        return
    dbg_destinations = set()
    # next_loc_key keeps the head of the generated chain, new_block_loc_key moves forward along it
    next_loc_key = new_block_loc_key = loc_db.add_location()
    logger.debug("comparison_reg_id: %s" % str(comparison_reg_id))
    # destination -> interval of the comparison register values leading to it
    dst_ranges = {}
    counter = 0
    # Enumerate the control variable values one model at a time, bounded to 500 iterations
    while counter < 500:
        val = solver.model()[translated_jtc_var].as_long()
        final_irdst_equation = irdst_equation.replace_expr({jtc_var_id: ExprInt(val, jtc_var_id.size)})
        final_dst = int(symb_exec_both.eval_expr(final_irdst_equation))
        cmp_reg_val = comparison_reg_value.replace_expr({jtc_var: ExprInt(val, jtc_var.size)})
        cmp_reg_val = int(symb_exec_minimal.eval_expr(cmp_reg_val))
        dst_ranges[final_dst] = dst_ranges.get(final_dst, interval()).union([(cmp_reg_val, cmp_reg_val)])
        dbg_destinations.add(final_dst)
        offsets_to_dis.add(final_dst)
        # Exclude the value which has just been handled so that the next model differs
        solver.add(translated_jtc_var != translator.from_expr(ExprInt(val, jtc_var_id.size)))
        if solver.check() != z3.sat:
            break
        counter += 1
    if counter == 500:
        raise RuntimeError("Interrupted; there might be a broken slice")
    # Build one block per (destination, range of values)
    for dst, interv in dst_ranges.items():
        cond_target_loc_key = loc_db.get_or_create_offset_location(dst)
        for lower, upper in interv:
            lower = ExprInt(lower, self.mode)
            upper = ExprInt(upper, self.mode)
            new_asm_block = AsmBlock(new_block_loc_key)
            # Location of the block following the one being built
            new_block_loc_key = loc_db.add_location()
            if lower == upper:
                # Single value: one equality check
                new_asm_block.lines = create_cmp_j_instructions(self.mode, comparison_reg_id, lower,
                                                                ExprLoc(cond_target_loc_key, self.mode), "JZ")
                new_asm_block.add_cst(cond_target_loc_key, "c_to")
                new_asm_block.add_cst(new_block_loc_key, "c_next")
            else:
                upper_check_loc_key = loc_db.add_location()
                # lower boundary check
                new_asm_block.lines = create_cmp_j_instructions(self.mode, comparison_reg_id, lower,
                                                                ExprLoc(new_block_loc_key, self.mode), "JB")
                new_asm_block.add_cst(new_block_loc_key, "c_to")
                new_asm_block.add_cst(upper_check_loc_key, "c_next")
                # upper boundary check
                upper_check_block = AsmBlock(upper_check_loc_key)
                upper_check_block.lines = create_cmp_j_instructions(self.mode, comparison_reg_id, upper,
                                                                    ExprLoc(cond_target_loc_key, self.mode),
                                                                    "JBE")
                upper_check_block.add_cst(cond_target_loc_key, "c_to")
                upper_check_block.add_cst(new_block_loc_key, "c_next")
                self.add_block(upper_check_block)
            self.add_block(new_asm_block)
    # trigger last jump unconditionally
    new_asm_block.bto = {AsmConstraintTo(cond_target_loc_key)}
    new_asm_block.lines = [create_jump_instruction(self.mode, ExprLoc(cond_target_loc_key, self.mode))]
    # The original block keeps only the independent instructions and falls through to the head of the chain
    cur_bloc.lines = cur_bloc_new_lines
    cur_bloc.add_cst(next_loc_key, "c_next")
    if not cur_bloc.lines:
        cur_bloc.lines = [create_nop(self.mode)]
    self.jmp_table_loc_keys.add(cur_bloc.loc_key)
    logger.debug("destinations: %s" % pformat([hex(i or 0) for i in dbg_destinations]))
    logger.debug("blocks: %d" % counter)
from __future__ import print_function from miasm.expression.expression import * from miasm.analysis.expression_range import expr_range from miasm.ir.translators import Translator import z3 trans = Translator.to_language("z3") a = ExprId("a", 8) b = ExprId("b", 32) for expr in [ a, b, b[4:6], a + ExprInt(4, 8), ExprInt(5, 8) + ExprInt(4, 8), a.zeroExtend(32) + ExprInt(0x100, 32), (a.zeroExtend(32) * ExprInt(3, 32)) + ExprInt(0x100, 32), (a.zeroExtend(32) + ExprInt(0x80, 32)) * ExprInt(3, 32), ExprCond(b, a.zeroExtend(32) + ExprInt(0x100, 32), a.zeroExtend(32) + ExprInt(0x500, 32)), ExprCond(b[1:2], a.zeroExtend(32), a.zeroExtend(32) + ExprInt(0x1000, 32)) + \ ExprCond(b[0:1], a.zeroExtend(32) + ExprInt(0x5000, 32), a.zeroExtend(32) + ExprInt(0x10000, 32)), - a, - ExprInt(4, 8), b[:8].zeroExtend(16) - ExprInt(4, 16), a[4:6].zeroExtend(32) + ExprInt(-1, 32), a >> ExprInt(4, 8), a << ExprInt(4, 8), ExprOp("a>>", a, ExprInt(4, 8)), ExprInt(4, 8) >> a,
1: ["-"], 2: [ "<<", ">>", ], "2+": ["+", "*", "&", "|", "^"], } print("[+] Compute a random expression:") expr = ExprRandom_OpSubRange.get(depth=8) print("-> %s" % expr) print() target_exprs = { lang: Translator.to_language(lang).from_expr(expr) for lang in Translator.available_languages() } for target_lang, target_expr in viewitems(target_exprs): print("[+] Translate in %s:" % target_lang) print(target_expr) print() print("[+] Eval in Python:") def memory(addr, size): ret = random.randint(0, (1 << size) - 1) print("Memory access: @0x%x -> 0x%x" % (addr, ret)) return ret
random.seed(0) class ExprRandom_OpSubRange(ExprRandom): operations_by_args_number = {1: ["-"], 2: ["<<", ">>",], "2+": ["+", "*", "&", "|", "^"], } print("[+] Compute a random expression:") expr = ExprRandom_OpSubRange.get(depth=8) print("-> %s" % expr) print() target_exprs = {lang:Translator.to_language(lang).from_expr(expr) for lang in Translator.available_languages()} for target_lang, target_expr in viewitems(target_exprs): print("[+] Translate in %s:" % target_lang) print(target_expr) print() print("[+] Eval in Python:") def memory(addr, size): ret = random.randint(0, (1 << size) - 1) print("Memory access: @0x%x -> 0x%x" % (addr, ret)) return ret for expr_id in expr.get_r(mem_read=True): if isinstance(expr_id, ExprId): value = random.randint(0, (1 << expr_id.size) - 1)
def translationTest(self, expr, expected):
    """Assert that the C translation of @expr is equal to @expected
    @expr: expression to translate
    @expected: expected C source"""
    from miasm.ir.translators import Translator

    c_translator = Translator.to_language("C")
    translated = c_translator.from_expr(expr)
    self.assertEqual(translated, expected)