def emul(self, ir_arch, ctx=None, step=False):
    """Replay the relevant nodes symbolically, in history order, and give
    back the values computed for the inputs nodes' elements
    @ir_arch: IntermediateRepresentation instance
    @ctx: (optional) Initial context as dictionary
    @step: (optional) Verbose execution
    Warning: The emulation is not sound if the inputs nodes depend on loop
    variant.
    """
    # Work on a private dictionary seeded from the caller's context
    initial_symbols = {}
    if ctx is not None:
        initial_symbols.update(ctx)

    # Flatten the history (walked backwards) into one list of assignment
    # blocks; only the last visited block may be cut at the initial line
    merged_assignblks = []
    total = len(self.relevant_loc_keys)
    for position, loc_key in enumerate(reversed(self.relevant_loc_keys), 1):
        is_initial = (position == total and
                      loc_key == self.initial_state.loc_key)
        line_nb = self.initial_state.line_nb if is_initial else None
        sliced = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb)
        merged_assignblks.extend(sliced.assignblks)

    # Evaluate everything as a single block living at a scratch location
    scratch_db = LocationDB()
    scratch_loc = scratch_db.get_or_create_name_location("Temp")
    engine = SymbolicExecutionEngine(ir_arch, initial_symbols)
    engine.eval_updt_irblock(IRBlock(scratch_loc, merged_assignblks), step=step)

    # Only the inputs are reported (other values could be wrong)
    values = {}
    for element in self.inputs:
        values[element] = engine.symbols[element]
    return values
def exec_instruction(mn_str, init_values, results, index=0, offset=0):
    """Symbolically execute an instruction and check the expected results.

    @mn_str: textual instruction; it is assembled, then disassembled again
    @init_values: iterable of (expression, value) pairs stored in the
                  symbolic state before the execution
    @results: sized collection of (expression, expected value) pairs
    @index: (optional) index of the assembled candidate to use
    @offset: (optional) offset given to the instruction
    Raises AssertionError when the instruction cannot be disassembled or
    when at least one expected result is not matched.
    """

    # Assemble and disassemble the instruction
    instr = mn_mep.fromstring(mn_str, "b")
    instr.mode = "b"
    mn_bin = mn_mep.asm(instr)[index]
    try:
        instr = mn_mep.dis(mn_bin, "b")
    except Disasm_Exception:
        # miasm does not know what to do with these bytes; raise explicitly
        # so the failure is not lost when assertions are disabled (-O)
        raise AssertionError("Unable to disassemble %r" % (mn_bin,))

    # Specify the instruction offset and compute the destination label
    instr.offset = offset
    loc_db = LocationDB()
    if instr.dstflow():
        instr.dstflow2label(loc_db)

    # Get the IR (the second value returned by get_ir is not used here)
    im = Lifter_MEPb(loc_db)
    iir, _ = im.get_ir(instr)

    # Filter out IRDst
    iir = [
        ir for ir in iir
        if not (isinstance(ir, ExprAssign) and
                isinstance(ir.dst, ExprId) and
                ir.dst.name == "IRDst")
    ]

    # Prepare symbolic execution
    sb = SymbolicExecutionEngine(LifterModelCallMepb(loc_db), regs_init)

    # Assign int values before symbolic evaluation
    for expr_id, expr_value in init_values:
        sb.symbols[expr_id] = expr_value

    # Execute the IR
    sb.eval_updt_assignblk(AssignBlock(iir))

    # Check if expected expr_id were modified
    matched_results = 0
    for expr_id, expr_value in results:
        result = sb.eval_expr(expr_id)
        if isinstance(result, ExprLoc):
            # A location matches when its offset equals the expected value
            addr = loc_db.get_location_offset(result.loc_key)
            if expr_value.arg == addr:
                matched_results += 1
        elif result == expr_value:
            matched_results += 1

    # Ensure that all expected results were verified. The counts are
    # compared by value: `is not` on integers only tests object identity.
    if len(results) != matched_results:
        print("Expected:", results)
        print("Modified:", list(sb.modified(mems=False)))
        raise AssertionError(
            "%d expected result(s), %d matched" % (len(results), matched_results)
        )
def miasm_dis(r2_op, r2_address, r2_buffer, r2_length):
    """Disassemble an instruction using miasm.

    @r2_op: radare2 object, cast to a RAsmOp_r2m2 pointer and filled in
    @r2_address: offset assigned to the disassembled instruction
    @r2_buffer: radare2 buffer holding the bytes to disassemble
    @r2_length: number of bytes unpacked from @r2_buffer
    Returns None; nothing is written when no miasm machine is available.
    """
    # Function-scope import: hexlify replaces the Python 2 only
    # bytes.encode("hex") and gives the same digits on Python 2 and 3
    import binascii

    # Cast radare2 variables
    rasmop = ffi.cast("RAsmOp_r2m2*", r2_op)
    opcode = ffi.cast("char*", r2_buffer)

    # Prepare the opcode
    opcode = ffi.unpack(opcode, r2_length)

    # Get the miasm machine
    machine = miasm_machine()
    if machine is None:
        return

    # Disassemble the opcode
    loc_db = LocationDB()
    try:
        mode = machine.dis_engine().attrib
        instr = machine.mn().dis(opcode, mode)
        instr.offset = r2_address
        if instr.dstflow():

            # Remember ExprInt arguments sizes
            args_size = [
                arg.size if isinstance(arg, ExprInt) else None
                for arg in instr.args
            ]

            # Adjust arguments values using the instruction offset
            instr.dstflow2label(loc_db)

            # Convert ExprLoc to ExprInt
            for i, arg in enumerate(instr.args):
                if args_size[i] is None:
                    continue
                if isinstance(arg, ExprLoc):
                    addr = loc_db.get_location_offset(arg.loc_key)
                    instr.args[i] = ExprInt(addr, args_size[i])

        dis_str = str(instr)
        dis_len = instr.l
    except Exception:
        # Best effort: any failure is reported through the mnemonic text
        dis_str = "/!\\ Can't disassemble using miasm /!\\"
        dis_len = 2  # GV: seems fishy!

    # Remaining bytes
    # NOTE(review): buf_hex is computed and truncated but not stored in the
    # RAsmOp structure by this function -- confirm whether it is still needed
    buf_hex = binascii.hexlify(opcode[0:dis_len])

    # Check buffer sizes
    if len(dis_str) - 1 > 256:
        dis_str = "/!\\ Disassembled instruction is too long /!\\"
    if len(buf_hex) - 1 > 256:
        buf_hex = buf_hex[:255]

    # Fill the RAsmOp structure
    rasmop.size = dis_len
    set_rbuf(rasmop.buf_asm, dis_str)
def __init__(self):
    """Start with no known loop and a default scope for loop-less blocks"""
    self.loc_db = LocationDB()
    self.loops = []
    self._loc_key_to_loop = {}
    self._address = None

    # Default scope, used for blocks outside of any loop; it is built on a
    # location freshly added to this object's location database
    default_scope = FlatteningLoop([], set(), {}, {}, self.loc_db.add_location())
    default_scope.is_default = True
    self._outside_of_scope = default_scope
def exec_instruction(mn_str, init_values, results, index=0, offset=0):
    """Symbolically execute an instruction and check the expected results.

    @mn_str: textual instruction; it is assembled, then disassembled again
    @init_values: iterable of (expression, value) pairs stored in the
                  symbolic state before the execution
    @results: sized collection of (expression, expected value) pairs
    @index: (optional) index of the assembled candidate to use
    @offset: (optional) offset given to the instruction
    Raises AssertionError when the instruction cannot be disassembled or
    when at least one expected result is not matched.
    """

    # Assemble and disassemble the instruction
    instr = mn_mep.fromstring(mn_str, "b")
    instr.mode = "b"
    mn_bin = mn_mep.asm(instr)[index]
    try:
        instr = mn_mep.dis(mn_bin, "b")
    except Disasm_Exception:
        # miasm does not know what to do with these bytes; raise explicitly
        # so the failure is not lost when assertions are disabled (-O)
        raise AssertionError("Unable to disassemble %r" % (mn_bin,))

    # Specify the instruction offset and compute the destination label
    instr.offset = offset
    loc_db = LocationDB()
    if instr.dstflow():
        instr.dstflow2label(loc_db)

    # Get the IR (the second value returned by get_ir is not used here)
    im = ir_mepb(loc_db)
    iir, _ = im.get_ir(instr)

    # Filter out IRDst
    iir = [
        ir for ir in iir
        if not (isinstance(ir, ExprAssign) and
                isinstance(ir.dst, ExprId) and
                ir.dst.name == "IRDst")
    ]

    # Prepare symbolic execution
    sb = SymbolicExecutionEngine(ir_a_mepb(loc_db), regs_init)

    # Assign int values before symbolic evaluation
    for expr_id, expr_value in init_values:
        sb.symbols[expr_id] = expr_value

    # Execute the IR
    sb.eval_updt_assignblk(AssignBlock(iir))

    # Check if expected expr_id were modified
    matched_results = 0
    for expr_id, expr_value in results:
        result = sb.eval_expr(expr_id)
        if isinstance(result, ExprLoc):
            # A location matches when its offset equals the expected value
            addr = loc_db.get_location_offset(result.loc_key)
            if expr_value.arg == addr:
                matched_results += 1
        elif result == expr_value:
            matched_results += 1

    # Ensure that all expected results were verified. The counts are
    # compared by value: `is not` on integers only tests object identity.
    if len(results) != matched_results:
        print("Expected:", results)
        print("Modified:", list(sb.modified(mems=False)))
        raise AssertionError(
            "%d expected result(s), %d matched" % (len(results), matched_results)
        )
def __init__(self, jitter_engine):
    """Create the location database, the machine and its jitter

    @jitter_engine: passed as second argument to the machine's jitter
    """
    # Unset at construction time
    self.dse = None
    self.assembly = None

    self.loc_db = LocationDB()
    self.machine = Machine(self.arch_name)

    # The jitter shares this object's location database; its stack and
    # trace log are set up right away
    self.myjit = self.machine.jitter(self.loc_db, jitter_engine)
    self.myjit.init_stack()
    self.myjit.set_trace_log()
def __init__(self, recognizer):
    """Take over the state of @recognizer and prepare an empty output CFG

    @recognizer: object providing the attributes copied below
    """
    # Attributes taken as-is from the recognizer
    for attr_name in (
            "ircfg",
            "asmcfg",
            "flat_loops",
            "all_affected_lines",
            "ir_arch",
            "merging_var",
            "pad",
            "possible_merge_funcs",
    ):
        setattr(self, attr_name, getattr(recognizer, attr_name))

    # The output CFG gets its own location database, filled with the
    # content of the one attached to the recognizer's asmcfg
    out_loc_db = LocationDB()
    out_loc_db.merge(recognizer.asmcfg.loc_db)
    self.out_asmcfg = AsmCFG(out_loc_db)

    self.relevant_nodes = set()
def jit_instructions(mn_str):
    """JIT instructions and return the jitter object.

    @mn_str: instructions, one per line (split on "\\n")
    """

    # Get the miasm Machine
    machine = Machine("mepb")
    mn_mep = machine.mn()
    loc_db = LocationDB()

    # Assemble each line and keep the first candidate of every instruction
    chunks = []
    for line in mn_str.split("\n"):
        instr = mn_mep.fromstring(line, "b")
        instr.mode = "b"
        chunks.append(mn_mep.asm(instr)[0])
    asm = b"".join(chunks)

    # Init the jitter and map the assembled bytes at address 0
    jitter = machine.jitter(loc_db, jit_type="gcc")
    jitter.vm.add_memory_page(0, PAGE_READ | PAGE_WRITE, asm)

    # Breakpoint right after the last assembled byte; its callback
    # returns False
    jitter.add_breakpoint(len(asm), lambda x: False)

    # Jit the instructions (jitter.init_stack() is not called here)
    jitter.init_run(0)
    jitter.continue_run()

    return jitter
def ExecuteSymbolicSingleStep(addr, state=INIT_REG):
    """Symbolically execute the single item found at @addr

    @addr: address of the item; its size and bytes are read through idc
    @state: (optional) symbolic state to start from; it is never modified
    Returns a copy of @state updated with the symbols modified by the
    execution, or an unchanged copy when disassembling, lifting or
    executing fails.
    """
    size = idc.ItemSize(addr)
    code = idc.GetManyBytes(addr, size)
    loc_db = LocationDB()

    base = addr
    try:
        ins = mn_x86.dis(bin_stream_str(code, base_address=base), 64, base)
    except Exception:
        # Best effort: bytes that cannot be disassembled leave the state
        # untouched (KeyboardInterrupt/SystemExit are no longer swallowed)
        return state.copy()

    ira = machine.ira(loc_db)
    ircfg = ira.new_ircfg()
    try:
        ira.add_instr_to_ircfg(ins, ircfg)
        sb = SymbolicExecutionEngine(ira, state)
        sb.run_at(ircfg, base)
    except Exception:
        return state.copy()

    ret = state.copy()
    for key, value in sb.modified():
        # Results of "call_func_ret" operations are replaced by a 0 constant
        if isinstance(value, ExprOp) and value.op == "call_func_ret":
            value = ExprInt(0, 64)
        ret[key] = value
    return ret
def recoverAlgorithm(self):
    """Build a new IR CFG by symbolically executing from self.address

    The normal IR CFG is computed first when it is still missing. Every
    symbolic execution result becomes one IR block; when the next target is
    an ExprCond, the block's IRDst is pointed at two new location keys and
    both sides are queued.
    Returns the (LocationDB, IRCFG) pair that was built.
    """
    if self.normalIRCFG is None:
        self.getNormalIRCFG()

    recovered_loc_db = LocationDB()
    loc_size = BinaryAnalysis.disasmEngine.attrib
    recovered_ira = BinaryAnalysis.iraType(recovered_loc_db)
    recovered_ircfg = recovered_ira.new_ircfg()

    # Location keys are numbered by hand, starting with the head at 0
    next_id = 0
    worklist = [(self.address, LocKey(next_id), {}, None)]
    next_id += 1

    while worklist:
        target, lockey, state, _pre_block = worklist.pop()
        target, state = self.symbolicExecution(
            self.normalIRA, self.normalIRCFG, target, state
        )

        if not isinstance(target, ExprCond):
            self.addIRBlock(recovered_ircfg, state, lockey)
            continue

        irdst = recovered_ircfg.IRDst
        first_key = LocKey(next_id)
        second_key = LocKey(next_id + 1)
        saved_dst = state[irdst]

        # Emit the block with IRDst redirected to the new keys, then put
        # the saved destination back before queueing both successors
        state[irdst] = ExprCond(
            saved_dst.cond,
            ExprLoc(first_key, loc_size),
            ExprLoc(second_key, loc_size),
        )
        next_id += 2
        new_block = self.addIRBlock(recovered_ircfg, state, lockey)
        state[irdst] = saved_dst

        worklist.append((target.src1, first_key, state, new_block))
        worklist.append((target.src2, second_key, state, new_block))

    return recovered_loc_db, recovered_ircfg
def __init__(self, *args, **kwargs):
    """Initialise the Jitter base with an ir_x86_16 built on a new
    LocationDB; extra arguments are forwarded to Jitter.__init__
    """
    loc_db = LocationDB()
    lifter = ir_x86_16(loc_db)
    Jitter.__init__(self, lifter, *args, **kwargs)

    self.vm.set_little_endian()

    ir_arch = self.ir_arch
    ir_arch.do_stk_segm = False

    # Keep the IR's own irbloc_fix_regs_for_mode reachable, then replace
    # it with this object's ir_archbloc_fix_regs_for_mode
    self.orig_irbloc_fix_regs_for_mode = ir_arch.irbloc_fix_regs_for_mode
    ir_arch.irbloc_fix_regs_for_mode = self.ir_archbloc_fix_regs_for_mode
def jit_mips32_binary(args):
    """Load a raw binary at address 0 and run it in the jitter

    @args: parsed options; the attributes read are binary, addr, jitter,
           trace, log_newbloc and debugging
    Returns the jitter, after the run or after the debugger loop ended.
    """
    loc_db = LocationDB()
    filepath, entryp = args.binary, int(args.addr, 0)
    myjit = machine.jitter(loc_db, jit_type=args.jitter)
    myjit.init_stack()

    # Log level (if available with jitter engine)
    myjit.set_trace_log(
        trace_instr=args.trace,
        trace_regs=args.trace,
        trace_new_blocks=args.log_newbloc
    )

    # Read the file through a context manager so the descriptor is closed
    with open(filepath, 'rb') as fdesc:
        code = fdesc.read()
    myjit.vm.add_memory_page(0, PAGE_READ | PAGE_WRITE, code)
    myjit.add_breakpoint(0x1337BEEF, code_sentinelle)

    # for stack
    myjit.vm.add_memory_page(0xF000, PAGE_READ | PAGE_WRITE, b"\x00" * 0x1000)
    myjit.cpu.SP = 0xF800

    # RA holds the breakpoint address registered above
    myjit.cpu.RA = 0x1337BEEF
    myjit.init_run(entryp)

    # Handle debugging
    if args.debugging is True:
        dbg = debugging.Debugguer(myjit)
        cmd = debugging.DebugCmd(dbg)
        cmd.cmdloop()
    else:
        print(myjit.continue_run())
    return myjit
def exec_instruction(hex_asm, init_values):
    """Disassemble an instruction, print its IR and dump a symbolic state
    seeded with @init_values

    @hex_asm: hexadecimal string of the instruction bytes
    @init_values: iterable of (register expression, value) pairs
    """
    # NOTE(review): the IR is printed but never handed to the symbolic
    # engine in this function -- confirm that this is intended

    print("Hex:", hex_asm)

    # Disassemble an instruction
    mn = mn_mep.dis(decode_hex(hex_asm), "b")
    print("Dis:", mn)

    loc_db = LocationDB()

    # Get the IR; only the first returned value is displayed
    lifter = ir_mepb(loc_db)
    iir, _extra = lifter.get_ir(mn)
    print("\nInternal representation:", iir)

    # Symbolic execution
    engine = SymbolicExecutionEngine(ir_a_mepb(loc_db), regs_init)

    # Assign register values before symbolic evaluation
    for reg_expr_id, reg_expr_value in init_values:
        engine.symbols[reg_expr_id] = reg_expr_value

    print("\nModified registers:", list(engine.modified(mems=False)))
    print("Modified memories:", list(engine.modified()))

    print("\nFinal registers:")
    engine.dump(mems=False)

    print("\nFinal mems:")
    engine.dump()
def test_ParseTxt(self):
    """parse_txt returns a true value for the first listing and raises
    ValueError for the second one"""
    from miasm.arch.x86.arch import mn_x86
    from miasm.core.parse_asm import parse_txt

    loc_db = LocationDB()

    # NOTE(review): both listings appear on a single line in this view of
    # the file; confirm their line breaks against the original test
    accepted_source = ''' ; .LFB0: .LA: .text .data .bss .string .ustring .byte 0 0x0 .byte a .comm .split .dontsplit .file .cfi_0 label: JMP EAX ;comment '''
    rejected_source = ''' .XXX '''

    parsed = parse_txt(mn_x86, 32, accepted_source, loc_db)
    self.assertTrue(parsed)

    with self.assertRaises(ValueError):
        parse_txt(mn_x86, 32, rejected_source, loc_db)
def compute(asm, inputstate={}, debug=False):
    """Assemble @asm, execute it symbolically and return what changed

    @asm: textual instruction
    @inputstate: (optional) mapping expression -> integer value, applied
                 on top of regs_init; only read, never modified
    @debug: (optional) print every symbol that differs from regs_init
    Returns a dictionary of the symbols that are not in EXCLUDE_REGS and
    differ from regs_init; ExprInt values are converted to int.
    """
    loc_db = LocationDB()

    # Initial symbols: regs_init overridden by the requested input state
    sympool = dict(regs_init)
    for reg, value in viewitems(inputstate):
        sympool[reg] = ExprInt(value, reg.size)

    ir_tmp = ir_arch(loc_db)
    ircfg = ir_tmp.new_ircfg()
    symexec = SymbolicExecutionEngine(ir_tmp, sympool)

    # Assemble, then disassemble the first candidate again
    parsed = mn.fromstring(asm, loc_db, "b")
    code = mn.asm(parsed)[0]
    instr = mn.dis(code, "b")
    instr.offset = inputstate.get(PC, 0)

    lbl = ir_tmp.add_instr_to_ircfg(instr, ircfg)
    symexec.run_at(ircfg, lbl)

    if debug:
        for reg, value in viewitems(symexec.symbols):
            if regs_init.get(reg, None) != value:
                print(reg, value)

    out = {}
    for reg, value in viewitems(symexec.symbols):
        if reg in EXCLUDE_REGS or regs_init.get(reg, None) == value:
            continue
        out[reg] = int(value) if isinstance(value, ExprInt) else value
    return out
def test_DirectiveDontSplit(self):
    """Check the offset of lbl5 and the chaining of the blocks lbl1..lbl5"""
    from miasm.arch.x86.arch import mn_x86
    from miasm.core.parse_asm import parse_txt
    from miasm.core.asmblock import asm_resolve_final

    loc_db = LocationDB()

    # NOTE(review): the listing appears on a single line in this view of
    # the file; confirm its line breaks against the original test
    source = ''' lbl0: INC EAX JNZ lbl0 INC EAX JZ lbl2 lbl1: NOP JMP lbl0 .dontsplit lbl2: MOV EAX, ECX RET .dontsplit lbl3: ADD EAX, EBX .dontsplit lbl4: .align 0x10 .string "test" lbl5: .string "toto" '''

    asmcfg = parse_txt(mn_x86, 32, source, loc_db)
    # The returned patches are not inspected by this test
    asm_resolve_final(mn_x86, asmcfg)

    lbls = [loc_db.get_name_location('lbl%d' % i) for i in range(6)]

    # align test
    assert loc_db.get_location_offset(lbls[5]) % 0x10 == 0

    lbl2block = {block.loc_key: block for block in asmcfg.blocks}

    # dontsplit test: each of lbl1..lbl4 is directly followed by the next
    for current, following in zip(lbls[1:5], lbls[2:6]):
        assert following == lbl2block[current].get_next()
def resolve_args_with_symbols(self, symbols=None):
    """Return the arguments with their location references replaced

    @symbols: (optional) location database queried for the names and the
              offset of each location; a new LocationDB is used when omitted
    Returns a list with one simplified expression per entry of self.args.
    Raises ValueError for a location that has no offset: 'Unresolved
    symbol' when it has no name either, 'cannot be determined' otherwise.
    """
    if symbols is None:
        symbols = LocationDB()
    args_out = []
    for expr in self.args:
        fixed_expr = {}
        for exprloc in m2_expr.get_expr_locs(expr):
            loc_key = exprloc.loc_key
            names = symbols.get_location_names(loc_key)

            # Special symbols: '$' is resolved through get_asm_offset and
            # '_' through get_asm_next_offset
            if b'$' in names:
                fixed_expr[exprloc] = self.get_asm_offset(exprloc)
                continue
            if b'_' in names:
                fixed_expr[exprloc] = self.get_asm_next_offset(exprloc)
                continue

            # The offset is looked up once. The former second lookup and
            # its "fix symbol" branch could only run after this very
            # lookup had returned None, so they were unreachable.
            offset = symbols.get_location_offset(loc_key)
            if offset is not None:
                fixed_expr[exprloc] = m2_expr.ExprInt(offset, exprloc.size)
                continue
            if not names:
                raise ValueError('Unresolved symbol: %r' % exprloc)
            raise ValueError(
                'The offset of loc_key "%s" cannot be determined' % names
            )

        expr = expr.replace_expr(fixed_expr)
        expr = expr_simp(expr)
        args_out.append(expr)
    return args_out
def resolve_args_with_symbols(self, symbols=None):
    """Return the arguments with their location references replaced

    @symbols: (optional) location database queried for the names and the
              offset of each location; a new LocationDB is used when omitted
    Returns a list with one simplified expression per entry of self.args.
    Raises ValueError for a location that has no offset: 'Unresolved
    symbol' when it has no name either, 'cannot be determined' otherwise.
    """
    if symbols is None:
        symbols = LocationDB()
    args_out = []
    for expr in self.args:
        fixed_expr = {}
        for exprloc in m2_expr.get_expr_locs(expr):
            loc_key = exprloc.loc_key
            names = symbols.get_location_names(loc_key)

            # Special symbols: '$' is resolved through get_asm_offset and
            # '_' through get_asm_next_offset
            if b'$' in names:
                fixed_expr[exprloc] = self.get_asm_offset(exprloc)
                continue
            if b'_' in names:
                fixed_expr[exprloc] = self.get_asm_next_offset(exprloc)
                continue

            # The offset is looked up once. The former second lookup and
            # its "fix symbol" branch could only run after this very
            # lookup had returned None, so they were unreachable.
            offset = symbols.get_location_offset(loc_key)
            if offset is not None:
                fixed_expr[exprloc] = m2_expr.ExprInt(offset, exprloc.size)
                continue
            if not names:
                raise ValueError('Unresolved symbol: %r' % exprloc)
            raise ValueError(
                'The offset of loc_key "%s" cannot be determined' % names
            )

        expr = expr.replace_expr(fixed_expr)
        expr = expr_simp(expr)
        args_out.append(expr)
    return args_out
def compute(Lifter, mode, asm, inputstate=None, debug=False):
    """Assemble @asm, lift it and return the result of symb_exec

    @Lifter: callable building the lifter from a LocationDB
    @mode: mode used to parse and to disassemble the instruction
    @asm: textual instruction
    @inputstate: (optional) mapping forwarded to symb_exec; a new empty
                 dictionary is used when omitted
    @debug: (optional) forwarded to symb_exec
    """
    # A fresh dictionary per call: the mapping is handed to symb_exec, so a
    # shared default object could leak changes from one call to the next
    if inputstate is None:
        inputstate = {}

    loc_db = LocationDB()

    # Assemble, then disassemble the first candidate again
    instr = mn.fromstring(asm, loc_db, mode)
    code = mn.asm(instr)[0]
    instr = mn.dis(code, mode)
    instr.offset = inputstate.get(EIP, 0)

    lifter = Lifter(loc_db)
    ircfg = lifter.new_ircfg()
    lbl = lifter.add_instr_to_ircfg(instr, ircfg)
    return symb_exec(lbl, lifter, ircfg, inputstate, debug)
class Asm_Test(object):
    """Base class of the assemble / run / check test cases

    Attributes and methods used here but not defined in this class
    (arch_name, arch_attrib, TXT, ret_addr, init_machine) have to be
    provided elsewhere, e.g. by subclasses.
    """

    # Address given to init_run
    run_addr = 0x0

    def __init__(self, jitter_engine):
        """Create the location database and the jitter, then init its stack"""
        self.loc_db = LocationDB()
        machine = Machine(self.arch_name)
        self.myjit = machine.jitter(self.loc_db, jitter_engine)
        self.myjit.init_stack()

    def test_init(self):
        """Hook called right before run(); does nothing here"""
        pass

    def prepare(self):
        """Hook called first by __call__; does nothing here"""
        pass

    def __call__(self):
        """Run every step of the test, in order"""
        for step in (
                self.prepare,
                self.asm,
                self.init_machine,
                self.test_init,
                self.run,
                self.check,
        ):
            step()

    def run(self):
        """Run from run_addr and require the jitter to stop at ret_addr"""
        jitter = self.myjit
        jitter.init_run(self.run_addr)
        jitter.continue_run()

        assert jitter.pc == self.ret_addr

    def asm(self):
        """Assemble self.TXT with "main" at offset 0 into self.assembly"""
        asmcfg = parse_asm.parse_txt(
            mn_x86, self.arch_attrib, self.TXT, self.loc_db
        )

        # fix shellcode addr
        main_loc = self.loc_db.get_name_location("main")
        self.loc_db.set_location_offset(main_loc, 0x0)

        output = StrPatchwork()
        patches = asmblock.asm_resolve_final(mn_x86, asmcfg)
        for offset, raw in viewitems(patches):
            output[offset] = raw

        self.assembly = bytes(output)

    def check(self):
        """Verification step, to be overridden"""
        raise NotImplementedError('abstract method')
def simple_unwrap_expr(expr: Expr, loc_db: LocationDB):
    """Return the integer carried by @expr, or -1 when there is none

    An integer expression gives its value and a location expression gives
    its offset in @loc_db. Anything else, as well as a location without an
    offset, gives -1.
    """
    if expr.is_int():
        return int(expr)
    if not expr.is_loc():
        return -1

    offset = loc_db.get_location_offset(expr.loc_key)
    return -1 if offset is None else offset
def arm_guess_jump_table(
        mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db):
    """Heuristically look for a jump table used by @cur_bloc

    For each IR block of @cur_bloc that loads PC from memory, the address
    is matched against "jra + jrb" with jrb an integer (the table base).
    4-byte entries are then read from @pool_bin, starting at the base,
    until a read fails or an entry lies more than 0x100000 away from the
    base. Every entry is added to @offsets_to_dis and becomes an
    AsmConstraintTo of @cur_bloc.

    @mnemo, @attrib: forwarded to get_ira
    @pool_bin: byte source, read through getbytes(offset, size)
    @cur_bloc: block to analyse; receives the new constraints
    @offsets_to_dis: set receiving the guessed offsets
    @loc_db: location database where the destination locations are created
    Raises NotImplementedError when the address does not match the pattern.
    """
    ira = get_ira(mnemo, attrib)

    jra = ExprId('jra')
    jrb = ExprId('jrb')

    sp = LocationDB()
    ir_arch = ira(sp)
    # new_ircfg is called on the instance: @ira is the class itself, so the
    # previous ira.new_ircfg() call had no instance to work on
    ircfg = ir_arch.new_ircfg()
    ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg)

    for irblock in viewvalues(ircfg.blocks):
        # Keep the last source assigned to PC in this block
        pc_val = None
        for exprs in irblock:
            for e in exprs:
                if e.dst == ir_arch.pc:
                    pc_val = e.src
        if pc_val is None:
            continue
        if not isinstance(pc_val, ExprMem):
            continue
        assert(pc_val.size == 32)
        print(pc_val)
        ad = pc_val.arg
        ad = expr_simp(ad)
        print(ad)
        res = match_expr(ad, jra + jrb, {jra, jrb})
        if res is False:
            raise NotImplementedError('not fully functional')
        print(res)
        if not isinstance(res[jrb], ExprInt):
            raise NotImplementedError('not fully functional')
        base_ad = int(res[jrb])
        print(base_ad)

        addrs = set()
        max_table_entry = 10000
        max_diff_addr = 0x100000  # heuristic
        # Same bounds as before: indexes 0 to max_table_entry included
        for i in range(max_table_entry + 1):
            try:
                ad = upck32(pool_bin.getbytes(base_ad + 4 * i, 4))
            except Exception:
                # Unreadable entry: taken as the end of the table
                # (KeyboardInterrupt/SystemExit are no longer swallowed)
                break
            if abs(ad - base_ad) > max_diff_addr:
                break
            addrs.add(ad)
        print([hex(x) for x in addrs])

        for ad in addrs:
            offsets_to_dis.add(ad)
            dst_loc = loc_db.get_or_create_offset_location(ad)
            cur_bloc.addto(AsmConstraintTo(dst_loc))
class Asm_Test(object):
    """Base class of the mips32l assemble / run / check test cases

    self.TXT, the assembly source, is not defined in this class.
    """

    def __init__(self, jitter):
        """Create the location database and the jitter, then init its stack"""
        self.loc_db = LocationDB()
        machine = Machine("mips32l")
        self.myjit = machine.jitter(self.loc_db, jitter)
        self.myjit.init_stack()

    def __call__(self):
        """Assemble, run, then check"""
        for step in (self.asm, self.run, self.check):
            step()

    def asm(self):
        """Assemble self.TXT with "main" at offset 0 into self.assembly"""
        asmcfg = parse_asm.parse_txt(mn_mips32, 'l', self.TXT, self.loc_db)

        # fix shellcode addr
        main_loc = self.loc_db.get_name_location("main")
        self.loc_db.set_location_offset(main_loc, 0x0)

        output = StrPatchwork()
        patches = asmblock.asm_resolve_final(mn_mips32, asmcfg)
        for offset, raw in viewitems(patches):
            output[offset] = raw

        self.assembly = bytes(output)

    def run(self):
        """Map the assembly at 0 and run until the breakpoint at 0x1337beef"""
        run_addr = 0
        jitter = self.myjit
        jitter.vm.add_memory_page(
            run_addr, PAGE_READ | PAGE_WRITE, self.assembly
        )

        # RA is loaded with the breakpoint address; the breakpoint
        # callback returns False
        jitter.cpu.RA = 0x1337beef
        jitter.add_breakpoint(0x1337beef, lambda x: False)

        jitter.init_run(run_addr)
        jitter.continue_run()

        assert jitter.pc == 0x1337beef

    def check(self):
        """Verification step, to be overridden"""
        raise NotImplementedError('abstract method')
def test_DirectiveSplit(self):
    """Check that the block at lbl1 has no next block"""
    from miasm.arch.x86.arch import mn_x86
    from miasm.core.parse_asm import parse_txt

    loc_db = LocationDB()

    # NOTE(review): the listing appears on a single line in this view of
    # the file; confirm its line breaks against the original test
    source = ''' lbl0: JNZ lbl0 .split lbl1: RET '''

    asmcfg = parse_txt(mn_x86, 32, source, loc_db)

    lbls = [loc_db.get_name_location('lbl%d' % i) for i in range(2)]
    lbl2block = {block.loc_key: block for block in asmcfg.blocks}

    # split test
    assert lbl2block[lbls[1]].get_next() is None
def __init__(self, machine):
    """Create the engine state for @machine, not yet attached to a jitter

    @machine: object whose ir(loc_db=...) builds the IR used by the engine
    """
    self.machine = machine
    self.loc_db = LocationDB()

    # Per-address bookkeeping
    self.handler = {}  # addr -> callback(DSEEngine instance)
    self.instrumentation = {}  # addr -> callback(DSEEngine instance)
    self.addr_to_cacheblocks = {}  # addr -> {label -> IRBlock}

    # IR matching the machine, and its CFG
    lifter = self.machine.ir(loc_db=self.loc_db)
    self.ir_arch = lifter
    self.ircfg = lifter.new_ircfg()

    # Defined after attachment
    self.jitter = None  # Jitload (concrete execution)
    self.symb = None  # SymbolicExecutionEngine
    self.symb_concrete = None  # Concrete SymbExec for path disambiguation
    self.mdis = None  # DisasmEngine
def __init__(self, data, loc_db=None, **kwargs):
    """Reset the attributes, pick the location database and parse @data

    @data: forwarded to self.parse
    @loc_db: (optional) location database to use; a new LocationDB is
             created when omitted
    Extra keyword arguments are forwarded to self.parse.
    """
    # Init attributes
    self._executable = self._bin_stream = None
    self._entry_point = self._arch = None

    self._loc_db = LocationDB() if loc_db is None else loc_db

    # Launch parsing
    self.parse(data, **kwargs)
def compute_txt(Lifter, mode, txt, inputstate=None, debug=False):
    """Assemble the listing @txt, lift it and return the result of symb_exec

    @Lifter: callable building the lifter from a LocationDB
    @mode: mode used to parse the listing
    @txt: assembly listing; it must define the "main" label
    @inputstate: (optional) mapping forwarded to symb_exec; a new empty
                 dictionary is used when omitted
    @debug: (optional) forwarded to symb_exec
    """
    # A fresh dictionary per call: the mapping is handed to symb_exec, so a
    # shared default object could leak changes from one call to the next
    if inputstate is None:
        inputstate = {}

    loc_db = LocationDB()
    asmcfg = parse_asm.parse_txt(mn, mode, txt, loc_db)

    # "main" is placed at offset 0 and is also the execution entry point
    lbl = loc_db.get_name_location("main")
    loc_db.set_location_offset(lbl, 0x0)

    # The returned patches are not used; the call itself is kept
    # (presumably for its effect on the location offsets -- confirm)
    asmblock.asm_resolve_final(mn, asmcfg)

    lifter = Lifter(loc_db)
    ircfg = lifter.new_ircfg_from_asmcfg(asmcfg)
    return symb_exec(lbl, lifter, ircfg, inputstate, debug)
def main():
    """Run the PE in the sandbox and replay it for every queued input

    The module-level names dse, todo and current are (re)bound here.
    """
    global dse, todo, current

    sys.setrecursionlimit(2000)  # oof

    # Parse arguments
    parser = Sandbox_Win_x86_64.parser(description="PE sandboxer")
    parser.add_argument("filename", help="PE Filename")
    options = parser.parse_args()
    options.dependencies = True  # So we dont need to reimplement qt

    sb = Sandbox_Win_x86_64(
        LocationDB(), options.filename, options, custom_methods=qt_methods
    )
    jitter = sb.jitter
    jitter.add_breakpoint(end_ptr, stop_exec)  # End condition

    # Setup the qt string memory and a pointer to it
    read_write = PAGE_READ | PAGE_WRITE
    for page_addr, content in (
            (0x10000018, pck64(0x20000000)),  # Hooking in here
            (0x20000000, qtstring),  # The initial qstring
            (0x10000020, b'\x00'),  # The result
    ):
        jitter.vm.add_memory_page(page_addr, read_write, content)
    jitter.cpu.R15 = 0x10000000
    jitter.cpu.RSP = jitter.stack_base + 0x8000

    # Setup and attach the DSE
    dse = DSEPC(
        sb.machine, sb.loc_db,
        produce_solution=DSEPC.PRODUCE_SOLUTION_PATH_COV
    )
    jitter.init_run(0x140004B61)
    dse.attach(jitter)
    dse.update_state_from_concrete()
    dse.symbolize_memory(interval([(flag_ptr, flag_ptr + 0x20)]))

    # Printable unicode only: each 16-bit cell of the flag is constrained
    # to the range 0x0020..0x007E
    for address in range(flag_ptr, flag_ptr + 0x20, 0x2):
        cell = ExprMem(ExprInt(address, 64), 16)
        z3_mem = dse.z3_trans.from_expr(dse.eval_expr(cell))
        lower_bound = z3.UGE(z3_mem, dse.z3_trans.from_expr(ExprInt(0x0020, 16)))
        upper_bound = z3.ULE(z3_mem, dse.z3_trans.from_expr(ExprInt(0x007E, 16)))
        dse.cur_solver.add(z3.And(lower_bound, upper_bound))

    snapshot = dse.take_snapshot()

    # Begin run
    todo = [b'\x41\x00' * 0x10]
    while todo:
        dse.restore_snapshot(snapshot)
        current = todo.pop()
        jitter.vm.set_mem(flag_ptr, current)  # Update the password in jitter memory
        print('-' * 40 + f' CONCRETE: {unicode_string(current)}')
        jitter.continue_run()
def main(file_path: Path, start_addr: int, oracle_path: Path) -> None:
    """Symbolically execute one basic block and print simplified results.

    @file_path: binary to analyse
    @start_addr: address of the basic block to disassemble and execute
    @oracle_path: passed to Simplifier
    For every modified symbol whose value is neither an integer, an
    identifier nor a location, the value is printed before and after
    simplification.
    """
    # symbol table
    loc_db = LocationDB()

    # The binary is opened through a context manager so the handle is
    # released; everything that reads from the container stays inside
    with open(file_path, 'rb') as stream:
        # open the binary for analysis
        container = Container.from_stream(stream, loc_db)

        # cpu abstraction
        machine = Machine(container.arch)

        # init disassemble engine
        mdis = machine.dis_engine(container.bin_stream, loc_db=loc_db)

        # initialize intermediate representation
        lifter = machine.lifter_model_call(mdis.loc_db)

        # disassemble the function at address
        asm_block = mdis.dis_block(start_addr)

    # lift to Miasm IR
    ira_cfg = lifter.new_ircfg()
    lifter.add_asmblock_to_ircfg(asm_block, ira_cfg)

    # init symbolic execution engine
    sb = SymbolicExecutionEngine(lifter)

    # symbolically execute basic block
    sb.run_block_at(ira_cfg, start_addr)

    # initialize simplifier
    simplifier = Simplifier(oracle_path)

    for _, value in sb.modified():
        # Integers, identifiers and locations are skipped
        if value.is_int() or value.is_id() or value.is_loc():
            continue

        print(f"before: {value}")
        simplified = simplifier.simplify(value)
        print(f"simplified: {simplified}")
        print("\n\n")
def miasm_asm(r2_op, r2_address, r2_buffer):
    """Assemble an instruction using miasm.

    @r2_op: radare2 object, cast to a RAsmOp_r2m2 pointer and filled in
    @r2_address: offset assigned to the instruction
    @r2_buffer: radare2 buffer holding the instruction text
    Returns None; nothing is written when no miasm machine is available or
    when the assembled instruction is too long.
    """

    # Cast radare2 variables
    rasmop = ffi.cast("RAsmOp_r2m2*", r2_op)
    mn_str = ffi.string(r2_buffer)

    # miasm only parses upper case mnemonics
    mn_str = mn_str.upper()
    mn_str = mn_str.replace("X", "x")  # hexadecimal

    # Get the miasm machine
    machine = miasm_machine()
    if machine is None:
        return

    # Get the miasm mnemonic object
    mn = machine.mn()

    # Assemble and return all possible candidates
    loc_db = LocationDB()
    mode = machine.dis_engine().attrib
    instr = mn.fromstring(mn_str, loc_db, mode)
    instr.mode = mode
    instr.offset = r2_address
    if instr.offset and instr.dstflow():
        # Adjust arguments values using the instruction offset
        instr.fixDstOffset()

    # Only the first candidate is kept
    asm_instr = list(mn.asm(instr))[0]

    # Check buffer sizes
    if len(asm_instr) - 1 > 256:
        # sys.stderr.write behaves the same on Python 2 and 3, unlike the
        # former "print >> sys.stderr" statement
        sys.stderr.write("/!\\ Assembled instruction is too long /!\\\n")
        return

    # Fill the RAsmOp structure
    rasmop.size = len(asm_instr)
    set_rbuf(rasmop.buf, asm_instr)
    rasmop.buf.len = len(asm_instr)
def compute(asm, inputstate={}, debug=False):
    """Assemble @asm, execute it symbolically and return what changed

    @asm: textual instruction
    @inputstate: (optional) mapping expression -> integer value, applied
                 on top of regs_init; only read, never modified
    @debug: (optional) print every symbol that differs from regs_init
    Returns a dictionary with value.arg.arg for each symbol that is not in
    EXCLUDE_REGS and differs from regs_init.
    """
    loc_db = LocationDB()

    # Initial symbols: regs_init overridden by the requested input state
    sympool = dict(regs_init)
    for reg, value in viewitems(inputstate):
        sympool[reg] = ExprInt(value, reg.size)

    ir_tmp = ir_arch(loc_db)
    ircfg = ir_tmp.new_ircfg()
    symexec = SymbolicExecutionEngine(ir_tmp, sympool)

    # Assemble, then disassemble the first candidate again
    parsed = mn.fromstring(asm, mode)
    code = mn.asm(parsed)[0]
    instr = mn.dis(code, mode)
    instr.offset = inputstate.get(PC, 0)

    loc_key = ir_tmp.add_instr_to_ircfg(instr, ircfg)
    symexec.run_at(ircfg, loc_key)

    if debug:
        for reg, value in viewitems(symexec.symbols):
            if regs_init.get(reg, None) != value:
                print(reg, value)

    changed = {}
    for reg, value in viewitems(symexec.symbols):
        if reg not in EXCLUDE_REGS and regs_init.get(reg, None) != value:
            changed[reg] = value.arg.arg
    return changed
def symbolic_exec():
    """Symbolically execute the selected code and show what was modified

    Without a selection (both bounds are idc.BADADDR), the instruction
    under the cursor is used. The modified symbols are displayed in a new
    symbolicexec_t view, which is also appended to all_views.
    """
    from miasm.ir.symbexec import SymbolicExecutionEngine
    from miasm.core.bin_stream_ida import bin_stream_ida

    from utils import guess_machine

    start, end = idc.read_selection_start(), idc.read_selection_end()

    # Resolve the range first, so that guess_machine below receives a real
    # address instead of idc.BADADDR when nothing is selected
    if start == idc.BADADDR and end == idc.BADADDR:
        start = idc.get_screen_ea()
        end = idc.next_head(start)  # Get next instruction address

    loc_db = LocationDB()

    bs = bin_stream_ida()
    machine = guess_machine(addr=start)

    mdis = machine.dis_engine(bs, loc_db=loc_db)
    mdis.dont_dis = [end]
    asmcfg = mdis.dis_multiblock(start)

    ira = machine.ira(loc_db=loc_db)
    ircfg = ira.new_ircfg_from_asmcfg(asmcfg)

    print("Run symbolic execution...")
    sb = SymbolicExecutionEngine(ira, machine.mn.regs.regs_init)
    sb.run_at(ircfg, start)

    modified = {}
    for dst, src in sb.modified(init_state=machine.mn.regs.regs_init):
        modified[dst] = src

    view = symbolicexec_t()
    all_views.append(view)
    if not view.Create(
            modified, machine, loc_db,
            "Symbolic Execution - 0x%x to 0x%x" % (start, idc.prev_head(end))):
        return

    view.Show()
from builtins import str from miasm.core.locationdb import LocationDB # Basic tests (LocationDB description) loc_db = LocationDB() loc_key1 = loc_db.add_location() loc_key2 = loc_db.add_location(offset=0x1234) loc_key3 = loc_db.add_location(name="first_name") loc_db.add_location_name(loc_key3, "second_name") loc_db.set_location_offset(loc_key3, 0x5678) loc_db.remove_location_name(loc_key3, "second_name") assert loc_db.get_location_offset(loc_key1) is None assert loc_db.get_location_offset(loc_key2) == 0x1234 assert loc_db.pretty_str(loc_key1) == str(loc_key1) assert loc_db.pretty_str(loc_key2) == "loc_1234" assert loc_db.pretty_str(loc_key3) == "first_name" loc_db.consistency_check() # Offset manipulation loc_key4 = loc_db.add_location() assert loc_db.get_location_offset(loc_key4) is None loc_db.set_location_offset(loc_key4, 0x1122) assert loc_db.get_location_offset(loc_key4) == 0x1122 loc_db.unset_location_offset(loc_key4) assert loc_db.get_location_offset(loc_key4) is None try: loc_db.set_location_offset(loc_key4, 0x1234) has_raised = False
] ) else: st = StrPatchwork() addr_main = 0 virt = st output = st # Get and parse the source code with open(args.source) as fstream: source = fstream.read() loc_db = LocationDB() asmcfg, loc_db = parse_asm.parse_txt(machine.mn, attrib, source, loc_db) # Fix shellcode addrs loc_db.set_location_offset(loc_db.get_name_location("main"), addr_main) if args.PE: loc_db.set_location_offset( loc_db.get_or_create_name_location("MessageBoxA"), pe.DirImport.get_funcvirt( 'USER32.dll', 'MessageBoxA' ) )
from __future__ import print_function
from future.utils import viewitems

from miasm.expression.expression import ExprId, ExprInt, ExprAssign, \
    ExprCond, ExprLoc, LocKey
from miasm.core.locationdb import LocationDB
from miasm.ir.analysis import ira
from miasm.ir.ir import IRBlock, AssignBlock
from miasm.core.graph import DiGraph
from miasm.analysis.depgraph import DependencyNode, DependencyGraph
from itertools import count
from pdb import pm
import re

# Location database created once at module level
loc_db = LocationDB()

# EMULATION stays True only when z3 can be imported
EMULATION = True
try:
    import z3
except ImportError:
    EMULATION = False

# Counter starting at 0 (itertools.count)
STEP_COUNTER = count()

# Identifiers, each declared with a size of 32
A = ExprId("a", 32)
B = ExprId("b", 32)
C = ExprId("c", 32)
D = ExprId("d", 32)
R = ExprId("r", 32)
COND = ExprId("cond", 32)
from __future__ import print_function
import z3

from miasm.core.locationdb import LocationDB
from miasm.expression.expression import *
from miasm.ir.translators.z3_ir import Z3Mem, TranslatorZ3

# Some examples of use/unit tests.

# One translator per endianness value ("<" and ">"), both built on the same
# location database
loc_db = LocationDB()
translator1 = TranslatorZ3(endianness="<", loc_db=loc_db)
translator2 = TranslatorZ3(endianness=">", loc_db=loc_db)

def equiv(z3_expr1, z3_expr2):
    """Return True when a solver reports that
    Not(@z3_expr1 == @z3_expr2) is unsat.
    """
    s = z3.Solver()
    s.add(z3.Not(z3_expr1 == z3_expr2))
    return s.check() == z3.unsat

def check_interp(interp, constraints, bits=32, valbits=8):
    """Checks that a list of @constraints (addr, value) (as python ints)
    match a z3 FuncInterp (@interp).
    """
    # Values are turned into z3 bit-vectors of @valbits bits, keyed by address
    constraints = dict((addr,
                        z3.BitVecVal(val, valbits))
                       for addr, val in constraints)
    l = interp.as_list()
    for entry in l:
        # Only list entries holding at least two items are considered
        if not isinstance(entry, list) or len(entry) < 2:
            continue
        addr, value = entry[0], entry[1]
def check_instruction(mn_str, mn_hex, multi=None, offset=0):
    """Try to disassemble and assemble this instruction

    @mn_str: expected textual form; objdump register names are accepted
    @mn_hex: hexadecimal encoding of the instruction
    @multi: (optional) expected number of assembled candidates
    @offset: (optional) offset of the instruction
    Raises AssertionError when the disassembled text differs from @mn_str,
    when the number of candidates differs from @multi, or when @mn_hex is
    not among the assembled candidates.
    """

    # Rename objdump registers names ("$12" becomes "R12", other "$" are
    # dropped). The pattern is a raw string: "\$" is not a valid escape
    # sequence in a normal string literal.
    mn_str = re.sub(r"\$([0-9]+)", lambda m: "R" + m.group(1), mn_str)
    mn_str = mn_str.replace("$", "")

    # Disassemble
    mn = dis(mn_hex)
    mn.offset = offset
    if mn.dstflow():

        # Remember ExprInt arguments sizes
        args_size = [
            arg.size if isinstance(arg, ExprInt) else None
            for arg in mn.args
        ]

        # Adjust arguments values using the instruction offset
        loc_db = LocationDB()
        mn.dstflow2label(loc_db)

        # Convert ExprLoc to ExprInt
        for i, arg in enumerate(mn.args):
            if args_size[i] is None:
                continue
            if isinstance(arg, ExprLoc):
                addr = loc_db.get_location_offset(arg.loc_key)
                mn.args[i] = ExprInt(addr, args_size[i])

    print("dis: %s -> %s" % (mn_hex.rjust(20), str(mn).rjust(20)))
    assert(str(mn) == mn_str)  # disassemble assertion

    # Assemble and return all possible candidates
    instr = mn_mep.fromstring(mn_str, "b")
    instr.offset = offset
    instr.mode = "b"
    if instr.offset:
        instr.fixDstOffset()
    asm_list = [encode_hex(i).decode() for i in mn_mep.asm(instr)]

    # Check instructions variants
    if multi:
        print("Instructions count:", len(asm_list))
        assert(len(asm_list) == multi)

        # Ensure that variants correspond to the same disassembled instruction
        for mn_hex_tmp in asm_list:
            mn = dis(mn_hex_tmp)
            print("dis: %s -> %s" % (mn_hex_tmp.rjust(20), str(mn).rjust(20)))

    # Check the assembly result
    print(
        "asm: %s -> %s" % (
            mn_str.rjust(20),
            ", ".join(asm_list).rjust(20)
        )
    )
    assert(mn_hex in asm_list)  # assemble assertion