class MiasmSEOracle:
    """Symbolically execute a code buffer block by block with miasm."""

    def __init__(self, code, architecture):
        """Build the disassembler, the IR analysis and the symbolic engine.

        @code: buffer handed to the disassembly engine; its length bounds
               the execution loop in execute()
        @architecture: miasm machine name (see determine_ira for the
                       names that have an IR analysis)

        Raises NotImplementedError when no IR analysis is available for
        @architecture.
        """
        self.code = code
        self.machine = Machine(architecture)
        self.mdis = self.machine.dis_engine(code)
        # Select the IR analysis matching the requested architecture instead
        # of always using the 64-bit one
        self.ira = self.determine_ira(architecture)
        self.se_engine = SymbolicExecutionEngine(
            self.ira, self.machine.mn.regs.regs_init)

    def determine_ira(self, architecture):
        """Return the IR analysis instance for @architecture.

        Raises NotImplementedError for anything other than "x86_64" and
        "x86_32".
        """
        if architecture == "x86_64":
            return SubIRA64(self.mdis.symbol_pool)
        elif architecture == "x86_32":
            return SubIRA32(self.mdis.symbol_pool)
        else:
            raise NotImplementedError(
                "Architecture {} not supported".format(architecture))

    def execute(self):
        """Disassemble and symbolically emulate every block of the buffer.

        Starts at address 0 and continues from the end of each disassembled
        block until the end of the code buffer is reached.
        """
        addr = 0
        while addr < len(self.code):
            basic_block = self.mdis.dis_block(addr)
            self.ira.add_block(basic_block)
            ira_block = self.ira.get_block(addr)
            self.se_engine.emulbloc(ira_block)
            # Upper bound of the block range is the next address to process
            addr = basic_block.get_range()[1]
class ethRE:
    """Disassemble EVM bytecode with miasm and render its block graph."""

    def __init__(self):
        """Create the "evm" machine; no code is parsed yet."""
        self.machine = Machine("evm")
        self.mn = self.machine.mn
        # Blocks produced by from_bytecode()/from_asm(); graph() checks this
        self.blks = None

    def get_bytecode(self, account_addr):
        """Return the raw code bytes of @account_addr.

        @account_addr: hexadecimal address string with a two-character
                       prefix (the first two characters are dropped)
        """
        code = evm_env.code(int(account_addr[2:], 16))
        code = code[2:]  # To remove '0x'..
        # unhexlify needs an even number of digits
        if len(code) % 2 == 1:
            code = "0" + code
        code = binascii.unhexlify(code)
        return code

    def from_bytecode(self, bytecode):
        """Disassemble @bytecode from offset 0 and keep the blocks."""
        container = Container.from_string(bytecode)
        mdis = self.machine.dis_engine(container.bin_stream)
        self.blks = mdis.dis_multibloc(0)

    def from_asm(self, asm_text):
        """Parse @asm_text into blocks, then raise: not correctly implemented."""
        all_bloc, _ = parse_asm.parse_txt(self.mn, 0, asm_text)
        self.blks = all_bloc
        raise Exception("Not correctly implemented")

    def graph(self):
        """Return the dot representation of the parsed blocks.

        Raises Exception when nothing has been parsed yet.
        """
        # getattr: also covers instances whose __init__ did not run
        if not getattr(self, "blks", None):
            raise Exception("Need to parse bytecode before")
        return self.blks.dot()
class ethRE:
    """Helper around miasm's "evm" machine: fetch, disassemble, graph."""

    def __init__(self):
        """Instantiate the machine and start with no parsed blocks."""
        self.machine = Machine("evm")
        self.mn = self.machine.mn
        # Set by from_bytecode()/from_asm(); None means "nothing parsed"
        self.blks = None

    def get_bytecode(self, account_addr):
        """Fetch the code of @account_addr and return it as raw bytes.

        The first two characters of @account_addr and of the fetched code
        are stripped before conversion.
        """
        code = evm_env.code(int(account_addr[2:], 16))
        code = code[2:]  # To remove '0x'..
        # Pad to an even number of hex digits for unhexlify
        if len(code) % 2 == 1:
            code = "0" + code
        code = binascii.unhexlify(code)
        return code

    def from_bytecode(self, bytecode):
        """Disassemble @bytecode starting at offset 0."""
        container = Container.from_string(bytecode)
        mdis = self.machine.dis_engine(container.bin_stream)
        self.blks = mdis.dis_multibloc(0)

    def from_asm(self, asm_text):
        """Parse @asm_text; always raises as the feature is unfinished."""
        all_bloc, _ = parse_asm.parse_txt(self.mn, 0, asm_text)
        self.blks = all_bloc
        raise Exception("Not correctly implemented")

    def graph(self):
        """Return the blocks as a dot graph, or raise if nothing is parsed."""
        # getattr keeps the intended error even if __init__ was bypassed
        if not getattr(self, "blks", None):
            raise Exception("Need to parse bytecode before")
        return self.blks.dot()
def symexec(handler, max_blocks=1000):
    """Symbolically execute the bytes of @handler and return the engine.

    @handler: object exposing `bytes_without_jmp` (the code to run) and
              `name` (used in the warning message)
    @max_blocks: number of additional IR blocks emulated before giving up;
                 guards against handlers that never reach their end offset

    Execution starts at offset 0 and stops once the engine's next address
    equals the end of the byte buffer.
    """
    inst_bytes = handler.bytes_without_jmp
    machine = Machine("x86_32")
    cont = Container.from_string(inst_bytes)
    bs = cont.bin_stream
    mdis = machine.dis_engine(bs, symbol_pool=cont.symbol_pool)
    end_offset = len(inst_bytes)
    # Do not disassemble past the end of the handler
    mdis.dont_dis = [end_offset]
    asm_block = mdis.dis_block(0)
    ira = machine.ira(mdis.symbol_pool)
    ira.add_block(asm_block)
    symb = SymbolicExecutionEngine(ira, symbols_init)
    # Loop invariant: the address that marks the end of the handler
    end_addr = ExprInt(end_offset, 32)
    cur_addr = symb.emul_ir_block(0)
    count = 0
    while cur_addr != end_addr:  # execute to end
        cur_addr = symb.emul_ir_block(cur_addr)
        count += 1
        if count > max_blocks:
            print('[!] to many loop at %s' % handler.name)
            break
    return symb
def main():
    """Dump the static CFG of 300.bin, run it sandboxed, dump the VM graph.

    Writes "cfg_before.dot" from the disassembly at 0x401550, then runs the
    file given on the command line in a Linux x86_64 sandbox, calls
    interpret() and writes the global `cfg` to "vm_graph.dot".
    """
    global cfg
    global block
    global data

    # Paint the cfg_before image from disassembly
    with open('300.bin') as fdesc:
        cont = Container.from_stream(fdesc)
    bin_stream = cont.bin_stream
    adr = 0x401550
    machine = Machine(cont.arch)
    mdis = machine.dis_engine(bin_stream)
    blocks = mdis.dis_multibloc(adr)
    with open("cfg_before.dot", "w") as dot_file:
        dot_file.write(blocks.dot())

    # Get filename
    parser = Sandbox_Linux_x86_64.parser(description="300.bin")
    parser.add_argument("filename", help="filename")
    options = parser.parse_args()
    options.mimic_env = True

    # Start Sandbox
    sb = Sandbox_Linux_x86_64(options.filename, options, globals())
    sb.jitter.init_run(sb.entry_point)
    sb.jitter.add_breakpoint(sb.entry_point, stop)
    sb.run()

    # Get bytecode
    interpret()

    # Paint cfg
    with open("vm_graph.dot", "w") as dot_file:
        dot_file.write(cfg.dot())
def r2m2_dis(opcode):
    """Disassemble one x86_64 instruction with miasm.

    Returns a two-element list: the `l` attribute of the decoded
    instruction and its string form.
    """
    x86_machine = Machine("x86_64")
    attrib = x86_machine.dis_engine().attrib
    decoded = x86_machine.mn().dis(opcode, attrib)
    return [decoded.l, str(decoded)]
def main():
    """Recover the flag from the compare blocks of an ARM thumb binary.

    Disassembles the file given on the command line from 0x614, writes the
    graph to "cfg.dot", then collects (position, character) pairs from the
    blocks matching the template below and prints the rebuilt flag.
    """
    # Setup Machine for arm, get filename
    machine = Machine('armtl')
    parser = ArgumentParser("Description")
    parser.add_argument('filename', help='filename')
    args = parser.parse_args()

    # Setup disassembly stream in container, get blocks and draw the graph
    with open(args.filename) as fdesc:
        cont = Container.from_stream(fdesc)
    bin_stream = cont.bin_stream
    mdis = machine.dis_engine(bin_stream)
    blocks = mdis.dis_multibloc(0x614)
    with open("cfg.dot", "w") as dot_file:
        dot_file.write(blocks.dot())

    # Create a template for matching blocks in the control flow graph
    # Requirement 1) Don't get block 0xdf8, it can't disassemble
    # Requirement 2) Get ones that start with LDR
    # Requirement 3) Get ones where the second to last instruction is CMP
    # No restrictions for ingoing and outgoing edges
    mblock = MatchGraphJoker(
        name='mblock',
        restrict_in=False,
        restrict_out=False,
        filt=lambda block: (block.label.offset != 0xdf8 and
                            "LDR" in block.lines[0].name and
                            "CMP" in block.lines[-2].name))

    # Basic block matcher
    nblock = MatchGraphJoker(name="next", restrict_in=False,
                             restrict_out=False)

    # Now it should match the blocks we want with the checks
    matcher = nblock >> mblock

    flag_storage = {}
    # Loop through matching template blocks
    for sol in matcher.match(blocks):
        try:
            # Grab position line
            pline = sol[mblock].lines[3]
            # Grab character check line
            cline = sol[mblock].lines[-2]
            # Transform character and position to integer
            pos = int(pline.arg2str(pline.args[1]), 16)
            c = int(cline.arg2str(cline.args[1]), 16)
            # If its NULL, ignore
            if c != 0:
                flag_storage[pos] = c
        except ValueError:
            # The F at the beginning is a NULL check: its operands are not
            # hexadecimal immediates, so the block is deliberately skipped
            continue

    # Print Flag
    flag = "".join(
        chr(flag_storage[pos]) for pos in sorted(flag_storage)
    ).replace("F", "I")
    print("F" + flag)
def execc(self, code):
    """Symbolically execute @code as x86_32 from address 0.

    Every disassembled block is lifted into the IR analysis before the
    emulation starts; the symbolic engine is returned.
    """
    x86 = Machine('x86_32')
    disassembler = x86.dis_engine(code)
    asm_blocs = disassembler.dis_multibloc(0)
    ir_analysis = x86.ira()
    for asm_bloc in asm_blocs:
        ir_analysis.add_bloc(asm_bloc)
    initial_regs = x86.mn.regs.regs_init
    engine = symbexec(ir_analysis, initial_regs)
    engine.emul_ir_blocs(ir_analysis, 0)
    return engine
class ReverseFlashairBinary(object):
    """ Common helpers to work on the flashair binary """

    def __init__(self, filename, offset=0, r2_options=None):
        """ Open the r2pipe handle and set up the miasm "mepl" machine """

        # File offset, kept for later use
        self.offset = offset

        # r2pipe handle on the binary
        self.r2p = get_r2pipe(filename, offset, r2_options)

        # miasm Machine and its mnemonic object
        self.machine = Machine("mepl")
        self.mn = self.machine.mn()

    def strings(self):
        """ Forward to get_strings with the r2pipe handle """
        handle = self.r2p
        return get_strings(handle)

    def prologues(self):
        """ Forward to get_prologues with the r2pipe handle """
        handle = self.r2p
        return get_prologues(handle)

    def nearest_prologue(self, address):
        """ Return the closest prologue strictly below @address

        An empty list is returned when no prologue is below @address.
        """
        candidates = []
        for prologue in self.prologues():
            if prologue < address:
                # Distance first, so that ordering picks the closest one
                candidates.append((address - prologue, prologue))

        if not candidates:
            return candidates

        _, closest = min(candidates)
        return closest

    def assemble(self, instruction):
        """ Assemble a single instruction with miasm2 """
        attrib = self.machine.dis_engine().attrib
        parsed = self.mn.fromstring(instruction, attrib)
        parsed.mode = attrib
        return self.mn.asm(parsed, attrib)
def r2m2_asm(mn_str):
    """Assemble an instruction using miasm.

    @mn_str: textual x86_64 instruction, in any case

    Returns the first encoding produced by miasm as a list of byte values.
    """
    import re  # local import: only needed for the hexadecimal prefix fix

    # miasm2 only parses upper case mnemonics
    mn_str = mn_str.upper()
    # Hexadecimal literals need a lower case prefix. Only the "0X" prefix is
    # rewritten: replacing every "X" would also corrupt mnemonics and
    # registers (XOR -> xOR, RAX -> RAx).
    mn_str = re.sub(r"\b0X(?=[0-9A-F])", "0x", mn_str)

    machine = Machine("x86_64")
    mode = machine.dis_engine().attrib
    mn = machine.mn()
    instr = mn.fromstring(mn_str, mode)
    # Keep the first candidate encoding
    asm_instr = [i for i in mn.asm(instr)][0]
    return [struct.unpack("!B", byte)[0] for byte in asm_instr]
class MiasmSEOracle:
    """Run miasm's symbolic execution over a code buffer, block after block."""

    def __init__(self, code, architecture):
        """Set up the disassembler, the IR analysis and the symbolic engine.

        @code: buffer given to the disassembly engine
        @architecture: miasm machine name
        """
        self.code = code
        self.machine = Machine(architecture)
        self.mdis = self.machine.dis_engine(code)
        self.ira = SubIRA64(self.mdis.symbol_pool)
        initial_regs = self.machine.mn.regs.regs_init
        self.se_engine = SymbolicExecutionEngine(self.ira, initial_regs)

    def execute(self):
        """Emulate each block from address 0 up to the end of the buffer."""
        cursor = 0
        while True:
            if cursor >= len(self.code):
                break
            asm_block = self.mdis.dis_block(cursor)
            self.ira.add_block(asm_block)
            self.se_engine.emulbloc(self.ira.get_block(cursor))
            # Continue right after the block that was just emulated
            _, cursor = asm_block.get_range()
class ExtractRef(object):
    '''
    Class used to concolic run a snapshot and extract references to input
    '''

    def __init__(self, testcreator, replayed_snapshot):
        '''
        @testcreator: TestCreator instance with associated information
        @replayed_snapshot: snapshot to be used
        '''
        self.isFuncFound = False
        self.filename = testcreator.program
        self.learned_addr = testcreator.address
        self.snapshot = replayed_snapshot
        self.replayexception = []
        self.abicls = testcreator.abicls
        self.machine = Machine(testcreator.machine)
        self.ira = self.machine.ira()
        self.ptr_size = self.ira.sizeof_pointer() / 8
        self.types = testcreator.types
        self.prototype = testcreator.prototype
        self.logger = testcreator.logger

    def use_snapshot(self, jitter):
        '''Initialize the VM with the snapshot information'''
        for reg, value in self.snapshot.input_reg.iteritems():
            setattr(jitter.cpu, reg, value)

        # Set values for input memory
        for addr, mem in self.snapshot.in_memory.iteritems():
            assert mem.access != 0
            if not jitter.vm.is_mapped(addr, mem.size):
                jitter.vm.add_memory_page(addr, mem.access, mem.data)
            else:
                if jitter.vm.get_mem_access(addr) & 0b11 == mem.access & 0b11:
                    jitter.vm.set_mem(addr, mem.data)
                else:
                    # TODO memory page is already set but have not the
                    # same access right. However delete page does not
                    # exist
                    jitter.vm.set_mem(addr, mem.data)

    def compare_snapshot(self, jitter):
        '''Compare the expected result with the real one to determine if
        the function is recognized or not.

        Every mismatch is appended to self.replayexception; returns True
        only when all output registers and output memory areas match.
        '''
        func_found = True

        for reg, value in self.snapshot.output_reg.iteritems():
            if value != getattr(jitter.cpu, reg):
                self.replayexception += [
                    "output register %s wrong : %i expected, %i found" %
                    (reg, value, getattr(jitter.cpu, reg))
                ]
                func_found = False

        for addr, mem in self.snapshot.out_memory.iteritems():
            self.logger.debug("Check @%s, %s bytes: %r", hex(addr),
                              hex(mem.size), mem.data[:0x10])
            found = jitter.vm.get_mem(addr, mem.size)
            if mem.data != found:
                # Report the area that was actually compared (the previous
                # code added an undefined `offset`, raising NameError here)
                self.replayexception += [
                    "output memory wrong at 0x%x: %s expected, %s found" %
                    (addr, repr(mem.data), repr(found))
                ]
                func_found = False

        return func_found

    def end_func(self, jitter):
        '''Breakpoint callback on the return address: compare the final
        state with the snapshot and stop the jitter'''
        if jitter.vm.is_mapped(getattr(jitter.cpu, self.ira.ret_reg.name), 1):
            self.replayexception += ["return value might be a pointer"]

        self.isFuncFound = self.compare_snapshot(jitter)

        jitter.run = False
        return False

    def is_pointer(self, expr):
        """Return True if expr may be a pointer"""
        target_types = expr_to_types(self.c_handler, expr)
        return any(
            objc_is_dereferenceable(target_type)
            for target_type in target_types)

    def callback(self, jitter):
        '''Per-instruction callback: record symbolic memory accesses made
        by the last step, then symbolically emulate the next block'''
        # Check previous state
        is_symbolic = lambda expr: (isinstance(expr, m2_expr.ExprMem) and
                                    not isinstance(expr.arg, m2_expr.ExprInt))

        # When it is possible, consider only elements modified in the last run
        # -> speed up to avoid browsing the whole memory
        to_consider = self.symb.modified_exprs

        for symbol in to_consider:
            # Do not consider PC
            if symbol == self.ira.pc:
                continue

            # Write to @NN[... argX ...]
            if is_symbolic(symbol):
                self.memories_write.add(symbol)

            # Read from ... @NN[... argX ...] ...
            symb_value = self.symb.eval_expr(symbol)
            to_replace = {}
            for expr in m2_expr.ExprAff(symbol,
                                        symb_value).get_r(mem_read=True):
                if is_symbolic(expr):
                    if isinstance(expr, m2_expr.ExprMem):
                        # Consider each byte individually
                        # Case: @32[X] with only @8[X+1] to replace
                        addr_expr = expr.arg
                        new_expr = []
                        consider = False
                        for offset in xrange(expr.size / 8):
                            sub_expr = m2_expr.ExprMem(
                                self.symb.expr_simp(
                                    addr_expr + m2_expr.ExprInt(
                                        offset, size=addr_expr.size)),
                                8)
                            if not self.is_pointer(sub_expr):
                                # Not a PTR, we have to replace with the
                                # real value
                                original_sub_expr = sub_expr.replace_expr(
                                    self.init_values)
                                new_expr.append(
                                    self.symb.eval_expr(original_sub_expr))
                                consider = True
                            else:
                                new_expr.append(sub_expr)

                        # Rebuild the corresponding expression
                        if consider:
                            assert len(new_expr) == expr.size / 8
                            to_replace[expr] = m2_expr.ExprCompose(*new_expr)

                    if expr not in self.memories_write:
                        # Do not consider memory already written during the
                        # run
                        self.memories_read.add(expr)

            # Replace with real value for non-pointer symbols
            if to_replace:
                symb_value = self.symb.expr_simp(
                    symb_value.replace_expr(to_replace))
                if isinstance(symbol, m2_expr.ExprMem):
                    # Replace only in ptr (case to_replace: @[arg] = 8, expr:
                    # @[arg] = @[arg])
                    symbol = m2_expr.ExprMem(
                        self.symb.expr_simp(
                            symbol.arg.replace_expr(to_replace)),
                        symbol.size)
                self.symb.apply_change(symbol, symb_value)

            # Check computed values against real ones
            # TODO idem memory
            if (isinstance(symbol, m2_expr.ExprId) and
                    isinstance(symb_value, m2_expr.ExprInt)):
                if hasattr(jitter.cpu, symbol.name):
                    value = m2_expr.ExprInt(getattr(jitter.cpu, symbol.name),
                                            symbol.size)
                    assert value == self.symb.symbols[symbol]

        cur_addr = jitter.pc
        self.logger.debug("Current address: %s", hex(cur_addr))
        if cur_addr == 0x1337BEEF or cur_addr == self.return_addr:
            # End reached
            if self.logger.isEnabledFor(logging.DEBUG):
                print("In:")
                for x in self.memories_read:
                    print("\t%s (%s)" % (
                        x,
                        self.c_handler.expr_to_c(x),
                    ))
                print("Out:")
                for x in self.memories_write:
                    print("\t%s (%s)" % (
                        x,
                        self.c_handler.expr_to_c(x),
                    ))
            return True

        # Update state
        ## Reset cache structures
        self.mdis.job_done.clear()
        self.symb_ir.blocs.clear()

        ## Update current state
        asm_block = self.mdis.dis_bloc(cur_addr)
        self.symb_ir.add_bloc(asm_block)

        self.symb.emul_ir_blocks(cur_addr)

        return True

    def prepare_symbexec(self, jitter, return_addr):
        '''Install the per-instruction callback and build the symbolic
        state (registers, memory, argument symbols) from the jitter'''
        # Activate callback on each instr
        jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
        #jitter.jit.log_mn = True
        #jitter.jit.log_regs = True
        jitter.exec_cb = self.callback

        # Disassembler
        self.mdis = self.machine.dis_engine(bin_stream_vm(jitter.vm),
                                            lines_wd=1)

        # Symbexec engine
        ## Prepare the symbexec engine
        self.symb_ir = self.machine.ir()
        self.symb = EmulatedSymbExecWithModif(jitter.cpu, jitter.vm,
                                              self.symb_ir, {})
        self.symb.enable_emulated_simplifications()

        ## Update registers value
        self.symb.reset_regs()
        self.symb.update_engine_from_cpu()

        ## Load the memory as ExprMem
        self.symb.func_read = None
        self.symb.func_write = None
        for base_addr, mem_segment in jitter.vm.get_all_memory().iteritems():
            # Split into 8 bytes chunk for get_mem_overlapping
            for start in xrange(0, mem_segment["size"], 8):
                expr_mem = m2_expr.ExprMem(
                    m2_expr.ExprInt(base_addr + start, size=64),
                    size=8 * min(8, mem_segment["size"] - start))
                # Its initialisation, self.symb.apply_change is not necessary
                self.symb.symbols[expr_mem] = self.symb._func_read(expr_mem)

        ## Save the initial state
        self.symbols_init = self.symb.symbols.copy()

        ## Save the returning address
        self.return_addr = return_addr

        # Inject argument
        # TODO
        # TODO: use abicls
        abi_order = ["RDI", "RSI", "RDX", "RCX", "R8", "R9"]
        self.init_values = {}
        struct_expr_types = {}
        self.args_symbols = []
        for i, param_name in enumerate(self.prototype.args_order):
            cur_arg_abi = getattr(self.ira.arch.regs, abi_order[i])
            cur_arg = m2_expr.ExprId("arg%d_%s" % (i, param_name),
                                     size=cur_arg_abi.size)
            arg_type = self.prototype.args[param_name]
            if objc_is_dereferenceable(arg_type):
                # Convert the argument to symbol to track access based on it
                self.init_values[cur_arg] = self.symb.symbols[cur_arg_abi]
                self.symb.apply_change(cur_arg_abi, cur_arg)
            struct_expr_types[cur_arg.name] = arg_type
            self.args_symbols.append(cur_arg)

        # Init Expr <-> C conversion
        # Strict access is deliberately not enforced (example: memcpy(struct))
        self.c_handler = CHandler(self.types, struct_expr_types,
                                  enforce_strict_access=False)

        # Init output structures
        self.memories_read = set()
        self.memories_write = set()

    def build_references(self):
        """At the end of the execution,
        - Fill memories accesses
        - Prepare output structures

        Enrich the snapshot with outputs
        """
        memory_in = {}
        memory_out = {}

        # Get the resulting symbolic value
        # TODO use abi
        output_value = self.symb.symbols[self.symb.ir_arch.arch.regs.RAX]

        # Fill memory *out* (written)
        for expr in self.memories_write:
            # Eval the expression with the *output* state
            value = self.symb.eval_expr(expr)
            assert isinstance(value, m2_expr.ExprInt)
            memory_out[expr] = value

        # Fill memory *in* (read)
        saved_symbols = self.symb.symbols
        self.symb.symbols = self.symbols_init
        try:
            for expr in self.memories_read:
                # Eval the expression with the *input* state
                original_expr = expr.replace_expr(self.init_values)
                value = self.symb.eval_expr(original_expr)
                assert isinstance(value, m2_expr.ExprInt)
                memory_in[expr] = value
        finally:
            # Always give the engine its output state back
            self.symb.symbols = saved_symbols

        if self.logger.isEnabledFor(logging.DEBUG):
            print("In:")
            print(memory_in)
            print("Out:")
            print(memory_out)
            print("Final value:")
            print(output_value)

        self.snapshot.memory_in = AssignBlock(memory_in)
        self.snapshot.memory_out = AssignBlock(memory_out)
        self.snapshot.output_value = output_value
        self.snapshot.c_handler = self.c_handler
        self.snapshot.arguments_symbols = self.args_symbols

    def run(self):
        '''Main function that is in charge of running the test and return the
        result: true if the snapshot has recognized the function, false
        else.'''

        # TODO inherit from Replay
        jitter = self.machine.jitter(config.miasm_engine)

        with open(self.filename, "rb") as fdesc:
            vm_load_elf(jitter.vm, fdesc.read())

        # Init segment
        jitter.ir_arch.do_stk_segm = True
        jitter.ir_arch.do_ds_segm = True
        jitter.ir_arch.do_str_segm = True
        jitter.ir_arch.do_all_segm = True

        FS_0_ADDR = 0x7ff70000
        jitter.cpu.FS = 0x4
        jitter.cpu.set_segm_base(jitter.cpu.FS, FS_0_ADDR)
        jitter.vm.add_memory_page(FS_0_ADDR + 0x28, PAGE_READ,
                                  "\x42\x42\x42\x42\x42\x42\x42\x42",
                                  "Stack canary FS[0x28]")

        # Init the jitter with the snapshot
        self.use_snapshot(jitter)

        # Get the return address for our breakpoint
        return_addr = struct.unpack("P", jitter.vm.get_mem(jitter.cpu.RSP,
                                                           0x8))[0]
        jitter.add_breakpoint(return_addr, self.end_func)

        # Prepare the execution
        jitter.init_run(self.learned_addr)
        self.prepare_symbexec(jitter, return_addr)

        # Run the execution
        try:
            jitter.continue_run()
            assert jitter.run == False
        except AssertionError:
            # Translate the pending VM exception into a replay error; an
            # assertion without VM exception is a real failure
            if jitter.vm.get_exception() & EXCEPT_ACCESS_VIOL:
                self.replayexception += ["access violation"]
            elif jitter.vm.get_exception() & EXCEPT_DIV_BY_ZERO:
                self.replayexception += ["division by zero"]
            elif jitter.vm.get_exception() & EXCEPT_PRIV_INSN:
                self.replayexception += ["execution of private instruction"]
            elif jitter.vm.get_exception():
                self.replayexception += [
                    "exception no %i" % (jitter.vm.get_exception())
                ]
            else:
                raise
            self.isFuncFound = False

        # Rebuild references
        self.build_references()

        return self.isFuncFound
# Minimalist Symbol Exec example from miasm2.core.bin_stream import bin_stream_str from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.analysis.machine import Machine START_ADDR = 0 machine = Machine("x86_32") # Assemble and disassemble a MOV ## Ensure that attributes 'offset' and 'l' are set line = machine.mn.fromstring("MOV EAX, EBX", 32) asm = machine.mn.asm(line)[0] # Get back block bin_stream = bin_stream_str(asm) mdis = machine.dis_engine(bin_stream) asm_block = mdis.dis_bloc(START_ADDR) # Translate ASM -> IR ira = machine.ira(mdis.symbol_pool) ira.add_bloc(asm_block) # Instanciate a Symbolic Execution engine with default value for registers ## EAX = EAX_init, ... symbols_init = ira.arch.regs.regs_init symb = SymbolicExecutionEngine(ira, symbols_init) # Emulate one IR basic block ## Emulation of several basic blocks can be done through .emul_ir_blocks cur_addr = symb.emul_ir_block(START_ADDR)
p = log.progress("emulating step%d" % x) for i in range((len(bytecode) * 8) - 1): if i % 100 == 0: p.status("%d/%d" % (i, (len(bytecode) * 8) - 1)) sb.call(0x5DD9, i) # VMDecodeInstruction # with open("all_steps", "w") as f: # import json # json.dump(lt, f) print(lt) parser = Sandbox_Linux_x86_64.parser(description="ELF sandboxer") parser.add_argument("filename", help="ELF Filename") options = parser.parse_args() sb = Sandbox_Linux_x86_64(options.filename, options, globals()) machine = Machine('x86_64') cont = Container.from_stream(open(options.filename)) mdis = machine.dis_engine(cont.bin_stream) sb.jitter.add_breakpoint(0x2C87F, GetMem) # break on return of VMDecodeInstruction sb.jitter.add_breakpoint( 0x21B0, Malloc ) # Break on call to _malloc because for some reason MIASM can't find the symbol sb.jitter.add_breakpoint( 0x4A9C, GetBytecode ) # break on return of VMDecodeBytecode to return the good value that could not be loaded because of global variable not set emulate(sb, 0, step0, lookup_table_step0) emulate(sb, 1, step1, lookup_table_step1) emulate(sb, 2, step2, lookup_table_step2) emulate(sb, -1, step1 + step2, lookup_table_stepX)
from miasm2.ir.symbexec import SymbolicExecutionEngine
from miasm2.analysis.machine import Machine
from miasm2.core.locationdb import LocationDB

# Address at which the block is disassembled and emulated
START_ADDR = 0
machine = Machine("x86_32")
# Location database shared by the parser and the disassembler
loc_db = LocationDB()

# Assemble and disassemble a MOV
## Ensure that attributes 'offset' and 'l' are set
line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32)
# Keep the first encoding returned by the assembler
asm = machine.mn.asm(line)[0]

# Get back block
bin_stream = bin_stream_str(asm)
mdis = machine.dis_engine(bin_stream, loc_db=loc_db)
# Limit the disassembled block to a single line
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.loc_db)
ira.add_block(asm_block)

# Instantiate a Symbolic Execution engine with default value for registers
symb = SymbolicExecutionEngine(ira, {})

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
cur_addr = symb.run_at(START_ADDR)

# Modified elements
from miasm2.core.asmblock import AsmSymbolPool

# Address at which the block is disassembled and emulated
START_ADDR = 0
machine = Machine("x86_32")
# Symbol pool shared by the parser and the disassembler
symbol_pool = AsmSymbolPool()

# Assemble and disassemble a MOV
## Ensure that attributes 'offset' and 'l' are set
line = machine.mn.fromstring("MOV EAX, EBX", symbol_pool, 32)
# Keep the first encoding returned by the assembler
asm = machine.mn.asm(line)[0]

# Get back block
bin_stream = bin_stream_str(asm)
mdis = machine.dis_engine(bin_stream, symbol_pool=symbol_pool)
# Limit the disassembled block to a single line
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.symbol_pool)
ira.add_block(asm_block)

# Instantiate a Symbolic Execution engine with default value for registers
symb = SymbolicExecutionEngine(ira, {})

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
cur_addr = symb.run_at(START_ADDR)

# Modified elements
from miasm2.ir.symbexec import SymbolicExecutionEngine
from miasm2.analysis.machine import Machine
from miasm2.core.locationdb import LocationDB

# Address at which the block is disassembled and emulated
START_ADDR = 0
machine = Machine("x86_32")
# Location database shared by the parser and the disassembler
loc_db = LocationDB()

# Assemble and disassemble a MOV
## Ensure that attributes 'offset' and 'l' are set
line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32)
# Keep the first encoding returned by the assembler
asm = machine.mn.asm(line)[0]

# Get back block
bin_stream = bin_stream_str(asm)
mdis = machine.dis_engine(bin_stream, loc_db=loc_db)
# Limit the disassembled block to a single line
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.loc_db)
# The IR blocks live in a dedicated IR control flow graph
ircfg = ira.new_ircfg()
ira.add_asmblock_to_ircfg(asm_block, ircfg)

# Instantiate a Symbolic Execution engine with default value for registers
symb = SymbolicExecutionEngine(ira)

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
cur_addr = symb.run_at(ircfg, START_ADDR)
addr = int(addr.arg) states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) elif addr.is_loc(): states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) else: raise ValueError("Unsupported destination") if __name__ == '__main__': translator_smt2 = Translator.to_language("smt2") addr = int(options.address, 16) cont = Container.from_stream(open(args[0])) mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db) ir_arch = machine.ir(mdis.loc_db) ircfg = ir_arch.new_ircfg() symbexec = SymbolicExecutionEngine(ir_arch) asmcfg, loc_db = parse_asm.parse_txt(machine.mn, 32, ''' init: PUSH argv PUSH argc PUSH ret_addr ''', loc_db=mdis.loc_db) argc_lbl = loc_db.get_name_location('argc')
# Load the target binary; the container is built while the file is open
with open(args.filename) as fstream:
    cont = Container.from_stream(fstream)

# An explicit architecture on the command line overrides the detected one
arch = args.architecture if args.architecture else cont.arch
machine = Machine(arch)

# Check elements
# Each requested element must be a register name known to the architecture
elements = set()
regs = machine.mn.regs.all_regs_ids_byname
for element in args.element:
    try:
        elements.add(regs[element])
    except KeyError:
        raise ValueError("Unknown element '%s'" % element)

mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True)
ir_arch = machine.ira(mdis.symbol_pool)

# Common argument forms
# Initial context mapping memory expressions to named arguments
init_ctx = {}
if args.rename_args:
    if arch == "x86_32":
        # StdCall example
        # Name the four 32-bit slots at ESP_init + 4, + 8, + 12, + 16
        for i in xrange(4):
            e_mem = ExprMem(ExprId("ESP_init") + ExprInt32(4 * (i + 1)), 32)
            init_ctx[e_mem] = ExprId("arg%d" % i)

# Disassemble the targeted function
# Base 0: the address may be given in decimal, hexadecimal, octal...
blocks = mdis.dis_multibloc(int(args.func_addr, 0))

# Generate IR
class ExtractRef(object):
    '''
    Class used to concolic run a snapshot and extract references to input
    '''

    def __init__(self, testcreator, replayed_snapshot):
        '''
        @testcreator: TestCreator instance with associated information
        @replayed_snapshot: snapshot to be used
        '''
        self.isFuncFound = False
        self.filename = testcreator.program
        self.learned_addr = testcreator.address
        self.snapshot = replayed_snapshot
        self.replayexception = []
        self.abicls = testcreator.abicls
        self.machine = Machine(testcreator.machine)
        self.ira = self.machine.ira()
        self.ptr_size = self.ira.sizeof_pointer()/8
        self.types = testcreator.types
        self.prototype = testcreator.prototype
        self.logger = testcreator.logger

    def use_snapshot(self, jitter):
        '''Initialize the VM with the snapshot information'''
        for reg, value in self.snapshot.input_reg.iteritems():
            setattr(jitter.cpu, reg, value)

        # Set values for input memory
        for addr, mem in self.snapshot.in_memory.iteritems():
            assert mem.access != 0
            if not jitter.vm.is_mapped(addr, mem.size):
                jitter.vm.add_memory_page(addr, mem.access, mem.data)
            else:
                if jitter.vm.get_mem_access(addr) & 0b11 == mem.access & 0b11:
                    jitter.vm.set_mem(addr, mem.data)
                else:
                    # TODO memory page is already set but have not the
                    # same access right. However delete page does not
                    # exist
                    jitter.vm.set_mem(addr, mem.data)

    def compare_snapshot(self, jitter):
        '''Compare the expected result with the real one to determine if
        the function is recognized or not.

        Every mismatch is appended to self.replayexception; returns True
        only when all output registers and output memory areas match.
        '''
        func_found = True

        for reg, value in self.snapshot.output_reg.iteritems():
            if value != getattr(jitter.cpu, reg):
                self.replayexception += ["output register %s wrong : %i expected, %i found" % (reg, value, getattr(jitter.cpu, reg))]
                func_found = False

        for addr, mem in self.snapshot.out_memory.iteritems():
            self.logger.debug("Check @%s, %s bytes: %r", hex(addr), hex(mem.size), mem.data[:0x10])
            found = jitter.vm.get_mem(addr, mem.size)
            if mem.data != found:
                # Report the area that was actually compared (the previous
                # code added an undefined `offset`, raising NameError here)
                self.replayexception += ["output memory wrong at 0x%x: %s expected, %s found" % (addr, repr(mem.data), repr(found))]
                func_found = False

        return func_found

    def end_func(self, jitter):
        '''Breakpoint callback on the return address: compare the final
        state with the snapshot and stop the jitter'''
        if jitter.vm.is_mapped(getattr(jitter.cpu, self.ira.ret_reg.name), 1):
            self.replayexception += ["return value might be a pointer"]

        self.isFuncFound = self.compare_snapshot(jitter)

        jitter.run = False
        return False

    def is_pointer(self, expr):
        """Return True if expr may be a pointer"""
        target_types = expr_to_types(self.c_handler, expr)

        return any(objc_is_dereferenceable(target_type)
                   for target_type in target_types)

    def is_symbolic(self, expr):
        """Return True if @expr is a memory access at a non-constant
        address"""
        return expr.is_mem() and not expr.arg.is_int()

    def get_arg_n(self, arg_number):
        """Return the Expression corresponding to the argument number
        @arg_number"""
        # TODO use abicls
        abi_order = ["RDI", "RSI", "RDX", "RCX", "R8", "R9"]
        size = 64
        sp = m2_expr.ExprId("RSP", 64)
        if arg_number < len(abi_order):
            return m2_expr.ExprId(abi_order[arg_number], size)
        else:
            # Remaining arguments are read from the stack, above RSP
            destack = (arg_number - len(abi_order) + 1)
            return m2_expr.ExprMem(sp + m2_expr.ExprInt(destack * size / 8,
                                                        size),
                                   size)

    def callback(self, jitter):
        '''Per-instruction callback: record symbolic memory accesses made
        by the last step, then symbolically emulate the next block'''
        # Check previous state

        # When it is possible, consider only elements modified in the last run
        # -> speed up to avoid browsing the whole memory
        to_consider = self.symb.modified_exprs

        for symbol in to_consider:
            # Do not consider PC
            if symbol == self.ira.pc:
                continue

            # Read from ... @NN[... argX ...] ...
            symb_value = self.symb.eval_expr(symbol)
            to_replace = {}
            for expr in m2_expr.ExprAff(symbol,
                                        symb_value).get_r(mem_read=True):
                if self.is_symbolic(expr):
                    if isinstance(expr, m2_expr.ExprMem):
                        # Consider each byte individually
                        # Case: @32[X] with only @8[X+1] to replace
                        addr_expr = expr.arg
                        new_expr = []
                        consider = False
                        for offset in xrange(expr.size/8):
                            sub_expr = m2_expr.ExprMem(self.symb.expr_simp(addr_expr + m2_expr.ExprInt(offset, size=addr_expr.size)),
                                                       8)
                            if not self.is_pointer(sub_expr):
                                # Not a PTR, we have to replace with the
                                # real value
                                original_sub_expr = sub_expr.replace_expr(self.init_values)
                                new_expr.append(self.symb.eval_expr(original_sub_expr))
                                consider = True
                            else:
                                new_expr.append(sub_expr)

                        # Rebuild the corresponding expression
                        if consider:
                            assert len(new_expr) == expr.size / 8
                            to_replace[expr] = m2_expr.ExprCompose(*new_expr)

                    if expr not in self.memories_write:
                        # Do not consider memory already written during the
                        # run
                        self.memories_read.add(expr)

            # Write to @NN[... argX ...]
            # Must be after Read, case: @[X] = f(@[X])
            if self.is_symbolic(symbol):
                self.memories_write.add(symbol)

            # Replace with real value for non-pointer symbols
            if to_replace:
                symb_value = self.symb.expr_simp(symb_value.replace_expr(to_replace))
                if isinstance(symbol, m2_expr.ExprMem):
                    # Replace only in ptr (case to_replace: @[arg] = 8, expr:
                    # @[arg] = @[arg])
                    symbol = m2_expr.ExprMem(self.symb.expr_simp(symbol.arg.replace_expr(to_replace)),
                                             symbol.size)
                self.symb.apply_change(symbol, symb_value)

            # Check computed values against real ones
            # TODO idem memory
            if (isinstance(symbol, m2_expr.ExprId) and
                    isinstance(symb_value, m2_expr.ExprInt)):
                if hasattr(jitter.cpu, symbol.name):
                    value = m2_expr.ExprInt(getattr(jitter.cpu, symbol.name),
                                            symbol.size)
                    assert value == self.symb.symbols[symbol]

        cur_addr = jitter.pc
        self.logger.debug("Current address: %s", hex(cur_addr))
        if cur_addr == 0x1337BEEF or cur_addr == self.return_addr:
            # End reached
            if self.logger.isEnabledFor(logging.DEBUG):
                print("In:")
                for x in self.memories_read:
                    print("\t%s (%s)" % (x,
                                         self.c_handler.expr_to_c(x),
                                         ))
                print("Out:")
                for x in self.memories_write:
                    print("\t%s (%s)" % (x,
                                         self.c_handler.expr_to_c(x),
                                         ))
            return True

        # Update state
        ## Reset cache structures
        self.mdis.job_done.clear()
        self.symb_ir.blocks.clear()

        ## Update current state
        asm_block = self.mdis.dis_bloc(cur_addr)
        self.symb_ir.add_bloc(asm_block)

        self.symb.emul_ir_blocks(cur_addr)

        return True

    def prepare_symbexec(self, jitter, return_addr):
        '''Install the per-instruction callback and build the symbolic
        state (registers, memory, argument symbols) from the jitter'''
        # Activate callback on each instr
        jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
        #jitter.jit.log_mn = True
        #jitter.jit.log_regs = True
        jitter.exec_cb = self.callback

        # Disassembler
        self.mdis = self.machine.dis_engine(bin_stream_vm(jitter.vm),
                                            lines_wd=1)

        # Symbexec engine
        ## Prepare the symbexec engine
        self.symb_ir = self.machine.ir()
        self.symb = EmulatedSymbExecWithModif(jitter.cpu, jitter.vm,
                                              self.symb_ir, {})
        self.symb.enable_emulated_simplifications()

        ## Update registers value
        self.symb.reset_regs()
        self.symb.update_engine_from_cpu()

        ## Load the memory as ExprMem
        self.symb.func_read = None
        self.symb.func_write = None
        for base_addr, mem_segment in jitter.vm.get_all_memory().iteritems():
            # Split into 8 bytes chunk for get_mem_overlapping
            for start in xrange(0, mem_segment["size"], 8):
                expr_mem = m2_expr.ExprMem(m2_expr.ExprInt(base_addr + start,
                                                           size=64),
                                           size=8*min(8, mem_segment["size"] - start))
                # Its initialisation, self.symb.apply_change is not necessary
                self.symb.symbols[expr_mem] = self.symb._func_read(expr_mem)

        ## Save the initial state
        self.symbols_init = self.symb.symbols.copy()

        ## Save the returning address
        self.return_addr = return_addr

        # Inject argument
        self.init_values = {}
        struct_expr_types = {}
        self.args_symbols = []
        for i, param_name in enumerate(self.prototype.args_order):
            cur_arg_abi = self.get_arg_n(i)
            cur_arg = m2_expr.ExprId("arg%d_%s" % (i, param_name),
                                     size=cur_arg_abi.size)
            self.init_values[cur_arg] = self.symb.eval_expr(cur_arg_abi)
            arg_type = self.prototype.args[param_name]
            if objc_is_dereferenceable(arg_type):
                # Convert the argument to symbol to track access based on it
                self.symb.apply_change(cur_arg_abi, cur_arg)
            struct_expr_types[cur_arg.name] = arg_type
            self.args_symbols.append(cur_arg)

        # Init Expr <-> C conversion
        # Strict access is deliberately not enforced (example: memcpy(struct))
        self.c_handler = CHandler(self.types, struct_expr_types,
                                  enforce_strict_access=False)

        # Init output structures
        self.memories_read = set()
        self.memories_write = set()

    def build_references(self):
        """At the end of the execution,
        - Fill memories accesses
        - Prepare output structures

        Enrich the snapshot with outputs
        """
        memory_in = {}
        memory_out = {}

        # Get the resulting symbolic value
        # TODO use abi
        output_value = self.symb.symbols[self.symb.ir_arch.arch.regs.RAX]

        # Fill memory *out* (written)
        for expr in self.memories_write:
            # Eval the expression with the *output* state
            value = self.symb.eval_expr(expr)
            memory_out[expr] = value

        # Fill memory *in* (read)
        saved_symbols = self.symb.symbols
        self.symb.symbols = self.symbols_init
        try:
            for expr in self.memories_read:
                # Eval the expression with the *input* state
                original_expr = expr.replace_expr(self.init_values)
                value = self.symb.eval_expr(original_expr)
                assert isinstance(value, m2_expr.ExprInt)
                memory_in[expr] = value
        finally:
            # Always give the engine its output state back
            self.symb.symbols = saved_symbols

        if self.logger.isEnabledFor(logging.DEBUG):
            print("In:")
            print(memory_in)
            print("Out:")
            print(memory_out)
            print("Final value:")
            print(output_value)

        self.snapshot.memory_in = AssignBlock(memory_in)
        self.snapshot.memory_out = AssignBlock(memory_out)
        self.snapshot.output_value = output_value
        self.snapshot.c_handler = self.c_handler
        self.snapshot.arguments_symbols = self.args_symbols
        self.snapshot.init_values = self.init_values

    def run(self):
        '''Main function that is in charge of running the test and return the
        result: true if the snapshot has recognized the function, false
        else.'''

        # TODO inherit from Replay
        jitter = self.machine.jitter(config.miasm_engine)

        with open(self.filename, "rb") as fdesc:
            vm_load_elf(jitter.vm, fdesc.read())

        # Init segment
        jitter.ir_arch.do_stk_segm = True
        jitter.ir_arch.do_ds_segm = True
        jitter.ir_arch.do_str_segm = True
        jitter.ir_arch.do_all_segm = True

        FS_0_ADDR = 0x7ff70000
        jitter.cpu.FS = 0x4
        jitter.cpu.set_segm_base(jitter.cpu.FS, FS_0_ADDR)
        jitter.vm.add_memory_page(
            FS_0_ADDR + 0x28, PAGE_READ, "\x42\x42\x42\x42\x42\x42\x42\x42",
            "Stack canary FS[0x28]")

        # Init the jitter with the snapshot
        self.use_snapshot(jitter)

        # Get the return address for our breakpoint
        return_addr = struct.unpack("P", jitter.vm.get_mem(jitter.cpu.RSP,
                                                           0x8))[0]
        jitter.add_breakpoint(return_addr, self.end_func)

        # Prepare the execution
        jitter.init_run(self.learned_addr)
        self.prepare_symbexec(jitter, return_addr)

        # Run the execution
        try:
            jitter.continue_run()
            assert jitter.run == False
        except AssertionError:
            # Translate the pending VM exception into a replay error; an
            # assertion without VM exception is a real failure
            if jitter.vm.get_exception() & EXCEPT_ACCESS_VIOL:
                self.replayexception += ["access violation"]
            elif jitter.vm.get_exception() & EXCEPT_DIV_BY_ZERO:
                self.replayexception += ["division by zero"]
            elif jitter.vm.get_exception() & EXCEPT_PRIV_INSN:
                self.replayexception += ["execution of private instruction"]
            elif jitter.vm.get_exception():
                self.replayexception += ["exception no %i" % (jitter.vm.get_exception())]
            else:
                raise
            self.isFuncFound = False

        # Rebuild references
        self.build_references()

        return self.isFuncFound
with open(args.filename) as fstream: cont = Container.from_stream(fstream) arch = args.architecture if args.architecture else cont.arch machine = Machine(arch) # Check elements elements = set() regs = machine.mn.regs.all_regs_ids_byname for element in args.element: try: elements.add(regs[element.upper()]) except KeyError: raise ValueError("Unknown element '%s'" % element) mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True) ir_arch = machine.ira(mdis.symbol_pool) # Disassemble the targeted function blocks = mdis.dis_multibloc(int(args.func_addr, 16)) # Generate IR for block in blocks: ir_arch.add_bloc(block) # Build the IRA Graph ir_arch.gen_graph() # Get the instance dg = DependencyGraph(ir_arch, implicit=args.implicit,
class PEAnalysis(object):
    """A basic static analysis for binary files that utilises Miasm to recover
    functions contained in the file.
    """

    def _set_logging(self, verbose):
        """Set up an internal logger.

        Args:
            verbose (bool): affects log verbosity
        """
        self._logger = getLogger(hex(hash(self)))
        self._logger.setLevel(DEBUG if verbose else INFO)

        # getLogger() hands back the same logger object for a repeated name,
        # so only attach a handler when none is installed yet; otherwise
        # every message would be emitted once per attached handler.
        if not self._logger.handlers:
            log_handler = StreamHandler(stdout)
            log_handler.setFormatter(Formatter('[%(levelname)s]\t%(message)s'))
            self._logger.addHandler(log_handler)

    def __init__(self, filename, verbose=False):
        """Load binary file.

        Args:
            filename (str): path to a file to be analyzed
            verbose (bool): affects log verbosity
        """
        # Binary mode keeps the bytes intact on every platform, and the
        # context manager closes the handle once the container is built.
        with open(filename, "rb") as stream:
            self._container = Container.from_stream(stream)
        self.bin_stream = self._container.bin_stream
        self.entry_point = self._container.entry_point
        self.machine = Machine(self._container.arch)
        # Maps a function start offset to the blocks disassembled from it
        self.fn = {}
        # Union of the byte ranges covered by disassembled instructions
        self.interval = interval()
        self.deep = 0
        self.offset = 0
        self._set_logging(verbose)
        self._logger.info("PE loaded")

    def _update_interval(self, block):
        """Update analyzed interval.

        Args:
            block (AsmBlock): extend interval describing used code by offsets
                              from this block
        """
        for line in block.lines:
            self.interval += interval([(line.offset, line.offset + line.l)])

    def process_fn(self, offset):
        """Use Miasm to explore reachable code and add discovered functions.

        Offsets already present in self.fn are skipped, which also ends the
        recursion on call cycles.

        Args:
            offset (int): a starting offset, preferably entry point
        """
        if offset in self.fn:
            return

        mdis = self.machine.dis_engine(self.bin_stream)
        blocks = mdis.dis_multiblock(offset)
        self.fn[offset] = blocks
        # "%x" avoids the trailing "L" that hex() appends to Python 2 longs
        self._logger.debug("sub_%x added", offset)

        # Explicit loop: map() is lazy on Python 3 and would silently skip
        # the interval update.
        for block in blocks:
            self._update_interval(block)

        for block in blocks:
            instr = block.get_subcall_instr()
            if not instr:
                continue
            for dest in instr.getdstflow(mdis.symbol_pool):
                # Only destinations expressed as a label can be followed
                if not (isinstance(dest, ExprId)
                        and isinstance(dest.name, AsmLabel)):
                    continue
                self.process_fn(dest.name.offset)

    def process_rest(self):
        """Try to naively explore bytes lying in regions that are not covered
        by interval.
        """
        # process_fn() updates self.interval, so walk a snapshot of the
        # current bounds. Offsets already analysed are skipped by
        # process_fn() itself.
        for _, right in list(self.interval.intervals):
            self.process_fn(right)

    def analyze(self, analyze_unreachable=False):
        """Explore the binary file, try to find code. The search starts at
        the binary file's declared entry point.

        Args:
            analyze_unreachable (bool): analyze code that could not be
                                        located during reachable code
                                        exploration.
        """
        self.process_fn(self.entry_point)
        self._logger.info("reachable code analysis done")
        self.deep = 1
        if analyze_unreachable:
            self.deep = 2
            self.process_rest()
            self._logger.info("unreachable code analysis done")