def get_msr(uc: Uc, scratch: int, msr: int) -> int:
    """
    Fetch the contents of the given model-specific register (MSR).

    A two-byte ``rdmsr`` instruction is written to ``scratch`` and executed,
    so whatever was stored at that address is overwritten.

    :param uc: emulator instance to query.
    :param scratch: address that receives the emitted instruction.
    :param msr: MSR index; only the low 32 bits are written to RCX.
    :return: 64-bit value built from EDX (high half) and EAX (low half).
    """
    # The index goes in through RCX, the result is read from EDX:EAX and
    # executing the instruction moves RIP, so all four are saved up front.
    saved = [
        (reg, uc.reg_read(reg))
        for reg in (UC_X86_REG_RAX, UC_X86_REG_RDX, UC_X86_REG_RCX, UC_X86_REG_RIP)
    ]

    # x86: rdmsr
    buf = b"\x0f\x32"
    try:
        uc.mem_write(scratch, buf)
        uc.reg_write(UC_X86_REG_RCX, msr & 0xFFFFFFFF)
        uc.emu_start(scratch, scratch + len(buf), count=1)
        eax = uc.reg_read(UC_X86_REG_EAX)
        edx = uc.reg_read(UC_X86_REG_EDX)
    finally:
        # Restore even when emulation raises, so a failed read does not leave
        # the caller's register state clobbered.
        for reg, value in saved:
            uc.reg_write(reg, value)

    return (edx << 32) | (eax & 0xFFFFFFFF)
def speculate_instruction(emulator: Uc, address, size, model) -> None:
    """
    Start a simulated branch misprediction at a conditional jump.

    The instruction at ``address`` is decoded with the current EFLAGS and RCX.
    If it is a conditional jump, a checkpoint is taken that points at the
    path the jump would really follow, and RIP is redirected to the other one.

    :param emulator: emulator instance being driven.
    :param address: address of the current instruction.
    :param size: size of the current instruction in bytes.
    :param model: object providing ``checkpoints``, ``nesting`` and
        ``checkpoint(emulator, next_instr)``.
    """
    # Speculation window exhausted: do not nest any deeper.
    if len(model.checkpoints) >= model.nesting:
        return

    raw = emulator.mem_read(address, size)
    eflags = emulator.reg_read(UC_X86_REG_EFLAGS)
    counter = emulator.reg_read(UC_X86_REG_RCX)
    target, will_jump, is_loop = X86UnicornCond.decode(raw, eflags, counter)

    # The decoder reported no jump target: nothing to speculate on.
    if not target:
        return

    # LOOP-style instructions also decrement RCX.
    if is_loop:
        emulator.reg_write(UC_X86_REG_RCX, counter - 1)

    fall_through = address + size
    taken = fall_through + target
    if will_jump:
        correct_path, wrong_path = taken, fall_through
    else:
        correct_path, wrong_path = fall_through, taken

    # Remember where execution really continues, then run the other path.
    model.checkpoint(emulator, correct_path)
    emulator.reg_write(UC_X86_REG_RIP, wrong_path)
def test_multiprocess(self):
    """Two emulator instances share one hook and can exchange saved contexts."""
    visited = []

    def on_code(uc, address, size, userdata):
        visited.append(address)

    def fresh_engine():
        engine = Uc(UC_ARCH_X86, UC_MODE_32)
        engine.hook_add(UC_HOOK_CODE, on_code)
        engine.mem_map(0, 0x2000)
        return engine

    first = fresh_engine()
    second = fresh_engine()
    one_pass = [0x1000, 0x1002, 0x1004]

    # Both instances append to the same list through the shared hook.
    first.emu_start(0x1000, 0x1006)
    self.assertListEqual(visited, one_pass)

    second.emu_start(0x1000, 0x1006)
    self.assertListEqual(visited, one_pass + one_pass)

    first.reg_write(UC_X86_REG_EAX, 5)
    first_context = first.context_save()
    second.reg_write(UC_X86_REG_EAX, 6)
    second_context = second.context_save()

    self.assertEqual(first.reg_read(UC_X86_REG_EAX), 5)
    self.assertEqual(second.reg_read(UC_X86_REG_EAX), 6)

    # Restore each context into the *other* instance; EAX must follow it.
    second.context_restore(first_context)
    first.context_restore(second_context)

    self.assertEqual(first.reg_read(UC_X86_REG_EAX), 6)
    self.assertEqual(second.reg_read(UC_X86_REG_EAX), 5)
def hook_code(uc: unicorn.Uc, address, size, user_data):
    """
    Trace hook: print each instruction together with the registers it names.

    The line for an instruction is held back in ``globalData`` until the next
    call, so that the value its first ``w<N>``/``x<N>`` register holds at that
    point can be appended. Every address in ``globalData.watch_addrs`` is
    re-read (16 bytes) and printed whenever its content differs from the
    stored hex string.

    :param uc: emulator instance the hook fires on.
    :param address: address the emulator reports for the current instruction.
    :param size: size of that instruction in bytes.
    :param user_data: unused.
    """
    inst_code = uc.mem_read(address, size)
    # Disassemble at the instruction's address (the original passed ``size``),
    # so operands that Capstone derives from the address are rendered right.
    for inst in cs.disasm(inst_code, address):
        # If a previous instruction is pending, print it first, annotated with
        # the new value of the first register it referenced.
        if globalData.has_pre and globalData.pre_regname:
            regindex = reg_names[globalData.pre_regname.upper()]
            regvalue = uc.reg_read(regindex)
            globalData.pre_codestr += "\t//%s=0x%x" % (globalData.pre_regname, regvalue)
            print(globalData.pre_codestr)
            globalData.pre_codestr = ""
            globalData.has_pre = False

        # Watch the memory ranges of interest and print them when they change.
        for watch_addr in globalData.watch_addrs:
            idata = uc.mem_read(watch_addr, 0x10)
            hexstr = binascii.b2a_hex(idata).decode(encoding="utf-8")
            if globalData.watch_addrs[watch_addr] == hexstr:
                continue
            globalData.watch_addrs[watch_addr] = hexstr
            print("0x%x\t%s" % (watch_addr, hexstr))

        # Assemble the text for the current instruction.
        opstr = "0x%x:\t%s\t%s" % (address, inst.mnemonic, inst.op_str)

        # Collect every register named in the operands. The leading space and
        # the [^0] guard keep the "x" of a "0x.." literal from matching.
        res = re.findall(r'[^0]([wx][0-9]+)', " " + inst.op_str, re.I | re.M)

        # With several registers, the first one is taken as the one modified.
        if res:
            globalData.pre_regname = res[0]

        # Drop duplicates but keep first-seen order so output is stable.
        res = list(dict.fromkeys(res))

        # sp is not matched by the pattern above, so add it separately.
        if "sp" in inst.op_str:
            res.append("sp")

        # No registers at all: nothing to defer, print the line right away.
        if not res:
            # The original assigned a dead local here instead of the flag.
            globalData.has_pre = False
            print(opstr)
            continue

        # Store this instruction as pending, with current register values.
        fenge = "\t\t------"
        curreg = ""
        for regname in res:
            regindex = reg_names[regname.upper()]
            regvalue = uc.reg_read(regindex)
            curreg += "%s=0x%x\t" % (regname, regvalue)
        globalData.pre_codestr = opstr + fenge + curreg
        globalData.has_pre = True
def run():
    """
    Load ARM_CODE into a fresh Thumb-mode emulator, run it and print R0/R1.

    Any UcError raised along the way is reported on stdout instead of
    propagating.
    """
    print("Start emulate ARM...")
    base = 0x10000
    region_size = 0x1000
    initial_registers = (
        (UC_ARM_REG_R0, 0x1234),
        (UC_ARM_REG_R2, 0x6789),
        (UC_ARM_REG_R3, 0x3333),
    )
    try:
        # Create the virtual machine.
        mu = Uc(UC_ARCH_ARM, UC_MODE_THUMB)

        # Allocate memory and load the code.
        mu.mem_map(base, region_size)
        mu.mem_write(base, ARM_CODE)

        # Seed the registers.
        for reg, value in initial_registers:
            mu.reg_write(reg, value)

        # Trace every instruction inside the mapped region.
        mu.hook_add(UC_HOOK_CODE, hook_code, None, base, base + region_size)

        # Start the virtual machine.
        # NOTE(review): Unicorn's ARM samples start Thumb code at `addr | 1`;
        # confirm an even start address is intended for ARM_CODE.
        mu.emu_start(base, base + len(ARM_CODE))

        # Fetch the results.
        r0, r1 = (mu.reg_read(reg) for reg in (UC_ARM_REG_R0, UC_ARM_REG_R1))
        print(f">>> R0 = 0x{r0:x}")
        print(f">>> R1 = 0x{r1:x}")
    except UcError as e:
        print(f"Emulate error: {e}")
def hook_intr(self, uc: Uc, intno, user_data):
    """
    Interrupt hook that services software interrupts raised by the guest.

    For interrupt number 2 the byte two below PC is decoded with
    ``from_bytes`` and used as the request number:

    * 0 prints the interrupt number, request number and R0-R3, then loads
      R0-R3 with 16, 32, 48 and 64.
    * 1 reads R1 bytes at the address in R0 as UTF-8 text, answers through
      ``self.api_response`` and stops emulation.

    NOTE(review): the source arrived with its indentation collapsed; the
    final ``else`` is laid out here as the fallback of the request-number
    chain, but it may belong to ``if intno == 2`` instead - confirm.

    :param uc: emulator instance that raised the interrupt.
    :param intno: interrupt number reported by the emulator.
    :param user_data: unused.
    """
    # self.debug_addr(uc.reg_read(UC_ARM_REG_PC) - 40, 40)
    if intno == 2:
        # Request number: one byte read from PC - 2.
        swi = from_bytes(uc.mem_read(uc.reg_read(UC_ARM_REG_PC) - 2, 1))
        r0 = uc.reg_read(UC_ARM_REG_R0)
        r1 = uc.reg_read(UC_ARM_REG_R1)
        r2 = uc.reg_read(UC_ARM_REG_R2)
        r3 = uc.reg_read(UC_ARM_REG_R3)
        if swi == 0:
            print("done?")
            print(intno, swi, ":", uc.reg_read(UC_ARM_REG_R0), uc.reg_read(UC_ARM_REG_R1), uc.reg_read(UC_ARM_REG_R2), uc.reg_read(UC_ARM_REG_R3))
            # Hand fixed marker values back to the guest in R0-R3.
            uc.reg_write(UC_ARM_REG_R0, 16)
            uc.reg_write(UC_ARM_REG_R1, 32)
            uc.reg_write(UC_ARM_REG_R2, 48)
            uc.reg_write(UC_ARM_REG_R3, 64)
        elif swi == 1:
            # TODO: does the address/size pair need validating first?
            # R0 is the buffer address, R1 its length; undecodable bytes are
            # replaced rather than raising.
            buffer = uc.mem_read(r0, r1).decode('utf-8', 'replace')
            if self.state.write_to_stdout:
                print("API_REQ", buffer)
            self.api_response("hello")
            self.uc.emu_stop()
        else:
            # Anything else is recorded as an error and ends the run.
            self.has_error = True
            self.uc.emu_stop()
def syscall_exit_hook(
    uc: Uc, user_data: Tuple[List[int], Callable[[int], None]]
) -> None:
    """
    Treat a syscall instruction at a registered exit address as an exit.

    Syscalls rarely happen, so hooking them is a cheap way to get extra exit
    points.

    :param uc: emulator instance the hook fired on.
    :param user_data: pair of (exit addresses, function called with 0 when
        one of those addresses is hit).
    """
    exits, abort_func = user_data
    address = uc.reg_read(UC_X86_REG_RIP)
    print("Run over at {0:x}".format(address))

    if address not in exits:
        # could add other hooks here
        print("No handler for syscall insn at {0:x}".format(address))
        return

    uc.emu_stop()
    abort_func(0)
def place_input(ucf: Unicorefuzz, uc: Uc, input: bytes) -> None:
    """
    Place the fuzz input into the data buffer of the sk_buff passed in RDX.

    This is an example for sk_buff in openvsswitch.

    :param ucf: harness object used to map pages on demand.
    :param uc: emulator instance to write into.
    :param input: raw fuzz input.
    """
    # NOTE(review): this terminates on inputs *shorter* than 1500 bytes, while
    # the sibling place_input_skb rejects inputs *longer* than 1500 - confirm
    # which direction is intended.
    if len(input) < 1500:
        import os

        os._exit(0)

    skb = uc.reg_read(UC_X86_REG_RDX)  # struct sk_buff* skb
    ucf.map_page(uc, skb)  # ensure sk_buf is mapped

    # Little-endian 64-bit pointer stored at offset 0xD0 of the structure.
    data_ptr = int.from_bytes(uc.mem_read(skb + 0xD0, 8), "little")
    ucf.map_page(uc, data_ptr)  # ensure the buffer is mapped

    uc.mem_write(data_ptr, input)  # insert afl input
def set_msr(uc: Uc, scratch: int, msr: int, val: int) -> None:
    """
    Set the given model-specific register (MSR) to the given value.

    The ``INSN_WRMSR`` bytes are written to ``scratch`` and executed, so
    whatever was stored at that address is overwritten.

    :param uc: emulator instance to modify.
    :param scratch: address that receives the emitted instruction.
    :param msr: MSR index; only the low 32 bits are written to RCX.
    :param val: value to store; split into EAX (low 32 bits) and EDX (next
        32 bits).
    """
    # RAX/RDX/RCX carry the operands and executing the instruction moves RIP,
    # so all four are saved up front.
    saved = [
        (reg, uc.reg_read(reg))
        for reg in (UC_X86_REG_RAX, UC_X86_REG_RDX, UC_X86_REG_RCX, UC_X86_REG_RIP)
    ]

    try:
        # x86: wrmsr
        uc.mem_write(scratch, INSN_WRMSR)
        uc.reg_write(UC_X86_REG_RAX, val & 0xFFFFFFFF)
        uc.reg_write(UC_X86_REG_RDX, (val >> 32) & 0xFFFFFFFF)
        uc.reg_write(UC_X86_REG_RCX, msr & 0xFFFFFFFF)
        uc.emu_start(scratch, scratch + len(INSN_WRMSR), count=1)
    finally:
        # Restore even when emulation raises, so a failed write does not leave
        # the caller's register state clobbered.
        for reg, value in saved:
            uc.reg_write(reg, value)
def place_input_skb(ucf: Unicorefuzz, uc: Uc, input: bytes) -> None:
    """
    Place the fuzz input in memory and patch the sk_buff passed in RDX.

    This is an example for sk_buff in openvsswitch.

    :param ucf: harness object used to map pages on demand.
    :param uc: emulator instance to write into.
    :param input: raw fuzz input; inputs longer than 1500 bytes end the
        process.
    """
    if len(input) > 1500:
        import os

        os._exit(0)  # too big!

    # read input to the correct position at param rdx here:
    skb = uc.reg_read(UC_X86_REG_RDX)
    destination = uc.reg_read(UC_X86_REG_RDI)
    ucf.map_page(uc, skb)  # ensure sk_buf is mapped

    # Little-endian 64-bit pointer stored at offset 0xD8 of the structure.
    buffer_ptr = int.from_bytes(uc.mem_read(skb + 0xD8, 8), "little")
    ucf.map_page(uc, buffer_ptr)  # ensure the buffer is mapped

    # NOTE(review): the input is written to the address in RDI, not to the
    # buffer pointer mapped just above - confirm that is intended.
    uc.mem_write(destination, input)  # insert afl input
    uc.mem_write(skb + 0xC4, b"\xdc\x05")  # fix tail
def hook_instr(mu: Uc, address, size, user_data):
    """
    Code hook that records the current instruction on the emulator object.

    A text snapshot (address, size, R0-R4, PC and disassembly) is stored in
    ``mu.last_instr`` on every call. While ``address`` lies in
    0x0369E0..0x36A00 the same information is also printed.

    NOTE(review): the source arrived with its indentation collapsed; the
    ``mu.last_instr`` update is assumed to run for every instruction and the
    blank ``print()`` to follow the disassembly loop - confirm.

    :param mu: emulator instance the hook fired on.
    :param address: address of the current instruction.
    :param size: size of the current instruction in bytes.
    :param user_data: unused.
    """
    in_trace_window = 0x0369E0 <= address <= 0x36A00
    if in_trace_window:
        print(">>> Tracing instruction at 0x%X, instruction size = 0x%X" % (address, size))

    watched = (
        UC_ARM_REG_R0,
        UC_ARM_REG_R1,
        UC_ARM_REG_R2,
        UC_ARM_REG_R3,
        UC_ARM_REG_R4,
        UC_ARM_REG_PC,
    )
    R0, R1, R2, R3, R4, PC = (mu.reg_read(reg) for reg in watched)
    register_line = (f"R0: {R0:08X} R1: {R1:08X} R2: {R2:08X} "
                     f"R3: {R3:08X} R4: {R4:08X} PC: {PC:08X}")

    raw = mu.mem_read(address, size)
    listing = [
        f" 0x{i.address:08X}:\t{i.mnemonic}\t{i.op_str}"
        for i in cs.disasm(raw, address)
    ]

    if in_trace_window:
        print(register_line)
        for entry in listing:
            print(entry)
        print()

    # Keep the most recent instruction around for later reporting.
    snapshot = [
        f">>> Tracing instruction at 0x{address:08X}, instruction size = 0x{size:X}\n",
        register_line + "\n",
    ]
    snapshot.extend(entry + "\n" for entry in listing)
    mu.last_instr = "".join(snapshot)
def test_x86_64_syscall(self):
    """Run a lone SYSCALL instruction and let an instruction hook rewrite RAX."""
    print("Emulate x86_64 code with 'syscall' instruction")

    base = 0x1000000
    syscall_insn = b"\x0f\x05"  # SYSCALL

    def hook_syscall(mu, user_data):
        rax = mu.reg_read(UC_X86_REG_RAX)
        if rax != 0x100:
            print("ERROR: was not expecting rax=%d in syscall" % rax)
            return
        mu.reg_write(UC_X86_REG_RAX, 0x200)

    # Emulator in X86 64-bit mode with 2MB mapped for the code.
    mu = Uc(UC_ARCH_X86, UC_MODE_64)
    mu.mem_map(base, 2 * 1024 * 1024)
    mu.mem_write(base, syscall_insn)

    # Instruction hook restricted to SYSCALL.
    mu.hook_add(UC_HOOK_INSN, hook_syscall, None, 1, 0, UC_X86_INS_SYSCALL)

    # The handler expects rax=0x100 on entry.
    mu.reg_write(UC_X86_REG_RAX, 0x100)

    try:
        # No timeout, no instruction limit.
        mu.emu_start(base, base + len(syscall_insn))
    except UcError as e:
        print("ERROR: %s" % e)

    # Report the register the hook is expected to have changed.
    print(">>> Emulation done. Below is the CPU context")
    print(">>> RAX = 0x%x" % mu.reg_read(UC_X86_REG_RAX))
class Emulator:
    """
    ARM emulator wiring together memory, syscalls, hooks and the Java VM.

    :type mu Uc
    :type modules Modules
    """

    def __init__(self, vfs_root: str = None, vfp_inst_set: bool = False):
        """
        Create the Unicorn instance and all helper components.

        :param vfs_root: root directory for the virtual file system; when
            None no virtual file system is created and ``self.vfs`` is None.
        :param vfp_inst_set: when True, run the VFP enabling stub first.
        """
        # Unicorn.
        self.mu = Uc(UC_ARCH_ARM, UC_MODE_ARM)

        if vfp_inst_set:
            self._enable_vfp()

        # Android
        self.system_properties = {"libc.debug.malloc.options": ""}

        # Stack.
        self.mu.mem_map(STACK_ADDR, STACK_SIZE)
        self.mu.reg_write(UC_ARM_REG_SP, STACK_ADDR + STACK_SIZE)

        # Executable data.
        self.modules = Modules(self)
        self.memory_manager = MemoryManager(self.mu)

        # CPU
        self.interrupt_handler = InterruptHandler(self.mu)
        self.syscall_handler = SyscallHandlers(self.interrupt_handler)
        self.syscall_hooks = SyscallHooks(self.mu, self.syscall_handler, self.modules)
        self.syscall_hooks_memory = SyscallHooksMemory(self.mu, self.memory_manager, self.syscall_handler)

        # File System
        if vfs_root is not None:
            self.vfs = VirtualFileSystem(vfs_root, self.syscall_handler)
        else:
            self.vfs = None

        # Hooker
        self.mu.mem_map(HOOK_MEMORY_BASE, HOOK_MEMORY_SIZE)
        self.hooker = Hooker(self, HOOK_MEMORY_BASE, HOOK_MEMORY_SIZE)

        # JavaVM
        self.java_classloader = JavaClassLoader()
        self.java_vm = JavaVM(self, self.java_classloader, self.hooker)

        # Native
        self.native_hooks = NativeHooks(self, self.memory_manager, self.modules, self.hooker)

        # Tracer
        self.tracer = Tracer(self.mu, self.modules)

        # Thread.
        self._setup_thread_register()

    # https://github.com/unicorn-engine/unicorn/blob/8c6cbe3f3cabed57b23b721c29f937dd5baafc90/tests/regress/arm_fp_vfp_disabled.py#L15
    def _enable_vfp(self):
        """
        Run a short machine-code stub that enables the VFP unit.

        The stub is placed in a temporary mapping at 0x1000, which is unmapped
        again afterwards. SP is pointed at the end of that mapping for the
        duration of the stub.
        """
        # MRC p15, #0, r1, c1, c0, #2
        # ORR r1, r1, #(0xf << 20)
        # MCR p15, #0, r1, c1, c0, #2
        # MOV r1, #0
        # MCR p15, #0, r1, c7, c5, #4
        # MOV r0,#0x40000000
        # FMXR FPEXC, r0
        code = '11EE501F'
        code += '41F47001'
        code += '01EE501F'
        code += '4FF00001'
        code += '07EE951F'
        code += '4FF08040'
        code += 'E8EE100A'
        # vpush {d8}
        code += '2ded028b'

        address = 0x1000
        mem_size = 0x1000
        code_bytes = bytes.fromhex(code)

        try:
            self.mu.mem_map(address, mem_size)
            self.mu.mem_write(address, code_bytes)
            self.mu.reg_write(UC_ARM_REG_SP, address + mem_size)

            # Bit 0 of the start address is set for the stub.
            self.mu.emu_start(address | 1, address + len(code_bytes))
        finally:
            self.mu.mem_unmap(address, mem_size)

    def _setup_thread_register(self):
        """
        Set up thread register.
        This is currently not accurate and just filled with garbage to ensure the emulator does not crash.

        https://developer.arm.com/documentation/ddi0211/k/system-control-coprocessor/system-control-coprocessor-register-descriptions/c13--thread-and-process-id-registers
        """
        thread_info_size = 64
        thread_info = self.memory_manager.allocate(thread_info_size * 5)

        # Five consecutive 64-byte records carved out of one allocation.
        thread_info_1 = thread_info + (thread_info_size * 0)
        thread_info_2 = thread_info + (thread_info_size * 1)
        thread_info_3 = thread_info + (thread_info_size * 2)
        thread_info_4 = thread_info + (thread_info_size * 3)
        thread_info_5 = thread_info + (thread_info_size * 4)

        # Thread name
        write_utf8(self.mu, thread_info_5, "AndroidNativeEmu")

        # R4
        self.mu.mem_write(thread_info_2 + 0x4, int(thread_info_5).to_bytes(4, byteorder='little'))
        self.mu.mem_write(thread_info_2 + 0xC, int(thread_info_3).to_bytes(4, byteorder='little'))

        # R1
        self.mu.mem_write(thread_info_1 + 0x4, int(thread_info_4).to_bytes(4, byteorder='little'))
        self.mu.mem_write(thread_info_1 + 0xC, int(thread_info_2).to_bytes(4, byteorder='little'))
        self.mu.reg_write(UC_ARM_REG_C13_C0_3, thread_info_1)

    def load_library(self, filename, do_init=True):
        """
        Load a module and optionally call every entry of its init array.

        :param filename: path handed to ``self.modules.load_module``.
        :param do_init: when True, call each init function with three zeros.
        :return: the loaded module object.
        """
        libmod = self.modules.load_module(filename)
        if do_init:
            logger.debug("Calling init for: %s " % filename)
            for fun_ptr in libmod.init_array:
                logger.debug("Calling Init function: %x " % fun_ptr)
                self.call_native(fun_ptr, 0, 0, 0)
        return libmod

    def call_symbol(self, module, symbol_name, *argv, is_return_jobject=True):
        """
        Look a symbol up in ``module`` and call it natively.

        :return: the result of ``call_native``, or None (after logging an
            error) when the symbol does not exist.
        """
        symbol = module.find_symbol(symbol_name)

        if symbol is None:
            logger.error('Unable to find symbol \'%s\' in module \'%s\'.' % (symbol_name, module.filename))
            return

        return self.call_native(symbol.address, *argv, is_return_jobject=is_return_jobject)

    def call_native(self, addr, *argv, is_return_jobject=True):
        """
        Call the native function at ``addr`` with ``argv`` as arguments.

        :param addr: address of the function to run.
        :param argv: arguments written by ``native_write_args``.
        :param is_return_jobject: for JNI calls, whether R0 is an index into
            the local reference table that should be resolved.
        :return: the resolved local reference value (or None) for JNI calls
            returning an object, otherwise the raw R0 value.
        """
        # Detect JNI call
        is_jni = False

        if len(argv) >= 1:
            is_jni = argv[0] == self.java_vm.address_ptr or argv[0] == self.java_vm.jni_env.address_ptr

        # TODO: Write JNI args to local ref table if jni.

        try:
            # Execute native call.
            self.mu.reg_write(UC_ARM_REG_SP, STACK_ADDR + STACK_SIZE)
            native_write_args(self, *argv)
            # randint() includes its upper bound, so stop one short of the end
            # to keep the return address inside the mapped hook memory.
            stop_pos = randint(HOOK_MEMORY_BASE, HOOK_MEMORY_BASE + HOOK_MEMORY_SIZE - 1) | 1
            self.mu.reg_write(UC_ARM_REG_LR, stop_pos)
            self.mu.emu_start(addr, stop_pos - 1)

            # Read result from locals if jni.
            if is_jni and is_return_jobject:
                result_idx = self.mu.reg_read(UC_ARM_REG_R0)
                result = self.java_vm.jni_env.get_local_reference(result_idx)

                if result is None:
                    return result

                return result.value
            else:
                return self.mu.reg_read(UC_ARM_REG_R0)
        finally:
            # Clear locals if jni.
            if is_jni:
                self.java_vm.jni_env.clear_locals()

    def dump(self, out_dir):
        """
        Write a hex dump of every mapped memory region into ``out_dir``.

        One ``<begin>-<end>.bin`` text file is created per region.

        :param out_dir: directory to create; ``os.makedirs`` raises if it
            already exists.
        """
        os.makedirs(out_dir)

        for begin, end, prot in list(self.mu.mem_regions()):
            filename = "{:#010x}-{:#010x}.bin".format(begin, end)
            pathname = os.path.join(out_dir, filename)
            # Unicorn reports the region end as the last valid address, so the
            # region is end - begin + 1 bytes long.
            region_size = end - begin + 1
            with open(pathname, "w") as f:
                f.write(hexdump.hexdump(self.mu.mem_read(begin, region_size), result='return'))
class Emulator:
    """
    ARM emulator wiring together memory, syscalls, hooks and the Java VM.

    :type mu Uc
    :type modules Modules
    :type memory Memory
    """

    def __init__(self, vfs_root=None, vfp_inst_set=False):
        """
        Create the Unicorn instance and all helper components.

        :param vfs_root: root directory for the virtual file system; when
            None no virtual file system is created and ``self.vfs`` is None.
        :param vfp_inst_set: when True, run the VFP enabling stub first.
        """
        # Unicorn.
        self.mu = Uc(UC_ARCH_ARM, UC_MODE_ARM)

        if vfp_inst_set:
            self._enable_vfp()

        # Android
        self.system_properties = {"libc.debug.malloc.options": ""}

        # Stack.
        self.mu.mem_map(config.STACK_ADDR, config.STACK_SIZE)
        self.mu.reg_write(UC_ARM_REG_SP, config.STACK_ADDR + config.STACK_SIZE)

        # Executable data.
        self.modules = Modules(self)
        self.memory = Memory(self)

        # CPU
        self.interrupt_handler = InterruptHandler(self.mu)
        self.syscall_handler = SyscallHandlers(self.interrupt_handler)
        self.syscall_hooks = SyscallHooks(self.mu, self.syscall_handler)

        # File System
        if vfs_root is not None:
            self.vfs = VirtualFileSystem(vfs_root, self.syscall_handler)
        else:
            self.vfs = None

        # Hooker
        self.mu.mem_map(config.HOOK_MEMORY_BASE, config.HOOK_MEMORY_SIZE)
        self.hooker = Hooker(self, config.HOOK_MEMORY_BASE, config.HOOK_MEMORY_SIZE)

        # JavaVM
        self.java_classloader = JavaClassLoader()
        self.java_vm = JavaVM(self, self.java_classloader, self.hooker)

        # Native
        self.native_memory = NativeMemory(self.mu, config.HEAP_BASE, config.HEAP_SIZE, self.syscall_handler)
        self.native_hooks = NativeHooks(self, self.native_memory, self.modules, self.hooker)

        # Tracer
        self.tracer = Tracer(self.mu, self.modules)

    # https://github.com/unicorn-engine/unicorn/blob/8c6cbe3f3cabed57b23b721c29f937dd5baafc90/tests/regress/arm_fp_vfp_disabled.py#L15
    def _enable_vfp(self):
        """
        Run a short machine-code stub that enables the VFP unit.

        The stub is placed in a temporary mapping at 0x1000, which is unmapped
        again afterwards. SP is pointed at the end of that mapping for the
        duration of the stub.
        """
        # MRC p15, #0, r1, c1, c0, #2
        # ORR r1, r1, #(0xf << 20)
        # MCR p15, #0, r1, c1, c0, #2
        # MOV r1, #0
        # MCR p15, #0, r1, c7, c5, #4
        # MOV r0,#0x40000000
        # FMXR FPEXC, r0
        code = '11EE501F'
        code += '41F47001'
        code += '01EE501F'
        code += '4FF00001'
        code += '07EE951F'
        code += '4FF08040'
        code += 'E8EE100A'
        # vpush {d8}
        code += '2ded028b'

        address = 0x1000
        mem_size = 0x1000
        code_bytes = bytes.fromhex(code)

        try:
            self.mu.mem_map(address, mem_size)
            self.mu.mem_write(address, code_bytes)
            self.mu.reg_write(UC_ARM_REG_SP, address + mem_size)

            # Bit 0 of the start address is set for the stub.
            self.mu.emu_start(address | 1, address + len(code_bytes))
        finally:
            self.mu.mem_unmap(address, mem_size)

    def _call_init_array(self):
        """Placeholder; does nothing."""
        pass

    def load_library(self, filename, do_init=True):
        """
        Load a module and optionally call every entry of its init array.

        :param filename: path handed to ``self.modules.load_module``.
        :param do_init: when True, call each init function without arguments.
        :return: the loaded module object.
        """
        libmod = self.modules.load_module(filename)
        if do_init:
            logger.debug("Calling Init for: %s " % filename)
            for fun_ptr in libmod.init_array:
                logger.debug("Calling Init function: %x " % fun_ptr)
                self.call_native(fun_ptr)
        return libmod

    def call_symbol(self, module, symbol_name, *argv):
        """
        Look a symbol up in ``module`` and call it natively.

        :return: the result of ``call_native``, or None (after logging an
            error) when the symbol does not exist.
        """
        symbol = module.find_symbol(symbol_name)

        if symbol is None:
            logger.error('Unable to find symbol \'%s\' in module \'%s\'.' % (symbol_name, module.filename))
            # Without this return the lookup failure went on to dereference
            # None and surfaced as an AttributeError.
            return None

        return self.call_native(symbol.address, *argv)

    def call_native(self, addr, *argv):
        """
        Call the native function at ``addr`` with ``argv`` as arguments.

        :param addr: address of the function to run.
        :param argv: arguments written by ``native_write_args``.
        :return: for JNI calls, the value of the local reference that R0
            indexes (None if there is none); None for every other call.
        """
        # Detect JNI call
        is_jni = False

        if len(argv) >= 1:
            is_jni = argv[0] == self.java_vm.address_ptr or argv[
                0] == self.java_vm.jni_env.address_ptr

        # TODO: Write JNI args to local ref table if jni.

        try:
            # Execute native call.
            native_write_args(self, *argv)
            # randint() includes its upper bound, so stop one short of the end
            # to keep the return address inside the mapped hook memory.
            stop_pos = randint(HOOK_MEMORY_BASE,
                               HOOK_MEMORY_BASE + HOOK_MEMORY_SIZE - 1) | 1
            self.mu.reg_write(UC_ARM_REG_LR, stop_pos)
            self.mu.emu_start(addr, stop_pos - 1)

            # Read result from locals if jni.
            if is_jni:
                result_idx = self.mu.reg_read(UC_ARM_REG_R0)
                result = self.java_vm.jni_env.get_local_reference(result_idx)

                if result is None:
                    return result

                return result.value
        finally:
            # Clear locals if jni.
            if is_jni:
                self.java_vm.jni_env.clear_locals()

    def dump(self, out_dir):
        """
        Write a hex dump of every mapped memory region into ``out_dir``.

        One ``<begin>-<end>.bin`` text file is created per region.

        :param out_dir: directory to create; ``os.makedirs`` raises if it
            already exists.
        """
        os.makedirs(out_dir)

        for begin, end, prot in list(self.mu.mem_regions()):
            filename = "{:#010x}-{:#010x}.bin".format(begin, end)
            pathname = os.path.join(out_dir, filename)
            # Unicorn reports the region end as the last valid address, so the
            # region is end - begin + 1 bytes long.
            region_size = end - begin + 1
            with open(pathname, "w") as f:
                f.write(
                    hexdump.hexdump(self.mu.mem_read(begin, region_size),
                                    result='return'))
ARM_BYTECODE, _ = ks.asm(ARM_CODE) # convert the array of integers into bytes ARM_BYTECODE = bytes(ARM_BYTECODE) print(f"Code successfully assembled (length = {len(ARM_BYTECODE)})") print("ARM bytecode:", ARM_BYTECODE) except KsError as e: print("Keystone Error: %s" % e) exit(1) # memory address where emulation starts ADDRESS = 0x1000000 print("Emulating the ARM code") try: # Initialize emulator in ARM mode mu = Uc(UC_ARCH_ARM, UC_MODE_ARM) # map 2MB memory for this emulation mu.mem_map(ADDRESS, 2 * 1024 * 1024) # write machine code to be emulated to memory mu.mem_write(ADDRESS, ARM_BYTECODE) # Set the r0 register in the code, let's calculate factorial(5) mu.reg_write(UC_ARM_REG_R0, 5) # emulate code in infinite time and unlimited instructions mu.emu_start(ADDRESS, ADDRESS + len(ARM_BYTECODE)) # now print out the R0 register print("Emulation done. Below is the result") # retrieve the result from the R1 register r1 = mu.reg_read(UC_ARM_REG_R1) print(">> R1 = %u" % r1) except UcError as e: print("Unicorn Error: %s" % e)
class Emulator:
    """
    ARM emulator wiring together memory, syscalls, hooks and the Java VM.

    :type mu Uc
    :type modules Modules
    :type memory Memory
    """

    def __init__(self, vfs_root=None, vfp_inst_set=False):
        """
        Create the Unicorn instance and all helper components.

        :param vfs_root: root directory for the virtual file system; when
            None no virtual file system is created and ``self.vfs`` is None.
        :param vfp_inst_set: when True, run the VFP enabling stub first.
        """
        # Unicorn.
        self.mu = Uc(UC_ARCH_ARM, UC_MODE_ARM)

        if vfp_inst_set:
            self._enable_vfp()

        # Stack.
        self.mu.mem_map(config.STACK_ADDR, config.STACK_SIZE)
        self.mu.reg_write(UC_ARM_REG_SP, config.STACK_ADDR + config.STACK_SIZE)

        # Executable data.
        self.modules = Modules(self)
        self.memory = Memory(self)

        # CPU
        self.interrupt_handler = InterruptHandler(self.mu)
        self.syscall_handler = SyscallHandlers(self.interrupt_handler)
        self.syscall_hooks = SyscallHooks(self.mu, self.syscall_handler)

        # File System
        if vfs_root is not None:
            self.vfs = VirtualFileSystem(vfs_root, self.syscall_handler)
        else:
            self.vfs = None

        # Hooker
        self.mu.mem_map(config.MEMORY_BASE, config.MEMORY_SIZE)
        self.hooker = Hooker(self, config.MEMORY_BASE, config.MEMORY_SIZE)

        # JavaVM
        self.java_classloader = JavaClassLoader()
        self.java_vm = JavaVM(self.java_classloader, self.hooker)

        # Native
        self.native_memory = NativeMemory(self.mu, config.MEMORY_DYN_BASE, config.MEMORY_DYN_SIZE, self.syscall_handler)
        self.native_hooks = NativeHooks(self.native_memory, self.modules, self.hooker)

    # https://github.com/unicorn-engine/unicorn/blob/8c6cbe3f3cabed57b23b721c29f937dd5baafc90/tests/regress/arm_fp_vfp_disabled.py#L15
    def _enable_vfp(self):
        """
        Run a short machine-code stub that enables the VFP unit.

        The stub is placed in a temporary mapping at 0x1000, which is unmapped
        again afterwards. SP is pointed at the end of that mapping for the
        duration of the stub.
        """
        # MRC p15, #0, r1, c1, c0, #2
        # ORR r1, r1, #(0xf << 20)
        # MCR p15, #0, r1, c1, c0, #2
        # MOV r1, #0
        # MCR p15, #0, r1, c7, c5, #4
        # MOV r0,#0x40000000
        # FMXR FPEXC, r0
        code = '11EE501F'
        code += '41F47001'
        code += '01EE501F'
        code += '4FF00001'
        code += '07EE951F'
        code += '4FF08040'
        code += 'E8EE100A'
        # vpush {d8}
        code += '2ded028b'

        address = 0x1000
        mem_size = 0x1000
        code_bytes = bytes.fromhex(code)

        try:
            self.mu.mem_map(address, mem_size)
            self.mu.mem_write(address, code_bytes)
            self.mu.reg_write(UC_ARM_REG_SP, address + mem_size)

            # Bit 0 of the start address is set for the stub.
            self.mu.emu_start(address | 1, address + len(code_bytes))
        finally:
            self.mu.mem_unmap(address, mem_size)

    def load_library(self, filename):
        """Load ``filename`` through ``self.modules`` and return the module."""
        return self.modules.load_module(filename)

    def call_symbol(self, module, symbol_name, *argv):
        """
        Look a symbol up in ``module`` and call it natively.

        :return: the result of ``call_native``, or None (after logging an
            error) when the symbol does not exist.
        """
        symbol = module.find_symbol(symbol_name)

        if symbol is None:
            logger.error('Unable to find symbol \'%s\' in module \'%s\'.' % (symbol_name, module.filename))
            # Without this return the lookup failure went on to dereference
            # None and surfaced as an AttributeError.
            return None

        return self.call_native(symbol.address, *argv)

    def call_native(self, addr, *argv):
        """
        Call the native function at ``addr`` with ``argv`` as arguments.

        :param addr: address of the function to run.
        :param argv: arguments written by ``native_write_args``.
        :return: for JNI calls, the value of the local reference that R0
            indexes (None if there is none); None for every other call.
        """
        # Detect JNI call
        is_jni = False

        if len(argv) >= 1:
            is_jni = argv[0] == self.java_vm.address_ptr or argv[
                0] == self.java_vm.jni_env.address_ptr

        # TODO: Write JNI args to local ref table if jni.

        try:
            # Execute native call.
            native_write_args(self.mu, *argv)
            # randint() includes its upper bound, so stop one short of the end
            # to keep the return address inside the mapped hook memory.
            stop_pos = randint(MEMORY_BASE, MEMORY_BASE + MEMORY_SIZE - 1) | 1
            self.mu.reg_write(UC_ARM_REG_LR, stop_pos)
            self.mu.emu_start(addr, stop_pos - 1)

            # Read result from locals if jni.
            if is_jni:
                result_idx = self.mu.reg_read(UC_ARM_REG_R0)
                result = self.java_vm.jni_env.get_local_reference(result_idx)

                if result is None:
                    return result

                return result.value
        finally:
            # Clear locals if jni.
            if is_jni:
                self.java_vm.jni_env.clear_locals()
def fuzzing_end_point(mu: Uc,
                      emu_env: EmulatorEnv,
                      fct_start: int,
                      input_regs: Dict[int, RegisterInput]) -> bool:
    """
    Fuzz memory locations until the function reaches a valid end point.

    Rounds of fuzzing rules are generated from the memory locations known so
    far and the function is re-emulated for each round. The process repeats
    while new end addresses or new memory locations keep appearing, and gives
    up once ``emu_env.fuzzing_timeout`` seconds have passed.

    :param mu: unicorn instance.
    :param emu_env: emulation environment.
    :param fct_start: function start address.
    :param input_regs: input registers.
    :return: True if a valid function end point was reached, otherwise False
        (also when fuzzing is disabled or times out).
    """
    if emu_env.debug_disable_fuzzing_end_point:
        return False

    seen_end_addrs = set()
    current_locations = list()
    next_locations = list()
    last_end_addr = mu.reg_read(UC_X86_REG_RIP)

    emu_env.fuzzing_start_time = int(time.time())
    emu_env.fuzzing_is_timeout = False
    emu_env.fuzzing_used_end_point = True

    while True:
        # Generate fuzzing rules.
        previous_count = len(current_locations)
        current_locations = fuzzing_generate_mem_locations(emu_env, next_locations)
        rounds = fuzzing_generate_rounds(current_locations)

        # Start fuzzing the actual function with the provided rules.
        emu_env.runtime_memory_changes = set()
        for rules in rounds:

            # Give up once the overall time budget is spent.
            elapsed = int(time.time()) - emu_env.fuzzing_start_time
            if elapsed > emu_env.fuzzing_timeout:
                print("Fuzzing timeout limit reached. Stopping it.")
                emu_env.fuzzing_is_timeout = True
                return False

            # Apply this round's rule to each memory location, by position.
            for position, rule_type in enumerate(rules):
                mem_obj = current_locations[position]
                mem_obj.fuzz_type = rule_type
                emu_env.runtime_memory_changes.add(mem_obj)

            emu_success = emulate_function(mu, emu_env, fct_start, input_regs)
            next_locations = fuzzing_generate_mem_locations(emu_env, next_locations)

            # A failed emulation says nothing about the end point; next round.
            if not emu_success:
                continue

            # Leave the rounds as soon as execution stops somewhere new.
            last_end_addr = mu.reg_read(UC_X86_REG_RIP)
            if last_end_addr not in seen_end_addrs:
                break

        # Did the current memory configuration end in a valid end instruction?
        if emu_env.fct_ends.end_valid(last_end_addr):
            return True

        # Go again only if this end address is new or additional memory read
        # locations were discovered; otherwise there is nothing left to try.
        reached_new_end = last_end_addr not in seen_end_addrs
        if not (reached_new_end or len(current_locations) > previous_count):
            return False
        seen_end_addrs.add(last_end_addr)
class emu:
    """
    Loads ELF file to unicorn, sets watchpoints and stdin
    """

    def __init__(self, fname, stdin, watchpoints=None, drcov=True, emulator_base=None, fw_entry_symbol="cont"):
        """
        Load the ELF file, map its sections and install all hooks.

        :param fname: path of the ELF file to emulate.
        :param stdin: data handed out to the guest's ``read`` calls.
        :param watchpoints: addresses that trigger a tracepoint when executed,
            read or written; defaults to a fresh empty list.
        :param drcov: when True, basic blocks are recorded for ``get_drcov``.
        :param emulator_base: start address of a section whose code and
            memory accesses are excluded from tracing.
        :param fw_entry_symbol: symbol at which the reference memory state is
            captured; if absent, it is captured on the first instruction.
        """
        self.stdin = stdin
        self.exception = ""
        self.uc = Uc(UC_ARCH_ARM, UC_MODE_ARM)
        self.fname = fname
        self.fd = open(fname, "rb")
        self.elf = elffile.ELFFile(self.fd)

        # Symbol table in both directions (name -> address, address -> name).
        self.symbols = {}
        self.symbols_reverse = {}
        for i in range(self.elf.num_sections()):
            sec = self.elf.get_section(i)
            if sec.name == ".symtab":
                for sym in sec.iter_symbols():
                    self.symbols[sym.name] = sym.entry["st_value"]
                    self.symbols_reverse[sym.entry["st_value"]] = sym.name

        self.results = []
        self.result_id = 0
        self.coverage_pc = set()
        self.coverage_bb = set()
        self.read = set()
        self.write = set()
        self.trace_initialized = False
        self.coverage_activity = {}
        self.read_activity = {}
        self.write_activity = {}
        self.stdout = ""
        self.stderr = ""
        self.emulator_base_start = None
        self.emulator_base_stop = None
        # Filled by hook_code; starts empty so a tracepoint raised before the
        # first instruction can still be recorded.
        self.regs = {}

        if fw_entry_symbol in self.symbols:
            # ignore everything until that symbol
            self.fw_entry = self.symbols[fw_entry_symbol]
        else:
            self.fw_entry = None

        # loading prog headers
        self.state = []
        self.segments = []
        for i in range(self.elf.num_sections()):
            section = self.elf.get_section(i)
            if section.header["sh_flags"] & SH_FLAGS.SHF_ALLOC != 0:
                addr = section.header["sh_addr"]
                size = section.header["sh_size"]
                name = section.name

                # NOBITS sections contain no data in the file and are
                # initialized with zero.
                if section.header["sh_type"] == "SHT_NOBITS":
                    data = b"\x00" * size
                else:
                    data = section.data()
                print("Found %s @ 0x%x - 0x%x (%d bytes)" % (name, addr, addr+len(data), len(data)))

                if emulator_base == addr:
                    self.emulator_base_start = emulator_base
                    self.emulator_base_stop = emulator_base + size

                self.segments += [(name, addr, size)]
                self.state += [(addr, size, data)]

        # compute memory map from sections
        self.maps = []
        if self.emulator_base_start is not None:
            # NOTE(review): every other entry of self.maps is (addr, size),
            # but this one stores the stop address as second item - confirm.
            self.maps += [(self.emulator_base_start, self.emulator_base_stop)]

        # NOTE(review): x[0] is the section *name*, so this orders by name and
        # not by address - confirm that is what the merge below expects.
        self.segments = sorted(self.segments, key=lambda x: x[0])
        for name, addr, size in self.segments:
            # Align the start down to 1 KiB and grow the size accordingly.
            size += addr & 0x3ff
            addr = addr & (~0x3ff)

            altered = False
            for i, (map_addr, map_size) in enumerate(self.maps):
                offset = addr - map_addr
                if addr >= map_addr and addr <= map_addr + map_size:
                    self.maps[i] = (map_addr, self.pageresize(max(map_size, offset+size)))
                    altered = True

            if not altered:
                self.maps += [(addr, self.pageresize(size))]

        for addr, size in self.maps:
            print("Mapping 0x%x - 0x%x (%d bytes)" % (addr, addr+size, size))
            self.uc.mem_map(addr, size, UC_PROT_ALL)

        for addr, size, data in self.state:
            print("Loading 0x%x - 0x%x (%d bytes)" % (addr, addr+len(data), len(data)))
            self.uc.mem_write(addr, data)

        # stack
        stack = 0xdead0000
        stack_size = 16384
        print("Mapping Stack 0x%x - 0x%x (%d bytes)" % (stack, stack+stack_size, stack_size))
        self.uc.mem_map(stack, stack_size, UC_PROT_ALL)
        self.uc.reg_write(arm_const.UC_ARM_REG_SP, stack + stack_size)

        # syscalls
        self.uc.hook_add(UC_HOOK_INTR, self.hook_intr, self)

        # tracing; a fresh list per instance instead of a shared default
        self.watchpoints = [] if watchpoints is None else watchpoints
        self.uc.hook_add(UC_HOOK_CODE, self.hook_code, self)
        self.uc.hook_add(UC_HOOK_MEM_READ | UC_HOOK_MEM_WRITE, self.hook_mem_access, self)

        # prepare drcov file
        self.drcov = drcov
        if drcov:
            self.uc.hook_add(UC_HOOK_BLOCK, self.hook_bb, self)

    def pageresize(self, s, pagesize=1024):
        """
        Round ``s`` up to the next multiple of ``pagesize``.

        Integer arithmetic is used so large sizes are not pushed through a
        float.

        :param s: non-negative size in bytes.
        :param pagesize: granularity to round to.
        :return: ``s`` if already a multiple, otherwise the next multiple.
        """
        if s % pagesize == 0:
            return s
        return (s // pagesize + 1) * pagesize

    @staticmethod
    def hook_intr(uc, size, self):
        """
        Interrupt hook emulating the guest's ``read`` and ``write`` calls.

        The call is identified by PC lying within 8 bytes after the ``read``
        or ``write`` symbol. ``read`` serves bytes from ``self.stdin``;
        ``write`` appends to ``self.stdout`` (fd 1) or ``self.stderr`` (any
        other fd) and echoes to the matching host stream.

        :param uc: emulator instance that raised the interrupt.
        :param size: second positional argument supplied by the emulator for
            this hook; not used.
        :param self: the ``emu`` instance, passed as hook user data.
        """
        pc = uc.reg_read(arm_const.UC_ARM_REG_PC)
        handled = False
        for name in ("read", "write"):
            stub = self.symbols.get(name)
            # A binary without this symbol simply cannot issue this call.
            if stub is None or not (stub <= pc <= stub + 8):
                continue

            handled = True
            fd = uc.reg_read(arm_const.UC_ARM_REG_R0)
            target = uc.reg_read(arm_const.UC_ARM_REG_R1)
            length = uc.reg_read(arm_const.UC_ARM_REG_R2)

            if name == "read":
                data = self.stdin[:length]
                self.stdin = self.stdin[length:]
                uc.mem_write(target, data)
                self.uc.reg_write(arm_const.UC_ARM_REG_R0, len(data))
            else:
                # Firmware output is not guaranteed to be valid UTF-8; replace
                # undecodable bytes instead of aborting emulation.
                text = uc.mem_read(target, length).decode("utf-8", "replace")
                if fd == 1:
                    self.stdout += text
                    sys.stdout.write(text)
                else:
                    self.stderr += text
                    sys.stderr.write(text)

        # Only report interrupts that neither handler claimed.
        if not handled:
            print("unknown intr")

    @staticmethod
    def hook_bb(uc, address, size, self):
        """
        Basic-block hook: record ``(address, size)`` for coverage output.

        Blocks inside the emulator-base range are ignored.
        """
        if self.emulator_base_start is not None:
            if address >= self.emulator_base_start and address < self.emulator_base_stop:
                return
        self.coverage_bb.add((address, size))

    @staticmethod
    def hook_code(uc, address, size, self):
        """
        Instruction hook: snapshot registers, count coverage, fire watchpoints.

        Implements the code side of the watchpoints; the memory side lives in
        ``hook_mem_access``.
        """
        # Unicorn will for some reason give old register values after a crash;
        # the last update seems to be on the entry of the bb. Keep an own copy.
        register_ids = (
            ("r0", arm_const.UC_ARM_REG_R0),
            ("r1", arm_const.UC_ARM_REG_R1),
            ("r2", arm_const.UC_ARM_REG_R2),
            ("r3", arm_const.UC_ARM_REG_R3),
            ("r4", arm_const.UC_ARM_REG_R4),
            ("r5", arm_const.UC_ARM_REG_R5),
            ("r6", arm_const.UC_ARM_REG_R6),
            ("r7", arm_const.UC_ARM_REG_R7),
            ("r8", arm_const.UC_ARM_REG_R8),
            ("r9", arm_const.UC_ARM_REG_R9),
            ("r10", arm_const.UC_ARM_REG_R10),
            ("r11", arm_const.UC_ARM_REG_R11),
            ("r12", arm_const.UC_ARM_REG_R12),
            ("sp", arm_const.UC_ARM_REG_R13),
            ("lr", arm_const.UC_ARM_REG_R14),
            ("pc", arm_const.UC_ARM_REG_R15),
        )
        self.regs = {name: self.uc.reg_read(reg) for name, reg in register_ids}

        # Capture the reference memory state at the firmware entry (bit 0 is
        # ignored in the comparison), or on the first instruction if no entry
        # symbol exists.
        if self.fw_entry is not None and address & 0xfffffffe == self.fw_entry & 0xfffffffe:
            self.trace_init_state()

        if self.fw_entry is None and not self.trace_initialized:
            self.trace_init_state()

        if self.emulator_base_start is not None:
            if address >= self.emulator_base_start and address < self.emulator_base_stop:
                return

        self.coverage_pc.add(address)
        self.coverage_activity[address] = self.coverage_activity.get(address, 0) + 1

        # Watchpoints match with bit 0 either set or cleared.
        if address in self.watchpoints or (address ^ 1) in self.watchpoints:
            self.trace_state_change("Execute")

    @staticmethod
    def hook_mem_access(uc, access, address, size, value, self):
        """
        Memory hook: record reads/writes and fire memory watchpoints.

        Accesses made while PC is inside the emulator-base range are ignored.
        """
        pc = self.uc.reg_read(arm_const.UC_ARM_REG_R15)
        if self.emulator_base_start is not None:
            if pc >= self.emulator_base_start and pc < self.emulator_base_stop:
                return

        is_write = access == UC_MEM_WRITE
        if is_write:
            self.write.add((pc, address, value))
            self.write_activity[address] = self.write_activity.get(address, 0) + 1
        else:
            self.read.add((pc, address))
            self.read_activity[address] = self.read_activity.get(address, 0) + 1

        if address in self.watchpoints:
            if is_write:
                self.trace_state_change("Write 0x%x" % address)
            else:
                self.trace_state_change("Read 0x%x" % address)

    def trace_init_state(self):
        """Capture the current content of every segment as reference state."""
        self.state = []
        self.trace_initialized = True
        for name, addr, size in self.segments:
            data = self.uc.mem_read(addr, size)
            self.state += [(addr, size, data)]

    @staticmethod
    def _diff_runs(addr, old_data, new_data):
        """
        Return ``(start_address, old_hex, new_hex)`` for each run of changed bytes.

        ``addr`` is the address of the first byte of both buffers. A run that
        reaches the end of the buffers is included.
        """
        runs = []
        start = None
        old_hex = new_hex = ""
        for offset, (before, after) in enumerate(zip(old_data, new_data)):
            if before != after:
                if start is None:
                    start = offset
                old_hex += "%02x" % before
                new_hex += "%02x" % after
            elif start is not None:
                runs.append((addr + start, old_hex, new_hex))
                start = None
                old_hex = new_hex = ""
        if start is not None:
            runs.append((addr + start, old_hex, new_hex))
        return runs

    def trace_state_change(self, reason):
        """
        Record a tracepoint: registers, current instruction and memory diff.

        Called when a tracepoint is hit. Memory is compared against the state
        stored at the previous tracepoint, which is then replaced. Buffered
        guest stdout/stderr are attached to the tracepoint and reset.

        :param reason: text describing why the tracepoint fired.
        :return: one-element list with the registers and the sorted memory
            diff.
        """
        print(reason)
        new_state = []
        memdiff = []

        for addr, size, data in self.state:
            new_data = self.uc.mem_read(addr, size)
            if data != new_data:
                # Start addresses are tracked per byte; the original derived
                # them from the hex string length (two characters per byte)
                # and lost a run that ended at the last byte.
                memdiff += self._diff_runs(addr, data, new_data)
            new_state += [(addr, size, new_data)]

        # Rendered before self.state is replaced: it diffs against old state.
        memdif_rendered = self.render_mem_diff()
        sys.stderr.write(self.stderr)
        sys.stderr.write("\n"+memdif_rendered+"\n")
        self.state = new_state

        # disassemble current instruction
        instr = ""
        pc = self.regs.get("pc")
        if pc is not None:
            try:
                md = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB)
                decoded = list(md.disasm(self.uc.mem_read(pc, 4), pc))[0]
                instr = decoded.mnemonic + " " + decoded.op_str
            except Exception:
                import traceback
                traceback.print_exc()
                try:
                    # Fall back to the raw bytes when they cannot be decoded.
                    instr = hexlify(self.uc.mem_read(pc, 4))
                except Exception:
                    # The bytes at pc are not readable either.
                    instr = ""

        # Save tracepoint object
        tp = {
            "reason": reason,
            "regs": self.regs,
            "instr": instr,
            "memdiff": memdiff,
            "memdif_rendered": memdif_rendered,
            "stdout": self.stdout,
            "stderr": self.stderr,
            "resid": self.result_id,
        }
        self.results += [tp]

        self.stdout = ""
        self.stderr = ""
        self.result_id += 1

        return [{"regs": self.regs, "memdiff": sorted(memdiff)}]

    def render_mem_diff(self, block_size=32):
        """
        Render changed memory rows as a colored hex dump.

        Each segment in ``self.state`` is compared with current memory in
        rows of ``block_size`` bytes. Changed rows are printed with new bytes
        in green above old bytes in red; watchpoints and known symbols in a
        changed row are labelled below it.

        :param block_size: number of bytes per row.
        :return: the rendered text, or "" when nothing was rendered.
        """
        rule = "-" * (3*block_size+1)
        ret = "----------" + rule + "\n"
        ret += " |\n"
        print_dots = False

        for addr, size, data in self.state:
            new_data = self.uc.mem_read(addr, size)

            # for each hexdump row
            for current_offset in range(0, size, block_size):
                old_row = data[current_offset: current_offset+block_size]
                new_row = new_data[current_offset: current_offset+block_size]

                equal = all(x == y for x, y in zip(new_row, old_row))
                if equal:
                    # Separator after a group of changed rows.
                    if print_dots:
                        print_dots = False
                        ret += " |\n"
                        ret += " |" + rule + "\n"
                        ret += " |\n"
                    continue

                row_addr = addr + current_offset
                hex_new = "%8x | " % row_addr
                hex_old = " | "
                symbols = ""

                # render diff
                for i in range(min(block_size, len(new_row))):
                    if new_row[i] == old_row[i]:
                        hex_new += "%02x " % new_row[i]
                        hex_old += " "
                    else:
                        hex_new += "\033[;32m%02x\033[;00m " % new_row[i]
                        hex_old += "\033[;31m%02x\033[;00m " % old_row[i]

                    byte_addr = row_addr + i
                    if byte_addr in self.watchpoints:
                        symbols += " | "
                        # Label left of the marker if it fits, else right.
                        if len("Watchpoint") < 3*i - 1:
                            symbols += " " * (3*i - len("Watchpoint") - 1)
                            symbols += "\033[;33mWatchpoint ^^\033[;00m\n"
                        else:
                            symbols += " " * i
                            symbols += "\033[;33m^^ Watchpoint\033[;00m\n"
                    elif byte_addr in self.symbols_reverse:
                        name = self.symbols_reverse[byte_addr]
                        symbols += " | "
                        if len(name) < 3*i - 1:
                            symbols += " " * (3*i - len(name) - 1)
                            symbols += "%s ^^\n" % name
                        else:
                            symbols += " " * i
                            symbols += "^^ %s\n" % name

                ret += hex_new + "\n" + hex_old + "\n"
                if len(symbols) > 1:
                    ret += symbols
                print_dots = True

        # cleanup end: drop the last three pieces and close with a rule
        split = ret.split("\n")
        if len(split) <= 3:
            return ""
        ret = "\n".join(split[:-3]) + "\n"
        ret += "----------" + rule + "\n"
        return ret

    def run(self, timeout=300):
        """
        Run the emulation from the ELF entry point until ``exit``.

        A final tracepoint is recorded on normal exit ("Exit") and on any
        exception (the exception text). KeyboardInterrupt exits the process.

        :param timeout: emulation timeout in seconds.
        """
        try:
            print("running until exit @ 0x%x" % self.symbols["exit"])
            self.uc.emu_start(self.elf.header.e_entry, self.symbols["exit"], timeout=timeout*UC_SECOND_SCALE)
            self.trace_state_change("Exit")

        except KeyboardInterrupt:
            sys.exit(1)

        except Exception as e:
            self.exception = str(e)
            print(e)
            import traceback
            traceback.print_exc()
            print(hex(self.uc.reg_read(arm_const.UC_ARM_REG_PC)))
            self.trace_state_change(str(e))

    # Seems to be broken in lighthouse
    def get_drcov(self):
        """
        Build coverage data in drcov format from the recorded basic blocks.

        Every entry of ``self.state`` becomes one module; each block is
        attributed to the first module whose range contains it.

        :return: the drcov file content as bytes.
        """
        module_name = os.path.basename(self.fname).encode()
        drcov = b"DRCOV VERSION: 2\nDRCOV FLAVOR: drcov\n"
        drcov += b"Module Table: version 2, count %d\n" % len(self.state)
        drcov += b"Columns: id, base, end, entry, path\n"
        for module_id, (addr, size, _) in enumerate(self.state):
            drcov += b"%d, 0x%x, 0x%x, 0x%x, %s\n" % (module_id, addr, addr+size+1, addr, module_name)

        drcov += b"BB Table: %d bbs\n" % len(self.coverage_bb)
        bb_entries = []
        for address, size in self.coverage_bb:
            for module_id, (base_addr, module_size, _) in enumerate(self.state):
                if address >= base_addr and address <= base_addr + module_size:
                    bb_entries.append(struct.pack("<Ihh", address - base_addr, size, module_id))
                    break

        return drcov + b"".join(bb_entries)

    def get_tracefile(self):
        """Return every covered instruction address, one ``0x..`` per line, as bytes."""
        return "".join("0x%x\n" % address for address in self.coverage_pc).encode()
# memory address where emulation starts
ADDRESS = 0x1000000

try:
    # Initialize the disassembler in x86-64 mode
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    # Disassemble at the address the code is actually loaded and executed at,
    # so the listing shows the same addresses the emulator will run.
    for instruction in md.disasm(X86_MACHINE_CODE, ADDRESS):
        print("0x%x:\t%s\t%s" % (instruction.address, instruction.mnemonic, instruction.op_str))
except CsError as e:
    print("Capstone Error: %s" % e)

try:
    # Initialize emulator in x86_64 mode
    mu = Uc(UC_ARCH_X86, UC_MODE_64)
    # map 2MB memory for this emulation
    mu.mem_map(ADDRESS, 2 * 1024 * 1024)
    # write machine code to be emulated to memory
    mu.mem_write(ADDRESS, X86_MACHINE_CODE)
    # Set the RDI register to 7 before the code runs
    mu.reg_write(UC_X86_REG_RDI, 7)
    # emulate code in infinite time & unlimited instructions
    mu.emu_start(ADDRESS, ADDRESS + len(X86_MACHINE_CODE))
    # now print out the RAX register
    print("Emulation done. Below is the result")
    rax = mu.reg_read(UC_X86_REG_RAX)
    print(">>> RAX = %u" % rax)
except UcError as e:
    print("Unicorn Error: %s" % e)
class CPU:
    """ARM Thumb emulator that runs a firmware image under Unicorn.

    Wraps a Unicorn instance (execution) and a Capstone instance (debug
    disassembly), maps the regions of ``MemoryMap``, and services peripheral
    accesses and software interrupts through hooks.
    """

    def __init__(self, firmware: Firmware = None, state: CpuState = None, verbose=0, init=True):
        """Create the emulator.

        :param firmware: firmware image written to flash by ``init_firmware``
        :param state: emulator state (cycle budget, I/O buffers, RAM size)
        :param verbose: 0 = quiet, >=1 logs unknown peripheral accesses,
            >=2 additionally installs the instruction trace hook
        :param init: when true, ``init()`` is called immediately
        """
        self.firmware = firmware
        self.uc = Uc(UC_ARCH_ARM, UC_MODE_THUMB)
        self.cs = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
        self.cs.detail = True
        self.state = state
        self.has_error = None
        self.last_addr = None
        # hook_inst compares against last_func; it must exist even when
        # step() is used without a prior run().
        self.last_func = None
        self.ready = False
        self.context = None
        self.verbose = verbose
        if init:
            self.init()

    def init(self):
        """Map memory, install hooks, load the firmware and reset the core."""
        if self.firmware:
            self.firmware.refresh()
        self.state.verify()
        self.init_memory()
        self.init_hook()
        self.init_firmware()
        # The saved context is what reset() restores later.
        self.context = self.uc.context_save()
        self.reset()
        self.ready = True

    def init_firmware(self):
        """Write the firmware buffer to the start of flash.

        :raises Exception: if no firmware was supplied
        """
        if not self.firmware:
            raise Exception("firmware missing error")
        addr = MemoryMap.FLASH.address
        self.uc.mem_write(addr, self.firmware.buffer)

    def reset(self):
        """Restore the saved context and load PC from the word at flash + 4."""
        addr = MemoryMap.FLASH.address
        self.uc.context_restore(self.context)
        self.uc.reg_write(UC_ARM_REG_PC, from_bytes(self.uc.mem_read(addr + 4, 4)))

    def run(self):
        """Run ``step()`` in a loop until an error occurs.

        On a ``UcError`` the surrounding instructions and all registers are
        printed before the error is re-raised.

        :raises Exception: if ``init()`` has not been called
        :raises UcError: propagated from ``step()``
        """
        if not self.ready:
            raise Exception("init() does not called")

        INST_SIZE = 2
        if self.firmware:
            self.last_func = self.firmware.text_map[self.uc.reg_read(UC_ARM_REG_PC)]
            if self.verbose >= 2:
                print(self.last_func)

        try:
            while self.step():
                pass
        except UcError as e:
            print("ERROR:", e)
            addr = self.uc.reg_read(UC_ARM_REG_PC)
            # Context before the faulting address, the address itself (marked
            # with ">"), then the context after it.
            self.debug_addr(addr - INST_SIZE * 8 - 2, count=7)
            print(">", end=" ")
            self.debug_addr(addr)
            self.debug_addr(addr + INST_SIZE, count=7)
            for reg in REGS:
                uc_value = self.uc.reg_read(reg)
                print(REGS_NAME[reg].ljust(5), hex32(uc_value), sep='\t')
            raise

    def step(self, count=None):
        """Emulate up to ``state.cycle`` instructions from the current PC.

        :param count: temporary instruction budget for this call only; the
            previous ``state.cycle`` is restored afterwards
        :return: always True when no error was flagged
        :raises UcError: if a hook set ``has_error``
        """
        addr = self.uc.reg_read(UC_ARM_REG_PC)
        cycle = self.state.cycle
        if count is not None:
            self.state.cycle = count

        try:
            # Bit 0 of the start address is set before handing it to Unicorn.
            self.uc.emu_start(addr | 1, MemoryMap.FLASH.address_until, 0, self.state.cycle)
        finally:
            if count is not None:
                self.state.cycle = cycle

        if self.has_error:
            raise UcError(0)
        return True

    def init_memory(self):
        """Map every region declared in ``MemoryMap``."""
        for region in MemoryMap:  # type: MemoryRegion
            self.uc.mem_map(region.address, region.size, region.uc_mode)

    def init_hook(self):
        """Install the peripheral, unmapped-access and interrupt hooks."""
        peripheral = MemoryMap.PERIPHERAL
        self.uc.hook_add(
            UC_HOOK_MEM_READ,
            self.hook_peripheral_read,
            None,
            peripheral.address,
            peripheral.address_until,
        )
        self.uc.hook_add(
            UC_HOOK_MEM_WRITE,
            self.hook_peripheral_write,
            None,
            peripheral.address,
            peripheral.address_until,
        )
        self.uc.hook_add(
            UC_HOOK_MEM_READ_UNMAPPED | UC_HOOK_MEM_WRITE_UNMAPPED,
            self.hook_unmapped,
        )
        self.uc.hook_add(
            UC_HOOK_INTR,
            self.hook_intr,
        )
        if self.verbose >= 2:
            self.uc.hook_add(UC_HOOK_CODE, self.hook_inst)

    def hook_intr(self, uc: Uc, intno, user_data):
        """Handle an interrupt; interrupt number 2 is decoded as a software call.

        The call number is the byte located 2 bytes before PC. Call 0 prints
        the argument registers and overwrites r0-r3 with fixed values; call 1
        reads a request buffer (r0 = address, r1 = size), answers it and stops
        emulation. Any other interrupt number flags an error and stops.
        """
        # self.debug_addr(uc.reg_read(UC_ARM_REG_PC) - 40, 40)
        if intno == 2:
            swi = from_bytes(uc.mem_read(uc.reg_read(UC_ARM_REG_PC) - 2, 1))
            r0 = uc.reg_read(UC_ARM_REG_R0)
            r1 = uc.reg_read(UC_ARM_REG_R1)
            r2 = uc.reg_read(UC_ARM_REG_R2)
            r3 = uc.reg_read(UC_ARM_REG_R3)
            if swi == 0:
                print("done?")
                print(intno, swi, ":", r0, r1, r2, r3)
                uc.reg_write(UC_ARM_REG_R0, 16)
                uc.reg_write(UC_ARM_REG_R1, 32)
                uc.reg_write(UC_ARM_REG_R2, 48)
                uc.reg_write(UC_ARM_REG_R3, 64)
            elif swi == 1:
                # TODO: address and size valid required?
                buffer = uc.mem_read(r0, r1).decode('utf-8', 'replace')
                if self.state.write_to_stdout:
                    print("API_REQ", buffer)
                self.api_response("hello")
                self.uc.emu_stop()
        else:
            self.has_error = True
            self.uc.emu_stop()

    def api_response(self, *args):
        """Write ``args`` as NUL-terminated JSON into the syscall buffer.

        r0 receives the buffer address and r1 the encoded length (without
        the terminating NUL).
        """
        bufs = json.dumps(args)
        buf = bufs.encode("utf-8")
        if self.state.write_to_stdout:
            print("API_RES", buf)
        self.uc.mem_write(MemoryMap.SYSCALL_BUFFER.address, buf)
        self.uc.mem_write(MemoryMap.SYSCALL_BUFFER.address + len(buf), b'\0')
        self.uc.reg_write(UC_ARM_REG_R0, MemoryMap.SYSCALL_BUFFER.address)
        self.uc.reg_write(UC_ARM_REG_R1, len(buf))

    def hook_peripheral_read(self, uc: Uc, access, address, size, value, data):
        """Write the value of a peripheral register into memory when it is read."""
        if address == PeripheralAddress.OP_CON_RAM_SIZE:
            uc.mem_write(address, to_bytes(self.state.ram_size))
        elif address == PeripheralAddress.OP_IO_RXR:
            # Next pending input item, or 0 when the input buffer is empty.
            if self.state.input_buffer:
                uc.mem_write(address, to_bytes(self.state.input_buffer.pop(0)))
            else:
                uc.mem_write(address, to_bytes(0))
        elif address == PeripheralAddress.OP_RTC_TICKS_MS:
            pass
            # uc.mem_write(address, to_bytes(int((time.time() - self.state.epoch) * 1000)))
        else:
            if self.verbose >= 1:
                print("read", access, hex(address), size, value, data)

    def hook_peripheral_write(self, uc: Uc, access, address, size, value, data):
        """React to a write to a peripheral register.

        Writes to OP_IO_TXR are appended to ``state.output_storage`` (and
        echoed to stdout when enabled); the controller registers are only
        logged when ``verbose >= 1``.
        """
        if address == PeripheralAddress.OP_CON_PENDING:
            if self.verbose >= 1:
                print("OPENPYTHON_CONTROLLER_PENDING", value)
        elif address == PeripheralAddress.OP_CON_EXCEPTION:
            if self.verbose >= 1:
                print("OPENPYTHON_CONTROLLER_EXCEPTION", value)
        elif address == PeripheralAddress.OP_CON_INTR_CHAR:
            if self.verbose >= 1:
                print("OPENPYTHON_CONTROLLER_INTR_CHAR", value)
        elif address == PeripheralAddress.OP_IO_TXR:
            self.state.output_storage.append(value)
            if self.state.write_to_stdout:
                print(chr(value), end="")
                sys.stdout.flush()
        else:
            if self.verbose >= 1:
                print("write", access, hex(address), size, value, data)

    def hook_unmapped(self, uc: Uc, access, address, size, value, data):
        """Report an access to unmapped memory, stop emulation and flag an error."""
        print("unmapped:", access, hex(address), size, value, data)
        uc.emu_stop()
        self.has_error = True

    def hook_inst(self, uc: Uc, address, size, data):
        """Instruction hook: print a line whenever execution enters another function.

        Functions listed in ``HELPER_FUNCTIONS`` are skipped entirely.
        """
        func = None
        if self.firmware:
            func = self.firmware.text_map[address]
            if func in HELPER_FUNCTIONS:
                return

        if self.last_func != func:
            self.last_func = func
            print("#inst", hex(address), func)

        self.last_addr = address

    def report_memory(self):
        """Print every mapped region and the total mapped size in kb."""
        total_size = 0
        for mem_start, mem_end, perm in self.uc.mem_regions():
            total_size += mem_end - mem_start
            print("memory:", hex(mem_start), hex(mem_end - mem_start), perm)

        print("memory total:", total_size / 1024, "kb")

    INST_SIZE = 2

    def debug_addr(self, addr, count=1, *, end="\n"):
        """Disassemble and print up to ``count`` instructions starting at ``addr``.

        An unmapped read is reported with a message; other ``UcError`` values
        are ignored so that this can be used from an error handler.
        """
        INST_SIZE = 4  # bytes read per requested instruction
        try:
            for inst in self.cs.disasm(
                    self.uc.mem_read(addr, INST_SIZE * count), addr, count):  # type: CsInsn
                if self.firmware:
                    print(self.firmware.text_map[inst.address], end=" ")
                print(hex(inst.address), hex(from_bytes(inst.bytes)),
                      inst.mnemonic, inst.op_str, end=end)
        except UcError as exc:
            if exc.errno == UC_ERR_READ_UNMAPPED:
                print("fail to read memory", hex(addr))

    def debug_addr_bin(self, addr, count=1):
        """Like ``debug_addr`` but prints each 2-byte instruction as binary nibbles.

        :raises Exception: if a decoded instruction is not exactly 2 bytes long
        """
        INST_SIZE = 4  # bytes read per requested instruction
        try:
            for inst in self.cs.disasm(
                    self.uc.mem_read(addr, INST_SIZE * count), addr, count):  # type: CsInsn
                if self.firmware:
                    print(self.firmware.text_map[inst.address], end=" ")
                if len(inst.bytes) != 2:
                    raise Exception(
                        f"len(inst) != 2; {inst.bytes} => {inst.mnemonic} {inst.op_str}"
                    )
                bcode = bin(from_bytes(inst.bytes))[2:].zfill(16)
                print(hex(inst.address), bcode[0:4], bcode[4:8], bcode[8:12],
                      bcode[12:16], inst.mnemonic, inst.op_str)
        except UcError as exc:
            if exc.errno == UC_ERR_READ_UNMAPPED:
                print("fail to read memory", hex(addr))
def uc_get_pc(uc: Uc, arch: Architecture) -> int:
    """
    Read the program counter of a unicorn instance.

    The register is looked up through `uc_reg_const` using the
    architecture's `pc_name`.
    """
    # noinspection PyUnresolvedReferences
    pc_register = uc_reg_const(arch, arch.pc_name)
    return uc.reg_read(pc_register)
def place_input(ucf: Unicorefuzz, uc: Uc, input: bytes) -> None:
    """
    Copy the fuzzer-provided input to the address currently held in RAX.

    The page containing that address is mapped first so the write
    cannot hit unmapped memory.
    """
    target_addr = uc.reg_read(UC_X86_REG_RAX)
    # make sure the parameter memory is mapped
    ucf.map_page(uc, target_addr)
    # insert afl input
    uc.mem_write(target_addr, input)
# Tail of the emulation set-up script: `stack_base`, `mu`, `image_base`, `a1`
# and the hook callbacks are defined earlier in the file.
stack_size = 0x10000 * 3
# Start SP one word below the end of the stack region.
stack_top = stack_base + stack_size - 0x4
mu.mem_map(stack_base, stack_size)
mu.reg_write(UC_ARM_REG_SP, stack_top)

# Allocate data memory, copy the input buffer into it and pass its address
# in R0 (presumably the first argument of the target function; confirm).
data_base = 0xF0000
data_size = 0x10000 * 3
mu.mem_map(data_base, data_size)
mu.mem_write(data_base, a1)
mu.reg_write(UC_ARM_REG_R0, data_base)

# Patch the GOT entry
mu.mem_write(image_base + 0x1EDB0, b"\xD9\x98\x00\x00")

# Install hooks
mu.hook_add(UC_HOOK_CODE, hook_code, None)
mu.hook_add(UC_HOOK_MEM_UNMAPPED, hook_memory, None)

# Set the address range of the function to run
# NOTE(review): the extra 0x1 looks like the Thumb-mode bit; confirm.
func_start = image_base + 0x9B68 + 0x1
func_end = image_base + 0x9C2C

try:
    mu.emu_start(func_start, func_end)
    # The 16-byte result is read from the buffer R2 points to after the run.
    r2 = mu.reg_read(UC_ARM_REG_R2)
    result = mu.mem_read(r2, 16)
    print(result.hex())
except UcError as e:
    print(f"UC run error {e}")
class ConcreteUnicornEmulator:
    """
    Helper class to emulate instructions in bulk via Unicorn.
    ---
    The regular Unicorn Emulator is used as a fallback for emulating single instructions that don't have their own
    implementations in Manticore. This Emulator is instead intended to completely replace Manticore's executor when
    operating on purely concrete data.

    To use the emulator, register a callback for the will_run event that calls `state.cpu.emulate_until` with an
    address at which it should switch back from Unicorn to Manticore. Passing 0 will result in the entire target being
    executed concretely.

    As a result of the concrete data requirement, this emulator is good for preloading concrete state, but typically
    should not be used once symbolic data is introduced. At time of writing, if you try emulate under Unicorn up until
    the point where symbolic data is introduced, switch to Manticore, fork states, then switch back, it *definitely*
    won't work.

    Only supports X86_64 for now.
    """

    def __init__(self, cpu):
        """Subscribe to the CPU's events, create the Unicorn engine and load the CPU state into it.

        :param cpu: the Manticore CPU whose state is mirrored into Unicorn
        :raises NotImplementedError: if the CPU architecture is not x86
        """
        self._cpu = cpu
        self._mem_delta = {}
        self.flag_registers = {"CF", "PF", "AF", "ZF", "SF", "IF", "DF", "OF"}
        self.write_backs_disabled = False
        self._stop_at = None

        # Holds key of range (addr, addr + size) and value of permissions
        # Key doesn't include permissions because unmap doesn't care about permissions
        self.already_mapped: IntervalTree = IntervalTree()

        cpu.subscribe("did_write_memory", self.write_back_memory)
        cpu.subscribe("did_write_register", self.write_back_register)
        cpu.subscribe("did_set_descriptor", self.update_segment)
        cpu.subscribe("did_map_memory", self.map_memory_callback)
        cpu.subscribe("did_unmap_memory", self.unmap_memory_callback)
        cpu.subscribe("did_protect_memory", self.protect_memory_callback)

        if self._cpu.arch == CS_ARCH_X86:
            self._uc_arch = UC_ARCH_X86
            self._uc_mode = {
                CS_MODE_32: UC_MODE_32,
                CS_MODE_64: UC_MODE_64
            }[self._cpu.mode]
        else:
            raise NotImplementedError(
                f"Unsupported architecture: {self._cpu.arch}")

        self.reset()

        self._emu.hook_add(UC_HOOK_MEM_READ_UNMAPPED, self._hook_unmapped)
        self._emu.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED, self._hook_unmapped)
        self._emu.hook_add(UC_HOOK_MEM_FETCH_UNMAPPED, self._hook_unmapped)
        self._emu.hook_add(UC_HOOK_MEM_WRITE, self._hook_write_mem)
        self._emu.hook_add(UC_HOOK_INTR, self._interrupt)
        self._emu.hook_add(UC_HOOK_INSN,
                           self._hook_syscall,
                           arg1=UC_X86_INS_SYSCALL)

        self.registers = set(self._cpu.canonical_registers)
        # The last 8 canonical registers of x86 are individual flags; replace with the eflags
        self.registers -= self.flag_registers
        self.registers.add("EFLAGS")

        self.load_state_from_manticore()

    def reset(self):
        """Create a fresh Unicorn engine and clear any pending exception."""
        self._emu = Uc(self._uc_arch, self._uc_mode)
        self._to_raise = None

    def copy_memory(self, address: int, size: int):
        """
        Copy the bytes from address to address+size into Unicorn
        Used primarily for copying memory maps
        :param address: start of buffer to copy
        :param size: How many bytes to copy
        """
        start_time = time.time()
        map_bytes = self._cpu._raw_read(address, size, force=True)
        self._emu.mem_write(address, map_bytes)
        # Only report copies that were slow enough to be noticeable.
        if time.time() - start_time > 3:
            logger.info(
                f"Copying {hr_size(size)} map at {address:#x} took {time.time() - start_time} seconds"
            )

    def load_state_from_manticore(self) -> None:
        """Copy all registers and memory maps from Manticore into Unicorn.

        :raises ConcretizeRegister: if a register currently holds a symbolic value
        """
        for reg in self.registers:
            val = self._cpu.read_register(reg)
            if issymbolic(val):
                from ..native.cpu.abstractcpu import ConcretizeRegister

                raise ConcretizeRegister(self._cpu,
                                         reg,
                                         "Concretizing for emulation.",
                                         policy="ONE")

            if reg in {"FS", "GS"}:
                # Segment registers are set up through their descriptor when
                # one is known, otherwise through the matching MSR.
                if reg == "FS" and val in self._cpu._segments:
                    base, limit, perms = self._cpu._segments[val]
                    self.update_segment(val, base, limit, perms)
                    continue
                logger.debug("Writing %s into %s", val, reg)
                self.msr_write(reg, val)
                continue

            logger.debug("Writing %s into %s", val, reg)
            self._emu.reg_write(self._to_unicorn_id(reg), val)

        for m in self._cpu.memory.maps:
            self.map_memory_callback(m.start, len(m), m.perms, m.name, 0,
                                     m.start)

    def map_memory_callback(self, address: int, size: int, perms: str,
                            name: str, offset: int, result: int) -> None:
        """
        Catches did_map_memory and copies the mapping into Unicorn.
        The range is mapped only if it does not touch an already mapped
        range; its contents and permissions are copied in either case.
        """
        begin = address
        end = address + size
        perms_value = convert_permissions(perms)
        # Check for exact match
        # Overlap match
        if (Interval(begin, end, perms_value) not in self.already_mapped
                and not self.already_mapped.overlaps(begin, end)
                and not self.already_mapped.envelop(begin, end)):
            logger.info(" ".join((
                "Mapping Memory @",
                hex(address),
                ":",
                hex(address + size),
                hr_size(size),
                "-",
                perms,
                "-",
                f"{name}:{offset:#x}" if name else "",
                "->",
                hex(result),
            )))
            self._emu.mem_map(begin, size, perms_value)
            self.already_mapped[begin:end] = perms_value

        logger.debug(" ".join((
            "Copying Memory @",
            hex(address),
            hr_size(size),
            "-",
            perms,
            "-",
            f"{name}:{offset:#x}" if name else "",
            "->",
            hex(result),
        )))
        self.copy_memory(address, size)
        self.protect_memory_callback(address, size, perms)

    def unmap_memory_callback(self, start, size):
        """Unmap Unicorn maps when Manticore unmaps them"""
        # Need this check because our memory events are leaky to internal implementation details
        end = start + size
        parent_map = self.already_mapped.overlap(start, end)

        # Only unmap whole original maps
        if (len(parent_map) == 1 and list(parent_map)[0].begin == start
                and list(parent_map)[0].end == end):
            mask = (1 << 12) - 1
            if (start & mask) != 0:
                logger.error("Memory to be unmapped is not aligned to a page")

            if (size & mask) != 0:
                # Round the size up to the next multiple of the page size.
                size = ((size >> 12) + 1) << 12
                logger.warning("Forcing unmap size to align to a page")

            logger.info(f"Unmapping memory from {start:#x} to {start+size:#x}")

            self._emu.mem_unmap(start, size)
            self.already_mapped.remove_overlap(start, start + size)
        else:
            logger.debug(
                f"Not unmapping because bounds ({start:#x} - {start+size:#x}) are enveloped in existing map:"
            )
            logger.debug(f"\tParent map(s) {parent_map}")

    def protect_memory_callback(self, start, size, perms):
        """ Set memory protections in Unicorn correctly """
        logger.debug(
            f"Changing permissions on {start:#x}:{start+size:#x} to '{perms}'")
        self._emu.mem_protect(start, size, convert_permissions(perms))

    def get_unicorn_pc(self):
        """Get the program counter from Unicorn regardless of architecture.
        Legacy method, since this module only works on x86.

        Returns None for an architecture/mode combination that is not handled.
        """
        if self._cpu.arch == CS_ARCH_ARM:
            return self._emu.reg_read(UC_ARM_REG_R15)
        elif self._cpu.arch == CS_ARCH_X86:
            if self._cpu.mode == CS_MODE_32:
                return self._emu.reg_read(UC_X86_REG_EIP)
            elif self._cpu.mode == CS_MODE_64:
                return self._emu.reg_read(UC_X86_REG_RIP)

    def _hook_syscall(self, uc, data):
        """
        Unicorn hook that transfers control to Manticore so it can execute the syscall
        """
        logger.debug(
            f"Stopping emulation at {uc.reg_read(self._to_unicorn_id('RIP')):#x} to perform syscall"
        )
        self.sync_unicorn_to_manticore()
        from ..native.cpu.abstractcpu import Syscall

        # The exception is raised later by _step, once Unicorn has stopped.
        self._to_raise = Syscall()
        uc.emu_stop()

    def _hook_write_mem(self, uc, _access, address: int, size: int, value: int,
                        _data) -> bool:
        """
        Captures memory written by Unicorn
        """
        self._mem_delta[address] = (value, size)
        return True

    def _hook_unmapped(self, uc, access, address, size, value, _data) -> bool:
        """
        We hit an unmapped region; map it into unicorn.

        Always returns False; on success `_should_try_again` is set so that
        `emulate` restarts the emulation.
        """
        try:
            self.sync_unicorn_to_manticore()
            logger.warning(
                f"Encountered an operation on unmapped memory at {address:#x}")
            m = self._cpu.memory.map_containing(address)
            self.copy_memory(m.start, m.end - m.start)
        except MemoryException as e:
            logger.error(
                f"Failed to map memory {address:#x}-{address+size:#x}, ({access}): {e}"
            )
            self._to_raise = e
            self._should_try_again = False
            return False

        self._should_try_again = True
        return False

    def _interrupt(self, uc, number: int, _data) -> bool:
        """
        Handle software interrupt (SVC/INT)
        """
        logger.info(f"Caught interrupt: {number}")
        from ..native.cpu.abstractcpu import Interruption  # prevent circular imports

        self._to_raise = Interruption(number)
        return True

    def _to_unicorn_id(self, reg_name: str) -> int:
        """Translate a Manticore register name into the Unicorn register constant.

        :raises KeyError: if no matching x86 constant exists
        :raises TypeError: if the architecture is neither ARM nor x86
        """
        if self._cpu.arch == CS_ARCH_ARM:
            return globals()["UC_ARM_REG_" + reg_name]
        elif self._cpu.arch == CS_ARCH_X86:
            # TODO(yan): This needs to handle AF register
            custom_mapping = {
                "PC": "RIP",
                "STACK": "RSP",
                "FRAME": "RBP",
                "FS_BASE": "FS_BASE"
            }
            try:
                return globals()["UC_X86_REG_" +
                                 custom_mapping.get(reg_name, reg_name)]
            except KeyError:
                logger.error("Can't find register UC_X86_REG_%s",
                             str(reg_name))
                raise
        else:
            # TODO(yan): raise a more appropriate exception
            raise TypeError

    def emulate(self, instruction) -> None:
        """
        Wrapper that runs the _step function in a loop while handling exceptions
        """
        # The emulation might restart if Unicorn needs to bring in a memory map
        # or bring a value from Manticore state.
        while True:
            # Try emulation
            self._should_try_again = False
            self._to_raise = None

            self._step(instruction)

            if not self._should_try_again:
                break

    def _step(self, instruction, chunksize: int = 0) -> None:
        """
        Execute a chunk of instructions starting from instruction
        :param instruction: Where to start
        :param chunksize: max number of instructions to execute. Defaults to infinite.
        """
        try:
            pc = self._cpu.PC
            m = self._cpu.memory.map_containing(pc)
            if self._stop_at:
                logger.info(f"Emulating from {pc:#x} to {self._stop_at:#x}")
            self._emu.emu_start(pc,
                                m.end if not self._stop_at else self._stop_at,
                                count=chunksize)
        except UcError:
            # We request re-execution by signaling error; if we didn't set
            # _should_try_again, it was likely an actual error
            if not self._should_try_again:
                raise

        if self._should_try_again:
            return

        # self.sync_unicorn_to_manticore()
        self._cpu.PC = self.get_unicorn_pc()
        if self._cpu.PC == self._stop_at:
            logger.info(
                "Reached emulation target, switching to Manticore mode")
            self.sync_unicorn_to_manticore()
            self._stop_at = None
            self.write_backs_disabled = True

        # Raise the exception from a hook that Unicorn would have eaten
        if self._to_raise:
            from ..native.cpu.abstractcpu import Syscall

            if type(self._to_raise) is Syscall:
                # NOTE: raises Syscall within sem_SYSCALL
                # NOTE: Need to call syscall semantic function due to
                # @instruction around SYSCALL
                self._cpu.sem_SYSCALL()
            logger.info(f"Raising {self._to_raise}")
            raise self._to_raise

        logger.info(f"Exiting Unicorn Mode at {self._cpu.PC:#x}")
        return

    def sync_unicorn_to_manticore(self):
        """
        Copy registers and written memory back into Manticore
        """
        # Suppress the write-back callbacks while Manticore is being updated,
        # otherwise every write would be echoed straight back into Unicorn.
        self.write_backs_disabled = True
        for reg in self.registers:
            val = self._emu.reg_read(self._to_unicorn_id(reg))
            self._cpu.write_register(reg, val)
        if len(self._mem_delta) > 0:
            logger.debug(
                f"Syncing {len(self._mem_delta)} writes back into Manticore")
        for location in self._mem_delta:
            value, size = self._mem_delta[location]
            self._cpu.write_int(location, value, size * 8)
        self.write_backs_disabled = False
        self._mem_delta = {}

    def write_back_memory(self, where, expr, size):
        """ Copy memory writes from Manticore back into Unicorn in real-time """
        if self.write_backs_disabled:
            return
        if type(expr) is bytes:
            self._emu.mem_write(where, expr)
        else:
            if issymbolic(expr):
                data = [
                    Operators.CHR(Operators.EXTRACT(expr, offset, 8))
                    for offset in range(0, size, 8)
                ]
                # Ask the solver for a concrete value of every symbolic byte.
                concrete_data = []
                for c in data:
                    if issymbolic(c):
                        c = chr(SelectedSolver.instance().get_value(
                            self._cpu.memory.constraints, c))
                    concrete_data.append(c)
                data = concrete_data
            else:
                data = [
                    Operators.CHR(Operators.EXTRACT(expr, offset, 8))
                    for offset in range(0, size, 8)
                ]
            logger.debug(
                f"Writing back {hr_size(size // 8)} to {hex(where)}: {data}")
            # TODO - the extra encoding is to handle null bytes output as strings when we concretize. That's probably a bug.
            self._emu.mem_write(
                where,
                b"".join(
                    b.encode("utf-8") if type(b) is str else b for b in data))

    def write_back_register(self, reg, val):
        """ Sync register state from Manticore -> Unicorn"""
        if self.write_backs_disabled:
            return
        if issymbolic(val):
            logger.warning("Skipping Symbolic write-back")
            return
        if reg in self.flag_registers:
            # Individual flags are mirrored by rewriting the whole EFLAGS register.
            self._emu.reg_write(self._to_unicorn_id("EFLAGS"),
                                self._cpu.read_register("EFLAGS"))
            return
        self._emu.reg_write(self._to_unicorn_id(reg), val)

    def update_segment(self, selector, base, size, perms):
        """ Only useful for setting FS right now. """
        logger.debug("Updating selector %s to 0x%02x (%s bytes) (%s)",
                     selector, base, size, perms)
        self.write_back_register("FS", selector)
        self.write_back_register("FS_BASE", base)
        self.msr_write("FS", base)

    def msr_write(self, reg, data):
        """
        set the hidden descriptor-register fields to the given address.
        This enables referencing the fs segment on x86-64.

        https://wiki.osdev.org/SWAPGS
        """
        magic = {"FS": 0xC0000100, "GS": 0xC0000101}
        return self._emu.msr_write(magic[reg], data)