def register_kernel_call(self):
    """Register the IOCTL handler that calls a function from the driver.

    The generated x64 stub treats the IOCTL input buffer as a sequence of
    8-byte values: the first one is the address to call, the following
    ones are its arguments. The first four arguments are loaded into
    RCX, RDX, R8 and R9; any further ones are pushed on the stack,
    last value of the buffer first.

    The value left in RAX by the called function is stored at
    ``self.IRP_OUTPUT_BUFFER`` and the stub then returns 0. An empty
    input buffer makes the stub return 0xC000000D
    (presumably STATUS_INVALID_PARAMETER) without calling anything.
    """
    stub = x64.MultipleInstr()

    # RAX = input buffer length in bytes; we need at least the function to call
    stub += x64.Mov('RAX', self.IO_STACK_INPUT_BUFFER_LEN)
    stub += x64.Cmp('RAX', 0)
    stub += x64.Jz(":FAIL")
    # R15 = number of bytes to release from the stack once the call is done
    # (starts with the 4 * push RDI emitted below)
    stub += x64.Mov('R15', 4 * 8)
    stub += x64.Mov('R10', self.IO_STACK_INPUT_BUFFER)

    # While the buffer holds more than (function + 4 arguments), push the
    # last remaining 8-byte value and shrink the remaining length
    stub += x64.Label(':PUSH_NEXT_ARG')
    stub += x64.Cmp('RAX', (8 * 5))
    stub += x64.Jbe(":SETUP_REG_ARGS")
    stub += x64.Sub('RAX', 8)
    stub += x64.Mov('RBX', x64.create_displacement(base='R10', index='RAX'))
    stub += x64.Push('RBX')
    stub += x64.Add('R15', 8)  # One more slot to release at the end
    stub += x64.Jmp(':PUSH_NEXT_ARG')

    # (label, remaining buffer length, register, buffer slot) for each
    # possible count of register arguments, highest count first
    register_slots = (
        (":SETUP_4_ARGS", (8 * 5), 'R9', '[R10 + 0x20]'),
        (":SETUP_3_ARGS", (8 * 4), 'R8', '[R10 + 0x18]'),
        (":SETUP_2_ARGS", (8 * 3), 'RDX', '[R10 + 0x10]'),
        (":SETUP_1_ARGS", (8 * 2), 'RCX', '[R10 + 8]'),
    )

    stub += x64.Label(":SETUP_REG_ARGS")
    # Select the entry point matching the remaining length
    for label, remaining_len, _, _ in register_slots:
        stub += x64.Cmp('RAX', remaining_len)
        stub += x64.Jz(label)
    stub += x64.Jmp(":SETUP_0_ARGS")

    # Fall-through chain: entering at N arguments also loads the N - 1 others
    for label, _, register, slot in register_slots:
        stub += x64.Label(label)
        stub += x64.Mov(register, x64.mem(slot))
    stub += x64.Label(":SETUP_0_ARGS")
    stub += x64.Mov('RAX', x64.mem('[R10]'))

    # Reserve space for the register arguments (calling convention)
    for _ in range(4):
        stub += x64.Push('RDI')
    stub += x64.Call('RAX')

    # Store the result in the output buffer, report success, clean the stack
    stub += x64.Mov('RDX', self.IRP_OUTPUT_BUFFER)
    stub += x64.Mov(x64.mem('[RDX]'), 'RAX')
    stub += x64.Xor('RAX', 'RAX')
    stub += x64.Add('RSP', 'R15')
    stub += x64.Ret()

    stub += x64.Label(":FAIL")
    stub += x64.Mov('RAX', 0x0C000000D)
    stub += x64.Ret()

    self.upgrade_driver_add_new_ioctl_handler(DU_KCALL_IOCTL, stub.get_code())
def NtCreateThreadEx_32_to_64(process, addr, param):
    """Build and run 64-bit code calling NtCreateThreadEx from a SysWow64 process.

    The generated stub saves the registers it uses, calls the function
    located by ``get_NtCreateThreadEx_syswow_addr()`` with 11 arguments and
    restores the registers.

    :param process: object whose ``handle`` attribute is passed as Arg4
    :param addr: value passed as Arg5
    :param param: value passed as Arg6
    :return: the result of ``execute_64bits_code_from_syswow`` on the stub
    """
    target_addr = get_NtCreateThreadEx_syswow_addr()
    saved_registers = ('RBX', 'RCX', 'RDX', 'RSI', 'RDI',
                       'R8', 'R9', 'R10', 'R11', 'R12', 'R13')

    stub = x64.MultipleInstr()
    # Save registers
    for register in saved_registers:
        stub += x64.Push(register)

    # Register arguments: Arg1 points to a zeroed qword pushed on the stack
    stub += x64.Push(0)
    stub += x64.Mov('RCX', 'RSP')  # Arg1
    stub += x64.Mov('RDX', 0x1fffff)  # Arg2
    stub += x64.Mov('R8', 0)  # Arg3
    stub += x64.Mov('R9', process.handle)  # Arg4

    # Stack arguments, pushed from the last one: Arg11 down to Arg7 are 0
    stub += x64.Mov('RAX', 0)
    for _ in range(5):
        stub += x64.Push('RAX')
    # Then Arg6 (param) and Arg5 (addr)
    for value in (param, addr):
        stub += x64.Mov('RAX', value)
        stub += x64.Push('RAX')

    # Reserve space for the register arguments (calling convention)
    for register in ('R9', 'R8', 'RDX', 'RCX'):
        stub += x64.Push(register)

    # Call
    stub += x64.Mov('R13', target_addr)
    stub += x64.Call('R13')

    # Clean stack: 4 reserved slots + 7 stack arguments + the Arg1 qword
    stub += x64.Add('RSP', 12 * 8)
    # Restore registers in the reverse order of their save
    for register in reversed(saved_registers):
        stub += x64.Pop(register)

    return execute_64bits_code_from_syswow(stub.get_code())
def _upgrade_driver_inject_base_upgrade(self):
    """Inject the IOCTL dispatcher into the driver and hijack its handler.

    The dispatcher written at ``kldbgdrv + self.init_driver_offset``:

    - jumps to ``self.normal_io_offset`` when the IO code (R14) is
      ``self.NORMAL_IO_CODE``;
    - otherwise walks an array of 16-byte entries ``(io code, handler)``
      whose address is read from ``kldbgdrv + self.HANDLE_ARRAY_ADDR``;
    - on a matching entry, calls the handler, copies its result (RAX) into
      RBX and jumps to ``kldbgdrv + self.normal_end_offset``;
    - on an entry whose code is 0 (end of array), jumps to
      ``kldbgdrv + self.fail_offset``.

    The array is initialised empty at ``kldbgdrv + self.FIRST_ARRAY_ADDR``,
    a jump to the dispatcher is written at ``kldbgdrv + self.hijack_offset``
    and the bookkeeping attributes (``ioctl_array``, ``ioctl_array_ptr``,
    ``next_code_addr``, ``is_upgraded``) are updated.
    """
    kldbgdrv = self.kldbgdrv
    # R14 : IOCODE
    # RSI -> IO_STACK_LOCATION
    # RDI -> IRP
    upgrade = x64.MultipleInstr()
    upgrade += x64.Cmp('R14', self.NORMAL_IO_CODE)
    # The jump is given as a plain offset: it is computed from the position
    # the Jz will have once appended (current length of the generated code)
    upgrade += x64.Jz(self.normal_io_offset - (self.init_driver_offset + len(upgrade.get_code())))
    # RAX = address of the current handler array
    upgrade += x64.Mov('Rax', x64.create_displacement(disp=kldbgdrv + self.HANDLE_ARRAY_ADDR))
    upgrade += x64.Label(":LOOP")
    upgrade += x64.Mov('RCX', x64.create_displacement('RAX'))
    upgrade += x64.Cmp('R14', 'RCX')
    upgrade += x64.Jnz(':END')
    # Entry matches: call the handler stored right after the IO code
    upgrade += x64.Mov('RAX', x64.create_displacement('RAX', disp=8))
    upgrade += x64.Call('RAX')
    upgrade += x64.Mov('RBX', 'RAX')
    upgrade += x64.JmpAt(kldbgdrv + self.normal_end_offset)
    upgrade += x64.Label(":END")
    # A null IO code marks the end of the array: unknown IOCTL
    upgrade += x64.Cmp('RCX', 0)
    upgrade += x64.Jnz(':NEXT')
    upgrade += x64.JmpAt(kldbgdrv + self.fail_offset)
    upgrade += x64.Label(":NEXT")
    upgrade += x64.Add('RAX', 0x10)
    upgrade += x64.Jmp(':LOOP')

    # Assemble once: the same bytes are written and measured below
    upgrade_code = upgrade.get_code()
    self.kdbg.write_pfv_memory(kldbgdrv + self.init_driver_offset, str(upgrade_code))

    # Write first array dest, then an empty (null-terminated) first array
    self.write_pfv_ptr(kldbgdrv + self.HANDLE_ARRAY_ADDR, kldbgdrv + self.FIRST_ARRAY_ADDR)
    self.write_pfv_ptr(kldbgdrv + self.FIRST_ARRAY_ADDR, 0)
    self.write_pfv_ptr(kldbgdrv + self.FIRST_ARRAY_ADDR + 8, 0)

    # Jump hijack
    jump_init_function = x64.Jmp(self.init_driver_offset - (self.hijack_offset))
    self.kdbg.write_pfv_memory(kldbgdrv + self.hijack_offset, str(jump_init_function.get_code()))

    self.ioctl_array = kldbgdrv + self.FIRST_ARRAY_ADDR
    self.ioctl_array_ptr = kldbgdrv + self.HANDLE_ARRAY_ADDR
    # Next free address for code: right after the dispatcher
    self.next_code_addr = kldbgdrv + self.init_driver_offset + len(upgrade_code)
    self.is_upgraded = True
def generate_syswow64_call(target, errcheck=None):
    """Generate a stub calling the 64-bit ntdll export matching ``target``.

    The number of arguments comes from ``target.prototype._argtypes_`` and
    the address from ``get_syswow_ntdll_exports()[target.__name__]``. The
    generated 64-bit code reads its arguments as 8-byte values from a
    reserved argument buffer: the first four go to RCX, RDX, R8 and R9, the
    others are pushed on the stack, last argument first.

    :param target: function description providing ``prototype`` and ``__name__``
    :param errcheck: forwarded as-is to ``try_generate_stub_target``
    :return: the result of ``try_generate_stub_target``
    """
    nb_args = len(target.prototype._argtypes_)
    target_addr = get_syswow_ntdll_exports()[target.__name__]
    allocator = windows.current_process.allocator
    argument_buffer = allocator.reserve_size(nb_args * 8)
    # 8-byte slot where the stub stores the stack adjustment it applied
    alignement_information = allocator.reserve_size(8)
    nb_args_on_stack = max(nb_args - 4, 0)

    saved_registers = ('RBX', 'RCX', 'RDX', 'RSI', 'RDI',
                       'R8', 'R9', 'R10', 'R11', 'R12', 'R13')
    argument_registers = ('RCX', 'RDX', 'R8', 'R9')

    stub = x64.MultipleInstr()
    # Save registers
    for register in saved_registers:
        stub += x64.Push(register)

    # Alignment: remember RSP & 0xf and subtract it from RSP
    # NOTE(review): with an odd number of stack arguments RSP looks to be
    # 8 mod 16 at the call below -- confirm whether the targets care
    stub += x64.Mov('RCX', 'RSP')
    stub += x64.And('RCX', 0x0f)
    stub += x64.Mov(x64.deref(alignement_information), 'RCX')
    stub += x64.Sub('RSP', 'RCX')

    # Retrieve the register arguments from the argument buffer
    for position, register in enumerate(argument_registers[:nb_args]):
        source = x64.create_displacement(disp=argument_buffer + (8 * position))
        stub += x64.Mov(register, source)

    # Push the remaining arguments, starting from the last one
    for position in range(nb_args - 1, 3, -1):
        stub += x64.Mov('RAX', x64.create_displacement(disp=argument_buffer + 8 * position))
        stub += x64.Push('RAX')

    # Reserve space for the register arguments (calling convention)
    for register in reversed(argument_registers):
        stub += x64.Push(register)

    # Call
    stub += x64.Mov('R13', target_addr)
    stub += x64.Call('R13')

    # Undo the alignment adjustment
    stub += x64.Add('RSP', x64.deref(alignement_information))
    # Clean stack: reserved slots + stack arguments
    stub += x64.Add('RSP', (4 + nb_args_on_stack) * 8)
    # Restore registers in the reverse order of their save
    for register in reversed(saved_registers):
        stub += x64.Pop(register)
    stub += x64.Ret()

    return try_generate_stub_target(stub.get_code(), argument_buffer, target, errcheck=errcheck)
GetProcAddress64 += x64.Jz(":DLL_NOT_FOUND") GetProcAddress64 += x64.Mov( "RCX", x64.mem("[RAX + 80]")) # RCX = NAME (UNICODE_STRING.Buffer) GetProcAddress64 += x64.Call(":FUNC_STRLENW64") GetProcAddress64 += x64.Mov("RDI", "RCX") GetProcAddress64 += x64.Mov("RCX", "RAX") GetProcAddress64 += x64.Mov("RSI", "R11") GetProcAddress64 += x64.Rep + x64.CmpsW( ) #;cmp with current dll name (unicode) GetProcAddress64 += x64.Test("RCX", "RCX") GetProcAddress64 += x64.Jz(":DLL_FOUND") GetProcAddress64 += x64.Mov("RDX", x64.mem("[RDX]")) GetProcAddress64 += x64.Jmp(":a_dest") GetProcAddress64 += x64.Label(":DLL_FOUND") # here rbx = base GetProcAddress64 += x64.Mov("EAX", x64.mem("[RBX + 60]")) # rax = PEBASE RVA GetProcAddress64 += x64.Add("RAX", "RBX") # RAX = PEBASE GetProcAddress64 += x64.Add("RAX", 24) # ;OPTIONAL HEADER GetProcAddress64 += x64.Mov("ECX", x64.mem("[rax + 112]")) # ;rcx = RVA export dir GetProcAddress64 += x64.Add("RCX", "RBX") # ;rcx = export_dir GetProcAddress64 += x64.Mov("RAX", "RCX") # ;RAX = export_dir GetProcAddress64 += x64.Push("RAX") # ;Save it for after function search # ; EBX = BASE | EAX = EXPORT DIR GetProcAddress64 += x64.Mov("ECX", x64.mem("[RAX + 24] ")) GetProcAddress64 += x64.Mov("R13", "RCX") # ;r13 = NB names GetProcAddress64 += x64.Mov("EDX", x64.mem("[RAX + 32] ")) # EDX = names array RVA GetProcAddress64 += x64.Add("RDX", "RBX") # RDX = names array GetProcAddress64 += x64.Xor("RCX", "RCX") GetProcAddress64 += x64.Label(":SEARCH_LOOP") GetProcAddress64 += x64.Cmp("RCX", "R13")