def sc_64_AllocRWX(address, rwx_qword):
    """Build x64 shellcode that maps an RWX page at ``address`` and runs it.

    The generated code:

    1. pushes the names ``KERNEL32.DLL`` (UTF-16-LE) and ``VirtualAlloc`` on
       the stack and calls the ``:FUNC_GETPROCADDRESS64`` label (presumably
       defined by the ``nativeutils.GetProcAddress64`` code appended at the
       end) to resolve the function;
    2. calls the resolved function with ``address``, ``0x1000``,
       ``MEM_COMMIT | MEM_RESERVE`` and ``PAGE_EXECUTE_READWRITE``;
    3. writes ``rwx_qword`` at ``address`` and calls ``address``.

    :param address: address given to ``VirtualAlloc`` and called afterwards.
    :param rwx_qword: 64-bit value stored at ``address`` before the call.
    :return: the assembled code, as returned by ``MultipleInstr.get_code()``.
    """
    dll = "KERNEL32.DLL\x00".encode("utf-16-le")
    api = "VirtualAlloc\x00"
    AllocRWX64_sc = x64.MultipleInstr()
    instructions = [
        # Resolve KERNEL32!VirtualAlloc (RCX = dll name, RDX = api name).
        shellcraft.amd64.pushstr(dll),
        x64.Mov("RCX", "RSP"),
        shellcraft.amd64.pushstr(api),
        x64.Mov("RDX", "RSP"),
        x64.Call(":FUNC_GETPROCADDRESS64"),
        x64.Mov("R10", "RAX"),
        # VirtualAlloc(address, 0x1000, MEM_COMMIT | MEM_RESERVE, RWX)
        x64.Mov("RCX", address),
        x64.Mov("RDX", 0x1000),
        x64.Mov("R8", MEM_COMMIT | MEM_RESERVE),
        x64.Mov("R9", PAGE_EXECUTE_READWRITE),
        # Reserve stack space and align RSP on 32 bytes before the call.
        x64.Sub("RSP", 0x30),
        x64.And("RSP", -32),
        x64.Call("R10"),
        # Store the qword at the requested address, then execute it.
        x64.Mov('RAX', rwx_qword),
        x64.Mov("RCX", address),
        x64.Mov(x64.mem('[RCX]'), 'RAX'),
        x64.Call("RCX"),
        windows.native_exec.nativeutils.GetProcAddress64,
    ]
    # An explicit loop is required here: on Python 3 ``map()`` is lazy, so
    # ``map(sc.__iadd__, [...])`` never appended anything and the function
    # returned empty code.
    for instruction in instructions:
        AllocRWX64_sc += instruction
    return AllocRWX64_sc.get_code()
def sc_64_LoadLibrary(dll_path):
    """Return x64 shellcode that calls ``LoadLibraryA(dll_path)`` then spins.

    The code pushes the names ``KERNEL32.DLL`` (UTF-16-LE) and
    ``LoadLibraryA`` on the stack, calls the ``:FUNC_GETPROCADDRESS64`` label
    to resolve the function, calls it with a pointer to ``dll_path`` pushed on
    the stack, and finally loops forever on a ``jmp`` to itself.

    :param dll_path: path of the DLL to load; a ``str`` is encoded to bytes
        when running under Python 3.
    :return: the assembled code, as returned by ``MultipleInstr.get_code()``.
    """
    kernel32_name = bytes("KERNEL32.DLL\x00".encode("utf-16-le"))
    function_name = b"LoadLibraryA\x00"
    if PY3 and isinstance(dll_path, str):
        dll_path = bytes(dll_path.encode())

    shellcode = x64.MultipleInstr()
    pieces = (
        # Resolve KERNEL32!LoadLibraryA (RCX = dll name, RDX = api name).
        shellcraft.amd64.pushstr(kernel32_name),
        x64.Mov("RCX", "RSP"),
        shellcraft.amd64.pushstr(function_name),
        x64.Mov("RDX", "RSP"),
        x64.Call(":FUNC_GETPROCADDRESS64"),
        x64.Mov("R10", "RAX"),
        # LoadLibraryA(<pointer to dll_path on the stack>)
        shellcraft.amd64.pushstr(dll_path),
        x64.Mov("RCX", "RSP"),
        # Reserve stack space and align RSP on 32 bytes before the call.
        x64.Sub("RSP", 0x30),
        x64.And("RSP", -32),
        x64.Call("R10"),
        # Infinite loop: the shellcode never returns.
        x64.Label(":HERE"),
        x64.Jmp(":HERE"),
        windows.native_exec.nativeutils.GetProcAddress64,
    )
    for piece in pieces:
        shellcode += piece
    return shellcode.get_code()
def sc_64_WinExec(exe):
    """Return x64 shellcode that calls ``WinExec`` on ``exe`` then spins.

    The code pushes the names ``KERNEL32.DLL`` (UTF-16-LE) and ``WinExec`` on
    the stack, calls the ``:FUNC_GETPROCADDRESS64`` label to resolve the
    function, calls it with RCX pointing to ``exe`` pushed on the stack, and
    finally loops forever on a ``jmp`` to itself.

    :param exe: command line to run; a ``str`` is encoded to bytes when
        running under Python 3.
    :return: the assembled code, as returned by ``MultipleInstr.get_code()``.
    """
    kernel32_name = bytes("KERNEL32.DLL\x00".encode("utf-16-le"))
    function_name = b"WinExec\x00"
    if PY3 and isinstance(exe, str):
        exe = bytes(exe.encode())

    shellcode = x64.MultipleInstr()
    pieces = (
        # Resolve KERNEL32!WinExec (RCX = dll name, RDX = api name).
        shellcraft.amd64.pushstr(kernel32_name),
        x64.Mov("RCX", "RSP"),
        shellcraft.amd64.pushstr(function_name),
        x64.Mov("RDX", "RSP"),
        x64.Call(":FUNC_GETPROCADDRESS64"),
        x64.Mov("R10", "RAX"),
        # First argument: pointer to the command line pushed on the stack.
        shellcraft.amd64.pushstr(exe),
        x64.Mov("RCX", "RSP"),
        # Reserve stack space and align RSP on 32 bytes before the call.
        x64.Sub("RSP", 0x30),
        x64.And("RSP", -32),
        x64.Call("R10"),
        # Dirty infinite loop: the shellcode never returns.
        x64.Label(":HERE"),
        x64.Jmp(":HERE"),
        windows.native_exec.nativeutils.GetProcAddress64,
    )
    for piece in pieces:
        shellcode += piece
    return shellcode.get_code()
def perform_manual_getproc_loadlib_64_for_dbg(target, dll_name):
    """Make ``target`` load ``dll_name`` through a manual GetProcAddress stub.

    A 0x1000-byte buffer is allocated in ``target`` and filled, back to back,
    with: the UTF-16-LE name ``KERNEL32.DLL``, the name ``LoadLibraryA``, the
    NUL-terminated ``dll_name`` and a table of three qwords pointing to those
    three strings.  The generated code receives the table address in RCX,
    resolves the function via ``:FUNC_GETPROCADDRESS64`` and calls it with
    the third table entry (the DLL name) in RCX.

    :param target: object exposing ``virtual_alloc``, ``write_memory``,
        ``write_qword`` and ``execute``.
    :param dll_name: name/path of the DLL to load (without trailing NUL).
    :return: whatever ``target.execute`` returns.
    """
    kernel32_name = "KERNEL32.DLL\x00".encode("utf-16-le")
    function_name = "LoadLibraryA\x00"
    dll_name_z = dll_name + "\x00"

    loader = x64.MultipleInstr()
    body = (
        # R15 keeps the parameter table; RCX/RDX = dll name / api name.
        x64.Mov("R15", "RCX"),
        x64.Mov("RCX", x64.mem("[R15 + 0]")),
        x64.Mov("RDX", x64.mem("[R15 + 8]")),
        x64.Call(":FUNC_GETPROCADDRESS64"),
        # RCX = pointer to the name of the DLL to load.
        x64.Mov("RCX", x64.mem("[R15 + 0x10]")),
        # Three pushes before the call, undone by three pops after it.
        x64.Push("RCX"),
        x64.Push("RCX"),
        x64.Push("RCX"),
        x64.Call("RAX"),  # LoadLibrary
        x64.Pop("RCX"),
        x64.Pop("RCX"),
        x64.Pop("RCX"),
        x64.Ret(),
    )
    for instruction in body:
        loader += instruction
    loader += nativeutils.GetProcAddress64

    # Remote layout: [dll name][api name][dll to load][3-qword pointer table]
    strings_base = target.virtual_alloc(0x1000)
    function_name_addr = strings_base + len(kernel32_name)
    dll_name_addr = function_name_addr + len(function_name)
    table_addr = dll_name_addr + len(dll_name_z)

    target.write_memory(strings_base, kernel32_name)
    target.write_memory(function_name_addr, function_name)
    target.write_memory(dll_name_addr, dll_name_z)
    target.write_qword(table_addr, strings_base)
    target.write_qword(table_addr + 8, function_name_addr)
    target.write_qword(table_addr + 0x10, dll_name_addr)

    return target.execute(loader.get_code(), table_addr)
def register_alloc_memory(self):
    """Install the ``DU_MEMALLOC_IOCTL`` handler calling ExAllocatePoolWithTag.

    The handler requires an input buffer of exactly 0x18 bytes laid out as
    three qwords (allocation type, size, tag).  It calls
    ``nt!ExAllocatePoolWithTag`` with them, stores the result in the IRP
    output buffer and returns 0.  Any other input length makes the handler
    return 0xC000000D.

    :raises ValueError: if ``nt!ExAllocatePoolWithTag`` cannot be resolved.
    """
    alloc_function = self.kdbg.get_symbol_offset("nt!ExAllocatePoolWithTag")
    if alloc_function is None:
        raise ValueError("Could not resolve <ExAllocatePoolWithTag>")

    # Fields of the input buffer, relative to RCX.
    alloc_type_field = x64.mem('[RCX]')
    alloc_size_field = x64.mem('[RCX + 0x8]')
    alloc_tag_field = x64.mem('[RCX + 0x10]')

    handler = x64.MultipleInstr()
    body = (
        x64.Cmp(self.IO_STACK_INPUT_BUFFER_LEN, 0x18),
        x64.Jnz(':FAIL'),
        x64.Mov('RCX', self.IO_STACK_INPUT_BUFFER),
        x64.Mov('R8', alloc_tag_field),
        x64.Mov('RDX', alloc_size_field),
        # RCX is loaded last because it is also the base of the reads above.
        x64.Mov('RCX', alloc_type_field),
        x64.Mov('RAX', alloc_function),
        x64.Call('RAX'),
        # Hand the returned pointer back through the output buffer.
        x64.Mov('RBX', self.IRP_OUTPUT_BUFFER),
        x64.Mov(x64.mem('[RBX]'), 'RAX'),
        x64.Xor('RAX', 'RAX'),
        x64.Ret(),
        x64.Label(":FAIL"),
        x64.Mov('RAX', 0x0C000000D),
        x64.Ret(),
    )
    for instruction in body:
        handler += instruction
    self.upgrade_driver_add_new_ioctl_handler(DU_MEMALLOC_IOCTL, handler.get_code())
def generate_64bits_execution_stub_from_syswow(x64shellcode):
    """Wrap 64-bit code in a stub callable from a SysWow64 (32-bit) process.

    Note kept from the original author: the shellcode must NOT end by a ret.

    A small 64-bit prologue is placed in front of ``x64shellcode``: it calls
    the shellcode, then copies the high dword of RAX into RDX and performs a
    32-bit far return.  A 32-bit stub far-calls that code through the
    ``CS_64bits`` selector.  Both pieces are written with the current
    process allocator.

    :param x64shellcode: assembled 64-bit code to execute.
    :return: a ``ctypes`` function (no argument, ``ULONG64`` result) bound to
        the 32-bit stub.
    :raises ValueError: if the current process is not a SysWow64 process.
    """
    if not windows.current_process.is_wow_64:
        raise ValueError(
            "Calling generate_64bits_execution_stub_from_syswow from non-syswow process"
        )

    prologue = x64.MultipleInstr()
    for instruction in (
        x64.Call(":TOEXEC"),
        # RDX = RAX >> 32 once the called code comes back.
        x64.Mov("RDX", "RAX"),
        x64.Shr("RDX", 32),
        x64.Retf32(),  # 32 bits return addr
        # The shellcode is appended right after this label.
        x64.Label(":TOEXEC"),
    ):
        prologue += instruction
    code64_addr = windows.current_process.allocator.write_code(
        prologue.get_code() + x64shellcode)

    far_call_stub = x86.MultipleInstr()
    far_call_stub += x86.Call(CS_64bits, code64_addr)
    far_call_stub += x86.Ret()
    stub_addr = windows.current_process.allocator.write_code(
        far_call_stub.get_code())

    return ctypes.CFUNCTYPE(ULONG64)(stub_addr)
def generate_64bits_execution_stub_from_syswow(x64shellcode):
    """Wrap 64-bit code in a stub callable from a SysWow64 (32-bit) process.

    Note kept from the original author: the shellcode must NOT end by a ret.

    A small 64-bit prologue is placed in front of ``x64shellcode``: it calls
    the shellcode, then copies the high dword of RAX into RDX and performs a
    32-bit far return.  A 32-bit stub far-calls that code through the
    ``CS_64bits`` selector and reloads SS afterwards.  Both pieces are
    written with ``thread_state.allocator``.

    :param x64shellcode: assembled 64-bit code to execute.
    :return: a ``ctypes`` function (no argument, ``ULONG64`` result) bound to
        the 32-bit stub.
    :raises ValueError: if the current process is not a SysWow64 process.
    """
    if not windows.current_process.is_wow_64:
        raise ValueError(
            "Calling generate_64bits_execution_stub_from_syswow from non-syswow process"
        )

    prologue = x64.MultipleInstr()
    for instruction in (
        x64.Call(":TOEXEC"),
        # RDX = RAX >> 32 once the called code comes back.
        x64.Mov("RDX", "RAX"),
        x64.Shr("RDX", 32),
        x64.Retf32(),  # 32 bits return addr
        # The shellcode is appended right after this label.
        x64.Label(":TOEXEC"),
    ):
        prologue += instruction
    code64_addr = thread_state.allocator.write_code(
        prologue.get_code() + x64shellcode)

    far_call_stub = x86.MultipleInstr()
    for instruction in (
        x86.Call(CS_64bits, code64_addr),
        # Reset the SS segment selector.
        # We need to do that due to a bug in AMD CPUs with RETF & SS
        # https://github.com/hakril/PythonForWindows/issues/10
        # http://blog.rewolf.pl/blog/?p=1484
        x86.Mov("ECX", "SS"),
        x86.Mov("SS", "ECX"),
        x86.Ret(),
    ):
        far_call_stub += instruction
    stub_addr = thread_state.allocator.write_code(far_call_stub.get_code())

    return ctypes.CFUNCTYPE(ULONG64)(stub_addr)
def register_kernel_call(self):
    """Install the ``DU_KCALL_IOCTL`` handler that calls an arbitrary address.

    The input buffer is read as an array of qwords: the first one is the
    address to call, the following ones are its arguments.  Arguments beyond
    the fourth are pushed on the stack (last one first); the first four go in
    RCX, RDX, R8 and R9.  The value returned by the call is stored in the IRP
    output buffer and the handler returns 0.  An empty input buffer makes
    the handler return 0xC000000D.
    """
    # Qword at [R10 + RAX]: the next stack argument while walking backwards.
    next_stack_arg = x64.create_displacement(base='R10', index='RAX')

    handler = x64.MultipleInstr()
    body = (
        x64.Mov('RAX', self.IO_STACK_INPUT_BUFFER_LEN),
        x64.Cmp('RAX', 0),
        x64.Jz(":FAIL"),  # Need at least the function to call
        # R15 = number of bytes to release at the end (4 * push RDI so far).
        x64.Mov('R15', 4 * 8),
        x64.Mov('R10', self.IO_STACK_INPUT_BUFFER),

        # Push every qword located past the first five (function + 4 args).
        x64.Label(':PUSH_NEXT_ARG'),
        x64.Cmp('RAX', (8 * 5)),
        x64.Jbe(":SETUP_REG_ARGS"),
        x64.Sub('RAX', 8),
        x64.Mov('RBX', next_stack_arg),
        x64.Push('RBX'),
        x64.Add('R15', 8),  # One more qword to release at the end
        x64.Jmp(':PUSH_NEXT_ARG'),

        # Jump to the entry matching the remaining length; each entry falls
        # through to the next so that lower-numbered registers are set too.
        x64.Label(":SETUP_REG_ARGS"),
        x64.Cmp('RAX', (8 * 5)),
        x64.Jz(":SETUP_4_ARGS"),
        x64.Cmp('RAX', (8 * 4)),
        x64.Jz(":SETUP_3_ARGS"),
        x64.Cmp('RAX', (8 * 3)),
        x64.Jz(":SETUP_2_ARGS"),
        x64.Cmp('RAX', (8 * 2)),
        x64.Jz(":SETUP_1_ARGS"),
        x64.Jmp(":SETUP_0_ARGS"),
        x64.Label(":SETUP_4_ARGS"),
        x64.Mov('R9', x64.mem('[R10 + 0x20]')),
        x64.Label(":SETUP_3_ARGS"),
        x64.Mov('R8', x64.mem('[R10 + 0x18]')),
        x64.Label(":SETUP_2_ARGS"),
        x64.Mov('RDX', x64.mem('[R10 + 0x10]')),
        x64.Label(":SETUP_1_ARGS"),
        x64.Mov('RCX', x64.mem('[R10 + 8]')),
        x64.Label(":SETUP_0_ARGS"),
        x64.Mov('RAX', x64.mem('[R10]')),

        # Reserve four stack slots for the callee (calling convention).
        x64.Push('RDI'),
        x64.Push('RDI'),
        x64.Push('RDI'),
        x64.Push('RDI'),
        x64.Call('RAX'),

        # Store the result, release everything that was pushed, return 0.
        x64.Mov('RDX', self.IRP_OUTPUT_BUFFER),
        x64.Mov(x64.mem('[RDX]'), 'RAX'),
        x64.Xor('RAX', 'RAX'),
        x64.Add('RSP', 'R15'),
        x64.Ret(),

        x64.Label(":FAIL"),
        x64.Mov('RAX', 0x0C000000D),
        x64.Ret(),
    )
    for instruction in body:
        handler += instruction
    self.upgrade_driver_add_new_ioctl_handler(DU_KCALL_IOCTL, handler.get_code())
def NtCreateThreadEx_32_to_64(process, addr, param):
    """Call the 64-bit ``NtCreateThreadEx`` from a SysWow64 process.

    Builds 64-bit code that saves registers, sets up the eleven arguments
    (a stack slot as argument 1, ``0x1fffff`` as argument 2, 0 as argument 3,
    ``process.handle`` as argument 4, ``addr`` as argument 5, ``param`` as
    argument 6 and 0 for arguments 7 to 11), calls the function, then
    restores the stack and the registers.

    :param process: object whose ``handle`` attribute is passed as argument 4.
    :param addr: value passed as argument 5.
    :param param: value passed as argument 6.
    :return: the result of ``execute_64bits_code_from_syswow``.
    """
    nt_create_thread_ex = get_NtCreateThreadEx_syswow_addr()
    saved_registers = ('RBX', 'RCX', 'RDX', 'RSI', 'RDI',
                       'R8', 'R9', 'R10', 'R11', 'R12', 'R13')

    code = x64.MultipleInstr()
    # Save registers
    for register in saved_registers:
        code += x64.Push(register)

    # Register arguments; the pushed zero is the slot argument 1 points to.
    code += x64.Push(0)
    code += x64.Mov('RCX', 'RSP')  # Arg1
    code += x64.Mov('RDX', 0x1fffff)  # Arg2
    code += x64.Mov('R8', 0)  # Arg3
    code += x64.Mov('R9', process.handle)  # Arg4

    # Stack arguments, pushed from the last one to the fifth.
    code += x64.Mov('RAX', 0)
    for _ in range(5):  # Arg11 down to Arg7
        code += x64.Push('RAX')
    code += x64.Mov('RAX', param)
    code += x64.Push('RAX')  # Arg6
    code += x64.Mov('RAX', addr)
    code += x64.Push('RAX')  # Arg5

    # Reserve space for the register arguments (calling convention).
    for register in ('R9', 'R8', 'RDX', 'RCX'):
        code += x64.Push(register)

    # Call
    code += x64.Mov('R13', nt_create_thread_ex)
    code += x64.Call('R13')

    # Clean stack: 1 output slot + 7 stack arguments + 4 reserved slots.
    code += x64.Add('RSP', 12 * 8)
    for register in reversed(saved_registers):
        code += x64.Pop(register)

    return execute_64bits_code_from_syswow(code.get_code())
def sc_64_LoadLibrary(dll_path):
    """Return x64 shellcode that calls ``LoadLibraryA(dll_path)`` then spins.

    The code pushes the names ``KERNEL32.DLL`` (UTF-16-LE) and
    ``LoadLibraryA`` on the stack, calls the ``:FUNC_GETPROCADDRESS64`` label
    (presumably defined by the ``nativeutils.GetProcAddress64`` code appended
    at the end) to resolve the function, calls it with a pointer to
    ``dll_path`` pushed on the stack, and finally loops forever.

    :param dll_path: path of the DLL to load, given to
        ``shellcraft.amd64.pushstr``.
    :return: the assembled code, as returned by ``MultipleInstr.get_code()``.
    """
    dll = "KERNEL32.DLL\x00".encode("utf-16-le")
    api = "LoadLibraryA\x00"
    LoadLibrary64_sc = x64.MultipleInstr()
    instructions = [
        # Resolve KERNEL32!LoadLibraryA (RCX = dll name, RDX = api name).
        shellcraft.amd64.pushstr(dll),
        x64.Mov("RCX", "RSP"),
        shellcraft.amd64.pushstr(api),
        x64.Mov("RDX", "RSP"),
        x64.Call(":FUNC_GETPROCADDRESS64"),
        x64.Mov("R10", "RAX"),
        # LoadLibraryA(<pointer to dll_path on the stack>)
        shellcraft.amd64.pushstr(dll_path),
        x64.Mov("RCX", "RSP"),
        # Reserve stack space and align RSP on 32 bytes before the call.
        x64.Sub("RSP", 0x30),
        x64.And("RSP", -32),
        x64.Call("R10"),
        # Dirty infinite loop: the shellcode never returns.
        x64.Label(":HERE"),
        x64.Jmp(":HERE"),
        windows.native_exec.nativeutils.GetProcAddress64,
    ]
    # An explicit loop is required here: on Python 3 ``map()`` is lazy, so
    # ``map(sc.__iadd__, [...])`` never appended anything and the function
    # returned empty code.
    for instruction in instructions:
        LoadLibrary64_sc += instruction
    return LoadLibrary64_sc.get_code()
def _upgrade_driver_inject_base_upgrade(self):
    """Inject the IOCTL dispatcher into the driver and hijack its handler.

    The dispatcher is written at ``kldbgdrv + init_driver_offset``.  It sends
    ``NORMAL_IO_CODE`` to the original code path, and otherwise walks an
    array of 16-byte (io code, handler) entries whose address is stored at
    ``kldbgdrv + HANDLE_ARRAY_ADDR``: a matching entry has its handler called
    (result moved to RBX), a zero code ends the search on the failure path.
    The array is initialised with a single empty entry, a jump to the
    dispatcher is written at ``hijack_offset`` and the bookkeeping
    attributes (``ioctl_array``, ``ioctl_array_ptr``, ``next_code_addr``,
    ``is_upgraded``) are set.
    """
    driver_base = self.kldbgdrv

    # Register state when the dispatcher runs:
    #   R14 : IOCODE
    #   RSI -> IO_STACK_LOCATION
    #   RDI -> IRP
    dispatcher = x64.MultipleInstr()
    dispatcher += x64.Cmp('R14', self.NORMAL_IO_CODE)
    # The jump to the original code is relative, so its offset depends on
    # the size of what has been assembled so far.
    size_before_jump = len(dispatcher.get_code())
    dispatcher += x64.Jz(self.normal_io_offset - (self.init_driver_offset + size_before_jump))
    lookup_loop = (
        x64.Mov('Rax', x64.create_displacement(disp=driver_base + self.HANDLE_ARRAY_ADDR)),
        x64.Label(":LOOP"),
        x64.Mov('RCX', x64.create_displacement('RAX')),
        x64.Cmp('R14', 'RCX'),
        x64.Jnz(':END'),
        # Matching entry: call its handler and keep the result in RBX.
        x64.Mov('RAX', x64.create_displacement('RAX', disp=8)),
        x64.Call('RAX'),
        x64.Mov('RBX', 'RAX'),
        x64.JmpAt(driver_base + self.normal_end_offset),
        x64.Label(":END"),
        # A zero io code marks the end of the array.
        x64.Cmp('RCX', 0),
        x64.Jnz(':NEXT'),
        x64.JmpAt(driver_base + self.fail_offset),
        x64.Label(":NEXT"),
        x64.Add('RAX', 0x10),
        x64.Jmp(':LOOP'),
    )
    for instruction in lookup_loop:
        dispatcher += instruction
    self.kdbg.write_pfv_memory(driver_base + self.init_driver_offset,
                               str(dispatcher.get_code()))

    # Write first array dest, then an empty (0, 0) first entry.
    first_array = driver_base + self.FIRST_ARRAY_ADDR
    array_pointer = driver_base + self.HANDLE_ARRAY_ADDR
    self.write_pfv_ptr(array_pointer, first_array)
    self.write_pfv_ptr(first_array, 0)
    self.write_pfv_ptr(first_array + 8, 0)

    # Jump hijack
    hijack_jump = x64.Jmp(self.init_driver_offset - (self.hijack_offset))
    self.kdbg.write_pfv_memory(driver_base + self.hijack_offset,
                               str(hijack_jump.get_code()))

    self.ioctl_array = first_array
    self.ioctl_array_ptr = array_pointer
    # New handler code goes right after the dispatcher.
    self.next_code_addr = driver_base + self.init_driver_offset + len(dispatcher.get_code())
    self.is_upgraded = True
# Script: builds a "manual LoadLibrary" x64 stub and starts a suspended
# 64-bit calc process with a 0x1000-byte buffer allocated in it.
import windows.native_exec.simple_x64 as x64
import windows.native_exec.nativeutils
from windows.generated_def.winstructs import *

GetProcAddress64 = windows.native_exec.nativeutils.GetProcAddress64

# Names used by the stub: module name (UTF-16-LE), function name, DLL to load.
dll = "KERNEL32.DLL\x00".encode("utf-16-le")
api = "LoadLibraryA\x00"
# NOTE(review): placeholder name without a trailing NUL — confirm how it is
# written to the target before use.
dll_to_load = "SUCE"

RemoteManualLoadLibray = x64.MultipleInstr()
# `c` is a short alias for the same MultipleInstr object.
c = RemoteManualLoadLibray
# The stub receives in RCX a table of three pointers:
# [0] module name, [8] function name, [0x10] name of the DLL to load.
c += x64.Mov("R15", "RCX")
c += x64.Mov("RCX", x64.mem("[R15 + 0]"))
c += x64.Mov("RDX", x64.mem("[R15 + 8]"))
c += x64.Call(":FUNC_GETPROCADDRESS64")
c += x64.Mov("RCX", x64.mem("[R15 + 0x10]"))
# Three pushes before calling the resolved function, undone by the three
# pops after it (NOTE(review): presumably stack space/alignment for the
# callee — confirm).
c += x64.Push("RCX")
c += x64.Push("RCX")
c += x64.Push("RCX")
c += x64.Call("RAX")
c += x64.Pop("RCX")
c += x64.Pop("RCX")
c += x64.Pop("RCX")
c += x64.Ret()
# Append the GetProcAddress64 code (presumably where the
# :FUNC_GETPROCADDRESS64 label called above is defined).
RemoteManualLoadLibray += GetProcAddress64

# NOTE(review): `windows.test` is not imported explicitly in the visible
# code — confirm it is reachable through the imports above.
calc = windows.test.pop_calc_64(dwCreationFlags=CREATE_SUSPENDED)
addr = calc.virtual_alloc(0x1000)
def generate_syswow64_call(target, errcheck=None):
    """Build a stub calling the 64-bit ntdll export named like ``target``.

    Arguments are exchanged through a buffer of one qword per argument,
    reserved with the current process allocator.  The generated 64-bit code
    saves registers, subtracts ``RSP & 0xf`` from RSP (remembering the amount
    in a reserved qword), loads the first four arguments in RCX, RDX, R8 and
    R9, pushes the remaining ones (last first), calls the export and then
    undoes every stack adjustment before returning.

    :param target: function object; ``target.prototype._argtypes_`` gives the
        number of arguments and ``target.__name__`` the export to call.
    :param errcheck: forwarded to ``try_generate_stub_target``.
    :return: the result of ``try_generate_stub_target``.
    """
    nb_args = len(target.prototype._argtypes_)
    export_addr = get_syswow_ntdll_exports()[target.__name__]
    argument_buffer_len = nb_args * 8
    argument_buffer = windows.current_process.allocator.reserve_size(argument_buffer_len)
    alignement_information = windows.current_process.allocator.reserve_size(8)
    nb_stack_args = max(nb_args - 4, 0)
    saved_registers = ('RBX', 'RCX', 'RDX', 'RSI', 'RDI',
                       'R8', 'R9', 'R10', 'R11', 'R12', 'R13')

    stub = x64.MultipleInstr()
    # Save registers
    for register in saved_registers:
        stub += x64.Push(register)

    # Alignment: drop the low 4 bits of RSP and remember how much was removed.
    stub += x64.Mov('RCX', 'RSP')
    stub += x64.And('RCX', 0x0f)
    stub += x64.Mov(x64.deref(alignement_information), 'RCX')
    stub += x64.Sub('RSP', 'RCX')

    # The first four arguments travel in registers...
    for index, register in enumerate(('RCX', 'RDX', 'R8', 'R9')[:nb_args]):
        stub += x64.Mov(register, x64.create_displacement(disp=argument_buffer + (8 * index)))
    # ...the others go on the stack, starting with the last one.
    for slot in range(nb_args - 1, 3, -1):
        stub += x64.Mov('RAX', x64.create_displacement(disp=argument_buffer + 8 * slot))
        stub += x64.Push('RAX')

    # Reserve space for the register arguments (calling convention).
    for register in ('R9', 'R8', 'RDX', 'RCX'):
        stub += x64.Push(register)

    # Call
    stub += x64.Mov('R13', export_addr)
    stub += x64.Call('R13')

    # Realign stack, then drop the reserved slots and the stack arguments.
    stub += x64.Add('RSP', x64.deref(alignement_information))
    stub += x64.Add('RSP', (4 + nb_stack_args) * 8)
    for register in reversed(saved_registers):
        stub += x64.Pop(register)
    stub += x64.Ret()

    return try_generate_stub_target(stub.get_code(), argument_buffer, target, errcheck=errcheck)
"RAX", x64.mem("[RAX + 24] ")) # ; RAX = ldr (+ 6 for 64 cause of 2 ptr) GetProcAddress64 += x64.Mov( "RAX", x64.mem("[RAX + 32]")) # ; RAX on the first elt of the list (first module) GetProcAddress64 += x64.Mov("RDX", "RAX") GetProcAddress64 += x64.Label(":a_dest") GetProcAddress64 += x64.Mov("RAX", "RDX") GetProcAddress64 += x64.Mov( "RBX", x64.mem("[RAX + 32]")) # RBX : first base ! (base of current module) #GetProcAddress64 += x64.Mov("RBX ", x64.mem("[RAX + 32]")) # RBX : first base ! (base of current module) GetProcAddress64 += x64.Cmp("RBX", 0) GetProcAddress64 += x64.Jz(":DLL_NOT_FOUND") GetProcAddress64 += x64.Mov( "RCX", x64.mem("[RAX + 80]")) # RCX = NAME (UNICODE_STRING.Buffer) GetProcAddress64 += x64.Call(":FUNC_STRLENW64") GetProcAddress64 += x64.Mov("RDI", "RCX") GetProcAddress64 += x64.Mov("RCX", "RAX") GetProcAddress64 += x64.Mov("RSI", "R11") GetProcAddress64 += x64.Rep + x64.CmpsW( ) #;cmp with current dll name (unicode) GetProcAddress64 += x64.Test("RCX", "RCX") GetProcAddress64 += x64.Jz(":DLL_FOUND") GetProcAddress64 += x64.Mov("RDX", x64.mem("[RDX]")) GetProcAddress64 += x64.Jmp(":a_dest") GetProcAddress64 += x64.Label(":DLL_FOUND") # here rbx = base GetProcAddress64 += x64.Mov("EAX", x64.mem("[RBX + 60]")) # rax = PEBASE RVA GetProcAddress64 += x64.Add("RAX", "RBX") # RAX = PEBASE GetProcAddress64 += x64.Add("RAX", 24) # ;OPTIONAL HEADER GetProcAddress64 += x64.Mov("ECX", x64.mem("[rax + 112]")) # ;rcx = RVA export dir