def allocate_stack(size=DEFAULT_STACK_SIZE):
    """Map a fresh memory region usable as a stack and yield its upper end.

    The mapping is GUARD_PAGE_SIZE bytes larger than requested; the extra bytes
    at the start of the mapping are made inaccessible so that running over
    the end of the stack faults instead of silently corrupting other memory.
    The whole mapping is released again once the generator is resumed,
    closed, or an exception is thrown into it.

    @param size: number of usable stack bytes (the guard page is added on top)
    @return: (yielded) a ctypes void pointer to the *top* of the stack.
    """
    total_size = size + GUARD_PAGE_SIZE
    protection = libc.PROT_READ | libc.PROT_WRITE
    # Flags appropriate for a stack, as described in
    # https://blog.fefe.de/?ts=a85c8ba7
    mapping_flags = (
        libc.MAP_PRIVATE
        | libc.MAP_ANONYMOUS
        | libc.MAP_GROWSDOWN
        | libc.MAP_STACK
    )
    region_start = libc.mmap(None, total_size, protection, mapping_flags, -1, 0)

    try:
        # Turn the first GUARD_PAGE_SIZE bytes of the mapping into a guard page
        # that crashes the application when it is written to (stack overflow).
        libc.mprotect(region_start, GUARD_PAGE_SIZE, libc.PROT_NONE)

        stack_top = region_start + total_size
        yield ctypes.c_void_p(stack_top)
    finally:
        libc.munmap(region_start, total_size)
def allocate_stack(size=DEFAULT_STACK_SIZE):
    """Map a fresh memory region usable as a stack and yield its upper end.

    One additional page is mapped below the requested size and made
    inaccessible, so that running over the end of the stack faults instead
    of silently corrupting other memory. The whole mapping is released again
    once the generator is resumed, closed, or an exception is thrown into it.

    @param size: number of usable stack bytes (the guard page is added on top)
    @return: (yielded) a ctypes void pointer to the *top* of the stack.
    """
    # One page more than requested, to be used as the guard page.
    total_size = size + PAGE_SIZE
    protection = libc.PROT_READ | libc.PROT_WRITE
    # Flags appropriate for a stack, as described in
    # https://blog.fefe.de/?ts=a85c8ba7
    stack_flags = libc.MAP_GROWSDOWN | libc.MAP_STACK
    region_start = libc.mmap_anonymous(total_size, protection, stack_flags)

    try:
        # Turn the first page of the mapping into a guard page that crashes
        # the application when it is written to (on stack overflow).
        libc.mprotect(region_start, PAGE_SIZE, libc.PROT_NONE)

        stack_top = region_start + total_size
        yield ctypes.c_void_p(stack_top)
    finally:
        libc.munmap(region_start, total_size)
def _generate_native_clone_child_callback():
    """Generate Linux x86_64 machine code that does the same as
    _python_clone_child_callback.

    One page of anonymous memory is allocated, filled with a small hand-assembled
    routine that first calls PyOS_AfterFork_Child() and then tail-calls the
    function pointer it received as first argument, and finally switched from
    writable to read+execute.

    @return: the page's address wrapped in libc.CLONE_CALLBACK
    """
    # Inspired by https://csl.name/post/python-jit/

    # Allocate one page of memory where we put the code
    page_size = libc.sysconf(libc.SC_PAGESIZE)
    mem = libc.mmap_anonymous(page_size, libc.PROT_READ | libc.PROT_WRITE)

    # Get address of PyOS_AfterFork_Child that we want to call
    # On Python 3 we could use to_bytes() instead of struct.pack
    afterfork_address = struct.pack(
        "Q", ctypes.cast(ctypes.pythonapi.PyOS_AfterFork_Child, ctypes.c_void_p).value
    )

    # Generate machine code that does the same as _python_clone_child_callback
    # We use this C code as template (with dummy address for PyOS_AfterFork_Child):
    """
    int clone_child_callback(int (*func_p)()) {
        void (*PyOS_AfterFork_Child)() = (void*)0xffeeddccbbaa9988;
        PyOS_AfterFork_Child();
        return func_p();
    }
    """
    # We compile this code and disassemble it with
    """
    gcc -Os -fPIC -shared -fomit-frame-pointer -march=native clone_child_callback.c \
        -o clone_child_callback.o
    objdump -d --disassembler-options=suffix clone_child_callback.o
    """
    # This gives the following code (machine code left, assembler right):
    #
    # <clone_child_callback>:
    # Store address in rdx:
    #   48 ba 88 99 aa bb cc    movabsq $0xffeeddccbbaa9988,%rdx
    #   dd ee ff
    # Allocate space on stack:
    #   48 83 ec 18             subq   $0x18,%rsp
    # Clear eax:
    #   31 c0                   xorl   %eax,%eax
    # Copy rdi (value of parameter func_p) to stack:
    #   48 89 7c 24 08          movq   %rdi,0x8(%rsp)
    # Call rdx (where address is stored):
    #   ff d2                   callq  *%rdx
    # Copy stack value func_p back to rdi:
    #   48 8b 7c 24 08          movq   0x8(%rsp),%rdi
    # Clear eax:
    #   31 c0                   xorl   %eax,%eax
    # Deallocate space on stack:
    #   48 83 c4 18             addq   $0x18,%rsp
    # Call function pointer in rdi (func_p) as tail call:
    #   ff e7                   jmpq   *%rdi
    #
    # The following creates exactly the same machine code, just with the real address:
    movabsq_address_rdx = b"\x48\xba" + afterfork_address
    subq_0x18_rsp = b"\x48\x83\xec\x18"
    # Opcode must be 0x31 (32-bit XOR, as in the listing above). 0x32 would
    # encode the 8-bit form "xorb %al,%al" and clear only the lowest byte.
    xorl_eax_eax = b"\x31\xc0"
    movq_rdi_stack = b"\x48\x89\x7c\x24\x08"
    callq_rdx = b"\xff\xd2"
    movq_stack_rdi = b"\x48\x8b\x7c\x24\x08"
    addq_0x18_rsp = b"\x48\x83\xc4\x18"
    jmpq_rdi = b"\xff\xe7"

    code = (
        movabsq_address_rdx
        + subq_0x18_rsp
        + xorl_eax_eax
        + movq_rdi_stack
        + callq_rdx
        + movq_stack_rdi
        + xorl_eax_eax
        + addq_0x18_rsp
        + jmpq_rdi
    )
    ctypes.memmove(mem, code, len(code))

    # Make code executable (and no longer writable)
    libc.mprotect(mem, page_size, libc.PROT_READ | libc.PROT_EXEC)
    return libc.CLONE_CALLBACK(mem)