def _generate_native_clone_child_callback():
    """Generate Linux x86_64 machine code that does the same as
    _python_clone_child_callback.

    One page of anonymous memory is allocated, filled with a small
    hand-assembled routine that first calls PyOS_AfterFork_Child() and then
    tail-calls the function pointer it received as its only argument, and
    finally remapped as read+execute.

    Returns:
        A libc.CLONE_CALLBACK instance created from the address of the
        generated code.
    """
    # Inspired by https://csl.name/post/python-jit/

    # Allocate one page of memory where we put the code.
    # It is writable (not executable) while we fill it.
    page_size = libc.sysconf(libc.SC_PAGESIZE)
    mem = libc.mmap_anonymous(page_size, libc.PROT_READ | libc.PROT_WRITE)

    # Get address of PyOS_AfterFork_Child that we want to call,
    # packed as the 8-byte immediate operand of the movabsq instruction.
    # On Python 3 we could use to_bytes() instead of struct.pack
    afterfork_address = struct.pack(
        "Q", ctypes.cast(ctypes.pythonapi.PyOS_AfterFork_Child, ctypes.c_void_p).value
    )

    # Generate machine code that does the same as _python_clone_child_callback
    # We use this C code as template (with dummy address for PyOS_AfterFork_Child):
    #
    #   int clone_child_callback(int (*func_p)()) {
    #       void (*PyOS_AfterFork_Child)() = (void*)0xffeeddccbbaa9988;
    #       PyOS_AfterFork_Child();
    #       return func_p();
    #   }
    #
    # We compile this code and disassemble it with
    #
    #   gcc -Os -fPIC -shared -fomit-frame-pointer -march=native \
    #       clone_child_callback.c -o clone_child_callback.o
    #   objdump -d --disassembler-options=suffix clone_child_callback.o
    #
    # This gives the following code (machine code left, assembler right):
    #
    # <clone_child_callback>:
    # Store address in rdx:
    #   48 ba 88 99 aa bb cc    movabsq $0xffeeddccbbaa9988,%rdx
    #   dd ee ff
    # Allocate space on stack:
    #   48 83 ec 18             subq $0x18,%rsp
    # Clear eax:
    #   31 c0                   xorl %eax,%eax
    # Copy rdi (value of parameter func_p) to stack:
    #   48 89 7c 24 08          movq %rdi,0x8(%rsp)
    # Call rdx (where address is stored):
    #   ff d2                   callq *%rdx
    # Copy stack value func_p back to rdi:
    #   48 8b 7c 24 08          movq 0x8(%rsp),%rdi
    # Clear eax:
    #   31 c0                   xorl %eax,%eax
    # Deallocate space on stack:
    #   48 83 c4 18             addq $0x18,%rsp
    # Call function pointer in rdi (func_p) as tail call:
    #   ff e7                   jmpq *%rdi
    #
    # The following creates exactly the same machine code,
    # just with the real address:
    movabsq_address_rdx = b"\x48\xba" + afterfork_address
    subq_0x18_rsp = b"\x48\x83\xec\x18"
    # Opcode must be 0x31 (32-bit "xorl %eax,%eax") as in the listing above.
    # 0x32 would encode the 8-bit "xorb %al,%al", which clears only the
    # lowest byte of eax.
    xorl_eax_eax = b"\x31\xc0"
    movq_rdi_stack = b"\x48\x89\x7c\x24\x08"
    callq_rdx = b"\xff\xd2"
    movq_stack_rdi = b"\x48\x8b\x7c\x24\x08"
    addq_0x18_rsp = b"\x48\x83\xc4\x18"
    jmpq_rdi = b"\xff\xe7"

    # Instruction order matters: it mirrors the disassembly listing above.
    code = b"".join(
        (
            movabsq_address_rdx,
            subq_0x18_rsp,
            xorl_eax_eax,
            movq_rdi_stack,
            callq_rdx,
            movq_stack_rdi,
            xorl_eax_eax,
            addq_0x18_rsp,
            jmpq_rdi,
        )
    )
    ctypes.memmove(mem, code, len(code))

    # Make code executable (and no longer writable)
    libc.mprotect(mem, page_size, libc.PROT_READ | libc.PROT_EXEC)
    return libc.CLONE_CALLBACK(mem)
"mount_proc", "make_bind_mount", "get_my_pid_from_procfs", "drop_capabilities", "forward_all_signals_async", "wait_for_child_and_forward_signals", "setup_container_system_config", "CONTAINER_UID", "CONTAINER_GID", "CONTAINER_HOME", "CONTAINER_HOSTNAME", ] DEFAULT_STACK_SIZE = 1024 * 1024 GUARD_PAGE_SIZE = libc.sysconf(libc.SC_PAGESIZE) # size of guard page at end of stack CONTAINER_UID = 1000 CONTAINER_GID = 1000 CONTAINER_HOME = "/home/benchexec" CONTAINER_HOSTNAME = "benchexec" CONTAINER_ETC_NSSWITCH_CONF = """ passwd: files group: files shadow: files hosts: files networks: files protocols: db files services: db files