def execute_in_namespace(func, use_network_ns=True):
    """Run a function in a cloned child process that lives in its own namespaces.

    The child is created with the CLONE_NEWNS, CLONE_NEWUTS, CLONE_NEWIPC,
    CLONE_NEWUSER, and CLONE_NEWPID flags, plus CLONE_NEWNET on request.

    @param func: a parameter-less function returning an int
        (which will be the process' exit value)
    @param use_network_ns: whether CLONE_NEWNET is added to the clone flags
    @return: the PID of the created child process
    """
    namespace_flags = [
        libc.CLONE_NEWNS,
        libc.CLONE_NEWUTS,
        libc.CLONE_NEWIPC,
        libc.CLONE_NEWUSER,
        libc.CLONE_NEWPID,
    ]
    if use_network_ns:
        namespace_flags.append(libc.CLONE_NEWNET)

    # SIGCHLD is OR-ed into the same flags value that is handed to clone().
    clone_flags = signal.SIGCHLD
    for namespace_flag in namespace_flags:
        clone_flags |= namespace_flag

    # The child is created with the clone() syscall, a relative of fork().
    # Doing this behind Python's back is dangerous (all the more because the
    # child is supposed to run Python code as well), but so far it seems to work.
    # The idea is to mimic (almost) everything os.fork() does (cf. function
    # os_fork_impl in
    # https://github.com/python/cpython/blob/master/Modules/posixmodule.c).
    # Unlike os.fork(), the import lock is not taken here: it is reachable only
    # through an internal API, and the child should never import anything anyway
    # (inside the container, modules might not be visible).
    # Holding the GIL during clone() is essential, though; without it the child
    # will often deadlock as soon as it tries to execute Python code.
    # The ctypes module lets us keep the GIL while the function executes if the
    # library is accessed via ctypes.PyDLL instead of ctypes.CDLL.

    def run_in_child():
        # Repair the interpreter state after the fork-like operation first:
        # this, for example, resets the GIL and fixes the state of several
        # modules such as threading and signal.
        ctypes.pythonapi.PyOS_AfterFork()
        return func()

    with allocate_stack() as child_stack:
        child_callback = ctypes.CFUNCTYPE(ctypes.c_int)(run_in_child)
        return libc.clone(child_callback, child_stack, clone_flags, None)
def execute_in_namespace(func, use_network_ns=True):
    """Execute a function in a child process in separate namespaces.

    @param func: a parameter-less function returning an int
        (which will be the process' exit value)
    @param use_network_ns: whether CLONE_NEWNET is added to the clone flags,
        in addition to the namespace flags that are always set
    @return: the PID of the created child process
    """
    flags = (
        signal.SIGCHLD
        | libc.CLONE_NEWNS
        | libc.CLONE_NEWUTS
        | libc.CLONE_NEWIPC
        | libc.CLONE_NEWUSER
        | libc.CLONE_NEWPID
    )
    if use_network_ns:
        flags |= libc.CLONE_NEWNET

    # We need to use the syscall clone(), which is similar to fork(), but not
    # available in the Python API. We can call it directly using ctypes, but then
    # the state of the Python interpreter is inconsistent, so we need to fix that.
    # Python >= 3.7 has three C functions that should be called before and after
    # fork/clone:
    # https://docs.python.org/3/c-api/sys.html#c.PyOS_BeforeFork
    # This is the same that os.fork() does (cf. os_fork_impl
    # in https://github.com/python/cpython/blob/master/Modules/posixmodule.c).
    # Furthermore, it is very important that we have the GIL during clone(),
    # otherwise the child will often deadlock when trying to execute Python code.
    # Luckily, the ctypes module allows us to hold the GIL while executing the
    # function by using ctypes.PyDLL as library access instead of ctypes.CDLL.
    #
    # Two difficulties remain:
    # 1. On Python < 3.7, only PyOS_AfterFork() (to be called in the child) exists.
    #    Other cleanup done by os_fork_impl is not accessible to us, so we ignore
    #    it. For example, we do not take the import lock because it is only
    #    available via an internal API, and because the child should never import
    #    anything anyway (inside the container, modules might not be visible).
    # 2. On all Python versions, the interpreter state in the child is inconsistent
    #    until PyOS_AfterFork_Child() is called. However, if we pass the Python
    #    function _python_clone_child_callback() as callback to clone and do the
    #    cleanup in its first line, it is too late because the Python interpreter
    #    is already used. This actually causes problems if benchexec is executed
    #    with a high number of parallel runs because of thread contention, the
    #    gil_drop_request and a deadlock in drop_gil
    #    (cf. https://github.com/sosy-lab/benchexec/issues/435).
    #    So we should avoid executing Python code at all before
    #    PyOS_AfterFork_Child(). We do not want to take the hassle of shipping
    #    C code with BenchExec, so we use _generate_native_clone_child_callback()
    #    to generate machine code on the fly as replacement for
    #    _python_clone_child_callback(). This works for x86_64 Linux and we expect
    #    practically all BenchExec users to fall in this category. For others
    #    there is still the pure Python callback, which in practice works totally
    #    fine as long as there does not exist a huge number of threads.
    #    There is a workaround using sys.setswitchinterval(), however, it is too
    #    late to apply it here in this function, because gil_drop_request could
    #    already be set.
    #
    # Summary:
    # - For Linux x86_64 we use native code from
    #   _generate_native_clone_child_callback()
    # - Otherwise, we use sys.setswitchinterval() as workaround in
    #   localexecution.py.
    # - Direct users of ContainerExecutor are fine in practice if they use few
    #   threads.
    func_p = _CLONE_NESTED_CALLBACK(func)  # store in variable to avoid GC

    # PyOS_BeforeFork() and PyOS_AfterFork_Parent() were added together in
    # Python 3.7. Looking up a missing symbol on ctypes.pythonapi raises
    # AttributeError, so detect the hooks once and skip both calls on older
    # interpreters (difficulty 1 above) instead of failing before clone().
    has_fork_hooks = hasattr(ctypes.pythonapi, "PyOS_BeforeFork")

    with allocate_stack() as stack:
        try:
            if has_fork_hooks:
                ctypes.pythonapi.PyOS_BeforeFork()
            pid = libc.clone(_clone_child_callback, stack, flags, func_p)
        finally:
            # Undo the preparation in the parent even if clone() failed.
            if has_fork_hooks:
                ctypes.pythonapi.PyOS_AfterFork_Parent()
    return pid