async def clone(self, flags: CLONE = CLONE.NONE) -> ChildThread:
    """Create a new child thread

    manpage: clone(2)

    The child task is started through `clone_child_task`, running a trampoline
    into `self.loader.server_func`. The resulting thread reuses this thread's
    memory transport and loader, and gets `inherit`ed copies of the allocator,
    connection, environment and stdio handles.

    Args:
        flags: CLONE flags for the new task. Two of them are special-cased here:
            CLONE.NEWPID gives the child its own root epoller and child
            monitor; CLONE.NEWUSER makes us write user mappings for the child.

    Returns:
        The ChildThread wrapping the new Thread and its child process.
    """
    def make_trampoline(sock):
        # the same socket is passed twice to server_func
        return Trampoline(self.loader.server_func, [sock, sock])

    child_process, task = await clone_child_task(
        self.task, self.ram, self.connection, self.loader, self.monitor,
        flags, make_trampoline)

    # We deliberately reuse the parent's transport object rather than
    # inheriting it into the child, because inheriting leads to a deadlock:
    # if the child task performs a syscall as part of transport.read, the
    # parent task has to call waitid to monitor the child during that syscall,
    # which itself needs transport.read. The child already holds the transport
    # lock, so the parent blocks forever on the lock and the child's read
    # syscall never completes.
    parent_transport = self.ram.transport
    ram = RAM(task, parent_transport, self.ram.allocator.inherit(task))

    if not (flags & CLONE.NEWPID):
        epoller = self.epoller.inherit(ram)
        monitor = self.monitor.inherit_to_child(task)
    else:
        # If the new process is pid 1, CLONE_PARENT isn't allowed, so
        # inherit_to_child can't be used. If we are a reaper, we don't want
        # our child CLONE_PARENTing to us, so again inherit_to_child is out.
        # In both cases, fall back to a fresh ChildProcessMonitor for the child.
        epoller = await Epoller.make_root(ram, task)
        # this signal is already blocked, we inherited the block, um... I guess...
        # TODO handle this more formally
        chld_sigset = await ram.ptr(Sigset({SIG.CHLD}))
        signal_block = SignalBlock(task, chld_sigset)
        monitor = await ChildProcessMonitor.make(
            ram, task, epoller, signal_block=signal_block)

    child_connection = self.connection.inherit(task, ram)
    child_environ = self.environ.inherit(task, ram)
    child_stdin = self.stdin.inherit(task)
    child_stdout = self.stdout.inherit(task)
    child_stderr = self.stderr.inherit(task)
    thread = ChildThread(
        Thread(
            task, ram,
            child_connection,
            self.loader,
            epoller, monitor,
            child_environ,
            stdin=child_stdin,
            stdout=child_stdout,
            stderr=child_stderr,
        ),
        child_process,
    )

    if flags & CLONE.NEWUSER:
        # hack, we should really track the [ug]id ahead of this so we don't have to get it
        # we have to get the [ug]id from the parent because it will fail in the child
        uid = await self.task.getuid()
        gid = await self.task.getgid()
        await write_user_mappings(thread, uid, gid)
    return thread
async def clone_persistent(
        parent: Thread,
        path: t.Union[str, os.PathLike],
) -> PersistentThread:
    """Create a new not-yet-persistent thread and return the thread and its tracking object

    To make the thread actually persistent, you must call
    PersistentServer.make_persistent().

    The point of this hoop-jumping is just to prevent unnecessary resource
    leakage, so you can set up things in a persistent thread and only make it
    persistent when you're actually ready.

    A persistent thread is essentially the same as a normal thread, just
    running a different function. As such, it starts off sharing its file
    descriptor table and everything else with its parent thread. It's only
    when we disconnect and reconnect that it changes behavior.

    Args:
        parent: the thread to clone the new thread from.
        path: filesystem path the Unix listening socket is bound to.

    Returns:
        A PersistentThread carrying `path` and the listening socket handle.
    """
    # bind and listen on a Unix stream socket in the parent
    listener = await parent.task.socket(AF.UNIX, SOCK.STREAM)
    sockaddr = await SockaddrUn.from_path(parent, path)
    await listener.bind(await parent.ram.ptr(sockaddr))
    await listener.listen(1)

    def make_trampoline(sock):
        # the persistent server function also receives the listening socket
        return Trampoline(parent.loader.persistent_server_func,
                          [sock, sock, listener])

    child_process, task = await clone_child_task(
        parent.task, parent.ram, parent.connection, parent.loader, parent.monitor,
        CLONE.FILES | CLONE.FS | CLONE.SIGHAND,
        make_trampoline)
    # hand the listening socket over to the new task
    listener_handle = listener.move(task)
    ram = RAM(task, parent.ram.transport, parent.ram.allocator.inherit(task))

    ## create the new persistent task
    epoller = await Epoller.make_root(ram, task)
    chld_sigset = await ram.ptr(Sigset({SIG.CHLD}))
    signal_block = SignalBlock(task, chld_sigset)
    # TODO use an inherited signalfd instead
    child_monitor = await ChildProcessMonitor.make(
        ram, task, epoller, signal_block=signal_block)

    inner_thread = Thread(
        task, ram,
        parent.connection.inherit(task, ram),
        parent.loader,
        epoller,
        child_monitor,
        parent.environ.inherit(task, ram),
        stdin=parent.stdin.for_task(task),
        stdout=parent.stdout.for_task(task),
        stderr=parent.stderr.for_task(task),
    )
    return PersistentThread(
        inner_thread,
        persistent_path=path,
        persistent_sock=listener_handle,
    )
def __init__(
        self,
        local: AsyncFileDescriptor,
        remote: FileDescriptor,
        remote_allocator: AllocatorInterface,
) -> None:
    """Store the socket pair and allocator, and set up the primitive transport.

    Args:
        local: the local side, passed to PrimitiveSocketMemoryTransport.
        remote: the remote side; its `task` is used for the internal RAM.
        remote_allocator: allocator used for the internal RAM on the remote task.
    """
    self.local = local
    self.remote = remote
    self.remote_allocator = remote_allocator

    # low-level transport over the same fd pair, plus a RAM built on top of it
    primitive = PrimitiveSocketMemoryTransport(local, remote)
    self.primitive = primitive
    self.primitive_remote_ram = RAM(remote.task, primitive, remote_allocator)

    # queued operations and their runners, one pair per direction
    # NOTE(review): presumably OneAtATime serializes the batch runner — confirm
    self.pending_writes: t.List[WriteOp] = []
    self.running_write = OneAtATime()
    self.pending_reads: t.List[ReadOp] = []
    self.running_read = OneAtATime()
async def fork(self, flags: CLONE = CLONE.SIGHAND) -> ChildUnixThread:
    """Create a new child thread

    The child task comes from `self._fork_task(flags)`. The new thread reuses
    this thread's memory transport and loader; the allocator and environment
    are `inherit`ed, and the connection and stdio handles are re-targeted to
    the child with `for_task`.

    Args:
        flags: CLONE flags for the new task. With CLONE.NEWPID the child gets
            its own root epoller and a fresh ChildProcessMonitor.

    Returns:
        The ChildUnixThread wrapping the new UnixThread and its child process.
    """
    child_process, task = await self._fork_task(flags)

    # We deliberately reuse the parent's transport object rather than
    # inheriting it into the child, because inheriting leads to a deadlock:
    # if the child task performs a syscall as part of transport.read, the
    # parent task has to call waitid to monitor the child during that syscall,
    # which itself needs transport.read. The child already holds the transport
    # lock, so the parent blocks forever on the lock and the child's read
    # syscall never completes.
    shared_transport = self.ram.transport
    ram = RAM(task, shared_transport, self.ram.allocator.inherit(task))

    wants_new_pidns = flags & CLONE.NEWPID
    if wants_new_pidns:
        # If the new process is pid 1, CLONE_PARENT isn't allowed, so
        # inherit_to_child can't be used. If we are a reaper, we don't want
        # our child CLONE_PARENTing to us, so again inherit_to_child is out.
        # In both cases, fall back to a fresh ChildProcessMonitor for the child.
        epoller = await Epoller.make_root(ram, task)
        # this signal is already blocked, we inherited the block, um... I guess...
        # TODO handle this more formally
        chld_sigset = await ram.ptr(Sigset({SIG.CHLD}))
        signal_block = SignalBlock(task, chld_sigset)
        monitor = await ChildProcessMonitor.make(
            ram, task, epoller, signal_block=signal_block)
    else:
        epoller = self.epoller.inherit(ram)
        monitor = self.monitor.inherit_to_child(ram, task)

    unix_thread = UnixThread(
        task, ram,
        self.connection.for_task(task, ram),
        self.loader,
        epoller,
        monitor,
        self.environ.inherit(task, ram),
        stdin=self.stdin.for_task(task),
        stdout=self.stdout.for_task(task),
        stderr=self.stderr.for_task(task),
    )
    return ChildUnixThread(unix_thread, process=child_process)
async def _make_local_thread() -> Thread:
    """Create the local thread, allocating various resources locally.

    For the most part, the local thread is like any other thread; it just
    bootstraps differently, and uses syscall and memory interfaces which are
    specialized to the local thread.

    Returns:
        A Thread for the current process (`os.getpid()`), with stdio handles
        on fds 0, 1 and 2 and an environment copied from `os.environ`.
    """
    process = near.Process(os.getpid())
    own_id = process.id
    task = Task(
        LocalSyscall(),
        process,
        far.FDTable(own_id),
        far.AddressSpace(own_id),
        far.PidNamespace(own_id),
    )
    transport = LocalMemoryTransport(task)
    ram = RAM(task, transport, memory.AllocatorClient.make_allocator(task))

    epfd = await task.epoll_create()

    async def wait_readable():
        # block in trio until the epoll fd itself becomes readable
        # NOTE(review): trio.hazmat was renamed trio.lowlevel in newer trio
        # releases — confirm against the trio version this project pins
        logger.debug("wait_readable(%s)", epfd.near.number)
        await trio.hazmat.wait_readable(epfd.near.number)

    epoller = Epoller.make_subsidiary(ram, epfd, wait_readable)

    # the awaits below run in the same order as the Thread arguments
    connection = await FDPassConnection.make(task, ram, epoller)
    loader = NativeLoader.make_from_symbols(task, lib)
    child_monitor = await ChildProcessMonitor.make(ram, task, epoller)
    environ = Environment(task, ram, {
        name.encode(): val.encode() for name, val in os.environ.items()
    })
    return Thread(
        task, ram,
        connection,
        loader,
        epoller,
        child_monitor,
        environ,
        stdin=task.make_fd_handle(near.FileDescriptor(0)),
        stdout=task.make_fd_handle(near.FileDescriptor(1)),
        stderr=task.make_fd_handle(near.FileDescriptor(2)),
    )
async def stdin_bootstrap(
        parent: Thread,
        bootstrap_command: Command,
) -> t.Tuple[AsyncChildProcess, Thread]:
    """Create a thread from running an arbitrary command which must run rsyscall-stdin-bootstrap

    bootstrap_command can be any arbitrary command, but it must eventually exec
    rsyscall-stdin-bootstrap, and pass down stdin when it does.

    We'll fork and exec bootstrap_command, passing down a socketpair for stdin,
    and try to bootstrap over the other end of the socketpair. Once
    rsyscall-stdin-bootstrap starts, it will respond to our bootstrap and we'll
    create a new thread.

    Args:
        parent: the thread that forks the child and owns our end of the sockets.
        bootstrap_command: the command exec'd in the forked child.

    Returns:
        A pair of the exec'd child process and the newly built Thread.
    """
    #### fork and exec into the bootstrap command
    child = await parent.fork()
    # create the socketpair that will be used as stdin
    pair_buf = await parent.ram.malloc(Socketpair)
    pair_ptr = await parent.task.socketpair(AF.UNIX, SOCK.STREAM, 0, pair_buf)
    stdin_pair = await pair_ptr.read()
    parent_sock = stdin_pair.first
    child_sock = stdin_pair.second.move(child.task)
    # set up stdin with socketpair
    await child.unshare_files(going_to_exec=True)
    await child.stdin.replace_with(child_sock)
    # exec
    child_process = await child.exec(bootstrap_command)

    #### set up all the fds we'll want to pass over
    # the basic connections: exactly two (access, passed) pairs are expected
    syscall_pair, data_pair = await parent.open_async_channels(2)
    access_syscall_sock, passed_syscall_sock = syscall_pair
    access_data_sock, passed_data_sock = data_pair
    # memfd for setting up the futex
    futex_name = await parent.ram.ptr(Path("child_robust_futex_list"))
    futex_memfd = await parent.task.memfd_create(futex_name)
    # send the fds to the new process
    connection_fd, make_connection = await parent.connection.prep_fd_transfer()

    async def sendmsg_op(sem: RAM) -> WrittenPointer[SendMsghdr]:
        # a one-byte payload carrying the fds as SCM_RIGHTS ancillary data
        payload = await sem.malloc(bytes, 1)
        iovec = await sem.ptr(IovecList([payload]))
        rights = CmsgSCMRights([
            passed_syscall_sock, passed_data_sock, futex_memfd, connection_fd])
        cmsgs = await sem.ptr(CmsgList([rights]))
        return await sem.ptr(SendMsghdr(None, iovec, cmsgs))

    msghdr = await parent.ram.perform_batch(sendmsg_op)
    # the second element of the result must unpack as empty
    _, [] = await parent_sock.sendmsg(msghdr, SendmsgFlags.NONE)
    # close our reference to fds that only the new process needs
    await passed_syscall_sock.close()
    await passed_data_sock.close()
    # close the socketpair
    await parent_sock.close()

    #### read describe to get all the information we need from the new process
    describe_buf = AsyncReadBuffer(access_data_sock)
    describe_struct = await describe_buf.read_cffi('struct rsyscall_stdin_bootstrap')
    environ = await describe_buf.read_envp(describe_struct.envp_count)

    #### build the new task
    pid = describe_struct.pid
    fd_table = far.FDTable(pid)
    address_space = far.AddressSpace(pid)
    # we assume pid namespace is shared
    # TODO include namespace inode numbers numbers in describe
    # note: if we start dealing with namespace numbers then we need to
    # have a Kernel namespace which tells us which kernel we get those
    # numbers from.
    # oh hey we can conveniently dump the inode numbers with getdents!
    pidns = parent.task.pidns
    process = near.Process(pid)
    remote_syscall_fd = near.FileDescriptor(describe_struct.syscall_fd)
    # one socket serves both directions of the syscall connection
    syscall_conn = SyscallConnection(access_syscall_sock, access_syscall_sock)
    syscall = NonChildSyscallInterface(syscall_conn, process)
    base_task = Task(syscall, process, fd_table, address_space, pidns)
    handle_remote_syscall_fd = base_task.make_fd_handle(remote_syscall_fd)
    syscall.store_remote_side_handles(handle_remote_syscall_fd, handle_remote_syscall_fd)
    allocator = memory.AllocatorClient.make_allocator(base_task)
    # we assume our SignalMask is zero'd before being started, so we don't inherit it
    remote_data_fd = base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.data_fd))
    transport = SocketMemoryTransport(access_data_sock, remote_data_fd, allocator)
    ram = RAM(base_task, transport, allocator)
    # TODO I think I can maybe elide creating this epollcenter and instead inherit it or share it, maybe?
    epoller = await Epoller.make_root(ram, base_task)
    child_monitor = await ChildProcessMonitor.make(ram, base_task, epoller)
    connecting_fd = base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.connecting_fd))
    connection = make_connection(base_task, ram, connecting_fd)
    new_parent = Thread(
        task=base_task,
        ram=ram,
        connection=connection,
        loader=NativeLoader.make_from_symbols(base_task, describe_struct.symbols),
        epoller=epoller,
        child_monitor=child_monitor,
        environ=Environment(base_task, ram, environ),
        stdin=base_task.make_fd_handle(near.FileDescriptor(0)),
        stdout=base_task.make_fd_handle(near.FileDescriptor(1)),
        stderr=base_task.make_fd_handle(near.FileDescriptor(2)),
    )
    #### TODO set up futex I guess
    # not used yet; kept as the starting point for the futex setup above
    remote_futex_memfd = near.FileDescriptor(describe_struct.futex_memfd)
    return child_process, new_parent
async def ssh_bootstrap(
        parent: Process,
        ssh_command: SSHCommand,
        local_socket_path: Path,
        tmp_path_bytes: bytes,
) -> t.Tuple[AsyncChildPid, Process]:
    """Over ssh, run the bootstrap executable and build a Process for it.

    Args:
        parent: the local process that forks ssh and owns the local sockets.
        ssh_command: the actual ssh command to run.
        local_socket_path: the local path we'll use for the socket.
        tmp_path_bytes: the directory we're bootstrapping out of.

    Returns:
        A pair of the child pid of the ssh bootstrap command and the new
        Process built from the description read off the data socket.
    """
    # identify local path
    local_sockaddr = await SockaddrUn.from_path(parent, local_socket_path)
    local_data_addr = await parent.ram.ptr(local_sockaddr)
    # start port forwarding; we'll just leak this process, no big deal
    # TODO we shouldn't leak processes; we should be GCing processes at some point
    remote_data_path = (tmp_path_bytes + b"/data").decode()
    forward_child_pid = await ssh_forward(
        parent, ssh_command, local_socket_path, remote_data_path)
    # start bootstrap
    bootstrap_process = await parent.fork()
    bootstrap_args = ssh_command.args(
        "-n", f"cd {tmp_path_bytes.decode()}; exec ./bootstrap rsyscall")
    bootstrap_child_pid = await bootstrap_process.exec(bootstrap_args)
    # TODO should unlink the bootstrap after I'm done execing.
    # it would be better if sh supported fexecve, then I could unlink it before I exec...

    # Connect to the local socket once per channel (two connections are made below)
    async def make_async_connection() -> AsyncFileDescriptor:
        raw_sock = await parent.socket(AF.UNIX, SOCK.STREAM|SOCK.NONBLOCK)
        sock = await parent.make_afd(raw_sock)
        await sock.connect(local_data_addr)
        return sock

    async_local_syscall_sock = await make_async_connection()
    async_local_data_sock = await make_async_connection()

    # Read description off of the data sock
    describe_buf = AsyncReadBuffer(async_local_data_sock)
    describe_struct = await describe_buf.read_cffi('struct rsyscall_bootstrap')
    raw_pid = describe_struct.pid
    environ = await describe_buf.read_envp(describe_struct.envp_count)

    # Build the new task!
    # the address space and pid namespace are keyed on the raw pid from the
    # description; everything after that uses the near.Pid wrapper
    new_address_space = far.AddressSpace(raw_pid)
    # TODO the pid namespace will probably be common for all connections...
    # TODO we should get this from the SSHHost, this is usually going
    # to be common for all connections and we should express that
    new_pid_namespace = far.PidNamespace(raw_pid)
    new_pid = near.Pid(raw_pid)
    new_base_task = Task(
        new_pid,
        handle.FDTable(new_pid),
        new_address_space,
        new_pid_namespace,
    )
    handle_remote_syscall_fd = new_base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.syscall_sock))
    # the same local socket and remote fd are used for both directions
    new_base_task.sysif = SyscallConnection(
        logger.getChild(str(new_pid)),
        async_local_syscall_sock, async_local_syscall_sock,
        handle_remote_syscall_fd, handle_remote_syscall_fd,
    )
    handle_remote_data_fd = new_base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.data_sock))
    handle_listening_fd = new_base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.listening_sock))
    new_allocator = memory.AllocatorClient.make_allocator(new_base_task)
    new_transport = SocketMemoryTransport(async_local_data_sock, handle_remote_data_fd)
    # we don't inherit SignalMask; we assume ssh zeroes the sigmask before starting us
    new_ram = RAM(new_base_task, new_transport, new_allocator)
    epoller = await Epoller.make_root(new_ram, new_base_task)
    child_monitor = await ChildPidMonitor.make(new_ram, new_base_task, epoller)
    # the listening fd is made non-blocking before being wrapped for async use
    await handle_listening_fd.fcntl(F.SETFL, O.NONBLOCK)
    async_listening_fd = await AsyncFileDescriptor.make(
        epoller, new_ram, handle_listening_fd)
    connection = ListeningConnection(
        parent.task, parent.ram, parent.epoller,
        local_data_addr,
        new_base_task, new_ram,
        async_listening_fd,
    )
    new_process = Process(
        task=new_base_task,
        ram=new_ram,
        connection=connection,
        loader=NativeLoader.make_from_symbols(new_base_task, describe_struct.symbols),
        epoller=epoller,
        child_monitor=child_monitor,
        environ=Environment.make_from_environ(new_base_task, new_ram, environ),
        stdin=new_base_task.make_fd_handle(near.FileDescriptor(0)),
        stdout=new_base_task.make_fd_handle(near.FileDescriptor(1)),
        stderr=new_base_task.make_fd_handle(near.FileDescriptor(2)),
    )
    return bootstrap_child_pid, new_process
async def _setup_stub(
        thread: Thread,
        bootstrap_sock: FileDescriptor,
) -> t.Tuple[t.List[str], Thread]:
    """Setup a stub thread

    Sends the connection fds to the stub over `bootstrap_sock`, reads back the
    stub's self-description, and builds a Thread for the stub's process.

    Args:
        thread: the thread that owns `bootstrap_sock` and our end of the channels.
        bootstrap_sock: socket the fds are sent over; it is invalidated here
            once the fds have been sent.

    Returns:
        A pair of the stub's argv (decoded with `os.fsdecode`) and the new Thread.
    """
    # exactly two (access, passed) channel pairs are expected
    syscall_pair, data_pair = await thread.open_async_channels(2)
    access_syscall_sock, passed_syscall_sock = syscall_pair
    access_data_sock, passed_data_sock = data_pair
    # memfd for setting up the futex
    futex_name = await thread.ram.ptr(Path("child_robust_futex_list"))
    futex_memfd = await thread.task.memfd_create(futex_name)
    # send the fds to the new process
    connection_fd, make_connection = await thread.connection.prep_fd_transfer()

    async def sendmsg_op(sem: RAM) -> WrittenPointer[SendMsghdr]:
        # a one-byte payload carrying the fds as SCM_RIGHTS ancillary data
        payload = await sem.malloc(bytes, 1)
        iovec = await sem.ptr(IovecList([payload]))
        rights = CmsgSCMRights([
            passed_syscall_sock, passed_data_sock, futex_memfd, connection_fd])
        cmsgs = await sem.ptr(CmsgList([rights]))
        return await sem.ptr(SendMsghdr(None, iovec, cmsgs))

    msghdr = await thread.ram.perform_batch(sendmsg_op)
    # the second element of the result must unpack as empty
    _, [] = await bootstrap_sock.sendmsg(msghdr, SendmsgFlags.NONE)
    # close our reference to fds that only the new process needs
    await passed_syscall_sock.invalidate()
    await passed_data_sock.invalidate()
    # close the socketpair
    await bootstrap_sock.invalidate()

    #### read describe to get all the information we need from the new process
    describe_buf = AsyncReadBuffer(access_data_sock)
    describe_struct = await describe_buf.read_cffi('struct rsyscall_unix_stub')
    argv_raw = await describe_buf.read_length_prefixed_array(describe_struct.argc)
    argv = list(map(os.fsdecode, argv_raw))
    environ = await describe_buf.read_envp(describe_struct.envp_count)

    #### build the new task
    pid = describe_struct.pid
    fd_table = handle.FDTable(pid)
    address_space = far.AddressSpace(pid)
    # we assume pid namespace is shared
    pidns = thread.task.pidns
    process = near.Process(pid)
    # we assume net namespace is shared - that's dubious...
    # we should make it possible to control the namespace sharing more, hmm.
    # TODO maybe the describe should contain the net namespace number? and we can store our own as well?
    # then we can automatically do it right
    base_task = Task(process, fd_table, address_space, pidns)
    remote_syscall_fd = base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.syscall_fd))
    # the same local socket and remote fd are used for both directions
    base_task.sysif = SyscallConnection(
        logger.getChild(str(process)),
        access_syscall_sock, access_syscall_sock,
        remote_syscall_fd, remote_syscall_fd,
    )
    allocator = memory.AllocatorClient.make_allocator(base_task)
    # take the signal mask from the description rather than assuming it is empty
    blocked_signals = {
        SIG(signum) for signum in rsyscall.struct.bits(describe_struct.sigmask)
    }
    base_task.sigmask = Sigset(blocked_signals)
    remote_data_fd = base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.data_fd))
    ram = RAM(
        base_task,
        SocketMemoryTransport(access_data_sock, remote_data_fd),
        allocator,
    )
    # TODO I think I can maybe elide creating this epollcenter and instead inherit it or share it, maybe?
    # I guess I need to write out the set too in describe
    epoller = await Epoller.make_root(ram, base_task)
    child_monitor = await ChildProcessMonitor.make(ram, base_task, epoller)
    connecting_fd = base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.connecting_fd))
    connection = make_connection(base_task, ram, connecting_fd)
    new_thread = Thread(
        task=base_task,
        ram=ram,
        connection=connection,
        loader=NativeLoader.make_from_symbols(base_task, describe_struct.symbols),
        epoller=epoller,
        child_monitor=child_monitor,
        environ=Environment.make_from_environ(base_task, ram, environ),
        stdin=base_task.make_fd_handle(near.FileDescriptor(0)),
        stdout=base_task.make_fd_handle(near.FileDescriptor(1)),
        stderr=base_task.make_fd_handle(near.FileDescriptor(2)),
    )
    #### TODO set up futex I guess
    # not used yet; kept as the starting point for the futex setup above
    remote_futex_memfd = near.FileDescriptor(describe_struct.futex_memfd)
    return argv, new_thread