async def _connect_and_send(
        self: PersistentProcess, process: Process,
        syscall_sock: FileDescriptor, data_sock: FileDescriptor,
) -> t.Tuple[FileDescriptor, FileDescriptor]:
    """Connect to a persistent process's socket and send it some file descriptors"""
    fds = [syscall_sock, data_sock]
    sock = await process.make_afd(await process.socket(AF.UNIX, SOCK.STREAM|SOCK.NONBLOCK))
    sockaddr_un = await SockaddrUn.from_path(process, self.persistent_path)
    addr = await process.ptr(sockaddr_un)
    count = await process.ptr(Int32(len(fds)))
    iovec = await process.ptr(IovecList([await process.malloc(bytes, 1)]))
    cmsgs = await process.ptr(CmsgList([CmsgSCMRights(fds)]))
    hdr = await process.ptr(SendMsghdr(None, iovec, cmsgs))
    response: Pointer = await process.ptr(StructList(Int32, [Int32(0)]*len(fds)))
    data = None
    await sock.connect(addr)
    _, _ = await sock.write(count)
    _, [] = await sock.handle.sendmsg(hdr, SendmsgFlags.NONE)
    while response.size() > 0:
        valid, response = await sock.read(response)
        # accumulate what we've read so far; merging the adjacent pointers back
        # together lets us read the whole reply in one go below
        data = valid if data is None else data + valid
    remote_syscall_sock, remote_data_sock = [
        self.task.make_fd_handle(near.FileDescriptor(int(i)))
        for i in ((await data.read()).elems if data else [])]
    await sock.close()
    return remote_syscall_sock, remote_data_sock
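
# For comparison, the wire protocol used by _connect_and_send above (an Int32 count, then
# one junk byte carrying an SCM_RIGHTS cmsg with the fds, then one Int32 reply per fd) can
# be sketched with the standard library alone. This is a hedged illustration of the same
# fd-passing pattern, not rsyscall's API; `path`, the blocking sockets, and the helper name
# are assumptions, and socket.send_fds requires Python 3.9+.
import socket
import struct

def send_fds_to_persistent_server(path: str, fds: list) -> list:
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
        sock.connect(path)
        sock.sendall(struct.pack('i', len(fds)))      # the Int32 count
        socket.send_fds(sock, [b'\0'], fds)           # one byte of data + SCM_RIGHTS cmsg
        reply = b''
        while len(reply) < 4 * len(fds):              # one Int32 per fd comes back
            chunk = sock.recv(4 * len(fds) - len(reply))
            if not chunk:
                raise EOFError("server hung up before replying")
            reply += chunk
        # the reply is the fd numbers as seen in the receiving process
        return list(struct.unpack(f'{len(fds)}i', reply))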
async def test_reuseaddr_listen(self) -> None:
    """If you use SO.REUSEADDR, your local and peer address can be the same

    This is kind of alarming and surprising, but it's a real behavior.
    """
    sockfd = await self.process.task.socket(AF.INET, SOCK.STREAM)
    await sockfd.setsockopt(SOL.SOCKET, SO.REUSEADDR, await self.process.ptr(Int32(1)))
    addr = await self.process.bind_getsockname(sockfd, SockaddrIn(0, '127.0.0.1'))
    sockfd2 = await self.process.task.socket(AF.INET, SOCK.STREAM)
    await sockfd2.setsockopt(SOL.SOCKET, SO.REUSEADDR, await self.process.ptr(Int32(1)))
    await sockfd2.bind(await self.process.ptr(addr))
    await sockfd.listen(10)
    await sockfd2.connect(await self.process.ptr(addr))
    sockbuf_ptr = await sockfd2.getsockname(await self.process.ptr(
        Sockbuf(await self.process.malloc(SockaddrIn))))
    self.assertEqual(addr, await (await sockbuf_ptr.read()).buf.read())
    sockbuf_ptr = await sockfd2.getpeername(await self.process.ptr(
        Sockbuf(await self.process.malloc(SockaddrIn))))
    self.assertEqual(addr, await (await sockbuf_ptr.read()).buf.read())
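
# The same surprising SO_REUSEADDR behavior can be reproduced with plain stdlib sockets;
# a minimal sketch of what test_reuseaddr_listen above checks (assumes an ordinary Linux
# host, nothing rsyscall-specific):
import socket

def demo_same_local_and_peer_address() -> None:
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind(('127.0.0.1', 0))
    addr = listener.getsockname()
    client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    client.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    # binding the listener's exact address works because it isn't listening yet
    client.bind(addr)
    listener.listen(10)
    client.connect(addr)
    # the client's local and peer address are now identical
    assert client.getsockname() == addr
    assert client.getpeername() == addr
    client.close()
    listener.close()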
async def set_miredo_sockopts(process: Process, fd: FileDescriptor) -> None:
    # set a bunch of sockopts
    one = await process.ram.ptr(Int32(1))
    await fd.setsockopt(SOL.IP, IP.RECVERR, one)
    await fd.setsockopt(SOL.IP, IP.PKTINFO, one)
    await fd.setsockopt(SOL.IP, IP.MULTICAST_TTL, one)
    # hello fragments my old friend
    await fd.setsockopt(SOL.IP, IP.MTU_DISCOVER, await process.ram.ptr(Int32(IP.PMTUDISC_DONT)))
async def sendmsg_op(
        sem: RAM
) -> t.Tuple[WrittenPointer[SockaddrUn], WrittenPointer[Int32],
             WrittenPointer[SendMsghdr], Pointer[StructList[Int32]]]:
    addr = await sem.ptr(sockaddr_un)
    count = await sem.ptr(Int32(len(fds)))
    iovec = await sem.ptr(IovecList([await sem.malloc(bytes, 1)]))
    cmsgs = await sem.ptr(CmsgList([CmsgSCMRights(fds)]))
    hdr = await sem.ptr(SendMsghdr(None, iovec, cmsgs))
    response_buf = await sem.ptr(StructList(Int32, [Int32(0)] * len(fds)))
    return addr, count, hdr, response_buf
async def start_miredo(nursery, miredo_exec: MiredoExecutables, thread: Thread) -> Miredo:
    inet_sock = await thread.task.socket(AF.INET, SOCK.DGRAM)
    await inet_sock.bind(await thread.ram.ptr(SockaddrIn(0, 0)))
    # set a bunch of sockopts
    one = await thread.ram.ptr(Int32(1))
    await inet_sock.setsockopt(SOL.IP, IP.RECVERR, one)
    await inet_sock.setsockopt(SOL.IP, IP.PKTINFO, one)
    await inet_sock.setsockopt(SOL.IP, IP.MULTICAST_TTL, one)
    # hello fragments my old friend
    await inet_sock.setsockopt(SOL.IP, IP.MTU_DISCOVER, await thread.ram.ptr(Int32(IP.PMTUDISC_DONT)))
    ns_thread = await thread.fork()
    await ns_thread.unshare(CLONE.NEWNET | CLONE.NEWUSER)
    # create icmp6 fd so miredo can relay pings
    icmp6_fd = await ns_thread.task.socket(AF.INET6, SOCK.RAW, IPPROTO.ICMPV6)
    # create the TUN interface
    tun_fd = await ns_thread.task.open(
        await ns_thread.ram.ptr(Path("/dev/net/tun")), O.RDWR)
    ptr = await thread.ram.ptr(netif.Ifreq(b'teredo', flags=netif.IFF_TUN))
    await tun_fd.ioctl(netif.TUNSETIFF, ptr)
    # create reqsock for ifreq operations in this network namespace
    reqsock = await ns_thread.task.socket(AF.INET, SOCK.STREAM)
    await reqsock.ioctl(netif.SIOCGIFINDEX, ptr)
    tun_index = (await ptr.read()).ifindex
    # create socketpair for communication between privileged process and teredo client
    privproc_pair = await (await ns_thread.task.socketpair(
        AF.UNIX, SOCK.STREAM, 0, await ns_thread.ram.malloc(Socketpair))).read()
    privproc_thread = await ns_thread.fork()
    await add_to_ambient(privproc_thread, {CAP.NET_ADMIN})
    privproc_child = await exec_miredo_privproc(miredo_exec, privproc_thread, privproc_pair.first, tun_index)
    nursery.start_soon(privproc_child.check)
    # TODO lock down the client thread, it's talking on the network and isn't audited...
    # should clear out the mount namespace
    # iterate through / and umount(MNT_DETACH) everything that isn't /nix
    # ummm and let's use UMOUNT_NOFOLLOW too
    # ummm no let's just only umount directories
    client_thread = await ns_thread.fork(CLONE.NEWPID)
    await client_thread.unshare(CLONE.NEWNET | CLONE.NEWNS)
    await client_thread.unshare_user()
    client_child = await exec_miredo_run_client(
        miredo_exec, client_thread, inet_sock, tun_fd, reqsock, icmp6_fd,
        privproc_pair.second, "teredo.remlab.net")
    nursery.start_soon(client_child.check)
    # we keep the ns thread around so we don't have to mess with setns
    return Miredo(ns_thread)
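
# For reference, the Ifreq/TUNSETIFF step above corresponds to the usual stdlib recipe for
# attaching a TUN interface; a minimal sketch, with the ioctl number and flag value assumed
# from <linux/if_tun.h> (they are not exported by the standard library):
import fcntl
import os
import struct

TUNSETIFF = 0x400454ca   # _IOW('T', 202, int), from <linux/if_tun.h>
IFF_TUN = 0x0001

def open_tun(name: bytes = b'teredo') -> int:
    # open the clone device, then attach it to a TUN interface named `name`,
    # mirroring the O.RDWR open + TUNSETIFF ioctl in start_miredo
    fd = os.open('/dev/net/tun', os.O_RDWR)
    ifr = struct.pack('16sH22x', name, IFF_TUN)   # struct ifreq, padded to 40 bytes
    fcntl.ioctl(fd, TUNSETIFF, ifr)
    return fd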
async def test_stream_reuseaddr(self) -> None:
    """With STREAM sockets, even if you use SO.REUSEADDR, binding 0 twice will never give you the same port

    This is unlike DGRAM sockets, where binding 0 twice with SO.REUSEADDR can give you
    the same port; see test_dgram_reuseaddr.
    """
    sockfd = await self.process.task.socket(AF.INET, SOCK.STREAM)
    await sockfd.setsockopt(SOL.SOCKET, SO.REUSEADDR, await self.process.ptr(Int32(1)))
    addr = await self.process.bind_getsockname(sockfd, SockaddrIn(0, '127.0.0.1'))
    sockfd2 = await self.process.task.socket(AF.INET, SOCK.STREAM)
    await sockfd2.setsockopt(SOL.SOCKET, SO.REUSEADDR, await self.process.ptr(Int32(1)))
    addr2 = await self.process.bind_getsockname(sockfd2, SockaddrIn(0, '127.0.0.1'))
    self.assertNotEqual(addr, addr2)
async def op(sem: RAM) -> t.Tuple[t.Tuple[Pointer[Stack], WrittenPointer[Stack]],
                                  WrittenPointer[FutexNode]]:
    stack_value = parent.loader.make_trampoline_stack(trampoline)
    stack_buf = await sem.malloc(Stack, 4096)
    stack = await stack_buf.write_to_end(stack_value, alignment=16)
    futex_pointer = await sem.ptr(FutexNode(None, Int32(0)))
    return stack, futex_pointer
async def test_dgram_reuseaddr(self) -> None:
    """With DGRAM sockets, if you use SO.REUSEADDR, binding 0 *can* give you the same port

    Note that you can also just set REUSEADDR after binding.
    """
    sockfd = await self.process.task.socket(AF.INET, SOCK.DGRAM)
    await sockfd.setsockopt(SOL.SOCKET, SO.REUSEADDR, await self.process.ptr(Int32(1)))
    addr = await self.process.bind_getsockname(sockfd, SockaddrIn(0, '127.0.0.1'))
    sockfd2 = await self.process.task.socket(AF.INET, SOCK.DGRAM)
    await sockfd2.setsockopt(SOL.SOCKET, SO.REUSEADDR, await self.process.ptr(Int32(1)))
    addr2 = await self.process.bind_getsockname(sockfd2, SockaddrIn(0, '127.0.0.1'))
    self.assertEqual(addr, addr2)
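
# The STREAM/DGRAM contrast documented by the two tests above can also be observed with
# plain stdlib sockets; a minimal sketch that prints the chosen ports rather than asserting
# (the DGRAM port reuse is the behavior the tests document, and depends on the kernel's
# port selection):
import socket

def bind_ephemeral_with_reuseaddr(kind: int) -> tuple:
    s = socket.socket(socket.AF_INET, kind)
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.bind(('127.0.0.1', 0))
    return s, s.getsockname()[1]

# STREAM: automatic port selection skips ports that are already bound, so the two ports
# should differ (test_stream_reuseaddr).
a, port_a = bind_ephemeral_with_reuseaddr(socket.SOCK_STREAM)
b, port_b = bind_ephemeral_with_reuseaddr(socket.SOCK_STREAM)
print('stream ports:', port_a, port_b)

# DGRAM: with SO_REUSEADDR set, binding port 0 twice can land on the same port
# (test_dgram_reuseaddr).
c, port_c = bind_ephemeral_with_reuseaddr(socket.SOCK_DGRAM)
d, port_d = bind_ephemeral_with_reuseaddr(socket.SOCK_DGRAM)
print('dgram ports:', port_c, port_d)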
async def op(
        sem: RAM
) -> t.Tuple[WrittenPointer[FutexNode], WrittenPointer[RobustListHead]]:
    robust_list_entry = await sem.ptr(FutexNode(None, Int32(futex_value)))
    robust_list_head = await sem.ptr(RobustListHead(robust_list_entry))
    return robust_list_entry, robust_list_head
async def clone_child_task(
        task: Task,
        connection: Connection,
        loader: NativeLoader,
        monitor: ChildPidMonitor,
        flags: CLONE,
        trampoline_func: t.Callable[[FileDescriptor], Trampoline],
) -> t.Tuple[AsyncChildPid, Task]:
    """Clone a new child process and set up the sysif and task to manage it

    We rely on trampoline_func to take a socket and give us a native function call with
    arguments that will speak the rsyscall protocol over that socket.

    We want to see EOF on our local socket if that remote socket is no longer being read;
    for example, if the process exits or execs. This is not automatic for us: since the
    process might share its file descriptor table with other processes, remote_sock might
    not be closed when the process exits or execs.

    To ensure that we get an EOF, we use the ctid futex, which, thanks to
    CLONE.CHILD_CLEARTID, will be cleared and receive a futex wakeup when the child
    process exits or execs.

    When we see that futex wakeup (from Python, with the futex integrated into our event
    loop through launch_futex_monitor), we call shutdown(SHUT.RDWR) on the local socket
    from the parent. This results in future reads returning EOF.
    """
    # These flags are mandatory; if we don't use CLONE_VM then CHILD_CLEARTID doesn't work
    # properly, and our only other recourse to detect exec is to abuse robust futexes.
    flags |= CLONE.VM | CLONE.CHILD_CLEARTID
    # Open a channel which we'll use for the rsyscall connection
    [(access_sock, remote_sock)] = await connection.open_async_channels(1)
    # Create a trampoline that will start the new process running an rsyscall server
    trampoline = trampoline_func(remote_sock)
    # TODO it is unclear why we sometimes need to make a new mapping here, instead of
    # allocating with our normal allocator; all our memory is already MAP.SHARED, I think.
    # We should resolve this so we can use the normal allocator.
    arena = Arena(await task.mmap(4096 * 2, PROT.READ | PROT.WRITE, MAP.SHARED))
    # Create the stack we'll need, and the futex that CHILD_CLEARTID will clear and wake
    stack_value = loader.make_trampoline_stack(trampoline)
    stack_buf = await task.malloc(Stack, 4096)
    stack = await stack_buf.write_to_end(stack_value, alignment=16)
    futex_pointer = await task.ptr(FutexNode(None, Int32(1)))
    # It's important to start the processes in this order, so that the child process is
    # the first process started; this is relevant in several situations, including
    # unshare(NEWPID) and manipulation of ns_last_pid.
    child_pid = await monitor.clone(flags, stack, ctid=futex_pointer)
    # We want to be able to rely on getting an EOF if the other side of the syscall
    # connection is no longer being read (e.g., if the process exits or execs). Since the
    # process might share its file descriptor table with other processes, remote_sock
    # might not be closed when the process exits or execs. To ensure that we get an EOF,
    # we use the ctid futex, which will be cleared on process exit or exec; we shutdown
    # access_sock when the ctid futex is cleared, to get an EOF.
    # We do this with launch_futex_monitor and a background coroutine.
    futex_pid = await launch_futex_monitor(loader, monitor, futex_pointer)
    async def shutdown_access_sock_on_futex_process_exit():
        try:
            await futex_pid.waitpid(W.EXITED)
        except SyscallError:
            # if the parent of the futex_process dies, this syscall
            # connection is broken anyway, so shut it down.
            pass
        await access_sock.handle.shutdown(SHUT.RDWR)
    # Running this in the background, without an associated object, is a bit dubious...
    reset(shutdown_access_sock_on_futex_process_exit())
    # Set up the new task with appropriately inherited namespaces, tables, etc.
    # TODO correctly track all the namespaces we're in
    if flags & CLONE.NEWPID:
        pidns = far.PidNamespace(child_pid.pid.near.id)
    else:
        pidns = task.pidns
    if flags & CLONE.FILES:
        fd_table = task.fd_table
    else:
        fd_table = handle.FDTable(child_pid.pid.near.id, task.fd_table)
    if flags & CLONE.NEWNS:
        mountns = far.MountNamespace(child_pid.pid.near.id)
    else:
        mountns = task.mountns
    child_task = Task(child_pid.pid, fd_table, task.address_space, pidns, mountns)
    child_task.sigmask = task.sigmask
    # Move ownership of the remote sock into the child task and store it so it isn't closed
    remote_sock_handle = remote_sock.inherit(child_task)
    await remote_sock.invalidate()
    # Create the new syscall interface over the connection; the futex process launched
    # above ensures we eventually see EOF on it.
    child_task.sysif = SyscallConnection(
        logger.getChild(str(child_pid.pid.near)),
        access_sock, remote_sock_handle,
    )
    child_task.allocator = task.allocator.inherit(child_task)
    return child_pid, child_task
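
# A small sketch of the EOF trick the clone_child_task docstring relies on: calling
# shutdown(SHUT_RDWR) on our *own* end of a socket makes subsequent reads on that end
# return EOF immediately, even though the peer fd is still open in some other process.
# Plain stdlib, nothing rsyscall-specific:
import socket

a, b = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM)
a.shutdown(socket.SHUT_RDWR)
assert a.recv(4096) == b''   # EOF, even though b is still open
a.close()
b.close()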