def free(self, mapping: MemoryMapping) -> None:
    """Invalidate this allocation and return the pages it alone occupied to the OS.

    The page range to release is computed immediately from this allocation and
    its neighbours (self.prev and self.next). The MADV.REMOVE call and the final
    unlinking via self._remove() are done by a coroutine started with reset().

    mapping supplies the page size (mapping.near.page_size) used for the page
    arithmetic, and a slice of it is what gets madvise'd.
    """
    self.valid = False
    def pg(offset: int) -> int:
        "Convert this bytes-offset in this mapping into a page-offset in the mapping"
        return offset // mapping.near.page_size
    # Now that this allocation is freed, everything in this (exclusive) range is free.
    # (Some of it may have been free before this allocation is freed.)
    free_start, free_end = pg(self.prev.end)+1, pg(self.next.start)
    # We could just MADV.REMOVE the whole free range, but that would cause us to do
    # excessive syscalls. Instead we'll also calculate what pages *may* have become free
    # due to freeing this allocation, which is exactly the range of pages covered by
    # this allocation:
    changed_start, changed_end = pg(self.start), pg(self.end)+1
    # Now, the intersection of these two ranges is exactly the range of pages which have
    # just become free and therefore haven't yet had MADV.REMOVE called on them.
    # The start of the intersection is the higher of the two start addresses...
    just_freed_start = max(free_start, changed_start)
    # ...and the end of the intersection is the lower of the two end addresses.
    just_freed_end = min(free_end, changed_end)
    # asynchronously, if there's something in the intersection, MADV.REMOVE it
    async def final_free() -> None:
        try:
            # An empty intersection (start >= end) means no page became wholly free.
            if just_freed_start < just_freed_end:
                await mapping[just_freed_start:just_freed_end].madvise(MADV.REMOVE)
        except OSError:
            logger.exception("Error while returning memory to the OS")
            # don't care beyond this, it's harmless
        except SyscallError:
            # a SyscallError can easily happen, if we free in a process that's dead
            # TODO try again with the main mapping in Arena, which shouldn't SyscallError
            pass
        # only now do we actually remove this allocation from the linked list;
        # if we remove the allocation before MADV.REMOVE completes,
        # we might allocate in that space, which will later be MADV.REMOVEd and deleted
        self._remove()
    reset(final_free())
async def _run(self) -> None:
    """Loop forever calling epoll_wait and dispatching events to waiting callbacks.

    Callbacks are fetched from self.queue and stored by number; each received
    event is sent to the callback stored under event.data. Numbers found in
    self.pending_remove have RemovedFromEpollError thrown into their callback.

    The loop ends when epoll_wait (or reading its result) raises anything other
    than SyscallHangup; self.queue is then closed with that exception.
    """
    input_buf: Pointer = await self.epfd.task.malloc(
        EpollEventList, 32 * EpollEvent.sizeof())
    # Maps a requested number to the continuation waiting for its next event.
    number_to_cb: t.Dict[int, Continuation[EPOLL]] = {}
    registered_activity_fd: t.Optional[FileDescriptor] = None
    while True:
        if self.wait_readable:
            await self.wait_readable()
        activity_fd = self.epfd.task.sysif.get_activity_fd()
        if activity_fd and (registered_activity_fd is not activity_fd):
            # the activity fd changed, we need to register the new one
            if registered_activity_fd:
                # delete the old registered activity fd
                await self.epfd.epoll_ctl(EPOLL_CTL.DEL, registered_activity_fd)
            activity_fd_number = self.allocate_number(int(activity_fd))
            # start up a coroutine to consume events from the activity_fd
            # (the default argument binds the current number for this coroutine)
            async def devnull(activity_fd_number=activity_fd_number):
                while True:
                    await self.queue.request(activity_fd_number)
            reset(devnull())
            await self.epfd.epoll_ctl(
                EPOLL_CTL.ADD, activity_fd, await self.epfd.task.ptr(
                    EpollEvent(
                        activity_fd_number,
                        # not edge triggered; we don't want to block if there's
                        # anything that can be read.
                        EPOLL.IN | EPOLL.RDHUP | EPOLL.PRI | EPOLL.ERR | EPOLL.HUP)))
            registered_activity_fd = activity_fd
        try:
            valid_events_buf, rest = await self.epfd.epoll_wait(
                input_buf, self.timeout)
            received_events = await valid_events_buf.read()
        except SyscallHangup:
            # retry the epoll_wait to support rsyscall.tasks.persistent, as documented there;
            # for non-persistent tasks this will just fail with a SyscallSendError next time around.
            continue
        except Exception as wait_error:
            # Any other failure ends the loop; the exception is passed to queue.close below.
            final_exn = wait_error
            break
        # Rejoin the two parts returned by epoll_wait so the buffer can be reused next iteration.
        input_buf = valid_events_buf + rest
        # Pick up newly queued requests before dispatching the received events.
        for num, cb in self.queue.fetch_any():
            number_to_cb[num] = cb
        for event in received_events:
            number_to_cb[event.data].send(event.events)
            del number_to_cb[event.data]
        # Fetch again after dispatching, before processing pending removals.
        for num, cb in self.queue.fetch_any():
            number_to_cb[num] = cb
        # Iterate over a copy because the set is mutated inside the loop.
        for number in list(self.pending_remove):
            number_to_cb[number].throw(RemovedFromEpollError())
            del number_to_cb[number]
            self.pending_remove.remove(number)
    # Only reached via the `break` above, so final_exn is always bound here.
    self.queue.close(final_exn)
def start(func: t.Callable[[], t.Awaitable[T]]) -> Future[T]:
    """Launch the zero-argument async callable `func` and return a Future for it.

    `func` is run through outcome.acapture, and the captured outcome is stored
    on the Future as `_result`. If the Future has a truthy `_result_cb` at that
    point, it is resumed with the same outcome. The wrapping coroutine is handed
    to reset() before the Future is returned.
    """
    future = Future[T]()

    @functools.wraps(func)
    async def wrapper() -> None:
        captured = await outcome.acapture(func)
        future._result = captured
        if future._result_cb:
            future._result_cb.resume(captured)

    background = wrapper()
    reset(background)
    return future
def __init__(
        self,
        asyncfd: AsyncFileDescriptor,
        buf: Pointer[InotifyEventList],
) -> None:
    """Private; use Inotify.make instead.

    Stores the async file descriptor and the event buffer, creates an empty
    watch-descriptor table and the request queue, then starts self._run().
    """
    self.asyncfd, self.buf = asyncfd, buf
    self.wd_to_watch: t.Dict[WatchDescriptor, Watch] = dict()
    self.queue = RequestQueue[WatchDescriptor, t.List[InotifyEvent]]()
    # Started last, once every attribute above has been assigned.
    runner = self._run()
    reset(runner)
def __init__(
        self,
        afd: AsyncFileDescriptor,
        signal_block: SignalBlock,
        buf: Pointer[SignalfdSiginfo],
) -> None:
    """Use the constructor method AsyncSignalfd.make

    Stores the async file descriptor, the signal block and the buffer,
    creates the `next_signal` Event, then starts self._run().
    """
    self.afd, self.signal_block, self.buf = afd, signal_block, buf
    self.next_signal = Event()
    # Started last, once every attribute above has been assigned.
    runner = self._run()
    reset(runner)
def __init__(self,
             logger: logging.Logger,
             fd: AsyncFileDescriptor,
             server_fd: FileDescriptor,
) -> None:
    """Store the connection endpoints, create both queues, then start both loops.

    logger: logger stored on the instance.
    fd: async file descriptor stored as `self.fd`.
    server_fd: file descriptor stored as `self.server_fd`.

    `self.valid` starts out as None.
    """
    self.logger = logger
    self.fd = fd
    self.server_fd = server_fd
    self.valid: t.Optional[Pointer[bytes]] = None
    # Create *both* queues before starting either background coroutine, so that
    # neither coroutine can ever observe a partially-initialised object. This
    # matches the other constructors in this file, which start their run loops
    # only after all attributes have been assigned.
    self.request_queue = RequestQueue[t.Union[RsyscallSyscall, Write, Read, Barrier],
                                      t.Union[int, bytes, None]]()
    self.response_queue = RequestQueue[t.Union[RsyscallSyscall, Read, Barrier],
                                       t.Union[int, bytes, None]]()
    # The start order of the two loops is unchanged: requests first, then responses.
    reset(self._run_requests())
    reset(self._run_responses())
def __init__(self,
             ram: RAM,
             epfd: FileDescriptor,
             wait_readable: t.Optional[t.Callable[[], t.Awaitable[None]]],
             timeout: int,
) -> None:
    """To make this, use one of the constructor methods of Epoller: make_subsidiary or make_root

    Stores the arguments, creates empty `used_numbers` and `pending_remove`
    sets and the request queue, then starts self._run().
    """
    self.ram, self.epfd = ram, epfd
    self.wait_readable, self.timeout = wait_readable, timeout
    self.used_numbers: t.Set[int] = set()
    self.pending_remove: t.Set[int] = set()
    self.queue = RequestQueue[int, EPOLL]()
    # Started last, once every attribute above has been assigned.
    runner = self._run()
    reset(runner)
def start(coro: t.Awaitable[T]) -> Future[T]:
    """Launch the awaitable `coro` and return a Future for its outcome.

    `coro` is awaited through outcome.acapture, and the captured outcome is
    stored on the Future as `_result`. If the Future has a truthy `_result_cb`
    at that point, it is resumed with the same outcome. The wrapping coroutine
    takes its `__name__` and `__qualname__` from `coro` (falling back to
    "wrapper") before being handed to reset().
    """
    future = Future[T]()

    def thunk() -> t.Awaitable[T]:
        # acapture wants a callable that returns an awaitable.
        return coro

    async def wrapper() -> None:
        captured = await outcome.acapture(thunk)
        future._result = captured
        if future._result_cb:
            future._result_cb.resume(captured)

    background = wrapper()
    for attr in ("__name__", "__qualname__"):
        setattr(background, attr, getattr(coro, attr, "wrapper"))
    reset(background)
    return future
def __init__(
        self,
        logger: logging.Logger,
        tofd: AsyncFileDescriptor,
        fromfd: AsyncFileDescriptor,
        server_infd: FileDescriptor,
        server_outfd: FileDescriptor,
) -> None:
    """Store the connection endpoints, create both queues, then start both loops.

    logger: logger stored on the instance.
    tofd / fromfd: async file descriptors stored as `self.tofd` / `self.fromfd`.
    server_infd / server_outfd: file descriptors stored under the same names.

    `self.valid` starts out as None.
    """
    self.logger = logger
    self.tofd = tofd
    self.fromfd = fromfd
    self.server_infd = server_infd
    self.server_outfd = server_outfd
    self.valid: t.Optional[Pointer[bytes]] = None
    # Create *both* queues before starting either background coroutine, so that
    # neither coroutine can ever observe a partially-initialised object. This
    # matches the other constructors in this file, which start their run loops
    # only after all attributes have been assigned.
    self.request_queue = RequestQueue[RsyscallSyscall, int]()
    self.response_queue = RequestQueue[RsyscallSyscall, int]()
    # The start order of the two loops is unchanged: requests first, then responses.
    reset(self._run_requests())
    reset(self._run_responses())
def __init__(self,
             epoller: Epoller,
             fd: FileDescriptor,
             number: int,
) -> None:
    """Record the epoller, fd and number, set up event bookkeeping, and start self._run().

    The initial status optimistically reports the fd as both readable and
    writable; see the comment on `initial_readiness` for the reasoning.
    """
    self.epoller, self.fd, self.number = epoller, fd, number
    # TODO, we should copy this so it can't be closed out from under us
    # (i.e. store fd.copy() instead of fd)

    # Optimistically treat the FD as ready for both reading and writing right
    # away, by starting out with EPOLL.OUT|EPOLL.IN. Two benefits:
    # 1. Performance improves on our test suite and in many real-world cases.
    # 2. More critically, if a user erroneously reads or writes an FD which will
    #    never receive EPOLL.OUT or EPOLL.IN because it doesn't support
    #    reading/writing, we fail immediately instead of blocking forever.
    initial_readiness = EPOLL.OUT|EPOLL.IN
    self.status = FDStatus(initial_readiness)
    self.in_epollfd = True
    self.queue = RequestQueue[EPOLL, None]()
    self.total_events: t.Counter[EPOLL] = collections.Counter()
    # One zeroed counter per EPOLL flag.
    self.consumed_events: t.Dict[EPOLL, int] = dict.fromkeys(EPOLL, 0)
    # Started last, once every attribute above has been assigned.
    runner = self._run()
    reset(runner)
def __init__(self, task: Task) -> None:
    """Store the task, create the lock, arena list and request queue, and start self._run().

    Requests on the queue are lists of (int, int) tuples; responses are
    sequences of (MemoryMapping, Allocation) pairs.
    """
    self.task = task
    self.lock = trio.Lock()
    self.arenas: t.List[Arena] = list()
    self.queue = RequestQueue[
        t.List[t.Tuple[int, int]],
        t.Sequence[t.Tuple[MemoryMapping, Allocation]],
    ]()
    # Started last, once every attribute above has been assigned.
    runner = self._run()
    reset(runner)
async def clone_child_task(
        task: Task,
        ram: RAM,
        connection: Connection,
        loader: NativeLoader,
        monitor: ChildProcessMonitor,
        flags: CLONE,
        trampoline_func: t.Callable[[FileDescriptor], Trampoline],
) -> t.Tuple[AsyncChildProcess, Task]:
    """Clone a new child process and setup the sysif and task to manage it

    We rely on trampoline_func to take a socket and give us a native function call with
    arguments that will speak the rsyscall protocol over that socket.

    We want to see EOF on our local socket if that remote socket is no longer being read;
    for example, if the process exits or execs.
    This is not automatic for us: Since the process might share its file descriptor table
    with other processes, remote_sock might not be closed when the process exits or execs.

    To ensure that we get an EOF, we use the ctid futex, which, thanks to
    CLONE.CHILD_CLEARTID, will be cleared and receive a futex wakeup when the child
    process exits or execs.

    When we see that futex wakeup (from Python, with the futex integrated into our event
    loop through launch_futex_monitor), we call shutdown(SHUT.RDWR) on the local socket
    from the parent. This results in future reads returning EOF.

    CLONE.VM and CLONE.CHILD_CLEARTID are always added to `flags`.

    Returns the (child_process, child_task) pair; child_task.sysif is a
    SyscallConnection built on the channel opened here.

    """
    # These flags are mandatory; if we don't use CLONE_VM then CHILD_CLEARTID doesn't work
    # properly and our only other recourse to detect exec is to abuse robust futexes.
    flags |= CLONE.VM | CLONE.CHILD_CLEARTID
    # Open a channel which we'll use for the rsyscall connection
    [(access_sock, remote_sock)] = await connection.open_async_channels(1)
    # Create a trampoline that will start the new process running an rsyscall server
    trampoline = trampoline_func(remote_sock)
    # TODO it is unclear why we sometimes need to make a new mapping here, instead of
    # allocating with our normal allocator; all our memory is already MAP.SHARED, I think.
    # We should resolve this so we can use the normal allocator.
    arena = Arena(await task.mmap(4096 * 2, PROT.READ | PROT.WRITE, MAP.SHARED))
    async def op(
            sem: RAM
    ) -> t.Tuple[t.Tuple[Pointer[Stack], WrittenPointer[Stack]], WrittenPointer[FutexNode]]:
        # Build the trampoline stack at the end of a 4096-byte buffer, 16-byte aligned.
        stack_value = loader.make_trampoline_stack(trampoline)
        stack_buf = await sem.malloc(Stack, 4096)
        stack = await stack_buf.write_to_end(stack_value, alignment=16)
        # The futex word starts out as 1.
        futex_pointer = await sem.ptr(FutexNode(None, Int32(1)))
        return stack, futex_pointer
    # Create the stack we'll need, and the futex (whose word is initialized to 1 in op)
    stack, futex_pointer = await ram.perform_batch(op, arena)
    # it's important to start the processes in this order, so that the thread
    # process is the first process started; this is relevant in several
    # situations, including unshare(NEWPID) and manipulation of ns_last_pid
    child_process = await monitor.clone(flags, stack, ctid=futex_pointer)
    # We want to be able to rely on getting an EOF if the other side of the syscall
    # connection is no longer being read (e.g., if the process exits or execs). Since the
    # process might share its file descriptor table with other processes, remote_sock
    # might not be closed when the process exits or execs. To ensure that we get an EOF,
    # we use the ctid futex, which will be cleared on process exit or exec; we shutdown
    # access_sock when the ctid futex is cleared, to get an EOF.
    # We do this with launch_futex_monitor and a background coroutine.
    futex_process = await launch_futex_monitor(ram, loader, monitor, futex_pointer)
    async def shutdown_access_sock_on_futex_process_exit() -> None:
        try:
            await futex_process.waitpid(W.EXITED)
        except SyscallError:
            # if the parent of the futex_process dies, this syscall
            # connection is broken anyway, so shut it down.
            pass
        await access_sock.handle.shutdown(SHUT.RDWR)
    # Running this in the background, without an associated object, is a bit dubious...
    reset(shutdown_access_sock_on_futex_process_exit())
    # Set up the new task with appropriately inherited namespaces, tables, etc.
    # TODO correctly track all the namespaces we're in
    if flags & CLONE.NEWPID:
        # A new pid namespace, identified by the child's process id.
        pidns = far.PidNamespace(child_process.process.near.id)
    else:
        pidns = task.pidns
    if flags & CLONE.FILES:
        # With CLONE.FILES the parent's fd table object is reused as-is.
        fd_table = task.fd_table
    else:
        # Otherwise a new FDTable is built from the child's id and the parent's table.
        fd_table = handle.FDTable(child_process.process.near.id, task.fd_table)
    child_task = Task(child_process.process, fd_table, task.address_space, pidns)
    child_task.sigmask = task.sigmask
    # Move ownership of the remote sock into the task and store it so it isn't closed
    remote_sock_handle = remote_sock.inherit(child_task)
    await remote_sock.invalidate()
    # Create the new syscall interface, which needs to use not just the connection,
    # but also the futex process.
    # access_sock and remote_sock_handle are each passed twice.
    child_task.sysif = SyscallConnection(
        logger.getChild(str(child_process.process.near)),
        access_sock, access_sock,
        remote_sock_handle, remote_sock_handle,
    )
    return child_process, child_task
def __init__(self, inotify: Inotify, wd: WatchDescriptor) -> None:
    """Store the owning Inotify and watch descriptor, then start self._run().

    `pending_events` starts out as an empty list, and `queue` is a fresh
    RequestQueue whose responses are lists of InotifyEvent.
    """
    self.inotify, self.wd = inotify, wd
    self.pending_events = list()
    self.queue = RequestQueue[None, t.List[InotifyEvent]]()
    # Started last, once every attribute above has been assigned.
    runner = self._run()
    reset(runner)
async def write(self, dest: Pointer, data: bytes) -> None:
    """Send `data` with write_to_fd after starting a receive into `dest`.

    The receive (self.infallible_recv on the span of `dest`) is handed to
    reset() before the data is written to the fd.

    Raises:
        Exception: if dest.size() differs from len(data).
    """
    pointer_size, data_size = dest.size(), len(data)
    if pointer_size != data_size:
        raise Exception("mismatched pointer size", pointer_size, "and data size", data_size)
    self.logger.debug("writing to %s, num bytes: %s", dest, data_size)
    span = to_span(dest)
    reset(self.infallible_recv(span))
    await self.write_to_fd(data)
async def asyncSetUp(self) -> None:
    """Create two int-to-int request queues and start a runner coroutine on each."""
    first_queue = RequestQueue[int, int]()
    self.queue = first_queue
    reset(self._first_runner(first_queue))

    second_queue = RequestQueue[int, int]()
    self.second_queue = second_queue
    reset(self._second_runner(second_queue))
def __init__(self, conn: SyscallConnection) -> None:
    """Hold `conn`, reuse its logger, create the connection queue, and start its loop.

    `_conn` is typed Optional even though it is always set here.
    """
    self._conn: t.Optional[SyscallConnection] = conn
    self.logger = conn.logger
    self.conn_queue: RequestQueue[
        t.Union[Get, Broken, New],
        t.Union[SyscallConnection, None],
    ] = RequestQueue()
    # Started last, once every attribute above has been assigned.
    runner = self._run_conn_queue()
    reset(runner)