Example #1
0
async def _make_local_process() -> Process:
    """Create the local process, allocating various resources locally.

    For the most part, the local process is like any other process; it just bootstraps
    differently, and uses syscall and memory interfaces which are specialized to the local
    process.

    The task is built around the pid of the running interpreter (os.getpid()) and uses a
    LocalSyscall interface. An epoll fd is created in that task and wrapped in a
    TrioSystemWaitReadable, which is both installed with set_trio_system_wait_readable
    and used as the wait function of the subsidiary Epoller.

    Returns:
        A Process wrapping the local task, with a copy of os.environ as its environment
        and stdin/stdout/stderr handles for file descriptors 0, 1 and 2.

    """
    pid = near.Pid(os.getpid())
    # All per-process identifiers (fd table, address space, namespaces) are keyed by
    # the integer pid of this process.
    task = Task(
        pid,
        handle.FDTable(pid.id),
        far.AddressSpace(pid.id),
        far.PidNamespace(pid.id),
        far.MountNamespace(pid.id),
    )
    task.sysif = LocalSyscall(task)
    task.allocator = await memory.AllocatorClient.make_allocator(task)
    epfd = await task.epoll_create()

    # The same waiter object is registered globally and handed to the Epoller, so
    # both refer to the one epoll fd created above.
    trio_system_wait_readable = TrioSystemWaitReadable(epfd.near.number)
    set_trio_system_wait_readable(trio_system_wait_readable)
    epoller = Epoller.make_subsidiary(epfd, trio_system_wait_readable.wait)
    process = Process(
        task,
        await FDPassConnection.make(task, epoller),
        NativeLoader.make_from_symbols(task, lib),
        epoller,
        await ChildPidMonitor.make(task, epoller),
        # pass a plain dict copy of the environment, not the live os.environ mapping
        Environment.make_from_environ(task, {**os.environ}),
        stdin=task.make_fd_handle(near.FileDescriptor(0)),
        stdout=task.make_fd_handle(near.FileDescriptor(1)),
        stderr=task.make_fd_handle(near.FileDescriptor(2)),
    )
    return process
Example #2
0
async def ssh_bootstrap(
    parent: Process,
    # the actual ssh command to run
    ssh_command: SSHCommand,
    # the local path we'll use for the socket
    local_socket_path: Path,
    # the directory we're bootstrapping out of
    tmp_path_bytes: bytes,
) -> t.Tuple[AsyncChildPid, Process]:
    """Over ssh, run the bootstrap executable and build a Process for the remote side.

    A forwarding ssh process is started with ssh_forward for local_socket_path and the
    "data" path under tmp_path_bytes. A forked child of `parent` then execs ssh_command
    to run "./bootstrap rsyscall" from the tmp directory. Two connections are made to
    the local socket: one becomes the syscall connection, the other is used to read the
    bootstrap's self-description (pid, environment, fd numbers, symbols).

    Returns:
        A tuple of the child pid returned by exec'ing the ssh bootstrap command, and
        the new Process built from the bootstrap's description.

    """
    # identify local path
    local_data_addr = await parent.task.ptr(await SockaddrUn.from_path(
        parent, local_socket_path))
    # start port forwarding; we'll just leak this process, no big deal
    # TODO we shouldn't leak processes; we should be GCing processes at some point
    forward_child_pid = await ssh_forward(parent, ssh_command,
                                          local_socket_path,
                                          (tmp_path_bytes + b"/data").decode())
    # start bootstrap
    bootstrap_process = await parent.fork()
    bootstrap_child_pid = await bootstrap_process.exec(
        ssh_command.args(
            "-n", f"cd {tmp_path_bytes.decode()}; exec ./bootstrap rsyscall"))

    # TODO should unlink the bootstrap after I'm done execing.
    # it would be better if sh supported fexecve, then I could unlink it before I exec...
    # Connect to the local socket twice: once for the syscall connection and once
    # for the data connection.
    async def make_async_connection() -> AsyncFileDescriptor:
        sock = await parent.make_afd(await
                                     parent.socket(AF.UNIX,
                                                   SOCK.STREAM | SOCK.NONBLOCK)
                                     )
        await sock.connect(local_data_addr)
        return sock

    async_local_syscall_sock = await make_async_connection()
    async_local_data_sock = await make_async_connection()
    # Read description off of the data sock
    describe_buf = AsyncReadBuffer(async_local_data_sock)
    describe_struct = await describe_buf.read_cffi('struct rsyscall_bootstrap')
    # Keep the raw integer pid separate from the near.Pid wrapper: the FDTable,
    # address space and namespaces are all keyed by the integer, as in the other
    # bootstrap paths.
    new_pid_number = describe_struct.pid
    environ = await describe_buf.read_envp(describe_struct.envp_count)
    # Build the new task!
    new_address_space = far.AddressSpace(new_pid_number)
    # TODO the pid namespace will probably be common for all connections...
    # TODO we should get this from the SSHHost, this is usually going
    # to be common for all connections and we should express that
    new_pid_namespace = far.PidNamespace(new_pid_number)
    new_mountns = far.MountNamespace(new_pid_number)
    new_pid = near.Pid(new_pid_number)
    new_base_task = Task(
        new_pid,
        handle.FDTable(new_pid_number),
        new_address_space,
        new_pid_namespace,
        new_mountns,
    )
    handle_remote_syscall_fd = new_base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.syscall_sock))
    new_base_task.sysif = SyscallConnection(
        logger.getChild(str(new_pid)),
        async_local_syscall_sock,
        handle_remote_syscall_fd,
    )
    # NOTE(review): this handle is not referenced again in this function;
    # presumably it exists so the remote data fd is tracked by the new task - confirm.
    handle_remote_data_fd = new_base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.data_sock))
    handle_listening_fd = new_base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.listening_sock))
    new_base_task.allocator = await memory.AllocatorClient.make_allocator(
        new_base_task)
    # we don't inherit SignalMask; we assume ssh zeroes the sigmask before starting us
    epoller = await Epoller.make_root(new_base_task)
    child_monitor = await ChildPidMonitor.make(new_base_task, epoller)
    # the listening socket is set non-blocking before being wrapped in an
    # AsyncFileDescriptor on the new task's epoller
    await handle_listening_fd.fcntl(F.SETFL, O.NONBLOCK)
    connection = ListeningConnection(
        parent.task,
        parent.epoller,
        local_data_addr,
        new_base_task,
        await AsyncFileDescriptor.make(epoller, handle_listening_fd),
    )
    new_process = Process(
        task=new_base_task,
        connection=connection,
        loader=NativeLoader.make_from_symbols(new_base_task,
                                              describe_struct.symbols),
        epoller=epoller,
        child_monitor=child_monitor,
        environ=Environment.make_from_environ(new_base_task, environ),
        stdin=new_base_task.make_fd_handle(near.FileDescriptor(0)),
        stdout=new_base_task.make_fd_handle(near.FileDescriptor(1)),
        stderr=new_base_task.make_fd_handle(near.FileDescriptor(2)),
    )
    return bootstrap_child_pid, new_process
Example #3
0
async def clone_child_task(
    task: Task,
    connection: Connection,
    loader: NativeLoader,
    monitor: ChildPidMonitor,
    flags: CLONE,
    trampoline_func: t.Callable[[FileDescriptor], Trampoline],
) -> t.Tuple[AsyncChildPid, Task]:
    """Clone a new child process and set up the sysif and task to manage it.

    We rely on trampoline_func to take a socket and give us a native function call with
    arguments that will speak the rsyscall protocol over that socket.

    We want to see EOF on our local socket if that remote socket is no longer being read;
    for example, if the process exits or execs.
    This is not automatic for us: Since the process might share its file descriptor table
    with other processes, remote_sock might not be closed when the process exits or execs.

    To ensure that we get an EOF, we use the ctid futex, which, thanks to
    CLONE.CHILD_CLEARTID, will be cleared and receive a futex wakeup when the child
    process exits or execs.

    When we see that futex wakeup (from Python, with the futex integrated into our event
    loop through launch_futex_monitor), we call shutdown(SHUT.RDWR) on the local socket
    from the parent. This results in future reads returning EOF.

    Args:
        task: the parent task; the child reuses its address space and sigmask, and
            reuses or derives its fd table and namespaces depending on `flags`.
        connection: used to open the channel for the child's syscall connection.
        loader: used to build the trampoline stack and passed to launch_futex_monitor.
        monitor: used to perform the clone and passed to launch_futex_monitor.
        flags: clone flags; CLONE.VM and CLONE.CHILD_CLEARTID are always added.
        trampoline_func: builds the Trampoline from the remote end of the channel.

    Returns:
        The child pid returned by monitor.clone, and the new child Task.

    """
    # These flags are mandatory; if we don't use CLONE_VM then CHILD_CLEARTID doesn't work
    # properly and our only other recourse to detect exec is to abuse robust futexes.
    flags |= CLONE.VM | CLONE.CHILD_CLEARTID
    # Open a channel which we'll use for the rsyscall connection
    [(access_sock, remote_sock)] = await connection.open_async_channels(1)
    # Create a trampoline that will start the new process running an rsyscall server
    trampoline = trampoline_func(remote_sock)
    # TODO it is unclear why we sometimes need to make a new mapping here, instead of
    # allocating with our normal allocator; all our memory is already MAP.SHARED, I think.
    # We should resolve this so we can use the normal allocator.
    # NOTE(review): `arena` is not referenced again in this function; the stack and
    # futex below are allocated through `task` instead. The mapping is still created.
    arena = Arena(await task.mmap(4096 * 2, PROT.READ | PROT.WRITE,
                                  MAP.SHARED))
    # Create the stack we'll need (trampoline data written at the end of a 4096-byte
    # buffer with 16-byte alignment), and the futex node, whose word is set to 1 here.
    stack_value = loader.make_trampoline_stack(trampoline)
    stack_buf = await task.malloc(Stack, 4096)
    stack = await stack_buf.write_to_end(stack_value, alignment=16)
    futex_pointer = await task.ptr(FutexNode(None, Int32(1)))
    # it's important to start the processes in this order, so that the child
    # process is the first process started (before the futex monitor process below);
    # this is relevant in several situations, including unshare(NEWPID) and
    # manipulation of ns_last_pid
    child_pid = await monitor.clone(flags, stack, ctid=futex_pointer)
    # We want to be able to rely on getting an EOF if the other side of the syscall
    # connection is no longer being read (e.g., if the process exits or execs).  Since the
    # process might share its file descriptor table with other processes, remote_sock
    # might not be closed when the process exits or execs. To ensure that we get an EOF,
    # we use the ctid futex, which will be cleared on process exit or exec; we shutdown
    # access_sock when the ctid futex is cleared, to get an EOF.
    # We do this with launch_futex_monitor and a background coroutine.
    futex_pid = await launch_futex_monitor(loader, monitor, futex_pointer)

    async def shutdown_access_sock_on_futex_process_exit():
        # Wait for the futex monitor process to exit, then shut down our end of the
        # syscall connection regardless of whether the wait itself succeeded.
        try:
            await futex_pid.waitpid(W.EXITED)
        except SyscallError:
            # if the parent of the futex_process dies, this syscall
            # connection is broken anyway, so shut it down.
            pass
        await access_sock.handle.shutdown(SHUT.RDWR)

    # Running this in the background, without an associated object, is a bit dubious...
    reset(shutdown_access_sock_on_futex_process_exit())
    # Set up the new task with appropriately inherited namespaces, tables, etc.
    # Each resource is either shared with the parent task or freshly created (keyed by
    # the child's pid), depending on the corresponding clone flag.
    # TODO correctly track all the namespaces we're in
    if flags & CLONE.NEWPID:
        pidns = far.PidNamespace(child_pid.pid.near.id)
    else:
        pidns = task.pidns
    if flags & CLONE.FILES:
        # CLONE.FILES: the child uses the parent's fd table object directly
        fd_table = task.fd_table
    else:
        # otherwise a new table is created, with the parent's table passed as second argument
        fd_table = handle.FDTable(child_pid.pid.near.id, task.fd_table)
    if flags & CLONE.NEWNS:
        mountns = far.MountNamespace(child_pid.pid.near.id)
    else:
        mountns = task.mountns
    # The address space is always the parent's, since CLONE.VM is forced above.
    child_task = Task(child_pid.pid, fd_table, task.address_space, pidns,
                      mountns)
    child_task.sigmask = task.sigmask
    # Move ownership of the remote sock into the task and store it so it isn't closed
    remote_sock_handle = remote_sock.inherit(child_task)
    await remote_sock.invalidate()
    # Create the new syscall interface from the local end of the channel and the
    # child's handle to the remote end.
    # NOTE(review): an earlier comment here said the interface also needs the futex
    # process, but nothing futex-related is passed to SyscallConnection - confirm.
    child_task.sysif = SyscallConnection(
        logger.getChild(str(child_pid.pid.near)),
        access_sock,
        remote_sock_handle,
    )
    child_task.allocator = task.allocator.inherit(child_task)
    return child_pid, child_task
Example #4
0
async def stdin_bootstrap(
    parent: Process,
    bootstrap_command: Command,
) -> t.Tuple[AsyncChildPid, Process]:
    """Run bootstrap_command and bootstrap a new process over its stdin.

    bootstrap_command may be any command, as long as it eventually execs
    rsyscall-stdin-bootstrap and passes its stdin down when it does.

    A child is forked from `parent`, one end of a fresh socketpair is installed as the
    child's stdin, and the child execs bootstrap_command. Over the other end of the
    socketpair we send the file descriptors the new process needs; the new process then
    describes itself on the data channel, and a Process is built from that description.

    Returns:
        The child pid returned by exec'ing bootstrap_command, and the new Process.

    """
    # -- Step 1: fork a child whose stdin is one end of a socketpair, then exec --
    sockets = await (await parent.task.socketpair(
        AF.UNIX, SOCK.STREAM, 0, await parent.task.malloc(Socketpair))).read()
    our_end = sockets.first
    forked = await parent.fork()
    # install the second end as the child's stdin, then drop the parent's copy
    await forked.task.inherit_fd(sockets.second).dup2(forked.stdin)
    await sockets.second.close()
    child_pid = await forked.exec(bootstrap_command)

    # -- Step 2: pass the syscall, data and connection fds over the socketpair --
    [(syscall_access, syscall_passed),
     (data_access, data_passed)] = await parent.open_async_channels(2)
    connection_fd, make_connection = await parent.connection.prep_fd_transfer()
    # a single one-byte buffer carries the ancillary SCM_RIGHTS message
    payload = await parent.ptr(IovecList([await parent.malloc(bytes, 1)]))
    rights = await parent.ptr(
        CmsgList([
            CmsgSCMRights([syscall_passed, data_passed, connection_fd])
        ]))
    _, [] = await our_end.sendmsg(
        await parent.ptr(SendMsghdr(None, payload, rights)), SendmsgFlags.NONE)
    # the passed ends now belong to the new process; the socketpair is done too
    await syscall_passed.close()
    await data_passed.close()
    await our_end.close()

    # -- Step 3: read the new process's self-description from the data channel --
    describe_buf = AsyncReadBuffer(data_access)
    describe_struct = await describe_buf.read_cffi(
        'struct rsyscall_stdin_bootstrap')
    environ = await describe_buf.read_envp(describe_struct.envp_count)

    # -- Step 4: build the task for the new process --
    raw_pid = describe_struct.pid
    fd_table = handle.FDTable(raw_pid)
    address_space = far.AddressSpace(raw_pid)
    # The pid namespace is assumed to be shared with the parent.
    # TODO include namespace inode numbers in describe
    # note: if we start dealing with namespace numbers then we need to
    # have a Kernel namespace which tells us which kernel we get those
    # numbers from.
    # oh hey we can conveniently dump the inode numbers with getdents!
    pidns = parent.task.pidns
    # The mount namespace is assumed not to be shared (can't hurt).
    mountns = far.MountNamespace(raw_pid)
    pid = near.Pid(raw_pid)
    base_task = Task(pid, fd_table, address_space, pidns, mountns)
    remote_syscall_fd = base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.syscall_fd))
    base_task.sysif = SyscallConnection(
        logger.getChild(str(pid)),
        syscall_access,
        remote_syscall_fd,
    )
    base_task.allocator = await memory.AllocatorClient.make_allocator(
        base_task)
    # The signal mask is assumed to be zeroed before we were started, so it is
    # not inherited.
    # TODO I think I can maybe elide creating this epollcenter and instead inherit it or share it, maybe?
    epoller = await Epoller.make_root(base_task)
    child_monitor = await ChildPidMonitor.make(base_task, epoller)
    connection = make_connection(
        base_task,
        base_task.make_fd_handle(
            near.FileDescriptor(describe_struct.connecting_fd)))

    # -- Step 5: assemble the Process --
    loader = NativeLoader.make_from_symbols(base_task,
                                            describe_struct.symbols)
    environment = Environment.make_from_environ(base_task, environ)
    stdin, stdout, stderr = (
        base_task.make_fd_handle(near.FileDescriptor(number))
        for number in (0, 1, 2))
    new_parent = Process(
        task=base_task,
        connection=connection,
        loader=loader,
        epoller=epoller,
        child_monitor=child_monitor,
        environ=environment,
        stdin=stdin,
        stdout=stdout,
        stderr=stderr,
    )
    return child_pid, new_parent
Example #5
0
async def _setup_stub(
    process: Process,
    bootstrap_sock: FileDescriptor,
) -> t.Tuple[t.List[str], Process]:
    """Set up a stub process over bootstrap_sock.

    The syscall and data channel ends, a memfd intended for the futex, and a
    connection fd are sent over bootstrap_sock. The stub then describes itself on the
    data channel (argv, environment, pid, fd numbers, sigmask, symbols), and a Process
    is built from that description.

    Returns:
        The stub's argv (decoded with os.fsdecode) and the new Process.

    """
    # -- Step 1: prepare and send the fds the stub needs --
    [(syscall_access, syscall_passed),
     (data_access, data_passed)] = await process.open_async_channels(2)
    # memfd for setting up the futex
    futex_memfd = await process.task.memfd_create(await process.task.ptr(
        Path("child_robust_futex_list")))
    connection_fd, make_connection = await process.connection.prep_fd_transfer()
    # a single one-byte buffer carries the ancillary SCM_RIGHTS message
    payload = await process.ptr(IovecList([await process.malloc(bytes, 1)]))
    rights = await process.ptr(
        CmsgList([
            CmsgSCMRights([
                syscall_passed, data_passed, futex_memfd, connection_fd
            ])
        ]))
    _, [] = await bootstrap_sock.sendmsg(
        await process.ptr(SendMsghdr(None, payload, rights)), SendmsgFlags.NONE)
    # invalidate our handles to the fds that only the new process needs,
    # and to the bootstrap socket itself
    await syscall_passed.invalidate()
    await data_passed.invalidate()
    await bootstrap_sock.invalidate()

    # -- Step 2: read the stub's self-description from the data channel --
    describe_buf = AsyncReadBuffer(data_access)
    describe_struct = await describe_buf.read_cffi('struct rsyscall_unix_stub')
    argv_raw = await describe_buf.read_length_prefixed_array(
        describe_struct.argc)
    argv = list(map(os.fsdecode, argv_raw))
    environ = await describe_buf.read_envp(describe_struct.envp_count)

    # -- Step 3: build the task for the stub --
    raw_pid = describe_struct.pid
    fd_table = handle.FDTable(raw_pid)
    address_space = far.AddressSpace(raw_pid)
    # The pid namespace is assumed to be shared with the calling process.
    pidns = process.task.pidns
    # The mount namespace is assumed not to be shared (won't hurt).
    mountns = far.MountNamespace(raw_pid)
    pid = near.Pid(raw_pid)
    # we assume net namespace is shared - that's dubious...
    # we should make it possible to control the namespace sharing more, hmm.
    # TODO maybe the describe should contain the net namespace number? and we can store our own as well?
    # then we can automatically do it right
    base_task = Task(pid, fd_table, address_space, pidns, mountns)
    remote_syscall_fd = base_task.make_fd_handle(
        near.FileDescriptor(describe_struct.syscall_fd))
    base_task.sysif = SyscallConnection(
        logger.getChild(str(pid)),
        syscall_access,
        remote_syscall_fd,
    )
    base_task.allocator = await memory.AllocatorClient.make_allocator(
        base_task)
    # unlike the other bootstrap paths, the signal mask is taken from the description
    base_task.sigmask = Sigset(
        set(map(SIG, rsyscall.struct.bits(describe_struct.sigmask))))
    # TODO I think I can maybe elide creating this epollcenter and instead inherit it or share it, maybe?
    # I guess I need to write out the set too in describe
    epoller = await Epoller.make_root(base_task)
    child_monitor = await ChildPidMonitor.make(base_task, epoller)
    connection = make_connection(
        base_task,
        base_task.make_fd_handle(
            near.FileDescriptor(describe_struct.connecting_fd)))

    # -- Step 4: assemble the Process --
    loader = NativeLoader.make_from_symbols(base_task,
                                            describe_struct.symbols)
    environment = Environment.make_from_environ(base_task, environ)
    stdin, stdout, stderr = (
        base_task.make_fd_handle(near.FileDescriptor(number))
        for number in (0, 1, 2))
    new_process = Process(
        task=base_task,
        connection=connection,
        loader=loader,
        epoller=epoller,
        child_monitor=child_monitor,
        environ=environment,
        stdin=stdin,
        stdout=stdout,
        stderr=stderr,
    )
    #### TODO set up futex I guess
    # placeholder for the TODO above: the remote memfd number is read but not yet used
    remote_futex_memfd = near.FileDescriptor(describe_struct.futex_memfd)
    return argv, new_process