def grandchild():
            """Setup everything inside the process that finally exec()s the tool."""
            try:
                # We know that this process has PID 2 in the inner namespace,
                # but we actually need to know its PID in the outer namespace
                # such that parent can put us into the correct cgroups.  According to
                # http://man7.org/linux/man-pages/man7/pid_namespaces.7.html,
                # there are two ways to achieve this: sending a message with the PID
                # via a socket (but Python 2 lacks a convenient API for sendmsg),
                # and reading /proc/self in the outer procfs instance
                # (that's what we do).
                my_outer_pid = container.get_my_pid_from_procfs()

                container.mount_proc(self._container_system_config)
                container.drop_capabilities()
                container.reset_signal_handling()
                child_setup_fn()  # Do some other setup the caller wants.

                # Signal readiness to parent by sending our PID
                # and wait until parent is also ready
                os.write(to_parent, str(my_outer_pid).encode())
                received = os.read(from_parent, 1)
                assert received == MARKER_PARENT_COMPLETED, received
            finally:
                # close remaining ends of pipe
                os.close(from_parent)
                os.close(to_parent)
        def grandchild():
            """Setup everything inside the process that finally exec()s the tool."""
            try:
                # We know that this process has PID 2 in the inner namespace,
                # but we actually need to know its PID in the outer namespace
                # such that parent can put us into the correct cgroups.
                # According to http://man7.org/linux/man-pages/man7/pid_namespaces.7.html,
                # there are two ways to achieve this: sending a message with the PID
                # via a socket (but Python < 3.3 lacks a convenient API for sendmsg),
                # and reading /proc/self in the outer procfs instance (that's what we do).
                my_outer_pid = container.get_my_pid_from_procfs()

                container.mount_proc()
                container.drop_capabilities()
                container.reset_signal_handling()
                child_setup_fn()  # Do some other setup the caller wants.

                # Signal readiness to parent by sending our PID and wait until parent is also ready
                os.write(to_parent, str(my_outer_pid).encode())
                received = os.read(from_parent, 1)
                assert received == b"\0", received
            finally:
                # close remaining ends of pipe
                os.close(from_parent)
                os.close(to_parent)
Example #3
0
def _init_container_and_load_tool(
        tool_module,
        temp_dir,
        network_access,
        dir_modes,
        container_system_config,
        container_tmpfs,  # ignored, tmpfs is always used
):
    """Initialize container for the current process and load given tool-info module."""
    # Prepare for private home directory, some tools write there
    if container_system_config:
        dir_modes.setdefault(container.CONTAINER_HOME, container.DIR_HIDDEN)
        os.environ["HOME"] = container.CONTAINER_HOME

    # Preparations
    temp_dir = temp_dir.encode()
    dir_modes = collections.OrderedDict(
        sorted(
            ((path.encode(), kind) for (path, kind) in dir_modes.items()),
            key=lambda tupl: len(tupl[0]),
        ))
    uid = container.CONTAINER_UID if container_system_config else os.getuid()
    gid = container.CONTAINER_GID if container_system_config else os.getgid()

    # Create container.
    # Contrary to ContainerExecutor, which uses clone to start a new process in new
    # namespaces, we use unshare, which puts the current process (the multiprocessing
    # worker process) into new namespaces.
    # The exception is the PID namespace, which will only apply to children processes.
    flags = (libc.CLONE_NEWNS
             | libc.CLONE_NEWUTS
             | libc.CLONE_NEWIPC
             | libc.CLONE_NEWUSER
             | libc.CLONE_NEWPID)
    if not network_access:
        flags |= libc.CLONE_NEWNET
    libc.unshare(flags)

    # Container config
    container.setup_user_mapping(os.getpid(), uid, gid)
    _setup_container_filesystem(temp_dir, dir_modes, container_system_config)
    if container_system_config:
        libc.sethostname(container.CONTAINER_HOSTNAME)
    if not network_access:
        container.activate_network_interface("lo")

    # Because this process is not actually in the new PID namespace, we fork.
    # The child will be in the new PID namespace and will assume the role of the acting
    # multiprocessing worker (which it can do because it inherits the file descriptors
    # that multiprocessing uses for communication).
    # The original multiprocessing worker (the parent of the fork) must do nothing in
    # order to not confuse multiprocessing.
    pid = os.fork()
    if pid:
        container.drop_capabilities()
        # block parent such that it does nothing
        os.waitpid(pid, 0)
        os._exit(0)

    # Finalize container setup in child
    container.mount_proc(container_system_config)  # only possible in child
    container.drop_capabilities()
    libc.prctl(libc.PR_SET_DUMPABLE, libc.SUID_DUMP_DISABLE, 0, 0, 0)

    logging.debug("Loading tool-info module %s in container", tool_module)
    global tool
    try:
        tool = __import__(tool_module, fromlist=["Tool"]).Tool()
    except BaseException as e:
        tool = None
        return e
    return tool.__doc__
Example #4
0
def _init_container(
    temp_dir,
    network_access,
    dir_modes,
    container_system_config,
    container_tmpfs,  # ignored, tmpfs is always used
):
    """
    Create a fork of this process in a container. This method only returns in the fork,
    so calling it seems like moving the current process into a container.
    """
    # Prepare for private home directory, some tools write there
    if container_system_config:
        dir_modes.setdefault(container.CONTAINER_HOME, container.DIR_HIDDEN)
        os.environ["HOME"] = container.CONTAINER_HOME

    # Preparations
    temp_dir = temp_dir.encode()
    dir_modes = collections.OrderedDict(
        sorted(
            ((path.encode(), kind) for (path, kind) in dir_modes.items()),
            key=lambda tupl: len(tupl[0]),
        )
    )
    uid = container.CONTAINER_UID if container_system_config else os.getuid()
    gid = container.CONTAINER_GID if container_system_config else os.getgid()

    # Create container.
    # Contrary to ContainerExecutor, which uses clone to start a new process in new
    # namespaces, we use unshare, which puts the current process (the multiprocessing
    # worker process) into new namespaces.
    # The exception is the PID namespace, which will only apply to children processes.
    flags = (
        libc.CLONE_NEWNS
        | libc.CLONE_NEWUTS
        | libc.CLONE_NEWIPC
        | libc.CLONE_NEWUSER
        | libc.CLONE_NEWPID
    )
    if not network_access:
        flags |= libc.CLONE_NEWNET
    try:
        libc.unshare(flags)
    except OSError as e:
        if (
            e.errno == errno.EPERM
            and util.try_read_file("/proc/sys/kernel/unprivileged_userns_clone") == "0"
        ):
            raise BenchExecException(
                "Unprivileged user namespaces forbidden on this system, please "
                "enable them with 'sysctl kernel.unprivileged_userns_clone=1' "
                "or disable container mode"
            )
        else:
            raise BenchExecException(
                "Creating namespace for container mode failed: " + os.strerror(e.errno)
            )

    # Container config
    container.setup_user_mapping(os.getpid(), uid, gid)
    _setup_container_filesystem(temp_dir, dir_modes, container_system_config)
    if container_system_config:
        libc.sethostname(container.CONTAINER_HOSTNAME)
    if not network_access:
        container.activate_network_interface("lo")

    # Because this process is not actually in the new PID namespace, we fork.
    # The child will be in the new PID namespace and will assume the role of the acting
    # multiprocessing worker (which it can do because it inherits the file descriptors
    # that multiprocessing uses for communication).
    # The original multiprocessing worker (the parent of the fork) must do nothing in
    # order to not confuse multiprocessing.
    pid = os.fork()
    if pid:
        container.drop_capabilities()
        # block parent such that it does nothing
        os.waitpid(pid, 0)
        os._exit(0)

    # Finalize container setup in child
    container.mount_proc(container_system_config)  # only possible in child
    container.drop_capabilities()
    libc.prctl(libc.PR_SET_DUMPABLE, libc.SUID_DUMP_DISABLE, 0, 0, 0)
    container.setup_seccomp_filter()