def _exec_middleman(command, env, exit_event, stdout, stderr, rw):
    stdout_r, stdout_w = stdout
    stderr_r, stderr_w = stderr
    r, w = rw

    # Close unused file descriptors to enforce PIPE behavior.
    stdout_r.close()
    stderr_r.close()
    w.close()
    os.setsid()

    executor_shell = subprocess.Popen(command,
                                      shell=True,
                                      env=env,
                                      stdout=stdout_w,
                                      stderr=stderr_w)

    on_event(exit_event,
             terminate_executor_shell_and_children,
             args=(executor_shell.pid, ))

    def kill_executor_children_if_parent_dies():
        # This read blocks until the pipe is closed on the other side
        # due to parent process termination (for any reason, including -9).
        os.read(r.fileno(), 1)
        terminate_executor_shell_and_children(executor_shell.pid)

    in_thread(kill_executor_children_if_parent_dies)

    exit_code = executor_shell.wait()
    if exit_code < 0:
        # See: https://www.gnu.org/software/bash/manual/html_node/Exit-Status.html
        exit_code = 128 + abs(exit_code)

    sys.exit(exit_code)
Exemple #2
0
def rsh(driver_addresses, key, settings, host_hash, command, env, local_rank,
        background=True, events=None):
    """
    Method to run a command remotely given a host hash, local rank and driver addresses.

    This method connects to the SparkDriverService running on the Spark driver,
    retrieves all information required to connect to the task with given local rank
    of that host hash and invoke the command there.

    The method returns immediately after launching the command if background is True (default).
    When background is set to False, this method waits for command termination and returns
    command's result. If there is an exception while waiting for the result (i.e. connection reset)
    it returns -1.

    :param driver_addresses: driver's addresses
    :param key: used for encryption of parameters passed across the hosts
    :param settings: settings
    :param host_hash: host hash to connect to
    :param command: command and arguments to invoke
    :param env: environment to use
    :param local_rank: local rank on the host of task to run the command in
    :param background: run command in background if True, returns command result otherwise
    :param events: events to abort the command, only if background is True
    """
    if ':' in host_hash:
        raise Exception('Illegal host hash provided. Are you using Open MPI 4.0.0+?')

    driver_client = driver_service.SparkDriverClient(driver_addresses, key,
                                                     verbose=settings.verbose)
    task_indices = driver_client.task_host_hash_indices(host_hash)
    task_index = task_indices[local_rank]
    task_addresses = driver_client.all_task_addresses(task_index)
    task_client = task_service.SparkTaskClient(task_index, task_addresses,
                                               key, verbose=settings.verbose)
    task_client.run_command(command, env)

    if not background:
        stop = None
        events = events or []
        for event in events:
            stop = threading.Event()
            on_event(event, task_client.abort_command, stop=stop)

        try:
            return task_client.wait_for_command_exit_code()
        except:
            traceback.print_exc()
            return -1
        finally:
            if stop is not None:
                stop.set()
Exemple #3
0
    def test_on_event(self):
        # a happy run without args and stop event
        event = threading.Event()
        fn = mock.Mock()
        thread = on_event(event, fn)
        fn.assert_not_called()
        event.set()
        thread.join(1.0)
        self.assertFalse(thread.is_alive())
        fn.assert_called_once()

        # a happy run with args but without stop event
        event = threading.Event()
        fn = mock.Mock()
        thread = on_event(event, fn, ('a', 1))
        fn.assert_not_called()
        event.set()
        thread.join(1.0)
        self.assertFalse(thread.is_alive())
        fn.assert_called_once()
        fn.assert_called_once_with('a', 1)

        # a happy run with stop event but unused
        event = threading.Event()
        stop = threading.Event()
        fn = mock.Mock()
        thread = on_event(event, fn, stop=stop, check_interval_seconds=0.01)
        fn.assert_not_called()
        event.set()
        thread.join(1.0)
        self.assertFalse(thread.is_alive())
        fn.assert_called_once()
        stop.set()
        time.sleep(0.1)
        fn.assert_called_once()

        # stop the thread before we set the event
        event = threading.Event()
        stop = threading.Event()
        fn = mock.Mock()
        thread = on_event(event, fn, stop=stop, check_interval_seconds=0.01)
        fn.assert_not_called()
        stop.set()
        thread.join(1.0)
        self.assertFalse(thread.is_alive())
        fn.assert_not_called()
        event.set()
        time.sleep(0.1)
        fn.assert_not_called()

        # test with exception
        def exception():
            raise Exception("Test Exception")

        event = threading.Event()
        fn = mock.Mock(side_effect=exception)
        thread = on_event(event, fn)
        fn.assert_not_called()
        event.set()
        thread.join(1.0)
        self.assertFalse(thread.is_alive())
        fn.assert_called_once()

        # test with exception but silent
        event = threading.Event()
        fn = mock.Mock(side_effect=exception)
        thread = on_event(event, fn)
        fn.assert_not_called()
        event.set()
        thread.join(1.0)
        self.assertFalse(thread.is_alive())
        fn.assert_called_once()

        # test non-tuple args
        event = threading.Event()
        fn = mock.Mock()
        with pytest.raises(
                ValueError,
                match="^args must be a tuple, not <(class|type) 'int'>, "
                "for a single argument use \\(arg,\\)$"):
            on_event(event, fn, args=1)
        fn.assert_not_called()
def execute(command,
            env=None,
            stdout=None,
            stderr=None,
            index=None,
            events=None):
    ctx = multiprocessing.get_context('spawn') if _PY3 else multiprocessing

    # When this event is set, signal to middleman to terminate its children and exit.
    exit_event = _create_event(ctx)

    # Make a pipe for the subprocess stdout/stderr.
    (stdout_r, stdout_w) = ctx.Pipe()
    (stderr_r, stderr_w) = ctx.Pipe()

    # This Pipe is how we ensure that the executed process is properly terminated (not orphaned) if
    # the parent process is hard killed (-9). If the parent (this process) is killed for any reason,
    # this Pipe will be closed, which can be detected by the middleman. When the middleman sees the
    # closed Pipe, it will issue a SIGTERM to the subprocess executing the command. The assumption
    # here is that users will be inclined to hard kill this process, not the middleman.
    (r, w) = ctx.Pipe()

    middleman = ctx.Process(target=_exec_middleman,
                            args=(command, env, exit_event,
                                  (stdout_r, stdout_w), (stderr_r,
                                                         stderr_w), (r, w)))
    middleman.start()

    # Close unused file descriptors to enforce PIPE behavior.
    r.close()
    stdout_w.close()
    stderr_w.close()

    # Redirect command stdout & stderr to provided streams or sys.stdout/sys.stderr.
    # This is useful for Jupyter Notebook that uses custom sys.stdout/sys.stderr or
    # for redirecting to a file on disk.
    if stdout is None:
        stdout = sys.stdout
    if stderr is None:
        stderr = sys.stderr

    stdout_fwd = in_thread(target=forward_stream,
                           args=(stdout_r, stdout, 'stdout', index))
    stderr_fwd = in_thread(target=forward_stream,
                           args=(stderr_r, stderr, 'stderr', index))

    # TODO: Currently this requires explicitly declaration of the events and signal handler to set
    #  the event (gloo_run.py:_launch_jobs()). Need to figure out a generalized way to hide this behind
    #  interfaces.
    stop = threading.Event()
    events = events or []
    for event in events:
        on_event(event, exit_event.set, stop=stop, silent=True)

    try:
        middleman.join()
    except:
        # interrupted, send middleman TERM signal which will terminate children
        exit_event.set()
        while True:
            try:
                middleman.join()
                break
            except:
                # interrupted, wait for middleman to finish
                pass
    finally:
        stop.set()

    stdout_fwd.join()
    stderr_fwd.join()

    return middleman.exitcode
Exemple #5
0
def execute(command,
            env=None,
            stdout=None,
            stderr=None,
            index=None,
            event=None):
    # Make a pipe for the subprocess stdout/stderr.
    (stdout_r, stdout_w) = os.pipe()
    (stderr_r, stderr_w) = os.pipe()

    # Make a pipe for notifying the child that parent has died.
    (r, w) = os.pipe()

    middleman_pid = os.fork()
    if middleman_pid == 0:
        # Close unused file descriptors to enforce PIPE behavior.
        os.close(w)
        os.setsid()

        executor_shell = subprocess.Popen(command,
                                          shell=True,
                                          env=env,
                                          stdout=stdout_w,
                                          stderr=stderr_w)

        sigterm_received = threading.Event()

        def set_sigterm_received(signum, frame):
            sigterm_received.set()

        signal.signal(signal.SIGINT, set_sigterm_received)
        signal.signal(signal.SIGTERM, set_sigterm_received)

        def kill_executor_children_if_parent_dies():
            # This read blocks until the pipe is closed on the other side
            # due to the process termination.
            os.read(r, 1)
            terminate_executor_shell_and_children(executor_shell.pid)

        in_thread(kill_executor_children_if_parent_dies)

        on_event(sigterm_received,
                 terminate_executor_shell_and_children,
                 args=(executor_shell.pid, ))

        exit_code = executor_shell.wait()
        os._exit(exit_code)

    # Close unused file descriptors to enforce PIPE behavior.
    os.close(r)
    os.close(stdout_w)
    os.close(stderr_w)

    # Redirect command stdout & stderr to provided streams or sys.stdout/sys.stderr.
    # This is useful for Jupyter Notebook that uses custom sys.stdout/sys.stderr or
    # for redirecting to a file on disk.
    if stdout is None:
        stdout = sys.stdout
    if stderr is None:
        stderr = sys.stderr

    stdout_fwd = in_thread(target=forward_stream,
                           args=(stdout_r, stdout, 'stdout', index))
    stderr_fwd = in_thread(target=forward_stream,
                           args=(stderr_r, stderr, 'stderr', index))

    # TODO: Currently this requires explicitly declaration of the event and signal handler to set
    #  the event (gloo_run.py:_launch_jobs()). Need to figure out a generalized way to hide this behind
    #  interfaces.
    stop = None
    if event is not None:
        stop = threading.Event()
        on_event(event,
                 os.kill, (middleman_pid, signal.SIGTERM),
                 stop=stop,
                 silent=True)

    try:
        res, status = os.waitpid(middleman_pid, 0)
    except:
        # interrupted, send middleman TERM signal which will terminate children
        os.kill(middleman_pid, signal.SIGTERM)
        while True:
            try:
                _, status = os.waitpid(middleman_pid, 0)
                break
            except:
                # interrupted, wait for middleman to finish
                pass
    finally:
        if stop is not None:
            stop.set()

    stdout_fwd.join()
    stderr_fwd.join()
    exit_code = status >> 8
    return exit_code