Beispiel #1
0
class Janitor:
    """Janitor to properly clean up after spawned subprocesses and greenlets.

    The goal of the janitor is to:

    - Propagate errors, if any of the watched processes / greenlets fails.
    - Keep track of spawned subprocesses and greenlets and make sure that
      everything is cleaned up once the Janitor is done.
        - If the janitor is exiting because the monitored block is done (i.e.
          the with block is done executing), then a "clean" shutdown is
          performed.
        - Otherwise an exception occurred and the greenlets are killed with
          it.

    The janitor is used as a context manager: `__enter__` returns a nursery
    object exposing `exec_under_watch`, `spawn_under_watch` and `wait`, and
    `__exit__` stops whatever was started through it.
    """
    def __init__(self, stop_timeout: float = 20) -> None:
        """Create a janitor that is not watching anything yet.

        Args:
            stop_timeout: Timeout handed to each `Popen.wait` call made by
                `_free_resources` before it falls back to SIGKILL.
        """
        self.stop_timeout = stop_timeout
        # Becomes ready either when `__exit__` runs (plain `set()`) or when a
        # watched process / greenlet fails (`set_exception`).
        self._stop = AsyncResult()
        # Processes started through `exec_under_watch` that have not been
        # removed by the `subprocess_stopped` callback yet.
        self._processes: Set[Popen] = set()
        # Set by `__exit__`; tells the `subprocess_stopped` callback that
        # processes stopping from now on is expected.
        self._exit_in_progress = False

        # Lock to protect changes to `_stop` and `_processes`. The `_stop`
        # synchronization is necessary to fix the race described below,
        # `_processes` synchronization is necessary to avoid iteration over a
        # changing container.
        #
        # Important: It is very important to register any executed subprocess,
        # otherwise no signal will be sent during shutdown and the subprocess
        # will become an orphan. To properly register the subprocesses it is
        # very important to finish any pending call to `exec_under_watch`
        # before exiting the `Janitor`, and if the exit does run,
        # `exec_under_watch` must not start a new process.
        #
        # Note this only works if the greenlet that instantiated the Janitor
        # itself has a chance to run.
        self._processes_lock = RLock()

    def __enter__(self) -> Nursery:
        """Arm the atexit fallback and return the nursery of this janitor.

        Returns:
            A `Nursery` whose methods start subprocesses and greenlets that
            are tracked by this janitor.
        """
        # Registers an atexit callback in case the __exit__ doesn't get a
        # chance to run. This happens when the Janitor is not used in the main
        # greenlet, and its greenlet is not the one that is dying.
        atexit.register(self._free_resources)

        # Hide the nursery to require the context manager to be used. This
        # leads to better behavior in the happy case since the exit handler is
        # used.
        janitor = self

        class ProcessNursery(Nursery):
            """Nursery bound to the enclosing janitor through the closure."""

            @staticmethod
            def exec_under_watch(process_args: List[str],
                                 **kwargs: Any) -> Optional[Popen]:
                """Start a subprocess and register it with the janitor.

                Args:
                    process_args: Command line handed to `Popen`.
                    **kwargs: Extra keyword arguments handed to `Popen`;
                        `shell=True` is rejected by an assertion.

                Returns:
                    The started process, or `None` when `janitor._stop` was
                    already ready and therefore nothing was started.
                """
                msg = (
                    "The Janitor can not work with `shell=True`. When that flag "
                    "is used a proxy shell process is used to start the real "
                    "target process, the result is that the Janitor will monitor "
                    "and kill the proxy shell instead of the target, once the "
                    "shell is killed the real targets are kept around as "
                    "orphans, which is exactly what the Janitor is trying to "
                    "prevent from happening.")
                assert not kwargs.get("shell", False), msg

                def subprocess_stopped(result: AsyncResult) -> None:
                    """Handle the end of the process started further below.

                    Removes the process from the tracked set and, when it
                    exited with a non-success code or `result.get()` raises,
                    stores that failure in `janitor._stop`.
                    """
                    if janitor._exit_in_progress:
                        # During __exit__ we expect processes to stop, since
                        # they are killed by the janitor.
                        return

                    with janitor._processes_lock:
                        # Processes are expected to quit while the nursery is
                        # active, remove them from the track list to free
                        # memory. `process` is the closure variable assigned
                        # in the `with` block below.
                        janitor._processes.remove(process)

                        # If the subprocess errored, propagate the error.
                        try:
                            exit_code = result.get()
                            if exit_code != STATUS_CODE_FOR_SUCCESS:
                                log.error(
                                    "Process died! Bailing out.",
                                    args=process.args,
                                    exit_code=exit_code,
                                )
                                exception = SystemExit(exit_code)
                                janitor._stop.set_exception(exception)
                        except Exception as exception:
                            log.exception(
                                "Process erroed! Propagating error.",
                                args=process.args,
                            )
                            janitor._stop.set_exception(exception)

                with janitor._processes_lock:
                    # Do not start anything once the janitor is stopping.
                    if janitor._stop.ready():
                        return None

                    process = Popen(process_args, **kwargs)
                    janitor._processes.add(process)

                    # `rawlink`s are executed from within the hub, the problem
                    # is that locks cannot be acquired at that point.
                    # SpawnedLink creates a new greenlet to run the callback to
                    # circumvent that.
                    callback = SpawnedLink(subprocess_stopped)

                    process.result.rawlink(callback)

                    # Important: `_stop` may have been set between the check
                    # at the top of this block and this point (the original
                    # note says this can happen after Popen started, but
                    # before it returned). The process is already registered
                    # above, so it is signalled here and still returned to the
                    # caller.
                    if janitor._stop.ready():
                        process.send_signal(signal.SIGINT)

                    return process

            @staticmethod
            def spawn_under_watch(function: Callable, *args: Any,
                                  **kwargs: Any) -> Greenlet:
                """Run `function` in a new greenlet tied to the janitor.

                A failure of the greenlet is stored in `janitor._stop`, and
                once `janitor._stop` is set an exception is thrown into the
                greenlet to stop it.

                Args:
                    function: Callable executed in the new greenlet.
                    *args: Positional arguments for `function`.
                    **kwargs: Keyword arguments for `function`.

                Returns:
                    The spawned greenlet.
                """
                greenlet = gevent.spawn(function, *args, **kwargs)

                # The callback provided to `AsyncResult.rawlink` is executed
                # inside the Hub thread, the callback is calling `throw` which
                # has to be called from the Hub, so here there is no need to
                # wrap the callback in a SpawnedLink.
                #
                # `throw` does not raise the exception if the greenlet has
                # finished, which is exactly the semantics needed here.
                def stop_greenlet_from_hub(result: AsyncResult) -> None:
                    """Stop the greenlet if the nursery is stopped.

                    Throws the exception raised by `result.get()` into the
                    greenlet, or `GreenletExit` when `result.get()` returns
                    normally.
                    """
                    try:
                        result.get()
                    except BaseException as e:
                        greenlet.throw(e)
                    else:
                        greenlet.throw(GreenletExit())

                def propagate_error(g: Greenlet) -> None:
                    """If the greenlet fails, stop the nursery."""
                    janitor._stop.set_exception(g.exception)

                greenlet.link_exception(propagate_error)
                janitor._stop.rawlink(stop_greenlet_from_hub)

                return greenlet

            @staticmethod
            def wait(timeout: Optional[float]) -> None:
                """Wait on `janitor._stop`, passing `timeout` through."""
                janitor._stop.wait(timeout)

        return ProcessNursery()

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_value: Optional[BaseException],
        traceback: Optional[TracebackType],
    ) -> Optional[bool]:
        """Stop everything that was started through the nursery.

        The arguments are the standard context manager exception details;
        they are not inspected here.

        Returns:
            Always `None`, so an exception raised inside the with block is
            not suppressed.
        """
        # Set before taking the lock so that `subprocess_stopped` callbacks
        # running from now on return early.
        self._exit_in_progress = True
        # Taking the lock lets a pending `exec_under_watch` finish first, and
        # any later call will find `_stop` ready and start nothing.
        with self._processes_lock:
            # Make sure to signal that we are exiting.
            if not self._stop.done():
                self._stop.set()

            self._free_resources()

            # Behave nicely if context manager's __exit__ is executed. This
            # implements the expected behavior of a context manager, which will
            # clear the resources when exiting.
            atexit.unregister(self._free_resources)

        # Surface a failure stored by a watched process / greenlet.
        # NOTE(review): assumes `AsyncResult` is gevent's, whose `get`
        # re-raises an exception stored through `set_exception` -- confirm
        # against the file's imports.
        self._stop.get()
        return None

    def _free_resources(self) -> None:
        """Stop every tracked process, escalating to SIGKILL on a timeout.

        Called by `__exit__` and registered as an atexit callback by
        `__enter__`.
        """
        with self._processes_lock:
            # Signal all processes first, and only then start waiting on them.
            for p in self._processes:
                p.send_signal(signal.SIGINT)

            try:
                # `stop_timeout` applies to each `wait` call separately.
                for p in self._processes:
                    exit_code = p.wait(timeout=self.stop_timeout)
                    if exit_code != STATUS_CODE_FOR_SUCCESS:
                        log.warning(
                            "Process did not exit cleanly",
                            exit_code=exit_code,
                            communicate=p.communicate(),
                        )
            except TimeoutExpired:
                # `p` is still bound to the process whose `wait` timed out.
                log.warning(
                    "Process did not stop in time. Sending SIGKILL to all remaining processes!",
                    command=p.args,
                )
                # NOTE(review): the killed processes are not waited on
                # afterwards.
                for p in self._processes:
                    p.send_signal(signal.SIGKILL)
Beispiel #2
0
class Janitor:
    """Tries to properly stop all subprocesses before quitting the script.

    - This watches the status of the subprocesses; if a process exits with a
      non-zero error code then the failure is propagated.
    - If for any reason this process is dying, then all the spawned processes
      have to be killed in order for a proper clean up to happen.

    The janitor is used as a context manager: `__enter__` returns a nursery
    object exposing `exec_under_watch`, `spawn_under_watch` and `wait`, and
    `__exit__` stops whatever was started through it.
    """
    def __init__(self, stop_timeout: float = 20) -> None:
        """Create a janitor that is not watching anything yet.

        Args:
            stop_timeout: Timeout handed to each `Popen.wait` call made by
                `_free_resources` before it falls back to SIGKILL.
        """
        self.stop_timeout = stop_timeout
        # Becomes ready either when `__exit__` runs (plain `set()`) or when a
        # watched process fails (`set_exception`).
        self._stop = AsyncResult()
        # Processes started through `exec_under_watch` that have not been
        # removed by the `subprocess_stopped` callback yet.
        self._processes: Set[Popen] = set()
        # Set by `__exit__`; tells the `subprocess_stopped` callback that
        # processes stopping from now on is expected.
        self._exit_in_progress = False

        # Lock to protect changes to `_stop` and `_processes`. The `_stop`
        # synchronization is necessary to fix the race described below,
        # `_processes` synchronization is necessary to avoid iteration over a
        # changing container.
        #
        # Important: It is very important to register any executed subprocess,
        # otherwise no signal will be sent during shutdown and the subprocess
        # will become an orphan. To properly register the subprocesses it is
        # very important to finish any pending call to `exec_under_watch`
        # before exiting the `Janitor`, and if the exit does run,
        # `exec_under_watch` must not start a new process.
        #
        # Note this only works if the greenlet that instantiated the Janitor
        # itself has a chance to run.
        self._processes_lock = RLock()

    def __enter__(self) -> Nursery:
        """Arm the atexit fallback and return the nursery of this janitor.

        Returns:
            A `Nursery` whose methods start subprocesses and greenlets that
            are tied to this janitor.
        """
        # Registers an atexit callback in case the __exit__ doesn't get a
        # chance to run. This happens when the Janitor is not used in the main
        # greenlet, and its greenlet is not the one that is dying.
        atexit.register(self._free_resources)

        # Hide the nursery to require the context manager to be used. This
        # leads to better behavior in the happy case since the exit handler is
        # used.
        janitor = self

        class ProcessNursery(Nursery):
            """Nursery bound to the enclosing janitor through the closure."""

            @staticmethod
            def exec_under_watch(process_args: List[str],
                                 **kwargs: Any) -> Optional[Popen]:
                """Start a subprocess and register it with the janitor.

                Args:
                    process_args: Command line handed to `Popen`.
                    **kwargs: Extra keyword arguments handed to `Popen`;
                        `shell=True` is rejected by an assertion.

                Returns:
                    The started process, or `None` when `janitor._stop` was
                    already ready and therefore nothing was started.
                """
                assert not kwargs.get(
                    "shell", False), "Janitor does not work with shell=True"

                def subprocess_stopped(result: AsyncResult) -> None:
                    """Handle the end of the process started further below.

                    Removes the process from the tracked set and, when it
                    exited with a non-success code or `result.get()` raises,
                    stores that failure in `janitor._stop`.
                    """
                    if janitor._exit_in_progress:
                        # During __exit__ we expect processes to stop, since
                        # they are killed by the janitor.
                        return

                    with janitor._processes_lock:
                        # Processes are expected to quit while the nursery is
                        # active, remove them from the track list to free
                        # memory. `process` is the closure variable assigned
                        # in the `with` block below.
                        janitor._processes.remove(process)

                        # If the subprocess errored, propagate the error.
                        try:
                            exit_code = result.get()
                            if exit_code != STATUS_CODE_FOR_SUCCESS:
                                log.error(
                                    "Process died! Bailing out.",
                                    args=process.args,
                                    exit_code=exit_code,
                                )
                                exception = SystemExit(exit_code)
                                janitor._stop.set_exception(exception)
                        except Exception as exception:
                            # An exception raised by `result.get()` is stored
                            # as-is; nothing is logged on this path.
                            janitor._stop.set_exception(exception)

                with janitor._processes_lock:
                    # Do not start anything once the janitor is stopping.
                    if janitor._stop.ready():
                        return None

                    process = Popen(process_args, **kwargs)
                    janitor._processes.add(process)

                    # `rawlink`s are executed from within the hub, the problem
                    # is that locks cannot be acquired at that point.
                    # SpawnedLink creates a new greenlet to run the callback to
                    # circumvent that.
                    callback = SpawnedLink(subprocess_stopped)

                    process.result.rawlink(callback)

                    # Important: `_stop` may have been set between the check
                    # at the top of this block and this point (the original
                    # note says this can happen after Popen started, but
                    # before it returned). The process is already registered
                    # above, so it is signalled here and still returned to the
                    # caller.
                    if janitor._stop.ready():
                        process.send_signal(signal.SIGINT)

                    return process

            @staticmethod
            def spawn_under_watch(function: Callable, *args: Any,
                                  **kwargs: Any) -> Greenlet:
                """Run `function` in a new greenlet stopped with the janitor.

                Once `janitor._stop` is set, `GreenletExit` is thrown into
                the greenlet from a helper greenlet.

                NOTE(review): nothing in this method reports a failure of
                `function` back to `janitor._stop`.

                Args:
                    function: Callable executed in the new greenlet.
                    *args: Positional arguments for `function`.
                    **kwargs: Keyword arguments for `function`.

                Returns:
                    The spawned greenlet.
                """
                greenlet = gevent.spawn(function, *args, **kwargs)

                # The Event.rawlink is executed inside the Hub thread, which
                # does validation and *raises on blocking calls*. To go around
                # this, a new greenlet has to be spawned, which in turn will
                # raise the exception.
                def spawn_to_kill() -> None:
                    """Spawn a greenlet that throws `GreenletExit` into `greenlet`."""
                    gevent.spawn(greenlet.throw, gevent.GreenletExit())

                # The rawlink callback receives `_stop` itself; it is ignored.
                janitor._stop.rawlink(lambda _stop: spawn_to_kill())
                return greenlet

            @staticmethod
            def wait(timeout: Optional[float]) -> None:
                """Wait on `janitor._stop`, passing `timeout` through."""
                janitor._stop.wait(timeout)

        return ProcessNursery()

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_value: Optional[BaseException],
        traceback: Optional[TracebackType],
    ) -> Optional[bool]:
        """Stop everything that was started through the nursery.

        The arguments are the standard context manager exception details;
        they are not inspected here.

        Returns:
            Always `None`, so an exception raised inside the with block is
            not suppressed.
        """
        # Set before taking the lock so that `subprocess_stopped` callbacks
        # running from now on return early.
        self._exit_in_progress = True
        # Taking the lock lets a pending `exec_under_watch` finish first, and
        # any later call will find `_stop` ready and start nothing.
        with self._processes_lock:
            # Make sure to signal that we are exiting.
            if not self._stop.done():
                self._stop.set()

            self._free_resources()

            # Behave nicely if context manager's __exit__ is executed. This
            # implements the expected behavior of a context manager, which will
            # clear the resources when exiting.
            atexit.unregister(self._free_resources)

        # Surface a failure stored by a watched process.
        # NOTE(review): assumes `AsyncResult` is gevent's, whose `get`
        # re-raises an exception stored through `set_exception` -- confirm
        # against the file's imports.
        self._stop.get()
        return None

    def _free_resources(self) -> None:
        """Stop every tracked process, escalating to SIGKILL on a timeout.

        Called by `__exit__` and registered as an atexit callback by
        `__enter__`.
        """
        with self._processes_lock:
            # Signal all processes first, and only then start waiting on them.
            for p in self._processes:
                p.send_signal(signal.SIGINT)

            try:
                # `stop_timeout` applies to each `wait` call separately.
                for p in self._processes:
                    exit_code = p.wait(timeout=self.stop_timeout)
                    if exit_code != STATUS_CODE_FOR_SUCCESS:
                        log.warning(
                            "Process did not exit cleanly",
                            exit_code=exit_code,
                            communicate=p.communicate(),
                        )
            except TimeoutExpired:
                # `p` is still bound to the process whose `wait` timed out.
                log.warning(
                    "Process did not stop in time. Sending SIGKILL to all remaining processes!",
                    command=p.args,
                )
                # NOTE(review): the killed processes are not waited on
                # afterwards.
                for p in self._processes:
                    p.send_signal(signal.SIGKILL)