def test_counted_ping_failures_reset_on_success(self):
        """
        A successful ping wipes out previously counted failures: after one
        failure followed by one success, the daemon is only restarted once
        five further pings in a row have failed.
        """
        reactor_clock = Clock()
        watchdog = WatchDog(
            reactor_clock,
            broker=AsynchronousPingDaemon("test-broker"),
            monitor=AsynchronousPingDaemon("test-monitor"),
            manager=AsynchronousPingDaemon("test-manager"))
        watchdog.start_monitoring()

        def ping_round(broker_alive):
            # Advance the clock by one step of 5, then answer the outstanding
            # pings: the broker replies as requested, while the monitor and
            # the manager always report that they are running.
            reactor_clock.advance(5)
            watchdog.broker.fire_running(broker_alive)
            watchdog.monitor.fire_running(True)
            watchdog.manager.fire_running(True)

        # A single failure immediately followed by a success.
        ping_round(False)
        ping_round(True)

        # Four more failures must not be enough to trigger a restart...
        for _ in range(4):
            ping_round(False)
            self.assertEqual(watchdog.broker.boots, [])

        # ...but the fifth consecutive one is.
        ping_round(False)
        self.assertEqual(watchdog.broker.boots, [STOP, START])
    def test_ping_failure_counter_reset_after_restart(self):
        """
        Once a daemon has been restarted because of 5 failed pings, the
        failure count starts again from zero: a second restart only happens
        after another 5 failed pings.
        """
        reactor_clock = Clock()
        watchdog = WatchDog(
            reactor_clock,
            broker=AsynchronousPingDaemon("test-broker"),
            monitor=BoringDaemon("test-monitor"),
            manager=BoringDaemon("test-manager"))
        watchdog.start_monitoring()

        def fail_broker_ping():
            # Advance the clock by one step of 5 and make the broker's
            # outstanding ping report that it is not running.
            reactor_clock.advance(5)
            watchdog.broker.fire_running(False)

        one_restart = ["stop", "start"]

        # The first five failures produce exactly one restart.
        for _ in range(5):
            fail_broker_ping()
        self.assertEqual(watchdog.broker.boots, one_restart)

        # Four further failures leave the boot history untouched.
        for _ in range(4):
            fail_broker_ping()
            self.assertEqual(watchdog.broker.boots, one_restart)

        # The fifth failure since the restart triggers the second restart.
        fail_broker_ping()
        self.assertEqual(watchdog.broker.boots, one_restart * 2)
    def test_wait_for_stop_before_ping(self):
        """
        While the watchdog is restarting a daemon that timed out, no ping is
        sent to that daemon until the restart has completed.
        """
        reactor_clock = Clock()
        watchdog = WatchDog(
            reactor_clock,
            broker=AsynchronousPingDaemon("test-broker"),
            monitor=BoringDaemon("test-monitor"),
            manager=BoringDaemon("test-manager"))

        # Replace stop() with one whose completion this test controls.
        pending_stop = Deferred()

        def slow_stop():
            return pending_stop
        watchdog.broker.stop = slow_stop

        watchdog.start_monitoring()

        for _ in range(5):
            reactor_clock.advance(5)
            watchdog.broker.fire_running(False)

        # The stop is still pending: nothing was booted and only the five
        # pings answered above have been sent.
        self.assertEqual(watchdog.broker.boots, [])
        self.assertEqual(watchdog.broker.pings, 5)

        # Waiting longer must not produce another ping.
        reactor_clock.advance(5)
        self.assertEqual(watchdog.broker.pings, 5)

        # Completing the stop lets the start go ahead...
        pending_stop.callback(None)
        self.assertEqual(watchdog.broker.boots, ["start"])

        # ...and only then does pinging resume.
        reactor_clock.advance(5)
        self.assertEqual(watchdog.broker.pings, 6)
 def test_exiting_during_outstanding_ping_works(self):
     """
     Regression test: some code used to call .cancel() on a timed call
     without first checking whether it was still active. With an
     asynchronous is_running, the scheduled call exists but has already
     fired by the time the exit is requested.
     """
     reactor_clock = Clock()
     broker = BoringDaemon("test-broker")
     monitor = BoringDaemon("test-monitor")
     # Only the manager answers its ping asynchronously, so its ping is
     # still outstanding when request_exit() is called below.
     manager = AsynchronousPingDaemon("test-manager")
     watchdog = WatchDog(
         reactor_clock, broker=broker, monitor=monitor, manager=manager)

     watchdog.start_monitoring()
     reactor_clock.advance(5)

     exiting = watchdog.request_exit()
     return exiting
    def test_die_when_broker_unavailable(self):
        """
        The client can still shut down when the broker is not running.
        """
        self.log_helper.ignore_errors(
            "Couldn't request that broker gracefully shut down; "
            "killing forcefully.")
        watchdog = WatchDog(
            Clock(),
            broker=BoringDaemon("test-broker"),
            monitor=BoringDaemon("test-monitor"),
            manager=BoringDaemon("test-manager"))

        # request_exit returns False when there's no broker, as tested by
        # DaemonTest.test_request_exit_without_broker
        def broker_request_exit():
            return succeed(False)
        watchdog.broker.request_exit = broker_request_exit

        # The manager's wait method never fires its deferred on its own,
        # because nothing told the manager to die: the broker is dead!
        manager_exited = Deferred()

        def manager_wait():
            return manager_exited
        watchdog.manager.wait = manager_wait

        def force_stop_manager():
            # Stopping the manager is what finally fires the wait deferred.
            manager_exited.callback(True)
            return succeed(True)
        watchdog.manager.stop = force_stop_manager

        return watchdog.request_exit()
    def test_start_and_stop_daemons(self):
        """The WatchDog will start all daemons, starting with the broker."""
        self.setup_daemons_mocks()

        for daemon in (self.broker, self.monitor, self.manager):
            daemon.start()

        self.setup_request_exit()

        reactor_clock = Clock()
        watchdog = WatchDog(reactor_clock, config=self.config)
        watchdog.start()
        reactor_clock.advance(0)

        def verify_exit(_):
            # Runs once the exit request has completed.
            return self.assert_request_exit()

        exiting = watchdog.request_exit()
        exiting.addCallback(verify_exit)
        return exiting
    def test_wait_for_stop_before_start(self):
        """
        A daemon that timed out and is being killed by the watchdog is not
        started again until the stop has fully completed.
        """
        reactor_clock = Clock()
        watchdog = WatchDog(
            reactor_clock,
            broker=AsynchronousPingDaemon("test-broker"),
            monitor=BoringDaemon("test-monitor"),
            manager=BoringDaemon("test-manager"))

        # Replace stop() with one whose completion this test controls.
        pending_stop = Deferred()

        def slow_stop():
            return pending_stop
        watchdog.broker.stop = slow_stop

        watchdog.start_monitoring()

        for _ in range(5):
            reactor_clock.advance(5)
            watchdog.broker.fire_running(False)

        # The stop has not finished yet, so nothing has been started.
        self.assertEqual(watchdog.broker.boots, [])

        # As soon as the stop completes, the daemon is started.
        pending_stop.callback(None)
        self.assertEqual(watchdog.broker.boots, ["start"])
    def test_start_limited_daemons(self):
        """
        Only the daemons listed as enabled are started by start().
        """
        self.setup_daemons_mocks()

        # Enable nothing but the broker.
        watchdog = WatchDog(
            Clock(),
            enabled_daemons=[self.broker_factory],
            config=self.config)
        watchdog.start()

        self.broker.start.assert_called_once_with()
        for disabled in (self.monitor, self.manager):
            disabled.start.assert_not_called()
    def test_ping_reply_after_request_exit_should_not_restart_processes(self):
        """
        When request_exit occurs between a ping request and response, a failing
        ping response should not cause the process to be restarted.
        """
        self.setup_daemons_mocks()

        self.broker.start()
        self.monitor.start()
        self.manager.start()

        # The monitor's ping reply is held back so that it can be delivered
        # after request_exit() has been called.
        monitor_ping_result = Deferred()

        self.broker.is_running.return_value = succeed(True)
        self.monitor.is_running.return_value = monitor_ping_result
        self.manager.is_running.return_value = succeed(True)

        self.setup_request_exit()

        clock = Clock()

        dog = WatchDog(clock, config=self.config)
        dog.start()
        clock.advance(0)
        clock.advance(5)
        result = dog.request_exit()
        # Deliver the failing ping reply only now that exit is in progress.
        monitor_ping_result.callback(False)

        def check(_):
            # The monitor should never be explicitly stopped / restarted.
            self.monitor.stop.assert_not_called()
            # Start *is* called: once directly at the top of this test and,
            # presumably, once by dog.start(). Any further call would mean
            # the late failing ping caused a restart. This has to be an
            # assertion; assigning to call_count would merely overwrite the
            # mock's counter and verify nothing.
            self.assertEqual(self.monitor.start.call_count, 2)
            self.assert_request_exit()

        return result.addCallback(check)
    def test_ping_is_not_rescheduled_until_pings_complete(self):
        """
        No new round of pings is sent while replies to the previous round are
        still outstanding; pinging resumes after every daemon has answered.
        """
        reactor_clock = Clock()
        watchdog = WatchDog(
            reactor_clock,
            broker=AsynchronousPingDaemon("test-broker"),
            monitor=AsynchronousPingDaemon("test-monitor"),
            manager=AsynchronousPingDaemon("test-manager"))
        watchdog.start_monitoring()

        def assert_every_daemon_pinged(times):
            for pinged in watchdog.daemons:
                self.assertEqual(pinged.pings, times)

        # First interval: each daemon receives its first ping.
        reactor_clock.advance(5)
        assert_every_daemon_pinged(1)

        # Second interval: the replies are still pending, so no daemon has
        # been pinged again. Each daemon is checked and then answered in turn.
        reactor_clock.advance(5)
        for pinged in watchdog.daemons:
            self.assertEqual(pinged.pings, 1)
            pinged.fire_running(True)

        # With all replies in, the next interval sends a second ping.
        reactor_clock.advance(5)
        assert_every_daemon_pinged(2)
    def test_check_daemons(self):
        """
        Daemons are periodically checked to be running. Once N=5 of these
        checks have failed, the daemon is restarted.
        """
        reactor_clock = Clock()
        watchdog = WatchDog(
            reactor_clock,
            broker=AsynchronousPingDaemon("test-broker"),
            monitor=AsynchronousPingDaemon("test-monitor"),
            manager=AsynchronousPingDaemon("test-manager"))
        watchdog.start_monitoring()

        def failed_broker_round():
            # Advance the clock by one step of 5; the broker reports that it
            # is not running while the monitor and manager report success.
            reactor_clock.advance(5)
            watchdog.broker.fire_running(False)
            watchdog.monitor.fire_running(True)
            watchdog.manager.fire_running(True)

        # The first four failures are tolerated without any restart.
        for _ in range(4):
            failed_broker_round()
            self.assertEqual(watchdog.broker.boots, [])

        # The fifth failure makes the watchdog stop and start the broker.
        failed_broker_round()
        self.assertEqual(watchdog.broker.boots, [STOP, START])