Beispiel #1
0
  def test_success_outside_grace_period(self):
    '''
    Failures inside the grace period are ignored; a later passing check outside of it
    marks the task as running.
    '''
    self.append_health_checks(False, num_calls=2)
    self.append_health_checks(True)
    checker = HealthChecker(
        self._checker.health,
        interval_secs=self.interval_secs,
        clock=self._clock)
    checker.start()

    def settle():
      # Converge the checker thread, then require it to be waiting for one interval.
      assert self._clock.converge(threads=[checker.threaded_health_checker])
      self._clock.assert_waiting(checker.threaded_health_checker, self.interval_secs)

    # Rounds 1 and 2: the queued failures leave the task in TASK_STARTING.
    for _ in range(2):
      settle()
      assert checker.status == StatusResult(None, TaskState.Value('TASK_STARTING'))
      assert checker.threaded_health_checker.running is False
      self._clock.tick(self.interval_secs)

    # Round 3: the queued success flips the task to TASK_RUNNING.
    settle()
    assert checker.status == StatusResult('Task is healthy.', TaskState.Value('TASK_RUNNING'))
    assert checker.threaded_health_checker.running is True

    checker.stop()
    assert self._checker.health.call_count == 3
Beispiel #2
0
  def test_grace_period_2x_failure(self):
    '''
      Grace period is 2 x interval and all health checks fail.
      The first two failures are ignored; the third one fails the task.
    '''
    self.append_health_checks(False, num_calls=3)
    checker = HealthChecker(
        self._checker.health,
        interval_secs=self.interval_secs,
        clock=self._clock)
    checker.start()

    # (reason, task state) expected after each successive health check.
    rounds = (
        (None, 'TASK_STARTING'),
        (None, 'TASK_STARTING'),
        ('Failed health check! reason', 'TASK_FAILED'),
    )
    for index, (reason, state_name) in enumerate(rounds):
      if index > 0:
        # Every round after the first is triggered by advancing one interval.
        self._clock.tick(self.interval_secs)
      assert self._clock.converge(threads=[checker.threaded_health_checker])
      self._clock.assert_waiting(checker.threaded_health_checker, self.interval_secs)
      assert checker.status == StatusResult(reason, TaskState.Value(state_name))
      assert checker.threaded_health_checker.running is False

    checker.stop()
    assert self._checker.health.call_count == 3
Beispiel #3
0
  def test_consecutive_failures_failfast(self):
    '''Verify that the health check fails fast.

    Three failing results are queued, but only two checks run: the first failure is
    ignored inside the grace period and the second one fails the task.
    '''
    grace = self.initial_interval_secs
    period = self.interval_secs
    self.append_health_checks(False, num_calls=3)
    checker = HealthChecker(
        self._checker.health,
        interval_secs=period,
        grace_period_secs=grace,
        max_consecutive_failures=2,
        min_consecutive_successes=2,
        clock=self._clock)
    checker.start()

    def settle():
      # Converge the checker thread, then require it to be waiting for one interval.
      self._clock.converge(threads=[checker.threaded_health_checker])
      self._clock.assert_waiting(checker.threaded_health_checker, period)

    def failure_streak():
      return checker.metrics.sample()['consecutive_failures']

    # First check: the failure is ignored inside grace_period_secs.
    settle()
    assert checker.status == StatusResult(None, TaskState.Value('TASK_STARTING'))
    assert failure_streak() == 0

    # Second check: the failure is counted and the task is failed right away.
    self._clock.tick(period)
    settle()
    assert checker.status == StatusResult(
        'Failed health check! reason', TaskState.Value('TASK_FAILED'))
    assert failure_streak() == 1

    checker.stop()
    assert self._checker.health.call_count == 2
Beispiel #4
0
  def test_consecutive_failures(self):
    '''Verify that a task is unhealthy only after max_consecutive_failures is exceeded.

    Six results are queued (fail, fail, pass, fail, fail, fail) and the checker is built
    with max_consecutive_failures=2. The status must still be None after each of the
    first five clock ticks and must be TASK_FAILED after the sixth.
    '''
    initial_interval_secs = 2
    interval_secs = 1
    self.append_health_checks(False, num_calls=2)
    self.append_health_checks(True)
    self.append_health_checks(False, num_calls=3)
    hct = HealthChecker(
        self._checker.health,
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=2,
        clock=self._clock)
    hct.start()

    # 2 consecutive health check failures followed by a successful health check.
    # The first tick covers the initial interval; later ticks cover one interval each.
    self._clock.tick(initial_interval_secs)
    assert hct.status is None
    self._clock.tick(interval_secs)
    assert hct.status is None
    self._clock.tick(interval_secs)
    assert hct.status is None

    # 3 consecutive health check failures.
    self._clock.tick(interval_secs)
    assert hct.status is None
    self._clock.tick(interval_secs)
    assert hct.status is None
    self._clock.tick(interval_secs)
    # NOTE(review): thread_yield is defined outside this block; presumably it lets the
    # checker thread run before the status is read -- confirm.
    thread_yield()
    assert hct.status.status == TaskState.Value('TASK_FAILED')
    hct.stop()
    # All six queued results must have been consumed.
    assert self._checker.health.call_count == 6
Beispiel #5
0
 def test_initial_interval_whatev(self):
   '''With initial_interval_secs=0 a failing check fails the task right after start().'''
   self.append_health_checks(False)
   options = dict(interval_secs=5, initial_interval_secs=0, clock=self._clock)
   checker = HealthChecker(self._checker.health, **options)
   checker.start()

   # The clock is never advanced between start() and this assertion.
   reported = checker.status.status
   assert reported == TaskState.Value('TASK_FAILED')

   checker.stop()
   assert self._checker.health.call_count == 1
  def test_consecutive_failures(self):
    '''Verify that a task is unhealthy only after max_consecutive_failures is exceeded.

    Queued results are fail, fail, pass, fail, fail, fail with max_consecutive_failures=2.
    The consecutive_failures metric is checked after every health check.
    '''
    first_delay = 2
    period = 1
    slack = 0.001  # each tick overshoots the deadline by this much
    self.append_health_checks(False, num_calls=2)
    self.append_health_checks(True)
    self.append_health_checks(False, num_calls=3)
    checker = HealthChecker(
        self._checker.health,
        interval_secs=period,
        initial_interval_secs=first_delay,
        max_consecutive_failures=2,
        clock=self._clock)
    checker.start()
    self._clock.converge(threads=[checker.threaded_health_checker])

    def advance(seconds):
      # Tick past the deadline, converge, and require the thread to wait one interval.
      self._clock.tick(seconds + slack)
      self._clock.converge(threads=[checker.threaded_health_checker])
      self._clock.assert_waiting(checker.threaded_health_checker, amount=1)

    def failure_streak():
      return checker.metrics.sample()['consecutive_failures']

    # (seconds to advance, expected consecutive_failures) while the status stays None:
    # two failures, one success that resets the streak, then two more failures.
    quiet_steps = (
        (first_delay, 1),
        (period, 2),
        (period, 0),
        (period, 1),
        (period, 2),
    )
    for delay, streak in quiet_steps:
      advance(delay)
      assert checker.status is None
      assert failure_streak() == streak

    # The third consecutive failure exceeds the limit and fails the task.
    advance(period)
    assert checker.status.status == TaskState.Value('TASK_FAILED')
    assert failure_streak() == 3

    checker.stop()
    assert self._checker.health.call_count == 6
Beispiel #7
0
 def test_initial_interval_whatev(self):
   '''With grace_period_secs=0 the very first failing check fails the task.'''
   self.append_health_checks(False, 2)
   checker = HealthChecker(
       self._checker.health,
       interval_secs=self.interval_secs,
       grace_period_secs=0,
       clock=self._clock)
   checker.start()

   checker_threads = [checker.threaded_health_checker]
   self._clock.converge(threads=checker_threads)
   self._clock.assert_waiting(checker.threaded_health_checker, self.interval_secs)

   observed = checker.status
   assert observed == StatusResult(
       'Failed health check! reason', TaskState.Value('TASK_FAILED'))

   checker.stop()
   # Two failures were queued but only one check is expected to have run.
   assert self._checker.health.call_count == 1
Beispiel #8
0
 def test_initial_interval_2x(self):
   '''Check when a single failing health check fails the task if no initial interval is given.

   With interval_secs=5 the status is still None after 6 and 9 seconds of ticked clock
   time and is TASK_FAILED once 14 seconds have been ticked.
   NOTE(review): the test name suggests the default initial interval is 2 x interval_secs;
   that default is not visible in this block -- confirm.
   '''
   self.append_health_checks(False)
   hct = HealthChecker(self._checker.health, interval_secs=5, clock=self._clock)
   hct.start()
   # NOTE(review): thread_yield is defined outside this block; presumably it lets the
   # checker thread run before the status is read -- confirm.
   thread_yield()
   assert hct.status is None
   self._clock.tick(6)
   assert hct.status is None
   self._clock.tick(3)
   assert hct.status is None
   self._clock.tick(5)
   thread_yield()
   assert hct.status.status == TaskState.Value('TASK_FAILED')
   hct.stop()
   # Exactly one health check must have been executed.
   assert self._checker.health.call_count == 1
Beispiel #9
0
  def test_grace_period_2x_success(self):
    '''
    Grace period is 2 x interval and health checks succeed: the first passing check
    already marks the task as running.
    '''
    self.append_health_checks(True, num_calls=2)
    checker = HealthChecker(
        self._checker.health,
        interval_secs=self.interval_secs,
        clock=self._clock)
    checker.start()

    converged = self._clock.converge(threads=[checker.threaded_health_checker])
    assert converged
    self._clock.assert_waiting(checker.threaded_health_checker, self.interval_secs)

    observed = checker.status
    assert observed == StatusResult('Task is healthy.', TaskState.Value('TASK_RUNNING'))
    assert checker.threaded_health_checker.running is True

    checker.stop()
    # Only one of the two queued results is expected to have been consumed.
    assert self._checker.health.call_count == 1
 def test_initial_interval_whatev(self):
   '''With initial_interval_secs=0 a failing check fails the task without any clock tick.'''
   self.append_health_checks(False, 2)
   options = dict(interval_secs=5, initial_interval_secs=0, clock=self._clock)
   checker = HealthChecker(self._checker.health, **options)
   checker.start()

   self._clock.converge(threads=[checker.threaded_health_checker])
   self._clock.assert_waiting(checker.threaded_health_checker, amount=5)

   reported = checker.status.status
   assert reported == TaskState.Value('TASK_FAILED')

   checker.stop()
   # Implementation detail: one health check runs in the initializer and another in
   # the run loop, hence two calls.  Expect this count to break if that ever changes.
   assert self._checker.health.call_count == 2
 def test_initial_interval_2x(self):
   '''
   With interval_secs=5 and no initial interval given, the checker thread first waits
   10 seconds.  The status stays None after 6 and 9 ticked seconds and becomes
   TASK_FAILED once 14 seconds have been ticked.
   '''
   self.append_health_checks(False)
   checker = HealthChecker(self._checker.health, interval_secs=5, clock=self._clock)
   checker.start()

   def converged():
     return self._clock.converge(threads=[checker.threaded_health_checker])

   assert converged()
   self._clock.assert_waiting(checker.threaded_health_checker, 10)
   assert checker.status is None

   # Neither of these ticks reaches the 10 second mark.
   for elapsed in (6, 3):
     self._clock.tick(elapsed)
     assert converged()
     assert checker.status is None

   # This tick crosses it, so the single failing check is reported.
   self._clock.tick(5)
   assert converged()
   assert checker.status.status == TaskState.Value('TASK_FAILED')

   checker.stop()
   assert self._checker.health.call_count == 1
Beispiel #12
0
    def test_health_checker_metrics(self):
        '''Check the total latency and check-count metrics around a check that takes 0.5s.'''
        def slow_check():
            # Each check sleeps 0.5s on the test clock before reporting success.
            self._clock.sleep(0.5)
            return (True, None)

        checker = HealthChecker(
            slow_check,
            interval_secs=1,
            initial_interval_secs=1,
            clock=self._clock)
        checker.start()

        slack = 0.001  # each tick overshoots the deadline by this much

        def advance(seconds):
            self._clock.tick(seconds + slack)
            self._clock.converge(threads=[checker.threaded_health_checker])

        def expect_waiting(seconds):
            self._clock.assert_waiting(checker.threaded_health_checker, amount=seconds)

        def expect_latency(total):
            assert checker._total_latency == total
            assert checker.metrics.sample()['total_latency_secs'] == total

        def checks_done():
            return checker.metrics.sample()['checks']

        # Before the first check: waiting out the initial interval, nothing recorded.
        self._clock.converge(threads=[checker.threaded_health_checker])
        expect_waiting(1)
        expect_latency(0)

        # The first check has started and is sleeping; metrics are still untouched.
        advance(1.0)
        expect_waiting(0.5)
        expect_latency(0)
        assert checks_done() == 0

        # The first check has finished; the thread now waits interval_secs.
        advance(0.5)
        expect_waiting(1)
        expect_latency(0.5)
        assert checks_done() == 1

        # Run a complete second check: one interval of waiting plus the 0.5s check.
        advance(1.0)
        advance(0.5)
        expect_waiting(1)
        expect_latency(1.0)
        assert checks_done() == 2
Beispiel #13
0
  def test_consecutive_failures_max_failures(self):
    '''Verify that a task is unhealthy after max_consecutive_failures is exceeded.

    Two passing results followed by three failing ones are queued, with
    max_consecutive_failures=2 and min_consecutive_successes=2.
    '''
    grace = self.initial_interval_secs
    period = self.interval_secs
    self.append_health_checks(True, num_calls=2)
    self.append_health_checks(False, num_calls=3)
    checker = HealthChecker(
        self._checker.health,
        interval_secs=period,
        grace_period_secs=grace,
        max_consecutive_failures=2,
        min_consecutive_successes=2,
        clock=self._clock)
    checker.start()

    def settle():
      # Converge the checker thread, then require it to be waiting for one interval.
      self._clock.converge(threads=[checker.threaded_health_checker])
      self._clock.assert_waiting(checker.threaded_health_checker, period)

    def failure_streak():
      return checker.metrics.sample()['consecutive_failures']

    def healthy():
      return StatusResult('Task is healthy.', TaskState.Value('TASK_RUNNING'))

    # First success: one success is not enough yet, the task is still starting.
    settle()
    assert checker.status == StatusResult(None, TaskState.Value('TASK_STARTING'))
    assert failure_streak() == 0

    # Second success: the task is now running.
    self._clock.tick(period)
    settle()
    assert checker.status == healthy()
    assert failure_streak() == 0
    assert checker.threaded_health_checker.running is True

    # The first two failures are counted but tolerated.
    for streak in (1, 2):
      self._clock.tick(period)
      settle()
      assert checker.status == healthy()
      assert failure_streak() == streak

    # The third consecutive failure exceeds the limit and fails the task.
    self._clock.tick(period)
    settle()
    assert checker.status == StatusResult(
        'Failed health check! reason', TaskState.Value('TASK_FAILED'))
    assert failure_streak() == 3

    checker.stop()
    assert self._checker.health.call_count == 5
Beispiel #14
0
  def test_consecutive_failures(self):
    '''Verify that a task is unhealthy only after max_consecutive_failures is exceeded.

    Six results are queued (fail, fail, pass, fail, fail, fail) and the checker is built
    with max_consecutive_failures=2. The status must still be None after each of the
    first five clock ticks and must be TASK_FAILED after the sixth.
    '''
    initial_interval_secs = 2
    interval_secs = 1
    self.append_health_checks(False, num_calls=2)
    self.append_health_checks(True)
    self.append_health_checks(False, num_calls=3)
    hct = HealthChecker(
        self._checker.health,
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=2,
        clock=self._clock)
    hct.start()

    # 2 consecutive health check failures followed by a successful health check.
    # The first tick covers the initial interval; later ticks cover one interval each.
    self._clock.tick(initial_interval_secs)
    assert hct.status is None
    self._clock.tick(interval_secs)
    assert hct.status is None
    self._clock.tick(interval_secs)
    assert hct.status is None

    # 3 consecutive health check failures.
    self._clock.tick(interval_secs)
    assert hct.status is None
    self._clock.tick(interval_secs)
    assert hct.status is None
    self._clock.tick(interval_secs)
    # NOTE(review): thread_yield is defined outside this block; presumably it lets the
    # checker thread run before the status is read -- confirm.
    thread_yield()
    assert hct.status.status == TaskState.Value('TASK_FAILED')
    hct.stop()
    # All six queued results must have been consumed.
    assert self._checker.health.call_count == 6
Beispiel #15
0
    def setUp(self):
        '''Create the mocked collaborators and two HealthChecker instances.

        One checker receives None as its second argument, the other receives the
        mocked sandbox; everything else is shared between them.
        '''
        self.initial_interval_secs = 1
        self.interval_secs = 5
        self.max_consecutive_failures = 2

        # The health callable always reports success.
        self.health = mock.Mock(return_value=(True, 'Fake'))

        # The sandbox mock reports that it exists and is rooted at /root.
        self.sandbox = mock.Mock(spec_set=SandboxInterface)
        self.sandbox.exists.return_value = True
        self.sandbox.root = '/root'

        # The clock mock is frozen: time() always returns 1.0.
        self.clock = mock.Mock(spec=time)
        self.clock.time.return_value = 1.0

        shared_args = (
            self.interval_secs,
            self.initial_interval_secs,
            self.max_consecutive_failures,
            self.clock,
        )
        self.health_checker = HealthChecker(self.health, None, *shared_args)
        self.health_checker_sandbox_exists = HealthChecker(
            self.health, self.sandbox, *shared_args)
  def test_health_checker_metrics(self):
    '''Check the total latency and check-count metrics around a check that takes 0.5s.'''
    def slow_check():
      # Each check sleeps 0.5s on the test clock before reporting success.
      self._clock.sleep(0.5)
      return (True, None)

    checker = HealthChecker(
        slow_check,
        interval_secs=1,
        initial_interval_secs=1,
        clock=self._clock)
    checker.start()

    slack = 0.001  # each tick overshoots the deadline by this much

    def advance(seconds):
      self._clock.tick(seconds + slack)
      self._clock.converge(threads=[checker.threaded_health_checker])

    def expect_waiting(seconds):
      self._clock.assert_waiting(checker.threaded_health_checker, amount=seconds)

    def expect_latency(total):
      assert checker._total_latency == total
      assert checker.metrics.sample()['total_latency_secs'] == total

    def checks_done():
      return checker.metrics.sample()['checks']

    # Before the first check: waiting out the initial interval, nothing recorded.
    self._clock.converge(threads=[checker.threaded_health_checker])
    expect_waiting(1)
    expect_latency(0)

    # The first check has started and is sleeping; metrics are still untouched.
    advance(1.0)
    expect_waiting(0.5)
    expect_latency(0)
    assert checks_done() == 0

    # The first check has finished; the thread now waits interval_secs.
    advance(0.5)
    expect_waiting(1)
    expect_latency(0.5)
    assert checks_done() == 1

    # Run a complete second check: one interval of waiting plus the 0.5s check.
    advance(1.0)
    advance(0.5)
    expect_waiting(1)
    expect_latency(1.0)
    assert checks_done() == 2
 def test_initial_interval_whatev(self):
     '''With initial_interval_secs=0 a failing check fails the task right after start().'''
     self.append_health_checks(False)
     options = dict(interval_secs=5, initial_interval_secs=0, clock=self._clock)
     checker = HealthChecker(self._checker.health, **options)
     checker.start()

     # The clock is never advanced between start() and this assertion.
     reported = checker.status.status
     assert reported == TaskState.Value('TASK_FAILED')

     checker.stop()
     assert self._checker.health.call_count == 1
Beispiel #18
0
    def test_consecutive_failures(self):
        '''Verify that a task is unhealthy only after max_consecutive_failures is exceeded.

        Queued results are fail, fail, pass, fail, fail, fail with
        max_consecutive_failures=2. The consecutive_failures metric is checked after
        every health check.
        '''
        first_delay = 2
        period = 1
        slack = 0.001  # each tick overshoots the deadline by this much
        self.append_health_checks(False, num_calls=2)
        self.append_health_checks(True)
        self.append_health_checks(False, num_calls=3)
        checker = HealthChecker(
            self._checker.health,
            interval_secs=period,
            initial_interval_secs=first_delay,
            max_consecutive_failures=2,
            clock=self._clock)
        checker.start()
        self._clock.converge(threads=[checker.threaded_health_checker])

        def advance(seconds):
            # Tick past the deadline, converge, and require the thread to wait one interval.
            self._clock.tick(seconds + slack)
            self._clock.converge(threads=[checker.threaded_health_checker])
            self._clock.assert_waiting(checker.threaded_health_checker, amount=1)

        def failure_streak():
            return checker.metrics.sample()['consecutive_failures']

        # (seconds to advance, expected consecutive_failures) while the status stays None:
        # two failures, one success that resets the streak, then two more failures.
        quiet_steps = (
            (first_delay, 1),
            (period, 2),
            (period, 0),
            (period, 1),
            (period, 2),
        )
        for delay, streak in quiet_steps:
            advance(delay)
            assert checker.status is None
            assert failure_streak() == streak

        # The third consecutive failure exceeds the limit and fails the task.
        advance(period)
        assert checker.status.status == TaskState.Value('TASK_FAILED')
        assert failure_streak() == 3

        checker.stop()
        assert self._checker.health.call_count == 6
Beispiel #19
0
 def test_initial_interval_whatev(self):
     '''With initial_interval_secs=0 a failing check fails the task without any clock tick.'''
     self.append_health_checks(False, 2)
     options = dict(interval_secs=5, initial_interval_secs=0, clock=self._clock)
     checker = HealthChecker(self._checker.health, **options)
     checker.start()

     self._clock.converge(threads=[checker.threaded_health_checker])
     self._clock.assert_waiting(checker.threaded_health_checker, amount=5)

     reported = checker.status.status
     assert reported == TaskState.Value('TASK_FAILED')

     checker.stop()
     # Implementation detail: one health check runs in the initializer and another in
     # the run loop, hence two calls.  Expect this count to break if that ever changes.
     assert self._checker.health.call_count == 2
Beispiel #20
0
 def test_initial_interval_2x(self):
   '''Check when a single failing health check fails the task if no initial interval is given.

   With interval_secs=5 the status is still None after 6 and 9 seconds of ticked clock
   time and is TASK_FAILED once 14 seconds have been ticked.
   NOTE(review): the test name suggests the default initial interval is 2 x interval_secs;
   that default is not visible in this block -- confirm.
   '''
   self.append_health_checks(False)
   hct = HealthChecker(self._checker.health, interval_secs=5, clock=self._clock)
   hct.start()
   # NOTE(review): thread_yield is defined outside this block; presumably it lets the
   # checker thread run before the status is read -- confirm.
   thread_yield()
   assert hct.status is None
   self._clock.tick(6)
   assert hct.status is None
   self._clock.tick(3)
   assert hct.status is None
   self._clock.tick(5)
   thread_yield()
   assert hct.status.status == TaskState.Value('TASK_FAILED')
   hct.stop()
   # Exactly one health check must have been executed.
   assert self._checker.health.call_count == 1
Beispiel #21
0
 def test_initial_interval_2x(self):
     '''
     With interval_secs=5 and no initial interval given, the checker thread first waits
     10 seconds.  The status stays None after 6 and 9 ticked seconds and becomes
     TASK_FAILED once 14 seconds have been ticked.
     '''
     self.append_health_checks(False)
     checker = HealthChecker(self._checker.health, interval_secs=5, clock=self._clock)
     checker.start()

     def converged():
         return self._clock.converge(threads=[checker.threaded_health_checker])

     assert converged()
     self._clock.assert_waiting(checker.threaded_health_checker, 10)
     assert checker.status is None

     # Neither of these ticks reaches the 10 second mark.
     for elapsed in (6, 3):
         self._clock.tick(elapsed)
         assert converged()
         assert checker.status is None

     # This tick crosses it, so the single failing check is reported.
     self._clock.tick(5)
     assert converged()
     assert checker.status.status == TaskState.Value('TASK_FAILED')

     checker.stop()
     assert self._checker.health.call_count == 1