Ejemplo n.º 1
0
    def test_queue_burst_long_time_to_clear_allowed(self) -> None:
        """
        For a queue that is allowed > 300s to clear a burst of events,
        we need to verify that the checker will not stop categorizing this as a burst
        while the worker is still processing the events, within the allowed time limit.
        """
        start_time = time.time()
        with mock.patch.dict('scripts.lib.check_rabbitmq_queue.CRITICAL_SECONDS_TO_CLEAR_FOR_BURSTS',
                             {'name': 600}), \
                mock.patch.dict('scripts.lib.check_rabbitmq_queue.MAX_SECONDS_TO_CLEAR_FOR_BURSTS',
                                {'name': 600}):
            with mock.patch('time.time', return_value=start_time + 599):
                result = analyze_queue_stats(
                    'name', {
                        'update_time': time.time(),
                        'current_queue_size': 599,
                        'queue_last_emptied_timestamp': start_time,
                        'recent_average_consume_time': 1
                    }, 599)
                self.assertEqual(result['status'], OK)

            with mock.patch('time.time', return_value=start_time + 601):
                result = analyze_queue_stats(
                    'name', {
                        'update_time': time.time(),
                        'current_queue_size': 599,
                        'queue_last_emptied_timestamp': start_time,
                        'recent_average_consume_time': 1
                    }, 599)
                self.assertEqual(result['status'], CRITICAL)
Ejemplo n.º 2
0
    def test_queue_burst(self) -> None:
        """Test logic for just after a large number of events were added
        to an empty queue.  Happens routinely for digest emails, for example."""
        result = analyze_queue_stats(
            'name', {
                'update_time': time.time(),
                'current_queue_size': 10000,
                'queue_last_emptied_timestamp': time.time() - 1,
                'recent_average_consume_time': 1
            }, 10000)
        self.assertEqual(result['status'], CRITICAL)
        self.assertIn('clearing the burst', result['message'])

        # verify logic around MAX_SECONDS_TO_CLEAR_FOR_BURSTS.
        with mock.patch.dict(
                'scripts.lib.check_rabbitmq_queue.MAX_SECONDS_TO_CLEAR_FOR_BURSTS',
            {'name': 10}):
            result = analyze_queue_stats(
                'name', {
                    'update_time': time.time(),
                    'current_queue_size': 11,
                    'queue_last_emptied_timestamp': time.time() - 1,
                    'recent_average_consume_time': 1
                }, 11)
            self.assertEqual(result['status'], WARNING)
            self.assertIn('clearing the burst', result['message'])

            result = analyze_queue_stats(
                'name', {
                    'update_time': time.time(),
                    'current_queue_size': 9,
                    'queue_last_emptied_timestamp': time.time() - 1,
                    'recent_average_consume_time': 1
                }, 9)
            self.assertEqual(result['status'], OK)
Ejemplo n.º 3
0
    def test_queue_normal(self) -> None:
        """10000 events and each takes a second => it'll take a long time to empty."""
        result = analyze_queue_stats(
            "name",
            {
                "update_time": time.time(),
                "current_queue_size": 10000,
                "queue_last_emptied_timestamp": time.time() - 10000,
                "recent_average_consume_time": 1,
            },
            10000,
        )
        self.assertEqual(result["status"], CRITICAL)
        self.assertIn("clearing the backlog", result["message"])

        # If we're doing 10K/sec, it's OK.
        result = analyze_queue_stats(
            "name",
            {
                "update_time": time.time(),
                "current_queue_size": 10000,
                "queue_last_emptied_timestamp": time.time() - 10000,
                "recent_average_consume_time": 0.0001,
            },
            10000,
        )
        self.assertEqual(result["status"], OK)

        # Verify logic around whether it'll take MAX_SECONDS_TO_CLEAR to clear queue.
        with mock.patch.dict("scripts.lib.check_rabbitmq_queue.MAX_SECONDS_TO_CLEAR", {"name": 10}):
            result = analyze_queue_stats(
                "name",
                {
                    "update_time": time.time(),
                    "current_queue_size": 11,
                    "queue_last_emptied_timestamp": time.time() - 10000,
                    "recent_average_consume_time": 1,
                },
                11,
            )
            self.assertEqual(result["status"], WARNING)
            self.assertIn("clearing the backlog", result["message"])

            result = analyze_queue_stats(
                "name",
                {
                    "update_time": time.time(),
                    "current_queue_size": 9,
                    "queue_last_emptied_timestamp": time.time() - 10000,
                    "recent_average_consume_time": 1,
                },
                9,
            )
            self.assertEqual(result["status"], OK)
Ejemplo n.º 4
0
    def test_queue_stuck(self) -> None:
        """Last update > 5 minutes ago and there's events in the queue."""

        result = analyze_queue_stats("name",
                                     {"update_time": time.time() - 301}, 100)
        self.assertEqual(result["status"], CRITICAL)
        self.assertIn("queue appears to be stuck", result["message"])
Ejemplo n.º 5
0
    def test_queue_stuck(self) -> None:
        """Last update > 5 minutes ago and there's events in the queue."""

        result = analyze_queue_stats('name',
                                     {'update_time': time.time() - 301}, 100)
        self.assertEqual(result['status'], CRITICAL)
        self.assertIn('queue appears to be stuck', result['message'])
Ejemplo n.º 6
0
    def test_queue_normal(self) -> None:
        """10000 events and each takes a second => it'll take a long time to empty."""
        result = analyze_queue_stats(
            'name', {
                'update_time': time.time(),
                'current_queue_size': 10000,
                'queue_last_emptied_timestamp': time.time() - 10000,
                'recent_average_consume_time': 1
            }, 10000)
        self.assertEqual(result['status'], CRITICAL)
        self.assertIn('clearing the backlog', result['message'])

        # If we're doing 10K/sec, it's OK.
        result = analyze_queue_stats(
            'name', {
                'update_time': time.time(),
                'current_queue_size': 10000,
                'queue_last_emptied_timestamp': time.time() - 10000,
                'recent_average_consume_time': 0.0001
            }, 10000)
        self.assertEqual(result['status'], OK)

        # Verify logic around whether it'll take MAX_SECONDS_TO_CLEAR_NORMAL to clear queue.
        with mock.patch.dict(
                'scripts.lib.check_rabbitmq_queue.MAX_SECONDS_TO_CLEAR_NORMAL',
            {'name': 10}):
            result = analyze_queue_stats(
                'name', {
                    'update_time': time.time(),
                    'current_queue_size': 11,
                    'queue_last_emptied_timestamp': time.time() - 10000,
                    'recent_average_consume_time': 1
                }, 11)
            self.assertEqual(result['status'], WARNING)
            self.assertIn('clearing the backlog', result['message'])

            result = analyze_queue_stats(
                'name', {
                    'update_time': time.time(),
                    'current_queue_size': 9,
                    'queue_last_emptied_timestamp': time.time() - 10000,
                    'recent_average_consume_time': 1
                }, 9)
            self.assertEqual(result['status'], OK)
Ejemplo n.º 7
0
    def test_queue_stuck(self) -> None:
        """Last update > 5 minutes ago and there's events in the queue.

        In theory, we could be having bad luck with a race where in
        the last (event_handing_time * 50) a burst was added, but it's
        unlikely and shouldn't fail 2 in a row for Nagios anyway.
        """
        result = analyze_queue_stats('name',
                                     {'update_time': time.time() - 301}, 100)
        self.assertEqual(result['status'], CRITICAL)
        self.assertIn('queue appears to be stuck', result['message'])
Ejemplo n.º 8
0
 def test_queue_just_started(self) -> None:
     """
     We just started processing a burst of events, and haven't processed enough
     to log productivity statistics yet.
     """
     result = analyze_queue_stats(
         'name', {
             'update_time': time.time(),
             'current_queue_size': 10000,
             'recent_average_consume_time': None
         }, 10000)
     self.assertEqual(result['status'], OK)
Ejemplo n.º 9
0
 def test_queue_just_started(self) -> None:
     """
     We just started processing a burst of events, and haven't processed enough
     to log productivity statistics yet.
     """
     result = analyze_queue_stats(
         "name",
         {
             "update_time": time.time(),
             "recent_average_consume_time": None,
         },
         10000,
     )
     self.assertEqual(result["status"], OK)
Ejemplo n.º 10
0
 def test_no_stats_available(self) -> None:
     result = analyze_queue_stats('name', {}, 0)
     self.assertEqual(result['status'], UNKNOWN)
Ejemplo n.º 11
0
 def test_no_stats_available(self) -> None:
     result = analyze_queue_stats("name", {}, 0)
     self.assertEqual(result["status"], UNKNOWN)