def check_celery():
    """Report whether any monitored celery queue is blocked for too long.

    Walks ``settings.CELERY_HEARTBEAT_THRESHOLDS`` (queue name -> threshold
    in seconds), skipping queues whose threshold is falsy.  A queue is
    reported when its heartbeat was never recorded, or when its blockage
    duration exceeds the larger of its threshold and five minutes.

    Returns a ``ServiceStatus``: ``(True, "OK")`` when nothing is blocked,
    otherwise ``False`` with one message line per blocked queue.
    """
    minimum_alert_window = datetime.timedelta(minutes=5)
    problems = []

    for queue_name, seconds in settings.CELERY_HEARTBEAT_THRESHOLDS.items():
        if not seconds:
            continue

        allowed = datetime.timedelta(seconds=seconds)
        try:
            blocked_for = Heartbeat(queue_name).get_and_report_blockage_duration()
        except HeartbeatNeverRecorded:
            problems.append((queue_name, 'as long as we can see', allowed))
            continue

        # We get a lot of self-resolving celery "downtime" under 5 minutes
        # so to make actionable, we never alert on blockage under 5 minutes
        # It is still counted as out of SLA for the celery uptime metric in datadog
        if blocked_for > max(allowed, minimum_alert_window):
            problems.append((queue_name, blocked_for, allowed))

    if not problems:
        return ServiceStatus(True, "OK")

    message_lines = [
        "{} has been blocked for {} (max allowed is {})".format(*problem)
        for problem in problems
    ]
    return ServiceStatus(False, '\n'.join(message_lines))
def _is_alive():
    """Return True when the periodic queue's heartbeat is recent enough.

    The queue name is read from ``settings.CELERY_PERIODIC_QUEUE`` and
    falls back to ``'celery'``.  The result is False when the heartbeat has
    never been recorded or when the blockage duration exceeds five minutes.
    """
    queue_name = getattr(settings, 'CELERY_PERIODIC_QUEUE', 'celery')
    try:
        blocked_for = Heartbeat(queue_name).get_blockage_duration()
    except HeartbeatNeverRecorded:
        return False
    else:
        longest_tolerated = datetime.timedelta(minutes=5)
        return not blocked_for > longest_tolerated
def check_celery():
    """Report whether any monitored celery queue is blocked past its threshold.

    Walks ``settings.CELERY_HEARTBEAT_THRESHOLDS`` (queue name -> threshold
    in seconds), skipping queues whose threshold is falsy.  A queue is
    reported when its heartbeat was never recorded, or when its blockage
    duration exceeds its threshold.

    Returns a ``ServiceStatus``: ``(True, "OK")`` when nothing is blocked,
    otherwise ``False`` with one message line per blocked queue.
    """
    problems = []

    for queue_name, seconds in settings.CELERY_HEARTBEAT_THRESHOLDS.items():
        if not seconds:
            continue

        allowed = datetime.timedelta(seconds=seconds)
        try:
            blocked_for = Heartbeat(queue_name).get_and_report_blockage_duration()
        except HeartbeatNeverRecorded:
            # No heartbeat at all: report with a placeholder duration.
            problems.append((queue_name, 'as long as we can see', allowed))
            continue

        if blocked_for > allowed:
            problems.append((queue_name, blocked_for, allowed))

    if not problems:
        return ServiceStatus(True, "OK")

    message_lines = [
        "{} has been blocked for {} (max allowed is {})".format(*problem)
        for problem in problems
    ]
    return ServiceStatus(False, '\n'.join(message_lines))
from __future__ import absolute_import
from __future__ import print_function

from django.conf import settings

from corehq.celery_monitoring.heartbeat import Heartbeat

# Create one periodic_task named heartbeat__{queue} for each queue.
# Only the queue names are needed here; the threshold values are not used
# when building the tasks, so iterate the mapping's keys directly.
for queue in settings.CELERY_HEARTBEAT_THRESHOLDS:
    heartbeat = Heartbeat(queue)
    # Bind the task as a module attribute under its periodic_task_name.
    # globals() makes the module-namespace write explicit (at module scope
    # it is the same namespace locals() returns).
    globals()[heartbeat.periodic_task_name] = heartbeat.make_periodic_task()
def test_import_tasks():
    """Check the tasks module exposes a heartbeat task for every queue.

    For each queue in ``settings.CELERY_HEARTBEAT_THRESHOLDS`` the attribute
    named by ``Heartbeat(queue).periodic_task_name`` is looked up on the
    ``tasks`` module; a missing attribute raises ``AttributeError``.
    """
    from . import tasks as tasks_module

    for queue_name in settings.CELERY_HEARTBEAT_THRESHOLDS:
        expected_attribute = Heartbeat(queue_name).periodic_task_name
        # The lookup itself is the assertion: it raises if the task is absent.
        getattr(tasks_module, expected_attribute)
def test_get_and_report_blockage_duration():
    """Smoke test: reporting the blockage duration must not raise.

    Marks the 'celery_periodic' heartbeat as seen first so that a last-seen
    value exists, then calls ``get_and_report_blockage_duration``.
    """
    heartbeat = Heartbeat('celery_periodic')
    heartbeat.mark_seen()

    # No assertion on the value; completing without an exception is the check.
    heartbeat.get_and_report_blockage_duration()
def test_heartbeat():
    """Exercise the last-seen / blockage-duration lifecycle of a Heartbeat.

    After clearing, both getters raise ``HeartbeatNeverRecorded``.  After
    marking the heartbeat at a frozen time, the last-seen value equals that
    time and the blockage is zero; ten minutes later the last-seen value is
    unchanged and the blockage equals ten minutes minus
    ``HEARTBEAT_FREQUENCY``.
    """
    heartbeat = Heartbeat('celery_periodic')
    heartbeat.clear_last_seen()

    # With nothing recorded, both accessors must raise.
    with assert_raises(HeartbeatNeverRecorded):
        heartbeat.get_last_seen()
    with assert_raises(HeartbeatNeverRecorded):
        heartbeat.get_blockage_duration()

    marked_at = datetime.datetime.utcnow()
    ten_minutes = datetime.timedelta(minutes=10)

    with freeze_time(marked_at):
        heartbeat.mark_seen()
        eq(heartbeat.get_last_seen(), marked_at)
        eq(heartbeat.get_blockage_duration(), datetime.timedelta(seconds=0))

    with freeze_time(marked_at + ten_minutes):
        eq(heartbeat.get_last_seen(), marked_at)
        eq(heartbeat.get_blockage_duration(), ten_minutes - HEARTBEAT_FREQUENCY)
from __future__ import absolute_import
from __future__ import print_function

from django.conf import settings

from corehq.celery_monitoring.heartbeat import Heartbeat

# Create one periodic_task named heartbeat__{queue} for each queue.
# The per-queue threshold values are not needed to build the tasks, so only
# the queue names (the mapping's keys) are iterated.
for queue in settings.CELERY_HEARTBEAT_THRESHOLDS:
    heartbeat = Heartbeat(queue)
    # Write to the module namespace explicitly via globals(); at module
    # scope this is the same namespace that locals() would return.
    globals()[heartbeat.periodic_task_name] = heartbeat.make_periodic_task()