def check_celery():
    """Report whether any monitored celery queue has been blocked too long.

    Walks ``settings.CELERY_HEARTBEAT_THRESHOLDS`` (queue name -> threshold in
    seconds) and skips queues whose threshold is falsy. A queue is reported
    when its heartbeat was never recorded, or when its blockage duration
    exceeds the larger of its own threshold and five minutes.

    Returns ``ServiceStatus(True, "OK")`` when nothing is blocked, otherwise
    ``ServiceStatus(False, ...)`` with one message line per blocked queue.
    """
    # We get a lot of self-resolving celery "downtime" under 5 minutes,
    # so to keep alerts actionable we never alert on blockage under 5 minutes.
    # It is still counted as out of SLA for the celery uptime metric in datadog.
    alert_floor = datetime.timedelta(minutes=5)
    problems = []

    for queue_name, seconds in settings.CELERY_HEARTBEAT_THRESHOLDS.items():
        if not seconds:
            continue

        allowed = datetime.timedelta(seconds=seconds)
        try:
            blocked_for = Heartbeat(
                queue_name).get_and_report_blockage_duration()
        except HeartbeatNeverRecorded:
            # No heartbeat on record at all: always reported.
            problems.append((queue_name, 'as long as we can see', allowed))
            continue

        if blocked_for > max(allowed, alert_floor):
            problems.append((queue_name, blocked_for, allowed))

    if not problems:
        return ServiceStatus(True, "OK")

    report_lines = [
        "{} has been blocked for {} (max allowed is {})".format(*problem)
        for problem in problems
    ]
    return ServiceStatus(False, '\n'.join(report_lines))
Exemple #2
0
def _is_alive():
    """Return whether the periodic queue's heartbeat is recent enough.

    The queue name is read from ``settings.CELERY_PERIODIC_QUEUE`` and falls
    back to ``'celery'``. The result is ``False`` when the heartbeat was never
    recorded or when the blockage duration exceeds five minutes, and ``True``
    otherwise.
    """
    queue_name = getattr(settings, 'CELERY_PERIODIC_QUEUE', 'celery')
    max_blockage = datetime.timedelta(minutes=5)
    try:
        blocked_for = Heartbeat(queue_name).get_blockage_duration()
    except HeartbeatNeverRecorded:
        return False
    return not (blocked_for > max_blockage)
def check_celery():
    """Report whether any monitored celery queue exceeds its blockage threshold.

    Iterates ``settings.CELERY_HEARTBEAT_THRESHOLDS`` (queue name -> threshold
    in seconds), ignoring queues with a falsy threshold. A queue is reported
    when its heartbeat was never recorded or when its blockage duration is
    greater than its threshold.

    Returns ``ServiceStatus(True, "OK")`` when no queue is blocked, otherwise
    ``ServiceStatus(False, ...)`` with one message line per blocked queue.
    """
    offenders = []

    for name, limit_seconds in settings.CELERY_HEARTBEAT_THRESHOLDS.items():
        if not limit_seconds:
            continue

        limit = datetime.timedelta(seconds=limit_seconds)
        try:
            stalled_for = Heartbeat(name).get_and_report_blockage_duration()
        except HeartbeatNeverRecorded:
            # No heartbeat on record at all: always reported.
            offenders.append((name, 'as long as we can see', limit))
            continue

        if stalled_for > limit:
            offenders.append((name, stalled_for, limit))

    if not offenders:
        return ServiceStatus(True, "OK")

    template = "{} has been blocked for {} (max allowed is {})"
    message = '\n'.join(template.format(*entry) for entry in offenders)
    return ServiceStatus(False, message)
Exemple #4
0
from __future__ import absolute_import
from __future__ import print_function
from django.conf import settings

from corehq.celery_monitoring.heartbeat import Heartbeat

# Create one periodic_task named heartbeat__{queue} for each queue.
# Only the queue names (the dict keys) are used in this loop; the threshold
# values are unpacked but never read here.
for queue, time_to_start_alert_threshold in settings.CELERY_HEARTBEAT_THRESHOLDS.items(
):
    heartbeat = Heartbeat(queue)
    # At module level locals() is the module namespace, so this binds the new
    # task as a module attribute named by heartbeat.periodic_task_name.
    # NOTE(review): presumably this is what lets celery discover the task by
    # name -- confirm against the celery task autodiscovery setup.
    locals()[heartbeat.periodic_task_name] = heartbeat.make_periodic_task()
Exemple #5
0
def test_import_tasks():
    """Check that the tasks module exposes a heartbeat task for every queue.

    For each queue in ``settings.CELERY_HEARTBEAT_THRESHOLDS`` the attribute
    named by ``Heartbeat(queue).periodic_task_name`` is looked up on the
    sibling ``tasks`` module; a missing attribute raises ``AttributeError``
    and fails the test.
    """
    from . import tasks
    for queue_name in settings.CELERY_HEARTBEAT_THRESHOLDS:
        expected_task_name = Heartbeat(queue_name).periodic_task_name
        # The lookup itself is the assertion.
        getattr(tasks, expected_task_name)
Exemple #6
0
def test_get_and_report_blockage_duration():
    """Smoke test: reporting the blockage duration after a heartbeat must not raise."""
    heartbeat = Heartbeat('celery_periodic')
    heartbeat.mark_seen()
    # No assertion on the value; the call completing without error is the check.
    heartbeat.get_and_report_blockage_duration()
Exemple #7
0
def test_heartbeat():
    """Exercise the last-seen and blockage-duration accessors of ``Heartbeat``.

    Covers three states: no heartbeat recorded (both accessors raise
    ``HeartbeatNeverRecorded``), a heartbeat marked at the current frozen time,
    and the same heartbeat observed ten minutes later.
    """
    heartbeat = Heartbeat('celery_periodic')
    heartbeat.clear_last_seen()

    # With the last-seen record cleared, both accessors must raise.
    with assert_raises(HeartbeatNeverRecorded):
        heartbeat.get_last_seen()
    with assert_raises(HeartbeatNeverRecorded):
        heartbeat.get_blockage_duration()

    marked_at = datetime.datetime.utcnow()
    no_delay = datetime.timedelta(seconds=0)
    elapsed = datetime.timedelta(minutes=10)

    # Right after marking, the heartbeat is seen "now" with zero blockage.
    with freeze_time(marked_at):
        heartbeat.mark_seen()
        eq(heartbeat.get_last_seen(), marked_at)
        eq(heartbeat.get_blockage_duration(), no_delay)

    # Ten minutes on, the expected blockage is the elapsed time minus
    # HEARTBEAT_FREQUENCY.
    with freeze_time(marked_at + elapsed):
        eq(heartbeat.get_last_seen(), marked_at)
        eq(heartbeat.get_blockage_duration(), elapsed - HEARTBEAT_FREQUENCY)
Exemple #8
0
from __future__ import absolute_import
from __future__ import print_function
from django.conf import settings

from corehq.celery_monitoring.heartbeat import Heartbeat


# Create one periodic_task named heartbeat__{queue} for each queue.
# The threshold value of each settings entry is unpacked but not used by
# this loop; only the queue name matters here.
for queue, time_to_start_alert_threshold in settings.CELERY_HEARTBEAT_THRESHOLDS.items():
    heartbeat = Heartbeat(queue)
    # Writing through locals() at module level stores the task in the module
    # namespace under the name given by heartbeat.periodic_task_name.
    # NOTE(review): presumably required so celery can find the task as a
    # module attribute -- confirm.
    locals()[heartbeat.periodic_task_name] = heartbeat.make_periodic_task()
Exemple #9
0
def test_get_and_report_blockage_duration():
    """Smoke test for ``Heartbeat.get_and_report_blockage_duration``.

    Marks a heartbeat for the ``celery_periodic`` queue and then calls the
    reporting accessor; the test passes as long as the call does not raise.
    """
    periodic_heartbeat = Heartbeat('celery_periodic')
    periodic_heartbeat.mark_seen()
    # The return value is deliberately ignored.
    periodic_heartbeat.get_and_report_blockage_duration()
Exemple #10
0
def test_heartbeat():
    """Verify ``Heartbeat`` last-seen tracking and blockage-duration arithmetic.

    Checks that both accessors raise ``HeartbeatNeverRecorded`` after the
    record is cleared, that a freshly marked heartbeat reports zero blockage,
    and that ten minutes later the blockage equals ten minutes minus
    ``HEARTBEAT_FREQUENCY``.
    """
    beat = Heartbeat('celery_periodic')
    beat.clear_last_seen()

    # Nothing recorded yet: each accessor is checked separately.
    with assert_raises(HeartbeatNeverRecorded):
        beat.get_last_seen()
    with assert_raises(HeartbeatNeverRecorded):
        beat.get_blockage_duration()

    recorded_at = datetime.datetime.utcnow()
    ten_minutes = datetime.timedelta(minutes=10)
    later = recorded_at + ten_minutes

    with freeze_time(recorded_at):
        beat.mark_seen()
        eq(beat.get_last_seen(), recorded_at)
        eq(beat.get_blockage_duration(), datetime.timedelta(seconds=0))

    with freeze_time(later):
        # The last-seen time is unchanged; only the clock has moved.
        eq(beat.get_last_seen(), recorded_at)
        eq(beat.get_blockage_duration(), ten_minutes - HEARTBEAT_FREQUENCY)