Example #1
0
def server_up(req):
    '''
    Health-check view intended for monitoring tools such as pingdom.

    Hit serverup.txt to run every check marked as run-by-default below.
    Hit serverup.txt?heartbeat (or any other check name) to also run a
    check that is not run by default.

    Responds with "success" when nothing failed, or when a deploy is in
    progress. Otherwise records a datadog event and responds with a 500
    whose body lists the failure messages.
    '''
    # (query-string name, run by default?, check function)
    service_checks = (
        ("heartbeat", False, checks.check_heartbeat),
        ("celery", True, checks.check_celery),
        ("postgres", True, checks.check_postgres),
        ("couch", True, checks.check_couch),
        ("redis", True, checks.check_redis),
        ("formplayer", True, checks.check_formplayer),
        ("elasticsearch", True, checks.check_elasticsearch),
    )

    # The first entry is a header; every failing check appends one line.
    problems = ['Problems with HQ (%s):' % os.uname()[1]]
    for name, run_by_default, run_check in service_checks:
        if not run_by_default and req.GET.get(name, None) is None:
            continue
        try:
            result = run_check()
        except Exception:
            # Don't display the exception message
            result = checks.ServiceStatus(False, "{} has issues".format(name))
        if not result.success:
            problems.append(result.msg)

    any_failed = len(problems) > 1
    if not any_failed or is_deploy_in_progress():
        return HttpResponse("success")

    create_datadog_event(
        'Serverup check failed', '\n'.join(problems),
        alert_type='error', aggregation_key='serverup',
    )
    return HttpResponse('<br>'.join(problems), status=500)
Example #2
0
def _all_zeros(data):
    values = [(kpi['value'] == 0 and kpi['all'] == 0)
              for row in data['records'] for kpi in row]
    if all(values):
        create_datadog_event('ICDS 0s',
                             'All indicators in program summary equals 0',
                             aggregation_key='icds_0')
    return all(values)
Example #3
0
def _all_zeros(data, agg_level):
    values = [(not kpi['value'] and not kpi['all']) for row in data['records'] for kpi in row]
    retry = False
    if agg_level <= 1:
        retry = any(values)
    else:
        retry = all(values)
    if retry:
        create_datadog_event('ICDS 0s', 'All indicators in program summary equals 0', aggregation_key='icds_0')
    return retry
Example #4
0
def server_up(req):
    '''
    Health-check view intended for monitoring tools such as pingdom.

    Hit serverup.txt to run every check marked as run-by-default below.
    Hit serverup.txt?heartbeat (or any other check name) to also run a
    check that is not run by default.

    Each check function returns a (passed, custom_message) pair; a failing
    check reports its custom message when it has one, else the fallback
    text from the table. Exceptions raised by a check are not caught here.

    Responds with "success" when nothing failed; otherwise records a
    datadog event and responds with a 500 listing the failure messages.
    '''
    # (query-string name, run by default?, fallback failure text, check function)
    service_checks = (
        ("heartbeat", False, "* celery heartbeat is down", hb_check),
        ("celery", True, "* celery is down", celery_check),
        ("postgres", True, "* postgres has issues", pg_check),
        ("couch", True, "* couch has issues", couch_check),
        ("redis", True, "* redis has issues", redis_check),
    )

    # The first entry is a header; every failing check appends one line.
    problems = ['Problems with HQ (%s):' % os.uname()[1]]
    for name, run_by_default, fallback_msg, run_check in service_checks:
        if not run_by_default and req.GET.get(name, None) is None:
            continue
        passed, custom_msg = run_check()
        if passed:
            continue
        problems.append(custom_msg or fallback_msg)

    if len(problems) == 1:
        return HttpResponse("success")

    create_datadog_event(
        'Serverup check failed', '\n'.join(problems),
        alert_type='error', aggregation_key='serverup',
    )
    return HttpResponse('<br>'.join(problems), status=500)
Example #5
0
def server_up(req):
    '''
    Health-check view intended for monitoring tools such as pingdom.

    Hit serverup.txt to run every check marked as run-by-default below.
    Hit serverup.txt?heartbeat (or any other check name) to also run a
    check that is not run by default.

    Responds with "success" when nothing failed, or when a deploy is in
    progress. Otherwise records a datadog event and responds with a 500
    whose body lists the failure messages.
    '''
    # (query-string name, run by default?, check function)
    service_checks = (
        ("heartbeat", False, checks.check_heartbeat),
        ("celery", True, checks.check_celery),
        ("postgres", True, checks.check_postgres),
        ("couch", True, checks.check_couch),
        ("redis", True, checks.check_redis),
        ("formplayer", True, checks.check_formplayer),
    )

    def _run(name, run_check):
        # A check that raises is reported as a generic failure for that service.
        try:
            return run_check()
        except Exception:
            # Don't display the exception message
            return checks.ServiceStatus(False, "{} has issues".format(name))

    # The first entry is a header; every failing check appends one line.
    problems = ['Problems with HQ (%s):' % os.uname()[1]]
    for name, run_by_default, run_check in service_checks:
        wanted = run_by_default or req.GET.get(name, None) is not None
        if not wanted:
            continue
        outcome = _run(name, run_check)
        if not outcome.success:
            problems.append(outcome.msg)

    has_failures = len(problems) > 1
    if has_failures and not is_deploy_in_progress():
        create_datadog_event(
            'Serverup check failed', '\n'.join(problems),
            alert_type='error', aggregation_key='serverup',
        )
        return HttpResponse('<br>'.join(problems), status=500)
    return HttpResponse("success")
Example #6
0
def server_up(req):
    """
    Health check view which can be hooked into server monitoring tools like 'pingdom'

    Returns:
        HttpResponse("success", status_code=200)
        HttpResponse(error_message, status_code=500)

    Check selection:
        serverup.txt                    runs every check with always_check=True
        serverup.txt?only={check_name}  runs just that check (if it is a known check)
        serverup.txt?{check_name}       additionally runs a non-default check

    A duration gauge is sent for every check that ran. Failures are only
    reported (datadog event + 500 response) when no deploy is in progress.
    """
    requested = req.GET.get('only', None)
    if requested and requested in CHECKS:
        selected = [requested]
    else:
        selected = []
        for name, info in CHECKS.items():
            if info['always_check'] or req.GET.get(name, None) is not None:
                selected.append(name)

    results = run_checks(selected)

    failures = []
    for name, outcome in results:
        if not outcome.success:
            failures.append((name, outcome))

    for name, outcome in results:
        state = 'ok' if outcome.success else 'failed'
        datadog_gauge(
            'commcare.serverup.check',
            outcome.duration,
            tags=['status:{}'.format(state), 'check:{}'.format(name)],
        )

    if not failures or is_deploy_in_progress():
        return HttpResponse("success")

    report = []
    for name, outcome in failures:
        line = '<strong>{}</strong>: {}'.format(name, html.escape(outcome.msg))
        report.append(html.linebreaks(line.strip()))

    create_datadog_event(
        'Serverup check failed',
        '\n'.join(report),
        alert_type='error',
        aggregation_key='serverup',
    )
    # The host header is added only to the HTTP body, not to the event text.
    report.insert(0, 'Failed Checks (%s):' % os.uname()[1])
    return HttpResponse(''.join(report), status=500)
Example #7
0
def _all_zeros_graph(step, data, agg_level):
    if step == 'map':
        if agg_level <= 3:
            map_data_by_location = data['data']
        else:
            map_data_by_location = data['tooltips_data']

        values = [not all(map_data_by_location[key].values()) for key in map_data_by_location
                  if key not in ['original_name', 'fillKey']]
    else:
        values = [(not location['value']) for location in data['all_locations']]

    retry = all(values)
    if retry:
        create_datadog_event('ICDS 0s', 'All indicators in awc_covered equals 0', aggregation_key='icds_0')
    return retry
Example #8
0
def server_up(req):
    """
    Health check view which can be hooked into server monitoring tools like 'pingdom'

    Returns:
        HttpResponse("success", status_code=200)
        HttpResponse(error_message, status_code=500)

    Check selection:
        serverup.txt                    runs every check with always_check=True
        serverup.txt?only={check_name}  runs just that check (if it is a known check)
        serverup.txt?{check_name}       additionally runs a non-default check

    A duration gauge is sent for every check that ran. Failures are only
    reported (datadog event + 500 response) when no deploy is in progress.
    """
    def _is_selected(name, info):
        # Default checks always run; others only when named in the query string.
        return info['always_check'] or req.GET.get(name, None) is not None

    only = req.GET.get('only', None)
    if only and only in CHECKS:
        checks_to_do = [only]
    else:
        checks_to_do = [name for name, info in CHECKS.items() if _is_selected(name, info)]

    statuses = run_checks(checks_to_do)

    failed_checks = []
    for name, result in statuses:
        if not result.success:
            failed_checks.append((name, result))

    for name, result in statuses:
        status_tag = 'status:{}'.format('ok' if result.success else 'failed')
        check_tag = 'check:{}'.format(name)
        datadog_gauge('commcare.serverup.check', result.duration, tags=[status_tag, check_tag])

    if not failed_checks or is_deploy_in_progress():
        return HttpResponse("success")

    status_messages = []
    for name, result in failed_checks:
        entry = '<strong>{}</strong>: {}'.format(name, html.escape(result.msg)).strip()
        status_messages.append(html.linebreaks(entry))

    create_datadog_event(
        'Serverup check failed', '\n'.join(status_messages),
        alert_type='error', aggregation_key='serverup',
    )
    # The host header is added only to the HTTP body, not to the event text.
    status_messages.insert(0, 'Failed Checks (%s):' % os.uname()[1])
    return HttpResponse(''.join(status_messages), status=500)
Example #9
0
def _bust_awc_cache():
    """Delete every cache key matching ``*cas_reach_data*``.

    A 'start' datadog event is created before the keys are looked up and a
    'finish' event after the last key has been deleted.
    """
    event_title = 'redis: delete dashboard keys'

    create_datadog_event(event_title, 'start')
    for stale_key in cache.keys('*cas_reach_data*'):
        cache.delete(stale_key)
    create_datadog_event(event_title, 'finish')
Example #10
0
def record_command_event(sender, args, kwargs, outcome, **extra):
    """Create a datadog event describing one invocation of ``sender``.

    The event title and aggregation key are ``sender.__name__``; the text
    lists the args, the options (``kwargs``) and the outcome. An outcome
    that is an exception is rendered as "<its class>: <its message>".
    Extra keyword arguments are accepted and ignored.
    """
    if isinstance(outcome, BaseException):
        outcome = f'{outcome.__class__}: {outcome}'

    text = '\n'.join((
        f'args: {args}',
        f'options: {kwargs}',
        f'outcome: {outcome}',
    ))
    command_name = sender.__name__
    create_datadog_event(f'{command_name}', text, aggregation_key=command_name)