Example #1
0
    def handle(self, service_name, **options):
        """Run a single named service check, or every check in ``CHECKS``.

        A falsy ``service_name`` selects all checks. When ``run_checks``
        raises ``UnknownCheckException`` the names in ``CHECKS`` are listed
        on stdout; otherwise the statuses go to ``self.print_results``.
        """
        if service_name:
            requested = [service_name]
        else:
            requested = list(CHECKS)

        try:
            outcomes = run_checks(requested)
        except UnknownCheckException:
            # Nothing to report for an unknown name: show the valid ones.
            print("Services available are:")
            for available in CHECKS:
                print("- {}".format(available))
            return

        self.print_results(outcomes)
Example #2
0
def server_up(req):
    """
    Health check view intended for server monitoring tools such as 'pingdom'.

    Responses:
        HttpResponse("success") when nothing failed, or when a deploy is in
        progress
        HttpResponse(<failure report>, status=500) otherwise

    Query string handling:
        serverup.txt                     runs every check flagged always_check
        serverup.txt?only={check_name}   runs just that check (if it is known)
        serverup.txt?{check_name}        additionally runs a non-default check
                                         (currently only ``heartbeat``)
    """
    requested = req.GET.get('only', None)
    if requested and requested in CHECKS:
        checks_to_do = [requested]
    else:
        # Default set, plus any check explicitly named as a query parameter.
        checks_to_do = []
        for name, info in CHECKS.items():
            if info['always_check'] or req.GET.get(name, None) is not None:
                checks_to_do.append(name)

    statuses = run_checks(checks_to_do)

    failed_checks = []
    for name, outcome in statuses:
        if not outcome.success:
            failed_checks.append((name, outcome))

    # A duration gauge is reported for every check, passed or failed.
    for name, outcome in statuses:
        label = 'ok' if outcome.success else 'failed'
        metrics_gauge(
            'commcare.serverup.check',
            outcome.duration,
            tags={'status': label, 'check': name},
            multiprocess_mode=MPM_MAX,
        )

    # Failures seen while a deploy is in progress are not reported as errors.
    if not failed_checks or is_deploy_in_progress():
        return HttpResponse("success")

    status_messages = []
    for name, outcome in failed_checks:
        line = '<strong>{}</strong>: {}'.format(name, html.escape(outcome.msg))
        status_messages.append(html.linebreaks(line.strip()))

    event_text = '\n'.join(status_messages)
    create_metrics_event(
        'Serverup check failed',
        event_text,
        alert_type='error',
        aggregation_key='serverup',
    )

    hostname = os.uname()[1]
    header = 'Failed Checks (%s):' % hostname
    return HttpResponse(''.join([header] + status_messages), status=500)
Example #3
0
def check_services(request):
    """Run every check in ``service_checks.CHECKS`` and report the results.

    Returns an ``HttpResponse`` whose body is a ``<pre>`` element holding one
    ``<br/>``-terminated line per check: outcome label, duration in seconds,
    check name and message.
    """
    # Function-scope import so the stdlib escaper is used regardless of what
    # the module-level name ``html`` is bound to in this file.
    from html import escape

    def get_message(service_name, result):
        """Format one check result as a single line of HTML."""
        if result.exception:
            status = "EXCEPTION"
            msg = repr(result.exception)
        else:
            status = "SUCCESS" if result.success else "FAILURE"
            msg = result.msg
        # Messages and exception reprs can contain '<', '>' or '&' (e.g.
        # "<SomeObject at 0x...>"); unescaped, the browser treats them as
        # markup and hides or mangles the text. str() keeps a non-string
        # message rendering as it did before.
        return "{} (Took {:6.2f}s) {:15}: {}<br/>".format(
            status, result.duration, service_name, escape(str(msg)))

    statuses = run_checks(list(service_checks.CHECKS))
    results = [get_message(name, status) for name, status in statuses]
    return HttpResponse("<pre>" + "".join(results) + "</pre>")
Example #4
0
    def handle(self, service_name, **options):
        """Run a single named service check, or every check in ``CHECKS``.

        A falsy ``service_name`` selects all checks. Process exit behaviour:
        ``sys.exit(-1)`` after listing the valid names when ``run_checks``
        raises ``UnknownCheckException``; ``sys.exit(1)`` when any returned
        status is unsuccessful; a normal return otherwise.
        """
        if service_name:
            requested = [service_name]
        else:
            requested = list(CHECKS)

        try:
            outcomes = run_checks(requested)
        except UnknownCheckException:
            print("Services available are:")
            for available in CHECKS:
                print("- {}".format(available))

            sys.exit(-1)
        else:
            self.print_results(outcomes)
            # Each entry's second element is the status object.
            if any(not entry[1].success for entry in outcomes):
                sys.exit(1)
Example #5
0
def check_services(request):
    """Run every check in ``service_checks.CHECKS`` and report the results.

    Returns an ``HttpResponse`` whose body is a ``<pre>`` element holding one
    ``<br/>``-terminated line per check: outcome label, duration in seconds,
    check name and message.
    """
    # Function-scope import so the stdlib escaper is used regardless of what
    # the module-level name ``html`` is bound to in this file.
    from html import escape

    def get_message(service_name, result):
        """Format one check result as a single line of HTML."""
        if result.exception:
            status = "EXCEPTION"
            msg = repr(result.exception)
        else:
            status = "SUCCESS" if result.success else "FAILURE"
            msg = result.msg
        # Messages and exception reprs can contain '<', '>' or '&' (e.g.
        # "<SomeObject at 0x...>"); unescaped, the browser treats them as
        # markup and hides or mangles the text. str() keeps a non-string
        # message rendering as it did before.
        return "{} (Took {:6.2f}s) {:15}: {}<br/>".format(
            status, result.duration, service_name, escape(str(msg)))

    statuses = run_checks(list(service_checks.CHECKS))
    results = [
        get_message(name, status) for name, status in statuses
    ]
    return HttpResponse("<pre>" + "".join(results) + "</pre>")
Example #6
0
def server_up(req):
    """
    Health check view intended for server monitoring tools such as 'pingdom'.

    Responses:
        HttpResponse("success") when nothing failed, or when a deploy is in
        progress
        HttpResponse(<failure report>, status=500) otherwise

    Query string handling:
        serverup.txt                     runs every check flagged always_check
        serverup.txt?only={check_name}   runs just that check (if it is known)
        serverup.txt?{check_name}        additionally runs a non-default check
                                         (currently only ``heartbeat``)
    """
    requested = req.GET.get('only', None)
    if requested and requested in CHECKS:
        checks_to_do = [requested]
    else:
        # Default set, plus any check explicitly named as a query parameter.
        checks_to_do = []
        for name, info in CHECKS.items():
            if info['always_check'] or req.GET.get(name, None) is not None:
                checks_to_do.append(name)

    statuses = run_checks(checks_to_do)

    failed_checks = []
    for name, outcome in statuses:
        if not outcome.success:
            failed_checks.append((name, outcome))

    # A duration gauge is reported for every check, passed or failed.
    for name, outcome in statuses:
        label = 'ok' if outcome.success else 'failed'
        gauge_tags = ['status:{}'.format(label), 'check:{}'.format(name)]
        datadog_gauge('commcare.serverup.check', outcome.duration, tags=gauge_tags)

    # Failures seen while a deploy is in progress are not reported as errors.
    if not failed_checks or is_deploy_in_progress():
        return HttpResponse("success")

    status_messages = []
    for name, outcome in failed_checks:
        line = '<strong>{}</strong>: {}'.format(name, html.escape(outcome.msg))
        status_messages.append(html.linebreaks(line.strip()))

    event_text = '\n'.join(status_messages)
    create_datadog_event(
        'Serverup check failed',
        event_text,
        alert_type='error',
        aggregation_key='serverup',
    )

    hostname = os.uname()[1]
    header = 'Failed Checks (%s):' % hostname
    return HttpResponse(''.join([header] + status_messages), status=500)