예제 #1
0
 def check_for_alert(cls, resource, *args, **kwargs):
     """Open an uptime alert for ``resource`` unless one is already active.

     Looks up active ``uptime_alert`` events for the resource. When one is
     found only a progress message is logged; otherwise a new active event
     is created from the supplied metric, added to the session and its
     alerts are sent.

     :param resource: object exposing ``resource_id``.
     :param kwargs: must contain ``metric`` (mapping with ``status_code``,
         ``tries`` and ``response_time``); may contain ``db_session``,
         which is handed to ``get_db_session``.
     :raises KeyError: if ``metric`` or one of its three keys is missing.
     """
     db_session = get_db_session(kwargs.get("db_session"))
     request = get_current_request()
     event_type = "uptime_alert"
     metric = kwargs["metric"]

     type_code = Event.types[event_type]
     active_status = Event.statuses["active"]
     active_events = EventService.for_resource(
         [resource.resource_id], event_type=type_code, status=active_status
     )

     # An alert is already open for this resource - nothing new to raise.
     if active_events.first():
         log.info("ALERT: PROGRESS: %s %s" % (event_type, resource))
         return

     log.warning("ALERT: OPEN: %s %s" % (event_type, resource))
     metric_snapshot = {
         key: metric[key] for key in ("status_code", "tries", "response_time")
     }
     alert = Event(
         resource_id=resource.resource_id,
         event_type=type_code,
         status=active_status,
         values=metric_snapshot,
     )
     db_session.add(alert)
     alert.send_alerts(request=request, resource=resource)
예제 #2
0
    def check_for_groups_alert(cls, resource, event_type, *args, **kwargs):
        """Open error/slow report alerts for ``resource`` when limits are hit.

        Tallies the occurrences of the supplied report groups per report
        type, logs the totals, and then evaluates the ``error_report_alert``
        and ``slow_report_alert`` kinds in that order. For each kind whose
        tally passes the resource's threshold an active event is looked up;
        if none exists a new one is created, added to the session and its
        alerts are sent.

        :param resource: object exposing ``resource_id``,
            ``error_report_threshold`` and ``slow_report_threshold``.
        :param event_type: kept for interface compatibility. It is not read:
            both alert kinds are always evaluated.
        :param kwargs: must contain ``report_groups`` (iterable of objects
            exposing ``id`` and ``get_report()``) and ``occurence_dict``
            (mapping of group id to occurrence count, groups missing from
            it count as 1); may contain ``db_session``, which is handed to
            ``get_db_session``.
        :raises KeyError: if ``report_groups`` or ``occurence_dict`` is
            missing from ``kwargs``.
        """
        db_session = get_db_session(kwargs.get("db_session"))
        request = get_current_request()
        report_groups = kwargs["report_groups"]
        occurence_dict = kwargs["occurence_dict"]

        error_reports = 0
        slow_reports = 0
        for group in report_groups:
            occurences = occurence_dict.get(group.id, 1)
            # Resolve the report once per group instead of once per branch.
            report_type = group.get_report().report_type
            if report_type == ReportType.error:
                error_reports += occurences
            elif report_type == ReportType.slow:
                slow_reports += occurences

        log_msg = "LIMIT INFO: %s : %s error reports. %s slow_reports" % (
            resource,
            error_reports,
            slow_reports,
        )
        # Use the module logger like every other message in this method,
        # rather than the root logger.
        log.warning(log_msg)

        # A distinct loop name keeps the ``event_type`` parameter unshadowed.
        for alert_type in ("error_report_alert", "slow_report_alert"):
            # NOTE(review): error alerts open when amount >= threshold while
            # slow alerts need amount > threshold. The asymmetry is inherited
            # from the previous implementation - confirm it is intended.
            if alert_type == "error_report_alert":
                amount = error_reports
                threshold = resource.error_report_threshold
                limit_hit = amount >= threshold
            else:
                amount = slow_reports
                threshold = resource.slow_report_threshold
                limit_hit = amount > threshold
            if not limit_hit:
                continue

            event = EventService.for_resource(
                [resource.resource_id],
                event_type=Event.types[alert_type],
                status=Event.statuses["active"],
            )
            if event.first():
                log.info("ALERT: PROGRESS: %s %s" % (alert_type, resource))
            else:
                log.warning("ALERT: OPEN: %s %s" % (alert_type, resource))
                new_event = Event(
                    resource_id=resource.resource_id,
                    event_type=Event.types[alert_type],
                    status=Event.statuses["active"],
                    values={"reports": amount, "threshold": threshold},
                )
                db_session.add(new_event)
                new_event.send_alerts(request=request, resource=resource)
예제 #3
0
def alert_chart(pkey, chart_uuid):
    """Evaluate a chart alert rule and open or close its alert event.

    Loads the alert action identified by ``pkey`` and the chart identified
    by ``chart_uuid``, runs the chart's query against Elasticsearch and
    walks the resulting series from the last step backwards until the
    action's rule matches a step.

    * rule matched, no active event: a new active ``chart_alert`` event is
      created, flushed, and its alerts are sent;
    * rule matched, active event exists: the event is closed only when a
      non-matching step was seen before the match;
    * rule not matched, active event exists: the event is closed.

    Whenever a non-matching step is available its key is recorded as the
    event's ``end_interval`` before closing.

    Returns ``None``. When the Elasticsearch config has no index names the
    function returns early, before the timing message is logged.
    """
    started_at = datetime.utcnow()
    request = get_current_request()
    alert_action = AlertChannelActionService.by_pkey(pkey)
    chart = DashboardChartService.by_uuid(chart_uuid)
    chart.migrate_json_config()
    resource = chart.dashboard
    json_body = chart.config
    ids_to_override = [json_body["resource"]]
    filter_settings = build_filter_settings_from_chart_config(
        request, json_body, override_app_ids=ids_to_override
    )

    log.warning("alert_chart, resource:{}, chart:{}".format(resource, chart_uuid))

    # Fill in dataset boundaries the chart filters did not provide.
    start_date, end_date = determine_date_boundries_json(json_body)
    if not filter_settings["start_date"]:
        filter_settings["start_date"] = start_date.replace(
            hour=0, minute=0, second=0, microsecond=0
        )
    if not filter_settings["end_date"]:
        filter_settings["end_date"] = end_date

    event_type = Event.types["chart_alert"]
    events_query = EventService.for_resource(
        [resource.resource_id], event_type=event_type, target_uuid=chart_uuid, limit=20
    )

    # Keep the first active and the first closed event the query yields.
    # NOTE(review): the names assume the query returns newest events first -
    # confirm against EventService.for_resource.
    active_status = Event.statuses["active"]
    closed_status = Event.statuses["closed"]
    open_event = None
    latest_closed_event = None
    for candidate in events_query:
        if candidate.status == active_status and not open_event:
            open_event = candidate
        if candidate.status == closed_status and not latest_closed_event:
            latest_closed_event = candidate

    # Only inspect data produced after the previous alert was closed.
    if latest_closed_event:
        filter_settings["start_date"] = latest_closed_event.end_date

    es_config = transform_json_to_es_config(
        request, json_body, filter_settings, ids_to_override=ids_to_override
    )
    if not es_config["index_names"]:
        return

    result = Datastores.es.search(
        body=es_config["query"], index=es_config["index_names"], doc_type="log", size=0
    )
    series, info_dict = parse_es_result(result, es_config, json_config=json_body)

    # The rule config is deep-copied because it will be mutated.
    rule_obj = RuleService.rule_from_config(
        copy.deepcopy(alert_action.rule),
        alert_action.config,
        info_dict["system_labels"],
    )

    # Scan from the last step backwards and stop at the first match;
    # ``finished_interval`` ends up as the last non-matching step visited.
    matched_interval = None
    finished_interval = None
    for step in reversed(series):
        if rule_obj.match(step):
            log.info("matched start")
            matched_interval = step
            break
        finished_interval = step

    def _record_end_interval(event, interval):
        # ``values`` is replaced by a copy before being edited, presumably
        # so the change is picked up as a new assignment - TODO confirm.
        event.values = copy.deepcopy(event.values)
        event.values["end_interval"] = interval["key"].strftime(DATE_FORMAT)

    if matched_interval:
        if open_event:
            log.info("ALERT: PROGRESS: %s %s" % (event_type, resource))
            if finished_interval:
                _record_end_interval(open_event, finished_interval)
                open_event.close()
        else:
            log.warning("ALERT: OPEN: %s %s" % (event_type, resource))
            step_size = None
            parent_agg = json_body.get("parentAgg")
            if parent_agg and parent_agg["type"] == "time_histogram":
                interval_name = parent_agg["config"]["interval"]
                step_size = time_deltas[interval_name]["delta"].total_seconds()
            matched_step_values = {
                "values": matched_interval,
                "labels": info_dict["system_labels"],
            }
            new_event = Event(
                resource_id=resource.resource_id,
                event_type=event_type,
                status=active_status,
                values={
                    "matched_rule": alert_action.get_dict(),
                    "matched_step_values": matched_step_values,
                    "start_interval": matched_interval["key"],
                    "end_interval": None,
                    "resource": chart.config.get("resource"),
                    "chart_name": chart.name,
                    "chart_uuid": chart_uuid,
                    "step_size": step_size,
                    "action_name": alert_action.name,
                },
                target_uuid=chart_uuid,
            )
            DBSession.add(new_event)
            DBSession.flush()
            new_event.send_alerts(request=request, resource=resource)
    elif open_event:
        if finished_interval:
            _record_end_interval(open_event, finished_interval)
        open_event.close()

    elapsed = datetime.utcnow() - started_at
    log.warning("chart alert rule check took: {}".format(elapsed))