def check_for_alert(cls, resource, *args, **kwargs):
    """
    Look for an active uptime alert on ``resource``; open one if none exists.

    Reads ``kwargs["metric"]`` (required) and an optional
    ``kwargs["db_session"]``. When no active ``uptime_alert`` event is
    found, a new active event carrying the metric's status code, tries
    and response time is added to the session and its alerts are sent.
    """
    session = get_db_session(kwargs.get("db_session"))
    current_request = get_current_request()
    event_type = "uptime_alert"
    metric = kwargs["metric"]

    active_events = EventService.for_resource(
        [resource.resource_id],
        event_type=Event.types[event_type],
        status=Event.statuses["active"],
    )

    # An alert is already open for this resource - nothing new to create.
    if active_events.first():
        log.info("ALERT: PROGRESS: %s %s" % (event_type, resource))
        return

    log.warning("ALERT: OPEN: %s %s" % (event_type, resource))
    uptime_event = Event(
        resource_id=resource.resource_id,
        event_type=Event.types[event_type],
        status=Event.statuses["active"],
        values={
            field: metric[field]
            for field in ("status_code", "tries", "response_time")
        },
    )
    session.add(uptime_event)
    uptime_event.send_alerts(request=current_request, resource=resource)
def check_for_groups_alert(cls, resource, event_type, *args, **kwargs):
    """
    Check for open report alerts and open new ones when limits are exceeded.

    Occurrences of error and slow reports are summed over the given report
    groups. For each of the ``error_report_alert`` and ``slow_report_alert``
    event types whose count passes the resource's threshold, an active
    event is created (unless one is already active) and its alerts are sent.

    Args:
        cls: Unused by this function.
        resource: Object exposing ``resource_id``,
            ``error_report_threshold`` and ``slow_report_threshold``.
        event_type: Accepted for interface compatibility only; both report
            alert types are always evaluated regardless of this value.
        **kwargs: Must contain ``report_groups`` (iterable of groups with
            ``id`` and ``get_report()``) and ``occurence_dict`` (mapping of
            group id to occurrence count, defaulting to 1 per group).
            May contain ``db_session``.

    Raises:
        KeyError: If ``report_groups`` or ``occurence_dict`` is missing.
    """
    db_session = get_db_session(kwargs.get("db_session"))
    request = get_current_request()
    report_groups = kwargs["report_groups"]
    occurence_dict = kwargs["occurence_dict"]

    error_reports = 0
    slow_reports = 0
    for group in report_groups:
        occurences = occurence_dict.get(group.id, 1)
        # Fetch the report once per group instead of once per comparison.
        report_type = group.get_report().report_type
        if report_type == ReportType.error:
            error_reports += occurences
        elif report_type == ReportType.slow:
            slow_reports += occurences

    # Use the module logger (like the rest of this function) rather than
    # the root logger, so the message is attributed and filtered correctly.
    log.warning(
        "LIMIT INFO: %s : %s error reports. %s slow_reports",
        resource,
        error_reports,
        slow_reports,
    )

    # A separate loop name keeps the ``event_type`` parameter intact.
    for alert_type in ("error_report_alert", "slow_report_alert"):
        if alert_type == "error_report_alert":
            amount = error_reports
            threshold = resource.error_report_threshold
            # Error alerts fire once the count reaches the threshold.
            if amount < threshold:
                continue
        else:
            amount = slow_reports
            threshold = resource.slow_report_threshold
            # NOTE(review): slow alerts require the count to *exceed* the
            # threshold, unlike error alerts above - confirm this asymmetry
            # is intended.
            if amount <= threshold:
                continue

        event = EventService.for_resource(
            [resource.resource_id],
            event_type=Event.types[alert_type],
            status=Event.statuses["active"],
        )
        if event.first():
            log.info("ALERT: PROGRESS: %s %s", alert_type, resource)
        else:
            log.warning("ALERT: OPEN: %s %s", alert_type, resource)
            new_event = Event(
                resource_id=resource.resource_id,
                event_type=Event.types[alert_type],
                status=Event.statuses["active"],
                values={"reports": amount, "threshold": threshold},
            )
            db_session.add(new_event)
            new_event.send_alerts(request=request, resource=resource)
def alert_chart(pkey, chart_uuid):
    """
    Evaluate an alert rule against a dashboard chart and update its event.

    Loads the alert action identified by ``pkey`` and the chart identified
    by ``chart_uuid``, queries Elasticsearch for the chart's series, and
    matches the action's rule against the series steps (latest first).
    Depending on the outcome an active ``chart_alert`` event is opened,
    left in progress, or closed.

    :param pkey: key passed to ``AlertChannelActionService.by_pkey``
    :param chart_uuid: uuid passed to ``DashboardChartService.by_uuid``;
        also used as ``target_uuid`` of the events
    :return: ``None``; returns early when the ES config has no index names
    """
    start = datetime.utcnow()
    request = get_current_request()
    alert_action = AlertChannelActionService.by_pkey(pkey)
    chart = DashboardChartService.by_uuid(chart_uuid)
    chart.migrate_json_config()
    # the chart's dashboard acts as the resource the events are attached to
    resource = chart.dashboard
    json_body = chart.config
    ids_to_override = [json_body["resource"]]
    filter_settings = build_filter_settings_from_chart_config(
        request, json_body, override_app_ids=ids_to_override
    )

    log.warning("alert_chart, resource:{}, chart:{}".format(resource, chart_uuid))

    # determine start and end date for dataset
    start_date, end_date = determine_date_boundries_json(json_body)
    if not filter_settings["start_date"]:
        # fall back to the computed start date, truncated to midnight
        filter_settings["start_date"] = start_date.replace(
            hour=0, minute=0, second=0, microsecond=0
        )

    if not filter_settings["end_date"]:
        filter_settings["end_date"] = end_date

    event_type = Event.types["chart_alert"]
    open_event = None
    latest_closed_event = None
    events_query = EventService.for_resource(
        [resource.resource_id], event_type=event_type, target_uuid=chart_uuid, limit=20
    )

    # keep the first active and the first closed event the query yields
    # (presumably ordered newest first - verify against EventService)
    for event in events_query:
        if event.status == Event.statuses["active"] and not open_event:
            open_event = event
        if event.status == Event.statuses["closed"] and not latest_closed_event:
            latest_closed_event = event

    # only consider data recorded after the last closed alert ended
    if latest_closed_event:
        filter_settings["start_date"] = latest_closed_event.end_date

    es_config = transform_json_to_es_config(
        request, json_body, filter_settings, ids_to_override=ids_to_override
    )

    # nothing to search - leave any existing event untouched
    if not es_config["index_names"]:
        return

    result = Datastores.es.search(
        body=es_config["query"], index=es_config["index_names"], doc_type="log", size=0
    )
    series, info_dict = parse_es_result(result, es_config, json_config=json_body)

    # we need to make a deepcopy since we will mutate it
    rule_config = copy.deepcopy(alert_action.rule)
    field_mappings = alert_action.config

    rule_obj = RuleService.rule_from_config(
        rule_config, field_mappings, info_dict["system_labels"]
    )
    matched_interval = None
    finished_interval = None
    # walk the series from its last step backwards and stop at the first
    # step that matches the rule; finished_interval ends up holding the
    # last non-matching step visited before that match (if any)
    for step in reversed(series):
        if rule_obj.match(step):
            log.info("matched start")
            if not matched_interval:
                matched_interval = step
                break
        else:
            finished_interval = step

    if matched_interval:
        if open_event:
            log.info("ALERT: PROGRESS: %s %s" % (event_type, resource))
            # a non-matching step was seen before the match, so record
            # where the alert stopped and close the open event
            # NOTE(review): nesting of close() under this `if` was
            # reconstructed - confirm against the canonical file
            if finished_interval:
                # reassign a copy instead of mutating the stored dict
                open_event.values = copy.deepcopy(open_event.values)
                end_interval = finished_interval["key"].strftime(DATE_FORMAT)
                open_event.values["end_interval"] = end_interval
                open_event.close()
        else:
            log.warning("ALERT: OPEN: %s %s" % (event_type, resource))
            step_size = None
            parent_agg = json_body.get("parentAgg")
            # step size (in seconds) is only known for time histograms
            if parent_agg and parent_agg["type"] == "time_histogram":
                step_size = time_deltas[parent_agg["config"]["interval"]][
                    "delta"
                ].total_seconds()
            matched_step_values = {
                "values": matched_interval,
                "labels": info_dict["system_labels"],
            }
            values_dict = {
                "matched_rule": alert_action.get_dict(),
                "matched_step_values": matched_step_values,
                # `step` is the loop variable left over from the scan above;
                # the loop broke on the match, so it is matched_interval
                "start_interval": step["key"],
                "end_interval": None,
                "resource": chart.config.get("resource"),
                "chart_name": chart.name,
                "chart_uuid": chart_uuid,
                "step_size": step_size,
                "action_name": alert_action.name,
            }
            new_event = Event(
                resource_id=resource.resource_id,
                event_type=event_type,
                status=Event.statuses["active"],
                values=values_dict,
                target_uuid=chart_uuid,
            )
            DBSession.add(new_event)
            # flush before send_alerts is called on the new event
            DBSession.flush()
            new_event.send_alerts(request=request, resource=resource)
    elif open_event:
        # no step matched the rule: close the open event, recording the
        # end interval when a non-matching step is available
        # NOTE(review): close() placed outside the `if` by reconstruction -
        # confirm against the canonical file
        if finished_interval:
            open_event.values = copy.deepcopy(open_event.values)
            end_interval = finished_interval["key"].strftime(DATE_FORMAT)
            open_event.values["end_interval"] = end_interval
        open_event.close()
    took = datetime.utcnow() - start
    log.warning("chart alert rule check took: {}".format(took))