def check_for_alert(cls, resource, *args, **kwargs):
    """
    Open an uptime alert for ``resource`` unless one is already active.

    Reads ``kwargs["metric"]`` (required; its ``status_code``, ``tries`` and
    ``response_time`` entries are stored on the new event) and the optional
    ``kwargs["db_session"]``. When no active ``uptime_alert`` event is found
    for the resource, a new event is added to the session and its alerts are
    sent; otherwise the ongoing alert is only logged.
    """
    db_session = get_db_session(kwargs.get("db_session"))
    request = get_current_request()
    event_type = "uptime_alert"
    metric = kwargs["metric"]

    active_events = EventService.for_resource(
        [resource.resource_id],
        event_type=Event.types[event_type],
        status=Event.statuses["active"],
    )
    if active_events.first():
        # an alert is already open for this resource - nothing to create
        log.info("ALERT: PROGRESS: %s %s" % (event_type, resource))
        return

    log.warning("ALERT: OPEN: %s %s" % (event_type, resource))
    # copy only the metric fields that describe the failed check
    event_values = {
        key: metric[key] for key in ("status_code", "tries", "response_time")
    }
    alert = Event(
        resource_id=resource.resource_id,
        event_type=Event.types[event_type],
        status=Event.statuses["active"],
        values=event_values,
    )
    db_session.add(alert)
    alert.send_alerts(request=request, resource=resource)
def check_for_groups_alert(cls, resource, event_type, *args, **kwargs):
    """
    Check for open alerts depending on group type.
    Create new one if nothing is found and send alerts

    Both ``error_report_alert`` and ``slow_report_alert`` are evaluated on
    every call. ``event_type`` is accepted for interface compatibility but
    is not read.

    Required keyword arguments:
        report_groups: iterable of report groups; each one contributes its
            occurrence count to the error or slow total depending on the
            ``report_type`` of ``group.get_report()``.
        occurence_dict: mapping of ``group.id`` to occurrence count; groups
            missing from it count as 1.

    Optional keyword arguments:
        db_session: passed to ``get_db_session``.
    """
    db_session = get_db_session(kwargs.get("db_session"))
    request = get_current_request()
    report_groups = kwargs["report_groups"]
    occurence_dict = kwargs["occurence_dict"]

    error_reports = 0
    slow_reports = 0
    for group in report_groups:
        occurences = occurence_dict.get(group.id, 1)
        # look the report up once per group rather than once per comparison
        report_type = group.get_report().report_type
        if report_type == ReportType.error:
            error_reports += occurences
        elif report_type == ReportType.slow:
            slow_reports += occurences

    # use the module logger like the rest of this function (was the root
    # ``logging`` logger)
    log.warning(
        "LIMIT INFO: %s : %s error reports. %s slow_reports",
        resource,
        error_reports,
        slow_reports,
    )

    # a distinct loop name keeps the ``event_type`` parameter from being
    # silently overwritten
    for alert_type in ("error_report_alert", "slow_report_alert"):
        if alert_type == "error_report_alert":
            amount = error_reports
            threshold = resource.error_report_threshold
            # NOTE(review): error alerts fire when the count *equals* the
            # threshold, slow alerts only when it is exceeded - confirm the
            # asymmetry is intended.
            below_limit = amount < threshold
        else:
            amount = slow_reports
            threshold = resource.slow_report_threshold
            below_limit = amount <= threshold
        if below_limit:
            continue

        active_events = EventService.for_resource(
            [resource.resource_id],
            event_type=Event.types[alert_type],
            status=Event.statuses["active"],
        )
        if active_events.first():
            log.info("ALERT: PROGRESS: %s %s" % (alert_type, resource))
        else:
            log.warning("ALERT: OPEN: %s %s" % (alert_type, resource))
            new_event = Event(
                resource_id=resource.resource_id,
                event_type=Event.types[alert_type],
                status=Event.statuses["active"],
                values={"reports": amount, "threshold": threshold},
            )
            db_session.add(new_event)
            new_event.send_alerts(request=request, resource=resource)
def test_uptime_alert(self):
    """
    Send a sample uptime alert through this integration's alert channel.

    Flashes a warning and returns ``False`` when no integration is
    configured; otherwise notifies the channel with a fabricated active
    ``uptime_alert`` event, flashes a confirmation and returns ``True``.
    """
    if not self.integration:
        self.request.session.flash("Integration needs to be configured", "warning")
        return False

    resource = self.integration.resource
    # fabricated event, handed straight to the channel
    sample_event = Event(
        resource_id=resource.resource_id,
        event_type=Event.types["uptime_alert"],
        start_date=datetime.utcnow(),
        status=Event.statuses["active"],
        values=dict(status_code=500, tries=2, response_time=0),
    )
    alert_channel = AlertChannelService.by_integration_id(self.integration.id)
    alert_channel.notify_uptime_alert(
        resource=resource,
        event=sample_event,
        user=self.request.user,
        request=self.request,
    )
    self.request.session.flash("Notification sent")
    return True
def test_error_alert(self):
    """
    Send a sample report alert through this integration's alert channel.

    Flashes a warning and returns ``False`` when no integration is
    configured. Otherwise a fabricated active event of a randomly chosen
    type (``error_report_alert`` or ``slow_report_alert``) with a random
    report count is sent via ``notify_alert`` and ``True`` is returned.
    """
    if not self.integration:
        self.request.session.flash("Integration needs to be configured", "warning")
        return False

    resource = self.integration.resource
    alert_kinds = ("error_report_alert", "slow_report_alert")
    event_name = random.choice(alert_kinds)
    sample_event = Event(
        resource_id=resource.resource_id,
        event_type=Event.types[event_name],
        start_date=datetime.utcnow(),
        status=Event.statuses["active"],
        # report count is always above the fake threshold of 10
        values=dict(reports=random.randint(11, 99), threshold=10),
    )
    alert_channel = AlertChannelService.by_integration_id(self.integration.id)
    alert_channel.notify_alert(
        resource=resource,
        event=sample_event,
        user=self.request.user,
        request=self.request,
    )
    self.request.session.flash("Notification sent")
    return True
def test_error_alert(self):
    """
    Push a fabricated report alert to the channel of this integration.

    Without a configured integration a warning is flashed and ``False`` is
    returned. Otherwise an active event of a random type
    (``error_report_alert`` / ``slow_report_alert``) is passed to the
    channel's ``notify_alert`` and ``True`` is returned.
    """
    if not self.integration:
        self.request.session.flash('Integration needs to be configured',
                                   'warning')
        return False

    resource = self.integration.resource
    candidate_types = ('error_report_alert', 'slow_report_alert')
    event_name = random.choice(candidate_types)
    fake_event = Event(
        resource_id=resource.resource_id,
        event_type=Event.types[event_name],
        start_date=datetime.utcnow(),
        status=Event.statuses['active'],
        # random count that always exceeds the fake threshold
        values=dict(reports=random.randint(11, 99), threshold=10))
    target_channel = AlertChannelService.by_integration_id(
        self.integration.id)
    target_channel.notify_alert(resource=resource,
                                event=fake_event,
                                user=self.request.user,
                                request=self.request)
    self.request.session.flash('Notification sent')
    return True
def test_chart_alert(self):
    """
    Push a fabricated chart alert to the channel of this integration.

    Without a configured integration a warning is flashed and ``False`` is
    returned. Otherwise an active ``chart_alert`` event carrying hard-coded
    sample chart data (with one random step value) is passed to the
    channel's ``notify_chart_alert`` and ``True`` is returned.
    """
    if not self.integration:
        self.request.session.flash('Integration needs to be configured',
                                   'warning')
        return False

    resource = self.integration.resource
    # sample payload describing the chart step that "matched" the rule
    step_labels = {"0_1": {"human_label": "Attempts sum"}}
    step_values = {
        "0_1": random.randint(11, 55),
        "key": "2015-12-16T15:49:00",
    }
    chart_values = {
        "matched_rule": {'name': 'Fraud attempt limit'},
        "matched_step_values": {
            "labels": step_labels,
            "values": step_values,
        },
        "start_interval": datetime.utcnow(),
        "resource": 1,
        "chart_name": "Fraud attempts per day",
        "chart_uuid": "some_uuid",
        "step_size": 3600,
        "action_name": "Notify excessive fraud attempts",
    }
    fake_event = Event(
        resource_id=resource.resource_id,
        event_type=Event.types['chart_alert'],
        status=Event.statuses['active'],
        values=chart_values,
        target_uuid="some_uuid",
        start_date=datetime.utcnow(),
    )
    target_channel = AlertChannelService.by_integration_id(
        self.integration.id)
    target_channel.notify_chart_alert(resource=resource,
                                      event=fake_event,
                                      user=self.request.user,
                                      request=self.request)
    self.request.session.flash('Notification sent')
    return True
def alerting_test(request):
    """
    Allows to test send data on various registered alerting channels

    Reads ``channel_name``, ``channel_value`` and ``event_name`` from
    ``request.params``. When one of the current user's alert channels matches
    both the name and the value, sample notifications of the requested kind
    are sent through it. When no channel matches, nothing is sent.

    Returns a dict with ``alert_channels`` (each channel's ``get_dict()``)
    and ``applications`` (resource id -> resource name) for the applications
    the user can view.
    """
    applications = UserService.resources_with_perms(
        request.user, ["view"], resource_types=["application"]
    )
    # what we can select in total
    all_possible_app_ids = [app.resource_id for app in applications]
    resource = applications[0]

    alert_channels = [
        channel.get_dict() for channel in request.user.alert_channels
    ]

    cname = request.params.get("channel_name")
    cvalue = request.params.get("channel_value")
    event_name = request.params.get("event_name")

    # Select the channel explicitly. Relying on the ``for`` loop variable
    # after the loop would silently use the user's last channel when nothing
    # matched, or raise NameError when the user has no channels at all.
    channel = None
    if cname and cvalue:
        channel = next(
            (
                candidate
                for candidate in request.user.alert_channels
                if candidate.channel_value == cvalue
                and candidate.channel_name == cname
            ),
            None,
        )

    if channel is None:
        # no matching channel - only the selection data is returned
        pass
    elif event_name in ["error_report_alert", "slow_report_alert"]:
        # opened
        new_event = Event(
            resource_id=resource.resource_id,
            event_type=Event.types[event_name],
            start_date=datetime.datetime.utcnow(),
            status=Event.statuses["active"],
            values={"reports": 5, "threshold": 10},
        )
        channel.notify_alert(
            resource=resource, event=new_event, user=request.user, request=request
        )

        # closed
        # NOTE(review): neither accepted event name contains "open", so this
        # replace() leaves the name unchanged and the same event type is used.
        ev_type = Event.types[event_name.replace("open", "close")]
        new_event = Event(
            resource_id=resource.resource_id,
            event_type=ev_type,
            start_date=datetime.datetime.utcnow(),
            status=Event.statuses["closed"],
            values={"reports": 5, "threshold": 10},
        )
        channel.notify_alert(
            resource=resource, event=new_event, user=request.user, request=request
        )
    elif event_name == "notify_reports":
        report = (
            ReportGroupService.by_app_ids(all_possible_app_ids)
            .filter(ReportGroup.report_type == ReportType.error)
            .first()
        )
        # first a notification with two entries, then one with a single entry
        confirmed_reports = [(5, report), (1, report)]
        channel.notify_reports(
            resource=resource,
            user=request.user,
            request=request,
            since_when=datetime.datetime.utcnow(),
            reports=confirmed_reports,
        )
        confirmed_reports = [(5, report)]
        channel.notify_reports(
            resource=resource,
            user=request.user,
            request=request,
            since_when=datetime.datetime.utcnow(),
            reports=confirmed_reports,
        )
    elif event_name == "notify_uptime":
        new_event = Event(
            resource_id=resource.resource_id,
            event_type=Event.types["uptime_alert"],
            start_date=datetime.datetime.utcnow(),
            status=Event.statuses["active"],
            values={"status_code": 500, "tries": 2, "response_time": 0},
        )
        channel.notify_uptime_alert(
            resource=resource, event=new_event, user=request.user, request=request
        )
    elif event_name == "chart_alert":
        # NOTE(review): presumably ``first()`` yields None when there is no
        # active chart alert, which would fail on ``event.resource`` - confirm.
        event = EventService.by_type_and_status(
            event_types=(Event.types["chart_alert"],),
            status_types=(Event.statuses["active"],),
        ).first()
        channel.notify_chart_alert(
            resource=event.resource, event=event, user=request.user, request=request
        )
    elif event_name == "daily_digest":
        since_when = datetime.datetime.utcnow() - datetime.timedelta(hours=8)
        filter_settings = {
            "resource": [resource.resource_id],
            "tags": [{"name": "type", "value": ["error"], "op": None}],
            "type": "error",
            "start_date": since_when,
        }

        reports = ReportGroupService.get_trending(
            request, filter_settings=filter_settings, limit=50
        )
        # NOTE(review): the digest is given the current time rather than the
        # ``since_when`` used for the query above - confirm this is intended.
        channel.send_digest(
            resource=resource,
            user=request.user,
            request=request,
            since_when=datetime.datetime.utcnow(),
            reports=reports,
        )

    return {
        "alert_channels": alert_channels,
        "applications": {
            app.resource_id: app.resource_name for app in applications.all()
        },
    }
def alerting_test(request):
    """
    Allows to test send data on various registered alerting channels

    Reads ``channel_name``, ``channel_value`` and ``event_name`` from
    ``request.params``. When one of the current user's alert channels matches
    both the name and the value, sample notifications of the requested kind
    are sent through it. When no channel matches, nothing is sent.

    Returns a dict with ``alert_channels`` (each channel's ``get_dict()``)
    and ``applications`` (resource id -> resource name) for the applications
    the user can view.
    """
    applications = request.user.resources_with_perms(
        ['view'], resource_types=['application'])
    # what we can select in total
    all_possible_app_ids = [app.resource_id for app in applications]
    resource = applications[0]

    alert_channels = [
        channel.get_dict() for channel in request.user.alert_channels
    ]

    cname = request.params.get('channel_name')
    cvalue = request.params.get('channel_value')
    event_name = request.params.get('event_name')

    # Select the channel explicitly. Relying on the ``for`` loop variable
    # after the loop would silently use the user's last channel when nothing
    # matched, or raise NameError when the user has no channels at all.
    channel = None
    if cname and cvalue:
        channel = next(
            (candidate for candidate in request.user.alert_channels
             if candidate.channel_value == cvalue
             and candidate.channel_name == cname),
            None)

    if channel is None:
        # no matching channel - only the selection data is returned
        pass
    elif event_name in ['error_report_alert', 'slow_report_alert']:
        # opened
        new_event = Event(resource_id=resource.resource_id,
                          event_type=Event.types[event_name],
                          start_date=datetime.datetime.utcnow(),
                          status=Event.statuses['active'],
                          values={'reports': 5, 'threshold': 10})
        channel.notify_alert(resource=resource,
                             event=new_event,
                             user=request.user,
                             request=request)

        # closed
        # NOTE(review): neither accepted event name contains 'open', so this
        # replace() leaves the name unchanged and the same event type is used.
        ev_type = Event.types[event_name.replace('open', 'close')]
        new_event = Event(resource_id=resource.resource_id,
                          event_type=ev_type,
                          start_date=datetime.datetime.utcnow(),
                          status=Event.statuses['closed'],
                          values={'reports': 5, 'threshold': 10})
        channel.notify_alert(resource=resource,
                             event=new_event,
                             user=request.user,
                             request=request)
    elif event_name == 'notify_reports':
        report = ReportGroupService.by_app_ids(all_possible_app_ids) \
            .filter(ReportGroup.report_type == ReportType.error).first()
        # first a notification with two entries, then one with a single entry
        confirmed_reports = [(5, report), (1, report)]
        channel.notify_reports(resource=resource,
                               user=request.user,
                               request=request,
                               since_when=datetime.datetime.utcnow(),
                               reports=confirmed_reports)
        confirmed_reports = [(5, report)]
        channel.notify_reports(resource=resource,
                               user=request.user,
                               request=request,
                               since_when=datetime.datetime.utcnow(),
                               reports=confirmed_reports)
    elif event_name == 'notify_uptime':
        new_event = Event(resource_id=resource.resource_id,
                          event_type=Event.types['uptime_alert'],
                          start_date=datetime.datetime.utcnow(),
                          status=Event.statuses['active'],
                          values={"status_code": 500,
                                  "tries": 2,
                                  "response_time": 0})
        channel.notify_uptime_alert(resource=resource,
                                    event=new_event,
                                    user=request.user,
                                    request=request)
    elif event_name == 'chart_alert':
        # NOTE(review): presumably ``first()`` yields None when there is no
        # active chart alert, which would fail on ``event.resource`` - confirm.
        event = EventService.by_type_and_status(
            event_types=(Event.types['chart_alert'], ),
            status_types=(Event.statuses['active'], )).first()
        channel.notify_chart_alert(resource=event.resource,
                                   event=event,
                                   user=request.user,
                                   request=request)
    elif event_name == 'daily_digest':
        since_when = datetime.datetime.utcnow() - datetime.timedelta(
            hours=8)
        filter_settings = {
            'resource': [resource.resource_id],
            'tags': [{'name': 'type', 'value': ['error'], 'op': None}],
            'type': 'error',
            'start_date': since_when,
        }

        reports = ReportGroupService.get_trending(
            request, filter_settings=filter_settings, limit=50)
        # NOTE(review): the digest is given the current time rather than the
        # ``since_when`` used for the query above - confirm this is intended.
        channel.send_digest(resource=resource,
                            user=request.user,
                            request=request,
                            since_when=datetime.datetime.utcnow(),
                            reports=reports)

    return {
        'alert_channels': alert_channels,
        'applications': {
            app.resource_id: app.resource_name
            for app in applications.all()
        },
    }
def alert_chart(pkey, chart_uuid):
    """
    Evaluate an alert action's rule against a chart's data series and open or
    close the corresponding ``chart_alert`` event.

    ``pkey`` identifies the alert channel action, ``chart_uuid`` the dashboard
    chart. The chart config is turned into a search against ``Datastores.es``;
    the resulting series is walked newest-first until a step matches the
    rule. Steps seen before that match are remembered as the "finished"
    interval. Depending on whether a match and/or an active event exist, a
    new event is created and alerted on, or the active event is closed.

    Returns ``None``; returns early (without the timing log) when the search
    config has no index names.
    """
    started_at = datetime.utcnow()
    request = get_current_request()
    alert_action = AlertChannelActionService.by_pkey(pkey)
    chart = DashboardChartService.by_uuid(chart_uuid)
    chart.migrate_json_config()
    resource = chart.dashboard
    json_body = chart.config
    ids_to_override = [json_body["resource"]]
    filter_settings = build_filter_settings_from_chart_config(
        request, json_body, override_app_ids=ids_to_override
    )

    log.warning("alert_chart, resource:{}, chart:{}".format(resource, chart_uuid))

    # fall back to the chart's own date boundaries when the filter has none
    start_date, end_date = determine_date_boundries_json(json_body)
    if not filter_settings["start_date"]:
        midnight = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
        filter_settings["start_date"] = midnight
    if not filter_settings["end_date"]:
        filter_settings["end_date"] = end_date

    event_type = Event.types["chart_alert"]
    recent_events = EventService.for_resource(
        [resource.resource_id], event_type=event_type, target_uuid=chart_uuid, limit=20
    )
    # keep the first active and the first closed event the query yields
    open_event = None
    latest_closed_event = None
    for candidate in recent_events:
        if candidate.status == Event.statuses["active"] and not open_event:
            open_event = candidate
        if candidate.status == Event.statuses["closed"] and not latest_closed_event:
            latest_closed_event = candidate

    if latest_closed_event:
        # only look at data newer than the previously closed alert
        filter_settings["start_date"] = latest_closed_event.end_date

    es_config = transform_json_to_es_config(
        request, json_body, filter_settings, ids_to_override=ids_to_override
    )
    if not es_config["index_names"]:
        return

    result = Datastores.es.search(
        body=es_config["query"], index=es_config["index_names"], doc_type="log", size=0
    )
    series, info_dict = parse_es_result(result, es_config, json_config=json_body)

    # we need to make a deepcopy since we will mutate it
    rule_config = copy.deepcopy(alert_action.rule)
    rule_obj = RuleService.rule_from_config(
        rule_config, alert_action.config, info_dict["system_labels"]
    )

    matched_interval = None
    finished_interval = None
    for step in reversed(series):
        if rule_obj.match(step):
            log.info("matched start")
            # the first match ends the scan
            matched_interval = step
            break
        finished_interval = step

    if matched_interval and not open_event:
        log.warning("ALERT: OPEN: %s %s" % (event_type, resource))
        step_size = None
        parent_agg = json_body.get("parentAgg")
        if parent_agg and parent_agg["type"] == "time_histogram":
            interval_name = parent_agg["config"]["interval"]
            step_size = time_deltas[interval_name]["delta"].total_seconds()
        matched_step_values = {
            "values": matched_interval,
            "labels": info_dict["system_labels"],
        }
        values_dict = {
            "matched_rule": alert_action.get_dict(),
            "matched_step_values": matched_step_values,
            "start_interval": matched_interval["key"],
            "end_interval": None,
            "resource": chart.config.get("resource"),
            "chart_name": chart.name,
            "chart_uuid": chart_uuid,
            "step_size": step_size,
            "action_name": alert_action.name,
        }
        new_event = Event(
            resource_id=resource.resource_id,
            event_type=event_type,
            status=Event.statuses["active"],
            values=values_dict,
            target_uuid=chart_uuid,
        )
        DBSession.add(new_event)
        DBSession.flush()
        new_event.send_alerts(request=request, resource=resource)
    elif open_event:
        if matched_interval:
            log.info("ALERT: PROGRESS: %s %s" % (event_type, resource))
        _close_chart_event_if_finished(open_event, finished_interval)

    took = datetime.utcnow() - started_at
    log.warning("chart alert rule check took: {}".format(took))


def _close_chart_event_if_finished(open_event, finished_interval):
    """Record the end interval on ``open_event`` and close it, if one exists."""
    if not finished_interval:
        return
    # assign a fresh copy of the values before mutating them
    open_event.values = copy.deepcopy(open_event.values)
    end_interval = finished_interval["key"].strftime(DATE_FORMAT)
    open_event.values["end_interval"] = end_interval
    open_event.close()