def test_get_body(self):
    """Check timestamp fields on an alert and its body, before and after processing.

    A freshly constructed alert is expected to carry ``create_time`` and
    ``receive_time`` but no ``last_receive_time`` / ``update_time``. Once it
    has been through ``process_alert`` all four are expected to be datetimes,
    and ``get_body()`` is expected to render every populated one as ``str``.
    """
    from flask import g

    with self.app.test_request_context('/'):
        g.login = '******'

        alert_in = Alert(
            resource='test1',
            event='event1',
            environment='Development',
            service=['svc1', 'svc2']
        )

        # Dedicated assertions give clearer failure messages than
        # assertTrue(isinstance(...)) / assertEqual(x, None).
        self.assertIsInstance(alert_in.create_time, datetime)
        self.assertIsNone(alert_in.last_receive_time)
        self.assertIsInstance(alert_in.receive_time, datetime)
        self.assertIsNone(alert_in.update_time)

        body = alert_in.get_body()
        self.assertIsInstance(body['createTime'], str)
        self.assertIsNone(body['lastReceiveTime'])
        self.assertIsInstance(body['receiveTime'], str)
        self.assertIsNone(body['updateTime'])

        alert_out = process_alert(alert_in)

        self.assertIsInstance(alert_out.create_time, datetime)
        self.assertIsInstance(alert_out.last_receive_time, datetime)
        self.assertIsInstance(alert_out.receive_time, datetime)
        self.assertIsInstance(alert_out.update_time, datetime)

        body = alert_out.get_body()
        self.assertIsInstance(body['createTime'], str)
        self.assertIsInstance(body['lastReceiveTime'], str)
        self.assertIsInstance(body['receiveTime'], str)
        self.assertIsInstance(body['updateTime'], str)
def get_counts():
    """Return alert counts grouped by severity and by status as a JSON response.

    The filter is built from the request's query-string arguments; ``total``
    is the sum of the per-severity counts.
    """
    filters = qb.from_params(request.args)
    by_severity = Alert.get_counts_by_severity(filters)
    by_status = Alert.get_counts_by_status(filters)
    overall = sum(by_severity.values())
    return jsonify(
        status="ok",
        total=overall,
        severityCounts=by_severity,
        statusCounts=by_status
    )
def process_alert(alert: Alert) -> Alert:
    """Run an incoming alert through pre-receive plugins, storage and post-receive plugins.

    Steps, in order:
      1. Each routed plugin's ``pre_receive`` hook may replace the alert. The
         loop stops (and post-receive hooks are skipped) as soon as the alert
         reports ``is_suppressed``.
      2. The alert is de-duplicated, correlated with an existing alert, or
         created, depending on ``is_duplicate()`` / ``is_correlated()``.
      3. Each routed plugin's ``post_receive`` hook may replace the alert.

    Raises:
        RejectException, HeartbeatReceived, BlackoutPeriod, RateLimit:
            re-raised untouched from a pre-receive hook.
        RuntimeError: a pre-receive hook failed and ``PLUGINS_RAISE_ON_ERROR`` is set.
        SyntaxError: a pre-receive hook returned a falsy alert.
        ApiError: storage failed, or a post-receive hook failed and
            ``PLUGINS_RAISE_ON_ERROR`` is set.
    """
    wanted_plugins, wanted_config = plugins.routing(alert)

    # ---- pre-receive hooks -------------------------------------------------
    skip_plugins = False
    for plugin in wanted_plugins:
        if alert.is_suppressed:
            skip_plugins = True
            break
        try:
            alert = plugin.pre_receive(alert, config=wanted_config)
        except TypeError:
            # for backward compatibility with hooks that take no ``config``
            alert = plugin.pre_receive(alert)
        except (RejectException, HeartbeatReceived, BlackoutPeriod, RateLimit):
            raise
        except Exception as e:
            raise_on_error = current_app.config['PLUGINS_RAISE_ON_ERROR']
            message = "Error while running pre-receive plugin '{}': {}".format(plugin.name, str(e))
            if raise_on_error:
                raise RuntimeError(message)
            logging.error(message)
        if not alert:
            raise SyntaxError("Plugin '%s' pre-receive hook did not return modified alert" % plugin.name)

    # ---- store: de-duplicate, correlate or create --------------------------
    try:
        duplicate_of = alert.is_duplicate()
        if duplicate_of:
            alert = alert.deduplicate(duplicate_of)
        else:
            correlated_with = alert.is_correlated()
            if correlated_with:
                alert = alert.update(correlated_with)
            else:
                alert = alert.create()
    except Exception as e:
        raise ApiError(str(e))

    # ---- post-receive hooks ------------------------------------------------
    updated = None
    if not skip_plugins:
        for plugin in wanted_plugins:
            try:
                updated = plugin.post_receive(alert, config=wanted_config)
            except TypeError:
                # for backward compatibility with hooks that take no ``config``
                updated = plugin.post_receive(alert)
            except Exception as e:
                raise_on_error = current_app.config['PLUGINS_RAISE_ON_ERROR']
                message = "Error while running post-receive plugin '{}': {}".format(plugin.name, str(e))
                if raise_on_error:
                    raise ApiError(message)
                logging.error(message)
            if updated:
                alert = updated

    # Persist tags/attributes only when the last post-receive result was truthy.
    if updated:
        alert.tag(alert.tags)
        alert.update_attributes(alert.attributes)

    return alert
def housekeeping():
    """Trigger alert housekeeping using thresholds taken from the query string.

    Query parameters ``expired`` and ``info`` (integers, in hours) default to
    2 and 12 respectively. A value that cannot be converted to ``int``
    raises ``ApiError`` with status 400. Returns ``'OK'`` on success, or a
    failure message with status 503 if ``Alert.housekeeping`` raises.
    """
    DEFAULT_EXPIRED_DELETE_HRS = 2  # hours
    DEFAULT_INFO_DELETE_HRS = 12  # hours

    args = request.args
    try:
        expired_threshold = int(args.get('expired', DEFAULT_EXPIRED_DELETE_HRS))
        info_threshold = int(args.get('info', DEFAULT_INFO_DELETE_HRS))
    except Exception as e:
        raise ApiError(str(e), 400)

    try:
        Alert.housekeeping(expired_threshold, info_threshold)
    except Exception as e:
        return 'HOUSEKEEPING FAILED: %s' % e, 503
    return 'OK'
def pagerduty():
    """Apply status changes from a PagerDuty webhook payload to matching alerts.

    Each entry of the payload's ``messages`` list is parsed with
    ``parse_pagerduty`` and the alert whose id equals the incident key gets
    its status set. Only the outcome of the last message decides the
    response.

    Raises:
        ApiError: 400 for a missing/invalid payload or incident key, 404 when
            the alert is not found, 500 on lookup/update failures or when the
            final update reports no change.
    """
    payload = request.json
    if not (payload and 'messages' in payload):
        raise ApiError("no messages in PagerDuty data payload", 400)

    status_changed = False
    for message in payload['messages']:
        try:
            incident_key, status, text = parse_pagerduty(message)
        except ValueError as e:
            raise ApiError(str(e), 400)

        if not incident_key:
            raise ApiError('no incident key in PagerDuty data payload', 400)

        customer = g.get('customer', None)
        try:
            alert = Alert.find_by_id(id=incident_key, customer=customer)
        except Exception as e:
            raise ApiError(str(e), 500)

        if not alert:
            raise ApiError("not found", 404)

        try:
            status_changed = alert.set_status(status, text)
        except Exception as e:
            raise ApiError(str(e), 500)

    if not status_changed:
        raise ApiError("update PagerDuty incident status failed", 500)
    return jsonify(status="ok"), 200
def action_alerts(alerts: List[str], action: str, text: str, timeout: int) -> None:
    """Apply one operator action to every alert id in ``alerts``.

    For each id the alert is loaded, passed through ``process_action`` and
    actioned via ``from_action``. If that changed the alert's status, the
    alert is also passed through ``process_status`` / ``from_status``.
    Failures for one alert are recorded and do not stop the others.

    Args:
        alerts: alert ids to act on.
        action: action requested by the caller; used unchanged as the input
            for every alert.
        text: text requested by the caller; used unchanged as the input for
            every alert.
        timeout: forwarded to ``from_action`` / ``from_status``.

    Returns:
        None. The ``updated`` / ``errors`` lists are collected but not
        returned or otherwise reported by this function.
    """
    updated = []
    errors = []
    for alert_id in alerts:
        alert = Alert.find_by_id(alert_id)

        try:
            # A missing alert (None) fails here with AttributeError and is
            # recorded like any other per-alert error.
            previous_status = alert.status
            # Fix: keep plugin-adjusted values in per-alert names. Rebinding
            # ``action`` / ``text`` made one alert's plugin output the input
            # for every subsequent alert.
            alert, alert_action, alert_text = process_action(alert, action, text)
            alert = alert.from_action(alert_action, alert_text, timeout)
        except Exception as e:
            # Covers RejectException and InvalidAction too; all were handled
            # identically.
            errors.append(str(e))
            continue

        if previous_status != alert.status:
            try:
                alert, status, status_text = process_status(alert, alert.status, alert_text)
                alert = alert.from_status(status, status_text, timeout)
            except Exception as e:
                errors.append(str(e))
                continue

        updated.append(alert.id)
def incoming(self, query_string, payload):
    """Handle a Telegram webhook callback and apply the requested command to an alert.

    The callback data is split on the first space into a command and an
    alert id. Supported commands (after stripping leading ``/``): ``open``,
    ``ack``, ``close`` (status change), ``watch`` / ``unwatch`` and
    ``blackout``. Payloads without ``callback_query`` are acknowledged with
    an informational message.

    Args:
        query_string: unused here.
        payload: decoded webhook body.

    Returns:
        A JSON response.
    """
    if 'callback_query' not in payload:
        return jsonify(status='ok', message='no callback_query in Telegram message')

    callback = payload['callback_query']
    author = callback['from']
    user = '******'.format(author.get('first_name'), author.get('last_name'))
    command, alert_id = callback['data'].split(' ', 1)

    customers = g.get('customers', None)
    alert = Alert.find_by_id(alert_id, customers=customers)
    if not alert:
        # Fix: this response used to be built and discarded, so execution
        # continued and dereferenced a None alert below.
        return jsonify(status='error', message='alert not found for Telegram message')

    action = command.lstrip('/')
    if action in ['open', 'ack', 'close']:
        alert.set_status(status=action, text='status change via Telegram')
    elif action in ['watch', 'unwatch']:
        # NOTE(review): both 'watch' and 'unwatch' call untag(); confirm
        # whether 'watch' was meant to add a tag instead.
        alert.untag(tags=['{}:{}'.format(action, user)])
    elif action == 'blackout':
        # NOTE(review): ``command`` is the text before the first space; verify
        # that it really carries 'environment|resource|event'.
        environment, resource, event = command.split('|', 2)
        blackout = Blackout(environment, resource=resource, event=event)
        blackout.create()

    send_message_reply(alert, action, user, payload)

    text = 'alert updated via telegram webhook'
    write_audit_trail.send(current_app._get_current_object(), event='webhook-updated', message=text,
                           user=g.login, customers=g.customers, scopes=g.scopes, resource_id=alert.id,
                           type='alert', request=request)

    return jsonify(status='ok')
def incoming(self, query_string, payload):
    """Translate a Grafana notification into alerts.

    * ``state == 'alerting'``: returns one parsed alert per ``evalMatches`` entry.
    * ``state == 'ok'`` with a ``ruleId``: every stored alert whose
      ``attributes.ruleId`` matches is set to severity ``normal`` / status
      ``closed``, re-processed, and returned.
    * anything else: ``ApiError`` 400.

    Raises:
        ApiError: 400 for an unusable payload, 403 when processing is
            rejected, 500 for lookup or processing failures.
    """
    # Same access pattern as a chained ``payload and payload['state'] == ...``:
    # ``state`` is only read for a truthy payload.
    state = payload['state'] if payload else None

    if state == 'alerting':
        matches = payload.get('evalMatches', [])
        return [parse_grafana(payload, match, query_string) for match in matches]

    if state == 'ok' and payload.get('ruleId'):
        try:
            query = qb.from_dict({'attributes.ruleId': str(payload['ruleId'])})
            existing = Alert.find_all(query)
        except Exception as e:
            raise ApiError(str(e), 500)

        closed = []
        for stored in existing:
            stored.severity = 'normal'
            stored.status = 'closed'
            try:
                processed = process_alert(stored)
            except RejectException as e:
                raise ApiError(str(e), 403)
            except Exception as e:
                raise ApiError(str(e), 500)
            closed.append(processed)
        return closed

    raise ApiError('no alerts in Grafana notification payload', 400)
def incoming(self, query_string, payload):
    """Apply status changes from a PagerDuty webhook payload to matching alerts.

    Every entry in ``payload['messages']`` is parsed with ``parse_pagerduty``
    and the alert whose id equals the incident key has its status set. The
    response reflects the result of the last status update.

    Raises:
        ApiError: 400 for a missing/invalid payload or incident key, 404 when
            the alert is not found, 500 on lookup/update failures or when the
            final update reports no change.
    """
    if not (payload and 'messages' in payload):
        raise ApiError('no messages in PagerDuty data payload', 400)

    changed = False
    for message in payload['messages']:
        try:
            incident_key, status, text = parse_pagerduty(message)
        except ValueError as e:
            raise ApiError(str(e), 400)

        if not incident_key:
            raise ApiError('no incident key in PagerDuty data payload', 400)

        customers = g.get('customers', None)
        try:
            alert = Alert.find_by_id(id=incident_key, customers=customers)
        except Exception as e:
            raise ApiError(str(e), 500)

        if not alert:
            raise ApiError('not found', 404)

        try:
            changed = alert.set_status(status, text)
        except Exception as e:
            raise ApiError(str(e), 500)

    if not changed:
        raise ApiError('update PagerDuty incident status failed', 500)
    return jsonify(status='ok')
def receive():
    """Accept an alert posted as JSON, process it and report the outcome.

    Responses:
        201 with the stored alert on success;
        202 when processing raises ``BlackoutPeriod``;
        429 when processing raises ``RateLimit``.

    Raises:
        ApiError: 400 when the body cannot be parsed, 403 when the alert is
            rejected, 500 for any other failure or a falsy processing result.
    """
    try:
        incomingAlert = Alert.parse(request.json)
    except ValueError as e:
        raise ApiError(str(e), 400)

    # A customer bound to the request context overrides the parsed one.
    customer = g.get('customer', None)
    if customer:
        incomingAlert.customer = customer

    add_remote_ip(request, incomingAlert)

    try:
        alert = process_alert(incomingAlert)
    except RejectException as e:
        raise ApiError(str(e), 403)
    except RateLimit as e:
        return jsonify(status="error", message=str(e), id=incomingAlert.id), 429
    except BlackoutPeriod as e:
        return jsonify(status="ok", message=str(e), id=incomingAlert.id), 202
    except Exception as e:
        raise ApiError(str(e), 500)

    if not alert:
        raise ApiError("insert or update of received alert failed", 500)
    return jsonify(status="ok", id=alert.id, alert=alert.serialize), 201
def process_action(alert: Alert, action: str, text: str) -> Tuple[Alert, str, str]:
    """Offer an operator action to each routed plugin's ``take_action`` hook.

    A hook may return either an ``(alert, action, text)`` triple or just an
    alert; whichever it returns replaces the current values. Iteration stops
    once the alert reports ``is_suppressed``. Afterwards, attributes whose
    value is ``None`` are dropped from the alert.

    Raises:
        RejectException: re-raised untouched from a hook.
        ApiError: a hook failed and ``PLUGINS_RAISE_ON_ERROR`` is set.

    Returns:
        The (possibly replaced) alert, action and text.
    """
    wanted_plugins, wanted_config = plugins.routing(alert)

    updated = None
    for plugin in wanted_plugins:
        if alert.is_suppressed:
            break
        try:
            updated = plugin.take_action(alert, action, text, config=wanted_config)
        except NotImplementedError:
            pass  # plugin does not support action() method
        except RejectException:
            raise
        except Exception as e:
            raise_on_error = current_app.config['PLUGINS_RAISE_ON_ERROR']
            message = "Error while running action plugin '{}': {}".format(plugin.name, str(e))
            if raise_on_error:
                raise ApiError(message)
            logging.error(message)

        if updated:
            try:
                alert, action, text = updated
            except Exception:
                # hook returned a bare alert rather than a triple
                alert = updated

    # remove keys from attributes with None values
    alert.attributes = {
        key: value
        for key, value in alert.attributes.items()
        if value is not None
    }

    return alert, action, text
def telegram():
    """Handle a Telegram webhook callback posted as JSON.

    The callback data is split on the first space into a command and an
    alert id. Supported commands (after stripping leading ``/``): ``open``,
    ``ack``, ``close`` (status change), ``watch`` / ``unwatch`` and
    ``blackout``.

    Returns:
        ``status="ok"`` on success; an error response with 404 when the alert
        does not exist, or with 400 when the payload has no ``callback_query``.
    """
    data = request.json
    if 'callback_query' not in data:
        return jsonify(status="error", message="no callback_query in Telegram message"), 400

    callback = data['callback_query']
    author = callback['from']
    user = "******".format(author.get('first_name'), author.get('last_name'))
    command, alert_id = callback['data'].split(' ', 1)

    alert = Alert.find_by_id(alert_id)
    if not alert:
        # Fix: this response used to be built and discarded, so execution
        # continued and dereferenced a None alert below.
        return jsonify(status="error", message="alert not found for Telegram message"), 404

    action = command.lstrip('/')
    if action in ['open', 'ack', 'close']:
        alert.set_status(status=action, text='status change via Telegram')
    elif action in ['watch', 'unwatch']:
        # NOTE(review): both 'watch' and 'unwatch' call untag(); confirm
        # whether 'watch' was meant to add a tag instead.
        alert.untag(tags=["{}:{}".format(action, user)])
    elif action == 'blackout':
        # NOTE(review): ``alert`` is the object returned by find_by_id, yet it
        # is split like a string here; confirm which value actually carries
        # 'environment|resource|event'.
        environment, resource, event = alert.split('|', 2)
        blackout = Blackout(environment, resource=resource, event=event)
        blackout.create()

    send_message_reply(alert, action, user, data)
    return jsonify(status="ok")
def bulk_set_status():
    """Set the same status on every alert matching the query-string filter.

    JSON body: ``status`` (required), ``text`` (defaults to
    ``'bulk status update'``) and ``timeout`` (optional). Each matching alert
    goes through ``process_status`` before ``set_status`` is called.

    Raises:
        ApiError: 400 when ``status`` is missing, 404 when nothing matches,
            500 (with the collected messages in ``errors``) when
            ``process_status`` failed for any alert.

    Returns:
        JSON with the ids of the alerts whose status was set and their count.
    """
    status = request.json.get('status', None)
    text = request.json.get('text', 'bulk status update')
    timeout = request.json.get('timeout', None)

    if not status:
        raise ApiError("must supply 'status' as json data", 400)

    query = qb.from_params(request.args)
    alerts = Alert.find_all(query)

    if not alerts:
        raise ApiError('not found', 404)

    updated = []
    errors = []
    for alert in alerts:
        try:
            # Fix: keep plugin-adjusted values in per-alert names. Rebinding
            # ``status`` / ``text`` fed one alert's plugin output into every
            # subsequent alert.
            alert, alert_status, alert_text = process_status(alert, status, text)
        except Exception as e:
            # Covers RejectException too; both were handled identically.
            errors.append(str(e))
            continue

        if alert.set_status(alert_status, alert_text, timeout):
            updated.append(alert.id)

    if errors:
        raise ApiError('failed to bulk set alert status', 500, errors=errors)
    return jsonify(status='ok', updated=updated, count=len(updated))
def process_status(alert: Alert, status: str, text: str) -> Tuple[Alert, str, str]:
    """Offer a status change to each routed plugin's ``status_change`` hook.

    A hook may return either an ``(alert, status, text)`` triple or just an
    alert; whichever it returns replaces the current values. Iteration stops
    once the alert reports ``is_suppressed``. Afterwards, attributes whose
    value is ``None`` are dropped from the alert.

    Raises:
        RejectException: re-raised untouched from a hook.
        ApiError: a hook failed and ``PLUGINS_RAISE_ON_ERROR`` is set.

    Returns:
        The (possibly replaced) alert, status and text.
    """
    wanted_plugins, wanted_config = plugins.routing(alert)

    updated = None
    for plugin in wanted_plugins:
        if alert.is_suppressed:
            break
        try:
            updated = plugin.status_change(alert, status, text, config=wanted_config)
        except TypeError:
            # for backward compatibility with hooks that take no ``config``
            updated = plugin.status_change(alert, status, text)
        except RejectException:
            raise
        except Exception as e:
            raise_on_error = current_app.config['PLUGINS_RAISE_ON_ERROR']
            message = "Error while running status plugin '{}': {}".format(plugin.name, str(e))
            if raise_on_error:
                raise ApiError(message)
            logging.error(message)

        if updated:
            try:
                alert, status, text = updated
            except Exception:
                # hook returned a bare alert rather than a triple
                alert = updated

    # remove keys from attributes with None values
    alert.attributes = {
        key: value
        for key, value in alert.attributes.items()
        if value is not None
    }

    return alert, status, text
def bulk_update_attributes():
    """Update attributes on every alert matching the query-string filter.

    Raises:
        ApiError: 400 when the JSON body has no (or an empty) ``attributes`` value.

    Returns:
        JSON containing whatever ``Alert.update_attributes_find_all``
        returned, plus its length as ``count``.
    """
    body = request.json
    if not body.get('attributes', None):
        raise ApiError("must supply 'attributes' as json data", 400)

    filters = qb.from_params(request.args)
    changed = Alert.update_attributes_find_all(filters, request.json['attributes'])

    return jsonify(status='ok', updated=changed, count=len(changed))
def bulk_untag_alert():
    """Remove tags from every alert matching the query-string filter.

    Raises:
        ApiError: 400 when the JSON body has no (or an empty) ``tags`` value.

    Returns:
        JSON containing whatever ``Alert.untag_find_all`` returned, plus its
        length as ``count``.
    """
    if not request.json.get('tags', None):
        # Fix: a missing request field is a client error; pass 400 explicitly
        # like the equivalent 'attributes' / 'status' validations do, rather
        # than relying on ApiError's default code.
        raise ApiError("must supply 'tags' as json list", 400)

    query = qb.from_params(request.args)
    updated = Alert.untag_find_all(query, tags=request.json['tags'])

    return jsonify(status='ok', updated=updated, count=len(updated))
def get_alert(alert_id):
    """Return a single alert as JSON.

    The lookup is scoped by the ``customer`` value stored on ``g``, if any.

    Raises:
        ApiError: 404 when no alert is found.
    """
    found = Alert.find_by_id(alert_id, g.get('customer', None))
    if not found:
        raise ApiError("not found", 404)
    return jsonify(status="ok", total=1, alert=found.serialize)
def delete_alert(alert_id):
    """Delete a single alert.

    The lookup is scoped by the ``customer`` value stored on ``g``, if any.

    Raises:
        ApiError: 404 when no alert is found, 500 when ``delete()`` returns
            a falsy value.
    """
    target = Alert.find_by_id(alert_id, g.get('customer', None))
    if not target:
        raise ApiError("not found", 404)

    if not target.delete():
        raise ApiError("failed to delete alert", 500)
    return jsonify(status="ok")
def history():
    """Return alert history entries matching the query-string filter.

    The filter is restricted to ``g.customers``; page and page size come from
    the request arguments. ``total`` is the number of entries returned.
    """
    query = qb.from_params(request.args, customers=g.customers)
    paging = Page.from_params(request.args, items=0)
    entries = Alert.get_history(query, paging.page, paging.page_size)

    if not entries:
        return jsonify(status='ok', message='not found', history=[], total=0)

    serialized = [entry.serialize for entry in entries]
    return jsonify(status='ok', history=serialized, total=len(entries))
def prometheus_metrics():
    """Return all gauges, counters and timers serialized in ``prometheus`` format.

    The total-alert gauge is refreshed from ``Alert.get_count()`` before the
    metrics are collected.
    """
    total_alert_gauge.set(Alert.get_count())

    collected = Gauge.find_all()
    for metric_type in (Counter, Timer):
        collected += metric_type.find_all()

    lines = [metric.serialize(format='prometheus') for metric in collected]
    return Response(lines, content_type='text/plain; version=0.0.4; charset=utf-8')
def receive():
    """Accept an alert posted as JSON, process it and write an audit-trail entry.

    Responses:
        201 with the stored alert on success;
        202 when processing raises ``HeartbeatReceived``, ``BlackoutPeriod``
        or ``ForwardingLoop``;
        429 when processing raises ``RateLimit``.

    Raises:
        ApiError: 400 when the body cannot be parsed, 403 when the alert is
            rejected, 500 for any other failure or a falsy processing result.
    """
    try:
        alert = Alert.parse(request.json)
    except ValueError as e:
        raise ApiError(str(e), 400)

    alert.customer = assign_customer(wanted=alert.customer)

    def audit_trail_alert(event: str):
        # Reads ``alert`` from the enclosing scope at call time.
        write_audit_trail.send(current_app._get_current_object(), event=event, message=alert.text,
                               user=g.login, customers=g.customers, scopes=g.scopes, resource_id=alert.id,
                               type='alert', request=request)

    try:
        alert = process_alert(alert)
    except RejectException as e:
        audit_trail_alert(event='alert-rejected')
        raise ApiError(str(e), 403)
    except RateLimit as e:
        audit_trail_alert(event='alert-rate-limited')
        return jsonify(status='error', message=str(e), id=alert.id), 429
    except HeartbeatReceived as heartbeat:
        audit_trail_alert(event='alert-heartbeat')
        return jsonify(status='ok', message=str(heartbeat), id=heartbeat.id), 202
    except BlackoutPeriod as e:
        audit_trail_alert(event='alert-blackout')
        return jsonify(status='ok', message=str(e), id=alert.id), 202
    except ForwardingLoop as e:
        return jsonify(status='ok', message=str(e)), 202
    except Exception as e:
        raise ApiError(str(e), 500)

    # Fix: check the result before the audit call reads alert.text/alert.id.
    # Previously a falsy result failed inside the audit call and the intended
    # ApiError below could never be raised.
    if not alert:
        raise ApiError('insert or update of received alert failed', 500)

    audit_trail_alert(event='alert-received')

    return jsonify(status='ok', id=alert.id, alert=alert.serialize), 201
def status():
    """Return application name, version, current time, uptime and all metrics as JSON.

    ``time`` is the current epoch time in milliseconds; ``uptime`` is the
    difference between that and the module-level ``started`` value. The
    total-alert gauge is refreshed before metrics are collected.
    """
    now = int(time.time() * 1000)
    total_alert_gauge.set(Alert.get_count())

    metrics = Gauge.find_all()
    for metric_type in (Counter, Timer, Switch):
        metrics.extend(metric_type.find_all())

    return jsonify(
        application="alerta",
        version=__version__,
        time=now,
        uptime=int(now - started),
        metrics=[m.serialize() for m in metrics]
    )
def search_alerts():
    """Return one page of alerts matching the query-string filter, with counts.

    The response carries severity/status counts, paging details, the
    ``auto-refresh-allow`` switch state and ``lastTime``: the latest
    ``last_receive_time`` among the returned alerts, or the query time when
    nothing matched.
    """
    query_time = datetime.utcnow()
    query = qb.from_params(request.args, query_time)

    severity_count = Alert.get_counts_by_severity(query)
    status_count = Alert.get_counts_by_status(query)
    total = sum(severity_count.values())

    paging = Page.from_params(request.args, total)
    alerts = Alert.find_all(query, paging.page, paging.page_size)

    if not alerts:
        return jsonify(
            status="ok",
            message="not found",
            page=paging.page,
            pageSize=paging.page_size,
            pages=0,
            more=False,
            alerts=[],
            total=0,
            severityCounts=severity_count,
            statusCounts=status_count,
            lastTime=query_time,
            autoRefresh=Switch.find_by_name('auto-refresh-allow').is_on
        )

    return jsonify(
        status="ok",
        page=paging.page,
        pageSize=paging.page_size,
        pages=paging.pages,
        more=paging.has_more,
        alerts=[item.serialize for item in alerts],
        total=total,
        statusCounts=status_count,
        severityCounts=severity_count,
        lastTime=max([item.last_receive_time for item in alerts]),
        autoRefresh=Switch.find_by_name('auto-refresh-allow').is_on
    )
def search_alerts():
    """Return one page of alerts matching the query-string filter, with counts.

    The filter is restricted to ``g.customers``. The response carries
    severity/status counts, paging details, the ``auto-refresh-allow`` switch
    state and ``lastTime``: the latest ``last_receive_time`` among the
    returned alerts, or the query time when nothing matched.
    """
    query_time = datetime.utcnow()
    query = qb.from_params(request.args, customers=g.customers, query_time=query_time)

    severity_count = Alert.get_counts_by_severity(query)
    status_count = Alert.get_counts_by_status(query)
    total = sum(severity_count.values())

    paging = Page.from_params(request.args, total)
    alerts = Alert.find_all(query, paging.page, paging.page_size)

    if not alerts:
        return jsonify(
            status='ok',
            message='not found',
            page=paging.page,
            pageSize=paging.page_size,
            pages=0,
            more=False,
            alerts=[],
            total=0,
            severityCounts=severity_count,
            statusCounts=status_count,
            lastTime=query_time,
            autoRefresh=Switch.find_by_name('auto-refresh-allow').is_on)

    return jsonify(
        status='ok',
        page=paging.page,
        pageSize=paging.page_size,
        pages=paging.pages,
        more=paging.has_more,
        alerts=[item.serialize for item in alerts],
        total=total,
        statusCounts=status_count,
        severityCounts=severity_count,
        lastTime=max([item.last_receive_time for item in alerts]),
        autoRefresh=Switch.find_by_name('auto-refresh-allow').is_on)
def incoming(self, query_string, payload):
    """Build an ``Alert`` from a Stackdriver incident notification.

    State handling:
        ``open``         -> create_time from ``started_at``
        ``acknowledged`` -> status ``ack``
        ``closed``       -> severity ``ok``, create_time from ``ended_at``
        anything else    -> severity ``indeterminate``

    Severity otherwise comes from the incident's ``severity`` field, falling
    back to ``critical``.
    """
    incident = payload['incident']
    state = incident['state']

    # 'documentation' is an optional field that you can use to customize
    # your alert sending a json
    if 'documentation' in incident:
        try:
            extra = json.loads(incident['documentation']['content'])
            incident.update(extra)
        except Exception:
            current_app.logger.warning("Invalid documentation content: '{}'".format(incident['documentation']))

    status = None
    create_time = None  # type: ignore
    severity = incident.get('severity', 'critical')

    policy_name = incident['policy_name']
    service = [policy_name] if policy_name else []

    if state == 'open':
        create_time = datetime.utcfromtimestamp(incident['started_at'])
    elif state == 'acknowledged':
        status = 'ack'
    elif state == 'closed':
        severity = 'ok'
        create_time = datetime.utcfromtimestamp(incident['ended_at'])
    else:
        severity = 'indeterminate'

    return Alert(
        resource=incident['resource_name'],
        event=incident['condition_name'],
        environment=incident.get('environment', 'Production'),
        severity=severity,
        status=status,
        service=service,
        group=incident.get('group', 'Cloud'),
        text=incident['summary'],
        attributes={
            'incidentId': incident['incident_id'],
            'resourceId': incident['resource_id'],
            'moreInfo': '<a href="%s" target="_blank">Stackdriver Console</a>' % incident['url']
        },
        customer=incident.get('customer'),
        origin=incident.get('origin', 'Stackdriver'),
        event_type='stackdriverAlert',
        create_time=create_time,
        raw_data=payload
    )
def incoming(self, path, query_string, payload):
    """Build an ``Alert`` from a New Relic notification.

    State/severity mapping (``current_state`` lower-cased):
        ``open``         -> status ``open``, severity from payload
        ``acknowledged`` -> status ``ack``, severity from payload
        ``closed``       -> status ``closed``, severity ``ok``
        anything else    -> status ``open``; severity from payload, with
                            ``info`` mapped to ``informational``

    A severity not present in ``SEVERITY_MAP`` becomes ``informational`` if
    it is ``info``, otherwise ``unknown``.

    Raises:
        ValueError: the payload has no ``version`` key.
    """
    if 'version' not in payload:
        raise ValueError('New Relic Legacy Alerting is not supported')

    state = payload['current_state'].lower()
    if state == 'closed':
        # the payload's own severity is not consulted for closed incidents
        severity, status = 'ok', state
    else:
        severity = payload['severity'].lower()
        if state == 'open':
            status = 'open'
        elif state == 'acknowledged':
            status = 'ack'
        else:
            status = 'open'
            if severity == 'info':
                severity = 'informational'

    if severity not in SEVERITY_MAP:
        severity = 'informational' if severity == 'info' else 'unknown'

    # Optional links are copied only when present and not None.
    attributes = {
        key: payload[key]
        for key in ('incident_url', 'runbook_url')
        if payload.get(key) is not None
    }

    target = payload['targets'][0]
    resource = target['name'] or UNKNOWN
    event = payload['condition_name'] or UNKNOWN

    return Alert(
        resource=resource,
        event=event,
        environment='Production',
        severity=severity,
        status=status,
        service=[payload['account_name']],
        group=target['type'],
        text=payload['details'],
        tags=['{}:{}'.format(label, value) for (label, value) in target['labels'].items()],
        attributes=attributes,
        origin='New Relic/v%s' % payload['version'],
        event_type=payload['event_type'].lower(),
        raw_data=payload)
def grafana():
    """Handle a Grafana notification posted as JSON and audit each resulting alert.

    * ``state == 'alerting'``: one alert per ``evalMatches`` entry is parsed,
      assigned a customer, stamped with the remote IP and processed. Failures
      here are returned as JSON error responses (400/403/500).
    * ``state == 'ok'`` with a ``ruleId``: stored alerts whose
      ``attributes.ruleId`` matches are set to ``normal`` / ``closed`` and
      re-processed. Failures here raise ``ApiError`` (403/500).
    * anything else: ``ApiError`` 400.

    Returns:
        201 with the single alert, or with the list of ids otherwise.
    """
    alerts = []
    data = request.json
    # ``state`` is only read for a truthy payload.
    state = data['state'] if data else None

    if state == 'alerting':
        for match in data.get('evalMatches', []):
            try:
                incomingAlert = parse_grafana(data, match, request.args)
            except ValueError as e:
                return jsonify(status='error', message=str(e)), 400

            incomingAlert.customer = assign_customer(wanted=incomingAlert.customer)
            add_remote_ip(request, incomingAlert)

            try:
                processed = process_alert(incomingAlert)
            except RejectException as e:
                return jsonify(status='error', message=str(e)), 403
            except Exception as e:
                return jsonify(status='error', message=str(e)), 500
            alerts.append(processed)

    elif state == 'ok' and data.get('ruleId', None):
        try:
            query = qb.from_dict({'attributes.ruleId': str(data['ruleId'])})
            existingAlerts = Alert.find_all(query)
        except Exception as e:
            raise ApiError(str(e), 500)

        for updateAlert in existingAlerts:
            updateAlert.severity = 'normal'
            updateAlert.status = 'closed'

            try:
                processed = process_alert(updateAlert)
            except RejectException as e:
                raise ApiError(str(e), 403)
            except Exception as e:
                raise ApiError(str(e), 500)
            alerts.append(processed)

    else:
        raise ApiError('no alerts in Grafana notification payload', 400)

    for alert in alerts:
        text = 'grafana alert received via webhook'
        write_audit_trail.send(current_app._get_current_object(), event='webhook-received', message=text,
                               user=g.user, customers=g.customers, scopes=g.scopes, resource_id=alert.id,
                               type='alert', request=request)

    if len(alerts) == 1:
        only = alerts[0]
        return jsonify(status='ok', id=only.id, alert=only.serialize), 201
    return jsonify(status='ok', ids=[alert.id for alert in alerts]), 201
def action_alert(alert_id):
    """Apply an operator action to a single alert and write an audit-trail entry.

    JSON body: ``action`` (required), ``text`` (defaults to
    ``'<action> operator action'``) and ``timeout`` (optional).

    Returns:
        ``status='ok'`` on success, or a 202 response when processing raises
        ``ForwardingLoop``.

    Raises:
        ApiError: 400 when ``action`` is missing or the action is rejected,
            404 when the alert is not found, 409 for ``InvalidAction``, the
            code carried by an ``AlertaException``, or 500 for any other
            failure or a falsy result.
    """
    action = request.json.get('action', None)
    text = request.json.get('text', f'{action} operator action')
    timeout = request.json.get('timeout', None)

    if not action:
        raise ApiError("must supply 'action' as json data", 400)

    customers = g.get('customers', None)
    alert = Alert.find_by_id(alert_id, customers)

    if not alert:
        raise ApiError('not found', 404)

    try:
        alert, action, text, timeout = process_action(alert, action, text, timeout)
        alert = alert.from_action(action, text, timeout)
    except RejectException as e:
        write_audit_trail.send(current_app._get_current_object(), event='alert-action-rejected', message=alert.text,
                               user=g.login, customers=g.customers, scopes=g.scopes, resource_id=alert.id,
                               type='alert', request=request)
        raise ApiError(str(e), 400)
    except InvalidAction as e:
        raise ApiError(str(e), 409)
    except ForwardingLoop as e:
        return jsonify(status='ok', message=str(e)), 202
    except AlertaException as e:
        raise ApiError(e.message, code=e.code, errors=e.errors)
    except Exception as e:
        raise ApiError(str(e), 500)

    # Fix: check the result before the audit call reads alert.id. Previously
    # a falsy result failed inside the audit call and the intended ApiError
    # could never be raised.
    if not alert:
        raise ApiError('failed to action alert', 500)

    write_audit_trail.send(current_app._get_current_object(), event='alert-actioned', message=text,
                           user=g.login, customers=g.customers, scopes=g.scopes, resource_id=alert.id,
                           type='alert', request=request)

    return jsonify(status='ok')
def incoming(self, path, query_string, payload):
    """Build an ``Alert`` from a Stackdriver incident notification.

    State handling:
        ``open``         -> status left unset
        ``acknowledged`` -> status ``ack``
        ``closed``       -> severity ``ok``
        anything else    -> severity ``indeterminate``

    Severity otherwise comes from the incident's ``severity`` field, falling
    back to ``critical``. ``started_at`` / ``ended_at`` are copied into the
    alert attributes as-is.
    """
    incident = payload['incident']
    state = incident['state']

    # 'documentation' is an optional field that you can use to customize
    # your alert sending a json
    if 'documentation' in incident:
        try:
            extra = json.loads(incident['documentation']['content'])
            incident.update(extra)
        except Exception:
            current_app.logger.warning(f"Invalid documentation content: '{incident['documentation']}'")

    status = None
    severity = incident.get('severity', 'critical')

    if state == 'acknowledged':
        status = 'ack'
    elif state == 'closed':
        severity = 'ok'
    elif state != 'open':
        severity = 'indeterminate'

    policy_name = incident['policy_name']
    service = [policy_name] if policy_name else []

    return Alert(
        resource=incident['resource_name'],
        event=incident['condition_name'],
        environment=incident.get('environment', current_app.config['DEFAULT_ENVIRONMENT']),
        severity=severity,
        status=status,
        service=service,
        group=incident.get('group', 'Cloud'),
        text=incident['summary'],
        attributes={
            'incidentId': incident['incident_id'],
            'resourceId': incident['resource_id'],
            'moreInfo': f"<a href=\"{incident['url']}\" target=\"_blank\">Stackdriver Console</a>",
            'startedAt': incident['started_at'],
            'endedAt': incident['ended_at']
        },
        customer=incident.get('customer'),
        origin=incident.get('origin', 'Stackdriver'),
        event_type='stackdriverAlert',
        raw_data=payload
    )
def set_customer(alert_id):
    """Re-assign a single alert to another customer and write an audit-trail entry.

    The requested customer (JSON field ``customer``) must be one of the
    customers visible to the caller: all of them when the caller holds the
    admin or admin-customers scope, otherwise only those listed in
    ``g.customers``.

    Raises:
        ApiError: 404 when no customer is visible, the requested customer is
            not among them, or the alert is not found; 400 when the change is
            rejected; 500 for any other failure or a falsy result.
    """
    want_customer = request.json.get('customer', None)

    query = qb.from_params(MultiDict([]), customers=g.customers)
    # Scope membership does not depend on the customer, so evaluate it once.
    is_admin = Scope.admin in g.scopes or Scope.admin_customers in g.scopes
    query_data = [
        c for c in Customer.find_all(query)
        if is_admin or c.customer in g.customers
    ]

    if not query_data:
        raise ApiError('not found any customer ', 404)

    list_customers = [q.serialize for q in query_data]
    if not any(want_customer == c['customer'] for c in list_customers):
        raise ApiError('not found customer ', 404)

    alert = Alert.find_by_id(alert_id, g.get('customers', None))
    if not alert:
        raise ApiError('not found alert', 404)

    try:
        alert = alert.from_customer(want_customer)
    except RejectException as e:
        write_audit_trail.send(current_app._get_current_object(), event='alert-customer-change-rejected',
                               message=alert.text, user=g.login, customers=g.customers, scopes=g.scopes,
                               resource_id=alert.id, type='alert', request=request)
        raise ApiError(str(e), 400)
    except Exception as e:
        raise ApiError(str(e), 500)

    # Fix: check the result before the audit call reads alert.id. Previously
    # a falsy result failed inside the audit call and the intended ApiError
    # could never be raised.
    if not alert:
        raise ApiError('failed to set customer', 500)

    # Fix: the message lacked a space between 'to' and the customer name.
    write_audit_trail.send(current_app._get_current_object(), event='alert-customer-changed',
                           message='change customer to ' + want_customer, user=g.login, customers=g.customers,
                           scopes=g.scopes, resource_id=alert.id, type='alert', request=request)

    return jsonify(status='ok')
def test_invalid(self):
    """Processing an alert with an unknown severity must fail with a clear message."""
    expected_message = "'baz' is not a valid severity"

    with self.app.test_request_context('/'):
        self.app.preprocess_request()

        with self.assertRaises(Exception) as caught:
            process_alert(
                Alert(
                    resource='foo',
                    event='bar',
                    environment='Development',
                    service=['Svc'],
                    severity='baz'
                )
            )

        self.assertEqual(str(caught.exception), expected_message)
def process_status(alert: Alert, status: str, text: str) -> Tuple[Alert, str, str]:
    """Offer a status change to each routed plugin's ``status_change`` hook.

    A hook may return either an ``(alert, status, text)`` triple or just an
    alert; whichever it returns replaces the current values. Iteration stops
    once the alert reports ``is_suppressed``. If the last hook result was
    truthy, the alert's tags and attributes are written back via
    ``update_tags`` / ``update_attributes``.

    Raises:
        RejectException, AlertaException: re-raised untouched from a hook.
        ApiError: a hook failed and ``PLUGINS_RAISE_ON_ERROR`` is set.

    Returns:
        The (possibly replaced) alert, status and text.
    """
    wanted_plugins, wanted_config = plugins.routing(alert)

    updated = None
    for plugin in wanted_plugins:
        if alert.is_suppressed:
            break
        try:
            updated = plugin.status_change(alert, status, text, config=wanted_config)
        except TypeError:
            # for backward compatibility with hooks that take no ``config``
            updated = plugin.status_change(alert, status, text)
        except (RejectException, AlertaException):
            raise
        except Exception as e:
            raise_on_error = current_app.config['PLUGINS_RAISE_ON_ERROR']
            message = f"Error while running status plugin '{plugin.name}': {str(e)}"
            if raise_on_error:
                raise ApiError(message)
            logging.error(message)

        if updated:
            try:
                alert, status, text = updated
            except Exception:
                # hook returned a bare alert rather than a triple
                alert = updated

    if updated:
        alert.update_tags(alert.tags)
        alert.attributes = alert.update_attributes(alert.attributes)

    return alert, status, text
def get_environments():
    """Return the environments of alerts matching the query-string filter as JSON."""
    filters = qb.from_params(request.args)
    found = Alert.get_environments(filters)

    if not found:
        return jsonify(status="ok", message="not found", environments=[], total=0)
    return jsonify(status="ok", environments=found, total=len(found))
def get_environments():
    """Return the environments of alerts matching the query-string filter as JSON.

    The filter is restricted to ``g.customers``.
    """
    filters = qb.from_params(request.args, customers=g.customers)
    found = Alert.get_environments(filters)

    if not found:
        return jsonify(status='ok', message='not found', environments=[], total=0)
    return jsonify(status='ok', environments=found, total=len(found))
def parse_riemann(alert):
    """Build an ``Alert`` from a Riemann event mapping.

    ``host`` and ``service`` are required keys. The resource is
    ``'<host>-<service>'``, and the event name falls back to the service
    name. Other optional keys fall back to fixed defaults.
    """
    host = alert['host']
    service = alert['service']

    return Alert(
        resource='%s-%s' % (host, service),
        event=alert.get('event', service),
        environment=alert.get('environment', 'Production'),
        severity=alert.get('state', 'unknown'),
        service=[service],
        group=alert.get('group', 'Performance'),
        text=alert.get('description', None),
        value=alert.get('metric', None),
        tags=alert.get('tags', None),
        origin='Riemann',
        raw_data=alert
    )
def parse_stackdriver(notification: JSON) -> Alert:
    """Build an ``Alert`` from a Stackdriver incident notification.

    State handling:
        ``open``         -> severity ``critical``, create_time from ``started_at``
        ``acknowledged`` -> severity ``critical``, status ``ack``
        ``closed``       -> severity ``ok``, create_time from ``ended_at``
        anything else    -> severity ``indeterminate``
    """
    incident = notification['incident']
    state = incident['state']

    # 'documentation' is an optional field that you can use to customize
    # your alert sending a json
    if 'documentation' in incident:
        try:
            extra = json.loads(incident['documentation']['content'])
            incident.update(extra)
        except Exception:
            LOG.warning("Invalid documentation content: '{}'".format(incident['documentation']))

    # Start from the values shared by most states and adjust per state.
    severity = 'critical'
    status = None
    create_time = None  # type: ignore

    if state == 'open':
        create_time = datetime.utcfromtimestamp(incident['started_at'])
    elif state == 'acknowledged':
        status = 'ack'
    elif state == 'closed':
        severity = 'ok'
        create_time = datetime.utcfromtimestamp(incident['ended_at'])
    else:
        severity = 'indeterminate'

    return Alert(
        resource=incident['resource_name'],
        event=incident['condition_name'],
        environment='Production',
        severity=severity,
        status=status,
        service=[incident['policy_name']],
        group='Cloud',
        text=incident['summary'],
        attributes={
            'incidentId': incident['incident_id'],
            'resourceId': incident['resource_id'],
            'moreInfo': '<a href="%s" target="_blank">Stackdriver Console</a>' % incident['url']
        },
        customer=incident.get('customer'),
        origin='Stackdriver',
        event_type='stackdriverAlert',
        create_time=create_time,
        raw_data=notification
    )
def add_note(alert_id):
    """Attach a note to a single alert and write an audit-trail entry.

    The note text is read from the JSON field ``text``, falling back to
    ``note``. The alert and text pass through ``process_note`` before the
    note is added.

    Returns:
        201 with the serialized note and a ``Location`` header on success, or
        a 202 response when processing raises ``ForwardingLoop``.

    Raises:
        ApiError: 400 when the text is missing or the note is rejected, 404
            when the alert is not found, the code carried by an
            ``AlertaException``, or 500 for any other failure or a falsy note.
    """
    note_text = request.json.get('text') or request.json.get('note')

    if not note_text:
        raise ApiError("must supply 'note' text", 400)

    customers = g.get('customers', None)
    alert = Alert.find_by_id(alert_id, customers)

    if not alert:
        raise ApiError('not found', 404)

    try:
        alert, note_text = process_note(alert, note_text)
        note = alert.add_note(note_text)
    except RejectException as e:
        # Fix: ``note`` is never bound when the rejection is raised, so
        # reading note.id here failed with an unbound-name error and masked
        # the 400. The rejection is recorded against the alert instead.
        write_audit_trail.send(current_app._get_current_object(), event='alert-note-rejected', message='',
                               user=g.login, customers=g.customers, scopes=g.scopes, resource_id=alert.id,
                               type='alert', request=request)
        raise ApiError(str(e), 400)
    except ForwardingLoop as e:
        return jsonify(status='ok', message=str(e)), 202
    except AlertaException as e:
        raise ApiError(e.message, code=e.code, errors=e.errors)
    except Exception as e:
        raise ApiError(str(e), 500)

    # Fix: check the result before the audit call reads note.id. Previously a
    # falsy note failed inside the audit call and the intended ApiError could
    # never be raised.
    if not note:
        raise ApiError('failed to add note for alert', 500)

    write_audit_trail.send(current_app._get_current_object(), event='alert-note-added', message='',
                           user=g.login, customers=g.customers, scopes=g.scopes, resource_id=note.id,
                           type='note', request=request)

    return jsonify(status='ok', id=note.id, note=note.serialize), 201, {
        'Location': absolute_url('/alert/{}/note/{}'.format(alert.id, note.id))
    }
def update_attributes(alert_id):
    """Update the attributes of a single alert from the JSON field ``attributes``.

    The lookup is scoped by the ``customer`` value stored on ``g``, if any.

    Raises:
        ApiError: 400 when ``attributes`` is missing or empty, 404 when the
            alert is not found, 500 when the update returns a falsy value.
    """
    if not request.json.get('attributes', None):
        raise ApiError("must supply 'attributes' as json data", 400)

    target = Alert.find_by_id(alert_id, g.get('customer', None))
    if not target:
        raise ApiError("not found", 404)

    if not target.update_attributes(request.json['attributes']):
        raise ApiError("failed to update attributes", 500)
    return jsonify(status="ok")
def untag_alert(alert_id):
    """Remove the tags listed in the JSON field ``tags`` from a single alert.

    The lookup is scoped by the ``customer`` value stored on ``g``, if any.

    Raises:
        ApiError: 400 when ``tags`` is missing or empty, 404 when the alert
            is not found, 500 when ``untag`` returns a falsy value.
    """
    if not request.json.get('tags', None):
        # Fix: a missing request field is a client error; pass 400 explicitly
        # like the equivalent 'attributes' validation does, rather than
        # relying on ApiError's default code.
        raise ApiError("must supply 'tags' as json list", 400)

    customer = g.get('customer', None)
    alert = Alert.find_by_id(alert_id, customer)

    if not alert:
        raise ApiError("not found", 404)

    if not alert.untag(tags=request.json['tags']):
        raise ApiError("failed to untag alert", 500)
    return jsonify(status="ok")
def tag_alert(alert_id):
    """Add the tags listed in the JSON request body to one alert.

    Raises:
        ApiError: 400 when the body has no (or empty) ``tags``,
            404 when the alert is not found, 500 when tagging fails.
    """
    if not request.json.get('tags', None):
        # A missing field is a client error; pass 400 explicitly, as the
        # other handlers do for missing input, instead of relying on
        # ApiError's default code.
        raise ApiError("must supply 'tags' as json list", 400)

    customers = g.get('customers', None)
    alert = Alert.find_by_id(alert_id, customers)
    if not alert:
        raise ApiError('not found', 404)

    if alert.tag(tags=request.json['tags']):
        return jsonify(status='ok')
    else:
        raise ApiError('failed to tag alert', 500)
def grafana():
    """Handle a Grafana notification posted as JSON.

    * ``state == 'alerting'``: every entry of ``evalMatches`` is converted
      with ``parse_grafana`` and run through ``process_alert``.
    * ``state == 'ok'`` with a ``ruleId``: every alert whose
      ``attributes.ruleId`` matches is set to severity ``normal`` / status
      ``closed`` and run through ``process_alert``.
    * anything else: rejected with a 400.

    Returns:
        A ``201`` JSON response with the single alert, or with the list of
        alert ids when the number of processed alerts is not exactly one.
        In the ``alerting`` branch, parse and processing failures are
        returned directly as JSON error responses (400 / 403 / 500).

    Raises:
        ApiError: 400 for an unusable payload; 403 / 500 for failures in the
            ``ok`` branch.
    """
    alerts = []
    data = request.json
    # Read 'state' defensively: a body without it (or one that is not a JSON
    # object) should reach the 400 below instead of raising a lookup error.
    state = data.get('state') if isinstance(data, dict) else None

    if state == 'alerting':
        for match in data.get('evalMatches', []):
            try:
                # parse_grafana takes the query-string arguments as its
                # third parameter (severity, environment, ... overrides).
                incoming_alert = parse_grafana(data, match, request.args)
            except ValueError as e:
                return jsonify(status="error", message=str(e)), 400

            if g.get('customer', None):
                incoming_alert.customer = g.get('customer')

            add_remote_ip(request, incoming_alert)

            try:
                alert = process_alert(incoming_alert)
            except RejectException as e:
                return jsonify(status="error", message=str(e)), 403
            except Exception as e:
                return jsonify(status="error", message=str(e)), 500
            alerts.append(alert)

    elif state == 'ok' and data.get('ruleId', None):
        try:
            query = qb.from_dict({'attributes.ruleId': str(data['ruleId'])})
            existing_alerts = Alert.find_all(query)
        except Exception as e:
            raise ApiError(str(e), 500)

        for update_alert in existing_alerts:
            update_alert.severity = 'normal'
            update_alert.status = 'closed'

            try:
                alert = process_alert(update_alert)
            except RejectException as e:
                raise ApiError(str(e), 403)
            except Exception as e:
                raise ApiError(str(e), 500)
            alerts.append(alert)
    else:
        raise ApiError("no alerts in Grafana notification payload", 400)

    if len(alerts) == 1:
        return jsonify(status="ok", id=alerts[0].id, alert=alerts[0].serialize), 201
    else:
        return jsonify(status="ok", ids=[alert.id for alert in alerts]), 201
def incoming(self, path, query_string, payload):
    """Build an ``Alert`` from a webhook payload keyed by ``criticality``.

    A ``criticality`` of ``ALERT_CRITICALITY_LEVEL_WARNING`` yields severity
    ``critical``; any other value yields ``normal``. ``path`` and
    ``query_string`` are not used.
    """
    is_warning_level = payload['criticality'] == 'ALERT_CRITICALITY_LEVEL_WARNING'

    # Fields are looked up in the same order as the keyword arguments below.
    fields = {
        'id': payload['alertId'],
        'resource': payload['resourceName'],
        'event': payload['subType'],
        'environment': 'Production',
        'service': [payload['resourceKind']],
        'severity': 'critical' if is_warning_level else 'normal',
        'group': payload['resourceKind'],
        'type': payload['type'],
        'text': payload['info'],
    }
    return Alert(**fields)
def incoming(self, query_string, payload):
    """Build an ``Alert`` with origin ``Riemann`` from a webhook payload.

    ``host`` and ``service`` are required keys; every other field falls back
    to a default when absent. ``query_string`` is not used.
    """
    host = payload['host']
    service = payload['service']

    return Alert(
        resource='{}-{}'.format(host, service),
        event=payload.get('event', service),  # the service name doubles as the event
        environment=payload.get('environment', 'Production'),
        severity=payload.get('state', 'unknown'),
        service=[service],
        group=payload.get('group', 'Performance'),
        text=payload.get('description', None),
        value=payload.get('metric', None),
        tags=payload.get('tags', None),
        origin='Riemann',
        raw_data=payload,
    )
def parse_graylog(alert):
    """Convert a Graylog notification dict into an ``Alert``.

    The resource is the stream title, the text is the check's result
    description, and the triggered condition id is stored as the ``checkId``
    attribute. Event, environment, service, severity and value are fixed.
    """
    stream_title = alert['stream']['title']
    check_result = alert['check_result']
    description = check_result['result_description']
    condition_id = check_result['triggered_condition']['id']

    return Alert(
        resource=stream_title,
        event="Alert",
        environment='Development',
        service=["test"],
        severity="critical",
        value="n/a",
        text=description,
        attributes={'checkId': condition_id},
        origin='Graylog',
        event_type='performanceAlert',
        raw_data=alert,
    )
def delete_alert(alert_id):
    """Delete one alert and record an ``alert-deleted`` audit event.

    The audit event is sent before the delete is attempted.

    Raises:
        ApiError: 404 when the alert is not found, 500 when deletion fails.
    """
    target = Alert.find_by_id(alert_id, g.get('customers', None))
    if not target:
        raise ApiError('not found', 404)

    write_audit_trail.send(
        current_app._get_current_object(),
        event='alert-deleted',
        message='',
        user=g.user,
        customers=g.customers,
        scopes=g.scopes,
        resource_id=target.id,
        type='alert',
        request=request,
    )

    # Guard clause: report the failure first, fall through on success.
    if not target.delete():
        raise ApiError('failed to delete alert', 500)
    return jsonify(status='ok')
def parse_grafana(alert, match, args):
    """Convert one Grafana ``evalMatches`` entry into an ``Alert``.

    Args:
        alert: The Grafana notification payload (a dict).
        match: One entry of the payload's ``evalMatches`` list.
        args: Mapping of overrides (``severity``, ``environment``,
            ``event_type``, ``group``, ``origin``, ``service``, ``timeout``).

    Returns:
        An ``Alert`` whose resource is the matched metric and whose event is
        the rule name.

    Raises:
        ValueError: If the ``severity`` override is not a key of
            ``Severity.SEVERITY_MAP``.
    """
    # Local import: keeps this block self-contained without touching the
    # file's import section.
    from html import escape

    alerting_severity = args.get('severity', 'major')
    if alerting_severity not in Severity.SEVERITY_MAP:
        raise ValueError('Invalid severity parameter, expected one of %s' % ', '.join(sorted(Severity.SEVERITY_MAP)))

    if alert['state'] == 'alerting':
        severity = alerting_severity
    elif alert['state'] == 'ok':
        severity = 'normal'
    else:
        severity = 'indeterminate'

    environment = args.get('environment', 'Production')  # TODO: verify at create?
    event_type = args.get('event_type', 'performanceAlert')
    group = args.get('group', 'Performance')
    origin = args.get('origin', 'Grafana')
    service = args.get('service', 'Grafana')
    # NOTE(review): a value taken from ``args`` may be a string while the
    # configured default may be numeric -- confirm what Alert expects.
    timeout = args.get('timeout', current_app.config['ALERT_TIMEOUT'])

    attributes = match.get('tags', None) or dict()
    # Dots in tag names are replaced with underscores in the attribute keys.
    attributes = {k.replace('.', '_'): v for (k, v) in attributes.items()}

    attributes['ruleId'] = str(alert['ruleId'])
    # The URLs come from the webhook payload; escape them (including quotes)
    # so they cannot break out of the href attribute.
    if 'ruleUrl' in alert:
        attributes['ruleUrl'] = '<a href="%s" target="_blank">Rule</a>' % escape(str(alert['ruleUrl']), quote=True)
    if 'imageUrl' in alert:
        attributes['imageUrl'] = '<a href="%s" target="_blank">Image</a>' % escape(str(alert['imageUrl']), quote=True)

    return Alert(
        resource=match['metric'],
        event=alert['ruleName'],
        environment=environment,
        severity=severity,
        service=[service],
        group=group,
        value='%s' % match['value'],
        # Prefer the message; fall back to the title, then to the raw state.
        text=alert.get('message', None) or alert.get('title', alert['state']),
        tags=list(),
        attributes=attributes,
        origin=origin,
        event_type=event_type,
        timeout=timeout,
        raw_data=json.dumps(alert)
    )
def incoming(self, query_string, payload):
    """Build an ``Alert`` from a webhook payload with ``state``/``severity``.

    Severity and status are derived from the lower-cased ``state``:
    ``'0'`` keeps the payload severity, ``'1'`` keeps it and sets status
    ``ack``, ``'4'`` maps to severity ``ok``, ``'5'`` to ``cleared``. Any
    other state sets status ``open``, with payload severity ``info``
    renamed to ``informational``. ``query_string`` is not used.

    Raises:
        ValueError: If the payload has no ``environment`` key.
    """
    if 'environment' not in payload:
        raise ValueError('Environment must be set.')
    # NOTE(review): 'environment' is required above, yet the Alert below is
    # always created with 'Production' -- confirm whether that is intended.

    status = payload['state'].lower()
    # NOTE(review): for states '0', '4' and '5' the raw state string is
    # passed on as the alert status -- confirm this is what Alert expects.
    if status == '0':
        severity = payload['severity'].lower()
    elif status == '1':
        severity = payload['severity'].lower()
        status = 'ack'
    elif status == '4':
        severity = 'ok'
    elif status == '5':
        severity = 'cleared'
    elif payload['severity'].lower() == 'info':
        severity = 'informational'
        status = 'open'
    else:
        severity = payload['severity'].lower()
        status = 'open'

    attributes = dict()
    # Guard on the key that is actually read: testing 'incident_url' here
    # raised KeyError for payloads that had it but no 'event_url'.
    if 'event_url' in payload:
        attributes['event_url'] = '<a href="%s" target="_blank">Event URL</a>' % payload['event_url']
    if 'runbook_url' in payload:
        attributes['runBook'] = '<a href="%s" target="_blank">Runbook URL</a>' % payload['runbook_url']

    return Alert(
        resource=payload['resource'],
        event=payload['event'],
        environment='Production',
        severity=severity,
        status=status,
        service=[payload['service']],
        group=payload['group'],
        # NOTE(review): the key is 'test', possibly a typo for 'text' --
        # verify against the sender's payload schema.
        text=payload['test'],
        # Labels of the first target become "key:value" tags.
        tags=['{}:{}'.format(key, value) for (key, value) in payload['targets'][0]['labels'].items()],
        attributes=attributes,
        origin=payload['origin'],
        event_type=payload['event_type'].lower(),
        raw_data=payload
    )
def get_notes(alert_id):
    """Return the notes attached to one alert as a JSON response.

    An alert without notes yields an ``ok`` response with an empty list and
    the message ``not found``.

    Raises:
        ApiError: 404 when the alert itself is not found.
    """
    alert = Alert.find_by_id(alert_id, g.get('customers', None))
    if not alert:
        raise ApiError('not found', 404)

    notes = alert.get_alert_notes()
    if not notes:
        return jsonify(status='ok', message='not found', notes=[], total=0)

    serialized_notes = [entry.serialize for entry in notes]
    return jsonify(status='ok', notes=serialized_notes, total=len(notes))
def incoming(self, path, query_string, payload):
    """Build an ``Alert`` with origin ``Riemann`` from a webhook payload.

    ``host`` and ``service`` are required keys. The environment falls back
    to the app's ``DEFAULT_ENVIRONMENT`` setting; other optional fields have
    fixed defaults. ``path`` and ``query_string`` are not used.
    """
    host = payload['host']
    service = payload['service']
    event = payload.get('event', service)  # the service name doubles as the event
    # The configured default is looked up even when the payload supplies one.
    environment = payload.get('environment', current_app.config['DEFAULT_ENVIRONMENT'])

    return Alert(
        resource=f"{host}-{service}",
        event=event,
        environment=environment,
        severity=payload.get('state', 'unknown'),
        service=[service],
        group=payload.get('group', 'Performance'),
        text=payload.get('description', None),
        value=payload.get('metric', None),
        tags=payload.get('tags', None),
        origin='Riemann',
        raw_data=payload
    )
def slack():
    """Apply an action from a Slack interactive message to an alert.

    The alert id, user and action are extracted from the submitted form with
    ``parse_slack``. ``open`` / ``ack`` / ``close`` change the alert status;
    ``watch`` adds and ``unwatch`` removes the ``watch:<user>`` tag.

    Returns:
        A ``201`` JSON response built by ``build_slack_response``.

    Raises:
        ApiError: 404 when the alert is not found, 400 for an unsupported
            action.
    """
    alert_id, user, action = parse_slack(request.form)

    alert = Alert.find_by_id(alert_id)
    if not alert:
        # Previously the error response was built but never returned, so the
        # code below went on to use a missing alert.
        raise ApiError('alert not found for #slack message', 404)

    if action in ['open', 'ack', 'close']:
        alert.set_status(status=action, text="status change via #slack by {}".format(user))
    elif action == 'watch':
        alert.tag(tags=['watch:{}'.format(user)])
    elif action == 'unwatch':
        # Remove the same tag that 'watch' adds.
        alert.untag(tags=['watch:{}'.format(user)])
    else:
        raise ApiError('Unsupported #slack action', 400)

    response = build_slack_response(alert, action, user, request.form)
    return jsonify(**response), 201
def send_message_reply(alert: Alert, action: str, user: str, data: JSON) -> None:
    """Edit the originating Telegram message to reflect an alert action.

    Best effort: a missing ``telepot`` package or any error while building
    or sending the reply is logged as a warning and otherwise ignored.

    Args:
        alert: The alert the action was applied to.
        action: The action name; ``watch`` / ``unwatch`` get a dedicated
            reply text and a new inline keyboard.
        user: Name shown in the watch/unwatch reply.
        data: Telegram update containing ``callback_query.message``.
    """
    try:
        import telepot  # type: ignore
    except ImportError:
        current_app.logger.warning("You have configured Telegram but 'telepot' client is not installed", exc_info=True)
        return

    def setting(name):
        # Environment variables take precedence over the app configuration.
        return os.environ.get(name) or current_app.config.get(name)

    try:
        token = setting('TELEGRAM_TOKEN')
        dashboard = setting('DASHBOARD_URL')
        chat = setting('TELEGRAM_CHAT_ID')
        client = telepot.Bot(token)

        # message info
        original = data['callback_query']['message']
        original_id = original['message_id']
        # Everything after the first line of the original message text.
        body_log = original['text'].partition('\n')[2]

        # process buttons for reply text
        keyboard = []  # type: List[List[JSON]]
        template = 'The status of alert {alert} is *{status}* now!'  # type: str
        if action in ('watch', 'unwatch'):
            template = 'User `{user}` is _{status}ing_ alert {alert}'
            # Offer the opposite of what was just done.
            toggle = 'unwatch' if action == 'watch' else 'watch'
            keyboard = [
                [
                    {'text': toggle.capitalize(), 'callback_data': '/{} {}'.format(toggle, alert.id)},
                    {'text': 'Ack', 'callback_data': '{} {}'.format('/ack', alert.id)},
                    {'text': 'Close', 'callback_data': '{} {}'.format('/close', alert.id)}
                ]
            ]

        # format message response
        short_id = alert.get_id(short=True)
        link = '{}/#/alert/{}'.format(dashboard, alert.id)
        reply_text = template.format(alert=short_id, status=action, user=user)
        text = '{alert} *{level} - {event} on {resouce}*\n{log}\n{reply}'.format(
            alert='[{}]({})'.format(short_id, link),
            level=alert.severity.capitalize(),
            event=alert.event,
            resouce=alert.resource,
            log=body_log,
            reply=reply_text
        )

        # send message
        client.editMessageText(
            msg_identifier=(chat, original_id),
            text=text,
            parse_mode='Markdown',
            reply_markup={'inline_keyboard': keyboard}
        )
    except Exception:
        current_app.logger.warning('Error sending reply message', exc_info=True)
def get_services():
    """Return the services matching the request's query parameters as JSON.

    An empty result yields an ``ok`` response with the message ``not found``
    and an empty list.
    """
    found = Alert.get_services(qb.from_params(request.args))
    if not found:
        return jsonify(status="ok", message="not found", services=[], total=0)
    return jsonify(status="ok", services=found, total=len(found))
def get_tags():
    """Return the tags matching the request's query parameters as JSON.

    An empty result yields an ``ok`` response with the message ``not found``
    and an empty list.
    """
    found = Alert.get_tags(qb.from_params(request.args))
    if not found:
        return jsonify(status="ok", message="not found", tags=[], total=0)
    return jsonify(status="ok", tags=found, total=len(found))
def get_environments():
    """Return the environments matching the request's query parameters as JSON.

    An empty result yields an ``ok`` response with the message ``not found``
    and an empty list.
    """
    found = Alert.get_environments(qb.from_params(request.args))
    if not found:
        return jsonify(status="ok", message="not found", environments=[], total=0)
    return jsonify(status="ok", environments=found, total=len(found))
def get_top10_flapping():
    """Return the result of ``Alert.get_top10_flapping`` for the request's query as JSON.

    An empty result yields an ``ok`` response with the message ``not found``
    and an empty list.
    """
    ranking = Alert.get_top10_flapping(qb.from_params(request.args))
    if not ranking:
        return jsonify(status="ok", message="not found", top10=[], total=0)
    return jsonify(status="ok", top10=ranking, total=len(ranking))