def parse_notification(notification):
    """Build an Alert from a JSON-encoded SNS message.

    Two values of the message's ``Type`` field are handled:

    * ``SubscriptionConfirmation`` becomes an informational alert whose text
      links to the ``SubscribeURL`` carried in the message.
    * ``Notification`` has its ``Message`` field decoded as JSON and mapped
      onto an alert; the decoded document must contain a ``Trigger`` key.

    :param notification: JSON text of the SNS message.
    :returns: the Alert built from the message.
    :raises ValueError: if the text (or the embedded ``Message``) is not
        valid JSON, if the embedded document has no ``Trigger`` key, or if
        ``Type`` is neither of the two handled values.
    :raises KeyError: if a field read below is missing from the message.
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'

    notification = json.loads(notification)
    message_type = notification['Type']

    if message_type == 'SubscriptionConfirmation':
        return Alert(
            resource=notification['TopicArn'],
            event=message_type,
            environment='Production',
            severity='informational',
            service=['Unknown'],
            group='AWS/CloudWatch',
            text='%s <a href="%s" target="_blank">SubscribeURL</a>' % (
                notification['Message'], notification['SubscribeURL']),
            origin=notification['TopicArn'],
            event_type='cloudwatchAlarm',
            create_time=datetime.datetime.strptime(
                notification['Timestamp'], timestamp_format),
            raw_data=notification,
        )

    if message_type == 'Notification':
        alarm = json.loads(notification['Message'])
        if 'Trigger' not in alarm:
            raise ValueError("SNS message is not a Cloudwatch notification")

        trigger = alarm['Trigger']
        # NOTE(review): an empty Dimensions list raises IndexError here;
        # confirm whether alarms without dimensions need a fallback resource.
        dimension = trigger['Dimensions'][0]
        return Alert(
            resource='%s:%s' % (dimension['name'], dimension['value']),
            event=alarm['AlarmName'],
            environment='Production',
            severity=cw_state_to_severity(alarm['NewStateValue']),
            service=[alarm['AWSAccountId']],
            group=trigger['Namespace'],
            value=alarm['NewStateValue'],
            text=alarm['AlarmDescription'],
            tags=[alarm['Region']],
            attributes={
                'incidentKey': alarm['AlarmName'],
                'thresholdInfo': trigger
            },
            origin=notification['TopicArn'],
            event_type='cloudwatchAlarm',
            create_time=datetime.datetime.strptime(
                notification['Timestamp'], timestamp_format),
            raw_data=alarm)

    # Previously any other Type fell off the end and returned None, which a
    # caller cannot tell apart from a parsed alert. Reject it the same way
    # other unusable messages are rejected.
    raise ValueError("Unsupported SNS message type: %s" % message_type)
def parse_pingdom(check):
    """Build an Alert from a JSON-encoded Pingdom check payload.

    The payload's ``action`` field selects the severity:

    * ``assign``          -> ``critical``
    * ``notify_of_close`` -> ``normal``
    * anything else       -> ``indeterminate``; in this case the check's
      description is also appended to the correlate list.

    :param check: JSON text of the payload.
    :returns: the Alert built from the payload.
    :raises ValueError: if ``check`` is not valid JSON.
    :raises KeyError: if a field read below is missing.
    """
    check = json.loads(check)

    action = check['action']
    severity = 'indeterminate'
    recognised = False
    if action == 'assign':
        severity, recognised = 'critical', True
    elif action == 'notify_of_close':
        severity, recognised = 'normal', True

    return Alert(
        resource=check['host'],
        event=check['description'],
        correlate=['up', 'down'] if recognised else ['up', 'down', check['description']],
        environment='Production',
        severity=severity,
        service=[check['checkname']],
        group='Network',
        text='%s is %s.' % (check['checkname'], check['description']),
        attributes={'incidentKey': check['incidentid']},
        origin='Pingdom',
        event_type='availabilityAlert',
        raw_data=check,
    )
def parse_grafana(alert, match):
    """Build an Alert for one evaluation match of a Grafana alert payload.

    :param alert: dict holding the alert-level fields (``state``, ``ruleId``,
        ``ruleName`` and optionally ``ruleUrl``, ``imageUrl``, ``message``,
        ``title``).
    :param match: dict for a single match (``metric``, ``value`` and
        optionally ``tags``).
    :returns: the Alert built from the two dicts.
    :raises KeyError: if a required field is missing.
    """
    state = alert['state']
    severity = 'indeterminate'
    if state == 'alerting':
        severity = 'major'
    elif state == 'ok':
        severity = 'normal'

    attributes = {'ruleId': alert['ruleId']}
    # Optional URLs are rendered as HTML links when present in the payload.
    link_templates = (
        ('ruleUrl', '<a href="%s" target="_blank">Rule</a>'),
        ('imageUrl', '<a href="%s" target="_blank">Image</a>'),
    )
    for key, template in link_templates:
        if key in alert:
            attributes[key] = template % alert[key]

    return Alert(
        resource=match['metric'],
        event=alert['ruleName'],
        environment='Production',
        severity=severity,
        service=['Grafana'],
        group='Performance',
        value='%s' % match['value'],
        # Prefer the message, then the title, then the bare state.
        text=alert.get('message', None) or alert.get('title', alert['state']),
        tags=match.get('tags', []),
        attributes=attributes,
        origin='Grafana',
        event_type='performanceAlert',
        timeout=300,
        raw_data=alert
    )
def parse_newrelic(alert):
    """Build an Alert from a New Relic webhook payload.

    The lower-cased ``current_state`` drives the result:

    * ``closed``       -> severity ``ok``, status ``closed``
    * ``acknowledged`` -> payload severity, status ``ack``
    * anything else    -> payload severity, status is the state itself

    :param alert: dict holding the decoded payload.
    :returns: the Alert built from the payload.
    :raises ValueError: if the payload has no ``version`` key.
    :raises KeyError: if another field read below is missing.
    """
    if 'version' not in alert:
        raise ValueError("New Relic Legacy Alerting is not supported")

    state = alert['current_state'].lower()
    if state == 'closed':
        severity = 'ok'
    else:
        severity = alert['severity'].lower()
    status = 'ack' if state == 'acknowledged' else state

    # Only the first target is used to describe the alert.
    target = alert['targets'][0]

    return Alert(
        resource=target['name'],
        event=alert['condition_name'],
        environment='Production',
        severity=severity,
        status=status,
        service=[alert['account_name']],
        group=target['type'],
        text=alert['details'],
        tags=['%s:%s' % pair for pair in target['labels'].items()],
        attributes={
            'moreInfo': '<a href="%s" target="_blank">Incident URL</a>' % alert['incident_url'],
            'runBook': '<a href="%s" target="_blank">Runbook URL</a>' % alert['runbook_url']
        },
        origin='New Relic/v%s' % alert['version'],
        event_type=alert['event_type'].lower(),
        raw_data=alert
    )
def parse_serverdensity(alert):
    """Build an Alert from a Server Density webhook payload.

    A truthy ``fixed`` field yields severity ``ok``; otherwise the alert is
    ``critical``. A truthy ``item_cloud`` field adds the ``cloud`` tag.

    :param alert: dict holding the decoded payload.
    :returns: the Alert built from the payload.
    :raises KeyError: if a field read below is missing.
    """
    severity = 'ok' if alert['fixed'] else 'critical'

    return Alert(
        resource=alert['item_name'],
        event=alert['alert_type'],
        environment='Production',
        severity=severity,
        service=[alert['item_type']],
        group=alert['alert_section'],
        value=alert['configured_trigger_value'],
        text='Alert created for %s:%s' % (alert['item_type'], alert['item_name']),
        tags=['cloud'] if alert['item_cloud'] else [],
        attributes={
            'alertId': alert['alert_id'],
            'itemId': alert['item_id']
        },
        origin='ServerDensity',
        event_type='serverDensityAlert',
        raw_data=alert
    )
def parse_pingdom(check):
    """Build an Alert from a decoded Pingdom state-change payload.

    Severity is ``normal`` whenever ``current_state`` is ``UP``; otherwise it
    is ``critical`` for ``importance_level`` ``HIGH`` and ``warning`` for any
    other importance level.

    :param check: dict holding the decoded payload.
    :returns: the Alert built from the payload.
    :raises KeyError: if a field read below is missing.
    """
    importance = check['importance_level']
    state = check['current_state']

    if state == 'UP':
        severity = 'normal'
    elif importance == 'HIGH':
        severity = 'critical'
    else:
        severity = 'warning'

    return Alert(
        resource=check['check_name'],
        event=state,
        correlate=['UP', 'DOWN'],
        environment='Production',
        severity=severity,
        service=[check['check_type']],
        group='Network',
        value=check['description'],
        text='%s: %s' % (importance, check['long_description']),
        tags=check['tags'],
        attributes={'checkId': check['check_id']},
        origin='Pingdom',
        event_type='availabilityAlert',
        raw_data=check
    )
def parse_prometheus(alert):
    """Build an Alert from a single Prometheus alert dict.

    The alert's ``status`` (default ``firing``) selects severity and
    creation time:

    * ``firing``   -> the ``severity`` label (default ``warning``), ``startsAt``
    * ``resolved`` -> ``normal``, ``endsAt``
    * other        -> ``unknown``, ``endsAt`` if set, else ``startsAt``

    Well-known labels are consumed to fill alert fields; whatever labels
    remain afterwards become ``key=value`` tags. Annotations left over after
    ``summary`` and ``description`` are removed become the alert attributes.

    :param alert: dict with ``labels``, ``annotations``, ``startsAt`` and
        ``endsAt``, and optionally ``status`` and ``generatorURL``.
    :returns: the Alert built from the dict.
    :raises KeyError: if a required field or label is missing.
    """
    status = alert.get('status', 'firing')

    # Work on copies: both mappings are consumed with pop() below.
    labels = copy(alert['labels'])
    annotations = copy(alert['annotations'])

    starts_at = parse_date(alert['startsAt'])
    # This particular endsAt value is treated as "no end time".
    ends_at = None if alert['endsAt'] == '0001-01-01T00:00:00Z' else parse_date(alert['endsAt'])

    if status == 'firing':
        severity, create_time = labels.pop('severity', 'warning'), starts_at
    elif status == 'resolved':
        severity, create_time = 'normal', ends_at
    else:
        severity, create_time = 'unknown', ends_at or starts_at

    summary = annotations.pop('summary', None)
    description = annotations.pop('description', None)
    text = description or summary
    if not text:
        # Built before the labels are popped, so all three must be present.
        text = '%s: %s on %s' % (
            labels['job'], labels['alertname'], labels['instance'])

    try:
        timeout = int(labels.pop('timeout', 0)) or None
    except ValueError:
        timeout = None

    if 'generatorURL' in alert:
        annotations['moreInfo'] = (
            '<a href="%s" target="_blank">Prometheus Graph</a>' % alert['generatorURL'])

    # Consume labels in a fixed order; tags must be computed last so that
    # only the labels nobody claimed end up in them.
    resource = labels.pop('exported_instance', None) or labels.pop('instance')
    event = labels.pop('alertname')
    environment = labels.pop('environment', 'Production')
    correlate = labels.pop('correlate').split(',') if 'correlate' in labels else None
    service = labels.pop('service', '').split(',')
    group = labels.pop('group', None)
    value = labels.pop('value', None)
    origin = 'prometheus/' + labels.get('job', '-')  # 'job' stays in the tags
    created = create_time.astimezone(tz=pytz.UTC).replace(tzinfo=None)
    customer = labels.pop('customer', None)
    tags = ["%s=%s" % item for item in labels.items()]

    return Alert(
        resource=resource,
        event=event,
        environment=environment,
        severity=severity,
        correlate=correlate,
        service=service,
        group=group,
        value=value,
        text=text,
        attributes=annotations,
        origin=origin,
        event_type='prometheusAlert',
        create_time=created,
        timeout=timeout,
        raw_data=alert,
        customer=customer,
        tags=tags
    )
def parse_riemann(alert):
    """Build an Alert from a Riemann event dict.

    ``host`` and ``service`` are required; every other field falls back to a
    default when absent (``Production`` environment, ``unknown`` severity,
    ``Performance`` group, and ``None`` for description, metric and tags).

    :param alert: dict holding the event.
    :returns: the Alert built from the event.
    :raises KeyError: if ``host`` or ``service`` is missing.
    """
    host = alert['host']
    service = alert['service']

    return Alert(
        resource='%s-%s' % (host, service),
        event=service,
        environment=alert.get('environment', 'Production'),
        severity=alert.get('state', 'unknown'),
        service=[service],
        group=alert.get('group', 'Performance'),
        text=alert.get('description', None),
        value=alert.get('metric', None),
        tags=alert.get('tags', None),
        origin='Riemann',
        raw_data=alert
    )
def receive_alert():
    """Parse the request body as an alert, process it and reply with JSON.

    Returns a ``(response, status)`` tuple, with a headers dict appended on
    success. Status codes:

    * 503 - the ``sender-api-allow`` switch is not on
    * 400 - ``Alert.parse_alert`` raised ``ValueError``
    * 403 - ``process_alert`` raised ``RejectException``
    * 429 - ``process_alert`` raised ``RateLimit``
    * 202 - ``process_alert`` raised ``BlackoutPeriod`` (status "ok")
    * 500 - any other exception from ``process_alert``, or a falsy result
    * 201 - ``process_alert`` returned an alert; ``Location`` is its URL
    """
    if not Switch.get('sender-api-allow').is_on():
        return jsonify(
            status="error",
            message="API not accepting alerts. Try again later."), 503

    recv_started = receive_timer.start_timer()

    def timed_reply(code, status, error, include_id=False):
        # Every early exit stops the timer first, then builds its response.
        receive_timer.stop_timer(recv_started)
        payload = {'status': status}
        if include_id:
            payload['id'] = incoming.id
        payload['message'] = str(error)
        return jsonify(**payload), code

    try:
        incoming = Alert.parse_alert(request.data)
    except ValueError as e:
        return timed_reply(400, "error", e)

    customer = g.get('customer', None)
    if customer:
        incoming.customer = g.get('customer')

    add_remote_ip(request, incoming)

    try:
        alert = process_alert(incoming)
    except RejectException as e:
        return timed_reply(403, "error", e)
    except RateLimit as e:
        return timed_reply(429, "error", e, include_id=True)
    except BlackoutPeriod as e:
        # Suppressed during a blackout: accepted, but nothing is returned.
        return timed_reply(202, "ok", e, include_id=True)
    except Exception as e:
        return timed_reply(500, "error", e)

    receive_timer.stop_timer(recv_started)

    if not alert:
        return jsonify(
            status="error",
            message="insert or update of received alert failed"), 500

    body = alert.get_body()
    body['href'] = absolute_url('/alert/' + alert.id)
    return jsonify(status="ok", id=alert.id, alert=body), 201, {
        'Location': body['href']
    }
def receive_alert():
    """Parse the request body as an alert, process it and reply with JSON.

    The sender's address is recorded in the alert's ``ip`` attribute: the
    first ``X-Forwarded-For`` header value when that header is present,
    otherwise the request's remote address.

    Returns a ``(response, status)`` tuple, with a headers dict appended on
    success. Status codes:

    * 503 - the ``sender-api-allow`` switch is not on
    * 400 - ``Alert.parse_alert`` raised ``ValueError``
    * 403 - ``process_alert`` raised ``RejectException``
    * 202 - ``process_alert`` raised ``RuntimeWarning`` (status "ok")
    * 500 - any other exception from ``process_alert``, or a falsy result
    * 201 - ``process_alert`` returned an alert; ``Location`` is its URL
    """
    if not Switch.get('sender-api-allow').is_on():
        return jsonify(
            status="error",
            message="API not accepting alerts. Try again later."), 503

    recv_started = receive_timer.start_timer()

    def timed_reply(code, status, error, include_id=False):
        # Every early exit stops the timer first, then builds its response.
        receive_timer.stop_timer(recv_started)
        payload = {'status': status}
        if include_id:
            payload['id'] = incoming.id
        payload['message'] = str(error)
        return jsonify(**payload), code

    try:
        incoming = Alert.parse_alert(request.data)
    except ValueError as e:
        return timed_reply(400, "error", e)

    if g.get('customer', None):
        incoming.customer = g.get('customer')

    forwarded_for = request.headers.getlist("X-Forwarded-For")
    client_ip = forwarded_for[0] if forwarded_for else request.remote_addr
    incoming.attributes.update(ip=client_ip)

    try:
        alert = process_alert(incoming)
    except RejectException as e:
        return timed_reply(403, "error", e)
    except RuntimeWarning as e:
        return timed_reply(202, "ok", e, include_id=True)
    except Exception as e:
        return timed_reply(500, "error", e)

    receive_timer.stop_timer(recv_started)

    if not alert:
        return jsonify(
            status="error",
            message="insert or update of received alert failed"), 500

    body = alert.get_body()
    body['href'] = absolute_url('/alert/' + alert.id)
    return jsonify(status="ok", id=alert.id, alert=body), 201, {
        'Location': body['href']
    }
def _utc_from_epoch(seconds):
    """Return the naive UTC datetime for a POSIX timestamp in seconds."""
    return datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=seconds)


def parse_stackdriver(notification):
    """Build an Alert from a Stackdriver incident notification.

    The incident ``state`` selects severity, status and creation time:

    * ``open``         -> ``critical``, no status, ``started_at``
    * ``acknowledged`` -> ``critical``, status ``ack``, no creation time
    * ``closed``       -> ``ok``, no status, ``ended_at``
    * anything else    -> ``indeterminate``, no status, no creation time

    Creation times are naive datetimes expressed in UTC. The previous
    implementation used ``datetime.fromtimestamp``, which converts to the
    server's local timezone and so shifted the time on any host not running
    in UTC.

    :param notification: dict holding the decoded payload; the incident is
        read from its ``incident`` key.
    :returns: a ``(state, alert)`` tuple.
    :raises KeyError: if a field read below is missing.
    """
    incident = notification['incident']
    state = incident['state']

    if state == 'open':
        severity = 'critical'
        status = None
        create_time = _utc_from_epoch(incident['started_at'])
    elif state == 'acknowledged':
        severity = 'critical'
        status = 'ack'
        create_time = None
    elif state == 'closed':
        severity = 'ok'
        status = None
        create_time = _utc_from_epoch(incident['ended_at'])
    else:
        severity = 'indeterminate'
        status = None
        create_time = None

    return state, Alert(
        resource=incident['resource_name'],
        event=incident['condition_name'],
        environment='Production',
        severity=severity,
        status=status,
        service=[incident['policy_name']],
        group='Cloud',
        text=incident['summary'],
        attributes={
            'incidentId': incident['incident_id'],
            'resourceId': incident['resource_id'],
            'moreInfo': '<a href="%s" target="_blank">Stackdriver Console</a>' % incident['url']
        },
        origin='Stackdriver',
        event_type='stackdriverAlert',
        create_time=create_time,
        raw_data=notification)
def receive_alert():
    """Parse the request body as an alert, process it and reply with JSON.

    Returns a ``(response, status)`` tuple, with a headers dict appended on
    success. Status codes:

    * 503 - the ``sender-api-allow`` switch is not on
    * 400 - ``Alert.parse_alert`` raised ``ValueError``
    * 403 - ``process_alert`` raised ``RejectException``
    * 429 - ``process_alert`` raised ``RateLimit``
    * 202 - ``process_alert`` raised ``BlackoutPeriod`` (status "ok")
    * 500 - any other exception from ``process_alert``, or a falsy result
    * 201 - ``process_alert`` returned an alert; ``Location`` is its URL
    """
    if not Switch.get('sender-api-allow').is_on():
        return jsonify(status="error", message="API not accepting alerts. Try again later."), 503

    recv_started = receive_timer.start_timer()

    def timed_reply(code, status, error, include_id=False):
        # Every early exit stops the timer first, then builds its response.
        receive_timer.stop_timer(recv_started)
        payload = {'status': status}
        if include_id:
            payload['id'] = incoming.id
        payload['message'] = str(error)
        return jsonify(**payload), code

    try:
        incoming = Alert.parse_alert(request.data)
    except ValueError as e:
        return timed_reply(400, "error", e)

    if g.get('customer', None):
        incoming.customer = g.get('customer')

    add_remote_ip(request, incoming)

    try:
        alert = process_alert(incoming)
    except RejectException as e:
        return timed_reply(403, "error", e)
    except RateLimit as e:
        return timed_reply(429, "error", e, include_id=True)
    except BlackoutPeriod as e:
        # Suppressed during a blackout: accepted, but nothing is returned.
        return timed_reply(202, "ok", e, include_id=True)
    except Exception as e:
        return timed_reply(500, "error", e)

    receive_timer.stop_timer(recv_started)

    if not alert:
        return jsonify(status="error", message="insert or update of received alert failed"), 500

    body = alert.get_body()
    body['href'] = absolute_url('/alert/' + alert.id)
    return jsonify(status="ok", id=alert.id, alert=body), 201, {'Location': body['href']}