def determine_threshold_trigger(alert_params, events):
    """Yield one triggered alert per aggregation value that meets the threshold.

    The events are counted by the dotted ``aggregation_key`` named in
    ``alert_params``. For every (value, count) pair whose count is greater
    than or equal to ``alert_params["threshold"]`` an alert is built, its
    summary is rendered, and the events matching that value are attached.

    Args:
        alert_params: the threshold alert's parameters. This function reads
            ``aggregation_key``, ``threshold``, ``summary``,
            ``event_sample_count`` and (when samples are requested)
            ``event_snippet``.
        events: the events to evaluate.

    Yields:
        The merge of ``alert_params`` with ``triggered`` set to True and a
        ``metadata`` entry holding the matched ``value`` and ``count``; the
        result also carries ``events`` (the matching events wrapped in
        DotDict) and the rendered ``summary``.
    """
    aggregation_key = alert_params["aggregation_key"]
    # count the events by the dotted aggregation key
    counts = mostCommon(events, aggregation_key)
    logger.debug(counts)
    for entry in counts:
        value, count = entry[0], entry[1]
        if count >= alert_params["threshold"]:
            # Build the alert through merge() instead of assigning
            # "triggered" into alert_params directly, so this function does
            # not itself write into the caller's parameters.
            alert = merge(
                alert_params,
                {"triggered": True, "metadata": {"value": value, "count": count}},
            )
            # limit events to those matching the aggregation_key value
            # so the alert only gets the events behind this count
            alert["events"] = [
                dotted_event
                for dotted_event in map(DotDict, events)
                if value == dotted_event.get(aggregation_key)
            ]
            # render the summary as a chevron/mustache template, using the
            # alert (including its metadata and events) as the context
            alert["summary"] = chevron.render(alert["summary"], alert)
            # append a rendered snippet for each requested sample event
            sample = alert["events"][: alert_params["event_sample_count"]]
            snippets = [
                chevron.render(alert_params["event_snippet"], event)
                for event in sample
            ]
            alert["summary"] = " ".join([alert["summary"], *snippets])
            yield alert
def onMessage(self, message, metadata):
    """Fill in missing standard event fields and tuck extras under 'details'.

    If the keys enumerated from ``message`` already include every key of the
    standard event shell, the message is returned untouched. Otherwise the
    shell and the message are merged (message values win, shell values fill
    the gaps) and every original message key that is not a shell key and is
    present at the top level is moved into ``message["details"]``.

    Returns:
        A ``(message, metadata)`` tuple; ``metadata`` is passed through.
    """
    # the standard fields and their fallback values
    shell = {
        "utctimestamp": utcnow().isoformat(),
        "severity": "INFO",
        "summary": "UNKNOWN",
        "category": "UNKNOWN",
        "source": "UNKNOWN",
        "tags": [],
        "plugins": [],
        "details": {},
    }

    shell_keys = set(enum_keys(shell))
    incoming_keys = set(enum_keys(message))

    # nothing to do when the shell elements are already there
    if shell_keys.issubset(incoming_keys):
        return (message, metadata)

    # message values win; shell values cover whatever the message lacks
    message = merge(shell, message)

    # enum_keys also reports keys of sub dicts, so only relocate a key when
    # it is not a core element and really lives at the top level
    relocate = [
        key for key in incoming_keys if key not in shell and key in message
    ]
    for key in relocate:
        message["details"][key] = message.pop(key)

    return (message, metadata)
def test_merge(self):
    """merge() of two dicts with distinct keys holds the entries of both."""
    left = {"some_key": "some value"}
    right = {"some_other_key": "some other value"}
    expected = {
        "some_key": "some value",
        "some_other_key": "some other value",
    }
    assert merge(left, right) == expected
def determine_deadman_trigger(alert_params, events):
    """Yield one triggered alert per aggregation value at or below the threshold.

    Largely the same as a threshold alert, except that a lack of events
    (none at all, or a count less than or equal to
    ``alert_params["threshold"]``) is what triggers the alert.

    Args:
        alert_params: the deadman alert's parameters. This function reads
            ``aggregation_key``, ``threshold``, ``summary``,
            ``event_sample_count`` and (when samples are requested)
            ``event_snippet``.
        events: the events to evaluate; may be empty.

    Yields:
        The merge of ``alert_params`` with ``triggered`` set to True and a
        ``metadata`` entry holding the matched ``value`` and ``count``; the
        result also carries ``events`` (the matching events wrapped in
        DotDict) and the rendered ``summary``.
    """
    aggregation_key = alert_params["aggregation_key"]
    counts = mostCommon(events, aggregation_key)
    if not events:
        # Deadman alerts exist to notice that expected events are missing,
        # which leaves no events to pass on; stand in a meta event recording
        # that fact.
        # NOTE(review): this meta event has no field at the aggregation key,
        # so the equality filter below does not look like it will select it;
        # confirm whether it is meant to reach alert["events"].
        events = [
            {
                "utctimestamp": utcnow().isoformat(),
                "severity": "INFO",
                "summary": "Expected event not found",
                "category": "deadman",
                "source": "deadman",
                "tags": ["deadman"],
                "plugins": [],
                "details": {},
            }
        ]
    if not counts:
        # make up a metadata count: the aggregation key itself, seen 0 times
        counts = [(aggregation_key, 0)]
    for entry in counts:
        value, count = entry[0], entry[1]
        # lack of events, or event count at/below the threshold is a trigger
        if count <= alert_params["threshold"]:
            # Build the alert through merge() instead of assigning
            # "triggered" into alert_params directly, so this function does
            # not itself write into the caller's parameters.
            alert = merge(
                alert_params,
                {"triggered": True, "metadata": {"value": value, "count": count}},
            )
            # limit events to those matching the aggregation_key value
            # so the alert only gets the events behind this count
            alert["events"] = [
                dotted_event
                for dotted_event in map(DotDict, events)
                if value == dotted_event.get(aggregation_key)
            ]
            # render the summary as a chevron/mustache template, using the
            # alert (including its metadata and events) as the context
            alert["summary"] = chevron.render(alert["summary"], alert)
            # append a rendered snippet for each requested sample event
            sample = alert["events"][: alert_params["event_sample_count"]]
            snippets = [
                chevron.render(alert_params["event_snippet"], event)
                for event in sample
            ]
            alert["summary"] = " ".join([alert["summary"], *snippets])
            yield alert
def process_sequence_alert(config, db, session, athena, alert_params):
    """Advance a sequence alert by evaluating its first untriggered slot.

    Defaults from ``get_sequence_alert_shell`` are merged into
    ``alert_params``, the first slot lacking a ``"triggered"`` key is
    located, its criteria are rendered and queried, and every alert produced
    by the slot's trigger function is stored as an inflight alert with that
    slot replaced by the triggered alert. Returns None.
    """
    # load default params that may be missing in the alert config
    alert_params = merge(alert_params, get_sequence_alert_shell(alert_params))

    # the slot's value gets replaced, so keep its index alongside it
    index, slot = first_matching_index_value(
        alert_params["slots"], condition=lambda i: "triggered" not in i
    )
    if not slot:
        return

    def store_inflight(triggered_slot):
        # save or create an inflight alert with this slot filled in
        inflight = deepcopy(alert_params)
        inflight["slots"][index] = triggered_slot
        save_inflight_alert(db, inflight)

    events = None
    try:
        # resolve the criteria in case it's a chevron templated string
        criteria = chevron.render(slot["criteria"], alert_params)
        events = get_athena_events(criteria, config, athena, session)
    except Exception as e:
        logger.exception("Received exception while querying athena: %r" % e)

    # threshold slot: were events matching the criteria found?
    if slot["alert_type"] == "threshold" and events:
        # drop event(s) already captured in an inflight alert
        # TODO: need to check events and their slot? or just events
        events = remove_inflight_events(db, events, alert_params)
        events = remove_previously_alerted(db, events, alert_params)
        if events:
            # each yielded alert means the threshold was met for this slot
            for alert in determine_threshold_trigger(slot, events):
                store_inflight(alert)

    # deadman slot: does the count or lack of events trigger it?
    if slot["alert_type"] == "deadman":
        for alert in determine_deadman_trigger(slot, events):
            store_inflight(alert)

    return
def get_sequence_alert_shell(alert_params):
    """Return ``alert_params`` merged with a fully populated sequence alert.

    Each standard field takes its value from ``alert_params`` when present
    and from the fallback below otherwise. ``expiration`` falls back to the
    alert's ``utctimestamp`` plus its ``lifespan``, in ISO format.
    """
    fallbacks = {
        "alert_name": "unnamed",
        "alert_type": "sequence",
        "utctimestamp": utcnow().isoformat(),
        "lifespan": "3 days",
        "severity": "INFO",
        "summary": "sequence alert!",
        "debug": True,
        "slots": [],
    }
    alert = {
        field: alert_params.get(field, fallback)
        for field, fallback in fallbacks.items()
    }

    # calculate expiration in date format
    lifespan = pd.Timedelta(alert["lifespan"]).to_pytimedelta()
    computed_expiration = (toUTC(alert["utctimestamp"]) + lifespan).isoformat()
    alert["expiration"] = alert_params.get("expiration", computed_expiration)

    return merge(alert_params, alert)
def get_deadman_alert_shell(alert_params):
    """Return ``alert_params`` merged with a fully populated deadman alert.

    Each configurable field takes its value from ``alert_params`` when
    present and from the fallback below otherwise. ``utctimestamp`` is always
    set to the current time and ``events`` always starts as an empty list.
    """
    # fields listed ahead of the timestamp in the resulting dict
    leading = (
        ("alert_name", "unnamed"),
        ("alert_type", "deadman"),
    )
    # fields listed after the timestamp
    trailing = (
        ("severity", "INFO"),
        ("summary", "deadman alert!"),
        ("event_snippet", ""),
        ("event_sample_count", 0),
        ("category", "deadman"),
        ("tags", ["deadman"]),
        ("threshold", 0),
        ("aggregation_key", "none"),
        ("criteria", ""),
        ("debug", True),
    )

    alert = {field: alert_params.get(field, fallback) for field, fallback in leading}
    alert["utctimestamp"] = utcnow().isoformat()
    for field, fallback in trailing:
        alert[field] = alert_params.get(field, fallback)
    alert["events"] = []

    return merge(alert_params, alert)