def add_aggregated_alert(self, match, rule):
    """ Save a match as a pending aggregate alert to elasticsearch. """
    if not rule['current_aggregate_id'] or rule['aggregate_alert_time'] < ts_to_dt(match[rule['timestamp_field']]):
        # First match, set alert_time
        match_time = ts_to_dt(match[rule['timestamp_field']])
        alert_time = match_time + rule['aggregation']
        rule['aggregate_alert_time'] = alert_time
        agg_id = None
    else:
        # Already pending aggregation, use existing alert_time
        alert_time = rule['aggregate_alert_time']
        agg_id = rule['current_aggregate_id']

    logging.info('Adding alert for %s to aggregation, next alert at %s' % (rule['name'], alert_time))
    alert_body = self.get_alert_body(match, rule, False, alert_time)
    if agg_id:
        alert_body['aggregate_id'] = agg_id
    res = self.writeback('elastalert', alert_body)

    # If new aggregation, save _id
    if res and not agg_id:
        rule['current_aggregate_id'] = res['_id']

    # Couldn't write the match to ES, save it in memory for now
    if not res:
        rule['agg_matches'].append(match)

    return res

def is_silenced(self, rule_name):
    """ Checks if rule_name is currently silenced. Returns false on exception. """
    if rule_name in self.silence_cache:
        if ts_now() < self.silence_cache[rule_name][0]:
            return True
        else:
            return False

    query = {'filter': {'term': {'rule_name': rule_name}},
             'sort': {'until': {'order': 'desc'}}}

    if self.writeback_es:
        try:
            res = self.writeback_es.search(index=self.writeback_index, doc_type='silence',
                                           size=1, body=query, _source_include=['until', 'exponent'])
        except ElasticsearchException as e:
            self.handle_error("Error while querying for alert silence status: %s" % (e), {'rule': rule_name})
            return False

        if res['hits']['hits']:
            until_ts = res['hits']['hits'][0]['_source']['until']
            exponent = res['hits']['hits'][0]['_source'].get('exponent', 0)
            self.silence_cache[rule_name] = (ts_to_dt(until_ts), exponent)
            if ts_now() < ts_to_dt(until_ts):
                return True
    return False

def start(self):
    """ Periodically go through each rule and run it """
    starttime = self.args.start
    if starttime:
        try:
            starttime = ts_to_dt(starttime)
        except (TypeError, ValueError):
            self.handle_error("%s is not a valid ISO 8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (starttime))
            exit(1)
    while True:
        # If writeback_es errored, it's disabled until the next query cycle
        if not self.writeback_es:
            self.writeback_es = self.new_elasticsearch(self.es_conn_config)

        self.send_pending_alerts()

        next_run = datetime.datetime.utcnow() + self.run_every

        for rule in self.rules:
            # Set endtime based on the rule's delay
            delay = rule.get('query_delay')
            if hasattr(self.args, 'end') and self.args.end:
                endtime = ts_to_dt(self.args.end)
            elif delay:
                endtime = ts_now() - delay
            else:
                endtime = ts_now()

            try:
                num_matches = self.run_rule(rule, endtime, starttime)
            except EAException as e:
                self.handle_error("Error running rule %s: %s" % (rule['name'], e), {'rule': rule['name']})
            else:
                old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time'))
                logging.info("Ran %s from %s to %s: %s query hits, %s matches,"
                             " %s alerts sent" % (rule['name'], old_starttime,
                                                  pretty_ts(endtime, rule.get('use_local_time')),
                                                  self.num_hits, num_matches, self.alerts_sent))
                self.alerts_sent = 0

            self.remove_old_events(rule)

        if next_run < datetime.datetime.utcnow():
            # We were processing for longer than our refresh interval
            # This can happen if --start was specified with a large time period
            # or if we are running too slow to process events in real time.
            logging.warning("Querying from %s to %s took longer than %s!" % (old_starttime, endtime, self.run_every))
            continue

        # Only force starttime once
        starttime = None

        if not self.args.pin_rules:
            self.load_rule_changes()

        # Wait before querying again
        sleep_for = (next_run - datetime.datetime.utcnow()).seconds
        logging.info("Sleeping for %s seconds" % (sleep_for))
        time.sleep(sleep_for)

def add_aggregated_alert(self, match, rule):
    """ Save a match as a pending aggregate alert to elasticsearch. """
    if (not rule['current_aggregate_id'] or
            ('aggregate_alert_time' in rule and rule['aggregate_alert_time'] < ts_to_dt(match[rule['timestamp_field']]))):
        # Elastalert may have restarted while pending alerts exist
        pending_alert = self.find_pending_aggregate_alert(rule)
        if pending_alert:
            alert_time = rule['aggregate_alert_time'] = ts_to_dt(pending_alert['_source']['alert_time'])
            agg_id = rule['current_aggregate_id'] = pending_alert['_id']
            elastalert_logger.info('Adding alert for %s to aggregation(id: %s), next alert at %s' % (rule['name'], agg_id, alert_time))
        else:
            # First match, set alert_time
            match_time = ts_to_dt(match[rule['timestamp_field']])
            alert_time = ''
            if isinstance(rule['aggregation'], dict) and rule['aggregation'].get('schedule'):
                croniter._datetime_to_timestamp = cronite_datetime_to_timestamp  # For Python 2.6 compatibility
                try:
                    iter = croniter(rule['aggregation']['schedule'], ts_now())
                    alert_time = unix_to_dt(iter.get_next())
                except Exception as e:
                    self.handle_error("Error parsing aggregate send time Cron format %s" % (e), rule['aggregation']['schedule'])
            else:
                alert_time = match_time + rule['aggregation']

            rule['aggregate_alert_time'] = alert_time
            agg_id = None
            elastalert_logger.info('New aggregation for %s. next alert at %s.' % (rule['name'], alert_time))
    else:
        # Already pending aggregation, use existing alert_time
        alert_time = rule['aggregate_alert_time']
        agg_id = rule['current_aggregate_id']
        elastalert_logger.info('Adding alert for %s to aggregation(id: %s), next alert at %s' % (rule['name'], agg_id, alert_time))

    alert_body = self.get_alert_body(match, rule, False, alert_time)
    if agg_id:
        alert_body['aggregate_id'] = agg_id
    res = self.writeback('elastalert', alert_body)

    # If new aggregation, save _id
    if res and not agg_id:
        rule['current_aggregate_id'] = res['_id']

    # Couldn't write the match to ES, save it in memory for now
    if not res:
        rule['agg_matches'].append(match)

    return res

def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term. """
    self.es = Elasticsearch(host=self.rules["es_host"], port=self.rules["es_port"])
    window_size = datetime.timedelta(**self.rules.get("terms_window_size", {"days": 30}))

    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get("use_strftime_index"):
        index = format_index(self.rules["index"], start, end)
    else:
        index = self.rules["index"]
    time_filter = {self.rules["timestamp_field"]: {"lte": dt_to_ts(end), "gte": dt_to_ts(start)}}
    query_template["filter"] = {"bool": {"must": [{"range": time_filter}]}}
    query = {"aggs": {"filtered": query_template}}

    for field in self.fields:
        field_name["field"] = field
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
        if "aggregations" in res:
            buckets = res["aggregations"]["filtered"]["values"]["buckets"]
            keys = [bucket["key"] for bucket in buckets]
            self.seen_values[field] = keys
            elastalert_logger.info("Found %s unique values for %s" % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info("Found no values for %s" % (field))

def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term. """
    self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))

    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get('use_strftime_index'):
        index = format_index(self.rules['index'], start, end)
    else:
        index = self.rules['index']
    time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
    query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
    query = {'aggs': {'filtered': query_template}}

    for field in self.fields:
        field_name['field'] = field
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
        if 'aggregations' in res:
            buckets = res['aggregations']['filtered']['values']['buckets']
            keys = [bucket['key'] for bucket in buckets]
            self.seen_values[field] = keys
            elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info('Found no values for %s' % (field))

def get_starttime(self, rule):
    """ Query ES for the last time we ran this rule.

    :param rule: The rule configuration.
    :return: A timestamp or None.
    """
    query = {'filter': {'term': {'rule_name': '%s' % (rule['name'])}},
             'sort': {'@timestamp': {'order': 'desc'}}}
    try:
        if self.writeback_es:
            res = self.writeback_es.search(index=self.writeback_index, doc_type='elastalert_status',
                                           size=1, body=query, _source_include=['endtime', 'rule_name'])
            if res['hits']['hits']:
                endtime = ts_to_dt(res['hits']['hits'][0]['_source']['endtime'])

                if ts_now() - endtime < self.old_query_limit:
                    return endtime
                else:
                    logging.info("Found expired previous run for %s at %s" % (rule['name'], endtime))
                    return None
    except (ElasticsearchException, KeyError) as e:
        self.handle_error('Error querying for last run: %s' % (e), {'rule': rule['name']})
        self.writeback_es = None

    return None

def silence(self):
    """ Silence an alert for a period of time. --silence and --rule must be passed as args. """
    if self.debug:
        logging.error('--silence not compatible with --debug')
        exit(1)

    if not self.args.rule:
        logging.error('--silence must be used with --rule')
        exit(1)

    # With --rule, self.rules will only contain that specific rule
    rule_name = self.rules[0]['name']

    try:
        unit, num = self.args.silence.split('=')
        silence_time = datetime.timedelta(**{unit: int(num)})
        # Double conversion to add tzinfo
        silence_ts = ts_to_dt(dt_to_ts(silence_time + datetime.datetime.utcnow()))
    except (ValueError, TypeError):
        logging.error('%s is not a valid time period' % (self.args.silence))
        exit(1)

    if not self.set_realert(rule_name, silence_ts, 0):
        logging.error('Failed to save silence command to elasticsearch')
        exit(1)

    logging.info('Success. %s will be silenced until %s' % (rule_name, silence_ts))

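# A minimal sketch of how the --silence value is parsed above, assuming a
# hypothetical argument of 'minutes=30' (standard library only):
#
#     unit, num = 'minutes=30'.split('=')                      # 'minutes', '30'
#     silence_time = datetime.timedelta(**{unit: int(num)})    # timedelta(minutes=30)
#     silence_ts = ts_to_dt(dt_to_ts(silence_time + datetime.datetime.utcnow()))
#
# The dt_to_ts()/ts_to_dt() round trip exists only to attach tzinfo to the
# naive utcnow() result before it is written back to Elasticsearch.
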
def get_match_str(self, match):
    lt = self.rules.get('use_local_time')
    starttime = pretty_ts(dt_to_ts(ts_to_dt(match[self.ts_field]) - self.rules['timeframe']), lt)
    endtime = pretty_ts(match[self.ts_field], lt)
    message = 'At least %d events occurred between %s and %s\n\n' % (self.rules['num_events'], starttime, endtime)
    return message

def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term. """
    self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'],
                            timeout=self.rules.get('es_conn_timeout', 50))
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))

    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get('use_strftime_index'):
        index = format_index(self.rules['index'], start, end)
    else:
        index = self.rules['index']
    time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
    query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
    query = {'aggs': {'filtered': query_template}}

    for field in self.fields:
        # For composite keys, we will need to perform sub-aggregations
        if type(field) == list:
            level = query_template['aggs']
            # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
            for i, sub_field in enumerate(field):
                level['values']['terms']['field'] = sub_field
                if i < len(field) - 1:
                    # If we have more fields after the current one, then set up the next nested structure
                    level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                    level = level['values']['aggs']
        else:
            # For non-composite keys, only a single agg is needed
            field_name['field'] = field
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
        if 'aggregations' in res:
            buckets = res['aggregations']['filtered']['values']['buckets']
            if type(field) == list:
                # For composite keys, make the lookup based on all fields
                # Make it a tuple since it can be hashed and used in dictionary lookups
                self.seen_values[tuple(field)] = []
                for bucket in buckets:
                    # We need to walk down the hierarchy and obtain the value at each level
                    self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                # If we don't have any results, it could either be because of the absence of any baseline data
                # OR it may be because the composite key contained a non-primitive type. Either way, give the
                # end-users a heads up to help them debug what might be going on.
                if not self.seen_values[tuple(field)]:
                    elastalert_logger.warning((
                        'No results were found from all sub-aggregations. This can either indicate that there is '
                        'no baseline data OR that a non-primitive field was used in a composite key.'
                    ))
            else:
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info('Found no values for %s' % (field))

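# For illustration, this is roughly what query_template['aggs'] looks like after
# the composite-key branch above has processed a hypothetical two-field key
# ['src_ip', 'dest_port'] (field names are examples only):
#
#     {'values': {'terms': {'field': 'src_ip', 'size': 2147483647},
#                 'aggs': {'values': {'terms': {'field': 'dest_port',
#                                               'size': 2147483647}}}}}
#
# Each additional component of the composite key nests one more 'values' terms
# sub-aggregation, which flatten_aggregation_hierarchy() later walks back down.
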
def get_match_str(self, match):
    lt = self.rules.get('use_local_time')
    starttime = pretty_ts(dt_to_ts(ts_to_dt(match[self.ts_field]) - self.rules['timeframe']), lt)
    endtime = pretty_ts(match[self.ts_field], lt)
    message = ('A maximum of %d unique %s(s) occurred since last alert or '
               'between %s and %s\n\n' % (self.rules['max_cardinality'],
                                          self.rules['cardinality_field'],
                                          starttime, endtime))
    return message

def get_match_str(self, match):
    lt = self.rules.get('use_local_time')
    match_ts = lookup_es_key(match, self.ts_field)
    starttime = pretty_ts(dt_to_ts(ts_to_dt(match_ts) - self.rules['timeframe']), lt)
    # endtime was missing; without it the format string below raises a NameError
    endtime = pretty_ts(match_ts, lt)
    message = 'At least %d(%d) events occurred between %s and %s\n\n' % (self.rules['num_events'], match['count'], starttime, endtime)
    return message

def get_match_str(self, match):
    ts = match[self.rules['timestamp_field']]
    lt = self.rules.get('use_local_time')
    message = 'An abnormally low number of events occurred around %s.\n' % (pretty_ts(ts, lt))
    message += 'Between %s and %s, there were less than %s events.\n\n' % (
        pretty_ts(dt_to_ts(ts_to_dt(ts) - self.rules['timeframe']), lt),
        pretty_ts(ts, lt),
        self.rules['threshold'])
    return message

def get_match_str(self, match):
    lt = self.rules.get("use_local_time")
    starttime = pretty_ts(dt_to_ts(ts_to_dt(match[self.ts_field]) - self.rules["timeframe"]), lt)
    endtime = pretty_ts(match[self.ts_field], lt)
    message = "A maximum of %d unique %s(s) occurred since last alert or between %s and %s\n\n" % (
        self.rules["max_cardinality"],
        self.rules["cardinality_field"],
        starttime,
        endtime,
    )
    return message

def send_pending_alerts(self):
    pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit)
    for alert in pending_alerts:
        _id = alert['_id']
        alert = alert['_source']
        try:
            rule_name = alert.pop('rule_name')
            alert_time = alert.pop('alert_time')
            match_body = alert.pop('match_body')
        except KeyError:
            # Malformed alert, drop it
            continue

        # Find original rule
        for rule in self.rules:
            if rule['name'] == rule_name:
                break
        else:
            # Original rule is missing, keep alert for later if rule reappears
            continue

        # Set current_es for top_count_keys query
        rule_es_conn_config = self.build_es_conn_config(rule)
        self.current_es = self.new_elasticsearch(rule_es_conn_config)
        self.current_es_addr = (rule['es_host'], rule['es_port'])

        # Send the alert unless it's a future alert
        if ts_now() > ts_to_dt(alert_time):
            aggregated_matches = self.get_aggregated_matches(_id)
            if aggregated_matches:
                matches = [match_body] + [agg_match['match_body'] for agg_match in aggregated_matches]
                self.alert(matches, rule, alert_time=alert_time)
                if rule['current_aggregate_id'] == _id:
                    rule['current_aggregate_id'] = None
            else:
                self.alert([match_body], rule, alert_time=alert_time)

            # Delete it from the index
            try:
                self.writeback_es.delete(index=self.writeback_index, doc_type='elastalert', id=_id)
            except:  # TODO: Give this a more relevant exception, try:except: is evil.
                self.handle_error("Failed to delete alert %s at %s" % (_id, alert_time))

    # Send in memory aggregated alerts
    for rule in self.rules:
        if rule['agg_matches']:
            if ts_now() > rule['aggregate_alert_time']:
                self.alert(rule['agg_matches'], rule)
                rule['agg_matches'] = []

def send_pending_alerts(self):
    pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit)
    for alert in pending_alerts:
        _id = alert['_id']
        alert = alert['_source']
        try:
            rule_name = alert.pop('rule_name')
            alert_time = alert.pop('alert_time')
            match_body = alert.pop('match_body')
        except KeyError:
            # Malformed alert, drop it
            continue

        agg_id = alert.get('aggregate_id', None)
        if agg_id:
            # Aggregated alerts will be taken care of by get_aggregated_matches
            continue

        # Find original rule
        for rule in self.rules:
            if rule['name'] == rule_name:
                break
        else:
            # Original rule is missing, drop alert
            continue

        # Retry the alert unless it's a future alert
        if ts_now() > ts_to_dt(alert_time):
            aggregated_matches = self.get_aggregated_matches(_id)
            if aggregated_matches:
                matches = [match_body] + [agg_match['match_body'] for agg_match in aggregated_matches]
                self.alert(matches, rule, alert_time=alert_time)
                rule['current_aggregate_id'] = None
            else:
                self.alert([match_body], rule, alert_time=alert_time)

            # Delete it from the index
            try:
                self.writeback_es.delete(index=self.writeback_index, doc_type='elastalert', id=_id)
            except:
                self.handle_error("Failed to delete alert %s at %s" % (_id, alert_time))

    # Send in memory aggregated alerts
    for rule in self.rules:
        if rule['agg_matches']:
            if ts_now() > rule['aggregate_alert_time']:
                self.alert(rule['agg_matches'], rule)
                rule['agg_matches'] = []

def get_match_str(self, match):
    lt = self.rules.get('use_local_time')
    starttime = (ts_to_dt(match[self.ts_field]) - self.rules['timeframe']).isoformat()
    endtime = match[self.ts_field]
    if 'max_cardinality' in self.rules:
        message = ('A maximum of %d unique %s(s) occurred since last alert or between %s and %s\n\n' % (
            self.rules['max_cardinality'], self.rules['query_key'], starttime, endtime))
    else:
        message = ('Less than %d unique %s(s) occurred since last alert or between %s and %s\n\n' % (
            self.rules['min_cardinality'], self.rules['query_key'], starttime, endtime))
    return message

def get_match_str(self, match):
    ts = match[self.rules['timestamp_field']]
    lt = self.rules.get('use_local_time')
    try:
        match_value = self.match_value[-1][:5]
    except:
        match_value = []
    message = "Between %s and %s\n" % (pretty_ts(dt_to_ts(ts_to_dt(ts) - self.rules['timeframe']), lt),
                                       pretty_ts(ts, lt))
    message += "%s(%s) %s %s\nmatch value:\n\t%s...\n\n" % (
        self.rules['stat'],
        self.rules['stat_field'],
        self.rules['stat_type'],
        self.rules['threshold'],
        '\n\t'.join(match_value)
    )
    return message

def run_all_rules(self):
    """ Run each rule one time """
    # If writeback_es errored, it's disabled until the next query cycle
    if not self.writeback_es:
        self.writeback_es = self.new_elasticsearch(self.es_conn_config)

    self.send_pending_alerts()

    next_run = datetime.datetime.utcnow() + self.run_every

    for rule in self.rules:
        # Set endtime based on the rule's delay
        delay = rule.get('query_delay')
        if hasattr(self.args, 'end') and self.args.end:
            endtime = ts_to_dt(self.args.end)
        elif delay:
            endtime = ts_now() - delay
        else:
            endtime = ts_now()

        try:
            num_matches = self.run_rule(rule, endtime, self.starttime)
        except EAException as e:
            self.handle_error("Error running rule %s: %s" % (rule['name'], e), {'rule': rule['name']})
        else:
            old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time'))
            logging.info("Ran %s from %s to %s: %s query hits, %s matches,"
                         " %s alerts sent" % (rule['name'], old_starttime,
                                              pretty_ts(endtime, rule.get('use_local_time')),
                                              self.num_hits, num_matches, self.alerts_sent))
            self.alerts_sent = 0

        self.remove_old_events(rule)

    if next_run < datetime.datetime.utcnow():
        # We were processing for longer than our refresh interval
        # This can happen if --start was specified with a large time period
        # or if we are running too slow to process events in real time.
        logging.warning("Querying from %s to %s took longer than %s!" % (old_starttime, endtime, self.run_every))

    # Only force starttime once
    self.starttime = None

    if not self.args.pin_rules:
        self.load_rule_changes()

def start(self):
    """ Periodically go through each rule and run it """
    if self.starttime:
        try:
            self.starttime = ts_to_dt(self.starttime)
        except (TypeError, ValueError):
            self.handle_error("%s is not a valid ISO 8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (self.starttime))
            exit(1)
    self.running = True
    while self.running:
        next_run = datetime.datetime.utcnow() + self.run_every

        self.run_all_rules()

        if next_run < datetime.datetime.utcnow():
            continue

        # Wait before querying again
        sleep_duration = (next_run - datetime.datetime.utcnow()).seconds
        self.sleep_for(sleep_duration)

def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term. """
    self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'],
                            timeout=self.rules.get('es_conn_timeout', 50))
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))

    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get('use_strftime_index'):
        index = format_index(self.rules['index'], start, end)
    else:
        index = self.rules['index']
    time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
    query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
    query = {'aggs': {'filtered': query_template}}

    for field in self.fields:
        field_name['field'] = field
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
        if 'aggregations' in res:
            buckets = res['aggregations']['filtered']['values']['buckets']
            keys = [bucket['key'] for bucket in buckets]
            self.seen_values[field] = keys
            elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info('Found no values for %s' % (field))

def create_default_title(self, matches):
    subject = '%s: %d matches found - %s' % (
        self.rule['name'], len(matches), pretty_ts(ts_to_dt(self.pipeline['alert_time'])))
    return subject

def alert(self, matches, rule, alert_time=None):
    """ Send out an alert.

    :param matches: A list of matches.
    :param rule: A rule configuration.
    """
    if alert_time is None:
        alert_time = ts_now()

    # Compute top count keys
    if rule.get('top_count_keys'):
        for match in matches:
            if 'query_key' in rule and rule['query_key'] in match:
                qk = match[rule['query_key']]
            else:
                qk = None
            start = ts_to_dt(match[rule['timestamp_field']]) - rule.get('timeframe', datetime.timedelta(minutes=10))
            end = ts_to_dt(match[rule['timestamp_field']]) + datetime.timedelta(minutes=10)
            keys = rule.get('top_count_keys')
            counts = self.get_top_counts(rule, start, end, keys, rule.get('top_count_number'), qk)
            match.update(counts)

    # Generate a kibana dashboard for the first match
    if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'):
        try:
            if rule.get('generate_kibana_link'):
                kb_link = self.generate_kibana_db(rule, matches[0])
            else:
                kb_link = self.use_kibana_link(rule, matches[0])
        except EAException as e:
            self.handle_error("Could not generate kibana dash for %s match: %s" % (rule['name'], e))
        else:
            if kb_link:
                matches[0]['kibana_link'] = kb_link

    for enhancement in rule['match_enhancements']:
        for match in matches:
            try:
                enhancement.process(match)
            except EAException as e:
                self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})

    # Don't send real alerts in debug mode
    if self.debug:
        alerter = DebugAlerter(rule)
        alerter.alert(matches)
        return

    # Run the alerts
    alert_sent = False
    alert_exception = None
    for alert in rule['alert']:
        try:
            alert.alert(matches)
        except EAException as e:
            self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']})
            alert_exception = str(e)
        else:
            self.alerts_sent += 1
            alert_sent = True

    # Write the alert(s) to ES
    agg_id = None
    for match in matches:
        alert_body = self.get_alert_body(match, rule, alert_sent, alert_time, alert_exception)
        # Set all matches to aggregate together
        if agg_id:
            alert_body['aggregate_id'] = agg_id
        res = self.writeback('elastalert', alert_body)
        if res and not agg_id:
            agg_id = res['_id']

def send_alert(self, matches, rule, alert_time=None):
    """ Send out an alert.

    :param matches: A list of matches.
    :param rule: A rule configuration.
    """
    if alert_time is None:
        alert_time = ts_now()

    # Compute top count keys
    if rule.get('top_count_keys'):
        for match in matches:
            if 'query_key' in rule and rule['query_key'] in match:
                qk = match[rule['query_key']]
            else:
                qk = None
            start = ts_to_dt(match[rule['timestamp_field']]) - rule.get('timeframe', datetime.timedelta(minutes=10))
            end = ts_to_dt(match[rule['timestamp_field']]) + datetime.timedelta(minutes=10)
            keys = rule.get('top_count_keys')
            counts = self.get_top_counts(rule, start, end, keys, qk=qk)
            match.update(counts)

    # Generate a kibana3 dashboard for the first match
    if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'):
        try:
            if rule.get('generate_kibana_link'):
                kb_link = self.generate_kibana_db(rule, matches[0])
            else:
                kb_link = self.use_kibana_link(rule, matches[0])
        except EAException as e:
            self.handle_error("Could not generate kibana dash for %s match: %s" % (rule['name'], e))
        else:
            if kb_link:
                matches[0]['kibana_link'] = kb_link

    if rule.get('use_kibana4_dashboard'):
        kb_link = self.generate_kibana4_db(rule, matches[0])
        if kb_link:
            matches[0]['kibana_link'] = kb_link

    # if not rule.get('run_enhancements_first'):
    #     for enhancement in rule.get('match_enhancements'):
    #         valid_matches = []
    #         for match in matches:
    #             try:
    #                 enhancement.process(match)
    #                 valid_matches.append(match)
    #             except DropMatchException as e:
    #                 pass
    #             except EAException as e:
    #                 self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})
    #         matches = valid_matches
    #         if not matches:
    #             return None

    # Don't send real alerts in debug mode
    if self.debug:
        alerter = DebugAlerter(rule)
        alerter.alert(matches)
        return None

    # Run the alerts
    alert_sent = False
    alert_exception = None
    # Alert.pipeline is a single object shared between every alerter
    # This allows alerters to pass objects and data between themselves
    alert_pipeline = {"alert_time": alert_time}
    for alert in rule.get("actions"):
        alert.pipeline = alert_pipeline
        try:
            alert.alert(matches)
        except EAException as e:
            self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']})
            alert_exception = str(e)
        else:
            self.alerts_sent += 1
            alert_sent = True

def replace_ts(self, hits, rule):
    for hit in hits:
        hit['_source'][rule['timestamp_field']] = ts_to_dt(hit['_source'][rule['timestamp_field']])

def unwrap_interval_buckets(self, timestamp, query_key, interval_buckets):
    for interval_data in interval_buckets:
        # Use bucket key here instead of start_time for more accurate match timestamp
        self.check_matches(ts_to_dt(interval_data['key_as_string']), query_key, interval_data)

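# For context, interval_buckets is assumed to be the 'buckets' list of an
# Elasticsearch date_histogram aggregation; each bucket carries a numeric 'key'
# and a 'key_as_string' timestamp. An illustrative (hypothetical) payload:
#
#     [{'key_as_string': '2015-01-01T00:00:00Z', 'key': 1420070400000, 'doc_count': 13},
#      {'key_as_string': '2015-01-01T01:00:00Z', 'key': 1420074000000, 'doc_count': 2}]
#
# Using key_as_string gives check_matches() the bucket's own timestamp rather
# than the overall query start time.
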
def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term. """
    self.es = elasticsearch_client(self.rules)
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))

    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    step = datetime.timedelta(**self.rules.get('window_step_size', {'days': 1}))

    for field in self.fields:
        tmp_start = start
        tmp_end = min(start + step, end)

        time_filter = {self.rules['timestamp_field']: {'lt': dt_to_ts(tmp_end), 'gte': dt_to_ts(tmp_start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        # For composite keys, we will need to perform sub-aggregations
        if type(field) == list:
            self.seen_values.setdefault(tuple(field), [])
            level = query_template['aggs']
            # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
            for i, sub_field in enumerate(field):
                level['values']['terms']['field'] = add_raw_postfix(sub_field)
                if i < len(field) - 1:
                    # If we have more fields after the current one, then set up the next nested structure
                    level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                    level = level['values']['aggs']
        else:
            self.seen_values.setdefault(field, [])
            # For non-composite keys, only a single agg is needed
            field_name['field'] = add_raw_postfix(field)

        # Query the entire time range in small chunks
        while tmp_start < end:
            if self.rules.get('use_strftime_index'):
                index = format_index(self.rules['index'], tmp_start, tmp_end)
            else:
                index = self.rules['index']
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                if type(field) == list:
                    # For composite keys, make the lookup based on all fields
                    # Make it a tuple since it can be hashed and used in dictionary lookups
                    for bucket in buckets:
                        # We need to walk down the hierarchy and obtain the value at each level
                        self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                else:
                    keys = [bucket['key'] for bucket in buckets]
                    self.seen_values[field] += keys
            else:
                self.seen_values.setdefault(field, [])
            if tmp_start == tmp_end:
                break
            tmp_start = tmp_end
            tmp_end = min(tmp_start + step, end)
            time_filter[self.rules['timestamp_field']] = {'lt': dt_to_ts(tmp_end), 'gte': dt_to_ts(tmp_start)}

    for key, values in self.seen_values.iteritems():
        if not values:
            if type(key) == tuple:
                # If we don't have any results, it could either be because of the absence of any baseline data
                # OR it may be because the composite key contained a non-primitive type. Either way, give the
                # end-users a heads up to help them debug what might be going on.
                elastalert_logger.warning((
                    'No results were found from all sub-aggregations. This can either indicate that there is '
                    'no baseline data OR that a non-primitive field was used in a composite key.'
                ))
            else:
                # Log the key being reported on, not the leftover loop variable from above
                elastalert_logger.info('Found no values for %s' % (key))
            continue
        self.seen_values[key] = list(set(values))
        elastalert_logger.info('Found %s unique values for %s' % (len(values), key))

def get_match_str(self, match):
    ts = match[self.rules['timestamp_field']]
    lt = self.rules.get('use_local_time')
    message = 'An abnormally low number of events occurred around %s.\n' % (pretty_ts(ts, lt))
    message += 'Between %s and %s, there were less than %s events.\n\n' % (
        pretty_ts(dt_to_ts(ts_to_dt(ts) - self.rules['timeframe']), lt),
        pretty_ts(ts, lt),
        self.rules['threshold'])
    return message

def alert(self, matches, rule, alert_time=None):
    """ Send out an alert.

    :param matches: A list of matches.
    :param rule: A rule configuration.
    """
    if alert_time is None:
        alert_time = ts_now()

    # Compute top count keys
    if rule.get('top_count_keys'):
        for match in matches:
            if 'query_key' in rule and rule['query_key'] in match:
                qk = match[rule['query_key']]
            else:
                qk = None
            start = ts_to_dt(match[rule['timestamp_field']]) - rule.get('timeframe', datetime.timedelta(minutes=10))
            end = ts_to_dt(match[rule['timestamp_field']]) + datetime.timedelta(minutes=10)
            keys = rule.get('top_count_keys')
            counts = self.get_top_counts(rule, start, end, keys, rule.get('top_count_number'), qk)
            match.update(counts)

    # Generate a kibana dashboard for the first match
    if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'):
        try:
            if rule.get('generate_kibana_link'):
                kb_link = self.generate_kibana_db(rule, matches[0])
            else:
                kb_link = self.use_kibana_link(rule, matches[0])
        except EAException as e:
            self.handle_error("Could not generate kibana dash for %s match: %s" % (rule['name'], e))
        else:
            if kb_link:
                matches[0]['kibana_link'] = kb_link

    for enhancement in rule['match_enhancements']:
        for match in matches:
            try:
                enhancement.process(match)
            except EAException as e:
                self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})

    # Don't send real alerts in debug mode
    if self.debug:
        alerter = DebugAlerter(rule)
        alerter.alert(matches)
        return

    # Run the alerts
    alert_sent = False
    alert_exception = None
    for alert in rule['alert']:
        try:
            alert.alert(matches)
        except EAException as e:
            self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']})
            alert_exception = str(e)
        else:
            self.alerts_sent += 1
            alert_sent = True

    # Write the alert(s) to ES
    agg_id = None
    for match in matches:
        alert_body = self.get_alert_body(match, rule, alert_sent, alert_time, alert_exception)
        # Set all matches to aggregate together
        if agg_id:
            alert_body['aggregate_id'] = agg_id
        res = self.writeback('elastalert', alert_body)
        if res and not agg_id:
            agg_id = res['_id']

def start(self):
    """ Periodically go through each rule and run it """
    starttime = self.args.start
    if starttime:
        try:
            starttime = ts_to_dt(starttime)
        except (TypeError, ValueError):
            self.handle_error("%s is not a valid ISO 8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (starttime))
            exit(1)
    while True:
        # If writeback_es errored, it's disabled until the next query cycle
        if not self.writeback_es:
            self.writeback_es = Elasticsearch(host=self.es_host, port=self.es_port)

        self.send_pending_alerts()

        next_run = datetime.datetime.utcnow() + self.run_every

        for rule in self.rules:
            # Set endtime based on the rule's delay
            delay = rule.get('query_delay')
            if hasattr(self.args, 'end') and self.args.end:
                endtime = ts_to_dt(self.args.end)
            elif delay:
                endtime = ts_now() - delay
            else:
                endtime = ts_now()

            try:
                num_matches = self.run_rule(rule, endtime, starttime)
            except EAException as e:
                self.handle_error("Error running rule %s: %s" % (rule['name'], e), {'rule': rule['name']})
            else:
                old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time'))
                logging.info("Ran %s from %s to %s: %s query hits, %s matches,"
                             " %s alerts sent" % (rule['name'], old_starttime,
                                                  pretty_ts(endtime, rule.get('use_local_time')),
                                                  self.num_hits, num_matches, self.alerts_sent))
                self.alerts_sent = 0

            self.remove_old_events(rule)

        if next_run < datetime.datetime.utcnow():
            # We were processing for longer than our refresh interval
            # This can happen if --start was specified with a large time period
            # or if we are running too slow to process events in real time.
            logging.warning("Querying from %s to %s took longer than %s!" % (old_starttime, endtime, self.run_every))
            continue

        # Only force starttime once
        starttime = None

        if not self.args.pin_rules:
            self.load_rule_changes()

        # Wait before querying again
        sleep_for = (next_run - datetime.datetime.utcnow()).seconds
        logging.info("Sleeping for %s seconds" % (sleep_for))
        time.sleep(sleep_for)

def send_alert(self, matches, rule, alert_time=None):
    """ Send out an alert.

    :param matches: A list of matches.
    :param rule: A rule configuration.
    """
    if alert_time is None:
        alert_time = ts_now()

    # Compute top count keys
    if rule.get('top_count_keys'):
        for match in matches:
            if 'query_key' in rule and rule['query_key'] in match:
                qk = match[rule['query_key']]
            else:
                qk = None
            start = ts_to_dt(match[rule['timestamp_field']]) - rule.get('timeframe', datetime.timedelta(minutes=10))
            end = ts_to_dt(match[rule['timestamp_field']]) + datetime.timedelta(minutes=10)
            keys = rule.get('top_count_keys')
            counts = self.get_top_counts(rule, start, end, keys, qk=qk)
            match.update(counts)

    # Generate a kibana3 dashboard for the first match
    if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'):
        try:
            if rule.get('generate_kibana_link'):
                kb_link = self.generate_kibana_db(rule, matches[0])
            else:
                kb_link = self.use_kibana_link(rule, matches[0])
        except EAException as e:
            self.handle_error("Could not generate kibana dash for %s match: %s" % (rule['name'], e))
        else:
            if kb_link:
                matches[0]['kibana_link'] = kb_link

    if rule.get('use_kibana4_dashboard'):
        kb_link = self.generate_kibana4_db(rule, matches[0])
        if kb_link:
            matches[0]['kibana_link'] = kb_link

    # if not rule.get('run_enhancements_first'):
    #     for enhancement in rule.get('match_enhancements'):
    #         valid_matches = []
    #         for match in matches:
    #             try:
    #                 enhancement.process(match)
    #                 valid_matches.append(match)
    #             except DropMatchException as e:
    #                 pass
    #             except EAException as e:
    #                 self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})
    #         matches = valid_matches
    #         if not matches:
    #             return None

    # Don't send real alerts in debug mode
    if self.debug:
        alerter = DebugAlerter(rule)
        alerter.alert(matches)
        return None

    # Run the alerts
    alert_sent = False
    alert_exception = None
    # Alert.pipeline is a single object shared between every alerter
    # This allows alerters to pass objects and data between themselves
    alert_pipeline = {"alert_time": alert_time}
    for alert in rule.get("actions"):
        alert.pipeline = alert_pipeline
        try:
            alert.alert(matches)
        except EAException as e:
            self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']})
            alert_exception = str(e)
        else:
            self.alerts_sent += 1
            alert_sent = True

def replace_ts(self, hits, rule):
    for hit in hits:
        hit['_source'][rule['timestamp_field']] = ts_to_dt(hit['_source'][rule['timestamp_field']])