def find_recent_pending_alerts(self, time_limit):
    """ Queries writeback_es to find alerts that did not send and are newer than time_limit.

    :param time_limit: A timedelta; only alerts with alert_time within this window are returned.
    :return: A list of ES hit dicts, or [] when there are none or the query fails.
    """
    query = {'query': {'query_string': {'query': 'alert_sent:false'}},
             'filter': {'range': {'alert_time': {'from': dt_to_ts(ts_now() - time_limit),
                                                 'to': dt_to_ts(ts_now())}}}}
    if self.writeback_es:
        try:
            res = self.writeback_es.search(index=self.writeback_index,
                                           doc_type='elastalert',
                                           body=query,
                                           size=1000)
            if res['hits']['hits']:
                return res['hits']['hits']
        except (ElasticsearchException, KeyError) as e:
            # Best-effort: log and return [] rather than crash the alerting loop.
            # Previously a bare except silently hid all failures here.
            logging.exception("Error finding recent pending alerts: %s" % (e))
    return []
def is_silenced(self, rule_name):
    """ Checks if rule_name is currently silenced. Returns false on exception.

    Consults the in-memory silence_cache first; on a miss, queries the
    writeback index for the most recent 'silence' document and caches it.
    """
    if rule_name in self.silence_cache:
        # Cache entries are (until_datetime, exponent) tuples
        if ts_now() < self.silence_cache[rule_name][0]:
            return True
        else:
            return False
    # Cache miss: fetch the latest silence document for this rule
    query = {'filter': {'term': {'rule_name': rule_name}}, 'sort': {'until': {'order': 'desc'}}}
    if self.writeback_es:
        try:
            res = self.writeback_es.search(index=self.writeback_index, doc_type='silence',
                                           size=1, body=query, _source_include=['until', 'exponent'])
        except ElasticsearchException as e:
            self.handle_error("Error while querying for alert silence status: %s" % (e), {'rule': rule_name})
            return False
        if res['hits']['hits']:
            until_ts = res['hits']['hits'][0]['_source']['until']
            exponent = res['hits']['hits'][0]['_source'].get('exponent', 0)
            # Populate the cache so subsequent checks skip the ES round trip
            self.silence_cache[rule_name] = (ts_to_dt(until_ts), exponent)
            if ts_now() < ts_to_dt(until_ts):
                return True
    return False
def check_for_match(self, key):
    """ Check whether the occurrences for `key` constitute a match.

    Skips checking outside the configured time-of-day window or weekday
    list, waits until a full timeframe of data has been seen, then matches
    when the event count has dropped below 'threshold' (flatline-style --
    presumably alerting on too FEW events; confirm against rule docs).
    """
    now = ts_now().time()
    start_time = self.rules.get('start_time')
    end_time = self.rules.get('end_time')
    # Only check inside the optional [start_time, end_time] time-of-day window.
    # NOTE(review): this comparison assumes start_time <= end_time (window does
    # not span midnight) -- confirm with callers.
    if (start_time is not None and now < start_time) or (end_time is not None and now > end_time):
        return
    weekdays = self.rules.get('weekdays')
    if weekdays is not None and ts_now().weekday() not in weekdays:
        return
    most_recent_ts = self.get_ts(self.occurrences[key].data[-1])
    # Remember when we first saw this key so we can wait a full timeframe
    if self.first_event.get(key) is None:
        self.first_event[key] = most_recent_ts
    # Don't check for matches until timeframe has elapsed
    if most_recent_ts - self.first_event[key] < self.rules['timeframe']:
        return
    # Match if, after removing old events, the count is below 'threshold'
    count = self.occurrences[key].count()
    if count < self.rules['threshold']:
        event = self.occurrences[key].data[-1][0]
        event.update(key=key, count=count)
        self.add_match(event)
        # After adding this match, remove this key so we don't realert on it
        self.occurrences.pop(key)
        del self.first_event[key]
def start(self):
    """ Periodically go through each rule and run it.

    Parses the optional --start argument, then loops forever: reconnects
    writeback_es if needed, retries pending alerts, runs each rule up to its
    endtime, and sleeps until the next scheduled cycle.
    """
    starttime = self.args.start
    if starttime:
        try:
            starttime = ts_to_dt(starttime)
        except (TypeError, ValueError):
            self.handle_error("%s is not a valid ISO 8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (starttime))
            exit(1)
    while True:
        # If writeback_es errored, it's disabled until the next query cycle
        if not self.writeback_es:
            self.writeback_es = self.new_elasticsearch(self.es_conn_config)
        self.send_pending_alerts()
        next_run = datetime.datetime.utcnow() + self.run_every
        for rule in self.rules:
            # Set endtime based on the rule's delay
            delay = rule.get('query_delay')
            if hasattr(self.args, 'end') and self.args.end:
                endtime = ts_to_dt(self.args.end)
            elif delay:
                endtime = ts_now() - delay
            else:
                endtime = ts_now()
            # BUG FIX: bind old_starttime before the try so the overrun warning
            # below cannot raise NameError when run_rule raises an exception
            # (it was previously only assigned on the success path).
            old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time'))
            try:
                num_matches = self.run_rule(rule, endtime, starttime)
            except EAException as e:
                self.handle_error("Error running rule %s: %s" % (rule['name'], e), {'rule': rule['name']})
            else:
                # Recompute after run_rule, which updates original_starttime
                old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time'))
                logging.info("Ran %s from %s to %s: %s query hits, %s matches,"
                             " %s alerts sent" % (rule['name'], old_starttime,
                                                  pretty_ts(endtime, rule.get('use_local_time')),
                                                  self.num_hits, num_matches, self.alerts_sent))
                self.alerts_sent = 0
            self.remove_old_events(rule)
        if next_run < datetime.datetime.utcnow():
            # We were processing for longer than our refresh interval
            # This can happen if --start was specified with a large time period
            # or if we are running too slow to process events in real time.
            logging.warning("Querying from %s to %s took longer than %s!" % (old_starttime, endtime, self.run_every))
            continue
        # Only force starttime once
        starttime = None
        if not self.args.pin_rules:
            self.load_rule_changes()
        # Wait before querying again
        sleep_for = (next_run - datetime.datetime.utcnow()).seconds
        logging.info("Sleeping for %s seconds" % (sleep_for))
        time.sleep(sleep_for)
def send_pending_alerts(self):
    """ Retry alerts that were saved to the writeback index but not yet sent,
    then flush any in-memory aggregated matches whose alert time has passed. """
    pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit)
    for alert in pending_alerts:
        _id = alert['_id']
        alert = alert['_source']
        try:
            rule_name = alert.pop('rule_name')
            alert_time = alert.pop('alert_time')
            match_body = alert.pop('match_body')
        except KeyError:
            # Malformed alert, drop it
            continue
        # Find original rule
        for rule in self.rules:
            if rule['name'] == rule_name:
                break
        else:
            # Original rule is missing, keep alert for later if rule reappears
            continue
        # Set current_es for top_count_keys query
        rule_es_conn_config = self.build_es_conn_config(rule)
        self.current_es = self.new_elasticsearch(rule_es_conn_config)
        self.current_es_addr = (rule['es_host'], rule['es_port'])
        # Send the alert unless it's a future alert
        if ts_now() > ts_to_dt(alert_time):
            aggregated_matches = self.get_aggregated_matches(_id)
            if aggregated_matches:
                # Combine this alert with everything aggregated under its _id
                matches = [match_body] + [agg_match['match_body'] for agg_match in aggregated_matches]
                self.alert(matches, rule, alert_time=alert_time)
                if rule['current_aggregate_id'] == _id:
                    rule['current_aggregate_id'] = None
            else:
                self.alert([match_body], rule, alert_time=alert_time)
            # Delete it from the index
            try:
                self.writeback_es.delete(index=self.writeback_index, doc_type='elastalert', id=_id)
            except:
                # TODO: Give this a more relevant exception, try:except: is evil.
                self.handle_error("Failed to delete alert %s at %s" % (_id, alert_time))
    # Send in memory aggregated alerts
    for rule in self.rules:
        if rule['agg_matches']:
            if ts_now() > rule['aggregate_alert_time']:
                self.alert(rule['agg_matches'], rule)
                rule['agg_matches'] = []
def send_pending_alerts(self):
    """ Retry unsent alerts from the writeback index (skipping ones that belong
    to an aggregate), then flush in-memory aggregated matches whose alert
    time has passed. """
    pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit)
    for alert in pending_alerts:
        _id = alert['_id']
        alert = alert['_source']
        try:
            rule_name = alert.pop('rule_name')
            alert_time = alert.pop('alert_time')
            match_body = alert.pop('match_body')
        except KeyError:
            # Malformed alert, drop it
            continue
        agg_id = alert.get('aggregate_id', None)
        if agg_id:
            # Aggregated alerts will be taken care of by get_aggregated_matches
            continue
        # Find original rule
        for rule in self.rules:
            if rule['name'] == rule_name:
                break
        else:
            # Original rule is missing, drop alert
            continue
        # Retry the alert unless it's a future alert
        if ts_now() > ts_to_dt(alert_time):
            aggregated_matches = self.get_aggregated_matches(_id)
            if aggregated_matches:
                # Combine this alert with everything aggregated under its _id
                matches = [match_body] + [agg_match['match_body'] for agg_match in aggregated_matches]
                self.alert(matches, rule, alert_time=alert_time)
                rule['current_aggregate_id'] = None
            else:
                self.alert([match_body], rule, alert_time=alert_time)
            # Delete it from the index
            try:
                self.writeback_es.delete(index=self.writeback_index, doc_type='elastalert', id=_id)
            except:
                self.handle_error("Failed to delete alert %s at %s" % (_id, alert_time))
    # Send in memory aggregated alerts
    for rule in self.rules:
        if rule['agg_matches']:
            if ts_now() > rule['aggregate_alert_time']:
                self.alert(rule['agg_matches'], rule)
                rule['agg_matches'] = []
def send_pending_alerts(self):
    """ Retry alerts saved to the writeback index that were never sent, then
    flush in-memory aggregated matches whose alert time has passed. """
    pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit)
    for alert in pending_alerts:
        _id = alert['_id']
        alert = alert['_source']
        try:
            rule_name = alert.pop('rule_name')
            alert_time = alert.pop('alert_time')
            match_body = alert.pop('match_body')
        except KeyError:
            # Malformed alert, drop it
            continue
        # Find original rule
        for rule in self.rules:
            if rule['name'] == rule_name:
                break
        else:
            # Original rule is missing, keep alert for later if rule reappears
            continue
        # Set current_es for top_count_keys query
        rule_es_conn_config = self.build_es_conn_config(rule)
        self.current_es = self.new_elasticsearch(rule_es_conn_config)
        self.current_es_addr = (rule['es_host'], rule['es_port'])
        # Send the alert unless it's a future alert
        if ts_now() > ts_to_dt(alert_time):
            aggregated_matches = self.get_aggregated_matches(_id)
            if aggregated_matches:
                # Include every match aggregated under this alert's _id
                matches = [match_body] + [agg_match['match_body'] for agg_match in aggregated_matches]
                self.alert(matches, rule, alert_time=alert_time)
                if rule['current_aggregate_id'] == _id:
                    rule['current_aggregate_id'] = None
            else:
                self.alert([match_body], rule, alert_time=alert_time)
            # Delete it from the index
            try:
                self.writeback_es.delete(index=self.writeback_index, doc_type='elastalert', id=_id)
            except:
                # TODO: Give this a more relevant exception, try:except: is evil.
                self.handle_error("Failed to delete alert %s at %s" % (_id, alert_time))
    # Send in memory aggregated alerts
    for rule in self.rules:
        if rule['agg_matches']:
            if ts_now() > rule['aggregate_alert_time']:
                self.alert(rule['agg_matches'], rule)
                rule['agg_matches'] = []
def send_pending_alerts(self):
    """ Retry unsent alerts from the writeback index, skipping alerts that are
    part of an aggregate, then flush in-memory aggregated matches whose
    alert time has passed. """
    pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit)
    for alert in pending_alerts:
        _id = alert['_id']
        alert = alert['_source']
        try:
            rule_name = alert.pop('rule_name')
            alert_time = alert.pop('alert_time')
            match_body = alert.pop('match_body')
        except KeyError:
            # Malformed alert, drop it
            continue
        agg_id = alert.get('aggregate_id', None)
        if agg_id:
            # Aggregated alerts will be taken care of by get_aggregated_matches
            continue
        # Find original rule
        for rule in self.rules:
            if rule['name'] == rule_name:
                break
        else:
            # Original rule is missing, drop alert
            continue
        # Retry the alert unless it's a future alert
        if ts_now() > ts_to_dt(alert_time):
            aggregated_matches = self.get_aggregated_matches(_id)
            if aggregated_matches:
                # Include every match aggregated under this alert's _id
                matches = [match_body] + [agg_match['match_body'] for agg_match in aggregated_matches]
                self.alert(matches, rule, alert_time=alert_time)
                rule['current_aggregate_id'] = None
            else:
                self.alert([match_body], rule, alert_time=alert_time)
            # Delete it from the index
            try:
                self.writeback_es.delete(index=self.writeback_index, doc_type='elastalert', id=_id)
            except:
                self.handle_error("Failed to delete alert %s at %s" % (_id, alert_time))
    # Send in memory aggregated alerts
    for rule in self.rules:
        if rule['agg_matches']:
            if ts_now() > rule['aggregate_alert_time']:
                self.alert(rule['agg_matches'], rule)
                rule['agg_matches'] = []
def add_aggregated_alert(self, match, rule):
    """ Save a match as a pending aggregate alert to elasticsearch.

    :param match: The match dict to aggregate.
    :param rule: The rule configuration; current_aggregate_id and
        aggregate_alert_time are read and updated here.
    :return: The writeback result, or a falsy value if the write failed
        (in which case the match is kept in rule['agg_matches']).
    """
    if (not rule['current_aggregate_id'] or
            ('aggregate_alert_time' in rule and rule['aggregate_alert_time'] < ts_to_dt(match[rule['timestamp_field']]))):
        # Elastalert may have restarted while pending alerts exist
        pending_alert = self.find_pending_aggregate_alert(rule)
        if pending_alert:
            alert_time = rule['aggregate_alert_time'] = ts_to_dt(pending_alert['_source']['alert_time'])
            agg_id = rule['current_aggregate_id'] = pending_alert['_id']
            elastalert_logger.info('Adding alert for %s to aggregation(id: %s), next alert at %s' % (rule['name'], agg_id, alert_time))
        else:
            # First match, set alert_time
            match_time = ts_to_dt(match[rule['timestamp_field']])
            alert_time = ''
            if isinstance(rule['aggregation'], dict) and rule['aggregation'].get('schedule'):
                croniter._datetime_to_timestamp = cronite_datetime_to_timestamp  # For Python 2.6 compatibility
                try:
                    # Renamed from `iter` to avoid shadowing the builtin
                    cron_iter = croniter(rule['aggregation']['schedule'], ts_now())
                    alert_time = unix_to_dt(cron_iter.get_next())
                except Exception as e:
                    self.handle_error("Error parsing aggregate send time Cron format %s" % (e), rule['aggregation']['schedule'])
            else:
                alert_time = match_time + rule['aggregation']
            rule['aggregate_alert_time'] = alert_time
            agg_id = None
            elastalert_logger.info('New aggregation for %s. next alert at %s.' % (rule['name'], alert_time))
    else:
        # Already pending aggregation, use existing alert_time
        alert_time = rule['aggregate_alert_time']
        agg_id = rule['current_aggregate_id']
        elastalert_logger.info('Adding alert for %s to aggregation(id: %s), next alert at %s' % (rule['name'], agg_id, alert_time))
    alert_body = self.get_alert_body(match, rule, False, alert_time)
    if agg_id:
        alert_body['aggregate_id'] = agg_id
    res = self.writeback('elastalert', alert_body)
    # If new aggregation, save _id
    if res and not agg_id:
        rule['current_aggregate_id'] = res['_id']
    # Couldn't write the match to ES, save it in memory for now
    if not res:
        rule['agg_matches'].append(match)
    return res
def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term.

    Populates self.seen_values[field] with the terms found in the window
    ending at args.start (or now) and spanning terms_window_size.
    """
    self.es = Elasticsearch(host=self.rules["es_host"], port=self.rules["es_port"])
    window_size = datetime.timedelta(**self.rules.get("terms_window_size", {"days": 30}))
    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get("use_strftime_index"):
        index = format_index(self.rules["index"], start, end)
    else:
        index = self.rules["index"]
    time_filter = {self.rules["timestamp_field"]: {"lte": dt_to_ts(end), "gte": dt_to_ts(start)}}
    query_template["filter"] = {"bool": {"must": [{"range": time_filter}]}}
    query = {"aggs": {"filtered": query_template}}
    for field in self.fields:
        # field_name is aliased inside query, so mutating it retargets the query
        field_name["field"] = field
        # NOTE(review): timeout here is an int; other variants pass '50s' -- confirm
        # which form this client version expects.
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
        if "aggregations" in res:
            buckets = res["aggregations"]["filtered"]["values"]["buckets"]
            keys = [bucket["key"] for bucket in buckets]
            self.seen_values[field] = keys
            elastalert_logger.info("Found %s unique values for %s" % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info("Found no values for %s" % (field))
def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term.

    Populates self.seen_values[field] with the terms found in the window
    ending at args.start (or now) and spanning terms_window_size.
    """
    self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get('use_strftime_index'):
        index = format_index(self.rules['index'], start, end)
    else:
        index = self.rules['index']
    time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
    query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
    query = {'aggs': {'filtered': query_template}}
    for field in self.fields:
        # field_name is aliased inside query, so mutating it retargets the query
        field_name['field'] = field
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
        if 'aggregations' in res:
            buckets = res['aggregations']['filtered']['values']['buckets']
            keys = [bucket['key'] for bucket in buckets]
            self.seen_values[field] = keys
            elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info('Found no values for %s' % (field))
def get_starttime(self, rule):
    """ Query ES for the last time we ran this rule.

    :param rule: The rule configuration.
    :return: A timestamp or None.
    """
    query = {'filter': {'term': {'rule_name': '%s' % (rule['name'])}},
             'sort': {'@timestamp': {'order': 'desc'}}}
    try:
        if self.writeback_es:
            res = self.writeback_es.search(index=self.writeback_index, doc_type='elastalert_status',
                                           size=1, body=query, _source_include=['endtime', 'rule_name'])
            if res['hits']['hits']:
                endtime = ts_to_dt(res['hits']['hits'][0]['_source']['endtime'])
                # Ignore runs older than old_query_limit; starting from them
                # would query too far back
                if ts_now() - endtime < self.old_query_limit:
                    return endtime
                else:
                    logging.info("Found expired previous run for %s at %s" % (rule['name'], endtime))
                    return None
    except (ElasticsearchException, KeyError) as e:
        self.handle_error('Error querying for last run: %s' % (e), {'rule': rule['name']})
        # Disable writeback_es until the main loop reconnects it
        self.writeback_es = None
    return None
def find_recent_pending_alerts(self, time_limit):
    """ Queries writeback_es to find alerts that did not send and are newer than time_limit.

    :param time_limit: A timedelta; only alerts with alert_time within this window are returned.
    :return: A list of ES hit dicts, or [] when there are none or the query fails.
    """
    query = {'query': {'query_string': {'query': 'alert_sent:false'}},
             'filter': {'range': {'alert_time': {'from': dt_to_ts(ts_now() - time_limit),
                                                 'to': dt_to_ts(ts_now())}}}}
    if self.writeback_es:
        try:
            res = self.writeback_es.search(index=self.writeback_index,
                                           doc_type='elastalert',
                                           body=query,
                                           size=1000)
            if res['hits']['hits']:
                return res['hits']['hits']
        except (ElasticsearchException, KeyError) as e:
            # Best-effort: log and return [] rather than crash the alerting loop.
            # Previously a bare except silently hid all failures here.
            logging.exception("Error finding recent pending alerts: %s" % (e))
    return []
def set_realert(self, rule_name, timestamp, exponent):
    """ Record a silence for rule_name lasting until timestamp.

    Updates the in-memory silence cache and persists a 'silence' document
    via the writeback helper; returns whatever writeback returns.
    """
    # Cache first so is_silenced sees the new silence without hitting ES
    self.silence_cache[rule_name] = (timestamp, exponent)
    silence_doc = {
        'rule_name': rule_name,
        'until': timestamp,
        'exponent': exponent,
        '@timestamp': ts_now(),
    }
    return self.writeback('silence', silence_doc)
def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term.

    Supports composite keys: when a field is a list, nested sub-aggregations
    are built and the results are stored under the tuple of field names.
    """
    self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'], timeout=self.rules.get('es_conn_timeout', 50))
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get('use_strftime_index'):
        index = format_index(self.rules['index'], start, end)
    else:
        index = self.rules['index']
    time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
    query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
    query = {'aggs': {'filtered': query_template}}
    for field in self.fields:
        # For composite keys, we will need to perform sub-aggregations
        if type(field) == list:
            level = query_template['aggs']
            # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
            for i, sub_field in enumerate(field):
                level['values']['terms']['field'] = sub_field
                if i < len(field) - 1:
                    # If we have more fields after the current one, then set up the next nested structure
                    level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                    level = level['values']['aggs']
        else:
            # For non-composite keys, only a single agg is needed
            # (field_name is aliased inside query_template, so this retargets the query)
            field_name['field'] = field
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
        if 'aggregations' in res:
            buckets = res['aggregations']['filtered']['values']['buckets']
            if type(field) == list:
                # For composite keys, make the lookup based on all fields
                # Make it a tuple since it can be hashed and used in dictionary lookups
                self.seen_values[tuple(field)] = []
                for bucket in buckets:
                    # We need to walk down the hierarchy and obtain the value at each level
                    self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                # If we don't have any results, it could either be because of the absence of any baseline data
                # OR it may be because the composite key contained a non-primitive type. Either way, give the
                # end-users a heads up to help them debug what might be going on.
                if not self.seen_values[tuple(field)]:
                    elastalert_logger.warning((
                        'No results were found from all sub-aggregations. This can either indicate that there is '
                        'no baseline data OR that a non-primitive field was used in a composite key.'
                    ))
            else:
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info('Found no values for %s' % (field))
def remove_old_events(self, rule):
    """ Forget processed hit ids older than the rule's buffer_time.

    :param rule: Rule configuration; its 'processed_hits' dict maps hit _id
        to the hit's timestamp and is pruned in place.
    """
    # Anything older than the buffer time we can forget
    now = ts_now()
    buffer_time = rule.get('buffer_time', self.buffer_time)
    # Collect first, then pop: avoids mutating the dict mid-iteration.
    # BUG FIX: the original used map() purely for its side effect, which is a
    # silent no-op under Python 3 (map is lazy); use an explicit loop instead.
    stale = [_id for _id, timestamp in rule['processed_hits'].items()
             if now - timestamp > buffer_time]
    for _id in stale:
        rule['processed_hits'].pop(_id)
def set_realert(self, rule_name, timestamp):
    """ Record a silence for rule_name lasting until timestamp.

    Updates the in-memory silence cache and persists a 'silence' document
    via the writeback helper; returns whatever writeback returns.
    """
    # Cache first so is_silenced sees the new silence without hitting ES
    self.silence_cache[rule_name] = timestamp
    silence_doc = {
        'rule_name': rule_name,
        'until': timestamp,
        '@timestamp': ts_now(),
    }
    return self.writeback('silence', silence_doc)
def run_all_rules(self):
    """ Run each rule one time.

    Mirrors one iteration of the start() loop: reconnect writeback_es if
    needed, retry pending alerts, run every rule up to its endtime, and
    reload changed rules unless --pin_rules was given.
    """
    # If writeback_es errored, it's disabled until the next query cycle
    if not self.writeback_es:
        self.writeback_es = self.new_elasticsearch(self.es_conn_config)
    self.send_pending_alerts()
    next_run = datetime.datetime.utcnow() + self.run_every
    for rule in self.rules:
        # Set endtime based on the rule's delay
        delay = rule.get('query_delay')
        if hasattr(self.args, 'end') and self.args.end:
            endtime = ts_to_dt(self.args.end)
        elif delay:
            endtime = ts_now() - delay
        else:
            endtime = ts_now()
        # BUG FIX: bind old_starttime before the try so the overrun warning
        # below cannot raise NameError when run_rule raises an exception
        # (it was previously only assigned on the success path).
        old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time'))
        try:
            num_matches = self.run_rule(rule, endtime, self.starttime)
        except EAException as e:
            self.handle_error("Error running rule %s: %s" % (rule['name'], e), {'rule': rule['name']})
        else:
            # Recompute after run_rule, which updates original_starttime
            old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time'))
            logging.info("Ran %s from %s to %s: %s query hits, %s matches,"
                         " %s alerts sent" % (rule['name'], old_starttime,
                                              pretty_ts(endtime, rule.get('use_local_time')),
                                              self.num_hits, num_matches, self.alerts_sent))
            self.alerts_sent = 0
        self.remove_old_events(rule)
    if next_run < datetime.datetime.utcnow():
        # We were processing for longer than our refresh interval
        # This can happen if --start was specified with a large time period
        # or if we are running too slow to process events in real time.
        logging.warning("Querying from %s to %s took longer than %s!" % (old_starttime, endtime, self.run_every))
    # Only force starttime once
    self.starttime = None
    if not self.args.pin_rules:
        self.load_rule_changes()
def is_silenced(self, rule_name):
    """ Checks if rule_name is currently silenced. Returns false on exception.

    Consults the in-memory silence_cache first (evicting expired entries);
    on a miss, queries the writeback index for the latest 'silence' document.
    """
    if rule_name in self.silence_cache:
        # Cache stores the raw 'until' timestamp string
        if ts_now() < ts_to_dt(self.silence_cache[rule_name]):
            return True
        else:
            # Expired: evict so the next check re-queries ES
            self.silence_cache.pop(rule_name)
            return False
    query = {'filter': {'term': {'rule_name': rule_name}}, 'sort': {'until': {'order': 'desc'}}}
    if self.writeback_es:
        try:
            res = self.writeback_es.search(index=self.writeback_index, doc_type='silence',
                                           size=1, body=query, _source_include=['until'])
        except ElasticsearchException as e:
            self.handle_error("Error while querying for alert silence status: %s" % (e), {'rule': rule_name})
            return False
        if res['hits']['hits']:
            until_ts = res['hits']['hits'][0]['_source']['until']
            if ts_now() < ts_to_dt(until_ts):
                # Cache only active silences
                self.silence_cache[rule_name] = until_ts
                return True
    return False
def run_rule(self, rule):
    """ Run a rule including querying and alerting on results.

    :param rule: The rule configuration.
    :return: The number of matches that the rule produced.
    """
    elastalert_logger.info('Start to run rule: %s', rule.get('name'))
    # Run the rule. If querying over a large time period, split it up into segments
    self.num_hits = 0
    rule_request = rule.get("input").get("search").get("request")
    if rule_request.get("elastic_host", None) is not None and rule_request.get("elastic_port", None) is not None:
        # Reuse rule_request instead of re-walking the nested dicts
        self.current_es = Elasticsearch(host=rule_request.get("elastic_host"),
                                        port=rule_request.get("elastic_port"))
    else:
        self.current_es = self.new_elasticsearch(self.global_config)
    self.run_query(rule)
    # Process any new matches
    num_matches = len(rule['type'].matches)
    while rule['type'].matches:
        match = rule['type'].matches.pop(0)
        # BUG FIX: `key` was referenced below without ever being defined,
        # raising NameError whenever 'realert' was set. Per-query-key
        # silencing is disabled, so use an empty key (rule-wide realert).
        key = ''
        #if self.is_silenced(rule['name'] + key) or self.is_silenced(rule['name']):
        #    elastalert_logger.info('Ignoring match for silenced rule %s%s' % (rule['name'], key))
        #    continue
        if rule.get('realert'):
            next_alert, exponent = self.next_alert_time(rule, rule['name'] + key, ts_now())
            self.set_realert(rule['name'] + key, next_alert, exponent)
        # If no aggregation, alert immediately
        #if not rule['aggregation']:
        #    self.alert([match], rule)
        #    continue
        self.alert([match], rule)
        # Add it as an aggregated match
        #self.add_aggregated_alert(match, rule)
    # Mark this endtime for next run's start
    #rule['previous_endtime'] = endtime
    #time_taken = time.time() - run_start
    return num_matches
def find_recent_pending_alerts(self, time_limit):
    """ Queries writeback_es to find alerts that did not send and are newer than time_limit.

    :param time_limit: A timedelta bounding how far back to look.
    :return: A list of ES hit dicts, oldest first, or [] on no results/failure.
    """
    # XXX only fetches 1000 results. If limit is reached, next loop will catch them
    # unless there is constantly more than 1000 alerts to send.
    # Fetch recent, unsent alerts that aren't part of an aggregate, earlier alerts first.
    query = {'query': {'query_string': {'query': '!_exists_:aggregate_id AND alert_sent:false'}},
             'filter': {'range': {'alert_time': {'from': dt_to_ts(ts_now() - time_limit),
                                                 'to': dt_to_ts(ts_now())}}},
             'sort': {'alert_time': {'order': 'asc'}}}
    if self.writeback_es:
        try:
            res = self.writeback_es.search(index=self.writeback_index,
                                           doc_type='elastalert',
                                           body=query,
                                           size=1000)
            if res['hits']['hits']:
                return res['hits']['hits']
        except (ElasticsearchException, KeyError) as e:
            # Best-effort: log and return [] rather than crash the alerting loop.
            # Previously a bare except silently hid all failures here.
            logging.exception("Error finding recent pending alerts: %s" % (e))
    return []
def find_recent_pending_alerts(self, time_limit):
    """ Queries writeback_es to find alerts that did not send and are newer than time_limit.

    :param time_limit: A timedelta bounding how far back to look.
    :return: A list of ES hit dicts, oldest first, or [] on no results/failure.
    """
    # XXX only fetches 1000 results. If limit is reached, next loop will catch them
    # unless there is constantly more than 1000 alerts to send.
    # Fetch recent, unsent alerts that aren't part of an aggregate, earlier alerts first.
    query = {'query': {'query_string': {'query': '!_exists_:aggregate_id AND alert_sent:false'}},
             'filter': {'range': {'alert_time': {'from': dt_to_ts(ts_now() - time_limit),
                                                 'to': dt_to_ts(ts_now())}}},
             'sort': {'alert_time': {'order': 'asc'}}}
    if self.writeback_es:
        try:
            res = self.writeback_es.search(index=self.writeback_index,
                                           doc_type='elastalert',
                                           body=query,
                                           size=1000)
            if res['hits']['hits']:
                return res['hits']['hits']
        except (ElasticsearchException, KeyError) as e:
            # Best-effort: log and return [] rather than crash the alerting loop.
            # Previously a bare except silently hid all failures here.
            logging.exception("Error finding recent pending alerts: %s" % (e))
    return []
def find_pending_aggregate_alert(self, rule):
    """ Find the most recent unsent, non-aggregated pending alert for this rule
    whose alert_time is still in the future.

    :param rule: The rule configuration.
    :return: The ES hit dict for the pending alert, or None.
    """
    query = {'filter': {'bool': {'must': [{'term': {'rule_name': rule['name']}},
                                          {'range': {'alert_time': {'gt': ts_now()}}},
                                          {'not': {'exists': {'field': 'aggregate_id'}}},
                                          {'term': {'alert_sent': 'false'}}]}},
             'sort': {'alert_time': {'order': 'desc'}}}
    if not self.writeback_es:
        # Reconnect if a previous error disabled the client
        self.writeback_es = self.new_elasticsearch(self.es_conn_config)
    try:
        res = self.writeback_es.search(index=self.writeback_index,
                                       doc_type='elastalert',
                                       body=query,
                                       size=1)
        if len(res['hits']['hits']) == 0:
            return None
    except (KeyError, ElasticsearchException) as e:
        self.handle_error("Error searching for pending aggregated matches: %s" % (e), {'rule_name': rule['name']})
        return None
    return res['hits']['hits'][0]
def add_aggregated_alert(self, match, rule):
    """ Save a match as a pending aggregate alert to elasticsearch.

    :param match: The match dict to aggregate.
    :param rule: The rule configuration; current_aggregate_id and
        aggregate_alert_time are read and updated here.
    :return: The writeback result, or a falsy value if the write failed
        (in which case the match is kept in rule['agg_matches']).
    """
    if (not rule['current_aggregate_id'] or
            ('aggregate_alert_time' in rule and rule['aggregate_alert_time'] < ts_to_dt(match[rule['timestamp_field']]))):
        # Elastalert may have restarted while pending alerts exist
        pending_alert = self.find_pending_aggregate_alert(rule)
        if pending_alert:
            alert_time = rule['aggregate_alert_time'] = ts_to_dt(pending_alert['_source']['alert_time'])
            agg_id = rule['current_aggregate_id'] = pending_alert['_id']
            elastalert_logger.info('Adding alert for %s to aggregation(id: %s), next alert at %s' % (rule['name'], agg_id, alert_time))
        else:
            # First match, set alert_time
            match_time = ts_to_dt(match[rule['timestamp_field']])
            alert_time = ''
            if isinstance(rule['aggregation'], dict) and rule['aggregation'].get('schedule'):
                croniter._datetime_to_timestamp = cronite_datetime_to_timestamp  # For Python 2.6 compatibility
                try:
                    # Renamed from `iter` to avoid shadowing the builtin
                    cron_iter = croniter(rule['aggregation']['schedule'], ts_now())
                    alert_time = unix_to_dt(cron_iter.get_next())
                except Exception as e:
                    self.handle_error("Error parsing aggregate send time Cron format %s" % (e), rule['aggregation']['schedule'])
            else:
                alert_time = match_time + rule['aggregation']
            rule['aggregate_alert_time'] = alert_time
            agg_id = None
            elastalert_logger.info('New aggregation for %s. next alert at %s.' % (rule['name'], alert_time))
    else:
        # Already pending aggregation, use existing alert_time
        alert_time = rule['aggregate_alert_time']
        agg_id = rule['current_aggregate_id']
        elastalert_logger.info('Adding alert for %s to aggregation(id: %s), next alert at %s' % (rule['name'], agg_id, alert_time))
    alert_body = self.get_alert_body(match, rule, False, alert_time)
    if agg_id:
        alert_body['aggregate_id'] = agg_id
    res = self.writeback('elastalert', alert_body)
    # If new aggregation, save _id
    if res and not agg_id:
        rule['current_aggregate_id'] = res['_id']
    # Couldn't write the match to ES, save it in memory for now
    if not res:
        rule['agg_matches'].append(match)
    return res
def run_query(self, rule, start=None, end=None):
    """ Query for the rule and pass all of the results to the RuleType instance.

    :param rule: The rule configuration.
    :param start: The earliest time to query.
    :param end: The latest time to query.
    Returns True on success and False on failure.
    """
    if start is None:
        start = self.get_index_start(rule['index'])
    if end is None:
        end = ts_now()
    # Reset hit counter and query
    rule_inst = rule['type']
    prev_num_hits = self.num_hits
    max_size = rule.get('max_query_size', self.max_query_size)
    index = self.get_index(rule, start, end)
    # Choose the query style the rule is configured for
    if rule.get('use_count_query'):
        data = self.get_hits_count(rule, start, end, index)
    elif rule.get('use_terms_query'):
        data = self.get_hits_terms(rule, start, end, index, rule['query_key'])
    else:
        data = self.get_hits(rule, start, end, index)
        if data:
            data = self.remove_duplicate_events(data, rule)
    # There was an exception while querying
    if data is None:
        return False
    elif data:
        if rule.get('use_count_query'):
            rule_inst.add_count_data(data)
        elif rule.get('use_terms_query'):
            rule_inst.add_terms_data(data)
        else:
            rule_inst.add_data(data)
    # Warn if we hit max_query_size
    if self.num_hits - prev_num_hits == max_size and not rule.get('use_count_query'):
        logging.warning("Hit max_query_size (%s) while querying for %s" % (max_size, rule['name']))
    return True
def run_query(self, rule, start=None, end=None):
    """ Query for the rule and pass all of the results to the RuleType instance.

    :param rule: The rule configuration.
    :param start: The earliest time to query.
    :param end: The latest time to query.
    Returns True on success and False on failure.
    """
    if start is None:
        start = self.get_index_start(rule['index'])
    if end is None:
        end = ts_now()
    # Reset hit counter and query
    rule_inst = rule['type']
    prev_num_hits = self.num_hits
    max_size = rule.get('max_query_size', self.max_query_size)
    index = self.get_index(rule, start, end)
    # Choose the query style the rule is configured for
    if rule.get('use_count_query'):
        data = self.get_hits_count(rule, start, end, index)
    elif rule.get('use_terms_query'):
        data = self.get_hits_terms(rule, start, end, index, rule['query_key'])
    else:
        data = self.get_hits(rule, start, end, index)
        if data:
            data = self.remove_duplicate_events(data, rule)
    # There was an exception while querying
    if data is None:
        return False
    elif data:
        if rule.get('use_count_query'):
            rule_inst.add_count_data(data)
        elif rule.get('use_terms_query'):
            rule_inst.add_terms_data(data)
        else:
            rule_inst.add_data(data)
    # Warn if we hit max_query_size
    if self.num_hits - prev_num_hits == max_size and not rule.get('use_count_query'):
        logging.warning("Hit max_query_size (%s) while querying for %s" % (max_size, rule['name']))
    return True
def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term.

    Populates self.seen_values[field] with the terms found in the window
    ending at args.start (or now) and spanning terms_window_size.
    """
    # BUG FIX: 'use_ssl' was read from self.rule (singular), which does not
    # exist -- every other setting here comes from self.rules.
    self.es = Elasticsearch(host=self.rules['es_host'],
                            port=self.rules['es_port'],
                            use_ssl=self.rules['use_ssl'],
                            timeout=self.rules.get('es_conn_timeout', 50))
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get('use_strftime_index'):
        index = format_index(self.rules['index'], start, end)
    else:
        index = self.rules['index']
    time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
    query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
    query = {'aggs': {'filtered': query_template}}
    for field in self.fields:
        # field_name is aliased inside query, so mutating it retargets the query
        field_name['field'] = field
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
        if 'aggregations' in res:
            buckets = res['aggregations']['filtered']['values']['buckets']
            keys = [bucket['key'] for bucket in buckets]
            self.seen_values[field] = keys
            elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info('Found no values for %s' % (field))
def writeback(self, doc_type, body):
    """ Write a document to the writeback index.

    Datetime values in body are converted to timestamp strings first.
    Returns the ES response on success, or None in debug mode, when no
    writeback connection exists, or on failure.
    """
    # Serialize datetime values so ES receives timestamp strings
    for field, value in body.items():
        if isinstance(value, datetime.datetime):
            body[field] = dt_to_ts(value)

    if self.debug:
        elastalert_logger.info("Skipping writing to ES: %s" % (body))
        return None

    if '@timestamp' not in body:
        body['@timestamp'] = dt_to_ts(ts_now())

    if not self.writeback_es:
        return None
    try:
        return self.writeback_es.create(index=self.writeback_index, doc_type=doc_type, body=body)
    except ElasticsearchException as e:
        logging.exception("Error writing alert info to elasticsearch: %s" % (e))
        # Disable writeback until the next cycle re-creates the connection
        self.writeback_es = None
def get_starttime(self, rule):
    """ Query ES for the last time we ran this rule.

    :param rule: The rule configuration.
    :return: A timestamp or None.
    """
    # Most recent status document for this rule, newest first
    query = {'filter': {'term': {'rule_name': '%s' % (rule['name'])}},
             'sort': {'@timestamp': {'order': 'desc'}}}
    try:
        if self.writeback_es:
            res = self.writeback_es.search(index=self.writeback_index, doc_type='elastalert_status',
                                           size=1, body=query, _source_include=['endtime', 'rule_name'])
            if res['hits']['hits']:
                endtime = ts_to_dt(res['hits']['hits'][0]['_source']['endtime'])
                # Only resume from the previous endtime if it isn't stale
                if ts_now() - endtime < self.old_query_limit:
                    return endtime
                else:
                    logging.info("Found expired previous run for %s at %s" % (rule['name'], endtime))
                    return None
    except (ElasticsearchException, KeyError) as e:
        self.handle_error('Error querying for last run: %s' % (e), {'rule': rule['name']})
        # Drop the connection so it's rebuilt on the next query cycle
        self.writeback_es = None
    # No usable previous run found
    return None
def get_all_terms(self):
    """ Performs a terms aggregation for each field to get every existing term.

    Populates self.seen_values[field] with the terms found for each field
    in self.fields.
    """
    self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))

    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if self.rules.get('use_strftime_index'):
        end = ts_now()
        start = end - window_size
        index = format_index(self.rules['index'], start, end)
    else:
        index = self.rules['index']

    for field in self.fields:
        # field_name is shared by reference with query_template, so mutating
        # it re-targets the aggregation for each field
        field_name['field'] = field
        # BUG FIX: timeout must carry units ('50s'); the sibling implementation
        # passes '50s' while this one passed a bare integer 50.
        res = self.es.search(body=query_template, index=index, ignore_unavailable=True, timeout='50s')
        # Guard against responses without aggregations (e.g. missing index),
        # consistent with the other get_all_terms implementation.
        if 'aggregations' in res:
            buckets = res['aggregations']['values']['buckets']
            keys = [bucket['key'] for bucket in buckets]
            self.seen_values[field] = keys
        else:
            self.seen_values[field] = []
def find_pending_aggregate_alert(self, rule):
    """ Find the most recent unsent aggregate alert document for this rule.

    Returns the matching ES hit, or None when nothing is pending or the
    query fails.
    """
    must_clauses = [
        {'term': {'rule_name': rule['name']}},
        {'range': {'alert_time': {'gt': ts_now()}}},     # still scheduled in the future
        {'not': {'exists': {'field': 'aggregate_id'}}},  # only the head of an aggregation group
        {'term': {'alert_sent': 'false'}},
    ]
    query = {
        'filter': {'bool': {'must': must_clauses}},
        'sort': {'alert_time': {'order': 'desc'}},
    }
    if not self.writeback_es:
        self.writeback_es = self.new_elasticsearch(self.es_conn_config)
    try:
        res = self.writeback_es.search(index=self.writeback_index, doc_type='elastalert',
                                       body=query, size=1)
        hits = res['hits']['hits']
    except (KeyError, ElasticsearchException) as e:
        self.handle_error("Error searching for pending aggregated matches: %s" % (e), {'rule_name': rule['name']})
        return None
    if not hits:
        return None
    return hits[0]
def writeback(self, doc_type, body):
    """ Persist a document of the given type to the writeback index.

    Any datetime values in body are converted to timestamps before writing.
    Returns the ES response, or None in debug mode, when unconnected, or
    on error.
    """
    # Convert datetimes up front so the document is JSON-serializable
    for key, value in body.items():
        if isinstance(value, datetime.datetime):
            body[key] = dt_to_ts(value)

    if self.debug:
        elastalert_logger.info("Skipping writing to ES: %s" % (body))
        return None

    # Stamp the document if the caller didn't provide a timestamp
    if '@timestamp' not in body:
        body['@timestamp'] = dt_to_ts(ts_now())

    if self.writeback_es:
        try:
            return self.writeback_es.create(index=self.writeback_index,
                                            doc_type=doc_type,
                                            body=body)
        except ElasticsearchException as e:
            logging.exception("Error writing alert info to elasticsearch: %s" % (e))
            # Disable writeback; the connection is rebuilt next query cycle
            self.writeback_es = None
def add_data(self, data):
    """ Add events to the per-key sliding windows and check each key for a match.

    An empty batch is replaced by a single zero-count placeholder event so
    the windows still advance in time.
    """
    query_key = self.rules.get('query_key')
    event_count = 1
    if not data:
        # insert dummy event
        data = [{self.ts_field: ts_now()}]
        event_count = 0

    for event in data:
        # Bucket by the query_key value when configured; otherwise every
        # event shares the single key 'all'
        if query_key:
            bucket = hashable(lookup_es_key(event, query_key))
        else:
            bucket = 'all'

        # Store the timestamps of recent occurrences, per key
        window = self.occurrences.setdefault(
            bucket, EventWindow(self.rules['timeframe'], getTimestamp=self.get_ts))
        window.append((event, event_count))
        self.check_for_match(bucket)
def send_alert(self, matches, rule, alert_time=None):
    """ Send out an alert.

    :param matches: A list of matches.
    :param rule: A rule configuration.
    :param alert_time: Timestamp stamped onto the alert pipeline; defaults to now.
    """
    if alert_time is None:
        alert_time = ts_now()

    # Compute top count keys
    if rule.get('top_count_keys'):
        for match in matches:
            if 'query_key' in rule and rule['query_key'] in match:
                qk = match[rule['query_key']]
            else:
                qk = None
            # Count window: rule timeframe (default 10 min) before the match, 10 min after
            start = ts_to_dt(match[rule['timestamp_field']]) - rule.get('timeframe', datetime.timedelta(minutes=10))
            end = ts_to_dt(match[rule['timestamp_field']]) + datetime.timedelta(minutes=10)
            keys = rule.get('top_count_keys')
            counts = self.get_top_counts(rule, start, end, keys, qk=qk)
            match.update(counts)

    # Generate a kibana3 dashboard for the first match
    if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'):
        try:
            if rule.get('generate_kibana_link'):
                kb_link = self.generate_kibana_db(rule, matches[0])
            else:
                kb_link = self.use_kibana_link(rule, matches[0])
        except EAException as e:
            self.handle_error("Could not generate kibana dash for %s match: %s" % (rule['name'], e))
        else:
            if kb_link:
                matches[0]['kibana_link'] = kb_link

    if rule.get('use_kibana4_dashboard'):
        kb_link = self.generate_kibana4_db(rule, matches[0])
        if kb_link:
            matches[0]['kibana_link'] = kb_link

    #if not rule.get('run_enhancements_first'):
    #    for enhancement in rule.get('match_enhancements'):
    #        valid_matches = []
    #        for match in matches:
    #            try:
    #                enhancement.process(match)
    #                valid_matches.append(match)
    #            except DropMatchException as e:
    #                pass
    #            except EAException as e:
    #                self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})
    #        matches = valid_matches
    #        if not matches:
    #            return None

    # Don't send real alerts in debug mode
    if self.debug:
        alerter = DebugAlerter(rule)
        alerter.alert(matches)
        return None

    # Run the alerts
    alert_sent = False
    alert_exception = None
    # Alert.pipeline is a single object shared between every alerter
    # This allows alerters to pass objects and data between themselves
    alert_pipeline = {"alert_time": alert_time}
    # NOTE(review): rule.get("actions") returns None when the key is missing,
    # which would raise TypeError here — confirm 'actions' is always populated
    for alert in rule.get("actions"):
        alert.pipeline = alert_pipeline
        try:
            alert.alert(matches)
        except EAException as e:
            self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']})
            alert_exception = str(e)
        else:
            self.alerts_sent += 1
            alert_sent = True
    # NOTE(review): alert_sent / alert_exception are computed but not used
    # within this visible block — presumably consumed by a writeback step elsewhere
def run_rule(self, rule, endtime, starttime=None):
    """ Run a rule for a given time period, including querying and alerting on results.

    :param rule: The rule configuration.
    :param starttime: The earliest timestamp to query.
    :param endtime: The latest timestamp to query.
    :return: The number of matches that the rule produced.
    """
    run_start = time.time()
    self.current_es = Elasticsearch(host=rule['es_host'], port=rule['es_port'])
    self.current_es_addr = (rule['es_host'], rule['es_port'])

    # If there are pending aggregate matches, try processing them
    for x in range(len(rule['agg_matches'])):
        match = rule['agg_matches'].pop()
        self.add_aggregated_alert(match, rule)

    # Start from provided time if it's given
    if starttime:
        rule['starttime'] = starttime
    else:
        self.set_starttime(rule, endtime)
    # Remember where this run began, for logging by the caller
    rule['original_starttime'] = rule['starttime']

    # Don't run if starttime was set to the future
    if ts_now() <= rule['starttime']:
        logging.warning("Attempted to use query start time in the future (%s), sleeping instead" % (starttime))
        return 0

    # Run the rule
    # If querying over a large time period, split it up into chunks
    self.num_hits = 0
    tmp_endtime = endtime
    buffer_time = rule.get('buffer_time', self.buffer_time)
    while endtime - rule['starttime'] > buffer_time:
        # NOTE(review): chunks advance by run_every although the loop condition
        # compares against buffer_time — confirm this asymmetry is intended
        tmp_endtime = rule['starttime'] + self.run_every
        if not self.run_query(rule, rule['starttime'], tmp_endtime):
            return 0
        rule['starttime'] = tmp_endtime
    if not self.run_query(rule, rule['starttime'], endtime):
        return 0

    # Let the rule type expire any state older than endtime
    rule['type'].garbage_collect(endtime)

    # Process any new matches
    num_matches = len(rule['type'].matches)
    while rule['type'].matches:
        match = rule['type'].matches.pop(0)

        # If realert is set, silence the rule for that duration
        # Silence is cached by query_key, if it exists
        # Default realert time is 0 seconds
        # concatenate query_key (or none) with rule_name to form silence_cache key
        if 'query_key' in rule:
            try:
                key = '.' + match[rule['query_key']]
            except KeyError:
                # Some matches may not have a query key
                key = ''
        else:
            key = ''

        # Silenced either per query_key or for the whole rule
        if self.is_silenced(rule['name'] + key) or self.is_silenced(rule['name']):
            logging.info('Ignoring match for silenced rule %s%s' % (rule['name'], key))
            continue

        if rule['realert']:
            self.set_realert(rule['name'] + key, dt_to_ts(datetime.datetime.utcnow() + rule['realert']))

        # If no aggregation, alert immediately
        if not rule['aggregation']:
            self.alert([match], rule)
            continue

        # Add it as an aggregated match
        self.add_aggregated_alert(match, rule)

    time_taken = time.time() - run_start
    # Write to ES that we've run this rule against this time period
    body = {'rule_name': rule['name'],
            'endtime': endtime,
            'starttime': rule['starttime'],
            'matches': num_matches,
            'hits': self.num_hits,
            '@timestamp': ts_now(),
            'time_taken': time_taken}
    self.writeback('elastalert_status', body)

    return num_matches
def alert(self, matches, rule, alert_time=None):
    """ Send out an alert.

    :param matches: A list of matches.
    :param rule: A rule configuration.
    :param alert_time: Timestamp recorded with the alert; defaults to now.
    """
    if alert_time is None:
        alert_time = ts_now()

    # Compute top count keys
    if rule.get('top_count_keys'):
        for match in matches:
            if 'query_key' in rule and rule['query_key'] in match:
                qk = match[rule['query_key']]
            else:
                qk = None
            # Count window: rule timeframe (default 10 min) before the match, 10 min after
            start = ts_to_dt(match[rule['timestamp_field']]) - rule.get('timeframe', datetime.timedelta(minutes=10))
            end = ts_to_dt(match[rule['timestamp_field']]) + datetime.timedelta(minutes=10)
            keys = rule.get('top_count_keys')
            counts = self.get_top_counts(rule, start, end, keys, rule.get('top_count_number'), qk)
            match.update(counts)

    # Generate a kibana dashboard for the first match
    if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'):
        try:
            if rule.get('generate_kibana_link'):
                kb_link = self.generate_kibana_db(rule, matches[0])
            else:
                kb_link = self.use_kibana_link(rule, matches[0])
        except EAException as e:
            self.handle_error("Could not generate kibana dash for %s match: %s" % (rule['name'], e))
        else:
            if kb_link:
                matches[0]['kibana_link'] = kb_link

    # Run every enhancement over every match; failures are logged, not fatal
    for enhancement in rule['match_enhancements']:
        for match in matches:
            try:
                enhancement.process(match)
            except EAException as e:
                self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})

    # Don't send real alerts in debug mode
    if self.debug:
        alerter = DebugAlerter(rule)
        alerter.alert(matches)
        return

    # Run the alerts
    alert_sent = False
    alert_exception = None
    for alert in rule['alert']:
        try:
            alert.alert(matches)
        except EAException as e:
            self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']})
            alert_exception = str(e)
        else:
            self.alerts_sent += 1
            alert_sent = True

    # Write the alert(s) to ES
    agg_id = None
    for match in matches:
        alert_body = self.get_alert_body(match, rule, alert_sent, alert_time, alert_exception)
        # Set all matches to aggregate together
        # (the first written document's _id becomes the aggregate_id of the rest)
        if agg_id:
            alert_body['aggregate_id'] = agg_id
        res = self.writeback('elastalert', alert_body)
        if res and not agg_id:
            agg_id = res['_id']
def start(self):
    """ Periodically go through each rule and run it """
    starttime = self.args.start
    if starttime:
        try:
            starttime = ts_to_dt(starttime)
        except (TypeError, ValueError):
            self.handle_error("%s is not a valid ISO 8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (starttime))
            exit(1)
    while True:
        # If writeback_es errored, it's disabled until the next query cycle
        if not self.writeback_es:
            self.writeback_es = Elasticsearch(host=self.es_host, port=self.es_port)
        self.send_pending_alerts()

        # Target wall-clock time for the next iteration
        next_run = datetime.datetime.utcnow() + self.run_every

        for rule in self.rules:
            # Set endtime based on the rule's delay
            delay = rule.get('query_delay')
            if hasattr(self.args, 'end') and self.args.end:
                endtime = ts_to_dt(self.args.end)
            elif delay:
                endtime = ts_now() - delay
            else:
                endtime = ts_now()

            try:
                num_matches = self.run_rule(rule, endtime, starttime)
            except EAException as e:
                self.handle_error("Error running rule %s: %s" % (rule['name'], e), {'rule': rule['name']})
            else:
                old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time'))
                logging.info("Ran %s from %s to %s: %s query hits, %s matches,"
                             " %s alerts sent" % (rule['name'], old_starttime, pretty_ts(endtime, rule.get('use_local_time')),
                                                  self.num_hits, num_matches, self.alerts_sent))
                self.alerts_sent = 0
                self.remove_old_events(rule)

            if next_run < datetime.datetime.utcnow():
                # We were processing for longer than our refresh interval
                # This can happen if --start was specified with a large time period
                # or if we are running too slow to process events in real time.
                # NOTE(review): old_starttime is only bound on the success path above;
                # if run_rule raised on this iteration it may be unbound or stale here
                logging.warning("Querying from %s to %s took longer than %s!" % (old_starttime, endtime, self.run_every))
                continue

        # Only force starttime once
        starttime = None

        if not self.args.pin_rules:
            self.load_rule_changes()

        # Wait before querying again
        sleep_for = (next_run - datetime.datetime.utcnow()).seconds
        logging.info("Sleeping for %s seconds" % (sleep_for))
        time.sleep(sleep_for)
def run_rule(self, rule, endtime, starttime=None):
    """ Run a rule for a given time period, including querying and alerting on results.

    :param rule: The rule configuration.
    :param starttime: The earliest timestamp to query.
    :param endtime: The latest timestamp to query.
    :return: The number of matches that the rule produced.
    """
    run_start = time.time()
    # Build a per-rule ES connection from the rule's connection settings
    rule_es_conn_config = self.build_es_conn_config(rule)
    self.current_es = self.new_elasticsearch(rule_es_conn_config)
    self.current_es_addr = (rule['es_host'], rule['es_port'])

    # If there are pending aggregate matches, try processing them
    for x in range(len(rule['agg_matches'])):
        match = rule['agg_matches'].pop()
        self.add_aggregated_alert(match, rule)

    # Start from provided time if it's given
    if starttime:
        rule['starttime'] = starttime
    else:
        self.set_starttime(rule, endtime)
    # Remember where this run began, for logging by the caller
    rule['original_starttime'] = rule['starttime']

    # Don't run if starttime was set to the future
    if ts_now() <= rule['starttime']:
        logging.warning("Attempted to use query start time in the future (%s), sleeping instead" % (starttime))
        return 0

    # Run the rule
    # If querying over a large time period, split it up into chunks
    self.num_hits = 0
    tmp_endtime = endtime
    buffer_time = rule.get('buffer_time', self.buffer_time)
    while endtime - rule['starttime'] > buffer_time:
        # NOTE(review): chunks advance by run_every although the loop condition
        # compares against buffer_time — confirm this asymmetry is intended
        tmp_endtime = rule['starttime'] + self.run_every
        if not self.run_query(rule, rule['starttime'], tmp_endtime):
            return 0
        rule['starttime'] = tmp_endtime
    if not self.run_query(rule, rule['starttime'], endtime):
        return 0

    # Let the rule type expire any state older than endtime
    rule['type'].garbage_collect(endtime)

    # Process any new matches
    num_matches = len(rule['type'].matches)
    while rule['type'].matches:
        match = rule['type'].matches.pop(0)

        # If realert is set, silence the rule for that duration
        # Silence is cached by query_key, if it exists
        # Default realert time is 0 seconds
        # concatenate query_key (or none) with rule_name to form silence_cache key
        if 'query_key' in rule:
            try:
                key = '.' + match[rule['query_key']]
            except KeyError:
                # Some matches may not have a query key
                key = ''
        else:
            key = ''

        # Silenced either per query_key or for the whole rule
        if self.is_silenced(rule['name'] + key) or self.is_silenced(rule['name']):
            logging.info('Ignoring match for silenced rule %s%s' % (rule['name'], key))
            continue

        if rule['realert']:
            # next_alert_time applies exponential backoff to repeated alerts
            next_alert, exponent = self.next_alert_time(rule, rule['name'] + key, ts_now())
            self.set_realert(rule['name'] + key, next_alert, exponent)

        # If no aggregation, alert immediately
        if not rule['aggregation']:
            self.alert([match], rule)
            continue

        # Add it as an aggregated match
        self.add_aggregated_alert(match, rule)

    time_taken = time.time() - run_start
    # Write to ES that we've run this rule against this time period
    body = {'rule_name': rule['name'],
            'endtime': endtime,
            'starttime': rule['starttime'],
            'matches': num_matches,
            'hits': self.num_hits,
            '@timestamp': ts_now(),
            'time_taken': time_taken}
    self.writeback('elastalert_status', body)

    return num_matches
def alert(self, matches, rule, alert_time=None):
    """ Send out an alert.

    :param matches: A list of matches.
    :param rule: A rule configuration.
    :param alert_time: Timestamp recorded with the alert; defaults to now.
    """
    if alert_time is None:
        alert_time = ts_now()

    # Compute top count keys
    if rule.get('top_count_keys'):
        for match in matches:
            if 'query_key' in rule and rule['query_key'] in match:
                qk = match[rule['query_key']]
            else:
                qk = None
            # Count window: rule timeframe (default 10 min) before the match, 10 min after
            start = ts_to_dt(match[rule['timestamp_field']]) - rule.get('timeframe', datetime.timedelta(minutes=10))
            end = ts_to_dt(match[rule['timestamp_field']]) + datetime.timedelta(minutes=10)
            keys = rule.get('top_count_keys')
            counts = self.get_top_counts(rule, start, end, keys, rule.get('top_count_number'), qk)
            match.update(counts)

    # Generate a kibana dashboard for the first match
    if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'):
        try:
            if rule.get('generate_kibana_link'):
                kb_link = self.generate_kibana_db(rule, matches[0])
            else:
                kb_link = self.use_kibana_link(rule, matches[0])
        except EAException as e:
            self.handle_error("Could not generate kibana dash for %s match: %s" % (rule['name'], e))
        else:
            if kb_link:
                matches[0]['kibana_link'] = kb_link

    # Run every enhancement over every match; failures are logged, not fatal
    for enhancement in rule['match_enhancements']:
        for match in matches:
            try:
                enhancement.process(match)
            except EAException as e:
                self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})

    # Don't send real alerts in debug mode
    if self.debug:
        alerter = DebugAlerter(rule)
        alerter.alert(matches)
        return

    # Run the alerts
    alert_sent = False
    alert_exception = None
    for alert in rule['alert']:
        try:
            alert.alert(matches)
        except EAException as e:
            self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']})
            alert_exception = str(e)
        else:
            self.alerts_sent += 1
            alert_sent = True

    # Write the alert(s) to ES
    agg_id = None
    for match in matches:
        alert_body = self.get_alert_body(match, rule, alert_sent, alert_time, alert_exception)
        # Set all matches to aggregate together
        # (the first written document's _id becomes the aggregate_id of the rest)
        if agg_id:
            alert_body['aggregate_id'] = agg_id
        res = self.writeback('elastalert', alert_body)
        if res and not agg_id:
            agg_id = res['_id']
def send_alert(self, matches, rule, alert_time=None):
    """ Send out an alert.

    :param matches: A list of matches.
    :param rule: A rule configuration.
    :param alert_time: Timestamp stamped onto the alert pipeline; defaults to now.
    """
    if alert_time is None:
        alert_time = ts_now()

    # Compute top count keys
    if rule.get('top_count_keys'):
        for match in matches:
            if 'query_key' in rule and rule['query_key'] in match:
                qk = match[rule['query_key']]
            else:
                qk = None
            # Count window: rule timeframe (default 10 min) before the match, 10 min after
            start = ts_to_dt(match[rule['timestamp_field']]) - rule.get('timeframe', datetime.timedelta(minutes=10))
            end = ts_to_dt(match[rule['timestamp_field']]) + datetime.timedelta(minutes=10)
            keys = rule.get('top_count_keys')
            counts = self.get_top_counts(rule, start, end, keys, qk=qk)
            match.update(counts)

    # Generate a kibana3 dashboard for the first match
    if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'):
        try:
            if rule.get('generate_kibana_link'):
                kb_link = self.generate_kibana_db(rule, matches[0])
            else:
                kb_link = self.use_kibana_link(rule, matches[0])
        except EAException as e:
            self.handle_error("Could not generate kibana dash for %s match: %s" % (rule['name'], e))
        else:
            if kb_link:
                matches[0]['kibana_link'] = kb_link

    if rule.get('use_kibana4_dashboard'):
        kb_link = self.generate_kibana4_db(rule, matches[0])
        if kb_link:
            matches[0]['kibana_link'] = kb_link

    #if not rule.get('run_enhancements_first'):
    #    for enhancement in rule.get('match_enhancements'):
    #        valid_matches = []
    #        for match in matches:
    #            try:
    #                enhancement.process(match)
    #                valid_matches.append(match)
    #            except DropMatchException as e:
    #                pass
    #            except EAException as e:
    #                self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})
    #        matches = valid_matches
    #        if not matches:
    #            return None

    # Don't send real alerts in debug mode
    if self.debug:
        alerter = DebugAlerter(rule)
        alerter.alert(matches)
        return None

    # Run the alerts
    alert_sent = False
    alert_exception = None
    # Alert.pipeline is a single object shared between every alerter
    # This allows alerters to pass objects and data between themselves
    alert_pipeline = {"alert_time": alert_time}
    # NOTE(review): rule.get("actions") returns None when the key is missing,
    # which would raise TypeError here — confirm 'actions' is always populated
    for alert in rule.get("actions"):
        alert.pipeline = alert_pipeline
        try:
            alert.alert(matches)
        except EAException as e:
            self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']})
            alert_exception = str(e)
        else:
            self.alerts_sent += 1
            alert_sent = True
    # NOTE(review): alert_sent / alert_exception are computed but not used
    # within this visible block — presumably consumed by a writeback step elsewhere
def run_rule(self, rule):
    """ Run a rule including querying and alerting on results.

    :param rule: The rule configuration.
    :return: The number of matches that the rule produced.
    """
    elastalert_logger.info('Start to run rule: %s', rule.get('name'))
    # Run the rule. If querying over a large time period, split it up into segments
    self.num_hits = 0
    rule_request = rule.get("input").get("search").get("request")
    # Use a rule-specific ES host/port when both are configured, otherwise
    # fall back to the global connection settings
    if rule_request.get("elastic_host", None) is not None and rule_request.get("elastic_port", None) is not None:
        self.current_es = Elasticsearch(host=rule_request.get("elastic_host"),
                                        port=rule_request.get("elastic_port"))
    else:
        self.current_es = self.new_elasticsearch(self.global_config)

    self.run_query(rule)

    # Process any new matches
    num_matches = len(rule['type'].matches)
    while rule['type'].matches:
        match = rule['type'].matches.pop(0)

        # BUG FIX: `key` was referenced below but never defined (its defining
        # code had been commented out), raising NameError whenever realert is
        # set. Rebuild it the same way the other run_rule implementations do:
        # concatenate '.' + query_key value (or '') to form the silence cache key.
        if 'query_key' in rule:
            try:
                key = '.' + match[rule['query_key']]
            except KeyError:
                # Some matches may not have a query key
                key = ''
        else:
            key = ''

        #if self.is_silenced(rule['name'] + key) or self.is_silenced(rule['name']):
        #    elastalert_logger.info('Ignoring match for silenced rule %s%s' % (rule['name'], key))
        #    continue

        if rule.get('realert'):
            # next_alert_time applies exponential backoff to repeated alerts
            next_alert, exponent = self.next_alert_time(rule, rule['name'] + key, ts_now())
            self.set_realert(rule['name'] + key, next_alert, exponent)

        # If no aggregation, alert immediately
        #if not rule['aggregation']:
        #    self.alert([match], rule)
        #    continue
        self.alert([match], rule)

        # Add it as an aggregated match
        #self.add_aggregated_alert(match, rule)

    # Mark this endtime for next run's start
    #rule['previous_endtime'] = endtime

    #time_taken = time.time() - run_start
    return num_matches