Example #1
    def find_recent_pending_alerts(self, time_limit):
        """ Queries writeback_es to find alerts that did not send
        and are newer than time_limit """
        query = {
            'query': {
                'query_string': {
                    'query': 'alert_sent:false'
                }
            },
            'filter': {
                'range': {
                    'alert_time': {
                        'from': dt_to_ts(ts_now() - time_limit),
                        'to': dt_to_ts(ts_now())
                    }
                }
            }
        }
        if self.writeback_es:
            try:
                res = self.writeback_es.search(index=self.writeback_index,
                                               doc_type='elastalert',
                                               body=query,
                                               size=1000)
                if res['hits']['hits']:
                    return res['hits']['hits']
            except:
                pass
        return []
Example #2
    def is_silenced(self, rule_name):
        """ Checks if rule_name is currently silenced. Returns false on exception. """
        if rule_name in self.silence_cache:
            if ts_now() < self.silence_cache[rule_name][0]:
                return True
            else:
                return False

        query = {'filter': {'term': {'rule_name': rule_name}},
                 'sort': {'until': {'order': 'desc'}}}

        if self.writeback_es:
            try:
                res = self.writeback_es.search(index=self.writeback_index, doc_type='silence',
                                               size=1, body=query, _source_include=['until', 'exponent'])
            except ElasticsearchException as e:
                self.handle_error("Error while querying for alert silence status: %s" % (e), {'rule': rule_name})

                return False

            if res['hits']['hits']:
                until_ts = res['hits']['hits'][0]['_source']['until']
                exponent = res['hits']['hits'][0]['_source'].get('exponent', 0)
                self.silence_cache[rule_name] = (ts_to_dt(until_ts), exponent)
                if ts_now() < ts_to_dt(until_ts):
                    return True
        return False
Example #3
    def check_for_match(self, key):
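        # Only evaluate matches inside the configured start/end time window and weekdays;
        # once the timeframe has elapsed for this key, match when the event count for the
        # key falls below the configured threshold.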
        now = ts_now().time()
        start_time = self.rules.get('start_time')
        end_time = self.rules.get('end_time')
        if (start_time is not None and now < start_time) or (end_time is not None and now > end_time):
            return

        weekdays = self.rules.get('weekdays')
        if weekdays is not None and ts_now().weekday() not in weekdays:
            return

        most_recent_ts = self.get_ts(self.occurrences[key].data[-1])
        if self.first_event.get(key) is None:
            self.first_event[key] = most_recent_ts

        # Don't check for matches until timeframe has elapsed
        if most_recent_ts - self.first_event[key] < self.rules['timeframe']:
            return

        # Match if, after removing old events, the count has fallen below the threshold
        count = self.occurrences[key].count()
        if count < self.rules['threshold']:
            event = self.occurrences[key].data[-1][0]
            event.update(key=key, count=count)
            self.add_match(event)

            # After adding this match, remove this key so we don't realert on it
            self.occurrences.pop(key)
            del self.first_event[key]
Example #4
    def start(self):
        """ Periodically go through each rule and run it """
        starttime = self.args.start
        if starttime:
            try:
                starttime = ts_to_dt(starttime)
            except (TypeError, ValueError):
                self.handle_error("%s is not a valid ISO 8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (starttime))
                exit(1)
        while True:
            # If writeback_es errored, it's disabled until the next query cycle
            if not self.writeback_es:
                self.writeback_es = self.new_elasticsearch(self.es_conn_config)

            self.send_pending_alerts()

            next_run = datetime.datetime.utcnow() + self.run_every

            for rule in self.rules:
                # Set endtime based on the rule's delay
                delay = rule.get('query_delay')
                if hasattr(self.args, 'end') and self.args.end:
                    endtime = ts_to_dt(self.args.end)
                elif delay:
                    endtime = ts_now() - delay
                else:
                    endtime = ts_now()

                try:
                    num_matches = self.run_rule(rule, endtime, starttime)
                except EAException as e:
                    self.handle_error("Error running rule %s: %s" % (rule['name'], e), {'rule': rule['name']})
                else:
                    old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time'))
                    logging.info("Ran %s from %s to %s: %s query hits, %s matches,"
                                 " %s alerts sent" % (rule['name'], old_starttime, pretty_ts(endtime, rule.get('use_local_time')),
                                                      self.num_hits, num_matches, self.alerts_sent))
                    self.alerts_sent = 0

                self.remove_old_events(rule)

            if next_run < datetime.datetime.utcnow():
                # We were processing for longer than our refresh interval
                # This can happen if --start was specified with a large time period
                # or if we are running too slow to process events in real time.
                logging.warning("Querying from %s to %s took longer than %s!" % (old_starttime, endtime, self.run_every))
                continue

            # Only force starttime once
            starttime = None

            if not self.args.pin_rules:
                self.load_rule_changes()

            # Wait before querying again
            sleep_for = (next_run - datetime.datetime.utcnow()).seconds
            logging.info("Sleeping for %s seconds" % (sleep_for))
            time.sleep(sleep_for)
Example #5
    def send_pending_alerts(self):
        pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit)
        for alert in pending_alerts:
            _id = alert['_id']
            alert = alert['_source']
            try:
                rule_name = alert.pop('rule_name')
                alert_time = alert.pop('alert_time')
                match_body = alert.pop('match_body')
            except KeyError:
                # Malformed alert, drop it
                continue

            # Find original rule
            for rule in self.rules:
                if rule['name'] == rule_name:
                    break
            else:
                # Original rule is missing, keep alert for later if rule reappears
                continue

            # Set current_es for top_count_keys query
            rule_es_conn_config = self.build_es_conn_config(rule)
            self.current_es = self.new_elasticsearch(rule_es_conn_config)
            self.current_es_addr = (rule['es_host'], rule['es_port'])

            # Send the alert unless it's a future alert
            if ts_now() > ts_to_dt(alert_time):
                aggregated_matches = self.get_aggregated_matches(_id)
                if aggregated_matches:
                    matches = [match_body] + [
                        agg_match['match_body']
                        for agg_match in aggregated_matches
                    ]
                    self.alert(matches, rule, alert_time=alert_time)
                    if rule['current_aggregate_id'] == _id:
                        rule['current_aggregate_id'] = None
                else:
                    self.alert([match_body], rule, alert_time=alert_time)

                # Delete it from the index
                try:
                    self.writeback_es.delete(index=self.writeback_index,
                                             doc_type='elastalert',
                                             id=_id)
                except:  # TODO: Give this a more relevant exception, try:except: is evil.
                    self.handle_error("Failed to delete alert %s at %s" %
                                      (_id, alert_time))

        # Send in memory aggregated alerts
        for rule in self.rules:
            if rule['agg_matches']:
                if ts_now() > rule['aggregate_alert_time']:
                    self.alert(rule['agg_matches'], rule)
                    rule['agg_matches'] = []
Example #6
    def send_pending_alerts(self):
        pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit)
        for alert in pending_alerts:
            _id = alert['_id']
            alert = alert['_source']
            try:
                rule_name = alert.pop('rule_name')
                alert_time = alert.pop('alert_time')
                match_body = alert.pop('match_body')
            except KeyError:
                # Malformed alert, drop it
                continue

            agg_id = alert.get('aggregate_id', None)
            if agg_id:
                # Aggregated alerts will be taken care of by get_aggregated_matches
                continue

            # Find original rule
            for rule in self.rules:
                if rule['name'] == rule_name:
                    break
            else:
                # Original rule is missing, drop alert
                continue

            # Retry the alert unless it's a future alert
            if ts_now() > ts_to_dt(alert_time):
                aggregated_matches = self.get_aggregated_matches(_id)
                if aggregated_matches:
                    matches = [match_body] + [
                        agg_match['match_body']
                        for agg_match in aggregated_matches
                    ]
                    self.alert(matches, rule, alert_time=alert_time)
                    rule['current_aggregate_id'] = None
                else:
                    self.alert([match_body], rule, alert_time=alert_time)

                # Delete it from the index
                try:
                    self.writeback_es.delete(index=self.writeback_index,
                                             doc_type='elastalert',
                                             id=_id)
                except:
                    self.handle_error("Failed to delete alert %s at %s" %
                                      (_id, alert_time))

        # Send in memory aggregated alerts
        for rule in self.rules:
            if rule['agg_matches']:
                if ts_now() > rule['aggregate_alert_time']:
                    self.alert(rule['agg_matches'], rule)
                    rule['agg_matches'] = []
Example #7
    def send_pending_alerts(self):
        pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit)
        for alert in pending_alerts:
            _id = alert['_id']
            alert = alert['_source']
            try:
                rule_name = alert.pop('rule_name')
                alert_time = alert.pop('alert_time')
                match_body = alert.pop('match_body')
            except KeyError:
                # Malformed alert, drop it
                continue

            # Find original rule
            for rule in self.rules:
                if rule['name'] == rule_name:
                    break
            else:
                # Original rule is missing, keep alert for later if rule reappears
                continue

            # Set current_es for top_count_keys query
            rule_es_conn_config = self.build_es_conn_config(rule)
            self.current_es = self.new_elasticsearch(rule_es_conn_config)
            self.current_es_addr = (rule['es_host'], rule['es_port'])

            # Send the alert unless it's a future alert
            if ts_now() > ts_to_dt(alert_time):
                aggregated_matches = self.get_aggregated_matches(_id)
                if aggregated_matches:
                    matches = [match_body] + [agg_match['match_body'] for agg_match in aggregated_matches]
                    self.alert(matches, rule, alert_time=alert_time)
                    if rule['current_aggregate_id'] == _id:
                        rule['current_aggregate_id'] = None
                else:
                    self.alert([match_body], rule, alert_time=alert_time)

                # Delete it from the index
                try:
                    self.writeback_es.delete(index=self.writeback_index,
                                             doc_type='elastalert',
                                             id=_id)
                except:  # TODO: Give this a more relevant exception, try:except: is evil.
                    self.handle_error("Failed to delete alert %s at %s" % (_id, alert_time))

        # Send in memory aggregated alerts
        for rule in self.rules:
            if rule['agg_matches']:
                if ts_now() > rule['aggregate_alert_time']:
                    self.alert(rule['agg_matches'], rule)
                    rule['agg_matches'] = []
Example #8
    def send_pending_alerts(self):
        pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit)
        for alert in pending_alerts:
            _id = alert['_id']
            alert = alert['_source']
            try:
                rule_name = alert.pop('rule_name')
                alert_time = alert.pop('alert_time')
                match_body = alert.pop('match_body')
            except KeyError:
                # Malformed alert, drop it
                continue

            agg_id = alert.get('aggregate_id', None)
            if agg_id:
                # Aggregated alerts will be taken care of by get_aggregated_matches
                continue

            # Find original rule
            for rule in self.rules:
                if rule['name'] == rule_name:
                    break
            else:
                # Original rule is missing, drop alert
                continue

            # Retry the alert unless it's a future alert
            if ts_now() > ts_to_dt(alert_time):
                aggregated_matches = self.get_aggregated_matches(_id)
                if aggregated_matches:
                    matches = [match_body] + [agg_match['match_body'] for agg_match in aggregated_matches]
                    self.alert(matches, rule, alert_time=alert_time)
                    rule['current_aggregate_id'] = None
                else:
                    self.alert([match_body], rule, alert_time=alert_time)

                # Delete it from the index
                try:
                    self.writeback_es.delete(index=self.writeback_index,
                                             doc_type='elastalert',
                                             id=_id)
                except:
                    self.handle_error("Failed to delete alert %s at %s" % (_id, alert_time))

        # Send in memory aggregated alerts
        for rule in self.rules:
            if rule['agg_matches']:
                if ts_now() > rule['aggregate_alert_time']:
                    self.alert(rule['agg_matches'], rule)
                    rule['agg_matches'] = []
Example #9
    def add_aggregated_alert(self, match, rule):
        """ Save a match as a pending aggregate alert to elasticsearch. """
        if (not rule['current_aggregate_id'] or
            ('aggregate_alert_time' in rule and rule['aggregate_alert_time'] <
             ts_to_dt(match[rule['timestamp_field']]))):

            # Elastalert may have restarted while pending alerts exist
            pending_alert = self.find_pending_aggregate_alert(rule)
            if pending_alert:
                alert_time = rule['aggregate_alert_time'] = ts_to_dt(
                    pending_alert['_source']['alert_time'])
                agg_id = rule['current_aggregate_id'] = pending_alert['_id']
                elastalert_logger.info(
                    'Adding alert for %s to aggregation(id: %s), next alert at %s'
                    % (rule['name'], agg_id, alert_time))
            else:
                # First match, set alert_time
                match_time = ts_to_dt(match[rule['timestamp_field']])
                alert_time = ''
                if isinstance(rule['aggregation'],
                              dict) and rule['aggregation'].get('schedule'):
                    croniter._datetime_to_timestamp = cronite_datetime_to_timestamp  # For Python 2.6 compatibility
                    try:
                        iter = croniter(rule['aggregation']['schedule'],
                                        ts_now())
                        alert_time = unix_to_dt(iter.get_next())
                    except Exception as e:
                        self.handle_error(
                            "Error parsing aggregate send time Cron format %s"
                            % (e), rule['aggregation']['schedule'])
                else:
                    alert_time = match_time + rule['aggregation']

                rule['aggregate_alert_time'] = alert_time
                agg_id = None
                elastalert_logger.info(
                    'New aggregation for %s. next alert at %s.' %
                    (rule['name'], alert_time))
        else:
            # Already pending aggregation, use existing alert_time
            alert_time = rule['aggregate_alert_time']
            agg_id = rule['current_aggregate_id']
            elastalert_logger.info(
                'Adding alert for %s to aggregation(id: %s), next alert at %s'
                % (rule['name'], agg_id, alert_time))

        alert_body = self.get_alert_body(match, rule, False, alert_time)
        if agg_id:
            alert_body['aggregate_id'] = agg_id
        res = self.writeback('elastalert', alert_body)

        # If new aggregation, save _id
        if res and not agg_id:
            rule['current_aggregate_id'] = res['_id']

        # Couldn't write the match to ES, save it in memory for now
        if not res:
            rule['agg_matches'].append(match)

        return res
Example #10
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules["es_host"], port=self.rules["es_port"])
        window_size = datetime.timedelta(**self.rules.get("terms_window_size", {"days": 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get("use_strftime_index"):
            index = format_index(self.rules["index"], start, end)
        else:
            index = self.rules["index"]
        time_filter = {self.rules["timestamp_field"]: {"lte": dt_to_ts(end), "gte": dt_to_ts(start)}}
        query_template["filter"] = {"bool": {"must": [{"range": time_filter}]}}
        query = {"aggs": {"filtered": query_template}}

        for field in self.fields:
            field_name["field"] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
            if "aggregations" in res:
                buckets = res["aggregations"]["filtered"]["values"]["buckets"]
                keys = [bucket["key"] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info("Found %s unique values for %s" % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info("Found no values for %s" % (field))
Example #11
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Example #12
    def get_starttime(self, rule):
        """ Query ES for the last time we ran this rule.

        :param rule: The rule configuration.
        :return: A timestamp or None.
        """
        query = {'filter': {'term': {'rule_name': '%s' % (rule['name'])}},
                 'sort': {'@timestamp': {'order': 'desc'}}}
        try:
            if self.writeback_es:
                res = self.writeback_es.search(index=self.writeback_index, doc_type='elastalert_status',
                                               size=1, body=query, _source_include=['endtime', 'rule_name'])
                if res['hits']['hits']:
                    endtime = ts_to_dt(res['hits']['hits'][0]['_source']['endtime'])

                    if ts_now() - endtime < self.old_query_limit:
                        return endtime
                    else:
                        logging.info("Found expired previous run for %s at %s" % (rule['name'], endtime))
                        return None
        except (ElasticsearchException, KeyError) as e:
            self.handle_error('Error querying for last run: %s' % (e), {'rule': rule['name']})
            self.writeback_es = None

        return None
Example #13
    def find_recent_pending_alerts(self, time_limit):
        """ Queries writeback_es to find alerts that did not send
        and are newer than time_limit """
        query = {'query': {'query_string': {'query': 'alert_sent:false'}},
                 'filter': {'range': {'alert_time': {'from': dt_to_ts(ts_now() - time_limit),
                                                     'to': dt_to_ts(ts_now())}}}}
        if self.writeback_es:
            try:
                res = self.writeback_es.search(index=self.writeback_index,
                                               doc_type='elastalert',
                                               body=query,
                                               size=1000)
                if res['hits']['hits']:
                    return res['hits']['hits']
            except:
                pass
        return []
Example #14
    def set_realert(self, rule_name, timestamp, exponent):
        """ Write a silence to elasticsearch for rule_name until timestamp. """
        body = {'exponent': exponent,
                'rule_name': rule_name,
                '@timestamp': ts_now(),
                'until': timestamp}
        self.silence_cache[rule_name] = (timestamp, exponent)
        return self.writeback('silence', body)
Example #15
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'], timeout=self.rules.get('es_conn_timeout', 50))
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            # For composite keys, we will need to perform sub-aggregations
            if type(field) == list:
                level = query_template['aggs']
                # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
                for i, sub_field in enumerate(field):
                    level['values']['terms']['field'] = sub_field
                    if i < len(field) - 1:
                        # If we have more fields after the current one, then set up the next nested structure
                        level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                        level = level['values']['aggs']
            else:
                # For non-composite keys, only a single agg is needed
                field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                if type(field) == list:
                    # For composite keys, make the lookup based on all fields
                    # Make it a tuple since it can be hashed and used in dictionary lookups
                    self.seen_values[tuple(field)] = []
                    for bucket in buckets:
                        # We need to walk down the hierarchy and obtain the value at each level
                        self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                    # If we don't have any results, it could either be because of the absence of any baseline data
                    # OR it may be because the composite key contained a non-primitive type.  Either way, give the
                    # end-users a heads up to help them debug what might be going on.
                    if not self.seen_values[tuple(field)]:
                        elastalert_logger.warning((
                            'No results were found from all sub-aggregations.  This can either indicate that there is '
                            'no baseline data OR that a non-primitive field was used in a composite key.'
                        ))
                else:
                    keys = [bucket['key'] for bucket in buckets]
                    self.seen_values[field] = keys
                    elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Example #16
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'], timeout=self.rules.get('es_conn_timeout', 50))
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            # For composite keys, we will need to perform sub-aggregations
            if type(field) == list:
                level = query_template['aggs']
                # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
                for i, sub_field in enumerate(field):
                    level['values']['terms']['field'] = sub_field
                    if i < len(field) - 1:
                        # If we have more fields after the current one, then set up the next nested structure
                        level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                        level = level['values']['aggs']
            else:
                # For non-composite keys, only a single agg is needed
                field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                if type(field) == list:
                    # For composite keys, make the lookup based on all fields
                    # Make it a tuple since it can be hashed and used in dictionary lookups
                    self.seen_values[tuple(field)] = []
                    for bucket in buckets:
                        # We need to walk down the hierarchy and obtain the value at each level
                        self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                    # If we don't have any results, it could either be because of the absence of any baseline data
                    # OR it may be because the composite key contained a non-primitive type.  Either way, give the
                    # end-users a heads up to help them debug what might be going on.
                    if not self.seen_values[tuple(field)]:
                        elastalert_logger.warning((
                            'No results were found from all sub-aggregations.  This can either indicate that there is '
                            'no baseline data OR that a non-primitive field was used in a composite key.'
                        ))
                else:
                    keys = [bucket['key'] for bucket in buckets]
                    self.seen_values[field] = keys
                    elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Example #17
    def remove_old_events(self, rule):
        # Anything older than the buffer time we can forget
        now = ts_now()
        remove = []
        buffer_time = rule.get('buffer_time', self.buffer_time)
        for _id, timestamp in rule['processed_hits'].iteritems():
            if now - timestamp > buffer_time:
                remove.append(_id)
        map(rule['processed_hits'].pop, remove)
Example #18
    def set_realert(self, rule_name, timestamp):
        """ Write a silence to elasticsearch for rule_name until timestamp. """
        body = {
            'rule_name': rule_name,
            '@timestamp': ts_now(),
            'until': timestamp
        }
        self.silence_cache[rule_name] = timestamp
        return self.writeback('silence', body)
Example #19
    def remove_old_events(self, rule):
        # Anything older than the buffer time we can forget
        now = ts_now()
        remove = []
        buffer_time = rule.get('buffer_time', self.buffer_time)
        for _id, timestamp in rule['processed_hits'].iteritems():
            if now - timestamp > buffer_time:
                remove.append(_id)
        map(rule['processed_hits'].pop, remove)
Example #20
    def run_all_rules(self):
        """ Run each rule one time """
        # If writeback_es errored, it's disabled until the next query cycle
        if not self.writeback_es:
            self.writeback_es = self.new_elasticsearch(self.es_conn_config)

        self.send_pending_alerts()

        next_run = datetime.datetime.utcnow() + self.run_every

        for rule in self.rules:
            # Set endtime based on the rule's delay
            delay = rule.get('query_delay')
            if hasattr(self.args, 'end') and self.args.end:
                endtime = ts_to_dt(self.args.end)
            elif delay:
                endtime = ts_now() - delay
            else:
                endtime = ts_now()

            try:
                num_matches = self.run_rule(rule, endtime, self.starttime)
            except EAException as e:
                self.handle_error("Error running rule %s: %s" % (rule['name'], e), {'rule': rule['name']})
            else:
                old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time'))
                logging.info("Ran %s from %s to %s: %s query hits, %s matches,"
                             " %s alerts sent" % (rule['name'], old_starttime, pretty_ts(endtime, rule.get('use_local_time')),
                                                  self.num_hits, num_matches, self.alerts_sent))
                self.alerts_sent = 0

            self.remove_old_events(rule)

        if next_run < datetime.datetime.utcnow():
            # We were processing for longer than our refresh interval
            # This can happen if --start was specified with a large time period
            # or if we are running too slow to process events in real time.
            logging.warning("Querying from %s to %s took longer than %s!" % (old_starttime, endtime, self.run_every))

        # Only force starttime once
        self.starttime = None

        if not self.args.pin_rules:
            self.load_rule_changes()
Example #21
    def is_silenced(self, rule_name):
        """ Checks if rule_name is currently silenced. Returns false on exception. """
        if rule_name in self.silence_cache:
            if ts_now() < ts_to_dt(self.silence_cache[rule_name]):
                return True
            else:
                self.silence_cache.pop(rule_name)
                return False

        query = {
            'filter': {
                'term': {
                    'rule_name': rule_name
                }
            },
            'sort': {
                'until': {
                    'order': 'desc'
                }
            }
        }

        if self.writeback_es:
            try:
                res = self.writeback_es.search(index=self.writeback_index,
                                               doc_type='silence',
                                               size=1,
                                               body=query,
                                               _source_include=['until'])
            except ElasticsearchException as e:
                self.handle_error(
                    "Error while querying for alert silence status: %s" % (e),
                    {'rule': rule_name})

                return False

            if res['hits']['hits']:
                until_ts = res['hits']['hits'][0]['_source']['until']
                if ts_now() < ts_to_dt(until_ts):
                    self.silence_cache[rule_name] = until_ts
                    return True
        return False
Example #22
    def run_rule(self, rule):
        """ Run a rule including querying and alerting on results.

        :param rule: The rule configuration.
        :return: The number of matches that the rule produced.
        """

        elastalert_logger.info('Start to run rule: %s', rule.get('name'))
        # Run the rule. If querying over a large time period, split it up into segments
        self.num_hits = 0
        rule_request = rule.get("input").get("search").get("request")
        if rule_request.get("elastic_host",
                            None) is not None and rule_request.get(
                                "elastic_port", None) is not None:
            self.current_es = Elasticsearch(
                host=rule.get("input").get("search").get("request").get(
                    "elastic_host"),
                port=rule.get("input").get("search").get("request").get(
                    "elastic_port"))
        else:
            self.current_es = self.new_elasticsearch(self.global_config)

        self.run_query(rule)

        # Process any new matches
        num_matches = len(rule['type'].matches)

        while rule['type'].matches:
            match = rule['type'].matches.pop(0)

            #if self.is_silenced(rule['name'] + key) or self.is_silenced(rule['name']):
            #    elastalert_logger.info('Ignoring match for silenced rule %s%s' % (rule['name'], key))
            #    continue

            if rule.get('realert'):
                # The per-query_key silence logic is commented out above, so use an
                # empty key suffix; otherwise `key` would be undefined here.
                key = ''
                next_alert, exponent = self.next_alert_time(
                    rule, rule['name'] + key, ts_now())
                self.set_realert(rule['name'] + key, next_alert, exponent)

            # If no aggregation, alert immediately
            #if not rule['aggregation']:
            #    self.alert([match], rule)
            #    continue
            self.alert([match], rule)

            # Add it as an aggregated match
            #self.add_aggregated_alert(match, rule)

        # Mark this endtime for next run's start
        #rule['previous_endtime'] = endtime

        #time_taken = time.time() - run_start

        return num_matches
Example #23
    def find_recent_pending_alerts(self, time_limit):
        """ Queries writeback_es to find alerts that did not send
        and are newer than time_limit """

        # XXX only fetches 1000 results. If limit is reached, next loop will catch them
        # unless there are constantly more than 1000 alerts to send.

        # Fetch recent, unsent alerts that aren't part of an aggregate, earlier alerts first.
        query = {
            'query': {
                'query_string': {
                    'query': '!_exists_:aggregate_id AND alert_sent:false'
                }
            },
            'filter': {
                'range': {
                    'alert_time': {
                        'from': dt_to_ts(ts_now() - time_limit),
                        'to': dt_to_ts(ts_now())
                    }
                }
            },
            'sort': {
                'alert_time': {
                    'order': 'asc'
                }
            }
        }
        if self.writeback_es:
            try:
                res = self.writeback_es.search(index=self.writeback_index,
                                               doc_type='elastalert',
                                               body=query,
                                               size=1000)
                if res['hits']['hits']:
                    return res['hits']['hits']
            except:  # TODO: Give this a more relevant exception, try:except: is evil.
                pass
        return []
Example #24
    def find_recent_pending_alerts(self, time_limit):
        """ Queries writeback_es to find alerts that did not send
        and are newer than time_limit """

        # XXX only fetches 1000 results. If limit is reached, next loop will catch them
        # unless there are constantly more than 1000 alerts to send.

        # Fetch recent, unsent alerts that aren't part of an aggregate, earlier alerts first.
        query = {'query': {'query_string': {'query': '!_exists_:aggregate_id AND alert_sent:false'}},
                 'filter': {'range': {'alert_time': {'from': dt_to_ts(ts_now() - time_limit),
                                                     'to': dt_to_ts(ts_now())}}},
                 'sort': {'alert_time': {'order': 'asc'}}}
        if self.writeback_es:
            try:
                res = self.writeback_es.search(index=self.writeback_index,
                                               doc_type='elastalert',
                                               body=query,
                                               size=1000)
                if res['hits']['hits']:
                    return res['hits']['hits']
            except:  # TODO: Give this a more relevant exception, try:except: is evil.
                pass
        return []
Example #25
    def find_pending_aggregate_alert(self, rule):
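        # Look for the most recent unsent alert for this rule whose alert_time is still
        # in the future and which is not part of an existing aggregate (no aggregate_id).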
        query = {
            'filter': {
                'bool': {
                    'must': [{
                        'term': {
                            'rule_name': rule['name']
                        }
                    }, {
                        'range': {
                            'alert_time': {
                                'gt': ts_now()
                            }
                        }
                    }, {
                        'not': {
                            'exists': {
                                'field': 'aggregate_id'
                            }
                        }
                    }, {
                        'term': {
                            'alert_sent': 'false'
                        }
                    }]
                }
            },
            'sort': {
                'alert_time': {
                    'order': 'desc'
                }
            }
        }
        if not self.writeback_es:
            self.writeback_es = self.new_elasticsearch(self.es_conn_config)
        try:
            res = self.writeback_es.search(index=self.writeback_index,
                                           doc_type='elastalert',
                                           body=query,
                                           size=1)
            if len(res['hits']['hits']) == 0:
                return None
        except (KeyError, ElasticsearchException) as e:
            self.handle_error(
                "Error searching for pending aggregated matches: %s" % (e),
                {'rule_name': rule['name']})
            return None

        return res['hits']['hits'][0]
Example #26
    def add_aggregated_alert(self, match, rule):
        """ Save a match as a pending aggregate alert to elasticsearch. """
        if (not rule['current_aggregate_id'] or
                ('aggregate_alert_time' in rule and rule['aggregate_alert_time'] < ts_to_dt(match[rule['timestamp_field']]))):

            # Elastalert may have restarted while pending alerts exist
            pending_alert = self.find_pending_aggregate_alert(rule)
            if pending_alert:
                alert_time = rule['aggregate_alert_time'] = ts_to_dt(pending_alert['_source']['alert_time'])
                agg_id = rule['current_aggregate_id'] = pending_alert['_id']
                elastalert_logger.info('Adding alert for %s to aggregation(id: %s), next alert at %s' % (rule['name'], agg_id, alert_time))
            else:
                # First match, set alert_time
                match_time = ts_to_dt(match[rule['timestamp_field']])
                alert_time = ''
                if isinstance(rule['aggregation'], dict) and rule['aggregation'].get('schedule'):
                    croniter._datetime_to_timestamp = cronite_datetime_to_timestamp  # For Python 2.6 compatibility
                    try:
                        iter = croniter(rule['aggregation']['schedule'], ts_now())
                        alert_time = unix_to_dt(iter.get_next())
                    except Exception as e:
                        self.handle_error("Error parsing aggregate send time Cron format %s" % (e), rule['aggregation']['schedule'])
                else:
                    alert_time = match_time + rule['aggregation']

                rule['aggregate_alert_time'] = alert_time
                agg_id = None
                elastalert_logger.info('New aggregation for %s. next alert at %s.' % (rule['name'], alert_time))
        else:
            # Already pending aggregation, use existing alert_time
            alert_time = rule['aggregate_alert_time']
            agg_id = rule['current_aggregate_id']
            elastalert_logger.info('Adding alert for %s to aggregation(id: %s), next alert at %s' % (rule['name'], agg_id, alert_time))

        alert_body = self.get_alert_body(match, rule, False, alert_time)
        if agg_id:
            alert_body['aggregate_id'] = agg_id
        res = self.writeback('elastalert', alert_body)

        # If new aggregation, save _id
        if res and not agg_id:
            rule['current_aggregate_id'] = res['_id']

        # Couldn't write the match to ES, save it in memory for now
        if not res:
            rule['agg_matches'].append(match)

        return res
Example #27
    def run_query(self, rule, start=None, end=None):
        """ Query for the rule and pass all of the results to the RuleType instance.

        :param rule: The rule configuration.
        :param start: The earliest time to query.
        :param end: The latest time to query.
        Returns True on success and False on failure.
        """
        if start is None:
            start = self.get_index_start(rule['index'])
        if end is None:
            end = ts_now()

        # Reset hit counter and query
        rule_inst = rule['type']
        prev_num_hits = self.num_hits
        max_size = rule.get('max_query_size', self.max_query_size)
        index = self.get_index(rule, start, end)
        if rule.get('use_count_query'):
            data = self.get_hits_count(rule, start, end, index)
        elif rule.get('use_terms_query'):
            data = self.get_hits_terms(rule, start, end, index,
                                       rule['query_key'])
        else:
            data = self.get_hits(rule, start, end, index)
            if data:
                data = self.remove_duplicate_events(data, rule)

        # There was an exception while querying
        if data is None:
            return False
        elif data:
            if rule.get('use_count_query'):
                rule_inst.add_count_data(data)
            elif rule.get('use_terms_query'):
                rule_inst.add_terms_data(data)
            else:
                rule_inst.add_data(data)

        # Warn if we hit max_query_size
        if self.num_hits - prev_num_hits == max_size and not rule.get(
                'use_count_query'):
            logging.warning("Hit max_query_size (%s) while querying for %s" %
                            (max_size, rule['name']))

        return True
Example #28
    def run_query(self, rule, start=None, end=None):
        """ Query for the rule and pass all of the results to the RuleType instance.

        :param rule: The rule configuration.
        :param start: The earliest time to query.
        :param end: The latest time to query.
        Returns True on success and False on failure.
        """
        if start is None:
            start = self.get_index_start(rule['index'])
        if end is None:
            end = ts_now()

        # Reset hit counter and query
        rule_inst = rule['type']
        prev_num_hits = self.num_hits
        max_size = rule.get('max_query_size', self.max_query_size)
        index = self.get_index(rule, start, end)
        if rule.get('use_count_query'):
            data = self.get_hits_count(rule, start, end, index)
        elif rule.get('use_terms_query'):
            data = self.get_hits_terms(rule, start, end, index, rule['query_key'])
        else:
            data = self.get_hits(rule, start, end, index)
            if data:
                data = self.remove_duplicate_events(data, rule)

        # There was an exception while querying
        if data is None:
            return False
        elif data:
            if rule.get('use_count_query'):
                rule_inst.add_count_data(data)
            elif rule.get('use_terms_query'):
                rule_inst.add_terms_data(data)
            else:
                rule_inst.add_data(data)

        # Warn if we hit max_query_size
        if self.num_hits - prev_num_hits == max_size and not rule.get('use_count_query'):
            logging.warning("Hit max_query_size (%s) while querying for %s" % (max_size, rule['name']))

        return True
Example #29
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'],
                                port=self.rules['es_port'],
                                use_ssl=self.rules['use_ssl'],
                                timeout=self.rules.get('es_conn_timeout', 50))
        window_size = datetime.timedelta(
            **self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {
            self.rules['timestamp_field']: {
                'lte': dt_to_ts(end),
                'gte': dt_to_ts(start)
            }
        }
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query,
                                 index=index,
                                 ignore_unavailable=True,
                                 timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' %
                                       (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Example #30
    def writeback(self, doc_type, body):
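        # Serialize datetimes and write the document to the writeback index; in debug
        # mode nothing is written and None is returned.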
        # Convert any datetime objects to timestamps
        for key in body.keys():
            if isinstance(body[key], datetime.datetime):
                body[key] = dt_to_ts(body[key])
        if self.debug:
            elastalert_logger.info("Skipping writing to ES: %s" % (body))
            return None

        if '@timestamp' not in body:
            body['@timestamp'] = dt_to_ts(ts_now())
        if self.writeback_es:
            try:
                res = self.writeback_es.create(index=self.writeback_index,
                                               doc_type=doc_type, body=body)
                return res
            except ElasticsearchException as e:
                logging.exception("Error writing alert info to elasticsearch: %s" % (e))
                self.writeback_es = None
Example #31
    def get_starttime(self, rule):
        """ Query ES for the last time we ran this rule.

        :param rule: The rule configuration.
        :return: A timestamp or None.
        """
        query = {
            'filter': {
                'term': {
                    'rule_name': '%s' % (rule['name'])
                }
            },
            'sort': {
                '@timestamp': {
                    'order': 'desc'
                }
            }
        }
        try:
            if self.writeback_es:
                res = self.writeback_es.search(
                    index=self.writeback_index,
                    doc_type='elastalert_status',
                    size=1,
                    body=query,
                    _source_include=['endtime', 'rule_name'])
                if res['hits']['hits']:
                    endtime = ts_to_dt(
                        res['hits']['hits'][0]['_source']['endtime'])

                    if ts_now() - endtime < self.old_query_limit:
                        return endtime
                    else:
                        logging.info(
                            "Found expired previous run for %s at %s" %
                            (rule['name'], endtime))
                        return None
        except (ElasticsearchException, KeyError) as e:
            self.handle_error('Error querying for last run: %s' % (e),
                              {'rule': rule['name']})
            self.writeback_es = None

        return None
Example #32
    def get_all_terms(self):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))

        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if self.rules.get('use_strftime_index'):
            end = ts_now()
            start = end - window_size
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query_template, index=index, ignore_unavailable=True, timeout=50)
            buckets = res['aggregations']['values']['buckets']
            keys = [bucket['key'] for bucket in buckets]
            self.seen_values[field] = keys
Example #33
    def find_pending_aggregate_alert(self, rule):
        query = {'filter': {'bool': {'must': [{'term': {'rule_name': rule['name']}},
                                              {'range': {'alert_time': {'gt': ts_now()}}},
                                              {'not': {'exists': {'field': 'aggregate_id'}}},
                                              {'term': {'alert_sent': 'false'}}]}},
                 'sort': {'alert_time': {'order': 'desc'}}}
        if not self.writeback_es:
            self.writeback_es = self.new_elasticsearch(self.es_conn_config)
        try:
            res = self.writeback_es.search(index=self.writeback_index,
                                           doc_type='elastalert',
                                           body=query,
                                           size=1)
            if len(res['hits']['hits']) == 0:
                return None
        except (KeyError, ElasticsearchException) as e:
            self.handle_error("Error searching for pending aggregated matches: %s" % (e), {'rule_name': rule['name']})
            return None

        return res['hits']['hits'][0]
Example #34
    def writeback(self, doc_type, body):
        # Convert any datetime objects to timestamps
        for key in body.keys():
            if isinstance(body[key], datetime.datetime):
                body[key] = dt_to_ts(body[key])
        if self.debug:
            elastalert_logger.info("Skipping writing to ES: %s" % (body))
            return None

        if '@timestamp' not in body:
            body['@timestamp'] = dt_to_ts(ts_now())
        if self.writeback_es:
            try:
                res = self.writeback_es.create(index=self.writeback_index,
                                               doc_type=doc_type,
                                               body=body)
                return res
            except ElasticsearchException as e:
                logging.exception(
                    "Error writing alert info to elasticsearch: %s" % (e))
                self.writeback_es = None
Example #35
    def add_data(self, data):
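        # Group events by query_key (or the single key 'all' when no query_key is set),
        # append them to a per-key sliding EventWindow, and check each key for a match.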
        if 'query_key' in self.rules:
            qk = self.rules['query_key']
        else:
            qk = None

        count = 1
        if not data:
            # insert dummy event
            data = [{self.ts_field: ts_now()}]
            count = 0

        for event in data:
            if qk:
                key = hashable(lookup_es_key(event, qk))
            else:
                # If no query_key, we use the key 'all' for all events
                key = 'all'

            # Store the timestamps of recent occurrences, per key
            self.occurrences.setdefault(key, EventWindow(self.rules['timeframe'], getTimestamp=self.get_ts)).append((event, count))
            self.check_for_match(key)
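add_data leans on EventWindow to keep only events that fall inside the rule's timeframe. The real class is not shown in these snippets; the following is a minimal sketch of such a sliding window under that assumption (the class body, the '@timestamp' default, and the sample data are all illustrative).

import datetime
from collections import deque

class EventWindowSketch(object):
    # Illustrative sliding window: keeps (event, count) pairs and drops
    # entries older than `timeframe` relative to the newest one.
    def __init__(self, timeframe, getTimestamp=lambda pair: pair[0]['@timestamp']):
        self.timeframe = timeframe
        self.get_ts = getTimestamp
        self.data = deque()

    def append(self, pair):
        self.data.append(pair)
        newest = self.get_ts(self.data[-1])
        while self.data and newest - self.get_ts(self.data[0]) > self.timeframe:
            self.data.popleft()

    def count(self):
        return sum(count for _, count in self.data)

window = EventWindowSketch(datetime.timedelta(minutes=5))
t0 = datetime.datetime(2015, 6, 1, 12, 0)
for minutes in (0, 2, 8):
    window.append(({'@timestamp': t0 + datetime.timedelta(minutes=minutes)}, 1))
print(window.count())  # 1 -- the two events older than five minutes were dropped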
Beispiel #36
0
    def send_alert(self, matches, rule, alert_time=None):
        """ Send out an alert.

        :param matches: A list of matches.
        :param rule: A rule configuration.
        """
        if alert_time is None:
            alert_time = ts_now()

        # Compute top count keys
        if rule.get('top_count_keys'):
            for match in matches:
                if 'query_key' in rule and rule['query_key'] in match:
                    qk = match[rule['query_key']]
                else:
                    qk = None
                start = ts_to_dt(match[rule['timestamp_field']]) - rule.get(
                    'timeframe', datetime.timedelta(minutes=10))
                end = ts_to_dt(
                    match[rule['timestamp_field']]) + datetime.timedelta(
                        minutes=10)
                keys = rule.get('top_count_keys')
                counts = self.get_top_counts(rule, start, end, keys, qk=qk)
                match.update(counts)

        # Generate a kibana3 dashboard for the first match
        if rule.get('generate_kibana_link') or rule.get(
                'use_kibana_dashboard'):
            try:
                if rule.get('generate_kibana_link'):
                    kb_link = self.generate_kibana_db(rule, matches[0])
                else:
                    kb_link = self.use_kibana_link(rule, matches[0])
            except EAException as e:
                self.handle_error(
                    "Could not generate kibana dash for %s match: %s" %
                    (rule['name'], e))
            else:
                if kb_link:
                    matches[0]['kibana_link'] = kb_link

        if rule.get('use_kibana4_dashboard'):
            kb_link = self.generate_kibana4_db(rule, matches[0])
            if kb_link:
                matches[0]['kibana_link'] = kb_link

        #if not rule.get('run_enhancements_first'):
        #    for enhancement in rule.get('match_enhancements'):
        #        valid_matches = []
        #        for match in matches:
        #            try:
        #                enhancement.process(match)
        #                valid_matches.append(match)
        #            except DropMatchException as e:
        #                pass
        #            except EAException as e:
        #                self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})
        #        matches = valid_matches
        #        if not matches:
        #            return None

        # Don't send real alerts in debug mode
        if self.debug:
            alerter = DebugAlerter(rule)
            alerter.alert(matches)
            return None

        # Run the alerts
        alert_sent = False
        alert_exception = None
        # Alert.pipeline is a single object shared between every alerter
        # This allows alerters to pass objects and data between themselves
        alert_pipeline = {"alert_time": alert_time}
        for alert in rule.get("actions"):
            alert.pipeline = alert_pipeline
            try:
                alert.alert(matches)
            except EAException as e:
                self.handle_error(
                    'Error while running alert %s: %s' %
                    (alert.get_info()['type'], e), {'rule': rule['name']})
                alert_exception = str(e)
            else:
                self.alerts_sent += 1
                alert_sent = True
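The alert_pipeline comment above is the key detail: every alerter in the loop receives the same dict instance, so one alerter can leave data for the next. A self-contained sketch of that pattern (both alerter classes and the values are illustrative, not ElastAlert classes):

class FirstAlerterSketch(object):
    def alert(self, matches):
        self.pipeline['ticket_id'] = 'TICKET-1'  # hypothetical value left for later alerters

class SecondAlerterSketch(object):
    def alert(self, matches):
        print('follow-up for %s' % self.pipeline.get('ticket_id'))

alert_pipeline = {"alert_time": "2015-06-01T12:00:00Z"}
for alerter in (FirstAlerterSketch(), SecondAlerterSketch()):
    alerter.pipeline = alert_pipeline  # same object shared by every alerter
    alerter.alert([])
# prints: follow-up for TICKET-1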
Beispiel #37
0
    def run_rule(self, rule, endtime, starttime=None):
        """ Run a rule for a given time period, including querying and alerting on results.

        :param rule: The rule configuration.
        :param starttime: The earliest timestamp to query.
        :param endtime: The latest timestamp to query.
        :return: The number of matches that the rule produced.
        """
        run_start = time.time()
        self.current_es = Elasticsearch(host=rule['es_host'],
                                        port=rule['es_port'])
        self.current_es_addr = (rule['es_host'], rule['es_port'])

        # If there are pending aggregate matches, try processing them
        for x in range(len(rule['agg_matches'])):
            match = rule['agg_matches'].pop()
            self.add_aggregated_alert(match, rule)

        # Start from provided time if it's given
        if starttime:
            rule['starttime'] = starttime
        else:
            self.set_starttime(rule, endtime)
        rule['original_starttime'] = rule['starttime']

        # Don't run if starttime was set to the future
        if ts_now() <= rule['starttime']:
            logging.warning(
                "Attempted to use query start time in the future (%s), sleeping instead"
                % (starttime))
            return 0

        # Run the rule
        # If querying over a large time period, split it up into chunks
        self.num_hits = 0
        tmp_endtime = endtime
        buffer_time = rule.get('buffer_time', self.buffer_time)
        while endtime - rule['starttime'] > buffer_time:
            tmp_endtime = rule['starttime'] + self.run_every
            if not self.run_query(rule, rule['starttime'], tmp_endtime):
                return 0
            rule['starttime'] = tmp_endtime
        if not self.run_query(rule, rule['starttime'], endtime):
            return 0

        rule['type'].garbage_collect(endtime)

        # Process any new matches
        num_matches = len(rule['type'].matches)
        while rule['type'].matches:
            match = rule['type'].matches.pop(0)

            # If realert is set, silence the rule for that duration
            # Silence is cached by query_key, if it exists
            # Default realert time is 0 seconds

            # concatenate query_key (or none) with rule_name to form silence_cache key
            if 'query_key' in rule:
                try:
                    key = '.' + match[rule['query_key']]
                except KeyError:
                    # Some matches may not have a query key
                    key = ''
            else:
                key = ''

            if self.is_silenced(rule['name'] + key) or self.is_silenced(
                    rule['name']):
                logging.info('Ignoring match for silenced rule %s%s' %
                             (rule['name'], key))
                continue

            if rule['realert']:
                self.set_realert(
                    rule['name'] + key,
                    dt_to_ts(datetime.datetime.utcnow() + rule['realert']))

            # If no aggregation, alert immediately
            if not rule['aggregation']:
                self.alert([match], rule)
                continue

            # Add it as an aggregated match
            self.add_aggregated_alert(match, rule)

        time_taken = time.time() - run_start
        # Write to ES that we've run this rule against this time period
        body = {
            'rule_name': rule['name'],
            'endtime': endtime,
            'starttime': rule['starttime'],
            'matches': num_matches,
            'hits': self.num_hits,
            '@timestamp': ts_now(),
            'time_taken': time_taken
        }
        self.writeback('elastalert_status', body)

        return num_matches
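The query loop above splits a long window into run_every-sized chunks while more than buffer_time remains, then issues one final query up to endtime. A standalone sketch of that splitting logic (the interval values are illustrative):

import datetime

def chunk_range(starttime, endtime, run_every, buffer_time):
    # Mirror of the loop above: advance in run_every-sized steps while more
    # than buffer_time remains, then finish with one chunk up to endtime.
    chunks = []
    while endtime - starttime > buffer_time:
        tmp_endtime = starttime + run_every
        chunks.append((starttime, tmp_endtime))
        starttime = tmp_endtime
    chunks.append((starttime, endtime))
    return chunks

start = datetime.datetime(2015, 6, 1, 0, 0)
end = datetime.datetime(2015, 6, 1, 2, 0)
for s, e in chunk_range(start, end, datetime.timedelta(minutes=30), datetime.timedelta(minutes=45)):
    print(s, '->', e)
# 00:00 -> 00:30, 00:30 -> 01:00, 01:00 -> 01:30, 01:30 -> 02:00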
Beispiel #38
0
    def alert(self, matches, rule, alert_time=None):
        """ Send out an alert.

        :param matches: A list of matches.
        :param rule: A rule configuration.
        """
        if alert_time is None:
            alert_time = ts_now()

        # Compute top count keys
        if rule.get('top_count_keys'):
            for match in matches:
                if 'query_key' in rule and rule['query_key'] in match:
                    qk = match[rule['query_key']]
                else:
                    qk = None
                start = ts_to_dt(match[rule['timestamp_field']]) - rule.get('timeframe', datetime.timedelta(minutes=10))
                end = ts_to_dt(match[rule['timestamp_field']]) + datetime.timedelta(minutes=10)
                keys = rule.get('top_count_keys')
                counts = self.get_top_counts(rule, start, end, keys, rule.get('top_count_number'), qk)
                match.update(counts)

        # Generate a kibana dashboard for the first match
        if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'):
            try:
                if rule.get('generate_kibana_link'):
                    kb_link = self.generate_kibana_db(rule, matches[0])
                else:
                    kb_link = self.use_kibana_link(rule, matches[0])
            except EAException as e:
                self.handle_error("Could not generate kibana dash for %s match: %s" % (rule['name'], e))
            else:
                if kb_link:
                    matches[0]['kibana_link'] = kb_link

        for enhancement in rule['match_enhancements']:
            for match in matches:
                try:
                    enhancement.process(match)
                except EAException as e:
                    self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})

        # Don't send real alerts in debug mode
        if self.debug:
            alerter = DebugAlerter(rule)
            alerter.alert(matches)
            return

        # Run the alerts
        alert_sent = False
        alert_exception = None
        for alert in rule['alert']:
            try:
                alert.alert(matches)
            except EAException as e:
                self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']})
                alert_exception = str(e)
            else:
                self.alerts_sent += 1
                alert_sent = True

        # Write the alert(s) to ES
        agg_id = None
        for match in matches:
            alert_body = self.get_alert_body(match, rule, alert_sent, alert_time, alert_exception)
            # Set all matches to aggregate together
            if agg_id:
                alert_body['aggregate_id'] = agg_id
            res = self.writeback('elastalert', alert_body)
            if res and not agg_id:
                agg_id = res['_id']
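The write-back loop above chains matches together: the first write returns a document id, and every subsequent match records it as aggregate_id. A small sketch of that chaining with a fake writeback (the helper and the ids are purely illustrative):

import itertools

_ids = itertools.count(1)

def fake_writeback(doc_type, body):
    # Illustrative stand-in for self.writeback(): pretend ES assigned an id.
    return {'_id': 'doc-%d' % next(_ids)}

matches = [{'match': 1}, {'match': 2}, {'match': 3}]
agg_id = None
for match in matches:
    alert_body = dict(match)
    if agg_id:
        alert_body['aggregate_id'] = agg_id
    res = fake_writeback('elastalert', alert_body)
    if res and not agg_id:
        agg_id = res['_id']
    print(alert_body)
# {'match': 1}
# {'match': 2, 'aggregate_id': 'doc-1'}
# {'match': 3, 'aggregate_id': 'doc-1'}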
Beispiel #39
0
    def start(self):
        """ Periodically go through each rule and run it """
        starttime = self.args.start
        if starttime:
            try:
                starttime = ts_to_dt(starttime)
            except (TypeError, ValueError):
                self.handle_error(
                    "%s is not a valid ISO 8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)"
                    % (starttime))
                exit(1)
        while True:
            # If writeback_es errored, it's disabled until the next query cycle
            if not self.writeback_es:
                self.writeback_es = Elasticsearch(host=self.es_host,
                                                  port=self.es_port)

            self.send_pending_alerts()

            next_run = datetime.datetime.utcnow() + self.run_every

            for rule in self.rules:
                # Set endtime based on the rule's delay
                delay = rule.get('query_delay')
                if hasattr(self.args, 'end') and self.args.end:
                    endtime = ts_to_dt(self.args.end)
                elif delay:
                    endtime = ts_now() - delay
                else:
                    endtime = ts_now()

                try:
                    num_matches = self.run_rule(rule, endtime, starttime)
                except EAException as e:
                    self.handle_error(
                        "Error running rule %s: %s" % (rule['name'], e),
                        {'rule': rule['name']})
                else:
                    old_starttime = pretty_ts(rule.get('original_starttime'),
                                              rule.get('use_local_time'))
                    logging.info(
                        "Ran %s from %s to %s: %s query hits, %s matches,"
                        " %s alerts sent" %
                        (rule['name'], old_starttime,
                         pretty_ts(endtime, rule.get('use_local_time')),
                         self.num_hits, num_matches, self.alerts_sent))
                    self.alerts_sent = 0

                self.remove_old_events(rule)

            if next_run < datetime.datetime.utcnow():
                # We were processing for longer than our refresh interval
                # This can happen if --start was specified with a large time period
                # or if we are running too slow to process events in real time.
                logging.warning("Querying from %s to %s took longer than %s!" %
                                (old_starttime, endtime, self.run_every))
                continue

            # Only force starttime once
            starttime = None

            if not self.args.pin_rules:
                self.load_rule_changes()

            # Wait before querying again
            sleep_for = (next_run - datetime.datetime.utcnow()).seconds
            logging.info("Sleeping for %s seconds" % (sleep_for))
            time.sleep(sleep_for)
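The scheduler fixes next_run before processing the rules and sleeps for whatever is left of run_every afterwards; if processing overran, it skips the sleep and loops again immediately. A compressed sketch of that pattern (the interval and the sleep stand-in are illustrative):

import datetime
import time

run_every = datetime.timedelta(seconds=2)  # illustrative interval
next_run = datetime.datetime.utcnow() + run_every

time.sleep(0.5)  # stand-in for running all the rules

remaining = next_run - datetime.datetime.utcnow()
if remaining > datetime.timedelta(0):
    # total_seconds() keeps fractions; .seconds (as above) floors to whole seconds
    time.sleep(remaining.total_seconds())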
Beispiel #40
0
    def run_rule(self, rule, endtime, starttime=None):
        """ Run a rule for a given time period, including querying and alerting on results.

        :param rule: The rule configuration.
        :param starttime: The earliest timestamp to query.
        :param endtime: The latest timestamp to query.
        :return: The number of matches that the rule produced.
        """
        run_start = time.time()

        rule_es_conn_config = self.build_es_conn_config(rule)
        self.current_es = self.new_elasticsearch(rule_es_conn_config)
        self.current_es_addr = (rule['es_host'], rule['es_port'])

        # If there are pending aggregate matches, try processing them
        for x in range(len(rule['agg_matches'])):
            match = rule['agg_matches'].pop()
            self.add_aggregated_alert(match, rule)

        # Start from provided time if it's given
        if starttime:
            rule['starttime'] = starttime
        else:
            self.set_starttime(rule, endtime)
        rule['original_starttime'] = rule['starttime']

        # Don't run if starttime was set to the future
        if ts_now() <= rule['starttime']:
            logging.warning("Attempted to use query start time in the future (%s), sleeping instead" % (starttime))
            return 0

        # Run the rule
        # If querying over a large time period, split it up into chunks
        self.num_hits = 0
        tmp_endtime = endtime
        buffer_time = rule.get('buffer_time', self.buffer_time)
        while endtime - rule['starttime'] > buffer_time:
            tmp_endtime = rule['starttime'] + self.run_every
            if not self.run_query(rule, rule['starttime'], tmp_endtime):
                return 0
            rule['starttime'] = tmp_endtime
        if not self.run_query(rule, rule['starttime'], endtime):
            return 0

        rule['type'].garbage_collect(endtime)

        # Process any new matches
        num_matches = len(rule['type'].matches)
        while rule['type'].matches:
            match = rule['type'].matches.pop(0)

            # If realert is set, silence the rule for that duration
            # Silence is cached by query_key, if it exists
            # Default realert time is 0 seconds

            # concatenate query_key (or none) with rule_name to form silence_cache key
            if 'query_key' in rule:
                try:
                    key = '.' + match[rule['query_key']]
                except KeyError:
                    # Some matches may not have a query key
                    key = ''
            else:
                key = ''

            if self.is_silenced(rule['name'] + key) or self.is_silenced(rule['name']):
                logging.info('Ignoring match for silenced rule %s%s' % (rule['name'], key))
                continue

            if rule['realert']:
                next_alert, exponent = self.next_alert_time(rule, rule['name'] + key, ts_now())
                self.set_realert(rule['name'] + key, next_alert, exponent)

            # If no aggregation, alert immediately
            if not rule['aggregation']:
                self.alert([match], rule)
                continue

            # Add it as an aggregated match
            self.add_aggregated_alert(match, rule)

        time_taken = time.time() - run_start
        # Write to ES that we've run this rule against this time period
        body = {'rule_name': rule['name'],
                'endtime': endtime,
                'starttime': rule['starttime'],
                'matches': num_matches,
                'hits': self.num_hits,
                '@timestamp': ts_now(),
                'time_taken': time_taken}
        self.writeback('elastalert_status', body)

        return num_matches
Beispiel #41
0
    def alert(self, matches, rule, alert_time=None):
        """ Send out an alert.

        :param matches: A list of matches.
        :param rule: A rule configuration.
        """
        if alert_time is None:
            alert_time = ts_now()

        # Compute top count keys
        if rule.get('top_count_keys'):
            for match in matches:
                if 'query_key' in rule and rule['query_key'] in match:
                    qk = match[rule['query_key']]
                else:
                    qk = None
                start = ts_to_dt(match[rule['timestamp_field']]) - rule.get(
                    'timeframe', datetime.timedelta(minutes=10))
                end = ts_to_dt(
                    match[rule['timestamp_field']]) + datetime.timedelta(
                        minutes=10)
                keys = rule.get('top_count_keys')
                counts = self.get_top_counts(rule, start, end, keys,
                                             rule.get('top_count_number'), qk)
                match.update(counts)

        # Generate a kibana dashboard for the first match
        if rule.get('generate_kibana_link') or rule.get(
                'use_kibana_dashboard'):
            try:
                if rule.get('generate_kibana_link'):
                    kb_link = self.generate_kibana_db(rule, matches[0])
                else:
                    kb_link = self.use_kibana_link(rule, matches[0])
            except EAException as e:
                self.handle_error(
                    "Could not generate kibana dash for %s match: %s" %
                    (rule['name'], e))
            else:
                if kb_link:
                    matches[0]['kibana_link'] = kb_link

        for enhancement in rule['match_enhancements']:
            for match in matches:
                try:
                    enhancement.process(match)
                except EAException as e:
                    self.handle_error(
                        "Error running match enhancement: %s" % (e),
                        {'rule': rule['name']})

        # Don't send real alerts in debug mode
        if self.debug:
            alerter = DebugAlerter(rule)
            alerter.alert(matches)
            return

        # Run the alerts
        alert_sent = False
        alert_exception = None
        for alert in rule['alert']:
            try:
                alert.alert(matches)
            except EAException as e:
                self.handle_error(
                    'Error while running alert %s: %s' %
                    (alert.get_info()['type'], e), {'rule': rule['name']})
                alert_exception = str(e)
            else:
                self.alerts_sent += 1
                alert_sent = True

        # Write the alert(s) to ES
        agg_id = None
        for match in matches:
            alert_body = self.get_alert_body(match, rule, alert_sent,
                                             alert_time, alert_exception)
            # Set all matches to aggregate together
            if agg_id:
                alert_body['aggregate_id'] = agg_id
            res = self.writeback('elastalert', alert_body)
            if res and not agg_id:
                agg_id = res['_id']
Beispiel #42
0
    def send_alert(self, matches, rule, alert_time=None):
        """ Send out an alert.

        :param matches: A list of matches.
        :param rule: A rule configuration.
        """
        if alert_time is None:
            alert_time = ts_now()

        # Compute top count keys
        if rule.get('top_count_keys'):
            for match in matches:
                if 'query_key' in rule and rule['query_key'] in match:
                    qk = match[rule['query_key']]
                else:
                    qk = None
                start = ts_to_dt(match[rule['timestamp_field']]) - rule.get('timeframe', datetime.timedelta(minutes=10))
                end = ts_to_dt(match[rule['timestamp_field']]) + datetime.timedelta(minutes=10)
                keys = rule.get('top_count_keys')
                counts = self.get_top_counts(rule, start, end, keys, qk=qk)
                match.update(counts)

        # Generate a kibana3 dashboard for the first match
        if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'):
            try:
                if rule.get('generate_kibana_link'):
                    kb_link = self.generate_kibana_db(rule, matches[0])
                else:
                    kb_link = self.use_kibana_link(rule, matches[0])
            except EAException as e:
                self.handle_error("Could not generate kibana dash for %s match: %s" % (rule['name'], e))
            else:
                if kb_link:
                    matches[0]['kibana_link'] = kb_link

        if rule.get('use_kibana4_dashboard'):
            kb_link = self.generate_kibana4_db(rule, matches[0])
            if kb_link:
                matches[0]['kibana_link'] = kb_link

        #if not rule.get('run_enhancements_first'):
        #    for enhancement in rule.get('match_enhancements'):
        #        valid_matches = []
        #        for match in matches:
        #            try:
        #                enhancement.process(match)
        #                valid_matches.append(match)
        #            except DropMatchException as e:
        #                pass
        #            except EAException as e:
        #                self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']})
        #        matches = valid_matches
        #        if not matches:
        #            return None

        # Don't send real alerts in debug mode
        if self.debug:
            alerter = DebugAlerter(rule)
            alerter.alert(matches)
            return None

        # Run the alerts
        alert_sent = False
        alert_exception = None
        # Alert.pipeline is a single object shared between every alerter
        # This allows alerters to pass objects and data between themselves
        alert_pipeline = {"alert_time": alert_time}
        for alert in rule.get("actions"):
            alert.pipeline = alert_pipeline
            try:
                alert.alert(matches)
            except EAException as e:
                self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']})
                alert_exception = str(e)
            else:
                self.alerts_sent += 1
                alert_sent = True
Beispiel #43
0
    def run_rule(self, rule):
        """ Run a rule including querying and alerting on results.

        :param rule: The rule configuration.
        :return: The number of matches that the rule produced.
        """

        elastalert_logger.info('Starting to run rule: %s', rule.get('name'))
        # Run the rule. If querying over a large time period, split it up into segments
        self.num_hits = 0
        rule_request = rule.get("input").get("search").get("request")
        if rule_request.get("elastic_host") is not None and rule_request.get("elastic_port") is not None:
            self.current_es = Elasticsearch(host=rule_request.get("elastic_host"),
                                            port=rule_request.get("elastic_port"))
        else:
            self.current_es = self.new_elasticsearch(self.global_config)

        self.run_query(rule)

        # Process any new matches
        num_matches = len(rule['type'].matches)

        while rule['type'].matches:
            match = rule['type'].matches.pop(0)

            # The silencing check is disabled in this variant, but the realert
            # bookkeeping below still needs a cache-key suffix, so derive it here.
            if 'query_key' in rule and rule['query_key'] in match:
                key = '.' + str(match[rule['query_key']])
            else:
                key = ''

            #if self.is_silenced(rule['name'] + key) or self.is_silenced(rule['name']):
            #    elastalert_logger.info('Ignoring match for silenced rule %s%s' % (rule['name'], key))
            #    continue

            if rule.get('realert'):
                next_alert, exponent = self.next_alert_time(rule, rule['name'] + key, ts_now())
                self.set_realert(rule['name'] + key, next_alert, exponent)

            # If no aggregation, alert immediately
            #if not rule['aggregation']:
            #    self.alert([match], rule)
            #    continue
            self.alert([match], rule)

            # Add it as an aggregated match
            #self.add_aggregated_alert(match, rule)

        # Mark this endtime for next run's start
        #rule['previous_endtime'] = endtime

        #time_taken = time.time() - run_start
        

        return num_matches