Example #1
def test_new_term_with_terms():
    rules = {'fields': ['a'],
             'timestamp_field': '@timestamp',
             'es_host': 'example.com', 'es_port': 10, 'index': 'logstash', 'query_key': 'a',
             'window_step_size': {'days': 2}}
    mock_res = {'aggregations': {'filtered': {'values': {'buckets': [{'key': 'key1', 'doc_count': 1},
                                                                     {'key': 'key2', 'doc_count': 5}]}}}}

    with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        mock_es.return_value.info.return_value = {'version': {'number': '2.x.x'}}
        rule = NewTermsRule(rules)

        # Only 15 queries because of custom step size
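        # (30-day default lookback / 2-day window_step_size = 15 windows, one search per window)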
        assert rule.es.search.call_count == 15

    # Key1 and key2 shouldn't cause a match
    terms = {ts_now(): [{'key': 'key1', 'doc_count': 1},
                        {'key': 'key2', 'doc_count': 1}]}
    rule.add_terms_data(terms)
    assert rule.matches == []

    # Key3 causes an alert for field a
    terms = {ts_now(): [{'key': 'key3', 'doc_count': 1}]}
    rule.add_terms_data(terms)
    assert len(rule.matches) == 1
    assert rule.matches[0]['new_field'] == 'a'
    assert rule.matches[0]['a'] == 'key3'
    rule.matches = []

    # Key3 doesn't cause another alert
    terms = {ts_now(): [{'key': 'key3', 'doc_count': 1}]}
    rule.add_terms_data(terms)
    assert rule.matches == []
Example #2
def test_new_term_with_terms():
    rules = {'fields': ['a'],
             'timestamp_field': '@timestamp',
             'es_host': 'example.com', 'es_port': 10, 'index': 'logstash', 'query_key': 'a'}
    mock_res = {'aggregations': {'filtered': {'values': {'buckets': [{'key': 'key1', 'doc_count': 1},
                                                                     {'key': 'key2', 'doc_count': 5}]}}}}

    with mock.patch('elastalert.ruletypes.Elasticsearch') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        rule = NewTermsRule(rules)

        assert rule.es.search.call_count == 1

    # Key1 and key2 shouldn't cause a match
    terms = {ts_now(): [{'key': 'key1', 'doc_count': 1},
                        {'key': 'key2', 'doc_count': 1}]}
    rule.add_terms_data(terms)
    assert rule.matches == []

    # Key3 causes an alert for field a
    terms = {ts_now(): [{'key': 'key3', 'doc_count': 1}]}
    rule.add_terms_data(terms)
    assert len(rule.matches) == 1
    assert rule.matches[0]['new_field'] == 'a'
    assert rule.matches[0]['a'] == 'key3'
    rule.matches = []

    # Key3 doesn't cause another alert
    terms = {ts_now(): [{'key': 'key3', 'doc_count': 1}]}
    rule.add_terms_data(terms)
    assert rule.matches == []
Example #3
def check_files():
    print("Note: This tool is for testing filters and config syntax. It will not process data or alert.\n")
    parser = argparse.ArgumentParser(description='Validate a rule configuration')
    parser.add_argument('files', metavar='file', type=str, nargs='+', help='rule configuration filename')
    parser.add_argument('--schema-only', action='store_true', help='Show only schema errors; do not run query')
    parser.add_argument('--days', type=int, default=[1, 7], nargs='+', help='Query the previous N days with this rule')
    args = parser.parse_args()

    for filename in args.files:
        with open(filename) as fh:
            conf = yaml.safe_load(fh)
        load_options(conf)
        print("Successfully loaded %s\n" % (conf['name']))

        if args.schema_only:
            continue

        es_client = Elasticsearch(host=conf['es_host'], port=conf['es_port'])
        for days in args.days:
            start_time = ts_now() - datetime.timedelta(days=days)
            end_time = ts_now()
            ts = conf.get('timestamp_field', '@timestamp')
            query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
            index = ElastAlerter.get_index(conf, start_time, end_time)
            try:
                res = es_client.search(index, size=1000, body=query)
            except Exception as e:
                print("Error running your filter:")
                print(repr(e)[:2048])
                exit(1)

            num_hits = len(res['hits']['hits'])
            print("Got %s hits from the last %s day%s" % (num_hits if num_hits != 1000 else '1000+', days,
                                                          's' if days > 1 else ''))

        if num_hits:
            print("\nAvailable terms in first hit:")
            terms = res['hits']['hits'][0]['_source']
            print_terms(terms, '')

            pk = conf.get('primary_key')
            ck = conf.get('compare_key')
            if pk and not lookup_es_key(terms, pk):
                print("Warning: primary key %s is either missing or null!")
            if ck and not lookup_es_key(terms, ck):
                print("Warning: compare key %s is either missing or null!")

            include = conf.get('include')
            if include:
                for term in include:
                    if not lookup_es_key(terms, term) and '*' not in term:
                        print("Included term %s may be missing or null" % (term))

            for term in conf.get('top_count_keys', []):
                # If the index starts with 'logstash', fields with .raw will be available but won't be in _source
                if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
                    print("top_count_key %s may be missing" % (term))
        print('')
Example #4
def test_new_term():
    rules = {
        "fields": ["a", "b"],
        "timestamp_field": "@timestamp",
        "es_host": "example.com",
        "es_port": 10,
        "index": "logstash",
    }
    mock_res = {
        "aggregations": {
            "filtered": {"values": {"buckets": [{"key": "key1", "doc_count": 1}, {"key": "key2", "doc_count": 5}]}}
        }
    }

    with mock.patch("elastalert.ruletypes.Elasticsearch") as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        rule = NewTermsRule(rules)

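        # Two fields configured; this version of the rule appears to issue one terms query per field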
        assert rule.es.search.call_count == 2

    # Key1 and key2 shouldn't cause a match
    rule.add_data([{"@timestamp": ts_now(), "a": "key1", "b": "key2"}])
    assert rule.matches == []

    # Neither will missing values
    rule.add_data([{"@timestamp": ts_now(), "a": "key2"}])
    assert rule.matches == []

    # Key3 causes an alert for field b
    rule.add_data([{"@timestamp": ts_now(), "a": "key2", "b": "key3"}])
    assert len(rule.matches) == 1
    assert rule.matches[0]["new_field"] == "b"
    assert rule.matches[0]["b"] == "key3"
    rule.matches = []

    # Key3 doesn't cause another alert for field b
    rule.add_data([{"@timestamp": ts_now(), "a": "key2", "b": "key3"}])
    assert rule.matches == []

    # Missing_field
    rules["alert_on_missing_field"] = True
    with mock.patch("elastalert.ruletypes.Elasticsearch") as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        rule = NewTermsRule(rules)
    rule.add_data([{"@timestamp": ts_now(), "a": "key2"}])
    assert len(rule.matches) == 1
    assert rule.matches[0]["missing_field"] == "b"
Example #5
def test_new_term():
    rules = {'fields': ['a', 'b'],
             'timestamp_field': '@timestamp',
             'es_host': 'example.com', 'es_port': 10, 'index': 'logstash'}
    mock_res = {'aggregations': {'filtered': {'values': {'buckets': [{'key': 'key1', 'doc_count': 1},
                                                                     {'key': 'key2', 'doc_count': 5}]}}}}

    with mock.patch('elastalert.ruletypes.Elasticsearch') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        rule = NewTermsRule(rules)

        assert rule.es.search.call_count == 2

    # Key1 and key2 shouldn't cause a match
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key1', 'b': 'key2'}])
    assert rule.matches == []

    # Neither will missing values
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2'}])
    assert rule.matches == []

    # Key3 causes an alert for field b
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2', 'b': 'key3'}])
    assert len(rule.matches) == 1
    assert rule.matches[0]['new_field'] == 'b'
    assert rule.matches[0]['b'] == 'key3'
    rule.matches = []

    # Key3 doesn't cause another alert for field b
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2', 'b': 'key3'}])
    assert rule.matches == []

    # Missing_field
    rules['alert_on_missing_field'] = True
    with mock.patch('elastalert.ruletypes.Elasticsearch') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        rule = NewTermsRule(rules)
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2'}])
    assert len(rule.matches) == 1
    assert rule.matches[0]['missing_field'] == 'b'
Example #6
def test_remove_old_events(ea):
    now = ts_now()
    minute = datetime.timedelta(minutes=1)
    ea.rules[0]['processed_hits'] = {'foo': now - minute,
                                     'bar': now - minute * 5,
                                     'baz': now - minute * 15}
    ea.rules[0]['buffer_time'] = datetime.timedelta(minutes=10)

    # With a query delay, only events older than 20 minutes will be removed (none)
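    # (cutoff = buffer_time 10m + query_delay 10m = 20 minutes)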
    ea.rules[0]['query_delay'] = datetime.timedelta(minutes=10)
    ea.remove_old_events(ea.rules[0])
    assert len(ea.rules[0]['processed_hits']) == 3

    # With no query delay, the 15 minute old event will be removed
    ea.rules[0].pop('query_delay')
    ea.remove_old_events(ea.rules[0])
    assert len(ea.rules[0]['processed_hits']) == 2
    assert 'baz' not in ea.rules[0]['processed_hits']
Example #7
def test_new_term_nested_field():

    rules = {'fields': ['a', 'b.c'],
             'timestamp_field': '@timestamp',
             'es_host': 'example.com', 'es_port': 10, 'index': 'logstash'}
    mock_res = {'aggregations': {'filtered': {'values': {'buckets': [{'key': 'key1', 'doc_count': 1},
                                                                     {'key': 'key2', 'doc_count': 5}]}}}}
    with mock.patch('elastalert.ruletypes.Elasticsearch') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        rule = NewTermsRule(rules)

        assert rule.es.search.call_count == 2

    # Key3 causes an alert for nested field b.c
    rule.add_data([{'@timestamp': ts_now(), 'b': {'c': 'key3'}}])
    assert len(rule.matches) == 1
    assert rule.matches[0]['new_field'] == 'b.c'
    assert rule.matches[0]['b']['c'] == 'key3'
    rule.matches = []
Example #8
    def test_file(self, conf, args):
        """ Loads a rule config file, performs a query over the last day (args.days), lists available keys
        and prints the number of results. """
        load_options(conf, {})
        print("Successfully loaded %s\n" % (conf['name']))

        if args.schema_only:
            return []

        # Set up elasticsearch client and query
        es_config = ElastAlerter.build_es_conn_config(conf)
        es_client = ElastAlerter.new_elasticsearch(es_config)
        start_time = ts_now() - datetime.timedelta(days=args.days)
        end_time = ts_now()
        ts = conf.get('timestamp_field', '@timestamp')
        query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts)
        index = ElastAlerter.get_index(conf, start_time, end_time)

        # Get one document for schema
        try:
            res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None
        num_hits = len(res['hits']['hits'])
        if not num_hits:
            return []

        terms = res['hits']['hits'][0]['_source']
        doc_type = res['hits']['hits'][0]['_type']

        # Get a count of all docs
        count_query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts, sort=False)
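        # 'filtered' is the pre-Elasticsearch-5.0 query wrapper; ES 5 removed it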
        count_query = {'query': {'filtered': count_query}}
        try:
            res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
        except Exception as e:
            print("Error querying Elasticsearch:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None

        num_hits = res['count']
        print("Got %s hits from the last %s day%s" % (num_hits, args.days, 's' if args.days > 1 else ''))
        print("\nAvailable terms in first hit:")
        print_terms(terms, '')

        # Check for missing keys
        pk = conf.get('primary_key')
        ck = conf.get('compare_key')
        if pk and not lookup_es_key(terms, pk):
            print("Warning: primary key %s is either missing or null!", file=sys.stderr)
        if ck and not lookup_es_key(terms, ck):
            print("Warning: compare key %s is either missing or null!", file=sys.stderr)

        include = conf.get('include')
        if include:
            for term in include:
                if not lookup_es_key(terms, term) and '*' not in term:
                    print("Included term %s may be missing or null" % (term), file=sys.stderr)

        for term in conf.get('top_count_keys', []):
            # If the index starts with 'logstash', fields with .raw will be available but won't be in _source
            if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
                print("top_count_key %s may be missing" % (term), file=sys.stderr)
        print('')  # Newline

        # Download up to 10,000 documents to save
        if args.save and not args.count:
            try:
                res = es_client.search(index, size=10000, body=query, ignore_unavailable=True)
            except Exception as e:
                print("Error running your filter:", file=sys.stderr)
                print(repr(e)[:2048], file=sys.stderr)
                return None
            num_hits = len(res['hits']['hits'])
            print("Downloaded %s documents to save" % (num_hits))
            return res['hits']['hits']

        return None
Example #9
    def run_elastalert(self, rule, args):
        """ Creates an ElastAlert instance and run's over for a specific rule using either real or mock data. """
        # Mock configuration. Nothing here is used except run_every
        conf = {'rules_folder': 'rules',
                'run_every': datetime.timedelta(minutes=5),
                'buffer_time': datetime.timedelta(minutes=45),
                'alert_time_limit': datetime.timedelta(hours=24),
                'es_host': 'es',
                'es_port': 14900,
                'writeback_index': 'wb',
                'max_query_size': 100000,
                'old_query_limit': datetime.timedelta(weeks=1),
                'disable_rules_on_error': False}

        # Load and instantiate rule
        load_options(rule, conf)
        load_modules(rule)
        conf['rules'] = [rule]

        # If using mock data, make sure it's sorted and find appropriate time range
        timestamp_field = rule.get('timestamp_field', '@timestamp')
        if args.json:
            if not self.data:
                return
            try:
                self.data.sort(key=lambda x: x[timestamp_field])
                starttime = ts_to_dt(self.data[0][timestamp_field])
                endtime = self.data[-1][timestamp_field]
                endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
            except KeyError as e:
                print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
                return

            # Create mock _id for documents if it's missing
            used_ids = []

            def get_id():
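                # 16 random letters; recurse to retry on the (unlikely) collision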
                _id = ''.join([random.choice(string.ascii_letters) for i in range(16)])
                if _id in used_ids:
                    return get_id()
                used_ids.append(_id)
                return _id

            for doc in self.data:
                doc.update({'_id': doc.get('_id', get_id())})
        else:
            endtime = ts_now()
            starttime = endtime - datetime.timedelta(days=args.days)

        # Set run_every to cover the entire time range unless use_count_query or use_terms_query is set
        # This is to prevent query segmenting which unnecessarily slows down tests
        if not rule.get('use_terms_query') and not rule.get('use_count_query'):
            conf['run_every'] = endtime - starttime

        # Instantiate ElastAlert to use mock config and special rule
        with mock.patch('elastalert.elastalert.get_rule_hashes'):
            with mock.patch('elastalert.elastalert.load_rules') as load_conf:
                load_conf.return_value = conf
                if args.alert:
                    client = ElastAlerter(['--verbose'])
                else:
                    client = ElastAlerter(['--debug'])

        # Replace get_hits_* functions to use mock data
        if args.json:
            self.mock_elastalert(client)

        # Mock writeback for both real data and json data
        client.writeback_es = None
        with mock.patch.object(client, 'writeback') as mock_writeback:
            client.run_rule(rule, endtime, starttime)

            if mock_writeback.call_count:
                print("\nWould have written the following documents to elastalert_status:\n")
                for call in mock_writeback.call_args_list:
                    print("%s - %s\n" % (call[0][0], call[0][1]))
Example #10
def test_new_term_with_composite_fields():
    rules = {'fields': [['a', 'b', 'c'], ['d', 'e.f']],
             'timestamp_field': '@timestamp',
             'es_host': 'example.com', 'es_port': 10, 'index': 'logstash'}

    mock_res = {
        'aggregations': {
            'filtered': {
                'values': {
                    'buckets': [
                        {
                            'key': 'key1',
                            'doc_count': 5,
                            'values': {
                                'buckets': [
                                    {
                                        'key': 'key2',
                                        'doc_count': 5,
                                        'values': {
                                            'buckets': [
                                                {
                                                    'key': 'key3',
                                                    'doc_count': 3,
                                                },
                                                {
                                                    'key': 'key4',
                                                    'doc_count': 2,
                                                },
                                            ]
                                        }
                                    }
                                ]
                            }
                        }
                    ]
                }
            }
        }
    }

    with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        mock_es.return_value.info.return_value = {'version': {'number': '2.x.x'}}
        rule = NewTermsRule(rules)

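        # Presumably the default 30-day lookback in 1-day steps, times 2 composite fields = 60 searches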
        assert rule.es.search.call_count == 60

    # key3 already exists, and thus shouldn't cause a match
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key1', 'b': 'key2', 'c': 'key3'}])
    assert rule.matches == []

    # key5 causes an alert for composite field [a, b, c]
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key1', 'b': 'key2', 'c': 'key5'}])
    assert len(rule.matches) == 1
    assert rule.matches[0]['new_field'] == ('a', 'b', 'c')
    assert rule.matches[0]['a'] == 'key1'
    assert rule.matches[0]['b'] == 'key2'
    assert rule.matches[0]['c'] == 'key5'
    rule.matches = []

    # New values in other fields that are not part of the composite key should not cause an alert
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key1', 'b': 'key2', 'c': 'key4', 'd': 'unrelated_value'}])
    assert len(rule.matches) == 0
    rule.matches = []

    # Verify nested fields work properly
    # Key6 causes an alert for nested field e.f
    rule.add_data([{'@timestamp': ts_now(), 'd': 'key4', 'e': {'f': 'key6'}}])
    assert len(rule.matches) == 1
    assert rule.matches[0]['new_field'] == ('d', 'e.f')
    assert rule.matches[0]['d'] == 'key4'
    assert rule.matches[0]['e']['f'] == 'key6'
    rule.matches = []

    # Missing_fields
    rules['alert_on_missing_field'] = True
    with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        mock_es.return_value.info.return_value = {'version': {'number': '2.x.x'}}
        rule = NewTermsRule(rules)
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2'}])
    assert len(rule.matches) == 2
    # Each composite field had at least one subfield missing, hence one match per composite field
    assert rule.matches[0]['missing_field'] == ('a', 'b', 'c')
    assert rule.matches[1]['missing_field'] == ('d', 'e.f')
Example #11
    def test_file(self, conf, args):
        """ Loads a rule config file, performs a query over the last day (args.days), lists available keys
        and prints the number of results. """
        if args.schema_only:
            return []

        # Set up Elasticsearch client and query
        es_client = elasticsearch_client(conf)

        try:
            ElastAlerter.modify_rule_for_ES5(conf)
        except Exception as e:
            print("Error connecting to ElasticSearch:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            if args.stop_error:
                exit(1)
            return None

        start_time = ts_now() - datetime.timedelta(days=args.days)
        end_time = ts_now()
        ts = conf.get('timestamp_field', '@timestamp')
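        # conf['five'] presumably flags an Elasticsearch 5.x cluster and selects the matching query syntax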
        query = ElastAlerter.get_query(
            conf['filter'],
            starttime=start_time,
            endtime=end_time,
            timestamp_field=ts,
            to_ts_func=conf['dt_to_ts'],
            five=conf['five']
        )
        index = ElastAlerter.get_index(conf, start_time, end_time)

        # Get one document for schema
        try:
            res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            if args.stop_error:
                exit(1)
            return None
        num_hits = len(res['hits']['hits'])
        if not num_hits:
            return []

        terms = res['hits']['hits'][0]['_source']
        doc_type = res['hits']['hits'][0]['_type']

        # Get a count of all docs
        count_query = ElastAlerter.get_query(
            conf['filter'],
            starttime=start_time,
            endtime=end_time,
            timestamp_field=ts,
            to_ts_func=conf['dt_to_ts'],
            sort=False,
            five=conf['five']
        )
        try:
            res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
        except Exception as e:
            print("Error querying Elasticsearch:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            if args.stop_error:
                exit(1)
            return None

        num_hits = res['count']

        if args.formatted_output:
            self.formatted_output['hits'] = num_hits
            self.formatted_output['days'] = args.days
            self.formatted_output['terms'] = terms.keys()
            self.formatted_output['result'] = terms
        else:
            print("Got %s hits from the last %s day%s" % (num_hits, args.days, 's' if args.days > 1 else ''))
            print("\nAvailable terms in first hit:")
            print_terms(terms, '')

        # Check for missing keys
        pk = conf.get('primary_key')
        ck = conf.get('compare_key')
        if pk and not lookup_es_key(terms, pk):
            print("Warning: primary key %s is either missing or null!", file=sys.stderr)
        if ck and not lookup_es_key(terms, ck):
            print("Warning: compare key %s is either missing or null!", file=sys.stderr)

        include = conf.get('include')
        if include:
            for term in include:
                if not lookup_es_key(terms, term) and '*' not in term:
                    print("Included term %s may be missing or null" % (term), file=sys.stderr)

        for term in conf.get('top_count_keys', []):
            # If the index starts with 'logstash', fields with .raw will be available but won't be in _source
            if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
                print("top_count_key %s may be missing" % (term), file=sys.stderr)
        if not args.formatted_output:
            print('')  # Newline

        # Download up to max_query_size (defaults to 10,000) documents to save
        if (args.save or args.formatted_output) and not args.count:
            try:
                res = es_client.search(index, size=args.max_query_size, body=query, ignore_unavailable=True)
            except Exception as e:
                print("Error running your filter:", file=sys.stderr)
                print(repr(e)[:2048], file=sys.stderr)
                if args.stop_error:
                    exit(1)
                return None
            num_hits = len(res['hits']['hits'])

            if args.save:
                print("Downloaded %s documents to save" % (num_hits))
            return res['hits']['hits']
Example #12
def test_new_term():
    rules = {'fields': ['a', 'b'],
             'timestamp_field': '@timestamp',
             'es_host': 'example.com', 'es_port': 10, 'index': 'logstash'}
    mock_res = {'aggregations': {'filtered': {'values': {'buckets': [{'key': 'key1', 'doc_count': 1},
                                                                     {'key': 'key2', 'doc_count': 5}]}}}}

    with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        mock_es.return_value.info.return_value = {'version': {'number': '2.x.x'}}
        call_args = []

        # search is called with a mutable dict containing timestamps; record deep copies so the call arguments can be inspected later
        def record_args(*args, **kwargs):
            call_args.append((copy.deepcopy(args), copy.deepcopy(kwargs)))
            return mock_res

        mock_es.return_value.search.side_effect = record_args
        rule = NewTermsRule(rules)

    # 30 day default range, 1 day default step, times 2 fields
    assert rule.es.search.call_count == 60

    # Assert that all calls have the proper ordering of time ranges
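    # old_ts starts at a sentinel that predates any queried window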
    old_ts = '2010-01-01T00:00:00Z'
    old_field = ''
    for call in call_args:
        field = call[1]['body']['aggs']['filtered']['aggs']['values']['terms']['field']
        if old_field != field:
            old_field = field
            old_ts = '2010-01-01T00:00:00Z'
        gte = call[1]['body']['aggs']['filtered']['filter']['bool']['must'][0]['range']['@timestamp']['gte']
        assert gte > old_ts
        lt = call[1]['body']['aggs']['filtered']['filter']['bool']['must'][0]['range']['@timestamp']['lt']
        assert lt > gte
        old_ts = gte

    # Key1 and key2 shouldn't cause a match
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key1', 'b': 'key2'}])
    assert rule.matches == []

    # Neither will missing values
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2'}])
    assert rule.matches == []

    # Key3 causes an alert for field b
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2', 'b': 'key3'}])
    assert len(rule.matches) == 1
    assert rule.matches[0]['new_field'] == 'b'
    assert rule.matches[0]['b'] == 'key3'
    rule.matches = []

    # Key3 doesn't cause another alert for field b
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2', 'b': 'key3'}])
    assert rule.matches == []

    # Missing_field
    rules['alert_on_missing_field'] = True
    with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        mock_es.return_value.info.return_value = {'version': {'number': '2.x.x'}}
        rule = NewTermsRule(rules)
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2'}])
    assert len(rule.matches) == 1
    assert rule.matches[0]['missing_field'] == 'b'
Example #13
def test_new_term():
    rules = {
        'fields': ['a', 'b'],
        'timestamp_field': '@timestamp',
        'es_host': 'example.com',
        'es_port': 10,
        'index': 'logstash',
        'ts_to_dt': ts_to_dt,
        'dt_to_ts': dt_to_ts
    }
    mock_res = {
        'aggregations': {
            'filtered': {
                'values': {
                    'buckets': [{
                        'key': 'key1',
                        'doc_count': 1
                    }, {
                        'key': 'key2',
                        'doc_count': 5
                    }]
                }
            }
        }
    }

    with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        mock_es.return_value.info.return_value = {
            'version': {
                'number': '2.x.x'
            }
        }
        call_args = []

        # search is called with a mutable dict containing timestamps; record deep copies so the call arguments can be inspected later
        def record_args(*args, **kwargs):
            call_args.append((copy.deepcopy(args), copy.deepcopy(kwargs)))
            return mock_res

        mock_es.return_value.search.side_effect = record_args
        rule = NewTermsRule(rules)

    # 30 day default range, 1 day default step, times 2 fields
    assert rule.es.search.call_count == 60

    # Assert that all calls have the proper ordering of time ranges
    old_ts = '2010-01-01T00:00:00Z'
    old_field = ''
    for call in call_args:
        field = call[1]['body']['aggs']['filtered']['aggs']['values']['terms'][
            'field']
        if old_field != field:
            old_field = field
            old_ts = '2010-01-01T00:00:00Z'
        gte = call[1]['body']['aggs']['filtered']['filter']['bool']['must'][0][
            'range']['@timestamp']['gte']
        assert gte > old_ts
        lt = call[1]['body']['aggs']['filtered']['filter']['bool']['must'][0][
            'range']['@timestamp']['lt']
        assert lt > gte
        old_ts = gte

    # Key1 and key2 shouldn't cause a match
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key1', 'b': 'key2'}])
    assert rule.matches == []

    # Neither will missing values
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2'}])
    assert rule.matches == []

    # Key3 causes an alert for field b
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2', 'b': 'key3'}])
    assert len(rule.matches) == 1
    assert rule.matches[0]['new_field'] == 'b'
    assert rule.matches[0]['b'] == 'key3'
    rule.matches = []

    # Key3 doesn't cause another alert for field b
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2', 'b': 'key3'}])
    assert rule.matches == []

    # Missing_field
    rules['alert_on_missing_field'] = True
    with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        mock_es.return_value.info.return_value = {
            'version': {
                'number': '2.x.x'
            }
        }
        rule = NewTermsRule(rules)
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2'}])
    assert len(rule.matches) == 1
    assert rule.matches[0]['missing_field'] == 'b'
Example #14
    def run_elastalert(self, rule, conf, args):
        """ Creates an ElastAlert instance and run's over for a specific rule using either real or mock data. """

        # Load and instantiate rule
        # Pass an args namespace indicating whether we're alerting or not;
        # this prevents unnecessary initialization of unused alerters
        load_modules_args = argparse.Namespace()
        load_modules_args.debug = not args.alert
        load_modules(rule, load_modules_args)
        conf['rules'] = [rule]

        # If using mock data, make sure it's sorted and find appropriate time range
        timestamp_field = rule.get('timestamp_field', '@timestamp')
        if args.json:
            if not self.data:
                return None
            try:
                self.data.sort(key=lambda x: x[timestamp_field])
                starttime = ts_to_dt(self.data[0][timestamp_field])
                endtime = self.data[-1][timestamp_field]
                endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
            except KeyError as e:
                print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
                if args.stop_error:
                    exit(1)
                return None

            # Create mock _id for documents if it's missing
            used_ids = []

            def get_id():
                _id = ''.join([random.choice(string.ascii_letters) for i in range(16)])
                if _id in used_ids:
                    return get_id()
                used_ids.append(_id)
                return _id

            for doc in self.data:
                doc.update({'_id': doc.get('_id', get_id())})
        else:
            if args.end:
                if args.end == 'NOW':
                    endtime = ts_now()
                else:
                    try:
                        endtime = ts_to_dt(args.end)
                    except (TypeError, ValueError):
                        self.handle_error("%s is not a valid ISO8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (args.end))
                        exit(1)
            else:
                endtime = ts_now()
            if args.start:
                try:
                    starttime = ts_to_dt(args.start)
                except (TypeError, ValueError):
                    self.handle_error("%s is not a valid ISO8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (args.start))
                    exit(1)
            else:
                # if days given as command line argument
                if args.days > 0:
                    starttime = endtime - datetime.timedelta(days=args.days)
                else:
                    # if timeframe is given in rule
                    if 'timeframe' in rule:
                        starttime = endtime - datetime.timedelta(seconds=rule['timeframe'].total_seconds() * 1.01)
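                        # (the 1.01 factor pads the window by 1% so it fully covers the timeframe)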
                    # default is 1 day / 24 hours
                    else:
                        starttime = endtime - datetime.timedelta(days=1)

        # Set run_every to cover the entire time range unless count query, terms query or agg query used
        # This is to prevent query segmenting which unnecessarily slows down tests
        if not rule.get('use_terms_query') and not rule.get('use_count_query') and not rule.get('aggregation_query_element'):
            conf['run_every'] = endtime - starttime

        # Instantiate ElastAlert to use mock config and special rule
        with mock.patch('elastalert.elastalert.get_rule_hashes'):
            with mock.patch('elastalert.elastalert.load_rules') as load_conf:
                load_conf.return_value = conf
                if args.alert:
                    client = ElastAlerter(['--verbose'])
                else:
                    client = ElastAlerter(['--debug'])

        # Replace get_hits_* functions to use mock data
        if args.json:
            self.mock_elastalert(client)

        # Mock writeback to return empty results
        client.writeback_es = mock.MagicMock()
        client.writeback_es.search.return_value = {"hits": {"hits": []}}

        with mock.patch.object(client, 'writeback') as mock_writeback:
            client.run_rule(rule, endtime, starttime)

            if mock_writeback.call_count:

                if args.formatted_output:
                    self.formatted_output['writeback'] = {}
                else:
                    print("\nWould have written the following documents to writeback index (default is elastalert_status):\n")

                errors = False
                for call in mock_writeback.call_args_list:
                    if args.formatted_output:
                        self.formatted_output['writeback'][call[0][0]] = json.loads(json.dumps(call[0][1], default=str))
                    else:
                        print("%s - %s\n" % (call[0][0], call[0][1]))

                    if call[0][0] == 'elastalert_error':
                        errors = True
                if errors and args.stop_error:
                    exit(1)
Example #15
    def run_elastalert(self, rule, conf, args):
        """ Creates an ElastAlert instance and run's over for a specific rule using either real or mock data. """

        # Load and instantiate rule
        # Pass an args namespace indicating whether we're alerting or not;
        # this prevents unnecessary initialization of unused alerters
        load_modules_args = argparse.Namespace()
        load_modules_args.debug = not args.alert
        conf['rules_loader'].load_modules(rule, load_modules_args)

        # If using mock data, make sure it's sorted and find appropriate time range
        timestamp_field = rule.get('timestamp_field', '@timestamp')
        if args.json:
            if not self.data:
                return None
            try:
                self.data.sort(key=lambda x: x[timestamp_field])
                starttime = ts_to_dt(self.data[0][timestamp_field])
                endtime = self.data[-1][timestamp_field]
                endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
            except KeyError as e:
                print("All documents must have a timestamp and _id: %s" % (e),
                      file=sys.stderr)
                if args.stop_error:
                    exit(4)
                return None

            # Create mock _id for documents if it's missing
            used_ids = []

            def get_id():
                _id = ''.join(
                    [random.choice(string.ascii_letters) for i in range(16)])
                if _id in used_ids:
                    return get_id()
                used_ids.append(_id)
                return _id

            for doc in self.data:
                doc.update({'_id': doc.get('_id', get_id())})
        else:
            if args.end:
                if args.end == 'NOW':
                    endtime = ts_now()
                else:
                    try:
                        endtime = ts_to_dt(args.end)
                    except (TypeError, ValueError):
                        self.handle_error(
                            "%s is not a valid ISO8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)"
                            % (args.end))
                        exit(4)
            else:
                endtime = ts_now()
            if args.start:
                try:
                    starttime = ts_to_dt(args.start)
                except (TypeError, ValueError):
                    self.handle_error(
                        "%s is not a valid ISO8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)"
                        % (args.start))
                    exit(4)
            else:
                # if days given as command line argument
                if args.days > 0:
                    starttime = endtime - datetime.timedelta(days=args.days)
                else:
                    # if timeframe is given in rule
                    if 'timeframe' in rule:
                        starttime = endtime - datetime.timedelta(
                            seconds=rule['timeframe'].total_seconds() * 1.01)
                    # default is 1 day / 24 hours
                    else:
                        starttime = endtime - datetime.timedelta(days=1)

        # Set run_every to cover the entire time range unless count query, terms query or agg query used
        # This is to prevent query segmenting which unnecessarily slows down tests
        if not rule.get('use_terms_query') and not rule.get(
                'use_count_query') and not rule.get(
                    'aggregation_query_element'):
            conf['run_every'] = endtime - starttime

        # Instantiate ElastAlert to use mock config and special rule
        with mock.patch.object(conf['rules_loader'], 'get_hashes'):
            with mock.patch.object(conf['rules_loader'], 'load') as load_rules:
                load_rules.return_value = [rule]
                with mock.patch(
                        'elastalert.elastalert.load_conf') as load_conf:
                    load_conf.return_value = conf
                    if args.alert:
                        client = ElastAlerter(['--verbose'])
                    else:
                        client = ElastAlerter(['--debug'])

        # Replace get_hits_* functions to use mock data
        if args.json:
            self.mock_elastalert(client)

        # Mock writeback to return empty results
        client.writeback_es = mock.MagicMock()
        client.writeback_es.search.return_value = {"hits": {"hits": []}}

        with mock.patch.object(client, 'writeback') as mock_writeback:
            client.run_rule(rule, endtime, starttime)

            if mock_writeback.call_count:

                if args.formatted_output:
                    self.formatted_output['writeback'] = {}
                else:
                    print(
                        "\nWould have written the following documents to writeback index (default is elastalert_status):\n"
                    )

                errors = False
                for call in mock_writeback.call_args_list:
                    if args.formatted_output:
                        self.formatted_output['writeback'][
                            call[0][0]] = json.loads(
                                json.dumps(call[0][1], default=str))
                    else:
                        print("%s - %s\n" % (call[0][0], call[0][1]))

                    if call[0][0] == 'elastalert_error':
                        errors = True
                if errors and args.stop_error:
                    exit(2)
Example #16
    def test_file(self, conf, args):
        """ Loads a rule config file, performs a query over the last day (args.days), lists available keys
        and prints the number of results. """
        if args.schema_only:
            return []

        # Set up Elasticsearch client and query
        es_client = elasticsearch_client(conf)

        try:
            is_five = es_client.info()['version']['number'].startswith('5')
        except Exception as e:
            print("Error connecting to ElasticSearch:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None

        if is_five:
            ElastAlerter.modify_rule_for_ES5(conf)

        start_time = ts_now() - datetime.timedelta(days=args.days)
        end_time = ts_now()
        ts = conf.get('timestamp_field', '@timestamp')
        query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts, five=is_five)
        index = ElastAlerter.get_index(conf, start_time, end_time)

        # Get one document for schema
        try:
            res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None
        num_hits = len(res['hits']['hits'])
        if not num_hits:
            return []

        terms = res['hits']['hits'][0]['_source']
        doc_type = res['hits']['hits'][0]['_type']

        # Get a count of all docs
        count_query = ElastAlerter.get_query(conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts, sort=False, five=is_five)
        try:
            res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
        except Exception as e:
            print("Error querying Elasticsearch:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None

        num_hits = res['count']
        print("Got %s hits from the last %s day%s" % (num_hits, args.days, 's' if args.days > 1 else ''))
        print("\nAvailable terms in first hit:")
        print_terms(terms, '')

        # Check for missing keys
        pk = conf.get('primary_key')
        ck = conf.get('compare_key')
        if pk and not lookup_es_key(terms, pk):
            print("Warning: primary key %s is either missing or null!", file=sys.stderr)
        if ck and not lookup_es_key(terms, ck):
            print("Warning: compare key %s is either missing or null!", file=sys.stderr)

        include = conf.get('include')
        if include:
            for term in include:
                if not lookup_es_key(terms, term) and '*' not in term:
                    print("Included term %s may be missing or null" % (term), file=sys.stderr)

        for term in conf.get('top_count_keys', []):
            # If the index starts with 'logstash', fields with .raw will be available but won't be in _source
            if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')):
                print("top_count_key %s may be missing" % (term), file=sys.stderr)
        print('')  # Newline

        # Download up to 10,000 documents to save
        if args.save and not args.count:
            try:
                res = es_client.search(index, size=10000, body=query, ignore_unavailable=True)
            except Exception as e:
                print("Error running your filter:", file=sys.stderr)
                print(repr(e)[:2048], file=sys.stderr)
                return None
            num_hits = len(res['hits']['hits'])
            print("Downloaded %s documents to save" % (num_hits))
            return res['hits']['hits']
Example #17
    def run_elastalert(self, rule, conf, args):
        """ Creates an ElastAlert instance and run's over for a specific rule using either real or mock data. """

        # Load and instantiate rule
        load_modules(rule)
        conf['rules'] = [rule]

        # If using mock data, make sure it's sorted and find appropriate time range
        timestamp_field = rule.get('timestamp_field', '@timestamp')
        if args.json:
            if not self.data:
                return None
            try:
                self.data.sort(key=lambda x: x[timestamp_field])
                starttime = ts_to_dt(self.data[0][timestamp_field])
                endtime = self.data[-1][timestamp_field]
                endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
            except KeyError as e:
                print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
                return None

            # Create mock _id for documents if it's missing
            used_ids = []

            def get_id():
                _id = ''.join([random.choice(string.ascii_letters) for i in range(16)])
                if _id in used_ids:
                    return get_id()
                used_ids.append(_id)
                return _id

            for doc in self.data:
                doc.update({'_id': doc.get('_id', get_id())})
        else:
            endtime = ts_now()
            starttime = endtime - datetime.timedelta(days=args.days)

        # Set run_every to cover the entire time range unless use_count_query or use_terms_query is set
        # This is to prevent query segmenting which unnecessarily slows down tests
        if not rule.get('use_terms_query') and not rule.get('use_count_query'):
            conf['run_every'] = endtime - starttime

        # Instantiate ElastAlert to use mock config and special rule
        with mock.patch('elastalert.elastalert.get_rule_hashes'):
            with mock.patch('elastalert.elastalert.load_rules') as load_conf:
                load_conf.return_value = conf
                if args.alert:
                    client = ElastAlerter(['--verbose'])
                else:
                    client = ElastAlerter(['--debug'])

        # Replace get_hits_* functions to use mock data
        if args.json:
            self.mock_elastalert(client)

        # Mock writeback for both real data and json data
        client.writeback_es = None
        with mock.patch.object(client, 'writeback') as mock_writeback:
            client.run_rule(rule, endtime, starttime)

            if mock_writeback.call_count:
                print("\nWould have written the following documents to writeback index (default is elastalert_status):\n")
                for call in mock_writeback.call_args_list:
                    print("%s - %s\n" % (call[0][0], call[0][1]))
Example #18
def test_new_term_with_composite_fields():
    rules = {
        'fields': [['a', 'b', 'c'], ['d', 'e.f']],
        'timestamp_field': '@timestamp',
        'es_host': 'example.com',
        'es_port': 10,
        'index': 'logstash',
        'ts_to_dt': ts_to_dt,
        'dt_to_ts': dt_to_ts
    }

    mock_res = {
        'aggregations': {
            'filtered': {
                'values': {
                    'buckets': [{
                        'key': 'key1',
                        'doc_count': 5,
                        'values': {
                            'buckets': [{
                                'key': 'key2',
                                'doc_count': 5,
                                'values': {
                                    'buckets': [
                                        {
                                            'key': 'key3',
                                            'doc_count': 3,
                                        },
                                        {
                                            'key': 'key4',
                                            'doc_count': 2,
                                        },
                                    ]
                                }
                            }]
                        }
                    }]
                }
            }
        }
    }

    with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        mock_es.return_value.info.return_value = {
            'version': {
                'number': '2.x.x'
            }
        }
        rule = NewTermsRule(rules)

        assert rule.es.search.call_count == 60

    # key3 already exists, and thus shouldn't cause a match
    rule.add_data([{
        '@timestamp': ts_now(),
        'a': 'key1',
        'b': 'key2',
        'c': 'key3'
    }])
    assert rule.matches == []

    # key5 causes an alert for composite field [a, b, c]
    rule.add_data([{
        '@timestamp': ts_now(),
        'a': 'key1',
        'b': 'key2',
        'c': 'key5'
    }])
    assert len(rule.matches) == 1
    assert rule.matches[0]['new_field'] == ('a', 'b', 'c')
    assert rule.matches[0]['a'] == 'key1'
    assert rule.matches[0]['b'] == 'key2'
    assert rule.matches[0]['c'] == 'key5'
    rule.matches = []

    # New values in other fields that are not part of the composite key should not cause an alert
    rule.add_data([{
        '@timestamp': ts_now(),
        'a': 'key1',
        'b': 'key2',
        'c': 'key4',
        'd': 'unrelated_value'
    }])
    assert len(rule.matches) == 0
    rule.matches = []

    # Verify nested fields work properly
    # Key6 causes an alert for nested field e.f
    rule.add_data([{'@timestamp': ts_now(), 'd': 'key4', 'e': {'f': 'key6'}}])
    assert len(rule.matches) == 1
    assert rule.matches[0]['new_field'] == ('d', 'e.f')
    assert rule.matches[0]['d'] == 'key4'
    assert rule.matches[0]['e']['f'] == 'key6'
    rule.matches = []

    # Missing_fields
    rules['alert_on_missing_field'] = True
    with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        mock_es.return_value.info.return_value = {
            'version': {
                'number': '2.x.x'
            }
        }
        rule = NewTermsRule(rules)
    rule.add_data([{'@timestamp': ts_now(), 'a': 'key2'}])
    assert len(rule.matches) == 2
    # Each composite field had at least one subfield missing, hence one match per composite field
    assert rule.matches[0]['missing_field'] == ('a', 'b', 'c')
    assert rule.matches[1]['missing_field'] == ('d', 'e.f')
Example #19
def test_new_term_with_terms():
    rules = {
        'fields': ['a'],
        'timestamp_field': '@timestamp',
        'es_host': 'example.com',
        'es_port': 10,
        'index': 'logstash',
        'query_key': 'a',
        'window_step_size': {
            'days': 2
        },
        'ts_to_dt': ts_to_dt,
        'dt_to_ts': dt_to_ts
    }
    mock_res = {
        'aggregations': {
            'filtered': {
                'values': {
                    'buckets': [{
                        'key': 'key1',
                        'doc_count': 1
                    }, {
                        'key': 'key2',
                        'doc_count': 5
                    }]
                }
            }
        }
    }

    with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es:
        mock_es.return_value = mock.Mock()
        mock_es.return_value.search.return_value = mock_res
        mock_es.return_value.info.return_value = {
            'version': {
                'number': '2.x.x'
            }
        }
        rule = NewTermsRule(rules)

        # Only 15 queries because of custom step size
        assert rule.es.search.call_count == 15

    # Key1 and key2 shouldn't cause a match
    terms = {
        ts_now(): [{
            'key': 'key1',
            'doc_count': 1
        }, {
            'key': 'key2',
            'doc_count': 1
        }]
    }
    rule.add_terms_data(terms)
    assert rule.matches == []

    # Key3 causes an alert for field a
    terms = {ts_now(): [{'key': 'key3', 'doc_count': 1}]}
    rule.add_terms_data(terms)
    assert len(rule.matches) == 1
    assert rule.matches[0]['new_field'] == 'a'
    assert rule.matches[0]['a'] == 'key3'
    rule.matches = []

    # Key3 doesn't cause another alert
    terms = {ts_now(): [{'key': 'key3', 'doc_count': 1}]}
    rule.add_terms_data(terms)
    assert rule.matches == []
Example #20
    def run_elastalert(self, rule, conf, args):
        """ Creates an ElastAlert instance and run's over for a specific rule using either real or mock data. """

        # Load and instantiate rule
        load_modules(rule)
        conf['rules'] = [rule]

        # If using mock data, make sure it's sorted and find appropriate time range
        timestamp_field = rule.get('timestamp_field', '@timestamp')
        if args.json:
            if not self.data:
                return None
            try:
                self.data.sort(key=lambda x: x[timestamp_field])
                starttime = ts_to_dt(self.data[0][timestamp_field])
                endtime = self.data[-1][timestamp_field]
                endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1)
            except KeyError as e:
                print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr)
                return None

            # Create a mock _id for documents if it's missing
            used_ids = set()

            def get_id():
                _id = ''.join([random.choice(string.ascii_letters) for _ in range(16)])
                if _id in used_ids:
                    return get_id()
                used_ids.add(_id)
                return _id

            for doc in self.data:
                doc.update({'_id': doc.get('_id', get_id())})
        else:
            endtime = ts_now()
            starttime = endtime - datetime.timedelta(days=args.days)

        # Set run_every to cover the entire time range unless count query, terms query or agg query used
        # This is to prevent query segmenting which unnecessarily slows down tests
        if not rule.get('use_terms_query') and not rule.get('use_count_query') and not rule.get('aggregation_query_element'):
            conf['run_every'] = endtime - starttime
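            # e.g. with args.days == 7, run_every becomes a 7-day timedelta and
            # the whole test range is covered by a single query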

        # Instantiate ElastAlert to use mock config and special rule
        with mock.patch('elastalert.elastalert.get_rule_hashes'):
            with mock.patch('elastalert.elastalert.load_rules') as load_conf:
                load_conf.return_value = conf
                if args.alert:
                    client = ElastAlerter(['--verbose'])
                else:
                    client = ElastAlerter(['--debug'])

        # Replace get_hits_* functions to use mock data
        if args.json:
            self.mock_elastalert(client)

        # Mock writeback to return empty results
        client.writeback_es = mock.MagicMock()
        client.writeback_es.search.return_value = {"hits": {"hits": []}}
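        # Returning empty hits means no prior alert state (e.g. silences or past
        # matches) can leak into the test run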

        with mock.patch.object(client, 'writeback') as mock_writeback:
            client.run_rule(rule, endtime, starttime)

            if mock_writeback.call_count:
                print("\nWould have written the following documents to writeback index (default is elastalert_status):\n")
                for call in mock_writeback.call_args_list:
                    print("%s - %s\n" % (call[0][0], call[0][1]))
Example #21
    def test_file(self, args):
        """ Loads a rule config file, performs a query over the last day (args.days), lists available keys
        and prints the number of results. """
        filename = args.file
        with open(filename) as fh:
            conf = yaml.load(fh, Loader=yaml.FullLoader)
        load_options(conf)
        print("Successfully loaded %s\n" % (conf["name"]))

        if args.schema_only:
            return []

        # Set up elasticsearch client and query
        es_client = Elasticsearch(host=conf["es_host"], port=conf["es_port"])
        start_time = ts_now() - datetime.timedelta(days=args.days)
        end_time = ts_now()
        ts = conf.get("timestamp_field", "@timestamp")
        query = ElastAlerter.get_query(conf["filter"], starttime=start_time, endtime=end_time, timestamp_field=ts)
        index = ElastAlerter.get_index(conf, start_time, end_time)

        # Get one document for schema
        try:
            res = es_client.search(index, size=1, body=query, ignore_unavailable=True)
        except Exception as e:
            print("Error running your filter:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None
        num_hits = len(res["hits"]["hits"])
        if not num_hits:
            return []

        terms = res["hits"]["hits"][0]["_source"]
        doc_type = res["hits"]["hits"][0]["_type"]

        # Get a count of all docs
        count_query = ElastAlerter.get_query(
            conf["filter"], starttime=start_time, endtime=end_time, timestamp_field=ts, sort=False
        )
        count_query = {"query": {"filtered": count_query}}
        try:
            res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True)
        except Exception as e:
            print("Error querying Elasticsearch:", file=sys.stderr)
            print(repr(e)[:2048], file=sys.stderr)
            return None

        num_hits = res["count"]
        print("Got %s hits from the last %s day%s" % (num_hits, args.days, "s" if args.days > 1 else ""))
        print("\nAvailable terms in first hit:")
        print_terms(terms, "")

        # Check for missing keys
        pk = conf.get("primary_key")
        ck = conf.get("compare_key")
        if pk and not lookup_es_key(terms, pk):
            print("Warning: primary key %s is either missing or null!", file=sys.stderr)
        if ck and not lookup_es_key(terms, ck):
            print("Warning: compare key %s is either missing or null!", file=sys.stderr)

        include = conf.get("include")
        if include:
            for term in include:
                if not lookup_es_key(terms, term) and "*" not in term:
                    print("Included term %s may be missing or null" % (term), file=sys.stderr)

        for term in conf.get("top_count_keys", []):
            # If the index starts with 'logstash', fields with .raw will be queryable but won't appear in _source
            if term not in terms and not (
                term.endswith(".raw") and term[:-4] in terms and index.startswith("logstash")
            ):
                print("top_count_key %s may be missing" % (term), file=sys.stderr)
        print("")  # Newline

        # Download up to 10,000 documents to save
        if args.save and not args.count:
            try:
                res = es_client.search(index, size=10000, body=query, ignore_unavailable=True)
            except Exception as e:
                print("Error running your filter:", file=sys.stderr)
                print(repr(e)[:2048], file=sys.stderr)
                return None
            num_hits = len(res["hits"]["hits"])
            print("Downloaded %s documents to save" % (num_hits))
            return res["hits"]["hits"]

        return None
Example #22
def test_jira(caplog):
    caplog.set_level(logging.INFO)
    description_txt = "Description stuff goes here like a runbook link."
    rule = {
        'name': 'test alert',
        'jira_account_file': 'jirafile',
        'type': mock_rule(),
        'jira_project': 'testproject',
        'jira_priority': 0,
        'jira_issuetype': 'testtype',
        'jira_server': 'jiraserver',
        'jira_label': 'testlabel',
        'jira_component': 'testcomponent',
        'jira_description': description_txt,
        'jira_watchers': ['testwatcher1', 'testwatcher2'],
        'timestamp_field': '@timestamp',
        'alert_subject': 'Issue {0} occurred at {1}',
        'alert_subject_args': ['test_term', '@timestamp'],
        'rule_file': '/tmp/foo.yaml'
    }

    mock_priority = mock.Mock(id='5')
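    # jira_priority 0 indexes into the list returned by priorities(), so the
    # created issue below is expected to carry priority id '5'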

    with mock.patch('elastalert.alerters.jira.JIRA') as mock_jira, \
            mock.patch('elastalert.alerts.read_yaml') as mock_open:
        mock_open.return_value = {
            'user': 'jirauser',
            'password': 'jirapassword'
        }
        mock_jira.return_value.priorities.return_value = [mock_priority]
        mock_jira.return_value.fields.return_value = []
        alert = JiraAlerter(rule)
        alert.alert([{
            'test_term': 'test_value',
            '@timestamp': '2014-10-31T00:00:00'
        }])

    expected = [
        mock.call('jiraserver', basic_auth=('jirauser', 'jirapassword')),
        mock.call().priorities(),
        mock.call().fields(),
        mock.call().create_issue(
            issuetype={'name': 'testtype'},
            priority={'id': '5'},
            project={'key': 'testproject'},
            labels=['testlabel'],
            components=[{
                'name': 'testcomponent'
            }],
            description=mock.ANY,
            summary='Issue test_value occurred at 2014-10-31T00:00:00',
        ),
        mock.call().add_watcher(mock.ANY, 'testwatcher1'),
        mock.call().add_watcher(mock.ANY, 'testwatcher2'),
    ]

    # We don't care about additional calls to mock_jira, such as __str__
    assert mock_jira.mock_calls[:6] == expected
    assert mock_jira.mock_calls[3][2]['description'].startswith(
        description_txt)
    user, level, message = caplog.record_tuples[0]
    assert 'elastalert' == user
    assert logging.INFO == level
    assert 'Opened Jira ticket: ' in message

    # Search called if jira_bump_tickets
    rule['jira_bump_tickets'] = True
    with mock.patch('elastalert.alerters.jira.JIRA') as mock_jira, \
            mock.patch('elastalert.alerts.read_yaml') as mock_open:
        mock_open.return_value = {
            'user': 'jirauser',
            'password': 'jirapassword'
        }
        mock_jira.return_value = mock.Mock()
        mock_jira.return_value.search_issues.return_value = []
        mock_jira.return_value.priorities.return_value = [mock_priority]
        mock_jira.return_value.fields.return_value = []

        alert = JiraAlerter(rule)
        alert.alert([{
            'test_term': 'test_value',
            '@timestamp': '2014-10-31T00:00:00'
        }])

    expected.insert(3, mock.call().search_issues(mock.ANY))
    assert mock_jira.mock_calls == expected

    # Remove a field if jira_ignore_in_title set
    rule['jira_ignore_in_title'] = 'test_term'
    with mock.patch('elastalert.alerters.jira.JIRA') as mock_jira, \
            mock.patch('elastalert.alerts.read_yaml') as mock_open:
        mock_open.return_value = {
            'user': 'jirauser',
            'password': 'jirapassword'
        }
        mock_jira.return_value = mock.Mock()
        mock_jira.return_value.search_issues.return_value = []
        mock_jira.return_value.priorities.return_value = [mock_priority]
        mock_jira.return_value.fields.return_value = []

        alert = JiraAlerter(rule)
        alert.alert([{
            'test_term': 'test_value',
            '@timestamp': '2014-10-31T00:00:00'
        }])

    assert 'test_value' not in mock_jira.mock_calls[3][1][0]

    # Issue is still created if search_issues throws an exception
    with mock.patch('elastalert.alerters.jira.JIRA') as mock_jira, \
            mock.patch('elastalert.alerts.read_yaml') as mock_open:
        mock_open.return_value = {
            'user': 'jirauser',
            'password': 'jirapassword'
        }
        mock_jira.return_value = mock.Mock()
        mock_jira.return_value.search_issues.side_effect = JIRAError
        mock_jira.return_value.priorities.return_value = [mock_priority]
        mock_jira.return_value.fields.return_value = []

        alert = JiraAlerter(rule)
        alert.alert([{
            'test_term': 'test_value',
            '@timestamp': '2014-10-31T00:00:00'
        }])

    assert mock_jira.mock_calls == expected
    user, level, message = caplog.record_tuples[3]
    assert 'elastalert' in user
    assert logging.ERROR == level
    assert 'Error while searching for JIRA ticket using jql' in message

    # Only bump the ticket after 3 days of inactivity
    rule['jira_bump_after_inactivity'] = 3
    mock_issue = mock.Mock()

    # Check ticket is bumped if it was last updated 4 days ago
    mock_issue.fields.updated = str(ts_now() - datetime.timedelta(days=4))
    with mock.patch('elastalert.alerters.jira.JIRA') as mock_jira, \
            mock.patch('elastalert.alerts.read_yaml') as mock_open:
        mock_open.return_value = {
            'user': 'jirauser',
            'password': 'jirapassword'
        }
        mock_jira.return_value = mock.Mock()
        mock_jira.return_value.search_issues.return_value = [mock_issue]
        mock_jira.return_value.priorities.return_value = [mock_priority]
        mock_jira.return_value.fields.return_value = []

        alert = JiraAlerter(rule)
        alert.alert([{
            'test_term': 'test_value',
            '@timestamp': '2014-10-31T00:00:00'
        }])
        # Check add_comment is called
        assert len(mock_jira.mock_calls) == 5
        assert '().add_comment' == mock_jira.mock_calls[4][0]
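        # (the five calls: constructor, priorities(), fields(), search_issues()
        # and add_comment())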

    # Check ticket is not bumped if it was updated just now
    mock_issue.fields.updated = str(ts_now())
    with mock.patch('elastalert.alerters.jira.JIRA') as mock_jira, \
            mock.patch('elastalert.alerts.read_yaml') as mock_open:
        mock_open.return_value = {
            'user': 'jirauser',
            'password': 'jirapassword'
        }
        mock_jira.return_value = mock.Mock()
        mock_jira.return_value.search_issues.return_value = [mock_issue]
        mock_jira.return_value.priorities.return_value = [mock_priority]
        mock_jira.return_value.fields.return_value = []

        alert = JiraAlerter(rule)
        alert.alert([{
            'test_term': 'test_value',
            '@timestamp': '2014-10-31T00:00:00'
        }])
        # Only 4 calls for mock_jira since add_comment is not called
        assert len(mock_jira.mock_calls) == 4

        # Test match resolved values
        rule = {
            'name': 'test alert',
            'jira_account_file': 'jirafile',
            'type': mock_rule(),
            'owner': 'the_owner',
            'jira_project': 'testproject',
            'jira_issuetype': 'testtype',
            'jira_server': 'jiraserver',
            'jira_label': 'testlabel',
            'jira_component': 'testcomponent',
            'jira_description': "DESC",
            'jira_watchers': ['testwatcher1', 'testwatcher2'],
            'timestamp_field': '@timestamp',
            'jira_affected_user': "******",
            'rule_file': '/tmp/foo.yaml'
        }
        mock_issue = mock.Mock()
        mock_issue.fields.updated = str(ts_now() - datetime.timedelta(days=4))
        mock_fields = [{
            'name': 'affected user',
            'id': 'affected_user_id',
            'schema': {
                'type': 'string'
            }
        }]
        with mock.patch('elastalert.alerters.jira.JIRA') as mock_jira, \
                mock.patch('elastalert.alerts.read_yaml') as mock_open:
            mock_open.return_value = {
                'user': 'jirauser',
                'password': 'jirapassword'
            }
            mock_jira.return_value = mock.Mock()
            mock_jira.return_value.search_issues.return_value = [mock_issue]
            mock_jira.return_value.fields.return_value = mock_fields
            mock_jira.return_value.priorities.return_value = [mock_priority]
            alert = JiraAlerter(rule)
            alert.alert([{
                'gmail.the_user': 'jdoe',
                '@timestamp': '2014-10-31T00:00:00'
            }])
            assert mock_jira.mock_calls[4][2]['affected_user_id'] == "jdoe"
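            # Assuming the alerter resolves '$'-prefixed values from the match,
            # '$gmail.the_user' yields 'jdoe' for the custom 'affected_user_id'
            # field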