def test_starttime(ea):
    invalid = ['2014-13-13',
               '2014-11-24T30:00:00',
               'Not A Timestamp']
    for ts in invalid:
        with pytest.raises((TypeError, ValueError)):
            ts_to_dt(ts)

def test_agg_cron(ea):
    ea.max_aggregation = 1337
    hits_timestamps = ['2014-09-26T12:34:45', '2014-09-26T12:40:45', '2014-09-26T12:47:45']
    hits = generate_hits(hits_timestamps)
    ea.current_es.search.return_value = hits
    alerttime1 = dt_to_ts(ts_to_dt('2014-09-26T12:46:00'))
    alerttime2 = dt_to_ts(ts_to_dt('2014-09-26T13:04:00'))

    with mock.patch('elastalert.elastalert.Elasticsearch'):
        with mock.patch('elastalert.elastalert.croniter.get_next') as mock_ts:
            # Aggregate first two, query over full range
            mock_ts.side_effect = [dt_to_unix(ts_to_dt('2014-09-26T12:46:00')),
                                   dt_to_unix(ts_to_dt('2014-09-26T13:04:00'))]
            ea.rules[0]['aggregation'] = {'schedule': '*/5 * * * *'}
            ea.rules[0]['type'].matches = [{'@timestamp': h} for h in hits_timestamps]
            ea.run_rule(ea.rules[0], END, START)

    # Assert that the three matches were added to elasticsearch
    call1 = ea.writeback_es.create.call_args_list[0][1]['body']
    call2 = ea.writeback_es.create.call_args_list[1][1]['body']
    call3 = ea.writeback_es.create.call_args_list[2][1]['body']
    assert call1['match_body'] == {'@timestamp': '2014-09-26T12:34:45'}
    assert not call1['alert_sent']
    assert 'aggregate_id' not in call1
    assert call1['alert_time'] == alerttime1

    assert call2['match_body'] == {'@timestamp': '2014-09-26T12:40:45'}
    assert not call2['alert_sent']
    assert call2['aggregate_id'] == 'ABCD'

    assert call3['match_body'] == {'@timestamp': '2014-09-26T12:47:45'}
    assert call3['alert_time'] == alerttime2
    assert not call3['alert_sent']
    assert 'aggregate_id' not in call3

def test_flatline_forget_query_key():
    rules = {'timeframe': datetime.timedelta(seconds=30),
             'threshold': 1,
             'query_key': 'qk',
             'forget_keys': True,
             'timestamp_field': '@timestamp'}

    rule = FlatlineRule(rules)

    # Add a single query key; the flatline rule should trigger once it goes quiet
    rule.add_data(hits(1, qk='key1'))
    assert rule.matches == []

    # This will be run at the end of the hits
    rule.garbage_collect(ts_to_dt('2014-09-26T12:00:11Z'))
    assert rule.matches == []

    # key1 has had no new data within the timeframe, so it alerts once
    timestamp = '2014-09-26T12:00:45Z'
    rule.garbage_collect(ts_to_dt(timestamp))
    assert len(rule.matches) == 1
    rule.matches = []

    # key1 was forgotten, so no more matches
    rule.garbage_collect(ts_to_dt('2014-09-26T12:01:11Z'))
    assert rule.matches == []

def test_base_aggregation_payloads():
    with mock.patch.object(BaseAggregationRule, 'check_matches', return_value=None) as mock_check_matches:
        rules = {'bucket_interval': {'seconds': 10},
                 'bucket_interval_timedelta': datetime.timedelta(seconds=10),
                 'buffer_time': datetime.timedelta(minutes=5),
                 'timestamp_field': '@timestamp'}

        timestamp = datetime.datetime.now()
        interval_agg = create_bucket_aggregation('interval_aggs', [{'key_as_string': '2014-01-01T00:00:00Z'}])

        rule = BaseAggregationRule(rules)

        # Payload not wrapped
        rule.add_aggregation_data({timestamp: {}})
        mock_check_matches.assert_called_once_with(timestamp, None, {})
        mock_check_matches.reset_mock()

        # Payload wrapped by date_histogram
        interval_agg_data = {timestamp: interval_agg}
        rule.add_aggregation_data(interval_agg_data)
        mock_check_matches.assert_called_once_with(ts_to_dt('2014-01-01T00:00:00Z'), None,
                                                   {'key_as_string': '2014-01-01T00:00:00Z'})
        mock_check_matches.reset_mock()

        # Payload wrapped by terms
        bucket_agg_data = {timestamp: create_bucket_aggregation('bucket_aggs', [{'key': 'qk'}])}
        rule.add_aggregation_data(bucket_agg_data)
        mock_check_matches.assert_called_once_with(timestamp, 'qk', {'key': 'qk'})
        mock_check_matches.reset_mock()

        # Payload wrapped by terms and date_histogram
        bucket_interval_agg_data = {
            timestamp: create_bucket_aggregation('bucket_aggs', [{'key': 'qk', 'interval_aggs': interval_agg['interval_aggs']}])
        }
        rule.add_aggregation_data(bucket_interval_agg_data)
        mock_check_matches.assert_called_once_with(ts_to_dt('2014-01-01T00:00:00Z'), 'qk',
                                                   {'key_as_string': '2014-01-01T00:00:00Z'})
        mock_check_matches.reset_mock()

def test_get_starttime(ea):
    endtime = '2015-01-01T00:00:00Z'
    mock_es = mock.Mock()
    mock_es.search.return_value = {'hits': {'hits': [{'_source': {'endtime': endtime}}]}}
    ea.writeback_es = mock_es

    # 4 days old, will return endtime
    with mock.patch('elastalert.elastalert.ts_now') as mock_ts:
        mock_ts.return_value = ts_to_dt('2015-01-05T00:00:00Z')  # 4 days ahead of the endtime
        assert ea.get_starttime(ea.rules[0]) == ts_to_dt(endtime)

    # 10 days old, will return None
    with mock.patch('elastalert.elastalert.ts_now') as mock_ts:
        mock_ts.return_value = ts_to_dt('2015-01-11T00:00:00Z')  # 10 days ahead of the endtime
        assert ea.get_starttime(ea.rules[0]) is None

def test_flatline_query_key():
    rules = {'timeframe': datetime.timedelta(seconds=30),
             'threshold': 1,
             'use_query_key': True,
             'query_key': 'qk',
             'timestamp_field': '@timestamp'}

    rule = FlatlineRule(rules)

    # Adding three separate query keys, the flatline rule should track each one
    rule.add_data(hits(1, qk='key1'))
    rule.add_data(hits(1, qk='key2'))
    rule.add_data(hits(1, qk='key3'))
    assert rule.matches == []

    # This will be run at the end of the hits
    rule.garbage_collect(ts_to_dt('2014-09-26T12:00:11Z'))
    assert rule.matches == []

    # Add new data from key3. It will not immediately cause an alert
    rule.add_data([create_event(ts_to_dt('2014-09-26T12:00:20Z'), qk='key3')])

    # key1 and key2 have not had any new data, so they will trigger the flatline alert
    timestamp = '2014-09-26T12:00:45Z'
    rule.garbage_collect(ts_to_dt(timestamp))
    assert len(rule.matches) == 2
    assert set(['key1', 'key2']) == set([m['key'] for m in rule.matches if m['@timestamp'] == timestamp])

    # Next time the rule runs, key1 and key2 will have been forgotten. Now key3 will cause an alert
    timestamp = '2014-09-26T12:01:20Z'
    rule.garbage_collect(ts_to_dt(timestamp))
    assert len(rule.matches) == 3
    assert set(['key3']) == set([m['key'] for m in rule.matches if m['@timestamp'] == timestamp])

def test_freq_terms():
    rules = {'num_events': 10,
             'timeframe': datetime.timedelta(hours=1),
             'query_key': 'username'}
    rule = FrequencyRule(rules)

    terms1 = {ts_to_dt('2014-01-01T00:01:00Z'): [{'key': 'userA', 'doc_count': 1},
                                                 {'key': 'userB', 'doc_count': 5}]}
    terms2 = {ts_to_dt('2014-01-01T00:10:00Z'): [{'key': 'userA', 'doc_count': 8},
                                                 {'key': 'userB', 'doc_count': 5}]}
    terms3 = {ts_to_dt('2014-01-01T00:25:00Z'): [{'key': 'userA', 'doc_count': 3},
                                                 {'key': 'userB', 'doc_count': 0}]}

    # Initial data
    rule.add_terms_data(terms1)
    assert len(rule.matches) == 0

    # Match for user B
    rule.add_terms_data(terms2)
    assert len(rule.matches) == 1
    assert rule.matches[0].get('username') == 'userB'

    # Match for user A
    rule.add_terms_data(terms3)
    assert len(rule.matches) == 2
    assert rule.matches[1].get('username') == 'userA'

def test_spike_terms():
    rules = {'threshold_ref': 5,
             'spike_height': 2,
             'timeframe': datetime.timedelta(minutes=10),
             'spike_type': 'both',
             'use_count_query': False,
             'timestamp_field': 'ts',
             'query_key': 'username',
             'use_term_query': True}
    terms1 = {ts_to_dt('2014-01-01T00:01:00Z'): [{'key': 'userA', 'doc_count': 10},
                                                 {'key': 'userB', 'doc_count': 5}]}
    terms2 = {ts_to_dt('2014-01-01T00:10:00Z'): [{'key': 'userA', 'doc_count': 22},
                                                 {'key': 'userB', 'doc_count': 5}]}
    terms3 = {ts_to_dt('2014-01-01T00:25:00Z'): [{'key': 'userA', 'doc_count': 25},
                                                 {'key': 'userB', 'doc_count': 27}]}
    terms4 = {ts_to_dt('2014-01-01T00:27:00Z'): [{'key': 'userA', 'doc_count': 10},
                                                 {'key': 'userB', 'doc_count': 12},
                                                 {'key': 'userC', 'doc_count': 100}]}
    terms5 = {ts_to_dt('2014-01-01T00:30:00Z'): [{'key': 'userD', 'doc_count': 100},
                                                 {'key': 'userC', 'doc_count': 100}]}

    rule = SpikeRule(rules)

    # Initial input
    rule.add_terms_data(terms1)
    assert len(rule.matches) == 0

    # No spike for userA because windows not filled
    rule.add_terms_data(terms2)
    assert len(rule.matches) == 0

    # Spike for userB only
    rule.add_terms_data(terms3)
    assert len(rule.matches) == 1
    assert rule.matches[0].get('username') == 'userB'

    # Test no alert for new user over threshold
    rules.pop('threshold_ref')
    rules['threshold_cur'] = 50
    rule = SpikeRule(rules)
    rule.add_terms_data(terms1)
    rule.add_terms_data(terms2)
    rule.add_terms_data(terms3)
    rule.add_terms_data(terms4)
    assert len(rule.matches) == 0

    # Test alert_on_new_data
    rules['alert_on_new_data'] = True
    rule = SpikeRule(rules)
    rule.add_terms_data(terms1)
    rule.add_terms_data(terms2)
    rule.add_terms_data(terms3)
    rule.add_terms_data(terms4)
    assert len(rule.matches) == 1

    # Test that another alert doesn't fire immediately for userC but it does for userD
    rule.matches = []
    rule.add_terms_data(terms5)
    assert len(rule.matches) == 1
    assert rule.matches[0]['username'] == 'userD'

def test_blacklist():
    events = [{'@timestamp': ts_to_dt('2014-09-26T12:34:56Z'), 'term': 'good'},
              {'@timestamp': ts_to_dt('2014-09-26T12:34:57Z'), 'term': 'bad'},
              {'@timestamp': ts_to_dt('2014-09-26T12:34:58Z'), 'term': 'also good'},
              {'@timestamp': ts_to_dt('2014-09-26T12:34:59Z'), 'term': 'really bad'},
              {'@timestamp': ts_to_dt('2014-09-26T12:35:00Z'), 'no_term': 'bad'}]
    rules = {'blacklist': ['bad', 'really bad'],
             'compare_key': 'term',
             'timestamp_field': '@timestamp'}
    rule = BlacklistRule(rules)
    rule.add_data(events)
    assert_matches_have(rule.matches, [('term', 'bad'), ('term', 'really bad')])

def test_whitelist_dont_ignore_nulls():
    events = [{'@timestamp': ts_to_dt('2014-09-26T12:34:56Z'), 'term': 'good'},
              {'@timestamp': ts_to_dt('2014-09-26T12:34:57Z'), 'term': 'bad'},
              {'@timestamp': ts_to_dt('2014-09-26T12:34:58Z'), 'term': 'also good'},
              {'@timestamp': ts_to_dt('2014-09-26T12:34:59Z'), 'term': 'really bad'},
              {'@timestamp': ts_to_dt('2014-09-26T12:35:00Z'), 'no_term': 'bad'}]
    rules = {'whitelist': ['good', 'also good'],
             'compare_key': 'term',
             'ignore_null': True,
             'timestamp_field': '@timestamp'}
    rules['ignore_null'] = False
    rule = WhitelistRule(rules)
    rule.add_data(events)
    assert_matches_have(rule.matches, [('term', 'bad'), ('term', 'really bad'), ('no_term', 'bad')])

def test_duplicate_timestamps(ea):
    ea.current_es.search.side_effect = _duplicate_hits_generator([START_TIMESTAMP] * 3, blah='duplicate')
    ea.run_query(ea.rules[0], START, ts_to_dt('2014-01-01T00:00:00Z'))

    assert len(ea.rules[0]['type'].add_data.call_args_list[0][0][0]) == 3
    assert ea.rules[0]['type'].add_data.call_count == 1

    # Run the query again, duplicates will be removed and not added
    ea.run_query(ea.rules[0], ts_to_dt('2014-01-01T00:00:00Z'), END)
    assert ea.rules[0]['type'].add_data.call_count == 1

def test_freq_count():
    rules = {'num_events': 100,
             'timeframe': datetime.timedelta(hours=1),
             'use_count_query': True}

    # Normal match
    rule = FrequencyRule(rules)
    rule.add_count_data({ts_to_dt('2014-10-10T00:00:00'): 75})
    assert len(rule.matches) == 0
    rule.add_count_data({ts_to_dt('2014-10-10T00:15:00'): 10})
    assert len(rule.matches) == 0
    rule.add_count_data({ts_to_dt('2014-10-10T00:25:00'): 10})
    assert len(rule.matches) == 0
    rule.add_count_data({ts_to_dt('2014-10-10T00:45:00'): 6})
    assert len(rule.matches) == 1

    # First data goes out of timeframe first
    rule = FrequencyRule(rules)
    rule.add_count_data({ts_to_dt('2014-10-10T00:00:00'): 75})
    assert len(rule.matches) == 0
    rule.add_count_data({ts_to_dt('2014-10-10T00:45:00'): 10})
    assert len(rule.matches) == 0
    rule.add_count_data({ts_to_dt('2014-10-10T00:55:00'): 10})
    assert len(rule.matches) == 0
    rule.add_count_data({ts_to_dt('2014-10-10T01:05:00'): 6})
    assert len(rule.matches) == 0
    rule.add_count_data({ts_to_dt('2014-10-10T01:00:00'): 75})
    assert len(rule.matches) == 1

def test_flatline_count():
    rules = {'timeframe': datetime.timedelta(seconds=30),
             'threshold': 1,
             'timestamp_field': '@timestamp'}
    rule = FlatlineRule(rules)
    rule.add_count_data({ts_to_dt('2014-10-11T00:00:00'): 1})
    rule.garbage_collect(ts_to_dt('2014-10-11T00:00:10'))
    assert len(rule.matches) == 0
    rule.add_count_data({ts_to_dt('2014-10-11T00:00:15'): 0})
    rule.garbage_collect(ts_to_dt('2014-10-11T00:00:20'))
    assert len(rule.matches) == 0
    rule.add_count_data({ts_to_dt('2014-10-11T00:00:35'): 0})
    assert len(rule.matches) == 1

def test_flatline_no_data():
    rules = {'timeframe': datetime.timedelta(seconds=30),
             'threshold': 2,
             'timestamp_field': '@timestamp'}

    rule = FlatlineRule(rules)

    # Initial lack of data
    rule.garbage_collect(ts_to_dt('2014-09-26T12:00:00Z'))
    assert len(rule.matches) == 0

    # Passed the timeframe, still no events
    rule.garbage_collect(ts_to_dt('2014-09-26T12:35:00Z'))
    assert len(rule.matches) == 1

def test_freq():
    events = hits(60, timestamp_field='blah', username='******')
    rules = {'num_events': 59,
             'timeframe': datetime.timedelta(hours=1),
             'timestamp_field': 'blah'}
    rule = FrequencyRule(rules)
    rule.add_data(events)
    assert len(rule.matches) == 1

    # Test with query_key
    events = hits(60, timestamp_field='blah', username='******')
    rules['query_key'] = 'username'
    rule = FrequencyRule(rules)
    rule.add_data(events)
    assert len(rule.matches) == 1

    # Doesn't match
    events = hits(60, timestamp_field='blah', username='******')
    rules['num_events'] = 61
    rule = FrequencyRule(rules)
    rule.add_data(events)
    assert len(rule.matches) == 0

    # garbage collection
    assert 'qlo' in rule.occurrences
    rule.garbage_collect(ts_to_dt('2014-09-28T12:0:0'))
    assert rule.occurrences == {}

def mock_hits(self, rule, start, end, index):
    """ Mocks the effects of get_hits using global data instead of Elasticsearch. """
    docs = []
    for doc in self.data:
        if start <= ts_to_dt(doc[rule['timestamp_field']]) < end:
            docs.append(doc)

    # Remove all fields which don't match 'include'
    for doc in docs:
        fields_to_remove = []
        for field in doc:
            if field != '_id':
                if not any([re.match(incl.replace('*', '.*'), field) for incl in rule['include']]):
                    fields_to_remove.append(field)
        map(doc.pop, fields_to_remove)

    # Separate _source and _id, convert timestamps
    resp = [{'_source': doc, '_id': doc['_id']} for doc in docs]
    for doc in resp:
        doc['_source'].pop('_id')
    ElastAlerter.process_hits(rule, resp)
    return resp

def test_agg_not_matchtime(ea):
    ea.max_aggregation = 1337
    hits_timestamps = ['2014-09-26T12:34:45', '2014-09-26T12:40:45', '2014-09-26T12:47:45']
    match_time = ts_to_dt('2014-09-26T12:55:00Z')
    hits = generate_hits(hits_timestamps)
    ea.current_es.search.return_value = hits
    with mock.patch('elastalert.elastalert.elasticsearch_client'):
        with mock.patch('elastalert.elastalert.ts_now', return_value=match_time):
            ea.rules[0]['aggregation'] = datetime.timedelta(minutes=10)
            ea.rules[0]['type'].matches = [{'@timestamp': h} for h in hits_timestamps]
            ea.run_rule(ea.rules[0], END, START)

    # Assert that the three matches were added to Elasticsearch
    call1 = ea.writeback_es.index.call_args_list[0][1]['body']
    call2 = ea.writeback_es.index.call_args_list[1][1]['body']
    call3 = ea.writeback_es.index.call_args_list[2][1]['body']
    assert call1['match_body']['@timestamp'] == '2014-09-26T12:34:45'
    assert not call1['alert_sent']
    assert 'aggregate_id' not in call1
    assert call1['alert_time'] == dt_to_ts(match_time + datetime.timedelta(minutes=10))

    assert call2['match_body']['@timestamp'] == '2014-09-26T12:40:45'
    assert not call2['alert_sent']
    assert call2['aggregate_id'] == 'ABCD'

    assert call3['match_body']['@timestamp'] == '2014-09-26T12:47:45'
    assert not call3['alert_sent']
    assert call3['aggregate_id'] == 'ABCD'

def test_silence_query_key(ea):
    # Silence test rule for 4 hours
    ea.args.rule = 'test_rule.yaml'  # Not a real name, just has to be set
    ea.args.silence = 'hours=4'
    ea.silence('anytest.qlo')

    # Don't alert even with a match
    match = [{'@timestamp': '2014-11-17T00:00:00', 'username': '******'}]
    ea.rules[0]['type'].matches = match
    ea.rules[0]['query_key'] = 'username'
    with mock.patch('elastalert.elastalert.elasticsearch_client'):
        ea.run_rule(ea.rules[0], END, START)
    assert ea.rules[0]['alert'][0].alert.call_count == 0

    # If there is a new record with a different value for the query_key, we should get an alert
    match = [{'@timestamp': '2014-11-17T00:00:01', 'username': '******'}]
    ea.rules[0]['type'].matches = match
    with mock.patch('elastalert.elastalert.elasticsearch_client'):
        ea.run_rule(ea.rules[0], END, START)
    assert ea.rules[0]['alert'][0].alert.call_count == 1

    # Mock ts_now() to +5 hours, alert on match
    match = [{'@timestamp': '2014-11-17T00:00:00', 'username': '******'}]
    ea.rules[0]['type'].matches = match
    with mock.patch('elastalert.elastalert.ts_now') as mock_ts:
        with mock.patch('elastalert.elastalert.elasticsearch_client'):
            # Converted twice to add tzinfo
            mock_ts.return_value = ts_to_dt(dt_to_ts(datetime.datetime.utcnow() + datetime.timedelta(hours=5)))
            ea.run_rule(ea.rules[0], END, START)
    assert ea.rules[0]['alert'][0].alert.call_count == 2

def mock_count(self, rule, start, end, index):
    """ Mocks the effects of get_hits_count using global data instead of Elasticsearch """
    count = 0
    for doc in self.data:
        if start <= ts_to_dt(doc[rule['timestamp_field']]) < end:
            count += 1
    return {end: count}

def test_silence_query_key(ea):
    # Silence test rule for 4 hours
    ea.args.rule = 'test_rule.yaml'  # Not a real name, just has to be set
    ea.args.silence = 'hours=4'
    ea.silence()

    # Don't alert even with a match
    match = [{'@timestamp': '2014-11-17T00:00:00', 'username': '******'}]
    ea.rules[0]['type'].matches = match
    ea.rules[0]['query_key'] = 'username'
    with mock.patch('elastalert.elastalert.Elasticsearch'):
        ea.run_rule(ea.rules[0], END, START)
    assert ea.rules[0]['alert'][0].alert.call_count == 0

    # Mock ts_now() to +5 hours, alert on match
    match = [{'@timestamp': '2014-11-17T00:00:00', 'username': '******'}]
    ea.rules[0]['type'].matches = match
    with mock.patch('elastalert.elastalert.ts_now') as mock_ts:
        with mock.patch('elastalert.elastalert.Elasticsearch'):
            # Converted twice to add tzinfo
            mock_ts.return_value = ts_to_dt(dt_to_ts(datetime.datetime.utcnow() + datetime.timedelta(hours=5)))
            ea.run_rule(ea.rules[0], END, START)
    assert ea.rules[0]['alert'][0].alert.call_count == 1

def test_exponential_realert(ea):
    ea.rules[0]['exponential_realert'] = datetime.timedelta(days=1)  # 1 day ~ 10 * 2**13 seconds
    ea.rules[0]['realert'] = datetime.timedelta(seconds=10)

    until = ts_to_dt('2015-03-24T00:00:00')
    ts5s = until + datetime.timedelta(seconds=5)
    ts15s = until + datetime.timedelta(seconds=15)
    ts1m = until + datetime.timedelta(minutes=1)
    ts5m = until + datetime.timedelta(minutes=5)
    ts4h = until + datetime.timedelta(hours=4)

    test_values = [(ts5s, until, 0),    # Exp will increase to 1, 10*2**0 = 10s
                   (ts15s, until, 0),   # Exp will stay at 0, 10*2**0 = 10s
                   (ts15s, until, 1),   # Exp will increase to 2, 10*2**1 = 20s
                   (ts1m, until, 2),    # Exp will decrease to 1, 10*2**2 = 40s
                   (ts1m, until, 3),    # Exp will increase to 4, 10*2**3 = 1m20s
                   (ts5m, until, 1),    # Exp will lower back to 0, 10*2**1 = 20s
                   (ts4h, until, 9),    # Exp will lower back to 0, 10*2**9 = 1h25m
                   (ts4h, until, 10),   # Exp will lower back to 9, 10*2**10 = 2h50m
                   (ts4h, until, 11)]   # Exp will increase to 12, 10*2**11 = 5h
    results = (1, 0, 2, 1, 4, 0, 0, 9, 12)
    next_res = iter(results)
    for args in test_values:
        ea.silence_cache[ea.rules[0]['name']] = (args[1], args[2])
        next_alert, exponent = ea.next_alert_time(ea.rules[0], ea.rules[0]['name'], args[0])
        assert exponent == next_res.next()

def test_realert(ea):
    hits = ['2014-09-26T12:35:%sZ' % (x) for x in range(60)]
    matches = [{'@timestamp': x} for x in hits]
    ea.current_es.search.return_value = hits
    with mock.patch('elastalert.elastalert.Elasticsearch'):
        ea.rules[0]['realert'] = datetime.timedelta(seconds=50)
        ea.rules[0]['type'].matches = matches
        ea.run_rule(ea.rules[0], END, START)
        assert ea.rules[0]['alert'][0].alert.call_count == 1

    # Doesn't alert again
    matches = [{'@timestamp': x} for x in hits]
    with mock.patch('elastalert.elastalert.Elasticsearch'):
        ea.run_rule(ea.rules[0], END, START)
        ea.rules[0]['type'].matches = matches
        assert ea.rules[0]['alert'][0].alert.call_count == 1

    # mock ts_now() to past the realert time
    matches = [{'@timestamp': hits[0]}]
    with mock.patch('elastalert.elastalert.ts_now') as mock_ts:
        with mock.patch('elastalert.elastalert.Elasticsearch'):
            # mock_ts is converted twice to add tzinfo
            mock_ts.return_value = ts_to_dt(dt_to_ts(datetime.datetime.utcnow() + datetime.timedelta(minutes=10)))
            ea.rules[0]['type'].matches = matches
            ea.run_rule(ea.rules[0], END, START)
            assert ea.rules[0]['alert'][0].alert.call_count == 2

def test_run_rule_calls_garbage_collect(ea):
    start_time = '2014-09-26T00:00:00Z'
    end_time = '2014-09-26T12:00:00Z'
    ea.buffer_time = datetime.timedelta(hours=1)
    ea.run_every = datetime.timedelta(hours=1)
    with contextlib.nested(mock.patch.object(ea.rules[0]['type'], 'garbage_collect'),
                           mock.patch.object(ea, 'run_query')) as (mock_gc, mock_get_hits):
        ea.run_rule(ea.rules[0], ts_to_dt(end_time), ts_to_dt(start_time))

    # Running elastalert every hour for 12 hours, we should see self.garbage_collect called 12 times.
    assert mock_gc.call_count == 12

    # The calls should be spaced 1 hour apart
    expected_calls = [ts_to_dt(start_time) + datetime.timedelta(hours=i) for i in range(1, 13)]
    for e in expected_calls:
        mock_gc.assert_any_call(e)

def test_freq_out_of_order():
    events = hits(60, timestamp_field='blah', username='******')
    rules = {'num_events': 59,
             'timeframe': datetime.timedelta(hours=1),
             'timestamp_field': 'blah'}
    rule = FrequencyRule(rules)
    rule.add_data(events[:10])
    assert len(rule.matches) == 0

    # Try to add events from before the first occurrence
    rule.add_data([{'blah': ts_to_dt('2014-09-26T11:00:00'), 'username': '******'}] * 50)
    assert len(rule.matches) == 0

    rule.add_data(events[15:20])
    assert len(rule.matches) == 0
    rule.add_data(events[10:15])
    assert len(rule.matches) == 0
    rule.add_data(events[20:55])
    rule.add_data(events[57:])
    assert len(rule.matches) == 0
    rule.add_data(events[55:57])
    assert len(rule.matches) == 1

def hits(size, **kwargs):
    ret = []
    for n in range(size):
        ts = ts_to_dt('2014-09-26T12:%s:%sZ' % (n / 60, n % 60))
        n += 1
        event = create_event(ts, **kwargs)
        ret.append(event)
    return ret

def test_strf_index(ea):
    """ Test that the get_index function properly generates indexes spanning days """
    ea.rules[0]['index'] = 'logstash-%Y.%m.%d'
    ea.rules[0]['use_strftime_index'] = True

    # Test formatting with times
    start = ts_to_dt('2015-01-02T12:34:45Z')
    end = ts_to_dt('2015-01-02T16:15:14Z')
    assert ea.get_index(ea.rules[0], start, end) == 'logstash-2015.01.02'
    end = ts_to_dt('2015-01-03T01:02:03Z')
    assert ea.get_index(ea.rules[0], start, end) == 'logstash-2015.01.02,logstash-2015.01.03'

    # Test formatting for wildcard
    assert ea.get_index(ea.rules[0]) == 'logstash-*'
    ea.rules[0]['index'] = 'logstash-%Y.%m'
    assert ea.get_index(ea.rules[0]) == 'logstash-*'
    ea.rules[0]['index'] = 'logstash-%Y.%m-stuff'
    assert ea.get_index(ea.rules[0]) == 'logstash-*-stuff'

def hits(x, timestamp='@timestamp', **kwargs):
    ret = []
    for n in range(x):
        ts = ts_to_dt('2014-09-26T12:%s:%sZ' % (n / 60, n % 60))
        n += 1
        event = {timestamp: ts}
        event.update(**kwargs)
        ret.append(event)
    return ret

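# Usage sketch for the hits() helper above (hypothetical arguments; assumes the
# Python 2 integer division used throughout this suite, so n / 60 stays an int):
#   hits(2, username='foo') ->
#       [{'@timestamp': ts_to_dt('2014-09-26T12:0:0Z'), 'username': 'foo'},
#        {'@timestamp': ts_to_dt('2014-09-26T12:0:1Z'), 'username': 'foo'}]
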
def str_to_ts(self, input: str) -> datetime:
    if input == "NOW":
        return self.ts_now
    try:
        return ts_to_dt(input)
    except (TypeError, ValueError):
        raise Exception(
            f"Input is not a valid ISO8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00): {input}"
        )

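# Usage sketch for str_to_ts() above (illustrative values only): "NOW" returns
# self.ts_now unchanged, a parseable string such as "2015-01-01T00:00:00Z" is
# converted with ts_to_dt, and anything else raises the Exception carrying the
# ISO8601 hint.
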
def test_eventwindow():
    timeframe = datetime.timedelta(minutes=10)
    window = EventWindow(timeframe, getTimestamp=lambda e: e[0]['@timestamp'])
    timestamps = [ts_to_dt(x) for x in ['2014-01-01T10:00:00',
                                        '2014-01-01T10:05:00',
                                        '2014-01-01T10:03:00',
                                        '2014-01-01T09:55:00',
                                        '2014-01-01T10:09:00']]
    for ts in timestamps:
        window.append([{'@timestamp': ts}, 1])

    timestamps.sort()
    for exp, actual in zip(timestamps[1:], window.data):
        assert actual[0]['@timestamp'] == exp

    window.append([{'@timestamp': ts_to_dt('2014-01-01T10:14:00')}, 1])
    timestamps.append(ts_to_dt('2014-01-01T10:14:00'))
    for exp, actual in zip(timestamps[3:], window.data):
        assert actual[0]['@timestamp'] == exp

def test_spike_deep_key():
    rules = {'threshold_ref': 10,
             'spike_height': 2,
             'timeframe': datetime.timedelta(seconds=10),
             'spike_type': 'both',
             'timestamp_field': '@timestamp',
             'query_key': 'foo.bar.baz'}
    rule = SpikeRule(rules)
    rule.add_data([{'@timestamp': ts_to_dt('2015'), 'foo': {'bar': {'baz': 'LOL'}}}])
    assert 'LOL' in rule.cur_windows

def test_agg(ea):
    ea.max_aggregation = 1337
    hits_timestamps = ['2014-09-26T12:34:45', '2014-09-26T12:40:45', '2014-09-26T12:47:45']
    alerttime1 = dt_to_ts(ts_to_dt(hits_timestamps[0]) + datetime.timedelta(minutes=10))
    hits = generate_hits(hits_timestamps)
    ea.current_es.search.return_value = hits
    with mock.patch('elastalert.elastalert.Elasticsearch'):
        # Aggregate first two, query over full range
        ea.rules[0]['aggregation'] = datetime.timedelta(minutes=10)
        ea.rules[0]['type'].matches = [{'@timestamp': h} for h in hits_timestamps]
        ea.run_rule(ea.rules[0], END, START)

    # Assert that the three matches were added to elasticsearch
    call1 = ea.writeback_es.create.call_args_list[0][1]['body']
    call2 = ea.writeback_es.create.call_args_list[1][1]['body']
    call3 = ea.writeback_es.create.call_args_list[2][1]['body']
    assert call1['match_body'] == {'@timestamp': '2014-09-26T12:34:45'}
    assert not call1['alert_sent']
    assert 'aggregate_id' not in call1
    assert call1['alert_time'] == alerttime1

    assert call2['match_body'] == {'@timestamp': '2014-09-26T12:40:45'}
    assert not call2['alert_sent']
    assert call2['aggregate_id'] == 'ABCD'

    assert call3['match_body'] == {'@timestamp': '2014-09-26T12:47:45'}
    assert not call3['alert_sent']
    assert 'aggregate_id' not in call3

    # First call - Find all pending alerts
    # Second call - Find matches with agg_id == 'ABCD'
    # Third call - Find matches with agg_id == 'CDEF'
    ea.writeback_es.search.side_effect = [{'hits': {'hits': [{'_id': 'ABCD', '_source': call1},
                                                             {'_id': 'BCDE', '_source': call2},
                                                             {'_id': 'CDEF', '_source': call3}]}},
                                          {'hits': {'hits': [{'_id': 'BCDE', '_source': call2}]}},
                                          {'hits': {'hits': []}}]

    with mock.patch('elastalert.elastalert.Elasticsearch') as mock_es:
        ea.send_pending_alerts()
        # Assert that current_es was refreshed from the aggregate rules
        assert mock_es.called_with(host='', port='')
        assert mock_es.call_count == 2
    assert_alerts(ea, [hits_timestamps[:2], hits_timestamps[2:]])

    call1 = ea.writeback_es.search.call_args_list[6][1]['body']
    call2 = ea.writeback_es.search.call_args_list[7][1]['body']
    call3 = ea.writeback_es.search.call_args_list[8][1]['body']
    assert 'alert_time' in call1['filter']['range']
    assert call2['query']['query_string']['query'] == 'aggregate_id:ABCD'
    assert call3['query']['query_string']['query'] == 'aggregate_id:CDEF'
    assert ea.writeback_es.search.call_args_list[7][1]['size'] == 1337