def test_freq_terms():
    """Check FrequencyRule matching when it is fed terms-aggregation data.

    With ``num_events`` of 10 and ``query_key`` of ``username`` the test
    expects no match after the first bucket, a match for userB after the
    second one (5 + 5 documents) and a match for userA after the third
    one (1 + 8 + 3 documents).
    """
    def bucket(stamp, count_a, count_b):
        # One timestamped terms result holding a document count per user.
        return {
            ts_to_dt(stamp): [
                {'key': 'userA', 'doc_count': count_a},
                {'key': 'userB', 'doc_count': count_b},
            ]
        }

    config = {
        'num_events': 10,
        'timeframe': datetime.timedelta(hours=1),
        'query_key': 'username',
    }
    frequency_rule = FrequencyRule(config)

    # Each step: (terms data, expected total matches, user of newest match).
    steps = [
        (bucket('2014-01-01T00:01:00Z', 1, 5), 0, None),     # initial data
        (bucket('2014-01-01T00:10:00Z', 8, 5), 1, 'userB'),  # match for user B
        (bucket('2014-01-01T00:25:00Z', 3, 0), 2, 'userA'),  # match for user A
    ]

    for terms, total, newest_user in steps:
        frequency_rule.add_terms_data(terms)
        assert len(frequency_rule.matches) == total
        if newest_user is not None:
            newest_match = frequency_rule.matches[total - 1]
            assert newest_match.get('username') == newest_user
def test_freq_terms():
    """Feed three terms buckets to a FrequencyRule and check its matches.

    The rule uses a threshold of 10 events per hour, keyed on ``username``.
    The expected outcome is: nothing after the first bucket, a userB match
    after the second bucket and a userA match after the third bucket.
    """
    threshold_config = {
        "num_events": 10,
        "timeframe": datetime.timedelta(hours=1),
        "query_key": "username",
    }
    rule = FrequencyRule(threshold_config)

    early_batch = {
        ts_to_dt("2014-01-01T00:01:00Z"): [
            {"key": "userA", "doc_count": 1},
            {"key": "userB", "doc_count": 5},
        ]
    }
    middle_batch = {
        ts_to_dt("2014-01-01T00:10:00Z"): [
            {"key": "userA", "doc_count": 8},
            {"key": "userB", "doc_count": 5},
        ]
    }
    late_batch = {
        ts_to_dt("2014-01-01T00:25:00Z"): [
            {"key": "userA", "doc_count": 3},
            {"key": "userB", "doc_count": 0},
        ]
    }

    # Initial data: both users are still below the threshold.
    rule.add_terms_data(early_batch)
    assert len(rule.matches) == 0

    # userB now has 5 + 5 documents, so a single match is expected.
    rule.add_terms_data(middle_batch)
    assert len(rule.matches) == 1
    user_b_match = rule.matches[0]
    assert user_b_match.get("username") == "userB"

    # userA now has 1 + 8 + 3 documents, so a second match is expected.
    rule.add_terms_data(late_batch)
    assert len(rule.matches) == 2
    user_a_match = rule.matches[1]
    assert user_a_match.get("username") == "userA"
def test_freq_count():
    """Check FrequencyRule matching when it is fed count-query results.

    Two scenarios share the same configuration (100 events per hour with
    ``use_count_query`` enabled); each one runs against a fresh rule.
    """
    config = {
        'num_events': 100,
        'timeframe': datetime.timedelta(hours=1),
        'use_count_query': True,
    }

    # Each entry: (timestamp, count, expected number of matches afterwards).
    normal_match = [
        ('2014-10-10T00:00:00', 75, 0),
        ('2014-10-10T00:15:00', 10, 0),
        ('2014-10-10T00:25:00', 10, 0),
        # 75 + 10 + 10 + 6 exceeds the threshold inside one hour.
        ('2014-10-10T00:45:00', 6, 1),
    ]
    first_data_expires = [
        ('2014-10-10T00:00:00', 75, 0),
        ('2014-10-10T00:45:00', 10, 0),
        ('2014-10-10T00:55:00', 10, 0),
        # The first count is expected to be out of the timeframe by now.
        ('2014-10-10T01:05:00', 6, 0),
        ('2014-10-10T01:00:00', 75, 1),
    ]

    for scenario in (normal_match, first_data_expires):
        frequency_rule = FrequencyRule(config)
        for stamp, count, expected_matches in scenario:
            frequency_rule.add_count_data({ts_to_dt(stamp): count})
            assert len(frequency_rule.matches) == expected_matches
def test_freq():
    """Exercise FrequencyRule.add_data with and without a query key.

    Covers three cases against 60 generated events: a threshold of 59
    without ``query_key``, the same threshold keyed on ``username``, and a
    threshold of 61 that must not match. Finally checks that
    ``garbage_collect`` empties ``rule.occurrences``.
    """
    # Single source of truth for the username, so the events and the
    # occurrences-key assertion below cannot drift apart. The original
    # asserted a different literal ('qlo') than the one given to hits().
    username = '******'

    events = hits(60, timestamp_field='blah', username=username)
    rules = {
        'num_events': 59,
        'timeframe': datetime.timedelta(hours=1),
        'timestamp_field': 'blah',
    }
    rule = FrequencyRule(rules)
    rule.add_data(events)
    assert len(rule.matches) == 1

    # Test with query_key
    events = hits(60, timestamp_field='blah', username=username)
    rules['query_key'] = 'username'
    rule = FrequencyRule(rules)
    rule.add_data(events)
    assert len(rule.matches) == 1

    # Doesn't match: one more event is required than is supplied.
    events = hits(60, timestamp_field='blah', username=username)
    rules['num_events'] = 61
    rule = FrequencyRule(rules)
    rule.add_data(events)
    assert len(rule.matches) == 0

    # Garbage collection.
    # NOTE(review): assumes hits() copies its keyword arguments into every
    # event, so occurrences are keyed by the username value - confirm.
    assert username in rule.occurrences
    rule.garbage_collect(ts_to_dt('2014-09-28T12:0:0'))
    assert rule.occurrences == {}
def test_freq_out_of_order():
    """Check that FrequencyRule copes with events delivered out of order.

    Sixty generated events are fed in shuffled slices, interleaved with a
    batch of 50 older events. The test expects exactly one match, and only
    once the final missing slice has been added.
    """
    generated = hits(60, timestamp_field='blah', username='******')
    config = {
        'num_events': 59,
        'timeframe': datetime.timedelta(hours=1),
        'timestamp_field': 'blah',
    }
    frequency_rule = FrequencyRule(config)

    frequency_rule.add_data(generated[:10])
    assert len(frequency_rule.matches) == 0

    # Try to add events from before the first occurrence
    older_event = {
        'blah': ts_to_dt('2014-09-26T11:00:00'),
        'username': '******',
    }
    frequency_rule.add_data([older_event] * 50)
    assert len(frequency_rule.matches) == 0

    # Deliver two neighbouring slices in swapped order.
    for window in (slice(15, 20), slice(10, 15)):
        frequency_rule.add_data(generated[window])
        assert len(frequency_rule.matches) == 0

    # Everything except events 55 and 56 has been added after these calls.
    frequency_rule.add_data(generated[20:55])
    frequency_rule.add_data(generated[57:])
    assert len(frequency_rule.matches) == 0

    # Filling the gap is expected to trigger the only match.
    frequency_rule.add_data(generated[55:57])
    assert len(frequency_rule.matches) == 1
def test_freq_count():
    """Verify count-based matching of FrequencyRule in two scenarios.

    Both scenarios use a threshold of 100 events per hour with
    ``use_count_query`` enabled and start from a freshly built rule.
    """
    def feed(target, stamp, count):
        # Push one timestamped count and report how many matches exist now.
        target.add_count_data({ts_to_dt(stamp): count})
        return len(target.matches)

    settings = {
        'num_events': 100,
        'timeframe': datetime.timedelta(hours=1),
        'use_count_query': True,
    }

    # Normal match: the counts add up to 101 within a single hour.
    rule = FrequencyRule(settings)
    assert feed(rule, '2014-10-10T00:00:00', 75) == 0
    assert feed(rule, '2014-10-10T00:15:00', 10) == 0
    assert feed(rule, '2014-10-10T00:25:00', 10) == 0
    assert feed(rule, '2014-10-10T00:45:00', 6) == 1

    # First data goes out of timeframe first, so the same counts spread
    # past the hour are not expected to match until another 75 arrives.
    rule = FrequencyRule(settings)
    assert feed(rule, '2014-10-10T00:00:00', 75) == 0
    assert feed(rule, '2014-10-10T00:45:00', 10) == 0
    assert feed(rule, '2014-10-10T00:55:00', 10) == 0
    assert feed(rule, '2014-10-10T01:05:00', 6) == 0
    assert feed(rule, '2014-10-10T01:00:00', 75) == 1
def test_freq():
    """Check FrequencyRule.add_data thresholds and garbage collection.

    Runs three rule configurations over 60 generated events: threshold 59
    without a query key (one match), threshold 59 keyed on ``username``
    (one match) and threshold 61 (no match). The last rule is then garbage
    collected and its ``occurrences`` mapping must end up empty.
    """
    # The username is defined once and reused for both the generated events
    # and the occurrences-key check. The original asserted the literal
    # 'qlo', which does not match the username passed to hits().
    user = '******'

    def make_events():
        # Fresh batch of 60 events using the 'blah' timestamp field.
        return hits(60, timestamp_field='blah', username=user)

    events = make_events()
    rules = {'num_events': 59, 'timeframe': datetime.timedelta(hours=1), 'timestamp_field': 'blah'}
    rule = FrequencyRule(rules)
    rule.add_data(events)
    assert len(rule.matches) == 1

    # Test with query_key
    events = make_events()
    rules['query_key'] = 'username'
    rule = FrequencyRule(rules)
    rule.add_data(events)
    assert len(rule.matches) == 1

    # Doesn't match: the threshold is above the number of events.
    events = make_events()
    rules['num_events'] = 61
    rule = FrequencyRule(rules)
    rule.add_data(events)
    assert len(rule.matches) == 0

    # Garbage collection.
    # NOTE(review): presumes occurrences are keyed by each event's username
    # value as produced by hits() - verify against the helper.
    assert user in rule.occurrences
    rule.garbage_collect(ts_to_dt('2014-09-28T12:0:0'))
    assert rule.occurrences == {}
def test_freq_out_of_order():
    """Feed a FrequencyRule shuffled event slices and expect one late match.

    The rule needs 59 events within an hour. No match is expected until
    the last two missing events (indexes 55 and 56) are finally supplied.
    """
    def total_matches():
        # Number of matches the rule has recorded so far.
        return len(rule.matches)

    all_events = hits(60, timestamp_field='blah', username='******')
    settings = {'num_events': 59, 'timeframe': datetime.timedelta(hours=1), 'timestamp_field': 'blah'}
    rule = FrequencyRule(settings)

    rule.add_data(all_events[:10])
    assert total_matches() == 0

    # Try to add events from before the first occurrence
    stale_batch = [{'blah': ts_to_dt('2014-09-26T11:00:00'), 'username': '******'}] * 50
    rule.add_data(stale_batch)
    assert total_matches() == 0

    # Events 15-19 arrive ahead of events 10-14.
    rule.add_data(all_events[15:20])
    assert total_matches() == 0
    rule.add_data(all_events[10:15])
    assert total_matches() == 0

    # Leave a two-event hole at indexes 55 and 56.
    rule.add_data(all_events[20:55])
    rule.add_data(all_events[57:])
    assert total_matches() == 0

    # Closing the hole is expected to produce the single match.
    rule.add_data(all_events[55:57])
    assert total_matches() == 1
def test_freq():
    """Test FrequencyRule.add_data matching plus garbage collection.

    Three rules are built from one progressively updated config and each
    receives 60 generated events: threshold 59 (match), threshold 59 with
    ``query_key`` set to ``username`` (match) and threshold 61 (no match).
    The final rule must hold an occurrences entry for the username until
    ``garbage_collect`` is called, after which it must be empty.
    """
    # Shared by the event generator and the occurrences assertion so both
    # always agree. The original checked for the literal "qlo" even though
    # the events were generated with a different username.
    username = "******"

    events = hits(60, timestamp_field="blah", username=username)
    rules = {
        "num_events": 59,
        "timeframe": datetime.timedelta(hours=1),
        "timestamp_field": "blah",
    }
    rule = FrequencyRule(rules)
    rule.add_data(events)
    assert len(rule.matches) == 1

    # Test with query_key
    events = hits(60, timestamp_field="blah", username=username)
    rules["query_key"] = "username"
    rule = FrequencyRule(rules)
    rule.add_data(events)
    assert len(rule.matches) == 1

    # Doesn't match: 61 events are required but only 60 are supplied.
    events = hits(60, timestamp_field="blah", username=username)
    rules["num_events"] = 61
    rule = FrequencyRule(rules)
    rule.add_data(events)
    assert len(rule.matches) == 0

    # Garbage collection.
    # NOTE(review): assumes hits() stores the username keyword on every
    # event and that occurrences are keyed by that value - confirm.
    assert username in rule.occurrences
    rule.garbage_collect(ts_to_dt("2014-09-28T12:0:0"))
    assert rule.occurrences == {}
def test_freq_out_of_order():
    """Verify FrequencyRule matching when events arrive out of order.

    Slices of 60 generated events are added in a scrambled sequence
    together with 50 older events. Each step lists the match count that
    is expected once that step's data has been added.
    """
    source_events = hits(60, timestamp_field="blah", username="******")
    rule_config = {
        "num_events": 59,
        "timeframe": datetime.timedelta(hours=1),
        "timestamp_field": "blah",
    }
    rule = FrequencyRule(rule_config)

    rule.add_data(source_events[:10])
    assert len(rule.matches) == 0

    # Try to add events from before the first occurrence
    early_event = {"blah": ts_to_dt("2014-09-26T11:00:00"), "username": "******"}
    rule.add_data([early_event] * 50)
    assert len(rule.matches) == 0

    # Remaining slices as (windows to add, expected matches afterwards).
    # Indexes 55 and 56 are held back until the very last step.
    remaining_steps = (
        ((slice(15, 20),), 0),
        ((slice(10, 15),), 0),
        ((slice(20, 55), slice(57, None)), 0),
        ((slice(55, 57),), 1),
    )
    for windows, expected_matches in remaining_steps:
        for window in windows:
            rule.add_data(source_events[window])
        assert len(rule.matches) == expected_matches