def test_aggregate_total_counts(self):
    """Check that a bare ``| count`` reducer folds per-host totals together.

    One single-event ``AnalyticOutput`` is created for each of the 26
    single-letter hosts, where the N-th host (1-based) reports a count of N
    under the shared key ``'totals'``. The inputs are shuffled before being
    reduced. The reducer is expected to return exactly one result holding
    exactly one event whose data contains the hosts in alphabetical order,
    the number of hosts, and the sum of all the individual counts.
    """
    hosts = "abcdefghijklmnopqrstuvwxyz"

    # Pair every host with its 1-based position, which doubles as its count.
    numbered_hosts = list(enumerate(hosts, 1))
    expected_count = sum(count for count, _ in numbered_hosts)
    input_counts = [
        AnalyticOutput.from_data([{'hostname': host, 'key': 'totals', 'count': count}])
        for count, host in numbered_hosts
    ]

    # Randomize the input order before handing it to the reducer.
    random.shuffle(input_counts)
    reduce_counts = get_reducer('| count')
    output_results = reduce_counts(input_counts)

    # Everything must collapse into a single output with a single event.
    self.assertEqual(len(output_results), 1)
    only_result = output_results[0]
    self.assertEqual(len(only_result.events), 1)
    totals = only_result.events[0].data

    self.assertEqual(totals['hosts'], list(hosts))
    self.assertEqual(totals['total_hosts'], len(hosts))
    self.assertEqual(totals['count'], expected_count)
def test_map_reduce_analytics(self):
    """Test map reduce functionality of python engines.

    Ten events are generated for each of 16 single-letter hosts. Every event
    carries ``number`` (0-9), ``a`` (``"<host>-a-<number>"``) and ``b`` (the
    negated number). The analytic is first run separately over each host's
    events (the "map" step) and every resulting event is tagged with its
    hostname. The per-host results are then combined with a reducer built
    from the same analytic (the "reduce" step).

    The expected final ``a`` values are ``a-a-4`` down to ``a-a-0``, and the
    comparison is delegated to ``self.validate_results``.
    """
    query_text = 'generic where true | sort a | head 5 | sort b'

    # Build the input events for every simulated host.
    input_events = {}
    for host in "abcdefghijklmnop":
        input_events[host] = [
            Event.from_data({
                'number': event_number,
                'a': host + '-a-' + str(event_number),
                'b': -event_number,
            })
            for event_number in range(10)
        ]

    analytic = parse_analytic({
        'query': query_text,
        'metadata': {
            'id': 'test-analytic'
        }
    })
    host_engine = get_engine(analytic)

    # Map across multiple 'hosts'
    host_results = []
    for hostname, host_events in input_events.items():
        for result in host_engine(host_events):  # type: AnalyticOutput
            # Tag each output event with the host that produced it.
            for event in result.events:
                event.data['hostname'] = hostname
            host_results.append(result)

    # Reduce across multiple 'hosts'
    reducer = get_reducer(analytic)
    reduced_results = reducer(host_results)

    # Expected order is a-a-4, a-a-3, ..., a-a-0.
    expected_a = ['a-a-{}'.format(value) for value in reversed(range(5))]
    actual_a = [
        event.data['a']
        for result in reduced_results
        for event in result.events
    ]
    self.validate_results(actual_a, expected_a, query_text)
def test_aggregate_multiple_key_counts(self):
    """Verify count aggregation when every input carries a multi-value key.

    Each input is a ``(hostname, key, count)`` triple whose key is a list of
    three strings. The triples are shuffled, wrapped with
    ``Event.from_data`` and passed to a ``| count a b c`` reducer created
    with ``config={'flatten': True}``. The reduced events are compared,
    position by position, against the expected ``(key, hosts, count)``
    triples. The expected keys are written as tuples, and the expected
    entries are listed in ascending order of their total count.
    """
    input_results = [
        ('host1', ['key1', 'key2', 'key3'], 2),
        ('host2', ['key1', 'key2', 'key3'], 4),
        ('host3', ['key1', 'key2', 'key3'], 2),
        ('host4', ['key1', 'key2', 'key5'], 7),
        ('host5', ['key1', 'key2', 'key5'], 9),
        ('host2', ['key2', 'key3', 'key4'], 5),
        ('host1', ['key4', 'key2', 'key5'], 3),
    ]

    # Randomize the input order before building the events.
    random.shuffle(input_results)

    input_counts = []
    for hostname, key_values, partial_count in input_results:
        input_counts.append(Event.from_data({
            'hostname': hostname,
            'key': key_values,
            'count': partial_count,
        }))

    # (key, contributing hosts, summed count) in the order the output is checked.
    expected_counts = [
        (('key4', 'key2', 'key5'), ['host1'], 3),
        (('key2', 'key3', 'key4'), ['host2'], 5),
        (('key1', 'key2', 'key3'), ['host1', 'host2', 'host3'], 2 + 4 + 2),
        (('key1', 'key2', 'key5'), ['host4', 'host5'], 7 + 9),
    ]

    reduce_counts = get_reducer('| count a b c', config={'flatten': True})
    reduced_counts = reduce_counts(input_counts)

    self.assertEqual(len(expected_counts), len(reduced_counts))
    for expected, reduced_event in zip(expected_counts, reduced_counts):
        expected_key, expected_hosts, expected_total = expected
        data = reduced_event.data  # type: dict
        self.assertEqual(expected_key, data['key'])
        self.assertEqual(expected_hosts, data['hosts'])
        self.assertEqual(len(expected_hosts), data['total_hosts'])
        self.assertEqual(expected_total, data['count'])
def test_aggregate_single_key_counts(self):
    """Verify count aggregation when every input carries a single string key.

    Each input is a ``(hostname, key, count)`` triple. The triples are
    shuffled and passed to a ``| count a`` reducer (created with
    ``config={'flatten': True}``) as plain dictionaries, i.e. without being
    wrapped in an event object first. The reduced events are compared,
    position by position, against the expected ``(key, hosts, count)``
    triples.
    """
    input_results = [
        ('host1', 'key1', 2),
        ('host2', 'key1', 4),
        ('host3', 'key3', 2),
        ('host4', 'key1', 7),
        ('host5', 'key1', 9),
        ('host2', 'key2', 5),
        ('host1', 'key4', 3),
        ('host6', 'key3', 1),
    ]

    # Randomize the input order before building the reducer input.
    random.shuffle(input_results)

    # NOTE(review): raw dicts are passed here on purpose, it seems; the
    # reducer presumably accepts them as-is - confirm against get_reducer.
    input_counts = []
    for hostname, key_value, partial_count in input_results:
        input_counts.append({
            'hostname': hostname,
            'key': key_value,
            'count': partial_count,
        })

    # (key, contributing hosts, summed count) in the order the output is checked.
    expected_counts = [
        ('key3', ['host3', 'host6'], 2 + 1),
        ('key4', ['host1'], 3),
        ('key2', ['host2'], 5),
        ('key1', ['host1', 'host2', 'host4', 'host5'], 2 + 4 + 7 + 9),
    ]

    reduce_counts = get_reducer('| count a', config={'flatten': True})
    output_results = reduce_counts(input_counts)

    self.assertEqual(len(expected_counts), len(output_results))
    for expected, reduced_event in zip(expected_counts, output_results):
        expected_key, expected_hosts, expected_total = expected
        data = reduced_event.data  # type: dict
        self.assertEqual(expected_key, data['key'])
        self.assertEqual(expected_hosts, data['hosts'])
        self.assertEqual(len(expected_hosts), data['total_hosts'])
        self.assertEqual(expected_total, data['count'])