Esempio n. 1
0
    def test_aggregate_total_counts(self):
        """Check that per-host 'totals' counts collapse into one summed result."""
        hosts = "abcdefghijklmnopqrstuvwxyz"

        # One AnalyticOutput per host; the N-th host (1-based) reports a count of N.
        input_counts = [
            AnalyticOutput.from_data([{'hostname': name, 'key': 'totals', 'count': position}])
            for position, name in enumerate(hosts, 1)
        ]
        expected_count = sum(range(1, len(hosts) + 1))

        # Randomize the input order; the expected host list below is alphabetical regardless.
        random.shuffle(input_counts)

        reducer = get_reducer('| count')
        output_results = reducer(input_counts)

        # Exactly one output holding exactly one event is expected.
        self.assertEqual(len(output_results), 1)
        events = output_results[0].events
        self.assertEqual(len(events), 1)

        totals = events[0].data
        self.assertEqual(totals['hosts'], list(hosts))
        self.assertEqual(totals['total_hosts'], len(hosts))
        self.assertEqual(totals['count'], expected_count)
Esempio n. 2
0
    def test_map_reduce_analytics(self):
        """Test map reduce functionality of python engines.

        Ten events are generated for each of sixteen single-letter hosts. The
        analytic is run once per host (map), each resulting event is tagged with
        its hostname, and the per-host outputs are passed to the reducer (reduce).
        """
        host_results = []

        # Every host gets the same ten events; only the host prefix of 'a' differs.
        # A plain dict suffices because each key is assigned exactly once.
        input_events = {}
        for host in "abcdefghijklmnop":
            input_events[host] = [
                Event.from_data({
                    'number': event_number,
                    'a': host + '-a-' + str(event_number),
                    'b': -event_number
                })
                for event_number in range(10)
            ]

        query_text = 'generic where true | sort a | head 5 | sort b'
        analytic = parse_analytic({
            'query': query_text,
            'metadata': {
                'id': 'test-analytic'
            }
        })
        host_engine = get_engine(analytic)

        # Map across multiple 'hosts'
        for hostname, host_events in input_events.items():
            for result in host_engine(host_events):  # type: AnalyticOutput
                # Tag each output event with the host that produced it.
                for event in result.events:
                    event.data['hostname'] = hostname
                host_results.append(result)

        # Reduce across multiple 'hosts'
        reducer = get_reducer(analytic)
        reduced_results = reducer(host_results)

        # Expected: host 'a' events 0-4 in reverse order ('a-a-4' ... 'a-a-0').
        # NOTE(review): presumably 'sort a | head 5' keeps the five smallest 'a'
        # values and 'sort b' (b == -number) reverses them -- confirm against the
        # pipe semantics.
        expected_a = ['a-a-{}'.format(value) for value in range(10)][:5][::-1]
        actual_a = [
            event.data['a'] for result in reduced_results
            for event in result.events
        ]
        self.validate_results(actual_a, expected_a, query_text)
Esempio n. 3
0
    def test_aggregate_multiple_key_counts(self):
        """Check count aggregation when each result carries a multi-part key."""
        # (hostname, key parts, count) triples that are fed to the reducer.
        input_results = [
            ('host1', ['key1', 'key2', 'key3'], 2),
            ('host2', ['key1', 'key2', 'key3'], 4),
            ('host3', ['key1', 'key2', 'key3'], 2),
            ('host4', ['key1', 'key2', 'key5'], 7),
            ('host5', ['key1', 'key2', 'key5'], 9),
            ('host2', ['key2', 'key3', 'key4'], 5),
            ('host1', ['key4', 'key2', 'key5'], 3),
        ]

        # Randomize the input order before building the events.
        random.shuffle(input_results)
        input_counts = []
        for hostname, key_parts, host_count in input_results:
            input_counts.append(Event.from_data({
                'hostname': hostname,
                'key': key_parts,
                'count': host_count
            }))

        # (key, hosts, summed count) triples, listed in ascending total count.
        expected_counts = [
            (('key4', 'key2', 'key5'), ['host1'], 3),
            (('key2', 'key3', 'key4'), ['host2'], 5),
            (('key1', 'key2', 'key3'), ['host1', 'host2', 'host3'], 2 + 4 + 2),
            (('key1', 'key2', 'key5'), ['host4', 'host5'], 7 + 9),
        ]

        reducer = get_reducer('| count a b c', config={'flatten': True})
        reduced_counts = reducer(input_counts)

        self.assertEqual(len(expected_counts), len(reduced_counts))
        # Compare each reduced event against the expectation at the same position.
        for expected, event in zip(expected_counts, reduced_counts):
            expected_key, expected_hosts, expected_total = expected
            data = event.data  # type: dict
            self.assertEqual(expected_key, data['key'])
            self.assertEqual(expected_hosts, data['hosts'])
            self.assertEqual(len(expected_hosts), data['total_hosts'])
            self.assertEqual(expected_total, data['count'])
Esempio n. 4
0
    def test_aggregate_single_key_counts(self):
        """Check count aggregation when each result carries a single key."""
        # (hostname, key, count) triples that are fed to the reducer.
        input_results = [
            ('host1', 'key1', 2),
            ('host2', 'key1', 4),
            ('host3', 'key3', 2),
            ('host4', 'key1', 7),
            ('host5', 'key1', 9),
            ('host2', 'key2', 5),
            ('host1', 'key4', 3),
            ('host6', 'key3', 1),
        ]

        # Randomize the input order before building the count records.
        random.shuffle(input_results)
        input_counts = []
        for hostname, key_name, host_count in input_results:
            input_counts.append({
                'hostname': hostname,
                'key': key_name,
                'count': host_count
            })

        # (key, hosts, summed count) triples, listed in non-decreasing total count.
        expected_counts = [
            ('key3', ['host3', 'host6'], 2 + 1),
            ('key4', ['host1'], 3),
            ('key2', ['host2'], 5),
            ('key1', ['host1', 'host2', 'host4', 'host5'], 2 + 4 + 7 + 9),
        ]

        reducer = get_reducer('| count a', config={'flatten': True})
        output_results = reducer(input_counts)

        self.assertEqual(len(expected_counts), len(output_results))
        # Compare each reduced event against the expectation at the same position.
        for expected, event in zip(expected_counts, output_results):
            expected_key, expected_hosts, expected_total = expected
            data = event.data  # type: dict
            self.assertEqual(expected_key, data['key'])
            self.assertEqual(expected_hosts, data['hosts'])
            self.assertEqual(len(expected_hosts), data['total_hosts'])
            self.assertEqual(expected_total, data['count'])