def test_aggregation_without_events(app, es_with_templates): """Check that the aggregation doesn't crash if there are no events. This scenario happens when celery starts aggregating but no events have been created yet. """ # Aggregate events StatAggregator(name='file-download-agg', event='file-download', aggregation_field='file_id', aggregation_interval='day', query_modifiers=[]).run() assert not Index('stats-file-download', using=current_search_client).exists() # Create the index but without any event. This happens when the events # have been indexed but are not yet searchable (before index refresh). Index('events-stats-file-download-2017', using=current_search_client).create() # Wait for the index to be available time.sleep(1) # Aggregate events StatAggregator(name='test-file-download', event='file-download', aggregation_field='file_id', aggregation_interval='day', query_modifiers=[]).run() assert not Index('stats-file-download', using=current_search_client).exists()
def test_get_bookmark(app, indexed_events):
    """Test bookmark reading.

    NOTE(review): a later definition in this file reuses this test name
    and therefore shadows this one — confirm both are meant to exist.
    """
    # Drain the generator so that every search template is submitted.
    list(current_search.put_templates(ignore=[400]))
    aggregator = StatAggregator(
        name='file-download-agg',
        client=current_search_client,
        event='file-download',
        aggregation_field='file_id',
        aggregation_interval='day',
    )
    aggregator.run()
    # After aggregating the indexed fixture events, the bookmark must
    # point at the last processed day.
    assert aggregator.get_bookmark() == datetime.datetime(2017, 1, 8)
def test_get_bookmark(app, es, indexed_events):
    """Test bookmark reading."""
    aggregator = StatAggregator(
        name='file-download-agg',
        client=es,
        event='file-download',
        field='file_id',
        interval='day',
    )
    aggregator.run()
    current_search.flush_and_refresh(index='*')
    # The bookmark API must report the last aggregated day of the
    # fixture events.
    expected = datetime.datetime(2017, 1, 8)
    assert aggregator.bookmark_api.get_bookmark() == expected
def test_wrong_intervals(app):
    """Test aggregation with aggregation_interval > index_interval."""
    # A month-wide aggregation bucket cannot be built from day-sized
    # indices, so construction must fail.
    with pytest.raises(ValueError):
        StatAggregator(
            current_search_client,
            'test',
            aggregation_interval='month',
            index_interval='day',
        )
def test_wrong_intervals(app, es):
    """Test aggregation with interval > index_interval."""
    # An aggregation interval coarser than the index interval is
    # rejected at construction time.
    bad_intervals = dict(interval='month', index_interval='day')
    with pytest.raises(ValueError):
        StatAggregator('test-agg', 'test', es, **bad_intervals)
def test_metric_aggregations(app, es, event_queues):
    """Test aggregation metrics."""
    # Twelve downloads of the same file by one user: three per hour
    # across four consecutive hour slices of 2018-01-01.
    event_dates = [
        (2018, 1, 1, hour, minute)
        for hour in (12, 13, 14, 15)
        for minute in (10, 20, 30)
    ]
    current_stats.publish(
        'file-download',
        [_create_file_download_event(date, user_id='1')
         for date in event_dates])
    process_events(['file-download'])
    current_search.flush_and_refresh(index='*')
    aggregator = StatAggregator(
        name='file-download-agg',
        client=es,
        event='file-download',
        field='file_id',
        metric_fields={
            'unique_count': (
                'cardinality', 'unique_session_id',
                {'precision_threshold': 3000}),
            'volume': ('sum', 'size', {}),
        },
        interval='day',
    )
    aggregator.run()
    current_search.flush_and_refresh(index='*')
    results = Search(using=es, index='stats-file-download').execute()
    assert len(results) == 1
    assert results[0].count == 12  # 3 downloads in each of 4 hour slices
    assert results[0].unique_count == 4  # 4 different hour slices accessed
    assert results[0].volume == 9000 * 12
def aggregate_and_check_version(expected_version):
    # Run the daily aggregation, then verify that every aggregation
    # document in the index carries the expected document version.
    # (``es`` is a free variable from the enclosing test scope.)
    aggregator = StatAggregator(
        field='file_id',
        interval='day',
        name='file-download-agg',
        event='file-download',
        query_modifiers=[],
    )
    aggregator.run()
    current_search.flush_and_refresh(index='*')
    response = es.search(index='stats-file-download', version=True)
    versions = [hit['_version'] for hit in response['hits']['hits']]
    assert all(version == expected_version for version in versions)
def aggregate_and_check_version(expected_version):
    # Aggregate events, then check the stored version of every
    # aggregation document in the stats index.
    aggregator = StatAggregator(name='file-download-agg',
                                event='file-download',
                                aggregation_field='file_id',
                                aggregation_interval='day',
                                query_modifiers=[])
    aggregator.run()
    current_search_client.indices.refresh(index='*')
    response = current_search_client.search(
        index='stats-file-download',
        doc_type='file-download-day-aggregation',
        version=True,
    )
    for hit in response['hits']['hits']:
        assert hit['_version'] == expected_version
def test_filter_robots(app, es, event_queues, indexed_events, with_robots):
    """Test the filter_robots query modifier."""
    # Plug in the filter_robots modifier only when robots must be
    # excluded from the aggregation.
    query_modifiers = [] if with_robots else [filter_robots]
    StatAggregator(
        name='file-download-agg',
        client=es,
        event='file-download',
        field='file_id',
        interval='day',
        query_modifiers=query_modifiers,
    ).run()
    current_search.flush_and_refresh(index='*')
    search = Search(using=es, index='stats-file-download')
    results = search[0:30].sort('file_id').execute()
    assert len(results) == 3
    # With robots counted each file has 5 downloads, otherwise 2.
    expected_count = 5 if with_robots else 2
    for result in results:
        if 'file_id' in result:
            assert result.count == expected_count
def test_filter_robots(app, es, event_queues, indexed_events, with_robots):
    """Test the filter_robots query modifier."""
    query_modifiers = []
    if not with_robots:
        query_modifiers = [filter_robots]
    # Fix: ``name`` was missing from this call; every other
    # keyword-style instantiation in this file passes it
    # (e.g. name='file-download-agg'), which suggests it is a required
    # argument — confirm against the StatAggregator signature.
    StatAggregator(name='file-download-agg',
                   client=current_search_client,
                   event='file-download',
                   aggregation_field='file_id',
                   aggregation_interval='day',
                   query_modifiers=query_modifiers).run()
    current_search_client.indices.flush(index='*')
    query = Search(
        using=current_search_client,
        index='stats-file-download',
        doc_type='file-download-day-aggregation')[0:30].sort('file_id')
    results = query.execute()
    assert len(results) == 3
    for result in results:
        if 'file_id' in result:
            # With robots counted each file has 5 downloads, otherwise 2.
            assert result.count == (5 if with_robots else 2)