def test_piwik_exporter(app, db, es, locations, event_queues, full_record):
    """Test a Piwik export run and its bookmark advancement."""
    # The fixtures are only needed for the events they index; the return
    # value is unused, so it is not bound to a name.
    create_stats_fixtures(
        metadata=full_record, n_records=1, n_versions=1, n_files=1,
        event_data={'user_id': '1', 'country': 'CH'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        do_process_events=True,
        do_aggregate_events=False,
        do_update_record_statistics=False)

    # Start from a clean bookmark state.
    current_cache.delete('piwik_export:bookmark')
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None

    # Running with an explicit date range sets the bookmark to the
    # range's end.
    start_date = datetime(2018, 1, 1, 12)
    end_date = datetime(2018, 1, 1, 14)
    PiwikExporter().run(start_date=start_date, end_date=end_date)
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark == u'2018-01-01T14:00:00'

    # Running without arguments resumes from the stored bookmark and
    # advances it past the remaining events.
    PiwikExporter().run()
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark == u'2018-01-01T14:30:00'
def test_basic_stats(app, db, es, locations, event_queues, minimal_record):
    """Test basic statistics results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (10 * 2) -> 20 records and (10 * 2 * 3) -> 60 files
        metadata=minimal_record, n_records=10, n_versions=2, n_files=3,
        event_data={'user_id': '1'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30))

    prefix = app.config['SEARCH_INDEX_PREFIX']

    # Event indices.
    # 2 versions * 10 records * 3 files * 4 events -> 240
    assert search.index(prefix + 'events-stats-file-download').count() == 240
    # 2 versions * 10 records * 4 events -> 80
    assert search.index(prefix + 'events-stats-record-view').count() == 80

    # Aggregation indices.
    # (2 versions + 1 concept) * 10 records -> 30 documents + 2 bookmarks
    assert search.index(prefix + 'stats-file-download').count() == 30  # 30d
    assert search.index(prefix + 'stats-record-view').count() == 30  # 30d
    assert search.index(prefix + 'stats-bookmarks').count() == 4  # 2bm + 2bm

    # Records index: every record document carries the same "_stats".
    expected = {
        # 4 view events
        'views': 4.0,
        'version_views': 8.0,
        # 4 view events over 2 different hours
        'unique_views': 2.0,
        'version_unique_views': 2.0,
        # 4 download events * 3 files
        'downloads': 12.0,
        'version_downloads': 24.0,
        # 4 download events * 3 files over 2 different hours
        'unique_downloads': 2.0,
        'version_unique_downloads': 2.0,
        # 4 download events * 3 files * 10 bytes
        'volume': 120.0,
        'version_volume': 240.0,
    }
    for _, rec, _ in records:
        doc = current_search_client.get(
            index=build_alias_name('records'),
            id=str(rec.id),
            params={'_source_includes': '_stats'},
        )
        assert doc['_source']['_stats'] == expected
def test_piwik_exporter_no_bookmark(app, db, es, locations, event_queues,
                                    full_record):
    """Test that an export without a stored bookmark sends nothing."""
    # The fixtures are only needed for the events they index; the return
    # value is unused, so it is not bound to a name.
    create_stats_fixtures(
        metadata=full_record, n_records=1, n_versions=1, n_files=1,
        event_data={
            'user_id': '1',
            'country': 'CH'
        },
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        do_process_events=True)

    # Ensure there is no bookmark at all.
    current_cache.delete('piwik_export:bookmark')
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None

    # With no bookmark and no explicit range the exporter must not make
    # any HTTP request, and the bookmark must stay unset.
    with mock.patch('zenodo.modules.stats.exporters.requests.post') as mocked:
        PiwikExporter().run()
        mocked.assert_not_called()
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None
def test_piwik_exporter_request_fail(app, db, es, locations, event_queues,
                                     full_record):
    """Test that a failed export request leaves the bookmark unset."""
    # The fixtures are only needed for the events they index; the return
    # value is unused, so it is not bound to a name.
    create_stats_fixtures(
        metadata=full_record, n_records=1, n_versions=1, n_files=1,
        event_data={
            'user_id': '1',
            'country': 'CH'
        },
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        do_process_events=True)

    # Start from a clean bookmark state.
    current_cache.delete('piwik_export:bookmark')
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None

    # The export request is expected to fail and raise.
    start_date = datetime(2018, 1, 1, 12)
    end_date = datetime(2018, 1, 1, 14)
    with pytest.raises(PiwikExportRequestError):
        PiwikExporter().run(start_date=start_date, end_date=end_date)

    # A failed run must not persist a bookmark.
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None
def test_large_stats(app, db, es, locations, event_queues, minimal_record):
    """Test a larger number of events, aggregations, and results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (3 * 4) -> 12 records and (3 * 4 * 2) -> 24 files
        metadata=minimal_record, n_records=3, n_versions=4, n_files=2,
        event_data={'user_id': '1'},
        # (31 + 30) * 2 -> 122 event timestamps (61 days and 2 events/day)
        start_date=datetime(2018, 3, 1),
        end_date=datetime(2018, 5, 1),
        interval=timedelta(hours=12))

    prefix = app.config['SEARCH_INDEX_PREFIX']

    # Event indices.
    # 4 versions * 3 records * 2 files * 122 events -> 2928
    assert search.index(prefix + 'events-stats-file-download').count() == 2928
    # 4 versions * 3 records * 122 events -> 1464
    assert search.index(prefix + 'events-stats-record-view').count() == 1464

    # Aggregation indices:
    # (4 versions + 1 concept) * 3 records -> 15 documents per day.
    downloads_q = search.index(prefix + 'stats-file-download') \
        .doc_type('file-download-day-aggregation')
    assert downloads_q.count() == 915  # 61 days * 15 records
    views_q = search.index(prefix + 'stats-record-view') \
        .doc_type('record-view-day-aggregation')
    assert views_q.count() == 915  # 61 days * 15 records

    # Records index: every record document carries the same "_stats".
    expected = {
        # 122 view events
        'views': 122.0,
        'version_views': 488.0,
        # 2 view events per day over 61 days
        'unique_views': 122.0,
        'version_unique_views': 122.0,
        # 122 download events * 2 files
        'downloads': 244.0,
        'version_downloads': 976.0,
        # 2 download events per day over 61 days
        'unique_downloads': 122.0,
        'version_unique_downloads': 122.0,
        # 244 downloads * 10 bytes
        'volume': 2440.0,
        'version_volume': 9760.0,
    }
    for _, rec, _ in records:
        doc = current_search_client.get(
            index=build_alias_name('records'),
            id=str(rec.id),
            params={'_source_includes': '_stats'},
        )
        assert doc['_source']['_stats'] == expected
def test_large_stats(app, db, es, locations, event_queues, minimal_record):
    """Test a larger number of events, aggregations, and results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (3 * 4) -> 12 records and (3 * 4 * 2) -> 24 files
        metadata=minimal_record, n_records=3, n_versions=4, n_files=2,
        event_data={'user_id': '1'},
        # (31 + 30) * 2 -> 122 event timestamps (61 days and 2 events/day)
        start_date=datetime(2018, 3, 1),
        end_date=datetime(2018, 5, 1),
        interval=timedelta(hours=12))

    # Events indices
    # 4 versions * 3 records * 2 files * 122 events -> 2928
    assert search.index('events-stats-file-download').count() == 2928
    # 4 versions * 3 records * 122 events -> 1464
    assert search.index('events-stats-record-view').count() == 1464

    # Aggregations indices
    # (4 versions + 1 concept) * 3 records -> 15 documents + 2 bookmarks
    q = search.index('stats-file-download')
    q = q.doc_type('file-download-day-aggregation')
    assert q.count() == 915  # 61 days * 15 records
    q = search.index('stats-record-view')
    q = q.doc_type('record-view-day-aggregation')
    assert q.count() == 915  # 61 days * 15 records

    # Records index
    for _, record, _ in records:
        doc = (RecordsSearch().get_record(
            record.id).source(include='_stats').execute()[0])
        assert doc['_stats'] == {
            # 122 view events
            'views': 122.0,
            'version_views': 488.0,
            # 2 view events per day over 61 days
            'unique_views': 122.0,
            'version_unique_views': 122.0,
            # 122 download events * 2 files
            'downloads': 244.0,
            'version_downloads': 976.0,
            # 2 download events per day over 61 days
            'unique_downloads': 122.0,
            'version_unique_downloads': 122.0,
            # 244 downloads * 10 bytes
            'volume': 2440.0,
            'version_volume': 9760.0,
        }
def test_basic_stats(app, db, es, locations, event_queues, minimal_record):
    """Test basic statistics results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (10 * 2) -> 20 records and (10 * 2 * 3) -> 60 files
        metadata=minimal_record, n_records=10, n_versions=2, n_files=3,
        event_data={'user_id': '1'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30))

    # Events indices
    # 2 versions * 10 records * 3 files * 4 events -> 240
    assert search.index('events-stats-file-download').count() == 240
    # 2 versions * 10 records * 4 events -> 80
    assert search.index('events-stats-record-view').count() == 80

    # Aggregations indices
    # (2 versions + 1 concept) * 10 records -> 30 documents + 2 bookmarks
    assert search.index('stats-file-download').count() == 32  # 2bm + 30d
    assert search.index('stats-record-view').count() == 32  # 2bm + 30d

    # Records index
    for _, record, _ in records:
        doc = (RecordsSearch().get_record(
            record.id).source(include='_stats').execute()[0])
        assert doc['_stats'] == {
            # 4 view events
            'views': 4.0,
            'version_views': 8.0,
            # 4 view events over 2 different hours
            'unique_views': 2.0,
            'version_unique_views': 2.0,
            # 4 download events * 3 files
            'downloads': 12.0,
            'version_downloads': 24.0,
            # 4 download events * 3 files over 2 different hours
            'unique_downloads': 2.0,
            'version_unique_downloads': 2.0,
            # 4 download events * 3 files * 10 bytes
            'volume': 120.0,
            'version_volume': 240.0,
        }
def test_large_stats(app, db, es, locations, event_queues, minimal_record):
    """Test a larger number of events, aggregations, and results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (3 * 4) -> 12 records and (3 * 4 * 2) -> 24 files
        metadata=minimal_record, n_records=3, n_versions=4, n_files=2,
        event_data={'user_id': '1'},
        # (31 + 30) * 2 -> 122 event timestamps (61 days and 2 events/day)
        start_date=datetime(2018, 3, 1),
        end_date=datetime(2018, 5, 1),
        interval=timedelta(hours=12))

    # Events indices
    # 4 versions * 3 records * 2 files * 122 events -> 2928
    assert search.index('events-stats-file-download').count() == 2928
    # 4 versions * 3 records * 122 events -> 1464
    assert search.index('events-stats-record-view').count() == 1464

    # Aggregations indices
    # (4 versions + 1 concept) * 3 records -> 15 documents + 2 bookmarks
    q = search.index('stats-file-download')
    q = q.doc_type('file-download-day-aggregation')
    assert q.count() == 915  # 61 days * 15 records
    q = search.index('stats-record-view')
    q = q.doc_type('record-view-day-aggregation')
    assert q.count() == 915  # 61 days * 15 records

    # Records index
    for _, record, _ in records:
        doc = (
            RecordsSearch().get_record(record.id)
            .source(include='_stats').execute()[0])
        assert doc['_stats'] == {
            # 122 view events
            'views': 122.0,
            'version_views': 488.0,
            # 2 view events per day over 61 days
            'unique_views': 122.0,
            'version_unique_views': 122.0,
            # 122 download events * 2 files
            'downloads': 244.0,
            'version_downloads': 976.0,
            # 2 download events per day over 61 days
            'unique_downloads': 122.0,
            'version_unique_downloads': 122.0,
            # 244 downloads * 10 bytes
            'volume': 2440.0,
            'version_volume': 9760.0,
        }
def test_piwik_exporter_no_bookmark(app, db, es, locations, event_queues,
                                    full_record):
    """Test that an export without a stored bookmark sends nothing."""
    # The fixtures are only needed for the events they index; the return
    # value is unused, so it is not bound to a name.
    create_stats_fixtures(
        metadata=full_record, n_records=1, n_versions=1, n_files=1,
        event_data={'user_id': '1', 'country': 'CH'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        do_process_events=True)

    # Ensure there is no bookmark at all.
    current_cache.delete('piwik_export:bookmark')
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None

    # With no bookmark and no explicit range the exporter must not make
    # any HTTP request, and the bookmark must stay unset.
    with mock.patch('zenodo.modules.stats.exporters.requests.post') as mocked:
        PiwikExporter().run()
        mocked.assert_not_called()
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None
def test_basic_stats(app, db, es, locations, event_queues, minimal_record):
    """Test basic statistics results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (10 * 2) -> 20 records and (10 * 2 * 3) -> 60 files
        metadata=minimal_record, n_records=10, n_versions=2, n_files=3,
        event_data={'user_id': '1'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30))

    # Events indices
    # 2 versions * 10 records * 3 files * 4 events -> 240
    assert search.index('events-stats-file-download').count() == 240
    # 2 versions * 10 records * 4 events -> 80
    assert search.index('events-stats-record-view').count() == 80

    # Aggregations indices
    # (2 versions + 1 concept) * 10 records -> 30 documents + 2 bookmarks
    assert search.index('stats-file-download').count() == 32  # 2bm + 30d
    assert search.index('stats-record-view').count() == 32  # 2bm + 30d

    # Records index
    for _, record, _ in records:
        doc = (
            RecordsSearch().get_record(record.id)
            .source(include='_stats').execute()[0])
        assert doc['_stats'] == {
            # 4 view events
            'views': 4.0,
            'version_views': 8.0,
            # 4 view events over 2 different hours
            'unique_views': 2.0,
            'version_unique_views': 2.0,
            # 4 download events * 3 files
            'downloads': 12.0,
            'version_downloads': 24.0,
            # 4 download events * 3 files over 2 different hours
            'unique_downloads': 2.0,
            'version_unique_downloads': 2.0,
            # 4 download events * 3 files * 10 bytes
            'volume': 120.0,
            'version_volume': 240.0,
        }
def test_piwik_exporter_request_fail(app, db, es, locations, event_queues,
                                     full_record):
    """Test that a failed export request leaves the bookmark unset."""
    # The fixtures are only needed for the events they index; the return
    # value is unused, so it is not bound to a name.
    create_stats_fixtures(
        metadata=full_record, n_records=1, n_versions=1, n_files=1,
        event_data={'user_id': '1', 'country': 'CH'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        do_process_events=True)

    # Start from a clean bookmark state.
    current_cache.delete('piwik_export:bookmark')
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None

    # The export request is expected to fail and raise.
    start_date = datetime(2018, 1, 1, 12)
    end_date = datetime(2018, 1, 1, 14)
    with pytest.raises(PiwikExportRequestError):
        PiwikExporter().run(start_date=start_date, end_date=end_date)

    # A failed run must not persist a bookmark.
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None
def test_update_record_statistics(app, db, es, locations, event_queues,
                                  minimal_record):
    """Test record statistics update task."""
    records = create_stats_fixtures(
        metadata=minimal_record, n_records=1, n_versions=5, n_files=3,
        event_data={'user_id': '1'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        # This also runs the task we want to test and indexes records.
        do_update_record_statistics=True)

    # Baseline statistics right after the fixture events were processed.
    expected_stats = {
        'views': 4.0,
        'version_views': 20.0,
        'unique_views': 2.0,
        'version_unique_views': 2.0,
        'downloads': 12.0,
        'version_downloads': 60.0,
        'unique_downloads': 2.0,
        'version_unique_downloads': 2.0,
        'volume': 120.0,
        'version_volume': 600.0,
    }

    # Check current stats for all records
    for recid, _, _ in records:
        stats = get_record_stats(recid.object_uuid)
        assert stats == expected_stats

    # Perform a view and all-files download today on the first version
    recid_v1, record_v1, file_objects_v1 = records[0]
    with app.test_client() as client:
        for f in file_objects_v1:
            file_url = url_for('invenio_records_ui.recid_files',
                               pid_value=recid_v1.pid_value, filename=f.key)
            assert client.get(file_url).status_code == 200
        record_url = url_for(
            'invenio_records_ui.recid', pid_value=recid_v1.pid_value)
        assert client.get(record_url).status_code == 200

    # Re-run the full pipeline so the new events reach the record docs:
    # raw events -> daily aggregations -> record "_stats" -> search index.
    process_events(['record-view', 'file-download'])
    current_search.flush_and_refresh(index='events-stats-*')
    aggregate_events(
        ['record-view-agg', 'record-view-all-versions-agg',
         'record-download-agg', 'record-download-all-versions-agg'])
    current_search.flush_and_refresh(index='stats-*')
    update_record_statistics()
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index='records')

    # Check current stats for all records
    stats = get_record_stats(recid_v1.object_uuid)
    assert stats == {
        'views': 5.0,
        'version_views': 21.0,
        'unique_views': 3.0,
        'version_unique_views': 3.0,
        'downloads': 15.0,
        'version_downloads': 63.0,
        'unique_downloads': 3.0,
        'version_unique_downloads': 3.0,
        'volume': 150.0,
        'version_volume': 630.0,
    }

    # Other versions will have only their `version_*` statistics updated
    expected_stats['version_views'] += 1
    expected_stats['version_unique_views'] += 1
    expected_stats['version_downloads'] += 3
    expected_stats['version_unique_downloads'] += 1
    expected_stats['version_volume'] += 30
    for recid, _, _ in records[1:]:
        stats = get_record_stats(recid.object_uuid)
        assert stats == expected_stats
def test_update_record_statistics(app, db, es, locations, event_queues,
                                  minimal_record):
    """Test record statistics update task."""
    records = create_stats_fixtures(
        metadata=minimal_record, n_records=1, n_versions=5, n_files=3,
        event_data={'user_id': '1'},
        # 4 event timestamps (half-hours between 13:00-15:00)
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        # This also runs the task we want to test and indexes records.
        do_update_record_statistics=True)

    # Baseline statistics right after the fixture events were processed.
    expected_stats = {
        'views': 4.0,
        'version_views': 20.0,
        'unique_views': 2.0,
        'version_unique_views': 2.0,
        'downloads': 12.0,
        'version_downloads': 60.0,
        'unique_downloads': 2.0,
        'version_unique_downloads': 2.0,
        'volume': 120.0,
        'version_volume': 600.0,
    }

    # Check current stats for all records
    for recid, _, _ in records:
        stats = get_record_stats(recid.object_uuid)
        assert stats == expected_stats

    # Perform a view and all-files download today on the first version
    recid_v1, record_v1, file_objects_v1 = records[0]
    with app.test_client() as client:
        for f in file_objects_v1:
            file_url = url_for('invenio_records_ui.recid_files',
                               pid_value=recid_v1.pid_value, filename=f.key)
            assert client.get(file_url).status_code == 200
        record_url = url_for('invenio_records_ui.recid',
                             pid_value=recid_v1.pid_value)
        assert client.get(record_url).status_code == 200

    # Re-run the full pipeline so the new events reach the record docs:
    # raw events -> daily aggregations -> record "_stats" -> search index.
    process_events(['record-view', 'file-download'])
    current_search.flush_and_refresh(index='events-stats-*')
    aggregate_events([
        'record-view-agg', 'record-view-all-versions-agg',
        'record-download-agg', 'record-download-all-versions-agg'
    ])
    current_search.flush_and_refresh(index='stats-*')
    update_record_statistics()
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index='records')

    # Check current stats for all records
    stats = get_record_stats(recid_v1.object_uuid)
    assert stats == {
        'views': 5.0,
        'version_views': 21.0,
        'unique_views': 3.0,
        'version_unique_views': 3.0,
        'downloads': 15.0,
        'version_downloads': 63.0,
        'unique_downloads': 3.0,
        'version_unique_downloads': 3.0,
        'volume': 150.0,
        'version_volume': 630.0,
    }

    # Other versions will have only their `version_*` statistics updated
    expected_stats['version_views'] += 1
    expected_stats['version_unique_views'] += 1
    expected_stats['version_downloads'] += 3
    expected_stats['version_unique_downloads'] += 1
    expected_stats['version_volume'] += 30
    for recid, _, _ in records[1:]:
        stats = get_record_stats(recid.object_uuid)
        assert stats == expected_stats