# Shared imports for the examples below. The `zenodo.modules.stats` paths are
# assumptions inferred from the mock.patch target used further down;
# `create_stats_fixtures` and `get_record_stats` are helpers from the Zenodo
# test suite whose module is not shown here.
from datetime import datetime, timedelta

import mock
import pytest
from elasticsearch_dsl import Search
from flask import url_for
from invenio_cache import current_cache
from invenio_indexer.api import RecordIndexer
from invenio_search import RecordsSearch, current_search, current_search_client
from invenio_search.utils import build_alias_name
from invenio_stats.tasks import aggregate_events, process_events

from zenodo.modules.stats.errors import PiwikExportRequestError  # assumed path
from zenodo.modules.stats.exporters import PiwikExporter  # assumed path
from zenodo.modules.stats.tasks import update_record_statistics  # assumed path


def test_piwik_exporter(app, db, es, locations, event_queues, full_record):
    records = create_stats_fixtures(
        metadata=full_record, n_records=1, n_versions=1, n_files=1,
        event_data={'user_id': '1', 'country': 'CH'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        do_process_events=True,
        do_aggregate_events=False,
        do_update_record_statistics=False
    )

    current_cache.delete('piwik_export:bookmark')
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None

    start_date = datetime(2018, 1, 1, 12)
    end_date = datetime(2018, 1, 1, 14)
    PiwikExporter().run(start_date=start_date, end_date=end_date)
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark == u'2018-01-01T14:00:00'

    PiwikExporter().run()
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark == u'2018-01-01T14:30:00'
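The two assertions above pin down the exporter's bookmark behaviour: an explicit run persists its end date under 'piwik_export:bookmark', and a bare run() resumes from that bookmark and advances it past the remaining events (to 14:30). A minimal sketch of this cache-backed pattern follows; the names _bookmarked_run and export_fn are illustrative, not Zenodo's actual implementation:

def _bookmarked_run(export_fn, start_date=None, end_date=None):
    # Illustrative only: resume from the persisted bookmark when no
    # explicit start date is given.
    CACHE_KEY = 'piwik_export:bookmark'
    if start_date is None:
        bookmark = current_cache.get(CACHE_KEY)
        if bookmark:
            start_date = datetime.strptime(bookmark, '%Y-%m-%dT%H:%M:%S')
    last_exported = export_fn(start_date, end_date)  # hypothetical helper
    # Persist the timestamp of the last exported event so the next bare
    # run picks up where this one stopped.
    if last_exported is not None:
        current_cache.set(CACHE_KEY, last_exported.isoformat())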
Example #2
def test_basic_stats(app, db, es, locations, event_queues, minimal_record):
    """Test basic statistics results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (10 * 2) -> 20 records and (10 * 2 * 3) -> 60 files
        metadata=minimal_record,
        n_records=10,
        n_versions=2,
        n_files=3,
        event_data={'user_id': '1'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30))

    # Events indices
    prefix = app.config['SEARCH_INDEX_PREFIX']

    # 2 versions * 10 records * 3 files * 4 events -> 240
    assert search.index(prefix + 'events-stats-file-download').count() == 240
    # 2 versions * 10 records * 4 events -> 80
    assert search.index(prefix + 'events-stats-record-view').count() == 80

    # Aggregations indices
    # (2 versions + 1 concept) * 10 records -> 30 aggregation documents
    # per statistic; bookmarks are stored in a separate index here.

    # 30 day-aggregation documents
    assert search.index(prefix + 'stats-file-download').count() == 30

    # 30 day-aggregation documents
    assert search.index(prefix + 'stats-record-view').count() == 30

    # one bookmark per aggregator (2 view + 2 download) -> 4
    assert search.index(prefix + 'stats-bookmarks').count() == 4

    # Records index
    for _, record, _ in records:
        doc = current_search_client.get(
            index=build_alias_name('records'),
            id=str(record.id),
            params={'_source_includes': '_stats'},
        )
        assert doc['_source']['_stats'] == {
            # 4 view events
            'views': 4.0,
            'version_views': 8.0,
            # 4 view events over 2 different hours
            'unique_views': 2.0,
            'version_unique_views': 2.0,
            # 4 download events * 3 files
            'downloads': 12.0,
            'version_downloads': 24.0,
            # 4 download events * 3 files over 2 different hours
            'unique_downloads': 2.0,
            'version_unique_downloads': 2.0,
            # 4 download events * 3 files * 10 bytes
            'volume': 120.0,
            'version_volume': 240.0,
        }
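The expected counts asserted above follow directly from the fixture parameters: every version of every record gets one view event per timestamp, and one download event per file per timestamp. A throwaway helper (not part of the test suite) makes the arithmetic explicit:

def expected_event_counts(n_records, n_versions, n_files, n_events):
    # One download event per file of each record version per timestamp,
    # one view event per record version per timestamp.
    downloads = n_records * n_versions * n_files * n_events
    views = n_records * n_versions * n_events
    return downloads, views

assert expected_event_counts(10, 2, 3, 4) == (240, 80)      # test_basic_stats
assert expected_event_counts(3, 4, 2, 122) == (2928, 1464)  # test_large_stats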
Example #3
def test_piwik_exporter_no_bookmark(app, db, es, locations, event_queues,
                                    full_record):
    records = create_stats_fixtures(
        metadata=full_record,
        n_records=1,
        n_versions=1,
        n_files=1,
        event_data={
            'user_id': '1',
            'country': 'CH'
        },
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        do_process_events=True)

    current_cache.delete('piwik_export:bookmark')
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None

    with mock.patch('zenodo.modules.stats.exporters.requests.post') as mocked:
        PiwikExporter().run()
        mocked.assert_not_called()
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None
Example #4
def test_piwik_exporter_request_fail(app, db, es, locations, event_queues,
                                     full_record):
    records = create_stats_fixtures(
        metadata=full_record,
        n_records=1,
        n_versions=1,
        n_files=1,
        event_data={
            'user_id': '1',
            'country': 'CH'
        },
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        do_process_events=True)

    current_cache.delete('piwik_export:bookmark')
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None

    start_date = datetime(2018, 1, 1, 12)
    end_date = datetime(2018, 1, 1, 14)

    with pytest.raises(PiwikExportRequestError):
        PiwikExporter().run(start_date=start_date, end_date=end_date)
    bookmark = current_cache.get('piwik_export:bookmark')
    assert bookmark is None
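Nothing in this snippet itself makes the HTTP request fail, so the error presumably comes from the surrounding test configuration (for example, an unreachable Piwik endpoint). A self-contained way to provoke the same path would be to stub requests.post with a failing response, mirroring the patch target used in the no-bookmark test above; this assumes the exporter raises PiwikExportRequestError on a non-2xx response:

with mock.patch('zenodo.modules.stats.exporters.requests.post') as mocked:
    # Illustrative only: simulate a failing Piwik API response.
    mocked.return_value = mock.Mock(ok=False, status_code=500)
    with pytest.raises(PiwikExportRequestError):
        PiwikExporter().run(start_date=start_date, end_date=end_date)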
Example #5
def test_large_stats(app, db, es, locations, event_queues, minimal_record):
    """Test a larger number of events, aggregations, and results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (3 * 4) -> 12 records and (3 * 4 * 2) -> 24 files
        metadata=minimal_record,
        n_records=3,
        n_versions=4,
        n_files=2,
        event_data={'user_id': '1'},
        # (31 + 30) * 2 -> 122 event timestamps (61 days and 2 events/day)
        start_date=datetime(2018, 3, 1),
        end_date=datetime(2018, 5, 1),
        interval=timedelta(hours=12))

    # Events indices
    prefix = app.config['SEARCH_INDEX_PREFIX']

    # 4 versions * 3 records * 2 files * 122 events -> 2928
    assert search.index(prefix + 'events-stats-file-download').count() == 2928
    # 4 versions * 3 records * 122 events -> 1464
    assert search.index(prefix + 'events-stats-record-view').count() == 1464

    # Aggregations indices
    # (4 versions + 1 concept) * 3 records -> 15 aggregation documents per day
    q = search.index(prefix + 'stats-file-download')
    q = q.doc_type('file-download-day-aggregation')
    assert q.count() == 915  # 61 days * 15 records
    q = search.index(prefix + 'stats-record-view')
    q = q.doc_type('record-view-day-aggregation')
    assert q.count() == 915  # 61 days * 15 records

    # Records index
    for _, record, _ in records:
        doc = current_search_client.get(
            index=build_alias_name('records'),
            id=str(record.id),
            params={'_source_includes': '_stats'},
        )
        assert doc['_source']['_stats'] == {
            # 122 view events
            'views': 122.0,
            'version_views': 488.0,
            # events are 12 hours apart, so every view is unique
            'unique_views': 122.0,
            'version_unique_views': 122.0,
            # 122 download events * 2 files
            'downloads': 244.0,
            'version_downloads': 976.0,
            # events are 12 hours apart, so every download is unique
            'unique_downloads': 122.0,
            'version_unique_downloads': 122.0,
            # 122 download events * 2 files * 10 bytes
            'volume': 2440.0,
            'version_volume': 9760.0,
        }
Example #6
def test_large_stats(app, db, es, locations, event_queues, minimal_record):
    """Test record page view event import."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (3 * 4) -> 12 records and (3 * 4 * 2) -> 24 files
        metadata=minimal_record,
        n_records=3,
        n_versions=4,
        n_files=2,
        event_data={'user_id': '1'},
        # (31 + 30) * 2 -> 122 event timestamps (61 days and 2 events/day)
        start_date=datetime(2018, 3, 1),
        end_date=datetime(2018, 5, 1),
        interval=timedelta(hours=12))

    # Events indices
    # 4 versions * 3 records * 2 files * 122 events -> 2928
    assert search.index('events-stats-file-download').count() == 2928
    # 4 versions * 3 records * 122 events -> 1464
    assert search.index('events-stats-record-view').count() == 1464

    # Aggregations indices
    # (4 versions + 1 concept) * 3 records -> 15 documents + 2 bookmarks
    q = search.index('stats-file-download')
    q = q.doc_type('file-download-day-aggregation')
    assert q.count() == 915  # 61 days * 15 records
    q = search.index('stats-record-view')
    q = q.doc_type('record-view-day-aggregation')
    assert q.count() == 915  # 61 days * 15 records

    # Records index
    for _, record, _ in records:
        doc = (RecordsSearch()
               .get_record(record.id)
               .source(include='_stats')
               .execute()[0])
        assert doc['_stats'] == {
            # 122 view events
            'views': 122.0,
            'version_views': 488.0,
            # events are 12 hours apart, so every view is unique
            'unique_views': 122.0,
            'version_unique_views': 122.0,
            # 122 download events * 2 files
            'downloads': 244.0,
            'version_downloads': 976.0,
            # events are 12 hours apart, so every download is unique
            'unique_downloads': 122.0,
            'version_unique_downloads': 122.0,
            # 122 download events * 2 files * 10 bytes
            'volume': 2440.0,
            'version_volume': 9760.0,
        }
Example #7
def test_basic_stats(app, db, es, locations, event_queues, minimal_record):
    """Test basic statistics results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (10 * 2) -> 20 records and (10 * 2 * 3) -> 60 files
        metadata=minimal_record,
        n_records=10,
        n_versions=2,
        n_files=3,
        event_data={'user_id': '1'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30))
    # Events indices
    # 2 versions * 10 records * 3 files * 4 events -> 240
    assert search.index('events-stats-file-download').count() == 240
    # 2 versions * 10 records * 4 events -> 80
    assert search.index('events-stats-record-view').count() == 80

    # Aggregations indices
    # (2 versions + 1 concept) * 10 records -> 30 aggregation documents; in
    # this older setup, 2 bookmark documents live in the same index.
    assert search.index('stats-file-download').count() == 32  # 30 + 2 bookmarks
    assert search.index('stats-record-view').count() == 32  # 30 + 2 bookmarks

    # Records index
    for _, record, _ in records:
        doc = (RecordsSearch()
               .get_record(record.id)
               .source(include='_stats')
               .execute()[0])
        assert doc['_stats'] == {
            # 4 view events
            'views': 4.0,
            'version_views': 8.0,
            # 4 view events over 2 different hours
            'unique_views': 2.0,
            'version_unique_views': 2.0,
            # 4 download events * 3 files
            'downloads': 12.0,
            'version_downloads': 24.0,
            # 4 download events * 3 files over 2 different hours
            'unique_downloads': 2.0,
            'version_unique_downloads': 2.0,
            # 4 download events * 3 files * 10 bytes
            'volume': 120.0,
            'version_volume': 240.0,
        }
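Two index-naming styles appear across these examples: the newer variants build prefixed names via SEARCH_INDEX_PREFIX and build_alias_name, while the older ones address unprefixed indices directly. Conceptually the prefixing amounts to prepending the configured prefix to the index name; a simplified illustration (the real invenio-search helper reads the prefix from the application config, and 'zenodo-' is just an assumed example value):

def prefixed(name, prefix='zenodo-'):
    # Simplified stand-in for invenio-search's prefix handling.
    return prefix + name

assert prefixed('events-stats-record-view') == 'zenodo-events-stats-record-view'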
Example #8
def test_update_record_statistics(app, db, es, locations, event_queues,
                                  minimal_record):
    """Test record statistics update task."""
    records = create_stats_fixtures(
        metadata=minimal_record, n_records=1, n_versions=5, n_files=3,
        event_data={'user_id': '1'},
        # 4 event timestamps (half-hours between 13:00 and 15:00)
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        # This also runs the task we want to test and indexes records.
        do_update_record_statistics=True)

    expected_stats = {
        'views': 4.0,
        'version_views': 20.0,
        'unique_views': 2.0,
        'version_unique_views': 2.0,
        'downloads': 12.0,
        'version_downloads': 60.0,
        'unique_downloads': 2.0,
        'version_unique_downloads': 2.0,
        'volume': 120.0,
        'version_volume': 600.0,
    }

    # Check current stats for all records
    for recid, _, _ in records:
        stats = get_record_stats(recid.object_uuid)
        assert stats == expected_stats

    # Perform a view and all-files download today on the first version
    recid_v1, record_v1, file_objects_v1 = records[0]
    with app.test_client() as client:
        for f in file_objects_v1:
            file_url = url_for('invenio_records_ui.recid_files',
                               pid_value=recid_v1.pid_value, filename=f.key)
            assert client.get(file_url).status_code == 200
        record_url = url_for(
            'invenio_records_ui.recid', pid_value=recid_v1.pid_value)
        assert client.get(record_url).status_code == 200

    process_events(['record-view', 'file-download'])
    current_search.flush_and_refresh(index='events-stats-*')
    aggregate_events(
        ['record-view-agg', 'record-view-all-versions-agg',
         'record-download-agg', 'record-download-all-versions-agg'])
    current_search.flush_and_refresh(index='stats-*')
    update_record_statistics()
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index='records')

    # Check current stats for all records
    stats = get_record_stats(recid_v1.object_uuid)
    assert stats == {
        'views': 5.0,
        'version_views': 21.0,
        'unique_views': 3.0,
        'version_unique_views': 3.0,
        'downloads': 15.0,
        'version_downloads': 63.0,
        'unique_downloads': 3.0,
        'version_unique_downloads': 3.0,
        'volume': 150.0,
        'version_volume': 630.0,
    }

    # Other versions will have only their `version_*` statistics updated
    expected_stats['version_views'] += 1
    expected_stats['version_unique_views'] += 1
    expected_stats['version_downloads'] += 3
    expected_stats['version_unique_downloads'] += 1
    expected_stats['version_volume'] += 30
    for recid, _, _ in records[1:]:
        stats = get_record_stats(recid.object_uuid)
        assert stats == expected_stats
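get_record_stats is a test helper whose implementation is not shown in these examples. Judging from how test_basic_stats reads statistics (fetching only the _stats field from the records index), a plausible minimal version would look like the following sketch; this is an assumption, not Zenodo's actual helper:

def get_record_stats(record_uuid):
    # Hypothetical reimplementation: fetch only the indexed _stats field.
    doc = current_search_client.get(
        index=build_alias_name('records'),
        id=str(record_uuid),
        params={'_source_includes': '_stats'},
    )
    return doc['_source'].get('_stats')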