Code example #1
File: test_event_emitters.py  Project: topless/zenodo
def test_file_download(app, db, es, event_queues, record_with_files_creation):
    """Test file download views."""
    recid, record, _ = record_with_files_creation
    record['conceptdoi'] = '10.1234/foo.concept'
    record['conceptrecid'] = 'foo.concept'
    record.commit()
    db.session.commit()

    with app.test_client() as client:
        file_url = url_for(
            'invenio_records_ui.recid_files',
            pid_value=recid.pid_value,
            filename='Test.pdf',
        )
        assert client.get(file_url).status_code == 200

    process_events(['file-download'])
    current_search.flush_and_refresh(index='events-stats-file-download')

    search = Search(using=es, index='events-stats-file-download')
    assert search.count() == 1
    doc = search.execute()[0]
    assert doc['doi'] == '10.1234/foo.bar'
    assert doc['conceptdoi'] == '10.1234/foo.concept'
    assert doc['recid'] == '12345'
    assert doc['conceptrecid'] == 'foo.concept'
    assert doc['resource_type'] == {'type': 'publication', 'subtype': 'book'}
    assert doc['access_right'] == 'open'
    assert doc['communities'] == ['zenodo']
    assert doc['owners'] == [1]
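
The tests on this page all revolve around the same cycle: an event lands on a stats queue (via a signal fired by the view, as above, or via current_stats.publish directly), process_events() drains the queue into Elasticsearch, and the events index is then refreshed and queried. A minimal standalone sketch of that cycle, with an illustrative event payload (the field values below are assumptions, not what the signal receivers actually produce):

from invenio_queues.proxies import current_queues
from invenio_search import current_search_client
from invenio_stats.proxies import current_stats
from invenio_stats.tasks import process_events

current_queues.declare()  # make sure the stats event queues exist
current_stats.publish('file-download', [{
    # illustrative payload; real events are built by the signal receivers
    'timestamp': '2018-01-01T12:00:00',
    'bucket_id': '11111111-1111-1111-1111-111111111111',
    'file_id': '22222222-2222-2222-2222-222222222222',
    'file_key': 'Test.pdf',
    'size': 9000,
}])
process_events(['file-download'])  # drain the queue and index into Elasticsearch
current_search_client.indices.refresh(index='events-stats-file-download')
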
Code example #2
File: test_event_emitters.py  Project: topless/zenodo
def test_record_page(app, db, es, event_queues, full_record):
    """Test record page views."""
    full_record['conceptdoi'] = '10.1234/foo.concept'
    full_record['conceptrecid'] = 'foo.concept'
    r = Record.create(full_record)
    PersistentIdentifier.create('recid',
                                '12345',
                                object_type='rec',
                                object_uuid=r.id,
                                status=PIDStatus.REGISTERED)
    db.session.commit()

    with app.test_client() as client:
        record_url = url_for('invenio_records_ui.recid', pid_value='12345')
        assert client.get(record_url).status_code == 200

    process_events(['record-view'])
    current_search.flush_and_refresh(index='events-stats-record-view')

    search = Search(using=es, index='events-stats-record-view')
    assert search.count() == 1
    doc = search.execute()[0]
    assert doc['doi'] == '10.1234/foo.bar'
    assert doc['conceptdoi'] == '10.1234/foo.concept'
    assert doc['recid'] == '12345'
    assert doc['conceptrecid'] == 'foo.concept'
    assert doc['resource_type'] == {'type': 'publication', 'subtype': 'book'}
    assert doc['access_right'] == 'open'
    assert doc['communities'] == ['zenodo']
    assert doc['owners'] == [1]
Code example #3
File: test_event_emitters.py  Project: slint/zenodo
def test_file_download(app, db, es, event_queues, record_with_files_creation):
    """Test file download views."""
    recid, record, _ = record_with_files_creation
    record['conceptdoi'] = '10.1234/foo.concept'
    record['conceptrecid'] = 'foo.concept'
    record.commit()
    db.session.commit()

    with app.test_client() as client:
        file_url = url_for(
            'invenio_records_ui.recid_files',
            pid_value=recid.pid_value,
            filename='Test.pdf',
        )
        assert client.get(file_url).status_code == 200

    process_events(['file-download'])
    current_search.flush_and_refresh(index='events-stats-file-download')

    search = Search(using=es, index='events-stats-file-download')
    assert search.count() == 1
    doc = search.execute()[0]
    assert doc['doi'] == '10.1234/foo.bar'
    assert doc['conceptdoi'] == '10.1234/foo.concept'
    assert doc['recid'] == '12345'
    assert doc['conceptrecid'] == 'foo.concept'
    assert doc['resource_type'] == {'type': 'publication', 'subtype': 'book'}
    assert doc['access_right'] == 'open'
    assert doc['communities'] == ['zenodo']
    assert doc['owners'] == [1]
Code example #4
File: test_event_emitters.py  Project: slint/zenodo
def test_record_page(app, db, es, event_queues, full_record):
    """Test record page views."""
    full_record['conceptdoi'] = '10.1234/foo.concept'
    full_record['conceptrecid'] = 'foo.concept'
    r = Record.create(full_record)
    PersistentIdentifier.create(
        'recid', '12345', object_type='rec', object_uuid=r.id,
        status=PIDStatus.REGISTERED)
    db.session.commit()

    with app.test_client() as client:
        record_url = url_for('invenio_records_ui.recid', pid_value='12345')
        assert client.get(record_url).status_code == 200

    process_events(['record-view'])
    current_search.flush_and_refresh(index='events-stats-record-view')

    search = Search(using=es, index='events-stats-record-view')
    assert search.count() == 1
    doc = search.execute()[0]
    assert doc['doi'] == '10.1234/foo.bar'
    assert doc['conceptdoi'] == '10.1234/foo.concept'
    assert doc['recid'] == '12345'
    assert doc['conceptrecid'] == 'foo.concept'
    assert doc['resource_type'] == {'type': 'publication', 'subtype': 'book'}
    assert doc['access_right'] == 'open'
    assert doc['communities'] == ['zenodo']
    assert doc['owners'] == [1]
Code example #5
File: app.py  Project: lnielsen/invenio-stats
def events():
    # Create events
    nb_days = 20
    day = datetime(2016, 12, 1, 0, 0, 0)
    max_events = 10
    random.seed(42)
    for _ in range(nb_days):
        publish_filedownload(random.randrange(1, max_events),
                             1, 'file1.txt', 1, 20, day)
        publish_filedownload(random.randrange(1, max_events),
                             1, 'file2.txt', 2, 20, day)
        day = day + timedelta(days=1)

    process_events(['file-download'])
    # flush elasticsearch indices so that the events become searchable
    current_search_client.indices.flush(index='*')
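
The publish_filedownload helper comes from the invenio-stats example application and is not reproduced on this page. A plausible sketch, assuming it simply builds nb_events file-download event dicts for the given day and hands them to current_stats.publish (the exact field names are assumptions):

from datetime import timedelta

from invenio_stats.proxies import current_stats


def publish_filedownload(nb_events, user_id, file_key, file_id, bucket_id, day):
    """Hypothetical sketch of the helper used above."""
    current_stats.publish('file-download', [
        dict(
            # spread the events over the day so they stay distinct
            timestamp=(day + timedelta(minutes=i)).isoformat(),
            bucket_id=str(bucket_id),
            file_id=str(file_id),
            file_key=file_key,
            size=9000,            # illustrative file size
            visitor_id=100,       # illustrative visitor identifier
            user_id=str(user_id),
        )
        for i in range(nb_events)
    ])
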
Code example #6
def test_double_clicks(app, mock_event_queue, es):
    """Test that events occurring within a time window are counted as 1."""
    event_type = 'file-download'
    events = [
        _create_file_download_event(date)
        for date in [
            (2000, 6, 1, 10, 0, 10),
            (2000, 6, 1, 10, 0, 11),
            (2000, 6, 1, 10, 0, 19),
            (2000, 6, 1, 10, 0, 22),
        ]
    ]
    current_queues.declare()
    current_stats.publish(event_type, events)
    process_events(['file-download'])
    es.indices.refresh(index='*')
    res = es.search(index='events-stats-file-download-2000-06-01')
    assert res['hits']['total'] == 2
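
_create_file_download_event is a helper from the invenio-stats test suite that builds one file-download event dict from a date tuple. A rough sketch of what it might look like (the field names and defaults are assumptions, not the library's actual helper):

import datetime


def _create_file_download_event(date, bucket_id='B1', file_id='F1',
                                file_key='test.pdf', size=9000, user_id='1'):
    """Hypothetical sketch of the test helper used above."""
    return dict(
        timestamp=datetime.datetime(*date).isoformat(),
        bucket_id=bucket_id,
        file_id=file_id,
        file_key=file_key,
        size=size,
        # a constant visitor, so events close in time look like double clicks
        visitor_id=100,
        user_id=user_id,
    )

With a fixed visitor and file, the four timestamps above fall into two short windows, which is presumably why only two documents end up in the index.
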
Code example #7
def events():
    # Create events
    nb_days = 20
    day = datetime(2016, 12, 1, 0, 0, 0)
    max_events = 10
    random.seed(42)
    for _ in range(nb_days):
        publish_filedownload(random.randrange(1, max_events), 1, 'file1.txt',
                             1, 20, day)
        publish_filedownload(random.randrange(1, max_events), 1, 'file2.txt',
                             2, 20, day)
        day = day + timedelta(days=1)

    process_events(['file-download'])
    # flush elasticsearch indices so that the events become searchable
    current_search_client.indices.flush(index='*')
Code example #8
def test_metric_aggregations(app, event_queues, es_with_templates):
    """Test aggregation metrics."""
    es = es_with_templates
    current_stats.publish('file-download', [
        _create_file_download_event(date, user_id='1')
        for date in [
            (2018, 1, 1, 12, 10), (2018, 1, 1, 12, 20), (2018, 1, 1, 12, 30),
            (2018, 1, 1, 13, 10), (2018, 1, 1, 13, 20), (2018, 1, 1, 13, 30),
            (2018, 1, 1, 14, 10), (2018, 1, 1, 14, 20), (2018, 1, 1, 14, 30),
            (2018, 1, 1, 15, 10), (2018, 1, 1, 15, 20), (2018, 1, 1, 15, 30),
        ]
    ])
    process_events(['file-download'])
    es.indices.refresh(index='*')

    StatAggregator(name='file-download-agg',
                   client=current_search_client,
                   event='file-download',
                   aggregation_field='file_id',
                   metric_aggregation_fields={
                       'unique_count': ('cardinality', 'unique_session_id', {
                           'precision_threshold': 1000
                       }),
                       'volume': ('sum', 'size', {})
                   },
                   aggregation_interval='day').run()
    es.indices.refresh(index='*')

    query = Search(using=current_search_client,
                   index='stats-file-download',
                   doc_type='file-download-day-aggregation')

    results = query.execute()
    assert len(results) == 1
    assert results[0].count == 12  # 3 downloads in each of 4 different hour slices
    assert results[0].unique_count == 4  # 4 different hour slices accessed
    assert results[0].volume == 9000 * 12
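
Each entry in metric_aggregation_fields pairs a result field with an Elasticsearch metric aggregation type and its options. Roughly the same aggregation, written directly with elasticsearch_dsl against the events index (index and field names taken from the example above), would look like this sketch:

from elasticsearch_dsl import Search

from invenio_search import current_search_client

s = Search(using=current_search_client,
           index='events-stats-file-download').extra(size=0)
per_file = s.aggs.bucket('per_file', 'terms', field='file_id')
per_file.metric('unique_count', 'cardinality',
                field='unique_session_id', precision_threshold=1000)
per_file.metric('volume', 'sum', field='size')
response = s.execute()  # only the aggregation results are of interest

The configured result field names ('unique_count', 'volume') end up as attributes on the aggregation documents, which is what the assertions above read back.
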
Code example #9
def test_overwriting_aggregations(app, es, event_queues, sequential_ids):
    """Check that the StatAggregator correctly starts from bookmark.

    1. Create sample file download event and process it.
    2. Run aggregator and write count, in aggregation index.
    3. Create new events and repeat procedure to assert that the
        results within the interval of the previous events
        overwrite the aggregation,
        by checking that the document version has increased.
    """
    for t in current_search.put_templates(ignore=[400]):
        pass

    class NewDate(datetime.datetime):
        """datetime.datetime mock."""
        # Aggregate at 12:00, thus the day will be aggregated again later
        current_date = (2017, 6, 2, 12)

        @classmethod
        def utcnow(cls):
            return cls(*cls.current_date)

    # Send some events
    event_type = 'file-download'
    events = [_create_file_download_event(date) for date in
              [(2017, 6, 1), (2017, 6, 2, 10)]]
    current_queues.declare()
    current_stats.publish(event_type, events)
    process_events(['file-download'])
    current_search_client.indices.flush(index='*')
    with patch('datetime.datetime', NewDate):
        aggregate_events(['file-download-agg'])

    # Check that the initial aggregation documents are at version 1.
    res = current_search_client.search(index='stats-file-download',
                                       version=True)
    for hit in res['hits']['hits']:
        if 'file_id' in hit['_source'].keys():
            assert hit['_version'] == 1

    # Send new events, some on the last aggregated day and some far
    # in the future.
    new_events = [_create_file_download_event(date) for date in
                  [(2017, 6, 2, 15),  # second event on the same date
                   (2017, 7, 1)]]
    current_stats.publish(event_type, new_events)
    process_events(['file-download'])
    current_search_client.indices.flush(index='*')

    # Aggregate again. The aggregation should start from the last bookmark.
    NewDate.current_date = (2017, 7, 2)
    with patch('datetime.datetime', NewDate):
        aggregate_events(['file-download-agg'])
    current_search_client.indices.flush(index='*')

    res = current_search_client.search(
        index='stats-file-download',
        doc_type='file-download-day-aggregation',
        version=True
    )
    for hit in res['hits']['hits']:
        if hit['_source']['timestamp'] == '2017-06-02T00:00:00':
            assert hit['_version'] == 2
            assert hit['_source']['count'] == 2
        else:
            assert hit['_version'] == 1
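
The NewDate subclass is the usual trick for freezing datetime.datetime.utcnow() in tests without a dedicated library such as freezegun: utcnow cannot be patched on the C-implemented datetime.datetime itself, so the whole class is swapped for a subclass with a fixed clock. A minimal standalone sketch of the same pattern (FixedDate is an illustrative name):

import datetime
from unittest.mock import patch


class FixedDate(datetime.datetime):
    """Subclass, so isinstance checks against datetime.datetime still pass."""

    current_date = (2017, 6, 2, 12)

    @classmethod
    def utcnow(cls):
        return cls(*cls.current_date)


with patch('datetime.datetime', FixedDate):
    assert datetime.datetime.utcnow() == datetime.datetime(2017, 6, 2, 12)
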
Code example #10
def test_overwriting_aggregations(app, es, event_queues, sequential_ids):
    """Check that the StatAggregator correctly starts from bookmark.

    1. Create sample file download event and process it.
    2. Run aggregator and write count, in aggregation index.
    3. Create new events and repeat procedure to assert that the
        results within the interval of the previous events
        overwrite the aggregation,
        by checking that the document version has increased.
    """
    for t in current_search.put_templates(ignore=[400]):
        pass

    class NewDate(datetime.datetime):
        """datetime.datetime mock."""
        # Aggregate at 12:00, thus the day will be aggregated again later
        current_date = (2017, 6, 2, 12)

        @classmethod
        def utcnow(cls):
            return cls(*cls.current_date)

    # Send some events
    event_type = 'file-download'
    events = [
        _create_file_download_event(date)
        for date in [(2017, 6, 1), (2017, 6, 2, 10)]
    ]
    current_queues.declare()
    current_stats.publish(event_type, events)
    process_events(['file-download'])
    current_search_client.indices.flush(index='*')
    with patch('datetime.datetime', NewDate):
        aggregate_events(['file-download-agg'])

    # Check that the initial aggregation documents are at version 1.
    res = current_search_client.search(index='stats-file-download',
                                       version=True)
    for hit in res['hits']['hits']:
        if 'file_id' in hit['_source'].keys():
            assert hit['_version'] == 1

    # Send new events, some on the last aggregated day and some far
    # in the future.
    new_events = [
        _create_file_download_event(date) for date in [
            (2017, 6, 2, 15),  # second event on the same date
            (2017, 7, 1)
        ]
    ]
    current_stats.publish(event_type, new_events)
    process_events(['file-download'])
    current_search_client.indices.flush(index='*')

    # Aggregate again. The aggregation should start from the last bookmark.
    NewDate.current_date = (2017, 7, 2)
    with patch('datetime.datetime', NewDate):
        aggregate_events(['file-download-agg'])
    current_search_client.indices.flush(index='*')

    res = current_search_client.search(
        index='stats-file-download',
        doc_type='file-download-day-aggregation',
        version=True)
    for hit in res['hits']['hits']:
        if hit['_source']['timestamp'] == '2017-06-02T00:00:00':
            assert hit['_version'] == 2
            assert hit['_source']['count'] == 2
        else:
            assert hit['_version'] == 1
Code example #11
File: test_stats_tasks.py  Project: slint/zenodo
def test_update_record_statistics(app, db, es, locations, event_queues,
                                  minimal_record):
    """Test record statistics update task."""
    records = create_stats_fixtures(
        metadata=minimal_record, n_records=1, n_versions=5, n_files=3,
        event_data={'user_id': '1'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        # This also runs the task we want to test and indexes records.
        do_update_record_statistics=True)

    expected_stats = {
        'views': 4.0,
        'version_views': 20.0,
        'unique_views': 2.0,
        'version_unique_views': 2.0,
        'downloads': 12.0,
        'version_downloads': 60.0,
        'unique_downloads': 2.0,
        'version_unique_downloads': 2.0,
        'volume': 120.0,
        'version_volume': 600.0,
    }

    # Check current stats for all records
    for recid, _, _ in records:
        stats = get_record_stats(recid.object_uuid)
        assert stats == expected_stats

    # Perform a view and all-files download today on the first version
    recid_v1, record_v1, file_objects_v1 = records[0]
    with app.test_client() as client:
        for f in file_objects_v1:
            file_url = url_for('invenio_records_ui.recid_files',
                               pid_value=recid_v1.pid_value, filename=f.key)
            assert client.get(file_url).status_code == 200
        record_url = url_for(
            'invenio_records_ui.recid', pid_value=recid_v1.pid_value)
        assert client.get(record_url).status_code == 200

    process_events(['record-view', 'file-download'])
    current_search.flush_and_refresh(index='events-stats-*')
    aggregate_events(
        ['record-view-agg', 'record-view-all-versions-agg',
         'record-download-agg', 'record-download-all-versions-agg'])
    current_search.flush_and_refresh(index='stats-*')
    update_record_statistics()
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index='records')

    # Check current stats for all records
    stats = get_record_stats(recid_v1.object_uuid)
    assert stats == {
        'views': 5.0,
        'version_views': 21.0,
        'unique_views': 3.0,
        'version_unique_views': 3.0,
        'downloads': 15.0,
        'version_downloads': 63.0,
        'unique_downloads': 3.0,
        'version_unique_downloads': 3.0,
        'volume': 150.0,
        'version_volume': 630.0,
    }

    # Other versions will have only their `version_*` statistics updated
    expected_stats['version_views'] += 1
    expected_stats['version_unique_views'] += 1
    expected_stats['version_downloads'] += 3
    expected_stats['version_unique_downloads'] += 1
    expected_stats['version_volume'] += 30
    for recid, _, _ in records[1:]:
        stats = get_record_stats(recid.object_uuid)
        assert stats == expected_stats
Code example #12
def create_stats_fixtures(metadata, n_records, n_versions, n_files,
                          event_data, start_date, end_date, interval,
                          do_process_events=True, do_aggregate_events=True,
                          do_update_record_statistics=True):
    """Generate configurable statistics fixtures.

    :param dict metadata: Base metadata for the created records.
    :param int n_records: Number of records that will be created.
    :param int n_versions: Number of versions for each record.
    :param int n_files: Number of files for each record version.
    :param dict event_data: Base event metadata (e.g. user, user agent, etc).
    :param datetime start_date: Start date for the generated events.
    :param datetime end_date: End date for the generated events.
    :param timedelta interval: Interval between each group of events.
    :param bool do_process_events: ``True`` will run the ``process_events``
        task.
    :param bool do_aggregate_events: ``True`` will run the ``aggregate_events``
        task.
    :param bool do_update_record_statistics: ``True`` will run the
        ``update_record_statistics`` task.
    """
    records = _create_records(
        metadata, total=n_records, versions=n_versions, files=n_files)

    @contextmanager
    def _patch_stats_publish():
        original_publish = current_stats.publish

        event_batches = defaultdict(list)

        def _patched_publish(self, event_type, events):
            events[0].update(event_data)
            event_batches[event_type].append(events[0])
        current_stats.publish = MethodType(_patched_publish, current_stats)
        yield
        current_stats.publish = original_publish
        for event_type, events in event_batches.items():
            current_stats.publish(event_type, events)

    with _patch_stats_publish():
        for ts in _gen_date_range(start_date, end_date, interval):
            event_data['timestamp'] = ts.isoformat()
            for recid, record, file_objects in records:
                with current_app.test_request_context():
                    record_viewed.send(current_app._get_current_object(),
                                       pid=recid, record=record)
                    for obj in file_objects:
                        file_downloaded.send(
                            current_app._get_current_object(),
                            obj=obj, record=record)
    if do_process_events:
        process_events(['record-view', 'file-download'])
        current_search.flush_and_refresh(index='events-stats-*')

    if do_aggregate_events:
        aggregate_events(
            ['record-view-agg', 'record-view-all-versions-agg',
             'record-download-agg', 'record-download-all-versions-agg'])
        current_search.flush_and_refresh(index='stats-*')

    if do_update_record_statistics:
        update_record_statistics(start_date=start_date.isoformat(),
                                 end_date=end_date.isoformat())
        RecordIndexer().process_bulk_queue()
        current_search.flush_and_refresh(index='records')

    return records
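
_create_records and _gen_date_range are sibling helpers that are not shown here. For _gen_date_range, a minimal sketch, assuming it yields evenly spaced timestamps from start_date up to (but not including) end_date:

def _gen_date_range(start_date, end_date, interval):
    """Hypothetical sketch: yield datetimes from start_date up to end_date."""
    current = start_date
    while current < end_date:
        yield current
        current += interval

With start_date at 13:00, end_date at 15:00 and a 30-minute interval this produces the four timestamps that the record-statistics tests above count on.
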
Code example #13
def test_update_record_statistics(app, db, es, locations, event_queues,
                                  minimal_record):
    """Test record statistics update task."""
    records = create_stats_fixtures(
        metadata=minimal_record,
        n_records=1,
        n_versions=5,
        n_files=3,
        event_data={'user_id': '1'},
        # 4 event timestamps (half-hours between 13:00-15:00)
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
        # This also runs the task we want to test and indexes records.
        do_update_record_statistics=True)

    expected_stats = {
        'views': 4.0,
        'version_views': 20.0,
        'unique_views': 2.0,
        'version_unique_views': 2.0,
        'downloads': 12.0,
        'version_downloads': 60.0,
        'unique_downloads': 2.0,
        'version_unique_downloads': 2.0,
        'volume': 120.0,
        'version_volume': 600.0,
    }

    # Check current stats for all records
    for recid, _, _ in records:
        stats = get_record_stats(recid.object_uuid)
        assert stats == expected_stats

    # Perform a view and all-files download today on the first version
    recid_v1, record_v1, file_objects_v1 = records[0]
    with app.test_client() as client:
        for f in file_objects_v1:
            file_url = url_for('invenio_records_ui.recid_files',
                               pid_value=recid_v1.pid_value,
                               filename=f.key)
            assert client.get(file_url).status_code == 200
        record_url = url_for('invenio_records_ui.recid',
                             pid_value=recid_v1.pid_value)
        assert client.get(record_url).status_code == 200

    process_events(['record-view', 'file-download'])
    current_search.flush_and_refresh(index='events-stats-*')
    aggregate_events([
        'record-view-agg', 'record-view-all-versions-agg',
        'record-download-agg', 'record-download-all-versions-agg'
    ])
    current_search.flush_and_refresh(index='stats-*')
    update_record_statistics()
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index='records')

    # Check current stats for all records
    stats = get_record_stats(recid_v1.object_uuid)
    assert stats == {
        'views': 5.0,
        'version_views': 21.0,
        'unique_views': 3.0,
        'version_unique_views': 3.0,
        'downloads': 15.0,
        'version_downloads': 63.0,
        'unique_downloads': 3.0,
        'version_unique_downloads': 3.0,
        'volume': 150.0,
        'version_volume': 630.0,
    }

    # Other versions will have only their `version_*` statistics updated
    expected_stats['version_views'] += 1
    expected_stats['version_unique_views'] += 1
    expected_stats['version_downloads'] += 3
    expected_stats['version_unique_downloads'] += 1
    expected_stats['version_volume'] += 30
    for recid, _, _ in records[1:]:
        stats = get_record_stats(recid.object_uuid)
        assert stats == expected_stats
Code example #14
def test_file_download_statistics(app, test_community, test_users,
                                  test_records, login_user):
    """Test checking a record's DOI using CLI commands."""
    with app.app_context():

        def url_for(*args, **kwargs):
            """Generate url using flask.url_for and the current app ctx."""
            with app.app_context():
                return flask_url_for(*args, **kwargs)

        # create user that will create the record and the files
        allowed_user = create_user('allowed')

        scopes = current_oauth2server.scope_choices()
        allowed_token = Token.create_personal('allowed_token',
                                              allowed_user.id,
                                              scopes=[s[0] for s in scopes])
        # application authentication token header
        allowed_headers = [('Authorization',
                            'Bearer {}'.format(allowed_token.access_token))]

        community_name = 'MyTestCommunity1'
        community = Community.get(name=community_name)
        com_admin = create_user('com_admin2', roles=[community.admin_role])
        com_admin_token = Token.create_personal('com_admin_token',
                                                com_admin.id,
                                                scopes=[s[0] for s in scopes])
        # application authentication token header
        com_admin_headers = [
            ('Authorization',
             'Bearer {}'.format(com_admin_token.access_token)),
            ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) '
             'AppleWebKit/537.36 (KHTML, like Gecko) '
             'Chrome/45.0.2454.101 Safari/537.36')
        ]
        publish_headers = [('Content-Type', 'application/json-patch+json'),
                           ('Accept', 'application/json')] + com_admin_headers
        submit_headers = [('Content-Type', 'application/json-patch+json'),
                          ('Accept', 'application/json')] + allowed_headers
        stats_headers = [('Content-Type', 'application/json')]

        test_records_data = [
            generate_record_data(community=test_community.name)
            for idx in range(1, 3)
        ]

        for record_data in test_records_data:
            with app.test_client() as client:
                login_user(allowed_user, client)

                record_list_url = (lambda **kwargs: url_for(
                    'b2share_records_rest.b2rec_list', **kwargs))

                headers = [('Content-Type', 'application/json'),
                           ('Accept', 'application/json')] + allowed_headers
                draft_create_res = client.post(record_list_url(),
                                               data=json.dumps(record_data),
                                               headers=headers)
                assert draft_create_res.status_code == 201
                draft_create_data = json.loads(
                    draft_create_res.get_data(as_text=True))

                uploaded_files = {
                    'myfile1.html': b'contents1',
                    'myfile2.html': b'contents2'
                }

                for file_key, file_content in uploaded_files.items():
                    # Test file upload
                    headers = [('Accept', '*/*'),
                               ('Content-Type', 'text/html; charset=utf-8')
                               ] + allowed_headers
                    object_url = '{0}/{1}'.format(
                        draft_create_data['links']['files'], file_key)
                    file_put_res = client.put(
                        object_url,
                        input_stream=BytesIO(file_content),
                        headers=headers)
                    assert file_put_res.status_code == 200
                    file_put_data = json.loads(
                        file_put_res.get_data(as_text=True))
                    assert 'created' in file_put_data

                    bucket_id = draft_create_data['links']['files'].split(
                        '/')[-1]
                    # make sure that downloads from deposits are skipped
                    client.get(
                        url_for('invenio_files_rest.object_api',
                                bucket_id=bucket_id,
                                key=file_key))
                    assert process_events(['file-download']) == \
                        [('file-download', (0, 0))]

                # test draft submit
                draft_submit_res = client.patch(
                    url_for('b2share_deposit_rest.b2dep_item',
                            pid_value=draft_create_data['id']),
                    data=json.dumps([{
                        "op": "replace",
                        "path": "/publication_state",
                        "value": PublicationStates.submitted.name
                    }]),
                    headers=submit_headers)
                assert draft_submit_res.status_code == 200

            with app.test_client() as client:
                login_user(com_admin, client)
                # test draft publish
                draft_publish_res = client.patch(
                    url_for('b2share_deposit_rest.b2dep_item',
                            pid_value=draft_create_data['id']),
                    data=json.dumps([{
                        "op": "replace",
                        "path": "/publication_state",
                        "value": PublicationStates.published.name
                    }]),
                    headers=publish_headers)

                assert draft_publish_res.status_code == 200
                draft_publish_data = json.loads(
                    draft_publish_res.get_data(as_text=True))

                # Test record GET
                record_get_res = client.get(
                    url_for('b2share_records_rest.b2rec_item',
                            pid_value=draft_publish_data['id']),
                    headers=headers)
                assert record_get_res.status_code == 200
                record_get_data = json.loads(
                    record_get_res.get_data(as_text=True))

                # make sure that templates are in the ES
                list(current_search.put_templates())

                # test that a record is accessible through the rest api
                file1 = record_get_data['files'][0]

                # download once
                client.get(url_for('invenio_files_rest.object_api',
                                   bucket_id=file1['bucket'],
                                   key=file1['key']),
                           headers=com_admin_headers)
                # make sure that the queue contains the event
                assert list(
                    current_queues.queues['stats-file-download'].consume())

                # download again
                client.get(url_for('invenio_files_rest.object_api',
                                   bucket_id=file1['bucket'],
                                   key=file1['key']),
                           headers=com_admin_headers)

                process_events(['file-download'])
                current_search_client.indices.refresh('*')
                # make sure that new index for events is created in ES
                current_search_client.indices.exists(
                    index='events-stats-file-download')

                aggregate_events(['file-download-agg'])
                current_search_client.indices.refresh('*')

                # make sure that new aggregation index is created in ES
                current_search_client.indices.exists(
                    index='stats-file-download')

                stats_ret = client.post(
                    url_for('invenio_stats.stat_query'),
                    data=json.dumps({
                        'mystat': {
                            'stat': 'bucket-file-download-total',
                            'params': {
                                'start_date': '2017-01-01',
                                'bucket_id': file1['bucket'],
                            }
                        }
                    }),
                    headers=stats_headers)
                stats_ret_data = json.loads(stats_ret.get_data(as_text=True))
                assert stats_ret_data['mystat']['buckets'][0]['value'] == 1.0
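
For comparison, roughly the same total could be read straight from the aggregation index with elasticsearch_dsl. A sketch, assuming the daily file-download aggregation documents carry a bucket_id field and a per-file count (as the stats query above implies):

from elasticsearch_dsl import Search

from invenio_search import current_search_client

bucket_id = file1['bucket']  # the bucket queried through the REST API above
s = Search(using=current_search_client, index='stats-file-download') \
    .filter('term', bucket_id=bucket_id) \
    .extra(size=0)
s.aggs.metric('total', 'sum', field='count')
total_downloads = s.execute().aggregations.total.value
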
Code example #15
File: stats_helpers.py  Project: zenodo/zenodo
def create_stats_fixtures(metadata, n_records, n_versions, n_files,
                          event_data, start_date, end_date, interval,
                          do_process_events=True, do_aggregate_events=True,
                          do_update_record_statistics=True):
    """Generate configurable statistics fixtures.

    :param dict metadata: Base metadata for the created records.
    :param int n_records: Number of records that will be created.
    :param int n_versions: Number of versions for each record.
    :param int n_files: Number of files for each record version.
    :param dict event_data: Base event metadata (e.g. user, user agent, etc).
    :param datetime start_date: Start date for the generated events.
    :param datetime end_date: End date for the generated events.
    :param timedelta interval: Interval between each group of events.
    :param bool do_process_events: ``True`` will run the ``process_events``
        task.
    :param bool do_aggregate_events: ``True`` will run the ``aggregate_events``
        task.
    :param bool do_update_record_statistics: ``True`` will run the
        ``update_record_statistics`` task.
    """
    records = _create_records(
        metadata, total=n_records, versions=n_versions, files=n_files)

    @contextmanager
    def _patch_stats_publish():
        original_publish = current_stats.publish

        event_batches = defaultdict(list)

        def _patched_publish(self, event_type, events):
            events[0].update(event_data)
            event_batches[event_type].append(events[0])
        current_stats.publish = MethodType(_patched_publish, current_stats)
        yield
        current_stats.publish = original_publish
        for event_type, events in event_batches.items():
            current_stats.publish(event_type, events)

    with _patch_stats_publish():
        for ts in _gen_date_range(start_date, end_date, interval):
            event_data['timestamp'] = ts.isoformat()
            for recid, record, file_objects in records:
                with current_app.test_request_context():
                    record_viewed.send(current_app._get_current_object(),
                                       pid=recid, record=record)
                    for obj in file_objects:
                        file_downloaded.send(
                            current_app._get_current_object(),
                            obj=obj, record=record)
    if do_process_events:
        process_events(['record-view', 'file-download'])
        current_search.flush_and_refresh(index='events-stats-*')

    if do_aggregate_events:
        aggregate_events(
            ['record-view-agg', 'record-view-all-versions-agg',
             'record-download-agg', 'record-download-all-versions-agg'])
        current_search.flush_and_refresh(index='stats-*')

    if do_update_record_statistics:
        update_record_statistics(start_date=start_date.isoformat(),
                                 end_date=end_date.isoformat())
        RecordIndexer().process_bulk_queue()
        current_search.flush_and_refresh(index='records')

    return records