def test_create_put_and_delete(app):
    runner = CliRunner()
    script_info = ScriptInfo(create_app=lambda: app)
    name = "test-index-name"

    result = runner.invoke(
        cmd,
        [
            "create",
            "--verbose",
            name,
            "--body",
            "./tests/mock_module/mappings/authors/authors-v1.0.0.json",
        ],
        obj=script_info,
    )
    assert result.exit_code == 0
    assert name in list(current_search_client.indices.get("*").keys())

    doc_type = "_doc" if ES_VERSION[0] > 5 else "recid"
    result = runner.invoke(
        cmd,
        [
            "put",
            name,
            doc_type,
            "--verbose",
            "--identifier",
            1,
            "--body",
            "./tests/mock_module/mappings/authors/authors-v1.0.0.json",
        ],
        obj=script_info,
    )
    assert result.exit_code == 0
    current_search_client.get(index=name, doc_type=doc_type, id=1)
    with pytest.raises(NotFoundError):
        current_search_client.get(index=name, doc_type=doc_type, id=2)

    result = runner.invoke(
        cmd,
        [
            "delete",
            "--verbose",
            "--yes-i-know",
            "--force",
            name,
        ],
        obj=script_info,
    )
    assert result.exit_code == 0
    assert name not in list(current_search_client.indices.get("*").keys())
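The snippets on this page are shown without their imports; the header below is a best-guess reconstruction, and the module paths (e.g. invenio_search.cli) are assumptions inferred from the identifiers used:

# Presumed imports for the CLI tests on this page; the module paths are
# assumptions inferred from the identifiers in the snippets.
import pytest
from click.testing import CliRunner
from elasticsearch import VERSION as ES_VERSION
from elasticsearch.exceptions import NotFoundError
from flask.cli import ScriptInfo
from invenio_search import current_search_client
from invenio_search.cli import index as cmd  # the `index` command group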
Example #2
def test_basic_stats(app, db, es, locations, event_queues, minimal_record):
    """Test basic statistics results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (10 * 2) -> 20 records and (10 * 2 * 3) -> 60 files
        metadata=minimal_record,
        n_records=10,
        n_versions=2,
        n_files=3,
        event_data={'user_id': '1'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30))

    # Events indices
    prefix = app.config['SEARCH_INDEX_PREFIX']

    # 2 versions * 10 records * 3 files * 4 events -> 240
    assert search.index(prefix + 'events-stats-file-download').count() == 240
    # 2 versions * 10 records * 4 events -> 80
    assert search.index(prefix + 'events-stats-record-view').count() == 80

    # Aggregations indices
    # (2 versions + 1 concept) * 10 records -> 30 documents + 2 bookmarks

    # 30 documents
    assert search.index(prefix + 'stats-file-download').count() == 30

    # 30 documents
    assert search.index(prefix + 'stats-record-view').count() == 30

    # 2 file-download bookmarks + 2 record-view bookmarks
    assert search.index(prefix + 'stats-bookmarks').count() == 4

    # Records index
    for _, record, _ in records:
        doc = current_search_client.get(
            index=build_alias_name('records'),
            id=str(record.id),
            params={'_source_includes': '_stats'},
        )
        assert doc['_source']['_stats'] == {
            # 4 view events
            'views': 4.0,
            'version_views': 8.0,
            # 4 view events over 2 different hours
            'unique_views': 2.0,
            'version_unique_views': 2.0,
            # 4 download events * 3 files
            'downloads': 12.0,
            'version_downloads': 24.0,
            # 4 download events * 3 files over 2 different hours
            'unique_downloads': 2.0,
            'version_unique_downloads': 2.0,
            # 4 download events * 3 files * 10 bytes
            'volume': 120.0,
            'version_volume': 240.0,
        }
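As a standalone sanity check, the expected counts follow directly from the fixture parameters (the 4-timestamp count assumes the date range is end-exclusive):

# Standalone check of the arithmetic behind the assertions above.
n_records, n_versions, n_files = 10, 2, 3
n_events = 4  # every 30 min from 13:00, end-exclusive -> 4 timestamps
assert n_versions * n_records * n_files * n_events == 240  # file downloads
assert n_versions * n_records * n_events == 80             # record views
assert (n_versions + 1) * n_records == 30                  # agg docs per type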
Example #3
def test_large_stats(app, db, es, locations, event_queues, minimal_record):
    """Test a larger number of events, aggregations, and results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (3 * 4) -> 12 records and (3 * 4 * 2) -> 24 files
        metadata=minimal_record,
        n_records=3,
        n_versions=4,
        n_files=2,
        event_data={'user_id': '1'},
        # (31 + 30) * 2 -> 122 event timestamps (61 days and 2 events/day)
        start_date=datetime(2018, 3, 1),
        end_date=datetime(2018, 5, 1),
        interval=timedelta(hours=12))

    # Events indices
    prefix = app.config['SEARCH_INDEX_PREFIX']

    # 4 versions * 3 records * 2 files * 122 events -> 2928
    assert search.index(prefix + 'events-stats-file-download').count() == 2928
    # 4 versions * 3 records * 122 events -> 1464
    assert search.index(prefix + 'events-stats-record-view').count() == 1464

    # Aggregations indices
    # (4 versions + 1 concept) * 3 records -> 15 documents + 2 bookmarks
    q = search.index(prefix + 'stats-file-download')
    q = q.doc_type('file-download-day-aggregation')
    assert q.count() == 915  # 61 days * 15 documents/day
    q = search.index(prefix + 'stats-record-view')
    q = q.doc_type('record-view-day-aggregation')
    assert q.count() == 915  # 61 days * 15 documents/day

    # Records index
    for _, record, _ in records:
        doc = current_search_client.get(
            index=build_alias_name('records'),
            id=str(record.id),
            params={'_source_includes': '_stats'},
        )
        assert doc['_source']['_stats'] == {
            # 122 view events per record
            'views': 122.0,
            'version_views': 488.0,
            # 122 view events, each in a different hour
            'unique_views': 122.0,
            'version_unique_views': 122.0,
            # 122 download events * 2 files
            'downloads': 244.0,
            'version_downloads': 976.0,
            # 122 download events, each in a different hour
            'unique_downloads': 122.0,
            'version_unique_downloads': 122.0,
            # 122 download events * 2 files * 10 bytes
            'volume': 2440.0,
            'version_volume': 9760.0,
        }
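The same arithmetic scales to the larger fixture:

# Standalone check of the arithmetic behind the larger fixture.
n_records, n_versions, n_files = 3, 4, 2
n_events = 122  # 61 days (March + April 2018) * 2 events/day
assert n_versions * n_records * n_files * n_events == 2928  # file downloads
assert n_versions * n_records * n_events == 1464            # record views
assert 61 * (n_versions + 1) * n_records == 915             # daily agg docs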
Example #4
    def init(self, dry_run=False):
        """Initialize the index with recipe and jobs documents."""
        if not dry_run:
            if not current_search_client.indices.exists(index=self.index):
                self.create_index()
            try:
                current_search_client.get(index=self.index, id=self.name)
                raise Exception(
                    'The document {} already exists, a job is already '
                    'active.'.format(self.name))
            except NotFoundError:
                pass

        # Build the job instances and compute their initial states
        jobs = {}
        for job_name, job_config in self.config['jobs'].items():
            job_cls = obj_or_import_string(job_config['cls'])
            job = job_cls(job_name, self, config=job_config)
            initial_state = job.initial_state(dry_run=dry_run)
            jobs[job_name] = (job, initial_state)
        self.jobs = jobs

        if not dry_run:
            migration_initial_state = {
                "type": "migration",
                "config": self.config,
                "status": "INITIAL",
                "job_ids": [job.document_name for job, _ in self.jobs.values()],
            }
            self.state.commit(migration_initial_state)

            for job, initial_state in self.jobs.values():
                job.state.commit(initial_state)
                job.create_index(initial_state["dst"]["index"])
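A hypothetical call site for this initializer; the Migration class name and constructor shape are inferred from the create_from_state example at the end of this page, not confirmed by the source:

# Hypothetical usage of init(); names here are illustrative only.
migration = Migration('my-recipe', **recipe_config)
migration.init(dry_run=True)   # compute initial job states, touch nothing in ES
migration.init()               # create the index, persist migration/job state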
Example #5
def get_record_stats(recordid, throws=True):
    """Fetch record statistics from Elasticsearch."""
    try:
        res = current_search_client.get(
            index=build_alias_name('records'),
            id=recordid,
            params={'_source_includes': '_stats'},
        )
        return res['_source']['_stats']
    except NotFoundError:
        return None
    except Exception:
        if throws:
            raise
        return None
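A usage sketch for this helper; the record id is a placeholder, and the '_stats' keys match those asserted in the tests above:

# Usage sketch; '<record-uuid>' is a placeholder identifier.
stats = get_record_stats('<record-uuid>', throws=False)
if stats is not None:
    print(stats['views'], stats['downloads'], stats['volume'])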
Example #6
def test_record_indexing(app, queue):
    """Run record autoindexer."""
    @before_record_index.connect_via(app)
    def remove_schema(sender, json=None, record=None):
        if '$schema' in json:
            del json['$schema']

    models_committed.connect(process_models_committed_signal, sender=app)

    with app.app_context():

        current_search_client.indices.delete_alias('_all',
                                                   '_all',
                                                   ignore=[400, 404])
        current_search_client.indices.delete('*')
        aliases = current_search_client.indices.get_aliases()
        assert 0 == len(aliases)

    runner = CliRunner()
    script_info = ScriptInfo(create_app=lambda info: app)

    with runner.isolated_filesystem():
        result = runner.invoke(cmd, ['destroy', '--yes-i-know'],
                               obj=script_info)
        result = runner.invoke(cmd, ['init'], obj=script_info)
        assert 0 == result.exit_code

    with app.app_context():
        from invenio_db import db
        from invenio_records.models import RecordMetadata
        with db.session.begin_nested():
            record1 = RecordMetadata(
                json={
                    '$schema': (
                        'http://example.com/schemas/'  # external site
                        'records/default-v1.0.0.json'),
                    'title': 'Test1',
                })
            db.session.add(record1)
            record2 = RecordMetadata(
                json={
                    '$schema': {
                        '$ref': (
                            'http://example.com/schemas/'  # external site
                            'records/default-v1.0.0.json')
                    },
                    'title': 'Test2',
                })
            db.session.add(record2)
        db.session.commit()

        record_indexer = RecordIndexer(queue=queue)
        result = record_indexer.process_bulk_queue()
        assert 2 == len(list(result))

        response = current_search_client.get(
            index='records-default-v1.0.0',
            id=record1.id,
        )
        assert str(record1.id) == response['_id']

        response = current_search_client.get(
            index='records-default-v1.0.0',
            id=record2.id,
        )
        assert str(record2.id) == response['_id']

        db.session.delete(record1)
        db.session.commit()

        record_indexer.process_bulk_queue()

        response = current_search_client.get(
            index='records-default-v1.0.0',
            id=record1.id,
            ignore=404,
        )
        assert not response['found']

    # Clean-up:
    with app.app_context():
        result = runner.invoke(cmd, ['destroy', '--yes-i-know'],
                               obj=script_info)
        assert 0 == result.exit_code
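For context, the two records reach the indexing queue via the models_committed signal; the sketch below does the same enqueueing explicitly (bulk_index is, to the best of my knowledge, the invenio-indexer API for queuing record ids, so treat the exact call as an assumption):

# Explicit equivalent of the enqueueing the signal handler performs:
# push the record ids onto the queue, then drain it with one bulk request.
indexer = RecordIndexer(queue=queue)
indexer.bulk_index([str(record1.id), str(record2.id)])
assert 2 == len(list(indexer.process_bulk_queue()))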
Example #7
    @classmethod
    def create_from_state(cls, recipe_name, **recipe_config):
        """Create a `Migration` instance from the state persisted in ES."""
        document = current_search_client.get(
            index=current_index_migrator.config_index, id=recipe_name)
        return cls(recipe_name, **document["_source"]["config"])
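A usage sketch for this classmethod; the Migration class name is taken from its docstring:

# Hypothetical usage: rebuild a Migration from the state persisted in ES.
migration = Migration.create_from_state('my-recipe')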