def test_create_put_and_delete(app):
    """Test the ``create``, ``put`` and ``delete`` index CLI commands."""
    runner = CliRunner()
    script_info = ScriptInfo(create_app=lambda: app)

    name = "test-index-name"
    result = runner.invoke(
        cmd,
        [
            "create",
            "--verbose",
            name,
            "--body",
            "./tests/mock_module/mappings/authors/authors-v1.0.0.json",
        ],
        obj=script_info,
    )
    assert result.exit_code == 0
    assert name in list(current_search_client.indices.get("*").keys())

    doc_type = "_doc" if ES_VERSION[0] > 5 else "recid"
    result = runner.invoke(
        cmd,
        [
            "put",
            name,
            doc_type,
            "--verbose",
            "--identifier",
            "1",  # CLI arguments are passed as strings
            "--body",
            "./tests/mock_module/mappings/authors/authors-v1.0.0.json",
        ],
        obj=script_info,
    )
    assert result.exit_code == 0
    current_search_client.get(index=name, doc_type=doc_type, id=1)
    with pytest.raises(NotFoundError):
        current_search_client.get(index=name, doc_type=doc_type, id=2)

    result = runner.invoke(
        cmd,
        ["delete", "--verbose", "--yes-i-know", "--force", name],
        obj=script_info,
    )
    assert result.exit_code == 0
    assert name not in list(current_search_client.indices.get("*").keys())

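# A minimal sketch of the same create/index/get/delete round trip driven
# directly through ``current_search_client`` (the elasticsearch-py client
# proxy) instead of the CLI. The index name and document body here are
# hypothetical, and the calls assume an application context in which the
# proxy is bound.
def roundtrip_sketch():
    from invenio_search import current_search_client

    current_search_client.indices.create(index="sketch-index")
    current_search_client.index(
        index="sketch-index", id=1, body={"title": "A title"}
    )
    doc = current_search_client.get(index="sketch-index", id=1)
    assert doc["_source"]["title"] == "A title"
    current_search_client.indices.delete(index="sketch-index")
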
def test_basic_stats(app, db, es, locations, event_queues, minimal_record):
    """Test basic statistics results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (10 * 2) -> 20 records and (10 * 2 * 3) -> 60 files
        metadata=minimal_record, n_records=10, n_versions=2, n_files=3,
        event_data={'user_id': '1'},
        # 4 event timestamps
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30))

    # Events indices
    prefix = app.config['SEARCH_INDEX_PREFIX']
    # 2 versions * 10 records * 3 files * 4 events -> 240
    assert search.index(prefix + 'events-stats-file-download').count() == 240
    # 2 versions * 10 records * 4 events -> 80
    assert search.index(prefix + 'events-stats-record-view').count() == 80

    # Aggregations indices
    # (2 versions + 1 concept) * 10 records -> 30 documents
    assert search.index(prefix + 'stats-file-download').count() == 30
    assert search.index(prefix + 'stats-record-view').count() == 30
    # 2 bookmarks each from the file-download and record-view aggregations
    assert search.index(prefix + 'stats-bookmarks').count() == 4

    # Records index
    for _, record, _ in records:
        doc = current_search_client.get(
            index=build_alias_name('records'),
            id=str(record.id),
            params={'_source_includes': '_stats'},
        )
        assert doc['_source']['_stats'] == {
            # 4 view events
            'views': 4.0,
            'version_views': 8.0,
            # 4 view events over 2 different hours
            'unique_views': 2.0,
            'version_unique_views': 2.0,
            # 4 download events * 3 files
            'downloads': 12.0,
            'version_downloads': 24.0,
            # 4 download events * 3 files over 2 different hours
            'unique_downloads': 2.0,
            'version_unique_downloads': 2.0,
            # 4 download events * 3 files * 10 bytes
            'volume': 120.0,
            'version_volume': 240.0,
        }

def test_large_stats(app, db, es, locations, event_queues, minimal_record):
    """Test a larger number of events, aggregations, and results."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (3 * 4) -> 12 records and (3 * 4 * 2) -> 24 files
        metadata=minimal_record, n_records=3, n_versions=4, n_files=2,
        event_data={'user_id': '1'},
        # (31 + 30) * 2 -> 122 event timestamps (61 days, 2 events/day)
        start_date=datetime(2018, 3, 1),
        end_date=datetime(2018, 5, 1),
        interval=timedelta(hours=12))

    # Events indices
    prefix = app.config['SEARCH_INDEX_PREFIX']
    # 4 versions * 3 records * 2 files * 122 events -> 2928
    assert search.index(prefix + 'events-stats-file-download').count() == 2928
    # 4 versions * 3 records * 122 events -> 1464
    assert search.index(prefix + 'events-stats-record-view').count() == 1464

    # Aggregations indices
    # (4 versions + 1 concept) * 3 records -> 15 documents per day
    q = search.index(prefix + 'stats-file-download')
    q = q.doc_type('file-download-day-aggregation')
    assert q.count() == 915  # 61 days * 15 records

    q = search.index(prefix + 'stats-record-view')
    q = q.doc_type('record-view-day-aggregation')
    assert q.count() == 915  # 61 days * 15 records

    # Records index
    for _, record, _ in records:
        doc = current_search_client.get(
            index=build_alias_name('records'),
            id=str(record.id),
            params={'_source_includes': '_stats'},
        )
        assert doc['_source']['_stats'] == {
            # 122 view events
            'views': 122.0,
            # 4 versions * 122 view events
            'version_views': 488.0,
            # each event falls in a distinct 12-hour slot -> all unique
            'unique_views': 122.0,
            'version_unique_views': 122.0,
            # 122 download events * 2 files
            'downloads': 244.0,
            # 4 versions * 244 downloads
            'version_downloads': 976.0,
            # one unique download per event timestamp
            'unique_downloads': 122.0,
            'version_unique_downloads': 122.0,
            # 122 download events * 2 files * 10 bytes
            'volume': 2440.0,
            # 4 versions * 2440 bytes
            'version_volume': 9760.0,
        }

def init(self, dry_run=False):
    """Initialize the index with recipe and jobs documents."""
    if not dry_run:
        if not current_search_client.indices.exists(index=self.index):
            self.create_index()
        try:
            current_search_client.get(index=self.index, id=self.name)
            raise Exception(
                'The document {} already exists, a job is already '
                'active.'.format(self.state.index))
        except NotFoundError:
            pass

    # Get old indices
    jobs = {}
    for job_name, job_config in self.config['jobs'].items():
        job = obj_or_import_string(job_config['cls'])(
            job_name, self, config=job_config)
        initial_state = job.initial_state(dry_run=dry_run)
        jobs[job_name] = (job, initial_state)
    self.jobs = jobs

    if not dry_run:
        migration_initial_state = {
            "type": "migration",
            "config": self.config,
            "status": "INITIAL",
            "job_ids": [job.document_name for job, _ in self.jobs.values()],
        }
        self.state.commit(migration_initial_state)
        for job, initial_state in self.jobs.values():
            job.state.commit(initial_state)
            job.create_index(initial_state["dst"]["index"])

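# Hypothetical usage sketch for ``init`` above. A dry run only computes each
# job's initial state; a real run also persists the migration/job state
# documents and creates the destination indices. The ``Migration`` name and
# the recipe layout below are assumptions for illustration, not a confirmed
# API.
recipe = {
    'jobs': {
        'reindex-records': {
            'cls': 'my_module.jobs:ReindexJob',  # hypothetical job class
        },
    },
}
migration = Migration('my-recipe', config=recipe)  # hypothetical constructor
migration.init(dry_run=True)  # plan only: no documents written
migration.init()              # commit states and create destination indices
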
def get_record_stats(recordid, throws=True):
    """Fetch record statistics from Elasticsearch."""
    try:
        res = current_search_client.get(
            index=build_alias_name('records'),
            id=recordid,
            params={'_source_includes': '_stats'},
        )
        return res['_source']['_stats']
    except NotFoundError:
        # Record not indexed (yet): treat as "no stats".
        return None
    except Exception:
        if throws:
            raise
        # Swallow unexpected errors when the caller opts out of raising.
        return None

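# Example use of ``get_record_stats``: with ``throws=False``, unexpected
# search errors are swallowed and ``None`` comes back, so "no stats yet" and
# "stats unavailable" can be handled the same way. The record id below is a
# placeholder.
stats = get_record_stats('00000000-0000-0000-0000-000000000000',
                         throws=False)
views = stats.get('views', 0) if stats else 0
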
def test_record_indexing(app, queue):
    """Run record autoindexer."""
    @before_record_index.connect_via(app)
    def remove_schema(sender, json=None, record=None):
        if '$schema' in json:
            del json['$schema']

    models_committed.connect(process_models_committed_signal, sender=app)

    with app.app_context():
        current_search_client.indices.delete_alias('_all', '_all',
                                                   ignore=[400, 404])
        current_search_client.indices.delete('*')
        aliases = current_search_client.indices.get_aliases()
        assert 0 == len(aliases)

    runner = CliRunner()
    script_info = ScriptInfo(create_app=lambda info: app)

    with runner.isolated_filesystem():
        result = runner.invoke(cmd, ['destroy', '--yes-i-know'],
                               obj=script_info)
        result = runner.invoke(cmd, ['init'], obj=script_info)
        assert 0 == result.exit_code

    with app.app_context():
        from invenio_records.models import RecordMetadata
        with db.session.begin_nested():
            record1 = RecordMetadata(json={
                '$schema': ('http://example.com/schemas/'  # external site
                            'records/default-v1.0.0.json'),
                'title': 'Test1',
            })
            db.session.add(record1)
            record2 = RecordMetadata(json={
                '$schema': {
                    '$ref': ('http://example.com/schemas/'  # external site
                             'records/default-v1.0.0.json')
                },
                'title': 'Test2',
            })
            db.session.add(record2)
        db.session.commit()

        record_indexer = RecordIndexer(queue=queue)
        result = record_indexer.process_bulk_queue()
        assert 2 == len(list(result))

        response = current_search_client.get(
            index='records-default-v1.0.0',
            id=record1.id,
        )
        assert str(record1.id) == response['_id']

        response = current_search_client.get(
            index='records-default-v1.0.0',
            id=record2.id,
        )
        assert str(record2.id) == response['_id']

        db.session.delete(record1)
        db.session.commit()

        record_indexer.process_bulk_queue()
        response = current_search_client.get(
            index='records-default-v1.0.0',
            id=record1.id,
            ignore=404,
        )
        assert not response['found']

    # Clean-up:
    with app.app_context():
        result = runner.invoke(cmd, ['destroy', '--yes-i-know'],
                               obj=script_info)
        assert 0 == result.exit_code

@classmethod
def create_from_state(cls, recipe_name, **recipe_config):
    """Create `Migration` instance from ES state."""
    document = current_search_client.get(
        index=current_index_migrator.config_index,
        id=recipe_name,
    )
    return cls(recipe_name, **document["_source"]["config"])

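# Hypothetical counterpart to ``create_from_state``: once ``init`` has stored
# the recipe document, a later process can rebuild the same ``Migration``
# from Elasticsearch alone. The recipe name is a placeholder.
migration = Migration.create_from_state('my-recipe')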