def test_record_index_after_update(app, test_users, test_records, script_info,
                                   login_user):
    """Check that updating a record also reindexes it.

    Updates the first test record's title, runs the bulk-indexing task
    synchronously, then searches via the REST API and verifies the *indexed*
    document carries the new title.
    """
    creator = test_users['deposits_creator']
    with app.app_context():
        rec = Record.get_record(test_records[0].record_id)
        pid = test_records[0].pid
        rec.update({'title': 'my modified title'})
        # NOTE(review): Record.update() only mutates the in-memory dict;
        # this assumes the surrounding signals/fixtures persist and enqueue
        # the change for indexing -- confirm no rec.commit() is required.
        # execute scheduled tasks synchronously
        process_bulk_queue.delay()
        # flush the indices so that indexed records are searchable
        current_search_client.indices.flush('*')

    search_url = url_for('b2share_records_rest.b2rec_list')
    headers = [('Content-Type', 'application/json'),
               ('Accept', 'application/json')]
    with app.test_client() as client:
        record_search_res = client.get(search_url, data='', headers=headers)
        assert record_search_res.status_code == 200
        record_search_data = json.loads(
            record_search_res.get_data(as_text=True))
        assert record_search_data['hits']['total'] == len(test_records)
        # Use a non-shadowing loop variable: the original comprehension
        # reused the name ``rec``.
        found_rec = [hit for hit in record_search_data['hits']['hits']
                     if hit['id'] == pid][0]
        # FIX: the original asserted ``rec['title']`` -- the in-memory
        # record that was just updated above -- which is trivially true and
        # never exercised the search index.  Assert on the indexed hit.
        # NOTE(review): assumes the REST serializer nests the record fields
        # under 'metadata' -- adjust the key if fields are inlined.
        assert found_rec['metadata']['title'] == 'my modified title'
def elasticsearch_index_reindex(alembic, verbose):
    """Reindex records."""
    # Build a query for every registered record/deposit PID and keep only
    # the object UUID column.
    pid_query = PersistentIdentifier.query.filter_by(
        object_type='rec', status=PIDStatus.REGISTERED)
    pid_query = pid_query.filter(
        PersistentIdentifier.pid_type.in_(['b2rec', 'b2dep']))
    # ``values`` yields one-element row tuples; unwrap them lazily.
    uuids = (row[0] for row in pid_query.values(
        PersistentIdentifier.object_uuid))
    # Enqueue every UUID for bulk indexing and trigger the consumer task.
    RecordIndexer().bulk_index(uuids)
    process_bulk_queue.delay()
def elasticsearch_index_reindex(alembic, verbose):
    """Reindex records."""
    # Registered record/deposit PIDs whose linked objects must be reindexed.
    registered_pids = PersistentIdentifier.query.filter_by(
        object_type='rec',
        status=PIDStatus.REGISTERED,
    ).filter(PersistentIdentifier.pid_type.in_(['b2rec', 'b2dep']))
    # Each row from ``values`` is a 1-tuple; extract the UUID lazily.
    object_uuids = (
        row[0]
        for row in registered_pids.values(PersistentIdentifier.object_uuid)
    )
    RecordIndexer().bulk_index(object_uuids)
    # Kick off the bulk-queue consumer.
    process_bulk_queue.delay()
def manage_indexer_queues():
    """Peek into indexer queues and spawn bulk indexers as needed.

    For each registered indexer, inspect its message queue and schedule a
    ``process_bulk_queue`` task when there are pending messages and the
    number of active consumers is below the configured cap.
    """
    channel = current_celery_app.connection().channel()
    # FIX: the consumer cap is loop-invariant app config -- read it once
    # instead of on every iteration (originally looked up inside the loop).
    max_consumers = current_app.config["INDEXER_MAX_BULK_CONSUMERS"]
    indexers = current_indexer_registry.all()
    for name, indexer in indexers.items():
        queue = indexer.mq_queue.bind(channel)
        # queue_declare returns (queue_name, message_count, consumer_count).
        _, num_messages, num_consumers = queue.queue_declare()
        # Spawn a bulk indexer only when there is pending work and spare
        # consumer capacity.
        if num_messages > 0 and num_consumers < max_consumers:
            process_bulk_queue.delay(indexer_name=name)
def test_unpublished_deposit_unindex(app, test_users, draft_deposits,
                                     script_info, login_user):
    """Check that deleting an unpublished deposit also removes it from the search index."""
    deposits_creator = test_users['deposits_creator']
    with app.app_context():
        # Delete the first draft deposit from the database.
        doomed_deposit = Deposit.get_record(draft_deposits[0].deposit_id)
        doomed_deposit.delete()
        # Run the scheduled indexing tasks synchronously so the deletion
        # reaches the index.
        process_bulk_queue.delay()
        # Flush so the new index state is visible to searches.
        current_search_client.indices.flush('*')
    # The deleted deposit must no longer be searchable; only the remaining
    # drafts (and no published records) should be found.
    subtest_record_search(app, deposits_creator, [], draft_deposits[1:],
                          login_user)
def test_published_deposit_unindex(app, test_users, test_records, script_info,
                                   login_user):
    """Check that deleting a published deposit also removes it from the search index."""
    deposits_creator = test_users['deposits_creator']
    with app.app_context():
        # Delete the deposit attached to the first published record.
        doomed_deposit = Deposit.get_record(test_records[0].deposit_id)
        doomed_deposit.delete()
        # Run the scheduled indexing tasks synchronously so the deletion
        # reaches the index.
        process_bulk_queue.delay()
        # Flush so the new index state is visible to searches.
        current_search_client.indices.flush('*')
    # The published records remain searchable; only the deposits minus the
    # deleted one should be found.
    subtest_record_search(app, deposits_creator, test_records,
                          test_records[1:], login_user)
def test_record_indexing(app, test_users, test_records, script_info,
                         login_user):
    """Test record indexing and reindexing."""
    deposits_creator = test_users['deposits_creator']

    # Phase 1: the fixture records/deposits are already indexed.
    with app.app_context():
        # flush the indices so that indexed records are searchable
        current_search_client.indices.flush('*')
        subtest_record_search(app, deposits_creator, test_records,
                              test_records, login_user)

    # Phase 2: wipe and recreate every Elasticsearch index -- nothing
    # should be searchable afterwards.
    with app.app_context():
        current_search_client.indices.flush('*')
        for _ in current_search.delete(ignore=[404]):
            pass
        for _ in current_search.create(None):
            pass
        current_search_client.indices.flush('*')
        subtest_record_search(app, deposits_creator, [], [], login_user)

    # Phase 3: schedule a full reindex via the CLI and run it synchronously;
    # everything should be searchable again.
    with app.app_context():
        cli_runner = CliRunner()
        # Initialize the indexing queue.
        result = cli_runner.invoke(cli.queue, ['init', 'purge'],
                                   obj=script_info)
        assert 0 == result.exit_code
        # Schedule a reindex task.
        result = cli_runner.invoke(cli.reindex, ['--yes-i-know'],
                                   obj=script_info)
        assert 0 == result.exit_code
        # Execute the scheduled tasks synchronously.
        process_bulk_queue.delay()
        # Flush so the reindexed documents are searchable.
        current_search_client.indices.flush('*')
        subtest_record_search(app, deposits_creator, test_records,
                              test_records, login_user)
def test_delete_record(app, test_users, test_communities, login_user,
                       script_info):
    """Test record deletion through the REST API.

    Deleting a record must remove the record, its deposit, both file
    buckets and their files, and both search-index entries.  Only an admin
    may delete; anonymous users get 401 and other users 403.
    """
    from click.testing import CliRunner
    from invenio_search import current_search_client
    from invenio_indexer import cli
    from invenio_indexer.tasks import process_bulk_queue
    uploaded_files = {
        'myfile1.dat': b'contents1',
        'myfile2.dat': b'contents2'
    }
    admin = test_users['admin']
    headers = [('Accept', 'application/json')]
    with app.app_context():
        creator = create_user('creator')
        non_creator = create_user('non_creator')
        record_data = generate_record_data()
        with app.test_client() as client:
            deposit, record_pid, record = create_record(
                record_data, creator, files=uploaded_files
            )
            pid_value = record_pid.pid_value
            record_id = record.id
            bucket_id = record.files.bucket.id
            object_version = record.files.bucket.objects[0]
            deposit_bucket_id = deposit.files.bucket.id
            deposit_object_version = deposit.files.bucket.objects[0]
            # URLs of every resource that must disappear with the record.
            record_url = url_for('b2share_records_rest.b2rec_item',
                                 pid_value=pid_value)
            deposit_url = url_for('b2share_deposit_rest.b2dep_item',
                                  pid_value=pid_value)
            bucket_url = url_for('invenio_files_rest.bucket_api',
                                 bucket_id=bucket_id)
            deposit_bucket_url = url_for('invenio_files_rest.bucket_api',
                                         bucket_id=deposit_bucket_id)
            object_version_url = url_for(
                'invenio_files_rest.object_api',
                bucket_id=bucket_id,
                version=object_version.version_id,
                key=object_version.key
            )
            deposit_object_version_url = url_for(
                'invenio_files_rest.object_api',
                bucket_id=deposit_bucket_id,
                version=deposit_object_version.version_id,
                key=deposit_object_version.key
            )
        # check that the record and deposit are searchable
        current_search_client.indices.flush('*')
        res = current_search_client.search(index='records')
        assert res['hits']['total'] == 1
        res = current_search_client.search(index='deposits')
        assert res['hits']['total'] == 1

    def test_delete(status, user=None):
        """DELETE the record as ``user`` and expect ``status``."""
        with app.test_client() as client:
            if user is not None:
                login_user(user, client)
            # delete the record
            request_res = client.delete(record_url, headers=headers)
            assert request_res.status_code == status

    def test_access(user=None, deleted=True):
        """Check visibility of every record/deposit resource as ``user``."""
        # FIX: the original asserts read
        #     assert request_res.status_code == 410 if deleted else 200
        # which parses as ``assert (code == 410) if deleted else 200`` --
        # when ``deleted`` is False the statement degenerates to
        # ``assert 200`` and checks nothing.  The conditional must be
        # parenthesized so the comparison applies in both branches.
        with app.test_client() as client:
            if user is not None:
                login_user(user, client)
            # try accessing the record
            request_res = client.get(record_url, headers=headers)
            assert request_res.status_code == (410 if deleted else 200)
            # try accessing the file bucket
            request_res = client.get(bucket_url, headers=headers)
            assert request_res.status_code == (404 if deleted else 200)
            # try accessing the file
            request_res = client.get(object_version_url, headers=headers)
            assert request_res.status_code == (404 if deleted else 200)
            # try accessing the deposit
            request_res = client.get(deposit_url, headers=headers)
            assert request_res.status_code == (410 if deleted else 200)
            # try accessing the deposit file bucket
            request_res = client.get(deposit_bucket_url, headers=headers)
            assert request_res.status_code == (404 if deleted else 200)
            # try accessing the deposit file
            request_res = client.get(deposit_object_version_url,
                                     headers=headers)
            assert request_res.status_code == (404 if deleted else 200)

    # Check that everything is accessible
    test_access(creator, deleted=False)
    test_delete(401)  # anonymous user
    test_delete(403, creator)
    test_delete(403, non_creator)
    test_delete(200, admin)
    test_access()  # anonymous user
    test_access(creator)
    test_access(non_creator)
    test_access(admin)

    # Check that reindexing records does not index deleted records
    # and deposits.
    with app.app_context():
        runner = CliRunner()
        # Initialize queue
        res = runner.invoke(cli.queue, ['init', 'purge'], obj=script_info)
        assert 0 == res.exit_code
        # schedule a reindex task
        res = runner.invoke(cli.reindex, ['--yes-i-know'], obj=script_info)
        assert 0 == res.exit_code
        res = runner.invoke(cli.run, [], obj=script_info)
        assert 0 == res.exit_code
        # execute scheduled tasks synchronously
        process_bulk_queue.delay()
        # flush the indices so that indexed records are searchable
        current_search_client.indices.flush('*')
        # check that the record and deposit are not indexed
        res = current_search_client.search(index='records')
        assert res['hits']['total'] == 0
        res = current_search_client.search(index='deposits')
        assert res['hits']['total'] == 0
def test_delete_record(app, test_users, test_communities, login_user,
                       script_info):
    """Test record deletion through the REST API.

    Deleting a record must remove the record, its deposit, both file
    buckets and their files, and both search-index entries.  Only an admin
    may delete (expecting 204); anonymous users get 401 and others 403.
    """
    from click.testing import CliRunner
    from invenio_search import current_search_client
    from invenio_indexer import cli
    from invenio_indexer.tasks import process_bulk_queue
    uploaded_files = {
        'myfile1.dat': b'contents1',
        'myfile2.dat': b'contents2'
    }
    admin = test_users['admin']
    headers = [('Accept', 'application/json')]
    with app.app_context():
        creator = create_user('creator')
        non_creator = create_user('non_creator')
        record_data = generate_record_data()
        with app.test_client() as client:
            deposit, record_pid, record = create_record(
                record_data, creator, files=uploaded_files
            )
            pid_value = record_pid.pid_value
            record_id = record.id
            bucket_id = record.files.bucket.id
            object_version = record.files.bucket.objects[0]
            deposit_bucket_id = deposit.files.bucket.id
            deposit_object_version = deposit.files.bucket.objects[0]
            # URLs of every resource that must disappear with the record.
            record_url = url_for('b2share_records_rest.b2rec_item',
                                 pid_value=pid_value)
            deposit_url = url_for('b2share_deposit_rest.b2dep_item',
                                  pid_value=pid_value)
            bucket_url = url_for('invenio_files_rest.bucket_api',
                                 bucket_id=bucket_id)
            deposit_bucket_url = url_for('invenio_files_rest.bucket_api',
                                         bucket_id=deposit_bucket_id)
            object_version_url = url_for(
                'invenio_files_rest.object_api',
                bucket_id=bucket_id,
                version=object_version.version_id,
                key=object_version.key
            )
            deposit_object_version_url = url_for(
                'invenio_files_rest.object_api',
                bucket_id=deposit_bucket_id,
                version=deposit_object_version.version_id,
                key=deposit_object_version.key
            )
        # check that the record and deposit are searchable
        current_search_client.indices.flush('*')
        res = current_search_client.search(index='records')
        assert res['hits']['total'] == 1
        res = current_search_client.search(index='deposits')
        assert res['hits']['total'] == 1

    def test_delete(status, user=None):
        """DELETE the record as ``user`` and expect ``status``."""
        with app.test_client() as client:
            if user is not None:
                login_user(user, client)
            # delete the record
            request_res = client.delete(record_url, headers=headers)
            assert request_res.status_code == status

    def test_access(user=None, deleted=True):
        """Check visibility of every record/deposit resource as ``user``."""
        # FIX: the original asserts read
        #     assert request_res.status_code == 410 if deleted else 200
        # which parses as ``assert (code == 410) if deleted else 200`` --
        # when ``deleted`` is False the statement degenerates to
        # ``assert 200`` and checks nothing.  The conditional must be
        # parenthesized so the comparison applies in both branches.
        with app.test_client() as client:
            if user is not None:
                login_user(user, client)
            # try accessing the record
            request_res = client.get(record_url, headers=headers)
            assert request_res.status_code == (410 if deleted else 200)
            # try accessing the file bucket
            request_res = client.get(bucket_url, headers=headers)
            assert request_res.status_code == (404 if deleted else 200)
            # try accessing the file
            request_res = client.get(object_version_url, headers=headers)
            assert request_res.status_code == (404 if deleted else 200)
            # try accessing the deposit
            request_res = client.get(deposit_url, headers=headers)
            assert request_res.status_code == (410 if deleted else 200)
            # try accessing the deposit file bucket
            request_res = client.get(deposit_bucket_url, headers=headers)
            assert request_res.status_code == (404 if deleted else 200)
            # try accessing the deposit file
            request_res = client.get(deposit_object_version_url,
                                     headers=headers)
            assert request_res.status_code == (404 if deleted else 200)

    # Check that everything is accessible
    test_access(creator, deleted=False)
    test_delete(401)  # anonymous user
    test_delete(403, creator)
    test_delete(403, non_creator)
    test_delete(204, admin)
    test_access()  # anonymous user
    test_access(creator)
    test_access(non_creator)
    test_access(admin)

    # Check that reindexing records does not index deleted records
    # and deposits.
    with app.app_context():
        runner = CliRunner()
        # Initialize queue
        res = runner.invoke(cli.queue, ['init', 'purge'], obj=script_info)
        assert 0 == res.exit_code
        # schedule a reindex task
        res = runner.invoke(cli.reindex, ['--yes-i-know'], obj=script_info)
        assert 0 == res.exit_code
        res = runner.invoke(cli.run, [], obj=script_info)
        assert 0 == res.exit_code
        # execute scheduled tasks synchronously
        process_bulk_queue.delay()
        # flush the indices so that indexed records are searchable
        current_search_client.indices.flush('*')
        # check that the record and deposit are not indexed
        res = current_search_client.search(index='records')
        assert res['hits']['total'] == 0
        res = current_search_client.search(index='deposits')
        assert res['hits']['total'] == 0
def test_record_indexing(app, test_users, test_records, script_info,
                         login_user):
    """Test record indexing.

    Wipe and recreate all Elasticsearch indices, reindex everything via
    the CLI, then verify through the REST search API that all published
    records and all draft deposits are found.
    """
    creator = test_users['deposits_creator']

    # FIX: removed two leftover debug blocks that ran an unused
    # ``current_search_client.search(index="records", ...)`` under a
    # misleading "# delete all elasticsearch indices" comment, plus a
    # stray ``pass``.  They had no effect on the test outcome.
    with app.app_context():
        current_search_client.indices.flush('*')

    with app.app_context():
        # delete all elasticsearch indices and recreate them
        for deleted in current_search.delete(ignore=[404]):
            pass
        for created in current_search.create(None):
            pass
        runner = CliRunner()
        # Initialize queue
        res = runner.invoke(cli.queue, ['init', 'purge'], obj=script_info)
        assert 0 == res.exit_code
        # schedule a reindex task
        res = runner.invoke(cli.reindex, ['--yes-i-know'], obj=script_info)
        assert 0 == res.exit_code
        # execute scheduled tasks synchronously
        process_bulk_queue.delay()
        # flush the indices so that indexed records are searchable
        current_search_client.indices.flush('*')

    search_url = url_for('b2share_records_rest.b2rec_list')
    search_deposits_url = url_for('b2share_records_rest.b2rec_list', drafts=1)
    headers = [('Content-Type', 'application/json'),
               ('Accept', 'application/json')]

    # search for published records
    with app.test_client() as client:
        record_search_res = client.get(search_url, data='', headers=headers)
        assert record_search_res.status_code == 200
        record_search_data = json.loads(
            record_search_res.get_data(as_text=True))
        assert record_search_data['hits']['total'] == len(test_records)
        record_pids = [hit['id']
                       for hit in record_search_data['hits']['hits']]
        expected_record_pids = [str(rec[1]) for rec in test_records]
        record_pids.sort()
        expected_record_pids.sort()
        assert record_pids == expected_record_pids

    # search for draft records (requires the creator to be logged in)
    with app.test_client() as client:
        login_user(creator, client)
        deposit_search_res = client.get(search_deposits_url, data='',
                                        headers=headers)
        assert deposit_search_res.status_code == 200
        deposit_search_data = json.loads(
            deposit_search_res.get_data(as_text=True))
        assert deposit_search_data['hits']['total'] == len(test_records)
        deposit_pids = [
            hit['id'] for hit in deposit_search_data['hits']['hits']
        ]
        expected_deposit_pids = [rec[0].hex for rec in test_records]
        deposit_pids.sort()
        expected_deposit_pids.sort()
        assert deposit_pids == expected_deposit_pids
def test_process_bulk_queue(app):
    """Test index records."""
    # Patch the indexer entry point and check the Celery task forwards
    # to it when executed.
    target = 'invenio_indexer.api.RecordIndexer.process_bulk_queue'
    with patch(target) as mocked_process:
        process_bulk_queue.delay()
        assert mocked_process.called