def test_reindex(app, script_info):
    """Test reindex.

    Creates a record, runs the ``reindex`` and ``run`` CLI commands and
    checks the document is retrievable from the search engine.
    """
    # load records
    with app.test_request_context():
        runner = CliRunner()
        rec_uuid = uuid.uuid4()
        data = {'title': 'Test0'}
        record = Record.create(data, id_=rec_uuid)
        db.session.commit()

        # Initialize queue
        res = runner.invoke(cli.queue, ['init', 'purge'], obj=script_info)
        assert 0 == res.exit_code

        res = runner.invoke(cli.reindex, ['--yes-i-know'], obj=script_info)
        assert 0 == res.exit_code
        res = runner.invoke(cli.run, [], obj=script_info)
        assert 0 == res.exit_code

        indexer = RecordIndexer()
        index, doc_type = indexer.record_to_index(record)
        # Force a refresh so the document is searchable deterministically,
        # instead of the previous flaky-and-slow ``sleep(5)``.
        current_search_client.indices.refresh(index=index)
        res = current_search_client.get(index=index, doc_type=doc_type,
                                        id=rec_uuid)
        assert res['found']

        # Destroy queue
        res = runner.invoke(cli.queue, ['delete'], obj=script_info)
        assert 0 == res.exit_code
def mef_person_update_index(sender, *args, **kwargs):
    """Index MEF person in ES.

    Signal handler: for each author ``$ref`` on a document record, the
    referenced MEF person is resolved over HTTP and (re)indexed.

    :param sender: signal sender (unused).
    :param kwargs: must contain ``record``, the record being processed.
    :raises Exception: if the MEF resolver request does not return 200.
    """
    record = kwargs['record']
    # Only document records carry resolvable author references.
    if 'documents' not in record.get('$schema', ''):
        return
    for author in record.get('authors', []):
        mef_url = author.get('$ref')
        if not mef_url:
            continue
        # Point the reference at the configured MEF host.
        mef_url = mef_url.replace(
            'mef.rero.ch',
            current_app.config['RERO_ILS_MEF_HOST'])
        response = requests_get(url=mef_url,
                                params=dict(resolve=1, sources=1))
        if response.status_code != requests_codes.ok:
            current_app.logger.error(
                'Mef resolver request error: {stat} {url}'.format(
                    stat=response.status_code, url=mef_url))
            raise Exception('unable to resolve')
        payload = response.json()
        # ``rec_id`` instead of ``id`` to avoid shadowing the builtin.
        rec_id = payload['id']
        data = payload.get('metadata')
        if not data:
            continue
        data['id'] = rec_id
        data['$schema'] = current_jsonschemas.path_to_url(
            current_app.config['RERO_ILS_PERSONS_MEF_SCHEMA'])
        indexer = RecordIndexer()
        index, doc_type = indexer.record_to_index(data)
        indexer.client.index(
            id=rec_id,
            index=index,
            doc_type=doc_type,
            body=data,
        )
        current_search.flush_and_refresh(index)
def mef_person_delete(sender, *args, **kwargs):
    """Delete signal.

    For each author ``$ref`` on a deleted document record, resolve the
    MEF person and remove it from the index when the deleted document
    was the last one referencing it.

    :param sender: signal sender (unused).
    :param kwargs: must contain ``record``, the record being deleted.
    :raises Exception: if the MEF resolver request does not return 200.
    """
    record = kwargs['record']
    # Only document records carry resolvable author references.
    if 'documents' not in record.get('$schema', ''):
        return
    for author in record.get('authors', []):
        mef_url = author.get('$ref')
        if not mef_url:
            continue
        # Point the reference at the configured MEF host.
        mef_url = mef_url.replace(
            'mef.rero.ch',
            current_app.config['RERO_ILS_MEF_HOST'])
        response = requests_get(url=mef_url,
                                params=dict(resolve=1, sources=1))
        if response.status_code != requests_codes.ok:
            current_app.logger.error(
                'Mef resolver request error: {result} {url}'.format(
                    result=response.status_code, url=mef_url))
            raise Exception('unable to resolve')
        payload = response.json()
        # ``rec_id`` instead of ``id`` to avoid shadowing the builtin.
        rec_id = payload['id']
        data = payload.get('metadata')
        if not data:
            continue
        search = DocumentsSearch()
        count = search.filter(
            'match', authors__pid=rec_id).execute().hits.total
        # Drop the person only when this document was its last reference.
        if count == 1:
            indexer = RecordIndexer()
            index, doc_type = indexer.record_to_index(data)
            indexer.client.delete(id=rec_id, index=index,
                                  doc_type=doc_type)
            current_search.flush_and_refresh(index)
def test_before_deposit_index_hook_sets_files(create_record, db, es):
    """Indexing a deposit exposes its bucket files under ``_files``."""
    deposit = create_record(published=False)

    # Reproduce a file upload: attach a file to the deposit's bucket.
    bucket = Bucket.get(deposit['_buckets']['deposit'])
    version = ObjectVersion.create(bucket, 'foo.txt')
    content = BytesIO(b'Hello world!')
    version.set_contents(
        content,
        size=len(content.getvalue()),
        size_limit=bucket.size_limit,
    )
    db.session.commit()

    indexer = RecordIndexer()
    indexer.index(deposit)

    # Fetch the raw document straight from the search engine.
    index, doc_type = indexer.record_to_index(deposit)
    indexed = es.get(index=index, doc_type=doc_type, id=deposit.id)

    assert '_files' in indexed['_source']
    assert indexed['_source']['_files'][0]['type'] == 'txt'
def reindex_pid(pid_type, RecordClass, only=None, raise_on_error=False):
    """Reindex all registered records of a given PID type.

    :param pid_type: persistent identifier type to reindex.
    :param RecordClass: record API class used to load each record.
    :param only: if set, reindex only the record whose UUID string equals
        this value. (Previously read from an undefined name, which made
        the function fail with ``NameError`` at runtime.)
    :param raise_on_error: re-raise indexing errors after logging them
        to ``/tmp/indexing-error.json``.
    """
    index_name = None
    indexer = RecordIndexer()
    for pid in tqdm.tqdm(PersistentIdentifier.query.filter_by(
            pid_type=pid_type, object_type='rec',
            status=PIDStatus.REGISTERED.value)):
        record = RecordClass.get_record(pid.object_uuid)
        if only and str(record.id) != only:
            continue
        try:
            index_name, doc_type = indexer.record_to_index(record)
            index_name = build_alias_name(index_name)
            indexer.index(record)
        # ``except Exception`` instead of a bare ``except:`` so that
        # KeyboardInterrupt/SystemExit still propagate.
        except Exception:
            with open('/tmp/indexing-error.json', 'a') as f:
                print(json.dumps(record.dumps(), indent=4,
                                 ensure_ascii=False), file=f)
                traceback.print_exc(file=f)
            if raise_on_error:
                raise
    if index_name:
        current_search_client.indices.refresh(index_name)
        current_search_client.indices.flush(index_name)
def reindex_pid(pid_type, RecordClass, only=None, raise_on_error=None):
    """Reindex all registered records of a given PID type.

    :param pid_type: persistent identifier type to reindex.
    :param RecordClass: record API class used to load each record.
    :param only: if set, reindex only the record whose UUID string equals
        this value. (The previous ``only: bool = False`` annotation was
        wrong: the value is compared against ``str(record.id)``, so it is
        an optional string; ``None`` is falsy just like ``False`` was.)
    :param raise_on_error: re-raise indexing errors after logging them
        to ``/tmp/indexing-error.json``.
    """
    index_name = None
    indexer = RecordIndexer()
    pids = PersistentIdentifier.query.filter_by(
        pid_type=pid_type, object_type='rec',
        status=PIDStatus.REGISTERED.value).all()
    for pid in tqdm(pids):
        try:
            record = RecordClass.get_record(pid.object_uuid)
        except NoResultFound:
            # PID points at a missing record row; skip it.
            continue
        # Drop a known placeholder value left over from draft mode.
        keywords = record.get("keywords")
        if keywords:
            if keywords == "Keywords must be fixed in draft mode":
                del record["keywords"]
        if only and str(record.id) != only:
            continue
        try:
            index_name, doc_type = indexer.record_to_index(record)
            index_name = build_alias_name(index_name)
            indexer.index(record)
        # ``except Exception`` instead of a bare ``except:`` so that
        # KeyboardInterrupt/SystemExit still propagate.
        except Exception:
            with open('/tmp/indexing-error.json', 'a') as f:
                print(json.dumps(record.dumps(), indent=4,
                                 ensure_ascii=False), file=f)
                traceback.print_exc(file=f)
            if raise_on_error:
                raise
    if index_name:
        current_search_client.indices.refresh(index_name)
        current_search_client.indices.flush(index_name)
def test_reindex(app, script_info):
    """Test reindex."""
    # load records
    with app.test_request_context():
        runner = CliRunner()

        def invoke(command, *arguments):
            """Run a CLI command and assert it exited cleanly."""
            result = runner.invoke(command, list(arguments), obj=script_info)
            assert 0 == result.exit_code

        # Create two registered records with their PIDs.
        uuid1 = uuid.uuid4()
        uuid2 = uuid.uuid4()
        Record.create(dict(title='Test 1', recid=1), id_=uuid1)
        Record.create(dict(title='Test 2', recid=2), id_=uuid2)
        for value, object_uuid in ((1, uuid1), (2, uuid2)):
            PersistentIdentifier.create(
                pid_type='recid',
                pid_value=value,
                object_type='rec',
                object_uuid=object_uuid,
                status=PIDStatus.REGISTERED,
            )
        db.session.commit()

        first_record = Record.get_record(uuid1)
        index, doc_type = RecordIndexer().record_to_index(first_record)

        # Make sure the index doesn't exist at the beginning (it was not
        # preserved by accident from some other tests)
        assert current_search_client.indices.exists(index) is False

        # Initialize the queue, then reindex everything of type 'recid'.
        invoke(cli.queue, 'init', 'purge')
        invoke(cli.reindex, '--yes-i-know', '-t', 'recid')
        invoke(cli.run)
        current_search.flush_and_refresh(index)

        # Both records should be indexed
        found = current_search_client.search(index=index)
        assert found['hits']['total'] == 2

        # Delete one record, destroy the index and reindex from scratch.
        Record.get_record(uuid2).delete()
        db.session.commit()
        list(current_search.delete(ignore=[404]))
        invoke(cli.reindex, '--yes-i-know', '-t', 'recid')
        invoke(cli.run)
        current_search.flush_and_refresh(index)

        # Check that the deleted record is not indexed
        found = current_search_client.search(index=index)
        assert found['hits']['total'] == 1
        assert found['hits']['hits'][0]['_source']['title'] == 'Test 1'

        # Destroy queue and the index
        invoke(cli.queue, 'delete')
# Delete every search index; ``delete()`` is a generator, so it must be
# consumed (here via ``list``) for the deletions to actually execute.
# HTTP 404 responses (already-missing indices) are ignored.
list(current_search.delete(ignore=[404]))