def test_disambiguate_authors_create_new_author(inspire_app, clean_celery_session, enable_disambiguation):
    """Disambiguation creates a brand-new author record for an unknown literature author."""
    lit_data = faker.record("lit", with_control_number=True)
    lit_data.update(
        {
            "authors": [
                {"full_name": "Michal Kowal", "affiliations": [{"value": "Warsaw U."}]}
            ]
        }
    )
    lit_record = LiteratureRecord.create(data=lit_data)
    db.session.commit()

    def assert_lit_records_exist_in_es():
        # Wait until the literature record is searchable before checking
        # anything disambiguation-related.
        assert InspireSearch.get_record_data_from_es(lit_record)

    retry_until_pass(assert_lit_records_exist_in_es, retry_interval=3)

    def assert_disambiguation_task():
        lit_from_es = InspireSearch.get_record_data_from_es(lit_record)
        author_hits = AuthorsSearch().query_from_iq("").execute()
        assert author_hits.hits[0].name["value"] == "Michal Kowal"
        # The literature author must point at the newly created author record.
        assert lit_from_es["authors"][0]["recid"] == author_hits.hits[0].control_number

    retry_until_pass(assert_disambiguation_task)
def test_oai_with_for_arxiv_set(inspire_app, clean_celery_session):
    """A record with an arXiv eprint and CERN report number is listed in the arXiv OAI set."""
    extra_fields = {
        "arxiv_eprints": [{"value": "2009.01484"}],
        "report_numbers": [{"value": "CERN-TH-2020-136"}],
    }
    record = LiteratureRecord.create(faker.record("lit", extra_fields))
    record_uuid = record.id
    record_marcxml = record2marcxml(record)
    db.session.commit()

    def assert_the_record_is_indexed():
        current_search.flush_and_refresh("*")
        search_result = es_search("records-hep")
        assert str(record_uuid) in get_value(search_result, "hits.hits._id")

    retry_until_pass(assert_the_record_is_indexed)

    set_name = inspire_app.config["OAI_SET_CERN_ARXIV"]
    db.session.add(OAISet(spec=f"{set_name}", name="Test", description="Test"))
    db.session.commit()

    with inspire_app.test_client() as client:
        response = client.get(
            f"/api/oai2d?verb=ListRecords&metadataPrefix=marcxml&set={set_name}"
        )
        assert record_marcxml in response.data
def test_disambiguate_authors_create_two_author_with_same_name(
    inspire_app, clean_celery_session, enable_disambiguation
):
    """Two same-named authors on one paper each get their own author record."""
    lit_data = faker.record("lit", with_control_number=True)
    lit_data.update(
        {"authors": [{"full_name": "Michal Kowal"}, {"full_name": "Michal Kowal"}]}
    )
    lit_record = LiteratureRecord.create(data=lit_data)
    db.session.commit()

    def assert_lit_records_exist_in_es():
        assert InspireSearch.get_record_data_from_es(lit_record)

    retry_until_pass(assert_lit_records_exist_in_es, retry_interval=3)

    def assert_disambiguation_task():
        # The two distinct (though homonymous) authors must not be merged.
        author_hits = AuthorsSearch().query_from_iq("").execute().hits
        assert len(author_hits) == 2

    retry_until_pass(assert_disambiguation_task)
def test_oai_get_single_identifier_for_CDS_set(inspire_app, clean_celery_session):
    """OAI GetRecord returns the MARCXML of a record flagged for CDS export."""
    record = LiteratureRecord.create(faker.record("lit", {"_export_to": {"CDS": True}}))
    record_uuid = record.id
    record_marcxml = record2marcxml(record)
    db.session.commit()

    def assert_the_record_is_indexed():
        current_search.flush_and_refresh("*")
        search_result = es_search("records-hep")
        assert str(record_uuid) in get_value(search_result, "hits.hits._id")

    retry_until_pass(assert_the_record_is_indexed)

    set_name = inspire_app.config["OAI_SET_CDS"]
    db.session.add(OAISet(spec=f"{set_name}", name="Test", description="Test"))
    db.session.commit()

    with inspire_app.test_client() as client:
        response = client.get(
            f"/api/oai2d?verb=GetRecord&metadataPrefix=marcxml&identifier=oai:inspirehep.net:{record['control_number']}"
        )
        assert record_marcxml in response.data
def test_aut_record_removed_form_es_when_deleted(inspire_app, clean_celery_session):
    """An author record disappears from the ES authors index after deletion.

    Fix: the two retry closures were both named ``assert_record`` — the second
    definition shadowed the first; they now carry distinct, descriptive names.
    """
    data = faker.record("aut")
    rec = AuthorsRecord.create(data)
    db.session.commit()

    def assert_record_indexed():
        current_search.flush_and_refresh("records-authors")
        result = es_search("records-authors")
        assert get_value(result, "hits.total.value") == 1

    retry_until_pass(assert_record_indexed)

    rec.delete()
    db.session.commit()

    def assert_record_removed():
        current_search.flush_and_refresh("records-authors")
        result = es_search("records-authors")
        assert get_value(result, "hits.total.value") == 0

    retry_until_pass(assert_record_removed)
def assert_record_not_in_es(recid):
    """Retry until no hit for ``recid`` remains in the HEP index."""

    def assert_hits():
        current_search.flush_and_refresh("records-hep")
        assert not LiteratureSearch().query_from_iq(f"recid:{recid}").execute().hits

    retry_until_pass(assert_hits, retry_interval=5)
def assert_citation_count(cited_record, expected_count):
    """Retry until the cited record's indexed ``citation_count`` equals ``expected_count``."""

    def assert_record():
        current_search.flush_and_refresh("records-hep")
        indexed = LiteratureSearch().get_record_data_from_es(cited_record)
        assert indexed["citation_count"] == expected_count

    retry_until_pass(assert_record, retry_interval=3)
def test_fulltext_indexer(inspire_app, clean_celery_session, override_config):
    """With the fulltext flag enabled, the indexed document gains an ``attachment`` field."""
    with override_config(FEATURE_FLAG_ENABLE_FULLTEXT=True):
        data = faker.record("lit")
        data.update(
            {
                "arxiv_eprints": [
                    {"categories": ["hep-ph"], "value": "hep-ph/9404247"}
                ],
                "documents": [
                    {
                        "source": "arxiv",
                        "fulltext": True,
                        "filename": "arXiv:nucl-th_9310030.pdf",
                        "key": "arXiv:nucl-th_9310030.pdf",
                        "url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
                    }
                ],
            }
        )
        record = LiteratureRecord.create(data)
        db.session.commit()

        def assert_record_in_es():
            current_search.flush_and_refresh("*")
            es_hit = (
                LiteratureSearch().get_record(str(record.id)).execute().hits.hits[0]
            )
            # The extracted fulltext lands in the document's "attachment".
            assert "attachment" in es_hit._source["documents"][0]

        retry_until_pass(assert_record_in_es, timeout=90, retry_interval=20)
def test_institutions_record_updates_in_es_when_lit_rec_refers_to_it(
    inspire_app, clean_celery_session
):
    """``number_of_papers`` on an institution follows literature records affiliated to it.

    Fix: the two retry closures shared the name ``assert_record`` (the second
    shadowed the first) and relied on rebinding ``expected_number_of_papers``;
    each closure now has its own name and hard-coded expectation.
    """
    institution = InstitutionsRecord.create(faker.record("ins"))
    institution_control_number = institution["control_number"]
    ref = f"http://localhost:8000/api/institutions/{institution_control_number}"
    db.session.commit()

    def assert_no_papers():
        current_search.flush_and_refresh("records-institutions")
        record_from_es = InstitutionsSearch().get_record_data_from_es(institution)
        assert record_from_es["number_of_papers"] == 0

    retry_until_pass(assert_no_papers)

    data = {
        "authors": [
            {
                "full_name": "John Doe",
                "affiliations": [{"value": "Institution", "record": {"$ref": ref}}],
            }
        ]
    }
    LiteratureRecord.create(faker.record("lit", data))
    db.session.commit()

    def assert_one_paper():
        current_search.flush_and_refresh("records-institutions")
        record_from_es = InstitutionsSearch().get_record_data_from_es(institution)
        assert record_from_es["number_of_papers"] == 1

    retry_until_pass(assert_one_paper)
def test_disambiguate_authors_on_first_last_name_and_initials(
        inspire_app, clean_celery_session, enable_disambiguation):
    """Disambiguation links an unlinked author to the existing curated author
    whose first/last names and initials match ("'t Hooft, Gerard Antonio"
    resolves to the author of the second paper, not the first).
    """
    # Curated paper linked to author 999108 under BAI G.Hooft.2.
    literature_data = faker.record("lit", with_control_number=True)
    literature_data.update({
        "authors": [{
            "full_name": "'t Hooft, Gerard",
            "curated_relation": True,
            "record": {
                "$ref": "http://localhost:5000/api/authors/999108"
            },
            "ids": [{
                "schema": "INSPIRE BAI",
                "value": "G.Hooft.2"
            }],
        }]
    })
    literature_record = LiteratureRecord.create(literature_data)
    # Curated paper linked to author 999105 under BAI G.Hooft.1, carrying the
    # longer name variant that the third paper will reuse verbatim.
    literature_data_2 = faker.record("lit", with_control_number=True)
    literature_data_2.update({
        "authors": [{
            "full_name": "'t Hooft, Gerard Antonio",
            "curated_relation": True,
            "record": {
                "$ref": "http://localhost:5000/api/authors/999105"
            },
            "ids": [{
                "schema": "INSPIRE BAI",
                "value": "G.Hooft.1"
            }],
        }]
    })
    literature_record_2 = LiteratureRecord.create(literature_data_2)
    db.session.commit()

    def assert_lit_records_exist_in_es():
        # Both curated papers must be indexed before the disambiguation of the
        # third paper can use them as reference signatures.
        lit_record_1_from_es = InspireSearch.get_record_data_from_es(
            literature_record)
        lit_record_2_from_es = InspireSearch.get_record_data_from_es(
            literature_record_2)
        assert lit_record_1_from_es and lit_record_2_from_es

    retry_until_pass(assert_lit_records_exist_in_es, retry_interval=3)

    # New paper with the long name but no author link: disambiguation should
    # resolve it to the author referenced by literature_record_2.
    literature_data_3 = faker.record("lit", with_control_number=True)
    literature_data_3.update(
        {"authors": [{
            "full_name": "'t Hooft, Gerard Antonio"
        }]})
    literature_record_3 = LiteratureRecord.create(literature_data_3)
    db.session.commit()

    def assert_disambiguation_task():
        literature_record_from_es = InspireSearch.get_record_data_from_es(
            literature_record_3)
        assert (literature_data_2["authors"][0]["record"] ==
                literature_record_from_es["authors"][0]["record"])

    retry_until_pass(assert_disambiguation_task, retry_interval=2)
def assert_es_hits_count(expected_hits_count):
    """Retry until the HEP index holds exactly ``expected_hits_count`` hits."""

    def assert_hits():
        current_search.flush_and_refresh("records-hep")
        search_result = es_search("records-hep")
        assert get_value(search_result, "hits.total.value") == expected_hits_count

    retry_until_pass(assert_hits, retry_interval=5)
def test_assign_from_an_author_to_another_that_is_not_stub(
    inspire_app, clean_celery_session
):
    """Assigning papers to a non-stub author rewrites the literature author's ref and ids."""
    author_data = {
        "name": {"value": "Aad, Georges", "preferred_name": "Georges Aad"},
        "ids": [{"value": "G.Aad.1", "schema": "INSPIRE BAI"}],
        "stub": False,
    }
    from_author = create_record("aut")
    to_author = create_record("aut", data=author_data)
    literature = create_record(
        "lit",
        data={
            "authors": [
                {
                    "full_name": "Urhan, Ahmet",
                    "record": {"$ref": "http://localhost:5000/api/authors/17200"},
                },
                {
                    "full_name": "Urhan, Harun",
                    "record": {
                        "$ref": f"http://localhost:5000/api/authors/{from_author['control_number']}"
                    },
                },
            ]
        },
    )
    db.session.commit()
    assign_papers.delay(
        from_author_recid=from_author["control_number"],
        to_author_record=to_author,
        author_papers_recids=[literature["control_number"]],
    )

    def assert_assign():
        current_search.flush_and_refresh("*")
        reindexed_literature = LiteratureSearch.get_record_data_from_es(literature)
        moved_author = reindexed_literature["authors"][1]
        refreshed_to_author = AuthorsRecord.get_record_by_pid_value(
            to_author["control_number"]
        )
        assert moved_author["record"] == {
            "$ref": f"http://localhost:5000/api/authors/{to_author['control_number']}"
        }
        assert moved_author["curated_relation"]
        assert moved_author["ids"] == to_author["ids"]
        # The target author was already curated: it must remain non-stub.
        assert not refreshed_to_author["stub"]

    retry_until_pass(assert_assign, retry_interval=5)
def test_lit_record_appear_in_es_when_created(inspire_app, clean_celery_session):
    """A freshly created literature record is indexed with its UI display blob."""
    record = LiteratureRecord.create(faker.record("lit"))
    db.session.commit()

    def assert_record():
        current_search.flush_and_refresh("records-hep")
        indexed = LiteratureSearch().get_record_data_from_es(record)
        assert indexed["_ui_display"]

    retry_until_pass(assert_record)
def test_recalculate_references_after_literature_record_merge(
        inspire_app, clean_celery_session):
    """After a literature record is merged away (listed in another record's
    ``deleted_records``), references to it in other records are rewritten to
    point at the merged record.
    """
    literature_data = faker.record("lit", with_control_number=True)
    literature = InspireRecord.create(literature_data)
    literature_record_reference = literature["self"]["$ref"]
    # A seminar pointing at the soon-to-be-merged literature record.
    seminar_data = faker.record("sem", with_control_number=True)
    seminar_data.update({
        "literature_records": [{
            "record": {
                "$ref": literature_record_reference
            }
        }]
    })
    seminar = InspireRecord.create(seminar_data)
    # A literature record citing the soon-to-be-merged record.
    literature_data_with_references = faker.record("lit", with_control_number=True)
    literature_data_with_references.update(
        {"references": [{
            "record": {
                "$ref": literature_record_reference
            }
        }]})
    literature_record_with_references = InspireRecord.create(
        literature_data_with_references)
    db.session.commit()

    def assert_all_records_in_es():
        literature_record_from_es = InspireSearch.get_record_data_from_es(
            literature)
        seminar_record_from_es = InspireSearch.get_record_data_from_es(seminar)
        assert all([literature_record_from_es, seminar_record_from_es])

    retry_until_pass(assert_all_records_in_es, retry_interval=3)

    # Creating a record that lists the old one in deleted_records performs the
    # merge; this should trigger the reference-recalculation task.
    merged_literature_data = faker.record("lit", with_control_number=True)
    merged_literature_data.update(
        {"deleted_records": [{
            "$ref": literature_record_reference
        }]})
    merged_literature_record = InspireRecord.create(merged_literature_data)
    db.session.commit()

    def assert_recalculate_references_task():
        seminar_record_from_es = InspireSearch.get_record_data_from_es(seminar)
        literature_record_from_es = InspireSearch.get_record_data_from_es(
            literature_record_with_references)
        # Both referrers must now point at the merged record's URI.
        assert (seminar_record_from_es["literature_records"][0]["record"]
                ["$ref"] == merged_literature_record["self"]["$ref"])
        assert (literature_record_from_es["references"][0]["record"]["$ref"] ==
                merged_literature_record["self"]["$ref"])

    retry_until_pass(assert_recalculate_references_task, retry_interval=3)
def test_continuous_migration_with_invalid_control_number(
        inspire_app, celery_app_with_context, celery_session_worker, redis):
    """``continuous_migration`` raises ValueError on a record whose 001
    (control number) field is not numeric, and consumes only the entries
    preceding the bad one from the redis queue.
    """
    # A well-formed HEP record (control number 666) citing record 667.
    raw_record_citer = (
        b"<record>"
        b' <controlfield tag="001">666</controlfield>'
        b' <datafield tag="245" ind1=" " ind2=" ">'
        b' <subfield code="a">This is a citer record</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEP</subfield>'
        b" </datafield>"
        b' <datafield tag="999" ind1="C" ind2="5">'
        b' <subfield code="0">667</subfield>'
        b' <subfield code="h">Achasov, M.N.</subfield>'
        b' <subfield code="k">snd-2018</subfield>'
        b' <subfield code="m">(SND Collaboration)</subfield>'
        b' <subfield code="o">2</subfield>'
        b' <subfield code="s">Phys.Rev.,D97,012008</subfield>'
        b' <subfield code="x">'
        b" [2] M. N. Achasov (SND Collaboration), Phys. Rev. D 97, 012008 (2018)."
        b" </subfield>"
        b' <subfield code="y">2018</subfield>'
        b' <subfield code="z">0</subfield>'
        b' <subfield code="z">1</subfield>'
        b" </datafield>"
        b"</record>")
    citer_control_number = 666
    # Malformed record: the 001 controlfield is not a number.
    raw_record_cited = (
        b"<record>"
        b' <controlfield tag="001">this is not a control number</controlfield>'
        b' <datafield tag="245" ind1=" " ind2=" ">'
        b' <subfield code="a">This is a citing record</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEP</subfield>'
        b" </datafield>"
        b"</record>")
    # The legacy queue stores zlib-compressed MARCXML, terminated by b"END".
    redis.rpush("legacy_records", zlib.compress(raw_record_citer))
    redis.rpush("legacy_records", zlib.compress(raw_record_cited))
    redis.rpush("legacy_records", b"END")
    assert redis.llen("legacy_records") == 3
    with pytest.raises(ValueError):
        continuous_migration()

    def assert_continuous_migration():
        # Only the first (valid) record was consumed from the queue.
        assert redis.llen("legacy_records") == 2

    retry_until_pass(assert_continuous_migration)
def test_aut_record_appear_in_es_when_created(inspire_app, clean_celery_session):
    """A freshly created author record becomes searchable in ES."""
    record = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    expected_control_number = record["control_number"]

    def assert_record():
        current_search.flush_and_refresh("records-authors")
        indexed = AuthorsSearch().get_record_data_from_es(record)
        assert indexed["control_number"] == expected_control_number

    retry_until_pass(assert_record)
def test_indexer_deletes_author_record_from_es(inspire_app, datadir):
    """A deleted author record is removed from the authors ES index.

    Fix: renamed from ``test_indexer_deletes_record_from_es`` — this file
    defines that name three times (authors/literature/journals variants), so
    earlier definitions were shadowed and never collected by pytest.
    """

    def assert_record_is_deleted_from_es():
        current_search.flush_and_refresh("records-authors")
        hits = AuthorsSearch().get_record(str(record.id)).execute().hits
        assert len(hits) == 0

    record = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    record.delete()
    db.session.commit()
    retry_until_pass(assert_record_is_deleted_from_es)
def test_indexer_updates_conference_papers_when_name_changes(
        inspire_app, clean_celery_session):
    """Updating a conference's title re-indexes its conference papers so
    their UI display blob carries the new title.
    """
    conference_data = faker.record(
        "con", data={"titles": [{
            "title": "Initial Title"
        }]})
    conference = ConferencesRecord.create(conference_data)
    db.session.commit()
    current_search.flush_and_refresh("records-conferences")
    conference_id = conference["control_number"]
    # Conference paper pointing at the conference record above.
    conference_paper_data = faker.record(
        "lit",
        data={
            "document_type": ["conference paper"],
            "publication_info": [{
                "conference_record": {
                    "$ref": f"https://labs.inspirehep.net/api/conferences/{conference_id}"
                }
            }],
        },
    )
    LiteratureRecord.create(conference_paper_data)
    db.session.commit()

    def assert_literature_has_correct_conference_title():
        current_search.flush_and_refresh("*")
        result = es_search("records-hep")
        total = get_value(result, "hits.total.value")
        assert total == 1
        literature = get_value(result, "hits.hits[0]._source")
        # _ui_display is a JSON blob embedding the conference titles; compare
        # against the current in-memory conference record.
        ui_display = orjson.loads(literature["_ui_display"])
        assert conference["titles"] == get_value(ui_display, "conference_info[0].titles")

    retry_until_pass(assert_literature_has_correct_conference_title, timeout=45)

    data = dict(conference)
    data["titles"] = [{"title": "Updated Title"}]
    conference.update(data)
    db.session.commit()
    # Re-using the same closure: it reads conference["titles"] afresh, so it
    # now waits for the updated title to propagate to the paper's index entry.
    retry_until_pass(assert_literature_has_correct_conference_title, timeout=45)
def test_disambiguation_doesnt_assign_bai_when_already_in_author(
    inspire_app, clean_celery_session, enable_disambiguation
):
    """The matched author's existing BAI ends up on the literature author's ids."""
    author_data = faker.record("aut", with_control_number=True)
    author_data.update(
        {
            "name": {"value": "Brian Gross"},
            "ids": [{"schema": "INSPIRE BAI", "value": "J.M.Maldacena.1"}],
            "email_addresses": [{"current": True, "value": "*****@*****.**"}],
        }
    )
    author_record = InspireRecord.create(author_data)
    db.session.commit()

    def assert_authors_records_exist_in_es():
        assert InspireSearch.get_record_data_from_es(author_record)

    retry_until_pass(assert_authors_records_exist_in_es)

    lit_data = faker.record("lit", with_control_number=True)
    lit_data.update(
        {
            "authors": [
                {
                    "full_name": "Brian Gross",
                    "ids": [{"schema": "INSPIRE BAI", "value": "A.Test.1"}],
                    "emails": ["*****@*****.**"],
                }
            ]
        }
    )
    literature_record = LiteratureRecord.create(lit_data)
    db.session.commit()

    def assert_disambiguation_task():
        lit_from_es = InspireSearch.get_record_data_from_es(literature_record)
        expected_id = {"schema": "INSPIRE BAI", "value": "J.M.Maldacena.1"}
        # The disambiguated author must carry the matched author's BAI.
        assert expected_id in lit_from_es["authors"][0]["ids"]

    retry_until_pass(assert_disambiguation_task, retry_interval=2)
def test_indexer_deletes_record_from_es(inspire_app, datadir, clean_celery_session):
    """Deleting a literature record removes it from the HEP ES index."""

    def assert_record_is_deleted_from_es():
        current_search.flush_and_refresh("records-hep")
        hits = LiteratureSearch().get_record(str(record.id)).execute().hits
        assert len(hits) == 0

    fixture = orjson.loads((datadir / "1630825.json").read_text())
    record = LiteratureRecord.create(fixture)
    db.session.commit()
    record.delete()
    db.session.commit()
    retry_until_pass(assert_record_is_deleted_from_es)
def test_many_records_in_one_commit(inspire_app, clean_celery_session):
    """All records created within a single commit get indexed."""
    created_recids = set()
    for _ in range(4):
        new_record = LiteratureRecord.create(faker.record("lit"))
        created_recids.add(new_record["control_number"])
    db.session.commit()
    current_search.flush_and_refresh("records-hep")

    def assert_all_records_in_es():
        hits = LiteratureSearch().query_from_iq("").execute().hits
        indexed_recids = {hit.control_number for hit in hits}
        assert len(indexed_recids & created_recids) == 4

    retry_until_pass(assert_all_records_in_es, retry_interval=5)
def test_indexer_deletes_journal_record_from_es(inspire_app, datadir):
    """A deleted journal record is removed from the journals ES index.

    Fix: renamed from ``test_indexer_deletes_record_from_es`` — this file
    defines that name three times (authors/literature/journals variants), so
    earlier definitions were shadowed and never collected by pytest.
    """

    def assert_record_is_deleted_from_es():
        current_search.flush_and_refresh("records-journals")
        hits = JournalsSearch().get_record(str(record.id)).execute().hits
        assert len(hits) == 0

    data = orjson.loads((datadir / "1213103.json").read_text())
    record = JournalsRecord.create(data)
    db.session.commit()
    record.delete()
    db.session.commit()
    retry_until_pass(assert_record_is_deleted_from_es)
def test_process_references_in_records_process_author_records(
        mock_batch_index, inspire_app, clean_celery_session):
    """``process_references_in_records`` re-indexes every literature record
    referencing an updated author (``batch_index`` is mocked to capture the
    uuids it would have re-indexed).
    """
    author_record = AuthorsRecord.create(faker.record("aut"))
    # Two papers whose author entry points at the author record above.
    lit_record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [{
                    "full_name": author_record["name"]["value"],
                    "record": author_record["self"],
                }]
            },
        ))
    lit_record_2 = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [{
                    "full_name": author_record["name"]["value"],
                    "record": author_record["self"],
                }]
            },
        ))
    db.session.commit()

    def assert_records_in_es():
        lit_record_from_es = InspireSearch.get_record_data_from_es(lit_record)
        lit_record_from_es_2 = InspireSearch.get_record_data_from_es(
            lit_record_2)
        aut_record_from_es = InspireSearch.get_record_data_from_es(
            author_record)
        assert lit_record_from_es and aut_record_from_es and lit_record_from_es_2

    retry_until_pass(assert_records_in_es, retry_interval=5)

    # Detach the commit-time indexing hook so the author update itself does
    # not re-index the papers; only the explicit task below should do that.
    models_committed.disconnect(index_after_commit)
    author_record["name"]["value"] = "Another Name"
    author_record.update(dict(author_record))
    db.session.commit()
    # reconnect signal before we call process_references_in_records
    models_committed.connect(index_after_commit)
    task = process_references_in_records.delay([author_record.id])
    task.get(timeout=5)
    # The task must have scheduled exactly the two citing papers for indexing.
    assert sorted(mock_batch_index.mock_calls[0][1][0]) == sorted(
        [str(lit_record.id), str(lit_record_2.id)])
def test_match_references(inspire_app, cli, clean_celery_session):
    """The ``match references`` CLI command links each citer's reference to
    the cited record via DOI matching.
    """
    cited_data = {
        "document_type": ["article"],
        "dois": [{
            "value": "10.1371/journal.pone.0188398"
        }],
    }
    cited_record = create_record_async("lit", data=cited_data)
    cited_record.index(
        delay=False)  # reference-matcher requires cited record to be indexed
    # Three papers whose single reference carries the cited record's DOI.
    citer_data = {
        "references": [{
            "reference": {
                "dois": ["10.1371/journal.pone.0188398"]
            }
        }]
    }
    citer_record_1 = create_record_async("lit", data=citer_data)
    citer_record_2 = create_record_async("lit", data=citer_data)
    citer_record_3 = create_record_async("lit", data=citer_data)
    citer_ids = [citer_record_1.id, citer_record_2.id, citer_record_3.id]
    # A data record is created too and must also be indexed before the run.
    record_data = create_record_async("dat")
    record_data_uuids = record_data.id

    def assert_all_records_are_indexed():
        current_search.flush_and_refresh("*")
        result = es_search("records-hep")
        uuids = get_value(result, "hits.hits._id")
        for uuid in citer_ids:
            assert str(uuid) in uuids
        result = es_search("records-data")
        uuids = get_value(result, "hits.hits._id")
        assert str(record_data_uuids) in uuids

    retry_until_pass(assert_all_records_are_indexed)

    # NOTE(review): "-bs" is given the int 2, not the string "2" — confirm the
    # cli fixture stringifies args; click runners usually expect strings.
    result = cli.invoke(["match", "references", "-bs", 2])
    assert result.exit_code == 0
    for citer_id in citer_ids:
        updated_citer_record = LiteratureRecord.get_record(citer_id)
        # Each citer's reference must now point at the cited record's URI.
        assert (get_value(updated_citer_record, "references[0].record") ==
                cited_record["self"])
def test_index_record_fulltext_manually(inspire_app, clean_celery_session,
                                        override_config, s3, datadir):
    """``index_fulltext()`` indexes a record's document attachment on demand,
    even when commit-time indexing was suppressed.
    """
    metadata = {"foo": "bar"}
    pdf_path = os.path.join(datadir, "2206.04407.pdf")
    # Stage the PDF in the (mocked) S3 store under KEY.
    create_s3_bucket(KEY)
    create_s3_file(
        current_s3_instance.get_bucket_for_file_key(KEY),
        KEY,
        pdf_path,
        metadata,
        **{"ContentType": "application/pdf"},
    )
    with override_config(FEATURE_FLAG_ENABLE_FULLTEXT=True,
                         FEATURE_FLAG_ENABLE_FILES=False):
        data = faker.record("lit")
        data.update({
            "documents": [{
                "source": "arxiv",
                "fulltext": True,
                "filename": "new_doc.pdf",
                "key": KEY,
                "url": "http://www.africau.edu/images/default/sample.pdf",
            }]
        })
        rec = LiteratureRecord.create(data)
        # Suppress the commit-time indexing hook so the record is NOT in ES
        # after the commit...
        models_committed.disconnect(index_after_commit)
        db.session.commit()
        models_committed.connect(index_after_commit)
        assert_record_not_in_es(rec["control_number"])
        # ...then trigger the fulltext indexing explicitly.
        rec.index_fulltext()

        def assert_record_in_es():
            current_search.flush_and_refresh("*")
            record_lit_es = (LiteratureSearch().get_record(str(
                rec.id)).execute().hits.hits[0])
            document = record_lit_es._source["documents"][0]
            assert "attachment" in document
            assert "text" not in document  # pipeline should remove it

        retry_until_pass(assert_record_in_es, timeout=90, retry_interval=5)
def test_fix_entries_by_update_date(inspire_app, clean_celery_session):
    """``fix_entries_by_update_date`` removes stale ``RecordsAuthors`` rows.

    Fix: both retry closures were named ``assert_all_entries_in_db`` — the
    second definition shadowed the first; they now have distinct names that
    state the expected row count.
    """
    literature_data = faker.record("lit", with_control_number=True)
    literature_data.update({
        "authors": [{
            "full_name": "George, Smith",
            "ids": [{"value": "Smith.G.1", "schema": "INSPIRE BAI"}],
        }]
    })
    record_1 = InspireRecord.create(literature_data)
    literature_data_2 = faker.record("lit", with_control_number=True)
    literature_data_2.update({
        "authors": [{
            "full_name": "Xiu, Li",
            "ids": [{"value": "X.Liu.1", "schema": "INSPIRE BAI"}],
        }]
    })
    record_2 = InspireRecord.create(literature_data_2)
    # Two extra rows that fix_entries_by_update_date should clean up.
    db.session.add(
        RecordsAuthors(
            author_id="A.Test.1",
            id_type="INSPIRE BAI",
            record_id=record_1.id,
        ))
    db.session.add(
        RecordsAuthors(
            author_id="A.Test.2",
            id_type="INSPIRE BAI",
            record_id=record_2.id,
        ))
    db.session.commit()

    def assert_four_entries_in_db():
        # One legitimate row per record plus the two extras added above.
        assert len(RecordsAuthors.query.all()) == 4

    retry_until_pass(assert_four_entries_in_db)

    LiteratureRecord.fix_entries_by_update_date()

    def assert_two_entries_in_db():
        assert len(RecordsAuthors.query.all()) == 2

    retry_until_pass(assert_two_entries_in_db, retry_interval=3)
def test_aut_record_update_when_changed(inspire_app, clean_celery_session):
    """Updating an author record (death_date) propagates the change to ES."""
    data = faker.record("aut")
    rec = AuthorsRecord.create(data)
    db.session.commit()
    expected_death_date = "1900-01-01"
    data["death_date"] = expected_death_date
    data["control_number"] = rec["control_number"]
    rec.update(data)
    db.session.commit()

    def assert_record():
        current_search.flush_and_refresh("records-authors")
        indexed = AuthorsSearch().get_record_data_from_es(rec)
        assert indexed["death_date"] == expected_death_date

    retry_until_pass(assert_record)
def test_recalculate_references_recalculates_more_than_10_references(
        inspire_app, clean_celery_session):
    """Reference recalculation after a journal merge updates all 11 referrers,
    i.e. more than a single page of 10 results.
    """
    journal_data = faker.record("jou", with_control_number=True)
    journal = InspireRecord.create(journal_data)
    journal_record_reference = journal["self"]["$ref"]
    literature_data = faker.record("lit")
    literature_data.update({
        "publication_info": [{
            "journal_record": {
                "$ref": journal_record_reference
            }
        }]
    })
    # 11 papers pointing at the journal — one more than 10, to exercise
    # processing beyond the first result page.
    for i in range(11):
        InspireRecord.create(literature_data)
    db.session.commit()

    def assert_all_records_in_es():
        literature_records_from_es = list(LiteratureSearch().query_from_iq(
            query_string=
            f"publication_info.journal_record.$ref: {journal_record_reference}"
        ).scan())
        journal_record_from_es = InspireSearch.get_record_data_from_es(journal)
        assert len(literature_records_from_es) == 11 and journal_record_from_es

    retry_until_pass(assert_all_records_in_es, retry_interval=5)

    # Merge the journal away by listing it in another journal's deleted_records.
    merged_journal_data = faker.record("jou", with_control_number=True)
    merged_journal_data.update(
        {"deleted_records": [{
            "$ref": journal_record_reference
        }]})
    merged_journal_record = InspireRecord.create(merged_journal_data)
    db.session.commit()

    def assert_recalculate_references_task():
        # All 11 papers must now reference the merged journal record.
        literature_records_from_es = list(LiteratureSearch().query_from_iq(
            query_string=
            f'publication_info.journal_record.$ref: {merged_journal_record["self"]["$ref"]}'
        ).scan())
        assert len(literature_records_from_es) == 11

    retry_until_pass(assert_recalculate_references_task, retry_interval=3)
def test_indexer_updates_authors_papers_when_name_changes(
    inspire_app, clean_celery_session
):
    """Renaming an author re-indexes their papers' ``facet_author_name``.

    Fix: the two retry closures were both called ``assert_record`` (the second
    shadowed the first) and depended on rebinding
    ``expected_facet_author_name``; the expectation is now passed explicitly.
    """
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    current_search.flush_and_refresh("records-authors")
    author_cn = author["control_number"]
    lit_data = faker.record(
        "lit",
        data={
            "authors": [
                {
                    "record": {
                        "$ref": f"https://labs.inspirehep.net/api/authors/{author_cn}"
                    },
                    "full_name": author["name"]["value"],
                }
            ]
        },
    )
    lit_1 = LiteratureRecord.create(lit_data)
    db.session.commit()

    def assert_facet_author_name(expected_facet_author_name):
        current_search.flush_and_refresh("records-hep")
        record_from_es = LiteratureSearch().get_record_data_from_es(lit_1)
        assert expected_facet_author_name == record_from_es["facet_author_name"][0]

    expected_initial = f"{author['control_number']}_{author['name']['value']}"
    retry_until_pass(lambda: assert_facet_author_name(expected_initial))

    # NOTE: dict(author) is a shallow copy, so mutating data["name"] also
    # mutates the in-memory record; author.update(data) then persists it.
    data = dict(author)
    data["name"]["value"] = "Some other name"
    author.update(data)
    db.session.commit()

    expected_updated = f"{author['control_number']}_Some other name"
    retry_until_pass(lambda: assert_facet_author_name(expected_updated))
def test_lit_record_update_when_changed(inspire_app, clean_celery_session):
    """A title update on a literature record propagates to ES."""
    data = faker.record("lit")
    data["titles"] = [{"title": "Original title"}]
    rec = LiteratureRecord.create(data)
    db.session.commit()
    expected_title = "Updated title"
    data["titles"][0]["title"] = expected_title
    data["control_number"] = rec["control_number"]
    rec.update(data)
    db.session.commit()

    def assert_record():
        current_search.flush_and_refresh("records-hep")
        indexed = LiteratureSearch().get_record_data_from_es(rec)
        assert indexed["titles"][0]["title"] == expected_title

    retry_until_pass(assert_record)