def test_authors_create_with_invalid_data(inspire_app):
    """Expect ``ValidationError`` for an unknown key, and no PID left behind."""
    payload = faker.record("aut", with_control_number=True)
    payload["invalid_key"] = "should throw an error"
    control_number = str(payload["control_number"])

    with pytest.raises(ValidationError):
        AuthorsRecord.create(payload)

    # After the failed create, no PID may exist for that control number.
    pid_query = PersistentIdentifier.query.filter_by(pid_value=control_number)
    assert pid_query.one_or_none() is None
def test_authors_create_with_existing_control_number(inspire_app):
    """Expect ``PIDAlreadyExists`` when the control number is already taken."""
    payload = faker.record("aut", with_control_number=True)

    # Register the same "aut" PID value against an unrelated object UUID first.
    create_pidstore(
        object_uuid=uuid.uuid4(),
        pid_type="aut",
        pid_value=payload["control_number"],
    )

    with pytest.raises(PIDAlreadyExists):
        AuthorsRecord.create(payload)
def test_aut_record_appear_in_es_when_created(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """A committed author record becomes the single hit in ``records-authors``."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    def search_step(key, value):
        # One search against the authors index plus the value expected at `key`.
        return {
            "step": es_search,
            "args": ["records-authors"],
            "expected_result": {
                "expected_key": key,
                "expected_result": value,
            },
        }

    retry_until_matched(
        [
            {
                "step": current_search.flush_and_refresh,
                "args": ["records-authors"],
            },
            search_step("hits.total.value", 1),
            search_step("hits.hits[0]._id", str(author.id)),
        ]
    )
def test_aut_record_removed_form_es_when_deleted(inspire_app, clean_celery_session):
    """The authors index goes from one hit to zero once the record is deleted."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    def indexed_total():
        # Refresh first so the search reflects the latest indexing activity.
        current_search.flush_and_refresh("records-authors")
        return get_value(es_search("records-authors"), "hits.total.value")

    def assert_indexed():
        assert 1 == indexed_total()

    def assert_removed():
        assert 0 == indexed_total()

    retry_until_pass(assert_indexed)

    author.delete()
    db.session.commit()

    retry_until_pass(assert_removed)
def test_aut_record_update_when_changed(
    app, celery_app_with_context, celery_session_worker, retry_until_matched
):
    """An updated ``death_date`` is visible in the indexed author document."""
    payload = faker.record("aut")
    author = AuthorsRecord.create(payload)
    db.session.commit()

    payload["death_date"] = "1900-01-01"
    author.update(payload)
    db.session.commit()

    refresh_step = {"step": es.indices.refresh, "args": ["records-authors"]}
    search_step = {
        "step": es.search,
        "args": ["records-authors"],
        "expected_result": {"expected_key": "hits.total", "expected_result": 1},
    }

    hits = retry_until_matched([refresh_step, search_step])["hits"]["hits"]
    assert hits[0]["_source"]["death_date"] == "1900-01-01"
def test_literature_create_or_update_with_existing_record(inspire_app):
    """``create_or_update`` keeps the control number and stores the new data."""
    payload = faker.record("aut", with_control_number=True)
    original = AuthorsRecord.create(payload)
    assert payload["control_number"] == original["control_number"]

    payload.update(
        {
            "name": {
                "name_variants": ["UPDATED"],
                "preferred_name": "UPDATED",
                "value": "UPDATED",
            }
        }
    )
    updated = AuthorsRecord.create_or_update(payload)
    assert original["control_number"] == updated["control_number"]

    # The stored JSON must equal the payload that was just submitted.
    stored = RecordMetadata.query.filter_by(id=updated.id).one()
    assert payload == stored.json

    # The "aut" PID must point at the updated record's model.
    pid_value = str(updated["control_number"])
    pid = PersistentIdentifier.query.filter_by(
        pid_type="aut", pid_value=pid_value
    ).one()
    assert updated.model.id == pid.object_uuid
    assert pid_value == pid.pid_value
def test_aut_record_update_when_changed(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """An updated ``death_date`` reaches the ``records-authors`` index."""
    data = faker.record("aut")
    rec = AuthorsRecord.create(data)
    db.session.commit()

    expected_death_date = "1900-01-01"
    data["death_date"] = expected_death_date
    # Copy the control number from the created record into the update payload.
    data["control_number"] = rec["control_number"]
    rec.update(data)
    db.session.commit()

    steps = [
        {
            "step": current_search.flush_and_refresh,
            "args": ["records-authors"],
        },
        {
            "step": es_search,
            "args": ["records-authors"],
            "expected_result": {
                "expected_key": "hits.total.value",
                "expected_result": 1,
            },
        },
        {
            "step": es_search,
            "args": ["records-authors"],
            "expected_result": {
                "expected_key": "hits.hits[0]._source.death_date",
                "expected_result": expected_death_date,
            },
        },
    ]
    # The expectations live in the steps; the return value is not needed, so
    # the former discarded ``["hits"]["hits"]`` subscripts were removed.
    retry_until_matched(steps)
def test_session_still_works_after_integrity_error_on_bai_provider(inspire_app):
    """Two creates from the same data end with two ``bai`` PIDs committed."""
    payload = faker.record("aut")
    bai_number_target = (
        "inspirehep.pidstore.providers.bai.InspireBAIProvider.next_bai_number"
    )

    with override_config(FEATURE_FLAG_ENABLE_BAI_PROVIDER=True):
        AuthorsRecord.create(data=payload)
        db.session.commit()

        with mock.patch(bai_number_target) as next_bai_mock:
            # The mocked number generator yields 1 and then 2.
            next_bai_mock.side_effect = [1, 2]
            AuthorsRecord.create(data=payload)

        # Both queued values must have been requested.
        assert next_bai_mock.call_count == 2
        db.session.commit()

        bai_pid_count = PersistentIdentifier.query.filter_by(pid_type="bai").count()
        assert bai_pid_count == 2
def test_create_record_from_db_depending_on_its_pid_type(base_app, db, es):
    """Both ``InspireRecord.create`` and ``AuthorsRecord.create`` yield authors.

    Uses ``isinstance`` rather than comparing ``type(...)`` with ``==``.
    """
    data = faker.record("aut")

    record = InspireRecord.create(data)
    assert isinstance(record, AuthorsRecord)
    assert record.pid_type == "aut"

    record = AuthorsRecord.create(data)
    assert isinstance(record, AuthorsRecord)
    assert record.pid_type == "aut"
def test_create_record_from_db_depending_on_its_pid_type(inspire_app):
    """Author data yields an ``AuthorsRecord`` from either ``create`` entry point."""
    payload = faker.record("aut")

    # Same payload, first through the generic class, then the specific one.
    for record_class in (InspireRecord, AuthorsRecord):
        created = record_class.create(payload)
        assert isinstance(created, AuthorsRecord)
        assert created.pid_type == "aut"
def test_indexer_updates_advisor_when_student_name_changes(
    inspire_app, clean_celery_session
):
    """Renaming a student is reflected in the advisor's indexed ``students``."""
    advisor = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    current_search.flush_and_refresh("records-authors")

    # The student record links to the advisor by name and record reference.
    advisor_link = {
        "name": advisor["name"]["value"],
        "record": advisor["self"],
        "degree_type": "phd",
    }
    student = AuthorsRecord.create(
        faker.record("aut", data={"advisors": [advisor_link]})
    )
    db.session.commit()

    def assert_both_indexed():
        current_search.flush_and_refresh("records-authors")
        search_result = AuthorsSearch().query_from_iq("").execute()
        assert len(search_result.hits) == 2

    retry_until_pass(assert_both_indexed, 3)

    student["name"]["preferred_name"] = "Test Student"
    student.update(dict(student))
    db.session.commit()

    def assert_advisor_lists_new_name():
        current_search.flush_and_refresh("records-authors")
        advisor_from_es = AuthorsSearch().get_record_data_from_es(advisor)
        assert advisor_from_es["students"][0]["name"] == "Test Student"

    retry_until_pass(assert_advisor_lists_new_name, retry_interval=3)
def test_regression_get_linked_author_records_uuids_if_author_changed_name_does_not_return_none_for_author_which_name_didnt_change(
    app, clean_celery_session
):
    """Expect an empty set, not ``None``, when only ``birth_date`` changed."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    # Change a field other than the name.
    changed = dict(author)
    changed["birth_date"] = "1950-01-01"
    author.update(changed)
    db.session.commit()

    reloaded = AuthorsRecord.get_record_by_pid_value(author["control_number"])
    linked_uuids = reloaded.get_linked_author_records_uuids_if_author_changed_name()
    assert set() == linked_uuids
def test_get_linked_advisors_when_name_changes(inspire_app):
    """Check ``get_linked_advisors_when_name_changes`` across several updates."""
    advisor = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    assert not advisor.get_linked_advisors_when_name_changes()

    advisor_link = {
        "name": advisor["name"]["value"],
        "record": advisor["self"],
        "degree_type": "phd",
    }
    student = AuthorsRecord.create(
        faker.record("aut", data={"advisors": [advisor_link]})
    )
    db.session.commit()
    # Right after creation the advisor is reported.
    assert student.get_linked_advisors_when_name_changes() == {str(advisor.id)}

    # Only ``ids`` changes here: nothing is reported.
    student["ids"] = [{"value": "0000-0002-1558-1309", "schema": "ORCID"}]
    student.update(dict(student))
    db.session.commit()
    assert not student.get_linked_advisors_when_name_changes()

    # Setting the preferred name reports the advisor.
    student["name"]["preferred_name"] = "Test Author"
    student.update(dict(student))
    db.session.commit()
    assert student.get_linked_advisors_when_name_changes() == {str(advisor.id)}

    # Removing the preferred name reports the advisor as well.
    del student["name"]["preferred_name"]
    student.update(dict(student))
    db.session.commit()
    assert student.get_linked_advisors_when_name_changes() == {str(advisor.id)}
def test_aut_record_appear_in_es_when_created(inspire_app, clean_celery_session):
    """A committed author can be read back from ES with its control number."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    control_number = author["control_number"]

    def assert_indexed():
        current_search.flush_and_refresh("records-authors")
        author_from_es = AuthorsSearch().get_record_data_from_es(author)
        assert control_number == author_from_es["control_number"]

    retry_until_pass(assert_indexed)
def test_indexer_deletes_record_from_es(inspire_app, datadir):
    """A created-then-deleted author yields no hits when looked up by id."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    author.delete()
    db.session.commit()

    def assert_absent_from_es():
        current_search.flush_and_refresh("records-authors")
        hits = AuthorsSearch().get_record(str(author.id)).execute().hits
        assert 0 == len(hits)

    retry_until_pass(assert_absent_from_es)
def test_authors_create(inspire_app):
    """Creating an author stores its JSON and an ``aut`` PID pointing at it."""
    author = AuthorsRecord.create(faker.record("aut"))

    stored = RecordMetadata.query.filter_by(id=author.id).one()
    assert author == stored.json

    pid_value = str(author["control_number"])
    pid = PersistentIdentifier.query.filter_by(
        pid_type="aut", pid_value=pid_value
    ).one()
    assert author.model.id == pid.object_uuid
    assert pid_value == pid.pid_value
def test_process_references_in_records_process_author_records(
    mock_batch_index, inspire_app, clean_celery_session
):
    """Processing a renamed author batch-indexes both papers that cite them.

    ``index_after_commit`` is detached from ``models_committed`` while the
    author is renamed, and re-attached in a ``finally`` so a failing update
    cannot leave the handler disconnected.
    """
    author_record = AuthorsRecord.create(faker.record("aut"))
    lit_record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [
                    {
                        "full_name": author_record["name"]["value"],
                        "record": author_record["self"],
                    }
                ]
            },
        )
    )
    lit_record_2 = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [
                    {
                        "full_name": author_record["name"]["value"],
                        "record": author_record["self"],
                    }
                ]
            },
        )
    )
    db.session.commit()

    def assert_records_in_es():
        lit_record_from_es = InspireSearch.get_record_data_from_es(lit_record)
        lit_record_from_es_2 = InspireSearch.get_record_data_from_es(lit_record_2)
        aut_record_from_es = InspireSearch.get_record_data_from_es(author_record)
        assert lit_record_from_es and aut_record_from_es and lit_record_from_es_2

    retry_until_pass(assert_records_in_es, retry_interval=5)

    models_committed.disconnect(index_after_commit)
    try:
        author_record["name"]["value"] = "Another Name"
        author_record.update(dict(author_record))
        db.session.commit()
    finally:
        # reconnect signal before we call process_references_in_records
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([author_record.id])
    task.get(timeout=5)

    # First positional argument of the first recorded call on the mock.
    indexed_ids = mock_batch_index.mock_calls[0][1][0]
    assert sorted(indexed_ids) == sorted(
        [str(lit_record.id), str(lit_record_2.id)]
    )
def test_aut_record_update_when_changed(inspire_app, clean_celery_session):
    """An updated ``death_date`` can be read back from the ES author document."""
    payload = faker.record("aut")
    author = AuthorsRecord.create(payload)
    db.session.commit()

    payload["death_date"] = "1900-01-01"
    # Copy the control number from the created record into the update payload.
    payload["control_number"] = author["control_number"]
    author.update(payload)
    db.session.commit()

    def assert_death_date_indexed():
        current_search.flush_and_refresh("records-authors")
        author_from_es = AuthorsSearch().get_record_data_from_es(author)
        assert "1900-01-01" == author_from_es["death_date"]

    retry_until_pass(assert_death_date_indexed)
def test_indexer_updates_authors_papers_when_name_changes(
    inspire_app, clean_celery_session
):
    """A paper's first ``facet_author_name`` follows the author's new name."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    current_search.flush_and_refresh("records-authors")
    author_cn = author["control_number"]

    paper = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [
                    {
                        "record": {
                            "$ref": f"https://labs.inspirehep.net/api/authors/{author_cn}"
                        },
                        "full_name": author["name"]["value"],
                    }
                ]
            },
        )
    )
    db.session.commit()

    def facet_name_checker(expected_name):
        # Build a zero-argument assertion suitable for retry_until_pass.
        def assert_facet_name():
            current_search.flush_and_refresh("records-hep")
            paper_from_es = LiteratureSearch().get_record_data_from_es(paper)
            assert expected_name == paper_from_es["facet_author_name"][0]

        return assert_facet_name

    retry_until_pass(facet_name_checker(f"{author_cn}_{author['name']['value']}"))

    # dict() is a shallow copy, so the nested "name" mapping is shared with
    # `author` when it is edited here.
    renamed = dict(author)
    renamed["name"]["value"] = "Some other name"
    author.update(renamed)
    db.session.commit()

    retry_until_pass(
        facet_name_checker(f"{author['control_number']}_Some other name")
    )
def test_aut_record_removed_form_es_when_deleted(
    app, celery_app_with_context, celery_session_worker, retry_until_matched
):
    """``hits.total`` for the authors index drops from 1 to 0 after a delete."""

    def total_steps(expected_total):
        # Refresh the index, then search and expect `expected_total` hits.
        return [
            {"step": es.indices.refresh, "args": ["records-authors"]},
            {
                "step": es.search,
                "args": ["records-authors"],
                "expected_result": {
                    "expected_key": "hits.total",
                    "expected_result": expected_total,
                },
            },
        ]

    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    retry_until_matched(total_steps(1))

    author.delete()
    db.session.commit()
    retry_until_matched(total_steps(0))
def test_aut_record_appear_in_es_when_created(
    app, celery_app_with_context, celery_session_worker, retry_until_matched
):
    """The single indexed author document carries the created record's id."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    refresh_step = {"step": es.indices.refresh, "args": ["records-authors"]}
    search_step = {
        "step": es.search,
        "args": ["records-authors"],
        "expected_result": {"expected_key": "hits.total", "expected_result": 1},
    }

    response = retry_until_matched([refresh_step, search_step])
    first_hit = response["hits"]["hits"][0]
    assert first_hit["_id"] == str(author.id)
def test_indexer_updates_authors_papers_when_name_changes(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """The indexed paper keeps one ``facet_author_name`` that tracks a rename."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    current_search.flush_and_refresh("records-authors")
    author_cn = author["control_number"]

    paper_data = faker.record(
        "lit",
        data={
            "authors": [
                {
                    "record": {
                        "$ref": f"https://labs.inspirehep.net/api/authors/{author_cn}"
                    },
                    "full_name": author["name"]["value"],
                }
            ]
        },
    )
    lit_1 = LiteratureRecord.create(paper_data)
    db.session.commit()

    def facet_name_steps(expected_name):
        return [
            {"step": current_search.flush_and_refresh, "args": ["*"]},
            {
                "step": es_search,
                "args": ["records-hep"],
                "expected_result": {
                    "expected_key": "hits.total.value",
                    "expected_result": 1,
                },
            },
            # NOTE(review): unlike the entry above, this one has no
            # "step"/"args" and its keys are not nested under
            # "expected_result". Kept exactly as written; confirm that
            # retry_until_matched supports this shape.
            {
                "expected_key": "hits.hits[0]._source.facet_author_name[0]",
                "expected_result": expected_name,
            },
        ]

    def facet_name_count(response):
        return len(response["hits"]["hits"][0]["_source"]["facet_author_name"])

    original_name = f"{author['control_number']}_{author['name']['value']}"
    results = retry_until_matched(facet_name_steps(original_name), timeout=45)
    assert facet_name_count(results) == 1

    # dict() is a shallow copy, so the nested "name" mapping is shared with
    # `author` when it is edited here.
    renamed = dict(author)
    renamed["name"]["value"] = "Some other name"
    author.update(renamed)
    db.session.commit()

    new_name = f"{author['control_number']}_Some other name"
    results = retry_until_matched(facet_name_steps(new_name), timeout=45)
    assert facet_name_count(results) == 1
def test_if_bai_is_processed_on_authors_record_creation(inspire_app):
    """Creating an author whose data carries a BAI registers one ``bai`` PID."""
    data = faker.record("aut", other_pids=["bai"])
    # The test reads the BAI value from the first entry of ``ids``.
    bai = data["ids"][0]["value"]

    # Only the side effect matters, so the created record is not bound.
    AuthorsRecord.create(data)

    bai_pid_count = PersistentIdentifier.query.filter_by(
        pid_type="bai", pid_value=bai
    ).count()
    assert bai_pid_count == 1
def test_process_references_in_records_process_self_citations(
    mock_batch_index, inspire_app, clean_celery_session, enable_self_citations
):
    """Adding the citing paper's author to the cited paper reindexes the citer.

    ``index_after_commit`` is detached from ``models_committed`` while the
    cited paper is updated, and re-attached in a ``finally`` so a failing
    update cannot leave the handler disconnected.
    """

    def author_ids(inspire_id, bai):
        # Return a fresh list on each call so no record payload shares objects.
        return [
            {"value": inspire_id, "schema": "INSPIRE ID"},
            {"value": bai, "schema": "INSPIRE BAI"},
        ]

    author_record = AuthorsRecord.create(
        faker.record(
            "aut",
            data={
                "name": {
                    "value": "'t Hooft, Gerardus",
                    "name_variants": ["'t Hooft, Gerard", "Hooft, Gerard T."],
                    "preferred_name": "Gerardus 't Hooft",
                },
                "ids": author_ids("INSPIRE-00060582", "G.tHooft.1"),
            },
        )
    )
    author_record_2 = AuthorsRecord.create(
        faker.record(
            "aut",
            data={
                "name": {
                    "value": "'t Hooft, Gerardus Marcus",
                    "preferred_name": "Gerardus Marcus 't Hooft",
                },
                "ids": author_ids("INSPIRE-00060583", "G.tHooft.2"),
            },
        )
    )
    lit_record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [
                    {
                        "ids": author_ids("INSPIRE-00060582", "G.tHooft.1"),
                        "full_name": author_record["name"]["value"],
                        "record": author_record["self"],
                    }
                ]
            },
        )
    )
    # The second paper cites the first and is written by the second author.
    lit_record_2 = LiteratureRecord.create(
        faker.record(
            "lit",
            literature_citations=[lit_record["control_number"]],
            data={
                "authors": [
                    {
                        "ids": author_ids("INSPIRE-00060583", "G.tHooft.2"),
                        "full_name": author_record_2["name"]["value"],
                        "record": author_record_2["self"],
                    }
                ]
            },
        )
    )
    db.session.commit()

    def assert_records_in_es():
        lit_record_from_es = InspireSearch.get_record_data_from_es(lit_record)
        lit_record_from_es_2 = InspireSearch.get_record_data_from_es(lit_record_2)
        aut_record_from_es = InspireSearch.get_record_data_from_es(author_record)
        assert lit_record_from_es and aut_record_from_es and lit_record_from_es_2

    retry_until_pass(assert_records_in_es, retry_interval=5)

    models_committed.disconnect(index_after_commit)
    try:
        # Add the second author to the cited paper as well.
        lit_record["authors"].append(
            {
                "ids": author_ids("INSPIRE-00060583", "G.tHooft.2"),
                "full_name": author_record_2["name"]["value"],
                "record": author_record_2["self"],
            }
        )
        lit_record.update(dict(lit_record))
        db.session.commit()
    finally:
        # reconnect signal before we call process_references_in_records
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([lit_record.id])
    task.get(timeout=5)

    # First positional argument of the first recorded call on the mock.
    indexed_ids = mock_batch_index.mock_calls[0][1][0]
    assert sorted(indexed_ids) == sorted([lit_record_2.id])
def test_disambiguation_races_assign(
    override_config, inspire_app, clean_celery_session, enable_disambiguation
):
    """After ``/api/assign/author`` the paper's author BAI is no longer the old one."""
    cataloger = create_user(role="cataloger")
    original_bai = "M.F.A.Hearn.1"

    with override_config(
        FEATURE_FLAG_ENABLE_BAI_PROVIDER=True,
        FEATURE_FLAG_ENABLE_BAI_CREATION=True,
    ):
        author_payload = faker.record("aut")
        author_payload.update(
            {
                "name": {"value": "Michael F. A'Hearn"},
                "ids": [{"schema": "INSPIRE BAI", "value": original_bai}],
            }
        )
        author_record = AuthorsRecord.create(author_payload)

        paper_payload = faker.record("lit")
        paper_payload.update(
            {
                "authors": [
                    {
                        "ids": [{"value": original_bai, "schema": "INSPIRE BAI"}],
                        "uuid": "ce061c1e-866a-422d-9982-652183bae814",
                        "full_name": "A'Hearn, M.F.",
                        "signature_block": "HARNm",
                        "curated_relation": True,
                        "record": author_record["self"],
                    }
                ]
            }
        )
        lit_record = LiteratureRecord.create(paper_payload)
        db.session.commit()

        # Post the assign request as the cataloger user.
        request_body = orjson.dumps(
            {
                "literature_recids": [lit_record["control_number"]],
                "from_author_recid": author_record["control_number"],
            }
        )
        with inspire_app.test_client() as client:
            login_user_via_session(client, email=cataloger.email)
            client.post(
                "/api/assign/author",
                data=request_body,
                content_type="application/json",
            )

        def assert_disambiguation_on_record_update():
            paper_from_es = InspireSearch.get_record_data_from_es(lit_record)
            indexed_bais = get_values_for_schema(
                paper_from_es["authors"][0]["ids"], "INSPIRE BAI"
            )
            assert indexed_bais[0] != "M.F.A.Hearn.1"

        retry_until_pass(assert_disambiguation_on_record_update, retry_interval=2)