def test_institution_literature_table_is_not_updated_when_feature_flag_is_disabled(
    update_function_mock, inspire_app
):
    """Institution relation updates are skipped when the disable flag is set."""
    institution = create_record("ins")
    ref = (
        "http://localhost:8000/api/institutions/"
        f"{institution['control_number']}"
    )
    record_data = faker.record(
        "lit",
        {
            "authors": [
                {
                    "full_name": "John Doe",
                    "affiliations": [
                        {"value": "Institution", "record": {"$ref": ref}}
                    ],
                }
            ]
        },
    )

    # Flag on: the institution-literature relation update must not run.
    LiteratureRecord.create(record_data, disable_relations_update=True)
    update_function_mock.assert_not_called()

    # Flag off: the relation update must run.
    LiteratureRecord.create(record_data, disable_relations_update=False)
    update_function_mock.assert_called()
def test_disable_conference_update_feature_flag_disabled(
    update_function_mock, inspire_app
):
    """Conference relation updates are skipped when the disable flag is set."""
    conference = create_record("con")
    ref = (
        "http://localhost:8000/api/conferences/"
        f"{conference['control_number']}"
    )
    # The referenced conference is deleted up front; the flag alone decides
    # whether the relation update runs.
    conference.delete()
    record_data = faker.record(
        "lit",
        {
            "publication_info": [{"conference_record": {"$ref": ref}}],
            "document_type": ["conference paper"],
        },
    )

    # Flag on: the conference relation update must not run.
    LiteratureRecord.create(record_data, disable_relations_update=True)
    update_function_mock.assert_not_called()

    # Flag off: the relation update must run.
    LiteratureRecord.create(record_data, disable_relations_update=False)
    update_function_mock.assert_called()
def test_api_seminars_schema_doesnt_return_acquisition_source_email():
    """The public seminars schema must strip the submitter's email address.

    Fixes: ``internal_uid`` was written as the zero-padded literal ``00000``
    (legal only because it is zero; any nonzero value with leading zeros is a
    SyntaxError in Python 3) and the expected dict duplicated the input.
    """
    acquisition_source = {
        "email": "*****@*****.**",
        "orcid": "0000-0000-0000-0000",
        "method": "oai",
        "source": "submitter",
        "internal_uid": 0,
    }
    # Everything except the email must survive serialization.
    expected = {
        key: value for key, value in acquisition_source.items() if key != "email"
    }
    data_record = faker.record("sem", data={"acquisition_source": acquisition_source})
    result = SeminarsPublicListSchema().dump(data_record).data
    assert expected == result["acquisition_source"]
def test_record_created_through_api_is_indexed(inspire_app, clean_celery_session):
    """POSTing an author through the REST API eventually indexes it in ES."""
    data = faker.record("aut")
    token = AccessTokenFactory()
    db.session.commit()

    response = inspire_app.test_client().post(
        "/api/authors",
        json=data,
        headers={"Authorization": f"Bearer {token.access_token}"},
        content_type="application/json",
    )
    assert response.status_code == 201

    def assert_record():
        # Refresh so the asynchronously indexed document becomes searchable.
        current_search.flush_and_refresh("records-authors")
        hits_total = get_value(es_search("records-authors"), "hits.total.value")
        assert hits_total == 1

    retry_until_pass(assert_record)
def test_literature_create_with_dois(base_app, db, es):
    """Creating a record with a DOI registers an external ``doi`` PID."""
    doi_value = faker.doi()
    record = LiteratureRecord.create(
        faker.record("lit", data={"dois": [{"value": doi_value}]})
    )

    # The stored JSON matches the record.
    record_db = RecordMetadata.query.filter_by(id=record.id).one()
    assert record == record_db.json

    doi_pid = PersistentIdentifier.query.filter_by(
        pid_type="doi", object_uuid=record.id
    ).one()
    assert doi_pid.object_uuid == record.model.id
    assert doi_pid.pid_value == doi_value
    assert doi_pid.pid_type == "doi"
    assert doi_pid.pid_provider == "external"
def test_populate_title_suggest_with_all_inputs():
    """``title_suggest`` collects the full title, short title and variants."""
    data = {
        "$schema": "http://localhost:5000/schemas/records/journals.json",
        "journal_title": {"title": "The Journal of High Energy Physics (JHEP)"},
        "short_title": "JHEP",
        "title_variants": ["JOURNAL OF HIGH ENERGY PHYSICS"],
    }
    record = JournalsRecord(faker.record("jou", data))
    dumped = JournalsElasticSearchSchema().dump(record).data
    assert dumped["title_suggest"] == {
        "input": [
            "The Journal of High Energy Physics (JHEP)",
            "JHEP",
            "JOURNAL OF HIGH ENERGY PHYSICS",
        ]
    }
def test_literature_citation_annual_summary_for_many_records(inspire_app):
    """The citations-by-year facet sums citations of all records per year."""

    def cite(cited_record, preprint_date):
        # Create a literature record citing ``cited_record``, dated as given.
        create_record(
            "lit",
            faker.record(
                "lit",
                literature_citations=[cited_record["control_number"]],
                data={"preprint_date": preprint_date},
            ),
        )

    literature1 = create_record("lit", faker.record("lit"))
    cite(literature1, "2010-01-01")
    cite(literature1, "2013-01-01")

    literature2 = create_record("lit", faker.record("lit"))
    cite(literature2, "2012-01-01")
    cite(literature2, "2013-01-01")

    literature1.index(delay=False)
    literature2.index(delay=False)
    current_search.flush_and_refresh("records-hep")

    request_param = {"facet_name": "citations-by-year"}
    with inspire_app.test_client() as client:
        response = client.get(f"/literature/facets/?{urlencode(request_param)}")

    # 2013 is cited through both records, hence count 2.
    expected = {"value": {"2013": 2, "2012": 1, "2010": 1}}
    assert response.json["aggregations"]["citations_by_year"] == expected
def test_disambiguation_on_record_update_unambiguous_match(
        inspire_app, clean_celery_session, enable_disambiguation):
    """Updating a record must keep the BAI assigned by disambiguation.

    After the first (async) disambiguation run assigns an INSPIRE BAI to the
    author, an unrelated update (adding an email) must leave it unchanged.
    """
    literature_data = faker.record("lit", with_control_number=True)
    literature_data.update({
        "authors": [{
            "full_name": "Kowalczyk, Elisabeth",
            "ids": [{
                "schema": "INSPIRE BAI",
                "value": "E.Kowalczyk.1"
            }],
        }]
    })
    literature_record = LiteratureRecord.create(data=literature_data)
    db.session.commit()

    def assert_first_disambiguation_no_match():
        # Wait until the celery disambiguation task has indexed the record
        # with a BAI on its author.
        literature_record_from_es = InspireSearch.get_record_data_from_es(
            literature_record)
        assert get_values_for_schema(
            literature_record_from_es["authors"][0]["ids"], "INSPIRE BAI")

    retry_until_pass(assert_first_disambiguation_no_match, retry_interval=2)
    # Remember the BAI chosen by the first disambiguation run.
    old_bai = get_values_for_schema(literature_record["authors"][0]["ids"],
                                    "INSPIRE BAI")[0]
    # Drop cached DB state written by the async task before mutating.
    db.session.expire_all()
    lit_record = InspireRecord.get_record(literature_record.id)
    lit_record["authors"][0]["emails"] = ["test.test@com"]
    lit_record.update(dict(lit_record))
    db.session.commit()

    def assert_disambiguation_on_record_update():
        # After the update the author's BAI must be the same as before.
        literature_record_from_es = InspireSearch.get_record_data_from_es(
            literature_record)
        assert (get_values_for_schema(
            literature_record_from_es["authors"][0]["ids"],
            "INSPIRE BAI")[0] == old_bai)

    retry_until_pass(assert_disambiguation_on_record_update, retry_interval=2)
def test_literature_citation_annual_summary_for_many_records(
    api_client, db, es_clear, create_record
):
    """Citations-by-year facet aggregates citation counts across records.

    NOTE(review): same name and scenario as the ``inspire_app``-fixture
    variant elsewhere in this file — presumably the pre-migration version
    (``_index()``/``es_clear`` fixture API); confirm only one should be kept.
    """
    # First cited record: citations dated 2010 and 2013.
    literature1 = create_record("lit", faker.record("lit"))
    create_record(
        "lit",
        faker.record(
            "lit",
            literature_citations=[literature1["control_number"]],
            data={"preprint_date": "2010-01-01"},
        ),
    )
    create_record(
        "lit",
        faker.record(
            "lit",
            literature_citations=[literature1["control_number"]],
            data={"preprint_date": "2013-01-01"},
        ),
    )
    # Second cited record: citations dated 2012 and 2013.
    literature2 = create_record("lit", faker.record("lit"))
    create_record(
        "lit",
        faker.record(
            "lit",
            literature_citations=[literature2["control_number"]],
            data={"preprint_date": "2012-01-01"},
        ),
    )
    create_record(
        "lit",
        faker.record(
            "lit",
            literature_citations=[literature2["control_number"]],
            data={"preprint_date": "2013-01-01"},
        ),
    )
    literature1._index()
    literature2._index()
    request_param = {"facet_name": "citations-by-year"}
    es_clear.indices.refresh("records-hep")
    response = api_client.get(f"literature/facets/?{urlencode(request_param)}")
    # 2013 is cited through both records, hence count 2.
    expected_response = {"value": {"2013": 2, "2012": 1, "2010": 1}}
    assert response.json["aggregations"]["citations_by_year"] == expected_response
def test_literature_create_with_arxiv_eprints(base_app, db, es):
    """Creating a record with an arXiv eprint registers an external ``arxiv`` PID."""
    arxiv_value = faker.arxiv()
    record = LiteratureRecord.create(
        faker.record("lit", data={"arxiv_eprints": [{"value": arxiv_value}]})
    )

    # The stored JSON matches the record.
    record_db = RecordMetadata.query.filter_by(id=record.id).one()
    assert record == record_db.json

    arxiv_pid = PersistentIdentifier.query.filter_by(
        pid_type="arxiv", object_uuid=record.id
    ).one()
    assert arxiv_pid.object_uuid == record.model.id
    assert arxiv_pid.pid_value == arxiv_value
    assert arxiv_pid.pid_type == "arxiv"
    assert arxiv_pid.pid_provider == "external"
def test_indexer_oai_set_CERN_arxiv_and_CDS(inspire_app):
    """A CERN report with an arXiv id and CDS export lands in both OAI sets."""
    record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "report_numbers": [{"value": "CERN-2020-001"}],
                "arxiv_eprints": [{"value": "2009.01484"}],
                "_export_to": {"CDS": True},
            },
        )
    )
    record.index(delay=False)
    result_record = LiteratureSearch.get_record_data_from_es(record)

    oai = result_record["_oai"]
    assert oai["id"] == f"oai:inspirehep.net:{record['control_number']}"
    # NOTE(review): hardcoded timestamp — presumably pinned by the test
    # fixtures; the sibling test derives it from ``record.updated``. Confirm.
    assert oai["updated"] == "1994-12-19T00:00:00"
    assert oai["sets"] == [
        inspire_app.config["OAI_SET_CDS"],
        inspire_app.config["OAI_SET_CERN_ARXIV"],
    ]
def test_indexer_oai_set_CERN_arxiv(inspire_app):
    """A CERN report with an arXiv id lands in the CERN-arXiv OAI set only."""
    record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "report_numbers": [{"value": "CERN-2020-001"}],
                "arxiv_eprints": [{"value": "2009.01484"}],
            },
        )
    )
    record.index(delay=False)
    result_record = LiteratureSearch.get_record_data_from_es(record)

    oai = result_record["_oai"]
    assert oai["id"] == f"oai:inspirehep.net:{record['control_number']}"
    # The OAI timestamp mirrors the record's last-updated time.
    assert oai["updated"] == record.updated.strftime(OAI_TIME_FORMAT)
    assert oai["sets"] == [inspire_app.config["OAI_SET_CERN_ARXIV"]]
def test_populate_affiliation_suggest_from_name_variants_with_umr():
    """UMR name variants also contribute the bare UMR number to the suggester."""
    data = {
        "$schema": "http://localhost:5000/schemas/records/institutions.json",
        "legacy_ICN": "CERN",
        "name_variants": [
            {"value": "Centre Européen de Recherches Nucléaires"},
            {"value": "UMR 2454"},
            {"value": "umr 1234"},
            {"value": "umr"},
        ],
    }
    record = InstitutionsRecord(faker.record("ins", data))
    result = InstitutionsElasticSearchSchema().dump(record).data[
        "affiliation_suggest"
    ]
    # The two numbered UMR variants additionally yield "2454" and "1234";
    # the bare "umr" variant yields no extra number.
    assert result == {
        "input": [
            "CERN",
            "Centre Européen de Recherches Nucléaires",
            "UMR 2454",
            "umr 1234",
            "umr",
            "2454",
            "1234",
        ]
    }
def test_should_display_positions_without_positions():
    """Authors without any positions get ``should_display_positions == False``."""
    data = {
        "name": {"value": "Doe, John", "preferred_name": "J Doe"},
        "ids": [{"schema": "INSPIRE BAI", "value": "John.Doe.1"}],
    }
    author = faker.record("aut", data=data, with_control_number=True)
    dumped = json.loads(AuthorsDetailSchema().dumps(author).data)
    expected = False
    assert expected == dumped.get("should_display_positions")
def test_abstract_source_one_missing_source(
    mockget_linked_records_from_dict_field, mock_referenced_authors
):
    """Only abstracts that carry a ``source`` get ``abstract_source_suggest``."""
    abstract_text = (
        "Imaginem gi converto defectus interdum ad si alterius to."
        "Qua ego lus cogitare referrem mansurum importat."
    )
    data = {
        "abstracts": [
            {"value": abstract_text},
            {"source": "arXiv", "value": abstract_text},
        ]
    }
    record = faker.record("lit", data=data)

    # The sourced abstract gains a suggester entry; the other is untouched.
    expected_abstracts = deepcopy(data["abstracts"])
    expected_abstracts[1]["abstract_source_suggest"] = {"input": "arXiv"}

    result = json.loads(LiteratureElasticSearchSchema().dumps(record).data)
    assert result["abstracts"] == expected_abstracts
def test_dump_experiment():
    """The experiment's legacy name and record ref are flattened on dump."""
    experiment_ref = {"$ref": "http://api/experiments/123"}
    data = {
        **DEFAULT_DATA_TO_DUMP,
        "accelerator_experiments": [
            {"legacy_name": "CMS", "record": experiment_ref}
        ],
    }
    record = faker.record("lit", data=data)
    result = Literature().dump(record).data
    expected = {
        **DEFAULT_DUMP,
        "experiment": "CMS",
        "experiment_record": experiment_ref,
    }
    assert result == expected
def test_only_public_and_current_emails():
    """Only current, non-hidden email addresses survive serialization."""

    def email(value, current, hidden):
        return {"value": value, "current": current, "hidden": hidden}

    data = {
        "email_addresses": [
            email("*****@*****.**", True, True),
            email("*****@*****.**", True, False),
            email("*****@*****.**", False, False),
            email("*****@*****.**", False, True),
        ]
    }
    author = faker.record("aut", data=data, with_control_number=True)
    dumped = json.loads(AuthorsDetailSchema().dumps(author).data)
    # Only the current AND non-hidden address remains.
    assert dumped.get("email_addresses") == [
        email("*****@*****.**", True, False)
    ]
def test_signature_linked_by_disambiguation_has_correct_facet_author_name(
        inspire_app, clean_celery_session):
    """Disambiguating an unlinked signature creates a stub author record and
    links it back into the literature record's ES document."""
    data = faker.record("lit")
    data["authors"] = [{
        "full_name": "Doe, John",
        "uuid": "94fc2b0a-dc17-42c2-bae3-ca0024079e51"
    }]
    record = LiteratureRecord.create(data)
    db.session.commit()
    # A cluster with an empty ``authors`` list means no existing author
    # matched, so disambiguation must create a new one.
    clusters = [{
        "signatures": [{
            "publication_id": record["control_number"],
            "signature_uuid": "94fc2b0a-dc17-42c2-bae3-ca0024079e51",
        }],
        "authors": [],
    }]
    disambiguate_signatures(clusters)
    # Exactly one author record should have been created.
    author_pids = PersistentIdentifier.query.filter_by(pid_type="aut").all()
    assert len(author_pids) == 1
    pid_value = author_pids[0].pid_value
    author = AuthorsRecord.get_record_by_pid_value(pid_value)
    author_control_number = author.pop("control_number")
    # ``facet_author_name`` encodes "<control_number>_<display name>".
    expected_facet_author_name = [f"{author_control_number}_John Doe"]
    expected_record_ref = f"http://localhost:5000/api/authors/{pid_value}"

    def assert_references():
        # The back-reference is written asynchronously; refresh and retry.
        current_search.flush_and_refresh("records-hep")
        record_from_es = InspireSearch.get_record_data_from_es(record)
        assert expected_facet_author_name == record_from_es[
            "facet_author_name"]
        assert expected_record_ref == record_from_es["authors"][0]["record"][
            "$ref"]

    retry_until_pass(assert_references, retry_interval=2)
def test_load_author_advisors():
    """Advisor names are normalized and ``curated_relation`` defaults to False."""

    def advisor_ids():
        # Fresh list per use so input and expectation never share objects.
        return [
            {"schema": "DESY", "value": "DESY-55924820881"},
            {"schema": "SCOPUS", "value": "7039712595"},
            {"schema": "SCOPUS", "value": "8752067273"},
        ]

    data = {
        **DEFAULT_DATA_TO_DUMP,
        "advisors": [
            {
                "degree_type": "bachelor",
                "ids": advisor_ids(),
                "name": "Jane Doe",
                "record": {"$ref": "http://1js40iZ"},
            }
        ],
    }
    record = faker.record("aut", data=data)
    result = Author().load(record).data
    expected = {
        **DEFAULT_DATA_LOAD,
        "advisors": [
            {
                "curated_relation": False,
                "degree_type": "bachelor",
                "ids": advisor_ids(),
                # "Jane Doe" is normalized to "last, first" form.
                "name": "Doe, Jane",
                "record": {"$ref": "http://1js40iZ"},
            }
        ],
    }
    assert result == expected
def test_lit_record_removed_form_es_when_deleted(app, celery_app_with_context,
                                                 celery_session_worker,
                                                 retry_until_matched):
    """Deleting a literature record removes its document from ES."""
    rec = LiteratureRecord.create(faker.record("lit"))
    db.session.commit()

    def expect_total(total):
        # Refresh the index, then assert the hit count ES reports.
        return [
            {"step": es.indices.refresh, "args": ["records-hep"]},
            {
                "step": es.search,
                "args": ["records-hep"],
                "expected_result": {
                    "expected_key": "hits.total",
                    "expected_result": total,
                },
            },
        ]

    retry_until_matched(expect_total(1))

    rec.delete()
    db.session.commit()
    retry_until_matched(expect_total(0))
def test_aut_record_removed_form_es_when_deleted(inspire_app,
                                                 celery_app_with_context,
                                                 celery_session_worker):
    """Deleting an author record removes its document from ES."""
    rec = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    def expect_total(total):
        # Refresh the index, then assert the hit count ES reports.
        return [
            {
                "step": current_search.flush_and_refresh,
                "args": ["records-authors"],
            },
            {
                "step": es_search,
                "args": ["records-authors"],
                "expected_result": {
                    "expected_key": "hits.total.value",
                    "expected_result": total,
                },
            },
        ]

    retry_until_matched(expect_total(1))

    rec.delete()
    db.session.commit()
    retry_until_matched(expect_total(0))
def test_experiment_serializer_populates_experiment_suggest(
    mock_experiment_literature_table,
):
    """Every experiment name field feeds the suggester; legacy name weighs 5."""
    data = {
        "accelerator": {"value": "ACC"},
        "collaboration": {"curated_relation": False, "value": "COLLABORATION"},
        "experiment": {"short_name": "EXP SHORT NAME", "value": "Experiment value"},
        "institutions": [
            {
                "record": {
                    "$ref": "http://labs.inspirehep.net/api/institutions/902725"
                },
                "value": "INST_VALUE",
                "curated_relation": True,
            }
        ],
        "legacy_name": "LEGACY-NAME",
        "long_name": "{Long Name}",
        "name_variants": ["NAME_V1", "NAME_V2", "NAME_V3"],
    }
    # All name inputs get weight 1 except the legacy name (weight 5, last).
    weighted = [
        ("ACC", 1),
        ("COLLABORATION", 1),
        ("EXP SHORT NAME", 1),
        ("Experiment value", 1),
        ("INST_VALUE", 1),
        ("{Long Name}", 1),
        ("NAME_V1", 1),
        ("NAME_V2", 1),
        ("NAME_V3", 1),
        ("LEGACY-NAME", 5),
    ]
    expected = [{"input": text, "weight": weight} for text, weight in weighted]
    experiment = ExperimentsRecord(faker.record("exp", data))
    result = ExperimentsElasticSearchSchema().dump(experiment).data[
        "experiment_suggest"
    ]
    assert result == expected
def test_get_citation_annual_summary(inspire_app):
    """``citations_by_year`` reports one entry per year with its citation count."""

    def cite(cited_record, preprint_date):
        # Create a literature record citing ``cited_record``, dated as given.
        create_record(
            "lit",
            faker.record(
                "lit",
                literature_citations=[cited_record["control_number"]],
                data={"preprint_date": preprint_date},
            ),
        )

    literature1 = create_record("lit", faker.record("lit"))
    cite(literature1, "2010-01-01")
    cite(literature1, "2013-01-01")

    literature2 = create_record("lit", faker.record("lit"))
    cite(literature2, "2012-01-01")
    cite(literature2, "2013-01-01")

    assert literature1.citations_by_year == [
        {"year": 2010, "count": 1},
        {"year": 2013, "count": 1},
    ]
    assert literature2.citations_by_year == [
        {"year": 2012, "count": 1},
        {"year": 2013, "count": 1},
    ]
def test_has_cern_affiliation_with_multiple_authors():
    """The record counts as CERN-affiliated if any author lists CERN."""

    def author(full_name, *affiliations):
        return {
            "full_name": full_name,
            "affiliations": [{"value": value} for value in affiliations],
        }

    record = faker.record(
        "lit",
        {
            "curated": True,
            "authors": [
                author("J. Jones", "CERN", "SLAC"),
                author("F. Castle", "Whatever"),
            ],
        },
    )
    assert has_cern_affiliation(record)
def test_aut_record_appear_in_es_when_created(app, celery_app_with_context,
                                              celery_session_worker,
                                              retry_until_matched):
    """Creating an author record makes it show up in the ES authors index."""
    rec = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    steps = [
        {"step": es.indices.refresh, "args": ["records-authors"]},
        {
            "step": es.search,
            "args": ["records-authors"],
            "expected_result": {
                "expected_key": "hits.total",
                "expected_result": 1,
            },
        },
    ]
    response = retry_until_matched(steps)
    # The single indexed document must be the record we just created.
    assert response["hits"]["hits"][0]["_id"] == str(rec.id)
def test_dump_for_es(base_app, db, es):
    """The ES dump exposes control_number/id and a serialized ``_ui_display``."""
    data = faker.record(
        "lit",
        data={"preprint_date": "2016-01-01", "publication_info": [{"year": 2015}]},
    )
    record = LiteratureRecord.create(data)
    dump = record.serialize_for_es()

    assert "control_number" in dump
    assert dump["control_number"] == record["control_number"]
    assert "id" in dump
    assert dump["id"] == record["control_number"]
    assert dump["document_type"] == ["article"]

    # ``_ui_display`` is a JSON string embedding the UI payload.
    assert "_ui_display" in dump
    ui_field = json.loads(dump["_ui_display"])
    assert "titles" in ui_field
    assert "document_type" in ui_field
    assert ui_field["titles"] == record["titles"]
    assert ui_field["control_number"] == record["control_number"]
def test_many_records_in_one_commit(app, celery_app_with_context,
                                    celery_session_worker, retry_until_matched):
    """Ten records created in a single commit are all indexed in ES.

    Fix: the loop bound two unused locals (index ``x`` and the created
    record ``rec``); the index is now ``_`` and the record is not bound.
    """
    for _ in range(10):
        LiteratureRecord.create(faker.record("lit"))
    db.session.commit()
    # Eager refresh so the first retry iteration already sees fresh data.
    es.indices.refresh("records-hep")
    steps = [
        {"step": es.indices.refresh, "args": ["records-hep"]},
        {
            "step": es.search,
            "args": ["records-hep"],
            "expected_result": {
                "expected_key": "hits.total",
                "expected_result": 10,
            },
        },
    ]
    retry_until_matched(steps)
def test_literature_create_or_update_with_existing_record(base_app, db):
    """``create_or_update`` updates an existing literature record in place.

    Fixes: ``faker.record`` was called without the ``"lit"`` pid type that
    every other test in this file passes (cf. the journals variant), and the
    already-string control number was stringified a second time for the PID
    lookup.
    """
    data = faker.record("lit", with_control_number=True)
    record = LiteratureRecord.create(data)
    assert record["control_number"] == data["control_number"]

    data.update({"titles": [{"title": "UPDATED"}]})
    record_updated = LiteratureRecord.create_or_update(data)
    assert record_updated["control_number"] == record["control_number"]

    # The DB row carries the updated JSON.
    record_updated_db = RecordMetadata.query.filter_by(
        id=record_updated.id).one()
    assert record_updated_db.json == data

    # The PID still points at the same object.
    control_number = str(record_updated["control_number"])
    record_updated_pid = PersistentIdentifier.query.filter_by(
        pid_type="lit", pid_value=control_number).one()
    assert record_updated_pid.object_uuid == record_updated.model.id
    assert record_updated_pid.pid_value == control_number
def test_institutions_serializer_populates_affiliation_suggest():
    """ICNs, hierarchy names, name variants and postal codes feed the suggester."""
    data = {
        "ICN": ["ICN_VALUE"],
        "legacy_ICN": "Legacy icn value",
        "institution_hierarchy": [{"acronym": "ACR1", "name": "Name1"}],
        "name_variants": [{"value": "name1"}, {"value": "name2"}],
        "addresses": [{"postal_code": "12345"}, {"postal_code": "65432"}],
    }
    institution = InstitutionsRecord(faker.record("ins", data))
    result = InstitutionsElasticSearchSchema().dump(institution).data[
        "affiliation_suggest"
    ]
    assert result == {
        "input": [
            "ICN_VALUE",
            "ACR1",
            "Name1",
            "Legacy icn value",
            "name1",
            "name2",
            "12345",
            "65432",
        ]
    }
def test_journals_create_or_update_with_existing_record(inspire_app):
    """``create_or_update`` updates an existing journal record in place."""
    data = faker.record("jou", with_control_number=True)
    record = JournalsRecord.create(data)
    assert record["control_number"] == data["control_number"]

    data.update({"public_notes": [{"value": "UPDATED"}]})
    record_updated = JournalsRecord.create_or_update(data)
    assert record_updated["control_number"] == record["control_number"]

    # The DB row carries the updated JSON.
    record_updated_db = RecordMetadata.query.filter_by(
        id=record_updated.id).one()
    assert record_updated_db.json == data

    # The PID still points at the same object.
    control_number = str(record_updated["control_number"])
    record_updated_pid = PersistentIdentifier.query.filter_by(
        pid_type="jou", pid_value=control_number).one()
    assert record_updated_pid.object_uuid == record_updated.model.id
    assert record_updated_pid.pid_value == control_number