def assert_all_records_are_indexed():
    """Assert that the expected record UUIDs are present in Elasticsearch.

    Flushes and refreshes every index, then checks that each UUID in
    ``citer_ids`` is among the ``records-hep`` hit ids and that
    ``str(record_data_uuids)`` is among the ``records-data`` hit ids.

    NOTE(review): ``citer_ids`` and ``record_data_uuids`` are free names
    here; presumably they are bound in an enclosing scope -- confirm.
    """
    current_search.flush_and_refresh("*")

    hep_hit_ids = get_value(es_search("records-hep"), "hits.hits._id")
    for citer_uuid in citer_ids:
        assert str(citer_uuid) in hep_hit_ids

    data_hit_ids = get_value(es_search("records-data"), "hits.hits._id")
    assert str(record_data_uuids) in data_hit_ids
def test_index_institutions_record(inspire_app, datadir):
    """Index an institution record and compare the stored ES document.

    The expected document is the ``InstitutionsElasticSearchSchema`` dump of
    the created record, extended with the suggester input, the paper count
    and the creation/update timestamps.
    """
    fixture_text = (datadir / "902725.json").read_text()
    record = create_record("ins", data=orjson.loads(fixture_text))

    suggest_inputs = [
        "CERN, Geneva",
        "CERN",
        "European Organization for Nuclear Research",
        "CERN",
        "Centre Européen de Recherches Nucléaires",
        "01631",
        "1211",
    ]
    expected_source = InstitutionsElasticSearchSchema().dump(record).data
    expected_source["affiliation_suggest"] = {"input": suggest_inputs}
    expected_source["number_of_papers"] = 0
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)

    hits = es_search("records-institutions")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
def test_index_literature_record(inspire_app, datadir):
    """Index a literature record and compare it with the stored ES fixture.

    The serialized display fields (``_ui_display``, ``_latex_*_display``,
    ``_bibtex_display``) and ``facet_author_name`` are compared separately
    from the rest of the document; ``authors`` and the timestamps are
    dropped from the comparison.
    """

    def load_fixture(filename):
        """Parse a JSON fixture located in ``datadir``."""
        return json.loads((datadir / filename).read_text())

    # The author record is created before the literature record; its return
    # value is not needed by any assertion below.
    create_record("aut", data=load_fixture("1032336.json"))
    record = create_record("lit", data=load_fixture("1630825.json"))

    expected_count = 1
    expected_metadata = load_fixture("es_1630825.json")
    expected_ui_display = json.loads(expected_metadata.pop("_ui_display"))
    expected_latex_us_display = expected_metadata.pop("_latex_us_display")
    expected_latex_eu_display = expected_metadata.pop("_latex_eu_display")
    expected_bibtex_display = expected_metadata.pop("_bibtex_display")
    expected_facet_author_name = expected_metadata.pop("facet_author_name")
    expected_metadata.pop("authors")

    response = es_search("records-hep")

    # Check the hit count before indexing into the hits so that an empty
    # index fails on this assertion rather than with an IndexError.
    assert response["hits"]["total"]["value"] == expected_count

    result = response["hits"]["hits"][0]["_source"]
    result_ui_display = json.loads(result.pop("_ui_display"))
    result_latex_us_display = result.pop("_latex_us_display")
    result_latex_eu_display = result.pop("_latex_eu_display")
    result_bibtex_display = result.pop("_bibtex_display")
    result_facet_author_name = result.pop("facet_author_name")
    # Removed so they do not take part in the DeepDiff comparison below.
    result.pop("authors")
    del result["_created"]
    del result["_updated"]

    assert not DeepDiff(result, expected_metadata, ignore_order=True)
    assert result_ui_display == expected_ui_display
    assert result_latex_us_display == expected_latex_us_display
    assert result_latex_eu_display == expected_latex_eu_display
    assert result_bibtex_display == expected_bibtex_display
    assert len(record.get("authors")) == len(result_facet_author_name)
    assert sorted(result_facet_author_name) == sorted(expected_facet_author_name)
def test_indexer_not_fulltext_links_in_ui_display_when_no_fulltext_links(inspire_app):
    """Check that ``_ui_display`` has no ``fulltext_links`` key for this record.

    The record carries external identifiers (OSTI, ADS, SPIRES) and a single
    document that is not flagged as fulltext.
    """
    identifiers = [
        {"schema": "OSTI", "value": "7224300"},
        {"schema": "ADS", "value": "1994PhRvD..50.4491S"},
        {"schema": "SPIRES", "value": "SPIRES-2926342"},
    ]
    documents = [
        {
            "source": "arxiv",
            "key": "arXiv:nucl-th_9310031.pdf",
            "url": "http://localhost:8000/some_url2.pdf",
        }
    ]
    create_record(
        "lit",
        data={"external_system_identifiers": identifiers, "documents": documents},
    )

    indexed_source = es_search("records-hep")["hits"]["hits"][0]["_source"]
    ui_display = json.loads(indexed_source.pop("_ui_display"))

    assert "fulltext_links" not in ui_display
def test_indexer_creates_proper_fulltext_links_in_ui_display_files_disabled(
    inspire_app, disable_files
):
    """Check the exact ``fulltext_links`` stored in ``_ui_display``.

    Runs with the ``disable_files`` fixture and expects one arXiv link and
    one KEK scanned-document link, in that order.
    """
    identifiers = [
        {"schema": "OSTI", "value": "7224300"},
        {"schema": "ADS", "value": "1994PhRvD..50.4491S"},
        {"schema": "KEKSCAN", "value": "94-07-219"},
        {"schema": "SPIRES", "value": "SPIRES-2926342"},
    ]
    eprints = [{"categories": ["hep-ph"], "value": "hep-ph/9404247"}]
    documents = [
        {
            "source": "arxiv",
            "fulltext": True,
            "key": "arXiv:nucl-th_9310030.pdf",
            "url": "http://localhost:8000/some_url.pdf",
        },
        {
            "source": "arxiv",
            "key": "arXiv:nucl-th_9310031.pdf",
            "url": "http://localhost:8000/some_url2.pdf",
        },
    ]
    create_record(
        "lit",
        data={
            "external_system_identifiers": identifiers,
            "arxiv_eprints": eprints,
            "documents": documents,
        },
    )

    indexed_source = es_search("records-hep")["hits"]["hits"][0]["_source"]
    ui_display = json.loads(indexed_source.pop("_ui_display"))

    wanted_links = [
        {"description": "arXiv", "value": "https://arxiv.org/pdf/hep-ph/9404247"},
        {
            "description": "KEK scanned document",
            "value": "https://lib-extopc.kek.jp/preprints/PDF/1994/9407/9407219.pdf",
        },
    ]
    assert ui_display["fulltext_links"] == wanted_links
def test_regression_index_literature_record_with_related_records(inspire_app, datadir):
    """Check that ``related_records`` is indexed unchanged.

    Loads the ``1503270.json`` fixture, creates a literature record from it
    and compares the ``related_records`` of the first ``records-hep`` hit
    with the fixture's value.
    """
    data = json.loads((datadir / "1503270.json").read_text())
    # The created record object itself is not needed by the assertion.
    create_record("lit", data=data)

    response = es_search("records-hep")
    indexed_source = response["hits"]["hits"][0]["_source"]

    assert data["related_records"] == indexed_source["related_records"]
def test_indexer_creates_proper_fulltext_links_in_ui_display_files_enabled(
    inspire_app, s3
):
    """Check the descriptions of the ``fulltext_links`` stored in ``_ui_display``.

    Creates the ``"1"`` and ``"f"`` S3 buckets, indexes a literature record
    with two documents and verifies that every stored link has a truthy
    ``value`` and one of the allowed descriptions.
    """
    create_s3_bucket("1")
    create_s3_bucket("f")
    expected_fulltext_links = ["arXiv", "KEK scanned document", "fulltext"]
    data = {
        "external_system_identifiers": [
            {"schema": "OSTI", "value": "7224300"},
            {"schema": "ADS", "value": "1994PhRvD..50.4491S"},
            {"schema": "KEKSCAN", "value": "94-07-219"},
            {"schema": "SPIRES", "value": "SPIRES-2926342"},
        ],
        "arxiv_eprints": [{"categories": ["hep-ph"], "value": "hep-ph/9404247"}],
        "documents": [
            {
                "source": "arxiv",
                "fulltext": True,
                "hidden": True,
                "key": "arXiv:nucl-th_9310030.pdf",
                "url": "https://arxiv.org/pdf/1910.11662.pdf",
            },
            {
                "source": "arxiv",
                "key": "arXiv:nucl-th_9310031.pdf",
                "url": "http://inspirehep.net/record/863300/files/fermilab-pub-10-255-e.pdf",
            },
        ],
    }
    # The created record object itself is not needed by the assertions.
    create_record("lit", data=data)

    response = es_search("records-hep")
    result = response["hits"]["hits"][0]["_source"]
    result_ui_display = json.loads(result.pop("_ui_display"))

    # NOTE(review): this loop passes vacuously when "fulltext_links" is an
    # empty list -- consider also asserting the expected number of links.
    for link in result_ui_display["fulltext_links"]:
        assert link["value"]
        assert link["description"] in expected_fulltext_links
def assert_literature_has_correct_conference_title():
    """Assert that the single indexed literature record shows the conference titles.

    Flushes and refreshes every index, requires exactly one ``records-hep``
    hit and compares ``conference["titles"]`` with the titles of the first
    ``conference_info`` entry in the hit's ``_ui_display``.

    NOTE(review): ``conference`` is a free name here; presumably it is bound
    in an enclosing scope -- confirm.
    """
    current_search.flush_and_refresh("*")
    search_result = es_search("records-hep")

    assert get_value(search_result, "hits.total.value") == 1

    hit_source = get_value(search_result, "hits.hits[0]._source")
    ui_display = orjson.loads(hit_source["_ui_display"])
    displayed_titles = get_value(ui_display, "conference_info[0].titles")
    assert conference["titles"] == displayed_titles
def test_index_data_record(inspire_app):
    """Index a data record and compare the stored ES document.

    The expected document is a deep copy of the record plus the
    ``_created`` and ``_updated`` timestamps.
    """
    record = create_record("dat")

    expected_source = deepcopy(record)
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)

    hits = es_search("records-data")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
def test_index_seminars_record(inspire_app, datadir):
    """Index a seminar record and compare the stored ES document.

    The expected document is a deep copy of the record plus the
    ``_created`` and ``_updated`` timestamps.
    """
    fixture_text = (datadir / "1.json").read_text()
    record = create_record("sem", data=json.loads(fixture_text))

    expected_source = deepcopy(record)
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)

    hits = es_search("records-seminars")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
def test_indexer_deletes_record_from_es(inspire_app, datadir):
    """Check that a deleted literature record yields zero ``records-hep`` hits.

    The record is created, deleted and indexed again with ``delay=False``;
    the index is flushed and refreshed before searching.
    """
    fixture_text = (datadir / "1630825.json").read_text()
    record = create_record("lit", data=json.loads(fixture_text))

    record.delete()
    record.index(delay=False)
    current_search.flush_and_refresh("records-hep")

    remaining_hits = es_search("records-hep")["hits"]["total"]["value"]
    assert remaining_hits == 0
def test_index_author_record(inspire_app, datadir):
    """Index an author record and compare it with the expected ES fixture.

    The record is created from ``999108.json``; the expected document is
    ``999108_expected.json`` plus the record's ``_created`` and ``_updated``
    timestamps.
    """
    data = orjson.loads((datadir / "999108.json").read_text())
    record = create_record("aut", data=data)

    expected_count = 1
    # Kept under its own name so the input payload in ``data`` is not
    # rebound to the expected document.
    expected_metadata = orjson.loads((datadir / "999108_expected.json").read_text())
    expected_metadata["_created"] = utils.isoformat(record.created)
    expected_metadata["_updated"] = utils.isoformat(record.updated)

    response = es_search("records-authors")

    assert response["hits"]["total"]["value"] == expected_count
    assert response["hits"]["hits"][0]["_source"] == expected_metadata
def test_index_job_record(inspire_app):
    """Index a job record and compare the stored ES document.

    The expected document is a deep copy of the record plus the
    ``_created`` and ``_updated`` timestamps.
    """
    record = create_record("job")

    wanted_source = deepcopy(record)
    wanted_source["_created"] = utils.isoformat(record.created)
    wanted_source["_updated"] = utils.isoformat(record.updated)

    hits = es_search("records-jobs")["hits"]
    indexed_source = hits["hits"][0]["_source"]
    indexed_total = hits["total"]["value"]

    assert 1 == indexed_total
    assert wanted_source == indexed_source
def test_index_conference_record(inspire_app, datadir):
    """Index a conference record and compare the stored ES document.

    The expected document is the ``ConferencesElasticSearchSchema`` dump of
    the record, extended with the timestamps and the record's
    ``number_of_contributions``.
    """
    fixture_text = (datadir / "1203206.json").read_text()
    record = create_record("con", data=json.loads(fixture_text))

    expected_source = ConferencesElasticSearchSchema().dump(record).data
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)
    expected_source["number_of_contributions"] = record.number_of_contributions

    hits = es_search("records-conferences")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
def test_indexer_removes_supervisors_from_authors_for_ui_display_field(inspire_app):
    """Check that the supervisor is absent from both indexed author lists.

    A record with one plain author and one author carrying the
    ``supervisor`` role must expose only the plain author in ``authors`` and
    in ``_ui_display["authors"]``.
    """
    payload = {
        "authors": [
            {"full_name": "Frank Castle"},
            {"full_name": "Jimmy", "inspire_roles": ["supervisor"]},
        ]
    }
    create_record("lit", data=payload)

    indexed_source = es_search("records-hep")["hits"]["hits"][0]["_source"]
    ui_authors = json.loads(indexed_source.pop("_ui_display"))["authors"]
    indexed_authors = indexed_source["authors"]

    wanted_name = "Frank Castle"
    assert len(ui_authors) == 1
    assert ui_authors[0]["full_name"] == wanted_name
    assert len(indexed_authors) == 1
    assert indexed_authors[0]["full_name"] == wanted_name
def test_indexer_separates_supervisors_from_authors(inspire_app):
    """Check that the supervisor is indexed under ``supervisors``, not ``authors``.

    A record with one plain author and one author carrying the
    ``supervisor`` role must produce exactly one entry in each list.
    """
    payload = {
        "authors": [
            {"full_name": "Frank Castle"},
            {"full_name": "Jimmy", "inspire_roles": ["supervisor"]},
        ]
    }
    create_record("lit", data=payload)

    indexed_source = es_search("records-hep")["hits"]["hits"][0]["_source"]

    indexed_authors = indexed_source["authors"]
    assert len(indexed_authors) == 1
    assert indexed_authors[0]["full_name"] == "Frank Castle"

    indexed_supervisors = indexed_source["supervisors"]
    assert len(indexed_supervisors) == 1
    assert indexed_supervisors[0]["full_name"] == "Jimmy"
def test_index_experiment_record(inspire_app, datadir):
    """Index an experiment record and compare the stored ES document.

    The expected document is a deep copy of the record plus the weighted
    ``experiment_suggest`` entries, the timestamps and a zero paper count.

    NOTE(review): this module defines another function with this exact
    name; the later definition rebinds the name, so only one of the two is
    visible at module level -- confirm which one is meant to run.
    """
    fixture_text = (datadir / "1108541.json").read_text()
    record = create_record("exp", data=json.loads(fixture_text))

    weighted_inputs = (
        ("LHC", 1),
        ("ATLAS", 1),
        ("CERN", 1),
        ("{ATLAS}", 1),
        ("ATLAS", 1),
        ("CERN-ATLAS", 1),
        ("CERN-LHC-ATLAS", 5),
    )
    expected_source = deepcopy(record)
    expected_source["experiment_suggest"] = [
        {"input": text, "weight": weight} for text, weight in weighted_inputs
    ]
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)
    expected_source["number_of_papers"] = 0

    hits = es_search("records-experiments")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
def test_index_journal_record(inspire_app, datadir):
    """Index a journal record and compare the stored ES document.

    The expected document is a deep copy of the record plus the
    ``title_suggest`` inputs and the ``_created``/``_updated`` timestamps.
    """
    fixture_text = (datadir / "1213103.json").read_text()
    record = create_record("jou", data=json.loads(fixture_text))

    title_inputs = [
        "The Journal of High Energy Physics (JHEP)",
        "JHEP",
        "JOURNAL OF HIGH ENERGY PHYSICS",
        "JOURNL OF HIGH ENERGY PHYSICS",
        "JOURNALOFHIGHENERGYPHYSICS",
        "J HIGH ENERGY PHYSICS",
        "JOUR OFHIGHENERGYPHYS",
        "J HIGHENERGYPHYSICS",
        "J HIGH ENERGY PHYS",
        "J HIGH ENGERY PHYS",
        "J HIGH ENERG PHYS",
        "J HIGH ENERGYPHYS",
        "J HIGHENERGY PHYS",
        "J HIGH ENER PHYS",
        "J HIGHENERGYPHYS",
        "J HIGHENERGYPHY",
        "J HIGH EN PHYS",
        "J HIGH ENERGY",
        "J HIGHEN PHYS",
        "J HIGH PHYS",
        "J H E PHYS",
        "J HEP",
        "JHEPA",
        "JHEP",
    ]
    expected_source = deepcopy(record)
    expected_source["title_suggest"] = {"input": title_inputs}
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)

    hits = es_search("records-journals")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
def test_index_experiment_record(inspire_app, datadir):
    """Index an experiment record and compare the stored ES document.

    The expected document is a deep copy of the record plus the timestamps,
    a zero paper count, the normalized name variants and the classification
    facet.

    NOTE(review): this module defines another function with this exact
    name; the later definition rebinds the name, so only one of the two is
    visible at module level -- confirm which one is meant to run.
    """
    fixture_text = (datadir / "1108541.json").read_text()
    record = create_record("exp", data=orjson.loads(fixture_text))

    expected_source = deepcopy(record)
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)
    expected_source["number_of_papers"] = 0
    expected_source["normalized_name_variants"] = ["ATLAS", "ATLAS", "CERN-ATLAS"]
    expected_source["facet_inspire_classification"] = ["Collider|Hadrons|p p"]

    hits = es_search("records-experiments")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
def assert_the_record_is_indexed():
    """Assert that ``str(record_uuid)`` is among the ``records-hep`` hit ids.

    All indices are flushed and refreshed before searching.

    NOTE(review): ``record_uuid`` is a free name here; presumably it is
    bound in an enclosing scope -- confirm.
    """
    current_search.flush_and_refresh("*")
    indexed_ids = get_value(es_search("records-hep"), "hits.hits._id")
    assert str(record_uuid) in indexed_ids
def assert_hits():
    """Assert that ``records-hep`` holds ``expected_hits_count`` documents.

    The ``records-hep`` index is flushed and refreshed before searching.

    NOTE(review): ``expected_hits_count`` is a free name here; presumably it
    is bound in an enclosing scope -- confirm.
    """
    current_search.flush_and_refresh("records-hep")
    found_total = get_value(es_search("records-hep"), "hits.total.value")
    assert expected_hits_count == found_total
def assert_record():
    """Assert that ``records-authors`` holds exactly one document.

    The ``records-authors`` index is flushed and refreshed before searching.
    """
    current_search.flush_and_refresh("records-authors")
    found_total = get_value(es_search("records-authors"), "hits.total.value")
    assert 1 == found_total