Beispiel #1
0
    def assert_all_records_are_indexed():
        """Assert that the expected ids are present in the search indices.

        Flushes and refreshes every index, then checks that each id from the
        enclosing scope's ``citer_ids`` appears among the ``_id`` values of
        ``records-hep`` and that ``record_data_uuids`` (stringified) appears
        among the ``_id`` values of ``records-data``.
        """
        current_search.flush_and_refresh("*")

        hep_ids = get_value(es_search("records-hep"), "hits.hits._id")
        for citer_id in citer_ids:
            assert str(citer_id) in hep_ids

        data_ids = get_value(es_search("records-data"), "hits.hits._id")
        assert str(record_data_uuids) in data_ids
Beispiel #2
0
def test_index_institutions_record(inspire_app, datadir):
    """Create institution ``902725`` and compare the indexed document.

    The expected document is the ``InstitutionsElasticSearchSchema`` dump of
    the record plus the suggester inputs, a zero paper count and the
    ISO-formatted creation/update timestamps.
    """
    institution_json = orjson.loads((datadir / "902725.json").read_text())
    record = create_record("ins", data=institution_json)

    suggest_inputs = [
        "CERN, Geneva",
        "CERN",
        "European Organization for Nuclear Research",
        "CERN",
        "Centre Européen de Recherches Nucléaires",
        "01631",
        "1211",
    ]
    expected_source = InstitutionsElasticSearchSchema().dump(record).data
    expected_source["affiliation_suggest"] = {"input": suggest_inputs}
    expected_source["number_of_papers"] = 0
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)

    hits = es_search("records-institutions")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
Beispiel #3
0
def test_index_literature_record(inspire_app, datadir):
    """Create literature record ``1630825`` and compare the indexed document.

    The expected document is read from ``es_1630825.json``. The display
    fields (``_ui_display``, LaTeX, BibTeX) and ``facet_author_name`` are
    compared separately; ``authors``, ``_created`` and ``_updated`` are
    removed from the comparison.
    """
    # The author record is created only for its side effect; the returned
    # object is not needed by the assertions below.
    author_data = json.loads((datadir / "1032336.json").read_text())
    create_record("aut", data=author_data)

    data = json.loads((datadir / "1630825.json").read_text())
    record = create_record("lit", data=data)

    expected_count = 1
    expected_metadata = json.loads((datadir / "es_1630825.json").read_text())
    expected_metadata_ui_display = json.loads(expected_metadata.pop("_ui_display"))
    expected_metadata_latex_us_display = expected_metadata.pop("_latex_us_display")
    expected_metadata_latex_eu_display = expected_metadata.pop("_latex_eu_display")
    expected_metadata_bibtex_display = expected_metadata.pop("_bibtex_display")
    expected_facet_author_name = expected_metadata.pop("facet_author_name")
    expected_metadata.pop("authors")

    response = es_search("records-hep")

    # Check the hit count before touching hits[0], so that an empty index
    # fails on this assertion instead of raising IndexError below.
    assert response["hits"]["total"]["value"] == expected_count

    result = response["hits"]["hits"][0]["_source"]
    result_ui_display = json.loads(result.pop("_ui_display"))
    result_latex_us_display = result.pop("_latex_us_display")
    result_latex_eu_display = result.pop("_latex_eu_display")
    result_bibtex_display = result.pop("_bibtex_display")
    # Authors are excluded from the deep comparison on both sides.
    result.pop("authors")
    result_facet_author_name = result.pop("facet_author_name")
    del result["_created"]
    del result["_updated"]

    assert not DeepDiff(result, expected_metadata, ignore_order=True)
    assert result_ui_display == expected_metadata_ui_display
    assert result_latex_us_display == expected_metadata_latex_us_display
    assert result_latex_eu_display == expected_metadata_latex_eu_display
    assert result_bibtex_display == expected_metadata_bibtex_display
    assert len(record.get("authors")) == len(result_facet_author_name)
    assert sorted(result_facet_author_name) == sorted(expected_facet_author_name)
Beispiel #4
0
def test_indexer_not_fulltext_links_in_ui_display_when_no_fulltext_links(inspire_app):
    """Assert that ``_ui_display`` has no ``fulltext_links`` key for this record.

    The record carries OSTI, ADS and SPIRES identifiers and a single document
    that has no ``fulltext`` flag.
    """
    identifiers = [
        {"schema": "OSTI", "value": "7224300"},
        {"schema": "ADS", "value": "1994PhRvD..50.4491S"},
        {"schema": "SPIRES", "value": "SPIRES-2926342"},
    ]
    document = {
        "source": "arxiv",
        "key": "arXiv:nucl-th_9310031.pdf",
        "url": "http://localhost:8000/some_url2.pdf",
    }
    record_data = {
        "external_system_identifiers": identifiers,
        "documents": [document],
    }
    create_record("lit", data=record_data)

    indexed_source = es_search("records-hep")["hits"]["hits"][0]["_source"]
    ui_display = json.loads(indexed_source.pop("_ui_display"))

    assert "fulltext_links" not in ui_display
Beispiel #5
0
def test_indexer_creates_proper_fulltext_links_in_ui_display_files_disabled(
    inspire_app, disable_files
):
    """Assert the exact ``fulltext_links`` stored in ``_ui_display``.

    Runs with the ``disable_files`` fixture. The indexed record is expected
    to expose exactly two links, in this order: an "arXiv" link and a
    "KEK scanned document" link.
    """
    record_data = {
        "external_system_identifiers": [
            {"schema": "OSTI", "value": "7224300"},
            {"schema": "ADS", "value": "1994PhRvD..50.4491S"},
            {"schema": "KEKSCAN", "value": "94-07-219"},
            {"schema": "SPIRES", "value": "SPIRES-2926342"},
        ],
        "arxiv_eprints": [{"categories": ["hep-ph"], "value": "hep-ph/9404247"}],
        "documents": [
            {
                "source": "arxiv",
                "fulltext": True,
                "key": "arXiv:nucl-th_9310030.pdf",
                "url": "http://localhost:8000/some_url.pdf",
            },
            {
                "source": "arxiv",
                "key": "arXiv:nucl-th_9310031.pdf",
                "url": "http://localhost:8000/some_url2.pdf",
            },
        ],
    }
    create_record("lit", data=record_data)

    indexed_source = es_search("records-hep")["hits"]["hits"][0]["_source"]
    ui_display = json.loads(indexed_source.pop("_ui_display"))

    arxiv_link = {
        "description": "arXiv",
        "value": "https://arxiv.org/pdf/hep-ph/9404247",
    }
    kek_link = {
        "description": "KEK scanned document",
        "value": "https://lib-extopc.kek.jp/preprints/PDF/1994/9407/9407219.pdf",
    }
    assert ui_display["fulltext_links"] == [arxiv_link, kek_link]
Beispiel #6
0
def test_regression_index_literature_record_with_related_records(inspire_app, datadir):
    """Assert that ``related_records`` in the index equals the fixture's value.

    Loads ``1503270.json``, creates a literature record from it and compares
    the ``related_records`` field of the first ``records-hep`` hit with the
    loaded input.
    """
    fixture_text = (datadir / "1503270.json").read_text()
    data = json.loads(fixture_text)
    record = create_record("lit", data=data)

    indexed_source = es_search("records-hep")["hits"]["hits"][0]["_source"]

    assert data["related_records"] == indexed_source["related_records"]
Beispiel #7
0
def test_indexer_creates_proper_fulltext_links_in_ui_display_files_enabled(
    inspire_app, s3
):
    """Check every ``fulltext_links`` entry of the indexed ``_ui_display``.

    Creates the S3 buckets "1" and "f", indexes a literature record and then
    asserts that each link has a truthy ``value`` and a ``description`` that
    is one of the allowed descriptions.
    """
    for bucket_name in ("1", "f"):
        create_s3_bucket(bucket_name)
    allowed_descriptions = ["arXiv", "KEK scanned document", "fulltext"]

    hidden_fulltext_document = {
        "source": "arxiv",
        "fulltext": True,
        "hidden": True,
        "key": "arXiv:nucl-th_9310030.pdf",
        "url": "https://arxiv.org/pdf/1910.11662.pdf",
    }
    plain_document = {
        "source": "arxiv",
        "key": "arXiv:nucl-th_9310031.pdf",
        "url": "http://inspirehep.net/record/863300/files/fermilab-pub-10-255-e.pdf",
    }
    record_data = {
        "external_system_identifiers": [
            {"schema": "OSTI", "value": "7224300"},
            {"schema": "ADS", "value": "1994PhRvD..50.4491S"},
            {"schema": "KEKSCAN", "value": "94-07-219"},
            {"schema": "SPIRES", "value": "SPIRES-2926342"},
        ],
        "arxiv_eprints": [{"categories": ["hep-ph"], "value": "hep-ph/9404247"}],
        "documents": [hidden_fulltext_document, plain_document],
    }
    record = create_record("lit", data=record_data)

    indexed_source = es_search("records-hep")["hits"]["hits"][0]["_source"]
    ui_display = json.loads(indexed_source.pop("_ui_display"))

    for fulltext_link in ui_display["fulltext_links"]:
        assert fulltext_link["value"]
        assert fulltext_link["description"] in allowed_descriptions
    def assert_literature_has_correct_conference_title():
        """Assert the single indexed literature record shows the conference titles.

        Flushes and refreshes every index, requires exactly one hit in
        ``records-hep`` and compares ``conference["titles"]`` (from the
        enclosing scope) with ``conference_info[0].titles`` of the hit's
        decoded ``_ui_display``.
        """
        current_search.flush_and_refresh("*")
        search_result = es_search("records-hep")

        hit_count = get_value(search_result, "hits.total.value")
        assert hit_count == 1

        indexed_source = get_value(search_result, "hits.hits[0]._source")
        decoded_display = orjson.loads(indexed_source["_ui_display"])
        expected_titles = conference["titles"]
        indexed_titles = get_value(decoded_display, "conference_info[0].titles")
        assert expected_titles == indexed_titles
Beispiel #9
0
def test_index_data_record(inspire_app):
    """Create a ``dat`` record and compare the indexed document with it.

    The expected document is a deep copy of the record plus the ISO-formatted
    ``_created`` and ``_updated`` timestamps.
    """
    record = create_record("dat")

    expected_source = deepcopy(record)
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)

    hits = es_search("records-data")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
def test_index_seminars_record(inspire_app, datadir):
    """Create a seminar from ``1.json`` and compare the indexed document with it.

    The expected document is a deep copy of the record plus the ISO-formatted
    ``_created`` and ``_updated`` timestamps.
    """
    fixture_text = (datadir / "1.json").read_text()
    record = create_record("sem", data=json.loads(fixture_text))

    expected_source = deepcopy(record)
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)

    hits = es_search("records-seminars")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
Beispiel #11
0
def test_indexer_deletes_record_from_es(inspire_app, datadir):
    """Assert ``records-hep`` has no hits after a record is deleted and re-indexed.

    Creates literature record ``1630825``, deletes it, indexes it
    synchronously (``delay=False``) and refreshes the index before searching.
    """
    fixture_text = (datadir / "1630825.json").read_text()
    record = create_record("lit", data=json.loads(fixture_text))

    record.delete()
    record.index(delay=False)
    current_search.flush_and_refresh("records-hep")

    remaining_hits = es_search("records-hep")["hits"]["total"]["value"]

    assert remaining_hits == 0
Beispiel #12
0
def test_index_author_record(inspire_app, datadir):
    """Create author ``999108`` and compare the indexed document.

    The expected document is read from ``999108_expected.json`` and completed
    with the record's ISO-formatted ``_created`` and ``_updated`` timestamps.
    """
    data = orjson.loads((datadir / "999108.json").read_text())
    record = create_record("aut", data=data)

    expected_count = 1
    # Bind only the expected document here; the original chained assignment
    # also rebound ``data`` to it, which nothing relied on.
    expected_metadata = orjson.loads(
        (datadir / "999108_expected.json").read_text())
    expected_metadata["_created"] = utils.isoformat(record.created)
    expected_metadata["_updated"] = utils.isoformat(record.updated)

    response = es_search("records-authors")

    assert response["hits"]["total"]["value"] == expected_count
    assert response["hits"]["hits"][0]["_source"] == expected_metadata
Beispiel #13
0
def test_index_job_record(inspire_app):
    """Create a ``job`` record and compare the indexed document with it.

    The expected document is a deep copy of the record plus the ISO-formatted
    ``_created`` and ``_updated`` timestamps.
    """
    record = create_record("job")

    wanted_source = deepcopy(record)
    wanted_source["_created"] = utils.isoformat(record.created)
    wanted_source["_updated"] = utils.isoformat(record.updated)
    wanted_total = 1

    hits = es_search("records-jobs")["hits"]
    indexed_source = hits["hits"][0]["_source"]
    indexed_total = hits["total"]["value"]

    assert wanted_total == indexed_total
    assert wanted_source == indexed_source
def test_index_conference_record(inspire_app, datadir):
    """Create conference ``1203206`` and compare the indexed document.

    The expected document is the ``ConferencesElasticSearchSchema`` dump of
    the record plus the ISO-formatted timestamps and the record's
    ``number_of_contributions``.
    """
    fixture_text = (datadir / "1203206.json").read_text()
    record = create_record("con", data=json.loads(fixture_text))

    expected_source = ConferencesElasticSearchSchema().dump(record).data
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)
    expected_source["number_of_contributions"] = record.number_of_contributions

    hits = es_search("records-conferences")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
Beispiel #15
0
def test_indexer_removes_supervisors_from_authors_for_ui_display_field(inspire_app):
    """Assert only the non-supervisor author is listed in the indexed document.

    The record has two authors, one of them with the ``supervisor`` role.
    Both ``_ui_display["authors"]`` and the top-level ``authors`` of the
    indexed document must contain only "Frank Castle".
    """
    regular_author = {"full_name": "Frank Castle"}
    supervisor = {"full_name": "Jimmy", "inspire_roles": ["supervisor"]}
    create_record("lit", data={"authors": [regular_author, supervisor]})

    indexed_source = es_search("records-hep")["hits"]["hits"][0]["_source"]
    wanted_name = "Frank Castle"

    display_authors = json.loads(indexed_source.pop("_ui_display"))["authors"]
    indexed_authors = indexed_source["authors"]

    assert len(display_authors) == 1
    assert display_authors[0]["full_name"] == wanted_name
    assert len(indexed_authors) == 1
    assert indexed_authors[0]["full_name"] == wanted_name
Beispiel #16
0
def test_indexer_separates_supervisors_from_authors(inspire_app):
    """Assert authors and supervisors end up in separate indexed fields.

    The record has two authors, one of them with the ``supervisor`` role.
    The indexed ``authors`` must contain only "Frank Castle" and the indexed
    ``supervisors`` only "Jimmy".
    """
    regular_author = {"full_name": "Frank Castle"}
    supervisor = {"full_name": "Jimmy", "inspire_roles": ["supervisor"]}
    create_record("lit", data={"authors": [regular_author, supervisor]})

    indexed_source = es_search("records-hep")["hits"]["hits"][0]["_source"]

    indexed_authors = indexed_source["authors"]
    assert len(indexed_authors) == 1
    assert indexed_authors[0]["full_name"] == "Frank Castle"

    indexed_supervisors = indexed_source["supervisors"]
    assert len(indexed_supervisors) == 1
    assert indexed_supervisors[0]["full_name"] == "Jimmy"
Beispiel #17
0
def test_index_experiment_record(inspire_app, datadir):
    """Create experiment ``1108541`` and compare the indexed document.

    The expected document is a deep copy of the record plus the weighted
    ``experiment_suggest`` entries, the ISO-formatted timestamps and a zero
    paper count.
    """
    fixture_text = (datadir / "1108541.json").read_text()
    record = create_record("exp", data=json.loads(fixture_text))

    weighted_inputs = (
        ("LHC", 1),
        ("ATLAS", 1),
        ("CERN", 1),
        ("{ATLAS}", 1),
        ("ATLAS", 1),
        ("CERN-ATLAS", 1),
        ("CERN-LHC-ATLAS", 5),
    )
    expected_source = deepcopy(record)
    expected_source["experiment_suggest"] = [
        {"input": suggestion, "weight": weight}
        for suggestion, weight in weighted_inputs
    ]
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)
    expected_source["number_of_papers"] = 0

    hits = es_search("records-experiments")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
Beispiel #18
0
def test_index_journal_record(inspire_app, datadir):
    """Create journal ``1213103`` and compare the indexed document.

    The expected document is a deep copy of the record plus the
    ``title_suggest`` inputs and the ISO-formatted timestamps.
    """
    fixture_text = (datadir / "1213103.json").read_text()
    record = create_record("jou", data=json.loads(fixture_text))

    title_inputs = [
        "The Journal of High Energy Physics (JHEP)",
        "JHEP",
        "JOURNAL OF HIGH ENERGY PHYSICS",
        "JOURNL OF HIGH ENERGY PHYSICS",
        "JOURNALOFHIGHENERGYPHYSICS",
        "J HIGH ENERGY PHYSICS",
        "JOUR OFHIGHENERGYPHYS",
        "J HIGHENERGYPHYSICS",
        "J HIGH ENERGY PHYS",
        "J HIGH ENGERY PHYS",
        "J HIGH ENERG PHYS",
        "J HIGH ENERGYPHYS",
        "J HIGHENERGY PHYS",
        "J HIGH ENER PHYS",
        "J HIGHENERGYPHYS",
        "J HIGHENERGYPHY",
        "J HIGH EN PHYS",
        "J HIGH ENERGY",
        "J HIGHEN PHYS",
        "J HIGH PHYS",
        "J H E PHYS",
        "J HEP",
        "JHEPA",
        "JHEP",
    ]
    expected_source = deepcopy(record)
    expected_source["title_suggest"] = {"input": title_inputs}
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)

    hits = es_search("records-journals")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
def test_index_experiment_record(inspire_app, datadir):
    """Create experiment ``1108541`` and compare the indexed document.

    The expected document is a deep copy of the record plus the ISO-formatted
    timestamps, a zero paper count, the ``normalized_name_variants`` and the
    ``facet_inspire_classification`` values.
    """
    experiment_json = orjson.loads((datadir / "1108541.json").read_text())
    record = create_record("exp", data=experiment_json)

    name_variants = ["ATLAS", "ATLAS", "CERN-ATLAS"]
    classification_facets = ["Collider|Hadrons|p p"]

    expected_source = deepcopy(record)
    expected_source["_created"] = utils.isoformat(record.created)
    expected_source["_updated"] = utils.isoformat(record.updated)
    expected_source["number_of_papers"] = 0
    expected_source["normalized_name_variants"] = name_variants
    expected_source["facet_inspire_classification"] = classification_facets

    hits = es_search("records-experiments")["hits"]

    assert hits["total"]["value"] == 1
    assert hits["hits"][0]["_source"] == expected_source
Beispiel #20
0
 def assert_the_record_is_indexed():
     """Assert ``record_uuid`` (from the enclosing scope) is an ``_id`` in ``records-hep``.

     All indices are flushed and refreshed before searching.
     """
     current_search.flush_and_refresh("*")
     indexed_ids = get_value(es_search("records-hep"), "hits.hits._id")
     assert str(record_uuid) in indexed_ids
Beispiel #21
0
 def assert_hits():
     """Assert ``records-hep`` holds ``expected_hits_count`` hits.

     ``expected_hits_count`` comes from the enclosing scope; the index is
     flushed and refreshed before searching.
     """
     current_search.flush_and_refresh("records-hep")
     hit_count = get_value(es_search("records-hep"), "hits.total.value")
     assert expected_hits_count == hit_count
Beispiel #22
0
 def assert_record():
     """Assert ``records-authors`` holds exactly one hit after a refresh."""
     wanted_total = 1
     current_search.flush_and_refresh("records-authors")
     hit_count = get_value(es_search("records-authors"), "hits.total.value")
     assert wanted_total == hit_count