def test_disambiguate_authors_create_new_author(inspire_app,
                                                clean_celery_session,
                                                enable_disambiguation):
    literature_data = faker.record("lit", with_control_number=True)
    literature_data.update({
        "authors": [{
            "full_name": "Michal Kowal",
            "affiliations": [{
                "value": "Warsaw U."
            }]
        }]
    })
    literature_record = LiteratureRecord.create(data=literature_data)
    db.session.commit()

    def assert_lit_records_exist_in_es():
        lit_record_1_from_es = InspireSearch.get_record_data_from_es(
            literature_record)
        assert lit_record_1_from_es

    retry_until_pass(assert_lit_records_exist_in_es, retry_interval=3)

    def assert_disambiguation_task():
        literature_record_from_es = InspireSearch.get_record_data_from_es(
            literature_record)
        author_record_from_es = AuthorsSearch().query_from_iq("").execute()
        assert author_record_from_es.hits[0].name["value"] == "Michal Kowal"
        assert (literature_record_from_es["authors"][0]["recid"] ==
                author_record_from_es.hits[0].control_number)

    retry_until_pass(assert_disambiguation_task)
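
# A literature record carrying both an arXiv eprint and a CERN report number should be
# served through OAI-PMH ListRecords under the configured arXiv set (OAI_SET_CERN_ARXIV).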
def test_oai_with_for_arxiv_set(inspire_app, clean_celery_session):
    data = {
        "arxiv_eprints": [{"value": "2009.01484"}],
        "report_numbers": [{"value": "CERN-TH-2020-136"}],
    }

    record_data = faker.record("lit", data)
    record = LiteratureRecord.create(record_data)
    record_uuid = record.id
    record_marcxml = record2marcxml(record)
    db.session.commit()

    def assert_the_record_is_indexed():
        current_search.flush_and_refresh("*")
        result = es_search("records-hep")
        uuids = get_value(result, "hits.hits._id")
        assert str(record_uuid) in uuids

    retry_until_pass(assert_the_record_is_indexed)

    set_name = inspire_app.config["OAI_SET_CERN_ARXIV"]
    oaiset = OAISet(spec=set_name, name="Test", description="Test")
    db.session.add(oaiset)
    db.session.commit()
    with inspire_app.test_client() as client:
        response = client.get(
            f"/api/oai2d?verb=ListRecords&metadataPrefix=marcxml&set={set_name}"
        )
        assert record_marcxml in response.data
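
# Two authors with the same full name and no linked author records should still be
# disambiguated into two separate author records.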
def test_disambiguate_authors_create_two_author_with_same_name(
        inspire_app, clean_celery_session, enable_disambiguation):
    literature_data = faker.record("lit", with_control_number=True)
    literature_data.update({
        "authors": [{
            "full_name": "Michal Kowal"
        }, {
            "full_name": "Michal Kowal"
        }]
    })
    literature_record = LiteratureRecord.create(data=literature_data)

    db.session.commit()

    def assert_lit_records_exist_in_es():
        lit_record_from_es = InspireSearch.get_record_data_from_es(
            literature_record)

        assert lit_record_from_es

    retry_until_pass(assert_lit_records_exist_in_es, retry_interval=3)

    def assert_disambiguation_task():
        author_records_from_es = AuthorsSearch().query_from_iq("").execute()
        assert len(author_records_from_es.hits) == 2

    retry_until_pass(assert_disambiguation_task)
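
# A record flagged for CDS export (``_export_to.CDS``) should be retrievable as MARCXML
# through OAI-PMH GetRecord by its oai:inspirehep.net identifier.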
def test_oai_get_single_identifier_for_CDS_set(inspire_app, clean_celery_session):
    data = {"_export_to": {"CDS": True}}
    record_data = faker.record("lit", data)
    record = LiteratureRecord.create(record_data)
    record_uuid = record.id
    record_marcxml = record2marcxml(record)
    db.session.commit()

    def assert_the_record_is_indexed():
        current_search.flush_and_refresh("*")
        result = es_search("records-hep")
        uuids = get_value(result, "hits.hits._id")
        assert str(record_uuid) in uuids

    retry_until_pass(assert_the_record_is_indexed)

    set_name = inspire_app.config["OAI_SET_CDS"]
    oaiset = OAISet(spec=set_name, name="Test", description="Test")
    db.session.add(oaiset)
    db.session.commit()

    with inspire_app.test_client() as client:
        response = client.get(
            f"/api/oai2d?verb=GetRecord&metadataPrefix=marcxml&identifier=oai:inspirehep.net:{record['control_number']}"
        )
        assert record_marcxml in response.data
def test_aut_record_removed_from_es_when_deleted(inspire_app, clean_celery_session):
    data = faker.record("aut")
    rec = AuthorsRecord.create(data)
    db.session.commit()

    def assert_record():
        current_search.flush_and_refresh("records-authors")
        result = es_search("records-authors")
        result_total = get_value(result, "hits.total.value")
        expected_total = 1
        assert expected_total == result_total

    retry_until_pass(assert_record)

    rec.delete()
    db.session.commit()

    def assert_record():
        current_search.flush_and_refresh("records-authors")
        result = es_search("records-authors")
        result_total = get_value(result, "hits.total.value")
        expected_total = 0
        assert expected_total == result_total

    retry_until_pass(assert_record)
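
# Shared helpers: poll Elasticsearch until a record disappears from the HEP index,
# or until a cited record reports the expected citation_count.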
def assert_record_not_in_es(recid):
    def assert_hits():
        current_search.flush_and_refresh("records-hep")
        hits = LiteratureSearch().query_from_iq(f"recid:{recid}").execute().hits
        assert not hits

    retry_until_pass(assert_hits, retry_interval=5)
def assert_citation_count(cited_record, expected_count):
    def assert_record():
        current_search.flush_and_refresh("records-hep")
        record_from_es = LiteratureSearch().get_record_data_from_es(cited_record)
        assert expected_count == record_from_es["citation_count"]

    retry_until_pass(assert_record, retry_interval=3)
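
# ``retry_until_pass`` and the other fixtures/helpers used throughout these tests
# (``faker``, ``create_record``, ``es_search``, the Celery fixtures, ...) come from the
# inspirehep test suite. A minimal sketch of the assumed behaviour of ``retry_until_pass``
# follows; the signature is inferred from the call sites in this file, the name
# ``_retry_until_pass_sketch`` is ours, and the real helper may differ:
def _retry_until_pass_sketch(assert_function, timeout=30, retry_interval=1):
    """Re-run ``assert_function`` until it stops raising AssertionError or ``timeout`` expires."""
    import time

    deadline = time.monotonic() + timeout
    while True:
        try:
            # the wrapped assertion function performs the actual checks against ES/DB
            return assert_function()
        except AssertionError:
            if time.monotonic() >= deadline:
                raise
            time.sleep(retry_interval)


# With the fulltext feature flag enabled, indexing a literature record that carries a
# fulltext document should store the extracted text as an "attachment" on that document
# in Elasticsearch.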
def test_fulltext_indexer(inspire_app, clean_celery_session, override_config):
    with override_config(FEATURE_FLAG_ENABLE_FULLTEXT=True):
        data = faker.record("lit")
        data.update({
            "arxiv_eprints": [{
                "categories": ["hep-ph"],
                "value": "hep-ph/9404247"
            }],
            "documents": [{
                "source":
                "arxiv",
                "fulltext":
                True,
                "filename":
                "arXiv:nucl-th_9310030.pdf",
                "key":
                "arXiv:nucl-th_9310030.pdf",
                "url":
                "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
            }],
        })
        record = LiteratureRecord.create(data)
        db.session.commit()

        def assert_record_in_es():
            current_search.flush_and_refresh("*")
            record_lit_es = (
                LiteratureSearch().get_record(str(record.id)).execute().hits.hits[0]
            )
            assert "attachment" in record_lit_es._source["documents"][0]

        retry_until_pass(assert_record_in_es, timeout=90, retry_interval=20)
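
# Creating a literature record whose author is affiliated with an institution should
# bump that institution's number_of_papers in ES from 0 to 1.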
def test_institutions_record_updates_in_es_when_lit_rec_refers_to_it(
    inspire_app, clean_celery_session
):
    institution_1 = InstitutionsRecord.create(faker.record("ins"))
    institution_1_control_number = institution_1["control_number"]
    ref_1 = f"http://localhost:8000/api/institutions/{institution_1_control_number}"
    db.session.commit()
    expected_number_of_papers = 0

    def assert_record():
        current_search.flush_and_refresh("records-institutions")
        record_from_es = InstitutionsSearch().get_record_data_from_es(institution_1)
        assert expected_number_of_papers == record_from_es["number_of_papers"]

    retry_until_pass(assert_record)

    data = {
        "authors": [
            {
                "full_name": "John Doe",
                "affiliations": [{"value": "Institution", "record": {"$ref": ref_1}}],
            }
        ]
    }

    LiteratureRecord.create(faker.record("lit", data))
    db.session.commit()
    expected_number_of_papers = 1

    def assert_record():
        current_search.flush_and_refresh("records-institutions")
        record_from_es = InstitutionsSearch().get_record_data_from_es(institution_1)
        assert expected_number_of_papers == record_from_es["number_of_papers"]

    retry_until_pass(assert_record)
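
# With two curated 't Hooft authors already linked, a new unlinked
# "'t Hooft, Gerard Antonio" author should be disambiguated to the existing author whose
# first name, last name and initials match (G.Hooft.1), i.e. inherit that author's $ref.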
def test_disambiguate_authors_on_first_last_name_and_initials(
        inspire_app, clean_celery_session, enable_disambiguation):
    literature_data = faker.record("lit", with_control_number=True)
    literature_data.update({
        "authors": [{
            "full_name": "'t Hooft, Gerard",
            "curated_relation": True,
            "record": {
                "$ref": "http://localhost:5000/api/authors/999108"
            },
            "ids": [{
                "schema": "INSPIRE BAI",
                "value": "G.Hooft.2"
            }],
        }]
    })
    literature_record = LiteratureRecord.create(literature_data)

    literature_data_2 = faker.record("lit", with_control_number=True)
    literature_data_2.update({
        "authors": [{
            "full_name": "'t Hooft, Gerard Antonio",
            "curated_relation": True,
            "record": {
                "$ref": "http://localhost:5000/api/authors/999105"
            },
            "ids": [{
                "schema": "INSPIRE BAI",
                "value": "G.Hooft.1"
            }],
        }]
    })
    literature_record_2 = LiteratureRecord.create(literature_data_2)
    db.session.commit()

    def assert_lit_records_exist_in_es():
        lit_record_1_from_es = InspireSearch.get_record_data_from_es(
            literature_record)
        lit_record_2_from_es = InspireSearch.get_record_data_from_es(
            literature_record_2)
        assert lit_record_1_from_es and lit_record_2_from_es

    retry_until_pass(assert_lit_records_exist_in_es, retry_interval=3)

    literature_data_3 = faker.record("lit", with_control_number=True)
    literature_data_3.update(
        {"authors": [{
            "full_name": "'t Hooft, Gerard Antonio"
        }]})
    literature_record_3 = LiteratureRecord.create(literature_data_3)
    db.session.commit()

    def assert_disambiguation_task():
        literature_record_from_es = InspireSearch.get_record_data_from_es(
            literature_record_3)
        assert (literature_data_2["authors"][0]["record"] ==
                literature_record_from_es["authors"][0]["record"])

    retry_until_pass(assert_disambiguation_task, retry_interval=2)
def assert_es_hits_count(expected_hits_count):
    def assert_hits():
        current_search.flush_and_refresh("records-hep")
        result = es_search("records-hep")
        result_total = get_value(result, "hits.total.value")
        assert expected_hits_count == result_total

    retry_until_pass(assert_hits, retry_interval=5)
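
# Assigning papers from one author to another, non-stub author should rewrite the
# author's $ref on the paper, mark the relation as curated, copy the target author's ids,
# and keep the target author non-stub.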
def test_assign_from_an_author_to_another_that_is_not_stub(
        inspire_app, clean_celery_session):
    author_data = {
        "name": {
            "value": "Aad, Georges",
            "preferred_name": "Georges Aad"
        },
        "ids": [{
            "value": "G.Aad.1",
            "schema": "INSPIRE BAI"
        }],
        "stub": False,
    }
    from_author = create_record("aut")
    to_author = create_record("aut", data=author_data)
    literature = create_record(
        "lit",
        data={
            "authors": [
                {
                    "full_name": "Urhan, Ahmet",
                    "record": {
                        "$ref": "http://localhost:5000/api/authors/17200"
                    },
                },
                {
                    "full_name": "Urhan, Harun",
                    "record": {
                        "$ref":
                        f"http://localhost:5000/api/authors/{from_author['control_number']}"
                    },
                },
            ]
        },
    )
    db.session.commit()

    assign_papers.delay(
        from_author_recid=from_author["control_number"],
        to_author_record=to_author,
        author_papers_recids=[literature["control_number"]],
    )

    def assert_assign():
        current_search.flush_and_refresh("*")
        literature_after = LiteratureSearch.get_record_data_from_es(literature)
        literature_author = literature_after["authors"][1]
        to_author_after = AuthorsRecord.get_record_by_pid_value(
            to_author["control_number"])
        assert literature_author["record"] == {
            "$ref":
            f"http://localhost:5000/api/authors/{to_author['control_number']}"
        }
        assert literature_author["curated_relation"]
        assert literature_author["ids"] == to_author["ids"]
        assert not to_author_after["stub"]

    retry_until_pass(assert_assign, retry_interval=5)
def test_lit_record_appear_in_es_when_created(inspire_app, clean_celery_session):
    data = faker.record("lit")
    record = LiteratureRecord.create(data)
    db.session.commit()

    def assert_record():
        current_search.flush_and_refresh("records-hep")
        record_from_es = LiteratureSearch().get_record_data_from_es(record)
        assert record_from_es["_ui_display"]

    retry_until_pass(assert_record)
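
# When a literature record is merged away (the new record lists it in deleted_records),
# references to it from seminars and from other literature records should be rewritten
# to point at the merged record.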
def test_recalculate_references_after_literature_record_merge(
        inspire_app, clean_celery_session):
    literature_data = faker.record("lit", with_control_number=True)
    literature = InspireRecord.create(literature_data)
    literature_record_reference = literature["self"]["$ref"]

    seminar_data = faker.record("sem", with_control_number=True)
    seminar_data.update({
        "literature_records": [{
            "record": {
                "$ref": literature_record_reference
            }
        }]
    })
    seminar = InspireRecord.create(seminar_data)

    literature_data_with_references = faker.record("lit",
                                                   with_control_number=True)
    literature_data_with_references.update(
        {"references": [{
            "record": {
                "$ref": literature_record_reference
            }
        }]})
    literature_record_with_references = InspireRecord.create(
        literature_data_with_references)
    db.session.commit()

    def assert_all_records_in_es():
        literature_record_from_es = InspireSearch.get_record_data_from_es(
            literature)
        seminar_record_from_es = InspireSearch.get_record_data_from_es(seminar)
        assert all([literature_record_from_es, seminar_record_from_es])

    retry_until_pass(assert_all_records_in_es, retry_interval=3)

    merged_literature_data = faker.record("lit", with_control_number=True)
    merged_literature_data.update(
        {"deleted_records": [{
            "$ref": literature_record_reference
        }]})
    merged_literature_record = InspireRecord.create(merged_literature_data)
    db.session.commit()

    def assert_recalculate_references_task():
        seminar_record_from_es = InspireSearch.get_record_data_from_es(seminar)
        literature_record_from_es = InspireSearch.get_record_data_from_es(
            literature_record_with_references)
        assert (seminar_record_from_es["literature_records"][0]["record"]
                ["$ref"] == merged_literature_record["self"]["$ref"])
        assert (literature_record_from_es["references"][0]["record"]["$ref"] ==
                merged_literature_record["self"]["$ref"])

    retry_until_pass(assert_recalculate_references_task, retry_interval=3)
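
# A legacy record with an invalid control number should make continuous_migration raise
# a ValueError; only the valid record ahead of it is consumed from the Redis queue
# (its length drops from 3 to 2).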
def test_continuous_migration_with_invalid_control_number(
        inspire_app, celery_app_with_context, celery_session_worker, redis):
    raw_record_citer = (
        b"<record>"
        b'  <controlfield tag="001">666</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citer record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b'   <datafield tag="999" ind1="C" ind2="5">'
        b'    <subfield code="0">667</subfield>'
        b'    <subfield code="h">Achasov, M.N.</subfield>'
        b'    <subfield code="k">snd-2018</subfield>'
        b'    <subfield code="m">(SND Collaboration)</subfield>'
        b'    <subfield code="o">2</subfield>'
        b'    <subfield code="s">Phys.Rev.,D97,012008</subfield>'
        b'    <subfield code="x">'
        b"    [2] M. N. Achasov (SND Collaboration), Phys. Rev. D 97, 012008 (2018)."
        b"    </subfield>"
        b'    <subfield code="y">2018</subfield>'
        b'    <subfield code="z">0</subfield>'
        b'    <subfield code="z">1</subfield>'
        b"    </datafield>"
        b"</record>")
    citer_control_number = 666

    raw_record_cited = (
        b"<record>"
        b'  <controlfield tag="001">this is not a control number</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citing record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>")

    redis.rpush("legacy_records", zlib.compress(raw_record_citer))
    redis.rpush("legacy_records", zlib.compress(raw_record_cited))
    redis.rpush("legacy_records", b"END")

    assert redis.llen("legacy_records") == 3

    with pytest.raises(ValueError):
        continuous_migration()

    def assert_continuous_migration():
        assert redis.llen("legacy_records") == 2

    retry_until_pass(assert_continuous_migration)
def test_aut_record_appear_in_es_when_created(inspire_app, clean_celery_session):
    data = faker.record("aut")
    record = AuthorsRecord.create(data)
    db.session.commit()

    expected_control_number = record["control_number"]

    def assert_record():
        current_search.flush_and_refresh("records-authors")
        record_from_es = AuthorsSearch().get_record_data_from_es(record)
        assert expected_control_number == record_from_es["control_number"]

    retry_until_pass(assert_record)
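
# Deleting an author record should also remove it from the authors index.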
def test_indexer_deletes_record_from_es(inspire_app, datadir):
    def assert_record_is_deleted_from_es():
        current_search.flush_and_refresh("records-authors")
        expected_records_count = 0
        record_lit_es = AuthorsSearch().get_record(str(record.id)).execute().hits
        assert expected_records_count == len(record_lit_es)

    record = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    record.delete()
    db.session.commit()

    retry_until_pass(assert_record_is_deleted_from_es)
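
# Updating a conference's title should be propagated to the conference_info embedded in
# the _ui_display of its conference papers.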
def test_indexer_updates_conference_papers_when_name_changes(
        inspire_app, clean_celery_session):
    conference_data = faker.record(
        "con", data={"titles": [{
            "title": "Initial Title"
        }]})
    conference = ConferencesRecord.create(conference_data)
    db.session.commit()
    current_search.flush_and_refresh("records-conferences")
    conference_id = conference["control_number"]

    conference_paper_data = faker.record(
        "lit",
        data={
            "document_type": ["conference paper"],
            "publication_info": [{
                "conference_record": {
                    "$ref":
                    f"https://labs.inspirehep.net/api/conferences/{conference_id}"
                }
            }],
        },
    )

    LiteratureRecord.create(conference_paper_data)
    db.session.commit()

    def assert_literature_has_correct_conference_title():
        current_search.flush_and_refresh("*")
        result = es_search("records-hep")
        total = get_value(result, "hits.total.value")

        assert total == 1

        literature = get_value(result, "hits.hits[0]._source")
        ui_display = orjson.loads(literature["_ui_display"])
        assert conference["titles"] == get_value(ui_display,
                                                 "conference_info[0].titles")

    retry_until_pass(assert_literature_has_correct_conference_title,
                     timeout=45)

    data = dict(conference)
    data["titles"] = [{"title": "Updated Title"}]
    conference.update(data)
    db.session.commit()

    retry_until_pass(assert_literature_has_correct_conference_title,
                     timeout=45)
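
# Disambiguation should match the literature author to the existing author record with
# the same name and e-mail, so that the author's ids end up containing that record's BAI
# (J.M.Maldacena.1).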
def test_disambiguation_doesnt_assign_bai_when_already_in_author(
        inspire_app, clean_celery_session, enable_disambiguation):
    author_data = faker.record("aut", with_control_number=True)
    author_data.update({
        "name": {
            "value": "Brian Gross"
        },
        "ids": [{
            "schema": "INSPIRE BAI",
            "value": "J.M.Maldacena.1"
        }],
        "email_addresses": [{
            "current": True,
            "value": "*****@*****.**"
        }],
    })
    author_record = InspireRecord.create(author_data)
    db.session.commit()

    def assert_authors_records_exist_in_es():
        author_record_from_es = InspireSearch.get_record_data_from_es(
            author_record)
        assert author_record_from_es

    retry_until_pass(assert_authors_records_exist_in_es)

    literature_data = faker.record("lit", with_control_number=True)
    literature_data.update({
        "authors": [{
            "full_name": "Brian Gross",
            "ids": [{
                "schema": "INSPIRE BAI",
                "value": "A.Test.1"
            }],
            "emails": ["*****@*****.**"],
        }]
    })
    literature_record = LiteratureRecord.create(literature_data)
    db.session.commit()

    def assert_disambiguation_task():
        literature_record_from_es = InspireSearch.get_record_data_from_es(
            literature_record)
        assert {
            "schema": "INSPIRE BAI",
            "value": "J.M.Maldacena.1",
        } in literature_record_from_es["authors"][0]["ids"]

    retry_until_pass(assert_disambiguation_task, retry_interval=2)
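
# Same deletion check against the literature (records-hep) index, using a fixture record
# from datadir.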
def test_indexer_deletes_record_from_es(inspire_app, datadir, clean_celery_session):
    def assert_record_is_deleted_from_es():
        current_search.flush_and_refresh("records-hep")
        expected_records_count = 0
        record_lit_es = LiteratureSearch().get_record(str(record.id)).execute().hits
        assert expected_records_count == len(record_lit_es)

    data = orjson.loads((datadir / "1630825.json").read_text())
    record = LiteratureRecord.create(data)
    db.session.commit()

    record.delete()
    db.session.commit()

    retry_until_pass(assert_record_is_deleted_from_es)
def test_many_records_in_one_commit(inspire_app, clean_celery_session):
    record_recids = set()
    for x in range(4):
        data = faker.record("lit")
        record = LiteratureRecord.create(data)
        record_recids.add(record["control_number"])
    db.session.commit()
    current_search.flush_and_refresh("records-hep")

    def assert_all_records_in_es():
        result = LiteratureSearch().query_from_iq("").execute().hits
        result_recids = {hit.control_number for hit in result}
        assert len(result_recids & record_recids) == 4

    retry_until_pass(assert_all_records_in_es, retry_interval=5)
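
# Same deletion check against the journals index.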
def test_indexer_deletes_record_from_es(inspire_app, datadir):
    def assert_record_is_deleted_from_es():
        current_search.flush_and_refresh("records-journals")
        expected_records_count = 0
        record_lit_es = JournalsSearch().get_record(str(record.id)).execute().hits
        assert expected_records_count == len(record_lit_es)

    data = orjson.loads((datadir / "1213103.json").read_text())
    record = JournalsRecord.create(data)
    db.session.commit()

    record.delete()
    db.session.commit()

    retry_until_pass(assert_record_is_deleted_from_es)
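
# process_references_in_records, given an author record UUID, should hand both papers of
# that author to the batch indexer; index_after_commit is disconnected around the author
# update so that only the task triggers the reindexing.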
def test_process_references_in_records_process_author_records(
        mock_batch_index, inspire_app, clean_celery_session):
    author_record = AuthorsRecord.create(faker.record("aut"))
    lit_record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [{
                    "full_name": author_record["name"]["value"],
                    "record": author_record["self"],
                }]
            },
        ))
    lit_record_2 = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [{
                    "full_name": author_record["name"]["value"],
                    "record": author_record["self"],
                }]
            },
        ))

    db.session.commit()

    def assert_records_in_es():
        lit_record_from_es = InspireSearch.get_record_data_from_es(lit_record)
        lit_record_from_es_2 = InspireSearch.get_record_data_from_es(
            lit_record_2)
        aut_record_from_es = InspireSearch.get_record_data_from_es(
            author_record)
        assert lit_record_from_es and aut_record_from_es and lit_record_from_es_2

    retry_until_pass(assert_records_in_es, retry_interval=5)

    models_committed.disconnect(index_after_commit)
    author_record["name"]["value"] = "Another Name"
    author_record.update(dict(author_record))
    db.session.commit()
    # reconnect signal before we call process_references_in_records
    models_committed.connect(index_after_commit)
    task = process_references_in_records.delay([author_record.id])

    task.get(timeout=5)

    assert sorted(mock_batch_index.mock_calls[0][1][0]) == sorted(
        [str(lit_record.id), str(lit_record_2.id)])
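
# The ``match references`` CLI command should resolve references by DOI against indexed
# records and attach the matched record's $ref to each citer, processing the citing
# records in batches ("-bs 2").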
def test_match_references(inspire_app, cli, clean_celery_session):
    cited_data = {
        "document_type": ["article"],
        "dois": [{
            "value": "10.1371/journal.pone.0188398"
        }],
    }
    cited_record = create_record_async("lit", data=cited_data)
    # the reference matcher requires the cited record to be indexed
    cited_record.index(delay=False)

    citer_data = {
        "references": [{
            "reference": {
                "dois": ["10.1371/journal.pone.0188398"]
            }
        }]
    }
    citer_record_1 = create_record_async("lit", data=citer_data)
    citer_record_2 = create_record_async("lit", data=citer_data)
    citer_record_3 = create_record_async("lit", data=citer_data)
    citer_ids = [citer_record_1.id, citer_record_2.id, citer_record_3.id]

    record_data = create_record_async("dat")
    record_data_uuids = record_data.id

    def assert_all_records_are_indexed():
        current_search.flush_and_refresh("*")
        result = es_search("records-hep")
        uuids = get_value(result, "hits.hits._id")

        for uuid in citer_ids:
            assert str(uuid) in uuids

        result = es_search("records-data")
        uuids = get_value(result, "hits.hits._id")
        assert str(record_data_uuids) in uuids

    retry_until_pass(assert_all_records_are_indexed)

    result = cli.invoke(["match", "references", "-bs", 2])

    assert result.exit_code == 0

    for citer_id in citer_ids:
        updated_citer_record = LiteratureRecord.get_record(citer_id)
        assert (get_value(updated_citer_record,
                          "references[0].record") == cited_record["self"])
def test_index_record_fulltext_manually(inspire_app, clean_celery_session,
                                        override_config, s3, datadir):
    metadata = {"foo": "bar"}
    pdf_path = os.path.join(datadir, "2206.04407.pdf")
    create_s3_bucket(KEY)
    create_s3_file(
        current_s3_instance.get_bucket_for_file_key(KEY),
        KEY,
        pdf_path,
        metadata,
        **{"ContentType": "application/pdf"},
    )

    with override_config(FEATURE_FLAG_ENABLE_FULLTEXT=True,
                         FEATURE_FLAG_ENABLE_FILES=False):
        data = faker.record("lit")
        data.update({
            "documents": [{
                "source":
                "arxiv",
                "fulltext":
                True,
                "filename":
                "new_doc.pdf",
                "key":
                KEY,
                "url":
                "http://www.africau.edu/images/default/sample.pdf",
            }]
        })
        rec = LiteratureRecord.create(data)
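        # keep the freshly created record out of ES so that only index_fulltext() below
        # indexes it (verified by assert_record_not_in_es)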
        models_committed.disconnect(index_after_commit)
        db.session.commit()
        models_committed.connect(index_after_commit)

        assert_record_not_in_es(rec["control_number"])

        rec.index_fulltext()

        def assert_record_in_es():
            current_search.flush_and_refresh("*")
            record_lit_es = (
                LiteratureSearch().get_record(str(rec.id)).execute().hits.hits[0]
            )
            document = record_lit_es._source["documents"][0]
            assert "attachment" in document
            assert "text" not in document  # pipeline should remove it

        retry_until_pass(assert_record_in_es, timeout=90, retry_interval=5)
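
# fix_entries_by_update_date should clean up the RecordsAuthors table, bringing the entry
# count back from 4 (2 derived from the records' BAIs + 2 inserted manually) to 2.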
def test_fix_entries_by_update_date(inspire_app, clean_celery_session):
    literature_data = faker.record("lit", with_control_number=True)
    literature_data.update({
        "authors": [{
            "full_name": "George, Smith",
            "ids": [{
                "value": "Smith.G.1",
                "schema": "INSPIRE BAI"
            }],
        }]
    })
    record_1 = InspireRecord.create(literature_data)
    literature_data_2 = faker.record("lit", with_control_number=True)
    literature_data_2.update({
        "authors": [{
            "full_name": "Xiu, Li",
            "ids": [{
                "value": "X.Liu.1",
                "schema": "INSPIRE BAI"
            }],
        }]
    })
    record_2 = InspireRecord.create(literature_data_2)
    db.session.add(
        RecordsAuthors(
            author_id="A.Test.1",
            id_type="INSPIRE BAI",
            record_id=record_1.id,
        ))
    db.session.add(
        RecordsAuthors(
            author_id="A.Test.2",
            id_type="INSPIRE BAI",
            record_id=record_2.id,
        ))
    db.session.commit()

    def assert_all_entries_in_db():
        assert len(RecordsAuthors.query.all()) == 4

    retry_until_pass(assert_all_entries_in_db)

    LiteratureRecord.fix_entries_by_update_date()

    def assert_all_entries_in_db():
        assert len(RecordsAuthors.query.all()) == 2

    retry_until_pass(assert_all_entries_in_db, retry_interval=3)
def test_aut_record_update_when_changed(inspire_app, clean_celery_session):
    data = faker.record("aut")
    rec = AuthorsRecord.create(data)
    db.session.commit()
    expected_death_date = "1900-01-01"
    data["death_date"] = expected_death_date
    data["control_number"] = rec["control_number"]
    rec.update(data)
    db.session.commit()

    def assert_record():
        current_search.flush_and_refresh("records-authors")
        record_from_es = AuthorsSearch().get_record_data_from_es(rec)
        assert expected_death_date == record_from_es["death_date"]

    retry_until_pass(assert_record)
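
# After a journal record is merged away, reference recalculation should update the
# journal_record $ref on all affected literature records, even when there are more
# than 10 of them.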
def test_recalculate_references_recalculates_more_than_10_references(
        inspire_app, clean_celery_session):
    journal_data = faker.record("jou", with_control_number=True)
    journal = InspireRecord.create(journal_data)
    journal_record_reference = journal["self"]["$ref"]

    literature_data = faker.record("lit")
    literature_data.update({
        "publication_info": [{
            "journal_record": {
                "$ref": journal_record_reference
            }
        }]
    })
    for i in range(11):
        InspireRecord.create(literature_data)

    db.session.commit()

    def assert_all_records_in_es():
        literature_records_from_es = list(LiteratureSearch().query_from_iq(
            query_string=
            f"publication_info.journal_record.$ref: {journal_record_reference}"
        ).scan())
        journal_record_from_es = InspireSearch.get_record_data_from_es(journal)

        assert len(literature_records_from_es) == 11 and journal_record_from_es

    retry_until_pass(assert_all_records_in_es, retry_interval=5)

    merged_journal_data = faker.record("jou", with_control_number=True)
    merged_journal_data.update(
        {"deleted_records": [{
            "$ref": journal_record_reference
        }]})

    merged_journal_record = InspireRecord.create(merged_journal_data)
    db.session.commit()

    def assert_recalculate_references_task():
        literature_records_from_es = list(LiteratureSearch().query_from_iq(
            query_string=
            f'publication_info.journal_record.$ref: {merged_journal_record["self"]["$ref"]}'
        ).scan())
        assert len(literature_records_from_es) == 11

    retry_until_pass(assert_recalculate_references_task, retry_interval=3)
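
# Changing an author's name should trigger reindexing of their papers so that
# facet_author_name reflects the new name.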
def test_indexer_updates_authors_papers_when_name_changes(
    inspire_app, clean_celery_session
):
    author_data = faker.record("aut")
    author = AuthorsRecord.create(author_data)
    db.session.commit()
    current_search.flush_and_refresh("records-authors")
    author_cn = author["control_number"]

    lit_data = {
        "authors": [
            {
                "record": {
                    "$ref": f"https://labs.inspirehep.net/api/authors/{author_cn}"
                },
                "full_name": author["name"]["value"],
            }
        ]
    }
    lit_data = faker.record("lit", data=lit_data)

    lit_1 = LiteratureRecord.create(lit_data)
    db.session.commit()

    expected_facet_author_name = f"{author['control_number']}_{author['name']['value']}"

    def assert_record():
        current_search.flush_and_refresh("records-hep")
        record_from_es = LiteratureSearch().get_record_data_from_es(lit_1)
        assert expected_facet_author_name == record_from_es["facet_author_name"][0]

    retry_until_pass(assert_record)

    data = dict(author)
    data["name"]["value"] = "Some other name"
    author.update(data)
    db.session.commit()

    expected_facet_author_name = f"{author['control_number']}_Some other name"

    def assert_record():
        current_search.flush_and_refresh("records-hep")
        record_from_es = LiteratureSearch().get_record_data_from_es(lit_1)
        assert expected_facet_author_name == record_from_es["facet_author_name"][0]

    retry_until_pass(assert_record)
def test_lit_record_update_when_changed(inspire_app, clean_celery_session):
    data = faker.record("lit")
    data["titles"] = [{"title": "Original title"}]
    rec = LiteratureRecord.create(data)
    db.session.commit()
    expected_title = "Updated title"
    data["titles"][0]["title"] = expected_title
    data["control_number"] = rec["control_number"]
    rec.update(data)
    db.session.commit()

    def assert_record():
        current_search.flush_and_refresh("records-hep")
        record_from_es = LiteratureSearch().get_record_data_from_es(rec)
        assert expected_title == record_from_es["titles"][0]["title"]

    retry_until_pass(assert_record)