Esempio n. 1
0
def test_index_record(inspire_app, celery_app_with_context,
                      celery_session_worker):
    """Index one record of every collection through ``index_records``.

    The ``index_after_commit`` receiver is disconnected for the whole test
    (presumably so that indexing only happens through the explicit task
    call) and is always reconnected afterwards.
    """
    models_committed.disconnect(index_after_commit)
    try:
        records = [
            create_record_async(pid_type)
            for pid_type in ("lit", "aut", "job", "jou", "exp", "con", "dat",
                             "ins")
        ]

        uuids = [record.id for record in records]
        task = index_records.delay(uuids)

        results = task.get(timeout=5)

        # The task result is compared against the uuids rendered as strings.
        assert results == [str(uuid) for uuid in uuids]

        for record in records:
            result = InspireSearch.get_record_data_from_es(record)
            assert record["control_number"] == result["control_number"]
    finally:
        # Reconnect even when an assertion above fails; otherwise the
        # disconnected receiver leaks into every test that runs afterwards.
        models_committed.connect(index_after_commit)
Esempio n. 2
0
def test_process_references_in_records_reindexes_conferences_when_pub_info_changes(
        inspire_app, celery_app_with_context, celery_session_worker):
    """Check that a conference gets reindexed with its contribution count.

    A conference and a conference paper pointing at it are created while the
    ``index_after_commit`` receiver is disconnected. After
    ``process_references_in_records`` ran for the paper, the conference
    document in ES must report one contribution.
    """
    # disconnect this signal so records don't get indexed
    models_committed.disconnect(index_after_commit)
    try:
        conference_data = faker.record("con", with_control_number=True)
        conference_record = InspireRecord.create(conference_data)

        conference_control_number = conference_record["control_number"]
        conf_ref = f"http://localhost:8000/api/conferences/{conference_control_number}"

        data = faker.record("lit", with_control_number=True)

        data["publication_info"] = [{"conference_record": {"$ref": conf_ref}}]
        data["document_type"] = ["conference paper"]

        record = InspireRecord.create(data)
        db.session.commit()
    finally:
        # reconnect signal before we call process_references_in_records;
        # done in ``finally`` so a failing setup cannot leave the receiver
        # disconnected for the tests that run afterwards
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([record.id])

    # Only waiting for completion matters; the task result is not inspected.
    task.get(timeout=5)

    conference_record_es = InspireSearch.get_record_data_from_es(
        conference_record)
    expected_number_of_contributions = 1

    assert (expected_number_of_contributions ==
            conference_record_es["number_of_contributions"])
Esempio n. 3
0
    def assert_recalculate_references_task():
        """Check that the ES documents reference the merged institution.

        The author position, the job institution, the literature author
        affiliation and the thesis institution must all carry the ``$ref``
        found under ``self`` of ``merged_institution_record``.
        """
        author_es = InspireSearch.get_record_data_from_es(author)
        job_es = InspireSearch.get_record_data_from_es(job)
        literature_es = InspireSearch.get_record_data_from_es(literature)

        merged_ref = merged_institution_record["self"]["$ref"]

        author_position = author_es["positions"][0]
        assert author_position["record"]["$ref"] == merged_ref

        job_institution = job_es["institutions"][0]
        assert job_institution["record"]["$ref"] == merged_ref

        author_affiliation = literature_es["authors"][0]["affiliations"][0]
        assert author_affiliation["record"]["$ref"] == merged_ref

        thesis_institution = literature_es["thesis_info"]["institutions"][0]
        assert thesis_institution["record"]["$ref"] == merged_ref
 def assert_disambiguation_on_update():
     """Check that the first author has a ``$ref`` both in the DB and in ES."""
     # Close the session before the record is loaded again.
     db.session.close()
     db_record = LiteratureRecord.get_record(literature_record_uuid)
     es_record = InspireSearch.get_record_data_from_es(db_record)
     for source in (db_record, es_record):
         assert source["authors"][0]["record"]["$ref"]
def test_process_references_in_records_reindexes_experiments_when_linked_experiments_change(
        app, clean_celery_session):
    """Check that an experiment gets reindexed with its paper count.

    An experiment and a literature record linked to it through
    ``accelerator_experiments`` are created while the ``index_after_commit``
    receiver is disconnected. After ``process_references_in_records`` ran for
    the literature record, the experiment document in ES must report one
    paper.
    """
    # disconnect this signal so records don't get indexed
    models_committed.disconnect(index_after_commit)
    try:
        experiment_data = faker.record("exp", with_control_number=True)
        experiment = InspireRecord.create(experiment_data)
        db.session.commit()

        experiment_control_number = experiment["control_number"]
        exp_ref = f"http://localhost:8000/api/experiments/{experiment_control_number}"

        data = faker.record("lit", with_control_number=True)

        data["accelerator_experiments"] = [{
            "legacy_name": "LIGO",
            "record": {
                "$ref": exp_ref
            }
        }]

        record = InspireRecord.create(data)
        db.session.commit()
    finally:
        # Reconnect in ``finally`` so a failing setup cannot leave the
        # receiver disconnected for the tests that run afterwards.
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([record.id])
    task.get(timeout=5)

    experiment_record_es = InspireSearch.get_record_data_from_es(experiment)
    expected_number_of_paper = 1

    assert expected_number_of_paper == experiment_record_es["number_of_papers"]
 def assert_disambiguation_task():
     """Check that the first author in ES carries the expected INSPIRE BAI."""
     expected_id = {
         "schema": "INSPIRE BAI",
         "value": "J.M.Maldacena.1",
     }
     es_record = InspireSearch.get_record_data_from_es(literature_record)
     first_author_ids = es_record["authors"][0]["ids"]
     assert expected_id in first_author_ids
 def assert_disambiguation_task():
     """Check the first author hit and its link from the literature record.

     The first hit of an empty authors query must be named "Michal Kowal"
     and its control number must equal the ``recid`` of the first author of
     the literature record in ES.
     """
     es_literature = InspireSearch.get_record_data_from_es(literature_record)
     authors_response = AuthorsSearch().query_from_iq("").execute()

     assert authors_response.hits[0].name["value"] == "Michal Kowal"

     first_author_recid = es_literature["authors"][0]["recid"]
     assert first_author_recid == authors_response.hits[0].control_number
 def assert_references():
     """Check the author facet name and first author ``$ref`` stored in ES."""
     # Flush and refresh the index before the document is read.
     current_search.flush_and_refresh("records-hep")
     es_record = InspireSearch.get_record_data_from_es(record)

     facet_author_name = es_record["facet_author_name"]
     assert expected_facet_author_name == facet_author_name

     first_author_ref = es_record["authors"][0]["record"]["$ref"]
     assert expected_record_ref == first_author_ref
Esempio n. 9
0
def update_references_pointing_to_merged_record(refs_to_schema,
                                                merged_record_uri,
                                                new_record_uri):
    """Repoint references from a merged record to the record replacing it.

    For every ``(index, path)`` pair the ``records-<index>`` search index is
    scanned with the query built by ``get_query_for_given_path``. Every hit
    is loaded from the database with ``with_for_update()``, the objects found
    under ``path`` (minus its trailing ``.$ref``) are passed to
    ``update_reference_if_reference_uri_matches`` and the record is updated.
    One commit is issued after all pairs have been processed.

    Args:
        refs_to_schema: iterable of ``(index, path)`` pairs. ``path`` is
            expected to end in ``.$ref``; that suffix is stripped to reach
            the enclosing reference object.
        merged_record_uri: URI of the record that was merged away.
        new_record_uri: URI passed on as the replacement.
    """
    for index, path in refs_to_schema:
        query = get_query_for_given_path(index, path, merged_record_uri)
        es_index_name = f"records-{index}"
        matched_records = InspireSearch(
            index=es_index_name).query(query).scan()
        for matched_record in matched_records:
            pid_type = current_app.config["SCHEMA_TO_PID_TYPES"][index]
            record_class = InspireRecord.get_subclasses()[pid_type]
            matched_inspire_record_data = (
                db.session.query(RecordMetadata).with_for_update().filter_by(
                    id=matched_record.meta.id).first())
            if matched_inspire_record_data is None:
                # ``first()`` returns None when no row matches, e.g. when the
                # search index still holds a document whose database row is
                # gone. Skip it instead of failing on ``None.json`` below.
                LOGGER.warning(
                    "Record matched in ES is missing from the database",
                    uuid=str(matched_record.meta.id))
                continue
            matched_inspire_record = record_class(
                matched_inspire_record_data.json,
                model=matched_inspire_record_data)
            referenced_records_in_path = flatten_list(
                get_value(matched_inspire_record, path[:-len(".$ref")], []))
            for referenced_record in referenced_records_in_path:
                update_reference_if_reference_uri_matches(
                    referenced_record, merged_record_uri, new_record_uri)
            # The reference objects were handed over above; passing the
            # record's own content to ``update`` persists the result.
            matched_inspire_record.update(dict(matched_inspire_record))
            LOGGER.info("Updated reference for record",
                        uuid=str(matched_inspire_record.id))
    db.session.commit()
Esempio n. 10
0
    def assert_migrator_task():
        """Check that both migrated records exist, are indexed and are linked.

        The citing record must have a citation count of one, and both
        records must be retrievable from ES under their control numbers.
        """
        citer = InspireRecord.get_record_by_pid_value(citer_control_number,
                                                      "lit")
        citing = InspireRecord.get_record_by_pid_value(citing_control_number,
                                                       "lit")

        assert citing.citation_count == 1

        checks = (
            (citer_control_number, citer),
            (citing_control_number, citing),
        )
        for expected_control_number, db_record in checks:
            es_record = InspireSearch.get_record_data_from_es(db_record)
            assert expected_control_number == es_record["control_number"]
Esempio n. 11
0
    def assert_all_records_in_es():
        """Check that 11 literature records and the journal are in ES.

        The literature records are looked up by the ``$ref`` of their
        ``publication_info.journal_record``.
        """
        literature_records_from_es = list(LiteratureSearch().query_from_iq(
            query_string=
            f"publication_info.journal_record.$ref: {journal_record_reference}"
        ).scan())
        journal_record_from_es = InspireSearch.get_record_data_from_es(journal)

        # Two separate asserts so a failure shows which condition broke.
        assert len(literature_records_from_es) == 11
        assert journal_record_from_es
 def assert_disambiguation_task():
     """Check that the first author is not linked to either existing author.

     ``record`` is a reference object (``{"$ref": ...}``), so the URL has to
     be read from its ``$ref`` key. Comparing the whole object with a URL
     string is always unequal and would make the assertions vacuous.
     """
     literature_record_from_es = InspireSearch.get_record_data_from_es(
         literature_record)
     # new author is created
     author_ref = (literature_record_from_es["authors"][0].get("record")
                   or {}).get("$ref")
     assert author_ref != "http://localhost:5000/api/authors/90676330"
     assert author_ref != "http://localhost:5000/api/authors/90676331"
    def assert_disambiguation_on_record_update():
        """Check that the first author in ES matches the one in ``lit_record``.

        Both the ``ids`` and the ``record`` of the first author must be equal.
        """
        es_record = InspireSearch.get_record_data_from_es(literature_record_3)
        es_author = es_record["authors"][0]
        for key in ("ids", "record"):
            assert es_author[key] == lit_record["authors"][0][key]
Esempio n. 14
0
def test_continuous_migration_with_invalid_control_number(
        app, cache, celery_app_with_context, celery_session_worker):
    """Run the continuous migration with a non-numeric control number queued.

    ``continuous_migration`` must raise ``ValueError``; the record queued
    before the broken one is still found in the database and in ES, and one
    entry is left in the ``legacy_records`` list.
    """
    citer_control_number = 666
    citer_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">666</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citer record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b'   <datafield tag="999" ind1="C" ind2="5">'
        b'    <subfield code="0">667</subfield>'
        b'    <subfield code="h">Achasov, M.N.</subfield>'
        b'    <subfield code="k">snd-2018</subfield>'
        b'    <subfield code="m">(SND Collaboration)</subfield>'
        b'    <subfield code="o">2</subfield>'
        b'    <subfield code="s">Phys.Rev.,D97,012008</subfield>'
        b'    <subfield code="x">'
        b"    [2] M. N. Achasov (SND Collaboration), Phys. Rev. D 97, 012008 (2018)."
        b"    </subfield>"
        b'    <subfield code="y">2018</subfield>'
        b'    <subfield code="z">0</subfield>'
        b'    <subfield code="z">1</subfield>'
        b"    </datafield>"
        b"</record>")

    # The 001 control field of this record is deliberately not a number.
    broken_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">this is not a control number</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citing record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>")

    for marcxml in (citer_marcxml, broken_marcxml):
        cache.rpush("legacy_records", zlib.compress(marcxml))

    assert cache.llen("legacy_records") == 2

    with pytest.raises(ValueError):
        continuous_migration()

    citer = InspireRecord.get_record_by_pid_value(citer_control_number, "lit")
    citer_es = InspireSearch.get_record_data_from_es(citer)

    assert citer_control_number == citer_es["control_number"]

    # I don't like timeouts, it's the only way to wait for this chain
    time.sleep(5)

    assert cache.llen("legacy_records") == 1
 def assert_disambiguation_task():
     """Check that the first two ES authors reference ``author_1``/``author_2``.

     The control number of each author record must appear in the ``$ref`` of
     the literature author at the same position.
     """
     es_record = InspireSearch.get_record_data_from_es(literature_record)
     es_authors = es_record.get("authors")
     for position, author in enumerate((author_1, author_2)):
         control_number = str(author["control_number"])
         assert control_number in es_authors[position]["record"]["$ref"]
Esempio n. 16
0
def test_search_factory_with_query(inspire_app):
    """Check what ``inspire_search_factory`` builds for the request ``?q=foo``.

    The factory must return the raw query string and a search whose body is
    an AND-ed ``query_string`` query with total hit tracking enabled.
    """
    expected_query_string = "foo"
    expected_body = {
        "query": {
            "query_string": {
                "default_operator": "AND",
                "query": "foo"
            }
        },
        "track_total_hits": True,
    }

    with current_app.test_request_context("?q=foo"):
        query_string, search = inspire_search_factory(None, InspireSearch())
        body = search.to_dict()

        assert expected_query_string == query_string
        assert expected_body == body
Esempio n. 17
0
def test_index_record_deletes_a_deleted_record(inspire_app,
                                               clean_celery_session):
    """Check that indexing a deleted record leaves it unavailable in ES.

    The record is created, reloaded by PID, deleted and committed. After
    ``index_records`` ran for it, fetching it from ES must raise
    ``TransportError``.
    """
    control_number = create_record_async("lit")["control_number"]
    deleted_record = InspireRecord.get_record_by_pid_value(
        control_number, "lit")
    deleted_record.delete()
    db.session.commit()

    record_uuids = [deleted_record.id]
    indexed = index_records.delay(record_uuids).get(timeout=5)

    # The task result is compared against the uuids rendered as strings.
    assert indexed == [str(record_uuid) for record_uuid in record_uuids]

    with pytest.raises(TransportError):
        InspireSearch.get_record_data_from_es(deleted_record)
    def assert_first_disambiguation_no_match():
        """Check that the first author got its own INSPIRE BAI.

        The ids stored in ES must contain an ``INSPIRE BAI`` value and must
        differ from the first-author ids of the two other literature records.
        """
        es_record = InspireSearch.get_record_data_from_es(literature_record_3)
        es_author_ids = es_record["authors"][0]["ids"]

        assert get_values_for_schema(es_author_ids, "INSPIRE BAI")
        for other_record in (literature_record, literature_record_2):
            assert es_author_ids != other_record["authors"][0]["ids"]
Esempio n. 19
0
def test_process_references_in_records(inspire_app, celery_app_with_context,
                                       celery_session_worker):
    """Check that cited records get reindexed with their citation count.

    Two cited and two citing literature records are created while the
    ``index_after_commit`` receiver is disconnected. After
    ``process_references_in_records`` ran for the citing records, each cited
    record must report a citation count of one in ES.
    """
    # disconnect this signal so records don't get indexed
    models_committed.disconnect(index_after_commit)
    try:
        cited_record_1 = LiteratureRecord.create(faker.record("lit"))
        cited_record_2 = LiteratureRecord.create(faker.record("lit"))

        data_citing_record_1 = faker.record(
            "lit", literature_citations=[cited_record_1["control_number"]])
        citing_record_1 = LiteratureRecord.create(data_citing_record_1)
        data_citing_record_2 = faker.record(
            "lit", literature_citations=[cited_record_2["control_number"]])
        citing_record_2 = LiteratureRecord.create(data_citing_record_2)

        db.session.commit()
    finally:
        # reconnect signal before we call process_references_in_records;
        # done in ``finally`` so a failing setup cannot leave the receiver
        # disconnected for the tests that run afterwards
        models_committed.connect(index_after_commit)

    uuids = [citing_record_1.id, citing_record_2.id]

    task = process_references_in_records.delay(uuids)

    # Only waiting for completion matters; the task result is not inspected.
    task.get(timeout=5)

    result_cited_record_1 = InspireSearch.get_record_data_from_es(
        cited_record_1)
    expected_result_cited_record_1_citation_count = 1

    assert (expected_result_cited_record_1_citation_count ==
            result_cited_record_1["citation_count"])

    result_cited_record_2 = InspireSearch.get_record_data_from_es(
        cited_record_2)
    expected_result_cited_record_2_citation_count = 1
    assert (expected_result_cited_record_2_citation_count ==
            result_cited_record_2["citation_count"])
Esempio n. 20
0
 def assert_all_records_in_es():
     """Check that both the literature and the seminar record are in ES."""
     records_from_es = [
         InspireSearch.get_record_data_from_es(db_record)
         for db_record in (literature, seminar)
     ]
     assert all(records_from_es)
Esempio n. 21
0
def test_continuous_migration_with_an_invalid_record(app, cache,
                                                     celery_app_with_context,
                                                     celery_session_worker):
    """Run the continuous migration with one invalid record in the queue.

    A citer, an invalid and a cited legacy record are queued. Afterwards the
    two valid records must exist in the database and in ES, the cited one
    must have one citation, the invalid one must have no PID and the
    ``legacy_records`` list must be empty.
    """
    citer_control_number = 666
    cited_control_number = 667
    invalid_control_number = 668

    citer_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">666</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citer record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b'   <datafield tag="999" ind1="C" ind2="5">'
        b'    <subfield code="0">667</subfield>'
        b'    <subfield code="h">Achasov, M.N.</subfield>'
        b'    <subfield code="k">snd-2018</subfield>'
        b'    <subfield code="m">(SND Collaboration)</subfield>'
        b'    <subfield code="o">2</subfield>'
        b'    <subfield code="s">Phys.Rev.,D97,012008</subfield>'
        b'    <subfield code="x">'
        b"    [2] M. N. Achasov (SND Collaboration), Phys. Rev. D 97, 012008 (2018)."
        b"    </subfield>"
        b'    <subfield code="y">2018</subfield>'
        b'    <subfield code="z">0</subfield>'
        b'    <subfield code="z">1</subfield>'
        b"    </datafield>"
        b"</record>")

    cited_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">667</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citing record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>")

    # The 260 $c subfield of this record deliberately holds a non-date.
    invalid_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">668</controlfield>'
        b'  <datafield tag="260" ind1=" " ind2=" ">'
        b'    <subfield code="c">Definitely not a date</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>")

    # The invalid record is queued between the two valid ones.
    for marcxml in (citer_marcxml, invalid_marcxml, cited_marcxml):
        cache.rpush("legacy_records", zlib.compress(marcxml))

    assert cache.llen("legacy_records") == 3

    continuous_migration()

    # I don't like timeouts, it's the only way to wait for this chain
    time.sleep(10)

    citer = InspireRecord.get_record_by_pid_value(citer_control_number, "lit")
    cited = InspireRecord.get_record_by_pid_value(cited_control_number, "lit")

    with pytest.raises(PIDDoesNotExistError):
        InspireRecord.get_record_by_pid_value(invalid_control_number, "lit")

    assert cited.citation_count == 1

    citer_es = InspireSearch.get_record_data_from_es(citer)
    assert citer_control_number == citer_es["control_number"]

    cited_es = InspireSearch.get_record_data_from_es(cited)
    cited_control_number_es = cited_es["control_number"]
    assert cited_control_number == cited_control_number_es

    with app.test_client() as client:
        response = client.get(
            f"/literature/{cited_control_number_es}/citations")
        citations_payload = response.json

        assert 1 == citations_payload["metadata"]["citation_count"]

    assert cache.llen("legacy_records") == 0
 def assert_authors_records_exist_in_es():
     """Check that ES returns non-empty data for ``author_record``."""
     assert InspireSearch.get_record_data_from_es(author_record)
    def assert_lit_records_exist_in_es():
        """Check that ES returns non-empty data for ``literature_record``."""
        assert InspireSearch.get_record_data_from_es(literature_record)
 def assert_disambiguation_task():
     """Check that the first ES author has the ``record`` of ``literature_data_2``."""
     es_record = InspireSearch.get_record_data_from_es(literature_record_3)
     expected_author_record = literature_data_2["authors"][0]["record"]
     assert expected_author_record == es_record["authors"][0]["record"]
 def assert_disambiguation_task():
     """Check that the first author in ES has no (or an empty) ``record``."""
     es_record = InspireSearch.get_record_data_from_es(literature_record)
     first_author = es_record["authors"][0]
     assert not first_author.get("record")
Esempio n. 26
0
 def assert_recalculate_references_task():
     """Check that the ES record references the merged conference.

     The ``$ref`` of the first ``publication_info`` conference record must
     equal the ``$ref`` found under ``self`` of ``merged_conference_record``.
     """
     es_record = InspireSearch.get_record_data_from_es(literature)
     first_publication_info = es_record["publication_info"][0]
     conference_ref = first_publication_info["conference_record"]["$ref"]
     assert conference_ref == merged_conference_record["self"]["$ref"]
Esempio n. 27
0
def test_process_references_in_records_reindexes_institutions_when_linked_institutions_change(
        inspire_app, celery_app_with_context, celery_session_worker):
    """Check that an institution gets reindexed with its paper count.

    Three literature records are created while the ``index_after_commit``
    receiver is disconnected. They link to one institution through an author
    affiliation, ``thesis_info`` and ``record_affiliations`` respectively.
    After ``process_references_in_records`` ran for the three records, the
    institution document in ES must report three papers.
    """
    # disconnect this signal so records don't get indexed
    models_committed.disconnect(index_after_commit)
    try:
        institution_data = faker.record("ins", with_control_number=True)
        institution = InspireRecord.create(institution_data)

        institution_control_number = institution["control_number"]
        inst_ref = f"http://localhost:8000/api/institutions/{institution_control_number}"

        data = faker.record("lit", with_control_number=True)
        data.update({
            "authors": [{
                "full_name":
                "John Doe",
                "affiliations": [{
                    "value": "Institution",
                    "record": {
                        "$ref": inst_ref
                    }
                }],
            }]
        })

        record_authors_aff = InspireRecord.create(data)
        db.session.commit()

        data = faker.record("lit", with_control_number=True)
        data.update(
            {"thesis_info": {
                "institutions": [{
                    "record": {
                        "$ref": inst_ref
                    }
                }]
            }})

        record_thesis_info = InspireRecord.create(data)
        db.session.commit()

        data = faker.record("lit", with_control_number=True)
        data.update({
            "record_affiliations": [{
                "record": {
                    "$ref": inst_ref
                },
                "value": "Institution"
            }]
        })

        record_affiliations = InspireRecord.create(data)
        db.session.commit()
    finally:
        # reconnect signal before we call process_references_in_records;
        # done in ``finally`` so a failing setup cannot leave the receiver
        # disconnected for the tests that run afterwards
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay(
        [record_authors_aff.id, record_thesis_info.id, record_affiliations.id])

    task.get(timeout=5)

    institution_record_es = InspireSearch.get_record_data_from_es(institution)
    expected_number_of_paper = 3

    assert expected_number_of_paper == institution_record_es[
        "number_of_papers"]
 def assert_disambiguation_on_record_update():
     """Check that the first author's first INSPIRE BAI in ES is ``old_bai``."""
     es_record = InspireSearch.get_record_data_from_es(literature_record)
     first_author_ids = es_record["authors"][0]["ids"]
     bais = get_values_for_schema(first_author_ids, "INSPIRE BAI")
     assert bais[0] == old_bai
 def assert_disambiguation_task():
     """Check that the first ES author's ``$ref`` contains the author's control number."""
     es_record = InspireSearch.get_record_data_from_es(literature_record)
     expected_control_number = str(author_data["control_number"])
     first_author_ref = es_record["authors"][0]["record"]["$ref"]
     assert expected_control_number in first_author_ref
def test_continuous_migration_with_different_type_of_records(
        inspire_app, celery_app_with_context, celery_session_worker, redis):
    """Run the continuous migration over literature and author records.

    A citer, an author and a cited legacy record plus an ``END`` marker are
    queued. Afterwards all three records must exist in the database and in
    ES, the cited one must have one citation (also through the citations
    API) and the ``legacy_records`` list must be empty.
    """
    citer_control_number = 666
    cited_control_number = 667
    author_control_number = 668

    citer_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">666</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citer record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b'   <datafield tag="999" ind1="C" ind2="5">'
        b'    <subfield code="0">667</subfield>'
        b'    <subfield code="h">Achasov, M.N.</subfield>'
        b'    <subfield code="k">snd-2018</subfield>'
        b'    <subfield code="m">(SND Collaboration)</subfield>'
        b'    <subfield code="o">2</subfield>'
        b'    <subfield code="s">Phys.Rev.,D97,012008</subfield>'
        b'    <subfield code="x">'
        b"    [2] M. N. Achasov (SND Collaboration), Phys. Rev. D 97, 012008 (2018)."
        b"    </subfield>"
        b'    <subfield code="y">2018</subfield>'
        b'    <subfield code="z">0</subfield>'
        b'    <subfield code="z">1</subfield>'
        b"    </datafield>"
        b"</record>")

    cited_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">667</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citing record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>")

    author_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">668</controlfield>'
        b'  <datafield tag="100" ind1=" " ind2=" ">'
        b'    <subfield code="a">Jessica Jones</subfield>'
        b'    <subfield code="q">Jones Jessica</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEPNAMES</subfield>'
        b"  </datafield>"
        b"</record>")

    # The author record is queued between the two literature records.
    for marcxml in (citer_marcxml, author_marcxml, cited_marcxml):
        redis.rpush("legacy_records", zlib.compress(marcxml))
    redis.rpush("legacy_records", b"END")

    assert redis.llen("legacy_records") == 4

    continuous_migration()

    # I don't like timeouts, it's the only way to wait for this chain
    time.sleep(5)

    citer = InspireRecord.get_record_by_pid_value(citer_control_number, "lit")
    cited = InspireRecord.get_record_by_pid_value(cited_control_number, "lit")
    author = InspireRecord.get_record_by_pid_value(author_control_number,
                                                   "aut")

    assert cited.citation_count == 1

    citer_es = InspireSearch.get_record_data_from_es(citer)
    assert citer_control_number == citer_es["control_number"]

    cited_es = InspireSearch.get_record_data_from_es(cited)
    cited_control_number_es = cited_es["control_number"]
    assert cited_control_number == cited_control_number_es

    author_es = InspireSearch.get_record_data_from_es(author)
    assert author_control_number == author_es["control_number"]

    with inspire_app.test_client() as client:
        response = client.get(
            f"/api/literature/{cited_control_number_es}/citations")
        citations_payload = response.json

        assert 1 == citations_payload["metadata"]["citation_count"]

    assert redis.llen("legacy_records") == 0