Esempio n. 1
0
def test_authors_create_with_invalid_data(inspire_app):
    """Check that a rejected author payload does not leave a PID behind.

    The payload carries an extra ``invalid_key`` entry, ``AuthorsRecord.create``
    is expected to raise ``ValidationError``, and afterwards no persistent
    identifier may exist for the payload's control number.
    """
    payload = faker.record("aut", with_control_number=True)
    payload["invalid_key"] = "should throw an error"
    control_number = str(payload["control_number"])

    with pytest.raises(ValidationError):
        AuthorsRecord.create(payload)

    pid_query = PersistentIdentifier.query.filter_by(pid_value=control_number)
    assert pid_query.one_or_none() is None
Esempio n. 2
0
def test_authors_create_with_existing_control_number(inspire_app):
    """Check that creating an author with an already used control number fails.

    A PID of type ``aut`` is created for the control number first, attached to
    an unrelated object UUID; ``AuthorsRecord.create`` must then raise
    ``PIDAlreadyExists``.
    """
    author_data = faker.record("aut", with_control_number=True)

    # Take the control number up front, on behalf of some other object.
    create_pidstore(
        object_uuid=uuid.uuid4(),
        pid_type="aut",
        pid_value=author_data["control_number"],
    )

    with pytest.raises(PIDAlreadyExists):
        AuthorsRecord.create(author_data)
Esempio n. 3
0
def test_aut_record_appear_in_es_when_created(inspire_app,
                                              celery_app_with_context,
                                              celery_session_worker):
    """Check that a committed author becomes the only hit in ``records-authors``.

    The steps refresh the index, wait for a hit total of 1 and then wait for
    the first hit's ``_id`` to equal the record's id.
    """
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    expected_id = str(author.id)

    refresh_index = {
        "step": current_search.flush_and_refresh,
        "args": ["records-authors"],
    }
    exactly_one_hit = {
        "step": es_search,
        "args": ["records-authors"],
        "expected_result": {
            "expected_key": "hits.total.value",
            "expected_result": 1,
        },
    }
    hit_is_our_record = {
        "step": es_search,
        "args": ["records-authors"],
        "expected_result": {
            "expected_key": "hits.hits[0]._id",
            "expected_result": expected_id,
        },
    }
    retry_until_matched([refresh_index, exactly_one_hit, hit_is_our_record])
Esempio n. 4
0
def test_aut_record_removed_form_es_when_deleted(inspire_app, clean_celery_session):
    """Check that deleting an author empties the ``records-authors`` index.

    The hit total is polled until it is 1 after creation, and polled again
    until it is 0 after the record has been deleted and committed.
    """
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    def total_hits():
        # Refresh first so the search sees the latest indexing state.
        current_search.flush_and_refresh("records-authors")
        return get_value(es_search("records-authors"), "hits.total.value")

    def assert_indexed():
        assert 1 == total_hits()

    retry_until_pass(assert_indexed)

    author.delete()
    db.session.commit()

    def assert_removed():
        assert 0 == total_hits()

    retry_until_pass(assert_removed)
Esempio n. 5
0
def test_aut_record_update_when_changed(app, celery_app_with_context,
                                        celery_session_worker,
                                        retry_until_matched):
    """Check that an updated ``death_date`` reaches the author's ES document.

    After the update is committed, the index is refreshed and searched until
    ``hits.total`` is 1; the first hit's source must carry the new date.
    """
    expected_death_date = "1900-01-01"

    data = faker.record("aut")
    rec = AuthorsRecord.create(data)
    db.session.commit()

    data["death_date"] = expected_death_date
    rec.update(data)
    db.session.commit()

    refresh_step = {"step": es.indices.refresh, "args": ["records-authors"]}
    search_step = {
        "step": es.search,
        "args": ["records-authors"],
        "expected_result": {"expected_key": "hits.total", "expected_result": 1},
    }
    response = retry_until_matched([refresh_step, search_step])

    first_hit = response["hits"]["hits"][0]
    assert first_hit["_source"]["death_date"] == expected_death_date
Esempio n. 6
0
def test_literature_create_or_update_with_existing_record(inspire_app):
    """Check ``AuthorsRecord.create_or_update`` against an existing author.

    Despite the function name, the record under test is an author ("aut").
    The control number must be preserved, the stored JSON must equal the
    updated payload, and the "aut" PID must still point at the record's model.
    """
    data = faker.record("aut", with_control_number=True)
    record = AuthorsRecord.create(data)

    assert data["control_number"] == record["control_number"]

    data.update(
        {
            "name": {
                "name_variants": ["UPDATED"],
                "preferred_name": "UPDATED",
                "value": "UPDATED",
            }
        }
    )

    record_updated = AuthorsRecord.create_or_update(data)
    control_number = str(record_updated["control_number"])

    assert record["control_number"] == record_updated["control_number"]

    stored_metadata = RecordMetadata.query.filter_by(id=record_updated.id).one()
    assert data == stored_metadata.json

    pid = PersistentIdentifier.query.filter_by(
        pid_type="aut", pid_value=control_number
    ).one()
    assert record_updated.model.id == pid.object_uuid
    assert control_number == pid.pid_value
Esempio n. 7
0
def test_aut_record_update_when_changed(inspire_app, celery_app_with_context,
                                        celery_session_worker):
    """Check that an updated ``death_date`` reaches the author's ES document.

    The steps refresh ``records-authors``, wait for a hit total of 1 and then
    wait for the first hit's ``death_date`` to equal the new value.
    """
    expected_death_date = "1900-01-01"

    data = faker.record("aut")
    rec = AuthorsRecord.create(data)
    db.session.commit()

    data["death_date"] = expected_death_date
    data["control_number"] = rec["control_number"]
    rec.update(data)
    db.session.commit()

    refresh_index = {
        "step": current_search.flush_and_refresh,
        "args": ["records-authors"],
    }
    exactly_one_hit = {
        "step": es_search,
        "args": ["records-authors"],
        "expected_result": {
            "expected_key": "hits.total.value",
            "expected_result": 1,
        },
    }
    death_date_updated = {
        "step": es_search,
        "args": ["records-authors"],
        "expected_result": {
            "expected_key": "hits.hits[0]._source.death_date",
            "expected_result": expected_death_date,
        },
    }
    matched = retry_until_matched(
        [refresh_index, exactly_one_hit, death_date_updated])
    # The looked-up value is unused; the subscript only fails if the keys
    # are missing from the returned result.
    matched["hits"]["hits"]
Esempio n. 8
0
def test_session_still_works_after_integrity_error_on_bai_provider(
        inspire_app):
    """Check that two authors built from the same data both end up with a BAI PID.

    For the second create, ``next_bai_number`` is patched to yield 1 and then
    2, and it must be consulted exactly twice. Presumably the first value
    collides with the BAI minted by the first create - confirm against the
    provider. Both commits must succeed and leave two "bai" PIDs.
    """
    next_bai_target = (
        "inspirehep.pidstore.providers.bai.InspireBAIProvider.next_bai_number"
    )
    author_data = faker.record("aut")

    with override_config(FEATURE_FLAG_ENABLE_BAI_PROVIDER=True):
        AuthorsRecord.create(data=author_data)
        db.session.commit()

        with mock.patch(next_bai_target) as next_bai_mock:
            next_bai_mock.side_effect = [1, 2]
            AuthorsRecord.create(data=author_data)
            assert next_bai_mock.call_count == 2
        db.session.commit()

    bai_pid_count = PersistentIdentifier.query.filter_by(pid_type="bai").count()
    assert bai_pid_count == 2
Esempio n. 9
0
def test_create_record_from_db_depending_on_its_pid_type(base_app, db, es):
    """Check that author data yields an ``AuthorsRecord`` from either entry point.

    Both ``InspireRecord.create`` and ``AuthorsRecord.create`` must return an
    ``AuthorsRecord`` whose ``pid_type`` is "aut".
    """
    data = faker.record("aut")

    # isinstance() is the idiomatic type check (type(x) == T is flagged by
    # pycodestyle E721).
    record = InspireRecord.create(data)
    assert isinstance(record, AuthorsRecord)
    assert record.pid_type == "aut"

    record = AuthorsRecord.create(data)
    assert isinstance(record, AuthorsRecord)
    assert record.pid_type == "aut"
Esempio n. 10
0
def test_create_record_from_db_depending_on_its_pid_type(inspire_app):
    """Check that author data yields an ``AuthorsRecord`` from either entry point.

    The same payload is passed first to ``InspireRecord.create`` and then to
    ``AuthorsRecord.create``; each result must be an ``AuthorsRecord`` with
    ``pid_type`` "aut".
    """
    data = faker.record("aut")

    for record_class in (InspireRecord, AuthorsRecord):
        created = record_class.create(data)
        assert isinstance(created, AuthorsRecord)
        assert created.pid_type == "aut"
Esempio n. 11
0
def test_indexer_updates_advisor_when_student_name_changes(
    inspire_app, clean_celery_session
):
    """Check that renaming a student updates the advisor's ES document.

    A student record referencing the advisor is created. Once both records
    are searchable, the student's preferred name is changed and the advisor's
    ``students[0].name`` in ES must eventually show the new name.
    """
    expected_student_name = "Test Student"

    advisor = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    current_search.flush_and_refresh("records-authors")

    advisor_reference = {
        "name": advisor["name"]["value"],
        "record": advisor["self"],
        "degree_type": "phd",
    }
    student = AuthorsRecord.create(
        faker.record("aut", data={"advisors": [advisor_reference]})
    )
    db.session.commit()

    def assert_both_authors_indexed():
        current_search.flush_and_refresh("records-authors")
        found = AuthorsSearch().query_from_iq("").execute()
        assert len(found.hits) == 2

    retry_until_pass(assert_both_authors_indexed, 3)

    student["name"]["preferred_name"] = expected_student_name
    student.update(dict(student))
    db.session.commit()

    def assert_advisor_lists_renamed_student():
        current_search.flush_and_refresh("records-authors")
        advisor_from_es = AuthorsSearch().get_record_data_from_es(advisor)
        assert advisor_from_es["students"][0]["name"] == expected_student_name

    retry_until_pass(assert_advisor_lists_renamed_student, retry_interval=3)
Esempio n. 12
0
def test_regression_get_linked_author_records_uuids_if_author_changed_name_does_not_return_none_for_author_which_name_didnt_change(
    app, clean_celery_session
):
    """Check the linked-records lookup returns an empty set when the name is unchanged.

    Only ``birth_date`` is modified; the record is then reloaded by control
    number and ``get_linked_author_records_uuids_if_author_changed_name`` must
    return ``set()``.
    """
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    author.update(dict(author, birth_date="1950-01-01"))
    db.session.commit()

    reloaded = AuthorsRecord.get_record_by_pid_value(author["control_number"])
    linked_uuids = reloaded.get_linked_author_records_uuids_if_author_changed_name()
    assert set() == linked_uuids
Esempio n. 13
0
def test_get_linked_advisors_when_name_changes(inspire_app):
    """Exercise ``get_linked_advisors_when_name_changes`` across several updates.

    Expected results, in order: falsy for an author without advisors; the
    advisor's id for a freshly created student; falsy after an update that
    only adds ``ids``; the advisor's id again after the preferred name is set
    and after it is removed.
    """

    def save(record):
        # Persist the record's current in-memory state.
        record.update(dict(record))
        db.session.commit()

    advisor = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    assert not advisor.get_linked_advisors_when_name_changes()

    advisor_reference = {
        "name": advisor["name"]["value"],
        "record": advisor["self"],
        "degree_type": "phd",
    }
    student = AuthorsRecord.create(
        faker.record("aut", data={"advisors": [advisor_reference]})
    )
    db.session.commit()

    assert student.get_linked_advisors_when_name_changes() == {str(advisor.id)}

    student["ids"] = [{"value": "0000-0002-1558-1309", "schema": "ORCID"}]
    save(student)

    assert not student.get_linked_advisors_when_name_changes()

    student["name"]["preferred_name"] = "Test Author"
    save(student)
    assert student.get_linked_advisors_when_name_changes() == {str(advisor.id)}

    student["name"].pop("preferred_name")
    save(student)
    assert student.get_linked_advisors_when_name_changes() == {str(advisor.id)}
Esempio n. 14
0
def test_aut_record_appear_in_es_when_created(inspire_app, clean_celery_session):
    """Check that a committed author can be fetched from ES with its control number."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    expected_control_number = author["control_number"]

    def assert_author_indexed():
        current_search.flush_and_refresh("records-authors")
        author_from_es = AuthorsSearch().get_record_data_from_es(author)
        assert expected_control_number == author_from_es["control_number"]

    retry_until_pass(assert_author_indexed)
Esempio n. 15
0
def test_indexer_deletes_record_from_es(inspire_app, datadir):
    """Check that a deleted author can no longer be found in ``records-authors``."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    author.delete()
    db.session.commit()

    def assert_author_gone_from_es():
        current_search.flush_and_refresh("records-authors")
        hits = AuthorsSearch().get_record(str(author.id)).execute().hits
        assert 0 == len(hits)

    retry_until_pass(assert_author_gone_from_es)
Esempio n. 16
0
def test_authors_create(inspire_app):
    """Check that a created author is stored and registered under an "aut" PID.

    The record must equal the JSON stored in ``RecordMetadata``, and the PID
    for its control number must point at the record's model.
    """
    author = AuthorsRecord.create(faker.record("aut"))
    control_number = str(author["control_number"])

    stored_metadata = RecordMetadata.query.filter_by(id=author.id).one()
    assert author == stored_metadata.json

    pid = PersistentIdentifier.query.filter_by(
        pid_type="aut", pid_value=control_number
    ).one()
    assert author.model.id == pid.object_uuid
    assert control_number == pid.pid_value
Esempio n. 17
0
def test_process_references_in_records_process_author_records(
        mock_batch_index, inspire_app, clean_celery_session):
    """Check that processing a renamed author batch-indexes the papers citing them.

    Two literature records reference the same author. The author is renamed
    while ``index_after_commit`` is disconnected from ``models_committed``,
    then ``process_references_in_records`` is run for the author. The first
    call recorded on ``mock_batch_index`` must carry both literature ids.
    """
    author_record = AuthorsRecord.create(faker.record("aut"))

    def create_paper_by_author():
        # Each paper gets its own freshly built payload.
        return LiteratureRecord.create(
            faker.record(
                "lit",
                data={
                    "authors": [{
                        "full_name": author_record["name"]["value"],
                        "record": author_record["self"],
                    }]
                },
            ))

    lit_record = create_paper_by_author()
    lit_record_2 = create_paper_by_author()

    db.session.commit()

    def assert_records_in_es():
        lit_record_from_es = InspireSearch.get_record_data_from_es(lit_record)
        lit_record_from_es_2 = InspireSearch.get_record_data_from_es(
            lit_record_2)
        aut_record_from_es = InspireSearch.get_record_data_from_es(
            author_record)
        assert lit_record_from_es and aut_record_from_es and lit_record_from_es_2

    retry_until_pass(assert_records_in_es, retry_interval=5)

    models_committed.disconnect(index_after_commit)
    try:
        author_record["name"]["value"] = "Another Name"
        author_record.update(dict(author_record))
        db.session.commit()
    finally:
        # Reconnect the signal before process_references_in_records is
        # called. Doing it in ``finally`` means a failing update/commit
        # cannot leave the handler disconnected for later tests.
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([author_record.id])

    task.get(timeout=5)

    assert sorted(mock_batch_index.mock_calls[0][1][0]) == sorted(
        [str(lit_record.id), str(lit_record_2.id)])
Esempio n. 18
0
def test_aut_record_update_when_changed(inspire_app, clean_celery_session):
    """Check that an updated ``death_date`` reaches the author's ES document."""
    expected_death_date = "1900-01-01"

    data = faker.record("aut")
    rec = AuthorsRecord.create(data)
    db.session.commit()

    data["death_date"] = expected_death_date
    data["control_number"] = rec["control_number"]
    rec.update(data)
    db.session.commit()

    def assert_death_date_indexed():
        current_search.flush_and_refresh("records-authors")
        author_from_es = AuthorsSearch().get_record_data_from_es(rec)
        assert expected_death_date == author_from_es["death_date"]

    retry_until_pass(assert_death_date_indexed)
Esempio n. 19
0
def test_indexer_updates_authors_papers_when_name_changes(
    inspire_app, clean_celery_session
):
    """Check that renaming an author refreshes ``facet_author_name`` on their paper.

    The paper's first ``facet_author_name`` entry is expected to be
    ``<control_number>_<name value>``, both before and after the author's
    name value is changed.
    """
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    current_search.flush_and_refresh("records-authors")
    author_cn = author["control_number"]

    paper_author = {
        "record": {
            "$ref": f"https://labs.inspirehep.net/api/authors/{author_cn}"
        },
        "full_name": author["name"]["value"],
    }
    lit_1 = LiteratureRecord.create(
        faker.record("lit", data={"authors": [paper_author]})
    )
    db.session.commit()

    def first_facet_author_name():
        current_search.flush_and_refresh("records-hep")
        paper_from_es = LiteratureSearch().get_record_data_from_es(lit_1)
        return paper_from_es["facet_author_name"][0]

    original_facet = f"{author['control_number']}_{author['name']['value']}"

    def assert_original_facet():
        assert original_facet == first_facet_author_name()

    retry_until_pass(assert_original_facet)

    data = dict(author)
    data["name"]["value"] = "Some other name"
    author.update(data)
    db.session.commit()

    renamed_facet = f"{author['control_number']}_Some other name"

    def assert_renamed_facet():
        assert renamed_facet == first_facet_author_name()

    retry_until_pass(assert_renamed_facet)
Esempio n. 20
0
def test_aut_record_removed_form_es_when_deleted(app, celery_app_with_context,
                                                 celery_session_worker,
                                                 retry_until_matched):
    """Check that deleting an author empties the ``records-authors`` index.

    ``hits.total`` must reach 1 after the record is created and 0 after it is
    deleted and committed.
    """

    def steps_expecting(total):
        # Refresh the index, then search until hits.total equals ``total``.
        return [
            {"step": es.indices.refresh, "args": ["records-authors"]},
            {
                "step": es.search,
                "args": ["records-authors"],
                "expected_result": {
                    "expected_key": "hits.total",
                    "expected_result": total,
                },
            },
        ]

    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    retry_until_matched(steps_expecting(1))

    author.delete()
    db.session.commit()
    retry_until_matched(steps_expecting(0))
Esempio n. 21
0
def test_aut_record_appear_in_es_when_created(app, celery_app_with_context,
                                              celery_session_worker,
                                              retry_until_matched):
    """Check that a committed author becomes the only hit in ``records-authors``.

    Once ``hits.total`` is 1, the first hit's ``_id`` must equal the record's id.
    """
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    refresh_step = {"step": es.indices.refresh, "args": ["records-authors"]}
    search_step = {
        "step": es.search,
        "args": ["records-authors"],
        "expected_result": {"expected_key": "hits.total", "expected_result": 1},
    }
    response = retry_until_matched([refresh_step, search_step])

    first_hit = response["hits"]["hits"][0]
    assert first_hit["_id"] == str(author.id)
Esempio n. 22
0
def test_indexer_updates_authors_papers_when_name_changes(
        inspire_app, celery_app_with_context, celery_session_worker):
    """Check that renaming an author refreshes ``facet_author_name`` on their paper.

    Before and after the rename, ``records-hep`` must hold exactly one hit
    whose single ``facet_author_name`` entry is ``<control_number>_<name>``.
    """
    expected_hits = 1
    expected_facet_author_name_count = 1

    def wait_for_facet(expected_name):
        # The last entry has no "step" callable; how it is evaluated is up
        # to retry_until_matched (presumably against the previous result -
        # confirm against the helper).
        steps = [
            {"step": current_search.flush_and_refresh, "args": ["*"]},
            {
                "step": es_search,
                "args": ["records-hep"],
                "expected_result": {
                    "expected_key": "hits.total.value",
                    "expected_result": expected_hits,
                },
            },
            {
                "expected_key": "hits.hits[0]._source.facet_author_name[0]",
                "expected_result": expected_name,
            },
        ]
        return retry_until_matched(steps, timeout=45)

    def facet_names(results):
        return results["hits"]["hits"][0]["_source"]["facet_author_name"]

    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    current_search.flush_and_refresh("records-authors")
    author_cn = author["control_number"]

    paper_author = {
        "record": {
            "$ref": f"https://labs.inspirehep.net/api/authors/{author_cn}"
        },
        "full_name": author["name"]["value"],
    }
    LiteratureRecord.create(faker.record("lit", data={"authors": [paper_author]}))
    db.session.commit()

    results = wait_for_facet(
        f"{author['control_number']}_{author['name']['value']}")
    assert len(facet_names(results)) == expected_facet_author_name_count

    data = dict(author)
    data["name"]["value"] = "Some other name"
    author.update(data)
    db.session.commit()

    results = wait_for_facet(f"{author['control_number']}_Some other name")
    assert len(facet_names(results)) == expected_facet_author_name_count
Esempio n. 23
0
def test_if_bai_is_processed_on_authors_record_creation(inspire_app):
    """Check that creating an author registers exactly one "bai" PID for its first id value."""
    data = faker.record("aut", other_pids=["bai"])
    bai = data["ids"][0]["value"]

    AuthorsRecord.create(data)

    matching_pids = PersistentIdentifier.query.filter_by(
        pid_type="bai", pid_value=bai
    )
    assert matching_pids.count() == 1
Esempio n. 24
0
def test_process_references_in_records_process_self_citations(
        mock_batch_index, inspire_app, clean_celery_session,
        enable_self_citations):
    """Check that adding a co-author to a paper batch-indexes the paper citing it.

    Two authors and two papers are created. The second paper is built with
    ``literature_citations`` pointing at the first. While ``index_after_commit``
    is disconnected from ``models_committed``, the second author is appended
    to the first paper; ``process_references_in_records`` is then run for the
    first paper, and the first call recorded on ``mock_batch_index`` must
    carry the second paper's id.
    """

    def make_ids(inspire_id, bai):
        # Build a fresh list each time so no two records share the same objects.
        return [
            {
                "value": inspire_id,
                "schema": "INSPIRE ID"
            },
            {
                "value": bai,
                "schema": "INSPIRE BAI"
            },
        ]

    def make_paper_author(author, ids):
        # Author entry of a literature record pointing at ``author``.
        return {
            "ids": ids,
            "full_name": author["name"]["value"],
            "record": author["self"],
        }

    author_record = AuthorsRecord.create(
        faker.record(
            "aut",
            data={
                "name": {
                    "value": "'t Hooft, Gerardus",
                    "name_variants": ["'t Hooft, Gerard", "Hooft, Gerard T."],
                    "preferred_name": "Gerardus 't Hooft",
                },
                "ids": make_ids("INSPIRE-00060582", "G.tHooft.1"),
            },
        ))
    author_record_2 = AuthorsRecord.create(
        faker.record(
            "aut",
            data={
                "name": {
                    "value": "'t Hooft, Gerardus Marcus",
                    "preferred_name": "Gerardus Marcus 't Hooft",
                },
                "ids": make_ids("INSPIRE-00060583", "G.tHooft.2"),
            },
        ))
    lit_record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [
                    make_paper_author(
                        author_record,
                        make_ids("INSPIRE-00060582", "G.tHooft.1")),
                ]
            },
        ))
    lit_record_2 = LiteratureRecord.create(
        faker.record(
            "lit",
            literature_citations=[lit_record["control_number"]],
            data={
                "authors": [
                    make_paper_author(
                        author_record_2,
                        make_ids("INSPIRE-00060583", "G.tHooft.2")),
                ]
            },
        ))
    db.session.commit()

    def assert_records_in_es():
        lit_record_from_es = InspireSearch.get_record_data_from_es(lit_record)
        lit_record_from_es_2 = InspireSearch.get_record_data_from_es(
            lit_record_2)
        aut_record_from_es = InspireSearch.get_record_data_from_es(
            author_record)
        assert lit_record_from_es and aut_record_from_es and lit_record_from_es_2

    retry_until_pass(assert_records_in_es, retry_interval=5)

    models_committed.disconnect(index_after_commit)
    try:
        lit_record["authors"].append(
            make_paper_author(
                author_record_2,
                make_ids("INSPIRE-00060583", "G.tHooft.2")))
        lit_record.update(dict(lit_record))
        db.session.commit()
    finally:
        # Reconnect the signal before process_references_in_records is
        # called. Doing it in ``finally`` means a failing update/commit
        # cannot leave the handler disconnected for later tests.
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([lit_record.id])

    task.get(timeout=5)

    assert sorted(mock_batch_index.mock_calls[0][1][0]) == sorted(
        [lit_record_2.id])
def test_disambiguation_races_assign(override_config, inspire_app,
                                     clean_celery_session,
                                     enable_disambiguation):
    """Check that the paper's author gets a new BAI after an assign request.

    An author and a paper whose only author carries the same BAI are created
    with the BAI feature flags enabled. A cataloger then posts to
    ``/api/assign/author``, and the paper's first author in ES must
    eventually carry an "INSPIRE BAI" different from the original one.
    """
    original_bai = "M.F.A.Hearn.1"
    cataloger = create_user(role="cataloger")

    with override_config(FEATURE_FLAG_ENABLE_BAI_PROVIDER=True,
                         FEATURE_FLAG_ENABLE_BAI_CREATION=True):
        author_record_data = faker.record("aut")
        author_record_data.update({
            "name": {"value": "Michael F. A'Hearn"},
            "ids": [{"schema": "INSPIRE BAI", "value": original_bai}],
        })
        author_record = AuthorsRecord.create(author_record_data)

        paper_author = {
            "ids": [{"value": original_bai, "schema": "INSPIRE BAI"}],
            "uuid": "ce061c1e-866a-422d-9982-652183bae814",
            "full_name": "A'Hearn, M.F.",
            "signature_block": "HARNm",
            "curated_relation": True,
            "record": author_record["self"],
        }
        lit_data = faker.record("lit")
        lit_data.update({"authors": [paper_author]})
        lit_record = LiteratureRecord.create(lit_data)
        db.session.commit()

    assign_payload = {
        "literature_recids": [lit_record["control_number"]],
        "from_author_recid": author_record["control_number"],
    }

    with inspire_app.test_client() as client:
        login_user_via_session(client, email=cataloger.email)
        # The response itself is not inspected; only the ES state is checked.
        client.post(
            "/api/assign/author",
            data=orjson.dumps(assign_payload),
            content_type="application/json",
        )

        def assert_disambiguation_on_record_update():
            paper_from_es = InspireSearch.get_record_data_from_es(lit_record)
            current_bais = get_values_for_schema(
                paper_from_es["authors"][0]["ids"], "INSPIRE BAI")
            assert current_bais[0] != original_bai

        retry_until_pass(assert_disambiguation_on_record_update,
                         retry_interval=2)