def test_literature_create_or_update_with_existing_record(inspire_app):
    """Check that ``create_or_update`` updates an already stored record.

    Despite "literature" in the name, the record under test is an ``aut``
    record handled through ``AuthorsRecord``.
    """
    payload = faker.record("aut", with_control_number=True)
    original = AuthorsRecord.create(payload)

    assert payload["control_number"] == original["control_number"]

    # Replace the whole "name" object, then push the same record again.
    payload.update(
        {
            "name": {
                "name_variants": ["UPDATED"],
                "preferred_name": "UPDATED",
                "value": "UPDATED",
            }
        }
    )

    updated = AuthorsRecord.create_or_update(payload)
    recid = str(updated["control_number"])

    assert original["control_number"] == updated["control_number"]

    # The stored JSON must equal the payload that was sent.
    stored_row = RecordMetadata.query.filter_by(id=updated.id).one()
    assert payload == stored_row.json

    # The "aut" PID must point at the updated record.
    pid = PersistentIdentifier.query.filter_by(
        pid_type="aut", pid_value=recid
    ).one()

    assert updated.model.id == pid.object_uuid
    assert recid == pid.pid_value
def test_authors_create_with_invalid_data(inspire_app):
    """Creating an author with an unknown key fails and leaves no PID behind."""
    payload = faker.record("aut", with_control_number=True)
    payload["invalid_key"] = "should throw an error"
    recid = str(payload["control_number"])

    with pytest.raises(ValidationError):
        AuthorsRecord.create(payload)

    # No PID of any type may exist for the rejected control number.
    leftover_pid = PersistentIdentifier.query.filter_by(
        pid_value=recid
    ).one_or_none()
    assert leftover_pid is None
Beispiel #3
0
def test_regression_get_linked_author_records_uuids_if_author_changed_name_does_not_return_none_for_author_which_name_didnt_change(
    app, clean_celery_session
):
    """An update that leaves the name untouched yields an empty set."""
    stored = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    # Change a field unrelated to the author's name.
    changed = dict(stored)
    changed["birth_date"] = "1950-01-01"
    stored.update(changed)
    db.session.commit()

    reloaded = AuthorsRecord.get_record_by_pid_value(stored["control_number"])
    linked = reloaded.get_linked_author_records_uuids_if_author_changed_name()
    assert set() == linked
def test_authors_create_with_existing_control_number(inspire_app):
    """Creating an author whose control number already has an ``aut`` PID raises."""
    payload = faker.record("aut", with_control_number=True)

    # Register the PID up front, attached to an unrelated object UUID.
    create_pidstore(
        pid_type="aut",
        pid_value=payload["control_number"],
        object_uuid=uuid.uuid4(),
    )

    with pytest.raises(PIDAlreadyExists):
        AuthorsRecord.create(payload)
def test_redirection_works_for_authors(inspire_app):
    """A record listed in ``deleted_records`` resolves to its replacement."""
    old_record = create_record("aut")
    replacement = create_record(
        "aut", data={"deleted_records": [old_record["self"]]}
    )
    old_recid = old_record["control_number"]

    # Same PID value, looked up with and without ``original_record=True``.
    uuid_original = AuthorsRecord.get_uuid_from_pid_value(
        old_recid, original_record=True
    )
    uuid_redirected = AuthorsRecord.get_uuid_from_pid_value(old_recid)

    assert uuid_original != uuid_redirected
    assert uuid_original == old_record.id
    assert uuid_redirected == replacement.id
Beispiel #6
0
def test_aut_record_update_when_changed(app, celery_app_with_context,
                                        celery_session_worker,
                                        retry_until_matched):
    """An updated ``death_date`` shows up in the authors search index."""
    author_data = faker.record("aut")
    author = AuthorsRecord.create(author_data)
    db.session.commit()

    expected_death_date = "1900-01-01"
    author_data["death_date"] = expected_death_date
    author.update(author_data)
    db.session.commit()

    # Refresh the index, then search until exactly one hit is reported.
    refresh_step = {"step": es.indices.refresh, "args": ["records-authors"]}
    search_step = {
        "step": es.search,
        "args": ["records-authors"],
        "expected_result": {"expected_key": "hits.total", "expected_result": 1},
    }
    search_result = retry_until_matched([refresh_step, search_step])

    first_hit = search_result["hits"]["hits"][0]
    assert first_hit["_source"]["death_date"] == expected_death_date
Beispiel #7
0
def test_aut_record_update_when_changed(inspire_app, celery_app_with_context,
                                        celery_session_worker):
    """An updated ``death_date`` shows up in the authors search index."""
    author_data = faker.record("aut")
    author = AuthorsRecord.create(author_data)
    db.session.commit()

    expected_death_date = "1900-01-01"
    author_data["death_date"] = expected_death_date
    author_data["control_number"] = author["control_number"]
    author.update(author_data)
    db.session.commit()

    def search_expecting(key, value):
        # One search step that must yield ``value`` at ``key``.
        return {
            "step": es_search,
            "args": ["records-authors"],
            "expected_result": {"expected_key": key, "expected_result": value},
        }

    steps = [
        {"step": current_search.flush_and_refresh, "args": ["records-authors"]},
        search_expecting("hits.total.value", 1),
        search_expecting("hits.hits[0]._source.death_date", expected_death_date),
    ]
    # The subscripted value is discarded; the lookup itself is kept so a
    # result without "hits" -> "hits" still raises here.
    retry_until_matched(steps)["hits"]["hits"]
Beispiel #8
0
def test_aut_record_appear_in_es_when_created(inspire_app,
                                              celery_app_with_context,
                                              celery_session_worker):
    """A freshly created author is found in the index under its record id."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    expected_id = str(author.id)

    def search_expecting(key, value):
        # One search step that must yield ``value`` at ``key``.
        return {
            "step": es_search,
            "args": ["records-authors"],
            "expected_result": {"expected_key": key, "expected_result": value},
        }

    retry_until_matched(
        [
            {
                "step": current_search.flush_and_refresh,
                "args": ["records-authors"],
            },
            search_expecting("hits.total.value", 1),
            search_expecting("hits.hits[0]._id", expected_id),
        ]
    )
Beispiel #9
0
def clean_stub_authors():
    """Delete stub authors that have no linked papers.

    Candidates are taken from the authors search index (``stub: true``),
    re-read through ``AuthorsRecord`` because the index data might be
    outdated, and deleted unless
    ``query_authors_with_linked_papers_by_bai`` reports their BAI.
    """
    # Ask the search index for the control numbers of all stub authors.
    search = (
        AuthorsSearch().query(Q("term", stub=True)).source(["control_number"])
    )
    candidate_pids = []
    for hit in search.scan():
        candidate_pids.append(("aut", str(hit["control_number"])))

    # Switch the db connection to the higher (serializable) isolation level
    # to avoid issues with race conditions.
    db.session.connection(execution_options={"isolation_level": "SERIALIZABLE"})

    # Keep only records that are still flagged as stub in the db, keyed by BAI.
    authors_by_bai = {}
    for author in AuthorsRecord.get_records_by_pids(candidate_pids):
        if not author.get("stub"):
            continue
        bai = get_values_for_schema(author["ids"], "INSPIRE BAI")[0]
        authors_by_bai[bai] = author

    # Work out which of those BAIs have linked papers.
    bais_with_papers = set(
        query_authors_with_linked_papers_by_bai(authors_by_bai.keys())
    )

    # Every remaining author has no linked papers and gets deleted.
    bais_to_remove = set(authors_by_bai.keys()).difference(bais_with_papers)
    click.echo(
        f"Removing {len(bais_to_remove)} stub authors with no linked papers"
    )
    for bai in bais_to_remove:
        authors_by_bai[bai].delete()
    db.session.commit()
    click.echo("Successfully removed stub authors")
Beispiel #10
0
def test_aut_record_removed_form_es_when_deleted(inspire_app, clean_celery_session):
    """An author leaves the authors index once its record is deleted."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    def count_indexed_authors():
        # Refresh first so the search sees the latest index state.
        current_search.flush_and_refresh("records-authors")
        return get_value(es_search("records-authors"), "hits.total.value")

    def assert_author_indexed():
        assert 1 == count_indexed_authors()

    retry_until_pass(assert_author_indexed)

    author.delete()
    db.session.commit()

    def assert_author_gone():
        assert 0 == count_indexed_authors()

    retry_until_pass(assert_author_gone)
Beispiel #11
0
def get_orcids_for_push(record):
    """Collect the ORCIDs of the authors of a Literature record.

    An author's own ``ids`` are used when they contain an ORCID. Otherwise,
    if the author entry has ``curated_relation`` set to ``True`` and carries
    a ``record`` reference, the ORCIDs are read from the referenced Author
    record.

    Args:
        record(dict): metadata from a Literature record

    Returns:
        Iterator[str]: ORCIDs found on the record itself, followed by those
        found in the referenced Author records
    """
    direct_orcids = []
    claimed_recids = []
    for author in record.get("authors", []):
        found = get_values_for_schema(author.get("ids", []), "ORCID")
        if found:
            direct_orcids.extend(found)
            continue
        if author.get("curated_relation") is True and "record" in author:
            claimed_recids.append(get_recid_from_ref(author["record"]))

    claimed_authors = AuthorsRecord.get_records_by_pids(
        ("aut", str(recid)) for recid in claimed_recids
    )

    # Stays lazy: the Author records are only read when the result is consumed.
    claimed_orcids = chain.from_iterable(
        get_values_for_schema(claimed.get("ids", []), "ORCID")
        for claimed in claimed_authors
    )

    return chain(direct_orcids, claimed_orcids)
Beispiel #12
0
 def get(self, pid_value):
     """Return the serialized author for ``pid_value``; abort with 404 if the PID is unknown."""
     try:
         author = AuthorsRecord.get_record_by_pid_value(pid_value)
     except PIDDoesNotExistError:
         abort(404)
     return jsonify({"data": author_v1.dump(author)})
Beispiel #13
0
def test_create_record_with_multiple_files(inspire_app, cli):
    """``importer records`` with two ``-f`` options imports both JSON files."""
    literature_data = faker.record("lit", with_control_number=True)
    author_data = faker.record("aut", with_control_number=True)
    literature_recid = literature_data["control_number"]
    author_recid = author_data["control_number"]
    literature_file = f"{literature_recid}.json"
    author_file = f"{author_recid}.json"

    with cli.isolated_filesystem():
        # Dump each record to "<control_number>.json" in the scratch dir.
        for file_name, payload in (
            (literature_file, literature_data),
            (author_file, author_data),
        ):
            with open(file_name, "wb") as stream:
                stream.write(orjson.dumps(payload))

        command = ["importer", "records", "-f", literature_file, "-f", author_file]
        result = cli.invoke(command)

        imported_literature = LiteratureRecord.get_record_by_pid_value(
            literature_recid
        )
        imported_author = AuthorsRecord.get_record_by_pid_value(author_recid)

        assert result.exit_code == 0
        assert literature_recid == imported_literature["control_number"]
        assert author_recid == imported_author["control_number"]
Beispiel #14
0
def test_create_record_with_directory(base_app, db, script_info):
    """``importer records -d`` imports every JSON file in the directory."""
    literature_data = faker.record("lit", with_control_number=True)
    author_data = faker.record("aut", with_control_number=True)
    literature_recid = literature_data["control_number"]
    author_recid = author_data["control_number"]
    cli_runner = CliRunner()

    with cli_runner.isolated_filesystem():
        os.mkdir("test_directory/")
        # One "<control_number>.json" file per record.
        for recid, payload in (
            (literature_recid, literature_data),
            (author_recid, author_data),
        ):
            with open(f"test_directory/{recid}.json", "w") as stream:
                stream.write(json.dumps(payload))

        result = cli_runner.invoke(
            importer, ["records", "-d", "test_directory"], obj=script_info
        )
        imported_literature = LiteratureRecord.get_record_by_pid_value(
            literature_recid
        )
        imported_author = AuthorsRecord.get_record_by_pid_value(author_recid)

        assert result.exit_code == 0
        assert literature_recid == imported_literature["control_number"]
        assert author_recid == imported_author["control_number"]
Beispiel #15
0
def _get_current_user_author_profile():
    """Return the author record matching the current user's ORCID.

    Returns:
        The record found under the ``orcid`` PID type, or ``None`` when no
        such PID exists.
    """
    orcid = get_current_user_orcid()
    try:
        return AuthorsRecord.get_record_by_pid_value(orcid, "orcid")
    except PIDDoesNotExistError:
        return None
def test_create_record_from_db_depending_on_its_pid_type(inspire_app):
    """Both the generic and the author-specific ``create`` yield an ``AuthorsRecord``."""
    author_data = faker.record("aut")

    # Same payload through the base class first, then the concrete class.
    for record_class in (InspireRecord, AuthorsRecord):
        created = record_class.create(author_data)
        assert isinstance(created, AuthorsRecord)
        assert created.pid_type == "aut"
Beispiel #17
0
def test_session_still_works_after_integrity_error_on_bai_provider(
        inspire_app):
    """Two authors from the same payload end up with two ``bai`` PIDs.

    ``next_bai_number`` is mocked to return 1 and then 2 for the second
    creation, and both values must be consumed. Presumably the first value
    collides with the BAI minted for the first author; confirm against the
    BAI provider.
    """
    author_data = faker.record("aut")
    next_bai_path = (
        "inspirehep.pidstore.providers.bai.InspireBAIProvider.next_bai_number"
    )

    with override_config(FEATURE_FLAG_ENABLE_BAI_PROVIDER=True):
        AuthorsRecord.create(data=author_data)
        db.session.commit()

        with mock.patch(next_bai_path, side_effect=[1, 2]) as next_bai_mock:
            AuthorsRecord.create(data=author_data)
            assert next_bai_mock.call_count == 2
        db.session.commit()

    bai_pid_count = PersistentIdentifier.query.filter_by(pid_type="bai").count()
    assert bai_pid_count == 2
Beispiel #18
0
def test_create_record_from_db_depending_on_its_pid_type(base_app, db, es):
    """Both the generic and the author-specific ``create`` yield an ``AuthorsRecord``.

    The type check uses ``isinstance`` rather than ``type(...) ==``, in line
    with the other variant of this test in the file.
    """
    data = faker.record("aut")

    # Created through the base class.
    record = InspireRecord.create(data)
    assert isinstance(record, AuthorsRecord)
    assert record.pid_type == "aut"

    # Created through the concrete class.
    record = AuthorsRecord.create(data)
    assert isinstance(record, AuthorsRecord)
    assert record.pid_type == "aut"
Beispiel #19
0
def test_assign_from_an_author_to_another_that_is_not_stub(inspire_app):
    """Assigning a paper to a non-stub author relinks the signature and keeps the target non-stub."""
    cataloger = create_user(role="cataloger")
    from_author = create_record("aut")
    to_author = create_record("aut", data={"stub": False})

    # The second author entry is the one linked to ``from_author``.
    unrelated_signature = {
        "full_name": "Urhan, Ahmet",
        "record": {"$ref": "http://localhost:5000/api/authors/17200"},
    }
    signature_to_move = {
        "full_name": "Urhan, Harun",
        "record": {
            "$ref": f"http://localhost:5000/api/authors/{from_author['control_number']}"
        },
    }
    literature = create_record(
        "lit", data={"authors": [unrelated_signature, signature_to_move]}
    )

    request_body = {
        "literature_recids": [literature["control_number"]],
        "from_author_recid": from_author["control_number"],
        "to_author_recid": to_author["control_number"],
    }
    with inspire_app.test_client() as client:
        login_user_via_session(client, email=cataloger.email)
        response = client.post(
            "/assign/author",
            data=orjson.dumps(request_body),
            content_type="application/json",
        )

    assert response.status_code == 200

    # The moved signature now references the target author and is curated.
    reloaded_literature = LiteratureRecord.get_record_by_pid_value(
        literature["control_number"]
    )
    moved_signature = reloaded_literature["authors"][1]
    expected_ref = {
        "$ref": f"http://localhost:5000/api/authors/{to_author['control_number']}"
    }
    assert moved_signature["record"] == expected_ref
    assert moved_signature["curated_relation"]

    reloaded_to_author = AuthorsRecord.get_record_by_pid_value(
        to_author["control_number"]
    )
    assert not reloaded_to_author["stub"]
Beispiel #20
0
def test_indexer_updates_advisor_when_student_name_changes(
    inspire_app, clean_celery_session
):
    """Renaming a student updates the ``students`` entry of the indexed advisor."""
    advisor = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    current_search.flush_and_refresh("records-authors")

    advisor_link = {
        "name": advisor["name"]["value"],
        "record": advisor["self"],
        "degree_type": "phd",
    }
    student = AuthorsRecord.create(
        faker.record("aut", data={"advisors": [advisor_link]})
    )
    db.session.commit()

    def assert_both_authors_indexed():
        current_search.flush_and_refresh("records-authors")
        hits = AuthorsSearch().query_from_iq("").execute().hits
        assert len(hits) == 2

    retry_until_pass(assert_both_authors_indexed, 3)

    expected_student_name = "Test Student"
    student["name"]["preferred_name"] = expected_student_name
    student.update(dict(student))
    db.session.commit()

    def assert_advisor_shows_new_student_name():
        current_search.flush_and_refresh("records-authors")
        advisor_in_es = AuthorsSearch().get_record_data_from_es(advisor)
        assert advisor_in_es["students"][0]["name"] == expected_student_name

    retry_until_pass(assert_advisor_shows_new_student_name, retry_interval=3)
Beispiel #21
0
def update_moved_orcid(old_orcid, new_orcid):
    """Record ``new_orcid`` on the author currently found under ``old_orcid``.

    If the author's ``ids`` do not yet contain ``new_orcid``, it is inserted
    as the first entry and the record is updated. Afterwards
    ``remove_access_token_for_orcid_account`` is called for the two ORCIDs
    and the session is committed.

    Args:
        old_orcid: ORCID used to look up the author record.
        new_orcid: ORCID to add to that record.
    """
    author = AuthorsRecord.get_record_by_pid_value(old_orcid, "orcid")

    known_id_values = get_value(author, "ids.value", [])
    if new_orcid not in known_id_values:
        new_id = {"schema": "ORCID", "value": new_orcid}
        author["ids"] = [new_id] + list(author["ids"])
        author.update(dict(author))

    remove_access_token_for_orcid_account(old_orcid, new_orcid)
    db.session.commit()
    LOGGER.info("ORCID updated", new_orcid=new_orcid, old_orcid=old_orcid)
Beispiel #22
0
def test_get_linked_advisors_when_name_changes(inspire_app):
    """``get_linked_advisors_when_name_changes`` reports advisors only after name changes.

    Covers: an author without advisors, a new student, an update that
    leaves the name alone, a changed preferred name and a removed
    preferred name.
    """
    advisor = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    assert not advisor.get_linked_advisors_when_name_changes()

    advisor_link = {
        "name": advisor["name"]["value"],
        "record": advisor["self"],
        "degree_type": "phd",
    }
    student = AuthorsRecord.create(
        faker.record("aut", data={"advisors": [advisor_link]})
    )
    db.session.commit()

    expected_advisors = {str(advisor.id)}

    def save_student():
        # Push the in-place modifications through ``update`` and commit.
        student.update(dict(student))
        db.session.commit()

    # Newly created student.
    assert student.get_linked_advisors_when_name_changes() == expected_advisors

    # Update that does not touch the name.
    student["ids"] = [{"value": "0000-0002-1558-1309", "schema": "ORCID"}]
    save_student()
    assert not student.get_linked_advisors_when_name_changes()

    # Preferred name changed.
    student["name"]["preferred_name"] = "Test Author"
    save_student()
    assert student.get_linked_advisors_when_name_changes() == expected_advisors

    # Preferred name removed.
    del student["name"]["preferred_name"]
    save_student()
    assert student.get_linked_advisors_when_name_changes() == expected_advisors
Beispiel #23
0
    def get(self, pid_value):
        """Return the serialized author for ``pid_value``.

        Responds with 403 and a message when ``can_user_edit_author_record``
        rejects the record, and aborts with 404 when the PID does not exist.
        """
        try:
            author = AuthorsRecord.get_record_by_pid_value(pid_value)

            if not can_user_edit_author_record(author):
                forbidden = jsonify(
                    {"message": "You are not allowed to edit this author"}
                )
                return forbidden, 403
        except PIDDoesNotExistError:
            abort(404)
        return jsonify({"data": author_v1.dump(author)})
Beispiel #24
0
def assign_to_author(from_author_recid, to_author_recid, literature_recids):
    """Send ``assign_papers`` tasks for the given papers, then unstub the target author.

    The literature recids are split with ``chunker`` (called with ``10`` and
    the number of consumers of the ``assign`` queue) and one task is sent
    per batch.

    Args:
        from_author_recid: recid of the author the papers are taken from.
        to_author_recid: recid of the author the papers are assigned to.
        literature_recids: recids of the papers to reassign.
    """
    target_author = AuthorsRecord.get_record_by_pid_value(to_author_recid)
    worker_count = count_consumers_for_queue("assign")

    for recid_batch in chunker(literature_recids, 10, worker_count):
        task_kwargs = {
            "from_author_recid": from_author_recid,
            "to_author_record": target_author,
            "author_papers_recids": recid_batch,
        }
        current_celery_app.send_task(
            "inspirehep.assign.tasks.assign_papers", kwargs=task_kwargs
        )

    unstub_author_by_recid(to_author_recid)
Beispiel #25
0
def test_aut_record_appear_in_es_when_created(inspire_app, clean_celery_session):
    """A freshly created author can be read back from the search index."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    expected_control_number = author["control_number"]

    def assert_author_indexed():
        current_search.flush_and_refresh("records-authors")
        author_in_es = AuthorsSearch().get_record_data_from_es(author)
        assert expected_control_number == author_in_es["control_number"]

    retry_until_pass(assert_author_indexed)
Beispiel #26
0
 def assert_assign():
     """Check the indexed paper and the target author after the assignment.

     Reads ``literature`` and ``to_author`` from the enclosing scope.
     """
     current_search.flush_and_refresh("*")
     indexed_literature = LiteratureSearch.get_record_data_from_es(literature)
     moved_signature = indexed_literature["authors"][1]
     reloaded_to_author = AuthorsRecord.get_record_by_pid_value(
         to_author["control_number"]
     )

     expected_ref = {
         "$ref": f"http://localhost:5000/api/authors/{to_author['control_number']}"
     }
     assert moved_signature["record"] == expected_ref
     assert moved_signature["curated_relation"]
     assert moved_signature["ids"] == to_author["ids"]
     assert not reloaded_to_author["stub"]
def test_authors_create_or_update_with_new_record(inspire_app):
    """``create_or_update`` on unseen data stores the record and mints an ``aut`` PID."""
    created = AuthorsRecord.create_or_update(faker.record("aut"))
    recid = str(created["control_number"])

    # The stored JSON must equal the returned record.
    stored_row = RecordMetadata.query.filter_by(id=created.id).one()
    assert created == stored_row.json

    # The "aut" PID must point at the new record.
    pid = PersistentIdentifier.query.filter_by(
        pid_type="aut", pid_value=recid
    ).one()

    assert created.model.id == pid.object_uuid
    assert recid == pid.pid_value
def test_signature_linked_by_disambiguation_has_correct_facet_author_name(
        inspire_app, celery_app_with_context, celery_session_worker):
    """Disambiguating a signature creates one author and links the indexed paper to it."""
    signature_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e51"

    literature_data = faker.record("lit")
    literature_data["authors"] = [
        {"full_name": "Doe, John", "uuid": signature_uuid}
    ]
    literature = LiteratureRecord.create(literature_data)
    db.session.commit()

    # One cluster holding the single signature, with no author attached yet.
    cluster = {
        "signatures": [
            {
                "publication_id": literature["control_number"],
                "signature_uuid": signature_uuid,
            }
        ],
        "authors": [],
    }
    disambiguate_signatures([cluster])

    aut_pids = PersistentIdentifier.query.filter_by(pid_type="aut").all()
    assert len(aut_pids) == 1
    author_recid = aut_pids[0].pid_value
    author = AuthorsRecord.get_record_by_pid_value(author_recid)
    author_control_number = author.pop("control_number")

    expected_facet_author_name = [f"{author_control_number}_John Doe"]
    expected_record_ref = f"http://localhost:5000/api/authors/{author_recid}"

    refresh_step = {
        "step": current_search.flush_and_refresh,
        "args": ["records-hep"],
    }
    count_step = {
        "step": es_search,
        "args": ["records-hep"],
        "expected_result": {
            "expected_key": "hits.total.value",
            "expected_result": 1,
        },
    }
    # NOTE(review): the two entries below carry no "step"/"args" and are not
    # nested under "expected_result" like the one above; kept exactly as they
    # were. Confirm how retry_until_matched treats such entries.
    facet_step = {
        "expected_key": "hits.hits[0]._source.facet_author_name",
        "expected_result": expected_facet_author_name,
    }
    ref_step = {
        "expected_key": "hits.hits[0]._source.authors[0].record.$ref",
        "expected_result": expected_record_ref,
    }
    retry_until_matched([refresh_step, count_step, facet_step, ref_step])
Beispiel #29
0
def test_indexer_deletes_record_from_es(inspire_app, datadir):
    """A deleted author can no longer be found in the authors index."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    author.delete()
    db.session.commit()

    def assert_author_not_in_es():
        current_search.flush_and_refresh("records-authors")
        hits = AuthorsSearch().get_record(str(author.id)).execute().hits
        assert 0 == len(hits)

    retry_until_pass(assert_author_not_in_es)
def test_process_references_in_records_process_author_records(
        mock_batch_index, inspire_app, clean_celery_session):
    """``process_references_in_records`` on a renamed author batch-indexes both linked papers."""
    author = AuthorsRecord.create(faker.record("aut"))

    def create_paper_by_author():
        # A literature record whose only author entry references ``author``.
        signature = {
            "full_name": author["name"]["value"],
            "record": author["self"],
        }
        return LiteratureRecord.create(
            faker.record("lit", data={"authors": [signature]})
        )

    first_paper = create_paper_by_author()
    second_paper = create_paper_by_author()

    db.session.commit()

    def assert_all_records_indexed():
        first_in_es = InspireSearch.get_record_data_from_es(first_paper)
        second_in_es = InspireSearch.get_record_data_from_es(second_paper)
        author_in_es = InspireSearch.get_record_data_from_es(author)
        assert first_in_es and author_in_es and second_in_es

    retry_until_pass(assert_all_records_indexed, retry_interval=5)

    # Rename the author while the after-commit indexing signal is disconnected.
    models_committed.disconnect(index_after_commit)
    author["name"]["value"] = "Another Name"
    author.update(dict(author))
    db.session.commit()
    # Reconnect the signal before calling process_references_in_records.
    models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([author.id])
    task.get(timeout=5)

    # First positional argument of the first recorded call on the mock.
    indexed_uuids = mock_batch_index.mock_calls[0][1][0]
    expected_uuids = [str(first_paper.id), str(second_paper.id)]
    assert sorted(indexed_uuids) == sorted(expected_uuids)