def test_literature_create_or_update_with_existing_record(inspire_app):
    """``create_or_update`` must update an existing record in place.

    NOTE(review): despite "literature" in the name, this exercises
    AuthorsRecord ("aut") — consider renaming the test.
    """
    original_data = faker.record("aut", with_control_number=True)
    created = AuthorsRecord.create(original_data)
    assert original_data["control_number"] == created["control_number"]

    original_data.update(
        {
            "name": {
                "name_variants": ["UPDATED"],
                "preferred_name": "UPDATED",
                "value": "UPDATED",
            }
        }
    )

    updated = AuthorsRecord.create_or_update(original_data)
    pid_value = str(updated["control_number"])
    # Same record: the control number must not change on update.
    assert created["control_number"] == updated["control_number"]

    metadata = RecordMetadata.query.filter_by(id=updated.id).one()
    assert original_data == metadata.json

    pid = PersistentIdentifier.query.filter_by(
        pid_type="aut", pid_value=str(pid_value)
    ).one()
    assert updated.model.id == pid.object_uuid
    assert pid_value == pid.pid_value
def test_authors_create_with_invalid_data(inspire_app):
    """A schema-invalid payload must fail and leave no PID behind."""
    invalid_data = faker.record("aut", with_control_number=True)
    invalid_data["invalid_key"] = "should throw an error"
    pid_value = str(invalid_data["control_number"])

    with pytest.raises(ValidationError):
        AuthorsRecord.create(invalid_data)

    leftover_pid = PersistentIdentifier.query.filter_by(
        pid_value=pid_value
    ).one_or_none()
    assert leftover_pid is None
def test_regression_get_linked_author_records_uuids_if_author_changed_name_does_not_return_none_for_author_which_name_didnt_change(
    app, clean_celery_session
):
    """Updating an unrelated field must not be reported as a name change."""
    author = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()

    updated_data = dict(author)
    updated_data["birth_date"] = "1950-01-01"  # not a name field
    author.update(updated_data)
    db.session.commit()

    reloaded = AuthorsRecord.get_record_by_pid_value(author["control_number"])
    assert reloaded.get_linked_author_records_uuids_if_author_changed_name() == set()
def test_authors_create_with_existing_control_number(inspire_app):
    """Creating a record whose control number is already registered must fail."""
    record_data = faker.record("aut", with_control_number=True)

    # Register the PID up-front so the subsequent create collides with it.
    create_pidstore(
        object_uuid=uuid.uuid4(),
        pid_type="aut",
        pid_value=record_data["control_number"],
    )

    with pytest.raises(PIDAlreadyExists):
        AuthorsRecord.create(record_data)
def test_redirection_works_for_authors(inspire_app):
    """A record listing another in ``deleted_records`` redirects its PID."""
    old_record = create_record("aut")
    replacement = create_record(
        "aut", data={"deleted_records": [old_record["self"]]}
    )

    uuid_without_redirect = AuthorsRecord.get_uuid_from_pid_value(
        old_record["control_number"], original_record=True
    )
    uuid_with_redirect = AuthorsRecord.get_uuid_from_pid_value(
        old_record["control_number"]
    )

    assert uuid_without_redirect != uuid_with_redirect
    assert uuid_without_redirect == old_record.id
    assert uuid_with_redirect == replacement.id
def test_aut_record_update_when_changed(
    app, celery_app_with_context, celery_session_worker, retry_until_matched
):
    """An update to ``death_date`` must be reflected in the ES document."""
    record_data = faker.record("aut")
    record = AuthorsRecord.create(record_data)
    db.session.commit()

    expected_death_date = "1900-01-01"
    record_data["death_date"] = expected_death_date
    record.update(record_data)
    db.session.commit()

    steps = [
        {"step": es.indices.refresh, "args": ["records-authors"]},
        {
            "step": es.search,
            "args": ["records-authors"],
            "expected_result": {
                "expected_key": "hits.total",
                "expected_result": 1,
            },
        },
    ]
    hits = retry_until_matched(steps)["hits"]["hits"]
    assert hits[0]["_source"]["death_date"] == expected_death_date
def test_aut_record_update_when_changed(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """An update to ``death_date`` must be reflected in the ES document.

    Fix: the original ended with ``retry_until_matched(steps)["hits"]["hits"]``
    — a useless subscript on an unused return value, which did no checking
    and could only raise a spurious ``KeyError``. The expectations are
    already asserted by the steps themselves, so the result is discarded.
    """
    record_data = faker.record("aut")
    record = AuthorsRecord.create(record_data)
    db.session.commit()

    expected_death_date = "1900-01-01"
    record_data["death_date"] = expected_death_date
    record_data["control_number"] = record["control_number"]
    record.update(record_data)
    db.session.commit()

    steps = [
        {"step": current_search.flush_and_refresh, "args": ["records-authors"]},
        {
            "step": es_search,
            "args": ["records-authors"],
            "expected_result": {
                "expected_key": "hits.total.value",
                "expected_result": 1,
            },
        },
        {
            "step": es_search,
            "args": ["records-authors"],
            "expected_result": {
                "expected_key": "hits.hits[0]._source.death_date",
                "expected_result": expected_death_date,
            },
        },
    ]
    retry_until_matched(steps)
def test_aut_record_appear_in_es_when_created(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """A newly created author must be indexed under its UUID as the ES ``_id``."""
    record = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    expected_id = str(record.id)

    steps = [
        {"step": current_search.flush_and_refresh, "args": ["records-authors"]},
        {
            "step": es_search,
            "args": ["records-authors"],
            "expected_result": {
                "expected_key": "hits.total.value",
                "expected_result": 1,
            },
        },
        {
            "step": es_search,
            "args": ["records-authors"],
            "expected_result": {
                "expected_key": "hits.hits[0]._id",
                "expected_result": expected_id,
            },
        },
    ]
    retry_until_matched(steps)
def clean_stub_authors(): """Removes all the authors created by disambiguation and having no linked papers.""" # We get all the stub authors (created by disambiguation) from ES and we verify # in db if the returned records are stub (ES data might be outdated) stub_authors_query = Q("term", stub=True) stub_authors_search = (AuthorsSearch().query(stub_authors_query).source( ["control_number"])) stub_authors_control_numbers = [("aut", str(author["control_number"])) for author in stub_authors_search.scan()] # We change isolation level in db to the higher one (serializable) to avoid # issues with race condition db.session.connection( execution_options={"isolation_level": "SERIALIZABLE"}) stub_authors_verified = AuthorsRecord.get_records_by_pids( stub_authors_control_numbers) stub_authors_bais = { get_values_for_schema(author["ids"], "INSPIRE BAI")[0]: author for author in stub_authors_verified if author.get("stub") } # We verify which authors have linked papers stub_authors_with_papers = set( query_authors_with_linked_papers_by_bai(stub_authors_bais.keys())) # For every author who has not linked papers we delete record authors_to_remove = set( stub_authors_bais.keys()).difference(stub_authors_with_papers) click.echo( f"Removing {len(authors_to_remove)} stub authors with no linked papers" ) for author_bai in authors_to_remove: author = stub_authors_bais[author_bai] author.delete() db.session.commit() click.echo("Successfully removed stub authors")
def test_aut_record_removed_form_es_when_deleted(inspire_app, clean_celery_session):
    """Deleting an author record must remove its document from Elasticsearch.

    Fix: the original defined two inner helpers both named ``assert_record``;
    the second definition shadowed the first (lint-flagged redefinition) and
    made the two phases hard to tell apart.  They now have distinct names.
    """
    data = faker.record("aut")
    rec = AuthorsRecord.create(data)
    db.session.commit()

    def assert_record_indexed():
        # Phase 1: the freshly created record must appear in the index.
        current_search.flush_and_refresh("records-authors")
        result = es_search("records-authors")
        result_total = get_value(result, "hits.total.value")
        assert result_total == 1

    retry_until_pass(assert_record_indexed)

    rec.delete()
    db.session.commit()

    def assert_record_removed():
        # Phase 2: after deletion the index must contain no author documents.
        current_search.flush_and_refresh("records-authors")
        result = es_search("records-authors")
        result_total = get_value(result, "hits.total.value")
        assert result_total == 0

    retry_until_pass(assert_record_removed)
def get_orcids_for_push(record):
    """Obtain the ORCIDs associated to the list of authors in the Literature record.

    The ORCIDs are looked up both in the ``ids`` of the ``authors`` and in the
    Author records that have claimed the paper.

    Args:
        record(dict): metadata from a Literature record

    Returns:
        Iterator[str]: all ORCIDs associated to these authors
    """
    direct_orcids = []
    claimed_author_recids = []
    for author in record.get("authors", []):
        author_orcids = get_values_for_schema(author.get("ids", []), "ORCID")
        if author_orcids:
            # ORCID present directly on the literature record's author entry.
            direct_orcids.extend(author_orcids)
        elif author.get("curated_relation") is True and "record" in author:
            # Claimed author without an inline ORCID: resolve via its record.
            claimed_author_recids.append(get_recid_from_ref(author["record"]))

    claimed_authors = AuthorsRecord.get_records_by_pids(
        ("aut", str(recid)) for recid in claimed_author_recids
    )
    claimed_orcids = chain.from_iterable(
        get_values_for_schema(author.get("ids", []), "ORCID")
        for author in claimed_authors
    )
    return chain(direct_orcids, claimed_orcids)
def get(self, pid_value):
    """Return the serialized author for ``pid_value``, or 404 if unknown."""
    try:
        record = AuthorsRecord.get_record_by_pid_value(pid_value)
    except PIDDoesNotExistError:
        abort(404)
    return jsonify({"data": author_v1.dump(record)})
def test_create_record_with_multiple_files(inspire_app, cli):
    """The importer CLI accepts several ``-f`` files and creates each record."""
    lit_data = faker.record("lit", with_control_number=True)
    aut_data = faker.record("aut", with_control_number=True)
    lit_cn = lit_data["control_number"]
    aut_cn = aut_data["control_number"]

    with cli.isolated_filesystem():
        # One JSON file per record, named after its control number.
        for control_number, payload in ((lit_cn, lit_data), (aut_cn, aut_data)):
            with open(f"{control_number}.json", "wb") as f:
                f.write(orjson.dumps(payload))

        result = cli.invoke(
            [
                "importer",
                "records",
                "-f",
                f"{lit_cn}.json",
                "-f",
                f"{aut_cn}.json",
            ]
        )
        imported_lit = LiteratureRecord.get_record_by_pid_value(lit_cn)
        imported_aut = AuthorsRecord.get_record_by_pid_value(aut_cn)

        assert result.exit_code == 0
        assert lit_cn == imported_lit["control_number"]
        assert aut_cn == imported_aut["control_number"]
def test_create_record_with_directory(base_app, db, script_info):
    """The importer CLI ``-d`` option imports every JSON file in a directory."""
    runner = CliRunner()
    lit_data = faker.record("lit", with_control_number=True)
    aut_data = faker.record("aut", with_control_number=True)
    lit_cn = lit_data["control_number"]
    aut_cn = aut_data["control_number"]

    with runner.isolated_filesystem():
        os.mkdir("test_directory/")
        # One JSON file per record, named after its control number.
        for control_number, payload in ((lit_cn, lit_data), (aut_cn, aut_data)):
            with open(f"test_directory/{control_number}.json", "w") as f:
                f.write(json.dumps(payload))

        result = runner.invoke(
            importer, ["records", "-d", "test_directory"], obj=script_info
        )
        imported_lit = LiteratureRecord.get_record_by_pid_value(lit_cn)
        imported_aut = AuthorsRecord.get_record_by_pid_value(aut_cn)

        assert result.exit_code == 0
        assert lit_cn == imported_lit["control_number"]
        assert aut_cn == imported_aut["control_number"]
def _get_current_user_author_profile():
    """Return the author record matching the logged-in user's ORCID, or None."""
    orcid = get_current_user_orcid()
    try:
        return AuthorsRecord.get_record_by_pid_value(orcid, "orcid")
    except PIDDoesNotExistError:
        # No author profile linked to this ORCID.
        return None
def test_create_record_from_db_depending_on_its_pid_type(inspire_app):
    """Both the generic and the concrete factory must yield an AuthorsRecord."""
    data = faker.record("aut")

    generic_record = InspireRecord.create(data)
    assert isinstance(generic_record, AuthorsRecord)
    assert generic_record.pid_type == "aut"

    concrete_record = AuthorsRecord.create(data)
    assert isinstance(concrete_record, AuthorsRecord)
    assert concrete_record.pid_type == "aut"
def test_session_still_works_after_integrity_error_on_bai_provider(
        inspire_app):
    """A BAI number collision must be retried without poisoning the DB session."""
    record_data = faker.record("aut")
    with override_config(FEATURE_FLAG_ENABLE_BAI_PROVIDER=True):
        AuthorsRecord.create(data=record_data)
        db.session.commit()
        with mock.patch(
            "inspirehep.pidstore.providers.bai.InspireBAIProvider.next_bai_number"
        ) as next_bai_mock:
            # Two candidate numbers are handed out; the provider is expected
            # to consume both (presumably the first collides — TODO confirm).
            next_bai_mock.side_effect = [1, 2]
            AuthorsRecord.create(data=record_data)
            assert next_bai_mock.call_count == 2
        db.session.commit()
        # Both creations succeeded, so two BAI PIDs must exist.
        expected_pid_count = 2
        bai_pid_count = PersistentIdentifier.query.filter_by(
            pid_type="bai").count()
        assert bai_pid_count == expected_pid_count
def test_create_record_from_db_depending_on_its_pid_type(base_app, db, es):
    """Both the generic and the concrete factory must yield an AuthorsRecord.

    Fix: replaced the ``type(record) == AuthorsRecord`` checks with
    ``isinstance`` — the idiomatic type test, consistent with the sibling
    version of this test, and not flagged by linters (E721).
    """
    data = faker.record("aut")

    record = InspireRecord.create(data)
    assert isinstance(record, AuthorsRecord)
    assert record.pid_type == "aut"

    record = AuthorsRecord.create(data)
    assert isinstance(record, AuthorsRecord)
    assert record.pid_type == "aut"
def test_assign_from_an_author_to_another_that_is_not_stub(inspire_app):
    """Assigning a paper to a non-stub author updates the ref and keeps stub=False."""
    cataloger = create_user(role="cataloger")
    from_author = create_record("aut")
    to_author = create_record("aut", data={"stub": False})
    from_ref = (
        f"http://localhost:5000/api/authors/{from_author['control_number']}"
    )
    literature = create_record(
        "lit",
        data={
            "authors": [
                {
                    "full_name": "Urhan, Ahmet",
                    "record": {
                        "$ref": "http://localhost:5000/api/authors/17200"
                    },
                },
                {
                    "full_name": "Urhan, Harun",
                    "record": {"$ref": from_ref},
                },
            ]
        },
    )

    with inspire_app.test_client() as client:
        login_user_via_session(client, email=cataloger.email)
        payload = {
            "literature_recids": [literature["control_number"]],
            "from_author_recid": from_author["control_number"],
            "to_author_recid": to_author["control_number"],
        }
        response = client.post(
            "/assign/author",
            data=orjson.dumps(payload),
            content_type="application/json",
        )
        assert response.status_code == 200

        literature_after = LiteratureRecord.get_record_by_pid_value(
            literature["control_number"]
        )
        # The second author entry is the one that was moved.
        moved_author = literature_after["authors"][1]
        expected_ref = (
            f"http://localhost:5000/api/authors/{to_author['control_number']}"
        )
        assert moved_author["record"] == {"$ref": expected_ref}
        assert moved_author["curated_relation"]

        to_author_after = AuthorsRecord.get_record_by_pid_value(
            to_author["control_number"]
        )
        assert not to_author_after["stub"]
def test_indexer_updates_advisor_when_student_name_changes(
    inspire_app, clean_celery_session
):
    """Changing a student's preferred name must reindex the advisor's students.

    Fix: the original defined two inner helpers both named ``assert_record``;
    the second definition shadowed the first (lint-flagged redefinition) and
    obscured which phase each retry was checking.  They now have distinct names.
    """
    advisor_data = faker.record("aut")
    advisor = AuthorsRecord.create(advisor_data)
    db.session.commit()
    current_search.flush_and_refresh("records-authors")

    student_data = faker.record(
        "aut",
        data={
            "advisors": [
                {
                    "name": advisor["name"]["value"],
                    "record": advisor["self"],
                    "degree_type": "phd",
                }
            ]
        },
    )
    student = AuthorsRecord.create(student_data)
    db.session.commit()

    def assert_both_records_indexed():
        # Phase 1: advisor and student both present in the index.
        current_search.flush_and_refresh("records-authors")
        records_from_es = AuthorsSearch().query_from_iq("").execute()
        assert len(records_from_es.hits) == 2

    retry_until_pass(assert_both_records_indexed, 3)

    student["name"]["preferred_name"] = "Test Student"
    student.update(dict(student))
    db.session.commit()

    expected_student_name = "Test Student"

    def assert_advisor_sees_new_name():
        # Phase 2: the advisor's denormalized students list carries the new name.
        current_search.flush_and_refresh("records-authors")
        record_from_es = AuthorsSearch().get_record_data_from_es(advisor)
        assert record_from_es["students"][0]["name"] == expected_student_name

    retry_until_pass(assert_advisor_sees_new_name, retry_interval=3)
def update_moved_orcid(old_orcid, new_orcid):
    """Record that an author's ORCID moved from ``old_orcid`` to ``new_orcid``.

    Adds the new ORCID to the author record (if not already present), drops
    the access token tied to the old ORCID, and commits.
    """
    author = AuthorsRecord.get_record_by_pid_value(old_orcid, "orcid")
    if new_orcid not in get_value(author, "ids.value", []):
        # Prepend the new ORCID so it takes precedence over the stale one.
        author["ids"] = [{"schema": "ORCID", "value": new_orcid}, *author["ids"]]
        author.update(dict(author))
    # NOTE(review): token removal/commit/log run unconditionally — confirm
    # this matches the original indentation (collapsed in the source view).
    remove_access_token_for_orcid_account(old_orcid, new_orcid)
    db.session.commit()
    LOGGER.info("ORCID updated", new_orcid=new_orcid, old_orcid=old_orcid)
def test_get_linked_advisors_when_name_changes(inspire_app):
    """Only name-related updates should report the linked advisor's UUID."""
    advisor = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    # An advisor with no students reports nothing.
    assert not advisor.get_linked_advisors_when_name_changes()

    student_data = faker.record(
        "aut",
        data={
            "advisors": [
                {
                    "name": advisor["name"]["value"],
                    "record": advisor["self"],
                    "degree_type": "phd",
                }
            ]
        },
    )
    student = AuthorsRecord.create(student_data)
    db.session.commit()
    advisor_uuid = str(advisor.id)
    # Fresh creation counts as a name change for the linked advisor.
    assert student.get_linked_advisors_when_name_changes() == {advisor_uuid}

    # Updating an unrelated field (ids) reports no advisors.
    student["ids"] = [{"value": "0000-0002-1558-1309", "schema": "ORCID"}]
    student.update(dict(student))
    db.session.commit()
    assert not student.get_linked_advisors_when_name_changes()

    # Adding a preferred name is a name change.
    student["name"]["preferred_name"] = "Test Author"
    student.update(dict(student))
    db.session.commit()
    assert student.get_linked_advisors_when_name_changes() == {advisor_uuid}

    # Removing the preferred name is a name change too.
    del student["name"]["preferred_name"]
    student.update(dict(student))
    db.session.commit()
    assert student.get_linked_advisors_when_name_changes() == {advisor_uuid}
def get(self, pid_value):
    """Serialize the author for editing, enforcing edit permission (403/404)."""
    try:
        record = AuthorsRecord.get_record_by_pid_value(pid_value)
        if not can_user_edit_author_record(record):
            forbidden = jsonify(
                {"message": "You are not allowed to edit this author"}
            )
            return forbidden, 403
    except PIDDoesNotExistError:
        abort(404)
    return jsonify({"data": author_v1.dump(record)})
def assign_to_author(from_author_recid, to_author_recid, literature_recids):
    """Fan out ``assign_papers`` tasks moving papers to the target author.

    Papers are dispatched in batches sized for the number of consumers on the
    ``assign`` queue; the target author is unstubbed afterwards.
    """
    target_author = AuthorsRecord.get_record_by_pid_value(to_author_recid)
    worker_count = count_consumers_for_queue("assign")
    for papers_batch in chunker(literature_recids, 10, worker_count):
        current_celery_app.send_task(
            "inspirehep.assign.tasks.assign_papers",
            kwargs={
                "from_author_recid": from_author_recid,
                "to_author_record": target_author,
                "author_papers_recids": papers_batch,
            },
        )
    unstub_author_by_recid(to_author_recid)
def test_aut_record_appear_in_es_when_created(inspire_app, clean_celery_session):
    """A created author must become searchable with its control number."""
    new_record = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    expected_control_number = new_record["control_number"]

    def assert_record():
        current_search.flush_and_refresh("records-authors")
        es_data = AuthorsSearch().get_record_data_from_es(new_record)
        assert es_data["control_number"] == expected_control_number

    retry_until_pass(assert_record)
def assert_assign():
    """Check via ES and DB that the paper now points at the target author."""
    current_search.flush_and_refresh("*")
    literature_after = LiteratureSearch.get_record_data_from_es(literature)
    # The second author entry is the one that was reassigned.
    moved_author = literature_after["authors"][1]
    to_author_after = AuthorsRecord.get_record_by_pid_value(
        to_author["control_number"]
    )
    expected_ref = (
        f"http://localhost:5000/api/authors/{to_author['control_number']}"
    )
    assert moved_author["record"] == {"$ref": expected_ref}
    assert moved_author["curated_relation"]
    assert moved_author["ids"] == to_author["ids"]
    assert not to_author_after["stub"]
def test_authors_create_or_update_with_new_record(inspire_app):
    """``create_or_update`` on unseen data behaves like ``create``."""
    record = AuthorsRecord.create_or_update(faker.record("aut"))
    pid_value = str(record["control_number"])

    db_entry = RecordMetadata.query.filter_by(id=record.id).one()
    assert record == db_entry.json

    pid = PersistentIdentifier.query.filter_by(
        pid_type="aut", pid_value=str(pid_value)
    ).one()
    assert record.model.id == pid.object_uuid
    assert pid_value == pid.pid_value
def test_signature_linked_by_disambiguation_has_correct_facet_author_name(
        inspire_app, celery_app_with_context, celery_session_worker):
    """Disambiguating an unlinked signature creates an author and links it.

    After ``disambiguate_signatures`` runs on a cluster with no existing
    author, a single new author PID must exist, and the literature document
    in ES must carry the ``{control_number}_{name}`` facet_author_name and a
    $ref pointing at the new author.
    """
    data = faker.record("lit")
    # One signature with a fixed UUID so the cluster below can reference it.
    data["authors"] = [{
        "full_name": "Doe, John",
        "uuid": "94fc2b0a-dc17-42c2-bae3-ca0024079e51"
    }]
    record = LiteratureRecord.create(data)
    db.session.commit()
    # Cluster with no "authors": disambiguation must create a brand-new one.
    clusters = [{
        "signatures": [{
            "publication_id": record["control_number"],
            "signature_uuid": "94fc2b0a-dc17-42c2-bae3-ca0024079e51",
        }],
        "authors": [],
    }]
    disambiguate_signatures(clusters)
    author_pids = PersistentIdentifier.query.filter_by(pid_type="aut").all()
    assert len(author_pids) == 1
    pid_value = author_pids[0].pid_value
    author = AuthorsRecord.get_record_by_pid_value(pid_value)
    author_control_number = author.pop("control_number")
    expected_facet_author_name = [f"{author_control_number}_John Doe"]
    expected_record_ref = f"http://localhost:5000/api/authors/{pid_value}"
    # NOTE(review): the last two steps lack "step"/"args" keys, unlike the
    # sibling tests where each expectation rides on its own es_search step —
    # verify retry_until_matched supports expectation-only entries.
    steps = [
        {
            "step": current_search.flush_and_refresh,
            "args": ["records-hep"]
        },
        {
            "step": es_search,
            "args": ["records-hep"],
            "expected_result": {
                "expected_key": "hits.total.value",
                "expected_result": 1,
            },
        },
        {
            "expected_key": "hits.hits[0]._source.facet_author_name",
            "expected_result": expected_facet_author_name,
        },
        {
            "expected_key": "hits.hits[0]._source.authors[0].record.$ref",
            "expected_result": expected_record_ref,
        },
    ]
    retry_until_matched(steps)
def test_indexer_deletes_record_from_es(inspire_app, datadir):
    """A deleted author record must disappear from the ES index."""
    record = AuthorsRecord.create(faker.record("aut"))
    db.session.commit()
    record.delete()
    db.session.commit()

    def assert_record_is_deleted_from_es():
        current_search.flush_and_refresh("records-authors")
        hits = AuthorsSearch().get_record(str(record.id)).execute().hits
        assert len(hits) == 0

    retry_until_pass(assert_record_is_deleted_from_es)
def test_process_references_in_records_process_author_records(
        mock_batch_index, inspire_app, clean_celery_session):
    """process_references_in_records on an author batch-reindexes its papers.

    Two literature records reference the same author; after the author's name
    changes (with auto-indexing temporarily disconnected so only the task does
    the work), the task must batch-index exactly those two literature records.
    """
    author_record = AuthorsRecord.create(faker.record("aut"))
    lit_record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [{
                    "full_name": author_record["name"]["value"],
                    "record": author_record["self"],
                }]
            },
        ))
    lit_record_2 = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [{
                    "full_name": author_record["name"]["value"],
                    "record": author_record["self"],
                }]
            },
        ))
    db.session.commit()

    def assert_records_in_es():
        # Wait until all three records are searchable before mutating anything.
        lit_record_from_es = InspireSearch.get_record_data_from_es(lit_record)
        lit_record_from_es_2 = InspireSearch.get_record_data_from_es(
            lit_record_2)
        aut_record_from_es = InspireSearch.get_record_data_from_es(
            author_record)
        assert lit_record_from_es and aut_record_from_es and lit_record_from_es_2

    retry_until_pass(assert_records_in_es, retry_interval=5)
    # Disconnect the commit-time indexing signal so the name change below is
    # NOT auto-indexed — only the task under test should trigger reindexing.
    models_committed.disconnect(index_after_commit)
    author_record["name"]["value"] = "Another Name"
    author_record.update(dict(author_record))
    db.session.commit()
    # reconnect signal before we call process_references_in_records
    models_committed.connect(index_after_commit)
    task = process_references_in_records.delay([author_record.id])
    task.get(timeout=5)
    # The first batch_index call must cover exactly the two literature records.
    assert sorted(mock_batch_index.mock_calls[0][1][0]) == sorted(
        [str(lit_record.id), str(lit_record_2.id)])