def test_disambiguate_signatures_cluster_with_more_than_1_authors(
    base_app, db, es_clear, create_record, redis
):
    """Check a cluster listing two authors adds no ``record`` key to the signature.

    A literature record with a single author is created, then
    ``disambiguate_signatures`` is called with one cluster whose ``authors``
    list holds two entries. The test asserts that the record's first author
    has no ``"record"`` key afterwards.
    """
    signature_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e51"
    record = create_record(
        "lit",
        data={"authors": [{"full_name": "Doe, John", "uuid": signature_uuid}]},
    )

    signature = {
        "publication_id": record["control_number"],
        "signature_uuid": signature_uuid,
    }
    candidate_authors = [
        {"author_id": 100, "has_claims": True},
        {"author_id": 101, "has_claims": False},
    ]

    disambiguate_signatures(
        [{"signatures": [signature], "authors": candidate_authors}]
    )

    first_author = record["authors"][0]
    assert "record" not in first_author
def test_disambiguate_signatures_cluster_with_one_author(
    base_app, db, es_clear, create_record, redis
):
    """Check a cluster with exactly one author links the signature to it.

    After ``disambiguate_signatures`` runs on a cluster whose ``authors`` list
    contains only author ``100``, the record's first author is expected to
    carry a ``record.$ref`` pointing at that author, alongside its name, uuid
    and a ``signature_block``.
    """
    signature_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e51"
    record = create_record(
        "lit",
        data={"authors": [{"full_name": "Doe, John", "uuid": signature_uuid}]},
    )

    signature = {
        "publication_id": record["control_number"],
        "signature_uuid": signature_uuid,
    }
    single_author = {"author_id": 100, "has_claims": True}

    disambiguate_signatures(
        [{"signatures": [signature], "authors": [single_author]}]
    )

    expected_author = {
        "full_name": "Doe, John",
        "uuid": signature_uuid,
        "signature_block": "Dj",
        "record": {"$ref": "http://localhost:5000/api/authors/100"},
    }
    linked_author = record["authors"][0]
    assert expected_author == linked_author
def test_disambiguate_signatures_cluster_creates_author_with_facet_author_name(
    inspire_app,
):
    """Check the author created for an author-less cluster exposes ``facet_author_name``.

    ``disambiguate_signatures`` is called with a cluster whose ``authors`` list
    is empty. The test then expects exactly one ``aut`` persistent identifier
    to exist, fetches that author through the test client with the UI
    serializer ``Accept`` header, and compares
    ``metadata.facet_author_name`` in the response with
    ``"<control_number>_John Doe"``.
    """
    signature_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e51"
    record = create_record(
        "lit",
        data={"authors": [{"full_name": "Doe, John", "uuid": signature_uuid}]},
    )

    signature = {
        "publication_id": record["control_number"],
        "signature_uuid": signature_uuid,
    }
    disambiguate_signatures([{"signatures": [signature], "authors": []}])

    aut_pids = PersistentIdentifier.query.filter_by(pid_type="aut").all()
    assert len(aut_pids) == 1

    created_author = AuthorsRecord.get_record_by_pid_value(aut_pids[0].pid_value)
    author_control_number = created_author.pop("control_number")
    expected_facet_author_name = f"{author_control_number}_John Doe"

    with inspire_app.test_client() as client:
        response = client.get(
            f"/authors/{author_control_number}",
            headers={"Accept": "application/vnd+inspire.record.ui+json"},
        )

    response_body = orjson.loads(response.data)
    assert (
        expected_facet_author_name
        == response_body["metadata"]["facet_author_name"]
    )
def test_signature_linked_by_disambiguation_has_correct_facet_author_name(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """Check the indexed literature record reflects the author created by disambiguation.

    A literature record with one author is created and committed, then
    ``disambiguate_signatures`` is called with a cluster that has no authors.
    The test expects a single ``aut`` persistent identifier, and then retries
    a search on ``records-hep`` until the one hit carries the expected
    ``facet_author_name`` and ``authors[0].record.$ref``.
    """
    # NOTE(review): another test with this exact name is defined later in this
    # file; if both live in the same module the later definition replaces this
    # one and this body is never collected -- confirm and rename/remove one.
    signature_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e51"

    literature_data = faker.record("lit")
    literature_data["authors"] = [
        {"full_name": "Doe, John", "uuid": signature_uuid}
    ]
    record = LiteratureRecord.create(literature_data)
    db.session.commit()

    signature = {
        "publication_id": record["control_number"],
        "signature_uuid": signature_uuid,
    }
    disambiguate_signatures([{"signatures": [signature], "authors": []}])

    aut_pids = PersistentIdentifier.query.filter_by(pid_type="aut").all()
    assert len(aut_pids) == 1

    pid_value = aut_pids[0].pid_value
    created_author = AuthorsRecord.get_record_by_pid_value(pid_value)
    author_control_number = created_author.pop("control_number")

    expected_facet_author_name = [f"{author_control_number}_John Doe"]
    expected_record_ref = f"http://localhost:5000/api/authors/{pid_value}"

    refresh_index = {
        "step": current_search.flush_and_refresh,
        "args": ["records-hep"],
    }
    search_for_single_hit = {
        "step": es_search,
        "args": ["records-hep"],
        "expected_result": {
            "expected_key": "hits.total.value",
            "expected_result": 1,
        },
    }
    # The two entries below carry no "step" of their own, only an expectation.
    match_facet_author_name = {
        "expected_key": "hits.hits[0]._source.facet_author_name",
        "expected_result": expected_facet_author_name,
    }
    match_author_ref = {
        "expected_key": "hits.hits[0]._source.authors[0].record.$ref",
        "expected_result": expected_record_ref,
    }

    retry_until_matched(
        [
            refresh_index,
            search_for_single_hit,
            match_facet_author_name,
            match_author_ref,
        ]
    )
def test_disambiguate_signatures_cluster_with_0_authors(inspire_app):
    """Check a cluster without authors yields a new stub author linked to the signature.

    After ``disambiguate_signatures`` runs on a cluster with an empty
    ``authors`` list, exactly one ``aut`` persistent identifier is expected.
    The author record behind it (minus ``control_number``) is compared with
    the expected stub, and the literature record's first author is expected
    to reference it through ``record.$ref``.
    """
    signature_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e51"
    record = create_record(
        "lit",
        data={"authors": [{"full_name": "Doe, John", "uuid": signature_uuid}]},
    )

    signature = {
        "publication_id": record["control_number"],
        "signature_uuid": signature_uuid,
    }
    disambiguate_signatures([{"signatures": [signature], "authors": []}])

    aut_pids = PersistentIdentifier.query.filter_by(pid_type="aut").all()
    assert len(aut_pids) == 1

    pid_value = aut_pids[0].pid_value
    author = AuthorsRecord.get_record_by_pid_value(pid_value)

    expected_ref = f"http://localhost:5000/api/authors/{pid_value}"
    # NOTE(review): the expected acquisition datetime is a fixed value and
    # nothing inside this function pins the clock; presumably time is frozen
    # elsewhere (decorator/fixture outside this view) -- confirm.
    expected_author = {
        "name": {"value": "Doe, John"},
        "_collections": ["Authors"],
        "stub": True,
        "acquisition_source": {
            "method": "beard",
            "datetime": "2019-02-15T00:00:00",
        },
        "$schema": "http://localhost:5000/schemas/records/authors.json",
        "self": {"$ref": expected_ref},
    }

    # The control number is not part of the expected stub, so drop it before
    # comparing the whole record.
    author.pop("control_number")

    assert expected_author == author
    assert expected_ref == record["authors"][0]["record"]["$ref"]
def test_disambiguate_signatures_cluster_with_no_authors_and_invalid_signature_uuid(
    base_app, db, es_clear, create_record, create_pidstore, redis
):
    """Check no author is created when the signature uuid matches no author.

    The cluster's ``signature_uuid`` ends in ``...e52`` while the record's only
    author has a uuid ending in ``...e51``. After ``disambiguate_signatures``
    runs, no ``aut`` persistent identifier is expected to exist.
    """
    author_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e51"
    unknown_signature_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e52"

    record = create_record(
        "lit",
        data={"authors": [{"full_name": "Doe, John", "uuid": author_uuid}]},
    )

    signature = {
        "publication_id": record["control_number"],
        "signature_uuid": unknown_signature_uuid,
    }
    disambiguate_signatures([{"signatures": [signature], "authors": []}])

    # Check that it does not create a new author.
    aut_pids = PersistentIdentifier.query.filter_by(pid_type="aut").all()
    assert len(aut_pids) == 0
def test_signature_linked_by_disambiguation_has_correct_facet_author_name(
    inspire_app, clean_celery_session
):
    """Check the indexed literature record reflects the author created by disambiguation.

    A literature record with one author is created and committed, then
    ``disambiguate_signatures`` is called with a cluster that has no authors.
    The test expects a single ``aut`` persistent identifier, and then retries
    reading the record back from the search index until its
    ``facet_author_name`` and ``authors[0].record.$ref`` match the new author.
    """
    # NOTE(review): an earlier test in this file is defined with the same
    # name; if both live in the same module only this later definition is
    # kept -- confirm and rename/remove one.
    signature_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e51"

    literature_data = faker.record("lit")
    literature_data["authors"] = [
        {"full_name": "Doe, John", "uuid": signature_uuid}
    ]
    record = LiteratureRecord.create(literature_data)
    db.session.commit()

    signature = {
        "publication_id": record["control_number"],
        "signature_uuid": signature_uuid,
    }
    disambiguate_signatures([{"signatures": [signature], "authors": []}])

    aut_pids = PersistentIdentifier.query.filter_by(pid_type="aut").all()
    assert len(aut_pids) == 1

    pid_value = aut_pids[0].pid_value
    created_author = AuthorsRecord.get_record_by_pid_value(pid_value)
    author_control_number = created_author.pop("control_number")

    expected_facet_author_name = [f"{author_control_number}_John Doe"]
    expected_record_ref = f"http://localhost:5000/api/authors/{pid_value}"

    def assert_references():
        # Refresh the index on every attempt before reading the record back.
        current_search.flush_and_refresh("records-hep")
        indexed_record = InspireSearch.get_record_data_from_es(record)
        assert expected_facet_author_name == indexed_record["facet_author_name"]
        indexed_ref = indexed_record["authors"][0]["record"]["$ref"]
        assert expected_record_ref == indexed_ref

    retry_until_pass(assert_references, retry_interval=2)