def assign_to_new_stub_author(from_author_recid, literature_recids):
    """Move papers from an existing author onto a freshly created stub author.

    A new stub author is created first, the papers are then reassigned to it
    with ``assign_papers`` and the value returned by that call is handed to
    ``update_author_names`` together with the stub author.

    Args:
        from_author_recid: recid of the author the papers are currently
            assigned to.
        literature_recids: recids of the literature records to reassign.

    Returns:
        The ``control_number`` of the newly created stub author.
    """
    # TODO: differentiate from BEARD created stub author
    stub_author = create_new_stub_author()
    signatures = assign_papers(
        from_author_recid,
        stub_author["control_number"],
        literature_recids,
    )
    update_author_names(stub_author, signatures)
    return stub_author["control_number"]
def assign_to_new_stub_author(from_author_recid, literature_recids):
    """Create a stub author from the papers' signatures and reassign the papers.

    The literature records are loaded up front (and materialised as a list,
    because they are used twice), the signatures of ``from_author_recid`` on
    those papers seed the stub author's data, and the papers are finally
    assigned to the new stub author.

    Args:
        from_author_recid: recid of the author the papers are currently
            assigned to.
        literature_recids: recids of the literature records to reassign.

    Returns:
        The ``control_number`` of the newly created stub author.
    """
    # TODO: differentiate from BEARD created stub author
    papers = list(get_literature_records_by_recid(literature_recids))
    signatures = get_author_signatures(from_author_recid, papers)

    # Start from an empty name and let update_author_names fill it in from
    # the signatures; its result is used as the stub author's initial data.
    new_author_fields = update_author_names({"name": {}}, signatures)
    stub_author = create_new_stub_author(**new_author_fields)

    assign_papers(from_author_recid, stub_author, papers, is_stub_author=True)
    return stub_author["control_number"]
def test_create_new_stub_author(inspire_app):
    """A stub author created without arguments carries the default BEARD data."""
    author = create_new_stub_author()
    recid = author["control_number"]
    self_url = f"http://localhost:5000/api/authors/{recid}"

    # NOTE(review): the fixed acquisition datetime presumably relies on time
    # being frozen outside this function — confirm.
    expected_data = {
        "name": {"value": "BEARD STUB"},
        "_collections": ["Authors"],
        "stub": True,
        "acquisition_source": {
            "method": "beard",
            "datetime": "2019-02-15T00:00:00",
        },
        "$schema": "http://localhost:5000/schemas/records/authors.json",
        "control_number": recid,
        "self": {"$ref": self_url},
    }

    assert expected_data == author
def assign_to_new_stub_author(from_author_recid, literature_recids):
    """Create a stub author and queue the paper reassignment as Celery tasks.

    The stub author's data is derived from the signatures of
    ``from_author_recid`` on the given papers. The reassignment itself is not
    done inline: the recids are split with ``chunker`` and one
    ``inspirehep.assign.tasks.assign_papers`` task is sent per batch.

    Args:
        from_author_recid: recid of the author the papers are currently
            assigned to.
        literature_recids: recids of the literature records to reassign.

    Returns:
        The ``control_number`` of the newly created stub author.
    """
    # TODO: differentiate from BEARD created stub author
    papers = get_literature_records_by_recid(literature_recids)
    signatures = get_author_signatures(from_author_recid, papers)
    new_author_fields = update_author_names({"name": {}}, signatures)
    stub_author = create_new_stub_author(**new_author_fields)

    task_name = "inspirehep.assign.tasks.assign_papers"
    assign_workers = count_consumers_for_queue("assign")
    for recid_batch in chunker(literature_recids, 10, assign_workers):
        task_kwargs = {
            "from_author_recid": from_author_recid,
            "to_author_record": stub_author,
            "author_papers_recids": recid_batch,
            "is_stub_author": True,
        }
        current_celery_app.send_task(task_name, kwargs=task_kwargs)

    return stub_author["control_number"]
def disambiguate_signatures(self, clusters):
    """Link the signatures of every cluster to an author record.

    How a cluster is handled depends on the number of authors it carries:

    * exactly one author: all signatures are linked to that author;
    * no authors: a new stub author is created and the signatures are linked
      to it. If nothing could be linked the stub author is hard-deleted again,
      otherwise its names are updated from the linked signatures;
    * two or more authors: the cluster is only counted and logged.

    Args:
        clusters (list): clusters received after the clustering performed by
            inspire_disambiguation.
    """
    for cluster in clusters:
        cluster_authors = cluster["authors"]
        author_count = len(cluster_authors)

        if author_count > 1:
            disambiguation_assigned_clusters.labels("2+").inc()
            LOGGER.debug("Received cluster with more than 1 author.")
            continue

        if author_count == 1:
            only_author = cluster_authors[0]
            disambiguation_assigned_clusters.labels("1").inc()
            LOGGER.debug(
                "Received cluster with 1 author.",
                author=only_author,
                signatures=cluster["signatures"],
            )
            with db.session.begin_nested():
                link_signatures_to_author(
                    cluster["signatures"], only_author["author_id"]
                )
            continue

        # No author in the cluster: create a stub author for its signatures.
        disambiguation_assigned_clusters.labels("0").inc()
        with db.session.begin_nested():
            LOGGER.debug(
                "Received cluster with 0 authors.", signatures=cluster["signatures"]
            )
            stub_author = create_new_stub_author()
            linked = link_signatures_to_author(
                cluster["signatures"], stub_author["control_number"]
            )
            if linked:
                disambiguation_created_authors.inc()
                update_author_names(stub_author, linked)
            else:
                # Nothing was linked, so do not keep the unused stub author.
                stub_author.hard_delete()

    db.session.commit()
def create_new_author(full_name, from_recid):
    """Create a new stub author named ``full_name`` and log its creation.

    The author is created through ``create_new_stub_author`` with a private
    note recording the literature record it was created from.

    Args:
        full_name: value stored under ``name.value`` of the new author.
        from_recid: recid of the literature record that triggered the
            creation; only used in the private note.

    Returns:
        The newly created author record.
    """
    new_author_data = {
        "name": {"value": full_name},
        "_private_notes": [
            {
                "source": "INSPIRE-disambiguation",
                "value": f"Created from literature record {from_recid}",
            }
        ],
    }
    new_author = create_new_stub_author(**new_author_data)

    # Pass the context as keyword arguments so it is emitted as structured
    # fields. A positional dict is treated as %-format arguments for a message
    # that has no placeholders, so its content never reaches the log output.
    # NOTE(review): assumes LOGGER is the structlog-style logger used by the
    # other logging calls in this module — confirm.
    LOGGER.info(
        "Created new author record",
        control_number=str(new_author.get("control_number")),
        full_name=full_name,
    )
    return new_author