Esempio n. 1
0
def assign_to_new_stub_author(from_author_recid, literature_recids):
    """Reassign papers from an existing author to a newly created stub author.

    A stub author is created first, then ``assign_papers`` is called with the
    source author recid, the stub's control number and the literature recids.
    Whatever ``assign_papers`` returns is handed to ``update_author_names``
    together with the stub author record.

    Args:
        from_author_recid: recid of the author the papers currently belong to.
        literature_recids: recids of the literature records to reassign.

    Returns:
        The ``control_number`` of the newly created stub author.
    """
    # TODO: differentiate from BEARD created stub author
    stub_author = create_new_stub_author()
    stub_recid = stub_author["control_number"]
    signatures = assign_papers(from_author_recid, stub_recid, literature_recids)
    update_author_names(stub_author, signatures)
    # Read the control number again from the record, as the original did,
    # rather than reusing the value captured before the name update.
    return stub_author["control_number"]
Esempio n. 2
0
def assign_to_new_stub_author(from_author_recid, literature_recids):
    """Create a stub author from existing signatures and move papers to it.

    The literature records are fetched and materialised into a list because
    they are consumed twice: once to collect the signatures of the source
    author and once when the papers are assigned. The stub author's initial
    data is whatever ``update_author_names`` returns for an empty-name seed
    record and the collected signatures.

    Args:
        from_author_recid: recid of the author the papers currently belong to.
        literature_recids: recids of the literature records to reassign.

    Returns:
        The ``control_number`` of the newly created stub author.
    """
    # TODO: differentiate from BEARD created stub author
    papers = list(get_literature_records_by_recid(literature_recids))
    signatures = get_author_signatures(from_author_recid, papers)

    seed_record = {"name": {}}
    stub_fields = update_author_names(seed_record, signatures)
    stub_author = create_new_stub_author(**stub_fields)

    assign_papers(from_author_recid, stub_author, papers, is_stub_author=True)
    return stub_author["control_number"]
Esempio n. 3
0
def test_create_new_stub_author(inspire_app):
    """A stub author created with no arguments has the expected default record.

    NOTE(review): the fixed ``acquisition_source.datetime`` value suggests the
    clock is frozen by something outside this function (decorator or fixture)
    -- confirm against the surrounding test module.
    """
    new_author = create_new_stub_author()
    recid = new_author["control_number"]
    self_ref = f"http://localhost:5000/api/authors/{recid}"

    # The control number is generated at creation time, so the expectation is
    # built from the value found on the returned record.
    expected_data = {
        "$schema": "http://localhost:5000/schemas/records/authors.json",
        "_collections": ["Authors"],
        "acquisition_source": {
            "method": "beard",
            "datetime": "2019-02-15T00:00:00",
        },
        "control_number": recid,
        "name": {"value": "BEARD STUB"},
        "self": {"$ref": self_ref},
        "stub": True,
    }

    assert expected_data == new_author
Esempio n. 4
0
def assign_to_new_stub_author(from_author_recid, literature_recids):
    """Create a stub author and dispatch paper reassignment as celery tasks.

    The stub author's initial data is whatever ``update_author_names`` returns
    for an empty-name seed record and the signatures of the source author on
    the given papers. The literature recids are then split into batches with
    ``chunker`` and one ``inspirehep.assign.tasks.assign_papers`` task is sent
    per batch.

    Args:
        from_author_recid: recid of the author the papers currently belong to.
        literature_recids: recids of the literature records to reassign.

    Returns:
        The ``control_number`` of the newly created stub author. The tasks are
        only sent here; this function does not wait for them.
    """
    # TODO: differentiate from BEARD created stub author
    papers = get_literature_records_by_recid(literature_recids)
    signatures = get_author_signatures(from_author_recid, papers)
    stub_fields = update_author_names({"name": {}}, signatures)
    stub_author = create_new_stub_author(**stub_fields)

    # Batching takes the number of consumers on the "assign" queue into
    # account; the exact chunking policy lives in ``chunker``.
    worker_count = count_consumers_for_queue("assign")
    for recid_batch in chunker(literature_recids, 10, worker_count):
        task_kwargs = {
            "from_author_recid": from_author_recid,
            "to_author_record": stub_author,
            "author_papers_recids": recid_batch,
            "is_stub_author": True,
        }
        current_celery_app.send_task(
            "inspirehep.assign.tasks.assign_papers", kwargs=task_kwargs
        )

    return stub_author["control_number"]
Esempio n. 5
0
def disambiguate_signatures(self, clusters):
    """Link the signatures of each cluster to an author record.

    Per cluster, depending on how many authors it carries:

    * exactly one author: every signature is linked to that author;
    * no authors: a new stub author is created and the signatures are linked
      to it. If nothing ends up linked the stub is hard-deleted, otherwise its
      names are updated from the linked signatures;
    * two or more authors: the cluster is only counted and logged.

    Each linking step runs inside its own nested DB transaction and the
    session is committed once after all clusters have been processed.

    Args:
        clusters (list): clusters received after the clustering performed by
            inspire_disambiguation. Each cluster is read through its
            ``"authors"`` and ``"signatures"`` keys.
    """
    for cluster in clusters:
        cluster_authors = cluster["authors"]
        author_count = len(cluster_authors)

        if author_count > 1:
            # Ambiguous cluster: record the metric and move on.
            disambiguation_assigned_clusters.labels("2+").inc()
            LOGGER.debug("Received cluster with more than 1 author.")
            continue

        if author_count == 1:
            disambiguation_assigned_clusters.labels("1").inc()
            sole_author = cluster_authors[0]
            LOGGER.debug(
                "Received cluster with 1 author.",
                author=sole_author,
                signatures=cluster["signatures"],
            )
            with db.session.begin_nested():
                link_signatures_to_author(
                    cluster["signatures"], sole_author["author_id"]
                )
            continue

        # Remaining case: the cluster has no authors at all.
        disambiguation_assigned_clusters.labels("0").inc()
        with db.session.begin_nested():
            LOGGER.debug(
                "Received cluster with 0 authors.", signatures=cluster["signatures"]
            )
            stub_author = create_new_stub_author()
            linked = link_signatures_to_author(
                cluster["signatures"], stub_author["control_number"]
            )
            if linked:
                disambiguation_created_authors.inc()
                update_author_names(stub_author, linked)
            else:
                # Nothing was linked, so drop the stub that was just created.
                stub_author.hard_delete()

    db.session.commit()
Esempio n. 6
0
def create_new_author(full_name, from_recid):
    """Create a stub author record for a name found on a literature record.

    The new author gets ``full_name`` as its name value and a private note,
    sourced as ``INSPIRE-disambiguation``, recording which literature record
    it was created from.

    Args:
        full_name: value stored under ``name.value`` of the new author.
        from_recid: recid of the literature record, mentioned in the note.

    Returns:
        The author record returned by ``create_new_stub_author``.
    """
    provenance_note = {
        "source": "INSPIRE-disambiguation",
        "value": f"Created from literature record {from_recid}",
    }
    new_author = create_new_stub_author(
        name={"value": full_name},
        _private_notes=[provenance_note],
    )

    log_fields = {
        "control_number": str(new_author.get("control_number")),
        "full_name": full_name,
    }
    # NOTE(review): the fields are passed as a single positional dict, not as
    # keyword arguments. Depending on how LOGGER is configured they may be
    # treated as %-format arguments and never appear in the output -- confirm.
    LOGGER.info("Created new author record", log_fields)
    return new_author