Ejemplo n.º 1
0
def assign_to_new_stub_author(from_author_recid, literature_recids):
    """Create a new stub author and assign the given papers to it.

    Args:
        from_author_recid: forwarded to ``assign_papers`` as the source author.
        literature_recids: forwarded to ``assign_papers`` as the papers to move.

    Returns:
        The ``control_number`` of the newly created stub author.
    """
    # TODO: differentiate from BEARD created stub author
    stub_author = create_new_stub_author()
    stub_recid = stub_author["control_number"]
    moved_signatures = assign_papers(
        from_author_recid, stub_recid, literature_recids
    )
    # Refresh the stub's name data from whatever assign_papers returned.
    update_author_names(stub_author, moved_signatures)
    return stub_author["control_number"]
Ejemplo n.º 2
0
def test_update_author_names():
    """Check the name data ``update_author_names`` derives from three signatures.

    The expected record carries the longest full name as ``name.value`` and
    the two remaining names as ``name.name_variants``.
    """
    full_names = ["Doe, John", "Mason, Jane", "longest name in the list"]
    signatures = [{"full_name": full_name} for full_name in full_names]
    stub = {
        "name": {"value": "NEW AUTHOR"},
        "_collections": ["Authors"],
        "$schema": "http://localhost:5000/schemas/record/authors.json",
    }

    updated = update_author_names(stub, signatures)

    expected = {
        "name": {
            "value": "longest name in the list",
            "name_variants": ["Mason, Jane", "Doe, John"],
        },
        "_collections": ["Authors"],
        "$schema": "http://localhost:5000/schemas/record/authors.json",
    }
    assert expected == updated
Ejemplo n.º 3
0
def disambiguate_signatures(self, clusters):
    """Link signatures to authors according to the given clusters.

    Per cluster:

    * exactly one author: every signature is linked to that author;
    * no authors: a new stub author is created and the signatures are linked
      to it; if nothing ends up linked the stub is hard-deleted, otherwise
      its names are updated from the linked signatures;
    * more than one author: the cluster is only counted and logged.

    The session is committed once, after all clusters have been processed.

    Args:
        clusters (list): clusters received after the clustering performed by inspire_disambiguation.
    """
    for cluster in clusters:
        cluster_authors = cluster["authors"]
        author_count = len(cluster_authors)

        if author_count > 1:
            # Ambiguous cluster: record it and move on without linking.
            disambiguation_assigned_clusters.labels("2+").inc()
            LOGGER.debug("Received cluster with more than 1 author.")
            continue

        if author_count == 1:
            disambiguation_assigned_clusters.labels("1").inc()
            sole_author = cluster_authors[0]
            LOGGER.debug(
                "Received cluster with 1 author.",
                author=sole_author,
                signatures=cluster["signatures"],
            )
            with db.session.begin_nested():
                link_signatures_to_author(
                    cluster["signatures"], sole_author["author_id"]
                )
            continue

        # No known author for this cluster: try to back it with a new stub.
        disambiguation_assigned_clusters.labels("0").inc()
        with db.session.begin_nested():
            LOGGER.debug(
                "Received cluster with 0 authors.", signatures=cluster["signatures"]
            )
            stub_author = create_new_stub_author()
            linked = link_signatures_to_author(
                cluster["signatures"], stub_author["control_number"]
            )
            if linked:
                disambiguation_created_authors.inc()
                update_author_names(stub_author, linked)
            else:
                # Nothing was linked, so drop the stub that was just created.
                stub_author.hard_delete()

    db.session.commit()
Ejemplo n.º 4
0
def assign_to_new_stub_author(from_author_recid, literature_recids):
    """Create a stub author seeded from signatures and assign the papers to it.

    The literature records are materialised into a list because they are
    used twice: once to collect the signatures and once by ``assign_papers``.

    Args:
        from_author_recid: forwarded to ``get_author_signatures`` and
            ``assign_papers`` as the source author.
        literature_recids: record ids used to fetch the literature records.

    Returns:
        The ``control_number`` of the newly created stub author.
    """
    # TODO: differentiate from BEARD created stub author
    papers = list(get_literature_records_by_recid(literature_recids))
    signatures = get_author_signatures(from_author_recid, papers)
    # Build the stub's initial data starting from an empty "name" entry.
    name_data = update_author_names({"name": {}}, signatures)
    stub_author = create_new_stub_author(**name_data)
    assign_papers(from_author_recid, stub_author, papers, is_stub_author=True)
    return stub_author["control_number"]
Ejemplo n.º 5
0
def test_update_author_names_doesnt_put_duplicate_name_variants():
    """Check that two identical signature names yield no ``name_variants``.

    Both signatures carry the same full name; the expected record has that
    name as ``name.value`` and no ``name_variants`` key at all.
    """
    signatures = [{"full_name": full_name} for full_name in ("Doe, John", "Doe, John")]
    stub = {
        "name": {"value": "NEW AUTHOR"},
        "_collections": ["Authors"],
        "$schema": "http://localhost:5000/schemas/record/authors.json",
    }

    updated = update_author_names(stub, signatures)

    expected = {
        "name": {"value": "Doe, John"},
        "_collections": ["Authors"],
        "$schema": "http://localhost:5000/schemas/record/authors.json",
    }
    assert expected == updated
Ejemplo n.º 6
0
def assign_to_new_stub_author(from_author_recid, literature_recids):
    """Create a stub author and dispatch paper assignment as batched tasks.

    The stub author is created from name data derived from the signatures
    collected for ``from_author_recid``. ``literature_recids`` is then split
    with ``chunker`` and one ``inspirehep.assign.tasks.assign_papers`` task is
    sent per batch.

    Args:
        from_author_recid: forwarded to ``get_author_signatures`` and to each
            task as ``from_author_recid``.
        literature_recids: record ids used to fetch the literature records
            and split into task batches.

    Returns:
        The ``control_number`` of the newly created stub author.
    """
    # TODO: differentiate from BEARD created stub author
    papers = get_literature_records_by_recid(literature_recids)
    signatures = get_author_signatures(from_author_recid, papers)
    stub_author = create_new_stub_author(
        **update_author_names({"name": {}}, signatures)
    )

    worker_count = count_consumers_for_queue("assign")
    for recid_batch in chunker(literature_recids, 10, worker_count):
        task_kwargs = {
            "from_author_recid": from_author_recid,
            "to_author_record": stub_author,
            "author_papers_recids": recid_batch,
            "is_stub_author": True,
        }
        current_celery_app.send_task(
            "inspirehep.assign.tasks.assign_papers", kwargs=task_kwargs
        )

    return stub_author["control_number"]