예제 #1
0
def disambiguate_signatures(self, clusters):
    """Assign the signatures of every cluster to an author.

    Each cluster is handled according to the number of authors it carries:

    * exactly one author: all signatures of the cluster are linked to it;
    * no authors: a new stub author is created and the signatures are linked
      to it. If nothing ends up linked the stub is hard-deleted again,
      otherwise ``update_author_names`` is called with the linked signatures;
    * more than one author: the cluster is only counted and logged.

    The database session is committed once, after all clusters are processed.

    Args:
        clusters (list): clusters received after the clustering performed by
            inspire_disambiguation. Each cluster is read through its
            ``"authors"`` and ``"signatures"`` keys.
    """
    for cluster in clusters:
        authors = cluster["authors"]
        author_count = len(authors)

        if author_count > 1:
            # Ambiguous cluster: record it in the metrics and move on.
            disambiguation_assigned_clusters.labels("2+").inc()
            LOGGER.debug("Received cluster with more than 1 author.")
            continue

        if author_count == 1:
            disambiguation_assigned_clusters.labels("1").inc()
            matched_author = authors[0]
            LOGGER.debug(
                "Received cluster with 1 author.",
                author=matched_author,
                signatures=cluster["signatures"],
            )
            with db.session.begin_nested():
                link_signatures_to_author(
                    cluster["signatures"], matched_author["author_id"]
                )
            continue

        # No author in the cluster: link everything to a freshly created stub.
        disambiguation_assigned_clusters.labels("0").inc()
        with db.session.begin_nested():
            LOGGER.debug(
                "Received cluster with 0 authors.", signatures=cluster["signatures"]
            )
            stub_author = create_new_stub_author()
            linked = link_signatures_to_author(
                cluster["signatures"], stub_author["control_number"]
            )
            if linked:
                disambiguation_created_authors.inc()
                update_author_names(stub_author, linked)
            else:
                # Nothing was linked, so the stub author is removed again.
                stub_author.hard_delete()

    db.session.commit()
예제 #2
0
def test_link_signatures_to_author(base_app, db, es_clear, create_record,
                                   redis):
    """Check that signatures from two literature records get linked to author 123.

    Asserts both the value returned by ``link_signatures_to_author`` and the
    ``$ref`` found on the first author of each record object afterwards.
    """
    john_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e51"
    sam_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e52"
    author_ref = "http://localhost:5000/api/authors/123"

    john_record = create_record(
        "lit",
        data={"authors": [{"full_name": "Doe, John", "uuid": john_uuid}]},
    )
    sam_record = create_record(
        "lit",
        data={"authors": [{"full_name": "Walker, Sam", "uuid": sam_uuid}]},
    )

    linked = link_signatures_to_author(
        [
            {
                "publication_id": john_record["control_number"],
                "signature_uuid": john_uuid,
            },
            {
                "publication_id": sam_record["control_number"],
                "signature_uuid": sam_uuid,
            },
        ],
        123,
    )

    expected = [
        {
            "full_name": "Doe, John",
            "uuid": john_uuid,
            "signature_block": "Dj",
            "record": {"$ref": author_ref},
        },
        {
            "full_name": "Walker, Sam",
            "uuid": sam_uuid,
            "signature_block": "WALCARs",
            "record": {"$ref": author_ref},
        },
    ]
    assert expected == linked
    # The record objects themselves must also point at the author.
    for record in (john_record, sam_record):
        assert author_ref == record["authors"][0]["record"]["$ref"]
예제 #3
0
def test_link_signatures_to_author_missing_uuid(inspire_app):
    """Check that a signature UUID absent from the record links nothing.

    The record's only author has a UUID ending in ``e52`` while the signature
    asks for one ending in ``e51``; the call is expected to return ``[]``.
    """
    uuid_in_record = "94fc2b0a-dc17-42c2-bae3-ca0024079e52"
    uuid_requested = "94fc2b0a-dc17-42c2-bae3-ca0024079e51"

    record = create_record(
        "lit",
        data={"authors": [{"full_name": "Doe, John", "uuid": uuid_in_record}]},
    )
    linked = link_signatures_to_author(
        [
            {
                "publication_id": record["control_number"],
                "signature_uuid": uuid_requested,
            }
        ],
        123,
    )
    assert linked == []