Ejemplo n.º 1
0
def test_process_clustering_output_signatures_without_author_id():
    """Check the clustering output when no signature carries an author id.

    Two uncurated signatures on the same publication (id 1) share one
    cluster label. The expected output is a single cluster listing both
    signatures with an empty ``authors`` list.
    """

    def make_signature(uuid):
        # The two fixtures differ only in their signature UUID.
        paper = Publication(
            abstract="Many curated authors",
            authors=["Doe, John", "Doe, J"] + ["Doe, John"] * 4 + ["Jamie"] * 2,
            collaborations=[],
            keywords=["keyword"],
            publication_id=1,
            title="Title",
            topics=["category"],
        )
        return Signature(
            author_affiliation="Rutgers U., Piscataway",
            author_id=None,
            author_name="Doe, John",
            publication=paper,
            signature_block="JOhn",
            signature_uuid=uuid,
            is_curated_author_id=False,
        )

    signature_uuids = (
        "94fc2b0a-dc17-42c2-bae3-ca0024079e52",
        "94fc2b0a-dc17-42c2-bae3-ca0024079e54",
    )

    clusterer_mock = MagicMock()
    # Both samples get the same label, i.e. they form one cluster.
    clusterer_mock.clusterer.labels_ = numpy.array([1, 1])
    # One row per sample, each row wrapping a single Signature.
    clusterer_mock.X = numpy.array(
        [[make_signature(uuid)] for uuid in signature_uuids],
        dtype=object,
    )

    # NOTE(review): signatures are expected as (publication_id, uuid) tuples
    # here, while the multiple-curated-author-ids test expects dicts; confirm
    # which shape process_clustering_output currently returns.
    expected_output = [
        {
            "signatures": [(1, uuid) for uuid in signature_uuids],
            "authors": [],
        }
    ]

    output = process_clustering_output(clusterer_mock)
    # DeepDiff returns an empty (falsy) result when both structures match.
    assert not DeepDiff(output, expected_output, ignore_order=True)
Ejemplo n.º 2
0
def test_process_clustering_output_signatures_multiple_curated_author_ids():
    """Check the clustering output when clusters hold curated author ids.

    Five signatures are split into two clusters by their labels. The first
    cluster has one curated author id (1); the second has two (3 and 5).
    Every curated author id is expected in its cluster's ``authors`` list
    with ``has_claims`` set to ``True``, and signatures without an author id
    are expected only under ``signatures``.
    """

    def make_signature(uuid, publication_id, author_id, curated):
        # Fixtures differ only in UUID, publication id, author id and
        # curation flag; everything else is shared boilerplate.
        paper = Publication(
            abstract="Many curated authors",
            authors=["Doe, John", "Doe, J"] + ["Doe, John"] * 4 + ["Jamie"] * 2,
            collaborations=[],
            keywords=["keyword"],
            publication_id=publication_id,
            title="Title",
            topics=["category"],
        )
        return Signature(
            author_affiliation="Rutgers U., Piscataway",
            author_id=author_id,
            author_name="Doe, John",
            publication=paper,
            signature_block="JOhn",
            signature_uuid=uuid,
            is_curated_author_id=curated,
        )

    # (signature_uuid, publication_id, author_id, is_curated_author_id)
    signature_specs = [
        ("94fc2b0a-dc17-42c2-bae3-ca0024079e52", 11, 1, True),
        ("94fc2b0a-dc17-42c2-bae3-ca0024079e53", 12, None, False),
        ("94fc2b0a-dc17-42c2-bae3-ca0024079e54", 13, 3, True),
        ("94fc2b0a-dc17-42c2-bae3-ca0024079e55", 14, None, False),
        ("94fc2b0a-dc17-42c2-bae3-ca0024079e56", 15, 5, True),
    ]

    clusterer_mock = MagicMock()
    # The first two samples share label 0, the remaining three share label 1.
    clusterer_mock.clusterer.labels_ = numpy.array([0, 0, 1, 1, 1])
    # One row per sample, each row wrapping a single Signature.
    clusterer_mock.X = numpy.array(
        [[make_signature(*spec)] for spec in signature_specs],
        dtype=object,
    )

    def signature_entry(spec):
        # Expected per-signature record in the clustering output.
        uuid, publication_id, _author_id, _curated = spec
        return {"publication_id": publication_id, "signature_uuid": uuid}

    first_cluster = {
        "signatures": [signature_entry(spec) for spec in signature_specs[:2]],
        "authors": [{"author_id": 1, "has_claims": True}],
    }
    second_cluster = {
        "signatures": [signature_entry(spec) for spec in signature_specs[2:]],
        "authors": [
            {"author_id": 3, "has_claims": True},
            {"author_id": 5, "has_claims": True},
        ],
    }
    expected_output = [first_cluster, second_cluster]

    output = process_clustering_output(clusterer_mock)
    # DeepDiff returns an empty (falsy) result when both structures match.
    assert not DeepDiff(output, expected_output, ignore_order=True)