Example #1
0
def test_appoint_profile_from_claimed_signature(small_app):
    """Check that a claimed signature's profile is given to its whole cluster.

    Record 11883 gets recid "2" and no claim is set on it here; record
    1358492 gets recid "314159265" with ``curated_relation`` set to True.
    ``send_task`` is patched to return both signatures under the key "2",
    and both records are then expected to carry recid "314159265".
    """
    from inspirehep.modules.disambiguation.tasks import disambiguation_clustering, update_authors_recid

    def store(uuid, body):
        # Write the modified record back and refresh the index.
        es.index(index="records-hep", doc_type="hep", id=uuid, body=body)
        es.indices.refresh("records-hep")

    # First signature: only the phonetic block and profile "2" are set.
    unclaimed_id = str(PersistentIdentifier.get("literature", 11883).object_uuid)
    unclaimed = get_es_record_by_uuid(unclaimed_id)
    unclaimed_author = unclaimed["authors"][0]
    unclaimed_signature = unclaimed_author["uuid"]
    unclaimed_author["signature_block"] = "HAGp"
    unclaimed_author["recid"] = "2"
    store(unclaimed_id, unclaimed)

    # Second signature: same phonetic block, curated, profile "314159265".
    claimed_id = str(PersistentIdentifier.get("literature", 1358492).object_uuid)
    claimed = get_es_record_by_uuid(claimed_id)
    claimed_author = claimed["authors"][0]
    claimed_signature = claimed_author["uuid"]
    claimed_author["signature_block"] = "HAGp"
    claimed_author["recid"] = "314159265"
    claimed_author["curated_relation"] = True
    store(claimed_id, claimed)

    # The patched ``delay`` calls the task function directly via side_effect.
    with patch(
        "celery.current_app.send_task",
        return_value=_BeardObject(({"2": [unclaimed_signature, claimed_signature]}, {})),
    ), patch("inspirehep.modules.disambiguation.tasks.update_authors_recid.delay", side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    assert Record.get_record(unclaimed_id)["authors"][0]["recid"] == "314159265"
    assert Record.get_record(claimed_id)["authors"][0]["recid"] == "314159265"
Example #2
0
def test_solve_claim_conflicts(small_app):
    """Check the case of two claimed signatures landing in one cluster.

    Records 4328 and 1358492 are marked curated with recids "3" and "4";
    record 11883 only receives the phonetic block. ``send_task`` and
    ``_solve_claims_conflict`` are both patched, the latter pairing the
    signature of 11883 with the signature of 1358492. The record 11883 is
    then expected to carry recid "4".
    """
    from inspirehep.modules.disambiguation.tasks import disambiguation_clustering, update_authors_recid

    # Claimed signature #1 (profile "3").
    first_claimed_id = str(PersistentIdentifier.get("literature", 4328).object_uuid)
    first_claimed = get_es_record_by_uuid(first_claimed_id)
    first_claimed_author = first_claimed["authors"][0]
    first_claimed_signature = first_claimed_author["uuid"]

    first_claimed_author["signature_block"] = "HAGp"
    first_claimed_author["curated_relation"] = True
    first_claimed_author["recid"] = "3"
    es.index(index="records-hep", doc_type="hep", id=first_claimed_id, body=first_claimed)
    es.indices.refresh("records-hep")

    # Claimed signature #2 (profile "4").
    second_claimed_id = str(PersistentIdentifier.get("literature", 1358492).object_uuid)
    second_claimed = get_es_record_by_uuid(second_claimed_id)
    second_claimed_author = second_claimed["authors"][0]
    second_claimed_signature = second_claimed_author["uuid"]

    second_claimed_author["signature_block"] = "HAGp"
    second_claimed_author["curated_relation"] = True
    second_claimed_author["recid"] = "4"
    es.index(index="records-hep", doc_type="hep", id=second_claimed_id, body=second_claimed)
    es.indices.refresh("records-hep")

    # Signature that only gets the phonetic block.
    free_id = str(PersistentIdentifier.get("literature", 11883).object_uuid)
    free_record = get_es_record_by_uuid(free_id)
    free_author = free_record["authors"][0]
    free_signature = free_author["uuid"]

    free_author["signature_block"] = "HAGp"
    es.index(index="records-hep", doc_type="hep", id=free_id, body=free_record)
    es.indices.refresh("records-hep")

    # The patched ``delay`` calls the task function directly via side_effect.
    with patch(
        "celery.current_app.send_task",
        return_value=_BeardObject(({"3": [first_claimed_signature, second_claimed_signature, free_signature]}, {})),
    ), patch(
        "inspirehep.modules.disambiguation.logic._solve_claims_conflict",
        return_value=_ConflictObject({second_claimed_signature: [free_signature]}),
    ), patch(
        "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay", side_effect=update_authors_recid
    ):
        disambiguation_clustering("HAGp")

    assert Record.get_record(free_id)["authors"][0]["recid"] == "4"
def test_count_phonetic_block_dispatched(small_app):
    """Expect two ``send_task`` calls for three signatures in two blocks."""
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_daemon, )

    # The queue is expected to hold three records before the daemon runs.
    assert DisambiguationRecord.query.count() == 3

    # (literature recid, phonetic block) for signatures #1, #2 and #3.
    signature_blocks = (
        (4328, "GLASs"),
        (1358492, "HAGp"),
        (11883, "HAGp"),
    )

    for recid, phonetic_block in signature_blocks:
        record_uuid = str(
            PersistentIdentifier.get("literature", recid).object_uuid)
        es_record = get_es_record_by_uuid(record_uuid)

        # Add phonetic block to the record, then store it and refresh.
        es_record['authors'][0]['signature_block'] = phonetic_block
        es.index(index='records-hep',
                 doc_type='hep',
                 id=record_uuid,
                 body=es_record)
        es.indices.refresh('records-hep')

    with patch("celery.current_app.send_task") as dispatched:
        disambiguation_daemon()

        assert dispatched.call_count == 2
Example #4
0
def test_count_phonetic_block_dispatched(small_app):
    """Expect two ``send_task`` calls for three signatures in two blocks."""
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_daemon,
    )

    def assign_block(recid, phonetic_block):
        # Set the first author's phonetic block, store it, refresh the index.
        record_uuid = str(PersistentIdentifier.get(
            "literature", recid).object_uuid)
        es_record = get_es_record_by_uuid(record_uuid)
        es_record['authors'][0]['signature_block'] = phonetic_block
        es.index(index='records-hep', doc_type='hep',
                 id=record_uuid, body=es_record)
        es.indices.refresh('records-hep')

    # The queue is expected to hold three records before the daemon runs.
    assert DisambiguationRecord.query.count() == 3

    assign_block(4328, "GLASs")    # Signature #1.
    assign_block(1358492, "HAGp")  # Signature #2.
    assign_block(11883, "HAGp")    # Signature #3.

    with patch("celery.current_app.send_task") as dispatched:
        disambiguation_daemon()

        assert dispatched.call_count == 2
def test_appoint_profile_from_claimed_signature(small_app):
    """Check that a claimed signature's profile is given to its whole cluster.

    Record 11883 gets recid "2" and no claim is set on it here; record
    1358492 gets recid "314159265" with ``curated_relation`` set to True.
    ``send_task`` is patched to return both signatures under the key "2",
    and both records are then expected to carry recid "314159265".
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering, update_authors_recid)

    # First signature: only the phonetic block and profile "2" are set.
    plain_id = str(
        PersistentIdentifier.get("literature", 11883).object_uuid)
    plain_record = get_es_record_by_uuid(plain_id)
    plain_author = plain_record['authors'][0]
    plain_signature = plain_author['uuid']

    plain_author['signature_block'] = "HAGp"
    plain_author['recid'] = "2"
    es.index(index='records-hep',
             doc_type='hep',
             id=plain_id,
             body=plain_record)
    es.indices.refresh('records-hep')

    # Second signature: same phonetic block, curated, profile "314159265".
    curated_id = str(
        PersistentIdentifier.get("literature", 1358492).object_uuid)
    curated_record = get_es_record_by_uuid(curated_id)
    curated_author = curated_record['authors'][0]
    curated_signature = curated_author['uuid']

    curated_author['signature_block'] = "HAGp"
    curated_author['recid'] = "314159265"
    curated_author['curated_relation'] = True
    es.index(index='records-hep',
             doc_type='hep',
             id=curated_id,
             body=curated_record)
    es.indices.refresh('records-hep')

    # The patched ``delay`` calls the task function directly via side_effect.
    with patch("celery.current_app.send_task",
               return_value=_BeardObject(
                   ({"2": [plain_signature, curated_signature]}, {}))), \
            patch("inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                  side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    assert (Record.get_record(plain_id)['authors'][0]['recid'] ==
            "314159265")
    assert (Record.get_record(curated_id)['authors'][0]['recid'] ==
            "314159265")
Example #6
0
 def get_record(cls, object_uuid, with_deleted=False):
     """Build a record instance from the Elasticsearch document for a UUID.

     ``with_deleted`` is accepted but not used in this body.

     Raises ``PIDDoesNotExistError`` when the lookup fails with a
     ``RecordGetterError`` whose cause is a ``NotFoundError``; any other
     ``RecordGetterError`` is re-raised unchanged.
     """
     try:
         return cls(get_es_record_by_uuid(object_uuid))
     except RecordGetterError as error:
         if not isinstance(error.cause, NotFoundError):
             raise
         # A missing document is reported as a missing PID so the
         # interface renders a 404 page rather than a 500.
         raise PIDDoesNotExistError('es_record', object_uuid)
Example #7
0
 def get_record(cls, object_uuid, with_deleted=False):
     """Return ``cls`` wrapping the Elasticsearch record with this UUID.

     ``with_deleted`` is accepted but not used in this body.

     Raises ``PIDDoesNotExistError`` when the lookup fails with a
     ``RecordGetterError`` whose cause is a ``NotFoundError``; any other
     ``RecordGetterError`` is re-raised unchanged.
     """
     try:
         return cls(get_es_record_by_uuid(object_uuid))
     except RecordGetterError as lookup_error:
         document_missing = isinstance(lookup_error.cause, NotFoundError)
         if document_missing:
             # Raised so the interface renders a 404 page rather than a 500.
             raise PIDDoesNotExistError('es_record', object_uuid)
         raise
Example #8
0
def test_match_signature_with_existing_profile(small_app):
    """Check the case of two signatures matched to an existing profile.

    Records 11883 and 1358492 both receive the "HAGp" phonetic block.
    ``send_task`` is patched to return both signatures under the key "1",
    and both records are then expected to carry recid "1".
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering, update_authors_recid)

    def add_to_block(recid):
        # Return (record uuid, signature uuid) after storing the record
        # with the phonetic block set on its first author.
        record_uuid = str(PersistentIdentifier.get('lit', recid).object_uuid)
        es_record = get_es_record_by_uuid(record_uuid)
        signature_uuid = es_record['authors'][0]['uuid']
        es_record['authors'][0]['signature_block'] = "HAGp"
        es.index(index='records-hep',
                 doc_type='hep',
                 id=record_uuid,
                 body=es_record)
        es.indices.refresh('records-hep')
        return record_uuid, signature_uuid

    first_id, first_signature = add_to_block(11883)
    second_id, second_signature = add_to_block(1358492)

    # The patched ``delay`` calls the task function directly via side_effect.
    with patch("celery.current_app.send_task",
               return_value=_BeardObject(
                   ({"1": [first_signature, second_signature]}, {}))), \
            patch("inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                  side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    assert InspireRecord.get_record(first_id)['authors'][0]['recid'] == "1"
    assert InspireRecord.get_record(second_id)['authors'][0]['recid'] == "1"
Example #9
0
def test_match_signature_with_existing_profile(small_app):
    """Check the case of two signatures matched to an existing profile.

    Records 11883 and 1358492 both receive the "HAGp" phonetic block.
    ``send_task`` is patched to return both signatures under the key "1",
    and both records are then expected to carry recid "1".
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid
    )

    record_ids = []
    signature_uuids = []

    for recid in (11883, 1358492):
        record_uuid = str(PersistentIdentifier.get('lit', recid).object_uuid)
        es_record = get_es_record_by_uuid(record_uuid)
        first_author = es_record['authors'][0]
        record_ids.append(record_uuid)
        signature_uuids.append(first_author['uuid'])

        # Add phonetic block to the record, then store it and refresh.
        first_author['signature_block'] = "HAGp"
        es.index(index='records-hep', doc_type='hep',
                 id=record_uuid, body=es_record)
        es.indices.refresh('records-hep')

    # The patched ``delay`` calls the task function directly via side_effect.
    with patch("celery.current_app.send_task",
               return_value=_BeardObject(({"1": signature_uuids}, {}))), \
            patch("inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                  side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    for record_uuid in record_ids:
        assert InspireRecord.get_record(record_uuid)['authors'][0]['recid'] == "1"
Example #10
0
def test_single_signature_with_no_profile(small_app):
    """Check the case of one new signature with no profile.

    Record 11883 receives the "HAGp" phonetic block. ``send_task`` is
    patched to return its signature under the key "0" in the second
    mapping, and the record is then expected to carry recid "1".
    """
    from inspirehep.modules.disambiguation.tasks import disambiguation_clustering, update_authors_recid

    lone_id = str(PersistentIdentifier.get("literature", 11883).object_uuid)
    lone_record = get_es_record_by_uuid(lone_id)
    lone_author = lone_record["authors"][0]
    lone_signature = lone_author["uuid"]

    # Add phonetic block to the record, then store it and refresh.
    lone_author["signature_block"] = "HAGp"
    es.index(index="records-hep", doc_type="hep", id=lone_id, body=lone_record)
    es.indices.refresh("records-hep")

    # The patched ``delay`` calls the task function directly via side_effect.
    with patch(
        "celery.current_app.send_task", return_value=_BeardObject(({}, {"0": [lone_signature]}))
    ), patch("inspirehep.modules.disambiguation.tasks.update_authors_recid.delay", side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    assert Record.get_record(lone_id)["authors"][0]["recid"] == "1"
Example #11
0
def test_solve_claim_conflicts(small_app):
    """Check the case of two claimed signatures landing in one cluster.

    Records 4328 and 1358492 are marked curated with recids "3" and "4";
    record 11883 only receives the phonetic block. ``send_task`` and
    ``_solve_claims_conflict`` are both patched, the latter pairing the
    signature of 11883 with the signature of 1358492. The record 11883 is
    then expected to carry recid "4".
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering, update_authors_recid)

    def add_to_block(recid, claimed_profile=None):
        # Return (record uuid, signature uuid) after storing the record with
        # the phonetic block set; a given profile also marks it as curated.
        record_uuid = str(PersistentIdentifier.get('lit', recid).object_uuid)
        es_record = get_es_record_by_uuid(record_uuid)
        first_author = es_record['authors'][0]
        signature_uuid = first_author['uuid']

        first_author['signature_block'] = "HAGp"
        if claimed_profile is not None:
            first_author['curated_relation'] = True
            first_author['recid'] = claimed_profile
        es.index(index='records-hep',
                 doc_type='hep',
                 id=record_uuid,
                 body=es_record)
        es.indices.refresh('records-hep')
        return record_uuid, signature_uuid

    # Claimed signature #1.
    _, glashow_claimed_signature = add_to_block(4328, claimed_profile="3")
    # Claimed signature #2.
    _, higgs_claimed_signature = add_to_block(1358492, claimed_profile="4")
    # Signature that only gets the phonetic block.
    higgs_free_id, higgs_free_signature = add_to_block(11883)

    # The patched ``delay`` calls the task function directly via side_effect.
    with patch("celery.current_app.send_task",
               return_value=_BeardObject(({
                   "3": [
                       glashow_claimed_signature, higgs_claimed_signature,
                       higgs_free_signature
                   ]
               }, {}))), \
            patch("inspirehep.modules.disambiguation.logic._solve_claims_conflict",
                  return_value=_ConflictObject({
                      higgs_claimed_signature: [higgs_free_signature]
                  })), \
            patch("inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                  side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    assert InspireRecord.get_record(
        higgs_free_id)['authors'][0]['recid'] == "4"