コード例 #1
0
ファイル: author.py プロジェクト: theleestarr/inspire-next
 def before_dump(self, data):
     """Flatten an author record into the dict handed to the dump step.

     Fields read through ``get_value`` default to ``missing`` when the
     record has no value for them.

     Args:
         data (dict): the author record to flatten.

     Returns:
         dict: the flattened author fields.
     """
     def lookup(path):
         # Read ``path`` from the record, defaulting to ``missing``.
         return get_value(data, path, default=missing)

     def first_id(schema):
         # Hand the ``ids`` values stored under ``schema`` to
         # ``get_first_or_missing``.
         return self.get_first_or_missing(
             get_values_for_schema(data.get('ids', []), schema))

     family_name, given_name = self.get_name_splitted(data)
     return {
         'advisors': lookup('advisors'),
         'acquisition_source': lookup('acquisition_source'),
         'arxiv_categories': lookup('arxiv_categories'),
         'blog': self.get_first_or_missing(
             self.get_value_by_description_key(data.get('urls', []), 'blog')),
         'display_name': lookup('name.preferred_name'),
         'family_name': self.get_value_or_missing(family_name),
         'given_name': self.get_value_or_missing(given_name),
         'linkedin': first_id('LINKEDIN'),
         'native_name': lookup('name.native_names[0]'),
         'orcid': first_id('ORCID'),
         'positions': lookup('positions'),
         'project_membership': lookup('project_membership'),
         'public_emails': lookup('email_addresses.value'),
         'status': lookup('status'),
         'twitter': first_id('TWITTER'),
         'websites': lookup('urls.value'),
     }
コード例 #2
0
ファイル: utils.py プロジェクト: harunurhan/inspire-next
def get_orcids_for_push(record):
    """Collect the ORCIDs of the authors of a Literature record.

    An author's own ``ids`` are checked first. When they hold no ORCID and
    the author entry is a curated relation pointing at a ``record``, the
    linked Author record is looked up and its ORCIDs are used instead.

    Args:
        record(dict): metadata from a Literature record

    Returns:
        Iterator[str]: all ORCIDs associated to these authors
    """
    direct_orcids = []
    claimed_recids = []

    for author in record.get('authors', []):
        found = get_values_for_schema(author.get('ids', []), 'ORCID')
        if found:
            direct_orcids.extend(found)
            continue
        # Only curated (claimed) links to an Author record are followed.
        if author.get('curated_relation') is True and 'record' in author:
            claimed_recids.append(get_recid_from_ref(author['record']))

    claiming_authors = get_db_records(
        ('aut', recid) for recid in claimed_recids
    )
    # Kept lazy: nothing is read from the Author records until iteration.
    claimed_orcids = chain.from_iterable(
        get_values_for_schema(claimant.get('ids', []), 'ORCID')
        for claimant in claiming_authors
    )

    return chain(direct_orcids, claimed_orcids)
コード例 #3
0
ファイル: utils.py プロジェクト: tsgit/inspirehep
def get_orcids_for_push(record):
    """Collect the ORCIDs of the authors of a Literature record.

    An author's own ``ids`` are checked first. When they hold no ORCID and
    the author entry is a curated relation pointing at a ``record``, the
    linked Author record is fetched and its ORCIDs are used instead.

    Args:
        record(dict): metadata from a Literature record

    Returns:
        Iterator[str]: all ORCIDs associated to these authors
    """
    direct_orcids = []
    claimed_recids = []

    for author in record.get("authors", []):
        found = get_values_for_schema(author.get("ids", []), "ORCID")
        if found:
            direct_orcids.extend(found)
            continue
        # Only curated (claimed) links to an Author record are followed.
        if author.get("curated_relation") is True and "record" in author:
            claimed_recids.append(get_recid_from_ref(author["record"]))

    claiming_authors = AuthorsRecord.get_records_by_pids(
        ("aut", str(recid)) for recid in claimed_recids
    )
    # Kept lazy: nothing is read from the Author records until iteration.
    claimed_orcids = chain.from_iterable(
        get_values_for_schema(claimant.get("ids", []), "ORCID")
        for claimant in claiming_authors
    )

    return chain(direct_orcids, claimed_orcids)
コード例 #4
0
ファイル: author.py プロジェクト: tsgit/inspirehep
 def before_dump(self, data):
     """Flatten an author record into the dict handed to the dump step.

     Fields read through ``get_value`` default to ``missing`` when the
     record has no value for them.

     Args:
         data (dict): the author record to flatten.

     Returns:
         dict: the flattened author fields.
     """
     def lookup(path):
         # Read ``path`` from the record, defaulting to ``missing``.
         return get_value(data, path, default=missing)

     def first_id(schema):
         # Hand the ``ids`` values stored under ``schema`` to
         # ``get_first_or_missing``.
         return self.get_first_or_missing(
             get_values_for_schema(data.get("ids", []), schema)
         )

     family_name, given_name = self.get_name_split(data)
     return {
         "advisors": lookup("advisors"),
         "alternate_name": lookup("name.name_variants[0]"),
         "acquisition_source": lookup("acquisition_source"),
         "arxiv_categories": lookup("arxiv_categories"),
         "blog": self.get_first_or_missing(
             self.get_value_by_description_key(data.get("urls", []), "blog")
         ),
         "display_name": lookup("name.preferred_name"),
         "family_name": self.get_value_or_missing(family_name),
         "given_name": self.get_value_or_missing(given_name),
         "linkedin": first_id("LINKEDIN"),
         "native_name": lookup("name.native_names[0]"),
         "orcid": first_id("ORCID"),
         "positions": lookup("positions"),
         "project_membership": lookup("project_membership"),
         "emails": lookup("email_addresses"),
         "status": lookup("status"),
         "twitter": first_id("TWITTER"),
         "websites": lookup("urls.value"),
     }
コード例 #5
0
def test_id_is_not_written_to_record_for_stale_data_push(
    mock_hal_create, mock_update_record_with_new_ids, inspire_app, get_fixture
):
    """Push a record to HAL while the first id write raises StaleDataError.

    The mocked ``update_record_with_new_ids`` fails once and then delegates
    to the real function; the record must end up with the HAL id carried by
    the mocked create receipt.
    """
    receipt = Deposit_Receipt()
    receipt.id = "hal:123456"
    mock_hal_create.return_value = receipt

    # Non-empty once the first (failing) call has happened.
    already_failed = []

    def fail_once_then_delegate(*args, **kwargs):
        if not already_failed:
            already_failed.append(True)
            raise StaleDataError
        return update_record_with_new_ids(*args, **kwargs)

    mock_update_record_with_new_ids.side_effect = fail_once_then_delegate

    literature_json = orjson.loads(get_fixture("hal_preprod_record.json"))
    record = InspireRecord.create(faker.record("lit", data=literature_json))

    institution_json = orjson.loads(get_fixture("hal_preprod_institute.json"))
    InspireRecord.create(faker.record("ins", data=institution_json))

    _hal_push(record)

    record = InspireRecord.get_record_by_pid_value(record["control_number"], "lit")
    stored_hal_ids = get_values_for_schema(
        record["external_system_identifiers"], "HAL"
    )
    assert stored_hal_ids == ["hal:123456"]
コード例 #6
0
def clean_stub_authors():
    """Removes all the authors created by disambiguation and having no linked papers."""
    # Candidate stubs come from ES; since the index may be outdated, the
    # ``stub`` flag is checked again on the records loaded from the db.
    search = AuthorsSearch().query(Q("term", stub=True)).source(["control_number"])
    candidate_pids = [
        ("aut", str(hit["control_number"])) for hit in search.scan()
    ]

    # Raise the isolation level to serializable to avoid race conditions.
    db.session.connection(execution_options={"isolation_level": "SERIALIZABLE"})

    stub_authors_by_bai = {}
    for author in AuthorsRecord.get_records_by_pids(candidate_pids):
        if not author.get("stub"):
            continue
        bai = get_values_for_schema(author["ids"], "INSPIRE BAI")[0]
        stub_authors_by_bai[bai] = author

    # Stubs that still have linked papers are kept.
    bais_with_papers = set(
        query_authors_with_linked_papers_by_bai(stub_authors_by_bai.keys())
    )
    bais_to_remove = set(stub_authors_by_bai.keys()) - bais_with_papers

    click.echo(
        f"Removing {len(bais_to_remove)} stub authors with no linked papers"
    )
    for bai in bais_to_remove:
        stub_authors_by_bai[bai].delete()
    db.session.commit()
    click.echo("Successfully removed stub authors")
コード例 #7
0
ファイル: test_record.py プロジェクト: pazembrz/inspire-utils
def test_get_values_for_schema():
    """Only values whose schema matches are returned, in input order."""
    pairs = (('good', 'first'), ('bad', 'second'), ('good', 'third'))
    elements = [{'schema': schema, 'value': value} for schema, value in pairs]

    expected = ['first', 'third']
    result = get_values_for_schema(elements, 'good')

    assert result == expected
コード例 #8
0
def test_orcid_is_updated_if_was_moved(inspire_app, user_remote_account):
    """After ``update_moved_orcid`` the author record lists the new ORCID."""
    old_orcid = user_remote_account.remote_account.extra_data["orcid"]
    new_orcid = "0000-0003-4792-9178"

    author_ids = [
        {"schema": "INSPIRE BAI", "value": "J.Smith.1"},
        {"schema": "ORCID", "value": old_orcid},
    ]
    data = {
        "$schema": "http://localhost:5000/schemas/records/authors.json",
        "_collections": ["Authors"],
        "control_number": 123456789,
        "ids": author_ids,
        "name": {"value": "Smith, John"},
    }
    rec = create_record("aut", data=data)
    db.session.commit()

    update_moved_orcid(old_orcid, new_orcid)

    author_record = InspireRecord.get_record_by_pid_value(
        rec["control_number"], "aut"
    )
    stored_orcids = get_values_for_schema(author_record.get("ids", []), "ORCID")
    assert new_orcid in stored_orcids
コード例 #9
0
        def assert_disambiguation_on_record_update():
            """Check the first author's indexed BAI is not "M.F.A.Hearn.1"."""
            es_record = InspireSearch.get_record_data_from_es(lit_record)
            first_author_ids = es_record["authors"][0]["ids"]
            first_author_bais = get_values_for_schema(
                first_author_ids, "INSPIRE BAI"
            )

            assert first_author_bais[0] != "M.F.A.Hearn.1"
コード例 #10
0
def test_push_happy_flow(inspire_app, get_fixture):
    """Push a record to HAL twice and check the HAL id is stored on it."""
    literature_json = orjson.loads(get_fixture("hal_preprod_record.json"))
    record = InspireRecord.create(faker.record("lit", data=literature_json))

    institution_json = orjson.loads(get_fixture("hal_preprod_institute.json"))
    InspireRecord.create(faker.record("ins", data=institution_json))

    # First push: hal create.
    create_receipt = _hal_push(record)
    assert create_receipt
    assert create_receipt.parsed

    hal_id = create_receipt.id
    assert hal_id

    # The id from the receipt must have been written back to the record.
    updated_record = InspireRecord.get_record_by_pid_value(
        record["control_number"], "lit"
    )
    stored_identifiers = get_value(updated_record, "external_system_identifiers", [])
    stored_hal_ids = get_values_for_schema(stored_identifiers, "HAL")
    assert stored_hal_ids[0] == hal_id

    # Second push: hal update.
    update_receipt = _hal_push(record)
    assert update_receipt
    assert update_receipt.parsed
コード例 #11
0
def get_reference_and_bai_if_unambiguous_match(matched_refs_ids):
    """Return the reference and BAI of the match when exactly one exists.

    Args:
        matched_refs_ids (dict): maps an author reference to the ``ids``
            list of that author, as accepted by ``get_values_for_schema``.

    Returns:
        dict or None: ``{"author_reference": ..., "author_bai": ...}`` when
        the mapping holds exactly one entry (``author_bai`` is ``None`` if
        the ids contain no "INSPIRE BAI" value); ``None`` when there are
        zero or several matches.
    """
    if len(matched_refs_ids) != 1:
        return None

    # Read the single entry without ``popitem()`` so that the caller's
    # mapping is not emptied as a side effect of this lookup.
    author_reference, author_ids = next(iter(matched_refs_ids.items()))
    author_bais = get_values_for_schema(author_ids, "INSPIRE BAI")
    return {
        "author_reference": author_reference,
        "author_bai": author_bais[0] if author_bais else None,
    }
コード例 #12
0
ファイル: utils.py プロジェクト: turtle321/inspire-next
def get_author_with_record_facet_author_name(author):
    """Build the ``<bai>_<name>`` facet string for an author record.

    The name part is the author's preferred name when present, otherwise
    the display form of ``author['name']['value']``.

    Args:
        author (dict): an author record.

    Returns:
        unicode: the BAI and the name joined by an underscore.
    """
    author_ids = author.get('ids', [])
    author_bais = get_values_for_schema(author_ids, 'INSPIRE BAI')
    # An author without any BAI used to raise IndexError here; fall back to
    # the literal 'BAI' placeholder so a facet name can still be produced.
    bai = author_bais[0] if author_bais else 'BAI'
    author_preferred_name = get_value(author, 'name.preferred_name')
    if author_preferred_name:
        return u'{}_{}'.format(bai, author_preferred_name)
    return u'{}_{}'.format(bai, get_author_display_name(author['name']['value']))
コード例 #13
0
def can_user_edit_author_record(author_record):
    """Tell whether the current user may edit ``author_record``.

    Superusers and catalogers always may; any other user may only when
    their ORCID is among the ORCID ids of the record.
    """
    if not is_superuser_or_cataloger_logged_in():
        record_orcids = get_values_for_schema(
            author_record.get("ids", []), "ORCID"
        )
        return get_current_user_orcid() in record_orcids
    return True
コード例 #14
0
ファイル: examples.py プロジェクト: inspirehep/inspirehep
def get_bais_by_recid(record):
    """Map each author's recid to the first INSPIRE BAI in its ``ids``.

    Authors lacking either a BAI or a recid are left out of the result.
    """
    bais_by_recid = {}
    for author in record.get("authors", []):
        author_bais = get_values_for_schema(author.get("ids", []), "INSPIRE BAI")
        first_bai = author_bais[0] if author_bais else None
        author_recid = get_author_recid(author)
        if not (first_bai and author_recid):
            continue
        bais_by_recid[author_recid] = first_bai

    return bais_by_recid
コード例 #15
0
ファイル: utils.py プロジェクト: harunurhan/inspire-next
def get_author_with_record_facet_author_name(author):
    """Build the ``<bai>_<name>`` facet string for an author record.

    The BAI part is the first INSPIRE BAI of the author, or the literal
    ``'BAI'`` when there is none. The name part is the preferred name when
    present, otherwise the display form of ``author['name']['value']``.
    """
    bais = get_values_for_schema(author.get('ids', []), 'INSPIRE BAI')
    if bais:
        bai = bais[0]
    else:
        bai = 'BAI'

    name = get_value(author, 'name.preferred_name')
    if not name:
        name = get_author_display_name(author['name']['value'])
    return u'{}_{}'.format(bai, name)
コード例 #16
0
    def assert_first_disambiguation_no_match():
        """Check the indexed first author has a BAI and its own set of ids."""
        es_record = InspireSearch.get_record_data_from_es(literature_record_3)
        es_author_ids = es_record["authors"][0]["ids"]

        assert get_values_for_schema(es_author_ids, "INSPIRE BAI")
        # The ids must differ from those of both other literature records.
        assert es_author_ids != literature_record["authors"][0]["ids"]
        assert es_author_ids != literature_record_2["authors"][0]["ids"]
コード例 #17
0
def get_facet_author_name_for_author(author):
    """Build the ``<bai>_<name>`` facet string for an author record.

    The BAI part is the first INSPIRE BAI of the author, or the literal
    ``"BAI"`` when there is none. The name part is the preferred name when
    present, otherwise the display form of ``name.value``.
    """
    bais = get_values_for_schema(author.get("ids", []), "INSPIRE BAI")
    if bais:
        bai = bais[0]
    else:
        bai = "BAI"

    name = get_value(author, "name.preferred_name")
    if not name:
        name = get_display_name_for_author_name(get_value(author, "name.value"))
    return "{}_{}".format(bai, name)
コード例 #18
0
def assign_papers(
    from_author_recid, to_author_record, author_papers, is_stub_author=False
):
    """Re-point the papers of one author to another author record.

    In every paper the author entry found for ``from_author_recid`` gets
    the reference and BAI of ``to_author_record``; unless the target is a
    stub author the relation is also flagged as curated. Each paper is then
    updated with its own modified content.
    """
    target_bai = get_values_for_schema(to_author_record["ids"], "INSPIRE BAI")[0]
    flag_as_curated = not is_stub_author

    for paper in author_papers:
        paper_author = get_author_by_recid(paper, from_author_recid)
        paper_author["record"] = get_record_ref(
            to_author_record["control_number"], endpoint="authors"
        )
        if flag_as_curated:
            paper_author["curated_relation"] = True
        paper_author["ids"] = update_author_bai(target_bai, paper_author)
        paper.update(dict(paper))
コード例 #19
0
def _hal_push(record):
    if "Literature" in record["_collections"] or "HAL Hidden" in record["_collections"]:
        tei = convert_to_tei(record)

        ids = record.get("external_system_identifiers", [])
        hal_value = get_values_for_schema(ids, "HAL")
        hal_id = hal_value[0] if hal_value else ""
        if hal_id:
            receipt = _hal_update(tei, hal_id, record)
        else:
            receipt = _hal_create(tei, record)
        if receipt and receipt.id != hal_id:
            _write_hal_id_to_record(record, receipt.id)
        return receipt
コード例 #20
0
    def assign_author_to_papers(self):
        """Link this author record to the papers found for its BAI.

        Nothing happens when the record has no INSPIRE BAI. Otherwise, in
        each paper returned for the BAI, the author entry carrying that BAI
        gets this record's ``self`` reference and the paper is updated.
        """
        # Imported here rather than at module level (presumably to avoid a
        # circular import -- confirm before moving it).
        from .literature import LiteratureRecord

        own_bais = get_values_for_schema(self.get("ids", []), "INSPIRE BAI")
        if not own_bais:
            return

        bai = own_bais[0]
        paper_ids = [
            str(control_number) for control_number in self.query_author_papers(bai)
        ]
        for paper in LiteratureRecord.get_records(paper_ids):
            paper_author = get_author_by_bai(paper, bai)
            paper_author["record"] = self.get("self")
            paper.update(dict(paper))
コード例 #21
0
ファイル: ui.py プロジェクト: inspirehep/inspirehep
    def add_ads_links_for_arxiv_papers(self, data):
        """Add an ADS identifier built from the arXiv id when none exists.

        ``data`` is returned untouched when it has no arXiv eprint or
        already carries an "ADS" external system identifier.
        """
        arxiv_id = get_value(data, "arxiv_eprints[0].value")
        identifiers = get_value(data, "external_system_identifiers", default=[])
        existing_ads_ids = get_values_for_schema(identifiers, "ADS")

        if not arxiv_id or existing_ads_ids:
            return data

        identifiers.append({"schema": "ADS", "value": f"arXiv:{arxiv_id}"})
        # Re-assign: ``identifiers`` may be the default list, which is not
        # attached to ``data`` yet.
        data["external_system_identifiers"] = identifiers
        return data
コード例 #22
0
ファイル: tasks.py プロジェクト: benjamin-bergia/inspirehep
def _hal_push(record):
    if "Literature" in record["_collections"] or "HAL Hidden" in record[
            "_collections"]:
        tei = convert_to_tei(record)

        ids = record.get("external_system_identifiers", [])
        hal_value = get_values_for_schema(ids, "HAL")
        hal_id = hal_value[0] if hal_value else ""
        lock_name = f"hal:{record['control_number']}"
        with distributed_lock(lock_name, blocking=True):
            if hal_id:
                receipt = _hal_update(tei, hal_id, record)
            else:
                receipt = _hal_create(tei, record)
            if receipt and receipt.id != hal_id:
                _write_hal_id_to_record(record, receipt.id)
            return receipt
コード例 #23
0
ファイル: examples.py プロジェクト: inspirehep/inspirehep
 def do(record, logger, state):
     """Link advisors that have an INSPIRE ID but no record reference.

     For each such advisor the authors index is searched for the INSPIRE
     ID; the advisor gets a ``record`` reference only when exactly one
     author matches, otherwise a warning is logged and it is skipped.
     """
     for advisor in record["advisors"]:
         if not advisor_has_inspire_id_but_no_record(advisor):
             continue

         inspire_id = get_values_for_schema(advisor["ids"], "INSPIRE ID")[0]
         search = AuthorsSearch().query_from_iq(f"ids.value:{inspire_id}")
         recids = [hit.control_number for hit in search.execute().hits]

         if len(recids) == 1:
             advisor["record"] = get_ref_from_pid("aut", recids[0])
         else:
             logger.warning(
                 "No unique match for INSPIRE ID, skipping.",
                 inspire_id=inspire_id,
                 recids=recids,
             )
コード例 #24
0
ファイル: api.py プロジェクト: inspirehep/inspirehep
def process_cds_record(cds_record):
    """Attach the CDS identifier of ``cds_record`` to the matching record.

    The matching record is looked up by the other ids, eprints, DOIs and
    report numbers found in the CDS metadata. Nothing is changed when the
    CDS id cannot be extracted, when no record matches, or when the record
    already lists this CDS id.

    Returns:
        None in every case.
    """
    metadata_control_numbers = get_value(cds_record, "metadata.other_ids", [])
    metadata_arxivs = get_value(cds_record, "metadata.eprints", [])
    metadata_dois = get_value(cds_record, "metadata.dois.value", [])
    metadata_report_numbers = get_value(
        cds_record, "metadata.report_numbers.value", []
    )

    # Prefer the top-level id, falling back to the one in the metadata.
    cds_id = cds_record.get("id") or get_value(
        cds_record, "metadata.control_number", []
    )
    if not cds_id:
        LOGGER.info(
            "Cannot extract CDS id from CDS response",
            cds_data=cds_record,
        )
        return None

    record = get_record_for_provided_ids(
        metadata_control_numbers,
        metadata_arxivs,
        metadata_dois,
        metadata_report_numbers,
    )
    if not record:
        LOGGER.warning(
            "Cannot find record with any of the provided IDS",
            control_numbers=metadata_control_numbers,
            arxivs=metadata_arxivs,
            dois=metadata_dois,
            report_numbers=metadata_report_numbers,
        )
        return None
    control_number = record.control_number

    known_cds_ids = get_values_for_schema(
        record.get("external_system_identifiers", []), "CDS"
    )
    if cds_id in known_cds_ids:
        LOGGER.info(
            "Correct CDS identifier is already present in the record",
            recid=control_number,
            cds_id=cds_id,
        )
        return None

    builder = LiteratureBuilder(record=record)
    builder.add_external_system_identifier(cds_id, "CDS")
    record.update(dict(builder.record))
コード例 #25
0
ファイル: tasks.py プロジェクト: inspirehep/inspirehep
def assign_papers(
    self,
    from_author_recid,
    to_author_record,
    author_papers_recids,
    is_stub_author=False,
):
    """Re-point the papers of one author to another author record.

    Every paper is loaded by recid; its author entry found for
    ``from_author_recid`` gets the reference and BAI of
    ``to_author_record`` and, unless the target is a stub author, is
    flagged as a curated relation. All changes are committed at the end.

    ``self`` is not used in the body (presumably the bound task instance
    -- confirm against the decorator).
    """
    target_bai = get_values_for_schema(to_author_record["ids"], "INSPIRE BAI")[0]
    flag_as_curated = not is_stub_author

    for paper_recid in author_papers_recids:
        paper = LiteratureRecord.get_record_by_pid_value(paper_recid)
        paper_author = get_author_by_recid(paper, from_author_recid)
        paper_author["record"] = get_record_ref(
            to_author_record["control_number"], endpoint="authors"
        )
        if flag_as_curated:
            paper_author["curated_relation"] = True
        paper_author["ids"] = update_author_bai(target_bai, paper_author)
        paper.update(dict(paper))

    db.session.commit()
コード例 #26
0
ファイル: tasks.py プロジェクト: benjamin-bergia/inspirehep
def disambiguate_authors(self, record_uuid):
    """Link the modified authors of a Literature record to author records.

    Authors with a curated relation are skipped. Each remaining author is
    matched against existing authors; on a match its reference and BAI are
    taken from the match, otherwise -- when it has no ``record`` yet -- a
    new author record is created for it. The record is updated and
    committed only when at least one author was linked.
    """
    # The record may have been deleted in the meantime.
    try:
        record = InspireRecord.get_record(record_uuid)
    except NoResultFound:
        return
    if "Literature" not in record["_collections"]:
        return

    updated_authors = []
    for author in record.get_modified_authors():
        if author.get("curated_relation"):
            continue

        # Try the author matcher first, then the literature-based one.
        match = match_author(author) or match_literature_author(author, record)
        if match:
            reference = match["author_reference"]
            author["record"] = {"$ref": reference}
            assign_bai_to_literature_author(author, match.get("author_bai"))
            updated_authors.append(reference.split("/")[-1])
        elif "record" not in author:
            created_author = create_new_author(
                author["full_name"], record["control_number"]
            )
            author["record"] = created_author["self"]
            created_bai = get_values_for_schema(
                created_author["ids"], "INSPIRE BAI"
            )[0]
            assign_bai_to_literature_author(author, created_bai)
            updated_authors.append(created_author["control_number"])

    if not updated_authors:
        return

    log_details = {
        "uuid": str(record.id),
        "recid": record["control_number"],
        "authors_control_numbers": updated_authors,
    }
    LOGGER.info("Updated references for authors", log_details)
    record.update(dict(record))
    db.session.commit()
コード例 #27
0
def test_disambiguation_on_record_update_unambiguous_match(
        inspire_app, clean_celery_session, enable_disambiguation):
    """Updating a record must leave the first author's BAI unchanged."""
    literature_data = faker.record("lit", with_control_number=True)
    literature_data.update({
        "authors": [{
            "full_name": "Kowalczyk, Elisabeth",
            "ids": [{"schema": "INSPIRE BAI", "value": "E.Kowalczyk.1"}],
        }]
    })
    literature_record = LiteratureRecord.create(data=literature_data)
    db.session.commit()

    def first_author_bais_in_es():
        # BAIs of the first author as currently stored in the search index.
        es_record = InspireSearch.get_record_data_from_es(literature_record)
        return get_values_for_schema(
            es_record["authors"][0]["ids"], "INSPIRE BAI")

    def assert_author_has_bai_in_es():
        assert first_author_bais_in_es()

    retry_until_pass(assert_author_has_bai_in_es, retry_interval=2)

    old_bai = get_values_for_schema(
        literature_record["authors"][0]["ids"], "INSPIRE BAI")[0]

    # Reload the record from the db and modify its first author.
    db.session.expire_all()
    lit_record = InspireRecord.get_record(literature_record.id)
    lit_record["authors"][0]["emails"] = ["test.test@com"]
    lit_record.update(dict(lit_record))
    db.session.commit()

    def assert_bai_unchanged_in_es():
        assert first_author_bais_in_es()[0] == old_bai

    retry_until_pass(assert_bai_unchanged_in_es, retry_interval=2)
コード例 #28
0
ファイル: tasks.py プロジェクト: inspirehep/inspirehep
def _disambiguate_authors(authors_to_disambiguate, record):
    updated_authors = []
    for author in authors_to_disambiguate:
        if author.get("curated_relation"):
            continue
        assigned_author_recid = None
        matched_author_data = match_author(author)
        if not matched_author_data:
            matched_author_data = match_literature_author(author, record)
        if matched_author_data:
            author["record"] = {"$ref": matched_author_data["author_reference"]}
            assign_bai_to_literature_author(
                author, matched_author_data.get("author_bai")
            )
            assigned_author_recid = matched_author_data["author_reference"].split("/")[
                -1
            ]
        elif "record" not in author:
            linked_author_record = create_new_author(
                author["full_name"], record["control_number"]
            )
            author["record"] = linked_author_record["self"]
            new_author_bai = get_values_for_schema(
                linked_author_record["ids"], "INSPIRE BAI"
            )[0]
            assign_bai_to_literature_author(author, new_author_bai)
            assigned_author_recid = linked_author_record["control_number"]
        if assigned_author_recid:
            if len(author["full_name"].split(",")[0].split(" ")) == 1:
                if matched_author_data:
                    linked_author_record = AuthorsRecord.get_record_by_pid_value(
                        assigned_author_recid
                    )
                author["full_name"] = reorder_lit_author_names(
                    author["full_name"], linked_author_record["name"]["value"]
                )
            updated_authors.append(assigned_author_recid)
    return updated_authors
コード例 #29
0
 def get_authors_bais(self):
     """Return the INSPIRE BAI values found in the ids of all authors."""
     all_author_ids = flatten_list(self.get_value("authors.ids", []))
     return get_values_for_schema(all_author_ids, "INSPIRE BAI")
コード例 #30
0
def does_current_user_own_author_record(author):
    """Tell whether the current user's ORCID is the one on ``author``.

    Only the last ORCID listed in the author's ``ids`` is compared; an
    author without any ORCID is never owned.
    """
    author_orcids = get_values_for_schema(author.get("ids", []), "ORCID")
    if not author_orcids:
        return False

    last_orcid = author_orcids.pop()
    return get_current_user_orcid() == last_orcid
コード例 #31
0
ファイル: utils.py プロジェクト: inspirehep/inspirehep
def get_first_value_for_schema(list, schema):
    """Return the first value stored under ``schema``, or None if none.

    The ``list`` parameter shadows the builtin; the name is kept so that
    existing keyword callers keep working.
    """
    matching_values = get_values_for_schema(list, schema)
    if matching_values:
        return matching_values[0]
    return None
コード例 #32
0
 def get_pid_values(self):
     """Return the set of ORCID values listed in ``self.data['ids']``."""
     record_ids = self.data.get("ids", [])
     orcid_values = get_values_for_schema(record_ids, "ORCID")
     return set(orcid_values)