コード例 #1
0
def clean_stub_authors():
    """Delete the stub authors that have no linked papers.

    Stub authors are the ones created by disambiguation. Candidates are
    collected from ES and then re-checked in the db, because the ES data
    might be outdated.
    """
    only_stubs = Q("term", stub=True)
    search = AuthorsSearch().query(only_stubs).source(["control_number"])
    candidate_pids = []
    for hit in search.scan():
        candidate_pids.append(("aut", str(hit["control_number"])))

    # Switch the db connection to the highest isolation level (serializable)
    # to avoid issues with race conditions.
    db.session.connection(
        execution_options={"isolation_level": "SERIALIZABLE"}
    )

    # Keep only the records that the db still flags as stub, keyed by BAI.
    stubs_by_bai = {}
    for db_author in AuthorsRecord.get_records_by_pids(candidate_pids):
        if db_author.get("stub"):
            bai = get_values_for_schema(db_author["ids"], "INSPIRE BAI")[0]
            stubs_by_bai[bai] = db_author

    # Find out which of the verified stubs do have linked papers.
    bais_with_papers = set(
        query_authors_with_linked_papers_by_bai(stubs_by_bai.keys())
    )

    # Whatever is left has no linked papers, so its record is deleted.
    bais_to_delete = set(stubs_by_bai.keys()) - bais_with_papers
    click.echo(
        f"Removing {len(bais_to_delete)} stub authors with no linked papers"
    )
    for bai in bais_to_delete:
        stubs_by_bai[bai].delete()
    db.session.commit()
    click.echo("Successfully removed stub authors")
コード例 #2
0
 def assert_disambiguation_task():
     """Check the first author hit in ES against the literature record.

     The first author hit must be named "Michal Kowal" and its control number
     must equal the recid of the literature record's first author.
     """
     es_literature = InspireSearch.get_record_data_from_es(literature_record)
     first_author_hit = AuthorsSearch().query_from_iq("").execute().hits[0]
     assert first_author_hit.name["value"] == "Michal Kowal"
     first_literature_author = es_literature["authors"][0]
     assert first_literature_author["recid"] == first_author_hit.control_number
コード例 #3
0
ファイル: test_api.py プロジェクト: MJedr/inspirehep
def test_authors_search_query(inspire_app):
    """A name query becomes a bool/should of two matches and a query string."""
    search_text = "J Ellis"
    should_clauses = [
        {"match": {"names_analyzed": search_text}},
        {"match": {"names_analyzed_initials": search_text}},
        {"query_string": {"query": search_text}},
    ]
    expected = {
        "query": {"bool": {"should": should_clauses}},
        "track_total_hits": True,
    }

    actual = AuthorsSearch().query_from_iq(search_text).to_dict()

    assert expected == actual
コード例 #4
0
def test_indexer_deletes_record_from_es(es_clear, db, datadir, create_record):
    """Indexing an author record flagged as deleted leaves no ES hit for it."""
    fixture_path = datadir / "999108.json"
    author = create_record("aut", data=json.loads(fixture_path.read_text()))

    author["deleted"] = True
    author._index()
    es_clear.indices.refresh("records-authors")

    hits = AuthorsSearch().get_record(str(author.id)).execute().hits
    assert 0 == len(hits)
コード例 #5
0
def test_indexer_deletes_record_from_es(inspire_app, datadir):
    """Indexing an author record flagged as deleted leaves no ES hit for it."""
    fixture_path = datadir / "999108.json"
    author = create_record("aut", data=json.loads(fixture_path.read_text()))

    author["deleted"] = True
    # Index synchronously so the refresh below already sees the result.
    author.index(delay=False)
    current_search.flush_and_refresh("records-authors")

    hits = AuthorsSearch().get_record(str(author.id)).execute().hits
    assert 0 == len(hits)
コード例 #6
0
def test_reindex_one_type_of_record(inspire_app, cli):
    """After ``index reindex -p lit`` only the literature record is found in ES."""
    literature = create_record_factory("lit")
    create_record_factory("aut")

    cli.invoke(["index", "reindex", "-p", "lit"])
    current_search.flush_and_refresh("*")

    first_literature_id = LiteratureSearch().execute().hits.hits[0]["_id"]
    author_hits = AuthorsSearch().execute().hits.hits

    assert str(literature.id) == first_literature_id
    assert 0 == len(author_hits)
コード例 #7
0
def test_authors_query_for_query_with_colon(inspire_app):
    """A query containing a colon is passed through as a plain query string."""
    iq = "positions.record.$ref:905189"
    expected = {
        "query": {"query_string": {"query": iq}},
        "track_total_hits": True,
    }

    actual = AuthorsSearch().query_from_iq(iq).to_dict()

    assert expected == actual
コード例 #8
0
def test_reindex_all_types_records(inspire_app, cli):
    """After ``index reindex --all`` every created record is the first ES hit."""
    created = {
        "lit": create_record_factory("lit"),
        "aut": create_record_factory("aut"),
        "job": create_record_factory("job"),
        "con": create_record_factory("con"),
    }

    cli.invoke(["index", "reindex", "--all"])
    current_search.flush_and_refresh("*")

    # Fetch the id of the first hit from each collection's search.
    first_hit_ids = {
        "lit": LiteratureSearch().execute().hits.hits[0]["_id"],
        "aut": AuthorsSearch().execute().hits.hits[0]["_id"],
        "con": ConferencesSearch().execute().hits.hits[0]["_id"],
        "job": JobsSearch().execute().hits.hits[0]["_id"],
    }

    for pid_type in ("lit", "aut", "con", "job"):
        assert str(created[pid_type].id) == first_hit_ids[pid_type]
コード例 #9
0
ファイル: examples.py プロジェクト: inspirehep/inspirehep
 def do(record, logger, state):
     """Attach a record reference to advisors that have only an INSPIRE ID.

     For each advisor accepted by ``advisor_has_inspire_id_but_no_record``,
     authors are searched by the advisor's first INSPIRE ID. When exactly one
     author matches, ``advisor["record"]`` is set to a reference to it;
     otherwise a warning is logged and the advisor is left untouched.

     ``state`` is not used by this function.
     """
     advisors_to_link = (
         candidate
         for candidate in record["advisors"]
         if advisor_has_inspire_id_but_no_record(candidate)
     )
     for advisor in advisors_to_link:
         inspire_id = get_values_for_schema(advisor["ids"], "INSPIRE ID")[0]
         search = AuthorsSearch().query_from_iq(f"ids.value:{inspire_id}")
         recids = [hit.control_number for hit in search.execute().hits]
         if len(recids) != 1:
             logger.warning(
                 "No unique match for INSPIRE ID, skipping.",
                 inspire_id=inspire_id,
                 recids=recids,
             )
             continue
         advisor["record"] = get_ref_from_pid("aut", recids[0])
コード例 #10
0
 def get_papers_uuids(self):
     """Return the set of ``meta["id"]`` values of the papers found for ``self``.

     The papers come from ``AuthorsSearch.get_author_papers``, called with
     ``source="_id"``.
     """
     return {
         hit.meta["id"]
         for hit in AuthorsSearch.get_author_papers(self, source="_id")
     }
コード例 #11
0
ファイル: test_authors.py プロジェクト: inspirehep/inspirehep
 def assert_record():
     """Check that the record indexed in ES has the expected control number."""
     current_search.flush_and_refresh("records-authors")
     es_data = AuthorsSearch().get_record_data_from_es(record)
     assert expected_control_number == es_data["control_number"]
コード例 #12
0
ファイル: test_authors.py プロジェクト: inspirehep/inspirehep
 def assert_record():
     """Check the name of the first student indexed in ES for the advisor."""
     current_search.flush_and_refresh("records-authors")
     es_advisor = AuthorsSearch().get_record_data_from_es(advisor)
     first_student = es_advisor["students"][0]
     assert first_student["name"] == expected_student_name
コード例 #13
0
ファイル: test_authors.py プロジェクト: inspirehep/inspirehep
 def assert_record():
     """Check that an empty authors query returns exactly two hits."""
     current_search.flush_and_refresh("records-authors")
     hits = AuthorsSearch().query_from_iq("").execute().hits
     assert len(hits) == 2
コード例 #14
0
ファイル: test_authors.py プロジェクト: inspirehep/inspirehep
 def assert_record_is_deleted_from_es():
     """Check that looking the record up by its id in ES yields no hits."""
     current_search.flush_and_refresh("records-authors")
     hits = AuthorsSearch().get_record(str(record.id)).execute().hits
     assert 0 == len(hits)
コード例 #15
0
 def assert_disambiguation_task():
     """Check that an empty authors query returns exactly two hits."""
     hits = AuthorsSearch().query_from_iq("").execute().hits
     assert len(hits) == 2
コード例 #16
0
ファイル: test_api.py プロジェクト: MJedr/inspirehep
def test_empty_authors_search_query(inspire_app):
    """An empty query string is translated into a ``match_all`` query."""
    expected = {
        "query": {"match_all": {}},
        "track_total_hits": True,
    }

    actual = AuthorsSearch().query_from_iq("").to_dict()

    assert expected == actual
コード例 #17
0
ファイル: test_authors.py プロジェクト: inspirehep/inspirehep
 def assert_record():
     """Check that the record indexed in ES has the expected death date."""
     current_search.flush_and_refresh("records-authors")
     es_data = AuthorsSearch().get_record_data_from_es(rec)
     assert expected_death_date == es_data["death_date"]
コード例 #18
0
def authors():
    """Create and return a new ``AuthorsSearch`` instance."""
    search = AuthorsSearch()
    return search
コード例 #19
0
 def get_papers_uuids(self):
     """Return a de-duplicated list of ``meta["id"]`` values of the papers.

     The papers come from ``AuthorsSearch.get_author_papers``, called with
     ``source="_id"``. Duplicates are dropped by going through a set, so the
     order of the returned list is not meaningful.
     """
     unique_ids = {
         hit.meta["id"]
         for hit in AuthorsSearch.get_author_papers(self, source="_id")
     }
     return list(unique_ids)