コード例 #1
0
        def assert_disambiguation_cli():
            """Check which indexed records carry a ``record`` key on their authors.

            Every author of the records fetched for ``record_uuids`` must have
            a ``record`` key, while the first author of the record that should
            not be disambiguated must not have one.
            """
            disambiguated = LiteratureSearch().get_records(record_uuids).execute()
            all_authors = (
                author for hit in disambiguated for author in hit.authors
            )
            for author in all_authors:
                assert "record" in author

            untouched_search = LiteratureSearch().get_records(
                [record_that_shouldnt_be_disambiguated_uuid])
            untouched = untouched_search.execute()
            first_author = untouched[0]["authors"][0]
            assert "record" not in first_author
コード例 #2
0
def test_gracefully_handle_records_updating_in_wrong_order(
        inspire_app, clean_celery_session):
    """Index versions of a record out of order and check the state in ES.

    Automatic indexing is switched off so that ``index_record`` can be called
    by hand with explicit version ids: first for the newest version, then for
    the one before it. The signal handler is reconnected in a ``finally``
    clause so that a failing assertion cannot leave auto indexing disabled
    for whatever runs afterwards.
    """
    # We want to run indexing in weird order, so disable auto indexing
    models_committed.disconnect(index_after_commit)
    try:
        cited_record = LiteratureRecord.create(data=faker.record("lit"))
        record_data = faker.record(
            "lit", literature_citations=[cited_record.control_number])
        record = LiteratureRecord.create(data=record_data)
        db.session.commit()

        record = LiteratureRecord.get_record_by_pid_value(
            record.control_number)

        index_record(record.id, record.model.versions[-1].version_id)
        assert LiteratureSearch().get_source(
            cited_record.id)["citation_count"] == 1

        # First update: drop the references of the citing record.
        data = dict(record)
        del data["references"]

        record.update(data)
        db.session.commit()

        # Second update: replace the first title.
        record = LiteratureRecord.get_record_by_pid_value(
            record.control_number)
        data = dict(record)
        data["titles"][0] = {"title": "New Title"}
        record.update(data)
        db.session.commit()

        record = LiteratureRecord.get_record_by_pid_value(
            record.control_number)

        # Index the most recent version first...
        index_record(record.id, record.model.versions[-1].version_id)

        record = LiteratureRecord.get_record_by_pid_value(
            record.control_number)

        assert LiteratureSearch().get_source(
            cited_record.id)["citation_count"] == 1
        assert LiteratureSearch().get_source(record.id)["titles"] == [{
            "title":
            "New Title"
        }]

        # ...and only then the version before it; the indexed title must
        # still be the newest one.
        index_record(record.id, record.model.versions[-2].version_id)

        assert LiteratureSearch().get_source(
            cited_record.id)["citation_count"] == 0
        assert LiteratureSearch().get_source(record.id)["titles"] == [{
            "title":
            "New Title"
        }]
    finally:
        # Always restore auto indexing, even when an assertion above fails.
        models_committed.connect(index_after_commit)
コード例 #3
0
def test_get_search_with_source_with_LiteratureSearch_instance_with_defined_headers(
        inspire_app):
    """Check ``_source`` includes/excludes for an Accept header present in config.

    Both config mappings contain an entry for the UI JSON mimetype sent in the
    ``Accept`` header; the search built by ``get_search_with_source`` must
    expose exactly those includes and excludes.
    """
    includes_by_content_type = {
        "application/vnd+inspire.record.ui+json": ["title", "description"]
    }
    excludes_by_content_type = {
        "application/vnd+inspire.record.ui+json":
        ["excludes_with_includes_looks_stupid"],
        "application/bibtex": ["control_number"],
    }
    config = {
        "LITERATURE_SOURCE_INCLUDES_BY_CONTENT_TYPE": includes_by_content_type,
        "LITERATURE_SOURCE_EXCLUDES_BY_CONTENT_TYPE": excludes_by_content_type,
    }
    headers = {"Accept": "application/vnd+inspire.record.ui+json"}

    with override_config(**config):
        with current_app.test_request_context(headers=headers):
            search = get_search_with_source(LiteratureSearch())

            wanted_includes = ["title", "description"]
            wanted_excludes = ["excludes_with_includes_looks_stupid"]

            source = search.to_dict()["_source"]

            assert wanted_includes == source["includes"]
            assert wanted_excludes == source["excludes"]
コード例 #4
0
 def assert_record_in_es():
     """Check the first indexed document has ``attachment`` but no ``text``."""
     current_search.flush_and_refresh("*")
     es_response = LiteratureSearch().get_record(str(rec.id)).execute()
     first_hit = es_response.hits.hits[0]
     indexed_document = first_hit._source["documents"][0]
     assert "attachment" in indexed_document
     assert "text" not in indexed_document  # pipeline should remove it
コード例 #5
0
def _get_all_not_disambiguated_records_search():
    """Build the search for Literature records with a not-yet-linked author.

    The query requires two things: a nested ``authors`` entry for which
    ``authors.record.$ref`` does not exist, and ``_collections`` matching
    ``Literature``.

    Returns:
        The ``LiteratureSearch`` object built from that query, with total-hit
        tracking, an empty ``_source``, a size of 1000 and a ``60m`` scroll
        set as request parameters. The search is not executed here.
    """
    author_without_record_ref = {
        "nested": {
            "path": "authors",
            "query": {
                "bool": {
                    "must_not": {
                        "exists": {"field": "authors.record.$ref"}
                    }
                }
            },
        }
    }
    in_literature_collection = {"match": {"_collections": "Literature"}}
    query = {
        "query": {
            "bool": {
                "must": [author_without_record_ref, in_literature_collection]
            }
        }
    }

    search = LiteratureSearch().from_dict(query)
    return search.params(
        track_total_hits=True, _source={}, size=1000, scroll="60m")
コード例 #6
0
def test_literature_journal_title_search_is_case_insensitive(inspire_app):
    """Query by journal title in lower and upper case and expect both records.

    Two literature records share the journal title ``JHEP`` (only the first
    one also has a ``journal_record`` reference). Both ``j jhep`` and
    ``j JHEP`` must return exactly these two records.
    """
    data_with_journal_ref = {
        "publication_info": [{
            "year": 2017,
            "artid": "020",
            "page_start": "020",
            "journal_title": "JHEP",
            "journal_record": {
                "$ref": "https://inspirebeta.net/api/journals/1213103"
            },
            "journal_volume": "10",
        }],
    }
    data_without_journal_ref = {
        "publication_info": [{
            "year": 2017,
            "artid": "021",
            "page_start": "021",
            "journal_title": "JHEP",
            "journal_volume": "10",
        }],
    }
    record1 = create_record("lit", data=data_with_journal_ref)
    record2 = create_record("lit", data=data_without_journal_ref)

    result_lowercase = LiteratureSearch().query_from_iq("j jhep").execute()
    result_uppercase = LiteratureSearch().query_from_iq("j JHEP").execute()
    results = (result_lowercase, result_uppercase)

    for result in results:
        assert result

    found_ids_per_query = [
        [hit._id for hit in result["hits"]["hits"]] for result in results
    ]

    for found_ids in found_ids_per_query:
        assert len(found_ids) == 2

    for found_ids in found_ids_per_query:
        for created in (record1, record2):
            assert str(created.id) in found_ids
コード例 #7
0
ファイル: test_tasks.py プロジェクト: inspirehep/inspirehep
    def assert_all_records_in_es():
        """Check ES holds 11 literature records for the journal, and the journal."""
        journal_ref_query = (
            f"publication_info.journal_record.$ref: {journal_record_reference}"
        )
        literature_search = LiteratureSearch().query_from_iq(
            query_string=journal_ref_query)
        literature_hits = list(literature_search.scan())
        journal_in_es = InspireSearch.get_record_data_from_es(journal)

        assert len(literature_hits) == 11 and journal_in_es
コード例 #8
0
 def assert_update_in_es():
     """Check the first indexed document has the new key and a new attachment."""
     current_search.flush_and_refresh("*")
     es_response = LiteratureSearch().get_record(str(record.id)).execute()
     first_hit = es_response.hits.hits[0]
     assert "new_doc.pdf" == first_hit._source["documents"][0]["key"]
     indexed_attachment = first_hit._source["documents"][0]["attachment"]
     assert record_first_attachment != indexed_attachment
コード例 #9
0
def test_get_search_with_source_with_fields_query_param_and_wrong_formats(
        inspire_app):
    """Expect ``FieldsParamForbidden`` when ``fields`` meets these formats.

    The ``fields`` query parameter is combined in turn with the ``bibtex``,
    ``latex-eu`` and ``latex-us`` formats.
    """
    forbidden_urls = (
        "?fields=authors,ids&format=bibtex",
        "?fields=authors,ids&format=latex-eu",
        "?fields=authors,ids&format=latex-us",
    )
    for url in forbidden_urls:
        with current_app.test_request_context(url):
            search = LiteratureSearch()
            with pytest.raises(FieldsParamForbidden):
                get_search_with_source(search)
コード例 #10
0
def test_get_search_with_source_with_fields_query_param(inspire_app):
    """Check the ``_source`` includes produced by ``?fields=authors,ids``.

    Besides the two requested fields, the expected includes also list
    ``control_number``, ``_updated`` and ``_created``.
    """
    with current_app.test_request_context("?fields=authors,ids"):
        search = get_search_with_source(LiteratureSearch())
        wanted_source = {
            "includes":
            ["authors", "ids", "control_number", "_updated", "_created"]
        }
        actual_source = search.to_dict()["_source"]
        assert wanted_source == actual_source
コード例 #11
0
ファイル: test_api.py プロジェクト: MJedr/inspirehep
def test_literature_get_records_by_pids_returns_correct_record(inspire_app):
    """Fetch one PID, then two, via ``get_records_by_pids`` and check both results."""
    record1 = create_record("lit")
    first_recid = record1["control_number"]
    record2 = create_record("lit")
    second_recid = record2["control_number"]
    wanted_recids = [first_recid, second_recid]

    # Single PID: exactly one hit whose UI display carries the same recid.
    single = LiteratureSearch().get_records_by_pids([("lit", first_recid)])
    assert len(single) == 1
    ui_display = json.loads(single[0]._ui_display)
    assert ui_display["control_number"] == record1["control_number"]

    # Two PIDs: as many hits as PIDs, each with one of the expected recids.
    pids = [("lit", first_recid), ("lit", second_recid)]
    both = LiteratureSearch().get_records_by_pids(pids)

    assert len(both) == len(wanted_recids)
    for hit in both:
        assert hit.to_dict()["control_number"] in wanted_recids
コード例 #12
0
ファイル: api.py プロジェクト: inspirehep/inspirehep
def query_report_number(report_number):
    """Look up the literature record matching a report number.

    Runs a ``match`` query on ``report_numbers.value.fuzzy`` that fetches only
    ``control_number``.

    Args:
        report_number: the report number to search for.

    Returns:
        The result of ``get_record_for_pid_or_none("lit", control_number)``
        when the response holds exactly one hit, otherwise ``None``.
    """
    search = (
        LiteratureSearch()
        .query(Q("match", report_numbers__value__fuzzy=report_number))
        .source(["control_number"])
    )
    hits = search.execute().hits
    if len(hits) != 1:
        return None
    return get_record_for_pid_or_none("lit", hits[0]["control_number"])
コード例 #13
0
ファイル: utils.py プロジェクト: inspirehep/inspirehep
def get_literature_recids_for_orcid(orcid):
    """Return the Literature recids that were claimed by an ORCiD.

    We record the fact that the Author record X has claimed the Literature
    record Y by storing in Y an author object with a ``$ref`` pointing to X
    and the key ``curated_relation`` set to ``True``. Therefore this method
    first searches the DB for the Author records for the one containing the
    given ORCiD, and then uses its recid to search in ES for the Literature
    records that satisfy the above property.

    Args:
        orcid (str): the ORCiD.

    Return:
        list(int): the recids of the Literature records that were claimed
        by that ORCiD.

    Raises:
        sqlalchemy.orm.exc.NoResultFound: if no record (or no ``aut`` PID for
            it) matches, since both DB lookups use ``.one()``.
        sqlalchemy.orm.exc.MultipleResultsFound: if either DB lookup matches
            more than one row.

    """
    import json  # local import: keeps this block independent of file imports

    # Serialize with json.dumps instead of string interpolation so that quotes
    # or backslashes in ``orcid`` cannot produce malformed or altered JSON.
    orcid_object = json.dumps([{"schema": "ORCID", "value": orcid}])
    # this first query is written in a way that can use the index on (json -> ids)

    author_rec_uuid = (
        db.session.query(RecordMetadata.id)
        .filter(type_coerce(RecordMetadata.json, JSONB)["ids"].contains(orcid_object))
        .one()
        .id
    )

    author_record = (
        db.session.query(PersistentIdentifier)
        .filter(
            PersistentIdentifier.object_type == "rec",
            PersistentIdentifier.object_uuid == author_rec_uuid,
            PersistentIdentifier.pid_type == "aut",
        )
        .one()
    )

    # A redirected author PID is resolved to the PID it redirects to.
    author_recid = (
        author_record.pid_value
        if not author_record.is_redirected()
        else InspireRedirect.get_redirect(author_record).pid_value
    )

    query = Q("match", authors__curated_relation=True) & Q(
        "match", **{"authors.record.$ref": author_recid}
    )
    search_by_curated_author = (
        LiteratureSearch()
        .query("nested", path="authors", query=query)
        .params(_source=["control_number"], size=9999)
    )

    # Iterating the search object executes it.
    return [el["control_number"] for el in search_by_curated_author]
コード例 #14
0
def test_migrate_from_mirror_removes_record_from_es(inspire_app, datadir):
    """Check migrating ``dummy_deleted.xml`` removes record 12345 from ES.

    The record is first created from ``dummy_record.json`` and must be found
    once in ES; after the migration it must no longer be found.
    """

    def count_hits_for_record():
        # Resolve the uuid again on every call, as the original test did.
        uuid = LiteratureRecord.get_uuid_from_pid_value(12345)
        hits = LiteratureSearch().get_record(str(uuid)).execute().hits
        return len(hits)

    raw_record = (datadir / "dummy_record.json").read_text()
    create_record("lit", data=orjson.loads(raw_record))

    assert 1 == count_hits_for_record()

    deleted_fixture = pkg_resources.resource_filename(
        __name__, os.path.join("fixtures", "dummy_deleted.xml")
    )
    migrate_from_file(deleted_fixture)
    current_search.flush_and_refresh("records-hep")

    assert 0 == count_hits_for_record()
コード例 #15
0
def test_reindex_one_type_of_record(inspire_app, cli):
    """Run ``index reindex -p lit`` and expect only the literature record in ES."""
    record_lit = create_record_factory("lit")
    create_record_factory("aut")

    cli.invoke(["index", "reindex", "-p", "lit"])
    current_search.flush_and_refresh("*")

    first_lit_hit_id = LiteratureSearch().execute().hits.hits[0]["_id"]
    author_hits = AuthorsSearch().execute().hits.hits

    assert str(record_lit.id) == first_lit_hit_id
    assert 0 == len(author_hits)
コード例 #16
0
def test_get_search_with_source_with_LiteratureSearch_instance_without_config(
        inspire_app):
    """Expect no ``_source`` in the search when both config mappings are ``None``."""
    config = dict(
        LITERATURE_SOURCE_INCLUDES_BY_CONTENT_TYPE=None,
        LITERATURE_SOURCE_EXCLUDES_BY_CONTENT_TYPE=None,
    )
    with override_config(**config):
        with current_app.test_request_context():
            search = get_search_with_source(LiteratureSearch())

            serialized = search.to_dict()
            assert "_source" not in serialized
コード例 #17
0
def test_get_search_with_source_with_LiteratureSearch_instance_without_config(
        base_app):
    """Expect no ``_source`` in the search when both config mappings are ``None``.

    The config of ``base_app`` is patched for the duration of the check.
    """
    config = dict(
        LITERATURE_SOURCE_INCLUDES_BY_CONTENT_TYPE=None,
        LITERATURE_SOURCE_EXCLUDES_BY_CONTENT_TYPE=None,
    )
    with patch.dict(base_app.config, config):
        with base_app.test_request_context():
            search = get_search_with_source(LiteratureSearch())

            serialized = search.to_dict()
            assert "_source" not in serialized
コード例 #18
0
def test_get_search_with_source_with_fields_query_param_and_wrong_mimetype(
        inspire_app):
    """Expect ``FieldsParamForbidden`` when ``fields`` meets these Accept headers.

    The ``fields`` query parameter is combined in turn with the BibTeX and
    the two LaTeX (EU/US) mimetypes.
    """
    forbidden_mimetypes = (
        "application/x-bibtex",
        "application/vnd+inspire.latex.eu+x-latex",
        "application/vnd+inspire.latex.us+x-latex",
    )
    for mimetype in forbidden_mimetypes:
        request_context = current_app.test_request_context(
            "?fields=authors,ids", headers={"Accept": mimetype})
        with request_context, pytest.raises(FieldsParamForbidden):
            search = LiteratureSearch()
            get_search_with_source(search)
コード例 #19
0
def test_reference_convert_old_publication_info_to_new_with_exception(
        mock_convert_old_publication_info_to_new, inspire_app):
    """Expect the reference back unchanged when the mocked converter raises."""
    mock_convert_old_publication_info_to_new.side_effect = Exception()

    publication_info = {
        "journal_title": "JHEP",
        "journal_volume": "06",
        "page_start": "131",
        "year": 2018,
    }
    reference = {"reference": {"publication_info": publication_info}}

    search = LiteratureSearch()
    result = search.convert_old_publication_info_to_new(reference)

    assert reference == result
コード例 #20
0
def test_cli_reindex_deleted_and_redirected_records(inspire_app, cli):
    """Check ``index reindex -p lit`` drops deleted and redirected records from ES.

    One record is redirected and one is deleted while automatic indexing is
    switched off, so both are still in ES afterwards. After the CLI reindex
    only the surviving record may remain. The signal handler is reconnected
    in a ``finally`` clause so that an error during the update or delete
    cannot leave auto indexing disabled.
    """
    redirected = create_record("lit")
    new_record = create_record("lit")
    deleted = create_record("lit")

    # disable signals so re-indexing won't run automatically after record update
    models_committed.disconnect(index_after_commit)
    try:
        # redirect one record
        new_record_data = dict(new_record)
        new_record_data["deleted_records"] = [redirected["self"]]
        new_record.update(new_record_data)

        # delete one record
        deleted.delete()
    finally:
        # re-enable signals
        models_committed.connect(index_after_commit)

    # check if deleted and redirected were left in ES
    current_search.flush_and_refresh("*")

    expected_control_numbers = [
        redirected.control_number,
        new_record.control_number,
        deleted.control_number,
    ]
    results = LiteratureSearch().query_from_iq("").execute()
    control_numbers_from_es = [x.control_number for x in results.hits]
    assert set(control_numbers_from_es) == set(expected_control_numbers)

    cli.invoke(["index", "reindex", "-p", "lit"])
    current_search.flush_and_refresh("*")

    expected_control_numbers = [new_record.control_number]
    results = LiteratureSearch().query_from_iq("").execute()
    control_numbers_from_es = [x.control_number for x in results.hits]
    assert set(control_numbers_from_es) == set(expected_control_numbers)
コード例 #21
0
def test_migrate_from_mirror_doesnt_index_deleted_records(inspire_app):
    """Migrate ``dummy.xml`` and ``deleted_record.xml``; expect one hit for 12345."""
    fixture_names = ("dummy.xml", "deleted_record.xml")
    fixture_paths = [
        pkg_resources.resource_filename(
            __name__, os.path.join("fixtures", fixture_name))
        for fixture_name in fixture_names
    ]
    for fixture_path in fixture_paths:
        migrate_from_file(fixture_path)
    current_search.flush_and_refresh("records-hep")

    uuid = LiteratureRecord.get_uuid_from_pid_value(12345)
    es_hits = LiteratureSearch().get_record(str(uuid)).execute().hits

    assert 1 == len(es_hits)
コード例 #22
0
def test_reindex_all_types_records(inspire_app, cli):
    """Run ``index reindex --all`` and compare the first hit of each search.

    One record of each type (lit, aut, job, con) is created; after the reindex
    the first hit of each corresponding search must have the id of that record.
    """
    record_lit = create_record_factory("lit")
    record_aut = create_record_factory("aut")
    record_job = create_record_factory("job")
    record_con = create_record_factory("con")

    cli.invoke(["index", "reindex", "--all"])
    current_search.flush_and_refresh("*")

    # Searches run in the same order as before: lit, aut, con, job.
    search_classes = (
        LiteratureSearch, AuthorsSearch, ConferencesSearch, JobsSearch)
    first_hit_ids = [
        search_class().execute().hits.hits[0]["_id"]
        for search_class in search_classes
    ]

    created_records = (record_lit, record_aut, record_con, record_job)
    for created, first_hit_id in zip(created_records, first_hit_ids):
        assert str(created.id) == first_hit_id
コード例 #23
0
def test_get_search_with_source_with_LiteratureSearch_instance_with_not_defined_headers(
        inspire_app):
    """Expect no ``_source`` when the Accept header is absent from the config.

    The request accepts ``application/json``, which is a key of neither the
    includes mapping nor the excludes mapping.
    """
    includes_by_content_type = {
        "application/vnd+inspire.record.ui+json": ["title", "description"]
    }
    excludes_by_content_type = {"application/bibtex": ["control_number"]}
    config = {
        "LITERATURE_SOURCE_INCLUDES_BY_CONTENT_TYPE": includes_by_content_type,
        "LITERATURE_SOURCE_EXCLUDES_BY_CONTENT_TYPE": excludes_by_content_type,
    }
    headers = {"Accept": "application/json"}

    with override_config(**config):
        with current_app.test_request_context(headers=headers):
            search = get_search_with_source(LiteratureSearch())

            serialized = search.to_dict()
            assert "_source" not in serialized
コード例 #24
0
def _find_matching_author_in_lit_record(author_parsed_name, lit_recid):
    """Find the single author of a literature record matching a parsed name.

    The name query from ``author_parsed_name.generate_es_query()`` is run with
    inner hits enabled, combined with a match on the record's control number.

    Args:
        author_parsed_name: object providing ``generate_es_query()``; the
            returned query is expected to have a top-level ``nested`` key.
        lit_recid: control number of the literature record to look in.

    Returns:
        The result of ``get_recid_from_ref`` on the matched author's
        ``record`` when exactly one record and exactly one author matched,
        otherwise ``None``.
    """
    author_name_query = author_parsed_name.generate_es_query()
    # Ask ES to also return the nested author entries that matched.
    author_name_query["nested"]["inner_hits"] = {}
    query = {
        "bool": {
            "must":
            [author_name_query, {
                "match": {
                    "control_number": lit_recid
                }
            }]
        }
    }
    hits = LiteratureSearch().query(query).execute()

    # Check the hit count before indexing: with an empty response, hits[0]
    # would raise IndexError instead of reporting "no match".
    if len(hits) != 1:
        return None

    authors_matched = hits[0].meta["inner_hits"].to_dict().get("authors")
    # .get() may return None when there are no inner hits for "authors".
    if authors_matched is None or len(authors_matched) != 1:
        return None

    author_record = authors_matched[0]["record"].to_dict()
    return get_recid_from_ref(author_record)
コード例 #25
0
def test_return_record_for_journal_info_search_with_journal_title_with_dots_and_spaces(
        inspire_app):
    """Search ``Phys.Lett.B`` and ``Phys. Lett. B``; expect record 1 first."""
    publication = {
        "journal_title": "Phys.Lett.B",
        "journal_volume": "704",
        "page_start": "223",
        "year": 2011,
    }
    cited_record_json = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "_collections": ["Literature"],
        "control_number": 1,
        "document_type": ["article"],
        "publication_info": [publication],
        "titles": [{"title": "The Strongly-Interacting Light Higgs"}],
    }
    journal_json = {
        "short_title": "Phys.Lett.B",
        "journal_title": {"title": "Phys Lett B"},
    }

    create_record("jou", data=journal_json)
    create_record("lit", cited_record_json)

    for journal_query in ("Phys.Lett.B", "Phys. Lett. B"):
        response = LiteratureSearch().query_from_iq(journal_query).execute()
        top_hit = response["hits"]["hits"][0]

        assert 1 == top_hit["_source"]["control_number"]
コード例 #26
0
def find_references(references, requested_format):
    """Resolve textual references to formatted literature entries.

    For each reference the search keyword is chosen from the shape of the
    reference string (first matching pattern wins), the literature index is
    queried, and the entry is kept only when exactly one record is found.

    Args:
        references: iterable of ``(ref, line)`` pairs.
        requested_format: key into ``FORMAT_TO_SOURCE_FIELD`` selecting the
            source field that holds the formatted entry.

    Returns:
        tuple: ``(ret, errors)``. ``ret`` is the list of formatted entries, in
        which ``{<texkey>`` has been replaced by ``{<ref>``. ``errors`` is a
        list of dicts with keys ``ref``, ``line`` and ``type``, where ``type``
        is ``"not found"`` or ``"ambiguous"``.

    Raises:
        KeyError: if ``requested_format`` is not in ``FORMAT_TO_SOURCE_FIELD``.
    """
    display_format = FORMAT_TO_SOURCE_FIELD[requested_format]

    # Ordered (pattern, keyword) pairs; built once instead of per reference.
    keyword_patterns = (
        (r"^\d{4}[\w.&]{15}$", "external_system_identifiers.value"),  # ads
        (r".*\:\d{4}\w\w\w?", "texkey"),
        (r".*\/\d{7}", "eprint"),
        (r"\d{4}\.\d{4,5}", "eprint"),
        (r"\w\.\w+\.\w", "j"),
        (r"\w\-\w", "r"),
    )

    ret = []
    errors = []
    for ref, line in references:
        keyword = next(
            (
                candidate
                for pattern, candidate in keyword_patterns
                if re.search(pattern, ref)
            ),
            None,
        )

        if keyword is None:
            # Unrecognised reference shape: report it right away rather than
            # sending the literal query "None:<ref>" to the search backend.
            errors.append({"ref": ref, "line": line, "type": "not found"})
            continue

        # Journal references are searched with commas in place of dots.
        query = ref.replace(".", ",") if keyword == "j" else ref

        results = (
            LiteratureSearch()
            .query_from_iq(f"{keyword}:{query}")
            # size=2 is enough to tell one match apart from several.
            .params(size=2, _source=[display_format, "texkeys", "control_number"])
            .execute()
        )

        hits = results.hits.hits
        if len(hits) == 0:
            errors.append({"ref": ref, "line": line, "type": "not found"})
        elif len(hits) > 1:
            errors.append({"ref": ref, "line": line, "type": "ambiguous"})
        else:
            source_field = hits[0]["_source"]
            control_number = source_field["control_number"]
            # Fall back to the control number when the record has no texkeys.
            texkey = getattr(source_field, "texkeys", [control_number])[0]
            ret.append(source_field[display_format].replace(f"{{{texkey}", f"{{{ref}"))

    return ret, errors
コード例 #27
0
def test_migrate_from_mirror_doesnt_index_deleted_records(
        base_app, db, es_clear):
    """Migrate ``dummy.xml`` and ``deleted_record.xml``; expect one hit for 12345.

    Also checks that no PID exists for the value 1234.
    """
    fixture_names = ("dummy.xml", "deleted_record.xml")
    fixture_paths = [
        pkg_resources.resource_filename(
            __name__, os.path.join("fixtures", fixture_name))
        for fixture_name in fixture_names
    ]
    for fixture_path in fixture_paths:
        migrate_from_file(fixture_path)
    es_clear.indices.refresh("records-hep")

    uuid = LiteratureRecord.get_uuid_from_pid_value(12345)
    with pytest.raises(PIDDoesNotExistError):
        LiteratureRecord.get_uuid_from_pid_value(1234)

    es_hits = LiteratureSearch().get_record(str(uuid)).execute().hits

    assert 1 == len(es_hits)
コード例 #28
0
    def populate_curated_relation(hits):
        """Flag the hits whose paper has a curated relation to the requested author.

        The author recid is the part of the ``author`` request value before
        the first underscore. Among the given hits, those papers are searched
        that have a nested author with ``curated_relation`` true and a
        ``record.$ref`` matching that recid.

        Args:
            hits: hit dicts, each with a ``_source`` holding ``control_number``.

        Returns:
            The same ``hits``, with ``_source["curated_relation"] = True`` set
            on every matching hit (modified in place).
        """
        author_param = request.values.get("author", "", type=str)
        author_recid = author_param.split("_")[0]
        recids_in_hits = [hit["_source"]["control_number"] for hit in hits]

        is_curated = Q("match", authors__curated_relation=True)
        is_requested_author = Q(
            "match", **{"authors.record.$ref": author_recid}
        )
        curated_search = (
            LiteratureSearch()
            .filter("terms", control_number=recids_in_hits)
            .query("nested", path="authors", query=is_curated & is_requested_author)
            .params(_source=["control_number"], size=9999)
        )

        # Iterating the search object executes it.
        curated_recids = set()
        for paper in curated_search:
            curated_recids.add(paper["control_number"])

        for hit in hits:
            source = hit["_source"]
            if source["control_number"] in curated_recids:
                source["curated_relation"] = True
        return hits
コード例 #29
0
def literature():
    """Return a new ``LiteratureSearch`` instance."""
    search = LiteratureSearch()
    return search
コード例 #30
0
def test_reference_convert_old_publication_info_to_new_with_empty_reference(
        inspire_app):
    """Expect a reference with empty ``publication_info`` back unchanged."""
    empty_publication_info = {}
    reference = {"reference": {"publication_info": empty_publication_info}}

    search = LiteratureSearch()
    result = search.convert_old_publication_info_to_new(reference)

    assert reference == result