Exemple #1
0
def test_get_search_with_source_with_LiteratureSearch_instance_with_defined_headers(
        inspire_app):
    """Check ``_source`` when the Accept header has configured includes/excludes.

    The UI JSON mimetype has both an includes and an excludes list configured
    and the request asks for exactly that mimetype, so both lists are expected
    in the ``_source`` section of the serialized search.
    """
    ui_mimetype = "application/vnd+inspire.record.ui+json"
    overrides = {
        "LITERATURE_SOURCE_INCLUDES_BY_CONTENT_TYPE": {
            ui_mimetype: ["title", "description"],
        },
        "LITERATURE_SOURCE_EXCLUDES_BY_CONTENT_TYPE": {
            ui_mimetype: ["excludes_with_includes_looks_stupid"],
            # Configured for another mimetype; must not leak into the result.
            "application/bibtex": ["control_number"],
        },
    }

    with override_config(**overrides):
        with current_app.test_request_context(headers={"Accept": ui_mimetype}):
            filtered_search = get_search_with_source(LiteratureSearch())

            wanted_includes = ["title", "description"]
            wanted_excludes = ["excludes_with_includes_looks_stupid"]

            source_section = filtered_search.to_dict()["_source"]

            assert wanted_includes == source_section["includes"]
            assert wanted_excludes == source_section["excludes"]
Exemple #2
0
def test_indexer_populates_referenced_authors_bais(inspire_app):
    """Check ``referenced_authors_bais`` in ES for a record citing two others.

    Two literature records with BAI-identified authors are created (one author
    appears in both), then a third record citing both. Only the citing
    record's ES document is expected to carry ``referenced_authors_bais``,
    holding each referenced BAI once.
    """

    def bai_author(full_name, bai):
        # Author entry identified by a single INSPIRE BAI.
        return {
            "full_name": full_name,
            "ids": [{"schema": "INSPIRE BAI", "value": bai}],
        }

    cited_record_1 = create_record(
        "lit",
        data={
            "authors": [
                bai_author("Jean-Luc Picard", "Jean.L.Picard.1"),
                bai_author("John Doe", "J.Doe.1"),
            ]
        },
    )
    cited_record_2 = create_record(
        "lit",
        data={
            "authors": [
                bai_author("Jean-Luc Picard", "Jean.L.Picard.1"),
                bai_author("Steven Johnson", "S.Johnson.1"),
            ]
        },
    )
    cited_recids = [
        cited_record_1["control_number"],
        cited_record_2["control_number"],
    ]
    citing_record = create_record("lit", literature_citations=cited_recids)

    # Already sorted, so it can be compared against the sorted ES value.
    wanted_bais = ["J.Doe.1", "Jean.L.Picard.1", "S.Johnson.1"]

    cited_1_es = LiteratureSearch.get_record_data_from_es(cited_record_1)
    cited_2_es = LiteratureSearch.get_record_data_from_es(cited_record_2)
    citing_es = LiteratureSearch.get_record_data_from_es(citing_record)

    assert "referenced_authors_bais" not in cited_1_es
    assert "referenced_authors_bais" not in cited_2_es
    assert sorted(citing_es["referenced_authors_bais"]) == wanted_bais
Exemple #3
0
        def assert_disambiguation_cli():
            """Assert which records ended up with authors carrying a ``record`` key.

            Every author of the records fetched for ``record_uuids`` must have
            a ``record`` key, while the first author of the record that should
            not be disambiguated must not.
            """
            disambiguated_hits = LiteratureSearch().get_records(
                record_uuids).execute()
            for hit in disambiguated_hits:
                for hit_author in hit.authors:
                    assert "record" in hit_author

            untouched_search = LiteratureSearch().get_records(
                [record_that_shouldnt_be_disambiguated_uuid])
            untouched_hits = untouched_search.execute()
            first_untouched_author = untouched_hits[0]["authors"][0]
            assert "record" not in first_untouched_author
def test_get_search_with_source_with_fields_query_param(inspire_app):
    """Check that ``?fields=`` turns into ``_source`` includes.

    The requested fields are expected first, followed by ``control_number``,
    ``_updated`` and ``_created``.
    """
    with current_app.test_request_context("?fields=authors,ids"):
        filtered_search = get_search_with_source(LiteratureSearch())
        wanted_source = {
            "includes": [
                "authors",
                "ids",
                "control_number",
                "_updated",
                "_created",
            ],
        }
        serialized = filtered_search.to_dict()
        assert wanted_source == serialized["_source"]
Exemple #5
0
def test_get_search_with_source_with_LiteratureSearch_instance_without_config(
        inspire_app):
    """Check that no ``_source`` section is emitted when both settings are ``None``."""
    overrides = dict(
        LITERATURE_SOURCE_INCLUDES_BY_CONTENT_TYPE=None,
        LITERATURE_SOURCE_EXCLUDES_BY_CONTENT_TYPE=None,
    )
    with override_config(**overrides):
        with current_app.test_request_context():
            filtered_search = get_search_with_source(LiteratureSearch())

            serialized = filtered_search.to_dict()
            assert "_source" not in serialized
def test_get_search_with_source_with_LiteratureSearch_instance_without_config(
        base_app):
    """Check that no ``_source`` section is emitted when both settings are ``None``.

    The application config is patched in place with ``patch.dict`` for the
    duration of the test request context.
    """
    overrides = dict(
        LITERATURE_SOURCE_INCLUDES_BY_CONTENT_TYPE=None,
        LITERATURE_SOURCE_EXCLUDES_BY_CONTENT_TYPE=None,
    )
    with patch.dict(base_app.config, overrides):
        with base_app.test_request_context():
            filtered_search = get_search_with_source(LiteratureSearch())

            serialized = filtered_search.to_dict()
            assert "_source" not in serialized
def test_gracefully_handle_records_updating_in_wrong_order(
        inspire_app, clean_celery_session):
    """Index record versions out of order and check the resulting ES documents.

    Automatic indexing is switched off so the test can call ``index_record``
    with explicit version ids: first the initial version, then the last entry
    of ``record.model.versions`` (after the references were removed and the
    title replaced), and finally the second-to-last entry. The citation count
    of the cited record and the titles of the citing record are asserted after
    each step.

    The ``index_after_commit`` handler is reconnected in a ``finally`` clause
    so that a failing assertion does not leave auto indexing disabled for the
    tests that run afterwards.
    """
    # We want to run indexing in weird order, so disable auto indexing
    models_committed.disconnect(index_after_commit)
    try:
        cited_record = LiteratureRecord.create(data=faker.record("lit"))
        record_data = faker.record(
            "lit", literature_citations=[cited_record.control_number])
        record = LiteratureRecord.create(data=record_data)
        db.session.commit()

        record = LiteratureRecord.get_record_by_pid_value(
            record.control_number)

        index_record(record.id, record.model.versions[-1].version_id)
        assert LiteratureSearch().get_source(
            cited_record.id)["citation_count"] == 1

        # First update: drop the references.
        data = dict(record)
        del data["references"]

        record.update(data)
        db.session.commit()

        # Second update: replace the first title.
        record = LiteratureRecord.get_record_by_pid_value(
            record.control_number)
        data = dict(record)
        data["titles"][0] = {"title": "New Title"}
        record.update(data)
        db.session.commit()

        record = LiteratureRecord.get_record_by_pid_value(
            record.control_number)

        index_record(record.id, record.model.versions[-1].version_id)

        record = LiteratureRecord.get_record_by_pid_value(
            record.control_number)

        assert LiteratureSearch().get_source(
            cited_record.id)["citation_count"] == 1
        assert LiteratureSearch().get_source(record.id)["titles"] == [{
            "title":
            "New Title"
        }]

        # Now index the second-to-last version after the last one.
        index_record(record.id, record.model.versions[-2].version_id)

        assert LiteratureSearch().get_source(
            cited_record.id)["citation_count"] == 0
        assert LiteratureSearch().get_source(record.id)["titles"] == [{
            "title":
            "New Title"
        }]
    finally:
        # Restore auto indexing even when something above failed.
        models_committed.connect(index_after_commit)
def test_indexer_oai_set_CERN_arxiv_and_CDS(inspire_app):
    """Check the ``_oai`` section of a record matching both OAI sets.

    The record carries a CERN report number, an arXiv eprint and the CDS
    export flag; its ES document is expected to list the CDS set followed by
    the CERN-arXiv set.
    """
    record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "report_numbers": [{"value": "CERN-2020-001"}],
                "arxiv_eprints": [{"value": "2009.01484"}],
                "_export_to": {"CDS": True},
            },
        ))
    record.index(delay=False)
    es_record = LiteratureSearch.get_record_data_from_es(record)

    wanted_oai = {
        "id": f"oai:inspirehep.net:{record['control_number']}",
        "updated": record.updated.strftime(OAI_TIME_FORMAT),
        "sets": [
            inspire_app.config["OAI_SET_CDS"],
            inspire_app.config["OAI_SET_CERN_ARXIV"],
        ],
    }

    # Compare field by field, in the order id, updated, sets.
    for oai_field, wanted_value in wanted_oai.items():
        assert wanted_value == es_record["_oai"][oai_field]
Exemple #9
0
def _get_all_not_disambiguated_records_search():
    """Build the search for Literature records with an author lacking a record ref.

    The query requires both a nested ``authors`` clause in which
    ``authors.record.$ref`` does not exist and a match of ``_collections``
    against ``Literature``.

    Returns:
        The ``LiteratureSearch`` object built from that query, with
        ``track_total_hits``, an empty ``_source``, a size of 1000 and a
        ``60m`` scroll set as params. The search is not executed here.
    """
    author_without_record_ref = {
        "nested": {
            "path": "authors",
            "query": {
                "bool": {
                    "must_not": {
                        "exists": {"field": "authors.record.$ref"},
                    },
                },
            },
        },
    }
    in_literature_collection = {"match": {"_collections": "Literature"}}
    query_body = {
        "query": {
            "bool": {
                "must": [author_without_record_ref, in_literature_collection],
            },
        },
    }

    base_search = LiteratureSearch().from_dict(query_body)
    return base_search.params(
        track_total_hits=True,
        _source={},
        size=1000,
        scroll="60m",
    )
 def assert_record_in_es():
     """Assert the first indexed document has ``attachment`` but no ``text``."""
     current_search.flush_and_refresh("*")
     es_response = LiteratureSearch().get_record(str(rec.id)).execute()
     first_hit = es_response.hits.hits[0]
     first_document = first_hit._source["documents"][0]
     assert "attachment" in first_document
     assert "text" not in first_document  # pipeline should remove it
Exemple #11
0
 def assert_disambiguation_cli():
     """Assert every author in ES has a ``record``; one must keep a given ref.

     The author with the specific full name below must point at
     ``disambiguated_author_ref``; all other authors only need a ``record``
     key.
     """
     es_record = LiteratureSearch.get_record_data_from_es(record)
     for es_author in es_record["authors"]:
         if author_is_other := (
                 es_author["full_name"] != "Test Author Dismabiguated"):
             assert "record" in es_author
             continue
         assert es_author["record"]["$ref"] == disambiguated_author_ref
Exemple #12
0
def test_literature_journal_title_search_is_case_insensitive(inspire_app):
    """Check that ``j jhep`` and ``j JHEP`` both find the two JHEP records.

    Two literature records published in ``JHEP`` are created (only the first
    has a ``journal_record`` reference); both the lowercase and the uppercase
    journal query are expected to return exactly these two records.
    """
    record1 = create_record(
        "lit",
        data={
            "publication_info": [{
                "year": 2017,
                "artid": "020",
                "page_start": "020",
                "journal_title": "JHEP",
                "journal_record": {
                    "$ref": "https://inspirebeta.net/api/journals/1213103"
                },
                "journal_volume": "10",
            }],
        },
    )
    record2 = create_record(
        "lit",
        data={
            "publication_info": [{
                "year": 2017,
                "artid": "021",
                "page_start": "021",
                "journal_title": "JHEP",
                "journal_volume": "10",
            }],
        },
    )

    lowercase_response = LiteratureSearch().query_from_iq("j jhep").execute()
    uppercase_response = LiteratureSearch().query_from_iq("j JHEP").execute()

    assert lowercase_response
    assert uppercase_response

    lowercase_ids = [hit._id for hit in lowercase_response["hits"]["hits"]]
    uppercase_ids = [hit._id for hit in uppercase_response["hits"]["hits"]]

    assert len(lowercase_ids) == 2
    assert len(uppercase_ids) == 2

    # Both records must be present in both result sets.
    for found_ids in (lowercase_ids, uppercase_ids):
        for created_record in (record1, record2):
            assert str(created_record.id) in found_ids
Exemple #13
0
    def assert_all_records_in_es():
        """Assert 11 literature hits reference the journal and the journal is in ES."""
        journal_query = (
            f"publication_info.journal_record.$ref: {journal_record_reference}"
        )
        literature_hits = list(
            LiteratureSearch().query_from_iq(query_string=journal_query).scan())
        journal_from_es = InspireSearch.get_record_data_from_es(journal)

        assert len(literature_hits) == 11
        assert journal_from_es
 def assert_update_in_es():
     """Assert the first indexed document is ``new_doc.pdf`` with a new attachment."""
     current_search.flush_and_refresh("*")
     es_response = LiteratureSearch().get_record(str(record.id)).execute()
     first_hit = es_response.hits.hits[0]
     assert "new_doc.pdf" == first_hit._source["documents"][0]["key"]
     # The attachment must differ from the one captured before the update.
     current_attachment = first_hit._source["documents"][0]["attachment"]
     assert record_first_attachment != current_attachment
Exemple #15
0
def test_get_search_with_source_with_LiteratureSearch_instance_with_not_defined_headers(
        inspire_app):
    """Check that an Accept mimetype without configuration yields no ``_source``.

    Includes and excludes are configured only for other mimetypes, while the
    request asks for ``application/json``.
    """
    overrides = {
        "LITERATURE_SOURCE_INCLUDES_BY_CONTENT_TYPE": {
            "application/vnd+inspire.record.ui+json": ["title", "description"],
        },
        "LITERATURE_SOURCE_EXCLUDES_BY_CONTENT_TYPE": {
            "application/bibtex": ["control_number"],
        },
    }
    accept_plain_json = {"Accept": "application/json"}

    with override_config(**overrides):
        with current_app.test_request_context(headers=accept_plain_json):
            filtered_search = get_search_with_source(LiteratureSearch())

            serialized = filtered_search.to_dict()
            assert "_source" not in serialized
def test_get_search_with_source_with_fields_query_param_and_wrong_formats(
        inspire_app):
    """Check that ``fields`` with a bibtex/latex ``format`` is rejected.

    For each of the three request URLs ``get_search_with_source`` is expected
    to raise ``FieldsParamForbidden``.
    """
    forbidden_request_urls = (
        "?fields=authors,ids&format=bibtex",
        "?fields=authors,ids&format=latex-eu",
        "?fields=authors,ids&format=latex-us",
    )
    for request_url in forbidden_request_urls:
        with current_app.test_request_context(request_url):
            search = LiteratureSearch()
            with pytest.raises(FieldsParamForbidden):
                get_search_with_source(search)
Exemple #17
0
def test_literature_get_records_by_pids_returns_correct_record(inspire_app):
    """Check ``get_records_by_pids`` for a single pid and for two pids.

    With one pid exactly one hit is expected, whose ``_ui_display`` JSON
    carries the first record's control number. With two pids, two hits are
    expected and each hit's control number must be one of the two created.
    """
    first_record = create_record("lit")
    first_recid = first_record["control_number"]
    second_record = create_record("lit")
    second_recid = second_record["control_number"]
    known_recids = [first_recid, second_recid]

    single_result = LiteratureSearch().get_records_by_pids(
        [("lit", first_recid)])
    assert len(single_result) == 1
    ui_display = json.loads(single_result[0]._ui_display)
    assert ui_display["control_number"] == first_record["control_number"]

    both_pids = [("lit", first_recid), ("lit", second_recid)]
    both_results = LiteratureSearch().get_records_by_pids(both_pids)

    assert len(both_results) == len(known_recids)
    for hit in both_results:
        assert hit.to_dict()["control_number"] in known_recids
Exemple #18
0
def query_report_number(report_number):
    """Resolve a report number to a literature record when the match is unique.

    Runs a ``match`` query on ``report_numbers.value.fuzzy`` and fetches only
    ``control_number`` from the hits.

    Args:
        report_number: the report number to search for.

    Returns:
        The result of ``get_record_for_pid_or_none("lit", control_number)``
        when the search returns exactly one hit, ``None`` otherwise.
    """
    report_number_match = Q("match", report_numbers__value__fuzzy=report_number)
    wanted_fields = ["control_number"]
    response = (
        LiteratureSearch()
        .query(report_number_match)
        .source(wanted_fields)
        .execute()
    )

    # Zero hits and ambiguous (multiple) hits are both treated as "not found".
    if len(response.hits) != 1:
        return None

    only_hit = response.hits[0]
    return get_record_for_pid_or_none("lit", only_hit["control_number"])
Exemple #19
0
def get_literature_recids_for_orcid(orcid):
    """Return the Literature recids that were claimed by an ORCiD.

    A claim of Literature record Y by Author record X is stored in Y as an
    author object whose ``$ref`` points to X and whose ``curated_relation``
    key is ``True``. This function therefore works in two stages: it looks up
    in the DB the Author record whose ``ids`` contain the given ORCiD
    (following a redirection if the author pid is redirected), then searches
    ES for the Literature records holding such a curated author entry.

    Args:
        orcid (str): the ORCiD.

    Returns:
        list(int): the recids of the Literature records that were claimed
        by that ORCiD.

    """
    # this first query is written in a way that can use the index on (json -> ids)
    ids_containing_orcid = f'[{{"schema": "ORCID", "value": "{orcid}"}}]'
    ids_column = type_coerce(RecordMetadata.json, JSONB)["ids"]
    author_row = (
        db.session.query(RecordMetadata.id)
        .filter(ids_column.contains(ids_containing_orcid))
        .one()
    )
    author_rec_uuid = author_row.id

    author_pid = (
        db.session.query(PersistentIdentifier)
        .filter(
            PersistentIdentifier.object_type == "rec",
            PersistentIdentifier.object_uuid == author_rec_uuid,
            PersistentIdentifier.pid_type == "aut",
        )
        .one()
    )

    # A redirected author pid is resolved to the pid it redirects to.
    if author_pid.is_redirected():
        author_recid = InspireRedirect.get_redirect(author_pid).pid_value
    else:
        author_recid = author_pid.pid_value

    is_curated = Q("match", authors__curated_relation=True)
    points_to_author = Q("match", **{"authors.record.$ref": author_recid})
    claimed_by_author = (
        LiteratureSearch()
        .query("nested", path="authors", query=is_curated & points_to_author)
        .params(_source=["control_number"], size=9999)
    )

    return [hit["control_number"] for hit in claimed_by_author]
Exemple #20
0
def test_reindex_one_type_of_record(inspire_app, cli):
    """Check that ``index reindex -p lit`` indexes literature but not authors.

    One literature and one author record are created through the factory;
    after reindexing only the ``lit`` pid type, the first literature hit must
    be the created record and the authors search must return no hits.
    """
    lit_record = create_record_factory("lit")
    create_record_factory("aut")

    cli.invoke(["index", "reindex", "-p", "lit"])
    current_search.flush_and_refresh("*")

    first_lit_hit_id = LiteratureSearch().execute().hits.hits[0]["_id"]
    author_hits = AuthorsSearch().execute().hits.hits

    assert str(lit_record.id) == first_lit_hit_id
    assert 0 == len(author_hits)
Exemple #21
0
def test_migrate_from_mirror_removes_record_from_es(inspire_app, datadir):
    """Check that migrating the deleted fixture removes the record from ES.

    The record is created from ``dummy_record.json`` and must be found once in
    ES under pid value 12345 (presumably the control number in that fixture).
    After migrating ``dummy_deleted.xml`` the same lookup must return no hits.
    """

    def count_es_hits_for_pid_12345():
        # Resolve the uuid again on every call, then count the ES hits for it.
        record_uuid = LiteratureRecord.get_uuid_from_pid_value(12345)
        es_hits = LiteratureSearch().get_record(str(record_uuid)).execute().hits
        return len(es_hits)

    record_json = (datadir / "dummy_record.json").read_text()
    create_record("lit", data=orjson.loads(record_json))

    assert 1 == count_es_hits_for_pid_12345()

    deleted_fixture_path = pkg_resources.resource_filename(
        __name__, os.path.join("fixtures", "dummy_deleted.xml")
    )
    migrate_from_file(deleted_fixture_path)
    current_search.flush_and_refresh("records-hep")

    assert 0 == count_es_hits_for_pid_12345()
Exemple #22
0
 def assert_assign():
     """Assert the first author of both literature records points to ``to_author``.

     For each record the ES copy is read after a flush/refresh; its first
     author must reference ``to_author``, be marked as curated and carry the
     same ``ids``.
     """
     for literature in (literature_1, literature_2):
         current_search.flush_and_refresh("*")
         es_literature = LiteratureSearch.get_record_data_from_es(literature)
         first_author = es_literature["authors"][0]
         wanted_author_ref = {
             "$ref":
             f"http://localhost:5000/api/authors/{to_author['control_number']}"
         }
         assert first_author["record"] == wanted_author_ref
         assert first_author["curated_relation"]
         assert first_author["ids"] == to_author["ids"]
Exemple #23
0
 def assert_assign():
     """Assert the second author in ES points to ``to_author``, which is no stub.

     The second author of the ES copy of ``literature`` must reference
     ``to_author``, be marked as curated and carry the same ``ids``; the
     author record re-read by pid value must have a falsy ``stub``.
     """
     current_search.flush_and_refresh("*")
     es_literature = LiteratureSearch.get_record_data_from_es(literature)
     second_author = es_literature["authors"][1]
     refreshed_to_author = AuthorsRecord.get_record_by_pid_value(
         to_author["control_number"])
     wanted_author_ref = {
         "$ref":
         f"http://localhost:5000/api/authors/{to_author['control_number']}"
     }
     assert second_author["record"] == wanted_author_ref
     assert second_author["curated_relation"]
     assert second_author["ids"] == to_author["ids"]
     assert not refreshed_to_author["stub"]
def test_get_search_with_source_with_fields_query_param_and_wrong_mimetype(
        inspire_app):
    """Check that ``fields`` with a bibtex/latex Accept header is rejected.

    For each of the three Accept mimetypes, building the search and applying
    ``get_search_with_source`` is expected to raise ``FieldsParamForbidden``.
    """
    forbidden_mimetypes = (
        "application/x-bibtex",
        "application/vnd+inspire.latex.eu+x-latex",
        "application/vnd+inspire.latex.us+x-latex",
    )
    for mimetype in forbidden_mimetypes:
        request_context = current_app.test_request_context(
            "?fields=authors,ids", headers={"Accept": mimetype})
        with request_context:
            with pytest.raises(FieldsParamForbidden):
                search = LiteratureSearch()
                get_search_with_source(search)
def test_reference_convert_old_publication_info_to_new_with_exception(
        mock_convert_old_publication_info_to_new, inspire_app):
    """Check that the reference comes back unchanged when the conversion raises.

    The mock (presumably injected by a patch decorator outside this view) is
    set to raise a bare ``Exception``; the method under test is then expected
    to return a value equal to its input.
    """
    mock_convert_old_publication_info_to_new.side_effect = Exception()
    publication_info = {
        "journal_title": "JHEP",
        "journal_volume": "06",
        "page_start": "131",
        "year": 2018,
    }
    reference = {"reference": {"publication_info": publication_info}}

    converted = LiteratureSearch().convert_old_publication_info_to_new(
        reference)

    assert reference == converted
Exemple #26
0
def test_indexer_oai_set_CDS(inspire_app):
    """Check the ``_oai`` section of a record flagged for export to CDS."""
    record = LiteratureRecord.create(
        faker.record("lit", data={"_export_to": {"CDS": True}}))
    record.index(delay=False)
    es_record = LiteratureSearch.get_record_data_from_es(record)

    wanted_oai = {
        "id": f"oai:inspirehep.net:{record['control_number']}",
        # NOTE(review): the fixed timestamp presumably relies on the clock
        # being frozen outside this block -- confirm.
        "updated": "1994-12-19T00:00:00",
        "sets": [inspire_app.config["OAI_SET_CDS"]],
    }

    # Compare field by field, in the order id, updated, sets.
    for oai_field, wanted_value in wanted_oai.items():
        assert wanted_value == es_record["_oai"][oai_field]
Exemple #27
0
def test_cli_reindex_deleted_and_redirected_records(inspire_app, cli):
    """Check that ``index reindex -p lit`` drops deleted and redirected records.

    Three literature records are created. With automatic indexing
    disconnected, one is listed in another's ``deleted_records`` and a third
    is deleted, so all three are still expected in ES. After the CLI reindex
    only the record that received the redirection must remain.

    The ``index_after_commit`` handler is reconnected in a ``finally`` clause
    so that an error while updating or deleting does not leave auto indexing
    disabled for the tests that run afterwards.
    """
    redirected = create_record("lit")
    new_record = create_record("lit")
    deleted = create_record("lit")

    # disable signals so re-indexing won't run automatically after record update
    models_committed.disconnect(index_after_commit)
    try:
        # redirect one record
        new_record_data = dict(new_record)
        new_record_data["deleted_records"] = [redirected["self"]]
        new_record.update(new_record_data)

        # delete one record
        deleted.delete()
    finally:
        # re-enable signals, even if the update or delete above failed
        models_committed.connect(index_after_commit)

    # check if deleted and redirected were left in ES
    current_search.flush_and_refresh("*")

    expected_control_numbers = [
        redirected.control_number,
        new_record.control_number,
        deleted.control_number,
    ]
    results = LiteratureSearch().query_from_iq("").execute()
    control_numbers_from_es = [x.control_number for x in results.hits]
    assert set(control_numbers_from_es) == set(expected_control_numbers)

    cli.invoke(["index", "reindex", "-p", "lit"])
    current_search.flush_and_refresh("*")

    expected_control_numbers = [new_record.control_number]
    results = LiteratureSearch().query_from_iq("").execute()
    control_numbers_from_es = [x.control_number for x in results.hits]
    assert set(control_numbers_from_es) == set(expected_control_numbers)
Exemple #28
0
def test_reindex_all_types_records(inspire_app, cli):
    """Check that ``index reindex --all`` indexes lit, aut, con and job records.

    One record of each type is created through the factory; after the reindex
    the first hit of each type's search must be the created record.
    """
    created = {
        "lit": create_record_factory("lit"),
        "aut": create_record_factory("aut"),
        "job": create_record_factory("job"),
        "con": create_record_factory("con"),
    }

    cli.invoke(["index", "reindex", "--all"])
    current_search.flush_and_refresh("*")

    # Run all searches first, then compare, in the order lit, aut, con, job.
    search_by_type = (
        ("lit", LiteratureSearch),
        ("aut", AuthorsSearch),
        ("con", ConferencesSearch),
        ("job", JobsSearch),
    )
    first_hit_ids = {
        pid_type: search_cls().execute().hits.hits[0]["_id"]
        for pid_type, search_cls in search_by_type
    }

    for pid_type, _ in search_by_type:
        assert str(created[pid_type].id) == first_hit_ids[pid_type]
Exemple #29
0
def test_migrate_from_mirror_doesnt_index_deleted_records(inspire_app):
    """Check the ES hit count for pid value 12345 after migrating two fixtures.

    ``dummy.xml`` and ``deleted_record.xml`` are migrated in that order;
    exactly one ES hit is then expected for the record resolved from pid value
    12345 (presumably the non-deleted one -- the fixtures are not visible
    here).
    """
    fixture_names = ("dummy.xml", "deleted_record.xml")
    # Resolve both fixture paths before migrating either file.
    fixture_paths = [
        pkg_resources.resource_filename(
            __name__, os.path.join("fixtures", fixture_name))
        for fixture_name in fixture_names
    ]
    for fixture_path in fixture_paths:
        migrate_from_file(fixture_path)
    current_search.flush_and_refresh("records-hep")

    record_uuid = LiteratureRecord.get_uuid_from_pid_value(12345)
    es_hits = LiteratureSearch().get_record(str(record_uuid)).execute().hits

    assert 1 == len(es_hits)
Exemple #30
0
def test_indexer_oai_set_CERN_arxiv(inspire_app):
    """Check the ``_oai`` section of a record with a CERN report number and an arXiv eprint."""
    record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "report_numbers": [{"value": "CERN-2020-001"}],
                "arxiv_eprints": [{"value": "2009.01484"}],
            },
        ))
    record.index(delay=False)
    es_record = LiteratureSearch.get_record_data_from_es(record)

    wanted_oai = {
        "id": f"oai:inspirehep.net:{record['control_number']}",
        # NOTE(review): the fixed timestamp presumably relies on the clock
        # being frozen outside this block -- confirm.
        "updated": "1994-12-19T00:00:00",
        "sets": [inspire_app.config["OAI_SET_CERN_ARXIV"]],
    }

    # Compare field by field, in the order id, updated, sets.
    for oai_field, wanted_value in wanted_oai.items():
        assert wanted_value == es_record["_oai"][oai_field]