def test_revert_revision_works_correctly_and_runs_update(inspire_app):
    """Check that reverting a record to an older revision restores its references.

    A citing record is created with one reference, updated so the reference
    is dropped, and finally reverted to revision 2. After every step the
    reference/citation rows and the revision id are verified.
    """
    cited_record = LiteratureRecord.create(
        {
            "$schema": "http://localhost:5000/schemas/records/hep.json",
            "titles": [{"title": "Test a valid record"}],
            "document_type": ["article"],
            "_collections": ["Literature"],
        }
    )
    cited_url = (
        f"http://localhost:5000/api/literature/{cited_record['control_number']}"
    )
    citing_record = LiteratureRecord.create(
        {
            "$schema": "http://localhost:5000/schemas/records/hep.json",
            "titles": [{"title": "My title"}],
            "document_type": ["article"],
            "_collections": ["Literature"],
            "preprint_date": "2019-06-28",
            "references": [{"record": {"$ref": cited_url}}],
        }
    )
    db.session.commit()

    # Freshly created: one reference on the citer, one citation on the cited.
    assert len(citing_record.model.references) == 1
    assert len(cited_record.model.citations) == 1
    assert citing_record.revision_id == 2

    # Drop the references and persist the change.
    citing_record = LiteratureRecord.get_record(citing_record.id)
    stripped = dict(citing_record)
    stripped.pop("references")
    citing_record.update(stripped)
    db.session.commit()

    citing_record = LiteratureRecord.get_record(citing_record.id)
    assert len(citing_record.model.references) == 0
    assert len(cited_record.model.citations) == 0
    assert citing_record.revision_id == 4

    # Go back to the state that still had the reference.
    citing_record.revert(2)
    db.session.commit()

    citing_record = LiteratureRecord.get_record(citing_record.id)
    assert len(citing_record.model.references) == 1
    assert len(cited_record.model.citations) == 1

    # The reverted content is stored as the next revision rather than
    # rewinding history, so the revision id ends up at 6.
    assert citing_record.revision_id == 6
    assert dict(citing_record.revisions[2]) == dict(citing_record)
def test_match_references(
    inspire_app, cli, celery_app_with_context, celery_session_worker
):
    """Run the ``match references`` CLI and check each citer gets linked.

    Three citing records reference the cited record only by DOI; after the
    command runs with ``-bs 2`` each of them must carry a ``record`` link
    equal to the cited record's ``self``.
    """
    # NOTE(review): a later function in this module has the same name and
    # replaces this one at import time, so pytest would presumably collect
    # only the later definition -- confirm whether this one should be
    # renamed or removed.
    cited_record = create_record_async(
        "lit",
        data={
            "document_type": ["article"],
            "dois": [{"value": "10.1371/journal.pone.0188398"}],
        },
    )
    # reference-matcher requires cited record to be indexed
    cited_record.index(delay=False)

    citer_data = {
        "references": [{"reference": {"dois": ["10.1371/journal.pone.0188398"]}}]
    }
    citers = [create_record_async("lit", data=citer_data) for _ in range(3)]
    citer_ids = [citer.id for citer in citers]

    # An unrelated data record is created alongside the literature records.
    create_record_async("dat")

    outcome = cli.invoke(["match", "references", "-bs", 2])
    assert outcome.exit_code == 0

    for citer_id in citer_ids:
        refreshed = LiteratureRecord.get_record(citer_id)
        linked = get_value(refreshed, "references[0].record")
        assert linked == cited_record["self"]
def test_match_references_by_uuids_dedupes_references_after_matching(inspire_app):
    """Check that matching collapses references that become identical.

    The citer starts with two references sharing one report number but
    pointing at different records; after ``match_references_by_uuids`` only
    a single reference without a ``record`` key is expected.
    """
    linked_targets = (
        "https://inspirehep.net/api/literature/1234",
        "https://inspirehep.net/api/literature/5678",
    )
    references = [
        {
            "reference": {"report_numbers": ["AMBIGUOUS-42"]},
            "record": {"$ref": target},
        }
        for target in linked_targets
    ]
    citer_record = create_record("lit", data={"references": references})

    match_references_by_uuids([str(citer_record.id)])

    reloaded = LiteratureRecord.get_record(citer_record.id)
    assert reloaded["references"] == [
        {"reference": {"report_numbers": ["AMBIGUOUS-42"]}}
    ]
def test_match_references_by_uuids(inspire_app):
    """Check which records ``match_references_by_uuids`` actually updates.

    Expectations after the call:
    - the citer whose UUID was passed gets a ``record`` link to the cited record;
    - the citer whose UUID was not passed stays unlinked;
    - the deleted citer, although passed, stays unlinked.
    """
    # NOTE(review): a later function in this module has the same name and
    # replaces this one at import time, so pytest would presumably collect
    # only the later definition -- confirm whether this one should be
    # renamed or removed.
    cited_record = create_record(
        "lit",
        data={
            "document_type": ["article"],
            "dois": [{"value": "10.1371/journal.pone.0188398"}],
        },
    )

    citer_data = {
        "references": [{"reference": {"dois": ["10.1371/journal.pone.0188398"]}}]
    }
    citer_record = create_record("lit", data=citer_data)
    # won't be passed
    excluded_citer_record = create_record("lit", data=citer_data)
    deleted_record = create_record(
        "lit",
        data={"deleted": True, **citer_data},
        with_control_number=True,
    )
    record_without_references = create_record("lit")

    uuids_to_match = [
        str(rec.id)
        for rec in (citer_record, record_without_references, deleted_record)
    ]
    match_references_by_uuids(uuids_to_match)

    updated_citer_record = LiteratureRecord.get_record(citer_record.id)
    excluded_citer_record = LiteratureRecord.get_record(excluded_citer_record.id)

    # A plain lookup must fail for the deleted record ...
    with pytest.raises(NoResultFound):
        LiteratureRecord.get_record(deleted_record.id)
    # ... so it is fetched again with deleted records explicitly allowed.
    deleted_record = LiteratureRecord.get_record(
        deleted_record.id, with_deleted=True
    )

    linked = get_value(updated_citer_record, "references[0].record")
    assert linked == cited_record["self"]
    assert "record" not in get_value(excluded_citer_record, "references[0]")
    assert "record" not in get_value(deleted_record, "references[0]")
def test_match_references_by_uuids(inspire_app):
    """Check that only records whose UUIDs are passed get their references matched.

    The citer passed to ``match_references_by_uuids`` must end up with a
    ``record`` link to the cited record, while an identical citer that was
    not passed must remain unlinked.
    """
    # NOTE(review): an earlier function in this module has the same name;
    # this definition replaces it at import time -- confirm that dropping
    # the earlier variant is intended.
    cited_record = create_record(
        "lit",
        data={
            "document_type": ["article"],
            "dois": [{"value": "10.1371/journal.pone.0188398"}],
        },
    )

    citer_data = {
        "references": [{"reference": {"dois": ["10.1371/journal.pone.0188398"]}}]
    }
    citer_record = create_record("lit", data=citer_data)
    # won't be passed
    excluded_citer_record = create_record("lit", data=citer_data)
    record_without_references = create_record("lit")

    uuids_to_match = [str(citer_record.id), str(record_without_references.id)]
    match_references_by_uuids(uuids_to_match)

    updated_citer_record = LiteratureRecord.get_record(citer_record.id)
    excluded_citer_record = LiteratureRecord.get_record(excluded_citer_record.id)

    linked = get_value(updated_citer_record, "references[0].record")
    assert linked == cited_record["self"]
    assert "record" not in get_value(excluded_citer_record, "references[0]")
def test_match_references(inspire_app, cli, clean_celery_session):
    """Run the ``match references`` CLI once all records are searchable.

    The test first waits until the three citers show up in ``records-hep``
    and the data record shows up in ``records-data``. It then invokes the
    command with ``-bs 2`` and checks every citer is linked to the cited
    record.
    """
    # NOTE(review): an earlier function in this module has the same name;
    # this definition replaces it at import time -- confirm that dropping
    # the earlier variant is intended.
    cited_record = create_record_async(
        "lit",
        data={
            "document_type": ["article"],
            "dois": [{"value": "10.1371/journal.pone.0188398"}],
        },
    )
    # reference-matcher requires cited record to be indexed
    cited_record.index(delay=False)

    citer_data = {
        "references": [{"reference": {"dois": ["10.1371/journal.pone.0188398"]}}]
    }
    citers = [create_record_async("lit", data=citer_data) for _ in range(3)]
    citer_ids = [citer.id for citer in citers]

    data_record_id = create_record_async("dat").id

    def assert_all_records_are_indexed():
        # Make pending index operations visible before querying.
        current_search.flush_and_refresh("*")

        hep_ids = get_value(es_search("records-hep"), "hits.hits._id")
        for citer_id in citer_ids:
            assert str(citer_id) in hep_ids

        data_ids = get_value(es_search("records-data"), "hits.hits._id")
        assert str(data_record_id) in data_ids

    retry_until_pass(assert_all_records_are_indexed)

    outcome = cli.invoke(["match", "references", "-bs", 2])
    assert outcome.exit_code == 0

    for citer_id in citer_ids:
        refreshed = LiteratureRecord.get_record(citer_id)
        linked = get_value(refreshed, "references[0].record")
        assert linked == cited_record["self"]
def test_redirect_and_delete_record_from_deleted_records_field(inspire_app):
    """Check the effect of listing a record in another record's ``deleted_records``.

    After the update the listed record's control-number PID must be
    redirected, resolving that control number must return the surviving
    record, and the listed record itself must be flagged ``deleted``.
    """
    record_to_delete = create_record("lit")
    record = create_record("lit")

    record.update({**record, "deleted_records": [record_to_delete["self"]]})

    assert len(record["deleted_records"]) == 1

    # The old control number now redirects ...
    assert record_to_delete.control_number_pid.is_redirected()

    # ... and resolving it yields the surviving record.
    resolved = LiteratureRecord.get_record_by_pid_value(
        record_to_delete.control_number
    )
    assert resolved.id == record.id

    # The redirected record can still be loaded when deleted ones are allowed.
    original_record = LiteratureRecord.get_record(
        record_to_delete.id, with_deleted=True
    )
    assert original_record["deleted"] is True
def test_fulltext_indexer_updates_documents_when_record_changed(
    inspire_app, clean_celery_session, override_config
):
    """Check that the indexed document attachment follows a record update.

    With ``FEATURE_FLAG_ENABLE_FULLTEXT`` switched on, a record with one
    fulltext document is created and the test waits for its ``attachment``
    to appear in the search index. The document is then replaced, and the
    test waits until the index shows the new key and a different attachment.
    """
    with override_config(FEATURE_FLAG_ENABLE_FULLTEXT=True):
        initial_document = {
            "source": "arxiv",
            "fulltext": True,
            "filename": "arXiv:nucl-th_9310030.pdf",
            "key": "arXiv:nucl-th_9310030.pdf",
            "url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
        }
        data = faker.record("lit")
        data.update(
            {
                "arxiv_eprints": [
                    {"categories": ["hep-ph"], "value": "hep-ph/9404247"}
                ],
                "documents": [initial_document],
            }
        )
        record = LiteratureRecord.create(data)
        db.session.commit()

        def first_es_hit():
            # First search hit for this record's id.
            return LiteratureSearch().get_record(str(record.id)).execute().hits.hits[0]

        def assert_record_in_es():
            current_search.flush_and_refresh("*")
            hit = first_es_hit()
            assert "attachment" in hit._source["documents"][0]

        retry_until_pass(assert_record_in_es, timeout=90, retry_interval=20)

        # Remember the attachment indexed for the original document.
        record_first_attachment = first_es_hit()["_source"]["documents"][0][
            "attachment"
        ]

        # Reload the record from the database before modifying it.
        db.session.expire_all()
        record = LiteratureRecord.get_record(record.id)
        record["documents"] = [
            {
                "source": "arxiv",
                "fulltext": True,
                "filename": "new_doc.pdf",
                "key": "new_doc.pdf",
                "url": "http://www.africau.edu/images/default/sample.pdf",
            }
        ]
        record.update(dict(record))
        db.session.commit()

        def assert_update_in_es():
            current_search.flush_and_refresh("*")
            indexed_document = first_es_hit()._source["documents"][0]
            assert indexed_document["key"] == "new_doc.pdf"
            assert indexed_document["attachment"] != record_first_attachment

        retry_until_pass(assert_update_in_es, timeout=90, retry_interval=20)