def test_on_deleted_record_index_on_InspireRecord():
    """Check ``InspireRecord._record_index`` for a record flagged as deleted.

    The same payload is expected whether ``force_delete`` is omitted,
    passed as ``False`` or passed as ``True``.
    """
    deleted_record = {"control_number": 4321, "deleted": True}
    expected = {"uuid": "1", "force_delete": True}

    # An empty mapping stands for "argument not passed at all".
    for extra_kwargs in ({}, {"force_delete": False}, {"force_delete": True}):
        indexed = InspireRecord._record_index(deleted_record, _id=1, **extra_kwargs)
        assert indexed == expected
def test_resolving_download_method(
    fsopen_mock, base_app, db, create_record_factory, init_files_db
):
    """Exercise ``_find_and_add_file`` with external and local file URLs.

    Steps, as visible in the calls below:

    * an external URL and an unknown ``/api/files/...`` URL both yield ``None``;
    * an unknown local URL combined with ``original_url`` yields a key that
      is then listed in the record's files;
    * a second record asked for the first record's local URL gets the
      same key.
    """
    first_metadata = create_record_factory("lit")
    second_metadata = create_record_factory("lit")
    first_record = InspireRecord.get_record(first_metadata.id)
    second_record = InspireRecord.get_record(second_metadata.id)

    assert first_record._find_and_add_file("http://missing_url.com") is None

    unknown_local_url = (
        f"/api/files/{uuid.uuid4()}/{hashlib.sha1(b'test-hash').hexdigest()}"
    )
    assert first_record._find_and_add_file(unknown_local_url) is None

    # Same kind of unknown local URL, but this time with an original URL given.
    url_with_original = (
        f"/api/files/{uuid.uuid4()}/{hashlib.sha1(b'test-hash').hexdigest()}"
    )
    key = first_record._find_and_add_file(
        url=url_with_original,
        original_url="http://document_url.cern.ch/file.pdf",
    )
    assert key is not None
    assert key in first_record.files.keys

    bucket_id = first_record.files[key].bucket_id
    local_uri = f"/api/files/{bucket_id}/{key}"

    second_key = second_record._find_and_add_file(url=local_uri)
    assert second_key is not None
    assert second_key == key
    assert second_key in second_record.files.keys
def assert_continuous_migration():
    """Assert the state expected after the continuous migration has run.

    Relies on names from the enclosing scope (``citer_control_number``,
    ``cited_control_number``, ``inspire_app`` and ``redis``). Checks the
    database citation count, the Elasticsearch documents, the citations API
    response and that the ``legacy_records`` redis list is empty.
    """
    citer = InspireRecord.get_record_by_pid_value(citer_control_number, "lit")
    cited = InspireRecord.get_record_by_pid_value(cited_control_number, "lit")

    assert cited.citation_count == 1

    citer_es = InspireSearch.get_record_data_from_es(citer)
    result_citer_control_number = citer_es["control_number"]
    assert citer_control_number == result_citer_control_number

    cited_es = InspireSearch.get_record_data_from_es(cited)
    result_cited_control_number = cited_es["control_number"]
    assert cited_control_number == result_cited_control_number

    citations_url = f"/api/literature/{result_cited_control_number}/citations"
    with inspire_app.test_client() as client:
        response = client.get(citations_url)
        result = response.json
    result_citation_count = result["metadata"]["citation_count"]

    assert 1 == result_citation_count
    assert redis.llen("legacy_records") == 0
def test_recalculate_citations(app, celery_app_with_context, celery_session_worker):
    """Run ``recalculate_citations`` for a citing/cited pair of records.

    Both records are created with ``disable_citation_update=True``. After
    the task finishes there must be exactly one ``RecordCitations`` row for
    the cited record, pointing at the citing record, and the cited record
    must report a citation count of 1.
    """
    cited_data = faker.record("lit", with_control_number=True)
    cited = InspireRecord.create(cited_data, disable_citation_update=True)
    db.session.commit()
    cited_control_number = cited["control_number"]

    citing_data = faker.record(
        "lit",
        literature_citations=[cited_control_number],
        with_control_number=True,
    )
    citing = InspireRecord.create(citing_data, disable_citation_update=True)
    db.session.commit()

    async_result = recalculate_citations.delay([cited.id, citing.id])
    async_result.get(timeout=5)

    citation_row = RecordCitations.query.filter_by(cited_id=cited.id).one()
    assert citing.id == citation_row.citer_id

    # Fetch the cited record again before reading its citation count.
    cited = InspireRecord.get_record_by_pid_value(cited_control_number, "lit")
    expected_citation_count = 1
    assert expected_citation_count == cited.citation_count
def test_delete_record_with_files(
    fsopen_mock, base_app, db, create_record, init_files_db, enable_files
):
    """Delete a record whose file is also attached to another record.

    Two records get the same URL attached and must share one ``file_id``.
    After deleting the first record, its files are gone and the head
    version of its object is marked deleted without a ``file_id``, while
    the second record's object is untouched.
    """
    figure_url = "http://figure_url.cern.ch/file.png"

    first_metadata = create_record("lit")
    first_record = InspireRecord.get_record(first_metadata.id)
    first_file = first_record._add_file(url=figure_url)

    second_metadata = create_record("lit")
    second_record = InspireRecord.get_record(second_metadata.id)
    assert first_record.id != second_record.id
    second_file = second_record._add_file(url=figure_url)

    first_obj = first_record.files[first_file["key"]].obj
    second_obj = second_record.files[second_file["key"]].obj
    assert first_obj.file_id == second_obj.file_id

    first_record.delete()

    assert len(first_record.files.keys) == 0
    assert first_obj.is_head is False

    versions = first_obj.get_versions(first_obj.bucket_id, first_obj.key)
    head_versions = [version for version in versions if version.is_head is True]
    current_head = head_versions[0]
    assert current_head.is_head is True
    assert current_head.deleted is True
    assert current_head.file_id is None

    assert second_obj.is_head is True
    assert second_obj.deleted is False
    assert second_obj.file_id is not None
def test_get_record_raise_stale_data(inspire_app, clean_celery_session):
    """``get_record`` must raise ``StaleDataError`` for an unknown version.

    The requested ``record_version`` is the stored ``version_id`` plus 10.
    """
    literature = LiteratureRecord.create(data=faker.record("lit"))
    db.session.commit()

    missing_version = literature.model.version_id + 10

    with pytest.raises(StaleDataError):
        InspireRecord.get_record(literature.id, record_version=missing_version)
def test_update_relations_with_modified_experiments(
    app, celery_app_with_context, celery_session_worker
):
    """Run ``update_relations`` for a paper that links to an experiment.

    The literature record is created with ``disable_relations_update=True``.
    After the task finishes exactly one ``ExperimentLiterature`` row must
    exist for the experiment and point at the literature record.
    """
    experiment = InspireRecord.create(faker.record("exp", with_control_number=True))
    db.session.commit()

    experiment_control_number = experiment["control_number"]
    exp_ref = f"http://localhost:8000/api/experiments/{experiment_control_number}"

    literature_data = faker.record("lit", with_control_number=True)
    literature_data["accelerator_experiments"] = [
        {"legacy_name": "LIGO", "record": {"$ref": exp_ref}}
    ]
    literature = InspireRecord.create(literature_data, disable_relations_update=True)
    db.session.commit()

    async_result = update_relations.delay([literature.id])
    async_result.get(timeout=5)

    relation = ExperimentLiterature.query.filter_by(
        experiment_uuid=experiment.id
    ).one()
    assert relation.literature_uuid == literature.id
def assert_migrator_task():
    """Assert the state expected after the migrator task has run.

    Relies on names from the enclosing scope (``citer_control_number``,
    ``citing_control_number``, ``author_control_number`` and
    ``invalid_control_number``). Checks the citation count of the
    ``citing_control_number`` record, that each record's Elasticsearch
    document carries its control number, and that the invalid control
    number has no ``lit`` PID.
    """
    record_citer = InspireRecord.get_record_by_pid_value(citer_control_number, "lit")
    record_citing = InspireRecord.get_record_by_pid_value(citing_control_number, "lit")
    record_author = InspireRecord.get_record_by_pid_value(author_control_number, "aut")

    assert record_citing.citation_count == 1

    # (expected control number, record) pairs, checked in this order.
    es_checks = (
        (citer_control_number, record_citer),
        (citing_control_number, record_citing),
        (author_control_number, record_author),
    )
    for expected_control_number, db_record in es_checks:
        es_document = InspireSearch.get_record_data_from_es(db_record)
        assert expected_control_number == es_document["control_number"]

    with pytest.raises(PIDDoesNotExistError):
        InspireRecord.get_record_by_pid_value(invalid_control_number, "lit")
def test_process_references_in_records_reindexes_experiments_when_linked_experiments_change(
    app, celery_app_with_context, celery_session_worker
):
    """``process_references_in_records`` must reindex linked experiments.

    An experiment and a paper linking to it are created while the
    ``index_after_commit`` receiver is disconnected from
    ``models_committed``. After the task has run, the experiment document
    in Elasticsearch must report ``number_of_papers == 1``.
    """
    # Disconnect this signal so records don't get indexed on commit.
    models_committed.disconnect(index_after_commit)
    try:
        experiment_data = faker.record("exp", with_control_number=True)
        experiment = InspireRecord.create(experiment_data)
        db.session.commit()

        experiment_control_number = experiment["control_number"]
        exp_ref = f"http://localhost:8000/api/experiments/{experiment_control_number}"

        data = faker.record("lit", with_control_number=True)
        data["accelerator_experiments"] = [
            {"legacy_name": "LIGO", "record": {"$ref": exp_ref}}
        ]
        record = InspireRecord.create(data)
        db.session.commit()
    finally:
        # Reconnect even when the setup fails, so the receiver is not left
        # disconnected for whatever runs after this test.
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([record.id])
    task.get(timeout=5)

    experiment_record_es = InspireSearch.get_record_data_from_es(experiment)
    expected_number_of_paper = 1

    assert expected_number_of_paper == experiment_record_es["number_of_papers"]
def test_get_records_pid_from_field():
    """Check ``_get_linked_pids_from_field`` on three different paths.

    Expected ``(pid_type, pid_value)`` pairs are asserted for a nested
    ``$ref``, for a field without any ``$ref`` and for a ``$ref`` whose
    value is a list.
    """
    data = {
        "references": [
            {
                "record": "http://labs.inspirehep.net/api/literature/98765",
                "reference": {
                    "misc": ["abcd", "defg"],
                    "label": "qwerty",
                    "record": {
                        "$ref": "http://labs.inspirehep.net/api/literature/339134"
                    },
                },
            }
        ],
        "publication_info": {"year": 1984},
        "some_stuff": {"other_stuff": "not_related"},
        "different_field": "http://labs.inspirehep.net/api/literature/329134",
        "other_record": {"$ref": ["http://labs.inspirehep.net/api/literature/319136"]},
    }

    # (path, expected pids) cases, asserted in this order.
    cases = (
        ("references.reference.record", [("lit", "339134")]),
        ("some_stuff", []),
        ("other_record", [("lit", "319136")]),
    )
    for path, expected in cases:
        assert InspireRecord._get_linked_pids_from_field(data, path) == expected
def test_update_relations_with_modified_institutions(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """Run ``update_relations`` for a paper whose author has an affiliation.

    The literature record is created with ``disable_relations_update=True``.
    After the task finishes exactly one ``InstitutionLiterature`` row must
    exist for the institution and point at the literature record.
    """
    institution = InspireRecord.create(faker.record("ins", with_control_number=True))
    db.session.commit()

    institution_control_number = institution["control_number"]
    inst_ref = f"http://localhost:8000/api/institutions/{institution_control_number}"

    affiliation = {"value": "Institution", "record": {"$ref": inst_ref}}
    literature_data = faker.record("lit", with_control_number=True)
    literature_data["authors"] = [
        {"full_name": "John Doe", "affiliations": [affiliation]}
    ]
    literature = InspireRecord.create(literature_data, disable_relations_update=True)
    db.session.commit()

    async_result = update_relations.delay([literature.id])
    async_result.get(timeout=5)

    relation = InstitutionLiterature.query.filter_by(
        institution_uuid=institution.id
    ).one()
    assert relation.literature_uuid == literature.id
def test_get_modified_references(base_app, db, es_clear):
    """Check ``get_modified_references`` after create, update and delete.

    The citing record first references one record, is then updated to
    reference a second one, and is finally deleted. The expected list of
    ids is asserted after each commit.
    """
    cited_record_1 = InspireRecord.create(faker.record("lit"))

    citing_data = faker.record(
        "lit", literature_citations=[cited_record_1["control_number"]]
    )
    citing_record = LiteratureRecord.create(citing_data)
    db.session.commit()

    assert citing_record.get_modified_references() == [cited_record_1.id]

    cited_record_2 = InspireRecord.create(faker.record("lit"))

    # Replace the reference list so that it only points at the second record.
    new_reference = {
        "record": {
            "$ref": f"http://localhost:5000/api/literature/{cited_record_2['control_number']}"
        }
    }
    citing_data["references"] = [new_reference]
    citing_record.update(citing_data)
    db.session.commit()

    assert citing_record.get_modified_references() == [cited_record_2.id]

    citing_record.delete()
    db.session.commit()

    assert citing_record.get_modified_references() == [cited_record_2.id]
def test_id_is_not_written_to_record_for_stale_data_push(
    mock_hal_create, mock_update_record_with_new_ids, inspire_app, get_fixture
):
    """Push to HAL while the first id write-back raises ``StaleDataError``.

    ``mock_update_record_with_new_ids`` raises on its first call and
    delegates to ``update_record_with_new_ids`` on later calls. After
    ``_hal_push`` the record must list ``hal:123456`` as its HAL identifier.
    """
    receipt = Deposit_Receipt()
    receipt.id = "hal:123456"
    mock_hal_create.return_value = receipt

    def side_effect(*args, **kwargs):
        # Every call after the first one goes to the real implementation.
        if side_effect.counter != 0:
            return update_record_with_new_ids(*args, **kwargs)
        side_effect.counter += 1
        raise StaleDataError

    side_effect.counter = 0
    mock_update_record_with_new_ids.side_effect = side_effect

    literature_json = orjson.loads(get_fixture("hal_preprod_record.json"))
    literature_data = faker.record("lit", data=literature_json)
    record = InspireRecord.create(literature_data)

    institute_json = orjson.loads(get_fixture("hal_preprod_institute.json"))
    institute_data = faker.record("ins", data=institute_json)
    InspireRecord.create(institute_data)

    _hal_push(record)

    record = InspireRecord.get_record_by_pid_value(record["control_number"], "lit")
    hal_ids = get_values_for_schema(record["external_system_identifiers"], "HAL")
    assert hal_ids == ["hal:123456"]
def test_add_file_already_attached(
    fsopen_mock, base_app, db, create_record_factory, init_files_db
):
    """Attach the same URL several times, to one record and then to another.

    Asserted below: attaching the URL twice to one record yields the same
    key and a single file; a second record gets the same key and
    ``file_id`` but a different bucket; attaching via the second record's
    own ``/api/files/...`` URL returns a non-``None`` result.
    """
    figure_url = "http://figure_url.cern.ch/file.png"

    first_metadata = create_record_factory("lit")
    first_record = InspireRecord.get_record(first_metadata.id)

    expected_filename = "file.png"
    with_key = first_record._add_file(url=figure_url, key=expected_filename)
    without_key = first_record._add_file(url=figure_url)

    assert with_key["key"] == without_key["key"]
    assert len(first_record.files.keys) == 1

    second_metadata = create_record_factory("lit")
    second_record = InspireRecord.get_record(second_metadata.id)
    assert first_record.id != second_record.id

    on_second_record = second_record._add_file(url=figure_url)
    assert with_key["key"] == on_second_record["key"]

    first_obj = first_record.files[with_key["key"]].obj
    second_obj = second_record.files[on_second_record["key"]].obj
    assert first_obj.bucket_id != second_obj.bucket_id
    assert first_obj.file_id == second_obj.file_id

    local_url = f"/api/files/{second_record.files.bucket.id}/{second_obj.key}"
    from_local_url = second_record._add_file(url=local_url)
    assert from_local_url is not None
def test_push_happy_flow(inspire_app, get_fixture):
    """Push a record to HAL twice: once to create it and once to update it.

    After the first push the receipt id must be stored as the record's
    first HAL external identifier. The second push must again return a
    parsed receipt.
    """
    literature_json = orjson.loads(get_fixture("hal_preprod_record.json"))
    literature_data = faker.record("lit", data=literature_json)
    record = InspireRecord.create(literature_data)

    institute_json = orjson.loads(get_fixture("hal_preprod_institute.json"))
    institute_data = faker.record("ins", data=institute_json)
    InspireRecord.create(institute_data)

    # hal create
    create_receipt = _hal_push(record)
    assert create_receipt
    assert create_receipt.parsed
    hal_id = create_receipt.id
    assert hal_id

    updated_record = InspireRecord.get_record_by_pid_value(
        record["control_number"], "lit"
    )
    external_ids = get_value(updated_record, "external_system_identifiers", [])
    stored_hal_ids = get_values_for_schema(external_ids, "HAL")
    assert stored_hal_ids[0] == hal_id

    # hal update
    update_receipt = _hal_push(record)
    assert update_receipt
    assert update_receipt.parsed
def test_get_conference_record(app, get_fixture):
    """``get_conference_record`` must return the linked conference record.

    A conference record is created from a fixture, then a literature record
    whose ``publication_info`` references a conference. The input snippets
    are validated against the ``hep`` schema before use.
    """
    expected_json = orjson.loads(get_fixture("expected_conference_record.json"))
    expected_record_data = faker.record("con", data=expected_json)
    expected_record = InspireRecord.create(expected_record_data)

    hep_properties = load_schema("hep")["properties"]
    control_number_schema = hep_properties["control_number"]
    publication_info_schema = hep_properties["publication_info"]

    conference_record = {"control_number": 1692403}
    assert validate(conference_record["control_number"], control_number_schema) is None

    conference_ref = {"$ref": "http://localhost:5000/api/conferences/972464"}
    data = {"publication_info": [{"conference_record": conference_ref}]}
    assert validate(data["publication_info"], publication_info_schema) is None

    literature = InspireRecord.create(faker.record("lit", data))
    result = get_conference_record(literature)

    assert expected_record == result

    literature.delete()
def test_process_references_in_records_reindexes_conferences_when_pub_info_changes(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """``process_references_in_records`` must reindex linked conferences.

    A conference and a conference paper linking to it are created while
    the ``index_after_commit`` receiver is disconnected from
    ``models_committed``. After the task has run, the conference document
    in Elasticsearch must report ``number_of_contributions == 1``.
    """
    # Disconnect this signal so records don't get indexed on commit.
    models_committed.disconnect(index_after_commit)
    try:
        conference_data = faker.record("con", with_control_number=True)
        conference_record = InspireRecord.create(conference_data)
        conference_control_number = conference_record["control_number"]
        conf_ref = f"http://localhost:8000/api/conferences/{conference_control_number}"

        data = faker.record("lit", with_control_number=True)
        data["publication_info"] = [{"conference_record": {"$ref": conf_ref}}]
        data["document_type"] = ["conference paper"]
        record = InspireRecord.create(data)
        db.session.commit()
    finally:
        # Reconnect before calling process_references_in_records, and also
        # when the setup fails, so the receiver is not left disconnected
        # for whatever runs after this test.
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([record.id])
    # Only completion matters here; the task's return value is not inspected.
    task.get(timeout=5)

    conference_record_es = InspireSearch.get_record_data_from_es(conference_record)
    expected_number_of_contributions = 1

    assert (
        expected_number_of_contributions
        == conference_record_es["number_of_contributions"]
    )
def test_data_citation_count_property(base_app, db, es):
    """Check ``citation_count`` on a data record cited by a paper.

    The data record must report one citation and the citing literature
    record none.
    """
    data_record = InspireRecord.create(faker.record("dat"))

    citing_data = faker.record(
        "lit", data_citations=[data_record["control_number"]]
    )
    citing_record = InspireRecord.create(citing_data)

    assert data_record.citation_count == 1
    assert citing_record.citation_count == 0
def test_get_linked_records_in_field_empty(inspire_app):
    """An empty record must yield no linked records for ``references.record``."""
    empty_record = InspireRecord({})

    linked = list(empty_record.get_linked_records_from_field("references.record"))

    assert 0 == len(linked)
    assert [] == linked
def test_get_literature_recids_for_orcid_raises_if_two_authors_are_found(
    inspire_app, datadir
):
    """Re-saving an author under a new control number must raise.

    The author from ``1061000.json`` is created, its ``control_number`` is
    changed to 1061001, and ``create_or_update`` is expected to raise
    ``PIDAlreadyExists``.
    """
    author_json = (datadir / "1061000.json").read_text()
    create_record("aut", data=json.loads(author_json))

    author = InspireRecord.get_record_by_pid_value(1061000, pid_type="aut")
    author["control_number"] = 1061001

    with pytest.raises(PIDAlreadyExists):
        InspireRecord.create_or_update(author)
def test_create_records_from_mirror_recids_with_different_types_of_record(inspire_app):
    """Migrate a valid paper, an invalid paper and a valid author.

    Three MARCXML records are added to the legacy mirror and
    ``create_records_from_mirror_recids`` is called for all of them. The
    ids of the valid literature and author records must appear in the task
    results, and record 667 must have no ``lit`` PID afterwards.
    """
    literature_marcxml = (
        b"<record>"
        b' <controlfield tag="001">666</controlfield>'
        b' <datafield tag="245" ind1=" " ind2=" ">'
        b' <subfield code="a">On the validity of INSPIRE records</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEP</subfield>'
        b" </datafield>"
        b"</record>"
    )
    invalid_marcxml = (
        b"<record>"
        b' <controlfield tag="001">667</controlfield>'
        b' <datafield tag="260" ind1=" " ind2=" ">'
        b' <subfield code="c">Definitely not a date</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEP</subfield>'
        b" </datafield>"
        b"</record>"
    )
    author_marcxml = (
        b"<record>"
        b' <controlfield tag="001">668</controlfield>'
        b' <datafield tag="100" ind1=" " ind2=" ">'
        b' <subfield code="a">Jessica Jones</subfield>'
        b' <subfield code="q">Jones Jessica</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEPNAMES</subfield>'
        b" </datafield>"
        b"</record>"
    )

    # Parse and stage each mirror entry in turn: literature, invalid, author.
    for marcxml in (literature_marcxml, invalid_marcxml, author_marcxml):
        mirror_entry = LegacyRecordsMirror.from_marcxml(marcxml)
        db.session.add(mirror_entry)

    task_results = create_records_from_mirror_recids([666, 667, 668])

    literature = InspireRecord.get_record_by_pid_value(666, "lit")
    assert str(literature.id) in task_results

    author = InspireRecord.get_record_by_pid_value(668, "aut")
    assert str(author.id) in task_results

    with pytest.raises(PIDDoesNotExistError):
        InspireRecord.get_record_by_pid_value(667, "lit")
def test_get_linked_records_in_field_empty(base_app, db, es, create_record_factory):
    """A record built from ``{}`` has nothing linked under ``references.record``."""
    nothing_expected = []

    blank = InspireRecord({})
    found = blank.get_linked_records_from_field("references.record")
    found = list(found)

    assert 0 == len(found)
    assert nothing_expected == found
def test_get_literature_recids_for_orcid_still_works_if_author_has_no_ids(
    inspire_app, datadir
):
    """Look up an ORCID after the author's ``ids`` field has been removed.

    The author from ``1061000.json`` is created, stripped of ``ids`` and
    saved again. ``get_literature_recids_for_orcid`` must then raise
    ``NoResultFound`` for the ORCID used below.
    """
    author_json = (datadir / "1061000.json").read_text()
    create_record("aut", data=json.loads(author_json))

    author = InspireRecord.get_record_by_pid_value(1061000, pid_type="aut")
    del author["ids"]
    author = InspireRecord.create_or_update(author)

    with pytest.raises(NoResultFound):
        get_literature_recids_for_orcid("0000-0003-4792-9178")
def get_parent_record(data):
    """Return the first record linked as the parent of ``data``.

    When ``data["doc_type"]`` is ``"inproceedings"`` the link is read from
    ``publication_info.conference_record``; otherwise it is read from
    ``publication_info.parent_record``.

    Returns:
        The first item produced by
        ``InspireRecord.get_linked_records_from_dict_field``, or ``{}`` when
        it produces nothing.
    """
    if data.get("doc_type") == "inproceedings":
        linked_field = "publication_info.conference_record"
    else:
        linked_field = "publication_info.parent_record"

    linked_records = InspireRecord.get_linked_records_from_dict_field(
        data, linked_field
    )
    return next(linked_records, {})
def test_create_or_update_record_from_db_depending_on_its_pid_type(base_app, db, es):
    """``create_or_update`` must return a ``JobsRecord`` for job data.

    The check is made once on creation and once more after the
    ``description`` has been changed and the data saved again.
    """
    data = faker.record("job")
    record = InspireRecord.create_or_update(data)

    # isinstance() is the idiomatic type check, replacing ``type(x) == T``.
    assert isinstance(record, JobsRecord)
    assert record.pid_type == "job"

    data_update = {"description": "Updated"}
    data.update(data_update)
    record = InspireRecord.create_or_update(data)

    assert isinstance(record, JobsRecord)
    assert record.pid_type == "job"
def test_create_or_update_record_from_db_depending_on_its_pid_type(inspire_app):
    """Job data must come back from ``create_or_update`` as a ``JobsRecord``.

    Verified for the initial save and for a second save after changing the
    ``description``.
    """

    def save_and_check(payload):
        # Save the payload and verify the class and pid type of the result.
        saved = InspireRecord.create_or_update(payload)
        assert isinstance(saved, JobsRecord)
        assert saved.pid_type == "job"

    job_data = faker.record("job")
    save_and_check(job_data)

    job_data.update({"description": "Updated"})
    save_and_check(job_data)
def test_create_or_update_record_from_db_depending_on_its_pid_type(inspire_app):
    """Experiment data must come back as an ``ExperimentsRecord``.

    Verified for the initial save and for a second save after setting
    ``deleted`` to ``True``.
    """
    experiment_data = faker.record("exp", with_control_number=True)

    created = InspireRecord.create_or_update(experiment_data)
    assert isinstance(created, ExperimentsRecord)
    assert created.pid_type == "exp"

    experiment_data.update({"deleted": True})

    updated = InspireRecord.create_or_update(experiment_data)
    assert isinstance(updated, ExperimentsRecord)
    assert updated.pid_type == "exp"
def test_create_or_update_record_from_db_depending_on_its_pid_type(inspire_app):
    """``create_or_update`` must return a ``DataRecord`` for data records.

    The check is made once on creation and once more after ``deleted`` has
    been set to ``True`` and the data saved again.
    """
    data = faker.record("dat")
    record = InspireRecord.create_or_update(data)

    # isinstance() is the idiomatic type check, replacing ``type(x) == T``.
    assert isinstance(record, DataRecord)
    assert record.pid_type == "dat"

    data_update = {"deleted": True}
    data.update(data_update)
    record = InspireRecord.create_or_update(data)

    assert isinstance(record, DataRecord)
    assert record.pid_type == "dat"
def test_create_or_update_record_from_db_depending_on_its_pid_type(inspire_app):
    """Conference data must come back as a ``ConferencesRecord``.

    Verified for the initial save and for a second save after replacing
    ``titles``.
    """
    conference_data = faker.record("con")

    # First pass saves the data as generated; second pass applies new titles.
    for changes in ({}, {"titles": [{"title": "UPDATED"}]}):
        conference_data.update(changes)
        saved = InspireRecord.create_or_update(conference_data)
        assert isinstance(saved, ConferencesRecord)
        assert saved.pid_type == "con"
def test_create_or_update_record_from_db_depending_on_its_pid_type(base_app, db, es):
    """``create_or_update`` must return an ``ExperimentsRecord`` for experiments.

    The check is made once on creation and once more after ``deleted`` has
    been set to ``True`` and the data saved again.
    """
    data = faker.record("exp")
    record = InspireRecord.create_or_update(data)

    # isinstance() is the idiomatic type check, replacing ``type(x) == T``.
    assert isinstance(record, ExperimentsRecord)
    assert record.pid_type == "exp"

    data_update = {"deleted": True}
    data.update(data_update)
    record = InspireRecord.create_or_update(data)

    assert isinstance(record, ExperimentsRecord)
    assert record.pid_type == "exp"