def record_with_two_revisions(app):
    """Store literature record 111 twice so that it ends up with two saves.

    The record is first saved with the title ``record rev0`` and then saved
    again with the title ``record rev1``; every save is committed on its own.
    Everything before the ``yield`` is setup, and the record is removed with
    ``_delete_record`` afterwards. ``app`` is not used directly in the body.
    """
    schema_url = 'http://localhost:5000/schemas/records/hep.json'
    record = {
        '$schema': schema_url,
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'record rev0'}],
        'self': {'$ref': schema_url},
        '_collections': ['Literature'],
    }

    # One save per title; the first assignment is a no-op because the
    # record already carries the ``record rev0`` title.
    for revision_title in ('record rev0', 'record rev1'):
        record['titles'][0]['title'] = revision_title
        with db.session.begin_nested():
            record_insert_or_replace(record)
        db.session.commit()

    yield

    _delete_record('lit', 111)
def test_save_roots(workflow_app):
    """Check that ``save_roots`` leaves the head with sources a, b and c.

    Two records are created (head and update), a workflow object is pointed
    at both through ``extra_data``, and roots are inserted for each of them
    before ``save_roots`` is invoked.
    """
    # XXX: for some reason, this must be internal.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    head = record_insert_or_replace(fake_record('title1', 123))
    update = record_insert_or_replace(fake_record('title2', 456))

    workflow = workflow_object_class.create(data={}, data_type='hep')
    workflow.extra_data['head_uuid'] = str(head.id)
    workflow.extra_data['update_uuid'] = str(update.id)
    workflow.save()

    # roots attached to the head
    insert_wf_record_source(json={}, record_uuid=head.id, source='a')
    insert_wf_record_source(json={}, record_uuid=head.id, source='b')

    # roots attached to the update
    # this will not be saved because there's already an entry with source `a`
    insert_wf_record_source(json={}, record_uuid=update.id, source='a')
    insert_wf_record_source(json={}, record_uuid=update.id, source='c')

    save_roots(workflow, None)

    for expected_source in ('a', 'b', 'c'):
        assert read_wf_record_source(str(head.id), expected_source)
def record_with_two_revisions(app):
    """Save literature record 111 two times with different titles.

    Setup stores the record with the title ``record rev0``, changes the title
    to ``record rev1`` and stores it again. After the ``yield`` the record is
    deleted through ``_delete_record``. ``app`` is not referenced in the body.
    """
    def _save(payload):
        # Write inside a nested transaction, then commit the session.
        with db.session.begin_nested():
            record_insert_or_replace(payload)
        db.session.commit()

    hep_schema = 'http://localhost:5000/schemas/records/hep.json'
    two_rev_record = {
        '$schema': hep_schema,
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'record rev0'}],
        'self': {'$ref': hep_schema},
        '_collections': ['Literature'],
    }

    _save(two_rev_record)
    two_rev_record['titles'][0]['title'] = 'record rev1'
    _save(two_rev_record)

    yield

    _delete_record('lit', 111)
def _create_record(record_json):
    """Insert or replace ``record_json``, commit and refresh the ES indices.

    Returns the same ``record_json`` object that was passed in.
    """
    nested_transaction = db.session.begin_nested()
    with nested_transaction:
        record_insert_or_replace(record_json)
    db.session.commit()

    # Refresh without an index name, exactly one call.
    es.indices.refresh()

    return record_json
def book_with_another_document_type(app):
    """Temporarily give literature record 1373790 a second document type.

    Before the ``yield`` the record's ``document_type`` is set to
    ``['book', 'proceedings']``; afterwards it is set back to ``['book']``.
    """
    def _store_document_types(document_types):
        # Always start from a freshly fetched copy of the record.
        book = get_db_record('lit', 1373790)
        book['document_type'] = document_types
        record_insert_or_replace(book)

    _store_document_types(['book', 'proceedings'])

    yield

    _store_document_types(['book'])
def test_manual_merge_existing_records(workflow_app):
    """Run a manual merge of records 1 and 2 and check the resulting state.

    The two records disagree on ``core`` so that the merge has a conflict.
    After the workflow is resolved, the test checks the workflow status and
    flags, that no roots are stored, and that the head and the deleted
    record link to each other.
    """
    # XXX: for some reason, this must be internal.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    head_json = fake_record('This is the HEAD', 1)
    update_json = fake_record('While this is the update', 2)

    # these two fields will create a merging conflict
    head_json['core'] = True
    update_json['core'] = False

    head_record = record_insert_or_replace(head_json)
    update_record = record_insert_or_replace(update_json)
    # ids are read before the merger is started
    head_uuid, update_uuid = head_record.id, update_record.id

    workflow_id = start_merger(head_id=1, update_id=2, current_user_id=1)
    do_resolve_manual_merge_wf(workflow_app, workflow_id)

    # retrieve it again, otherwise Detached Instance Error
    workflow = workflow_object_class.get(workflow_id)

    assert workflow.status == ObjectStatus.COMPLETED
    assert workflow.extra_data['approved'] is True
    assert workflow.extra_data['auto-approved'] is False

    # no root present before
    assert read_wf_record_source(head_uuid, 'arxiv') is None
    assert read_wf_record_source(
        update_uuid, get_source(update_record)) is None

    # check that head's content has been replaced by merged
    deleted = RecordMetadata.query.filter_by(id=update_uuid).one()
    latest = get_db_record('lit', 1)

    assert deleted.json['deleted'] is True

    # check deleted record is linked in the latest one
    link_to_deleted = {'$ref': 'http://localhost:5000/api/literature/2'}
    assert [link_to_deleted] == latest['deleted_records']

    # check the merged record is linked in the deleted one
    link_to_merged = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert link_to_merged == deleted.json['new_record']

    del latest['deleted_records']
    assert latest == workflow.data  # -> resulted merged record
def jhep_with_malformed_title(app):
    """Temporarily append the title variant ``+++++`` to journal 1213103.

    The variant is appended and stored before the ``yield``; afterwards the
    last entry of ``title_variants`` is dropped and the record stored again.
    """
    journal = get_db_record('jou', 1213103)
    journal['title_variants'].append('+++++')
    record_insert_or_replace(journal)

    yield

    # Fetch the record again and remove the last title variant.
    journal = get_db_record('jou', 1213103)
    variants = journal['title_variants']
    journal['title_variants'] = variants[:-1]
    record_insert_or_replace(journal)
def test_manual_merge_existing_records(workflow_app):
    """Manually merge record 2 into record 1 and verify the outcome.

    ``core`` differs between the two inputs so that the merge produces a
    conflict. The assertions cover the workflow object, the absence of
    stored roots, and the cross references between the two records.
    """
    # XXX: for some reason, this must be internal.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    head_payload = fake_record('This is the HEAD', 1)
    update_payload = fake_record('While this is the update', 2)

    # these two fields will create a merging conflict
    head_payload['core'] = True
    update_payload['core'] = False

    stored_head = record_insert_or_replace(head_payload)
    stored_update = record_insert_or_replace(update_payload)
    stored_head_id = stored_head.id
    stored_update_id = stored_update.id

    merger_kwargs = {'head_id': 1, 'update_id': 2, 'current_user_id': 1}
    wf_id = start_merger(**merger_kwargs)

    do_resolve_manual_merge_wf(workflow_app, wf_id)

    # retrieve it again, otherwise Detached Instance Error
    wf_obj = workflow_object_class.get(wf_id)

    assert wf_obj.status == ObjectStatus.COMPLETED
    assert wf_obj.extra_data['approved'] is True
    assert wf_obj.extra_data['auto-approved'] is False

    # no root present before
    head_root = read_wf_record_source(stored_head_id, 'arxiv')
    assert head_root is None

    update_source = get_source(stored_update)
    update_root = read_wf_record_source(stored_update_id, update_source)
    assert update_root is None

    # check that head's content has been replaced by merged
    deleted_metadata = RecordMetadata.query.filter_by(
        id=stored_update_id).one()
    merged_head = get_db_record('lit', 1)

    assert deleted_metadata.json['deleted'] is True

    # check deleted record is linked in the latest one
    assert [
        {'$ref': 'http://localhost:5000/api/literature/2'},
    ] == merged_head['deleted_records']

    # check the merged record is linked in the deleted one
    assert {
        '$ref': 'http://localhost:5000/api/literature/1',
    } == deleted_metadata.json['new_record']

    del merged_head['deleted_records']
    assert merged_head == wf_obj.data  # -> resulted merged record
def jhep_with_malformed_title(app):
    """Add a ``+++++`` title variant to journal 1213103 for a while.

    Setup appends the variant and stores the record. The code after the
    ``yield`` stores the record again without its last title variant.
    """
    pid_type, recid = 'jou', 1213103

    jhep = get_db_record(pid_type, recid)
    jhep['title_variants'].append('+++++')
    record_insert_or_replace(jhep)

    yield

    jhep = get_db_record(pid_type, recid)
    # Everything but the last variant is kept.
    jhep['title_variants'] = jhep['title_variants'][:-1]
    record_insert_or_replace(jhep)
def cern_with_hal_id(app):
    """Temporarily give institution 902725 a HAL external identifier.

    Setup stores the identifier and refreshes ``records-institutions``.
    After the ``yield`` the ``external_system_identifiers`` key is deleted,
    the record is stored again and the index is refreshed once more.
    """
    institution = get_db_record('ins', 902725)
    hal_identifier = {'schema': 'HAL', 'value': '300037'}
    institution['external_system_identifiers'] = [hal_identifier]
    record_insert_or_replace(institution)
    es.indices.refresh('records-institutions')

    yield

    institution = get_db_record('ins', 902725)
    del institution['external_system_identifiers']
    record_insert_or_replace(institution)
    es.indices.refresh('records-institutions')
def test_manual_merge_with_none_record(workflow_app):
    """``start_merger`` must raise ``RecordGetterError`` for an unknown id.

    Only the head (record 1) is stored; the update id 123456789 is never
    inserted by this test.
    """
    # XXX: for some reason, this must be internal.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    record_insert_or_replace(fake_record('This is the HEAD', 1))
    missing_update_id = 123456789

    with pytest.raises(RecordGetterError):
        start_merger(
            head_id=1,
            update_id=missing_update_id,
            current_user_id=1,
        )
def test_manual_merge_with_none_record(workflow_app):
    """Starting a merge with an update id that was not inserted must fail.

    The head is stored as record 1 and the merger is then started with the
    update id 123456789; ``RecordGetterError`` is the expected exception.
    """
    # XXX: for some reason, this must be internal.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    head_payload = fake_record('This is the HEAD', 1)
    record_insert_or_replace(head_payload)

    merger_kwargs = {
        'head_id': 1,
        'update_id': 123456789,
        'current_user_id': 1,
    }
    with pytest.raises(RecordGetterError):
        start_merger(**merger_kwargs)
def merged_records(app):
    """Store a merged record (111) and the deleted record (222) it replaced.

    Both records are built from MARCXML snippets with ``marcxml2record``,
    given the HEP ``$schema`` and stored in one nested transaction. The
    ``records-hep`` index is refreshed before the ``yield``; afterwards both
    records are removed with ``_delete_merged_records``.
    """
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'

    # Record 111 references 222 in field 981.
    merged_snippet = (
        '<record>'
        ' <controlfield tag="001">111</controlfield>'
        ' <datafield tag="245" ind1=" " ind2=" ">'
        ' <subfield code="a">merged</subfield>'
        ' </datafield>'
        ' <datafield tag="981" ind1=" " ind2=" ">'
        ' <subfield code="a">222</subfield>'
        ' </datafield>'
        ' <datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="a">HEP</subfield>'
        ' </datafield>'
        '</record>'
    )
    # Record 222 references 111 in field 970 and carries DELETED in 980.
    deleted_snippet = (
        '<record>'
        ' <controlfield tag="001">222</controlfield>'
        ' <datafield tag="245" ind1=" " ind2=" ">'
        ' <subfield code="a">deleted</subfield>'
        ' </datafield>'
        ' <datafield tag="970" ind1=" " ind2=" ">'
        ' <subfield code="d">111</subfield>'
        ' </datafield>'
        ' <datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="a">HEP</subfield>'
        ' <subfield code="c">DELETED</subfield>'
        ' </datafield>'
        '</record>'
    )

    merged_json = marcxml2record(merged_snippet)
    merged_json['$schema'] = hep_schema

    deleted_json = marcxml2record(deleted_snippet)
    deleted_json['$schema'] = hep_schema

    with db.session.begin_nested():
        merged_uuid = record_insert_or_replace(merged_json).id
        deleted_uuid = record_insert_or_replace(deleted_json).id
    db.session.commit()

    es.indices.refresh('records-hep')

    yield

    _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
def merged_records(app):
    """Insert records 111 (merged) and 222 (deleted) from MARCXML snippets.

    Each snippet is converted with ``marcxml2record`` and tagged with the HEP
    ``$schema`` before both are stored and committed. ``records-hep`` is
    refreshed before the ``yield``, and ``_delete_merged_records`` cleans up
    afterwards.
    """
    merged_snippet = (
        '<record>'
        ' <controlfield tag="001">111</controlfield>'
        ' <datafield tag="245" ind1=" " ind2=" ">'
        ' <subfield code="a">merged</subfield>'
        ' </datafield>'
        ' <datafield tag="981" ind1=" " ind2=" ">'
        ' <subfield code="a">222</subfield>'
        ' </datafield>'
        ' <datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="a">HEP</subfield>'
        ' </datafield>'
        '</record>'
    )
    deleted_snippet = (
        '<record>'
        ' <controlfield tag="001">222</controlfield>'
        ' <datafield tag="245" ind1=" " ind2=" ">'
        ' <subfield code="a">deleted</subfield>'
        ' </datafield>'
        ' <datafield tag="970" ind1=" " ind2=" ">'
        ' <subfield code="d">111</subfield>'
        ' </datafield>'
        ' <datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="a">HEP</subfield>'
        ' <subfield code="c">DELETED</subfield>'
        ' </datafield>'
        '</record>'
    )

    # Convert in the original order: merged first, deleted second.
    converted = []
    for marcxml in (merged_snippet, deleted_snippet):
        converted_record = marcxml2record(marcxml)
        converted_record['$schema'] = (
            'http://localhost:5000/schemas/records/hep.json')
        converted.append(converted_record)
    merged_record, deleted_record = converted

    with db.session.begin_nested():
        stored_merged = record_insert_or_replace(merged_record)
        merged_uuid = stored_merged.id
        stored_deleted = record_insert_or_replace(deleted_record)
        deleted_uuid = stored_deleted.id
    db.session.commit()

    es.indices.refresh('records-hep')

    yield

    _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
def cern_with_hal_id(app):
    """Attach a HAL identifier to institution 902725 for a while.

    The identifier list is written before the ``yield`` and the whole
    ``external_system_identifiers`` key is deleted after it. Every write is
    followed by a refresh of the ``records-institutions`` index.
    """
    def _persist(record):
        # Store the record, then refresh the institutions index.
        record_insert_or_replace(record)
        es.indices.refresh('records-institutions')

    cern = get_db_record('ins', 902725)
    cern['external_system_identifiers'] = [
        {'schema': 'HAL', 'value': '300037'},
    ]
    _persist(cern)

    yield

    cern = get_db_record('ins', 902725)
    del cern['external_system_identifiers']
    _persist(cern)
def not_yet_merged_records(app):
    """Store literature records 111 and 222 with no merge link between them.

    Both records are inserted in a single nested transaction and committed.
    After the ``yield`` they are removed with ``_delete_merged_records``.
    """
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'

    merged_record = {
        '$schema': hep_schema,
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'merged'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/111'},
        '_collections': ['Literature'],
    }
    deleted_record = {
        '$schema': hep_schema,
        'control_number': 222,
        'document_type': ['article'],
        'titles': [{'title': 'deleted'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/222'},
        '_collections': ['Literature'],
    }

    with db.session.begin_nested():
        merged_uuid = record_insert_or_replace(merged_record).id
        deleted_uuid = record_insert_or_replace(deleted_record).id
    db.session.commit()

    yield

    _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
def insert_journals_in_db(workflow_app):
    """Temporarily add two journal records, loaded from JSON fixtures.

    The fixture files are read from the ``fixtures`` directory of this
    package, stored in one nested transaction and committed. After the
    ``yield`` journals 1936475 and 1936476 are deleted. The
    ``records-journals`` index is refreshed after setup and after cleanup.
    """
    from inspirehep.modules.migrator.tasks import record_insert_or_replace  # imported here because it is a Celery task

    fixture_names = (
        'jou_record_refereed.json',
        'jou_record_refereed_and_proceedings.json',
    )
    # Load every fixture first, then insert them in the same order.
    journals = []
    for fixture_name in fixture_names:
        raw_json = pkg_resources.resource_string(
            __name__, os.path.join('fixtures', fixture_name))
        journals.append(json.loads(raw_json))

    with db.session.begin_nested():
        for journal in journals:
            record_insert_or_replace(journal)
    db.session.commit()
    es.indices.refresh('records-journals')

    yield

    for control_number in (1936475, 1936476):
        _delete_record('jou', control_number)
    es.indices.refresh('records-journals')
def deleted_record(app):
    """Store literature record 111 built from a MARCXML snippet.

    The snippet carries ``DELETED`` in field 980. It is converted with
    ``create_record`` followed by ``hep.do``, given the HEP ``$schema`` and
    stored. After the ``yield`` the record is removed with
    ``_delete_record``.
    """
    marcxml = (
        '<record>'
        ' <controlfield tag="001">111</controlfield>'
        ' <datafield tag="245" ind1=" " ind2=" ">'
        ' <subfield code="a">deleted</subfield>'
        ' </datafield>'
        ' <datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="c">DELETED</subfield>'
        ' </datafield>'
        '</record>'
    )

    parsed_marc = create_record(marcxml)
    hep_record = hep.do(parsed_marc)
    hep_record['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

    with db.session.begin_nested():
        record_insert_or_replace(hep_record)
    db.session.commit()

    yield

    _delete_record('lit', 111)
def not_yet_merged_records(app):
    """Insert the two separate literature records 111 and 222.

    Neither record refers to the other. Both are written inside one nested
    transaction, and ``_delete_merged_records`` removes them after the
    ``yield``.
    """
    def _article(control_number, title, self_ref):
        # Every call builds fresh lists and dicts, so nothing is shared
        # between the two records.
        return {
            '$schema': 'http://localhost:5000/schemas/records/hep.json',
            'control_number': control_number,
            'document_type': ['article'],
            'titles': [{'title': title}],
            'self': {'$ref': self_ref},
            '_collections': ['Literature'],
        }

    merged_record = _article(
        111, 'merged', 'http://localhost:5000/api/literature/111')
    deleted_record = _article(
        222, 'deleted', 'http://localhost:5000/api/literature/222')

    with db.session.begin_nested():
        stored_merged = record_insert_or_replace(merged_record)
        merged_uuid = stored_merged.id
        stored_deleted = record_insert_or_replace(deleted_record)
        deleted_uuid = stored_deleted.id
    db.session.commit()

    yield

    _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
def test_get_head_source_return_arxiv_when_one_arxive_source_present(
        app, simple_record):
    """``get_head_source`` must return ``arxiv`` once an arxiv root exists.

    With only an ``ejl`` root stored the expected head source is
    ``publisher``; after an ``arxiv`` root is added it becomes ``arxiv``.
    """
    # XXX: for some reason, this must be internal.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    record_uuid = record_insert_or_replace(simple_record).id

    # two sources for the same record
    insert_wf_record_source(
        json=simple_record, record_uuid=record_uuid, source='ejl')
    assert get_head_source(record_uuid) == 'publisher'

    insert_wf_record_source(
        json=simple_record, record_uuid=record_uuid, source='arxiv')
    assert get_head_source(record_uuid) == 'arxiv'
def test_save_roots(workflow_app):
    """After ``save_roots`` the head must have roots for a, b and c.

    A head and an update record are stored, their ids are written to the
    workflow object's ``extra_data``, and two roots are inserted for each
    record before ``save_roots`` runs.
    """
    # XXX: for some reason, this must be internal.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    head_record = record_insert_or_replace(fake_record('title1', 123))
    update_record = record_insert_or_replace(fake_record('title2', 456))

    wf_obj = workflow_object_class.create(
        data={},
        data_type='hep',
    )
    wf_obj.extra_data['head_uuid'] = str(head_record.id)
    wf_obj.extra_data['update_uuid'] = str(update_record.id)
    wf_obj.save()

    insert_wf_record_source(json={}, record_uuid=head_record.id, source='a')
    insert_wf_record_source(json={}, record_uuid=head_record.id, source='b')

    # this will not be saved because there's already an entry with source `a`
    insert_wf_record_source(json={}, record_uuid=update_record.id, source='a')
    insert_wf_record_source(json={}, record_uuid=update_record.id, source='c')

    save_roots(wf_obj, None)

    for source_name in 'abc':
        assert read_wf_record_source(str(head_record.id), source_name)
def deleted_record(app):
    """Store literature record 111 converted from a MARCXML snippet.

    Field 980 of the snippet holds both ``HEP`` and ``DELETED``. The result
    of ``marcxml2record`` gets the HEP ``$schema`` and is stored and
    committed. After the ``yield`` the record is removed with
    ``_delete_record``.
    """
    marcxml = (
        '<record>'
        ' <controlfield tag="001">111</controlfield>'
        ' <datafield tag="245" ind1=" " ind2=" ">'
        ' <subfield code="a">deleted</subfield>'
        ' </datafield>'
        ' <datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="a">HEP</subfield>'
        ' <subfield code="c">DELETED</subfield>'
        ' </datafield>'
        '</record>'
    )

    hep_record = marcxml2record(marcxml)
    hep_record['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

    nested_transaction = db.session.begin_nested()
    with nested_transaction:
        record_insert_or_replace(hep_record)
    db.session.commit()

    yield

    _delete_record('lit', 111)
def not_yet_deleted_record(app):
    """Store literature record 111 titled ``deleted`` without a deleted flag.

    The record is inserted and committed before the ``yield`` and removed
    with ``_delete_record`` afterwards.
    """
    schema_url = 'http://localhost:5000/schemas/records/hep.json'
    live_record = {
        '$schema': schema_url,
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'deleted'}],
        # The ``self`` reference holds the schema URL, as in the stored data.
        'self': {'$ref': schema_url},
    }

    with db.session.begin_nested():
        record_insert_or_replace(live_record)
    db.session.commit()

    yield

    _delete_record('lit', 111)
def insert_journals_in_db(workflow_app):
    """Temporarily add five journal records, loaded from JSON fixtures.

    All fixture files are read from the ``fixtures`` directory of this
    package, inserted in one nested transaction and committed. After the
    ``yield`` the journals 1936475, 1936476, 1936480, 1936481 and 1936482
    are deleted. ``records-journals`` is refreshed after setup and cleanup.
    """
    from inspirehep.modules.migrator.tasks import record_insert_or_replace  # imported here because it is a Celery task

    def _load_fixture(file_name):
        # Read and decode one JSON fixture shipped with this package.
        return json.loads(pkg_resources.resource_string(
            __name__, os.path.join('fixtures', file_name)))

    # All fixtures are loaded up front, in insertion order.
    journals = [
        _load_fixture(file_name)
        for file_name in (
            'jou_record_fully_covered_1.json',
            'jou_record_partially_covered_1.json',
            'jou_record_partially_covered_2.json',
            'jou_record_refereed.json',
            'jou_record_refereed_and_proceedings.json',
        )
    ]

    with db.session.begin_nested():
        for journal in journals:
            record_insert_or_replace(journal)
    db.session.commit()
    es.indices.refresh('records-journals')

    yield

    for control_number in (1936475, 1936476, 1936480, 1936481, 1936482):
        _delete_record('jou', control_number)
    es.indices.refresh('records-journals')
def test_get_head_source_return_arxiv_when_one_arxive_source_present(
        app, simple_record):
    """Check the head source before and after an arxiv root is stored.

    The record first gets an ``ejl`` root (expected head source:
    ``publisher``) and then an ``arxiv`` root (expected: ``arxiv``).
    """
    # XXX: for some reason, this must be internal.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    stored = record_insert_or_replace(simple_record)
    stored_uuid = stored.id

    # two sources for the same record
    # each step is (source to insert, head source expected afterwards)
    steps = (
        ('ejl', 'publisher'),
        ('arxiv', 'arxiv'),
    )
    for source, expected_head_source in steps:
        insert_wf_record_source(
            json=simple_record, record_uuid=stored_uuid, source=source)
        assert get_head_source(stored_uuid) == expected_head_source
def records_to_be_merged(app):
    """Store records 111, 222 and 333, where 333 holds a reference to 222.

    All three are inserted in one nested transaction and committed, and
    ``records-hep`` is refreshed before the ``yield``. Cleanup removes
    111 and 222 with ``_delete_merged_records`` and 333 with
    ``_delete_record``.
    """
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'

    merged_record = {
        '$schema': hep_schema,
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'merged'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/111'},
        '_collections': ['Literature'],
    }
    deleted_record = {
        '$schema': hep_schema,
        'control_number': 222,
        'document_type': ['article'],
        'titles': [{'title': 'deleted'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/222'},
        '_collections': ['Literature'],
    }
    # This record references literature/222 in accelerator_experiments.
    pointing_record = {
        '$schema': hep_schema,
        'accelerator_experiments': [
            {'record': {'$ref': 'http://localhost:5000/api/literature/222'}},
        ],
        'control_number': 333,
        'document_type': ['article'],
        'titles': [{'title': 'pointing'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/333'},
        '_collections': ['Literature'],
    }

    with db.session.begin_nested():
        merged_uuid = record_insert_or_replace(merged_record).id
        deleted_uuid = record_insert_or_replace(deleted_record).id
        record_insert_or_replace(pointing_record)
    db.session.commit()

    es.indices.refresh('records-hep')

    yield

    _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
    _delete_record('lit', 333)
def records_to_be_merged(app):
    """Insert records 111 and 222 plus record 333, which references 222.

    The three records are written in one nested transaction, the session is
    committed and ``records-hep`` is refreshed. After the ``yield`` the
    records are deleted again (111/222 via ``_delete_merged_records``,
    333 via ``_delete_record``).
    """
    schema_url = 'http://localhost:5000/schemas/records/hep.json'

    merged_record = {
        '$schema': schema_url,
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'merged'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/111'},
    }
    deleted_record = {
        '$schema': schema_url,
        'control_number': 222,
        'document_type': ['article'],
        'titles': [{'title': 'deleted'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/222'},
    }
    # accelerator_experiments holds a reference to literature/222.
    reference_to_deleted = {
        'record': {'$ref': 'http://localhost:5000/api/literature/222'},
    }
    pointing_record = {
        '$schema': schema_url,
        'accelerator_experiments': [reference_to_deleted],
        'control_number': 333,
        'document_type': ['article'],
        'titles': [{'title': 'pointing'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/333'},
    }

    with db.session.begin_nested():
        stored_merged = record_insert_or_replace(merged_record)
        merged_uuid = stored_merged.id
        stored_deleted = record_insert_or_replace(deleted_record)
        deleted_uuid = stored_deleted.id
        record_insert_or_replace(pointing_record)
    db.session.commit()

    es.indices.refresh('records-hep')

    yield

    _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
    _delete_record('lit', 333)