def test_save_roots(workflow_app):
    """``save_roots`` unions head and update roots; newest per source wins."""
    head_rec = InspireRecord.create_or_update(fake_record('title1', 123), skip_files=False)
    head_rec.commit()
    update_rec = InspireRecord.create_or_update(fake_record('title2', 456), skip_files=False)
    update_rec.commit()

    wf_obj = workflow_object_class.create(data={}, data_type='hep')
    wf_obj.extra_data['head_uuid'] = str(head_rec.id)
    wf_obj.extra_data['update_uuid'] = str(update_rec.id)
    wf_obj.save()

    # Union: keep the most recently created/updated root from each source.
    insert_wf_record_source(json={'version': 'original'}, record_uuid=head_rec.id, source='arxiv')
    insert_wf_record_source(json={'version': 'updated'}, record_uuid=update_rec.id, source='arxiv')
    insert_wf_record_source(json={'version': 'updated'}, record_uuid=update_rec.id, source='publisher')

    save_roots(wf_obj, None)

    for source_name in ('arxiv', 'publisher'):
        merged_root = read_wf_record_source(head_rec.id, source_name)
        assert merged_root.json == {'version': 'updated'}
        # The update record's roots were moved over to the head record.
        assert not read_wf_record_source(update_rec.id, source_name)
def test_save_roots(workflow_app):
    """``save_roots`` copies the update's roots onto the head, head wins on clashes."""
    # XXX: for some reason, this must be internal.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    head_rec = record_insert_or_replace(fake_record('title1', 123))
    update_rec = record_insert_or_replace(fake_record('title2', 456))

    wf_obj = workflow_object_class.create(data={}, data_type='hep')
    wf_obj.extra_data['head_uuid'] = str(head_rec.id)
    wf_obj.extra_data['update_uuid'] = str(update_rec.id)
    wf_obj.save()

    insert_wf_record_source(json={}, record_uuid=head_rec.id, source='a')
    insert_wf_record_source(json={}, record_uuid=head_rec.id, source='b')
    # this will not be saved because there's already an entry with source `a`
    insert_wf_record_source(json={}, record_uuid=update_rec.id, source='a')
    insert_wf_record_source(json={}, record_uuid=update_rec.id, source='c')

    save_roots(wf_obj, None)

    # After saving, the head carries a root for every source seen.
    for source_name in ('a', 'b', 'c'):
        assert read_wf_record_source(str(head_rec.id), source_name)
def test_save_roots(workflow_app):
    """Roots attached to the update record are carried over to the head."""
    head_rec = InspireRecord.create_or_update(fake_record('title1', 123), skip_files=False)
    head_rec.commit()
    update_rec = InspireRecord.create_or_update(fake_record('title2', 456), skip_files=False)
    update_rec.commit()

    wf_obj = workflow_object_class.create(data={}, data_type='hep')
    wf_obj.extra_data['head_uuid'] = str(head_rec.id)
    wf_obj.extra_data['update_uuid'] = str(update_rec.id)
    wf_obj.save()

    insert_wf_record_source(json={}, record_uuid=head_rec.id, source='a')
    insert_wf_record_source(json={}, record_uuid=head_rec.id, source='b')
    # this will not be saved because there's already an entry with source `a`
    insert_wf_record_source(json={}, record_uuid=update_rec.id, source='a')
    insert_wf_record_source(json={}, record_uuid=update_rec.id, source='c')

    save_roots(wf_obj, None)

    # The head now owns a root for each source from either record.
    for source_name in ('a', 'b', 'c'):
        assert read_wf_record_source(str(head_rec.id), source_name)
def test_manual_merge_existing_records(workflow_app):
    """End-to-end manual merge of two existing records.

    After the merge workflow is resolved, the update record is marked
    deleted and cross-linked with the head, whose content is replaced by
    the merged record.
    """
    json_head = fake_record('This is the HEAD', 1)
    json_update = fake_record('While this is the update', 2)

    # These two fields will create a merging conflict.
    json_head['core'] = True
    json_update['core'] = False

    head = InspireRecord.create_or_update(json_head, skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(json_update, skip_files=False)
    update.commit()
    head_id = head.id
    update_id = update.id

    obj_id = start_merger(
        head_id=1,
        update_id=2,
        current_user_id=1,
    )

    do_resolve_manual_merge_wf(workflow_app, obj_id)

    # retrieve it again, otherwise Detached Instance Error
    obj = workflow_object_class.get(obj_id)

    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data['approved'] is True
    assert obj.extra_data['auto-approved'] is False

    # no root present before
    last_root = read_wf_record_source(head_id, 'arxiv')
    assert last_root is None

    update_source = LiteratureReader(update).source
    root_update = read_wf_record_source(update_id, update_source)
    assert root_update is None

    # check that head's content has been replaced by merged
    deleted_record = RecordMetadata.query.filter_by(id=update_id).one()
    latest_record = get_db_record('lit', 1)
    assert deleted_record.json['deleted'] is True

    # check deleted record is linked in the latest one
    deleted_rec_ref = {'$ref': 'http://localhost:5000/api/literature/2'}
    assert [deleted_rec_ref] == latest_record['deleted_records']

    # check the merged record is linked in the deleted one
    new_record_metadata = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert new_record_metadata == deleted_record.json['new_record']

    del latest_record['deleted_records']
    assert latest_record == obj.data  # -> resulted merged record
def test_manual_merge_existing_records(workflow_app):
    """End-to-end manual merge of two existing records.

    After the merge workflow is resolved, the update record is marked
    deleted and cross-linked with the head, whose content is replaced by
    the merged record.
    """
    json_head = fake_record('This is the HEAD', 1)
    json_update = fake_record('While this is the update', 2)

    # These two fields will create a merging conflict.
    json_head['core'] = True
    json_update['core'] = False

    head = InspireRecord.create_or_update(json_head, skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(json_update, skip_files=False)
    update.commit()
    head_id = head.id
    update_id = update.id

    obj_id = start_merger(
        head_id=1,
        update_id=2,
        current_user_id=1,
    )

    do_resolve_manual_merge_wf(workflow_app, obj_id)

    # retrieve it again, otherwise Detached Instance Error
    obj = workflow_object_class.get(obj_id)

    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data['approved'] is True
    assert obj.extra_data['auto-approved'] is False

    # no root present before
    last_root = read_wf_record_source(head_id, 'arxiv')
    assert last_root is None

    update_source = LiteratureReader(update).source
    root_update = read_wf_record_source(update_id, update_source)
    assert root_update is None

    # check that head's content has been replaced by merged
    deleted_record = RecordMetadata.query.filter_by(id=update_id).one()
    latest_record = get_db_record('lit', 1)
    assert deleted_record.json['deleted'] is True

    # check deleted record is linked in the latest one
    deleted_rec_ref = {'$ref': 'http://localhost:5000/api/literature/2'}
    assert [deleted_rec_ref] == latest_record['deleted_records']

    # check the merged record is linked in the deleted one
    new_record_metadata = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert new_record_metadata == deleted_record.json['new_record']

    del latest_record['deleted_records']
    assert latest_record == obj.data  # -> resulted merged record
def test_merge_with_conflicts_callback_url(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """A conflicting update halts the workflow; saving via the callback keeps it halted.

    The callback PUT stores the (still conflicting) payload and reports
    success, but the workflow stays HALTED and no root is written.
    """
    with patch(
            'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
            ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')
        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id
        eng_uuid = start('article', object_id=update_workflow_id)
        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]
        conflicts = obj.extra_data.get('conflicts')
        expected_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        # The merge halted on conflicts and exposed the resolution callback.
        assert obj.status == ObjectStatus.HALTED
        assert expected_url == obj.extra_data.get('callback_url')
        assert len(conflicts) == 1
        assert obj.extra_data.get('is-update') is True
        assert obj.extra_data['merger_root'] == RECORD_WITH_CONFLICTS

        payload = {
            'id': obj.id,
            'metadata': obj.data,
            '_extra_data': obj.extra_data
        }

        with workflow_app.test_client() as client:
            response = client.put(
                obj.extra_data.get('callback_url'),
                data=json.dumps(payload),
                content_type='application/json',
            )

        data = json.loads(response.get_data())
        expected_message = 'Workflow {} has been saved with conflicts.'.format(
            obj.id)

        assert response.status_code == 200
        assert expected_message == data['message']

        # Reload the workflow: it must still be halted on the conflicts.
        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]

        assert obj.status == ObjectStatus.HALTED
        # No root is stored while the conflicts remain unresolved.
        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root is None
def test_store_root_new_record(workflow_app):
    """``store_root`` persists the merger root for the head record."""
    feature_flags = {'FEATURE_FLAG_ENABLE_MERGER': True}
    engine = MagicMock(workflow_definition=MagicMock(data_type='hep'))

    with patch.dict(current_app.config, feature_flags):
        factory = TestRecordMetadata.create_from_kwargs(index=False, has_pid=False)
        head_uuid = factory.record_metadata.id
        wf_obj = workflow_object_class.create(factory.record_metadata.json)

        merger_root = {
            'version': 'original',
            'acquisition_source': {'source': 'arXiv'}
        }
        wf_obj.extra_data = {
            'head_uuid': str(head_uuid),
            'merger_root': merger_root,
        }

        store_root(wf_obj, engine)

        # The root must be retrievable under the lowercased source name.
        stored_root = read_wf_record_source(head_uuid, 'arxiv')
        assert stored_root.json == merger_root
def test_merge_without_conflicts_rootful(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """An update merged against a pre-existing root completes cleanly.

    With a stored arXiv root for the head, the merge produces no
    conflicts and the stored root is replaced by the incoming update.
    """
    with patch('inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
               ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')
        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id
        # Seed a pre-existing root for the head record's arXiv source.
        insert_wf_record_source(json=ARXIV_ROOT, record_uuid=factory.record_metadata.id, source='arxiv')
        eng_uuid = start('article', object_id=update_workflow_id)
        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]
        conflicts = obj.extra_data.get('conflicts')

        assert obj.status == ObjectStatus.COMPLETED
        assert not conflicts
        assert obj.extra_data.get('callback_url') is None
        assert obj.extra_data.get('is-update') is True
        assert obj.extra_data['merger_head_revision'] == 0
        # The original root is kept in extra_data for later inspection.
        assert obj.extra_data['merger_original_root'] == ARXIV_ROOT

        # The stored root has been replaced by the incoming update.
        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root.json == RECORD_WITH_CONFLICTS
def test_merge_with_disabled_merge_on_update_feature_flag(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
):
    """With the merger feature flag off, the update completes without merging."""
    with patch.dict(workflow_app.config, {'FEATURE_FLAG_ENABLE_MERGER': False}):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')
        workflow_id = build_workflow(RECORD_WITHOUT_CONFLICTS).id
        engine_uuid = start('article', object_id=workflow_id)

        wf_obj = WorkflowEngine.from_uuid(engine_uuid).objects[0]

        assert wf_obj.status == ObjectStatus.COMPLETED
        assert wf_obj.extra_data.get('callback_url') is None
        assert wf_obj.extra_data.get('conflicts') is None
        assert wf_obj.extra_data.get('merged') is True
        assert wf_obj.extra_data.get('merger_root') is None
        assert wf_obj.extra_data.get('is-update') is True

        # No root is ever written when the merger is disabled.
        root_after = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert root_after is None
def test_store_root_new_record(workflow_app):
    """Storing a merger root for a fresh record makes it readable back."""
    engine = MagicMock(workflow_definition=MagicMock(data_type='hep'))

    with patch.dict(current_app.config, {'FEATURE_FLAG_ENABLE_MERGER': True}):
        head = TestRecordMetadata.create_from_kwargs(index=False, has_pid=False)
        head_uuid = head.record_metadata.id

        wf_obj = workflow_object_class.create(head.record_metadata.json)
        root = {
            'version': 'original',
            'acquisition_source': {'source': 'arXiv'}
        }
        wf_obj.extra_data = {
            'head_uuid': str(head_uuid),
            'merger_root': root,
        }

        store_root(wf_obj, engine)

        # Lookup uses the lowercased acquisition source.
        assert read_wf_record_source(head_uuid, 'arxiv').json == root
def test_merge_callback_url_with_malformed_workflow(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """A malformed callback payload is rejected and leaves the workflow untouched."""
    with patch('inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
               ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')
        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id
        eng_uuid = start('article', object_id=update_workflow_id)
        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]
        conflicts = obj.extra_data.get('conflicts')
        expected_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        # The merge halted on conflicts and exposed the resolution callback.
        assert obj.status == ObjectStatus.HALTED
        assert expected_url == obj.extra_data.get('callback_url')
        assert len(conflicts) == 1
        assert obj.extra_data.get('is-update') is True
        assert obj.extra_data['merger_root'] == RECORD_WITH_CONFLICTS

        # 'metadata' and '_extra_data' are strings, not objects: malformed.
        payload = {
            'id': obj.id,
            'metadata': 'Jessica Jones',
            '_extra_data': 'Frank Castle'
        }

        with workflow_app.test_client() as client:
            response = client.put(
                obj.extra_data.get('callback_url'),
                data=json.dumps(payload),
                content_type='application/json',
            )

        data = json.loads(response.get_data())
        expected_message = 'The workflow request is malformed.'

        assert response.status_code == 400
        assert expected_message == data['message']

        # Reload: the bad request must not have changed the workflow state.
        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]

        assert obj.status == ObjectStatus.HALTED
        assert obj.extra_data.get('callback_url') is not None
        assert obj.extra_data.get('conflicts') is not None
        assert obj.extra_data['merger_root'] is not None

        # Still no root stored for the head record.
        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root is None
def test_wf_record_source_read_and_write(dummy_record):
    """A root written for a (record, source) pair can be read back intact."""
    insert_wf_record_source(json=dummy_record, record_uuid=dummy_record.id, source='arXiv')
    db.session.commit()

    stored_root = read_wf_record_source(record_uuid=dummy_record.id, source='arXiv')

    assert dummy_record == stored_root.json
def test_wf_record_with_submitter_source_read_and_write(dummy_record):
    """A root written with the ``submitter`` source round-trips intact."""
    insert_wf_record_source(json=dummy_record, record_uuid=dummy_record.id, source='submitter')
    db.session.commit()

    stored_root = read_wf_record_source(record_uuid=dummy_record.id, source='submitter')

    assert dummy_record == stored_root.json
    # The source label itself is stored verbatim.
    assert 'submitter' == stored_root.source
def test_wf_record_source_read_and_write(dummy_record):
    """Round-trip a workflow record source through the database."""
    insert_wf_record_source(
        json=dummy_record,
        record_uuid=dummy_record.id,
        source='arXiv',
    )
    db.session.commit()

    fetched = read_wf_record_source(record_uuid=dummy_record.id, source='arXiv')
    assert dummy_record == fetched.json
def test_manual_merge_existing_records(mock_put_record_to_hep, mock_store_records, workflow_app):
    """Manual merge of two existing records, with record storage mocked.

    Checks the workflow completes, pushes the merged record via the
    mocked HEP endpoint, and stores no roots for either record.
    """
    json_head = fake_record('This is the HEAD', 1)
    json_update = fake_record('While this is the update', 2)

    # These two fields will create a merging conflict.
    json_head['core'] = True
    json_update['core'] = False

    head = InspireRecord.create_or_update(json_head, skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(json_update, skip_files=False)
    update.commit()
    head_id = head.id
    update_id = update.id

    obj_id = start_merger(
        head_id=1,
        update_id=2,
        current_user_id=1,
    )

    do_resolve_manual_merge_wf(workflow_app, obj_id)

    # The merged record is pushed to HEP through the mocked endpoint.
    mock_put_record_to_hep.assert_called()

    # retrieve it again, otherwise Detached Instance Error
    obj = workflow_object_class.get(obj_id)

    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data['approved'] is True
    assert obj.extra_data['auto-approved'] is False

    # no root present before
    last_root = read_wf_record_source(head_id, 'arxiv')
    assert last_root is None

    update_source = LiteratureReader(update).source
    root_update = read_wf_record_source(update_id, update_source)
    assert root_update is None
def test_merge_without_conflicts_callback_url(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """A conflict-free merge completes and the resolve callback then rejects it.

    The workflow finishes without conflicts and the root is updated to
    the incoming record; a subsequent PUT to the conflict-resolution
    callback for an already-completed workflow returns 400.
    """
    with patch(
            'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
            ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        # Fix: copy the module-level fixture before mutating it, so other
        # tests sharing RECORD_WITHOUT_CONFLICTS are not polluted by the
        # 'arxiv_eprints' key added below.
        record_update = dict(RECORD_WITHOUT_CONFLICTS)
        record_update.update({
            'arxiv_eprints': factory.record_metadata.json.get('arxiv_eprints')
        })
        eng_uuid = start('article', [record_update])
        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]
        conflicts = obj.extra_data.get('conflicts')
        url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        assert obj.status == ObjectStatus.COMPLETED
        assert conflicts is None
        assert obj.extra_data.get('is-update') is True

        # The stored root now matches the incoming update.
        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root.json == record_update

        payload = {
            'id': obj.id,
            'metadata': obj.data,
            '_extra_data': obj.extra_data
        }

        with workflow_app.test_client() as client:
            response = client.put(
                url,
                data=json.dumps(payload),
                content_type='application/json',
            )

        # The workflow is already completed: the callback must refuse it.
        assert response.status_code == 400
def test_save_roots(workflow_app):
    """Per-source union of roots: the newest entry for each source survives."""
    head_rec = InspireRecord.create_or_update(fake_record('title1', 123), skip_files=False)
    head_rec.commit()
    update_rec = InspireRecord.create_or_update(fake_record('title2', 456), skip_files=False)
    update_rec.commit()

    wf_obj = workflow_object_class.create(data={}, data_type='hep')
    wf_obj.extra_data['head_uuid'] = str(head_rec.id)
    wf_obj.extra_data['update_uuid'] = str(update_rec.id)
    wf_obj.save()

    # Union: keep the most recently created/updated root from each source.
    insert_wf_record_source(json={'version': 'original'}, record_uuid=head_rec.id, source='arxiv')
    insert_wf_record_source(json={'version': 'updated'}, record_uuid=update_rec.id, source='arxiv')
    insert_wf_record_source(json={'version': 'updated'}, record_uuid=update_rec.id, source='publisher')

    save_roots(wf_obj, None)

    for source_name in ('arxiv', 'publisher'):
        assert read_wf_record_source(head_rec.id, source_name).json == {'version': 'updated'}
        # Nothing remains attached to the update record.
        assert not read_wf_record_source(update_rec.id, source_name)
def merge_articles(obj, eng):
    """Merge two articles.

    The workflow payload is overwritten by the merged record, the
    conflicts are stored in ``extra_data.conflicts``.

    Also, it adds a ``callback_url`` which contains the endpoint which
    resolves the merge conflicts.

    Note:
        When the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is ``False``
        it will skip the merge.

    """
    if not current_app.config.get('FEATURE_FLAG_ENABLE_MERGER'):
        return None

    # The head is the existing record approved by the matcher.
    matched_control_number = obj.extra_data['matches']['approved']
    head_uuid = PersistentIdentifier.get('lit', matched_control_number).object_uuid
    head_record = InspireRecord.get_record(head_uuid)
    update = obj.data
    update_source = LiteratureReader(obj.data).source
    # Latest stored root for this source, if any; first update means no
    # root yet, so fall back to an empty dict.
    head_root = read_wf_record_source(record_uuid=head_record.id, source=update_source.lower())
    head_root = head_root.json if head_root else {}
    obj.extra_data['head_uuid'] = str(head_uuid)
    # Record the head's version/revision and a copy of the original root,
    # so later steps can refer to the pre-merge state.
    obj.extra_data['head_version_id'] = head_record.model.version_id
    obj.extra_data['merger_head_revision'] = head_record.revision_id
    obj.extra_data['merger_original_root'] = deepcopy(head_root)
    merged, conflicts = merge(
        head=head_record.to_dict(),
        root=head_root,
        update=update,
    )
    # The merged record becomes the workflow payload.
    obj.data = merged
    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        obj.extra_data['conflicts_metadata'] = {
            'datetime': datetime.now().strftime("%b %d, %Y, %H:%M:%S %p"),
            'update_source': update_source,
        }
        # Callers resolve the conflicts through this endpoint.
        obj.extra_data['callback_url'] = \
            get_resolve_merge_conflicts_callback_url()
    obj.save()
def test_save_roots(workflow_app):
    """``save_roots`` merges roots per source; the head's entry takes priority."""
    # XXX: for some reason, this must be internal.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    head_rec = record_insert_or_replace(fake_record('title1', 123))
    update_rec = record_insert_or_replace(fake_record('title2', 456))

    wf_obj = workflow_object_class.create(data={}, data_type='hep')
    wf_obj.extra_data['head_uuid'] = str(head_rec.id)
    wf_obj.extra_data['update_uuid'] = str(update_rec.id)
    wf_obj.save()

    insert_wf_record_source(json={}, record_uuid=head_rec.id, source='a')
    insert_wf_record_source(json={}, record_uuid=head_rec.id, source='b')
    # this will not be saved because there's already an entry with source `a`
    insert_wf_record_source(json={}, record_uuid=update_rec.id, source='a')
    insert_wf_record_source(json={}, record_uuid=update_rec.id, source='c')

    save_roots(wf_obj, None)

    for source_name in ('a', 'b', 'c'):
        assert read_wf_record_source(str(head_rec.id), source_name)
def test_test_wf_record_source_update(dummy_record):
    """Re-inserting a root for the same (record, source) overwrites the content."""
    insert_wf_record_source(json=dummy_record, record_uuid=dummy_record.id, source='arxiv')
    db.session.commit()

    # update the content
    dummy_record['document_type'] = ['article']
    insert_wf_record_source(json=dummy_record, record_uuid=dummy_record.id, source='arxiv')
    db.session.commit()

    stored_root = read_wf_record_source(record_uuid=dummy_record.id, source='arxiv')
    assert dummy_record == stored_root.json
def test_merge_without_conflicts_callback_url(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """A conflict-free merge completes and the resolve callback then rejects it.

    The workflow completes, the root is updated, and a later PUT to the
    conflict-resolution callback returns 400 for the finished workflow.
    """
    with patch('inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
               ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')
        update_workflow_id = build_workflow(RECORD_WITHOUT_CONFLICTS).id
        eng_uuid = start('article', object_id=update_workflow_id)
        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]
        conflicts = obj.extra_data.get('conflicts')
        url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        assert obj.status == ObjectStatus.COMPLETED
        assert conflicts is None
        assert obj.extra_data.get('is-update') is True

        # The stored root now matches the incoming update.
        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root.json == RECORD_WITHOUT_CONFLICTS

        payload = {
            'id': obj.id,
            'metadata': obj.data,
            '_extra_data': obj.extra_data
        }

        with workflow_app.test_client() as client:
            response = client.put(
                url,
                data=json.dumps(payload),
                content_type='application/json',
            )

        # The workflow is already completed: the callback must refuse it.
        assert response.status_code == 400
def test_test_wf_record_source_update(dummy_record):
    """Writing twice for the same (record, source) keeps only the latest content."""
    insert_wf_record_source(
        json=dummy_record,
        record_uuid=dummy_record.id,
        source='arXiv',
    )
    db.session.commit()

    # update the content
    dummy_record['document_type'] = ['article']
    insert_wf_record_source(
        json=dummy_record,
        record_uuid=dummy_record.id,
        source='arXiv',
    )
    db.session.commit()

    fetched = read_wf_record_source(record_uuid=dummy_record.id, source='arXiv')
    assert dummy_record == fetched.json
def merge_articles(obj, eng):
    """Merge two articles.

    The workflow payload is overwritten by the merged record, the
    conflicts are stored in ``extra_data.conflicts``.

    Also, it adds a ``callback_url`` which contains the endpoint which
    resolves the merge conflicts.

    Note:
        When the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is ``False``
        it will skip the merge.

    """
    if not current_app.config.get('FEATURE_FLAG_ENABLE_MERGER'):
        return None

    # The head is the existing record approved by the matcher.
    matched_control_number = obj.extra_data['matches']['approved']
    head_uuid = PersistentIdentifier.get('lit', matched_control_number).object_uuid
    obj.extra_data['head_uuid'] = str(head_uuid)
    head = InspireRecord.get_record(head_uuid)
    update = obj.data
    update_source = get_source(update).lower()
    # Latest stored root for this source; empty dict when none exists yet.
    head_root = read_wf_record_source(record_uuid=head.id, source=update_source)
    head_root = head_root.json if head_root else {}
    merged, conflicts = merge(
        head=head.dumps(),
        root=head_root,
        update=update,
    )
    # The merged record becomes the workflow payload.
    obj.data = merged
    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        # Callers resolve the conflicts through this endpoint.
        obj.extra_data['callback_url'] = \
            get_resolve_merge_conflicts_callback_url()
    obj.save()
def test_empty_root(dummy_record):
    """Reading a root that was never written returns ``None``."""
    assert read_wf_record_source(record_uuid=dummy_record.id, source='Elsevier') is None
def test_wf_record_source_does_not_match_db_content(dummy_record):
    """Committing a record alone creates no workflow-source root."""
    dummy_record.commit()
    db.session.commit()  # write in the db

    assert read_wf_record_source(record_uuid=dummy_record.id, source='Elsevier') is None
def test_wf_record_source_does_not_match_db_content(dummy_record):
    """A record written to the db has no 'publisher' root of its own."""
    dummy_record.commit()
    db.session.commit()  # write in the db

    assert read_wf_record_source(record_uuid=dummy_record.id, source='publisher') is None
def test_empty_root(dummy_record):
    """No 'publisher' root exists for a record that never got one."""
    assert read_wf_record_source(record_uuid=dummy_record.id, source='publisher') is None