def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Check that an update matching a rejected workflow is flagged.

    A first ``article`` workflow is started, marked as not approved and
    continued.  A second workflow for the same record, with a changed
    title, must then list the first one in ``previously_rejected_matches``.
    """
    record = generate_record()

    # First run: reject the record and let the workflow finish.
    first_engine = WorkflowEngine.from_uuid(start('article', [record]))
    obj_id = first_engine.objects[0].id
    rejected = workflow_object_class.get(obj_id)
    rejected.extra_data["approved"] = False
    rejected.continue_workflow()

    rejected = workflow_object_class.get(obj_id)
    assert rejected.status == ObjectStatus.COMPLETED
    assert rejected.extra_data.get('approved') is False

    es.indices.refresh('holdingpen-hep')

    # Second run: same record with a new title, expected to match the
    # rejected workflow above.
    new_title = 'This is an update that will match the wf in the holdingpen'
    record['titles'][0]['title'] = new_title
    second_engine = WorkflowEngine.from_uuid(start('article', [record]))
    obj2 = second_engine.objects[0]

    assert obj2.extra_data['already-in-holding-pen'] is False
    assert obj2.extra_data['previously_rejected'] is True
    assert obj2.extra_data['previously_rejected_matches'] == [obj_id]
def test_merge_with_conflicts_callback_url(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check the merge-conflict callback when the conflicts are sent back unresolved.

    The update workflow must halt with one conflict and a callback URL;
    PUT-ing the unchanged workflow to that URL returns 200 with a
    "saved with conflicts" message and leaves the workflow halted.
    """
    filters_target = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    with patch(filters_target, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__,
            'merge_record_arxiv.json',
            index_name='records-hep',
        )

        wf_id = build_workflow(RECORD_WITH_CONFLICTS).id
        eng_uuid = start('article', object_id=wf_id)
        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        conflicts = obj.extra_data.get('conflicts')
        expected_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        assert obj.status == ObjectStatus.HALTED
        assert expected_url == obj.extra_data.get('callback_url')
        assert len(conflicts) == 1
        assert obj.extra_data.get('is-update') is True
        assert obj.extra_data['merger_root'] == RECORD_WITH_CONFLICTS

        # Send the workflow back as-is, i.e. with the conflicts still present.
        body = json.dumps({
            'id': obj.id,
            'metadata': obj.data,
            '_extra_data': obj.extra_data,
        })
        with workflow_app.test_client() as client:
            response = client.put(
                obj.extra_data.get('callback_url'),
                data=body,
                content_type='application/json',
            )

        data = json.loads(response.get_data())
        expected_message = 'Workflow {} has been saved with conflicts.'.format(obj.id)

        assert response.status_code == 200
        assert expected_message == data['message']

        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]
        assert obj.status == ObjectStatus.HALTED

        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root is None
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_match,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow.

    The halted workflow is accepted as core, then the robotupload and
    webcoll callbacks are delivered.  The object is expected to be
    ``WAITING`` after the acceptance and after the robotupload callback,
    and ``COMPLETED`` and approved after the webcoll callback.
    """
    record = generate_record()
    workflow_uuid, eng, obj = get_halted_workflow(app=workflow_app, record=record)

    do_accept_core(app=workflow_app, workflow_id=obj.id)

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    assert obj.status == ObjectStatus.WAITING

    do_robotupload_callback(app=workflow_app, workflow_id=obj.id, recids=[12345])

    obj = workflow_object_class.get(obj.id)
    assert obj.status == ObjectStatus.WAITING

    do_webcoll_callback(app=workflow_app, recids=[12345])

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    # It was accepted
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data["approved"] is True
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_match,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow.

    The halted workflow is accepted as core and the robotupload callback
    is delivered.  The object is expected to be ``WAITING`` after the
    acceptance and ``COMPLETED`` and approved after the callback.
    """
    record = generate_record()
    workflow_uuid, eng, obj = get_halted_workflow(app=workflow_app, record=record)

    do_accept_core(app=workflow_app, workflow_id=obj.id)

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    assert obj.status == ObjectStatus.WAITING

    do_robotupload_callback(app=workflow_app, workflow_id=obj.id, recids=[12345])

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    # It was accepted
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data["approved"] is True
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
):
    """Verify that a re-submitted record is matched against its rejected workflow.

    The record is run through the ``article`` workflow once and rejected;
    a second run with an altered title must be marked as previously
    rejected and point at the first workflow object.
    """
    record = generate_record()

    initial_uuid = start('article', [record])
    obj_id = WorkflowEngine.from_uuid(initial_uuid).objects[0].id

    # Reject the record, then resume the workflow.
    first_obj = workflow_object_class.get(obj_id)
    first_obj.extra_data["approved"] = False
    first_obj.continue_workflow()

    first_obj = workflow_object_class.get(obj_id)
    assert first_obj.status == ObjectStatus.COMPLETED
    assert first_obj.extra_data.get('approved') is False

    es.indices.refresh('holdingpen-hep')

    title_entry = record['titles'][0]
    title_entry['title'] = 'This is an update that will match the wf in the holdingpen'

    # The update is expected to match the rejected workflow in the holdingpen.
    update_uuid = start('article', [record])
    obj2 = WorkflowEngine.from_uuid(update_uuid).objects[0]

    assert obj2.extra_data['already-in-holding-pen'] is False
    assert obj2.extra_data['previously_rejected'] is True
    assert obj2.extra_data['previously_rejected_matches'] == [obj_id]
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that a holdingpen match from another source does not stop the workflow.

    A first workflow is left halted.  A second one for the same record,
    with a different title and acquisition source, must report the match
    but must not set ``stopped-matched-holdingpen-wf``.
    """
    record = generate_record()

    pending_wf_id = build_workflow(record).id
    pending_uuid = start('article', object_id=pending_wf_id)
    es.indices.refresh('holdingpen-hep')

    wf_to_match = WorkflowEngine.from_uuid(pending_uuid).objects[0].id
    pending = workflow_object_class.get(wf_to_match)
    # The first workflow stays pending in the holdingpen.
    assert pending.status == ObjectStatus.HALTED

    # Same record, but with a new title and coming from another source.
    record['titles'][0]['title'] = 'This is an update that will match the wf in the holdingpen'
    record['acquisition_source']['source'] = 'but not the source'

    update_wf_id = build_workflow(record).id
    update_uuid = start('article', object_id=update_wf_id)
    updated = WorkflowEngine.from_uuid(update_uuid).objects[0]

    assert updated.extra_data['already-in-holding-pen'] is True
    assert updated.extra_data['holdingpen_matches'] == [wf_to_match]
    assert updated.extra_data['previously_rejected'] is False
    assert not updated.extra_data.get('stopped-matched-holdingpen-wf')
def test_halt(app, halt_workflow, halt_workflow_conditional):
    """Test that halting workflows wait and complete once continued."""
    registered = app.extensions['invenio-workflows'].workflows
    assert 'halttest' in registered
    assert 'halttestcond' in registered

    with app.app_context():
        data = [{'foo': 'bar'}]

        # The conditional workflow is given a bare dict rather than a list,
        # to check that this form of input is accepted too.
        scenarios = (
            ('halttest', data),
            ('halttestcond', data[0]),
        )
        for workflow_name, payload in scenarios:
            engine = WorkflowEngine.from_uuid(start(workflow_name, payload))
            halted = engine.processed_objects[0]

            assert halted.known_statuses.WAITING == halted.status
            assert WorkflowStatus.HALTED == engine.status

            obj_id = halted.id
            halted.continue_workflow()

            resumed = WorkflowObject.get(obj_id)
            assert resumed.known_statuses.COMPLETED == resumed.status
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Ensure an update is linked to the rejected workflow it matches.

    The first workflow is built, started, rejected and continued; the
    second one (same record, new title) must carry ``previously_rejected``
    and the id of the first object in ``previously_rejected_matches``.
    """
    record = generate_record()

    first_wf_id = build_workflow(record).id
    first_uuid = start("article", object_id=first_wf_id)
    obj_id = WorkflowEngine.from_uuid(first_uuid).objects[0].id

    to_reject = workflow_object_class.get(obj_id)
    to_reject.extra_data["approved"] = False  # rejection
    to_reject.continue_workflow()

    to_reject = workflow_object_class.get(obj_id)
    assert to_reject.status == ObjectStatus.COMPLETED
    assert to_reject.extra_data.get("approved") is False

    es.indices.refresh("holdingpen-hep")

    updated_title = "This is an update that will match the wf in the holdingpen"
    record["titles"][0]["title"] = updated_title

    # Run the update; it should be matched with the rejected workflow.
    second_wf_id = build_workflow(record).id
    second_uuid = start("article", object_id=second_wf_id)
    obj2 = WorkflowEngine.from_uuid(second_uuid).objects[0]

    assert obj2.extra_data["previously_rejected"] is True
    assert obj2.extra_data["previously_rejected_matches"] == [obj_id]
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Verify that a match from a different source is recorded but not blocking.

    The first workflow must be halted; the second, whose record differs
    in title and ``acquisition_source.source``, must list it in
    ``holdingpen_matches`` without ``stopped-matched-holdingpen-wf`` set.
    """
    record = generate_record()

    workflow_id = build_workflow(record).id
    eng_uuid = start('article', object_id=workflow_id)
    es.indices.refresh('holdingpen-hep')

    first_engine = WorkflowEngine.from_uuid(eng_uuid)
    wf_to_match = first_engine.objects[0].id
    first_obj = workflow_object_class.get(wf_to_match)
    assert first_obj.status == ObjectStatus.HALTED

    # Turn the record into an update originating from another source.
    record['titles'][0]['title'] = 'This is an update that will match the wf in the holdingpen'
    record['acquisition_source']['source'] = 'but not the source'

    workflow_id = build_workflow(record).id
    eng_uuid = start('article', object_id=workflow_id)
    second_engine = WorkflowEngine.from_uuid(eng_uuid)
    second_obj = second_engine.objects[0]

    assert second_obj.extra_data['already-in-holding-pen'] is True
    assert second_obj.extra_data['holdingpen_matches'] == [wf_to_match]
    assert second_obj.extra_data['previously_rejected'] is False
    assert not second_obj.extra_data.get('stopped-matched-holdingpen-wf')
def test_harvesting_arxiv_workflow_accepted(
        mocked, db_only_app, record_oai_arxiv_plots):
    """Run a harvested arXiv record through the workflow and accept it.

    The record is converted to HEP JSON, the workflow is expected to halt
    with files and publication info attached, and after being marked as
    approved/core and continued it must be ``COMPLETED``.
    """
    from invenio_workflows import (
        ObjectStatus,
        WorkflowEngine,
        start,
        workflow_object_class,
    )
    from dojson.contrib.marc21.utils import create_record
    from invenio_db import db
    from inspirehep.dojson.hep import hep
    from inspirehep.modules.converter.xslt import convert

    # OAI arXiv XML -> MARCXML -> dict -> HEP JSON
    marcxml = convert(record_oai_arxiv_plots, "oaiarXiv2marcxml.xsl")
    record_json = hep.do(create_record(marcxml))

    with db_only_app.app_context():
        workflow_uuid = start('article', [record_json])

        halted = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert halted.status == ObjectStatus.HALTED
        assert halted.data_type == "hep"

        # Both the PDF and the tarball are expected among the files.
        assert halted.files["1407.7587.pdf"]
        assert halted.files["1407.7587.tar.gz"]

        # Publication information is expected in the record.
        pub_info = halted.data.get('publication_info')
        assert pub_info
        assert pub_info[0]
        assert pub_info[0].get('year') == "2014"
        assert pub_info[0].get('journal_title') == "J. Math. Phys."

        # No decision has been recorded so far.
        assert "approved" not in halted.extra_data

        # Resolve the record as accepted by hand.
        # FIXME Should be accept, but record validation prevents us.
        halted.remove_action()
        halted.extra_data["approved"] = True
        halted.extra_data["core"] = True
        halted.save()

        db.session.commit()

    with db_only_app.app_context():
        resumed = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
        obj_id = resumed.id
        resumed.continue_workflow()

        finished = workflow_object_class.get(obj_id)
        # It was accepted
        assert finished.status == ObjectStatus.COMPLETED
def test_merge_callback_url_with_malformed_workflow(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check that a malformed payload is rejected by the merge-conflict callback.

    After the workflow halts with one conflict, a payload whose
    ``metadata`` and ``_extra_data`` are plain strings is PUT to the
    callback URL; the response must be a 400 and the workflow must keep
    its callback URL, conflicts and merger root.
    """
    filters_target = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    with patch(filters_target, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__,
            'merge_record_arxiv.json',
            index_name='records-hep',
        )

        wf_id = build_workflow(RECORD_WITH_CONFLICTS).id
        eng_uuid = start('article', object_id=wf_id)
        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        conflicts = obj.extra_data.get('conflicts')
        expected_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        assert obj.status == ObjectStatus.HALTED
        assert expected_url == obj.extra_data.get('callback_url')
        assert len(conflicts) == 1
        assert obj.extra_data.get('is-update') is True
        assert obj.extra_data['merger_root'] == RECORD_WITH_CONFLICTS

        # Strings where objects are expected make the request malformed.
        malformed = {
            'id': obj.id,
            'metadata': 'Jessica Jones',
            '_extra_data': 'Frank Castle',
        }
        with workflow_app.test_client() as client:
            response = client.put(
                obj.extra_data.get('callback_url'),
                data=json.dumps(malformed),
                content_type='application/json',
            )

        data = json.loads(response.get_data())
        expected_message = 'The workflow request is malformed.'

        assert response.status_code == 400
        assert expected_message == data['message']

        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        assert obj.status == ObjectStatus.HALTED
        assert obj.extra_data.get('callback_url') is not None
        assert obj.extra_data.get('conflicts') is not None
        assert obj.extra_data['merger_root'] is not None

        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root is None
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_search,
    mocked_api_request_beard_block,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download,
    workflow_app,
    record,
):
    """Accept a halted harvesting workflow and feed it both legacy callbacks.

    Both callbacks must answer 200; the object is expected to be
    ``WAITING`` until the webcoll callback and ``COMPLETED`` afterwards.
    """
    with requests_mock.Mocker() as requests_mocker:
        # Let requests to URLs containing "indexer" or "localhost" through.
        passthrough = re.compile('.*(indexer|localhost).*')
        requests_mocker.register_uri(requests_mock.ANY, passthrough, real_http=True)
        # POSTs to localhost:1234 get a canned 200 response.
        requests_mocker.register_uri(
            'POST',
            re.compile('https?://localhost:1234.*'),
            text=u'[INFO]',
            status_code=200,
        )

        workflow_uuid, eng, obj = get_halted_workflow(
            app=workflow_app,
            extra_config={'PRODUCTION_MODE': False},
            record=record,
        )

        _do_accept_core(app=workflow_app, workflow_id=obj.id)

        obj = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
        assert obj.status == ObjectStatus.WAITING

        robotupload_response = _do_robotupload_callback(
            app=workflow_app,
            workflow_id=obj.id,
            recids=[12345],
        )
        assert robotupload_response.status_code == 200

        obj = workflow_object_class.get(obj.id)
        assert obj.status == ObjectStatus.WAITING

        webcoll_response = _do_webcoll_callback(app=workflow_app, recids=[12345])
        assert webcoll_response.status_code == 200

        obj = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
        # It was accepted
        assert obj.status == ObjectStatus.COMPLETED
def test_harvesting_arxiv_workflow_accepted(mocked, small_app,
                                            record_oai_arxiv_plots):
    """Harvest an arXiv record, accept it by hand and expect completion.

    Checks the halted state (data type, attached files, publication info,
    no decision yet), then sets ``approved`` and ``core`` and continues
    the workflow, which must end up ``COMPLETED``.
    """
    from invenio_workflows import (ObjectStatus, WorkflowEngine, start,
                                   workflow_object_class)
    from dojson.contrib.marc21.utils import create_record
    from invenio_db import db
    from inspirehep.dojson.hep import hep
    from inspirehep.modules.converter.xslt import convert

    # Conversion chain: MARCXML first, then a dict, then HEP JSON.
    record_marcxml = convert(record_oai_arxiv_plots, "oaiarXiv2marcxml.xsl")
    record_marc = create_record(record_marcxml)
    record_json = hep.do(record_marc)

    with small_app.app_context():
        workflow_uuid = start('article', [record_json])

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        wf_obj = engine.processed_objects[0]

        assert wf_obj.status == ObjectStatus.HALTED
        assert wf_obj.data_type == "hep"

        # Attached files: the PDF and the source tarball.
        assert wf_obj.files["1407.7587.pdf"]
        assert wf_obj.files["1407.7587.tar.gz"]

        # Extracted publication note.
        pub_info = wf_obj.data.get('publication_info')
        assert pub_info
        assert pub_info[0]
        assert pub_info[0].get('year') == "2014"
        assert pub_info[0].get('journal_title') == "J. Math. Phys."

        # Nobody has approved or rejected the record at this point.
        assert "approved" not in wf_obj.extra_data

        # Manual acceptance.
        # FIXME Should be accept, but record validation prevents us.
        wf_obj.remove_action()
        wf_obj.extra_data["approved"] = True
        wf_obj.extra_data["core"] = True
        wf_obj.save()

        db.session.commit()

    with small_app.app_context():
        engine = WorkflowEngine.from_uuid(workflow_uuid)
        wf_obj = engine.processed_objects[0]
        obj_id = wf_obj.id
        wf_obj.continue_workflow()

        wf_obj = workflow_object_class.get(obj_id)
        # It was accepted
        assert wf_obj.status == ObjectStatus.COMPLETED
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_search,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    workflow_app,
):
    """Test a full harvesting workflow.

    The halted workflow is accepted as core, then the robotupload and
    webcoll callbacks are delivered.  Both callbacks must answer 200; the
    object is expected to be ``WAITING`` until the webcoll callback and
    ``COMPLETED`` afterwards.
    """
    record = generate_record()

    with requests_mock.Mocker() as requests_mocker:
        # Requests to URLs containing "indexer" or "localhost" go through.
        requests_mocker.register_uri(
            requests_mock.ANY,
            re.compile('.*(indexer|localhost).*'),
            real_http=True,
        )
        # POSTs to localhost:1234 get a canned 200 response.
        requests_mocker.register_uri(
            'POST',
            re.compile('https?://localhost:1234.*'),
            text=u'[INFO]',
            status_code=200,
        )

        workflow_uuid, eng, obj = get_halted_workflow(
            app=workflow_app,
            extra_config={'PRODUCTION_MODE': False},
            record=record,
        )

        do_accept_core(
            app=workflow_app,
            workflow_id=obj.id,
        )

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        assert obj.status == ObjectStatus.WAITING

        response = do_robotupload_callback(
            app=workflow_app,
            workflow_id=obj.id,
            recids=[12345],
        )
        assert response.status_code == 200

        obj = workflow_object_class.get(obj.id)
        assert obj.status == ObjectStatus.WAITING

        response = do_webcoll_callback(app=workflow_app, recids=[12345])
        assert response.status_code == 200

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        # It was accepted
        assert obj.status == ObjectStatus.COMPLETED
def test_merge_with_conflicts_callback_url_and_resolve(workflow_app, enable_merge_on_update, disable_file_upload):
    """Check that resolving merge conflicts through the callback completes the workflow.

    The update must halt with one conflict and a callback URL.  After the
    conflicts are removed and the workflow is PUT to that URL, the
    response must be a 200 and the workflow must be completed, approved
    and merged.
    """
    factory = TestRecordMetadata.create_from_file(__name__, 'record_for_merging.json')

    # Work on a copy: updating RECORD_WITH_CONFLICTS in place would leave
    # the injected DOIs in the shared module-level constant for any other
    # code that reads it afterwards.
    record_update = dict(RECORD_WITH_CONFLICTS)
    record_update['dois'] = factory.record_metadata.json.get('dois')

    eng_uuid = start('article', [record_update])

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    conflicts = obj.extra_data.get('conflicts')

    expected_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

    assert obj.status == ObjectStatus.HALTED
    assert expected_url == obj.extra_data.get('callback_url')
    assert len(conflicts) == 1
    assert obj.extra_data.get('is-update') is True

    # resolve conflicts
    obj.data['$schema'] = factory.record_metadata.json.get('$schema')
    del obj.extra_data['conflicts']

    payload = {
        'id': obj.id,
        'metadata': obj.data,
        '_extra_data': obj.extra_data,
    }

    with workflow_app.test_client() as client:
        response = client.put(
            obj.extra_data.get('callback_url'),
            data=json.dumps(payload),
            content_type='application/json',
        )
    assert response.status_code == 200

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]
    conflicts = obj.extra_data.get('conflicts')

    assert obj.status == ObjectStatus.COMPLETED
    assert conflicts is None
    assert obj.extra_data.get('approved') is True
    assert obj.extra_data.get('is-update') is True
    assert obj.extra_data.get('merged') is True
def test_merge_callback_url_with_malformed_workflow(workflow_app, enable_merge_on_update, disable_file_upload):
    """Check that a malformed payload is rejected by the merge-conflict callback.

    After the update halts with one conflict, a payload whose ``metadata``
    and ``_extra_data`` are plain strings is PUT to the callback URL.  The
    response must be a 400 and the workflow must stay halted with its
    callback URL and conflicts intact.
    """
    factory = TestRecordMetadata.create_from_file(__name__, 'record_for_merging.json')

    # Work on a copy: updating RECORD_WITH_CONFLICTS in place would leave
    # the injected DOIs in the shared module-level constant for any other
    # code that reads it afterwards.
    record_update = dict(RECORD_WITH_CONFLICTS)
    record_update['dois'] = factory.record_metadata.json.get('dois')

    eng_uuid = start('article', [record_update])

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    conflicts = obj.extra_data.get('conflicts')

    expected_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

    assert obj.status == ObjectStatus.HALTED
    assert expected_url == obj.extra_data.get('callback_url')
    assert len(conflicts) == 1
    assert obj.extra_data.get('is-update') is True

    # Strings where objects are expected make the request malformed.
    payload = {
        'id': obj.id,
        'metadata': 'Jessica Jones',
        '_extra_data': 'Frank Castle',
    }

    with workflow_app.test_client() as client:
        response = client.put(
            obj.extra_data.get('callback_url'),
            data=json.dumps(payload),
            content_type='application/json',
        )

    data = json.loads(response.get_data())
    expected_message = 'The workflow request is malformed.'

    assert response.status_code == 400
    assert expected_message == data['message']

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data.get('callback_url') is not None
    assert obj.extra_data.get('conflicts') is not None
def transform_example_file(obj, eng: WorkflowEngine):
    """Title-case the text file referenced by ``obj.data``.

    Reads the file at path ``obj.data``, writes its ``str.title()`` form
    to a file obtained from ``obj.scratch.create_file`` and points
    ``obj.data`` at that new file.

    :param obj: workflow object; ``obj.data`` is used as the input path and
        ``obj.scratch.create_file`` provides the output path.
    :param eng: workflow engine; ``eng.abort()`` is called when the input
        file cannot be read.
    :return: the same ``obj``, with ``obj.data`` replaced by the output path.
    """
    input_data = ''
    try:
        # Named ``source`` so that the ``input`` builtin is not shadowed.
        with open(obj.data, 'r') as source:
            input_data = source.read()
    except OSError:
        # Cannot read input data, abort workflow execution.
        # NOTE(review): presumably abort() raises; if it returns instead,
        # the code below writes an empty output file - confirm.
        eng.abort()

    output = obj.scratch.create_file(task_name='example_output')
    with open(output, 'w') as target:
        target.write(input_data.title())

    obj.data = output
    return obj
def test_restart(app, restart_workflow):
    """Test restarting and resuming a halted workflow."""
    assert 'restarttest' in app.extensions['invenio-workflows'].workflows

    with app.app_context():
        data = {}

        eng_uuid = start('restarttest', data)
        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.processed_objects[0]

        assert obj.known_statuses.HALTED == obj.status
        assert WorkflowStatus.HALTED == eng.status
        assert obj.data == {"title": "foo"}
        assert obj.get_action() == "foo"
        assert obj.get_action_message() == "Test"

        # Restarting returns the same engine uuid and the workflow is
        # halted again.
        restarted_uuid = restart(eng_uuid)
        assert restarted_uuid == eng_uuid

        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.processed_objects[0]

        assert obj.known_statuses.HALTED == obj.status
        assert WorkflowStatus.HALTED == eng.status
        assert obj.data == {"title": {"value": "bar"}}
        assert obj.get_action() == "foo"

        obj.remove_action()
        assert obj.get_action() is None

        # Resuming is expected to run the remaining task(s).
        obj_id = obj.id
        resume(obj_id)

        obj = WorkflowObject.get(obj_id)
        assert obj.known_statuses.COMPLETED == obj.status
        assert obj.extra_data.get('test') == 'test'
        assert obj.data.get('title').get('source') == 'TEST'

        # Restart once more, this time passing the object itself as data.
        restart(obj.workflow.uuid, data=obj)
        assert obj.known_statuses.HALTED == obj.status
        assert WorkflowStatus.HALTED == eng.status
        assert obj.data == {"title": {"value": "bar"}}
def test_harvesting_arxiv_workflow_core_record_auto_accepted(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that a core record is approved automatically.

    With ``ARXIV_CATEGORIES`` taken from ``core_record()``, the processed
    object must be approved, flagged as auto-approved and marked core.
    """
    record, categories = core_record()
    config_overrides = dict(
        BEARD_API_URL="http://example.com/beard",
        MAGPIE_API_URL="http://example.com/magpie",
        ARXIV_CATEGORIES=categories,
    )

    with workflow_app.app_context():
        workflow_id = build_workflow(record).id
        # Only the workflow run itself uses the overridden configuration.
        with mock.patch.dict(workflow_app.config, config_overrides):
            workflow_uuid = start("article", object_id=workflow_id)

        processed = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert processed.extra_data["approved"] is True
        assert processed.extra_data["auto-approved"] is True
        assert processed.data["core"] is True
def start_edit_article_workflow(recid):
    """Start an ``edit_article`` workflow for a literature record.

    The record is loaded, the 'update' permission is checked, the workflow
    is started and assigned to the current user, and the client is
    redirected to the workflow editor.  When the request referrer is an RT
    ticket link, the ticket id is stored in the workflow's
    ``extra_data['curation_ticket_id']``.

    :param recid: identifier of the literature record to edit.
    :return: a 302 redirect to ``WORKFLOWS_EDITOR_API_URL`` followed by
        the workflow id.
    :raises CallbackRecordNotFoundError: if the record cannot be loaded.
    """
    try:
        record = get_db_record('lit', recid)
    except RecordGetterError:
        raise CallbackRecordNotFoundError(recid)

    record_permission = RecordPermission.create(action='update', record=record)
    if not record_permission.can():
        abort(403, record_permission)

    # has to be done before start() since, it is deattaching this session
    user_id = current_user.get_id()

    eng_uuid = start('edit_article', data=record)
    workflow_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
    workflow = workflow_object_class.get(workflow_id)
    workflow.id_user = user_id

    if request.referrer:
        # Escape the whole base URL, not just '?', so that every regex
        # metacharacter in it (e.g. '.') is matched literally; the raw
        # string avoids the invalid '\d' escape of a plain literal.
        ticket_pattern = re.escape(get_rt_link_for_ticket('')) + r'(?P<ticket_id>\d+)'
        ticket_match = re.match(ticket_pattern, request.referrer)
        if ticket_match:
            ticket_id = int(ticket_match.group('ticket_id'))
            workflow.extra_data['curation_ticket_id'] = ticket_id

    workflow.save()
    db.session.commit()

    url = "{}{}".format(current_app.config['WORKFLOWS_EDITOR_API_URL'], workflow_id)
    return redirect(location=url, code=302)
def test_merge_without_conflicts(workflow_app, enable_merge_on_update, record_to_merge):
    """Check that an update without conflicts completes as a merged update.

    No callback URL or conflicts may be recorded, and the workflow must be
    completed, approved, flagged as an update and merged.
    """
    titles = [{'title': name} for name in ('Jessica Jones', 'Luke Cage', 'Frank Castle')]
    record_update = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': titles,
        'dois': [{'value': '10.1007/978-3-319-15001-7'}],
    }

    eng_uuid = start('article', [record_update])
    merged = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    assert merged.status == ObjectStatus.COMPLETED
    assert merged.extra_data.get('callback_url') is None
    assert merged.extra_data.get('conflicts') is None
    assert merged.extra_data.get('approved') is True
    assert merged.extra_data.get('is-update') is True
    assert merged.extra_data.get('merged') is True
def test_validation_error_callback_with_malformed_with_invalid_types(workflow_app):
    """Check that wrongly typed callback arguments yield a MALFORMED error.

    The callback is called with a string id and a string as extra data;
    the response must be a 400 with the ``MALFORMED`` error code, the
    matching message and an ``errors`` entry.
    """
    invalid_record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }

    eng_uuid = start('article', [invalid_record])
    wf_obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    bad_id = 'Alias Investigations'
    bad_extra_data = 'Jessica Jones'
    response = do_validation_callback(workflow_app, bad_id, wf_obj.data, bad_extra_data)

    data = json.loads(response.get_data())
    expected_message = 'The workflow request is malformed.'
    expected_error_code = 'MALFORMED'

    assert response.status_code == 400
    assert expected_error_code == data['error_code']
    assert expected_message == data['message']
    assert 'errors' in data
def test_validation_error_callback_with_a_valid(workflow_app):
    """Check that the validation callback rejects a workflow not in error state.

    The started workflow must not be in ``ERROR`` status, and the callback
    must answer 400 with ``WORKFLOW_NOT_IN_ERROR_STATE``.
    """
    valid_record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }

    eng_uuid = start('article', [valid_record])
    wf_obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    assert wf_obj.status != ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app, wf_obj.id, wf_obj.data, wf_obj.extra_data
    )

    expected_error_code = 'WORKFLOW_NOT_IN_ERROR_STATE'
    data = json.loads(response.get_data())

    assert response.status_code == 400
    assert expected_error_code == data['error_code']
def test_validation_error_callback_with_missing_worfklow(workflow_app):
    """Check that the validation callback answers 404 for workflow id 1111.

    The response must carry the ``WORKFLOW_NOT_FOUND`` error code and a
    message naming the requested id.
    """
    invalid_record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }

    eng_uuid = start('article', [invalid_record])
    wf_obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    # Use id 1111 instead of the id of the workflow that was just started.
    requested_id = 1111
    response = do_validation_callback(
        workflow_app, requested_id, wf_obj.data, wf_obj.extra_data
    )

    data = json.loads(response.get_data())
    expected_message = 'The workflow with id "1111" was not found.'
    expected_error_code = 'WORKFLOW_NOT_FOUND'

    assert response.status_code == 404
    assert expected_error_code == data['error_code']
    assert expected_message == data['message']
def edit_workflow(workflow_app):
    """Start an ``edit_article`` workflow for a freshly created record.

    Logs a user in on a test client, creates a record, starts the
    workflow on its JSON and returns the resulting workflow object, which
    must be ``WAITING`` and carry a callback URL.
    """
    app_client = workflow_app.test_client()
    user = User.query.filter_by(email='*****@*****.**').one()
    login_user_via_session(app_client, user=user)

    hep_schema = 'http://localhost:5000/schemas/records/hep.json'
    eprint = {'categories': ['nucl-th'], 'value': '1802.03287'}
    record = {
        '$schema': hep_schema,
        'arxiv_eprints': [eprint],
        'control_number': 123,
        'document_type': ['article'],
        'titles': [{'title': 'Resource Pooling in Large-Scale Content Delivery Systems'}],
        'self': {'$ref': hep_schema},
        '_collections': ['Literature'],
    }
    factory = TestRecordMetadata.create_from_kwargs(json=record)

    eng_uuid = start('edit_article', data=factory.record_metadata.json)
    wf_obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    assert wf_obj.status == ObjectStatus.WAITING
    assert wf_obj.extra_data['callback_url']
    return wf_obj
def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that a previously rejected ``q-bio.GN`` record is not auto-approved.

    A completed, rejected workflow object is created for the record; a new
    workflow for the same record must find it, must not be auto-approved
    and must end up ``COMPLETED``.
    """
    record, categories = core_record()
    record["arxiv_eprints"][0]["categories"] = ["q-bio.GN"]

    # Create an already completed workflow object and mark it as rejected.
    rejected = workflow_object_class.create(
        data=record,
        status=ObjectStatus.COMPLETED,
        data_type="hep",
    )
    rejected.extra_data["approved"] = False
    rejected.save()
    es.indices.refresh("holdingpen-hep")

    config_overrides = dict(
        BEARD_API_URL="http://example.com/beard",
        MAGPIE_API_URL="http://example.com/magpie",
        ARXIV_CATEGORIES=categories,
    )

    with workflow_app.app_context():
        with mock.patch.dict(workflow_app.config, config_overrides):
            workflow_id = build_workflow(record).id
            eng_uuid = start("article", object_id=workflow_id)

        obj2 = WorkflowEngine.from_uuid(eng_uuid).processed_objects[0]

        assert not obj2.extra_data["auto-approved"]
        assert len(obj2.extra_data["previously_rejected_matches"]) > 0
        assert obj2.status == ObjectStatus.COMPLETED
def test_validation_error_callback_with_malformed_with_invalid_types(workflow_app):
    """Verify that the validation callback reports MALFORMED for bad argument types.

    A string is passed as the workflow id and another as the extra data;
    the callback must answer 400, ``MALFORMED``, with an ``errors`` entry.
    """
    invalid_record = dict(
        _collections=["Literature"],
        document_type=["article"],
        titles=[{"title": "A title"}],
    )

    workflow_id = build_workflow(invalid_record).id
    eng_uuid = start("article", object_id=workflow_id)
    wf_obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    response = do_validation_callback(
        workflow_app,
        "Alias Investigations",  # passed as the id
        wf_obj.data,
        "Jessica Jones",  # passed as the extra data
    )
    data = json.loads(response.get_data())

    expected_message = "The workflow request is malformed."
    expected_error_code = "MALFORMED"

    assert response.status_code == 400
    assert expected_error_code == data["error_code"]
    assert expected_message == data["message"]
    assert "errors" in data
def test_keep_previously_rejected_from_fully_harvested_category_is_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that a previously rejected core record is still auto-approved.

    A completed, rejected workflow object is created for the record; a new
    workflow for the same record must be auto-approved while still listing
    the rejected match.
    """
    record, categories = core_record()

    # Create an already completed workflow object and mark it as rejected.
    obj = workflow_object_class.create(
        data=record, status=ObjectStatus.COMPLETED, data_type='hep'
    )
    obj.extra_data['approved'] = False
    obj.save()
    es.indices.refresh('holdingpen-hep')

    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES': categories,
    }

    with workflow_app.app_context():
        with mock.patch.dict(workflow_app.config, config_overrides):
            workflow_uuid = start('article', [record])

        obj2 = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert obj2.extra_data['auto-approved']
        assert len(obj2.extra_data['previously_rejected_matches']) > 0
        # NOTE(review): this checks the pre-created rejected object, not
        # ``obj2`` - confirm whether that is intended.
        assert obj.status == ObjectStatus.COMPLETED
def test_harvesting_arxiv_workflow_manual_rejected(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Reject a halted harvesting workflow and expect it to complete.

    After ``approved`` is set to ``False`` and the workflow is continued,
    the object must be ``COMPLETED`` and still not approved.
    """
    record = generate_record()
    config_overrides = dict(
        BEARD_API_URL="http://example.com/beard",
        MAGPIE_API_URL="http://example.com/magpie",
    )

    workflow_uuid, eng, obj = get_halted_workflow(
        app=workflow_app,
        extra_config=config_overrides,
        record=record,
    )

    # Record the rejection and persist it.
    obj.extra_data["approved"] = False
    obj.save()
    db.session.commit()

    halted = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
    obj_id = halted.id
    halted.continue_workflow()

    finished = workflow_object_class.get(obj_id)
    # It was rejected
    assert finished.status == ObjectStatus.COMPLETED
    assert finished.extra_data["approved"] is False
def test_harvesting_arxiv_workflow_already_on_legacy(
    mocked_refextract_extract_refs,
    mocked_api_request_beard_block,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download,
    small_app,
    already_harvested_on_legacy_record,
):
    """Check that the already-harvested record fixture completes as already ingested."""
    config_overrides = dict(
        BEARD_API_URL="http://example.com/beard",
        MAGPIE_API_URL="http://example.com/magpie",
    )

    with small_app.app_context():
        with mock.patch.dict(small_app.config, config_overrides):
            records = [already_harvested_on_legacy_record]
            workflow_uuid = start('article', records)

        processed = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert processed.status == ObjectStatus.COMPLETED
        assert 'already-ingested' in processed.extra_data
        assert processed.extra_data['already-ingested']
def test_merge_with_disabled_merge_on_update_feature_flag(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
):
    """Run an update workflow with ``FEATURE_FLAG_ENABLE_MERGER`` off.

    The workflow must complete without conflicts, callback url or merger
    root, and no ``arxiv`` root may be stored for the record.
    """
    flag_off = {'FEATURE_FLAG_ENABLE_MERGER': False}

    with patch.dict(workflow_app.config, flag_off):
        factory = TestRecordMetadata.create_from_file(
            __name__,
            'merge_record_arxiv.json',
            index_name='records-hep',
        )

        workflow_object = build_workflow(RECORD_WITHOUT_CONFLICTS)
        eng_uuid = start('article', object_id=workflow_object.id)
        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        assert obj.status == ObjectStatus.COMPLETED

        extra = obj.extra_data
        assert extra.get('callback_url') is None
        assert extra.get('conflicts') is None
        assert extra.get('merged') is True
        assert extra.get('merger_root') is None
        assert extra.get('is-update') is True

        stored_root = read_wf_record_source(
            factory.record_metadata.id, 'arxiv')
        assert stored_root is None
def test_validation_error_callback_with_malformed_with_invalid_types(
    workflow_app,
):
    """Send the validation callback strings for ``id`` and ``extra_data``.

    The endpoint is expected to answer 400 with the ``MALFORMED`` error code
    and an ``errors`` entry in the payload.
    """
    invalid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }

    eng_uuid = start("article", object_id=build_workflow(invalid_record).id)
    workflow_object = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    # Both the workflow id and the extra data are deliberately plain strings.
    bogus_id = "Alias Investigations"
    bogus_extra_data = "Jessica Jones"
    response = do_validation_callback(
        workflow_app,
        bogus_id,
        workflow_object.data,
        bogus_extra_data,
    )
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert "MALFORMED" == body["error_code"]
    assert "The workflow request is malformed." == body["message"]
    assert "errors" in body
def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that a rejected ``q-bio.GN`` record is not auto-approved.

    The record's arXiv categories are replaced with ``q-bio.GN`` while
    ``ARXIV_CATEGORIES`` keeps the categories returned by ``core_record()``.
    """
    record, categories = core_record()
    record["arxiv_eprints"][0]["categories"] = ["q-bio.GN"]

    # Seed the holdingpen with an already finished, rejected workflow object.
    rejected = workflow_object_class.create(
        data=record,
        status=ObjectStatus.COMPLETED,
        data_type="hep",
    )
    rejected.extra_data["approved"] = False
    rejected.save()
    es.indices.refresh("holdingpen-hep")

    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": categories,
    }

    with workflow_app.app_context():
        with mock.patch.dict(workflow_app.config, config_overrides):
            new_workflow = build_workflow(record)
            eng_uuid = start("article", object_id=new_workflow.id)
            obj2 = WorkflowEngine.from_uuid(eng_uuid).processed_objects[0]

            assert not obj2.extra_data["auto-approved"]
            assert len(obj2.extra_data["previously_rejected_matches"]) > 0
            assert obj2.status == ObjectStatus.COMPLETED
def test_validation_error_callback_with_missing_worfklow(workflow_app):
    """Call the validation callback with workflow id ``1111``.

    The endpoint is expected to answer 404 with the ``WORKFLOW_NOT_FOUND``
    error code and a message naming that id.
    """
    invalid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }

    eng_uuid = start("article", object_id=build_workflow(invalid_record).id)
    workflow_object = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    response = do_validation_callback(
        workflow_app,
        1111,
        workflow_object.data,
        workflow_object.extra_data,
    )
    body = json.loads(response.get_data())

    assert response.status_code == 404
    assert "WORKFLOW_NOT_FOUND" == body["error_code"]
    assert 'The workflow with id "1111" was not found.' == body["message"]
def test_merge_with_conflicts_rootful(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Run an update that conflicts with the stored record.

    The workflow must halt with exactly one conflict and a callback url, and
    record the merger root, head revision and original root in extra data.
    """
    with patch(
        'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
        ['acquisition_source.source'],
    ):
        TestRecordMetadata.create_from_file(
            __name__,
            'merge_record_arxiv.json',
            index_name='records-hep',
        )

        update_workflow = build_workflow(RECORD_WITH_CONFLICTS)

        # No root is inserted beforehand, so the original root is {}.
        eng_uuid = start('article', object_id=update_workflow.id)
        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        extra = obj.extra_data
        conflicts = extra.get('conflicts')

        assert obj.status == ObjectStatus.HALTED
        assert len(conflicts) == 1
        assert extra.get('callback_url') is not None
        assert extra.get('is-update') is True
        assert extra['merger_root'] == RECORD_WITH_CONFLICTS
        assert extra['merger_head_revision'] == 0
        assert extra['merger_original_root'] == {}
def test_update_exact_matched_goes_trough_the_workflow(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """Run the ``article`` workflow on a record that is already in the db.

    The workflow must complete as an approved, exact-matched update with no
    holdingpen matches.
    """
    record = record_from_db
    control_number = record.get("control_number")

    eng_uuid = start("article", object_id=build_workflow(record).id)
    engine_object = WorkflowEngine.from_uuid(eng_uuid).objects[0]
    obj = workflow_object_class.get(engine_object.id)

    extra = obj.extra_data
    assert extra["holdingpen_matches"] == []
    assert extra["previously_rejected"] is False
    assert not extra.get("stopped-matched-holdingpen-wf")
    assert extra["is-update"]
    assert extra["exact-matched"]
    assert extra["matches"]["exact"] == [control_number]
    assert extra["matches"]["approved"] == control_number
    assert extra["approved"]
    assert obj.status == ObjectStatus.COMPLETED
def test_stop_matched_holdingpen_wfs(app, simple_record):
    """Check that ``stop_matched_holdingpen_wfs`` stops a matched workflow.

    A first workflow object is run and forced to ``HALTED``; a second object
    built from the same record must match it and, once the task runs, leave
    it completed and marked with the second object's id.
    """
    # The first object has to go through a workflow run so that it gets a
    # workflow definition and a uuid assigned.
    created = workflow_object_class.create(data_type='hep', **simple_record)
    workflow_uuid = start('article', object_id=created.id)

    pending = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
    pending.status = ObjectStatus.HALTED
    pending.save()
    pending_id = pending.id
    es.indices.refresh('holdingpen-hep')

    obj2 = WorkflowObject.create(data_type='hep', **simple_record)
    obj2_id = obj2.id

    match_non_completed_wf_in_holdingpen(obj2, None)
    assert obj2.extra_data['holdingpen_matches'] == [pending_id]

    stop_matched_holdingpen_wfs(obj2, None)

    stopped_wf = workflow_object_class.get(pending_id)
    assert stopped_wf.status == ObjectStatus.COMPLETED
    assert stopped_wf.extra_data['stopped-by-wf'] == obj2_id
def test_merge_without_conflicts_rootful(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Run an update after an ``arxiv`` root has been stored for the record.

    With ``ARXIV_ROOT`` inserted beforehand the workflow must complete
    without conflicts and replace the stored root with the update.
    """
    with patch(
        'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
        ['acquisition_source.source'],
    ):
        factory = TestRecordMetadata.create_from_file(
            __name__,
            'merge_record_arxiv.json',
            index_name='records-hep',
        )
        record_uuid = factory.record_metadata.id

        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id

        # Store a root for the record before the update workflow starts.
        insert_wf_record_source(
            json=ARXIV_ROOT,
            record_uuid=record_uuid,
            source='arxiv',
        )

        eng_uuid = start('article', object_id=update_workflow_id)
        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        extra = obj.extra_data
        conflicts = extra.get('conflicts')

        assert obj.status == ObjectStatus.COMPLETED
        assert not conflicts
        assert extra.get('callback_url') is None
        assert extra.get('is-update') is True
        assert extra['merger_head_revision'] == 0
        assert extra['merger_original_root'] == ARXIV_ROOT

        updated_root = read_wf_record_source(
            factory.record_metadata.id, 'arxiv')
        assert updated_root.json == RECORD_WITH_CONFLICTS
def test_harvesting_arxiv_workflow_manual_rejected(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Mark a halted harvesting workflow as rejected and let it finish.

    The object is flagged ``approved = False``, saved and committed; after
    ``continue_workflow`` it must be completed and still not approved.
    """
    record = generate_record()
    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
    }

    halted_workflow = get_halted_workflow(
        app=workflow_app,
        extra_config=config_overrides,
        record=record,
    )
    workflow_uuid, eng, obj = halted_workflow

    # Persist the rejection before the workflow is resumed.
    obj.extra_data["approved"] = False
    obj.save()
    db.session.commit()

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    rejected_id = obj.id
    obj.continue_workflow()

    reloaded = workflow_object_class.get(rejected_id)
    assert reloaded.status == ObjectStatus.COMPLETED
    assert reloaded.extra_data["approved"] is False
def stop_matched_holdingpen_wfs(obj, eng):
    """Stop the matched workflow objects in the holdingpen.

    Each workflow listed in ``obj.extra_data['holdingpen_matches']`` has its
    steps replaced with a short list defined on the fly, which marks it as
    ``'stopped-by-wf'`` (the value being the current workflow's id, for
    traceability) and then stops processing. The replaced steps are executed
    right away.

    In the use case of harvesting an article twice, this function stops the
    first workflow and lets the current one be processed, since it has the
    latest metadata.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    current_wf_id = int(obj.id)
    replacement_steps = [mark('stopped-by-wf', current_wf_id), stop_processing]

    # Save the current workflow before touching the matched ones.
    save_workflow(obj, eng)

    matched_ids = obj.extra_data['holdingpen_matches']
    for matched_id in matched_ids:
        matched_wf = workflow_object_class.get(matched_id)
        matched_engine = WorkflowEngine.from_uuid(matched_wf.id_workflow)

        # Swap in the stopping steps and run them on the matched workflow.
        matched_engine.callbacks.replace(replacement_steps)
        matched_engine.process([matched_wf])
def stop_matched_holdingpen_wfs(obj, eng):
    """Stop the matched workflow objects in the holdingpen.

    Each workflow listed in ``obj.extra_data['holdingpen_matches']`` has its
    steps replaced with a short list defined on the fly, which marks it as
    ``'stopped-by-wf'`` (the value being the current workflow's id, for
    traceability) and then stops processing. The replaced steps are executed
    right away.

    In the use case of harvesting an article twice, this function stops the
    first workflow and lets the current one be processed, since it has the
    latest metadata.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    current_wf_id = int(obj.id)
    replacement_steps = [mark('stopped-by-wf', current_wf_id), stop_processing]

    # Save the current workflow object before touching the matched ones.
    obj.save()

    matched_ids = obj.extra_data['holdingpen_matches']
    for matched_id in matched_ids:
        matched_wf = workflow_object_class.get(matched_id)
        matched_engine = WorkflowEngine.from_uuid(matched_wf.id_workflow)

        # Swap in the stopping steps and run them on the matched workflow.
        matched_engine.callbacks.replace(replacement_steps)
        matched_engine.process([matched_wf])
def test_merge_without_conflicts_handles_update_without_acquisition_source_and_acts_as_rootless(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Run an update that carries no acquisition source.

    The workflow must complete without conflicts, report an empty original
    root and leave no workflow record sources stored for the record.
    """
    with patch(
        'inspire_json_merger.config.PublisherOnArxivOperations.conflict_filters',
        ['acquisition_source.source'],
    ):
        factory = TestRecordMetadata.create_from_file(
            __name__,
            'merge_record_arxiv.json',
            index_name='records-hep',
        )

        update_workflow = build_workflow(
            RECORD_WITHOUT_ACQUISITION_SOURCE_AND_NO_CONFLICTS)
        eng_uuid = start('article', object_id=update_workflow.id)
        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        extra = obj.extra_data
        conflicts = extra.get('conflicts')

        assert obj.status == ObjectStatus.COMPLETED
        assert not conflicts
        assert extra.get('callback_url') is None
        assert extra.get('is-update') is True
        assert extra['merger_head_revision'] == 0
        assert extra['merger_original_root'] == {}

        # The source is unknown, so no new root is saved.
        stored_roots = read_all_wf_record_sources(factory.record_metadata.id)
        assert not stored_roots
def test_validation_error_callback_with_a_valid(workflow_app):
    """Call the validation callback for a workflow that is not in error.

    The endpoint is expected to answer 400 with the
    ``WORKFLOW_NOT_IN_ERROR_STATE`` error code.
    """
    valid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }

    eng_uuid = start("article", object_id=build_workflow(valid_record).id)
    workflow_object = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    # Precondition: the workflow did not end in the error state.
    assert workflow_object.status != ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app,
        workflow_object.id,
        workflow_object.data,
        workflow_object.extra_data,
    )
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert "WORKFLOW_NOT_IN_ERROR_STATE" == body["error_code"]
def test_refextract_from_pdf(
    mocked_indexing_task,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services
):
    """Test refextract from PDF and reference matching.

    Goes through the entire workflow with the default configuration: a cited
    record is stored first, then the citing record is run through the
    ``article`` workflow and its extracted references are inspected.
    """
    cited_title = (
        'Solving a two-electron quantum dot model in terms of polynomial '
        'solutions of a Biconfluent Heun equation'
    )
    cited_record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'arxiv_eprints': [
            {
                'categories': [
                    'quant-ph',
                    'cond-mat.mes-hall',
                    'cond-mat.str-el',
                    'math-ph',
                    'math.MP',
                ],
                'value': '1308.0815',
            },
        ],
        'control_number': 1000,
        'document_type': ['article'],
        'titles': [
            {
                'source': 'arXiv',
                'title': cited_title,
            },
        ],
    }

    TestRecordMetadata.create_from_kwargs(
        json=cited_record_json,
        index='records-hep',
        pid_type='lit',
    )
    citing_record, categories = insert_citing_record()

    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES': categories,
    }

    # Check the citing record's acquisition source against the hep schema.
    subschema = load_schema('hep')['properties']['acquisition_source']
    assert validate(citing_record['acquisition_source'], subschema) is None

    with mock.patch.dict(workflow_app.config, config_overrides):
        workflow_id = build_workflow(citing_record).id
        citing_doc_workflow_uuid = start('article', object_id=workflow_id)

    citing_doc_eng = WorkflowEngine.from_uuid(citing_doc_workflow_uuid)
    references = citing_doc_eng.processed_objects[0].data['references']

    expected_ref = 'http://localhost:5000/api/literature/1000'
    assert references[7]['record']['$ref'] == expected_ref
    assert references[0]['raw_refs'][0]['source'] == 'arXiv'
def test_update_record_goes_through_api_version_of_store_record_without_issue(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """Update an existing record with REST record management enabled.

    A ``PUT`` to ``<INSPIREHEP_URL>/literature/<control number>`` is mocked;
    the test checks that the request was issued and that the workflow
    completes as an approved, exact-matched update.
    """
    record = record_from_db
    workflow_id = build_workflow(record).id
    expected_control_number = record['control_number']
    expected_head_uuid = str(record.id)

    rest_config = {
        "FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT": True,
        "INSPIREHEP_URL": "http://web:8000",
    }
    mocked_response = {
        'metadata': {
            'control_number': expected_control_number,
        },
        'id_': expected_head_uuid,
    }

    with mock.patch.dict(workflow_app.config, rest_config):
        with requests_mock.Mocker(real_http=True) as requests_mocker:
            # The url is built from the patched configuration value.
            put_url = '{url}/literature/{cn}'.format(
                url=workflow_app.config.get("INSPIREHEP_URL"),
                cn=expected_control_number,
            )
            requests_mocker.register_uri(
                'PUT',
                put_url,
                headers={'content-type': 'application/json'},
                status_code=200,
                json=mocked_response,
            )

            eng_uuid = start("article", object_id=workflow_id)

            history = requests_mocker.request_history
            url_paths = [request.path for request in history]
            url_hostnames = [request.hostname for request in history]

            expected_path = "/literature/{cn}".format(
                cn=expected_control_number)
            assert 'web' in url_hostnames
            assert expected_path in url_paths

            obj_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
            obj = workflow_object_class.get(obj_id)
            extra = obj.extra_data

            assert obj.data['control_number'] == expected_control_number
            assert extra["holdingpen_matches"] == []
            assert extra["previously_rejected"] is False
            assert not extra.get("stopped-matched-holdingpen-wf")
            assert extra["is-update"]
            assert extra["exact-matched"]
            assert extra["matches"]["exact"] == [
                record.get("control_number")]
            assert extra["matches"]["approved"] == record.get(
                "control_number")
            assert extra["approved"]
            assert obj.status == ObjectStatus.COMPLETED
def test_match_in_holdingpen_stops_pending_wf(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Start two workflows for the same record, the second with a new title.

    The first workflow halts; the second must also halt, flag that it found
    and stopped a holdingpen match, and leave the first one completed and
    marked as stopped by the second.
    """
    record = generate_record()

    first_uuid = start('article', [record])
    es.indices.refresh('holdingpen-hep')

    old_wf = WorkflowEngine.from_uuid(first_uuid).objects[0]
    obj_id = old_wf.id

    assert old_wf.status == ObjectStatus.HALTED
    assert old_wf.extra_data['previously_rejected'] is False

    # ``record2`` is the same dict as ``record``, so it gets the new title.
    record2 = record
    record['titles'][0]['title'] = (
        'This is an update that will match the wf in the holdingpen'
    )

    second_uuid = start('article', [record2])
    es.indices.refresh('holdingpen-hep')

    update_wf = WorkflowEngine.from_uuid(second_uuid).objects[0]
    update_extra = update_wf.extra_data

    assert update_wf.status == ObjectStatus.HALTED
    assert update_extra['already-in-holding-pen'] is True
    assert update_extra['previously_rejected'] is False
    assert update_extra['stopped-matched-holdingpen-wf'] is True
    assert update_extra['is-update'] is False

    old_wf = workflow_object_class.get(obj_id)
    old_extra = old_wf.extra_data

    assert old_extra['already-in-holding-pen'] is False
    assert old_extra['previously_rejected'] is False
    assert old_extra['stopped-by-wf'] == update_wf.id
    assert old_extra.get('approved') is None
    assert old_extra['is-update'] is False
    assert old_wf.status == ObjectStatus.COMPLETED
def get_halted_workflow(mocked_is_pdf_link, app, record, extra_config=None):
    """Start an ``article`` workflow for ``record`` and check that it halted.

    Args:
        mocked_is_pdf_link: mock whose ``return_value`` is set to ``True``.
        app: application whose config is patched while the workflow starts.
        record: record used to build the workflow object.
        extra_config: optional config overrides; nothing is overridden when
            it is falsy.

    Returns:
        tuple: ``(workflow_uuid, eng, obj)`` for the halted workflow.
    """
    mocked_is_pdf_link.return_value = True

    if not extra_config:
        extra_config = {}

    with mock.patch.dict(app.config, extra_config):
        workflow_id = build_workflow(record).id
        workflow_uuid = start("article", object_id=workflow_id)

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]

    assert obj.status == ObjectStatus.HALTED
    assert obj.data_type == "hep"

    # Attached files: the pdf, the tarball and at least one more file.
    assert obj.files["1407.7587.pdf"]
    assert obj.files["1407.7587.tar.gz"]
    assert len(obj.files) > 2

    # Extracted publication info.
    pub_info = obj.data.get("publication_info")
    assert pub_info
    first_pub_info = pub_info[0]
    assert first_pub_info
    assert first_pub_info.get("year") == 2014
    assert first_pub_info.get("journal_title") == "J. Math. Phys."

    # Relevance prediction.
    prediction = obj.extra_data.get("relevance_prediction")
    assert prediction
    assert prediction["decision"] == "Non-CORE"
    assert prediction["scores"]["Non-CORE"] == 0.8358207729691823

    # Experiments prediction.
    expected_experiment_prediction = {
        "experiments": [{"label": "CMS", "score": 0.75495152473449707}]
    }
    experiments_prediction = obj.extra_data.get("experiments_prediction")
    assert experiments_prediction == expected_experiment_prediction

    # Keywords prediction.
    expected_keyword = {
        "label": "galaxy",
        "score": 0.29424679279327393,
        "accept": True,
    }
    keywords_prediction = obj.extra_data.get("keywords_prediction")
    assert keywords_prediction
    assert expected_keyword in keywords_prediction["keywords"]

    # No approval decision has been taken on this record yet.
    assert obj.extra_data["approved"] is None

    return workflow_uuid, eng, obj
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Re-submit a record whose earlier workflow was rejected.

    The second workflow must be flagged as previously rejected and list the
    rejected workflow's id among its matches.
    """
    record = generate_record()

    first_workflow_id = build_workflow(record).id
    first_uuid = start("article", object_id=first_workflow_id)
    obj_id = WorkflowEngine.from_uuid(first_uuid).objects[0].id

    # Reject the record and let the first workflow run to completion.
    to_reject = workflow_object_class.get(obj_id)
    to_reject.extra_data["approved"] = False
    to_reject.continue_workflow()

    rejected = workflow_object_class.get(obj_id)
    assert rejected.status == ObjectStatus.COMPLETED
    assert rejected.extra_data.get("approved") is False

    es.indices.refresh("holdingpen-hep")

    record["titles"][0]["title"] = (
        "This is an update that will match the wf in the holdingpen"
    )

    # This workflow matches in the holdingpen and stops because the matched
    # one was rejected.
    second_workflow_id = build_workflow(record).id
    second_uuid = start("article", object_id=second_workflow_id)
    obj2 = WorkflowEngine.from_uuid(second_uuid).objects[0]

    assert obj2.extra_data["previously_rejected"] is True
    assert obj2.extra_data["previously_rejected_matches"] == [obj_id]
def test_article_workflow_continues_when_record_is_valid(workflow_app):
    """Run the ``article`` workflow on a minimal record.

    The resulting object must not be in the error state and must carry no
    ``_error_msg`` in its extra data.
    """
    valid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }

    workflow_object = build_workflow(valid_record)
    eng_uuid = start("article", object_id=workflow_object.id)
    result = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    assert result.status != ObjectStatus.ERROR
    assert "_error_msg" not in result.extra_data
def get_halted_workflow(app, record, extra_config=None):
    """Start an ``article`` workflow for ``record`` and check that it halted.

    Args:
        app: application whose config is patched while the workflow starts.
        record: record passed to ``start``.
        extra_config: optional config overrides; nothing is overridden when
            it is falsy.

    Returns:
        tuple: ``(workflow_uuid, eng, obj)`` for the halted workflow.
    """
    if not extra_config:
        extra_config = {}

    with mock.patch.dict(app.config, extra_config):
        workflow_uuid = start('article', [record])

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]

    assert obj.status == ObjectStatus.HALTED
    assert obj.data_type == "hep"

    # Attached files: the pdf, the tarball and at least one more file.
    assert obj.files["1407.7587.pdf"]
    assert obj.files["1407.7587.tar.gz"]
    assert len(obj.files) > 2

    # Extracted publication info.
    pub_info = obj.data.get('publication_info')
    assert pub_info
    first_pub_info = pub_info[0]
    assert first_pub_info
    assert first_pub_info.get('year') == 2014
    assert first_pub_info.get('journal_title') == "J. Math. Phys."

    # Relevance prediction.
    prediction = obj.extra_data.get("relevance_prediction")
    assert prediction
    assert prediction['decision'] == 'Non-CORE'
    assert prediction['scores']['Non-CORE'] == 0.8358207729691823

    # TODO: add the experiments predictions to the workflow
    # object (see issue #2054).
    experiments_prediction = obj.extra_data.get("experiments_prediction")
    assert experiments_prediction is None

    # Keywords prediction.
    expected_keyword = {
        "label": "galaxy",
        "score": 0.29424679279327393,
        "accept": True
    }
    keywords_prediction = obj.extra_data.get("keywords_prediction")
    assert keywords_prediction
    assert expected_keyword in keywords_prediction['keywords']

    # No approval decision has been recorded on this object yet.
    assert "approved" not in obj.extra_data

    return workflow_uuid, eng, obj
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Start two workflows for the same record, the second with a new title.

    The first workflow halts; the second must also halt, flag that it found
    and stopped a holdingpen match, and leave the first one completed and
    marked as stopped by the second.
    """
    record = generate_record()

    first_workflow_id = build_workflow(record).id
    first_uuid = start("article", object_id=first_workflow_id)
    es.indices.refresh("holdingpen-hep")

    old_wf = WorkflowEngine.from_uuid(first_uuid).objects[0]
    obj_id = old_wf.id

    assert old_wf.status == ObjectStatus.HALTED
    assert old_wf.extra_data["previously_rejected"] is False

    # ``record2`` is the same dict as ``record``, so it gets the new title.
    record2 = record
    record["titles"][0]["title"] = (
        "This is an update that will match the wf in the holdingpen"
    )

    record2_workflow = build_workflow(record2).id
    start("article", object_id=record2_workflow)
    es.indices.refresh("holdingpen-hep")

    update_wf = workflow_object_class.get(record2_workflow)
    update_extra = update_wf.extra_data

    assert update_wf.status == ObjectStatus.HALTED
    # As workflow stops (in error) before setting this
    assert update_extra["previously_rejected"] is False
    assert update_extra['already-in-holding-pen'] is True
    assert update_extra["stopped-matched-holdingpen-wf"] is True
    assert update_extra["is-update"] is False

    old_wf = workflow_object_class.get(obj_id)
    old_extra = old_wf.extra_data

    assert old_extra['already-in-holding-pen'] is False
    assert old_extra['previously_rejected'] is False
    assert old_extra['stopped-by-wf'] == update_wf.id
    assert old_extra.get('approved') is None
    assert old_extra['is-update'] is False
    assert old_wf.status == ObjectStatus.COMPLETED
def test_delayed_execution_api(app, halt_workflow):
    """Test continue object task.

    Starts the ``halttest`` workflow through ``start.delay``, checks that it
    is waiting and halted, then resumes it through ``resume.delay`` and
    checks that the object is completed.
    """
    with app.app_context():
        payload = [{'foo': 'bar'}]
        async_result = start.delay('halttest', payload)

        engine = WorkflowEngine.from_uuid(async_result.get())
        waiting = engine.processed_objects[0]

        assert waiting.known_statuses.WAITING == waiting.status
        assert WorkflowStatus.HALTED == engine.status

        obj_id = waiting.id
        resume.delay(obj_id)

        resumed = WorkflowObject.get(obj_id)
        assert resumed.known_statuses.COMPLETED == resumed.status