def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """An update matching a previously rejected workflow stops early."""
    record = generate_record()

    eng_uuid = start('article', [record])
    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj_id = eng.objects[0].id
    obj = workflow_object_class.get(obj_id)
    obj.extra_data["approved"] = False  # reject record
    obj.continue_workflow()

    obj = workflow_object_class.get(obj_id)
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data.get('approved') is False

    # make the rejected workflow visible to the holdingpen matcher
    es.indices.refresh('holdingpen-hep')

    record['titles'][0]['title'] = 'This is an update that will match the wf in the holdingpen'
    # this workflow matches in the holdingpen and stops because the
    # matched one was rejected
    eng_uuid = start('article', [record])
    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj2 = eng.objects[0]

    assert obj2.extra_data['already-in-holding-pen'] is False
    assert obj2.extra_data['previously_rejected'] is True
    assert obj2.extra_data['previously_rejected_matches'] == [obj_id]
def test_update_exact_matched_goes_trough_the_workflow(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """An update that exact-matches a DB record is auto-approved to COMPLETED."""
    record = record_from_db
    workflow_id = build_workflow(record).id
    eng_uuid = start("article", object_id=workflow_id)
    obj_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
    obj = workflow_object_class.get(obj_id)

    # no holdingpen match and no earlier rejection should be recorded
    assert obj.extra_data["holdingpen_matches"] == []
    assert obj.extra_data["previously_rejected"] is False
    assert not obj.extra_data.get("stopped-matched-holdingpen-wf")
    # the exact match against the existing record marks this as an update
    assert obj.extra_data["is-update"]
    assert obj.extra_data["exact-matched"]
    assert obj.extra_data["matches"]["exact"] == [record.get("control_number")]
    assert obj.extra_data["matches"]["approved"] == record.get(
        "control_number")
    assert obj.extra_data["approved"]
    assert obj.status == ObjectStatus.COMPLETED
def start_edit_article_workflow(recid):
    """Start an ``edit_article`` workflow for a record and redirect to the editor.

    Args:
        recid (int): control number of the literature record to edit.

    Returns:
        werkzeug.wrappers.Response: a 302 redirect to the workflow editor.

    Raises:
        CallbackRecordNotFoundError: if no record with ``recid`` exists.
    """
    try:
        record = get_db_record('lit', recid)
    except RecordGetterError:
        raise CallbackRecordNotFoundError(recid)
    record_permission = RecordPermission.create(action='update', record=record)
    if not record_permission.can():
        abort(403, record_permission)
    # has to be done before start() since, it is deattaching this session
    user_id = current_user.get_id()
    eng_uuid = start('edit_article', data=record)
    workflow_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
    workflow = workflow_object_class.get(workflow_id)
    workflow.id_user = user_id
    if request.referrer:
        # fix: use raw strings for the regex fragments; '\?' and '\d' in
        # plain string literals are invalid escape sequences
        base_rt_url = get_rt_link_for_ticket('').replace('?', r'\?')
        ticket_match = re.match(base_rt_url + r'(?P<ticket_id>\d+)',
                                request.referrer)
        if ticket_match:
            # remember the RT ticket the curator navigated from
            ticket_id = int(ticket_match.group('ticket_id'))
            workflow.extra_data['curation_ticket_id'] = ticket_id
    workflow.save()
    db.session.commit()
    url = "{}{}".format(current_app.config['WORKFLOWS_EDITOR_API_URL'],
                        workflow_id)
    return redirect(location=url, code=302)
def test_workflow_do_not_changes_to_hidden_if_record_authors_do_not_have_interesting_affiliations(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow."""
    record = generate_record()
    workflow_id = build_workflow(record).id
    with patch.dict(
            workflow_app.config,
            {
                'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
                'INSPIREHEP_URL': "http://web:8000"
            }):
        start("article", object_id=workflow_id)
        wf = workflow_object_class.get(workflow_id)
        wf.extra_data['approved'] = True  # approve the halted workflow
        wf.save()
        wf.continue_workflow(delayed=False)
        # inspect the record payload POSTed to the record-management API
        collections_in_record = mocked_external_services.request_history[0].json(
        )['_collections']
        assert "CDS Hidden" not in collections_in_record
        assert "HAL Hidden" not in collections_in_record
        assert "Fermilab" not in collections_in_record
        assert ["Literature"] == collections_in_record
def webcoll_callback(): """Handle a callback from webcoll with the record ids processed. Expects the request data to contain a list of record ids in the recids field. """ recids = dict(request.form).get('recids', []) pending_records = current_cache.get("pending_records") or dict() for rid in recids: if rid in pending_records: objectid = pending_records[rid] workflow_object = workflow_object_class.get(objectid) base_url = _get_base_url() workflow_object.extra_data['url'] = join( base_url, 'record', str(rid) ) workflow_object.extra_data['recid'] = rid workflow_object.save() db.session.commit() workflow_object.continue_workflow(delayed=True) del pending_records[rid] current_cache.set( "pending_records", pending_records, timeout=current_app.config["PENDING_RECORDS_CACHE_TIMEOUT"] ) return jsonify({"result": "success"})
def test_workflows_halts_on_multiple_exact_matches(workflow_app):
    """An update exact-matching two different records halts for resolution."""
    # Record from arxiv with just arxiv ID in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_arxiv.json", index_name="records-hep"
    )
    # Record from publisher with just DOI in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_publisher.json", index_name="records-hep"
    )

    path = pkg_resources.resource_filename(
        __name__, "fixtures/multiple_matches_arxiv_update.json"
    )
    # fix: close the fixture file deterministically instead of leaking the
    # handle opened by the previous ``json.load(open(path))``
    with open(path) as fixture:
        update_from_arxiv = json.load(fixture)

    # An update from arxiv with the same arxiv and DOI as above records
    workflow_id = build_workflow(update_from_arxiv).id
    start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)
    assert len(set(obj.extra_data["matches"]["exact"])) == 2
    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data["_action"] == "resolve_multiple_exact_matches"
def test_responses_with_etag(workflow_app):
    """Holdingpen detail responses honour ETag / Last-Modified caching."""
    factory = TestRecordMetadata.create_from_kwargs(
        json={'titles': [{'title': 'Etag version'}]}
    )
    workflow_id = build_workflow(factory.record_metadata.json).id
    obj = workflow_object_class.get(workflow_id)
    obj.save()
    db.session.commit()

    workflow_url = '/api/holdingpen/{}'.format(obj.id)

    with workflow_app.test_client() as client:
        login_user_via_session(client, email='*****@*****.**')
        response = client.get(workflow_url)
        assert response.status_code == 200

        etag = response.headers['ETag']
        last_modified = response.headers['Last-Modified']

        # conditional requests with matching validators yield 304
        response = client.get(
            workflow_url, headers={'If-Modified-Since': last_modified})
        assert response.status_code == 304

        response = client.get(workflow_url, headers={'If-None-Match': etag})
        assert response.status_code == 304

        # a non-matching ETag gets a full 200 response again
        response = client.get(workflow_url,
                              headers={'If-None-Match': 'Jessica Jones'})
        assert response.status_code == 200
def test_harvesting_arxiv_workflow_manual_rejected(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow."""
    record = generate_record()
    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
    }

    workflow_uuid, eng, obj = get_halted_workflow(
        app=workflow_app, extra_config=extra_config, record=record
    )

    # reject the halted workflow and resume it
    obj.extra_data["approved"] = False
    obj.save()
    db.session.commit()

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    obj_id = obj.id
    obj.continue_workflow()

    obj = workflow_object_class.get(obj_id)
    # It was rejected
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data["approved"] is False
def test_validation_error_callback_with_validation_error(workflow_app):
    """The validation callback responds 400 with the validation errors."""
    invalid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
        # not a valid date, so schema validation fails
        "preprint_date": "Jessica Jones",
    }
    workflow_id = build_workflow(invalid_record).id

    with pytest.raises(ValidationError):
        start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)
    assert obj.status == ObjectStatus.ERROR

    response = do_validation_callback(workflow_app, obj.id, obj.data,
                                      obj.extra_data)

    expected_message = "Validation error."
    expected_error_code = "VALIDATION_ERROR"
    data = json.loads(response.get_data())

    assert response.status_code == 400
    assert expected_error_code == data["error_code"]
    assert expected_message == data["message"]
    assert data["workflow"]["_extra_data"]["callback_url"]
    assert len(data["workflow"]["_extra_data"]["validation_errors"]) == 1
def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """An invalid record puts the workflow in ERROR with callback details."""
    invalid_record = {
        'document_type': [
            'article',
        ],
        'titles': [
            {
                'title': 'A title'
            },
        ],
    }
    workflow_id = build_workflow(invalid_record).id

    with pytest.raises(ValidationError):
        start('article', object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)
    assert obj.status == ObjectStatus.ERROR
    assert '_error_msg' in obj.extra_data
    assert 'required' in obj.extra_data['_error_msg']

    # the object advertises the callback used to resolve the errors
    expected_url = 'http://localhost:5000/callback/workflows/resolve_validation_errors'
    assert expected_url == obj.extra_data['callback_url']
    assert obj.extra_data['validation_errors']
    assert 'message' in obj.extra_data['validation_errors'][0]
    assert 'path' in obj.extra_data['validation_errors'][0]
def test_harvesting_arxiv_workflow_manual_rejected(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow."""
    record = generate_record()
    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
    }

    workflow_uuid, eng, obj = get_halted_workflow(
        app=workflow_app, extra_config=extra_config, record=record
    )

    # mark the halted workflow as rejected, then resume it
    obj.extra_data["approved"] = False
    obj.save()
    db.session.commit()

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    obj_id = obj.id
    obj.continue_workflow()

    obj = workflow_object_class.get(obj_id)
    # It was rejected
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data["approved"] is False
def test_stop_matched_holdingpen_wfs(app, simple_record):
    """Matched pending holdingpen workflows are stopped by a newer workflow."""
    # need to run a wf in order to assign to it the wf definition and a uuid
    # for it
    obj = workflow_object_class.create(
        data_type='hep',
        **simple_record
    )
    workflow_uuid = start('article', object_id=obj.id)
    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    obj.status = ObjectStatus.HALTED  # pretend it is pending in the holdingpen
    obj.save()
    obj_id = obj.id

    # make the halted workflow searchable before matching
    es.indices.refresh('holdingpen-hep')

    obj2 = WorkflowObject.create(data_type='hep', **simple_record)
    obj2_id = obj2.id

    match_non_completed_wf_in_holdingpen(obj2, None)
    assert obj2.extra_data['holdingpen_matches'] == [obj_id]

    stop_matched_holdingpen_wfs(obj2, None)

    stopped_wf = workflow_object_class.get(obj_id)
    assert stopped_wf.status == ObjectStatus.COMPLETED
    assert stopped_wf.extra_data['stopped-by-wf'] == obj2_id
def stop_matched_holdingpen_wfs(obj, eng): """Stop the matched workflow objects in the holdingpen. Stops the matched workflows in the holdingpen by replacing their steps with a new one defined on the fly, containing a ``stop`` step, and executing it. For traceability reason, these workflows are also marked as ``'stopped-by-wf'``, whose value is the current workflow's id. In the use case of harvesting twice an article, this function is involved to stop the first workflow and let the current one being processed, since it the latest metadata. Args: obj: a workflow object. eng: a workflow engine. Returns: None """ stopping_steps = [mark('stopped-by-wf', int(obj.id)), stop_processing] obj.save() for holdingpen_wf_id in obj.extra_data['holdingpen_matches']: holdingpen_wf = workflow_object_class.get(holdingpen_wf_id) holdingpen_wf_eng = WorkflowEngine.from_uuid(holdingpen_wf.id_workflow) # stop this holdingpen workflow by replacing its steps with a stop step holdingpen_wf_eng.callbacks.replace(stopping_steps) holdingpen_wf_eng.process([holdingpen_wf])
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_match,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow."""
    # fix: the docstring above was originally placed after the first
    # statement, where it was a no-op string expression, not a docstring
    record = generate_record()

    workflow_uuid, eng, obj = get_halted_workflow(app=workflow_app,
                                                  record=record)

    do_accept_core(app=workflow_app, workflow_id=obj.id)
    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    assert obj.status == ObjectStatus.WAITING  # waiting for robotupload

    do_robotupload_callback(app=workflow_app, workflow_id=obj.id,
                            recids=[12345])
    obj = workflow_object_class.get(obj.id)
    assert obj.status == ObjectStatus.WAITING  # waiting for webcoll

    do_webcoll_callback(app=workflow_app, recids=[12345])
    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    # It was accepted
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data["approved"] is True
def continue_workflow_callback(): """Handle callback to continue a workflow. Expects the request data to contain a object ID in the nonce field. """ request_data = request.get_json() id_object = request_data.get("nonce", "") if id_object: callback_results = request_data.get("results", {}) workflow_object = workflow_object_class.get(id_object) if workflow_object: results = request_data.get("results", []) for result in results: status = result.get('success', False) if status: recid = result.get('recid') base_url = _get_base_url() workflow_object.extra_data['url'] = join( base_url, 'record', str(recid) ) workflow_object.extra_data['recid'] = recid # Will add the results to the engine extra_data column. workflow_object.save() db.session.commit() workflow_object.continue_workflow( delayed=True, callback_results=callback_results ) return jsonify({"result": "success"}) return jsonify({"result": "failed"})
def test_responses_with_etag(workflow_app):
    """Conditional GETs on a holdingpen object return 304 when unchanged."""
    factory = TestRecordMetadata.create_from_kwargs(
        json={'titles': [{'title': 'Etag version'}]}
    )
    workflow_id = build_workflow(factory.record_metadata.json).id
    obj = workflow_object_class.get(workflow_id)
    obj.save()
    db.session.commit()

    workflow_url = '/api/holdingpen/{}'.format(obj.id)

    with workflow_app.test_client() as client:
        login_user_via_session(client, email='*****@*****.**')
        response = client.get(workflow_url)
        assert response.status_code == 200

        etag = response.headers['ETag']
        last_modified = response.headers['Last-Modified']

        # matching validators short-circuit with 304 Not Modified
        response = client.get(
            workflow_url, headers={'If-Modified-Since': last_modified})
        assert response.status_code == 304

        response = client.get(workflow_url, headers={'If-None-Match': etag})
        assert response.status_code == 304

        # an unknown ETag forces a fresh 200 response
        response = client.get(workflow_url,
                              headers={'If-None-Match': 'Jessica Jones'})
        assert response.status_code == 200
def test_validation_error_callback_with_validation_error(workflow_app):
    """The validation callback reports the schema errors with HTTP 400."""
    invalid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
        # invalid date string triggers the validation failure
        "preprint_date": "Jessica Jones",
    }
    workflow_id = build_workflow(invalid_record).id

    with pytest.raises(ValidationError):
        start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)
    assert obj.status == ObjectStatus.ERROR

    response = do_validation_callback(workflow_app, obj.id, obj.data,
                                      obj.extra_data)

    expected_message = "Validation error."
    expected_error_code = "VALIDATION_ERROR"
    data = json.loads(response.get_data())

    assert response.status_code == 400
    assert expected_error_code == data["error_code"]
    assert expected_message == data["message"]
    assert data["workflow"]["_extra_data"]["callback_url"]
    assert len(data["workflow"]["_extra_data"]["validation_errors"]) == 1
def resolve_missmatch_version_with_legacy(workflow_id, legacy_revision):
    """Revert record revision to be the same with the legacy version.

    Example ::

        resolve_missmatch_version_with_legacy(1236029, '20180926071008.0')

    Args:
        workflow_id (int): id of the workflow object to resume.
        legacy_revision (str): the ``legacy_version`` value to roll back to.

    Returns:
        None
    """
    obj = workflow_object_class.get(workflow_id)
    record = get_db_record('lit', obj.data['control_number'])
    revisions = [
        revision
        for revision in record.revisions
        if revision.get('legacy_version') == legacy_revision
    ]
    if not revisions:
        print('revision {} not found'.format(legacy_revision))
        return None
    print('revision found.')
    revision = revisions.pop()
    # replace the record content with the matching legacy revision
    record.clear()
    record.update(revision, skip_files=True)
    record.commit()
    # rewind the workflow to its first step before resuming
    obj.callback_pos = [0]
    obj.save()
    db.session.commit()
    response = obj.continue_workflow(delayed=True)
    # fix: the original line used a Python 2 ``print`` statement, which is
    # a SyntaxError on Python 3 and inconsistent with the print() calls above
    print('Workflow {} currently in status {}'.format(workflow_id,
                                                      response.status))
def stop_matched_holdingpen_wfs(obj, eng): """Stop the matched workflow objects in the holdingpen. Stops the matched workflows in the holdingpen by replacing their steps with a new one defined on the fly, containing a ``stop`` step, and executing it. For traceability reason, these workflows are also marked as ``'stopped-by-wf'``, whose value is the current workflow's id. In the use case of harvesting twice an article, this function is involved to stop the first workflow and let the current one being processed, since it the latest metadata. Args: obj: a workflow object. eng: a workflow engine. Returns: None """ stopping_steps = [mark('stopped-by-wf', int(obj.id)), stop_processing] obj.save() for holdingpen_wf_id in obj.extra_data['holdingpen_matches']: holdingpen_wf = workflow_object_class.get(holdingpen_wf_id) holdingpen_wf_eng = WorkflowEngine.from_uuid(holdingpen_wf.id_workflow) # stop this holdingpen workflow by replacing its steps with a stop step holdingpen_wf_eng.callbacks.replace(stopping_steps) holdingpen_wf_eng.process([holdingpen_wf])
def test_conflict_creates_ticket(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """A merge conflict opens an RT ticket and closes it once resolved."""
    with patch(
            'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
            ['acquisition_source.source']):
        TestRecordMetadata.create_from_file(__name__,
                                            'merge_record_arxiv.json',
                                            index_name='records-hep')
        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id
        start('article', object_id=update_workflow_id)
        wf = workflow_object_class.get(update_workflow_id)

        # the first recorded external request is the RT ticket creation
        expected_ticket = u'content=Queue%3A+HEP_conflicts%0AText%3A+Merge+conflict+needs+to+be+resolved.%0A++%0A++https%3A%2F%2Flocalhost%3A5000%2Feditor%2Fholdingpen%2F{wf_id}%0ASubject%3A+arXiv%3A1703.04802+%28%23None%29%0Aid%3A+ticket%2Fnew%0ACF'.format(
            wf_id=wf.id)
        assert mocked_external_services.request_history[0].text.startswith(
            expected_ticket)
        assert wf.extra_data['conflict-ticket-id']

        expected_ticket_close_url = 'http://rt.inspire/ticket/{ticket_id}/edit'.format(
            ticket_id=wf.extra_data['conflict-ticket-id'])

        # resuming the workflow after resolution closes the ticket
        wf.continue_workflow()

        assert mocked_external_services.request_history[
            1].url == expected_ticket_close_url
        assert mocked_external_services.request_history[
            1].text == u'content=Status%3A+resolved'
def start_edit_article_workflow(recid):
    """Start an ``edit_article`` workflow for a record and redirect to the editor.

    Args:
        recid (int): control number of the literature record to edit.

    Returns:
        werkzeug.wrappers.Response: a 302 redirect to the workflow editor.

    Raises:
        CallbackRecordNotFoundError: if no record with ``recid`` exists.
    """
    try:
        record = get_db_record('lit', recid)
    except RecordGetterError:
        raise CallbackRecordNotFoundError(recid)
    record_permission = RecordPermission.create(action='update', record=record)
    if not record_permission.can():
        abort(403, record_permission)
    eng_uuid = start('edit_article', data=record)
    workflow_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
    workflow = workflow_object_class.get(workflow_id)
    if request.referrer:
        # fix: raw strings avoid the invalid '\?' / '\d' escape sequences
        # that the original plain string literals produced
        base_rt_url = get_rt_link_for_ticket('').replace('?', r'\?')
        ticket_match = re.match(base_rt_url + r'(?P<ticket_id>\d+)',
                                request.referrer)
        if ticket_match:
            # remember the RT ticket the curator navigated from
            ticket_id = int(ticket_match.group('ticket_id'))
            workflow.extra_data['curation_ticket_id'] = ticket_id
    workflow.save()
    db.session.commit()
    url = "{}{}".format(current_app.config['WORKFLOWS_EDITOR_API_URL'],
                        workflow_id)
    return redirect(location=url, code=302)
def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """An invalid record leaves the workflow object in the ERROR state."""
    invalid_record = {
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'A title'},
        ],
    }

    obj = workflow_object_class.create(
        data=invalid_record,
        data_type='hep',
        id_user=1,
    )
    obj_id = obj.id

    with pytest.raises(ValidationError):
        start('article', invalid_record, obj_id)

    obj = workflow_object_class.get(obj_id)
    assert obj.status == ObjectStatus.ERROR
    assert '_error_msg' in obj.extra_data
    # the record is missing required fields, e.g. ``_collections``
    assert 'required' in obj.extra_data['_error_msg']
def _continue_workflow(workflow_id, recid, result=None):
    """Small wrapper to continue a workflow.

    Will prepare the needed data from the record id and the result data
    if passed.

    :return: True if succeeded, False if the specified workflow id does
        not exist.
    """
    if result is None:
        result = {}
    base_url = _get_base_url()

    try:
        obj = workflow_object_class.get(workflow_id)
    except WorkflowsMissingObject:
        current_app.logger.error(
            'No workflow object with the id %s could be found.',
            workflow_id,
        )
        return False

    # attach the final record location and callback result to the object
    obj.extra_data['url'] = join(base_url, 'record', str(recid))
    obj.extra_data['recid'] = recid
    obj.data['control_number'] = recid
    obj.extra_data['callback_result'] = result

    obj.save()
    db.session.commit()
    obj.continue_workflow(delayed=True)
    return True
def test_stop_matched_holdingpen_wfs(app, simple_record):
    """A newer workflow stops its matched, still-pending holdingpen twin."""
    # need to run a wf in order to assign to it the wf definition and a uuid
    # for it
    obj = workflow_object_class.create(
        data_type='hep',
        **simple_record
    )
    workflow_uuid = start('article', object_id=obj.id)
    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    obj.status = ObjectStatus.HALTED  # pretend it is pending in the holdingpen
    obj.save()
    obj_id = obj.id

    # make the halted workflow searchable before matching
    current_search.flush_and_refresh('holdingpen-hep')

    obj2 = WorkflowObject.create(data_type='hep', **simple_record)
    obj2_id = obj2.id

    match_non_completed_wf_in_holdingpen(obj2, None)
    assert obj2.extra_data['holdingpen_matches'] == [obj_id]

    stop_matched_holdingpen_wfs(obj2, None)

    stopped_wf = workflow_object_class.get(obj_id)
    assert stopped_wf.status == ObjectStatus.COMPLETED
    assert stopped_wf.extra_data['stopped-by-wf'] == obj2_id
def update_existing_workflow_object(obj, eng):
    """Update the data of the old object with the new data.

    Looks through the matched holdingpen objects and updates the first one
    sharing the same acquisition method as ``obj``.

    Raises:
        Exception: if no matched object could be updated (including the
            case of an empty ``holdingpen_ids`` list).
    """
    from invenio_workflows import workflow_object_class

    holdingpen_ids = obj.extra_data.get("holdingpen_ids", [])
    for matched_id in holdingpen_ids:
        existing_obj = workflow_object_class.get(matched_id)
        if (
            obj.data.get('acquisition_source') and
            existing_obj.data.get('acquisition_source')
        ):
            if (
                obj.data['acquisition_source'].get('method') ==
                existing_obj.data['acquisition_source'].get('method')
            ):
                # Method is the same, update obj
                existing_obj.data.update(obj.data)
                existing_obj.save()
                break
    else:
        # for/else: reached only when the loop finished without ``break``,
        # i.e. no matched object was updated
        msg = "Cannot update old object, non valid ids: {0}".format(
            holdingpen_ids
        )
        obj.log.error(msg)
        raise Exception(msg)
def _continue_workflow(workflow_id, recid, result=None):
    """Small wrapper to continue a workflow.

    Will prepare the needed data from the record id and the result data
    if passed.

    :return: True if succeeded, False if the specified workflow id does
        not exist.
    """
    if result is None:
        result = {}
    base_url = _get_base_url()

    try:
        obj = workflow_object_class.get(workflow_id)
    except WorkflowsMissingObject:
        current_app.logger.error(
            'No workflow object with the id %s could be found.',
            workflow_id,
        )
        return False

    # attach the final record location and callback result to the object
    obj.extra_data['url'] = join(base_url, 'record', str(recid))
    obj.extra_data['recid'] = recid
    obj.extra_data['callback_result'] = result

    obj.save()
    db.session.commit()
    obj.continue_workflow(delayed=True)
    return True
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_match,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow."""
    # fix: the docstring above was originally placed after the first
    # statement, where it was a no-op string expression, not a docstring
    record = generate_record()

    workflow_uuid, eng, obj = get_halted_workflow(app=workflow_app,
                                                  record=record)

    do_accept_core(app=workflow_app, workflow_id=obj.id)
    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    assert obj.status == ObjectStatus.WAITING  # waiting for robotupload

    do_robotupload_callback(app=workflow_app, workflow_id=obj.id,
                            recids=[12345])
    obj = workflow_object_class.get(obj.id)
    assert obj.status == ObjectStatus.WAITING  # waiting for webcoll

    do_webcoll_callback(app=workflow_app, recids=[12345])
    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    # It was accepted
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data["approved"] is True
def webcoll_callback(): """Handle a callback from webcoll with the record ids processed. Expects the request data to contain a list of record ids in the recids field. """ recids = dict(request.form).get('recids', []) pending_records = current_cache.get("pending_records") or dict() for rid in recids: if rid in pending_records: objectid = pending_records[rid] workflow_object = workflow_object_class.get(objectid) base_url = _get_base_url() workflow_object.extra_data['url'] = join(base_url, 'record', str(rid)) workflow_object.extra_data['recid'] = rid workflow_object.save() db.session.commit() workflow_object.continue_workflow(delayed=True) del pending_records[rid] current_cache.set( "pending_records", pending_records, timeout=current_app.config["PENDING_RECORDS_CACHE_TIMEOUT"]) return jsonify({"result": "success"})
def update_existing_workflow_object(obj, eng):
    """Update the data of the old object with the new data.

    Scans the matched holdingpen objects and updates the first one whose
    acquisition method equals that of ``obj``.

    Raises:
        Exception: if no matched object could be updated (including the
            case of an empty ``holdingpen_ids`` list).
    """
    from invenio_workflows import workflow_object_class

    holdingpen_ids = obj.extra_data.get("holdingpen_ids", [])
    for matched_id in holdingpen_ids:
        existing_obj = workflow_object_class.get(matched_id)
        if (
            obj.data.get('acquisition_source') and
            existing_obj.data.get('acquisition_source')
        ):
            if (
                obj.data['acquisition_source'].get('method') ==
                existing_obj.data['acquisition_source'].get('method')
            ):
                # Method is the same, update obj
                existing_obj.data.update(obj.data)
                existing_obj.save()
                break
    else:
        # for/else: runs only when the loop ended without ``break``,
        # i.e. nothing was updated
        msg = "Cannot update old object, non valid ids: {0}".format(
            holdingpen_ids
        )
        obj.log.error(msg)
        raise Exception(msg)
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """An update from a different source does not stop the matched wf."""
    record = generate_record()

    workflow_id = build_workflow(record).id
    eng_uuid = start('article', object_id=workflow_id)
    # make the first workflow searchable by the holdingpen matcher
    current_search.flush_and_refresh('holdingpen-hep')
    eng = WorkflowEngine.from_uuid(eng_uuid)
    wf_to_match = eng.objects[0].id
    obj = workflow_object_class.get(wf_to_match)
    assert obj.status == ObjectStatus.HALTED
    # generated wf pending in holdingpen

    record['titles'][0]['title'] = 'This is an update that will match the wf in the holdingpen'
    record['acquisition_source']['source'] = 'but not the source'
    # this workflow matches in the holdingpen but continues because has a
    # different source
    workflow_id = build_workflow(record).id
    eng_uuid = start('article', object_id=workflow_id)
    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    assert obj.extra_data['already-in-holding-pen'] is True
    assert obj.extra_data['holdingpen_matches'] == [wf_to_match]
    assert obj.extra_data['previously_rejected'] is False
    assert not obj.extra_data.get('stopped-matched-holdingpen-wf')
def test_workflows_halts_on_multiple_exact_matches(workflow_app):
    """Two distinct exact matches halt the workflow for manual resolution."""
    # Record from arxiv with just arxiv ID in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_arxiv.json", index_name="records-hep"
    )
    # Record from publisher with just DOI in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_publisher.json", index_name="records-hep"
    )

    path = pkg_resources.resource_filename(
        __name__, "fixtures/multiple_matches_arxiv_update.json"
    )
    # fix: close the fixture file deterministically instead of leaking the
    # handle opened by the previous ``json.load(open(path))``
    with open(path) as fixture:
        update_from_arxiv = json.load(fixture)

    # An update from arxiv with the same arxiv and DOI as above records
    workflow_id = build_workflow(update_from_arxiv).id
    start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)
    assert len(set(obj.extra_data["matches"]["exact"])) == 2
    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data["_action"] == "resolve_multiple_exact_matches"
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow."""
    record = generate_record()

    with workflow_app.app_context():
        obj_id = build_workflow(record).id
        start('article', object_id=obj_id)

        obj = workflow_object_class.get(obj_id)
        # a fresh run starts with a zero restart counter
        assert obj.extra_data['source_data']['persistent_data']['marks']['restart-count'] == 0
        assert obj.extra_data['restart-count'] == 0

        # rewind the workflow to its first step and run it again
        obj.callback_pos = [0]
        obj.save()
        db.session.commit()

        start('article', object_id=obj_id)
        # restarting bumps the persistent counter
        assert obj.extra_data['source_data']['persistent_data']['marks']['restart-count'] == 1
        assert obj.extra_data['restart-count'] == 1
def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """An invalid record ends in ERROR and exposes the resolution callback."""
    invalid_record = {
        "document_type": ["article"],
        "titles": [{
            "title": "A title"
        }]
    }

    workflow_id = build_workflow(invalid_record).id

    with pytest.raises(ValidationError):
        start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)
    assert obj.status == ObjectStatus.ERROR
    assert "_error_msg" in obj.extra_data
    assert "required" in obj.extra_data["_error_msg"]

    # the object advertises the callback used to resolve the errors
    expected_url = "http://localhost:5000/callback/workflows/resolve_validation_errors"
    assert expected_url == obj.extra_data["callback_url"]
    assert obj.extra_data["validation_errors"]
    assert "message" in obj.extra_data["validation_errors"][0]
    assert "path" in obj.extra_data["validation_errors"][0]
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A matched update with a different source keeps both workflows alive."""
    record = generate_record()

    workflow_id = build_workflow(record).id
    eng_uuid = start('article', object_id=workflow_id)
    # make the first workflow searchable by the holdingpen matcher
    es.indices.refresh('holdingpen-hep')
    eng = WorkflowEngine.from_uuid(eng_uuid)
    wf_to_match = eng.objects[0].id
    obj = workflow_object_class.get(wf_to_match)
    assert obj.status == ObjectStatus.HALTED
    # generated wf pending in holdingpen

    record['titles'][0]['title'] = 'This is an update that will match the wf in the holdingpen'
    record['acquisition_source']['source'] = 'but not the source'
    # this workflow matches in the holdingpen but continues because has a
    # different source
    workflow_id = build_workflow(record).id
    eng_uuid = start('article', object_id=workflow_id)
    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    assert obj.extra_data['already-in-holding-pen'] is True
    assert obj.extra_data['holdingpen_matches'] == [wf_to_match]
    assert obj.extra_data['previously_rejected'] is False
    assert not obj.extra_data.get('stopped-matched-holdingpen-wf')
def test_harvesting_arxiv_workflow_accepted(
        mocked, db_only_app, record_oai_arxiv_plots):
    """Test a full harvesting workflow."""
    from invenio_workflows import (
        start,
        WorkflowEngine,
        ObjectStatus,
        workflow_object_class
    )
    from dojson.contrib.marc21.utils import create_record
    from invenio_db import db
    from inspirehep.dojson.hep import hep
    from inspirehep.modules.converter.xslt import convert

    # Convert to MARCXML, then dict, then HEP JSON
    record_oai_arxiv_plots_marcxml = convert(
        record_oai_arxiv_plots,
        "oaiarXiv2marcxml.xsl"
    )
    record_marc = create_record(record_oai_arxiv_plots_marcxml)
    record_json = hep.do(record_marc)
    workflow_uuid = None
    with db_only_app.app_context():
        workflow_uuid = start('article', [record_json])
        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]

        # the workflow halts waiting for the curator's decision
        assert obj.status == ObjectStatus.HALTED
        assert obj.data_type == "hep"

        # Files should have been attached (tarball + pdf)
        assert obj.files["1407.7587.pdf"]
        assert obj.files["1407.7587.tar.gz"]

        # A publication note should have been extracted
        pub_info = obj.data.get('publication_info')
        assert pub_info
        assert pub_info[0]
        assert pub_info[0].get('year') == "2014"
        assert pub_info[0].get('journal_title') == "J. Math. Phys."

        # This record should not have been touched yet
        assert "approved" not in obj.extra_data

        # Now let's resolve it as accepted and continue
        # FIXME Should be accept, but record validation prevents us.
        obj.remove_action()
        obj.extra_data["approved"] = True
        obj.extra_data["core"] = True
        obj.save()
        db.session.commit()

    with db_only_app.app_context():
        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        obj_id = obj.id
        obj.continue_workflow()

        obj = workflow_object_class.get(obj_id)
        # It was accepted
        assert obj.status == ObjectStatus.COMPLETED
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """A restarted workflow bumps its restart counters from 0 to 1."""
    record = generate_record()
    with workflow_app.app_context():
        workflow_id = build_workflow(record).id
        start('article', object_id=workflow_id)
        workflow = workflow_object_class.get(workflow_id)

        marks = workflow.extra_data['source_data']['persistent_data']['marks']
        assert marks['restart-count'] == 0
        assert workflow.extra_data['restart-count'] == 0

        # Rewind to the first task and run the workflow a second time.
        workflow.callback_pos = [0]
        workflow.save()
        db.session.commit()
        start('article', object_id=workflow_id)

        marks = workflow.extra_data['source_data']['persistent_data']['marks']
        assert marks['restart-count'] == 1
        assert workflow.extra_data['restart-count'] == 1
def test_wf_rejects_automatically_when_previous_matched_wf_was_rejected(
    app, celery_app_with_context, celery_session_worker, generated_record
):
    """An update matching a rejected wf completes without going to a curator."""
    # Disable legacy/production side effects so the workflow runs locally.
    app.config['FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY'] = False
    app.config['PRODUCTION_MODE'] = False
    app.config['USE_SIGNALS_ON_TIMEOUT'] = False
    record = generated_record
    workflow = build_workflow(record)
    workflow.save()
    db.session.commit()
    wf1_id = workflow.id
    start.delay('article', object_id=wf1_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.HALTED)
    wf1 = workflow_object_class.get(wf1_id)
    # Reject the first workflow and let it finish.
    wf1.extra_data["approved"] = False
    wf1.continue_workflow(delayed=True)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.COMPLETED)
    wf1 = workflow_object_class.get(wf1_id)
    assert wf1.extra_data.get("approved") is False
    # The same record is harvested again: it matches the rejected wf
    # and completes automatically, marked as previously rejected.
    workflow2 = build_workflow(record)
    workflow2.save()
    db.session.commit()
    wf2_id = workflow2.id
    start.delay('article', object_id=wf2_id)
    es.indices.refresh("holdingpen-hep")
    check_wf_state(wf2_id, ObjectStatus.COMPLETED)
    wf2 = workflow_object_class.get(wf2_id)
    assert wf2.extra_data["previously_rejected"] is True
    assert wf2.extra_data["previously_rejected_matches"] == [wf1_id]
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_search,
    mocked_api_request_beard_block,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download,
    workflow_app,
    record,
):
    """Test a full harvesting workflow."""
    with requests_mock.Mocker() as requests_mocker:
        # Pass indexer/localhost traffic through; mock the legacy endpoint.
        requests_mocker.register_uri(
            requests_mock.ANY,
            re.compile('.*(indexer|localhost).*'),
            real_http=True,
        )
        requests_mocker.register_uri(
            'POST',
            re.compile(
                'https?://localhost:1234.*',
            ),
            text=u'[INFO]',
            status_code=200,
        )
        workflow_uuid, eng, obj = get_halted_workflow(
            app=workflow_app,
            extra_config={'PRODUCTION_MODE': False},
            record=record,
        )
        # Curator accepts the record as CORE.
        _do_accept_core(
            app=workflow_app,
            workflow_id=obj.id,
        )
        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        assert obj.status == ObjectStatus.WAITING
        # Simulate legacy robotupload calling back with the new recid.
        response = _do_robotupload_callback(
            app=workflow_app,
            workflow_id=obj.id,
            recids=[12345],
        )
        assert response.status_code == 200
        obj = workflow_object_class.get(obj.id)
        assert obj.status == ObjectStatus.WAITING
        # Simulate the webcoll callback that finalizes the upload.
        response = _do_webcoll_callback(app=workflow_app, recids=[12345])
        assert response.status_code == 200
        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        # It was accepted
        assert obj.status == ObjectStatus.COMPLETED
def test_update_record_goes_through_api_version_of_store_record_without_issue(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """An exact-matched update is PUT to the REST API and completes."""
    record = record_from_db
    workflow_id = build_workflow(record).id
    expected_control_number = record['control_number']
    expected_head_uuid = str(record.id)
    with mock.patch.dict(
            workflow_app.config, {
                "FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT": True,
                "INSPIREHEP_URL": "http://web:8000"
            }):
        with requests_mock.Mocker(real_http=True) as requests_mocker:
            # Mock the inspirehep PUT endpoint for the matched record.
            requests_mocker.register_uri(
                'PUT',
                '{url}/literature/{cn}'.format(
                    url=workflow_app.config.get("INSPIREHEP_URL"),
                    cn=expected_control_number,
                ),
                headers={'content-type': 'application/json'},
                status_code=200,
                json={
                    'metadata': {
                        'control_number': expected_control_number,
                    },
                    'id_': expected_head_uuid
                })
            eng_uuid = start("article", object_id=workflow_id)
            # The record must have been stored via the REST API host.
            url_paths = [r.path for r in requests_mocker.request_history]
            url_hostnames = [
                r.hostname for r in requests_mocker.request_history
            ]
            assert 'web' in url_hostnames
            assert "/literature/{cn}".format(
                cn=expected_control_number) in url_paths
            obj_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
            obj = workflow_object_class.get(obj_id)
            assert obj.data['control_number'] == expected_control_number
            assert obj.extra_data["holdingpen_matches"] == []
            assert obj.extra_data["previously_rejected"] is False
            assert not obj.extra_data.get("stopped-matched-holdingpen-wf")
            assert obj.extra_data["is-update"]
            assert obj.extra_data["exact-matched"]
            assert obj.extra_data["matches"]["exact"] == [record.get("control_number")]
            assert obj.extra_data["matches"]["approved"] == record.get(
                "control_number")
            assert obj.extra_data["approved"]
            assert obj.status == ObjectStatus.COMPLETED
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """An update matching a pending (halted) wf stops the pending one."""
    record = generate_record()
    workflow_id = build_workflow(record).id
    eng_uuid = start("article", object_id=workflow_id)
    es.indices.refresh("holdingpen-hep")
    eng = WorkflowEngine.from_uuid(eng_uuid)
    old_wf = eng.objects[0]
    obj_id = old_wf.id
    assert old_wf.status == ObjectStatus.HALTED
    assert old_wf.extra_data["previously_rejected"] is False
    # NOTE(review): record2 aliases record (no copy) — the title update
    # below mutates the same dict both names point to.
    record2 = record
    record["titles"][0][
        "title"
    ] = "This is an update that will match the wf in the holdingpen"
    record2_workflow = build_workflow(record2).id
    start("article", object_id=record2_workflow)
    es.indices.refresh("holdingpen-hep")
    update_wf = workflow_object_class.get(record2_workflow)
    assert update_wf.status == ObjectStatus.HALTED
    # As workflow stops (in error) before setting this
    assert update_wf.extra_data["previously_rejected"] is False
    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data["stopped-matched-holdingpen-wf"] is True
    assert update_wf.extra_data["is-update"] is False
    # The matched pending workflow was stopped by the update.
    old_wf = workflow_object_class.get(obj_id)
    assert old_wf.extra_data['already-in-holding-pen'] is False
    assert old_wf.extra_data['previously_rejected'] is False
    assert old_wf.extra_data['stopped-by-wf'] == update_wf.id
    assert old_wf.extra_data.get('approved') is None
    assert old_wf.extra_data['is-update'] is False
    assert old_wf.status == ObjectStatus.COMPLETED
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """An update matching a pending (halted) wf stops the pending one."""
    record = generate_record()
    workflow_id = build_workflow(record).id
    eng_uuid = start("article", object_id=workflow_id)
    current_search.flush_and_refresh("holdingpen-hep")
    eng = WorkflowEngine.from_uuid(eng_uuid)
    old_wf = eng.objects[0]
    obj_id = old_wf.id
    assert old_wf.status == ObjectStatus.HALTED
    assert old_wf.extra_data["previously_rejected"] is False
    # NOTE(review): record2 aliases record (no copy) — the title update
    # below mutates the same dict both names point to.
    record2 = record
    record["titles"][0][
        "title"
    ] = "This is an update that will match the wf in the holdingpen"
    record2_workflow = build_workflow(record2).id
    start("article", object_id=record2_workflow)
    current_search.flush_and_refresh("holdingpen-hep")
    update_wf = workflow_object_class.get(record2_workflow)
    assert update_wf.status == ObjectStatus.HALTED
    # As workflow stops (in error) before setting this
    assert update_wf.extra_data["previously_rejected"] is False
    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data["stopped-matched-holdingpen-wf"] is True
    assert update_wf.extra_data["is-update"] is False
    # The matched pending workflow was stopped by the update.
    old_wf = workflow_object_class.get(obj_id)
    assert old_wf.extra_data['already-in-holding-pen'] is False
    assert old_wf.extra_data['previously_rejected'] is False
    assert old_wf.extra_data['stopped-by-wf'] == update_wf.id
    assert old_wf.extra_data.get('approved') is None
    assert old_wf.extra_data['is-update'] is False
    assert old_wf.status == ObjectStatus.COMPLETED
def test_harvesting_arxiv_workflow_accepted(mocked, small_app,
                                            record_oai_arxiv_plots):
    """Test a full harvesting workflow."""
    from invenio_workflows import (start, WorkflowEngine, ObjectStatus,
                                   workflow_object_class)
    from dojson.contrib.marc21.utils import create_record
    from invenio_db import db
    from inspirehep.dojson.hep import hep
    from inspirehep.modules.converter.xslt import convert

    # Convert to MARCXML, then dict, then HEP JSON
    record_oai_arxiv_plots_marcxml = convert(record_oai_arxiv_plots,
                                             "oaiarXiv2marcxml.xsl")
    record_marc = create_record(record_oai_arxiv_plots_marcxml)
    record_json = hep.do(record_marc)
    workflow_uuid = None
    with small_app.app_context():
        # First run: the workflow halts waiting for curator approval.
        workflow_uuid = start('article', [record_json])
        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        assert obj.status == ObjectStatus.HALTED
        assert obj.data_type == "hep"
        # Files should have been attached (tarball + pdf)
        assert obj.files["1407.7587.pdf"]
        assert obj.files["1407.7587.tar.gz"]
        # A publication note should have been extracted
        pub_info = obj.data.get('publication_info')
        assert pub_info
        assert pub_info[0]
        assert pub_info[0].get('year') == "2014"
        assert pub_info[0].get('journal_title') == "J. Math. Phys."
        # This record should not have been touched yet
        assert "approved" not in obj.extra_data
        # Now let's resolve it as accepted and continue
        # FIXME Should be accept, but record validation prevents us.
        obj.remove_action()
        obj.extra_data["approved"] = True
        obj.extra_data["core"] = True
        obj.save()
        db.session.commit()
    # Fresh app context: resume the halted workflow and verify completion.
    with small_app.app_context():
        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        obj_id = obj.id
        obj.continue_workflow()
        obj = workflow_object_class.get(obj_id)
        # It was accepted
        assert obj.status == ObjectStatus.COMPLETED
def test_manual_merge_existing_records(workflow_app):
    """Manually merging two records keeps the head and deletes the update."""
    json_head = fake_record('This is the HEAD', 1)
    json_update = fake_record('While this is the update', 2)
    # this two fields will create a merging conflict
    json_head['core'] = True
    json_update['core'] = False
    head = InspireRecord.create_or_update(json_head, skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(json_update, skip_files=False)
    update.commit()
    head_id = head.id
    update_id = update.id
    obj_id = start_merger(
        head_id=1,
        update_id=2,
        current_user_id=1,
    )
    # Resolve the merge conflict through the workflow UI callback.
    do_resolve_manual_merge_wf(workflow_app, obj_id)
    # retrieve it again, otherwise Detached Instance Error
    obj = workflow_object_class.get(obj_id)
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data['approved'] is True
    assert obj.extra_data['auto-approved'] is False
    # no root present before
    last_root = read_wf_record_source(head_id, 'arxiv')
    assert last_root is None
    update_source = LiteratureReader(update).source
    root_update = read_wf_record_source(update_id, update_source)
    assert root_update is None
    # check that head's content has been replaced by merged
    deleted_record = RecordMetadata.query.filter_by(id=update_id).one()
    latest_record = get_db_record('lit', 1)
    assert deleted_record.json['deleted'] is True
    # check deleted record is linked in the latest one
    deleted_rec_ref = {'$ref': 'http://localhost:5000/api/literature/2'}
    assert [deleted_rec_ref] == latest_record['deleted_records']
    # check the merged record is linked in the deleted one
    new_record_metadata = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert new_record_metadata == deleted_record.json['new_record']
    del latest_record['deleted_records']
    assert latest_record == obj.data  # -> resulted merged record
def test_manual_merge_existing_records(workflow_app):
    """Manually merging two records keeps the head and deletes the update."""
    json_head = fake_record('This is the HEAD', 1)
    json_update = fake_record('While this is the update', 2)
    # this two fields will create a merging conflict
    json_head['core'] = True
    json_update['core'] = False
    head = InspireRecord.create_or_update(json_head, skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(json_update, skip_files=False)
    update.commit()
    head_id = head.id
    update_id = update.id
    obj_id = start_merger(
        head_id=1,
        update_id=2,
        current_user_id=1,
    )
    # Resolve the merge conflict through the workflow UI callback.
    do_resolve_manual_merge_wf(workflow_app, obj_id)
    # retrieve it again, otherwise Detached Instance Error
    obj = workflow_object_class.get(obj_id)
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data['approved'] is True
    assert obj.extra_data['auto-approved'] is False
    # no root present before
    last_root = read_wf_record_source(head_id, 'arxiv')
    assert last_root is None
    update_source = LiteratureReader(update).source
    root_update = read_wf_record_source(update_id, update_source)
    assert root_update is None
    # check that head's content has been replaced by merged
    deleted_record = RecordMetadata.query.filter_by(id=update_id).one()
    latest_record = get_db_record('lit', 1)
    assert deleted_record.json['deleted'] is True
    # check deleted record is linked in the latest one
    deleted_rec_ref = {'$ref': 'http://localhost:5000/api/literature/2'}
    assert [deleted_rec_ref] == latest_record['deleted_records']
    # check the merged record is linked in the deleted one
    new_record_metadata = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert new_record_metadata == deleted_record.json['new_record']
    del latest_record['deleted_records']
    assert latest_record == obj.data  # -> resulted merged record
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_search,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    workflow_app,
):
    """Test a full harvesting workflow."""
    # Fix: the docstring used to sit after the first statement, where it is
    # just a no-op string expression, not the function's docstring.
    record = generate_record()
    with requests_mock.Mocker() as requests_mocker:
        # Pass indexer/localhost traffic through; mock the legacy endpoint.
        requests_mocker.register_uri(
            requests_mock.ANY,
            re.compile('.*(indexer|localhost).*'),
            real_http=True,
        )
        requests_mocker.register_uri(
            'POST',
            re.compile('https?://localhost:1234.*', ),
            text=u'[INFO]',
            status_code=200,
        )
        workflow_uuid, eng, obj = get_halted_workflow(
            app=workflow_app,
            extra_config={'PRODUCTION_MODE': False},
            record=record,
        )
        # Curator accepts the record as CORE.
        do_accept_core(
            app=workflow_app,
            workflow_id=obj.id,
        )
        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        assert obj.status == ObjectStatus.WAITING
        # Simulate legacy robotupload calling back with the new recid.
        response = do_robotupload_callback(
            app=workflow_app,
            workflow_id=obj.id,
            recids=[12345],
        )
        assert response.status_code == 200
        obj = workflow_object_class.get(obj.id)
        assert obj.status == ObjectStatus.WAITING
        # Simulate the webcoll callback that finalizes the upload.
        response = do_webcoll_callback(app=workflow_app, recids=[12345])
        assert response.status_code == 200
        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        # It was accepted
        assert obj.status == ObjectStatus.COMPLETED
def test_update_author_submit_with_required_fields(mock_start, workflow_app, mocked_external_services):
    """POSTing an author update with required fields starts an author wf."""
    # NOTE(review): the original literals were corrupted ("$schema" fused
    # with an e-mail and the "acquisition_source": { opener missing, leaving
    # unbalanced braces). The schema URL and acquisition_source structure
    # below are reconstructed — confirm against the original fixture.
    data = {
        "data": {
            "$schema": "http://localhost:5000/schemas/records/authors.json",
            "acquisition_source": {
                "email": "*****@gmail.com",
                "datetime": "2019-02-04T10:06:34.695915",
                "method": "submitter",
                "submission_number": "None",
                "internal_uid": 1,
            },
            "name": {
                "value": "Martinez, Diegpo"
            },
            "status": "active",
            "control_number": 3
        }
    }
    with workflow_app.test_client() as client:
        headers = {
            "Authorization": "Bearer " + current_app.config["AUTHENTICATION_TOKEN"]
        }
        response = client.post(
            '/workflows/authors',
            data=json.dumps(data),
            content_type='application/json',
            headers=headers,
        )
        assert response.status_code == 200

        workflow_object_id = json.loads(response.data).get('workflow_object_id')
        assert workflow_object_id is not None
        obj = workflow_object_class.get(workflow_object_id)
        # The HTTP view only enqueues the workflow; it must not run inline.
        mock_start.delay.assert_called_once_with(
            "author", object_id=workflow_object_id)
        expected = {
            "status": "active",
            "$schema": "http://localhost:5000/schemas/records/authors.json",
            "acquisition_source": {
                "email": "*****@gmail.com",
                "method": "submitter",
                "internal_uid": 1,
                "submission_number": "1",
                "datetime": "2019-02-04T10:06:34.695915"
            },
            "_collections": [
                "Authors"
            ],
            "name": {
                "value": "Martinez, Diegpo"
            },
            "control_number": 3
        }
        assert expected == obj.data
        assert obj.extra_data['is-update'] is True
def test_update_author_submit_with_required_fields(mock_start, workflow_app, mocked_external_services):
    """POSTing an author update with required fields starts an author wf."""
    # NOTE(review): the original literals were corrupted ("$schema" fused
    # with an e-mail and the "acquisition_source": { opener missing, leaving
    # unbalanced braces). The schema URL and acquisition_source structure
    # below are reconstructed — confirm against the original fixture.
    data = {
        "data": {
            "$schema": "http://localhost:5000/schemas/records/authors.json",
            "acquisition_source": {
                "email": "*****@gmail.com",
                "datetime": "2019-02-04T10:06:34.695915",
                "method": "submitter",
                "submission_number": "None",
                "internal_uid": 1,
            },
            "name": {
                "value": "Martinez, Diegpo"
            },
            "status": "active",
            "control_number": 3
        }
    }
    with workflow_app.test_client() as client:
        headers = {
            "Authorization": "Bearer " + current_app.config["AUTHENTICATION_TOKEN"]
        }
        response = client.post(
            '/workflows/authors',
            data=json.dumps(data),
            content_type='application/json',
            headers=headers,
        )
        assert response.status_code == 200

        workflow_object_id = json.loads(response.data).get('workflow_object_id')
        assert workflow_object_id is not None
        obj = workflow_object_class.get(workflow_object_id)
        # The HTTP view only enqueues the workflow; it must not run inline.
        mock_start.delay.assert_called_once_with(
            "author", object_id=workflow_object_id)
        expected = {
            "status": "active",
            "$schema": "http://localhost:5000/schemas/records/authors.json",
            "acquisition_source": {
                "email": "*****@gmail.com",
                "method": "submitter",
                "internal_uid": 1,
                "submission_number": "1",
                "datetime": "2019-02-04T10:06:34.695915"
            },
            "_collections": [
                "Authors"
            ],
            "name": {
                "value": "Martinez, Diegpo"
            },
            "control_number": 3
        }
        assert expected == obj.data
        assert obj.extra_data['is-update'] is True
def test_cli_delete_edit_article_workflows(app_cli_runner):
    """`delete_edit_article_older_than` removes only stale edit_article wfs."""
    # A WAITING edit_article workflow older than 48h: should be deleted.
    wf_to_be_deleted = build_workflow({}, data_type='hep')
    wf_to_be_deleted.save()
    start('edit_article', object_id=wf_to_be_deleted.id)
    wf_to_be_deleted = workflow_object_class.get(wf_to_be_deleted.id)
    wf_to_be_deleted.status = ObjectStatus.WAITING
    wf_to_be_deleted.created = datetime.datetime(2020, 7, 8, 12, 31, 8, 299777)
    wf_to_be_deleted.save()
    # An equally old workflow in ERROR state: must be kept.
    wf_in_error = build_workflow({}, data_type='hep')
    wf_in_error.status = ObjectStatus.ERROR
    wf_in_error.extra_data["_error_msg"] = "Error in WebColl"
    wf_in_error.created = datetime.datetime(2020, 7, 8, 12, 31, 8, 299777)
    wf_in_error.save()
    # A recent WAITING workflow: must be kept.
    recent_wf = build_workflow({}, data_type='hep')
    recent_wf.save()
    start('edit_article', object_id=recent_wf.id)
    recent_wf = workflow_object_class.get(recent_wf.id)
    recent_wf.status = ObjectStatus.WAITING
    recent_wf.created = datetime.datetime(2020, 7, 11, 12, 31, 8, 299777)
    recent_wf.save()
    indices = ['holdingpen-hep']
    es.indices.refresh(indices)
    es_result = es.search(indices)
    assert es_result['hits']['total']['value'] == 3
    wf_count = WorkflowObjectModel.query.count()
    assert wf_count == 3
    result = app_cli_runner.invoke(workflows, ['delete_edit_article_older_than'])
    # NOTE(review): `str in bytes` raises TypeError on Python 3 — if this
    # suite runs under py3, `result.output` is probably intended; confirm.
    assert "Found 1 workflows to delete older than 48 hours" in result.output_bytes
    es.indices.refresh(indices)
    es_result = es.search(indices)
    assert es_result['hits']['total']['value'] == 2
    wf_count = WorkflowObjectModel.query.count()
    assert wf_count == 2
    assert WorkflowObjectModel.query.filter_by(
        id=wf_to_be_deleted.id).one_or_none() is None
def test_authors_workflow_continues_when_record_is_valid(workflow_app, mocked_external_services):
    """A schema-valid author record halts for approval without errors."""
    author_record = {
        '_collections': ['Authors'],
        'name': {
            'preferred_name': 'John Smith',
            'value': 'Smith, John'
        }
    }
    wf_id = build_workflow(author_record, data_type='authors', id_user=1).id
    wf = workflow_object_class.get(wf_id)
    start('author', object_id=wf.id)

    wf = workflow_object_class.get(wf.id)
    # Halted means it is waiting for curator input, not failed.
    assert wf.status == ObjectStatus.HALTED
    assert '_error_msg' not in wf.extra_data
def test_update_record_goes_through_api_version_of_store_record_without_issue(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """An exact-matched update is PUT to the REST API and completes."""
    record = record_from_db
    workflow_id = build_workflow(record).id
    expected_control_number = record['control_number']
    expected_head_uuid = str(record.id)
    with mock.patch.dict(
        workflow_app.config,
        {
            "FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT": True,
            "INSPIREHEP_URL": "http://web:8000"
        }
    ):
        with requests_mock.Mocker(real_http=True) as requests_mocker:
            # Mock the inspirehep PUT endpoint for the matched record.
            requests_mocker.register_uri(
                'PUT',
                '{url}/literature/{cn}'.format(
                    url=workflow_app.config.get("INSPIREHEP_URL"),
                    cn=expected_control_number,
                ),
                headers={'content-type': 'application/json'},
                status_code=200,
                json={
                    'metadata': {
                        'control_number': expected_control_number,
                    },
                    'id_': expected_head_uuid
                }
            )
            eng_uuid = start("article", object_id=workflow_id)
            # The record must have been stored via the REST API host.
            url_paths = [r.path for r in requests_mocker.request_history]
            url_hostnames = [r.hostname for r in requests_mocker.request_history]
            assert 'web' in url_hostnames
            assert "/literature/{cn}".format(cn=expected_control_number) in url_paths
            obj_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
            obj = workflow_object_class.get(obj_id)
            assert obj.data['control_number'] == expected_control_number
            assert obj.extra_data["holdingpen_matches"] == []
            assert obj.extra_data["previously_rejected"] is False
            assert not obj.extra_data.get("stopped-matched-holdingpen-wf")
            assert obj.extra_data["is-update"]
            assert obj.extra_data["exact-matched"]
            assert obj.extra_data["matches"]["exact"] == [record.get("control_number")]
            assert obj.extra_data["matches"]["approved"] == record.get("control_number")
            assert obj.extra_data["approved"]
            assert obj.status == ObjectStatus.COMPLETED
def test_wf_rejects_automatically_when_previous_matched_wf_was_rejected(
        app, celery_app_with_context, celery_session_worker,
        generated_record):
    """An update matching a rejected wf completes without going to a curator."""
    # Disable legacy/production side effects so the workflow runs locally.
    app.config['FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY'] = False
    app.config['PRODUCTION_MODE'] = False
    app.config['USE_SIGNALS_ON_TIMEOUT'] = False
    record = generated_record
    workflow = build_workflow(record)
    workflow.save()
    db.session.commit()
    wf1_id = workflow.id
    start.delay('article', object_id=wf1_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.HALTED)
    wf1 = workflow_object_class.get(wf1_id)
    # Reject the first workflow and let it finish.
    wf1.extra_data["approved"] = False
    wf1.continue_workflow(delayed=True)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.COMPLETED)
    wf1 = workflow_object_class.get(wf1_id)
    assert wf1.extra_data.get("approved") is False
    # The same record is harvested again: it matches the rejected wf
    # and completes automatically, marked as previously rejected.
    workflow2 = build_workflow(record)
    workflow2.save()
    db.session.commit()
    wf2_id = workflow2.id
    start.delay('article', object_id=wf2_id)
    es.indices.refresh("holdingpen-hep")
    check_wf_state(wf2_id, ObjectStatus.COMPLETED)
    wf2 = workflow_object_class.get(wf2_id)
    assert wf2.extra_data["previously_rejected"] is True
    assert wf2.extra_data["previously_rejected_matches"] == [wf1_id]
def restart_workflow(workflow_id, position=None):
    """Restart a workflow object from the given callback position.

    Args:
        workflow_id: id of the workflow object to restart.
        position: callback position (list); defaults to [0], the first task.
    """
    # Avoid a mutable default argument; default to the workflow start.
    position = [0] if position is None else position
    wf = workflow_object_class.get(workflow_id)
    # Single-argument print() works identically on Python 2 and 3,
    # unlike the py2-only `print ...` statement used before.
    print('Workflow {} is currently in position {}'.format(
        workflow_id, wf.callback_pos))
    wf.callback_pos = position
    wf.save()
    db.session.commit()
    res = wf.continue_workflow(delayed=True)
    print('Workflow {} currently in status {}'.format(workflow_id, res.status))
def remove_references(workflow_id):
    """Drop the ``references`` field from a workflow's data and restart it.

    Args:
        workflow_id: id of the workflow object to fix and resume.
    """
    wf = workflow_object_class.get(workflow_id)
    # Single-argument print() works identically on Python 2 and 3,
    # unlike the py2-only `print ...` statement used before.
    print('Workflow {} is currently in position {}'.format(
        workflow_id, wf.callback_pos))
    # Note that an empty list is not schema compliant.
    del wf.data['references']
    wf.save()
    db.session.commit()
    res = wf.continue_workflow(start_point='restart_task', delayed=True)
    print('Workflow {} currently in status {}'.format(workflow_id, res.status))
def remove_references(workflow_id):
    """Drop the ``references`` field from a workflow's data and restart it.

    Args:
        workflow_id: id of the workflow object to fix and resume.
    """
    wf = workflow_object_class.get(workflow_id)
    # Single-argument print() works identically on Python 2 and 3,
    # unlike the py2-only `print ...` statement used before.
    print('Workflow {} is currently in position {}'.format(
        workflow_id, wf.callback_pos))
    # Note that an empty list is not schema compliant.
    del wf.data['references']
    wf.save()
    db.session.commit()
    res = wf.continue_workflow(start_point='restart_task', delayed=True)
    print('Workflow {} currently in status {}'.format(workflow_id, res.status))
def restart_workflow(workflow_id, position=None):
    """Restart a workflow object from the given callback position.

    Args:
        workflow_id: id of the workflow object to restart.
        position: callback position (list); defaults to [0], the first task.
    """
    # Avoid a mutable default argument; default to the workflow start.
    position = [0] if position is None else position
    wf = workflow_object_class.get(workflow_id)
    # Single-argument print() works identically on Python 2 and 3,
    # unlike the py2-only `print ...` statement used before.
    print('Workflow {} is currently in position {}'.format(
        workflow_id, wf.callback_pos))
    wf.callback_pos = position
    wf.save()
    db.session.commit()
    res = wf.continue_workflow(delayed=True)
    print('Workflow {} currently in status {}'.format(workflow_id, res.status))
def test_edit_article_view_sets_user_id(workflow_api_client):
    """The edit_article endpoint records the logged-in user on the workflow."""
    curator = User.query.filter_by(email='*****@*****.**').one()
    login_user_via_session(workflow_api_client, user=curator)

    factory = TestRecordMetadata.create_from_kwargs(json={})
    recid = factory.record_metadata.json['control_number']

    response = workflow_api_client.get(
        "/workflows/edit_article/{}".format(recid))

    # The redirect Location ends with the new workflow's id.
    workflow_id = response.headers['Location'].split('/')[-1]
    workflow = workflow_object_class.get(workflow_id)
    assert workflow.id_user == int(curator.get_id())
def test_update_author_submit_with_required_fields(api_client):
    """POSTing an author update with required fields creates an update wf."""
    data = {
        "data": {
            "_collections": [
                "Authors"
            ],
            "acquisition_source": {
                "email": "*****@*****.**",
                "datetime": "2019-02-04T10:06:34.695915",
                "method": "submitter",
                "submission_number": "None",
                "internal_uid": 1,
            },
            "name": {
                "value": "Martinez, Diegpo"
            },
            "status": "active",
            "control_number": 3
        }
    }
    response = api_client.post('/workflows/authors',
                               data=json.dumps(data),
                               content_type='application/json')
    assert response.status_code == 200

    workflow_object_id = json.loads(response.data).get('workflow_object_id')
    assert workflow_object_id is not None
    obj = workflow_object_class.get(workflow_object_id)

    # NOTE(review): the expected literal was corrupted in the source
    # ("$schema" fused with an e-mail, "acquisition_source": { opener
    # missing, unbalanced braces). The schema URL and acquisition_source
    # block are reconstructed — confirm against the original fixture.
    expected = {
        "status": "active",
        "$schema": "http://localhost:5000/schemas/records/authors.json",
        "acquisition_source": {
            "email": "*****@*****.**",
            "method": "submitter",
            "internal_uid": 1,
            "submission_number": "1",
            "datetime": "2019-02-04T10:06:34.695915"
        },
        "_collections": [
            "Authors"
        ],
        "name": {
            "value": "Martinez, Diegpo"
        },
        "control_number": 3
    }
    assert expected == obj.data
    assert obj.extra_data['is-update'] is True
def _get_wfs_same_source(obj, eng):
    """Return True if any workflow listed under ``extra_data_key`` shares
    this object's acquisition source (compared case-insensitively)."""
    current_source = get_value(obj.data, 'acquisition_source.source').lower()
    matched_ids = obj.extra_data.get(extra_data_key, [])
    for matched_id in matched_ids:
        matched = workflow_object_class.get(matched_id)
        matched_source = get_value(
            matched.data, 'acquisition_source.source').lower()
        if matched_source == current_source:
            return True
    return False
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """An update matching a previously rejected workflow is auto-rejected."""
    record = generate_record()
    first_wf_id = build_workflow(record).id
    eng_uuid = start("article", object_id=first_wf_id)
    engine = WorkflowEngine.from_uuid(eng_uuid)
    rejected_wf_id = engine.objects[0].id

    rejected_wf = workflow_object_class.get(rejected_wf_id)
    rejected_wf.extra_data["approved"] = False  # reject record
    rejected_wf.continue_workflow()

    rejected_wf = workflow_object_class.get(rejected_wf_id)
    assert rejected_wf.status == ObjectStatus.COMPLETED
    assert rejected_wf.extra_data.get("approved") is False

    es.indices.refresh("holdingpen-hep")
    record["titles"][0]["title"] = (
        "This is an update that will match the wf in the holdingpen"
    )

    # this workflow matches in the holdingpen and stops because the
    # matched one was rejected
    update_wf_id = build_workflow(record).id
    eng_uuid = start("article", object_id=update_wf_id)
    engine = WorkflowEngine.from_uuid(eng_uuid)
    update_wf = engine.objects[0]

    assert update_wf.extra_data["previously_rejected"] is True
    assert update_wf.extra_data["previously_rejected_matches"] == [rejected_wf_id]