def submit():
    """Get form data and start workflow.

    Creates a "hep" workflow object from the literature form, snapshots it
    in ``source_data``, launches the "article" workflow asynchronously and
    redirects to the appropriate success page.
    """
    form = LiteratureForm(formdata=request.form)
    visitor = DataExporter()
    visitor.visit(form)

    workflow_object = workflow_object_class.create(
        data={},
        id_user=current_user.get_id(),
        data_type="hep",
    )
    workflow_object.extra_data['formdata'] = copy.deepcopy(visitor.data)
    visitor.data = normalize_formdata(workflow_object, visitor.data)
    workflow_object.data = formdata_to_model(workflow_object, visitor.data)
    # source_data is built last so it snapshots the fully populated object;
    # the first workflow step restores everything from it.
    workflow_object.extra_data['source_data'] = {
        'extra_data': copy.deepcopy(workflow_object.extra_data),
        'data': copy.deepcopy(workflow_object.data),
    }
    workflow_object.save()
    db.session.commit()

    # Start workflow. delayed=True will execute the workflow in the
    # background using, for example, Celery.
    start.delay("article", object_id=workflow_object.id)

    # Fix: default to '' so a missing 'type_of_doc' key does not raise
    # TypeError on the `in` membership test (`'chapter' in None`).
    if ('chapter' in visitor.data.get('type_of_doc', '')
            and not visitor.data.get('parent_book')):
        return redirect(url_for('.success_book_parent'))
    else:
        return redirect(url_for('.success'))
def start_author_workflow():
    """Create an author workflow object from the posted JSON and launch it.

    Returns a JSON payload containing the id of the created workflow object.
    """
    submission_data = request.get_json()['data']
    workflow_object = workflow_object_class.create(
        data={},
        # can be changed to get the user id from the current user once we
        # implement authentication
        id_user=submission_data['acquisition_source']['internal_uid'],
        data_type='authors',
    )
    submission_data['acquisition_source']['submission_number'] = str(
        workflow_object.id
    )
    workflow_object.data = submission_data
    # A control_number marks this submission as an update of an existing record.
    workflow_object.extra_data['is-update'] = bool(
        submission_data.get('control_number')
    )
    workflow_object.extra_data['source_data'] = {
        'data': copy.deepcopy(workflow_object.data),
        'extra_data': copy.deepcopy(workflow_object.extra_data),
    }
    workflow_object.save()
    db.session.commit()

    object_id = workflow_object.id
    start.delay('author', object_id=object_id)
    return jsonify({'workflow_object_id': object_id})
def submitupdate():
    """Form action handler for INSPIRE author update form."""
    visitor = DataExporter()
    visitor.visit(AuthorUpdateForm(formdata=request.form, is_update=True))

    obj = workflow_object_class.create(
        data={},
        id_user=current_user.get_id(),
        data_type="authors",
    )
    obj.extra_data['formdata'] = copy.deepcopy(visitor.data)
    obj.extra_data['is-update'] = True
    obj.data = formdata_to_model(obj, visitor.data)
    obj.save()
    db.session.commit()

    # Start workflow. delay will execute the workflow in the background
    start.delay("author", object_id=obj.id)

    return render_template(
        'authors/forms/update_success.html',
        inspire_url=get_inspire_url(visitor.data),
    )
def start_workflow_for_literature_submission():
    """Create a "hep" workflow object from the posted JSON and launch it.

    Returns a JSON payload containing the id of the created workflow object.
    """
    payload = request.get_json()
    submission_data = payload['data']

    obj = workflow_object_class.create(
        data={},
        id_user=submission_data['acquisition_source']['internal_uid'],
        data_type="hep",
    )
    submission_data['acquisition_source']['submission_number'] = str(obj.id)
    obj.data = submission_data
    obj.extra_data['formdata'] = payload['form_data']

    # Add submission pdf from formdata.url (arXiv urls are handled elsewhere).
    form_url = obj.extra_data['formdata'].get('url')
    if form_url and 'arxiv.org' not in form_url:
        obj.extra_data['submission_pdf'] = form_url

    # Remember that source_data should be created at the end, with all fields
    # already filled: the first workflow step overwrites everything with the
    # data from source_data.
    obj.extra_data['source_data'] = {
        'extra_data': copy.deepcopy(obj.extra_data),
        'data': copy.deepcopy(obj.data),
    }
    obj.save()
    db.session.commit()

    object_id = obj.id
    start.delay("article", object_id=object_id)
    return jsonify({'workflow_object_id': object_id})
def start_workflow_for_literature_submission():
    """Create a "hep" workflow object from the posted JSON and launch it.

    Returns a JSON payload containing the id of the created workflow object.
    """
    payload = request.get_json()
    submission_data = payload['data']

    obj = workflow_object_class.create(
        data={},
        id_user=submission_data['acquisition_source']['internal_uid'],
        data_type="hep",
    )
    submission_data['acquisition_source']['submission_number'] = str(obj.id)
    obj.data = submission_data
    obj.extra_data['formdata'] = payload['form_data']
    # source_data snapshots the fully populated object; the first workflow
    # step restores everything from it.
    obj.extra_data['source_data'] = {
        'extra_data': copy.deepcopy(obj.extra_data),
        'data': copy.deepcopy(obj.data),
    }
    obj.save()
    db.session.commit()

    object_id = obj.id
    start.delay("article", object_id=object_id)
    return jsonify({'workflow_object_id': object_id})
def spawn_arXiv_workflow_from_oai_harvest(request, records, name, **kwargs):
    """Receive a list of harvested arXiv records and schedule workflow."""
    from flask import current_app
    from invenio_workflows import start, workflows

    if request.endpoint not in ARXIV_URLS:
        # This is not arXiv
        return

    if kwargs.get('spider') or kwargs.get('workflow'):
        # Taken care of by inspire-crawler
        return

    workflow = "article"
    if workflow not in workflows:
        current_app.logger.warning(
            "{0} not in available workflows. Skipping workflow {1}.".format(
                workflow, name
            )
        )
        return

    for harvested in records:
        recxml = six.text_type(harvested)
        marcxml = convert(recxml, "oaiarXiv2marcxml.xsl")
        hep_record = hep.do(create_record(marcxml))
        start.delay(workflow, data=[hep_record])
def test_wf_replaces_old_workflow_which_is_in_halted_state(
    app, celery_app_with_context, celery_session_worker, generated_record,
):
    """A new matching workflow completes the previously halted one."""
    app.config['FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY'] = False
    app.config['PRODUCTION_MODE'] = False
    app.config['USE_SIGNALS_ON_TIMEOUT'] = False

    first = build_workflow(generated_record)
    first.save()
    db.session.commit()
    first_id = first.id

    start.delay('article', object_id=first_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(first_id, ObjectStatus.HALTED)

    second = build_workflow(generated_record)
    second.save()
    db.session.commit()
    second_id = second.id

    es.indices.refresh('holdingpen-hep')
    check_wf_state(first_id, ObjectStatus.HALTED)
    check_wf_state(second_id, ObjectStatus.INITIAL)

    # Starting the matching second workflow should replace the halted first.
    start.delay('article', object_id=second_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(first_id, ObjectStatus.COMPLETED)
    check_wf_state(second_id, ObjectStatus.HALTED)
def submitnew():
    """Form action handler for INSPIRE author new form."""
    visitor = DataExporter()
    visitor.visit(AuthorUpdateForm(formdata=request.form))

    obj = workflow_object_class.create(
        data={},
        id_user=current_user.get_id(),
        data_type="authors",
    )
    obj.extra_data['formdata'] = copy.deepcopy(visitor.data)
    obj.data = formdata_to_model(obj, visitor.data)
    obj.save()
    db.session.commit()

    # Start workflow. delayed=True will execute the workflow in the
    # background using, for example, Celery.
    start.delay("author", object_id=obj.id)

    return render_template(
        'authors/forms/new_success.html',
        inspire_url=get_inspire_url(visitor.data),
    )
def submitnew():
    """Form action handler for INSPIRE author new form."""
    visitor = DataExporter()
    visitor.visit(AuthorUpdateForm(formdata=request.form))

    obj = workflow_object_class.create(
        data={},
        id_user=current_user.get_id(),
        data_type="authors",
    )
    obj.extra_data['formdata'] = copy.deepcopy(visitor.data)
    obj.extra_data['is-update'] = False
    obj.data = formdata_to_model(obj, visitor.data)
    # source_data snapshots the fully populated object; the first workflow
    # step restores everything from it.
    obj.extra_data['source_data'] = {
        'extra_data': copy.deepcopy(obj.extra_data),
        'data': copy.deepcopy(obj.data),
    }
    obj.save()
    db.session.commit()

    # Start workflow. delayed=True will execute the workflow in the
    # background using, for example, Celery.
    start.delay("author", object_id=obj.id)

    return render_template(
        'authors/forms/new_success.html',
        inspire_url=get_inspire_url(visitor.data),
    )
def start_workflow_for_literature_submission():
    """Create a "hep" workflow object from the posted JSON and launch it.

    Returns a JSON payload containing the id of the created workflow object.
    """
    payload = request.get_json()
    submission_data = payload['data']

    obj = workflow_object_class.create(
        data={},
        id_user=submission_data['acquisition_source']['internal_uid'],
        data_type="hep",
    )
    submission_data['acquisition_source']['submission_number'] = str(obj.id)
    obj.data = submission_data
    obj.extra_data['formdata'] = payload['form_data']
    # source_data snapshots the fully populated object; the first workflow
    # step restores everything from it.
    obj.extra_data['source_data'] = {
        'extra_data': copy.deepcopy(obj.extra_data),
        'data': copy.deepcopy(obj.data),
    }
    obj.save()
    db.session.commit()

    object_id = obj.id
    start.delay("article", object_id=object_id)
    return jsonify({'workflow_object_id': object_id})
def submit():
    """Get form data and start workflow.

    Creates a "hep" workflow object from the literature form, snapshots it
    in ``source_data``, launches the "article" workflow asynchronously and
    redirects to the appropriate success page.
    """
    form = LiteratureForm(formdata=request.form)
    visitor = DataExporter()
    visitor.visit(form)

    workflow_object = workflow_object_class.create(
        data={},
        id_user=current_user.get_id(),
        data_type="hep",
    )
    workflow_object.extra_data['formdata'] = copy.deepcopy(visitor.data)
    visitor.data = normalize_formdata(workflow_object, visitor.data)
    workflow_object.data = formdata_to_model(workflow_object, visitor.data)
    # source_data is built last so it snapshots the fully populated object;
    # the first workflow step restores everything from it.
    workflow_object.extra_data['source_data'] = {
        'extra_data': copy.deepcopy(workflow_object.extra_data),
        'data': copy.deepcopy(workflow_object.data),
    }
    workflow_object.save()
    db.session.commit()

    # Start workflow. delayed=True will execute the workflow in the
    # background using, for example, Celery.
    start.delay("article", object_id=workflow_object.id)

    # Fix: default to '' so a missing 'type_of_doc' key does not raise
    # TypeError on the `in` membership test (`'chapter' in None`).
    if ('chapter' in visitor.data.get('type_of_doc', '')
            and not visitor.data.get('parent_book')):
        return redirect(url_for('.success_book_parent'))
    else:
        return redirect(url_for('.success'))
def submitupdate():
    """Form action handler for INSPIRE author update form."""
    visitor = DataExporter()
    visitor.visit(AuthorUpdateForm(formdata=request.form))

    obj = WorkflowObject.create_object(id_user=current_user.get_id())
    obj.data = visitor.data
    obj.save()
    db.session.commit()

    # Start workflow. delay will execute the workflow in the background
    start.delay("authorupdate", object_id=obj.id)

    return render_template(
        'authors/forms/update_success.html',
        inspire_url=get_inspire_url(visitor.data),
    )
def test_wf_rejects_automatically_when_previous_matched_wf_was_rejected(
    app, celery_app_with_context, celery_session_worker, generated_record,
):
    """A resubmission matching a rejected workflow is auto-rejected."""
    app.config['FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY'] = False
    app.config['PRODUCTION_MODE'] = False
    app.config['USE_SIGNALS_ON_TIMEOUT'] = False

    first = build_workflow(generated_record)
    first.save()
    db.session.commit()
    first_id = first.id

    start.delay('article', object_id=first_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(first_id, ObjectStatus.HALTED)

    # Reject the halted workflow and let it run to completion.
    halted = workflow_object_class.get(first_id)
    halted.extra_data["approved"] = False
    halted.continue_workflow(delayed=True)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(first_id, ObjectStatus.COMPLETED)
    halted = workflow_object_class.get(first_id)
    assert halted.extra_data.get("approved") is False

    second = build_workflow(generated_record)
    second.save()
    db.session.commit()
    second_id = second.id

    start.delay('article', object_id=second_id)
    es.indices.refresh("holdingpen-hep")
    check_wf_state(second_id, ObjectStatus.COMPLETED)
    rerun = workflow_object_class.get(second_id)
    assert rerun.extra_data["previously_rejected"] is True
    assert rerun.extra_data["previously_rejected_matches"] == [first_id]
def submit():
    """Get form data and start workflow."""
    visitor = DataExporter()
    visitor.visit(LiteratureForm(formdata=request.form))

    obj = WorkflowObject.create_object(id_user=current_user.get_id())
    obj.data = visitor.data
    obj.save()
    db.session.commit()

    # Start workflow. delayed=True will execute the workflow in the
    # background using, for example, Celery.
    start.delay("literature", object_id=obj.id)

    return redirect(url_for('.success'))
def submit():
    """Get form data and start workflow."""
    visitor = DataExporter()
    visitor.visit(LiteratureForm(formdata=request.form))

    obj = WorkflowObject.create_object(id_user=current_user.get_id())
    obj.data = convert_data_to_model(obj, visitor.data)
    obj.save()
    db.session.commit()

    # Start workflow. delayed=True will execute the workflow in the
    # background using, for example, Celery.
    start.delay("literature", object_id=obj.id)

    return redirect(url_for('.success'))
def submitupdate():
    """Form action handler for INSPIRE author update form."""
    visitor = DataExporter()
    visitor.visit(AuthorUpdateForm(formdata=request.form))

    obj = WorkflowObject.create_object(id_user=current_user.get_id())
    obj.data = visitor.data
    obj.save()
    db.session.commit()

    # Start workflow. delay will execute the workflow in the background
    start.delay("authorupdate", object_id=obj.id)

    return render_template(
        'authors/forms/update_success.html',
        inspire_url=get_inspire_url(visitor.data),
    )
def submit():
    """Get form data and start workflow."""
    visitor = DataExporter()
    visitor.visit(LiteratureForm(formdata=request.form))

    obj = workflow_object_class.create(
        data={},
        id_user=current_user.get_id(),
        data_type="hep",
    )
    obj.extra_data['formdata'] = copy.deepcopy(visitor.data)
    obj.data = formdata_to_model(obj, visitor.data)
    obj.save()
    db.session.commit()

    # Start workflow. delayed=True will execute the workflow in the
    # background using, for example, Celery.
    start.delay("article", object_id=obj.id)

    return redirect(url_for('.success'))
def start_workflow_for_submission(self, endpoint, submission_data, control_number=None):
    """Create, persist and launch a workflow for a user submission.

    Returns the id of the created workflow object. A truthy
    ``control_number`` marks the submission as an update of an existing
    record.
    """
    obj = workflow_object_class.create(
        data={},
        id_user=current_user.get_id(),
        data_type=self.endpoint_to_data_type[endpoint],
    )
    submission_data['acquisition_source'] = dict(
        email=current_user.email,
        datetime=datetime.datetime.utcnow().isoformat(),
        method='submitter',
        submission_number=str(obj.id),
        internal_uid=int(obj.id_user),
    )
    orcid = self._get_user_orcid()
    if orcid:
        submission_data['acquisition_source']['orcid'] = orcid

    serializer = self._get_serializer_from_endpoint(endpoint)
    serialized_data = serializer().load(submission_data).data
    if control_number:
        serialized_data['control_number'] = int(control_number)

    obj.data = serialized_data
    obj.extra_data['is-update'] = bool(control_number)
    # source_data snapshots the fully populated object; the first workflow
    # step restores everything from it.
    obj.extra_data['source_data'] = {
        'data': copy.deepcopy(obj.data),
        'extra_data': copy.deepcopy(obj.extra_data),
    }
    obj.save()
    db.session.commit()

    object_id = obj.id
    start.delay(self.endpoint_to_workflow_name[endpoint], object_id=object_id)
    return object_id
def test_wf_rejects_automatically_when_previous_matched_wf_was_rejected(
    app, celery_app_with_context, celery_session_worker, generated_record,
):
    """A resubmission matching a rejected workflow is auto-rejected."""
    app.config['FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY'] = False
    app.config['PRODUCTION_MODE'] = False
    app.config['USE_SIGNALS_ON_TIMEOUT'] = False

    first = build_workflow(generated_record)
    first.save()
    db.session.commit()
    first_id = first.id

    start.delay('article', object_id=first_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(first_id, ObjectStatus.HALTED)

    # Reject the halted workflow and let it run to completion.
    halted = workflow_object_class.get(first_id)
    halted.extra_data["approved"] = False
    halted.continue_workflow(delayed=True)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(first_id, ObjectStatus.COMPLETED)
    halted = workflow_object_class.get(first_id)
    assert halted.extra_data.get("approved") is False

    second = build_workflow(generated_record)
    second.save()
    db.session.commit()
    second_id = second.id

    start.delay('article', object_id=second_id)
    es.indices.refresh("holdingpen-hep")
    check_wf_state(second_id, ObjectStatus.COMPLETED)
    rerun = workflow_object_class.get(second_id)
    assert rerun.extra_data["previously_rejected"] is True
    assert rerun.extra_data["previously_rejected_matches"] == [first_id]
def submit():
    """Get form data and start workflow."""
    visitor = DataExporter()
    visitor.visit(LiteratureForm(formdata=request.form))

    obj = workflow_object_class.create(
        data={},
        id_user=current_user.get_id(),
        data_type="hep",
    )
    obj.extra_data['formdata'] = copy.deepcopy(visitor.data)
    obj.data = formdata_to_model(obj, visitor.data)
    obj.save()
    db.session.commit()

    # Start workflow. delayed=True will execute the workflow in the
    # background using, for example, Celery.
    start.delay("article", object_id=obj.id)

    return redirect(url_for('.success'))
def test_wf_replaces_old_workflow_which_is_in_halted_state(
    app, celery_app_with_context, celery_session_worker, generated_record,
):
    """A new matching workflow completes the previously halted one."""
    app.config['FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY'] = False
    app.config['PRODUCTION_MODE'] = False
    app.config['USE_SIGNALS_ON_TIMEOUT'] = False

    first = build_workflow(generated_record)
    first.save()
    db.session.commit()
    first_id = first.id

    start.delay('article', object_id=first_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(first_id, ObjectStatus.HALTED)

    second = build_workflow(generated_record)
    second.save()
    db.session.commit()
    second_id = second.id

    es.indices.refresh('holdingpen-hep')
    check_wf_state(first_id, ObjectStatus.HALTED)
    check_wf_state(second_id, ObjectStatus.INITIAL)

    # Starting the matching second workflow should replace the halted first.
    start.delay('article', object_id=second_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(first_id, ObjectStatus.COMPLETED)
    check_wf_state(second_id, ObjectStatus.HALTED)
def spawn_arXiv_workflow_from_oai_harvest(request, records, name, **kwargs):
    """Receive a list of harvested arXiv records and schedule workflow."""
    from flask import current_app
    from invenio_workflows import start, workflows

    # Idiom fix: `!=` instead of `not ... ==`.
    if request.endpoint != "http://export.arxiv.org/oai2":
        return

    workflow = "arxiv_ingestion"
    if workflow not in workflows:
        current_app.logger.warning(
            "{0} not in available workflows. Skipping workflow {1}.".format(
                workflow, name
            )
        )
        return

    for harvested in records:
        # Convert each OAI record to MARCXML, then to the HEP data model.
        recxml = six.text_type(harvested)
        marcxml = convert(recxml, "oaiarXiv2marcxml.xsl")
        hep_record = hep.do(create_record(marcxml))
        start.delay(workflow, data=[hep_record])
def start_workflow_for_author_submission():
    """Create an author workflow object from the posted JSON and launch it.

    Returns a JSON payload containing the id of the created workflow object.
    """
    submission_data = request.get_json()['data']

    obj = workflow_object_class.create(
        data={},
        id_user=submission_data['acquisition_source']['internal_uid'],
        data_type='authors',
    )
    submission_data['acquisition_source']['submission_number'] = str(obj.id)
    obj.data = submission_data
    # A control_number marks this submission as an update of an existing record.
    obj.extra_data['is-update'] = bool(submission_data.get('control_number'))
    obj.extra_data['source_data'] = {
        'data': copy.deepcopy(obj.data),
        'extra_data': copy.deepcopy(obj.extra_data),
    }
    obj.save()
    db.session.commit()

    object_id = obj.id
    start.delay('author', object_id=object_id)
    return jsonify({'workflow_object_id': object_id})
def test_delayed_execution_api(app, halt_workflow):
    """Test continue object task."""
    with app.app_context():
        payload = [{'foo': 'bar'}]
        async_result = start.delay('halttest', payload)

        engine = WorkflowEngine.from_uuid(async_result.get())
        halted = engine.processed_objects[0]
        assert halted.known_statuses.WAITING == halted.status
        assert WorkflowStatus.HALTED == engine.status

        halted_id = halted.id
        resume.delay(halted_id)

        resumed = WorkflowObject.get(halted_id)
        assert resumed.known_statuses.COMPLETED == resumed.status
def test_wf_not_stops_when_blocking_another_one_after_restarted_on_running(
    app, celery_app_with_context, celery_session_worker
):
    # Disable legacy pushes and production behavior so the workflow runs
    # entirely inside the test environment.
    app.config['FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY'] = False
    app.config['PRODUCTION_MODE'] = False
    record = {
        '$schema': 'https://labs.inspirehep.net/schemas/records/hep.json',
        'titles': [
            {
                'title': 'Update without conflicts title.'
            },
        ],
        'arxiv_eprints': [
            {
                'categories': [
                    'astro-ph.HE'
                ],
                'value': '9999.9999'
            }
        ],
        'document_type': ['article'],
        '_collections': ['Literature'],
        'acquisition_source': {'source': 'arXiv'},
        'keywords': [{'value': 'none'}]
    }
    # First workflow is forced into RUNNING so it blocks the two others,
    # which are built from slightly mutated copies of the same record.
    workflow = build_workflow(record)
    workflow.status = ObjectStatus.RUNNING
    workflow.save()
    record['titles'][0]['title'] = 'second title?'
    workflow2 = build_workflow(record)
    workflow2.save()
    record['titles'].append({'title': 'thirtd_title'})
    workflow3 = build_workflow(record)
    workflow3.save()
    db.session.commit()
    wf1_id = workflow.id
    wf2_id = workflow2.id
    wf3_id = workflow3.id
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.RUNNING)
    check_wf_state(wf2_id, ObjectStatus.INITIAL)
    check_wf_state(wf3_id, ObjectStatus.INITIAL)
    # Starting wf3 while wf1 is RUNNING should leave wf3 in ERROR.
    start.delay('article', object_id=wf3_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.RUNNING)
    check_wf_state(wf2_id, ObjectStatus.INITIAL)
    check_wf_state(wf3_id, ObjectStatus.ERROR)
    # Same for wf2: it is blocked by the still-running wf1.
    start.delay('article', object_id=wf2_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.RUNNING)
    check_wf_state(wf2_id, ObjectStatus.ERROR)
    check_wf_state(wf3_id, ObjectStatus.ERROR)
    # Running the blocking workflow should restart the errored ones and
    # drive all three to completion.
    start.delay('article', object_id=wf1_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.COMPLETED)
    check_wf_state(wf2_id, ObjectStatus.COMPLETED)
    check_wf_state(wf3_id, ObjectStatus.COMPLETED)
    wf1 = workflow_object_class.get(wf1_id)
    wf2 = workflow_object_class.get(wf2_id)
    wf3 = workflow_object_class.get(wf3_id)
    # All three matched the same record, and the restart chain is recorded
    # in extra_data.
    assert wf1.data['control_number'] == wf2.data['control_number']
    assert wf2.data['control_number'] == wf3.data['control_number']
    assert wf1.extra_data.get('restarted-by-wf') is None
    assert wf2.extra_data.get('restarted-by-wf') == [1]
    assert wf3.extra_data.get('restarted-by-wf') == [2]
def test_wf_not_stops_when_blocking_another_one_after_restarted_on_running(
    app, celery_app_with_context, celery_session_worker):
    # Disable legacy pushes and production behavior so the workflow runs
    # entirely inside the test environment.
    app.config['FEATURE_FLAG_ENABLE_UPDATE_TO_LEGACY'] = False
    app.config['PRODUCTION_MODE'] = False
    record = {
        '$schema': 'https://labs.inspirehep.net/schemas/records/hep.json',
        'titles': [
            {
                'title': 'Update without conflicts title.'
            },
        ],
        'arxiv_eprints': [{
            'categories': ['astro-ph.HE'],
            'value': '9999.9999'
        }],
        'document_type': ['article'],
        '_collections': ['Literature'],
        'acquisition_source': {
            'source': 'arXiv'
        },
        'keywords': [{
            'value': 'none'
        }]
    }
    # First workflow is forced into RUNNING so it blocks the two others,
    # which are built from slightly mutated copies of the same record.
    workflow = build_workflow(record)
    workflow.status = ObjectStatus.RUNNING
    workflow.save()
    record['titles'][0]['title'] = 'second title?'
    workflow2 = build_workflow(record)
    workflow2.save()
    record['titles'].append({'title': 'thirtd_title'})
    workflow3 = build_workflow(record)
    workflow3.save()
    db.session.commit()
    wf1_id = workflow.id
    wf2_id = workflow2.id
    wf3_id = workflow3.id
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.RUNNING)
    check_wf_state(wf2_id, ObjectStatus.INITIAL)
    check_wf_state(wf3_id, ObjectStatus.INITIAL)
    # Starting wf3 while wf1 is RUNNING should leave wf3 in ERROR.
    start.delay('article', object_id=wf3_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.RUNNING)
    check_wf_state(wf2_id, ObjectStatus.INITIAL)
    check_wf_state(wf3_id, ObjectStatus.ERROR)
    # Same for wf2: it is blocked by the still-running wf1.
    start.delay('article', object_id=wf2_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.RUNNING)
    check_wf_state(wf2_id, ObjectStatus.ERROR)
    check_wf_state(wf3_id, ObjectStatus.ERROR)
    # Running the blocking workflow should restart the errored ones and
    # drive all three to completion.
    start.delay('article', object_id=wf1_id)
    es.indices.refresh('holdingpen-hep')
    check_wf_state(wf1_id, ObjectStatus.COMPLETED)
    check_wf_state(wf2_id, ObjectStatus.COMPLETED)
    check_wf_state(wf3_id, ObjectStatus.COMPLETED)
    wf1 = workflow_object_class.get(wf1_id)
    wf2 = workflow_object_class.get(wf2_id)
    wf3 = workflow_object_class.get(wf3_id)
    # All three matched the same record, and the restart chain is recorded
    # in extra_data.
    assert wf1.data['control_number'] == wf2.data['control_number']
    assert wf2.data['control_number'] == wf3.data['control_number']
    assert wf1.extra_data.get('restarted-by-wf') is None
    assert wf2.extra_data.get('restarted-by-wf') == [1]
    assert wf3.extra_data.get('restarted-by-wf') == [2]