def test_errors(app, error_workflow):
    """Check the failure modes of ``start`` around the ``errortest`` workflow.

    Covers a call with only the workflow name, a call with an unknown
    workflow name, a call with ``object_id=-1`` and a run whose exception
    reaches the caller.
    """
    registered = app.extensions['invenio-workflows'].workflows
    assert 'errortest' in registered

    with app.app_context():
        # Only the workflow name is given.
        with pytest.raises(WorkflowsMissingData):
            start('errortest')

        # 'doesnotexist' is not the name of the workflow under test.
        with pytest.raises(WorkflowDefinitionError):
            start('doesnotexist', 100)

        # -1 is used as an object id that should not be found.
        with pytest.raises(WorkflowsMissingObject):
            start('errortest', object_id=-1)

        wf_object = WorkflowObject.create({"id": 0})
        db.session.commit()
        stored_id = wf_object.id

        # The ZeroDivisionError raised during the run reaches the caller.
        with pytest.raises(ZeroDivisionError):
            start('errortest', object_id=stored_id)

        reloaded = WorkflowObject.get(stored_id)

        assert reloaded.known_statuses.ERROR == reloaded.status
        assert reloaded.data == {"id": 0, "foo": "bar"}
Beispiel #2
0
def test_has_same_source(app, simple_record):
    """Check ``has_same_source`` for matching and differing sources.

    An object built from the same record as a halted one must be reported
    as having the same source; an object whose ``acquisition_source`` was
    replaced must still match in the holdingpen but not share the source.
    """
    halted_wf = workflow_object_class.create(
        data=simple_record,
        status=ObjectStatus.HALTED,
        data_type='hep',
    )
    halted_wf_id = halted_wf.id
    halted_wf.save()
    es.indices.refresh('holdingpen-hep')

    candidate = WorkflowObject.create(data=simple_record, data_type='hep')
    match_non_completed_wf_in_holdingpen(candidate, None)

    check_same_source = has_same_source('holdingpen_matches')

    assert check_same_source(candidate, None)
    assert candidate.extra_data['holdingpen_matches'] == [halted_wf_id]

    # Copy the record (shallow) and swap in a different acquisition source.
    altered_record = dict(simple_record)
    altered_record['acquisition_source'] = {'source': 'different'}
    other_source_wf = WorkflowObject.create(
        data=altered_record,
        data_type='hep',
    )

    assert match_non_completed_wf_in_holdingpen(other_source_wf, None)
    assert not check_same_source(other_source_wf, None)
Beispiel #3
0
def test_errors(app, error_workflow):
    """Check the failure modes of ``start`` around the ``errortest`` workflow.

    Covers a call with only the workflow name, a call with an unknown
    workflow name, a call with ``object_id=-1`` and a run whose exception
    reaches the caller.
    """
    registered = app.extensions['invenio-workflows'].workflows
    assert 'errortest' in registered

    with app.app_context():
        # Only the workflow name is given.
        with pytest.raises(WorkflowsMissingData):
            start('errortest')

        # 'doesnotexist' is not the name of the workflow under test.
        with pytest.raises(WorkflowDefinitionError):
            start('doesnotexist', 100)

        # -1 is used as an object id that should not be found.
        with pytest.raises(WorkflowsMissingObject):
            start('errortest', object_id=-1)

        wf_object = WorkflowObject.create({"id": 0})
        db.session.commit()
        stored_id = wf_object.id

        # The ZeroDivisionError raised during the run reaches the caller.
        with pytest.raises(ZeroDivisionError):
            start('errortest', object_id=stored_id)

        reloaded = WorkflowObject.get(stored_id)

        assert reloaded.known_statuses.ERROR == reloaded.status
        assert reloaded.data == {"id": 0, "foo": "bar"}
def test_halt(app, halt_workflow, halt_workflow_conditional):
    """Run the halting workflows and continue each halted object.

    ``halttest`` is started with a list payload and ``halttestcond`` with a
    single dict. In both cases the processed object must be WAITING with a
    HALTED engine, and COMPLETED once ``continue_workflow`` was called.
    """
    registered = app.extensions['invenio-workflows'].workflows
    assert 'halttest' in registered
    assert 'halttestcond' in registered

    with app.app_context():
        payload = [{'foo': 'bar'}]

        engine = WorkflowEngine.from_uuid(start('halttest', payload))
        halted = engine.processed_objects[0]

        assert halted.known_statuses.WAITING == halted.status
        assert WorkflowStatus.HALTED == engine.status

        halted_id = halted.id
        halted.continue_workflow()

        finished = WorkflowObject.get(halted_id)
        assert finished.known_statuses.COMPLETED == finished.status

        # Same scenario for the conditional workflow, this time handing
        # over a bare dict instead of a list.
        engine = WorkflowEngine.from_uuid(start('halttestcond', payload[0]))
        halted = engine.processed_objects[0]

        assert halted.known_statuses.WAITING == halted.status
        assert WorkflowStatus.HALTED == engine.status

        halted_id = halted.id
        halted.continue_workflow()

        finished = WorkflowObject.get(halted_id)
        assert finished.known_statuses.COMPLETED == finished.status
def test_has_same_source(app, simple_record):
    """Check ``has_same_source`` for matching and differing sources.

    An object built from the same record as a halted one must be reported
    as having the same source; an object whose ``acquisition_source`` was
    replaced must still match in the holdingpen but not share the source.
    """
    halted_wf = workflow_object_class.create(
        data=simple_record,
        status=ObjectStatus.HALTED,
        data_type='hep',
    )
    halted_wf_id = halted_wf.id
    halted_wf.save()
    es.indices.refresh('holdingpen-hep')

    candidate = WorkflowObject.create(data=simple_record, data_type='hep')
    match_non_completed_wf_in_holdingpen(candidate, None)

    check_same_source = has_same_source('holdingpen_matches')

    assert check_same_source(candidate, None)
    assert candidate.extra_data['holdingpen_matches'] == [halted_wf_id]

    # Copy the record (shallow) and swap in a different acquisition source.
    altered_record = dict(simple_record)
    altered_record['acquisition_source'] = {'source': 'different'}
    other_source_wf = WorkflowObject.create(
        data=altered_record,
        data_type='hep',
    )

    assert match_non_completed_wf_in_holdingpen(other_source_wf, None)
    assert not check_same_source(other_source_wf, None)
Beispiel #6
0
def test_halt(app, halt_workflow, halt_workflow_conditional):
    """Run the halting workflows and continue each halted object.

    ``halttest`` is started with a list payload and ``halttestcond`` with a
    single dict. In both cases the processed object must be WAITING with a
    HALTED engine, and COMPLETED once ``continue_workflow`` was called.
    """
    registered = app.extensions['invenio-workflows'].workflows
    assert 'halttest' in registered
    assert 'halttestcond' in registered

    with app.app_context():
        payload = [{'foo': 'bar'}]

        engine = WorkflowEngine.from_uuid(start('halttest', payload))
        halted = engine.processed_objects[0]

        assert halted.known_statuses.WAITING == halted.status
        assert WorkflowStatus.HALTED == engine.status

        halted_id = halted.id
        halted.continue_workflow()

        finished = WorkflowObject.get(halted_id)
        assert finished.known_statuses.COMPLETED == finished.status

        # Same scenario for the conditional workflow, this time handing
        # over a bare dict instead of a list.
        engine = WorkflowEngine.from_uuid(start('halttestcond', payload[0]))
        halted = engine.processed_objects[0]

        assert halted.known_statuses.WAITING == halted.status
        assert WorkflowStatus.HALTED == engine.status

        halted_id = halted.id
        halted.continue_workflow()

        finished = WorkflowObject.get(halted_id)
        assert finished.known_statuses.COMPLETED == finished.status
Beispiel #7
0
def test_create_with_default_extra_data(app):
    """Check that two created objects do not share one ``extra_data`` dict."""
    with app.app_context():
        first = WorkflowObject.create({"x": 22})
        # Touch the first object's extra data before creating the second.
        first.extra_data['foo'] = 'bar'

        second = WorkflowObject.create({"x": 22})
        assert second.extra_data is not first.extra_data
def test_task_info(app, halt_workflow):
    """Check the current task info of an object run through ``halttest``."""
    with app.app_context():
        wf_object = WorkflowObject.create({"x": 22})
        start("halttest", wf_object)

        # Reload the object by id before asking for its task info.
        reloaded = WorkflowObject.get(wf_object.id)
        current_task = reloaded.get_current_task_info()
        assert current_task["name"] == "halt_engine"
Beispiel #9
0
def test_task_info(app, halt_workflow):
    """Check the current task info of an object run through ``halttest``."""
    with app.app_context():
        wf_object = WorkflowObject.create({"x": 22})
        start("halttest", wf_object)

        # Reload the object by id before asking for its task info.
        reloaded = WorkflowObject.get(wf_object.id)
        current_task = reloaded.get_current_task_info()
        assert current_task["name"] == "halt_engine"
Beispiel #10
0
def test_inequality(obj1, obj2, app, halt_workflow):
    """Check that two workflow objects compare unequal in both directions.

    ``obj1`` and ``obj2`` are the payloads the two objects are created from.
    """
    with app.app_context():
        left = WorkflowObject.create(obj1)
        right = WorkflowObject.create(obj2)
        start("halttest", [left, right])

        assert left != right
        assert right != left
Beispiel #11
0
def test_audit(app):
    """Check that audit rows can be stored directly and via the log helper.

    One ``WorkflowsAudit`` row is saved by hand and a second one is written
    through ``log_workflows_action``; each is queried back and inspected.
    """
    with app.app_context():
        account = User(email="*****@*****.**", active=True)
        account.password = "******"
        db.session.add(account)

        wf_object = WorkflowObject.create_object()
        wf_object.save()

        db.session.commit()
        # Keep the plain ids for use in the later app contexts.
        user_id = account.id
        workflow_id = wf_object.id

    with app.app_context():
        manual_audit = WorkflowsAudit(
            object_id=workflow_id,
            user_id=user_id,
            score=0.222113,
            user_action="Non-CORE",
            decision="Rejected",
            source="test",
            action="accept",
        )
        manual_audit.save()
        db.session.commit()

        assert WorkflowsAudit.query.count() == 1

        by_object = WorkflowsAudit.query.filter(
            WorkflowsAudit.object_id == workflow_id
        ).one()
        assert by_object
        assert by_object.action == "accept"
        assert by_object.score == 0.222113

    prediction = dict(max_score=0.222113, decision="Rejected")
    with app.app_context():
        log_workflows_action(
            action="accept_core",
            prediction_results=prediction,
            object_id=workflow_id,
            user_id=None,
            source="test",
            user_action="accept",
        )
        db.session.commit()

        # The helper must have added exactly one more row.
        assert WorkflowsAudit.query.count() == 2

        by_action = WorkflowsAudit.query.filter(
            WorkflowsAudit.action == "accept_core"
        ).one()
        assert by_action
        assert by_action.action == "accept_core"
        assert by_action.score == 0.222113
Beispiel #12
0
def test_create_workflow_for_faulty_data(app, db, halt_workflow):
    """Check that a faulty crawl result yields an ERROR workflow object.

    The result is handed to ``submit_results`` as ``results_data``.
    """
    crawl_id = uuid.uuid4().hex  # random job identifier
    with app.app_context():
        CrawlerJob.create(
            job_id=crawl_id,
            spider="desy",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

    with app.app_context():
        crawl_job = CrawlerJob.get_by_job(crawl_id)
        assert crawl_job
        assert str(crawl_job.status)
        assert crawl_job.status == JobStatus.PENDING

        faulty_result = {
            'error': 'ValueError',
            'traceback': 'There was a ValueError',
            'xml_record': 'Just an XML string'
        }
        submit_results(
            job_id=crawl_id,
            results_uri='idontexist',
            results_data=[faulty_result],
            errors=None,
            log_file="/foo/bar",
        )

        # Exactly one workflow object must be linked to the job.
        link = CrawlerWorkflowObject.query.filter_by(job_id=crawl_id).one()
        created = WorkflowObject.get(link.object_id)
        assert created.status == ObjectStatus.ERROR
Beispiel #13
0
def test_stop_matched_holdingpen_wfs(app, simple_record):
    """Check that a matched holdingpen workflow is stopped by the new one."""
    # The object is run through a workflow first so that it gets a
    # workflow definition and a uuid assigned.
    seed = workflow_object_class.create(data_type='hep', **simple_record)
    engine = WorkflowEngine.from_uuid(start('article', object_id=seed.id))

    pending_wf = engine.processed_objects[0]
    pending_wf.status = ObjectStatus.HALTED
    pending_wf.save()
    pending_wf_id = pending_wf.id
    current_search.flush_and_refresh('holdingpen-hep')

    incoming_wf = WorkflowObject.create(data_type='hep', **simple_record)
    incoming_wf_id = incoming_wf.id

    match_non_completed_wf_in_holdingpen(incoming_wf, None)
    assert incoming_wf.extra_data['holdingpen_matches'] == [pending_wf_id]

    stop_matched_holdingpen_wfs(incoming_wf, None)

    # The previously halted workflow must be completed and point back at
    # the workflow that stopped it.
    stopped_wf = workflow_object_class.get(pending_wf_id)
    assert stopped_wf.status == ObjectStatus.COMPLETED
    assert stopped_wf.extra_data['stopped-by-wf'] == incoming_wf_id
def test_stop_matched_holdingpen_wfs(app, simple_record):
    """Check that a matched holdingpen workflow is stopped by the new one."""
    # The object is run through a workflow first so that it gets a
    # workflow definition and a uuid assigned.
    seed = workflow_object_class.create(data_type='hep', **simple_record)
    engine = WorkflowEngine.from_uuid(start('article', object_id=seed.id))

    pending_wf = engine.processed_objects[0]
    pending_wf.status = ObjectStatus.HALTED
    pending_wf.save()
    pending_wf_id = pending_wf.id
    es.indices.refresh('holdingpen-hep')

    incoming_wf = WorkflowObject.create(data_type='hep', **simple_record)
    incoming_wf_id = incoming_wf.id

    match_non_completed_wf_in_holdingpen(incoming_wf, None)
    assert incoming_wf.extra_data['holdingpen_matches'] == [pending_wf_id]

    stop_matched_holdingpen_wfs(incoming_wf, None)

    # The previously halted workflow must be completed and point back at
    # the workflow that stopped it.
    stopped_wf = workflow_object_class.get(pending_wf_id)
    assert stopped_wf.status == ObjectStatus.COMPLETED
    assert stopped_wf.extra_data['stopped-by-wf'] == incoming_wf_id
Beispiel #15
0
def test_tasks(app, db, halt_workflow, sample_records_uri):
    """Walk ``submit_results`` through its failure and success paths.

    Covers a submission before the job exists, a submission with an empty
    results URI, a successful submission and a submission carrying errors.
    """
    crawl_id = uuid.uuid4().hex  # random job identifier
    with app.app_context():
        # No job has been created for this id yet.
        with pytest.raises(CrawlerJobNotExistError):
            submit_results(
                crawl_id,
                results_uri=sample_records_uri,
                errors=None,
                log_file=None,
            )

        CrawlerJob.create(
            job_id=crawl_id,
            spider="Test",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

        # The job exists now, but the results URI is empty.
        with pytest.raises(CrawlerInvalidResultsPath):
            submit_results(
                crawl_id,
                results_uri="",
                errors=None,
                log_file=None,
            )

    with app.app_context():
        crawl_job = CrawlerJob.get_by_job(crawl_id)
        assert crawl_job
        assert str(crawl_job.status)
        assert crawl_job.status == JobStatus.PENDING

        submit_results(
            job_id=crawl_id,
            results_uri=sample_records_uri,
            errors=None,
            log_file="/foo/bar",
        )

        crawl_job = CrawlerJob.get_by_job(crawl_id)
        assert crawl_job.logs == "/foo/bar"
        assert crawl_job.results == sample_records_uri

        # NOTE(review): relies on the created workflow object having id 1.
        created = WorkflowObject.get(1)
        assert created
        wf_extra = created.extra_data
        assert 'source_data' in wf_extra
        source_data = wf_extra['source_data']
        assert 'data' in source_data
        assert 'extra_data' in source_data
        expected_source_extra = {
            'crawler_job_id': crawl_id,
            'crawler_results_path': urlparse(sample_records_uri).path
        }
        assert expected_source_extra == source_data['extra_data']

        # A submission that carries errors must fail and flag the job.
        with pytest.raises(CrawlerJobError):
            submit_results(
                crawl_id,
                results_uri=sample_records_uri,
                errors=["Some error"],
                log_file=None,
            )

        crawl_job = CrawlerJob.get_by_job(crawl_id)
        assert crawl_job.status == JobStatus.ERROR
def test_tasks(app, db, halt_workflow, sample_record_filename):
    """Walk ``submit_results`` through its failure and success paths.

    Covers a submission with an empty results URI, a submission before the
    job exists, a successful submission and a submission carrying errors.
    """
    job_id = uuid.uuid4().hex  # random job identifier
    with app.app_context():
        # An empty results URI is rejected even before the job exists.
        with pytest.raises(CrawlerInvalidResultsPath):
            submit_results(job_id, results_uri="", errors=None, log_file=None)

        # A usable results URI, but no job has been created for this id.
        with pytest.raises(CrawlerJobNotExistError):
            submit_results(
                job_id, results_uri=sample_record_filename,
                errors=None, log_file=None
            )

        CrawlerJob.create(
            job_id=job_id,
            spider="Test",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

    with app.app_context():
        job = CrawlerJob.get_by_job(job_id)

        assert job
        assert str(job.status)
        assert job.status == JobStatus.PENDING

        submit_results(
            job_id=job_id,
            results_uri=sample_record_filename,
            errors=None,
            log_file="/foo/bar"
        )

        job = CrawlerJob.get_by_job(job_id)
        assert job.logs == "/foo/bar"
        assert job.results == sample_record_filename

        # NOTE(review): relies on the created workflow object having id 1.
        workflow = WorkflowObject.get(1)
        assert workflow
        assert workflow.extra_data['crawler_job_id'] == job_id
        crawler_results_path = workflow.extra_data['crawler_results_path']
        assert crawler_results_path == urlparse(sample_record_filename).path

        # A submission that carries errors must fail and flag the job.
        with pytest.raises(CrawlerJobError):
            submit_results(
                job_id, results_uri=sample_record_filename,
                errors=["Some error"], log_file=None
            )

        job = CrawlerJob.get_by_job(job_id)
        assert job.status == JobStatus.ERROR
Beispiel #17
0
def test_create_workflow_for_faulty_data(app, db, halt_workflow):
    """Check how a crawl result that carries errors is turned into a workflow.

    The result is handed to ``submit_results`` as ``results_data``; the
    created object must be in ERROR state, hold the ``record`` as data and
    keep the remaining keys under ``extra_data['crawl_errors']``.
    """
    crawl_id = uuid.uuid4().hex  # random job identifier
    with app.app_context():
        CrawlerJob.create(
            job_id=crawl_id,
            spider="desy",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

    with app.app_context():
        crawl_job = CrawlerJob.get_by_job(crawl_id)
        assert crawl_job
        assert str(crawl_job.status)
        assert crawl_job.status == JobStatus.PENDING

        crawl_result = {
            'errors': [{
                'exception': 'ValueError',
                'traceback': 'ValueError on the line 23.'
            }],
            'source_data': 'Just an XML string',
            'record': {},
            'file_name': 'broken.xml'
        }
        submit_results(
            job_id=crawl_id,
            results_uri='idontexist',
            results_data=[crawl_result],
            errors=None,
            log_file="/foo/bar",
        )

        # Exactly one workflow object must be linked to the job.
        link = CrawlerWorkflowObject.query.filter_by(job_id=crawl_id).one()
        created = WorkflowObject.get(link.object_id)

        # Same content as the submitted result, minus the 'record' key.
        expected_crawl_error = {
            'errors': [{
                'exception': 'ValueError',
                'traceback': 'ValueError on the line 23.'
            }],
            'source_data': 'Just an XML string',
            'file_name': 'broken.xml'
        }

        assert created.status == ObjectStatus.ERROR
        assert created.data == crawl_result['record']
        assert created.extra_data['crawl_errors'] == expected_crawl_error
def test_execution_with_predefined_object(app, demo_workflow):
    """Start ``demo_workflow`` from objects that were created beforehand.

    The workflow is run once directly and once with ``delayed=True``;
    afterwards status and data type are changed, saved and read back.
    """
    with app.app_context():
        first = WorkflowObject.create({"x": 22})
        db.session.commit()
        first_id = first.id

        # Start the workflow on a freshly loaded copy of the object.
        WorkflowObject.get(first_id).start_workflow("demo_workflow")
        assert WorkflowObject.get(first_id).data == {"x": 40}

        second = WorkflowObject.create({"x": 22})
        db.session.commit()
        second_id = second.id

        second.start_workflow("demo_workflow", delayed=True)
        reloaded = WorkflowObject.get(second_id)
        assert reloaded.data == {"x": 40}

        # Change a couple of attributes and persist them.
        reloaded.status = reloaded.known_statuses.RUNNING
        reloaded.data_type = "bar"
        reloaded.save()
        db.session.commit()

        persisted = WorkflowObject.get(second_id)
        assert persisted.status == persisted.known_statuses.RUNNING
        assert persisted.data_type == "bar"
Beispiel #19
0
def test_execution_with_predefined_object(app, demo_workflow):
    """Start ``demo_workflow`` from objects that were created beforehand.

    The workflow is run once directly and once with ``delayed=True``;
    afterwards status and data type are changed, saved and read back.
    """
    with app.app_context():
        first = WorkflowObject.create({"x": 22})
        db.session.commit()
        first_id = first.id

        # Start the workflow on a freshly loaded copy of the object.
        WorkflowObject.get(first_id).start_workflow("demo_workflow")
        assert WorkflowObject.get(first_id).data == {"x": 40}

        second = WorkflowObject.create({"x": 22})
        db.session.commit()
        second_id = second.id

        second.start_workflow("demo_workflow", delayed=True)
        reloaded = WorkflowObject.get(second_id)
        assert reloaded.data == {"x": 40}

        # Change a couple of attributes and persist them.
        reloaded.status = reloaded.known_statuses.RUNNING
        reloaded.data_type = "bar"
        reloaded.save()
        db.session.commit()

        persisted = WorkflowObject.get(second_id)
        assert persisted.status == persisted.known_statuses.RUNNING
        assert persisted.data_type == "bar"
Beispiel #20
0
def test_create_error_workflow_for_wrong_crawl_result(app, db, halt_workflow):
    """Check the workflow created for a crawl result with missing keys.

    The submitted result lacks ``errors`` and ``file_name``; the created
    object must be in ERROR state with empty data and a ``crawl_errors``
    entry that describes the missing key and wraps the submitted result.
    """
    crawl_id = uuid.uuid4().hex  # random job identifier
    with app.app_context():
        CrawlerJob.create(
            job_id=crawl_id,
            spider="desy",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

    with app.app_context():
        crawl_job = CrawlerJob.get_by_job(crawl_id)
        assert crawl_job
        assert str(crawl_job.status)
        assert crawl_job.status == JobStatus.PENDING

        # Deliberately incomplete: no 'errors' and no 'file_name'.
        malformed_result = {
            'source_data': 'Just an XML string',
            'record': {},
        }
        submit_results(
            job_id=crawl_id,
            results_uri='idontexist',
            results_data=[malformed_result],
            errors=None,
            log_file="/foo/bar",
        )

        # Exactly one workflow object must be linked to the job.
        link = CrawlerWorkflowObject.query.filter_by(job_id=crawl_id).one()
        created = WorkflowObject.get(link.object_id)

        expected_crawl_error = {
            'errors': [{
                'exception': 'KeyError',
                'traceback': (
                    'Wrong crawl result format. '
                    'Missing the key `errors`'
                ),
            }],
            'file_name': None,
            'source_data': {
                'record': {},
                'source_data': 'Just an XML string'
            },
        }

        assert created.status == ObjectStatus.ERROR
        assert created.data == {}
        assert created.extra_data['crawl_errors'] == expected_crawl_error
Beispiel #21
0
def test_submit_results_with_results_data(app, db, halt_workflow,
                                          sample_records_uri, sample_records):
    """Check ``submit_results`` when the records are passed as payload.

    The results URI is altered with an 'idontexist' suffix while the
    records themselves are supplied through ``results_data``.
    """
    crawl_id = uuid.uuid4().hex  # random job identifier
    with app.app_context():
        CrawlerJob.create(
            job_id=crawl_id,
            spider="Test",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

    with app.app_context():
        crawl_job = CrawlerJob.get_by_job(crawl_id)
        assert crawl_job
        assert str(crawl_job.status)
        assert crawl_job.status == JobStatus.PENDING

        bogus_uri = sample_records_uri + 'idontexist'
        submit_results(
            job_id=crawl_id,
            results_uri=bogus_uri,
            results_data=sample_records,
            errors=None,
            log_file="/foo/bar",
        )

        crawl_job = CrawlerJob.get_by_job(crawl_id)
        assert crawl_job.logs == "/foo/bar"
        assert crawl_job.results == bogus_uri

        # NOTE(review): relies on the created workflow object having id 1.
        created = WorkflowObject.get(1)
        assert created
        assert created.extra_data['crawler_job_id'] == crawl_id
        stored_path = created.extra_data['crawler_results_path']
        assert stored_path == urlparse(bogus_uri).path

        # A submission that carries errors must fail and flag the job.
        with pytest.raises(CrawlerJobError):
            submit_results(
                crawl_id,
                results_uri=bogus_uri,
                results_data=sample_records,
                errors=["Some error"],
                log_file=None,
            )

        crawl_job = CrawlerJob.get_by_job(crawl_id)
        assert crawl_job.status == JobStatus.ERROR
def test_restart(app, restart_workflow):
    """Exercise ``restart`` and ``resume`` on the ``restarttest`` workflow."""
    assert 'restarttest' in app.extensions['invenio-workflows'].workflows

    with app.app_context():
        engine_uuid = start('restarttest', {})

        engine = WorkflowEngine.from_uuid(engine_uuid)
        wf_object = engine.processed_objects[0]

        assert wf_object.known_statuses.HALTED == wf_object.status
        assert WorkflowStatus.HALTED == engine.status
        assert wf_object.data == {"title": "foo"}
        assert wf_object.get_action() == "foo"
        assert wf_object.get_action_message() == "Test"

        # Restarting keeps the engine uuid; the object must halt again.
        restarted_uuid = restart(engine_uuid)
        assert restarted_uuid == engine_uuid

        engine = WorkflowEngine.from_uuid(engine_uuid)
        wf_object = engine.processed_objects[0]

        assert wf_object.known_statuses.HALTED == wf_object.status
        assert WorkflowStatus.HALTED == engine.status
        assert wf_object.data == {"title": {"value": "bar"}}
        assert wf_object.get_action() == "foo"

        wf_object.remove_action()
        assert wf_object.get_action() is None

        wf_object_id = wf_object.id

        # Resuming must carry on with the next task and complete.
        resume(wf_object_id)

        wf_object = WorkflowObject.get(wf_object_id)
        assert wf_object.known_statuses.COMPLETED == wf_object.status
        assert wf_object.extra_data.get('test') == 'test'
        assert wf_object.data.get('title').get('source') == 'TEST'

        # Restart once more, this time passing the object itself as data.
        # NOTE(review): ``engine`` below is the instance loaded before this
        # restart - confirm that checking its status here is intended.
        restart(wf_object.workflow.uuid, data=wf_object)
        assert wf_object.known_statuses.HALTED == wf_object.status
        assert WorkflowStatus.HALTED == engine.status
        assert wf_object.data == {"title": {"value": "bar"}}
Beispiel #23
0
def test_restart(app, restart_workflow):
    """Exercise ``restart`` and ``resume`` on the ``restarttest`` workflow."""
    assert 'restarttest' in app.extensions['invenio-workflows'].workflows

    with app.app_context():
        engine_uuid = start('restarttest', {})

        engine = WorkflowEngine.from_uuid(engine_uuid)
        wf_object = engine.processed_objects[0]

        assert wf_object.known_statuses.HALTED == wf_object.status
        assert WorkflowStatus.HALTED == engine.status
        assert wf_object.data == {"title": "foo"}
        assert wf_object.get_action() == "foo"
        assert wf_object.get_action_message() == "Test"

        # Restarting keeps the engine uuid; the object must halt again.
        restarted_uuid = restart(engine_uuid)
        assert restarted_uuid == engine_uuid

        engine = WorkflowEngine.from_uuid(engine_uuid)
        wf_object = engine.processed_objects[0]

        assert wf_object.known_statuses.HALTED == wf_object.status
        assert WorkflowStatus.HALTED == engine.status
        assert wf_object.data == {"title": {"value": "bar"}}
        assert wf_object.get_action() == "foo"

        wf_object.remove_action()
        assert wf_object.get_action() is None

        wf_object_id = wf_object.id

        # Resuming must carry on with the next task and complete.
        resume(wf_object_id)

        wf_object = WorkflowObject.get(wf_object_id)
        assert wf_object.known_statuses.COMPLETED == wf_object.status
        assert wf_object.extra_data.get('test') == 'test'
        assert wf_object.data.get('title').get('source') == 'TEST'

        # Restart once more, this time passing the object itself as data.
        # NOTE(review): ``engine`` below is the instance loaded before this
        # restart - confirm that checking its status here is intended.
        restart(wf_object.workflow.uuid, data=wf_object)
        assert wf_object.known_statuses.HALTED == wf_object.status
        assert WorkflowStatus.HALTED == engine.status
        assert wf_object.data == {"title": {"value": "bar"}}
Beispiel #24
0
def submit():
    """Store the submitted literature form as a workflow object and start it.

    The form data is exported, converted with ``convert_data_to_model`` and
    saved on a new workflow object owned by the current user. Returns a
    redirect to the ``.success`` endpoint.
    """
    literature_form = LiteratureForm(formdata=request.form)
    exporter = DataExporter()
    exporter.visit(literature_form)

    wf_object = WorkflowObject.create_object(id_user=current_user.get_id())
    wf_object.data = convert_data_to_model(wf_object, exporter.data)
    wf_object.save()
    db.session.commit()

    # ``start.delay`` hands the workflow over for background execution
    # (for example through Celery) instead of running it inline.
    start.delay("literature", object_id=wf_object.id)

    return redirect(url_for('.success'))
Beispiel #25
0
def submit():
    """Store the submitted literature form as a workflow object and start it.

    The exported form data is saved as-is on a new workflow object owned by
    the current user. Returns a redirect to the ``.success`` endpoint.
    """
    literature_form = LiteratureForm(formdata=request.form)
    exporter = DataExporter()
    exporter.visit(literature_form)

    wf_object = WorkflowObject.create_object(id_user=current_user.get_id())
    wf_object.data = exporter.data
    wf_object.save()
    db.session.commit()

    # ``start.delay`` hands the workflow over for background execution
    # (for example through Celery) instead of running it inline.
    start.delay("literature", object_id=wf_object.id)

    return redirect(url_for('.success'))
Beispiel #26
0
def submitupdate():
    """Handle a submitted INSPIRE author update form.

    The exported form data is saved on a new workflow object owned by the
    current user and the ``authorupdate`` workflow is started for it.
    Returns the rendered update-success template.
    """
    update_form = AuthorUpdateForm(formdata=request.form)
    exporter = DataExporter()
    exporter.visit(update_form)

    wf_object = WorkflowObject.create_object(id_user=current_user.get_id())
    wf_object.data = exporter.data
    wf_object.save()
    db.session.commit()

    # ``start.delay`` runs the workflow in the background.
    start.delay("authorupdate", object_id=wf_object.id)

    return render_template(
        'authors/forms/update_success.html',
        inspire_url=get_inspire_url(exporter.data),
    )
Beispiel #27
0
def test_delayed_execution_api(app, halt_workflow):
    """Drive ``halttest`` through the ``delay`` variants of start and resume."""
    with app.app_context():
        payload = [{'foo': 'bar'}]

        # The async result of ``start.delay`` resolves to the engine uuid.
        pending = start.delay('halttest', payload)
        engine = WorkflowEngine.from_uuid(pending.get())
        halted = engine.processed_objects[0]

        assert halted.known_statuses.WAITING == halted.status
        assert WorkflowStatus.HALTED == engine.status

        halted_id = halted.id
        resume.delay(halted_id)

        finished = WorkflowObject.get(halted_id)
        assert finished.known_statuses.COMPLETED == finished.status
Beispiel #28
0
def submitupdate():
    """Handle a submitted INSPIRE author update form.

    The exported form data is saved on a new workflow object owned by the
    current user and the ``authorupdate`` workflow is started for it.
    Returns the rendered update-success template.
    """
    update_form = AuthorUpdateForm(formdata=request.form)
    exporter = DataExporter()
    exporter.visit(update_form)

    wf_object = WorkflowObject.create_object(id_user=current_user.get_id())
    wf_object.data = exporter.data
    wf_object.save()
    db.session.commit()

    # ``start.delay`` runs the workflow in the background.
    start.delay("authorupdate", object_id=wf_object.id)

    return render_template(
        'authors/forms/update_success.html',
        inspire_url=get_inspire_url(exporter.data),
    )
def test_delayed_execution_api(app, halt_workflow):
    """Drive ``halttest`` through the ``delay`` variants of start and resume."""
    with app.app_context():
        payload = [{'foo': 'bar'}]

        # The async result of ``start.delay`` resolves to the engine uuid.
        pending = start.delay('halttest', payload)
        engine = WorkflowEngine.from_uuid(pending.get())
        halted = engine.processed_objects[0]

        assert halted.known_statuses.WAITING == halted.status
        assert WorkflowStatus.HALTED == engine.status

        halted_id = halted.id
        resume.delay(halted_id)

        finished = WorkflowObject.get(halted_id)
        assert finished.known_statuses.COMPLETED == finished.status
def test_pending_holdingpen_matches_wf_if_not_completed(app, simple_record):
    """Check that a holdingpen workflow only matches while not completed."""
    halted_wf = workflow_object_class.create(
        data=simple_record,
        status=ObjectStatus.HALTED,
        data_type='hep',
    )
    halted_wf_id = halted_wf.id
    halted_wf.save()
    es.indices.refresh('holdingpen-hep')

    incoming = WorkflowObject.create(data=simple_record, data_type='hep')
    assert match_non_completed_wf_in_holdingpen(incoming, None)
    assert incoming.extra_data['holdingpen_matches'] == [halted_wf_id]

    # Mark the stored workflow as completed and refresh the index.
    stored = workflow_object_class.get(halted_wf_id)
    stored.status = ObjectStatus.COMPLETED
    stored.save()
    es.indices.refresh('holdingpen-hep')

    # A completed workflow must no longer be matched.
    assert not match_non_completed_wf_in_holdingpen(incoming, None)
Beispiel #31
0
def test_pending_holdingpen_matches_wf_if_not_completed(app, simple_record):
    """Check that a holdingpen workflow only matches while not completed."""
    halted_wf = workflow_object_class.create(
        data=simple_record,
        status=ObjectStatus.HALTED,
        data_type='hep',
    )
    halted_wf_id = halted_wf.id
    halted_wf.save()
    es.indices.refresh('holdingpen-hep')

    incoming = WorkflowObject.create(data=simple_record, data_type='hep')
    assert match_non_completed_wf_in_holdingpen(incoming, None)
    assert incoming.extra_data['holdingpen_matches'] == [halted_wf_id]

    # Mark the stored workflow as completed and refresh the index.
    stored = workflow_object_class.get(halted_wf_id)
    stored.status = ObjectStatus.COMPLETED
    stored.save()
    es.indices.refresh('holdingpen-hep')

    # A completed workflow must no longer be matched.
    assert not match_non_completed_wf_in_holdingpen(incoming, None)
Beispiel #32
0
def submit_results(job_id, results_uri, **kwargs):
    """Create and start one workflow per record found in a crawl results file.

    The file at the path component of ``results_uri`` is read line by line,
    each line being parsed as one JSON document.

    :param job_id: identifier used to look up the ``CrawlerJob``.
    :param results_uri: URI whose path component points at the results file.
    :param kwargs: accepted but not used by this function.
    :raises CrawlerInvalidResultsPath: if the results path does not exist.
    :raises CrawlerJobVerificationError: if no job is found for ``job_id``.
    """
    results_path = urlparse(results_uri).path
    if not os.path.exists(results_path):
        raise CrawlerInvalidResultsPath(
            "Path specified in result does not exist: {0}".format(results_path)
        )
    job = CrawlerJob.query.get(job_id)
    if not job:
        raise CrawlerJobVerificationError(
            "Cannot find job id: {0}".format(job_id)
        )
    with open(results_path) as records:
        # Iterate the file lazily instead of reading every line up front,
        # so large result files are not held in memory as a whole.
        for line in records:
            record = json.loads(line)
            obj = WorkflowObject.create_object()
            obj.extra_data['crawler_job_id'] = job_id
            obj.extra_data['crawler_results_path'] = results_path
            # The record's own 'extra_data' is moved out of the record so
            # that it is kept separately and not stored in ``obj.data``.
            obj.extra_data['record_extra'] = record.pop('extra_data', {})
            obj.data_type = current_app.config['CRAWLER_DATA_TYPE']
            obj.data = record
            obj.start_workflow(job.workflow, delayed=True)
def test_match_previously_rejected_wf_in_holdingpen(app, simple_record):
    """Check matching against previously rejected holdingpen workflows.

    A COMPLETED workflow flagged ``approved = False`` is matched by a new
    workflow built from the same record; once the stored workflow is
    switched to HALTED, the same lookup must no longer report a match.
    """
    rejected = workflow_object_class.create(
        status=ObjectStatus.COMPLETED,
        data_type='hep',
        **simple_record
    )
    rejected_id = rejected.id
    # Mark the workflow as rejected.
    rejected.extra_data['approved'] = False
    rejected.save()
    # Refresh the index so the freshly saved workflow is searchable.
    es.indices.refresh('holdingpen-hep')

    incoming = WorkflowObject.create(data_type='hep', **simple_record)
    matched = match_previously_rejected_wf_in_holdingpen(incoming, None)
    assert matched
    assert incoming.extra_data['previously_rejected_matches'] == [rejected_id]

    # Move the stored workflow out of COMPLETED and re-index it.
    stored = workflow_object_class.get(rejected_id)
    stored.status = ObjectStatus.HALTED
    stored.save()
    es.indices.refresh('holdingpen-hep')

    # No match any more: the stored workflow is HALTED, not COMPLETED.
    matched_again = match_previously_rejected_wf_in_holdingpen(incoming, None)
    assert not matched_again
Beispiel #34
0
def test_match_previously_rejected_wf_in_holdingpen(app, simple_record):
    """Check matching against previously rejected holdingpen workflows.

    A COMPLETED workflow flagged ``approved = False`` is matched by a new
    workflow built from the same record; once the stored workflow is
    switched to HALTED, the same lookup must no longer report a match.
    """
    rejected = workflow_object_class.create(
        status=ObjectStatus.COMPLETED,
        data_type='hep',
        **simple_record
    )
    rejected_id = rejected.id
    # Mark the workflow as rejected.
    rejected.extra_data['approved'] = False
    rejected.save()
    # Flush and refresh the index so the saved workflow is searchable.
    current_search.flush_and_refresh('holdingpen-hep')

    incoming = WorkflowObject.create(data_type='hep', **simple_record)
    matched = match_previously_rejected_wf_in_holdingpen(incoming, None)
    assert matched
    assert incoming.extra_data['previously_rejected_matches'] == [rejected_id]

    # Move the stored workflow out of COMPLETED and re-index it.
    stored = workflow_object_class.get(rejected_id)
    stored.status = ObjectStatus.HALTED
    stored.save()
    current_search.flush_and_refresh('holdingpen-hep')

    # No match any more: the stored workflow is HALTED, not COMPLETED.
    matched_again = match_previously_rejected_wf_in_holdingpen(incoming, None)
    assert not matched_again
def test_equality(app, halt_workflow):
    """Verify ``==`` and ``!=`` between WorkflowObject instances."""
    with app.app_context():
        first = WorkflowObject.create({"x": 22})
        second = WorkflowObject.create({"x": 22})
        start("halttest", [first, second])

        first_id, second_id = first.id, second.id

        # Reload both objects after the workflow ran and compare them.
        first = WorkflowObject.get(first_id)
        second = WorkflowObject.get(second_id)
        assert first == second

        # Freshly created objects with different payloads must differ.
        with_22 = WorkflowObject.create({"x": 22})
        with_2 = WorkflowObject.create({"x": 2})
        assert with_2 != with_22
Beispiel #36
0
def test_equality(app, halt_workflow):
    """Verify ``==`` and ``!=`` between WorkflowObject instances."""
    with app.app_context():
        first = WorkflowObject.create({"x": 22})
        second = WorkflowObject.create({"x": 22})
        start("halttest", [first, second])

        first_id, second_id = first.id, second.id

        # Reload both objects after the workflow ran and compare them.
        first = WorkflowObject.get(first_id)
        second = WorkflowObject.get(second_id)
        assert first == second

        # Freshly created objects with different payloads must differ.
        with_22 = WorkflowObject.create({"x": 22})
        with_2 = WorkflowObject.create({"x": 2})
        assert with_2 != with_22
Beispiel #37
0
def import_holdingpen_record(parent_objs, obj, eng):
    """Import a holdingpen record: its workflow engine, parents and object.

    Builds a ``Workflow`` row from ``eng``, then creates one
    ``WorkflowObject`` per entry of ``parent_objs`` and finally one for
    ``obj``, committing the session at the end.

    :param parent_objs: parent object dicts (may be empty or None); each is
        read for ``id``, ``data_type``, ``id_user``, ``id_workflow``,
        ``id_parent``, ``status`` and ``extra_data['_task_counter']``.
    :param obj: object dict read for the same keys plus ``data`` and
        ``extra_data``.
    :param eng: engine dict read for ``name``, ``created``, ``modified``,
        ``id_user``, ``status``, ``uuid`` and ``extra_data``.
    """
    from invenio_db import db
    from workflow.engine_db import WorkflowStatus
    from invenio_workflows import (
        Workflow, WorkflowObject, ObjectStatus
    )
    # Translate the engine name through WORKFLOW_NAME_MAP, falling back to
    # the original name when there is no mapping.
    engine_model = Workflow(
        name=WORKFLOW_NAME_MAP.get(eng['name'], eng['name']),
        created=iso8601.parse_date(eng['created']),
        modified=iso8601.parse_date(eng['modified']),
        id_user=eng['id_user'],
        status=WorkflowStatus(eng['status']),
        uuid=eng['uuid'],
    )
    engine_model.extra_data = eng['extra_data']

    db.session.add(engine_model)
    try:
        db.session.commit()
    except IntegrityError:
        # The model has already been added to the DB.
        db.session.rollback()

    # First create parents
    if parent_objs:
        for parent in parent_objs:
            # Timestamps are taken from the engine dict, not from the parent.
            object_model = WorkflowObject.create(
                {},  # Pass empty data (filled later)
                id=parent['id'],
                created=iso8601.parse_date(eng['created']),
                modified=iso8601.parse_date(eng['modified']),
                data_type=DATA_TYPE_MAP.get(parent['data_type'], parent['data_type']),
                id_user=parent['id_user'],
                id_workflow=parent['id_workflow'],
                id_parent=parent['id_parent'],
                status=ObjectStatus(parent['status']),
                callback_pos=parent['extra_data']['_task_counter'],
            )

            # NOTE(review): the parent is filled with obj['data'] and
            # obj['extra_data'], not with values from the parent dict --
            # confirm this is intended and not a copy-paste slip.
            object_model.data = obj['data']
            object_model.extra_data = obj['extra_data']
            fix_object_model(eng, object_model)
            object_model.save()

    # And then the object
    object_model = WorkflowObject.create(
        {},  # Pass empty data (filled later)
        id=obj['id'],
        created=iso8601.parse_date(eng['created']),
        modified=iso8601.parse_date(eng['modified']),
        data_type=DATA_TYPE_MAP.get(obj['data_type'], obj['data_type']),
        id_user=obj['id_user'],
        id_workflow=obj['id_workflow'],
        id_parent=obj['id_parent'],
        status=ObjectStatus(obj['status']),
        callback_pos=obj['extra_data']['_task_counter'],
    )

    object_model.data = obj['data']
    object_model.extra_data = obj['extra_data']
    fix_object_model(eng, object_model)
    object_model.save()
    db.session.commit()
def test_api(app, demo_halt_workflow):
    """Exercise the WorkflowObject API end to end.

    Covers create/get/query, the three restart methods (restart_previous,
    restart_next, restart_current) and delete, using the
    ``demo_halt_workflow`` workflow.
    """
    with app.app_context():
        # Test WorkflowObject.(create|query|get)
        # ======================================
        obj = WorkflowObject.create({"x": 22})
        db.session.commit()

        ident = obj.id

        obj = WorkflowObject.get(ident)
        obj.start_workflow("demo_halt_workflow")

        # Fetch object via query API
        objects = WorkflowObject.query(id=ident)
        assert len(objects) == 1
        obj = objects[0]

        # Workflow should have completed as x was always > 10
        # x = 22 + 20 - 2 = 40
        assert obj.data == {"x": 40}
        assert obj.status == obj.known_statuses.COMPLETED

        # Test WorkflowObject.restart_previous
        # ====================================
        # Workflow should now halt as x will be less than 10
        obj = WorkflowObject.create({"x": -20})
        db.session.commit()

        ident = obj.id

        # NOTE(review): delayed=True is followed by an immediate reload, so
        # the task presumably runs eagerly in the test setup -- confirm.
        obj.start_workflow("demo_halt_workflow", delayed=True)
        obj = WorkflowObject.get(ident)

        # x = -20 + 20 = 0
        assert obj.data == {"x": 0}
        # No action associated, so it should be waiting
        assert obj.status == obj.known_statuses.WAITING

        # To add 20 to x, we now restart previous task and now it should
        # not halt and complete fully
        obj.restart_previous()
        obj = WorkflowObject.get(ident)

        # x = 0 + 20 - 2 = 18
        assert obj.data == {"x": 18}
        assert obj.status == obj.known_statuses.COMPLETED

        # Test WorkflowObject.restart_next
        # ================================
        obj = WorkflowObject.create({"x": -100})
        db.session.commit()

        ident = obj.id

        obj.start_workflow("demo_halt_workflow")
        obj = WorkflowObject.get(ident)

        # x = -100 + 20 = -80
        assert obj.data == {"x": -80}
        assert obj.status == obj.known_statuses.WAITING

        obj.restart_next()
        obj = WorkflowObject.get(ident)

        # x = -80 - 2 = -82
        assert obj.data == {"x": -82}
        assert obj.status == obj.known_statuses.COMPLETED

        # Test WorkflowObject.restart_current
        # ===================================
        obj = WorkflowObject.create({"x": -100})
        db.session.commit()

        ident = obj.id

        obj.start_workflow("demo_halt_workflow")
        obj = WorkflowObject.get(ident)

        # x = -100 + 20 = -80
        assert obj.data == {"x": -80}
        assert obj.status == obj.known_statuses.WAITING

        obj.restart_current()
        obj = WorkflowObject.get(ident)

        # x is unchanged at -80 and the object is WAITING again
        # (presumably the halting task was re-run and halted once more)
        assert obj.data == {"x": -80}
        assert obj.status == obj.known_statuses.WAITING

        # Test WorkflowObject.delete
        # ==========================
        # After deletion, fetching the same id must raise.
        obj.delete()
        with pytest.raises(WorkflowsMissingObject):
            WorkflowObject.get(ident)
Beispiel #39
0
def test_api(app, demo_halt_workflow):
    """Exercise the WorkflowObject API end to end.

    Covers create/get/query, the three restart methods (restart_previous,
    restart_next, restart_current) and delete, using the
    ``demo_halt_workflow`` workflow.
    """
    with app.app_context():
        # Test WorkflowObject.(create|query|get)
        # ======================================
        obj = WorkflowObject.create({"x": 22})
        db.session.commit()

        ident = obj.id

        obj = WorkflowObject.get(ident)
        obj.start_workflow("demo_halt_workflow")

        # Fetch object via query API
        objects = WorkflowObject.query(id=ident)
        assert len(objects) == 1
        obj = objects[0]

        # Workflow should have completed as x was always > 10
        # x = 22 + 20 - 2 = 40
        assert obj.data == {"x": 40}
        assert obj.status == obj.known_statuses.COMPLETED

        # Test WorkflowObject.restart_previous
        # ====================================
        # Workflow should now halt as x will be less than 10
        obj = WorkflowObject.create({"x": -20})
        db.session.commit()

        ident = obj.id

        # NOTE(review): delayed=True is followed by an immediate reload, so
        # the task presumably runs eagerly in the test setup -- confirm.
        obj.start_workflow("demo_halt_workflow", delayed=True)
        obj = WorkflowObject.get(ident)

        # x = -20 + 20 = 0
        assert obj.data == {"x": 0}
        # No action associated, so it should be waiting
        assert obj.status == obj.known_statuses.WAITING

        # To add 20 to x, we now restart previous task and now it should
        # not halt and complete fully
        obj.restart_previous()
        obj = WorkflowObject.get(ident)

        # x = 0 + 20 - 2 = 18
        assert obj.data == {"x": 18}
        assert obj.status == obj.known_statuses.COMPLETED

        # Test WorkflowObject.restart_next
        # ================================
        obj = WorkflowObject.create({"x": -100})
        db.session.commit()

        ident = obj.id

        obj.start_workflow("demo_halt_workflow")
        obj = WorkflowObject.get(ident)

        # x = -100 + 20 = -80
        assert obj.data == {"x": -80}
        assert obj.status == obj.known_statuses.WAITING

        obj.restart_next()
        obj = WorkflowObject.get(ident)

        # x = -80 - 2 = -82
        assert obj.data == {"x": -82}
        assert obj.status == obj.known_statuses.COMPLETED

        # Test WorkflowObject.restart_current
        # ===================================
        obj = WorkflowObject.create({"x": -100})
        db.session.commit()

        ident = obj.id

        obj.start_workflow("demo_halt_workflow")
        obj = WorkflowObject.get(ident)

        # x = -100 + 20 = -80
        assert obj.data == {"x": -80}
        assert obj.status == obj.known_statuses.WAITING

        obj.restart_current()
        obj = WorkflowObject.get(ident)

        # x is unchanged at -80 and the object is WAITING again
        # (presumably the halting task was re-run and halted once more)
        assert obj.data == {"x": -80}
        assert obj.status == obj.known_statuses.WAITING

        # Test WorkflowObject.delete
        # ==========================
        # After deletion, fetching the same id must raise.
        obj.delete()
        with pytest.raises(WorkflowsMissingObject):
            WorkflowObject.get(ident)