def test_execution_with_predefined_object(app, demo_workflow):
    """Run ``demo_workflow`` on objects that were created beforehand.

    The workflow is started once directly and once with ``delayed=True``;
    in both cases the stored data is expected to end up as ``{"x": 40}``.
    Finally ``status`` and ``data_type`` are modified, saved and re-read.
    """
    with app.app_context():
        # Direct execution on an object that was re-fetched by id.
        created = WorkflowObject.create({"x": 22})
        db.session.commit()
        object_id = created.id

        fetched = WorkflowObject.get(object_id)
        fetched.start_workflow("demo_workflow")
        assert WorkflowObject.get(object_id).data == {"x": 40}

        # Delayed execution, started on the freshly created instance.
        created = WorkflowObject.create({"x": 22})
        db.session.commit()
        object_id = created.id
        created.start_workflow("demo_workflow", delayed=True)

        reloaded = WorkflowObject.get(object_id)
        assert reloaded.data == {"x": 40}

        # Attribute changes must survive a save/commit round trip.
        reloaded.status = reloaded.known_statuses.RUNNING
        reloaded.data_type = "bar"
        reloaded.save()
        db.session.commit()

        reloaded = WorkflowObject.get(object_id)
        assert reloaded.status == reloaded.known_statuses.RUNNING
        assert reloaded.data_type == "bar"
def test_halt(app, halt_workflow, halt_workflow_conditional):
    """Check that halting workflows stop and can then be continued.

    Both the plain and the conditional halt workflow are started; each must
    leave its object WAITING with a HALTED engine, and the object must be
    COMPLETED once ``continue_workflow`` has been called.
    """
    registered = app.extensions['invenio-workflows'].workflows
    assert 'halttest' in registered
    assert 'halttestcond' in registered

    with app.app_context():
        data = [{'foo': 'bar'}]
        # The conditional workflow receives the bare dict instead of a list,
        # to check that data not wrapped in a list is handled as well.
        scenarios = (
            ('halttest', data),
            ('halttestcond', data[0]),
        )
        for workflow_name, payload in scenarios:
            engine = WorkflowEngine.from_uuid(start(workflow_name, payload))
            halted = engine.processed_objects[0]
            assert halted.known_statuses.WAITING == halted.status
            assert WorkflowStatus.HALTED == engine.status

            halted_id = halted.id
            halted.continue_workflow()

            resumed = WorkflowObject.get(halted_id)
            assert resumed.known_statuses.COMPLETED == resumed.status
def test_errors(app, error_workflow):
    """Check the exceptions raised by ``start`` and the resulting object state.

    Covers a call without data, an unknown workflow name and an unknown
    object id, then runs ``errortest`` on a real object and verifies that
    the object is left in ERROR status with the expected data.
    """
    assert 'errortest' in app.extensions['invenio-workflows'].workflows

    with app.app_context():
        # (expected exception, positional args, keyword args) for start().
        failing_calls = (
            (WorkflowsMissingData, ('errortest',), {}),
            (WorkflowDefinitionError, ('doesnotexist', 100), {}),
            (WorkflowsMissingObject, ('errortest',), {'object_id': -1}),
        )
        for expected_exc, args, kwargs in failing_calls:
            with pytest.raises(expected_exc):
                start(*args, **kwargs)

        seeded = WorkflowObject.create({"id": 0})
        db.session.commit()
        seeded_id = seeded.id

        with pytest.raises(ZeroDivisionError):
            start('errortest', object_id=seeded_id)

        failed = WorkflowObject.get(seeded_id)
        assert failed.known_statuses.ERROR == failed.status
        assert failed.data == {"id": 0, "foo": "bar"}
def test_create_workflow_for_faulty_data(app, db, halt_workflow):
    """Submit a faulty crawl result as payload and expect an ERROR workflow."""
    # NOTE(review): a later function in this file uses the same name and
    # replaces this definition at import time - confirm which one to keep.
    job_id = uuid.uuid4().hex  # init random value

    with app.app_context():
        CrawlerJob.create(
            job_id=job_id,
            spider="desy",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

    with app.app_context():
        pending_job = CrawlerJob.get_by_job(job_id)
        assert pending_job
        assert str(pending_job.status)
        assert pending_job.status == JobStatus.PENDING

        faulty_results = [{
            'error': 'ValueError',
            'traceback': 'There was a ValueError',
            'xml_record': 'Just an XML string'
        }]
        submit_results(
            job_id=job_id,
            results_uri='idontexist',
            results_data=faulty_results,
            errors=None,
            log_file="/foo/bar",
        )

        # Exactly one workflow object must be linked to the job.
        link = CrawlerWorkflowObject.query.filter_by(job_id=job_id).one()
        workflow = WorkflowObject.get(link.object_id)
        assert workflow.status == ObjectStatus.ERROR
def test_equality(app, halt_workflow):
    """Check ``==`` and ``!=`` between ``WorkflowObject`` instances."""
    with app.app_context():
        # Two objects with equal data, run through the same workflow.
        first = WorkflowObject.create({"x": 22})
        second = WorkflowObject.create({"x": 22})
        start("halttest", [first, second])

        first_id = first.id
        second_id = second.id
        assert WorkflowObject.get(first_id) == WorkflowObject.get(second_id)

        # Objects with different data must not compare equal.
        reference = WorkflowObject.create({"x": 22})
        different = WorkflowObject.create({"x": 2})
        assert different != reference
def test_task_info(app, halt_workflow):
    """Check that ``get_current_task_info`` names the task that halted."""
    with app.app_context():
        created = WorkflowObject.create({"x": 22})
        start("halttest", created)
        object_id = created.id

        halted = WorkflowObject.get(object_id)
        current_task = halted.get_current_task_info()
        assert current_task["name"] == "halt_engine"
def test_tasks(app, db, halt_workflow, sample_records_uri):
    """Exercise ``submit_results`` with a results URI.

    Checks the errors for an unknown job and an empty results path, the
    job/workflow state after a successful submission, and the ERROR status
    after a submission that carries crawl errors.
    """
    # NOTE(review): a later function in this file is also named
    # ``test_tasks`` and replaces this definition at import time - confirm
    # which one should be kept.
    job_id = uuid.uuid4().hex  # init random value

    with app.app_context():
        # The job does not exist yet.
        with pytest.raises(CrawlerJobNotExistError):
            submit_results(
                job_id,
                results_uri=sample_records_uri,
                errors=None,
                log_file=None,
            )

        CrawlerJob.create(
            job_id=job_id,
            spider="Test",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

        with pytest.raises(CrawlerInvalidResultsPath):
            submit_results(job_id, results_uri="", errors=None, log_file=None)

    with app.app_context():
        crawler_job = CrawlerJob.get_by_job(job_id)
        assert crawler_job
        assert str(crawler_job.status)
        assert crawler_job.status == JobStatus.PENDING

        submit_results(
            job_id=job_id,
            results_uri=sample_records_uri,
            errors=None,
            log_file="/foo/bar",
        )

        crawler_job = CrawlerJob.get_by_job(job_id)
        assert crawler_job.logs == "/foo/bar"
        assert crawler_job.results == sample_records_uri

        workflow = WorkflowObject.get(1)
        assert workflow

        extra_data = workflow.extra_data
        assert 'source_data' in extra_data
        source_data = extra_data['source_data']
        assert 'data' in source_data
        assert 'extra_data' in source_data

        expected_extra_data = {
            'crawler_job_id': job_id,
            'crawler_results_path': urlparse(sample_records_uri).path
        }
        assert expected_extra_data == source_data['extra_data']

        # Submitting with crawl errors raises and flags the job.
        with pytest.raises(CrawlerJobError):
            submit_results(
                job_id,
                results_uri=sample_records_uri,
                errors=["Some error"],
                log_file=None,
            )

        crawler_job = CrawlerJob.get_by_job(job_id)
        assert crawler_job.status == JobStatus.ERROR
def test_tasks(app, db, halt_workflow, sample_record_filename):
    """Exercise ``submit_results`` with a results file name.

    Checks the errors for an empty results path and an unknown job, the
    job/workflow state after a successful submission, and the ERROR status
    after a submission that carries crawl errors.
    """
    # NOTE(review): an earlier function in this file is also named
    # ``test_tasks``; this definition replaces it at import time - confirm
    # that dropping the earlier one is intended.
    job_id = uuid.uuid4().hex  # init random value

    with app.app_context():
        # An empty results path is rejected. The original repeated this
        # exact check twice in a row; one copy is enough.
        with pytest.raises(CrawlerInvalidResultsPath):
            submit_results(job_id, results_uri="", errors=None, log_file=None)

        # The job does not exist yet.
        with pytest.raises(CrawlerJobNotExistError):
            submit_results(
                job_id,
                results_uri=sample_record_filename,
                errors=None,
                log_file=None
            )

        CrawlerJob.create(
            job_id=job_id,
            spider="Test",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

    with app.app_context():
        job = CrawlerJob.get_by_job(job_id)
        assert job
        assert str(job.status)
        assert job.status == JobStatus.PENDING

        submit_results(
            job_id=job_id,
            results_uri=sample_record_filename,
            errors=None,
            log_file="/foo/bar"
        )

        job = CrawlerJob.get_by_job(job_id)
        assert job.logs == "/foo/bar"
        assert job.results == sample_record_filename

        workflow = WorkflowObject.get(1)
        assert workflow
        assert workflow.extra_data['crawler_job_id'] == job_id
        crawler_results_path = workflow.extra_data['crawler_results_path']
        assert crawler_results_path == urlparse(sample_record_filename).path

        # Submitting with crawl errors raises and flags the job.
        with pytest.raises(CrawlerJobError):
            submit_results(
                job_id,
                results_uri=sample_record_filename,
                errors=["Some error"],
                log_file=None
            )

        job = CrawlerJob.get_by_job(job_id)
        assert job.status == JobStatus.ERROR
def test_create_workflow_for_faulty_data(app, db, halt_workflow):
    """Submit a crawl result that carries errors and inspect the workflow.

    The created workflow object must be in ERROR status, hold the submitted
    ``record`` as data and expose the remaining keys of the result under
    ``extra_data['crawl_errors']``.
    """
    # NOTE(review): an earlier function in this file uses the same name;
    # this definition replaces it at import time - confirm that is intended.
    job_id = uuid.uuid4().hex  # init random value

    with app.app_context():
        CrawlerJob.create(
            job_id=job_id,
            spider="desy",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

    with app.app_context():
        pending_job = CrawlerJob.get_by_job(job_id)
        assert pending_job
        assert str(pending_job.status)
        assert pending_job.status == JobStatus.PENDING

        test_data = {
            'errors': [{
                'exception': 'ValueError',
                'traceback': 'ValueError on the line 23.'
            }],
            'source_data': 'Just an XML string',
            'record': {},
            'file_name': 'broken.xml'
        }
        submit_results(
            job_id=job_id,
            results_uri='idontexist',
            results_data=[test_data],
            errors=None,
            log_file="/foo/bar",
        )

        # Exactly one workflow object must be linked to the job.
        link = CrawlerWorkflowObject.query.filter_by(job_id=job_id).one()
        workflow = WorkflowObject.get(link.object_id)

        expected_crawl_error = {
            'errors': [{
                'exception': 'ValueError',
                'traceback': 'ValueError on the line 23.'
            }],
            'source_data': 'Just an XML string',
            'file_name': 'broken.xml'
        }

        assert workflow.status == ObjectStatus.ERROR
        assert workflow.data == test_data['record']
        assert workflow.extra_data['crawl_errors'] == expected_crawl_error
def test_create_error_workflow_for_wrong_crawl_result(app, db, halt_workflow):
    """Submit a crawl result lacking ``errors``/``file_name`` keys.

    The workflow object is expected to be in ERROR status with empty data,
    and ``extra_data['crawl_errors']`` must describe the malformed result.
    """
    job_id = uuid.uuid4().hex  # init random value

    with app.app_context():
        CrawlerJob.create(
            job_id=job_id,
            spider="desy",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

    with app.app_context():
        pending_job = CrawlerJob.get_by_job(job_id)
        assert pending_job
        assert str(pending_job.status)
        assert pending_job.status == JobStatus.PENDING

        test_data = {
            'source_data': 'Just an XML string',
            'record': {},
            # missing 'errors' and 'file_name'
        }
        submit_results(
            job_id=job_id,
            results_uri='idontexist',
            results_data=[test_data],
            errors=None,
            log_file="/foo/bar",
        )

        # Exactly one workflow object must be linked to the job.
        link = CrawlerWorkflowObject.query.filter_by(job_id=job_id).one()
        workflow = WorkflowObject.get(link.object_id)

        expected = {
            'errors': [{
                'exception': 'KeyError',
                'traceback': 'Wrong crawl result format. '
                             'Missing the key `errors`'
            }],
            'file_name': None,
            'source_data': {
                'record': {},
                'source_data': 'Just an XML string'
            },
        }

        assert workflow.status == ObjectStatus.ERROR
        assert workflow.data == {}
        assert workflow.extra_data['crawl_errors'] == expected
def test_submit_results_with_results_data(app, db, halt_workflow,
                                          sample_records_uri, sample_records):
    """Call ``submit_results`` with the records passed as ``results_data``.

    The results URI handed over is ``sample_records_uri`` with a suffix
    appended; the records themselves are supplied in memory.
    """
    job_id = uuid.uuid4().hex  # init random value

    with app.app_context():
        CrawlerJob.create(
            job_id=job_id,
            spider="Test",
            workflow=halt_workflow.__name__,
            logs=None,
            results=None,
        )
        db.session.commit()

    with app.app_context():
        crawler_job = CrawlerJob.get_by_job(job_id)
        assert crawler_job
        assert str(crawler_job.status)
        assert crawler_job.status == JobStatus.PENDING

        dummy_records_uri = sample_records_uri + 'idontexist'
        submit_results(
            job_id=job_id,
            results_uri=dummy_records_uri,
            results_data=sample_records,
            errors=None,
            log_file="/foo/bar"
        )

        crawler_job = CrawlerJob.get_by_job(job_id)
        assert crawler_job.logs == "/foo/bar"
        assert crawler_job.results == dummy_records_uri

        workflow = WorkflowObject.get(1)
        assert workflow
        assert workflow.extra_data['crawler_job_id'] == job_id
        stored_path = workflow.extra_data['crawler_results_path']
        assert stored_path == urlparse(dummy_records_uri).path

        # Submitting with crawl errors raises and flags the job.
        with pytest.raises(CrawlerJobError):
            submit_results(
                job_id,
                results_uri=dummy_records_uri,
                results_data=sample_records,
                errors=["Some error"],
                log_file=None,
            )

        crawler_job = CrawlerJob.get_by_job(job_id)
        assert crawler_job.status == JobStatus.ERROR
def test_restart(app, restart_workflow):
    """Exercise ``restart`` and ``resume`` on the ``restarttest`` workflow.

    Starts the workflow, restarts the engine, removes the pending action,
    resumes the object to completion and finally restarts it once more,
    asserting object/engine status and data after each step.
    """
    assert 'restarttest' in app.extensions['invenio-workflows'].workflows

    with app.app_context():
        engine_uuid = start('restarttest', {})
        engine = WorkflowEngine.from_uuid(engine_uuid)
        wf_object = engine.processed_objects[0]

        assert wf_object.known_statuses.HALTED == wf_object.status
        assert WorkflowStatus.HALTED == engine.status
        assert wf_object.data == {"title": "foo"}
        assert wf_object.get_action() == "foo"
        assert wf_object.get_action_message() == "Test"

        # Restarting keeps the same engine; the object is still halted.
        restarted_uuid = restart(engine_uuid)
        assert restarted_uuid == engine_uuid

        engine = WorkflowEngine.from_uuid(engine_uuid)
        wf_object = engine.processed_objects[0]
        assert wf_object.known_statuses.HALTED == wf_object.status
        assert WorkflowStatus.HALTED == engine.status
        assert wf_object.data == {"title": {"value": "bar"}}
        assert wf_object.get_action() == "foo"

        wf_object.remove_action()
        assert wf_object.get_action() is None

        object_id = wf_object.id

        # Resuming should carry on with the next task.
        resume(object_id)

        wf_object = WorkflowObject.get(object_id)
        assert wf_object.known_statuses.COMPLETED == wf_object.status
        assert wf_object.extra_data.get('test') == 'test'
        assert wf_object.data.get('title').get('source') == 'TEST'

        # Restart the object once more. Note that ``engine`` below is still
        # the instance loaded after the first restart.
        restart(wf_object.workflow.uuid, data=wf_object)
        assert wf_object.known_statuses.HALTED == wf_object.status
        assert WorkflowStatus.HALTED == engine.status
        assert wf_object.data == {"title": {"value": "bar"}}
def test_delayed_execution_api(app, halt_workflow):
    """Start and resume a workflow through the ``.delay`` task API."""
    with app.app_context():
        payload = [{'foo': 'bar'}]

        pending = start.delay('halttest', payload)
        engine = WorkflowEngine.from_uuid(pending.get())

        halted = engine.processed_objects[0]
        assert halted.known_statuses.WAITING == halted.status
        assert WorkflowStatus.HALTED == engine.status

        halted_id = halted.id
        resume.delay(halted_id)

        resumed = WorkflowObject.get(halted_id)
        assert resumed.known_statuses.COMPLETED == resumed.status
def test_api(app, demo_halt_workflow):
    """Exercise the ``WorkflowObject`` API end to end.

    Covers create/get/query, ``restart_previous``, ``restart_next``,
    ``restart_current`` and ``delete`` against ``demo_halt_workflow``.
    """
    with app.app_context():
        # --- WorkflowObject.(create|query|get) -------------------------
        created = WorkflowObject.create({"x": 22})
        db.session.commit()
        object_id = created.id

        WorkflowObject.get(object_id).start_workflow("demo_halt_workflow")

        # Fetch object via query API
        matches = WorkflowObject.query(id=object_id)
        assert len(matches) == 1
        queried = matches[0]

        # Workflow should have completed as x was always > 10
        # x = 22 + 20 - 2 = 40
        assert queried.data == {"x": 40}
        assert queried.status == queried.known_statuses.COMPLETED

        # --- WorkflowObject.restart_previous ---------------------------
        # Workflow should now halt as x will be less than 10
        created = WorkflowObject.create({"x": -20})
        db.session.commit()
        object_id = created.id
        created.start_workflow("demo_halt_workflow", delayed=True)

        halted = WorkflowObject.get(object_id)
        # x = -20 + 20 = 0
        assert halted.data == {"x": 0}
        # No action associated, so it should be waiting
        assert halted.status == halted.known_statuses.WAITING

        # Restarting the previous task adds 20 to x again, so this time
        # the workflow should not halt and should complete fully.
        halted.restart_previous()
        finished = WorkflowObject.get(object_id)
        # x = 0 + 20 - 2 = 18
        assert finished.data == {"x": 18}
        assert finished.status == finished.known_statuses.COMPLETED

        # --- WorkflowObject.restart_next -------------------------------
        created = WorkflowObject.create({"x": -100})
        db.session.commit()
        object_id = created.id
        created.start_workflow("demo_halt_workflow")

        halted = WorkflowObject.get(object_id)
        # x = -100 + 20 = -80
        assert halted.data == {"x": -80}
        assert halted.status == halted.known_statuses.WAITING

        halted.restart_next()
        finished = WorkflowObject.get(object_id)
        # x = -80 - 2 = -82
        assert finished.data == {"x": -82}
        assert finished.status == finished.known_statuses.COMPLETED

        # --- WorkflowObject.restart_current ----------------------------
        created = WorkflowObject.create({"x": -100})
        db.session.commit()
        object_id = created.id
        created.start_workflow("demo_halt_workflow")

        halted = WorkflowObject.get(object_id)
        # x = -100 + 20 = -80
        assert halted.data == {"x": -80}
        assert halted.status == halted.known_statuses.WAITING

        halted.restart_current()
        still_halted = WorkflowObject.get(object_id)
        # x is expected to stay at -80 and the object to keep waiting.
        assert still_halted.data == {"x": -80}
        assert still_halted.status == still_halted.known_statuses.WAITING

        # --- WorkflowObject.delete -------------------------------------
        still_halted.delete()
        with pytest.raises(WorkflowsMissingObject):
            WorkflowObject.get(object_id)