def test_schedule_start_works_with_pipeline_concurrency(self):
    operation_run = OperationRunFactory()
    operation_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
    operation_run.operation.save()
    pipeline_run = operation_run.pipeline_run
    # Set pipeline concurrency to 1
    pipeline_run.pipeline.concurrency = 1
    pipeline_run.pipeline.save()
    # Add a failed and a running upstream
    upstream_run1 = OperationRunFactory(pipeline_run=pipeline_run)
    upstream_run2 = OperationRunFactory(pipeline_run=pipeline_run)
    operation_run.upstream_runs.set([upstream_run1, upstream_run2])
    with patch('pipelines.tasks.pipelines_start_operation.apply_async') as mock_fct:
        OperationRunStatus.objects.create(status=OperationStatuses.FAILED,
                                          operation_run=upstream_run1)
        OperationRunStatus.objects.create(status=OperationStatuses.RUNNING,
                                          operation_run=upstream_run2)
    assert mock_fct.call_count == 1
    with patch('db.models.pipelines.OperationRun.start') as mock_fct:
        assert operation_run.schedule_start() is True
    assert mock_fct.call_count == 0
    operation_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.CREATED

def test_schedule_start_with_failed_upstream(self):
    operation_run = OperationRunFactory()
    operation_run.operation.trigger_policy = TriggerPolicy.ALL_SUCCEEDED
    operation_run.operation.save()
    # Add a failed upstream
    upstream_run1 = OperationRunFactory()
    operation_run.upstream_runs.set([upstream_run1])
    with patch('pipelines.tasks.pipelines_start_operation.apply_async') as mock_fct:
        OperationRunStatus.objects.create(status=OperationStatuses.FAILED,
                                          operation_run=upstream_run1)
    assert mock_fct.call_count == 1
    assert operation_run.schedule_start() is False
    # Check also that the task is marked as UPSTREAM_FAILED,
    # since this operation cannot be started anymore
    operation_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.UPSTREAM_FAILED

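# For context: a rough sketch of how a trigger-policy check like the one the
# tests above exercise could be implemented. This is an illustration under
# assumed names, not the project's actual code; `upstream_statuses` and the
# (can_start, is_blocked) return contract are hypothetical.
def check_trigger_policy_sketch(trigger_policy, upstream_statuses):
    failed_like = {OperationStatuses.FAILED, OperationStatuses.STOPPED}
    done = failed_like | {OperationStatuses.SUCCEEDED}
    if trigger_policy == TriggerPolicy.ALL_SUCCEEDED:
        # A single failed/stopped upstream blocks the run for good,
        # which is why the test expects UPSTREAM_FAILED.
        if any(status in failed_like for status in upstream_statuses):
            return False, True
        return all(status == OperationStatuses.SUCCEEDED
                   for status in upstream_statuses), False
    if trigger_policy == TriggerPolicy.ONE_DONE:
        # Any finished upstream, even a FAILED one, satisfies the policy.
        return any(status in done for status in upstream_statuses), False
    return False, False
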
def test_schedule_start_works_when_conditions_are_met(self):
    operation_run = OperationRunFactory()
    operation_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
    operation_run.operation.save()
    pipeline_run = operation_run.pipeline_run
    # Add a failed upstream
    upstream_run1 = OperationRunFactory(pipeline_run=pipeline_run)
    operation_run.upstream_runs.set([upstream_run1])
    with patch('pipelines.tasks.pipelines_start_operation.apply_async') as mock_fct:
        OperationRunStatus.objects.create(status=OperationStatuses.FAILED,
                                          operation_run=upstream_run1)
    assert mock_fct.call_count == 1
    with patch('db.models.pipelines.OperationRun.start') as mock_fct:
        assert operation_run.schedule_start() is False
    assert mock_fct.call_count == 1
    operation_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.SCHEDULED

def test_schedule_start_works_with_operation_concurrency(self):
    operation_run = OperationRunFactory()
    operation_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
    operation_run.operation.save()
    pipeline_run = operation_run.pipeline_run
    # Set operation concurrency to 1
    operation_run.operation.concurrency = 1
    operation_run.operation.save()
    # Add a failed and a running upstream
    upstream_run1 = OperationRunFactory(pipeline_run=pipeline_run)
    upstream_run2 = OperationRunFactory(pipeline_run=pipeline_run)
    operation_run.upstream_runs.set([upstream_run1, upstream_run2])
    with patch('pipelines.tasks.pipelines_start_operation.apply_async') as mock_fct:
        OperationRunStatus.objects.create(status=OperationStatuses.FAILED,
                                          operation_run=upstream_run1)
        OperationRunStatus.objects.create(status=OperationStatuses.RUNNING,
                                          operation_run=upstream_run2)
    assert mock_fct.call_count == 1
    # Add another operation run for this operation
    new_operation_run = OperationRunFactory(operation=operation_run.operation)
    new_operation_run.upstream_runs.set([upstream_run1, upstream_run2])
    with patch('db.models.pipelines.OperationRun.start') as mock_fct:
        assert operation_run.schedule_start() is False
    assert mock_fct.call_count == 1
    operation_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.SCHEDULED
    # Check that the concurrency limit prevents starting another instance
    new_operation_run.refresh_from_db()
    assert new_operation_run.last_status == OperationStatuses.CREATED
    with patch('db.models.pipelines.OperationRun.start') as mock_fct:
        assert new_operation_run.schedule_start() is True
    assert mock_fct.call_count == 0
    new_operation_run.refresh_from_db()
    assert new_operation_run.last_status == OperationStatuses.CREATED

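# The concurrency tests above rely on a guard that counts the operation's
# currently active runs. A minimal sketch, assuming a hypothetical
# `count_active_runs` helper; the True/False contract is inferred from the
# assertions above (True: could not start yet, reschedule; False: scheduled).
def schedule_start_sketch(operation_run):
    operation = operation_run.operation
    if operation.concurrency and count_active_runs(operation) >= operation.concurrency:
        # At the concurrency limit: stay CREATED and ask to be rescheduled.
        return True
    operation_run.start()  # moves the run to SCHEDULED
    return False
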
def test_stopping_all_operation_runs_sets_pipeline_run_to_finished(self):
    operation_run = OperationRunFactory()
    start_operation_run(operation_run)
    operation_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.CREATED
    pipeline_run = operation_run.pipeline_run
    assert pipeline_run.last_status == PipelineLifeCycle.CREATED
    assert pipeline_run.statuses.count() == 1
    # Create another operation run for this pipeline_run
    operation_run2 = OperationRunFactory(pipeline_run=pipeline_run)
    start_operation_run(operation_run2)
    operation_run2.refresh_from_db()
    # Stopping the first operation does not stop the pipeline
    with patch('scheduler.tasks.jobs.jobs_stop.apply_async') as spawner_mock_stop:
        stop_operation_run(operation_run)
    assert spawner_mock_stop.call_count == 1
    # Manual stopping
    operation_run.entity.set_status(OperationStatuses.STOPPED)
    pipeline_run.refresh_from_db()
    operation_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.STOPPED
    assert pipeline_run.last_status == PipelineLifeCycle.CREATED
    assert pipeline_run.statuses.count() == 1
    # Stopping the second operation stops the pipeline
    with patch('scheduler.tasks.jobs.jobs_stop.apply_async') as spawner_mock_stop:
        stop_operation_run(operation_run2)
    assert spawner_mock_stop.call_count == 1
    # Manual stopping
    operation_run2.entity.set_status(OperationStatuses.STOPPED)
    pipeline_run.refresh_from_db()
    assert pipeline_run.last_status == PipelineLifeCycle.DONE
    assert pipeline_run.statuses.count() == 2

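# Sketch of the invariant this test asserts: the pipeline run moves to DONE
# only once every one of its operation runs reaches a final status. The
# related-manager name `operation_runs` is an assumption for illustration,
# not a confirmed model field.
def check_pipeline_done_sketch(pipeline_run):
    final_statuses = {OperationStatuses.SUCCEEDED,
                      OperationStatuses.FAILED,
                      OperationStatuses.STOPPED}
    if all(op.last_status in final_statuses
           for op in pipeline_run.operation_runs.all()):
        pipeline_run.set_status(PipelineLifeCycle.DONE)
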
def test_schedule_start_works_with_operation_concurrency(self):
    operation_run = OperationRunFactory()
    operation_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
    operation_run.operation.save()
    pipeline_run = operation_run.pipeline_run
    # Set operation concurrency to 1
    operation_run.operation.concurrency = 1
    operation_run.operation.save()
    # Add a failed and a running upstream
    upstream_run1 = OperationRunFactory(pipeline_run=pipeline_run)
    upstream_run2 = OperationRunFactory(pipeline_run=pipeline_run)
    assert start_operation_run(upstream_run1) is False
    assert start_operation_run(upstream_run2) is False
    upstream_run1.refresh_from_db()
    upstream_run2.refresh_from_db()
    operation_run.upstream_runs.set([upstream_run1, upstream_run2])
    with patch('polyflow.tasks.pipelines_start_operation.apply_async') as mock_fct:
        upstream_run1.set_status(OperationStatuses.FAILED)
        upstream_run2.set_status(OperationStatuses.RUNNING)
    assert mock_fct.call_count == 1
    operation_run.refresh_from_db()
    assert operation_run.last_status is None
    assert start_operation_run(operation_run) is False
    operation_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.CREATED
    # Add another operation run for this operation
    new_operation_run = OperationRunFactory(operation=operation_run.operation)
    new_operation_run.upstream_runs.set([upstream_run1, upstream_run2])
    assert new_operation_run.last_status is None
    # Check that the concurrency limit prevents starting another instance
    new_operation_run.refresh_from_db()
    assert start_operation_run(new_operation_run) is True
    new_operation_run.refresh_from_db()
    assert new_operation_run.last_status is None

def test_schedule_start_works_with_pipeline_concurrency(self):
    operation_run = OperationRunFactory()
    operation_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
    operation_run.operation.save()
    pipeline_run = operation_run.pipeline_run
    # Set pipeline concurrency to 1
    pipeline_run.pipeline.concurrency = 1
    pipeline_run.pipeline.save()
    # Add two upstreams
    upstream_run1 = OperationRunFactory(pipeline_run=pipeline_run)
    upstream_run2 = OperationRunFactory(pipeline_run=pipeline_run)
    assert start_operation_run(upstream_run1) is False
    assert start_operation_run(upstream_run2) is True
    upstream_run1.refresh_from_db()
    upstream_run2.refresh_from_db()
    operation_run.upstream_runs.set([upstream_run1, upstream_run2])
    with patch('polyflow.tasks.pipelines_start_operation.apply_async') as mock_fct:
        upstream_run1.set_status(OperationStatuses.FAILED)
    assert mock_fct.call_count == 1
    operation_run.refresh_from_db()
    assert operation_run.last_status is None
    upstream_run2.refresh_from_db()
    assert upstream_run2.last_status is None
    # Should be started, but we mocked the process
    with patch('polyflow.tasks.pipelines_start_operation.apply_async') as mock_fct:
        assert start_operation_run(upstream_run2) is False
        upstream_run2.refresh_from_db()
        upstream_run2.set_status(OperationStatuses.RUNNING)
    assert mock_fct.call_count == 0
    assert start_operation_run(operation_run) is True
    assert operation_run.last_status is None
    upstream_run2.set_status(OperationStatuses.SUCCEEDED)
    operation_run.refresh_from_db()
    assert operation_run.last_status == OperationStatuses.CREATED

class TestOperationTask(BaseTest):
    def setUp(self):
        self.operation_run = OperationRunFactory()
        self.pipeline_run = self.operation_run.pipeline_run
        # Manually set status to scheduled
        self.operation_run.on_scheduled()
        return super().setUp()

    def test_task_without_operation_run_raises(self):
        @celery_app.task(base=OperationTask, shared=False)
        def dummy_task():
            return

        with self.assertRaises(TypeError):
            dummy_task.apply_async()

    def test_task_with_operation_run_succeeds(self):
        @celery_app.task(base=OperationTask, shared=False)
        def dummy_task(operation_run_id):
            return

        kwargs = {'operation_run_id': self.operation_run.id}
        dummy_task.apply_async(kwargs=kwargs)
        self.operation_run.refresh_from_db()
        assert self.operation_run.succeeded is True
        assert set(self.operation_run.statuses.values_list('status', flat=True)) == {
            OperationStatuses.CREATED,
            OperationStatuses.SCHEDULED,
            OperationStatuses.RUNNING,
            OperationStatuses.SUCCEEDED,
        }
        self.pipeline_run.refresh_from_db()
        assert self.operation_run.pipeline_run.last_status == PipelineStatuses.FINISHED
        assert set(self.operation_run.pipeline_run.statuses.values_list('status', flat=True)) == {
            PipelineStatuses.CREATED,
            PipelineStatuses.SCHEDULED,
            PipelineStatuses.RUNNING,
            PipelineStatuses.FINISHED,
        }

    def test_task_with_error_fails(self):
        @celery_app.task(base=OperationTask, shared=False)
        def raising_task(operation_run_id):
            raise KeyError

        kwargs = {'operation_run_id': self.operation_run.id}
        raising_task.apply_async(kwargs=kwargs)
        self.operation_run.refresh_from_db()
        assert self.operation_run.failed is True
        assert set(self.operation_run.statuses.values_list('status', flat=True)) == {
            OperationStatuses.CREATED,
            OperationStatuses.SCHEDULED,
            OperationStatuses.RUNNING,
            OperationStatuses.FAILED,
        }
        self.pipeline_run.refresh_from_db()
        assert self.operation_run.pipeline_run.last_status == PipelineStatuses.FINISHED
        assert set(self.operation_run.pipeline_run.statuses.values_list('status', flat=True)) == {
            PipelineStatuses.CREATED,
            PipelineStatuses.SCHEDULED,
            PipelineStatuses.RUNNING,
            PipelineStatuses.FINISHED,
        }

    def test_task_retries_for_specified_exception(self):
        class RetryTask(ClassBasedTask):
            retry_for = (KeyError,)

            @staticmethod
            def _run(task_bind, *args, **kwargs):
                raise KeyError

        @celery_app.task(base=OperationTask, bind=True, shared=False)
        def retry_task(task_bind, operation_run_id):
            assert task_bind.max_retries == 2
            assert task_bind.countdown == 0
            RetryTask.run(task_bind=task_bind, operation_run_id=operation_run_id)

        # Add max retries and retry delay to the operation
        self.operation_run.operation.max_retries = 2
        self.operation_run.operation.retry_delay = 0
        self.operation_run.operation.save()
        kwargs = {'operation_run_id': self.operation_run.id}
        retry_task.apply_async(kwargs=kwargs)
        self.operation_run.refresh_from_db()
        assert self.operation_run.last_status == OperationStatuses.RETRYING
        assert set(self.operation_run.statuses.values_list('status', flat=True)) == {
            OperationStatuses.CREATED,
            OperationStatuses.SCHEDULED,
            OperationStatuses.RUNNING,
            OperationStatuses.RETRYING,
        }
        self.pipeline_run.refresh_from_db()
        assert self.operation_run.pipeline_run.last_status == PipelineStatuses.RUNNING
        assert set(self.operation_run.pipeline_run.statuses.values_list('status', flat=True)) == {
            PipelineStatuses.CREATED,
            PipelineStatuses.SCHEDULED,
            PipelineStatuses.RUNNING,
        }

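# For context on what TestOperationTask exercises: an OperationTask-like base
# class typically wraps the task body with status transitions via celery's
# task lifecycle hooks. A rough sketch only; `OperationRun.on_run/on_success/
# on_failure` are assumed method names, and the real class also drives the
# retry behavior (max_retries/countdown come from the Operation, as asserted
# in the tests above).
class OperationTaskSketch(celery_app.Task):
    def __call__(self, *args, **kwargs):
        operation_run = OperationRun.objects.get(id=kwargs['operation_run_id'])
        operation_run.on_run()  # SCHEDULED -> RUNNING
        return super().__call__(*args, **kwargs)

    def on_success(self, retval, task_id, args, kwargs):
        OperationRun.objects.get(id=kwargs['operation_run_id']).on_success()

    def on_failure(self, exc, task_id, args, kwargs, einfo):
        OperationRun.objects.get(id=kwargs['operation_run_id']).on_failure()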