Esempio n. 1
0
    def test_schedule_start_works_with_pipeline_concurrency(self):
        """Check `schedule_start` on a run whose pipeline has concurrency 1.

        Two upstream runs are attached in the same pipeline run, one recorded
        as FAILED and one as RUNNING. The test expects `schedule_start()` to
        return True, `OperationRun.start` not to be called, and the last
        status read back from the database to be CREATED.
        """
        run = OperationRunFactory()
        operation = run.operation
        operation.trigger_policy = TriggerPolicy.ONE_DONE
        operation.save()

        # Cap the owning pipeline's concurrency at 1.
        parent_run = run.pipeline_run
        pipeline = parent_run.pipeline
        pipeline.concurrency = 1
        pipeline.save()

        # Attach two upstream runs: one gets a FAILED status, the other RUNNING.
        failed_upstream = OperationRunFactory(pipeline_run=parent_run)
        running_upstream = OperationRunFactory(pipeline_run=parent_run)
        run.upstream_runs.set([failed_upstream, running_upstream])
        upstream_statuses = (
            (OperationStatuses.FAILED, failed_upstream),
            (OperationStatuses.RUNNING, running_upstream),
        )
        with patch('pipelines.tasks.pipelines_start_operation.apply_async') as task_mock:
            for status, upstream in upstream_statuses:
                OperationRunStatus.objects.create(status=status,
                                                  operation_run=upstream)

        # Only one of the two status changes is expected to trigger the task.
        assert task_mock.call_count == 1

        with patch('db.models.pipelines.OperationRun.start') as start_mock:
            outcome = run.schedule_start()
            assert outcome is True

        assert start_mock.call_count == 0

        run.refresh_from_db()
        assert run.last_status == OperationStatuses.CREATED
Esempio n. 2
0
    def test_schedule_start_with_failed_upstream(self):
        """Check `schedule_start` when the only upstream run has failed.

        With the ALL_SUCCEEDED trigger policy and a FAILED upstream, the test
        expects `schedule_start()` to return False and the run's last status
        to be UPSTREAM_FAILED.
        """
        run = OperationRunFactory()
        operation = run.operation
        operation.trigger_policy = TriggerPolicy.ALL_SUCCEEDED
        operation.save()

        # Attach a single upstream run and record a FAILED status for it.
        failed_upstream = OperationRunFactory()
        run.upstream_runs.set([failed_upstream])
        with patch('pipelines.tasks.pipelines_start_operation.apply_async') as task_mock:
            OperationRunStatus.objects.create(operation_run=failed_upstream,
                                              status=OperationStatuses.FAILED)

        assert task_mock.call_count == 1

        outcome = run.schedule_start()
        assert outcome is False

        # The run can no longer be started, so it is expected to be marked
        # as UPSTREAM_FAILED.
        run.refresh_from_db()
        assert run.last_status == OperationStatuses.UPSTREAM_FAILED
Esempio n. 3
0
    def test_schedule_start_works_when_conditions_are_met(self):
        """Check that `schedule_start` starts the run once an upstream is done.

        With the ONE_DONE trigger policy and a FAILED upstream in the same
        pipeline run, the test expects `schedule_start()` to return False,
        `OperationRun.start` to be called once, and the run's last status to
        be SCHEDULED.
        """
        run = OperationRunFactory()
        operation = run.operation
        operation.trigger_policy = TriggerPolicy.ONE_DONE
        operation.save()

        # Attach one upstream run from the same pipeline run and fail it.
        failed_upstream = OperationRunFactory(pipeline_run=run.pipeline_run)
        run.upstream_runs.set([failed_upstream])
        with patch('pipelines.tasks.pipelines_start_operation.apply_async') as task_mock:
            OperationRunStatus.objects.create(operation_run=failed_upstream,
                                              status=OperationStatuses.FAILED)

        assert task_mock.call_count == 1

        with patch('db.models.pipelines.OperationRun.start') as start_mock:
            outcome = run.schedule_start()
            assert outcome is False

        assert start_mock.call_count == 1

        run.refresh_from_db()
        assert run.last_status == OperationStatuses.SCHEDULED
Esempio n. 4
0
    def test_schedule_start_works_with_operation_concurrency(self):
        """Check `schedule_start` on two runs of an operation with concurrency 1.

        The first run is expected to be started (`schedule_start()` returns
        False, `OperationRun.start` called once, last status SCHEDULED). A
        second run of the same operation is then expected not to start
        (`schedule_start()` returns True, `start` not called, last status
        CREATED).
        """
        first_run = OperationRunFactory()
        operation = first_run.operation
        operation.trigger_policy = TriggerPolicy.ONE_DONE
        operation.save()
        parent_run = first_run.pipeline_run
        # Cap the operation's concurrency at 1.
        operation.concurrency = 1
        operation.save()

        # Attach two upstream runs: one gets a FAILED status, the other RUNNING.
        failed_upstream = OperationRunFactory(pipeline_run=parent_run)
        running_upstream = OperationRunFactory(pipeline_run=parent_run)
        upstreams = [failed_upstream, running_upstream]
        first_run.upstream_runs.set(upstreams)
        with patch('pipelines.tasks.pipelines_start_operation.apply_async') as task_mock:
            OperationRunStatus.objects.create(operation_run=failed_upstream,
                                              status=OperationStatuses.FAILED)
            OperationRunStatus.objects.create(operation_run=running_upstream,
                                              status=OperationStatuses.RUNNING)

        assert task_mock.call_count == 1

        # Create a second run of the same operation with the same upstreams.
        second_run = OperationRunFactory(operation=operation)
        second_run.upstream_runs.set([failed_upstream, running_upstream])

        with patch('db.models.pipelines.OperationRun.start') as start_mock:
            first_outcome = first_run.schedule_start()
            assert first_outcome is False

        assert start_mock.call_count == 1

        first_run.refresh_from_db()
        assert first_run.last_status == OperationStatuses.SCHEDULED

        # The second run has not been touched yet.
        second_run.refresh_from_db()
        assert second_run.last_status == OperationStatuses.CREATED

        # Try to start the second instance of the operation.
        with patch('db.models.pipelines.OperationRun.start') as start_mock:
            second_outcome = second_run.schedule_start()
            assert second_outcome is True

        assert start_mock.call_count == 0

        second_run.refresh_from_db()
        assert second_run.last_status == OperationStatuses.CREATED
Esempio n. 5
0
    def test_stopping_all_operation_runs_sets_pipeline_run_to_finished(self):
        """Check that the pipeline run is DONE only after every run is stopped.

        Two operation runs share one pipeline run. After the first is stopped
        the pipeline run is expected to still be CREATED with one status
        entry; after the second is stopped it is expected to be DONE with two
        status entries.
        """
        stop_task_path = 'scheduler.tasks.jobs.jobs_stop.apply_async'

        first_run = OperationRunFactory()
        start_operation_run(first_run)
        first_run.refresh_from_db()
        assert first_run.last_status == OperationStatuses.CREATED
        parent_run = first_run.pipeline_run
        assert parent_run.last_status == PipelineLifeCycle.CREATED
        assert parent_run.statuses.count() == 1

        # Start a second operation run inside the same pipeline run.
        second_run = OperationRunFactory(pipeline_run=parent_run)
        start_operation_run(second_run)
        second_run.refresh_from_db()

        # Stopping the first operation does not stop the pipeline
        with patch(stop_task_path) as stop_mock:
            stop_operation_run(first_run)
        assert stop_mock.call_count == 1
        # The stop task is mocked, so the STOPPED status is set by hand.
        first_run.entity.set_status(OperationStatuses.STOPPED)
        parent_run.refresh_from_db()
        first_run.refresh_from_db()
        assert first_run.last_status == OperationStatuses.STOPPED
        assert parent_run.last_status == PipelineLifeCycle.CREATED
        assert parent_run.statuses.count() == 1

        # Stopping the second operation stops the pipeline
        with patch(stop_task_path) as stop_mock:
            stop_operation_run(second_run)
        assert stop_mock.call_count == 1
        # The stop task is mocked, so the STOPPED status is set by hand.
        second_run.entity.set_status(OperationStatuses.STOPPED)
        parent_run.refresh_from_db()
        assert parent_run.last_status == PipelineLifeCycle.DONE
        assert parent_run.statuses.count() == 2
Esempio n. 6
0
    def test_schedule_start_works_with_operation_concurrency(self):
        """Check `start_operation_run` on two runs of an operation with concurrency 1.

        The first run is expected to be started (`start_operation_run` returns
        False and the last status becomes CREATED). A second run of the same
        operation is then expected not to start (`start_operation_run`
        returns True and the last status stays None).
        """
        first_run = OperationRunFactory()
        operation = first_run.operation
        operation.trigger_policy = TriggerPolicy.ONE_DONE
        operation.save()
        parent_run = first_run.pipeline_run
        # Cap the operation's concurrency at 1.
        operation.concurrency = 1
        operation.save()

        # Create and start two upstream runs in the same pipeline run.
        failed_upstream = OperationRunFactory(pipeline_run=parent_run)
        running_upstream = OperationRunFactory(pipeline_run=parent_run)
        upstreams = (failed_upstream, running_upstream)
        for upstream in upstreams:
            assert start_operation_run(upstream) is False
        for upstream in upstreams:
            upstream.refresh_from_db()
        first_run.upstream_runs.set([failed_upstream, running_upstream])

        # Mark one upstream FAILED and the other RUNNING.
        with patch('polyflow.tasks.pipelines_start_operation.apply_async'
                   ) as task_mock:
            failed_upstream.set_status(OperationStatuses.FAILED)
            running_upstream.set_status(OperationStatuses.RUNNING)

        assert task_mock.call_count == 1
        first_run.refresh_from_db()
        assert first_run.last_status is None
        assert start_operation_run(first_run) is False
        first_run.refresh_from_db()
        assert first_run.last_status == OperationStatuses.CREATED

        # Create a second run of the same operation with the same upstreams.
        second_run = OperationRunFactory(operation=operation)
        second_run.upstream_runs.set([failed_upstream, running_upstream])
        assert second_run.status is None

        # Try to start the second instance of the operation.
        second_run.refresh_from_db()
        assert start_operation_run(second_run) is True
        second_run.refresh_from_db()
        assert second_run.last_status is None
Esempio n. 7
0
    def test_schedule_start_works_with_pipeline_concurrency(self):
        """Exercise `start_operation_run` with the pipeline concurrency set to 1.

        Two upstream runs and one downstream run share a single pipeline run.
        The test asserts the value returned by `start_operation_run` and the
        resulting `last_status` at each step.

        NOTE(review): a True return appears to mean "not started, try again
        later" and a False return "no further scheduling needed" — confirm
        against `start_operation_run`.
        """
        operation_run = OperationRunFactory()
        operation_run.operation.trigger_policy = TriggerPolicy.ONE_DONE
        operation_run.operation.save()
        pipeline_run = operation_run.pipeline_run
        # Set pipeline concurrency to 1
        pipeline_run.pipeline.concurrency = 1
        pipeline_run.pipeline.save()

        # Add two upstream runs in the same pipeline run; the first start call
        # is expected to return False and the second True.
        upstream_run1 = OperationRunFactory(pipeline_run=pipeline_run)
        upstream_run2 = OperationRunFactory(pipeline_run=pipeline_run)
        assert start_operation_run(upstream_run1) is False
        assert start_operation_run(upstream_run2) is True
        upstream_run1.refresh_from_db()
        upstream_run2.refresh_from_db()
        operation_run.upstream_runs.set([upstream_run1, upstream_run2])
        # Fail the first upstream while the start-operation task is mocked.
        with patch('polyflow.tasks.pipelines_start_operation.apply_async'
                   ) as mock_fct:
            upstream_run1.set_status(OperationStatuses.FAILED)

        assert mock_fct.call_count == 1
        operation_run.refresh_from_db()
        assert operation_run.last_status is None
        upstream_run2.refresh_from_db()
        assert upstream_run2.last_status is None  # Should be started but we mocked the process

        # Start the second upstream by hand and mark it RUNNING; the mocked
        # task is expected not to be triggered in this phase.
        with patch('polyflow.tasks.pipelines_start_operation.apply_async'
                   ) as mock_fct:
            assert start_operation_run(upstream_run2) is False
            upstream_run2.refresh_from_db()
            upstream_run2.set_status(OperationStatuses.RUNNING)

        assert mock_fct.call_count == 0

        # While the second upstream is RUNNING the downstream run is expected
        # not to start.
        assert start_operation_run(operation_run) is True

        # Read without refresh_from_db(): this checks the in-memory instance.
        assert operation_run.last_status is None

        # Not mocked this time: once the second upstream succeeds, the
        # downstream run is expected to end up with a CREATED status.
        upstream_run2.set_status(OperationStatuses.SUCCEEDED)

        operation_run.refresh_from_db()
        assert operation_run.last_status == OperationStatuses.CREATED
Esempio n. 8
0
class TestOperationTask(BaseTest):
    """Tests for celery tasks declared with `OperationTask` as their base.

    Each test runs a small task against an operation run that `setUp` has
    already moved to the scheduled state, then checks the statuses recorded
    for the operation run and for its pipeline run.
    """

    def setUp(self):
        """Create an operation run, mark it scheduled, then run the base setUp."""
        self.operation_run = OperationRunFactory()
        self.pipeline_run = self.operation_run.pipeline_run
        # Move the run to the scheduled state by hand.
        self.operation_run.on_scheduled()
        return super().setUp()

    def _operation_statuses(self):
        """Return the set of statuses recorded for the operation run."""
        return set(
            self.operation_run.statuses.values_list('status', flat=True))

    def _pipeline_statuses(self):
        """Return the set of statuses recorded for the run's pipeline run."""
        return set(
            self.operation_run.pipeline_run.statuses.values_list(
                'status', flat=True))

    def _task_kwargs(self):
        """Return the kwargs that point a task at this test's operation run."""
        return {'operation_run_id': self.operation_run.id}

    def test_task_without_operation_run_raises(self):
        """A task that takes no `operation_run_id` is expected to raise TypeError."""
        @celery_app.task(base=OperationTask, shared=False)
        def dummy_task():
            return

        self.assertRaises(TypeError, dummy_task.apply_async)

    def test_task_with_operation_run_succeeds(self):
        """A task that returns normally marks the run SUCCEEDED and the pipeline FINISHED."""
        @celery_app.task(base=OperationTask, shared=False)
        def dummy_task(operation_run_id):
            return

        dummy_task.apply_async(kwargs=self._task_kwargs())
        self.operation_run.refresh_from_db()
        assert self.operation_run.succeeded is True
        expected_operation_statuses = {
            OperationStatuses.CREATED,
            OperationStatuses.SCHEDULED,
            OperationStatuses.RUNNING,
            OperationStatuses.SUCCEEDED,
        }
        assert self._operation_statuses() == expected_operation_statuses

        self.pipeline_run.refresh_from_db()
        assert self.operation_run.pipeline_run.last_status == PipelineStatuses.FINISHED
        expected_pipeline_statuses = {
            PipelineStatuses.CREATED,
            PipelineStatuses.SCHEDULED,
            PipelineStatuses.RUNNING,
            PipelineStatuses.FINISHED,
        }
        assert self._pipeline_statuses() == expected_pipeline_statuses

    def test_task_with_error_fails(self):
        """A task that raises marks the run FAILED and the pipeline FINISHED."""
        @celery_app.task(base=OperationTask, shared=False)
        def raising_task(operation_run_id):
            raise KeyError

        raising_task.apply_async(kwargs=self._task_kwargs())
        self.operation_run.refresh_from_db()
        assert self.operation_run.failed is True
        expected_operation_statuses = {
            OperationStatuses.CREATED,
            OperationStatuses.SCHEDULED,
            OperationStatuses.RUNNING,
            OperationStatuses.FAILED,
        }
        assert self._operation_statuses() == expected_operation_statuses

        self.pipeline_run.refresh_from_db()
        assert self.operation_run.pipeline_run.last_status == PipelineStatuses.FINISHED
        expected_pipeline_statuses = {
            PipelineStatuses.CREATED,
            PipelineStatuses.SCHEDULED,
            PipelineStatuses.RUNNING,
            PipelineStatuses.FINISHED,
        }
        assert self._pipeline_statuses() == expected_pipeline_statuses

    def test_task_retries_for_specified_exception(self):
        """An exception listed in `retry_for` leaves the run RETRYING, the pipeline RUNNING."""
        class RetryTask(ClassBasedTask):
            retry_for = (KeyError, )

            @staticmethod
            def _run(task_bind, *args, **kwargs):
                raise KeyError

        @celery_app.task(base=OperationTask, bind=True, shared=False)
        def retry_task(task_bind, operation_run_id):
            # The bound task is expected to carry the operation's retry settings.
            assert task_bind.max_retries == 2
            assert task_bind.countdown == 0
            RetryTask.run(task_bind=task_bind,
                          operation_run_id=operation_run_id)

        # Configure the retry count and delay on the operation.
        operation = self.operation_run.operation
        operation.max_retries = 2
        operation.retry_delay = 0
        operation.save()

        retry_task.apply_async(kwargs=self._task_kwargs())
        self.operation_run.refresh_from_db()
        assert self.operation_run.last_status == OperationStatuses.RETRYING
        expected_operation_statuses = {
            OperationStatuses.CREATED,
            OperationStatuses.SCHEDULED,
            OperationStatuses.RUNNING,
            OperationStatuses.RETRYING,
        }
        assert self._operation_statuses() == expected_operation_statuses

        self.pipeline_run.refresh_from_db()
        assert self.operation_run.pipeline_run.last_status == PipelineStatuses.RUNNING
        expected_pipeline_statuses = {
            PipelineStatuses.CREATED,
            PipelineStatuses.SCHEDULED,
            PipelineStatuses.RUNNING,
        }
        assert self._pipeline_statuses() == expected_pipeline_statuses
class TestOperationTask(BaseTest):
    """Status-tracking tests for celery tasks that use `OperationTask` as base."""

    def setUp(self):
        """Build a scheduled operation run before delegating to the base setUp."""
        scheduled_run = OperationRunFactory()
        self.operation_run = scheduled_run
        self.pipeline_run = scheduled_run.pipeline_run
        # Manually set status to scheduled
        scheduled_run.on_scheduled()
        return super().setUp()

    def _assert_recorded_statuses(self, operation_statuses, pipeline_last_status,
                                  pipeline_statuses):
        """Compare the recorded operation and pipeline statuses with expectations.

        Checks the operation run's status set, refreshes `self.pipeline_run`,
        then checks the last status and status set of the operation run's
        pipeline run.
        """
        recorded = self.operation_run.statuses.values_list('status', flat=True)
        assert set(recorded) == operation_statuses

        self.pipeline_run.refresh_from_db()
        parent_run = self.operation_run.pipeline_run
        assert parent_run.last_status == pipeline_last_status
        recorded = parent_run.statuses.values_list('status', flat=True)
        assert set(recorded) == pipeline_statuses

    def test_task_without_operation_run_raises(self):
        """Running a task with no `operation_run_id` argument raises TypeError."""
        @celery_app.task(base=OperationTask, shared=False)
        def dummy_task():
            return

        with self.assertRaises(TypeError):
            dummy_task.apply_async()

    def test_task_with_operation_run_succeeds(self):
        """A task that completes records SUCCEEDED and finishes the pipeline run."""
        @celery_app.task(base=OperationTask, shared=False)
        def dummy_task(operation_run_id):
            return

        dummy_task.apply_async(kwargs={'operation_run_id': self.operation_run.id})
        self.operation_run.refresh_from_db()
        assert self.operation_run.succeeded is True
        self._assert_recorded_statuses(
            operation_statuses={
                OperationStatuses.CREATED,
                OperationStatuses.SCHEDULED,
                OperationStatuses.RUNNING,
                OperationStatuses.SUCCEEDED,
            },
            pipeline_last_status=PipelineStatuses.FINISHED,
            pipeline_statuses={
                PipelineStatuses.CREATED,
                PipelineStatuses.SCHEDULED,
                PipelineStatuses.RUNNING,
                PipelineStatuses.FINISHED,
            },
        )

    def test_task_with_error_fails(self):
        """A task that raises records FAILED and finishes the pipeline run."""
        @celery_app.task(base=OperationTask, shared=False)
        def raising_task(operation_run_id):
            raise KeyError

        raising_task.apply_async(kwargs={'operation_run_id': self.operation_run.id})
        self.operation_run.refresh_from_db()
        assert self.operation_run.failed is True
        self._assert_recorded_statuses(
            operation_statuses={
                OperationStatuses.CREATED,
                OperationStatuses.SCHEDULED,
                OperationStatuses.RUNNING,
                OperationStatuses.FAILED,
            },
            pipeline_last_status=PipelineStatuses.FINISHED,
            pipeline_statuses={
                PipelineStatuses.CREATED,
                PipelineStatuses.SCHEDULED,
                PipelineStatuses.RUNNING,
                PipelineStatuses.FINISHED,
            },
        )

    def test_task_retries_for_specified_exception(self):
        """An exception in `retry_for` records RETRYING and keeps the pipeline RUNNING."""
        class RetryTask(ClassBasedTask):
            retry_for = (KeyError, )

            @staticmethod
            def _run(task_bind, *args, **kwargs):
                raise KeyError

        @celery_app.task(base=OperationTask, bind=True, shared=False)
        def retry_task(task_bind, operation_run_id):
            # Retry settings on the bound task should mirror the operation's.
            assert task_bind.max_retries == 2
            assert task_bind.countdown == 0
            RetryTask.run(task_bind=task_bind, operation_run_id=operation_run_id)

        # Add retries and count to the operation
        retried_operation = self.operation_run.operation
        retried_operation.max_retries = 2
        retried_operation.retry_delay = 0
        retried_operation.save()

        retry_task.apply_async(kwargs={'operation_run_id': self.operation_run.id})
        self.operation_run.refresh_from_db()
        assert self.operation_run.last_status == OperationStatuses.RETRYING
        self._assert_recorded_statuses(
            operation_statuses={
                OperationStatuses.CREATED,
                OperationStatuses.SCHEDULED,
                OperationStatuses.RUNNING,
                OperationStatuses.RETRYING,
            },
            pipeline_last_status=PipelineStatuses.RUNNING,
            pipeline_statuses={
                PipelineStatuses.CREATED,
                PipelineStatuses.SCHEDULED,
                PipelineStatuses.RUNNING,
            },
        )