Example #1
    def test_check_for_stalled_adopted_tasks(self):
        exec_date = timezone.utcnow() - timedelta(minutes=40)
        start_date = timezone.utcnow() - timedelta(days=2)
        queued_dttm = timezone.utcnow() - timedelta(minutes=30)

        try_number = 1

        with DAG("test_check_for_stalled_adopted_tasks") as dag:
            task_1 = BaseOperator(task_id="task_1", start_date=start_date)
            task_2 = BaseOperator(task_id="task_2", start_date=start_date)

        key_1 = TaskInstanceKey(dag.dag_id, task_1.task_id, exec_date,
                                try_number)
        key_2 = TaskInstanceKey(dag.dag_id, task_2.task_id, exec_date,
                                try_number)

        executor = celery_executor.CeleryExecutor()
        executor.adopted_task_timeouts = {
            key_1: queued_dttm + executor.task_adoption_timeout,
            key_2: queued_dttm + executor.task_adoption_timeout
        }
        executor.tasks = {key_1: AsyncResult("231"), key_2: AsyncResult("232")}
        executor.sync()
        self.assertEqual(executor.event_buffer, {
            key_1: (State.FAILED, None),
            key_2: (State.FAILED, None)
        })
        self.assertEqual(executor.tasks, {})
        self.assertEqual(executor.adopted_task_timeouts, {})
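Note: the deadline arithmetic Example #1 relies on is simple; each adopted key maps to queued_dttm + task_adoption_timeout, and sync() fails any key whose deadline has already passed. A minimal standalone check, using the ten-minute default that Example #5 pins with its final assertion:

from datetime import timedelta

from airflow.utils import timezone

# Each adopted key maps to queued_dttm + task_adoption_timeout; a key
# counts as stalled once that deadline is in the past.
task_adoption_timeout = timedelta(seconds=600)  # default, see Example #5
queued_dttm = timezone.utcnow() - timedelta(minutes=30)
deadline = queued_dttm + task_adoption_timeout
assert deadline < timezone.utcnow()  # queued 30 min ago, so 20 min overdue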
Example #2
    def test_celery_integration(self, broker_url):
        success_command = ['airflow', 'run', 'true', 'some_parameter']
        fail_command = ['airflow', 'version']

        def fake_execute_command(command):
            if command != success_command:
                raise AirflowException("fail")

        with self._prepare_app(broker_url,
                               execute=fake_execute_command) as app:
            executor = celery_executor.CeleryExecutor()
            executor.start()

            with start_worker(app=app, logfile=sys.stdout, loglevel='debug'):
                cached_celery_backend = celery_executor.execute_command.backend
                task_tuples_to_send = [
                    ('success', 'fake_simple_ti', success_command,
                     celery_executor.celery_configuration['task_default_queue'],
                     celery_executor.execute_command),
                    ('fail', 'fake_simple_ti', fail_command,
                     celery_executor.celery_configuration['task_default_queue'],
                     celery_executor.execute_command)
                ]

                chunksize = executor._num_tasks_per_send_process(
                    len(task_tuples_to_send))
                num_processes = min(len(task_tuples_to_send),
                                    executor._sync_parallelism)

                send_pool = Pool(processes=num_processes)
                key_and_async_results = send_pool.map(
                    celery_executor.send_task_to_executor,
                    task_tuples_to_send,
                    chunksize=chunksize)

                send_pool.close()
                send_pool.join()

                for key, command, result in key_and_async_results:
                    # Only pops when enqueued successfully, otherwise keep it
                    # and expect scheduler loop to deal with it.
                    result.backend = cached_celery_backend
                    executor.running[key] = command
                    executor.tasks[key] = result
                    executor.last_state[key] = celery_states.PENDING

                executor.running['success'] = True
                executor.running['fail'] = True

                executor.end(synchronous=True)

        self.assertEqual(executor.event_buffer['success'], State.SUCCESS)
        self.assertEqual(executor.event_buffer['fail'], State.FAILED)

        self.assertNotIn('success', executor.tasks)
        self.assertNotIn('fail', executor.tasks)

        self.assertNotIn('success', executor.last_state)
        self.assertNotIn('fail', executor.last_state)
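Several of these examples call a _prepare_app helper (sometimes as self._prepare_app) that the listing does not show. A minimal sketch of what it plausibly looks like, inferred from how it is called here; the broker default, config handling, and patch targets are assumptions:

import contextlib
from unittest import mock

from celery import Celery

from airflow.executors import celery_executor


@contextlib.contextmanager
def _prepare_app(broker_url=None, execute=None):
    """Build a throwaway Celery app around a fake execute function and
    patch it into the executor module for the duration of the block."""
    broker_url = broker_url or 'memory://'  # assumed default
    execute = execute or celery_executor.execute_command.__wrapped__

    test_config = dict(celery_executor.celery_configuration)
    test_config.update({'broker_url': broker_url})
    test_app = Celery(broker_url, config_source=test_config)
    test_execute = test_app.task(execute)

    with mock.patch.object(celery_executor, 'app', test_app), \
            mock.patch.object(celery_executor, 'execute_command', test_execute):
        yield test_app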
Example #3
    def test_error_sending_task(self):
        def fake_execute_command():
            pass

        with _prepare_app(execute=fake_execute_command):
            # fake_execute_command takes no arguments while execute_command takes 1,
            # which will cause TypeError when calling task.apply_async()
            executor = celery_executor.CeleryExecutor()
            task = BashOperator(task_id="test",
                                bash_command="true",
                                dag=DAG(dag_id='id'),
                                start_date=datetime.now())
            when = datetime.now()
            value_tuple = (
                'command',
                1,
                None,
                SimpleTaskInstance(
                    ti=TaskInstance(task=task, execution_date=datetime.now())),
            )
            key = ('fail', 'fake_simple_ti', when, 0)
            executor.queued_tasks[key] = value_tuple
            executor.task_publish_retries[key] = 1
            executor.heartbeat()
        assert 0 == len(
            executor.queued_tasks), "Task should no longer be queued"
        assert executor.event_buffer[('fail', 'fake_simple_ti', when,
                                      0)][0] == State.FAILED
Example #4
    def test_gauge_executor_metrics(self, mock_stats_gauge, mock_trigger_tasks, mock_sync):
        executor = celery_executor.CeleryExecutor()
        executor.heartbeat()
        calls = [mock.call('executor.open_slots', mock.ANY),
                 mock.call('executor.queued_tasks', mock.ANY),
                 mock.call('executor.running_tasks', mock.ANY)]
        mock_stats_gauge.assert_has_calls(calls)
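The three mock parameters in Example #4 imply a stack of mock.patch decorators that this excerpt dropped. A plausible reconstruction as a self-contained test case; decorators apply bottom-up, so Stats.gauge binds to the first mock argument, and the exact patch targets are assumptions:

import unittest
from unittest import mock

from airflow.executors import celery_executor


class TestCeleryExecutorMetrics(unittest.TestCase):
    # Patch targets below are reconstructed, not taken from the original file.
    @mock.patch('airflow.executors.celery_executor.CeleryExecutor.sync')
    @mock.patch('airflow.executors.base_executor.BaseExecutor.trigger_tasks')
    @mock.patch('airflow.stats.Stats.gauge')
    def test_gauge_executor_metrics(self, mock_stats_gauge, mock_trigger_tasks, mock_sync):
        # heartbeat() should still report executor gauges via Stats.gauge
        # even with sync() and trigger_tasks() stubbed out.
        executor = celery_executor.CeleryExecutor()
        executor.heartbeat()
        mock_stats_gauge.assert_has_calls([
            mock.call('executor.open_slots', mock.ANY),
            mock.call('executor.queued_tasks', mock.ANY),
            mock.call('executor.running_tasks', mock.ANY),
        ])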
Example #5
    def test_celery_integration(self, broker_url):
        success_command = ['airflow', 'tasks', 'run', 'true', 'some_parameter']
        fail_command = ['airflow', 'version']

        def fake_execute_command(command):
            if command != success_command:
                raise AirflowException("fail")

        with _prepare_app(broker_url, execute=fake_execute_command) as app:
            executor = celery_executor.CeleryExecutor()
            self.assertEqual(executor.tasks, {})
            executor.start()

            with start_worker(app=app, logfile=sys.stdout, loglevel='info'):
                execute_date = datetime.now()

                task_tuples_to_send = [
                    (('success', 'fake_simple_ti', execute_date, 0),
                     None, success_command,
                     celery_executor.celery_configuration['task_default_queue'],
                     celery_executor.execute_command),
                    (('fail', 'fake_simple_ti', execute_date, 0),
                     None, fail_command,
                     celery_executor.celery_configuration['task_default_queue'],
                     celery_executor.execute_command)
                ]

                # "Enqueue" them. We don't have a real SimpleTaskInstance, so directly edit the dict
                for (key, simple_ti, command, queue,
                     task) in task_tuples_to_send:  # pylint: disable=W0612
                    executor.queued_tasks[key] = (command, 1, queue, simple_ti)

                executor._process_tasks(task_tuples_to_send)

                self.assertEqual(
                    list(executor.tasks.keys()),
                    [('success', 'fake_simple_ti', execute_date, 0),
                     ('fail', 'fake_simple_ti', execute_date, 0)])
                self.assertEqual(
                    executor.event_buffer[('success', 'fake_simple_ti',
                                           execute_date, 0)][0], State.QUEUED)
                self.assertEqual(
                    executor.event_buffer[('fail', 'fake_simple_ti',
                                           execute_date, 0)][0], State.QUEUED)

                executor.end(synchronous=True)

        self.assertEqual(
            executor.event_buffer[('success', 'fake_simple_ti', execute_date,
                                   0)][0], State.SUCCESS)
        self.assertEqual(
            executor.event_buffer[('fail', 'fake_simple_ti', execute_date,
                                   0)][0], State.FAILED)

        self.assertNotIn('success', executor.tasks)
        self.assertNotIn('fail', executor.tasks)

        self.assertEqual(executor.queued_tasks, {})
        self.assertEqual(timedelta(0, 600), executor.task_adoption_timeout)
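The final assertion pins the executor's default adoption timeout; timedelta(0, 600) is just the positional spelling of 600 seconds:

from datetime import timedelta

# timedelta's first two positional arguments are days and seconds.
assert timedelta(0, 600) == timedelta(seconds=600) == timedelta(minutes=10)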
Example #6
    def test_exception_propagation(self):

        with _prepare_app(), self.assertLogs(celery_executor.log) as cm:
            executor = celery_executor.CeleryExecutor()
            executor.tasks = {'key': FakeCeleryResult()}
            executor.bulk_state_fetcher._get_many_using_multiprocessing(executor.tasks.values())

        self.assertTrue(any(celery_executor.CELERY_FETCH_ERR_MSG_HEADER in line for line in cm.output))
        self.assertTrue(any("Exception" in line for line in cm.output))
Example #7
    def test_try_adopt_task_instances_none(self):
        date = datetime.utcnow()
        start_date = datetime.utcnow() - timedelta(days=2)

        with DAG("test_try_adopt_task_instances_none"):
            task_1 = BaseOperator(task_id="task_1", start_date=start_date)

        ti1 = TaskInstance(task=task_1, execution_date=date)
        tis = [ti1]
        executor = celery_executor.CeleryExecutor()

        self.assertEqual(executor.try_adopt_task_instances(tis), tis)
Example #8
    def test_celery_integration(self, broker_url):
        with self._prepare_app(broker_url) as app:
            executor = celery_executor.CeleryExecutor()
            executor.start()

            with start_worker(app=app, logfile=sys.stdout, loglevel='info'):
                success_command = ['true', 'some_parameter']
                fail_command = ['false', 'some_parameter']
                execute_date = datetime.datetime.now()

                cached_celery_backend = celery_executor.execute_command.backend
                task_tuples_to_send = [
                    (('success', 'fake_simple_ti', execute_date, 0),
                     None, success_command, celery_executor.celery_configuration['task_default_queue'],
                     celery_executor.execute_command),
                    (('fail', 'fake_simple_ti', execute_date, 0),
                     None, fail_command, celery_executor.celery_configuration['task_default_queue'],
                     celery_executor.execute_command)
                ]

                chunksize = executor._num_tasks_per_send_process(len(task_tuples_to_send))
                num_processes = min(len(task_tuples_to_send), executor._sync_parallelism)

                send_pool = Pool(processes=num_processes)
                key_and_async_results = send_pool.map(
                    celery_executor.send_task_to_executor,
                    task_tuples_to_send,
                    chunksize=chunksize)

                send_pool.close()
                send_pool.join()

                for task_instance_key, _, result in key_and_async_results:
                    # Only pops when enqueued successfully, otherwise keep it
                    # and expect scheduler loop to deal with it.
                    result.backend = cached_celery_backend
                    executor.running.add(task_instance_key)
                    executor.tasks[task_instance_key] = result
                    executor.last_state[task_instance_key] = celery_states.PENDING

                executor.running.add(('success', 'fake_simple_ti', execute_date, 0))
                executor.running.add(('fail', 'fake_simple_ti', execute_date, 0))

                executor.end(synchronous=True)

        self.assertEqual(executor.event_buffer[('success', 'fake_simple_ti', execute_date, 0)], State.SUCCESS)
        self.assertEqual(executor.event_buffer[('fail', 'fake_simple_ti', execute_date, 0)], State.FAILED)

        self.assertNotIn('success', executor.tasks)
        self.assertNotIn('fail', executor.tasks)

        self.assertNotIn('success', executor.last_state)
        self.assertNotIn('fail', executor.last_state)
Example #9
    def test_error_sending_task(self):
        def fake_execute_command():
            pass

        with self._prepare_app(execute=fake_execute_command):
            # fake_execute_command takes no arguments while execute_command takes 1,
            # which will cause TypeError when calling task.apply_async()
            executor = celery_executor.CeleryExecutor()
            value_tuple = 'command', '_', 'queue', 'should_be_a_simple_ti'
            executor.queued_tasks['key'] = value_tuple
            executor.heartbeat()
        self.assertEqual(1, len(executor.queued_tasks))
        self.assertEqual(executor.queued_tasks['key'], value_tuple)
Example #10
    def test_error_sending_task(self):
        def fake_execute_command():
            pass

        with _prepare_app(execute=fake_execute_command):
            # fake_execute_command takes no arguments while execute_command takes 1,
            # which will cause TypeError when calling task.apply_async()
            executor = celery_executor.CeleryExecutor()
            task = BashOperator(task_id="test",
                                bash_command="true",
                                dag=DAG(dag_id='id'),
                                start_date=datetime.datetime.now())
            value_tuple = (
                'command',
                1,
                None,
                SimpleTaskInstance(ti=TaskInstance(task=task, execution_date=datetime.datetime.now())),
            )
            key = ('fail', 'fake_simple_ti', datetime.datetime.now(), 0)
            executor.queued_tasks[key] = value_tuple
            executor.heartbeat()
        self.assertEqual(1, len(executor.queued_tasks))
        self.assertEqual(executor.queued_tasks[key], value_tuple)
Example #11
    def test_try_adopt_task_instances(self):
        exec_date = timezone.utcnow() - timedelta(minutes=2)
        start_date = timezone.utcnow() - timedelta(days=2)
        queued_dttm = timezone.utcnow() - timedelta(minutes=1)

        try_number = 1

        with DAG("test_try_adopt_task_instances_none") as dag:
            task_1 = BaseOperator(task_id="task_1", start_date=start_date)
            task_2 = BaseOperator(task_id="task_2", start_date=start_date)

        ti1 = TaskInstance(task=task_1, execution_date=exec_date)
        ti1.external_executor_id = '231'
        ti1.queued_dttm = queued_dttm
        ti2 = TaskInstance(task=task_2, execution_date=exec_date)
        ti2.external_executor_id = '232'
        ti2.queued_dttm = queued_dttm

        tis = [ti1, ti2]
        executor = celery_executor.CeleryExecutor()
        self.assertEqual(executor.running, set())
        self.assertEqual(executor.adopted_task_timeouts, {})
        self.assertEqual(executor.tasks, {})

        not_adopted_tis = executor.try_adopt_task_instances(tis)

        key_1 = TaskInstanceKey(dag.dag_id, task_1.task_id, exec_date,
                                try_number)
        key_2 = TaskInstanceKey(dag.dag_id, task_2.task_id, exec_date,
                                try_number)
        self.assertEqual(executor.running, {key_1, key_2})
        self.assertEqual(
            dict(executor.adopted_task_timeouts),
            {
                key_1: queued_dttm + executor.task_adoption_timeout,
                key_2: queued_dttm + executor.task_adoption_timeout,
            },
        )
        self.assertEqual(executor.tasks, {
            key_1: AsyncResult("231"),
            key_2: AsyncResult("232")
        })
        self.assertEqual(not_adopted_tis, [])
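The executor.tasks assertion in Example #11 works because Celery's AsyncResult compares equal by task id, so a freshly constructed handle matches the one the executor built during adoption:

from celery.result import AsyncResult

# __eq__ on AsyncResult compares the task ids, not object identity.
assert AsyncResult("231") == AsyncResult("231")
assert AsyncResult("231") != AsyncResult("232")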
Example #12
    def test_exception_propagation(self):
        with self._prepare_app() as app:
            @app.task
            def fake_celery_task():
                return {}

            mock_log = mock.MagicMock()
            executor = celery_executor.CeleryExecutor()
            executor._log = mock_log

            executor.tasks = {'key': fake_celery_task()}
            executor.sync()

        assert mock_log.error.call_count == 1
        args, kwargs = mock_log.error.call_args_list[0]
        # Result of queuing is not a celery task but a dict,
        # and it should raise AttributeError and then get propagated
        # to the error log.
        self.assertIn(celery_executor.CELERY_FETCH_ERR_MSG_HEADER, args[0])
        self.assertIn('AttributeError', args[1])
Example #13
    def test_retry_on_error_sending_task(self):
        """Test that Airflow retries publishing tasks to Celery Broker at least 3 times"""

        with _prepare_app(), self.assertLogs(
                celery_executor.log) as cm, mock.patch.object(
                    # Mock `with timeout()` to _instantly_ fail.
                    celery_executor.timeout,
                    "__enter__",
                    side_effect=AirflowTaskTimeout,
                ):
            executor = celery_executor.CeleryExecutor()
            assert executor.task_publish_retries == {}
            assert executor.task_publish_max_retries == 3, "Assert Default Max Retries is 3"

            task = BashOperator(task_id="test",
                                bash_command="true",
                                dag=DAG(dag_id='id'),
                                start_date=datetime.now())
            when = datetime.now()
            value_tuple = (
                'command',
                1,
                None,
                SimpleTaskInstance(
                    ti=TaskInstance(task=task, execution_date=datetime.now())),
            )
            key = ('fail', 'fake_simple_ti', when, 0)
            executor.queued_tasks[key] = value_tuple

            # Test that when heartbeat is called again, task is published again to Celery Queue
            executor.heartbeat()
            assert dict(executor.task_publish_retries) == {key: 2}
            assert 1 == len(
                executor.queued_tasks), "Task should remain in queue"
            assert executor.event_buffer == {}
            assert ("INFO:airflow.executors.celery_executor.CeleryExecutor:"
                    f"[Try 1 of 3] Task Timeout Error for Task: ({key})."
                    in cm.output)

            executor.heartbeat()
            assert dict(executor.task_publish_retries) == {key: 3}
            assert 1 == len(
                executor.queued_tasks), "Task should remain in queue"
            assert executor.event_buffer == {}
            assert ("INFO:airflow.executors.celery_executor.CeleryExecutor:"
                    f"[Try 2 of 3] Task Timeout Error for Task: ({key})."
                    in cm.output)

            executor.heartbeat()
            assert dict(executor.task_publish_retries) == {key: 4}
            assert 1 == len(
                executor.queued_tasks), "Task should remain in queue"
            assert executor.event_buffer == {}
            assert ("INFO:airflow.executors.celery_executor.CeleryExecutor:"
                    f"[Try 3 of 3] Task Timeout Error for Task: ({key})."
                    in cm.output)

            executor.heartbeat()
            assert dict(executor.task_publish_retries) == {}
            assert 0 == len(
                executor.queued_tasks), "Task should no longer be in queue"
            assert executor.event_buffer[('fail', 'fake_simple_ti', when,
                                          0)][0] == State.FAILED
Example #14
    def test_retry_on_error_sending_task(self):
        """Test that Airflow retries publishing tasks to Celery Broker at least 3 times"""
        def fake_execute_command(command):
            print(command)

        with _prepare_app(execute=fake_execute_command), self.assertLogs(
                celery_executor.log) as cm, mock.patch.object(
                    celery_executor, "OPERATION_TIMEOUT", 0.001):
            # OPERATION_TIMEOUT is patched to 0.001s above, so every attempt
            # to publish the task times out and exercises the retry path
            executor = celery_executor.CeleryExecutor()
            self.assertEqual(executor.task_publish_retries, {})
            self.assertEqual(executor.task_publish_max_retries,
                             3,
                             msg="Assert Default Max Retries is 3")

            task = BashOperator(task_id="test",
                                bash_command="true",
                                dag=DAG(dag_id='id'),
                                start_date=datetime.now())
            when = datetime.now()
            value_tuple = (
                'command',
                1,
                None,
                SimpleTaskInstance(
                    ti=TaskInstance(task=task, execution_date=datetime.now())),
            )
            key = ('fail', 'fake_simple_ti', when, 0)
            executor.queued_tasks[key] = value_tuple

            # Test that when heartbeat is called again, task is published again to Celery Queue
            executor.heartbeat()
            self.assertEqual(dict(executor.task_publish_retries), {key: 2})
            self.assertEqual(1, len(executor.queued_tasks),
                             "Task should remain in queue")
            self.assertEqual(executor.event_buffer, {})
            self.assertIn(
                "INFO:airflow.executors.celery_executor.CeleryExecutor:"
                f"[Try 1 of 3] Task Timeout Error for Task: ({key}).",
                cm.output,
            )

            executor.heartbeat()
            self.assertEqual(dict(executor.task_publish_retries), {key: 3})
            self.assertEqual(1, len(executor.queued_tasks),
                             "Task should remain in queue")
            self.assertEqual(executor.event_buffer, {})
            self.assertIn(
                "INFO:airflow.executors.celery_executor.CeleryExecutor:"
                f"[Try 2 of 3] Task Timeout Error for Task: ({key}).",
                cm.output,
            )

            executor.heartbeat()
            self.assertEqual(dict(executor.task_publish_retries), {key: 4})
            self.assertEqual(1, len(executor.queued_tasks),
                             "Task should remain in queue")
            self.assertEqual(executor.event_buffer, {})
            self.assertIn(
                "INFO:airflow.executors.celery_executor.CeleryExecutor:"
                f"[Try 3 of 3] Task Timeout Error for Task: ({key}).",
                cm.output,
            )

            executor.heartbeat()
            self.assertEqual(dict(executor.task_publish_retries), {})
            self.assertEqual(0, len(executor.queued_tasks),
                             "Task should no longer be in queue")
            self.assertEqual(
                executor.event_buffer[('fail', 'fake_simple_ti', when, 0)][0],
                State.FAILED)
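Both retry variants (Examples #13 and #14) assert the same bookkeeping: task_publish_retries holds the number of the next attempt, and the task is only failed once that number exceeds task_publish_max_retries. A condensed sketch of that scheduler-side logic, reconstructed from the assertions above; the function name and exact control flow are assumptions:

from airflow.utils.state import State


def _handle_publish_timeout(executor, key):
    # Reconstructed sketch: after logging "Try N of 3" the counter is set
    # to N + 1, so the observed values run 2, 3, 4 before the task fails.
    attempt = executor.task_publish_retries.get(key, 1)
    if attempt <= executor.task_publish_max_retries:
        executor.log.info("[Try %s of %s] Task Timeout Error for Task: (%s).",
                          attempt, executor.task_publish_max_retries, key)
        executor.task_publish_retries[key] = attempt + 1  # stays queued
    else:
        del executor.task_publish_retries[key]  # give up on this key
        executor.queued_tasks.pop(key, None)
        executor.event_buffer[key] = (State.FAILED, None)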