def test_check_for_stalled_adopted_tasks(self):
    exec_date = timezone.utcnow() - timedelta(minutes=40)
    start_date = timezone.utcnow() - timedelta(days=2)
    queued_dttm = timezone.utcnow() - timedelta(minutes=30)

    try_number = 1

    with DAG("test_check_for_stalled_adopted_tasks") as dag:
        task_1 = BaseOperator(task_id="task_1", start_date=start_date)
        task_2 = BaseOperator(task_id="task_2", start_date=start_date)

    key_1 = TaskInstanceKey(dag.dag_id, task_1.task_id, exec_date, try_number)
    key_2 = TaskInstanceKey(dag.dag_id, task_2.task_id, exec_date, try_number)

    executor = celery_executor.CeleryExecutor()
    executor.adopted_task_timeouts = {
        key_1: queued_dttm + executor.task_adoption_timeout,
        key_2: queued_dttm + executor.task_adoption_timeout,
    }
    executor.tasks = {key_1: AsyncResult("231"), key_2: AsyncResult("232")}
    executor.sync()
    self.assertEqual(
        executor.event_buffer,
        {key_1: (State.FAILED, None), key_2: (State.FAILED, None)},
    )
    self.assertEqual(executor.tasks, {})
    self.assertEqual(executor.adopted_task_timeouts, {})
def test_celery_integration(self, broker_url):
    success_command = ['airflow', 'run', 'true', 'some_parameter']
    fail_command = ['airflow', 'version']

    def fake_execute_command(command):
        if command != success_command:
            raise AirflowException("fail")

    with self._prepare_app(broker_url, execute=fake_execute_command) as app:
        executor = celery_executor.CeleryExecutor()
        executor.start()

        with start_worker(app=app, logfile=sys.stdout, loglevel='debug'):
            cached_celery_backend = celery_executor.execute_command.backend

            task_tuples_to_send = [
                ('success', 'fake_simple_ti', success_command,
                 celery_executor.celery_configuration['task_default_queue'],
                 celery_executor.execute_command),
                ('fail', 'fake_simple_ti', fail_command,
                 celery_executor.celery_configuration['task_default_queue'],
                 celery_executor.execute_command),
            ]

            chunksize = executor._num_tasks_per_send_process(len(task_tuples_to_send))
            num_processes = min(len(task_tuples_to_send), executor._sync_parallelism)

            send_pool = Pool(processes=num_processes)
            key_and_async_results = send_pool.map(
                celery_executor.send_task_to_executor,
                task_tuples_to_send,
                chunksize=chunksize)

            send_pool.close()
            send_pool.join()

            for key, command, result in key_and_async_results:
                # Only pops when enqueued successfully, otherwise keep it
                # and expect scheduler loop to deal with it.
                result.backend = cached_celery_backend
                executor.running[key] = command
                executor.tasks[key] = result
                executor.last_state[key] = celery_states.PENDING

            executor.running['success'] = True
            executor.running['fail'] = True

            executor.end(synchronous=True)

    self.assertEqual(executor.event_buffer['success'], State.SUCCESS)
    self.assertEqual(executor.event_buffer['fail'], State.FAILED)

    self.assertNotIn('success', executor.tasks)
    self.assertNotIn('fail', executor.tasks)

    self.assertNotIn('success', executor.last_state)
    self.assertNotIn('fail', executor.last_state)
def test_error_sending_task(self):
    def fake_execute_command():
        pass

    with _prepare_app(execute=fake_execute_command):
        # fake_execute_command takes no arguments while execute_command takes 1,
        # which will cause TypeError when calling task.apply_async()
        executor = celery_executor.CeleryExecutor()
        task = BashOperator(
            task_id="test",
            bash_command="true",
            dag=DAG(dag_id='id'),
            start_date=datetime.now(),
        )
        when = datetime.now()
        value_tuple = (
            'command',
            1,
            None,
            SimpleTaskInstance(ti=TaskInstance(task=task, execution_date=datetime.now())),
        )
        key = ('fail', 'fake_simple_ti', when, 0)
        executor.queued_tasks[key] = value_tuple
        executor.task_publish_retries[key] = 1
        executor.heartbeat()
    assert 0 == len(executor.queued_tasks), "Task should no longer be queued"
    assert executor.event_buffer[('fail', 'fake_simple_ti', when, 0)][0] == State.FAILED
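Several of these tests use a module-level `_prepare_app` context manager that is not included in this excerpt. As a rough sketch of what such a helper might look like, assuming it builds a throw-away Celery app from the executor's configuration and patches it (together with a test version of `execute_command`) into `airflow.executors.celery_executor` for the duration of the test; the defaults and patch targets below are assumptions, not the actual Airflow implementation:

import contextlib
from unittest import mock

from celery import Celery

from airflow.executors import celery_executor


@contextlib.contextmanager
def _prepare_app(broker_url=None, execute=None):
    # Hypothetical helper: copy the executor's Celery config, optionally overriding the broker.
    test_config = dict(celery_executor.celery_configuration)
    if broker_url:
        test_config['broker_url'] = broker_url
    if execute is None:
        def execute(command):  # no-op stand-in when the test does not supply one (assumption)
            pass

    test_app = Celery(config_source=test_config)
    test_execute = test_app.task(execute)

    # Swap the real Celery app and task for the test doubles while the context is active.
    with mock.patch('airflow.executors.celery_executor.app', test_app), \
         mock.patch('airflow.executors.celery_executor.execute_command', test_execute):
        yield test_app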
def test_gauge_executor_metrics(self, mock_stats_gauge, mock_trigger_tasks, mock_sync):
    executor = celery_executor.CeleryExecutor()
    executor.heartbeat()
    calls = [
        mock.call('executor.open_slots', mock.ANY),
        mock.call('executor.queued_tasks', mock.ANY),
        mock.call('executor.running_tasks', mock.ANY),
    ]
    mock_stats_gauge.assert_has_calls(calls)
def test_celery_integration(self, broker_url):
    success_command = ['airflow', 'tasks', 'run', 'true', 'some_parameter']
    fail_command = ['airflow', 'version']

    def fake_execute_command(command):
        if command != success_command:
            raise AirflowException("fail")

    with _prepare_app(broker_url, execute=fake_execute_command) as app:
        executor = celery_executor.CeleryExecutor()
        self.assertEqual(executor.tasks, {})
        executor.start()

        with start_worker(app=app, logfile=sys.stdout, loglevel='info'):
            execute_date = datetime.now()

            task_tuples_to_send = [
                (('success', 'fake_simple_ti', execute_date, 0),
                 None,
                 success_command,
                 celery_executor.celery_configuration['task_default_queue'],
                 celery_executor.execute_command),
                (('fail', 'fake_simple_ti', execute_date, 0),
                 None,
                 fail_command,
                 celery_executor.celery_configuration['task_default_queue'],
                 celery_executor.execute_command),
            ]

            # "Enqueue" them. We don't have a real SimpleTaskInstance, so directly edit the dict
            for (key, simple_ti, command, queue, task) in task_tuples_to_send:  # pylint: disable=W0612
                executor.queued_tasks[key] = (command, 1, queue, simple_ti)

            executor._process_tasks(task_tuples_to_send)

            self.assertEqual(
                list(executor.tasks.keys()),
                [('success', 'fake_simple_ti', execute_date, 0),
                 ('fail', 'fake_simple_ti', execute_date, 0)])
            self.assertEqual(
                executor.event_buffer[('success', 'fake_simple_ti', execute_date, 0)][0],
                State.QUEUED)
            self.assertEqual(
                executor.event_buffer[('fail', 'fake_simple_ti', execute_date, 0)][0],
                State.QUEUED)

            executor.end(synchronous=True)

    self.assertEqual(
        executor.event_buffer[('success', 'fake_simple_ti', execute_date, 0)][0],
        State.SUCCESS)
    self.assertEqual(
        executor.event_buffer[('fail', 'fake_simple_ti', execute_date, 0)][0],
        State.FAILED)

    self.assertNotIn('success', executor.tasks)
    self.assertNotIn('fail', executor.tasks)

    self.assertEqual(executor.queued_tasks, {})
    self.assertEqual(timedelta(0, 600), executor.task_adoption_timeout)
def test_exception_propagation(self):
    with _prepare_app(), self.assertLogs(celery_executor.log) as cm:
        executor = celery_executor.CeleryExecutor()
        executor.tasks = {'key': FakeCeleryResult()}
        executor.bulk_state_fetcher._get_many_using_multiprocessing(executor.tasks.values())

    self.assertTrue(any(celery_executor.CELERY_FETCH_ERR_MSG_HEADER in line for line in cm.output))
    self.assertTrue(any("Exception" in line for line in cm.output))
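The test above relies on a `FakeCeleryResult` stub that is not defined in this excerpt; it stands in for a Celery `AsyncResult` whose state cannot be fetched. A minimal sketch of such a stub, assuming it only needs a `state` accessor that raises and a `task_id` used for logging (the exact shape is an assumption):

class FakeCeleryResult:
    @property
    def state(self):
        # Simulate a failure while fetching the task state from the result backend,
        # so the executor's error-handling/logging path is exercised.
        raise Exception("state fetch failed")

    def task_id(self):
        return "task_id"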
def test_try_adopt_task_instances_none(self):
    date = datetime.utcnow()
    start_date = datetime.utcnow() - timedelta(days=2)

    with DAG("test_try_adopt_task_instances_none"):
        task_1 = BaseOperator(task_id="task_1", start_date=start_date)

    key1 = TaskInstance(task=task_1, execution_date=date)
    tis = [key1]
    executor = celery_executor.CeleryExecutor()

    self.assertEqual(executor.try_adopt_task_instances(tis), tis)
def test_celery_integration(self, broker_url):
    with self._prepare_app(broker_url) as app:
        executor = celery_executor.CeleryExecutor()
        executor.start()

        with start_worker(app=app, logfile=sys.stdout, loglevel='info'):
            success_command = ['true', 'some_parameter']
            fail_command = ['false', 'some_parameter']
            execute_date = datetime.datetime.now()

            cached_celery_backend = celery_executor.execute_command.backend

            task_tuples_to_send = [
                (('success', 'fake_simple_ti', execute_date, 0),
                 None,
                 success_command,
                 celery_executor.celery_configuration['task_default_queue'],
                 celery_executor.execute_command),
                (('fail', 'fake_simple_ti', execute_date, 0),
                 None,
                 fail_command,
                 celery_executor.celery_configuration['task_default_queue'],
                 celery_executor.execute_command),
            ]

            chunksize = executor._num_tasks_per_send_process(len(task_tuples_to_send))
            num_processes = min(len(task_tuples_to_send), executor._sync_parallelism)

            send_pool = Pool(processes=num_processes)
            key_and_async_results = send_pool.map(
                celery_executor.send_task_to_executor,
                task_tuples_to_send,
                chunksize=chunksize)

            send_pool.close()
            send_pool.join()

            for task_instance_key, _, result in key_and_async_results:
                # Only pops when enqueued successfully, otherwise keep it
                # and expect scheduler loop to deal with it.
                result.backend = cached_celery_backend
                executor.running.add(task_instance_key)
                executor.tasks[task_instance_key] = result
                executor.last_state[task_instance_key] = celery_states.PENDING

            executor.running.add(('success', 'fake_simple_ti', execute_date, 0))
            executor.running.add(('fail', 'fake_simple_ti', execute_date, 0))

            executor.end(synchronous=True)

    self.assertEqual(executor.event_buffer[('success', 'fake_simple_ti', execute_date, 0)], State.SUCCESS)
    self.assertEqual(executor.event_buffer[('fail', 'fake_simple_ti', execute_date, 0)], State.FAILED)

    self.assertNotIn('success', executor.tasks)
    self.assertNotIn('fail', executor.tasks)

    self.assertNotIn('success', executor.last_state)
    self.assertNotIn('fail', executor.last_state)
def test_error_sending_task(self):
    def fake_execute_command():
        pass

    with self._prepare_app(execute=fake_execute_command):
        # fake_execute_command takes no arguments while execute_command takes 1,
        # which will cause TypeError when calling task.apply_async()
        executor = celery_executor.CeleryExecutor()
        value_tuple = 'command', '_', 'queue', 'should_be_a_simple_ti'
        executor.queued_tasks['key'] = value_tuple
        executor.heartbeat()
    self.assertEqual(1, len(executor.queued_tasks))
    self.assertEqual(executor.queued_tasks['key'], value_tuple)
def test_error_sending_task(self):
    def fake_execute_command():
        pass

    with _prepare_app(execute=fake_execute_command):
        # fake_execute_command takes no arguments while execute_command takes 1,
        # which will cause TypeError when calling task.apply_async()
        executor = celery_executor.CeleryExecutor()
        task = BashOperator(
            task_id="test",
            bash_command="true",
            dag=DAG(dag_id='id'),
            start_date=datetime.datetime.now(),
        )
        value_tuple = (
            'command',
            1,
            None,
            SimpleTaskInstance(ti=TaskInstance(task=task, execution_date=datetime.datetime.now())),
        )
        key = ('fail', 'fake_simple_ti', datetime.datetime.now(), 0)
        executor.queued_tasks[key] = value_tuple
        executor.heartbeat()
    self.assertEqual(1, len(executor.queued_tasks))
    self.assertEqual(executor.queued_tasks[key], value_tuple)
def test_try_adopt_task_instances(self):
    exec_date = timezone.utcnow() - timedelta(minutes=2)
    start_date = timezone.utcnow() - timedelta(days=2)
    queued_dttm = timezone.utcnow() - timedelta(minutes=1)

    try_number = 1

    with DAG("test_try_adopt_task_instances_none") as dag:
        task_1 = BaseOperator(task_id="task_1", start_date=start_date)
        task_2 = BaseOperator(task_id="task_2", start_date=start_date)

    ti1 = TaskInstance(task=task_1, execution_date=exec_date)
    ti1.external_executor_id = '231'
    ti1.queued_dttm = queued_dttm
    ti2 = TaskInstance(task=task_2, execution_date=exec_date)
    ti2.external_executor_id = '232'
    ti2.queued_dttm = queued_dttm
    tis = [ti1, ti2]

    executor = celery_executor.CeleryExecutor()
    self.assertEqual(executor.running, set())
    self.assertEqual(executor.adopted_task_timeouts, {})
    self.assertEqual(executor.tasks, {})

    not_adopted_tis = executor.try_adopt_task_instances(tis)

    key_1 = TaskInstanceKey(dag.dag_id, task_1.task_id, exec_date, try_number)
    key_2 = TaskInstanceKey(dag.dag_id, task_2.task_id, exec_date, try_number)
    self.assertEqual(executor.running, {key_1, key_2})
    self.assertEqual(
        dict(executor.adopted_task_timeouts),
        {
            key_1: queued_dttm + executor.task_adoption_timeout,
            key_2: queued_dttm + executor.task_adoption_timeout,
        },
    )
    self.assertEqual(executor.tasks, {key_1: AsyncResult("231"), key_2: AsyncResult("232")})
    self.assertEqual(not_adopted_tis, [])
def test_exception_propagation(self):
    with self._prepare_app() as app:
        @app.task
        def fake_celery_task():
            return {}

        mock_log = mock.MagicMock()
        executor = celery_executor.CeleryExecutor()
        executor._log = mock_log

        executor.tasks = {'key': fake_celery_task()}
        executor.sync()

    assert mock_log.error.call_count == 1
    args, kwargs = mock_log.error.call_args_list[0]
    # Result of queuing is not a celery task but a dict,
    # and it should raise AttributeError and then get propagated
    # to the error log.
    self.assertIn(celery_executor.CELERY_FETCH_ERR_MSG_HEADER, args[0])
    self.assertIn('AttributeError', args[1])
def test_retry_on_error_sending_task(self):
    """Test that Airflow retries publishing tasks to Celery Broker at least 3 times"""

    with _prepare_app(), self.assertLogs(celery_executor.log) as cm, mock.patch.object(
        # Mock `with timeout()` to _instantly_ fail.
        celery_executor.timeout,
        "__enter__",
        side_effect=AirflowTaskTimeout,
    ):
        executor = celery_executor.CeleryExecutor()
        assert executor.task_publish_retries == {}
        assert executor.task_publish_max_retries == 3, "Assert Default Max Retries is 3"

        task = BashOperator(
            task_id="test",
            bash_command="true",
            dag=DAG(dag_id='id'),
            start_date=datetime.now(),
        )
        when = datetime.now()
        value_tuple = (
            'command',
            1,
            None,
            SimpleTaskInstance(ti=TaskInstance(task=task, execution_date=datetime.now())),
        )
        key = ('fail', 'fake_simple_ti', when, 0)
        executor.queued_tasks[key] = value_tuple

        # Test that when heartbeat is called again, task is published again to Celery Queue
        executor.heartbeat()
        assert dict(executor.task_publish_retries) == {key: 2}
        assert 1 == len(executor.queued_tasks), "Task should remain in queue"
        assert executor.event_buffer == {}
        assert (
            "INFO:airflow.executors.celery_executor.CeleryExecutor:"
            f"[Try 1 of 3] Task Timeout Error for Task: ({key})." in cm.output
        )

        executor.heartbeat()
        assert dict(executor.task_publish_retries) == {key: 3}
        assert 1 == len(executor.queued_tasks), "Task should remain in queue"
        assert executor.event_buffer == {}
        assert (
            "INFO:airflow.executors.celery_executor.CeleryExecutor:"
            f"[Try 2 of 3] Task Timeout Error for Task: ({key})." in cm.output
        )

        executor.heartbeat()
        assert dict(executor.task_publish_retries) == {key: 4}
        assert 1 == len(executor.queued_tasks), "Task should remain in queue"
        assert executor.event_buffer == {}
        assert (
            "INFO:airflow.executors.celery_executor.CeleryExecutor:"
            f"[Try 3 of 3] Task Timeout Error for Task: ({key})." in cm.output
        )

        executor.heartbeat()
        assert dict(executor.task_publish_retries) == {}
        assert 0 == len(executor.queued_tasks), "Task should no longer be in queue"
        assert executor.event_buffer[('fail', 'fake_simple_ti', when, 0)][0] == State.FAILED
def test_retry_on_error_sending_task(self):
    """Test that Airflow retries publishing tasks to Celery Broker at least 3 times"""

    def fake_execute_command(command):
        print(command)

    with _prepare_app(execute=fake_execute_command), self.assertLogs(
        celery_executor.log
    ) as cm, mock.patch.object(celery_executor, "OPERATION_TIMEOUT", 0.001):
        # OPERATION_TIMEOUT is patched to a tiny value so that publishing the task to the
        # Celery broker times out, which is what triggers the retry logic under test.
        executor = celery_executor.CeleryExecutor()
        self.assertEqual(executor.task_publish_retries, {})
        self.assertEqual(executor.task_publish_max_retries, 3, msg="Assert Default Max Retries is 3")

        task = BashOperator(
            task_id="test",
            bash_command="true",
            dag=DAG(dag_id='id'),
            start_date=datetime.now(),
        )
        when = datetime.now()
        value_tuple = (
            'command',
            1,
            None,
            SimpleTaskInstance(ti=TaskInstance(task=task, execution_date=datetime.now())),
        )
        key = ('fail', 'fake_simple_ti', when, 0)
        executor.queued_tasks[key] = value_tuple

        # Test that when heartbeat is called again, task is published again to Celery Queue
        executor.heartbeat()
        self.assertEqual(dict(executor.task_publish_retries), {key: 2})
        self.assertEqual(1, len(executor.queued_tasks), "Task should remain in queue")
        self.assertEqual(executor.event_buffer, {})
        self.assertIn(
            "INFO:airflow.executors.celery_executor.CeleryExecutor:"
            f"[Try 1 of 3] Task Timeout Error for Task: ({key}).",
            cm.output,
        )

        executor.heartbeat()
        self.assertEqual(dict(executor.task_publish_retries), {key: 3})
        self.assertEqual(1, len(executor.queued_tasks), "Task should remain in queue")
        self.assertEqual(executor.event_buffer, {})
        self.assertIn(
            "INFO:airflow.executors.celery_executor.CeleryExecutor:"
            f"[Try 2 of 3] Task Timeout Error for Task: ({key}).",
            cm.output,
        )

        executor.heartbeat()
        self.assertEqual(dict(executor.task_publish_retries), {key: 4})
        self.assertEqual(1, len(executor.queued_tasks), "Task should remain in queue")
        self.assertEqual(executor.event_buffer, {})
        self.assertIn(
            "INFO:airflow.executors.celery_executor.CeleryExecutor:"
            f"[Try 3 of 3] Task Timeout Error for Task: ({key}).",
            cm.output,
        )

        executor.heartbeat()
        self.assertEqual(dict(executor.task_publish_retries), {})
        self.assertEqual(0, len(executor.queued_tasks), "Task should no longer be in queue")
        self.assertEqual(
            executor.event_buffer[('fail', 'fake_simple_ti', when, 0)][0], State.FAILED)