def test_error_sending_task(self):
    @app.task
    def fake_execute_command():
        pass

    # fake_execute_command takes no arguments while execute_command takes 1,
    # which will cause TypeError when calling task.apply_async()
    celery_executor.execute_command = fake_execute_command
    executor = CeleryExecutor()
    value_tuple = 'command', '_', 'queue', 'should_be_a_simple_ti'
    executor.queued_tasks['key'] = value_tuple
    executor.heartbeat()
    self.assertEqual(1, len(executor.queued_tasks))
    self.assertEqual(executor.queued_tasks['key'], value_tuple)
def _get_executor(executor_name):
    """
    Creates a new instance of the named executor.
    If the executor name is not known to Airflow, look for it in the plugins.
    """
    parallelism = PARALLELISM
    if executor_name == Executors.LocalExecutor:
        return LocalExecutor(parallelism)
    elif executor_name == Executors.SequentialExecutor:
        return SequentialExecutor(parallelism)
    elif executor_name == Executors.CeleryExecutor:
        from airflow.executors.celery_executor import CeleryExecutor, execute_command
        return CeleryExecutor(parallelism, execute_command)
    elif executor_name == Executors.DaskExecutor:
        from airflow.executors.dask_executor import DaskExecutor
        cluster_address = configuration.conf.get('dask', 'cluster_address')
        tls_ca = configuration.conf.get('dask', 'tls_ca')
        tls_key = configuration.conf.get('dask', 'tls_key')
        tls_cert = configuration.conf.get('dask', 'tls_cert')
        return DaskExecutor(parallelism, cluster_address, tls_ca, tls_key, tls_cert)
    elif executor_name == Executors.MesosExecutor:
        from airflow.contrib.executors.mesos_executor import MesosExecutor
        return MesosExecutor(parallelism)
    elif executor_name == Executors.KubernetesExecutor:
        from airflow.contrib.executors.kubernetes_executor import KubernetesExecutor
        return KubernetesExecutor()
    else:
        # Loading plugins
        _integrate_plugins()
        # Fetch the specified class from the plugin module
        args = []
        kwargs = {'parallelism': PARALLELISM}
        return create_object_from_plugin_module(executor_name, *args, **kwargs)
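# Usage sketch (assumption, not part of the snippet above): in the 1.10-era
# module this loader lives in, the usual entry point is GetDefaultExecutor(),
# which reads the configured executor name and delegates to _get_executor().
from airflow.executors import GetDefaultExecutor

executor = GetDefaultExecutor()  # e.g. CeleryExecutor(parallelism, execute_command)
executor.start()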
def _create_subdag(subdag_func, parent_dag, task_id, phase, default_args,
                   trigger_rule='all_success'):
    """Creates a subdag, initiated by a StartOperator and ended by a FinishOperator.

    Also uses an internal function, passed as an argument, to create the other
    subdag operators in between.
    """
    subdag = DAG(dag_id='{}.{}'.format(parent_dag.dag_id, task_id),
                 schedule_interval=None,
                 catchup=False,
                 default_args=default_args)
    begin_task = StartOperator(phase, trigger_rule=trigger_rule, **default_args)
    subdag >> begin_task
    end_task = FinishOperator(phase, trigger_rule=trigger_rule, **default_args)
    subdag >> end_task
    subdag_func(begin_task, end_task)
    return SubDagOperator(subdag=subdag, task_id=task_id, dag=parent_dag,
                          executor=CeleryExecutor())
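# Usage sketch (hypothetical names): a subdag_func receives the begin and end
# tasks created by _create_subdag and wires its own operators between them.
# DummyOperator, main_dag and default_args are assumed to exist in the caller,
# and begin_task.dag is assumed to resolve to the subdag built above.
def _load_phase(begin_task, end_task):
    work = DummyOperator(task_id='load', dag=begin_task.dag)
    begin_task >> work >> end_task

load = _create_subdag(_load_phase, main_dag, 'load', 'load_phase', default_args)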
def test_job_id_setter(self):
    cel_exec = CeleryExecutor()
    k8s_exec = KubernetesExecutor()
    cel_k8s_exec = CeleryKubernetesExecutor(cel_exec, k8s_exec)
    job_id = 'this-job-id'
    cel_k8s_exec.job_id = job_id
    assert cel_exec.job_id == k8s_exec.job_id == cel_k8s_exec.job_id == job_id
def _get_executor(executor_name: str) -> BaseExecutor:
    """
    Creates a new instance of the named executor.
    If the executor name is unknown to Airflow, look for it in the plugins.
    """
    if executor_name == ExecutorLoader.LOCAL_EXECUTOR:
        from airflow.executors.local_executor import LocalExecutor
        return LocalExecutor()
    elif executor_name == ExecutorLoader.SEQUENTIAL_EXECUTOR:
        from airflow.executors.sequential_executor import SequentialExecutor
        return SequentialExecutor()
    elif executor_name == ExecutorLoader.CELERY_EXECUTOR:
        from airflow.executors.celery_executor import CeleryExecutor
        return CeleryExecutor()
    elif executor_name == ExecutorLoader.DASK_EXECUTOR:
        from airflow.executors.dask_executor import DaskExecutor
        return DaskExecutor()
    elif executor_name == ExecutorLoader.KUBERNETES_EXECUTOR:
        from airflow.executors.kubernetes_executor import KubernetesExecutor
        return KubernetesExecutor()
    else:
        # Load plugins here for executors as at that time the plugins might not
        # have been initialized yet.
        # TODO: verify the above and remove the two lines below in case plugins
        # are always initialized first.
        from airflow import plugins_manager
        plugins_manager.integrate_executor_plugins()
        executor_path = executor_name.split('.')
        assert len(executor_path) == 2, (
            f"Executor {executor_name} not supported: "
            f"please specify in format plugin_module.executor"
        )
        assert executor_path[0] in globals(), f"Executor {executor_name} not supported"
        return globals()[executor_path[0]].__dict__[executor_path[1]]()
def _get_executor(executor_name):
    """
    Creates a new instance of the named executor.
    If the executor name is not known to Airflow, look for it in the plugins.
    """
    if executor_name == 'LocalExecutor':
        return LocalExecutor()
    elif executor_name == 'SequentialExecutor':
        return SequentialExecutor()
    elif executor_name == 'CeleryExecutor':
        from airflow.executors.celery_executor import CeleryExecutor
        return CeleryExecutor()
    elif executor_name == 'DaskExecutor':
        from airflow.executors.dask_executor import DaskExecutor
        return DaskExecutor()
    elif executor_name == 'MesosExecutor':
        from airflow.contrib.executors.mesos_executor import MesosExecutor
        return MesosExecutor()
    else:
        # Loading plugins
        _integrate_plugins()
        executor_path = executor_name.split('.')
        if len(executor_path) != 2:
            raise AirflowException(
                "Executor {0} not supported: please specify in format "
                "plugin_module.executor".format(executor_name))
        if executor_path[0] in globals():
            return globals()[executor_path[0]].__dict__[executor_path[1]]()
        else:
            raise AirflowException(
                "Executor {0} not supported.".format(executor_name))
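# Usage sketch (hypothetical plugin names): a plugin-provided executor is
# addressed in the "plugin_module.executor" form that the else-branch above
# parses; "my_plugin" and "MyExecutor" do not exist and only show the format.
executor = _get_executor('my_plugin.MyExecutor')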
def test_exception_propagation(self):
    @app.task
    def fake_celery_task():
        return {}

    mock_log = mock.MagicMock()
    executor = CeleryExecutor()
    executor._log = mock_log

    executor.tasks = {'key': fake_celery_task()}
    executor.sync()
    mock_log.error.assert_called_once()
    args, kwargs = mock_log.error.call_args_list[0]
    log = args[0]
    # Result of queuing is not a celery task but a dict,
    # and it should raise AttributeError and then get propagated
    # to the error log.
    self.assertIn(CELERY_FETCH_ERR_MSG_HEADER, log)
    self.assertIn('AttributeError', log)
def get_sub_dag_operator(dag, sub_dag_name, sub_dag_func, **kwargs):
    """
    Gets a sub-dag operator.

    :param dag: the main dag which will contain the subdag
    :param sub_dag_name: the name of the subdag, used as its task id
    :param sub_dag_func: the method for building the subdag
    :return: the new subdag operator
    """
    return SubDagOperator(
        subdag=sub_dag_func(sub_dag_name, **kwargs),
        task_id=sub_dag_name,
        dag=dag,
        executor=CeleryExecutor(),
    )
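# Usage sketch (hypothetical names): a factory matching the
# sub_dag_func(sub_dag_name, **kwargs) contract expected by the helper above.
# SubDagOperator requires the child dag_id to be '<parent_dag_id>.<task_id>'.
def build_cleanup_subdag(sub_dag_name, parent_dag_id=None, **kwargs):
    return DAG(dag_id='{}.{}'.format(parent_dag_id, sub_dag_name),
               schedule_interval=None)

cleanup = get_sub_dag_operator(dag, 'cleanup', build_cleanup_subdag,
                               parent_dag_id=dag.dag_id)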
def test_queue_command(self, test_queue, k8s_queue_cmd, celery_queue_cmd):
    kwargs = dict(
        command=['airflow', 'run', 'dag'],
        priority=1,
        queue='default',
    )
    kwarg_values = kwargs.values()

    cke = CeleryKubernetesExecutor(CeleryExecutor(), KubernetesExecutor())

    simple_task_instance = mock.MagicMock()
    simple_task_instance.queue = test_queue

    cke.queue_command(simple_task_instance, **kwargs)

    if test_queue == KUBERNETES_QUEUE:
        k8s_queue_cmd.assert_called_once_with(simple_task_instance, *kwarg_values)
        celery_queue_cmd.assert_not_called()
    else:
        celery_queue_cmd.assert_called_once_with(simple_task_instance, *kwarg_values)
        k8s_queue_cmd.assert_not_called()
def test_exception_propagation(self):
    @app.task
    def fake_celery_task():
        return {}

    mock_log = mock.MagicMock()
    executor = CeleryExecutor()
    executor._log = mock_log

    executor.tasks = {'key': fake_celery_task()}
    executor.sync()
    assert mock_log.error.call_count == 1
    args, kwargs = mock_log.error.call_args_list[0]
    # Result of queuing is not a celery task but a dict,
    # and it should raise AttributeError and then get propagated
    # to the error log.
    self.assertIn(celery_executor.CELERY_FETCH_ERR_MSG_HEADER, args[0])
    self.assertIn('AttributeError', args[1])
        execution_date=DEFAULT_DATE,
        origin='/home',
    )
    resp = admin_client.post('run', data=form, follow_redirects=True)
    check_content_in_response('', resp)

    msg = (
        f"Task is in the '{state}' state which is not a valid state for "
        f"execution. The task must be cleared in order to be run"
    )
    assert not re.search(msg, resp.get_data(as_text=True))


@pytest.mark.parametrize("state", QUEUEABLE_STATES)
@unittest.mock.patch(
    'airflow.executors.executor_loader.ExecutorLoader.get_default_executor',
    return_value=CeleryExecutor(),
)
def test_run_with_not_runnable_states(_, admin_client, session, state):
    assert state not in RUNNABLE_STATES

    task_id = 'runme_0'
    session.query(TaskInstance).filter(TaskInstance.task_id == task_id).update(
        {'state': state, 'end_date': timezone.utcnow()}
    )
    session.commit()

    form = dict(
        task_id=task_id,
        dag_id="example_bash_operator",
def test_celery_integration(self):
    executor = CeleryExecutor()
    executor.start()
    with start_worker(app=app, logfile=sys.stdout, loglevel='debug'):
        success_command = ['true', 'some_parameter']
        fail_command = ['false', 'some_parameter']

        executor.execute_async(key='success', command=success_command)
        # errors are propagated for some reason
        try:
            executor.execute_async(key='fail', command=fail_command)
        except Exception:
            pass
        executor.running['success'] = True
        executor.running['fail'] = True

        executor.end(synchronous=True)

    self.assertEqual(executor.event_buffer['success'], State.SUCCESS)
    self.assertEqual(executor.event_buffer['fail'], State.FAILED)

    self.assertNotIn('success', executor.tasks)
    self.assertNotIn('fail', executor.tasks)

    self.assertNotIn('success', executor.last_state)
    self.assertNotIn('fail', executor.last_state)
def _integrate_plugins():
    """Integrate plugins to the context."""
    from airflow.plugins_manager import executors_modules
    for executors_module in executors_modules:
        sys.modules[executors_module.__name__] = executors_module
        globals()[executors_module._name] = executors_module


_EXECUTOR = configuration.get('core', 'EXECUTOR')

if _EXECUTOR == 'LocalExecutor':
    DEFAULT_EXECUTOR = LocalExecutor()
elif _EXECUTOR == 'CeleryExecutor':
    DEFAULT_EXECUTOR = CeleryExecutor()
elif _EXECUTOR == 'SequentialExecutor':
    DEFAULT_EXECUTOR = SequentialExecutor()
elif _EXECUTOR == 'MesosExecutor':
    from airflow.contrib.executors.mesos_executor import MesosExecutor
    DEFAULT_EXECUTOR = MesosExecutor()
else:
    # Loading plugins
    _integrate_plugins()
    if _EXECUTOR in globals():
        DEFAULT_EXECUTOR = globals()[_EXECUTOR]()
    else:
        raise AirflowException("Executor {0} not supported.".format(_EXECUTOR))

_log.info("Using executor " + _EXECUTOR)
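# Configuration sketch (assumption): the _EXECUTOR lookup above reads the
# executor setting from the [core] section of airflow.cfg, e.g.:
#
#   [core]
#   executor = CeleryExecutor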
def test_celery_integration(self):
    executor = CeleryExecutor()
    executor.start()
    with start_worker(app=app, logfile=sys.stdout, loglevel='debug'):
        success_command = 'echo 1'
        fail_command = 'exit 1'

        executor.execute_async(key='success', command=success_command)
        # errors are propagated for some reason
        try:
            executor.execute_async(key='fail', command=fail_command)
        except Exception:
            pass
        executor.running['success'] = True
        executor.running['fail'] = True

        executor.end(synchronous=True)

    self.assertEqual(executor.event_buffer['success'], State.SUCCESS)
    self.assertEqual(executor.event_buffer['fail'], State.FAILED)

    self.assertNotIn('success', executor.tasks)
    self.assertNotIn('fail', executor.tasks)
def test_celery_integration(self):
    executor = CeleryExecutor()
    executor.start()
    with start_worker(app=app, logfile=sys.stdout, loglevel='debug'):
        success_command = ['true', 'some_parameter']
        fail_command = ['false', 'some_parameter']

        cached_celery_backend = execute_command.backend

        task_tuples_to_send = [
            ('success', 'fake_simple_ti', success_command,
             celery_configuration['task_default_queue'], execute_command),
            ('fail', 'fake_simple_ti', fail_command,
             celery_configuration['task_default_queue'], execute_command)
        ]

        chunksize = executor._num_tasks_per_send_process(len(task_tuples_to_send))
        num_processes = min(len(task_tuples_to_send), executor._sync_parallelism)

        send_pool = Pool(processes=num_processes)
        key_and_async_results = send_pool.map(
            send_task_to_executor,
            task_tuples_to_send,
            chunksize=chunksize)

        send_pool.close()
        send_pool.join()

        for key, command, result in key_and_async_results:
            # Only pops when enqueued successfully, otherwise keep it
            # and expect scheduler loop to deal with it.
            result.backend = cached_celery_backend
            executor.running[key] = command
            executor.tasks[key] = result
            executor.last_state[key] = celery_states.PENDING

        executor.running['success'] = True
        executor.running['fail'] = True

        executor.end(synchronous=True)

    self.assertEqual(executor.event_buffer['success'], State.SUCCESS)
    self.assertEqual(executor.event_buffer['fail'], State.FAILED)

    self.assertNotIn('success', executor.tasks)
    self.assertNotIn('fail', executor.tasks)

    self.assertNotIn('success', executor.last_state)
    self.assertNotIn('fail', executor.last_state)
            op_kwargs={
                'info_type': info_type,
                'codeinfo_df': splited_df,
            },
            dag=subdag,
        )
    return subdag


task_id = 'collect_daily_items_info'
items_task = SubDagOperator(
    task_id=task_id,
    subdag=subdag(task_id, 'daily_items_info',
                  data_worker.worker.collect_some_daily_info),
    executor=CeleryExecutor(),
    dag=dag,
)

task_id = 'collect_daily_items_indicator_info'
items_indicator_task = SubDagOperator(
    task_id=task_id,
    subdag=subdag(task_id, 'daily_items_indicator_info',
                  data_worker.worker.collect_some_daily_info),
    executor=CeleryExecutor(),
    dag=dag,
)

task_id = 'collect_daily_items_info_all'
items_all_task = SubDagOperator(
    task_id=task_id,
DAG_NAME = "deadlock_subdag" default_args = { 'owner': 'Airflow', 'start_date': airflow.utils.dates.days_ago(2), } with DAG(dag_id=DAG_NAME, default_args=default_args, schedule_interval="@once") as dag: start = DummyOperator(task_id='start') subdag_1 = SubDagOperator(task_id='subdag-1', subdag=factory_subdag(DAG_NAME, 'subdag-1', default_args), executor=CeleryExecutor()) subdag_2 = SubDagOperator(task_id='subdag-2', subdag=factory_subdag(DAG_NAME, 'subdag-2', default_args), executor=CeleryExecutor()) subdag_3 = SubDagOperator(task_id='subdag-3', subdag=factory_subdag(DAG_NAME, 'subdag-3', default_args), executor=CeleryExecutor()) subdag_4 = SubDagOperator(task_id='subdag-4', subdag=factory_subdag(DAG_NAME, 'subdag-4', default_args), executor=CeleryExecutor())