def test_error_sending_task(self):
        @app.task
        def fake_execute_command():
            pass

        # fake_execute_command takes no arguments while execute_command takes 1,
        # which will cause TypeError when calling task.apply_async()
        celery_executor.execute_command = fake_execute_command
        executor = CeleryExecutor()
        value_tuple = 'command', '_', 'queue', 'should_be_a_simple_ti'
        executor.queued_tasks['key'] = value_tuple
        executor.heartbeat()
        self.assertEqual(1, len(executor.queued_tasks))
        self.assertEqual(executor.queued_tasks['key'], value_tuple)
Example #2
def _get_executor(executor_name):
    """
    Creates a new instance of the named executor.
    In case the executor name is not know in airflow,
    look for it in the plugins
    """
    parallelism = PARALLELISM
    if executor_name == Executors.LocalExecutor:
        return LocalExecutor(parallelism)
    elif executor_name == Executors.SequentialExecutor:
        return SequentialExecutor(parallelism)
    elif executor_name == Executors.CeleryExecutor:
        from airflow.executors.celery_executor import CeleryExecutor, execute_command
        return CeleryExecutor(parallelism, execute_command)
    elif executor_name == Executors.DaskExecutor:
        from airflow.executors.dask_executor import DaskExecutor
        cluster_address = configuration.conf.get('dask', 'cluster_address')
        tls_ca = configuration.conf.get('dask', 'tls_ca')
        tls_key = configuration.conf.get('dask', 'tls_key')
        tls_cert = configuration.conf.get('dask', 'tls_cert')
        return DaskExecutor(parallelism, cluster_address, tls_ca, tls_key,
                            tls_cert)
    elif executor_name == Executors.MesosExecutor:
        from airflow.contrib.executors.mesos_executor import MesosExecutor
        return MesosExecutor(parallelism)
    elif executor_name == Executors.KubernetesExecutor:
        from airflow.contrib.executors.kubernetes_executor import KubernetesExecutor
        return KubernetesExecutor()
    else:
        # Loading plugins
        _integrate_plugins()
        # Get the specified executor class from the plugin module
        args = []
        kwargs = {'parallelism': PARALLELISM}
        return create_object_from_plugin_module(executor_name, *args, **kwargs)
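A minimal usage sketch (not part of the original snippet): the resolver above is typically driven by whichever executor name is configured; the 'core'/'executor' key, the plain string name, and the start() call are illustrative assumptions.

from airflow import configuration

# Hypothetical driver code: resolve the configured executor and start it.
executor_name = configuration.conf.get('core', 'executor')  # e.g. "CeleryExecutor"
executor = _get_executor(executor_name)
executor.start()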
Example #3
def _create_subdag(subdag_func,
                   parent_dag,
                   task_id,
                   phase,
                   default_args,
                   trigger_rule='all_success'):
    """Creates a subdag, initiated by a StartOperator and ended by a FinishOperator.
    Also, uses an internal function, passed as argument, to create the other subdag
    operators in between.
    """
    subdag = DAG(dag_id='{}.{}'.format(parent_dag.dag_id, task_id),
                 schedule_interval=None,
                 catchup=False,
                 default_args=default_args)

    begin_task = StartOperator(phase,
                               trigger_rule=trigger_rule,
                               **default_args)
    subdag >> begin_task
    end_task = FinishOperator(phase, trigger_rule=trigger_rule, **default_args)
    subdag >> end_task
    subdag_func(begin_task, end_task)

    return SubDagOperator(subdag=subdag,
                          task_id=task_id,
                          dag=parent_dag,
                          executor=CeleryExecutor())
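A hypothetical usage sketch of the helper above, assuming StartOperator/FinishOperator are importable and that the inner function only needs to chain operators between the two markers; main_dag, default_args and the phase name are illustrative.

from datetime import datetime
from airflow import DAG

default_args = {'owner': 'airflow', 'start_date': datetime(2021, 1, 1)}
main_dag = DAG(dag_id='etl_main', schedule_interval=None, default_args=default_args)


def _build_extract_phase(begin_task, end_task):
    # Chain any phase-specific operators between the start and finish markers here.
    begin_task >> end_task


extract_phase = _create_subdag(_build_extract_phase,
                               parent_dag=main_dag,
                               task_id='extract_phase',
                               phase='extract',
                               default_args=default_args)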
Example #4
    def test_job_id_setter(self):
        cel_exec = CeleryExecutor()
        k8s_exec = KubernetesExecutor()
        cel_k8s_exec = CeleryKubernetesExecutor(cel_exec, k8s_exec)
        job_id = 'this-job-id'
        cel_k8s_exec.job_id = job_id
        assert cel_exec.job_id == k8s_exec.job_id == cel_k8s_exec.job_id == job_id
Example #5
    def _get_executor(executor_name: str) -> BaseExecutor:
        """
        Creates a new instance of the named executor.
        In case the executor name is unknown in airflow,
        look for it in the plugins
        """
        if executor_name == ExecutorLoader.LOCAL_EXECUTOR:
            from airflow.executors.local_executor import LocalExecutor
            return LocalExecutor()
        elif executor_name == ExecutorLoader.SEQUENTIAL_EXECUTOR:
            from airflow.executors.sequential_executor import SequentialExecutor
            return SequentialExecutor()
        elif executor_name == ExecutorLoader.CELERY_EXECUTOR:
            from airflow.executors.celery_executor import CeleryExecutor
            return CeleryExecutor()
        elif executor_name == ExecutorLoader.DASK_EXECUTOR:
            from airflow.executors.dask_executor import DaskExecutor
            return DaskExecutor()
        elif executor_name == ExecutorLoader.KUBERNETES_EXECUTOR:
            from airflow.executors.kubernetes_executor import KubernetesExecutor
            return KubernetesExecutor()
        else:
            # Load plugins here for executors as at that time the plugins might not have been initialized yet
            # TODO: verify the above and remove two lines below in case plugins are always initialized first
            from airflow import plugins_manager
            plugins_manager.integrate_executor_plugins()
            executor_path = executor_name.split('.')
            assert len(executor_path) == 2, f"Executor {executor_name} not supported: " \
                                            f"please specify in format plugin_module.executor"

            assert executor_path[0] in globals(), f"Executor {executor_name} not supported"
            return globals()[executor_path[0]].__dict__[executor_path[1]]()
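The fallback branch expects names of the form plugin_module.executor. A minimal sketch of a plugin that would satisfy it, assuming the classic AirflowPlugin executors hook; MyCustomExecutor and my_plugin are hypothetical names.

from airflow.executors.base_executor import BaseExecutor
from airflow.plugins_manager import AirflowPlugin


class MyCustomExecutor(BaseExecutor):
    """Toy executor used only to illustrate the plugin path format."""


class MyExecutorPlugin(AirflowPlugin):
    name = 'my_plugin'
    executors = [MyCustomExecutor]

# With this plugin integrated, the branch above resolves 'my_plugin.MyCustomExecutor'.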
Example #6
def _get_executor(executor_name):
    """
    Creates a new instance of the named executor. In case the executor name is not know in airflow, 
    look for it in the plugins
    """
    if executor_name == 'LocalExecutor':
        return LocalExecutor()
    elif executor_name == 'SequentialExecutor':
        return SequentialExecutor()
    elif executor_name == 'CeleryExecutor':
        from airflow.executors.celery_executor import CeleryExecutor
        return CeleryExecutor()
    elif executor_name == 'DaskExecutor':
        from airflow.executors.dask_executor import DaskExecutor
        return DaskExecutor()
    elif executor_name == 'MesosExecutor':
        from airflow.contrib.executors.mesos_executor import MesosExecutor
        return MesosExecutor()
    else:
        # Loading plugins
        _integrate_plugins()
        executor_path = executor_name.split('.')
        if len(executor_path) != 2:
            raise AirflowException(
                "Executor {0} not supported: please specify in format plugin_module.executor"
                .format(executor_name))

        if executor_path[0] in globals():
            return globals()[executor_path[0]].__dict__[executor_path[1]]()
        else:
            raise AirflowException(
                "Executor {0} not supported.".format(executor_name))
Example #7
    def test_exception_propagation(self):
        @app.task
        def fake_celery_task():
            return {}

        mock_log = mock.MagicMock()
        executor = CeleryExecutor()
        executor._log = mock_log

        executor.tasks = {'key': fake_celery_task()}
        executor.sync()
        mock_log.error.assert_called_once()
        args, kwargs = mock_log.error.call_args_list[0]
        log = args[0]
        # Result of queuing is not a celery task but a dict,
        # and it should raise AttributeError and then get propagated
        # to the error log.
        self.assertIn(CELERY_FETCH_ERR_MSG_HEADER, log)
        self.assertIn('AttributeError', log)
Example #8
def get_sub_dag_operator(dag, sub_dag_name, sub_dag_func, **kwargs):
    """
    Gets a sub-dag operator.
    :param dag: the main dag which will contain the subdag
    :param sub_dag_name: the name used as the subdag operator's task id
    :param sub_dag_func: the method for building the subdag
    :return: the new subdag operator
    """
    return SubDagOperator(
        subdag=sub_dag_func(sub_dag_name, **kwargs),
        task_id=sub_dag_name,
        dag=dag,
        executor=CeleryExecutor(),
    )
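An illustrative call (hypothetical names), assuming the builder names the child DAG '<parent_dag_id>.<task_id>' as SubDagOperator requires.

from datetime import datetime
from airflow import DAG

default_args = {'owner': 'airflow', 'start_date': datetime(2021, 1, 1)}
main_dag = DAG(dag_id='reporting', schedule_interval=None, default_args=default_args)


def build_report_subdag(sub_dag_name, **kwargs):
    # SubDagOperator expects the child dag_id to be '<parent_dag_id>.<task_id>'.
    return DAG(dag_id='{}.{}'.format(main_dag.dag_id, sub_dag_name),
               schedule_interval=None, default_args=default_args)


report_op = get_sub_dag_operator(main_dag, 'build_report', build_report_subdag)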
Example #9
    def test_queue_command(self, test_queue, k8s_queue_cmd, celery_queue_cmd):
        kwargs = dict(
            command=['airflow', 'run', 'dag'],
            priority=1,
            queue='default',
        )
        kwarg_values = kwargs.values()
        cke = CeleryKubernetesExecutor(CeleryExecutor(), KubernetesExecutor())

        simple_task_instance = mock.MagicMock()
        simple_task_instance.queue = test_queue

        cke.queue_command(simple_task_instance, **kwargs)

        if test_queue == KUBERNETES_QUEUE:
            k8s_queue_cmd.assert_called_once_with(simple_task_instance, *kwarg_values)
            celery_queue_cmd.assert_not_called()
        else:
            celery_queue_cmd.assert_called_once_with(simple_task_instance, *kwarg_values)
            k8s_queue_cmd.assert_not_called()
Example #10
    def test_exception_propagation(self):
        @app.task
        def fake_celery_task():
            return {}

        mock_log = mock.MagicMock()
        executor = CeleryExecutor()
        executor._log = mock_log

        executor.tasks = {'key': fake_celery_task()}
        executor.sync()
        assert mock_log.error.call_count == 1
        args, kwargs = mock_log.error.call_args_list[0]
        # Result of queuing is not a celery task but a dict,
        # and it should raise AttributeError and then get propagated
        # to the error log.
        assert celery_executor.CELERY_FETCH_ERR_MSG_HEADER in args[0]
        assert 'AttributeError' in args[1]
Example #11
        execution_date=DEFAULT_DATE,
        origin='/home',
    )
    resp = admin_client.post('run', data=form, follow_redirects=True)
    check_content_in_response('', resp)

    msg = (
        f"Task is in the '{state}' state which is not a valid state for "
        f"execution. The task must be cleared in order to be run")
    assert not re.search(msg, resp.get_data(as_text=True))


@pytest.mark.parametrize("state", QUEUEABLE_STATES)
@unittest.mock.patch(
    'airflow.executors.executor_loader.ExecutorLoader.get_default_executor',
    return_value=CeleryExecutor(),
)
def test_run_with_not_runnable_states(_, admin_client, session, state):
    assert state not in RUNNABLE_STATES

    task_id = 'runme_0'
    session.query(TaskInstance).filter(TaskInstance.task_id == task_id).update(
        {
            'state': state,
            'end_date': timezone.utcnow()
        })
    session.commit()

    form = dict(
        task_id=task_id,
        dag_id="example_bash_operator",
Example #12
    def test_celery_integration(self):
        executor = CeleryExecutor()
        executor.start()
        with start_worker(app=app, logfile=sys.stdout, loglevel='debug'):

            success_command = ['true', 'some_parameter']
            fail_command = ['false', 'some_parameter']

            executor.execute_async(key='success', command=success_command)
            # errors are propagated for some reason
            try:
                executor.execute_async(key='fail', command=fail_command)
            except:
                pass
            executor.running['success'] = True
            executor.running['fail'] = True

            executor.end(synchronous=True)

        self.assertEqual(executor.event_buffer['success'], State.SUCCESS)
        self.assertEqual(executor.event_buffer['fail'], State.FAILED)

        self.assertNotIn('success', executor.tasks)
        self.assertNotIn('fail', executor.tasks)

        self.assertNotIn('success', executor.last_state)
        self.assertNotIn('fail', executor.last_state)
Example #13

def _integrate_plugins():
    """Integrate plugins to the context."""
    from airflow.plugins_manager import executors_modules
    for executors_module in executors_modules:
        sys.modules[executors_module.__name__] = executors_module
        globals()[executors_module._name] = executors_module


_EXECUTOR = configuration.get('core', 'EXECUTOR')

if _EXECUTOR == 'LocalExecutor':
    DEFAULT_EXECUTOR = LocalExecutor()
elif _EXECUTOR == 'CeleryExecutor':
    DEFAULT_EXECUTOR = CeleryExecutor()
elif _EXECUTOR == 'SequentialExecutor':
    DEFAULT_EXECUTOR = SequentialExecutor()
elif _EXECUTOR == 'MesosExecutor':
    from airflow.contrib.executors.mesos_executor import MesosExecutor
    DEFAULT_EXECUTOR = MesosExecutor()
else:
    # Loading plugins
    _integrate_plugins()
    if _EXECUTOR in globals():
        DEFAULT_EXECUTOR = globals()[_EXECUTOR]()
    else:
        raise AirflowException("Executor {0} not supported.".format(_EXECUTOR))

_log.info("Using executor %s", _EXECUTOR)
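A hedged sketch of how this module-level selection is usually driven: the name comes from the [core] executor setting, which can typically also be supplied via the AIRFLOW__CORE__EXECUTOR environment variable; setting it in-process as below is for illustration only.

import os

# Illustration only: the override must be in place before airflow is imported.
os.environ['AIRFLOW__CORE__EXECUTOR'] = 'CeleryExecutor'

from airflow.executors import DEFAULT_EXECUTOR  # resolves to CeleryExecutor() here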
Example #14
    def test_celery_integration(self):
        executor = CeleryExecutor()
        executor.start()
        with start_worker(app=app, logfile=sys.stdout, loglevel='debug'):

            success_command = 'echo 1'
            fail_command = 'exit 1'

            executor.execute_async(key='success', command=success_command)
            # errors are propagated for some reason
            try:
                executor.execute_async(key='fail', command=fail_command)
            except:
                pass
            executor.running['success'] = True
            executor.running['fail'] = True

            executor.end(synchronous=True)

        self.assertEqual(executor.event_buffer['success'], State.SUCCESS)
        self.assertEqual(executor.event_buffer['fail'], State.FAILED)

        self.assertNotIn('success', executor.tasks)
        self.assertNotIn('fail', executor.tasks)
Example #15
    def test_celery_integration(self):
        executor = CeleryExecutor()
        executor.start()
        with start_worker(app=app, logfile=sys.stdout, loglevel='debug'):
            success_command = ['true', 'some_parameter']
            fail_command = ['false', 'some_parameter']

            cached_celery_backend = execute_command.backend
            task_tuples_to_send = [
                ('success', 'fake_simple_ti', success_command,
                 celery_configuration['task_default_queue'], execute_command),
                ('fail', 'fake_simple_ti', fail_command,
                 celery_configuration['task_default_queue'], execute_command)
            ]

            chunksize = executor._num_tasks_per_send_process(
                len(task_tuples_to_send))
            num_processes = min(len(task_tuples_to_send),
                                executor._sync_parallelism)

            send_pool = Pool(processes=num_processes)
            key_and_async_results = send_pool.map(send_task_to_executor,
                                                  task_tuples_to_send,
                                                  chunksize=chunksize)

            send_pool.close()
            send_pool.join()

            for key, command, result in key_and_async_results:
                # Only pops when enqueued successfully, otherwise keep it
                # and expect scheduler loop to deal with it.
                result.backend = cached_celery_backend
                executor.running[key] = command
                executor.tasks[key] = result
                executor.last_state[key] = celery_states.PENDING

            executor.running['success'] = True
            executor.running['fail'] = True

            executor.end(synchronous=True)

        self.assertEqual(executor.event_buffer['success'], State.SUCCESS)
        self.assertEqual(executor.event_buffer['fail'], State.FAILED)

        self.assertNotIn('success', executor.tasks)
        self.assertNotIn('fail', executor.tasks)

        self.assertNotIn('success', executor.last_state)
        self.assertNotIn('fail', executor.last_state)
Example #16
File: dag.py  Project: gsroot/storm
            op_kwargs={
                'info_type': info_type,
                'codeinfo_df': splited_df
            },
            dag=subdag,
        )

    return subdag


task_id = 'collect_daily_items_info'
items_task = SubDagOperator(
    task_id=task_id,
    subdag=subdag(task_id, 'daily_items_info',
                  data_worker.worker.collect_some_daily_info),
    executor=CeleryExecutor(),
    dag=dag,
)

task_id = 'collect_daily_items_indicator_info'
items_indicator_task = SubDagOperator(
    task_id=task_id,
    subdag=subdag(task_id, 'daily_items_indicator_info',
                  data_worker.worker.collect_some_daily_info),
    executor=CeleryExecutor(),
    dag=dag,
)

task_id = 'collect_daily_items_info_all'
items_all_task = SubDagOperator(
    task_id=task_id,
Example #17
DAG_NAME = "deadlock_subdag"

default_args = {
    'owner': 'Airflow',
    'start_date': airflow.utils.dates.days_ago(2),
}

with DAG(dag_id=DAG_NAME, default_args=default_args,
         schedule_interval="@once") as dag:
    start = DummyOperator(task_id='start')

    subdag_1 = SubDagOperator(task_id='subdag-1',
                              subdag=factory_subdag(DAG_NAME, 'subdag-1',
                                                    default_args),
                              executor=CeleryExecutor())

    subdag_2 = SubDagOperator(task_id='subdag-2',
                              subdag=factory_subdag(DAG_NAME, 'subdag-2',
                                                    default_args),
                              executor=CeleryExecutor())

    subdag_3 = SubDagOperator(task_id='subdag-3',
                              subdag=factory_subdag(DAG_NAME, 'subdag-3',
                                                    default_args),
                              executor=CeleryExecutor())

    subdag_4 = SubDagOperator(task_id='subdag-4',
                              subdag=factory_subdag(DAG_NAME, 'subdag-4',
                                                    default_args),
                              executor=CeleryExecutor())
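The factory_subdag helper used above is not shown in the snippet; a hypothetical minimal version, assuming it only needs to build a child DAG whose dag_id follows the '<parent>.<child>' convention required by SubDagOperator and to give it at least one task.

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator


def factory_subdag(parent_dag_name, child_dag_name, args):
    subdag = DAG(
        dag_id='{}.{}'.format(parent_dag_name, child_dag_name),
        default_args=args,
        schedule_interval="@once",
    )
    DummyOperator(task_id='{}-task'.format(child_dag_name), dag=subdag)
    return subdag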