    def test_gauge_executor_metrics(self, mock_stats_gauge, mock_trigger_tasks, mock_sync):
        executor = DaskExecutor()
        executor.heartbeat()
        calls = [mock.call('executor.open_slots', mock.ANY),
                 mock.call('executor.queued_tasks', mock.ANY),
                 mock.call('executor.running_tasks', mock.ANY)]
        mock_stats_gauge.assert_has_calls(calls)
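The three mock_* parameters imply a stack of mock.patch decorators above the method. A plausible reconstruction follows; the patch targets are assumptions inferred from the parameter names, not confirmed by this page (decorators bind bottom-up, so the lowest patch arrives as the first mock argument):

    from unittest import mock

    # Hedged sketch: patch targets are guesses based on the parameter names.
    @mock.patch('airflow.executors.dask_executor.DaskExecutor.sync')
    @mock.patch('airflow.executors.base_executor.BaseExecutor.trigger_tasks')
    @mock.patch('airflow.executors.base_executor.Stats.gauge')
    def test_gauge_executor_metrics(self, mock_stats_gauge, mock_trigger_tasks, mock_sync):
        ...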
Example no. 2
    def test_tls(self):
        with dask_testing_cluster(
                worker_kwargs={'security': tls_security(), 'protocol': 'tls'},
                scheduler_kwargs={'security': tls_security(), 'protocol': 'tls'},
        ) as (cluster, _):

            # These use test certs that ship with dask/distributed and should not be
            #  used in production
            conf.set('dask', 'tls_ca', get_cert('tls-ca-cert.pem'))
            conf.set('dask', 'tls_cert', get_cert('tls-key-cert.pem'))
            conf.set('dask', 'tls_key', get_cert('tls-key.pem'))
            try:
                executor = DaskExecutor(cluster_address=cluster['address'])

                self.assert_tasks_on_executor(executor)

                executor.end()
                # close the executor, the cluster context manager expects all listeners
                # and tasks to have completed.
                executor.client.close()
            finally:
                conf.set('dask', 'tls_ca', '')
                conf.set('dask', 'tls_key', '')
                conf.set('dask', 'tls_cert', '')
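The helpers dask_testing_cluster, tls_security, and get_cert are not imported in this snippet; they presumably come from dask.distributed's test utilities. Likely imports, stated as an assumption rather than fact:

    # Assumed imports for the TLS examples on this page; dask_testing_cluster
    # would be an alias for distributed.utils_test.cluster.
    from distributed.utils_test import cluster as dask_testing_cluster, get_cert, tls_security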
    def test_dask_executor_functions(self):
        executor = DaskExecutor(cluster_address=self.cluster.scheduler_address)

        # start the executor
        executor.start()

        success_command = 'echo 1'
        fail_command = 'exit 1'

        executor.execute_async(key='success', command=success_command)
        executor.execute_async(key='fail', command=fail_command)

        success_future = next(k for k, v in executor.futures.items()
                              if v == 'success')
        fail_future = next(k for k, v in executor.futures.items()
                           if v == 'fail')

        # wait for the futures to execute, with a timeout
        timeout = timezone.utcnow() + timedelta(seconds=30)
        while not (success_future.done() and fail_future.done()):
            if timezone.utcnow() > timeout:
                raise ValueError(
                    'The futures should have finished; there is probably '
                    'an error communicating with the Dask cluster.')

        # both tasks should have finished
        self.assertTrue(success_future.done())
        self.assertTrue(fail_future.done())

        # check task exceptions
        self.assertTrue(success_future.exception() is None)
        self.assertTrue(fail_future.exception() is not None)
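As written, the wait loop polls at full speed until the deadline. A short sleep between checks avoids pegging a core; a minimal variant (the 0.5 s interval is an arbitrary choice):

    import time

    timeout = timezone.utcnow() + timedelta(seconds=30)
    while not (success_future.done() and fail_future.done()):
        if timezone.utcnow() > timeout:
            raise ValueError(
                'The futures should have finished; there is probably '
                'an error communicating with the Dask cluster.')
        time.sleep(0.5)  # yield between polls instead of busy-waiting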
Example no. 4
def _get_executor(executor_name):
    """
    Creates a new instance of the named executor. In case the executor name is not know in airflow, 
    look for it in the plugins
    """
    if executor_name == 'LocalExecutor':
        return LocalExecutor()
    elif executor_name == 'SequentialExecutor':
        return SequentialExecutor()
    elif executor_name == 'CeleryExecutor':
        from airflow.executors.celery_executor import CeleryExecutor
        return CeleryExecutor()
    elif executor_name == 'DaskExecutor':
        from airflow.executors.dask_executor import DaskExecutor
        return DaskExecutor()
    elif executor_name == 'MesosExecutor':
        from airflow.contrib.executors.mesos_executor import MesosExecutor
        return MesosExecutor()
    else:
        # Loading plugins
        _integrate_plugins()
        executor_path = executor_name.split('.')
        if len(executor_path) != 2:
            raise AirflowException(
                "Executor {0} not supported: please specify in format plugin_module.executor"
                .format(executor_name))

        if executor_path[0] in globals():
            return globals()[executor_path[0]].__dict__[executor_path[1]]()
        else:
            raise AirflowException(
                "Executor {0} not supported.".format(executor_name))
    def test_backfill_integration(self):
        """
        Test that DaskExecutor can be used to backfill example dags
        """
        dags = [
            dag for dag in self.dagbag.dags.values()
            if dag.dag_id in [
                'example_bash_operator',
                # 'example_python_operator',
            ]
        ]

        for dag in dags:
            dag.clear(
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE)

        for dag in sorted(dags, key=lambda d: d.dag_id):
            job = BackfillJob(
                dag=dag,
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE,
                ignore_first_depends_on_past=True,
                executor=DaskExecutor(
                    cluster_address=self.cluster.scheduler_address))
            job.run()
Example no. 6
    def _get_executor(executor_name: str) -> BaseExecutor:
        """
        Creates a new instance of the named executor.
        In case the executor name is unknown in airflow,
        look for it in the plugins
        """
        if executor_name == ExecutorLoader.LOCAL_EXECUTOR:
            from airflow.executors.local_executor import LocalExecutor
            return LocalExecutor()
        elif executor_name == ExecutorLoader.SEQUENTIAL_EXECUTOR:
            from airflow.executors.sequential_executor import SequentialExecutor
            return SequentialExecutor()
        elif executor_name == ExecutorLoader.CELERY_EXECUTOR:
            from airflow.executors.celery_executor import CeleryExecutor
            return CeleryExecutor()
        elif executor_name == ExecutorLoader.DASK_EXECUTOR:
            from airflow.executors.dask_executor import DaskExecutor
            return DaskExecutor()
        elif executor_name == ExecutorLoader.KUBERNETES_EXECUTOR:
            from airflow.executors.kubernetes_executor import KubernetesExecutor
            return KubernetesExecutor()
        else:
            # Load plugins here for executors; at this point the plugins might not have been initialized yet
            # TODO: verify the above and remove the two lines below in case plugins are always initialized first
            from airflow import plugins_manager
            plugins_manager.integrate_executor_plugins()
            executor_path = executor_name.split('.')
            assert len(executor_path) == 2, f"Executor {executor_name} not supported: " \
                                            f"please specify in format plugin_module.executor"

            assert executor_path[0] in globals(), f"Executor {executor_name} not supported"
            return globals()[executor_path[0]].__dict__[executor_path[1]]()
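Note that assert statements are stripped when Python runs with -O, so these two checks vanish under optimized bytecode. An equivalent that always runs, mirroring the exception style of the older _get_executor above:

    # Checks that survive `python -O`.
    if len(executor_path) != 2:
        raise AirflowException(
            f"Executor {executor_name} not supported: "
            f"please specify in format plugin_module.executor")
    if executor_path[0] not in globals():
        raise AirflowException(f"Executor {executor_name} not supported")
    return globals()[executor_path[0]].__dict__[executor_path[1]]()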
Example no. 7
    def test_tls(self):
        # These use test certs that ship with dask/distributed and should not be
        #  used in production
        with dask_testing_cluster(
            worker_kwargs={'security': tls_security(), "protocol": "tls"},
            scheduler_kwargs={'security': tls_security(), "protocol": "tls"},
        ) as (cluster, _):

            executor = DaskExecutor(cluster_address=cluster['address'])

            self.assert_tasks_on_executor(executor)

            executor.end()
            # close the executor, the cluster context manager expects all listeners
            # and tasks to have completed.
            executor.client.close()
Example no. 8
def _get_executor(executor_name):
    """
    Creates a new instance of the named executor.
    In case the executor name is not know in airflow,
    look for it in the plugins
    """
    parallelism = PARALLELISM
    if executor_name == Executors.LocalExecutor:
        return LocalExecutor(parallelism)
    elif executor_name == Executors.SequentialExecutor:
        return SequentialExecutor(parallelism)
    elif executor_name == Executors.CeleryExecutor:
        from airflow.executors.celery_executor import CeleryExecutor, execute_command
        return CeleryExecutor(parallelism, execute_command)
    elif executor_name == Executors.DaskExecutor:
        from airflow.executors.dask_executor import DaskExecutor
        cluster_address = configuration.conf.get('dask', 'cluster_address')
        tls_ca = configuration.conf.get('dask', 'tls_ca')
        tls_key = configuration.conf.get('dask', 'tls_key')
        tls_cert = configuration.conf.get('dask', 'tls_cert')
        return DaskExecutor(parallelism, cluster_address, tls_ca, tls_key,
                            tls_cert)
    elif executor_name == Executors.MesosExecutor:
        from airflow.contrib.executors.mesos_executor import MesosExecutor
        return MesosExecutor(parallelism)
    elif executor_name == Executors.KubernetesExecutor:
        from airflow.contrib.executors.kubernetes_executor import KubernetesExecutor
        return KubernetesExecutor()
    else:
        # Loading plugins
        _integrate_plugins()
        # Fetch the named class from the plugin module
        args = []
        kwargs = {'parallelism': PARALLELISM}
        return create_object_from_plugin_module(executor_name, *args, **kwargs)
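create_object_from_plugin_module is a project-specific helper that does not appear on this page. Purely as an assumption, it plausibly resolves a "plugin_module.ClassName" path against the integrated plugin globals:

    def create_object_from_plugin_module(name, *args, **kwargs):
        # Hypothetical reconstruction -- the real helper is not shown here.
        module_name, _, class_name = name.partition('.')
        if module_name not in globals() or not class_name:
            raise AirflowException("Executor {0} not supported.".format(name))
        return getattr(globals()[module_name], class_name)(*args, **kwargs)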
Example no. 9
    def test_dask_executor_functions(self):
        executor = DaskExecutor(cluster_address=self.cluster.scheduler_address)

        # start the executor
        executor.start()

        success_command = 'echo 1'
        fail_command = 'exit 1'

        executor.execute_async(key='success', command=success_command)
        executor.execute_async(key='fail', command=fail_command)

        success_future = next(
            k for k, v in executor.futures.items() if v == 'success')
        fail_future = next(
            k for k, v in executor.futures.items() if v == 'fail')

        # wait for the futures to execute, with a timeout
        timeout = timezone.utcnow() + timedelta(seconds=30)
        while not (success_future.done() and fail_future.done()):
            if timezone.utcnow() > timeout:
                raise ValueError(
                    'The futures should have finished; there is probably '
                    'an error communicating with the Dask cluster.')

        # both tasks should have finished
        self.assertTrue(success_future.done())
        self.assertTrue(fail_future.done())

        # check task exceptions
        self.assertTrue(success_future.exception() is None)
        self.assertTrue(fail_future.exception() is not None)
Example no. 10
    def test_backfill_integration(self):
        """
        Test that DaskExecutor can be used to backfill example dags
        """
        dag = self.dagbag.get_dag('example_bash_operator')

        job = BackfillJob(dag=dag,
                          start_date=DEFAULT_DATE,
                          end_date=DEFAULT_DATE,
                          ignore_first_depends_on_past=True,
                          executor=DaskExecutor(
                              cluster_address=self.cluster.scheduler_address))
        job.run()
Example no. 11
    def test_tls(self):
        with dask_testing_cluster(
                worker_kwargs={'security': tls_security()},
                scheduler_kwargs={'security': tls_security()}) as (s, workers):

            # These use test certs that ship with dask/distributed and should not be
            #  used in production
            configuration.set('dask', 'tls_ca', get_cert('tls-ca-cert.pem'))
            configuration.set('dask', 'tls_cert', get_cert('tls-key-cert.pem'))
            configuration.set('dask', 'tls_key', get_cert('tls-key.pem'))
            try:
                executor = DaskExecutor(cluster_address=s['address'])

                self.assert_tasks_on_executor(executor)

                executor.end()
                # close the executor, the cluster context manager expects all listeners
                # and tasks to have completed.
                executor.client.close()
            finally:
                configuration.set('dask', 'tls_ca', '')
                configuration.set('dask', 'tls_key', '')
                configuration.set('dask', 'tls_cert', '')
    def test_dask_executor_functions(self):
        executor = DaskExecutor(cluster_address=self.cluster.scheduler_address)
        self.assert_tasks_on_executor(executor)
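assert_tasks_on_executor is defined elsewhere in the test class. Judging from the longer examples above, it presumably wraps the start/submit/poll sequence; a sketch under that assumption:

    def assert_tasks_on_executor(self, executor):
        # Hedged reconstruction based on test_dask_executor_functions above.
        executor.start()
        executor.execute_async(key='success', command='echo 1')
        executor.execute_async(key='fail', command='exit 1')

        success_future = next(k for k, v in executor.futures.items() if v == 'success')
        fail_future = next(k for k, v in executor.futures.items() if v == 'fail')

        # wait for the futures to execute, with a timeout
        timeout = timezone.utcnow() + timedelta(seconds=30)
        while not (success_future.done() and fail_future.done()):
            if timezone.utcnow() > timeout:
                raise ValueError('The futures should have finished.')

        assert success_future.exception() is None
        assert fail_future.exception() is not None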
Example no. 13
    from airflow.plugins_manager import executors_modules
    for executors_module in executors_modules:
        sys.modules[executors_module.__name__] = executors_module
        globals()[executors_module._name] = executors_module


_EXECUTOR = configuration.get('core', 'EXECUTOR')

if _EXECUTOR == 'LocalExecutor':
    DEFAULT_EXECUTOR = LocalExecutor()
elif _EXECUTOR == 'SequentialExecutor':
    DEFAULT_EXECUTOR = SequentialExecutor()
elif _EXECUTOR == 'CeleryExecutor':
    from airflow.executors.celery_executor import CeleryExecutor
    DEFAULT_EXECUTOR = CeleryExecutor()
elif _EXECUTOR == 'DaskExecutor':
    from airflow.executors.dask_executor import DaskExecutor
    DEFAULT_EXECUTOR = DaskExecutor()
elif _EXECUTOR == 'MesosExecutor':
    from airflow.contrib.executors.mesos_executor import MesosExecutor
    DEFAULT_EXECUTOR = MesosExecutor()
else:
    # Loading plugins
    _integrate_plugins()
    if _EXECUTOR in globals():
        DEFAULT_EXECUTOR = globals()[_EXECUTOR]()
    else:
        raise AirflowException("Executor {0} not supported.".format(_EXECUTOR))

logging.info("Using executor " + _EXECUTOR)