def test_backfill_integration(self):
    """
    Test that DaskExecutor can be used to backfill example dags
    """
    wanted_dag_ids = {
        'example_bash_operator',
        # 'example_python_operator',
    }
    dags = [d for d in self.dagbag.dags.values() if d.dag_id in wanted_dag_ids]

    # Start from a clean slate for the backfill window.
    for candidate in dags:
        candidate.clear(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    # Backfill each dag in deterministic (dag_id) order.
    for candidate in sorted(dags, key=lambda d: d.dag_id):
        backfill = BackfillJob(
            dag=candidate,
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE,
            ignore_first_depends_on_past=True,
            executor=DaskExecutor(
                cluster_address=self.cluster.scheduler_address),
        )
        backfill.run()
def _get_executor(executor_name):
    """
    Create a new instance of the named executor.

    If the name is not one of the executors known to airflow, fall back
    to looking it up among the plugin-provided executors.
    """
    if executor_name == 'LocalExecutor':
        return LocalExecutor()
    if executor_name == 'SequentialExecutor':
        return SequentialExecutor()
    if executor_name == 'CeleryExecutor':
        from airflow.executors.celery_executor import CeleryExecutor
        return CeleryExecutor()
    if executor_name == 'DaskExecutor':
        from airflow.executors.dask_executor import DaskExecutor
        return DaskExecutor()
    if executor_name == 'MesosExecutor':
        from airflow.contrib.executors.mesos_executor import MesosExecutor
        return MesosExecutor()

    # Not a built-in executor: load plugins and resolve the
    # "plugin_module.executor" dotted name against this module's globals.
    _integrate_plugins()
    executor_path = executor_name.split('.')
    if len(executor_path) != 2:
        raise AirflowException(
            "Executor {0} not supported: please specify in format plugin_module.executor"
            .format(executor_name))
    module_name, class_name = executor_path
    if module_name not in globals():
        raise AirflowException(
            "Executor {0} not supported.".format(executor_name))
    return globals()[module_name].__dict__[class_name]()
def test_gauge_executor_metrics(self, mock_stats_gauge, mock_trigger_tasks, mock_sync):
    # A heartbeat should gauge the three standard executor metrics.
    executor = DaskExecutor()
    executor.heartbeat()
    expected_calls = [
        mock.call('executor.open_slots', mock.ANY),
        mock.call('executor.queued_tasks', mock.ANY),
        mock.call('executor.running_tasks', mock.ANY),
    ]
    mock_stats_gauge.assert_has_calls(expected_calls)
def _get_executor(executor_name: str) -> BaseExecutor:
    """
    Creates a new instance of the named executor.
    In case the executor name is unknown in airflow,
    look for it in the plugins

    :param executor_name: one of the ``ExecutorLoader`` constants, or a
        plugin-provided executor in ``plugin_module.executor`` format
    :raises AirflowException: if the executor name cannot be resolved
    """
    if executor_name == ExecutorLoader.LOCAL_EXECUTOR:
        from airflow.executors.local_executor import LocalExecutor
        return LocalExecutor()
    elif executor_name == ExecutorLoader.SEQUENTIAL_EXECUTOR:
        from airflow.executors.sequential_executor import SequentialExecutor
        return SequentialExecutor()
    elif executor_name == ExecutorLoader.CELERY_EXECUTOR:
        from airflow.executors.celery_executor import CeleryExecutor
        return CeleryExecutor()
    elif executor_name == ExecutorLoader.DASK_EXECUTOR:
        from airflow.executors.dask_executor import DaskExecutor
        return DaskExecutor()
    elif executor_name == ExecutorLoader.KUBERNETES_EXECUTOR:
        from airflow.executors.kubernetes_executor import KubernetesExecutor
        return KubernetesExecutor()
    else:
        # Load plugins here for executors as at that time the plugins might not have been initialized yet
        # TODO: verify the above and remove two lines below in case plugins are always initialized first
        from airflow import plugins_manager
        from airflow.exceptions import AirflowException
        plugins_manager.integrate_executor_plugins()
        executor_path = executor_name.split('.')
        # Raise rather than assert: `assert` is stripped when Python runs
        # with -O, which would silently skip this validation.
        if len(executor_path) != 2:
            raise AirflowException(
                f"Executor {executor_name} not supported: "
                f"please specify in format plugin_module.executor")
        if executor_path[0] not in globals():
            raise AirflowException(f"Executor {executor_name} not supported")
        return globals()[executor_path[0]].__dict__[executor_path[1]]()
def test_dask_executor_functions(self):
    """Submit one passing and one failing command and verify the futures."""
    import time

    executor = DaskExecutor(cluster_address=self.cluster.scheduler_address)

    # start the executor
    executor.start()

    success_command = 'echo 1'
    fail_command = 'exit 1'

    executor.execute_async(key='success', command=success_command)
    executor.execute_async(key='fail', command=fail_command)

    success_future = next(
        k for k, v in executor.futures.items() if v == 'success')
    fail_future = next(
        k for k, v in executor.futures.items() if v == 'fail')

    # wait for the futures to execute, with a timeout
    timeout = timezone.utcnow() + timedelta(seconds=30)
    while not (success_future.done() and fail_future.done()):
        if timezone.utcnow() > timeout:
            raise ValueError(
                'The futures should have finished; there is probably '
                'an error communicating with the Dask cluster.')
        # Avoid a hot busy-wait loop while polling the futures.
        time.sleep(0.1)

    # both tasks should have finished
    self.assertTrue(success_future.done())
    self.assertTrue(fail_future.done())

    # check task exceptions
    self.assertTrue(success_future.exception() is None)
    self.assertTrue(fail_future.exception() is not None)
def test_tls(self):
    with dask_testing_cluster(
        worker_kwargs={'security': tls_security(), "protocol": "tls"},
        scheduler_kwargs={'security': tls_security(), "protocol": "tls"},
    ) as (cluster, _):
        # These use test certs that ship with dask/distributed and should not be
        # used in production
        conf.set('dask', 'tls_ca', get_cert('tls-ca-cert.pem'))
        conf.set('dask', 'tls_cert', get_cert('tls-key-cert.pem'))
        conf.set('dask', 'tls_key', get_cert('tls-key.pem'))
        try:
            tls_executor = DaskExecutor(cluster_address=cluster['address'])
            self.assert_tasks_on_executor(tls_executor)
            tls_executor.end()
            # close the executor, the cluster context manager expects all listeners
            # and tasks to have completed.
            tls_executor.client.close()
        finally:
            # Always clear the TLS settings so later tests see a clean config.
            for option in ('tls_ca', 'tls_key', 'tls_cert'):
                conf.set('dask', option, '')
def _get_executor(executor_name):
    """
    Create a new instance of the named executor.

    If the executor name is not known to airflow, fall back to looking
    it up among the plugin-provided executors.
    """
    parallelism = PARALLELISM
    if executor_name == Executors.LocalExecutor:
        return LocalExecutor(parallelism)
    if executor_name == Executors.SequentialExecutor:
        return SequentialExecutor(parallelism)
    if executor_name == Executors.CeleryExecutor:
        from airflow.executors.celery_executor import CeleryExecutor, execute_command
        return CeleryExecutor(parallelism, execute_command)
    if executor_name == Executors.DaskExecutor:
        from airflow.executors.dask_executor import DaskExecutor
        # Dask connection/TLS settings come from the [dask] config section.
        dask_conf = configuration.conf
        cluster_address = dask_conf.get('dask', 'cluster_address')
        tls_ca = dask_conf.get('dask', 'tls_ca')
        tls_key = dask_conf.get('dask', 'tls_key')
        tls_cert = dask_conf.get('dask', 'tls_cert')
        return DaskExecutor(parallelism, cluster_address, tls_ca, tls_key, tls_cert)
    if executor_name == Executors.MesosExecutor:
        from airflow.contrib.executors.mesos_executor import MesosExecutor
        return MesosExecutor(parallelism)
    if executor_name == Executors.KubernetesExecutor:
        from airflow.contrib.executors.kubernetes_executor import KubernetesExecutor
        return KubernetesExecutor()

    # Not a built-in executor: load plugins, then fetch the named class
    # from the plugin module.
    _integrate_plugins()
    return create_object_from_plugin_module(
        executor_name, parallelism=PARALLELISM)
def test_backfill_integration(self):
    """
    Test that DaskExecutor can be used to backfill example dags
    """
    dag = self.dagbag.get_dag('example_bash_operator')

    dask_executor = DaskExecutor(
        cluster_address=self.cluster.scheduler_address)
    backfill = BackfillJob(
        dag=dag,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_first_depends_on_past=True,
        executor=dask_executor,
    )
    backfill.run()
def test_tls(self):
    # These use test certs that ship with dask/distributed and should not be
    # used in production
    with dask_testing_cluster(
        worker_kwargs={"protocol": "tls", 'security': tls_security()},
        scheduler_kwargs={"protocol": "tls", 'security': tls_security()},
    ) as (cluster, _):
        tls_executor = DaskExecutor(cluster_address=cluster['address'])
        self.assert_tasks_on_executor(tls_executor)
        tls_executor.end()
        # close the executor, the cluster context manager expects all listeners
        # and tasks to have completed.
        tls_executor.client.close()
def test_dask_executor_functions(self):
    # Run the shared task-execution checks against an executor pointed at
    # this test's local Dask cluster.
    dask_executor = DaskExecutor(
        cluster_address=self.cluster.scheduler_address)
    self.assert_tasks_on_executor(dask_executor)
from airflow.plugins_manager import executors_modules

# Register plugin-provided executor modules both as importable modules and
# as module-level names, so the globals() lookup below can resolve them.
for executors_module in executors_modules:
    sys.modules[executors_module.__name__] = executors_module
    globals()[executors_module._name] = executors_module

_EXECUTOR = configuration.get('core', 'EXECUTOR')

if _EXECUTOR == 'LocalExecutor':
    DEFAULT_EXECUTOR = LocalExecutor()
elif _EXECUTOR == 'SequentialExecutor':
    DEFAULT_EXECUTOR = SequentialExecutor()
elif _EXECUTOR == 'CeleryExecutor':
    from airflow.executors.celery_executor import CeleryExecutor
    DEFAULT_EXECUTOR = CeleryExecutor()
elif _EXECUTOR == 'DaskExecutor':
    from airflow.executors.dask_executor import DaskExecutor
    DEFAULT_EXECUTOR = DaskExecutor()
elif _EXECUTOR == 'MesosExecutor':
    from airflow.contrib.executors.mesos_executor import MesosExecutor
    DEFAULT_EXECUTOR = MesosExecutor()
else:
    # Loading plugins
    _integrate_plugins()
    if _EXECUTOR in globals():
        DEFAULT_EXECUTOR = globals()[_EXECUTOR]()
    else:
        raise AirflowException("Executor {0} not supported.".format(_EXECUTOR))

# Lazy %-style args: formatting is skipped entirely when INFO is disabled,
# unlike eager string concatenation.
logging.info("Using executor %s", _EXECUTOR)