def _get_host_mode_executor(recon_pipeline, run_config, get_executor_def_fn, instance):
    """Construct the executor for a host-mode run.

    Resolves the executor definition named under the run config's ``execution``
    key (``None`` selects the default definition), validates the user-supplied
    config against that definition's config schema, and invokes the
    definition's creation function.

    Raises:
        DagsterInvalidConfigError: If the executor config fails validation.
    """
    execution_section = run_config.get("execution")
    if execution_section:
        executor_name, executor_config = ensure_single_item(execution_section)
    else:
        executor_name, executor_config = None, {}

    executor_def = get_executor_def_fn(executor_name)
    schema_type = def_config_field(executor_def).config_type

    evaluation = process_config(schema_type, executor_config)
    if not evaluation.success:
        raise DagsterInvalidConfigError(
            "Error in executor config for executor {}".format(executor_def.name),
            evaluation.errors,
            executor_config,
        )

    init_context = InitExecutorContext(
        pipeline=recon_pipeline,
        executor_def=executor_def,
        executor_config=evaluation.value["config"],
        instance=instance,
    )
    check_cross_process_constraints(init_context)
    return executor_def.executor_creation_fn(init_context)
def _get_host_mode_executor(recon_pipeline, run_config, executor_defs, instance):
    """Construct the executor for a host-mode run.

    Validates the run config's ``execution`` section against a selector built
    from the supplied executor definitions (the first definition is used as the
    default when the section is absent), then invokes the selected
    definition's creation function.

    Raises:
        DagsterInvalidConfigError: If the execution config fails validation.
        KeyError: If the selected executor name is not among ``executor_defs``.
    """
    execution_config = run_config.get("execution", {})

    selector_field = Field(
        selector_for_named_defs(executor_defs),
        default_value={executor_defs[0].name: {}},
    )
    evaluation = process_config(selector_field.config_type, execution_config)
    if not evaluation.success:
        raise DagsterInvalidConfigError(
            "Error processing execution config {}".format(execution_config),
            evaluation.errors,
            execution_config,
        )

    chosen_name, chosen_config = ensure_single_item(evaluation.value)

    defs_by_name = {definition.name: definition for definition in executor_defs}
    executor_def = defs_by_name[chosen_name]

    init_context = InitExecutorContext(
        pipeline=recon_pipeline,
        executor_def=executor_def,
        executor_config=chosen_config["config"],
        instance=instance,
    )
    check_cross_process_constraints(init_context)
    return executor_def.executor_creation_fn(init_context)
def celery_executor(init_context):
    """Celery-based executor.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings"
    introduced in Celery version 4.0 and the object constructed from config will be
    passed to the :py:class:`celery.Celery` constructor as its ``config_source``
    argument. (See https://docs.celeryproject.org/en/latest/userguide/configuration.html
    for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments
    to the :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend``
    (e.g., to use Redis instead of RabbitMQ). We expect that ``config_source`` will
    be less frequently modified, but that when solid executions are especially fast
    or slow, or when there are different requirements around idempotence or retry,
    it may make sense to execute pipelines with variations on these settings.

    If you'd like to configure a celery executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs``
    defined on a :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery import celery_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [celery_executor])])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery:
            config:
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able
    to pick up tasks for execution.
    """
    check_cross_process_constraints(init_context)

    return CeleryExecutor(
        broker=init_context.executor_config.get("broker"),
        backend=init_context.executor_config.get("backend"),
        config_source=init_context.executor_config.get("config_source"),
        include=init_context.executor_config.get("include"),
        # "retries" is accessed with [] rather than .get(): the config schema is
        # expected to supply it, and a missing key should fail loudly here.
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
    )
def create_executor(context_creation_data: ContextCreationData) -> "Executor":
    """Instantiate the executor selected for this run.

    Builds an ``InitExecutorContext`` from the pipeline, executor definition,
    validated execution-engine config, and instance carried by
    ``context_creation_data``, enforces cross-process constraints, and
    delegates to the executor definition's creation function.
    """
    check.inst_param(context_creation_data, "context_creation_data", ContextCreationData)

    executor_def = context_creation_data.executor_def
    engine_config = context_creation_data.environment_config.execution.execution_engine_config

    init_context = InitExecutorContext(
        pipeline=context_creation_data.pipeline,
        executor_def=executor_def,
        executor_config=engine_config,
        instance=context_creation_data.instance,
    )
    check_cross_process_constraints(init_context)
    return executor_def.executor_creation_fn(init_context)
def celery_executor(init_context):
    """Celery-based executor.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings"
    introduced in Celery version 4.0 and the object constructed from config will be
    passed to the :py:class:`celery.Celery` constructor as its ``config_source``
    argument. (See https://docs.celeryproject.org/en/latest/userguide/configuration.html
    for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments
    to the :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend``
    (e.g., to use Redis instead of RabbitMQ). We expect that ``celery_settings`` will
    be less frequently modified, but that when solid executions are especially fast or
    slow, or when there are different requirements around idempotence or retry, it
    will make sense to execute pipelines with variations on these settings.

    **Config**:

    .. code-block::

        {
            broker?: 'pyamqp://guest@localhost//',  # The URL of the Celery broker
            backend?: 'rpc://', # The URL of the Celery results backend
            include?: ['my_module'], # List of modules every worker should import
            celery_settings: {
                ...  # Celery app config
            }
        }

    If you'd like to configure a celery executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs``
    defined on a :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery import celery_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [celery_executor])])
        def celery_enabled_pipeline():
            pass
    """
    check_cross_process_constraints(init_context)
    # The validated executor config maps directly onto CeleryConfig's fields.
    return CeleryConfig(**init_context.executor_config)
def dask_executor(init_context):
    """Dask-based executor.

    When used without executor-specific config, a local Dask cluster is created
    (as when :py:class:`dask.distributed.Client() <dask:distributed.Client>` is
    called without a scheduler address).

    The Dask executor optionally takes the following config:

    .. code-block:: none

        cluster:
            {
                local?: # The cluster type, one of the following ('local', 'yarn', 'ssh', 'pbs', 'kube').
                    {
                        address?: '127.0.0.1:8786', # The address of a Dask scheduler
                        timeout?: 5, # Timeout duration for initial connection to the scheduler
                        scheduler_file?: '/path/to/file' # Path to a file with scheduler information
                        # Whether to connect directly to the workers, or ask the scheduler to serve as
                        # intermediary
                        direct_to_workers?: False,
                        heartbeat_interval?: 1000, # Time in milliseconds between heartbeats to scheduler
                    }
            }

    To configure a dask executor in addition to the
    :py:class:`~dagster.default_executors`, add it to the ``executor_defs`` on a
    :py:class:`~dagster.ModeDefinition`:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_dask import dask_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [dask_executor])])
        def dask_enabled_pipeline():
            pass
    """
    check_cross_process_constraints(init_context)
    # The 'cluster' selector holds exactly one entry: {cluster_type: config}.
    [(cluster_kind, cluster_opts)] = init_context.executor_config['cluster'].items()
    return DaskConfig(cluster_kind, cluster_opts)
def dask_executor(init_context):
    """Dask-based executor.

    The 'cluster' can be one of the following:
    ('local', 'yarn', 'ssh', 'pbs', 'moab', 'sge', 'lsf', 'slurm', 'oar', 'kube').

    When used without executor-specific config, a local Dask cluster is created
    (as when calling :py:class:`dask.distributed.Client() <dask:distributed.Client>`
    with :py:class:`dask.distributed.LocalCluster() <dask:distributed.LocalCluster>`).

    The Dask executor optionally takes the following config:

    .. code-block:: none

        cluster:
            {
                local?: # takes distributed.LocalCluster parameters
                    {
                        timeout?: 5, # Timeout duration for initial connection to the scheduler
                        n_workers?: 4  # Number of workers to start
                        threads_per_worker?: 1 # Number of threads per each worker
                    }
            }

    To configure a dask executor in addition to the
    :py:class:`~dagster.default_executors`, add it to the ``executor_defs`` on a
    :py:class:`~dagster.ModeDefinition`:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_dask import dask_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [dask_executor])])
        def dask_enabled_pipeline():
            pass
    """
    check_cross_process_constraints(init_context)
    # The "cluster" selector holds exactly one entry: {cluster_type: config}.
    [(cluster_kind, cluster_opts)] = init_context.executor_config["cluster"].items()
    return DaskExecutor(cluster_kind, cluster_opts)
def celery_k8s_job_executor(init_context):
    '''Celery-based executor which launches tasks as Kubernetes Jobs.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings"
    introduced in Celery version 4.0 and the object constructed from config will be
    passed to the :py:class:`celery.Celery` constructor as its ``config_source``
    argument. (See https://docs.celeryproject.org/en/latest/userguide/configuration.html
    for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments
    to the :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend``
    (e.g., to use Redis instead of RabbitMQ). We expect that ``config_source`` will be
    less frequently modified, but that when solid executions are especially fast or
    slow, or when there are different requirements around idempotence or retry, it may
    make sense to execute pipelines with variations on these settings.

    If you'd like to configure a Celery Kubernetes Job executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs``
    defined on a :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery.executor_k8s import celery_k8s_job_executor

        @pipeline(mode_defs=[
            ModeDefinition(executor_defs=default_executors + [celery_k8s_job_executor])
        ])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-k8s:
            config:
              job_image: 'my_repo.com/image_name:latest'
              job_namespace: 'some-namespace'
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able
    to pick up tasks for execution.
    '''
    from dagster_k8s import DagsterK8sJobConfig, CeleryK8sRunLauncher

    check_cross_process_constraints(init_context)

    launcher = init_context.instance.run_launcher
    cfg = init_context.executor_config

    # This executor can only hand work to a CeleryK8sRunLauncher.
    check.inst(
        launcher,
        CeleryK8sRunLauncher,
        'This engine is only compatible with a CeleryK8sRunLauncher; configure the '
        'CeleryK8sRunLauncher on your instance to use it.',
    )

    k8s_job_config = DagsterK8sJobConfig(
        dagster_home=launcher.dagster_home,
        instance_config_map=launcher.instance_config_map,
        postgres_password_secret=launcher.postgres_password_secret,
        job_image=cfg.get('job_image'),
        image_pull_policy=cfg.get('image_pull_policy'),
        image_pull_secrets=cfg.get('image_pull_secrets'),
        service_account_name=cfg.get('service_account_name'),
        env_config_maps=cfg.get('env_config_maps'),
        env_secrets=cfg.get('env_secrets'),
    )

    # Celery connection settings are set on the instance but overrideable here.
    return CeleryK8sJobConfig(
        broker=launcher.broker or cfg.get('broker'),
        backend=launcher.backend or cfg.get('backend'),
        config_source=launcher.config_source or cfg.get('config_source'),
        include=launcher.include or cfg.get('include'),
        retries=launcher.retries or Retries.from_config(cfg.get('retries')),
        job_config=k8s_job_config,
        job_namespace=cfg.get('job_namespace'),
        load_incluster_config=cfg.get('load_incluster_config'),
        kubeconfig_file=cfg.get('kubeconfig_file'),
    )
def celery_docker_executor(init_context):
    '''Celery-based executor which launches tasks in docker containers.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings"
    introduced in Celery version 4.0 and the object constructed from config will be
    passed to the :py:class:`celery.Celery` constructor as its ``config_source``
    argument. (See https://docs.celeryproject.org/en/latest/userguide/configuration.html
    for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments
    to the :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend``
    (e.g., to use Redis instead of RabbitMQ). We expect that ``config_source`` will be
    less frequently modified, but that when solid executions are especially fast or
    slow, or when there are different requirements around idempotence or retry, it may
    make sense to execute pipelines with variations on these settings.

    If you'd like to configure a Celery Docker executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs``
    defined on a :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery.executor_docker import celery_docker_executor

        @pipeline(mode_defs=[
            ModeDefinition(executor_defs=default_executors + [celery_docker_executor])
        ])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-docker:
            config:
              docker:
                image: 'my_repo.com/image_name:latest'
                registry:
                  url: 'my_repo.com'
                  username: '******'
                  password: {env: 'DOCKER_PASSWORD'}
                env_vars: ["DAGSTER_HOME"] # environment vars to pass from celery worker to docker
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able
    to pick up tasks for execution.

    In deployments where the celery_k8s_job_executor is used all appropriate celery and
    dagster_celery commands must be invoked with the `-A dagster_celery_docker.app` argument.
    '''
    check_cross_process_constraints(init_context)

    cfg = init_context.executor_config

    return CeleryDockerExecutor(
        broker=cfg.get('broker'),
        backend=cfg.get('backend'),
        config_source=cfg.get('config_source'),
        include=cfg.get('include'),
        retries=Retries.from_config(cfg.get('retries')),
        docker_config=cfg.get('docker'),
        repo_location_name=cfg.get('repo_location_name'),
    )
def celery_k8s_job_executor(init_context):
    """Celery-based executor which launches tasks as Kubernetes Jobs.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings"
    introduced in Celery version 4.0 and the object constructed from config will be
    passed to the :py:class:`celery.Celery` constructor as its ``config_source``
    argument. (See https://docs.celeryproject.org/en/latest/userguide/configuration.html
    for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments
    to the :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend``
    (e.g., to use Redis instead of RabbitMQ). We expect that ``config_source`` will be
    less frequently modified, but that when solid executions are especially fast or
    slow, or when there are different requirements around idempotence or retry, it may
    make sense to execute pipelines with variations on these settings.

    If you'd like to configure a Celery Kubernetes Job executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs``
    defined on a :py:class:`~dagster.ModeDefinition` as follows:

    .. literalinclude:: ../../../../../python_modules/libraries/dagster-celery-k8s/dagster_celery_k8s_tests/example_celery_mode_def.py
       :language: python

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-k8s:
            config:
              job_image: 'my_repo.com/image_name:latest'
              job_namespace: 'some-namespace'
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able
    to pick up tasks for execution.

    In deployments where the celery_k8s_job_executor is used all appropriate celery and
    dagster_celery commands must be invoked with the `-A dagster_celery_k8s.app` argument.
    """
    check_cross_process_constraints(init_context)

    launcher = init_context.instance.run_launcher
    cfg = init_context.executor_config

    # This executor can only hand work to a CeleryK8sRunLauncher.
    if not isinstance(launcher, CeleryK8sRunLauncher):
        raise DagsterUnmetExecutorRequirementsError(
            "This engine is only compatible with a CeleryK8sRunLauncher; configure the "
            "CeleryK8sRunLauncher on your instance to use it.",
        )

    k8s_job_config = DagsterK8sJobConfig(
        dagster_home=launcher.dagster_home,
        instance_config_map=launcher.instance_config_map,
        postgres_password_secret=launcher.postgres_password_secret,
        # Fall back to the image baked into the current deployment when no
        # job_image is configured explicitly.
        job_image=cfg.get("job_image") or os.getenv("DAGSTER_CURRENT_IMAGE"),
        image_pull_policy=cfg.get("image_pull_policy"),
        image_pull_secrets=cfg.get("image_pull_secrets"),
        service_account_name=cfg.get("service_account_name"),
        env_config_maps=cfg.get("env_config_maps"),
        env_secrets=cfg.get("env_secrets"),
    )

    # Celery connection settings are set on the instance but overrideable here.
    return CeleryK8sJobExecutor(
        broker=launcher.broker or cfg.get("broker"),
        backend=launcher.backend or cfg.get("backend"),
        config_source=launcher.config_source or cfg.get("config_source"),
        include=launcher.include or cfg.get("include"),
        retries=launcher.retries or Retries.from_config(cfg.get("retries")),
        job_config=k8s_job_config,
        job_namespace=cfg.get("job_namespace"),
        load_incluster_config=cfg.get("load_incluster_config"),
        kubeconfig_file=cfg.get("kubeconfig_file"),
        repo_location_name=cfg.get("repo_location_name"),
    )