def multiprocess_executor(init_context):
    """The default multiprocess executor.

    This simple multiprocess executor is available by default on any :py:class:`ModeDefinition`
    that does not provide custom executors. To select the multiprocess executor, include a
    fragment such as the following in your config:

    .. code-block:: yaml

        execution:
          multiprocess:
            config:
              max_concurrent: 4

    The ``max_concurrent`` arg is optional and tells the execution engine how many processes may
    run concurrently. By default, or if you set ``max_concurrent`` to be 0, this is the return
    value of :py:func:`python:multiprocessing.cpu_count`.

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
    where the higher the number the higher the priority. 0 is the default and both positive
    and negative numbers can be used.
    """
    from dagster.core.executor.init import InitExecutorContext
    from dagster.core.executor.multiprocess import MultiprocessExecutor

    check.inst_param(init_context, "init_context", InitExecutorContext)
    check_cross_process_constraints(init_context)

    return MultiprocessExecutor(
        pipeline=init_context.pipeline,
        max_concurrent=init_context.executor_config["max_concurrent"],
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
    )
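# Hedged illustration (not from the source above) of the execution-priority tagging
# the docstring describes: solids tagged with "dagster/priority" are preferred over
# lower-priority ones when the multiprocess executor chooses among ready steps.
# The pipeline and solid names here are hypothetical.
from dagster import pipeline, solid


@solid(tags={"dagster/priority": "3"})
def important_solid(_):
    return 1


@solid(tags={"dagster/priority": "-1"})
def low_priority_solid(_):
    return 2


@pipeline
def prioritized_pipeline():
    important_solid()
    low_priority_solid()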
def __init__(
    self,
    instance_config_map,
    dagster_home,
    postgres_password_secret,
    load_incluster_config=True,
    kubeconfig_file=None,
    broker=None,
    backend=None,
    include=None,
    config_source=None,
    retries=None,
    inst_data=None,
    k8s_client_batch_api=None,
    env_config_maps=None,
    env_secrets=None,
    volume_mounts=None,
    volumes=None,
):
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)

    if load_incluster_config:
        check.invariant(
            kubeconfig_file is None,
            "`kubeconfig_file` is set but `load_incluster_config` is True.",
        )
        kubernetes.config.load_incluster_config()
    else:
        check.opt_str_param(kubeconfig_file, "kubeconfig_file")
        kubernetes.config.load_kube_config(kubeconfig_file)

    self._fixed_batch_api = k8s_client_batch_api

    self.instance_config_map = check.str_param(instance_config_map, "instance_config_map")
    self.dagster_home = check.str_param(dagster_home, "dagster_home")
    self.postgres_password_secret = check.str_param(
        postgres_password_secret, "postgres_password_secret"
    )
    self.broker = check.opt_str_param(broker, "broker")
    self.backend = check.opt_str_param(backend, "backend")
    self.include = check.opt_list_param(include, "include")
    self.config_source = check.opt_dict_param(config_source, "config_source")

    retries = check.opt_dict_param(retries, "retries") or {"enabled": {}}
    self.retries = RetryMode.from_config(retries)

    self._env_config_maps = check.opt_list_param(
        env_config_maps, "env_config_maps", of_type=str
    )
    self._env_secrets = check.opt_list_param(env_secrets, "env_secrets", of_type=str)
    self._volume_mounts = check.opt_list_param(volume_mounts, "volume_mounts")
    self._volumes = check.opt_list_param(volumes, "volumes")

    super().__init__()
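# Hedged sketch of how the constructor above is typically driven: as a configurable
# class, its keyword arguments map to the run launcher's config block in dagster.yaml.
# The values below are placeholders, not taken from the source.
#
#   run_launcher:
#     module: dagster_celery_k8s
#     class: CeleryK8sRunLauncher
#     config:
#       instance_config_map: "dagster-instance"
#       dagster_home: "/opt/dagster/dagster_home"
#       postgres_password_secret: "dagster-postgresql-secret"
#       broker: "pyamqp://guest@rabbitmq:5672//"
#       backend: "rpc://"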
def docker_executor(init_context: InitExecutorContext) -> Executor:
    from . import DockerRunLauncher

    image = init_context.executor_config.get("image")
    registry = init_context.executor_config.get("registry")
    env_vars = init_context.executor_config.get("env_vars")
    network = init_context.executor_config.get("network")
    networks = init_context.executor_config.get("networks")
    container_kwargs = init_context.executor_config.get("container_kwargs")

    run_launcher = init_context.instance.run_launcher
    if isinstance(run_launcher, DockerRunLauncher):
        image = image or run_launcher.image
        registry = registry or run_launcher.registry
        env_vars = run_launcher.env_vars + (env_vars or [])
        networks = run_launcher.networks + (networks or [])
        container_kwargs = merge_dicts(run_launcher.container_kwargs, container_kwargs or {})

    validate_docker_config(network, networks, container_kwargs)

    return StepDelegatingExecutor(
        DockerStepHandler(
            image,
            registry,
            env_vars,
            network,
            networks,
            container_kwargs,
        ),
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
    )
def in_process_executor(init_context):
    """The default in-process executor.

    In most Dagster environments, this will be the default executor. It is available by default on
    any :py:class:`ModeDefinition` that does not provide custom executors. To select it explicitly,
    include the following top-level fragment in config:

    .. code-block:: yaml

        execution:
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
    where the higher the number the higher the priority. 0 is the default and both positive
    and negative numbers can be used.
    """
    from dagster.core.executor.init import InitExecutorContext
    from dagster.core.executor.in_process import InProcessExecutor

    check.inst_param(init_context, "init_context", InitExecutorContext)

    return InProcessExecutor(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=RetryMode.from_config(init_context.executor_config.get("retries", {"enabled": {}})),
        marker_to_close=init_context.executor_config.get("marker_to_close"),
    )
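# Hedged usage sketch (not part of the source above): explicitly selecting the
# in-process executor via run config on a trivial legacy-API pipeline. The
# pipeline and solid names are hypothetical.
from dagster import execute_pipeline, pipeline, solid


@solid
def emit_one(_):
    return 1


@pipeline
def in_process_example_pipeline():
    emit_one()


if __name__ == "__main__":
    result = execute_pipeline(
        in_process_example_pipeline,
        run_config={"execution": {"in_process": {}}},
    )
    assert result.success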
def needs_config(_):
    from dagster.core.executor.in_process import InProcessExecutor

    return InProcessExecutor(
        retries=RetryMode.from_config({"enabled": {}}),
        marker_to_close=None,
    )
def _core_multiprocess_executor_creation(max_concurrent, retries_config):
    from dagster.core.executor.multiprocess import MultiprocessExecutor

    return MultiprocessExecutor(
        max_concurrent=max_concurrent,
        retries=RetryMode.from_config(retries_config),
    )
def celery_executor(init_context):
    """Celery-based executor.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently modified,
    but that when solid executions are especially fast or slow, or when there are different
    requirements around idempotence or retry, it may make sense to execute pipelines with
    variations on these settings.

    If you'd like to configure a celery executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery import celery_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [celery_executor])])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery:
            config:
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.
    """
    check_cross_process_constraints(init_context)

    return CeleryExecutor(
        broker=init_context.executor_config.get("broker"),
        backend=init_context.executor_config.get("backend"),
        config_source=init_context.executor_config.get("config_source"),
        include=init_context.executor_config.get("include"),
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
    )
def for_cli(broker=None, backend=None, include=None, config_source=None):
    return CeleryExecutor(
        retries=RetryMode(RetryMode.DISABLED),
        broker=broker,
        backend=backend,
        include=include,
        config_source=config_source,
    )
def _core_in_process_executor_creation(retries_config, marker_to_close):
    from dagster.core.executor.in_process import InProcessExecutor

    return InProcessExecutor(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=RetryMode.from_config(retries_config),
        marker_to_close=marker_to_close,
    )
def _core_in_process_executor_creation(config: Dict[str, Any]):
    from dagster.core.executor.in_process import InProcessExecutor

    return InProcessExecutor(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=RetryMode.from_config(config["retries"]),
        marker_to_close=config.get("marker_to_close"),
    )
def docker_executor(init_context: InitExecutorContext) -> Executor:
    """
    Executor which launches steps as Docker containers.

    To use the `docker_executor`, set it as the `executor_def` when defining a job:

    .. literalinclude:: ../../../../../../python_modules/libraries/dagster-docker/dagster_docker_tests/test_example_executor.py
       :start-after: start_marker
       :end-before: end_marker
       :language: python

    Then you can configure the executor with run config as follows:

    .. code-block:: YAML

        execution:
          config:
            registry: ...
            network: ...
            networks: ...
            container_kwargs: ...

    If you're using the DockerRunLauncher, configuration set on the containers created by the run
    launcher will also be set on the containers that are created for each step.
    """
    from . import DockerRunLauncher

    image = init_context.executor_config.get("image")
    registry = init_context.executor_config.get("registry")
    env_vars = init_context.executor_config.get("env_vars")
    network = init_context.executor_config.get("network")
    networks = init_context.executor_config.get("networks")
    container_kwargs = init_context.executor_config.get("container_kwargs")

    run_launcher = init_context.instance.run_launcher
    if isinstance(run_launcher, DockerRunLauncher):
        image = image or run_launcher.image
        registry = registry or run_launcher.registry
        env_vars = run_launcher.env_vars + (env_vars or [])
        networks = run_launcher.networks + (networks or [])
        container_kwargs = merge_dicts(run_launcher.container_kwargs, container_kwargs or {})

    validate_docker_config(network, networks, container_kwargs)

    return StepDelegatingExecutor(
        DockerStepHandler(
            image,
            registry,
            env_vars,
            network,
            networks,
            container_kwargs,
        ),
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
    )
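# Hedged sketch, approximating the literalinclude example referenced in the docstring
# above: attach docker_executor as the job's executor_def. The op and job names are
# hypothetical; actually running this requires a Docker-enabled environment and,
# typically, the DockerRunLauncher configured on the instance.
from dagster import job, op
from dagster_docker import docker_executor


@op
def docker_example_op():
    return "hello"


@job(executor_def=docker_executor)
def docker_example_job():
    docker_example_op()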
def test_executor(init_context):
    from dagster.core.executor.in_process import InProcessExecutor

    assert init_context.executor_config == "secret testing value!!"

    return InProcessExecutor(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=RetryMode.from_config({"enabled": {}}),
        marker_to_close=None,
    )
def _core_multiprocess_executor_creation(config: Dict[str, Any]):
    from dagster.core.executor.multiprocess import MultiprocessExecutor

    # unpack optional selector
    start_method = None
    start_cfg = {}
    start_selector = config.get("start_method")
    if start_selector:
        start_method, start_cfg = list(start_selector.items())[0]

    return MultiprocessExecutor(
        max_concurrent=config["max_concurrent"],
        retries=RetryMode.from_config(config["retries"]),
        start_method=start_method,
        explicit_forkserver_preload=start_cfg.get("preload_modules"),
    )
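# Hedged illustration of the config shape that _core_multiprocess_executor_creation
# above unpacks: "start_method" is an optional one-of selector whose single key names
# the multiprocessing start method and whose value carries per-method options
# ("preload_modules" is only meaningful for forkserver). The module name below is
# hypothetical.
example_multiprocess_config = {
    "max_concurrent": 4,
    "retries": {"enabled": {}},
    "start_method": {"forkserver": {"preload_modules": ["my_repo.heavy_imports"]}},
}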
def celery_k8s_job_executor(init_context):
    """Celery-based executor which launches tasks as Kubernetes Jobs.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently modified,
    but that when solid executions are especially fast or slow, or when there are different
    requirements around idempotence or retry, it may make sense to execute pipelines with
    variations on these settings.

    If you'd like to configure a Celery Kubernetes Job executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. literalinclude:: ../../../../../../python_modules/libraries/dagster-celery-k8s/dagster_celery_k8s_tests/example_celery_mode_def.py
       :language: python

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-k8s:
            config:
              job_image: 'my_repo.com/image_name:latest'
              job_namespace: 'some-namespace'
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.

    In deployments where the celery_k8s_job_executor is used, all appropriate celery and
    dagster_celery commands must be invoked with the `-A dagster_celery_k8s.app` argument.
    """
    run_launcher = init_context.instance.run_launcher
    exc_cfg = init_context.executor_config

    if not isinstance(run_launcher, CeleryK8sRunLauncher):
        raise DagsterUnmetExecutorRequirementsError(
            "This engine is only compatible with a CeleryK8sRunLauncher; configure the "
            "CeleryK8sRunLauncher on your instance to use it.",
        )

    job_config = DagsterK8sJobConfig(
        dagster_home=run_launcher.dagster_home,
        instance_config_map=run_launcher.instance_config_map,
        postgres_password_secret=run_launcher.postgres_password_secret,
        job_image=exc_cfg.get("job_image") or os.getenv("DAGSTER_CURRENT_IMAGE"),
        image_pull_policy=exc_cfg.get("image_pull_policy"),
        image_pull_secrets=exc_cfg.get("image_pull_secrets"),
        service_account_name=exc_cfg.get("service_account_name"),
        env_config_maps=exc_cfg.get("env_config_maps"),
        env_secrets=exc_cfg.get("env_secrets"),
    )

    # Set on the instance but overrideable here
    broker = run_launcher.broker or exc_cfg.get("broker")
    backend = run_launcher.backend or exc_cfg.get("backend")
    config_source = run_launcher.config_source or exc_cfg.get("config_source")
    include = run_launcher.include or exc_cfg.get("include")
    retries = run_launcher.retries or RetryMode.from_config(exc_cfg.get("retries"))

    return CeleryK8sJobExecutor(
        broker=broker,
        backend=backend,
        config_source=config_source,
        include=include,
        retries=retries,
        job_config=job_config,
        job_namespace=exc_cfg.get("job_namespace"),
        load_incluster_config=exc_cfg.get("load_incluster_config"),
        kubeconfig_file=exc_cfg.get("kubeconfig_file"),
        repo_location_name=exc_cfg.get("repo_location_name"),
    )
def celery_docker_executor(init_context):
    """Celery-based executor which launches tasks in docker containers.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently modified,
    but that when op executions are especially fast or slow, or when there are different
    requirements around idempotence or retry, it may make sense to execute jobs with variations
    on these settings.

    To use the `celery_docker_executor`, set it as the `executor_def` when defining a job:

    .. code-block:: python

        from dagster import job
        from dagster_celery_docker.executor import celery_docker_executor

        @job(executor_def=celery_docker_executor)
        def celery_enabled_job():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          config:
            docker:
              image: 'my_repo.com/image_name:latest'
              registry:
                url: 'my_repo.com'
                username: '******'
                password: {env: 'DOCKER_PASSWORD'}
              env_vars: ["DAGSTER_HOME"] # environment vars to pass from celery worker to docker
            broker: 'pyamqp://guest@localhost//' # Optional[str]: The URL of the Celery broker
            backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
            include: ['my_module'] # Optional[List[str]]: Modules every worker should import
            config_source: # Dict[str, Any]: Any additional parameters to pass to the
                #...       # Celery workers. This dict will be passed as the `config_source`
                #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.

    In deployments where the celery_docker_executor is used, all appropriate celery and
    dagster_celery commands must be invoked with the `-A dagster_celery_docker.app` argument.
    """
    exc_cfg = init_context.executor_config

    return CeleryDockerExecutor(
        broker=exc_cfg.get("broker"),
        backend=exc_cfg.get("backend"),
        config_source=exc_cfg.get("config_source"),
        include=exc_cfg.get("include"),
        retries=RetryMode.from_config(exc_cfg.get("retries")),
        docker_config=exc_cfg.get("docker"),
    )
def k8s_job_executor(init_context: InitExecutorContext) -> Executor:
    """
    Executor which launches steps as Kubernetes Jobs.

    To use the `k8s_job_executor`, set it as the `executor_def` when defining a job:

    .. literalinclude:: ../../../../../../python_modules/libraries/dagster-k8s/dagster_k8s_tests/unit_tests/test_example_executor_mode_def.py
       :start-after: start_marker
       :end-before: end_marker
       :language: python

    Then you can configure the executor with run config as follows:

    .. code-block:: YAML

        execution:
          config:
            job_namespace: 'some-namespace'
            image_pull_policy: ...
            image_pull_secrets: ...
            service_account_name: ...
            env_config_maps: ...
            env_secrets: ...
            job_image: ... # leave out if using userDeployments
    """
    run_launcher = init_context.instance.run_launcher

    if not isinstance(run_launcher, K8sRunLauncher):
        raise DagsterUnmetExecutorRequirementsError(
            "This engine is only compatible with a K8sRunLauncher; configure the "
            "K8sRunLauncher on your instance to use it.",
        )

    exc_cfg = init_context.executor_config

    job_config = DagsterK8sJobConfig(
        dagster_home=run_launcher.dagster_home,
        instance_config_map=run_launcher.instance_config_map,
        postgres_password_secret=run_launcher.postgres_password_secret,
        job_image=exc_cfg.get("job_image"),
        image_pull_policy=(
            exc_cfg.get("image_pull_policy")
            if exc_cfg.get("image_pull_policy") is not None
            else run_launcher.image_pull_policy
        ),
        image_pull_secrets=run_launcher.image_pull_secrets + (exc_cfg.get("image_pull_secrets") or []),
        service_account_name=(
            exc_cfg.get("service_account_name")
            if exc_cfg.get("service_account_name") is not None
            else run_launcher.service_account_name
        ),
        env_config_maps=run_launcher.env_config_maps + (exc_cfg.get("env_config_maps") or []),
        env_secrets=run_launcher.env_secrets + (exc_cfg.get("env_secrets") or []),
        volume_mounts=run_launcher.volume_mounts + (exc_cfg.get("volume_mounts") or []),
        volumes=run_launcher.volumes + (exc_cfg.get("volumes") or []),
    )

    return StepDelegatingExecutor(
        K8sStepHandler(
            job_config=job_config,
            job_namespace=(
                exc_cfg.get("job_namespace")
                if exc_cfg.get("job_namespace") is not None
                else run_launcher.job_namespace
            ),
            load_incluster_config=run_launcher.load_incluster_config,
            kubeconfig_file=run_launcher.kubeconfig_file,
        ),
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
        should_verify_step=True,
    )
def k8s_job_executor(init_context: InitExecutorContext) -> Executor:
    """
    Executor which launches steps as Kubernetes Jobs.

    To use the `k8s_job_executor`, set it as the `executor_def` when defining a job:

    .. literalinclude:: ../../../../../../python_modules/libraries/dagster-k8s/dagster_k8s_tests/unit_tests/test_example_executor_mode_def.py
       :start-after: start_marker
       :end-before: end_marker
       :language: python

    Then you can configure the executor with run config as follows:

    .. code-block:: YAML

        execution:
          config:
            job_namespace: 'some-namespace'
            image_pull_policy: ...
            image_pull_secrets: ...
            service_account_name: ...
            env_config_maps: ...
            env_secrets: ...
            env_vars: ...
            job_image: ... # leave out if using userDeployments

    Configuration set on the Kubernetes Jobs and Pods created by the `K8sRunLauncher` will also be
    set on Kubernetes Jobs and Pods created by the `k8s_job_executor`.
    """
    run_launcher = init_context.instance.run_launcher

    if not isinstance(run_launcher, K8sRunLauncher):
        raise DagsterUnmetExecutorRequirementsError(
            "This engine is only compatible with a K8sRunLauncher; configure the "
            "K8sRunLauncher on your instance to use it.",
        )

    exc_cfg = init_context.executor_config

    k8s_container_context = K8sContainerContext(
        image_pull_policy=exc_cfg.get("image_pull_policy"),
        image_pull_secrets=exc_cfg.get("image_pull_secrets"),
        service_account_name=exc_cfg.get("service_account_name"),
        env_config_maps=exc_cfg.get("env_config_maps"),
        env_secrets=exc_cfg.get("env_secrets"),
        env_vars=exc_cfg.get("env_vars"),
        volume_mounts=exc_cfg.get("volume_mounts"),
        volumes=exc_cfg.get("volumes"),
        labels=exc_cfg.get("labels"),
        namespace=exc_cfg.get("job_namespace"),
    )

    return StepDelegatingExecutor(
        K8sStepHandler(
            image=exc_cfg.get("job_image"),
            container_context=k8s_container_context,
            load_incluster_config=run_launcher.load_incluster_config,
            kubeconfig_file=run_launcher.kubeconfig_file,
        ),
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
        should_verify_step=True,
    )
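# Hedged sketch of wiring k8s_job_executor into a job, in the spirit of the
# literalinclude example referenced in the docstring above. The op/job names and
# namespace value are hypothetical; the instance must be configured with the
# K8sRunLauncher for this executor to initialize.
from dagster import job, op
from dagster_k8s import k8s_job_executor


@op
def k8s_example_op():
    return 1


@job(executor_def=k8s_job_executor)
def k8s_example_job():
    k8s_example_op()


# Run config selecting the namespace in which per-step Jobs are created:
k8s_example_run_config = {"execution": {"config": {"job_namespace": "some-namespace"}}}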