Code Example #1
File: host_mode.py Project: simhaonline/dagster
def _get_host_mode_executor(recon_pipeline, run_config, get_executor_def_fn,
                            instance):
    execution_config = run_config.get("execution")
    if execution_config:
        executor_name, executor_config = ensure_single_item(execution_config)
    else:
        executor_name = None
        executor_config = {}

    executor_def = get_executor_def_fn(executor_name)

    executor_config_type = def_config_field(executor_def).config_type

    config_evr = process_config(executor_config_type, executor_config)
    if not config_evr.success:
        raise DagsterInvalidConfigError(
            "Error in executor config for executor {}".format(
                executor_def.name),
            config_evr.errors,
            executor_config,
        )
    executor_config_value = config_evr.value

    init_context = InitExecutorContext(
        pipeline=recon_pipeline,
        executor_def=executor_def,
        executor_config=executor_config_value["config"],
        instance=instance,
    )
    check_cross_process_constraints(init_context)
    return executor_def.executor_creation_fn(init_context)
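For context, here is a minimal sketch of how this helper might be invoked. The name lookup below is hypothetical; only `default_executors` is assumed, since the docstrings later in this listing import it from dagster.

from dagster import default_executors

# Hypothetical name -> ExecutorDefinition lookup. The fallback to the first
# default executor when run_config has no "execution" block is an assumption.
_EXECUTOR_DEFS = {d.name: d for d in default_executors}

def get_executor_def_fn(executor_name):
    if executor_name is None:
        return default_executors[0]  # "in_process" in stock dagster
    return _EXECUTOR_DEFS[executor_name]

# executor = _get_host_mode_executor(recon_pipeline, run_config,
#                                    get_executor_def_fn, instance)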
Code Example #2
def _get_host_mode_executor(recon_pipeline, run_config, executor_defs, instance):
    execution_config = run_config.get("execution", {})
    execution_config_type = Field(
        selector_for_named_defs(executor_defs), default_value={executor_defs[0].name: {}}
    ).config_type

    config_evr = process_config(execution_config_type, execution_config)
    if not config_evr.success:
        raise DagsterInvalidConfigError(
            "Error processing execution config {}".format(execution_config),
            config_evr.errors,
            execution_config,
        )

    execution_config_value = config_evr.value

    executor_name, executor_config = ensure_single_item(execution_config_value)

    executor_defs_by_name = {executor_def.name: executor_def for executor_def in executor_defs}
    executor_def = executor_defs_by_name[executor_name]

    init_context = InitExecutorContext(
        pipeline=recon_pipeline,
        executor_def=executor_def,
        executor_config=executor_config["config"],
        instance=instance,
    )
    check_cross_process_constraints(init_context)
    return executor_def.executor_creation_fn(init_context)
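The key mechanics in this variant are the selector built by `selector_for_named_defs`, which validates to a one-entry mapping, and `ensure_single_item`, which unpacks that entry. A small illustration of the unpacking step, assuming `ensure_single_item` is importable from dagster.utils as it was in dagster releases of this era (the dict literal is illustrative):

from dagster.utils import ensure_single_item

# A Selector config type validates to a one-entry mapping, so the unpack
# below always yields exactly one (name, config) pair.
name, cfg = ensure_single_item({"multiprocess": {"config": {"max_concurrent": 4}}})
assert name == "multiprocess"
assert cfg == {"config": {"max_concurrent": 4}}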
Code Example #3
File: executor.py Project: zuik/dagster
def celery_executor(init_context):
    """Celery-based executor.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute pipelines
    with variations on these settings.

    If you'd like to configure a celery executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery import celery_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [celery_executor])])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery:
            config:
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers you intend to run on were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.
    """
    check_cross_process_constraints(init_context)

    return CeleryExecutor(
        broker=init_context.executor_config.get("broker"),
        backend=init_context.executor_config.get("backend"),
        config_source=init_context.executor_config.get("config_source"),
        include=init_context.executor_config.get("include"),
        retries=RetryMode.from_config(init_context.executor_config["retries"]),
    )
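The YAML above maps one-to-one onto a run_config dict. A hedged sketch using the legacy execute_pipeline API, with `celery_enabled_pipeline` from the docstring example (broker and backend URLs are illustrative):

from dagster import execute_pipeline

run_config = {
    "execution": {
        "celery": {
            "config": {
                "broker": "pyamqp://guest@localhost//",
                "backend": "rpc://",
            }
        }
    }
}

# result = execute_pipeline(celery_enabled_pipeline, run_config=run_config)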
Code Example #4
def create_executor(context_creation_data: ContextCreationData) -> "Executor":
    check.inst_param(context_creation_data, "context_creation_data",
                     ContextCreationData)
    init_context = InitExecutorContext(
        pipeline=context_creation_data.pipeline,
        executor_def=context_creation_data.executor_def,
        executor_config=(
            context_creation_data.environment_config.execution.execution_engine_config
        ),
        instance=context_creation_data.instance,
    )
    check_cross_process_constraints(init_context)
    return context_creation_data.executor_def.executor_creation_fn(
        init_context)
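The check.inst_param guard raises early with an error naming the offending parameter, rather than failing deeper in the call stack. A small sketch of the pattern, assuming the dagster check module of this era (importable as `from dagster import check`):

from dagster import check

def describe_pipeline(pipeline_name):
    # Raises a CheckError naming the parameter if the argument has the
    # wrong type, instead of failing later with an AttributeError.
    check.str_param(pipeline_name, "pipeline_name")
    return "pipeline: " + pipeline_name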
Code Example #5
def celery_executor(init_context):
    '''Celery-based executor.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``celery_settings`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it will make sense to execute pipelines
    with variations on these settings.

    **Config**:

    .. code-block:: none

        {
            broker?: 'pyamqp://guest@localhost//',  # The URL of the Celery broker
            backend?: 'rpc://', # The URL of the Celery results backend
            include?: ['my_module'], # List of modules every worker should import
            celery_settings: {
                ... # Celery app config
            }
        }

    If you'd like to configure a celery executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery import celery_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [celery_executor])])
        def celery_enabled_pipeline():
            pass

    '''
    check_cross_process_constraints(init_context)

    return CeleryConfig(**init_context.executor_config)
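Given the config schema in the docstring, `init_context.executor_config` arrives as a plain dict, so the `CeleryConfig(**...)` call unpacks something like the following (all values illustrative):

executor_config = {
    "broker": "pyamqp://guest@localhost//",
    "backend": "rpc://",
    "include": ["my_module"],
    "celery_settings": {"task_acks_late": True},  # Celery "new lowercase" keys
}
# CeleryConfig(**executor_config) is then equivalent to
# CeleryConfig(broker=..., backend=..., include=..., celery_settings=...)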
Code Example #6
def dask_executor(init_context):
    '''Dask-based executor.

    If the Dask executor is used without providing executor-specific config, a local Dask cluster
    will be created (as when calling :py:class:`dask.distributed.Client() <dask:distributed.Client>`
    without specifying the scheduler address).

    The Dask executor optionally takes the following config:

    .. code-block:: none

        cluster:
            {
                local?:  # The cluster type, one of the following ('local', 'yarn', 'ssh', 'pbs', 'kube').
                    {
                        address?: '127.0.0.1:8786',  # The address of a Dask scheduler
                        timeout?: 5,  # Timeout duration for initial connection to the scheduler
                        scheduler_file?: '/path/to/file',  # Path to a file with scheduler information
                        # Whether to connect directly to the workers, or ask the scheduler to serve as
                        # intermediary
                        direct_to_workers?: False,
                        heartbeat_interval?: 1000,  # Time in milliseconds between heartbeats to scheduler
                    }
            }

    If you'd like to configure a dask executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_dask import dask_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [dask_executor])])
        def dask_enabled_pipeline():
            pass

    '''
    check_cross_process_constraints(init_context)
    ((cluster_type, cluster_configuration),) = init_context.executor_config['cluster'].items()
    return DaskConfig(cluster_type, cluster_configuration)
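The `((cluster_type, cluster_configuration),) = ...` destructuring requires the mapping to contain exactly one entry, which the cluster selector guarantees; with zero or several keys it raises ValueError. For illustration:

# Unpacking a one-entry mapping into its single (key, value) pair.
((cluster_type, cluster_configuration),) = {"local": {"timeout": 5}}.items()
assert cluster_type == "local"
assert cluster_configuration == {"timeout": 5}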
Code Example #7
def dask_executor(init_context):
    """Dask-based executor.

    The 'cluster' can be one of the following:
    ('local', 'yarn', 'ssh', 'pbs', 'moab', 'sge', 'lsf', 'slurm', 'oar', 'kube').

    If the Dask executor is used without providing executor-specific config, a local Dask cluster
    will be created (as when calling :py:class:`dask.distributed.Client() <dask:distributed.Client>`
    with :py:class:`dask.distributed.LocalCluster() <dask:distributed.LocalCluster>`).

    The Dask executor optionally takes the following config:

    .. code-block:: none

        cluster:
            {
                local?: # takes distributed.LocalCluster parameters
                    {
                        timeout?: 5,  # Timeout duration for initial connection to the scheduler
                        n_workers?: 4,  # Number of workers to start
                        threads_per_worker?: 1,  # Number of threads per worker
                    }
            }

    If you'd like to configure a dask executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_dask import dask_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [dask_executor])])
        def dask_enabled_pipeline():
            pass

    """
    check_cross_process_constraints(init_context)
    ((cluster_type, cluster_configuration),) = init_context.executor_config["cluster"].items()
    return DaskExecutor(cluster_type, cluster_configuration)
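A hedged run_config sketch matching the schema above, starting a local cluster with four single-threaded workers (the legacy execute_pipeline API is assumed):

run_config = {
    "execution": {
        "dask": {
            "config": {
                "cluster": {
                    "local": {"n_workers": 4, "threads_per_worker": 1}
                }
            }
        }
    }
}
# execute_pipeline(dask_enabled_pipeline, run_config=run_config)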
Code Example #8
File: executor_k8s.py Project: cy56/dagster
def celery_k8s_job_executor(init_context):
    '''Celery-based executor which launches tasks as Kubernetes Jobs.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute pipelines
    with variations on these settings.

    If you'd like to configure a Celery Kubernetes Job executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery.executor_k8s import celery_k8s_job_executor

        @pipeline(mode_defs=[
            ModeDefinition(executor_defs=default_executors + [celery_k8s_job_executor])
        ])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-k8s:
            config:
              job_image: 'my_repo.com/image_name:latest'
              job_namespace: 'some-namespace'
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers you intend to run on were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.
    '''
    from dagster_k8s import DagsterK8sJobConfig, CeleryK8sRunLauncher

    check_cross_process_constraints(init_context)

    run_launcher = init_context.instance.run_launcher
    exc_cfg = init_context.executor_config

    check.inst(
        run_launcher,
        CeleryK8sRunLauncher,
        'This engine is only compatible with a CeleryK8sRunLauncher; configure the '
        'CeleryK8sRunLauncher on your instance to use it.',
    )

    job_config = DagsterK8sJobConfig(
        dagster_home=run_launcher.dagster_home,
        instance_config_map=run_launcher.instance_config_map,
        postgres_password_secret=run_launcher.postgres_password_secret,
        job_image=exc_cfg.get('job_image'),
        image_pull_policy=exc_cfg.get('image_pull_policy'),
        image_pull_secrets=exc_cfg.get('image_pull_secrets'),
        service_account_name=exc_cfg.get('service_account_name'),
        env_config_maps=exc_cfg.get('env_config_maps'),
        env_secrets=exc_cfg.get('env_secrets'),
    )

    # Set on the instance but overrideable here
    broker = run_launcher.broker or exc_cfg.get('broker')
    backend = run_launcher.backend or exc_cfg.get('backend')
    config_source = run_launcher.config_source or exc_cfg.get('config_source')
    include = run_launcher.include or exc_cfg.get('include')
    retries = run_launcher.retries or Retries.from_config(exc_cfg.get('retries'))

    return CeleryK8sJobConfig(
        broker=broker,
        backend=backend,
        config_source=config_source,
        include=include,
        retries=retries,
        job_config=job_config,
        job_namespace=exc_cfg.get('job_namespace'),
        load_incluster_config=exc_cfg.get('load_incluster_config'),
        kubeconfig_file=exc_cfg.get('kubeconfig_file'),
    )
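Note the precedence in the `run_launcher.broker or exc_cfg.get('broker')` lines: despite the "overrideable here" comment, the `or` gives the instance-level value priority, and the executor config is consulted only when the launcher leaves the field unset (falsy). A small illustration:

instance_broker = None  # not configured on the run launcher
exc_cfg = {'broker': 'pyamqp://guest@localhost//'}
# Falls through to the executor config only because the instance value is falsy.
broker = instance_broker or exc_cfg.get('broker')
assert broker == 'pyamqp://guest@localhost//'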
Code Example #9
File: executor.py Project: danieldiamond/dagster
def celery_docker_executor(init_context):
    '''Celery-based executor which launches tasks in docker containers.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute pipelines
    with variations on these settings.

    If you'd like to configure a Celery Docker executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery.executor_docker import celery_docker_executor

        @pipeline(mode_defs=[
            ModeDefinition(executor_defs=default_executors + [celery_docker_executor])
        ])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-docker:
            config:

              docker:
                image: 'my_repo.com/image_name:latest'
                registry:
                  url: 'my_repo.com'
                  username: '******'
                  password: {env: 'DOCKER_PASSWORD'}
                env_vars: ["DAGSTER_HOME"] # environment vars to pass from celery worker to docker

              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers you intend to run on were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.

    In deployments where the celery_docker_executor is used, all appropriate celery and
    dagster_celery commands must be invoked with the `-A dagster_celery_docker.app` argument.
    '''
    check_cross_process_constraints(init_context)

    exc_cfg = init_context.executor_config

    return CeleryDockerExecutor(
        broker=exc_cfg.get('broker'),
        backend=exc_cfg.get('backend'),
        config_source=exc_cfg.get('config_source'),
        include=exc_cfg.get('include'),
        retries=Retries.from_config(exc_cfg.get('retries')),
        docker_config=exc_cfg.get('docker'),
        repo_location_name=exc_cfg.get('repo_location_name'),
    )
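A hedged Python run_config equivalent of the docker section above; the {"env": "DOCKER_PASSWORD"} form resolves the registry password from an environment variable instead of inlining it (registry URL and username are illustrative):

run_config = {
    "execution": {
        "celery-docker": {
            "config": {
                "docker": {
                    "image": "my_repo.com/image_name:latest",
                    "registry": {
                        "url": "my_repo.com",
                        "username": "my_user",  # illustrative placeholder
                        "password": {"env": "DOCKER_PASSWORD"},
                    },
                    "env_vars": ["DAGSTER_HOME"],
                }
            }
        }
    }
}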
Code Example #10
def celery_k8s_job_executor(init_context):
    """Celery-based executor which launches tasks as Kubernetes Jobs.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute pipelines
    with variations on these settings.

    If you'd like to configure a Celery Kubernetes Job executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. literalinclude:: ../../../../../python_modules/libraries/dagster-celery-k8s/dagster_celery_k8s_tests/example_celery_mode_def.py
       :language: python

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-k8s:
            config:
              job_image: 'my_repo.com/image_name:latest'
              job_namespace: 'some-namespace'
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers you intend to run on were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.

    In deployments where the celery_k8s_job_executor is used, all appropriate celery and
    dagster_celery commands must be invoked with the `-A dagster_celery_k8s.app` argument.
    """

    check_cross_process_constraints(init_context)

    run_launcher = init_context.instance.run_launcher
    exc_cfg = init_context.executor_config

    if not isinstance(run_launcher, CeleryK8sRunLauncher):
        raise DagsterUnmetExecutorRequirementsError(
            "This engine is only compatible with a CeleryK8sRunLauncher; configure the "
            "CeleryK8sRunLauncher on your instance to use it.",
        )

    job_config = DagsterK8sJobConfig(
        dagster_home=run_launcher.dagster_home,
        instance_config_map=run_launcher.instance_config_map,
        postgres_password_secret=run_launcher.postgres_password_secret,
        job_image=exc_cfg.get("job_image") or os.getenv("DAGSTER_CURRENT_IMAGE"),
        image_pull_policy=exc_cfg.get("image_pull_policy"),
        image_pull_secrets=exc_cfg.get("image_pull_secrets"),
        service_account_name=exc_cfg.get("service_account_name"),
        env_config_maps=exc_cfg.get("env_config_maps"),
        env_secrets=exc_cfg.get("env_secrets"),
    )

    # Set on the instance but overrideable here
    broker = run_launcher.broker or exc_cfg.get("broker")
    backend = run_launcher.backend or exc_cfg.get("backend")
    config_source = run_launcher.config_source or exc_cfg.get("config_source")
    include = run_launcher.include or exc_cfg.get("include")
    retries = run_launcher.retries or Retries.from_config(exc_cfg.get("retries"))

    return CeleryK8sJobExecutor(
        broker=broker,
        backend=backend,
        config_source=config_source,
        include=include,
        retries=retries,
        job_config=job_config,
        job_namespace=exc_cfg.get("job_namespace"),
        load_incluster_config=exc_cfg.get("load_incluster_config"),
        kubeconfig_file=exc_cfg.get("kubeconfig_file"),
        repo_location_name=exc_cfg.get("repo_location_name"),
    )
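And a hedged run_config sketch for this executor, mirroring the YAML in the docstring (image, namespace, and URLs are illustrative):

run_config = {
    "execution": {
        "celery-k8s": {
            "config": {
                "job_image": "my_repo.com/image_name:latest",
                "job_namespace": "some-namespace",
                "broker": "pyamqp://guest@localhost//",
                "backend": "rpc://",
            }
        }
    }
}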