Example #1
def in_process_executor(init_context):
    '''The default in-process executor.

    In most Dagster environments, this will be the default executor. It is available by default on
    any :py:class:`ModeDefinition` that does not provide custom executors. To select it explicitly,
    include the following top-level fragment in config:

    .. code-block:: yaml

        execution:
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata:
    the higher the number, the higher the priority. 0 is the default, and both positive
    and negative numbers can be used.
    '''
    from dagster.core.engine.init import InitExecutorContext

    check.inst_param(init_context, 'init_context', InitExecutorContext)

    return InProcessExecutorConfig(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=Retries.from_config(
            init_context.executor_config.get('retries', {'enabled': {}})),
        marker_to_close=init_context.executor_config.get('marker_to_close'),
    )
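
The docstring above names the ``dagster/priority`` tag but does not show it in use. A minimal sketch of selecting this executor and prioritizing one solid over another, assuming the classic ``@solid``/``@pipeline`` APIs of this era (the pipeline and solid names are hypothetical, and older releases spell ``run_config`` as ``environment_dict``):

from dagster import execute_pipeline, pipeline, solid

@solid(tags={"dagster/priority": "5"})  # higher number: scheduled first
def urgent(_):
    return 1

@solid  # no tag: default priority 0
def routine(_):
    return 2

@pipeline
def prioritized_pipeline():
    urgent()
    routine()

# Select the in-process executor via the top-level `execution` fragment.
result = execute_pipeline(
    prioritized_pipeline,
    run_config={"execution": {"in_process": {}}},
)
assert result.success
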
Example #2
    def _execute_plan(self, execute_step_args_packed, executable_dict):
        execute_step_args = unpack_value(
            check.dict_param(
                execute_step_args_packed,
                "execute_step_args_packed",
            ))
        check.inst_param(execute_step_args, "execute_step_args",
                         ExecuteStepArgs)

        check.dict_param(executable_dict, "executable_dict")

        instance = DagsterInstance.from_ref(execute_step_args.instance_ref)

        pipeline = ReconstructablePipeline.from_dict(executable_dict)
        retries = Retries.from_config(execute_step_args.retries_dict)

        pipeline_run = instance.get_run_by_id(
            execute_step_args.pipeline_run_id)
        check.invariant(
            pipeline_run,
            "Could not load run {}".format(execute_step_args.pipeline_run_id))

        step_keys_str = ", ".join(execute_step_args.step_keys_to_execute)

        execution_plan = create_execution_plan(
            pipeline,
            pipeline_run.run_config,
            mode=pipeline_run.mode,
            step_keys_to_execute=execute_step_args.step_keys_to_execute,
        )

        engine_event = instance.report_engine_event(
            "Executing steps {} in celery worker".format(step_keys_str),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, "step_keys"),
                    EventMetadataEntry.text(self.request.hostname,
                                            "Celery worker"),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryExecutor,
            step_key=execution_plan.step_key_for_single_step_plans(),
        )

        events = [engine_event]
        for step_event in execute_plan_iterator(
                execution_plan,
                pipeline_run=pipeline_run,
                run_config=pipeline_run.run_config,
                instance=instance,
                retries=retries,
        ):
            events.append(step_event)

        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
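
A note on the unpacking at the top of this task: ``ExecuteStepArgs`` crosses the Celery broker as a plain JSON-safe dict and is rebuilt on the worker. A small round-trip sketch, given an ``ExecuteStepArgs`` value ``execute_step_args`` and assuming the ``dagster.serdes`` import path used in this era:

from dagster.serdes import pack_value, unpack_value  # assumed import path

# Pack the namedtuple into a JSON-safe dict before handing it to Celery...
packed = pack_value(execute_step_args)

# ...and reverse it on the worker, as `_execute_plan` does above.
assert unpack_value(packed) == execute_step_args
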
Example #3
def execute_step_command(input_json):
    try:
        signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    except ValueError:
        warnings.warn((
            "Unexpected error attempting to manage signal handling on thread {thread_name}. "
            "You should not invoke this API (execute_step) from threads "
            "other than the main thread.").format(
                thread_name=threading.current_thread().name))

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json),
                      ExecuteStepArgs)

    with (DagsterInstance.from_ref(args.instance_ref)
          if args.instance_ref else DagsterInstance.get()) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        check.inst(
            pipeline_run,
            PipelineRun,
            "Pipeline run with id '{}' not found for step execution".format(
                args.pipeline_run_id),
        )

        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        retries = Retries.from_config(args.retries_dict)

        if args.should_verify_step:
            success = verify_step(instance, pipeline_run, retries,
                                  args.step_keys_to_execute)
            if not success:
                return

        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute),
            run_config=pipeline_run.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=pipeline_run.mode,
        )

        buff = []

        # Signal to the caller that this step execution was skipped
        if should_skip_step(execution_plan,
                            instance=instance,
                            run_id=pipeline_run.run_id):
            click.echo(serialize_dagster_namedtuple(StepExecutionSkipped()))
            return

        for event in execute_plan_iterator(
                execution_plan,
                pipeline_run,
                instance,
                run_config=pipeline_run.run_config,
                retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))

        for line in buff:
            click.echo(line)
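
This Click command is the same entry point the Kubernetes examples further down shell out to (they build ``command = ["dagster"]`` plus ``args = ["api", ...]``). A hypothetical driver invoking it from Python, assuming the command is registered as ``dagster api execute_step``:

import subprocess

from dagster.serdes import serialize_dagster_namedtuple  # assumed import path

# `execute_step_args` is an ExecuteStepArgs value like the ones above.
input_json = serialize_dagster_namedtuple(execute_step_args)
subprocess.run(["dagster", "api", "execute_step", input_json], check=True)
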
Example #4
    def needs_config(_):
        from dagster.core.executor.in_process import InProcessExecutor

        return InProcessExecutor(
            retries=Retries.from_config({"enabled": {}}),
            marker_to_close=None,
        )
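
Every example here feeds a plain dict to ``Retries.from_config``. A sketch of the two config shapes involved; ``{"enabled": {}}`` appears throughout these examples, while the ``disabled`` variant is an assumption based on the selector-style config Dagster uses elsewhere:

from dagster.core.execution.retries import Retries  # assumed module path

# Retries enabled: steps that raise RetryRequested will be retried.
retries = Retries.from_config({"enabled": {}})

# Assumed: retries disabled, so a RetryRequested surfaces as a step failure.
no_retries = Retries.from_config({"disabled": {}})
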
Example #5
def multiprocess_executor(init_context):
    '''The default multiprocess executor.

    This simple multiprocess executor is available by default on any :py:class:`ModeDefinition`
    that does not provide custom executors. To select the multiprocess executor, include a fragment
    such as the following in your config:

    .. code-block:: yaml

        execution:
          multiprocess:
            max_concurrent: 4

    The ``max_concurrent`` arg is optional and tells the execution engine how many processes may run
    concurrently. By default, or if you set ``max_concurrent`` to be 0, this is the return value of
    :py:func:`python:multiprocessing.cpu_count`.

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata:
    the higher the number, the higher the priority. 0 is the default, and both positive
    and negative numbers can be used.
    '''
    from dagster.core.definitions.handle import ExecutionTargetHandle
    from dagster.core.engine.init import InitExecutorContext

    check.inst_param(init_context, 'init_context', InitExecutorContext)

    check_cross_process_constraints(init_context)

    handle, _ = ExecutionTargetHandle.get_handle(init_context.pipeline_def)
    return MultiprocessExecutorConfig(
        handle=handle,
        max_concurrent=init_context.executor_config['max_concurrent'],
        retries=Retries.from_config(init_context.executor_config['retries']),
    )
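
The same selection can be made from Python by mirroring the docstring's YAML as a dict. A sketch with a hypothetical pipeline; the exact nesting (and whether multiprocess execution additionally requires a reconstructable pipeline and a persistent instance) varies across Dagster versions:

from dagster import execute_pipeline

run_config = {
    "execution": {
        "multiprocess": {
            "max_concurrent": 4,  # 0 falls back to multiprocessing.cpu_count()
        },
    },
}

# `my_pipeline` is a hypothetical pipeline definition.
result = execute_pipeline(my_pipeline, run_config=run_config)
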
Example #6
def execute_step_with_structured_logs_command(input_json):
    signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json),
                      ExecuteStepArgs)

    with (DagsterInstance.from_ref(args.instance_ref)
          if args.instance_ref else DagsterInstance.get()) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute),
            run_config=args.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=args.mode,
        )

        retries = Retries.from_config(args.retries_dict)

        buff = []
        for event in execute_plan_iterator(
                execution_plan,
                pipeline_run,
                instance,
                run_config=args.run_config,
                retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))

        for line in buff:
            click.echo(line)
Example #7
def celery_executor(init_context):
    '''Celery-based executor.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute pipelines
    with variations on these settings.

    If you'd like to configure a celery executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery import celery_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [celery_executor])])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery:
            config:
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.
    '''
    check_cross_process_constraints(init_context)

    return CeleryConfig(
        broker=init_context.executor_config.get('broker'),
        backend=init_context.executor_config.get('backend'),
        config_source=init_context.executor_config.get('config_source'),
        include=init_context.executor_config.get('include'),
        retries=Retries.from_config(init_context.executor_config['retries']),
    )
Example #8
    def test_executor(init_context):
        from dagster.core.executor.in_process import InProcessExecutor

        assert init_context.executor_config["value"] == "secret testing value!!"

        return InProcessExecutor(
            # shouldn't need to .get() here - issue with defaults in config setup
            retries=Retries.from_config({"enabled": {}}),
            marker_to_close=None,
        )
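
Reading ``init_context.executor_config["value"]`` implies this executor was registered with a config schema. A sketch of how that registration might look, assuming the ``@executor`` decorator and ``config_schema`` keyword from this era of Dagster:

from dagster import executor
from dagster.core.execution.retries import Retries  # assumed module path

@executor(name="test_executor", config_schema={"value": str})
def test_executor(init_context):
    from dagster.core.executor.in_process import InProcessExecutor

    # The schema guarantees `value` is present and a string.
    assert isinstance(init_context.executor_config["value"], str)

    return InProcessExecutor(
        retries=Retries.from_config({"enabled": {}}),
        marker_to_close=None,
    )
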
Example #9
    def _execute_plan(_self, instance_ref_dict, handle_dict, run_id, step_keys,
                      retries_dict):
        check.dict_param(instance_ref_dict, 'instance_ref_dict')
        check.dict_param(handle_dict, 'handle_dict')
        check.str_param(run_id, 'run_id')
        check.list_param(step_keys, 'step_keys', of_type=str)
        check.dict_param(retries_dict, 'retries_dict')

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        handle = ExecutionTargetHandle.from_dict(handle_dict)
        retries = Retries.from_config(retries_dict)

        pipeline_run = instance.get_run_by_id(run_id)
        check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

        pipeline_def = handle.build_pipeline_definition().build_sub_pipeline(
            pipeline_run.selector.solid_subset)

        step_keys_str = ", ".join(step_keys)

        execution_plan = create_execution_plan(
            pipeline_def,
            pipeline_run.environment_dict,
            mode=pipeline_run.mode,
            step_keys_to_execute=pipeline_run.step_keys_to_execute,
        ).build_subset_plan(step_keys)

        engine_event = instance.report_engine_event(
            'Executing steps {} in celery worker'.format(step_keys_str),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, 'step_keys'),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryEngine,
            step_key=execution_plan.step_key_for_single_step_plans(),
        )

        events = [engine_event]
        for step_event in execute_plan_iterator(
                execution_plan,
                pipeline_run=pipeline_run,
                environment_dict=pipeline_run.environment_dict,
                instance=instance,
                retries=retries,
        ):
            events.append(step_event)

        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
Example #10
def execute_step_command(input_json):
    with capture_interrupts():

        args = check.inst(deserialize_json_to_dagster_namedtuple(input_json),
                          ExecuteStepArgs)

        with (DagsterInstance.from_ref(args.instance_ref)
              if args.instance_ref else DagsterInstance.get()) as instance:
            pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
            check.inst(
                pipeline_run,
                PipelineRun,
                "Pipeline run with id '{}' not found for step execution".
                format(args.pipeline_run_id),
            )

            recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
            retries = Retries.from_config(args.retries_dict)

            if args.should_verify_step:
                success = verify_step(instance, pipeline_run, retries,
                                      args.step_keys_to_execute)
                if not success:
                    return

            execution_plan = create_execution_plan(
                recon_pipeline.subset_for_execution_from_existing_pipeline(
                    pipeline_run.solids_to_execute),
                run_config=pipeline_run.run_config,
                step_keys_to_execute=args.step_keys_to_execute,
                mode=pipeline_run.mode,
            )

            buff = []

            # Signal to the caller that this step execution was skipped
            if should_skip_step(execution_plan,
                                instance=instance,
                                run_id=pipeline_run.run_id):
                click.echo(serialize_dagster_namedtuple(
                    StepExecutionSkipped()))
                return

            for event in execute_plan_iterator(
                    execution_plan,
                    pipeline_run,
                    instance,
                    run_config=pipeline_run.run_config,
                    retries=retries,
            ):
                buff.append(serialize_dagster_namedtuple(event))

            for line in buff:
                click.echo(line)
Example #11
    def _execute_plan(_self, instance_ref_dict, executable_dict, run_id,
                      step_keys, retries_dict):
        check.dict_param(instance_ref_dict, "instance_ref_dict")
        check.dict_param(executable_dict, "executable_dict")
        check.str_param(run_id, "run_id")
        check.list_param(step_keys, "step_keys", of_type=str)
        check.dict_param(retries_dict, "retries_dict")

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline = ReconstructablePipeline.from_dict(executable_dict)
        retries = Retries.from_config(retries_dict)

        pipeline_run = instance.get_run_by_id(run_id)
        check.invariant(pipeline_run, "Could not load run {}".format(run_id))

        step_keys_str = ", ".join(step_keys)

        execution_plan = create_execution_plan(
            pipeline,
            pipeline_run.run_config,
            mode=pipeline_run.mode,
            step_keys_to_execute=pipeline_run.step_keys_to_execute,
        ).build_subset_plan(step_keys)

        engine_event = instance.report_engine_event(
            "Executing steps {} in celery worker".format(step_keys_str),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, "step_keys"),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryExecutor,
            step_key=execution_plan.step_key_for_single_step_plans(),
        )

        events = [engine_event]
        for step_event in execute_plan_iterator(
                execution_plan,
                pipeline_run=pipeline_run,
                run_config=pipeline_run.run_config,
                instance=instance,
                retries=retries,
        ):
            events.append(step_event)

        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
Example #12
    def __init__(
        self,
        instance_config_map,
        dagster_home,
        postgres_password_secret,
        load_incluster_config=True,
        kubeconfig_file=None,
        broker=None,
        backend=None,
        include=None,
        config_source=None,
        retries=None,
        inst_data=None,
        k8s_client_batch_api=None,
    ):
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)

        if load_incluster_config:
            check.invariant(
                kubeconfig_file is None,
                "`kubeconfig_file` is set but `load_incluster_config` is True.",
            )
            kubernetes.config.load_incluster_config()
        else:
            check.opt_str_param(kubeconfig_file, "kubeconfig_file")
            kubernetes.config.load_kube_config(kubeconfig_file)

        self._batch_api = k8s_client_batch_api or kubernetes.client.BatchV1Api()

        self.instance_config_map = check.str_param(instance_config_map,
                                                   "instance_config_map")
        self.dagster_home = check.str_param(dagster_home, "dagster_home")
        self.postgres_password_secret = check.str_param(
            postgres_password_secret, "postgres_password_secret")
        self.broker = check.opt_str_param(broker, "broker")
        self.backend = check.opt_str_param(backend, "backend")
        self.include = check.opt_list_param(include, "include")
        self.config_source = check.opt_dict_param(config_source,
                                                  "config_source")

        retries = check.opt_dict_param(retries, "retries") or {"enabled": {}}
        self.retries = Retries.from_config(retries)

        super().__init__()
Example #13
    def __init__(
        self,
        instance_config_map,
        dagster_home,
        postgres_password_secret,
        load_incluster_config=True,
        kubeconfig_file=None,
        broker=None,
        backend=None,
        include=None,
        config_source=None,
        retries=None,
        inst_data=None,
    ):
        self._inst_data = check.opt_inst_param(inst_data, 'inst_data',
                                               ConfigurableClassData)

        if load_incluster_config:
            check.invariant(
                kubeconfig_file is None,
                '`kubeconfig_file` is set but `load_incluster_config` is True.',
            )
            kubernetes.config.load_incluster_config()
        else:
            check.opt_str_param(kubeconfig_file, 'kubeconfig_file')
            kubernetes.config.load_kube_config(kubeconfig_file)

        self.instance_config_map = check.str_param(instance_config_map,
                                                   'instance_config_map')
        self.dagster_home = check.str_param(dagster_home, 'dagster_home')
        self.postgres_password_secret = check.str_param(
            postgres_password_secret, 'postgres_password_secret')
        self.broker = check.opt_str_param(broker, 'broker')
        self.backend = check.opt_str_param(backend, 'backend')
        self.include = check.opt_list_param(include, 'include')
        self.config_source = check.opt_dict_param(config_source,
                                                  'config_source')

        retries = check.opt_dict_param(retries, 'retries') or {'enabled': {}}
        self.retries = Retries.from_config(retries)
        self._instance_ref = None
Example #14
def execute_step_with_structured_logs_command(input_json):
    try:
        signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    except ValueError:
        warnings.warn((
            "Unexpected error attempting to manage signal handling on thread {thread_name}. "
            "You should not invoke this API (execute_step_with_structured_logs) from threads "
            "other than the main thread.").format(
                thread_name=threading.current_thread().name))

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json),
                      ExecuteStepArgs)

    with (DagsterInstance.from_ref(args.instance_ref)
          if args.instance_ref else DagsterInstance.get()) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute),
            run_config=args.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=args.mode,
        )

        retries = Retries.from_config(args.retries_dict)

        buff = []
        for event in execute_plan_iterator(
                execution_plan,
                pipeline_run,
                instance,
                run_config=args.run_config,
                retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))

        for line in buff:
            click.echo(line)
Example #15
def multiprocess_executor(init_context):
    '''The default multiprocess executor.

    This simple multiprocess executor is available by default on any :py:class:`ModeDefinition`
    that does not provide custom executors. To select the multiprocess executor, include a fragment
    such as the following in your config:

    .. code-block:: yaml

        execution:
          multiprocess:
            max_concurrent: 4

    The ``max_concurrent`` arg is optional and tells the execution engine how many processes may run
    concurrently. By default, or if you set ``max_concurrent`` to be 0, this is the return value of
    :py:func:`python:multiprocessing.cpu_count`.

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata:
    the higher the number, the higher the priority. 0 is the default, and both positive
    and negative numbers can be used.
    '''
    from dagster.core.definitions.handle import ExecutionTargetHandle
    from dagster.core.engine.init import InitExecutorContext

    check.inst_param(init_context, 'init_context', InitExecutorContext)

    check_cross_process_constraints(init_context)

    # ExecutionTargetHandle.get_handle returns an ExecutionTargetHandleCacheEntry, which is a tuple
    # (handle, solid_subset). Right now we are throwing away the solid_subset that we store in the
    # cache -- this is fragile and we should fix this with
    # https://github.com/dagster-io/dagster/issues/2115 and friends so there are not multiple
    # sources of truth for the solid subset
    handle, _ = ExecutionTargetHandle.get_handle(init_context.pipeline_def)
    return MultiprocessExecutorConfig(
        handle=handle,
        max_concurrent=init_context.executor_config['max_concurrent'],
        retries=Retries.from_config(init_context.executor_config['retries']),
    )
Example #16
def celery_k8s_job_executor(init_context):
    '''Celery-based executor which launches tasks as Kubernetes Jobs.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute pipelines
    with variations on these settings.

    If you'd like to configure a Celery Kubernetes Job executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery.executor_k8s import celery_k8s_job_executor

        @pipeline(mode_defs=[
            ModeDefinition(executor_defs=default_executors + [celery_k8s_job_executor])
        ])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-k8s:
            config:
              job_image: 'my_repo.com/image_name:latest'
              job_namespace: 'some-namespace'
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.
    '''
    from dagster_k8s import DagsterK8sJobConfig, CeleryK8sRunLauncher

    check_cross_process_constraints(init_context)

    run_launcher = init_context.instance.run_launcher
    exc_cfg = init_context.executor_config

    check.inst(
        run_launcher,
        CeleryK8sRunLauncher,
        'This engine is only compatible with a CeleryK8sRunLauncher; configure the '
        'CeleryK8sRunLauncher on your instance to use it.',
    )

    job_config = DagsterK8sJobConfig(
        dagster_home=run_launcher.dagster_home,
        instance_config_map=run_launcher.instance_config_map,
        postgres_password_secret=run_launcher.postgres_password_secret,
        job_image=exc_cfg.get('job_image'),
        image_pull_policy=exc_cfg.get('image_pull_policy'),
        image_pull_secrets=exc_cfg.get('image_pull_secrets'),
        service_account_name=exc_cfg.get('service_account_name'),
        env_config_maps=exc_cfg.get('env_config_maps'),
        env_secrets=exc_cfg.get('env_secrets'),
    )

    # Values set on the instance's run launcher take precedence; executor config is the fallback
    broker = run_launcher.broker or exc_cfg.get('broker')
    backend = run_launcher.backend or exc_cfg.get('backend')
    config_source = run_launcher.config_source or exc_cfg.get('config_source')
    include = run_launcher.include or exc_cfg.get('include')
    retries = run_launcher.retries or Retries.from_config(exc_cfg.get('retries'))

    return CeleryK8sJobConfig(
        broker=broker,
        backend=backend,
        config_source=config_source,
        include=include,
        retries=retries,
        job_config=job_config,
        job_namespace=exc_cfg.get('job_namespace'),
        load_incluster_config=exc_cfg.get('load_incluster_config'),
        kubeconfig_file=exc_cfg.get('kubeconfig_file'),
    )
Example #17
    def _execute_step_k8s_job(
        _self,
        instance_ref_dict,
        step_keys,
        run_config,
        mode,
        repo_name,
        repo_location_name,
        run_id,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        retries_dict,
        resources=None,
        kubeconfig_file=None,
    ):
        '''Run step execution in a K8s job pod.
        '''

        check.dict_param(instance_ref_dict, 'instance_ref_dict')
        check.list_param(step_keys, 'step_keys', of_type=str)
        check.invariant(
            len(step_keys) == 1, 'Celery K8s task executor can only execute 1 step at a time'
        )
        check.dict_param(run_config, 'run_config')
        check.str_param(mode, 'mode')
        check.str_param(repo_name, 'repo_name')
        check.str_param(repo_location_name, 'repo_location_name')
        check.str_param(run_id, 'run_id')

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, 'job_config', DagsterK8sJobConfig)
        check.str_param(job_namespace, 'job_namespace')
        check.bool_param(load_incluster_config, 'load_incluster_config')
        check.dict_param(retries_dict, 'retries_dict')

        check.opt_dict_param(resources, 'resources', key_type=str, value_type=dict)
        check.opt_str_param(kubeconfig_file, 'kubeconfig_file')

        # For when launched via DinD or running inside the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)
        check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

        step_keys_str = ", ".join(step_keys)

        # Ensure we stay below k8s name length limits
        k8s_name_key = _get_k8s_name_key(run_id, step_keys)

        retries = Retries.from_config(retries_dict)

        if retries.get_attempt_count(step_keys[0]):
            attempt_number = retries.get_attempt_count(step_keys[0])
            job_name = 'dagster-job-%s-%d' % (k8s_name_key, attempt_number)
            pod_name = 'dagster-job-%s-%d' % (k8s_name_key, attempt_number)
        else:
            job_name = 'dagster-job-%s' % (k8s_name_key)
            pod_name = 'dagster-job-%s' % (k8s_name_key)

        variables = {
            'executionParams': {
                'runConfigData': run_config,
                'mode': mode,
                'selector': {
                    'repositoryLocationName': repo_location_name,
                    'repositoryName': repo_name,
                    'pipelineName': pipeline_run.pipeline_name,
                    'solidSelection': pipeline_run.solid_selection,
                },
                'executionMetadata': {'runId': run_id},
                'stepKeys': step_keys,
            },
            'retries': retries.to_graphql_input(),
        }
        args = ['-p', 'executePlan', '-v', seven.json.dumps(variables)]

        job = construct_dagster_graphql_k8s_job(job_config, args, job_name, resources, pod_name)

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        engine_event = instance.report_engine_event(
            'Executing steps {} in Kubernetes job {}'.format(step_keys_str, job.metadata.name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, 'Step keys'),
                    EventMetadataEntry.text(job.metadata.name, 'Kubernetes Job name'),
                    EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                    EventMetadataEntry.text(job_config.job_image, 'Job image'),
                    EventMetadataEntry.text(job_config.image_pull_policy, 'Image pull policy'),
                    EventMetadataEntry.text(
                        str(job_config.image_pull_secrets), 'Image pull secrets'
                    ),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name), 'Service account name'
                    ),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobExecutor,
            # validated above that step_keys is length 1, and it is not possible to use ETH or
            # execution plan in this function (Celery K8s workers should not have access to user code)
            step_key=step_keys[0],
        )
        events.append(engine_event)

        kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)

        wait_for_job_success(job.metadata.name, namespace=job_namespace)
        pod_names = get_pod_names_in_job(job.metadata.name, namespace=job_namespace)

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            'Retrieving logs from Kubernetes Job pods',
            pipeline_run,
            EngineEventData([EventMetadataEntry.text('\n'.join(pod_names), 'Pod names')]),
            CeleryK8sJobExecutor,
            step_key=step_keys[0],
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
            logs += raw_logs.split('\n')

        res = parse_raw_log_lines(logs)
        handle_execution_errors(res, 'executePlan')
        step_events = handle_execute_plan_result(res)

        events += step_events

        serialized_events = [serialize_dagster_namedtuple(event) for event in events]
        return serialized_events
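
The ``args`` list assembled above implies the job container runs the ``dagster-graphql`` CLI against the predefined ``executePlan`` query. Roughly, the pod performs the following (a sketch; the real command and image come from ``construct_dagster_graphql_k8s_job``, and ``variables`` is the dict built in the task):

import json
import subprocess

# Equivalent of args = ['-p', 'executePlan', '-v', seven.json.dumps(variables)]
subprocess.run(
    ["dagster-graphql", "-p", "executePlan", "-v", json.dumps(variables)],
    check=True,
)
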
Example #18
    def _execute_step_k8s_job(
        self,
        instance_ref_dict,
        step_keys,
        run_config,
        mode,
        repo_name,
        repo_location_name,
        run_id,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        retries_dict,
        pipeline_origin_packed,
        user_defined_k8s_config_dict=None,
        kubeconfig_file=None,
    ):
        """Run step execution in a K8s job pod.
        """

        check.dict_param(instance_ref_dict, "instance_ref_dict")
        check.list_param(step_keys, "step_keys", of_type=str)
        check.invariant(
            len(step_keys) == 1, "Celery K8s task executor can only execute 1 step at a time"
        )
        check.dict_param(run_config, "run_config")
        check.str_param(mode, "mode")
        check.str_param(repo_name, "repo_name")
        check.str_param(repo_location_name, "repo_location_name")
        check.str_param(run_id, "run_id")

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, "job_config", DagsterK8sJobConfig)
        check.str_param(job_namespace, "job_namespace")

        check.bool_param(load_incluster_config, "load_incluster_config")
        check.dict_param(retries_dict, "retries_dict")

        pipeline_origin = unpack_value(
            check.dict_param(
                pipeline_origin_packed, "pipeline_origin_packed"
            )  # TODO: make part of args
        )
        check.inst(pipeline_origin, PipelineOrigin)

        user_defined_k8s_config = UserDefinedDagsterK8sConfig.from_dict(
            user_defined_k8s_config_dict
        )
        check.opt_inst_param(
            user_defined_k8s_config, "user_defined_k8s_config", UserDefinedDagsterK8sConfig,
        )
        check.opt_str_param(kubeconfig_file, "kubeconfig_file")

        # For when launched via DinD or running inside the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)

        check.invariant(pipeline_run, "Could not load run {}".format(run_id))
        step_key = step_keys[0]

        celery_worker_name = self.request.hostname
        celery_pod_name = os.environ.get("HOSTNAME")
        instance.report_engine_event(
            "Task for step {step_key} picked up by Celery".format(step_key=step_key),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(celery_worker_name, "Celery worker name"),
                    EventMetadataEntry.text(celery_pod_name, "Celery worker Kubernetes Pod name"),
                ]
            ),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )

        if pipeline_run.status != PipelineRunStatus.STARTED:
            instance.report_engine_event(
                "Not scheduling step because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData([EventMetadataEntry.text(step_key, "Step keys"),]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return

        # Ensure we stay below k8s name length limits
        k8s_name_key = get_k8s_job_name(run_id, step_key)

        retries = Retries.from_config(retries_dict)

        if retries.get_attempt_count(step_key):
            attempt_number = retries.get_attempt_count(step_key)
            job_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
            pod_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
        else:
            job_name = "dagster-job-%s" % (k8s_name_key)
            pod_name = "dagster-job-%s" % (k8s_name_key)

        input_json = serialize_dagster_namedtuple(
            ExecuteStepArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run_id,
                instance_ref=None,
                mode=mode,
                step_keys_to_execute=step_keys,
                run_config=run_config,
                retries_dict=retries_dict,
            )
        )
        command = ["dagster"]
        args = ["api", "execute_step_with_structured_logs", input_json]

        job = construct_dagster_k8s_job(
            job_config, command, args, job_name, user_defined_k8s_config, pod_name
        )

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        job_name = job.metadata.name
        engine_event = instance.report_engine_event(
            "Executing step {} in Kubernetes job {}".format(step_key, job_name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step keys"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
                    EventMetadataEntry.text(job_config.job_image, "Job image"),
                    EventMetadataEntry.text(job_config.image_pull_policy, "Image pull policy"),
                    EventMetadataEntry.text(
                        str(job_config.image_pull_secrets), "Image pull secrets"
                    ),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name), "Service account name"
                    ),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobExecutor,
            # validated above that step_keys is length 1, and it is not possible to use ETH or
            # execution plan in this function (Celery K8s workers should not have access to user code)
            step_key=step_key,
        )
        events.append(engine_event)

        try:
            kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            if e.reason == "Conflict":
                # There is an existing job with the same name, so do not proceed.
                instance.report_engine_event(
                    "Did not create Kubernetes job {} for step {} since job name already "
                    "exists, exiting.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step keys"),
                            EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                            EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
                        ],
                        marker_end=DELEGATE_MARKER,
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            else:
                instance.report_engine_event(
                    "Encountered unexpected error while creating Kubernetes job {} for step {}, "
                    "exiting.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step keys"),
                            EventMetadataEntry.text(str(e), "Error"),
                        ]
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            return

        try:
            wait_for_job_success(
                job_name=job_name, namespace=job_namespace, instance=instance, run_id=run_id,
            )
        except DagsterK8sPipelineStatusException:
            instance.report_engine_event(
                "Terminating Kubernetes Job because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step keys"),
                        EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                        EventMetadataEntry.text(job_namespace, "Kubernetes Job namespace"),
                    ]
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            delete_job(job_name=job_name, namespace=job_namespace)
            return

        pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            "Retrieving logs from Kubernetes Job pods",
            pipeline_run,
            EngineEventData([EventMetadataEntry.text("\n".join(pod_names), "Pod names")]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
            logs += raw_logs.split("\n")

        events += filter_dagster_events_from_pod_logs(logs)
        serialized_events = [serialize_dagster_namedtuple(event) for event in events]
        return serialized_events
Example #19
    def _execute_step_k8s_job(
        _self,
        instance_ref_dict,
        step_keys,
        run_config,
        mode,
        repo_name,
        repo_location_name,
        run_id,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        retries_dict,
        pipeline_origin_packed,
        resources=None,
        kubeconfig_file=None,
    ):
        '''Run step execution in a K8s job pod.
        '''

        check.dict_param(instance_ref_dict, 'instance_ref_dict')
        check.list_param(step_keys, 'step_keys', of_type=str)
        check.invariant(
            len(step_keys) == 1,
            'Celery K8s task executor can only execute 1 step at a time')
        check.dict_param(run_config, 'run_config')
        check.str_param(mode, 'mode')
        check.str_param(repo_name, 'repo_name')
        check.str_param(repo_location_name, 'repo_location_name')
        check.str_param(run_id, 'run_id')

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, 'job_config', DagsterK8sJobConfig)
        check.str_param(job_namespace, 'job_namespace')

        check.bool_param(load_incluster_config, 'load_incluster_config')
        check.dict_param(retries_dict, 'retries_dict')

        pipeline_origin = unpack_value(
            check.dict_param(
                pipeline_origin_packed,
                'pipeline_origin_packed')  # TODO: make part of args
        )
        check.inst(pipeline_origin, PipelineOrigin)

        check.opt_dict_param(resources,
                             'resources',
                             key_type=str,
                             value_type=dict)
        check.opt_str_param(kubeconfig_file, 'kubeconfig_file')

        # For when launched via DinD or running inside the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)

        check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

        step_key = step_keys[0]
        if pipeline_run.status != PipelineRunStatus.STARTED:
            instance.report_engine_event(
                'Not scheduling step because pipeline run status is not STARTED',
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, 'Step keys'),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return

        # Ensure we stay below k8s name length limits
        k8s_name_key = get_k8s_job_name(run_id, step_key)

        retries = Retries.from_config(retries_dict)

        if retries.get_attempt_count(step_key):
            attempt_number = retries.get_attempt_count(step_key)
            job_name = 'dagster-job-%s-%d' % (k8s_name_key, attempt_number)
            pod_name = 'dagster-job-%s-%d' % (k8s_name_key, attempt_number)
        else:
            job_name = 'dagster-job-%s' % (k8s_name_key)
            pod_name = 'dagster-job-%s' % (k8s_name_key)

        input_json = serialize_dagster_namedtuple(
            ExecuteStepArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run_id,
                instance_ref=None,
                mode=mode,
                step_keys_to_execute=step_keys,
                run_config=run_config,
                retries_dict=retries_dict,
            ))
        command = ['dagster']
        args = ['api', 'execute_step_with_structured_logs', input_json]

        job = construct_dagster_k8s_job(job_config, command, args, job_name,
                                        resources, pod_name)

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        job_name = job.metadata.name
        engine_event = instance.report_engine_event(
            'Executing step {} in Kubernetes job {}'.format(
                step_key, job_name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, 'Step keys'),
                    EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                    EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                    EventMetadataEntry.text(job_config.job_image, 'Job image'),
                    EventMetadataEntry.text(job_config.image_pull_policy,
                                            'Image pull policy'),
                    EventMetadataEntry.text(str(job_config.image_pull_secrets),
                                            'Image pull secrets'),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name),
                        'Service account name'),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobExecutor,
            # validated above that step_keys is length 1, and it is not possible to use ETH or
            # execution plan in this function (Celery K8s workers should not have access to user code)
            step_key=step_key,
        )
        events.append(engine_event)

        try:
            kubernetes.client.BatchV1Api().create_namespaced_job(
                body=job, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            if e.reason == 'Conflict':
                # There is an existing job with the same name, so do not proceed.
                instance.report_engine_event(
                    'Did not create Kubernetes job {} for step {} since job name already '
                    'exists, exiting.'.format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, 'Step keys'),
                            EventMetadataEntry.text(job_name,
                                                    'Kubernetes Job name'),
                            EventMetadataEntry.text(pod_name,
                                                    'Kubernetes Pod name'),
                        ],
                        marker_end=DELEGATE_MARKER,
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            else:
                instance.report_engine_event(
                    'Encountered unexpected error while creating Kubernetes job {} for step {}, '
                    'exiting.'.format(job_name, step_key),
                    pipeline_run,
                    EngineEventData([
                        EventMetadataEntry.text(step_key, 'Step keys'),
                        EventMetadataEntry.text(str(e), 'Error'),
                    ]),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            return

        try:
            wait_for_job_success(
                job_name=job_name,
                namespace=job_namespace,
                instance=instance,
                run_id=run_id,
            )
        except DagsterK8sPipelineStatusException:
            instance.report_engine_event(
                'Terminating Kubernetes Job because pipeline run status is not STARTED',
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, 'Step keys'),
                    EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                    EventMetadataEntry.text(job_namespace,
                                            'Kubernetes Job namespace'),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            delete_job(job_name=job_name, namespace=job_namespace)
            return

        pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            'Retrieving logs from Kubernetes Job pods',
            pipeline_run,
            EngineEventData(
                [EventMetadataEntry.text('\n'.join(pod_names), 'Pod names')]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
            logs += raw_logs.split('\n')

        events += filter_dagster_events_from_pod_logs(logs)
        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
Example #20
def test_execute_step_with_structured_logs_verify_step():
    with get_foo_pipeline_handle() as pipeline_handle:
        runner = CliRunner()

        with instance_for_test(
                overrides={
                    "compute_logs": {
                        "module":
                        "dagster.core.storage.noop_compute_log_manager",
                        "class": "NoOpComputeLogManager",
                    }
                }) as instance:
            run = create_run_for_test(
                instance,
                pipeline_name="foo",
                run_id="new_run",
                run_config={"storage": {
                    "filesystem": {}
                }},
            )

            input_json = serialize_dagster_namedtuple(
                ExecuteStepArgs(
                    pipeline_origin=pipeline_handle.get_python_origin(),
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                ))

            # Check that verify succeeds for a step that hasn't been run (case 3)
            retries = Retries.from_config({"enabled": {}})
            assert verify_step(instance,
                               run,
                               retries,
                               step_keys_to_execute=["do_something.compute"])

            # Check that verify fails when trying to retry with no original attempt (case 3)
            retries = Retries.from_config({"enabled": {}})
            retries.mark_attempt("do_something.compute")
            assert not verify_step(
                instance,
                run,
                retries,
                step_keys_to_execute=["do_something.compute"])

            # Check that trying to re-run a retry fails verify_step (case 2)
            with mock.patch("dagster.cli.api.get_step_stats_by_key"
                            ) as _step_stats_by_key:
                _step_stats_by_key.return_value = {
                    "do_something.compute":
                    RunStepKeyStatsSnapshot(run_id=run.run_id,
                                            step_key="do_something.compute",
                                            attempts=2)
                }

                retries = Retries.from_config({"enabled": {}})
                retries.mark_attempt("do_something.compute")
                assert not verify_step(
                    instance,
                    run,
                    retries,
                    step_keys_to_execute=["do_something.compute"])

            runner_execute_step_with_structured_logs(
                runner,
                [input_json],
            )

            # Check that verify fails for a step that has already run (case 1)
            retries = Retries.from_config({"enabled": {}})
            assert not verify_step(
                instance,
                run,
                retries,
                step_keys_to_execute=["do_something.compute"])
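The three verify_step cases above all hinge on the Retries bookkeeping: a fresh Retries object reports zero attempts for every step, and mark_attempt bumps the in-memory count that verify_step compares against the step stats stored on the instance. A minimal sketch of that interplay, using only the calls exercised in this test (the import path is an assumption based on this Dagster version):

from dagster.core.execution.retries import Retries

retries = Retries.from_config({"enabled": {}})
assert retries.get_attempt_count("do_something.compute") == 0  # nothing attempted yet

retries.mark_attempt("do_something.compute")
assert retries.get_attempt_count("do_something.compute") == 1  # now looks like a retry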
Example #21
    def _execute_step_k8s_job(
        self,
        execute_step_args_packed,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        user_defined_k8s_config_dict=None,
        kubeconfig_file=None,
    ):
        """Run step execution in a K8s job pod.
        """
        execute_step_args = unpack_value(
            check.dict_param(execute_step_args_packed, "execute_step_args_packed",)
        )
        check.inst_param(execute_step_args, "execute_step_args", ExecuteStepArgs)
        check.invariant(
            len(execute_step_args.step_keys_to_execute) == 1,
            "Celery K8s task executor can only execute 1 step at a time",
        )

        # job_config crosses the Celery boundary as plain serializable data; rehydrate it here
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, "job_config", DagsterK8sJobConfig)
        check.str_param(job_namespace, "job_namespace")

        check.bool_param(load_incluster_config, "load_incluster_config")

        user_defined_k8s_config = UserDefinedDagsterK8sConfig.from_dict(
            user_defined_k8s_config_dict
        )
        check.opt_inst_param(
            user_defined_k8s_config, "user_defined_k8s_config", UserDefinedDagsterK8sConfig,
        )
        check.opt_str_param(kubeconfig_file, "kubeconfig_file")

        # For when launched via DinD or running inside the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance = DagsterInstance.from_ref(execute_step_args.instance_ref)
        pipeline_run = instance.get_run_by_id(execute_step_args.pipeline_run_id)

        check.inst(
            pipeline_run,
            PipelineRun,
            "Could not load run {}".format(execute_step_args.pipeline_run_id),
        )
        step_key = execute_step_args.step_keys_to_execute[0]

        celery_worker_name = self.request.hostname
        celery_pod_name = os.environ.get("HOSTNAME")
        instance.report_engine_event(
            "Task for step {step_key} picked up by Celery".format(step_key=step_key),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(celery_worker_name, "Celery worker name"),
                    EventMetadataEntry.text(celery_pod_name, "Celery worker Kubernetes Pod name"),
                ]
            ),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )

        if pipeline_run.status != PipelineRunStatus.STARTED:
            instance.report_engine_event(
                "Not scheduling step because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData([EventMetadataEntry.text(step_key, "Step key"),]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        # Ensure we stay below k8s name length limits
        k8s_name_key = get_k8s_job_name(execute_step_args.pipeline_run_id, step_key)

        retries = Retries.from_config(execute_step_args.retries_dict)

        if retries.get_attempt_count(step_key):
            attempt_number = retries.get_attempt_count(step_key)
            job_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
            pod_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
        else:
            job_name = "dagster-job-%s" % (k8s_name_key)
            pod_name = "dagster-job-%s" % (k8s_name_key)

        input_json = serialize_dagster_namedtuple(execute_step_args)
        args = ["dagster", "api", "execute_step", input_json]

        job = construct_dagster_k8s_job(
            job_config, args, job_name, user_defined_k8s_config, pod_name
        )

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        job_name = job.metadata.name
        engine_event = instance.report_engine_event(
            "Executing step {} in Kubernetes job {}".format(step_key, job_name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_config.job_image, "Job image"),
                    EventMetadataEntry.text(job_config.image_pull_policy, "Image pull policy"),
                    EventMetadataEntry.text(
                        str(job_config.image_pull_secrets), "Image pull secrets"
                    ),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name), "Service account name"
                    ),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobExecutor,
            # validated above that step_keys is length 1; it is not possible to use an
            # ExecutionTargetHandle (ETH) or execution plan in this function (Celery K8s
            # workers should not have access to user code)
            step_key=step_key,
        )
        events.append(engine_event)
        try:
            kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            if e.reason == "Conflict":
                # There is an existing job with the same name so proceed and see if the existing job succeeded
                instance.report_engine_event(
                    "Did not create Kubernetes job {} for step {} since job name already "
                    "exists, proceeding with existing job.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step key"),
                            EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                        ],
                        marker_end=DELEGATE_MARKER,
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            else:
                instance.report_engine_event(
                    "Encountered unexpected error while creating Kubernetes job {} for step {}, "
                    "exiting.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [EventMetadataEntry.text(step_key, "Step key"),],
                        error=serializable_error_info_from_exc_info(sys.exc_info()),
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
                return []

        try:
            wait_for_job_success(
                job_name=job_name,
                namespace=job_namespace,
                instance=instance,
                run_id=execute_step_args.pipeline_run_id,
            )
        except (DagsterK8sError, DagsterK8sTimeoutError) as err:
            step_failure_event = construct_step_failure_event_and_handle(
                pipeline_run, step_key, err, instance=instance
            )
            events.append(step_failure_event)
        except DagsterK8sPipelineStatusException:
            instance.report_engine_event(
                "Terminating Kubernetes Job because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step key"),
                        EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                        EventMetadataEntry.text(job_namespace, "Kubernetes Job namespace"),
                    ]
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            delete_job(job_name=job_name, namespace=job_namespace)
            return []
        except (
            DagsterK8sUnrecoverableAPIError,
            DagsterK8sAPIRetryLimitExceeded,
            # We shouldn't see unwrapped APIExceptions anymore, as they should all be wrapped in
            # a retry boundary. We still catch it here just in case we missed one so that we can
            # report it to the event log
            kubernetes.client.rest.ApiException,
        ) as err:
            instance.report_engine_event(
                "Encountered unexpected error while waiting on Kubernetes job {} for step {}, "
                "exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [EventMetadataEntry.text(step_key, "Step key"),],
                    error=serializable_error_info_from_exc_info(sys.exc_info()),
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        try:
            pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)
        except kubernetes.client.rest.ApiException:
            instance.report_engine_event(
                "Encountered unexpected error retreiving Pods for Kubernetes job {} for step {}, "
                "exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [EventMetadataEntry.text(step_key, "Step key"),],
                    error=serializable_error_info_from_exc_info(sys.exc_info()),
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            "Retrieving logs from Kubernetes Job pods",
            pipeline_run,
            EngineEventData([EventMetadataEntry.text("\n".join(pod_names), "Pod names")]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            try:
                raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
                logs += raw_logs.split("\n")
            except kubernetes.client.rest.ApiException:
                instance.report_engine_event(
                    "Encountered unexpected error while fetching pod logs for Kubernetes job {}, "
                    "Pod name {} for step {}. Will attempt to continue with other pods.".format(
                        job_name, pod_name, step_key
                    ),
                    pipeline_run,
                    EngineEventData(
                        [EventMetadataEntry.text(step_key, "Step key"),],
                        error=serializable_error_info_from_exc_info(sys.exc_info()),
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )

        events += filter_dagster_events_from_pod_logs(logs)
        serialized_events = [serialize_dagster_namedtuple(event) for event in events]
        return serialized_events
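The job-naming branch near the top of this example is what makes the "Conflict" handling above safe: the first attempt uses the bare hashed key, so a re-delivered Celery task computes the same Job name and attaches to the existing Job, while each retry gets a distinct, deterministic suffix. A small hypothetical helper (not part of the source) distilling that logic:

def k8s_job_and_pod_name(k8s_name_key, attempt_count):
    # Hypothetical helper mirroring the naming logic above: an attempt_count
    # of 0 (first attempt) reuses the bare key; retries append the attempt
    # number so they never collide with the original Job.
    if attempt_count:
        name = "dagster-job-%s-%d" % (k8s_name_key, attempt_count)
    else:
        name = "dagster-job-%s" % k8s_name_key
    return name, name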
Example #22
def celery_k8s_job_executor(init_context):
    """Celery-based executor which launches tasks as Kubernetes Jobs.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute pipelines
    with variations on these settings.

    If you'd like to configure a Celery Kubernetes Job executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. literalinclude:: ../../../../../python_modules/libraries/dagster-celery-k8s/dagster_celery_k8s_tests/example_celery_mode_def.py
       :language: python

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-k8s:
            config:
              job_image: 'my_repo.com/image_name:latest'
              job_namespace: 'some-namespace'
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.

    In deployments where the ``celery_k8s_job_executor`` is used, all appropriate celery and
    dagster-celery commands must be invoked with the ``-A dagster_celery_k8s.app`` argument.
    """

    check_cross_process_constraints(init_context)

    run_launcher = init_context.instance.run_launcher
    exc_cfg = init_context.executor_config

    if not isinstance(run_launcher, CeleryK8sRunLauncher):
        raise DagsterUnmetExecutorRequirementsError(
            "This engine is only compatible with a CeleryK8sRunLauncher; configure the "
            "CeleryK8sRunLauncher on your instance to use it.",
        )

    job_config = DagsterK8sJobConfig(
        dagster_home=run_launcher.dagster_home,
        instance_config_map=run_launcher.instance_config_map,
        postgres_password_secret=run_launcher.postgres_password_secret,
        job_image=exc_cfg.get("job_image") or os.getenv("DAGSTER_CURRENT_IMAGE"),
        image_pull_policy=exc_cfg.get("image_pull_policy"),
        image_pull_secrets=exc_cfg.get("image_pull_secrets"),
        service_account_name=exc_cfg.get("service_account_name"),
        env_config_maps=exc_cfg.get("env_config_maps"),
        env_secrets=exc_cfg.get("env_secrets"),
    )

    # Taken from the run launcher configured on the instance when set; otherwise fall back to executor config
    broker = run_launcher.broker or exc_cfg.get("broker")
    backend = run_launcher.backend or exc_cfg.get("backend")
    config_source = run_launcher.config_source or exc_cfg.get("config_source")
    include = run_launcher.include or exc_cfg.get("include")
    retries = run_launcher.retries or Retries.from_config(exc_cfg.get("retries"))

    return CeleryK8sJobExecutor(
        broker=broker,
        backend=backend,
        config_source=config_source,
        include=include,
        retries=retries,
        job_config=job_config,
        job_namespace=exc_cfg.get("job_namespace"),
        load_incluster_config=exc_cfg.get("load_incluster_config"),
        kubeconfig_file=exc_cfg.get("kubeconfig_file"),
        repo_location_name=exc_cfg.get("repo_location_name"),
    )
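The literalinclude in the docstring above pulls the example mode definition from the repository; a minimal sketch of what it presumably shows, mirroring the inline example given for ``celery_docker_executor`` below (the pipeline name is illustrative):

from dagster import ModeDefinition, default_executors, pipeline
from dagster_celery_k8s import celery_k8s_job_executor

@pipeline(mode_defs=[
    ModeDefinition(executor_defs=default_executors + [celery_k8s_job_executor])
])
def celery_k8s_enabled_pipeline():
    pass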
Example #23
def celery_docker_executor(init_context):
    """Celery-based executor which launches tasks in docker containers.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute pipelines
    with variations on these settings.

    If you'd like to configure a Celery Docker executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery_docker.executor import celery_docker_executor

        @pipeline(mode_defs=[
            ModeDefinition(executor_defs=default_executors + [celery_docker_executor])
        ])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-docker:
            config:

              docker:
                image: 'my_repo.com/image_name:latest'
                registry:
                  url: 'my_repo.com'
                  username: '******'
                  password: {env: 'DOCKER_PASSWORD'}
                env_vars: ["DAGSTER_HOME"] # environment vars to pass from celery worker to docker

              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the Celery
    workers on which you hope to run were started. If, for example, you point the executor at a
    different broker than the one your workers are listening to, the workers will never be able to
    pick up tasks for execution.

    In deployments where the ``celery_docker_executor`` is used, all appropriate celery and
    dagster-celery commands must be invoked with the ``-A dagster_celery_docker.app`` argument.
    """
    check_cross_process_constraints(init_context)

    exc_cfg = init_context.executor_config

    return CeleryDockerExecutor(
        broker=exc_cfg.get("broker"),
        backend=exc_cfg.get("backend"),
        config_source=exc_cfg.get("config_source"),
        include=exc_cfg.get("include"),
        retries=Retries.from_config(exc_cfg.get("retries")),
        docker_config=exc_cfg.get("docker"),
        repo_location_name=exc_cfg.get("repo_location_name"),
    )
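A hedged sketch of launching the ``celery_enabled_pipeline`` from the docstring with this executor selected, assuming it is defined at module level (as ``reconstructable`` requires) and that a Dagster instance is available; the image and broker values are placeholders:

from dagster import DagsterInstance, execute_pipeline, reconstructable

result = execute_pipeline(
    reconstructable(celery_enabled_pipeline),
    run_config={
        "execution": {
            "celery-docker": {
                "config": {
                    "docker": {"image": "my_repo.com/image_name:latest"},
                    "broker": "pyamqp://guest@localhost//",
                }
            }
        },
        # Persistent intermediates storage, matching the run config used in
        # the test example earlier in this document.
        "storage": {"filesystem": {}},
    },
    instance=DagsterInstance.get(),
)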