Example #1
def in_process_executor(init_context):
    """The default in-process executor.

    In most Dagster environments, this will be the default executor. It is available by default on
    any :py:class:`ModeDefinition` that does not provide custom executors. To select it explicitly,
    include the following top-level fragment in config:

    .. code-block:: yaml

        execution:
          in_process:

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
    where higher numbers mean higher priority. The default is 0; both positive and negative
    numbers can be used.
    """
    from dagster.core.executor.init import InitExecutorContext
    from dagster.core.executor.in_process import InProcessExecutor

    check.inst_param(init_context, "init_context", InitExecutorContext)

    return InProcessExecutor(
        # shouldn't need to .get() here - issue with defaults in config setup
        retries=Retries.from_config(
            init_context.executor_config.get("retries", {"enabled": {}})),
        marker_to_close=init_context.executor_config.get("marker_to_close"),
    )
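
A minimal usage sketch for the fragment above (the pipeline and solid names here are
hypothetical, not part of the source):

from dagster import execute_pipeline, pipeline, solid

@solid(tags={"dagster/priority": "3"})  # higher numbers are scheduled first
def say_hello(context):
    context.log.info("hello")

@pipeline
def hello_pipeline():
    say_hello()

# Select the in-process executor explicitly, mirroring the YAML in the docstring.
result = execute_pipeline(hello_pipeline, run_config={"execution": {"in_process": {}}})
assert result.success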
Example #2
def multiprocess_executor(init_context):
    """The default multiprocess executor.

    This simple multiprocess executor is available by default on any :py:class:`ModeDefinition`
    that does not provide custom executors. To select the multiprocess executor, include a fragment
    such as the following in your config:

    .. code-block:: yaml

        execution:
          multiprocess:
            config:
              max_concurrent: 4

    The ``max_concurrent`` arg is optional and tells the execution engine how many processes may run
    concurrently. By default, or if you set ``max_concurrent`` to 0, this is the return value of
    :py:func:`python:multiprocessing.cpu_count`.

    Execution priority can be configured using the ``dagster/priority`` tag via solid metadata,
    where higher numbers mean higher priority. The default is 0; both positive and negative
    numbers can be used.
    """
    from dagster.core.executor.init import InitExecutorContext
    from dagster.core.executor.multiprocess import MultiprocessExecutor

    check.inst_param(init_context, "init_context", InitExecutorContext)

    check_cross_process_constraints(init_context)

    return MultiprocessExecutor(
        pipeline=init_context.pipeline,
        max_concurrent=init_context.executor_config["max_concurrent"],
        retries=Retries.from_config(init_context.executor_config["retries"]),
    )
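
A hedged sketch of running with this executor: the multiprocess executor needs a
reconstructable pipeline and persisted intermediates, so this assumes a module-level
pipeline factory ``define_my_pipeline`` (hypothetical) and a local temp instance:

from dagster import DagsterInstance, execute_pipeline, reconstructable
from my_module import define_my_pipeline  # hypothetical module-level pipeline factory

result = execute_pipeline(
    reconstructable(define_my_pipeline),
    run_config={
        "execution": {"multiprocess": {"config": {"max_concurrent": 4}}},
        "storage": {"filesystem": {}},  # child processes exchange persisted intermediates
    },
    instance=DagsterInstance.local_temp(),
)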
Example #3
def execute_step_command(input_json):
    try:
        signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    except ValueError:
        warnings.warn((
            "Unexpected error attempting to manage signal handling on thread {thread_name}. "
            "You should not invoke this API (execute_step) from threads "
            "other than the main thread.").format(
                thread_name=threading.current_thread().name))

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json),
                      ExecuteStepArgs)

    with (DagsterInstance.from_ref(args.instance_ref)
          if args.instance_ref else DagsterInstance.get()) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        check.inst(
            pipeline_run,
            PipelineRun,
            "Pipeline run with id '{}' not found for step execution".format(
                args.pipeline_run_id),
        )

        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        retries = Retries.from_config(args.retries_dict)

        if args.should_verify_step:
            success = verify_step(instance, pipeline_run, retries,
                                  args.step_keys_to_execute)
            if not success:
                return

        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute),
            run_config=pipeline_run.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=pipeline_run.mode,
        )

        buff = []

        # Flag that the step execution is skipped
        if should_skip_step(execution_plan,
                            instance=instance,
                            run_id=pipeline_run.run_id):
            click.echo(serialize_dagster_namedtuple(StepExecutionSkipped()))
            return

        for event in execute_plan_iterator(
                execution_plan,
                pipeline_run,
                instance,
                run_config=pipeline_run.run_config,
                retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))

        for line in buff:
            click.echo(line)
Example #4
def initialize_step_context(scratch_dir):
    pipeline_run = PipelineRun(
        pipeline_name='foo_pipeline',
        run_id=str(uuid.uuid4()),
        run_config=make_run_config(scratch_dir, 'external'),
        mode='external',
    )

    plan = create_execution_plan(reconstructable(define_basic_pipeline),
                                 pipeline_run.run_config,
                                 mode='external')

    initialization_manager = pipeline_initialization_manager(
        plan,
        pipeline_run.run_config,
        pipeline_run,
        DagsterInstance.ephemeral(),
    )
    for _ in initialization_manager.generate_setup_events():
        pass
    pipeline_context = initialization_manager.get_object()

    active_execution = plan.start(retries=Retries(RetryMode.DISABLED))
    step = active_execution.get_next_step()
    step_context = pipeline_context.for_step(step)
    return step_context
Example #5
def execute_step_with_structured_logs_command(input_json):
    signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)

    with (
        DagsterInstance.from_ref(args.instance_ref) if args.instance_ref else DagsterInstance.get()
    ) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            ),
            run_config=args.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=args.mode,
        )

        retries = Retries.from_config(args.retries_dict)

        buff = []
        for event in execute_plan_iterator(
            execution_plan, pipeline_run, instance, run_config=args.run_config, retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))

        for line in buff:
            click.echo(line)
Example #6
def execute_plan_iterator(
    execution_plan,
    pipeline_run,
    instance,
    retries=None,
    run_config=None,
):
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    retries = check.opt_inst_param(retries, "retries", Retries,
                                   Retries.disabled_mode())
    run_config = check.opt_dict_param(run_config, "run_config")

    return iter(
        _ExecuteRunWithPlanIterable(
            execution_plan=execution_plan,
            iterator=inner_plan_execution_iterator,
            execution_context_manager=PlanExecutionContextManager(
                retries=retries,
                execution_plan=execution_plan,
                run_config=run_config,
                pipeline_run=pipeline_run,
                instance=instance,
                raise_on_error=False,
            ),
        ))
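
A hedged usage sketch for this iterator: stream events for an existing run and print their
types. ``plan``, ``run``, and ``instance`` are assumed to have been built as in the
surrounding examples (``create_execution_plan``, ``instance.get_run_by_id``, and a
``DagsterInstance``):

for event in execute_plan_iterator(
    plan,  # ExecutionPlan from create_execution_plan(...)
    run,  # PipelineRun loaded from the instance
    instance,  # DagsterInstance
    run_config=run.run_config,
    retries=Retries(RetryMode.ENABLED),
):
    print(event.event_type_value)  # e.g. STEP_START, STEP_SUCCESS, STEP_UP_FOR_RETRY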
Example #7
def test_retries_deferred_active_execution():
    pipeline_def = define_diamond_pipeline()
    plan = create_execution_plan(pipeline_def)

    active_execution = plan.start(retries=Retries(RetryMode.DEFERRED))

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 1
    step_1 = steps[0]
    assert step_1.key == 'return_two.compute'

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0  # can't progress

    active_execution.mark_up_for_retry(step_1.key)

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0  # can't progress, retries are deferred

    assert not active_execution.is_complete

    steps = active_execution.get_steps_to_skip()
    # skip split of diamond
    assert len(steps) == 2
    _ = [active_execution.mark_skipped(step.key) for step in steps]

    assert not active_execution.is_complete

    steps = active_execution.get_steps_to_skip()
    # skip end of diamond
    assert len(steps) == 1
    active_execution.mark_skipped(steps[0].key)

    assert active_execution.is_complete
Example #8
def test_retries_active_execution():
    pipeline_def = define_diamond_pipeline()
    plan = create_execution_plan(pipeline_def)

    active_execution = plan.start(retries=Retries(RetryMode.ENABLED))

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 1
    step_1 = steps[0]
    assert step_1.key == 'return_two.compute'

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0  # can't progress

    active_execution.mark_up_for_retry(step_1.key)

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 1
    assert steps[0].key == 'return_two.compute'

    active_execution.mark_up_for_retry(step_1.key)

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 1
    assert steps[0].key == 'return_two.compute'

    active_execution.mark_success(step_1.key)

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 2
    step_2 = steps[0]
    step_3 = steps[1]
    assert step_2.key == 'add_three.compute'
    assert step_3.key == 'mult_three.compute'

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0  # can't progress

    active_execution.mark_success(step_2.key)

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0  # can't progress

    # uh oh failure
    active_execution.mark_failed(step_3.key)

    # can't progress to the 4th step
    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0

    assert not active_execution.is_complete

    steps = active_execution.get_steps_to_skip()
    assert len(steps) == 1
    step_4 = steps[0]

    assert step_4.key == 'adder.compute'
    active_execution.mark_skipped(step_4.key)

    assert active_execution.is_complete
Example #9
    def _execute_plan(self, execute_step_args_packed, executable_dict):
        execute_step_args = unpack_value(
            check.dict_param(
                execute_step_args_packed,
                "execute_step_args_packed",
            ))
        check.inst_param(execute_step_args, "execute_step_args",
                         ExecuteStepArgs)

        check.dict_param(executable_dict, "executable_dict")

        instance = DagsterInstance.from_ref(execute_step_args.instance_ref)

        pipeline = ReconstructablePipeline.from_dict(executable_dict)
        retries = Retries.from_config(execute_step_args.retries_dict)

        pipeline_run = instance.get_run_by_id(
            execute_step_args.pipeline_run_id)
        check.invariant(
            pipeline_run,
            "Could not load run {}".format(execute_step_args.pipeline_run_id))

        step_keys_str = ", ".join(execute_step_args.step_keys_to_execute)

        execution_plan = create_execution_plan(
            pipeline,
            pipeline_run.run_config,
            mode=pipeline_run.mode,
            step_keys_to_execute=execute_step_args.step_keys_to_execute,
        )

        engine_event = instance.report_engine_event(
            "Executing steps {} in celery worker".format(step_keys_str),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, "step_keys"),
                    EventMetadataEntry.text(self.request.hostname,
                                            "Celery worker"),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryExecutor,
            step_key=execution_plan.step_key_for_single_step_plans(),
        )

        events = [engine_event]
        for step_event in execute_plan_iterator(
                execution_plan,
                pipeline_run=pipeline_run,
                run_config=pipeline_run.run_config,
                instance=instance,
                retries=retries,
        ):
            events.append(step_event)

        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
Example #10
def celery_executor(init_context):
    '''Celery-based executor.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings" introduced
    in Celery version 4.0 and the object constructed from config will be passed to the
    :py:class:`celery.Celery` constructor as its ``config_source`` argument.
    (See https://docs.celeryproject.org/en/latest/userguide/configuration.html for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments to the
    :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend`` (e.g., to use
    Redis instead of RabbitMQ). We expect that ``config_source`` will be less frequently
    modified, but that when solid executions are especially fast or slow, or when there are
    different requirements around idempotence or retry, it may make sense to execute pipelines
    with variations on these settings.

    If you'd like to configure a celery executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs`` defined on a
    :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery import celery_executor

        @pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [celery_executor])])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: yaml

        execution:
          celery:
            config:
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://' # Optional[str]: The URL of the Celery results backend
              include: ['my_module'] # Optional[List[str]]: Modules every worker should import
              config_source: # Dict[str, Any]: Any additional parameters to pass to the
                  #...       # Celery workers. This dict will be passed as the `config_source`
                  #...       # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration used to start the Celery
    workers you intend to run on. If, for example, you point the executor at a different broker than
    the one your workers are listening to, the workers will never be able to pick up tasks for
    execution.
    '''
    check_cross_process_constraints(init_context)

    return CeleryConfig(
        broker=init_context.executor_config.get('broker'),
        backend=init_context.executor_config.get('backend'),
        config_source=init_context.executor_config.get('config_source'),
        include=init_context.executor_config.get('include'),
        retries=Retries.from_config(init_context.executor_config['retries']),
    )
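
Because this executor builds its ``Retries`` from ``executor_config['retries']``, the retry
mode can be set alongside the broker settings. A sketch of the corresponding run config as a
Python dict (the broker URL is illustrative):

run_config = {
    "execution": {
        "celery": {
            "config": {
                "broker": "pyamqp://guest@localhost//",  # illustrative broker URL
                "retries": {"enabled": {}},  # or {"disabled": {}} to turn retries off
            }
        }
    }
}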
Example #11
def for_cli(broker=None, backend=None, include=None, config_source=None):
    return CeleryConfig(
        retries=Retries(RetryMode.DISABLED),
        broker=broker,
        backend=backend,
        include=include,
        config_source=config_source,
    )
Example #12
    def test_executor(init_context):
        from dagster.core.executor.in_process import InProcessExecutor

        assert init_context.executor_config["value"] == "secret testing value!!"

        return InProcessExecutor(
            # shouldn't need to .get() here - issue with defaults in config setup
            retries=Retries.from_config({"enabled": {}}),
            marker_to_close=None,
        )
Example #13
def test_incomplete_execution_plan():
    plan = create_execution_plan(define_diamond_pipeline())

    with pytest.raises(DagsterIncompleteExecutionPlanError):
        with plan.start(retries=Retries(RetryMode.DISABLED)) as active_execution:

            steps = active_execution.get_steps_to_execute()
            assert len(steps) == 1
            step_1 = steps[0]
            active_execution.mark_success(step_1.key)
Example #14
    def _execute_plan(_self, instance_ref_dict, handle_dict, run_id, step_keys,
                      retries_dict):
        check.dict_param(instance_ref_dict, 'instance_ref_dict')
        check.dict_param(handle_dict, 'handle_dict')
        check.str_param(run_id, 'run_id')
        check.list_param(step_keys, 'step_keys', of_type=str)
        check.dict_param(retries_dict, 'retries_dict')

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        handle = ExecutionTargetHandle.from_dict(handle_dict)
        retries = Retries.from_config(retries_dict)

        pipeline_run = instance.get_run_by_id(run_id)
        check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

        pipeline_def = handle.build_pipeline_definition().build_sub_pipeline(
            pipeline_run.selector.solid_subset)

        step_keys_str = ", ".join(step_keys)

        execution_plan = create_execution_plan(
            pipeline_def,
            pipeline_run.environment_dict,
            mode=pipeline_run.mode,
            step_keys_to_execute=pipeline_run.step_keys_to_execute,
        ).build_subset_plan(step_keys)

        engine_event = instance.report_engine_event(
            'Executing steps {} in celery worker'.format(step_keys_str),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, 'step_keys'),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryEngine,
            step_key=execution_plan.step_key_for_single_step_plans(),
        )

        events = [engine_event]
        for step_event in execute_plan_iterator(
                execution_plan,
                pipeline_run=pipeline_run,
                environment_dict=pipeline_run.environment_dict,
                instance=instance,
                retries=retries,
        ):
            events.append(step_event)

        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
Example #15
    def build_flyte_sdk_workflow(self):
        ordered_step_dict = self.execution_plan.execution_deps()
        instance = DagsterInstance.ephemeral()
        pipeline_run = instance.create_run(
            pipeline_name=self.execution_plan.pipeline_def.display_name,
            run_id=self.execution_plan.pipeline_def.display_name,
            run_config=self.run_config,
            mode=None,
            solids_to_execute=None,
            step_keys_to_execute=None,
            status=None,
            tags=None,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=self.execution_plan.pipeline_def.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                self.execution_plan,
                self.execution_plan.pipeline_def.get_pipeline_snapshot_id(),
            ),
            parent_pipeline_snapshot=self.execution_plan.pipeline_def.get_parent_pipeline_snapshot(),
        )

        initialization_manager = PlanExecutionContextManager(
            Retries.disabled_mode(),
            self.execution_plan,
            self.run_config,
            instance.get_run_by_id(
                self.execution_plan.pipeline_def.display_name),
            instance,
        )

        list(initialization_manager.prepare_context())
        pipeline_context = initialization_manager.get_context()

        for step_key in ordered_step_dict:
            solid_name = self.execution_plan.get_step_by_key(
                step_key).solid_name
            self.sdk_node_dict[solid_name] = self.get_sdk_node(
                pipeline_context,
                instance,
                pipeline_run,
                step_key,
                storage_request=self.compute_dict[solid_name].get(
                    "storage_request", None),
                cpu_request=self.compute_dict[solid_name].get(
                    "cpu_request", None),
                memory_request=self.compute_dict[solid_name].get(
                    "memory_request", None),
                storage_limit=self.compute_dict[solid_name].get(
                    "storage_limit", None),
                cpu_limit=self.compute_dict[solid_name].get("cpu_limit", None),
                memory_limit=self.compute_dict[solid_name].get(
                    "memory_limit", None),
            )
Example #16
def execute_step_command(input_json):
    with capture_interrupts():

        args = check.inst(deserialize_json_to_dagster_namedtuple(input_json),
                          ExecuteStepArgs)

        with (DagsterInstance.from_ref(args.instance_ref)
              if args.instance_ref else DagsterInstance.get()) as instance:
            pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
            check.inst(
                pipeline_run,
                PipelineRun,
                "Pipeline run with id '{}' not found for step execution".
                format(args.pipeline_run_id),
            )

            recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
            retries = Retries.from_config(args.retries_dict)

            if args.should_verify_step:
                success = verify_step(instance, pipeline_run, retries,
                                      args.step_keys_to_execute)
                if not success:
                    return

            execution_plan = create_execution_plan(
                recon_pipeline.subset_for_execution_from_existing_pipeline(
                    pipeline_run.solids_to_execute),
                run_config=pipeline_run.run_config,
                step_keys_to_execute=args.step_keys_to_execute,
                mode=pipeline_run.mode,
            )

            buff = []

            # Flag that the step execution is skipped
            if should_skip_step(execution_plan,
                                instance=instance,
                                run_id=pipeline_run.run_id):
                click.echo(serialize_dagster_namedtuple(
                    StepExecutionSkipped()))
                return

            for event in execute_plan_iterator(
                    execution_plan,
                    pipeline_run,
                    instance,
                    run_config=pipeline_run.run_config,
                    retries=retries,
            ):
                buff.append(serialize_dagster_namedtuple(event))

            for line in buff:
                click.echo(line)
Example #17
    def _execute_plan(_self, instance_ref_dict, executable_dict, run_id,
                      step_keys, retries_dict):
        check.dict_param(instance_ref_dict, "instance_ref_dict")
        check.dict_param(executable_dict, "executable_dict")
        check.str_param(run_id, "run_id")
        check.list_param(step_keys, "step_keys", of_type=str)
        check.dict_param(retries_dict, "retries_dict")

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline = ReconstructablePipeline.from_dict(executable_dict)
        retries = Retries.from_config(retries_dict)

        pipeline_run = instance.get_run_by_id(run_id)
        check.invariant(pipeline_run, "Could not load run {}".format(run_id))

        step_keys_str = ", ".join(step_keys)

        execution_plan = create_execution_plan(
            pipeline,
            pipeline_run.run_config,
            mode=pipeline_run.mode,
            step_keys_to_execute=pipeline_run.step_keys_to_execute,
        ).build_subset_plan(step_keys)

        engine_event = instance.report_engine_event(
            "Executing steps {} in celery worker".format(step_keys_str),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, "step_keys"),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryExecutor,
            step_key=execution_plan.step_key_for_single_step_plans(),
        )

        events = [engine_event]
        for step_event in execute_plan_iterator(
                execution_plan,
                pipeline_run=pipeline_run,
                run_config=pipeline_run.run_config,
                instance=instance,
                retries=retries,
        ):
            events.append(step_event)

        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
Example #18
def test_incomplete_execution_plan():
    plan = create_execution_plan(define_diamond_pipeline())

    with pytest.raises(
        DagsterInvariantViolationError,
        match="Execution of pipeline finished without completing the execution plan.",
    ):
        with plan.start(retries=Retries(RetryMode.DISABLED)) as active_execution:

            steps = active_execution.get_steps_to_execute()
            assert len(steps) == 1
            step_1 = steps[0]
            active_execution.mark_success(step_1.key)
Example #19
def test_failing_execution_plan():
    pipeline_def = define_diamond_pipeline()
    plan = create_execution_plan(pipeline_def)

    with plan.start(retries=Retries(RetryMode.DISABLED)) as active_execution:

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 1
        step_1 = steps[0]
        assert step_1.key == "return_two"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_1.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_1.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 2
        step_2 = steps[0]
        step_3 = steps[1]
        assert step_2.key == "add_three"
        assert step_3.key == "mult_three"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_2.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_2.key, "result"))

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        # uh oh failure
        active_execution.mark_failed(step_3.key)
        active_execution.mark_step_produced_output(StepOutputHandle(step_3.key, "result"))

        # can't progress to the 4th step
        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0

        assert not active_execution.is_complete

        steps = active_execution.get_steps_to_abandon()
        assert len(steps) == 1
        step_4 = steps[0]

        assert step_4.key == "adder"
        active_execution.mark_abandoned(step_4.key)

        assert active_execution.is_complete
Example #20
def test_retry_deferral():
    events = execute_plan(
        create_execution_plan(define_retry_limit_pipeline()),
        pipeline_run=PipelineRun(pipeline_name='retry_limits', run_id='42'),
        retries=Retries(RetryMode.DEFERRED),
        instance=DagsterInstance.local_temp(),
    )
    events_by_type = defaultdict(list)
    for ev in events:
        events_by_type[ev.event_type].append(ev)

    assert len(events_by_type[DagsterEventType.STEP_START]) == 2
    assert len(events_by_type[DagsterEventType.STEP_UP_FOR_RETRY]) == 2
    # check the grouped dict, not the raw event list (an event type is never equal
    # to a DagsterEvent, so `not in events` would pass vacuously)
    assert not events_by_type[DagsterEventType.STEP_RESTARTED]
    assert not events_by_type[DagsterEventType.STEP_SUCCESS]
Example #21
def test_retries_disabled_active_execution():
    pipeline_def = define_diamond_pipeline()
    plan = create_execution_plan(pipeline_def)

    active_execution = plan.start(retries=Retries(RetryMode.DISABLED))

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 1
    step_1 = steps[0]
    assert step_1.key == "return_two.compute"

    steps = active_execution.get_steps_to_execute()
    assert len(steps) == 0  # can't progress

    with pytest.raises(check.CheckError):
        active_execution.mark_up_for_retry(step_1.key)
Example #22
def test_priorities():
    @solid(tags={"priority": 5})
    def pri_5(_):
        pass

    @solid(tags={"priority": 4})
    def pri_4(_):
        pass

    @solid(tags={"priority": 3})
    def pri_3(_):
        pass

    @solid(tags={"priority": 2})
    def pri_2(_):
        pass

    @solid(tags={"priority": -1})
    def pri_neg_1(_):
        pass

    @solid
    def pri_none(_):
        pass

    @pipeline
    def priorities():
        pri_neg_1()
        pri_3()
        pri_2()
        pri_none()
        pri_5()
        pri_4()

    sort_key_fn = lambda step: int(step.tags.get("priority", 0)) * -1

    plan = create_execution_plan(priorities)
    with plan.start(Retries(RetryMode.DISABLED),
                    sort_key_fn) as active_execution:
        steps = active_execution.get_steps_to_execute()
        assert steps[0].key == "pri_5.compute"
        assert steps[1].key == "pri_4.compute"
        assert steps[2].key == "pri_3.compute"
        assert steps[3].key == "pri_2.compute"
        assert steps[4].key == "pri_none.compute"
        assert steps[5].key == "pri_neg_1.compute"
        _ = [active_execution.mark_skipped(step.key) for step in steps]
Example #23
    def __init__(
        self,
        instance_config_map,
        dagster_home,
        postgres_password_secret,
        load_incluster_config=True,
        kubeconfig_file=None,
        broker=None,
        backend=None,
        include=None,
        config_source=None,
        retries=None,
        inst_data=None,
        k8s_client_batch_api=None,
    ):
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)

        if load_incluster_config:
            check.invariant(
                kubeconfig_file is None,
                "`kubeconfig_file` is set but `load_incluster_config` is True.",
            )
            kubernetes.config.load_incluster_config()
        else:
            check.opt_str_param(kubeconfig_file, "kubeconfig_file")
            kubernetes.config.load_kube_config(kubeconfig_file)

        self._batch_api = k8s_client_batch_api or kubernetes.client.BatchV1Api()

        self.instance_config_map = check.str_param(instance_config_map,
                                                   "instance_config_map")
        self.dagster_home = check.str_param(dagster_home, "dagster_home")
        self.postgres_password_secret = check.str_param(
            postgres_password_secret, "postgres_password_secret")
        self.broker = check.opt_str_param(broker, "broker")
        self.backend = check.opt_str_param(backend, "backend")
        self.include = check.opt_list_param(include, "include")
        self.config_source = check.opt_dict_param(config_source,
                                                  "config_source")

        retries = check.opt_dict_param(retries, "retries") or {"enabled": {}}
        self.retries = Retries.from_config(retries)

        super().__init__()
Example #24
def test_active_execution_plan():
    plan = create_execution_plan(define_diamond_pipeline())

    with plan.start(retries=Retries(RetryMode.DISABLED)) as active_execution:

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 1
        step_1 = steps[0]
        assert step_1.key == "return_two.compute"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_1.key)

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 2
        step_2 = steps[0]
        step_3 = steps[1]
        assert step_2.key == "add_three.compute"
        assert step_3.key == "mult_three.compute"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_2.key)

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        active_execution.mark_success(step_3.key)

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 1
        step_4 = steps[0]

        assert step_4.key == "adder.compute"

        steps = active_execution.get_steps_to_execute()
        assert len(steps) == 0  # can't progress

        assert not active_execution.is_complete

        active_execution.mark_success(step_4.key)

        assert active_execution.is_complete
Example #25
def test_priorities():
    @solid(tags={'priority': 5})
    def pri_5(_):
        pass

    @solid(tags={'priority': 4})
    def pri_4(_):
        pass

    @solid(tags={'priority': 3})
    def pri_3(_):
        pass

    @solid(tags={'priority': 2})
    def pri_2(_):
        pass

    @solid(tags={'priority': -1})
    def pri_neg_1(_):
        pass

    @solid
    def pri_none(_):
        pass

    @pipeline
    def priorities():
        pri_neg_1()
        pri_3()
        pri_2()
        pri_none()
        pri_5()
        pri_4()

    sort_key_fn = lambda step: int(step.tags.get('priority', 0)) * -1

    plan = create_execution_plan(priorities)
    active_execution = plan.start(Retries(RetryMode.DISABLED), sort_key_fn)
    steps = active_execution.get_steps_to_execute()
    assert steps[0].key == 'pri_5.compute'
    assert steps[1].key == 'pri_4.compute'
    assert steps[2].key == 'pri_3.compute'
    assert steps[3].key == 'pri_2.compute'
    assert steps[4].key == 'pri_none.compute'
    assert steps[5].key == 'pri_neg_1.compute'
Example #26
def execute_plan_iterator(
    execution_plan,
    pipeline_run,
    instance,
    retries=None,
    environment_dict=None,
):
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)
    retries = check.opt_inst_param(retries, 'retries', Retries,
                                   Retries.disabled_mode())
    environment_dict = check.opt_dict_param(environment_dict,
                                            'environment_dict')

    initialization_manager = pipeline_initialization_manager(
        execution_plan.pipeline_def,
        environment_dict,
        pipeline_run,
        instance,
        execution_plan,
    )
    for event in initialization_manager.generate_setup_events():
        yield event

    pipeline_context = initialization_manager.get_object()

    generator_closed = False
    try:
        if pipeline_context:
            for event in inner_plan_execution_iterator(
                    pipeline_context,
                    execution_plan=execution_plan,
                    retries=retries):
                yield event
    except GeneratorExit:
        # Shouldn't happen, but avoid runtime-exception in case this generator gets GC-ed
        # (see https://amir.rachum.com/blog/2017/03/03/generator-cleanup/).
        generator_closed = True
        raise
    finally:
        for event in initialization_manager.generate_teardown_events():
            if not generator_closed:
                yield event
Example #27
def execute_plan_iterator(
    execution_plan, pipeline_run, instance, retries=None, run_config=None,
):
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)
    retries = check.opt_inst_param(retries, 'retries', Retries, Retries.disabled_mode())
    run_config = check.opt_dict_param(run_config, 'run_config')

    return iter(
        _ExecuteRunWithPlanIterable(
            execution_plan=execution_plan,
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
            retries=retries,
            iterator=inner_plan_execution_iterator,
            raise_on_error=False,
        )
    )
Example #28
    def __init__(
        self,
        instance_config_map,
        dagster_home,
        postgres_password_secret,
        load_incluster_config=True,
        kubeconfig_file=None,
        broker=None,
        backend=None,
        include=None,
        config_source=None,
        retries=None,
        inst_data=None,
    ):
        self._inst_data = check.opt_inst_param(inst_data, 'inst_data',
                                               ConfigurableClassData)

        if load_incluster_config:
            check.invariant(
                kubeconfig_file is None,
                '`kubeconfig_file` is set but `load_incluster_config` is True.',
            )
            kubernetes.config.load_incluster_config()
        else:
            check.opt_str_param(kubeconfig_file, 'kubeconfig_file')
            kubernetes.config.load_kube_config(kubeconfig_file)

        self.instance_config_map = check.str_param(instance_config_map,
                                                   'instance_config_map')
        self.dagster_home = check.str_param(dagster_home, 'dagster_home')
        self.postgres_password_secret = check.str_param(
            postgres_password_secret, 'postgres_password_secret')
        self.broker = check.opt_str_param(broker, 'broker')
        self.backend = check.opt_str_param(backend, 'backend')
        self.include = check.opt_list_param(include, 'include')
        self.config_source = check.opt_dict_param(config_source,
                                                  'config_source')

        retries = check.opt_dict_param(retries, 'retries') or {'enabled': {}}
        self.retries = Retries.from_config(retries)
        self._instance_ref = None
Example #29
def test_lost_steps():
    plan = create_execution_plan(define_diamond_pipeline())

    # run to completion - but step was in unknown state so exception thrown
    with pytest.raises(DagsterUnknownStepStateError):
        with plan.start(retries=Retries(RetryMode.DISABLED)) as active_execution:

            steps = active_execution.get_steps_to_execute()
            assert len(steps) == 1
            step_1 = steps[0]

            # called by verify_complete when success / fail event not observed
            active_execution.mark_unknown_state(step_1.key)

            # failure assumed for start step - so rest should skip
            steps_to_abandon = active_execution.get_steps_to_abandon()
            while steps_to_abandon:
                _ = [active_execution.mark_abandoned(step.key) for step in steps_to_abandon]
                steps_to_abandon = active_execution.get_steps_to_abandon()

            assert active_execution.is_complete
Example #30
def execute_step_with_structured_logs_command(input_json):
    try:
        signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    except ValueError:
        warnings.warn((
            "Unexpected error attempting to manage signal handling on thread {thread_name}. "
            "You should not invoke this API (execute_step_with_structured_logs) from threads "
            "other than the main thread.").format(
                thread_name=threading.current_thread().name))

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json),
                      ExecuteStepArgs)

    with (DagsterInstance.from_ref(args.instance_ref)
          if args.instance_ref else DagsterInstance.get()) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute),
            run_config=args.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=args.mode,
        )

        retries = Retries.from_config(args.retries_dict)

        buff = []
        for event in execute_plan_iterator(
                execution_plan,
                pipeline_run,
                instance,
                run_config=args.run_config,
                retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))

        for line in buff:
            click.echo(line)