Ejemplo n.º 1
0
def dagster_event_sequence_for_step(step_context, inputs, intermediates_manager):
    '''
    Yield a sequence of dagster events for the given step with the step context.

    This function yields the events that directly result from the computation
    in the non-throwing case, and then also wraps that in an error boundary, so that
    any thrown exception gets translated in a step failure event, and then halts computation
    for that step.

    Additionally, if the pipeline is configured to reraise that error up through
    the execute_pipeline call (configured in the InProcessExecutorConfig) that
    reraise happens here.
    '''

    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(inputs, 'inputs', key_type=str)
    check.inst_param(intermediates_manager, 'intermediates_manager', IntermediatesManager)

    try:
        for step_event in check.generator(
            _core_dagster_event_sequence_for_step(step_context, inputs, intermediates_manager)
        ):
            if step_event.event_type is DagsterEventType.STEP_OUTPUT:
                step_context.log.info(
                    'Step {step} emitted {value} for output {output}'.format(
                        step=step_context.step.key,
                        value=step_event.step_output_data.value_repr,
                        output=step_event.step_output_data.output_name,
                    )
                )
            yield step_event
    except DagsterError as dagster_error:
        user_facing_exc_info = (
            # pylint does not know original_exc_info exists is is_user_code_error is true
            # pylint: disable=no-member
            dagster_error.original_exc_info
            if dagster_error.is_user_code_error
            else sys.exc_info()
        )

        error_info = serializable_error_info_from_exc_info(user_facing_exc_info)

        yield DagsterEvent.step_failure_event(
            step_context=step_context, step_failure_data=StepFailureData(error=error_info)
        )

        if step_context.executor_config.raise_on_error:
            raise dagster_error

        return
    except:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        yield DagsterEvent.step_failure_event(
            step_context=step_context, step_failure_data=StepFailureData(error=error_info)
        )
        raise
Ejemplo n.º 2
0
    def check_step_health(
        self,
        step_contexts: List[IStepContext],
        known_state: KnownExecutionState,
    ):
        assert len(step_contexts) == 1, "Checking multiple steps is not currently supported"
        step_context = step_contexts[0]

        k8s_name_key = get_k8s_job_name(
            self.pipeline_context.plan_data.pipeline_run.run_id,
            step_context.step.key,
        )
        job_name = "dagster-job-%s" % (k8s_name_key)

        job = kubernetes.client.BatchV1Api().read_namespaced_job(
            namespace=self._job_namespace, name=job_name
        )
        if job.status.failed:
            step_failure_event = DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=None, user_failure_data=None),
            )

            return [step_failure_event]
        return []
Ejemplo n.º 3
0
def _step_failure_event_from_exc_info(step_context, exc_info, user_failure_data=None):
    return DagsterEvent.step_failure_event(
        step_context=step_context,
        step_failure_data=StepFailureData(
            error=serializable_error_info_from_exc_info(exc_info),
            user_failure_data=user_failure_data,
        ),
    )
Ejemplo n.º 4
0
def execute_step_in_memory(step_context, inputs, intermediates_manager):
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(inputs, 'inputs', key_type=str)
    check.inst_param(intermediates_manager, 'intermediates_manager', IntermediatesManager)

    try:
        for step_event in check.generator(
            _execute_steps_core_loop(step_context, inputs, intermediates_manager)
        ):
            if step_event.event_type is DagsterEventType.STEP_OUTPUT:
                step_context.log.info(
                    'Step {step} emitted {value} for output {output}'.format(
                        step=step_context.step.key,
                        value=step_event.step_output_data.value_repr,
                        output=step_event.step_output_data.output_name,
                    )
                )
            yield step_event
    except DagsterError as dagster_error:
        user_facing_exc_info = (
            # pylint does not know original_exc_info exists is is_user_code_error is true
            # pylint: disable=no-member
            dagster_error.original_exc_info
            if dagster_error.is_user_code_error
            else sys.exc_info()
        )

        error_info = serializable_error_info_from_exc_info(user_facing_exc_info)

        yield DagsterEvent.step_failure_event(
            step_context=step_context, step_failure_data=StepFailureData(error=error_info)
        )

        if step_context.executor_config.raise_on_error:
            raise dagster_error

        return
    except:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        yield DagsterEvent.step_failure_event(
            step_context=step_context, step_failure_data=StepFailureData(error=error_info)
        )
        raise
Ejemplo n.º 5
0
def _step_failure_event_from_exc_info(
    step_context: SystemStepExecutionContext,
    exc_info: ExcInfo,
    user_failure_data: Optional[UserFailureData] = None,
):
    return DagsterEvent.step_failure_event(
        step_context=step_context,
        step_failure_data=StepFailureData(
            error=serializable_error_info_from_exc_info(exc_info),
            user_failure_data=user_failure_data,
        ),
    )
Ejemplo n.º 6
0
def step_failure_event_from_exc_info(
    step_context: "StepExecutionContext",
    exc_info: ExcInfo,
    user_failure_data: Optional[UserFailureData] = None,
    error_source: Optional[ErrorSource] = None,
):
    from dagster.core.events import DagsterEvent

    return DagsterEvent.step_failure_event(
        step_context=step_context,
        step_failure_data=StepFailureData(
            error=serializable_error_info_from_exc_info(exc_info),
            user_failure_data=user_failure_data,
            error_source=error_source,
        ),
    )
Ejemplo n.º 7
0
    def execute(self, pipeline_context, execution_plan):
        check.inst_param(pipeline_context, "pipeline_context",
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

        limit = self.max_concurrent

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Executing steps using multiprocess engine: parent process (pid: {pid})"
            .format(pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(
                os.getpid(),
                step_keys_to_execute=execution_plan.step_keys_to_execute),
        )

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collection results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        with time_execution_scope() as timer_result:

            with execution_plan.start(
                    retries=self.retries) as active_execution:
                active_iters = {}
                errors = {}
                term_events = {}
                stopping = False

                while (not stopping
                       and not active_execution.is_complete) or active_iters:
                    try:
                        # start iterators
                        while len(active_iters) < limit and not stopping:
                            steps = active_execution.get_steps_to_execute(
                                limit=(limit - len(active_iters)))

                            if not steps:
                                break

                            for step in steps:
                                step_context = pipeline_context.for_step(step)
                                term_events[step.key] = multiprocessing.Event()
                                active_iters[
                                    step.
                                    key] = self.execute_step_out_of_process(
                                        step_context, step, errors,
                                        term_events)

                        # process active iterators
                        empty_iters = []
                        for key, step_iter in active_iters.items():
                            try:
                                event_or_none = next(step_iter)
                                if event_or_none is None:
                                    continue
                                else:
                                    yield event_or_none
                                    active_execution.handle_event(
                                        event_or_none)

                            except ChildProcessCrashException as crash:
                                serializable_error = serializable_error_info_from_exc_info(
                                    sys.exc_info())
                                yield DagsterEvent.engine_event(
                                    pipeline_context,
                                    ("Multiprocess executor: child process for step {step_key} "
                                     "unexpectedly exited with code {exit_code}"
                                     ).format(step_key=key,
                                              exit_code=crash.exit_code),
                                    EngineEventData.engine_error(
                                        serializable_error),
                                    step_key=key,
                                )
                                step_failure_event = DagsterEvent.step_failure_event(
                                    step_context=pipeline_context.for_step(
                                        active_execution.get_step_by_key(key)),
                                    step_failure_data=StepFailureData(
                                        error=serializable_error,
                                        user_failure_data=None),
                                )
                                active_execution.handle_event(
                                    step_failure_event)
                                yield step_failure_event
                                empty_iters.append(key)
                            except StopIteration:
                                empty_iters.append(key)

                        # clear and mark complete finished iterators
                        for key in empty_iters:
                            del active_iters[key]
                            if term_events[key].is_set():
                                stopping = True
                            del term_events[key]
                            active_execution.verify_complete(
                                pipeline_context, key)

                        # process skips from failures or uncovered inputs
                        for event in active_execution.skipped_step_events_iterator(
                                pipeline_context):
                            yield event

                    # In the very small chance that we get interrupted in this coordination section and not
                    # polling the subprocesses for events - try to clean up gracefully
                    except KeyboardInterrupt:
                        yield DagsterEvent.engine_event(
                            pipeline_context,
                            "Multiprocess engine: received KeyboardInterrupt - forwarding to active child processes",
                            EngineEventData.interrupted(
                                list(term_events.keys())),
                        )
                        stopping = True
                        for event in term_events.values():
                            event.set()

                errs = {pid: err for pid, err in errors.items() if err}
                if errs:
                    raise DagsterSubprocessError(
                        "During multiprocess execution errors occurred in child processes:\n{error_list}"
                        .format(error_list="\n".join([
                            "In process {pid}: {err}".format(
                                pid=pid, err=err.to_string())
                            for pid, err in errs.items()
                        ])),
                        subprocess_error_infos=list(errs.values()),
                    )

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Multiprocess engine: parent process exiting after {duration} (pid: {pid})"
            .format(duration=format_duration(timer_result.millis),
                    pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )
Ejemplo n.º 8
0
def _dagster_event_sequence_for_step(step_context, retries):
    """
    Yield a sequence of dagster events for the given step with the step context.

    This function also processes errors. It handles a few error cases:

        (1) User code requests to be retried:
            A RetryRequested has been raised. We will either put the step in to
            up_for_retry state or a failure state depending on the number of previous attempts
            and the max_retries on the received RetryRequested.

        (2) User code fails successfully:
            The user-space code has raised a Failure which may have
            explicit metadata attached.

        (3) User code fails unexpectedly:
            The user-space code has raised an Exception. It has been
            wrapped in an exception derived from DagsterUserCodeException. In that
            case the original user exc_info is stashed on the exception
            as the original_exc_info property.

        (4) User error:
            The framework raised a DagsterError that indicates a usage error
            or some other error not communicated by a user-thrown exception. For example,
            if the user yields an object out of a compute function that is not a
            proper event (not an Output, ExpectationResult, etc).

        (5) Framework failure or interrupt:
            An unexpected error occurred. This is a framework error. Either there
            has been an internal error in the framework OR we have forgotten to put a
            user code error boundary around invoked user-space code. These terminate
            the computation immediately (by re-raising).


    The "raised_dagster_errors" context manager can be used to force these errors to be
    re-raised and surfaced to the user. This is mostly to get sensible errors in test and
    ad-hoc contexts, rather than forcing the user to wade through the
    PipelineExecutionResult API in order to find the step that failed.

    For tools, however, this option should be false, and a sensible error message
    signaled to the user within that tool.
    """

    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(retries, "retries", Retries)

    try:
        prior_attempt_count = retries.get_attempt_count(step_context.step.key)
        if step_context.step_launcher:
            step_events = step_context.step_launcher.launch_step(
                step_context, prior_attempt_count)
        else:
            step_events = core_dagster_event_sequence_for_step(
                step_context, prior_attempt_count)

        for step_event in check.generator(step_events):
            yield step_event

    # case (1) in top comment
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        if retries.disabled:
            fail_err = SerializableErrorInfo(
                message="RetryRequested but retries are disabled",
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=fail_err,
                                                  user_failure_data=None),
            )
        else:  # retries.enabled or retries.deferred
            prev_attempts = retries.get_attempt_count(step_context.step.key)
            if prev_attempts >= retry_request.max_retries:
                fail_err = SerializableErrorInfo(
                    message="Exceeded max_retries of {}".format(
                        retry_request.max_retries),
                    stack=retry_err_info.stack,
                    cls_name=retry_err_info.cls_name,
                    cause=retry_err_info.cause,
                )
                yield DagsterEvent.step_failure_event(
                    step_context=step_context,
                    step_failure_data=StepFailureData(error=fail_err,
                                                      user_failure_data=None),
                )
            else:
                yield DagsterEvent.step_retry_event(
                    step_context,
                    StepRetryData(
                        error=retry_err_info,
                        seconds_to_wait=retry_request.seconds_to_wait,
                    ),
                )

    # case (2) in top comment
    except Failure as failure:
        yield _step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            UserFailureData(
                label="intentional-failure",
                description=failure.description,
                metadata_entries=failure.metadata_entries,
            ),
        )
        if step_context.raise_on_error:
            raise failure

    # case (3) in top comment
    except DagsterUserCodeExecutionError as dagster_user_error:
        yield _step_failure_event_from_exc_info(
            step_context,
            dagster_user_error.original_exc_info,
        )

        if step_context.raise_on_error:
            raise dagster_user_error.user_exception

    # case (4) in top comment
    except DagsterError as dagster_error:
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())

        if step_context.raise_on_error:
            raise dagster_error

    # case (5) in top comment
    except (Exception, KeyboardInterrupt) as unexpected_exception:  # pylint: disable=broad-except
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())
        raise unexpected_exception
Ejemplo n.º 9
0
    def execute(self, pipeline_context, execution_plan):
        check.inst_param(pipeline_context, "pipeline_context",
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

        limit = self.max_concurrent

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Executing steps using multiprocess executor: parent process (pid: {pid})"
            .format(pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(
                os.getpid(),
                step_keys_to_execute=execution_plan.step_keys_to_execute),
        )

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        with time_execution_scope() as timer_result:
            with execution_plan.start(
                    retry_mode=self.retries) as active_execution:
                active_iters = {}
                errors = {}
                term_events = {}
                stopping = False

                while (not stopping
                       and not active_execution.is_complete) or active_iters:
                    if active_execution.check_for_interrupts():
                        yield DagsterEvent.engine_event(
                            pipeline_context,
                            "Multiprocess executor: received termination signal - "
                            "forwarding to active child processes",
                            EngineEventData.interrupted(
                                list(term_events.keys())),
                        )
                        stopping = True
                        active_execution.mark_interrupted()
                        for key, event in term_events.items():
                            event.set()

                    # start iterators
                    while len(active_iters) < limit and not stopping:
                        steps = active_execution.get_steps_to_execute(
                            limit=(limit - len(active_iters)))

                        if not steps:
                            break

                        for step in steps:
                            step_context = pipeline_context.for_step(step)
                            term_events[step.key] = multiprocessing.Event()
                            active_iters[
                                step.key] = self.execute_step_out_of_process(
                                    step_context,
                                    step,
                                    errors,
                                    term_events,
                                    active_execution.get_known_state(),
                                )

                    # process active iterators
                    empty_iters = []
                    for key, step_iter in active_iters.items():
                        try:
                            event_or_none = next(step_iter)
                            if event_or_none is None:
                                continue
                            else:
                                yield event_or_none
                                active_execution.handle_event(event_or_none)

                        except ChildProcessCrashException as crash:
                            serializable_error = serializable_error_info_from_exc_info(
                                sys.exc_info())
                            yield DagsterEvent.engine_event(
                                pipeline_context,
                                ("Multiprocess executor: child process for step {step_key} "
                                 "unexpectedly exited with code {exit_code}"
                                 ).format(step_key=key,
                                          exit_code=crash.exit_code),
                                EngineEventData.engine_error(
                                    serializable_error),
                                step_handle=active_execution.get_step_by_key(
                                    key).handle,
                            )
                            step_failure_event = DagsterEvent.step_failure_event(
                                step_context=pipeline_context.for_step(
                                    active_execution.get_step_by_key(key)),
                                step_failure_data=StepFailureData(
                                    error=serializable_error,
                                    user_failure_data=None),
                            )
                            active_execution.handle_event(step_failure_event)
                            yield step_failure_event
                            empty_iters.append(key)
                        except StopIteration:
                            empty_iters.append(key)

                    # clear and mark complete finished iterators
                    for key in empty_iters:
                        del active_iters[key]
                        del term_events[key]
                        active_execution.verify_complete(pipeline_context, key)

                    # process skipped and abandoned steps
                    yield from active_execution.plan_events_iterator(
                        pipeline_context)

                errs = {pid: err for pid, err in errors.items() if err}

                # After termination starts, raise an interrupted exception once all subprocesses
                # have finished cleaning up (and the only errors were from being interrupted)
                if (stopping and (not active_iters) and all([
                        err_info.cls_name == "DagsterExecutionInterruptedError"
                        for err_info in errs.values()
                ])):
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        "Multiprocess executor: interrupted all active child processes",
                        event_specific_data=EngineEventData(),
                    )
                    raise DagsterExecutionInterruptedError()
                elif errs:
                    raise DagsterSubprocessError(
                        "During multiprocess execution errors occurred in child processes:\n{error_list}"
                        .format(error_list="\n".join([
                            "In process {pid}: {err}".format(
                                pid=pid, err=err.to_string())
                            for pid, err in errs.items()
                        ])),
                        subprocess_error_infos=list(errs.values()),
                    )

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Multiprocess executor: parent process exiting after {duration} (pid: {pid})"
            .format(duration=format_duration(timer_result.millis),
                    pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )
Ejemplo n.º 10
0
def dagster_event_sequence_for_step(
    step_context: StepExecutionContext, force_local_execution: bool = False
) -> Iterator[DagsterEvent]:
    """
    Yield a sequence of dagster events for the given step with the step context.

    This function also processes errors. It handles a few error cases:

        (1) User code requests to be retried:
            A RetryRequested has been raised. We will either put the step in to
            up_for_retry state or a failure state depending on the number of previous attempts
            and the max_retries on the received RetryRequested.

        (2) User code fails successfully:
            The user-space code has raised a Failure which may have
            explicit metadata attached.

        (3) User code fails unexpectedly:
            The user-space code has raised an Exception. It has been
            wrapped in an exception derived from DagsterUserCodeException. In that
            case the original user exc_info is stashed on the exception
            as the original_exc_info property.

        (4) Execution interrupted:
            The run was interrupted in the middle of execution (typically by a
            termination request).

        (5) User error:
            The framework raised a DagsterError that indicates a usage error
            or some other error not communicated by a user-thrown exception. For example,
            if the user yields an object out of a compute function that is not a
            proper event (not an Output, ExpectationResult, etc).

        (6) Framework failure:
            An unexpected error occurred. This is a framework error. Either there
            has been an internal error in the framework OR we have forgotten to put a
            user code error boundary around invoked user-space code. These terminate
            the computation immediately (by re-raising).


    The "raised_dagster_errors" context manager can be used to force these errors to be
    re-raised and surfaced to the user. This is mostly to get sensible errors in test and
    ad-hoc contexts, rather than forcing the user to wade through the
    PipelineExecutionResult API in order to find the step that failed.

    For tools, however, this option should be false, and a sensible error message
    signaled to the user within that tool.

    When we launch a step that has a step launcher, we use this function on both the host process
    and the remote process. When we run the step in the remote process, to prevent an infinite loop
    of launching steps that then launch steps, and so on, the remote process will run this with
    the force_local_execution argument set to True.
    """

    check.inst_param(step_context, "step_context", StepExecutionContext)

    try:
        if step_context.step_launcher and not force_local_execution:
            # info all on step_context - should deprecate second arg
            step_events = step_context.step_launcher.launch_step(
                step_context, step_context.previous_attempt_count
            )
        else:
            step_events = core_dagster_event_sequence_for_step(step_context)

        for step_event in check.generator(step_events):
            yield step_event

    # case (1) in top comment
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        if step_context.retry_mode.disabled:
            fail_err = SerializableErrorInfo(
                message="RetryRequested but retries are disabled",
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            step_context.capture_step_exception(retry_request)
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=fail_err, user_failure_data=None),
            )
        else:  # retries.enabled or retries.deferred
            prev_attempts = step_context.previous_attempt_count
            if prev_attempts >= retry_request.max_retries:
                fail_err = SerializableErrorInfo(
                    message=f"Exceeded max_retries of {retry_request.max_retries}\n",
                    stack=retry_err_info.stack,
                    cls_name=retry_err_info.cls_name,
                    cause=retry_err_info.cause,
                )
                step_context.capture_step_exception(retry_request)
                yield DagsterEvent.step_failure_event(
                    step_context=step_context,
                    step_failure_data=StepFailureData(
                        error=fail_err,
                        user_failure_data=None,
                        # set the flag to omit the outer stack if we have a cause to show
                        error_source=ErrorSource.USER_CODE_ERROR if fail_err.cause else None,
                    ),
                )
            else:
                yield DagsterEvent.step_retry_event(
                    step_context,
                    StepRetryData(
                        error=retry_err_info,
                        seconds_to_wait=retry_request.seconds_to_wait,
                    ),
                )

    # case (2) in top comment
    except Failure as failure:
        step_context.capture_step_exception(failure)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            UserFailureData(
                label="intentional-failure",
                description=failure.description,
                metadata_entries=failure.metadata_entries,
            ),
        )
        if step_context.raise_on_error:
            raise failure

    # case (3) in top comment
    except DagsterUserCodeExecutionError as dagster_user_error:
        step_context.capture_step_exception(dagster_user_error.user_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.USER_CODE_ERROR,
        )

        if step_context.raise_on_error:
            raise dagster_user_error.user_exception

    # case (4) in top comment
    except (KeyboardInterrupt, DagsterExecutionInterruptedError) as interrupt_error:
        step_context.capture_step_exception(interrupt_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.INTERRUPT,
        )
        raise interrupt_error

    # case (5) in top comment
    except DagsterError as dagster_error:
        step_context.capture_step_exception(dagster_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.FRAMEWORK_ERROR,
        )

        if step_context.raise_on_error:
            raise dagster_error

    # case (6) in top comment
    except BaseException as unexpected_exception:
        step_context.capture_step_exception(unexpected_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.UNEXPECTED_ERROR,
        )
        raise unexpected_exception
Ejemplo n.º 11
0
    def execute(self, pipeline_context, execution_plan):
        check.inst_param(pipeline_context, "pipeline_context", SystemPipelineExecutionContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

        limit = self.max_concurrent

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Executing steps using multithread executor (pid: {pid})".format(pid=os.getpid()),
            event_specific_data=EngineEventData.in_process(os.getpid(), execution_plan.step_keys_to_execute),
        )

        with time_execution_scope() as timer_result:
            with execution_plan.start(retries=self.retries) as active_execution:
                active_iters = {}
                errors = {}

                while not active_execution.is_complete or active_iters:

                    # start iterators
                    while len(active_iters) < limit:
                        steps = active_execution.get_steps_to_execute(limit=(limit - len(active_iters)))

                        if not steps:
                            break

                        for step in steps:
                            step_context = pipeline_context.for_step(step)
                            active_iters[step.key] = self.execute_step_in_thread(step.key, step_context, errors)

                    # process active iterators
                    empty_iters = []
                    for key, step_iter in active_iters.items():
                        try:
                            event_or_none = next(step_iter)
                            if event_or_none is None:
                                continue
                            yield event_or_none
                            active_execution.handle_event(event_or_none)

                        except ThreadCrashException:
                            serializable_error = serializable_error_info_from_exc_info(sys.exc_info())
                            yield DagsterEvent.engine_event(
                                pipeline_context,
                                f"Multithread executor: thread for step {key} exited unexpectedly",
                                EngineEventData.engine_error(serializable_error),
                            )
                            step_failure_event = DagsterEvent.step_failure_event(
                                step_context=pipeline_context.for_step(active_execution.get_step_by_key(key)),
                                step_failure_data=StepFailureData(error=serializable_error, user_failure_data=None),
                            )
                            active_execution.handle_event(step_failure_event)
                            yield step_failure_event
                            empty_iters.append(key)
                        except StopIteration:
                            empty_iters.append(key)

                    # clear and mark complete finished iterators
                    for key in empty_iters:
                        del active_iters[key]
                        active_execution.verify_complete(pipeline_context, key)

                    # process skipped and abandoned steps
                    for event in active_execution.plan_events_iterator(pipeline_context):
                        yield event

                errs = {tid: err for tid, err in errors.items() if err}
                if errs:
                    raise DagsterThreadError(
                        "During multithread execution errors occurred in threads:\n{error_list}".format(
                            error_list="\n".join(
                                [
                                    "In thread {tid}: {err}".format(tid=tid, err=err.to_string())
                                    for tid, err in errs.items()
                                ]
                            )
                        ),
                        thread_error_infos=list(errs.values()),
                    )

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Multithread executor: parent process exiting after {duration} (pid: {pid})".format(
                duration=format_duration(timer_result.millis), pid=os.getpid()
            ),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )