Ejemplo n.º 1
0
    def __exit__(self, exc_type, exc_value, traceback):
        self._context_guard = False

        # Exiting due to exception, return to allow exception to bubble
        if exc_type or exc_value or traceback:
            return

        if not self.is_complete:
            pending_action = (self._executable + self._pending_abandon +
                              self._pending_retry + self._pending_skip)
            state_str = "{pending_str}{in_flight_str}{action_str}{retry_str}".format(
                in_flight_str="\nSteps still in flight: {}".format(
                    self._in_flight) if self._in_flight else "",
                pending_str="\nSteps pending processing: {}".format(
                    self._pending.keys()) if self._pending else "",
                action_str="\nSteps pending action: {}".format(pending_action)
                if pending_action else "",
                retry_str="\nSteps waiting to retry: {}".format(
                    self._waiting_to_retry.keys())
                if self._waiting_to_retry else "",
            )
            if self._interrupted:
                raise DagsterExecutionInterruptedError(
                    f"Execution of pipeline was interrupted before completing the execution plan. {state_str}"
                )
            else:
                raise DagsterInvariantViolationError(
                    f"Execution of pipeline finished without completing the execution plan. {state_str}"
                )

        # See verify_complete - steps for which we did not observe a failure/success event are in an unknown
        # state so we raise to ensure pipeline failure.
        if len(self._unknown_state) > 0:
            if self._interrupted:
                raise DagsterExecutionInterruptedError(
                    "Execution of pipeline exited with steps {step_list} in an unknown state after "
                    "being interrupted.".format(step_list=self._unknown_state))
            else:
                raise DagsterUnknownStepStateError(
                    "Execution of pipeline exited with steps {step_list} in an unknown state to this process.\n"
                    "This was likely caused by losing communication with the process performing step execution."
                    .format(step_list=self._unknown_state))
Ejemplo n.º 2
0
    def execute(self, pipeline_context, execution_plan):
        check.inst_param(pipeline_context, "pipeline_context",
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

        limit = self.max_concurrent

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Executing steps using multiprocess executor: parent process (pid: {pid})"
            .format(pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(
                os.getpid(),
                step_keys_to_execute=execution_plan.step_keys_to_execute),
        )

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        with time_execution_scope() as timer_result:
            with execution_plan.start(
                    retry_mode=self.retries) as active_execution:
                active_iters = {}
                errors = {}
                term_events = {}
                stopping = False

                while (not stopping
                       and not active_execution.is_complete) or active_iters:
                    if active_execution.check_for_interrupts():
                        yield DagsterEvent.engine_event(
                            pipeline_context,
                            "Multiprocess executor: received termination signal - "
                            "forwarding to active child processes",
                            EngineEventData.interrupted(
                                list(term_events.keys())),
                        )
                        stopping = True
                        active_execution.mark_interrupted()
                        for key, event in term_events.items():
                            event.set()

                    # start iterators
                    while len(active_iters) < limit and not stopping:
                        steps = active_execution.get_steps_to_execute(
                            limit=(limit - len(active_iters)))

                        if not steps:
                            break

                        for step in steps:
                            step_context = pipeline_context.for_step(step)
                            term_events[step.key] = multiprocessing.Event()
                            active_iters[
                                step.key] = self.execute_step_out_of_process(
                                    step_context,
                                    step,
                                    errors,
                                    term_events,
                                    active_execution.get_known_state(),
                                )

                    # process active iterators
                    empty_iters = []
                    for key, step_iter in active_iters.items():
                        try:
                            event_or_none = next(step_iter)
                            if event_or_none is None:
                                continue
                            else:
                                yield event_or_none
                                active_execution.handle_event(event_or_none)

                        except ChildProcessCrashException as crash:
                            serializable_error = serializable_error_info_from_exc_info(
                                sys.exc_info())
                            yield DagsterEvent.engine_event(
                                pipeline_context,
                                ("Multiprocess executor: child process for step {step_key} "
                                 "unexpectedly exited with code {exit_code}"
                                 ).format(step_key=key,
                                          exit_code=crash.exit_code),
                                EngineEventData.engine_error(
                                    serializable_error),
                                step_handle=active_execution.get_step_by_key(
                                    key).handle,
                            )
                            step_failure_event = DagsterEvent.step_failure_event(
                                step_context=pipeline_context.for_step(
                                    active_execution.get_step_by_key(key)),
                                step_failure_data=StepFailureData(
                                    error=serializable_error,
                                    user_failure_data=None),
                            )
                            active_execution.handle_event(step_failure_event)
                            yield step_failure_event
                            empty_iters.append(key)
                        except StopIteration:
                            empty_iters.append(key)

                    # clear and mark complete finished iterators
                    for key in empty_iters:
                        del active_iters[key]
                        del term_events[key]
                        active_execution.verify_complete(pipeline_context, key)

                    # process skipped and abandoned steps
                    yield from active_execution.plan_events_iterator(
                        pipeline_context)

                errs = {pid: err for pid, err in errors.items() if err}

                # After termination starts, raise an interrupted exception once all subprocesses
                # have finished cleaning up (and the only errors were from being interrupted)
                if (stopping and (not active_iters) and all([
                        err_info.cls_name == "DagsterExecutionInterruptedError"
                        for err_info in errs.values()
                ])):
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        "Multiprocess executor: interrupted all active child processes",
                        event_specific_data=EngineEventData(),
                    )
                    raise DagsterExecutionInterruptedError()
                elif errs:
                    raise DagsterSubprocessError(
                        "During multiprocess execution errors occurred in child processes:\n{error_list}"
                        .format(error_list="\n".join([
                            "In process {pid}: {err}".format(
                                pid=pid, err=err.to_string())
                            for pid, err in errs.items()
                        ])),
                        subprocess_error_infos=list(errs.values()),
                    )

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Multiprocess executor: parent process exiting after {duration} (pid: {pid})"
            .format(duration=format_duration(timer_result.millis),
                    pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )