def execute(self, pipeline_context, execution_plan):
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    limit = self.max_concurrent

    yield DagsterEvent.engine_event(
        pipeline_context,
        'Executing steps using multiprocess engine: parent process (pid: {pid})'.format(
            pid=os.getpid()
        ),
        event_specific_data=EngineEventData.multiprocess(
            os.getpid(), step_keys_to_execute=execution_plan.step_keys_to_execute
        ),
    )

    # It would be good to implement a reference tracking algorithm here so we could
    # garbage collect results that are no longer needed by any steps
    # https://github.com/dagster-io/dagster/issues/811
    with time_execution_scope() as timer_result:
        active_execution = execution_plan.start(retries=self.retries)
        active_iters = {}
        errors = {}
        term_events = {}
        stopping = False

        while (not stopping and not active_execution.is_complete) or active_iters:
            try:
                # start iterators
                while len(active_iters) < limit and not stopping:
                    steps = active_execution.get_steps_to_execute(limit=(limit - len(active_iters)))

                    if not steps:
                        break

                    for step in steps:
                        step_context = pipeline_context.for_step(step)
                        term_events[step.key] = get_multiprocessing_context().Event()
                        active_iters[step.key] = execute_step_out_of_process(
                            step_context, step, errors, term_events
                        )

                # process active iterators
                empty_iters = []
                for key, step_iter in active_iters.items():
                    try:
                        event_or_none = next(step_iter)
                        if event_or_none is None:
                            continue
                        else:
                            yield event_or_none
                            active_execution.handle_event(event_or_none)
                    except StopIteration:
                        empty_iters.append(key)

                # clear and mark complete finished iterators
                for key in empty_iters:
                    del active_iters[key]
                    if term_events[key].is_set():
                        stopping = True
                    del term_events[key]
                    active_execution.verify_complete(pipeline_context, key)

                # process skips from failures or uncovered inputs
                for event in active_execution.skipped_step_events_iterator(pipeline_context):
                    yield event

            # In the very small chance that we get interrupted in this coordination section and not
            # polling the subprocesses for events - try to clean up gracefully
            except KeyboardInterrupt:
                yield DagsterEvent.engine_event(
                    pipeline_context,
                    'Multiprocess engine: received KeyboardInterrupt - forwarding to active child processes',
                    EngineEventData.interrupted(list(term_events.keys())),
                )
                stopping = True
                for event in term_events.values():
                    event.set()

        errs = {pid: err for pid, err in errors.items() if err}
        if errs:
            raise DagsterSubprocessError(
                'During multiprocess execution errors occurred in child processes:\n{error_list}'.format(
                    error_list='\n'.join(
                        [
                            'In process {pid}: {err}'.format(pid=pid, err=err.to_string())
                            for pid, err in errs.items()
                        ]
                    )
                ),
                subprocess_error_infos=list(errs.values()),
            )

    yield DagsterEvent.engine_event(
        pipeline_context,
        'Multiprocess engine: parent process exiting after {duration} (pid: {pid})'.format(
            duration=format_duration(timer_result.millis), pid=os.getpid()
        ),
        event_specific_data=EngineEventData.multiprocess(os.getpid()),
    )
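
# A minimal, self-contained sketch of the coordination pattern the engine loop
# above relies on: keep at most `limit` step iterators active at once, poll each
# one for its next event, and retire iterators as they finish. The names here
# (`run_bounded`, `make_step_iter`, `pending`) are illustrative, not part of the
# engine's API.

def run_bounded(pending, make_step_iter, limit=4):
    active = {}
    while pending or active:
        # start new iterators while under the concurrency limit
        while pending and len(active) < limit:
            key = pending.pop()
            active[key] = make_step_iter(key)

        # poll every active iterator once; collect the finished ones
        finished = []
        for key, step_iter in active.items():
            try:
                event = next(step_iter)
                if event is not None:
                    yield event
            except StopIteration:
                finished.append(key)

        # clear finished iterators so new ones can start
        for key in finished:
            del active[key]


if __name__ == '__main__':
    def make_step_iter(key):
        yield '{} started'.format(key)
        yield None  # a poll with no event ready, as in the engine loop
        yield '{} finished'.format(key)

    for event in run_bounded(['a', 'b', 'c'], make_step_iter, limit=2):
        print(event)
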
def execute(self, plan_context, execution_plan):
    check.inst_param(plan_context, "plan_context", PlanOrchestrationContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    pipeline = plan_context.reconstructable_pipeline

    limit = self.max_concurrent

    yield DagsterEvent.engine_event(
        plan_context,
        "Executing steps using multiprocess executor: parent process (pid: {pid})".format(
            pid=os.getpid()
        ),
        event_specific_data=EngineEventData.multiprocess(
            os.getpid(), step_keys_to_execute=execution_plan.step_keys_to_execute
        ),
    )

    # It would be good to implement a reference tracking algorithm here so we could
    # garbage collect results that are no longer needed by any steps
    # https://github.com/dagster-io/dagster/issues/811
    with time_execution_scope() as timer_result:
        with execution_plan.start(retry_mode=self.retries) as active_execution:
            active_iters = {}
            errors = {}
            term_events = {}
            stopping = False

            while (not stopping and not active_execution.is_complete) or active_iters:
                if active_execution.check_for_interrupts():
                    yield DagsterEvent.engine_event(
                        plan_context,
                        "Multiprocess executor: received termination signal - "
                        "forwarding to active child processes",
                        EngineEventData.interrupted(list(term_events.keys())),
                    )
                    stopping = True
                    active_execution.mark_interrupted()
                    for key, event in term_events.items():
                        event.set()

                # start iterators
                while len(active_iters) < limit and not stopping:
                    steps = active_execution.get_steps_to_execute(limit=(limit - len(active_iters)))

                    if not steps:
                        break

                    for step in steps:
                        step_context = plan_context.for_step(step)
                        term_events[step.key] = multiprocessing.Event()
                        active_iters[step.key] = self.execute_step_out_of_process(
                            pipeline,
                            step_context,
                            step,
                            errors,
                            term_events,
                            active_execution.get_known_state(),
                        )

                # process active iterators
                empty_iters = []
                for key, step_iter in active_iters.items():
                    try:
                        event_or_none = next(step_iter)
                        if event_or_none is None:
                            continue
                        else:
                            yield event_or_none
                            active_execution.handle_event(event_or_none)
                    except ChildProcessCrashException as crash:
                        serializable_error = serializable_error_info_from_exc_info(sys.exc_info())
                        yield DagsterEvent.engine_event(
                            plan_context,
                            (
                                "Multiprocess executor: child process for step {step_key} "
                                "unexpectedly exited with code {exit_code}"
                            ).format(step_key=key, exit_code=crash.exit_code),
                            EngineEventData.engine_error(serializable_error),
                            step_handle=active_execution.get_step_by_key(key).handle,
                        )
                        step_failure_event = DagsterEvent.step_failure_event(
                            step_context=plan_context.for_step(active_execution.get_step_by_key(key)),
                            step_failure_data=StepFailureData(
                                error=serializable_error, user_failure_data=None
                            ),
                        )
                        active_execution.handle_event(step_failure_event)
                        yield step_failure_event
                        empty_iters.append(key)
                    except StopIteration:
                        empty_iters.append(key)

                # clear and mark complete finished iterators
                for key in empty_iters:
                    del active_iters[key]
                    del term_events[key]
                    active_execution.verify_complete(plan_context, key)

                # process skipped and abandoned steps
                yield from active_execution.plan_events_iterator(plan_context)

            errs = {pid: err for pid, err in errors.items() if err}

            # After termination starts, raise an interrupted exception once all subprocesses
            # have finished cleaning up (and the only errors were from being interrupted)
            if (
                stopping
                and (not active_iters)
                and all(
                    [
                        err_info.cls_name == "DagsterExecutionInterruptedError"
                        for err_info in errs.values()
                    ]
                )
            ):
                yield DagsterEvent.engine_event(
                    plan_context,
                    "Multiprocess executor: interrupted all active child processes",
                    event_specific_data=EngineEventData(),
                )
                raise DagsterExecutionInterruptedError()
            elif errs:
                raise DagsterSubprocessError(
                    "During multiprocess execution errors occurred in child processes:\n{error_list}".format(
                        error_list="\n".join(
                            [
                                "In process {pid}: {err}".format(pid=pid, err=err.to_string())
                                for pid, err in errs.items()
                            ]
                        )
                    ),
                    subprocess_error_infos=list(errs.values()),
                )

    yield DagsterEvent.engine_event(
        plan_context,
        "Multiprocess executor: parent process exiting after {duration} (pid: {pid})".format(
            duration=format_duration(timer_result.millis), pid=os.getpid()
        ),
        event_specific_data=EngineEventData.multiprocess(os.getpid()),
    )
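
# A minimal sketch of the termination-event handshake the executor above
# assumes: the parent creates one multiprocessing.Event per child process,
# the child polls it between units of work, and on interrupt the parent sets
# every event so children can clean up and exit. `child_work` and the timings
# are illustrative only.

import multiprocessing
import time


def child_work(term_event):
    for _ in range(100):
        if term_event.is_set():
            print("child: termination requested, cleaning up")
            return
        time.sleep(0.1)  # stand-in for one unit of step work


if __name__ == "__main__":
    term_events = {}
    procs = []
    for key in ("a", "b"):
        term_events[key] = multiprocessing.Event()
        proc = multiprocessing.Process(target=child_work, args=(term_events[key],))
        proc.start()
        procs.append(proc)

    time.sleep(0.3)

    # the parent received an interrupt: forward it to all active children
    for event in term_events.values():
        event.set()

    for proc in procs:
        proc.join()
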
def execute(pipeline_context, execution_plan):
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    step_key_set = set(step.key for step in execution_plan.execution_steps())

    yield DagsterEvent.engine_event(
        pipeline_context,
        'Executing steps in process (pid: {pid})'.format(pid=os.getpid()),
        event_specific_data=EngineEventData.in_process(os.getpid(), step_key_set),
    )

    with time_execution_scope() as timer_result:
        check.param_invariant(
            isinstance(pipeline_context.executor_config, ExecutorConfig),
            'pipeline_context',
            'Expected executor_config to be ExecutorConfig got {}'.format(
                pipeline_context.executor_config
            ),
        )

        for event in copy_required_intermediates_for_execution(pipeline_context, execution_plan):
            yield event

        failed_or_skipped_steps = set()

        step_levels = execution_plan.execution_step_levels()

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        for step_level in step_levels:
            for step in step_level:
                step_context = pipeline_context.for_step(step)

                with mirror_step_io(step_context):
                    # capture all of the logs for this step

                    failed_inputs = []
                    for step_input in step.step_inputs:
                        failed_inputs.extend(
                            failed_or_skipped_steps.intersection(step_input.dependency_keys)
                        )

                    if failed_inputs:
                        step_context.log.info(
                            'Dependencies for step {step} failed: {failed_inputs}. Not executing.'.format(
                                step=step.key, failed_inputs=failed_inputs
                            )
                        )
                        failed_or_skipped_steps.add(step.key)
                        yield DagsterEvent.step_skipped_event(step_context)
                        continue

                    uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                        step_context, step
                    )
                    if uncovered_inputs:
                        # In partial pipeline execution, we may end up here without having validated the
                        # missing dependent outputs were optional
                        _assert_missing_inputs_optional(uncovered_inputs, execution_plan, step.key)

                        step_context.log.info(
                            'Not all inputs covered for {step}. Not executing. Output missing for '
                            'inputs: {uncovered_inputs}'.format(
                                uncovered_inputs=uncovered_inputs, step=step.key
                            )
                        )
                        failed_or_skipped_steps.add(step.key)
                        yield DagsterEvent.step_skipped_event(step_context)
                        continue

                    for step_event in check.generator(dagster_event_sequence_for_step(step_context)):
                        check.inst(step_event, DagsterEvent)
                        if step_event.is_step_failure:
                            failed_or_skipped_steps.add(step.key)

                        yield step_event

    yield DagsterEvent.engine_event(
        pipeline_context,
        'Finished steps in process (pid: {pid}) in {duration_ms}'.format(
            pid=os.getpid(), duration_ms=format_duration(timer_result.millis)
        ),
        event_specific_data=EngineEventData.in_process(os.getpid(), step_key_set),
    )
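
# A minimal sketch of the level-ordered skip propagation used above: steps run
# level by level in dependency order, and a step whose upstream dependencies
# failed or were skipped is skipped rather than executed. The toy plan, `deps`
# mapping, and `run_step` callable are illustrative, not the engine's types.

def execute_levels(step_levels, deps, run_step):
    failed_or_skipped_steps = set()
    for step_level in step_levels:
        for step in step_level:
            failed_inputs = failed_or_skipped_steps.intersection(deps.get(step, set()))
            if failed_inputs:
                print('Dependencies for step {} failed: {}. Not executing.'.format(
                    step, sorted(failed_inputs)))
                failed_or_skipped_steps.add(step)
                continue
            if not run_step(step):  # returns False on step failure
                failed_or_skipped_steps.add(step)


if __name__ == '__main__':
    levels = [['a', 'b'], ['c'], ['d']]
    deps = {'c': {'a', 'b'}, 'd': {'c'}}
    # 'b' fails, so 'c' and then 'd' are skipped
    execute_levels(levels, deps, run_step=lambda step: step != 'b')
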