def _execute_step_command_body(
    args: ExecuteStepArgs, instance: DagsterInstance, pipeline_run: PipelineRun):
    """Execute the steps requested in ``args`` for an existing run, yielding events.

    Verifies the step (when requested), rebuilds the execution plan for the run's
    solid subset, and streams events from the plan executor. Interrupts and
    framework errors are reported as engine events and then re-raised.
    """
    # When exactly one step was requested, tag engine events with its key.
    if args.step_keys_to_execute and len(args.step_keys_to_execute) == 1:
        single_step_key = args.step_keys_to_execute[0]
    else:
        single_step_key = None

    try:
        check.inst(
            pipeline_run,
            PipelineRun,
            "Pipeline run with id '{}' not found for step execution".format(
                args.pipeline_run_id),
        )

        if args.should_verify_step:
            retry_state = check.not_none(args.known_state).get_retry_state()
            # Bail out quietly when verification rejects the step request.
            if not verify_step(instance, pipeline_run, retry_state,
                               args.step_keys_to_execute):
                return

        origin_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        recon_pipeline = origin_pipeline.subset_for_execution_from_existing_pipeline(
            pipeline_run.solids_to_execute)

        execution_plan = create_execution_plan(
            recon_pipeline,
            run_config=pipeline_run.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=pipeline_run.mode,
            known_state=args.known_state,
        )

        yield from execute_plan_iterator(
            execution_plan,
            recon_pipeline,
            pipeline_run,
            instance,
            run_config=pipeline_run.run_config,
            retry_mode=args.retry_mode,
        )
    except (KeyboardInterrupt, DagsterExecutionInterruptedError):
        # Record the interruption before propagating so the event log is complete.
        yield instance.report_engine_event(
            message="Step execution terminated by interrupt",
            pipeline_run=pipeline_run,
            step_key=single_step_key,
        )
        raise
    except Exception:
        # Anything else at this level is framework-side; report with the traceback.
        yield instance.report_engine_event(
            "An exception was thrown during step execution that is likely a framework error, rather than an error in user code.",
            pipeline_run,
            EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())),
            step_key=single_step_key,
        )
        raise
def execute_run_host_mode(
    pipeline: ReconstructablePipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    executor_defs: Optional[List[ExecutorDefinition]] = None,
    raise_on_error: bool = False,
):
    """Execute an existing run in host mode, rebuilding the plan from its snapshot.

    Args:
        pipeline (ReconstructablePipeline): Pointer to the pipeline to execute.
        pipeline_run (PipelineRun): The run to execute.
        instance (DagsterInstance): The instance in which the run was created.
        executor_defs (Optional[List[ExecutorDefinition]]): Executors available to
            the run; falls back to ``default_executors`` when ``None``.
        raise_on_error (bool): Whether to raise framework errors instead of only
            reporting them as events. Defaults to ``False``.

    Returns:
        The list of events emitted during execution.

    Raises:
        DagsterInvariantViolationError: If the run was canceled before execution
            could start.
    """
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    # Validation only: the normalized return value is intentionally discarded so
    # that an explicit empty list is preserved and only ``None`` gets defaults.
    check.opt_list_param(executor_defs, "executor_defs", of_type=ExecutorDefinition)

    # Fix: compare against None with identity (`is not None`), per PEP 8,
    # instead of the original `executor_defs != None` equality test.
    executor_defs = executor_defs if executor_defs is not None else default_executors

    # A run canceled before launch must not execute; record why and abort.
    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )

    if pipeline_run.solids_to_execute:
        pipeline = pipeline.subset_for_execution_from_existing_pipeline(
            frozenset(pipeline_run.solids_to_execute))

    # Rebuild the execution plan from the snapshot captured at run creation, so
    # host mode never needs to load the pipeline definition from user code.
    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        pipeline_run.execution_plan_snapshot_id)
    execution_plan = ExecutionPlan.rebuild_from_snapshot(
        pipeline_run.pipeline_name,
        execution_plan_snapshot,
    )

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PlanOrchestrationContextManager(
            context_event_generator=host_mode_execution_context_event_generator,
            pipeline=pipeline,
            execution_plan=execution_plan,
            run_config=pipeline_run.run_config,
            pipeline_run=pipeline_run,
            instance=instance,
            raise_on_error=raise_on_error,
            executor_defs=executor_defs,
            output_capture=None,
        ),
    )
    # Draining the iterable drives execution to completion.
    event_list = list(_execute_run_iterable)
    return event_list
def execute_run_host_mode(
    pipeline: ReconstructablePipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    get_executor_def_fn: Optional[Callable[[Optional[str]], ExecutorDefinition]] = None,
    raise_on_error: bool = False,
):
    """Execute an existing run in host mode, rebuilding the plan from its snapshot.

    Args:
        pipeline (ReconstructablePipeline): Pointer to the pipeline to execute.
        pipeline_run (PipelineRun): The run to execute.
        instance (DagsterInstance): The instance in which the run was created.
        get_executor_def_fn (Optional[Callable[[Optional[str]], ExecutorDefinition]]):
            Optional callable mapping an executor name to its definition.
        raise_on_error (bool): Whether to raise framework errors instead of only
            reporting them as events. Defaults to ``False``.

    Returns:
        The list of events emitted during execution.

    Raises:
        DagsterInvariantViolationError: If the run was canceled before execution
            could start.
    """
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    # Fix: the parameter defaults to None, so its annotation must be Optional[...];
    # the original bare ``Callable[...] = None`` is an invalid implicit-Optional hint.
    check.opt_callable_param(get_executor_def_fn, "get_executor_def_fn")

    # A run canceled before launch must not execute; record why and abort.
    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )

    if pipeline_run.solids_to_execute:
        pipeline = pipeline.subset_for_execution_from_existing_pipeline(
            pipeline_run.solids_to_execute)

    # Rebuild the execution plan from the snapshot captured at run creation, so
    # host mode never needs to load the pipeline definition from user code.
    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        pipeline_run.execution_plan_snapshot_id)
    execution_plan = ExecutionPlan.rebuild_from_snapshot(
        pipeline_run.pipeline_name,
        execution_plan_snapshot,
    )

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=HostModeRunWorkerExecutionContextManager(
            execution_plan=execution_plan,
            recon_pipeline=pipeline,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            get_executor_def_fn=get_executor_def_fn,
            raise_on_error=raise_on_error,
        ),
    )
    # Draining the iterable drives execution to completion.
    event_list = list(_execute_run_iterable)
    return event_list
def core_execute_run(
    recon_pipeline: ReconstructablePipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    resume_from_failure: bool = False,
) -> Generator[DagsterEvent, None, None]:
    """Drive execution of a run, yielding its events.

    Loads the pipeline definition up front so load failures are surfaced before
    any step work starts. Interrupts and framework errors are logged to the event
    stream, the run is marked failed if it hasn't finished, and the exception is
    re-raised to the caller.
    """
    check.inst_param(recon_pipeline, "recon_pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    run_id = pipeline_run.run_id

    def _current_error_data():
        # Serializable payload for the exception currently being handled.
        return EngineEventData.engine_error(
            serializable_error_info_from_exc_info(sys.exc_info()))

    # try to load the pipeline definition early
    try:
        recon_pipeline.get_definition()
    except Exception:
        yield instance.report_engine_event(
            "Could not load pipeline definition.",
            pipeline_run,
            _current_error_data(),
        )
        yield from _report_run_failed_if_not_finished(instance, run_id)
        raise

    try:
        yield from execute_run_iterator(
            recon_pipeline, pipeline_run, instance,
            resume_from_failure=resume_from_failure)
    except (KeyboardInterrupt, DagsterExecutionInterruptedError):
        # Mark the run failed first, then record why, then propagate.
        yield from _report_run_failed_if_not_finished(instance, run_id)
        yield instance.report_engine_event(
            message="Run execution terminated by interrupt",
            pipeline_run=pipeline_run,
        )
        raise
    except Exception:
        yield instance.report_engine_event(
            "An exception was thrown during execution that is likely a framework error, "
            "rather than an error in user code.",
            pipeline_run,
            _current_error_data(),
        )
        yield from _report_run_failed_if_not_finished(instance, run_id)
        raise
def _resume_run_command_body(
    recon_pipeline: ReconstructablePipeline,
    pipeline_run_id: Optional[str],
    instance: DagsterInstance,
    write_stream_fn: Callable[[DagsterEvent], Any],
    set_exit_code_on_failure: bool,
):
    """Resume a run in this process, streaming each event through ``write_stream_fn``.

    Returns 1 when the run worker failed and ``set_exit_code_on_failure`` is set,
    otherwise 0. Optionally runs a background cancellation-watch thread that is
    shut down in the ``finally`` block.
    """
    if instance.should_start_background_run_thread:
        cancel_thread, cancel_shutdown_event = start_run_cancellation_thread(
            instance, pipeline_run_id)

    run = instance.get_run_by_id(pipeline_run_id)
    check.inst(
        run,
        PipelineRun,
        "Pipeline run with id '{}' not found for run execution.".format(
            pipeline_run_id),
    )

    worker_pid = os.getpid()
    instance.report_engine_event(
        "Started process for resuming pipeline (pid: {pid}).".format(pid=worker_pid),
        run,
        EngineEventData.in_process(worker_pid, marker_end="cli_api_subprocess_init"),
    )

    worker_failed = False
    try:
        for event in core_execute_run(
            recon_pipeline,
            run,
            instance,
            resume_from_failure=True,
        ):
            write_stream_fn(event)
            if event.event_type == DagsterEventType.PIPELINE_FAILURE:
                worker_failed = True
    except:
        # relies on core_execute_run writing failures to the event log before raising
        worker_failed = True
    finally:
        if instance.should_start_background_run_thread:
            # Ask the cancellation watcher to stop and give it a bounded join.
            cancel_shutdown_event.set()
            if cancel_thread.is_alive():
                cancel_thread.join(timeout=15)
                if cancel_thread.is_alive():
                    instance.report_engine_event(
                        "Cancellation thread did not shutdown gracefully",
                        run,
                    )
        instance.report_engine_event(
            "Process for pipeline exited (pid: {pid}).".format(pid=worker_pid),
            run,
        )

    if worker_failed and set_exit_code_on_failure:
        return 1
    return 0
def execute_run(
    pipeline: IPipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool = False,
) -> PipelineExecutionResult:
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    # Reject a bare PipelineDefinition: callers must pass an IPipeline wrapper so
    # execution can be handed off to other processes (or run in memory).
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an IPipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryPipeline.")

    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    # A run canceled before launch must not execute; record why and abort.
    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )
    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            # Pipeline is already subset: the run's selection must match it exactly.
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".
                format(
                    pipeline_solids_to_execute=str_format_set(
                        pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(
                        pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute)

    execution_plan = _get_execution_plan_from_run(pipeline, pipeline_run,
                                                  instance)
    if is_memoized_run(pipeline_run.tags):
        # Memoized runs re-resolve the plan so steps with cached outputs are skipped.
        resolved_run_config = ResolvedRunConfig.build(
            pipeline.get_definition(), pipeline_run.run_config,
            pipeline_run.mode)
        execution_plan = resolve_memoized_execution_plan(
            execution_plan,
            pipeline.get_definition(),
            pipeline_run.run_config,
            instance,
            resolved_run_config,
        )

    # Collected in-process step outputs, later exposed on the result object.
    output_capture: Optional[Dict[StepOutputHandle, Any]] = {}
    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PlanOrchestrationContextManager(
            context_event_generator=orchestration_context_event_generator,
            pipeline=pipeline,
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
            executor_defs=None,
            output_capture=output_capture,
        ),
    )
    # Draining the iterable drives execution; events accumulate in order.
    event_list = list(_execute_run_iterable)

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        # Lazily rebuilds a scoped pipeline context for post-hoc inspection.
        lambda: scoped_pipeline_context(
            execution_plan,
            pipeline,
            pipeline_run.run_config,
            pipeline_run,
            instance,
        ),
        output_capture=output_capture,
    )
def execute_run(
    pipeline: IPipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool = False,
) -> PipelineExecutionResult:
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    # Reject a bare PipelineDefinition: callers must pass an IPipeline wrapper so
    # execution can be handed off to other processes (or run in memory).
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an IPipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryPipeline.")

    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    # A run canceled before launch must not execute; record why and abort.
    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING"
        .format(pipeline_run.pipeline_name, pipeline_run.run_id,
                pipeline_run.status),
    )
    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            # Pipeline is already subset: the run's selection must match it exactly.
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".
                format(
                    pipeline_solids_to_execute=str_format_set(
                        pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(
                        pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute)

    execution_plan = create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    if is_memoized_run(pipeline_run.tags):
        # Memoized runs re-resolve the plan so steps with cached outputs are skipped.
        execution_plan = resolve_memoized_execution_plan(execution_plan)

    _execute_run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=_pipeline_execution_iterator,
        execution_context_manager=PipelineExecutionContextManager(
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
        ),
    )
    # Draining the iterable drives execution; events accumulate in order.
    event_list = list(_execute_run_iterable)
    pipeline_context = _execute_run_iterable.pipeline_context

    # workaround for mem_io_manager to work in reconstruct_context, e.g. result.result_for_solid
    # in-memory values dict will get lost when the resource is re-initiated in reconstruct_context
    # so instead of re-initiating every single resource, we pass the resource instances to
    # reconstruct_context directly to avoid re-building from resource def.
    resource_instances_to_override = {}
    if pipeline_context:  # None if we have a pipeline failure
        for (
            key,
            resource_instance,
        ) in pipeline_context.scoped_resources_builder.resource_instance_dict.items(
        ):
            if isinstance(resource_instance, InMemoryIOManager):
                resource_instances_to_override[key] = resource_instance

    # NOTE(review): the reconstruct-context lambda below reads
    # pipeline_context.intermediate_storage; if pipeline_context is None (pipeline
    # failure, per the guard above) invoking it would raise AttributeError —
    # confirm callers only reconstruct context after a successful run.
    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        lambda hardcoded_resources_arg: scoped_pipeline_context(
            execution_plan,
            pipeline_run.run_config,
            pipeline_run,
            instance,
            intermediate_storage=pipeline_context.intermediate_storage,
            resource_instances_to_override=hardcoded_resources_arg,
        ),
        resource_instances_to_override=resource_instances_to_override,
    )