def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step within the step_context argument given the in-memory events.

    This function yields a sequence of DagsterEvents, but without catching any
    exceptions that have bubbled up during the computation of the step.

    Args:
        step_context (SystemStepExecutionContext): context for the step being executed.
        prior_attempt_count (int): number of previous attempts; > 0 means this is a retry.

    Yields:
        DagsterEvent: start/restart, store-operation, input type-check, output,
        materialization, expectation, and finally a step-success event.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    # Announce either a restart (this step is being retried) or a fresh start.
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for input_name, input_value in _input_values_from_intermediate_storage(step_context):
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value, value_name=input_name))
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, MultipleStepOutputsListWrapper):
            # Fan-in input: each element is either a store interaction or a value.
            for op in input_value:
                # BUG FIX: the per-element dispatch must inspect each wrapped
                # operation ``op``, not the enclosing wrapper ``input_value``
                # (the wrapper is never an ObjectStoreOperation or
                # AssetStoreOperation, so the original branches were dead and
                # no store events were emitted for fan-in inputs).
                if isinstance(op, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(op, value_name=input_name))
                elif isinstance(op, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(step_context, op)
            inputs[input_name] = [op.obj for op in input_value]
        elif isinstance(input_value, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, input_value)
            inputs[input_name] = input_value.obj
        else:
            # Plain in-memory value; use it directly.
            inputs[input_name] = input_value

    # Run each loaded input through its dagster type check, emitting any events.
    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context, input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):
            if isinstance(user_event, Output):
                for evt in _create_step_events_for_output(step_context, user_event):
                    yield evt
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
def _dagster_event_sequence_for_step(step_context, retries):
    '''
    Yield a sequence of dagster events for the given step with the step context.

    This function also processes errors. It handles a few error cases:

        (1) User code requests to be retried: A RetryRequested has been raised. We will either
            put the step in to up_for_retry state or a failure state depending on the number
            of previous attempts and the max_retries on the received RetryRequested.

        (2) User code raises an intentional Failure: the user-space code has raised a Failure
            which may have explicit metadata attached.

        (3) User code fails unexpectedly: The user-space code has raised an Exception. It has been
            wrapped in an exception derived from DagsterUserCodeException. In that case the original
            user exc_info is stashed on the exception as the original_exc_info property.

        (4) User error: The framework raised a DagsterError that indicates a usage error
            or some other error not communicated by a user-thrown exception. For example,
            if the user yields an object out of a compute function that is not a
            proper event (not an Output, ExpectationResult, etc).

        (5) Framework failure or interrupt: An unexpected error occurred. This is a framework
            error. Either there has been an internal error in the framework OR we have forgotten
            to put a user code error boundary around invoked user-space code. These terminate
            the computation immediately (by re-raising).

    The "raised_dagster_errors" context manager can be used to force these errors to be
    re-raised and surfaced to the user. This is mostly to get sensible errors in test and
    ad-hoc contexts, rather than forcing the user to wade through the
    PipelineExecutionResult API in order to find the step that failed.

    For tools, however, this option should be false, and a sensible error message
    signaled to the user within that tool.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.inst_param(retries, 'retries', Retries)

    try:
        prior_attempt_count = retries.get_attempt_count(step_context.step.key)
        if step_context.step_launcher:
            # A step launcher runs the step in an external environment and
            # streams its events back to this process.
            step_events = step_context.step_launcher.launch_step(
                step_context, prior_attempt_count)
        else:
            step_events = core_dagster_event_sequence_for_step(
                step_context, prior_attempt_count)

        for step_event in check.generator(step_events):
            yield step_event

    # case (1) in top comment
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        if retries.disabled:
            # Retries were requested but the run's retry mode forbids them:
            # surface a failure that preserves the original stack/cause.
            fail_err = SerializableErrorInfo(
                message='RetryRequested but retries are disabled',
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=fail_err, user_failure_data=None),
            )
        else:  # retries.enabled or retries.deferred
            prev_attempts = retries.get_attempt_count(step_context.step.key)
            if prev_attempts >= retry_request.max_retries:
                # Retry budget exhausted: convert the request into a failure.
                fail_err = SerializableErrorInfo(
                    message='Exceeded max_retries of {}'.format(retry_request.max_retries),
                    stack=retry_err_info.stack,
                    cls_name=retry_err_info.cls_name,
                    cause=retry_err_info.cause,
                )
                yield DagsterEvent.step_failure_event(
                    step_context=step_context,
                    step_failure_data=StepFailureData(error=fail_err, user_failure_data=None),
                )
            else:
                yield DagsterEvent.step_retry_event(
                    step_context,
                    StepRetryData(
                        error=retry_err_info,
                        seconds_to_wait=retry_request.seconds_to_wait,
                    ),
                )

    # case (2) in top comment
    except Failure as failure:
        yield _step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            UserFailureData(
                label='intentional-failure',
                description=failure.description,
                metadata_entries=failure.metadata_entries,
            ),
        )
        if step_context.raise_on_error:
            raise failure

    # case (3) in top comment
    except DagsterUserCodeExecutionError as dagster_user_error:
        yield _step_failure_event_from_exc_info(
            step_context,
            dagster_user_error.original_exc_info,
        )
        if step_context.raise_on_error:
            # Re-raise the user's original exception, not the framework wrapper.
            raise dagster_user_error.user_exception

    # case (4) in top comment
    except DagsterError as dagster_error:
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())
        if step_context.raise_on_error:
            raise dagster_error

    # case (5) in top comment
    except (Exception, KeyboardInterrupt) as unexpected_exception:  # pylint: disable=broad-except
        # Always re-raised, regardless of raise_on_error: framework failures and
        # interrupts must terminate the computation.
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())
        raise unexpected_exception
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step within the step_context argument given the in-memory events.

    This function yields a sequence of DagsterEvents, but without catching any
    exceptions that have bubbled up during the computation of the step.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    # Announce either a restart (this step is being retried) or a fresh start.
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for input_name, input_value in _load_input_values(step_context):
        # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value, value_name=input_name))
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, FanInStepInputValuesWrapper):
            # Fan-in input: collect one value per upstream output, emitting a
            # store event for each element that came from a store interaction.
            final_values = []
            for inner_value in input_value:
                # inner value is either a store interaction
                # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
                if isinstance(inner_value, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(
                            inner_value, value_name=input_name),
                    )
                    final_values.append(inner_value.obj)
                elif isinstance(inner_value, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(
                        step_context, inner_value)
                    final_values.append(inner_value.obj)
                # or the value directly
                else:
                    final_values.append(inner_value)
            inputs[input_name] = final_values
        elif isinstance(input_value, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, input_value)
            inputs[input_name] = input_value.obj
        else:
            # Plain in-memory value; use it directly.
            inputs[input_name] = input_value

    # Run each loaded input through its dagster type check, emitting any events.
    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context, input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):
            if isinstance(user_event, (Output, DynamicOutput)):
                for evt in _create_step_events_for_output(step_context, user_event):
                    yield evt
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
def core_dagster_event_sequence_for_step(
    step_context: SystemStepExecutionContext, prior_attempt_count: int
) -> Iterator[DagsterEvent]:
    """Execute the step held by ``step_context``, yielding its event stream.

    Emits start/restart, input-load, input type-check, output, materialization,
    expectation, and step-success events in order. Exceptions raised during the
    computation are deliberately NOT caught here; the caller translates them
    into failure events.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    # Announce either a restart (this is a retry) or a fresh start.
    yield (
        DagsterEvent.step_restarted_event(step_context, prior_attempt_count)
        if prior_attempt_count > 0
        else DagsterEvent.step_start_event(step_context)
    )

    resolved_inputs = {}
    for step_input in step_context.step.step_inputs:
        # Nothing-typed inputs carry no value at all.
        if step_input.dagster_type.kind == DagsterTypeKind.NOTHING:
            continue
        # A source may yield interleaved DagsterEvents before producing the
        # actual input value; forward the events, capture the value.
        for loaded in ensure_gen(step_input.source.load_input_object(step_context)):
            if isinstance(loaded, DagsterEvent):
                yield loaded
            else:
                check.invariant(step_input.name not in resolved_inputs)
                resolved_inputs[step_input.name] = loaded

    # Run every resolved value through its dagster type check.
    for name, value in resolved_inputs.items():
        yield from check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value)
        )

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, resolved_inputs)
        )

        # This loop must stay inside the timer block so the recorded duration
        # covers the user computation.
        for user_event in check.generator(
            _step_output_error_checked_user_event_sequence(
                step_context, user_event_sequence
            )
        ):
            if isinstance(user_event, (Output, DynamicOutput)):
                yield from _create_step_events_for_output(step_context, user_event)
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier".format(
                        event=user_event
                    )
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
def inner_plan_execution_iterator(
    pipeline_context: PlanExecutionContext, execution_plan: ExecutionPlan
) -> Iterator[DagsterEvent]:
    """Drive the execution plan step by step, yielding all resulting events.

    For each step: verifies required resources are present, captures compute
    logs, runs the step, forwards skip events for failed/uncovered downstream
    steps, and fires hooks with the step's full event list.
    """
    check.inst_param(pipeline_context, "pipeline_context", PlanExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    with execution_plan.start(retry_mode=pipeline_context.retry_mode) as active_execution:

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            step_context = cast(
                StepExecutionContext,
                pipeline_context.for_step(
                    step, active_execution.retry_state.get_attempt_count(step.key)
                ),
            )
            step_event_list = []

            missing_resources = [
                resource_key
                for resource_key in step_context.required_resource_keys
                if not hasattr(step_context.resources, resource_key)
            ]
            check.invariant(
                len(missing_resources) == 0,
                (
                    "Expected step context for solid {solid_name} to have all required resources, but "
                    "missing {missing_resources}."
                ).format(solid_name=step_context.solid.name, missing_resources=missing_resources),
            )

            # capture all of the logs for this step
            with ExitStack() as stack:
                log_capture_error = None
                try:
                    stack.enter_context(
                        pipeline_context.instance.compute_log_manager.watch(
                            step_context.pipeline_run, step_context.step.key
                        )
                    )
                except Exception as e:
                    # Log-capture setup is best-effort: record the failure and
                    # continue executing the step without it.
                    log_capture_error = e
                    logging.exception(
                        "Exception while setting up compute log capture for step %s in run %s: %s",
                        step_context.step.key,
                        step_context.pipeline_run.run_id,
                        e,
                    )

                if not log_capture_error:
                    yield DagsterEvent.capture_logs(
                        step_context, log_key=step_context.step.key, steps=[step_context.step]
                    )

                for step_event in check.generator(_dagster_event_sequence_for_step(step_context)):
                    check.inst(step_event, DagsterEvent)
                    step_event_list.append(step_event)
                    yield step_event
                    active_execution.handle_event(step_event)

                active_execution.verify_complete(pipeline_context, step.key)

                # Close log capture explicitly so a teardown failure is logged
                # rather than propagating out of the with-block.
                try:
                    stack.close()
                except Exception as e:
                    logging.exception(
                        "Exception while cleaning up compute log capture for step %s in run %s: %s",
                        step_context.step.key,
                        step_context.pipeline_run.run_id,
                        e,
                    )

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(pipeline_context):
                step_event_list.append(event)
                yield event

            # pass a list of step events to hooks
            for hook_event in _trigger_hook(step_context, step_event_list):
                yield hook_event
def _dagster_event_sequence_for_step(step_context: StepExecutionContext) -> Iterator[DagsterEvent]:
    """
    Yield a sequence of dagster events for the given step with the step context.

    This function also processes errors. It handles a few error cases:

        (1) User code requests to be retried: A RetryRequested has been raised. We will either
            put the step in to up_for_retry state or a failure state depending on the number
            of previous attempts and the max_retries on the received RetryRequested.

        (2) User code raises an intentional Failure: the user-space code has raised a Failure
            which may have explicit metadata attached.

        (3) User code fails unexpectedly: The user-space code has raised an Exception. It has been
            wrapped in an exception derived from DagsterUserCodeException. In that case the original
            user exc_info is stashed on the exception as the original_exc_info property.

        (4) Execution interrupted: The run was interrupted in the middle of execution (typically
            by a termination request).

        (5) User error: The framework raised a DagsterError that indicates a usage error
            or some other error not communicated by a user-thrown exception. For example,
            if the user yields an object out of a compute function that is not a
            proper event (not an Output, ExpectationResult, etc).

        (6) Framework failure: An unexpected error occurred. This is a framework error. Either there
            has been an internal error in the framework OR we have forgotten to put a user code error
            boundary around invoked user-space code. These terminate the computation immediately
            (by re-raising).

    The "raised_dagster_errors" context manager can be used to force these errors to be
    re-raised and surfaced to the user. This is mostly to get sensible errors in test and
    ad-hoc contexts, rather than forcing the user to wade through the
    PipelineExecutionResult API in order to find the step that failed.

    For tools, however, this option should be false, and a sensible error message
    signaled to the user within that tool.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)

    try:
        if step_context.step_launcher:
            # info all on step_context - should deprecate second arg
            step_events = step_context.step_launcher.launch_step(
                step_context, step_context.previous_attempt_count
            )
        else:
            step_events = core_dagster_event_sequence_for_step(step_context)

        for step_event in check.generator(step_events):
            yield step_event

    # case (1) in top comment
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        if step_context.retry_mode.disabled:
            # Retries were requested but the run's retry mode forbids them:
            # surface a failure that preserves the original stack/cause.
            fail_err = SerializableErrorInfo(
                message="RetryRequested but retries are disabled",
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            step_context.capture_step_exception(retry_request)
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=fail_err, user_failure_data=None),
            )
        else:  # retries.enabled or retries.deferred
            prev_attempts = step_context.previous_attempt_count
            if prev_attempts >= retry_request.max_retries:
                # Retry budget exhausted: convert the request into a failure.
                fail_err = SerializableErrorInfo(
                    message="Exceeded max_retries of {}".format(retry_request.max_retries),
                    stack=retry_err_info.stack,
                    cls_name=retry_err_info.cls_name,
                    cause=retry_err_info.cause,
                )
                step_context.capture_step_exception(retry_request)
                yield DagsterEvent.step_failure_event(
                    step_context=step_context,
                    step_failure_data=StepFailureData(error=fail_err, user_failure_data=None),
                )
            else:
                yield DagsterEvent.step_retry_event(
                    step_context,
                    StepRetryData(
                        error=retry_err_info,
                        seconds_to_wait=retry_request.seconds_to_wait,
                    ),
                )

    # case (2) in top comment
    except Failure as failure:
        step_context.capture_step_exception(failure)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            UserFailureData(
                label="intentional-failure",
                description=failure.description,
                metadata_entries=failure.metadata_entries,
            ),
        )
        if step_context.raise_on_error:
            raise failure

    # case (3) in top comment
    except DagsterUserCodeExecutionError as dagster_user_error:
        step_context.capture_step_exception(dagster_user_error.user_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.USER_CODE_ERROR,
        )
        if step_context.raise_on_error:
            # Re-raise the user's original exception, not the framework wrapper.
            raise dagster_user_error.user_exception

    # case (4) in top comment
    except (KeyboardInterrupt, DagsterExecutionInterruptedError) as interrupt_error:
        # Interrupts always terminate execution, regardless of raise_on_error.
        step_context.capture_step_exception(interrupt_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.INTERRUPT,
        )
        raise interrupt_error

    # case (5) in top comment
    except DagsterError as dagster_error:
        step_context.capture_step_exception(dagster_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.FRAMEWORK_ERROR,
        )
        if step_context.raise_on_error:
            raise dagster_error

    # case (6) in top comment
    except BaseException as unexpected_exception:
        # Always re-raised: unexpected framework failures must not be swallowed.
        step_context.capture_step_exception(unexpected_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.UNEXPECTED_ERROR,
        )
        raise unexpected_exception
def execute(pipeline_context, execution_plan, step_keys_to_execute=None):
    """Execute the plan level by level, yielding events and skipping steps whose
    dependencies failed or whose inputs are uncovered.

    Args:
        pipeline_context (SystemPipelineExecutionContext): the pipeline-scoped context.
        execution_plan (ExecutionPlan): the plan whose steps are executed in
            topological order.
        step_keys_to_execute (Optional[List[str]]): when provided, only steps with
            these keys are executed (partial execution).
    """
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    step_key_set = None if step_keys_to_execute is None else set(step_keys_to_execute)

    check.param_invariant(
        isinstance(pipeline_context.executor_config, ExecutorConfig),
        'pipeline_context',
        'Expected executor_config to be ExecutorConfig got {}'.format(
            pipeline_context.executor_config),
    )

    # Keys of steps that failed or were skipped; downstream steps consult this
    # to decide whether they can run at all.
    failed_or_skipped_steps = set()

    step_levels = execution_plan.topological_step_levels()

    # It would be good to implement a reference tracking algorithm here so we could
    # garbage collection results that are no longer needed by any steps
    # https://github.com/dagster-io/dagster/issues/811
    for step_level in step_levels:
        for step in step_level:
            if step_key_set and step.key not in step_key_set:
                continue

            step_context = pipeline_context.for_step(step)

            failed_inputs = [
                step_input.prev_output_handle.step_key
                for step_input in step.step_inputs
                if step_input.is_from_output
                and step_input.prev_output_handle.step_key in failed_or_skipped_steps
            ]
            if failed_inputs:
                # An upstream dependency failed or was skipped: skip this step too.
                step_context.log.info(
                    ('Dependencies for step {step} failed: {failed_inputs}. Not executing.'
                     ).format(step=step.key, failed_inputs=failed_inputs))
                failed_or_skipped_steps.add(step.key)
                yield DagsterEvent.step_skipped_event(step_context)
                continue

            uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                step_context, step)
            if uncovered_inputs:
                # In partial pipeline execution, we may end up here without having validated the
                # missing dependent outputs were optional
                _assert_missing_inputs_optional(uncovered_inputs, execution_plan, step.key)

                step_context.log.info(
                    ('Not all inputs covered for {step}. Not executing. Output missing for '
                     'inputs: {uncovered_inputs}').format(
                         uncovered_inputs=uncovered_inputs, step=step.key))
                failed_or_skipped_steps.add(step.key)
                yield DagsterEvent.step_skipped_event(step_context)
                continue

            for step_event in check.generator(dagster_event_sequence_for_step(step_context)):
                check.inst(step_event, DagsterEvent)
                if step_event.is_step_failure:
                    failed_or_skipped_steps.add(step.key)
                yield step_event
def dagster_event_sequence_for_step(step_context):
    '''Yield the dagster events for one step, translating errors into failure events.

    Three error cases are handled:

        (1) User-space code raised; it arrives wrapped in an exception derived from
            DagsterUserCodeException, with the original user exc_info stashed on the
            exception as the original_exc_info property. Examples are computations
            within the compute_fn and type checks. If the user raised an intentional
            error via Failure, explicit metadata attached to it is forwarded.

        (2) The framework raised a DagsterError indicating a usage error or some
            other error not communicated by a user-thrown exception - for example,
            yielding an object out of a compute function that is not a proper event
            (not an Output, ExpectationResult, etc).

        (3) An unexpected error occurred. This is a framework error: either an
            internal error in the framework, or a missing user code error boundary
            around invoked user-space code. These terminate the computation
            immediately (by re-raising) even if raise_on_error is false.

    With raise_on_error set to True, cases (1) and (2) are also re-raised and
    surfaced to the user. This is mostly to get sensible errors in test and ad-hoc
    contexts, rather than forcing the user to wade through the
    PipelineExecutionResult API in order to find the step that errored. For tools,
    this option should be false, and a sensible error message signaled to the user
    within that tool.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    try:
        yield from check.generator(_core_dagster_event_sequence_for_step(step_context))

    # case (1) in top comment
    except DagsterUserCodeExecutionError as user_code_error:
        # Only an intentional Failure carries user-specified metadata.
        if user_code_error.is_user_specified_failure:
            user_failure_data = UserFailureData(
                label='intentional-failure',
                description=user_code_error.user_specified_failure.description,
                metadata_entries=user_code_error.user_specified_failure.metadata_entries,
            )
        else:
            user_failure_data = None
        yield _step_failure_event_from_exc_info(
            step_context, user_code_error.original_exc_info, user_failure_data
        )
        if step_context.executor_config.raise_on_error:
            raise user_code_error

    # case (2) in top comment
    except DagsterError as framework_usage_error:
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())
        if step_context.executor_config.raise_on_error:
            raise framework_usage_error

    # case (3) in top comment
    except Exception as unknown_error:  # pylint: disable=broad-except
        # Always re-raised: unexpected framework failures must not be swallowed.
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())
        raise unknown_error
def inner_plan_execution_iterator(pipeline_context, execution_plan):
    """Drive the execution plan step by step, yielding all resulting events.

    Copies required intermediates first, then for each step: verifies required
    resources, captures compute logs, skips the step if inputs are uncovered,
    otherwise executes it; finally forwards plan-level skip events and fires
    hooks with the step's full event list.
    """
    check.inst_param(pipeline_context, "pipeline_context", SystemExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    retries = pipeline_context.retries

    for event in copy_required_intermediates_for_execution(pipeline_context, execution_plan):
        yield event

    with execution_plan.start(retries=retries) as active_execution:

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            step_context = pipeline_context.for_step(step)
            step_event_list = []

            missing_resources = [
                resource_key
                for resource_key in step_context.required_resource_keys
                if not hasattr(step_context.resources, resource_key)
            ]
            check.invariant(
                len(missing_resources) == 0,
                ("Expected step context for solid {solid_name} to have all required resources, but "
                 "missing {missing_resources}.").format(
                     solid_name=step_context.solid.name,
                     missing_resources=missing_resources),
            )

            with pipeline_context.instance.compute_log_manager.watch(
                    step_context.pipeline_run, step_context.step.key):
                # capture all of the logs for this step

                uncovered_inputs = pipeline_context.intermediate_storage.uncovered_inputs(
                    step_context, step)
                if uncovered_inputs:
                    # In partial pipeline execution, we may end up here without having validated the
                    # missing dependent outputs were optional
                    _assert_missing_inputs_optional(uncovered_inputs, execution_plan, step.key)

                    step_context.log.info(
                        ("Not all inputs covered for {step}. Not executing. Output missing for "
                         "inputs: {uncovered_inputs}").format(
                             uncovered_inputs=uncovered_inputs, step=step.key))
                    step_event = DagsterEvent.step_skipped_event(step_context)
                    step_event_list.append(step_event)
                    yield step_event
                    active_execution.mark_skipped(step.key)
                else:
                    for step_event in check.generator(
                            _dagster_event_sequence_for_step(step_context, retries)):
                        check.inst(step_event, DagsterEvent)
                        step_event_list.append(step_event)
                        yield step_event
                        active_execution.handle_event(step_event)

                    active_execution.verify_complete(pipeline_context, step.key)

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(pipeline_context):
                step_event_list.append(event)
                yield event

            # pass a list of step events to hooks
            for hook_event in _trigger_hook(step_context, step_event_list):
                yield hook_event