def _core_dagster_event_sequence_for_step(step_context):
    '''
    Run the step bound to ``step_context`` and stream its DagsterEvents.

    Events are yielded in this order: a step-start event, one object-store
    operation event for every input that was loaded as an
    ObjectStoreOperation, the input type-check events, the events derived
    from what the compute function yields, and finally a step-success event
    carrying the measured duration in milliseconds.

    Exceptions that bubble up during the computation of the step are not
    caught here; they propagate to the caller.

    Args:
        step_context (SystemStepExecutionContext): context of the step to run.

    Yields:
        DagsterEvent: the events produced while executing the step.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    yield DagsterEvent.step_start_event(step_context)

    # Resolve every input to its plain value. Inputs that arrive wrapped in an
    # ObjectStoreOperation are reported as an event before being unwrapped.
    resolved_inputs = {}
    loaded_inputs = _input_values_from_intermediates_manager(step_context)
    for name, loaded in loaded_inputs.items():
        if not isinstance(loaded, ObjectStoreOperation):
            resolved_inputs[name] = loaded
            continue

        yield DagsterEvent.object_store_operation(
            step_context,
            ObjectStoreOperation.serializable(loaded, value_name=name),
        )
        resolved_inputs[name] = loaded.obj

    for name, value in resolved_inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value)
        )
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        compute_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, resolved_inputs)
        )
        checked_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, compute_events)
        )

        # This loop has to stay inside the timer block: the generators are
        # lazy, so the user code only runs while we iterate, and that is the
        # time that must be recorded.
        for user_event in checked_events:
            if isinstance(user_event, Output):
                for output_event in _create_step_events_for_output(step_context, user_event):
                    yield output_event
                continue

            if isinstance(user_event, Materialization):
                yield DagsterEvent.step_materialization(step_context, user_event)
                continue

            if isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
                continue

            check.failed(
                'Unexpected event {event}, should have been caught earlier'.format(
                    event=user_event
                )
            )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
def _set_intermediates(step_context, step_output, step_output_handle, output):
    '''
    Store an output value through the intermediates manager.

    The value of ``output`` is handed to
    ``step_context.intermediates_manager.set_intermediate`` together with the
    runtime type of ``step_output`` and the given handle. When that call
    returns an ObjectStoreOperation, a matching object-store operation event
    (labelled with ``output.output_name``) is yielded; otherwise nothing is
    yielded.

    Args:
        step_context: context of the step that produced the output.
        step_output: step output definition; its ``runtime_type`` is used.
        step_output_handle: handle under which the value is stored.
        output: the user-provided output; ``value`` and ``output_name`` are read.

    Yields:
        DagsterEvent: at most one object-store operation event.
    '''
    manager = step_context.intermediates_manager
    stored = manager.set_intermediate(
        context=step_context,
        runtime_type=step_output.runtime_type,
        step_output_handle=step_output_handle,
        value=output.value,
    )

    if not isinstance(stored, ObjectStoreOperation):
        return

    serializable_op = ObjectStoreOperation.serializable(stored, value_name=output.output_name)
    yield DagsterEvent.object_store_operation(step_context, serializable_op)
def copy_required_intermediates_for_execution(pipeline_context, execution_plan):
    '''
    Copy intermediates from the parent run that the current plan can reuse.

    If the pipeline run has no ``parent_run_id`` nothing happens. Otherwise the
    output handles of ``execution_plan`` are intersected with the output
    handles found in the parent run's event logs. Each handle in that
    intersection that the intermediates manager does not already hold is
    copied over from the parent run, and an object-store operation event is
    yielded for every copy. Steps are visited in the plan's topological order.

    Args:
        pipeline_context (SystemPipelineExecutionContext): context of the
            current run.
        execution_plan (ExecutionPlan): plan whose outputs may be reused.

    Yields:
        DagsterEvent: one object-store operation event per copied intermediate.
    '''
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    parent_run_id = pipeline_context.pipeline_run.parent_run_id
    if not parent_run_id:
        return

    parent_run_logs = pipeline_context.instance.all_logs(parent_run_id)

    current_handles = output_handles_from_execution_plan(execution_plan)
    previous_handles = output_handles_from_event_logs(parent_run_logs)
    shared_handles = current_handles.intersection(previous_handles)

    # Group the reusable handles by the key of the step that owns them.
    handles_by_step_key = {}
    for shared_handle in shared_handles:
        handles_by_step_key.setdefault(shared_handle.step_key, []).append(shared_handle)

    manager = pipeline_context.intermediates_manager
    for step in execution_plan.topological_steps():
        step_context = pipeline_context.for_step(step)

        for shared_handle in handles_by_step_key.get(step.key, []):
            # Only copy what is not already present for the current run.
            if not manager.has_intermediate(pipeline_context, shared_handle):
                copy_op = manager.copy_intermediate_from_run(
                    pipeline_context, parent_run_id, shared_handle
                )
                yield DagsterEvent.object_store_operation(
                    step_context,
                    ObjectStoreOperation.serializable(
                        copy_op, value_name=shared_handle.output_name
                    ),
                )
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Run the step bound to ``step_context`` and stream its DagsterEvents.

    The first event is a step-restarted event when ``prior_attempt_count`` is
    greater than zero, and a step-start event otherwise. It is followed by
    object-store operation events for inputs loaded as ObjectStoreOperations
    (one per operation for list-wrapped inputs), the input type-check events,
    the events derived from what the compute function yields, and finally a
    step-success event carrying the measured duration in milliseconds.

    Exceptions that bubble up during the computation of the step are not
    caught here; they propagate to the caller.

    Args:
        step_context (SystemStepExecutionContext): context of the step to run.
        prior_attempt_count (int): number of earlier attempts of this step.

    Yields:
        DagsterEvent: the events produced while executing the step.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    # Resolve every input to its plain value, reporting each object-store
    # read as an event before the loaded object is unwrapped.
    resolved_inputs = {}
    for name, loaded in _input_values_from_intermediate_storage(step_context):
        if isinstance(loaded, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(loaded, value_name=name),
            )
            resolved_inputs[name] = loaded.obj
            continue

        if isinstance(loaded, MultipleStepOutputsListWrapper):
            # One event per wrapped operation, then collect the loaded objects.
            for wrapped_op in loaded:
                yield DagsterEvent.object_store_operation(
                    step_context,
                    ObjectStoreOperation.serializable(wrapped_op, value_name=name),
                )
            resolved_inputs[name] = [wrapped_op.obj for wrapped_op in loaded]
            continue

        resolved_inputs[name] = loaded

    for name, value in resolved_inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value)
        )
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        compute_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, resolved_inputs)
        )
        checked_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, compute_events)
        )

        # This loop has to stay inside the timer block: the generators are
        # lazy, so the user code only runs while we iterate, and that is the
        # time that must be recorded.
        for user_event in checked_events:
            if isinstance(user_event, Output):
                for output_event in _create_step_events_for_output(step_context, user_event):
                    yield output_event
                continue

            if isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, user_event)
                continue

            if isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
                continue

            check.failed(
                "Unexpected event {event}, should have been caught earlier".format(
                    event=user_event
                )
            )

    # Aim to log exactly one terminal event (success or failure) per step:
    # interrupts normally log a failure event, so hold them off until the
    # success event has been fully emitted.
    with delay_interrupts():
        yield DagsterEvent.step_success_event(
            step_context, StepSuccessData(duration_ms=timer_result.millis)
        )