def _set_intermediates(step_context, step_output, step_output_handle, output, version):
    """Store a step output value and yield an event describing the store operation.

    When ``step_output.asset_store_handle`` is truthy the value is handed to
    ``_set_addressable_asset``; otherwise it is written through
    ``step_context.intermediate_storage``. An event is only yielded when the
    call returns an ``AssetStoreOperation`` / ``ObjectStoreOperation``
    respectively.

    Args:
        step_context: Execution context of the step that produced the output.
        step_output: Step output definition; ``asset_store_handle`` and
            ``dagster_type`` are read from it.
        step_output_handle: Handle identifying the output being stored.
        output: The emitted output; ``value`` and ``output_name`` are read.
        version: Forwarded to ``set_intermediate`` on the intermediate
            storage path only.

    Yields:
        DagsterEvent: At most one asset-store or object-store operation event.
    """
    if step_output.asset_store_handle:
        # use asset_store if it's configured or provided by the user
        asset_result = _set_addressable_asset(
            step_context,
            step_output_handle,
            step_output.asset_store_handle,
            output.value,
        )
        if isinstance(asset_result, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, asset_result)
        return

    storage_result = step_context.intermediate_storage.set_intermediate(
        context=step_context,
        dagster_type=step_output.dagster_type,
        step_output_handle=step_output_handle,
        value=output.value,
        version=version,
    )
    if not isinstance(storage_result, ObjectStoreOperation):
        return

    yield DagsterEvent.object_store_operation(
        step_context,
        ObjectStoreOperation.serializable(
            storage_result, value_name=output.output_name
        ),
    )
def _set_intermediates(step_context, step_output, step_output_handle, output, version):
    """Store a step output value and yield the events produced while storing it.

    If ``step_context.using_asset_store(step_output_handle)`` is true, the
    value is passed to ``_set_addressable_asset`` and every item it produces
    is translated into an event. Otherwise the value is written through
    ``step_context.intermediate_storage``.

    Args:
        step_context: Execution context of the step that produced the output.
        step_output: Step output definition; ``dagster_type`` is read from it
            on the intermediate storage path.
        step_output_handle: Handle identifying the output being stored.
        output: The emitted output; ``value`` and ``output_name`` are read.
        version: Forwarded to ``set_intermediate`` on the intermediate
            storage path only.

    Yields:
        DagsterEvent: Asset-store operation and step-materialization events on
        the asset store path, or at most one object-store operation event on
        the intermediate storage path.
    """
    if not step_context.using_asset_store(step_output_handle):
        stored = step_context.intermediate_storage.set_intermediate(
            context=step_context,
            dagster_type=step_output.dagster_type,
            step_output_handle=step_output_handle,
            value=output.value,
            version=version,
        )
        if isinstance(stored, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(
                    stored, value_name=output.output_name
                ),
            )
        return

    produced = _set_addressable_asset(step_context, step_output_handle, output.value)
    for item in produced:
        # The two checks are deliberately independent (not elif), so an item
        # matching both types yields both events.
        if isinstance(item, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, item)
        if isinstance(item, AssetMaterialization):
            yield DagsterEvent.step_materialization(step_context, item)
def _set_objects(step_context, step_output, step_output_handle, output):
    """Hand an output value to its output manager and yield the resulting events.

    The manager and its context are looked up on ``step_context`` for
    ``step_output_handle``; ``handle_output`` is then called with the output
    value.

    Args:
        step_context: Execution context of the step that produced the output.
        step_output: Step output definition; ``output_def`` is read from it.
        step_output_handle: Handle identifying the output being handled.
        output: The emitted output; only ``value`` is read.

    Yields:
        DagsterEvent: A single object-store operation event when the manager
        returns an ``ObjectStoreOperation``; otherwise the events produced by
        ``_materializations_to_events`` followed by a SET_ASSET asset-store
        operation event.
    """
    from dagster.core.storage.asset_store import AssetStoreHandle

    definition = step_output.output_def
    manager = step_context.get_output_manager(step_output_handle)
    manager_context = step_context.get_output_context(step_output_handle)
    handled = manager.handle_output(manager_context, output.value)

    # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
    if isinstance(handled, ObjectStoreOperation):
        yield DagsterEvent.object_store_operation(
            step_context,
            ObjectStoreOperation.serializable(
                handled, value_name=step_output_handle.output_name
            ),
        )
        return

    for event in _materializations_to_events(
        step_context, step_output_handle, handled
    ):
        yield event

    # SET_ASSET operation by AssetStore
    set_asset_operation = AssetStoreOperation(
        AssetStoreOperationType.SET_ASSET,
        step_output_handle,
        AssetStoreHandle(definition.manager_key, definition.metadata),
    )
    yield DagsterEvent.asset_store_operation(step_context, set_asset_operation)
def _set_intermediates(step_context, step_output, step_output_handle, output):
    """Write an output value to intermediate storage and report the operation.

    Args:
        step_context: Execution context of the step; supplies
            ``intermediate_storage`` and is passed on as the storage context.
        step_output: Step output definition; ``dagster_type`` is read from it.
        step_output_handle: Handle identifying the output being stored.
        output: The emitted output; ``value`` and ``output_name`` are read.

    Yields:
        DagsterEvent: One object-store operation event, but only when
        ``set_intermediate`` returns an ``ObjectStoreOperation``.
    """
    storage = step_context.intermediate_storage
    operation = storage.set_intermediate(
        context=step_context,
        dagster_type=step_output.dagster_type,
        step_output_handle=step_output_handle,
        value=output.value,
    )

    if not isinstance(operation, ObjectStoreOperation):
        return

    yield DagsterEvent.object_store_operation(
        step_context,
        ObjectStoreOperation.serializable(operation, value_name=output.output_name),
    )
def copy_required_intermediates_for_execution(pipeline_context, execution_plan):
    """
    Copy intermediates from the parent run that the current execution plan
    also produces, yielding an object store operation event for each copy.

    Nothing is yielded when the pipeline run has no ``parent_run_id``. A
    handle is a candidate when it appears both in the execution plan and in
    the parent run's event logs; candidates already present in intermediate
    storage are skipped.

    Args:
        pipeline_context (SystemExecutionContext): Context of the current run.
        execution_plan (ExecutionPlan): Plan whose step outputs are considered.

    Yields:
        DagsterEvent: One object store operation event per copied intermediate.
    """
    check.inst_param(pipeline_context, "pipeline_context", SystemExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    parent_run_id = pipeline_context.pipeline_run.parent_run_id
    if not parent_run_id:
        return

    parent_logs = pipeline_context.instance.all_logs(parent_run_id)
    current_handles = output_handles_from_execution_plan(execution_plan)
    previous_handles = output_handles_from_event_logs(parent_logs)

    # Group the shared handles by the step that owns them.
    shared_by_step = defaultdict(list)
    for shared_handle in current_handles.intersection(previous_handles):
        shared_by_step[shared_handle.step_key].append(shared_handle)

    storage = pipeline_context.intermediate_storage
    for step in execution_plan.get_all_steps_in_topo_order():
        step_handles = shared_by_step.get(step.key, [])

        # exit early to avoid trying to make a context from an UnresolvedExecutionStep
        if not step_handles:
            continue

        step_context = pipeline_context.for_step(step)
        for step_handle in step_handles:
            if not storage.has_intermediate(pipeline_context, step_handle):
                copied = storage.copy_intermediate_from_run(
                    pipeline_context, parent_run_id, step_handle
                )
                yield DagsterEvent.object_store_operation(
                    step_context,
                    ObjectStoreOperation.serializable(
                        copied, value_name=step_handle.output_name
                    ),
                )
def _set_objects(step_context, step_output, step_output_handle, output):
    """Hand an output value to its output manager and yield the resulting events.

    Only the ``handle_output`` call runs inside ``user_code_error_boundary``,
    which is configured with ``DagsterExecutionHandleOutputError`` and treats
    ``Failure`` and ``RetryRequested`` as control-flow exceptions.

    Args:
        step_context: Execution context of the step that produced the output.
        step_output: Step output definition; ``output_def`` is read from it.
        step_output_handle: Handle identifying the output being handled.
        output: The emitted output; only ``value`` is read.

    Yields:
        DagsterEvent: A single object-store operation event when the manager
        returns an ``ObjectStoreOperation``; otherwise the events produced by
        ``_materializations_to_events`` followed by a serialized SET_ASSET
        asset-store operation event.
    """
    from dagster.core.storage.asset_store import AssetStoreHandle

    definition = step_output.output_def
    manager = step_context.get_output_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    def _failure_message():
        # Built lazily: the boundary receives this callable as msg_fn.
        return (
            f"Error occurred during the the handling of step output:"
            f'    step key: "{step_context.step.key}"'
            f'    output name: "{output_context.name}"'
        )

    with user_code_error_boundary(
        DagsterExecutionHandleOutputError,
        control_flow_exceptions=[Failure, RetryRequested],
        msg_fn=_failure_message,
        step_key=step_context.step.key,
        output_name=output_context.name,
    ):
        handled = manager.handle_output(output_context, output.value)

    # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
    if isinstance(handled, ObjectStoreOperation):
        yield DagsterEvent.object_store_operation(
            step_context,
            ObjectStoreOperation.serializable(
                handled, value_name=step_output_handle.output_name
            ),
        )
        return

    for event in _materializations_to_events(
        step_context, step_output_handle, handled
    ):
        yield event

    # SET_ASSET operation by AssetStore
    set_asset_operation = AssetStoreOperation(
        AssetStoreOperationType.SET_ASSET,
        step_output_handle,
        AssetStoreHandle(definition.manager_key, definition.metadata),
    )
    yield DagsterEvent.asset_store_operation(
        step_context,
        AssetStoreOperation.serializable(set_asset_operation),
    )
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.

    Args:
        step_context (SystemStepExecutionContext): Context of the step to run.
        prior_attempt_count (int): Number of earlier attempts; a value greater
            than zero makes the first event a step-restarted event instead of
            a step-start event.

    Yields:
        DagsterEvent: The start/restart event, store operation events for
        loaded inputs, input type-check events, events derived from what the
        compute function emits, and finally a step-success event carrying the
        measured duration.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for input_name, input_value in _input_values_from_intermediate_storage(
        step_context
    ):
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value, value_name=input_name),
            )
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, MultipleStepOutputsListWrapper):
            for op in input_value:
                # Dispatch on each wrapped operation. Testing the wrapper
                # itself (as the code previously did) can never match inside
                # this branch, so no store events were emitted for these inputs.
                if isinstance(op, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(op, value_name=input_name),
                    )
                elif isinstance(op, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(step_context, op)
            inputs[input_name] = [op.obj for op in input_value]
        elif isinstance(input_value, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, input_value)
            inputs[input_name] = input_value.obj
        else:
            inputs[input_name] = input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
            _type_checked_event_sequence_for_input(
                step_context, input_name, input_value
            )
        ):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs)
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
            _step_output_error_checked_user_event_sequence(
                step_context, user_event_sequence
            )
        ):
            if isinstance(user_event, Output):
                for evt in _create_step_events_for_output(step_context, user_event):
                    yield evt
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier".format(
                        event=user_event
                    )
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Run the step described by step_context and stream the DagsterEvents it
    produces. Exceptions raised while the step computes are not caught here;
    they propagate to the caller.

    Args:
        step_context (SystemStepExecutionContext): Context of the step to run.
        prior_attempt_count (int): Number of earlier attempts; a value greater
            than zero makes the first event a step-restarted event instead of
            a step-start event.

    Yields:
        DagsterEvent: The start/restart event, store operation events for
        loaded inputs, input type-check events, events derived from what the
        compute function emits, and finally a step-success event carrying the
        measured duration.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count <= 0:
        yield DagsterEvent.step_start_event(step_context)
    else:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)

    inputs = {}
    for input_name, loaded in _load_input_values(step_context):
        # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
        if isinstance(loaded, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(loaded, value_name=input_name),
            )
            inputs[input_name] = loaded.obj
        elif isinstance(loaded, FanInStepInputValuesWrapper):
            resolved = []
            for member in loaded:
                # each member is either a store interaction...
                # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
                if isinstance(member, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(
                            member, value_name=input_name
                        ),
                    )
                    resolved.append(member.obj)
                elif isinstance(member, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(
                        step_context, AssetStoreOperation.serializable(member)
                    )
                    resolved.append(member.obj)
                # ...or the value itself
                else:
                    resolved.append(member)
            inputs[input_name] = resolved
        elif isinstance(loaded, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(
                step_context, AssetStoreOperation.serializable(loaded)
            )
            inputs[input_name] = loaded.obj
        else:
            inputs[input_name] = loaded

    for name, value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value)
        )
        for event in type_check_events:
            yield event

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs)
        )
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(
                step_context, user_event_sequence
            )
        )

        # This loop must stay inside the timer block above so that the time
        # spent consuming the user's events is included in the duration.
        for user_event in checked_user_events:
            if isinstance(user_event, (Output, DynamicOutput)):
                for event in _create_step_events_for_output(step_context, user_event):
                    yield event
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier".format(
                        event=user_event
                    )
                )

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)