def _set_intermediates(step_context, step_output, step_output_handle, output, version):
    """Persist the value of a step output and yield events describing the write.

    If ``step_output.asset_store_handle`` is truthy, the value is written through
    ``_set_addressable_asset``; otherwise it is written through
    ``step_context.intermediate_storage.set_intermediate``.
    """
    asset_store_handle = step_output.asset_store_handle

    if not asset_store_handle:
        # No asset store for this output: fall back to intermediate storage.
        storage_result = step_context.intermediate_storage.set_intermediate(
            context=step_context,
            dagster_type=step_output.dagster_type,
            step_output_handle=step_output_handle,
            value=output.value,
            version=version,
        )
        # Only object store operations are reported as events.
        if isinstance(storage_result, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(
                    storage_result, value_name=output.output_name
                ),
            )
        return

    # An asset store is configured or was provided by the user, so use it.
    asset_store_results = _set_addressable_asset(
        step_context, step_output_handle, asset_store_handle, output.value
    )
    for result in asset_store_results:
        # These are two independent checks, not an if/elif chain.
        if isinstance(result, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, result)
        if isinstance(result, AssetMaterialization):
            yield DagsterEvent.step_materialization(step_context, result)
def _create_output_materializations(step_context, output_name, value):
    """Yield a step materialization event for each config entry targeting ``output_name``.

    Solid config is looked up for the step's solid handle and then for every
    parent handle above it.
    """
    step = step_context.step

    # Output config may live at any level of the composition hierarchy, so
    # walk from the step's own handle up through each parent handle.
    handle = step.solid_handle
    while handle:
        config_for_handle = step_context.environment_config.solids.get(
            handle.to_string()
        )
        handle = handle.parent

        if config_for_handle is not None:
            for spec_entry in config_for_handle.outputs:
                # each entry is a single-item mapping: {output name: spec}
                check.invariant(len(spec_entry) == 1)
                configured_name, spec = next(iter(spec_entry.items()))
                if configured_name != output_name:
                    continue

                step_output = step.step_output_named(output_name)
                materializer = step_output.runtime_type.output_materialization_config
                result = materializer.materialize_runtime_value(
                    step_context, spec, value
                )

                if not isinstance(result, Materialization):
                    message_template = (
                        'materialize_runtime_value on type {type_name} has returned '
                        'value {value} of type {python_type}. You must return a '
                        'Materialization.'
                    )
                    raise DagsterInvariantViolationError(
                        message_template.format(
                            type_name=step_output.runtime_type.name,
                            value=repr(result),
                            python_type=type(result).__name__,
                        )
                    )

                yield DagsterEvent.step_materialization(step_context, result)
def _create_output_materializations(step_context, output_name, value):
    """Yield a step materialization event for each config entry targeting ``output_name``.

    Only the config registered under ``str(step.solid_handle)`` is consulted;
    nothing is yielded when that solid has no config.
    """
    step = step_context.step
    solid_config = step_context.environment_config.solids.get(str(step.solid_handle))

    if solid_config is not None:
        for spec_entry in solid_config.outputs:
            # each entry is a single-item mapping: {output name: spec}
            check.invariant(len(spec_entry) == 1)
            configured_name, spec = next(iter(spec_entry.items()))
            if configured_name != output_name:
                continue

            step_output = step.step_output_named(output_name)
            materializer = step_output.runtime_type.output_materialization_config
            result = materializer.materialize_runtime_value(step_context, spec, value)

            if not isinstance(result, Materialization):
                message_template = (
                    'materialize_runtime_value on type {type_name} has returned '
                    'value {value} of type {python_type}. You must return a '
                    'Materialization.'
                )
                raise DagsterInvariantViolationError(
                    message_template.format(
                        type_name=step_output.runtime_type.name,
                        value=repr(result),
                        python_type=type(result).__name__,
                    )
                )

            yield DagsterEvent.step_materialization(step_context, result)
def _execute_steps_core_loop(step_context, inputs, intermediates_manager):
    '''
    Evaluate the step's inputs, run the step, and yield the resulting DagsterEvents.

    Yields a step start event, one event per step output / materialization /
    expectation result, and finally a step success event carrying the
    measured duration.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(inputs, 'inputs', key_type=str)
    check.inst_param(intermediates_manager, 'intermediates_manager', IntermediatesManager)

    # runtime type checks of the supplied inputs against the step's inputs
    evaluated_inputs = {
        name: _get_evaluated_input(step_context.step, name, raw_value)
        for name, raw_value in inputs.items()
    }

    yield DagsterEvent.step_start_event(step_context)

    with time_execution_scope() as timer_result:
        raw_outputs = check.generator(
            _iterate_step_outputs_within_boundary(step_context, evaluated_inputs)
        )
        checked_outputs = check.generator(
            _error_check_step_outputs(step_context, raw_outputs)
        )

        # The outputs are consumed inside the timer block.
        for produced in checked_outputs:
            if isinstance(produced, StepOutputValue):
                yield _create_step_output_event(
                    step_context, produced, intermediates_manager
                )
            elif isinstance(produced, Materialization):
                yield DagsterEvent.step_materialization(step_context, produced)
            elif isinstance(produced, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, produced)
            else:
                check.failed(
                    'Unexpected step_output {step_output}, should have been caught earlier'
                    .format(step_output=produced)
                )

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)
def _create_type_materializations(
    step_context: SystemStepExecutionContext, output_name: str, value: Any
) -> Iterator[DagsterEvent]:
    """Run any configured dagster type materializers for ``output_name``.

    Materializer specs are looked up for the step's solid handle and for every
    parent handle above it. A step materialization event is yielded for each
    materialization the type's materializer returns.
    """

    def _boundary_message():
        # Built lazily: the error boundary receives this callable as msg_fn.
        return """Error occurred during output materialization: output name: "{output_name}" step key: "{key}" solid invocation: "{solid}" solid definition: "{solid_def}" """.format(
            output_name=output_name,
            key=step_context.step.key,
            solid_def=step_context.solid_def.name,
            solid=step_context.solid.name,
        )

    step = step_context.step

    # Output config may live at any level of the composition hierarchy, so
    # walk from the step's own handle up through each parent handle.
    handle = step.solid_handle
    while handle:
        config_for_handle = step_context.environment_config.solids.get(
            handle.to_string()
        )
        handle = handle.parent

        if config_for_handle is not None:
            for spec_entry in config_for_handle.outputs.type_materializer_specs:
                # each entry is a single-item mapping: {output name: spec}
                check.invariant(len(spec_entry) == 1)
                configured_name, spec = next(iter(spec_entry.items()))
                if configured_name != output_name:
                    continue

                step_output = step.step_output_named(output_name)
                with user_code_error_boundary(
                    DagsterTypeMaterializationError, msg_fn=_boundary_message
                ):
                    output_def = step_context.solid_def.output_def_named(
                        step_output.name
                    )
                    dagster_type = output_def.dagster_type
                    produced = dagster_type.materializer.materialize_runtime_values(
                        step_context, spec, value
                    )

                accepted_types = (AssetMaterialization, Materialization)
                for item in produced:
                    if not isinstance(item, accepted_types):
                        message_template = (
                            "materialize_runtime_values on type {type_name} has returned "
                            "value {value} of type {python_type}. You must return an "
                            "AssetMaterialization."
                        )
                        raise DagsterInvariantViolationError(
                            message_template.format(
                                type_name=dagster_type.display_name,
                                value=repr(item),
                                python_type=type(item).__name__,
                            )
                        )

                    yield DagsterEvent.step_materialization(step_context, item)
def _core_dagster_event_sequence_for_step(step_context):
    '''
    Run the step described by step_context and yield its DagsterEvents.

    Exceptions raised while the step is being computed are not caught here;
    they propagate to whoever is consuming this generator.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    loaded_values = _input_values_from_intermediates_manager(step_context)
    for name, loaded in loaded_values.items():
        if not isinstance(loaded, ObjectStoreOperation):
            inputs[name] = loaded
            continue

        # The value came back wrapped in an object store operation:
        # report the operation, then unwrap the underlying object.
        yield DagsterEvent.object_store_operation(
            step_context, ObjectStoreOperation.serializable(loaded, value_name=name)
        )
        inputs[name] = loaded.obj

    for name, input_value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, input_value)
        )
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs)
        )
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, raw_user_events)
        )

        # This loop must stay inside the timer block above so that the
        # recorded time is accurate.
        for user_event in checked_user_events:
            if isinstance(user_event, Output):
                for output_event in _create_step_events_for_output(
                    step_context, user_event
                ):
                    yield output_event
            elif isinstance(user_event, Materialization):
                yield DagsterEvent.step_materialization(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    'Unexpected event {event}, should have been caught earlier'
                    .format(event=user_event)
                )

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)
def _create_output_materializations(step_context, output_name, value):
    '''
    Yield step materialization events for each config entry targeting output_name.

    Solid config is looked up for the step's solid handle and for every parent
    handle above it. The call into the type's output materialization config
    runs inside a user code error boundary.
    '''

    def _boundary_message():
        # Built lazily: the error boundary receives this callable as msg_fn.
        return '''Error occurred during output materialization: output name: "{output_name}" step key: "{key}" solid invocation: "{solid}" solid definition: "{solid_def}" '''.format(
            output_name=output_name,
            key=step_context.step.key,
            solid_def=step_context.solid_def.name,
            solid=step_context.solid.name,
        )

    step = step_context.step

    # Output config may live at any level of the composition hierarchy, so
    # walk from the step's own handle up through each parent handle.
    handle = step.solid_handle
    while handle:
        config_for_handle = step_context.environment_config.solids.get(
            handle.to_string()
        )
        handle = handle.parent

        if config_for_handle is not None:
            for spec_entry in config_for_handle.outputs:
                # each entry is a single-item mapping: {output name: spec}
                check.invariant(len(spec_entry) == 1)
                configured_name, spec = next(iter(spec_entry.items()))
                if configured_name != output_name:
                    continue

                step_output = step.step_output_named(output_name)
                with user_code_error_boundary(
                    DagsterOutputMaterializationError, msg_fn=_boundary_message
                ):
                    materializer = step_output.dagster_type.output_materialization_config
                    produced = materializer.materialize_runtime_values(
                        step_context, spec, value
                    )

                for item in produced:
                    if not isinstance(item, Materialization):
                        message_template = (
                            'materialize_runtime_values on type {type_name} has returned '
                            'value {value} of type {python_type}. You must return a '
                            'Materialization.'
                        )
                        raise DagsterInvariantViolationError(
                            message_template.format(
                                type_name=step_output.dagster_type.name,
                                value=repr(item),
                                python_type=type(item).__name__,
                            )
                        )

                    yield DagsterEvent.step_materialization(step_context, item)
def _materializations_to_events(step_context, step_output_handle, materializations): if materializations is not None: for materialization in ensure_gen(materializations): if not isinstance(materialization, AssetMaterialization): raise DagsterInvariantViolationError(( "IO manager on output {output_name} has returned " "value {value} of type {python_type}. The return type can only be " "AssetMaterialization.").format( output_name=step_output_handle.output_name, value=repr(materialization), python_type=type(materialization).__name__, )) yield DagsterEvent.step_materialization(step_context, materialization)
def _core_dagster_event_sequence_for_step(step_context, inputs, intermediates_manager):
    '''
    Run the step described by step_context and yield its DagsterEvents.

    Exceptions raised while the step is being computed are not caught here;
    they propagate to whoever is consuming this generator.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(inputs, 'inputs', key_type=str)
    check.inst_param(intermediates_manager, 'intermediates_manager', IntermediatesManager)

    # runtime type checks of the supplied inputs against the step's inputs
    evaluated_inputs = {
        name: _get_evaluated_input(step_context.step, name, raw_value)
        for name, raw_value in inputs.items()
    }

    yield DagsterEvent.step_start_event(step_context)

    with time_execution_scope() as timer_result:
        raw_events = check.generator(
            _event_sequence_for_step_compute_fn(step_context, evaluated_inputs)
        )
        checked_events = check.generator(
            _step_output_error_checked_event_sequence(step_context, raw_events)
        )

        # This loop must stay inside the timer block above so that the
        # recorded time is accurate.
        for produced in checked_events:
            if isinstance(produced, Result):
                yield _create_step_output_event(
                    step_context, produced, intermediates_manager
                )
            elif isinstance(produced, Materialization):
                yield DagsterEvent.step_materialization(step_context, produced)
            elif isinstance(produced, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, produced)
            else:
                check.failed(
                    'Unexpected event {event}, should have been caught earlier'.format(
                        event=produced
                    )
                )

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Run the step described by step_context and yield its DagsterEvents.

    The first event is a step restarted event when prior_attempt_count is
    positive, otherwise a step start event. Exceptions raised while the step
    is being computed are not caught here; they propagate to the caller.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count <= 0:
        yield DagsterEvent.step_start_event(step_context)
    else:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)

    inputs = {}
    for name, loaded in _input_values_from_intermediate_storage(step_context):
        if isinstance(loaded, ObjectStoreOperation):
            # single object store read: report it and unwrap the object
            yield DagsterEvent.object_store_operation(
                step_context, ObjectStoreOperation.serializable(loaded, value_name=name)
            )
            inputs[name] = loaded.obj
        elif isinstance(loaded, MultipleStepOutputsListWrapper):
            # several operations feed one input: report each of them,
            # then collect their objects into a list
            for operation in loaded:
                yield DagsterEvent.object_store_operation(
                    step_context,
                    ObjectStoreOperation.serializable(operation, value_name=name),
                )
            inputs[name] = [operation.obj for operation in loaded]
        else:
            inputs[name] = loaded

    for name, input_value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, input_value)
        )
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs)
        )
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, raw_user_events)
        )

        # This loop must stay inside the timer block above so that the
        # recorded time is accurate.
        for user_event in checked_user_events:
            if isinstance(user_event, Output):
                for output_event in _create_step_events_for_output(
                    step_context, user_event
                ):
                    yield output_event
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event)
                )

    # Ideally exactly one step success or failure event is logged. Interrupts
    # normally log a failure event, so hold them off until the success event
    # has been emitted.
    with delay_interrupts():
        success_data = StepSuccessData(duration_ms=timer_result.millis)
        yield DagsterEvent.step_success_event(step_context, success_data)
def core_dagster_event_sequence_for_step(
    step_context: SystemStepExecutionContext, prior_attempt_count: int
) -> Iterator[DagsterEvent]:
    """
    Run the step described by step_context and yield its DagsterEvents.

    The first event is a step restarted event when prior_attempt_count is
    positive, otherwise a step start event. Exceptions raised while the step
    is being computed are not caught here; they propagate to the caller.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count <= 0:
        yield DagsterEvent.step_start_event(step_context)
    else:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)

    inputs = {}
    for step_input in step_context.step.step_inputs:
        source = step_input.source
        input_def = source.get_input_def(step_context.pipeline_def)

        # inputs whose dagster type kind is NOTHING are not loaded
        if input_def.dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        # Loading may produce events as well as the input value itself:
        # events are passed straight through, anything else is the value.
        for loaded in ensure_gen(source.load_input_object(step_context)):
            if not isinstance(loaded, DagsterEvent):
                check.invariant(step_input.name not in inputs)
                inputs[step_input.name] = loaded
            else:
                yield loaded

    for name, input_value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, input_value)
        )
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs)
        )
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, raw_user_events)
        )

        # This loop must stay inside the timer block above so that the
        # recorded time is accurate.
        for user_event in checked_user_events:
            if isinstance(user_event, (Output, DynamicOutput)):
                for output_event in _type_check_and_store_output(
                    step_context, user_event
                ):
                    yield output_event
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event)
                )

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Run the step described by step_context and yield its DagsterEvents.

    The first event is a step restarted event when prior_attempt_count is
    positive, otherwise a step start event. Exceptions raised while the step
    is being computed are not caught here; they propagate to the caller.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count <= 0:
        yield DagsterEvent.step_start_event(step_context)
    else:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)

    inputs = {}
    for name, loaded in _load_input_values(step_context):
        # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
        if isinstance(loaded, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context, ObjectStoreOperation.serializable(loaded, value_name=name)
            )
            inputs[name] = loaded.obj
        elif isinstance(loaded, FanInStepInputValuesWrapper):
            # Fan-in input: each member is either a store operation (reported
            # as an event and unwrapped) or already the plain value.
            collected = []
            for member in loaded:
                # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
                if isinstance(member, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(member, value_name=name),
                    )
                    collected.append(member.obj)
                elif isinstance(member, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(
                        step_context, AssetStoreOperation.serializable(member)
                    )
                    collected.append(member.obj)
                else:
                    collected.append(member)
            inputs[name] = collected
        elif isinstance(loaded, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(
                step_context, AssetStoreOperation.serializable(loaded)
            )
            inputs[name] = loaded.obj
        else:
            inputs[name] = loaded

    for name, input_value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, input_value)
        )
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs)
        )
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, raw_user_events)
        )

        # This loop must stay inside the timer block above so that the
        # recorded time is accurate.
        for user_event in checked_user_events:
            if isinstance(user_event, (Output, DynamicOutput)):
                for output_event in _create_step_events_for_output(
                    step_context, user_event
                ):
                    yield output_event
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event)
                )

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)
def _store_output(
    step_context: SystemStepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:
    """Hand an output value to its IO manager and yield the resulting events.

    Args:
        step_context: Execution context of the step that produced the output.
        step_output_handle: Handle identifying the output being stored.
        output: The output object whose ``value`` is passed to the IO manager.
        input_lineage: Lineage info attached to every materialization event
            yielded here.

    Yields:
        A step materialization event for each AssetMaterialization returned by
        the IO manager, a step materialization event for each materialization
        derived from the output's asset key (when there is one), and finally a
        handled-output event.

    Raises:
        DagsterInvariantViolationError: if the IO manager returns anything
            other than AssetMaterialization, EventMetadataEntry or
            PartitionMetadataEntry.
    """
    output_def = step_context.solid_def.output_def_named(step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    # Only the handle_output call itself runs inside the error boundary.
    with user_code_error_boundary(
        DagsterExecutionHandleOutputError,
        control_flow_exceptions=[Failure, RetryRequested],
        msg_fn=lambda: (
            f'Error occurred while handling output "{output_context.name}" of '
            f'step "{step_context.step.key}":'
        ),
        step_key=step_context.step.key,
        output_name=output_context.name,
    ):
        handle_output_res = output_manager.handle_output(output_context, output.value)

    # Sort whatever the IO manager returned into materializations and metadata.
    manager_materializations = []
    manager_metadata_entries = []
    if handle_output_res is not None:
        for elt in ensure_gen(handle_output_res):
            if isinstance(elt, AssetMaterialization):
                manager_materializations.append(elt)
            elif isinstance(elt, (EventMetadataEntry, PartitionMetadataEntry)):
                experimental_functionality_warning(
                    "Yielding metadata from an IOManager's handle_output() function"
                )
                manager_metadata_entries.append(elt)
            else:
                raise DagsterInvariantViolationError(
                    f"IO manager on output {output_def.name} has returned "
                    f"value {elt} of type {type(elt).__name__}. The return type can only be "
                    "one of AssetMaterialization, EventMetadataEntry, PartitionMetadataEntry."
                )

    # do not alter explicitly created AssetMaterializations
    for materialization in manager_materializations:
        yield DagsterEvent.step_materialization(step_context, materialization, input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, output_manager
    )
    if asset_key:
        for materialization in _get_output_asset_materializations(
            asset_key,
            partitions,
            output,
            output_def,
            manager_metadata_entries,
        ):
            yield DagsterEvent.step_materialization(step_context, materialization, input_lineage)

    # This event reports an output being handled, so the override message
    # names it as an output (it previously said "input").
    if isinstance(output_manager, IntermediateStorageAdapter):
        message_override = (
            f'Handled output "{step_output_handle.output_name}" using intermediate storage'
        )
    else:
        message_override = None

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        message_override=message_override,
        # only EventMetadataEntry items are attached to the event
        metadata_entries=[
            entry for entry in manager_metadata_entries if isinstance(entry, EventMetadataEntry)
        ],
    )