def _store_output(
    step_context: SystemStepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
) -> Iterator[DagsterEvent]:
    """Hand a step output to its IO manager and emit the resulting events.

    The IO manager's ``handle_output`` call runs inside a user-code error
    boundary, so failures are reported as ``DagsterExecutionHandleOutputError``
    (``Failure`` and ``RetryRequested`` are passed as control-flow exceptions).

    Yields:
        The events built by ``_materializations_to_events`` from whatever
        ``handle_output`` returned, followed by one handled-output event.
    """
    output_def = step_context.solid_def.output_def_named(step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    with user_code_error_boundary(
        DagsterExecutionHandleOutputError,
        control_flow_exceptions=[Failure, RetryRequested],
        msg_fn=lambda: (
            f"Error occurred during the handling of step output:"
            f' step key: "{step_context.step.key}"'
            f' output name: "{output_context.name}"'
        ),
        step_key=step_context.step.key,
        output_name=output_context.name,
    ):
        materializations = output_manager.handle_output(output_context, output.value)

    for evt in _materializations_to_events(step_context, step_output_handle, materializations):
        yield evt

    # Only outputs routed through the intermediate-storage adapter get a custom
    # message; this function handles an *output*, so the message says so.
    if isinstance(output_manager, IntermediateStorageAdapter):
        message_override = (
            f'Handled output "{step_output_handle.output_name}" using intermediate storage'
        )
    else:
        message_override = None

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        message_override=message_override,
    )
def _user_event_sequence_for_step_compute_fn(step_context, evaluated_inputs):
    '''Yield the events produced by a step's compute function.

    The compute function is invoked and iterated inside a user-code error
    boundary, so errors raised while it runs are reported as
    ``DagsterExecutionStepExecutionError``.

    Args:
        step_context: must be a ``SystemStepExecutionContext``.
        evaluated_inputs: dict with ``str`` keys, passed to the compute function.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(evaluated_inputs, 'evaluated_inputs', key_type=str)

    with user_code_error_boundary(
        DagsterExecutionStepExecutionError,
        # Spelling corrected ("occurred") to agree with the other error messages.
        msg_fn=lambda: '''Error occurred during the execution of step: step key: "{key}" solid invocation: "{solid}" solid definition: "{solid_def}" '''.format(
            key=step_context.step.key,
            solid_def=step_context.solid_def.name,
            solid=step_context.solid.name,
        ),
        step_key=step_context.step.key,
        solid_def_name=step_context.solid_def.name,
        solid_name=step_context.solid.name,
    ):
        gen = check.opt_generator(step_context.step.compute_fn(step_context, evaluated_inputs))

        # opt_generator may hand back None, in which case there is nothing to yield.
        if gen is not None:
            for event in gen:
                yield event
def get_external_sensor_execution(
    recon_repo, instance_ref, sensor_name, last_completion_timestamp, last_run_key, cursor
):
    """Evaluate one tick of the named sensor.

    Returns:
        The value of ``sensor_def.evaluate_tick`` on success. If anything raises
        while the tick is evaluated, an ``ExternalSensorExecutionErrorData``
        carrying the serialized exception info is returned instead.
    """
    check.inst_param(
        recon_repo,
        "recon_repo",
        ReconstructableRepository,
    )
    definition = recon_repo.get_definition()
    sensor_def = definition.get_sensor_def(sensor_name)

    with SensorEvaluationContext(
        instance_ref,
        last_completion_time=last_completion_timestamp,
        last_run_key=last_run_key,
        cursor=cursor,
        # Reuse the definition loaded above rather than loading it a second time.
        repository_name=definition.name,
    ) as sensor_context:
        try:
            with user_code_error_boundary(
                SensorExecutionError,
                lambda: "Error occurred during the execution of evaluation_fn for sensor "
                "{sensor_name}".format(sensor_name=sensor_def.name),
            ):
                return sensor_def.evaluate_tick(sensor_context)
        except Exception:
            # Every failure is converted into serializable error data for the caller.
            return ExternalSensorExecutionErrorData(
                serializable_error_info_from_exc_info(sys.exc_info())
            )
def _set_objects(step_context, step_output, step_output_handle, output):
    """Pass a step output to its output manager and emit the resulting events.

    ``handle_output`` runs inside a user-code error boundary that reports
    failures as ``DagsterExecutionHandleOutputError``.

    Yields:
        The events built by ``_materializations_to_events`` from the value
        ``handle_output`` returned, followed by one handled-output event.
    """
    output_def = step_output.output_def
    output_manager = step_context.get_output_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    with user_code_error_boundary(
        DagsterExecutionHandleOutputError,
        control_flow_exceptions=[Failure, RetryRequested],
        # Duplicated word ("the the") removed from the message.
        msg_fn=lambda: (
            f"Error occurred during the handling of step output:"
            f' step key: "{step_context.step.key}"'
            f' output name: "{output_context.name}"'
        ),
        step_key=step_context.step.key,
        output_name=output_context.name,
    ):
        materializations = output_manager.handle_output(output_context, output.value)

    for evt in _materializations_to_events(step_context, step_output_handle, materializations):
        yield evt

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
    )
def ExternalPartitionTags(self, request, _context):
    """Compute the tags of one partition and wrap them in a reply message.

    The partition arguments are deserialized from
    ``request.serialized_partition_args``. A ``PartitionExecutionError`` raised
    while evaluating the tags is serialized into the reply instead of
    propagating to the caller.
    """
    args = deserialize_json_to_dagster_namedtuple(request.serialized_partition_args)
    check.inst_param(args, 'partition_args', PartitionArgs)

    repository = self._recon_repository_from_origin(args.repository_origin).get_definition()
    partition_set = repository.get_partition_set_def(args.partition_set_name)
    partition = partition_set.get_partition(args.partition_name)

    def _describe_failure():
        return (
            'Error occurred during the evaluation of the `tags_for_partition` function for '
            'partition set {partition_set_name}'.format(partition_set_name=partition_set.name)
        )

    try:
        with user_code_error_boundary(PartitionExecutionError, _describe_failure):
            partition_tags = partition_set.tags_for_partition(partition)
            payload = ExternalPartitionTagsData(name=partition.name, tags=partition_tags)
            return api_pb2.ExternalPartitionTagsReply(
                serialized_external_partition_tags_or_external_partition_execution_error=(
                    serialize_dagster_namedtuple(payload)
                )
            )
    except PartitionExecutionError:
        error_payload = ExternalPartitionExecutionErrorData(
            serializable_error_info_from_exc_info(sys.exc_info())
        )
        return api_pb2.ExternalPartitionTagsReply(
            serialized_external_partition_tags_or_external_partition_execution_error=(
                serialize_dagster_namedtuple(error_payload)
            )
        )
def _user_event_sequence_for_step_compute_fn(
    step_context: SystemStepExecutionContext, evaluated_inputs: Dict[str, Any]
) -> Iterator[SolidOutputUnion]:
    """Yield what the solid's compute function produces for this step.

    The generator returned by ``execute_core_compute`` is driven through
    ``iterate_with_context`` with a factory that opens a user-code error
    boundary reporting ``DagsterExecutionStepExecutionError``.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.dict_param(evaluated_inputs, "evaluated_inputs", key_type=str)

    def _step_failure_message():
        return """Error occurred during the execution of step: step key: "{key}" solid invocation: "{solid}" solid definition: "{solid_def}" """.format(
            key=step_context.step.key,
            solid_def=step_context.solid_def.name,
            solid=step_context.solid.name,
        )

    def _open_error_boundary():
        # Called by iterate_with_context to obtain the context manager it uses.
        return user_code_error_boundary(
            DagsterExecutionStepExecutionError,
            control_flow_exceptions=[Failure, RetryRequested],
            msg_fn=_step_failure_message,
            step_key=step_context.step.key,
            solid_def_name=step_context.solid_def.name,
            solid_name=step_context.solid.name,
        )

    compute_events = execute_core_compute(
        step_context.for_compute(),
        evaluated_inputs,
        step_context.solid_def.compute_fn,
    )

    for event in iterate_with_context(_open_error_boundary, compute_events):
        yield event
def _user_event_sequence_for_step_compute_fn(step_context, evaluated_inputs):
    """Yield the events produced by the step's compute function.

    The compute function is called and iterated inside a user-code error
    boundary reporting ``DagsterExecutionStepExecutionError``; ``Failure`` and
    ``RetryRequested`` are passed as control-flow exceptions.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.dict_param(evaluated_inputs, "evaluated_inputs", key_type=str)

    def _step_failure_message():
        return """Error occurred during the execution of step: step key: "{key}" solid invocation: "{solid}" solid definition: "{solid_def}" """.format(
            key=step_context.step.key,
            solid_def=step_context.solid_def.name,
            solid=step_context.solid.name,
        )

    with user_code_error_boundary(
        DagsterExecutionStepExecutionError,
        control_flow_exceptions=[Failure, RetryRequested],
        msg_fn=_step_failure_message,
        step_key=step_context.step.key,
        solid_def_name=step_context.solid_def.name,
        solid_name=step_context.solid.name,
    ):
        user_events = check.opt_generator(
            step_context.step.compute_fn(step_context, evaluated_inputs)
        )

        if user_events:
            # Allow interrupts again during each step of the execution
            for event in iterate_with_context(raise_interrupts_immediately, user_events):
                yield event
def partition_data_command(args):
    """Build the ``ExternalPartitionData`` for one partition of a partition set.

    On success the result carries the partition's tags and run config. If a
    ``PartitionScheduleExecutionError`` is raised while they are computed, the
    result carries the serialized error instead.
    """
    check.inst_param(args, 'args', PartitionApiCommandArgs)

    repository = recon_repository_from_origin(args.repository_origin).get_definition()
    partition_set = repository.get_partition_set_def(args.partition_set_name)
    partition = partition_set.get_partition(args.partition_name)

    def _describe_failure():
        return (
            'Error occurred during the execution of user-provided partition functions for '
            'partition set {partition_set_name}'.format(partition_set_name=partition_set.name)
        )

    try:
        with user_code_error_boundary(PartitionScheduleExecutionError, _describe_failure):
            partition_run_config = partition_set.environment_dict_for_partition(partition)
            partition_tags = partition_set.tags_for_partition(partition)
            return ExternalPartitionData(
                name=partition.name,
                tags=partition_tags,
                run_config=partition_run_config,
            )
    except PartitionScheduleExecutionError:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        return ExternalPartitionData(name=partition.name, error=error_info)
def _iterate_step_outputs_within_boundary(step_context, evaluated_inputs):
    '''Yield the outputs of a step's compute function.

    The compute function is invoked and iterated inside a user-code error
    boundary reporting ``DagsterExecutionStepExecutionError``. The error text
    is built eagerly, before the boundary is entered.

    Args:
        step_context: must be a ``SystemStepExecutionContext``.
        evaluated_inputs: dict with ``str`` keys, passed to the compute function.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(evaluated_inputs, 'evaluated_inputs', key_type=str)

    # Spelling corrected ("occurred") in the user-facing error text.
    error_str = '''Error occurred during the execution of step: step key: "{key}" solid instance: "{solid}" solid definition: "{solid_def}" '''.format(
        key=step_context.step.key,
        solid_def=step_context.solid_def.name,
        solid=step_context.solid.name,
    )

    with user_code_error_boundary(
        DagsterExecutionStepExecutionError,
        error_str,
        step_key=step_context.step.key,
        solid_def_name=step_context.solid_def.name,
        solid_name=step_context.solid.name,
    ):
        gen = check.opt_generator(step_context.step.compute_fn(step_context, evaluated_inputs))

        # opt_generator may hand back None, in which case there is nothing to yield.
        if gen is not None:
            for step_output in gen:
                yield step_output
def get_external_sensor_execution(
    recon_repo, instance_ref, sensor_name, last_completion_timestamp, last_run_key
):
    """Run the named sensor and return its execution data.

    Returns:
        ``ExternalSensorExecutionData`` built from the sensor's execution data,
        or ``ExternalSensorExecutionErrorData`` with the serialized error when a
        ``SensorExecutionError`` is raised.
    """
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

    sensor_def = recon_repo.get_definition().get_sensor_def(sensor_name)

    def _describe_failure():
        return (
            "Error occurred during the execution of evaluation_fn for sensor "
            "{sensor_name}".format(sensor_name=sensor_def.name)
        )

    with DagsterInstance.from_ref(instance_ref) as instance:
        context = SensorExecutionContext(
            instance,
            last_completion_time=last_completion_timestamp,
            last_run_key=last_run_key,
        )

        try:
            with user_code_error_boundary(SensorExecutionError, _describe_failure):
                execution_data = sensor_def.get_execution_data(context)
                return ExternalSensorExecutionData.from_execution_data(execution_data)
        except SensorExecutionError:
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            return ExternalSensorExecutionErrorData(error_info)
def _get_mapped_solids_dict(composite_def, current_stack, current_solid_config):
    """Run a composite's config mapping function and validate what it returns.

    Returns:
        The processed config value for the composite's child solids.
    """
    # Contract of the config mapping function: it receives the dictionary at
    #   solid_name:
    #      config: {dict_passed_to_user}
    # and returns the dictionary rooted at
    #   solid_name:
    #      solids: {return_value_of_config_fn}
    # The function is called first, then its result is checked against the
    # child schema.
    user_config = current_solid_config.get('config', {})
    with user_code_error_boundary(
        DagsterConfigMappingFunctionError, _get_error_lambda(current_stack)
    ):
        mapped_solids_config = composite_def.config_mapping.config_fn(user_config)

    # The type used for validation is constructed dynamically from the
    # composite's own solids and dependency structure.
    child_config_type = define_solid_dictionary_cls(
        composite_def.solids,
        composite_def.dependency_structure,
        current_stack.handle,
    )

    # Process the mapped config against that new type.
    result = process_config(child_config_type, mapped_solids_config)
    if not result.success:
        raise_composite_descent_config_error(current_stack, mapped_solids_config, result)

    return result.value
def _user_event_sequence_for_step_compute_fn(
    step_context: StepExecutionContext, evaluated_inputs: Dict[str, Any]
) -> Iterator[SolidOutputUnion]:
    """Yield what the solid's compute function produces for this step.

    The generator from ``execute_core_compute`` is driven through
    ``iterate_with_context`` with a factory that opens a user-code error
    boundary reporting ``DagsterExecutionStepExecutionError``.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.dict_param(evaluated_inputs, "evaluated_inputs", key_type=str)

    def _solid_failure_message():
        return f'Error occurred while executing solid "{step_context.solid.name}":'

    def _open_error_boundary():
        # Called by iterate_with_context to obtain the context manager it uses.
        return user_code_error_boundary(
            DagsterExecutionStepExecutionError,
            control_flow_exceptions=[Failure, RetryRequested],
            msg_fn=_solid_failure_message,
            step_key=step_context.step.key,
            solid_def_name=step_context.solid_def.name,
            solid_name=step_context.solid.name,
        )

    compute_events = execute_core_compute(
        step_context,
        evaluated_inputs,
        step_context.solid_def.compute_fn,
    )

    for event in iterate_with_context(_open_error_boundary, compute_events):
        yield event
def user_code_context_manager(user_fn, error_cls, msg_fn):
    '''Yield the single item produced by a user-provided function.

    ``user_fn`` may return a value or yield one; its result is normalized with
    ``ensure_gen``. The function is run inside a user-code error boundary using
    ``error_cls`` and ``msg_fn``. A check failure is raised when nothing is
    yielded, or when a second item is yielded after the first.
    '''
    check.callable_param(user_fn, 'user_fn')
    check.subclass_param(error_cls, 'error_cls', DagsterUserCodeExecutionError)

    with user_code_error_boundary(error_cls, msg_fn):
        items = ensure_gen(user_fn())

        try:
            first_item = next(items)
        except StopIteration:
            check.failed('Must yield one item. You did not yield anything.')

        yield first_item

        # A private sentinel distinguishes "exhausted" from any value that the
        # generator could yield.
        exhausted = object()
        check.invariant(
            next(items, exhausted) is exhausted,
            'Must yield one item. Yielded more than one item',
        )
def _type_checked_event_sequence_for_input(
    step_context: StepExecutionContext, input_name: str, input_value: Any
) -> Iterator[DagsterEvent]:
    """Type-check one step input and yield the corresponding step-input event.

    Raises:
        DagsterTypeCheckDidNotPass: after the event has been yielded, when the
            type check reports failure.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.str_param(input_name, "input_name")

    step_input = step_context.step.step_input_named(input_name)
    dagster_type = step_input.source.get_input_def(step_context.pipeline_def).dagster_type

    def _type_check_error_message():
        return (
            f'Error occurred while type-checking input "{input_name}" of solid '
            f'"{str(step_context.step.solid_handle)}", with Python type {type(input_value)} and '
            f"Dagster type {dagster_type.display_name}"
        )

    with user_code_error_boundary(DagsterTypeCheckError, _type_check_error_message):
        type_check = do_type_check(
            step_context.for_type(dagster_type), dagster_type, input_value
        )

    yield _create_step_input_event(
        step_context,
        input_name,
        type_check=type_check,
        success=type_check.success,
    )

    if type_check.success:
        return

    raise DagsterTypeCheckDidNotPass(
        description=(
            f'Type check failed for step input "{input_name}" - '
            f'expected type "{dagster_type.display_name}". '
            f"Description: {type_check.description}."
        ),
        metadata_entries=type_check.metadata_entries,
        dagster_type=dagster_type,
    )
def get_external_schedule_execution(external_schedule_execution_args):
    """Compute the run config for a schedule described by the given args.

    Returns:
        ``ExternalScheduleExecutionData`` with the run config, or
        ``ExternalScheduleExecutionErrorData`` with the serialized error when a
        ``ScheduleExecutionError`` is raised.
    """
    check.inst_param(
        external_schedule_execution_args,
        'external_schedule_execution_args',
        ExternalScheduleExecutionArgs,
    )
    args = external_schedule_execution_args

    repository = recon_repository_from_origin(args.repository_origin).get_definition()
    schedule_def = repository.get_schedule_def(args.schedule_name)
    schedule_context = ScheduleExecutionContext(DagsterInstance.from_ref(args.instance_ref))

    def _describe_failure():
        return (
            'Error occurred during the execution of run_config_fn for schedule '
            '{schedule_name}'.format(schedule_name=schedule_def.name)
        )

    try:
        with user_code_error_boundary(ScheduleExecutionError, _describe_failure):
            schedule_run_config = schedule_def.get_run_config(schedule_context)
            return ExternalScheduleExecutionData(run_config=schedule_run_config)
    except ScheduleExecutionError:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        return ExternalScheduleExecutionErrorData(error_info)
def ExternalPartitionNames(self, request, _context):
    """Return the partition names of a partition set in a reply message.

    The arguments are deserialized from
    ``request.serialized_partition_names_args``. A ``PartitionExecutionError``
    raised while the names are generated is serialized into the reply instead
    of propagating.
    """
    args = deserialize_json_to_dagster_namedtuple(request.serialized_partition_names_args)
    check.inst_param(args, "partition_names_args", PartitionNamesArgs)

    repository = self._recon_repository_from_origin(args.repository_origin).get_definition()
    partition_set = repository.get_partition_set_def(args.partition_set_name)

    def _describe_failure():
        return (
            "Error occurred during the execution of the partition generation function for "
            "partition set {partition_set_name}".format(partition_set_name=partition_set.name)
        )

    try:
        with user_code_error_boundary(PartitionExecutionError, _describe_failure):
            payload = ExternalPartitionNamesData(
                partition_names=partition_set.get_partition_names()
            )
            return api_pb2.ExternalPartitionNamesReply(
                serialized_external_partition_names_or_external_partition_execution_error=(
                    serialize_dagster_namedtuple(payload)
                )
            )
    except PartitionExecutionError:
        error_payload = ExternalPartitionExecutionErrorData(
            serializable_error_info_from_exc_info(sys.exc_info())
        )
        return api_pb2.ExternalPartitionNamesReply(
            serialized_external_partition_names_or_external_partition_execution_error=(
                serialize_dagster_namedtuple(error_payload)
            )
        )
def get_external_schedule_execution(
    recon_repo,
    instance_ref,
    schedule_name,
    scheduled_execution_timestamp,
    scheduled_execution_timezone,
):
    """Run the named schedule's execution function and return its data.

    A falsy ``scheduled_execution_timestamp`` results in a schedule context
    with no scheduled execution time.

    Returns:
        ``ExternalScheduleExecutionData`` on success, or
        ``ExternalScheduleExecutionErrorData`` with the serialized error when a
        ``ScheduleExecutionError`` is raised.
    """
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

    schedule_def = recon_repo.get_definition().get_schedule_def(schedule_name)

    if scheduled_execution_timestamp:
        scheduled_execution_time = pendulum.from_timestamp(
            scheduled_execution_timestamp,
            tz=scheduled_execution_timezone,
        )
    else:
        scheduled_execution_time = None

    schedule_context = ScheduleExecutionContext(instance_ref, scheduled_execution_time)

    def _describe_failure():
        return (
            "Error occurred during the execution function for schedule "
            "{schedule_name}".format(schedule_name=schedule_def.name)
        )

    try:
        with user_code_error_boundary(ScheduleExecutionError, _describe_failure):
            execution_data = schedule_def.get_execution_data(schedule_context)
            return ExternalScheduleExecutionData.from_execution_data(execution_data)
    except ScheduleExecutionError:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        return ExternalScheduleExecutionErrorData(error_info)
def _type_checked_step_output_event_sequence(step_context, output):
    '''Type-check a step output and yield a step-output event for it.

    A success event is yielded when the type check completes inside the error
    boundary. If anything raises, a failure event built from the exception is
    yielded and the exception is then re-raised.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.inst_param(output, 'output', Output)

    step_output = step_context.step.step_output_named(output.output_name)

    def _type_check_error_message():
        template = (
            'In solid "{handle}" the output "{output_name}" received '
            'value {output_value} of Python type {output_type} which '
            'does not pass the typecheck for Dagster type '
            '{dagster_type_name}. Step {step_key}.'
        )
        return template.format(
            handle=str(step_context.step.solid_handle),
            output_name=output.output_name,
            output_value=output.value,
            output_type=type(output.value),
            dagster_type_name=step_output.runtime_type.name,
            step_key=step_context.step.key,
        )

    try:
        with user_code_error_boundary(DagsterTypeCheckError, _type_check_error_message):
            type_check = _do_type_check(step_output.runtime_type, output.value)
            yield _create_step_output_event(
                step_context, output, type_check=type_check, success=True
            )
    except Exception as failure:  # pylint: disable=broad-except
        failed_check = _type_check_from_failure(failure)
        yield _create_step_output_event(
            step_context, output, type_check=failed_check, success=False
        )
        raise failure
def single_resource_event_generator(context, resource_name, resource_def):
    """Initialize one resource, yield it, then finish its generator.

    The resource function is called (with ``context`` only when its parameters
    indicate a context is expected) inside a user-code error boundary reporting
    ``DagsterResourceFunctionError``. After the caller resumes this generator,
    the underlying resource iterator is advanced once more and must be
    exhausted.

    Yields:
        A single ``InitializedResource`` wrapping the resource, the formatted
        initialization duration, and a flag telling whether the resource
        function returned a generator or a ``ContextDecorator``.
    """

    def _resource_error_message():
        return "Error executing resource_fn on ResourceDefinition {name}".format(
            name=resource_name
        )

    try:
        with user_code_error_boundary(
            DagsterResourceFunctionError, _resource_error_message, log_manager=context.log
        ):
            try:
                with time_execution_scope() as timer_result:
                    if is_context_provided(get_function_params(resource_def.resource_fn)):
                        resource_or_gen = resource_def.resource_fn(context)
                    else:
                        resource_or_gen = resource_def.resource_fn()

                    # Flag for whether resource is generator. This is used to ensure
                    # that teardown occurs when resources are initialized out of
                    # execution.
                    is_gen = inspect.isgenerator(resource_or_gen) or isinstance(
                        resource_or_gen, ContextDecorator
                    )

                    resource_iter = _wrapped_resource_iterator(resource_or_gen)
                    raw_resource = next(resource_iter)

                # Built after the timing scope has exited, from its recorded millis.
                resource = InitializedResource(
                    raw_resource, format_duration(timer_result.millis), is_gen
                )
            except StopIteration:
                check.failed(
                    "Resource generator {name} must yield one item.".format(name=resource_name)
                )

        yield resource

    except DagsterUserCodeExecutionError as dagster_user_error:
        raise dagster_user_error

    with user_code_error_boundary(
        DagsterResourceFunctionError, _resource_error_message, log_manager=context.log
    ):
        try:
            next(resource_iter)
        except StopIteration:
            pass
        else:
            check.failed(
                "Resource generator {name} yielded more than one item.".format(
                    name=resource_name
                )
            )
def _invoke_user_config_fn(self, processed_config):
    """Call the user's config function on the ``"config"`` entry.

    Returns:
        A dict with the single key ``"config"`` holding the function's result.
        Errors raised by the function are reported through a
        ``DagsterConfigMappingFunctionError`` boundary.
    """
    describe_failure = _get_user_code_error_str_lambda(self.parent_def)
    with user_code_error_boundary(DagsterConfigMappingFunctionError, describe_failure):
        # A missing "config" entry is treated as an empty dict.
        user_config = processed_config.get("config", {})
        return {"config": self._config_fn(user_config)}
def load_input_object(self, step_context):
    """Construct this input's value from its config data.

    The dagster type's loader is invoked inside a user-code error boundary
    reporting ``DagsterTypeLoadingError``.
    """
    describe_failure = _generate_error_boundary_msg_for_step_input(
        step_context, self.input_name
    )
    with user_code_error_boundary(DagsterTypeLoadingError, msg_fn=describe_failure):
        loader = self.dagster_type.loader
        return loader.construct_from_config_value(step_context, self.config_data)
def get_partition_set_execution_param_data(args):
    """Compute run config and tags for the requested partitions of a set.

    Only partitions whose name is in ``args.partition_names`` are processed.

    Returns:
        ``ExternalPartitionSetExecutionParamData`` on success, or
        ``ExternalPartitionExecutionErrorData`` with the serialized error when a
        ``PartitionExecutionError`` is raised while generating the partitions or
        while computing a partition's run config or tags.
    """
    check.inst_param(args, "args", PartitionSetExecutionParamArgs)
    recon_repo = recon_repository_from_origin(args.repository_origin)
    repo_definition = recon_repo.get_definition()
    partition_set_def = repo_definition.get_partition_set_def(args.partition_set_name)

    # Defined once instead of being re-created on every loop iteration. The names
    # are bound as arguments so each message refers to its own partition.
    def _error_message_fn(partition_set_name, partition_name):
        return lambda: (
            "Error occurred during the partition config and tag generation for "
            "partition set {partition_set_name}::{partition_name}".format(
                partition_set_name=partition_set_name, partition_name=partition_name
            )
        )

    try:
        with user_code_error_boundary(
            PartitionExecutionError,
            lambda: "Error occurred during the partition generation for partition set "
            "{partition_set_name}".format(partition_set_name=partition_set_def.name),
        ):
            all_partitions = partition_set_def.get_partitions()

        # Build the lookup once so filtering does not rescan the list per partition.
        requested_names = set(args.partition_names)
        partitions = [
            partition for partition in all_partitions if partition.name in requested_names
        ]

        partition_data = []
        for partition in partitions:
            with user_code_error_boundary(
                PartitionExecutionError,
                _error_message_fn(partition_set_def.name, partition.name),
            ):
                run_config = partition_set_def.run_config_for_partition(partition)
                tags = partition_set_def.tags_for_partition(partition)

            partition_data.append(
                ExternalPartitionExecutionParamData(
                    name=partition.name,
                    tags=tags,
                    run_config=run_config,
                )
            )

        return ExternalPartitionSetExecutionParamData(partition_data=partition_data)

    except PartitionExecutionError:
        return ExternalPartitionExecutionErrorData(
            serializable_error_info_from_exc_info(sys.exc_info())
        )
def get_partition_set_execution_param_data(recon_repo, partition_set_name, partition_names):
    """Compute run config and tags for the requested partitions of a set.

    Only partitions whose name is in ``partition_names`` are processed.

    Returns:
        ``ExternalPartitionSetExecutionParamData`` on success, or
        ``ExternalPartitionExecutionErrorData`` with the serialized error when a
        ``PartitionExecutionError`` is raised while generating the partitions or
        while computing a partition's run config or tags.
    """
    repo_definition = recon_repo.get_definition()
    partition_set_def = repo_definition.get_partition_set_def(partition_set_name)

    # Defined once instead of being re-created on every loop iteration. The
    # partition name is bound as an argument so each message names its own partition.
    def _error_message_fn(partition_name):
        return lambda: (
            "Error occurred during the partition config and tag generation for "
            f"'{partition_name}' in {_get_target_for_partition_execution_error(partition_set_def)}"
        )

    try:
        with user_code_error_boundary(
            PartitionExecutionError,
            lambda: "Error occurred during the partition generation for "
            f"{_get_target_for_partition_execution_error(partition_set_def)}",
        ):
            all_partitions = partition_set_def.get_partitions()

        # Build the lookup once so filtering does not rescan the list per partition.
        requested_names = set(partition_names)
        partitions = [
            partition for partition in all_partitions if partition.name in requested_names
        ]

        partition_data = []
        for partition in partitions:
            with user_code_error_boundary(
                PartitionExecutionError, _error_message_fn(partition.name)
            ):
                run_config = partition_set_def.run_config_for_partition(partition)
                tags = partition_set_def.tags_for_partition(partition)

            partition_data.append(
                ExternalPartitionExecutionParamData(
                    name=partition.name,
                    tags=tags,
                    run_config=run_config,
                )
            )

        return ExternalPartitionSetExecutionParamData(partition_data=partition_data)

    except PartitionExecutionError:
        return ExternalPartitionExecutionErrorData(
            serializable_error_info_from_exc_info(sys.exc_info())
        )
def _input_values_from_intermediates_manager(step_context):
    '''Collect the value of every non-Nothing input of the current step.

    Inputs fed by several upstream outputs are read as a list of
    intermediates, inputs fed by one output are read directly, and all other
    inputs are hydrated from config inside a user-code error boundary.

    Returns:
        dict mapping each input name to its value.
    '''
    step = step_context.step

    # Defined once instead of being re-created for every config-sourced input.
    def _hydration_error_msg_fn(_context, _input):
        # Spelling corrected ("occurred") in the user-facing error text.
        return lambda: '''Error occurred during input hydration: input name: "{input}" step key: "{key}" solid invocation: "{solid}" solid definition: "{solid_def}" '''.format(
            input=_input.name,
            key=_context.step.key,
            solid_def=_context.solid_def.name,
            solid=_context.solid.name,
        )

    input_values = {}
    for step_input in step.step_inputs:
        if step_input.runtime_type.is_nothing:
            continue

        if step_input.is_from_multiple_outputs:
            if hasattr(step_input.runtime_type, 'inner_type'):
                runtime_type = step_input.runtime_type.inner_type
            else:
                # This is the case where the fan-in is typed Any
                runtime_type = step_input.runtime_type

            _input_value = [
                step_context.intermediates_manager.get_intermediate(
                    step_context, runtime_type, source_handle
                )
                for source_handle in step_input.source_handles
            ]

            # When we're using an object store-backed intermediate store, we wrap the
            # ObjectStoreOperation[] representing the fan-in values in a
            # MultipleStepOutputsListWrapper so we can yield the relevant object store
            # events and unpack the values in the caller
            if all((isinstance(x, ObjectStoreOperation) for x in _input_value)):
                input_value = MultipleStepOutputsListWrapper(_input_value)
            else:
                input_value = _input_value

        elif step_input.is_from_single_output:
            input_value = step_context.intermediates_manager.get_intermediate(
                step_context, step_input.runtime_type, step_input.source_handles[0]
            )

        else:  # is from config
            with user_code_error_boundary(
                DagsterInputHydrationConfigError,
                msg_fn=_hydration_error_msg_fn(step_context, step_input),
            ):
                input_value = step_input.runtime_type.input_hydration_config.construct_from_config_value(
                    step_context, step_input.config_data
                )

        input_values[step_input.name] = input_value

    return input_values
def _create_type_materializations(
    step_context: SystemStepExecutionContext, output_name: str, value: Any
) -> Iterator[DagsterEvent]:
    """Run any configured type materializers for the named output.

    Yields:
        One step-materialization event per materialization returned by the
        dagster type's materializer.

    Raises:
        DagsterInvariantViolationError: if the materializer returns something
            that is neither an ``AssetMaterialization`` nor a ``Materialization``.
    """
    step = step_context.step

    def _materialization_error_message():
        return """Error occurred during output materialization: output name: "{output_name}" step key: "{key}" solid invocation: "{solid}" solid definition: "{solid_def}" """.format(
            output_name=output_name,
            key=step_context.step.key,
            solid_def=step_context.solid_def.name,
            solid=step_context.solid.name,
        )

    # check for output mappings at every point up the composition hierarchy
    handle = step.solid_handle
    while handle:
        solid_config = step_context.environment_config.solids.get(handle.to_string())
        handle = handle.parent

        if solid_config is None:
            continue

        for spec_by_output in solid_config.outputs.type_materializer_specs:
            check.invariant(len(spec_by_output) == 1)
            config_output_name, materializer_config = next(iter(spec_by_output.items()))
            if config_output_name != output_name:
                continue

            step_output = step.step_output_named(output_name)
            with user_code_error_boundary(
                DagsterTypeMaterializationError,
                msg_fn=_materialization_error_message,
            ):
                output_def = step_context.solid_def.output_def_named(step_output.name)
                dagster_type = output_def.dagster_type
                materializations = dagster_type.materializer.materialize_runtime_values(
                    step_context, materializer_config, value
                )

            for materialization in materializations:
                if not isinstance(materialization, (AssetMaterialization, Materialization)):
                    raise DagsterInvariantViolationError(
                        (
                            "materialize_runtime_values on type {type_name} has returned "
                            "value {value} of type {python_type}. You must return an "
                            "AssetMaterialization."
                        ).format(
                            type_name=dagster_type.display_name,
                            value=repr(materialization),
                            python_type=type(materialization).__name__,
                        )
                    )

                yield DagsterEvent.step_materialization(step_context, materialization)
def _type_check_output(
    step_context: SystemStepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Any,
    version: Optional[str],
) -> Iterator[DagsterEvent]:
    """Type-check a step output and yield the step-output event describing it.

    Raises:
        DagsterTypeCheckDidNotPass: after the event has been yielded, when the
            type check reports failure.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(output, "output", (Output, DynamicOutput))

    step_output = step_context.step.step_output_named(output.output_name)
    dagster_type = step_context.solid_def.output_def_named(step_output.name).dagster_type

    def _type_check_error_message():
        template = (
            'In solid "{handle}" the output "{output_name}" received '
            "value {output_value} of Python type {output_type} which "
            "does not pass the typecheck for Dagster type "
            "{dagster_type_name}. Step {step_key}."
        )
        return template.format(
            handle=str(step_context.step.solid_handle),
            output_name=output.output_name,
            output_value=output.value,
            output_type=type(output.value),
            dagster_type_name=dagster_type.display_name,
            step_key=step_context.step.key,
        )

    with user_code_error_boundary(DagsterTypeCheckError, _type_check_error_message):
        type_check = _do_type_check(
            step_context.for_type(dagster_type), dagster_type, output.value
        )

    type_check_data = TypeCheckData(
        success=type_check.success,
        label=step_output_handle.output_name,
        description=type_check.description if type_check else None,
        metadata_entries=type_check.metadata_entries if type_check else [],
    )
    yield DagsterEvent.step_output_event(
        step_context=step_context,
        step_output_data=StepOutputData(
            step_output_handle=step_output_handle,
            type_check_data=type_check_data,
            version=version,
        ),
    )

    if type_check.success:
        return

    raise DagsterTypeCheckDidNotPass(
        description='Type check failed for step output "{output_name}" - expected type "{dagster_type}".'.format(
            output_name=output.output_name,
            dagster_type=dagster_type.display_name,
        ),
        metadata_entries=type_check.metadata_entries,
        dagster_type=dagster_type,
    )
def _type_check_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Any,
    version: Optional[str],
) -> Iterator[DagsterEvent]:
    """Type-check a step output and yield the step-output event describing it.

    The event carries the type-check data, the given ``version`` and those of
    the output's metadata entries that are ``MetadataEntry`` instances.

    Raises:
        DagsterTypeCheckDidNotPass: after the event has been yielded, when the
            type check reports failure.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.inst_param(output, "output", (Output, DynamicOutput))

    step_output = step_context.step.step_output_named(output.output_name)
    dagster_type = step_context.solid_def.output_def_named(step_output.name).dagster_type
    type_check_context = step_context.for_type(dagster_type)

    # Captured up front; the error message below reads them lazily.
    op_label = step_context.describe_op()
    output_type = type(output.value)

    def _type_check_error_message():
        return (
            f'Error occurred while type-checking output "{output.output_name}" of {op_label}, with '
            f"Python type {output_type} and Dagster type {dagster_type.display_name}"
        )

    with user_code_error_boundary(
        DagsterTypeCheckError,
        _type_check_error_message,
        log_manager=type_check_context.log,
    ):
        type_check = do_type_check(type_check_context, dagster_type, output.value)

    type_check_data = TypeCheckData(
        success=type_check.success,
        label=step_output_handle.output_name,
        description=type_check.description if type_check else None,
        metadata_entries=type_check.metadata_entries if type_check else [],
    )
    structured_entries = [
        candidate for candidate in output.metadata_entries if isinstance(candidate, MetadataEntry)
    ]
    yield DagsterEvent.step_output_event(
        step_context=step_context,
        step_output_data=StepOutputData(
            step_output_handle=step_output_handle,
            type_check_data=type_check_data,
            version=version,
            metadata_entries=structured_entries,
        ),
    )

    if type_check.success:
        return

    raise DagsterTypeCheckDidNotPass(
        description=(
            f'Type check failed for step output "{output.output_name}" - '
            f'expected type "{dagster_type.display_name}". '
            f"Description: {type_check.description}"
        ),
        metadata_entries=type_check.metadata_entries,
        dagster_type=dagster_type,
    )
def _get_mapped_solids_dict(
    composite,
    graph_def,
    current_stack,
    current_solid_config,
    resource_defs,
    is_using_graph_job_op_apis,
):
    """Resolve a graph's config mapping and validate the mapped solids config.

    Returns:
        The processed config value for the graph's child solids.

    Raises:
        DagsterInvalidConfigError: if applying the graph's own config mapping to
            the incoming config is unsuccessful.
    """
    # Contract of the config mapping function: it receives the dictionary at
    #   solid_name:
    #      config: {dict_passed_to_user}
    # and returns the dictionary rooted at
    #   solid_name:
    #      solids: {return_value_of_config_fn}
    # The function is called first, then its result is checked against the
    # child schema.

    # Apply the @configured config mapping to the incoming config before the
    # composite's own config mapping runs.
    configured_result = graph_def.apply_config_mapping(current_solid_config)
    if not configured_result.success:
        raise DagsterInvalidConfigError(
            "Error in config for composite solid {}".format(composite.name),
            configured_result.errors,
            configured_result,
        )

    with user_code_error_boundary(
        DagsterConfigMappingFunctionError, _get_error_lambda(current_stack)
    ):
        validated_config = configured_result.value.get("config", {})
        mapped_solids_config = graph_def.config_mapping.resolve_from_validated_config(
            validated_config
        )

    # The type used for validation is constructed dynamically from the graph's
    # own solids and dependency structure.
    child_config_type = define_solid_dictionary_cls(
        solids=graph_def.solids,
        ignored_solids=None,
        dependency_structure=graph_def.dependency_structure,
        parent_handle=current_stack.handle,
        resource_defs=resource_defs,
        is_using_graph_job_op_apis=is_using_graph_job_op_apis,
    )

    # Process the mapped config against that new type.
    result = process_config(child_config_type, mapped_solids_config)
    if not result.success:
        raise_composite_descent_config_error(current_stack, mapped_solids_config, result)

    return result.value
def _type_checked_step_output_event_sequence(step_context, output):
    """Type-check a step output and yield its step-output event.

    The output's version is looked up in the versions resolved from an
    execution plan created for the context's pipeline, run config and mode.

    Raises:
        DagsterTypeCheckDidNotPass: after the event has been yielded, when the
            type check reports failure.
    """
    from dagster.core.execution.api import create_execution_plan

    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(output, "output", Output)

    step_output = step_context.step.step_output_named(output.output_name)

    speculative_execution_plan = create_execution_plan(
        step_context.pipeline_def,
        run_config=step_context.run_config,
        mode=step_context.mode_def.name,
    )
    versions_by_handle = resolve_step_output_versions(
        speculative_execution_plan,
        run_config=step_context.run_config,
        mode=step_context.mode_def.name,
    )
    version = versions_by_handle[StepOutputHandle(step_context.step.key, output.output_name)]

    def _type_check_error_message():
        template = (
            'In solid "{handle}" the output "{output_name}" received '
            "value {output_value} of Python type {output_type} which "
            "does not pass the typecheck for Dagster type "
            "{dagster_type_name}. Step {step_key}."
        )
        return template.format(
            handle=str(step_context.step.solid_handle),
            output_name=output.output_name,
            output_value=output.value,
            output_type=type(output.value),
            dagster_type_name=step_output.dagster_type.name,
            step_key=step_context.step.key,
        )

    with user_code_error_boundary(DagsterTypeCheckError, _type_check_error_message):
        type_check = _do_type_check(
            step_context.for_type(step_output.dagster_type),
            step_output.dagster_type,
            output.value,
        )

    yield _create_step_output_event(
        step_context,
        output,
        type_check=type_check,
        success=type_check.success,
        version=version,
    )

    if type_check.success:
        return

    raise DagsterTypeCheckDidNotPass(
        description="Type check failed for step output {output_name} of type {dagster_type}.".format(
            output_name=output.output_name,
            dagster_type=step_output.dagster_type.name,
        ),
        metadata_entries=type_check.metadata_entries,
        dagster_type=step_output.dagster_type,
    )
def _input_values_from_intermediate_storage(step_context):
    """Yield ``(input name, value)`` for every non-Nothing input of the step.

    The value comes from intermediate storage (one or several upstream
    outputs), from config via the dagster type's loader, or from the input's
    default value. Any other kind of input triggers a check failure.
    """
    for step_input in step_context.step.step_inputs:
        if step_input.dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        if step_input.is_from_multiple_outputs:
            if step_input.dagster_type.kind in (DagsterTypeKind.LIST, DagsterTypeKind.NULLABLE):
                element_type = step_input.dagster_type.inner_type
            else:
                # This is the case where the fan-in is typed Any
                element_type = step_input.dagster_type

            fan_in_values = [
                step_context.intermediate_storage.get_intermediate(
                    context=step_context,
                    step_output_handle=handle,
                    dagster_type=element_type,
                )
                for handle in step_input.source_handles
                # Filter out missing intermediates from skipped upstream outputs
                if step_context.intermediate_storage.has_intermediate(step_context, handle)
            ]

            # With an object store-backed intermediate store, the list of
            # ObjectStoreOperation fan-in values is wrapped in a
            # MultipleStepOutputsListWrapper so the caller can yield the relevant
            # object store events and unpack the values.
            if all(isinstance(item, ObjectStoreOperation) for item in fan_in_values):
                input_value = MultipleStepOutputsListWrapper(fan_in_values)
            else:
                input_value = fan_in_values

        elif step_input.is_from_single_output:
            input_value = step_context.intermediate_storage.get_intermediate(
                context=step_context,
                step_output_handle=step_input.source_handles[0],
                dagster_type=step_input.dagster_type,
            )

        elif step_input.is_from_config:
            describe_failure = _generate_error_boundary_msg_for_step_input(
                step_context, step_input
            )
            with user_code_error_boundary(DagsterTypeLoadingError, msg_fn=describe_failure):
                input_value = step_input.dagster_type.loader.construct_from_config_value(
                    step_context, step_input.config_data
                )

        elif step_input.is_from_default_value:
            input_value = step_input.config_data

        else:
            check.failed("Unhandled step_input type!")

        yield step_input.name, input_value