def build_dauphin_solid_handles(represented_pipeline, current_dep_index, parent=None):
    '''Collect a DauphinSolidHandle for every solid invocation in a dependency index.

    Composite solid definitions are descended into recursively; the handles of a
    composite's children are placed in the result before the composite's own handle.

    Args:
        represented_pipeline (RepresentedPipeline): Source of solid definition snapshots
            and dependency structure indexes.
        current_dep_index: Dependency index whose ``solid_invocations`` are walked.
        parent (Optional[DauphinSolidHandle]): Handle of the enclosing composite, if any.

    Returns:
        list: Flat list of DauphinSolidHandle objects.
    '''
    check.inst_param(represented_pipeline, 'represented_pipeline', RepresentedPipeline)
    check.opt_inst_param(parent, 'parent', DauphinSolidHandle)

    collected = []
    for invocation in current_dep_index.solid_invocations:
        solid_name = invocation.solid_name
        solid_def_name = invocation.solid_def_name

        dauphin_handle = DauphinSolidHandle(
            solid=DauphinSolid(represented_pipeline, solid_name, current_dep_index),
            handle=SolidHandle(solid_name, parent.handleID if parent else None),
            parent=parent or None,
        )

        def_snap = represented_pipeline.get_solid_def_snap(solid_def_name)
        if isinstance(def_snap, CompositeSolidDefSnap):
            # Children first, then the composite itself (appended below).
            collected.extend(
                build_dauphin_solid_handles(
                    represented_pipeline,
                    represented_pipeline.get_dep_structure_index(solid_def_name),
                    dauphin_handle,
                )
            )

        collected.append(dauphin_handle)

    return collected
def _result_for_handle(self, solid: Solid, handle: SolidHandle) -> NodeExecutionResult:
    """Build the in-process result object for ``solid`` addressed by ``handle``.

    Events and captured outputs are narrowed to those belonging to ``handle``
    (re-rooted under this result's own handle). A solid definition yields an
    ``InProcessSolidResult``; any other node definition yields an
    ``InProcessGraphResult``.

    Raises:
        DagsterInvariantViolationError: If re-rooting ``handle`` produces no handle.
    """
    node_def = solid.definition

    scoped_events = _filter_step_events_by_handle(self.event_list, self.handle, handle)

    if self._output_capture:
        scoped_outputs = _filter_outputs_by_handle(self._output_capture, self.handle, handle)
    else:
        scoped_outputs = None

    full_handle = handle.with_ancestor(self.handle)
    if not full_handle:
        raise DagsterInvariantViolationError(
            f"No handle provided for solid {solid.name}")

    if not isinstance(node_def, SolidDefinition):
        return InProcessGraphResult(
            graph_def=node_def,
            handle=full_handle,
            all_events=scoped_events,
            output_capture=scoped_outputs,
        )

    return InProcessSolidResult(
        solid_def=node_def,
        handle=full_handle,
        all_events=scoped_events,
        output_capture=scoped_outputs,
    )
def output_value(self, output_name=DEFAULT_OUTPUT):
    """Return the value of one output of this composite solid.

    The composite's output is resolved through its output mapping to the inner
    solid that produces it, and the value is read from that inner solid's result.

    Args:
        output_name (str): Name of the composite's output; defaults to DEFAULT_OUTPUT.

    Raises:
        DagsterInvariantViolationError: If the composite defines no such output.
    """
    check.str_param(output_name, "output_name")

    if not self.solid.definition.has_output(output_name):
        if self.solid.definition.output_dict:
            outputs_clause = "found outputs {output_names}".format(
                output_names=str(list(self.solid.definition.output_dict.keys()))
            )
        else:
            outputs_clause = "no output mappings were defined"

        message = (
            "Output '{output_name}' not defined in composite solid '{solid}': "
            "{outputs_clause}. If you were expecting this output to be present, you may "
            "be missing an output_mapping from an inner solid to its enclosing composite "
            "solid."
        ).format(
            output_name=output_name,
            solid=self.solid.name,
            outputs_clause=outputs_clause,
        )
        raise DagsterInvariantViolationError(message)

    mapping = self.solid.definition.get_output_mapping(output_name)
    inner_solid = self.solid.definition.solid_named(mapping.solid_name)
    inner_result = self._result_for_handle(
        inner_solid, SolidHandle(mapping.solid_name, None))
    return inner_result.output_value(mapping.output_name)
def result_for_handle(self, handle):
    '''Look up the result of a solid from its string handle.

    Because a handle can address nested solids, this reaches the children of
    composite solids as well as top-level solids.

    Args:
        handle (str): The string handle for the solid.

    Returns:
        :py:class:`SolidExecutionResult`: The result of the given solid.

    Raises:
        DagsterInvariantViolationError: If the pipeline has no solid for ``handle``.
    '''
    check.str_param(handle, 'handle')

    solid_def = self.pipeline.get_solid(SolidHandle.from_string(handle))
    if not solid_def:
        raise DagsterInvariantViolationError(
            'Can not find solid handle {handle} in pipeline.'.format(
                handle=handle))

    # Group the step events at or beneath the handle by their step kind.
    events_by_kind = defaultdict(list)
    relevant_events = (
        evt for evt in self.event_list
        if evt.is_step_event and evt.solid_handle.is_or_descends_from(handle)
    )
    for evt in relevant_events:
        events_by_kind[evt.step_kind].append(evt)

    return SolidExecutionResult(solid_def, events_by_kind, self.reconstruct_context)
def test_multiline_logging_complex():
    '''A STEP_FAILURE event logged through DagsterLogManager renders as the expected lines.'''
    error_info = SerializableErrorInfo(
        message=
        "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n",
        stack=['a stack message'],
        cls_name='FileNotFoundError',
    )
    failure_event = DagsterEvent(
        event_type_value='STEP_FAILURE',
        pipeline_name='error_monster',
        step_key='start.materialization.output.result.0',
        solid_handle=SolidHandle('start', None),
        step_kind_value='MATERIALIZATION_THUNK',
        logging_tags={
            'pipeline': 'error_monster',
            'step_key': 'start.materialization.output.result.0',
            'solid': 'start',
            'solid_definition': 'emit_num',
        },
        event_specific_data=StepFailureData(
            error=error_info,
            user_failure_data=None,
        ),
    )

    log_message = 'DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0'
    log_kwargs = {
        'pipeline': 'example',
        'pipeline_name': 'example',
        'step_key': 'start.materialization.output.result.0',
        'solid': 'start',
        'solid_definition': 'emit_num',
        'dagster_event': failure_event,
    }

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):
        log_manager = DagsterLogManager('123', {}, [logger])
        log_manager.info(log_message, **log_kwargs)

        expected_lines = [
            'error_monster - 123 - STEP_FAILURE - DagsterEventType.STEP_FAILURE for step '
            'start.materialization.output.result.0',
            ' cls_name = "FileNotFoundError"',
            ' error_message = "FileNotFoundError: [Errno 2] No such file or directory:'
            ' \'/path/to/file\'\\n"',
            ' solid = "start"',
            ' solid_definition = "emit_num"',
            ' step_key = "start.materialization.output.result.0"',
            '',
            "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'",
            '',
            'a stack message',
        ]

        actual_lines = captured_results[0].split('\n')
        assert actual_lines == expected_lines
def test_multiline_logging_complex():
    """A STEP_FAILURE event logged through DagsterLogManager renders as the expected lines."""
    error_info = SerializableErrorInfo(
        message=
        "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n",
        stack=["a stack message"],
        cls_name="FileNotFoundError",
    )
    failure_event = DagsterEvent(
        event_type_value="STEP_FAILURE",
        pipeline_name="error_monster",
        step_key="start.materialization.output.result.0",
        solid_handle=SolidHandle("start", None),
        step_kind_value="MATERIALIZATION_THUNK",
        logging_tags={
            "pipeline": "error_monster",
            "step_key": "start.materialization.output.result.0",
            "solid": "start",
            "solid_definition": "emit_num",
        },
        event_specific_data=StepFailureData(
            error=error_info,
            user_failure_data=None,
        ),
    )

    log_message = "DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0"
    log_kwargs = {
        "pipeline": "example",
        "pipeline_name": "example",
        "step_key": "start.materialization.output.result.0",
        "solid": "start",
        "solid_definition": "emit_num",
        "dagster_event": failure_event,
    }

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):
        log_manager = DagsterLogManager("123", {}, [logger])
        log_manager.info(log_message, **log_kwargs)

        expected_lines = [
            "error_monster - 123 - STEP_FAILURE - DagsterEventType.STEP_FAILURE for step "
            "start.materialization.output.result.0",
            ' cls_name = "FileNotFoundError"',
            ' error_message = "FileNotFoundError: [Errno 2] No such file or directory:'
            " '/path/to/file'\\n\"",
            ' solid = "start"',
            ' solid_definition = "emit_num"',
            ' step_key = "start.materialization.output.result.0"',
            "",
            "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'",
            "",
            "a stack message",
        ]

        actual_lines = captured_results[0].split("\n")
        assert actual_lines == expected_lines
def result_for_node(self, name: str) -> NodeExecutionResult:
    """Return the execution result of the top-level node called ``name``.

    Raises:
        DagsterInvariantViolationError: If the graph has no top-level node with that name.
    """
    if self._graph_def.has_solid_named(name):
        node_handle = SolidHandle(name, None)
        node = self._graph_def.get_solid(node_handle)
        return self._result_for_handle(node, node_handle)

    raise DagsterInvariantViolationError(
        "Tried to get result for node '{name}' in '{container}'. No such top level "
        "node.".format(name=name, container=self._graph_def.name))
def dagster_event_from_dict(event_dict, pipeline_name):
    '''Rebuild a DagsterEvent from a dict describing a step event.

    The ``__typename`` entry selects the event type; the event-specific payload is
    assembled for output, success, materialization, expectation-result and failure
    events and left as None for any other handled type.

    Args:
        event_dict (dict): String-keyed event payload.
        pipeline_name (str): Name of the pipeline the event belongs to.

    Raises:
        Exception: If ``__typename`` is not a handled event type.
    '''
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    materialization = event_dict.get('intermediateMaterialization') or {}

    def _materialization():
        # Both the output and materialization branches build this the same way.
        return Materialization(
            path=materialization.get('path'),
            description=materialization.get('description'),
        )

    # Resolve the event type
    event_type = _handled_events().get(event_dict['__typename'])
    if not event_type:
        raise Exception('unhandled event type %s' % event_dict['__typename'])

    # Build the event-specific payload
    payload = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        payload = StepOutputData(
            step_output_handle=StepOutputHandle(
                event_dict['step']['key'], event_dict['outputName']),
            value_repr=event_dict['valueRepr'],
            intermediate_materialization=_materialization(),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        payload = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        payload = StepMaterializationData(materialization=_materialization())
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        raw_result = event_dict['expectationResult']
        metadata_json = raw_result['resultMetadataJsonString']
        payload = StepExpectationResultData(
            ExpectationResult(
                raw_result['success'],
                raw_result['name'],
                raw_result['message'],
                json.loads(metadata_json) if metadata_json else None,
            )
        )
    elif event_type == DagsterEventType.STEP_FAILURE:
        payload = StepFailureData(
            SerializableErrorInfo(
                event_dict['error']['message'], stack=None, cls_name=None)
        )

    step = event_dict['step']
    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step['key'],
        solid_handle=SolidHandle(step['solidHandleID'], None, None),
        step_kind_value=step['kind'],
        logging_tags=None,
        event_specific_data=payload,
    )
def _filter_step_events_by_handle(
    event_list: List[DagsterEvent],
    ancestor_handle: SolidHandle,
    current_handle: SolidHandle,
) -> List[DagsterEvent]:
    """Select the step events located at or beneath ``current_handle``.

    ``current_handle`` is re-rooted under ``ancestor_handle`` before comparison.
    Non-step events are always dropped; input order is preserved.
    """
    target = cast(SolidHandle, current_handle.with_ancestor(ancestor_handle))
    return [
        evt
        for evt in event_list
        if evt.is_step_event
        and cast(SolidHandle, evt.solid_handle).is_or_descends_from(target)
    ]
def output_for_solid(self, handle_str, output_name=DEFAULT_OUTPUT):
    """Fetch one output value of the solid addressed by a string handle.

    Args:
        handle_str (str): The string handle for the solid.
        output_name (str): Optional. Name of the output; defaults to DEFAULT_OUTPUT.

    Returns:
        The output value for the handle and output_name.
    """
    check.str_param(handle_str, "handle_str")
    check.str_param(output_name, "output_name")

    solid_handle = SolidHandle.from_string(handle_str)
    solid_result = self.result_for_handle(solid_handle)
    return solid_result.output_value(output_name)
def _filter_outputs_by_handle(
    output_dict: Dict[StepOutputHandle, Any],
    ancestor_handle: SolidHandle,
    current_handle: SolidHandle,
):
    """Keep only the captured outputs produced at or beneath ``current_handle``.

    ``current_handle`` is re-rooted under ``ancestor_handle`` first. An entry is
    dropped when no solid handle can be derived from its step output handle, or
    when the re-rooted handle itself is empty.
    """
    target = current_handle.with_ancestor(ancestor_handle)

    selected = {}
    for output_key, captured_value in output_dict.items():
        owner = _get_solid_handle_from_output(output_key)
        if not owner or not target:
            continue
        if owner.is_or_descends_from(target):
            selected[output_key] = captured_value
    return selected
def output_values(self):
    """Map each output name of this composite to the value produced by its inner solid.

    Outputs whose inner solid reports no values, or does not have the mapped output
    among its values, are left out of the returned dict.
    """
    collected = {}
    for output_name in self.solid.definition.output_dict:
        mapping = self.solid.definition.get_output_mapping(output_name)
        inner_result = self._result_for_handle(
            self.solid.definition.solid_named(mapping.solid_name),
            SolidHandle(mapping.solid_name, None),
        )
        inner_values = inner_result.output_values

        # may be None if inner solid was skipped
        if inner_values is None:
            continue
        if mapping.output_name not in inner_values:
            continue

        collected[output_name] = inner_values[mapping.output_name]
    return collected
def build_dauphin_solid_handles(container, parent=None):
    '''Collect a DauphinSolidHandle for every solid in ``container``, recursively.

    When a solid's definition is itself a solid container, its children's handles
    are added to the result before the solid's own handle.

    Args:
        container (IContainSolids): Object exposing ``solids`` and ``dependency_structure``.
        parent: Handle of the enclosing solid, if any.

    Returns:
        list: Flat list of DauphinSolidHandle objects.
    '''
    check.inst_param(container, 'container', IContainSolids)

    collected = []
    for solid in container.solids:
        dauphin_handle = DauphinSolidHandle(
            solid=build_dauphin_solid(solid, container.dependency_structure),
            handle=SolidHandle(
                solid.name,
                solid.definition.name,
                parent.handleID if parent else None,
            ),
            parent=parent or None,
        )

        if isinstance(solid.definition, IContainSolids):
            # Children first, then the containing solid itself (appended below).
            collected.extend(
                build_dauphin_solid_handles(solid.definition, dauphin_handle))

        collected.append(dauphin_handle)

    return collected
def result_for_solid(self, name):
    '''Return the result of a top-level solid.

    Args:
        name (str): Name (or alias) of the top-level solid whose result is wanted.

    Returns:
        Union[CompositeSolidExecutionResult, SolidExecutionResult]: The result of the
        solid execution within the pipeline.

    Raises:
        DagsterInvariantViolationError: If the container has no top-level solid
            with that name.
    '''
    if self.container.has_solid_named(name):
        top_level_handle = SolidHandle(name, None)
        return self.result_for_handle(top_level_handle)

    raise DagsterInvariantViolationError(
        'Tried to get result for solid \'{name}\' in \'{container}\'. No such top level '
        'solid.'.format(name=name, container=self.container.name))
def output_values(self) -> Dict[str, Any]:
    """Map each graph output name to the value produced by the node it maps from.

    Outputs whose inner node reports no values, or does not have the mapped output
    among its values, are left out of the returned dict.
    """
    collected = {}
    for output_name in self._graph_def.output_dict:
        mapping = self._graph_def.get_output_mapping(output_name)
        inner_result = self._result_for_handle(
            self._graph_def.solid_named(mapping.maps_from.solid_name),
            SolidHandle(mapping.maps_from.solid_name, None),
        )
        inner_values = inner_result.output_values

        # may be None if inner solid was skipped
        if inner_values is None:
            continue
        if mapping.maps_from.output_name not in inner_values:
            continue

        collected[output_name] = inner_values[mapping.maps_from.output_name]
    return collected
def result_for_handle(self, handle):
    '''Look up the result of a solid nested in this composite from its string handle.

    The handle is interpreted relative to this composite's definition; the result is
    built for the handle string prefixed with this result's own handle.

    Args:
        handle (str): The string handle for the solid.

    Returns:
        [CompositeSolidExecutionResult, SolidExecutionResult]: The result of the given
        solid.
    '''
    check.str_param(handle, 'handle')

    relative_handle = SolidHandle.from_string(handle)
    solid = self.solid.definition.get_solid(relative_handle)

    qualified_handle = '.'.join([self.handle, handle])
    return self._result_for_handle(solid, qualified_handle)
def result_for_handle(self, handle):
    '''Get a :py:class:`SolidExecutionResult` for a given solid handle string.

    Step events whose solid handle is, or descends from, ``handle`` are grouped by
    step kind and passed to the result.

    Args:
        handle (str): The string handle for the solid.

    Returns:
        SolidExecutionResult: The result of the given solid.

    Raises:
        DagsterInvariantViolationError: If the pipeline has no solid for ``handle``.
    '''
    check.str_param(handle, 'handle')

    solid_def = self.pipeline.get_solid(SolidHandle.from_string(handle))
    if not solid_def:
        # Message previously read "Cant not find ..."; corrected typo.
        raise DagsterInvariantViolationError(
            'Can not find solid handle {handle} in pipeline.'.format(
                handle=handle))

    events_by_kind = defaultdict(list)
    for event in self.event_list:
        if event.is_step_event and event.solid_handle.is_or_descends_from(handle):
            events_by_kind[event.step_kind].append(event)

    return SolidExecutionResult(solid_def, events_by_kind, self.reconstruct_context)
def define_solid_dictionary_cls(
    name, solids, dependency_structure, pipeline_name, parent_handle=None
):
    '''Build the SystemNamedDict holding one config field per configurable solid.

    Solids whose definition reports no config entry are omitted.

    Args:
        name (str): Name given to the resulting SystemNamedDict.
        solids (List[Solid]): Solids to consider.
        dependency_structure (DependencyStructure): Passed through to each field builder.
        pipeline_name (str): Passed through to each field builder.
        parent_handle (Optional[SolidHandle]): Handle of the enclosing solid, if any.
    '''
    check.str_param(name, 'name')
    check.list_param(solids, 'solids', of_type=Solid)
    check.inst_param(dependency_structure, 'dependency_structure', DependencyStructure)
    check.str_param(pipeline_name, 'pipeline_name')
    check.opt_inst_param(parent_handle, 'parent_handle', SolidHandle)

    solid_fields = {
        solid.name: define_isolid_field(
            solid,
            SolidHandle(solid.name, solid.definition.name, parent_handle),
            dependency_structure,
            pipeline_name,
        )
        for solid in solids
        if solid.definition.has_config_entry
    }
    return SystemNamedDict(name, solid_fields)
def result_for_handle(self, handle):
    """Look up the result of a solid from its handle.

    Because a handle can address nested solids, this reaches the children of
    composite solids as well as top-level solids.

    Args:
        handle (Union[str,SolidHandle]): The handle for the solid; a string is parsed
            into a SolidHandle first.

    Returns:
        Union[CompositeSolidExecutionResult, SolidExecutionResult]: The result of the
        given solid.
    """
    if not isinstance(handle, six.string_types):
        check.inst_param(handle, "handle", SolidHandle)
    else:
        handle = SolidHandle.from_string(handle)

    target_solid = self.container.get_solid(handle)
    return self._result_for_handle(target_solid, handle)
def dagster_event_from_dict(event_dict, pipeline_name):
    '''Rebuild a DagsterEvent from a dict describing an event.

    The ``__typename`` entry selects the event type, which in turn decides how the
    event-specific payload is assembled. When the dict carries a non-empty ``step``
    entry, the step key, step kind and solid handle are taken from it; the handle is
    built by chaining the dot-separated parts of ``solidHandleID``.

    Args:
        event_dict (dict): String-keyed event payload.
        pipeline_name (str): Name of the pipeline the event belongs to.

    Raises:
        Exception: If ``__typename`` is not a handled event type.
    '''
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    def _type_check_data():
        # Shared by the STEP_OUTPUT and STEP_INPUT branches.
        return TypeCheckData(
            success=event_dict['typeCheck']['success'],
            label=event_dict['typeCheck']['label'],
            description=event_dict.get('description'),
            metadata_entries=list(
                event_metadata_entries(event_dict.get('metadataEntries')) or []),
        )

    # Resolve the event type
    event_type = HANDLED_EVENTS.get(event_dict['__typename'])
    if not event_type:
        raise Exception('unhandled event type %s' % event_dict['__typename'])

    # Build the event-specific payload
    payload = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        payload = StepOutputData(
            step_output_handle=StepOutputHandle(
                event_dict['step']['key'], event_dict['outputName']),
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_INPUT:
        payload = StepInputData(
            input_name=event_dict['inputName'],
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        payload = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_UP_FOR_RETRY:
        payload = StepRetryData(
            error=error_from_data(event_dict['retryError']),
            seconds_to_wait=event_dict['secondsToWait'],
        )
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        payload = StepMaterializationData(
            materialization=materialization_from_data(event_dict['materialization']))
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        payload = StepExpectationResultData(
            expectation_result_from_data(event_dict['expectationResult']))
    elif event_type == DagsterEventType.STEP_FAILURE:
        failure_error = error_from_data(event_dict['error'])
        if event_dict.get('failureMetadata'):
            user_failure = UserFailureData(
                label=event_dict['failureMetadata']['label'],
                description=event_dict['failureMetadata']['description'],
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries')) or []),
            )
        else:
            user_failure = None
        payload = StepFailureData(failure_error, user_failure)
    elif event_type == DagsterEventType.ENGINE_EVENT:
        payload = EngineEventData(
            metadata_entries=list(
                event_metadata_entries(event_dict.get('metadataEntries'))),
            marker_start=event_dict.get('markerStart'),
            marker_end=event_dict.get('markerEnd'),
            error=error_from_data(event_dict['engineError'])
            if event_dict.get('engineError') else None,
        )

    # We should update the GraphQL response so that clients don't need to do this handle parsing.
    # See: https://github.com/dagster-io/dagster/issues/1559
    solid_handle = None
    step_key = None
    step_kind_value = None
    step = event_dict.get('step')
    if step:
        step_key = step['key']
        step_kind_value = step['kind']
        # Outermost name first, so each handle becomes the parent of the next.
        for solid_name in step['solidHandleID'].split('.'):
            solid_handle = SolidHandle(solid_name, parent=solid_handle)

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step_key,
        solid_handle=solid_handle,
        step_kind_value=step_kind_value,
        logging_tags=None,
        event_specific_data=payload,
    )
def _build_from_sorted_solids(
    self, solids, dependency_structure, parent_handle=None, parent_step_inputs=None
):
    '''Add execution plan steps for each solid in ``solids``, in the order given.

    For every solid this creates input subplan steps, then either a compute step
    (plain solid) or a recursive expansion (composite solid), then output subplan
    steps whose terminal handles are registered via ``set_output_handle``.

    Args:
        solids: Solids to process; iterated in the order supplied.
        dependency_structure: Passed to ``get_input_source_step_handles`` to find
            each input's source.
        parent_handle: Handle of the enclosing composite solid, if any.
        parent_step_inputs: Step inputs of the enclosing composite solid, if any.

    Returns:
        The most recently assigned terminal compute step, or None if ``solids`` is
        empty.
    '''
    terminal_transform_step = None
    for solid in solids:
        handle = SolidHandle(solid.name, solid.definition.name, parent_handle)

        ### 1. INPUTS
        # Create and add execution plan steps for solid inputs
        step_inputs = []
        for input_name, input_def in solid.definition.input_dict.items():
            prev_step_output_handle = get_input_source_step_handles(
                self,
                solid,
                input_name,
                input_def,
                dependency_structure,
                handle,
                parent_step_inputs,
            )

            # We return None for the handle (see above in get_input_source_step_handle) when the
            # input def runtime type is "Nothing"
            if not prev_step_output_handle:
                continue

            subplan = create_subplan_for_input(
                self.pipeline_name,
                self.environment_config,
                solid,
                prev_step_output_handle,
                input_def,
                handle,
            )

            self.add_steps(subplan.steps)

            # The step input reads from the last step of the input subplan.
            step_inputs.append(
                StepInput(
                    input_name, input_def.runtime_type, subplan.terminal_step_output_handle
                )
            )

        ### 2. COMPUTE FUNCTION OR RECURSE
        # Create and add execution plan step for the solid compute function or
        # recurse over the solids in a CompositeSolid
        if isinstance(solid.definition, SolidDefinition):
            solid_transform_step = create_compute_step(
                self.pipeline_name, self.environment_config, solid, step_inputs, handle
            )
            self.add_step(solid_transform_step)
            terminal_transform_step = solid_transform_step
        elif isinstance(solid.definition, CompositeSolidDefinition):
            # The composite's terminal step is whatever the recursion returns for
            # its inner solids.
            terminal_transform_step = self._build_from_sorted_solids(
                solids_in_topological_order(solid.definition),
                solid.definition.dependency_structure,
                parent_handle=handle,
                parent_step_inputs=step_inputs,
            )
        else:
            check.invariant(
                False,
                'Unexpected solid type {type} encountered during execution planning'.format(
                    type=type(solid.definition)
                ),
            )

        ### 3. OUTPUTS
        # Create and add execution plan steps (and output handles) for solid outputs
        for name, output_def in solid.definition.output_dict.items():
            subplan = create_subplan_for_output(
                self.pipeline_name,
                self.environment_config,
                solid,
                terminal_transform_step,
                output_def,
                handle,
            )
            self.add_steps(subplan.steps)

            output_handle = solid.output_handle(name)
            self.set_output_handle(output_handle, subplan.terminal_step_output_handle)

    return terminal_transform_step
def _build_from_sorted_solids(
    self,
    solids: List[Solid],
    dependency_structure: DependencyStructure,
    parent_handle: Optional[SolidHandle] = None,
    parent_step_inputs: Optional[List[
        Union[StepInput, UnresolvedMappedStepInput,
              UnresolvedCollectStepInput]]] = None,
):
    """Add execution plan steps for each solid in ``solids``, in the order given.

    For every solid this classifies each input source into a resolved, unresolved
    (mapped) or pending (collect) step input, then either creates the matching kind
    of step (plain solid) or recurses (graph), and finally registers a step output
    handle for each of the solid's outputs.

    Args:
        solids: Solids to process; iterated in the order supplied.
        dependency_structure: Passed to ``get_step_input_source`` to find each
            input's source.
        parent_handle: Handle of the enclosing graph node, if any.
        parent_step_inputs: Step inputs of the enclosing graph node, if any.
    """
    for solid in solids:
        handle = SolidHandle(solid.name, parent_handle)

        ### 1. INPUTS
        # Create and add execution plan steps for solid inputs
        has_unresolved_input = False
        has_pending_input = False
        step_inputs: List[Union[StepInput, UnresolvedMappedStepInput,
                                UnresolvedCollectStepInput]] = []
        for input_name, input_def in solid.definition.input_dict.items():
            step_input_source = get_step_input_source(
                self,
                solid,
                input_name,
                input_def,
                dependency_structure,
                handle,
                parent_step_inputs,
            )

            # If an input with dagster_type "Nothing" doesn't have a value
            # we don't create a StepInput
            if step_input_source is None:
                continue

            if isinstance(
                step_input_source,
                (FromPendingDynamicStepOutput, FromUnresolvedStepOutput),
            ):
                has_unresolved_input = True
                step_inputs.append(
                    UnresolvedMappedStepInput(
                        name=input_name,
                        dagster_type_key=input_def.dagster_type.key,
                        source=step_input_source,
                    ))
            elif isinstance(step_input_source, FromDynamicCollect):
                has_pending_input = True
                step_inputs.append(
                    UnresolvedCollectStepInput(
                        name=input_name,
                        dagster_type_key=input_def.dagster_type.key,
                        source=step_input_source,
                    ))
            else:
                check.inst_param(step_input_source, "step_input_source",
                                 StepInputSource)
                step_inputs.append(
                    StepInput(
                        name=input_name,
                        dagster_type_key=input_def.dagster_type.key,
                        source=step_input_source,
                    ))

        ### 2a. COMPUTE FUNCTION
        # Create and add execution plan step for the solid compute function
        if isinstance(solid.definition, SolidDefinition):
            step_outputs = create_step_outputs(solid, handle,
                                               self.environment_config)

            # The step class is chosen from the input flags set above; having both
            # flags set is rejected.
            if has_pending_input and has_unresolved_input:
                check.failed(
                    "Can not have pending and unresolved step inputs")
            elif has_unresolved_input:
                new_step: IExecutionStep = UnresolvedMappedExecutionStep(
                    handle=UnresolvedStepHandle(solid_handle=handle),
                    pipeline_name=self.pipeline_name,
                    step_inputs=cast(
                        List[Union[StepInput, UnresolvedMappedStepInput]],
                        step_inputs),
                    step_outputs=step_outputs,
                    tags=solid.tags,
                )
            elif has_pending_input:
                new_step = UnresolvedCollectExecutionStep(
                    handle=StepHandle(solid_handle=handle),
                    pipeline_name=self.pipeline_name,
                    step_inputs=cast(
                        List[Union[StepInput, UnresolvedCollectStepInput]],
                        step_inputs),
                    step_outputs=step_outputs,
                    tags=solid.tags,
                )
            else:
                new_step = ExecutionStep(
                    handle=StepHandle(solid_handle=handle),
                    pipeline_name=self.pipeline_name,
                    step_inputs=cast(List[StepInput], step_inputs),
                    step_outputs=step_outputs,
                    tags=solid.tags,
                )

            self.add_step(new_step)

        ### 2b. RECURSE
        # Recurse over the solids contained in an instance of GraphDefinition
        elif isinstance(solid.definition, GraphDefinition):
            self._build_from_sorted_solids(
                solid.definition.solids_in_topological_order,
                solid.definition.dependency_structure,
                parent_handle=handle,
                parent_step_inputs=step_inputs,
            )

        else:
            check.invariant(
                False,
                "Unexpected solid type {type} encountered during execution planning"
                .format(type=type(solid.definition)),
            )

        ### 3. OUTPUTS
        # Create output handles for solid outputs
        for name, output_def in solid.definition.output_dict.items():
            output_handle = solid.output_handle(name)

            # Punch through layers of composition scope to map to the output of the
            # actual compute step
            resolved_output_def, resolved_handle = solid.definition.resolve_output_to_origin(
                output_def.name, handle)
            step = self.get_step_by_solid_handle(resolved_handle)
            if isinstance(step,
                          (ExecutionStep, UnresolvedCollectExecutionStep)):
                step_output_handle: Union[
                    StepOutputHandle,
                    UnresolvedStepOutputHandle] = StepOutputHandle(
                        step.key, resolved_output_def.name)
            elif isinstance(step, UnresolvedMappedExecutionStep):
                step_output_handle = UnresolvedStepOutputHandle(
                    step.handle,
                    resolved_output_def.name,
                    step.resolved_by_step_key,
                    step.resolved_by_output_name,
                )
            else:
                check.failed(f"Unexpected step type {step}")

            self.set_output_handle(output_handle, step_output_handle)
def get_step_by_solid_handle(self, handle: SolidHandle) -> IExecutionStep:
    """Return the step stored under the string form of ``handle``."""
    check.inst_param(handle, "handle", SolidHandle)
    step_key = handle.to_string()
    return self._steps[step_key]
def _build_from_sorted_solids(self,
                              solids,
                              dependency_structure,
                              parent_handle=None,
                              parent_step_inputs=None):
    """Add execution plan steps for each solid in ``solids``, in the order given.

    For every solid this classifies each input source into a resolved or unresolved
    step input, then either creates a compute/unresolved step (plain solid) or
    recurses (graph), and finally registers a step output handle for each of the
    solid's outputs.

    Args:
        solids: Solids to process; iterated in the order supplied.
        dependency_structure: Passed to ``get_step_input_source`` to find each
            input's source.
        parent_handle: Handle of the enclosing graph node, if any.
        parent_step_inputs: Step inputs of the enclosing graph node, if any.
    """
    for solid in solids:
        handle = SolidHandle(solid.name, parent_handle)

        ### 1. INPUTS
        # Create and add execution plan steps for solid inputs
        has_unresolved_input = False
        step_inputs = []
        for input_name, input_def in solid.definition.input_dict.items():
            step_input_source = get_step_input_source(
                self,
                solid,
                input_name,
                input_def,
                dependency_structure,
                handle,
                parent_step_inputs,
            )

            # If an input with dagster_type "Nothing" doesn't have a value
            # we don't create a StepInput
            if step_input_source is None:
                continue

            if isinstance(
                    step_input_source,
                (FromPendingDynamicStepOutput, FromUnresolvedStepOutput)):
                has_unresolved_input = True
                step_inputs.append(
                    UnresolvedStepInput(
                        name=input_name,
                        dagster_type=input_def.dagster_type,
                        source=step_input_source,
                    ))
            else:
                check.inst_param(step_input_source, "step_input_source",
                                 StepInputSource)
                step_inputs.append(
                    StepInput(
                        name=input_name,
                        dagster_type=input_def.dagster_type,
                        source=step_input_source,
                    ))

        ### 2a. COMPUTE FUNCTION
        # Create and add execution plan step for the solid compute function
        if isinstance(solid.definition, SolidDefinition):
            # A single unresolved input is enough to make the whole step unresolved.
            if has_unresolved_input:
                step = create_unresolved_step(
                    solid=solid,
                    solid_handle=handle,
                    step_inputs=step_inputs,
                    environment_config=self.environment_config,
                    pipeline_name=self.pipeline_name,
                )
            else:
                step = create_compute_step(
                    solid=solid,
                    solid_handle=handle,
                    step_inputs=step_inputs,
                    environment_config=self.environment_config,
                    pipeline_name=self.pipeline_name,
                )

            self.add_step(step)

        ### 2b. RECURSE
        # Recurse over the solids contained in an instance of GraphDefinition
        elif isinstance(solid.definition, GraphDefinition):
            self._build_from_sorted_solids(
                solid.definition.solids_in_topological_order,
                solid.definition.dependency_structure,
                parent_handle=handle,
                parent_step_inputs=step_inputs,
            )

        else:
            check.invariant(
                False,
                "Unexpected solid type {type} encountered during execution planning"
                .format(type=type(solid.definition)),
            )

        ### 3. OUTPUTS
        # Create output handles for solid outputs
        for name, output_def in solid.definition.output_dict.items():
            output_handle = solid.output_handle(name)

            # Punch through layers of composition scope to map to the output of the
            # actual compute step
            resolved_output_def, resolved_handle = solid.definition.resolve_output_to_origin(
                output_def.name, handle)
            step = self.get_step_by_solid_handle(resolved_handle)
            if isinstance(step, ExecutionStep):
                step_output_handle = StepOutputHandle(
                    step.key, resolved_output_def.name)
            else:
                # Any step that is not an ExecutionStep is treated as unresolved here.
                step_output_handle = UnresolvedStepOutputHandle(
                    step.handle,
                    resolved_output_def.name,
                    step.resolved_by_step_key,
                    step.resolved_by_output_name,
                )

            self.set_output_handle(output_handle, step_output_handle)
def _build_from_sorted_solids(self,
                              solids,
                              dependency_structure,
                              parent_handle=None,
                              parent_step_inputs=None):
    '''Add execution plan steps for each solid in ``solids``, in the order given.

    For every solid this gathers its step inputs, then either creates a compute step
    (plain solid) or recurses (composite solid), then creates output subplan steps
    whose terminal handles are registered via ``set_output_handle``.

    Args:
        solids: Solids to process; iterated in the order supplied.
        dependency_structure: Passed to ``get_step_input`` to find each input's source.
        parent_handle: Handle of the enclosing composite solid, if any.
        parent_step_inputs: Step inputs of the enclosing composite solid, if any.

    Returns:
        The most recently assigned terminal compute step, or None if ``solids`` is
        empty.
    '''
    terminal_compute_step = None
    for solid in solids:
        handle = SolidHandle(solid.name, solid.definition.name, parent_handle)

        ### 1. INPUTS
        # Create and add execution plan steps for solid inputs
        step_inputs = []
        for input_name, input_def in solid.definition.input_dict.items():
            step_input = get_step_input(
                self,
                solid,
                input_name,
                input_def,
                dependency_structure,
                handle,
                parent_step_inputs,
            )

            # If an input with runtime_type "Nothing" doesnt have a value
            # we don't create a StepInput
            if step_input is None:
                continue

            check.inst_param(step_input, 'step_input', StepInput)
            step_inputs.append(step_input)

        ### 2. COMPUTE FUNCTION OR RECURSE
        # Create and add execution plan step for the solid compute function or
        # recurse over the solids in a CompositeSolid
        if isinstance(solid.definition, SolidDefinition):
            solid_compute_step = create_compute_step(
                self.pipeline_name, self.environment_config, solid,
                step_inputs, handle)
            self.add_step(solid_compute_step)
            terminal_compute_step = solid_compute_step
        elif isinstance(solid.definition, CompositeSolidDefinition):
            # The composite's terminal step is whatever the recursion returns for
            # its inner solids.
            terminal_compute_step = self._build_from_sorted_solids(
                solids_in_topological_order(solid.definition),
                solid.definition.dependency_structure,
                parent_handle=handle,
                parent_step_inputs=step_inputs,
            )
        else:
            check.invariant(
                False,
                'Unexpected solid type {type} encountered during execution planning'
                .format(type=type(solid.definition)),
            )

        ### 3. OUTPUTS
        # Create and add execution plan steps (and output handles) for solid outputs
        for name, output_def in solid.definition.output_dict.items():
            subplan = create_subplan_for_output(self.pipeline_name, solid,
                                                terminal_compute_step,
                                                output_def)
            self.add_steps(subplan.steps)

            output_handle = solid.output_handle(name)
            self.set_output_handle(output_handle,
                                   subplan.terminal_step_output_handle)

    return terminal_compute_step
def dagster_event_from_dict(event_dict, pipeline_name):
    """Rebuild a DagsterEvent from a dict describing an event.

    The ``__typename`` entry selects the event type, which in turn decides how the
    event-specific payload is assembled. The step key and solid handle are read from
    the optional ``stepKey`` and ``solidHandleID`` entries.

    Args:
        event_dict (dict): String-keyed event payload.
        pipeline_name (str): Name of the pipeline the event belongs to.

    Raises:
        Exception: If ``__typename`` is not a handled event type.
    """
    check.dict_param(event_dict, "event_dict", key_type=str)
    check.str_param(pipeline_name, "pipeline_name")

    def _type_check_data():
        # Shared by the STEP_OUTPUT and STEP_INPUT branches.
        return TypeCheckData(
            success=event_dict["typeCheck"]["success"],
            label=event_dict["typeCheck"]["label"],
            description=event_dict.get("description"),
            metadata_entries=list(
                event_metadata_entries(event_dict.get("metadataEntries")) or []),
        )

    # Resolve the event type
    event_type = HANDLED_EVENTS.get(event_dict["__typename"])
    if not event_type:
        raise Exception("unhandled event type %s" % event_dict["__typename"])

    # Build the event-specific payload
    payload = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        payload = StepOutputData(
            step_output_handle=StepOutputHandle(
                event_dict["stepKey"], event_dict["outputName"]),
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_INPUT:
        payload = StepInputData(
            input_name=event_dict["inputName"],
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        payload = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_UP_FOR_RETRY:
        payload = StepRetryData(
            error=error_from_data(event_dict["retryError"]),
            seconds_to_wait=event_dict["secondsToWait"],
        )
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        payload = StepMaterializationData(
            materialization=materialization_from_data(event_dict["materialization"]))
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        payload = StepExpectationResultData(
            expectation_result_from_data(event_dict["expectationResult"]))
    elif event_type == DagsterEventType.STEP_FAILURE:
        failure_error = error_from_data(event_dict["error"])
        if event_dict.get("failureMetadata"):
            user_failure = UserFailureData(
                label=event_dict["failureMetadata"]["label"],
                description=event_dict["failureMetadata"]["description"],
                metadata_entries=list(
                    event_metadata_entries(event_dict.get("metadataEntries")) or []),
            )
        else:
            user_failure = None
        payload = StepFailureData(failure_error, user_failure)
    elif event_type == DagsterEventType.ENGINE_EVENT:
        payload = EngineEventData(
            metadata_entries=list(
                event_metadata_entries(event_dict.get("metadataEntries"))),
            marker_start=event_dict.get("markerStart"),
            marker_end=event_dict.get("markerEnd"),
            error=error_from_data(event_dict["engineError"])
            if event_dict.get("engineError") else None,
        )

    step_key = event_dict.get("stepKey")

    if event_dict.get("solidHandleID"):
        solid_handle = SolidHandle.from_string(event_dict["solidHandleID"])
    else:
        solid_handle = None

    # at the time of writing this:
    # * 'COMPUTE` is the only step kind
    # * this code should get deleted in the near future as we move away from
    #   dagster-graphql CLI as what we invoke in dask/k8s/etc.
    step_kind_value = "COMPUTE" if event_dict.get("stepKey") else None

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step_key,
        solid_handle=solid_handle,
        step_kind_value=step_kind_value,
        logging_tags=None,
        event_specific_data=payload,
    )
def _get_solid_handle_from_output(
    step_output_handle: StepOutputHandle,
) -> Optional[SolidHandle]:
    """Derive a solid handle from a step output handle.

    The output handle's ``step_key`` is parsed with
    ``SolidHandle.from_string`` and the result is returned unchanged.
    """
    step_key = step_output_handle.step_key
    return SolidHandle.from_string(step_key)
def _build_from_sorted_solids(
    self,
    solids,
    dependency_structure,
    parent_handle=None,
    parent_step_inputs=None,
):
    """Add execution plan steps for each solid, recursing into composites.

    Args:
        solids: Solids to process, in iteration order. (The recursive call
            passes ``solids_in_topological_order(...)``; callers presumably
            do the same -- confirm.)
        dependency_structure: Dependency structure forwarded to
            ``get_step_input`` when resolving each solid's inputs.
        parent_handle: Handle of the enclosing composite solid, or ``None``
            at the top level.
        parent_step_inputs: Step inputs of the enclosing composite solid,
            forwarded to ``get_step_input``; ``None`` at the top level.
    """
    for solid in solids:
        solid_def = solid.definition
        handle = SolidHandle(solid.name, solid_def.name, parent_handle)

        # Step 1 -- inputs: resolve a StepInput for every declared input.
        step_inputs = []
        for input_name, input_def in solid_def.input_dict.items():
            resolved_input = get_step_input(
                self,
                solid,
                input_name,
                input_def,
                dependency_structure,
                handle,
                parent_step_inputs,
            )
            # An input with runtime_type "Nothing" that doesn't have a value
            # yields None, and no StepInput is recorded for it.
            if resolved_input is not None:
                check.inst_param(resolved_input, 'step_input', StepInput)
                step_inputs.append(resolved_input)

        if isinstance(solid_def, SolidDefinition):
            # Step 2a -- leaf solid: register its compute step.
            self.add_step(
                create_compute_step(
                    self.pipeline_name,
                    self.environment_config,
                    solid,
                    step_inputs,
                    handle,
                ))
        elif isinstance(solid_def, CompositeSolidDefinition):
            # Step 2b -- composite solid: plan the solids it contains, with
            # this solid as their parent.
            self._build_from_sorted_solids(
                solids_in_topological_order(solid_def),
                solid_def.dependency_structure,
                parent_handle=handle,
                parent_step_inputs=step_inputs,
            )
        else:
            check.invariant(
                False,
                'Unexpected solid type {type} encountered during execution planning'
                .format(type=type(solid_def)),
            )

        # Step 3 -- outputs: point each solid output at the step output that
        # produces it.
        for name, output_def in solid_def.output_dict.items():
            output_handle = solid.output_handle(name)

            # Punch through layers of composition to reach the output of the
            # actual compute step.
            origin_output_def, origin_handle = solid_def.resolve_output_to_origin(
                output_def.name, handle)
            origin_step = self.get_step_by_handle(origin_handle)
            self.set_output_handle(
                output_handle,
                StepOutputHandle.from_step(origin_step, origin_output_def.name),
            )
def test_multiline_logging_complex():
    """Check multi-line log output for a message carrying a DagsterEvent.

    A STEP_FAILURE event with a multi-frame stack trace is logged through
    ``DagsterLogManager``. The test then checks that the expected key/value
    lines are present and that the ``dagster_event`` line holds JSON equal to
    the expected serialized event.
    """
    msg = 'DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0'

    # Error fixture shared by the logged event and the expected payload.
    error_message = "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n"
    stack_frames = [
        ' File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/errors.py", line 186, in user_code_error_boundary\n yield\n',
        ' File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/execution_plan/simple_engine.py", line 365, in _iterate_step_outputs_within_boundary\n for step_output in gen:\n',
        ' File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/execution_plan/materialization_thunk.py", line 28, in _fn\n runtime_type.output_schema.materialize_runtime_value(config_spec, runtime_value)\n',
        ' File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/config_schema.py", line 93, in materialize_runtime_value\n return func(config_value, runtime_value)\n',
        ' File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/config_schema.py", line 110, in _selector\n return func(selector_key, selector_value, runtime_value)\n',
        ' File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/builtin_config_schemas.py", line 59, in _builtin_output_schema\n with open(json_file_path, \'w\') as ff:\n',
    ]

    kwargs = {
        'pipeline': 'example',
        'pipeline_name': 'example',
        'step_key': 'start.materialization.output.result.0',
        'solid': 'start',
        'solid_definition': 'emit_num',
        'dagster_event': DagsterEvent(
            event_type_value='STEP_FAILURE',
            pipeline_name='error_monster',
            step_key='start.materialization.output.result.0',
            solid_handle=SolidHandle('start', 'emit_num', None),
            step_kind_value='MATERIALIZATION_THUNK',
            logging_tags={
                'pipeline': 'error_monster',
                'step_key': 'start.materialization.output.result.0',
                'solid': 'start',
                'solid_definition': 'emit_num',
            },
            event_specific_data=StepFailureData(
                error=SerializableErrorInfo(
                    message=error_message,
                    # Pass a copy so the expected payload below cannot be
                    # affected by anything done to the event's own list.
                    stack=list(stack_frames),
                    cls_name='FileNotFoundError',
                )),
        ),
    }

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):
        dl = DagsterLogManager('123', {}, [logger])
        dl.info(msg, **kwargs)

        # The first line of the captured record is skipped; the remaining
        # lines are the key/value pairs.
        kv_pairs = set(captured_results[0].split('\n')[1:])

    expected_pairs = [
        ' orig_message = "DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0"',
        ' run_id = "123"',
        ' pipeline = "example"',
        ' solid_definition = "emit_num"',
        ' pipeline_name = "example"',
        ' solid = "start"',
        ' step_key = "start.materialization.output.result.0"',
    ]
    for e in expected_pairs:
        assert e in kv_pairs

    assert _regex_match_kv_pair(
        r' log_message_id = "{0}"'.format(REGEX_UUID), kv_pairs)
    assert _regex_match_kv_pair(
        r' log_timestamp = "{0}"'.format(REGEX_TS), kv_pairs)

    expected_dagster_event = {
        'event_specific_data': [[
            error_message,
            stack_frames,
            'FileNotFoundError',
        ]],
        'event_type_value': 'STEP_FAILURE',
        'pipeline_name': 'error_monster',
        'solid_handle': ['start', 'emit_num', None],
        'step_key': 'start.materialization.output.result.0',
        'step_kind_value': 'MATERIALIZATION_THUNK',
        'logging_tags': {
            'pipeline': 'error_monster',
            'solid': 'start',
            'solid_definition': 'emit_num',
            'step_key': 'start.materialization.output.result.0',
        },
    }

    dagster_event_line = [
        pair for pair in kv_pairs if 'dagster_event' in pair
    ][0]
    # str.strip() treats its argument as a set of characters, not a prefix,
    # so it only worked here because the JSON starts with '{' and ends with
    # '}'. Split on the first ' = ' separator to take the value explicitly.
    _, _, dagster_event_json = dagster_event_line.partition(' = ')
    dagster_event = json.loads(dagster_event_json)
    assert dagster_event == expected_dagster_event
def dagster_event_from_dict(event_dict, pipeline_name):
    """Rebuild a ``DagsterEvent`` from a serialized event payload.

    Args:
        event_dict: Dict with string keys. ``__typename`` selects the event
            type via ``_handled_events()``; the nested ``step`` dict supplies
            ``key``, ``kind`` and ``solidHandleID``. (Presumably a GraphQL
            response -- confirm against the caller.)
        pipeline_name: Pipeline name copied onto the resulting event.

    Returns:
        A ``DagsterEvent``. ``event_specific_data`` is populated for the
        event types handled below and left as ``None`` for any other type.

    Raises:
        Exception: If ``__typename`` has no truthy entry in
            ``_handled_events()``.
    """
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    # Resolve the event type from the payload's typename.
    handled_events = _handled_events()
    typename = event_dict['__typename']
    event_type = handled_events.get(typename)
    if not event_type:
        raise Exception('unhandled event type %s' % typename)

    def _metadata_entries():
        # Parsed metadata entries, falling back to an empty list when the
        # parser hands back something falsy.
        return list(
            event_metadata_entries(event_dict.get('metadataEntries')) or [])

    def _type_check_data():
        # STEP_OUTPUT and STEP_INPUT carry the same type-check payload shape.
        type_check = event_dict['typeCheck']
        return TypeCheckData(
            success=type_check['success'],
            label=type_check['label'],
            description=event_dict.get('description'),
            metadata_entries=_metadata_entries(),
        )

    # Build the type-specific payload, if this event type carries one.
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict['step']['key'],
                                         event_dict['outputName'])
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict['inputName'],
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        event_specific_data = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(
                event_dict['materialization']))
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict['expectationResult']))
    elif event_type == DagsterEventType.STEP_FAILURE:
        # Only the error message is carried over; stack and class name are
        # passed as None.
        error_info = SerializableErrorInfo(event_dict['error']['message'],
                                           stack=None,
                                           cls_name=None)
        failure_metadata = event_dict.get('failureMetadata')
        user_failure_data = None
        if failure_metadata:
            user_failure_data = UserFailureData(
                label=failure_metadata['label'],
                description=failure_metadata['description'],
                metadata_entries=_metadata_entries(),
            )
        event_specific_data = StepFailureData(error_info, user_failure_data)

    # We should update the GraphQL response so that clients don't need to do
    # this handle parsing.
    # See: https://github.com/dagster-io/dagster/issues/1559
    # Walk the dotted handle path from the outermost name inwards, nesting
    # each handle under the previous one.
    handle = None
    for solid_name in event_dict['step']['solidHandleID'].split('.'):
        handle = SolidHandle(solid_name, definition_name=None, parent=handle)

    step = event_dict['step']
    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step['key'],
        solid_handle=handle,
        step_kind_value=step['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )