def test_dict_roundtrip():
    handle = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    assert NodeHandle.from_dict(json.loads(json.dumps(handle._asdict()))) == handle

    handle = NodeHandle("foo", None)
    assert NodeHandle.from_dict(json.loads(json.dumps(handle._asdict()))) == handle
def test_in_pipeline_manager_solid_config():
    with in_pipeline_manager() as manager:
        assert manager.context.solid_config is None

    with in_pipeline_manager(
        pipeline_name="hello_world_config_pipeline",
        solid_handle=NodeHandle("hello_world_config", None),
        executable_dict=ReconstructablePipeline.for_module(
            "dagstermill.examples.repository",
            "hello_world_config_pipeline",
        ).to_dict(),
        step_key="hello_world_config",
    ) as manager:
        assert manager.context.solid_config == {"greeting": "hello"}

    with in_pipeline_manager(
        pipeline_name="hello_world_config_pipeline",
        solid_handle=NodeHandle("hello_world_config", None),
        run_config={
            "solids": {
                "hello_world_config": {"config": {"greeting": "bonjour"}},
                "goodbye_config": {"config": {"farewell": "goodbye"}},
            }
        },
        executable_dict=ReconstructablePipeline.for_module(
            "dagstermill.examples.repository",
            "hello_world_config_pipeline",
        ).to_dict(),
        step_key="hello_world_config",
    ) as manager:
        assert manager.context.solid_config == {"greeting": "bonjour"}

    with in_pipeline_manager(
        pipeline_name="hello_world_config_pipeline",
        solid_handle=NodeHandle("goodbye_config", None),
        run_config={
            "solids": {
                "hello_world_config": {"config": {"greeting": "bonjour"}},
                "goodbye_config": {"config": {"farewell": "goodbye"}},
            }
        },
        executable_dict=ReconstructablePipeline.for_module(
            "dagstermill.examples.repository",
            "hello_world_config_pipeline",
        ).to_dict(),
        step_key="goodbye_config",
    ) as manager:
        assert manager.context.solid_config == {"farewell": "goodbye"}
def parse_from_key(
    string: str,
) -> Union["StepHandle", "ResolvedFromDynamicStepHandle", "UnresolvedStepHandle"]:
    unresolved_match = re.match(r"(.*)\[\?\]", string)
    if unresolved_match:
        return UnresolvedStepHandle(NodeHandle.from_string(unresolved_match.group(1)))

    resolved_match = re.match(r"(.*)\[(.*)\]", string)
    if resolved_match:
        return ResolvedFromDynamicStepHandle(
            NodeHandle.from_string(resolved_match.group(1)), resolved_match.group(2)
        )

    return StepHandle(NodeHandle.from_string(string))
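# A minimal sketch of the three key shapes parse_from_key distinguishes, inverting the
# corresponding to_key representations. The literal keys below are illustrative
# assumptions, not values taken from the surrounding code.
assert isinstance(parse_from_key("foo.bar"), StepHandle)
assert isinstance(parse_from_key("foo.bar[?]"), UnresolvedStepHandle)
assert isinstance(parse_from_key("foo.bar[mapping_key]"), ResolvedFromDynamicStepHandle)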
def test_in_pipeline_manager_bad_solid():
    with pytest.raises(
        check.CheckError,
        match=("hello_world_pipeline has no solid named foobar"),
    ):
        with in_pipeline_manager(solid_handle=NodeHandle("foobar", None)) as _manager:
            pass
def test_construct_log_string_for_event():
    step_output_event = DagsterEvent(
        event_type_value="STEP_OUTPUT",
        pipeline_name="my_pipeline",
        step_key="solid2",
        solid_handle=NodeHandle("solid2", None),
        step_kind_value="COMPUTE",
        logging_tags={},
        event_specific_data=StepOutputData(
            step_output_handle=StepOutputHandle("solid2", "result")
        ),
        message='Yielded output "result" of type "Any" for step "solid2". (Type check passed).',
        pid=54348,
    )
    logging_metadata = DagsterLoggingMetadata(
        run_id="f79a8a93-27f1-41b5-b465-b35d0809b26d", pipeline_name="my_pipeline"
    )
    dagster_message_props = DagsterMessageProps(
        orig_message=step_output_event.message,
        dagster_event=step_output_event,
    )
    assert (
        construct_log_string(
            logging_metadata=logging_metadata, message_props=dagster_message_props
        )
        == 'my_pipeline - f79a8a93-27f1-41b5-b465-b35d0809b26d - 54348 - STEP_OUTPUT - Yielded output "result" of type "Any" for step "solid2". (Type check passed).'
    )
def _event_record(run_id, solid_name, timestamp, event_type, event_specific_data=None):
    pipeline_name = "pipeline_name"
    solid_handle = NodeHandle(solid_name, None)
    step_handle = StepHandle(solid_handle)
    return EventLogEntry(
        None,
        "",
        "debug",
        "",
        run_id,
        timestamp,
        step_key=step_handle.to_key(),
        pipeline_name=pipeline_name,
        dagster_event=DagsterEvent(
            event_type.value,
            pipeline_name,
            solid_handle=solid_handle,
            step_handle=step_handle,
            event_specific_data=event_specific_data,
        ),
    )
def __new__(cls, solid_handle: NodeHandle, key: Optional[str] = None):
    return super(StepHandle, cls).__new__(
        cls,
        solid_handle=check.inst_param(solid_handle, "solid_handle", NodeHandle),
        # mypy can't tell that if default is set, this is guaranteed to be a str
        key=cast(str, check.opt_str_param(key, "key", default=solid_handle.to_string())),
    )
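# A minimal sketch of the default-key behavior above: when no explicit key is passed,
# the step key falls back to the handle's dotted string form. The handle and key names
# here are illustrative assumptions.
handle = NodeHandle("child", NodeHandle("parent", None))
assert StepHandle(handle).key == "parent.child"
assert StepHandle(handle, key="custom_key").key == "custom_key"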
def test_handle_path():
    handle = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    assert handle.path == ["foo", "bar", "baz"]
    assert NodeHandle.from_path(handle.path) == handle

    handle = NodeHandle("foo", None)
    assert handle.path == ["foo"]
    assert NodeHandle.from_path(handle.path) == handle
def test_yield_unserializable_result():
    manager = Manager()
    assert manager.yield_result(threading.Lock())

    with in_pipeline_manager(
        pipeline_name="hello_world_output_pipeline",
        solid_handle=NodeHandle("hello_world_output", None),
        executable_dict=ReconstructablePipeline.for_module(
            "dagstermill.examples.repository",
            "hello_world_output_pipeline",
        ).to_dict(),
    ) as manager:
        with pytest.raises(TypeError):
            manager.yield_result(threading.Lock())
def test_solid_handle_node_handle():
    # serialize in current code
    test_handle = NodeHandle("test", None)
    test_str = serialize_dagster_namedtuple(test_handle)

    # deserialize in "legacy" code
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class SolidHandle(namedtuple("_SolidHandle", "name parent")):
        pass

    result = _deserialize_json(test_str, legacy_env)
    assert isinstance(result, SolidHandle)
    assert result.name == test_handle.name
# This helper yields and is consumed via `with in_pipeline_manager(...) as manager` in the
# tests above, so it must be a context manager (contextlib.contextmanager).
@contextmanager
def in_pipeline_manager(
    pipeline_name="hello_world_pipeline",
    solid_handle=NodeHandle("hello_world", None),
    step_key="hello_world",
    executable_dict=None,
    mode=None,
    **kwargs,
):
    manager = Manager()

    run_id = make_new_run_id()
    with instance_for_test() as instance:
        marshal_dir = tempfile.mkdtemp()

        if not executable_dict:
            executable_dict = ReconstructablePipeline.for_module(
                "dagstermill.examples.repository", "hello_world_pipeline"
            ).to_dict()

        pipeline_run_dict = pack_value(
            PipelineRun(
                pipeline_name=pipeline_name,
                run_id=run_id,
                mode=mode or "default",
                run_config=None,
                step_keys_to_execute=None,
                status=PipelineRunStatus.NOT_STARTED,
            )
        )

        try:
            with safe_tempfile_path() as output_log_file_path:
                context_dict = {
                    "pipeline_run_dict": pipeline_run_dict,
                    "solid_handle_kwargs": solid_handle._asdict(),
                    "executable_dict": executable_dict,
                    "marshal_dir": marshal_dir,
                    "run_config": {},
                    "output_log_path": output_log_file_path,
                    "instance_ref_dict": pack_value(instance.get_ref()),
                    "step_key": step_key,
                }

                manager.reconstitute_pipeline_context(**dict(context_dict, **kwargs))
                yield manager
        finally:
            shutil.rmtree(marshal_dir)
def test_in_pipeline_manager_with_resources():
    with tempfile.NamedTemporaryFile() as fd:
        path = fd.name

    try:
        with in_pipeline_manager(
            pipeline_name="resource_pipeline",
            executable_dict=ReconstructablePipeline.for_module(
                "dagstermill.examples.repository",
                "resource_pipeline",
            ).to_dict(),
            solid_handle=NodeHandle("hello_world_resource", None),
            run_config={"resources": {"list": {"config": path}}},
            mode="prod",
            step_key="hello_world_resource",
        ) as manager:
            assert "list" in manager.context.resources._asdict()

            with open(path, "rb") as fd:
                messages = pickle.load(fd)

            messages = [message.split(": ") for message in messages]

            assert len(messages) == 1
            assert messages[0][1] == "Opened"

            manager.teardown_resources()

            with open(path, "rb") as fd:
                messages = pickle.load(fd)

            messages = [message.split(": ") for message in messages]

            assert len(messages) == 2
            assert messages[1][1] == "Closed"
    finally:
        if os.path.exists(path):
            os.unlink(path)
def make_log_string(error, error_source=None):
    step_failure_event = DagsterEvent(
        event_type_value="STEP_FAILURE",
        pipeline_name="my_pipeline",
        step_key="solid2",
        solid_handle=NodeHandle("solid2", None),
        step_kind_value="COMPUTE",
        logging_tags={},
        event_specific_data=StepFailureData(
            error=error, user_failure_data=None, error_source=error_source
        ),
        message='Execution of step "solid2" failed.',
        pid=54348,
    )
    logging_metadata = DagsterLoggingMetadata(
        run_id="f79a8a93-27f1-41b5-b465-b35d0809b26d", pipeline_name="my_pipeline"
    )
    dagster_message_props = DagsterMessageProps(
        orig_message=step_failure_event.message,
        dagster_event=step_failure_event,
    )
    return construct_log_string(logging_metadata, dagster_message_props)
def make_log_string(error, error_source=None):
    step_failure_event = DagsterEvent(
        event_type_value="STEP_FAILURE",
        pipeline_name="my_pipeline",
        step_key="solid2",
        solid_handle=NodeHandle("solid2", None),
        step_kind_value="COMPUTE",
        logging_tags={},
        event_specific_data=StepFailureData(
            error=error, user_failure_data=None, error_source=error_source
        ),
        message='Execution of step "solid2" failed.',
        pid=54348,
    )
    message_props = {"dagster_event": step_failure_event, "pipeline_name": "my_pipeline"}

    synth_props = {
        "orig_message": step_failure_event.message,
        "run_id": "f79a8a93-27f1-41b5-b465-b35d0809b26d",
    }
    return construct_log_string(
        message_props=message_props, logging_tags={}, synth_props=synth_props
    )
def test_rehydrate_solid_handle():
    h = NodeHandle.from_dict({"name": "foo", "parent": None})
    assert h.name == "foo"
    assert h.parent is None

    h = NodeHandle.from_dict(json.loads(json.dumps(h._asdict())))
    assert h.name == "foo"
    assert h.parent is None

    h = NodeHandle.from_dict({"name": "foo", "parent": ["bar", None]})
    assert h.name == "foo"
    assert isinstance(h.parent, NodeHandle)
    assert h.parent.name == "bar"
    assert h.parent.parent is None

    h = NodeHandle.from_dict(json.loads(json.dumps(h._asdict())))
    assert h.name == "foo"
    assert isinstance(h.parent, NodeHandle)
    assert h.parent.name == "bar"
    assert h.parent.parent is None

    h = NodeHandle.from_dict({"name": "foo", "parent": ["bar", ["baz", None]]})
    assert h.name == "foo"
    assert isinstance(h.parent, NodeHandle)
    assert h.parent.name == "bar"
    assert isinstance(h.parent.parent, NodeHandle)
    assert h.parent.parent.name == "baz"
    assert h.parent.parent.parent is None

    h = NodeHandle.from_dict(json.loads(json.dumps(h._asdict())))
    assert h.name == "foo"
    assert isinstance(h.parent, NodeHandle)
    assert h.parent.name == "bar"
    assert isinstance(h.parent.parent, NodeHandle)
    assert h.parent.parent.name == "baz"
    assert h.parent.parent.parent is None
def reconstitute_pipeline_context(
    self,
    output_log_path=None,
    marshal_dir=None,
    run_config=None,
    executable_dict=None,
    pipeline_run_dict=None,
    solid_handle_kwargs=None,
    instance_ref_dict=None,
    step_key=None,
):
    """Reconstitutes a context for dagstermill-managed execution.

    You'll see this function called to reconstruct a pipeline context within the
    ``injected parameters`` cell of a dagstermill output notebook. Users should not call
    this function interactively except when debugging output notebooks.

    Use :func:`dagstermill.get_context` in the ``parameters`` cell of your notebook to
    define a context for interactive exploration and development. This call will be
    replaced by one to :func:`dagstermill.reconstitute_pipeline_context` when the
    notebook is executed by dagstermill.
    """
    check.opt_str_param(output_log_path, "output_log_path")
    check.opt_str_param(marshal_dir, "marshal_dir")
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    check.dict_param(pipeline_run_dict, "pipeline_run_dict")
    check.dict_param(executable_dict, "executable_dict")
    check.dict_param(solid_handle_kwargs, "solid_handle_kwargs")
    check.dict_param(instance_ref_dict, "instance_ref_dict")
    check.str_param(step_key, "step_key")

    pipeline = ReconstructablePipeline.from_dict(executable_dict)
    pipeline_def = pipeline.get_definition()

    try:
        instance_ref = unpack_value(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
    except Exception as err:
        raise DagstermillError(
            "Error when attempting to resolve DagsterInstance from serialized InstanceRef"
        ) from err

    pipeline_run = unpack_value(pipeline_run_dict)

    solid_handle = NodeHandle.from_dict(solid_handle_kwargs)
    solid = pipeline_def.get_solid(solid_handle)
    solid_def = solid.definition

    self.marshal_dir = marshal_dir
    self.in_pipeline = True
    self.solid_def = solid_def
    self.pipeline = pipeline

    resolved_run_config = ResolvedRunConfig.build(
        pipeline_def, run_config, mode=pipeline_run.mode
    )

    execution_plan = ExecutionPlan.build(
        self.pipeline,
        resolved_run_config,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    with scoped_pipeline_context(
        execution_plan,
        pipeline,
        run_config,
        pipeline_run,
        instance,
        scoped_resources_builder_cm=self._setup_resources,
        # Set this flag even though we're not in test for clearer error reporting
        raise_on_error=True,
    ) as pipeline_context:
        self.context = DagstermillRuntimeExecutionContext(
            pipeline_context=pipeline_context,
            pipeline_def=pipeline_def,
            solid_config=run_config.get("solids", {}).get(solid.name, {}).get("config"),
            resource_keys_to_init=get_required_resource_keys_to_init(
                execution_plan,
                pipeline_def,
                resolved_run_config,
            ),
            solid_name=solid.name,
            solid_handle=solid_handle,
            step_context=pipeline_context.for_step(execution_plan.get_step_by_key(step_key)),
        )

    return self.context
def get_context(self, solid_config=None, mode_def=None, run_config=None):
    """Get a dagstermill execution context for interactive exploration and development.

    Args:
        solid_config (Optional[Any]): If specified, this value will be made available on the
            context as its ``solid_config`` property.
        mode_def (Optional[:class:`dagster.ModeDefinition`]): If specified, defines the mode
            to use to construct the context. Specify this if you would like a context
            constructed with specific ``resource_defs`` or ``logger_defs``. By default, an
            ephemeral mode with a console logger will be constructed.
        run_config (Optional[dict]): The config dict with which to construct the context.

    Returns:
        :py:class:`~dagstermill.DagstermillExecutionContext`
    """
    check.opt_inst_param(mode_def, "mode_def", ModeDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)

    # If we are running non-interactively, and there is already a context reconstituted,
    # return that context rather than overwriting it.
    if self.context is not None and isinstance(
        self.context, DagstermillRuntimeExecutionContext
    ):
        return self.context

    if not mode_def:
        mode_def = ModeDefinition(logger_defs={"dagstermill": colored_console_logger})
        run_config["loggers"] = {"dagstermill": {}}

    solid_def = SolidDefinition(
        name="this_solid",
        input_defs=[],
        compute_fn=lambda *args, **kwargs: None,
        output_defs=[],
        description="Ephemeral solid constructed by dagstermill.get_context()",
        required_resource_keys=mode_def.resource_key_set,
    )

    pipeline_def = PipelineDefinition(
        [solid_def], mode_defs=[mode_def], name="ephemeral_dagstermill_pipeline"
    )

    run_id = make_new_run_id()

    # construct stubbed PipelineRun for notebook exploration...
    # The actual pipeline run during pipeline execution will be serialized and reconstituted
    # in the `reconstitute_pipeline_context` call
    pipeline_run = DagsterRun(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        run_config=run_config,
        mode=mode_def.name,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        tags=None,
    )

    self.in_pipeline = False
    self.solid_def = solid_def
    self.pipeline = pipeline_def

    resolved_run_config = ResolvedRunConfig.build(pipeline_def, run_config, mode=mode_def.name)

    pipeline = InMemoryPipeline(pipeline_def)
    execution_plan = ExecutionPlan.build(pipeline, resolved_run_config)

    with scoped_pipeline_context(
        execution_plan,
        pipeline,
        run_config,
        pipeline_run,
        DagsterInstance.ephemeral(),
        scoped_resources_builder_cm=self._setup_resources,
    ) as pipeline_context:
        self.context = DagstermillExecutionContext(
            pipeline_context=pipeline_context,
            pipeline_def=pipeline_def,
            solid_config=solid_config,
            resource_keys_to_init=get_required_resource_keys_to_init(
                execution_plan,
                pipeline_def,
                resolved_run_config,
            ),
            solid_name=solid_def.name,
            solid_handle=NodeHandle(solid_def.name, parent=None),
        )

    return self.context
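# A minimal sketch of interactive use in a notebook ``parameters`` cell, assuming this
# method is exposed as ``dagstermill.get_context`` (as the docstring above indicates);
# the config value is illustrative.
import dagstermill

context = dagstermill.get_context(solid_config={"greeting": "hello"})
context.log.info(context.solid_config["greeting"])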
def test_handle_to_from_string():
    handle = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    assert handle.to_string() == "foo.bar.baz"
    assert NodeHandle.from_string(handle.to_string()) == handle

    handle = NodeHandle("foo", None)
    assert handle.to_string() == "foo"
    assert NodeHandle.from_string(handle.to_string()) == handle
def test_is_or_descends_from():
    ancestor = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", NodeHandle("quux", None))))
    descendant = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    assert not descendant.is_or_descends_from(ancestor)

    ancestor = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    descendant = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    assert descendant.is_or_descends_from(ancestor)

    ancestor = NodeHandle("bar", NodeHandle("foo", None))
    descendant = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    assert descendant.is_or_descends_from(ancestor)

    ancestor = NodeHandle("foo", None)
    descendant = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    assert descendant.is_or_descends_from(ancestor)

    ancestor = NodeHandle("foo", None)
    descendant = NodeHandle("foo", None)
    assert descendant.is_or_descends_from(ancestor)

    ancestor = NodeHandle("foo", None)
    descendant = NodeHandle("bar", None)
    assert not descendant.is_or_descends_from(ancestor)

    ancestor = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    descendant = NodeHandle("baz", None)
    assert not descendant.is_or_descends_from(ancestor)

    ancestor = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    descendant = NodeHandle("baz", NodeHandle("bar", None))
    assert not descendant.is_or_descends_from(ancestor)
def test_with_ancestor():
    handle = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    assert handle.with_ancestor(None) == handle
    assert handle.with_ancestor(NodeHandle("quux", None)) == NodeHandle(
        "baz", NodeHandle("bar", NodeHandle("foo", NodeHandle("quux", None)))
    )
def test_pop():
    handle = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    assert handle.pop(NodeHandle("foo", None)) == NodeHandle("baz", NodeHandle("bar", None))
    assert handle.pop(NodeHandle("bar", NodeHandle("foo", None))) == NodeHandle("baz", None)

    with pytest.raises(CheckError, match="does not descend from"):
        handle = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
        handle.pop(NodeHandle("quux", None))
def get_subselected_graph_definition(
    graph: GraphDefinition,
    resolved_op_selection_dict: Dict,
    parent_handle: Optional[NodeHandle] = None,
) -> SubselectedGraphDefinition:
    deps: Dict[
        Union[str, NodeInvocation],
        Dict[str, IDependencyDefinition],
    ] = {}

    selected_nodes: List[Tuple[str, NodeDefinition]] = []

    for node in graph.solids_in_topological_order:
        node_handle = NodeHandle(node.name, parent=parent_handle)
        # skip if the node isn't selected
        if node.name not in resolved_op_selection_dict:
            continue

        # rebuild graph if any nodes inside the graph are selected
        if node.is_graph and resolved_op_selection_dict[node.name] is not LeafNodeSelection:
            definition = get_subselected_graph_definition(
                node.definition,
                resolved_op_selection_dict[node.name],
                parent_handle=node_handle,
            )
        # use definition if the node as a whole is selected. this includes selecting the
        # entire graph
        else:
            definition = node.definition
        selected_nodes.append((node.name, definition))

        # build dependencies for the node. we do it for both cases because nested graphs can
        # have inputs and outputs too
        deps[_dep_key_of(node)] = {}
        for input_handle in node.input_handles():
            if graph.dependency_structure.has_direct_dep(input_handle):
                output_handle = graph.dependency_structure.get_direct_dep(input_handle)
                if output_handle.solid.name in resolved_op_selection_dict:
                    deps[_dep_key_of(node)][input_handle.input_def.name] = DependencyDefinition(
                        solid=output_handle.solid.name, output=output_handle.output_def.name
                    )
            elif graph.dependency_structure.has_dynamic_fan_in_dep(input_handle):
                output_handle = graph.dependency_structure.get_dynamic_fan_in_dep(input_handle)
                if output_handle.solid.name in resolved_op_selection_dict:
                    deps[_dep_key_of(node)][
                        input_handle.input_def.name
                    ] = DynamicCollectDependencyDefinition(
                        solid_name=output_handle.solid.name,
                        output_name=output_handle.output_def.name,
                    )
            elif graph.dependency_structure.has_fan_in_deps(input_handle):
                output_handles = graph.dependency_structure.get_fan_in_deps(input_handle)
                multi_dependencies = [
                    DependencyDefinition(
                        solid=output_handle.solid.name, output=output_handle.output_def.name
                    )
                    for output_handle in output_handles
                    if (
                        isinstance(output_handle, SolidOutputHandle)
                        and output_handle.solid.name in resolved_op_selection_dict
                    )
                ]
                deps[_dep_key_of(node)][input_handle.input_def.name] = MultiDependencyDefinition(
                    cast(
                        List[Union[DependencyDefinition, Type[MappedInputPlaceholder]]],
                        multi_dependencies,
                    )
                )
            # else input is unconnected

    # filter out unselected input/output mapping
    new_input_mappings = list(
        filter(
            lambda input_mapping: input_mapping.maps_to.solid_name
            in [name for name, _ in selected_nodes],
            graph._input_mappings,  # pylint: disable=protected-access
        )
    )
    new_output_mappings = list(
        filter(
            lambda output_mapping: output_mapping.maps_from.solid_name
            in [name for name, _ in selected_nodes],
            graph._output_mappings,  # pylint: disable=protected-access
        )
    )

    try:
        return SubselectedGraphDefinition(
            parent_graph_def=graph,
            dependencies=deps,
            node_defs=[definition for _, definition in selected_nodes],
            input_mappings=new_input_mappings,
            output_mappings=new_output_mappings,
        )
    except DagsterInvalidDefinitionError as exc:
        # This handles the case when you construct a subset such that an unsatisfied
        # input cannot be loaded from config. Instead of throwing a
        # DagsterInvalidDefinitionError, we re-raise a DagsterInvalidSubsetError.
        raise DagsterInvalidSubsetError(
            f"The attempted subset {str_format_set(resolved_op_selection_dict)} for graph "
            f"{graph.name} results in an invalid graph."
        ) from exc
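# A minimal sketch of the selection-dict shape this function consumes, assuming a
# hypothetical graph ``my_graph`` with an op ``add_one`` and a nested subgraph ``inner``
# from which only ``emit`` is selected. ``LeafNodeSelection`` marks a node selected as a
# whole; a nested dict recurses into a subgraph.
resolved_op_selection_dict = {
    "add_one": LeafNodeSelection,
    "inner": {"emit": LeafNodeSelection},
}
subselected = get_subselected_graph_definition(my_graph, resolved_op_selection_dict)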
def descend(self, solid):
    return self._replace(handle=NodeHandle(solid.name, parent=self.handle))
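# A minimal sketch of how successive descend calls build up a nested handle while
# walking into subgraphs; ``Ctx`` and ``Node`` are illustrative stand-ins for the real
# owner and node types, not part of the code above.
from collections import namedtuple

Ctx = namedtuple("Ctx", "handle")
Ctx.descend = descend  # reuse the method above for illustration
Node = namedtuple("Node", "name")

ctx = Ctx(handle=None).descend(Node("outer")).descend(Node("inner"))
assert ctx.handle.to_string() == "outer.inner"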
def core_execute_in_process(
    node: NodeDefinition,
    run_config: Dict[str, Any],
    ephemeral_pipeline: PipelineDefinition,
    instance: Optional[DagsterInstance],
    output_capturing_enabled: bool,
):
    pipeline_def = ephemeral_pipeline
    mode_def = pipeline_def.get_mode_definition()
    pipeline = InMemoryPipeline(pipeline_def)

    execution_plan = create_execution_plan(pipeline, run_config=run_config, mode=mode_def.name)

    recorder: Dict[StepOutputHandle, Any] = {}

    with ephemeral_instance_if_missing(instance) as execute_instance:
        pipeline_run = execute_instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config=run_config,
            mode=mode_def.name,
        )

        _execute_run_iterable = ExecuteRunWithPlanIterable(
            execution_plan=execution_plan,
            iterator=pipeline_execution_iterator,
            execution_context_manager=PlanOrchestrationContextManager(
                context_event_generator=orchestration_context_event_generator,
                pipeline=pipeline,
                execution_plan=execution_plan,
                pipeline_run=pipeline_run,
                instance=execute_instance,
                run_config=run_config,
                executor_defs=None,
                output_capture=recorder if output_capturing_enabled else None,
            ),
        )
        event_list = list(_execute_run_iterable)

    top_level_node_handle = NodeHandle.from_string(node.name)

    if isinstance(node, GraphDefinition) and node == ephemeral_pipeline.graph:
        event_list_for_top_lvl_node = event_list
        handle = None
        return InProcessGraphResult(node, handle, event_list_for_top_lvl_node, recorder)
    else:
        event_list_for_top_lvl_node = [
            event
            for event in event_list
            if event.solid_handle
            and event.solid_handle.is_or_descends_from(top_level_node_handle)
        ]
        handle = NodeHandle(node.name, None)

        if isinstance(node, SolidDefinition):
            return InProcessSolidResult(node, handle, event_list_for_top_lvl_node, recorder)
        elif isinstance(node, GraphDefinition):
            return InProcessGraphResult(node, handle, event_list_for_top_lvl_node, recorder)

    check.failed(f"Unexpected node type {node}")