Ejemplo n.º 1
0
def test_dict_roundtrip():
    """A NodeHandle survives ``_asdict`` -> JSON -> ``from_dict`` unchanged."""
    nested = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    top_level = NodeHandle("foo", None)

    for original in (nested, top_level):
        # Go through an actual JSON string so only JSON-representable data is kept.
        serialized = json.dumps(original._asdict())
        restored = NodeHandle.from_dict(json.loads(serialized))
        assert restored == original
Ejemplo n.º 2
0
def test_in_pipeline_manager_solid_config():
    """``solid_config`` on the reconstituted context matches the config of the chosen solid."""

    def config_pipeline_dict():
        # Serialized reference to the config pipeline, rebuilt for each case that needs it.
        return ReconstructablePipeline.for_module(
            "dagstermill.examples.repository",
            "hello_world_config_pipeline",
        ).to_dict()

    def both_solids_run_config():
        # A fresh dict per case, so no case shares config objects with another.
        return {
            "solids": {
                "hello_world_config": {"config": {"greeting": "bonjour"}},
                "goodbye_config": {"config": {"farewell": "goodbye"}},
            }
        }

    # Helper defaults only: no solid config is expected.
    with in_pipeline_manager() as manager:
        assert manager.context.solid_config is None

    # Config pipeline without an explicit run_config.
    with in_pipeline_manager(
        pipeline_name="hello_world_config_pipeline",
        solid_handle=NodeHandle("hello_world_config", None),
        executable_dict=config_pipeline_dict(),
        step_key="hello_world_config",
    ) as manager:
        assert manager.context.solid_config == {"greeting": "hello"}

    # Explicit run_config, looking at the first solid.
    with in_pipeline_manager(
        pipeline_name="hello_world_config_pipeline",
        solid_handle=NodeHandle("hello_world_config", None),
        run_config=both_solids_run_config(),
        executable_dict=config_pipeline_dict(),
        step_key="hello_world_config",
    ) as manager:
        assert manager.context.solid_config == {"greeting": "bonjour"}

    # Same run_config, looking at the second solid.
    with in_pipeline_manager(
        pipeline_name="hello_world_config_pipeline",
        solid_handle=NodeHandle("goodbye_config", None),
        run_config=both_solids_run_config(),
        executable_dict=config_pipeline_dict(),
        step_key="goodbye_config",
    ) as manager:
        assert manager.context.solid_config == {"farewell": "goodbye"}
Ejemplo n.º 3
0
    def parse_from_key(
        string: str,
    ) -> Union["StepHandle", "ResolvedFromDynamicStepHandle",
               "UnresolvedStepHandle"]:
        """Build the step handle variant described by a step key string.

        The key is tested against two patterns, in this order:

        * ``<node>[?]`` produces an ``UnresolvedStepHandle`` for ``<node>``.
        * ``<node>[<text>]`` produces a ``ResolvedFromDynamicStepHandle`` for
          ``<node>`` together with ``<text>``.

        A key matching neither pattern is treated as a node string and
        produces a plain ``StepHandle``.
        """
        # The "[?]" form must be tried first: the general bracket pattern
        # below would match it as well.
        unresolved = re.match(r"(.*)\[\?\]", string)
        if unresolved is not None:
            node_string = unresolved.group(1)
            return UnresolvedStepHandle(NodeHandle.from_string(node_string))

        resolved = re.match(r"(.*)\[(.*)\]", string)
        if resolved is None:
            return StepHandle(NodeHandle.from_string(string))

        node_string, bracket_text = resolved.groups()
        return ResolvedFromDynamicStepHandle(
            NodeHandle.from_string(node_string), bracket_text)
Ejemplo n.º 4
0
def test_in_pipeline_manager_bad_solid():
    """Asking for a solid that hello_world_pipeline lacks raises a CheckError."""
    expected_message = "hello_world_pipeline has no solid named foobar"

    # The manager is entered inside ``pytest.raises`` so that a failure while
    # setting it up is what gets captured.
    with pytest.raises(check.CheckError, match=expected_message), in_pipeline_manager(
        solid_handle=NodeHandle("foobar", None)
    ) as _manager:
        pass
Ejemplo n.º 5
0
def test_construct_log_string_for_event():
    """``construct_log_string`` renders a STEP_OUTPUT event as the expected single line."""
    event_message = 'Yielded output "result" of type "Any" for step "solid2". (Type check passed).'
    output_data = StepOutputData(
        step_output_handle=StepOutputHandle("solid2", "result"))

    event = DagsterEvent(
        event_type_value="STEP_OUTPUT",
        pipeline_name="my_pipeline",
        step_key="solid2",
        solid_handle=NodeHandle("solid2", None),
        step_kind_value="COMPUTE",
        logging_tags={},
        event_specific_data=output_data,
        message=event_message,
        pid=54348,
    )
    metadata = DagsterLoggingMetadata(
        run_id="f79a8a93-27f1-41b5-b465-b35d0809b26d",
        pipeline_name="my_pipeline",
    )
    props = DagsterMessageProps(orig_message=event.message, dagster_event=event)

    rendered = construct_log_string(logging_metadata=metadata, message_props=props)
    expected = 'my_pipeline - f79a8a93-27f1-41b5-b465-b35d0809b26d - 54348 - STEP_OUTPUT - Yielded output "result" of type "Any" for step "solid2". (Type check passed).'
    assert rendered == expected
Ejemplo n.º 6
0
def _event_record(run_id,
                  solid_name,
                  timestamp,
                  event_type,
                  event_specific_data=None):
    """Build a debug-level ``EventLogEntry`` wrapping a ``DagsterEvent`` for one solid.

    The pipeline name is fixed to ``"pipeline_name"``. The step key is taken
    from a ``StepHandle`` built on a top-level ``NodeHandle`` for ``solid_name``.
    """
    pipeline_name = "pipeline_name"
    node = NodeHandle(solid_name, None)
    step = StepHandle(node)
    step_key = step.to_key()

    event = DagsterEvent(
        event_type.value,
        pipeline_name,
        solid_handle=node,
        step_handle=step,
        event_specific_data=event_specific_data,
    )

    return EventLogEntry(
        None,
        "",
        "debug",
        "",
        run_id,
        timestamp,
        step_key=step_key,
        pipeline_name=pipeline_name,
        dagster_event=event,
    )
Ejemplo n.º 7
0
 def __new__(cls, solid_handle: NodeHandle, key: Optional[str] = None):
     """Create a step handle for ``solid_handle``.

     ``solid_handle`` must be a ``NodeHandle``. ``key`` is an optional string
     whose default is ``solid_handle.to_string()``.
     """
     checked_handle = check.inst_param(solid_handle, "solid_handle", NodeHandle)
     # The default is computed from the handle up front, whether or not ``key`` was given.
     checked_key = check.opt_str_param(key, "key", default=solid_handle.to_string())
     return super(StepHandle, cls).__new__(
         cls,
         solid_handle=checked_handle,
         # mypy can't tell that if default is set, this is guaranteed to be a str
         key=cast(str, checked_key),
     )
Ejemplo n.º 8
0
def test_handle_path():
    """``path`` lists names from the outermost parent down; ``from_path`` reverses it."""
    cases = [
        (
            NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None))),
            ["foo", "bar", "baz"],
        ),
        (NodeHandle("foo", None), ["foo"]),
    ]

    for handle, expected_path in cases:
        assert handle.path == expected_path
        assert NodeHandle.from_path(handle.path) == handle
Ejemplo n.º 9
0
def test_yield_unserializable_result():
    """Yielding a lock is accepted by a fresh Manager but raises TypeError in a pipeline."""
    fresh_manager = Manager()
    assert fresh_manager.yield_result(threading.Lock())

    # Keyword arguments are collected in the order the helper call would evaluate them.
    manager_kwargs = dict(
        pipeline_name="hello_world_output_pipeline",
        solid_handle=NodeHandle("hello_world_output", None),
        executable_dict=ReconstructablePipeline.for_module(
            "dagstermill.examples.repository",
            "hello_world_output_pipeline",
        ).to_dict(),
    )

    with in_pipeline_manager(**manager_kwargs) as manager:
        with pytest.raises(TypeError):
            manager.yield_result(threading.Lock())
Ejemplo n.º 10
0
def test_solid_handle_node_handle():
    """A serialized ``NodeHandle`` deserializes as ``SolidHandle`` in a separate whitelist env."""
    # Produce the payload with the current code.
    current_handle = NodeHandle("test", None)
    payload = serialize_dagster_namedtuple(current_handle)

    # Stand-in for "legacy" code: its own whitelist map that only has SolidHandle registered.
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class SolidHandle(namedtuple("_SolidHandle", "name parent")):
        pass

    legacy_handle = _deserialize_json(payload, legacy_env)

    assert isinstance(legacy_handle, SolidHandle)
    assert legacy_handle.name == current_handle.name
Ejemplo n.º 11
0
def in_pipeline_manager(
    pipeline_name="hello_world_pipeline",
    solid_handle=NodeHandle("hello_world", None),
    step_key="hello_world",
    executable_dict=None,
    mode=None,
    **kwargs,
):
    """Yield a ``Manager`` whose pipeline context has been reconstituted for a test.

    A test instance and a temporary marshal directory are created, a
    ``PipelineRun`` is packed, and ``Manager.reconstitute_pipeline_context`` is
    called with the resulting context dict. The marshal directory is removed
    when the generator finishes, whether or not setup succeeded.

    Args:
        pipeline_name: Name recorded on the ``PipelineRun``.
        solid_handle: Handle of the solid to reconstitute; passed on as
            ``solid_handle._asdict()``.
        step_key: Step key forwarded to ``reconstitute_pipeline_context``.
        executable_dict: Serialized reconstructable pipeline. When falsy, the
            ``hello_world_pipeline`` from ``dagstermill.examples.repository``
            is used.
        mode: Mode name for the run; ``"default"`` is used when falsy.
        **kwargs: Entries merged over the context dict before the call, so
            they override its defaults (for example ``run_config``).

    Yields:
        The ``Manager`` after ``reconstitute_pipeline_context`` has been called.
    """
    manager = Manager()

    run_id = make_new_run_id()
    with instance_for_test() as instance:
        marshal_dir = tempfile.mkdtemp()

        # Everything after mkdtemp runs inside the try, so the directory is
        # removed even if building the executable dict or the run fails.
        try:
            if not executable_dict:
                executable_dict = ReconstructablePipeline.for_module(
                    "dagstermill.examples.repository",
                    "hello_world_pipeline").to_dict()

            pipeline_run_dict = pack_value(
                PipelineRun(
                    pipeline_name=pipeline_name,
                    run_id=run_id,
                    mode=mode or "default",
                    run_config=None,
                    step_keys_to_execute=None,
                    status=PipelineRunStatus.NOT_STARTED,
                ))

            with safe_tempfile_path() as output_log_file_path:
                context_dict = {
                    "pipeline_run_dict": pipeline_run_dict,
                    "solid_handle_kwargs": solid_handle._asdict(),
                    "executable_dict": executable_dict,
                    "marshal_dir": marshal_dir,
                    "run_config": {},
                    "output_log_path": output_log_file_path,
                    "instance_ref_dict": pack_value(instance.get_ref()),
                    "step_key": step_key,
                }

                # Caller-supplied kwargs take precedence over the defaults above.
                manager.reconstitute_pipeline_context(
                    **dict(context_dict, **kwargs))
                yield manager
        finally:
            shutil.rmtree(marshal_dir)
Ejemplo n.º 12
0
def test_in_pipeline_manager_with_resources():
    """The ``list`` resource records "Opened" on setup and "Closed" after teardown."""
    # Only the name is kept; the temporary file itself is closed when this block exits.
    with tempfile.NamedTemporaryFile() as tmp:
        path = tmp.name

    def recorded_messages():
        # Load the pickled message list at ``path`` and split each entry on ": ".
        with open(path, "rb") as fd:
            raw_messages = pickle.load(fd)
        return [entry.split(": ") for entry in raw_messages]

    try:
        with in_pipeline_manager(
            pipeline_name="resource_pipeline",
            executable_dict=ReconstructablePipeline.for_module(
                "dagstermill.examples.repository",
                "resource_pipeline",
            ).to_dict(),
            solid_handle=NodeHandle("hello_world_resource", None),
            run_config={"resources": {"list": {"config": path}}},
            mode="prod",
            step_key="hello_world_resource",
        ) as manager:
            assert "list" in manager.context.resources._asdict()

            after_setup = recorded_messages()
            assert len(after_setup) == 1
            assert after_setup[0][1] == "Opened"

            manager.teardown_resources()

            after_teardown = recorded_messages()
            assert len(after_teardown) == 2
            assert after_teardown[1][1] == "Closed"

    finally:
        if os.path.exists(path):
            os.unlink(path)
Ejemplo n.º 13
0
def make_log_string(error, error_source=None):
    """Return the log string for a STEP_FAILURE event on step ``solid2`` carrying ``error``."""
    failure_data = StepFailureData(
        error=error,
        user_failure_data=None,
        error_source=error_source,
    )
    failure_event = DagsterEvent(
        event_type_value="STEP_FAILURE",
        pipeline_name="my_pipeline",
        step_key="solid2",
        solid_handle=NodeHandle("solid2", None),
        step_kind_value="COMPUTE",
        logging_tags={},
        event_specific_data=failure_data,
        message='Execution of step "solid2" failed.',
        pid=54348,
    )

    metadata = DagsterLoggingMetadata(
        run_id="f79a8a93-27f1-41b5-b465-b35d0809b26d",
        pipeline_name="my_pipeline",
    )
    props = DagsterMessageProps(
        orig_message=failure_event.message,
        dagster_event=failure_event,
    )
    return construct_log_string(metadata, props)
Ejemplo n.º 14
0
def make_log_string(error, error_source=None):
    """Return the log string for a STEP_FAILURE event on step ``solid2`` carrying ``error``.

    This variant passes plain dicts (``message_props`` and ``synth_props``) to
    ``construct_log_string``.
    """
    failure_data = StepFailureData(
        error=error, user_failure_data=None, error_source=error_source
    )
    failure_event = DagsterEvent(
        event_type_value="STEP_FAILURE",
        pipeline_name="my_pipeline",
        step_key="solid2",
        solid_handle=NodeHandle("solid2", None),
        step_kind_value="COMPUTE",
        logging_tags={},
        event_specific_data=failure_data,
        message='Execution of step "solid2" failed.',
        pid=54348,
    )

    message_props = dict(dagster_event=failure_event, pipeline_name="my_pipeline")
    synth_props = dict(
        orig_message=failure_event.message,
        run_id="f79a8a93-27f1-41b5-b465-b35d0809b26d",
    )

    return construct_log_string(
        message_props=message_props,
        logging_tags={},
        synth_props=synth_props,
    )
Ejemplo n.º 15
0
def test_rehydrate_solid_handle():
    """``NodeHandle.from_dict`` rebuilds handles of depth one, two and three.

    Each handle is checked as built from a literal dict, and again after a
    round trip through ``_asdict`` and JSON.
    """

    def through_json(handle):
        return NodeHandle.from_dict(json.loads(json.dumps(handle._asdict())))

    def assert_chain(handle, expected_names):
        # Walk from the node up through its parents, comparing names along the way.
        current = handle
        for depth, expected_name in enumerate(expected_names):
            if depth > 0:
                assert isinstance(current, NodeHandle)
            assert current.name == expected_name
            current = current.parent
        # The outermost handle has no parent.
        assert current is None

    cases = [
        ({"name": "foo", "parent": None}, ["foo"]),
        ({"name": "foo", "parent": ["bar", None]}, ["foo", "bar"]),
        ({"name": "foo", "parent": ["bar", ["baz", None]]}, ["foo", "bar", "baz"]),
    ]

    for raw, expected_names in cases:
        h = NodeHandle.from_dict(raw)
        assert_chain(h, expected_names)

        h = through_json(h)
        assert_chain(h, expected_names)
Ejemplo n.º 16
0
    def reconstitute_pipeline_context(
        self,
        output_log_path=None,
        marshal_dir=None,
        run_config=None,
        executable_dict=None,
        pipeline_run_dict=None,
        solid_handle_kwargs=None,
        instance_ref_dict=None,
        step_key=None,
    ):
        """Reconstitutes a context for dagstermill-managed execution.

        You'll see this function called to reconstruct a pipeline context within the ``injected
        parameters`` cell of a dagstermill output notebook. Users should not call this function
        interactively except when debugging output notebooks.

        Use :func:`dagstermill.get_context` in the ``parameters`` cell of your notebook to define a
        context for interactive exploration and development. This call will be replaced by one to
        :func:`dagstermill.reconstitute_pipeline_context` when the notebook is executed by
        dagstermill.

        Args:
            output_log_path (Optional[str]): Checked to be a string or None; not otherwise used
                in this method.
            marshal_dir (Optional[str]): Stored on the manager as ``marshal_dir``.
            run_config (Optional[dict]): Run config with string keys; an empty dict is
                substituted when it is not provided.
            executable_dict (dict): Serialized form accepted by
                ``ReconstructablePipeline.from_dict``.
            pipeline_run_dict (dict): Packed pipeline run, restored with ``unpack_value``.
            solid_handle_kwargs (dict): Dict form of a ``NodeHandle``, restored with
                ``NodeHandle.from_dict``.
            instance_ref_dict (dict): Packed instance ref used to build the ``DagsterInstance``.
            step_key (str): Key of the step whose step context is attached to the result.

        Returns:
            The ``DagstermillRuntimeExecutionContext`` that is also stored on ``self.context``.

        Raises:
            DagstermillError: If the ``DagsterInstance`` cannot be resolved from
                ``instance_ref_dict``.
        """
        # Although every parameter defaults to None, the dict_param / str_param checks below
        # make all but the first three effectively required.
        check.opt_str_param(output_log_path, "output_log_path")
        check.opt_str_param(marshal_dir, "marshal_dir")
        run_config = check.opt_dict_param(run_config,
                                          "run_config",
                                          key_type=str)
        check.dict_param(pipeline_run_dict, "pipeline_run_dict")
        check.dict_param(executable_dict, "executable_dict")
        check.dict_param(solid_handle_kwargs, "solid_handle_kwargs")
        check.dict_param(instance_ref_dict, "instance_ref_dict")
        check.str_param(step_key, "step_key")

        pipeline = ReconstructablePipeline.from_dict(executable_dict)
        pipeline_def = pipeline.get_definition()

        # Any failure while restoring the instance is reported as a DagstermillError, with the
        # underlying exception chained as its cause.
        try:
            instance_ref = unpack_value(instance_ref_dict)
            instance = DagsterInstance.from_ref(instance_ref)
        except Exception as err:
            raise DagstermillError(
                "Error when attempting to resolve DagsterInstance from serialized InstanceRef"
            ) from err

        pipeline_run = unpack_value(pipeline_run_dict)

        solid_handle = NodeHandle.from_dict(solid_handle_kwargs)
        solid = pipeline_def.get_solid(solid_handle)
        solid_def = solid.definition

        # Record on the manager that it is now running inside a pipeline, and for which solid.
        self.marshal_dir = marshal_dir
        self.in_pipeline = True
        self.solid_def = solid_def
        self.pipeline = pipeline

        resolved_run_config = ResolvedRunConfig.build(pipeline_def,
                                                      run_config,
                                                      mode=pipeline_run.mode)

        execution_plan = ExecutionPlan.build(
            self.pipeline,
            resolved_run_config,
            step_keys_to_execute=pipeline_run.step_keys_to_execute,
        )

        with scoped_pipeline_context(
                execution_plan,
                pipeline,
                run_config,
                pipeline_run,
                instance,
                scoped_resources_builder_cm=self._setup_resources,
                # Set this flag even though we're not in test for clearer error reporting
                raise_on_error=True,
        ) as pipeline_context:
            self.context = DagstermillRuntimeExecutionContext(
                pipeline_context=pipeline_context,
                pipeline_def=pipeline_def,
                # The solid's config is looked up by solid name under "solids"; it is None
                # when any level of that path is missing.
                solid_config=run_config.get("solids",
                                            {}).get(solid.name,
                                                    {}).get("config"),
                resource_keys_to_init=get_required_resource_keys_to_init(
                    execution_plan,
                    pipeline_def,
                    resolved_run_config,
                ),
                solid_name=solid.name,
                solid_handle=solid_handle,
                step_context=pipeline_context.for_step(
                    execution_plan.get_step_by_key(step_key)),
            )

        # The context is returned after the ``with`` block above has already exited.
        return self.context
Ejemplo n.º 17
0
    def get_context(self, solid_config=None, mode_def=None, run_config=None):
        """Get a dagstermill execution context for interactive exploration and development.

        If the manager already holds a ``DagstermillRuntimeExecutionContext``, that context is
        returned unchanged and none of the arguments are used to build a new one.

        Args:
            solid_config (Optional[Any]): If specified, this value will be made available on the
                context as its ``solid_config`` property.
            mode_def (Optional[:class:`dagster.ModeDefinition`]): If specified, defines the mode to
                use to construct the context. Specify this if you would like a context constructed
                with specific ``resource_defs`` or ``logger_defs``. By default, an ephemeral mode
                with a console logger will be constructed.
            run_config(Optional[dict]): The config dict with which to construct
                the context. The dict passed in is not modified.

        Returns:
            :py:class:`~dagstermill.DagstermillExecutionContext`
        """
        check.opt_inst_param(mode_def, "mode_def", ModeDefinition)
        run_config = check.opt_dict_param(run_config,
                                          "run_config",
                                          key_type=str)

        # If we are running non-interactively, and there is already a context reconstituted, return
        # that context rather than overwriting it. (isinstance is False for None, so no separate
        # None check is needed.)
        if isinstance(self.context, DagstermillRuntimeExecutionContext):
            return self.context

        if not mode_def:
            mode_def = ModeDefinition(
                logger_defs={"dagstermill": colored_console_logger})
            # Build a new dict instead of assigning into run_config, so that a dict supplied by
            # the caller is never modified as a side effect of this call.
            run_config = {**run_config, "loggers": {"dagstermill": {}}}

        solid_def = SolidDefinition(
            name="this_solid",
            input_defs=[],
            compute_fn=lambda *args, **kwargs: None,
            output_defs=[],
            description=
            "Ephemeral solid constructed by dagstermill.get_context()",
            required_resource_keys=mode_def.resource_key_set,
        )

        pipeline_def = PipelineDefinition(
            [solid_def],
            mode_defs=[mode_def],
            name="ephemeral_dagstermill_pipeline")

        run_id = make_new_run_id()

        # construct stubbed PipelineRun for notebook exploration...
        # The actual pipeline run during pipeline execution will be serialized and reconstituted
        # in the `reconstitute_pipeline_context` call
        pipeline_run = DagsterRun(
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            run_config=run_config,
            mode=mode_def.name,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
            tags=None,
        )

        # Record on the manager that this context was built outside of a pipeline run.
        self.in_pipeline = False
        self.solid_def = solid_def
        self.pipeline = pipeline_def

        resolved_run_config = ResolvedRunConfig.build(pipeline_def,
                                                      run_config,
                                                      mode=mode_def.name)

        pipeline = InMemoryPipeline(pipeline_def)
        execution_plan = ExecutionPlan.build(pipeline, resolved_run_config)

        with scoped_pipeline_context(
                execution_plan,
                pipeline,
                run_config,
                pipeline_run,
                DagsterInstance.ephemeral(),
                scoped_resources_builder_cm=self._setup_resources,
        ) as pipeline_context:

            self.context = DagstermillExecutionContext(
                pipeline_context=pipeline_context,
                pipeline_def=pipeline_def,
                solid_config=solid_config,
                resource_keys_to_init=get_required_resource_keys_to_init(
                    execution_plan,
                    pipeline_def,
                    resolved_run_config,
                ),
                solid_name=solid_def.name,
                solid_handle=NodeHandle(solid_def.name, parent=None),
            )

        # The context is returned after the ``with`` block above has already exited.
        return self.context
Ejemplo n.º 18
0
def test_handle_to_from_string():
    """``to_string`` joins names outermost-first with dots; ``from_string`` reverses it."""
    cases = [
        (
            NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None))),
            "foo.bar.baz",
        ),
        (NodeHandle("foo", None), "foo"),
    ]

    for handle, expected_string in cases:
        assert handle.to_string() == expected_string
        assert NodeHandle.from_string(handle.to_string()) == handle
Ejemplo n.º 19
0
def test_is_or_descends_from():
    """Table-driven check of ``NodeHandle.is_or_descends_from``."""

    def chain(*names):
        # Build a handle from names given outermost-first, e.g. chain("foo", "bar").
        handle = None
        for name in names:
            handle = NodeHandle(name, handle)
        return handle

    # (ancestor, descendant, expected result)
    cases = [
        (chain("quux", "foo", "bar", "baz"), chain("foo", "bar", "baz"), False),
        (chain("foo", "bar", "baz"), chain("foo", "bar", "baz"), True),
        (chain("foo", "bar"), chain("foo", "bar", "baz"), True),
        (chain("foo"), chain("foo", "bar", "baz"), True),
        (chain("foo"), chain("foo"), True),
        (chain("foo"), chain("bar"), False),
        (chain("foo", "bar", "baz"), chain("baz"), False),
        (chain("foo", "bar", "baz"), chain("bar", "baz"), False),
    ]

    for ancestor, descendant, expected in cases:
        if expected:
            assert descendant.is_or_descends_from(ancestor)
        else:
            assert not descendant.is_or_descends_from(ancestor)
Ejemplo n.º 20
0
def test_with_ancestor():
    """``with_ancestor`` places the given handle above the outermost parent; None is a no-op."""
    handle = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    assert handle.with_ancestor(None) == handle

    new_root = NodeHandle("quux", None)
    expected = NodeHandle(
        "baz", NodeHandle("bar", NodeHandle("foo", NodeHandle("quux", None))))
    assert handle.with_ancestor(new_root) == expected
Ejemplo n.º 21
0
def test_pop():
    """``pop`` strips a leading ancestor chain and rejects handles that are not ancestors."""
    handle = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))

    one_level = NodeHandle("foo", None)
    assert handle.pop(one_level) == NodeHandle("baz", NodeHandle("bar", None))

    two_levels = NodeHandle("bar", NodeHandle("foo", None))
    assert handle.pop(two_levels) == NodeHandle("baz", None)

    with pytest.raises(CheckError, match="does not descend from"):
        handle = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
        unrelated = NodeHandle("quux", None)
        handle.pop(unrelated)
Ejemplo n.º 22
0
def get_subselected_graph_definition(
    graph: GraphDefinition,
    resolved_op_selection_dict: Dict,
    parent_handle: Optional[NodeHandle] = None,
) -> SubselectedGraphDefinition:
    """Build the sub-graph of ``graph`` that contains only the selected nodes.

    Nodes are visited in topological order. A node is kept when its name is a key of
    ``resolved_op_selection_dict``. A kept graph node whose selection entry is not
    ``LeafNodeSelection`` is rebuilt recursively from that entry; any other kept node keeps
    its definition as is. Dependencies are retained only when the upstream node is also
    selected, and input/output mappings are retained only when they point at a selected node.

    Args:
        graph: The graph to subset.
        resolved_op_selection_dict: Mapping keyed by the names of the selected nodes. The
            value for a nested graph is either ``LeafNodeSelection`` or the selection
            mapping to apply inside that graph.
        parent_handle: Handle of the node containing ``graph``, or None at the top level.

    Returns:
        A ``SubselectedGraphDefinition`` whose ``parent_graph_def`` is ``graph``.

    Raises:
        DagsterInvalidSubsetError: If building the subset raises
            ``DagsterInvalidDefinitionError``.
    """
    deps: Dict[
        Union[str, NodeInvocation],
        Dict[str, IDependencyDefinition],
    ] = {}

    selected_nodes: List[Tuple[str, NodeDefinition]] = []

    for node in graph.solids_in_topological_order:
        # skip if the node isn't selected
        if node.name not in resolved_op_selection_dict:
            continue

        node_selection = resolved_op_selection_dict[node.name]

        # rebuild graph if any nodes inside the graph are selected
        if node.is_graph and node_selection is not LeafNodeSelection:
            definition = get_subselected_graph_definition(
                node.definition,
                node_selection,
                parent_handle=NodeHandle(node.name, parent=parent_handle),
            )
        # use definition if the node as a whole is selected. this includes selecting the entire graph
        else:
            definition = node.definition
        selected_nodes.append((node.name, definition))

        # build dependencies for the node. we do it for both cases because nested graphs can have
        # inputs and outputs too
        node_deps: Dict[str, IDependencyDefinition] = {}
        deps[_dep_key_of(node)] = node_deps
        for input_handle in node.input_handles():
            input_name = input_handle.input_def.name
            if graph.dependency_structure.has_direct_dep(input_handle):
                output_handle = graph.dependency_structure.get_direct_dep(input_handle)
                if output_handle.solid.name in resolved_op_selection_dict:
                    node_deps[input_name] = DependencyDefinition(
                        solid=output_handle.solid.name, output=output_handle.output_def.name
                    )
            elif graph.dependency_structure.has_dynamic_fan_in_dep(input_handle):
                output_handle = graph.dependency_structure.get_dynamic_fan_in_dep(input_handle)
                if output_handle.solid.name in resolved_op_selection_dict:
                    node_deps[input_name] = DynamicCollectDependencyDefinition(
                        solid_name=output_handle.solid.name,
                        output_name=output_handle.output_def.name,
                    )
            elif graph.dependency_structure.has_fan_in_deps(input_handle):
                output_handles = graph.dependency_structure.get_fan_in_deps(input_handle)
                # Only entries that are solid output handles of selected nodes are kept.
                multi_dependencies = [
                    DependencyDefinition(
                        solid=upstream.solid.name, output=upstream.output_def.name
                    )
                    for upstream in output_handles
                    if (
                        isinstance(upstream, SolidOutputHandle)
                        and upstream.solid.name in resolved_op_selection_dict
                    )
                ]
                node_deps[input_name] = MultiDependencyDefinition(
                    cast(
                        List[Union[DependencyDefinition, Type[MappedInputPlaceholder]]],
                        multi_dependencies,
                    )
                )
            # else input is unconnected

    # Built once so that each mapping below is tested with a single set lookup.
    selected_names = {name for name, _ in selected_nodes}

    # filter out unselected input/output mapping
    input_mappings = graph._input_mappings  # pylint: disable=protected-access
    output_mappings = graph._output_mappings  # pylint: disable=protected-access
    new_input_mappings = [
        input_mapping
        for input_mapping in input_mappings
        if input_mapping.maps_to.solid_name in selected_names
    ]
    new_output_mappings = [
        output_mapping
        for output_mapping in output_mappings
        if output_mapping.maps_from.solid_name in selected_names
    ]

    try:
        return SubselectedGraphDefinition(
            parent_graph_def=graph,
            dependencies=deps,
            node_defs=[definition for _, definition in selected_nodes],
            input_mappings=new_input_mappings,
            output_mappings=new_output_mappings,
        )
    except DagsterInvalidDefinitionError as exc:
        # This handles the case when you construct a subset such that an unsatisfied
        # input cannot be loaded from config. Instead of throwing a DagsterInvalidDefinitionError,
        # we re-raise a DagsterInvalidSubsetError.
        raise DagsterInvalidSubsetError(
            f"The attempted subset {str_format_set(resolved_op_selection_dict)} for graph "
            f"{graph.name} results in an invalid graph."
        ) from exc
Ejemplo n.º 23
0
 def descend(self, solid):
     """Return a copy of this object whose handle is ``solid``'s name nested under the current handle."""
     child_handle = NodeHandle(solid.name, parent=self.handle)
     return self._replace(handle=child_handle)
Ejemplo n.º 24
0
def core_execute_in_process(
    node: NodeDefinition,
    run_config: Dict[str, Any],
    ephemeral_pipeline: PipelineDefinition,
    instance: Optional[DagsterInstance],
    output_capturing_enabled: bool,
):
    """Execute ``ephemeral_pipeline`` in process and wrap the outcome for ``node``.

    A run is created on ``instance`` (or on an ephemeral instance when it is missing), the
    execution plan is run to completion, and all events are collected.

    Returns:
        ``InProcessGraphResult`` with no handle and every event when ``node`` is the pipeline's
        own graph. Otherwise ``InProcessSolidResult`` or ``InProcessGraphResult`` (by node
        type) with a top-level handle and only the events at or below that node.
    """
    pipeline_def = ephemeral_pipeline
    mode_def = pipeline_def.get_mode_definition()
    pipeline = InMemoryPipeline(pipeline_def)

    execution_plan = create_execution_plan(
        pipeline,
        run_config=run_config,
        mode=mode_def.name,
    )

    # Filled during execution when output capturing is enabled.
    recorder: Dict[StepOutputHandle, Any] = {}

    with ephemeral_instance_if_missing(instance) as execute_instance:
        pipeline_run = execute_instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config=run_config,
            mode=mode_def.name,
        )

        context_manager = PlanOrchestrationContextManager(
            context_event_generator=orchestration_context_event_generator,
            pipeline=pipeline,
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=execute_instance,
            run_config=run_config,
            executor_defs=None,
            output_capture=recorder if output_capturing_enabled else None,
        )
        run_iterable = ExecuteRunWithPlanIterable(
            execution_plan=execution_plan,
            iterator=pipeline_execution_iterator,
            execution_context_manager=context_manager,
        )
        # Exhaust the iterable while the instance is still open.
        all_events = list(run_iterable)

    top_level_node_handle = NodeHandle.from_string(node.name)

    # The node is the whole pipeline graph: every event belongs to it and it has no handle.
    if isinstance(node, GraphDefinition) and node == ephemeral_pipeline.graph:
        return InProcessGraphResult(node, None, all_events, recorder)

    # Otherwise keep only the events emitted at or below the requested node.
    node_events = [
        event for event in all_events
        if event.solid_handle
        and event.solid_handle.is_or_descends_from(top_level_node_handle)
    ]
    handle = NodeHandle(node.name, None)

    if isinstance(node, SolidDefinition):
        return InProcessSolidResult(node, handle, node_events, recorder)
    if isinstance(node, GraphDefinition):
        return InProcessGraphResult(node, handle, node_events, recorder)

    check.failed(f"Unexpected node type {node}")