Beispiel #1
0
    def _filter_pipeline(
        self,
        pipeline: Pipeline,
        tags: Iterable[str] = None,
        from_nodes: Iterable[str] = None,
        to_nodes: Iterable[str] = None,
        node_names: Iterable[str] = None,
        from_inputs: Iterable[str] = None,
    ) -> Pipeline:
        """Narrow ``pipeline`` to the nodes that satisfy every given filter.

        Each supplied (truthy) filter is evaluated against the *original*
        ``pipeline`` and the results are combined with ``&``. Filtering the
        already-narrowed pipeline step by step would make the outcome depend
        on the order of the steps: for nodes 1, 2, 3, "from 1" combined with
        "only 1 and 3" could yield either 1 & 3 or just 1.

        Args:
            pipeline: The pipeline to filter.
            tags: Passed to ``pipeline.only_nodes_with_tags``.
            from_nodes: Passed to ``pipeline.from_nodes``.
            to_nodes: Passed to ``pipeline.to_nodes``.
            node_names: Passed to ``pipeline.only_nodes``.
            from_inputs: Passed to ``pipeline.from_inputs``.

        Returns:
            The intersection of ``pipeline`` with every requested filter.

        Raises:
            KedroContextError: If the tag filter leaves no nodes, or if the
                final pipeline has no nodes.
        """
        selected = pipeline

        # Tags are handled first and checked on their own, so an empty result
        # is reported together with the tags that caused it.
        if tags:
            selected &= pipeline.only_nodes_with_tags(*tags)
            if not selected.nodes:
                message = "Pipeline contains no nodes with tags: {}".format(
                    str(tags))
                raise KedroContextError(message)

        # Remaining filters as (requested values, name of the Pipeline method).
        # The method is looked up only when its filter was actually supplied.
        other_filters = (
            (from_nodes, "from_nodes"),
            (to_nodes, "to_nodes"),
            (node_names, "only_nodes"),
            (from_inputs, "from_inputs"),
        )
        for requested, method_name in other_filters:
            if requested:
                selected &= getattr(pipeline, method_name)(*requested)

        if selected.nodes:
            return selected
        raise KedroContextError("Pipeline contains no nodes")
Beispiel #2
0
    def _suggest_resume_scenario(self, pipeline: Pipeline,
                                 done_nodes: Iterable[Node]) -> None:
        """Log a warning suggesting a command that resumes the pipeline run.

        Nodes of ``pipeline`` that are not in ``done_nodes`` are counted as
        not having run. If at least one node is done, the suggested
        ``kedro run`` command gets a ``--from-inputs`` option listing the
        inputs of the sub-pipeline made of the remaining nodes; otherwise the
        plain ``kedro run`` command is suggested.

        Args:
            pipeline: The pipeline whose nodes are compared with
                ``done_nodes``.
            done_nodes: The nodes that have already run. Any iterable is
                accepted, including one-shot iterators.
        """
        # Materialise once: ``done_nodes`` may be a one-shot iterable, whose
        # own truthiness says nothing about whether it yielded any node.
        finished = set(done_nodes)
        remaining_nodes = set(pipeline.nodes) - finished
        command = "kedro run"

        if finished:
            node_names = [n.name for n in remaining_nodes]
            resume_pipeline = pipeline.only_nodes(*node_names)

            # NOTE(review): ``inputs()`` appears to return an unordered
            # collection of names (confirm against the Pipeline API); sorting
            # keeps the suggested command identical between runs.
            command += " --from-inputs {}".format(",".join(
                sorted(resume_pipeline.inputs())))

        self._logger.warning(
            "There are %d nodes that have not run.\n"
            "You can resume the pipeline run with the following command:\n%s",
            len(remaining_nodes),
            command,
        )
Beispiel #3
0
    def _suggest_resume_scenario(self, pipeline: Pipeline,
                                 done_nodes: Iterable[Node]) -> None:
        """Log a warning suggesting an argument that resumes the pipeline run.

        Nodes of ``pipeline`` that are not in ``done_nodes`` are counted as
        not having run. If at least one node is done, the suggestion is a
        ``--from-nodes`` argument naming the nodes of the remaining
        sub-pipeline that consume that sub-pipeline's own inputs; otherwise
        the suggested argument is empty.

        Args:
            pipeline: The pipeline whose nodes are compared with
                ``done_nodes``.
            done_nodes: The nodes that have already run. Any iterable is
                accepted, including one-shot iterators.
        """
        # Materialise once: ``done_nodes`` may be a one-shot iterable, whose
        # own truthiness says nothing about whether it yielded any node.
        finished = set(done_nodes)
        remaining_nodes = set(pipeline.nodes) - finished

        postfix = ""
        if finished:
            node_names = (n.name for n in remaining_nodes)
            resume_p = pipeline.only_nodes(*node_names)

            # Nodes fed by the inputs of the remaining sub-pipeline are the
            # ones suggested as starting points.
            start_p = resume_p.only_nodes_with_inputs(*resume_p.inputs())
            start_node_names = (n.name for n in start_p.nodes)
            postfix += '  --from-nodes "{}"'.format(",".join(start_node_names))

        self._logger.warning(
            "There are %d nodes that have not run.\n"
            "You can resume the pipeline run by adding the following "
            "argument to your previous command:\n%s",
            len(remaining_nodes),
            postfix,
        )
Beispiel #4
0
 def test_only_nodes_missing(self, pipeline_list_with_lists,
                             target_node_names):
     """Check that ``only_nodes`` raises ``ValueError`` for these names.

     Presumably the ``target_node_names`` fixture supplies names that are
     absent from the pipeline built here; verify against the fixture.
     """
     whole_pipeline = Pipeline(pipeline_list_with_lists["nodes"])
     expected_message = r"Pipeline does not contain nodes"
     with pytest.raises(ValueError, match=expected_message):
         whole_pipeline.only_nodes(*target_node_names)
Beispiel #5
0
 def test_only_nodes(self, target_node_names, pipeline_list_with_lists):
     """Check that ``only_nodes`` keeps exactly the requested nodes.

     Node names are compared after sorting, so the order of the nodes in
     the resulting pipeline does not matter.
     """
     whole_pipeline = Pipeline(pipeline_list_with_lists["nodes"])
     subset = whole_pipeline.only_nodes(*target_node_names)
     selected_names = sorted(node.name for node in subset.nodes)
     assert selected_names == sorted(target_node_names)