def test_pipeline_ascii_multistage(tmp_dir, dvc, run_copy):
    """Check the graph built for a single-stage file fed by a named stage.

    Two ``run_copy`` invocations are made: ``foo`` -> ``bar`` under the stage
    name ``copy-foo-bar``, then ``bar`` -> ``foobar`` with
    ``single_stage=True``. The graph requested for ``foobar.dvc`` must hold
    both stages joined by one edge; the graph requested for ``copy-foo-bar``
    must hold only that stage.
    """
    # NOTE(review): SOURCE contains a second function with this exact name;
    # if both live in the same module the later definition replaces this one
    # and this test would not be collected -- confirm and rename if so.
    tmp_dir.gen("foo", "foo")
    run_copy("foo", "bar", name="copy-foo-bar")
    run_copy("bar", "foobar", single_stage=True)

    show = CmdPipelineShow([])

    # Graph rooted at the single-stage file.
    graph_nodes, graph_edges, _ = show._build_graph("foobar.dvc")
    expected_nodes = {"copy-foo-bar", "foobar.dvc"}
    expected_edges = {("foobar.dvc", "copy-foo-bar")}
    assert set(graph_nodes) == expected_nodes
    assert set(graph_edges) == expected_edges

    # Graph rooted at the named stage: only the node list is inspected.
    graph_nodes, *_ = show._build_graph("copy-foo-bar")
    assert set(graph_nodes) == {"copy-foo-bar"}
def test_pipeline_ascii_multistage(tmp_dir, dvc, run_copy):
    """Check the graph for a ``.dvc`` stage fed by a ``pipelines.yaml`` stage.

    ``run_copy`` is called once with ``name="copy-foo-bar"`` and once without
    a name. The assertions expect the named stage to be addressed as
    ``pipelines.yaml:copy-foo-bar`` and the unnamed one as ``foobar.dvc``,
    with a single edge from the latter to the former.
    """
    # NOTE(review): SOURCE contains another function with this exact name;
    # if both live in the same module only the later definition is kept by
    # Python -- confirm and rename one of them if so.
    tmp_dir.gen("foo", "foo")
    run_copy("foo", "bar", name="copy-foo-bar")
    run_copy("bar", "foobar")

    show = CmdPipelineShow([])
    named_stage = "pipelines.yaml:copy-foo-bar"

    # Graph rooted at the .dvc file; the third return value is not checked.
    stage_nodes, stage_edges, _ = show._build_graph("foobar.dvc")
    assert set(stage_nodes) == {"pipelines.yaml:copy-foo-bar", "foobar.dvc"}
    assert set(stage_edges) == {("foobar.dvc", "pipelines.yaml:copy-foo-bar")}

    # Graph rooted at the named stage: it must be the only node.
    stage_nodes, _, _ = show._build_graph(named_stage)
    assert set(stage_nodes) == {"pipelines.yaml:copy-foo-bar"}
def test_disconnected_stage(tmp_dir, dvc):
    """Check that a stage outside the target's ancestry is left out.

    ``derived1`` and ``derived2`` both depend on ``base``, but only
    ``derived1`` feeds ``final``. The outputs graph built for the ``final``
    stage is expected to contain ``final``, ``derived1`` and ``base`` only,
    with the edges in the exact order asserted below, and to be reported as
    a tree.
    """
    tmp_dir.dvc_gen({"base": "base"})
    dvc.add("base")

    # Two sibling stages consuming the same dependency.
    for out_name, shell_cmd in (
        ("derived1", "echo derived1 > derived1"),
        ("derived2", "echo derived2 > derived2"),
    ):
        dvc.run(
            deps=["base"],
            outs=[out_name],
            cmd=shell_cmd,
            single_stage=True,
        )

    final_stage = dvc.run(
        deps=["derived1"],
        outs=["final"],
        cmd="echo final > final",
        single_stage=True,
    )

    # _build_graph is exercised directly rather than through the CLI entry
    # point.
    show = CmdPipelineShow([])
    graph_nodes, graph_edges, tree_flag = show._build_graph(
        final_stage.path, commands=False, outs=True
    )

    expected_nodes = {"final", "derived1", "base"}
    expected_edges = [("final", "derived1"), ("derived1", "base")]

    assert set(graph_nodes) == expected_nodes
    # Edges are compared as an ordered list, so ordering is part of the check.
    assert graph_edges == expected_edges
    assert tree_flag is True
def test_split_pipeline(tmp_dir, scm, dvc):
    """Check the outputs graph of a pipeline that splits and rejoins.

    One stage turns ``data`` into ``data_train`` and ``data_valid``; a second
    stage consumes both to produce ``result``. Each stage also lists a
    dependency created through ``tmp_dir.scm_gen``. The expected node set
    contains neither ``git_dep1`` nor ``git_dep2``.
    """
    for git_file in ("git_dep1", "git_dep2"):
        tmp_dir.scm_gen(git_file, git_file)
    tmp_dir.dvc_gen("data", "source file content")

    # Splitting stage: one input, two outputs.
    dvc.run(
        deps=["git_dep1", "data"],
        outs=["data_train", "data_valid"],
        cmd="echo train >> data_train && echo valid >> data_valid",
        single_stage=True,
    )
    # Joining stage: consumes both halves.
    join_stage = dvc.run(
        deps=["git_dep2", "data_train", "data_valid"],
        outs=["result"],
        cmd="echo result >> result",
        single_stage=True,
    )

    expected_nodes = {"data", "data_train", "data_valid", "result"}
    expected_edges = {
        ("result", "data_train"),
        ("result", "data_valid"),
        ("data_train", "data"),
        ("data_valid", "data"),
    }

    show = CmdPipelineShow([])
    graph_nodes, graph_edges, _ = show._build_graph(
        join_stage.path, commands=False, outs=True
    )

    assert set(graph_nodes) == expected_nodes
    assert set(graph_edges) == expected_edges
def test_pipeline_multi_outputs_stages(dvc):
    """Check the outputs graph when stages declare several outputs.

    Three stages are chained through ``alice`` and ``mary``. ``bob`` and
    ``mike`` are declared as outputs but nothing depends on them; the
    expected graph for the last stage lists only ``alice``, ``mary`` and
    ``carol``.
    """
    # Stage with no dependencies and two outputs.
    dvc.run(
        outs=["alice", "bob"],
        cmd="echo alice>alice && echo bob>bob",
        single_stage=True,
    )
    # Uses only one of the two outputs produced above.
    dvc.run(
        deps=["alice"],
        outs=["mary", "mike"],
        cmd="echo mary>mary && echo mike>mike",
        single_stage=True,
    )
    last_stage = dvc.run(
        deps=["mary"],
        outs=["carol"],
        cmd="echo carol>carol",
        single_stage=True,
    )

    expected_nodes = {"alice", "mary", "carol"}
    expected_edges = {("carol", "mary"), ("mary", "alice")}

    show = CmdPipelineShow([])
    graph_nodes, graph_edges, _ = show._build_graph(last_stage.path, outs=True)

    assert set(graph_nodes) == expected_nodes
    assert set(graph_edges) == expected_edges