def setUp(self):
    r"""Build the small dependency graph shared by the tests in this class.

    The docstring is a raw string because the diagram contains backslashes.

    ::

        R0      R1      R2
          \    /        |
            I0          I1
              \        /
                FINAL

    Besides the edges drawn above, ``final_result`` is also declared to
    depend directly on ``raw0``.
    """
    # Datasets are created in the same order as before (result first).
    self.result = Dataset("final_result")
    self.intermediate0 = Dataset("intermediate0")
    self.intermediate1 = Dataset("intermediate1")
    self.raw0 = Dataset("raw0")
    self.raw1 = Dataset("raw1")
    self.raw2 = Dataset("raw2")

    self.result.dependson(self.intermediate0, self.intermediate1, self.raw0)
    self.intermediate0.dependson(self.raw0, self.raw1)
    self.intermediate1.dependson(self.raw2)
def test_graph_include(self):
    """Check that the ``include`` predicate limits the edges in the output.

    The graph is ``a, b -> c -> d``; with the predicate below only the
    ``c -> d`` edge is expected, giving three lines of output in total.
    """
    a = Dataset("a")
    b = Dataset("b")
    c = Dataset("c")
    d = Dataset("d")
    d.dependson(c)
    c.dependson(a, b)

    # Include only parents of `d`
    dot = depgraph.graphviz(d, include=lambda d1, d2: d in d1.children(0))

    self.assertEqual(len(dot.split("\n")), 3)
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn('"c" -> "d"', dot)
def test_graph_node_name(self):
    """Check that ``node_id`` determines how nodes are named in the output.

    Node ids are derived by upper-casing each dataset's name, so the edges
    of the ``a, b -> c -> d`` graph must be reported between "A", "B", "C"
    and "D".
    """
    a = Dataset("a")
    b = Dataset("b")
    c = Dataset("c")
    d = Dataset("d")
    d.dependson(c)
    c.dependson(a, b)

    # The lambda parameter is named `ds` so it does not shadow the local `d`.
    dot = depgraph.graphviz(d, node_id=lambda ds: ds.name.upper())

    self.assertEqual(len(dot.split("\n")), 5)
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    for edge in ('"C" -> "D"', '"A" -> "C"', '"B" -> "C"'):
        self.assertIn(edge, dot)
def test_graph(self):
    """Check the default graphviz output for the ``a, b -> c -> d`` graph.

    Five lines are expected in total, among them the three edges with
    double-quoted node names.
    """
    a = Dataset("a")
    b = Dataset("b")
    c = Dataset("c")
    d = Dataset("d")
    d.dependson(c)
    c.dependson(a, b)

    dot = depgraph.graphviz(d)

    self.assertEqual(len(dot.split("\n")), 5)
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    for edge in ('"c" -> "d"', '"a" -> "c"', '"b" -> "c"'):
        self.assertIn(edge, dot)
def test_is_older3(self):
    """Compare a dependency group against a single dependency.

    ``makefile`` is called for every member of the group first and, after
    a short pause, for the lone dataset; the group must then report itself
    as older than that dataset.
    """
    members = [
        Dataset(fullpath(relpath))
        for relpath in ("testdata/1a", "testdata/1b", "testdata/1c")
    ]
    group1 = DatasetGroup(fullpath("testdata/1"), members)
    for member in group1:
        makefile(member.name)

    # Pause so the single dataset is handled strictly after the group.
    time.sleep(0.05)

    single = Dataset(fullpath("testdata/2"))
    makefile(single.name)

    self.assertTrue(group1.is_older_than(single))
def test_graph_edge_style(self):
    """Check that per-edge style attributes appear in the graphviz output.

    The ``style`` callback gives every edge the name of its second
    endpoint as ``color`` and a fixed ``weight`` of 2. The graph is
    ``violet, green -> red -> blue``, so the edge leaving "red" must be
    blue and the edges leaving "violet" and "green" must be red.
    """
    a = Dataset("violet")
    b = Dataset("green")
    c = Dataset("red")
    d = Dataset("blue")
    d.dependson(c)
    c.dependson(a, b)

    dot = depgraph.graphviz(d, style=lambda d1, d2: {"color": d2.name, "weight": 2})
    lines = dot.split("\n")
    self.assertEqual(len(lines), 5)

    for line in lines:
        if line.startswith('"red"'):
            self.assertIn("color=blue", line)
            self.assertIn("weight=2", line)
        # Previously this compared against '"violent"', which never matched
        # the "violet" dataset, so that edge's style went unchecked.
        elif line.startswith(('"violet"', '"green"')):
            self.assertIn("color=red", line)
            self.assertIn("weight=2", line)
def test_graph(self):
    """Check the default graphviz output for the ``a, b -> c -> d`` graph.

    Five lines are expected in total, among them the three edges. Node
    names are matched in double-quoted form, which is what the other
    graphviz tests in this file assert.
    """
    # NOTE(review): another `test_graph` is defined earlier in this file.
    # If both live in the same class this definition replaces the earlier
    # one -- confirm and rename one of them if so.
    a = Dataset("a")
    b = Dataset("b")
    c = Dataset("c")
    d = Dataset("d")
    d.dependson(c)
    c.dependson(a, b)

    dot = depgraph.graphviz(d)

    self.assertEqual(len(dot.split("\n")), 5)
    # The unquoted form ("c -> d") is not a substring of '"c" -> "d"', so
    # the expectations use the quoted spelling.
    for edge in ('"c" -> "d"', '"a" -> "c"', '"b" -> "c"'):
        self.assertIn(edge, dot)
def test_is_older2(self):
    """Two dependency groups whose file ages overlap.

    ``makefile`` is first called for 1a, 1b and 2c, then (after a short
    pause) for 1c, 2a and 2b. Each group therefore mixes members from both
    batches, and group 1 is not absolutely older than group 2.
    """
    early = [
        Dataset(fullpath(relpath))
        for relpath in ("testdata/1a", "testdata/1b", "testdata/2c")
    ]
    dep1a, dep1b, dep2c = early
    for ds in early:
        makefile(ds.name)

    # Separate the two batches in time.
    time.sleep(0.05)

    late = [
        Dataset(fullpath(relpath))
        for relpath in ("testdata/1c", "testdata/2a", "testdata/2b")
    ]
    dep1c, dep2a, dep2b = late
    for ds in late:
        makefile(ds.name)

    first = DatasetGroup(fullpath("testdata/1"), [dep1a, dep1b, dep1c])
    second = DatasetGroup(fullpath("testdata/2"), [dep2a, dep2b, dep2c])

    self.assertFalse(first.is_older_than(second))
def test_is_older1(self):
    """Two dependency groups where one is entirely older than the other.

    ``makefile`` is called for all members of group 1, then (after a short
    pause) for all members of group 2, so group 1 must report itself as
    older than group 2.
    """
    older_members = [
        Dataset(fullpath(relpath))
        for relpath in ("testdata/1a", "testdata/1b", "testdata/1c")
    ]
    for ds in older_members:
        makefile(ds.name)

    # Separate the two groups in time.
    time.sleep(0.05)

    newer_members = [
        Dataset(fullpath(relpath))
        for relpath in ("testdata/2a", "testdata/2b", "testdata/2c")
    ]
    for ds in newer_members:
        makefile(ds.name)

    first = DatasetGroup(fullpath("testdata/1"), older_members)
    second = DatasetGroup(fullpath("testdata/2"), newer_members)

    self.assertTrue(first.is_older_than(second))
def test_cyclic2(self):
    """A graph containing a dependency cycle must not be reported acyclic.

    ``a`` depends on ``f`` while ``f`` reaches ``a`` again through
    ``d``/``e`` and ``b``/``c``, closing the loop.
    """
    a, b, c, d, e, f = (Dataset(label) for label in "abcdef")

    a.dependson(f)
    f.dependson(d, e)
    e.dependson(b, c)
    d.dependson(b)
    c.dependson(a)
    b.dependson(a)

    acyclic = depgraph.is_acyclic(f)
    self.assertFalse(acyclic)
def test_children(self):
    """The children of a group are the children of its members.

    ``2a`` depends on ``1a`` and ``1b``, ``2b`` on ``1c`` and ``2c`` on
    ``1d``. A group holding ``1a``, ``1b`` and ``1c`` must therefore
    report exactly ``2a`` and ``2b`` as children.
    """
    d1a, d1b, d1c, d1d = (Dataset(label) for label in ("1a", "1b", "1c", "1d"))
    d2a, d2b, d2c = (Dataset(label) for label in ("2a", "2b", "2c"))

    d2a.dependson(d1a, d1b)
    d2b.dependson(d1c)
    d2c.dependson(d1d)

    group = DatasetGroup("dg", [d1a, d1b, d1c])
    expected = {d2a, d2b}
    self.assertEqual(set(group.children()), expected)
def setUp(self):
    r"""Define a simple dependency graph that is complex enough to be
    interesting, and prepare the directories it refers to.

    The docstring is a raw string because the diagram contains backslashes,
    some of them at the end of a line.

    ::

        R0      R1      R2      R3         [raw data]
          \     /       |       |
            DA0         DA1    /
                \      /    \ /
                  DB0        DB1
                   \       / |  \
                    \     /  |   \
                      DC0   DC1  DC2       [products]

    After the graph is built, ``makefile`` is called for each raw dataset
    and ``cleandir`` is called on the ``testproject`` directory.
    """
    raw0 = Dataset(fullpath("testdata/raw0"), prog="rawdata")
    raw1 = Dataset(fullpath("testdata/raw1"), prog="rawdata")
    raw2 = Dataset(fullpath("testdata/raw2"), prog="rawdata")
    raw3 = Dataset(fullpath("testdata/raw3"), prog="rawdata")

    da0 = Dataset(fullpath("testproject/da0"), prog="step1")
    da1 = Dataset(fullpath("testproject/da1"), prog="step2")

    db0 = Dataset(fullpath("testproject/db0"), prog="step3")
    db1 = Dataset(fullpath("testproject/db1"), prog="step4")

    dc0 = Dataset(fullpath("testproject/dc0"), prog="step5")
    dc1 = Dataset(fullpath("testproject/dc1"), prog="step6")
    dc2 = Dataset(fullpath("testproject/dc2"), prog="step7")

    da0.dependson(raw0, raw1)
    da1.dependson(raw2)

    db0.dependson(da0, da1)
    db1.dependson(da1, raw3)

    dc0.dependson(db0, db1)
    dc1.dependson(db1)
    dc2.dependson(db1)

    self.raw0, self.raw1, self.raw2, self.raw3 = raw0, raw1, raw2, raw3
    self.da0, self.da1 = da0, da1
    self.db0, self.db1 = db0, db1
    self.dc0, self.dc1, self.dc2 = dc0, dc1, dc2

    # initialize "raw" data
    rawdir = fullpath("testdata")
    ensureisdir(rawdir)
    for dep in (raw0, raw1, raw2, raw3):
        makefile(dep.name)

    # Pause so anything the tests create afterwards is handled strictly
    # later than the raw data.
    time.sleep(0.05)

    # presumably empties the project directory between tests -- confirm
    # against the cleandir helper.
    cleandir(fullpath("testproject"))
from depgraph import Dataset, buildmanager

# Each Dataset takes an optional `tool` keyword: a key telling our build tool
# how to assemble that product. Plain strings are used here; another pattern
# would be to pass a callback function instead.
#
# Datasets are grouped by pipeline stage below, and each stage declares its
# dependencies right away so that depgraph can determine the build order.

# Raw inputs
R0 = Dataset("data/raw0", tool="read_csv")
R1 = Dataset("data/raw1", tool="read_csv")
R2 = Dataset("data/raw2", tool="database_query")
R3 = Dataset("data/raw3", tool="read_hdf")

# Step 1
DA0 = Dataset("step1/da0", tool="merge_fish_counts")
DA1 = Dataset("step1/da1", tool="process_filter")

# Step 2
DB0 = Dataset("step2/db0", tool="join_counts")
DB1 = Dataset("step2/db1", tool="join_by_date")

# Results
DC0 = Dataset("results/dc0", tool="merge_model_obs")
DC1 = Dataset("results/dc1", tool="compute_uncertainty")
DC2 = Dataset("results/dc2", tool="make_plots")

# Dependency relationships, declared in the same order as before
DA0.dependson(R0, R1)
DA1.dependson(R2)

DB0.dependson(DA0, DA1)
DB1.dependson(DA1, R3)

DC0.dependson(DB0, DB1)
DC1.dependson(DB1)
DC2.dependson(DB1)