def test_buildall_partial(self):
    """Check the stages yielded for dc0 when only da0 has not been made.

    ``makefile`` is called for every intermediate and product dataset
    except da0 (shown in brackets in the diagram below). The build of dc0
    is then expected to consist of exactly three single-dataset stages:
    da0, then db0, then dc0.
    """
    #  R0      R1      R2      R3         [raw data]
    #    \     /       |       |
    #     [DA0]        DA1    /
    #          \      /  \   /
    #             DB0     DB1
    #              \     / |  \
    #               \   /  |   \
    #                DC0  DC1  DC2        [products]
    # NOTE(review): diagram spacing re-laid out from a collapsed comment;
    # confirm the edges against the fixture definitions.
    for present in (self.da1, self.db0, self.db1,
                    self.dc0, self.dc1, self.dc2):
        makefile(present.name)

    # One expected set of datasets per build stage, in order.
    expected_stages = [{self.da0}, {self.db0}, {self.dc0}]

    stage_count = 0
    for stage_index, stage in enumerate(depgraph.buildall(self.dc0)):
        stage_count += 1
        built = {dataset for dataset, _ in stage}
        if stage_index >= len(expected_stages):
            # More stages than the graph allows for.
            raise ValueError
        self.assertEqual(built, expected_stages[stage_index])
    self.assertEqual(stage_count, 3)
def test_buildall(self):
    """Check the stages yielded by a full build of dc0.

    Stage 0 must include da0 and da1, stage 1 must include db0 and db1,
    and stage 2 must include dc0. Only membership is checked, so a stage
    may contain further datasets. Exactly three stages must be yielded;
    without that check the test would pass vacuously if ``buildall``
    yielded fewer stages (or none at all).
    """
    # Datasets that must appear in each stage, indexed by stage number.
    required_by_stage = [
        (self.da0, self.da1),
        (self.db0, self.db1),
        (self.dc0,),
    ]

    stage_count = 0
    for i, group in enumerate(depgraph.buildall(self.dc0)):
        stage_count += 1
        if i >= len(required_by_stage):
            self.fail("buildall yielded more than {0} stages".format(
                len(required_by_stage)))
        datasets = [d for d, _ in group]
        for required in required_by_stage[i]:
            self.assertIn(required, datasets)

    self.assertEqual(stage_count, len(required_by_stage))
def test_parallel_builder(self):
    """Build dc0 stage by stage through a thread pool and check the files.

    Every (dependency, reason) pair yielded by ``depgraph.buildall`` is
    submitted to the executor. Each stage is drained before the next one
    is submitted, so the ordering between stages does not rely on the pool
    having a single worker, and any exception raised inside a build job is
    re-raised here instead of being silently dropped.
    """
    import concurrent.futures

    def build(dep, reason):
        # Stand-in build step: make the dataset's file, hand back the reason.
        makefile(dep.name)
        return reason

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        for stage in depgraph.buildall(self.dc0):
            stage_futures = [pool.submit(build, dep, reason)
                             for dep, reason in stage]
            # result() blocks until the job is done and propagates any
            # exception raised by build().
            for future in stage_futures:
                future.result()

    # The intermediates and the requested target itself must now exist.
    for dataset in (self.da0, self.da1, self.db0, self.db1, self.dc0):
        self.assertTrue(os.path.isfile(dataset.name),
                        "{0} was not built".format(dataset.name))
for root in roots: print root.name batchbuilder(DC0) print "" # Option 2: # Implement the build loop manually from depgraph import buildall def build(dependency, reason): # This may have the same logic as `batchbuilder` above, but we # will call it directly rather than wrapping it in @buildmanager # [....] return 0 for stage in buildall(DC1): # A build stage is a list of dependencies whose own dependencies are met and # that are independent, i.e. they can be built in parallel for dep, reason in stage: # Each target is a dataset with a 'name' attribute and whatever # additional keyword arguments where defined with it. # The 'reason' is a depgraph.Reason object that codifies why a # particular target is necessary (e.g. it's out of date, it's missing # and required by a subsequent target, etc.) print("Building {0} with {1} because {2}".format(dep.name, dep.tool, reason)) # Call a function or start a subprocess that will result in the