def t_num_expr_impl(self, cls: Type[NumExprABC]) -> Tuple[Any, ...]:
    """Run ``cls`` on two random tables and check both output columns.

    Two ``RandomTable`` modules (10 columns, 100 000 rows each) are wired to
    the ``first`` and ``second`` inputs of ``cls``.  After the scheduler has
    been run, output column 0 is compared with ``first_2+2*second_3`` and
    output column 1 with ``first_3-5*second_2``, both computed through
    ``ne.evaluate``.

    Args:
        cls: the module class under test.

    Returns:
        The operand columns ``(first_2, first_3, second_2, second_3)`` so a
        caller can run further checks on them.
    """
    sched = self.scheduler()
    source_a = RandomTable(10, rows=100_000, scheduler=sched)
    source_b = RandomTable(10, rows=100_000, scheduler=sched)
    under_test = cls(
        columns={
            "first": ["_1", "_2", "_3"],
            "second": ["_1", "_2", "_3"],
        },
        scheduler=sched,
    )
    under_test.input.first = source_a.output.result
    under_test.input.second = source_b.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = under_test.output.result
    aio.run(sched.start())

    array_a = source_a.table.to_array()
    array_b = source_b.table.to_array()
    # ne.evaluate() is handed only the expression text, so it has to find the
    # operands by name in this frame (presumably numexpr's caller-frame
    # lookup): the four names below must match the expressions exactly.
    first_2, first_3 = array_a[:, 1], array_a[:, 2]
    second_2, second_3 = array_b[:, 1], array_b[:, 2]
    expected_col0 = ne.evaluate("first_2+2*second_3")
    expected_col1 = ne.evaluate("first_3-5*second_2")

    produced = under_test.table.to_array()
    self.assertTrue(np.allclose(produced[:, 0], expected_col0, equal_nan=True))
    self.assertTrue(np.allclose(produced[:, 1], expected_col1, equal_nan=True))
    return first_2, first_3, second_2, second_3
def t_mix_ufunc_impl(
    self,
    cls: Type[MixUfuncABC],
    ufunc1: np.ufunc = np.log,
    ufunc2: np.ufunc = np.add,
) -> None:
    """Run ``cls`` on two random tables and compare it with plain ufunc calls.

    Two ``RandomTable`` modules (10 columns, 100 000 rows each) feed the
    ``first`` and ``second`` inputs of ``cls``.  After the scheduler has been
    run, output column 0 must match ``ufunc2(first_2, second_3)`` and output
    column 1 must match ``ufunc1(second_3)``, both cast to ``float64``.

    Args:
        cls: the module class under test.
        ufunc1: unary ufunc expected in output column 1.
        ufunc2: binary ufunc expected in output column 0.
    """
    s = self.scheduler()
    random1 = RandomTable(10, rows=100000, scheduler=s)
    random2 = RandomTable(10, rows=100000, scheduler=s)
    module = cls(
        columns={
            "first": ["_1", "_2", "_3"],
            "second": ["_1", "_2", "_3"],
        },
        scheduler=s,
    )
    module.input.first = random1.output.result
    module.input.second = random2.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = module.output.result
    aio.run(s.start())

    # Only these two operand columns take part in the expected values; the
    # other slices the test used to compute were never read.
    first_2 = random1.table.to_array()[:, 1]
    second_3 = random2.table.to_array()[:, 2]
    ne_1 = ufunc2(first_2, second_3).astype("float64")
    ne_2 = ufunc1(second_3).astype("float64")

    res = module.table.to_array()
    self.assertTrue(np.allclose(res[:, 0], ne_1, equal_nan=True))
    self.assertTrue(np.allclose(res[:, 1], ne_2, equal_nan=True))
def test_binary2(self) -> None:
    """Constructing ``Binary`` with a list for ``columns`` must fail.

    The test expects an ``AssertionError`` from the constructor
    (presumably because ``columns`` is a flat list instead of the per-input
    dict used elsewhere -- confirm against ``Binary``).
    """
    sched = self.scheduler()
    n_cols = 10
    # Both sources are only created on the scheduler; nothing is connected.
    _unused_first = RandomTable(n_cols, rows=100_000, scheduler=sched)
    _unused_second = RandomTable(n_cols, rows=100_000, scheduler=sched)
    with self.assertRaises(AssertionError):
        _ = Binary(np.add, columns=["_3", "_5", "_7"], scheduler=sched)
def _t_impl(self, cls: Type[TableModule], ufunc: np.ufunc, mod_name: str) -> None:
    """Check a two-input table module against the NumPy ufunc it wraps.

    Two 3-column ``int64`` random tables (values drawn with
    ``np.random.randint(10, ...)``) are connected to the ``first`` and
    ``second`` inputs of ``cls``.  After the run the module output must equal
    ``ufunc`` applied to the two source arrays, and the module name must
    start with ``mod_name``.

    Args:
        cls: the module class under test.
        ufunc: reference NumPy ufunc.
        mod_name: expected prefix of the module's name.
    """
    print("Testing", mod_name)
    sched = self.scheduler()

    def draw_digits(size):
        # Same generator for both sources: integers in [0, 10).
        return np.random.randint(10, size=size)

    left = RandomTable(
        3,
        rows=100_000,
        scheduler=sched,
        random=draw_digits,  # type: ignore
        dtype="int64",
    )
    right = RandomTable(
        3,
        rows=100_000,
        scheduler=sched,
        random=draw_digits,  # type: ignore
        dtype="int64",
    )
    under_test = cls(scheduler=sched)
    under_test.input.first = left.output.result
    under_test.input.second = right.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = under_test.output.result
    aio.run(sched.start())

    expected = ufunc(left.table.to_array(), right.table.to_array())
    produced = under_test.table.to_array()
    self.assertTrue(under_test.name.startswith(mod_name))
    self.assertTrue(np.allclose(expected, produced, equal_nan=True))
def test_hadamard(self) -> None:
    """``Hadamard`` of two random tables must equal ``np.multiply`` of them."""
    sched = self.scheduler()
    left = RandomTable(3, rows=100_000, scheduler=sched)
    right = RandomTable(3, rows=100_000, scheduler=sched)
    hadamard = Hadamard(scheduler=sched)
    hadamard.input.x1 = left.output.result
    hadamard.input.x2 = right.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = hadamard.output.result
    aio.run(sched.start())

    expected = np.multiply(left.table.to_array(), right.table.to_array())
    produced = hadamard.table.to_array()
    self.assertTrue(np.allclose(expected, produced, equal_nan=True))
def test_binary(self) -> None:
    """``Binary(np.add)`` on two random tables must match ``np.add``.

    Also checks that the module name starts with ``"binary_"``.
    """
    sched = self.scheduler()
    left = RandomTable(3, rows=100_000, scheduler=sched)
    right = RandomTable(3, rows=100_000, scheduler=sched)
    adder = Binary(np.add, scheduler=sched)
    adder.input.first = left.output.result
    adder.input.second = right.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = adder.output.result
    aio.run(sched.start())

    expected = np.add(left.table.to_array(), right.table.to_array())
    produced = adder.table.to_array()
    self.assertTrue(adder.name.startswith("binary_"))
    self.assertTrue(np.allclose(expected, produced, equal_nan=True))
def test_dataflow_1_dynamic(self) -> None:
    """Add a ``Max``/``Print`` pair to a running dataflow and see it execute.

    The initial dataflow is ``table -> min -> print_min``.  A callback
    registered with ``on_loop`` creates ``max`` and ``print_max`` inside a
    ``with scheduler:`` block; the test passes when the ``proc`` of
    ``print_max`` has been called at least once.
    """
    sched = self.scheduler(clean=True)
    source = RandomTable(
        name="table", columns=["a"], throttle=1000, scheduler=sched
    )
    min_mod = Min(name="min", scheduler=sched)
    min_printer = Print(proc=self.terse, name="print_min", scheduler=sched)
    min_mod.input.table = source.output.result
    min_printer.input.df = min_mod.output.result
    max_branch_ran = False

    def proc(x: Any) -> None:
        # Used as `proc` of the dynamically added printer.
        nonlocal max_branch_ran
        print("proc max called")
        max_branch_ran = True

    async def _add_max(scheduler: Scheduler, run_number: int) -> None:
        with scheduler:
            print("adding new modules")
            max_mod = Max(name="max", scheduler=scheduler)
            max_printer = Print(name="print_max", proc=proc, scheduler=scheduler)
            max_mod.input.table = source.output.result
            max_printer.input.df = max_mod.output.result

    sched.on_loop(_add_max, 5)  # run the function after 5 loops
    sched.on_loop(self._stop, 10)
    aio.run(sched.start())
    self.assertTrue(max_branch_ran)
def test_paste(self) -> None:
    """``Paste`` of two single-column minima must match the table minimum.

    ``Min`` is computed separately for columns ``_1`` and ``_2``, each result
    goes through ``Dict2Table``, and the two tables are combined by
    ``Paste``.  The last row of the pasted table is compared with
    ``random.table.min()``.
    """
    sched = self.scheduler()
    source = RandomTable(10, rows=10_000, scheduler=sched)

    min_1 = Min(name=f"min_1{hash(source)}", scheduler=sched, columns=["_1"])
    min_1.input[0] = source.output.result
    d2t_1 = Dict2Table(scheduler=sched)
    d2t_1.input.dict_ = min_1.output.result

    min_2 = Min(name=f"min_2{hash(source)}", scheduler=sched, columns=["_2"])
    min_2.input[0] = source.output.result
    d2t_2 = Dict2Table(scheduler=sched)
    d2t_2.input.dict_ = min_2.output.result

    paste = Paste(scheduler=sched)
    paste.input.first = d2t_1.output.result
    paste.input.second = d2t_2.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = paste.output.result
    aio.run(sched.start())

    expected = source.table.min()
    produced = notNone(paste.table.last()).to_dict()
    for column in ("_1", "_2"):
        self.assertAlmostEqual(expected[column], produced[column])
def test_bisect2(self) -> None:
    """``Bisect`` with ``op=">"`` on a stirred table must select ``_1 > 0.5``.

    A ``Stirrer`` (``delete_rows=100``) sits between the random source and
    the ``HistogramIndex``; the expected row ids are obtained with
    ``stirrer.table.eval("_1>0.5", ...)``.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=100_000, scheduler=sched)
    stirrer = Stirrer(update_column="_1", delete_rows=100, scheduler=sched)
    stirrer.input[0] = source.output.result

    # NOTE(review): the dshape declares `value` as a string although the
    # datum is the float 0.5 -- confirm this is intended.
    limit_table = Table(name=None, dshape="{value: string}", data={"value": [0.5]})
    limit = Constant(table=limit_table, scheduler=sched)

    hist_index = HistogramIndex(column="_1", scheduler=sched)
    hist_index.create_dependent_modules(stirrer, "result")
    bisect_ = Bisect(column="_1", op=">", hist_index=hist_index, scheduler=sched)
    bisect_.input[0] = hist_index.output.result
    bisect_.input.limit = limit.output.result

    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = bisect_.output.result
    aio.run(sched.start())

    expected_ids = stirrer.table.eval("_1>0.5", result_object="index")
    self.assertEqual(bisect_.table.index, bitmap(expected_ids))
def test_hist_index_min_max(self):
    """Test min_out and max_out on HistogramIndex

    A ``RangeQuery`` on column ``_1`` (bounds 0.3 and 0.8 supplied through
    ``Constant`` tables) creates the histogram index; ``Min`` and ``Max`` are
    then attached to the index's ``min_out`` / ``max_out`` slots and their
    last row is compared with the minimum / maximum of the source table.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=100_000, scheduler=sched)

    lower_table = Table(name=None, dshape="{_1: float64}", data={"_1": [0.3]})
    lower = Constant(table=lower_table, scheduler=sched)
    upper_table = Table(name=None, dshape="{_1: float64}", data={"_1": [0.8]})
    upper = Constant(table=upper_table, scheduler=sched)

    range_qry = RangeQuery(column="_1", scheduler=sched)
    range_qry.create_dependent_modules(
        source, "table", min_value=lower, max_value=upper
    )
    query_printer = Print(proc=self.terse, scheduler=sched)
    query_printer.input.df = range_qry.output.table

    hist_index = range_qry.hist_index
    min_ = Min(name="min_" + str(hash(hist_index)), scheduler=sched)
    min_.input.table = hist_index.output.min_out
    min_printer = Print(proc=self.terse, scheduler=sched)
    min_printer.input.df = min_.output.table

    max_ = Max(name="max_" + str(hash(hist_index)), scheduler=sched)
    max_.input.table = hist_index.output.max_out
    max_printer = Print(proc=self.terse, scheduler=sched)
    max_printer.input.df = max_.output.table

    sched.start()
    sched.join()

    expected_min = source.table().min()["_1"]
    produced_min = min_.table().last().to_dict()["_1"]
    self.assertAlmostEqual(expected_min, produced_min)

    expected_max = source.table().max()["_1"]
    produced_max = max_.table().last().to_dict()["_1"]
    self.assertAlmostEqual(expected_max, produced_max)
def t_mix_ufunc_table_dict_impl(self, cls: Type[MixUfuncABC]) -> None:
    """Run ``cls`` with a dict as ``first`` input and a table as ``second``.

    ``first`` is fed by ``RandomDict(10)`` and ``second`` by a 10-column
    ``RandomTable``.  After the run, output column 0 must match
    ``np.add(first_2, second_3)`` and output column 1 ``np.log(second_3)``,
    where ``first_2`` is the second value of the dict and ``second_3`` the
    third column of the table.

    Args:
        cls: the module class under test.
    """
    s = self.scheduler()
    random1 = RandomDict(10, scheduler=s)
    random2 = RandomTable(10, rows=100000, scheduler=s)
    module = cls(
        columns={
            "first": ["_1", "_2", "_3"],
            "second": ["_1", "_2", "_3"],
        },
        scheduler=s,
    )
    module.input.first = random1.output.result
    module.input.second = random2.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = module.output.result
    aio.run(s.start())

    # Only these two operands take part in the expected values; the other
    # values the test used to extract were never read.
    first_2 = list(random1.psdict.values())[1]
    second_3 = random2.table.to_array()[:, 2]
    ne_1 = np.add(first_2, second_3)
    ne_2 = np.log(second_3)

    res = module.table.to_array()
    self.assertTrue(np.allclose(res[:, 0], ne_1, equal_nan=True))
    self.assertTrue(np.allclose(res[:, 1], ne_2, equal_nan=True))
def test_idxmax2(self) -> None:
    """``IdxMax`` and ``Max`` must agree on a stirred random table.

    Both modules read the output of a ``Stirrer`` (``delete_rows=5``,
    ``fixed_step_size=100``).  After the run, the ``Max`` result dict is
    compared with the last row of the table returned by ``idxmax.max()``.
    """
    s = self.scheduler()
    random = RandomTable(10, rows=10000, throttle=1000, scheduler=s)
    stirrer = Stirrer(
        update_column="_1", delete_rows=5, fixed_step_size=100, scheduler=s
    )
    stirrer.input[0] = random.output.result
    idxmax = IdxMax(scheduler=s)
    idxmax.input[0] = stirrer.output.result
    max_ = Max(scheduler=s)
    max_.input[0] = stirrer.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = idxmax.output.result
    pr2 = Print(proc=self.terse, scheduler=s)
    pr2.input[0] = max_.output.result
    aio.run(s.start())

    max1 = max_.psdict
    # Named `idxmax_max` rather than `max` so the builtin is not shadowed.
    idxmax_max = idxmax.max()
    assert idxmax_max is not None  # narrows the Optional for the type checker
    max2 = notNone(idxmax_max.last()).to_dict()
    self.compare(max1, max2)
def test_dataflow_2_add_remove(self) -> None:
    """Add a ``Max`` branch and delete the ``Min`` branch while running.

    The initial dataflow is ``table -> min -> print_min``.  A callback
    registered with ``on_loop`` creates ``max`` / ``print_max`` and deletes
    ``min`` / ``print_min`` within one ``with scheduler as dataflow:``
    block.  The test passes when the ``proc`` of ``print_max`` has run.
    """
    sched = self.scheduler(clean=True)
    source = RandomTable(
        name="table", columns=["a"], throttle=1000, scheduler=sched
    )
    min_mod = Min(name="min", scheduler=sched)
    min_printer = Print(proc=self.terse, name="print_min", scheduler=sched)
    min_mod.input.table = source.output.result
    min_printer.input.df = min_mod.output.result
    max_branch_ran = False

    def proc(x: Any) -> None:
        # Used as `proc` of the dynamically added printer.
        nonlocal max_branch_ran
        print("proc max called")
        max_branch_ran = True

    async def _add_max_remove_min(scheduler: Scheduler, run_number: int) -> None:
        with scheduler as dataflow:
            print("adding new modules")
            max_mod = Max(name="max", scheduler=scheduler)
            max_printer = Print(name="print_max", proc=proc, scheduler=scheduler)
            max_mod.input.table = source.output.result
            max_printer.input.df = max_mod.output.result
            print("removing min module")
            dataflow.delete_modules("min", "print_min")

    sched.on_loop(_add_max_remove_min, 5)
    sched.on_loop(self._stop, 10)
    aio.run(sched.start())
    self.assertTrue(max_branch_ran)
def test_intersection(self):
    """``Intersection`` of two ``Bisect`` results must select 0.3 < _1 < 0.8.

    Both ``Bisect`` modules share one ``HistogramIndex`` on column ``_1``;
    one keeps ``_1 > 0.3`` and the other ``_1 < 0.8``.  The selection of the
    intersection is compared with the ids returned by evaluating
    ``(_1>0.3)&(_1<0.8)`` on the index's input table.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=100_000, scheduler=sched)

    lower_table = Table(name=None, dshape="{_1: float64}", data={"_1": [0.3]})
    lower = Constant(table=lower_table, scheduler=sched)
    upper_table = Table(name=None, dshape="{_1: float64}", data={"_1": [0.8]})
    upper = Constant(table=upper_table, scheduler=sched)

    hist_index = HistogramIndex(column="_1", scheduler=sched)
    hist_index.create_dependent_modules(source, "table")

    above_lower = Bisect(column="_1", op=">", hist_index=hist_index, scheduler=sched)
    above_lower.input.table = hist_index.output.table
    above_lower.input.limit = lower.output.table

    below_upper = Bisect(column="_1", op="<", hist_index=hist_index, scheduler=sched)
    below_upper.input.table = hist_index.output.table
    below_upper.input.limit = upper.output.table

    inter = Intersection(scheduler=sched)
    # The same input slot is assigned twice, once per Bisect.
    inter.input.table = above_lower.output.table
    inter.input.table = below_upper.output.table
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input.df = inter.output.table

    sched.start()
    sched.join()

    indexed_table = hist_index.input_module.output["table"].data()
    expected_ids = indexed_table.eval("(_1>0.3)&(_1<0.8)", result_object="index")
    self.assertEqual(inter.table().selection, bitmap(expected_ids))
def t_histogram2d_impl(self, **kw: Any) -> None:
    """Check ``Histogram2D`` on a stirred table against ``fh.histogram2d``.

    The histogram is built from the ``Stirrer`` output with bounds supplied
    by ``Min`` / ``Max`` modules, then piped into a ``Heatmap``.  The last
    histogram row is compared, bin by bin, with a reference computed from
    columns ``_1`` and ``_2`` of the stirred table and flipped along axis 0.

    Args:
        **kw: extra keyword arguments forwarded to ``Stirrer``.
    """
    sched = self.scheduler()
    source = RandomTable(3, rows=100_000, scheduler=sched)
    stirrer = Stirrer(
        update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
    )
    stirrer.input[0] = source.output.result
    min_ = Min(scheduler=sched)
    min_.input[0] = stirrer.output.result
    max_ = Max(scheduler=sched)
    max_.input[0] = stirrer.output.result

    # 0 and 1 designate the x and y columns; the reference below uses "_1"
    # for x and "_2" for y.
    histogram2d = Histogram2D(0, 1, xbins=100, ybins=100, scheduler=sched)
    histogram2d.input[0] = stirrer.output.result
    histogram2d.input.min = min_.output.result
    histogram2d.input.max = max_.output.result
    heatmap = Heatmap(filename="histo_%03d.png", scheduler=sched)
    heatmap.input.array = histogram2d.output.result
    every = Every(proc=self.terse, scheduler=sched)
    every.input[0] = heatmap.output.result
    aio.run(sched.start())

    last_row = notNone(histogram2d.table.last()).to_dict()
    produced = last_row["array"]
    y_range = [last_row["ymin"], last_row["ymax"]]
    x_range = [last_row["xmin"], last_row["xmax"]]
    xy_table = stirrer.table.loc[:, ["_1", "_2"]]
    assert xy_table is not None
    xy = xy_table.to_array()
    bin_counts = [histogram2d.params.ybins, histogram2d.params.xbins]
    # The reference is computed y-first, then flipped along axis 0.
    reference = fh.histogram2d(
        xy[:, 1], xy[:, 0], bins=bin_counts, range=[y_range, x_range]
    )
    reference = np.flip(reference, axis=0)  # type: ignore
    self.assertEqual(np.sum(produced), np.sum(reference))
    self.assertListEqual(
        produced.reshape(-1).tolist(), reference.reshape(-1).tolist()
    )
def test_ldexp(self) -> None:
    """``ColsLdexp`` on column groups of one table must match ``np.ldexp``.

    Columns ``_3, _5, _7`` are the first operands and ``_4, _6, _8`` the
    second; the outputs are named ``x, y, z``.  The source table holds
    ``int64`` values drawn with ``np.random.randint(10, ...)``.
    """
    name_prefix = "cols_ldexp_"
    print("Testing", name_prefix)
    sched = self.scheduler()
    n_cols = 10
    source = RandomTable(
        n_cols,
        rows=10_000,
        scheduler=sched,
        random=lambda size: np.random.randint(10, size=size),  # type: ignore
        dtype="int64",
    )
    under_test = ColsLdexp(
        first=["_3", "_5", "_7"],
        second=["_4", "_6", "_8"],
        cols_out=["x", "y", "z"],
        scheduler=sched,
    )
    under_test.input[0] = source.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = under_test.output.result
    aio.run(sched.start())

    self.assertListEqual(under_test.table.columns, ["x", "y", "z"])
    values = source.table.to_array()
    # Array positions 2/4/6 and 3/5/7 hold columns _3/_5/_7 and _4/_6/_8.
    expected = np.ldexp(values[:, [2, 4, 6]], values[:, [3, 5, 7]])
    produced = under_test.table.to_array()
    self.assertTrue(under_test.name.startswith(name_prefix))
    self.assertTrue(np.allclose(expected, produced, equal_nan=True))
def test_binary3(self) -> None:
    """``Binary(np.add)`` with a table as ``first`` and a dict as ``second``.

    Columns ``_3, _5, _7`` of the table are added to entries
    ``_4, _6, _8`` of the dict; the reference is computed with ``np.add``
    on the matching array positions.
    """
    sched = self.scheduler()
    n_cols = 10
    table_source = RandomTable(n_cols, rows=100_000, scheduler=sched)
    dict_source = RandomDict(n_cols, scheduler=sched)
    adder = Binary(
        np.add,
        columns={
            "first": ["_3", "_5", "_7"],
            "second": ["_4", "_6", "_8"],
        },
        scheduler=sched,
    )
    adder.input.first = table_source.output.result
    adder.input.second = dict_source.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = adder.output.result
    aio.run(sched.start())

    table_part = table_source.table.to_array()[:, [2, 4, 6]]
    dict_part = np.array(list(dict_source.psdict.values()))[[3, 5, 7]]
    expected = np.add(table_part, dict_part)
    produced = adder.table.to_array()
    self.assertTrue(adder.name.startswith("binary_"))
    self.assertTrue(np.allclose(expected, produced, equal_nan=True))
def test_hub_if_else(self):
    """With an always-false ``Switch``, the ``Hub`` must carry the minimum.

    ``Max`` reads the switch's ``result`` slot and ``Min`` its
    ``result_else`` slot; both feed the same ``Hub`` input.  The hub result
    is compared with ``stirrer.result.min()``.
    """
    sched = Scheduler()
    source = RandomTable(2, rows=100_000, scheduler=sched)
    stirrer = Stirrer(
        update_column="_1",
        delete_rows=5,
        update_rows=5,
        fixed_step_size=100,
        scheduler=sched,
    )
    stirrer.input[0] = source.output.result
    switch = Switch(condition=lambda x: False, scheduler=sched)
    switch.input[0] = stirrer.output.result

    max_ = Max(name=f"max_{hash(source)}", scheduler=sched)
    max_.input[0] = switch.output.result
    min_ = Min(name=f"min_{hash(source)}", scheduler=sched)
    min_.input[0] = switch.output.result_else

    hub = Hub(scheduler=sched)
    # The same hub input is assigned twice: first Min, then Max.
    hub.input.table = min_.output.result
    hub.input.table = max_.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = hub.output.result
    aio.run(sched.start())

    expected = stirrer.result.min()
    produced = hub.result
    self.compare(expected, produced)
def test_hist_index_min_max(self) -> None:
    """Test min_out and max_out on HistogramIndex

    A ``RangeQuery`` on column ``_1`` (bounds 0.3 and 0.8 supplied as
    ``PsDict`` constants) creates the histogram index; ``Min`` and ``Max``
    are attached to the index's ``min_out`` / ``max_out`` slots and their
    ``psdict`` is compared with the minimum / maximum of the source table.
    """
    sched = self.scheduler()
    # The whole dataflow is built inside the scheduler context; the run
    # itself happens after the context has been left.
    with sched:
        source = RandomTable(2, rows=100_000, scheduler=sched)
        lower = Constant(table=PsDict({"_1": 0.3}), scheduler=sched)
        upper = Constant(table=PsDict({"_1": 0.8}), scheduler=sched)
        range_qry = RangeQuery(column="_1", scheduler=sched)
        range_qry.create_dependent_modules(
            source, "result", min_value=lower, max_value=upper
        )
        query_printer = Print(proc=self.terse, scheduler=sched)
        query_printer.input[0] = range_qry.output.result

        hist_index = range_qry.hist_index
        assert hist_index is not None
        min_ = Min(name=f"min_{hash(hist_index)}", scheduler=sched)
        min_.input[0] = hist_index.output.min_out
        min_printer = Print(proc=self.terse, scheduler=sched)
        min_printer.input[0] = min_.output.result

        max_ = Max(name=f"max_{hash(hist_index)}", scheduler=sched)
        max_.input[0] = hist_index.output.max_out
        max_printer = Print(proc=self.terse, scheduler=sched)
        max_printer.input[0] = max_.output.result
    aio.run(sched.start())

    expected_min = cast(float, source.table.min()["_1"])
    produced_min = cast(float, min_.psdict["_1"])
    self.assertAlmostEqual(expected_min, produced_min)

    expected_max = cast(float, source.table.max()["_1"])
    produced_max = cast(float, max_.psdict["_1"])
    self.assertAlmostEqual(expected_max, produced_max)
def test_intersection(self) -> None:
    """``Intersection`` of two ``Bisect`` results must select 0.3 < _1 < 0.8.

    Both ``Bisect`` modules share one ``HistogramIndex`` on column ``_1``;
    one keeps ``_1 > 0.3`` and the other ``_1 < 0.8``.  The index of the
    intersection table is compared with the ids returned by evaluating
    ``(_1>0.3)&(_1<0.8)`` on the histogram index's input data.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=100_000, scheduler=sched)

    lower_table = Table(name=None, dshape="{_1: float64}", data={"_1": [0.3]})
    lower = Constant(table=lower_table, scheduler=sched)
    upper_table = Table(name=None, dshape="{_1: float64}", data={"_1": [0.8]})
    upper = Constant(table=upper_table, scheduler=sched)

    hist_index = HistogramIndex(column="_1", scheduler=sched)
    hist_index.create_dependent_modules(source, "result")

    above_lower = Bisect(column="_1", op=">", hist_index=hist_index, scheduler=sched)
    above_lower.input[0] = hist_index.output.result
    above_lower.input.limit = lower.output.result

    below_upper = Bisect(column="_1", op="<", hist_index=hist_index, scheduler=sched)
    below_upper.input[0] = hist_index.output.result
    below_upper.input.limit = upper.output.result

    inter = Intersection(scheduler=sched)
    # Input 0 is assigned twice, once per Bisect.
    inter.input[0] = above_lower.output.result
    inter.input[0] = below_upper.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = inter.output.result
    aio.run(sched.start())

    assert hist_index.input_module is not None
    indexed_data = hist_index.input_module.output["result"].data()
    expected_ids = indexed_data.eval("(_1>0.3)&(_1<0.8)", result_object="index")
    self.assertEqual(inter.table.index, bitmap(expected_ids))
def _impl_tst_percentiles(self, accuracy):
    """Compare ``Percentiles`` (25/50/75) on column ``_1`` with NumPy.

    The requested percentiles are supplied through a one-row ``Constant``
    table.  After the run, the last row of the module output must be within
    0.01 of ``np.percentile`` computed on the source column.

    Args:
        accuracy: forwarded unchanged to ``Percentiles(accuracy=...)``.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=10_000, scheduler=sched)
    hist_index = HistogramIndex(column="_1", scheduler=sched)
    hist_index.input.table = source.output.table
    requested = Table(
        name=None,
        dshape="{_25: float64, _50: float64, _75: float64}",
        data={"_25": [25.0], "_50": [50.0], "_75": [75.0]},
    )
    which_percentiles = Constant(table=requested, scheduler=sched)
    percentiles = Percentiles(hist_index, accuracy=accuracy, scheduler=sched)
    percentiles.input.table = source.output.table
    percentiles.input.percentiles = which_percentiles.output.table
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input.df = percentiles.output.table
    sched.start()
    sched.join()

    produced = percentiles.table().last().to_dict()
    column = source.table()["_1"].values
    expected = {
        "_25": np.percentile(column, 25.0),
        "_50": np.percentile(column, 50.0),
        "_75": np.percentile(column, 75.0),
    }
    print(
        "Table=> accuracy: ", accuracy,
        " 25:", expected["_25"], produced["_25"],
        " 50:", expected["_50"], produced["_50"],
        " 75:", expected["_75"], produced["_75"],
    )
    for key in ("_25", "_50", "_75"):
        self.assertAlmostEqual(expected[key], produced[key], delta=0.01)
def test_scatterplot2(self):
    """The sample exported by ``MCScatterPlot`` must stay within the bounds.

    Three patches are installed on the scheduler with ``decorate`` before it
    is started with ``idle_proc``.  After the run, the x/y extrema of
    ``to_json()["sample"]["data"]`` are checked against ``LOWER_X``,
    ``LOWER_Y``, ``UPPER_X`` and ``UPPER_Y``.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=2_000_000, scheduler=sched)
    plot = MCScatterPlot(
        scheduler=sched,
        classes=[("Scatterplot", "_1", "_2")],
        approximate=True,
    )
    plot.create_dependent_modules(source, "table", with_sampling=False)
    counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
    counter.input.df = source.output.table
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input.df = plot.output.table

    decorate(sched, VariablePatch1("variable_1"))
    decorate(sched, VariablePatch2("variable_2"))
    decorate(sched, ScatterPlotPatch("mc_scatter_plot_1"))
    plot.scheduler().start(idle_proc=idle_proc)
    sched.join()

    exported = plot.to_json()
    # Each sample entry is a triple; the third component is not checked.
    xs, ys, _ = zip(*exported["sample"]["data"])
    lowest_x, highest_x = min(xs), max(xs)
    lowest_y, highest_y = min(ys), max(ys)
    self.assertGreaterEqual(lowest_x, LOWER_X)
    self.assertGreaterEqual(lowest_y, LOWER_Y)
    self.assertLessEqual(highest_x, UPPER_X)
    self.assertLessEqual(highest_y, UPPER_Y)
def setUpStep(self, step):
    """Prepare the data for one benchmark step.

    Reports ``step * L`` rows as step info, runs a ``RandomTable`` of that
    size on a fresh ``Scheduler`` and stores its content in
    ``self.random_table`` as a pandas ``DataFrame``.

    Args:
        step: step number; the row count is ``step * L``.
    """
    n_rows = step * L
    self.set_step_info(f"{n_rows} rows")
    sched = Scheduler()
    source = RandomTable(10, rows=n_rows, scheduler=sched)
    sched.start()
    produced = source.output.table.output_module.table()
    self.random_table = pd.DataFrame(produced.to_dict())
def p10s_random_min_max(n):
    """Run ``Min`` and ``Max`` over a random table of ``n * L`` rows.

    Sets ``StorageEngine.default`` to ``"hdf5"`` before building the
    dataflow on a fresh ``Scheduler`` and starting it.

    Args:
        n: size factor; the table has ``n * L`` rows and 10 columns.
    """
    StorageEngine.default = "hdf5"
    sched = Scheduler()
    source = RandomTable(10, rows=n * L, scheduler=sched)
    # Both module names carry the same suffix, derived from the source.
    suffix = str(hash(source))
    min_ = Min(name="min_" + suffix, scheduler=sched)
    min_.input.table = source.output.table
    max_ = Max(name="max_" + suffix, scheduler=sched)
    max_.input.table = source.output.table
    sched.start()
def test_datashape(self):
    """Run ``DataShape`` on a random table and print the scheduler modules.

    The test makes no assertion; it only checks that the dataflow runs.
    NumPy's global random generator is seeded with 42 first.
    """
    np.random.seed(42)
    sched = self.scheduler()
    source = RandomTable(3, rows=10_000, scheduler=sched)
    shape_mod = DataShape(scheduler=sched)
    shape_mod.input.table = source.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = shape_mod.output.result
    aio.run(sched.start())
    print(sched.modules())
def p10s_random_min_max(self):
    """Run ``Min`` and ``Max`` over a random table sized by the current step.

    The table has ``self.current_step * L`` rows and 10 columns.
    ``StorageEngine.default`` is set to ``"hdf5"`` before the dataflow is
    built on a fresh ``Scheduler`` and started.
    """
    step = self.current_step
    StorageEngine.default = "hdf5"
    sched = Scheduler()
    source = RandomTable(10, rows=step * L, scheduler=sched)
    suffix = str(hash(source))
    # NOTE(review): Min is given its name through `mid=` but Max through
    # `id=`; at most one of these is likely the real constructor keyword.
    # Left unchanged here -- confirm against the Module signature in use.
    min_ = Min(mid='min_' + suffix, scheduler=sched)
    min_.input.table = source.output.table
    max_ = Max(id='max_' + suffix, scheduler=sched)
    max_.input.table = source.output.table
    sched.start()
def test_max(self) -> None:
    """``Max`` on a random table must match ``random.table.max()``.

    The module result is read from the last row of
    ``max_.cxx_module.get_output_table()``.
    """
    sched = self.scheduler()
    source = RandomTable(10, rows=10_000, scheduler=sched)
    max_ = Max(name=f"max_{hash(source)}", scheduler=sched)
    max_.input[0] = source.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = max_.output.result
    aio.run(sched.start())

    expected = source.table.max()
    output_table = max_.cxx_module.get_output_table()
    produced = output_table.last().to_dict(ordered=True)
    self.compare(expected, produced)
def test_filter(self) -> None:
    """``FilterMod(expr="_1 > 0.5")`` must keep exactly the rows with _1 > 0.5.

    The expected ids come from evaluating ``_1>0.5`` on the data of the
    filter's ``table`` input slot.
    """
    sched = Scheduler()
    source = RandomTable(2, rows=100_000, scheduler=sched)
    filter_ = FilterMod(expr="_1 > 0.5", scheduler=sched)
    filter_.input[0] = source.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = filter_.output.result
    aio.run(sched.start())

    input_data = filter_.get_input_slot("table").data()
    expected_ids = input_data.eval("_1>0.5", result_object="index")
    self.assertEqual(filter_.table.index, bitmap(expected_ids))
def test_online_cov(self):
    """``Corr(mode="CovarianceOnly")`` must match ``np.cov`` of the table.

    The reference is ``np.cov`` of the transposed source array; the module
    result is read with ``result_as_df(["_1", "_2"])``.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=100_000, scheduler=sched)
    cov = Corr(mode="CovarianceOnly", scheduler=sched)
    cov.input[0] = source.output.result
    printer = Print(proc=self.terse, scheduler=sched)
    printer.input[0] = cov.output.result
    aio.run(sched.start())

    expected = np.cov(source.result.to_array().T)
    produced = cov.result_as_df(["_1", "_2"]).values
    self.assertTrue(np.allclose(expected, produced))
def test_dataflow_6_dynamic(self) -> None:
    """Delete the modules of a running dataflow in three chained stages.

    The dataflow starts as ``table`` feeding ``sink``, ``prt`` and ``prt2``.
    Each stage checks ``collateral_damage`` for some module names, deletes
    modules and schedules the next stage:

    1. ``modify_1`` deletes ``prt2``;
    2. ``modify_2`` deletes ``prt``;
    3. ``modify_3`` deletes ``sink`` and ``table``.

    A watchdog registered for loop 100 stops the scheduler and records that
    it fired; the test fails if it did, since the scheduler was expected to
    have stopped before then.
    """
    s = self.scheduler()
    table = RandomTable(name="table", columns=["a"], throttle=1000, scheduler=s)
    sink = Sink(name="sink", scheduler=s)
    sink.input.inp = table.output.result
    prt = Print(name="prt", proc=self.terse, scheduler=s)
    prt.input.df = table.output.result
    prt2 = Print(name="prt2", proc=self.terse, scheduler=s)
    prt2.input.df = table.output.result
    s.commit()
    watchdog_fired = False

    async def modify_1(scheduler: Scheduler, run_number: int) -> None:
        with s as dataflow:
            print("Checking module deletion")
            self.assertIsInstance(dataflow, Dataflow)
            deps = dataflow.collateral_damage("prt2")
            self.assertEqual(deps, {"prt2"})
            deps = dataflow.collateral_damage("prt")
            self.assertEqual(deps, {"prt"})
            deps = dataflow.collateral_damage("prt", "prt2")
            self.assertEqual(deps, {"prt", "prt2"})
            dataflow.delete_modules("prt2")
        s.on_loop(modify_2, 5)

    async def modify_2(scheduler: Scheduler, run_number: int) -> None:
        self.assertFalse("prt2" in scheduler)
        with s as dataflow:
            print("Checking more module deletion")
            deps = dataflow.collateral_damage("prt")
            self.assertEqual(deps, {"prt"})
            deps = dataflow.collateral_damage("prt", "sink")
            self.assertEqual(deps, {"prt", "sink", "table"})
            dataflow.delete_modules("prt")
        s.on_loop(modify_3, 5)

    async def modify_3(scheduler: Scheduler, run_number: int) -> None:
        self.assertFalse("prt" in scheduler)
        with s as dataflow:
            print("Checking even more module deletion")
            deps = dataflow.collateral_damage("sink")
            self.assertEqual(deps, {"sink", "table"})
            dataflow.delete_modules("sink", "table")

    async def stop_error(scheduler: Scheduler, run_number: int) -> None:
        # Watchdog: asserting here would raise before the scheduler could be
        # stopped, so the failure is recorded and reported after the run.
        nonlocal watchdog_fired
        watchdog_fired = True
        await scheduler.stop()

    s.on_loop(modify_1, 5)
    s.on_loop(stop_error, 100)
    aio.run(s.start())
    self.assertFalse(watchdog_fired, "Scheduler should have stopped")