def test_histogram2d1(self) -> None:
    s = self.scheduler()
    csv = CSVLoader(
        get_dataset("bigfile"), index_col=False, header=None, scheduler=s
    )
    min_ = Min(scheduler=s)
    min_.input[0] = csv.output.result
    max_ = Max(scheduler=s)
    max_.input[0] = csv.output.result
    histogram2d = Histogram2D(
        1, 2, xbins=100, ybins=100, scheduler=s
    )  # columns are called 1..30
    histogram2d.input[0] = csv.output.result
    histogram2d.input.min = min_.output.result
    histogram2d.input.max = max_.output.result
    heatmap = Heatmap(filename="histo_%03d.png", scheduler=s)
    heatmap.input.array = histogram2d.output.result
    pr = Every(proc=self.terse, scheduler=s)
    pr.input[0] = heatmap.output.result
    aio.run(csv.scheduler().start())
    last = notNone(histogram2d.table.last()).to_dict()
    h1 = last["array"]
    bounds = [[last["ymin"], last["ymax"]], [last["xmin"], last["xmax"]]]
    df = pd.read_csv(
        get_dataset("bigfile"), header=None, usecols=[1, 2]  # type: ignore
    )
    v = df.to_numpy()  # .reshape(-1, 2)
    bins = [histogram2d.params.ybins, histogram2d.params.xbins]
    h2 = fh.histogram2d(v[:, 1], v[:, 0], bins=bins, range=bounds)
    h2 = np.flip(h2, axis=0)  # type: ignore
    self.assertTrue(np.allclose(h1, h2))
def t_histogram1d_impl(self, **kw: Any) -> None:
    s = self.scheduler()
    csv = CSVLoader(
        get_dataset("bigfile"), index_col=False, header=None, scheduler=s
    )
    stirrer = Stirrer(update_column="_2", fixed_step_size=1000, scheduler=s, **kw)
    stirrer.input[0] = csv.output.result
    min_ = Min(scheduler=s)
    min_.input[0] = stirrer.output.result
    max_ = Max(scheduler=s)
    max_.input[0] = stirrer.output.result
    histogram1d = Histogram1D("_2", scheduler=s)  # columns are called 1..30
    histogram1d.input[0] = stirrer.output.result
    histogram1d.input.min = min_.output.result
    histogram1d.input.max = max_.output.result
    # pr = Print(scheduler=s)
    pr = Every(proc=self.terse, scheduler=s)
    pr.input[0] = histogram1d.output.result
    aio.run(s.start())
    _ = histogram1d.trace_stats()
    last = notNone(histogram1d.table.last()).to_dict()
    h1 = last["array"]
    bounds = (last["min"], last["max"])
    tab = stirrer.table.loc[:, ["_2"]]
    assert tab is not None
    v = tab.to_array().reshape(-1)
    h2, _ = np.histogram(  # type: ignore
        v, bins=histogram1d.params.bins, density=False, range=bounds
    )
    self.assertEqual(np.sum(h1), np.sum(h2))
    self.assertListEqual(h1.tolist(), h2.tolist())
def test_hub_if_else(self):
    s = Scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    stirrer = Stirrer(
        update_column="_1",
        delete_rows=5,
        update_rows=5,
        fixed_step_size=100,
        scheduler=s,
    )
    stirrer.input[0] = random.output.result
    switch = Switch(condition=lambda x: False, scheduler=s)
    switch.input[0] = stirrer.output.result
    max_ = Max(name="max_" + str(hash(random)), scheduler=s)
    max_.input[0] = switch.output.result
    min_ = Min(name="min_" + str(hash(random)), scheduler=s)
    min_.input[0] = switch.output.result_else
    hub = Hub(scheduler=s)
    hub.input.table = min_.output.result
    hub.input.table = max_.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = hub.output.result
    aio.run(s.start())
    res1 = stirrer.result.min()
    res2 = hub.result
    self.compare(res1, res2)
def test_histogram1d1(self) -> None:
    s = self.scheduler()
    csv = CSVLoader(
        get_dataset("bigfile"), index_col=False, header=None, scheduler=s
    )
    min_ = Min(scheduler=s)
    min_.input[0] = csv.output.result
    max_ = Max(scheduler=s)
    max_.input[0] = csv.output.result
    histogram1d = Histogram1D("_2", scheduler=s)  # columns are called 1..30
    histogram1d.input[0] = csv.output.result
    histogram1d.input.min = min_.output.result
    histogram1d.input.max = max_.output.result
    pr = Every(proc=self.terse, scheduler=s)
    pr.input[0] = histogram1d.output.result
    aio.run(s.start())
    _ = histogram1d.trace_stats()
    last = notNone(histogram1d.table.last()).to_dict()
    h1 = last["array"]
    bounds = (last["min"], last["max"])
    df = pd.read_csv(
        get_dataset("bigfile"), header=None, usecols=[2]  # type: ignore
    )
    v = df.to_numpy().reshape(-1)
    h2, _ = np.histogram(  # type: ignore
        v, bins=histogram1d.params.bins, density=False, range=bounds
    )
    self.assertListEqual(h1.tolist(), h2.tolist())
def test_hist_index_min_max(self):
    "Test min_out and max_out on HistogramIndex"
    s = self.scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    t_min = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
    min_value = Constant(table=t_min, scheduler=s)
    t_max = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
    max_value = Constant(table=t_max, scheduler=s)
    range_qry = RangeQuery(column='_1', scheduler=s)
    range_qry.create_dependent_modules(random, 'table',
                                       min_value=min_value,
                                       max_value=max_value)
    prt = Print(proc=self.terse, scheduler=s)
    prt.input.df = range_qry.output.table
    hist_index = range_qry.hist_index
    min_ = Min(name='min_' + str(hash(hist_index)), scheduler=s)
    min_.input.table = hist_index.output.min_out
    prt2 = Print(proc=self.terse, scheduler=s)
    prt2.input.df = min_.output.table
    max_ = Max(name='max_' + str(hash(hist_index)), scheduler=s)
    max_.input.table = hist_index.output.max_out
    pr3 = Print(proc=self.terse, scheduler=s)
    pr3.input.df = max_.output.table
    s.start()
    s.join()
    res1 = random.table().min()['_1']
    res2 = min_.table().last().to_dict()['_1']
    self.assertAlmostEqual(res1, res2)
    res1 = random.table().max()['_1']
    res2 = max_.table().last().to_dict()['_1']
    self.assertAlmostEqual(res1, res2)
def test_hist_index_min_max(self) -> None:
    "Test min_out and max_out on HistogramIndex"
    s = self.scheduler()
    with s:
        random = RandomTable(2, rows=100000, scheduler=s)
        t_min = PsDict({"_1": 0.3})
        min_value = Constant(table=t_min, scheduler=s)
        t_max = PsDict({"_1": 0.8})
        max_value = Constant(table=t_max, scheduler=s)
        range_qry = RangeQuery(column="_1", scheduler=s)
        range_qry.create_dependent_modules(
            random, "result", min_value=min_value, max_value=max_value
        )
        prt = Print(proc=self.terse, scheduler=s)
        prt.input[0] = range_qry.output.result
        hist_index = range_qry.hist_index
        assert hist_index is not None
        min_ = Min(name="min_" + str(hash(hist_index)), scheduler=s)
        min_.input[0] = hist_index.output.min_out
        prt2 = Print(proc=self.terse, scheduler=s)
        prt2.input[0] = min_.output.result
        max_ = Max(name="max_" + str(hash(hist_index)), scheduler=s)
        max_.input[0] = hist_index.output.max_out
        pr3 = Print(proc=self.terse, scheduler=s)
        pr3.input[0] = max_.output.result
    aio.run(s.start())
    res1 = cast(float, random.table.min()["_1"])
    res2 = cast(float, min_.psdict["_1"])
    self.assertAlmostEqual(res1, res2)
    res1 = cast(float, random.table.max()["_1"])
    res2 = cast(float, max_.psdict["_1"])
    self.assertAlmostEqual(res1, res2)
def t_histogram2d_impl(self, **kw: Any) -> None:
    s = self.scheduler()
    random = RandomTable(3, rows=100000, scheduler=s)
    stirrer = Stirrer(update_column="_2", fixed_step_size=1000, scheduler=s, **kw)
    stirrer.input[0] = random.output.result
    min_ = Min(scheduler=s)
    min_.input[0] = stirrer.output.result
    max_ = Max(scheduler=s)
    max_.input[0] = stirrer.output.result
    histogram2d = Histogram2D(
        0, 1, xbins=100, ybins=100, scheduler=s
    )  # columns are called 1..30
    histogram2d.input[0] = stirrer.output.result
    histogram2d.input.min = min_.output.result
    histogram2d.input.max = max_.output.result
    heatmap = Heatmap(filename="histo_%03d.png", scheduler=s)
    heatmap.input.array = histogram2d.output.result
    pr = Every(proc=self.terse, scheduler=s)
    pr.input[0] = heatmap.output.result
    aio.run(s.start())
    last = notNone(histogram2d.table.last()).to_dict()
    h1 = last["array"]
    bounds = [[last["ymin"], last["ymax"]], [last["xmin"], last["xmax"]]]
    t = stirrer.table.loc[:, ["_1", "_2"]]
    assert t is not None
    v = t.to_array()
    bins = [histogram2d.params.ybins, histogram2d.params.xbins]
    h2 = fh.histogram2d(v[:, 1], v[:, 0], bins=bins, range=bounds)
    h2 = np.flip(h2, axis=0)  # type: ignore
    self.assertEqual(np.sum(h1), np.sum(h2))
    self.assertListEqual(h1.reshape(-1).tolist(), h2.reshape(-1).tolist())
def test_min(self) -> None:
    s = self.scheduler()
    random = RandomTable(10, rows=10000, scheduler=s)
    min_ = Min(name="min_" + str(hash(random)), scheduler=s)
    min_.input[0] = random.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = min_.output.result
    aio.run(s.start())
    # s.join()
    res1 = random.table.min()
    res2 = min_.psdict
    self.compare(res1, res2)
def test_min(self):
    s = self.scheduler()
    random = RandomTable(10, rows=10000, scheduler=s)
    min_ = Min(name='min_' + str(hash(random)), scheduler=s)
    min_.input.table = random.output.table
    pr = Print(proc=self.terse, scheduler=s)
    pr.input.df = min_.output.table
    s.start()
    s.join()
    res1 = random.table().min()
    res2 = min_.table().last()
    self.compare(res1, res2)
def test_min(self):
    s = self.scheduler()
    random = SimpleCSVLoader(
        get_dataset("bigfile_multiscale"), nrows=10_000, scheduler=s
    )
    min_ = Min(name="min_" + str(hash(random)), scheduler=s)
    min_.input[0] = random.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = min_.output.result
    aio.run(s.start())
    # s.join()
    res1 = random.result.min()
    res2 = min_.result
    self.compare(res1, res2)
def test_idxmin(self):
    s = Scheduler()
    random = RandomTable(10, rows=10000, throttle=1000, scheduler=s)
    idxmin = IdxMin(scheduler=s)
    idxmin.input.df = random.output.df
    min_ = Min(scheduler=s)  # renamed from `min` to avoid shadowing the builtin
    min_.input.df = random.output.df
    pr = Print(scheduler=s)
    pr.input.df = idxmin.output.min
    s.start()
    min1 = last_row(min_.df(), remove_update=True)
    # print(min1)
    min2 = last_row(idxmin.min(), remove_update=True)
    # print(min2)
    self.assertTrue((min1 == min2).all())
def test_histogram2d(self):
    s = self.scheduler()
    csv = CSVLoader(get_dataset('bigfile'), index_col=False, header=None,
                    scheduler=s)
    min_ = Min(scheduler=s)
    min_.input.table = csv.output.table
    max_ = Max(scheduler=s)
    max_.input.table = csv.output.table
    histogram2d = Histogram2D(1, 2, xbins=100, ybins=100,
                              scheduler=s)  # columns are called 1..30
    histogram2d.input.table = csv.output.table
    histogram2d.input.min = min_.output.table
    histogram2d.input.max = max_.output.table
    heatmap = Heatmap(filename='histo_%03d.png', scheduler=s)
    heatmap.input.array = histogram2d.output.table
    # pr = Print(scheduler=s)
    pr = Every(proc=self.terse, scheduler=s)
    # pr.input.df = heatmap.output.heatmap
    # pr.input.df = histogram2d.output.df
    pr.input.df = csv.output.table
    csv.scheduler().start()
    s.join()
    # self.scheduler.thread.join()
    s = histogram2d.trace_stats()
def test_dataflow_1_dynamic(self) -> None:
    scheduler = self.scheduler(clean=True)
    table = RandomTable(
        name="table", columns=["a"], throttle=1000, scheduler=scheduler
    )
    m = Min(name="min", scheduler=scheduler)
    prt = Print(proc=self.terse, name="print_min", scheduler=scheduler)
    m.input.table = table.output.result
    prt.input.df = m.output.result
    started = False

    def proc(x: Any) -> None:
        nonlocal started
        print("proc max called")
        started = True

    async def _add_max(scheduler: Scheduler, run_number: int) -> None:
        with scheduler:
            print("adding new modules")
            m = Max(name="max", scheduler=scheduler)
            prt = Print(name="print_max", proc=proc, scheduler=scheduler)
            m.input.table = table.output.result
            prt.input.df = m.output.result

    scheduler.on_loop(_add_max, 5)  # run the function after 5 loops
    scheduler.on_loop(self._stop, 10)
    # from nose.tools import set_trace; set_trace()
    aio.run(scheduler.start())
    self.assertTrue(started)
def test_dataflow_2_add_remove(self) -> None:
    scheduler = self.scheduler(clean=True)
    table = RandomTable(
        name="table", columns=["a"], throttle=1000, scheduler=scheduler
    )
    m = Min(name="min", scheduler=scheduler)
    prt = Print(proc=self.terse, name="print_min", scheduler=scheduler)
    m.input.table = table.output.result
    prt.input.df = m.output.result
    started = False

    def proc(x: Any) -> None:
        nonlocal started
        print("proc max called")
        started = True

    async def _add_max_remove_min(scheduler: Scheduler, run_number: int) -> None:
        with scheduler as dataflow:
            print("adding new modules")
            m = Max(name="max", scheduler=scheduler)
            prt = Print(name="print_max", proc=proc, scheduler=scheduler)
            m.input.table = table.output.result
            prt.input.df = m.output.result
            print("removing min module")
            dataflow.delete_modules("min", "print_min")

    # t = _add_max_remove_min(csv, scheduler, proc=proc)
    scheduler.on_loop(_add_max_remove_min, 5)
    scheduler.on_loop(self._stop, 10)
    aio.run(scheduler.start())
    self.assertTrue(started)
def test_idxmin(self):
    s = self.scheduler()
    random = RandomTable(10, rows=10000, throttle=1000, scheduler=s)
    idxmin = IdxMin(scheduler=s)
    idxmin.input.table = random.output.table
    min_ = Min(scheduler=s)
    min_.input.table = random.output.table
    pr = Print(proc=self.terse, scheduler=s)
    pr.input.df = idxmin.output.min
    s.start()
    s.join()
    min1 = min_.table().last().to_dict()
    # print('min1', min1)
    min2 = idxmin.min().last().to_dict()
    # print('min2', min2)
    self.assertAlmostEqual(min1, min2)
def test_histogram1d(self) -> None:
    s = self.scheduler()
    csv = CSVLoader(
        get_dataset("bigfile"), index_col=False, header=None, scheduler=s
    )
    min_ = Min(scheduler=s)
    min_.input[0] = csv.output.result
    max_ = Max(scheduler=s)
    max_.input[0] = csv.output.result
    histogram1d = Histogram1D("_2", scheduler=s)  # columns are called 1..30
    histogram1d.input[0] = csv.output.result
    histogram1d.input.min = min_.output.result
    histogram1d.input.max = max_.output.result
    pr = Every(proc=self.terse, scheduler=s)
    pr.input[0] = histogram1d.output.result
    aio.run(s.start())
    _ = histogram1d.trace_stats()
def test_bin_join(self):
    s = self.scheduler()
    random = RandomTable(10, rows=10000, scheduler=s)
    min_1 = Min(name='min_1' + str(hash(random)), scheduler=s, columns=['_1'])
    min_1.input.table = random.output.table
    min_2 = Min(name='min_2' + str(hash(random)), scheduler=s, columns=['_2'])
    min_2.input.table = random.output.table
    bj = BinJoin(scheduler=s)
    bj.input.first = min_1.output.table
    bj.input.second = min_2.output.table
    pr = Print(proc=self.terse, scheduler=s)
    pr.input.df = bj.output.table
    s.start()
    s.join()
    res1 = random.table().min()
    res2 = bj.table().last().to_dict()
    self.assertAlmostEqual(res1['_1'], res2['_1'])
    self.assertAlmostEqual(res1['_2'], res2['_2'])
def p10s_random_min_max(n):
    StorageEngine.default = "hdf5"
    s = Scheduler()
    random = RandomTable(10, rows=n * L, scheduler=s)
    min_ = Min(name='min_' + str(hash(random)), scheduler=s)
    min_.input.table = random.output.table
    max_ = Max(name='max_' + str(hash(random)), scheduler=s)
    max_.input.table = random.output.table
    s.start()
def add_min():
    m = Min(scheduler=s)
    # Of course, sleeping here is a bad idea. This is to illustrate
    # that add_min will be executed atomically by the scheduler.
    # Using a sleep outside of add_oneshot_tick_proc would lead to an
    # inconsistent state.
    # sleep(1)
    m.input.table = csv.output.table
    prt = Print(proc=self.terse, scheduler=s)
    prt.input.df = m.output.table
def p10s_random_min_max(self):
    n = self.current_step
    StorageEngine.default = "hdf5"
    s = Scheduler()
    random = RandomTable(10, rows=n * L, scheduler=s)
    min_ = Min(mid='min_' + str(hash(random)), scheduler=s)
    min_.input.table = random.output.table
    max_ = Max(id='max_' + str(hash(random)), scheduler=s)
    max_.input.table = random.output.table
    s.start()
def test_idxmin(self) -> None:
    s = self.scheduler()
    random = RandomTable(10, rows=10000, throttle=1000, scheduler=s)
    idxmin = IdxMin(scheduler=s)
    idxmin.input[0] = random.output.result
    min_ = Min(scheduler=s)
    min_.input[0] = random.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = idxmin.output.result
    pr2 = Print(proc=self.terse, scheduler=s)
    pr2.input[0] = min_.output.result
    aio.run(s.start())
    min1 = min_.psdict
    # print('min1', min1)
    min = idxmin.min()
    assert min is not None
    min2 = notNone(min.last()).to_dict()
    # print('min2', min2)
    self.compare(min1, min2)
def test_histogram2d(self) -> None:
    s = self.scheduler()
    csv = CSVLoader(
        get_dataset("bigfile"), index_col=False, header=None, scheduler=s
    )
    min_ = Min(scheduler=s)
    min_.input[0] = csv.output.result
    max_ = Max(scheduler=s)
    max_.input[0] = csv.output.result
    histogram2d = Histogram2D(
        1, 2, xbins=100, ybins=100, scheduler=s
    )  # columns are called 1..30
    histogram2d.input[0] = csv.output.result
    histogram2d.input.min = min_.output.result
    histogram2d.input.max = max_.output.result
    heatmap = Heatmap(filename="histo_%03d.png", scheduler=s)
    heatmap.input.array = histogram2d.output.result
    pr = Every(proc=self.terse, scheduler=s)
    pr.input[0] = heatmap.output.result
    aio.run(csv.scheduler().start())
    _ = histogram2d.trace_stats()
def test_paste(self) -> None:
    s = self.scheduler()
    random = RandomTable(10, rows=10000, scheduler=s)
    min_1 = Min(name="min_1" + str(hash(random)), scheduler=s, columns=["_1"])
    min_1.input[0] = random.output.result
    d2t_1 = Dict2Table(scheduler=s)
    d2t_1.input.dict_ = min_1.output.result
    min_2 = Min(name="min_2" + str(hash(random)), scheduler=s, columns=["_2"])
    min_2.input[0] = random.output.result
    d2t_2 = Dict2Table(scheduler=s)
    d2t_2.input.dict_ = min_2.output.result
    bj = Paste(scheduler=s)
    bj.input.first = d2t_1.output.result
    bj.input.second = d2t_2.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = bj.output.result
    aio.run(s.start())
    res1 = random.table.min()
    res2 = notNone(bj.table.last()).to_dict()
    self.assertAlmostEqual(res1["_1"], res2["_1"])
    self.assertAlmostEqual(res1["_2"], res2["_2"])
def test_dataflow_3_dels(self) -> None:
    s = self.scheduler()
    table = RandomTable(name="table", columns=["a"], throttle=1000, scheduler=s)
    m = Min(name="min", scheduler=s)
    m.input.table = table.output.result
    prt = Print(name="prt", scheduler=s)
    prt.input.df = m.output.result
    aio.run(s.step())
    with s as dataflow:
        self.assertTrue(isinstance(dataflow, Dataflow))
        deps = dataflow.collateral_damage("table")
        self.assertEqual(deps, set(["table", "min", "prt"]))
def main():
    "Main function"
    csvmod = RandomTable(columns=['a', 'b', 'c'], rows=1000000,
                         random=np.random.randn, throttle=1000, scheduler=s)
    minmod = Min(scheduler=s)
    minmod.input.table = csvmod.output.table
    maxmod = Max(scheduler=s)
    maxmod.input.table = csvmod.output.table
    histograms = Histograms(scheduler=s)
    histograms.input.table = csvmod.output.table
    histograms.input.min = minmod.output.table
    histograms.input.max = maxmod.output.table
    prlen = Every(scheduler=s)
    prlen.input.df = histograms.output.table
    return csvmod
def test_dataflow(self):
    s = Scheduler()
    with Dataflow(s):
        csv = CSVLoader(get_dataset('bigfile'), name="csv",
                        index_col=False, header=None)
        m = Min()
        m.input.table = csv.output.table
        prt = Print(proc=self.terse)
        prt.input.df = m.output.table
    self.assertIs(s["csv"], csv)
    csv.scheduler().start()
    sleep(1)
    self.assertTrue(csv.scheduler().is_running())
    s.stop()
    s.join()
def test_histogram1d(self):
    s = self.scheduler()
    csv = CSVLoader(get_dataset('bigfile'), index_col=False, header=None,
                    scheduler=s)
    min_ = Min(scheduler=s)
    min_.input.table = csv.output.table
    max_ = Max(scheduler=s)
    max_.input.table = csv.output.table
    histogram1d = Histogram1D('_2', scheduler=s)  # columns are called 1..30
    histogram1d.input.table = csv.output.table
    histogram1d.input.min = min_.output.table
    histogram1d.input.max = max_.output.table
    # pr = Print(scheduler=s)
    pr = Every(proc=self.terse, scheduler=s)
    pr.input.df = csv.output.table
    s.start(tick_proc=lambda s, r: csv.is_terminated() and s.stop())
    s.join()
    s = histogram1d.trace_stats()
def add_min(s: Scheduler, r: int) -> None:
    with s:
        m = Min(scheduler=s)
        m.input.table = csv.output.result
        prt = Print(proc=self.terse, scheduler=s)
        prt.input.df = m.output.result
from progressivis import Scheduler, Every, Print
from progressivis.io import CSVLoader
from progressivis.stats import Histogram2D, Min, Max
from progressivis.datasets import get_dataset
from progressivis.vis import Heatmap

print("Loading test_histogram2d")
print("Type of default_scheduler is %s" % type(Scheduler.default))

csv = CSVLoader(get_dataset('bigfile'), index_col=False, header=None, engine='c')
pr = Every()
pr.input.df = csv.output.table
min_ = Min()
min_.input.table = csv.output.table
max_ = Max()
max_.input.table = csv.output.table
histogram2d = Histogram2D('_1', '_2', xbins=128, ybins=128)
histogram2d.input.table = csv.output.table
histogram2d.input.min = min_.output.table
histogram2d.input.max = max_.output.table
# heatmap
heatmap = Heatmap(filename='histo_%03d.png')
heatmap.input.array = histogram2d.output.table
pr = Print(name='print')
pr.input.df = csv.output.table

if __name__ == '__main__':
    csv.start()
# SUFFIX = ''
PREFIX = '../nyc-taxi/'
SUFFIX = '.bz2'

URLS = [
    PREFIX + 'yellow_tripdata_2015-01.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-02.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-03.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-04.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-05.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-06.csv' + SUFFIX,
]

filenames = pd.DataFrame({'filename': URLS})
cst = Constant(Table('filenames', data=filenames), scheduler=s)
csv = CSVLoader(index_col=False, skipinitialspace=True,
                usecols=['dropoff_longitude', 'dropoff_latitude'],
                filter_=filter_, scheduler=s)
csv.input.filenames = cst.output.table
min = Min(scheduler=s)
min.input.table = csv.output.table
max = Max(scheduler=s)
max.input.table = csv.output.table
histogram2d = Histogram2D('dropoff_longitude', 'dropoff_latitude',
                          xbins=RESOLUTION, ybins=RESOLUTION, scheduler=s)
histogram2d.input.table = csv.output.table
histogram2d.input.min = min.output.table
histogram2d.input.max = max.output.table
heatmap = Heatmap(filename='nyc_dropoff_yellow%d.png', history=5, scheduler=s)
heatmap.input.array = histogram2d.output.table

if __name__ == '__main__':
    s.start()
def test_dataflow_0(self) -> None:
    scheduler = self.scheduler()
    saved_inputs = None
    saved_outputs = None
    with scheduler as dataflow:
        csv = CSVLoader(
            get_dataset("smallfile"),
            name="csv",
            index_col=False,
            header=None,
            scheduler=scheduler,
        )
        self.assertIs(scheduler["csv"], csv)
        self.assertEqual(
            dataflow.validate_module(csv),
            ['Output slot "result" missing in module "csv"'],
        )
        m = Min(name="min", scheduler=scheduler)
        self.assertIs(dataflow[m.name], m)
        self.assertEqual(
            dataflow.validate_module(m),
            [
                'Input slot "table" missing in module "min"',
                'Output slot "result" missing in module "min"',
            ],
        )
        prt = Print(proc=self.terse, name="print", scheduler=scheduler)
        self.assertIs(dataflow[prt.name], prt)
        self.assertEqual(
            dataflow.validate_module(prt),
            ['Input slot "df" missing in module "print"'],
        )
        m.input.table = csv.output.result
        prt.input.df = m.output.result
        self.assertEqual(len(dataflow), 3)
        self.assertEqual(dataflow.dir(), ["csv", "min", "print"])
        errors = dataflow.validate()
        self.assertEqual(errors, [])
        deps = dataflow.order_modules()
        self.assertEqual(deps, ["csv", m.name, prt.name])
        saved_inputs = dataflow.inputs
        saved_outputs = dataflow.outputs
        # dataflow.__exit__() is called here
    # print('Old modules:', end=' ')
    # pprint(scheduler._modules)
    # scheduler._update_modules()  # force modules in the main loop
    # print('New modules:', end=' ')
    # pprint(scheduler.modules())
    with scheduler as dataflow:
        # nothing should change when nothing is modified in dataflow
        self.assertEqual(len(dataflow), 3)
        deps = dataflow.order_modules()
        self.assertEqual(deps, ["csv", m.name, prt.name])
        self.assertEqual(dataflow.inputs, saved_inputs)
        self.assertEqual(dataflow.outputs, saved_outputs)
    # scheduler._update_modules()  # force modules in the main loop
    with scheduler as dataflow:
        sink = Sink(name="sink", scheduler=scheduler)
        sink.input.inp = m.output.result
        dataflow.delete_modules(prt)
        self.assertEqual(len(dataflow), 3)
        deps = dataflow.order_modules()
        self.assertEqual(deps, ["csv", m.name, "sink"])
        # pprint(dataflow.inputs)
        # pprint(dataflow.outputs)
    # print('Old modules:')
    # pprint(scheduler._new_modules)
    # scheduler._update_modules()  # force modules in the main loop
    # print('New modules:')
    # pprint(scheduler.modules())
    with scheduler as dataflow:
        self.assertEqual(len(dataflow), 3)
        deps = dataflow.order_modules()
        self.assertEqual(deps, ["csv", m.name, "sink"])
        prt = Print(proc=self.terse, name="print", scheduler=scheduler)
        self.assertIs(dataflow[prt.name], prt)
        self.assertEqual(
            dataflow.validate_module(prt),
            ['Input slot "df" missing in module "print"'],
        )
        prt.input.df = m.output.result