def test_scheduler(self):
    """Check that modules can be connected while an ``MTScheduler`` is running.

    A CSV loader feeding a ``Sample`` is started; a second ``Sample`` is then
    connected to the loader, and a ``Min``/``Print`` pair is registered
    through ``add_oneshot_tick_proc``. Both samplers must appear after the
    loader in the scheduler's run order.
    """
    s = MTScheduler()
    csv = CSVLoader(get_dataset('bigfile'), index_col=False, header=None, scheduler=s)
    smp = Sample(n=10, scheduler=s)
    smp.input.df = csv.output.df
    csv.scheduler().start()
    # From here on the scheduler has been started: make sure it is stopped and
    # joined even when one of the assertions below fails.
    try:
        sleep(1)
        self.assertTrue(csv.scheduler().is_running())
        smp2 = Sample(n=15, scheduler=s)
        smp2.input.df = csv.output.df

        def add_min():
            m = Min(scheduler=s)
            # Of course, sleeping here would be a bad idea; it would only
            # illustrate that add_min is executed atomically by the scheduler.
            # Using a sleep outside of add_oneshot_tick_proc would lead to an
            # inconsistent state.
            m.input.df = smp2.output.df
            prt = Print(scheduler=s)
            prt.input.df = m.output.df

        s.add_oneshot_tick_proc(add_min)
        sleep(1)
        # assertGreater reports both indices on failure, unlike assertTrue(a > b).
        self.assertGreater(s._runorder.index(smp.id), s._runorder.index(csv.id))
        self.assertGreater(s._runorder.index(smp2.id), s._runorder.index(csv.id))
        # NOTE: the position of the Min module is not checked; ``m`` is local
        # to add_min and is not reachable from here.
    finally:
        s.stop()
        s.join()
def test_histogram2d(self):
    """Run a CSV -> Min/Max -> Histogram2D -> Heatmap pipeline until the scheduler ends.

    No assertion is made on the result; the test only requires that the
    pipeline runs and that ``trace_stats()`` can be called afterwards.
    """
    s = self.scheduler()
    csv = CSVLoader(get_dataset('bigfile'), index_col=False, header=None, scheduler=s)
    min_ = Min(scheduler=s)
    min_.input.table = csv.output.table
    max_ = Max(scheduler=s)
    max_.input.table = csv.output.table
    # columns are called 1..30
    histogram2d = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=s)
    histogram2d.input.table = csv.output.table
    histogram2d.input.min = min_.output.table
    histogram2d.input.max = max_.output.table
    heatmap = Heatmap(filename='histo_%03d.png', scheduler=s)
    heatmap.input.array = histogram2d.output.table
    # The reporter is attached to the loader output, not to the heatmap or
    # the histogram.
    pr = Every(proc=self.terse, scheduler=s)
    pr.input.df = csv.output.table
    csv.scheduler().start()
    s.join()
    # The statistics are not inspected; bind them to ``_`` instead of
    # overwriting the scheduler variable ``s``.
    _ = histogram2d.trace_stats()
def test_sample(self):
    """Start a CSV loader -> ``Sample(n=10)`` -> ``Print`` pipeline.

    The test makes no assertion; it only builds the pipeline and starts the
    scheduler.
    """
    scheduler = Scheduler()
    loader = CSVLoader(
        get_dataset('bigfile'),
        index_col=False,
        header=None,
        scheduler=scheduler,
    )
    sampler = Sample(n=10, scheduler=scheduler)
    sampler.input.df = loader.output.df
    printer = Print(scheduler=scheduler)
    printer.input.df = sampler.output.df
    # The scheduler is reached through the loader, as in the sibling tests.
    loader.scheduler().start()
def test_scatterplot(self):
    """Run a ``ScatterPlot`` over 'bigfile' and check the loaded row count.

    The scatter plot creates its dependent modules from the CSV loader's
    ``df`` output; after the scheduler has been started with ``idle_proc``,
    the loader's data frame must have length 1000000.
    """
    s = Scheduler()
    csv = CSVLoader(
        get_dataset('bigfile'),
        index_col=False,
        header=None,
        force_valid_ids=True,
        scheduler=s,
    )
    sp = ScatterPlot(x_column='_1', y_column='_2', scheduler=s)
    sp.create_dependent_modules(csv, 'df')
    cnt = Every(proc=print_len, constant_time=True, scheduler=s)
    cnt.input.df = csv.output.df
    prt = Print(scheduler=s)
    prt.input.df = sp.histogram2d.output.df
    csv.scheduler().start(None, idle_proc)
    # assertEquals is a deprecated alias that was removed in Python 3.12;
    # assertEqual is the supported spelling.
    self.assertEqual(len(csv.df()), 1000000)
def test_sample(self):
    """Check that ``Sample(samples=10)`` ends with a table of length 10.

    The pipeline is CSV loader -> Sample -> Print; the assertion is made
    after the scheduler has been started and joined.
    """
    scheduler = self.scheduler()
    loader = CSVLoader(
        get_dataset('bigfile'),
        index_col=False,
        header=None,
        scheduler=scheduler,
    )
    sampler = Sample(samples=10, scheduler=scheduler)
    sampler.input.table = loader.output.table
    printer = Print(proc=self.terse, scheduler=scheduler)
    printer.input.df = sampler.output.table
    loader.scheduler().start()
    scheduler.join()
    sampled = sampler.table()
    self.assertEqual(len(sampled), 10)
def test_histogram2d1(self) -> None:
    """Compare the last ``Histogram2D`` result with one from ``fh.histogram2d``.

    The pipeline CSV -> Min/Max -> Histogram2D -> Heatmap -> Every is run,
    then columns 1 and 2 of the same dataset are read with pandas and binned
    by ``fh.histogram2d`` using the bounds stored in the last histogram row.
    The two arrays must be close according to ``np.allclose``.
    """
    sched = self.scheduler()
    loader = CSVLoader(
        get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
    )
    lower = Min(scheduler=sched)
    lower.input[0] = loader.output.result
    upper = Max(scheduler=sched)
    upper.input[0] = loader.output.result
    # columns are called 1..30
    hist = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
    hist.input[0] = loader.output.result
    hist.input.min = lower.output.result
    hist.input.max = upper.output.result
    heat = Heatmap(filename="histo_%03d.png", scheduler=sched)
    heat.input.array = hist.output.result
    reporter = Every(proc=self.terse, scheduler=sched)
    reporter.input[0] = heat.output.result
    aio.run(loader.scheduler().start())

    # Last row of the histogram table: the array plus the bounds it covers.
    final = notNone(hist.table.last()).to_dict()
    computed = final["array"]
    y_range = [final["ymin"], final["ymax"]]
    x_range = [final["xmin"], final["xmax"]]

    frame = pd.read_csv(
        get_dataset("bigfile"), header=None, usecols=[1, 2]  # type: ignore
    )
    points = frame.to_numpy()
    params = hist.params
    # The reference is computed with the y column first, then flipped along
    # axis 0 before the comparison.
    reference = fh.histogram2d(
        points[:, 1],
        points[:, 0],
        bins=[params.ybins, params.xbins],
        range=[y_range, x_range],
    )
    reference = np.flip(reference, axis=0)  # type: ignore
    self.assertTrue(np.allclose(computed, reference))
def test_scatterplot(self):
    """Run an approximate ``MCScatterPlot`` over 'smallfile' and check the row count.

    After the scheduler has been started with ``idle_proc`` and joined, the
    loader's table must have length 30000.
    """
    scheduler = self.scheduler()
    loader = CSVLoader(
        get_dataset('smallfile'),
        index_col=False,
        header=None,
        force_valid_ids=True,
        scheduler=scheduler,
    )
    plot_classes = [('Scatterplot', '_1', '_2')]
    plot = MCScatterPlot(scheduler=scheduler, classes=plot_classes, approximate=True)
    plot.create_dependent_modules(loader, 'table')
    counter = Every(proc=self.terse, constant_time=True, scheduler=scheduler)
    counter.input.df = loader.output.table
    printer = Print(proc=self.terse, scheduler=scheduler)
    printer.input.df = plot.output.table
    loader.scheduler().start(idle_proc=idle_proc)
    scheduler.join()
    loaded = loader.table()
    self.assertEqual(len(loaded), 30000)
def test_dataflow(self):
    """Build a pipeline inside a ``Dataflow`` context and run it briefly.

    The modules are created without an explicit ``scheduler`` argument inside
    ``with Dataflow(s)``. Afterwards the loader must be retrievable from the
    scheduler by its name, and the scheduler must be running one second after
    being started.
    """
    s = Scheduler()
    with Dataflow(s):
        csv = CSVLoader(get_dataset('bigfile'), name="csv", index_col=False, header=None)
        m = Min()
        m.input.table = csv.output.table
        prt = Print(proc=self.terse)
        prt.input.df = m.output.table
    self.assertIs(s["csv"], csv)
    csv.scheduler().start()
    # Stop and join the scheduler even if the assertion below fails, so a
    # failing test does not leave it running.
    try:
        sleep(1)
        self.assertTrue(csv.scheduler().is_running())
    finally:
        s.stop()
        s.join()
def test_sample(self) -> None:
    """Check that ``Sample(samples=10)`` ends with a table of length 10.

    The pipeline is CSV loader -> Sample -> Print, connected through the
    first input slot and the ``result`` output, and run with ``aio.run``.
    """
    scheduler = self.scheduler()
    loader = CSVLoader(
        get_dataset("bigfile"),
        index_col=False,
        header=None,
        scheduler=scheduler,
    )
    sampler = Sample(samples=10, scheduler=scheduler)
    sampler.input[0] = loader.output.result
    printer = Print(proc=self.terse, scheduler=scheduler)
    printer.input[0] = sampler.output.result
    aio.run(loader.scheduler().start())
    sampled = sampler.table
    self.assertEqual(len(sampled), 10)
def test_scheduler(self):
    """Check that modules can be added to a running ``Scheduler``.

    A named CSV loader is started and must be retrievable from the scheduler
    by name. While the scheduler runs, a ``Min``/``Print`` pair is registered
    through ``on_tick_once``.
    """
    s = Scheduler()
    csv = CSVLoader(
        get_dataset('bigfile'),
        name="csv",
        index_col=False,
        header=None,
        scheduler=s,
    )
    self.assertIs(s["csv"], csv)
    csv.scheduler().start()
    # From here on the scheduler has been started: make sure it is stopped and
    # joined even when the assertion below fails.
    try:
        sleep(1)
        self.assertTrue(csv.scheduler().is_running())

        def add_min():
            m = Min(scheduler=s)
            # Of course, sleeping here would be a bad idea; it would only
            # illustrate that add_min is executed atomically by the scheduler.
            # Using a sleep outside of the on_tick_once callback would lead
            # to an inconsistent state.
            m.input.table = csv.output.table
            prt = Print(proc=self.terse, scheduler=s)
            prt.input.df = m.output.table

        s.on_tick_once(add_min)
        # Leave the scheduler running for a second after registering add_min.
        sleep(1)
        # NOTE: run-order checks are not performed in this version of the test.
    finally:
        s.stop()
        s.join()
def test_histogram2d(self) -> None:
    """Run a CSV -> Min/Max -> Histogram2D -> Heatmap -> Every pipeline.

    No assertion is made on the output; the test only requires that the
    pipeline runs under ``aio.run`` and that ``trace_stats()`` can be called
    afterwards.
    """
    sched = self.scheduler()
    loader = CSVLoader(
        get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
    )
    lower = Min(scheduler=sched)
    lower.input[0] = loader.output.result
    upper = Max(scheduler=sched)
    upper.input[0] = loader.output.result
    # columns are called 1..30
    hist = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
    hist.input[0] = loader.output.result
    hist.input.min = lower.output.result
    hist.input.max = upper.output.result
    heat = Heatmap(filename="histo_%03d.png", scheduler=sched)
    heat.input.array = hist.output.result
    reporter = Every(proc=self.terse, scheduler=sched)
    reporter.input[0] = heat.output.result
    aio.run(loader.scheduler().start())
    # The statistics themselves are discarded.
    _ = hist.trace_stats()
def test_scatterplot(self) -> None:
    """Run an approximate ``MCScatterPlot`` over 'smallfile' and check the row count.

    The pipeline is built inside ``with scheduler:`` and ``self._stop`` is
    registered through ``on_loop`` with the argument 5. After ``aio.run``
    returns, the loader's table must have length 30000.
    """
    scheduler = self.scheduler(clean=True)
    # NOTE(review): the source formatting was lost; the extent of this
    # ``with`` block is reconstructed -- confirm against the original file.
    with scheduler:
        loader = CSVLoader(
            get_dataset("smallfile"),
            index_col=False,
            header=None,
            force_valid_ids=True,
            scheduler=scheduler,
        )
        plot_classes = [("Scatterplot", "_1", "_2")]
        plot = MCScatterPlot(
            scheduler=scheduler, classes=plot_classes, approximate=True
        )
        plot.create_dependent_modules(loader, "result")
        counter = Every(proc=self.terse, constant_time=True, scheduler=scheduler)
        counter.input[0] = loader.output.result
        printer = Print(proc=self.terse, scheduler=scheduler)
        printer.input[0] = plot.output.result
        # presumably 5 is a delay before stopping -- TODO confirm
        scheduler.on_loop(self._stop, 5)
    aio.run(loader.scheduler().start())
    loaded = loader.table
    self.assertEqual(len(loaded), 30000)