Esempio n. 1
0
    def test_scheduler(self):
        """Check module run order when modules are added to a started MTScheduler.

        A CSVLoader feeds one Sample created before the scheduler is started
        and a second Sample created afterwards.  A Min/Print pair is then
        registered through ``add_oneshot_tick_proc``.  The test asserts that
        both Sample modules appear after the loader in ``s._runorder``.
        """
        s = MTScheduler()
        csv = CSVLoader(get_dataset('bigfile'), index_col=False, header=None, scheduler=s)

        smp = Sample(n=10, scheduler=s)
        smp.input.df = csv.output.df

        csv.scheduler().start()

        sleep(1)
        self.assertTrue(csv.scheduler().is_running())

        # The scheduler has been reported as running: make sure it is always
        # stopped and joined, even when a later statement or assertion fails.
        try:
            smp2 = Sample(n=15, scheduler=s)
            smp2.input.df = csv.output.df

            def add_min():
                m = Min(scheduler=s)
                # Sleeping here would be a bad idea; it would only illustrate
                # that add_min is executed atomically by the scheduler.
                # Doing this wiring outside of add_oneshot_tick_proc would
                # lead to an inconsistent state.
                m.input.df = smp2.output.df
                prt = Print(scheduler=s)
                prt.input.df = m.output.df

            s.add_oneshot_tick_proc(add_min)

            sleep(1)
            self.assertTrue(s._runorder.index(smp.id) > s._runorder.index(csv.id))
            self.assertTrue(s._runorder.index(smp2.id) > s._runorder.index(csv.id))
            # The Min module is local to add_min, so its position relative to
            # smp2 is not checked here.
        finally:
            s.stop()
            s.join()
 def test_histogram2d(self):
     """Run a CSVLoader -> Min/Max -> Histogram2D -> Heatmap pipeline to completion.

     The histogram is computed on columns 1 and 2 with 100x100 bins and is
     bounded by the Min and Max module outputs.  An Every module calling
     ``self.terse`` is attached to the loader output.  No value is asserted;
     the test only checks that the pipeline runs and that ``trace_stats()``
     can be called afterwards.
     """
     s = self.scheduler()
     csv = CSVLoader(get_dataset('bigfile'),
                     index_col=False,
                     header=None,
                     scheduler=s)
     min_ = Min(scheduler=s)
     min_.input.table = csv.output.table
     max_ = Max(scheduler=s)
     max_.input.table = csv.output.table
     histogram2d = Histogram2D(1, 2, xbins=100, ybins=100,
                               scheduler=s)  # columns are called 1..30
     histogram2d.input.table = csv.output.table
     histogram2d.input.min = min_.output.table
     histogram2d.input.max = max_.output.table
     heatmap = Heatmap(filename='histo_%03d.png', scheduler=s)
     heatmap.input.array = histogram2d.output.table
     pr = Every(proc=self.terse, scheduler=s)
     # The reporter is fed from the loader rather than from the heatmap or
     # the histogram output.
     pr.input.df = csv.output.table
     csv.scheduler().start()
     s.join()
     # The statistics are not inspected; bind them to ``_`` instead of
     # overwriting the scheduler variable ``s``.
     _ = histogram2d.trace_stats()
Esempio n. 3
0
 def test_sample(self):
     """Wire CSVLoader -> Sample(n=10) -> Print on a new Scheduler and start it.

     No assertion is made; the test only exercises the run.
     """
     sched = Scheduler()
     loader = CSVLoader(
         get_dataset('bigfile'),
         index_col=False,
         header=None,
         scheduler=sched,
     )
     sampler = Sample(n=10, scheduler=sched)
     sampler.input.df = loader.output.df
     printer = Print(scheduler=sched)
     printer.input.df = sampler.output.df
     # Start through the loader's own scheduler accessor.
     loader.scheduler().start()
Esempio n. 4
0
 def test_scatterplot(self):
     """Run a ScatterPlot pipeline on 'bigfile' and check the loaded row count.

     A ScatterPlot on columns ``_1`` / ``_2`` builds its dependent modules
     from the loader's ``df`` output.  An Every module calls ``print_len`` on
     the loader output and a Print module consumes the scatter plot's
     histogram2d output.  After the scheduler has been started with
     ``idle_proc``, the loader is expected to hold 1,000,000 rows.
     """
     s = Scheduler()
     csv = CSVLoader(get_dataset('bigfile'), index_col=False, header=None,
                     force_valid_ids=True, scheduler=s)
     sp = ScatterPlot(x_column='_1', y_column='_2', scheduler=s)
     sp.create_dependent_modules(csv, 'df')
     cnt = Every(proc=print_len, constant_time=True, scheduler=s)
     cnt.input.df = csv.output.df
     prt = Print(scheduler=s)
     prt.input.df = sp.histogram2d.output.df
     csv.scheduler().start(None, idle_proc)
     # assertEquals is a deprecated alias that was removed in Python 3.12.
     self.assertEqual(len(csv.df()), 1000000)
Esempio n. 5
0
 def test_sample(self):
     """Check that Sample(samples=10) yields a table of exactly 10 rows.

     The pipeline is CSVLoader -> Sample -> Print (using ``self.terse``); the
     scheduler is started and joined before the sampled table is measured.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset('bigfile'),
         index_col=False,
         header=None,
         scheduler=sched,
     )
     sampler = Sample(samples=10, scheduler=sched)
     sampler.input.table = loader.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = sampler.output.table
     loader.scheduler().start()
     sched.join()
     sampled = sampler.table()
     self.assertEqual(len(sampled), 10)
 def test_histogram2d1(self) -> None:
     """Compare the Histogram2D module's final array with a direct computation.

     The pipeline is CSVLoader -> Min/Max -> Histogram2D -> Heatmap -> Every.
     After the run, the last row of the histogram table supplies the array
     and its x/y bounds.  The same histogram is recomputed from columns 1 and
     2 of the CSV file with ``fh.histogram2d``, flipped along axis 0, and
     compared with ``np.allclose``.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"),
         index_col=False,
         header=None,
         scheduler=sched,
     )
     lower = Min(scheduler=sched)
     lower.input[0] = loader.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = loader.output.result
     # columns are called 1..30
     hist = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
     hist.input[0] = loader.output.result
     hist.input.min = lower.output.result
     hist.input.max = upper.output.result
     heatmap = Heatmap(filename="histo_%03d.png", scheduler=sched)
     heatmap.input.array = hist.output.result
     reporter = Every(proc=self.terse, scheduler=sched)
     reporter.input[0] = heatmap.output.result
     aio.run(loader.scheduler().start())

     # Result produced by the module: last row of its table.
     final_row = notNone(hist.table.last()).to_dict()
     module_counts = final_row["array"]
     y_range = [final_row["ymin"], final_row["ymax"]]
     x_range = [final_row["xmin"], final_row["xmax"]]
     bounds = [y_range, x_range]

     # Reference result computed directly from the file, y axis first.
     frame = pd.read_csv(
         get_dataset("bigfile"), header=None, usecols=[1, 2]  # type: ignore
     )
     points = frame.to_numpy()
     bins = [hist.params.ybins, hist.params.xbins]
     reference = fh.histogram2d(points[:, 1], points[:, 0], bins=bins, range=bounds)
     reference = np.flip(reference, axis=0)  # type: ignore
     self.assertTrue(np.allclose(module_counts, reference))
 def test_scatterplot(self):
     """Run an MCScatterPlot pipeline on 'smallfile' and check the row count.

     The scatter plot has one class ('Scatterplot' on ``_1`` / ``_2``) and
     builds its dependent modules from the loader's ``table`` output.  After
     the scheduler has run with ``idle_proc`` and been joined, the loader
     table is expected to hold 30,000 rows.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset('smallfile'),
         index_col=False,
         header=None,
         force_valid_ids=True,
         scheduler=sched,
     )
     plot_classes = [('Scatterplot', '_1', '_2')]
     scatter = MCScatterPlot(scheduler=sched, classes=plot_classes, approximate=True)
     scatter.create_dependent_modules(loader, 'table')
     counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     counter.input.df = loader.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = scatter.output.table
     loader.scheduler().start(idle_proc=idle_proc)
     sched.join()
     loaded = loader.table()
     self.assertEqual(len(loaded), 30000)
    def test_dataflow(self):
        """Build a pipeline inside a Dataflow context and check that it runs.

        Modules are created without an explicit ``scheduler`` argument inside
        ``with Dataflow(...)``.  The test asserts that the loader can be
        looked up on the scheduler by its name "csv", and that the scheduler
        reports itself as running one second after being started.
        """
        sched = Scheduler()
        with Dataflow(sched):
            loader = CSVLoader(
                get_dataset('bigfile'),
                name="csv",
                index_col=False,
                header=None,
            )
            minimum = Min()
            minimum.input.table = loader.output.table
            printer = Print(proc=self.terse)
            printer.input.df = minimum.output.table

        self.assertIs(sched["csv"], loader)
        loader.scheduler().start()

        # Give the scheduler some time before checking its state.
        sleep(1)
        running = loader.scheduler().is_running()
        self.assertTrue(running)

        sched.stop()
        sched.join()
Esempio n. 9
0
 def test_sample(self) -> None:
     """Check that Sample(samples=10) exposes a table of exactly 10 rows.

     The pipeline is CSVLoader -> Sample -> Print (using ``self.terse``); the
     scheduler's ``start()`` is driven through ``aio.run``.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"),
         index_col=False,
         header=None,
         scheduler=sched,
     )
     sampler = Sample(samples=10, scheduler=sched)
     sampler.input[0] = loader.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = sampler.output.result
     aio.run(loader.scheduler().start())
     sampled = sampler.table
     self.assertEqual(len(sampled), 10)
    def test_scheduler(self):
        """Start a Scheduler with one loader, then add modules via on_tick_once.

        The test asserts that the loader can be looked up on the scheduler by
        its name "csv" and that the scheduler reports itself as running one
        second after being started.  A Min/Print pair is then registered
        through ``on_tick_once`` before the scheduler is stopped and joined.
        """
        sched = Scheduler()
        loader = CSVLoader(
            get_dataset('bigfile'),
            name="csv",
            index_col=False,
            header=None,
            scheduler=sched,
        )

        self.assertIs(sched["csv"], loader)
        loader.scheduler().start()

        sleep(1)
        running = loader.scheduler().is_running()
        self.assertTrue(running)

        def add_min():
            # Sleeping in this callback would be a bad idea; it would only
            # illustrate that add_min is executed atomically by the scheduler.
            # Doing this wiring outside of a tick callback would lead to an
            # inconsistent state.
            minimum = Min(scheduler=sched)
            minimum.input.table = loader.output.table
            printer = Print(proc=self.terse, scheduler=sched)
            printer.input.df = minimum.output.table

        sched.on_tick_once(add_min)

        # Run-order assertions are not performed in this version of the test.
        sleep(1)
        sched.stop()
        sched.join()
Esempio n. 11
0
 def test_histogram2d(self) -> None:
     """Run a CSVLoader -> Min/Max -> Histogram2D -> Heatmap -> Every pipeline.

     The histogram is computed on columns 1 and 2 with 100x100 bins and is
     bounded by the Min and Max module outputs.  No value is asserted; the
     test only checks that the run completes and that ``trace_stats()`` can
     be called afterwards.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"),
         index_col=False,
         header=None,
         scheduler=sched,
     )
     lower = Min(scheduler=sched)
     lower.input[0] = loader.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = loader.output.result
     # columns are called 1..30
     hist = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
     hist.input[0] = loader.output.result
     hist.input.min = lower.output.result
     hist.input.max = upper.output.result
     heatmap = Heatmap(filename="histo_%03d.png", scheduler=sched)
     heatmap.input.array = hist.output.result
     reporter = Every(proc=self.terse, scheduler=sched)
     reporter.input[0] = heatmap.output.result
     aio.run(loader.scheduler().start())
     # The statistics themselves are not inspected.
     _ = hist.trace_stats()
Esempio n. 12
0
 def test_scatterplot(self) -> None:
     """Run an MCScatterPlot pipeline on 'smallfile' and check the row count.

     Modules are created inside ``with s:`` on a scheduler obtained with
     ``clean=True``.  ``self._stop`` is registered through ``on_loop`` with
     the argument 5 before the scheduler is run via ``aio.run``.  Afterwards
     the loader table is expected to hold 30,000 rows.
     """
     sched = self.scheduler(clean=True)
     with sched:
         loader = CSVLoader(
             get_dataset("smallfile"),
             index_col=False,
             header=None,
             force_valid_ids=True,
             scheduler=sched,
         )
         plot_classes = [("Scatterplot", "_1", "_2")]
         scatter = MCScatterPlot(
             scheduler=sched, classes=plot_classes, approximate=True
         )
         scatter.create_dependent_modules(loader, "result")
         counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
         counter.input[0] = loader.output.result
         printer = Print(proc=self.terse, scheduler=sched)
         printer.input[0] = scatter.output.result
     # Registered outside the ``with`` block, as in the original wiring.
     sched.on_loop(self._stop, 5)
     aio.run(loader.scheduler().start())
     loaded = loader.table
     self.assertEqual(len(loaded), 30000)