コード例 #1
0
 def t_histogram2d_impl(self, **kw: Any) -> None:
     """Check ``Histogram2D`` on a stirred random table against ``fh.histogram2d``.

     Extra keyword arguments are forwarded to ``Stirrer``. Once the scheduler
     has run, the ``array`` of the last histogram row is compared with a
     histogram recomputed over columns ``_1`` and ``_2`` of the stirrer
     table, using the bounds stored in that row and the module's bin counts.
     """
     sched = self.scheduler()
     source = RandomTable(3, rows=100000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
     )
     stirrer.input[0] = source.output.result
     lower = Min(scheduler=sched)
     lower.input[0] = stirrer.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = stirrer.output.result
     histo = Histogram2D(0, 1, xbins=100, ybins=100, scheduler=sched)
     histo.input[0] = stirrer.output.result
     histo.input.min = lower.output.result
     histo.input.max = upper.output.result
     heatmap = Heatmap(filename="histo_%03d.png", scheduler=sched)
     heatmap.input.array = histo.output.result
     printer = Every(proc=self.terse, scheduler=sched)
     printer.input[0] = heatmap.output.result
     aio.run(sched.start())

     # Reference computation over the final content of the stirrer table.
     final = notNone(histo.table.last()).to_dict()
     progressive = final["array"]
     y_range = [final["ymin"], final["ymax"]]
     x_range = [final["xmin"], final["xmax"]]
     stirred = stirrer.table.loc[:, ["_1", "_2"]]
     assert stirred is not None
     values = stirred.to_array()
     bin_counts = [histo.params.ybins, histo.params.xbins]
     expected = fh.histogram2d(
         values[:, 1], values[:, 0], bins=bin_counts, range=[y_range, x_range]
     )
     # The reference is flipped along axis 0 before being compared.
     expected = np.flip(expected, axis=0)  # type: ignore
     self.assertEqual(np.sum(progressive), np.sum(expected))
     self.assertListEqual(
         progressive.reshape(-1).tolist(), expected.reshape(-1).tolist()
     )
コード例 #2
0
 def test_histogram1d1(self) -> None:
     """Compare ``Histogram1D`` on column ``_2`` of ``bigfile`` with NumPy.

     ``Min`` and ``Max`` modules feed the histogram's ``min`` and ``max``
     inputs. After the run, the last histogram row is checked bin by bin
     against ``np.histogram`` computed on the same column read with pandas,
     using the bounds stored in that row.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lower = Min(scheduler=sched)
     lower.input[0] = loader.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = loader.output.result
     histo = Histogram1D("_2", scheduler=sched)
     histo.input[0] = loader.output.result
     histo.input.min = lower.output.result
     histo.input.max = upper.output.result
     printer = Every(proc=self.terse, scheduler=sched)
     printer.input[0] = histo.output.result
     aio.run(sched.start())
     _ = histo.trace_stats()

     final = notNone(histo.table.last()).to_dict()
     progressive = final["array"]
     value_range = (final["min"], final["max"])
     frame = pd.read_csv(
         get_dataset("bigfile"), header=None, usecols=[2]  # type: ignore
     )
     flat = frame.to_numpy().reshape(-1)
     expected, _ = np.histogram(  # type: ignore
         flat, bins=histo.params.bins, density=False, range=value_range
     )
     self.assertListEqual(progressive.tolist(), expected.tolist())
コード例 #3
0
    def test_mb_k_means(self) -> None:
        """Run ``MBKMeans`` on the ``cluster:s3`` dataset and check the labels.

        The test prints a message and returns early when fetching the dataset
        raises ``TimeoutError``. Otherwise it asserts that the number of
        labels equals the number of rows in the loader's table.
        """
        sched = self.scheduler()
        n_clusters = 3
        try:
            dataset = (get_dataset("cluster:s3"),)
        except TimeoutError:
            print("Cannot download cluster:s3")
            return

        with sched:
            loader = CSVLoader(
                dataset,
                sep=" ",
                skipinitialspace=True,
                header=None,
                index_col=False,
                scheduler=sched,
            )
            kmeans = MBKMeans(
                n_clusters=n_clusters,
                random_state=42,
                is_input=False,
                is_greedy=False,
                scheduler=sched,
            )
            # The input is wired through create_dependent_modules rather
            # than by assigning an input slot directly.
            kmeans.create_dependent_modules(loader)
            printer = Print(proc=self.terse, scheduler=sched)
            printer.input[0] = kmeans.output.result
            every = Every(proc=self.terse, scheduler=sched)
            every.input[0] = kmeans.output.labels
        aio.run(sched.start())
        labels = kmeans.labels()
        assert labels is not None
        self.assertEqual(len(loader.table), len(labels))
コード例 #4
0
 def test_join(self) -> None:
     """Join the ``Stats`` of columns 1 and 2 of ``bigfile`` via ``Reduce.expand``.

     The joined module's output is printed, the loader's output is sent to
     an ``Every`` module, and the trace statistics of the join are printed
     after the run. No assertion is made on the result.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     stats_a = Stats(1, reset_index=True, scheduler=sched)
     stats_a.input[0] = loader.output.result
     stats_b = Stats(2, reset_index=True, scheduler=sched)
     stats_b.input[0] = loader.output.result
     # Reduce.expand receives the join class, its slot names and the list
     # of output slots to combine.
     stat_slots = [stats_a.output.stats, stats_b.output.stats]
     joined = Reduce.expand(
         BinJoin, "first", "second", "table", stat_slots, scheduler=sched
     )
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = joined.output.result
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input[0] = loader.output.result
     aio.run(sched.start())
     trace = joined.trace_stats(max_runs=1)
     print(trace)
コード例 #5
0
 def test_histogram2d1(self) -> None:
     """Compare ``Histogram2D`` on ``bigfile`` with ``fh.histogram2d``.

     The last histogram row produced by the module is compared, with
     ``np.allclose``, to a histogram recomputed over columns 1 and 2 of the
     file read with pandas, using the bounds stored in that row.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lower = Min(scheduler=sched)
     lower.input[0] = loader.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = loader.output.result
     histo = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
     histo.input[0] = loader.output.result
     histo.input.min = lower.output.result
     histo.input.max = upper.output.result
     heatmap = Heatmap(filename="histo_%03d.png", scheduler=sched)
     heatmap.input.array = histo.output.result
     printer = Every(proc=self.terse, scheduler=sched)
     printer.input[0] = heatmap.output.result
     # The scheduler is obtained back from the loader to start the run.
     aio.run(loader.scheduler().start())

     final = notNone(histo.table.last()).to_dict()
     progressive = final["array"]
     y_range = [final["ymin"], final["ymax"]]
     x_range = [final["xmin"], final["xmax"]]
     frame = pd.read_csv(
         get_dataset("bigfile"), header=None, usecols=[1, 2]  # type: ignore
     )
     values = frame.to_numpy()
     bin_counts = [histo.params.ybins, histo.params.xbins]
     expected = fh.histogram2d(
         values[:, 1], values[:, 0], bins=bin_counts, range=[y_range, x_range]
     )
     # The reference is flipped along axis 0 before being compared.
     expected = np.flip(expected, axis=0)  # type: ignore
     self.assertTrue(np.allclose(progressive, expected))
コード例 #6
0
 def test_blobs_table2(self) -> None:
     """Check that two identically configured ``BlobsTable`` modules agree.

     The two modules differ only in ``default_step_size``. After the run
     both tables must hold ``sz`` rows and their arrays must be close
     according to ``np.allclose``.
     """
     sched = self.scheduler()
     sz = 100000
     centers = [(0.1, 0.3), (0.7, 0.5), (-0.4, -0.3)]
     first = BlobsTable(
         ["a", "b"], centers=centers, cluster_std=0.2, rows=sz, scheduler=sched
     )
     first.default_step_size = 1500
     second = BlobsTable(
         ["a", "b"], centers=centers, cluster_std=0.2, rows=sz, scheduler=sched
     )
     second.default_step_size = 200
     adder = Add(scheduler=sched)
     adder.input.first = first.output.result
     adder.input.second = second.output.result
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input[0] = adder.output.result
     aio.run(sched.start())

     self.assertEqual(len(first.table), sz)
     self.assertEqual(len(second.table), sz)
     first_values = first.table.to_array()
     second_values = second.table.to_array()
     self.assertTrue(np.allclose(first_values, second_values))
コード例 #7
0
 def test_mv_blobs_table2(self) -> None:
     """Check that two identically configured ``MVBlobsTable`` modules agree.

     Both modules are built from the ``means`` and ``covs`` values defined
     outside this method and differ only in ``default_step_size``. After
     the run both tables must hold ``sz`` rows and their arrays must be
     close according to ``np.allclose``.
     """
     sched = self.scheduler()
     sz = 100000
     first = MVBlobsTable(
         ["a", "b"], means=means, covs=covs, rows=sz, scheduler=sched
     )
     first.default_step_size = 1500
     second = MVBlobsTable(
         ["a", "b"], means=means, covs=covs, rows=sz, scheduler=sched
     )
     second.default_step_size = 200
     adder = Add(scheduler=sched)
     adder.input.first = first.output.result
     adder.input.second = second.output.result
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input[0] = adder.output.result
     aio.run(sched.start())

     self.assertEqual(len(first.table), sz)
     self.assertEqual(len(second.table), sz)
     first_values = first.table.to_array()
     second_values = second.table.to_array()
     self.assertTrue(np.allclose(first_values, second_values))
コード例 #8
0
    def t_histogram1d_impl(self, **kw: Any) -> None:
        """Check ``Histogram1D`` on a stirred ``bigfile`` against NumPy.

        Extra keyword arguments are forwarded to ``Stirrer``. After the run,
        the last histogram row is compared (total count, then bin by bin)
        with ``np.histogram`` computed on column ``_2`` of the stirrer table,
        using the bounds stored in that row.
        """
        sched = self.scheduler()
        loader = CSVLoader(
            get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
        )
        stirrer = Stirrer(
            update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
        )
        stirrer.input[0] = loader.output.result
        lower = Min(scheduler=sched)
        lower.input[0] = stirrer.output.result
        upper = Max(scheduler=sched)
        upper.input[0] = stirrer.output.result
        histo = Histogram1D("_2", scheduler=sched)
        histo.input[0] = stirrer.output.result
        histo.input.min = lower.output.result
        histo.input.max = upper.output.result

        printer = Every(proc=self.terse, scheduler=sched)
        printer.input[0] = histo.output.result
        aio.run(sched.start())
        _ = histo.trace_stats()

        final = notNone(histo.table.last()).to_dict()
        progressive = final["array"]
        value_range = (final["min"], final["max"])
        stirred = stirrer.table.loc[:, ["_2"]]
        assert stirred is not None
        flat = stirred.to_array().reshape(-1)
        expected, _ = np.histogram(  # type: ignore
            flat, bins=histo.params.bins, density=False, range=value_range
        )
        self.assertEqual(np.sum(progressive), np.sum(expected))
        self.assertListEqual(progressive.tolist(), expected.tolist())
コード例 #9
0
 def test_random_table(self) -> None:
     """Check column names and final row count of a two-column ``RandomTable``."""
     sched = self.scheduler()
     generator = RandomTable(["a", "b"], rows=10000, scheduler=sched)
     # Before the run: the table exposes exactly the two requested columns.
     self.assertEqual(generator.table.columns[0], "a")
     self.assertEqual(generator.table.columns[1], "b")
     self.assertEqual(len(generator.table.columns), 2)
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input[0] = generator.output.result
     aio.run(sched.start())
     # After the run: all requested rows have been produced.
     self.assertEqual(len(generator.table), 10000)
コード例 #10
0
 def NOtest_vec_distances(self) -> None:
     """Load the ``warlogs`` dataset with ``VECLoader`` and run the scheduler.

     The loader's output is sent to an ``Every`` module and the module-level
     ``times`` counter is reset before the run. No assertion is made; the
     loader's ``result`` is only read at the end.

     NOTE(review): the ``NOtest`` prefix presumably keeps the test runner
     from collecting this method -- confirm before re-enabling it.
     """
     s = self.scheduler()
     vec = VECLoader(get_dataset("warlogs"), scheduler=s)
     #        dis=PairwiseDistances(metric='cosine',scheduler=s)
     #        dis.input[0] = vec.output.df
     #        dis.input.array = vec.output.array
     cnt = Every(proc=self.terse, constant_time=True, scheduler=s)
     #        cnt.input[0] = dis.output.dist
     cnt.input[0] = vec.output.result
     global times
     times = 0
     # The other tests using this API hand ``s.start()`` to ``aio.run``; a
     # bare ``s.start()`` would only build the awaitable and never execute
     # the pipeline.
     aio.run(s.start())
     _ = vec.result
コード例 #11
0
 def test_csv_distances(self) -> None:
     """Load ``smallfile`` with ``CSVLoader`` and run with the ``ten_times`` callback.

     The module-level ``times`` counter is reset before the run and
     ``ten_times`` is passed to ``start``. No assertion is made; the
     loader's ``result`` is only read at the end.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("smallfile"), index_col=False, header=None, scheduler=sched
     )
     # The loader is connected straight to the counter; no distance module
     # is placed in between.
     counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     counter.input[0] = loader.output.result
     global times
     times = 0
     aio.run(sched.start(ten_times))
     _ = loader.result
コード例 #12
0
 def test_blobs_table(self) -> None:
     """Check column names and final row count of a two-column ``BlobsTable``.

     The module is built from the ``centers`` value defined outside this
     method.
     """
     sched = self.scheduler()
     generator = BlobsTable(
         ["a", "b"], centers=centers, rows=10000, scheduler=sched
     )
     # Before the run: the table exposes exactly the two requested columns.
     self.assertEqual(generator.table.columns[0], "a")
     self.assertEqual(generator.table.columns[1], "b")
     self.assertEqual(len(generator.table.columns), 2)
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input[0] = generator.output.result
     aio.run(sched.start())
     # After the run: all requested rows have been produced.
     self.assertEqual(len(generator.table), 10000)
コード例 #13
0
 def test_last_row(self) -> None:
     """Check that ``LastRow`` exposes the last row of the loaded table.

     After the run, the ``_1`` value at index 0 of the ``LastRow`` table
     must equal the ``_1`` value of the last row of the loader's table.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("smallfile"), index_col=False, header=None, scheduler=sched
     )
     last_row = LastRow(scheduler=sched)
     last_row.input[0] = loader.output.result
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input[0] = last_row.output.result
     aio.run(sched.start())
     loaded = loader.table
     tail = last_row.table
     assert tail is not None
     self.assertEqual(tail.at[0, "_1"], notNone(loaded.last())["_1"])
コード例 #14
0
 def test_join(self):
     """Join the ``Stats`` of columns 1 and 2 of ``bigfile`` with ``Reduce``.

     Three ``Stats`` modules are connected to the loader, but only the
     first two are assigned to the reduction's ``table`` input. The
     expanded join is printed and its trace statistics are printed after
     ``start``. No assertion is made on the result.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     stats_a = Stats(1, reset_index=True, scheduler=sched)
     stats_a.input.table = loader.output.table
     stats_b = Stats(2, reset_index=True, scheduler=sched)
     stats_b.input.table = loader.output.table
     stats_c = Stats(3, reset_index=True, scheduler=sched)
     stats_c.input.table = loader.output.table
     reducer = Reduce(BinJoin, "first", "second", "table", scheduler=sched)
     # The same input name is assigned twice, once per Stats output.
     reducer.input.table = stats_a.output.stats
     reducer.input.table = stats_b.output.stats
     joined = reducer.expand()
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = joined.output.table
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input.df = loader.output.table
     sched.start()
     trace = joined.trace_stats(max_runs=1)
     print(trace)
コード例 #15
0
 def test_histogram2d(self):
     """Run a ``Histogram2D`` + ``Heatmap`` pipeline on ``bigfile``.

     The ``Every`` module is connected to the loader's output rather than
     to the heatmap. The test only starts the scheduler, joins it and reads
     the histogram's trace statistics; no assertion is made.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lower = Min(scheduler=sched)
     lower.input.table = loader.output.table
     upper = Max(scheduler=sched)
     upper.input.table = loader.output.table
     histo = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
     histo.input.table = loader.output.table
     histo.input.min = lower.output.table
     histo.input.max = upper.output.table
     heatmap = Heatmap(filename="histo_%03d.png", scheduler=sched)
     heatmap.input.array = histo.output.table
     printer = Every(proc=self.terse, scheduler=sched)
     printer.input.df = loader.output.table
     # The scheduler is obtained back from the loader to start the run.
     loader.scheduler().start()
     sched.join()
     _ = histo.trace_stats()
コード例 #16
0
    def test_module(self) -> None:
        """Exercise basic module behaviour through ``Every`` and ``SimpleModule``.

        Checks, in order: ``Module`` itself cannot be instantiated
        (``TypeError``); a named module is registered in the scheduler;
        creating a second module with the same name raises
        ``ProgressiveError``; default progress, validity, visualization and
        error state; parameter updates via ``debug`` and
        ``set_current_params``; and removal of the module from the scheduler
        with ``del``.
        """
        # pylint: disable=broad-except
        s = self.scheduler()
        with self.assertRaises(TypeError):  # abstract base class
            module = Module(name="a", scheduler=s)  # type: ignore

        with s:
            module = Every(proc=self.terse, name="a", scheduler=s)
            self.assertEqual(module.name, "a")
            self.assertEqual(s.exists("a"), True)
            self.assertEqual(module.get_progress(), (0, 0))
            # A duplicate name must be rejected. Because the constructor
            # raises, ``module`` is not rebound and still refers to the
            # ``Every`` instance for the checks below.
            with self.assertRaises(
                ProgressiveError,
                msg="Exception not triggered with a duplicate name",
            ):
                module = SimpleModule(name="a", scheduler=s)
            mod2 = SimpleModule(name="b", scheduler=s)
            self.assertEqual(mod2.get_progress(), (0, 0))
            self.assertTrue(module.is_valid())
            self.assertFalse(module.is_visualization())
            self.assertIsNone(module.get_visualization())
            self.assertIsNone(module.get_data("error"))
            self.assertEqual(module.last_time(), 0)
            module.debug = True
            self.assertEqual(module.params.debug, True)
            module.set_current_params({"quantum": 2.0})
            self.assertEqual(module.params.quantum, 2.0)
            params = module.get_data("_params")
            self.assertIsInstance(params, Table)
            del s["a"]
        self.assertEqual(s.exists("a"), False)
コード例 #17
0
 def test_scatterplot2(self):
     """Check that the ``MCScatterPlot`` sample stays inside the patched bounds.

     Three patches are attached to the scheduler with ``decorate`` before
     the run. After the run, every x and y value of the JSON sample must
     lie within ``[LOWER_X, UPPER_X]`` and ``[LOWER_Y, UPPER_Y]``.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=2000000, scheduler=sched)
     plot = MCScatterPlot(
         scheduler=sched,
         classes=[("Scatterplot", "_1", "_2")],
         approximate=True,
     )
     plot.create_dependent_modules(source, "table", with_sampling=False)
     counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     counter.input.df = source.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = plot.output.table
     decorate(sched, VariablePatch1("variable_1"))
     decorate(sched, VariablePatch2("variable_2"))
     decorate(sched, ScatterPlotPatch("mc_scatter_plot_1"))
     plot.scheduler().start(idle_proc=idle_proc)
     sched.join()

     payload = plot.to_json()
     xs, ys, _ = zip(*payload["sample"]["data"])
     lowest_x, highest_x = min(xs), max(xs)
     lowest_y, highest_y = min(ys), max(ys)
     self.assertGreaterEqual(lowest_x, LOWER_X)
     self.assertGreaterEqual(lowest_y, LOWER_Y)
     self.assertLessEqual(highest_x, UPPER_X)
     self.assertLessEqual(highest_y, UPPER_Y)
コード例 #18
0
 def test_random_table2(self) -> None:
     """Check column names and final row count of a ten-column ``RandomTable``.

     The table is created with ``force_valid_ids=True`` and one million
     rows.
     """
     sched = self.scheduler()
     generator = RandomTable(
         10, rows=1000000, force_valid_ids=True, scheduler=sched
     )
     # Before the run: ten columns, the first two named "_1" and "_2".
     self.assertEqual(len(generator.table.columns), 10)
     self.assertEqual(generator.table.columns[0], "_1")
     self.assertEqual(generator.table.columns[1], "_2")
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input[0] = generator.output.result
     aio.run(sched.start())
     # After the run: all requested rows have been produced.
     self.assertEqual(len(generator.table), 1000000)
コード例 #19
0
    def test_module(self):
        """Exercise basic module behaviour through ``Every`` and ``SimpleModule``.

        Checks, in order: ``Module`` itself cannot be instantiated
        (``TypeError``); a named module is registered in the scheduler; a
        duplicate name raises ``ProgressiveError``; parameter updates via
        ``debug`` and ``set_current_params``; removal with ``destroy``; and
        the JSON description of the destroyed module.
        """
        # pylint: disable=broad-except
        sched = self.scheduler()
        with self.assertRaises(TypeError):  # abstract base class
            module = Module(name="a", scheduler=sched)

        module = Every(proc=self.terse, name="a", scheduler=sched)
        self.assertEqual(module.name, "a")
        self.assertEqual(sched.exists("a"), True)
        self.assertEqual(module.get_progress(), (0, 0))
        with self.assertRaises(ProgressiveError):
            module = SimpleModule(name="a", scheduler=sched)
        other = SimpleModule(name="b", scheduler=sched)
        self.assertEqual(other.get_progress(), (0, 0))

        module.debug = True
        self.assertEqual(module.params.debug, True)
        module.set_current_params({"quantum": 2.0})
        self.assertEqual(module.params.quantum, 2.0)
        self.assertIsInstance(module.get_data("_params"), Table)

        module.destroy()
        self.assertEqual(sched.exists("a"), False)
        module.describe()
        short_summary = module.to_json(short=True)
        self.assertEqual(short_summary.get("is_running"), False)
        self.assertEqual(short_summary.get("is_terminated"), False)
        full_summary = module.to_json(short=False)
        self.assertEqual(full_summary.get("start_time", 0), None)
        self.assertFalse(module.has_any_output())
コード例 #20
0
 def test_histogram1d(self) -> None:
     """Run ``Histogram1D`` on column ``_2`` of ``bigfile``.

     ``Min`` and ``Max`` modules feed the histogram's ``min`` and ``max``
     inputs. The test only runs the pipeline and reads the histogram's
     trace statistics; no assertion is made.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lower = Min(scheduler=sched)
     lower.input[0] = loader.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = loader.output.result
     histo = Histogram1D("_2", scheduler=sched)
     histo.input[0] = loader.output.result
     histo.input.min = lower.output.result
     histo.input.max = upper.output.result
     printer = Every(proc=self.terse, scheduler=sched)
     printer.input[0] = histo.output.result
     aio.run(sched.start())
     _ = histo.trace_stats()
コード例 #21
0
    def test_percentile(self) -> None:
        """Run ``Percentiles`` on column ``_1`` of ``smallfile``.

        Five percentiles are requested and the module's output is sent to
        an ``Every`` module. The test only runs the pipeline; no assertion
        is made.
        """
        sched = self.scheduler()
        loader = CSVLoader(
            get_dataset("smallfile"), index_col=False, header=None, scheduler=sched
        )
        requested = [0.1, 0.25, 0.5, 0.75, 0.9]
        percentiles = Percentiles(
            "_1", name="test_percentile", percentiles=requested, scheduler=sched
        )
        percentiles.input[0] = loader.output.result
        printer = Every(proc=self.terse, name="print", scheduler=sched)
        printer.input[0] = percentiles.output.result

        aio.run(sched.start())
コード例 #22
0
 def test_select(self) -> None:
     """Run ``Select`` on ``bigfile`` with the constant query ``_1 < 0.5``.

     After the run the selected table must contain fewer than 1000000
     rows.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"),
         index_col=False,
         header=None,
         force_valid_ids=True,
         scheduler=sched,
     )
     query = Constant(PsDict({"query": ["_1 < 0.5"]}), scheduler=sched)
     select = Select(scheduler=sched)
     select.input[0] = loader.output.df
     select.input.query = query.output.df
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input[0] = select.output.df
     aio.run(sched.start())
     self.assertTrue(len(select.table) < 1000000)
コード例 #23
0
 def test_merge(self) -> None:
     """Merge the ``Stats`` of columns 1 and 2 of ``bigfile`` with ``Merge``.

     The merged output is printed and the loader's output is sent to an
     ``Every`` module. The test only runs the pipeline and reads the merge
     module's trace statistics; no assertion is made.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     stats_a = Stats(1, scheduler=sched)
     stats_a.input[0] = loader.output.result
     stats_b = Stats(2, scheduler=sched)
     stats_b.input[0] = loader.output.result
     merged = Merge(left_index=True, right_index=True, scheduler=sched)
     # Input slot 0 is assigned twice, once per Stats output.
     merged.input[0] = stats_a.output.result
     merged.input[0] = stats_b.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = merged.output.result
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input[0] = loader.output.result
     aio.run(sched.start())
     _ = merged.trace_stats(max_runs=1)
コード例 #24
0
    def test_scatterplot2(self) -> None:
        """Check that the ``MCScatterPlot`` sample stays inside the injected bounds.

        Two callbacks registered with ``on_loop`` send the lower and upper
        bounds to the ``dyn_var_1`` and ``dyn_var_2`` modules, and
        ``self._stop`` is registered as well. After the run, every x and y
        value of the JSON sample must lie within ``[LOWER_X, UPPER_X]`` and
        ``[LOWER_Y, UPPER_Y]``.
        """
        sched = self.scheduler(clean=True)
        with sched:
            source = RandomTable(2, rows=2000000, throttle=1000, scheduler=sched)
            plot = MCScatterPlot(
                scheduler=sched,
                classes=[("Scatterplot", "_1", "_2")],
                approximate=True,
            )
            plot.create_dependent_modules(source, "result", with_sampling=False)
            counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
            counter.input[0] = source.output.result
            printer = Print(proc=self.terse, scheduler=sched)
            printer.input[0] = plot.output.result

        async def fake_input_1(scheduler: Scheduler, rn: int) -> None:
            # Feed the lower bounds to the first dynamic variable.
            dyn_var = scheduler["dyn_var_1"]
            print("from input dyn_var_1")
            await dyn_var.from_input({"x": LOWER_X, "y": LOWER_Y})

        async def fake_input_2(scheduler: Scheduler, rn: int) -> None:
            # Feed the upper bounds to the second dynamic variable.
            dyn_var = scheduler["dyn_var_2"]
            print("from input dyn_var_2")
            await dyn_var.from_input({"x": UPPER_X, "y": UPPER_Y})

        # Callbacks are registered in this order: stop, then the two inputs.
        sched.on_loop(self._stop, 10)
        sched.on_loop(fake_input_1, 3)
        sched.on_loop(fake_input_2, 3)
        aio.run(sched.start())

        payload = plot.to_json()
        xs, ys, _ = zip(*payload["sample"]["data"])
        lowest_x, highest_x = min(xs), max(xs)
        lowest_y, highest_y = min(ys), max(ys)
        self.assertGreaterEqual(lowest_x, LOWER_X)
        self.assertGreaterEqual(lowest_y, LOWER_Y)
        self.assertLessEqual(highest_x, UPPER_X)
        self.assertLessEqual(highest_y, UPPER_Y)
コード例 #25
0
ファイル: test_03_join.py プロジェクト: jdfekete/progressivis
 def test_join(self) -> None:
     """Join the ``Stats`` of columns 1 and 2 of ``bigfile`` with ``Join``.

     The joined output is printed, the loader's output is sent to an
     ``Every`` module, and the join's trace statistics are printed after
     the run. No assertion is made on the result.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     stats_a = Stats(1, reset_index=True, scheduler=sched)
     stats_a.input[0] = loader.output.result
     stats_b = Stats(2, reset_index=True, scheduler=sched)
     stats_b.input[0] = loader.output.result
     joined = Join(scheduler=sched)
     # Input slot 0 is assigned twice, once per Stats output.
     joined.input[0] = stats_a.output.result
     joined.input[0] = stats_b.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = joined.output.result
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input[0] = loader.output.result
     aio.run(sched.start())
     trace = joined.trace_stats(max_runs=1)
     print(trace)
コード例 #26
0
 def test_random_table(self):
     """Check column names and final row count of a two-column ``RandomTable``."""
     sched = self.scheduler()
     generator = RandomTable(["a", "b"], rows=10000, scheduler=sched)
     # Before the run: the table exposes exactly the two requested columns.
     self.assertEqual(generator.table().columns[0], "a")
     self.assertEqual(generator.table().columns[1], "b")
     self.assertEqual(len(generator.table().columns), 2)
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input.df = generator.output.table
     sched.start()
     sched.join()
     # After the run: all requested rows have been produced.
     self.assertEqual(len(generator.table()), 10000)
コード例 #27
0
 def test_percentile(self):
     """Run ``Percentiles`` on column ``_1`` of ``smallfile``.

     Five percentiles are requested and the module's ``percentiles``
     output is sent to an ``Every`` module. The test only starts and joins
     the scheduler; no assertion is made.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("smallfile"), index_col=False, header=None, scheduler=sched
     )
     requested = [0.1, 0.25, 0.5, 0.75, 0.9]
     percentiles = Percentiles(
         "_1", name="test_percentile", percentiles=requested, scheduler=sched
     )
     percentiles.input.table = loader.output.table
     printer = Every(proc=self.terse, name="print", scheduler=sched)
     printer.input.df = percentiles.output.percentiles

     sched.start()
     sched.join()
コード例 #28
0
 def test_random_table2(self):
     """Check column names and final row count of a ten-column ``RandomTable``.

     The table is created with ``force_valid_ids=True`` and one million
     rows.
     """
     sched = self.scheduler()
     generator = RandomTable(
         10, rows=1000000, force_valid_ids=True, scheduler=sched
     )
     # Before the run: ten columns, the first two named "_1" and "_2".
     self.assertEqual(len(generator.table().columns), 10)
     self.assertEqual(generator.table().columns[0], "_1")
     self.assertEqual(generator.table().columns[1], "_2")
     row_counter = Every(proc=self.terse, constant_time=True, scheduler=sched)
     row_counter.input.df = generator.output.table
     sched.start()
     sched.join()
     # After the run: all requested rows have been produced.
     self.assertEqual(len(generator.table()), 1000000)
コード例 #29
0
 def test_histogram2d(self) -> None:
     """Run a ``Histogram2D`` + ``Heatmap`` pipeline on ``bigfile``.

     ``Min`` and ``Max`` modules feed the histogram's ``min`` and ``max``
     inputs and the heatmap output is sent to an ``Every`` module. The test
     only runs the pipeline and reads the histogram's trace statistics; no
     assertion is made.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lower = Min(scheduler=sched)
     lower.input[0] = loader.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = loader.output.result
     histo = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
     histo.input[0] = loader.output.result
     histo.input.min = lower.output.result
     histo.input.max = upper.output.result
     heatmap = Heatmap(filename="histo_%03d.png", scheduler=sched)
     heatmap.input.array = histo.output.result
     printer = Every(proc=self.terse, scheduler=sched)
     printer.input[0] = heatmap.output.result
     # The scheduler is obtained back from the loader to start the run.
     aio.run(loader.scheduler().start())
     _ = histo.trace_stats()
コード例 #30
0
    def test_csv_distances(self):
        s = self.scheduler()
        vec=CSVLoader(get_dataset('smallfile'),index_col=False,header=None,scheduler=s)
#        dis=PairwiseDistances(metric='euclidean',scheduler=s)
#        dis.input.df = vec.output.df
        cnt = Every(proc=self.terse,constant_time=True,scheduler=s)
#        cnt.input.df = dis.output.dist
        cnt.input.df = vec.output.table
        global times
        times = 0
        s.start(ten_times)
        s.join()
        table = vec.table()