コード例 #1
0
 def test_hub_if_else(self):
     """Check a ``Hub`` fed by both branches of an always-false ``Switch``.

     A ``Max`` module reads the switch's ``result`` slot and a ``Min`` module
     reads its ``result_else`` slot; both are connected to the hub's ``table``
     input. After the run, ``stirrer.result.min()`` is compared with
     ``hub.result`` through ``self.compare``.
     """
     sched = Scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_1",
         delete_rows=5,
         update_rows=5,
         fixed_step_size=100,
         scheduler=sched,
     )
     stirrer.input[0] = source.output.result
     # The condition never holds, so presumably only ``result_else`` carries
     # data -- which is why the expected value below is the minimum.
     switch = Switch(condition=lambda x: False, scheduler=sched)
     switch.input[0] = stirrer.output.result
     then_branch = Max(name=f"max_{hash(source)}", scheduler=sched)
     then_branch.input[0] = switch.output.result
     else_branch = Min(name=f"min_{hash(source)}", scheduler=sched)
     else_branch.input[0] = switch.output.result_else
     hub = Hub(scheduler=sched)
     hub.input.table = else_branch.output.result
     hub.input.table = then_branch.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = hub.output.result
     aio.run(sched.start())
     expected = stirrer.result.min()
     actual = hub.result
     self.compare(expected, actual)
コード例 #2
0
    def t_histogram1d_impl(self, **kw: Any) -> None:
        """Check ``Histogram1D`` on column ``_2`` against ``np.histogram``.

        The "bigfile" dataset is loaded with ``CSVLoader`` and passed through
        a ``Stirrer``; ``Min`` and ``Max`` modules supply the histogram
        bounds. The last histogram row is then compared with a histogram
        computed by NumPy over the stirrer's final ``_2`` column.

        Args:
            **kw: extra keyword arguments forwarded to ``Stirrer``.
        """
        sched = self.scheduler()
        loader = CSVLoader(
            get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
        )
        stirrer = Stirrer(
            update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
        )
        stirrer.input[0] = loader.output.result
        lower = Min(scheduler=sched)
        lower.input[0] = stirrer.output.result
        upper = Max(scheduler=sched)
        upper.input[0] = stirrer.output.result
        histogram1d = Histogram1D("_2", scheduler=sched)
        histogram1d.input[0] = stirrer.output.result
        histogram1d.input.min = lower.output.result
        histogram1d.input.max = upper.output.result

        sink = Every(proc=self.terse, scheduler=sched)
        sink.input[0] = histogram1d.output.result
        aio.run(sched.start())
        # Called for its own sake; the returned statistics are not inspected.
        histogram1d.trace_stats()
        last_row = notNone(histogram1d.table.last()).to_dict()
        computed = last_row["array"]
        value_range = (last_row["min"], last_row["max"])
        column = stirrer.table.loc[:, ["_2"]]
        assert column is not None
        values = column.to_array().reshape(-1)
        expected, _edges = np.histogram(  # type: ignore
            values,
            bins=histogram1d.params.bins,
            density=False,
            range=value_range,
        )
        self.assertEqual(np.sum(computed), np.sum(expected))
        self.assertListEqual(computed.tolist(), expected.tolist())
コード例 #3
0
 def test_bisect2(self) -> None:
     """Check ``Bisect`` (``_1 > limit``) on a stirred table with deletions.

     The limit comes from a one-row constant table holding 0.5. After the
     run, the index of the bisect output must equal the index obtained by
     evaluating ``_1>0.5`` on the stirrer's table.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=100_000, scheduler=sched)
     stirrer = Stirrer(update_column="_1", delete_rows=100, scheduler=sched)
     stirrer.input[0] = source.output.result
     # NOTE(review): the dshape declares ``value`` as a string while the data
     # is the float 0.5 -- confirm this is intended.
     limit_table = Table(
         name=None, dshape="{value: string}", data={"value": [0.5]}
     )
     limit = Constant(table=limit_table, scheduler=sched)
     index_mod = HistogramIndex(column="_1", scheduler=sched)
     index_mod.create_dependent_modules(stirrer, "result")
     bisect_ = Bisect(
         column="_1", op=">", hist_index=index_mod, scheduler=sched
     )
     bisect_.input[0] = index_mod.output.result
     bisect_.input.limit = limit.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = bisect_.output.result
     aio.run(sched.start())
     expected = stirrer.table.eval("_1>0.5", result_object="index")
     self.assertEqual(bisect_.table.index, bitmap(expected))
コード例 #4
0
 def test_idxmax2(self) -> None:
     """Compare the maxima exposed by ``IdxMax`` with those of ``Max``.

     Both modules read the output of a ``Stirrer`` (``delete_rows=5``) fed by
     a ``RandomTable`` created with ``throttle=1000``. After the run, the
     last row of ``idxmax.max()`` is compared with ``max_.psdict`` through
     ``self.compare``.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=10000, throttle=1000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_1", delete_rows=5, fixed_step_size=100, scheduler=sched
     )
     stirrer.input[0] = source.output.result
     idxmax = IdxMax(scheduler=sched)
     idxmax.input[0] = stirrer.output.result
     max_ = Max(scheduler=sched)
     max_.input[0] = stirrer.output.result
     idx_printer = Print(proc=self.terse, scheduler=sched)
     idx_printer.input[0] = idxmax.output.result
     max_printer = Print(proc=self.terse, scheduler=sched)
     max_printer.input[0] = max_.output.result
     aio.run(sched.start())
     reference = max_.psdict
     # Named ``max_table`` so the ``max`` builtin is not shadowed.
     max_table = idxmax.max()
     assert max_table is not None
     candidate = notNone(max_table.last()).to_dict()
     self.compare(reference, candidate)
コード例 #5
0
 def t_histogram2d_impl(self, **kw: Any) -> None:
     """Check ``Histogram2D`` (100x100 bins) against ``fh.histogram2d``.

     A three-column ``RandomTable`` goes through a ``Stirrer``; ``Min`` and
     ``Max`` modules supply the histogram bounds and a ``Heatmap`` consumes
     the histogram. The last histogram row is compared with a reference
     computed from the stirrer's ``_1``/``_2`` columns, flipped along axis 0.

     Args:
         **kw: extra keyword arguments forwarded to ``Stirrer``.
     """
     sched = self.scheduler()
     source = RandomTable(3, rows=100000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
     )
     stirrer.input[0] = source.output.result
     lower = Min(scheduler=sched)
     lower.input[0] = stirrer.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = stirrer.output.result
     histogram2d = Histogram2D(0, 1, xbins=100, ybins=100, scheduler=sched)
     histogram2d.input[0] = stirrer.output.result
     histogram2d.input.min = lower.output.result
     histogram2d.input.max = upper.output.result
     heatmap = Heatmap(filename="histo_%03d.png", scheduler=sched)
     heatmap.input.array = histogram2d.output.result
     sink = Every(proc=self.terse, scheduler=sched)
     sink.input[0] = heatmap.output.result
     aio.run(sched.start())
     last_row = notNone(histogram2d.table.last()).to_dict()
     computed = last_row["array"]
     # The reference is built y-first (bounds, bins and data columns alike).
     y_range = [last_row["ymin"], last_row["ymax"]]
     x_range = [last_row["xmin"], last_row["xmax"]]
     columns = stirrer.table.loc[:, ["_1", "_2"]]
     assert columns is not None
     values = columns.to_array()
     params = histogram2d.params
     expected = fh.histogram2d(
         values[:, 1],
         values[:, 0],
         bins=[params.ybins, params.xbins],
         range=[y_range, x_range],
     )
     expected = np.flip(expected, axis=0)  # type: ignore
     self.assertEqual(np.sum(computed), np.sum(expected))
     self.assertListEqual(
         computed.reshape(-1).tolist(), expected.reshape(-1).tolist()
     )
コード例 #6
0
    def _impl_stirred_tst_percentiles_rq(self, accuracy: float,
                                         **kw: Any) -> None:
        """Check ``Percentiles`` computed on the output of a ``RangeQuery``.

        The range query on column ``_1`` is bounded by the constants 0.3 and
        0.8 and reads a stirred random table. The 25th, 50th and 75th
        percentiles reported by the module must be within 0.01 of
        ``np.percentile`` applied to the range query's ``_1`` values.

        Args:
            accuracy: value passed to ``Percentiles(accuracy=...)``.
            **kw: extra keyword arguments forwarded to ``Stirrer``.
        """
        sched = self.scheduler()
        with sched:
            source = RandomTable(2, rows=10000, scheduler=sched)
            stirrer = Stirrer(
                update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
            )
            stirrer.input[0] = source.output.result
            lower_bound = Constant(table=PsDict({"_1": 0.3}), scheduler=sched)
            upper_bound = Constant(table=PsDict({"_1": 0.8}), scheduler=sched)
            range_qry = RangeQuery(column="_1", scheduler=sched)
            range_qry.create_dependent_modules(
                stirrer,
                "result",
                min_value=lower_bound,
                max_value=upper_bound,
            )

            hist_index = range_qry.hist_index
            assert hist_index
            requested = PsDict({"_25": 25.0, "_50": 50.0, "_75": 75.0})
            which_percentiles = Constant(table=requested, scheduler=sched)
            percentiles = Percentiles(accuracy=accuracy, scheduler=sched)
            percentiles.input[0] = range_qry.output.result
            percentiles.input.percentiles = which_percentiles.output.result
            percentiles.input.hist = hist_index.output.result
            printer = Print(proc=self.terse, scheduler=sched)
            printer.input[0] = percentiles.output.result
        aio.run(sched.start())
        pdict = notNone(percentiles.table.last()).to_dict()
        values = range_qry.table["_1"].values
        expected = {
            key: np.percentile(values, rank)  # type: ignore
            for key, rank in (("_25", 25.0), ("_50", 50.0), ("_75", 75.0))
        }
        print(
            "TSV=> accuracy: ", accuracy,
            " 25:", expected["_25"], pdict["_25"],
            " 50:", expected["_50"], pdict["_50"],
            " 75:", expected["_75"], pdict["_75"],
        )
        # Checked in ascending percentile order, with an absolute tolerance.
        for key in ("_25", "_50", "_75"):
            self.assertAlmostEqual(expected[key], pdict[key], delta=0.01)
コード例 #7
0
 def _t_stirred_unary(self, **kw: Any) -> None:
     """Check ``Unary(np.log)`` on columns ``_3``, ``_5`` and ``_7``.

     The module output must match ``np.log`` applied to the corresponding
     columns of the stirrer's final table (NaNs compare equal), and the
     module name must start with ``"unary_"``.

     Args:
         **kw: extra keyword arguments forwarded to ``Stirrer``.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=100_000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_3", fixed_step_size=1000, scheduler=sched, **kw
     )
     stirrer.input[0] = source.output.result
     module = Unary(np.log, columns=["_3", "_5", "_7"], scheduler=sched)
     module.input[0] = stirrer.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = module.output.result
     aio.run(sched.start())
     # Positions 2, 4 and 6 are used as the counterparts of _3, _5 and _7.
     stirred = stirrer.table.to_array()
     expected = np.log(stirred[:, [2, 4, 6]])
     actual = module.table.to_array()
     self.assertTrue(module.name.startswith("unary_"))
     self.assertTrue(np.allclose(expected, actual, equal_nan=True))
コード例 #8
0
 def _impl_stirred_tst_percentiles(self, accuracy: float,
                                   **kw: Any) -> None:
     """Check ``Percentiles`` computed directly on a stirred random table.

     A ``HistogramIndex`` on column ``_1`` feeds the module's ``hist`` input.
     The 25th, 50th and 75th percentiles reported by the module must be
     within 0.01 of ``np.percentile`` applied to the stirrer's ``_1`` column.

     Args:
         accuracy: value passed to ``Percentiles(accuracy=...)``.
         **kw: extra keyword arguments forwarded to ``Stirrer``.
     """
     sched = self.scheduler()
     with sched:
         source = RandomTable(2, rows=10000, scheduler=sched)
         stirrer = Stirrer(
             update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
         )
         stirrer.input[0] = source.output.result
         hist_index = HistogramIndex(column="_1", scheduler=sched)
         hist_index.input[0] = stirrer.output.result
         requested = PsDict({"_25": 25.0, "_50": 50.0, "_75": 75.0})
         which_percentiles = Constant(table=requested, scheduler=sched)
         percentiles = Percentiles(accuracy=accuracy, scheduler=sched)
         percentiles.input[0] = stirrer.output.result
         percentiles.input.percentiles = which_percentiles.output.result
         percentiles.input.hist = hist_index.output.result
         printer = Print(proc=self.terse, scheduler=sched)
         printer.input[0] = percentiles.output.result
     aio.run(sched.start())
     pdict = notNone(percentiles.table.last()).to_dict()
     values = stirrer.table.to_array(columns=["_1"]).reshape(-1)
     expected = {
         key: np.percentile(values, rank)  # type: ignore
         for key, rank in (("_25", 25.0), ("_50", 50.0), ("_75", 75.0))
     }
     print(
         "Table=> accuracy: ", accuracy,
         " 25:", expected["_25"], pdict["_25"],
         " 50:", expected["_50"], pdict["_50"],
         " 75:", expected["_75"], pdict["_75"],
     )
     # Checked in ascending percentile order, with an absolute tolerance.
     for key in ("_25", "_50", "_75"):
         self.assertAlmostEqual(expected[key], pdict[key], delta=0.01)
コード例 #9
0
 def test_stirrer(self) -> None:
     """Check that ``Max`` stays consistent with a stirred table.

     The stirrer is configured with ``delete_rows=5`` and ``update_rows=5``.
     After the run, ``stirrer.table.max()`` is compared with ``max_.result``
     through ``self.compare``.
     """
     sched = Scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_1",
         delete_rows=5,
         update_rows=5,
         fixed_step_size=100,
         scheduler=sched,
     )
     stirrer.input[0] = source.output.result
     max_ = Max(name=f"max_{hash(source)}", scheduler=sched)
     max_.input[0] = stirrer.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = max_.output.result
     aio.run(sched.start())
     expected = stirrer.table.max()
     actual = max_.result
     self.compare(expected, actual)
コード例 #10
0
 def test_filter3(self) -> None:
     """Check ``FilterMod(expr="_1 > 0.5")`` on a stirred table with updates.

     The index of the filter output is verified twice against the filter's
     own ``table`` input slot data: once with the table's ``eval`` and once
     with a pandas ``DataFrame.eval`` over the same rows.
     """
     sched = Scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_1", update_rows=5, fixed_step_size=100, scheduler=sched
     )
     stirrer.input[0] = source.output.result
     filter_ = FilterMod(expr="_1 > 0.5", scheduler=sched)
     filter_.input[0] = stirrer.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = filter_.output.result
     aio.run(sched.start())
     input_table = filter_.get_input_slot("table").data()

     # First reference: evaluate the predicate with the table itself.
     table_hits = input_table.eval("_1>0.5", result_object="index")
     self.assertEqual(filter_.table.index, bitmap(table_hits))

     # Second reference: evaluate the same predicate with pandas.
     frame = pd.DataFrame(
         input_table.to_dict(), index=input_table.index.to_array()
     )
     mask = frame.eval("_1>0.5")
     self.assertEqual(filter_.table.index, bitmap(frame.index[mask]))
コード例 #11
0
 def _impl_stirred_tst_intersection(self, **kw: Any) -> None:
     """Check ``Intersection`` of two ``Bisect`` modules (``>0.3``, ``<0.8``).

     Both bisects share one ``HistogramIndex`` on column ``_1`` built over a
     stirred random table. The index of the intersection output must equal
     the index obtained by evaluating ``(_1>0.3)&(_1<0.8)`` on the data of
     the histogram index's input module.

     Args:
         **kw: extra keyword arguments forwarded to ``Stirrer``.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
     )
     stirrer.input[0] = source.output.result
     low_table = Table(name=None, dshape="{_1: float64}", data={"_1": [0.3]})
     low_limit = Constant(table=low_table, scheduler=sched)
     high_table = Table(name=None, dshape="{_1: float64}", data={"_1": [0.8]})
     high_limit = Constant(table=high_table, scheduler=sched)
     hist_index = HistogramIndex(column="_1", scheduler=sched)
     hist_index.create_dependent_modules(stirrer, "result")
     above_low = Bisect(
         column="_1", op=">", hist_index=hist_index, scheduler=sched
     )
     above_low.input[0] = hist_index.output.result
     above_low.input.limit = low_limit.output.result
     below_high = Bisect(
         column="_1", op="<", hist_index=hist_index, scheduler=sched
     )
     below_high.input[0] = hist_index.output.result
     below_high.input.limit = high_limit.output.result
     inter = Intersection(scheduler=sched)
     # Both bisect outputs are assigned to the same input slot 0.
     inter.input[0] = above_low.output.result
     inter.input[0] = below_high.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = inter.output.result
     aio.run(sched.start())
     assert hist_index.input_module is not None
     indexed_data = hist_index.input_module.output["result"].data()
     expected = indexed_data.eval("(_1>0.3)&(_1<0.8)", result_object="index")
     self.assertEqual(inter.table.index, bitmap(expected))
コード例 #12
0
 def t_stirred_cols_binary(self, **kw: Any) -> None:
     """Check ``ColsBinary(np.add)`` pairing ``_3/_5/_7`` with ``_4/_6/_8``.

     The output columns must be ``["_3", "_5", "_7"]``, the values must match
     ``np.add`` over the corresponding columns of the stirrer's final table
     (NaNs compare equal), and the module name must start with
     ``"cols_binary_"``.

     Args:
         **kw: extra keyword arguments forwarded to ``Stirrer``.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=10_000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_3", fixed_step_size=1000, scheduler=sched, **kw
     )
     stirrer.input[0] = source.output.result
     module = ColsBinary(
         np.add,
         first=["_3", "_5", "_7"],
         second=["_4", "_6", "_8"],
         scheduler=sched,
     )
     module.input[0] = stirrer.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = module.output.result
     aio.run(sched.start())
     self.assertListEqual(module.table.columns, ["_3", "_5", "_7"])
     stirred = stirrer.table.to_array()
     # Positions 2/4/6 and 3/5/7 stand for the first and second operands.
     left_operand = stirred[:, [2, 4, 6]]
     right_operand = stirred[:, [3, 5, 7]]
     expected = np.add(left_operand, right_operand)
     actual = module.table.to_array()
     self.assertTrue(module.name.startswith("cols_binary_"))
     self.assertTrue(np.allclose(expected, actual, equal_nan=True))
コード例 #13
0
 def test_switch_if_then(self):
     """Check the ``result`` branch of an always-true ``Switch``.

     A ``Max`` module reads the switch's ``result`` slot while a ``Print``
     module is attached to ``result_else``. After the run,
     ``stirrer.result.max()`` is compared with ``max_.result`` through
     ``self.compare``.
     """
     sched = Scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_1",
         delete_rows=5,
         update_rows=5,
         fixed_step_size=100,
         scheduler=sched,
     )
     stirrer.input[0] = source.output.result
     switch = Switch(condition=lambda x: True, scheduler=sched)
     switch.input[0] = stirrer.output.result
     max_ = Max(name=f"max_{hash(source)}", scheduler=sched)
     max_.input[0] = switch.output.result
     else_printer = Print(proc=self.terse, scheduler=sched)
     else_printer.input[0] = switch.output.result_else
     then_printer = Print(proc=self.terse, scheduler=sched)
     then_printer.input[0] = max_.output.result
     aio.run(sched.start())
     expected = stirrer.result.max()
     actual = max_.result
     self.compare(expected, actual)
コード例 #14
0
 def test_idxmin2(self) -> None:
     """Compare the minima exposed by ``IdxMin`` with those of ``Min``.

     Both modules read the output of a ``Stirrer`` (``delete_rows=5``) fed by
     a ``RandomTable`` created with ``throttle=1000``. After the run, the
     last row of ``idxmin.min()`` is compared with ``min_.psdict`` through
     ``self.compare``.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=10000, throttle=1000, scheduler=sched)
     stirrer = Stirrer(
         update_column="_1", delete_rows=5, fixed_step_size=100, scheduler=sched
     )
     stirrer.input[0] = source.output.result
     idxmin = IdxMin(scheduler=sched)
     idxmin.input[0] = stirrer.output.result
     min_ = Min(scheduler=sched)
     min_.input[0] = stirrer.output.result
     idx_printer = Print(proc=self.terse, scheduler=sched)
     idx_printer.input[0] = idxmin.output.result
     min_printer = Print(proc=self.terse, scheduler=sched)
     min_printer.input[0] = min_.output.result
     aio.run(sched.start())
     reference = min_.psdict
     # Named ``min_table`` so the ``min`` builtin is not shadowed.
     min_table = idxmin.min()
     assert min_table is not None
     candidate = notNone(min_table.last()).to_dict()
     self.compare(reference, candidate)
コード例 #15
0
 def _t_stirred_binary(self, **kw: Any) -> None:
     """Check ``Binary(np.add)`` over two independently stirred tables.

     Columns ``_3/_5/_7`` of the first input are added to columns
     ``_4/_6/_8`` of the second. The reference is computed with ``np.add``
     on the rows whose index is present in both stirrer tables (NaNs compare
     equal), and the module name must start with ``"binary_"``.

     Args:
         **kw: extra keyword arguments forwarded to both ``Stirrer`` modules.
     """
     sched = self.scheduler()
     left_source = RandomTable(10, rows=100000, scheduler=sched)
     right_source = RandomTable(10, rows=100000, scheduler=sched)
     left_stirrer = Stirrer(
         update_column="_3", fixed_step_size=1000, scheduler=sched, **kw
     )
     left_stirrer.input[0] = left_source.output.result
     right_stirrer = Stirrer(
         update_column="_3", fixed_step_size=1000, scheduler=sched, **kw
     )
     right_stirrer.input[0] = right_source.output.result
     operand_columns = {
         "first": ["_3", "_5", "_7"],
         "second": ["_4", "_6", "_8"],
     }
     module = Binary(np.add, columns=operand_columns, scheduler=sched)
     module.input.first = left_stirrer.output.result
     module.input.second = right_stirrer.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = module.output.result
     aio.run(sched.start())
     left_ids = left_stirrer.table.index.to_array()
     right_ids = right_stirrer.table.index.to_array()
     # Only rows whose index exists in both tables enter the reference.
     shared_ids = bitmap(left_ids) & bitmap(right_ids)
     left_rows = left_stirrer.table.loc[shared_ids, :]
     right_rows = right_stirrer.table.loc[shared_ids, :]
     assert left_rows is not None and right_rows is not None
     left_operand = left_rows.to_array()[:, [2, 4, 6]]
     right_operand = right_rows.to_array()[:, [3, 5, 7]]
     expected = np.add(left_operand, right_operand)
     actual = module.table.to_array()
     self.assertTrue(module.name.startswith("binary_"))
     self.assertTrue(np.allclose(expected, actual, equal_nan=True))