Ejemplo n.º 1
0
 def test_intersection(self) -> None:
     """Check Intersection over two Bisect filters on column ``_1``.

     One Bisect keeps ``_1 > 0.3`` and the other keeps ``_1 < 0.8``; both
     are fed by the same HistogramIndex.  Once the scheduler has run, the
     index of the Intersection table must equal the bitmap of the rows
     selected by evaluating ``(_1>0.3)&(_1<0.8)`` on the data that feeds
     the histogram index.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)

     # One-row tables holding the lower and upper bounds.
     lower_tbl = Table(name=None, dshape="{_1: float64}", data={"_1": [0.3]})
     lower_bound = Constant(table=lower_tbl, scheduler=sched)
     upper_tbl = Table(name=None, dshape="{_1: float64}", data={"_1": [0.8]})
     upper_bound = Constant(table=upper_tbl, scheduler=sched)

     index = HistogramIndex(column="_1", scheduler=sched)
     index.create_dependent_modules(source, "result")

     # Each Bisect reads the histogram index output, not the random table.
     above = Bisect(column="_1", op=">", hist_index=index, scheduler=sched)
     above.input[0] = index.output.result
     above.input.limit = lower_bound.output.result

     below = Bisect(column="_1", op="<", hist_index=index, scheduler=sched)
     below.input[0] = index.output.result
     below.input.limit = upper_bound.output.result

     both = Intersection(scheduler=sched)
     # NOTE(review): both connections are assigned to input[0]; presumably
     # this slot accepts several inputs -- confirm against Intersection.
     both.input[0] = above.output.result
     both.input[0] = below.output.result

     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = both.output.result

     aio.run(sched.start())

     upstream = index.input_module
     assert upstream is not None
     expected = upstream.output["result"].data().eval(
         "(_1>0.3)&(_1<0.8)", result_object="index"
     )
     self.assertEqual(both.table.index, bitmap(expected))
Ejemplo n.º 2
0
    def test_intersection(self):
        """Check Intersection over two Bisect filters on column ``_1``.

        One Bisect keeps ``_1 > 0.3`` and the other keeps ``_1 < 0.8``; both
        are fed by the same HistogramIndex.  After the scheduler has been
        started and joined, the selection of the Intersection table must
        equal the bitmap of the rows selected by evaluating
        ``(_1>0.3)&(_1<0.8)`` on the data that feeds the histogram index.
        """
        sched = self.scheduler()
        source = RandomTable(2, rows=100000, scheduler=sched)

        # One-row tables holding the lower and upper bounds.
        lower_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
        lower_bound = Constant(table=lower_tbl, scheduler=sched)
        upper_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
        upper_bound = Constant(table=upper_tbl, scheduler=sched)

        index = HistogramIndex(column='_1', scheduler=sched)
        index.create_dependent_modules(source, 'table')

        # Each Bisect reads the histogram index output, not the random table.
        above = Bisect(column='_1', op='>', hist_index=index, scheduler=sched)
        above.input.table = index.output.table
        above.input.limit = lower_bound.output.table

        below = Bisect(column='_1', op='<', hist_index=index, scheduler=sched)
        below.input.table = index.output.table
        below.input.limit = upper_bound.output.table

        both = Intersection(scheduler=sched)
        # NOTE(review): both connections are assigned to input.table;
        # presumably this slot accepts several inputs -- confirm.
        both.input.table = above.output.table
        both.input.table = below.output.table

        printer = Print(proc=self.terse, scheduler=sched)
        printer.input.df = both.output.table

        sched.start()
        sched.join()

        upstream_data = index.input_module.output['table'].data()
        expected = upstream_data.eval('(_1>0.3)&(_1<0.8)',
                                      result_object='index')
        self.assertEqual(both.table().selection, bitmap(expected))
Ejemplo n.º 3
0
 def test_bisect2(self) -> None:
     """Check a ``_1 > 0.5`` Bisect when a Stirrer sits behind the source.

     The random table goes through a Stirrer (``update_column="_1"``,
     ``delete_rows=100``) before reaching the HistogramIndex.  After the
     run, the Bisect table index must equal the bitmap of the rows of the
     Stirrer table for which ``_1>0.5`` evaluates true.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=100_000, scheduler=sched)

     # Other Stirrer options (update_rows=5, fixed_step_size=100) were
     # left disabled in this test.
     mixer = Stirrer(update_column="_1", delete_rows=100, scheduler=sched)
     mixer.input[0] = source.output.result

     # NOTE(review): the dshape declares a string column while the datum
     # is the float 0.5 -- confirm this is intended.
     limit_tbl = Table(
         name=None, dshape="{value: string}", data={"value": [0.5]}
     )
     limit = Constant(table=limit_tbl, scheduler=sched)

     index = HistogramIndex(column="_1", scheduler=sched)
     index.create_dependent_modules(mixer, "result")

     # The Bisect reads the histogram index output, not the random table.
     greater = Bisect(column="_1", op=">", hist_index=index, scheduler=sched)
     greater.input[0] = index.output.result
     greater.input.limit = limit.output.result

     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = greater.output.result

     aio.run(sched.start())

     expected = mixer.table.eval("_1>0.5", result_object="index")
     self.assertEqual(greater.table.index, bitmap(expected))
Ejemplo n.º 4
0
 def _impl_tst_percentiles(self, accuracy):
     """Compare Percentiles output with ``np.percentile`` on column ``_1``.

     Builds a RandomTable, a HistogramIndex on ``_1`` and a Percentiles
     module asked for the 25th, 50th and 75th percentiles, runs the
     scheduler, then asserts that each value in the last row of the
     Percentiles table is within 0.01 of the NumPy result.

     ``accuracy`` is forwarded unchanged to ``Percentiles``.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=10000, scheduler=sched)

     index = HistogramIndex(column='_1', scheduler=sched)
     index.input.table = source.output.table

     # One-row table naming the requested percentiles.
     wanted_tbl = Table(
         name=None,
         dshape='{_25: float64, _50: float64, _75: float64}',
         data={'_25': [25.0], '_50': [50.0], '_75': [75.0]},
     )
     wanted = Constant(table=wanted_tbl, scheduler=sched)

     pct = Percentiles(index, accuracy=accuracy, scheduler=sched)
     pct.input.table = source.output.table
     pct.input.percentiles = wanted.output.table

     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = pct.output.table

     sched.start()
     sched.join()

     computed = pct.table().last().to_dict()
     values = source.table()['_1'].values
     # Reference values, keyed like the columns of the Percentiles table.
     expected = {
         key: np.percentile(values, q)
         for key, q in (('_25', 25.0), ('_50', 50.0), ('_75', 75.0))
     }
     print(
         "Table=> accuracy: ", accuracy,
         " 25:", expected['_25'], computed['_25'],
         " 50:", expected['_50'], computed['_50'],
         " 75:", expected['_75'], computed['_75'],
     )
     for key, reference in expected.items():
         self.assertAlmostEqual(reference, computed[key], delta=0.01)
 def _impl_stirred_tst_percentiles(self, accuracy: float,
                                   **kw: Any) -> None:
     """Compare Percentiles output with NumPy when a Stirrer is in the path.

     The random table is passed through a Stirrer (``update_column="_2"``,
     ``fixed_step_size=1000``, plus any extra keyword arguments in ``kw``)
     before feeding both the HistogramIndex on ``_1`` and the Percentiles
     module.  After the run, the 25th, 50th and 75th percentiles found in
     the last row of the Percentiles table must be within 0.01 of
     ``np.percentile`` applied to column ``_1`` of the Stirrer table.

     ``accuracy`` is forwarded unchanged to ``Percentiles``.
     """
     sched = self.scheduler()
     with sched:
         source = RandomTable(2, rows=10000, scheduler=sched)

         mixer = Stirrer(
             update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
         )
         mixer.input[0] = source.output.result

         index = HistogramIndex(column="_1", scheduler=sched)
         index.input[0] = mixer.output.result

         # Requested percentiles, keyed by output column name.
         wanted_dict = PsDict({"_25": 25.0, "_50": 50.0, "_75": 75.0})
         wanted = Constant(table=wanted_dict, scheduler=sched)

         pct = Percentiles(accuracy=accuracy, scheduler=sched)
         pct.input[0] = mixer.output.result
         pct.input.percentiles = wanted.output.result
         pct.input.hist = index.output.result

         printer = Print(proc=self.terse, scheduler=sched)
         printer.input[0] = pct.output.result

     aio.run(sched.start())

     computed = notNone(pct.table.last()).to_dict()
     # Reference data is read from the Stirrer table, not the random table.
     values = mixer.table.to_array(columns=["_1"]).reshape(-1)
     expected = {
         key: np.percentile(values, q)  # type: ignore
         for key, q in (("_25", 25.0), ("_50", 50.0), ("_75", 75.0))
     }
     print(
         "Table=> accuracy: ", accuracy,
         " 25:", expected["_25"], computed["_25"],
         " 50:", expected["_50"], computed["_50"],
         " 75:", expected["_75"], computed["_75"],
     )
     for key, reference in expected.items():
         self.assertAlmostEqual(reference, computed[key], delta=0.01)
Ejemplo n.º 6
0
 def test_bisect(self):
     """Check a single ``_1 > 0.5`` Bisect fed by a HistogramIndex.

     After the scheduler has been started and joined, the selection of
     the Bisect table must equal the bitmap of the rows of the random
     table for which ``_1>0.5`` evaluates true.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)

     # NOTE(review): the dshape declares a string column while the datum
     # is the float 0.5 -- confirm this is intended.
     limit_tbl = Table(
         name=None, dshape='{value: string}', data={'value': [0.5]}
     )
     limit = Constant(table=limit_tbl, scheduler=sched)

     index = HistogramIndex(column='_1', scheduler=sched)
     index.create_dependent_modules(source, 'table')

     # The Bisect reads the histogram index output, not the random table.
     greater = Bisect(column='_1', op='>', hist_index=index, scheduler=sched)
     greater.input.table = index.output.table
     greater.input.limit = limit.output.table

     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = greater.output.table

     sched.start()
     sched.join()

     expected = source.table().eval('_1>0.5', result_object='index')
     # The private ``_table`` attribute is read here, as in the original.
     self.assertEqual(greater._table.selection, bitmap(expected))
 def _impl_tst_percentiles(self, accuracy: float) -> None:
     """Compare Percentiles output with ``np.percentile`` on column ``_1``.

     Inside the scheduler context, wires a RandomTable into a
     HistogramIndex on ``_1`` and into a Percentiles module asked for the
     25th, 50th and 75th percentiles.  After the run, each value in the
     last row of the Percentiles table must be within 0.01 of the NumPy
     result computed on the random table.

     ``accuracy`` is forwarded unchanged to ``Percentiles``.
     """
     sched = self.scheduler()
     with sched:
         source = RandomTable(2, rows=10000, scheduler=sched)

         index = HistogramIndex(column="_1", scheduler=sched)
         index.input[0] = source.output.result

         # Requested percentiles, keyed by output column name.
         wanted_dict = PsDict({"_25": 25.0, "_50": 50.0, "_75": 75.0})
         wanted = Constant(table=wanted_dict, scheduler=sched)

         pct = Percentiles(accuracy=accuracy, scheduler=sched)
         pct.input[0] = source.output.result
         pct.input.percentiles = wanted.output.result
         pct.input.hist = index.output.result

         printer = Print(proc=self.terse, scheduler=sched)
         printer.input[0] = pct.output.result

     aio.run(sched.start())

     final_row = pct.table.last()
     assert final_row is not None
     computed = final_row.to_dict()
     values = source.table["_1"].values
     expected = {
         key: np.percentile(values, q)  # type: ignore
         for key, q in (("_25", 25.0), ("_50", 50.0), ("_75", 75.0))
     }
     print(
         "Table=> accuracy: ", accuracy,
         " 25:", expected["_25"], computed["_25"],
         " 50:", expected["_50"], computed["_50"],
         " 75:", expected["_75"], computed["_75"],
     )
     for key, reference in expected.items():
         self.assertAlmostEqual(reference, computed[key], delta=0.01)