def test_hist_index_min_max(self) -> None:
    """Test min_out and max_out on HistogramIndex.

    A RangeQuery on column "_1" is built over a RandomTable, with
    ``min_value``/``max_value`` constants 0.3 and 0.8. Min and Max modules
    are plugged on the ``min_out`` and ``max_out`` outputs of the query's
    histogram index. Once the scheduler has run, their "_1" entries must be
    almost equal to the min and max of the random table itself.
    """
    sched = self.scheduler()
    with sched:
        source = RandomTable(2, rows=100000, scheduler=sched)
        lower = Constant(table=PsDict({"_1": 0.3}), scheduler=sched)
        upper = Constant(table=PsDict({"_1": 0.8}), scheduler=sched)
        query = RangeQuery(column="_1", scheduler=sched)
        query.create_dependent_modules(
            source, "result", min_value=lower, max_value=upper
        )
        query_printer = Print(proc=self.terse, scheduler=sched)
        query_printer.input[0] = query.output.result
        index = query.hist_index
        assert index is not None
        # Both module names share a suffix derived from the index's hash.
        suffix = str(hash(index))
        min_mod = Min(name="min_" + suffix, scheduler=sched)
        min_mod.input[0] = index.output.min_out
        min_printer = Print(proc=self.terse, scheduler=sched)
        min_printer.input[0] = min_mod.output.result
        max_mod = Max(name="max_" + suffix, scheduler=sched)
        max_mod.input[0] = index.output.max_out
        max_printer = Print(proc=self.terse, scheduler=sched)
        max_printer.input[0] = max_mod.output.result
    aio.run(sched.start())
    expected_min = cast(float, source.table.min()["_1"])
    observed_min = cast(float, min_mod.psdict["_1"])
    self.assertAlmostEqual(expected_min, observed_min)
    expected_max = cast(float, source.table.max()["_1"])
    observed_max = cast(float, max_mod.psdict["_1"])
    self.assertAlmostEqual(expected_max, observed_max)
Example #2
0
 def test_hist_index_min_max(self):
     """Test min_out and max_out on HistogramIndex.

     Min and Max modules are connected to the ``min_out`` and ``max_out``
     outputs of the histogram index created by a RangeQuery on column '_1'.
     After the scheduler has run, the '_1' value of the last row of each
     module's table must be almost equal to the min and max of the random
     table.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     lower_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
     lower = Constant(table=lower_tbl, scheduler=sched)
     upper_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
     upper = Constant(table=upper_tbl, scheduler=sched)
     query = RangeQuery(column='_1', scheduler=sched)
     query.create_dependent_modules(
         source, 'table', min_value=lower, max_value=upper
     )
     query_printer = Print(proc=self.terse, scheduler=sched)
     query_printer.input.df = query.output.table
     index = query.hist_index
     # Both module names share a suffix derived from the index's hash.
     suffix = str(hash(index))
     min_mod = Min(name='min_' + suffix, scheduler=sched)
     min_mod.input.table = index.output.min_out
     min_printer = Print(proc=self.terse, scheduler=sched)
     min_printer.input.df = min_mod.output.table
     max_mod = Max(name='max_' + suffix, scheduler=sched)
     max_mod.input.table = index.output.max_out
     max_printer = Print(proc=self.terse, scheduler=sched)
     max_printer.input.df = max_mod.output.table
     sched.start()
     sched.join()
     expected_min = source.table().min()['_1']
     observed_min = min_mod.table().last().to_dict()['_1']
     self.assertAlmostEqual(expected_min, observed_min)
     expected_max = source.table().max()['_1']
     observed_max = max_mod.table().last().to_dict()['_1']
     self.assertAlmostEqual(expected_max, observed_max)
    def _impl_stirred_tst_percentiles_rq(self, accuracy: float,
                                         **kw: Any) -> None:
        """Check Percentiles computed over a RangeQuery fed by a Stirrer.

        The dataflow is RandomTable -> Stirrer -> RangeQuery (column "_1",
        with ``min_value``/``max_value`` constants 0.3 and 0.8). A
        Percentiles module is connected to the query result and to the
        query's histogram index, and is asked for the 25th, 50th and 75th
        percentiles. After the scheduler has run, the last row of the
        percentiles table is compared with ``np.percentile`` applied to the
        "_1" values of the range query's table, with a tolerance of 0.01.

        Args:
            accuracy: forwarded to ``Percentiles(accuracy=...)``.
            **kw: extra keyword arguments forwarded to ``Stirrer``.
        """
        s = self.scheduler()
        with s:
            random = RandomTable(2, rows=10000, scheduler=s)
            stirrer = Stirrer(update_column="_2",
                              fixed_step_size=1000,
                              scheduler=s,
                              **kw)
            stirrer.input[0] = random.output.result
            t_min = PsDict({"_1": 0.3})
            min_value = Constant(table=t_min, scheduler=s)
            t_max = PsDict({"_1": 0.8})
            max_value = Constant(table=t_max, scheduler=s)
            range_qry = RangeQuery(column="_1", scheduler=s)
            range_qry.create_dependent_modules(stirrer,
                                               "result",
                                               min_value=min_value,
                                               max_value=max_value)

            hist_index = range_qry.hist_index
            # Identity check rather than truthiness: the intent is only to
            # rule out a missing index, whatever bool()/len() the module
            # object may define.
            assert hist_index is not None
            t_percentiles = PsDict({"_25": 25.0, "_50": 50.0, "_75": 75.0})
            which_percentiles = Constant(table=t_percentiles, scheduler=s)
            percentiles = Percentiles(accuracy=accuracy, scheduler=s)
            percentiles.input[0] = range_qry.output.result
            percentiles.input.percentiles = which_percentiles.output.result
            percentiles.input.hist = hist_index.output.result
            prt = Print(proc=self.terse, scheduler=s)
            prt.input[0] = percentiles.output.result
        aio.run(s.start())
        pdict = notNone(percentiles.table.last()).to_dict()
        # Reference percentiles computed directly on the filtered column.
        v = range_qry.table["_1"].values
        p25 = np.percentile(v, 25.0)  # type: ignore
        p50 = np.percentile(v, 50.0)  # type: ignore
        p75 = np.percentile(v, 75.0)  # type: ignore
        print(
            "TSV=> accuracy: ",
            accuracy,
            " 25:",
            p25,
            pdict["_25"],
            " 50:",
            p50,
            pdict["_50"],
            " 75:",
            p75,
            pdict["_75"],
        )
        self.assertAlmostEqual(p25, pdict["_25"], delta=0.01)
        self.assertAlmostEqual(p50, pdict["_50"], delta=0.01)
        self.assertAlmostEqual(p75, pdict["_75"], delta=0.01)
Example #4
0
 def _query_min_max_impl(self, random, t_min, t_max, s):
     """Build a RangeQuery on column '_1' over ``random`` and return it.

     ``t_min`` and ``t_max`` are wrapped in Constant modules and passed as
     ``min_value``/``max_value``. A Print module is attached to each of the
     query's ``table``, ``min`` and ``max`` outputs.
     """
     lower = Constant(table=t_min, scheduler=s)
     upper = Constant(table=t_max, scheduler=s)
     query = RangeQuery(column='_1', scheduler=s)
     query.create_dependent_modules(
         random, 'table', min_value=lower, max_value=upper
     )
     # One printer per query output, created in the same order as before.
     for out_name in ('table', 'min', 'max'):
         printer = Print(proc=self.terse, scheduler=s)
         printer.input.df = getattr(query.output, out_name)
     return query
 def _query_min_max_impl(
     self, random: RandomTable, t_min: PsDict, t_max: PsDict, s: Scheduler
 ) -> RangeQuery:
     """Build a RangeQuery on column "_1" over ``random`` and return it.

     ``t_min`` and ``t_max`` are wrapped in Constant modules and passed as
     ``min_value``/``max_value``. A Print module is attached to each of the
     query's ``result``, ``min`` and ``max`` outputs.
     """
     lower = Constant(table=t_min, scheduler=s)
     upper = Constant(table=t_max, scheduler=s)
     query = RangeQuery(column="_1", scheduler=s)
     query.create_dependent_modules(
         random, "result", min_value=lower, max_value=upper
     )
     # One printer per query output, created in the same order as before.
     for out_name in ("result", "min", "max"):
         printer = Print(proc=self.terse, scheduler=s)
         printer.input[0] = getattr(query.output, out_name)
     return query
Example #6
0
    def _impl_tst_percentiles_rq(self, accuracy):
        """Check Percentiles computed over a RangeQuery result.

        A RangeQuery on column '_1' (``min_value``/``max_value`` constants
        0.3 and 0.8) is built over a RandomTable. A Percentiles module, given
        the query's histogram index and ``accuracy``, is asked for the 25th,
        50th and 75th percentiles. The last row of its table is compared with
        ``np.percentile`` on the '_1' values of the query's table, with a
        tolerance of 0.01.
        """
        sched = self.scheduler()
        source = RandomTable(2, rows=10000, scheduler=sched)
        lower_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
        lower = Constant(table=lower_tbl, scheduler=sched)
        upper_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
        upper = Constant(table=upper_tbl, scheduler=sched)
        query = RangeQuery(column='_1', scheduler=sched)
        query.create_dependent_modules(
            source, 'table', min_value=lower, max_value=upper
        )

        index = query.hist_index
        wanted_tbl = Table(
            name=None,
            dshape='{_25: float64, _50: float64, _75: float64}',
            data={'_25': [25.0], '_50': [50.0], '_75': [75.0]},
        )
        wanted = Constant(table=wanted_tbl, scheduler=sched)
        pct_mod = Percentiles(index, accuracy=accuracy, scheduler=sched)
        pct_mod.input.table = query.output.table
        pct_mod.input.percentiles = wanted.output.table
        printer = Print(proc=self.terse, scheduler=sched)
        printer.input.df = pct_mod.output.table
        sched.start()
        sched.join()
        computed = pct_mod.table().last().to_dict()
        # Reference percentiles computed directly on the filtered column.
        values = query.table()['_1'].values
        q25 = np.percentile(values, 25.0)
        q50 = np.percentile(values, 50.0)
        q75 = np.percentile(values, 75.0)
        print(
            "TSV=> accuracy: ", accuracy,
            " 25:", q25, computed['_25'],
            " 50:", q50, computed['_50'],
            " 75:", q75, computed['_75'],
        )
        self.assertAlmostEqual(q25, computed['_25'], delta=0.01)
        self.assertAlmostEqual(q50, computed['_50'], delta=0.01)
        self.assertAlmostEqual(q75, computed['_75'], delta=0.01)
Example #7
0
 def test_range_query(self):
     """Run tests of the RangeQuery module.

     A RangeQuery on column '_1' (``min_value``/``max_value`` constants 0.3
     and 0.8) is run over a RandomTable. The selection of the query's table
     must equal the bitmap of indices for which '(_1>0.3)&(_1<0.8)' holds
     on the data of the query input module's 'table' output.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=1000, scheduler=sched)
     lower_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
     lower = Constant(table=lower_tbl, scheduler=sched)
     upper_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
     upper = Constant(table=upper_tbl, scheduler=sched)
     query = RangeQuery(column='_1', scheduler=sched)
     query.create_dependent_modules(
         source, 'table', min_value=lower, max_value=upper
     )
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = query.output.table
     sched.start()
     sched.join()
     source_data = query.input_module.output['table'].data()
     expected = source_data.eval('(_1>0.3)&(_1<0.8)', result_object='index')
     self.assertEqual(query.table().selection, bitmap(expected))
 def _range_query_impl(self, lo, up) -> None:
     """Run tests of the RangeQuery module for the bounds ``lo`` and ``up``.

     A RangeQuery on column "_1" is run over a RandomTable with ``lo`` and
     ``up`` passed as ``min_value``/``max_value`` constants. The index of
     the query's table must equal the bitmap of indices for which
     ``(_1>lo)&(_1<up)`` holds on the data of the query input module's
     "result" output.
     """
     sched = self.scheduler()
     with sched:
         source = RandomTable(2, rows=1000, scheduler=sched)
         lower = Constant(table=PsDict({"_1": lo}), scheduler=sched)
         upper = Constant(table=PsDict({"_1": up}), scheduler=sched)
         query = RangeQuery(column="_1", scheduler=sched)
         query.create_dependent_modules(
             source, "result", min_value=lower, max_value=upper
         )
         printer = Print(proc=self.terse, scheduler=sched)
         printer.input[0] = query.output.result
     aio.run(sched.start())
     assert query.input_module is not None
     source_data = query.input_module.output["result"].data()
     expected = source_data.eval(
         f"(_1>{lo})&(_1<{up})", result_object="index"
     )
     self.assertEqual(query.table.index, bitmap(expected))