예제 #1
0
 def test_hist_index_min_max(self):
     """Test min_out and max_out on HistogramIndex.

     A RangeQuery on column '_1' is built over a random table, with the
     constants 0.3 and 0.8 passed as its min/max values.  Min and Max
     modules are then plugged on the ``min_out`` and ``max_out`` outputs of
     the query's histogram index.  Once the scheduler has finished, the last
     row of each module is compared with the '_1' entry of
     ``random.table().min()`` / ``random.table().max()``.
     """
     sched = self.scheduler()
     random = RandomTable(2, rows=100000, scheduler=sched)
     lower_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
     lower_bound = Constant(table=lower_tbl, scheduler=sched)
     upper_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
     upper_bound = Constant(table=upper_tbl, scheduler=sched)
     query = RangeQuery(column='_1', scheduler=sched)
     query.create_dependent_modules(
         random, 'table', min_value=lower_bound, max_value=upper_bound)
     query_printer = Print(proc=self.terse, scheduler=sched)
     query_printer.input.df = query.output.table
     index = query.hist_index
     # Both module names share a suffix derived from the index object.
     name_suffix = str(hash(index))
     # Min module fed by the index's min_out output.
     min_module = Min(name='min_' + name_suffix, scheduler=sched)
     min_module.input.table = index.output.min_out
     min_printer = Print(proc=self.terse, scheduler=sched)
     min_printer.input.df = min_module.output.table
     # Max module fed by the index's max_out output.
     max_module = Max(name='max_' + name_suffix, scheduler=sched)
     max_module.input.table = index.output.max_out
     max_printer = Print(proc=self.terse, scheduler=sched)
     max_printer.input.df = max_module.output.table
     sched.start()
     sched.join()
     expected_min = random.table().min()['_1']
     observed_min = min_module.table().last().to_dict()['_1']
     self.assertAlmostEqual(expected_min, observed_min)
     expected_max = random.table().max()['_1']
     observed_max = max_module.table().last().to_dict()['_1']
     self.assertAlmostEqual(expected_max, observed_max)
예제 #2
0
 def _impl_tst_percentiles(self, accuracy):
     """Compare the Percentiles module with ``np.percentile``.

     A Percentiles module (built on a HistogramIndex over column '_1' of a
     random table) is asked for the 25th, 50th and 75th percentiles.  After
     the scheduler has finished, the last row of its output is checked
     against numpy's result on the same column, with a tolerance of 0.01.

     Args:
         accuracy: forwarded unchanged to ``Percentiles(accuracy=...)`` and
             echoed in the printed summary.
     """
     sched = self.scheduler()
     random = RandomTable(2, rows=10000, scheduler=sched)
     index = HistogramIndex(column='_1', scheduler=sched)
     index.input.table = random.output.table
     requested = Table(
         name=None,
         dshape='{_25: float64, _50: float64, _75: float64}',
         data={'_25': [25.0], '_50': [50.0], '_75': [75.0]})
     requested_const = Constant(table=requested, scheduler=sched)
     percentiles = Percentiles(index, accuracy=accuracy, scheduler=sched)
     percentiles.input.table = random.output.table
     percentiles.input.percentiles = requested_const.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = percentiles.output.table
     sched.start()
     sched.join()
     observed = percentiles.table().last().to_dict()
     values = random.table()['_1'].values
     # Reference values computed by numpy, keyed like the module's output.
     targets = (('_25', 25.0), ('_50', 50.0), ('_75', 75.0))
     expected = {key: np.percentile(values, q) for key, q in targets}
     print("Table=> accuracy: ", accuracy,
           " 25:", expected['_25'], observed['_25'],
           " 50:", expected['_50'], observed['_50'],
           " 75:", expected['_75'], observed['_75'])
     for key, _ in targets:
         self.assertAlmostEqual(expected[key], observed[key], delta=0.01)
예제 #3
0
 def test_min(self):
     """Check the last row of a Min module against ``table().min()``.

     The Min module is fed by a 10-column, 10000-row random table; the
     comparison itself is delegated to ``self.compare``.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=10000, scheduler=sched)
     module_name = 'min_' + str(hash(source))
     min_module = Min(name=module_name, scheduler=sched)
     min_module.input.table = source.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = min_module.output.table
     sched.start()
     sched.join()
     expected = source.table().min()
     observed = min_module.table().last()
     self.compare(expected, observed)
예제 #4
0
 def test_max(self):
     """Check the last row of a Max module against ``table().max()``.

     The Max module is fed by a 10-column, 10000-row random table; the
     comparison itself is delegated to ``self.compare``.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=10000, scheduler=sched)
     module_name = 'max_' + str(hash(source))
     max_module = Max(name=module_name, scheduler=sched)
     max_module.input.table = source.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = max_module.output.table
     sched.start()
     sched.join()
     expected = source.table().max()
     # NOTE(review): the result is read through
     # cxx_module.get_output_table() rather than table() -- confirm this is
     # the intended accessor for this Max implementation.
     observed = max_module.cxx_module.get_output_table().last()
     self.compare(expected, observed)
예제 #5
0
 def test_range_query_min_max3(self):
     """Test min and max on RangeQuery output.

     The query is built by ``self._query_min_max_impl`` with a lower bound
     of 0.3 and an upper bound of 15000.  The test expects the query's
     ``min`` output to report 0.3 and its ``max`` output to report the
     actual maximum of column '_1' in the random table.
     """
     sched = self.scheduler()
     random = RandomTable(2, rows=100000, scheduler=sched)
     lower_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
     upper_tbl = Table(
         name=None, dshape='{_1: float64}', data={'_1': [15000.]})
     query = self._query_min_max_impl(random, lower_tbl, upper_tbl, sched)
     sched.start()
     sched.join()
     reported_min = query.output.min.data()
     reported_max = query.output.max.data()
     table_max = random.table().max()['_1']
     self.assertAlmostEqual(reported_min['_1'].loc[0], 0.3)
     self.assertAlmostEqual(reported_max['_1'].loc[0], table_max)
예제 #6
0
 def test_var(self):
     """Check the Var module against ``table().var(ddof=1)``.

     Both results are converted to float arrays, printed, and compared
     with ``np.allclose``.
     """
     sched = self.scheduler()
     source = RandomTable(1, rows=1000, scheduler=sched)
     var_module = Var(scheduler=sched)
     var_module.input.table = source.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = var_module.output.table
     sched.start()
     sched.join()
     reference = source.table().var(ddof=1).values()
     expected = np.array(list(map(float, reference)))
     # ordered=True keeps the dict values in a defined order for comparison.
     last_row = var_module.table().last().to_dict(ordered=True)
     observed = np.array(list(map(float, last_row.values())))
     print('res1:', expected)
     print('res2:', observed)
     self.assertTrue(np.allclose(expected, observed))
예제 #7
0
 def test_bin_join(self):
     """Check that BinJoin combines the outputs of two Min modules.

     One Min module is restricted to column '_1' and the other to '_2' of
     the same random table.  Their outputs are connected to the ``first``
     and ``second`` inputs of a BinJoin, whose last row must match the
     table's minimum on both columns.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=10000, scheduler=sched)
     name_suffix = str(hash(source))
     min_first = Min(
         name='min_1' + name_suffix, scheduler=sched, columns=['_1'])
     min_first.input.table = source.output.table
     min_second = Min(
         name='min_2' + name_suffix, scheduler=sched, columns=['_2'])
     min_second.input.table = source.output.table
     joined = BinJoin(scheduler=sched)
     joined.input.first = min_first.output.table
     joined.input.second = min_second.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = joined.output.table
     sched.start()
     sched.join()
     expected = source.table().min()
     observed = joined.table().last().to_dict()
     for column in ('_1', '_2'):
         self.assertAlmostEqual(expected[column], observed[column])
예제 #8
0
 def test_bisect(self):
     """Check that Bisect with op '>' selects the rows where _1 > 0.5.

     The Bisect module works on column '_1' through a HistogramIndex and
     takes its limit from a constant table holding 0.5.  Its resulting
     selection is compared with the index returned by evaluating
     ``'_1>0.5'`` directly on the random table.
     """
     sched = self.scheduler()
     random = RandomTable(2, rows=100000, scheduler=sched)
     # NOTE(review): the dshape declares 'value' as string while the data
     # is the float 0.5 -- confirm this is intended.
     limit_tbl = Table(
         name=None, dshape='{value: string}', data={'value': [0.5]})
     limit_const = Constant(table=limit_tbl, scheduler=sched)
     index = HistogramIndex(column='_1', scheduler=sched)
     index.create_dependent_modules(random, 'table')
     bisect_module = Bisect(
         column='_1', op='>', hist_index=index, scheduler=sched)
     bisect_module.input.table = index.output.table
     bisect_module.input.limit = limit_const.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = bisect_module.output.table
     sched.start()
     sched.join()
     expected_index = random.table().eval('_1>0.5', result_object='index')
     self.assertEqual(bisect_module._table.selection,
                      bitmap(expected_index))