def test_hist_index_min_max(self):
    """Test min_out and max_out on HistogramIndex

    A RangeQuery on column ``_1`` of a random table is bounded by two
    constant tables (0.3 and 0.8). Min and Max modules are then plugged
    on the ``min_out`` / ``max_out`` outputs of the query's histogram
    index. Once the scheduler has finished, the last row of each
    aggregate is compared with the min / max of column ``_1`` computed
    directly on the random table.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=100000, scheduler=sched)

    # Constant bounds handed to the range query.
    lower_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
    lower = Constant(table=lower_tbl, scheduler=sched)
    upper_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]})
    upper = Constant(table=upper_tbl, scheduler=sched)

    query = RangeQuery(column='_1', scheduler=sched)
    query.create_dependent_modules(
        source, 'table', min_value=lower, max_value=upper)
    query_sink = Print(proc=self.terse, scheduler=sched)
    query_sink.input.df = query.output.table

    # The index created by the range query is the module under test.
    index = query.hist_index
    index_tag = str(hash(index))

    min_mod = Min(name='min_' + index_tag, scheduler=sched)
    min_mod.input.table = index.output.min_out
    min_sink = Print(proc=self.terse, scheduler=sched)
    min_sink.input.df = min_mod.output.table

    max_mod = Max(name='max_' + index_tag, scheduler=sched)
    max_mod.input.table = index.output.max_out
    max_sink = Print(proc=self.terse, scheduler=sched)
    max_sink.input.df = max_mod.output.table

    sched.start()
    sched.join()

    expected_min = source.table().min()['_1']
    observed_min = min_mod.table().last().to_dict()['_1']
    self.assertAlmostEqual(expected_min, observed_min)

    expected_max = source.table().max()['_1']
    observed_max = max_mod.table().last().to_dict()['_1']
    self.assertAlmostEqual(expected_max, observed_max)
def _impl_tst_percentiles(self, accuracy):
    """Check Percentiles against ``np.percentile`` for a given accuracy.

    A HistogramIndex on column ``_1`` of a random table is handed to a
    Percentiles module together with a constant table requesting the
    25th, 50th and 75th percentiles. After the scheduler has run, the
    last row produced by the module is compared (``delta=0.01``) with
    the values NumPy computes on the same column.

    ``accuracy`` is forwarded unchanged to the Percentiles constructor.
    """
    # (result key, percentile rank) pairs checked at the end.
    ranks = (('_25', 25.0), ('_50', 50.0), ('_75', 75.0))

    sched = self.scheduler()
    source = RandomTable(2, rows=10000, scheduler=sched)
    index = HistogramIndex(column='_1', scheduler=sched)
    index.input.table = source.output.table

    wanted_tbl = Table(
        name=None,
        dshape='{_25: float64, _50: float64, _75: float64}',
        data={'_25': [25.0], '_50': [50.0], '_75': [75.0]})
    wanted = Constant(table=wanted_tbl, scheduler=sched)

    module = Percentiles(index, accuracy=accuracy, scheduler=sched)
    module.input.table = source.output.table
    module.input.percentiles = wanted.output.table
    sink = Print(proc=self.terse, scheduler=sched)
    sink.input.df = module.output.table

    sched.start()
    sched.join()

    pdict = module.table().last().to_dict()
    column = source.table()['_1'].values
    # Reference values, one np.percentile call per rank.
    reference = {}
    for key, rank in ranks:
        reference[key] = np.percentile(column, rank)

    print("Table=> accuracy: ", accuracy,
          " 25:", reference['_25'], pdict['_25'],
          " 50:", reference['_50'], pdict['_50'],
          " 75:", reference['_75'], pdict['_75'])

    for key, _rank in ranks:
        self.assertAlmostEqual(reference[key], pdict[key], delta=0.01)
def test_min(self):
    """Min over a 10-column random table matches the table's own min()."""
    sched = self.scheduler()
    source = RandomTable(10, rows=10000, scheduler=sched)

    module_name = 'min_' + str(hash(source))
    min_mod = Min(name=module_name, scheduler=sched)
    min_mod.input.table = source.output.table

    # Print sink connected to the Min output.
    sink = Print(proc=self.terse, scheduler=sched)
    sink.input.df = min_mod.output.table

    sched.start()
    sched.join()

    expected = source.table().min()
    observed = min_mod.table().last()
    self.compare(expected, observed)
def test_max(self):
    """Max over a 10-column random table matches the table's own max()."""
    sched = self.scheduler()
    source = RandomTable(10, rows=10000, scheduler=sched)

    module_name = 'max_' + str(hash(source))
    max_mod = Max(name=module_name, scheduler=sched)
    max_mod.input.table = source.output.table

    # Print sink connected to the Max output.
    sink = Print(proc=self.terse, scheduler=sched)
    sink.input.df = max_mod.output.table

    sched.start()
    sched.join()

    expected = source.table().max()
    # NOTE(review): the result is read through cxx_module.get_output_table()
    # instead of max_.table() -- presumably Max wraps a native module in
    # this test suite; confirm against the Max implementation.
    output_table = max_mod.cxx_module.get_output_table()
    observed = output_table.last()
    self.compare(expected, observed)
def test_range_query_min_max3(self):
    """Test min and max on RangeQuery output

    The query is built by ``self._query_min_max_impl`` with a lower
    bound of 0.3 and an upper bound of 15000. The test expects the
    ``min`` output to report 0.3 and the ``max`` output to report the
    actual maximum of column ``_1`` of the random table.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=100000, scheduler=sched)
    lower_tbl = Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]})
    upper_tbl = Table(
        name=None, dshape='{_1: float64}', data={'_1': [15000.]})
    query = self._query_min_max_impl(source, lower_tbl, upper_tbl, sched)

    sched.start()
    sched.join()

    reported_min = query.output.min.data()
    reported_max = query.output.max.data()
    table_max = source.table().max()['_1']
    self.assertAlmostEqual(reported_min['_1'].loc[0], 0.3)
    self.assertAlmostEqual(reported_max['_1'].loc[0], table_max)
def test_var(self):
    """Var on a one-column random table matches the table's var(ddof=1)."""

    def as_float_array(mapping):
        # Convert the values of a mapping into a NumPy array of floats.
        return np.array(list(map(float, mapping.values())))

    sched = self.scheduler()
    source = RandomTable(1, rows=1000, scheduler=sched)
    var_mod = Var(scheduler=sched)
    var_mod.input.table = source.output.table
    sink = Print(proc=self.terse, scheduler=sched)
    sink.input.df = var_mod.output.table

    sched.start()
    sched.join()

    expected = as_float_array(source.table().var(ddof=1))
    observed = as_float_array(var_mod.table().last().to_dict(ordered=True))
    print('res1:', expected)
    print('res2:', observed)
    self.assertTrue(np.allclose(expected, observed))
def test_bin_join(self):
    """BinJoin of two single-column Min modules carries both minima.

    One Min module is restricted to column ``_1`` and another to ``_2``
    of the same random table; their outputs are joined with BinJoin and
    the last joined row is compared with the table's own min().
    """
    sched = self.scheduler()
    source = RandomTable(10, rows=10000, scheduler=sched)
    source_tag = str(hash(source))

    first_min = Min(name='min_1' + source_tag, scheduler=sched,
                    columns=['_1'])
    first_min.input.table = source.output.table
    second_min = Min(name='min_2' + source_tag, scheduler=sched,
                     columns=['_2'])
    second_min.input.table = source.output.table

    join = BinJoin(scheduler=sched)
    join.input.first = first_min.output.table
    join.input.second = second_min.output.table
    sink = Print(proc=self.terse, scheduler=sched)
    sink.input.df = join.output.table

    sched.start()
    sched.join()

    expected = source.table().min()
    observed = join.table().last().to_dict()
    for column in ('_1', '_2'):
        self.assertAlmostEqual(expected[column], observed[column])
def test_bisect(self):
    """Bisect with op '>' selects the rows where ``_1`` exceeds the limit.

    The limit (0.5) comes from a constant table. After the run, the
    selection held by the Bisect module's table must equal the bitmap
    of the indices returned by evaluating ``_1>0.5`` on the random
    table.
    """
    sched = self.scheduler()
    source = RandomTable(2, rows=100000, scheduler=sched)

    # NOTE(review): the dshape declares ``value`` as a string while the
    # datum is the float 0.5 -- looks unintended; confirm how Table and
    # Bisect treat this before changing it.
    limit_tbl = Table(
        name=None, dshape='{value: string}', data={'value': [0.5]})
    limit = Constant(table=limit_tbl, scheduler=sched)

    index = HistogramIndex(column='_1', scheduler=sched)
    index.create_dependent_modules(source, 'table')

    bisect_mod = Bisect(
        column='_1', op='>', hist_index=index, scheduler=sched)
    # Bisect reads the table from the index output, not from the source.
    bisect_mod.input.table = index.output.table
    bisect_mod.input.limit = limit.output.table

    sink = Print(proc=self.terse, scheduler=sched)
    sink.input.df = bisect_mod.output.table

    sched.start()
    sched.join()

    expected_ids = source.table().eval('_1>0.5', result_object='index')
    self.assertEqual(bisect_mod._table.selection, bitmap(expected_ids))