def test_agg_all_by(self):
    """Exercise ``agg_all_by`` with each aggregation, grouped and ungrouped.

    Covers three things:
      * every aggregation built without column names produces at least one
        row when grouped by ``grp_id``;
      * aggregations built with column names behave the same way when
        grouped, and produce exactly one row when no grouping is given;
      * ``count_`` raises ``DHError`` whose ``root_cause`` contains
        "unsupported".
    """
    source = empty_table(10).update(
        ["grp_id=(int)(i/5)", "var=(int)i", "weights=(double)1.0/(i+1)"]
    )

    def check_grouped(agg_specs):
        # Each aggregation is reported as its own sub-test so that one
        # failure does not mask the others.
        for spec in agg_specs:
            with self.subTest(spec):
                grouped = source.agg_all_by(spec, ["grp_id"])
                self.assertGreaterEqual(grouped.size, 1)

    check_grouped(
        [
            group(),
            avg(),
            first(),
            last(),
            max_(),
            median(),
            min_(),
            pct(0.20),
            std(),
            sum_(),
            abs_sum(),
            var(),
            weighted_avg("var"),
        ]
    )

    # column names in the Aggregation are ignored
    named_aggs = [
        group(["aggGroup=var"]),
        avg(["aggAvg=var"]),
        pct(0.20, ["aggPct=var"]),
        std(["aggStd=var"]),
        sum_(["aggSum=var"]),
        abs_sum(["aggAbsSum=var"]),
        var(["aggVar=var"]),
        weighted_avg("var", ["weights"]),
    ]
    check_grouped(named_aggs)

    # count_ is expected to be rejected by agg_all_by.
    with self.assertRaises(DHError) as raised:
        source.agg_all_by(count_("aggCount"), "grp_id")
    self.assertIn("unsupported", raised.exception.root_cause)

    # Without any grouping columns the result is expected to be one row.
    for spec in named_aggs:
        with self.subTest(spec):
            ungrouped = source.agg_all_by(spec)
            self.assertEqual(ungrouped.size, 1)
def test_agg_by_2(self):
    """Run ``agg_by`` with a full list of aggregations and with each one alone.

    Builds a 10-row table with ``grp_id``, ``var`` and ``weights`` columns,
    then checks that grouping by ``grp_id`` yields at least one row both
    when all aggregations are applied in a single call and when each
    aggregation is applied individually.
    """
    test_table = empty_table(10)
    test_table = test_table.update(
        ["grp_id=(int)(i/5)", "var=(int)i", "weights=(double)1.0/(i+1)"]
    )

    aggs = [
        group(["aggGroup=var"]),
        avg(["aggAvg=var"]),
        count_("aggCount"),
        first(["aggFirst=var"]),
        last(["aggLast=var"]),
        max_(["aggMax=var"]),
        median(["aggMed=var"]),
        min_(["aggMin=var"]),
        pct(0.20, ["aggPct=var"]),
        std(["aggStd=var"]),
        sum_(["aggSum=var"]),
        abs_sum(["aggAbsSum=var"]),
        var(["aggVar=var"]),
        weighted_avg("var", ["weights"]),
    ]

    # All aggregations applied together, grouping columns given as a list.
    result_table = test_table.agg_by(aggs, ["grp_id"])
    self.assertGreaterEqual(result_table.size, 1)

    # Each aggregation on its own, grouping column given as a plain string.
    # Wrapped in subTest so a failure in one aggregation is reported
    # individually and does not prevent the remaining ones from running.
    for agg in aggs:
        with self.subTest(agg):
            result_table = test_table.agg_by(agg, "grp_id")
            self.assertGreaterEqual(result_table.size, 1)
def test_agg_by(self):
    """Check that ``agg_by`` grouped on ``a`` yields one row per distinct ``a``.

    Applies a mix of sum, average, percentile, weighted-average and formula
    aggregations to ``self.test_table`` and compares the result size with
    the size of ``select_distinct`` on column ``a``.
    """
    distinct_keys = self.test_table.select_distinct(formulas=["a"])
    expected_rows = distinct_keys.size

    sum_c = sum_(cols=["SumC=c"])
    avg_b_d = avg(cols=["AvgB = b", "AvgD = d"])
    pct_c = pct(percentile=0.5, cols=["PctC = c"])
    wavg_d = weighted_avg(wcol="d", cols=["WavGD = d"])
    min_a_d = formula(
        formula="min(each)",
        formula_param="each",
        cols=["MinA=a", "MinD=d"],
    )
    agg_specs = [sum_c, avg_b_d, pct_c, wavg_d, min_a_d]

    aggregated = self.test_table.agg_by(aggs=agg_specs, by=["a"])
    self.assertEqual(aggregated.size, expected_rows)