Example #1
0
    def test_agg_all_by(self):
        test_table = empty_table(10)
        test_table = test_table.update(
            ["grp_id=(int)(i/5)", "var=(int)i", "weights=(double)1.0/(i+1)"])

        aggs = [
            group(),
            avg(),
            first(),
            last(),
            max_(),
            median(),
            min_(),
            pct(0.20),
            std(),
            sum_(),
            abs_sum(),
            var(),
            weighted_avg("var"),
        ]
        for agg in aggs:
            with self.subTest(agg):
                result_table = test_table.agg_all_by(agg, ["grp_id"])
                self.assertGreaterEqual(result_table.size, 1)

        # column names in the Aggregation are ignored
        aggs = [
            group(["aggGroup=var"]),
            avg(["aggAvg=var"]),
            pct(0.20, ["aggPct=var"]),
            std(["aggStd=var"]),
            sum_(["aggSum=var"]),
            abs_sum(["aggAbsSum=var"]),
            var(["aggVar=var"]),
            weighted_avg("var", ["weights"]),
        ]
        for agg in aggs:
            with self.subTest(agg):
                result_table = test_table.agg_all_by(agg, ["grp_id"])
                self.assertGreaterEqual(result_table.size, 1)

        with self.assertRaises(DHError) as cm:
            test_table.agg_all_by(count_("aggCount"), "grp_id")
        self.assertIn("unsupported", cm.exception.root_cause)

        for agg in aggs:
            with self.subTest(agg):
                result_table = test_table.agg_all_by(agg)
                self.assertEqual(result_table.size, 1)
Example #2
0
    def test_agg_by_2(self):
        test_table = empty_table(10)
        test_table = test_table.update(
            ["grp_id=(int)(i/5)", "var=(int)i", "weights=(double)1.0/(i+1)"])

        aggs = [
            group(["aggGroup=var"]),
            avg(["aggAvg=var"]),
            count_("aggCount"),
            first(["aggFirst=var"]),
            last(["aggLast=var"]),
            max_(["aggMax=var"]),
            median(["aggMed=var"]),
            min_(["aggMin=var"]),
            pct(0.20, ["aggPct=var"]),
            std(["aggStd=var"]),
            sum_(["aggSum=var"]),
            abs_sum(["aggAbsSum=var"]),
            var(["aggVar=var"]),
            weighted_avg("var", ["weights"]),
        ]

        result_table = test_table.agg_by(aggs, ["grp_id"])
        self.assertGreaterEqual(result_table.size, 1)

        for agg in aggs:
            result_table = test_table.agg_by(agg, "grp_id")
            self.assertGreaterEqual(result_table.size, 1)
Example #3
0
    def test_agg_by(self):
        num_distinct_a = self.test_table.select_distinct(formulas=["a"]).size

        aggs = [
            sum_(cols=["SumC=c"]),
            avg(cols=["AvgB = b", "AvgD = d"]),
            pct(percentile=0.5, cols=["PctC = c"]),
            weighted_avg(wcol="d", cols=["WavGD = d"]),
            formula(formula="min(each)",
                    formula_param="each",
                    cols=["MinA=a", "MinD=d"]),
        ]

        result_table = self.test_table.agg_by(aggs=aggs, by=["a"])
        self.assertEqual(result_table.size, num_distinct_a)