Ejemplo n.º 1
0
    def test_agg_all_by(self):
        test_table = empty_table(10)
        test_table = test_table.update(
            ["grp_id=(int)(i/5)", "var=(int)i", "weights=(double)1.0/(i+1)"])

        aggs = [
            group(),
            avg(),
            first(),
            last(),
            max_(),
            median(),
            min_(),
            pct(0.20),
            std(),
            sum_(),
            abs_sum(),
            var(),
            weighted_avg("var"),
        ]
        for agg in aggs:
            with self.subTest(agg):
                result_table = test_table.agg_all_by(agg, ["grp_id"])
                self.assertGreaterEqual(result_table.size, 1)

        # column names in the Aggregation are ignored
        aggs = [
            group(["aggGroup=var"]),
            avg(["aggAvg=var"]),
            pct(0.20, ["aggPct=var"]),
            std(["aggStd=var"]),
            sum_(["aggSum=var"]),
            abs_sum(["aggAbsSum=var"]),
            var(["aggVar=var"]),
            weighted_avg("var", ["weights"]),
        ]
        for agg in aggs:
            with self.subTest(agg):
                result_table = test_table.agg_all_by(agg, ["grp_id"])
                self.assertGreaterEqual(result_table.size, 1)

        with self.assertRaises(DHError) as cm:
            test_table.agg_all_by(count_("aggCount"), "grp_id")
        self.assertIn("unsupported", cm.exception.root_cause)

        for agg in aggs:
            with self.subTest(agg):
                result_table = test_table.agg_all_by(agg)
                self.assertEqual(result_table.size, 1)
Ejemplo n.º 2
0
    def test_agg_by_2(self):
        test_table = empty_table(10)
        test_table = test_table.update(
            ["grp_id=(int)(i/5)", "var=(int)i", "weights=(double)1.0/(i+1)"])

        aggs = [
            group(["aggGroup=var"]),
            avg(["aggAvg=var"]),
            count_("aggCount"),
            first(["aggFirst=var"]),
            last(["aggLast=var"]),
            max_(["aggMax=var"]),
            median(["aggMed=var"]),
            min_(["aggMin=var"]),
            pct(0.20, ["aggPct=var"]),
            std(["aggStd=var"]),
            sum_(["aggSum=var"]),
            abs_sum(["aggAbsSum=var"]),
            var(["aggVar=var"]),
            weighted_avg("var", ["weights"]),
        ]

        result_table = test_table.agg_by(aggs, ["grp_id"])
        self.assertGreaterEqual(result_table.size, 1)

        for agg in aggs:
            result_table = test_table.agg_by(agg, "grp_id")
            self.assertGreaterEqual(result_table.size, 1)
Ejemplo n.º 3
0
    def test_agg_by(self):
        num_distinct_a = self.test_table.select_distinct(formulas=["a"]).size

        aggs = [
            sum_(cols=["SumC=c"]),
            avg(cols=["AvgB = b", "AvgD = d"]),
            pct(percentile=0.5, cols=["PctC = c"]),
            weighted_avg(wcol="d", cols=["WavGD = d"]),
            formula(formula="min(each)",
                    formula_param="each",
                    cols=["MinA=a", "MinD=d"]),
        ]

        result_table = self.test_table.agg_by(aggs=aggs, by=["a"])
        self.assertEqual(result_table.size, num_distinct_a)
Ejemplo n.º 4
0
                       key_spec=KeyValueSpec.IGNORE,
                       value_spec=ck.json_spec([('user_id', dh.int_),
                                                ('url', dh.string),
                                                ('channel', dh.string),
                                                ('received_at', dh.DateTime)]),
                       table_type=TableType.Append)

pageviews_stg = pageviews \
    .update_view([
        'url_path = url.split(`/`)',
        'pageview_type = url_path[1]',
        'target_id = Long.parseLong(url_path[2])'
    ]).drop_columns('url_path')

purchases_by_item = purchases.agg_by([
    agg.sum_(['revenue = purchase_price']),
    agg.count_('orders'),
    agg.sum_(['items_sold = quantity'])
], 'item_id')

pageviews_by_item = pageviews_stg \
    .where(['pageview_type = `products`']) \
    .count_by('pageviews', ['item_id = target_id'])

item_summary = items \
    .view(['item_id = id', 'name', 'category']) \
    .natural_join(purchases_by_item, on = ['item_id']) \
    .natural_join(pageviews_by_item, on = ['item_id']) \
    .drop_columns('item_id') \
    .move_columns_down(['revenue', 'pageviews']) \
    .update_view(['conversion_rate = orders / (double) pageviews'])