Example #1
2
    def test_ordered_aggregation(self):
        imps = Table.from_tag(IMPS)
        resx = select(imps.ad_id, imps.cpm_millis, where=imps.date == '2014-01-27')

        sum_millis = {}
        for ad_id, millis in resx:
            if ad_id not in sum_millis:
                sum_millis[ad_id] = [0, 0]
            sum_millis[ad_id][0] += millis
            sum_millis[ad_id][1] += 1

        results = select(imps.ad_id, h_sum(imps.cpm_millis), h_count(),
                         where=imps.date == '2014-01-27',
                         order_by=2,
                         limit=3,
                         nest=True)
        self.assertGreater(len(list(results)), 0)
        lowest = 0
        for ad_id, millis, count in results:
            self.assertLessEqual(lowest, count)
            lowest = count
            ad_tup = sum_millis[ad_id]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
        self.assertEqual(len(list(results)), min(len(sum_millis), 3))
Example #2
1
    def test_overflow(self):
        from itertools import izip

        imps = Table.from_tag(IMPS)
        fly_results = select(imps.date, h_sum(imps.impression), where=imps, order_by=imps.date)

        nest_tab = select(imps.date, h_sum(imps.impression), where=imps, nest=True)
        nest_results = select(*star(nest_tab), where=nest_tab, order_by=0)

        for ((fdate, fimps), (ndate, nimps)) in izip(fly_results, nest_results):
            self.assertEqual(fdate, ndate)
            self.assertEqual(fimps, nimps)
Example #3
1
    def test_aggregate_join(self):
        imps = Table.from_tag(IMPS)
        pix = Table.from_tag(PIXELS)

        imp_sites = list(select(imps.site_id, imps.ad_id,
                                where=imps.date < '2014-01-13'))
        pix_sites = list(select(pix.site_id, pix.amount,
                                where=pix.date < '2014-01-13'))

        join = {}
        for imp_site, imp_ad_id in imp_sites:
            for pix_site, pix_amount in pix_sites:
                if imp_site == pix_site:
                    if imp_ad_id not in join:
                        join[imp_ad_id] = [0, 0]
                    join[imp_ad_id][0] += pix_amount
                    join[imp_ad_id][1] += 1

        res = select(imps.ad_id, h_sum(pix.amount), h_count(),
                     where=(imps.date < '2014-01-13', pix.date < '2014-01-13'),
                     join=(imps.site_id, pix.site_id))
        results = list(res)
        self.assertEqual(len(results), len(join))

        for (ad_id, amount, count) in results:
            ramount, rcount = join[ad_id]
            self.assertEqual(ramount, amount)
            self.assertEqual(rcount, count)
Example #4
1
    def test_nested_agg(self):
        imps = Table.from_tag(IMPS)
        results = select(imps.ad_id,
                         imps.date,
                         imps.cpm_millis,
                         where=imps.date > '2014-01-22')

        sum_millis = {}
        for ad_id, dt, millis in results:
            key = str(ad_id) + dt
            if key not in sum_millis:
                sum_millis[key] = [0, 0]
            sum_millis[key][0] += millis
            sum_millis[key][1] += 1

        newtab = select(imps.ad_id,
                        imps.date,
                        h_sum(imps.cpm_millis),
                        h_count(),
                        where=imps.date > '2014-01-22',
                        nest=True)
        results = select(*star(newtab), where=newtab)
        self.assertGreater(len(list(results)), 0)
        for ad_id, dt, millis, count in results:
            ad_tup = sum_millis[str(ad_id) + dt]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
Example #5
0
    def test_aggregate(self):
        imps = Table.from_tag(IMPS)
        res = select(h_sum(imps.click), h_sum(imps.conversion), h_sum(imps.impression), where=imps)

        (clicks, conversions, impressions) = list(res)[0]

        self.assertEqual(clicks, 21)
        self.assertEqual(conversions, 5)
        self.assertEqual(impressions, 174)
Example #6
0
    def test_nested_self_join(self):
        """
        A self join is joining the table against itself.  This requires the use of aliases.
        """
        imps = Table.from_tag(IMPS)

        early_res = select(imps.ad_id,
                           imps.cpm_millis,
                           where=imps.date < '2014-01-20')
        early = list(early_res)
        late_res = select(imps.ad_id,
                          imps.cpm_millis,
                          where=imps.date >= '2014-01-20')
        late = list(late_res)

        join = {}
        for eid, ecpm in early:
            for lid, lcpm in late:
                if eid == lid:
                    if eid not in join:
                        join[eid] = [0, 0, 0]
                    join[eid][0] += ecpm
                    join[eid][1] += lcpm
                    join[eid][2] += 1

        early = select(imps.ad_id,
                       imps.cpm_millis,
                       where=imps.date < '2014-01-20',
                       nest=True)
        late = select(imps.ad_id,
                      imps.cpm_millis,
                      where=imps.date >= '2014-01-20',
                      nest=True)
        jimmy = select(early.ad_id.named('adididid'),
                       h_sum(early.cpm_millis).named('emillis'),
                       h_sum(late.cpm_millis).named('lmillis'),
                       h_count(),
                       where=(early, late),
                       join='ad_id')

        james = list(jimmy)
        self.assertEqual(len(join), len(james))

        for (ad_id, emillis, lmillis, cnt) in james:
            ecpm, lcpm, ocnt = join[ad_id]
            self.assertEqual(emillis, ecpm)
            self.assertEqual(lmillis, lcpm)
            self.assertEqual(cnt, ocnt)
        early_res.purge()
        late_res.purge()
        jimmy.purge()
Example #7
0
    def test_ordered_aggregation(self):
        imps = Table.from_tag(IMPS)
        resx = select(imps.ad_id,
                      imps.cpm_millis,
                      where=imps.date == '2014-01-27')

        sum_millis = {}
        for ad_id, millis in resx:
            if ad_id not in sum_millis:
                sum_millis[ad_id] = [0, 0]
            sum_millis[ad_id][0] += millis
            sum_millis[ad_id][1] += 1

        results = select(imps.ad_id,
                         h_sum(imps.cpm_millis),
                         h_count(),
                         where=imps.date == '2014-01-27',
                         order_by=2,
                         limit=3,
                         nest=True)
        self.assertGreater(len(list(results)), 0)
        lowest = 0
        for ad_id, millis, count in results:
            self.assertLessEqual(lowest, count)
            lowest = count
            ad_tup = sum_millis[ad_id]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
        self.assertEqual(len(list(results)), min(len(sum_millis), 3))

        resx.purge()
Example #8
0
    def test_aggregate_join(self):
        imps = Table.from_tag(IMPS)
        pix = Table.from_tag(PIXELS)

        imp_sites = [(s, a) for (s, a), _ in result_iterator(
            select(imps.site_id, imps.ad_id, where=imps.date < '2014-01-13'))]
        pix_sites = [(s, a) for (s, a), _ in result_iterator(
            select(pix.site_id, pix.amount, where=pix.date < '2014-01-13'))]

        join = {}
        for imp_site, imp_ad_id in imp_sites:
            for pix_site, pix_amount in pix_sites:
                if imp_site == pix_site:
                    if imp_ad_id not in join:
                        join[imp_ad_id] = [0, 0]
                    join[imp_ad_id][0] += pix_amount
                    join[imp_ad_id][1] += 1

        res = select(imps.ad_id,
                     h_sum(pix.amount),
                     h_count(),
                     where=(imps.date < '2014-01-13', pix.date < '2014-01-13'),
                     join=(imps.site_id, pix.site_id))
        results = [(ad_id, amount, count)
                   for (ad_id, amount, count), _ in result_iterator(res)]
        self.assertTrue(len(results), len(join))

        for (ad_id, amount, count) in results:
            ramount, rcount = join[ad_id]
            self.assertEqual(ramount, amount)
            self.assertEqual(rcount, count)
Example #9
0
    def test_ordered_aggregation(self):
        imps = Table.from_tag(IMPS)
        res = select(imps.ad_id, imps.cpm_millis, where=imps.date == '2014-01-27')
        resx = [c for c, _ in result_iterator(res)]

        sum_millis = {}
        for ad_id, millis in resx:
            if ad_id not in sum_millis:
                sum_millis[ad_id] = [0, 0]
            sum_millis[ad_id][0] += millis
            sum_millis[ad_id][1] += 1

        res = select(imps.ad_id, h_sum(imps.cpm_millis), h_count(),
                     where=imps.date == '2014-01-27',
                     order_by=2,
                     limit=3)
        results = [c for c, _ in result_iterator(res)]
        lowest = 0
        for ad_id, millis, count in results:
            self.assertLessEqual(lowest, count)
            lowest = count
            ad_tup = sum_millis[ad_id]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
        self.assertTrue(len(results) == min(len(sum_millis), 3))
Example #10
0
    def test_overflow(self):
        from itertools import izip

        imps = Table.from_tag(IMPS)
        fly_results = select(imps.date,
                             h_sum(imps.impression),
                             where=imps,
                             order_by=imps.date)

        nest_tab = select(imps.date,
                          h_sum(imps.impression),
                          where=imps,
                          nest=True)
        nest_results = select(*star(nest_tab), where=nest_tab, order_by=0)

        for ((fdate, fimps), (ndate, nimps)) in izip(fly_results,
                                                     nest_results):
            self.assertEqual(fdate, ndate)
            self.assertEqual(fimps, nimps)
Example #11
0
    def test_nested_self_join(self):
        """
        A self join is joining the table against itself.  This requires the use of aliases.
        """
        imps = Table.from_tag(IMPS)

        early_res = select(imps.ad_id, imps.cpm_millis,
                           where=imps.date < '2014-01-20')
        early = list(early_res)
        late_res = select(imps.ad_id, imps.cpm_millis,
                          where=imps.date >= '2014-01-20')
        late = list(late_res)

        join = {}
        for eid, ecpm in early:
            for lid, lcpm in late:
                if eid == lid:
                    if eid not in join:
                        join[eid] = [0, 0, 0]
                    join[eid][0] += ecpm
                    join[eid][1] += lcpm
                    join[eid][2] += 1

        early = select(imps.ad_id, imps.cpm_millis, where=imps.date < '2014-01-20', nest=True)
        late = select(imps.ad_id, imps.cpm_millis, where=imps.date >= '2014-01-20', nest=True)
        jimmy = select(early.ad_id.named('adididid'),
                       h_sum(early.cpm_millis).named('emillis'),
                       h_sum(late.cpm_millis).named('lmillis'),
                       h_count(),
                       where=(early, late),
                       join='ad_id')

        james = list(jimmy)
        self.assertEqual(len(join), len(james))

        for (ad_id, emillis, lmillis, cnt) in james:
            ecpm, lcpm, ocnt = join[ad_id]
            self.assertEqual(emillis, ecpm)
            self.assertEqual(lmillis, lcpm)
            self.assertEqual(cnt, ocnt)
        early_res.purge()
        late_res.purge()
        jimmy.purge()
Example #12
0
    def test_simple_aggregation(self):
        imps = Table.from_tag(IMPS)
        results = select(imps.ad_id, imps.cpm_millis, where=imps.date == '2014-01-27')

        sum_millis = {}
        for ad_id, millis in results:
            if ad_id not in sum_millis:
                sum_millis[ad_id] = [0, 0]
            sum_millis[ad_id][0] += millis
            sum_millis[ad_id][1] += 1

        results = select(imps.ad_id, h_sum(imps.cpm_millis), h_count(), where=imps.date == '2014-01-27')
        self.assertGreater(len(list(results)), 0)
        for ad_id, millis, count in results:
            ad_tup = sum_millis[ad_id]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
Example #13
0
    def test_multiple_group_bys(self):
        imps = Table.from_tag(IMPS)
        results = select(imps.ad_id, imps.date, imps.cpm_millis, where=imps.date > '2014-01-22')

        sum_millis = {}
        for ad_id, dt, millis in results:
            key = str(ad_id) + dt
            if key not in sum_millis:
                sum_millis[key] = [0, 0]
            sum_millis[key][0] += millis
            sum_millis[key][1] += 1

        results = select(imps.ad_id, imps.date, h_sum(imps.cpm_millis), h_count(), where=imps.date > '2014-01-22')
        self.assertGreater(len(list(results)), 0)
        for ad_id, dt, millis, count in results:
            ad_tup = sum_millis[str(ad_id) + dt]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
Example #14
0
    def test_simple_aggregation(self):
        imps = Table.from_tag(IMPS)
        res = select(imps.ad_id, imps.cpm_millis, where=imps.date == '2014-01-27')
        results = [c for c, _ in result_iterator(res)]

        sum_millis = {}
        for ad_id, millis in results:
            if ad_id not in sum_millis:
                sum_millis[ad_id] = [0, 0]
            sum_millis[ad_id][0] += millis
            sum_millis[ad_id][1] += 1

        res = select(imps.ad_id, h_sum(imps.cpm_millis), h_count(), where=imps.date == '2014-01-27')
        results = [c for c, _ in result_iterator(res)]
        for ad_id, millis, count in results:
            ad_tup = sum_millis[ad_id]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
Example #15
0
    def test_nested_agg(self):
        imps = Table.from_tag(IMPS)
        results = select(imps.ad_id, imps.date, imps.cpm_millis, where=imps.date > '2014-01-22')

        sum_millis = {}
        for ad_id, dt, millis in results:
            key = str(ad_id) + dt
            if key not in sum_millis:
                sum_millis[key] = [0, 0]
            sum_millis[key][0] += millis
            sum_millis[key][1] += 1
        results.purge()

        newtab = select(imps.ad_id, imps.date, h_sum(imps.cpm_millis), h_count(),
                        where=imps.date > '2014-01-22',
                        nest=True)
        results = select(*star(newtab), where=newtab)
        self.assertGreater(len(list(results)), 0)
        for ad_id, dt, millis, count in results:
            ad_tup = sum_millis[str(ad_id) + dt]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
        results.purge()
Example #16
0
    def test_simple_aggregation(self):
        imps = Table.from_tag(IMPS)
        results = select(imps.ad_id,
                         imps.cpm_millis,
                         where=imps.date == '2014-01-27')

        sum_millis = {}
        for ad_id, millis in results:
            if ad_id not in sum_millis:
                sum_millis[ad_id] = [0, 0]
            sum_millis[ad_id][0] += millis
            sum_millis[ad_id][1] += 1
        results.purge()

        results = select(imps.ad_id,
                         h_sum(imps.cpm_millis),
                         h_count(),
                         where=imps.date == '2014-01-27')
        self.assertGreater(len(list(results)), 0)
        for ad_id, millis, count in results:
            ad_tup = sum_millis[ad_id]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
        results.purge()
Example #17
-1
    def test_multiple_group_bys(self):
        imps = Table.from_tag(IMPS)
        results = select(imps.ad_id,
                         imps.date,
                         imps.cpm_millis,
                         where=imps.date > '2014-01-22')

        sum_millis = {}
        for ad_id, dt, millis in results:
            key = str(ad_id) + dt
            if key not in sum_millis:
                sum_millis[key] = [0, 0]
            sum_millis[key][0] += millis
            sum_millis[key][1] += 1
        results.purge()

        results = select(imps.ad_id,
                         imps.date,
                         h_sum(imps.cpm_millis),
                         h_count(),
                         where=imps.date > '2014-01-22')
        self.assertGreater(len(list(results)), 0)
        for ad_id, dt, millis, count in results:
            ad_tup = sum_millis[str(ad_id) + dt]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
        results.purge()