コード例 #1
2
ファイル: test_aggregation.py プロジェクト: aprial/hustle
    def test_ordered_aggregation(self):
        imps = Table.from_tag(IMPS)
        resx = select(imps.ad_id, imps.cpm_millis, where=imps.date == '2014-01-27')

        sum_millis = {}
        for ad_id, millis in resx:
            if ad_id not in sum_millis:
                sum_millis[ad_id] = [0, 0]
            sum_millis[ad_id][0] += millis
            sum_millis[ad_id][1] += 1

        results = select(imps.ad_id, h_sum(imps.cpm_millis), h_count(),
                         where=imps.date == '2014-01-27',
                         order_by=2,
                         limit=3,
                         nest=True)
        self.assertGreater(len(list(results)), 0)
        lowest = 0
        for ad_id, millis, count in results:
            self.assertLessEqual(lowest, count)
            lowest = count
            ad_tup = sum_millis[ad_id]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
        self.assertEqual(len(list(results)), min(len(sum_millis), 3))
コード例 #2
1
ファイル: test_join.py プロジェクト: aprial/hustle
    def test_aggregate_join(self):
        imps = Table.from_tag(IMPS)
        pix = Table.from_tag(PIXELS)

        imp_sites = list(select(imps.site_id, imps.ad_id,
                                where=imps.date < '2014-01-13'))
        pix_sites = list(select(pix.site_id, pix.amount,
                                where=pix.date < '2014-01-13'))

        join = {}
        for imp_site, imp_ad_id in imp_sites:
            for pix_site, pix_amount in pix_sites:
                if imp_site == pix_site:
                    if imp_ad_id not in join:
                        join[imp_ad_id] = [0, 0]
                    join[imp_ad_id][0] += pix_amount
                    join[imp_ad_id][1] += 1

        res = select(imps.ad_id, h_sum(pix.amount), h_count(),
                     where=(imps.date < '2014-01-13', pix.date < '2014-01-13'),
                     join=(imps.site_id, pix.site_id))
        results = list(res)
        self.assertEqual(len(results), len(join))

        for (ad_id, amount, count) in results:
            ramount, rcount = join[ad_id]
            self.assertEqual(ramount, amount)
            self.assertEqual(rcount, count)
コード例 #3
0
ファイル: test_join.py プロジェクト: tonicbupt/hustle
    def test_aggregate_join(self):
        imps = Table.from_tag(IMPS)
        pix = Table.from_tag(PIXELS)

        imp_sites = [(s, a) for (s, a), _ in result_iterator(
            select(imps.site_id, imps.ad_id, where=imps.date < '2014-01-13'))]
        pix_sites = [(s, a) for (s, a), _ in result_iterator(
            select(pix.site_id, pix.amount, where=pix.date < '2014-01-13'))]

        join = {}
        for imp_site, imp_ad_id in imp_sites:
            for pix_site, pix_amount in pix_sites:
                if imp_site == pix_site:
                    if imp_ad_id not in join:
                        join[imp_ad_id] = [0, 0]
                    join[imp_ad_id][0] += pix_amount
                    join[imp_ad_id][1] += 1

        res = select(imps.ad_id,
                     h_sum(pix.amount),
                     h_count(),
                     where=(imps.date < '2014-01-13', pix.date < '2014-01-13'),
                     join=(imps.site_id, pix.site_id))
        results = [(ad_id, amount, count)
                   for (ad_id, amount, count), _ in result_iterator(res)]
        self.assertTrue(len(results), len(join))

        for (ad_id, amount, count) in results:
            ramount, rcount = join[ad_id]
            self.assertEqual(ramount, amount)
            self.assertEqual(rcount, count)
コード例 #4
0
ファイル: test_join.py プロジェクト: paulgb/hustle
    def test_nested_join(self):
        imps = Table.from_tag(IMPS)
        pix = Table.from_tag(PIXELS)

        imp_sites = list(
            select(imps.site_id, imps.ad_id, where=imps.date < '2014-01-13'))
        pix_sites = list(
            select(pix.site_id,
                   pix.amount,
                   where=((pix.date < '2014-01-13') & (pix.isActive == True))))

        join = []
        for imp_site, imp_ad_id in imp_sites:
            for pix_site, pix_amount in pix_sites:
                if imp_site == pix_site:
                    join.append((imp_ad_id, pix_amount))

        sub_pix = select(pix.site_id,
                         pix.amount,
                         pix.date,
                         where=((pix.date < '2014-01-15') &
                                (pix.isActive == True)),
                         nest=True)

        res = select(imps.ad_id,
                     sub_pix.amount,
                     where=(imps.date < '2014-01-13',
                            sub_pix.date < '2014-01-13'),
                     join=(imps.site_id, sub_pix.site_id))
        results = [tuple(c) for c in res]
        self.assertEqual(len(results), len(join))

        for jtup in join:
            self.assertIn(jtup, results)
コード例 #5
0
ファイル: test_join.py プロジェクト: tonicbupt/hustle
    def test_simple_join(self):
        imps = Table.from_tag(IMPS)
        pix = Table.from_tag(PIXELS)

        imp_sites = [(s, a) for (s, a), _ in result_iterator(
            select(imps.site_id, imps.ad_id, where=imps.date < '2014-01-13'))]
        pix_sites = [(s, a) for (s, a), _ in result_iterator(
            select(pix.site_id, pix.amount, where=pix.date < '2014-01-13'))]

        join = []
        for imp_site, imp_ad_id in imp_sites:
            for pix_site, pix_amount in pix_sites:
                if imp_site == pix_site:
                    join.append((imp_ad_id, pix_amount))

        res = select(imps.ad_id,
                     pix.amount,
                     where=(imps.date < '2014-01-13', pix.date < '2014-01-13'),
                     join=(imps.site_id, pix.site_id),
                     order_by='amount')
        results = [(ad_id, amount)
                   for (ad_id, amount), _ in result_iterator(res)]
        self.assertTrue(len(results), len(join))

        for jtup in join:
            self.assertIn(jtup, results)

        lowest = 0
        for ad_id, amount in results:
            self.assertLessEqual(lowest, amount)
            lowest = amount
コード例 #6
0
ファイル: test_join.py プロジェクト: pooya/hustle
    def test_nested_join(self):
        imps = Table.from_tag(IMPS)
        pix  = Table.from_tag(PIXELS)

        imp_sites = [(s, a) for (s, a), _ in result_iterator(select(imps.site_id, imps.ad_id,
                                                                    where=imps.date < '2014-01-13'))]
        pix_sites = [(s, a) for (s, a), _ in result_iterator(select(pix.site_id, pix.amount,
                                                                    where=((pix.date < '2014-01-13') &
                                                                           (pix.isActive > 0))))]

        join = []
        for imp_site, imp_ad_id in imp_sites:
            for pix_site, pix_amount in pix_sites:
                if imp_site == pix_site:
                    join.append((imp_ad_id, pix_amount))

        sub_pix = select(pix.site_id, pix.amount, pix.date,
                         where=((pix.date < '2014-01-15') & (pix.isActive > 0)),
                         nest=True)

        res = select(imps.ad_id, sub_pix.amount,
                     where=(imps.date < '2014-01-13', sub_pix.date < '2014-01-13'),
                     join=(imps.site_id, sub_pix.site_id))
        results = [(ad_id, amount) for (ad_id, amount), _ in result_iterator(res)]
        self.assertTrue(len(results), len(join))

        for jtup in join:
            self.assertIn(jtup, results)
コード例 #7
0
ファイル: test_join.py プロジェクト: aprial/hustle
    def test_nested_join(self):
        imps = Table.from_tag(IMPS)
        pix = Table.from_tag(PIXELS)

        imp_sites = list(select(imps.site_id, imps.ad_id,
                                where=imps.date < '2014-01-13'))
        pix_sites = list(select(pix.site_id, pix.amount,
                                where=((pix.date < '2014-01-13') &
                                       (pix.isActive == True))))

        join = []
        for imp_site, imp_ad_id in imp_sites:
            for pix_site, pix_amount in pix_sites:
                if imp_site == pix_site:
                    join.append((imp_ad_id, pix_amount))

        sub_pix = select(pix.site_id, pix.amount, pix.date,
                         where=((pix.date < '2014-01-15') & (pix.isActive == True)),
                         nest=True)

        res = select(imps.ad_id, sub_pix.amount,
                     where=(imps.date < '2014-01-13', sub_pix.date < '2014-01-13'),
                     join=(imps.site_id, sub_pix.site_id))
        results = [tuple(c) for c in res]
        self.assertEqual(len(results), len(join))

        for jtup in join:
            self.assertIn(jtup, results)
コード例 #8
0
def ensure_tables():
    overrides['server'] = 'disco://localhost'
    overrides['dump'] = False
    overrides['nest'] = False
    settings = Settings()
    ddfs = settings['ddfs']

    imps = Table.create(IMPS,
                        fields=[
                            '=$token', '%url', '+%site_id', '@cpm_millis',
                            '+#ad_id', '+$date', '+@time'
                        ],
                        partition='date',
                        force=True)
    pixels = Table.create(PIXELS,
                          fields=[
                              '=$token', '+@1isActive', '+%site_id', '@amount',
                              '+#account_id', '+%city', '+%2state', '+#2metro',
                              '$ip', '*keyword', '+$date'
                          ],
                          partition='date',
                          force=True)

    tags = ddfs.list("hustle:%s:" % IMPS)
    if len(tags) == 0:
        # insert the files
        insert(imps, phile='fixtures/imps.json', preprocess=imp_process)

    tags = ddfs.list("hustle:%s:" % PIXELS)
    if len(tags) == 0:
        # insert the files
        insert(pixels, phile='fixtures/pixel.json')
コード例 #9
0
ファイル: test_join.py プロジェクト: tonicbupt/hustle
    def test_nested_join(self):
        imps = Table.from_tag(IMPS)
        pix = Table.from_tag(PIXELS)

        imp_sites = [(s, a) for (s, a), _ in result_iterator(
            select(imps.site_id, imps.ad_id, where=imps.date < '2014-01-13'))]
        pix_sites = [(s, a) for (s, a), _ in result_iterator(
            select(pix.site_id,
                   pix.amount,
                   where=((pix.date < '2014-01-13') & (pix.isActive > 0))))]

        join = []
        for imp_site, imp_ad_id in imp_sites:
            for pix_site, pix_amount in pix_sites:
                if imp_site == pix_site:
                    join.append((imp_ad_id, pix_amount))

        sub_pix = select(pix.site_id,
                         pix.amount,
                         pix.date,
                         where=((pix.date < '2014-01-15') &
                                (pix.isActive > 0)),
                         nest=True)

        res = select(imps.ad_id,
                     sub_pix.amount,
                     where=(imps.date < '2014-01-13',
                            sub_pix.date < '2014-01-13'),
                     join=(imps.site_id, sub_pix.site_id))
        results = [(ad_id, amount)
                   for (ad_id, amount), _ in result_iterator(res)]
        self.assertTrue(len(results), len(join))

        for jtup in join:
            self.assertIn(jtup, results)
コード例 #10
0
ファイル: test_join.py プロジェクト: aprial/hustle
    def test_simple_join(self):
        imps = Table.from_tag(IMPS)
        pix = Table.from_tag(PIXELS)

        imp_sites = [(s, a) for (s, a) in select(imps.site_id, imps.ad_id,
                                                 where=imps.date < '2014-01-13')]
        pix_sites = [(s, a) for (s, a) in select(pix.site_id, pix.amount,
                                                 where=pix.date < '2014-01-13')]

        join = []
        for imp_site, imp_ad_id in imp_sites:
            for pix_site, pix_amount in pix_sites:
                if imp_site == pix_site:
                    join.append((imp_ad_id, pix_amount))

        res = select(imps.ad_id, pix.amount,
                     where=(imps.date < '2014-01-13', pix.date < '2014-01-13'),
                     join=(imps.site_id, pix.site_id),
                     order_by='amount')
        results = list(res)
        self.assertEqual(len(results), len(join))

        for jtup in join:
            self.assertIn(jtup, results)

        lowest = 0
        for ad_id, amount in results:
            self.assertLessEqual(lowest, amount)
            lowest = amount
コード例 #11
0
ファイル: setup.py プロジェクト: aprial/hustle
def ensure_tables():
    overrides['server'] = 'disco://localhost'
    overrides['dump'] = False
    overrides['nest'] = False
    settings = Settings()
    ddfs = settings['ddfs']

    imps = Table.create(IMPS,
                        columns=['wide index string token', 'trie url', 'index trie site_id', 'uint cpm_millis',
                                 'index int ad_id', 'index string date', 'index uint time', 'bit click',
                                 'index bit impression', 'bit conversion'],
                        partition='date',
                        force=True)
    pixels = Table.create(PIXELS,
                          columns=['wide index string token', 'index bit isActive', 'index trie site_id',
                                   'uint amount', 'index int account_id', 'index trie city', 'index trie16 state',
                                   'index int16 metro', 'string ip', 'lz4 keyword', 'index string date'],
                          partition='date',
                          force=True)

    tags = ddfs.list("hustle:%s:" % IMPS)
    if len(tags) == 0:
        # insert the files
        insert(imps, phile='fixtures/imps.json', preprocess=imp_process)

    tags = ddfs.list("hustle:%s:" % PIXELS)
    if len(tags) == 0:
        # insert the files
        insert(pixels, phile='fixtures/pixel.json')
コード例 #12
0
def ensure_tables():
    overrides['server'] = 'disco://localhost'
    overrides['dump'] = False
    overrides['nest'] = False
    settings = Settings()
    ddfs = settings['ddfs']

    imps = Table.create(IMPS,
                        columns=[
                            'wide index string token', 'trie url',
                            'index trie site_id', 'uint cpm_millis',
                            'index int ad_id', 'index string date',
                            'index uint time', 'bit click',
                            'index bit impression', 'bit conversion'
                        ],
                        partition='date',
                        force=True)
    pixels = Table.create(PIXELS,
                          columns=[
                              'wide index string token', 'index bit isActive',
                              'index trie site_id', 'uint amount',
                              'index int account_id', 'index trie city',
                              'index trie16 state', 'index int16 metro',
                              'string ip', 'lz4 keyword', 'index string date'
                          ],
                          partition='date',
                          force=True)
    pixel_hlls = Table.create(PIXELS_HLL,
                              columns=[
                                  'index bit isActive', 'index trie site_id',
                                  'index int account_id', 'index trie city',
                                  'index trie16 state', 'index string date',
                                  'binary hll'
                              ],
                              partition='date',
                              force=True)
    ips = Table.create(IPS,
                       columns=['index trie16 exchange_id', 'index uint32 ip'],
                       force=True)

    tags = ddfs.list("hustle:%s:" % IMPS)
    if len(tags) == 0:
        # insert the files
        insert(imps, File='fixtures/imps.json', preprocess=imp_process)

    tags = ddfs.list("hustle:%s:" % PIXELS)
    if len(tags) == 0:
        # insert the files
        insert(pixels, File='fixtures/pixel.json')

    tags = ddfs.list("hustle:%s:" % IPS)
    if len(tags) == 0:
        # insert the files
        insert(ips, File='fixtures/ip.json')

    tags = ddfs.list("hustle:%s:" % PIXELS_HLL)
    if len(tags) == 0:
        # insert the files
        insert_hll(pixel_hlls, file='./fixtures/pixel.json', hll_field='token')
コード例 #13
0
ファイル: test_table.py プロジェクト: UIKit0/hustle
    def test_create_syntax(self):
        full_columns = ['wide index uint32 x', 'index string y', 'int16 z', 'lz4 a', 'trie32 b', 'binary c']
        full_fields = ['=@4x', '+$y', '#2z', '*a', '%4b', '&c']
        fields = Table.parse_column_specs(full_columns)
        self.assertListEqual(fields, full_fields)

        default_columns = ['wide index x', 'index int y', 'uint z', 'trie b', 'c']
        default_fields = ['=x', '+#y', '@z', '%b', 'c']
        fields = Table.parse_column_specs(default_columns)
        self.assertListEqual(fields, default_fields)
コード例 #14
0
ファイル: test_table.py プロジェクト: zhengyuv/hustle
    def test_create_syntax(self):
        full_columns = [
            'wide index uint32 x', 'index string y', 'int16 z', 'lz4 a',
            'trie32 b', 'binary c'
        ]
        full_fields = ['=@4x', '+$y', '#2z', '*a', '%4b', '&c']
        fields = Table.parse_column_specs(full_columns)
        self.assertListEqual(fields, full_fields)

        default_columns = [
            'wide index x', 'index int y', 'uint z', 'trie b', 'c'
        ]
        default_fields = ['=x', '+#y', '@z', '%b', 'c']
        fields = Table.parse_column_specs(default_columns)
        self.assertListEqual(fields, default_fields)
コード例 #15
0
 def test_combo_where_on_or_partition(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis,
                  where=((imps.date == '2014-01-21') | (imps.date == '2014-01-25') | (imps.ad_id == 30010)))
     results = [c for c, _ in result_iterator(res)]
     self.assertEqual(len(results), 27)
     self.assertTrue(all(d == '2014-01-21' or d == '2014-01-25' or a == 30010 for a, d, _ in results))
コード例 #16
0
ファイル: test_aggregation.py プロジェクト: paulgb/hustle
    def test_nested_agg(self):
        imps = Table.from_tag(IMPS)
        results = select(imps.ad_id,
                         imps.date,
                         imps.cpm_millis,
                         where=imps.date > '2014-01-22')

        sum_millis = {}
        for ad_id, dt, millis in results:
            key = str(ad_id) + dt
            if key not in sum_millis:
                sum_millis[key] = [0, 0]
            sum_millis[key][0] += millis
            sum_millis[key][1] += 1

        newtab = select(imps.ad_id,
                        imps.date,
                        h_sum(imps.cpm_millis),
                        h_count(),
                        where=imps.date > '2014-01-22',
                        nest=True)
        results = select(*star(newtab), where=newtab)
        self.assertGreater(len(list(results)), 0)
        for ad_id, dt, millis, count in results:
            ad_tup = sum_millis[str(ad_id) + dt]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
コード例 #17
0
ファイル: test_simple_query.py プロジェクト: aprial/hustle
 def test_combo_where_on_and_partition(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis,
                  where=((imps.date >= '2014-01-21') & (imps.date <= '2014-01-23') & (imps.ad_id == 30010)))
     results = list(res)
     self.assertEqual(len(results), 2)
     self.assertTrue(all(d in ('2014-01-21', '2014-01-22', '2014-01-23') and a == 30010 for a, d, _ in results))
コード例 #18
0
 def test_overall(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, where=imps.date == '2014-01-27', distinct=True, limit=4,
                  order_by='ad_id', desc=True)
     results = [a for (a, d), _ in result_iterator(res)]
     self.assertEqual(len(results), 4)
     self.assertListEqual(results, [30019, 30018, 30017, 30015])
コード例 #19
0
ファイル: test_simple_query.py プロジェクト: UIKit0/hustle
 def test_range_on_partition(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis, where=imps.date > '2014-01-27')
     results = list(res)
     self.assertEqual(len(results), 20)
     self.assertTrue(all(d in ('2014-01-28', '2014-01-29') for _, d, _ in results))
     res.purge()
コード例 #20
0
 def test_combo_where_on_mixed_partition(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis,
                  where=(((imps.date >= '2014-01-21') & (imps.date <= '2014-01-23') & (imps.time > 170000))))
     results = [c for c, _ in result_iterator(res)]
     self.assertEqual(len(results), 2)
     self.assertTrue(all((d in ('2014-01-21', '2014-01-22', '2014-01-23') and a == 30003) for a, d, c in results))
コード例 #21
0
ファイル: test_simple_query.py プロジェクト: aprial/hustle
 def test_combo_where_on_or_partition_ex1(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis,
                  where=((imps.date << ['2014-01-21', '2014-01-25']) | (imps.ad_id << [30003, 30010])))
     results = list(res)
     self.assertEqual(len(results), 40)
     self.assertTrue(all(d == '2014-01-21' or d == '2014-01-25' or a == 30010 or a == 30003 for a, d, _ in results))
コード例 #22
0
ファイル: inserttest.py プロジェクト: txiner/db-xiner
def ensure_tables():
    overrides['server'] = 'disco://localhost'
    overrides['dump'] = False
    overrides['nest'] = False
    settings = Settings()
    ddfs = settings['ddfs']

    pcappacket= Table.create(mmm,columns=['index int32 GMTtime','index int32 MicroTime','index int32 caplen','index int32 len','index string ethtype','index string protocol','index uint8 ttl','index uint8 sip0','index uint8 sip1','index uint8 sip2','index uint8 sip3','index uint8 dip0','index uint8 dip1','index uint8 dip2','index uint8 dip3','index uint16 sport','index uint16 dport','lz4 data'],partition=None,force=True)
    '''
    imps = Table.create(IMPS,
                        columns=['wide index string token', 'trie url', 'index trie site_id', 'uint cpm_millis',
                                 'index int ad_id', 'index string date', 'index uint time'],
                        partition='date',
                        force=True)
    pixels = Table.create(PIXELS,
                          columns=['wide index string token', 'index uint8 isActive', 'index trie site_id',
                                   'uint amount', 'index int account_id', 'index trie city', 'index trie16 state',
                                   'index int16 metro', 'string ip', 'lz4 keyword', 'index string date'],
                          partition='date',
                          force=True)
    '''
    from ls import *
    ls('/home/xiner/hustle/pcapread/testall.pcap')
    tags = ddfs.list("hustle:%s:" % mmm)
    print len(tags)
    if len(tags) == 0:
        # insert the files
        insert(pcappacket, phile='result.json' )

    '''tags = ddfs.list("hustle:%s:" % PIXELS)
コード例 #23
0
ファイル: test_simple_query.py プロジェクト: dinedal/hustle
 def test_combo_where_on_mixed_partition(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis,
                  where=(((imps.date >= '2014-01-21') & (imps.date <= '2014-01-23') & (imps.time > 170000))))
     results = [c for c, _ in result_iterator(res)]
     self.assertEqual(len(results), 2)
     self.assertTrue(all((d in ('2014-01-21', '2014-01-22', '2014-01-23') and a == 30003) for a, d, c in results))
コード例 #24
0
ファイル: test_cardinality.py プロジェクト: cih-y2k/hustle
    def test_cardinality_with_order_by(self):
        hll = Table.from_tag(PIXELS_HLL)
        tokens_by_date = defaultdict(set)
        with open("./fixtures/pixel.json") as f:
            for line in f:
                record = ujson.loads(line)
                tokens_by_date[record["date"]].add(record["token"])
        result = [(date, len(tokens))
                  for date, tokens in tokens_by_date.items()]

        # Test order by date
        expects = sorted(result, key=itemgetter(0), reverse=True)
        res = select(hll.date,
                     h_hll(hll.hll),
                     where=hll,
                     order_by=0,
                     desc=True)
        estimates = list(res)
        for i, (date, expected_cardinality) in enumerate(expects):
            self.assertEqual(estimates[i][0], date)
            self.checkEstimate(estimates[i][1], expected_cardinality)
        res.purge()

        # Test order by hll
        res = select(hll.date,
                     h_hll(hll.hll),
                     where=hll,
                     order_by=1,
                     desc=True)
        l = list(res)
        for i in range(len(l) - 1):
            self.assertTrue(l[i][1] >= l[i + 1][1])
        res.purge()
コード例 #25
0
ファイル: test_simple_query.py プロジェクト: UIKit0/hustle
 def test_combo_where_no_partition(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis, where=(imps.time >= 180000))
     results = list(res)
     print results
     self.assertEqual(len(results), 5)
     res.purge()
コード例 #26
0
ファイル: test_aggregation.py プロジェクト: tonicbupt/hustle
    def test_ordered_aggregation(self):
        imps = Table.from_tag(IMPS)
        res = select(imps.ad_id, imps.cpm_millis, where=imps.date == '2014-01-27')
        resx = [c for c, _ in result_iterator(res)]

        sum_millis = {}
        for ad_id, millis in resx:
            if ad_id not in sum_millis:
                sum_millis[ad_id] = [0, 0]
            sum_millis[ad_id][0] += millis
            sum_millis[ad_id][1] += 1

        res = select(imps.ad_id, h_sum(imps.cpm_millis), h_count(),
                     where=imps.date == '2014-01-27',
                     order_by=2,
                     limit=3)
        results = [c for c, _ in result_iterator(res)]
        lowest = 0
        for ad_id, millis, count in results:
            self.assertLessEqual(lowest, count)
            lowest = count
            ad_tup = sum_millis[ad_id]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
        self.assertTrue(len(results) == min(len(sum_millis), 3))
コード例 #27
0
 def test_single_int_order(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis, where=imps.date == '2014-01-27', order_by=imps.cpm_millis)
     lowest = 0
     for (a, d, c), _ in result_iterator(res):
         self.assertLessEqual(lowest, c)
         lowest = c
コード例 #28
0
ファイル: test_column_fn.py プロジェクト: UIKit0/hustle
 def test_column_fn(self):
     ips = Table.from_tag(IPS)
     res = select(ips.exchange_id, ip_ntoa(ips.ip),
                  where=ips.exchange_id == "Adx")
     results = list(res)
     self.assertEqual(len(results), 29)
     res.purge()
コード例 #29
0
ファイル: test_aggregation.py プロジェクト: zhengyuv/hustle
    def test_ordered_aggregation(self):
        imps = Table.from_tag(IMPS)
        resx = select(imps.ad_id,
                      imps.cpm_millis,
                      where=imps.date == '2014-01-27')

        sum_millis = {}
        for ad_id, millis in resx:
            if ad_id not in sum_millis:
                sum_millis[ad_id] = [0, 0]
            sum_millis[ad_id][0] += millis
            sum_millis[ad_id][1] += 1

        results = select(imps.ad_id,
                         h_sum(imps.cpm_millis),
                         h_count(),
                         where=imps.date == '2014-01-27',
                         order_by=2,
                         limit=3,
                         nest=True)
        self.assertGreater(len(list(results)), 0)
        lowest = 0
        for ad_id, millis, count in results:
            self.assertLessEqual(lowest, count)
            lowest = count
            ad_tup = sum_millis[ad_id]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
        self.assertEqual(len(list(results)), min(len(sum_millis), 3))

        resx.purge()
コード例 #30
0
ファイル: test_simple_query.py プロジェクト: dinedal/hustle
 def test_combo_where_on_or_partition(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis,
                  where=((imps.date == '2014-01-21') | (imps.date == '2014-01-25') | (imps.ad_id == 30010)))
     results = [c for c, _ in result_iterator(res)]
     self.assertEqual(len(results), 27)
     self.assertTrue(all(d == '2014-01-21' or d == '2014-01-25' or a == 30010 for a, d, _ in results))
コード例 #31
0
ファイル: test_simple_query.py プロジェクト: dinedal/hustle
 def test_combo_where_on_or_partition_ex2(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis,
                  where=((imps.date << ['2014-01-21', '2014-01-25']) & (imps.ad_id << [30003, 30010])))
     results = [c for c, _ in result_iterator(res)]
     self.assertEqual(len(results), 1)
     self.assertTrue(all(d == '2014-01-21' and a == 30010 for a, d, _ in results))
コード例 #32
0
    def test_column_fn_with_agg(self):
        ips = Table.from_tag(IPS)
        res = select(ips.exchange_id,
                     h_max(ip_ntoa(ips.ip)),
                     where=ips,
                     order_by=(ips.exchange_id, ))
        results = list(res)
        res.purge()
        exchanges = [ex for ex, _ in results]
        ipss = [ip for _, ip in results]
        self.assertListEqual(['Adx', 'Appnexus', 'OpenX', 'Rubycon'],
                             exchanges)
        self.assertListEqual(['192.168.1.1'] * 4, ipss)

        res = select(ips.exchange_id,
                     h_min(ip_ntoa(ips.ip)),
                     where=ips,
                     order_by=(ips.exchange_id, ))
        results = list(res)
        res.purge()
        exchanges = [ex for ex, _ in results]
        ipss = [ip for _, ip in results]
        self.assertListEqual(['Adx', 'Appnexus', 'OpenX', 'Rubycon'],
                             exchanges)
        self.assertListEqual(['127.0.0.1'] * 4, ipss)
コード例 #33
0
    def test_bit_values(self):
        pix = Table.from_tag(PIXELS)
        res = select(pix.isActive, where=pix.isActive == 1)
        actives = 0
        for (act, ) in res:
            actives += act

        self.assertEqual(actives, 234)
コード例 #34
0
 def test_column_fn(self):
     ips = Table.from_tag(IPS)
     res = select(ips.exchange_id,
                  ip_ntoa(ips.ip),
                  where=ips.exchange_id == "Adx")
     results = list(res)
     self.assertEqual(len(results), 29)
     res.purge()
コード例 #35
0
ファイル: test_project_order.py プロジェクト: paulgb/hustle
 def test_distinct(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id,
                  imps.date,
                  where=imps.date == '2014-01-27',
                  distinct=True)
     results = list(res)
     self.assertEqual(len(results), 8)
コード例 #36
0
ファイル: test_simple_query.py プロジェクト: dinedal/hustle
 def test_equality_on_partition(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis, where=imps.date == '2014-01-27')
     results = [c for c, _ in result_iterator(res)]
     self.assertEqual(len(results), 10)
     found = next((a, d, c) for a, d, c in results if a == 30018 and d == '2014-01-27' and c == 4506)
     self.assertIsNotNone(found)
     self.assertTrue(all(d == '2014-01-27' for _, d, _ in results))
コード例 #37
0
 def test_equality_on_partition(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis, where=imps.date == '2014-01-27')
     results = [c for c, _ in result_iterator(res)]
     self.assertEqual(len(results), 10)
     found = next((a, d, c) for a, d, c in results if a == 30018 and d == '2014-01-27' and c == 4506)
     self.assertIsNotNone(found)
     self.assertTrue(all(d == '2014-01-27' for _, d, _ in results))
コード例 #38
0
    def test_aggregate(self):
        imps = Table.from_tag(IMPS)
        res = select(h_sum(imps.click), h_sum(imps.conversion), h_sum(imps.impression), where=imps)

        (clicks, conversions, impressions) = list(res)[0]

        self.assertEqual(clicks, 21)
        self.assertEqual(conversions, 5)
        self.assertEqual(impressions, 174)
コード例 #39
0
ファイル: test_project_order.py プロジェクト: paulgb/hustle
 def test_low_limit(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id,
                  imps.date,
                  imps.cpm_millis,
                  where=imps.date == '2014-01-27',
                  limit=4)
     results = list(res)
     self.assertEqual(len(results), 4)
コード例 #40
0
ファイル: test_simple_query.py プロジェクト: paulgb/hustle
 def test_combo_where_no_partition(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id,
                  imps.date,
                  imps.cpm_millis,
                  where=(imps.time >= 180000))
     results = list(res)
     print results
     self.assertEqual(len(results), 5)
コード例 #41
0
ファイル: test_column_fn.py プロジェクト: UIKit0/hustle
 def test_column_fn_with_distinct(self):
     ips = Table.from_tag(IPS)
     res = select(ip_ntoa(ips.ip),
                  where=ips.exchange_id == "Adx", order_by=(ip_ntoa(ips.ip),),
                  distinct=True)
     results = list(res)
     res.purge()
     ipss = [ip[0] for ip in results]
     self.assertListEqual(['127.0.0.1', '192.1.1.1', '192.1.1.2', '192.168.1.1'],
                          ipss)
コード例 #42
0
ファイル: pipeline.py プロジェクト: tonicbupt/hustle
 def get_result_schema(self, project):
     from hustle import Table
     fields = []
     for col in project:
         col = col.column
         if col.name not in fields:
             fields.append(col.schema_string())
     # print "GEEWHIZ: %s %s %s" % (indicies, fields, blobs)
     name = '-'.join([w._name for w in self.wheres])[:64]
     return Table(name="sub-%s" % name, fields=fields)
コード例 #43
0
ファイル: test_simple_query.py プロジェクト: paulgb/hustle
 def test_range_on_partition(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id,
                  imps.date,
                  imps.cpm_millis,
                  where=imps.date > '2014-01-27')
     results = list(res)
     self.assertEqual(len(results), 20)
     self.assertTrue(
         all(d in ('2014-01-28', '2014-01-29') for _, d, _ in results))
コード例 #44
0
ファイル: test_cardinality.py プロジェクト: tspurway/hustle
 def test_cardinality_all(self):
     hll = Table.from_tag(PIXELS_HLL)
     res = select(h_hll(hll.hll), where=hll)
     estimate = next(iter(res))[0]
     tokens = set([])
     with open("./fixtures/pixel.json") as f:
         for line in f:
             record = ujson.loads(line)
             tokens.add(record["token"])
     self.checkEstimate(estimate, len(tokens))
     res.purge()
コード例 #45
0
ファイル: test_simple_query.py プロジェクト: paulgb/hustle
 def test_combo_where_on_partition(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id,
                  imps.date,
                  imps.cpm_millis,
                  where=((imps.date >= '2014-01-20') &
                         (imps.ad_id == 30010)))
     results = list(res)
     self.assertEqual(len(results), 6)
     self.assertTrue(
         all(d >= '2014-01-20' and a == 30010 for a, d, _ in results))
コード例 #46
0
ファイル: test_simple_query.py プロジェクト: paulgb/hustle
 def test_combo_where_on_or_partition_ex2(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id,
                  imps.date,
                  imps.cpm_millis,
                  where=((imps.date << ['2014-01-21', '2014-01-25']) &
                         (imps.ad_id << [30003, 30010])))
     results = list(res)
     self.assertEqual(len(results), 1)
     self.assertTrue(
         all(d == '2014-01-21' and a == 30010 for a, d, _ in results))
コード例 #47
0
ファイル: test_cardinality.py プロジェクト: cih-y2k/hustle
 def test_cardinality_all(self):
     hll = Table.from_tag(PIXELS_HLL)
     res = select(h_hll(hll.hll), where=hll)
     estimate = next(iter(res))[0]
     tokens = set([])
     with open("./fixtures/pixel.json") as f:
         for line in f:
             record = ujson.loads(line)
             tokens.add(record["token"])
     self.checkEstimate(estimate, len(tokens))
     res.purge()
コード例 #48
0
 def test_column_fn_with_distinct(self):
     ips = Table.from_tag(IPS)
     res = select(ip_ntoa(ips.ip),
                  where=ips.exchange_id == "Adx",
                  order_by=(ip_ntoa(ips.ip), ),
                  distinct=True)
     results = list(res)
     res.purge()
     ipss = [ip[0] for ip in results]
     self.assertListEqual(
         ['127.0.0.1', '192.1.1.1', '192.1.1.2', '192.168.1.1'], ipss)
コード例 #49
0
ファイル: setup.py プロジェクト: pooya/hustle
def ensure_tables():
    overrides["server"] = "disco://localhost"
    overrides["dump"] = False
    overrides["nest"] = False
    settings = Settings()
    ddfs = settings["ddfs"]

    imps = Table.create(
        IMPS,
        fields=["=$token", "%url", "+%site_id", "@cpm_millis", "+#ad_id", "+$date", "+@time"],
        partition="date",
        force=True,
    )
    pixels = Table.create(
        PIXELS,
        fields=[
            "=$token",
            "+@1isActive",
            "+%site_id",
            "@amount",
            "+#account_id",
            "+%city",
            "+%2state",
            "+#2metro",
            "$ip",
            "*keyword",
            "+$date",
        ],
        partition="date",
        force=True,
    )

    tags = ddfs.list("hustle:%s:" % IMPS)
    if len(tags) == 0:
        # insert the files
        insert(imps, phile="fixtures/imps.json", preprocess=imp_process)

    tags = ddfs.list("hustle:%s:" % PIXELS)
    if len(tags) == 0:
        # insert the files
        insert(pixels, phile="fixtures/pixel.json")
コード例 #50
0
ファイル: test_bool.py プロジェクト: aprial/hustle
    def test_project(self):
        imps = Table.from_tag(IMPS)
        res = select(imps.click, imps.conversion, imps.impression, where=imps)
        clicks = conversions = impressions = 0
        for (click, conv, imp) in res:
            clicks += click
            conversions += conv
            impressions += imp

        self.assertEqual(clicks, 21)
        self.assertEqual(conversions, 5)
        self.assertEqual(impressions, 174)
コード例 #51
0
ファイル: test_aggregation.py プロジェクト: aprial/hustle
    def test_overflow(self):
        from itertools import izip

        imps = Table.from_tag(IMPS)
        fly_results = select(imps.date, h_sum(imps.impression), where=imps, order_by=imps.date)

        nest_tab = select(imps.date, h_sum(imps.impression), where=imps, nest=True)
        nest_results = select(*star(nest_tab), where=nest_tab, order_by=0)

        for ((fdate, fimps), (ndate, nimps)) in izip(fly_results, nest_results):
            self.assertEqual(fdate, ndate)
            self.assertEqual(fimps, nimps)
コード例 #52
0
ファイル: test_drop.py プロジェクト: UIKit0/hustle
 def test_delete_partial(self):
     delete(self.table.date >= '2014-01-13')
     self.assertEqual(['hustle:__test_drop_imps:2014-01-10',
                       'hustle:__test_drop_imps:2014-01-11',
                       'hustle:__test_drop_imps:2014-01-12'],
                      get_partitions(self.table))
     tags = self.ddfs.list(Table.base_tag(self.table._name))
     self.assertEqual(len(tags), 4)
     self.assertIn("hustle:__test_drop_imps", tags)
     drop(self.table)
     with self.assertRaises(ValueError):
         delete(self.table.site_id == 'foobar')
         delete(self.tale.url)
コード例 #53
0
ファイル: test_project_order.py プロジェクト: dinedal/hustle
 def test_combo_order(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis,
                  where=imps.date > '2014-01-21',
                  order_by=(imps.date, imps.cpm_millis))
     lowest_cpm = 0
     lowest_date = '2000-01-01'
     for (a, d, c), _ in result_iterator(res):
         if lowest_date == d:
             self.assertLessEqual(lowest_cpm, c)
             lowest_cpm = c
         else:
             self.assertLessEqual(lowest_date, d)
             lowest_date = d
             lowest_cpm = c
コード例 #54
0
ファイル: test_project_order.py プロジェクト: dinedal/hustle
 def test_combo_descending(self):
     imps = Table.from_tag(IMPS)
     res = select(imps.ad_id, imps.date, imps.cpm_millis,
                  where=imps.date > '2014-01-21',
                  order_by=(imps.date, imps.cpm_millis),
                  desc=True)
     highest_cpm = 1000000000
     highest_date = '2222-01-01'
     for (a, d, c), _ in result_iterator(res):
         if highest_date == d:
             self.assertGreaterEqual(highest_cpm, c)
             highest_cpm = c
         else:
             self.assertGreaterEqual(highest_date, d)
             highest_date = d
             highest_cpm = c
コード例 #55
0
ファイル: test_aggregation.py プロジェクト: aprial/hustle
    def test_simple_aggregation(self):
        imps = Table.from_tag(IMPS)
        results = select(imps.ad_id, imps.cpm_millis, where=imps.date == '2014-01-27')

        sum_millis = {}
        for ad_id, millis in results:
            if ad_id not in sum_millis:
                sum_millis[ad_id] = [0, 0]
            sum_millis[ad_id][0] += millis
            sum_millis[ad_id][1] += 1

        results = select(imps.ad_id, h_sum(imps.cpm_millis), h_count(), where=imps.date == '2014-01-27')
        self.assertGreater(len(list(results)), 0)
        for ad_id, millis, count in results:
            ad_tup = sum_millis[ad_id]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
コード例 #56
-1
ファイル: test_aggregation.py プロジェクト: zhengyuv/hustle
    def test_multiple_group_bys(self):
        imps = Table.from_tag(IMPS)
        results = select(imps.ad_id,
                         imps.date,
                         imps.cpm_millis,
                         where=imps.date > '2014-01-22')

        sum_millis = {}
        for ad_id, dt, millis in results:
            key = str(ad_id) + dt
            if key not in sum_millis:
                sum_millis[key] = [0, 0]
            sum_millis[key][0] += millis
            sum_millis[key][1] += 1
        results.purge()

        results = select(imps.ad_id,
                         imps.date,
                         h_sum(imps.cpm_millis),
                         h_count(),
                         where=imps.date > '2014-01-22')
        self.assertGreater(len(list(results)), 0)
        for ad_id, dt, millis, count in results:
            ad_tup = sum_millis[str(ad_id) + dt]
            self.assertEqual(millis, ad_tup[0])
            self.assertEqual(count, ad_tup[1])
        results.purge()