コード例 #1
0
ファイル: worker.py プロジェクト: yankaics/data007
 def on_update(id):
     """Refresh the stored info for one shop, or purge the shop if it is gone.

     Fetches the shop with ``get_shop``. When the result carries
     ``error == 'not found'`` the shop is removed (``LC.delete``,
     ``delete_shop``) and its queue task is marked done; otherwise a
     non-empty, error-free result is passed to ``update_shop``.

     Args:
         id: Identifier of the shop to refresh.

     Returns:
         The ``get_shop`` result when the shop was reported as not found,
         otherwise ``None``.
     """
     print('updating shopinfo of shopid {}'.format(id))
     si = get_shop(id)
     if 'error' in si:
         if si['error'] == 'not found':
             try:
                 print('deleting shop id: {}'.format(id))
                 LC.delete('shop', id)
                 delete_shop(id)
                 as1.task_done(id)
             except Exception:
                 # Cleanup is best-effort: log and carry on. Only Exception is
                 # caught so KeyboardInterrupt/SystemExit still propagate.
                 traceback.print_exc()
             return si
     # Results carrying any other error fall through here and are ignored.
     if si and 'error' not in si:
         update_shop(si)
コード例 #2
0
ファイル: info.py プロジェクト: mobishift2011/data007
def show_counts(args):
    """Print a text bar chart of the cached record counts.

    One bar is printed for each of ``item``, ``shop`` and ``shopinfo``
    (counts come from ``LC.count``), followed by one bar for ``WC.count()``
    labelled ``items(wrong cate)``.

    Args:
        args: Not used by this function.
    """
    termwidth, _ = gettermsize()
    chartwidth = min(20, termwidth - 20)

    types = ['item', 'shop', 'shopinfo']

    max_count = 0
    counts = dict()
    for t in types:
        count = LC.count(t)
        counts[t] = count
        max_count = max(max_count, count)

    # Bars are scaled against the largest of the three per-type counts.
    scale = get_scale(max_count)
    ratio = chartwidth * 1.0 / scale

    print('Counts Info:')
    for t in types:
        count = counts[t]
        chart = green('|' + '█' * int(ratio * count))
        print('    %-12s %s %d' % (t, chart, count))

    # Read the wrong-category count once so the bar length and the printed
    # number come from the same reading.
    wrong_count = WC.count()
    chart = green('|' + '█' * int(ratio * wrong_count))
    print('    %-12s %s %d' % ('items(wrong cate)', chart, wrong_count))
コード例 #3
0
def show_counts(args):
    """Print a text bar chart of the cached record counts.

    One bar is printed for each of ``item``, ``shop`` and ``shopinfo``
    (counts come from ``LC.count``), followed by one bar for ``WC.count()``
    labelled ``items(wrong cate)``.

    Args:
        args: Not used by this function.
    """
    termwidth, _ = gettermsize()
    chartwidth = min(20, termwidth - 20)

    types = ['item', 'shop', 'shopinfo']

    max_count = 0
    counts = dict()
    for t in types:
        count = LC.count(t)
        counts[t] = count
        max_count = max(max_count, count)

    # Bars are scaled against the largest of the three per-type counts.
    scale = get_scale(max_count)
    ratio = chartwidth * 1.0 / scale

    print('Counts Info:')
    for t in types:
        count = counts[t]
        chart = green('|' + '█' * int(ratio * count))
        print('    %-12s %s %d' % (t, chart, count))

    # Read the wrong-category count once so the bar length and the printed
    # number come from the same reading.
    wrong_count = WC.count()
    chart = green('|' + '█' * int(ratio * wrong_count))
    print('    %-12s %s %d' % ('items(wrong cate)', chart, wrong_count))
コード例 #4
0
ファイル: worker.py プロジェクト: yankaics/data007
        def on_update(itemid):
            """Fetch one item and store, purge or skip it.

            The item is fetched through ``call_with_throttling`` (6000 calls
            per minute when ``ENV == 'DEV'``, otherwise 600).

            Args:
                itemid: Identifier of the item to refresh.

            Returns:
                The fetched item dict.

            Raises:
                ValueError: If the fetch reports an error other than
                    ``'not found'``, if the result is empty, or if
                    ``update_item`` fails.
            """
            print('updating item id: {}'.format(itemid))
            tpm = 6000 if ENV == 'DEV' else 600
            d = call_with_throttling(get_item,
                                     args=(itemid, ),
                                     threshold_per_minute=tpm)
            if 'error' in d:
                if d['error'] in ['not found']:
                    try:
                        print('deleting id: {}'.format(itemid))
                        LC.delete('item', itemid)
                        ItemCT.delete(itemid)
                        delete_item(itemid)
                        ai1.task_done(itemid)
                        ai2.task_done(itemid)
                    except Exception:
                        # Cleanup is best-effort: log and carry on. Only
                        # Exception is caught so KeyboardInterrupt/SystemExit
                        # still propagate.
                        traceback.print_exc()
                    return d
                else:
                    raise ValueError('unknown error: {}'.format(d))

            # check if we should save this item in the first place
            # we only accept a few cates
            if 'cid' in d and not need_crawl(d['cid']):
                WC.add(d['id'])
                return d

            # for connection errors, we simply raise exception here
            # the exceptions will be captured in LC.update_if_needed
            # the task will not clean up and will be requeued by requeue worker
            if not d:
                raise ValueError('item incomplete error: {}'.format(d))
            elif d and 'shopid' in d:
                try:
                    update_item(d)
                except Exception:
                    # Report storage failures as ValueError, but do not turn
                    # interpreter-exit signals into one.
                    traceback.print_exc()
                    raise ValueError('item update failed: {}'.format(d))

                if LC.need_update('shop', d['shopid']):
                    # queue shop jobs
                    as1.put(d['shopid'])

            return d
コード例 #5
0
 def run(self):
     """Queue the items scheduled under ``self.ct`` that still need an update.

     Does nothing when ``ItemCT.get_items`` returns no ids. Otherwise the ids
     are narrowed with ``LC.need_update('item', ...)`` and the survivors are
     put on ``ai1``.
     """
     scheduled = ItemCT.get_items(self.ct)
     if not scheduled:
         return

     print('ct = {}'.format(self.ct))
     print('scheduled {} items for lastcheck'.format(len(scheduled)))

     pending = LC.need_update('item', *scheduled)
     ai1.put(*pending)
     print('putting {} items in queue'.format(len(pending)))
コード例 #6
0
ファイル: worker.py プロジェクト: yankaics/data007
    def work(self):
        """Poll the ``as1`` queue forever and spawn a listing job per shop.

        Each polled shop id is handed to ``LC.update_if_needed``, with
        ``spawn_shop`` as the callback. ``spawn_shop`` puts the shop on
        ``asi1`` and starts ``list_shop`` on ``self.pool``; item ids that
        ``list_shop`` passes to ``on_update`` are recorded with
        ``ItemCT.add_items`` and put on ``ai2``.

        This method does not return. Errors in one iteration are logged and
        the loop continues.
        """

        def on_update(ids):
            # Callback given to list_shop: record and queue a batch of item ids.
            ItemCT.add_items(*ids)
            ai2.put(*ids)

        def spawn_shop(shopid):
            print('updating shop-item of shop {}'.format(shopid))
            asi1.put(shopid)
            self.pool.spawn(list_shop, shopid, on_update)

        while True:
            try:
                result = poll([as1], timeout=10)
                if result:
                    queue, shopid = result
                    LC.update_if_needed('shop', shopid, spawn_shop, queue)
            except Exception:
                # Keep the worker alive across ordinary failures, but let
                # KeyboardInterrupt/SystemExit stop the loop.
                traceback.print_exc()
コード例 #7
0
ファイル: recount.py プロジェクト: mobishift2011/data007
        for j in range(batch):
            if i*batch+j >= total:
                break
            bucket = 'thinset_{}_{}'.format(ts.name, i*batch+j)
            p.scard(bucket)
        count += sum(p.execute())
        print 'current count', count
    ts.conn.set(ts.counterkey, count) 

def recount_thinhash(th, batch=10000):
    """Recompute the total entry count of a thin hash and store it.

    Queues one ``hlen`` per bucket ``thinhash_<name>_<index>`` for every
    index in ``range(th.modulo)``, sums the results and writes the total to
    ``th.counterkey`` through ``th.conn.set``.

    Buckets are queried in pipelines of at most ``batch`` commands. The
    final, possibly shorter, batch is included, so hashes whose ``modulo``
    is not a multiple of ``batch`` (or is smaller than it) are counted in
    full.

    Args:
        th: Object exposing ``name``, ``modulo``, ``counterkey`` and ``conn``.
        batch: Maximum number of ``hlen`` commands per pipeline; must be
            positive.
    """
    print('counting %s' % (th.name,))
    count = 0
    total = th.modulo
    # Stepping by `batch` covers the trailing partial batch, which a floored
    # total / batch loop count would drop.
    for start in range(0, total, batch):
        p = th.conn.pipeline()
        for index in range(start, min(start + batch, total)):
            p.hlen('thinhash_{}_{}'.format(th.name, index))
        count += sum(p.execute())
        print('current count %s' % (count,))
    th.conn.set(th.counterkey, count)

if __name__ == '__main__':
    # Script entry point: recount the 'item' hash. The other recounts are
    # kept commented out below.
    #recount_thinset(WC)
    #recount_thinset(IF)
    #recount_thinhash(LC.gethash('shop'))
    item_hash = LC.gethash('item')
    recount_thinhash(item_hash)
コード例 #8
0
ファイル: aaaa.py プロジェクト: yankaics/data007
from caches import LC

# Show the value stored for one item id in the 'item' hash, in binary.
raw_value = LC.gethash('item').hmget('24405056006')[0]
print(bin(int(raw_value)))
#print LC.gethash('item').hset('22403775078', 145600000001383869847)
コード例 #9
0
ファイル: recount.py プロジェクト: yankaics/data007
                break
            bucket = 'thinset_{}_{}'.format(ts.name, i * batch + j)
            p.scard(bucket)
        count += sum(p.execute())
        print 'current count', count
    ts.conn.set(ts.counterkey, count)


def recount_thinhash(th, batch=10000):
    """Recompute the total entry count of a thin hash and store it.

    Queues one ``hlen`` per bucket ``thinhash_<name>_<index>`` for every
    index in ``range(th.modulo)``, sums the results and writes the total to
    ``th.counterkey`` through ``th.conn.set``.

    Buckets are queried in pipelines of at most ``batch`` commands. The
    final, possibly shorter, batch is included, so hashes whose ``modulo``
    is not a multiple of ``batch`` (or is smaller than it) are counted in
    full.

    Args:
        th: Object exposing ``name``, ``modulo``, ``counterkey`` and ``conn``.
        batch: Maximum number of ``hlen`` commands per pipeline; must be
            positive.
    """
    print('counting %s' % (th.name,))
    count = 0
    total = th.modulo
    # Stepping by `batch` covers the trailing partial batch, which a floored
    # total / batch loop count would drop.
    for start in range(0, total, batch):
        p = th.conn.pipeline()
        for index in range(start, min(start + batch, total)):
            p.hlen('thinhash_{}_{}'.format(th.name, index))
        count += sum(p.execute())
        print('current count %s' % (count,))
    th.conn.set(th.counterkey, count)


if __name__ == '__main__':
    # Script entry point: recount the 'item' hash. The other recounts are
    # kept commented out below.
    #recount_thinset(WC)
    #recount_thinset(IF)
    #recount_thinhash(LC.gethash('shop'))
    item_hash = LC.gethash('item')
    recount_thinhash(item_hash)