def on_update(id):
    """Refresh the stored info of one shop, or purge the shop if it is gone.

    The shop is fetched with ``get_shop(id)``. The result is treated as a
    mapping that may carry an ``'error'`` key:

    * ``'error' == 'not found'``: the shop is removed via
      ``LC.delete('shop', id)`` and ``delete_shop(id)``, and the task is
      marked done on ``as1``. Failures during this cleanup are logged with
      ``traceback.print_exc()`` and otherwise ignored (best effort).
    * any other error: nothing is stored.
    * no error and a non-empty result: the result is passed to
      ``update_shop``.

    :param id: shop id (the name shadows the builtin but is kept because it
        is part of the existing signature).
    :returns: the fetched mapping in the 'not found' case, otherwise None.
    """
    print('updating shopinfo of shopid {}'.format(id))
    si = get_shop(id)
    if 'error' in si:
        if si['error'] == 'not found':
            try:
                print('deleting shop id: {}'.format(id))
                LC.delete('shop', id)
                delete_shop(id)
                as1.task_done(id)
            except Exception:
                # Cleanup stays best-effort, but KeyboardInterrupt/SystemExit
                # (and other BaseException-only signals) must still propagate
                # so the worker can be stopped.
                traceback.print_exc()
            return si
    if si and 'error' not in si:
        update_shop(si)
def show_counts(args):
    """Print a small horizontal bar chart of record counts.

    One row is printed for each of the ``LC`` types 'item', 'shop' and
    'shopinfo', followed by one row for ``WC.count()`` labelled
    'items(wrong cate)'. Bars are scaled with ``get_scale`` applied to the
    largest ``LC`` count, and the chart is at most 20 cells wide (less when
    the terminal is narrower than 40 columns).

    :param args: accepted for the caller's calling convention; not used here.
    """
    termwidth, _ = gettermsize()
    chartwidth = min(20, termwidth - 20)
    types = ['item', 'shop', 'shopinfo']

    # Query each count exactly once, in the listed order.
    counts = {}
    max_count = 0
    for t in types:
        counts[t] = LC.count(t)
        max_count = max(max_count, counts[t])

    scale = get_scale(max_count)
    ratio = chartwidth * 1.0 / scale  # bar cells per counted record

    print('Counts Info:')
    for t in types:
        count = counts[t]
        chart = green('|' + '█' * int(ratio * count))
        print(' %-12s %s %d' % (t, chart, count))

    # Read the wrong-category count once so the bar and the printed number
    # always describe the same value.
    wrong_cate_count = WC.count()
    chart = green('|' + '█' * int(ratio * wrong_cate_count))
    print(' %-12s %s %d' % ('items(wrong cate)', chart, wrong_cate_count))
def on_update(itemid):
    """Fetch one item and store it, purge it, or set it aside by category.

    The item is fetched through ``call_with_throttling(get_item, ...)`` with
    a per-minute threshold of 6000 when ``ENV == 'DEV'`` and 600 otherwise.
    The result ``d`` is then handled as follows:

    * ``d['error'] == 'not found'``: the item is removed from ``LC``,
      ``ItemCT`` and storage (``delete_item``) and marked done on ``ai1`` and
      ``ai2``. Cleanup failures are logged and ignored; ``d`` is returned.
    * any other ``'error'`` value: ``ValueError`` is raised.
    * ``'cid'`` present and ``need_crawl(d['cid'])`` false: the item id is
      added to ``WC`` and ``d`` is returned without being stored.
    * empty result: ``ValueError`` is raised.
    * result with a ``'shopid'``: it is stored via ``update_item``; if
      ``LC.need_update('shop', shopid)`` is truthy the shop id is queued on
      ``as1``.

    :param itemid: id of the item to refresh.
    :returns: the fetched mapping ``d``.
    :raises ValueError: on an unknown error value, an empty result, or a
        failed ``update_item`` call.
    """
    print('updating item id: {}'.format(itemid))
    tpm = 6000 if ENV == 'DEV' else 600
    d = call_with_throttling(get_item, args=(itemid, ),
                             threshold_per_minute=tpm)

    if 'error' in d:
        if d['error'] == 'not found':
            try:
                print('deleting id: {}'.format(itemid))
                LC.delete('item', itemid)
                ItemCT.delete(itemid)
                delete_item(itemid)
                ai1.task_done(itemid)
                ai2.task_done(itemid)
            except Exception:
                # Best-effort cleanup: log and carry on, but let
                # KeyboardInterrupt/SystemExit propagate.
                traceback.print_exc()
            return d
        else:
            raise ValueError('unknown error: {}'.format(d))

    # check if we should save this item in the first place
    # we only accept a few cates
    if 'cid' in d and not need_crawl(d['cid']):
        WC.add(d['id'])
        return d

    # for connection errors, we simply raise exception here
    # the exceptions will be captured in LC.update_if_needed
    # the task will not clean up and will be requeued by requeue worker
    if not d:
        raise ValueError('item incomplete error: {}'.format(d))
    elif 'shopid' in d:
        try:
            update_item(d)
        except Exception:
            # Log the real cause, then surface one uniform error type to the
            # caller.
            traceback.print_exc()
            raise ValueError('item update failed: {}'.format(d))
        if LC.need_update('shop', d['shopid']):
            # queue shop jobs
            as1.put(d['shopid'])
    return d
def run(self):
    """Queue the items scheduled under ``self.ct`` that still need an update.

    Item ids are read with ``ItemCT.get_items(self.ct)``. When there are any,
    they are narrowed with ``LC.need_update('item', ...)`` and the remaining
    ids are put on ``ai1``. Nothing happens when no ids are scheduled.
    """
    scheduled = ItemCT.get_items(self.ct)
    if not scheduled:
        return

    print('ct = {}'.format(self.ct))
    print('scheduled {} items for lastcheck'.format(len(scheduled)))

    pending = LC.need_update('item', *scheduled)
    ai1.put(*pending)
    print('putting {} items in queue'.format(len(pending)))
def work(self):
    """Poll ``as1`` forever and start a shop listing job for each shop id.

    Each poll waits up to 10 seconds. When it yields a ``(queue, shopid)``
    pair, the pair is passed to ``LC.update_if_needed('shop', ...)`` with
    ``spawn_shop`` as the update callback. ``spawn_shop`` puts the shop id on
    ``asi1`` and spawns ``list_shop`` on ``self.pool``; item ids reported
    back by ``list_shop`` are recorded via ``ItemCT.add_items`` and put on
    ``ai2``.

    Ordinary errors inside one iteration are logged and the loop continues.
    This method does not return on its own.
    """

    def on_update(ids):
        # Callback handed to list_shop: record the listed ids and queue them.
        ItemCT.add_items(*ids)
        ai2.put(*ids)

    def spawn_shop(shopid):
        print('updating shop-item of shop {}'.format(shopid))
        asi1.put(shopid)
        self.pool.spawn(list_shop, shopid, on_update)

    while True:
        try:
            result = poll([as1], timeout=10)
            if result:
                queue, shopid = result
                LC.update_if_needed('shop', shopid, spawn_shop, queue)
        except Exception:
            # Keep the worker alive on ordinary failures. Catching only
            # Exception lets KeyboardInterrupt/SystemExit (and other
            # BaseException-only stop signals) end the loop.
            traceback.print_exc()
# This span starts inside a definition whose header is not visible here. The
# visible tail queues `p.scard(bucket)` for buckets named
# 'thinset_{name}_{index}', adds `sum(p.execute())` to `count`, prints the
# running count and writes `count` to `ts.counterkey` through `ts.conn.set`.
#
# recount_thinhash(th): prints 'counting' and th.name, then walks bucket
# indexes in batches of 10000. For each batch it opens `th.conn.pipeline()`,
# queues `p.hlen(bucket)` for buckets named 'thinhash_{name}_{index}', adds
# `sum(p.execute())` to `count` and prints the running count. The total is
# written to `th.counterkey` through `th.conn.set`.
#
# NOTE(review): the print statements make this Python 2 code, so
# `range(total/batch)` uses floor division. When th.modulo is not a multiple
# of 10000 the trailing `total % batch` buckets are never visited, and when
# th.modulo < 10000 no bucket is visited at all. The `>= total` guard in the
# inner loop can therefore never trigger. A ceiling division such as
# `(total + batch - 1) // batch` would cover the last partial batch.
#
# Script entry point: the recount_thinset(...) calls and the 'shop' recount
# are commented out; the remaining call is
# recount_thinhash(LC.gethash('item')).
for j in range(batch): if i*batch+j >= total: break bucket = 'thinset_{}_{}'.format(ts.name, i*batch+j) p.scard(bucket) count += sum(p.execute()) print 'current count', count ts.conn.set(ts.counterkey, count) def recount_thinhash(th): print 'counting', th.name count = 0 total = th.modulo batch = 10000 for i in range(total/batch): p = th.conn.pipeline() for j in range(batch): if i*batch+j >= total: break bucket = 'thinhash_{}_{}'.format(th.name, i*batch+j) p.hlen(bucket) count += sum(p.execute()) print 'current count', count th.conn.set(th.counterkey, count) if __name__ == '__main__': #recount_thinset(WC) #recount_thinset(IF) #recount_thinhash(LC.gethash('shop')) recount_thinhash(LC.gethash('item'))
from caches import LC

# Ad-hoc inspection script: look up one hard-coded id in the 'item' hash,
# take the first element of the hmget() result, convert it to an int and
# print its binary representation.
item_hash = LC.gethash('item')
first_value = item_hash.hmget('24405056006')[0]
print(bin(int(first_value)))
#print LC.gethash('item').hset('22403775078', 145600000001383869847)
# This span starts inside a definition whose header is not visible here (it
# opens on a `break`). The visible tail queues `p.scard(bucket)` for buckets
# named 'thinset_{name}_{index}', adds `sum(p.execute())` to `count`, prints
# the running count and writes `count` to `ts.counterkey` through
# `ts.conn.set`.
#
# recount_thinhash(th): prints 'counting' and th.name, then walks bucket
# indexes in batches of 10000. For each batch it opens `th.conn.pipeline()`,
# queues `p.hlen(bucket)` for buckets named 'thinhash_{name}_{index}', adds
# `sum(p.execute())` to `count` and prints the running count. The total is
# written to `th.counterkey` through `th.conn.set`.
#
# NOTE(review): the print statements make this Python 2 code, so
# `range(total / batch)` uses floor division. When th.modulo is not a
# multiple of 10000 the trailing `total % batch` buckets are never visited,
# and when th.modulo < 10000 no bucket is visited at all. The `>= total`
# guard in the inner loop can therefore never trigger. A ceiling division
# such as `(total + batch - 1) // batch` would cover the last partial batch.
#
# Script entry point: the recount_thinset(...) calls and the 'shop' recount
# are commented out; the remaining call is
# recount_thinhash(LC.gethash('item')).
break bucket = 'thinset_{}_{}'.format(ts.name, i * batch + j) p.scard(bucket) count += sum(p.execute()) print 'current count', count ts.conn.set(ts.counterkey, count) def recount_thinhash(th): print 'counting', th.name count = 0 total = th.modulo batch = 10000 for i in range(total / batch): p = th.conn.pipeline() for j in range(batch): if i * batch + j >= total: break bucket = 'thinhash_{}_{}'.format(th.name, i * batch + j) p.hlen(bucket) count += sum(p.execute()) print 'current count', count th.conn.set(th.counterkey, count) if __name__ == '__main__': #recount_thinset(WC) #recount_thinset(IF) #recount_thinhash(LC.gethash('shop')) recount_thinhash(LC.gethash('item'))