def update_if_needed(type, id, on_update, queue):
    """Re-crawl ``id`` when it is due, record the outcome, and ack the task.

    If ``LC.need_update(type, int(id))`` is false the task is acknowledged
    with ``queue.task_done(id)`` and nothing else happens.  Otherwise
    ``on_update(id)`` is called and per-day counters are incremented in
    ``conn_record`` under a ``YYYY-MM-DD`` key built from the current UTC
    date:

    * ``<type>:crawl-sum``        -- every attempted crawl
    * ``<type>:crawl-err-except`` -- ``on_update`` raised
    * ``<type>:crawl-return``     -- ``on_update`` returned normally
    * ``<type>:crawl-data-err``   -- item result empty or lacking ``num_sold30``
    * ``<type>:crawl-success``    -- item result accepted

    Args:
        type: Kind of record; ``"item"`` gets the debouncing update, every
            other value just has its stored value set to the current
            timestamp.
        id: Identifier of the record; passed to ``int()`` for the
            ``need_update`` check and used as the hash field name.
        on_update: Callable invoked as ``on_update(id)``; its return value is
            used as the crawl result.
        queue: Object exposing ``task_done(id)``.

    Returns:
        None.

    NOTE(review): the original text of this function had lost its
    indentation; the nesting used here (result handling only runs when
    ``on_update`` did not raise) is a reconstruction -- confirm against
    version control.
    """
    # Sample the clock once so the date key and the timestamp always agree.
    now = time.gmtime()
    # NOTE(review): time.mktime() interprets its argument as *local* time, so
    # feeding it a UTC struct yields a shifted epoch value unless the process
    # runs in UTC.  Left as-is because previously stored values follow the
    # same convention.
    tsnow = time.mktime(now)
    today_key = time.strftime("%Y-%m-%d", now)

    if not LC.need_update(type, int(id)):
        queue.task_done(id)
        return

    conn_record.hincrby(today_key, '{}:crawl-sum'.format(type))
    try:
        info = on_update(id)
    except Exception:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit can still stop the worker.
        conn_record.hincrby(today_key, '{}:crawl-err-except'.format(type))
        # Deliberately no task_done() here: the un-acked id is expected to be
        # picked up again by the requeue mechanism.
        print('we do not set task_done for id {}, so we will pick them up in requeue'.format(id))
        traceback.print_exc()
        return

    conn_record.hincrby(today_key, '{}:crawl-return'.format(type))
    queue.task_done(id)

    if type != "item":
        # Non-item records only track when they were last refreshed.
        LC.gethash(type).hset(id, tsnow)
        return

    if not info or 'num_sold30' not in info:
        conn_record.hincrby(today_key, '{}:crawl-data-err'.format(type))
        print("item err: {}".format(info))
        return

    conn_record.hincrby(today_key, '{}:crawl-success'.format(type))
    ret_bin = LC.gethash(type).hget(id)
    new_bin = debouncing.get_update_bin(ret_bin, info)
    print("update: {} {}".format(bin(new_bin), len(bin(new_bin))))
    LC.gethash(type).hset(id, new_bin)
def unchange_days(self, num):
    """Step through ``num`` simulated steps, printing the decision for each.

    For every ``day`` from 1 to ``num`` inclusive, ``time.time`` is replaced
    by ``partial(later_time, day)`` (presumably a clock shifted ``day`` days
    ahead -- confirm against ``later_time``).  ``can_update(self.sbin)`` then
    decides the outcome:

    * true  -- ``self.sbin`` is replaced by
      ``get_update_bin(self.sbin, {'num_instock': self.instock})`` and
      ``"<day> to__crawl"`` is printed;
    * false -- ``"<day> deboucing"`` is printed and ``self.sbin`` is left
      untouched.

    Note that ``time.time`` is not restored afterwards; it stays bound to the
    last patched clock.
    """
    for day in range(1, num + 1):
        # Swap in the simulated clock before asking whether an update is due.
        time.time = partial(later_time, day)

        if not can_update(self.sbin):
            print("{} deboucing".format(day))
            continue

        stock_info = {'num_instock': self.instock}
        self.sbin = get_update_bin(self.sbin, stock_info)
        print("{} to__crawl".format(day))
def update_if_needed(type, id, on_update, queue):
    """Re-crawl ``id`` when it is due, record the outcome, and ack the task.

    If ``LC.need_update(type, int(id))`` is false the task is acknowledged
    with ``queue.task_done(id)`` and nothing else happens.  Otherwise
    ``on_update(id)`` is called and per-day counters are incremented in
    ``conn_record`` under a ``YYYY-MM-DD`` key built from the current UTC
    date:

    * ``<type>:crawl-sum``        -- every attempted crawl
    * ``<type>:crawl-err-except`` -- ``on_update`` raised
    * ``<type>:crawl-return``     -- ``on_update`` returned normally
    * ``<type>:crawl-data-err``   -- item result empty or lacking ``num_sold30``
    * ``<type>:crawl-success``    -- item result accepted

    Args:
        type: Kind of record; ``"item"`` gets the debouncing update, every
            other value just has its stored value set to the current
            timestamp.
        id: Identifier of the record; passed to ``int()`` for the
            ``need_update`` check and used as the hash field name.
        on_update: Callable invoked as ``on_update(id)``; its return value is
            used as the crawl result.
        queue: Object exposing ``task_done(id)``.

    Returns:
        None.

    NOTE(review): the original text of this function had lost its
    indentation; the nesting used here (result handling only runs when
    ``on_update`` did not raise) is a reconstruction -- confirm against
    version control.
    """
    # Sample the clock once so the date key and the timestamp always agree.
    now = time.gmtime()
    # NOTE(review): time.mktime() interprets its argument as *local* time, so
    # feeding it a UTC struct yields a shifted epoch value unless the process
    # runs in UTC.  Left as-is because previously stored values follow the
    # same convention.
    tsnow = time.mktime(now)
    today_key = time.strftime("%Y-%m-%d", now)

    if not LC.need_update(type, int(id)):
        queue.task_done(id)
        return

    conn_record.hincrby(today_key, '{}:crawl-sum'.format(type))
    try:
        info = on_update(id)
    except Exception:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit can still stop the worker.
        conn_record.hincrby(today_key, '{}:crawl-err-except'.format(type))
        # Deliberately no task_done() here: the un-acked id is expected to be
        # picked up again by the requeue mechanism.
        print('we do not set task_done for id {}, so we will pick them up in requeue'.format(id))
        traceback.print_exc()
        return

    conn_record.hincrby(today_key, '{}:crawl-return'.format(type))
    queue.task_done(id)

    if type != "item":
        # Non-item records only track when they were last refreshed.
        LC.gethash(type).hset(id, tsnow)
        return

    if not info or 'num_sold30' not in info:
        conn_record.hincrby(today_key, '{}:crawl-data-err'.format(type))
        print("item err: {}".format(info))
        return

    conn_record.hincrby(today_key, '{}:crawl-success'.format(type))
    ret_bin = LC.gethash(type).hget(id)
    new_bin = debouncing.get_update_bin(ret_bin, info)
    print("update: {} {}".format(bin(new_bin), len(bin(new_bin))))
    LC.gethash(type).hset(id, new_bin)
def unchange_days(self, num):
    """Apply ``num`` identical stock updates and return one unpacked field.

    Starting from ``self.sbin``, the value is passed through
    ``get_update_bin`` ``num`` times, each time with a fresh
    ``{'num_instock': 100}`` dict.  ``self.sbin`` itself is not reassigned;
    only a local copy of the reference is advanced.

    Returns:
        Element ``[2]`` of ``unpack_bin`` applied to the final value
        (the meaning of that field is defined by ``unpack_bin``).
    """
    state = self.sbin
    for _ in range(num):
        # A new dict per step, in case get_update_bin keeps or alters it.
        state = get_update_bin(state, {'num_instock': 100})
    unpacked = unpack_bin(state)
    return unpacked[2]