def main():
    """Queue per-weibo counter updates found in Redis onto ``update_mid_list``.

    Walks the ``user_set`` Redis set with SSCAN (3000 members hinted per
    call), reads the hash stored under every member and inspects each hash
    field.  A field whose name contains one of the known counter markers and
    whose value is non-empty yields one ``{"update": {"_id": mid}}`` /
    ``{"doc": {...}}`` pair, where ``mid`` is the part of the field name
    before the first underscore.  Pairs are JSON-encoded and LPUSHed to
    ``update_mid_list`` through ``r_flow`` every 400 pairs; whatever is left
    over is pushed once the scan is complete.

    Prints the size of ``user_set``, a start marker when ``RUN_TYPE`` is
    truthy, the computed index name, and finally the number of pairs built.

    NOTE(review): the action/doc layout looks like the Elasticsearch bulk
    update format -- confirm against whatever consumes ``update_mid_list``.
    """
    # (substring searched for in the hash field name, counter name in the doc);
    # order matters: the first marker found in the field name wins.
    field_markers = (
        ("_origin_weibo_retweeted", "retweeted"),
        ("_origin_weibo_comment", "comment"),
        ("_retweeted_weibo_comment", "comment"),
        ("_retweeted_weibo_retweeted", "retweeted"),
    )

    print(r.scard('user_set'))

    if RUN_TYPE:
        date = ts2datetime(time.time() - DAY)
        start_time = str(ts2datetime(time.time()))
        print("/cron/push_mid2redis.py&start&%s" % start_time)
    else:
        date = '2013-09-05'
    print(flow_text_index_name_pre + date)

    cursor = 0
    pushed = 0
    pending = []
    # Timestamp of the last flush; only refreshed, the timing output that
    # used it is disabled.
    last_flush = time.time()

    while True:
        cursor, uid_batch = r.sscan("user_set", cursor, count=3000)
        for uid in uid_batch:
            for field, raw in r.hgetall(uid).items():
                if not raw:
                    continue
                counter = None
                for marker, name in field_markers:
                    if marker in field:
                        counter = name
                        break
                if counter is None:
                    continue

                mid = field.split('_', 1)[0]
                pending.append({"update": {"_id": mid}})
                pending.append({"doc": {counter: int(raw)}})
                pushed += 1

                if pushed % 400 == 0:
                    r_flow.lpush('update_mid_list', json.dumps(pending))
                    pending = []
                    last_flush = time.time()

        # SSCAN hands back cursor 0 once the whole set has been visited.
        if int(cursor) == 0:
            break

    if pending:
        r_flow.lpush('update_mid_list', json.dumps(pending))

    print(pushed)
def main():
    """Queue origin-weibo counter updates found in Redis onto ``update_mid_list``.

    Walks the ``user_set`` Redis set with SSCAN (3000 members hinted per
    call), reads the hash stored under every member and inspects each hash
    field.  A field whose name contains ``_origin_weibo_retweeted`` or
    ``_origin_weibo_comment`` and whose value converts to a non-zero integer
    yields one ``{"update": {"_id": mid}}`` / ``{"doc": {...}}`` pair, where
    ``mid`` is the part of the field name before the first underscore.
    Pairs are JSON-encoded and LPUSHed to ``update_mid_list`` through
    ``r_flow`` every 1000 pairs, printing the running total and the seconds
    elapsed since the previous flush; the remainder is pushed after the scan.

    Prints the size of ``user_set`` first and the number of pairs built last.

    NOTE(review): the action/doc layout looks like the Elasticsearch bulk
    update format -- confirm against whatever consumes ``update_mid_list``.
    """
    # (substring searched for in the hash field name, counter name in the doc)
    field_markers = (
        ("_origin_weibo_retweeted", "retweeted"),
        ("_origin_weibo_comment", "comment"),
    )

    print(r.scard('user_set'))

    if RUN_TYPE:
        date = ts2datetime(time.time() - DAY)
    else:
        date = '2013-09-05'
    # Computed as in the original; not used further inside this function.
    index_name = flow_text_index_name_pre + date

    cursor = 0
    pushed = 0
    pending = []
    last_flush = time.time()

    while True:
        cursor, uid_batch = r.sscan("user_set", cursor, count=3000)
        for uid in uid_batch:
            for field, raw in r.hgetall(uid).items():
                # int(raw) is only evaluated for fields carrying a marker, so
                # unrelated fields may hold non-numeric values safely.
                for marker, counter in field_markers:
                    if marker in field and int(raw):
                        break
                else:
                    continue

                mid = field.split('_', 1)[0]
                pending.append({"update": {"_id": mid}})
                pending.append({"doc": {counter: int(raw)}})
                pushed += 1

                if pushed % 1000 == 0:
                    r_flow.lpush('update_mid_list', json.dumps(pending))
                    pending = []
                    now = time.time()
                    print("%s cost %s" % (pushed, now - last_flush))
                    last_flush = now

        # SSCAN hands back cursor 0 once the whole set has been visited.
        if int(cursor) == 0:
            break

    if pending:
        r_flow.lpush('update_mid_list', json.dumps(pending))

    print(pushed)
"should":[ ] } } } }, "size": 10000 } if __name__ == "__main__": scan_cursor = 0 sensitive_uid_list = [] count = 0 while 1: re_scan = r_cluster.sscan('s_user_set', scan_cursor, count=10000) if int(re_scan[0]) == 0: sensitive_uid_list.extend(re_scan[1]) count += len(re_scan[1]) print count break else: sensitive_uid_list.extend(re_scan[1]) count += 10000 scan_cursor = re_scan[0] temp_list = sensitive_uid_list count = 0 patition = 100 number = int(math.ceil(len(temp_list)/float(100))) print number