def main(): scan_cursor = 0 count = 0 bulk_action = [] number = r.scard('user_set') print number if RUN_TYPE: ts = time.time() - DAY date = ts2datetime(ts) start_time = str(ts2datetime(time.time())) print "/cron/push_mid2redis.py&start&%s" %start_time else: date = '2013-09-05' index_name = flow_text_index_name_pre+date print index_name ts = time.time() while 1: re_scan = r.sscan("user_set", scan_cursor, count=3000) scan_cursor = re_scan[0] uid_list = re_scan[1] #具体数据 if len(uid_list): for uid in uid_list: detail_dict = r.hgetall(uid) for k,v in detail_dict.iteritems(): update_dict = dict() if "_origin_weibo_retweeted" in k and v: mid = k.split('_')[0] update_dict["retweeted"] = int(v) elif "_origin_weibo_comment" in k and v: mid = k.split('_')[0] update_dict["comment"] = int(v) elif '_retweeted_weibo_comment' in k and v: mid = k.split('_')[0] update_dict["comment"] = int(v) elif '_retweeted_weibo_retweeted' in k and v: mid = k.split('_')[0] update_dict["retweeted"] = int(v) else: pass if update_dict: action = {"update": {"_id": mid}} xdata = {"doc": update_dict} bulk_action.extend([action, xdata]) count += 1 if count % 400 == 0: r_flow.lpush('update_mid_list', json.dumps(bulk_action)) bulk_action = [] tp = time.time() #print "%s cost %s" %(count, tp-ts) ts = tp if int(scan_cursor) == 0: break if bulk_action: r_flow.lpush('update_mid_list', json.dumps(bulk_action)) print count
def main(): scan_cursor = 0 count = 0 bulk_action = [] number = r.scard('user_set') print number if RUN_TYPE: ts = time.time() - DAY date = ts2datetime(ts) else: date = '2013-09-05' index_name = flow_text_index_name_pre + date ts = time.time() while 1: re_scan = r.sscan("user_set", scan_cursor, count=3000) scan_cursor = re_scan[0] uid_list = re_scan[1] #具体数据 if len(uid_list): for uid in uid_list: detail_dict = r.hgetall(uid) for k, v in detail_dict.iteritems(): update_dict = dict() if "_origin_weibo_retweeted" in k and int(v): mid = k.split('_')[0] update_dict["retweeted"] = int(v) elif "_origin_weibo_comment" in k and int(v): mid = k.split('_')[0] update_dict["comment"] = int(v) else: pass if update_dict: action = {"update": {"_id": mid}} xdata = {"doc": update_dict} bulk_action.extend([action, xdata]) count += 1 if count % 1000 == 0: #print bulk_action r_flow.lpush('update_mid_list', json.dumps(bulk_action)) bulk_action = [] tp = time.time() print "%s cost %s" % (count, tp - ts) ts = tp if int(scan_cursor) == 0: break if bulk_action: r_flow.lpush('update_mid_list', json.dumps(bulk_action)) print count
def scan_offline_task():
    """Fetch up to 1000 pending (status == 0) keyword-rank tasks from ES,
    queue each one on the 'task_user_rank' redis list, and write the task
    back with status -1 so it is not picked up again.
    """
    pending_query = {
        "query": {"bool": {"must": [{"term": {"status": 0}}]}},
        "size": 1000,
    }
    hits = es_user_portrait.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                                   doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                                   body=pending_query)['hits']['hits']
    if hits:
        for hit in hits:
            task_id = hit['_id']
            source = hit['_source']
            # Field order of this payload is the contract consumed by
            # cron_task/key_words_search -- do not reorder.
            payload = [
                task_id,
                source['search_type'],
                source['pre'],
                source['during'],
                source['start_time'],
                json.loads(source['keyword']),
                source['user_ts'],
                source['sort_norm'],
                source['sort_scope'],
                source['time'],
                source['isall'],
                source['number'],
            ]
            redis_task.lpush("task_user_rank", json.dumps(payload))
            # mark the task as claimed
            source['status'] = -1
            #print item
            es_user_portrait.index(index=USER_RANK_KEYWORD_TASK_INDEX,
                                   doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                                   id=task_id,
                                   body=source)
def scan_offline_task():
    """Fetch up to 1000 keyword-rank tasks with status == -1 from ES, queue
    each one on the 'task_user_rank' redis list, and write status -1 back.

    NOTE(review): this variant selects tasks that are ALREADY status -1 and
    re-marks them -1, so the same tasks get re-queued on every scan; the
    sibling variant in this file queries status 0 -- confirm which status
    value is intended here.
    """
    claimed_query = {
        "query": {"bool": {"must": [{"term": {"status": -1}}]}},
        "size": 1000,
    }
    hits = es_user_portrait.search(index=USER_RANK_KEYWORD_TASK_INDEX,
                                   doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                                   body=claimed_query)['hits']['hits']
    if hits:
        for hit in hits:
            task_id = hit['_id']
            source = hit['_source']
            # Field order of this payload is the contract consumed by
            # cron_task/key_words_search -- do not reorder.
            payload = [
                task_id,
                source['search_type'],
                source['pre'],
                source['during'],
                source['start_time'],
                json.loads(source['keyword']),
                source['user_ts'],
                source['sort_norm'],
                source['sort_scope'],
                source['time'],
                source['isall'],
                source['number'],
            ]
            redis_task.lpush("task_user_rank", json.dumps(payload))
            source['status'] = -1
            #print item
            es_user_portrait.index(index=USER_RANK_KEYWORD_TASK_INDEX,
                                   doc_type=USER_RANK_KEYWORD_TASK_TYPE,
                                   id=task_id,
                                   body=source)
# NOTE(review): this chunk is truncated -- the `for iter_key in SOCIAL_SENSOR_INFO:`
# loop body is cut off (the trailing '"""' belongs to content outside this view),
# so the definition is left byte-identical rather than reconstructed.
def get_temporal_rank(timestamp): index = get_queue_index(timestamp) index_ts = "influence_timestamp_" + str(index) uid_list = r.zrange(index_ts, 0, 10000, desc=True) user_info = [] in_portrait = [] # 入库 if uid_list: search_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list}, field=SOCIAL_SENSOR_INFO)["docs"] for item in search_result: if item["found"]: temp = [] in_portrait.append(item['_id']) for iter_key in SOCIAL_SENSOR_INFO: """
"""
Dropped the originally planned redis-cluster scheme in favour of a single
redis instance (original note: 放弃原先设想的采用redis cluster方案,而使用单台redis形式).
"""
import sys
import redis
from redis import StrictRedis
reload(sys)
sys.path.append('../../')
from global_utils import R_CLUSTER_FLOW1 as r

if __name__ == '__main__':
    # Old redis-cluster flush code, kept disabled for reference:
    """
    startup_nodes = [{"host": '219.224.135.91', "port": "6379"}]
    weibo_redis = RedisCluster(startup_nodes = startup_nodes)
    weibo_redis.flushall()
    startup_nodes = [{"host": '219.224.135.91', "port": "6380"}]
    weibo_redis = RedisCluster(startup_nodes = startup_nodes)
    weibo_redis.flushall()
    startup_nodes = [{"host": '219.224.135.93', "port": "6380"}]
    weibo_redis = RedisCluster(startup_nodes = startup_nodes)
    weibo_redis.flushall()
    print "finish flushing!"
    """
    # Wipe the single-instance flow db.
    r.flushdb()
# NOTE(review): this chunk begins with the tail of a query dict whose opening
# braces are outside this view, and the partition loop implied by
# `number = int(math.ceil(...))` appears to continue past the end of the chunk;
# left byte-identical rather than reconstructed from a partial view.
"should":[ ] } } } }, "size": 10000 } if __name__ == "__main__": scan_cursor = 0 sensitive_uid_list = [] count = 0 while 1: re_scan = r_cluster.sscan('s_user_set', scan_cursor, count=10000) if int(re_scan[0]) == 0: sensitive_uid_list.extend(re_scan[1]) count += len(re_scan[1]) print count break else: sensitive_uid_list.extend(re_scan[1]) count += 10000 scan_cursor = re_scan[0] temp_list = sensitive_uid_list count = 0 patition = 100 number = int(math.ceil(len(temp_list)/float(100))) print number
# NOTE(review): this chunk starts mid-way through a scan_offline_task body
# (its `def` line and enclosing loop are outside this view) and shares the
# physical line with the complete cron_task/__main__ code, so the whole span
# is left byte-identical rather than partially reconstructed.
number = iter_item['number'] sort_norm = iter_item['sort_norm'] sort_scope = iter_item['sort_scope'] time = iter_item['time'] isall = iter_item['isall'] redis_task.lpush("task_user_rank", json.dumps([task_id, search_type , pre , during , start_time , keyword , search_key , sort_norm , sort_scope ,time , isall, number])) iter_item['status'] = -1 task_id = item['_id'] #print item es_user_portrait.index(index=USER_RANK_KEYWORD_TASK_INDEX, doc_type=USER_RANK_KEYWORD_TASK_TYPE, id=task_id, body=iter_item) def cron_task(data): key_words_search(data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7], data[8], data[9], data[10], data[11]) if __name__ == "__main__": scan_offline_task() while 1: data = redis_task.rpop("task_user_rank") print data #""" if data: cron_task(json.loads(data)) else: break #"""
# NOTE(review): this chunk starts inside a json.dumps(...) call whose opening
# statement is outside this view and shares the physical line with the
# complete cron_task/__main__ code, so the whole span is left byte-identical
# rather than partially reconstructed.
json.dumps([ task_id, search_type, pre, during, start_time, keyword, search_key, sort_norm, sort_scope, time, isall, number ])) iter_item['status'] = -1 task_id = item['_id'] #print item es_user_portrait.index(index=USER_RANK_KEYWORD_TASK_INDEX, doc_type=USER_RANK_KEYWORD_TASK_TYPE, id=task_id, body=iter_item) def cron_task(data): key_words_search(data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7], data[8], data[9], data[10], data[11]) if __name__ == "__main__": scan_offline_task() while 1: data = redis_task.rpop("task_user_rank") print data #""" if data: cron_task(json.loads(data)) else: break #"""
# NOTE(review): this chunk starts with a dangling `return int(index)` whose
# enclosing function definition is outside this view and shares the physical
# line with the __main__ block, so the whole span is left byte-identical
# rather than partially reconstructed.
return int(index) if __name__ == "__main__": now_ts = time.time() date_ts = datetime2ts(ts2datetime(now_ts)) if now_ts - TIME_INTERVAL < date_ts: sys.exit(0) tmp_date = ts2date(now_ts) print "cron_influence_start_" + tmp_date index = get_queue_index(now_ts) #当前时间戳所对应的时间区间 influence_ts = "influence_timestamp_" + str(index) scan_cursor = 0 count = 0 while 1: re_scan = r.hscan(influence_ts, scan_cursor, count=1000) scan_cursor = re_scan[0] detail = re_scan[1] if len(detail): for k,v in detail.iteritems(): r.zadd(influence_ts, v, k) count += 1 if int(scan_cursor) == 0: break tmp_date = ts2date(time.time()) print count print "cron_influence_end_" + tmp_date