import json

# ts2date, ts2datetime, datetime2ts, merge_dict, the Redis handle monitor_inner_r,
# the Elasticsearch client es and monitor_index_name are provided by the project's
# utility and global-config modules.


def compute_group_inner(task_name, task_user, start_ts):
    # step1: get task_user in-monitor task user retweet relation from monitor_inner_r
    # step2: get task_user in-task user retweet relation
    # step3: compute every inner user be-retweet ratio in task
    # step4: save top5 to es--monitor_result, doc_type=task_name, _id='inner_'+date, e.g. 'inner_2013-09-01'
    group_status = 0
    time_segment = 3600 * 24
    iter_time_segment = 900
    iter_ts = start_ts - time_segment
    inner_group_dict = {}
    user_count_dict = {}
    print 'group inner task_user:', task_user
    for root_uid in task_user:
        inner_group_dict[root_uid] = {}
        iter_ts = start_ts - time_segment
        # scan the previous day in 15-minute buckets
        while iter_ts < start_ts:
            key = 'inner_' + str(iter_ts)
            print 'iter_ts:', ts2date(iter_ts)
            inner_retweet_string = monitor_inner_r.hget(root_uid, key)
            print 'root_uid, key, inner_retweet_string:', root_uid, key, inner_retweet_string
            if inner_retweet_string:
                inner_retweet_dict = json.loads(inner_retweet_string)
            else:
                inner_retweet_dict = None
            if inner_retweet_dict:
                inner_group_dict[root_uid] = merge_dict(inner_group_dict[root_uid], inner_retweet_dict)
            iter_ts += iter_time_segment
        user_inner_retweet_count = sum(inner_group_dict[root_uid].values())
        user_count_dict[root_uid] = user_inner_retweet_count
    all_be_retweet_count = sum(user_count_dict.values())
    if all_be_retweet_count == 0:
        group_status = 1
        return group_status
    sort_user_inner_retweet_count = sorted(user_count_dict.items(), key=lambda x: x[1], reverse=True)
    top5_user = sort_user_inner_retweet_count[:5]
    # date of the day being summarized, e.g. '2013-09-01'
    date = ts2datetime(start_ts - 24 * 3600)
    index_body = {'date': date}
    for rank in range(1, 6):
        if rank > len(top5_user):
            break  # fewer than five users were retweeted inside the group
        key = 'top' + str(rank)
        index_body[key] = json.dumps(top5_user[rank - 1])
    key = 'inner_' + date
    # save inner-retweet graph as a dict {root_uid1: {uid1: count1, uid2: count2}, ...}
    index_body['inner_graph'] = json.dumps(inner_group_dict)
    es.index(index=monitor_index_name, doc_type=task_name, id=key, body=index_body)
    group_status = 1
    return group_status
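# merge_dict above is imported from the project's utility module; it is assumed here
# to merge two {uid: count} dicts by summing the counts per uid. The helper below is
# only a minimal sketch of that assumed behavior, under a hypothetical name, so the
# aggregation step is easier to follow; it is not the project's implementation.
def _merge_count_dicts_sketch(dict1, dict2):
    merged = dict(dict1)
    for uid, count in dict2.items():
        merged[uid] = merged.get(uid, 0) + count
    return merged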
def inner_group_retweet(item):
    # accumulate one in-group retweet into the 15-minute Redis bucket of its root user
    root_uid = str(item['root_uid'])
    uid = str(item['uid'])
    timestamp = item['timestamp']
    date = ts2datetime(timestamp)
    date_ts = datetime2ts(date)
    time_segment = int((timestamp - date_ts) / 900)
    start_ts = date_ts + time_segment * 900   # start of the 15-minute bucket
    key = 'inner_' + str(start_ts)
    inner_retweet_exist = monitor_inner_r.hget(root_uid, key)
    if not inner_retweet_exist:
        monitor_inner_r.hset(root_uid, key, json.dumps({uid: 1}))
    else:
        inner_retweet_dict = json.loads(inner_retweet_exist)
        if uid in inner_retweet_dict:
            inner_retweet_dict[uid] += 1
        else:
            inner_retweet_dict[uid] = 1
        monitor_inner_r.hset(root_uid, key, json.dumps(inner_retweet_dict))
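# Illustrative only: feeding one in-group retweet record to inner_group_retweet.
# The uids and timestamp are made up; the field names match what the function reads
# (root_uid, uid, timestamp), and monitor_inner_r must point at a live Redis instance.
def _demo_inner_group_retweet():
    example_item = {
        'root_uid': '1780417033',   # author of the original weibo
        'uid': '2315432011',        # user inside the task who retweeted it
        'timestamp': 1378001234,    # retweet time in unix seconds
    }
    inner_group_retweet(example_item)
    # monitor_inner_r now stores, under hash key '1780417033', a field
    # 'inner_<start of the 15-minute bucket>' whose JSON value counts retweets
    # per uid, e.g. {"2315432011": 1}.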
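# Illustrative only: computing the previous day's inner-retweet top5 for one monitor
# task. The task name and uid list are made up; start_ts is assumed to be the 00:00
# timestamp of the day *after* the day being summarized, because compute_group_inner
# scans [start_ts - 24h, start_ts) and dates the result as ts2datetime(start_ts - 24*3600).
if __name__ == '__main__':
    demo_task_name = 'example_task'
    demo_task_user = ['1780417033', '2315432011']
    demo_start_ts = datetime2ts('2013-09-02')   # summarizes 2013-09-01
    print 'compute_group_inner status:', compute_group_inner(demo_task_name, demo_task_user, demo_start_ts)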