def search_user_info(es, index_name, doc_type, uid, result_name):
    """Summarise the users with the highest counts in one interaction record.

    Fetches document ``uid`` from ``index_name``/``doc_type`` through ``es``,
    decodes the JSON mapping stored under ``result_name`` (other uid ->
    count), keeps the 20 entries with the largest counts, drops ``uid``
    itself, and looks the remaining uids up in the user-profile and
    bci-history indices.

    Args:
        es: Elasticsearch client that holds the interaction document.
        index_name: index of the interaction document.
        doc_type: document type of the interaction document.
        uid: id of the centre user.
        result_name: key of the JSON-encoded ``{uid: count}`` field.

    Returns:
        A list of dicts with the keys ``uid``, ``photo_url``, ``count``,
        ``uname``, ``influence``, ``fansnum``, ``friendsnum`` and
        ``weibo_count`` (one per profile document returned), or ``None`` when
        the interaction document cannot be fetched or its source is empty.
    """
    try:
        retweet_result = es.get(index=index_name, doc_type=doc_type, id=uid)['_source']
    except Exception:
        # Best effort: any lookup failure is reported as "no data".
        return None
    if not retweet_result:
        return None

    retweet_dict = json.loads(retweet_result[result_name])
    # Largest counts first, capped at 20 entries before the centre user is removed.
    sorted_list = sorted(retweet_dict.items(), key=lambda x: x[1], reverse=True)[:20]
    uid_list = [item[0] for item in sorted_list if item[0] != uid]

    try:
        user_result = es_user_profile.mget(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           body={'ids': uid_list})['docs']
    except Exception:
        user_result = []
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name,
                                                 doc_type=bci_history_index_type,
                                                 body={'ids': uid_list},
                                                 fields=fields)['docs']
    except Exception:
        bci_history_result = []

    out_portrait_list = []
    for position, profile_doc in enumerate(user_result):
        other_uid = profile_doc['_id']
        if profile_doc['found']:
            source = profile_doc['_source']
            uname = source['nick_name']
            if uname == '':
                # Placeholder nickname ("unknown"), also used for missing profiles.
                uname = u'未知'
            photo_url = source['photo_url']
        else:
            uname = u'未知'
            photo_url = 'unknown'

        # Both mget calls use the same id list, so documents are paired by
        # position; a missing or not-found bci document yields empty values.
        bci_fields = {}
        if position < len(bci_history_result):
            bci_doc = bci_history_result[position]
            if bci_doc.get('found'):
                bci_fields = bci_doc.get('fields', {})
        # ``fields`` (module level) is read positionally: 0 -> fansnum,
        # 1 -> weibo_count, 2 -> friendsnum, 3 -> influence.
        fansnum = bci_fields.get(fields[0], [''])[0]
        user_weibo_count = bci_fields.get(fields[1], [''])[0]
        user_friendsnum = bci_fields.get(fields[2], [''])[0]
        influence = bci_fields.get(fields[3], [''])[0]

        out_portrait_list.append({
            'uid': other_uid,
            'photo_url': photo_url,
            'count': retweet_dict[other_uid],
            'uname': uname,
            'influence': influence,
            'fansnum': fansnum,
            'friendsnum': user_friendsnum,
            'weibo_count': user_weibo_count,
        })
    return out_portrait_list
def show_keywords_rank(task_id, sort_type, count):
    """Build the ranked user table stored for a network-keywords task.

    Loads task ``task_id`` from the network-keywords index, decodes its
    JSON ``results`` field and takes the ``sort_type`` entry, which is
    iterated as ``(uid, sort_value)`` pairs. Each uid is then looked up in the
    user-profile, bci-history and user-portrait indices.

    Args:
        task_id: id of the task document.
        sort_type: key inside the decoded ``results`` to rank by.
        count: accepted for interface compatibility; not used.

    Returns:
        A list of rows ``[uid, nick_name, user_location, sort_value,
        user_fansnum, weibo_month_sum, in_portrait]`` where ``in_portrait`` is
        ``"1"`` or ``"0"`` and fields of missing documents are ``''``.
        Returns an empty dict when the task document cannot be fetched.
    """
    try:
        task_found = es_network_task.get(index=network_keywords_index_name,
                                         doc_type=network_keywords_index_type,
                                         id=task_id)['_source']
    except Exception:
        # Callers receive an empty dict (not a list) when the task is unavailable.
        return {}

    search_results = json.loads(task_found['results'])
    sort_results = search_results[sort_type]

    uid_list = []
    sort_list = []
    for source_uid, sort_value in sort_results:
        uid_list.append(source_uid)
        sort_list.append(sort_value)

    results = []
    if not uid_list:
        return results

    # Look up background (profile) information.
    profile_result = es_user_profile.mget(index=profile_index_name,
                                          doc_type=profile_index_type,
                                          body={"ids": uid_list})["docs"]
    # enumerate() gives each document its own position; searching the list
    # with .index() is quadratic and returns the first *equal* document.
    for position, item in enumerate(profile_result):
        if item['found']:
            source = item['_source']
            row = [source['uid'], source['nick_name'], source['user_location']]
        else:
            row = [item['_id'], '', '']
        row.append(sort_list[position])
        results.append(row)

    history_result = es_bci_history.mget(index=bci_history_index_name,
                                         doc_type=bci_history_index_type,
                                         body={"ids": uid_list})["docs"]
    for position, item in enumerate(history_result):
        if item['found']:
            source = item['_source']
            results[position].extend([source['user_fansnum'], source['weibo_month_sum']])
        else:
            results[position].extend(['', ''])

    portrait_result = es_user_portrait.mget(index=portrait_index_name,
                                            doc_type=portrait_index_type,
                                            body={"ids": uid_list})["docs"]
    for position, item in enumerate(portrait_result):
        results[position].append("1" if item['found'] else "0")

    return results
def search_fans(uid, top_count):
    """List the users recorded in the be-retweet / be-comment maps of ``uid``.

    Reads the ``uid_be_retweet`` and ``uid_be_comment`` JSON maps of ``uid``
    from the current be-retweet and be-comment indices, merges them with
    ``union_dict`` (defined elsewhere; presumably sums the counts per uid --
    confirm), removes ``uid`` itself, keeps the 1000 uids with the largest
    merged value and enriches them from the user-profile and bci-history
    indices.

    Args:
        uid: id of the centre user.
        top_count: accepted for interface compatibility; not used (the cap is
            fixed at 1000).

    Returns:
        A list of dicts with the keys ``uid``, ``photo_url``, ``uname``,
        ``count``, ``fansnum``, ``friendsnum`` and ``weibo_count``, one per
        profile document returned. Empty when no profile documents could be
        fetched.
    """
    max_fans = 1000

    now_date_ts = datetime2ts(ts2datetime(time.time()))
    db_number = get_db_num(now_date_ts)
    be_comment_index_name = be_comment_index_name_pre + str(db_number)
    be_retweet_index_name = be_retweet_index_name_pre + str(db_number)
    center_uid = uid

    try:
        be_retweet_result = es_retweet.get(index=be_retweet_index_name,
                                           doc_type=be_retweet_index_type,
                                           id=uid)['_source']
    except Exception:
        be_retweet_result = {}
    if be_retweet_result:
        be_retweet_uid_dict = json.loads(be_retweet_result['uid_be_retweet'])
    else:
        be_retweet_uid_dict = {}

    try:
        be_comment_result = es_be_comment.get(index=be_comment_index_name,
                                              doc_type=be_comment_index_type,
                                              id=uid)['_source']
    except Exception:
        be_comment_result = {}
    if be_comment_result:
        be_comment_uid_dict = json.loads(be_comment_result['uid_be_comment'])
    else:
        be_comment_uid_dict = {}

    fans_result = union_dict(be_retweet_uid_dict, be_comment_uid_dict)
    all_fans_dict = {}
    for fans_user in fans_result:
        if fans_user != center_uid:
            all_fans_dict[fans_user] = fans_result[fans_user]

    sort_all_fans_dict = sorted(all_fans_dict.items(), key=lambda x: x[1], reverse=True)
    all_fans_uid_list = [item[0] for item in sort_all_fans_dict[:max_fans]]

    # Fetch user information from the user-profile index.
    try:
        out_user_result = es_user_profile.mget(index=profile_index_name,
                                               doc_type=profile_index_type,
                                               body={'ids': all_fans_uid_list})['docs']
    except Exception:
        out_user_result = []
    # Fetch the counters from the bci-history index.
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name,
                                                 doc_type=bci_history_index_type,
                                                 body={'ids': all_fans_uid_list},
                                                 fields=fields)['docs']
    except Exception:
        bci_history_result = []

    out_portrait_list = []
    for position, profile_doc in enumerate(out_user_result):
        fan_uid = profile_doc['_id']
        if profile_doc['found']:
            source = profile_doc['_source']
            uname = source['nick_name']
            if uname == '':
                # Placeholder nickname ("unknown"), also used for missing profiles.
                uname = u'未知'
            photo_url = source['photo_url']
        else:
            uname = u'未知'
            photo_url = 'unknown'

        # Both mget calls use the same id list, so documents are paired by
        # position; a missing or not-found bci document yields empty values.
        bci_fields = {}
        if position < len(bci_history_result):
            bci_doc = bci_history_result[position]
            if bci_doc.get('found'):
                bci_fields = bci_doc.get('fields', {})
        # ``fields`` (module level) is read positionally: 0 -> fansnum,
        # 1 -> weibo_count, 2 -> friendsnum.
        fansnum = bci_fields.get(fields[0], [''])[0]
        user_weibo_count = bci_fields.get(fields[1], [''])[0]
        user_friendsnum = bci_fields.get(fields[2], [''])[0]

        out_portrait_list.append({
            'uid': fan_uid,
            'photo_url': photo_url,
            'uname': uname,
            'count': int(all_fans_dict[fan_uid]),
            'fansnum': fansnum,
            'friendsnum': user_friendsnum,
            'weibo_count': user_weibo_count,
        })
    return out_portrait_list
def search_bidirect_interaction(uid, top_count):
    """List the users that appear in both interaction directions of ``uid``.

    Reads four JSON maps for ``uid`` (``uid_retweet``, ``uid_be_retweet``,
    ``uid_comment``, ``uid_be_comment``) from the current indices. The
    retweet/comment maps and the be_retweet/be_comment maps are each merged
    with ``union_dict`` (defined elsewhere; presumably sums the counts per
    uid -- confirm). Uids present in both merged maps, except ``uid`` itself,
    are kept, scored with the sum of both merged values and enriched from the
    user-profile and bci-history indices.

    Args:
        uid: id of the centre user.
        top_count: accepted for interface compatibility; not used.

    Returns:
        A list of dicts with the keys ``uid``, ``photo_url``, ``uname``,
        ``count``, ``fansnum``, ``friendsnum`` and ``weibo_count``, one per
        profile document returned. Empty when no profile documents could be
        fetched.
    """
    now_date_ts = datetime2ts(ts2datetime(time.time()))
    db_number = get_db_num(now_date_ts)
    retweet_index_name = retweet_index_name_pre + str(db_number)
    be_retweet_index_name = be_retweet_index_name_pre + str(db_number)
    comment_index_name = comment_index_name_pre + str(db_number)
    be_comment_index_name = be_comment_index_name_pre + str(db_number)
    center_uid = uid

    # Retweet and be_retweet maps.
    try:
        retweet_result = es_retweet.get(index=retweet_index_name,
                                        doc_type=retweet_index_type,
                                        id=uid)['_source']
    except Exception:
        retweet_result = {}
    if retweet_result:
        retweet_uid_dict = json.loads(retweet_result['uid_retweet'])
    else:
        retweet_uid_dict = {}

    try:
        be_retweet_result = es_retweet.get(index=be_retweet_index_name,
                                           doc_type=be_retweet_index_type,
                                           id=uid)['_source']
    except Exception:
        be_retweet_result = {}
    if be_retweet_result:
        be_retweet_uid_dict = json.loads(be_retweet_result['uid_be_retweet'])
    else:
        be_retweet_uid_dict = {}

    # Comment and be_comment maps.
    try:
        comment_result = es_comment.get(index=comment_index_name,
                                        doc_type=comment_index_type,
                                        id=uid)['_source']
    except Exception:
        comment_result = {}
    if comment_result:
        comment_uid_dict = json.loads(comment_result['uid_comment'])
    else:
        comment_uid_dict = {}

    # NOTE(review): search_fans reads the be_comment index through
    # es_be_comment while this function uses es_comment -- confirm which
    # client is intended.
    try:
        be_comment_result = es_comment.get(index=be_comment_index_name,
                                           doc_type=be_comment_index_type,
                                           id=uid)['_source']
    except Exception:
        be_comment_result = {}
    if be_comment_result:
        be_comment_uid_dict = json.loads(be_comment_result['uid_be_comment'])
    else:
        be_comment_uid_dict = {}

    retweet_comment_result = union_dict(retweet_uid_dict, comment_uid_dict)
    be_retweet_comment_result = union_dict(be_retweet_uid_dict, be_comment_uid_dict)

    # Keep only users that occur in both merged maps.
    interaction_user_set = set(retweet_comment_result.keys()) & set(be_retweet_comment_result.keys())
    all_interaction_dict = {}
    for interaction_user in interaction_user_set:
        if interaction_user != center_uid:
            all_interaction_dict[interaction_user] = (
                retweet_comment_result[interaction_user]
                + be_retweet_comment_result[interaction_user]
            )
    sort_all_interaction_dict = sorted(all_interaction_dict.items(),
                                       key=lambda x: x[1], reverse=True)
    all_interaction_uid_list = [item[0] for item in sort_all_interaction_dict]

    # Fetch user information from the user-profile index.
    try:
        out_user_result = es_user_profile.mget(index=profile_index_name,
                                               doc_type=profile_index_type,
                                               body={'ids': all_interaction_uid_list})['docs']
    except Exception:
        out_user_result = []
    # Fetch the counters from the bci-history index.
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name,
                                                 doc_type=bci_history_index_type,
                                                 body={'ids': all_interaction_uid_list},
                                                 fields=fields)['docs']
    except Exception:
        bci_history_result = []

    out_portrait_list = []
    for position, profile_doc in enumerate(out_user_result):
        other_uid = profile_doc['_id']
        if profile_doc['found']:
            source = profile_doc['_source']
            uname = source['nick_name']
            if uname == '':
                # Placeholder nickname ("unknown"), also used for missing profiles.
                uname = u'未知'
            photo_url = source['photo_url']
        else:
            uname = u'未知'
            photo_url = 'unknown'

        # Both mget calls use the same id list, so documents are paired by
        # position; a missing or not-found bci document yields empty values.
        bci_fields = {}
        if position < len(bci_history_result):
            bci_doc = bci_history_result[position]
            if bci_doc.get('found'):
                bci_fields = bci_doc.get('fields', {})
        # ``fields`` (module level) is read positionally: 0 -> fansnum,
        # 1 -> weibo_count, 2 -> friendsnum.
        fansnum = bci_fields.get(fields[0], [''])[0]
        user_weibo_count = bci_fields.get(fields[1], [''])[0]
        user_friendsnum = bci_fields.get(fields[2], [''])[0]

        out_portrait_list.append({
            'uid': other_uid,
            'photo_url': photo_url,
            'uname': uname,
            'count': int(all_interaction_dict[other_uid]),
            'fansnum': fansnum,
            'friendsnum': user_friendsnum,
            'weibo_count': user_weibo_count,
        })
    return out_portrait_list
def search_mention(now_ts, uid, top_count):
    """Collect the user names mentioned by ``uid`` over seven ``DAY`` steps.

    Starting from ``datetime2ts(ts2datetime(now_ts))`` the timestamp is
    decreased by ``DAY`` seven times; for each step the hash ``'at_<ts>'`` in
    ``r_cluster`` is read for ``uid`` and its JSON map (name -> count) is
    accumulated. The names are then searched in the user-profile index and
    the matching uids are looked up in the bci-history index.

    Args:
        now_ts: reference timestamp.
        uid: id of the user whose mentions are collected.
        top_count: accepted for interface compatibility; not used.

    Returns:
        A list of dicts with the keys ``uid``, ``uname``, ``count``,
        ``fansnum``, ``weibo_count`` and ``friendsnum``. Names without a
        profile hit get ``uid == ''`` and empty counters. Entries with a
        profile hit come first, in hit order.
    """
    ts = datetime2ts(ts2datetime(now_ts))
    stat_results = {}
    for _ in range(7):
        ts = ts - DAY
        try:
            result_string = r_cluster.hget('at_' + str(ts), str(uid))
        except Exception:
            result_string = ''
        if not result_string:
            continue
        result_dict = json.loads(result_string)
        for at_uname in result_dict:
            if at_uname in stat_results:
                stat_results[at_uname] += result_dict[at_uname]
            else:
                stat_results[at_uname] = result_dict[at_uname]

    out_list = list(stat_results.keys())

    # Look the mentioned names up in the user-profile index.
    out_profile_result = []
    if out_list:
        out_query_list = [{'match': {'uname': item}} for item in out_list]
        query = [{'bool': {'should': out_query_list}}]
        try:
            out_profile_result = es_user_profile.search(
                index=profile_index_name, doc_type=profile_index_type,
                body={'query': {'bool': {'must': query}}, 'size': 100})['hits']['hits']
        except Exception:
            out_profile_result = []

    # (uid, uname, count) triples; profile hits first so that they line up
    # with the bci-history documents requested below.
    out_portrait_list = []
    out_in_profile_list = []
    bci_search_id_list = []
    for out_item in out_profile_result:
        source = out_item['_source']
        uname = source['nick_name']
        if uname not in stat_results:
            # A ``match`` query may return profiles whose nickname is not one
            # of the collected names; there is no count to report for them.
            continue
        hit_uid = source['uid']
        out_portrait_list.append((hit_uid, uname, stat_results[uname]))
        out_in_profile_list.append(uname)
        bci_search_id_list.append(hit_uid)

    for out_out_item in set(out_list) - set(out_in_profile_list):
        out_portrait_list.append(('', out_out_item, stat_results[out_out_item]))

    # Fetch the counters from the bci-history index.
    try:
        bci_history_result = es_bci_history.mget(
            index=bci_history_index_name, doc_type=bci_history_index_type,
            body={'ids': bci_search_id_list},
            fields=['user_fansnum', 'weibo_month_sum', 'user_friendsnum'])['docs']
    except Exception:
        bci_history_result = []

    new_out_portrait_list = []
    for position, (item_uid, item_uname, item_count) in enumerate(out_portrait_list):
        # Entries beyond the bci response (names without a profile) and
        # not-found documents yield empty counters.
        bci_fields = {}
        if position < len(bci_history_result):
            bci_doc = bci_history_result[position]
            if bci_doc and bci_doc.get('found'):
                bci_fields = bci_doc.get('fields', {})
        new_out_portrait_list.append({
            'uid': item_uid,
            'uname': item_uname,
            'count': item_count,
            'fansnum': bci_fields.get('user_fansnum', [''])[0],
            'weibo_count': bci_fields.get('weibo_month_sum', [''])[0],
            'friendsnum': bci_fields.get('user_friendsnum', [''])[0],
        })
    return new_out_portrait_list
def get_final_submit_user_info(uid_list):
    """Return one summary row per uid from the profile and bci-history indices.

    The key ``'bci_' + str(datetime2ts(ts2datetime(time.time() - DAY)))`` is
    used both to find the largest stored value (top hit of a descending sort
    on that key) and to read each user's value, which is normalised as
    ``log10(value / max * 9 + 1) * 100``.

    Args:
        uid_list: uids to describe; the output keeps this order.

    Returns:
        A list of rows ``[uid, uname, location, fansnum, statusnum,
        normal_bci]``. Values that cannot be determined are ``''``.

    Raises:
        Whatever ``es_bci_history.search`` raises; that call is not guarded.
    """
    try:
        profile_results = es_user_profile.mget(index=profile_index_name,
                                               doc_type=profile_index_type,
                                               body={'ids': uid_list})['docs']
    except Exception:
        profile_results = []
    try:
        bci_history_results = es_bci_history.mget(index=bci_history_index_name,
                                                  doc_type=bci_history_index_type,
                                                  body={'ids': uid_list})['docs']
    except Exception:
        bci_history_results = []

    # Find the largest value stored under the bci key.
    search_date_ts = datetime2ts(ts2datetime(time.time() - DAY))
    bci_key = 'bci_' + str(search_date_ts)
    query_body = {
        'query': {
            'match_all': {}
        },
        'sort': [{bci_key: {'order': 'desc'}}],
        'size': 1
    }
    bci_max_result = es_bci_history.search(index=bci_history_index_name,
                                           doc_type=bci_history_index_type,
                                           body=query_body, _source=False,
                                           fields=[bci_key])['hits']['hits']
    bci_max_value = MAX_VALUE
    if bci_max_result:
        try:
            bci_max_value = bci_max_result[0]['fields'][bci_key][0]
        except (KeyError, IndexError):
            # The top hit does not carry the key; use the default maximum.
            bci_max_value = MAX_VALUE

    final_results = []
    for position, uid in enumerate(uid_list):
        # Documents are paired with uids by position; a short or empty
        # response leaves the remaining uids without data.
        profile_item = profile_results[position] if position < len(profile_results) else {}
        bci_history_item = (bci_history_results[position]
                            if position < len(bci_history_results) else {})

        if profile_item and profile_item['found']:
            uname = profile_item['_source']['nick_name']
            location = profile_item['_source']['user_location']
        else:
            uname = ''
            location = ''

        if bci_history_item and bci_history_item['found']:
            bci_source = bci_history_item['_source']
            fansnum = bci_source['user_fansnum']
            statusnum = bci_source['weibo_month_sum']
            try:
                # float() keeps the ratio from being floored under Python 2
                # when both values are integers.
                normal_bci = math.log(float(bci_source[bci_key]) / bci_max_value * 9 + 1, 10) * 100
            except (KeyError, TypeError, ValueError, ZeroDivisionError):
                normal_bci = ''
        else:
            fansnum = ''
            statusnum = ''
            normal_bci = ''

        final_results.append([uid, uname, location, fansnum, statusnum, normal_bci])
    return final_results
"query":{ "match_all":{} }, "size":1, "sort":{sensitive_string:{"order":"desc"}} } try: top_sensitive_result = es_bci_history.search(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body=query_sensitive_body, _source=False, fields=[sensitive_string])['hits']['hits'] top_sensitive = top_sensitive_result[0]['fields'][sensitive_string][0] except Exception, reason: print Exception, reason top_sensitive = 400 index_type = 'bci' user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids':uid_list}, _source=True)['docs'] user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids':uid_list}, _source=True)['docs'] bci_history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list}, fields=['user_fansnum', 'weibo_month_sum'])['docs'] sensitive_history_result = es_bci_history.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={'ids':uid_list}, fields=[sensitive_string], _source=False)['docs'] max_evaluate_influ = get_evaluate_max(index_name) for i in range(0, len(uid_list)): uid = uid_list[i] bci_dict = user_bci_result[i] profile_dict = user_profile_result[i] bci_history_dict = bci_history_result[i] sensitive_history_dict = sensitive_history_result[i] #print sensitive_history_dict try: bci_source = bci_dict['_source'] except: bci_source = None if bci_source: influence = bci_source['user_index']
def identify_user_portrait(user_set, filter_type):
    """Split users into those found in the user-portrait index and the rest.

    Users are looked up in the portrait index in batches of
    ``SENTIMENT_ITER_USER_COUNT``. For found users the ``influence``,
    ``activeness``, ``importance`` and ``sensitive`` fields are normalised as
    ``log10(value / max * 9 + 1) * 100`` using ``get_evaluate_max()``. Users
    not found are looked up in the user-profile and bci-history indices
    instead.

    Args:
        user_set: iterable of uids.
        filter_type: when equal to ``'in'`` only the portrait mapping is
            built and returned.

    Returns:
        If ``filter_type == 'in'``: a dict ``uid -> [uname, influence,
        activeness, importance, sensitive]`` (normalised values).
        Otherwise a tuple of that dict and a second dict ``str(uid) ->
        [uname, statusnum, friendsnum, fansnum]`` for users missing from the
        portrait index.
    """
    def _normalize(value, maximum):
        # float() keeps the ratio from being floored under Python 2 when both
        # values are integers.
        return math.log(float(value) / maximum * 9 + 1, 10) * 100

    user_list = list(user_set)
    all_in_portrait_user = dict()
    all_out_portrait_user_list = []
    max_result = get_evaluate_max()

    # Stepping with range() stops before the end of the list, so no request
    # is sent for an empty trailing batch.
    for start in range(0, len(user_list), SENTIMENT_ITER_USER_COUNT):
        iter_user_list = user_list[start: start + SENTIMENT_ITER_USER_COUNT]
        try:
            in_portrait_result = es_user_portrait.mget(
                index=portrait_index_name, doc_type=portrait_index_type,
                body={'ids': iter_user_list}, _source=False,
                fields=['uname', 'influence', 'activeness', 'importance', 'sensitive'])['docs']
        except Exception:
            in_portrait_result = []

        for in_portrait_item in in_portrait_result:
            if not in_portrait_item['found']:
                all_out_portrait_user_list.append(int(in_portrait_item['_id']))
                continue
            portrait_fields = in_portrait_item['fields']
            uname = portrait_fields['uname'][0]
            if uname == '' or uname == 'unknown':
                # Fall back to the uid when no usable name is stored.
                uname = in_portrait_item['_id']
            normal_influence = _normalize(portrait_fields['influence'][0],
                                          max_result['influence'])
            normal_activeness = _normalize(portrait_fields['activeness'][0],
                                           max_result['activeness'])
            normal_importance = _normalize(portrait_fields['importance'][0],
                                           max_result['importance'])
            try:
                normal_sensitive = _normalize(portrait_fields['sensitive'][0],
                                              max_result['sensitive'])
            except (KeyError, IndexError, TypeError, ValueError, ZeroDivisionError):
                # A missing or unusable sensitive value counts as 0.
                normal_sensitive = 0
            all_in_portrait_user[in_portrait_item['_id']] = [
                uname, normal_influence, normal_activeness,
                normal_importance, normal_sensitive]

    if filter_type == 'in':
        return all_in_portrait_user

    # Describe the users that are not in the portrait index.
    all_out_portrait_user = dict()
    for start in range(0, len(all_out_portrait_user_list), SENTIMENT_ITER_USER_COUNT):
        iter_uid_list = all_out_portrait_user_list[start: start + SENTIMENT_ITER_USER_COUNT]
        bci_iter_uid_list = [str(item) for item in iter_uid_list]
        try:
            profile_result = es_user_profile.mget(
                index=profile_index_name, doc_type=profile_index_type,
                body={'ids': iter_uid_list}, _source=False,
                fields=['nick_name'])['docs']
        except Exception:
            profile_result = []
        try:
            bci_history_result = es_bci_history.mget(
                index=bci_history_index_name, doc_type=bci_history_index_type,
                body={'ids': bci_iter_uid_list}, _source=False,
                fields=['user_fansnum', 'weibo_month_sum', 'user_friendsnum'])['docs']
        except Exception:
            bci_history_result = []

        for position, uid in enumerate(iter_uid_list):
            if position < len(profile_result):
                profile_item = profile_result[position]
            else:
                profile_item = {'found': False}
            if profile_item['found']:
                uname = profile_item['fields']['nick_name'][0]
            else:
                # The fallback document has no '_id'; use the uid itself.
                uname = profile_item.get('_id', str(uid))

            if position < len(bci_history_result):
                bci_history_item = bci_history_result[position]
            else:
                bci_history_item = {'found': False}
            if bci_history_item['found']:
                statusnum = bci_history_item['fields']['weibo_month_sum'][0]
                fansnum = bci_history_item['fields']['user_fansnum'][0]
                friendsnum = bci_history_item['fields']['user_friendsnum'][0]
            else:
                statusnum = 0
                fansnum = 0
                friendsnum = 0
            all_out_portrait_user[str(uid)] = [uname, statusnum, friendsnum, fansnum]

    return all_in_portrait_user, all_out_portrait_user
def search_follower(uid, top_count):
    """Describe every user listed in the ``uid_be_retweet`` map of ``uid``.

    Reads document ``uid`` from the current be-retweet index, decodes its
    JSON ``uid_be_retweet`` map (other uid -> count) and enriches every uid
    in it from the user-profile and bci-history indices.

    Args:
        uid: id of the centre user.
        top_count: accepted for interface compatibility; not used.

    Returns:
        A list of dicts with the keys ``uid``, ``photo_url``, ``count``,
        ``uname``, ``influence``, ``fansnum``, ``friendsnum`` and
        ``weibo_count`` (one per profile document returned), or ``None`` when
        the be-retweet document cannot be fetched or its source is empty.
    """
    # NOTE(review): search_fans and search_bidirect_interaction pass
    # datetime2ts(ts2datetime(now)) to get_db_num; here the raw timestamp is
    # used -- confirm both select the same index.
    db_number = get_db_num(time.time())
    index_name = be_retweet_index_name_pre + str(db_number)

    try:
        retweet_result = es_retweet.get(index=index_name,
                                        doc_type=be_retweet_index_type,
                                        id=uid)['_source']
    except Exception:
        # Best effort: any lookup failure is reported as "no data".
        return None
    if not retweet_result:
        return None

    retweet_dict = json.loads(retweet_result['uid_be_retweet'])
    uid_list = list(retweet_dict.keys())

    try:
        user_result = es_user_profile.mget(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           body={'ids': uid_list})['docs']
    except Exception:
        user_result = []
    try:
        bci_history_result = es_bci_history.mget(index=bci_history_index_name,
                                                 doc_type=bci_history_index_type,
                                                 body={'ids': uid_list},
                                                 fields=fields)['docs']
    except Exception:
        bci_history_result = []

    out_portrait_list = []
    for position, profile_doc in enumerate(user_result):
        other_uid = profile_doc['_id']
        if profile_doc['found']:
            source = profile_doc['_source']
            uname = source['nick_name']
            if uname == '':
                # Placeholder nickname ("unknown"), also used for missing profiles.
                uname = u'未知'
            photo_url = source['photo_url']
        else:
            uname = u'未知'
            # Only users without a profile get the placeholder photo.
            photo_url = 'unknown'

        # Both mget calls use the same id list, so documents are paired by
        # position; a missing or not-found bci document yields empty values.
        bci_fields = {}
        if position < len(bci_history_result):
            bci_doc = bci_history_result[position]
            if bci_doc.get('found'):
                bci_fields = bci_doc.get('fields', {})
        # ``fields`` (module level) is read positionally: 0 -> fansnum,
        # 1 -> weibo_count, 2 -> friendsnum, 3 -> influence.
        fansnum = bci_fields.get(fields[0], [''])[0]
        user_weibo_count = bci_fields.get(fields[1], [''])[0]
        user_friendsnum = bci_fields.get(fields[2], [''])[0]
        influence = bci_fields.get(fields[3], [''])[0]

        out_portrait_list.append({
            'uid': other_uid,
            'photo_url': photo_url,
            'count': retweet_dict[other_uid],
            'uname': uname,
            'influence': influence,
            'fansnum': fansnum,
            'friendsnum': user_friendsnum,
            'weibo_count': user_weibo_count,
        })
    return out_portrait_list
index = profile_result.index(item) tmp = [] if item['found']: item = item['_source'] tmp.append(item['uid']) tmp.append(item['nick_name']) tmp.append(item['user_location']) else: tmp.extend([_id,'','']) value = sort_list[index] tmp.append(value) results.append(tmp) if uid_list: count = 0 history_result = es_bci_history.mget(index=bci_history_index_name, doc_type=bci_history_index_type, body={"ids":uid_list})["docs"] for item in history_result: if item['found']: item = item['_source'] results[count].extend([item['user_fansnum'], item['weibo_month_sum']]) else: results[count].extend(['','']) count += 1 if uid_list: count = 0 portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":uid_list})["docs"] for item in portrait_result: if item['found']: results[count].append("1") else: