def search_max_single_field(field, index_name, doctype, top_k=3):
    """Collect up to ``top_k`` portrait users ranked by one "top weibo" field.

    Hits are fetched page by page through
    ``search_k(es, index_name, doctype, start, field, 100)`` (presumably
    sorted by ``field`` in descending order -- confirm against ``search_k``).
    Each page of uids is looked up in the ``user_portrait`` and ``weibo_user``
    indices; only users found in ``user_portrait`` produce a row.

    Args:
        field: name of the ranking field, e.g.
            "origin_weibo_retweeted_top_number" or
            "origin_weibo_comment_top_number".  If it contains "retweeted"
            the retweet keys of each hit are read, otherwise the comment keys.
        index_name: index passed through to ``search_k``.
        doctype: document type passed through to ``search_k``.
        top_k: maximum number of rows to return (converted with ``int``).

    Returns:
        A list of rows
        ``[rank, photo_url, uid, nick_name, top_number, weibo_url, '1']``.
        ``photo_url`` and ``nick_name`` stay ``''`` when the user has no
        profile document.  The list holds fewer than ``top_k`` rows when the
        hits run out first, and is empty when ``top_k`` is not positive.
    """
    limit = int(top_k)
    if limit <= 0:
        return []

    # The hit keys to read depend only on ``field``, so pick them once.
    if 'retweeted' in field:
        mid_key = 'origin_weibo_top_retweeted_id'
        number_key = 'origin_weibo_retweeted_top_number'
    else:
        mid_key = 'origin_weibo_top_comment_id'
        number_key = 'origin_weibo_comment_top_number'

    page_size = 100
    return_list = []
    rank = 1
    start = 0

    while True:
        user_list = search_k(es, index_name, doctype, start, field, page_size)
        if not user_list:
            # No more hits: stop instead of looping forever and sending
            # mget requests with an empty id list.
            return return_list
        start += page_size

        search_list = [item.get('user', '0') for item in user_list]
        search_result = es_portrait.mget(
            index="user_portrait", doc_type="user",
            body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(
            index="weibo_user", doc_type="user",
            body={"ids": search_list}, _source=True)["docs"]

        # The three sequences are parallel: one entry per requested uid.
        for hit, portrait, profile in zip(user_list, search_result, profile_result):
            if not portrait['found']:
                continue

            uid = portrait.get('_id', '')
            info = [rank, '', uid, '', '', '', '1']
            if profile['found']:
                info[1] = profile['_source'].get('photo_url', '')
                info[3] = profile['_source'].get('nick_name', '')
            info[5] = weiboinfo2url(uid, hit[mid_key])
            info[4] = hit[number_key]

            return_list.append(info)
            rank += 1
            if rank > limit:
                return return_list
def search_tag(es, number, active_index, active_type, portrait_index, portrait_type, tag):
    """Collect up to ``number`` portrait users whose ``domain`` contains ``tag``.

    Hits are fetched page by page through
    ``search_k(es, active_index, active_type, start, "user_index", 10000)``
    (presumably sorted by ``user_index`` -- confirm against ``search_k``).
    Each page of uids is looked up in the portrait index and in the
    ``weibo_user`` profile index.

    Args:
        es: Elasticsearch client handed to ``search_k``.
        number: maximum number of rows to return (converted with ``int``).
        active_index: index passed through to ``search_k``.
        active_type: document type passed through to ``search_k``.
        portrait_index: index queried on ``es_portrait``.
        portrait_type: document type queried on ``es_portrait``.
        tag: value tested with ``in`` against each portrait's ``domain``.

    Returns:
        A list of rows
        ``[rank, photo_url, uid, nick_name, user_index, activeness, importance]``.
        Scanning stops when ``number`` rows are collected, when more than
        100000 portrait lookups have been examined, or when the hits run out.
        The list is empty when ``number`` is not positive.
    """
    limit = int(number)
    if limit <= 0:
        return []

    page_size = 10000
    max_scanned = 100000
    return_list = []
    count_s = 0
    start = 0
    rank = 1

    while True:
        user_list = search_k(es, active_index, active_type, start, "user_index", page_size)
        if not user_list:
            # No more hits: without this exit count_s never grows and the
            # loop would never terminate.
            return return_list
        start += page_size

        search_list = [item.get('user', '0') for item in user_list]
        search_result = es_portrait.mget(
            index=portrait_index, doc_type=portrait_type,
            body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(
            index="weibo_user", doc_type="user",
            body={"ids": search_list}, _source=True)["docs"]

        # Walk the parallel sequences together instead of recovering the
        # position with list.index(), which is O(n) per match and returns
        # the wrong row when two docs compare equal.
        for hit, portrait, profile in zip(user_list, search_result, profile_result):
            count_s += 1
            if not (portrait['found'] and tag in portrait['_source']['domain']):
                continue

            info = [rank, '', portrait.get('_id', ''), '', hit['user_index'], '', '']
            if profile['found']:
                info[1] = profile['_source'].get('photo_url', '')
                info[3] = profile['_source'].get('nick_name', '')
            info[5] = portrait['_source'].get('activeness', '')
            info[6] = portrait['_source'].get('importance', '')

            return_list.append(info)
            rank += 1
            if rank > limit:
                return return_list

        if count_s > max_scanned:
            return return_list
def search_tag(es, number, active_index, active_type, portrait_index, portrait_type, field_dict):
    """Collect up to ``number`` portrait users matching a single field filter.

    Hits are fetched page by page through
    ``search_k(es, active_index, active_type, start, "user_index", 1000)``
    (presumably sorted by ``user_index`` -- confirm against ``search_k``).
    Each page of uids is looked up in the portrait index and in the
    ``weibo_user`` profile index.

    Args:
        es: Elasticsearch client handed to ``search_k``.
        number: maximum number of rows to return (converted with ``int``).
        active_index: index passed through to ``search_k``.
        active_type: document type passed through to ``search_k``.
        portrait_index: index queried on ``es_portrait``.
        portrait_type: document type queried on ``es_portrait``.
        field_dict: filter such as ``{"domain": "art"}``.  Only its first
            item is used: a portrait matches when the value is ``in`` the
            portrait's field of that name.

    Returns:
        A list of rows
        ``[rank, photo_url, uid, nick_name, user_index, activeness, importance]``.
        Scanning stops when ``number`` rows are collected, when more than
        10000 portrait lookups have been examined, or when the hits run out.
        The list is empty when ``number`` is not positive.

    Raises:
        IndexError: if ``field_dict`` is empty.
    """
    # dict.keys()[0] / dict.values()[0] only work on Python 2 lists; taking
    # the first item this way works on both Python 2 and 3, and doing it once
    # keeps the lookup out of the per-document loop.
    field_name, field_value = list(field_dict.items())[0]

    limit = int(number)
    if limit <= 0:
        return []

    page_size = 1000
    max_scanned = 10000
    return_list = []
    count_s = 0
    start = 0
    rank = 1

    while True:
        user_list = search_k(es, active_index, active_type, start, "user_index", page_size)
        if not user_list:
            # No more hits: without this exit count_s never grows and the
            # loop would never terminate.
            return return_list
        start += page_size

        search_list = [item.get('user', '0') for item in user_list]
        search_result = es_portrait.mget(
            index=portrait_index, doc_type=portrait_type,
            body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(
            index="weibo_user", doc_type="user",
            body={"ids": search_list}, _source=True)["docs"]

        # Walk the parallel sequences together instead of recovering the
        # position with list.index(), which is O(n) per match and returns
        # the wrong row when two docs compare equal.
        for hit, portrait, profile in zip(user_list, search_result, profile_result):
            count_s += 1
            if not (portrait['found'] and field_value in portrait['_source'][field_name]):
                continue

            info = [rank, '', portrait.get('_id', ''), '', hit['user_index'], '', '']
            if profile['found']:
                info[1] = profile['_source'].get('photo_url', '')
                info[3] = profile['_source'].get('nick_name', '')
            info[5] = portrait['_source'].get('activeness', '')
            info[6] = portrait['_source'].get('importance', '')

            return_list.append(info)
            rank += 1
            if rank > limit:
                return return_list

        if count_s > max_scanned:
            return return_list
def search_max_single_field(field, index_name, doctype, top_k=3):
    """Collect up to ``top_k`` portrait users ranked by one "top weibo" field.

    Hits are fetched page by page through
    ``search_k(es, index_name, doctype, start, field, 100)`` (presumably
    sorted by ``field`` in descending order -- confirm against ``search_k``).
    Each page of uids is looked up in the configured portrait index
    (``portrait_index_name`` / ``portrait_index_type``) and profile index
    (``profile_index_name`` / ``profile_index_type``); only users found in
    the portrait index produce a row.

    Args:
        field: name of the ranking field, e.g.
            "origin_weibo_retweeted_top_number" or
            "origin_weibo_comment_top_number".  If it contains "retweeted"
            the retweet keys of each hit are read, otherwise the comment keys.
        index_name: index passed through to ``search_k``.
        doctype: document type passed through to ``search_k``.
        top_k: maximum number of rows to return (converted with ``int``).

    Returns:
        A list of rows
        ``[rank, photo_url, uid, nick_name, top_number, weibo_url, '1']``.
        ``photo_url`` and ``nick_name`` stay ``''`` when the user has no
        profile document.  The list holds fewer than ``top_k`` rows when the
        hits run out first, and is empty when ``top_k`` is not positive.
    """
    limit = int(top_k)
    if limit <= 0:
        return []

    # The hit keys to read depend only on ``field``, so pick them once.
    if 'retweeted' in field:
        mid_key = 'origin_weibo_top_retweeted_id'
        number_key = 'origin_weibo_retweeted_top_number'
    else:
        mid_key = 'origin_weibo_top_comment_id'
        number_key = 'origin_weibo_comment_top_number'

    page_size = 100
    return_list = []
    rank = 1
    start = 0

    while True:
        user_list = search_k(es, index_name, doctype, start, field, page_size)
        if not user_list:
            # No more hits: stop instead of looping forever and sending
            # mget requests with an empty id list.
            return return_list
        start += page_size

        search_list = [item.get('user', '0') for item in user_list]
        search_result = es_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={"ids": search_list}, _source=True)["docs"]
        profile_result = es_profile.mget(
            index=profile_index_name, doc_type=profile_index_type,
            body={"ids": search_list}, _source=True)["docs"]

        # The three sequences are parallel: one entry per requested uid.
        for hit, portrait, profile in zip(user_list, search_result, profile_result):
            if not portrait['found']:
                continue

            uid = portrait.get('_id', '')
            info = [rank, '', uid, '', '', '', '1']
            if profile['found']:
                info[1] = profile['_source'].get('photo_url', '')
                info[3] = profile['_source'].get('nick_name', '')
            info[5] = weiboinfo2url(uid, hit[mid_key])
            info[4] = hit[number_key]

            return_list.append(info)
            rank += 1
            if rank > limit:
                return return_list