def search_top_index(index_name, top_k=1, index_type="bci", top=False, sort_order="user_index"):
    """Rank the documents of an index by ``sort_order`` and attach profile data.

    The search runs through the module-level ``es`` client, sorted descending
    on ``sort_order`` and limited to ``top_k`` hits.

    Args:
        index_name: index passed to ``es.search``.
        top_k: number of hits requested (the query ``size``).
        index_type: ``doc_type`` passed to ``es.search``.
        top: when true, only the ``sort_order`` value of the first hit is
            returned.
        sort_order: field to sort on; it also decides which extra columns
            are appended to every row.

    Returns:
        If ``top`` is true, the ``sort_order`` value from the first hit's
        ``_source``. Otherwise a list of rows, one per hit:
        ``[rank, photo_url, uid, nick_name, ...extra]`` where ``rank`` starts
        at 1 and ``photo_url`` / ``nick_name`` stay ``''`` when the profile
        lookup did not find the uid. Extra columns:

        * ``user_index``, ``origin_weibo_retweeted_brust_average``,
          ``origin_weibo_comment_brust_average``: value, portrait flag.
        * ``origin_weibo_retweeted_top_number``,
          ``origin_weibo_comment_top_number``: value, weibo url, portrait flag.
        * any other ``sort_order``: no extra columns.

        The portrait flag is ``"1"`` when the portrait lookup found the uid,
        otherwise ``"0"``. An empty list is returned when there are no hits.

    Raises:
        IndexError: if ``top`` is true and the search returned no hits.
    """
    query_body = {
        "query": {"match_all": {}},
        "size": top_k,
        "sort": [{sort_order: {"order": "desc"}}],
    }
    hits = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']

    if top:
        return hits[0]['_source'][sort_order]

    # Elasticsearch rejects an mget request that names no documents, so stop
    # here instead of issuing the two lookups with an empty id list.
    if not hits:
        return []

    uid_list = [hit['_id'] for hit in hits]
    # Rows below are matched to hits by position, i.e. this relies on mget
    # returning one doc per requested id, in request order.
    profile_result = es_profile.mget(index="weibo_user", doc_type="user",
                                     body={"ids": uid_list}, _source=True)['docs']
    portrait_result = es_portrait.mget(index="user_portrait", doc_type="user",
                                       body={"ids": uid_list}, _source=True)['docs']

    # Sort fields that contribute only their own value to the row.
    value_only_fields = (
        "user_index",
        "origin_weibo_retweeted_brust_average",
        "origin_weibo_comment_brust_average",
    )
    # Sort fields that also contribute a weibo url, mapped to the _source
    # field holding the id of that weibo.
    top_weibo_id_fields = {
        "origin_weibo_retweeted_top_number": "origin_weibo_top_retweeted_id",
        "origin_weibo_comment_top_number": "origin_weibo_top_comment_id",
    }

    result = []
    for i, hit in enumerate(hits):
        info = [i + 1, '', '', '']
        if profile_result[i]['found']:
            profile_source = profile_result[i]['_source']
            info[1] = profile_source.get('photo_url', '')
            info[3] = profile_source.get('nick_name', '')
        info[2] = hit.get('_id', '')

        if sort_order in value_only_fields:
            info.append(hit['_source'][sort_order])
            info.append("1" if portrait_result[i]['found'] else "0")
        elif sort_order in top_weibo_id_fields:
            info.append(hit['_source'][sort_order])
            mid = hit['_source'][top_weibo_id_fields[sort_order]]
            info.append(weiboinfo2url(info[2], mid))
            info.append("1" if portrait_result[i]['found'] else "0")

        result.append(info)
    return result
def query_brust(index_name, field_name, range_1=0, range_2=50000, count=0):
    """Count or list the "bci" documents whose ``field_name`` lies in a range.

    The range is half-open: ``range_1 <= value < range_2``. The query is sent
    through the module-level ``es`` client.

    Args:
        index_name: index to query.
        field_name: field the range filter applies to.
        range_1: inclusive lower bound.
        range_2: exclusive upper bound.
        count: when equal to 1, return the match count instead of ids.

    Returns:
        The ``count`` value reported by ``es.count`` when ``count == 1``;
        otherwise the list of ``_id`` values of the hits returned by a search
        whose ``size`` is 1000.
    """
    range_filter = {"range": {field_name: {"gte": range_1, "lt": range_2}}}
    query_body = {
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": range_filter,
            }
        }
    }

    if count == 1:
        return es.count(index=index_name, doc_type="bci", body=query_body)['count']

    query_body['size'] = 1000
    hits = es.search(index=index_name, doc_type="bci", body=query_body)['hits']['hits']
    return [hit['_id'] for hit in hits]
def search_portrait_history_active_info(uid, date, index_name="copy_user_portrait", doctype="user"):
    """Look up ``uid`` and collect the per-date maximum over the index.

    ``time_series(date)`` supplies the field names (one per date); for each
    of them the document with the largest value is searched and that value is
    stored in ``date_max``.

    Args:
        uid: document id fetched with ``es.get``.
        date: passed to ``time_series``; example format: 20130901.
        index_name: index used for both the get and the searches.
        doctype: ``doc_type`` used for both the get and the searches.

    Returns:
        ``"NotFound"`` when the document does not exist, ``None`` when the
        lookup fails for any other reason.

    Raises:
        IndexError: if a per-date search returns no hits.

    NOTE(review): the visible body ends after filling ``date_max`` and so
    falls through to an implicit ``None``; ``result`` and ``date_max`` are not
    used afterwards. The tail of the function may be missing - confirm.
    """
    date_list = time_series(date)
    try:
        result = es.get(index=index_name, doc_type=doctype, id=uid, _source=True)['_source']
    except NotFoundError:
        return "NotFound"
    except Exception:
        # Best-effort lookup: any other failure is reported as None. Unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        return None

    date_max = {}
    for date_str in date_list:
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{date_str: {'order': 'desc'}}],
        }
        # Search errors propagate unchanged; the former catch-and-reraise
        # only discarded the original traceback.
        max_item = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
        date_max[date_str] = max_item[0]['_source'][date_str]
def query_brust(index_name, field_name, range_1=0, range_2=50000, count=0):
    """Query "bci" documents with ``range_1 <= field_name < range_2``.

    Uses the module-level ``es`` client.

    Args:
        index_name: index to query.
        field_name: field constrained by the range filter.
        range_1: lower bound (``gte``).
        range_2: upper bound (``lt``).
        count: pass 1 to get the number of matches instead of their ids.

    Returns:
        With ``count == 1``, the ``count`` entry of the ``es.count`` response.
        Otherwise the ``_id`` of every hit of a search limited to 1000 hits.
    """
    bounds = {"gte": range_1, "lt": range_2}
    filtered_query = {
        "query": {"match_all": {}},
        "filter": {"range": {field_name: bounds}},
    }
    query_body = {"query": {"filtered": filtered_query}}

    if count == 1:
        counted = es.count(index=index_name, doc_type="bci", body=query_body)
        return counted['count']

    query_body['size'] = 1000
    response = es.search(index=index_name, doc_type="bci", body=query_body)
    return [doc['_id'] for doc in response['hits']['hits']]
def search_k(es, index_name, index_type, start, field="user_index", size=100):
    """Fetch one page of documents ordered by ``field``, largest first.

    Args:
        es: client object exposing ``search(index=, doc_type=, body=)``.
        index_name: index to search.
        index_type: ``doc_type`` to search.
        start: offset of the first hit (the query ``from``).
        field: field to sort on, descending.
        size: maximum number of hits requested.

    Returns:
        The ``_source`` of every returned hit, in response order.
    """
    query_body = {
        "query": {"match_all": {}},
        "size": size,
        "from": start,
        "sort": [{field: {"order": "desc"}}],
    }
    response = es.search(index=index_name, doc_type=index_type, body=query_body)
    return [hit['_source'] for hit in response['hits']['hits']]
def get_evaluate_max(index_name):
    """Return the maximum value of each evaluation field in an index.

    For every field in the evaluated list (currently only ``user_index``)
    the "bci" document with the largest value is searched through the
    module-level ``es_cluster`` client.

    Args:
        index_name: index to search.

    Returns:
        A dict mapping each evaluated field name to the value found in the
        top-ranked document's ``_source``.

    Raises:
        IndexError: if a search returns no hits.
    """
    index_type = 'bci'
    evaluate_index = ['user_index']

    max_result = {}
    for evaluate in evaluate_index:
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}],
        }
        # Search errors propagate unchanged; the former catch-and-reraise
        # only discarded the original traceback.
        hits = es_cluster.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
        max_result[evaluate] = hits[0]['_source'][evaluate]
    # The collected maxima were previously built but never handed back.
    return max_result
def search_k(es, index_name, index_type, start, field="user_index", size=100):
    """Return the ``_source`` documents of one descending-sorted result page.

    Args:
        es: client object whose ``search`` method runs the query.
        index_name: index to search.
        index_type: ``doc_type`` to search.
        start: value of the query's ``from`` (offset of the first hit).
        field: sort field; results are ordered by it, descending.
        size: value of the query's ``size``.

    Returns:
        A list with the ``_source`` of each hit, in the order returned.
    """
    descending_sort = [{field: {"order": "desc"}}]
    query_body = {
        "query": {"match_all": {}},
        "size": size,
        "from": start,
        "sort": descending_sort,
    }
    hits = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
    return [doc['_source'] for doc in hits]
def search_portrait_history_active_info(uid, date, index_name=copy_portrait_index_name, doctype=copy_portrait_index_name):
    """Look up ``uid`` and collect the per-date maximum over the index.

    ``time_series(date)`` supplies the field names (one per date); for each
    of them the document with the largest value is searched and that value is
    stored in ``date_max``.

    Args:
        uid: document id fetched with ``es.get``.
        date: passed to ``time_series``; example format: 20130901.
        index_name: index used for both the get and the searches.
        doctype: ``doc_type`` used for both the get and the searches.

    Returns:
        ``"NotFound"`` when the document does not exist, ``None`` when the
        lookup fails for any other reason.

    Raises:
        IndexError: if a per-date search returns no hits.

    NOTE(review): ``doctype`` defaults to the index-name constant; a separate
    doc-type constant may have been intended - confirm.
    NOTE(review): the visible body ends after filling ``date_max`` and so
    falls through to an implicit ``None``; ``result`` and ``date_max`` are not
    used afterwards. The tail of the function may be missing - confirm.
    """
    date_list = time_series(date)
    try:
        result = es.get(index=index_name, doc_type=doctype, id=uid, _source=True)['_source']
    except NotFoundError:
        return "NotFound"
    except Exception:
        # Best-effort lookup: any other failure is reported as None. Unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        return None

    date_max = {}
    for date_str in date_list:
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{date_str: {'order': 'desc'}}],
        }
        # Search errors propagate unchanged; the former catch-and-reraise
        # only discarded the original traceback.
        max_item = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
        date_max[date_str] = max_item[0]['_source'][date_str]
def search_top_index(index_name, top_k=1, index_type="bci", top=False, sort_order="user_index"):
    """Rank the documents of an index by ``sort_order`` and attach profile data.

    The search runs through the module-level ``es`` client, sorted descending
    on ``sort_order`` and limited to ``top_k`` hits.

    Args:
        index_name: index passed to ``es.search``.
        top_k: number of hits requested (the query ``size``).
        index_type: ``doc_type`` passed to ``es.search``.
        top: when true, only the ``sort_order`` value of the first hit is
            returned.
        sort_order: field to sort on; it also decides which extra columns
            are appended to every row.

    Returns:
        If ``top`` is true, the ``sort_order`` value from the first hit's
        ``_source``. Otherwise a list of rows, one per hit:
        ``[rank, photo_url, uid, nick_name, ...extra]`` where ``rank`` starts
        at 1 and ``photo_url`` / ``nick_name`` stay ``''`` when the profile
        lookup did not find the uid. Extra columns:

        * ``user_index``, ``origin_weibo_retweeted_brust_average``,
          ``origin_weibo_comment_brust_average``: value, portrait flag.
        * ``origin_weibo_retweeted_top_number``,
          ``origin_weibo_comment_top_number``: value, weibo url, portrait flag.
        * any other ``sort_order``: no extra columns.

        The portrait flag is ``"1"`` when the portrait lookup found the uid,
        otherwise ``"0"``. An empty list is returned when there are no hits.

    Raises:
        IndexError: if ``top`` is true and the search returned no hits.
    """
    query_body = {
        "query": {"match_all": {}},
        "size": top_k,
        "sort": [{sort_order: {"order": "desc"}}],
    }
    hits = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']

    if top:
        return hits[0]['_source'][sort_order]

    # Elasticsearch rejects an mget request that names no documents, so stop
    # here instead of issuing the two lookups with an empty id list.
    if not hits:
        return []

    uid_list = [hit['_id'] for hit in hits]
    # Rows below are matched to hits by position, i.e. this relies on mget
    # returning one doc per requested id, in request order.
    profile_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type,
                                     body={"ids": uid_list}, _source=True)['docs']
    portrait_result = es_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                                       body={"ids": uid_list}, _source=True)['docs']

    # Sort fields that contribute only their own value to the row.
    value_only_fields = (
        "user_index",
        "origin_weibo_retweeted_brust_average",
        "origin_weibo_comment_brust_average",
    )
    # Sort fields that also contribute a weibo url, mapped to the _source
    # field holding the id of that weibo.
    top_weibo_id_fields = {
        "origin_weibo_retweeted_top_number": "origin_weibo_top_retweeted_id",
        "origin_weibo_comment_top_number": "origin_weibo_top_comment_id",
    }

    result = []
    for i, hit in enumerate(hits):
        info = [i + 1, '', '', '']
        if profile_result[i]['found']:
            profile_source = profile_result[i]['_source']
            info[1] = profile_source.get('photo_url', '')
            info[3] = profile_source.get('nick_name', '')
        info[2] = hit.get('_id', '')

        if sort_order in value_only_fields:
            info.append(hit['_source'][sort_order])
            info.append("1" if portrait_result[i]['found'] else "0")
        elif sort_order in top_weibo_id_fields:
            info.append(hit['_source'][sort_order])
            mid = hit['_source'][top_weibo_id_fields[sort_order]]
            info.append(weiboinfo2url(info[2], mid))
            info.append("1" if portrait_result[i]['found'] else "0")

        result.append(info)
    return result