def search_sensitive_weibo(index_name):
    """Sensitive weibo user recommendation query.

    Searches ``index_name`` (doc type ``bci``) through ``es_cluster`` with a
    filtered query whose bool ``should`` filter holds two range clauses:
    ``s_retweeted_weibo_number > 0`` and ``s_origin_weibo_number > 0``.

    Args:
        index_name: name of the Elasticsearch index to search.

    Returns:
        list with the ``uid`` taken from the ``_source`` of every returned
        hit, in the order the hits were returned.
    """
    has_retweeted = {"range": {"s_retweeted_weibo_number": {"gt": 0}}}
    has_origin = {"range": {"s_origin_weibo_number": {"gt": 0}}}
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "should": [has_retweeted, has_origin]
                    }
                }
            }
        },
        # Very large size so the request is effectively uncapped.
        "size": 10000000
    }
    response = es_cluster.search(index=index_name, doc_type="bci",
                                 body=query_body)
    return [hit['_source']['uid'] for hit in response['hits']['hits']]
def search_rank(index_name, start_point, size, index_type="bci"):
    """Fetch one page of hits ordered by ``user_index``, highest first.

    Args:
        index_name: name of the Elasticsearch index to search.
        start_point: offset of the first hit to return (``from``).
        size: number of hits to return.
        index_type: document type to search; defaults to ``"bci"``.

    Returns:
        The raw ``hits.hits`` list from the response. ``_source`` is
        disabled in the request, so the document body is not requested.
    """
    descending_by_index = [{"user_index": {"order": "desc"}}]
    query_body = {
        "query": {"match_all": {}},
        "sort": descending_by_index,
        "from": start_point,
        "size": size,
    }
    response = es.search(index=index_name, doc_type=index_type,
                         body=query_body, _source=False)
    return response['hits']['hits']
def search_sensitive_weibo(index_name):
    """Return the uids of the top 2000 hits by comment-top number.

    Runs a ``match_all`` query against ``index_name`` (doc type ``bci``)
    through ``es_cluster``, sorted descending on
    ``s_origin_weibo_comment_top_number`` and limited to 2000 hits.

    Args:
        index_name: name of the Elasticsearch index to search.

    Returns:
        list with the ``uid`` taken from the ``_source`` of each hit, in
        sorted order.
    """
    query_body = {
        "query": {"match_all": {}},
        "sort": {'s_origin_weibo_comment_top_number': {"order": "desc"}},
        "size": 2000,
    }
    response = es_cluster.search(index=index_name, doc_type="bci",
                                 body=query_body)
    hits = response['hits']['hits']
    return [hit['_source']['uid'] for hit in hits]
def search_from_es(date):
    """Fetch the top ``k`` hits by ``user_index`` from the index for ``date``.

    The index name is ``'bci_'`` followed by ``date`` with every ``'-'``
    removed (presumably ``date`` is ``YYYY-MM-DD`` -- confirm with callers).
    ``k`` is read from module scope.

    Args:
        date: date string used to derive the index name.

    Returns:
        tuple ``(ids, hits)`` where ``hits`` is the raw ``hits.hits`` list
        (requested with ``_source=False`` and ``fields=['user_index']``)
        and ``ids`` is the set of ``_id`` values of those hits.
    """
    index_type = 'bci'
    index_time = 'bci_' + date.replace('-', '')
    # Prints the derived index name; single-argument form prints the same
    # text whether print is a statement or a function.
    print(index_time)

    query_body = {
        'query': {'match_all': {}},
        'size': k,
        'sort': [{'user_index': {'order': 'desc'}}],
    }
    response = es_bci.search(index=index_time, doc_type=index_type,
                             body=query_body, _source=False,
                             fields=['user_index'])
    result = response['hits']['hits']
    return set(hit['_id'] for hit in result), result
def search_top_k(index_name, top_k):
    """Top-k recommendation: uids of the ``top_k`` highest ``user_index`` hits.

    Runs a ``match_all`` query against ``index_name`` (doc type ``bci``)
    through ``es_cluster``, sorted descending on ``user_index``.

    Args:
        index_name: name of the Elasticsearch index to search.
        top_k: number of hits to request.

    Returns:
        list with the ``uid`` taken from the ``_source`` of each hit, in
        sorted order.
    """
    query_body = {
        "query": {"match_all": {}},
        "size": top_k,
        "sort": [{"user_index": {"order": "desc"}}],
    }
    response = es_cluster.search(index=index_name, doc_type="bci",
                                 body=query_body)
    top_uids = [hit['_source']['uid'] for hit in response["hits"]["hits"]]
    return top_uids
def get_evaluate_max(index_name):
    """Return the maximum value of each evaluation field in ``index_name``.

    For every field in the evaluation list (currently only ``user_index``)
    the index is queried through ``es_cluster`` with a ``match_all`` query
    sorted descending on that field and limited to one hit; the field's
    value in that hit's ``_source`` is recorded as the maximum.

    Args:
        index_name: name of the Elasticsearch index to search (doc type
            ``bci``).

    Returns:
        dict mapping each evaluation field name to its maximum value.

    Raises:
        IndexError: if a search returns no hits.
        Exception: anything raised by ``es_cluster.search`` propagates
            unchanged.
    """
    index_type = 'bci'
    evaluate_index = ['user_index']
    max_result = {}
    for evaluate in evaluate_index:
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}],
        }
        # The search is deliberately not wrapped in try/except: the previous
        # handler only re-raised the caught exception, which added nothing
        # and discarded the original traceback.
        hits = es_cluster.search(index=index_name, doc_type=index_type,
                                 body=query_body)['hits']['hits']
        max_result[evaluate] = hits[0]['_source'][evaluate]
    # The collected maxima were previously built but never returned.
    return max_result
def search_rank(index_name, start_point, size, index_type="bci"):
    """Return a slice of the ``user_index`` ranking from ``index_name``.

    Issues a ``match_all`` query through ``es`` sorted descending on
    ``user_index``, starting at offset ``start_point`` and returning up to
    ``size`` hits. ``_source`` is switched off in the request.

    Args:
        index_name: name of the Elasticsearch index to search.
        start_point: value sent as the query's ``from`` offset.
        size: value sent as the query's ``size``.
        index_type: document type to search; defaults to ``"bci"``.

    Returns:
        The raw ``hits.hits`` list of the search response.
    """
    query_body = dict([
        ("query", {"match_all": {}}),
        ("sort", [{"user_index": {"order": "desc"}}]),
        ("from", start_point),
        ("size", size),
    ])
    search_kwargs = {
        "index": index_name,
        "doc_type": index_type,
        "body": query_body,
        "_source": False,
    }
    return es.search(**search_kwargs)['hits']['hits']
def search_top_k(index_name, top_k):
    """Top-k recommendation query.

    Asks ``es_cluster`` for the ``top_k`` documents of ``index_name`` (doc
    type ``bci``) with the highest ``user_index`` and extracts their uids.

    Args:
        index_name: name of the Elasticsearch index to search.
        top_k: value sent as the query's ``size``.

    Returns:
        list of the ``uid`` values read from each hit's ``_source``, in the
        order the hits were returned.
    """
    ranking = [{"user_index": {"order": "desc"}}]
    query_body = {"query": {"match_all": {}}, "size": top_k, "sort": ranking}
    hits = es_cluster.search(
        index=index_name,
        doc_type="bci",
        body=query_body,
    )["hits"]["hits"]
    return [entry['_source']['uid'] for entry in hits]
def search_from_es(date):
    """Query the per-date ``bci`` index for its top ``k`` users.

    The index searched is ``'bci_' + date`` with all ``'-'`` characters
    stripped from ``date``. The number of hits requested, ``k``, comes from
    module scope. Hits are sorted descending on ``user_index`` and requested
    with ``_source=False`` and ``fields=['user_index']``.

    Args:
        date: date string used to build the index name (presumably
            hyphen-separated -- confirm with callers).

    Returns:
        tuple of (set of hit ``_id`` values, raw ``hits.hits`` list).
    """
    index_time = 'bci_' + date.replace('-', '')
    # Prints the index name being searched; the parenthesised single-argument
    # form gives the same output as the print statement.
    print(index_time)

    sort_spec = [{'user_index': {'order': 'desc'}}]
    query_body = {'query': {'match_all': {}}, 'size': k, 'sort': sort_spec}
    result = es_bci.search(
        index=index_time,
        doc_type='bci',
        body=query_body,
        _source=False,
        fields=['user_index'],
    )['hits']['hits']

    user_ids = set()
    for user_dict in result:
        user_ids.add(user_dict['_id'])
    return user_ids, result
def search_sensitive_weibo(index_name):
    """Sensitive weibo user recommendation.

    Sends a filtered query to ``es_cluster`` for ``index_name`` (doc type
    ``bci``). The filter is a bool ``should`` over two range clauses, one
    per counter field, each requiring the counter to be greater than 0:
    ``s_retweeted_weibo_number`` and ``s_origin_weibo_number``.

    Args:
        index_name: name of the Elasticsearch index to search.

    Returns:
        list of the ``uid`` values read from each hit's ``_source``.
    """
    counter_fields = ("s_retweeted_weibo_number", "s_origin_weibo_number")
    positive_counters = [
        {"range": {field: {"gt": 0}}} for field in counter_fields
    ]
    query_body = {
        "query": {
            "filtered": {
                "filter": {"bool": {"should": positive_counters}}
            }
        },
        # Very large size so the request is effectively uncapped.
        "size": 10000000,
    }
    hits = es_cluster.search(index=index_name, doc_type="bci",
                             body=query_body)['hits']['hits']
    return [item['_source']['uid'] for item in hits]