def search_by_xnr_number(xnr_qq_number, current_date, group_qq_name):
    """Collect recent group messages of one XNR QQ account for the given groups.

    Used to populate the initial group-history view of the operations page.
    The search window ends at ``current_date`` and reaches back
    ``group_message_windowsize`` days; one Elasticsearch index per day is
    queried, newest index first.

    NOTE(review): another ``search_by_xnr_number`` taking two arguments
    appears later in this source; if both live in the same module the later
    definition shadows this one -- confirm which is intended.

    :param xnr_qq_number: value matched against the ``xnr_qq_number`` field.
    :param current_date: end date of the window, in whatever format
        ``datetime2ts`` accepts.
    :param group_qq_name: comma-separated group nicknames (text or UTF-8
        bytes) matched against ``qq_group_nickname``.
    :return: the raw response dict of the first index that answered, with the
        ``hits.hits`` of every further index appended; ``{}`` when no index
        could be queried.  Other response fields (e.g. totals) are those of
        the first response only.
    """
    # Encode only real text: calling .encode() on a byte string that already
    # holds non-ASCII UTF-8 triggers an implicit ASCII decode on Python 2 and
    # raises UnicodeDecodeError.
    if not isinstance(group_qq_name, bytes):
        group_qq_name = group_qq_name.encode('utf-8')
    group_qq_name_list = group_qq_name.split(b',')

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"xnr_qq_number": xnr_qq_number}},
                            {'terms': {'qq_group_nickname': group_qq_name_list}},
                        ]
                    }
                }
            }
        },
        "size": MAX_VALUE,
        "sort": {"timestamp": {"order": "desc"}},
    }

    enddate = current_date
    startdate = ts2datetime(
        datetime2ts(enddate) - group_message_windowsize * DAY)
    index_names = get_groupmessage_index_list(startdate, enddate)
    # Single-argument form prints the same text on Python 2 and Python 3.
    print('index_names:: %s' % (index_names,))
    index_names.reverse()

    results = {}
    for index_name in index_names:
        try:
            result = es_xnr.search(index=index_name,
                                   doc_type=group_message_index_type,
                                   body=query_body)
            if results:
                results['hits']['hits'].extend(result['hits']['hits'])
            else:
                results = result
        except Exception:
            # Best effort: an index that is missing or fails to answer is
            # skipped.  KeyboardInterrupt/SystemExit are no longer swallowed.
            continue
    return results
def search_by_period(xnr_qq_number, startdate, enddate, group_qq_name):
    """Collect group messages of one XNR QQ account within a date range.

    One Elasticsearch index per day between ``startdate`` and ``enddate`` (as
    listed by ``get_groupmessage_index_list``) is queried, newest index first.

    NOTE(review): another ``search_by_period`` taking three arguments appears
    later in this source; if both live in the same module the later
    definition shadows this one -- confirm which is intended.

    :param xnr_qq_number: value matched against the ``xnr_qq_number`` field.
    :param startdate: first date of the range.
    :param enddate: last date of the range.
    :param group_qq_name: comma-separated group nicknames (text or UTF-8
        bytes) matched against ``qq_group_nickname``.
    :return: a shallow copy of the first response that answered, with the
        ``hits.hits`` of every further index appended; when nothing could be
        queried, ``{'hits': {'hits': []}}``.
    """
    # Encode only real text: calling .encode() on a byte string that already
    # holds non-ASCII UTF-8 triggers an implicit ASCII decode on Python 2 and
    # raises UnicodeDecodeError.
    if not isinstance(group_qq_name, bytes):
        group_qq_name = group_qq_name.encode('utf-8')
    group_qq_name_list = group_qq_name.split(b',')

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"xnr_qq_number": xnr_qq_number}},
                            {'terms': {'qq_group_nickname': group_qq_name_list}},
                        ]
                    }
                }
            }
        },
        "size": MAX_VALUE,
        "sort": {"timestamp": {"order": "desc"}},
    }

    index_names = get_groupmessage_index_list(startdate, enddate)
    index_names.reverse()

    results = {}
    for index_name in index_names:
        print(index_name)
        try:
            result = es_xnr.search(index=index_name,
                                   doc_type=group_message_index_type,
                                   body=query_body)
            # The full response is deliberately not printed: with
            # size=MAX_VALUE that dumps every matching document to stdout.
            if results:
                results['hits']['hits'].extend(result['hits']['hits'])
            else:
                results = result.copy()
        except Exception:
            # Best effort: an index that is missing or fails to answer is
            # skipped.  KeyboardInterrupt/SystemExit are no longer swallowed.
            continue

    if not results:
        # Keep the response shape stable for callers that index hits.hits.
        results = {'hits': {'hits': []}}
    return results
def search_by_xnr_number(xnr_qq_number, current_date):
    """Collect recent sensitive group messages of one XNR QQ account.

    Used to populate the initial group-history view of the operations page.
    The search window ends at ``current_date`` and reaches back
    ``group_message_windowsize`` days; one Elasticsearch index per day is
    queried.  Hits are sorted by ``sensitive_value`` (descending) within each
    index and concatenated in index order.

    :param xnr_qq_number: value matched against the ``xnr_qq_number`` field.
    :param current_date: end date of the window, in whatever format
        ``datetime2ts`` accepts.
    :return: list of ``_source`` dicts, each extended with the document's
        ``_id``; empty when nothing matched or no index could be queried.
    """
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        # Two separate clauses.  Putting both "term" keys in
                        # one dict literal keeps only the last one, which
                        # silently dropped the xnr_qq_number restriction.
                        "must": [
                            {"term": {"xnr_qq_number": xnr_qq_number}},
                            {"term": {"sensitive_flag": 1}},
                        ]
                    }
                }
            }
        },
        "size": MAX_VALUE,
        "sort": {"sensitive_value": {"order": "desc"}},
    }

    enddate = current_date
    startdate = ts2datetime(
        datetime2ts(enddate) - group_message_windowsize * DAY)
    index_names = get_groupmessage_index_list(startdate, enddate)

    results = []
    for index_name in index_names:
        try:
            hits = es_xnr.search(index=index_name,
                                 doc_type=group_message_index_type,
                                 body=query_body)['hits']['hits']
            for item in hits:
                # Expose the document id alongside the stored fields.
                item['_source']['_id'] = item['_id']
                results.append(item['_source'])
        except Exception:
            # Best effort: an index that is missing or fails to answer is
            # skipped.  KeyboardInterrupt/SystemExit are no longer swallowed.
            continue
    return results
def search_by_period(xnr_qq_number, startdate, enddate):
    """Collect sensitive group messages of one XNR QQ account in a date range.

    One Elasticsearch index per day between ``startdate`` and ``enddate`` (as
    listed by ``get_groupmessage_index_list``) is queried.  Hits are sorted by
    ``timestamp`` (descending) within each index and concatenated in index
    order.

    :param xnr_qq_number: value matched against the ``xnr_qq_number`` field.
    :param startdate: first date of the range.
    :param enddate: last date of the range.
    :return: list of ``_source`` dicts, each extended with the document's
        ``_id``; empty when nothing matched or no index could be queried.
    """
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        # Two separate clauses.  Putting both "term" keys in
                        # one dict literal keeps only the last one, which
                        # silently dropped the xnr_qq_number restriction.
                        "must": [
                            {"term": {"xnr_qq_number": xnr_qq_number}},
                            {"term": {"sensitive_flag": 1}},
                        ]
                    }
                }
            }
        },
        "size": MAX_VALUE,
        "sort": {"timestamp": {"order": "desc"}},
    }

    index_names = get_groupmessage_index_list(startdate, enddate)

    results = []
    for index_name in index_names:
        try:
            hits = es_xnr.search(index=index_name,
                                 doc_type=group_message_index_type,
                                 body=query_body)['hits']['hits']
            for item in hits:
                # Expose the document id alongside the stored fields.
                item['_source']['_id'] = item['_id']
                results.append(item['_source'])
        except Exception:
            # Best effort: an index that is missing or fails to answer is
            # skipped.  KeyboardInterrupt/SystemExit are no longer swallowed.
            continue
    return results
def aggr_sen_users(xnr_qq_number, startdate, enddate): # print 'startdate:',startdate,type(startdate) start_ts = datetime2ts(startdate) end_ts = datetime2ts(enddate) query_body = { "query": { "bool": { "must": [{ 'term': { 'xnr_qq_number': xnr_qq_number } }, { "term": { "sensitive_flag": 1 } }, { 'range': { 'timestamp': { 'gte': start_ts, 'lt': end_ts } } }] } }, "aggs": { "all_senusers": { # "terms":{"field": "speaker_qq_number"} "terms": { "field": "speaker_nickname" } } } } #enddate = datetime.datetime.now().strftime('%Y-%m-%d') #startdate = ts2datetime(datetime2ts(enddate)-group_message_windowsize*DAY) index_names = get_groupmessage_index_list(startdate, enddate) print index_names results = [] for index_name in index_names: try: result = es_xnr.search(index=index_name,\ doc_type=group_message_index_type,\ body=query_body)["aggregations"]["all_senusers"]["buckets"] except Exception, e: result = [] print 'index_name,result:', index_name, result if result != []: for item in result: # print 'item:',item inner_item = {} # inner_item['qq_number'] = item['key'] inner_item['qq_nick'] = item['key'] inner_item['count'] = item['doc_count'] info = get_speaker_info(item['key'], index_name) if info == {}: # inner_item['qq_nick'] = '' inner_item['qq_number'] = '' inner_item['qq_groups'] = '' inner_item['last_speak_ts'] = '' inner_item['text'] = [] else: # inner_item['qq_nick'] = info['qq_nick'] inner_item['qq_number'] = info['qq_number'] inner_item['qq_groups'] = info['qq_groups'] inner_item['last_speak_ts'] = info['last_speak_ts'] inner_item['text'] = info['text'] flag = 1 for aa in results: #检验是否已经在结果中 # if aa['qq_number'] == inner_item['qq_number']: if aa['qq_nick'] == inner_item['qq_nick']: aa['count'] += inner_item['count'] aa['last_speak_ts'] = inner_item['last_speak_ts'] aa['qq_groups'].update( inner_item['qq_groups']) # 多个群发言的更新 aa['text'].extend(inner_item['text']) flag = 0 continue if flag: results.append(inner_item)