def utils_search(wxbot_id, period, startdate='', enddate=''):
    """Search sensitive text messages seen by the given wx bot in a period.

    Returns the raw ES hits (list of dicts) across all daily group-message
    indices in the range, each index sorted by sensitive_value descending.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    index_names = get_wx_groupmessage_index_list(ts2datetime(start_ts), ts2datetime(end_ts))
    index_names.reverse()  # newest day first
    xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"xnr_id": xnr_puid}},
                            {"term": {"sensitive_flag": 1}},
                            {'term': {'msg_type': 'Text'.lower()}}
                        ]
                    }
                }
            }
        },
        "size": MAX_VALUE,
        "sort": {"sensitive_value": {"order": "desc"}}
    }
    results = []
    for index_name in index_names:
        try:
            search_result = es_xnr.search(index=index_name,
                                          doc_type=wx_group_message_index_type,
                                          body=query_body)
            if search_result:
                results.extend(search_result['hits']['hits'])
        except Exception as e:
            # Daily indices may not exist for every date; skip missing days.
            pass
    # BUG FIX: the accumulated hits were never returned in the original.
    return results
def utils_show_report_content(wxbot_id, report_type, period, startdate, enddate): start_ts, end_ts, period = dump_date(period, startdate, enddate) xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid'] result = [] query_body = { 'query':{ 'bool':{ 'must':[ {'term':{'report_type': report_type}}, {'range':{'report_time':{'gte':start_ts, 'lte':end_ts}}}, {'term':{'xnr_user_no': wxbot_id}}, {'term':{'xnr_puid': xnr_puid}} ] } }, 'size': MAX_SEARCH_SIZE, 'sort': [{'report_time':{'order':'desc'}}] } try: wx_report_management_mappings() es_result = es_xnr.search(index=wx_report_management_index_name, doc_type=wx_report_management_index_type, body=query_body)['hits']['hits'] if es_result: result = [item['_source'] for item in es_result] except Exception,e: print e
def utils_show_sensitive_users(wxbot_id, period, startdate='', enddate=''):
    """Aggregate, per speaker, the sensitive text messages in the bot's groups.

    Returns a dict mapping speaker_id -> {nickname, count, last_speak_ts,
    groups_list}, accumulated over all daily indices in the period.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    index_names = get_wx_groupmessage_index_list(ts2datetime(start_ts), ts2datetime(end_ts))
    index_names.reverse()  # newest first: the first timestamp seen is the latest
    xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"xnr_id": xnr_puid}},
                            {"term": {"sensitive_flag": 1}},
                            {'term': {'msg_type': 'Text'.lower()}}
                        ]
                    }
                }
            }
        },
        "aggs": {
            "sen_users": {
                "terms": {"field": "speaker_id"}
            }
        },
        "sort": {"timestamp": {"order": "desc"}}
    }
    sensitive_users = {}
    for index_name in index_names:
        try:
            search_result = es_xnr.search(index=index_name,
                                          doc_type=wx_group_message_index_type,
                                          body=query_body)
            if not search_result:
                continue
            res = search_result['aggregations']['sen_users']['buckets']
            docs = search_result['hits']['hits']
            for r in res:
                speaker_id = r['key']
                count = r['doc_count']
                groups_list = [doc['_source']['group_name'] for doc in docs
                               if doc['_source']['speaker_id'] == speaker_id]
                if speaker_id in sensitive_users:
                    # Update groups & count only. Because indices are scanned in
                    # descending date order, last_speak_ts set on first sight is
                    # already the final (latest) value and needs no update.
                    sensitive_users[speaker_id]['count'] += count
                    sensitive_users[speaker_id]['groups_list'].extend(groups_list)
                else:
                    # BUG FIX: nickname/last_speak_ts could be unbound (or stale from
                    # a previous bucket) when no hit matches the bucket key, since the
                    # hits list is not guaranteed to cover every aggregation bucket.
                    nickname = ''
                    last_speak_ts = 0
                    for doc in docs:  # first matching hit is enough
                        if doc['_source']['speaker_id'] == speaker_id:
                            nickname = doc['_source']['speaker_name']
                            last_speak_ts = doc['_source']['timestamp']
                            break
                    sensitive_users[speaker_id] = {
                        'nickname': nickname,
                        'count': count,
                        'last_speak_ts': last_speak_ts,
                        'groups_list': groups_list
                    }
        except Exception as e:
            # Missing daily indices are skipped on purpose (best-effort scan).
            pass
    # BUG FIX: the accumulated mapping was never returned in the original.
    return sensitive_users
def utils_show_report_content(wxbot_id, report_type, period, startdate, enddate): start_ts, end_ts, period = dump_date(period, startdate, enddate) xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid'] result = [] query_body = { 'query': { 'bool': { 'must': [{ 'term': { 'report_type': report_type } }, { 'range': { 'report_time': { 'gte': start_ts, 'lte': end_ts } } }, { 'term': { 'xnr_user_no': wxbot_id } }, { 'term': { 'xnr_puid': xnr_puid } }] } }, 'size': MAX_SEARCH_SIZE, 'sort': [{ 'report_time': { 'order': 'desc' } }] } try: wx_report_management_mappings() es_result = es_xnr.search(index=wx_report_management_index_name, doc_type=wx_report_management_index_type, body=query_body)['hits']['hits'] if es_result: for item in es_result: try: data = item['_source'] data['_id'] = item['_id'] report_content = eval(item['_source']['report_content']) data['sensitive_value'] = report_content['sensitive_value'] data['text'] = report_content['text'] data['sensitive_words_string'] = report_content[ 'sensitive_words_string'].decode('utf8') data.pop('report_content') result.append(data) except: pass except Exception, e: print 'wx_report_management Exception: ', str(e)
def utils_get_safe(wxbot_id, period, startdate, enddate):
    # Collect the xnr's speech counts (today + historical total) for the
    # "safety/activity" metric.
    # NOTE(review): this variant looks truncated — today_count/total_count are
    # computed but nothing is combined or returned in the visible code, and
    # speak_dict is initialized yet never filled. Compare the fuller sibling
    # implementation of utils_get_safe elsewhere in this file.
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:
        # fetch today's data
        current_time = datetime2ts(current_date)
        last_date = ts2datetime(current_time-DAY)  # yesterday, key of the history doc
        speak_dict = {}
        speak_dict['speak_day'] = {}
        speak_dict['speak_total'] = {}
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
        # total number of messages the xnr itself sent today
        today_count = 0
        query_body = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'speaker_id': xnr_puid}},
                        {'term': {'xnr_id': xnr_puid}}
                    ]
                }
            }
        }
        today_index_name = wx_group_message_index_name_pre + current_date
        try:
            today_count_result = es_xnr.count(index=today_index_name,doc_type=wx_group_message_index_type,body=query_body)
            if today_count_result['_shards']['successful'] != 0:
                today_count = today_count_result['count']
        except Exception,e:
            # best effort: missing daily index leaves today_count at 0
            pass
            #print 'today_count Exception: ', str(e)
        # historical total number of messages the xnr has sent (as of yesterday)
        total_count = 0
        total_query_body = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'xnr_user_no': wxbot_id}},
                        {'term': {'puid': xnr_puid}},
                        {'term': {'date_time': last_date}}
                    ]
                }
            }
        }
        total_index_name = wx_xnr_history_count_index_name
        try:
            total_count_result = es_xnr.search(index=total_index_name,doc_type=wx_xnr_history_count_index_type,body=total_query_body)
            if total_count_result['_shards']['successful'] != 0:
                total_count = total_count_result['hits']['hits'][0]['_source']['total_post_num']
        except Exception,e:
            # best effort: no history doc leaves total_count at 0
            pass
def get_penetration_num(xnr_user_no):
    # Average sensitive_value over yesterday's messages in the bot's groups.
    # NOTE(review): this block looks truncated — sensitive_value is computed
    # but never returned or used in the visible code.
    current_timestamp = int(time.time() - DAY)  # yesterday
    current_date = ts2datetime(current_timestamp)
    current_time = datetime2ts(current_date)
    xnr_data = load_wxxnr_redis_data(wxbot_id=xnr_user_no,
                                     items=['puid', 'groups_list'])
    puid = xnr_data['puid']
    group_list = xnr_data['groups_list']
    # query 1: average sensitivity across the bot's groups
    sensitive_value = 0
    wx_group_message_index_name = wx_group_message_index_name_pre + current_date
    query_body_info = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'terms': {
                                'group_id': group_list
                            }
                        }, {
                            # gte -1 acts as an exists-filter on sensitive_value
                            'range': {
                                'sensitive_value': {
                                    'gte': -1
                                }
                            }
                        }]
                    }
                }
            }
        },
        'aggs': {
            'avg_sensitive': {
                'avg': {
                    'field': 'sensitive_value'
                }
            }
        }
    }
    try:
        es_sensitive_result = es_xnr.search(
            index=wx_group_message_index_name,
            doc_type=wx_group_message_index_type,
            body=query_body_info)['aggregations']
        sensitive_value = es_sensitive_result['avg_sensitive']['value']
        if sensitive_value == None:
            # avg aggregation yields None when no document matched
            sensitive_value = 0
    except Exception, e:
        print 'sensitive_value Exception: ', str(e)
def utils_get_influence(wxbot_id, period, startdate, enddate):
    # Influence inputs: number of @-mentions of the xnr today, and the total
    # across all daily indices since the assessment start date.
    # NOTE(review): this variant looks truncated — at_num_xnr / at_num_total
    # are computed but nothing is combined or returned in the visible code.
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:
        # fetch today's data
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
        current_time = datetime2ts(current_date)
        query_at_num = {
            'query': {
                'bool': {
                    'must': [{
                        'term': {
                            'xnr_id': xnr_puid
                        }
                    }, {
                        'term': {
                            'at_flag': 1
                        }
                    }]
                }
            }
        }
        # number of times the xnr was @-mentioned today
        at_num_xnr = 0
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        try:
            results_xnr = es_xnr.count(index=wx_group_message_index_name,
                                       doc_type=wx_group_message_index_type,
                                       body=query_at_num)
            if results_xnr['_shards']['successful'] != 0:
                at_num_xnr = results_xnr['count']
        except Exception, e:
            print 'at_num_xnr Exception: ', str(e)
        # total @-mentions of the xnr across every daily index up to now
        at_num_total = 0
        wx_group_message_index_list = get_wx_groupmessage_index_list(
            WX_GROUP_MESSAGE_START_DATE_ASSESSMENT, ts2datetime(current_time))
        for index_name in wx_group_message_index_list:
            try:
                r = es_xnr.count(index=index_name,
                                 doc_type=wx_group_message_index_type,
                                 body=query_at_num)
                if r['_shards']['successful'] != 0:
                    at_num_total += r['count']
            except Exception, e:
                # best effort: skip days whose index is missing
                pass
def utils_report_warning_content(wxbot_id, report_type, report_time, speaker_id, wx_content_info_str): xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid'] report_dict = { 'report_type': report_type, 'report_time': report_time, 'xnr_user_no': wxbot_id, 'xnr_puid': xnr_puid, 'speaker_id': speaker_id, 'report_content': wx_content_info_str } report_id = wxbot_id + '_' + str(report_time) mark = 0 try: wx_report_management_mappings() es_xnr.index(index=wx_report_management_index_name, doc_type=wx_report_management_index_type, id=report_id,body=report_dict) mark = 1 except Exception,e: print e
def utils_get_penetration(wxbot_id, period, startdate, enddate):
    # Penetration inputs for today: average and maximum sensitive_value over
    # the bot's groups.
    # NOTE(review): this variant looks truncated — sensitive_value and
    # max_sensitive are computed but nothing is combined or returned in the
    # visible code. Compare the fuller sibling implementation in this file.
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:
        # fetch today's data
        current_time = datetime2ts(current_date)
        xnr_data = load_wxxnr_redis_data(wxbot_id=wxbot_id,
                                         items=['puid', 'groups_list'])
        puid = xnr_data['puid']
        group_list = xnr_data['groups_list']
        # query 1: average sensitivity across the bot's groups
        sensitive_value = 0
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        query_body_info = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [{
                                'terms': {
                                    'group_id': group_list
                                }
                            }, {
                                'range': {
                                    'sensitive_value': {
                                        'gte': -1
                                    }
                                }
                            }]
                        }
                    }
                }
            },
            'aggs': {
                'avg_sensitive': {
                    'avg': {
                        'field': 'sensitive_value'
                    }
                }
            }
        }
        try:
            es_sensitive_result = es_xnr.search(
                index=wx_group_message_index_name,
                doc_type=wx_group_message_index_type,
                body=query_body_info)['aggregations']
            sensitive_value = es_sensitive_result['avg_sensitive']['value']
            if sensitive_value == None:
                # avg aggregation yields None when no document matched
                sensitive_value = 0
        except Exception, e:
            print 'sensitive_value Exception: ', str(e)
        # query 2: highest sensitive_value today (sort desc, take first hit)
        max_sensitive = 0
        query_body_max = {
            "query": {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [
                                {
                                    'terms': {
                                        'group_id': group_list
                                    }
                                },
                                {
                                    "range": {
                                        "sensitive_value": {
                                            # range gte -1 used as a stand-in for an exists query
                                            "gte": -1
                                        }
                                    }
                                }
                            ]
                        }
                    }
                }
            },
            'sort': {
                'sensitive_value': {
                    'order': 'desc'
                }
            }
        }
        try:
            max_results = es_xnr.search(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,\
                            body=query_body_max)['hits']['hits']
            max_sensitive = max_results[0]['_source']['sensitive_value']
        except Exception, e:
            print 'max_sensitive Exception: ', str(e)
def utils_get_safe(wxbot_id, period, startdate, enddate): start_ts, end_ts, period = dump_date(period, startdate, enddate) current_timestamp = int(time.time()) current_date = ts2datetime(current_timestamp) if period == 0: #获取今天的数据 current_time = datetime2ts(current_date) last_date = ts2datetime(current_time-DAY) speak_dict = {} speak_dict['speak_day'] = {} speak_dict['speak_total'] = {} xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid'] #获取今日发言总数 query_body = { 'query':{ 'bool':{ 'must':[ {'term':{'speaker_id': xnr_puid}}, {'term':{'xnr_id':xnr_puid}} ] } } } today_index_name = wx_group_message_index_name_pre + current_date today_count_result = es_xnr.count(index=today_index_name,doc_type=wx_group_message_index_type,body=query_body) if today_count_result['_shards']['successful'] != 0: today_count = today_count_result['count'] else: print 'es index rank error' today_count = 0 #获取历史发言总数 total_query_body = { 'query':{ 'bool':{ 'must':[ {'term':{'xnr_user_no': wxbot_id}}, {'term':{'puid':xnr_puid}}, {'term':{'date_time':last_date}} ] } } } total_index_name = wx_xnr_history_count_index_name try: total_count_result = es_xnr.search(index=total_index_name,doc_type=wx_xnr_history_count_index_type,body=total_query_body) if total_count_result['_shards']['successful'] != 0: total_count = total_count_result['hits']['hits'][0]['_source']['total_post_num'] except Exception,e: print e total_count = 0 #包括今天在内的发言总数 total_count_totay = total_count + today_count #发言次数最大值 query_body_total_day = { 'query':{ 'filtered':{ 'filter':{ 'term':{'xnr_id':xnr_puid} } } }, 'aggs':{ 'all_speakers':{ 'terms':{'field':'speaker_id',"order" : { "_count" : "desc" }} } } } try: results_total_day = es_xnr.search(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,\ body=query_body_total_day)['aggregations']['all_speakers']['buckets'] speaker_max = results_total_day[0]['doc_count'] except: speaker_max = today_count #整合 speak_dict = dict() 
speak_dict['speak_today'] = {} speak_dict['speak_total'] = {} speak_dict['speak_today'][current_time] = today_count speak_dict['speak_total'][current_time] = total_count_totay safe_active = (float(math.log(today_count+1))/(math.log(speaker_max+1)+1))*100 safe_active = round(safe_active,2) # 保留两位小数 speak_dict['mark'] = safe_active return speak_dict
def utils_get_influence(wxbot_id, period, startdate, enddate): start_ts, end_ts, period = dump_date(period, startdate, enddate) current_timestamp = int(time.time()) current_date = ts2datetime(current_timestamp) if period == 0: #获取今天的数据 xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid'] current_time = datetime2ts(current_date) query_at_num = { 'query':{ 'bool':{ 'must':[ {'term':{'xnr_id':xnr_puid}}, {'term':{'at_flag':1}} ] } } } #虚拟人今天被@数量 wx_group_message_index_name = wx_group_message_index_name_pre + current_date try: results_xnr = es_xnr.count(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,body=query_at_num) if results_xnr['_shards']['successful'] != 0: at_num_xnr = results_xnr['count'] else: print 'es index rank error' at_num_xnr = 0 except: at_num_xnr = 0 # 截止目前所有被@总数 wx_group_message_index_list = get_wx_groupmessage_index_list(WX_GROUP_MESSAGE_START_DATE_ASSESSMENT,ts2datetime(current_time)) at_num_total = 0 for index_name in wx_group_message_index_list: r = es_xnr.count(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,body=query_at_num) if r['_shards']['successful'] != 0: at_num_total += r['count'] #查询所有人被@的次数 query_body_total_day = { 'query':{ 'bool':{ 'must':[ {'term':{'xnr_id':xnr_puid}}, {'wildcard':{'text':'*'+'@'+'*'}} ] } } } try: results_total_day = es_xnr.count(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,body=query_body_total_day) if results_total_day['_shards']['successful'] != 0: at_num_total_day = results_total_day['count'] else: print 'es index rank error' at_num_total_day = 0 except: at_num_total_day = 0 #统计 at_dict = {} at_dict['at_day'] = {} at_dict['at_total'] = {} at_dict['at_day'][current_time] = at_num_xnr at_dict['at_total'][current_time] = at_num_total influence = (float(math.log(at_num_xnr+1))/(math.log(at_num_total_day+1)+1))*100 influence = round(influence,2) # 保留两位小数 at_dict['mark'] = influence return at_dict else: at_dict = {} 
at_dict['at_day'] = {} at_dict['at_total'] = {} query_body = { 'query':{ 'filtered':{ 'filter':{ 'bool':{ 'must':[ {'term':{'xnr_user_no':wxbot_id}}, {'range':{'timestamp':{'gte':start_ts,'lte':end_ts}}} ] } } } }, 'size':MAX_SEARCH_SIZE, 'sort':{'timestamp':{'order':'asc'}} } search_results = es_xnr.search(index=wx_xnr_history_count_index_name,doc_type=wx_xnr_history_count_index_type,\ body=query_body)['hits']['hits'] #初始化 ts_list = load_timestamp_list(start_ts, end_ts) for ts in ts_list: at_dict['at_day'][ts] = 0 at_dict['at_total'][ts] = 0 at_dict['mark'] = 0 #填充数据 for result in search_results: result = result['_source'] timestamp = result['timestamp'] at_dict['at_day'][timestamp] = result['daily_be_at_num'] at_dict['at_total'][timestamp] = result['total_be_at_num'] at_dict['mark'] = result['influence'] return at_dict
def utils_get_penetration(wxbot_id, period, startdate, enddate):
    """Penetration metric: average sensitivity of the bot's groups vs the max.

    period == 0: computed live from today's wx group-message index; otherwise
    read back from the per-day history-count index.
    Returns {'sensitive_info': {ts: value}, 'mark': score}.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:
        # fetch today's data
        current_time = datetime2ts(current_date)
        xnr_data = load_wxxnr_redis_data(wxbot_id=wxbot_id,
                                         items=['puid', 'groups_list'])
        puid = xnr_data['puid']
        group_list = xnr_data['groups_list']
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        # query 1: average sensitive_value over the bot's groups
        query_body_info = {
            'query': {
                'filtered': {
                    'filter': {
                        'terms': {'group_id': group_list}
                    }
                }
            },
            'aggs': {
                'avg_sensitive': {
                    'avg': {
                        'field': 'sensitive_value'
                    }
                }
            }
        }
        try:
            es_sensitive_result = es_xnr.search(index=wx_group_message_index_name,
                                                doc_type=wx_group_message_index_type,
                                                body=query_body_info)['aggregations']
            sensitive_value = es_sensitive_result['avg_sensitive']['value']
            if sensitive_value == None:
                # avg aggregation yields None when no document matched
                sensitive_value = 0
        except:
            sensitive_value = 0
        # query 2: highest sensitive_value today (sort desc, take first hit)
        query_body_max = {
            'query': {
                'filtered': {
                    'filter': {
                        'terms': {'group_id': group_list}
                    }
                }
            },
            'sort': {'sensitive_value': {'order': 'desc'}}
        }
        try:
            # BUG FIX: the original queried group_message_index_name /
            # group_message_index_type; the sibling implementation in this file
            # queries the wx group-message index, which is the intended target.
            max_results = es_xnr.search(index=wx_group_message_index_name,
                                        doc_type=wx_group_message_index_type,
                                        body=query_body_max)['hits']['hits']
            max_sensitive = max_results[0]['_source']['sensitive_value']
        except:
            max_sensitive = 0
        # assemble the result
        follow_group_sensitive = {'sensitive_info': {current_time: sensitive_value}}
        penetration = (math.log(sensitive_value + 1) / (math.log(max_sensitive + 1) + 1)) * 100
        penetration = round(penetration, 2)  # keep two decimal places
        follow_group_sensitive['mark'] = penetration
        return follow_group_sensitive
    else:
        # historical periods come from the precomputed per-day counters
        follow_group_sensitive = {}
        follow_group_sensitive['sensitive_info'] = {}
        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [
                                {'term': {'xnr_user_no': wxbot_id}},
                                {'range': {'timestamp': {'gte': start_ts, 'lte': end_ts}}}
                            ]
                        }
                    }
                }
            },
            'size': MAX_SEARCH_SIZE,
            'sort': {'timestamp': {'order': 'asc'}}
        }
        search_results = es_xnr.search(index=wx_xnr_history_count_index_name,
                                       doc_type=wx_xnr_history_count_index_type,
                                       body=query_body)['hits']['hits']
        # initialize every day in the range to zero
        ts_list = load_timestamp_list(start_ts, end_ts)
        for ts in ts_list:
            follow_group_sensitive['sensitive_info'][ts] = 0
        follow_group_sensitive['mark'] = 0
        # fill in stored daily values; 'mark' ends up as the latest penetration
        for result in search_results:
            result = result['_source']
            timestamp = result['timestamp']
            follow_group_sensitive['sensitive_info'][timestamp] = result['daily_sensitive_num']
            follow_group_sensitive['mark'] = result['penetration']
        return follow_group_sensitive