Ejemplo n.º 1
0
def utils_search(wxbot_id, period, startdate='', enddate=''):
    """Gather sensitive text messages of one bot from the group-message indices.

    The period arguments are resolved through ``dump_date`` into a start/end
    pair, the matching index names are looked up, and every index is searched
    for documents whose ``xnr_id`` equals the bot's puid, whose
    ``sensitive_flag`` is 1 and whose ``msg_type`` is ``'text'``.  Raw hits
    from all indices are accumulated in ``results``.

    NOTE(review): only the beginning of this function is visible here; what
    is done with ``results`` after the loop (including the return value) is
    not shown.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    index_names = get_wx_groupmessage_index_list(ts2datetime(start_ts), ts2datetime(end_ts))
    # Walk the indices in the opposite order to the one returned by the helper
    # (presumably newest first - confirm against get_wx_groupmessage_index_list).
    index_names.reverse()
    # The bot's puid is stored in redis and is the value indexed as xnr_id.
    xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
    # Filter-only query; hits inside a single index are ordered by descending
    # sensitive_value and capped at MAX_VALUE documents.
    query_body = {
        "query": {
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"term":{"xnr_id": xnr_puid}},
                            {"term":{"sensitive_flag": 1}},
                            {'term':{'msg_type':'Text'.lower()}}
                        ]
                    }
                }
            }
        },
        "size": MAX_VALUE,
        "sort":{"sensitive_value":{"order":"desc"}}
    }
    results = []
    for index_name in index_names:
        try:
            search_result = es_xnr.search(index=index_name, doc_type=wx_group_message_index_type,body=query_body)
            if search_result:
                results.extend(search_result['hits']['hits'])
        except Exception,e:
            # NOTE(review): every failure (missing index, malformed response,
            # ...) is silently ignored, so the index simply contributes nothing.
            pass
Ejemplo n.º 2
0
def utils_show_sensitive_users(wxbot_id, period, startdate='', enddate=''):
    """Aggregate, per speaker, the sensitive text messages seen by one bot.

    For every group-message index in the requested period the function runs a
    terms aggregation on ``speaker_id`` and merges the buckets into
    ``sensitive_users``, a dict keyed by speaker id whose values hold
    ``nickname``, ``count``, ``last_speak_ts`` and ``groups_list``.

    NOTE(review): only the beginning of this function is visible here; what
    is done with ``sensitive_users`` after the loop (including the return
    value) is not shown.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    index_names = get_wx_groupmessage_index_list(ts2datetime(start_ts), ts2datetime(end_ts))
    # Reverse the helper's order (presumably so the newest index comes first,
    # which the merge logic below relies on - confirm against the helper).
    index_names.reverse()
    xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
    # NOTE(review): the query sets no "size", so `hits` only contains the
    # server's default page of documents while the aggregation covers all
    # matches; speakers outside that page get no groups/nickname from `docs`.
    query_body = {
        "query": {
            "filtered":{
                "filter":{
                    "bool":{
                        "must":[
                            {"term":{"xnr_id": xnr_puid}},
                            {"term":{"sensitive_flag": 1}},
                            {'term':{'msg_type':'Text'.lower()}}
                        ]
                    }
                }
            }
        },
        "aggs":{
            "sen_users":{
                "terms":{"field": "speaker_id"}
            }
        },
        "sort":{"timestamp":{"order":"desc"}}
    }
    sensitive_users = {}
    for index_name in index_names:
        try:
            search_result = es_xnr.search(index=index_name, doc_type=wx_group_message_index_type,body=query_body)
            if search_result:
                # One bucket per speaker: key = speaker_id, doc_count = messages.
                res = search_result['aggregations']['sen_users']['buckets']
                docs = search_result['hits']['hits']
                for r in res:
                    groups_list = []
                    speaker_id = r['key']
                    count = r['doc_count']
                    # Collect the group name of every returned hit by this
                    # speaker (duplicates are kept).
                    for doc in docs:
                        if doc['_source']['speaker_id'] == speaker_id:
                            groups_list.append(doc['_source']['group_name'])
                    if speaker_id in sensitive_users:
                        # Update groups and count only. Per the original author:
                        # because the query runs in reverse (descending) order,
                        # last_speak_ts already has its final value when the
                        # entry is first created and needs no update.
                        sensitive_users[speaker_id]['count'] += count
                        sensitive_users[speaker_id]['groups_list'].extend(groups_list)
                    else:
                        # The first matching hit is enough.
                        # NOTE(review): if no hit matches this speaker,
                        # nickname/last_speak_ts are either left over from a
                        # previous speaker or unbound (NameError, swallowed by
                        # the except below, which abandons the rest of this index).
                        for doc in docs:
                            if doc['_source']['speaker_id'] == speaker_id:
                                nickname = doc['_source']['speaker_name']
                                last_speak_ts = doc['_source']['timestamp']
                                break
                        sensitive_users[speaker_id] = {
                            'nickname': nickname,
                            'count': count,
                            'last_speak_ts': last_speak_ts,
                            'groups_list': groups_list
                        }
        except Exception,e:
            # NOTE(review): all errors are silently ignored for this index.
            pass
Ejemplo n.º 3
0
def utils_get_influence(wxbot_id, period, startdate, enddate):
    """Compute @-mention counts for one bot (beginning of the function only).

    In the ``period == 0`` branch shown here the function counts documents
    with ``at_flag == 1`` for the bot's puid, first in today's group-message
    index (``at_num_xnr``) and then summed over every index returned for the
    range from WX_GROUP_MESSAGE_START_DATE_ASSESSMENT to today
    (``at_num_total``).

    NOTE(review): the rest of the function, including the other branch and
    the return value, is not visible in this excerpt.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:  # fetch today's data
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id,
                                         items=['puid'])['puid']
        # Convert the date string back to a timestamp (presumably the start
        # of the current day - confirm against datetime2ts).
        current_time = datetime2ts(current_date)
        # Matches messages of this bot that are flagged as @-mentions.
        query_at_num = {
            'query': {
                'bool': {
                    'must': [{
                        'term': {
                            'xnr_id': xnr_puid
                        }
                    }, {
                        'term': {
                            'at_flag': 1
                        }
                    }]
                }
            }
        }
        # Number of times the virtual persona was @-mentioned today.
        at_num_xnr = 0
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        try:
            results_xnr = es_xnr.count(index=wx_group_message_index_name,
                                       doc_type=wx_group_message_index_type,
                                       body=query_at_num)
            # Only trust the count when at least one shard answered.
            if results_xnr['_shards']['successful'] != 0:
                at_num_xnr = results_xnr['count']
        except Exception, e:
            print 'at_num_xnr Exception: ', str(e)

        # Total number of @-mentions up to now.
        at_num_total = 0
        wx_group_message_index_list = get_wx_groupmessage_index_list(
            WX_GROUP_MESSAGE_START_DATE_ASSESSMENT, ts2datetime(current_time))
        for index_name in wx_group_message_index_list:
            try:
                r = es_xnr.count(index=index_name,
                                 doc_type=wx_group_message_index_type,
                                 body=query_at_num)
                if r['_shards']['successful'] != 0:
                    at_num_total += r['count']
            except Exception, e:
                # An index that cannot be counted contributes nothing; the
                # error is silently ignored.
                pass
Ejemplo n.º 4
0
        }
    }
    #虚拟人今天被@数量
    at_num_xnr = 0
    wx_group_message_index_name = wx_group_message_index_name_pre + current_date
    try:
        results_xnr = es_xnr.count(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,body=query_at_num)
        if results_xnr['_shards']['successful'] != 0:
           at_num_xnr = results_xnr['count']
    except Exception,e:
        #print 'at_num_xnr Exception: ', str(e)
		pass

    # 截止目前所有被@总数
    at_num_total = 0
    wx_group_message_index_list = get_wx_groupmessage_index_list(WX_GROUP_MESSAGE_START_DATE_ASSESSMENT,ts2datetime(current_time))
    for index_name in wx_group_message_index_list:
		try:
    		r = es_xnr.count(index=index_name,doc_type=wx_group_message_index_type,body=query_at_num)
        	if r['_shards']['successful'] != 0:
            	at_num_total += r['count']
    	except Exception,e:
	    	pass


    #查询所有人被@的次数
    at_num_total_day = 0
    query_body_total_day = {
        'query':{
            'bool':{
                'must':[
Ejemplo n.º 5
0
def _count_wx_group_messages(index_name, query_body):
    """Return how many group messages in ``index_name`` match ``query_body``.

    Best effort: if the count request fails, or no shard answered
    successfully, a diagnostic is printed and 0 is returned so that one bad
    index cannot abort the whole influence computation.
    """
    try:
        response = es_xnr.count(index=index_name,
                                doc_type=wx_group_message_index_type,
                                body=query_body)
        if response['_shards']['successful'] != 0:
            return response['count']
        print('es index rank error')
    except Exception as e:
        print('es count failed for index %s: %s' % (index_name, e))
    return 0


def utils_get_influence(wxbot_id, period, startdate, enddate):
    """Return the @-mention statistics and influence score of one bot.

    Args:
        wxbot_id: identifier of the bot; used to look up its puid in redis
            (today's data) or as ``xnr_user_no`` in the history index.
        period, startdate, enddate: passed to ``dump_date``; the normalised
            ``period`` it returns selects the branch (0 means "today").

    Returns:
        dict with three keys:
          * ``'at_day'``   - {timestamp: @-mentions of the bot on that day}
          * ``'at_total'`` - {timestamp: cumulative @-mentions of the bot}
          * ``'mark'``     - influence score (0 when no history is found).

    For ``period == 0`` the numbers are computed live from the group-message
    indices; otherwise they are read from the pre-computed history index.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    at_dict = {'at_day': {}, 'at_total': {}}

    if period == 0:  # today's data, computed live
        current_date = ts2datetime(int(time.time()))
        current_time = datetime2ts(current_date)
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
        today_index = wx_group_message_index_name_pre + current_date

        # Messages of this bot that are flagged as @-mentions of it.
        query_at_num = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'xnr_id': xnr_puid}},
                        {'term': {'at_flag': 1}}
                    ]
                }
            }
        }
        # Messages of this bot whose text contains an '@' at all
        # (i.e. anyone being mentioned).
        query_body_total_day = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'xnr_id': xnr_puid}},
                        {'wildcard': {'text': '*' + '@' + '*'}}
                    ]
                }
            }
        }

        # Times the bot was @-mentioned today.
        at_num_xnr = _count_wx_group_messages(today_index, query_at_num)

        # Times the bot was @-mentioned since the assessment start date.
        # Each index in the range is queried in turn (the previous code
        # queried today's index on every iteration, inflating the total).
        index_list = get_wx_groupmessage_index_list(
            WX_GROUP_MESSAGE_START_DATE_ASSESSMENT, ts2datetime(current_time))
        at_num_total = 0
        for index_name in index_list:
            at_num_total += _count_wx_group_messages(index_name, query_at_num)

        # Times anybody was @-mentioned today.
        at_num_total_day = _count_wx_group_messages(today_index, query_body_total_day)

        at_dict['at_day'][current_time] = at_num_xnr
        at_dict['at_total'][current_time] = at_num_total
        # Log-scaled ratio; the "+1" terms keep both logarithms defined and
        # the denominator non-zero when the counts are 0.
        influence = (float(math.log(at_num_xnr + 1)) / (math.log(at_num_total_day + 1) + 1)) * 100
        at_dict['mark'] = round(influence, 2)  # keep two decimal places
        return at_dict

    # Historical data: read the pre-computed daily records.
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'xnr_user_no': wxbot_id}},
                            {'range': {'timestamp': {'gte': start_ts, 'lte': end_ts}}}
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {'timestamp': {'order': 'asc'}}
    }
    search_results = es_xnr.search(index=wx_xnr_history_count_index_name,
                                   doc_type=wx_xnr_history_count_index_type,
                                   body=query_body)['hits']['hits']

    # Initialise every expected timestamp with zero so gaps in the history
    # still appear in the output.
    for ts in load_timestamp_list(start_ts, end_ts):
        at_dict['at_day'][ts] = 0
        at_dict['at_total'][ts] = 0
    at_dict['mark'] = 0

    # Fill in the stored values; results are sorted ascending, so 'mark'
    # ends up holding the influence of the most recent record.
    for hit in search_results:
        record = hit['_source']
        timestamp = record['timestamp']
        at_dict['at_day'][timestamp] = record['daily_be_at_num']
        at_dict['at_total'][timestamp] = record['total_be_at_num']
        at_dict['mark'] = record['influence']
    return at_dict