Exemple #1
0
def show_trace_community(xnr_user_no,now_time):
    """Return the traced communities of one account, highest warning rank first.

    Queries the index named by ``get_community_index(now_time)`` for documents
    whose ``xnr_user_no`` equals the argument and whose ``community_status`` is
    1 or -2.  Every hit's ``_source`` receives a ``trace_message`` field and
    the sources are returned sorted by ``warning_rank`` in descending order
    (a missing rank counts as 0).

    Args:
        xnr_user_no: value matched against the ``xnr_user_no`` field.
        now_time: timestamp used to select the index; it is replaced by
            ``datetime2ts(WEIBO_COMMUNITY_DATE)`` when ``S_TYPE == 'test'``.

    Returns:
        A list of ``_source`` dicts, or ``[]`` when the search or the
        post-processing of any hit raises.
    """
    if S_TYPE == 'test':
        now_time = datetime2ts(WEIBO_COMMUNITY_DATE)

    # NOTE(review): no 'size' is given, so the search backend's default page
    # size limits how many communities come back -- confirm this is intended.
    query_body = {
        'query':{
            'filtered':{
                'filter':{
                    'bool':{
                        'must':[
                            {'term':{'xnr_user_no':xnr_user_no}},
                            {'terms':{'community_status':[1,-2]}}
                        ]
                    }
                }
            }
        }
    }
    weibo_community_index_name = get_community_index(now_time)

    try:
        hits = es_xnr.search(index=weibo_community_index_name,
                             doc_type=weibo_community_index_type,
                             body=query_body)['hits']['hits']
        community_list = []
        for hit in hits:
            community = hit['_source']
            # Tracking hint: once warning_remind reaches 3 the user is asked
            # to either give up or force the tracking of this community.
            if community['warning_remind'] >= 3:
                community['trace_message'] = u'该社区已经连续3周未出现预警,请选择放弃跟踪或强制跟踪!'
            else:
                community['trace_message'] = u''
            community_list.append(community)
        community_list.sort(key=lambda k: k.get('warning_rank', 0), reverse=True)
    except Exception:
        # Best effort: any failure (missing index, malformed document, ...)
        # yields an empty result.  Unlike a bare ``except:`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        community_list = []
    return community_list
Exemple #2
0
def show_date_warning(account_name,start_time,end_time):
    """Look up date warnings for an account inside a time window.

    Delegates to ``lookup_date_info(account_name, start_time, end_time,
    today_datetime)`` and returns its result unchanged.

    Args:
        account_name: passed through to ``lookup_date_info``.
        start_time: window start timestamp.
        end_time: window end timestamp.

    In test mode (``S_TYPE == 'test'``) the window keeps its length but is
    moved so that it ends at ``datetime2ts(TWITTER_FLOW_START_DATE)``, which
    is also used as "today".  Otherwise "today" is the current time passed
    through ``datetime2ts(ts2datetime(...))``.
    """
    if S_TYPE == 'test':
        window_length = end_time - start_time
        today_datetime = datetime2ts(TWITTER_FLOW_START_DATE)
        end_time = today_datetime
        start_time = end_time - window_length
    else:
        today_datetime = datetime2ts(ts2datetime(int(time.time())))

    # The former start_datetime / end_datetime locals were computed in both
    # branches but never read, so they have been dropped.
    return lookup_date_info(account_name,start_time,end_time,today_datetime)
Exemple #3
0
def utils_get_penetration(wxbot_id, period, startdate, enddate):
    """Compute sensitivity figures for the groups of one bot.

    ``dump_date`` normalises ``period``/``startdate``/``enddate``.  When the
    resulting ``period`` is 0 the function loads the bot's ``puid`` and
    ``groups_list`` and runs two searches against today's group-message
    index: the average of ``sensitive_value`` and the highest
    ``sensitive_value`` among messages of those groups.  Both values fall
    back to 0 when the search fails.

    NOTE(review): the visible body has no ``return`` statement and no branch
    for ``period != 0``; it looks truncated.  ``start_ts``, ``end_ts``,
    ``current_time``, ``puid``, ``sensitive_value`` and ``max_sensitive`` are
    presumably consumed by the missing part -- confirm against the full file.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:  # fetch today's data
        current_time = datetime2ts(current_date)
        xnr_data = load_wxxnr_redis_data(wxbot_id=wxbot_id,
                                         items=['puid', 'groups_list'])
        puid = xnr_data['puid']
        group_list = xnr_data['groups_list']

        # Query 1: average sensitive_value over today's messages of the groups
        sensitive_value = 0
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        query_body_info = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [{
                                'terms': {
                                    'group_id': group_list
                                }
                            }, {
                                'range': {
                                    'sensitive_value': {
                                        'gte': -1
                                    }
                                }
                            }]
                        }
                    }
                }
            },
            'aggs': {
                'avg_sensitive': {
                    'avg': {
                        'field': 'sensitive_value'
                    }
                }
            }
        }
        try:
            es_sensitive_result = es_xnr.search(
                index=wx_group_message_index_name,
                doc_type=wx_group_message_index_type,
                body=query_body_info)['aggregations']
            sensitive_value = es_sensitive_result['avg_sensitive']['value']
            # The aggregation value may come back as None; treat that as 0.
            if sensitive_value == None:
                sensitive_value = 0
        except Exception, e:
            # Best effort: keep the default of 0 and only report the error.
            print 'sensitive_value Exception: ', str(e)

        # Query 2: highest sensitive_value (same filter, sorted descending)
        max_sensitive = 0
        query_body_max = {
            "query": {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [
                                {
                                    'terms': {
                                        'group_id': group_list
                                    }
                                },
                                {
                                    "range": {
                                        "sensitive_value":
                                        {  # used in place of an "exists" filter (the original author did not know that syntax)
                                            "gte": -1
                                        }
                                    }
                                }
                            ]
                        }
                    }
                }
            },
            'sort': {
                'sensitive_value': {
                    'order': 'desc'
                }
            }
        }
        try:
            max_results = es_xnr.search(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,\
                            body=query_body_max)['hits']['hits']
            # First hit of the descending sort carries the maximum; an empty
            # hit list raises IndexError and leaves max_sensitive at 0.
            max_sensitive = max_results[0]['_source']['sensitive_value']
        except Exception, e:
            print 'max_sensitive Exception: ', str(e)
Exemple #4
0
def get_generate_example_model(domain_name,role_name):
    """Build the example persona for a domain/role pair and persist it.

    The role document ``<domain_pinyin>_<role_en>`` is read from
    ``fb_role_index_name`` and rewritten in place into a flat persona dict:
    political side label, top psychological features, monitor keywords
    extracted from the members' ``keywords_string`` texts, profile fields
    copied from the members' user-portrait documents, most active time slots
    and mean daily post count.  The dict is dumped to
    ``EXAMPLE_MODEL_PATH + 'fb_<domain_pinyin>_<role_en>.json'`` and a
    ``{domain_name, role_name}`` marker is indexed under the same id.

    Args:
        domain_name: domain name; converted with ``pinyin.get``.
        role_name: role name; must be a key of ``fb_domain_ch2en_dict``.

    Returns:
        True when both the JSON file and the index document were written,
        False when either step raised.

    Raises:
        Whatever the lookups before the final write raise (missing role
        document, missing fields, search errors); only the final write is
        guarded.
    """
    domain_pinyin = pinyin.get(domain_name,format='strip',delimiter='_')
    role_en = fb_domain_ch2en_dict[role_name]
    task_id = domain_pinyin + '_' + role_en
    item = es.get(index=fb_role_index_name,doc_type=fb_role_index_type,id=task_id)['_source']
    print('es_result::: %s' % (item,))

    # Political side: label of the first entry; anything but mid/left is
    # reported as right-leaning.
    political_side = json.loads(item['political_side'])[0][0]
    if political_side == 'mid':
        item['political_side'] = u'中立'
    elif political_side == 'left':
        item['political_side'] = u'左倾'
    else:
        item['political_side'] = u'右倾'

    # Psychological features: names of the first TOP_PSY_FEATURE entries.
    # Slicing (instead of indexing range(TOP_PSY_FEATURE)) tolerates lists
    # that are shorter than TOP_PSY_FEATURE.
    psy_feature = json.loads(item['psy_feature'])
    item['psy_feature'] = '&'.join(
        [feature[0] for feature in psy_feature[:TOP_PSY_FEATURE]])
    role_group_uids = json.loads(item['member_uids'])

    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE)
    else:
        current_time = int(time.time())

    index_name_list = get_flow_text_index_list(current_time)
    query_body_search = {
        'query':{
            'filtered':{
                'filter':{
                    'terms':{'uid':role_group_uids}
                }
            }
        },
        'size':MAX_VALUE,
        '_source':['keywords_string']
    }
    es_keyword_results = es_flow_text.search(index=index_name_list,
                                             doc_type=flow_text_index_type,
                                             body=query_body_search)['hits']['hits']

    # Every keyword string is prefixed with '&' (same text as the former
    # += loop, built in one pass).
    keywords_string = ''.join(
        ['&' + hit['_source']['keywords_string'] for hit in es_keyword_results])
    k_dict = extract_keywords(keywords_string)
    item['monitor_keywords'] = ','.join(
        [keyword.word.encode('utf-8') for keyword in k_dict])

    mget_results_user = es_user_portrait.mget(index=profile_index_name,
                                              doc_type=profile_index_type,
                                              body={'ids':role_group_uids})['docs']
    # Stays an empty list when no member profile is found.  Each found
    # profile overwrites the previous one, so the last found member wins.
    item['nick_name'] = []
    for profile_doc in mget_results_user:
        if not profile_doc['found']:
            continue
        content = profile_doc['_source']
        item['nick_name'] = content.get('name', '')
        item['location'] = ''
        if 'location' in content:
            item['location'] = get_user_location(json.loads(content['location']))
        # Bug fix: the gender code used to be assigned to a throwaway local,
        # leaving item['gender'] at 0 for every profile.
        gender_str = content.get('gender')
        if gender_str == 'male':
            item['gender'] = 1
        elif gender_str == 'female':
            item['gender'] = 2
        else:
            item['gender'] = 0
        item['description'] = content.get('description', '')

    # Fixed defaults for fields the role document does not provide.
    item['business_goal'] = u'渗透'
    item['daily_interests'] = u'旅游'
    item['age'] = 30
    item['career'] = u'自由职业'

    # Indices of the TOP_ACTIVE_TIME largest activity values.
    active_time_list_np = np.array(json.loads(item['active_time']))
    item['active_time'] = np.argsort(-active_time_list_np)[:TOP_ACTIVE_TIME].tolist()

    day_post_num_list = np.array(json.loads(item['day_post_num']))
    item['day_post_num'] = np.mean(day_post_num_list).tolist()
    item['role_name'] = role_name

    task_id_new = 'fb_' + domain_pinyin + '_' + role_en
    example_model_file_name = EXAMPLE_MODEL_PATH + task_id_new + '.json'
    try:
        with open(example_model_file_name,"w") as dump_f:
            json.dump(item,dump_f)
        item_dict = {'domain_name': domain_name, 'role_name': role_name}
        es.index(index=fb_example_model_index_name,doc_type=fb_example_model_index_type,
                 body=item_dict,id=task_id_new)
        mark = True
    except Exception:
        # File or index write failed; report it through the return value.
        mark = False
    return mark
Exemple #5
0
def _count_wx_group_messages(index_name, query):
    """Count the documents matching ``query`` in one group-message index.

    Returns 0 when the count request raises or when no shard answered
    successfully (the latter is also reported on stdout).
    """
    try:
        response = es_xnr.count(index=index_name,
                                doc_type=wx_group_message_index_type,
                                body=query)
        if response['_shards']['successful'] != 0:
            return response['count']
        print('es index rank error')
    except Exception:
        # Best effort: an unavailable index simply contributes nothing.
        pass
    return 0


def utils_get_influence(wxbot_id, period, startdate, enddate):
    """Return @-mention counts and the influence mark of one bot.

    ``dump_date`` normalises the arguments.  The result always has the shape
    ``{'at_day': {ts: n}, 'at_total': {ts: n}, 'mark': value}``.

    * ``period == 0``: the numbers are computed live.  ``at_day`` is the
      number of today's messages with ``at_flag == 1`` for the bot's puid,
      ``at_total`` sums that count over every index returned by
      ``get_wx_groupmessage_index_list`` from
      ``WX_GROUP_MESSAGE_START_DATE_ASSESSMENT`` up to today, and ``mark`` is
      ``log(at_day + 1) / (log(at_all_today + 1) + 1) * 100`` rounded to two
      decimals, where ``at_all_today`` counts today's messages whose text
      matches the wildcard ``*@*``.
    * otherwise: the values are read from the history-count index for
      ``start_ts <= timestamp <= end_ts``; timestamps without a record stay
      at 0 and ``mark`` is the ``influence`` of the latest record (0 if there
      is none).

    Args:
        wxbot_id: bot identifier.
        period, startdate, enddate: forwarded to ``dump_date``.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    at_dict = {'at_day': {}, 'at_total': {}}

    if period == 0:    # live numbers for today
        current_date = ts2datetime(int(time.time()))
        current_time = datetime2ts(current_date)
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']
        query_at_num = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'xnr_id':xnr_puid}},
                        {'term':{'at_flag':1}}
                    ]
                }
            }
        }
        # Number of times the bot was @-mentioned today.
        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        at_num_xnr = _count_wx_group_messages(wx_group_message_index_name, query_at_num)

        # Number of times the bot was @-mentioned so far.
        # Bug fix: the loop used to query today's index on every iteration
        # instead of the index it was iterating over.
        wx_group_message_index_list = get_wx_groupmessage_index_list(
            WX_GROUP_MESSAGE_START_DATE_ASSESSMENT, ts2datetime(current_time))
        at_num_total = 0
        for index_name in wx_group_message_index_list:
            at_num_total += _count_wx_group_messages(index_name, query_at_num)

        # Number of today's messages containing an '@' at all.
        query_body_total_day = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'xnr_id':xnr_puid}},
                        {'wildcard':{'text':'*'+'@'+'*'}}
                    ]
                }
            }
        }
        at_num_total_day = _count_wx_group_messages(wx_group_message_index_name,
                                                    query_body_total_day)

        at_dict['at_day'][current_time] = at_num_xnr
        at_dict['at_total'][current_time] = at_num_total
        influence = (float(math.log(at_num_xnr+1))/(math.log(at_num_total_day+1)+1))*100
        at_dict['mark'] = round(influence,2)  # keep two decimals
        return at_dict

    # Historical numbers from the pre-computed history-count index.
    query_body = {
        'query':{
            'filtered':{
                'filter':{
                    'bool':{
                        'must':[
                            {'term':{'xnr_user_no':wxbot_id}},
                            {'range':{'timestamp':{'gte':start_ts,'lte':end_ts}}}
                        ]
                    }
                }
            }
        },
        'size':MAX_SEARCH_SIZE,
        'sort':{'timestamp':{'order':'asc'}}
    }
    search_results = es_xnr.search(index=wx_xnr_history_count_index_name,
                                   doc_type=wx_xnr_history_count_index_type,
                                   body=query_body)['hits']['hits']
    # Initialise every timestamp of the range with zero.
    for ts in load_timestamp_list(start_ts, end_ts):
        at_dict['at_day'][ts] = 0
        at_dict['at_total'][ts] = 0
    at_dict['mark'] = 0
    # Fill in the recorded values; results are sorted ascending, so the
    # mark ends up being the one of the latest record.
    for hit in search_results:
        record = hit['_source']
        timestamp = record['timestamp']
        at_dict['at_day'][timestamp] = record['daily_be_at_num']
        at_dict['at_total'][timestamp] = record['total_be_at_num']
        at_dict['mark'] = record['influence']
    return at_dict
Exemple #6
0
def lookup_active_user(classify_id, xnr_id, start_time, end_time):
    """Return the most influential users from the BCI index of the day before ``end_time``.

    Args:
        classify_id: 1 restricts the search to the uids returned by
            ``lookup_xnr_friends(xnr_id)``, 2 excludes them, 0 matches
            everyone.  Any other value is unsupported and raises
            ``UnboundLocalError``, as before.
        xnr_id: account whose friend list defines the user filter.
        start_time: not used by the lookup; kept for interface compatibility.
        end_time: timestamp selecting the index; in test mode
            (``S_TYPE == 'test'``) it is shifted back by the distance between
            today and ``S_DATE_FB``.

    Returns:
        A list of dicts with the keys ``uid``, ``influence``, ``active``,
        ``uname``, ``location`` and ``link`` (at most ``HOT_WEIBO_NUM``,
        ordered by influence descending); ``[]`` when a search fails.
        The previous version built this list but fell off the end of the
        function without returning it.
    """
    if S_TYPE == 'test':
        test_time_gap = datetime2ts(ts2datetime(time.time())) - datetime2ts(S_DATE_FB)
        end_time = end_time - test_time_gap

    # ''.join() over the date string was a no-op, so the date is used as is.
    bci_index_name = fb_bci_index_name_pre + ts2datetime(end_time - DAY)

    userlist = lookup_xnr_friends(xnr_id)

    if classify_id == 1:
        condition_list = [{'bool': {'must': {'terms': {'uid': userlist}}}}]
    elif classify_id == 2:
        condition_list = [{'bool': {'must_not': [{'terms': {'uid': userlist}}]}}]
    elif classify_id == 0:
        condition_list = [{'match_all': {}}]
    print('%s %s %s' % (userlist, classify_id, condition_list))

    results = []
    for condition in condition_list:
        query_body = {
            'query': condition,
            'size': HOT_WEIBO_NUM,  # only the top users by influence are needed
            'sort': {
                'influence': {
                    'order': 'desc'
                }
            }
        }
        try:
            bci_hits = es_xnr.search(index=bci_index_name,
                                     doc_type=fb_bci_index_type,
                                     body=query_body)['hits']['hits']
            search_uid_list = [hit['_source']['uid'] for hit in bci_hits]
            user_hits = es_xnr.search(index=facebook_user_index_name,
                                      doc_type=facebook_user_index_type,
                                      body={'query':{'terms':{'uid':search_uid_list}}})['hits']['hits']

            user_dict = dict()
            for user_hit in user_hits:
                user_dict[user_hit['_source']['uid']] = user_hit['_source']

            for hit in bci_hits:
                source = hit['_source']
                influence = source['influence']
                active = source['active']
                uid = source['uid']
                try:
                    user_info = user_dict[uid]
                    uname = user_info['name']
                    location = user_info['locale']
                    link = user_info['link']
                except (KeyError, TypeError):
                    # No profile, or an incomplete one: blank all three.
                    uname = ''
                    location = ''
                    link = ''
                results.append({'uid': uid, 'influence': influence, 'active': active,
                                'uname': uname, 'location': location, 'link': link})
        except Exception as e:
            print(e)
            results = []
    return results
Exemple #7
0
def get_recommend_follows(task_detail):
    """Recommend users to follow, based on the task's monitor keywords.

    Every keyword of ``task_detail['monitor_keywords']`` (comma separated) is
    searched in the ``keywords_string`` field of the flow-text indices as a
    ``*keyword*`` wildcard, in its original form, its traditional-Chinese
    form and -- when ``trans`` returned one translation per keyword -- its
    English form.  The uids of the matching texts are looked up in the user
    profile index.

    Args:
        task_detail: dict with a ``'monitor_keywords'`` string.

    Returns:
        ``{'monitor_keywords': {uid: username}}`` built from at most
        ``NICK_NAME_TOP`` profile documents; the inner dict is empty when the
        lookup fails.  The previous version built this dict but never
        returned it.

    The daily-interest recommendation that used to sit here as a disabled
    string literal was dropped: its own note said the FB flow_text documents
    have no ``daily_interests`` field.
    """
    recommend_results = dict()
    monitor_keywords_list = task_detail['monitor_keywords'].split(',')
    if S_TYPE == 'test':
        create_time = datetime2ts(S_DATE)
    else:
        create_time = time.time()
    index_name_list = get_flow_text_index_list(create_time)

    # Texts may contain English or traditional Chinese, so match all forms.
    monitor_en_keywords_list = trans(monitor_keywords_list, target_language='en')
    # Only trust the translations when there is exactly one per keyword.
    translation_ok = len(monitor_en_keywords_list) == len(monitor_keywords_list)
    nest_query_list = []
    for position, monitor_keyword in enumerate(monitor_keywords_list):
        monitor_traditional_keyword = simplified2traditional(monitor_keyword)
        if translation_ok:
            monitor_en_keyword = monitor_en_keywords_list[position]
            nest_query_list.append({'wildcard':{'keywords_string':'*'+monitor_en_keyword+'*'}})
        nest_query_list.append({'wildcard':{'keywords_string':'*'+monitor_keyword+'*'}})
        nest_query_list.append({'wildcard':{'keywords_string':'*'+monitor_traditional_keyword+'*'}})

    try:
        query_body_monitor = {
            'query':{
                'bool':{
                    'should':nest_query_list
                }
            },
            'size':MONITOR_TOP_USER,
            '_source':['uid']
        }
        es_results = es_flow_text.search(index=index_name_list,doc_type='text',
                                         body=query_body_monitor)['hits']['hits']
        monitor_keywords_uid_list = list(
            set([hit['_source']['uid'] for hit in es_results]))

        es_monitor_keywords_results = es_user_profile.mget(
            index=profile_index_name,doc_type=profile_index_type,
            body={'ids':monitor_keywords_uid_list})['docs']
        # Bug fix: the slice bound was max(NICK_NAME_TOP, len(results)),
        # which never truncated anything.
        es_monitor_keywords_results = es_monitor_keywords_results[:NICK_NAME_TOP]

        nick_name_dict = {}
        for profile_doc in es_monitor_keywords_results:
            if not profile_doc['found']:
                continue
            profile = profile_doc['_source']
            nick_name_dict[profile['uid']] = profile['username']
        recommend_results['monitor_keywords'] = nick_name_dict
    except Exception as e:
        print(e)
        print('没有找到监测词相符的用户')
        recommend_results['monitor_keywords'] = {}
    return recommend_results
Exemple #8
0
def search_by_xnr_number(xnr_qq_number, current_date, group_qq_name):
    """Collect the recent group-message history of one account.

    Searches every index returned by ``get_groupmessage_index_list`` for the
    ``group_message_windowsize`` days ending at ``current_date`` (list
    reversed before searching) for messages whose ``xnr_qq_number`` matches
    and whose ``qq_group_nickname`` is one of the comma-separated names in
    ``group_qq_name``; each search is sorted by ``timestamp`` descending.

    Returns the raw response of the first successful search with the hits of
    all later successful searches appended to its ``['hits']['hits']`` list,
    or ``{}`` when no search succeeded.  Failing searches are skipped.
    """
    nickname_clause = {
        'terms': {'qq_group_nickname': group_qq_name.encode('utf-8').split(',')}
    }
    account_clause = {'term': {'xnr_qq_number': xnr_qq_number}}
    query_body = {
        'query': {
            'filtered': {
                'filter': {'bool': {'must': [account_clause, nickname_clause]}}
            }
        },
        'size': MAX_VALUE,
        'sort': {'timestamp': {'order': 'desc'}},
    }

    window_start = ts2datetime(
        datetime2ts(current_date) - group_message_windowsize * DAY)
    index_names = get_groupmessage_index_list(window_start, current_date)
    index_names.reverse()

    merged = {}
    for index_name in index_names:
        try:
            response = es_xnr.search(index=index_name,
                                     doc_type=group_message_index_type,
                                     body=query_body)
            if merged == {}:
                # First usable response becomes the container for all hits.
                merged = response
            else:
                merged['hits']['hits'].extend(response['hits']['hits'])
        except:
            # Deliberately best effort: an index that cannot be searched is
            # simply left out of the history.
            continue
    return merged
Exemple #9
0
def lookup_active_weibouser(classify_id, weiboxnr_id, start_time, end_time):
    """Return the most influential weibo users of the day before ``end_time``.

    Args:
        classify_id: 1 restricts the search to the uids returned by
            ``lookup_weiboxnr_concernedusers(weiboxnr_id)``, 2 excludes them,
            0 matches everyone.  Any other value is unsupported and raises
            ``UnboundLocalError``, as before.
        weiboxnr_id: account whose concerned-user list defines the filter.
        start_time: not used by the lookup; kept for interface compatibility.
        end_time: timestamp selecting the BCI index (``end_time - DAY``).  In
            test mode (``S_TYPE == 'test'``) the index of ``S_DATE_BCI`` is
            used and ``end_time`` is shifted back by the distance between
            today and ``S_DATE_BCI``.

    Returns:
        A list of dicts with the keys ``uid``, ``influence``, ``fans_num``,
        ``total_number``, ``friends_num``, ``uname``, ``location`` and
        ``url`` (at most ``HOT_WEIBO_NUM``, ordered by ``user_index``
        descending); ``[]`` when a search fails.  ``influence`` is the
        user's ``user_index`` as a percentage of
        ``count_maxweibouser_influence(end_time - DAY)``, or 0 when that
        maximum is not positive.
    """
    today_date_time = end_time - DAY
    if S_TYPE == 'test':
        test_time_gap = datetime2ts(ts2datetime(time.time())) - datetime2ts(S_DATE_BCI)
        today_date_time = datetime2ts(S_DATE_BCI)
        end_time = end_time - test_time_gap

    # Index names carry the date without dashes.
    bci_index_name = weibo_bci_index_name_pre + ''.join(
        ts2datetime(today_date_time).split('-'))
    print('bci_index_name: %s' % (bci_index_name,))
    print('end_time: %s' % (ts2date(end_time),))

    # Step 1: user filter, chosen by classify_id.
    userlist = lookup_weiboxnr_concernedusers(weiboxnr_id)
    if classify_id == 1:  # concerned users
        condition_list = [{'bool': {'must': {'terms': {'uid': userlist}}}}]
    elif classify_id == 2:  # users not concerned
        condition_list = [{'bool': {'must_not': [{'terms': {'uid': userlist}}]}}]
    elif classify_id == 0:
        condition_list = [{'match_all': {}}]
    print('%s %s %s' % (userlist, classify_id, condition_list))

    # Step 2: look the users up.
    user_max_index = count_maxweibouser_influence(end_time - DAY)
    results = []
    for condition in condition_list:
        query_body = {
            'query': condition,
            'size': HOT_WEIBO_NUM,  # only the top users by influence are needed
            'sort': {
                'user_index': {
                    'order': 'desc'
                }
            }
        }
        try:
            bci_hits = es_user_portrait.search(index=bci_index_name,
                                               doc_type=weibo_bci_index_type,
                                               body=query_body)['hits']['hits']
            search_uid_list = [hit['_source']['user'] for hit in bci_hits]
            profile_hits = es_user_profile.search(
                index=profile_index_name,doc_type=profile_index_type,
                body={'query':{'terms':{'uid':search_uid_list}}})['hits']['hits']

            weibo_user_dict = dict()
            for profile_hit in profile_hits:
                weibo_user_dict[profile_hit['_source']['uid']] = profile_hit['_source']

            for hit in bci_hits:
                source = hit['_source']
                # A non-positive maximum used to raise ZeroDivisionError and
                # wipe the whole result; report 0 instead.  float() avoids
                # Python 2 integer division.
                if user_max_index > 0:
                    influence = float(source['user_index']) / user_max_index * 100
                else:
                    influence = 0
                fans_num = source['user_fansnum']
                friends_num = source['user_friendsnum']
                total_number = source['total_number']
                uid = source['user']
                try:
                    weibo_user_info = weibo_user_dict[uid]
                    uname = weibo_user_info['nick_name']
                    location = weibo_user_info['user_location']
                    url = weibo_user_info['photo_url']
                except (KeyError, TypeError):
                    # No profile, or an incomplete one: blank all three.
                    uname = ''
                    location = ''
                    url = ''
                results.append({'uid': uid, 'influence': influence, 'fans_num': fans_num,
                                'total_number': total_number, 'friends_num': friends_num,
                                'uname': uname, 'location': location, 'url': url})
        except Exception:
            # Best effort: a failing search yields an empty result.
            results = []

    return results
Exemple #10
0
def lookup_hot_posts(from_ts, to_ts, weiboxnr_id, classify_id, order_id):
    """Return the top posts inside a time window.

    Args:
        from_ts: window start (inclusive).
        to_ts: window end (exclusive).  In test mode (``S_TYPE == 'test'``)
            ``from_ts`` is shifted back by the distance between today and
            ``S_DATE`` and ``to_ts`` becomes ``from_ts + MAX_FLOW_TEXT_DAYS
            * DAY``.
        weiboxnr_id: account whose concerned-user list
            (``lookup_weiboxnr_concernedusers``) defines the user filter.
        classify_id: 0 all users, 1 only concerned users, 2 only users that
            are not concerned.  Any other value sends an empty filter list.
        order_id: 1 sort by ``timestamp``, 2 by ``retweeted``, 3 by
            ``sensitive`` (all descending).  Any other value now falls back
            to ``timestamp`` instead of raising ``UnboundLocalError``.

    Returns:
        A list of at most ``HOT_WEIBO_NUM`` post ``_source`` dicts, each
        extended with ``nick_name`` from ``get_user_nickname``; ``[]`` when
        the search or the nickname lookup raises.
    """
    # Step 1: adjust the time window.
    if S_TYPE == 'test':
        test_time_gap = datetime2ts(ts2datetime(time.time())) - datetime2ts(S_DATE)
        from_ts = from_ts - test_time_gap
        to_ts = from_ts + MAX_FLOW_TEXT_DAYS * DAY
    to_date_ts = datetime2ts(ts2datetime(to_ts))

    # Existing daily indices for the MAX_FLOW_TEXT_DAYS + 1 days ending at
    # the day of to_ts.
    flow_text_index_name_list = []
    for day_offset in range(MAX_FLOW_TEXT_DAYS + 1):
        index_name = flow_text_index_name_pre + ts2datetime(to_date_ts - day_offset * DAY)
        if es_flow_text.indices.exists(index=index_name):
            flow_text_index_name_list.append(index_name)

    # order_id -> sort field; the disabled else-branch of the old code
    # documented "sort by time" as the intended default.
    sort_fields = {1: 'timestamp', 2: 'retweeted', 3: 'sensitive'}
    sort_field = sort_fields.get(order_id, 'timestamp')
    sort_condition_list = [{sort_field: {'order': 'desc'}}]

    userslist = lookup_weiboxnr_concernedusers(weiboxnr_id)
    range_time_list = {
        'range': {
            'timestamp': {
                'gte': int(from_ts),
                'lt': int(to_ts)
            }
        }
    }
    print(range_time_list)

    users_clause = {'terms': {'uid': userslist}}
    user_condition_list = []
    if classify_id == 1:
        user_condition_list = [{'bool': {'must': [users_clause, range_time_list]}}]
    elif classify_id == 2:
        user_condition_list = [{
            'bool': {
                'must': [range_time_list],
                'must_not': [users_clause]
            }
        }]
    elif classify_id == 0:
        user_condition_list = [{'bool': {'must': [range_time_list]}}]

    query_body = {
        'query': {
            'filtered': {
                'filter': user_condition_list
            }
        },
        'size': HOT_WEIBO_NUM,
        'sort': sort_condition_list
    }

    try:
        es_result = es_flow_text.search(index=flow_text_index_name_list,
                                        doc_type=flow_text_index_type,
                                        body=query_body)['hits']['hits']
        hot_result = []
        for hit in es_result:
            post = hit['_source']
            post['nick_name'] = get_user_nickname(post['uid'])
            hot_result.append(post)
    except Exception:
        # Best effort: any failure yields an empty result.
        hot_result = []
    return hot_result
Exemple #11
0
def show_event_warming(xnr_user_no):
    """Build event warnings for a virtual user from a list of hashtags.

    Every hashtag from ``get_hashtag()`` (or a fixed sample list when
    ``S_TYPE == 'test'``) is searched as a wildcard on the ``text`` field of
    the flow-text indices.  Hits with ``sensitive > 0`` are scored, their
    authors are counted, and one summary dict per hashtag is produced.

    Args:
        xnr_user_no: id of the virtual user; used as the document id when
            loading its fans/followers lists.

    Returns:
        A list with one dict per hashtag that matched at least one sensitive
        post.  Keys: ``event_name``, ``main_weibo_info`` (sensitive posts,
        highest ``weibo_influence_value`` first), ``event_influence`` and
        ``event_time`` (sums over the sensitive posts divided by the total
        number of hits) and ``main_user_info`` (profiles of the authors).
    """
    now_time = int(time.time())
    hashtag_list = get_hashtag()
    if S_TYPE == 'test':
        test_day_time = datetime2ts(S_DATE_EVENT_WARMING)
        flow_text_index_list = get_flow_text_index_list(test_day_time)
        # Test mode replaces the looked-up hashtags with a fixed sample.
        hashtag_list = [('网络义勇军发布', 13), ('美国', 7), ('德国', 5), ('中国', 4),
                        ('清真食品', 3), ('反邪动态', 2), ('台海观察', 2), ('雷哥微评', 2),
                        ('中国军队', 1)]
    else:
        flow_text_index_list = get_flow_text_index_list(now_time)

    # Fans and followers of the virtual user; fall back to empty lists when
    # the record cannot be read.
    try:
        es_xnr_result = es_xnr.get(
            index=weibo_xnr_fans_followers_index_name,
            doc_type=weibo_xnr_fans_followers_index_type,
            id=xnr_user_no)['_source']
        followers_list = es_xnr_result['followers_list']
        fans_list = es_xnr_result['fans_list']
    except Exception:
        followers_list = []
        fans_list = []

    event_warming_list = []
    for event_item in hashtag_list:
        event_name = event_item[0]
        event_warming_content = {'event_name': event_name}
        event_sensitive_count = 0
        event_influence_sum = 0
        event_time_sum = 0
        weibo_result = []
        # Per-author counters, defined up front so the user lookup below can
        # always rely on them.
        fans_num_dict = {}
        followers_num_dict = {}
        alluser_num_dict = {}
        query_body = {
            'query': {
                'bool': {
                    'should': {
                        'wildcard': {
                            'text': '*' + event_name + '*'
                        }
                    }
                }
            }
        }
        try:
            event_results = es_flow_text.search(
                index=flow_text_index_list,
                doc_type=flow_text_index_type,
                body=query_body)['hits']['hits']
            for item in event_results:
                source = item['_source']
                if not (source['sensitive'] > 0):
                    continue
                event_sensitive_count += 1

                # Count posts per author, and separately for authors that are
                # fans / followers of the virtual user.
                uid = source['uid']
                uid_key = str(uid)
                alluser_num_dict[uid_key] = alluser_num_dict.get(uid_key, 0) + 1
                if uid in fans_list:
                    fans_num_dict[uid_key] = fans_num_dict.get(uid_key, 0) + 1
                if uid in followers_list:
                    # Previously these hits were written into fans_num_dict,
                    # leaving followers_num_dict permanently empty.
                    followers_num_dict[uid_key] = \
                        followers_num_dict.get(uid_key, 0) + 1

                # Influence of a single post.
                origin_influence_value = (
                    source['comment'] + source['retweeted']) * (
                        1 + source['sensitive'])
                fans_value = judge_user_type(uid, fans_list)
                followers_value = judge_user_type(uid, followers_list)
                source['weibo_influence_value'] = origin_influence_value * (
                    fans_value + followers_value)
                weibo_result.append(source)

                # Accumulate totals; the timestamp used to be overwritten
                # instead of summed, so only the last post counted.
                event_influence_sum += source['weibo_influence_value']
                event_time_sum += source['timestamp']

            # Summarise once after the loop instead of on every iteration.
            number = len(event_results)
            if number:
                weibo_result.sort(
                    key=lambda k: k.get('weibo_influence_value', 0),
                    reverse=True)
                event_warming_content['main_weibo_info'] = weibo_result
                event_warming_content[
                    'event_influence'] = event_influence_sum / number
                event_warming_content['event_time'] = event_time_sum / number
        except Exception:
            event_warming_content['main_weibo_info'] = []
            event_warming_content['event_influence'] = []
            event_warming_content['event_time'] = []

        # Hashtags without any sensitive post are not reported.
        if event_sensitive_count <= 0:
            continue

        try:
            # Rank authors by their merged counters, most active first.
            temp_userid_dict = union_dict(fans_num_dict, followers_num_dict)
            main_userid_dict = union_dict(temp_userid_dict, alluser_num_dict)
            ranked_users = sorted(main_userid_dict.items(),
                                  key=lambda d: d[1],
                                  reverse=True)
            main_userid_list = [user_id for user_id, _ in ranked_users]

            # Profile information of the main participants.
            main_user_info = []
            user_es_result = es_user_profile.mget(
                index=profile_index_name,
                doc_type=profile_index_type,
                body={'ids': main_userid_list})['docs']
            for item in user_es_result:
                if item['found']:
                    profile = item['_source']
                    user_dict = {
                        'photo_url': profile['photo_url'],
                        'uid': item['_id'],
                        'nick_name': profile['nick_name'],
                        'favoritesnum': profile['favoritesnum'],
                        'fansnum': profile['fansnum'],
                    }
                else:
                    # Unknown profile: keep the uid with blank details.
                    user_dict = {
                        'photo_url': '',
                        'uid': item['_id'],
                        'nick_name': '',
                        'favoritesnum': 0,
                        'fansnum': 0,
                    }
                main_user_info.append(user_dict)
            event_warming_content['main_user_info'] = main_user_info
        except Exception:
            event_warming_content['main_user_info'] = []

        event_warming_list.append(event_warming_content)
    return event_warming_list
Exemple #12
0
def show_personnal_warming(xnr_user_no, day_time):
    """Rank followed users by summed sensitivity and attach their posts.

    Args:
        xnr_user_no: id of the virtual user whose followers list is loaded.
        day_time: timestamp used to pick the flow-text indices (ignored when
            ``S_TYPE == 'test'``, where ``S_DATE_BCI`` is used instead).

    Returns:
        A list of dicts with keys ``uid``, ``user_sensitive``, ``user_name``
        and ``content`` (that user's posts with ``sensitive`` between 1 and
        100, most sensitive first), ordered by ``user_sensitive`` descending.
    """
    # Users followed by the virtual user; an unreadable record means none.
    try:
        followers_list = es_xnr.get(
            index=weibo_xnr_fans_followers_index_name,
            doc_type=weibo_xnr_fans_followers_index_type,
            id=xnr_user_no)['_source']['followers_list']
    except:
        followers_list = []

    # Test mode pins the date; otherwise use the caller's timestamp.
    if S_TYPE == 'test':
        flow_text_index_list = get_flow_text_index_list(
            datetime2ts(S_DATE_BCI))
    else:
        flow_text_index_list = get_flow_text_index_list(day_time)

    # Sum the "sensitive" field per uid over the followed users' posts.
    per_user_sum_query = {
        'query': {
            'filtered': {
                'filter': {
                    'terms': {
                        'uid': followers_list
                    }
                }
            }
        },
        'aggs': {
            'followers_sensitive_num': {
                'terms': {
                    'field': 'uid'
                },
                'aggs': {
                    'sensitive_num': {
                        'sum': {
                            'field': 'sensitive'
                        }
                    }
                }
            }
        },
        'size': MAX_VALUE
    }
    try:
        buckets = es_flow_text.search(
            index=flow_text_index_list,
            doc_type=flow_text_index_type,
            body=per_user_sum_query
        )['aggregations']['followers_sensitive_num']['buckets']
    except:
        buckets = []

    # Look at no more than USER_NUM buckets and keep the positive ones.
    top_userlist = []
    for idx in range(min(USER_NUM, len(buckets))):
        bucket = buckets[idx]
        sensitive_total = bucket['sensitive_num']['value']
        if sensitive_total > 0:
            top_userlist.append({
                'uid': bucket['key'],
                'sensitive': sensitive_total
            })

    # Posts whose sensitive-word string is exactly one of these are dropped.
    skipped_word_one = '静坐'
    skipped_word_two = '集合'

    results = []
    for user in top_userlist:
        user_detail = {
            'uid': user['uid'],
            'user_sensitive': user['sensitive']
        }

        # Nick name from the profile index, blank when it cannot be read.
        try:
            profile = es_user_profile.get(index=profile_index_name,
                                          doc_type=profile_index_type,
                                          id=user['uid'])['_source']
            user_detail['user_name'] = profile['nick_name']
        except:
            user_detail['user_name'] = ''

        # This user's posts with sensitive in [1, 100], most sensitive first.
        user_posts_query = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [{
                                'term': {
                                    'uid': user['uid']
                                }
                            }, {
                                'range': {
                                    'sensitive': {
                                        'gte': 1,
                                        'lte': 100
                                    }
                                }
                            }]
                        }
                    }
                }
            },
            'size': USER_CONTENT_NUM,
            'sort': {
                'sensitive': {
                    'order': 'desc'
                }
            }
        }
        try:
            post_hits = es_flow_text.search(
                index=flow_text_index_list,
                doc_type=flow_text_index_type,
                body=user_posts_query)['hits']['hits']
        except:
            post_hits = []

        kept_posts = []
        for hit in post_hits:
            sensitive_words = hit['_source']['sensitive_words_string']
            if (sensitive_words != skipped_word_one
                    and sensitive_words != skipped_word_two):
                kept_posts.append(hit['_source'])

        user_detail['content'] = sorted(
            kept_posts, key=lambda post: post.get('sensitive', 0),
            reverse=True)
        results.append(user_detail)

    results.sort(key=lambda entry: entry.get('user_sensitive', 0),
                 reverse=True)
    return results
Exemple #13
0
def show_speech_warming(xnr_user_no, show_type, day_time):
    """Return the most sensitive posts, optionally filtered by follow state.

    Args:
        xnr_user_no: id of the virtual user whose followers list is loaded.
        show_type: 0 for all users, 1 for followed users only, 2 for users
            that are not followed.  Any other value raises ``IndexError``
            because no filter clause gets built.
        day_time: timestamp used to pick the flow-text indices (ignored when
            ``S_TYPE == 'test'``, where ``S_DATE_BCI`` is used instead).

    Returns:
        A list of post ``_source`` dicts with ``sensitive`` between 1 and
        100, sorted by ``sensitive`` descending, with a fixed set of post ids
        removed.
    """
    # Users followed by the virtual user; an unreadable record means none.
    try:
        followers_list = es_xnr.get(
            index=weibo_xnr_fans_followers_index_name,
            doc_type=weibo_xnr_fans_followers_index_type,
            id=xnr_user_no)['_source']['followers_list']
    except:
        followers_list = []

    sensitive_range = {'range': {'sensitive': {'gte': 1, 'lte': 100}}}
    followed_terms = {'terms': {'uid': followers_list}}

    bool_clauses = []
    if show_type == 0:  # all users
        bool_clauses.append({'must': sensitive_range})
    elif show_type == 1:  # followed users
        bool_clauses.append({'must': [followed_terms, sensitive_range]})
    elif show_type == 2:  # users that are not followed
        bool_clauses.append({
            'must_not': followed_terms,
            'must': sensitive_range
        })

    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': bool_clauses[0]
                }
            }
        },
        'size': SPEECH_WARMING_NUM,
        'sort': {
            'sensitive': {
                'order': 'desc'
            }
        }
    }

    # Test mode pins the date; otherwise use the caller's timestamp.
    if S_TYPE == 'test':
        index_ts = datetime2ts(S_DATE_BCI)
    else:
        index_ts = day_time
    flow_text_index_list = get_flow_text_index_list(index_ts)

    # Search errors are deliberately not caught here and reach the caller.
    hits = es_flow_text.search(index=flow_text_index_list,
                               doc_type=flow_text_index_type,
                               body=query_body)['hits']['hits']

    # Post ids that are always removed from the result.
    excluded_ids = (
        '4045093692450438', '4045096116622444', '4045095374193153',
        '4045095567336676', '4045092304116237', '4045093297982719',
        '4045178576337277', '4044647661388452'
    )
    return [hit['_source'] for hit in hits if hit['_id'] not in excluded_ids]
Exemple #14
0
def aggr_sen_users(xnr_qq_number, startdate, enddate):
    """Aggregate speakers of sensitive QQ group messages by nickname.

    For each index returned by ``get_groupmessage_index_list`` the messages
    with ``sensitive_flag == 1`` for ``xnr_qq_number`` inside the
    ``[startdate, enddate)`` timestamp range are bucketed by
    ``speaker_nickname``; buckets for the same nickname found in several
    indices are merged into a single entry of ``results``.

    Args:
        xnr_qq_number: QQ number of the virtual user to filter on.
        startdate: start date, converted with ``datetime2ts``.
        enddate: end date, converted with ``datetime2ts``.

    NOTE(review): no ``return`` statement is visible in this block, so as
    written the function returns ``None`` -- confirm whether the tail of the
    function (presumably ``return results``) was lost.
    """
    # print 'startdate:',startdate,type(startdate)
    start_ts = datetime2ts(startdate)
    end_ts = datetime2ts(enddate)
    query_body = {
        "query": {
            "bool": {
                "must": [{
                    'term': {
                        'xnr_qq_number': xnr_qq_number
                    }
                }, {
                    "term": {
                        "sensitive_flag": 1
                    }
                }, {
                    'range': {
                        'timestamp': {
                            'gte': start_ts,
                            'lt': end_ts
                        }
                    }
                }]
            }
        },
        "aggs": {
            "all_senusers": {
                # "terms":{"field": "speaker_qq_number"}
                "terms": {
                    "field": "speaker_nickname"
                }
            }
        }
    }

    #enddate = datetime.datetime.now().strftime('%Y-%m-%d')
    #startdate = ts2datetime(datetime2ts(enddate)-group_message_windowsize*DAY)
    index_names = get_groupmessage_index_list(startdate, enddate)

    print index_names
    results = []
    for index_name in index_names:
        # A failing search on one index is treated as "no buckets".
        try:
            result = es_xnr.search(index=index_name,\
                    doc_type=group_message_index_type,\
                    body=query_body)["aggregations"]["all_senusers"]["buckets"]
        except Exception, e:
            result = []
        print 'index_name,result:', index_name, result

        if result != []:
            for item in result:
                # print 'item:',item
                inner_item = {}
                # inner_item['qq_number'] = item['key']
                inner_item['qq_nick'] = item['key']
                inner_item['count'] = item['doc_count']
                info = get_speaker_info(item['key'], index_name)
                if info == {}:
                    # No speaker details available: fill in blank values.
                    # inner_item['qq_nick'] = ''
                    inner_item['qq_number'] = ''
                    inner_item['qq_groups'] = ''
                    inner_item['last_speak_ts'] = ''
                    inner_item['text'] = []
                else:
                    # inner_item['qq_nick'] = info['qq_nick']
                    inner_item['qq_number'] = info['qq_number']
                    inner_item['qq_groups'] = info['qq_groups']
                    inner_item['last_speak_ts'] = info['last_speak_ts']
                    inner_item['text'] = info['text']
                # flag stays 1 while the nickname has not been seen before.
                flag = 1
                for aa in results:  # check whether the speaker is already in results
                    # if aa['qq_number'] == inner_item['qq_number']:
                    if aa['qq_nick'] == inner_item['qq_nick']:
                        aa['count'] += inner_item['count']
                        aa['last_speak_ts'] = inner_item['last_speak_ts']
                        # NOTE(review): an entry created from an empty `info`
                        # stores '' in qq_groups, and str has no .update() --
                        # verify this merge cannot hit such an entry.
                        aa['qq_groups'].update(
                            inner_item['qq_groups'])  # merge groups when the speaker talks in several groups
                        aa['text'].extend(inner_item['text'])
                        flag = 0
                        # `continue` keeps scanning the remaining entries.
                        continue
                if flag:
                    results.append(inner_item)
Exemple #15
0
def utils_get_safe(wxbot_id, period, startdate, enddate):
    """Collect message-count statistics for a WeChat virtual user.

    Only the beginning of the ``period == 0`` branch is visible in this
    block: it counts the messages the bot posted in today's group-message
    index and reads ``total_post_num`` from the history-count index for the
    previous day.

    NOTE(review): no ``return`` statement and no use of ``speak_dict``,
    ``today_count`` or ``total_count`` is visible here -- the function
    appears to be truncated; confirm against the full source.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:  # fetch today's data
        current_time = datetime2ts(current_date)
        last_date = ts2datetime(current_time - DAY)

        speak_dict = {}
        speak_dict['speak_day'] = {}
        speak_dict['speak_total'] = {}
        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id,
                                         items=['puid'])['puid']

        # Number of messages the xnr posted today: documents where the xnr
        # is both the speaker and the owning xnr.
        today_count = 0
        query_body = {
            'query': {
                'bool': {
                    'must': [{
                        'term': {
                            'speaker_id': xnr_puid
                        }
                    }, {
                        'term': {
                            'xnr_id': xnr_puid
                        }
                    }]
                }
            }
        }
        today_index_name = wx_group_message_index_name_pre + current_date
        try:
            today_count_result = es_xnr.count(
                index=today_index_name,
                doc_type=wx_group_message_index_type,
                body=query_body)
            # Only trust the count when at least one shard answered.
            if today_count_result['_shards']['successful'] != 0:
                today_count = today_count_result['count']
        except Exception, e:
            print 'today_count Exception: ', str(e)

        # Historical total of messages posted by the xnr, read from the
        # record stored for the previous day.
        total_count = 0
        total_query_body = {
            'query': {
                'bool': {
                    'must': [{
                        'term': {
                            'xnr_user_no': wxbot_id
                        }
                    }, {
                        'term': {
                            'puid': xnr_puid
                        }
                    }, {
                        'term': {
                            'date_time': last_date
                        }
                    }]
                }
            }
        }
        total_index_name = wx_xnr_history_count_index_name
        try:
            total_count_result = es_xnr.search(
                index=total_index_name,
                doc_type=wx_xnr_history_count_index_type,
                body=total_query_body)
            if total_count_result['_shards']['successful'] != 0:
                # An empty hit list raises IndexError here, which the except
                # below reports while total_count stays 0.
                total_count = total_count_result['hits']['hits'][0]['_source'][
                    'total_post_num']
        except Exception, e:
            print 'total_count Exception:', str(e)
Exemple #16
0
def get_penetration_qq_today(xnr_user_no):
    """Compute today's penetration score for a QQ virtual user.

    The average and the maximum ``sensitive_value`` of today's messages in
    the virtual user's groups are read from the group-message index and
    combined as ``log(avg + 1) / (log(max + 1) + 1) * 100``.

    Args:
        xnr_user_no: document id of the virtual user in the QQ xnr index.

    Returns:
        A dict with ``sensitive_info`` (``{current_time: average}``) and
        ``mark`` (the score rounded to two decimals).

    Raises:
        Whatever ``es_xnr.get`` / ``json.loads`` raise when the virtual
        user's record cannot be read; only the two searches are best-effort.
    """
    follow_group_sensitive = {'sensitive_info': {}}

    get_result = es_xnr.get(index=qq_xnr_index_name,
                            doc_type=qq_xnr_index_type,
                            id=xnr_user_no)['_source']

    group_list = []
    group_info = json.loads(get_result['group_info'])
    for value_dict in group_info.values():
        # NOTE(review): extend() assumes group_name is a list of names; a
        # plain string would be split into characters -- confirm the schema.
        group_list.extend(value_dict['group_name'])

    if S_TYPE == 'test':
        current_time = datetime2ts(QQ_S_DATE_ASSESSMENT)
    else:
        current_time = int(time.time())
    current_date = ts2datetime(current_time)
    group_message_index_name = group_message_index_name_pre + current_date

    group_filter = {
        'filtered': {
            'filter': {
                'terms': {'qq_group_nickname': group_list}
            }
        }
    }

    # Average sensitivity of today's messages in the followed groups.
    query_body_info = {
        'query': group_filter,
        'aggs': {
            'avg_sensitive': {
                'avg': {
                    'field': 'sensitive_value'
                }
            }
        }
    }
    # Default up front: previously a failed search left sensitive_value
    # unbound and the score computation below raised NameError.
    sensitive_value = 0.0
    try:
        es_sensitive_result = es_xnr.search(
            index=group_message_index_name,
            doc_type=group_message_index_type,
            body=query_body_info)['aggregations']
        sensitive_value = es_sensitive_result['avg_sensitive']['value']
        # The aggregation value is None when there is nothing to average.
        if sensitive_value is None:
            sensitive_value = 0.0
        follow_group_sensitive['sensitive_info'][current_time] = round(
            sensitive_value, 2)
    except Exception:
        sensitive_value = 0.0
        follow_group_sensitive['sensitive_info'][current_time] = 0

    # Highest sensitivity among today's messages in the followed groups.
    query_body_max = {
        'query': group_filter,
        'sort': {'sensitive_value': {'order': 'desc'}}
    }
    try:
        max_results = es_xnr.search(
            index=group_message_index_name,
            doc_type=group_message_index_type,
            body=query_body_max)['hits']['hits']
        max_sensitive = max_results[0]['_source']['sensitive_value']
    except Exception:
        # Also covers an empty hit list (IndexError).
        max_sensitive = 0

    penetration = (math.log(sensitive_value + 1) /
                   (math.log(max_sensitive + 1) + 1)) * 100
    follow_group_sensitive['mark'] = round(penetration, 2)

    return follow_group_sensitive
Exemple #17
0
def _lookup_top_nick_names(es_hits):
    """Return ``{uid: nick_name}`` for the authors of *es_hits*.

    Uids are de-duplicated in hit order, so the ranking of the search is
    kept, and at most ``NICK_NAME_TOP`` profile documents are inspected.
    """
    seen_uids = set()
    uid_list = []
    for hit in es_hits:
        uid = hit['_source']['uid']
        if uid not in seen_uids:
            seen_uids.add(uid)
            uid_list.append(uid)

    profile_docs = es_user_profile.mget(index=profile_index_name,
                                        doc_type=profile_index_type,
                                        body={'ids': uid_list})['docs']
    nick_name_dict = {}
    # The old slice used max(NICK_NAME_TOP, len(docs)) and never truncated.
    for doc in profile_docs[:NICK_NAME_TOP]:
        if doc['found']:
            profile = doc['_source']
            nick_name_dict[profile['uid']] = profile['nick_name']
    return nick_name_dict


def get_recommend_follows(task_detail):
    """Recommend users to follow from daily interests and monitor keywords.

    Args:
        task_detail: dict with comma-separated strings under
            ``daily_interests`` and ``monitor_keywords``.

    Returns:
        A dict with keys ``daily_interests`` and ``monitor_keywords``, each
        mapping uid to nick name; a key maps to ``{}`` when its lookup fails.
    """
    recommend_results = dict()
    daily_interests_list = task_detail['daily_interests'].encode('utf-8').split(',')
    monitor_keywords_list = task_detail['monitor_keywords'].encode('utf-8').split(',')

    create_time = time.time()
    if S_TYPE == 'test':
        create_time = datetime2ts(S_DATE)

    index_name_list = get_flow_text_index_list(create_time)

    # Users whose posts are tagged with one of the daily interests, with the
    # largest user_fansnum first.
    try:
        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'terms': {'daily_interests': daily_interests_list}
                    }
                }
            },
            'sort': {'user_fansnum': {'order': 'desc'}},
            'size': DAILY_INTEREST_TOP_USER,
            '_source': ['uid']
        }
        es_results = es_flow_text.search(index=index_name_list,
                                         doc_type='text',
                                         body=query_body)['hits']['hits']
        recommend_results['daily_interests'] = _lookup_top_nick_names(es_results)
    except Exception:
        print('没有找到日常兴趣相符的用户')
        recommend_results['daily_interests'] = {}

    # Users whose keywords_string contains every monitor keyword.
    nest_query_list = [
        {'wildcard': {'keywords_string': '*' + monitor_keyword + '*'}}
        for monitor_keyword in monitor_keywords_list
    ]
    try:
        query_body_monitor = {
            'query': {
                'bool': {
                    'must': nest_query_list
                }
            },
            'sort': {'user_fansnum': {'order': 'desc'}},
            'size': MONITOR_TOP_USER,
            '_source': ['uid']
        }
        es_results = es_flow_text.search(index=index_name_list,
                                         doc_type='text',
                                         body=query_body_monitor)['hits']['hits']
        recommend_results['monitor_keywords'] = _lookup_top_nick_names(es_results)
    except Exception:
        print('没有找到监测词相符的用户')
        recommend_results['monitor_keywords'] = {}

    return recommend_results
Exemple #18
0
def get_safe_qq_today(xnr_user_no):
    """Compute today's activity score for a QQ virtual user.

    Args:
        xnr_user_no: document id of the virtual user in the QQ xnr index.

    Returns:
        A dict with ``speak_today`` and ``speak_total`` (each
        ``{current_time: count}``) and ``mark``, computed as
        ``log(today + 1) / (log(busiest_speaker + 1) + 1) * 100`` rounded to
        two decimals.
    """
    xnr_source = es_xnr.get(index=qq_xnr_index_name,
                            doc_type=qq_xnr_index_type,
                            id=xnr_user_no)['_source']
    qq_number = xnr_source['qq_number']

    # Test mode pins the assessment date; otherwise use the current time.
    if S_TYPE == 'test':
        current_time = datetime2ts(QQ_S_DATE_ASSESSMENT)
    else:
        current_time = int(time.time())
    current_date = ts2datetime(current_time)
    group_message_index_name = group_message_index_name_pre + current_date

    # Messages of today where the virtual user itself is the speaker.
    own_messages_query = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'speaker_qq_number': qq_number}},
                    {'term': {'xnr_qq_number': qq_number}}
                ]
            }
        }
    }
    count_result = es_xnr.count(index=group_message_index_name,
                                doc_type=group_message_index_type,
                                body=own_messages_query)
    if count_result['_shards']['successful'] == 0:
        print('es index rank error')
        today_count = 0
    else:
        today_count = count_result['count']

    # Running total stored for the previous day.
    last_date = ts2datetime(current_time - DAY)
    qq_xnr_history_count_index_name = qq_xnr_history_count_index_name_pre + last_date
    # NOTE(review): `_id_last` is not defined anywhere in this function and
    # the client is `es`, not `es_xnr`; unless both are module-level names
    # this lookup always fails and the history total silently stays 0 --
    # confirm the intended document id.
    try:
        history_source = es.get(index=qq_xnr_history_count_index_name,
                                doc_type=qq_xnr_history_count_index_type,
                                id=_id_last)['_source']
        total_count_history = history_source['total_post_num']
    except:
        total_count_history = 0

    item_dict = {
        'speak_today': {current_time: today_count},
        'speak_total': {current_time: total_count_history + today_count}
    }

    # Message count of the busiest speaker today; falls back to the virtual
    # user's own count when the aggregation cannot be read.
    speakers_query = {
        'query': {
            'filtered': {
                'filter': {
                    'term': {'xnr_qq_number': qq_number}
                }
            }
        },
        'aggs': {
            'all_speakers': {
                'terms': {'field': 'speaker_qq_number', "order": {"_count": "desc"}}
            }
        }
    }
    try:
        speaker_buckets = es_xnr.search(
            index=group_message_index_name,
            doc_type=group_message_index_type,
            body=speakers_query)['aggregations']['all_speakers']['buckets']
        speaker_max = speaker_buckets[0]['doc_count']
    except:
        speaker_max = today_count

    own_activity = float(math.log(today_count + 1))
    peak_activity = math.log(speaker_max + 1) + 1
    # Rounded to two decimal places.
    item_dict['mark'] = round((own_activity / peak_activity) * 100, 2)

    return item_dict
Exemple #19
0
def show_event_warming(xnr_user_no,start_time,end_time):
    if S_TYPE == 'test':
        test_today_date = S_DATE_EVENT_WARMING
        test_time_gap = end_time - start_time
        today_datetime = datetime2ts(test_today_date)
        end_time = today_datetime
        start_time = end_time - test_time_gap
        end_datetime = datetime2ts(ts2datetime(end_time))
        start_datetime = datetime2ts(ts2datetime(start_time))
    else:
        now_time = int(time.time())
        today_datetime = datetime2ts(ts2datetime(now_time))
        end_datetime = datetime2ts(ts2datetime(end_time))
        start_datetime = datetime2ts(ts2datetime(start_time))

    event_warming=[]
    first_time=int(time.time())
    if today_datetime > end_datetime :
        print 'aaaa'
        event_warming = lookup_history_event_warming(xnr_user_no,start_time,end_time)
    else:
        if end_datetime == start_datetime:
            print 'bbbbb'
            event_warming = create_event_warning(xnr_user_no,end_time,write_mark=False)
        else:
            print 'cccc'
            #print 'before_time',int(time.time())

            today_event_warming = create_event_warning(xnr_user_no,end_time,write_mark=False)

            #print 'mid_time',int(time.time())
            history_event_warming = lookup_history_event_warming(xnr_user_no,start_time,today_datetime)
            #print 'final_time',int(time.time())
            #print 'today_event_warming:',today_event_warming
            #print 'history_event_warming:',history_event_warming

            history_event_warming.extend(today_event_warming)

            event_warming = history_event_warming
            #print 'event_warming:',event_warming
            #print start_datetime,end_datetime
    warming_list=[]
    event_name_list=[]
    #new_waining_list=[]
    for item in event_warming:
        event_name=item['event_name']
        item['main_user_info']=json.loads(item['main_user_info'])
        item['main_weibo_info']=json.loads(item['main_weibo_info']) 

        if event_name not in event_name_list:
            print 'event_name !!!!', event_name

            event_name_list.append(event_name)
            warming_list.append(item)
        else:
            old_event=[event for event in warming_list if event['event_name'] == event_name][0]
            new_warming_list = [event for event in warming_list if event['event_name'] != event_name]
            
            old_main_user_info = [event['main_user_info'] for event in warming_list if event['event_name'] == event_name][0]
            old_main_user_uids = [user['uid'] for user in old_main_user_info]
            now_uids = [u['uid'] for u in item['main_user_info']]
            new_uids = list(set(old_main_user_uids) - (set(old_main_user_uids) & set(now_uids)))
            print 'new_uid:',new_uids
            
            new_main_user_info = []
            for uid in new_uids:
                uid_info = [u for u in item['main_user_info'] if u['uid'] == uid]
                if uid_info:
                    new_main_user_info.append(uid_info[0])
                else:
                	pass
            old_event['main_user_info'].extend(new_main_user_info)

            old_main_weibo_info = [event['main_weibo_info'] for event in warming_list if event['event_name'] == event_name][0]
            old_main_mids = [content['mid'] for content in old_main_weibo_info]
            now_mids = [c['mid'] for c in item['main_weibo_info']]
            new_mids = list(set(old_main_mids) - (set(old_main_mids) & set(now_mids)))
            print 'new_mids',new_mids

            new_main_weibo_info = []
            for mid in new_mids:
                mid_info = [t for t in item['main_weibo_info'] if t['mid'] == mid]
                new_main_weibo_info.append(mid_info[0])
            old_event['main_weibo_info'].extend(new_main_weibo_info)

            old_event['event_influence']=old_event['event_influence']+item['event_influence']
            new_warming_list.append(old_event)
            warming_list = new_warming_list


    if warming_list:
        warming_list.sort(key=lambda k:(k.get('event_influence',0)),reverse=True)
    else:
        pass
    final_time=int(time.time())
    print 'time_coust:',final_time - first_time

    return warming_list    
Exemple #20
0
def get_influence_at_num_today(xnr_user_no):
    """Return today's "@" statistics and influence mark for a QQ virtual account.

    The result has three keys:

    * ``at_day``   - {current_time: number of today's group messages of this
      account whose text matches ``*@ME*``};
    * ``at_total`` - {current_time: that number plus ``total_be_at_num`` read
      from yesterday's history-count index (0 if it cannot be read)};
    * ``mark``     - log-ratio of the ``*@ME*`` count to the ``*@*`` count,
      scaled to a percentage and rounded to two decimals.

    ``current_time`` is QQ_S_DATE_ASSESSMENT when S_TYPE == 'test', otherwise
    the current unix time. Any failure of a count/history query is treated as 0.
    """
    xnr_profile = es_xnr.get(index=qq_xnr_index_name,doc_type=qq_xnr_index_type,id=xnr_user_no)['_source']
    qq_number = xnr_profile['qq_number']
    # Not used below; the lookup is kept so a profile without a nickname still
    # fails here exactly as before.
    nickname = xnr_profile['nickname']

    current_time = datetime2ts(QQ_S_DATE_ASSESSMENT) if S_TYPE == 'test' else int(time.time())
    group_message_index_name = group_message_index_name_pre + ts2datetime(current_time)

    def count_messages_containing(token):
        """Count today's messages of this account whose text contains ``token``."""
        wildcard_query = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'xnr_qq_number':qq_number}},
                        {'wildcard':{'text':'*'+token+'*'}}
                    ]
                }
            }
        }
        try:
            response = es_xnr.count(index=group_message_index_name,doc_type=group_message_index_type,\
                        body=wildcard_query)
            if response['_shards']['successful'] != 0:
                return response['count']
            print('es index rank error')
            return 0
        except:
            return 0

    # Number of times the virtual account was @-mentioned today
    at_num_xnr = count_messages_containing('@ME')

    # Historical total, stored under yesterday's date
    history_index_name = qq_xnr_history_count_index_name_pre + ts2datetime(current_time - DAY)
    try:
        total_be_at_num_last = es_xnr.get(index=history_index_name,doc_type=qq_xnr_history_be_at_index_type,id=xnr_user_no)['_source']['total_be_at_num']
    except:
        total_be_at_num_last = 0

    at_dict = {
        'at_day': {current_time: at_num_xnr},
        'at_total': {current_time: at_num_xnr + total_be_at_num_last},
    }

    # Number of messages today that contain any "@" at all
    at_num_total_day = count_messages_containing('@')

    influence = (float(math.log(at_num_xnr+1))/(math.log(at_num_total_day+1)+1))*100
    at_dict['mark'] = round(influence,2)  # keep two decimal places

    return at_dict
Exemple #21
0
def lookup_hot_posts(from_ts, to_ts, xnr_id, classify_id, order_id):
    time_gap = to_ts - from_ts
    now_time = time.time()
    test_time_gap = datetime2ts(ts2datetime(now_time)) - datetime2ts(S_DATE_FB)
    if S_TYPE == 'test':
        today_date_time = datetime2ts(S_DATE_FB)
        from_ts = from_ts - test_time_gap
        to_ts = to_ts - test_time_gap

    from_date_ts = datetime2ts(ts2datetime(from_ts))
    to_date_ts = datetime2ts(ts2datetime(to_ts))
    print 'from_date_ts, to_date_ts:', ts2date(from_date_ts), ts2date(
        to_date_ts)
    print from_date_ts, to_date_ts

    flow_text_index_name_list = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, from_ts, to_ts)

    userslist = lookup_xnr_friends(xnr_id)
    #全部用户 0,好友 1,非好友-1
    range_time_list = {
        'range': {
            'timestamp': {
                'gte': int(from_ts),
                'lt': int(to_ts)
            }
        }
    }
    # print range_time_list

    user_condition_list = []
    if classify_id == 1:
        user_condition_list = [{
            'bool': {
                'must': [{
                    'terms': {
                        'uid': userslist
                    }
                }, range_time_list]
            }
        }]
    elif classify_id == 2:
        user_condition_list = [{
            'bool': {
                'must': [range_time_list],
                'must_not': [{
                    'terms': {
                        'uid': userslist
                    }
                }]
            }
        }]
    elif classify_id == 0:
        user_condition_list = [{'bool': {'must': [range_time_list]}}]

    query_body = {
        'query': {
            'filtered': {
                'filter': user_condition_list
            }
        },
        'size': MAX_HOT_POST_SIZE,
        'sort': {
            'timestamp': {
                'order': 'desc'
            }
        }
    }

    # try:
    es_result=es_xnr.search(index=flow_text_index_name_list,doc_type=facebook_flow_text_index_type,\
        body=query_body)['hits']['hits']
    hot_result = []
    for item in es_result:
        #查询三个指标字段
        fid_result = lookup_fid_attend_index(item['_source']['fid'], from_ts,
                                             to_ts)
        if fid_result:
            item['_source']['comment'] = fid_result['comment']
            item['_source']['share'] = fid_result['share']
            item['_source']['favorite'] = fid_result['favorite']
        else:
            item['_source']['comment'] = 0
            item['_source']['share'] = 0
            item['_source']['favorite'] = 0
            #查询用户昵称
        item['_source']['nick_name'] = get_user_nickname(
            item['_source']['uid'])
        hot_result.append(item['_source'])
    # except:
    # hot_result=[]

    if order_id == 1:  #按时间排序
        sort_condition = 'timestamp'
    elif order_id == 2:  #按热度排序
        sort_condition = 'retweeted'
    elif order_id == 3:  #按敏感度排序
        sort_condition = 'sensitive'
    else:  #默认设为按时间排序
        sort_conditiont = 'timestamp'
    if hot_result:
        hot_result.sort(key=lambda k: (k.get(sort_condition, 0)), reverse=True)
        hot_result = hot_result[:50]
    return hot_result
Exemple #22
0
def utils_get_safe(wxbot_id, period, startdate, enddate):
    """Return today's speaking statistics and activity mark for a WeChat bot.

    ``period`` is first normalised by dump_date(period, startdate, enddate).
    Only ``period == 0`` (today) is handled; for any other period the function
    returns None.

    For today the result dict contains:

    * ``speak_today`` - {midnight_ts: messages today where both ``speaker_id``
      and ``xnr_id`` equal the bot's puid};
    * ``speak_total`` - {midnight_ts: that count plus ``total_post_num`` of
      yesterday's history record (0 when it cannot be read)};
    * ``mark`` - log-ratio of today's count to the busiest speaker's count,
      scaled to a percentage and rounded to two decimals.

    :param wxbot_id: bot identifier used for the redis lookup and history query.
    :param period: period selector forwarded to dump_date.
    :param startdate: forwarded to dump_date.
    :param enddate: forwarded to dump_date.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0:     # today's data
        current_time = datetime2ts(current_date)
        last_date = ts2datetime(current_time-DAY)

        xnr_puid = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid'])['puid']

        # Number of messages the bot itself posted today
        query_body = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'speaker_id': xnr_puid}},
                        {'term':{'xnr_id':xnr_puid}}
                    ]
                }
            }
        }
        today_index_name = wx_group_message_index_name_pre + current_date
        today_count_result = es_xnr.count(index=today_index_name,doc_type=wx_group_message_index_type,body=query_body)
        if today_count_result['_shards']['successful'] != 0:
            today_count = today_count_result['count']
        else:
            print('es index rank error')
            today_count = 0

        # Historical total up to yesterday
        total_query_body = {
            'query':{
                'bool':{
                    'must':[
                        {'term':{'xnr_user_no': wxbot_id}},
                        {'term':{'puid':xnr_puid}},
                        {'term':{'date_time':last_date}}
                    ]
                }
            }
        }
        total_index_name = wx_xnr_history_count_index_name
        # Default first: previously total_count stayed unassigned when the
        # search returned with zero successful shards, and the sum below
        # raised UnboundLocalError.
        total_count = 0
        try:
            total_count_result = es_xnr.search(index=total_index_name,doc_type=wx_xnr_history_count_index_type,body=total_query_body)
            if total_count_result['_shards']['successful'] != 0:
                total_count = total_count_result['hits']['hits'][0]['_source']['total_post_num']
        except Exception as e:
            # Includes "no history record for yesterday" (IndexError on hits[0])
            print(e)
            total_count = 0

        # Total including today
        total_count_totay = total_count + today_count

        # Message count of the most active speaker
        query_body_total_day = {
            'query':{
                'filtered':{
                    'filter':{
                        'term':{'xnr_id':xnr_puid}
                    }
                }
            },
            'aggs':{
                'all_speakers':{
                    'terms':{'field':'speaker_id',"order" : { "_count" : "desc" }}
                }
            }
        }
        try:
            # NOTE(review): wx_group_message_index_name is not assigned in this
            # function. Unless it is a module-level name, the NameError is
            # swallowed below and speaker_max silently falls back to
            # today_count - presumably today_index_name was intended; confirm.
            results_total_day = es_xnr.search(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,\
                        body=query_body_total_day)['aggregations']['all_speakers']['buckets']
            speaker_max = results_total_day[0]['doc_count']
        except:
            speaker_max = today_count

        # Assemble the result
        speak_dict = dict()
        speak_dict['speak_today'] = {}
        speak_dict['speak_total'] = {}
        speak_dict['speak_today'][current_time] = today_count
        speak_dict['speak_total'][current_time] = total_count_totay
        safe_active = (float(math.log(today_count+1))/(math.log(speaker_max+1)+1))*100
        safe_active = round(safe_active,2)  # keep two decimal places
        speak_dict['mark'] = safe_active
        return speak_dict

    # Other periods are not handled by this function.
    return None
Exemple #23
0
def get_generate_example_model(domain_name,role_name):
    """Build an example persona model for a domain/role, save it and register it.

    Reads the role record ``<pinyin(domain_name)>_<role_en>`` from the weibo role
    index, rewrites several of its fields in place (political side, psychological
    features, monitor keywords, profile fields, active time, daily post number),
    fills a few fields with fixed placeholder values, then:

    * dumps the resulting dict as JSON to EXAMPLE_MODEL_PATH + task id + '.json';
    * indexes {'domain_name', 'role_name'} under the same task id in the weibo
      example-model index.

    :param domain_name: domain name; converted with pinyin.get(...) to form the id.
    :param role_name: role name; must be a key of domain_ch2en_dict.
    :return: True if both the file dump and the ES index call succeeded, False if
        either raised. Errors in the earlier lookups are not caught and propagate.
    """

    domain_pinyin = pinyin.get(domain_name,format='strip',delimiter='_')
    role_en = domain_ch2en_dict[role_name]

    task_id = domain_pinyin + '_' + role_en

    es_result = es.get(index=weibo_role_index_name,doc_type=weibo_role_index_type,id=task_id)['_source']
    item = es_result
    print 'es_result:::',es_result
    # Political orientation: take the first entry's label and map it to display text
    political_side = json.loads(item['political_side'])[0][0]

    if political_side == 'mid':
        item['political_side'] = u'中立'
    elif political_side == 'left':
        item['political_side'] = u'左倾'
    else:
        item['political_side'] = u'右倾'

    # Psychological features: keep the labels of the first TOP_PSY_FEATURE entries
    # (raises IndexError if the stored list is shorter than that)
    psy_feature_list = []

    psy_feature = json.loads(item['psy_feature'])

    for i in range(TOP_PSY_FEATURE):
        psy_feature_list.append(psy_feature[i][0])

    item['psy_feature'] = '&'.join(psy_feature_list)

    role_group_uids = json.loads(item['member_uids'])

    # NOTE(review): mget_results is only referenced by the disabled code below;
    # the live code never reads it.
    mget_results = es_user_portrait.mget(index=portrait_index_name,doc_type=portrait_index_type,body={'ids':role_group_uids})['docs']

    # topic_list = []
    # for mget_item in mget_results:
        
    #     if mget_item['found']:
    #         keywords_list = json.loads(mget_item['_source']['keywords'])
    #         topic_list.extend(keywords_list)
    
    # topic_keywords_dict = {}
    # for topic_item in topic_list:
    #     keyword = topic_item[0]
    #     keyword_count = topic_item[1]
    #     try:
    #         topic_keywords_dict[keyword] += keyword_count
    #     except:
    #         topic_keywords_dict[keyword] = keyword_count

    # monitor_keywords_list = []
    # for i in range(3):
        
    #     keyword_max = max(topic_keywords_dict,key=topic_keywords_dict.get)
    #     monitor_keywords_list.append(keyword_max)
    #     del topic_keywords_dict[keyword_max]

    # item['monitor_keywords'] = '&'.join(monitor_keywords_list)
    if S_TYPE == 'test':
        current_time  = datetime2ts(S_DATE)
    else:
        current_time = int(time.time())

    index_name_list = get_flow_text_index_list(current_time)

    # Fetch only the keywords_string field of the role members' posts
    query_body_search = {
        'query':{
            'filtered':{
                'filter':{
                    'terms':{'uid':role_group_uids}
                }
            }
        },
        'size':MAX_VALUE,
        '_source':['keywords_string']
    }
    
    es_keyword_results = es_flow_text.search(index=index_name_list,doc_type=flow_text_index_type,\
                        body=query_body_search)['hits']['hits']

    # Concatenate every hit's keywords_string, each one prefixed with '&'
    keywords_string = ''
    for mget_item in es_keyword_results:
        #print 'mget_item:::',mget_item
        #if mget_item['found']:
        keywords_string += '&'
        keywords_string += mget_item['_source']['keywords_string']
    
    k_dict = extract_keywords(keywords_string)
    
    monitor_keywords_list = []

    # Each element returned by extract_keywords exposes a .word attribute
    for item_item in k_dict:
        monitor_keywords_list.append(item_item.word.encode('utf-8'))

    item['monitor_keywords'] = ','.join(monitor_keywords_list)

    mget_results_user = es_user_portrait.mget(index=profile_index_name,doc_type=profile_index_type,body={'ids':role_group_uids})['docs']
    # Default when no member profile is found; otherwise overwritten with a string below
    item['nick_name'] = []
    # Copy nick name / location / gender from each found member profile in turn and
    # stop at the first member whose profile has a non-empty description.
    for mget_item in mget_results_user:
        #print 'mget_item:::',mget_item
        if mget_item['found']:
            item['nick_name'] = mget_item['_source']['nick_name']
            item['location'] = mget_item['_source']['user_location']
            item['gender'] = mget_item['_source']['sex']
            uid = mget_item['_source']['uid']
            try:
                profile_results = es_user_portrait.get(index=profile_index_name,doc_type=profile_index_type,id=uid)['_source']
                if profile_results['description']:
                    item['description'] = profile_results['description']
                    break
            except:
                # Any failure of this per-user lookup is ignored; the loop moves on
                pass


    # Fixed placeholder values
    item['business_goal'] = u'渗透'
    item['daily_interests'] = u'旅游'
    # if S_TYPE == 'test':
    #     user_mget_results = es.mget(index=profile_index_name,doc_type=profile_index_type,body={'ids':role_group_uids})['docs']
    #     if user_mget_results
    item['age'] = 30
    item['career'] = u'自由职业'

    # Indices of the TOP_ACTIVE_TIME largest values of active_time (argsort of the negated array)
    active_time_list_np = np.array(json.loads(item['active_time']))
    active_time_list_np_sort = np.argsort(-active_time_list_np)[:TOP_ACTIVE_TIME]
    item['active_time'] = active_time_list_np_sort.tolist()

    # Replace the day_post_num list with its mean
    day_post_num_list = np.array(json.loads(item['day_post_num']))
    item['day_post_num'] = np.mean(day_post_num_list).tolist()
    item['role_name'] = role_name
    
    # Same value as task_id above
    task_id_new =domain_pinyin + '_' + role_en

    example_model_file_name = EXAMPLE_MODEL_PATH + task_id_new + '.json'
    
    try:
        with open(example_model_file_name,"w") as dump_f:
            json.dump(item,dump_f)

        item_dict = dict()
        #item_dict['xnr_user_no'] = xnr_user_no
        item_dict['domain_name'] = domain_name
        item_dict['role_name'] = role_name

        es.index(index=weibo_example_model_index_name,doc_type=weibo_example_model_index_type,\
            body=item_dict,id=task_id_new)

        mark = True
    except:
        # Bare except: any failure in the dump or the index call yields False
        mark = False

    return mark
Exemple #24
0
def utils_get_penetration(wxbot_id, period, startdate, enddate):
    """Return group-sensitivity figures and the penetration mark for a WeChat bot.

    ``period`` is first normalised by dump_date(period, startdate, enddate).

    * ``period == 0`` (today): computes the average and the maximum
      ``sensitive_value`` over today's messages in the bot's groups and returns
      ``{'sensitive_info': {midnight_ts: average}, 'mark': penetration}``, where
      penetration is the log-ratio of average to maximum scaled to a percentage
      and rounded to two decimals. A failed query counts as 0.
    * otherwise: reads the stored history records between start_ts and end_ts and
      returns ``sensitive_info`` keyed by each timestamp from
      load_timestamp_list (0 where no record exists) plus ``mark`` taken from the
      last record in ascending timestamp order (0 if there are none).

    :param wxbot_id: bot identifier used for the redis lookup and history query.
    :param period: period selector forwarded to dump_date.
    :param startdate: forwarded to dump_date.
    :param enddate: forwarded to dump_date.
    :return: dict with keys ``sensitive_info`` and ``mark``.
    """
    start_ts, end_ts, period = dump_date(period, startdate, enddate)
    current_timestamp = int(time.time())
    current_date = ts2datetime(current_timestamp)
    if period == 0 :    # today's data
        current_time = datetime2ts(current_date)

        xnr_data = load_wxxnr_redis_data(wxbot_id=wxbot_id, items=['puid','groups_list'])
        group_list = xnr_data['groups_list']

        wx_group_message_index_name = wx_group_message_index_name_pre + current_date
        group_filter = {
            'filtered':{
                'filter':{
                    'terms':{'group_id':group_list}
                }
            }
        }

        # Query 1: average sensitivity over today's messages in the bot's groups
        query_body_info = {
            'query':group_filter,
            'aggs':{
                'avg_sensitive':{
                    'avg':{
                        'field':'sensitive_value'
                    }
                }
            }
        }
        try:
            es_sensitive_result = es_xnr.search(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,body=query_body_info)['aggregations']
            sensitive_value = es_sensitive_result['avg_sensitive']['value']
            if sensitive_value is None:   # avg aggregation over no documents
                sensitive_value = 0
        except:
            sensitive_value = 0

        # Query 2: highest sensitivity among the same messages. Only the top
        # hit is read, so only one is requested.
        query_body_max = {
            'query':group_filter,
            'size':1,
            'sort':{'sensitive_value':{'order':'desc'}}
        }
        try:
            # Uses the WeChat index/type like query 1. The previous code
            # referenced the QQ names group_message_index_name /
            # group_message_index_type; the former is not defined in this
            # function, so the lookup failed inside the bare except and
            # max_sensitive was always 0.
            max_results = es_xnr.search(index=wx_group_message_index_name,doc_type=wx_group_message_index_type,\
                            body=query_body_max)['hits']['hits']
            max_sensitive = max_results[0]['_source']['sensitive_value']
            if max_sensitive is None:
                max_sensitive = 0
        except:
            max_sensitive = 0

        # Combine into the result
        follow_group_sensitive = {'sensitive_info': {current_time: sensitive_value}}
        penetration = (math.log(sensitive_value+1)/(math.log(max_sensitive+1)+1))*100
        penetration = round(penetration,2)
        follow_group_sensitive['mark'] = penetration
        return follow_group_sensitive
    else:
        follow_group_sensitive = {}
        follow_group_sensitive['sensitive_info'] = {}
        query_body = {
            'query':{
                'filtered':{
                    'filter':{
                        'bool':{
                            'must':[
                                {'term':{'xnr_user_no':wxbot_id}},
                                {'range':{'timestamp':{'gte':start_ts,'lte':end_ts}}}
                            ]
                        }
                    }
                }
            },
            'size':MAX_SEARCH_SIZE,
            'sort':{'timestamp':{'order':'asc'}}
        }
        search_results = es_xnr.search(index=wx_xnr_history_count_index_name,doc_type=wx_xnr_history_count_index_type,\
                        body=query_body)['hits']['hits']
        # Initialise every expected timestamp to 0
        ts_list = load_timestamp_list(start_ts, end_ts)
        for ts in ts_list:
            follow_group_sensitive['sensitive_info'][ts]  = 0
        follow_group_sensitive['mark'] = 0
        # Fill in stored values; results are ascending by timestamp, so 'mark'
        # ends up as the penetration of the latest record
        for result in search_results:
            result = result['_source']
            timestamp = result['timestamp']
            follow_group_sensitive['sensitive_info'][timestamp] = result['daily_sensitive_num']
            follow_group_sensitive['mark'] = result['penetration']
        return follow_group_sensitive