Example #1
0
def get_hashtag(today_datetime):
    """Aggregate sensitive hashtags from the day's flow-text indices.

    Buckets posts with sensitive >= 1 by hashtag, sums sensitivity per
    bucket, and returns a list of dicts with keys ``event_name``,
    ``event_count`` and ``event_sensitive``, sorted by
    (event_sensitive, event_count) descending.

    :param today_datetime: timestamp used to pick the day's index list.
    :return: list of hashtag summary dicts (may be empty).
    """
    weibo_flow_text_index_name = get_day_flow_text_index_list(today_datetime)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'range': {
                                'sensitive': {
                                    'gte': 1
                                }
                            }
                        }]
                    }
                }
            }
        },
        'aggs': {
            'all_hashtag': {
                'terms': {
                    'field': 'hashtag'
                },
                'aggs': {
                    'sum_sensitive': {
                        'sum': {
                            'field': 'sensitive'
                        }
                    }
                }
            }
        },
        'size': EVENT_OFFLINE_COUNT
    }
    weibo_text_exist = es_flow_text.search(index=weibo_flow_text_index_name,
                                           doc_type=flow_text_index_type,
                                           body=query_body)['aggregations']['all_hashtag']['buckets']

    # Skip buckets whose key is empty (posts without a hashtag).
    hashtag_list = [{
        'event_name': item['key'],
        'event_count': item['doc_count'],
        'event_sensitive': item['sum_sensitive']['value'],
    } for item in weibo_text_exist if item['key']]

    hashtag_list.sort(key=lambda k:
                      (k.get('event_sensitive', 0), k.get('event_count', 0)),
                      reverse=True)
    return hashtag_list
Example #2
0
def create_speech_warning(xnr_user_no,start_time,end_time):
    #查询关注列表
    lookup_type='followers_list'
    followers_list=lookup_xnr_fans_followers(xnr_user_no,lookup_type)
    result = [] 
    query_body={
        'query':{
            'filtered':{
                'filter':{
                    'bool':{
                        'must':[
                            {'range':{'sensitive':{'gte':1}}},
                            {'range':{'timestamp':{'gte':start_time,'lte':end_time}}}
                         ]}
                }
            }
        },
        'size':MAX_HOT_POST_SIZE,
        'sort':{'sensitive':{'order':'desc'}}
    }

    flow_text_index_name=get_day_flow_text_index_list(end_time)

    results=es_flow_text.search(index=flow_text_index_name,doc_type=flow_text_index_type,body=query_body)['hits']['hits']
    
    warning_type = 'speech'
    r_result = remove_repeat(results,warning_type)
    for item in r_result:
        item['nick_name']=get_user_nickname(item['uid'])
        followers_mark = set_intersection(item['uid'],followers_list)
        if followers_mark != 0:
            item['content_type']='follow'
        else:
            item['content_type']='unfollow'

        item['validity']=0
        item['xnr_user_no']=xnr_user_no

        task_id=xnr_user_no+'_'+item['mid']

        #写入数据库
        today_date=ts2datetime(end_time)
        weibo_speech_warning_index_name=weibo_speech_warning_index_name_pre+today_date
        if not es_xnr.indices.exists(index=weibo_speech_warning_index_name):
            weibo_speech_warning_mappings(weibo_speech_warning_index_name)
        try:
            es_xnr.index(index=weibo_speech_warning_index_name,doc_type=weibo_speech_warning_index_type,body=item,id=task_id)
            mark=True
        except:
            mark=False

        result.append(mark)
    print 'speech_result::',result
    return result
Example #3
0
def create_speech_warning(xnr_user_no,today_datetime):
    """Create speech warnings for a single day's sensitive posts.

    Variant of the (xnr_user_no, start_time, end_time) version that filters
    only on sensitive >= 1 and works on the raw ES hits (``_source`` kept
    nested).  Each hit is tagged with nickname / follow status and indexed
    into the per-day speech-warning index.

    :param xnr_user_no: id of the virtual persona.
    :param today_datetime: timestamp selecting the day's indices.
    :return: list of booleans, one per record (True = indexed successfully).
    """
    # Followers list of the virtual persona.
    lookup_type='followers_list'
    followers_list=lookup_xnr_fans_followers(xnr_user_no,lookup_type)

    query_body={
        'query':{
            'filtered':{
                'filter':{
                    'bool':{'must':{'range':{'sensitive':{'gte':1}}}}
                }
            }
        },
        'size':MAX_SEARCH_SIZE,
        'sort':{'sensitive':{'order':'desc'}}
    }

    flow_text_index_name=get_day_flow_text_index_list(today_datetime)

    results=es_flow_text.search(index=flow_text_index_name,doc_type=flow_text_index_type,body=query_body)['hits']['hits']
    result=[]
    for item in results:
        item['_source']['nick_name']=get_user_nickname(item['_source']['uid'])
        if item['_source']['uid'] in followers_list:
            item['_source']['content_type']='follow'
        else:
            item['_source']['content_type']='unfollow'

        item['_source']['validity']=0
        item['_source']['xnr_user_no']=xnr_user_no

        task_id=xnr_user_no+'_'+item['_source']['mid']

        # Write to the per-day warning index; ensure index/mappings exist
        # first (consistent with the range-based variant of this function).
        today_date=ts2datetime(today_datetime)
        weibo_speech_warning_index_name=weibo_speech_warning_index_name_pre+today_date
        if not es_xnr.indices.exists(index=weibo_speech_warning_index_name):
            weibo_speech_warning_mappings(weibo_speech_warning_index_name)
        try:
            es_xnr.index(index=weibo_speech_warning_index_name,doc_type=weibo_speech_warning_index_type,body=item['_source'],id=task_id)
            mark=True
        except Exception:
            # Narrowed from a bare except: record the failure without
            # swallowing KeyboardInterrupt/SystemExit.
            mark=False

        result.append(mark)
    return result
Example #4
0
def lookup_weibo_date_warming(keywords, today_datetime):
    keyword_query_list = []
    for keyword in keywords:
        # keyword = keyword.encode('utf-8')
        print 'keyword:', keyword, type(keyword)
        keyword_query_list.append({'wildcard': {'text': '*' + keyword + '*'}})
        # keyword_query_list.append({'wildcard':{'text':{'wildcard':'*'+keyword.encode('utf-8')+'*'}}})

    flow_text_index_name = get_day_flow_text_index_list(today_datetime)

    # keyword_query_list.append({'range':{'sensitive':{'gte':1}}})

    query_body = {
        'query': {
            'bool': {
                # 'must':[{'range':{'sensitive':{'gte':1}}}],
                'should': keyword_query_list
            }
        },
        'size': MAX_WARMING_SIZE,
        'sort': {
            'sensitive': {
                'order': 'desc'
            }
        }
    }
    if es_flow_text.indices.exists(index=flow_text_index_name):
        #try:
        temp_result = es_flow_text.search(index=flow_text_index_name,
                                          doc_type=flow_text_index_type,
                                          body=query_body)['hits']['hits']
        date_result = []
        print keyword_query_list
        for item in temp_result:
            # print 'item-text:', item['_source']['text'], type(item['_source']['text'])
            item['_source']['nick_name'] = get_user_nickname(
                item['_source']['uid'])
            date_result.append(item['_source'])
        #except:
        #        date_result=[]
    else:
        pass
    return date_result
Example #5
0
def create_event_warning(xnr_user_no,start_time,end_time):
    """Build event-level warnings from the day's sensitive hashtags.

    For each hashtag returned by get_hashtag: collect its sensitive posts
    in [start_time, end_time], score each post's influence, select typical
    posts and main participating users, and average influence/timestamp
    over the hits.  Returns a list of per-event warning dicts.

    NOTE(review): accumulation and ordering here are intricate (per-uid
    tallies, mid-loop conversion of alluser_num_dict to a sorted list of
    tuples) - code left byte-identical, comments only.
    """
    # Get candidate event names (sensitive hashtags of the day).
    today_datetime = start_time
    hashtag_list = get_hashtag(today_datetime)
    #print 'hashtag_list::',hashtag_list

    flow_text_index_name = get_day_flow_text_index_list(today_datetime)

    # Fans list and followers list of the virtual persona; default to empty
    # lists when the persona record cannot be fetched.
    try:
        es_xnr_result=es_xnr.get(index=weibo_xnr_fans_followers_index_name,doc_type=weibo_xnr_fans_followers_index_type,id=xnr_user_no)['_source']
        followers_list=es_xnr_result['followers_list']
        fans_list=es_xnr_result['fans_list']
    except:
        followers_list=[]
        fans_list=[]

    event_warming_list=[]
    event_num=0
    for event_item in hashtag_list:
        event_sensitive_count=0
        event_warming_content=dict()     # event name, main participating users, typical weibo, event influence, average event time
        event_warming_content['event_name']=event_item['event_name']
        print 'event_name:',event_item
        event_num=event_num+1
        print 'event_num:::',event_num
        print 'first_time:::',int(time.time())
        event_influence_sum=0
        event_time_sum=0       
        # Sensitive posts carrying this hashtag inside the time range.
        query_body={
            'query':{
                # 'bool':{
                #     'must':[{'wildcard':{'text':'*'+event_item[0]+'*'}},
                #     {'range':{'sensitive':{'gte':1}}}]
                # }
                'filtered':{
                    'filter':{
                        'bool':{
                            'must':[
                                {'term':{'hashtag':event_item['event_name']}},
                                {'range':{'sensitive':{'gte':1}}},
                                {'range':{'timestamp':{'gte':start_time,'lte':end_time}}}
                            ]
                        }
                    }
                }
            },
            'size':MAX_WARMING_SIZE,
            'sort':{'sensitive':{'order':'desc'}}
        }
        #try:         
        event_results=es_flow_text.search(index=flow_text_index_name,doc_type=flow_text_index_type,body=query_body)['hits']['hits']
        print 'event:::',len(event_results),start_time,end_time
        if event_results:
            weibo_result=[]
            fans_num_dict=dict()
            followers_num_dict=dict()
            alluser_num_dict=dict()
            print 'sencond_time:::',int(time.time())
            for item in event_results:
                #print 'event_content:',item['_source']['text']          
                
                # Tally posts per uid; posts by followed users count double.
                if alluser_num_dict.has_key(str(item['_source']['uid'])):
                    followers_mark=set_intersection(item['_source']['uid'],followers_list)
                    if followers_mark > 0:
                        alluser_num_dict[str(item['_source']['uid'])]=alluser_num_dict[str(item['_source']['uid'])]+1*2
                    else:
                        alluser_num_dict[str(item['_source']['uid'])]=alluser_num_dict[str(item['_source']['uid'])]+1
                else:
                    alluser_num_dict[str(item['_source']['uid'])]=1                

                # Influence: (1 + comments + retweets) * (1 + sensitivity),
                # scaled by the follower-type weight from judge_user_type.
                origin_influence_value=(1+item['_source']['comment']+item['_source']['retweeted'])*(1+item['_source']['sensitive'])
                # fans_value=judge_user_type(item['_source']['uid'],fans_list)
                followers_value=judge_user_type(item['_source']['uid'],followers_list)
                item['_source']['weibo_influence_value']=origin_influence_value*(followers_value)
                
                item['_source']['nick_name']=get_user_nickname(item['_source']['uid'])

                weibo_result.append(item['_source'])

                # Accumulate influence and timestamps for averaging below.
                event_influence_sum=event_influence_sum+item['_source']['weibo_influence_value']
                event_time_sum=event_time_sum+item['_source']['timestamp']            
        
            print 'third_time:::',int(time.time())
            # Typical weibo info: deduplicate, then sort by influence.
            the_weibo_result=remove_repeat_v2(weibo_result)
            the_weibo_result.sort(key=lambda k:(k.get('weibo_influence_value',0)),reverse=True)
            event_warming_content['main_weibo_info']=json.dumps(the_weibo_result)

            # Event influence and average event time over all hits.
            number=len(event_results)
            event_warming_content['event_influence']=event_influence_sum/number
            event_warming_content['event_time']=event_time_sum/number

        # except:
        #     event_warming_content['main_weibo_info']=[]
        #     event_warming_content['event_influence']=0
        #     event_warming_content['event_time']=0
        
        # try:
            # Rank users by participation count (dict becomes a sorted
            # list of (uid, count) tuples here).
            alluser_num_dict=sorted(alluser_num_dict.items(),key=lambda d:d[1],reverse=True)
            main_userid_list=[]
            for i in xrange(0,len(alluser_num_dict)):
                main_userid_list.append(alluser_num_dict[i][0])

        # Profile info of the main participating users.
            main_user_info=[]
            user_es_result=es_user_profile.mget(index=profile_index_name,doc_type=profile_index_type,body={'ids':main_userid_list})['docs']
            for item in user_es_result:

                user_dict=dict()
                if item['found']:
                    user_dict['photo_url']=item['_source']['photo_url']
                    user_dict['uid']=item['_id']
                    user_dict['nick_name']=item['_source']['nick_name']
                    user_dict['favoritesnum']=item['_source']['favoritesnum']
                    user_dict['fansnum']=item['_source']['fansnum']
                else:
                    # Profile missing: fall back to empty/zero fields.
                    user_dict['photo_url']=''
                    user_dict['uid']=item['_id']
                    user_dict['nick_name']=''
                    user_dict['favoritesnum']=0
                    user_dict['fansnum']=0
                main_user_info.append(user_dict)
            event_warming_content['main_user_info']=json.dumps(main_user_info)


        # except:
            # event_warming_content['main_user_info']=[]
            print 'fourth_time:::',int(time.time())
            event_warming_content['xnr_user_no']=xnr_user_no
            event_warming_content['validity']=0
            event_warming_content['timestamp']=today_datetime

            event_warming_list.append(event_warming_content)
        else:
        	pass
        print 'fifth_time:::',int(time.time())
    return event_warming_list
Example #6
0
def create_personal_warning(xnr_user_no,start_time,end_time):
    """Build per-user speech warnings for a virtual persona.

    Aggregates the day's posts by uid and sums each uid's sensitivity;
    users with a positive sum are kept (boosted by judge_user_type when
    they are followed).  For each such user, their sensitive posts are
    fetched, deduplicated and sorted, and packed into a warning record.

    :param xnr_user_no: id of the virtual persona.
    :param start_time: range start (currently unused in the aggregation).
    :param end_time: selects the day's indices; stored as record timestamp.
    :return: list of per-user warning dicts.
    """
    # Followers list of the virtual persona.
    lookup_type='followers_list'
    followers_list=lookup_xnr_fans_followers(xnr_user_no,lookup_type)

    # uid of the virtual persona (kept for parity with sibling functions).
    xnr_uid=lookup_xnr_uid(xnr_user_no)

    # Sum sensitivity per uid over the day's flow-text indices.
    query_body={
        'aggs':{
            'followers_sensitive_num':{
                'terms':{'field':'uid'},
                'aggs':{
                    'sensitive_num':{
                        'sum':{'field':'sensitive'}
                    }
                }
            }
        },
        'size':MAX_SEARCH_SIZE
    }
    flow_text_index_name=get_day_flow_text_index_list(end_time)

    first_sum_result=es_flow_text.search(index=flow_text_index_name,doc_type=flow_text_index_type,\
        body=query_body)['aggregations']['followers_sensitive_num']['buckets']

    # Keep users with positive aggregated sensitivity; followed users are
    # weighted up via judge_user_type.
    top_userlist=[]
    for bucket in first_sum_result:
        user_sensitive=bucket['sensitive_num']['value']
        if user_sensitive > 0:
            user_dict=dict()
            user_dict['uid']=bucket['key']
            followers_mark=judge_user_type(user_dict['uid'],followers_list)
            user_dict['sensitive']=user_sensitive*followers_mark
            top_userlist.append(user_dict)

    # Fetch each flagged user's sensitive posts and build warning records.
    results=[]
    for user in top_userlist:
        user_detail=dict()
        user_detail['uid']=user['uid']
        user_detail['user_sensitive']=user['sensitive']
        user_detail['user_name']=get_user_nickname(user['uid'])

        query_body={
            'query':{
                'filtered':{
                    'filter':{
                        'bool':{
                            'must':[
                                {'term':{'uid':user['uid']}},
                                {'range':{'sensitive':{'gte':1}}},
                            ]
                        }
                    }
                }
            },
            'size':MAX_WARMING_SIZE,
            'sort':{'sensitive':{'order':'desc'}}
        }

        try:
            second_result=es_flow_text.search(index=flow_text_index_name,doc_type=flow_text_index_type,body=query_body)['hits']['hits']
        except Exception:
            # Best-effort: a failed search just yields no posts for this user.
            second_result=[]
        warning_type = 'user'
        s_result=remove_repeat(second_result,warning_type)

        s_result.sort(key=lambda k:(k.get('sensitive',0)),reverse=True)
        user_detail['content']=json.dumps(s_result)

        user_detail['xnr_user_no']=xnr_user_no
        user_detail['validity']=0
        user_detail['timestamp']=end_time

        results.append(user_detail)
    return results