Esempio n. 1
0
def lookup_twitter_date_warming(keywords, today_datetime):
    """Fetch today's sensitive tweets that mention any of the given keywords.

    One wildcard clause per keyword is built on the ``text`` field. The search
    is restricted to documents with ``sensitive >= 1`` and returns at most
    ``MAX_WARMING_SIZE`` hits ordered by ``sensitive`` descending.

    Every returned document is enriched with ``comment``, ``share`` and
    ``favorite`` (0 when no count record is found) and with ``nick_name``.

    :param keywords: iterable of keyword strings searched for in the text.
    :param today_datetime: timestamp used to select the flow-text index.
    :return: list of enriched ``_source`` dicts; an empty list when the
        search or the enrichment raises.
    """
    keyword_query_list = []
    for keyword in keywords:
        # Only encode text strings: calling .encode() on a byte string makes
        # Python 2 decode it as ASCII first, which fails on non-ASCII input.
        if not isinstance(keyword, bytes):
            keyword = keyword.encode('utf-8')
        keyword_query_list.append({'wildcard': {'text': '*' + keyword + '*'}})

    twitter_flow_text_index_name = get_timets_set_indexset_list(
        twitter_flow_text_index_name_pre, today_datetime, today_datetime)

    bool_query = {
        'should': keyword_query_list,
        'must': {'range': {'sensitive': {'gte': 1}}},
    }
    if keyword_query_list:
        # Next to a "must" clause the "should" clauses are optional and would
        # only influence scoring (which the explicit sort ignores), so require
        # at least one keyword to match. With no keywords the query is left
        # unchanged and still returns every sensitive document.
        bool_query['minimum_should_match'] = 1

    query_body = {
        'query': {'bool': bool_query},
        'size': MAX_WARMING_SIZE,
        'sort': {'sensitive': {'order': 'desc'}},
    }

    try:
        temp_result = es_xnr_2.search(index=twitter_flow_text_index_name,
                                      doc_type=twitter_flow_text_index_type,
                                      body=query_body)['hits']['hits']
        date_result = []
        print('temp_result:: %s' % (temp_result,))
        for item in temp_result:
            source = item['_source']

            # Look up the three engagement counters for this tweet.
            tid_result = lookup_tid_attend_index(source['tid'], today_datetime)
            # The lookup may hand back a list of count records instead of a
            # single record; in that case use the first one.
            if isinstance(tid_result, list):
                tid_result = tid_result[0] if tid_result else None
            if tid_result:
                source['comment'] = tid_result['comment']
                source['share'] = tid_result['share']
                source['favorite'] = tid_result['favorite']
            else:
                source['comment'] = 0
                source['share'] = 0
                source['favorite'] = 0

            # Resolve the author's nickname.
            source['nick_name'] = get_user_nickname(source['uid'])

            date_result.append(source)
    except Exception:
        # Best effort: any failure yields an empty result, but interpreter
        # exits and keyboard interrupts are no longer swallowed.
        date_result = []
    return date_result
Esempio n. 2
0
def create_speech_warning(xnr_user_no, today_datetime):
    """Index today's sensitive tweets as speech warnings for one virtual user.

    Searches the flow-text index for documents with ``sensitive >= 1`` (at
    most ``MAX_SEARCH_SIZE``, most sensitive first), tags each one as
    ``'follow'`` or ``'unfollow'`` depending on whether its author is in the
    followers list, adds the engagement counters and the author's nickname,
    and writes it to the daily speech-warning index under the id
    ``<xnr_user_no>_<tid>``.

    :param xnr_user_no: identifier of the virtual user the warnings belong to.
    :param today_datetime: timestamp selecting the source and target indices.
    :return: list containing ``True`` once for every document written.
        Search and indexing errors are not caught and propagate to the caller.
    """
    # Accounts followed by the virtual user.
    lookup_type = 'followers_list'
    followers_list = lookup_xnr_fans_followers(xnr_user_no, lookup_type)

    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {'must': {'range': {'sensitive': {'gte': 1}}}}
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {'sensitive': {'order': 'desc'}}
    }
    twitter_flow_text_index_name = get_timets_set_indexset_list(
        twitter_flow_text_index_name_pre, today_datetime, today_datetime)
    results = es_xnr.search(index=twitter_flow_text_index_name,
                            doc_type=twitter_flow_text_index_type,
                            body=query_body)['hits']['hits']

    # The target index depends only on the date, so compute it once.
    today_date = ts2datetime(today_datetime)
    twitter_speech_warning_index_name = (
        twitter_speech_warning_index_name_pre + today_date)

    result = []
    for item in results:
        source = item['_source']

        if source['uid'] in followers_list:
            source['content_type'] = 'follow'
        else:
            source['content_type'] = 'unfollow'

        source['validity'] = 0
        source['xnr_user_no'] = xnr_user_no

        # Look up the three engagement counters for this tweet.
        tid_result = lookup_tid_attend_index(source['tid'], today_datetime)
        # The lookup may hand back a list of count records instead of a
        # single record; in that case use the first one.
        if isinstance(tid_result, list):
            tid_result = tid_result[0] if tid_result else None
        if tid_result:
            source['comment'] = tid_result['comment']
            source['share'] = tid_result['share']
            source['favorite'] = tid_result['favorite']
        else:
            source['comment'] = 0
            source['share'] = 0
            source['favorite'] = 0

        # Resolve the author's nickname.
        source['nick_name'] = get_user_nickname(source['uid'])

        task_id = xnr_user_no + '_' + source['tid']

        # Persist the warning; a failure here raises instead of being
        # recorded as False.
        es_xnr.index(index=twitter_speech_warning_index_name,
                     doc_type=twitter_speech_warning_index_type,
                     body=source,
                     id=task_id)
        result.append(True)
    return result
Esempio n. 3
0
def create_speech_warning(xnr_user_no, today_datetime):
    """Index today's sensitive Facebook posts as speech warnings.

    Searches the flow-text index for documents with ``sensitive >= 1`` (at
    most ``MAX_SEARCH_SIZE``, most sensitive first), tags each one as
    ``'friends'`` or ``'unfriends'`` depending on whether its author is in
    the friends list, adds the engagement counters and the author's nickname,
    and writes it to the daily speech-warning index under the id
    ``<xnr_user_no>_<fid>``.

    :param xnr_user_no: identifier of the virtual user the warnings belong to.
    :param today_datetime: timestamp selecting the source and target indices.
    :return: list containing ``True`` once for every document written.
        Search and indexing errors are not caught and propagate to the caller.
    """
    # Friends of the virtual user.
    friends_list = lookup_xnr_friends(xnr_user_no)

    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {'must': {'range': {'sensitive': {'gte': 1}}}}
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {'sensitive': {'order': 'desc'}}
    }
    facebook_flow_text_index_name = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)
    results = es_xnr_2.search(index=facebook_flow_text_index_name,
                              doc_type=facebook_flow_text_index_type,
                              body=query_body)['hits']['hits']

    # The target index depends only on the date, so compute it once.
    today_date = ts2datetime(today_datetime)
    facebook_speech_warning_index_name = (
        facebook_speech_warning_index_name_pre + today_date)

    result = []
    for item in results:
        source = item['_source']

        if source['uid'] in friends_list:
            source['content_type'] = 'friends'
        else:
            source['content_type'] = 'unfriends'

        source['validity'] = 0
        source['xnr_user_no'] = xnr_user_no

        # Look up the three engagement counters for this post.
        fid_result = lookup_fid_attend_index(source['fid'], today_datetime)
        if fid_result:
            source['comment'] = fid_result['comment']
            source['share'] = fid_result['share']
            source['favorite'] = fid_result['favorite']
        else:
            source['comment'] = 0
            source['share'] = 0
            source['favorite'] = 0

        # Resolve the author's nickname.
        source['nick_name'] = get_user_nickname(source['uid'])

        task_id = xnr_user_no + '_' + source['fid']

        # Persist the warning; a failure here raises instead of being
        # recorded as False.
        es_xnr_2.index(index=facebook_speech_warning_index_name,
                       doc_type=facebook_speech_warning_index_type,
                       body=source,
                       id=task_id)
        result.append(True)
    return result
Esempio n. 4
0
def get_hashtag(today_datetime):
    """Aggregate today's sensitive Facebook posts by hashtag.

    Runs a terms aggregation on ``hashtag`` over documents with
    ``sensitive >= 1`` and sums ``sensitive`` per bucket. Buckets with an
    empty key are dropped.

    :param today_datetime: timestamp used to select the flow-text index.
    :return: list of dicts with ``event_name``, ``event_count`` and
        ``event_sensitive``, ordered by (event_sensitive, event_count)
        descending.
    """
    index_names = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)

    sensitive_only = {'range': {'sensitive': {'gte': 1}}}
    per_hashtag = {
        'terms': {'field': 'hashtag'},
        'aggs': {'sum_sensitive': {'sum': {'field': 'sensitive'}}},
    }
    # NOTE(review): 'size' sits at the top level of the request, not inside
    # the terms aggregation - confirm which limit EVENT_OFFLINE_COUNT is
    # meant to control.
    query_body = {
        'query': {'filtered': {'filter': {'bool': {'must': [sensitive_only]}}}},
        'aggs': {'all_hashtag': per_hashtag},
        'size': EVENT_OFFLINE_COUNT,
    }

    response = es_xnr_2.search(index=index_names,
                               doc_type=facebook_flow_text_index_type,
                               body=query_body)
    buckets = response['aggregations']['all_hashtag']['buckets']

    # One event record per bucket that has a non-empty hashtag.
    events = [
        {
            'event_name': bucket['key'],
            'event_count': bucket['doc_count'],
            'event_sensitive': bucket['sum_sensitive']['value'],
        }
        for bucket in buckets
        if bucket['key']
    ]

    def ranking(event):
        return (event['event_sensitive'], event['event_count'])

    return sorted(events, key=ranking, reverse=True)
Esempio n. 5
0
def lookup_tid_attend_index(tid, today_datetime):
    """Return the count records stored for one tweet.

    Searches the twitter count index for documents whose ``tid`` equals
    *tid*, newest ``update_time`` first, limited to ``MAX_WARMING_SIZE``.

    :param tid: tweet id to look up.
    :param today_datetime: timestamp used to select the count index.
    :return: list of ``_source`` dicts; an empty list when nothing matches
        or the search fails.
    """
    twitter_count_index_name = get_timets_set_indexset_list(
        twitter_count_index_name_pre, today_datetime, today_datetime)

    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {'must': {'term': {'tid': tid}}}
                }
            }
        },
        'size': MAX_WARMING_SIZE,
        'sort': {'update_time': {'order': 'desc'}}
    }
    try:
        hits = es_xnr.search(index=twitter_count_index_name,
                             doc_type=twitter_count_index_type,
                             body=query_body)['hits']['hits']
        return [hit['_source'] for hit in hits]
    except Exception:
        # Best effort: a failed lookup is reported as "no records", but
        # interpreter exits and keyboard interrupts are no longer swallowed.
        return []
Esempio n. 6
0
def create_event_warning(xnr_user_no, today_datetime, write_mark):
    """Build (and optionally store) one event warning per sensitive hashtag.

    For every hashtag returned by ``get_hashtag`` the sensitive posts carrying
    it are fetched (at most ``MAX_WARMING_SIZE``, most sensitive first). Each
    post is enriched with engagement counters, an influence value and the
    author's nickname. The warning then records the posts (JSON, ordered by
    influence), the average influence and timestamp, and profile data of the
    participating users (JSON, most active first). Hashtags without matching
    posts produce no warning.

    :param xnr_user_no: identifier of the virtual user the warnings belong to.
    :param today_datetime: timestamp selecting the indices; also stored as the
        warning's ``timestamp``.
    :param write_mark: when true each warning is written through
        ``write_envent_warming`` and that call's result is collected;
        otherwise the warning dict itself is collected.
    :return: list of write results or of warning dicts, depending on
        *write_mark*.
    """
    # Event names.
    hashtag_list = get_hashtag(today_datetime)

    facebook_flow_text_index_name = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)

    # Friends of the virtual user.
    friends_list = lookup_xnr_friends(xnr_user_no)

    event_warming_list = []
    for event_item in hashtag_list:
        event_name = event_item['event_name']
        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [
                                {'term': {'hashtag': event_name}},
                                {'range': {'sensitive': {'gte': 1}}},
                            ]
                        }
                    }
                }
            },
            'size': MAX_WARMING_SIZE,
            'sort': {'sensitive': {'order': 'desc'}}
        }
        event_results = es_xnr_2.search(index=facebook_flow_text_index_name,
                                        doc_type=facebook_flow_text_index_type,
                                        body=query_body)['hits']['hits']
        if not event_results:
            continue

        facebook_result = []
        alluser_num_dict = dict()
        event_influence_sum = 0
        event_time_sum = 0
        for item in event_results:
            source = item['_source']

            # Look up the three engagement counters for this post.
            fid_result = lookup_fid_attend_index(source['fid'], today_datetime)
            if fid_result:
                source['comment'] = fid_result['comment']
                source['share'] = fid_result['share']
                source['favorite'] = fid_result['favorite']
            else:
                source['comment'] = 0
                source['share'] = 0
                source['favorite'] = 0

            # Per-user activity weight: the first post of a user counts 1,
            # every further post counts 2 when set_intersection() reports a
            # positive value for the user and 1 otherwise.
            uid_key = str(source['uid'])
            if uid_key in alluser_num_dict:
                friends_mark = set_intersection(source['uid'], friends_list)
                if friends_mark > 0:
                    alluser_num_dict[uid_key] = alluser_num_dict[uid_key] + 2
                else:
                    alluser_num_dict[uid_key] = alluser_num_dict[uid_key] + 1
            else:
                alluser_num_dict[uid_key] = 1

            # Influence of the post, scaled by the author's user type.
            origin_influence_value = (
                1 + source['comment'] + source['share'] + source['favorite']
            ) * (1 + source['sensitive'])
            friends_value = judge_user_type(source['uid'], friends_list)
            source['facebook_influence_value'] = (
                origin_influence_value * friends_value)

            # Resolve the author's nickname.
            source['nick_name'] = get_user_nickname(source['uid'])
            facebook_result.append(source)

            # Accumulate influence and time for the event averages.
            event_influence_sum = (
                event_influence_sum + source['facebook_influence_value'])
            event_time_sum = event_time_sum + source['timestamp']

        event_warming_content = dict()
        event_warming_content['event_name'] = event_name

        # Representative posts, most influential first.
        facebook_result.sort(
            key=lambda k: k.get('facebook_influence_value', 0), reverse=True)
        event_warming_content['main_facebook_info'] = json.dumps(
            facebook_result)

        # Average influence and average timestamp of the event.
        number = len(event_results)
        event_warming_content['event_influence'] = event_influence_sum / number
        event_warming_content['event_time'] = event_time_sum / number

        # Users ordered by activity weight, heaviest first.
        ranked_users = sorted(alluser_num_dict.items(),
                              key=lambda d: d[1],
                              reverse=True)
        main_userid_list = [uid for uid, _ in ranked_users]

        # Profile data of the participating users.
        user_es_result = es_xnr_2.mget(index=facebook_user_index_name,
                                       doc_type=facebook_user_index_type,
                                       body={'ids': main_userid_list})['docs']
        main_user_info = []
        for doc in user_es_result:
            user_dict = dict()
            user_dict['uid'] = doc['_id']
            if doc['found']:
                profile = doc['_source']
                user_dict['username'] = profile['username']
                user_dict['talking_about_count'] = profile.get(
                    'talking_about_count', 0)
                user_dict['likes'] = profile.get('likes', 0)
                user_dict['category'] = profile.get('category', '')
            else:
                # Unknown user: keep the id and fall back to empty values.
                user_dict['username'] = ''
                user_dict['talking_about_count'] = 0
                user_dict['likes'] = 0
                user_dict['category'] = ''
            main_user_info.append(user_dict)
        event_warming_content['main_user_info'] = json.dumps(main_user_info)

        event_warming_content['xnr_user_no'] = xnr_user_no
        event_warming_content['validity'] = 0
        event_warming_content['timestamp'] = today_datetime
        task_id = xnr_user_no + '_' + event_name

        # Persist the warning, or hand it back unwritten.
        if write_mark:
            print('task_id_event: %s' % task_id)
            mark = write_envent_warming(today_datetime,
                                        event_warming_content, task_id)
            event_warming_list.append(mark)
        else:
            event_warming_list.append(event_warming_content)

    return event_warming_list
Esempio n. 7
0
def create_personal_warning(xnr_user_no, today_datetime):
    """Store a personal warning for every user with sensitive posts today.

    A terms aggregation on ``uid`` sums ``sensitive`` per user; the request
    carries no query, so it is not restricted to the virtual user's friends.
    Users with a positive sum are kept and their score is multiplied by the
    value ``judge_user_type`` returns for them. For each such user the
    sensitive posts (``sensitive >= 1``, at most ``MAX_WARMING_SIZE``) are
    fetched, enriched with engagement counters and nickname, and written as
    one document with id ``<xnr_user_no>_<uid>`` to the daily user-warning
    index.

    :param xnr_user_no: identifier of the virtual user the warnings belong to.
    :param today_datetime: timestamp selecting the indices; also stored as the
        warning's ``timestamp``.
    :return: list with one boolean per candidate user: ``True`` when the
        warning was indexed, ``False`` when indexing failed or the user had
        no sensitive posts to report.
    """
    # Friends of the virtual user.
    friends_list = lookup_xnr_friends(xnr_user_no)

    # Sum of sensitivity per user.
    query_body = {
        'aggs': {
            'friends_sensitive_num': {
                'terms': {
                    'field': 'uid'
                },
                'aggs': {
                    'sensitive_num': {
                        'sum': {
                            'field': 'sensitive'
                        }
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE
    }

    facebook_flow_text_index_name = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)

    try:
        first_sum_result = es_xnr_2.search(
            index=facebook_flow_text_index_name,
            doc_type=facebook_flow_text_index_type,
            body=query_body)['aggregations']['friends_sensitive_num']['buckets']
    except Exception:
        # Best effort: a failed aggregation means no candidate users.
        first_sum_result = []

    # Keep users with a positive score, weighted by their user type.
    top_userlist = []
    for bucket in first_sum_result:
        user_sensitive = bucket['sensitive_num']['value']
        if user_sensitive > 0:
            friends_mark = judge_user_type(bucket['key'], friends_list)
            top_userlist.append({
                'uid': bucket['key'],
                'sensitive': user_sensitive * friends_mark,
            })

    # The target index depends only on the date, so compute it once.
    today_date = ts2datetime(today_datetime)
    facebook_user_warning_index_name = (
        facebook_user_warning_index_name_pre + today_date)

    # Collect the sensitive posts of every candidate user.
    results = []
    for user in top_userlist:
        uid = user['uid']
        print(uid)

        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [
                                {'term': {'uid': uid}},
                                {'range': {'sensitive': {'gte': 1}}},
                            ]
                        }
                    }
                }
            },
            'size': MAX_WARMING_SIZE,
            'sort': {'sensitive': {'order': 'desc'}}
        }

        try:
            second_result = es_xnr_2.search(
                index=facebook_flow_text_index_name,
                doc_type=facebook_flow_text_index_type,
                body=query_body)['hits']['hits']
        except Exception:
            second_result = []

        s_result = []
        for item in second_result:
            source = item['_source']

            # Look up the three engagement counters for this post.
            fid_result = lookup_fid_attend_index(source['fid'], today_datetime)
            if fid_result:
                source['comment'] = fid_result['comment']
                source['share'] = fid_result['share']
                source['favorite'] = fid_result['favorite']
            else:
                source['comment'] = 0
                source['share'] = 0
                source['favorite'] = 0

            # Resolve the author's nickname.
            source['nick_name'] = get_user_nickname(source['uid'])

            s_result.append(source)

        if not s_result:
            # Nothing to store for this user. Record an explicit False rather
            # than reusing the previous user's outcome (or failing on an
            # unassigned variable for the first user).
            results.append(False)
            continue

        s_result.sort(key=lambda k: k.get('sensitive', 0), reverse=True)

        user_detail = dict()
        user_detail['uid'] = uid
        user_detail['user_sensitive'] = user['sensitive']
        user_detail['user_name'] = get_user_nickname(uid)
        user_detail['content'] = json.dumps(s_result)
        user_detail['xnr_user_no'] = xnr_user_no
        user_detail['validity'] = 0
        user_detail['timestamp'] = today_datetime

        # Persist the warning.
        task_id = xnr_user_no + '_' + uid
        try:
            es_xnr_2.index(index=facebook_user_warning_index_name,
                           doc_type=facebook_user_warning_index_type,
                           body=user_detail,
                           id=task_id)
            mark = True
        except Exception:
            mark = False

        results.append(mark)

    return results