def get_hashtag(today_datetime):
    """Aggregate the day's sensitive-weibo hashtags.

    Buckets flow-text posts with sensitive >= 1 by their ``hashtag``
    field and sums the sensitivity per hashtag.  Returns a list of
    dicts (``event_name`` / ``event_count`` / ``event_sensitive``)
    sorted by sensitivity, then post count, descending.  Buckets with
    an empty hashtag key are skipped.
    """
    weibo_flow_text_index_name = get_day_flow_text_index_list(today_datetime)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{'range': {'sensitive': {'gte': 1}}}]
                    }
                }
            }
        },
        'aggs': {
            'all_hashtag': {
                'terms': {'field': 'hashtag'},
                'aggs': {
                    'sum_sensitive': {'sum': {'field': 'sensitive'}}
                }
            }
        },
        'size': EVENT_OFFLINE_COUNT
    }
    buckets = es_flow_text.search(index=weibo_flow_text_index_name,
                                  doc_type=flow_text_index_type,
                                  body=query_body)['aggregations']['all_hashtag']['buckets']
    hashtag_list = []
    for bucket in buckets:
        # Drop the empty-hashtag bucket.
        if not bucket['key']:
            continue
        hashtag_list.append({
            'event_name': bucket['key'],
            'event_count': bucket['doc_count'],
            'event_sensitive': bucket['sum_sensitive']['value'],
        })
    hashtag_list.sort(key=lambda k: (k.get('event_sensitive', 0), k.get('event_count', 0)),
                      reverse=True)
    return hashtag_list
def create_speech_warning(xnr_user_no,start_time,end_time): #查询关注列表 lookup_type='followers_list' followers_list=lookup_xnr_fans_followers(xnr_user_no,lookup_type) result = [] query_body={ 'query':{ 'filtered':{ 'filter':{ 'bool':{ 'must':[ {'range':{'sensitive':{'gte':1}}}, {'range':{'timestamp':{'gte':start_time,'lte':end_time}}} ]} } } }, 'size':MAX_HOT_POST_SIZE, 'sort':{'sensitive':{'order':'desc'}} } flow_text_index_name=get_day_flow_text_index_list(end_time) results=es_flow_text.search(index=flow_text_index_name,doc_type=flow_text_index_type,body=query_body)['hits']['hits'] warning_type = 'speech' r_result = remove_repeat(results,warning_type) for item in r_result: item['nick_name']=get_user_nickname(item['uid']) followers_mark = set_intersection(item['uid'],followers_list) if followers_mark != 0: item['content_type']='follow' else: item['content_type']='unfollow' item['validity']=0 item['xnr_user_no']=xnr_user_no task_id=xnr_user_no+'_'+item['mid'] #写入数据库 today_date=ts2datetime(end_time) weibo_speech_warning_index_name=weibo_speech_warning_index_name_pre+today_date if not es_xnr.indices.exists(index=weibo_speech_warning_index_name): weibo_speech_warning_mappings(weibo_speech_warning_index_name) try: es_xnr.index(index=weibo_speech_warning_index_name,doc_type=weibo_speech_warning_index_type,body=item,id=task_id) mark=True except: mark=False result.append(mark) print 'speech_result::',result return result
def create_speech_warning(xnr_user_no, today_datetime):
    """Persist the day's sensitive posts as speech warnings.

    NOTE(review): this redefines ``create_speech_warning`` with a
    different signature; being defined later, it shadows the earlier
    (start_time, end_time) version at import time — confirm which one
    callers expect.

    Returns a list of booleans, one per stored post (True = indexed OK).
    """
    # Followers of this virtual persona (XNR).
    followers_list = lookup_xnr_fans_followers(xnr_user_no, 'followers_list')
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {'must': {'range': {'sensitive': {'gte': 1}}}}
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {'sensitive': {'order': 'desc'}},
    }
    flow_text_index_name = get_day_flow_text_index_list(today_datetime)
    hits = es_flow_text.search(index=flow_text_index_name,
                               doc_type=flow_text_index_type,
                               body=query_body)['hits']['hits']
    result = []
    for hit in hits:
        source = hit['_source']
        source['nick_name'] = get_user_nickname(source['uid'])
        # Tag the author as followed / not followed by the XNR.
        if source['uid'] in followers_list:
            source['content_type'] = 'follow'
        else:
            source['content_type'] = 'unfollow'
        source['validity'] = 0
        source['xnr_user_no'] = xnr_user_no
        task_id = xnr_user_no + '_' + source['mid']
        # Write into the per-day warning index.
        today_date = ts2datetime(today_datetime)
        weibo_speech_warning_index_name = weibo_speech_warning_index_name_pre + today_date
        try:
            es_xnr.index(index=weibo_speech_warning_index_name,
                         doc_type=weibo_speech_warning_index_type,
                         body=source,
                         id=task_id)
            mark = True
        except:
            mark = False
        result.append(mark)
    return result
def lookup_weibo_date_warming(keywords, today_datetime): keyword_query_list = [] for keyword in keywords: # keyword = keyword.encode('utf-8') print 'keyword:', keyword, type(keyword) keyword_query_list.append({'wildcard': {'text': '*' + keyword + '*'}}) # keyword_query_list.append({'wildcard':{'text':{'wildcard':'*'+keyword.encode('utf-8')+'*'}}}) flow_text_index_name = get_day_flow_text_index_list(today_datetime) # keyword_query_list.append({'range':{'sensitive':{'gte':1}}}) query_body = { 'query': { 'bool': { # 'must':[{'range':{'sensitive':{'gte':1}}}], 'should': keyword_query_list } }, 'size': MAX_WARMING_SIZE, 'sort': { 'sensitive': { 'order': 'desc' } } } if es_flow_text.indices.exists(index=flow_text_index_name): #try: temp_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type, body=query_body)['hits']['hits'] date_result = [] print keyword_query_list for item in temp_result: # print 'item-text:', item['_source']['text'], type(item['_source']['text']) item['_source']['nick_name'] = get_user_nickname( item['_source']['uid']) date_result.append(item['_source']) #except: # date_result=[] else: pass return date_result
def create_event_warning(xnr_user_no,start_time,end_time):
    """Build event-level warning entries from sensitive hashtags.

    For each hashtag returned by get_hashtag(start_time), collects the
    sensitive posts carrying that hashtag in [start_time, end_time],
    scores each post's influence, aggregates per-event influence and
    average timestamp, and gathers the main participating users'
    profiles.  Returns a list of dicts, one per event that had posts.
    """
    # Event names: the day's sensitive hashtags.
    today_datetime = start_time
    hashtag_list = get_hashtag(today_datetime)
    #print 'hashtag_list::',hashtag_list
    flow_text_index_name = get_day_flow_text_index_list(today_datetime)
    # Fans and followers lists of the virtual persona (XNR);
    # fall back to empty lists when the XNR document is missing.
    try:
        es_xnr_result=es_xnr.get(index=weibo_xnr_fans_followers_index_name,doc_type=weibo_xnr_fans_followers_index_type,id=xnr_user_no)['_source']
        followers_list=es_xnr_result['followers_list']
        fans_list=es_xnr_result['fans_list']
    except:
        followers_list=[]
        fans_list=[]
    event_warming_list=[]
    event_num=0
    for event_item in hashtag_list:
        event_sensitive_count=0
        # Per-event payload: event name, main participants, representative
        # weibos, event influence, average event time.
        event_warming_content=dict()
        event_warming_content['event_name']=event_item['event_name']
        print 'event_name:',event_item
        event_num=event_num+1
        print 'event_num:::',event_num
        print 'first_time:::',int(time.time())
        event_influence_sum=0
        event_time_sum=0
        # Sensitive posts carrying this hashtag inside the time window,
        # most sensitive first.
        query_body={
            'query':{
                # 'bool':{
                #     'must':[{'wildcard':{'text':'*'+event_item[0]+'*'}},
                #             {'range':{'sensitive':{'gte':1}}}]
                # }
                'filtered':{
                    'filter':{
                        'bool':{
                            'must':[
                                {'term':{'hashtag':event_item['event_name']}},
                                {'range':{'sensitive':{'gte':1}}},
                                {'range':{'timestamp':{'gte':start_time,'lte':end_time}}}
                            ]
                        }
                    }
                }
            },
            'size':MAX_WARMING_SIZE,
            'sort':{'sensitive':{'order':'desc'}}
        }
        #try:
        event_results=es_flow_text.search(index=flow_text_index_name,doc_type=flow_text_index_type,body=query_body)['hits']['hits']
        print 'event:::',len(event_results),start_time,end_time
        if event_results:
            weibo_result=[]
            fans_num_dict=dict()
            followers_num_dict=dict()
            alluser_num_dict=dict()
            print 'sencond_time:::',int(time.time())
            for item in event_results:
                #print 'event_content:',item['_source']['text']
                # Tally posts per user; posts by users the XNR follows
                # count double.
                if alluser_num_dict.has_key(str(item['_source']['uid'])):
                    followers_mark=set_intersection(item['_source']['uid'],followers_list)
                    if followers_mark > 0:
                        alluser_num_dict[str(item['_source']['uid'])]=alluser_num_dict[str(item['_source']['uid'])]+1*2
                    else:
                        alluser_num_dict[str(item['_source']['uid'])]=alluser_num_dict[str(item['_source']['uid'])]+1
                else:
                    alluser_num_dict[str(item['_source']['uid'])]=1
                # Influence: engagement (comments + retweets) scaled by
                # sensitivity, then weighted by the author's follower status.
                origin_influence_value=(1+item['_source']['comment']+item['_source']['retweeted'])*(1+item['_source']['sensitive'])
                # fans_value=judge_user_type(item['_source']['uid'],fans_list)
                followers_value=judge_user_type(item['_source']['uid'],followers_list)
                item['_source']['weibo_influence_value']=origin_influence_value*(followers_value)
                item['_source']['nick_name']=get_user_nickname(item['_source']['uid'])
                weibo_result.append(item['_source'])
                # Accumulate influence and timestamps for the event averages.
                event_influence_sum=event_influence_sum+item['_source']['weibo_influence_value']
                event_time_sum=event_time_sum+item['_source']['timestamp']
            print 'third_time:::',int(time.time())
            # Representative weibos: dedupe, then sort by influence (desc).
            the_weibo_result=remove_repeat_v2(weibo_result)
            the_weibo_result.sort(key=lambda k:(k.get('weibo_influence_value',0)),reverse=True)
            event_warming_content['main_weibo_info']=json.dumps(the_weibo_result)
            # Event influence and average event time.
            number=len(event_results)
            event_warming_content['event_influence']=event_influence_sum/number
            event_warming_content['event_time']=event_time_sum/number
            # except:
            #     event_warming_content['main_weibo_info']=[]
            #     event_warming_content['event_influence']=0
            #     event_warming_content['event_time']=0
            # try:
            # Rank participating users by (weighted) post count, descending.
            alluser_num_dict=sorted(alluser_num_dict.items(),key=lambda d:d[1],reverse=True)
            main_userid_list=[]
            for i in xrange(0,len(alluser_num_dict)):
                main_userid_list.append(alluser_num_dict[i][0])
            # Profiles of the main participating users; blank defaults for
            # uids missing from the profile index.
            main_user_info=[]
            user_es_result=es_user_profile.mget(index=profile_index_name,doc_type=profile_index_type,body={'ids':main_userid_list})['docs']
            for item in user_es_result:
                user_dict=dict()
                if item['found']:
                    user_dict['photo_url']=item['_source']['photo_url']
                    user_dict['uid']=item['_id']
                    user_dict['nick_name']=item['_source']['nick_name']
                    user_dict['favoritesnum']=item['_source']['favoritesnum']
                    user_dict['fansnum']=item['_source']['fansnum']
                else:
                    user_dict['photo_url']=''
                    user_dict['uid']=item['_id']
                    user_dict['nick_name']=''
                    user_dict['favoritesnum']=0
                    user_dict['fansnum']=0
                main_user_info.append(user_dict)
            event_warming_content['main_user_info']=json.dumps(main_user_info)
            # except:
            #     event_warming_content['main_user_info']=[]
            print 'fourth_time:::',int(time.time())
            event_warming_content['xnr_user_no']=xnr_user_no
            event_warming_content['validity']=0
            event_warming_content['timestamp']=today_datetime
            event_warming_list.append(event_warming_content)
        else:
            pass
    print 'fifth_time:::',int(time.time())
    return event_warming_list
def create_personal_warning(xnr_user_no,start_time,end_time):
    """Build per-user warning entries for sensitive-posting users.

    Aggregates the day's flow-text posts by uid, sums each user's
    sensitivity, keeps users with a positive total (weighted by
    whether the XNR follows them), then fetches each such user's
    sensitive posts and packages them into a warning dict.
    Returns the list of per-user warning dicts.
    """
    # Look up the XNR's followers list.
    lookup_type='followers_list'
    followers_list=lookup_xnr_fans_followers(xnr_user_no,lookup_type)
    # Look up the virtual persona's uid.
    xnr_uid=lookup_xnr_uid(xnr_user_no)
    # Rank users by aggregated sensitivity.
    '''
    query_body={
        # 'query':{
        #     'filtered':{
        #         'filter':{
        #             'terms':{'uid':followers_list}
        #         }
        #     }
        # },
        'aggs':{
            'followers_sensitive_num':{
                'terms':{'field':'uid'},
                'range':{
                    'timestamp':{
                        'gte':start_time,
                        'lte':end_time
                    }
                },
                'aggs':{
                    'sensitive_num':{
                        'sum':{'field':'sensitive'}
                    }
                }
            }
        },
        'size':MAX_SEARCH_SIZE
    }
    '''
    query_body={
        # 'query':{
        #     'filtered':{
        #         'filter':{
        #             'terms':{'uid':followers_list}
        #         }
        #     }
        # },
        'aggs':{
            'followers_sensitive_num':{
                'terms':{'field':'uid'},
                'aggs':{
                    'sensitive_num':{
                        'sum':{'field':'sensitive'}
                    }
                }
            }
        },
        'size':MAX_SEARCH_SIZE
    }
    flow_text_index_name=get_day_flow_text_index_list(end_time)
    # try:
    first_sum_result=es_flow_text.search(index=flow_text_index_name,doc_type=flow_text_index_type,\
            body=query_body)['aggregations']['followers_sensitive_num']['buckets']
    #except:
    #`    first_sum_result=[]
    #print first_sum_result
    top_userlist=[]
    for i in xrange(0,len(first_sum_result)):
        user_sensitive=first_sum_result[i]['sensitive_num']['value']
        #ser_influence=first_sum_result[i]['influence_num']['value']
        #if (user_sensitive > 0) or (user_influence > 0) :
        if user_sensitive > 0:
            user_dict=dict()
            user_dict['uid']=first_sum_result[i]['key']
            # Weight sensitivity by follower status (judge_user_type).
            followers_mark=judge_user_type(user_dict['uid'],followers_list)
            user_dict['sensitive']=user_sensitive*followers_mark
            # user_dict['influence']=user_influence*followers_mark
            top_userlist.append(user_dict)
        else:
            pass
    ####################################
    # Followed users get a sensitivity boost (via the weighting above).
    ####################################
    # Fetch each sensitive user's sensitive weibo content.
    results=[]
    for user in top_userlist:
        #print user
        user_detail=dict()
        user_detail['uid']=user['uid']
        user_detail['user_sensitive']=user['sensitive']
        #user_detail['user_influence']=user['influence']
        # user_lookup_id=xnr_uid+'_'+user['uid']
        # print user_lookup_id
        # try:
        #     #user_result=es_xnr.get(index=weibo_feedback_follow_index_name,doc_type=weibo_feedback_follow_index_type,id=user_lookup_id)['_source']
        #     user_result=es_user_profile.get(index=profile_index_name,doc_type=profile_index_type,id=user['uid'])['_source']
        #     user_detail['user_name']=user_result['nick_name']
        # except:
        user_detail['user_name']=get_user_nickname(user['uid'])
        # This user's sensitive posts, most sensitive first.
        query_body={
            'query':{
                'filtered':{
                    'filter':{
                        'bool':{
                            'must':[
                                {'term':{'uid':user['uid']}},
                                # {'range':{'timestamp':{'gte':start_time,'lte':end_time}}}
                                {'range':{'sensitive':{'gte':1}}},
                                # {'range':{'retweeted':{'gte':1}}}
                            ]
                        }
                    }
                }
            },
            'size':MAX_WARMING_SIZE,
            'sort':{'sensitive':{'order':'desc'}}
        }
        try:
            second_result=es_flow_text.search(index=flow_text_index_name,doc_type=flow_text_index_type,body=query_body)['hits']['hits']
        except:
            second_result=[]
        warning_type = 'user'
        s_result=remove_repeat(second_result,warning_type)
        #tem_word_one = '静坐'
        #tem_word_two = '集合'
        # for item in second_result:
        #     #sensitive_words=item['_source']['sensitive_words_string']
        #     #if ((sensitive_words==tem_word_one) or (sensitive_words==tem_word_two)):
        #     #    pass
        #     #else:
        #     # Look up the user's nickname.
        #     item['_source']['nick_name']=get_user_nickname(item['_source']['uid'])
        #     s_result.append(item['_source'])
        s_result.sort(key=lambda k:(k.get('sensitive',0)),reverse=True)
        user_detail['content']=json.dumps(s_result)
        user_detail['xnr_user_no']=xnr_user_no
        user_detail['validity']=0
        user_detail['timestamp']=end_time
        results.append(user_detail)
    #print 'person_wa:::',results
    return results