def lookup_twitter_date_warming(keywords, today_datetime):
    """Return today's sensitive Twitter posts matching any of *keywords*.

    Builds one wildcard should-clause per keyword, restricts to documents
    with sensitive >= 1 and sorts by sensitivity descending.  Each hit's
    _source is enriched with comment/share/favorite counts (via
    lookup_tid_attend_index) and the author's nickname.

    Returns a list of enriched _source dicts; [] when the search fails.
    """
    keyword_query_list = [
        {'wildcard': {'text': '*' + keyword.encode('utf-8') + '*'}}
        for keyword in keywords
    ]
    twitter_flow_text_index_name = get_timets_set_indexset_list(
        twitter_flow_text_index_name_pre, today_datetime, today_datetime)
    query_body = {
        'query': {
            'bool': {
                'should': keyword_query_list,
                'must': {'range': {'sensitive': {'gte': 1}}}
            }
        },
        'size': MAX_WARMING_SIZE,
        'sort': {'sensitive': {'order': 'desc'}}
    }
    # Only the ES round-trip is expected to fail (e.g. missing daily index);
    # keep the try narrow and the except explicit so coding errors are not
    # silently turned into an empty result as the original bare except did.
    try:
        temp_result = es_xnr_2.search(
            index=twitter_flow_text_index_name,
            doc_type=twitter_flow_text_index_type,
            body=query_body)['hits']['hits']
    except Exception:
        return []
    date_result = []
    for item in temp_result:
        source = item['_source']
        # Engagement metrics.  lookup_tid_attend_index returns hits sorted by
        # update_time desc; take the most recent record.  (The original
        # indexed the returned *list* as a dict, raising TypeError that the
        # bare except silently converted into an empty return.)
        tid_hits = lookup_tid_attend_index(source['tid'], today_datetime)
        tid_result = tid_hits[0] if isinstance(tid_hits, list) and tid_hits else tid_hits
        if tid_result:
            source['comment'] = tid_result['comment']
            source['share'] = tid_result['share']
            source['favorite'] = tid_result['favorite']
        else:
            source['comment'] = 0
            source['share'] = 0
            source['favorite'] = 0
        # Author's display name.
        source['nick_name'] = get_user_nickname(source['uid'])
        date_result.append(source)
    return date_result
def create_speech_warning(xnr_user_no, today_datetime):
    """Scan today's sensitive Twitter posts and persist them as speech warnings.

    Each post with sensitive >= 1 is tagged with whether its author is in
    the XNR's follower list, enriched with engagement metrics and the
    author's nickname, then indexed into the daily twitter speech warning
    index under the id '<xnr_user_no>_<tid>'.

    Returns a list of per-post booleans: True when the write succeeded.
    """
    # Followers of this virtual persona (XNR).
    lookup_type = 'followers_list'
    followers_list = lookup_xnr_fans_followers(xnr_user_no, lookup_type)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {'must': {'range': {'sensitive': {'gte': 1}}}}
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {'sensitive': {'order': 'desc'}}
    }
    twitter_flow_text_index_name = get_timets_set_indexset_list(
        twitter_flow_text_index_name_pre, today_datetime, today_datetime)
    results = es_xnr.search(index=twitter_flow_text_index_name,
                            doc_type=twitter_flow_text_index_type,
                            body=query_body)['hits']['hits']
    # Loop invariants hoisted: the target index name depends only on the date.
    today_date = ts2datetime(today_datetime)
    twitter_speech_warning_index_name = \
        twitter_speech_warning_index_name_pre + today_date
    result = []
    for item in results:
        source = item['_source']
        if source['uid'] in followers_list:
            source['content_type'] = 'follow'
        else:
            source['content_type'] = 'unfollow'
        source['validity'] = 0
        source['xnr_user_no'] = xnr_user_no
        # Engagement metrics.  lookup_tid_attend_index returns hits sorted by
        # update_time desc; use the most recent record.  (The original indexed
        # the returned *list* as a dict, which raised TypeError here.)
        tid_hits = lookup_tid_attend_index(source['tid'], today_datetime)
        tid_result = tid_hits[0] if isinstance(tid_hits, list) and tid_hits else tid_hits
        if tid_result:
            source['comment'] = tid_result['comment']
            source['share'] = tid_result['share']
            source['favorite'] = tid_result['favorite']
        else:
            source['comment'] = 0
            source['share'] = 0
            source['favorite'] = 0
        # Author's display name.
        source['nick_name'] = get_user_nickname(source['uid'])
        task_id = xnr_user_no + '_' + source['tid']
        # Persist; a failed write is reported as False instead of aborting the
        # whole batch (restores the intent of the commented-out handler).
        try:
            es_xnr.index(index=twitter_speech_warning_index_name,
                         doc_type=twitter_speech_warning_index_type,
                         body=source, id=task_id)
            mark = True
        except Exception:
            mark = False
        result.append(mark)
    return result
def create_speech_warning(xnr_user_no, today_datetime):
    """Scan today's sensitive Facebook posts and persist them as speech warnings.

    NOTE(review): this redefines create_speech_warning from the Twitter
    section of this file; at import time the later definition wins.
    Renaming one of them (e.g. a _fb/_tw suffix) is recommended but left
    untouched here to avoid breaking callers.

    Each post with sensitive >= 1 is tagged with whether its author is in
    the XNR's friend list, enriched with engagement metrics and the
    author's nickname, then indexed into the daily facebook speech warning
    index under the id '<xnr_user_no>_<fid>'.

    Returns a list of per-post booleans: True when the write succeeded.
    """
    # Friends of this virtual persona (XNR).
    friends_list = lookup_xnr_friends(xnr_user_no)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {'must': {'range': {'sensitive': {'gte': 1}}}}
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {'sensitive': {'order': 'desc'}}
    }
    facebook_flow_text_index_name = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)
    results = es_xnr_2.search(index=facebook_flow_text_index_name,
                              doc_type=facebook_flow_text_index_type,
                              body=query_body)['hits']['hits']
    # Loop invariants hoisted: the target index name depends only on the date.
    today_date = ts2datetime(today_datetime)
    facebook_speech_warning_index_name = \
        facebook_speech_warning_index_name_pre + today_date
    result = []
    for item in results:
        source = item['_source']
        if source['uid'] in friends_list:
            source['content_type'] = 'friends'
        else:
            source['content_type'] = 'unfriends'
        source['validity'] = 0
        source['xnr_user_no'] = xnr_user_no
        # Engagement metrics.  NOTE(review): fid_result is indexed as a dict,
        # matching the original; confirm lookup_fid_attend_index (not visible
        # here) returns a dict and not a hit list like its tid counterpart.
        fid_result = lookup_fid_attend_index(source['fid'], today_datetime)
        if fid_result:
            source['comment'] = fid_result['comment']
            source['share'] = fid_result['share']
            source['favorite'] = fid_result['favorite']
        else:
            source['comment'] = 0
            source['share'] = 0
            source['favorite'] = 0
        # Author's display name.
        source['nick_name'] = get_user_nickname(source['uid'])
        task_id = xnr_user_no + '_' + source['fid']
        # Persist; a failed write is reported as False instead of aborting the
        # whole batch (restores the intent of the commented-out handler).
        try:
            es_xnr_2.index(index=facebook_speech_warning_index_name,
                           doc_type=facebook_speech_warning_index_type,
                           body=source, id=task_id)
            mark = True
        except Exception:
            mark = False
        result.append(mark)
    return result
def get_hashtag(today_datetime):
    """Aggregate today's sensitive Facebook posts by hashtag.

    Runs a terms aggregation over posts with sensitive >= 1 and returns a
    list of {'event_name', 'event_count', 'event_sensitive'} dicts ordered
    by total sensitivity, then post count, descending.  Buckets with an
    empty hashtag key are dropped.
    """
    index_name = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{'range': {'sensitive': {'gte': 1}}}]
                    }
                }
            }
        },
        'aggs': {
            'all_hashtag': {
                'terms': {'field': 'hashtag'},
                'aggs': {
                    'sum_sensitive': {'sum': {'field': 'sensitive'}}
                }
            }
        },
        'size': EVENT_OFFLINE_COUNT
    }
    buckets = es_xnr_2.search(
        index=index_name,
        doc_type=facebook_flow_text_index_type,
        body=query_body)['aggregations']['all_hashtag']['buckets']
    # Keep only buckets whose hashtag key is non-empty.
    hashtag_list = [
        {
            'event_name': bucket['key'],
            'event_count': bucket['doc_count'],
            'event_sensitive': bucket['sum_sensitive']['value'],
        }
        for bucket in buckets if bucket['key']
    ]
    hashtag_list.sort(key=lambda event: (event.get('event_sensitive', 0),
                                         event.get('event_count', 0)),
                      reverse=True)
    return hashtag_list
def lookup_tid_attend_index(tid, today_datetime):
    """Fetch today's engagement-count records for tweet *tid*.

    Queries the daily twitter count index and returns the matching _source
    dicts sorted by update_time descending (newest first).  Returns []
    when the search fails or nothing matches.
    """
    twitter_count_index_name = get_timets_set_indexset_list(
        twitter_count_index_name_pre, today_datetime, today_datetime)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {'must': {'term': {'tid': tid}}}
                }
            }
        },
        'size': MAX_WARMING_SIZE,
        'sort': {'update_time': {'order': 'desc'}}
    }
    # Only the ES call is expected to fail (e.g. missing daily index); the
    # original bare except around the whole body could hide coding errors.
    try:
        hits = es_xnr.search(index=twitter_count_index_name,
                             doc_type=twitter_count_index_type,
                             body=query_body)['hits']['hits']
    except Exception:
        return []
    return [hit['_source'] for hit in hits]
def create_event_warning(xnr_user_no, today_datetime, write_mark):
    """Build (and optionally persist) event-level warnings for today's
    sensitive Facebook hashtags.

    For every hashtag reported by get_hashtag(), collect the sensitive
    posts carrying it, score each post's influence, accumulate the event's
    total influence and average timestamp, and rank participating users.
    When write_mark is truthy each assembled event is written through
    write_envent_warming() and the returned list holds the write results;
    otherwise the list holds the event dicts themselves.
    """
    # Event names (today's sensitive hashtags).
    hashtag_list = get_hashtag(today_datetime)
    facebook_flow_text_index_name = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)
    # Friend list of the virtual persona (XNR).
    friends_list = lookup_xnr_friends(xnr_user_no)
    event_warming_list = []
    for event_item in hashtag_list:
        # Per event: name, main participants, representative posts,
        # event influence and average event time.
        event_warming_content = dict()
        event_warming_content['event_name'] = event_item['event_name']
        event_influence_sum = 0
        event_time_sum = 0
        # Sensitive posts carrying this hashtag, most sensitive first.
        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [{
                                'term': {
                                    'hashtag': event_item['event_name']
                                }
                            }, {
                                'range': {
                                    'sensitive': {
                                        'gte': 1
                                    }
                                }
                            }]
                        }
                    }
                }
            },
            'size': MAX_WARMING_SIZE,
            'sort': {
                'sensitive': {
                    'order': 'desc'
                }
            }
        }
        event_results = es_xnr_2.search(index=facebook_flow_text_index_name,
                                        doc_type=facebook_flow_text_index_type,
                                        body=query_body)['hits']['hits']
        if event_results:
            facebook_result = []
            friends_num_dict = dict()  # NOTE(review): never written below; appears unused
            alluser_num_dict = dict()  # str(uid) -> weighted participation count
            for item in event_results:
                # Attach the three engagement metrics (comment/share/favorite).
                # NOTE(review): fid_result is indexed as a dict; confirm
                # lookup_fid_attend_index (not visible here) returns a dict.
                fid_result = lookup_fid_attend_index(item['_source']['fid'],
                                                     today_datetime)
                if fid_result:
                    item['_source']['comment'] = fid_result['comment']
                    item['_source']['share'] = fid_result['share']
                    item['_source']['favorite'] = fid_result['favorite']
                else:
                    item['_source']['comment'] = 0
                    item['_source']['share'] = 0
                    item['_source']['favorite'] = 0
                # Per-user participation tally; presumably posts from friends
                # count double (the +1*2 branch) -- depends on set_intersection
                # semantics, which are not visible here.
                if alluser_num_dict.has_key(str(item['_source']['uid'])):
                    friends_mark = set_intersection(item['_source']['uid'],
                                                    friends_list)
                    if friends_mark > 0:
                        alluser_num_dict[str(
                            item['_source']['uid'])] = alluser_num_dict[str(
                                item['_source']['uid'])] + 1 * 2
                    else:
                        alluser_num_dict[str(
                            item['_source']['uid'])] = alluser_num_dict[str(
                                item['_source']['uid'])] + 1
                else:
                    alluser_num_dict[str(item['_source']['uid'])] = 1
                # Influence = (1 + engagement) * (1 + sensitivity), scaled by
                # the author's relationship weight from judge_user_type.
                origin_influence_value = (1 +
                                          item['_source']['comment'] +
                                          item['_source']['share'] +
                                          item['_source']['favorite']) * (
                                              1 + item['_source']['sensitive'])
                friends_value = judge_user_type(item['_source']['uid'],
                                                friends_list)
                item['_source'][
                    'facebook_influence_value'] = origin_influence_value * friends_value
                # Author's display name.
                item['_source']['nick_name'] = get_user_nickname(
                    item['_source']['uid'])
                facebook_result.append(item['_source'])
                # Accumulate event-level influence and timestamp totals.
                event_influence_sum = event_influence_sum + item['_source'][
                    'facebook_influence_value']
                event_time_sum = event_time_sum + item['_source']['timestamp']
            # Representative posts: most influential first.
            facebook_result.sort(
                key=lambda k: (k.get('facebook_influence_value', 0)),
                reverse=True)
            event_warming_content['main_facebook_info'] = json.dumps(
                facebook_result)
            # Event influence and average event time.
            # NOTE(review): Python 2 '/' is integer division when both sides
            # are ints -- averages may be truncated; confirm intended.
            number = len(event_results)
            event_warming_content[
                'event_influence'] = event_influence_sum / number
            event_warming_content['event_time'] = event_time_sum / number
            # Rank users by weighted participation count, descending.
            alluser_num_dict = sorted(alluser_num_dict.items(),
                                      key=lambda d: d[1],
                                      reverse=True)
            main_userid_list = []
            for i in xrange(0, len(alluser_num_dict)):
                main_userid_list.append(alluser_num_dict[i][0])
            # Profiles of the main participants; missing profiles fall back
            # to empty/zero fields.
            main_user_info = []
            user_es_result = es_xnr_2.mget(index=facebook_user_index_name,
                                           doc_type=facebook_user_index_type,
                                           body={'ids': main_userid_list})['docs']
            for item in user_es_result:
                user_dict = dict()
                if item['found']:
                    user_dict['uid'] = item['_id']
                    user_dict['username'] = item['_source']['username']
                    if item['_source'].has_key('talking_about_count'):
                        user_dict['talking_about_count'] = item['_source'][
                            'talking_about_count']
                    else:
                        user_dict['talking_about_count'] = 0
                    if item['_source'].has_key('likes'):
                        user_dict['likes'] = item['_source']['likes']
                    else:
                        user_dict['likes'] = 0
                    if item['_source'].has_key('category'):
                        user_dict['category'] = item['_source']['category']
                    else:
                        user_dict['category'] = ''
                else:
                    user_dict['uid'] = item['_id']
                    user_dict['username'] = ''
                    user_dict['talking_about_count'] = 0
                    user_dict['likes'] = 0
                    user_dict['category'] = ''
                main_user_info.append(user_dict)
            event_warming_content['main_user_info'] = json.dumps(
                main_user_info)
            event_warming_content['xnr_user_no'] = xnr_user_no
            event_warming_content['validity'] = 0
            event_warming_content['timestamp'] = today_datetime
            now_time = int(time.time())  # NOTE(review): unused since task_id switched to event_name
            task_id = xnr_user_no + '_' + event_warming_content['event_name']
            # Persist (write_mark truthy) or return the assembled dicts.
            if write_mark:
                print 'task_id_event:', task_id
                mark = write_envent_warming(today_datetime,
                                            event_warming_content, task_id)
                event_warming_list.append(mark)
            else:
                event_warming_list.append(event_warming_content)
        else:
            pass
    return event_warming_list
def create_personal_warning(xnr_user_no, today_datetime):
    """Rank today's most sensitive Facebook users and persist per-user warnings.

    Aggregates today's posts by uid to sum each user's sensitivity, weights
    the sum by the user's relationship to the XNR (judge_user_type), then
    for each flagged user gathers their sensitive posts and writes a
    warning document to the daily facebook user warning index under the id
    '<xnr_user_no>_<uid>'.

    Returns a list of per-user booleans: True when a warning was written,
    False when the user had no sensitive content or the write failed.
    (The original left `mark` unbound in the no-content case, raising
    NameError on the first such user.)
    """
    friends_list = lookup_xnr_friends(xnr_user_no)
    # NOTE(review): looked up but never used below; kept for behavior parity.
    xnr_uid = lookup_xnr_uid(xnr_user_no)
    # Per-user sensitivity totals for today.
    query_body = {
        'aggs': {
            'friends_sensitive_num': {
                'terms': {'field': 'uid'},
                'aggs': {
                    'sensitive_num': {'sum': {'field': 'sensitive'}}
                }
            }
        },
        'size': MAX_SEARCH_SIZE
    }
    facebook_flow_text_index_name = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)
    try:
        first_sum_result = es_xnr_2.search(
            index=facebook_flow_text_index_name,
            doc_type=facebook_flow_text_index_type,
            body=query_body)['aggregations']['friends_sensitive_num']['buckets']
    except Exception:
        first_sum_result = []
    # Users with a positive sensitivity total; judge_user_type supplies the
    # relationship multiplier (friends are up-weighted).
    top_userlist = []
    for bucket in first_sum_result:
        user_sensitive = bucket['sensitive_num']['value']
        if user_sensitive > 0:
            friends_mark = judge_user_type(bucket['key'], friends_list)
            top_userlist.append({
                'uid': bucket['key'],
                'sensitive': user_sensitive * friends_mark,
            })
    # Loop invariants hoisted: target index name depends only on the date.
    today_date = ts2datetime(today_datetime)
    facebook_user_warning_index_name = \
        facebook_user_warning_index_name_pre + today_date
    results = []
    for user in top_userlist:
        user_detail = dict()
        user_detail['uid'] = user['uid']
        user_detail['user_sensitive'] = user['sensitive']
        user_detail['user_name'] = get_user_nickname(user['uid'])
        # This user's sensitive posts, most sensitive first.
        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [{
                                'term': {'uid': user['uid']}
                            }, {
                                'range': {'sensitive': {'gte': 1}}
                            }]
                        }
                    }
                }
            },
            'size': MAX_WARMING_SIZE,
            'sort': {'sensitive': {'order': 'desc'}}
        }
        try:
            second_result = es_xnr_2.search(
                index=facebook_flow_text_index_name,
                doc_type=facebook_flow_text_index_type,
                body=query_body)['hits']['hits']
        except Exception:
            second_result = []
        s_result = []
        for item in second_result:
            source = item['_source']
            # Engagement metrics.  NOTE(review): fid_result is indexed as a
            # dict, matching the original; confirm lookup_fid_attend_index
            # (not visible here) returns a dict.
            fid_result = lookup_fid_attend_index(source['fid'], today_datetime)
            if fid_result:
                source['comment'] = fid_result['comment']
                source['share'] = fid_result['share']
                source['favorite'] = fid_result['favorite']
            else:
                source['comment'] = 0
                source['share'] = 0
                source['favorite'] = 0
            # Author's display name.
            source['nick_name'] = get_user_nickname(source['uid'])
            s_result.append(source)
        s_result.sort(key=lambda k: (k.get('sensitive', 0)), reverse=True)
        user_detail['content'] = json.dumps(s_result)
        user_detail['xnr_user_no'] = xnr_user_no
        user_detail['validity'] = 0
        user_detail['timestamp'] = today_datetime
        task_id = xnr_user_no + '_' + user_detail['uid']
        # Report False explicitly when there is nothing to write or the write
        # fails (fixes the unbound/stale `mark` of the original).
        mark = False
        if s_result:
            try:
                es_xnr_2.index(index=facebook_user_warning_index_name,
                               doc_type=facebook_user_warning_index_type,
                               body=user_detail, id=task_id)
                mark = True
            except Exception:
                mark = False
        results.append(mark)
    return results