def load_xnr_info():
    """Collect login info for every Facebook XNR account.

    Queries the XNR index (up to 999 accounts), keeps only accounts with a
    usable login (mail account preferred over phone account) and attaches
    each account's friends list from the relations index.

    :return: list of dicts with root_uid, root_nick_name, xnr_user_no,
             account, password and friends_list.
    """
    infos = []
    hits = es.search(fb_xnr_index_name, fb_xnr_index_type,
                     {'size': 999})['hits']['hits']
    for hit in hits:
        doc = hit['_source']
        # Prefer the mail account; fall back to the phone account.
        account = doc.get('fb_mail_account', '') or doc.get('fb_phone_account', '')
        if not account:
            continue
        xnr_user_no = doc.get('xnr_user_no', '')
        # Friends come from the relations index; the old fans_list lookup
        # was dropped on 2019-03-25 (per original changelog comment).
        relations_query = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'xnr_no': xnr_user_no}},
                    ]
                }
            },
            'size': 99999,
        }
        relation_hits = es.search(facebook_xnr_relations_index_name,
                                  facebook_xnr_relations_index_type,
                                  relations_query)['hits']['hits']
        friends_list = [rel['_source']['uid'] for rel in relation_hits]
        infos.append({
            'root_uid': doc.get('uid', ''),
            'root_nick_name': doc.get('nick_name', ''),
            'xnr_user_no': xnr_user_no,
            'account': account,
            'password': doc.get('password', ''),
            'friends_list': friends_list,
        })
    return infos
def get_tweets_from_flow(monitor_keywords_list, sort_item_new):
    """Fetch yesterday's top tweets matching any monitor keyword.

    Builds a wildcard should-query over ``keywords_string``, sorts by
    ``sort_item_new`` then ``timestamp`` (both descending) and reads the
    twitter flow-text index for the previous day.  When the keyword query
    matches nothing, falls back to a match_all query on the same index.

    :param monitor_keywords_list: keywords matched as substrings.
    :param sort_item_new: field name used as the primary sort key.
    :return: list of tweet source dicts enriched with nick_name/photo_url.
    """
    nest_query_list = [
        {'wildcard': {'keywords_string': '*' + monitor_keyword + '*'}}
        for monitor_keyword in monitor_keywords_list
    ]
    query_body = {
        'query': {'bool': {'should': nest_query_list}},
        'sort': [{sort_item_new: {'order': 'desc'}},
                 {'timestamp': {'order': 'desc'}}],
        'size': TOP_WEIBOS_LIMIT,
    }
    if S_TYPE == 'test':
        now_ts = datetime2ts(S_DATE_TW)
    else:
        now_ts = int(time.time())
    # FIX: renamed the local from ``datetime`` — it shadowed the stdlib
    # module name, an easy trap for later edits in this function.
    date_str = ts2datetime(now_ts - 24 * 3600)
    index_name = twitter_flow_text_index_name_pre + date_str
    es_results = es.search(index=index_name,
                           doc_type=twitter_flow_text_index_type,
                           body=query_body)['hits']['hits']
    if not es_results:
        # Fallback: no keyword hit, take the day's overall top tweets.
        es_results = es.search(index=index_name,
                               doc_type=twitter_flow_text_index_type,
                               body={'query': {'match_all': {}},
                                     'size': TOP_WEIBOS_LIMIT,
                                     'sort': {sort_item_new: {'order': 'desc'}}})['hits']['hits']
    results_all = []
    for result in es_results:
        result = result['_source']
        uid = result['uid']
        nick_name, photo_url = tw_uid2nick_name_photo(uid)
        result['nick_name'] = nick_name
        result['photo_url'] = photo_url
        results_all.append(result)
    return results_all
def get_submit_tweet_fb(task_detail): print 'get_submit_tweet_fb,start execute=========================' text = task_detail['text'] tweet_type = task_detail['tweet_type'] channel = task_detail['channel'] operate_type = task_detail['operate_type'] xnr_user_no = task_detail['xnr_user_no'] try: es_xnr_result = es.get(index=fb_xnr_index_name, doc_type=fb_xnr_index_type, id=xnr_user_no)['_source'] except Exception as e: print e print es_xnr_result fb_mail_account = es_xnr_result['fb_mail_account'] fb_phone_account = es_xnr_result['fb_phone_account'].strip() password = str(es_xnr_result['password'].strip()) print type(password), password print type('13018119931126731x'), '13018119931126731x' print es.search('fb_xnr', 'user', {}) if fb_phone_account: account_name = str(fb_phone_account) print type(account_name), account_name print type('+8613520874771'), '+8613520874771' elif fb_mail_account: account_name = fb_mail_account else: account_name = False if account_name: print '--------------------------------==================================-------------------------------------------------------' # add params to aliyunredis kn try: fb_tweet_params_dict = {} fb_tweet_params_dict["account_name"] = account_name fb_tweet_params_dict["password"] = password fb_tweet_params_dict["text"] = text fb_tweet_params_dict["tweet_type"] = tweet_type fb_tweet_params_dict["xnr_user_no"] = xnr_user_no fb_tweet_params_dict["channel"] = channel fb_tweet_params_dict["operate"] = operate_type print FB_TWEET_PARAMS, '===================================================fb params' ali_re.lpush(FB_TWEET_PARAMS, json.dumps(fb_tweet_params_dict)) mark = fb_publish(account_name, password, text, tweet_type, xnr_user_no) except Exeption as e: print e #mark = fb_publish('+8613520874771', '13018119931126731x', text, tweet_type, xnr_user_no) else: mark = False return mark
def get_hot_recommend_tweets(xnr_user_no, topic_field, sort_item):
    """Top tweets for a topic field from the social-sensing index.

    :param xnr_user_no: unused here; kept for interface compatibility.
    :param topic_field: Chinese topic name, mapped via topic_ch2en_dict.
    :param sort_item: field to sort by (descending).
    :return: tweet source dicts enriched with nick_name and photo_url.
    """
    topic_field_en = topic_ch2en_dict[topic_field]
    # BUG FIX: the original built query_body only when
    # sort_item != 'compute_status' but used it unconditionally below,
    # raising NameError for that value.  The query is now always built.
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'filtered': {
                        'filter': {
                            'term': {'topic_field': topic_field_en}
                        }
                    }
                }]
            }
        },
        'sort': {sort_item: {'order': 'desc'}},
        'size': TOP_WEIBOS_LIMIT,
    }
    # current_time is retained from the original for the (commented-out)
    # per-day index naming; it is otherwise unused.
    current_time = time.time()
    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE_TW)
    #tw_social_sensing_index_name = tw_social_sensing_index_name_pre + ts2datetime(current_time)
    es_results = es.search(index=tw_social_sensing_index_name,
                           doc_type=tw_social_sensing_index_type,
                           body=query_body)['hits']['hits']
    if not es_results:
        # Fallback to the overall top tweets when the topic has none.
        es_results = es.search(index=tw_social_sensing_index_name,
                               doc_type=tw_social_sensing_index_type,
                               body={'query': {'match_all': {}},
                                     'size': TOP_WEIBOS_LIMIT,
                                     'sort': {sort_item: {'order': 'desc'}}})['hits']['hits']
    results_all = []
    for result in es_results:
        result = result['_source']
        uid = result['uid']
        nick_name, photo_url = tw_uid2nick_name_photo(uid)
        result['nick_name'] = nick_name
        result['photo_url'] = photo_url
        results_all.append(result)
    return results_all
def update_baseinfo(uid_list=[]):
    """Refresh base info (name, gender, location) for the given uids.

    Looks up each uid in the facebook user index; uids missing from the
    index still get an empty default record.  The collected map is then
    persisted through save_data2es.

    :param uid_list: uids to refresh (read-only; empty list matches none).
    :return: result of save_data2es.
    """
    user_baseinfo = {}
    fb_user_query_body = {
        'query': {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"uid": uid_list}},
                        ]
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        "fields": ["location", "gender", "name", "uid"],
    }
    search_results = es.search(index=facebook_user_index_name,
                               doc_type=facebook_user_index_type,
                               body=fb_user_query_body)['hits']['hits']
    for item in search_results:
        content = item['fields']
        uid = content['uid'][0]
        if uid not in user_baseinfo:
            user_baseinfo[uid] = {
                'uid': str(uid),
                'uname': '',
                'gender': 0,
                'location': '',
            }
        location = ''
        # FIX: ``in`` replaces the deprecated dict.has_key throughout.
        if 'location' in content:
            location_dict = json.loads(content.get('location')[0])
            location = get_user_location(location_dict)
        gender = 0
        if 'gender' in content:
            gender_str = content.get('gender')[0]
            if gender_str == 'male':
                gender = 1
            elif gender_str == 'female':
                gender = 2
        uname = ''
        if 'name' in content:
            uname = content.get('name')[0]
        user_baseinfo[uid]['location'] = location
        user_baseinfo[uid]['gender'] = gender
        user_baseinfo[uid]['uname'] = uname
    # Uids absent from the user index still get an empty default record.
    for uid in uid_list:
        if uid not in user_baseinfo:
            user_baseinfo[uid] = {
                'uid': str(uid),
                'uname': '',
                'gender': 0,
                'location': '',
            }
    return save_data2es(user_baseinfo)
def update_domain(uid_list=[]): if not uid_list: uid_list = load_uid_list() fb_flow_text_index_list = get_facebook_flow_text_index_list( load_timestamp(), TEST_MAX_FLOW_TEXT_DAYS) user_domain_data = {} #load num of text count_result = count_text_num(uid_list, fb_flow_text_index_list) #load baseinfo fb_user_query_body = { 'post_filter': { 'exists': { 'field': 'bio_str' } }, 'query': { "filtered": { "filter": { "bool": { "must": [ { "terms": { "uid": uid_list } }, ] } } } }, 'size': MAX_SEARCH_SIZE, "fields": ["bio_str", "category", "uid"] } try: search_results = es.search(index=facebook_user_index_name, doc_type=facebook_user_index_type, body=fb_user_query_body)['hits']['hits'] for item in search_results: content = item['fields'] uid = content['uid'][0] if not uid in user_domain_data: text_num = count_result[uid] user_domain_data[uid] = { 'bio_str': '', 'category': '', 'number_of_text': text_num } #对于长文本,Goslate 会在标点换行等分隔处把文本分拆为若干接近 2000 字节的子文本,再一一查询,最后将翻译结果拼接后返回用户。通过这种方式,Goslate 突破了文本长度的限制。 if content.has_key('category'): category = content.get('category')[0] else: category = '' if content.has_key('bio_str'): bio_str = content.get('bio_str')[0] else: bio_str = '' user_domain_data[uid]['bio_str'] = bio_str user_domain_data[uid]['category'] = category except Exception, e: print e
def influence_propagate(fid, index_name):
    """Weighted propagation score of one facebook post.

    Reads the latest counter document for ``fid`` (sorted by update_time
    descending) and combines its counters as 5*share + 3*comment +
    2*favorite.

    :param fid: facebook post id.
    :param index_name: counter index to query.
    :return: propagation score, 0 when no counter document exists.
    """
    query_body = {
        'query': {'term': {'fid': fid}},
        'sort': {'update_time': {'order': 'desc'}},
    }
    hits = es.search(index=index_name,
                     doc_type=facebook_count_index_type,
                     body=query_body)['hits']['hits']
    if not hits:
        return 0
    latest = hits[0]['_source']
    return 5 * latest['share'] + 3 * latest['comment'] + 2 * latest['favorite']
def load_twitter_relations_base(xnr_user_no, relations_type):
    """Return uids having the given relation flag with this XNR.

    :param xnr_user_no: XNR identifier (matched against ``xnr_no``).
    :param relations_type: relation field name, e.g. 'pingtaiguanzhu'
                           (platform-follow) or 'pingtaifensi' (platform-fan).
    :return: list of uids whose ``relations_type`` flag equals 1.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'xnr_no': xnr_user_no}},
                    {'term': {relations_type: 1}},
                ]
            }
        },
        'size': 99999,
    }
    hits = es.search(twitter_xnr_relations_index_name,
                     twitter_xnr_relations_index_type,
                     query_body)['hits']['hits']
    return [hit['_source']['uid'] for hit in hits]
def load_xnr_info():
    """Collect login info for every Twitter XNR account.

    Keeps only accounts with a usable login (mail account preferred over
    phone account) and seeds retry bookkeeping fields.

    :return: list of dicts with root_uid, root_nick_name, xnr_user_no,
             account, password, retry_times and remark.
    """
    infos = []
    hits = es.search(tw_xnr_index_name, tw_xnr_index_type,
                     {'size': 999})['hits']['hits']
    for hit in hits:
        doc = hit['_source']
        # Prefer the mail account; fall back to the phone account.
        account = doc.get('tw_mail_account', '') or doc.get('tw_phone_account', '')
        if not account:
            continue
        infos.append({
            'root_uid': doc.get('uid', ''),
            'root_nick_name': doc.get('nick_name', ''),
            'xnr_user_no': doc.get('xnr_user_no', ''),
            'account': account,
            'password': doc.get('password', ''),
            'retry_times': 0,
            'remark': '',
        })
    return infos
def get_show_retweet_timing_list(xnr_user_no, start_ts, end_ts):
    """List the XNR's scheduled facebook retweet tasks in [start_ts, end_ts).

    Sorted by compute_status ascending, then scheduled time descending.

    :param xnr_user_no: XNR identifier.
    :param start_ts: inclusive lower bound on timestamp_set.
    :param end_ts: exclusive upper bound on timestamp_set.
    :return: list of task source dicts.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'xnr_user_no': xnr_user_no}},
                    {'range': {'timestamp_set': {'gte': start_ts, 'lt': end_ts}}},
                ]
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': [
            {'compute_status': {'order': 'asc'}},
            {'timestamp_set': {'order': 'desc'}},
        ],
    }
    results = es.search(index=fb_xnr_retweet_timing_list_index_name,
                        doc_type=fb_xnr_retweet_timing_list_index_type,
                        body=query_body)['hits']['hits']
    result_all = []
    for result in results:
        result_all.append(result['_source'])
    # BUG FIX: the original returned the undefined name ``result_alls``,
    # raising NameError on every call.
    return result_all
def load_twitter_pingtaiguanzhu_state(root_uid, uid): """ :param root_uid: :param uid: :return: 现在表中记录的xnr和user之间的平台关注关系 """ pingtaiguanzhu_state = 0 query_body = { 'query': { 'filtered': { 'filter': { 'bool': { 'must': [{ 'term': { 'xnr_uid': root_uid } }, { 'term': { 'uid': uid } }] } } } } } search_results = es_xnr_2.search(index=twitter_xnr_relations_index_name, doc_type=twitter_xnr_relations_index_type, body=query_body)['hits']['hits'] if search_results: print search_results pingtaiguanzhu_state = int( search_results[0]['_source']['pingtaiguanzhu']) return pingtaiguanzhu_state
def load_xnr_info():
    """Collect login info plus follow/fan lists for every Twitter XNR.

    Keeps only accounts with a usable login (mail preferred over phone);
    follow and fan lists come from load_twitter_relations.

    :return: list of dicts including guanzhu_list and fensi_list.
    """
    infos = []
    hits = es.search(tw_xnr_index_name, tw_xnr_index_type,
                     {'size': 999})['hits']['hits']
    for hit in hits:
        doc = hit['_source']
        # Prefer the mail account; fall back to the phone account.
        account = doc.get('tw_mail_account', '') or doc.get('tw_phone_account', '')
        if not account:
            continue
        xnr_user_no = doc.get('xnr_user_no', '')
        guanzhu_list, fensi_list = load_twitter_relations(xnr_user_no)
        infos.append({
            'root_uid': doc.get('uid', ''),
            'root_nick_name': doc.get('nick_name', ''),
            'xnr_user_no': xnr_user_no,
            'account': account,
            'password': doc.get('password', ''),
            'guanzhu_list': guanzhu_list,
            'fensi_list': fensi_list,
        })
    return infos
def load_xnr_info():
    """Collect login info plus fans list for every Facebook XNR.

    Keeps only accounts with a usable login (mail preferred over phone);
    the friends list is the ``fans_list`` stored in the fans/followers
    index (empty when that lookup fails).

    :return: list of dicts including friends_list, retry_times and remark.
    """
    res = []
    search_res = es.search(fb_xnr_index_name, fb_xnr_index_type,
                           {'size': 999})['hits']['hits']
    for item in search_res:
        source = item['_source']
        fb_mail_account = source.get('fb_mail_account', '')
        fb_phone_account = source.get('fb_phone_account', '')
        account = ''
        if fb_mail_account:
            account = fb_mail_account
        elif fb_phone_account:
            account = fb_phone_account
        if account:
            xnr_user_no = source.get('xnr_user_no', '')
            try:
                friends_list = es.get(
                    index=fb_xnr_fans_followers_index_name,
                    doc_type=fb_xnr_fans_followers_index_type,
                    id=xnr_user_no)['_source']['fans_list']
            except Exception:
                # FIX: narrowed from a bare ``except:`` which also swallowed
                # SystemExit/KeyboardInterrupt; missing record -> no friends.
                friends_list = []
            info = {
                'root_uid': source.get('uid', ''),
                'root_nick_name': source.get('nick_name', ''),
                'xnr_user_no': xnr_user_no,
                'account': account,
                'password': source.get('password', ''),
                'friends_list': friends_list,
                'retry_times': 0,
                'remark': '',
            }
            res.append(info)
    return res
def get_tw_influence_relative(uid, influence):
    """Normalize an influence value against the day's maximum BCI influence.

    :param uid: unused here; kept for interface compatibility.
    :param influence: raw influence value to normalize.
    :return: influence / max_influence as a float; 0 when the index is
             empty or the maximum is 0.
    """
    if S_TYPE == 'test':
        date_str = S_DATE_TW
    else:
        date_str = ts2datetime(time.time() - DAY)
    # new_datetime = datetime[0:4]+datetime[5:7]+datetime[8:10]
    tw_bci_index_name = tw_bci_index_name_pre + date_str
    query_body = {
        'query': {'match_all': {}},
        'sort': {'influence': {'order': 'desc'}},
    }
    results = es_fb_tw.search(index=tw_bci_index_name,
                              doc_type=tw_bci_index_type,
                              body=query_body)['hits']['hits']
    # BUG FIX: an empty index made results[0] raise IndexError.
    if not results:
        return 0
    user_index_max = results[0]['_source']['influence']
    if not user_index_max:
        # Maximum is 0, so every value is 0.
        return 0
    # float() avoids Python 2 integer truncation when both values are ints.
    return float(influence) / user_index_max
def get_user_xnr_list(user_account):
    """List completed (create_status == 2) XNR numbers of a submitter.

    :param user_account: account that submitted the XNRs.
    :return: list of xnr_user_no strings; empty on any lookup failure.
    """
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'submitter': user_account}},
                            {'term': {'create_status': 2}},
                        ]
                    }
                }
            }
        },
        'size': USER_XNR_NUM,
    }
    xnr_user_no_list = []
    try:
        user_result = es_xnr_2.search(index=fb_xnr_index_name,
                                      doc_type=fb_xnr_index_type,
                                      body=query_body)['hits']['hits']
        for item in user_result:
            xnr_user_no_list.append(item['_source']['xnr_user_no'])
    except Exception:
        # FIX: narrowed from a bare ``except:``; failure still yields [].
        xnr_user_no_list = []
    return xnr_user_no_list
def load_facebook_relation_uids(xnr_user_no, term_query_list):
    """Return uids related to the XNR that also match extra term clauses.

    :param xnr_user_no: XNR identifier filtered via the ``xnr_no`` field.
    :param term_query_list: additional ES term clauses appended to the
                            filter's must list.
    :return: list of matching uids from the facebook relations index.
    """
    must_clauses = [{'term': {'xnr_no': xnr_user_no}}]
    must_clauses.extend(term_query_list)
    query_body = {
        'query': {
            'filtered': {
                'filter': {'bool': {'must': must_clauses}}
            }
        }
    }
    search_results = es_xnr_2.search(
        index=facebook_xnr_relations_index_name,
        doc_type=facebook_xnr_relations_index_type,
        body=query_body)['hits']['hits']
    return [hit['_source']['uid'] for hit in search_results]
def get_show_retweet_timing_list_future(xnr_user_no):
    """List the XNR's scheduled retweet tasks whose set time is still ahead.

    Sorted by compute_status ascending, then scheduled time descending.
    NOTE(review): this reads the *weibo* retweet-timing index — confirm
    that is intended in this facebook/twitter module.

    :param xnr_user_no: XNR identifier.
    :return: list of task source dicts.
    """
    now_ts = int(time.time())
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'xnr_user_no': xnr_user_no}},
                    {'range': {'timestamp_set': {'gte': now_ts}}},
                ]
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': [
            {'compute_status': {'order': 'asc'}},
            {'timestamp_set': {'order': 'desc'}},
        ],
    }
    hits = es.search(index=weibo_xnr_retweet_timing_list_index_name,
                     doc_type=weibo_xnr_retweet_timing_list_index_type,
                     body=query_body)['hits']['hits']
    return [hit['_source'] for hit in hits]
def lookup_twitter_date_warming(keywords, today_datetime): keyword_query_list = [] for keyword in keywords: #print 'keyword:',keyword keyword_query_list.append( {'wildcard': { 'text': '*' + keyword.encode('utf-8') + '*' }}) twitter_flow_text_index_name = get_timets_set_indexset_list( twitter_flow_text_index_name_pre, today_datetime, today_datetime) query_body = { 'query': { 'bool': { 'should': keyword_query_list, 'must': { 'range': { 'sensitive': { 'gte': 1 } } } } }, 'size': MAX_WARMING_SIZE, 'sort': { 'sensitive': { 'order': 'desc' } } } try: temp_result = es_xnr_2.search(index=twitter_flow_text_index_name, doc_type=twitter_flow_text_index_type, body=query_body)['hits']['hits'] date_result = [] print 'temp_result::', temp_result for item in temp_result: #查询三个指标字段 tid_result = lookup_tid_attend_index(item['_source']['tid'], today_datetime) if tid_result: item['_source']['comment'] = tid_result['comment'] item['_source']['share'] = tid_result['share'] item['_source']['favorite'] = tid_result['favorite'] else: item['_source']['comment'] = 0 item['_source']['share'] = 0 item['_source']['favorite'] = 0 #查询用户昵称 item['_source']['nick_name'] = get_user_nickname( item['_source']['uid']) date_result.append(item['_source']) except: date_result = [] return date_result
def get_un_trace_follow_operate(xnr_user_no, uid_string, nick_name_string):
    """Remove users from an XNR's trace-follow list.

    Users may be given as a comma-separated uid string or nick-name string
    (uids take precedence).  Nick names are resolved to uids via the
    facebook user index; unresolvable names are reported back.

    :param xnr_user_no: XNR whose trace_follow_list is updated.
    :param uid_string: comma-separated uids, may be empty.
    :param nick_name_string: comma-separated nick names, may be empty.
    :return: [mark, fail_uids, fail_nick_name_list]
    """
    mark = False
    fail_nick_name_list = []
    fail_uids = []
    # BUG FIX: uid_list was unbound (NameError below) when both input
    # strings were empty.
    uid_list = []
    if uid_string:
        uid_list = uid_string.encode('utf-8').split(',')
    elif nick_name_string:
        nick_name_list = nick_name_string.encode('utf-8').split(',')
        for nick_name in nick_name_list:
            query_body = {
                'query': {
                    'filtered': {
                        'filter': {
                            'term': {'nick_name': nick_name}
                        }
                    }
                },
                '_source': ['uid'],
            }
            try:
                uid_results = es.search(index=facebook_user_index_name,
                                        doc_type=facebook_user_index_type,
                                        body=query_body)['hits']['hits']
                # BUG FIX: was ``uid_result[0]`` (undefined name), which made
                # every nick-name lookup fall into the failure branch.
                uid = uid_results[0]['_source']['uid']
                uid_list.append(uid)
            except Exception:
                fail_nick_name_list.append(nick_name)
    try:
        result = es.get(index=fb_xnr_fans_followers_index_name,
                        doc_type=fb_xnr_fans_followers_index_type,
                        id=xnr_user_no)['_source']
        trace_follow_list = result['trace_follow_list']
        # Uids that are actually on the trace-follow list.
        comment_uids = list(set(trace_follow_list).intersection(set(uid_list)))
        # NOTE(review): this difference is always empty because
        # comment_uids is a subset of uid_list; the intent was likely
        # set(uid_list) - set(comment_uids).  Kept as-is pending review.
        fail_uids = list(set(comment_uids).difference(set(uid_list)))
        # Remove the requested uids and persist the shrunken list.
        trace_follow_list = list(set(trace_follow_list).difference(set(uid_list)))
        es.update(index=fb_xnr_fans_followers_index_name,
                  doc_type=fb_xnr_fans_followers_index_type,
                  id=xnr_user_no,
                  body={'doc': {'trace_follow_list': trace_follow_list}})
        mark = True
    except Exception:
        mark = False
    return [mark, fail_uids, fail_nick_name_list]
def compute_full_keywords():
    """Aggregate today's facebook keywords and store them as one document.

    Buckets ``keywords_string`` terms over today's flow-text index,
    re-extracts keywords from the '&'-joined bucket keys and writes the
    keyword->count map into the full-keyword index (doc id = the date).

    :return: True on a successful index write, False otherwise.
    """
    now_time = int(time.time())
    date_time = ts2datetime(now_time)
    flow_text_index_name = facebook_flow_text_index_name_pre + date_time
    query_body = {
        'aggs': {
            'keywords': {
                'terms': {'field': 'keywords_string', 'size': 1000}
            }
        }
    }
    flow_text_exist = es_xnr_2.search(
        index=flow_text_index_name,
        doc_type=facebook_flow_text_index_type,
        body=query_body)['aggregations']['keywords']['buckets']
    word_dict = dict()
    word_dict_new = dict()
    keywords_string = ''
    for item in flow_text_exist:
        word = item['key']
        count = item['doc_count']
        word_dict[word] = count
        keywords_string += '&'
        keywords_string += item['key']
    k_dict = extract_keywords(keywords_string)
    for item_item in k_dict:
        keyword = item_item.word
        # FIX: ``in`` replaces the deprecated dict.has_key; keywords the
        # aggregation did not produce default to a count of 1.
        if keyword in word_dict:
            word_dict_new[keyword] = word_dict[keyword]
        else:
            word_dict_new[keyword] = 1
    keywords_task_detail = dict()
    keywords_task_detail['date_time'] = date_time
    keywords_task_detail['timestamp'] = datetime2ts(date_time)
    keywords_task_detail['keyword_value_string'] = json.dumps(word_dict_new)
    keywords_task_id = date_time
    try:
        es_xnr_2.index(index=facebook_full_keyword_index_name,
                       doc_type=facebook_full_keyword_index_type,
                       body=keywords_task_detail,
                       id=keywords_task_id)
        mark = True
    except Exception:
        # FIX: narrowed from a bare ``except:``.
        mark = False
    return mark
def search_tw_posts(uids, from_ts, to_ts): query_body = load_search_query(uids, from_ts, to_ts) print query_body search_results = es_xnr_2.search(index=twitter_flow_text_index_name_pre + '*', doc_type=twitter_flow_text_index_type, body=query_body)['hits']['hits'] return [item['_source'] for item in search_results]
def search_fb_posts(fb_xnr_user_no, from_ts, to_ts, extend_keywords_size=0): fb_keywords = load_fb_keywords(fb_xnr_user_no, extend_keywords_size) fb_query_body = load_query_body(fb_keywords) fb_index_list = load_index(facebook_flow_text_index_name_pre, from_ts, to_ts) print'222222222222 fb_index_list' print fb_index_list fb_search_results = es_xnr_2.search(index=fb_index_list, doc_type=facebook_flow_text_index_type, body=fb_query_body)['hits']['hits'] return [item['_source'] for item in fb_search_results]
def search_tw_posts(tw_xnr_user_no, from_ts, to_ts, extend_keywords_size=0): end_results = [] tw_keywords = load_tw_keywords(tw_xnr_user_no, extend_keywords_size) tw_query_body = load_query_body(tw_keywords) tw_index_list = load_index(twitter_flow_text_index_name_pre, from_ts, to_ts) print '3333333333333333333 tw_index_list' print tw_index_list tw_search_results = es_xnr_2.search(index=tw_index_list, doc_type=twitter_flow_text_index_type, body=tw_query_body)['hits']['hits'] return [item['_source'] for item in tw_search_results]
def create_speech_warning(xnr_user_no, today_datetime):
    """Scan the day's sensitive twitter posts and store speech warnings.

    Every post with sensitive >= 1 is tagged follow/unfollow depending on
    whether its author is in the XNR's followers list, enriched with
    engagement counters and the author nickname, then written to the
    per-day speech-warning index (doc id = xnr_user_no + '_' + tid).

    :param xnr_user_no: XNR the warnings belong to.
    :param today_datetime: day used for index selection and lookups.
    :return: list of per-post write marks (currently always True; the
             original try/except around the write is commented out, so a
             failed write raises instead of appending False).
    """
    # Followers of this XNR decide the content_type tag below.
    lookup_type = 'followers_list'
    followers_list = lookup_xnr_fans_followers(xnr_user_no, lookup_type)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {'must': {'range': {'sensitive': {'gte': 1}}}}
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {'sensitive': {'order': 'desc'}},
    }
    flow_index = get_timets_set_indexset_list(
        twitter_flow_text_index_name_pre, today_datetime, today_datetime)
    hits = es_xnr_2.search(index=flow_index,
                           doc_type=twitter_flow_text_index_type,
                           body=query_body)['hits']['hits']
    today_date = ts2datetime(today_datetime)
    warning_index = twitter_speech_warning_index_name_pre + today_date
    result = []
    for hit in hits:
        doc = hit['_source']
        doc['content_type'] = 'follow' if doc['uid'] in followers_list else 'unfollow'
        doc['validity'] = 0
        doc['xnr_user_no'] = xnr_user_no
        # Engagement counters (comment/share/favorite); 0 when missing.
        tid_result = lookup_tid_attend_index(doc['tid'], today_datetime)
        if tid_result:
            doc['comment'] = tid_result['comment']
            doc['share'] = tid_result['share']
            doc['favorite'] = tid_result['favorite']
        else:
            doc['comment'] = 0
            doc['share'] = 0
            doc['favorite'] = 0
        # Author nickname for display.
        doc['nick_name'] = get_user_nickname(doc['uid'])
        task_id = xnr_user_no + '_' + doc['tid']
        es_xnr_2.index(index=warning_index,
                       doc_type=twitter_speech_warning_index_type,
                       body=doc, id=task_id)
        result.append(True)
    return result
def savedata2es(date, index_pre, index_type, data): config = { 'facebook_feedback_like_': ['uid', 'root_uid', 'timestamp', 'text', 'root_text', 'root_mid'], 'facebook_feedback_comment_': [ 'uid', 'root_uid', 'mid', 'timestamp', 'text', 'root_text', 'root_mid', 'comment_type' ], 'facebook_feedback_retweet_': [ 'uid', 'root_uid', 'mid', 'timestamp', 'text', 'root_text', 'root_mid' ], 'facebook_feedback_private_': ['uid', 'root_uid', 'timestamp', 'text', 'root_text', 'private_type'], 'facebook_feedback_friends': ['uid', 'root_uid'], 'facebook_feedback_at_': ['uid', 'root_uid', 'mid', 'timestamp', 'text'], } if index_pre in [ 'facebook_feedback_at_', 'facebook_feedback_comment_', 'facebook_feedback_retweet_', 'facebook_feedback_private_', 'facebook_feedback_like_' ]: index_name = index_pre + date search_index_name = index_pre + '*' else: index_name = index_pre search_index_name = index_name for d in data: query_body = { "query": { "filtered": { "filter": { "bool": { "must": [] } } } } } try: for field in config[index_pre]: query_body['query']['filtered']['filter']['bool'][ 'must'].append({'term': { field: d.get(field, '') }}) query_result = es.search(search_index_name, index_type, query_body)['hits']['hits'] if query_result: print es.update(index=index_name, doc_type=index_type, body={'doc': d}, id=query_result[0]['_id']) else: print es.index(index_name, index_type, d) except Exception, e: EXCEPTION += '\n savedata2es Exception: ' + str(e)
def get_hashtag(today_datetime):
    """Aggregate sensitive hashtags for one day of facebook flow text.

    Buckets posts with sensitive >= 1 by hashtag, sums their sensitive
    scores, and returns the hashtags sorted by (sensitivity, count)
    descending.

    :param today_datetime: day used to select the flow-text index.
    :return: list of {event_name, event_count, event_sensitive} dicts.
    """
    index_name = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{'range': {'sensitive': {'gte': 1}}}]
                    }
                }
            }
        },
        'aggs': {
            'all_hashtag': {
                'terms': {'field': 'hashtag'},
                'aggs': {
                    'sum_sensitive': {'sum': {'field': 'sensitive'}}
                }
            }
        },
        'size': EVENT_OFFLINE_COUNT,
    }
    buckets = es_xnr_2.search(
        index=index_name, doc_type=facebook_flow_text_index_type,
        body=query_body)['aggregations']['all_hashtag']['buckets']
    hashtag_list = []
    for bucket in buckets:
        if not bucket['key']:
            continue  # skip the empty-hashtag bucket
        hashtag_list.append({
            'event_name': bucket['key'],
            'event_count': bucket['doc_count'],
            'event_sensitive': bucket['sum_sensitive']['value'],
        })
    hashtag_list.sort(key=lambda k: (k.get('event_sensitive', 0),
                                     k.get('event_count', 0)),
                      reverse=True)
    return hashtag_list
def load_fb_flow_text(fb_flow_text_index_list, uid_list, fb_flow_text_query_body={}): if not fb_flow_text_query_body: fb_flow_text_query_body = { 'query': { "filtered": { "filter": { "bool": { "must": [ { "terms": { "uid": uid_list } }, { 'range': { 'flag_ch': { 'gte': -1 } } }, ] } } } }, 'size': MAX_SEARCH_SIZE, "sort": { "timestamp": { "order": "desc" } }, "fields": ["text_ch", "uid"] } fb_flow_text = {} for index_name in fb_flow_text_index_list: try: search_results = es.search( index=index_name, doc_type=facebook_flow_text_index_type, body=fb_flow_text_query_body)['hits']['hits'] for item in search_results: content = item['fields'] uid = content['uid'][0] if not uid in fb_flow_text: fb_flow_text[uid] = {'text_dict': {}} if content.has_key('text_ch'): fb_flow_text[uid]['text_dict'][ item['_id']] = traditional2simplified( content['text_ch'][0] [:1800]) #对文本内容长度做出限制[:1800],以免翻译时麻烦 else: fb_flow_text[uid]['text_dict'][item['_id']] = '' except Exception, e: print e
def xnr_keywords_compute(xnr_user_no): #查询好友列表 followers_list=lookup_xnr_concernedusers(xnr_user_no) lookup_condition_list=[] print 'xnr_user_no, followers_list:', xnr_user_no, followers_list lookup_condition_list.append({'filtered':{'filter':{'bool':{'must':{'terms':{'uid':followers_list}}}}}}) #根据日期确定查询表 if S_TYPE == 'test': date_time = test_date else: now_time=int(time.time()) date_time=ts2datetime(now_time) flow_text_index_name=twitter_flow_text_index_name_pre+date_time #按日期统计 # print lookup_condition_list for item_condition in lookup_condition_list: query_body={ 'query':item_condition, 'aggs':{ 'keywords':{ 'terms':{ 'field':'keywords_string', 'size': 1000 } } } } flow_text_exist=es_xnr_2.search(index=flow_text_index_name,doc_type=twitter_flow_text_index_type,\ body=query_body)['aggregations']['keywords']['buckets'] # print 'flow_text_exist:',flow_text_exist word_dict = dict() word_dict_new = dict() keywords_string = '' for item in flow_text_exist: word = item['key'] count = item['doc_count'] word_dict[word] = count keywords_string += '&' keywords_string += item['key'] k_dict = extract_keywords(keywords_string) for item_item in k_dict: keyword = item_item.word # print 'keyword::',type(keyword) word_dict_new[keyword] = word_dict[keyword] return word_dict_new
def load_tw_uids(xnr_user_no):
    """Uids related to a twitter XNR, or to all XNRs when 'ALL' is given.

    :param xnr_user_no: XNR identifier, or the literal 'ALL' to skip the
                        per-XNR filter.
    :return: list of related uids.
    """
    query_body = load_uid_query()
    if xnr_user_no != 'ALL':
        must_clauses = query_body['query']['filtered']['filter']['bool']['must']
        must_clauses.append({'term': {'xnr_no': xnr_user_no}})
    hits = es_xnr_2.search(index=twitter_xnr_relations_index_name,
                           doc_type=twitter_xnr_relations_index_type,
                           body=query_body)['hits']['hits']
    return [hit['_source']['uid'] for hit in hits]
def load_uid_list(): uid_list = [] uid_list_query_body = {'size': MAX_SEARCH_SIZE} try: search_results = es.search(index=twitter_user_index_name, doc_type=twitter_user_index_type, body=uid_list_query_body)['hits']['hits'] for item in search_results: uid_list.append(item['_source']['uid']) except Exception, e: print e