def get_daily_recommend_tweets(theme, sort_item):
    """Return the stored daily-interest tweets for ``theme`` with author info added.

    The index name is ``daily_interest_index_name_pre + '_' + <date>``, where
    the date comes from ``S_DATE_TW`` when ``S_TYPE == 'test'`` and from the
    current time otherwise.  The document whose id is ``daily_ch2en[theme]``
    is fetched and its ``content`` field is decoded from JSON.

    :param theme: theme label; must be a key of ``daily_ch2en`` (``KeyError``
        otherwise).
    :param sort_item: accepted for interface compatibility; not used here.
    :return: list of the decoded entries, each extended with ``nick_name``
        and ``photo_url`` looked up from the entry's ``uid``.
    """
    reference_ts = datetime2ts(S_DATE_TW) if S_TYPE == 'test' else int(time.time())
    index_name = daily_interest_index_name_pre + '_' + ts2datetime(reference_ts)

    document = es.get(index=index_name,
                      doc_type=daily_interest_index_type,
                      id=daily_ch2en[theme])
    tweets = json.loads(document['_source']['content'])

    decorated = []
    for tweet in tweets:
        # Attach the author's display name and avatar to each entry in place.
        tweet['nick_name'], tweet['photo_url'] = tw_uid2nick_name_photo(
            tweet['uid'])
        decorated.append(tweet)
    return decorated
def get_tweets_from_flow(monitor_keywords_list, sort_item_new):
    """Search the Twitter flow-text index for tweets matching any monitored keyword.

    Each keyword becomes a ``*keyword*`` wildcard clause on ``keywords_string``;
    the clauses are OR-ed together (``bool``/``should``).  Hits are ordered by
    ``sort_item_new`` descending, then by ``timestamp`` descending, and capped
    at ``TOP_WEIBOS_LIMIT``.  The index searched is the one for the date
    24 hours before the reference time (``S_DATE_TW`` when
    ``S_TYPE == 'test'``, otherwise now).

    If the keyword query returns nothing, a ``match_all`` query sorted by
    ``sort_item_new`` is issued against the same index instead.

    :param monitor_keywords_list: iterable of keyword strings.
    :param sort_item_new: name of the field to sort by, descending.
    :return: list of hit ``_source`` dicts, each extended with ``nick_name``
        and ``photo_url`` looked up from its ``uid``.
    """
    keyword_clauses = [
        {'wildcard': {'keywords_string': '*' + keyword + '*'}}
        for keyword in monitor_keywords_list
    ]
    keyword_query = {
        'query': {'bool': {'should': keyword_clauses}},
        'sort': [
            {sort_item_new: {'order': 'desc'}},
            {'timestamp': {'order': 'desc'}},
        ],
        'size': TOP_WEIBOS_LIMIT,
    }

    reference_ts = datetime2ts(S_DATE_TW) if S_TYPE == 'test' else int(time.time())
    # The index is selected using the date one day before the reference time.
    index_name = twitter_flow_text_index_name_pre + ts2datetime(
        reference_ts - 24 * 3600)

    hits = es.search(index=index_name,
                     doc_type=twitter_flow_text_index_type,
                     body=keyword_query)['hits']['hits']
    if not hits:
        # Nothing matched the keywords: fall back to the top tweets overall.
        fallback_query = {
            'query': {'match_all': {}},
            'size': TOP_WEIBOS_LIMIT,
            'sort': {sort_item_new: {'order': 'desc'}},
        }
        hits = es.search(index=index_name,
                         doc_type=twitter_flow_text_index_type,
                         body=fallback_query)['hits']['hits']

    tweets = []
    for hit in hits:
        tweet = hit['_source']
        tweet['nick_name'], tweet['photo_url'] = tw_uid2nick_name_photo(
            tweet['uid'])
        tweets.append(tweet)
    return tweets
def get_hot_recommend_tweets(xnr_user_no, topic_field, sort_item):
    """Return hot tweets from today's social-sensing index for one topic field.

    The topic label is translated through ``topic_ch2en_dict`` and used as a
    ``term`` filter on ``topic_field``; hits are ordered by ``sort_item``
    descending and capped at ``TOP_WEIBOS_LIMIT``.  If the filtered query
    returns nothing, a ``match_all`` query with the same sort is issued
    against the same index instead.

    :param xnr_user_no: accepted for interface compatibility; not used here.
    :param topic_field: topic label; must be a key of ``topic_ch2en_dict``
        (``KeyError`` otherwise).
    :param sort_item: name of the field to sort by, descending.
    :return: list of hit ``_source`` dicts, each extended with ``nick_name``
        and ``photo_url`` looked up from its ``uid``.  An empty list when
        ``sort_item == 'compute_status'``.
    """
    topic_field_en = topic_ch2en_dict[topic_field]

    if sort_item == 'compute_status':
        # The original only built a query for other sort keys, leaving this
        # case without a usable query (unbound ``query_body``).  Return an
        # empty result explicitly so callers always receive a list.
        # NOTE(review): confirm an empty list is the desired result for
        # 'compute_status' rather than a dedicated query.
        return []

    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'filtered': {
                        'filter': {
                            'term': {'topic_field': topic_field_en}
                        }
                    }
                }]
            }
        },
        'sort': {sort_item: {'order': 'desc'}},
        'size': TOP_WEIBOS_LIMIT,
    }

    current_time = datetime2ts(S_DATE_TW) if S_TYPE == 'test' else time.time()
    index_name = tw_social_sensing_index_name_pre + ts2datetime(current_time)

    hits = es.search(index=index_name,
                     doc_type=tw_social_sensing_index_type,
                     body=query_body)['hits']['hits']
    if not hits:
        # No tweet for this topic: fall back to the top tweets overall.
        fallback_query = {
            'query': {'match_all': {}},
            'size': TOP_WEIBOS_LIMIT,
            'sort': {sort_item: {'order': 'desc'}},
        }
        hits = es.search(index=index_name,
                         doc_type=tw_social_sensing_index_type,
                         body=fallback_query)['hits']['hits']

    results_all = []
    for hit in hits:
        tweet = hit['_source']
        tweet['nick_name'], tweet['photo_url'] = tw_uid2nick_name_photo(
            tweet['uid'])
        results_all.append(tweet)
    return results_all
def read_tracing_followers_tweet():
    """Queue today's tweets from traced accounts as timed-retweet tasks.

    For every record in the fans/followers index (only ``TXNR0001`` when
    ``S_TYPE == 'test'``), the tweets in today's Twitter flow-text index whose
    ``uid`` is in the record's ``trace_follow_list`` are looked up.  Each tweet
    that has no task yet is written to the retweet timing list under the id
    ``<xnr_user_no>_<tid>`` with ``compute_status`` 0 and a ``timestamp_set``
    equal to the tweet timestamp plus a random offset between
    ``RETWEET_START_TS`` and ``RETWEET_END_TS``.

    Returns nothing; the effect is the documents indexed into
    ``tw_xnr_retweet_timing_list_index_name``.
    """
    if S_TYPE == 'test':
        query_body = {
            'query': {'term': {'xnr_user_no': 'TXNR0001'}},
            'size': MAX_SEARCH_SIZE,
        }
        current_time = datetime2ts(S_DATE_TW)
    else:
        query_body = {'query': {'match_all': {}}, 'size': MAX_SEARCH_SIZE}
        current_time = int(time.time())

    # The date (and so the index name) does not depend on the record being
    # processed, so compute it once instead of once per record.
    flow_text_index_name = twitter_flow_text_index_name_pre + ts2datetime(
        current_time)

    results = es_xnr.search(index=tw_xnr_fans_followers_index_name,
                            doc_type=tw_xnr_fans_followers_index_type,
                            body=query_body)['hits']['hits']

    for result in results:
        record = result['_source']
        xnr_user_no = record['xnr_user_no']
        # A record without traced accounts is skipped instead of aborting the
        # whole run with a KeyError.
        trace_follow_list = record.get('trace_follow_list') or []
        # Single pre-formatted argument: same output under the Python 2 print
        # statement and the Python 3 print function.
        print('trace_follow_list::: %s' % (trace_follow_list,))
        if not trace_follow_list:
            continue

        query_body_flow = {
            'query': {
                'filtered': {
                    'filter': {'terms': {'uid': trace_follow_list}}
                }
            },
            'size': MAX_SEARCH_SIZE,
        }
        # Use the Twitter flow-text doc type, matching the other searches of
        # this index family in this file.
        # NOTE(review): the search stays on es_xnr as before; confirm that is
        # the client holding the flow-text index.
        results_flow = es_xnr.search(index=flow_text_index_name,
                                     doc_type=twitter_flow_text_index_type,
                                     body=query_body_flow)['hits']['hits']

        for hit in results_flow:
            tweet = hit['_source']
            tid = tweet['tid']
            task_id = xnr_user_no + '_' + tid

            # Skip tweets that already have a task.  An explicit existence
            # check replaces the former bare ``except``, which treated every
            # failure as "not stored yet" and could overwrite an existing
            # task.
            if es_xnr.exists(index=tw_xnr_retweet_timing_list_index_name,
                             doc_type=tw_xnr_retweet_timing_list_index_type,
                             id=task_id):
                continue

            nick_name, photo_url = tw_uid2nick_name_photo(tweet['uid'])
            task_detail = {
                'xnr_user_no': xnr_user_no,
                'tid': tid,
                'text': tweet['text'],
                'uid': tweet['uid'],
                'nick_name': nick_name,
                'photo_url': photo_url,
                'timestamp': tweet['timestamp'],
                # Scheduled retweet time: tweet time plus a random delay.
                'timestamp_set': tweet['timestamp'] + random.randint(
                    RETWEET_START_TS, RETWEET_END_TS),
                'compute_status': 0,
            }
            es_xnr.index(index=tw_xnr_retweet_timing_list_index_name,
                         doc_type=tw_xnr_retweet_timing_list_index_type,
                         body=task_detail,
                         id=task_id)