Ejemplo n.º 1
0
def get_daily_recommend_tweets(theme, sort_item):
    """Return the stored daily recommendations for ``theme`` with author info.

    The recommendations are read from a single Elasticsearch document whose
    id is ``daily_ch2en[theme]`` and whose index name is
    ``daily_interest_index_name_pre + '_' + <date>``. The document's
    ``content`` field is a JSON string that is decoded and iterated.

    Args:
        theme: Key into ``daily_ch2en``; a missing key raises ``KeyError``.
        sort_item: Accepted for interface compatibility; not used here.

    Returns:
        A list of the decoded entries, each extended in place with
        ``nick_name`` and ``photo_url`` from ``fb_uid2nick_name_photo``.
    """
    # In test mode "now" is pinned to the configured date instead of the clock.
    current_ts = datetime2ts(S_DATE_FB) if S_TYPE == 'test' else int(time.time())
    daily_index = daily_interest_index_name_pre + '_' + ts2datetime(current_ts)

    document = es.get(index=daily_index,
                      doc_type=daily_interest_index_type,
                      id=daily_ch2en[theme])
    posts = json.loads(document['_source']['content'])

    enriched = []
    for post in posts:
        # Attach the author's display name and avatar to every entry.
        post['nick_name'], post['photo_url'] = fb_uid2nick_name_photo(post['uid'])
        enriched.append(post)
    return enriched
Ejemplo n.º 2
0
def get_tweets_from_flow(monitor_keywords_list, sort_item_new):
    """Search the previous day's flow-text index for posts matching keywords.

    Each keyword becomes a ``wildcard`` clause on ``keywords_string``
    (``*keyword*``); the clauses are combined under ``bool.should``. Hits are
    sorted by ``sort_item_new`` descending, then by ``timestamp`` descending,
    and limited to ``TOP_WEIBOS_LIMIT``. If the keyword search returns no
    hits, a ``match_all`` query sorted only by ``sort_item_new`` is issued
    against the same index instead.

    Args:
        monitor_keywords_list: Iterable of keyword strings.
        sort_item_new: Name of the field used as the primary sort key.

    Returns:
        A list of the hits' ``_source`` dicts, each extended with
        ``nick_name`` and ``photo_url`` from ``fb_uid2nick_name_photo``.
    """
    keyword_clauses = [
        {'wildcard': {'keywords_string': '*' + keyword + '*'}}
        for keyword in monitor_keywords_list
    ]
    keyword_query = {
        'query': {'bool': {'should': keyword_clauses}},
        'sort': [
            {sort_item_new: {'order': 'desc'}},
            {'timestamp': {'order': 'desc'}},
        ],
        'size': TOP_WEIBOS_LIMIT,
    }

    # In test mode "now" is pinned to the configured date instead of the clock.
    reference_ts = datetime2ts(S_DATE_FB) if S_TYPE == 'test' else int(time.time())
    # The index queried is the one for the day before the reference time.
    flow_index = facebook_flow_text_index_name_pre + ts2datetime(reference_ts - 24 * 3600)

    hits = es.search(index=flow_index,
                     doc_type=facebook_flow_text_index_type,
                     body=keyword_query)['hits']['hits']

    if not hits:
        # Nothing matched the keywords: fall back to the top posts overall.
        fallback_query = {
            'query': {'match_all': {}},
            'size': TOP_WEIBOS_LIMIT,
            'sort': {sort_item_new: {'order': 'desc'}},
        }
        hits = es.search(index=flow_index,
                         doc_type=facebook_flow_text_index_type,
                         body=fallback_query)['hits']['hits']

    enriched = []
    for hit in hits:
        post = hit['_source']
        post['nick_name'], post['photo_url'] = fb_uid2nick_name_photo(post['uid'])
        enriched.append(post)
    return enriched
Ejemplo n.º 3
0
def get_hot_recommend_tweets(xnr_user_no, topic_field, sort_item):
    """Return hot posts for a topic field from today's social-sensing index.

    The index ``fb_social_sensing_index_name_pre + <date>`` is filtered on
    ``topic_field`` (translated through ``topic_ch2en_dict``), sorted by
    ``sort_item`` descending and limited to ``TOP_WEIBOS_LIMIT``. If that
    returns no hits, a ``match_all`` query with the same sort and size is
    issued against the same index instead.

    Args:
        xnr_user_no: Accepted for interface compatibility; not used here.
        topic_field: Key into ``topic_ch2en_dict``; a missing key raises
            ``KeyError``.
        sort_item: Name of the field to sort by. No query is defined for
            ``'compute_status'``.

    Returns:
        A list of the hits' ``_source`` dicts, each extended with
        ``nick_name`` and ``photo_url`` from ``fb_uid2nick_name_photo``.
        An empty list when ``sort_item == 'compute_status'``.
    """
    topic_field_en = topic_ch2en_dict[topic_field]

    # No search is defined for 'compute_status'. The previous code fell
    # through to the result loop with ``es_results`` unassigned and raised
    # UnboundLocalError; return an explicit empty result instead.
    if sort_item == 'compute_status':
        return []

    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'filtered': {
                        'filter': {
                            'term': {
                                'topic_field': topic_field_en
                            }
                        }
                    }
                }]
            }
        },
        'sort': {
            sort_item: {
                'order': 'desc'
            }
        },
        'size': TOP_WEIBOS_LIMIT
    }

    # In test mode "now" is pinned to the configured date instead of the clock.
    # int() keeps the timestamp type consistent with the sibling functions.
    if S_TYPE == 'test':
        current_time = datetime2ts(S_DATE_FB)
    else:
        current_time = int(time.time())

    fb_social_sensing_index_name = (
        fb_social_sensing_index_name_pre + ts2datetime(current_time))

    es_results = es.search(index=fb_social_sensing_index_name,
                           doc_type=fb_social_sensing_index_type,
                           body=query_body)['hits']['hits']

    if not es_results:
        # Nothing for this topic field: fall back to the top posts overall.
        fallback_body = {
            'query': {'match_all': {}},
            'size': TOP_WEIBOS_LIMIT,
            'sort': {sort_item: {'order': 'desc'}},
        }
        es_results = es.search(index=fb_social_sensing_index_name,
                               doc_type=fb_social_sensing_index_type,
                               body=fallback_body)['hits']['hits']

    results_all = []
    for hit in es_results:
        result = hit['_source']
        nick_name, photo_url = fb_uid2nick_name_photo(result['uid'])
        result['nick_name'] = nick_name
        result['photo_url'] = photo_url
        results_all.append(result)
    return results_all
Ejemplo n.º 4
0
def read_tracing_followers_tweet():

    if S_TYPE == 'test':
        query_body = {
            'query':{
                'term':{'xnr_user_no':'FXNR0003'}
            },
            'size':MAX_SEARCH_SIZE
        }
        
    else:
        query_body = {
            'query':{
                'match_all':{}
            },
            'size':MAX_SEARCH_SIZE
        }


    results = es_xnr.search(index=fb_xnr_fans_followers_index_name,doc_type=fb_xnr_fans_followers_index_type,\
                body=query_body)['hits']['hits']
    if results:
        for result in results:
            result = result['_source']
            
            xnr_user_no = result['xnr_user_no']
            trace_follow_list = result['trace_follow_list']
            print 'trace_follow_list:::',trace_follow_list

            if S_TYPE == 'test':
                current_time = datetime2ts(S_DATE_FB)
                #trace_follow_list = TRACE_FOLLOW_LIST
            else:
                current_time = int(time.time())

            current_date = ts2datetime(current_time)

            flow_text_index_name = facebook_flow_text_index_name_pre + current_date

            query_body_flow = {
                'query':{
                    'filtered':{
                        'filter':{
                            'terms':{'uid':trace_follow_list}
                        }
                    }
                },
                'size':MAX_SEARCH_SIZE
            }

            results_flow = es_xnr.search(index=flow_text_index_name,doc_type=flow_text_index_type,\
                            body=query_body_flow)['hits']['hits']

            if results_flow:
                for result_flow in results_flow:
                    
                    result_flow = result_flow['_source']
                    fid = result_flow['fid']

                    #先判断 之前是否已经存过该fid
                    
                    task_id = xnr_user_no + '_' + fid
                    try:
                        # 如果已添加则跳过
                        es_xnr.get(index=fb_xnr_retweet_timing_list_index_name,doc_type=\
                            fb_xnr_retweet_timing_list_index_type,id=task_id)['_source']
                        continue

                    except:
                        # 如果未添加过则加入列表
                        task_detail = {}
                        task_detail['xnr_user_no'] = xnr_user_no
                        task_detail['fid'] = fid
                        task_detail['text'] = result_flow['text']
                        task_detail['uid'] = result_flow['uid']
                        task_detail['nick_name'],task_detail['photo_url'] = fb_uid2nick_name_photo(result_flow['uid'])
                        task_detail['timestamp'] = result_flow['timestamp']
                        task_detail['timestamp_set'] = result_flow['timestamp'] + random.randint(RETWEET_START_TS,RETWEET_END_TS)
                        task_detail['compute_status'] = 0

                        es_xnr.index(index=fb_xnr_retweet_timing_list_index_name,doc_type=\
                            fb_xnr_retweet_timing_list_index_type,body=task_detail,id=task_id)