コード例 #1
0
ファイル: tw_op_utils.py プロジェクト: feifanhanmc/xnr2
def get_daily_recommend_tweets(theme, sort_item):
    """Return the stored daily-interest tweets for ``theme``.

    The document whose id is ``daily_ch2en[theme]`` is fetched from the
    daily-interest index of the reference day, its ``content`` field is
    decoded from JSON, and every decoded entry is extended with the
    ``nick_name`` and ``photo_url`` returned by ``tw_uid2nick_name_photo``.

    The reference day is derived from ``S_DATE_TW`` when ``S_TYPE`` is
    ``'test'`` and from the current time otherwise.

    :param theme: key into ``daily_ch2en``; a missing key raises ``KeyError``.
    :param sort_item: accepted but not used by this function.
    :returns: list of the decoded entries, each with ``nick_name`` and
        ``photo_url`` added.
    """
    now_ts = datetime2ts(S_DATE_TW) if S_TYPE == 'test' else int(time.time())
    day_index = daily_interest_index_name_pre + '_' + ts2datetime(now_ts)

    theme_en = daily_ch2en[theme]
    document = es.get(
        index=day_index,
        doc_type=daily_interest_index_type,
        id=theme_en,
    )
    tweets = json.loads(document['_source']['content'])

    enriched = []
    for tweet in tweets:
        # Attach the author's display name and avatar to each entry.
        tweet['nick_name'], tweet['photo_url'] = tw_uid2nick_name_photo(
            tweet['uid'])
        enriched.append(tweet)
    return enriched
コード例 #2
0
ファイル: tw_op_utils.py プロジェクト: feifanhanmc/xnr2
def get_tweets_from_flow(monitor_keywords_list, sort_item_new):
    """Search the previous day's flow-text index for monitored keywords.

    A ``bool``/``should`` query with one wildcard clause per keyword (matched
    against ``keywords_string``) is run against the index of the day that is
    24 hours before the reference time.  Hits are sorted by
    ``sort_item_new`` and then ``timestamp``, both descending, and limited to
    ``TOP_WEIBOS_LIMIT``.  When that query yields no hits, a ``match_all``
    query sorted by ``sort_item_new`` is used instead.

    The reference time is derived from ``S_DATE_TW`` when ``S_TYPE`` is
    ``'test'`` and from the current time otherwise.

    :param monitor_keywords_list: iterable of keyword strings.
    :param sort_item_new: name of the field used as primary sort key.
    :returns: list of hit ``_source`` dicts, each extended with
        ``nick_name`` and ``photo_url`` from ``tw_uid2nick_name_photo``.
    """
    keyword_clauses = [
        {'wildcard': {'keywords_string': '*' + keyword + '*'}}
        for keyword in monitor_keywords_list
    ]
    keyword_query = {
        'query': {'bool': {'should': keyword_clauses}},
        'sort': [
            {sort_item_new: {'order': 'desc'}},
            {'timestamp': {'order': 'desc'}},
        ],
        'size': TOP_WEIBOS_LIMIT,
    }

    now_ts = datetime2ts(S_DATE_TW) if S_TYPE == 'test' else int(time.time())
    one_day = 24 * 3600
    flow_index = twitter_flow_text_index_name_pre + ts2datetime(now_ts - one_day)

    response = es.search(
        index=flow_index,
        doc_type=twitter_flow_text_index_type,
        body=keyword_query,
    )
    hits = response['hits']['hits']

    if not hits:
        # Nothing matched the keywords: fall back to the top documents overall.
        fallback_query = {
            'query': {'match_all': {}},
            'size': TOP_WEIBOS_LIMIT,
            'sort': {sort_item_new: {'order': 'desc'}},
        }
        response = es.search(
            index=flow_index,
            doc_type=twitter_flow_text_index_type,
            body=fallback_query,
        )
        hits = response['hits']['hits']

    tweets = []
    for hit in hits:
        tweet = hit['_source']
        tweet['nick_name'], tweet['photo_url'] = tw_uid2nick_name_photo(
            tweet['uid'])
        tweets.append(tweet)
    return tweets
コード例 #3
0
def get_hot_recommend_tweets(xnr_user_no, topic_field, sort_item):
    """Return social-sensing tweets for a topic field, sorted by ``sort_item``.

    For any ``sort_item`` other than ``'compute_status'`` the social-sensing
    index of the reference day is searched for documents whose
    ``topic_field`` equals ``topic_ch2en_dict[topic_field]``, sorted by
    ``sort_item`` descending and limited to ``TOP_WEIBOS_LIMIT``.  When that
    query yields no hits, a ``match_all`` query with the same sort and size
    is used instead.

    The reference day is derived from ``S_DATE_TW`` when ``S_TYPE`` is
    ``'test'`` and from the current time otherwise.

    :param xnr_user_no: accepted but not used by this function.
    :param topic_field: key into ``topic_ch2en_dict``; a missing key raises
        ``KeyError``.
    :param sort_item: name of the field to sort by.  ``'compute_status'``
        performs no search and yields an empty list.
    :returns: list of hit ``_source`` dicts, each extended with
        ``nick_name`` and ``photo_url`` from ``tw_uid2nick_name_photo``.
    """
    topic_field_en = topic_ch2en_dict[topic_field]

    # Start with no hits so the 'compute_status' case returns an empty list
    # instead of failing with UnboundLocalError in the loop below.
    # NOTE(review): confirm whether 'compute_status' was meant to run a
    # dedicated query; no such query is visible in this function.
    es_results = []

    if sort_item != 'compute_status':
        query_body = {
            'query': {
                'bool': {
                    'must': [{
                        'filtered': {
                            'filter': {
                                'term': {
                                    'topic_field': topic_field_en
                                }
                            }
                        }
                    }]
                }
            },
            'sort': {
                sort_item: {
                    'order': 'desc'
                }
            },
            'size': TOP_WEIBOS_LIMIT
        }

        # Same reference-time selection as the other query helpers here.
        if S_TYPE == 'test':
            current_time = datetime2ts(S_DATE_TW)
        else:
            current_time = int(time.time())
        tw_social_sensing_index_name = (
            tw_social_sensing_index_name_pre + ts2datetime(current_time))

        es_results = es.search(index=tw_social_sensing_index_name,
                               doc_type=tw_social_sensing_index_type,
                               body=query_body)['hits']['hits']

        if not es_results:
            # Nothing for this topic: fall back to the top documents overall.
            fallback_body = {
                'query': {'match_all': {}},
                'size': TOP_WEIBOS_LIMIT,
                'sort': {sort_item: {'order': 'desc'}},
            }
            es_results = es.search(index=tw_social_sensing_index_name,
                                   doc_type=tw_social_sensing_index_type,
                                   body=fallback_body)['hits']['hits']

    results_all = []
    for hit in es_results:
        result = hit['_source']
        result['nick_name'], result['photo_url'] = tw_uid2nick_name_photo(
            result['uid'])
        results_all.append(result)
    return results_all
コード例 #4
0
def read_tracing_followers_tweet():
    """Queue timed-retweet tasks for tweets posted by traced accounts.

    Every document of the fans/followers index is read (only the one whose
    ``xnr_user_no`` is ``'TXNR0001'`` when ``S_TYPE`` is ``'test'``).  For
    each of them, the flow-text index of the reference day is searched for
    tweets whose ``uid`` is in the document's ``trace_follow_list``.  Each
    tweet that has no document with id ``<xnr_user_no>_<tid>`` in the
    retweet-timing index yet is written there with ``compute_status`` 0 and
    a ``timestamp_set`` equal to the tweet's timestamp plus a random offset
    between ``RETWEET_START_TS`` and ``RETWEET_END_TS``.

    The reference day is derived from ``S_DATE_TW`` when ``S_TYPE`` is
    ``'test'`` and from the current time otherwise.

    :returns: ``None``; the function works through its Elasticsearch writes.
    """
    if S_TYPE == 'test':
        query_body = {
            'query': {
                'term': {
                    'xnr_user_no': 'TXNR0001'
                }
            },
            'size': MAX_SEARCH_SIZE
        }
    else:
        query_body = {'query': {'match_all': {}}, 'size': MAX_SEARCH_SIZE}

    results = es_xnr.search(index=tw_xnr_fans_followers_index_name,
                            doc_type=tw_xnr_fans_followers_index_type,
                            body=query_body)['hits']['hits']

    for hit in results:
        follower_doc = hit['_source']

        xnr_user_no = follower_doc['xnr_user_no']
        trace_follow_list = follower_doc['trace_follow_list']
        # Debug output; written so it parses on Python 2 and 3 and prints the
        # same text as ``print 'trace_follow_list:::', trace_follow_list``.
        print('%s %s' % ('trace_follow_list:::', trace_follow_list))

        if S_TYPE == 'test':
            current_time = datetime2ts(S_DATE_TW)
        else:
            current_time = int(time.time())

        flow_text_index_name = (
            twitter_flow_text_index_name_pre + ts2datetime(current_time))

        query_body_flow = {
            'query': {
                'filtered': {
                    'filter': {
                        'terms': {
                            'uid': trace_follow_list
                        }
                    }
                }
            },
            'size': MAX_SEARCH_SIZE
        }

        # NOTE(review): other helpers in this file query the flow-text index
        # through ``es`` with ``twitter_flow_text_index_type``; here ``es_xnr``
        # and ``flow_text_index_type`` are used -- confirm this is intended.
        results_flow = es_xnr.search(index=flow_text_index_name,
                                     doc_type=flow_text_index_type,
                                     body=query_body_flow)['hits']['hits']

        for flow_hit in results_flow:
            tweet = flow_hit['_source']
            task_id = xnr_user_no + '_' + tweet['tid']

            # First check whether this tid was stored before; skip it if so.
            if _retweet_task_exists(task_id):
                continue

            # Not stored yet: add it to the timed-retweet list.
            es_xnr.index(index=tw_xnr_retweet_timing_list_index_name,
                         doc_type=tw_xnr_retweet_timing_list_index_type,
                         body=_build_retweet_task(xnr_user_no, tweet),
                         id=task_id)


def _retweet_task_exists(task_id):
    """Return True when ``task_id`` can be fetched from the retweet-timing index."""
    try:
        es_xnr.get(index=tw_xnr_retweet_timing_list_index_name,
                   doc_type=tw_xnr_retweet_timing_list_index_type,
                   id=task_id)['_source']
    except Exception:
        # Best effort, as before: any lookup failure counts as "not stored".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
    return True


def _build_retweet_task(xnr_user_no, tweet):
    """Build the retweet-timing document for one flow-text ``tweet``."""
    task_detail = {}
    task_detail['xnr_user_no'] = xnr_user_no
    task_detail['tid'] = tweet['tid']
    task_detail['text'] = tweet['text']
    task_detail['uid'] = tweet['uid']
    task_detail['nick_name'], task_detail['photo_url'] = (
        tw_uid2nick_name_photo(tweet['uid']))
    task_detail['timestamp'] = tweet['timestamp']
    # Scheduled time: the tweet's own timestamp plus a random offset.
    task_detail['timestamp_set'] = tweet['timestamp'] + random.randint(
        RETWEET_START_TS, RETWEET_END_TS)
    task_detail['compute_status'] = 0
    return task_detail