Ejemplo n.º 1
0
def get_maker(topic, new_peaks, new_bottom, ts_list, collection):
    begin_ts = ts_list[new_bottom[0]]
    end_ts = ts_list[new_peaks[0]]
    print 'get_maker news_bottom:', new_bottom[0]
    print 'get_maker news_peak:', new_peaks[0]
    print 'get_maker ts_list:', ts2date(ts_list[0])
    print 'get_maker start_ts:', ts2date(begin_ts)
    print 'get_maker end_ts:', ts2date(end_ts)
    if begin_ts > end_ts:
        begin_ts = ts_list[0]

    begin_ts = begin_ts - Hour
    filter_dict = get_filter_dict()
    query_dict = {'timestamp': {'$gte': begin_ts, '$lte': end_ts}}
    '''
    maker_list = collection.find(query_dict, filter_dict).sort('weight').limit(maker_news_count)
    if not maker_list:
        return []
    else:
        return maker_list
    '''
    input_news_list = collection.find(query_dict, filter_dict)
    # 第一个波段内所有新闻进行分词
    news_cut_list = cut_news(input_news_list)
    # 计算top50的关键词
    keywords_list = get_news_keywords(news_cut_list)
    # 计算波段内新闻的关键词占比weight
    weight_list = get_news_weight(news_cut_list, keywords_list)
    # 排序获取weight前20的news
    maker_list = get_top_weight_news(weight_list)

    if not maker_list:
        return []
    else:
        return maker_list
Ejemplo n.º 2
0
def get_maker(topic, new_peaks, new_bottom, ts_list, collection):
    begin_ts = ts_list[new_bottom[0]]
    end_ts = ts_list[new_peaks[0]]
    print 'get_maker news_bottom:', new_bottom[0]
    print 'get_maker news_peak:', new_peaks[0]
    print 'get_maker ts_list:', ts2date(ts_list[0])
    print 'get_maker start_ts:', ts2date(begin_ts)
    print 'get_maker end_ts:', ts2date(end_ts)
    if begin_ts > end_ts:
        begin_ts = ts_list[0]
    
    begin_ts = begin_ts - Hour
    filter_dict = get_filter_dict()
    query_dict = {'timestamp':{'$gte':begin_ts, '$lte':end_ts}}
    '''
    maker_list = collection.find(query_dict, filter_dict).sort('weight').limit(maker_news_count)
    if not maker_list:
        return []
    else:
        return maker_list
    '''
    input_news_list = collection.find(query_dict, filter_dict)
    # 第一个波段内所有新闻进行分词
    news_cut_list = cut_news(input_news_list)
    # 计算top50的关键词
    keywords_list = get_news_keywords(news_cut_list)
    # 计算波段内新闻的关键词占比weight
    weight_list = get_news_weight(news_cut_list, keywords_list)
    # 排序获取weight前20的news
    maker_list = get_top_weight_news(weight_list)
    
    if not maker_list:
        return []
    else:
        return maker_list
Ejemplo n.º 3
0
def sort_news_by_comment(query_dict, news_collection, comment_collection):
    """Return the news matching ``query_dict``, most-commented first.

    Every news document receives a ``'comments_count'`` key holding the
    number of documents in ``comment_collection`` whose ``'news_id'``
    equals the news document's ``'id'``.

    Args:
        query_dict: Query handed to ``news_collection.find``.
        news_collection: Object whose ``find(query, fields)`` yields the
            news documents (each must contain an ``'id'`` key).
        comment_collection: Object whose ``find(query)`` yields the
            comments for one news item.

    Returns:
        A list of the news documents sorted by ``'comments_count'`` in
        descending order.
    """
    results = []
    filter_dict = get_filter_dict()
    for news in news_collection.find(query_dict, filter_dict):
        # All comments that belong to this news item.
        news_comment = comment_collection.find({'news_id': news['id']})
        try:
            comments_count = len(news_comment)
        except TypeError:
            # Lazy result sets such as pymongo cursors do not support
            # len().  The previous bare ``except`` turned that error into a
            # count of 0 for every news item, which made the sort below
            # meaningless; count by consuming the result set instead.
            comments_count = sum(1 for _ in news_comment)
        news['comments_count'] = comments_count
        results.append(news)

    return sorted(results, key=lambda x: x['comments_count'], reverse=True)
Ejemplo n.º 4
0
def sort_news_by_comment(query_dict, news_collection, comment_collection):
    """Return the news matching ``query_dict``, most-commented first.

    Every news document receives a ``'comments_count'`` key holding the
    number of documents in ``comment_collection`` whose ``'news_id'``
    equals the news document's ``'id'``.

    Args:
        query_dict: Query handed to ``news_collection.find``.
        news_collection: Object whose ``find(query, fields)`` yields the
            news documents (each must contain an ``'id'`` key).
        comment_collection: Object whose ``find(query)`` yields the
            comments for one news item.

    Returns:
        A list of the news documents sorted by ``'comments_count'`` in
        descending order.
    """
    results = []
    filter_dict = get_filter_dict()
    for news in news_collection.find(query_dict, filter_dict):
        # All comments that belong to this news item.
        news_comment = comment_collection.find({'news_id': news['id']})
        try:
            comments_count = len(news_comment)
        except TypeError:
            # Lazy result sets such as pymongo cursors do not support
            # len().  The previous bare ``except`` turned that error into a
            # count of 0 for every news item, which made the sort below
            # meaningless; count by consuming the result set instead.
            comments_count = sum(1 for _ in news_comment)
        news['comments_count'] = comments_count
        results.append(news)

    return sorted(results,
                  key=lambda x: x['comments_count'],
                  reverse=True)