Exemple #1
0
def get_interval_count(topic, date, windowsize):
    results = []
    ts_list = []
    start_date = ts2datetime(datetime2ts(date) - windowsize * Day)
    unit = 900
    print 'start_date:', start_date
    start_ts = datetime2ts(start_date)
    ts_list = [start_ts]
    end_ts = datetime2ts(date)
    interval = (end_ts - start_ts) / during
    print 'interval:', interval
    print topic
    if MYSQL_TOPIC_LEN == 0:
        topic0 = topic[:20]
    else:
        topic0 = topic
    for i in range(interval, 0, -1):
        #print 'i:', i
        begin_ts = end_ts - during * i
        over_ts = begin_ts + during
        #print 'begin_ts:', begin_ts#ts2date(begin_ts)
        #print 'over_ts:', over_ts#ts2date(over_ts)
        ts_list.append(over_ts)
        items = db.session.query(PropagateCount).filter(PropagateCount.topic==topic0 ,\
                                                        PropagateCount.end<=over_ts ,\
                                                        PropagateCount.end>begin_ts ,\
                                                        PropagateCount.range==unit).all()
        #).all()

        if items:
            result = len(items)
        else:
            result = 0
        results.append(float(result))
    print 'detect_peak_bottom_line::', results
    new_zeros = detect_peaks(results)  # 返回峰值出现的时间区间的序号
    new_bottom = detect_bottom(results)  # get the first bottom
    print 'new_peaks:', new_zeros
    print 'new_bottom:', new_bottom
    # 存趋势时间范围
    # save_peak_bottom(new_zeros, new_bottom)
    #trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list, topic_xapian_id)
    trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list)
    print 'trend_makers:', trend_maker
    trend_pusher = get_pushers(topic, new_zeros, new_bottom, ts_list)
    print 'trend_pushers:', trend_pusher
    save_trend_maker(topic, date, windowsize, trend_maker)
    save_trend_pusher(topic, date, windowsize, trend_pusher)

    return trend_maker, trend_pusher
Exemple #2
0
def get_interval_count(topic, date, windowsize, topic_xapian_id):
    results = [0]
    ts_list = []
    start_date = ts2datetime(datetime2ts(date) - windowsize * Day)
    #unit = 900
    print 'start_date:', start_date
    start_ts = datetime2ts(start_date)
    ts_list = [start_ts]
    end_ts = datetime2ts(date)
    interval = (end_ts - start_ts) / during
    print 'interval:', interval
    for i in range(interval, 0, -1):
        #print 'i:', i
        begin_ts = end_ts - during * i
        over_ts = begin_ts + during
        #print 'begin_ts:', ts2date(begin_ts)
        #print 'over_ts:', ts2date(over_ts)
        ts_list.append(over_ts)
        items = db.session.query(PropagateCount).filter(PropagateCount.topic==topic ,\
                                                        PropagateCount.end<=over_ts ,\
                                                        PropagateCount.end>begin_ts ,\
                                                        PropagateCount.range==unit).all()
        if items:
            result = Merge_propagate(items)
        else:
            result = 0 
        results.append(float(result))
    print 'detect_peak_bottom_line::', results
    new_zeros = detect_peaks(results) # 返回峰值出现的时间区间的序号
    new_bottom  = detect_bottom(results)  # get the first bottom
    print 'new_peaks:', new_zeros
    print 'new_bottom:', new_bottom
    # 存趋势时间范围
    # save_peak_bottom(new_zeros, new_bottom)
    trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list, topic_xapian_id)
    print 'trend_makers:', trend_maker
    
    trend_pusher = get_pushers(topic,new_zeros, new_bottom, ts_list, topic_xapian_id)
    print 'trend_pushers:', trend_pusher

    save_trend_maker(topic, date, windowsize, trend_maker, topic_xapian_id)
    save_trend_pusher(topic, date, windowsize, trend_pusher, topic_xapian_id)
    
    return trend_maker, trend_pusher
Exemple #3
0
def trend_user(topic, start_ts, end_ts, news_collection, comment_collection):
    '''利用时间分析部分的计算结果
    '''
    ts_list, count_list = get_interval_count(topic, start_ts, end_ts)
    # 波峰
    new_peaks = detect_peaks(count_list)
    print 'news_peaks:', new_peaks
    # 波谷
    new_bottom = detect_bottom(count_list)
    print 'news_bottom:', new_bottom
    # trend_maker
    trend_maker = get_maker(topic, new_peaks, new_bottom, ts_list, news_collection)
    print 'len(trend_maker):', len(trend_maker)
    # trend_pusher
    trend_pusher = get_pusher(topic, new_peaks, new_bottom, ts_list, news_collection, comment_collection)
    print 'len(trend_pusher):', len(trend_pusher)

    save_trend_maker(topic, start_ts, end_ts, trend_maker)
    save_trend_pusher(topic, start_ts, end_ts, trend_pusher)
Exemple #4
0
def trend_user(topic, start_ts, end_ts, news_collection, comment_collection):
    '''利用时间分析部分的计算结果
    '''
    ts_list, count_list = get_interval_count(topic, start_ts, end_ts)
    # 波峰
    new_peaks = detect_peaks(count_list)
    print 'news_peaks:', new_peaks
    # 波谷
    new_bottom = detect_bottom(count_list)
    print 'news_bottom:', new_bottom
    # trend_maker
    trend_maker = get_maker(topic, new_peaks, new_bottom, ts_list,
                            news_collection)
    print 'len(trend_maker):', len(trend_maker)
    # trend_pusher
    trend_pusher = get_pusher(topic, new_peaks, new_bottom, ts_list,
                              news_collection, comment_collection)
    print 'len(trend_pusher):', len(trend_pusher)

    save_trend_maker(topic, start_ts, end_ts, trend_maker)
    save_trend_pusher(topic, start_ts, end_ts, trend_pusher)
def get_interval_count(topic, date, windowsize):

    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    results = []
    ts_list = []
    start_date = ts2datetime(datetime2ts(date) - windowsize * Day)
    unit = 900
    print 'start_date:', start_date
    start_ts = datetime2ts(start_date)
    ts_list = [start_ts]
    end_ts = datetime2ts(date)
    interval = (end_ts - start_ts) / during
    print 'interval:', interval
    print topic
    '''
    if MYSQL_TOPIC_LEN == 0:
    	topic0 = topic[:20]
    else:
        topic0=topic
    '''
    for i in range(interval, 0, -1):
        #print 'i:', i
        begin_ts = long(end_ts) - during * i
        over_ts = begin_ts + during
        #print 'begin_ts:', begin_ts#ts2date(begin_ts)
        #print 'over_ts:', over_ts#ts2date(over_ts)
        ts_list.append(over_ts)
        '''
        items = db.session.query(PropagateCount).filter(PropagateCount.topic==topic0 ,\
                                                        PropagateCount.end<=over_ts ,\
                                                        PropagateCount.end>begin_ts ,\
                                                        PropagateCount.range==unit).all()
                                                        #).all()
        '''
        '''
        query_body = {
            'query':{
                'bool':{
                    'must':[
                        {'range':{'end_ts':{'gt':begin_ts,'lte':over_ts}}},
                        {'term':{'en_name':topic0}},
                        {'term':{'range':unit}}
                    ]
                }
            },
            'size': 1000000  # 返回条数限制 待删
        }

        items = weibo_es.search(index=index_name,doc_type=index_type,body=query_body)['hits']['hits']
        
        '''
        query_body = {
            'query': {
                'bool': {
                    'must': [{
                        'term': {
                            'en_name': topic
                        }
                    }]
                }
            },
            'size': 1000000
        }

        es_results = weibo_es.search(index=index_name,
                                     doc_type=index_type,
                                     body=query_body)['hits']['hits']
        #print 'results::::::::::',results
        print 'len_results:::::::::::', len(es_results)
        count = 0
        for result in es_results:
            result = result['_source']
            time_results = json.loads(result['time_results'])
            count_results = time_results['count']
            print 'type_time_results:::::::', type(time_results)
            time_time = time_results.keys()
            print 'time_results.keys:::::', time_time.sort()
            #print 'time_results.keys:::::',len(time_time.sort())

            if time_results['during'] == unit:
                print 'count_results.keys()::::;', count_results.keys()
                for end_ts_count in count_results.keys():

                    if end_ts_count > begin_ts and end_ts_count <= over_ts:
                        count += 1
        '''
        if items:
            result = len(items)
        else:
            result = 0
        results.append(float(result))
        '''
        '''
        if count:
            result = count
        else:
            result = 0
        '''
        results.append(float(count))
        print 'results::::::::::', results
        #print abababa
    print 'detect_peak_bottom_line::::::', results
    new_zeros = detect_peaks(results)  # 返回峰值出现的时间区间的序号
    new_bottom = detect_bottom(results)  # get the first bottom
    print 'new_zeros:::::::::::::::::', new_zeros
    print 'new_bottom::::::::::::::::', new_bottom
    print 'ts_list:::::::::::::::::::', ts_list
    # 存趋势时间范围
    # save_peak_bottom(new_zeros, new_bottom)
    #trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list, topic_xapian_id)
    trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list)
    print 'trend_makers:', trend_maker
    trend_pusher = get_pushers(topic, new_zeros, new_bottom, ts_list)
    print 'trend_pushers:', trend_pusher
    #save_trend_maker(topic, date, windowsize, trend_maker)
    maker_results = save_trend_maker_es(topic, date, windowsize, trend_maker)
    #save_trend_pusher(topic, date, windowsize, trend_pusher)
    pusher_results = save_trend_pusher_es(topic, date, windowsize,
                                          trend_pusher)

    return maker_results, pusher_results