def sentiment_keywords(xapian_search_weibo, start_ts, over_ts, during=Hour, sort_field='reposts_count', save_fields=RESP_ITER_KEYS, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    start_ts = int(start_ts)
    over_ts = int(over_ts)

    over_ts = ts2HourlyTime(over_ts, during)
    interval = (over_ts - start_ts) / during

    for i in range(interval, 0, -1):
        emotions_data = {}
        emotions_weibo = {}
        
        begin_ts = over_ts - during * i
        end_ts = begin_ts + during
        print begin_ts, end_ts, ' starts calculate'

        query_dict = {
            'timestamp': {'$gt': begin_ts, '$lt': end_ts},
        }

        for k, v in emotions_kv.iteritems():
            query_dict['sentiment'] = v
            print xapian_search_weibo.search(query=query_dict, count_only=True)
            mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], max_offset=w_limit, mset_direct=True)
            top_ws = top_weibos(gen_mset_iter(xapian_search_weibo, mset, fields=save_fields), top=w_limit)
            keywords_with_count = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit)

            emotions_data[v] = [end_ts, keywords_with_count]
            emotions_weibo[v] = [end_ts, top_ws]

            print k, v, ', emotion keywords length: ', len(keywords_with_count), ', emotion weibos length: ', len(top_ws)

        print '%s %s saved emotions keywords and weibos' % (begin_ts, end_ts)
        save_count_results(emotions_data, during, TOP_KEYWORDS_LIMIT)
        save_weibos_results(emotions_weibo, during, TOP_WEIBOS_LIMIT)
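# Every example below aligns its boundaries with ts2HourlyTime and then walks
# fixed-size windows. A minimal sketch of the assumed helper semantics (an
# assumption, not the original implementation): floor the timestamp to a
# multiple of `during`, so (over_ts - start_ts) / during is a whole number of
# windows.
def ts2HourlyTime(ts, during):
    ts = int(ts)
    return ts - ts % during  # align to the start of the current window

# Hypothetical values for the interval constants used as `during` above.
Fifteenminutes = 15 * 60
Hour = 3600
Day = 24 * 3600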
def propagateCronTopic(topic, start_ts, over_ts, sort_field=SORT_FIELD, \
    save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        for i in range(interval, 0, -1):  # computed once every 15 minutes
            mtype_count = {}   # count of each weibo type
            mtype_kcount = {}  # top-K keywords of each weibo type
            mtype_weibo = {}   # the three types of weibos

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            #print begin_ts, end_ts
            #print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode('utf-8')
            mtype_count = compute_mtype_count(topic, begin_ts, end_ts)
            mtype_kcount = compute_mtype_keywords(topic, begin_ts, end_ts, k_limit)
            mtype_weibo = compute_mtype_weibo(topic, begin_ts, end_ts, w_limit)

            save_results('count', topic, mtype_count, during)
            save_results('kcount', topic, mtype_kcount, during, k_limit, w_limit)
            save_results('weibo', topic, mtype_weibo, during, k_limit)
def propagateCronTopic(news_id, start_ts, over_ts, during=Fifteenminutes, w_limit=TOP_MESSAGE_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    
    start_ts = int(start_ts)
    over_ts = int(over_ts)

    over_ts = ts2HourlyTime(over_ts, during)
    interval = (over_ts - start_ts) / during

    for i in range(interval,0,-1):  # computed once every 15 minutes
        # message_type_count = {}    # counts of the five message types
        # mtype_kcount = {}   # top-K keywords of the five message types
        # mtype_content = {}    # content of the five types; the original system sorted by repost count, effect unknown

        begin_ts = over_ts - during * i
        end_ts = begin_ts + during

        # print news_id,begin_ts,end_ts
        #print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode('utf-8')
        mtype_count = compute_mtype_count(news_id, begin_ts, end_ts)
        # print mtype_count
        # print mtype_count
        # mtype_kcount = compute_mtype_keywords(topic, begin_ts, end_ts ,k_limit)
        # allsource_traceback = compute_allsource_traceback(news_id,begin_ts,end_ts,w_limit)
        # print allsource_traceback
        save_results('count', news_id, mtype_count, during)
def sentiment_count(xapian_search_weibo, start_ts, over_ts, during=Hour):
    start_ts = int(start_ts)
    over_ts = int(over_ts)

    over_ts = ts2HourlyTime(over_ts, during)
    interval = (over_ts - start_ts) / during

    for i in range(interval, 0, -1):
        emotions_data = {}

        begin_ts = over_ts - during * i
        end_ts = begin_ts + during
        print begin_ts, end_ts, ' starts calculate'

        query_dict = {
            'timestamp': {'$gt': begin_ts, '$lt': end_ts},
        }

        for k, v in emotions_kv.iteritems():
            query_dict['sentiment'] = v
            count = xapian_search_weibo.search(query=query_dict, count_only=True)
            emotions_data[v] = [end_ts, count]

        print 'saved: ', emotions_data
        save_count_results(emotions_data, during)
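# A hedged usage sketch for sentiment_count. emotions_kv is assumed to map
# sentiment labels to the integer codes stored with each weibo, along these
# hypothetical lines: emotions_kv = {'happy': 1, 'angry': 2, 'sad': 3}.
# Counting one day hour by hour, given an open xapian wrapper
# (datetime2ts is a project helper, used later in this listing):
#
#   day_start = datetime2ts('2013-09-01')
#   sentiment_count(xapian_search_weibo, day_start, day_start + Day, during=Hour)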
def propagateCronTopic(topic,
                       start_ts,
                       over_ts,
                       during=Fifteenminutes,
                       w_limit=TOP_MESSAGE_LIMIT,
                       k_limit=TOP_KEYWORDS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        for i in range(interval, 0, -1):  # computed once every 15 minutes
            message_type_count = {}  # counts of the five message types
            mtype_kcount = {}  # top-K keywords of the five message types
            mtype_content = {}  # content of the five types; the original system sorted by repost count, effect unknown

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during

            print begin_ts, end_ts
            #print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode('utf-8')
            mtype_count = compute_mtype_count(topic, begin_ts, end_ts)
            # print mtype_count
            # mtype_kcount = compute_mtype_keywords(topic, begin_ts, end_ts ,k_limit)
            allsource_traceback = compute_allsource_traceback(
                topic, begin_ts, end_ts, w_limit)
            # print allsource_traceback
            save_results('count', topic, mtype_count, during)
def sentimentCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS, \
    during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        for i in range(interval, 0, -1):
            emotions_kcount = {}
            emotions_count = {}
            emotions_weibo = {}

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode(
                'utf-8')

            query_dict = {
                'timestamp': {
                    '$gt': begin_ts,
                    '$lt': end_ts
                },
                '$and': [
                    {
                        '$or': [{
                            'message_type': 1
                        }, {
                            'message_type': 3
                        }]
                    },
                ]
            }
            for k, v in emotions_kv.iteritems():
                query_dict['sentiment'] = v
                count, results = xapian_search_weibo.search(query=query_dict,
                                                            fields=save_fields)

                mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \
                                                  max_offset=w_limit, mset_direct=True)

                kcount = top_keywords(gen_mset_iter(xapian_search_weibo,
                                                    mset,
                                                    fields=['terms']),
                                      top=k_limit)
                top_ws = top_weibos(results, top=w_limit)

                emotions_count[v] = [end_ts, count]
                emotions_kcount[v] = [end_ts, kcount]
                emotions_weibo[v] = [end_ts, top_ws]

            save_rt_results('count', topic, emotions_count, during)
            save_rt_results('kcount', topic, emotions_kcount, during, k_limit,
                            w_limit)
            save_rt_results('weibos', topic, emotions_weibo, during, k_limit,
                            w_limit)
def cul_key_weibo_time_count(task_source, task_id, news_topics, start_ts,
                             over_ts, during):

    if S_TYPE == 'test':
        if task_source == 'weibo':
            start_ts = datetime2ts(S_DATE) - 5 * 24 * 3600
            over_ts = datetime2ts(S_DATE)
        else:
            start_ts = datetime2ts(S_DATE_FB) - 5 * 24 * 3600
            over_ts = datetime2ts(S_DATE_FB)

    key_weibo_time_count = {}
    time_dict = {}
    during = Day
    for clusterid, keywords in news_topics.iteritems():  # {u'd2e97cf7-fc43-4982-8405-2d215b3e1fea': [u'\u77e5\u8bc6', u'\u5e7f\u5dde', u'\u9009\u624b']}
        if len(keywords) > 0:
            start_ts = int(start_ts)
            over_ts = int(over_ts)

            over_ts = ts2HourlyTime(over_ts, during)
            interval = (over_ts - start_ts) / during

            for i in range(interval, 0, -1):  # take the windows 900 seconds at a time

                begin_ts = over_ts - during * i
                end_ts = begin_ts + during
                must_list = []
                must_list.append(
                    {'range': {
                        'timestamp': {
                            'gte': begin_ts,
                            'lt': end_ts
                        }
                    }})
                temp = []
                for word in keywords:
                    sentence = {
                        'wildcard': {
                            'keywords_string': '*' + word + '*'
                        }
                    }
                    temp.append(sentence)
                must_list.append({'bool': {'should': temp}})

                query_body = {'query': {'bool': {'must': must_list}}}
                key_weibo = es_intel.search(index=task_id,
                                            doc_type=task_source,
                                            body=query_body)
                key_weibo_count = key_weibo['hits']['total']  # per-window count for this cluster
                time_dict[ts2datetime(end_ts)] = key_weibo_count

            key_weibo_time_count[clusterid] = sorted(time_dict.items(),
                                                     key=lambda x: x[0])
    return key_weibo_time_count
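# The per-window loop above recurs in every example in this listing. A
# reusable sketch of that shared pattern (a refactor suggestion, not part of
# the original code):
def iter_time_windows(start_ts, over_ts, during):
    # Yield (begin_ts, end_ts) pairs covering [start_ts, over_ts),
    # with the upper boundary aligned by ts2HourlyTime.
    over_ts = ts2HourlyTime(int(over_ts), during)
    interval = (over_ts - int(start_ts)) / during
    for i in range(interval, 0, -1):
        begin_ts = over_ts - during * i
        yield begin_ts, begin_ts + during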
def sentimentCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        topics = topic.strip().split(',')

        for i in range(interval, 0, -1):
            emotions_count = {}
            emotions_kcount = {}
            emotions_weibo = {}

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode('utf-8')

            query_dict = {
                'timestamp': {'$gt': begin_ts, '$lt': end_ts},
                '$or': []
            }

            for t in topics:  # avoid shadowing `topic`, which is reused below
                query_dict['$or'].append({'text': t})

            for k, v in emotions_kv.iteritems():
                query_dict['sentiment'] = v
                scount = xapian_search_weibo.search(query=query_dict, count_only=True)
                mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \
                                                  max_offset=w_limit, mset_direct=True)
                kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit)
                top_ws = top_weibos(gen_mset_iter(xapian_search_weibo, mset, fields=save_fields), top=w_limit)

                emotions_count[v] = [end_ts, scount]
                emotions_kcount[v] = [end_ts, kcount]
                emotions_weibo[v] = [end_ts, top_ws]

                print k, v, ', emotions count: ', emotions_count, ', emotion keywords length: ', len(kcount), ', emotion weibos length: ', len(top_ws)

            print '%s %s saved emotions counts, keywords and weibos' % (begin_ts, end_ts)
            save_rt_results('count', topic, emotions_count, during)
            save_rt_results('kcount', topic, emotions_kcount, during, k_limit, w_limit)
            save_rt_results('weibos', topic, emotions_weibo, during, k_limit, w_limit)
def sentimentTopic_new(topic,start_ts, over_ts, sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS, \
    during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT ):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        sentiment_results = {}
        sentiment_results['during'] = during
        sentiment_results['count'] = {}
        sentiment_results['geo_count'] = {}

        for i in range(interval, 0, -1):  # take the windows 900 seconds at a time
            emotions_kcount = {}  # top-K keywords per sentiment class
            emotions_count = {}  # count per sentiment class
            emotions_weibo = {}  # weibos per sentiment class

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            #test(topic,begin_ts,end_ts)

            print begin_ts, end_ts  #, 'topic %s starts calculate' % topic.encode('utf-8')
            emotions_count = compute_sentiment_count(topic, begin_ts, end_ts,
                                                     during)

            # emotions_kcount = compute_sentiment_keywords(topic,begin_ts,end_ts,k_limit,w_limit,during)
            #emotions_weibo,emotions_geo_count = compute_sentiment_weibo(topic,begin_ts,end_ts,k_limit,w_limit,during)

            sentiment_results['count'][end_ts] = emotions_count

            #sentiment_results['weibo'][end_ts] = emotions_weibo

            # save_rt_results('count', topic, emotions_count, during)  #  '1':[end_ts,4],
            # save_rt_results('kcount', topic, emotions_kcount, during, k_limit, w_limit)
            # save_rt_results('weibos', topic, emotions_weibo, during, k_limit, w_limit)
        emotions_geo_count = compute_sentiment_weibo(topic, start_ts, over_ts,
                                                     k_limit, w_limit, during)

        sentiment_results['geo_count'] = emotions_geo_count

        sentiment_results = json.dumps(sentiment_results)
        save_rt_results_es('sentiment_results', topic, sentiment_results,
                           during)
def propagateCronNewsTopic(topic, mongo_collection, start_ts, over_ts, sort_field=SORT_FIELD, \
    during=Fifteenminutes, n_limit=TOP_NEWS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)
        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        for i in range(interval, 0, -1):
            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            # print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode('utf-8')

            news = []
            news_count = []
            news_kcount = []

            query_dict = {
                'timestamp': {'$gte': begin_ts, '$lt': end_ts}
            }
            fields_dict = get_filter_dict()

            results_list = mongo_collection.find(query_dict, fields_dict)

            origin_forward_dict = {'origin':[], 'forward':[]}
            for weibo_result in results_list:
                if weibo_result['source_from_name'] and weibo_result['transmit_name']:
                    origin_forward_dict['forward'].append(weibo_result)
                elif weibo_result['source_from_name']:
                    origin_forward_dict['origin'].append(weibo_result)
                else:
                    continue

            for k, v_list in origin_forward_dict.iteritems():
                mtype = mtype_kv_news[k]
                count, kcount, top_ns = top_news_keywords(v_list, news_top=n_limit, keywords_top = k_limit)

                news = [end_ts, top_ns]
                news_count = [end_ts, count]
                news_kcount = [end_ts, kcount]

                save_ws_news_results(topic, mtype, news, during, n_limit)
                save_pc_news_results(topic, mtype, news_count, during)
                save_kc_news_results(topic, mtype, news_kcount, during, k_limit)
def sentiment_field(domain, xapian_search_weibo, start_ts, over_ts, sort_field='reposts_count', save_fields=RESP_ITER_KEYS, during=Hour, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    if domain_uids != []:
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        for i in range(interval, 0, -1):
            emotions_count = {}
            emotions_kcount = {}
            emotions_weibo = {}

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            print begin_ts, end_ts, 'domain %s starts calculate' % domain

            query_dict = {
                'timestamp': {'$gt': begin_ts, '$lt': end_ts},
                '$or': []
            }

            for uid in domain_uids:
                query_dict['$or'].append({'user': uid})

            for k, v in emotions_kv.iteritems():
                query_dict['sentiment'] = v
                scount = xapian_search_weibo.search(query=query_dict, count_only=True)
                mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \
                                                  max_offset=w_limit, mset_direct=True)
                kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit)
                top_ws = top_weibos(gen_mset_iter(xapian_search_weibo, mset, fields=save_fields), top=w_limit)

                emotions_count[v] = [end_ts, scount]
                emotions_kcount[v] = [end_ts, kcount]
                emotions_weibo[v] = [end_ts, top_ws]

                print k, v, ', emotions count: ', emotions_count, ', emotion keywords length: ', len(kcount), ', emotion weibos length: ', len(top_ws)

            print domain, '%s %s saved emotions counts, keywords and weibos' % (begin_ts, end_ts)
            save_count_results(DOMAIN_LIST.index(domain), emotions_count, during)
            save_kcount_results(DOMAIN_LIST.index(domain), emotions_kcount, during, TOP_KEYWORDS_LIMIT)
            save_weibos_results(DOMAIN_LIST.index(domain), emotions_weibo, during, TOP_WEIBOS_LIMIT)
def propagateCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, \
    save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)
        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        for i in range(interval, 0, -1):
            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode(
                'utf-8')

            mtype_count = {}
            mtype_kcount = {}  # mtype_kcount={mtype:[terms]}
            mtype_weibo = {}  # mtype_weibo={mtype:weibo}

            query_dict = {'timestamp': {'$gt': begin_ts, '$lt': end_ts}}

            for k, v in mtype_kv.iteritems():
                query_dict['message_type'] = v

                count, results = xapian_search_weibo.search(query=query_dict,
                                                            fields=fields_list)

                mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \
                                                  max_offset=w_limit, mset_direct=True)

                kcount = top_keywords(gen_mset_iter(xapian_search_weibo,
                                                    mset,
                                                    fields=['terms']),
                                      top=k_limit)
                top_ws = top_weibos(results, top=w_limit)

                mtype_count[v] = [end_ts, count]
                mtype_kcount[v] = [end_ts, kcount]
                mtype_weibo[v] = [end_ts, top_ws]

            save_pc_results(topic, mtype_count, during)
            save_kc_results(topic, mtype_kcount, during, k_limit)
            save_ws_results(topic, mtype_weibo, during, w_limit)
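# A hedged sketch of the assumed mtype_kv mapping used by the propagate
# examples: the in-code comments elsewhere ("only keep originals and
# forwards", v == 1 or v == 3) suggest integer codes along these
# hypothetical lines:
mtype_kv = {'origin': 1, 'comment': 2, 'forward': 3}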
def sentimentCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS, \
    during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        for i in range(interval, 0, -1):
            emotions_kcount = {}
            emotions_count = {}
            emotions_weibo = {}

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode('utf-8')

            query_dict = {
                'timestamp': {'$gt': begin_ts, '$lt': end_ts},
                '$and': [
                    {'$or': [{'message_type': 1}, {'message_type': 3}]},
                ]
            }
            for k, v in emotions_kv.iteritems():
                query_dict['sentiment'] = v
                count, results = xapian_search_weibo.search(query=query_dict, fields=save_fields)

                mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \
                                                  max_offset=w_limit, mset_direct=True)

                kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit)
                top_ws = top_weibos(results, top=w_limit)

                emotions_count[v] = [end_ts, count]
                emotions_kcount[v] = [end_ts, kcount]
                emotions_weibo[v] = [end_ts, top_ws]

            save_rt_results('count', topic, emotions_count, during)
            save_rt_results('kcount', topic, emotions_kcount, during, k_limit, w_limit)
            save_rt_results('weibos', topic, emotions_weibo, during, k_limit, w_limit)  
def propagateCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, \
    save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)
        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        for i in range(interval, 0, -1):
            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode('utf-8')
            
            mtype_count = {}
            mtype_kcount = {} # mtype_kcount={mtype:[terms]}
            mtype_weibo = {} # mtype_weibo={mtype:weibo}

            query_dict = {
                'timestamp': {'$gt': begin_ts, '$lt': end_ts}
            }

            for k, v in mtype_kv.iteritems():
                query_dict['message_type'] = v
                
                count, results = xapian_search_weibo.search(query=query_dict, fields=fields_list)

                mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \
                                                  max_offset=w_limit, mset_direct=True)

                kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit)
                top_ws = top_weibos(results, top=w_limit)

                mtype_count[v] = [end_ts, count]
                mtype_kcount[v] = [end_ts, kcount]
                mtype_weibo[v] = [end_ts, top_ws]

            save_pc_results(topic, mtype_count, during)
            save_kc_results(topic, mtype_kcount, during, k_limit)
            save_ws_results(topic, mtype_weibo, during, w_limit)
def propagateCronTopic(topic, start_ts, over_ts, sort_field=SORT_FIELD, \
    save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        time_results = []

        for i in range(interval, 0, -1):  # computed once every 15 minutes
            mtype_count = {}   # count of each weibo type
            mtype_kcount = {}  # top-K keywords of each weibo type
            mtype_weibo = {}   # the three types of weibos

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            mtype_count = compute_mtype_count(topic, begin_ts, end_ts, during)

            time_results.append([end_ts, mtype_count])

        save_results_es('time_results', topic, time_results, during)
def cul_key_weibo_time_count(topic, news_topics, start_ts, over_ts, during):
    key_weibo_time_count = {}
    for clusterid, keywords in news_topics.iteritems():  # {u'd2e97cf7-fc43-4982-8405-2d215b3e1fea': [u'\u77e5\u8bc6', u'\u5e7f\u5dde', u'\u9009\u624b']}
        time_dict = {}  # reset per cluster so clusters do not share counts
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        for i in range(interval, 0, -1):  # take the windows 900 seconds at a time

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            must_list = []
            must_list.append(
                {'range': {
                    'timestamp': {
                        'gte': begin_ts,
                        'lt': end_ts
                    }
                }})
            temp = []
            for word in keywords:
                sentence = {"wildcard": {"keywords_string": "*" + word + "*"}}
                temp.append(sentence)
            must_list.append({'bool': {'should': temp}})

            query_body = {"query": {"bool": {"must": must_list}}}
            key_weibo = weibo_es.search(index=topic,
                                        doc_type=weibo_index_type,
                                        body=query_body)
            key_weibo_count = key_weibo['hits']['total']  # per-window count for this cluster
            time_dict[end_ts] = key_weibo_count
        key_weibo_time_count[clusterid] = time_dict
    return key_weibo_time_count  # return after the loop so every cluster is included
def sentimentTopic(topic,start_ts, over_ts, sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS, \
    during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT ):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        for i in range(interval, 0, -1):  # take the windows 900 seconds at a time
            emotions_kcount = {}  # top-K keywords per sentiment class
            emotions_count = {}  # count per sentiment class
            emotions_weibo = {}  # weibos per sentiment class

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            #test(topic,begin_ts,end_ts)

            print begin_ts, end_ts  #, 'topic %s starts calculate' % topic.encode('utf-8')
            emotions_count = compute_sentiment_count(topic, begin_ts, end_ts,
                                                     during)
            # emotions_kcount = compute_sentiment_keywords(topic,begin_ts,end_ts,k_limit,w_limit,during)
            emotions_weibo = compute_sentiment_weibo(topic, begin_ts, end_ts,
                                                     k_limit, w_limit, during)
def cityCronTopicNews(topic, mongo_collection, start_ts, over_ts, during=Fifteenminutes, n_limit=TOP_NEWS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        topics = topic.strip().split(',')
        for i in range(interval, 0, -1):
            ccount_dict = {}
            for k, v in mtype_kv_news.iteritems():
                ccount_dict[k] = {}

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            first_timestamp = end_ts
            first_item = {}
            news = []

            query_dict = {
                'timestamp': {'$gt': begin_ts, '$lt': end_ts},
            }
            fields_dict = get_filter_dict()

            results_list = mongo_collection.find(query_dict, fields_dict).sort([(SORT_FIELD,1)])

            for weibo_result in results_list:
                if (weibo_result['timestamp'] <= first_timestamp ):
                    first_timestamp = weibo_result['timestamp']
                    first_item = weibo_result

                if weibo_result['source_from_name'] and weibo_result['transmit_name']:
                    source = media2city(weibo_result['source_from_name'])
                    if source:
                        try:
                            ccount_dict['forward'][source] += 1
                        except KeyError:
                            ccount_dict['forward'][source] = 1
                        """
                        try:
                            ccount_dict['sum'][source] += 1
                        except KeyError:
                            ccount_dict['sum'][source] = 1
                        """
                elif weibo_result['source_from_name']:
                    source = media2city(weibo_result['source_from_name'])
                    if source:
                        try:
                            ccount_dict['origin'][source] += 1
                        except KeyError:
                            ccount_dict['origin'][source] = 1
                        """
                        try:
                            ccount_dict['sum'][source] += 1
                        except KeyError:
                            ccount_dict['sum'][source] = 1
                        """
                else:
                    continue

                weibo_result['source_from_area'] = source # add the region field
                news.append(weibo_result)

            for k, v in mtype_kv_news.iteritems():
                results = [end_ts, ccount_dict[k]]
                save_rt_results(topic,v, results, during, first_item)

            sorted_news = sorted(news, key=lambda k: k[SORT_FIELD], reverse=True)
            sorted_news = sorted_news[:n_limit]
            save_ns_results(topic, end_ts, during, n_limit, sorted_news)
def cityCronTopicNews(topic,
                      mongo_collection,
                      start_ts,
                      over_ts,
                      during=Fifteenminutes,
                      n_limit=TOP_NEWS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        topics = topic.strip().split(',')
        for i in range(interval, 0, -1):
            ccount_dict = {}
            for k, v in mtype_kv_news.iteritems():
                ccount_dict[k] = {}

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            first_timestamp = end_ts
            first_item = {}
            news = []

            query_dict = {
                'timestamp': {
                    '$gt': begin_ts,
                    '$lt': end_ts
                },
            }
            fields_dict = get_filter_dict()

            results_list = mongo_collection.find(query_dict,
                                                 fields_dict).sort([
                                                     (SORT_FIELD, 1)
                                                 ])

            for weibo_result in results_list:
                if (weibo_result['timestamp'] <= first_timestamp):
                    first_timestamp = weibo_result['timestamp']
                    first_item = weibo_result

                if weibo_result['source_from_name'] and weibo_result[
                        'transmit_name']:
                    source = media2city(weibo_result['source_from_name'])
                    if source:
                        try:
                            ccount_dict['forward'][source] += 1
                        except KeyError:
                            ccount_dict['forward'][source] = 1
                        """
                        try:
                            ccount_dict['sum'][source] += 1
                        except KeyError:
                            ccount_dict['sum'][source] = 1
                        """
                elif weibo_result['source_from_name']:
                    source = media2city(weibo_result['source_from_name'])
                    if source:
                        try:
                            ccount_dict['origin'][source] += 1
                        except KeyError:
                            ccount_dict['origin'][source] = 1
                        """
                        try:
                            ccount_dict['sum'][source] += 1
                        except KeyError:
                            ccount_dict['sum'][source] = 1
                        """
                else:
                    continue

                weibo_result['source_from_area'] = source  # add the region field
                news.append(weibo_result)

            for k, v in mtype_kv_news.iteritems():
                results = [end_ts, ccount_dict[k]]
                save_rt_results(topic, v, results, during, first_item)

            sorted_news = sorted(news,
                                 key=lambda k: k[SORT_FIELD],
                                 reverse=True)
            sorted_news = sorted_news[:n_limit]
            save_ns_results(topic, end_ts, during, n_limit, sorted_news)
def cityCronTopic(topic, xapian_search_weibo, start_ts, over_ts, during=Fifteenminutes, n_limit=TOP_WEIBOS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        topics = topic.strip().split(',')
        for i in range(interval, 0, -1):
            mtype_ccount = {}  # mtype is the message_type; ccount is {city: count}

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            weibos = []

            query_dict = {
                'timestamp': {'$gt': begin_ts, '$lt': end_ts},
            }

            for k, v in mtype_kv.iteritems():
                ccount={}
                first_timestamp = end_ts
                first_item = {}
                query_dict['message_type'] = v
                count,weibo_results = xapian_search_weibo.search(query=query_dict, fields=fields_list)  # weibo_results matches weibos in the given time window, topic, and message_type
                for weibo_result in weibo_results():
                    if (weibo_result['timestamp'] <= first_timestamp ):
                        first_timestamp = weibo_result['timestamp']
                        first_item = weibo_result
                    try:
                        if (len(weibo_result['geo'].split('.')) == 4):
                            city = IP2city(weibo_result['geo'])
                            if city:
                                try:
                                    ccount[city] += 1   
                                except KeyError:
                                    ccount[city] = 1    
                            else:
                                continue
                        else:
                            city = geo2city(weibo_result['geo'])
                            if city:
                                try:
                                    ccount[city] += 1   
                                except KeyError:
                                    ccount[city] = 1    
                            else:
                                continue
                    except:
                        continue

                    if (v == 1) or (v == 3): # only keep originals and forwards
                        weibos.append(weibo_result)

                mtype_ccount[v] = [end_ts, ccount]
            save_rt_results(topic, mtype_ccount, during, first_item)

            sorted_weibos = sorted(weibos, key=lambda k: k[SORT_FIELD], reverse=True)
            sorted_weibos = sorted_weibos[:n_limit]
            save_ws_results(topic, end_ts, during, n_limit, sorted_weibos)
def sentimentRealTimeTopic(query, start_ts, end_ts, save_fields=RESP_ITER_KEYS, during=Fifteenminutes, calc='all', w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT, sort_field=SORT_FIELD):
    if query and query != '':

        start_ts = int(start_ts)
        over_ts = int(end_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        for i in range(interval, 0, -1):

            emotions_count = {}
            emotions_kcount = {}
            emotions_weibo = {}

            begin_ts = over_ts - during * i
            xapian_datestr = datetime.date.fromtimestamp(begin_ts).isoformat()
            xapian_search_weibo = getXapianWeiboByDate(xapian_datestr.replace('-', ''))
            if not xapian_search_weibo:
                return

            end_ts = begin_ts + during
            print begin_ts, end_ts, 'topic realtime %s starts calculate' % query.encode('utf-8')

            query_dict = {
                'timestamp': {'$gt': begin_ts, '$lt': end_ts},
                '$or': []
            }

            for term in query.strip().split(','):
                if term:
                    query_dict['$or'].append({'text': [term]})

            if calc == 'all':
                for k, v in emotions_kv.iteritems():
                    query_dict['sentiment'] = v
                    scount = xapian_search_weibo.search(query=query_dict, count_only=True)
                    mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \
                                                      max_offset=w_limit, mset_direct=True)
                    kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit)
                    top_ws = top_weibos(gen_mset_iter(xapian_search_weibo, mset, fields=save_fields), top=w_limit)
                    emotions_count[v] = [end_ts, scount]
                    emotions_kcount[v] = [end_ts, kcount]
                    emotions_weibo[v] = [end_ts, top_ws]

                    print k, v, ', emotions count: ', emotions_count, ', keywords length: ', len(kcount), ', weibos count: ', len(top_ws)
                
                print 'save emotions count, keywords and weibo'
                save_rt_results('count', query, emotions_count, during)
                save_rt_results('kcount', query, emotions_kcount, during, klimit=k_limit)
                save_rt_results('weibos', query, emotions_weibo, during, wlimit=w_limit)
                
            elif calc == 'count':
                for k, v in emotions_kv.iteritems():
                    query_dict['sentiment'] = v
                    scount = xapian_search_weibo.search(query=query_dict, count_only=True)
                    emotions_count[v] = [end_ts, scount]

                save_rt_results('count', query, emotions_count, during)
            
            else:
                if calc == 'kcount':
                    for k, v in emotions_kv.iteritems():
                        query_dict['sentiment'] = v
                        count, get_results = xapian_search_weibo.search(query=query_dict, fields=RESP_ITER_KEYS, \
                                                                            sort_by=[SORT_FIELD], max_offset=w_limit)
                        kcount = top_keywords(get_results, top=k_limit)
                        emotions_kcount[v] = [end_ts, kcount]
                    
                    save_rt_results('kcount', query, emotions_kcount, during, TOP_KEYWORDS_LIMIT)

                if calc == 'weibos':
                    for k, v in emotions_kv.iteritems():
                        query_dict['sentiment'] = v
                        count, get_results = xapian_search_weibo.search(query=query_dict, fields=RESP_ITER_KEYS, \
                                                                        sort_by=[sort_field], max_offset=w_limit)
                        top_ws = top_weibos(get_results, top=w_limit)
                        emotions_weibo[v] = [end_ts, top_ws]
                    
                    save_rt_results('weibos', query, emotions_weibo, during, TOP_WEIBOS_LIMIT)
def cityCronTopic(topic,
                  xapian_search_weibo,
                  start_ts,
                  over_ts,
                  during=Fifteenminutes,
                  n_limit=TOP_WEIBOS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        topics = topic.strip().split(',')
        for i in range(interval, 0, -1):
            mtype_ccount = {}  # mtype is the message_type; ccount is {city: count}

            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            weibos = []

            query_dict = {
                'timestamp': {
                    '$gt': begin_ts,
                    '$lt': end_ts
                },
            }

            for k, v in mtype_kv.iteritems():
                ccount = {}
                first_timestamp = end_ts
                first_item = {}
                query_dict['message_type'] = v
                count, weibo_results = xapian_search_weibo.search(
                    query=query_dict, fields=fields_list
                )  # weibo_results matches weibos in the given time window, topic, and message_type
                for weibo_result in weibo_results():
                    if (weibo_result['timestamp'] <= first_timestamp):
                        first_timestamp = weibo_result['timestamp']
                        first_item = weibo_result
                    try:
                        if (len(weibo_result['geo'].split('.')) == 4):
                            city = IP2city(weibo_result['geo'])
                            if city:
                                try:
                                    ccount[city] += 1
                                except KeyError:
                                    ccount[city] = 1
                            else:
                                continue
                        else:
                            city = geo2city(weibo_result['geo'])
                            if city:
                                try:
                                    ccount[city] += 1
                                except KeyError:
                                    ccount[city] = 1
                            else:
                                continue
                    except:
                        continue

                    if (v == 1) or (v == 3):  # only keep originals and forwards
                        weibos.append(weibo_result)

                mtype_ccount[v] = [end_ts, ccount]
            save_rt_results(topic, mtype_ccount, during, first_item)

            sorted_weibos = sorted(weibos,
                                   key=lambda k: k[SORT_FIELD],
                                   reverse=True)
            sorted_weibos = sorted_weibos[:n_limit]
            save_ws_results(topic, end_ts, during, n_limit, sorted_weibos)
def sentimentCronTopic(topic, weibos_list, start_ts, over_ts, sort_field=SORT_FIELD,
                       save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT,
                       k_limit=TOP_KEYWORDS_LIMIT):
    import sys

    sys.path.append('../triple_classifier/')
    from triple_sentiment_classifier import triple_classifier

    start_ts = int(start_ts)
    over_ts = int(over_ts)

    over_ts = ts2HourlyTime(over_ts, during)
    interval = (over_ts - start_ts) / during
    logFile.write('start_ts: ' + str(start_ts) + '\r\n')
    logFile.write('over_ts: ' + str(over_ts) + '\r\n')
    logFile.write('during: ' + str(during) + '\r\n')
    logFile.write('interval: ' + str(interval) + '\r\n')

    for i in range(interval, 0, -1):
        begin_ts = over_ts - during * i
        end_ts = begin_ts + during

        emotions_count = {}
        emotions_kcount = {}
        emotions_weibo = {}
        emotions_rcount = {}
        weiboIDs = {}

        for k, v in emotions_kv.iteritems():
            zero = 0
            emotions_count[v] = [end_ts, 0]
            emotions_kcount[v] = [end_ts, '']
            emotions_weibo[v] = [end_ts, []]
            weiboIDs[v] = [end_ts, []]
            # print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode('utf-8')
        slide = get_weibos_slide(weibos_list, begin_ts, end_ts)
        string = ['', '', '', '']

        emo0 = 0
        emo1 = 0  # start both emotion counters at zero

        for weibo in slide:
            sentiment, emo = triple_classifier(weibo)

            if sentiment != 0 and emo == 0:
                emo0 += 1
            elif sentiment != 0 and emo == 1:
                emo1 += 1
            # words = jieba.cut(weibo['text'], cut_all=False)
            weibo['sentiment'] = sentiment
            string[sentiment] = string[sentiment] + weibo['text']

            if sentiment != 0:
                emotions_count[sentiment][1] += 1
                #                kcount = emotions_kcount[sentiment][1]
                emotions_weibo[sentiment][1].append(weibo)
            else:
                zero += 1

        for k, v in emotions_kv.iteritems():
            #            sorted_kcount = sorted(emotions_kcount[v][1].iteritems(), key=lambda(k, v):v, reverse=False)
            #            sorted_kcount = { k: v for k, v in sorted_kcount[len(sorted_kcount)-k_limit:]}
            #            emotions_kcount[v][1] = sorted_kcount
            sorted_weibos = sorted(emotions_weibo[v][1], key=lambda i: i[sort_field], reverse=False)
            emotions_weibo[v][1] = sorted_weibos[len(sorted_weibos) - w_limit:]

            for item in emotions_weibo[v][1]:
                weiboIDs[v][1].append(item['key'])

            wordd = {}

            if string[v] != '':
                words = GetKeyWords(string[v].encode('utf-8'), 5, True)

                word_list = words.split('#')
                for word in word_list:
                    token = word.split(r'/')
                    if (len(token) == 3 and not (token[0] in STOPWORDS)):
                        # wordd.append({token[0]:token[2]})
                        wordd[token[0]] = token[2]
            emotions_kcount[v][1] = wordd

        print emo0, emo1
        print zero, emotions_count[1][1], emotions_count[2][1], emotions_count[3][1]
        save_rt_results('count', topic, emotions_count, during)
        save_rt_results('kcount', topic, emotions_kcount, during, k_limit, w_limit)
        save_rt_results('weibos', topic, weiboIDs, during, k_limit, w_limit)

        j = interval - i
        logFile.write('finish ' + str(j) + ' slide' + '\r\n')
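# The example above assumes triple_classifier(weibo) returns a pair
# (sentiment, emo): sentiment is one of the integer codes in emotions_kv,
# with 0 meaning neutral, and emo is a secondary flag tallied in emo0/emo1.
# A hypothetical stub with that contract, for reading the code in isolation:
def triple_classifier(weibo):
    if not weibo.get('text'):
        return 0, 0  # neutral, no secondary emotion
    return 1, 0  # placeholder; the real logic lives in triple_sentiment_classifier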
def cityTopic(topic,
              start_ts,
              over_ts,
              during=Fifteenminutes,
              n_limit=TOP_WEIBOS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during

        #topics = topic.strip().split(',')
        for i in range(interval, 0, -1):
            mtype_ccount = {}  # mtype is the message_type; ccount is {city: count}
            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            print begin_ts, end_ts, topic
            weibos = []
            first_item = {}
            for k, v in mtype_kv.iteritems():  # v denotes forward, comment, original
                province_dict = {}
                city_dict = {}
                query_body = {  # fetch weibos of this message_type
                    'query': {
                        'bool': {
                            'must': [{
                                'term': {
                                    'message_type': v
                                }
                            }, {
                                'range': {
                                    'timestamp': {
                                        'gte': begin_ts,
                                        'lt': end_ts
                                    }
                                }
                            }]
                        }
                    },
                    'sort': {
                        SORT_FIELD: {
                            "order": "desc"
                        }
                    },
                    'size': n_limit
                }
                mtype_weibo = weibo_es.search(index=topic,
                                              doc_type=weibo_index_type,
                                              body=query_body)['hits']['hits']
                #save_ws_results(topic, end_ts, during, n_limit, mtype_weibo)
                # the weibos are saved directly
                if len(mtype_weibo) == 0:
                    continue
                first_item = mtype_weibo[0]['_source']
                # count, per region, the number of weibos of each type
                for weibo in mtype_weibo:  #对于每条微博
                    try:
                        geo = weibo['_source']['geo'].encode('utf8')
                    except:
                        continue
                    #print geo,type(geo)
                    province, city = split_city(geo)
                    #print province,city
                    if province != 'unknown':
                        try:
                            province_dict[province][city] += 1
                            province_dict[province]['total'] += 1
                        except:
                            province_dict[province] = {}
                            province_dict[province][city] = 1
                            province_dict[province]['total'] = 1
                        save_ws_results(topic, end_ts, during, n_limit,
                                        province, city, weibo)
                        # try:
                        #     city_dict[city] += 1
                        # except:
                        #     city_dict[city] = 1
                        # try:
                        #     province_dict[province].append(city_dict)
                        # except:
                        #     province_dict[province] = []
                        #     province_dict[province].append(city_dict)
                        # try:
                        #     province_dict[province] += 1
                        # except:
                        #     province_dict[province] = 1
                # sorted_province_dict = sorted(province_dict.items(), key=lambda x: x[0], reverse=False)[:n_limit]  #就是x[0]
                # sorted_city_dict = sorted(city_dict.items(), key=lambda x: x[0], reverse=False)[:n_limit]
                # print sorted_province_dict
                # print sorted_city_dict
                ccount = province_dict
                # ccount['province'] = sorted_province_dict
                # ccount['city'] = sorted_city_dict
                mtype_ccount[v] = [
                    end_ts, ccount
                ]  # {message_type: [end_ts, {province: {city: count, 'total': count}}]}
                #print mtype_ccount
                save_rt_results(topic, mtype_ccount, during, first_item)
def cityTopic(topic,start_ts,over_ts,during=Fifteenminutes, n_limit=TOP_WEIBOS_LIMIT):
    if topic and topic != '':
        start_ts = int(start_ts)
        over_ts = int(over_ts)

        over_ts = ts2HourlyTime(over_ts, during)
        interval = (over_ts - start_ts) / during


        item_exist = es_event.get(index=event_analysis_name,doc_type=event_type,id=topic)['_source']
        try:
            geo_result = json.loads(item_exist['geo_results'])
        except:
            geo_result = {}


        #topics = topic.strip().split(',')
        for i in range(interval, 0, -1):
            mtype_ccount = {}  # mtype is the message_type; ccount is {city: count}
            begin_ts = over_ts - during * i
            end_ts = begin_ts + during
            # print begin_ts,end_ts,topic
            weibos = []
            first_item = {}
            
            for k,v in mtype_kv.iteritems(): # v denotes forward, comment, original

                #geo_result['geo_cityCount'][end_ts][v] = []

                #geo_result = {}
                #city_dict = {}
                query_body = {   # fetch weibos of this message_type
                    'query':{
                        'bool':{
                            'must':[
                                {'term':{'message_type':v}},  
                                # {'term':{'en_name':topic}},
                                {'range':{
                                    'timestamp':{'gte': begin_ts, 'lt':end_ts} 
                                }
                            }]
                        }
                    },
                    'sort':{SORT_FIELD:{"order":"desc"}},
                    'size':n_limit
                    }
                # print topic,event_text_type,query_body
                mtype_weibo = es_event.search(index=topic,doc_type=event_text_type,body=query_body)['hits']['hits']
                # print len(mtype_weibo)
                #save_ws_results(topic, end_ts, during, n_limit, mtype_weibo)    
                # the weibos are saved directly
                # print '160',es_event,event_text,event_text_type,query_body,len(mtype_weibo)
                if len(mtype_weibo) == 0:
                    continue
                first_item = mtype_weibo[0]['_source']
                # count, per region, the number of weibos of each type
                for weibo in mtype_weibo:  #对于每条微博
                    try:
                        geo = weibo['_source']['geo'].encode('utf8')
                    except:
                        continue
                    #print geo,type(geo)
                    province,city = split_city(geo)
                    #print province,city

                    
                    if province != 'unknown':
                        # nested-counter update: geo_result[v][province] holds
                        # per-city counts plus a running 'total' (replaces the
                        # original cascade of try/except KeyError handlers)
                        province_counts = geo_result.setdefault(v, {}).setdefault(province, {})
                        province_counts[city] = province_counts.get(city, 0) + 1
                        province_counts['total'] = province_counts.get('total', 0) + 1
                                
                #geo_result[end_ts][v] = geo_result
                #print mtype_ccount   v:message type
                #save_rt_results(topic, mtype_ccount, during, first_item)

        save_rt_results_es(topic, geo_result)

        return geo_result
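# A hedged usage sketch for the es_event-backed cityTopic above: `topic`
# doubles as the ES index name holding the event's weibos, and the merged
# geo_result is written back via save_rt_results_es. Hypothetical call:
#
#   day_start = datetime2ts('2016-11-17')
#   geo_result = cityTopic('some_event_index', day_start, day_start + Day,
#                          during=Fifteenminutes)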