def sentimentCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS, \ during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT): if topic and topic != '': start_ts = int(start_ts) over_ts = int(over_ts) over_ts = ts2HourlyTime(over_ts, during) interval = (over_ts - start_ts) / during for i in range(interval, 0, -1): emotions_kcount = {} emotions_count = {} emotions_weibo = {} begin_ts = over_ts - during * i end_ts = begin_ts + during print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode( 'utf-8') query_dict = { 'timestamp': { '$gt': begin_ts, '$lt': end_ts }, '$and': [ { '$or': [{ 'message_type': 1 }, { 'message_type': 3 }] }, ] } for k, v in emotions_kv.iteritems(): query_dict['sentiment'] = v count, results = xapian_search_weibo.search(query=query_dict, fields=save_fields) mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \ max_offset=w_limit, mset_direct=True) kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit) top_ws = top_weibos(results, top=w_limit) emotions_count[v] = [end_ts, count] emotions_kcount[v] = [end_ts, kcount] emotions_weibo[v] = [end_ts, top_ws] save_rt_results('count', topic, emotions_count, during) save_rt_results('kcount', topic, emotions_kcount, during, k_limit, w_limit) save_rt_results('weibos', topic, emotions_weibo, during, k_limit, w_limit)
def propagateCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, \ save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT): if topic and topic != '': start_ts = int(start_ts) over_ts = int(over_ts) over_ts = ts2HourlyTime(over_ts, during) interval = (over_ts - start_ts) / during for i in range(interval, 0, -1): begin_ts = over_ts - during * i end_ts = begin_ts + during print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode( 'utf-8') mtype_count = {} mtype_kcount = {} # mtype_kcount={mtype:[terms]} mtype_weibo = {} # mtype_weibo={mtype:weibo} query_dict = {'timestamp': {'$gt': begin_ts, '$lt': end_ts}} for k, v in mtype_kv.iteritems(): query_dict['message_type'] = v count, results = xapian_search_weibo.search(query=query_dict, fields=fields_list) mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \ max_offset=w_limit, mset_direct=True) kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit) top_ws = top_weibos(results, top=w_limit) mtype_count[v] = [end_ts, count] mtype_kcount[v] = [end_ts, kcount] mtype_weibo[v] = [end_ts, top_ws] save_pc_results(topic, mtype_count, during) save_kc_results(topic, mtype_kcount, during, k_limit) save_ws_results(topic, mtype_weibo, during, w_limit)
def sentimentCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS, \ during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT): if topic and topic != '': start_ts = int(start_ts) over_ts = int(over_ts) over_ts = ts2HourlyTime(over_ts, during) interval = (over_ts - start_ts) / during for i in range(interval, 0, -1): emotions_kcount = {} emotions_count = {} emotions_weibo = {} begin_ts = over_ts - during * i end_ts = begin_ts + during print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode('utf-8') query_dict = { 'timestamp': {'$gt': begin_ts, '$lt': end_ts}, '$and': [ {'$or': [{'message_type': 1}, {'message_type': 3}]}, ] } for k, v in emotions_kv.iteritems(): query_dict['sentiment'] = v count, results = xapian_search_weibo.search(query=query_dict, fields=save_fields) mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \ max_offset=w_limit, mset_direct=True) kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit) top_ws = top_weibos(results, top=w_limit) emotions_count[v] = [end_ts, count] emotions_kcount[v] = [end_ts, kcount] emotions_weibo[v] = [end_ts, top_ws] save_rt_results('count', topic, emotions_count, during) save_rt_results('kcount', topic, emotions_kcount, during, k_limit, w_limit) save_rt_results('weibos', topic, emotions_weibo, during, k_limit, w_limit)
def propagateCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, \ save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT): if topic and topic != '': start_ts = int(start_ts) over_ts = int(over_ts) over_ts = ts2HourlyTime(over_ts, during) interval = (over_ts - start_ts) / during for i in range(interval, 0, -1): begin_ts = over_ts - during * i end_ts = begin_ts + during print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode('utf-8') mtype_count = {} mtype_kcount = {} # mtype_kcount={mtype:[terms]} mtype_weibo = {} # mtype_weibo={mtype:weibo} query_dict = { 'timestamp': {'$gt': begin_ts, '$lt': end_ts} } for k, v in mtype_kv.iteritems(): query_dict['message_type'] = v count, results = xapian_search_weibo.search(query=query_dict, fields=fields_list) mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \ max_offset=w_limit, mset_direct=True) kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit) top_ws = top_weibos(results, top=w_limit) mtype_count[v] = [end_ts, count] mtype_kcount[v] = [end_ts, kcount] mtype_weibo[v] = [end_ts, top_ws] save_pc_results(topic, mtype_count, during) save_kc_results(topic, mtype_kcount, during, k_limit) save_ws_results(topic, mtype_weibo, during, w_limit)
def cron_index_topic(topic, begin_ts = BEGIN_TS, end_ts = END_TS):
    '''Index one topic's weibos in [begin_ts, end_ts]: collect participants,
    time span, per-city counts, top keywords, and the selections by time and
    by media, then persist everything with save_rt_results.

    NOTE(review): time_list[0]/[-1] below raise IndexError when the search
    returns no results — confirm callers guarantee a non-empty topic window.
    '''
    if topic and topic != '':
        topic = topic.strip()
        query_dict = {
            'timestamp':{'$gt':begin_ts,'$lt':end_ts},
            'topics':topic
        }
        # Two identical searches: `results` is consumed by the loop below,
        # `results2` is kept for top_keywords — presumably because the result
        # iterator cannot be re-run; TODO confirm with the search API.
        count,results = s.search(query=query_dict, sort_by=[SORT_FIELD], fields=RESP_ITER_KEYS)
        count2,results2 = s.search(query=query_dict, sort_by=[SORT_FIELD], fields=RESP_ITER_KEYS)
        user_raw_list = []
        time_list = []
        sublist_by_time=[]
        city_dict = {}
        # smoke test for city_dict (kept from the original author):
        # city = geo2city('219.224.135.46')
        # print city,city_dict.setdefault(city,0)
        # city_dict[city] += 1
        # print 'city_dict[city]',city_dict[city]
        print 'count',count
        for r in results():
            user_raw_list.append(r['user'])  # raw (duplicated) participant list
            time_list.append(r['timestamp'])  # timestamps in result order
            # sub-list of the fields needed by select_by_time/select_by_media
            sublist_by_time.append((r['timestamp'],r['reposts_count'],r['user'],r['source'],r['text'],r['comments_count'],r['geo']))
            city = geo2city(r['geo'])
            # print city
            city_dict.setdefault(city,0)
            city_dict[city] += 1  # bump this city's counter
            # print 'city_dict[city]',city_dict[city]
            # print '_id',r['_id']
            # print 'user',r['user']
            # print 'timestamp',ts2datetime_full(r['timestamp'])
            # print 'reposts_count',r['reposts_count']
        user_count = len(list(set(user_raw_list)))  # deduplicated participant count
        print 'user_count',user_count
        topic_begin_ts = ts2datetime_full(time_list[0])
        topic_end_ts = ts2datetime_full(time_list[-1])
        print 'topic_begin_ts',topic_begin_ts
        print 'topic_end_ts',topic_end_ts
        final_list_by_time = select_by_time(sublist_by_time,O_LIMIT)
        # NOTE(review): items below are read as dicts ('timestamp', 'username',
        # 'profile_image_url', ...) although sublist_by_time holds tuples, so
        # select_by_time/select_by_media apparently return enriched dict
        # records — confirm against their implementations.
        final_list_by_media =select_by_media(sublist_by_time, M_LIMIT)
        print 'final_list_by_media'
        for item in final_list_by_media:
            print 'timestamp', ts2datetime_full(item['timestamp'])
            print 'reposts_count',item['reposts_count']
            print 'user',item['user']
            print 'domain',uid2domain(item['user'])
            print 'source',item['source']
            print 'text',item['text']
            print 'comments_count', item['comments_count']
            print 'geo', item['geo']
            print 'username', item['username']
            print 'profile_image_url', item['profile_image_url']
        print 'final_list_by_time'
        for item in final_list_by_time:
            print 'timestamp', ts2datetime_full(item['timestamp'])
            print 'reposts_count',item['reposts_count']
            print 'user',item['user']
            print 'source',item['source']
            print 'text',item['text']
            print 'comments_count', item['comments_count']
            print 'geo', item['geo']
            print 'username', item['username']
            print 'profile_image_url', item['profile_image_url']
        top_city_list = top_city(city_dict)
        '''
        for city in top_city_list:
            print 'top_city_list',city
        '''
        top_keywords_list = top_keywords(results2, top = K_LIMIT)
        '''
        print 'top_keywords'
        for keyword in top_keywords_list:
            print 'keyword',keyword[0].decode('utf-8').encode('utf-8'),keyword[1]
        '''
        save_rt_results(topic, count, user_count, time_list,\
            top_city_list, top_keywords_list, final_list_by_time, final_list_by_media)
def community_result(community_user_list, topic, date, windowsize):
    '''Collect weibo details, top keywords and the sentiment distribution for
    a community of users within a topic's time window.

    Parameters:
        community_user_list: iterable of user ids (anything int() accepts).
        topic: topic name used to select the xapian index.
        date: end date in the format datetime2ts expects.
        windowsize: number of days before `date` to include.

    Returns:
        (sort_community_info, sort_top_keyword, new_sentiment_list, query_dict),
        or four Nones when no weibo matched.
    '''
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    # Match any weibo authored by a community member.
    query_dict = {'$or': [{'user': int(uid)} for uid in community_user_list]}
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        # BUG FIX: the success path returns a 4-tuple; returning only three
        # Nones here broke callers that unpack four values.
        return None, None, None, None
    community_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # Unknown user: placeholder values (u'未知' means "unknown").
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'
        text = weibo['text']
        timestamp = weibo['timestamp']
        # Renamed from `date` to stop shadowing the function parameter.
        weibo_date = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)
        try:
            sentiment_count[sentiment] += 1
        except KeyError:
            sentiment_count[sentiment] = 1
        community_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, weibo_date, reposts_count,
            source, geo, comments_count, sentiment_name, weibo_link, domain
        ])
    # Sort by reposts count (index 10).
    sort_community_info = sorted(community_info, key=lambda x: x[10], reverse=True)
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)
    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])
    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict
def get_info(neighbor_list, topic, date, windowsize):
    '''Collect weibo details, top keywords and the sentiment distribution for
    a list of neighbor users within a topic's time window.

    Parameters:
        neighbor_list: iterable of user ids (anything int() accepts).
        topic: topic name used to select the xapian index.
        date: end date in the format datetime2ts expects.
        windowsize: number of days before `date` to include.

    Returns:
        (sort_neighbor_info, sort_top_keyword, new_sentiment_list, query_dict),
        or four Nones when no weibo matched.
    '''
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    # Match any weibo authored by a neighbor.
    query_dict = {'$or': [{'user': int(uid)} for uid in neighbor_list]}
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        # BUG FIX: the success path returns a 4-tuple; returning only three
        # Nones here broke callers that unpack four values.
        return None, None, None, None
    neighbor_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # Unknown user: placeholder values (u'未知' means "unknown").
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'
        text = weibo['text']
        timestamp = weibo['timestamp']
        # Renamed from `date` to stop shadowing the function parameter.
        weibo_date = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        try:
            sentiment_count[sentiment] += 1
        except KeyError:
            sentiment_count[sentiment] = 1
        neighbor_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, weibo_date, reposts_count,
            source, geo, comments_count, sentiment_name, weibo_link, uid
        ])
    # Sort by reposts count (index 10).
    sort_neighbor_info = sorted(neighbor_info, key=lambda x: x[10], reverse=True)
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)
    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])
    return sort_neighbor_info, sort_top_keyword, new_sentiment_list, query_dict
def community_result(community_user_list, topic, date, windowsize):
    '''Collect weibo details, top keywords and the sentiment distribution for
    a community of users within a topic's time window.

    Parameters:
        community_user_list: iterable of user ids (anything int() accepts).
        topic: topic name used to select the xapian index.
        date: end date in the format datetime2ts expects.
        windowsize: number of days before `date` to include.

    Returns:
        (sort_community_info, sort_top_keyword, new_sentiment_list, query_dict),
        or four Nones when no weibo matched.
    '''
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    # Match any weibo authored by a community member.
    query_dict = {"$or": [{"user": int(uid)} for uid in community_user_list]}
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        # BUG FIX: the success path returns a 4-tuple; returning only three
        # Nones here broke callers that unpack four values.
        return None, None, None, None
    community_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo["user"]
        _id = weibo["_id"]
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result["name"]
            location = result["location"]
            friends_count = result["friends_count"]
            followers_count = result["followers_count"]
            created_at = result["created_at"]
            statuses_count = result["statuses_count"]
            profile_image_url = result["profile_image_url"]
        else:
            # Unknown user: placeholder values (u"未知" means "unknown").
            name = u"未知"
            location = u"未知"
            friends_count = u"未知"
            followers_count = u"未知"
            created_at = u"未知"
            statuses_count = u"未知"
            profile_image_url = u"no"
        text = weibo["text"]
        timestamp = weibo["timestamp"]
        # Renamed from `date` to stop shadowing the function parameter.
        weibo_date = ts2date(timestamp)
        reposts_count = weibo["reposts_count"]
        source = weibo["source"]
        geo = weibo["geo"]
        comments_count = weibo["comments_count"]
        sentiment = weibo["sentiment"]
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)
        try:
            sentiment_count[sentiment] += 1
        except KeyError:
            sentiment_count[sentiment] = 1
        community_info.append(
            [
                _id, name, location, friends_count, followers_count, created_at,
                statuses_count, profile_image_url, text, weibo_date, reposts_count,
                source, geo, comments_count, sentiment_name, weibo_link, domain,
            ]
        )
    # Sort by reposts count (index 10).
    sort_community_info = sorted(community_info, key=lambda x: x[10], reverse=True)
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=["terms"]), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)
    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])
    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict