Ejemplo n.º 1
0
def keywords_data(area='global'):
    """Return the top keywords for a time window as a JSON string.

    Request arguments read from ``request.args``:
      query   -- optional comma-separated terms; the whole string is
                 stripped of surrounding whitespace before splitting.
      ts      -- end of the window, converted with ``long()``.
                 NOTE(review): the default is '' and ``long('')`` raises
                 ValueError, so in practice this argument is required.
      emotion -- a key of ``emotions_kv``, or 'all' (the default) to skip
                 the sentiment filter.
      during  -- window length, converted with ``int()``; default 24*3600.

    ``area`` is currently unused; the original note on this endpoint says
    the interface is ready but the per-domain data is still missing.

    The result is ``json.dumps`` of the entries produced by
    ``top_keywords(..., top=50)``, each converted to a list.
    """
    args = request.args
    raw_query = args.get('query', '').strip()
    end_ts = long(args.get('ts', ''))
    emotion = args.get('emotion', 'all')
    window = int(args.get('during', 24*3600))

    # Window bounds are passed as $gt / $lt around [end_ts - window, end_ts].
    query_dict = {'timestamp': {'$gt': end_ts - window, '$lt': end_ts}}

    if emotion != 'all':
        query_dict['sentiment'] = emotions_kv[emotion]

    if raw_query:
        # Empty pieces (e.g. from "a,,b") are dropped; the '$or' key is
        # still set, possibly to an empty list, whenever a query was given.
        query_dict['$or'] = [
            {'text': term} for term in raw_query.split(',') if term
        ]

    count, get_results = xapian_search_sentiment.search(
        query=query_dict,
        max_offset=100000,
        sort_by=['-reposts_count'],
        fields=['terms'],
    )
    ranked = top_keywords(get_results, top=50)

    return json.dumps([list(entry) for entry in ranked])
Ejemplo n.º 2
0
def getPeaksInfo(sorted_peak_x, peak_x, ts_lis, query):
    """Build per-emotion keyword summaries for each peak.

    For every index ``i`` in ``peak_x``, one search per entry of
    ``emotions_kv`` is run over the 24 hours ending at ``ts_lis[i]``,
    and the top 10 keywords of each search are joined into a
    comma-separated string.

    Args:
        sorted_peak_x: sequence of peak indices; the position of a peak
            in this sequence is appended to its title label.
        peak_x: iterable of peak indices into ``ts_lis``.
        ts_lis: sequence of timestamps, indexed by peak index.
        query: comma-separated search terms (a string); empty pieces
            are ignored.

    Returns:
        dict mapping each peak index to a dict with keys
        ``'ts'`` (window end multiplied by 1000 -- presumably seconds to
        milliseconds, confirm against the consumer), ``'title'``
        (emotion -> label such as 'A0') and ``'text'``
        (emotion -> comma-joined keywords).

    Raises:
        ValueError: if a peak index is not present in ``sorted_peak_x``.
    """
    during = 24 * 3600
    # The term list depends only on ``query``; split it once instead of
    # once per peak per emotion.
    terms = [term for term in query.split(',') if term]

    time_lis = {}
    for i in peak_x:
        end_ts = ts_lis[i]
        begin_ts = end_ts - during
        # list.index is a linear scan; do it once per peak rather than
        # once per emotion.
        rank = str(sorted_peak_x.index(i))

        # NOTE(review): these dicts are keyed by 'happy'/'angry'/'sad';
        # the loop below assumes emotions_kv has no other keys.
        title_text = {'happy': [], 'angry': [], 'sad': []}
        title = {'happy': 'A', 'angry': 'B', 'sad': 'C'}

        for emotion, sentiment in emotions_kv.items():
            query_dict = {
                'timestamp': {'$gt': begin_ts, '$lt': end_ts},
                'sentiment': sentiment,
                # Fresh clause objects for every search so queries never
                # share mutable state.
                '$or': [{'text': [term]} for term in terms],
            }
            count, get_results = xapian_search_sentiment.search(
                query=query_dict, fields=['terms', 'text', 'user'])
            keywords_with_10count = top_keywords(get_results, top=10)
            title_text[emotion] = ','.join(
                [tp[0] for tp in keywords_with_10count])
            title[emotion] = title[emotion] + rank

        time_lis[i] = {
            'ts': end_ts * 1000,
            'title': title,
            'text': title_text,
        }

    return time_lis
Ejemplo n.º 3
0
uids = set()
for r in get_results():
    uids.add(r['user'])

print len(uids)
"""

# query5: top keywords for a one-hour window starting at 2013-01-01 00:00
# (time.mktime interprets the time tuple as local time).
print('query5:')
begin_ts1 = time.mktime(datetime.datetime(2013, 1, 1).timetuple())

# Window bounds: begin_ts1 .. begin_ts1 + 3600 seconds.
query_dict = {'timestamp': {'$gt': begin_ts1, '$lt': begin_ts1 + 3600}}
count, get_results = s.search(query=query_dict, fields=['terms'])
print(count)
print(top_keywords(get_results, top=10))

# The usage below has not been maintained since the interface changed, but it is kept for reference.
"""
print 'query2:'
query_dict = {'$and': [{'text': [u'中国'], 'uid': 1217743083},
                       {'uid': 1217743083},
                       {'$or': [{'ts': {'gt': 0,
                                        'lt': 1334450340}},
                                {'uid': 0000000000}]}],
              '$not': {'text': u'宝马', 'name': u'白之兔'},
              'name': u'袁岳'
              }

results = s.search(query=query_dict, sort_by=['-ts'], fields=['text', 'ts', 'name'])