def sentiment_keywords(xapian_search_weibo, start_ts, over_ts, during=Hour, sort_field='reposts_count', save_fields=RESP_ITER_KEYS, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    """Compute and save top keywords and top weibos per sentiment, window by window.

    ``over_ts`` is first passed through ``ts2HourlyTime(over_ts, during)``.
    The span between ``start_ts`` and that value is then cut into whole
    windows of length ``during`` that end at ``over_ts``; windows are
    processed oldest first.  A leading remainder shorter than ``during``
    is not processed.

    For every window and every value in ``emotions_kv`` the index is queried
    for matching weibos, the top weibos and top keywords are extracted, and
    both result sets are handed to ``save_count_results`` and
    ``save_weibos_results`` once per window.

    Args:
        xapian_search_weibo: search object exposing ``search(...)``.
        start_ts: start of the span; coerced with ``int()``.
        over_ts: end of the span; coerced with ``int()``.
        during: window length, in the same unit as the timestamps.
        sort_field: field name passed as ``sort_by`` to the search.
        save_fields: fields requested for the top-weibo iterator.
        w_limit: maximum number of weibos fetched and kept per sentiment.
        k_limit: maximum number of keywords kept per sentiment.
    """
    start_ts = int(start_ts)
    over_ts = ts2HourlyTime(int(over_ts), during)

    # Floor division: only whole windows are processed and range() needs an int.
    interval = (over_ts - start_ts) // during

    for i in range(interval, 0, -1):
        emotions_data = {}
        emotions_weibo = {}

        begin_ts = over_ts - during * i
        end_ts = begin_ts + during
        print('%s %s  starts calculate' % (begin_ts, end_ts))

        query_dict = {
            'timestamp': {'$gt': begin_ts, '$lt': end_ts},
        }

        for k, v in emotions_kv.iteritems():
            # The same dict is reused; only the sentiment filter changes.
            query_dict['sentiment'] = v
            print(xapian_search_weibo.search(query=query_dict, count_only=True))

            mset = xapian_search_weibo.search(query=query_dict,
                                              sort_by=[sort_field],
                                              max_offset=w_limit,
                                              mset_direct=True)
            top_ws = top_weibos(
                gen_mset_iter(xapian_search_weibo, mset, fields=save_fields),
                top=w_limit)
            keywords_with_count = top_keywords(
                gen_mset_iter(xapian_search_weibo, mset, fields=['terms']),
                top=k_limit)

            emotions_data[v] = [end_ts, keywords_with_count]
            emotions_weibo[v] = [end_ts, top_ws]

            print('%s %s , emotion keywords length:  %s , emotion weibos length:  %s'
                  % (k, v, len(keywords_with_count), len(top_ws)))

        # NOTE(review): `date` is not defined in this function; it has to be
        # a module-level name for this line to work -- confirm.
        print('%s %s %s saved emotions keywords and weibos'
              % (date, begin_ts, end_ts))

        # Save with the limits that were actually used to truncate the
        # results, not the module-wide defaults.
        save_count_results(emotions_data, during, k_limit)
        save_weibos_results(emotions_weibo, during, w_limit)
def sentimentCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    """Compute and save per-sentiment counts, keywords and weibos for a topic.

    ``topic`` is a comma-separated list of terms; a weibo matches when its
    ``text`` matches any of them (the terms are combined under ``$or``).
    Nothing is done when ``topic`` is empty.

    ``over_ts`` is first passed through ``ts2HourlyTime(over_ts, during)``.
    The span between ``start_ts`` and that value is cut into whole windows of
    length ``during`` ending at ``over_ts``, processed oldest first.  For each
    window and each value in ``emotions_kv`` the matching-weibo count, the
    top keywords and the top weibos are collected and then stored with three
    ``save_rt_results`` calls ('count', 'kcount', 'weibos').

    Args:
        topic: comma-separated topic terms; must support ``encode('utf-8')``.
        xapian_search_weibo: search object exposing ``search(...)``.
        start_ts: start of the span; coerced with ``int()``.
        over_ts: end of the span; coerced with ``int()``.
        sort_field: field name passed as ``sort_by`` to the search.
        save_fields: fields requested for the top-weibo iterator.
        during: window length, in the same unit as the timestamps.
        w_limit: maximum number of weibos fetched and kept per sentiment.
        k_limit: maximum number of keywords kept per sentiment.
    """
    if not topic:
        return

    start_ts = int(start_ts)
    over_ts = ts2HourlyTime(int(over_ts), during)

    # Floor division: only whole windows are processed and range() needs an int.
    interval = (over_ts - start_ts) // during

    # Use a dedicated name for the individual terms so the `topic` argument
    # is not overwritten: results must be saved under the full topic string.
    # Empty terms (e.g. from a trailing comma) are dropped.
    terms = [term for term in topic.strip().split(',') if term]

    for i in range(interval, 0, -1):
        emotions_count = {}
        emotions_kcount = {}
        emotions_weibo = {}

        begin_ts = over_ts - during * i
        end_ts = begin_ts + during
        print('%s %s topic %s starts calculate'
              % (begin_ts, end_ts, topic.encode('utf-8')))

        query_dict = {
            'timestamp': {'$gt': begin_ts, '$lt': end_ts},
            '$or': [{'text': term} for term in terms],
        }

        for k, v in emotions_kv.iteritems():
            # The same dict is reused; only the sentiment filter changes.
            query_dict['sentiment'] = v
            scount = xapian_search_weibo.search(query=query_dict,
                                                count_only=True)
            mset = xapian_search_weibo.search(query=query_dict,
                                              sort_by=[sort_field],
                                              max_offset=w_limit,
                                              mset_direct=True)
            kcount = top_keywords(
                gen_mset_iter(xapian_search_weibo, mset, fields=['terms']),
                top=k_limit)
            top_ws = top_weibos(
                gen_mset_iter(xapian_search_weibo, mset, fields=save_fields),
                top=w_limit)

            emotions_count[v] = [end_ts, scount]
            emotions_kcount[v] = [end_ts, kcount]
            emotions_weibo[v] = [end_ts, top_ws]

            print('%s %s , emotions count:  %s , emotion keywords length:  %s , emotion weibos length:  %s'
                  % (k, v, emotions_count, len(kcount), len(top_ws)))

        print('%s %s saved emotions counts, keywords and weibos'
              % (begin_ts, end_ts))
        save_rt_results('count', topic, emotions_count, during)
        save_rt_results('kcount', topic, emotions_kcount, during, k_limit, w_limit)
        save_rt_results('weibos', topic, emotions_weibo, during, k_limit, w_limit)
def sentiment_field(domain, xapian_search_weibo, start_ts, over_ts, sort_field='reposts_count', save_fields=RESP_ITER_KEYS, during=Hour, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    """Compute and save per-sentiment counts, keywords and weibos for a domain.

    Weibos are restricted to those whose ``user`` matches any entry of
    ``domain_uids`` (combined under ``$or``).  Nothing is done when
    ``domain_uids`` is empty.

    ``over_ts`` is first passed through ``ts2HourlyTime(over_ts, during)``.
    The span between ``start_ts`` and that value is cut into whole windows of
    length ``during`` ending at ``over_ts``, processed oldest first.  For each
    window and each value in ``emotions_kv`` the matching-weibo count, the
    top keywords and the top weibos are collected and then stored through
    ``save_count_results``, ``save_kcount_results`` and
    ``save_weibos_results``, keyed by the position of ``domain`` in
    ``DOMAIN_LIST``.

    Args:
        domain: domain name; must be an element of ``DOMAIN_LIST``.
        xapian_search_weibo: search object exposing ``search(...)``.
        start_ts: start of the span; coerced with ``int()``.
        over_ts: end of the span; coerced with ``int()``.
        sort_field: field name passed as ``sort_by`` to the search.
        save_fields: fields requested for the top-weibo iterator.
        during: window length, in the same unit as the timestamps.
        w_limit: maximum number of weibos fetched and kept per sentiment.
        k_limit: maximum number of keywords kept per sentiment.

    Raises:
        ValueError: from ``DOMAIN_LIST.index`` when ``domain`` is not listed
            and at least one window is processed.
    """
    # NOTE(review): `domain_uids` is neither a parameter nor a local; it has
    # to be supplied at module level for this function to work -- confirm.
    if not domain_uids:
        return

    start_ts = int(start_ts)
    over_ts = ts2HourlyTime(int(over_ts), during)

    # Floor division: only whole windows are processed and range() needs an int.
    interval = (over_ts - start_ts) // during

    for i in range(interval, 0, -1):
        emotions_count = {}
        emotions_kcount = {}
        emotions_weibo = {}

        begin_ts = over_ts - during * i
        end_ts = begin_ts + during
        print('%s %s domain %s starts calculate' % (begin_ts, end_ts, domain))

        query_dict = {
            'timestamp': {'$gt': begin_ts, '$lt': end_ts},
            '$or': [{'user': uid} for uid in domain_uids],
        }

        for k, v in emotions_kv.iteritems():
            # The same dict is reused; only the sentiment filter changes.
            query_dict['sentiment'] = v
            scount = xapian_search_weibo.search(query=query_dict,
                                                count_only=True)
            mset = xapian_search_weibo.search(query=query_dict,
                                              sort_by=[sort_field],
                                              max_offset=w_limit,
                                              mset_direct=True)
            kcount = top_keywords(
                gen_mset_iter(xapian_search_weibo, mset, fields=['terms']),
                top=k_limit)
            top_ws = top_weibos(
                gen_mset_iter(xapian_search_weibo, mset, fields=save_fields),
                top=w_limit)

            emotions_count[v] = [end_ts, scount]
            emotions_kcount[v] = [end_ts, kcount]
            emotions_weibo[v] = [end_ts, top_ws]

            print('%s %s , emotions count:  %s , emotion keywords length:  %s , emotion weibos length:  %s'
                  % (k, v, emotions_count, len(kcount), len(top_ws)))

        # NOTE(review): `date` is not defined in this function; it has to be
        # a module-level name for this line to work -- confirm.
        print('%s %s  %s %s saved emotions counts, keywords and weibos'
              % (domain, date, begin_ts, end_ts))

        # Look the domain up once per window instead of once per save call.
        domain_index = DOMAIN_LIST.index(domain)

        # Save with the limits that were actually used to truncate the
        # results, not the module-wide defaults.
        save_count_results(domain_index, emotions_count, during)
        save_kcount_results(domain_index, emotions_kcount, during, k_limit)
        save_weibos_results(domain_index, emotions_weibo, during, w_limit)
def sentimentRealTimeTopic(query, start_ts, end_ts, save_fields=RESP_ITER_KEYS, during=Fifteenminutes, calc='all', w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT, sort_field=SORT_FIELD):
    """Compute and save per-sentiment statistics for a topic query.

    ``query`` is a comma-separated list of terms; empty terms are ignored
    and the rest are combined under ``$or`` on the ``text`` field.  Nothing
    is done when ``query`` is empty.

    ``end_ts`` is first passed through ``ts2HourlyTime(end_ts, during)``.
    The span between ``start_ts`` and that value is cut into whole windows of
    length ``during`` ending there, processed oldest first.  For each window
    the index is obtained from ``getXapianWeiboByDate`` using the window
    start formatted as ``YYYYMMDD``; if that lookup returns a falsy value
    the function returns immediately and later windows are not processed.

    ``calc`` selects what is computed and saved per window:

    * ``'all'``    -- counts, top keywords and top weibos;
    * ``'count'``  -- counts only;
    * ``'kcount'`` -- top keywords only;
    * ``'weibos'`` -- top weibos only.

    Any other value computes and saves nothing.

    Args:
        query: comma-separated topic terms; must support ``encode('utf-8')``.
        start_ts: start of the span; coerced with ``int()``.
        end_ts: end of the span; coerced with ``int()``.
        save_fields: fields requested when fetching top weibos.
        during: window length, in the same unit as the timestamps.
        calc: one of ``'all'``, ``'count'``, ``'kcount'``, ``'weibos'``.
        w_limit: maximum number of weibos fetched and kept per sentiment.
        k_limit: maximum number of keywords kept per sentiment.
        sort_field: field name passed as ``sort_by`` to the search.
    """
    if not query:
        return

    start_ts = int(start_ts)
    over_ts = ts2HourlyTime(int(end_ts), during)

    # Floor division: only whole windows are processed and range() needs an int.
    interval = (over_ts - start_ts) // during

    # The term list does not depend on the window, so split it only once.
    terms = [term for term in query.strip().split(',') if term]

    for i in range(interval, 0, -1):
        emotions_count = {}
        emotions_kcount = {}
        emotions_weibo = {}

        begin_ts = over_ts - during * i
        # A separate local keeps the `end_ts` argument untouched.
        window_end = begin_ts + during

        xapian_datestr = datetime.date.fromtimestamp(begin_ts).isoformat()
        xapian_search_weibo = getXapianWeiboByDate(xapian_datestr.replace('-', ''))
        if not xapian_search_weibo:
            # No index for this date: give up on this and all later windows.
            return

        print('%s %s topic realtime %s starts calculate'
              % (begin_ts, window_end, query.encode('utf-8')))

        query_dict = {
            'timestamp': {'$gt': begin_ts, '$lt': window_end},
            '$or': [{'text': [term]} for term in terms],
        }

        if calc == 'all':
            for k, v in emotions_kv.iteritems():
                query_dict['sentiment'] = v
                scount = xapian_search_weibo.search(query=query_dict,
                                                    count_only=True)
                mset = xapian_search_weibo.search(query=query_dict,
                                                  sort_by=[sort_field],
                                                  max_offset=w_limit,
                                                  mset_direct=True)
                kcount = top_keywords(
                    gen_mset_iter(xapian_search_weibo, mset, fields=['terms']),
                    top=k_limit)
                top_ws = top_weibos(
                    gen_mset_iter(xapian_search_weibo, mset, fields=save_fields),
                    top=w_limit)

                emotions_count[v] = [window_end, scount]
                emotions_kcount[v] = [window_end, kcount]
                emotions_weibo[v] = [window_end, top_ws]

                print('%s %s , emotions count:  %s , keywords length:  %s , weibos count:  %s'
                      % (k, v, emotions_count, len(kcount), len(top_ws)))

            print('save emotions count, keywords and weibo')
            save_rt_results('count', query, emotions_count, during)
            save_rt_results('kcount', query, emotions_kcount, during, klimit=k_limit)
            save_rt_results('weibos', query, emotions_weibo, during, wlimit=w_limit)

        elif calc == 'count':
            for k, v in emotions_kv.iteritems():
                query_dict['sentiment'] = v
                scount = xapian_search_weibo.search(query=query_dict,
                                                    count_only=True)
                emotions_count[v] = [window_end, scount]

            save_rt_results('count', query, emotions_count, during)

        elif calc == 'kcount':
            for k, v in emotions_kv.iteritems():
                query_dict['sentiment'] = v
                # Honour the caller's sort_field rather than the global default.
                # NOTE(review): the 'all' branch requests fields=['terms'] for
                # keywords while this branch requests RESP_ITER_KEYS -- confirm
                # that top_keywords gets what it needs here.
                _, get_results = xapian_search_weibo.search(query=query_dict,
                                                            fields=RESP_ITER_KEYS,
                                                            sort_by=[sort_field],
                                                            max_offset=w_limit)
                kcount = top_keywords(get_results, top=k_limit)
                emotions_kcount[v] = [window_end, kcount]

            # Save with the limit actually applied, same as the 'all' branch.
            save_rt_results('kcount', query, emotions_kcount, during, klimit=k_limit)

        elif calc == 'weibos':
            for k, v in emotions_kv.iteritems():
                query_dict['sentiment'] = v
                # Fetch the caller-selected fields, same as the 'all' branch.
                _, get_results = xapian_search_weibo.search(query=query_dict,
                                                            fields=save_fields,
                                                            sort_by=[sort_field],
                                                            max_offset=w_limit)
                top_ws = top_weibos(get_results, top=w_limit)
                emotions_weibo[v] = [window_end, top_ws]

            # The weibo limit goes in `wlimit`; passing it positionally would
            # put it in the keyword-limit slot.
            save_rt_results('weibos', query, emotions_weibo, during, wlimit=w_limit)