def sentimentCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS, \ during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT): if topic and topic != '': start_ts = int(start_ts) over_ts = int(over_ts) over_ts = ts2HourlyTime(over_ts, during) interval = (over_ts - start_ts) / during for i in range(interval, 0, -1): emotions_kcount = {} emotions_count = {} emotions_weibo = {} begin_ts = over_ts - during * i end_ts = begin_ts + during print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode( 'utf-8') query_dict = { 'timestamp': { '$gt': begin_ts, '$lt': end_ts }, '$and': [ { '$or': [{ 'message_type': 1 }, { 'message_type': 3 }] }, ] } for k, v in emotions_kv.iteritems(): query_dict['sentiment'] = v count, results = xapian_search_weibo.search(query=query_dict, fields=save_fields) mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \ max_offset=w_limit, mset_direct=True) kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit) top_ws = top_weibos(results, top=w_limit) emotions_count[v] = [end_ts, count] emotions_kcount[v] = [end_ts, kcount] emotions_weibo[v] = [end_ts, top_ws] save_rt_results('count', topic, emotions_count, during) save_rt_results('kcount', topic, emotions_kcount, during, k_limit, w_limit) save_rt_results('weibos', topic, emotions_weibo, during, k_limit, w_limit)
def propagateCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, \ save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT): if topic and topic != '': start_ts = int(start_ts) over_ts = int(over_ts) over_ts = ts2HourlyTime(over_ts, during) interval = (over_ts - start_ts) / during for i in range(interval, 0, -1): begin_ts = over_ts - during * i end_ts = begin_ts + during print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode( 'utf-8') mtype_count = {} mtype_kcount = {} # mtype_kcount={mtype:[terms]} mtype_weibo = {} # mtype_weibo={mtype:weibo} query_dict = {'timestamp': {'$gt': begin_ts, '$lt': end_ts}} for k, v in mtype_kv.iteritems(): query_dict['message_type'] = v count, results = xapian_search_weibo.search(query=query_dict, fields=fields_list) mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \ max_offset=w_limit, mset_direct=True) kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit) top_ws = top_weibos(results, top=w_limit) mtype_count[v] = [end_ts, count] mtype_kcount[v] = [end_ts, kcount] mtype_weibo[v] = [end_ts, top_ws] save_pc_results(topic, mtype_count, during) save_kc_results(topic, mtype_kcount, during, k_limit) save_ws_results(topic, mtype_weibo, during, w_limit)
def sentimentCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS, \ during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT): if topic and topic != '': start_ts = int(start_ts) over_ts = int(over_ts) over_ts = ts2HourlyTime(over_ts, during) interval = (over_ts - start_ts) / during for i in range(interval, 0, -1): emotions_kcount = {} emotions_count = {} emotions_weibo = {} begin_ts = over_ts - during * i end_ts = begin_ts + during print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode('utf-8') query_dict = { 'timestamp': {'$gt': begin_ts, '$lt': end_ts}, '$and': [ {'$or': [{'message_type': 1}, {'message_type': 3}]}, ] } for k, v in emotions_kv.iteritems(): query_dict['sentiment'] = v count, results = xapian_search_weibo.search(query=query_dict, fields=save_fields) mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \ max_offset=w_limit, mset_direct=True) kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit) top_ws = top_weibos(results, top=w_limit) emotions_count[v] = [end_ts, count] emotions_kcount[v] = [end_ts, kcount] emotions_weibo[v] = [end_ts, top_ws] save_rt_results('count', topic, emotions_count, during) save_rt_results('kcount', topic, emotions_kcount, during, k_limit, w_limit) save_rt_results('weibos', topic, emotions_weibo, during, k_limit, w_limit)
def propagateCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD, \ save_fields=RESP_ITER_KEYS, during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT): if topic and topic != '': start_ts = int(start_ts) over_ts = int(over_ts) over_ts = ts2HourlyTime(over_ts, during) interval = (over_ts - start_ts) / during for i in range(interval, 0, -1): begin_ts = over_ts - during * i end_ts = begin_ts + during print begin_ts, end_ts, 'topic %s starts calculate' % topic.encode('utf-8') mtype_count = {} mtype_kcount = {} # mtype_kcount={mtype:[terms]} mtype_weibo = {} # mtype_weibo={mtype:weibo} query_dict = { 'timestamp': {'$gt': begin_ts, '$lt': end_ts} } for k, v in mtype_kv.iteritems(): query_dict['message_type'] = v count, results = xapian_search_weibo.search(query=query_dict, fields=fields_list) mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \ max_offset=w_limit, mset_direct=True) kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit) top_ws = top_weibos(results, top=w_limit) mtype_count[v] = [end_ts, count] mtype_kcount[v] = [end_ts, kcount] mtype_weibo[v] = [end_ts, top_ws] save_pc_results(topic, mtype_count, during) save_kc_results(topic, mtype_kcount, during, k_limit) save_ws_results(topic, mtype_weibo, during, w_limit)
def community_result(community_user_list, topic, date, windowsize):  # change
    """Collect weibo details, top keywords and a sentiment distribution for a
    community of users inside a topic's xapian index.

    Parameters:
        community_user_list: iterable of user ids (coercible with int())
        topic: topic name used to locate the xapian index
        date: end date accepted by datetime2ts
        windowsize: number of days to look back from `date`

    Returns the 4-tuple
        (sort_community_info, sort_top_keyword, new_sentiment_list, query_dict)
    or (None, None, None, None) when the community posted nothing in the window.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    # Match any of the community's users.
    query_dict = {'$or': []}
    for uid in community_user_list:
        query_dict['$or'].append({'user': int(uid)})
    community_info = []
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        # BUG FIX: the success path returns 4 values but this branch returned
        # only 3, breaking 4-way unpacking at the call site.
        return None, None, None, None
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # User missing from the user index: fill with "unknown" markers.
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'
        text = weibo['text']
        timestamp = weibo['timestamp']
        date = ts2date(timestamp)  # NOTE(review): shadows the `date` parameter; kept for compatibility
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)
        try:
            sentiment_count[sentiment] += 1
        except KeyError:
            sentiment_count[sentiment] = 1
        community_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, date, reposts_count,
            source, geo, comments_count, sentiment_name, weibo_link, domain
        ])
    # Sort by reposts_count (index 10), most-reposted first.
    sort_community_info = sorted(community_info, key=lambda x: x[10], reverse=True)
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)
    # Sentiment distribution as [label, count, ratio-of-total].
    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])
    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict
def get_info(neighbor_list, topic, date, windowsize):
    """Collect weibo details, top keywords and a sentiment distribution for a
    list of neighbor users inside a topic's xapian index.

    Parameters:
        neighbor_list: iterable of user ids (coercible with int())
        topic: topic name used to locate the xapian index
        date: end date accepted by datetime2ts
        windowsize: number of days to look back from `date`

    Returns the 4-tuple
        (sort_neighbor_info, sort_top_keyword, new_sentiment_list, query_dict)
    or (None, None, None, None) when the users posted nothing in the window.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    # Match any of the neighbor users.
    query_dict = {'$or': []}
    for uid in neighbor_list:
        query_dict['$or'].append({'user': int(uid)})
    neighbor_info = []
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        # BUG FIX: the success path returns 4 values but this branch returned
        # only 3, breaking 4-way unpacking at the call site.
        return None, None, None, None
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # User missing from the user index: fill with "unknown" markers.
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'
        text = weibo['text']
        timestamp = weibo['timestamp']
        date = ts2date(timestamp)  # NOTE(review): shadows the `date` parameter; kept for compatibility
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        try:
            sentiment_count[sentiment] += 1
        except KeyError:
            sentiment_count[sentiment] = 1
        neighbor_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, date, reposts_count,
            source, geo, comments_count, sentiment_name, weibo_link, uid
        ])
    # Sort by reposts_count (index 10), most-reposted first.
    sort_neighbor_info = sorted(neighbor_info, key=lambda x: x[10], reverse=True)
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)
    # Sentiment distribution as [label, count, ratio-of-total].
    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])
    return sort_neighbor_info, sort_top_keyword, new_sentiment_list, query_dict
def community_result(community_user_list, topic, date, windowsize):  # change
    """Collect weibo details, top keywords and a sentiment distribution for a
    community of users inside a topic's xapian index.

    Parameters:
        community_user_list: iterable of user ids (coercible with int())
        topic: topic name used to locate the xapian index
        date: end date accepted by datetime2ts
        windowsize: number of days to look back from `date`

    Returns the 4-tuple
        (sort_community_info, sort_top_keyword, new_sentiment_list, query_dict)
    or (None, None, None, None) when the community posted nothing in the window.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    # Match any of the community's users.
    query_dict = {"$or": []}
    for uid in community_user_list:
        query_dict["$or"].append({"user": int(uid)})
    community_info = []
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        # BUG FIX: the success path returns 4 values but this branch returned
        # only 3, breaking 4-way unpacking at the call site.
        return None, None, None, None
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo["user"]
        _id = weibo["_id"]
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result["name"]
            location = result["location"]
            friends_count = result["friends_count"]
            followers_count = result["followers_count"]
            created_at = result["created_at"]
            statuses_count = result["statuses_count"]
            profile_image_url = result["profile_image_url"]
        else:
            # User missing from the user index: fill with "unknown" markers.
            name = u"未知"
            location = u"未知"
            friends_count = u"未知"
            followers_count = u"未知"
            created_at = u"未知"
            statuses_count = u"未知"
            profile_image_url = u"no"
        text = weibo["text"]
        timestamp = weibo["timestamp"]
        date = ts2date(timestamp)  # NOTE(review): shadows the `date` parameter; kept for compatibility
        reposts_count = weibo["reposts_count"]
        source = weibo["source"]
        geo = weibo["geo"]
        comments_count = weibo["comments_count"]
        sentiment = weibo["sentiment"]
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)
        try:
            sentiment_count[sentiment] += 1
        except KeyError:
            sentiment_count[sentiment] = 1
        community_info.append(
            [
                _id, name, location, friends_count, followers_count, created_at,
                statuses_count, profile_image_url, text, date, reposts_count,
                source, geo, comments_count, sentiment_name, weibo_link, domain,
            ]
        )
    # Sort by reposts_count (index 10), most-reposted first.
    sort_community_info = sorted(community_info, key=lambda x: x[10], reverse=True)
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=["terms"]), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)
    # Sentiment distribution as [label, count, ratio-of-total].
    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])
    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict