def get_propagate_peak_news(topic, start_ts, end_ts):
    """Count propagated news per `During`-sized interval and mark peak points.

    Parameters:
        topic    -- topic identifier passed through to ReadPropagateNews
        start_ts -- interval start timestamp (seconds)
        end_ts   -- interval end timestamp (seconds)

    Returns a dict:
        'ts'         -- list of interval dates (ts2date strings)
        'count_list' -- list of float news counts, one per interval
        'peak'       -- {index: {'ts': date, 'title': 'E<index>'}} for each
                        peak index reported by detect_peaks()
    Returns {} when no counts were collected.
    """
    counts = []
    ts_lis = []
    # NOTE(review): Python 2 integer division — assumes (end_ts - start_ts)
    # is an exact multiple of During; confirm with callers.
    total_days = (end_ts - start_ts) / During
    for i in range(total_days + 1):
        ts = start_ts + During * i
        count = 0
        # Sum the 'dcount' buckets over every media type.
        for k, v in mtype_kv.iteritems():
            dcount = ReadPropagateNews(topic, ts, During, v)
            if dcount:
                count += sum(dcount['dcount'].values())
        counts.append(float(count))
        ts_lis.append(ts2date(ts))
    # Original guard was `not lis or not len(lis)` — the second test is
    # redundant with the first; an empty list is already falsy.
    if not counts:
        return {}
    new_zeros = detect_peaks(counts)
    time_lis = {}
    for idx, point_idx in enumerate(new_zeros):
        time_lis[idx] = {
            'ts': ts_lis[point_idx],
            'title': 'E' + str(idx),
        }
    return {'ts': ts_lis, 'count_list': counts, 'peak': time_lis}
def get_news_trend_pusher(topic, start_ts, end_ts, rank_method, news_skip, news_limit_count): results = [] print "topic, start_ts, end_ts, rank_method:", topic.encode("utf-8"), ts2date(start_ts), ts2date( end_ts ), rank_method items = ( db.session.query(TrendPusherNews) .filter(TrendPusherNews.topic == topic, TrendPusherNews.start_ts == start_ts, TrendPusherNews.end_ts == end_ts) .all() ) if not items or items == []: return [] for item in items: row = [] news_id = item.news_id news_id = deal_with(news_id) timestamp = item.timestamp comments_count = item.comments_count news_info = json.loads(item.news_info) url = news_info["url"] summary = news_info["summary"] datetime = news_info["datetime"] source_from_name = news_info["source_from_name"] content168 = news_info["content168"] title = news_info["title"] # weight = news_info['weight'] transmit_name = news_info["transmit_name"] # if len(transmit_name)==0: # transmit_name = u'未知' same_news_num = news_info["same_news_num"] row = [ news_id, url, summary, timestamp, datetime, source_from_name, content168, title, same_news_num, transmit_name, comments_count, ] results.append(row) if rank_method == "comments_count": sort_results = sorted(results, key=lambda x: x[10], reverse=True) # 评论数逆序排列 elif rank_method == "timestamp": sort_results = sorted(results, key=lambda x: x[3]) # 时间戳正序排列 # elif rank_method=='weight': # sort_results = sorted(results, key=lambda x:x[10], reverse=True) # 相关度逆序排序 return sort_results[news_skip : news_limit_count + news_skip]
def user_weibo():
    """Weibo list page: render the weibos of the user given by the `uid`
    query parameter, together with the user's profile info.

    Query parameters (all optional):
        query          -- topic keyword (default default_topic)
        time_range     -- e.g. '20130901-20130901' (default default_timerange)
        point_interval -- time granularity in seconds; matched against the
                          known pointIntervals entries
        uid            -- target user id; when absent, an empty list and
                          placeholder profile fields are rendered
    """
    yaosu = 'moodlens'
    topic = request.args.get('query', default_topic)
    time_range = request.args.get('time_range', default_timerange)
    point_interval = request.args.get('point_interval', None)
    if not point_interval:
        point_interval = default_pointInterval
    else:
        # Map the raw seconds value onto the matching pointIntervals entry.
        for pi in pointIntervals:
            if pi['en'] == int(point_interval):
                point_interval = pi
                break
    weibos = []
    # Placeholder profile values used when the user lookup fails.
    tar_location = u'地域未知'
    # BUG FIX: `tar_name` was previously only assigned inside the
    # `if user_info:` branch but is always passed to render_template,
    # raising NameError when uid was absent or the lookup failed.
    tar_name = u'昵称未知'
    tar_profile_image_url = '#'
    tar_followers_count = u'粉丝数未知'
    tar_friends_count = u'关注数未知'
    tar_user_url = '#'
    uid = request.args.get('uid', None)
    if uid:
        count, results = xapian_search_weibo.search(
            query={'user': int(uid)}, sort_by=['timestamp'],
            fields=['id', 'user', 'text', 'reposts_count', 'comments_count', 'geo', 'timestamp'])
        for r in results():
            r['weibo_url'] = 'http://weibo.com/'
            r['user_url'] = 'http://weibo.com/u/' + str(uid)
            r['created_at'] = ts2date(r['timestamp'])
            weibos.append(r)
        user_info = acquire_user_by_id(uid)
        if user_info:
            tar_name = user_info['name']
            tar_location = user_info['location']
            tar_profile_image_url = user_info['profile_image_url']
            tar_friends_count = user_info['friends_count']
            tar_followers_count = user_info['followers_count']
            tar_user_url = 'http://weibo.com/u/' + str(uid)
    return render_template('index/weibolist.html', yaosu=yaosu, time_range=time_range,
                           topic=topic, pointInterval=point_interval, pointIntervals=pointIntervals,
                           gaishu_yaosus=gaishu_yaosus, deep_yaosus=deep_yaosus, tar_location=tar_location,
                           tar_profile_image_url=tar_profile_image_url,
                           statuses=weibos, tar_name=tar_name, tar_friends_count=tar_friends_count,
                           tar_followers_count=tar_followers_count, tar_user_url=tar_user_url)
def parseNews(news):
    """Decode a JSON news payload into {_id: [replies, news_item]}.

    Each item's 'timestamp' field is replaced in place by its ts2date()
    string. Malformed items (missing keys, bad timestamps) are skipped.
    Returns {} when the payload does not decode to a truthy value.
    """
    news_dict = {}
    news = _json_loads(news)
    if not news:
        return {}
    for weibo in news:
        try:
            _id = deal_with(weibo['_id'])
            replies = 1  # every news item starts with a reply count of 1
            weibo['timestamp'] = ts2date(weibo['timestamp'])
            # (removed a no-op self-assignment of 'content168' present in
            # the original; it had no effect on the result)
            news_dict[_id] = [replies, weibo]
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer swallowed; malformed items are still skipped.
            continue
    return news_dict
def parseWeibos(weibos):
    """Decode a JSON weibo payload into {_id: [reposts_count, weibo]}.

    Each weibo dict is enriched in place with 'weibo_link', 'name',
    'profile_image_url' and a 'date' string derived from its timestamp.
    Items that fail to parse are skipped silently.

    NOTE(review): this function is redefined later in the file with slightly
    different output keys; the later definition shadows this one at import time.
    """
    parsed = _json_loads(weibos)
    if not parsed:
        return {}
    result = {}
    for item in parsed:
        try:
            item_id = item['_id']
            # Resolve display name and avatar for the author.
            display_name, avatar_url = getuserinfo(item['user'])
            repost_total = item['reposts_count']
            item['weibo_link'] = weiboinfo2url(item['user'], item_id)
            item['name'] = display_name
            item['profile_image_url'] = avatar_url
            item['date'] = ts2date(item['timestamp'])
            result[item_id] = [repost_total, item]
        except:
            continue
    return result
def parseWeibos(weibos):
    """Decode a JSON weibo payload into {_id: [reposts_count, weibo]}.

    Enriches each weibo dict in place with 'weibo_link', 'username',
    'profile_image_url', and converts 'timestamp' to a ts2date() string.
    Unparseable items are dropped.

    NOTE(review): this redefinition shadows an earlier parseWeibos in the
    file that used 'name'/'date' keys instead of 'username'/'timestamp'.
    """
    decoded = _json_loads(weibos)
    if not decoded:
        return {}
    out = {}
    for entry in decoded:
        try:
            entry_id = entry["_id"]
            author_name, author_avatar = getuserinfo(entry["user"])
            repost_count = entry["reposts_count"]
            entry["weibo_link"] = weiboinfo2url(entry["user"], entry_id)
            entry["username"] = author_name
            entry["profile_image_url"] = author_avatar
            entry["timestamp"] = ts2date(entry["timestamp"])
            out[entry_id] = [repost_count, entry]
        except:
            continue
    return out
def read_uid_weibos(topic, date, windowsize, uid):
    """Fetch all weibos by `uid` for a topic within the window
    [date - windowsize days, date], joined with author profile data,
    sorted by post date ascending.

    Parameters:
        topic      -- topic name used to pick the xapian index
        date       -- window end date string (parsed by datetime2ts)
        windowsize -- window length in days
        uid        -- author user id

    Returns a list of rows:
        [wid, uid, name, location, friends_count, followers_count,
         created_at, statuses_count, profile_image_url, date, text, geo,
         source, reposts_count, comments_count, weibo_link]
    Profile fields default to u'未知' / u'no' when the user lookup fails.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - Day * windowsize
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {"user": uid}
    count, results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    # Simplified: the original assigned weibo_list = [] in BOTH branches of
    # an `if count == 0` test; a single initialization suffices (the loop
    # body never runs when there are no results).
    weibo_list = []
    for weibo in results():
        wid = weibo["_id"]
        uid = weibo["user"]  # NOTE: rebinds the parameter, as the original did
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result["name"]
            location = result["location"]
            friends_count = result["friends_count"]
            followers_count = result["followers_count"]
            created_at = result["created_at"]
            statuses_count = result["statuses_count"]
            profile_image_url = result["profile_image_url"]
        else:
            name = u"未知"
            location = u"未知"
            friends_count = u"未知"
            followers_count = u"未知"
            created_at = u"未知"
            statuses_count = u"未知"
            profile_image_url = u"no"
        text = weibo["text"]
        geo = weibo["geo"]
        source = weibo["source"]
        timestamp = weibo["timestamp"]
        date = ts2date(timestamp)  # NOTE: rebinds the parameter, as the original did
        reposts_count = weibo["reposts_count"]
        comments_count = weibo["comments_count"]
        weibo_link = weiboinfo2url(uid, wid)
        domain = uid2domain(uid)
        row = [
            wid,
            uid,
            name,
            location,
            friends_count,
            followers_count,
            created_at,
            statuses_count,
            profile_image_url,
            date,
            text,
            geo,
            source,
            reposts_count,
            comments_count,
            weibo_link,
        ]
        weibo_list.append(row)
    # Sort by the formatted date string at index 9, as the original did.
    sort_weibo_list = sorted(weibo_list, key=lambda x: x[9])
    return sort_weibo_list
def c_weibo_by_ts(topic, date, windowsize, uid, network_type, cid, rank_method):
    """Collect the weibos posted by members of the community `cid` that
    contains node `uid` in the topic's saved graph, ranked by repost count
    or timestamp.

    Parameters:
        topic, date, windowsize -- identify the topic window and its graph file
        uid          -- node whose community is looked up
        network_type -- 'source_graph' or 'direct_superior_graph'; selects
                        which .gexf file to load
        cid          -- community id
        rank_method  -- 'reposts_count' (descending) or anything else
                        (ascending by timestamp)

    Returns a sorted list of rows:
        [_id, name, location, friends_count, followers_count, created_at,
         statuses_count, profile_image_url, text, date, reposts_count,
         source, geo, comments_count, sentiment_name, weibo_link, uid,
         timestamp]
    NOTE(review): the failure paths return inconsistent shapes
    ((None, None, None) vs None), preserved from the original because the
    callers are not visible here — confirm and unify with callers.
    """
    real_topic_id = acquire_real_topic_id(topic, date, windowsize)
    if not real_topic_id:
        return None, None, None
    # Topic exists — locate its persisted community graph.
    key_pre = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
    if network_type == 'source_graph':
        key = str(GRAPH_PATH) + key_pre + '_gg_graph.gexf'
    elif network_type == 'direct_superior_graph':
        key = str(GRAPH_PATH) + key_pre + '_ds_udg_graph.gexf'
    else:
        # BUG FIX: previously an unknown network_type left `key` unbound,
        # raising NameError at nx.read_gexf below.
        return None
    g = nx.read_gexf(key)
    # Members of uid's community within the graph.
    community_user_list = get_community_user(g, uid, cid)
    end_ts = datetime2ts(date)
    start_ts = end_ts - Day * windowsize
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {'$or': []}
    for uid in community_user_list:
        query_dict['$or'].append({'user': int(uid)})
    community_info = []
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        return None
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'
        text = weibo['text']
        timestamp = weibo['timestamp']
        date = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        community_info.append([_id, name, location, friends_count, followers_count,
                               created_at, statuses_count, profile_image_url, text,
                               date, reposts_count, source, geo, comments_count,
                               sentiment_name, weibo_link, uid, timestamp])
    if rank_method == 'reposts_count':
        # Descending by repost count (index 10).
        sort_community_info = sorted(community_info, key=lambda x: x[10], reverse=True)
    else:
        # Ascending by raw timestamp (index 17).
        sort_community_info = sorted(community_info, key=lambda x: x[17])
    return sort_community_info
def get_info(neighbor_list, topic, date, windowsize):
    """Gather weibos by the users in `neighbor_list` within the topic window,
    plus top keywords and a sentiment distribution.

    Parameters:
        neighbor_list -- iterable of user ids
        topic, date, windowsize -- identify the xapian index window

    Returns (sort_neighbor_info, sort_top_keyword, new_sentiment_list,
    query_dict) where:
        sort_neighbor_info -- rows sorted by reposts_count descending
        sort_top_keyword   -- (keyword, count) pairs, count descending
        new_sentiment_list -- [sentiment_name, count, ratio] triples
        query_dict         -- the '$or' user query used
    Returns (None, None, None, None) when no weibos match.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {'$or': []}
    for uid in neighbor_list:
        query_dict['$or'].append({'user': int(uid)})
    neighbor_info = []
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        # BUG FIX: the original returned a 3-tuple here while the success
        # path returns 4 values, so callers unpacking four names crashed
        # with ValueError on the empty path.
        return None, None, None, None
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'
        text = weibo['text']
        timestamp = weibo['timestamp']
        date = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        try:
            sentiment_count[sentiment] += 1
        except KeyError:
            sentiment_count[sentiment] = 1
        neighbor_info.append([_id, name, location, friends_count, followers_count,
                              created_at, statuses_count, profile_image_url, text,
                              date, reposts_count, source, geo, comments_count,
                              sentiment_name, weibo_link, uid])
    # Descending by repost count (index 10).
    sort_neighbor_info = sorted(neighbor_info, key=lambda x: x[10], reverse=True)
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)
    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])
    return sort_neighbor_info, sort_top_keyword, new_sentiment_list, query_dict
def community_result(community_user_list, topic, date, windowsize):
    """Gather weibos by the users in `community_user_list` within the topic
    window, plus top keywords and a sentiment distribution.

    Parameters:
        community_user_list -- iterable of user ids in the community
        topic, date, windowsize -- identify the xapian index window

    Returns (sort_community_info, sort_top_keyword, new_sentiment_list,
    query_dict) where:
        sort_community_info -- rows sorted by reposts_count descending
        sort_top_keyword    -- (keyword, count) pairs, count descending
        new_sentiment_list  -- [sentiment_name, count, ratio] triples
        query_dict          -- the '$or' user query used
    Returns (None, None, None, None) when no weibos match.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)
    query_dict = {"$or": []}
    for uid in community_user_list:
        query_dict["$or"].append({"user": int(uid)})
    community_info = []
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        # BUG FIX: the original returned a 3-tuple here while the success
        # path returns 4 values — callers unpacking four names crashed
        # with ValueError on the empty path.
        return None, None, None, None
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo["user"]
        _id = weibo["_id"]
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result["name"]
            location = result["location"]
            friends_count = result["friends_count"]
            followers_count = result["followers_count"]
            created_at = result["created_at"]
            statuses_count = result["statuses_count"]
            profile_image_url = result["profile_image_url"]
        else:
            name = u"未知"
            location = u"未知"
            friends_count = u"未知"
            followers_count = u"未知"
            created_at = u"未知"
            statuses_count = u"未知"
            profile_image_url = u"no"
        text = weibo["text"]
        timestamp = weibo["timestamp"]
        date = ts2date(timestamp)
        reposts_count = weibo["reposts_count"]
        source = weibo["source"]
        geo = weibo["geo"]
        comments_count = weibo["comments_count"]
        sentiment = weibo["sentiment"]
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)
        try:
            sentiment_count[sentiment] += 1
        except KeyError:
            sentiment_count[sentiment] = 1
        community_info.append(
            [
                _id,
                name,
                location,
                friends_count,
                followers_count,
                created_at,
                statuses_count,
                profile_image_url,
                text,
                date,
                reposts_count,
                source,
                geo,
                comments_count,
                sentiment_name,
                weibo_link,
                domain,
            ]
        )
    # Descending by repost count (index 10).
    sort_community_info = sorted(community_info, key=lambda x: x[10], reverse=True)
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=["terms"]), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)
    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])
    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict