def subeventpie():
    """Pie-chart data: share of all comments per sub-opinion cluster (JSON)."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)

    # Tally how many comments fall into each cluster.
    counts = {}
    for item in eventcomment.getAllNewsComments():
        if 'clusterid' in item:
            cid = item['clusterid']
            counts[cid] = counts.get(cid, 0) + 1

    total = sum(counts.values())
    results = {}
    for cid, n in counts.iteritems():
        feature = eventcomment.get_feature_words(cid)
        if feature:
            # Label each slice with the cluster's top-3 feature words.
            results[','.join(feature[:3])] = float(n) / float(total)
    return json.dumps(results)
def ratio():
    """Pie-chart data: share of one news item's comments per cluster (JSON)."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)

    # Count comments per cluster id for this news item only.
    counts = {}
    for item in eventcomment.getNewsComments(news_id):
        if 'clusterid' in item:
            cid = item['clusterid']
            counts[cid] = counts.get(cid, 0) + 1

    total = sum(counts.values())
    results = {}
    for cid, n in counts.iteritems():
        feature = eventcomment.get_feature_words(cid)
        if feature:
            results[','.join(feature[:3])] = float(n) / float(total)
    return json.dumps(results)
def sentimentpie():
    """Pie-chart data: share of all comments per sentiment label (JSON)."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)

    # Sentiment code -> human-readable label.
    labels = {0: '中性', 1: '积极', 2: '愤怒', 3: '悲伤'}

    counts = {}
    for item in eventcomment.getAllNewsComments():
        if 'sentiment' in item:
            senti = item['sentiment']
            counts[senti] = counts.get(senti, 0) + 1

    total = sum(counts.values())
    results = {}
    for senti, n in counts.iteritems():
        label = labels[senti]
        if label:
            results[label] = float(n) / float(total)
    return json.dumps(results)
def sentiratio():
    """Pie-chart data: share of one news item's comments per sentiment (JSON)."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)

    # Sentiment code -> human-readable label.
    labels = {0: '中性', 1: '积极', 2: '愤怒', 3: '悲伤'}

    counts = {}
    for item in eventcomment.getNewsComments(news_id):
        if 'sentiment' in item:
            senti = item['sentiment']
            counts[senti] = counts.get(senti, 0) + 1

    total = sum(counts.values())
    results = {}
    for senti, n in counts.iteritems():
        label = labels[senti]
        if label:
            results[label] = float(n) / float(total)
    return json.dumps(results)
def commments():
    """Report whether the given news item has any comments (JSON status)."""
    topic_name = request.args.get('query', default_topic_name)
    news_id = request.args.get('news_id', default_news_id)
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)
    # Empty / missing comment list means "fail".
    if eventcomment.getNewsComments(news_id):
        return json.dumps({"status": "success"})
    return json.dumps({"status": "fail"})
def commments():
    """Report whether the given news item has any comments (JSON status)."""
    topic_name = request.args.get('query', default_topic_name)
    news_id = request.args.get('news_id', default_news_id)
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)
    has_comments = bool(eventcomment.getNewsComments(news_id))
    return json.dumps({"status": "success" if has_comments else "fail"})
def keywords():
    """Feature words per cluster of one news item's comments (JSON).

    Each cluster maps to [top-5 words, all words].
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)

    results = {}
    for cid in eventcomment.get_cluster_ids(news_id):
        fwords = eventcomment.get_feature_words(cid)
        results[cid] = [fwords[:5], fwords]
    return json.dumps(results)
def index():
    """Render the comment page for one news item, or 'no comments'."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)

    news = News(news_id, topicid)
    news_subeventid = news.get_news_subeventid()
    news_url = news.get_news_url()

    # Bail out early when the item has no comments to show.
    if not eventcomment.getNewsComments(news_id):
        return 'no comments'

    return render_template('index/comment.html', topic=topic_name,
                           topic_id=topicid, news_id=news_id,
                           news_subeventid=news_subeventid,
                           news_url=news_url)
def cluster():
    """Group one news item's comments by cluster; return sorted groups (JSON).

    Each cluster maps to [top-5 feature words joined by ',', its comments
    sorted descending by the requested sort key].
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    sort_by = request.args.get('sort', 'weight')
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)

    grouped = {}
    for item in eventcomment.getNewsComments(news_id):
        if 'clusterid' in item:
            grouped.setdefault(item['clusterid'], []).append(item)

    results = {}
    for cid, members in grouped.iteritems():
        feature = eventcomment.get_feature_words(cid)
        if feature:
            members.sort(key=lambda c: c[sort_by], reverse=True)
            results[cid] = [','.join(feature[:5]), members]
    return json.dumps(results)
def urlsearch():
    """Resolve a news URL to its news_id (JSON).

    Returns {"news_id": null} when the URL is unknown or the news item
    has no comments; otherwise {"news_id": <id>}.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    topicid = em.getEventIDByName(topic_name)
    news_url = request.args.get('url', default_news_url)  # news url
    # BUG FIX: a leftover debug line unconditionally overwrote news_url with
    # a hard-coded sina.com.cn address, making the 'url' query parameter
    # dead. The override has been removed.
    event = Event(topicid)
    news_id = event.get_news_id_by_url(news_url)
    if not news_id:
        return json.dumps({"news_id": None})
    eventcomment = EventComments(topicid)
    comments = eventcomment.getNewsComments(news_id)
    if not comments:
        return json.dumps({"news_id": None})
    return json.dumps({"news_id": news_id})
def urlsearch():
    """Resolve a news URL to its news_id (JSON).

    Returns {"news_id": null} when the URL is unknown or the news item
    has no comments; otherwise {"news_id": <id>}.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    topicid = em.getEventIDByName(topic_name)
    news_url = request.args.get('url', default_news_url)  # news url
    # BUG FIX: a leftover debug line unconditionally overwrote news_url with
    # a hard-coded sina.com.cn address, making the 'url' query parameter
    # dead. The override has been removed.
    event = Event(topicid)
    news_id = event.get_news_id_by_url(news_url)
    if not news_id:
        return json.dumps({"news_id": None})
    eventcomment = EventComments(topicid)
    comments = eventcomment.getNewsComments(news_id)
    if not comments:
        return json.dumps({"news_id": None})
    return json.dumps({"news_id": news_id})
def sentiment():
    """All comments of the topic grouped by sentiment code (JSON)."""
    topic_name = request.args.get('query', default_topic_name)
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)

    grouped = {}
    for item in eventcomment.getAllNewsComments():
        if 'sentiment' in item:
            grouped.setdefault(item['sentiment'], []).append(item)
    return json.dumps(grouped)
def cluster():
    """Group one news item's comments by cluster; return sorted groups (JSON).

    Each cluster maps to [top-5 feature words joined by ',', its comments
    sorted descending by the requested sort key].
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    sort_by = request.args.get('sort', 'weight')
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)

    grouped = {}
    for item in eventcomment.getNewsComments(news_id):
        if 'clusterid' in item:
            grouped.setdefault(item['clusterid'], []).append(item)

    results = {}
    for cid, members in grouped.iteritems():
        feature = eventcomment.get_feature_words(cid)
        if feature:
            members.sort(key=lambda c: c[sort_by], reverse=True)
            results[cid] = [','.join(feature[:5]), members]
    return json.dumps(results)
def index():
    """Render the weibo clustering page (fixed cluster-count variant)."""
    topic_name = request.args.get('query', default_weibo_topic_name)  # topic name
    news_id = request.args.get('news_id', default_weibo_news_id)
    topicid = em.getEventIDByName(topic_name)
    cluster_num = request.args.get('cluster_num', default_cluster_num)
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)
    # 1 (default) = recompute; 0 = load from an existing results file.
    calculation_label = int(request.args.get('calcu', 1))

    news = News(news_id, topicid)
    news_subeventid = news.get_news_subeventid() or 'None'

    eventcomment = EventComments(topicid)
    # NOTE(review): result is unused by the template; the call is kept in
    # case EventComments/getNewsComments has side effects — confirm.
    comments = eventcomment.getNewsComments(news_id)

    return render_template('index/weibo.html', topic=topic_name,
                           topic_id=topicid, news_id=news_id,
                           news_subeventid=news_subeventid,
                           cluster_num=cluster_num,
                           cluster_eva_min_size=cluster_eva_min_size,
                           vsm=vsm, calc_label=calculation_label)
def index():
    """Render the weibo clustering page (min/max cluster-count variant)."""
    topic_name = request.args.get('query', default_weibo_topic_name)  # topic name
    news_id = request.args.get('news_id', default_weibo_news_id)
    topicid = em.getEventIDByName(topic_name)
    min_cluster_num = request.args.get('min_cluster_num',
                                       default_min_cluster_num)
    max_cluster_num = request.args.get('max_cluster_num',
                                       default_max_cluster_num)
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)

    news = News(news_id, topicid)
    news_subeventid = news.get_news_subeventid() or 'None'

    eventcomment = EventComments(topicid)
    # NOTE(review): result is unused by the template; the call is kept in
    # case EventComments/getNewsComments has side effects — confirm.
    comments = eventcomment.getNewsComments(news_id)

    return render_template('index/weibo.html', topic=topic_name,
                           topic_id=topicid, news_id=news_id,
                           news_subeventid=news_subeventid,
                           min_cluster_num=min_cluster_num,
                           max_cluster_num=max_cluster_num,
                           cluster_eva_min_size=cluster_eva_min_size,
                           vsm=vsm)
def sentiment():
    """One news item's comments grouped by sentiment, each group sorted (JSON)."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    sort_by = request.args.get('sort', 'weight')
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)

    grouped = {}
    for item in eventcomment.getNewsComments(news_id):
        if 'sentiment' in item:
            grouped.setdefault(item['sentiment'], []).append(item)

    # Sort every sentiment group descending by the requested key.
    for members in grouped.itervalues():
        members.sort(key=lambda c: c[sort_by], reverse=True)
    return json.dumps(grouped)
def sentiment():
    """One news item's comments grouped by sentiment, each group sorted (JSON)."""
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    sort_by = request.args.get('sort', 'weight')
    topicid = em.getEventIDByName(topic_name)
    eventcomment = EventComments(topicid)

    grouped = {}
    for item in eventcomment.getNewsComments(news_id):
        if 'sentiment' in item:
            grouped.setdefault(item['sentiment'], []).append(item)

    # Sort every sentiment group descending by the requested key.
    for members in grouped.itervalues():
        members.sort(key=lambda c: c[sort_by], reverse=True)
    return json.dumps(grouped)
def comments_list():
    """Cluster a (sub)event's comments, dump dedup data to temp_file and
    return cluster/sentiment pie ratios as JSON."""
    if os.path.exists(temp_file):
        os.remove(temp_file)

    # Make the shared comment-clustering module importable.
    AB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           '../../public/')
    sys.path.append(AB_PATH)
    from comment_module import comments_calculation_v2

    topicid = request.args.get('topicid', default_topic_id)
    subeventid = request.args.get('subeventid', 'global')
    min_cluster_num = request.args.get('min_cluster_num',
                                       default_min_cluster_num)
    max_cluster_num = request.args.get('max_cluster_num',
                                       default_max_cluster_num)
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)

    ec = EventComments(topicid)
    if subeventid == 'global':
        comments = ec.getAllNewsComments()
    else:
        comments = ec.getCommentsBySubeventid(subeventid)
    if not comments:
        return json.dumps({"status": "fail"})

    cal_results = comments_calculation_v2(comments, int(min_cluster_num),
                                          int(max_cluster_num),
                                          int(cluster_eva_min_size), vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']

    senti_dict = {0: '中性', 1: '积极', 2: '愤怒', 3: '悲伤'}
    cluster_ratio = {}
    senti_ratio = {}
    sentiment_results = {}
    cluster_results = {}
    for comment in item_infos:
        # 'nonsense*' clusters are noise and excluded everywhere.
        clustered = ('clusterid' in comment
                     and comment['clusterid'][:8] != 'nonsense')
        if clustered:
            cid = comment['clusterid']
            cluster_ratio[cid] = cluster_ratio.get(cid, 0) + 1
            cluster_results.setdefault(cid, []).append(comment)
        if clustered and 'sentiment' in comment \
                and comment['sentiment'] in senti_dict:
            senti = comment['sentiment']
            senti_ratio[senti] = senti_ratio.get(senti, 0) + 1
            sentiment_results.setdefault(senti, []).append(comment)

    ratio_results = {}
    total = sum(cluster_ratio.values())
    for cid, count in cluster_ratio.iteritems():
        if cid in features and features[cid]:
            ratio_results[','.join(features[cid][:3])] = \
                float(count) / float(total)

    sentiratio_results = {}
    senti_total = sum(senti_ratio.values())
    for senti, count in senti_ratio.iteritems():
        if senti in senti_dict and senti_dict[senti]:
            sentiratio_results[senti_dict[senti]] = \
                float(count) / float(senti_total)

    # Deduplicate each sentiment group by its "same_from_sentiment" source.
    sentiment_dump_dict = {}
    for senti, contents in sentiment_results.iteritems():
        dump_dict = {}
        for comment in contents:
            dump_dict.setdefault(comment["same_from_sentiment"],
                                 []).append(comment)
        sentiment_dump_dict[senti] = dump_dict

    # Deduplicate each cluster group by its "same_from" source.
    cluster_dump_dict = {}
    for cid, contents in cluster_results.iteritems():
        if cid in features and features[cid]:
            dump_dict = {}
            for comment in contents:
                dump_dict.setdefault(comment["same_from"], []).append(comment)
            cluster_dump_dict[cid] = dump_dict

    dump_file = open(temp_file, 'w')
    dump_file.write(json.dumps({"features": features,
                                "senti_dump_dict": sentiment_dump_dict,
                                "cluster_dump_dict": cluster_dump_dict}))
    dump_file.close()
    return json.dumps({"ratio": ratio_results,
                       "sentiratio": sentiratio_results})
def comments_list():
    """Cluster a task's comments, persist dedup + ratio data to a per-task
    temp file and return cluster/sentiment pie ratios as JSON."""
    taskid = request.args.get('taskid', default_task_id)
    # Empty cluster_num falls back to the default value.
    cluster_num = request.args.get('cluster_num', '')
    if cluster_num == '':
        cluster_num = default_cluster_num
    # NOTE(review): cluster_num is read but never passed to the
    # calculation below — confirm whether that is intentional.
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)

    temp_file = taskid + temp_file_post
    if os.path.exists(temp_file):
        os.remove(temp_file)

    ec = EventComments(taskid)
    comments = ec.getAllNewsComments()
    if not comments:
        return json.dumps({"status": "fail"})

    cal_results = comments_calculation_v2(
        comments, cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']

    senti_dict = {0: '中性', 1: '积极', 2: '愤怒', 3: '悲伤'}
    cluster_ratio = {}
    senti_ratio = {}
    sentiment_results = {}
    cluster_results = {}
    for comment in item_infos:
        # 'nonsense*' clusters are noise and excluded everywhere.
        clustered = ('clusterid' in comment
                     and comment['clusterid'][:8] != 'nonsense')
        if clustered:
            cid = comment['clusterid']
            cluster_ratio[cid] = cluster_ratio.get(cid, 0) + 1
            cluster_results.setdefault(cid, []).append(comment)
        if clustered and 'sentiment' in comment \
                and comment['sentiment'] in senti_dict:
            senti = comment['sentiment']
            senti_ratio[senti] = senti_ratio.get(senti, 0) + 1
            sentiment_results.setdefault(senti, []).append(comment)

    ratio_results = {}
    total = sum(cluster_ratio.values())
    for cid, count in cluster_ratio.iteritems():
        if cid in features and features[cid]:
            ratio_results[','.join(features[cid][:3])] = \
                float(count) / float(total)

    sentiratio_results = {}
    senti_total = sum(senti_ratio.values())
    for senti, count in senti_ratio.iteritems():
        if senti in senti_dict and senti_dict[senti]:
            sentiratio_results[senti_dict[senti]] = \
                float(count) / float(senti_total)

    # Deduplicate each sentiment group by its "same_from_sentiment" source.
    sentiment_dump_dict = {}
    for senti, contents in sentiment_results.iteritems():
        dump_dict = {}
        for comment in contents:
            dump_dict.setdefault(comment["same_from_sentiment"],
                                 []).append(comment)
        sentiment_dump_dict[senti] = dump_dict

    # Deduplicate each cluster group by its "same_from" source.
    cluster_dump_dict = {}
    for cid, contents in cluster_results.iteritems():
        if cid in features and features[cid]:
            dump_dict = {}
            for comment in contents:
                dump_dict.setdefault(comment["same_from"], []).append(comment)
            cluster_dump_dict[cid] = dump_dict

    dump_file = open(temp_file, 'w')
    dump_file.write(json.dumps({"features": features,
                                "senti_dump_dict": sentiment_dump_dict,
                                "cluster_dump_dict": cluster_dump_dict,
                                "ratio": ratio_results,
                                "sentiratio": sentiratio_results}))
    dump_file.close()
    return json.dumps({"ratio": ratio_results,
                       "sentiratio": sentiratio_results})
def comments_list():
    """Cluster a (sub)event's comments, dump dedup data to temp_file and
    return cluster/sentiment pie ratios as JSON."""
    if os.path.exists(temp_file):
        os.remove(temp_file)

    # Make the shared comment-clustering module importable.
    AB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           '../../public/')
    sys.path.append(AB_PATH)
    from comment_module import comments_calculation_v2

    topicid = request.args.get('topicid', default_topic_id)
    subeventid = request.args.get('subeventid', 'global')
    min_cluster_num = request.args.get('min_cluster_num',
                                       default_min_cluster_num)
    max_cluster_num = request.args.get('max_cluster_num',
                                       default_max_cluster_num)
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)

    ec = EventComments(topicid)
    if subeventid == 'global':
        comments = ec.getAllNewsComments()
    else:
        comments = ec.getCommentsBySubeventid(subeventid)
    if not comments:
        return json.dumps({"status": "fail"})

    cal_results = comments_calculation_v2(comments, int(min_cluster_num),
                                          int(max_cluster_num),
                                          int(cluster_eva_min_size), vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']

    senti_dict = {0: '中性', 1: '积极', 2: '愤怒', 3: '悲伤'}
    cluster_ratio = {}
    senti_ratio = {}
    sentiment_results = {}
    cluster_results = {}
    for comment in item_infos:
        # 'nonsense*' clusters are noise and excluded everywhere.
        clustered = ('clusterid' in comment
                     and comment['clusterid'][:8] != 'nonsense')
        if clustered:
            cid = comment['clusterid']
            cluster_ratio[cid] = cluster_ratio.get(cid, 0) + 1
            cluster_results.setdefault(cid, []).append(comment)
        if clustered and 'sentiment' in comment \
                and comment['sentiment'] in senti_dict:
            senti = comment['sentiment']
            senti_ratio[senti] = senti_ratio.get(senti, 0) + 1
            sentiment_results.setdefault(senti, []).append(comment)

    ratio_results = {}
    total = sum(cluster_ratio.values())
    for cid, count in cluster_ratio.iteritems():
        if cid in features and features[cid]:
            ratio_results[','.join(features[cid][:3])] = \
                float(count) / float(total)

    sentiratio_results = {}
    senti_total = sum(senti_ratio.values())
    for senti, count in senti_ratio.iteritems():
        if senti in senti_dict and senti_dict[senti]:
            sentiratio_results[senti_dict[senti]] = \
                float(count) / float(senti_total)

    # Deduplicate each sentiment group by its "same_from_sentiment" source.
    sentiment_dump_dict = {}
    for senti, contents in sentiment_results.iteritems():
        dump_dict = {}
        for comment in contents:
            dump_dict.setdefault(comment["same_from_sentiment"],
                                 []).append(comment)
        sentiment_dump_dict[senti] = dump_dict

    # Deduplicate each cluster group by its "same_from" source.
    cluster_dump_dict = {}
    for cid, contents in cluster_results.iteritems():
        if cid in features and features[cid]:
            dump_dict = {}
            for comment in contents:
                dump_dict.setdefault(comment["same_from"], []).append(comment)
            cluster_dump_dict[cid] = dump_dict

    dump_file = open(temp_file, 'w')
    dump_file.write(json.dumps({"features": features,
                                "senti_dump_dict": sentiment_dump_dict,
                                "cluster_dump_dict": cluster_dump_dict}))
    dump_file.close()
    return json.dumps({"ratio": ratio_results,
                       "sentiratio": sentiratio_results})
def ratio():
    """Cluster one news item's weibo comments, dump dedup data to temp_file
    and return cluster/sentiment pie ratios as JSON."""
    if os.path.exists(temp_file):
        os.remove(temp_file)

    topic_name = request.args.get('query', default_weibo_topic_name)  # topic name
    news_id = request.args.get('news_id', default_weibo_news_id)
    topicid = em.getEventIDByName(topic_name)
    cluster_num = request.args.get('cluster_num', default_cluster_num)
    if cluster_num == default_cluster_num:
        cluster_num = -1  # -1 lets the algorithm pick the cluster count
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)
    # 1 (default) = recompute; 0 = load from an existing results file.
    calculation_label = int(request.args.get('calcu', 1))

    eventcomment = EventComments(topicid)
    comments = eventcomment.getNewsComments(news_id)
    if not comments:
        return json.dumps({"status": "fail"})

    cal_results = comments_calculation_v2(
        comments, cluster_num=cluster_num,
        cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']

    cluster_ratio = {}
    senti_ratio = {}
    sentiment_results = {}
    cluster_results = {}
    for comment in item_infos:
        # 'nonsense*' clusters are noise and excluded everywhere.
        clustered = ('clusterid' in comment
                     and comment['clusterid'][:8] != 'nonsense')
        if clustered:
            cid = comment['clusterid']
            cluster_ratio[cid] = cluster_ratio.get(cid, 0) + 1
            cluster_results.setdefault(cid, []).append(comment)
        if clustered and 'sentiment' in comment \
                and comment['sentiment'] in emotions_vk_v1:
            senti = comment['sentiment']
            senti_ratio[senti] = senti_ratio.get(senti, 0) + 1
            sentiment_results.setdefault(senti, []).append(comment)

    ratio_results = {}
    total = sum(cluster_ratio.values())
    for cid, count in cluster_ratio.iteritems():
        if cid in features and features[cid]:
            ratio_results[','.join(features[cid][:3])] = \
                float(count) / float(total)

    sentiratio_results = {}
    senti_total = sum(senti_ratio.values())
    for senti, count in senti_ratio.iteritems():
        if senti in emotions_vk_v1 and emotions_vk_v1[senti]:
            sentiratio_results[emotions_vk_v1[senti]] = \
                float(count) / float(senti_total)

    # Deduplicate each sentiment group by its "same_from_sentiment" source.
    sentiment_dump_dict = {}
    for senti, contents in sentiment_results.iteritems():
        dump_dict = {}
        for comment in contents:
            dump_dict.setdefault(comment["same_from_sentiment"],
                                 []).append(comment)
        sentiment_dump_dict[senti] = dump_dict

    # Deduplicate each cluster group by its "same_from" source.
    cluster_dump_dict = {}
    for cid, contents in cluster_results.iteritems():
        if cid in features and features[cid]:
            dump_dict = {}
            for comment in contents:
                dump_dict.setdefault(comment["same_from"], []).append(comment)
            cluster_dump_dict[cid] = dump_dict

    dump_file = open(temp_file, 'w')
    dump_file.write(json.dumps({"features": features,
                                "senti_dump_dict": sentiment_dump_dict,
                                "cluster_dump_dict": cluster_dump_dict}))
    dump_file.close()
    return json.dumps({"ratio": ratio_results,
                       "sentiratio": sentiratio_results})
def comments_list():
    """Compute pie-chart data for a (sub)event; write the pie data and the
    deduplicated recommended texts to temp_file; return ratios as JSON."""
    topicid = request.args.get('topicid', default_topic_id)
    subeventid = request.args.get('subeventid', 'global')
    cluster_num = request.args.get('cluster_num', default_cluster_num)
    if cluster_num == default_cluster_num:
        cluster_num = -1  # -1 lets the algorithm pick the cluster count
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)

    ec = EventComments(topicid)
    if subeventid == 'global':
        comments = ec.getAllNewsComments()
    else:
        comments = ec.getCommentsBySubeventid(subeventid)
    if not comments:
        return json.dumps({"status": "fail"})

    cal_results = comments_calculation_v2(
        comments, cluster_num=int(cluster_num),
        cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']

    cluster_ratio = {}
    senti_ratio = {}
    sentiment_results = {}
    cluster_results = {}
    for comment in item_infos:
        # 'nonsense*' clusters are noise and excluded everywhere.
        clustered = ('clusterid' in comment
                     and comment['clusterid'][:8] != 'nonsense')
        if clustered:
            cid = comment['clusterid']
            cluster_ratio[cid] = cluster_ratio.get(cid, 0) + 1
            cluster_results.setdefault(cid, []).append(comment)
        if clustered and 'sentiment' in comment \
                and comment['sentiment'] in emotions_vk_v1:
            senti = comment['sentiment']
            senti_ratio[senti] = senti_ratio.get(senti, 0) + 1
            sentiment_results.setdefault(senti, []).append(comment)

    ratio_results = {}
    total = sum(cluster_ratio.values())
    for cid, count in cluster_ratio.iteritems():
        if cid in features and features[cid]:
            ratio_results[','.join(features[cid][:3])] = \
                float(count) / float(total)

    sentiratio_results = {}
    senti_total = sum(senti_ratio.values())
    for senti, count in senti_ratio.iteritems():
        if senti in emotions_vk_v1 and emotions_vk_v1[senti]:
            sentiratio_results[emotions_vk_v1[senti]] = \
                float(count) / float(senti_total)

    # Deduplicate each sentiment group by its "same_from_sentiment" source.
    sentiment_dump_dict = {}
    for senti, contents in sentiment_results.iteritems():
        dump_dict = {}
        for comment in contents:
            dump_dict.setdefault(comment["same_from_sentiment"],
                                 []).append(comment)
        sentiment_dump_dict[senti] = dump_dict

    # Deduplicate each cluster group by its "same_from" source.
    cluster_dump_dict = {}
    for cid, contents in cluster_results.iteritems():
        if cid in features and features[cid]:
            dump_dict = {}
            for comment in contents:
                dump_dict.setdefault(comment["same_from"], []).append(comment)
            cluster_dump_dict[cid] = dump_dict

    dump_file = open(temp_file, 'w')
    dump_file.write(json.dumps({"features": features,
                                "senti_dump_dict": sentiment_dump_dict,
                                "cluster_dump_dict": cluster_dump_dict}))
    dump_file.close()
    return json.dumps({"ratio": ratio_results,
                       "sentiratio": sentiratio_results})