def ratio():
    """Sub-opinion ratio endpoint.

    Clusters the comments of one news item under a topic, then returns JSON
    with the relative size of each opinion cluster (keyed by its top-3
    feature words) and of each sentiment class.  As a side effect, dumps the
    dedup dictionaries to the module-level ``temp_file`` for later retrieval.
    """
    # Start from a clean slate: a stale dump file would mix results.
    if os.path.exists(temp_file):
        os.remove(temp_file)
    topic_name = request.args.get('query', default_weibo_topic_name)  # topic name
    news_id = request.args.get('news_id', default_weibo_news_id)
    topicid = em.getEventIDByName(topic_name)
    cluster_num = request.args.get('cluster_num', default_cluster_num)
    if cluster_num == default_cluster_num:
        cluster_num = -1  # -1 lets the clusterer choose the cluster count
    else:
        # BUGFIX: query-string values are strings; the sibling handlers cast
        # with int() before passing cluster_num to the clusterer.
        cluster_num = int(cluster_num)
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)
    # NOTE(review): 'calcu' is parsed (1 = recompute, 0 = load cached) but
    # never used in this endpoint.
    calculation_label = int(request.args.get('calcu', 1))
    eventcomment = EventComments(topicid)
    comments = eventcomment.getNewsComments(news_id)
    if not comments:
        return json.dumps({"status":"fail"})
    cal_results = comments_calculation_v2(comments, cluster_num=cluster_num, \
            cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']
    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    for comment in item_infos:
        # Clusters whose id starts with 'nonsense' are filtered noise.
        if ('clusterid' in comment) and (comment['clusterid'][:8] != 'nonsense'):
            clusterid = comment['clusterid']
            try:
                cluster_ratio[clusterid] += 1
            except KeyError:
                cluster_ratio[clusterid] = 1
            try:
                cluster_results[clusterid].append(comment)
            except KeyError:
                cluster_results[clusterid] = [comment]
        if ('sentiment' in comment) and (comment['sentiment'] in emotions_vk_v1) \
                and ('clusterid' in comment) \
                and (comment['clusterid'][:8] != 'nonsense'):
            sentiment = comment['sentiment']
            try:
                senti_ratio[sentiment] += 1
            except KeyError:
                senti_ratio[sentiment] = 1
            try:
                sentiment_results[sentiment].append(comment)
            except KeyError:
                sentiment_results[sentiment] = [comment]
    ratio_results = dict()
    ratio_total_count = sum(cluster_ratio.values())
    # Loop variable renamed from 'ratio' to 'count': the old name shadowed
    # this function itself.
    for clusterid, count in cluster_ratio.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                ratio_results[','.join(feature[:3])] = \
                        float(count) / float(ratio_total_count)
    sentiratio_results = dict()
    sentiratio_total_count = sum(senti_ratio.values())
    for sentiment, count in senti_ratio.iteritems():
        if sentiment in emotions_vk_v1:
            label = emotions_vk_v1[sentiment]
            if label and len(label):
                sentiratio_results[label] = \
                        float(count) / float(sentiratio_total_count)
    # Sentiment dedup: group each sentiment's comments by their duplicate
    # source id so near-identical texts collapse together.
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.iteritems():
        dump_dict = dict()
        for comment in contents:
            same_from_sentiment = comment["same_from_sentiment"]
            try:
                dump_dict[same_from_sentiment].append(comment)
            except KeyError:
                dump_dict[same_from_sentiment] = [comment]
        sentiment_dump_dict[sentiment] = dump_dict
    # Sub-opinion (cluster) dedup: same grouping, per cluster.
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                dump_dict = dict()
                for comment in contents:
                    same_from_cluster = comment["same_from"]
                    try:
                        dump_dict[same_from_cluster].append(comment)
                    except KeyError:
                        dump_dict[same_from_cluster] = [comment]
                cluster_dump_dict[clusterid] = dump_dict
    # BUGFIX: use a context manager so the handle is closed even if
    # json.dumps raises (was open()/close() with stray semicolons).
    with open(temp_file, 'w') as dump_file:
        dump_file.write(json.dumps({"features":features,
                                    "senti_dump_dict":sentiment_dump_dict,
                                    "cluster_dump_dict":cluster_dump_dict}))
    return json.dumps({"ratio":ratio_results, "sentiratio":sentiratio_results,})
def news_comments_list(taskid, start_ts, over_ts, weibo_list, cluster_num=-1, cluster_eva_min_size=default_cluster_eva_min_size, vsm=default_vsm, calculation_label=1):  # weibo_list carries the weibo texts to cluster
    # NOTE(review): another `news_comments_list` with a different signature is
    # defined later in this file and shadows this one at import time --
    # confirm which definition is actually live.
    """Compute pie-chart (cluster / sentiment ratio) data for one task and
    index the cluster features plus dedup results into Elasticsearch.

    The disabled lines below previously read the parameters from the HTTP
    request instead of the argument list:
    taskid = request.args.get('taskid', default_task_id)
    cluster_num = request.args.get('cluster_num', default_cluster_num)
    if cluster_num == default_cluster_num: cluster_num = -1
    cluster_eva_min_size = request.args.get('cluster_eva_min_size', default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)
    calculation_label = int(request.args.get('calcu', 1))
    """
    # NOTE(review): `params` is only referenced by the commented-out dump
    # block further down; it is otherwise dead.
    params = {"taskid": taskid, "cluster_num": cluster_num, "cluster_eva_min_size": cluster_eva_min_size, \
            "vsm": vsm, "calculation_label": calculation_label}
    comments = weibo_list
    logfile = os.path.join(LOG_FOLDER, taskid + '.log')
    cal_results = comments_calculation_v2(comments, logfile=logfile, cluster_num=cluster_num, \
            cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    #print cal_results
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']
    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    rub_results = []  # texts filtered out as noise ('nonsense*' clusters)
    # text count before filtering
    before_filter_count = len(item_infos)
    # text count after filtering
    after_filter_count = 0
    download_items = []  # per-comment export records (used only by the disabled dump below)
    for comment in item_infos:
        # print comment
        download_item = {}
        download_item["id"] = comment["id"]
        download_item["title"] = comment["title"]
        download_item["text"] = comment["text"]
        # download_item["timestamp"] = comment["timestamp"]
        download_item["datetime"] = comment["datetime"]
        download_item["clusterid"] = comment["clusterid"]
        download_item["sentiment"] = comment["sentiment"]
        download_item["ad_label"] = comment["ad_label"]
        # Duplicate metadata only exists for real opinion clusters.
        if (comment["clusterid"][:8] != 'nonsense') and (comment["clusterid"] != 'other'):
            download_item["duplicate"] = comment["duplicate"]
            download_item["same_from"] = comment["same_from"]
        download_items.append(download_item)
        if ('clusterid' in comment) and (comment['clusterid'][:8] != 'nonsense'):
            clusterid = comment['clusterid']
            try:
                cluster_ratio[clusterid] += 1
            except KeyError:
                cluster_ratio[clusterid] = 1
            try:
                cluster_results[clusterid].append(comment)
            except KeyError:
                cluster_results[clusterid] = [comment]
        if ('sentiment' in comment) and (comment['sentiment'] in emotions_vk_v1) and ('clusterid' in comment) \
                and (comment['clusterid'][:8] != 'nonsense'):
            sentiment = comment['sentiment']
            try:
                senti_ratio[sentiment] += 1
            except KeyError:
                senti_ratio[sentiment] = 1
            try:
                sentiment_results[sentiment].append(comment)
            except KeyError:
                sentiment_results[sentiment] = [comment]
            after_filter_count += 1
        if comment['clusterid'][:8] == 'nonsense':
            rub_results.append(comment)
    ratio_results = dict()
    ratio_total_count = sum(cluster_ratio.values())
    for clusterid, ratio in cluster_ratio.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                # Key is the cluster's top-3 feature words, comma-joined.
                ratio_results[','.join(
                    feature[:3])] = float(ratio) / float(ratio_total_count)
    sentiratio_results = dict()
    sentiratio_total_count = sum(senti_ratio.values())
    for sentiment, ratio in senti_ratio.iteritems():
        if sentiment in emotions_vk_v1:
            label = emotions_vk_v1[sentiment]
            if label and len(label):
                sentiratio_results[label] = float(ratio) / float(
                    sentiratio_total_count)
    # Sentiment dedup: group each sentiment's comments by duplicate source.
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.iteritems():
        dump_dict = dict()
        for comment in contents:
            same_from_sentiment = comment["same_from_sentiment"]
            try:
                dump_dict[same_from_sentiment].append(comment)
            except KeyError:
                dump_dict[same_from_sentiment] = [comment]
        sentiment_dump_dict[sentiment] = dump_dict
    # Sub-opinion (cluster) dedup.
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                dump_dict = dict()
                for comment in contents:
                    same_from_cluster = comment["same_from"]
                    try:
                        dump_dict[same_from_cluster].append(comment)
                    except KeyError:
                        dump_dict[same_from_cluster] = [comment]
                # NOTE(review): each iteration OVERWRITES
                # cluster_dump_dict[clusterid], so only the last duplicate
                # group (in arbitrary dict order) survives per cluster.
                # Looks like a bug -- probably all groups were meant to be
                # kept or merged; confirm intended semantics.
                for k, v in dump_dict.iteritems():
                    sort_dump_dict = sorted(v, key=lambda x: x['weight'], reverse=True)
                    cluster_dump_dict[clusterid] = sort_dump_dict
    # dump_file = open(task_result_file+'_news', 'w')
    # dump_file.write(json.dumps({"params": params, "features":features, "senti_dump_dict":sentiment_dump_dict, \
    #         "cluster_dump_dict":cluster_dump_dict, "ratio":ratio_results, "sentiratio": sentiratio_results, \
    #         "before_filter_count": before_filter_count, "after_filter_count": after_filter_count}))
    # dump_file.close()
    # new_file = open(task_result_file+'_news_2','w')
    # print task_result_file+'2'
    # all weibos
    # for i in xrange(0,len(download_items)):
    #     new_file.write(json.dumps(download_items[i])+'\n')
    # new_file.close
    #task = taskid.split('_')
    #index_body={'name':task[0],'start_ts':task[1],'end_ts':task[2],'features':json.dumps(features),'cluster_dump_dict':json.dumps(cluster_dump_dict)}
    index_body = {
        'name': taskid,
        'start_ts': start_ts,
        'end_ts': over_ts,
        'features': json.dumps(features),
        'cluster_dump_dict': json.dumps(cluster_dump_dict)
    }
    # Persist the computed features/dedup results to ES, keyed by taskid.
    weibo_es.index(index=topics_river_index_name,
                   doc_type=topics_river_index_type,
                   id=taskid,
                   body=index_body)
    return json.dumps({
        "features": features,
        "cluster_dump_dict": cluster_dump_dict
    })
def news_comments_list(task_source, taskid, weibo_list, cluster_num=-1, cluster_eva_min_size=default_cluster_eva_min_size, vsm=default_vsm, calculation_label=1):  # weibo_list carries the weibo texts to cluster
    # NOTE(review): this re-definition SHADOWS the earlier
    # `news_comments_list` above; `task_source` is never used in the body.
    """Compute pie-chart (cluster / sentiment ratio) data for one task and
    index the cluster features plus dedup results into Elasticsearch
    (`es_intel` cluster variant of the handler above).
    """
    print 'weibo_list..len...', len(weibo_list)
    # NOTE(review): `params` is dead here -- built but never used.
    params = {"taskid": taskid, "cluster_num": cluster_num, "cluster_eva_min_size": cluster_eva_min_size, \
            "vsm": vsm, "calculation_label": calculation_label}
    comments = weibo_list
    logfile = os.path.join(LOG_FOLDER, taskid + '.log')
    cal_results = comments_calculation_v2(comments, logfile=logfile, cluster_num=cluster_num, \
            cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    #print cal_results
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']
    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    rub_results = []  # texts filtered out as noise ('nonsense*' clusters)
    # text count before filtering
    before_filter_count = len(item_infos)
    # text count after filtering
    after_filter_count = 0
    download_items = []  # per-comment export records (currently unused downstream)
    for comment in item_infos:
        # print comment
        download_item = {}
        download_item["id"] = comment["id"]
        download_item["title"] = comment["title"]
        download_item["text"] = comment["text"]
        # download_item["timestamp"] = comment["timestamp"]
        download_item["datetime"] = comment["datetime"]
        download_item["clusterid"] = comment["clusterid"]
        download_item["sentiment"] = comment["sentiment"]
        download_item["ad_label"] = comment["ad_label"]
        # Duplicate metadata only exists for real opinion clusters.
        if (comment["clusterid"][:8] != 'nonsense') and (comment["clusterid"] != 'other'):
            download_item["duplicate"] = comment["duplicate"]
            download_item["same_from"] = comment["same_from"]
        download_items.append(download_item)
        if ('clusterid' in comment) and (comment['clusterid'][:8] != 'nonsense'):
            clusterid = comment['clusterid']
            try:
                cluster_ratio[clusterid] += 1
            except KeyError:
                cluster_ratio[clusterid] = 1
            try:
                cluster_results[clusterid].append(comment)
            except KeyError:
                cluster_results[clusterid] = [comment]
        if ('sentiment' in comment) and (comment['sentiment'] in emotions_vk_v1) and ('clusterid' in comment) \
                and (comment['clusterid'][:8] != 'nonsense'):
            sentiment = comment['sentiment']
            try:
                senti_ratio[sentiment] += 1
            except KeyError:
                senti_ratio[sentiment] = 1
            try:
                sentiment_results[sentiment].append(comment)
            except KeyError:
                sentiment_results[sentiment] = [comment]
            after_filter_count += 1
        if comment['clusterid'][:8] == 'nonsense':
            rub_results.append(comment)
    ratio_results = dict()
    ratio_total_count = sum(cluster_ratio.values())
    for clusterid, ratio in cluster_ratio.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                # Key is the cluster's top-3 feature words, comma-joined.
                ratio_results[','.join(
                    feature[:3])] = float(ratio) / float(ratio_total_count)
    sentiratio_results = dict()
    sentiratio_total_count = sum(senti_ratio.values())
    for sentiment, ratio in senti_ratio.iteritems():
        if sentiment in emotions_vk_v1:
            label = emotions_vk_v1[sentiment]
            if label and len(label):
                sentiratio_results[label] = float(ratio) / float(
                    sentiratio_total_count)
    # Sentiment dedup: group each sentiment's comments by duplicate source.
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.iteritems():
        dump_dict = dict()
        for comment in contents:
            same_from_sentiment = comment["same_from_sentiment"]
            try:
                dump_dict[same_from_sentiment].append(comment)
            except KeyError:
                dump_dict[same_from_sentiment] = [comment]
        sentiment_dump_dict[sentiment] = dump_dict
    # Sub-opinion (cluster) dedup.
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                dump_dict = dict()
                for comment in contents:
                    same_from_cluster = comment["same_from"]
                    try:
                        dump_dict[same_from_cluster].append(comment)
                    except KeyError:
                        dump_dict[same_from_cluster] = [comment]
                # NOTE(review): each iteration OVERWRITES
                # cluster_dump_dict[clusterid]; only the last duplicate group
                # (arbitrary dict order) survives per cluster -- likely a bug,
                # same as the sibling handler above.
                for k, v in dump_dict.iteritems():
                    sort_dump_dict = sorted(v, key=lambda x: x['weight'], reverse=True)
                    cluster_dump_dict[clusterid] = sort_dump_dict
    #task = taskid.split('_')
    index_body = {
        'name': taskid,
        'features': json.dumps(features),
        'cluster_dump_dict': json.dumps(cluster_dump_dict)
    }
    # Persist to the intel ES cluster, keyed by taskid.
    es_intel.index(index=topics_river_index_name,
                   doc_type=topics_river_index_type,
                   id=taskid,
                   body=index_body)
    return json.dumps({
        "features": features,
        "cluster_dump_dict": cluster_dump_dict
    })
def comments_list():
    """Cluster a topic's (or sub-event's) comments and return ratio JSON.

    Reads topic/sub-event ids and clustering parameters from the request,
    clusters the comments, writes the feature/dedup dictionaries to the
    module-level ``temp_file``, and returns per-cluster and per-sentiment
    ratios as JSON.
    """
    if os.path.exists(temp_file):
        os.remove(temp_file)
    AB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           '../../public/')
    # BUGFIX: guard the append -- this runs per request and previously grew
    # sys.path without bound.
    if AB_PATH not in sys.path:
        sys.path.append(AB_PATH)
    from comment_module import comments_calculation_v2
    topicid = request.args.get('topicid', default_topic_id)
    subeventid = request.args.get('subeventid', 'global')
    min_cluster_num = request.args.get('min_cluster_num',
                                       default_min_cluster_num)
    max_cluster_num = request.args.get('max_cluster_num',
                                       default_max_cluster_num)
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)
    ec = EventComments(topicid)
    # 'global' means all comments for the topic; otherwise one sub-event.
    if subeventid == 'global':
        comments = ec.getAllNewsComments()
    else:
        comments = ec.getCommentsBySubeventid(subeventid)
    if not comments:
        return json.dumps({"status": "fail"})
    cal_results = comments_calculation_v2(comments, int(min_cluster_num),
                                          int(max_cluster_num),
                                          int(cluster_eva_min_size), vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']
    senti_dict = {0: '中性', 1: '积极', 2: '愤怒', 3: '悲伤'}
    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    for comment in item_infos:
        # Clusters whose id starts with 'nonsense' are filtered noise.
        if ('clusterid' in comment) and (comment['clusterid'][:8] != 'nonsense'):
            clusterid = comment['clusterid']
            try:
                cluster_ratio[clusterid] += 1
            except KeyError:
                cluster_ratio[clusterid] = 1
            try:
                cluster_results[clusterid].append(comment)
            except KeyError:
                cluster_results[clusterid] = [comment]
        if ('sentiment' in comment) and (comment['sentiment'] in senti_dict) and ('clusterid' in comment) \
                and (comment['clusterid'][:8] != 'nonsense'):
            sentiment = comment['sentiment']
            try:
                senti_ratio[sentiment] += 1
            except KeyError:
                senti_ratio[sentiment] = 1
            try:
                sentiment_results[sentiment].append(comment)
            except KeyError:
                sentiment_results[sentiment] = [comment]
    ratio_results = dict()
    ratio_total_count = sum(cluster_ratio.values())
    # Loop variable renamed from 'ratio' to 'count' to avoid shadowing the
    # module-level ratio() handler.
    for clusterid, count in cluster_ratio.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                ratio_results[','.join(
                    feature[:3])] = float(count) / float(ratio_total_count)
    sentiratio_results = dict()
    sentiratio_total_count = sum(senti_ratio.values())
    for sentiment, count in senti_ratio.iteritems():
        if sentiment in senti_dict:
            label = senti_dict[sentiment]
            if label and len(label):
                sentiratio_results[label] = float(count) / float(
                    sentiratio_total_count)
    # Sentiment dedup: group each sentiment's comments by duplicate source.
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.iteritems():
        dump_dict = dict()
        for comment in contents:
            same_from_sentiment = comment["same_from_sentiment"]
            try:
                dump_dict[same_from_sentiment].append(comment)
            except KeyError:
                dump_dict[same_from_sentiment] = [comment]
        sentiment_dump_dict[sentiment] = dump_dict
    # Sub-opinion (cluster) dedup.
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                dump_dict = dict()
                for comment in contents:
                    same_from_cluster = comment["same_from"]
                    try:
                        dump_dict[same_from_cluster].append(comment)
                    except KeyError:
                        dump_dict[same_from_cluster] = [comment]
                cluster_dump_dict[clusterid] = dump_dict
    # BUGFIX: context manager guarantees the dump file is closed even if
    # json.dumps raises.
    with open(temp_file, 'w') as dump_file:
        dump_file.write(json.dumps({"features":features,
                                    "senti_dump_dict":sentiment_dump_dict,
                                    "cluster_dump_dict":cluster_dump_dict}))
    return json.dumps({
        "ratio": ratio_results,
        "sentiratio": sentiratio_results,
    })
def comments_list():
    """Cluster all comments of a task and return ratio JSON.

    Writes the features, dedup dictionaries and ratios to a per-task temp
    file (``taskid + temp_file_post``) and returns the per-cluster and
    per-sentiment ratios as JSON.
    """
    taskid = request.args.get('taskid', default_task_id)
    cluster_num = request.args.get('cluster_num', '')  # empty -> use default
    if cluster_num == '':
        cluster_num = default_cluster_num
    # NOTE(review): cluster_num is parsed but never passed to
    # comments_calculation_v2 below -- confirm whether that is intentional.
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)
    temp_file = taskid + temp_file_post  # per-task dump path (shadows global)
    if os.path.exists(temp_file):
        os.remove(temp_file)
    ec = EventComments(taskid)
    comments = ec.getAllNewsComments()
    if not comments:
        return json.dumps({"status": "fail"})
    cal_results = comments_calculation_v2(
        comments, cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']
    senti_dict = {0: '中性', 1: '积极', 2: '愤怒', 3: '悲伤'}
    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    for comment in item_infos:
        # Clusters whose id starts with 'nonsense' are filtered noise.
        if ('clusterid' in comment) and (comment['clusterid'][:8] != 'nonsense'):
            clusterid = comment['clusterid']
            try:
                cluster_ratio[clusterid] += 1
            except KeyError:
                cluster_ratio[clusterid] = 1
            try:
                cluster_results[clusterid].append(comment)
            except KeyError:
                cluster_results[clusterid] = [comment]
        if ('sentiment' in comment) and (comment['sentiment'] in senti_dict) and ('clusterid' in comment) \
                and (comment['clusterid'][:8] != 'nonsense'):
            sentiment = comment['sentiment']
            try:
                senti_ratio[sentiment] += 1
            except KeyError:
                senti_ratio[sentiment] = 1
            try:
                sentiment_results[sentiment].append(comment)
            except KeyError:
                sentiment_results[sentiment] = [comment]
    ratio_results = dict()
    ratio_total_count = sum(cluster_ratio.values())
    # Loop variable renamed from 'ratio' to 'count' to avoid shadowing the
    # module-level ratio() handler.
    for clusterid, count in cluster_ratio.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                ratio_results[','.join(
                    feature[:3])] = float(count) / float(ratio_total_count)
    sentiratio_results = dict()
    sentiratio_total_count = sum(senti_ratio.values())
    for sentiment, count in senti_ratio.iteritems():
        if sentiment in senti_dict:
            label = senti_dict[sentiment]
            if label and len(label):
                sentiratio_results[label] = float(count) / float(
                    sentiratio_total_count)
    # Sentiment dedup: group each sentiment's comments by duplicate source.
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.iteritems():
        dump_dict = dict()
        for comment in contents:
            same_from_sentiment = comment["same_from_sentiment"]
            try:
                dump_dict[same_from_sentiment].append(comment)
            except KeyError:
                dump_dict[same_from_sentiment] = [comment]
        sentiment_dump_dict[sentiment] = dump_dict
    # Sub-opinion (cluster) dedup.
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                dump_dict = dict()
                for comment in contents:
                    same_from_cluster = comment["same_from"]
                    try:
                        dump_dict[same_from_cluster].append(comment)
                    except KeyError:
                        dump_dict[same_from_cluster] = [comment]
                cluster_dump_dict[clusterid] = dump_dict
    # BUGFIX: context manager guarantees the dump file is closed even if
    # json.dumps raises.
    with open(temp_file, 'w') as dump_file:
        dump_file.write(json.dumps({"features":features,
                                    "senti_dump_dict":sentiment_dump_dict,
                                    "cluster_dump_dict":cluster_dump_dict,
                                    "ratio":ratio_results,
                                    "sentiratio": sentiratio_results}))
    return json.dumps({
        "ratio": ratio_results,
        "sentiratio": sentiratio_results
    })
def comments_list():
    """Cluster a topic's (or sub-event's) comments and return ratio JSON.

    Variant of the handler above with min/max cluster-count parameters;
    dumps feature/dedup dictionaries to the module-level ``temp_file`` and
    returns the per-cluster and per-sentiment ratios as JSON.
    """
    if os.path.exists(temp_file):
        os.remove(temp_file)
    AB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           '../../public/')
    # BUGFIX: guard the append -- this runs per request and previously grew
    # sys.path without bound.
    if AB_PATH not in sys.path:
        sys.path.append(AB_PATH)
    from comment_module import comments_calculation_v2
    topicid = request.args.get('topicid', default_topic_id)
    subeventid = request.args.get('subeventid', 'global')
    min_cluster_num = request.args.get('min_cluster_num',
                                       default_min_cluster_num)
    max_cluster_num = request.args.get('max_cluster_num',
                                       default_max_cluster_num)
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)
    ec = EventComments(topicid)
    # 'global' means all comments for the topic; otherwise one sub-event.
    if subeventid == 'global':
        comments = ec.getAllNewsComments()
    else:
        comments = ec.getCommentsBySubeventid(subeventid)
    if not comments:
        return json.dumps({"status":"fail"})
    cal_results = comments_calculation_v2(comments, int(min_cluster_num),
                                          int(max_cluster_num),
                                          int(cluster_eva_min_size), vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']
    senti_dict = {
        0:'中性',
        1:'积极',
        2:'愤怒',
        3:'悲伤'
    }
    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    for comment in item_infos:
        # Clusters whose id starts with 'nonsense' are filtered noise.
        if ('clusterid' in comment) and (comment['clusterid'][:8] != 'nonsense'):
            clusterid = comment['clusterid']
            try:
                cluster_ratio[clusterid] += 1
            except KeyError:
                cluster_ratio[clusterid] = 1
            try:
                cluster_results[clusterid].append(comment)
            except KeyError:
                cluster_results[clusterid] = [comment]
        if ('sentiment' in comment) and (comment['sentiment'] in senti_dict) and ('clusterid' in comment) \
                and (comment['clusterid'][:8] != 'nonsense'):
            sentiment = comment['sentiment']
            try:
                senti_ratio[sentiment] += 1
            except KeyError:
                senti_ratio[sentiment] = 1
            try:
                sentiment_results[sentiment].append(comment)
            except KeyError:
                sentiment_results[sentiment] = [comment]
    ratio_results = dict()
    ratio_total_count = sum(cluster_ratio.values())
    # Loop variable renamed from 'ratio' to 'count' to avoid shadowing the
    # module-level ratio() handler.
    for clusterid, count in cluster_ratio.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                ratio_results[','.join(feature[:3])] = \
                        float(count) / float(ratio_total_count)
    sentiratio_results = dict()
    sentiratio_total_count = sum(senti_ratio.values())
    for sentiment, count in senti_ratio.iteritems():
        if sentiment in senti_dict:
            label = senti_dict[sentiment]
            if label and len(label):
                sentiratio_results[label] = \
                        float(count) / float(sentiratio_total_count)
    # Sentiment dedup: group each sentiment's comments by duplicate source.
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.iteritems():
        dump_dict = dict()
        for comment in contents:
            same_from_sentiment = comment["same_from_sentiment"]
            try:
                dump_dict[same_from_sentiment].append(comment)
            except KeyError:
                dump_dict[same_from_sentiment] = [comment]
        sentiment_dump_dict[sentiment] = dump_dict
    # Sub-opinion (cluster) dedup.
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                dump_dict = dict()
                for comment in contents:
                    same_from_cluster = comment["same_from"]
                    try:
                        dump_dict[same_from_cluster].append(comment)
                    except KeyError:
                        dump_dict[same_from_cluster] = [comment]
                cluster_dump_dict[clusterid] = dump_dict
    # BUGFIX: context manager closes the file even on error (was
    # open()/close() with stray semicolons).
    with open(temp_file, 'w') as dump_file:
        dump_file.write(json.dumps({"features":features,
                                    "senti_dump_dict":sentiment_dump_dict,
                                    "cluster_dump_dict":cluster_dump_dict}))
    return json.dumps({"ratio":ratio_results, "sentiratio":sentiratio_results,})
def comments_list():
    """Compute pie-chart data for an uploaded task and cache it to a file.

    If a result file already exists and the request asks for cached data
    (``calcu=0``), the cached ratios are returned directly.  Otherwise the
    task's uploaded comments are clustered, ratios and dedup dictionaries
    are written to the result file, and the ratios plus filter counts are
    returned as JSON.
    """
    taskid = request.args.get('taskid', default_task_id)
    cluster_num = request.args.get('cluster_num', default_cluster_num)  # absent -> -1 (auto)
    if cluster_num == default_cluster_num:
        cluster_num = -1
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)
    # 1 = recompute (default), 0 = load from the existing result file.
    calculation_label = int(request.args.get('calcu', 1))
    params = {"taskid": taskid, "cluster_num": cluster_num, "cluster_eva_min_size": cluster_eva_min_size, \
            "vsm": vsm, "calculation_label": calculation_label}
    task_result_file = os.path.join(RESULT_FOLDER, taskid)
    if os.path.exists(task_result_file) and calculation_label == 0:
        # Serve the cached result set.
        with open(task_result_file) as dump_file:
            dump_dict = json.loads(dump_file.read())
            ratio_results = dump_dict["ratio"]
            sentiratio_results = dump_dict["sentiratio"]
            before_filter_count = dump_dict["before_filter_count"]
            after_filter_count = dump_dict["after_filter_count"]
            return json.dumps({"ratio": ratio_results, "sentiratio": sentiratio_results, \
                    "before_filter_count": before_filter_count, "after_filter_count": after_filter_count})
    # Load the task's uploaded comments, one JSON object per line.
    comments = []
    task_input_file = os.path.join(UPLOAD_FOLDER, taskid)
    if os.path.exists(task_input_file):
        with open(task_input_file) as f:
            for line in f:
                comments.append(json.loads(line.strip()))
    if not comments or not len(comments):
        return json.dumps({"status":"fail"})
    logfile = os.path.join(LOG_FOLDER, taskid + '.log')
    cal_results = comments_calculation_v2(comments, logfile=logfile, cluster_num=cluster_num, \
            cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']
    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    rub_results = []  # texts filtered out as noise ('nonsense*' clusters)
    # text count before filtering
    before_filter_count = len(item_infos)
    # text count after filtering
    after_filter_count = 0
    for comment in item_infos:
        if ('clusterid' in comment) and (comment['clusterid'][:8] != 'nonsense'):
            clusterid = comment['clusterid']
            try:
                cluster_ratio[clusterid] += 1
            except KeyError:
                cluster_ratio[clusterid] = 1
            try:
                cluster_results[clusterid].append(comment)
            except KeyError:
                cluster_results[clusterid] = [comment]
        if ('sentiment' in comment) and (comment['sentiment'] in emotions_vk_v1) and ('clusterid' in comment) \
                and (comment['clusterid'][:8] != 'nonsense'):
            sentiment = comment['sentiment']
            try:
                senti_ratio[sentiment] += 1
            except KeyError:
                senti_ratio[sentiment] = 1
            try:
                sentiment_results[sentiment].append(comment)
            except KeyError:
                sentiment_results[sentiment] = [comment]
            after_filter_count += 1
        if comment['clusterid'][:8] == 'nonsense':
            rub_results.append(comment)
    ratio_results = dict()
    ratio_total_count = sum(cluster_ratio.values())
    # Loop variable renamed from 'ratio' to 'count' to avoid shadowing the
    # module-level ratio() handler.
    for clusterid, count in cluster_ratio.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                ratio_results[','.join(feature[:3])] = \
                        float(count) / float(ratio_total_count)
    sentiratio_results = dict()
    sentiratio_total_count = sum(senti_ratio.values())
    for sentiment, count in senti_ratio.iteritems():
        if sentiment in emotions_vk_v1:
            label = emotions_vk_v1[sentiment]
            if label and len(label):
                sentiratio_results[label] = \
                        float(count) / float(sentiratio_total_count)
    # Sentiment dedup: group each sentiment's comments by duplicate source.
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.iteritems():
        dump_dict = dict()
        for comment in contents:
            same_from_sentiment = comment["same_from_sentiment"]
            try:
                dump_dict[same_from_sentiment].append(comment)
            except KeyError:
                dump_dict[same_from_sentiment] = [comment]
        sentiment_dump_dict[sentiment] = dump_dict
    # Sub-opinion (cluster) dedup.
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                dump_dict = dict()
                for comment in contents:
                    same_from_cluster = comment["same_from"]
                    try:
                        dump_dict[same_from_cluster].append(comment)
                    except KeyError:
                        dump_dict[same_from_cluster] = [comment]
                cluster_dump_dict[clusterid] = dump_dict
    # BUGFIX: write the result cache through a context manager so the handle
    # is closed even if json.dumps raises (reads above already used `with`).
    with open(task_result_file, 'w') as dump_file:
        dump_file.write(json.dumps({"params": params, "features":features,
                                    "senti_dump_dict":sentiment_dump_dict,
                                    "cluster_dump_dict":cluster_dump_dict,
                                    "ratio":ratio_results,
                                    "sentiratio": sentiratio_results,
                                    "before_filter_count": before_filter_count,
                                    "after_filter_count": after_filter_count,
                                    "rub_results": rub_results}))
    return json.dumps({"ratio": ratio_results, "sentiratio": sentiratio_results, \
            "before_filter_count": before_filter_count, "after_filter_count": after_filter_count})
def comments_list():
    """Compute pie-chart data for a topic/sub-event and dump dedup results.

    Clusters the comments of ``topicid`` (all of them, or one sub-event's),
    writes the feature/dedup dictionaries to the module-level ``temp_file``,
    and returns per-cluster and per-sentiment ratios as JSON.
    """
    topicid = request.args.get('topicid', default_topic_id)
    subeventid = request.args.get('subeventid', 'global')
    cluster_num = request.args.get('cluster_num', default_cluster_num)
    if cluster_num == default_cluster_num:
        cluster_num = -1  # -1 lets the clusterer choose the cluster count
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)
    ec = EventComments(topicid)
    # 'global' means all comments for the topic; otherwise one sub-event.
    if subeventid == 'global':
        comments = ec.getAllNewsComments()
    else:
        comments = ec.getCommentsBySubeventid(subeventid)
    if not comments:
        return json.dumps({"status":"fail"})
    cal_results = comments_calculation_v2(comments, cluster_num=int(cluster_num), \
            cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']
    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    for comment in item_infos:
        # Clusters whose id starts with 'nonsense' are filtered noise.
        if ('clusterid' in comment) and (comment['clusterid'][:8] != 'nonsense'):
            clusterid = comment['clusterid']
            try:
                cluster_ratio[clusterid] += 1
            except KeyError:
                cluster_ratio[clusterid] = 1
            try:
                cluster_results[clusterid].append(comment)
            except KeyError:
                cluster_results[clusterid] = [comment]
        if ('sentiment' in comment) and (comment['sentiment'] in emotions_vk_v1) and ('clusterid' in comment) \
                and (comment['clusterid'][:8] != 'nonsense'):
            sentiment = comment['sentiment']
            try:
                senti_ratio[sentiment] += 1
            except KeyError:
                senti_ratio[sentiment] = 1
            try:
                sentiment_results[sentiment].append(comment)
            except KeyError:
                sentiment_results[sentiment] = [comment]
    ratio_results = dict()
    ratio_total_count = sum(cluster_ratio.values())
    # Loop variable renamed from 'ratio' to 'count' to avoid shadowing the
    # module-level ratio() handler.
    for clusterid, count in cluster_ratio.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                ratio_results[','.join(feature[:3])] = \
                        float(count) / float(ratio_total_count)
    sentiratio_results = dict()
    sentiratio_total_count = sum(senti_ratio.values())
    for sentiment, count in senti_ratio.iteritems():
        if sentiment in emotions_vk_v1:
            label = emotions_vk_v1[sentiment]
            if label and len(label):
                sentiratio_results[label] = \
                        float(count) / float(sentiratio_total_count)
    # Sentiment dedup: group each sentiment's comments by duplicate source.
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.iteritems():
        dump_dict = dict()
        for comment in contents:
            same_from_sentiment = comment["same_from_sentiment"]
            try:
                dump_dict[same_from_sentiment].append(comment)
            except KeyError:
                dump_dict[same_from_sentiment] = [comment]
        sentiment_dump_dict[sentiment] = dump_dict
    # Sub-opinion (cluster) dedup.
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                dump_dict = dict()
                for comment in contents:
                    same_from_cluster = comment["same_from"]
                    try:
                        dump_dict[same_from_cluster].append(comment)
                    except KeyError:
                        dump_dict[same_from_cluster] = [comment]
                cluster_dump_dict[clusterid] = dump_dict
    # BUGFIX: context manager closes the file even on error (was
    # open()/close() with stray semicolons).
    with open(temp_file, 'w') as dump_file:
        dump_file.write(json.dumps({"features":features,
                                    "senti_dump_dict":sentiment_dump_dict,
                                    "cluster_dump_dict":cluster_dump_dict}))
    return json.dumps({"ratio":ratio_results, "sentiratio":sentiratio_results,})