import json

import boto3
import pandas as pd
from rouge import Rouge


def handler(event, context):
    dynamodb = boto3.resource('dynamodb')

    table_articles = dynamodb.Table('articles')
    response_articles = table_articles.scan()
    df_articles = pd.DataFrame.from_dict(response_articles['Items'])

    table_summary = dynamodb.Table('summary')
    response_summary = table_summary.scan()
    df_summary = pd.DataFrame.from_dict(response_summary['Items'])

    # Join articles and summaries on their shared URL key.
    df = pd.merge(df_articles, df_summary, on='url')

    rouge = Rouge()
    for index, row in df.iterrows():
        c1 = row['summary_a']
        c2 = row['summary_b']
        r = row['article_text']
        # ROUGE-1 f-score of each candidate summary against the article text.
        score_a = rouge.get_scores(c1, r)[0]['rouge-1']['f']
        score_b = rouge.get_scores(c2, r)[0]['rouge-1']['f']
        best = 'distilbert-base-uncased' if score_a > score_b else 'bert-large-uncased'
        table_summary.update_item(
            Key={'url': row['url']},
            UpdateExpression="set score_a = :p, score_b = :r, best_summarizer = :b",
            ExpressionAttributeValues={
                ':p': str(score_a),
                ':r': str(score_b),
                ':b': best,
            },
        )
    return {'statusCode': 200, 'body': json.dumps('Scores Calculated')}
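# Local smoke test (hypothetical invocation): the handler ignores `event` and
# `context`, so given AWS credentials and existing 'articles'/'summary' tables
# it can be exercised outside Lambda with empty inputs:
#
#   if __name__ == "__main__":
#       print(handler({}, None))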
def rougeScoreExcludeStopWords(hyp_str, ref_str):
    """Return the ROUGE scores of two sentences after stop-word removal.

    :param hyp_str: the candidate sentence from the source article
    :param ref_str: the PIO sentence from the systematic review (SR)
    :return: ROUGE scores of the two filtered sentences
    """
    rouge = Rouge()
    hyp_str = excludeStopWords(hyp_str)
    ref_str = excludeStopWords(ref_str)
    return rouge.get_scores(hyp_str, ref_str)
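# `excludeStopWords` is an external helper not shown here. A minimal sketch,
# assuming it mirrors the inline filtering used by the variant of this function
# further below (punctuation stripping plus NLTK English stop words):
import string

from nltk.corpus import stopwords


def excludeStopWords(text):
    # Strip punctuation, then drop English stop words.
    words = text.translate(str.maketrans('', '', string.punctuation)).split()
    return " ".join(w for w in words if w not in stopwords.words('english'))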
def evaluate(self, article, ref):
    dec = self.abstract(article)
    scores = rouge.get_scores(dec, ref)
    rouge_1 = sum(x["rouge-1"]["f"] for x in scores) / len(scores)
    rouge_2 = sum(x["rouge-2"]["f"] for x in scores) / len(scores)
    rouge_l = sum(x["rouge-l"]["f"] for x in scores) / len(scores)
    return {
        'dec': dec,
        'rouge_1': rouge_1,
        'rouge_2': rouge_2,
        'rouge_l': rouge_l,
    }
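# For reference: on a single hypothesis/reference string pair, get_scores
# returns a one-element list, so the averaging above reduces to that single
# f-score. Illustration of the structure (values elided):
#
#   rouge = Rouge()
#   rouge.get_scores("the cat sat", "the cat sat on the mat")
#   # -> [{'rouge-1': {'f': ..., 'p': ..., 'r': ...},
#   #      'rouge-2': {...}, 'rouge-l': {...}}]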
def evaluate_batch(self, article):
    self.setup_valid()
    batch = self.batcher.next_batch()
    start_id = self.vocab.word2id(data.START_DECODING)
    end_id = self.vocab.word2id(data.STOP_DECODING)
    unk_id = self.vocab.word2id(data.UNKNOWN_TOKEN)
    decoded_sents = []
    ref_sents = []
    article_sents = []
    rouge = Rouge()
    while batch is not None:
        enc_batch, enc_lens, enc_padding_mask, enc_batch_extend_vocab, \
            extra_zeros, ct_e = get_enc_data(batch)
        with T.autograd.no_grad():
            enc_batch = self.model.embeds(enc_batch)
            enc_out, enc_hidden = self.model.encoder(enc_batch, enc_lens)

        # ----------------------- Summarization -----------------------
        with T.autograd.no_grad():
            pred_ids = beam_search(enc_hidden, enc_out, enc_padding_mask, ct_e,
                                   extra_zeros, enc_batch_extend_vocab,
                                   self.model, start_id, end_id, unk_id)
        for i in range(len(pred_ids)):
            decoded_words = data.outputids2words(pred_ids[i], self.vocab,
                                                 batch.art_oovs[i])
            if len(decoded_words) < 2:
                decoded_words = "xxx"
            else:
                decoded_words = " ".join(decoded_words)
            decoded_sents.append(decoded_words)
            abstract = batch.original_abstracts[i]
            # Caution: this overwrites the `article` argument used as a flag below.
            article = batch.original_articles[i]
            ref_sents.append(abstract)
            article_sents.append(article)
        batch = self.batcher.next_batch()

    load_file = self.opt.load_model
    if article:
        self.print_original_predicted(decoded_sents, ref_sents,
                                      article_sents, load_file)
    scores = rouge.get_scores(decoded_sents, ref_sents)
    rouge_1 = sum(x["rouge-1"]["f"] for x in scores) / len(scores)
    rouge_2 = sum(x["rouge-2"]["f"] for x in scores) / len(scores)
    rouge_l = sum(x["rouge-l"]["f"] for x in scores) / len(scores)
    logger.info(load_file + " rouge_1:" + "%.4f" % rouge_1 +
                " rouge_2:" + "%.4f" % rouge_2 +
                " rouge_l:" + "%.4f" % rouge_l)
def compute_rouge(source, target):
    """Compute rouge-1, rouge-2, and rouge-l."""
    source, target = ' '.join(source), ' '.join(target)
    try:
        scores = rouge.get_scores(hyps=source, refs=target)
        return {
            'rouge-1': scores[0]['rouge-1']['f'],
            'rouge-2': scores[0]['rouge-2']['f'],
            'rouge-l': scores[0]['rouge-l']['f'],
        }
    except ValueError:
        return {
            'rouge-1': 0.0,
            'rouge-2': 0.0,
            'rouge-l': 0.0,
        }
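# Usage sketch: `source` and `target` are joined with spaces before scoring,
# so this expects token (or character) sequences rather than raw strings, and
# it relies on a module-level `rouge = Rouge()` instance:
#
#   rouge = Rouge()
#   compute_rouge(list("abcd"), list("abce"))      # character-level scoring
#   compute_rouge("a b c".split(), "a b".split())  # token-level scoring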
def rougeScoreExcludeStopWords(hyp_str, ref_str):
    """Return the ROUGE scores of two sentences after stop-word removal.

    :param hyp_str: the candidate sentence from the source article
    :param ref_str: the PIO sentence from the systematic review (SR)
    :return: ROUGE scores of the two filtered sentences
    """
    rouge = Rouge()
    hyp_str = " ".join(
        word for word in
        hyp_str.translate(str.maketrans('', '', string.punctuation)).split()
        if word not in stopwords.words('english'))
    # Rouge raises on empty input, so pad emptied strings with a placeholder.
    if not hyp_str.strip():
        hyp_str = hyp_str + '*'
    ref_str = " ".join(
        word for word in
        ref_str.translate(str.maketrans('', '', string.punctuation)).split()
        if word not in stopwords.words('english'))
    if not ref_str.strip():
        ref_str = ref_str + '*'
    return rouge.get_scores(hyp_str, ref_str)
"Ignoring file " + file + " due to parse errors or short length.") continue try: sentences_vec = common.model.embed_sentences(sentences) except TypeError: common.log_message( "ERROR", "Ignoring file " + file + " due to embedding error.") continue Y_rouge_list = [] # Compute ROUGE score for each sentence for sentence, sentence_vec in zip(sentences, sentences_vec): try: rouge_str = rouge.get_scores(summary, sentence) except ValueError: common.log_message("ERROR", str(file) + " -- Error computing ROUGE of sentence " +\ sentence) continue # Y_rouge_list.append(common.rouge_to_list(rouge_str)[0][2]) Y_rouge_list.append(common.rouge_to_list(rouge_str)) Y_list.append(Y_rouge_list) Y_idx_list.append(np.argmax(Y_rouge_list, axis=0)[0][0]) Y_rouge_list = [] X_list.append(sentences_vec) file_list_save.append(file) files_counter += 1
def compute_scores(start, end, context, a1, a2):
    score = 0.0
    for i in range(len(start)):
        predicted_span = ' '.join(context[i][start[i]:end[i] + 1])
        # Score against both reference answers and keep the better ROUGE-L f-score.
        score += max(
            rouge.get_scores(predicted_span, a1[i])[0]['rouge-l']['f'],
            rouge.get_scores(predicted_span, a2[i])[0]['rouge-l']['f'])
    return score
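# Usage sketch (hypothetical data): `context` holds tokenized passages,
# `start`/`end` are inclusive predicted answer-span indices, and `a1`/`a2`
# are two reference answers per example; a module-level `rouge = Rouge()`
# is assumed:
#
#   context = [["the", "sky", "is", "blue"]]
#   compute_scores(start=[2], end=[3], context=context,
#                  a1=["is blue"], a2=["blue"])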
def generate_summary(filename):
    global global_offset_mean
    global top_n_counter
    sentences_text, sentences_vec, ground_truth_text, ground_truth_vec = \
        preprocess_text(filename)
    stat = []
    if len(sentences_text) < 10:
        return None
    text_mean_vec = np.mean(sentences_vec, axis=0)
    # Compute the text mean without the ground truth
    if parser.use_ground_truth == "True":
        text_mean_diff_vec = np.subtract(text_mean_vec, ground_truth_vec)
    else:
        text_mean_diff_vec = text_mean_vec
    if parser.mean_ground_truth == "True":
        text_mean_diff_vec = np.subtract(text_mean_vec, global_offset_mean)

    sentence_idx = 0
    for sentence_vec in sentences_vec:
        # Compute the sentence's distance from the text mean
        sentence_vec_dist = np.linalg.norm(
            (text_mean_vec, np.add(text_mean_diff_vec[0], sentence_vec)))
        # Compute ROUGE scores for the sentence
        try:
            rouge_str = rouge.get_scores(ground_truth_text,
                                         sentences_text[sentence_idx])
        except ValueError:
            stat.append([sentence_idx, sentence_vec_dist,
                         [[0, 0, 0], [0, 0, 0], [0, 0, 0]]])
            sentence_idx += 1
            continue
        stat.append([sentence_idx, sentence_vec_dist, rouge_to_list(rouge_str)])
        sentence_idx += 1

    # Build the vector-based summary: closest sentences first
    stat.sort(key=lambda x: x[1])
    sentences_vector_idx = []
    best_vector_str = ""
    for sentence in stat:
        if len(best_vector_str) > int(parser.max_summary_length):
            break
        best_vector_str += sentences_text[sentence[0]] + ". "
        sentences_vector_idx.append(sentence[0])

    # Build the ROUGE oracle summary: best-scoring sentences first
    stat.sort(key=lambda x: x[2][0], reverse=True)
    sentences_rouge_idx = []
    best_rouge_str = ""
    for sentence in stat:
        if len(best_rouge_str) > int(parser.max_summary_length):
            break
        best_rouge_str += sentences_text[sentence[0]] + ". "
        sentences_rouge_idx.append(sentence[0])

    log_file.write("\n\n-----------------------------------------------------------------\n")
    log_file.write("Processing file " + str(filename) + "\n")
    log_file.write("Best vector indexes = " + str(sentences_vector_idx) + "\n")
    try:
        log_file.write("ROUGE Scores = " +
                       str(rouge_to_list(rouge.get_scores(ground_truth_text, best_vector_str))) + "\n")
    except ValueError:
        log_file_error.write("[" + filename + "] Error computing ROUGE score for summary vector\n")
    log_file.write("Vector summary = " + str(best_vector_str) + "\n")
    log_file.write("Best ROUGE indexes " + str(sentences_rouge_idx) + "\n")
    log_file.write("ROUGE summary = " + str(best_rouge_str) + "\n")
    try:
        log_file.write("ROUGE Scores = " +
                       str(rouge_to_list(rouge.get_scores(ground_truth_text, best_rouge_str))) + "\n")
    except ValueError:
        log_file_error.write("[" + filename + "] Error computing ROUGE score for ROUGE vector\n")
    log_file.write("Ground truth summary = " + ground_truth_text)

    # Count how often vector-selected sentences land among the top-n ROUGE picks
    for sentence_idx in sentences_vector_idx:
        for idx in range(top_n):
            if sentence_idx in sentences_rouge_idx[:idx]:
                top_n_counter[idx] += 1

    try:
        stat_vec = rouge_to_list(rouge.get_scores(ground_truth_text, best_vector_str))
    except:
        stat_vec = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
    try:
        stat_rouge = rouge_to_list(rouge.get_scores(ground_truth_text, best_rouge_str))
    except:
        stat_rouge = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
    stats_vec.append(stat_vec)
    stats_rouge.append(stat_rouge)
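# `rouge_to_list` is an external helper. A minimal sketch consistent with its
# use above (sorting on x[2][0] and the [[0, 0, 0]] * 3 fallback): flatten the
# get_scores output into one [f, p, r] triple per metric. The real helper may
# differ.
def rouge_to_list(rouge_scores):
    s = rouge_scores[0]
    return [[s[m]['f'], s[m]['p'], s[m]['r']]
            for m in ('rouge-1', 'rouge-2', 'rouge-l')]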
def calculateRouge(hyp_Path, ref_Path, option):
    """Compute the similarity between the PIO information and every sentence
    of the source article.

    :param hyp_Path: directory containing the source articles
    :param ref_Path: JSON file storing the PIO data extracted from the SR
    :param option: one of {'textOriginal', 'textStem', 'textExcludeStopWord'}
    :return: results are written to a second sheet of each article's workbook
    """
    with open(ref_Path, 'r') as load_f:
        pio_json = json.load(load_f)
    for pio in pio_json['content']:
        title = pio['Title']
        # The PIO fields are pio['Participants'], pio['Interventions'],
        # and pio['Outcomes'].
        exist = False  # whether the PIO reference exists in the article directory
        # Parse the publication year from the title; default to 2001 (i.e. keep
        # the paper) when the title does not have the expected two-part form.
        title_parts = title.split(' ')
        if len(title_parts) == 2:
            year = int(title_parts[1][0:4])
        else:
            year = 2001
        if year >= 2000:  # skip papers published before 2000
            path_list = os.listdir(hyp_Path)
            # Search the directory for the article matching this reference title.
            for filename in path_list:
                parts = filename.split('_')
                if parts[0] == pio['Title'] and filename.endswith('.xls'):
                    exist = True
                    break
            if exist:  # the reference was found in the article directory
                rd = xlrd.open_workbook(hyp_Path + '/' + filename)
                sheet = rd.sheet_by_index(0)  # sheet containing the article text
                nrows = sheet.nrows
                wb = copy(rd)
                try:
                    sheet1 = wb.get_sheet(1)
                except Exception:
                    # Add a worksheet to record the ROUGE matrix.
                    sheet1 = wb.add_sheet('ROUGE Matrix', cell_overwrite_ok=True)
                    sheet1.write_merge(0, 0, 1, 9, 'P{rouge-1[f,p,r]}{rouge-2[f,p,r]}{rouge-l[f,p,r]}')
                    sheet1.write_merge(0, 0, 10, 18, 'I{rouge-1[f,p,r]}{rouge-2[f,p,r]}{rouge-l[f,p,r]}')
                    sheet1.write_merge(0, 0, 19, 27, 'O{rouge-1[f,p,r]}{rouge-2[f,p,r]}{rouge-l[f,p,r]}')
                rouge = Rouge()
                for i in range(nrows):
                    sheet1.write(i + 1, 0, i + 1)
                    tempStr = bytes.decode(sheet.cell(i, 0).value.encode('utf-8'))
                    textOriginal = tempStr
                    textExcludeStopWord = excludeStopWords(tempStr)  # stop words removed
                    textStem = lancaster_stemmer.stem(tempStr)       # stemmed text
                    if option == 'textOriginal':
                        # Similarity between the original text and the PIO
                        score_p = rouge.get_scores(textOriginal, pio['Participants'])
                        score_i = rouge.get_scores(textOriginal, pio['Interventions'])
                        score_o = rouge.get_scores(textOriginal, pio['Outcomes'])
                    if option == 'textStem':
                        # Similarity after stemming
                        score_p = rouge.get_scores(textStem, lancaster_stemmer.stem(pio['Participants']))
                        score_i = rouge.get_scores(textStem, lancaster_stemmer.stem(pio['Interventions']))
                        score_o = rouge.get_scores(textStem, lancaster_stemmer.stem(pio['Outcomes']))
                    if option == 'textExcludeStopWord':
                        # Similarity after stop-word removal
                        score_p = rouge.get_scores(textExcludeStopWord, excludeStopWords(pio['Participants']))
                        score_i = rouge.get_scores(textExcludeStopWord, excludeStopWords(pio['Interventions']))
                        score_o = rouge.get_scores(textExcludeStopWord, excludeStopWords(pio['Outcomes']))
                    writeRouge(i + 1, 1, score_p, sheet1)
                    writeRouge(i + 1, 10, score_i, sheet1)
                    writeRouge(i + 1, 19, score_o, sheet1)
                if not os.path.exists(hyp_Path + '/' + option):
                    os.makedirs(hyp_Path + '/' + option)
                wb.save(hyp_Path + '/' + option + '/' + filename.split('_')[0] + '_' + option + '.xls')
                print(filename + ' ROUGE matrix has been generated')
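# `writeRouge` is an external helper. A plausible sketch matching the header
# layout above (nine columns per PIO element: rouge-1/2/l x f/p/r); the actual
# helper may differ.
def writeRouge(row, col, score, sheet):
    s = score[0]
    values = [s[m][k] for m in ('rouge-1', 'rouge-2', 'rouge-l')
              for k in ('f', 'p', 'r')]
    for offset, value in enumerate(values):
        sheet.write(row, col + offset, value)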
# Training loop, disabled in the source via a triple-quoted block (the opening
# quotes fall before this excerpt):
#
#         total_loss += loss[0].item()
#         optimizer.step()
#         if count == 100:
#             print("batch : ", count, " loss : ", loss[0].item())
#         count += 1
#     print(epoch, " epoch loss = ", total_loss / count)

# Load the trained model and generate summaries for the test set.
model.load_state_dict(torch.load("nlp_final.pt"))
model.eval()
pred_summaries, test_summaries = [], []
for input_ids_e, attention_mask_e, input_ids_d, attention_mask_d in test_loader:
    generated = model.generate(
        input_ids_e.to(device),
        do_sample=True,
        top_k=0,
        decoder_start_token_id=model.config.decoder.pad_token_id,
        max_length=121)
    pred_summaries.append(tokenizer.decode(generated[0], skip_special_tokens=True))
    test_summaries.append(tokenizer.decode(input_ids_d[0], skip_special_tokens=True))

from rouge import Rouge

rouge = Rouge()
scores = rouge.get_scores(pred_summaries, test_summaries, avg=True)
print("scores = ", scores)
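# Note: with avg=True, get_scores collapses the per-pair results into a single
# dict of averaged scores rather than the usual one-dict-per-pair list, e.g.
#
#   {'rouge-1': {'f': ..., 'p': ..., 'r': ...},
#    'rouge-2': {...},
#    'rouge-l': {...}}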
def process_example(filename):
    text = open(parser.dataset_dir + "/" + filename).read()
    sentences, ground_truth = preprocess_text(text)
    if len(sentences) < 30:
        return [], None, None
    sentences_vec = sent2vec_model.embed_sentences(sentences)
    ground_truth_vec = sent2vec_model.embed_sentence(ground_truth)

    # Compute the mean vector of the whole text
    text_mean_vec = np.mean(sentences_vec, axis=0)
    # Extract the ground-truth vector from the whole text
    text_mean_diff_vec = np.subtract(text_mean_vec, ground_truth_vec)

    sentence_idx = 0
    sentences_dist = []
    for sentence_vec in sentences_vec:
        sentence_vec_dist = np.linalg.norm(
            (text_mean_vec, np.add(sentence_vec, text_mean_diff_vec[0])))
        try:
            rouge_str = rouge.get_scores(ground_truth, sentences[sentence_idx])
        except ValueError:
            sentence_idx += 1
            continue
        sentences_dist.append(
            [sentence_idx, sentence_vec_dist, rouge_to_list(rouge_str)])
        sentence_idx += 1
    if len(sentences_dist) == 0:
        return [], None, None

    # Sort sentences by closest vector distance
    sentences_dist.sort(key=lambda x: x[1])
    best_vector_idx = sentences_dist[0][0]
    sentences_vector_idx = []
    best_vector_str = ""
    for sentence in sentences_dist:
        if len(best_vector_str) > 200:
            break
        best_vector_str += sentences[sentence[0]] + ". "
        sentences_vector_idx.append(sentence[0])

    # Sort by best ROUGE-1 score
    sentences_dist.sort(key=lambda x: x[2][0], reverse=True)
    best_rouge_idx = sentences_dist[0][0]
    sentences_rouge_idx = []
    best_rouge_str = ""
    for sentence in sentences_dist:
        if len(best_rouge_str) > int(parser.max_summary_length):
            break
        best_rouge_str += sentences[sentence[0]] + ". "
        sentences_rouge_idx.append(sentence[0])

    log_file = open(parser.log_file_name, "a")
    log_file.write("\n\nProcessing " + filename)
    log_file.write("\n* GROUND TRUTH = " + ground_truth)
    log_file.write("\n* BEST VECTOR SUMMARY = " + best_vector_str)
    log_file.write("\n* BEST ROUGE SUMMARY = " + best_rouge_str)
    log_file.write("\n* ROUGE VECTOR SCORES = " +
                   str(rouge_to_list(rouge.get_scores(ground_truth, best_vector_str))))
    log_file.write("\n* ROUGE BEST SCORES = " +
                   str(rouge_to_list(rouge.get_scores(ground_truth, best_rouge_str))))
    log_file.close()

    best_vector_vec = sent2vec_model.embed_sentence(best_vector_str)
    if parser.generate_tsne == "True":
        sentences_vec_tsne = np.vstack(
            (sentences_vec, text_mean_vec, ground_truth_vec, best_vector_vec))
        first_sentence = True
        U, s, Vh = np.linalg.svd(sentences_vec_tsne, full_matrices=False)
        for i in range(len(sentences_vec)):
            fig = plt.gcf()
            fig.set_size_inches(5, 5)
            if first_sentence:
                plt.plot(U[i, 0], U[i, 1], 'go', label='sentence', markersize=16)
                first_sentence = False
            else:
                plt.plot(U[i, 0], U[i, 1], 'go', markersize=16)
        # Text mean and ground truth
        plt.plot(U[len(sentences_vec), 0], U[len(sentences_vec), 1],
                 'bs', label='text mean', markersize=16)
        plt.plot(U[len(sentences_vec) + 1, 0], U[len(sentences_vec) + 1, 1],
                 'r^', label='ground truth', markersize=14)
        # Plot vector-selected sentences
        first_sentence = True
        for i in sentences_vector_idx:
            if first_sentence:
                plt.plot(U[i, 0], U[i, 1], 'm+',
                         label='vector selected sentences', markersize=24)
                first_sentence = False
            else:
                plt.plot(U[i, 0], U[i, 1], 'm+', markersize=24)
        # Plot best-ROUGE-selected sentences
        first_sentence = True
        for i in sentences_rouge_idx:
            if first_sentence:
                plt.plot(U[i, 0], U[i, 1], 'yx',
                         label='rouge selected sentences', markersize=24)
                first_sentence = False
            else:
                plt.plot(U[i, 0], U[i, 1], 'yx', markersize=24)
        # Save the TSNE file
        plt.xlim((-1, 1))
        plt.ylim((-1, 1))
        legend = plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
                            ncol=4, prop={'size': 6})
        for leg_handle in legend.legendHandles:
            leg_handle._legmarker.set_markersize(6)
        plt.savefig("tsne/" + filename + ".png", format="png")
        plt.clf()
    return [rouge_to_list(rouge.get_scores(ground_truth, best_vector_str)),
            rouge_to_list(rouge.get_scores(ground_truth, best_rouge_str))
            ], best_vector_str, best_rouge_str
sentences_vec = common.model.embed_sentences(sentences)
text_mean_vec = np.mean(sentences_vec, axis=0)
# Extract the ground-truth vector from the summary
text_mean_diff_vec = np.subtract(text_mean_vec, summary_vec)
sentences_dist = []
sentence_idx = 0
# For each sentence, compute its offset relative to the ground-truth summary
for sentence_vec in sentences_vec:
    sentence_vec_dist = np.linalg.norm(
        (text_mean_vec, np.add(sentence_vec, text_mean_diff_vec)))
    try:
        rouge_str = rouge.get_scores(summary, sentences[sentence_idx])
    except ValueError:
        # Ignore sentences for which ROUGE scores cannot be computed
        common.log_message(
            "ERROR",
            str(file) + " -- Error computing ROUGE of sentence " +
            sentences[sentence_idx])
        sentence_idx += 1
        continue
    # Record the sentence and its statistics for further classification
    sentences_dist.append([file, sentence_idx, len(sentences[sentence_idx]),
                           sentence_vec_dist, common.rouge_to_list(rouge_str)])
    sentence_idx += 1

# Sort the sentence list by closest vector distance, then shortest sentence
sentences_dist.sort(key=lambda x: (x[3], x[2]))
def decode(self):
    start = time.time()
    counter = 0
    batch = self.batcher.next_batch()
    decoded_result = []
    refered_result = []
    article_result = []
    while batch is not None:
        # Run beam search to get the best hypothesis
        best_summary = self.beam_search(batch)

        # Extract the output ids from the hypothesis and convert back to words
        output_ids = [int(t) for t in best_summary.tokens[1:]]
        decoded_words = data.outputids2words(
            output_ids, self.vocab,
            (batch.art_oovs[0] if config.pointer_gen else None))

        # Remove the [STOP] token from decoded_words, if necessary
        try:
            fst_stop_idx = decoded_words.index(data.STOP_DECODING)
            decoded_words = decoded_words[:fst_stop_idx]
        except ValueError:
            pass

        original_abstract_sents = batch.original_abstracts_sents[0]
        article = batch.original_articles[0]

        # Split the decoded words into sentences at each period
        decoded_sents = []
        while len(decoded_words) > 0:
            try:
                fst_period_idx = decoded_words.index(".")
            except ValueError:
                fst_period_idx = len(decoded_words)
            sent = decoded_words[:fst_period_idx + 1]
            decoded_words = decoded_words[fst_period_idx + 1:]
            decoded_sents.append(' '.join(sent))

        # pyrouge calls a perl script that puts the data into HTML files,
        # so the output must be made HTML safe.
        decoded_sents = [make_html_safe(w) for w in decoded_sents]
        reference_sents = [make_html_safe(w) for w in original_abstract_sents]
        decoded_result.append(' '.join(decoded_sents))
        refered_result.append(' '.join(reference_sents))
        article_result.append(article)
        counter += 1
        if counter % 1000 == 0:
            print('%d example in %d sec' % (counter, time.time() - start))
            start = time.time()
        batch = self.batcher.next_batch()

    print("Decoder has finished reading dataset for single_pass.")
    print("Now starting ROUGE eval...")
    load_file = self.model_path_name
    self.print_original_predicted(decoded_result, refered_result,
                                  article_result, load_file)

    rouge = Rouge()
    scores = rouge.get_scores(decoded_result, refered_result)
    rouge_1 = sum(x["rouge-1"]["f"] for x in scores) / len(scores)
    rouge_2 = sum(x["rouge-2"]["f"] for x in scores) / len(scores)
    rouge_l = sum(x["rouge-l"]["f"] for x in scores) / len(scores)
    rouge_1_r = sum(x["rouge-1"]["r"] for x in scores) / len(scores)
    rouge_2_r = sum(x["rouge-2"]["r"] for x in scores) / len(scores)
    rouge_l_r = sum(x["rouge-l"]["r"] for x in scores) / len(scores)
    rouge_1_p = sum(x["rouge-1"]["p"] for x in scores) / len(scores)
    rouge_2_p = sum(x["rouge-2"]["p"] for x in scores) / len(scores)
    rouge_l_p = sum(x["rouge-l"]["p"] for x in scores) / len(scores)

    log_str = (" rouge_1:" + "%.4f" % rouge_1 +
               " rouge_2:" + "%.4f" % rouge_2 +
               " rouge_l:" + "%.4f" % rouge_l)
    log_str_r = (" rouge_1_r:" + "%.4f" % rouge_1_r +
                 " rouge_2_r:" + "%.4f" % rouge_2_r +
                 " rouge_l_r:" + "%.4f" % rouge_l_r)
    log_str_p = (" rouge_1_p:" + "%.4f" % rouge_1_p +
                 " rouge_2_p:" + "%.4f" % rouge_2_p +
                 " rouge_l_p:" + "%.4f" % rouge_l_p)
    logger.info(load_file + log_str)

    results_file = os.path.join(self._decode_dir, "ROUGE_results.txt")
    with open(results_file, "w") as f:
        f.write(log_str + '\n')
        f.write(log_str_r + '\n')
        f.write(log_str_p + '\n')