def handler(event, context):
    """Score candidate article summaries with ROUGE-1 F and record the winner.

    Scans the 'articles' and 'summary' DynamoDB tables, joins them on 'url',
    computes the ROUGE-1 F-score of each row's two candidate summaries
    ('summary_a' from distilbert-base-uncased, 'summary_b' from
    bert-large-uncased) against the full article text, and writes both scores
    plus the winning model name back onto the 'summary' table.

    :param event: Lambda invocation event (unused).
    :param context: Lambda context object (unused).
    :return: dict with statusCode 200 and a JSON-encoded confirmation body.
    """
    dynamodb = boto3.resource('dynamodb')

    # NOTE(review): scan() returns at most one page (~1 MB); large tables
    # would need LastEvaluatedKey pagination — confirm expected table size.
    table_articles = dynamodb.Table('articles')
    response_articles = table_articles.scan()
    df_articles = pd.DataFrame.from_dict(response_articles['Items'])

    table_summary = dynamodb.Table('summary')
    response_summary = table_summary.scan()
    df_summary = pd.DataFrame.from_dict(response_summary['Items'])

    print(df_articles)
    print('-----------')
    print(df_summary)
    # Inner join: only URLs present in both tables get scored.
    df = pd.merge(df_articles, df_summary, on='url')
    print('-----------')
    print(df)

    # Hoisted out of the loop: the scorer is stateless across calls.
    rouge = Rouge()
    for index, row in df.iterrows():
        reference = row['article_text']

        score_a = rouge.get_scores(row['summary_a'], reference)[0]['rouge-1']['f']
        print('score a')
        print(score_a)

        score_b = rouge.get_scores(row['summary_b'], reference)[0]['rouge-1']['f']
        print('score b')
        print(score_b)

        # Ties go to bert-large (strict > comparison).
        if score_a > score_b:
            best = 'distilbert-base-uncased'
        else:
            best = 'bert-large-uncased'

        table_summary.update_item(
            Key={
                'url': row['url'],
            },
            UpdateExpression=
            "set score_a = :p, score_b = :r, best_summarizer = :b",
            ExpressionAttributeValues={
                ':p': str(score_a),
                ':r': str(score_b),
                ':b': best
            },
        )

    return {'statusCode': 200, 'body': json.dumps('Scores Calculated')}
# Esempio n. 2
# 0
def rougeScoreExcludeStopWords(hyp_str, ref_str):
    """Compute ROUGE scores between two sentences after stop-word removal.

    :param hyp_str: sentence taken from the source article
    :param ref_str: PIO sentence from the systematic review (SR)
    :return: score structure as produced by Rouge.get_scores
    """
    scorer = Rouge()
    cleaned_hyp = excludeStopWords(hyp_str)
    cleaned_ref = excludeStopWords(ref_str)
    return scorer.get_scores(cleaned_hyp, cleaned_ref)
# Esempio n. 3
# 0
 def evaluate(self, article, ref):
     """Summarize *article* and score it against *ref* with ROUGE.

     :param article: source text to summarize via self.abstract
     :param ref: reference summary
     :return: dict with the decoded summary ('dec') plus the mean
              ROUGE-1/2/L F-scores over the score list.
     """
     dec = self.abstract(article)
     scores = rouge.get_scores(dec, ref)
     count = len(scores)
     averages = {
         out_key: sum(entry[metric]["f"] for entry in scores) / count
         for out_key, metric in (("rouge_1", "rouge-1"),
                                 ("rouge_2", "rouge-2"),
                                 ("rouge_l", "rouge-l"))
     }
     return {'dec': dec, **averages}
# Esempio n. 4
# 0
    def evaluate_batch(self, article):
        """Beam-search decode every validation batch and log corpus-level
        ROUGE-1/2/L F-scores.

        :param article: truthy flag — when set, original/predicted texts are
            also dumped via print_original_predicted. NOTE(review): the name
            is reassigned inside the loop to the last batch's article text,
            so after the loop the flag's truthiness depends on the data —
            confirm this is intended.
        """
        self.setup_valid()
        batch = self.batcher.next_batch()
        # Special-token ids used by beam search to start/stop/handle OOVs.
        start_id = self.vocab.word2id(data.START_DECODING)
        end_id = self.vocab.word2id(data.STOP_DECODING)
        unk_id = self.vocab.word2id(data.UNKNOWN_TOKEN)
        decoded_sents = []
        ref_sents = []
        article_sents = []
        rouge = Rouge()
        while batch is not None:
            enc_batch, enc_lens, enc_padding_mask, enc_batch_extend_vocab, extra_zeros, ct_e = get_enc_data(
                batch)
            # Inference only: no autograd graph for encoding.
            with T.autograd.no_grad():
                enc_batch = self.model.embeds(enc_batch)
                enc_out, enc_hidden = self.model.encoder(enc_batch, enc_lens)

            #-----------------------Summarization----------------------------------------------------
            with T.autograd.no_grad():
                pred_ids = beam_search(enc_hidden, enc_out, enc_padding_mask,
                                       ct_e, extra_zeros,
                                       enc_batch_extend_vocab, self.model,
                                       start_id, end_id, unk_id)

            for i in range(len(pred_ids)):
                decoded_words = data.outputids2words(pred_ids[i], self.vocab,
                                                     batch.art_oovs[i])
                # Replace degenerate (<2-token) decodes with a placeholder so
                # the ROUGE call below does not fail on empty hypotheses.
                if len(decoded_words) < 2:
                    decoded_words = "xxx"
                else:
                    decoded_words = " ".join(decoded_words)
                decoded_sents.append(decoded_words)
                abstract = batch.original_abstracts[i]
                article = batch.original_articles[i]
                ref_sents.append(abstract)
                article_sents.append(article)

            batch = self.batcher.next_batch()

        load_file = self.opt.load_model

        if article:
            self.print_original_predicted(decoded_sents, ref_sents,
                                          article_sents, load_file)

        # Mean F-score per ROUGE variant over all hypothesis/reference pairs.
        scores = rouge.get_scores(decoded_sents, ref_sents)
        rouge_1 = sum([x["rouge-1"]["f"] for x in scores]) / len(scores)
        rouge_2 = sum([x["rouge-2"]["f"] for x in scores]) / len(scores)
        rouge_l = sum([x["rouge-l"]["f"] for x in scores]) / len(scores)
        logger.info(load_file + " rouge_1:" + "%.4f" % rouge_1 + " rouge_2:" +
                    "%.4f" % rouge_2 + " rouge_l:" + "%.4f" % rouge_l)
# Esempio n. 5
# 0
def compute_rouge(source, target):
    """Compute rouge-1, rouge-2 and rouge-l F-scores for two token sequences.

    Tokens are space-joined before scoring; if the scorer raises ValueError
    (e.g. on empty input), all-zero scores are returned instead.
    """
    hyp, ref = ' '.join(source), ' '.join(target)
    keys = ('rouge-1', 'rouge-2', 'rouge-l')
    try:
        first = rouge.get_scores(hyps=hyp, refs=ref)[0]
    except ValueError:
        return dict.fromkeys(keys, 0.0)
    return {key: first[key]['f'] for key in keys}
# Esempio n. 6
# 0
def rougeScoreExcludeStopWords(hyp_str,ref_str):
    '''
    Compute ROUGE scores for two sentences after stripping punctuation and
    English stop words.

    If either sentence becomes empty after filtering, a '*' is appended so
    Rouge.get_scores does not fail on empty input.

    :param hyp_str: sentence taken from the source article
    :param ref_str: PIO sentence from the systematic review (SR)
    :return: score structure as produced by Rouge.get_scores
    '''
    rouge = Rouge()
    # Hoisted invariants: the original rebuilt the stop-word list and the
    # punctuation table for every word, costing O(words * stopwords) per call.
    stop_words = set(stopwords.words('english'))
    punct_table = str.maketrans('', '', string.punctuation)

    def _clean(text):
        # Strip punctuation, drop stop words, guard against an empty result.
        filtered = " ".join(word for word in text.translate(punct_table).split()
                            if word not in stop_words)
        if not filtered.strip():
            filtered = filtered + '*'
        return filtered

    return rouge.get_scores(_clean(hyp_str), _clean(ref_str))
# Esempio n. 7
# 0
            "Ignoring file " + file + " due to parse errors or short length.")
        continue

    try:
        sentences_vec = common.model.embed_sentences(sentences)
    except TypeError:
        common.log_message(
            "ERROR", "Ignoring file " + file + " due to embedding error.")
        continue

    Y_rouge_list = []

    # Compute ROUGE score for each sentence
    for sentence, sentence_vec in zip(sentences, sentences_vec):
        try:
            rouge_str = rouge.get_scores(summary, sentence)
        except ValueError:
            common.log_message("ERROR", str(file) + "  -- Error computing ROUGE of sentence " +\
                                sentence)
            continue

        # Y_rouge_list.append(common.rouge_to_list(rouge_str)[0][2])
        Y_rouge_list.append(common.rouge_to_list(rouge_str))

    Y_list.append(Y_rouge_list)
    Y_idx_list.append(np.argmax(Y_rouge_list, axis=0)[0][0])
    Y_rouge_list = []
    X_list.append(sentences_vec)
    file_list_save.append(file)

    files_counter += 1
# Esempio n. 8
# 0
def compute_scores(start, end, context, a1, a2):
    """Sum, over all examples, the better ROUGE-L F-score of each predicted
    span against either of two reference answers.

    :param start: predicted start indices, one per example
    :param end: predicted end indices (inclusive), one per example
    :param context: per-example token lists the spans index into
    :param a1: first reference answer per example
    :param a2: second reference answer per example
    :return: total score as a float
    """
    total = 0.0
    for idx, span_start in enumerate(start):
        predicted_span = ' '.join(context[idx][span_start:end[idx] + 1])
        first = rouge.get_scores(predicted_span, a1[idx])[0]['rouge-l']['f']
        second = rouge.get_scores(predicted_span, a2[idx])[0]['rouge-l']['f']
        total += max(first, second)
    return total
def generate_summary(filename):
    """Build and log two extractive summaries for *filename*: one by vector
    distance to the text mean, one by per-sentence ROUGE against the ground
    truth; append both summaries' ROUGE stats to the global stats_vec /
    stats_rouge lists.

    Returns None (doing nothing) for texts with fewer than 10 sentences.
    Relies on module-level globals: parser, rouge, log_file, log_file_error,
    top_n, top_n_counter, stats_vec, stats_rouge, global_offset_mean.
    """
    global global_offset_mean
    sentences_text, sentences_vec, ground_truth_text, ground_truth_vec = preprocess_text(filename)
    stat = []
    global top_n_counter

    # print("Sentences = ", sentences_text)
    # print("Sentences vec = ", sentences_vec)
    # print("Ground truth = ", ground_truth_text)
    # print("Ground truth vec = ", ground_truth_vec)

    if len(sentences_text) < 10:
        return None

    text_mean_vec = np.mean(sentences_vec, axis=0)

    # Compute text mean without ground-truth
    if parser.use_ground_truth == "True":
        text_mean_diff_vec = np.subtract(text_mean_vec, ground_truth_vec)
    else:
        text_mean_diff_vec = text_mean_vec

    if parser.mean_ground_truth == "True":
        text_mean_diff_vec = np.subtract(text_mean_vec, global_offset_mean)

    # Rank each sentence: [index, distance-like value, rouge score list].
    sentence_idx = 0
    for sentence_vec in sentences_vec:
        # Compute sentence distance from text text mean
        # NOTE(review): this is the norm of the stacked pair (mean, mean_diff
        # + sentence), not of a difference — verify this is the intended
        # "distance". Also [0] assumes text_mean_diff_vec is 2-D — confirm.
        sentence_vec_dist = np.linalg.norm((text_mean_vec, np.add(text_mean_diff_vec[0], sentence_vec)))

        # Compute ROUGE scores for sentences
        try:
            rouge_str = rouge.get_scores(ground_truth_text, sentences_text[sentence_idx])
        except ValueError:
            # Unscorable sentence: keep it with all-zero ROUGE rows.
            stat.append([sentence_idx, sentence_vec_dist, [[0, 0, 0], [0, 0, 0], [0, 0, 0]]])
            sentence_idx += 1
            continue

        stat.append([sentence_idx, sentence_vec_dist, rouge_to_list(rouge_str)])
        sentence_idx += 1

    # Compute vector summary
    stat.sort(key=lambda x: x[1])
    sentences_vector_idx = []
    best_vector_str = ""

    # Greedily take closest sentences until the length cap is exceeded.
    for sentence in stat:
        if len(best_vector_str) > int(parser.max_summary_length):
            break
        # print("sentence idx = ", sentence[0])
        # print("vector str = ", best_vector_str)
        best_vector_str += sentences_text[sentence[0]] + ". "
        sentences_vector_idx.append(sentence[0])

    # print("Vector final = ", best_vector_str)

    # idx = 0
    # for sentence in sentences_text:
    #     print(idx, sentence)
    #     idx += 1

    # Compute ROUGE summary
    # NOTE(review): key x[2][0] sorts by the first row of rouge_to_list's
    # output — presumably ROUGE-1; confirm against rouge_to_list.
    stat.sort(key=lambda x: x[2][0], reverse=True)
    sentences_rouge_idx = []
    best_rouge_str = ""

    for sentence in stat:
        if len(best_rouge_str) > int(parser.max_summary_length):
            break
        best_rouge_str += sentences_text[sentence[0]] + ". "
        sentences_rouge_idx.append(sentence[0])

    log_file.write("\n\n-----------------------------------------------------------------\n")
    log_file.write("Processing file " + str(filename) + "\n")
    log_file.write("Best vector indexes = " + str(sentences_vector_idx) + "\n")
    try:
        log_file.write("ROUGE Scores = " + str(rouge_to_list(rouge.get_scores(ground_truth_text, best_vector_str))) + "\n")
    except ValueError:
        log_file_error.write("[" + filename + "] Error computing ROUGE score for summary vector\n")

    log_file.write("Vector summary = " + str(best_vector_str) + "\n")
    log_file.write("Best ROUGE indexes " + str(sentences_rouge_idx) + "\n")
    log_file.write("ROUGE summary = " + str(best_rouge_str) + "\n")
    try:
        log_file.write("ROUGE Scores = " + str(rouge_to_list(rouge.get_scores(ground_truth_text, best_rouge_str))) + "\n")
    except ValueError:
        log_file_error.write("[" + filename + "] Error computing ROUGE score for ROUGE vector\n")

    log_file.write("Ground truth summary = " + ground_truth_text);

    # Count how often a vector-selected sentence appears in the top-idx
    # ROUGE-selected sentences, for each cutoff idx < top_n.
    for sentence_idx in sentences_vector_idx:
        for idx in range(0, top_n):
            if sentence_idx in sentences_rouge_idx[:idx]:
                top_n_counter[idx] += 1

    try:
        stat_vec = rouge_to_list(rouge.get_scores(ground_truth_text, best_vector_str))
    except:
        stat_vec = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]

    try:
        stat_rouge = rouge_to_list(rouge.get_scores(ground_truth_text, best_rouge_str))
    except:
        stat_rouge = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]

    stats_vec.append(stat_vec)
    stats_rouge.append(stat_rouge)
# Esempio n. 10
# 0
def calculateRouge(hyp_Path, ref_Path,option):
    '''
    Compute ROUGE similarity between every sentence of a source article and
    the PIO information extracted from a systematic review (SR).

    :param hyp_Path: folder containing the source-article .xls files
    :param ref_Path: JSON file holding the SR's PIO entries
    :param option: one of {'textOriginal','textStem','textExcludeStopWord'},
        selecting raw, stemmed, or stop-word-filtered comparison
    :return: None; the ROUGE matrix is written to a 'ROUGE Matrix' sheet of a
        workbook copy saved under hyp_Path/<option>/
    '''
    with open(ref_Path, 'r') as load_f:
        pio_json = json.load(load_f)
    for pio in pio_json['content']:
        title = pio['Title']
        # PIO fields: pio['Participants'], pio['Interventions'], pio['Outcomes']
        exist = False  # whether the cited article exists in the source folder
        # The title's second token is expected to start with the publication
        # year ("<name> <year>..."); any other shape defaults to 2001 so the
        # paper is kept. NOTE(review): the <2 and >2 branches both assign the
        # same default — confirm that is intended.
        year = 2001
        if len(title.split(' ')) <2 :
            year = 2001
        if len(title.split(' ')) >2:
            year = 2001
        if len(title.split(' ')) == 2:
            year = int(title.split(' ')[1][0:4])
        if year >= 2000:  # skip papers published before 2000
            path_list = os.listdir(hyp_Path)
            for filename in path_list:  # find the article matching this reference
                # Renamed from 'str' — the original shadowed the builtin.
                name_parts = filename.split('_')
                if name_parts[0] == pio['Title'] and filename.endswith('.xls'):
                    exist = True
                    break
            if exist == True:  # the cited article is present in the folder
                rd = xlrd.open_workbook(hyp_Path+'/'+filename)
                sheet = rd.sheet_by_index(0)  # sheet holding the article text
                nrows = sheet.nrows
                wb = copy(rd)
                try:
                    sheet1 = wb.get_sheet(1)
                except Exception as err:
                    # Add a worksheet to record the ROUGE matrix.
                    sheet1 = wb.add_sheet('ROUGE Matrix', cell_overwrite_ok=True)
                sheet1.write_merge(0, 0, 1, 9, 'P{rouge-1[f,p,r]}{rouge-2[f,p,r]}{rouge-l[f,p,r]}')
                sheet1.write_merge(0, 0, 10, 18, 'I{rouge-1[f,p,r]}{rouge-2[f,p,r]}{rouge-l[f,p,r]}')
                sheet1.write_merge(0, 0, 19, 27, 'O{rouge-1[f,p,r]}{rouge-2[f,p,r]}{rouge-l[f,p,r]}')
                rouge = Rouge()
                for i in range(0,nrows):
                    sheet1.write(i+1, 0, i+1)
                    tempStr = bytes.decode(sheet.cell(i, 0).value.encode('utf-8'))

                    textOriginal = tempStr
                    textExcludeStopWord = excludeStopWords(tempStr)
                    textStem = lancaster_stemmer.stem(tempStr)

                    if option == 'textOriginal':
                        # similarity between the raw sentence and the PIO fields
                        score_p = rouge.get_scores(textOriginal, pio['Participants'])
                        score_i = rouge.get_scores(textOriginal, pio['Interventions'])
                        score_o = rouge.get_scores(textOriginal, pio['Outcomes'])
                    if option == 'textStem':
                        # similarity after stemming both sides
                        score_p = rouge.get_scores(textStem, lancaster_stemmer.stem(pio['Participants']))
                        score_i = rouge.get_scores(textStem, lancaster_stemmer.stem(pio['Interventions']))
                        score_o = rouge.get_scores(textStem, lancaster_stemmer.stem(pio['Outcomes']))
                    if option == 'textExcludeStopWord':
                        # similarity after removing stop words from both sides
                        score_p = rouge.get_scores(textExcludeStopWord, excludeStopWords(pio['Participants']))
                        score_i = rouge.get_scores(textExcludeStopWord, excludeStopWords(pio['Interventions']))
                        score_o = rouge.get_scores(textExcludeStopWord, excludeStopWords(pio['Outcomes']))

                    # Columns 1/10/19 hold the P/I/O score triples respectively.
                    writeRouge(i + 1, 1, score_p, sheet1)
                    writeRouge(i + 1, 10, score_i, sheet1)
                    writeRouge(i + 1, 19, score_o, sheet1)
                if not os.path.exists(hyp_Path+'/'+option):
                    os.makedirs(hyp_Path+'/'+option)
                wb.save(hyp_Path+'/'+option+'/'+filename.split('_')[0]+'_'+option+'.xls')
                print(filename + ' ROUGE Matrix has generated')
# Esempio n. 11
# 0
    total_loss+=loss[0].item()
    optimizer.step()
    if count ==100:
      print("batch : ",count," loss : ",loss[0].iem())
    count+=1
  print(epoch," epoch loss = ",total_loss/count)
'''
# Evaluation: load the fine-tuned checkpoint and generate summaries for the
# test set, then score them with ROUGE.
model.load_state_dict(torch.load("nlp_final.pt"))
model.eval()

pred_summaries, test_summaries = [], []

for input_ids_e, attention_mask_e, input_ids_d, attention_mask_d in test_loader:
    # Sampled decoding capped at 121 tokens. NOTE(review): do_sample=True
    # with top_k=0 presumably samples from the full distribution — confirm
    # against the transformers generate() docs.
    generated = model.generate(
        input_ids_e.to(device),
        do_sample=True,
        top_k=0,
        decoder_start_token_id=model.config.decoder.pad_token_id,
        max_length=121)
    # Only index [0] of each batch is decoded — assumes batch size 1; verify
    # against test_loader's configuration.
    output = tokenizer.decode(generated[0], skip_special_tokens=True)
    pred_summaries.append(output)
    # Decoder input ids hold the reference summary for this example.
    output = tokenizer.decode(input_ids_d[0], skip_special_tokens=True)
    test_summaries.append(output)

import rouge
from rouge import Rouge
rouge = Rouge()
# avg=True returns a single dict of mean ROUGE scores over the corpus.
scores = rouge.get_scores(pred_summaries, test_summaries, avg=True)

print("scores = ", scores)
def process_example(filename):
    """Build two extractive summaries for one dataset file — one by vector
    distance to the text mean, one by per-sentence ROUGE against the ground
    truth — log both, optionally save a 2-D projection plot, and return
    their ROUGE stats plus both summary strings.

    :param filename: file name relative to parser.dataset_dir
    :return: ([vector_stats, rouge_stats], best_vector_str, best_rouge_str),
        or ([], None, None) when the text is too short or unscorable.
    """
    # NOTE(review): file handle is never closed — consider a with-block.
    text = open(parser.dataset_dir + "/" + filename).read()

    sentences, ground_truth = preprocess_text(text)

    if len(sentences) < 30:
        return [], None, None

    sentences_vec = sent2vec_model.embed_sentences(sentences)
    ground_truth_vec = sent2vec_model.embed_sentence(ground_truth)

    # Compute mean vector of whole text
    text_mean_vec = np.mean(sentences_vec, axis=0)

    # Extract ground-truth vector from whole text
    text_mean_diff_vec = np.subtract(text_mean_vec, ground_truth_vec)

    sentence_idx = 0
    sentences_dist = []

    # Rank each sentence: [index, distance-like value, rouge score list].
    for sentence_vec in sentences_vec:
        # NOTE(review): this takes the norm of the stacked pair, not of a
        # difference — verify this is the intended "distance"; [0] assumes
        # text_mean_diff_vec is 2-D.
        sentence_vec_dist = np.linalg.norm((text_mean_vec, np.add(sentence_vec, text_mean_diff_vec[0])))

        try:
            rouge_str = rouge.get_scores(ground_truth, sentences[sentence_idx])
        except ValueError:
            # Skip sentences ROUGE cannot score.
            sentence_idx += 1
            continue

        sentences_dist.append([sentence_idx, sentence_vec_dist,
                                rouge_to_list(rouge_str)])
        sentence_idx += 1


    if len(sentences_dist) == 0:
        return [], None, None

    # Sort sentences based on closest vector distance
    sentences_dist.sort(key=lambda x: x[1])
    # NOTE(review): best_vector_idx is assigned but never used below.
    best_vector_idx = sentences_dist[0][0]
    sentences_vector_idx = []
    best_vector_str = ""

    # Greedily take closest sentences until the (hard-coded 200-char) cap.
    for sentence in sentences_dist:
        if len(best_vector_str) > 200:
            break
        best_vector_str += sentences[sentence[0]] + ". "
        sentences_vector_idx.append(sentence[0])


    # Sort based on best ROUGE-1 score
    sentences_dist.sort(key=lambda x: x[2][0], reverse=True)
    # NOTE(review): best_rouge_idx is assigned but never used below.
    best_rouge_idx = sentences_dist[0][0]
    sentences_rouge_idx = []
    best_rouge_str = ""

    for sentence in sentences_dist:
        if len(best_rouge_str) > int(parser.max_summary_length):
            break
        best_rouge_str += sentences[sentence[0]] + ". "
        sentences_rouge_idx.append(sentence[0])

    log_file = open(parser.log_file_name, "a")

    log_file.write("\n\nProcessing " + filename)
    log_file.write("\n* GROUND TRUTH = " + ground_truth)
    log_file.write("\n* BEST VECTOR SUMMARY = " + best_vector_str)
    log_file.write("\n* BEST ROUGE SUMMARY = " + best_rouge_str)
    log_file.write("\n* ROUGE VECTOR SCORES = " + str(rouge_to_list(rouge.get_scores(ground_truth, best_vector_str))))
    log_file.write("\n* ROUGE BEST SCORES = " + str(rouge_to_list(rouge.get_scores(ground_truth, best_rouge_str))))
    log_file.close()

    best_vector_vec = sent2vec_model.embed_sentence(best_vector_str)

    # Optional diagnostic plot: project sentence vectors (plus the text mean,
    # ground truth, and best-vector summary) to 2-D via SVD and mark which
    # sentences each method selected.
    if parser.generate_tsne == "True":
        sentences_vec_tsne = np.vstack((sentences_vec, text_mean_vec, ground_truth_vec, best_vector_vec))

        first_sentence = True
        U, s, Vh = np.linalg.svd(sentences_vec_tsne, full_matrices=False)

        for i in range(len(sentences_vec)):
            fig = plt.gcf()
            fig.set_size_inches(5, 5)

            # Label only the first point so the legend has one entry.
            if first_sentence == True:
                plt.plot(U[i, 0], U[i, 1], 'go', label='sentence',
                        markersize=16)
                first_sentence = False
            else:
                plt.plot(U[i, 0], U[i, 1], 'go', markersize=16)

        # Text mean
        plt.plot(U[len(sentences_vec), 0], U[len(sentences_vec), 1], 'bs', label='text mean', markersize=16)
        plt.plot(U[len(sentences_vec) + 1, 0], U[len(sentences_vec) + 1, 1], 'r^', label='ground truth', markersize=14)

        # Plot vector selected sentences
        first_sentence = True
        for i in sentences_vector_idx:
            if first_sentence == True:
                plt.plot(U[i, 0], U[i, 1], 'm+', label='vector selected sentences', markersize=24)
                first_sentence = False
            else:
                plt.plot(U[i, 0], U[i, 1], 'm+', markersize=24)

        # Plot best rouge selected sentences
        first_sentence = True
        for i in sentences_rouge_idx:
            if first_sentence == True:
                plt.plot(U[i, 0], U[i, 1], 'yx', label='rouge selected sentences', markersize=24)
                first_sentence = False
            else:
                plt.plot(U[i, 0], U[i, 1], 'yx', markersize=24)


        # Save TSNE file
        plt.xlim((-1, 1))
        plt.ylim((-1, 1))
        legend = plt.legend(loc='upper center', bbox_to_anchor=(0.5,-0.05), ncol=4, prop={'size': 6})

        for leg_handle in legend.legendHandles:
            leg_handle._legmarker.set_markersize(6)

        plt.savefig("tsne/" + filename + ".png", format="png")
        plt.clf()

    return [rouge_to_list(rouge.get_scores(ground_truth, best_vector_str)),
            rouge_to_list(rouge.get_scores(ground_truth, best_rouge_str))
            ], best_vector_str, best_rouge_str
    # NOTE(review): everything below is unreachable — it follows the return
    # statement above and references names (summary, summary_vec, file,
    # common) that are not defined in this function. It appears to be a
    # fragment of a different script pasted in; confirm before removing.
    sentences_vec = common.model.embed_sentences(sentences)
    text_mean_vec = np.mean(sentences_vec, axis=0)

    # Extract ground-truth vector from summary
    text_mean_diff_vec = np.subtract(text_mean_vec, summary_vec)

    sentences_dist = []
    sentence_idx = 0

    # For each sentence, compute offset in relation to ground-truth summary
    for sentence_vec in sentences_vec:
        sentence_vec_dist = np.linalg.norm(
            (text_mean_vec, np.add(sentence_vec, text_mean_diff_vec)))

        try:
            rouge_str = rouge.get_scores(summary, sentences[sentence_idx])
        except ValueError:
            # Ignore sentences witch is not possible to compute ROUGE scores
            common.log_message("ERROR", str(file) + "  -- Error computing ROUGE of sentence " +\
                                sentences[sentence_idx])
            sentence_idx += 1
            continue

        # Add sentences and statistis for further classification
        sentences_dist.append([file, sentence_idx, len(sentences[sentence_idx]), sentence_vec_dist, \
                                common.rouge_to_list(rouge_str)])
        sentence_idx += 1

    # Sort sentences list based on closest vector distance and shortest sentence
    sentences_dist.sort(key=lambda x: (x[3], x[2]))
# Esempio n. 14
# 0
    def decode(self):
        """Beam-search decode the entire dataset, dump original/predicted
        texts, and write averaged ROUGE-1/2/L f/p/r scores to
        <decode_dir>/ROUGE_results.txt (f-scores also go to the logger).
        """
        start = time.time()
        counter = 0
        batch = self.batcher.next_batch()

        decoded_result = []
        refered_result = []
        article_result = []
        while batch is not None:
            # Run beam search to get best Hypothesis
            best_summary = self.beam_search(batch)

            # Extract the output ids from the hypothesis and convert back to words
            output_ids = [int(t) for t in best_summary.tokens[1:]]
            decoded_words = data.outputids2words(
                output_ids, self.vocab,
                (batch.art_oovs[0] if config.pointer_gen else None))

            # Remove the [STOP] token from decoded_words, if necessary
            try:
                fst_stop_idx = decoded_words.index(data.STOP_DECODING)
                decoded_words = decoded_words[:fst_stop_idx]
            except ValueError:
                decoded_words = decoded_words

            original_abstract_sents = batch.original_abstracts_sents[0]
            article = batch.original_articles[0]

            # Split the decoded token stream into sentences at '.' tokens.
            decoded_sents = []
            while len(decoded_words) > 0:
                try:
                    fst_period_idx = decoded_words.index(".")
                except ValueError:
                    fst_period_idx = len(decoded_words)
                sent = decoded_words[:fst_period_idx + 1]
                decoded_words = decoded_words[fst_period_idx + 1:]
                decoded_sents.append(' '.join(sent))

            # pyrouge calls a perl script that puts the data into HTML files,
            # so our output must be made HTML safe.
            decoded_sents = [make_html_safe(w) for w in decoded_sents]
            reference_sents = [
                make_html_safe(w) for w in original_abstract_sents
            ]
            decoded_result.append(' '.join(decoded_sents))
            refered_result.append(' '.join(reference_sents))
            article_result.append(article)
            counter += 1
            if counter % 1000 == 0:
                print('%d example in %d sec' % (counter, time.time() - start))
                start = time.time()

            batch = self.batcher.next_batch()

        print("Decoder has finished reading dataset for single_pass.")
        print("Now starting ROUGE eval...")
        load_file = self.model_path_name
        self.print_original_predicted(decoded_result, refered_result,
                                      article_result, load_file)

        rouge = Rouge()
        scores = rouge.get_scores(decoded_result, refered_result)

        def _avg(metric, field):
            # Mean of one ROUGE field ('f', 'p' or 'r') over all examples.
            return sum(x[metric][field] for x in scores) / len(scores)

        # Collapses the nine copy-pasted sum(...)/len(...) lines into _avg.
        rouge_1 = _avg("rouge-1", "f")
        rouge_2 = _avg("rouge-2", "f")
        rouge_l = _avg("rouge-l", "f")
        rouge_1_r = _avg("rouge-1", "r")
        rouge_2_r = _avg("rouge-2", "r")
        rouge_l_r = _avg("rouge-l", "r")
        rouge_1_p = _avg("rouge-1", "p")
        rouge_2_p = _avg("rouge-2", "p")
        rouge_l_p = _avg("rouge-l", "p")
        log_str = " rouge_1:" + "%.4f" % rouge_1 + " rouge_2:" + "%.4f" % rouge_2 + " rouge_l:" + "%.4f" % rouge_l
        log_str_r = " rouge_1_r:" + "%.4f" % rouge_1_r + " rouge_2_r:" + "%.4f" % rouge_2_r + " rouge_l_r:" + "%.4f" % rouge_l_r
        logger.info(load_file + " rouge_1:" + "%.4f" % rouge_1 + " rouge_2:" +
                    "%.4f" % rouge_2 + " rouge_l:" + "%.4f" % rouge_l)
        log_str_p = " rouge_1_p:" + "%.4f" % rouge_1_p + " rouge_2_p:" + "%.4f" % rouge_2_p + " rouge_l_p:" + "%.4f" % rouge_l_p
        results_file = os.path.join(self._decode_dir, "ROUGE_results.txt")
        with open(results_file, "w") as f:
            f.write(log_str + '\n')
            f.write(log_str_r + '\n')
            f.write(log_str_p + '\n')