def betterAnswer(baseline, new, questionBody):
    """Return whichever candidate answer has the higher ROUGE-L F-score.

    The BioASQ training file is searched for the first question whose
    stripped ``body`` equals the stripped ``questionBody``. Both candidates
    are scored against the first entry of that question's ``ideal_answer``,
    and the three texts plus precision/recall are printed for inspection.

    Args:
        baseline: Baseline answer text.
        new: Fused (new) answer text.
        questionBody: Body of the question to look up in the dataset.

    Returns:
        ``new`` when its F-score is strictly greater than the baseline's,
        otherwise ``baseline``. ``None`` when no question body matches.
    """
    # Context manager so the handle is released even if JSON parsing fails.
    with open("./input/BioASQ-trainingDataset5b.json", 'r') as infile_true:
        data_true = json.load(infile_true)

    for question_i in data_true['questions']:
        if question_i['body'].strip() != questionBody.strip():
            continue

        r = Rouge()
        manual_summmary = question_i['ideal_answer'][0]
        precision_base, recall_base, f_score_base = r.rouge_l(
            [baseline], [manual_summmary])
        precision_new, recall_new, f_score_new = r.rouge_l(
            [new], [manual_summmary])

        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("============================================")
        print("Ideal_answer \n")
        print(manual_summmary)
        print("Fused_answer %f %f \n" % (precision_new, recall_new))
        print(new)
        print("Baseline_answer %f %f \n" % (precision_base, recall_base))
        print(baseline)

        print("============================================")
        if f_score_base < f_score_new:
            print("11111")
            return new
        # Ties go to the baseline.
        print("22222")
        return baseline
    return None
def read_dataset():
    """Label the sentences of the first 2000 stories against their highlights.

    Every story sentence is scored with ROUGE-L against every highlight of
    the same story. A pair whose F-score is above 0.70 contributes
    ``(sentence, 'yes')``; every other pair contributes ``(sentence, 'no')``.
    Labels are collected per pair into a set, so a single sentence can end up
    carrying both labels.

    Returns:
        dict mapping the story index (0..1999) to its set of
        ``(sentence, label)`` tuples.
    """
    labelled = {}
    scorer = Rouge()
    min_f_score = .70
    all_stories = get_dataset()

    for story_idx in range(2000):
        sentences = all_stories[story_idx]['story']
        targets = all_stories[story_idx]['highlights']
        pairs = set()

        for sentence in sentences:
            for target in targets:
                # NOTE(review): the arguments are passed bare here, not
                # wrapped in lists -- confirm this is what Rouge expects.
                _precision, _recall, f_measure = scorer.rouge_l(
                    sentence, target)
                label = 'yes' if f_measure > min_f_score else 'no'
                pairs.add((sentence, label))

        labelled[story_idx] = pairs

    return labelled
def score_model(test_pairs, model, model_id, nb_examples, output_type):
    """Accumulate ROUGE scores for predictions and report ROUGE-L by novelty.

    Predictions come from ``predict_greedy`` when ``output_type`` is
    ``'greedy'`` and from ``predict_from_data`` otherwise. For every result
    the reference and the generated summary are cut at the first ``'EOS'``,
    ROUGE-1/2/L are added to ``scores``, and the ROUGE-L value is filed into
    one of 11 buckets chosen by ``int(novelty * 10)``. The mean ROUGE-L and
    size of the first ten buckets are then printed.

    NOTE: the function is cut off in this file right after the printing
    loop, so ``pyRouge``, ``summaries``, ``scores`` and ``model_id`` have no
    visible use here and are kept for the missing remainder.

    Args:
        test_pairs: Test examples handed to the prediction helper.
        model: Model handed to the prediction helper.
        model_id: Not used in the visible part of the function.
        nb_examples: Upper bound of the ``_range`` passed to the helper.
        output_type: ``'greedy'`` or the key of the output to read from each
            result.
    """
    scores = [0, 0, 0, 0]
    rouge_calc = RougeCalculator(stopwords=True, lang="en")
    pyRouge = Rouge()
    if output_type == 'greedy':
        results = predict_greedy(test_pairs,
                                 _range=(0, nb_examples),
                                 model=model)
    else:
        results = predict_from_data(test_pairs,
                                    _range=(0, nb_examples),
                                    model=model)
    summaries = []
    novelty_dist = [[] for _ in range(11)]
    for k in results:
        el = results[k]
        # Splitting on " " and re-joining with " " is an identity, so only
        # the cut at the first 'EOS' marker is kept.
        ref = el['ref'].split('EOS')[0]
        summary = el[output_type].split('EOS')[0]

        scores[0] += rouge_calc.rouge_1(summary, ref)
        scores[1] += rouge_calc.rouge_2(summary, ref)
        rouge_l = rouge_calc.rouge_l(summary, ref)

        n = novelty.compute_novelty(ref, el['text'], 3)
        novelty_dist[int(n * 10)].append(rouge_l)

        scores[2] += rouge_l
        # The original guard was `rouge_l < 0.20 or True`, i.e. always true.
        summaries.append((rouge_l, summary))

    # Only buckets 0..9 are reported; bucket 10 (novelty == 1.0) is skipped,
    # as in the original loop.
    for i in range(10):
        bucket = novelty_dist[i]
        # An empty bucket used to raise ZeroDivisionError; report NaN instead
        # so every bucket still gets a row.
        mean_rouge_l = sum(bucket) / len(bucket) if bucket else float("nan")
        print((i + 1) * 10, mean_rouge_l, len(bucket))
    '''
# Example no. 4
0
from PyRouge.pyrouge import Rouge
# Scorer used to compare the two summaries below.
r = Rouge()

# Candidate summary produced by the system.
system_generated_summary = "The Kyrgyz President pushed through the law requiring the use of ink during the upcoming Parliamentary and Presidential elections In an effort to live up to its reputation in the 1990s as an island of democracy. The use of ink is one part of a general effort to show commitment towards more open elections. improper use of this type of ink can cause additional problems as the elections in Afghanistan showed. The use of ink and readers by itself is not a panacea for election ills."

# Human-written summary the candidate is scored against.
manual_summmary = "The use of invisible ink and ultraviolet readers in the elections of the Kyrgyz Republic which is a small, mountainous state of the former Soviet republic, causing both worries and guarded optimism among different sectors of the population. Though the actual technology behind the ink is not complicated, the presence of ultraviolet light (of the kind used to verify money) causes the ink to glow with a neon yellow light. But, this use of the new technology has caused a lot of problems. "

precision, recall, f_score = r.rouge_l(
    [system_generated_summary], [manual_summmary])

print(
    "Precision is :" + str(precision)
    + "\nRecall is :" + str(recall)
    + "\nF Score is :" + str(f_score)
)
from PyRouge.pyrouge import Rouge

r = Rouge()

# A simple example of how rouge can be calculated
#print r.rouge_l([[1, 7, 6, 7, 5], [0, 2, 8, 3, 5]], [[1, 2, 3, 4, 5], [3, 9, 5]])

# A more practical example of how it can be used for summary evaluation
system_generated_summary = " The Kyrgyz President pushed through the law requiring the use of ink during the upcoming Parliamentary and Presidential elections In an effort to live up to its reputation in the 1990s as an island of democracy. The use of ink is one part of a general effort to show commitment towards more open elections. improper use of this type of ink can cause additional problems as the elections in Afghanistan showed. The use of ink and readers by itself is not a panacea for election ills."
manual_summmary = " The use of invisible ink and ultraviolet readers in the elections of the Kyrgyz Republic which is a small, mountainous state of the former Soviet republic, causing both worries and guarded optimism among different sectors of the population. Though the actual technology behind the ink is not complicated, the presence of ultraviolet light (of the kind used to verify money) causes the ink to glow with a neon yellow light. But, this use of the new technology has caused a lot of problems. "

#print r.rouge_l([system_generated_summary], [manual_summmary])

# Reference abstract and the abstract produced by the model.
ACTUALABSTRACT = '<s> a native american from a tribe not recognized by the feds wins the return of his eagle feathers . </s> <s> an irs accountant is fired for insisting on carrying a symbolic sikh knife to work . </s> <s> a group of chicago pastors takes on city hall over its permits for new churches and loses . </s>'
# Double-quoted on purpose: in a single-quoted literal the closing-quote
# token '' ended the string early, and implicit concatenation silently
# dropped it from the text.
GENERATEDABSTRACT = "<go> <s> united states have been growing since the u.s. religious freedom restoration act . </s> <s> the united states have been growing since the u.s. religious freedom restoration act . </s> <s> new : `` there is reason to doubt whether these state-level religious protections '' </s> <end>"

# Candidate first, reference second, as in the commented example above;
# the reverse order swaps the reported precision and recall.
[precision, recall, f_score] = r.rouge_l([GENERATEDABSTRACT], [ACTUALABSTRACT])

print("Precision is :"+str(precision)+"\nRecall is :"+str(recall)+"\nF Score is :"+str(f_score))
# Example no. 6
0
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Concatenate, TimeDistributed
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
import warnings
# NOTE(review): `pd` is not imported in the visible part of this snippet --
# presumably `import pandas as pd` appears elsewhere; confirm.
# Raise the pandas "display.max_colwidth" option to 200.
pd.set_option("display.max_colwidth", 200)
# Install an "ignore" filter so warnings are suppressed from here on.
warnings.filterwarnings("ignore")


import tensorflow as tf
import os
from tensorflow.python.keras.layers import Layer
from tensorflow.python.keras import backend as K


from PyRouge.pyrouge import Rouge
# Module-level Rouge instance created at import time.
r = Rouge()

class AttentionLayer(Layer):
    """
    This class implements Bahdanau attention (https://arxiv.org/pdf/1409.0473.pdf).
    There are three sets of weights introduced W_a, U_a, and V_a
     """

    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        # Create a trainable weight variable for this layer.

        self.W_a = self.add_weight(name='W_a',
import nltk
from itertools import zip_longest
from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import sentence_bleu
from PyRouge.pyrouge import Rouge

# Module-level Rouge instance used by accuracy() below.
r = Rouge()
# NOTE: this name shadows the builtin `list`. accuracy() appends the lines of
# pred.txt to it, so it keeps growing across calls.
list = []


def accuracy():
    """Print BLEU-1 and ROUGE-L for every prediction/reference line pair.

    ``pred.txt`` holds the predicted captions and ``true_text.txt`` the
    reference captions. Lines are paired by position and pairing stops at the
    shorter file. As before, the prediction lines are also appended to the
    module-level ``list``.

    The original reset its index to 0 on every iteration, so each reference
    was scored against the first prediction only; lines are now walked in
    lockstep.
    """
    with open(
            "C:/BigDataAnalyticsAppns/Tutorial 6 Source Code/medium-show-and-tell-caption-generator-master/etc/pred.txt"
    ) as f2, open(
            "C:/BigDataAnalyticsAppns/Tutorial 6 Source Code/medium-show-and-tell-caption-generator-master/etc/true_text.txt"
    ) as f1:
        predictions = f2.readlines()
        # `list` is the module-level accumulator (it shadows the builtin);
        # keep filling it so code that inspects it afterwards still works.
        list.extend(predictions)

        for prediction, reference in zip(predictions, f1):
            # sentence_bleu takes a list of tokenised references followed by
            # the tokenised hypothesis; weights=(1, 0, 0, 0) is unigram BLEU.
            bleu_score = sentence_bleu([word_tokenize(reference)],
                                       word_tokenize(prediction),
                                       weights=(1, 0, 0, 0))
            # The score used to be computed and thrown away; report it.
            print("BLEU-1 is :" + str(bleu_score))

            # Candidate first, reference second.
            [precision, recall, f_score] = r.rouge_l([prediction],
                                                     [reference])
            print("Precision is :" + str(precision) + "\nRecall is :" +
                  str(recall) + "\nF Score is :" + str(f_score))
from PyRouge.pyrouge import Rouge

r = Rouge()

# Read both files up front; the context manager closes the handles, which the
# original left open for the lifetime of the process.
with open('test.eval_titles.txt') as fptr1, \
        open('eval_articles.1_300000.txt') as fptr2:
    system_summaries = fptr1.readlines()  #.split()
    model_summaries = fptr2.readlines()  #.split()

avg_p = avg_r = avg_f1 = 0
for i, system_summary in enumerate(system_summaries):
    # Indexing (rather than zip) keeps the IndexError when the second file is
    # shorter, instead of silently scoring fewer pairs.
    model_summary = model_summaries[i]
    [precision, recall, f_score] = r.rouge_l([system_summary],
                                             [model_summary])
    avg_p += precision
    avg_r += recall
    avg_f1 += f_score

    print("Sentence:", i)
    print("Human:", system_summary)
    print("Model:", model_summary)
    print("Precision is :" + str(precision) + "\nRecall is :" + str(recall) +
          "\nF Score is :" + str(f_score))
    print()

# An empty input file used to raise ZeroDivisionError here; with a divisor of
# 1 the (all-zero) sums are reported as 0.0 instead.
num_summaries = len(system_summaries) or 1

print("----------------------Final eval-------------------")
print("Precision:", float(avg_p / num_summaries))
print("Recall:", float(avg_r / num_summaries))
print("F1-score:", float(avg_f1 / num_summaries))
# Example no. 9
0
def main(args):
    """Run a saved seq2seq checkpoint on the test split and report metrics.

    Steps: load the newest file under ``./save/<save_path>/`` together with
    the Hanja word2vec model and the Korean SentencePiece model, run ``test``
    over ``./test_dat.pkl``, decode predictions and references to text, dump
    both lists to ``test_result.pkl``, then print BLEU, METEOR and averaged
    ROUGE-L precision/recall/F-score.

    Fixes relative to the original:
      * metrics are computed on the decoded ``pred_list`` / ``ref_list``;
        ``test_dat`` is only read under the keys ``test_hanja`` and
        ``test_korean``, so ``test_dat['reference']`` / ``test_dat['pred']``
        pointed at the wrong dict;
      * ``str.foramt`` typo (AttributeError) corrected to ``format``;
      * the model is moved to the computed ``device`` instead of calling
        ``.cuda()`` unconditionally;
      * the "Save file" message names the file that was actually written;
      * ROUGE-L is called candidate-first so precision/recall are not swapped.

    Args:
        args: Namespace providing ``hj_method``, ``kr_method``,
            ``batch_size``, ``beam_size``, ``hidden_size``, ``embed_size``,
            ``vocab_size``, ``max_len``, ``pad_id``, ``n_layers`` and
            ``stop_ix``.
    """
    # Setting
    warnings.simplefilter("ignore", UserWarning)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Args Parser
    hj_method = args.hj_method
    kr_method = args.kr_method
    batch_size = args.batch_size
    beam_size = args.beam_size
    hidden_size = args.hidden_size
    embed_size = args.embed_size
    vocab_size = args.vocab_size
    max_len = args.max_len
    padding_index = args.pad_id
    n_layers = args.n_layers
    stop_ix = args.stop_ix

    # Load saved model & Word2vec
    save_path = 'save_{}_{}_{}_maxlen_{}'.format(vocab_size, hj_method,
                                                 kr_method, max_len)
    # NOTE(review): test_result.pkl is written into this same directory
    # further down, so on a later run it may sort last and be picked up as
    # the checkpoint -- confirm the checkpoint naming or narrow the glob.
    save_list = sorted(glob.glob(f'./save/{save_path}/*.*'))
    save_pt = save_list[-1]
    print('Will load {} pt file...'.format(save_pt))
    word2vec_hj = Word2Vec.load('./w2v/word2vec_hj_{}_{}.model'.format(
        vocab_size, hj_method))

    # SentencePiece model load
    spm_kr = spm.SentencePieceProcessor()
    spm_kr.Load("./spm/m_korean_{}.model".format(vocab_size))

    # Test data load
    # NOTE: pickle.load executes arbitrary code; only use trusted files.
    with open('./test_dat.pkl', 'rb') as f:
        test_dat = pickle.load(f)

    test_dataset = CustomDataset(test_dat['test_hanja'],
                                 test_dat['test_korean'])
    test_loader = getDataLoader(test_dataset,
                                pad_index=padding_index,
                                shuffle=False,
                                batch_size=batch_size)

    # Model load
    print('Model loading...')
    encoder = Encoder(vocab_size,
                      embed_size,
                      hidden_size,
                      word2vec_hj,
                      n_layers=n_layers,
                      padding_index=padding_index)
    decoder = Decoder(embed_size,
                      hidden_size,
                      vocab_size,
                      n_layers=n_layers,
                      padding_index=padding_index)
    # Use the device chosen above so a CPU-only host does not fail here.
    seq2seq = Seq2Seq(encoder, decoder, beam_size).to(device)
    print(seq2seq)

    print('Testing...')
    start_time = time.time()
    results = test(seq2seq,
                   test_loader,
                   vocab_size,
                   load_pt=save_pt,
                   stop_ix=stop_ix)
    print(time.time() - start_time)
    print('Done!')

    print("Decoding...")
    pred_list = []
    for result_text in tqdm(results):
        token_ids = torch.Tensor(result_text).squeeze().tolist()
        token_ids = [int(x) for x in token_ids]
        pred_list.append(spm_kr.decode_ids(token_ids).strip())
    ref_list = [
        spm_kr.decode_ids(ref_text).strip()
        for ref_text in tqdm(test_dat['test_korean'][:stop_ix])
    ]
    print('Done!')

    result_path = f'./save/{save_path}/test_result.pkl'
    with open(result_path, 'wb') as f:
        pickle.dump({
            'pred': pred_list,
            'reference': ref_list,
        }, f)
    print('Save file; {}'.format(result_path))

    # Calculate BLEU Score
    # NOTE(review): if corpus_bleu / meteor_score are nltk's, they expect
    # tokenised input (corpus_bleu: one list of reference token lists per
    # hypothesis; meteor_score: references plus a single hypothesis). The
    # decoded strings are passed as-is, like the original -- confirm.
    print('Calculate BLEU4, METEOR, Rogue-L...')
    chencherry = SmoothingFunction()
    bleu4 = corpus_bleu(ref_list,
                        pred_list,
                        smoothing_function=chencherry.method4)
    print('BLEU Score is {}'.format(bleu4))

    # Calculate METEOR Score
    meteor = meteor_score(ref_list, pred_list)
    print('METEOR Score is {}'.format(meteor))

    # Calculate Rouge-L Score
    r = Rouge()
    total_test_length = len(ref_list)
    precision_all = 0
    recall_all = 0
    f_score_all = 0
    for i in range(total_test_length):
        # Candidate first, reference second.
        [precision, recall, f_score] = r.rouge_l([pred_list[i]],
                                                 [ref_list[i]])
        precision_all += precision
        recall_all += recall
        f_score_all += f_score
    print('Precision : {}'.format(round(precision_all / total_test_length, 4)))
    print('Recall : {}'.format(round(recall_all / total_test_length, 4)))
    print('F Score : {}'.format(round(f_score_all / total_test_length, 4)))
# Example no. 10
0
from PyRouge.pyrouge import Rouge

r = Rouge()

# A simple example of how rouge can be calculated.
# print(...) with one parenthesised argument behaves the same on Python 2 and
# Python 3; the bare print statement was a SyntaxError on Python 3.
print(r.rouge_l([[1, 7, 6, 7, 5], [0, 2, 8, 3, 5]],
                [[1, 2, 3, 4, 5], [3, 9, 5]]))

# A more practical example of how it can be used for summary evaluation
system_generated_summary = " The Kyrgyz President pushed through the law requiring the use of ink during the upcoming Parliamentary and Presidential elections In an effort to live up to its reputation in the 1990s as an island of democracy. The use of ink is one part of a general effort to show commitment towards more open elections. improper use of this type of ink can cause additional problems as the elections in Afghanistan showed. The use of ink and readers by itself is not a panacea for election ills."
manual_summmary = " The use of invisible ink and ultraviolet readers in the elections of the Kyrgyz Republic which is a small, mountainous state of the former Soviet republic, causing both worries and guarded optimism among different sectors of the population. Though the actual technology behind the ink is not complicated, the presence of ultraviolet light (of the kind used to verify money) causes the ink to glow with a neon yellow light. But, this use of the new technology has caused a lot of problems. "

print(r.rouge_l([system_generated_summary], [manual_summmary]))
# Here, we have to first process the 3 sentences. Not adjacent -- form unigrams and bigrams -- compute ROUGE-N precision, ROUGE-N recall
# ROUGE-N  f1 score
# Also compute the ROUGE-L score

# Run this code using Python 2.7, otherwise the string.translate() does not work

from PyRouge.pyrouge import Rouge
import pandas as pd
from nltk import word_tokenize
from nltk.util import ngrams
import string

import math

# Computing ROUGE-L precision, recall and F1 score.
# Module-level Rouge instance for the ROUGE-L evaluation (see the
# commented-out sample call below).
r_summ_evaluate = Rouge()

# Trying with a sample summary
# system_generated_summary = " The Kyrgyz President pushed through the law requiring the use of ink during the upcoming Parliamentary and Presidential elections In an effort to live up to its reputation in the 1990s as an island of democracy. The use of ink is one part of a general effort to show commitment towards more open elections. improper use of this type of ink can cause additional problems as the elections in Afghanistan showed. The use of ink and readers by itself is not a panacea for election ills."
# manual_summmary = " The use of invisible ink and ultraviolet readers in the elections of the Kyrgyz Republic which is a small, mountainous state of the former Soviet republic, causing both worries and guarded optimism among different sectors of the population. Though the actual technology behind the ink is not complicated, the presence of ultraviolet light (of the kind used to verify money) causes the ink to glow with a neon yellow light. But, this use of the new technology has caused a lot of problems. "

# print(r_summ_evaluate.rouge_l([system_generated_summary], [manual_summmary]))

# Here we read the file which lists the 3 sentences for each technique, in three adjacent columns.
# Earlier input files are kept below (commented out) together with their column layouts.
#summ_data_readpath = '/home/soumyadeep/PycharmProjects/MSThesisWorkMay2019/JournalCodes/data/GoldDataOnly36_12thSept.csv'
# 1 : X.AUTHID, 2 : spec_domain, 3: site.content  4-6: GOLD, 7-9 : LEAD, 10-12 : RAND, 13-15 : INCONST, 16-18 : INCONST_NEG, 19-21 : HYBRID1

#summ_data_readpath = '/home/soumyadeep/PycharmProjects/MSThesisWorkMay2019/JournalCodes/data/GoldDataAllAspectsWithHybrid_13thSept.csv'
# 1 : X.AUTHID, 2 : spec_domain, 3: site.content  4-6: GOLD, 7-9: LEAD, 10-12: Hybrid, 13-15: Incons, 16-18: Neg, 19-21: Ctr, 22-24: InconsNeg
# Path of the CSV currently in use (absolute path on the author's machine).
summ_data_readpath = '/home/soumyadeep/PycharmProjects/MSThesisWorkMay2019/JournalCodes/data/GoldDataAllAspects_Annotate_Partial_17thSept.csv'
# Example no. 12
0
 def rouge_l(S, I):
     """Return the ROUGE-L F-score of ``S`` scored against ``I``.

     Each argument is wrapped in a one-element list before being handed to a
     freshly created ``Rouge`` instance; precision and recall are discarded.
     """
     _precision, _recall, f_measure = Rouge().rouge_l([S], [I])
     return f_measure
# Example no. 13
0
from PyRouge.pyrouge import Rouge

r = Rouge()

# A simple example of how rouge can be calculated.
# print(...) with one parenthesised argument behaves the same on Python 2 and
# Python 3; the bare print statement was a SyntaxError on Python 3.
print(r.rouge_l([[1, 7, 6, 7, 5], [0, 2, 8, 3, 5]], [[1, 2, 3, 4, 5], [3, 9, 5]]))

# A more practical example of how it can be used for summary evaluation
system_generated_summary = " The Kyrgyz President pushed through the law requiring the use of ink during the upcoming Parliamentary and Presidential elections In an effort to live up to its reputation in the 1990s as an island of democracy. The use of ink is one part of a general effort to show commitment towards more open elections. improper use of this type of ink can cause additional problems as the elections in Afghanistan showed. The use of ink and readers by itself is not a panacea for election ills."
manual_summmary = " The use of invisible ink and ultraviolet readers in the elections of the Kyrgyz Republic which is a small, mountainous state of the former Soviet republic, causing both worries and guarded optimism among different sectors of the population. Though the actual technology behind the ink is not complicated, the presence of ultraviolet light (of the kind used to verify money) causes the ink to glow with a neon yellow light. But, this use of the new technology has caused a lot of problems. "

print(r.rouge_l([system_generated_summary], [manual_summmary]))