def betterAnswer(baseline, new, questionBody):
    """Return whichever of two candidate answers has the higher ROUGE-L F-score.

    The BioASQ training file ``./input/BioASQ-trainingDataset5b.json`` is
    loaded, and the first question whose stripped ``body`` equals the stripped
    ``questionBody`` is used. Both candidates are scored against the first
    entry of that question's ``ideal_answer`` list, and a comparison report is
    printed.

    Args:
        baseline: Baseline answer text.
        new: Alternative ("fused") answer text.
        questionBody: Question text used to locate the reference answer.

    Returns:
        ``new`` when its F-score is strictly greater than the baseline's,
        ``baseline`` otherwise, or ``None`` when no question body matches.
    """
    # Context manager: the original left this file handle open.
    with open("./input/BioASQ-trainingDataset5b.json", 'r') as infile_true:
        data_true = json.load(infile_true)

    for question_i in data_true['questions']:
        if question_i['body'].strip() != questionBody.strip():
            continue

        r = Rouge()
        ideal_answer = question_i['ideal_answer'][0]
        precision_base, recall_base, f_score_base = r.rouge_l(
            [baseline], [ideal_answer])
        precision_new, recall_new, f_score_new = r.rouge_l(
            [new], [ideal_answer])

        # Single-argument print(...) calls produce the same output on
        # Python 2 and are valid syntax on Python 3.
        print("============================================")
        print("Ideal_answer \n")
        print(ideal_answer)
        print("Fused_answer %f %f \n" % (precision_new, recall_new))
        print(new)
        print("Baseline_answer %f %f \n" % (precision_base, recall_base))
        print(baseline)

        print("============================================")
        # Ties go to the baseline: the new answer must be strictly better.
        if f_score_base < f_score_new:
            print("11111")
            return new
        print("22222")
        return baseline
    return None
def read_dataset():
    """Build sentence-level yes/no labels for the first 2000 stories.

    Every sentence of a story is compared with every highlight of that story
    using ``Rouge.rouge_l``. For each comparison a ``(sentence, label)`` pair
    is added to the story's set: ``'yes'`` when the F-score exceeds 0.70,
    ``'no'`` otherwise. Because one pair is added per highlight, the same
    sentence can end up in the set with both labels.

    Returns:
        dict mapping the story index (0..1999) to its set of
        ``(sentence, label)`` tuples.
    """
    labeled = {}
    scorer = Rouge()
    cutoff = .70
    all_stories = get_dataset()

    for idx in range(2000):
        pairs = set()
        sentences = all_stories[idx]['story']
        summary_lines = all_stories[idx]['highlights']

        for sentence in sentences:
            for summary_line in summary_lines:
                _precision, _recall, f_measure = scorer.rouge_l(
                    sentence, summary_line)
                label = 'yes' if f_measure > cutoff else 'no'
                pairs.add((sentence, label))

        labeled[idx] = pairs

    return labeled
Esempio n. 3
0
from PyRouge.pyrouge import Rouge
# Score one system summary against one manual summary with ROUGE-L and
# print the three values returned by rouge_l.
r = Rouge()

# Summary produced by the system under evaluation.
system_generated_summary = "The Kyrgyz President pushed through the law requiring the use of ink during the upcoming Parliamentary and Presidential elections In an effort to live up to its reputation in the 1990s as an island of democracy. The use of ink is one part of a general effort to show commitment towards more open elections. improper use of this type of ink can cause additional problems as the elections in Afghanistan showed. The use of ink and readers by itself is not a panacea for election ills."

# Manually written summary the system output is compared with.
manual_summmary = "The use of invisible ink and ultraviolet readers in the elections of the Kyrgyz Republic which is a small, mountainous state of the former Soviet republic, causing both worries and guarded optimism among different sectors of the population. Though the actual technology behind the ink is not complicated, the presence of ultraviolet light (of the kind used to verify money) causes the ink to glow with a neon yellow light. But, this use of the new technology has caused a lot of problems. "

# Each text is wrapped in a one-element list before being passed to rouge_l.
precision, recall, f_score = r.rouge_l(
    [system_generated_summary], [manual_summmary])

print("".join([
    "Precision is :", str(precision),
    "\nRecall is :", str(recall),
    "\nF Score is :", str(f_score),
]))
from PyRouge.pyrouge import Rouge

r = Rouge()

# A simple example of how rouge can be calculated (disabled, Python 2 syntax)
#print r.rouge_l([[1, 7, 6, 7, 5], [0, 2, 8, 3, 5]], [[1, 2, 3, 4, 5], [3, 9, 5]])

# A more practical example of how it can be used for summary evaluation.
# These two texts are only referenced by the disabled call further down.
system_generated_summary = " The Kyrgyz President pushed through the law requiring the use of ink during the upcoming Parliamentary and Presidential elections In an effort to live up to its reputation in the 1990s as an island of democracy. The use of ink is one part of a general effort to show commitment towards more open elections. improper use of this type of ink can cause additional problems as the elections in Afghanistan showed. The use of ink and readers by itself is not a panacea for election ills."
manual_summmary = " The use of invisible ink and ultraviolet readers in the elections of the Kyrgyz Republic which is a small, mountainous state of the former Soviet republic, causing both worries and guarded optimism among different sectors of the population. Though the actual technology behind the ink is not complicated, the presence of ultraviolet light (of the kind used to verify money) causes the ink to glow with a neon yellow light. But, this use of the new technology has caused a lot of problems. "

#print r.rouge_l([system_generated_summary], [manual_summmary])

# The pair that is actually scored: a reference abstract and a generated
# abstract, both carrying sentence-boundary markup.
ACTUALABSTRACT = '<s> a native american from a tribe not recognized by the feds wins the return of his eagle feathers . </s> <s> an irs accountant is fired for insisting on carrying a symbolic sikh knife to work . </s> <s> a group of chicago pastors takes on city hall over its permits for new churches and loses . </s>'
GENERATEDABSTRACT = '<go> <s> united states have been growing since the u.s. religious freedom restoration act . </s> <s> the united states have been growing since the u.s. religious freedom restoration act . </s> <s> new : `` there is reason to doubt whether these state-level religious protections '' </s> <end>'
# NOTE(review): the disabled example above passes the system output first and
# the manual summary second; here the actual abstract comes first. If the
# argument order matters, precision and recall may be swapped - confirm.
precision, recall, f_score = r.rouge_l(
    [ACTUALABSTRACT], [GENERATEDABSTRACT])

print("".join([
    "Precision is :", str(precision),
    "\nRecall is :", str(recall),
    "\nF Score is :", str(f_score),
]))
Esempio n. 5
0
        if((i!=0 and i!=target_word_index['sostok']) and i!=target_word_index['eostok']):
            newString=newString+reverse_target_word_index[i]+' '
    return newString

def seq2text(input_seq):
    """Turn a sequence of source-vocabulary ids into a space-separated string.

    Ids equal to 0 are skipped (presumably padding - confirm against the
    tokenizer). Every other id is looked up in ``reverse_source_word_index``
    and followed by one space, so a non-empty result ends with a trailing
    space.
    """
    pieces = [
        reverse_source_word_index[token] + ' '
        for token in input_seq
        if token != 0
    ]
    return ''.join(pieces)


# Running sums of the per-example ROUGE-L values; divided by 100 at the end.
precision_avg = 0.0
recall_avg = 0.0
fscore_avg = 0.0


# Score the first 100 training examples: decode a summary for each review and
# compare it with the original summary using the Rouge instance `r`.
for i in range(0,100):
    # Decode and detokenize once per example; the original evaluated both
    # expressions twice (once for printing, once for scoring).
    original_summary = seq2summary(y_tr[i])
    predicted_summary = decode_sequence(x_tr[i].reshape(1,max_text_len))

    print("Review:",seq2text(x_tr[i]))
    print("Original summary:",original_summary)
    print("Predicted summary:",predicted_summary)
    # The original line was missing the opening '[' of the unpacking target,
    # which is a SyntaxError.
    # NOTE(review): the second argument is passed as a bare string here,
    # whereas other examples wrap it in a list - confirm what rouge_l expects.
    precision, recall, f_score = r.rouge_l([predicted_summary], original_summary)
    precision_avg = precision_avg + precision
    recall_avg = recall_avg + recall
    fscore_avg = fscore_avg + f_score
    print("Precision is :"+str(precision)+"\nRecall is :"+str(recall)+"\nF Score is :"+str(f_score))
    print("\n")

print("Overall precision is: ", precision_avg/100)
print("Overall recall is: ",recall_avg/100)
print("Overall fscore is: ",fscore_avg/100)
from PyRouge.pyrouge import Rouge

# Line-by-line ROUGE-L evaluation of two text files: line i of
# 'test.eval_titles.txt' is scored against line i of
# 'eval_articles.1_300000.txt', and the averages are printed at the end.
r = Rouge()
# Both files are opened in one `with` so the handles are closed after reading;
# the original never closed them.
with open('test.eval_titles.txt') as fptr1, \
        open('eval_articles.1_300000.txt') as fptr2:
    system_summaries = fptr1.readlines()  #.split()
    model_summaries = fptr2.readlines()  #.split()

avg_p = avg_r = avg_f1 = 0
# Indexing (rather than zip) is kept on purpose: if the second file has fewer
# lines, this still raises IndexError instead of silently truncating.
for i, system_summary in enumerate(system_summaries):
    precision, recall, f_score = r.rouge_l([system_summary],
                                           [model_summaries[i]])
    avg_p += precision
    avg_r += recall
    avg_f1 += f_score

    print("Sentence:", i)
    print("Human:", system_summary)
    print("Model:", model_summaries[i])
    print("Precision is :" + str(precision) + "\nRecall is :" + str(recall) +
          "\nF Score is :" + str(f_score))
    print()

# NOTE(review): an empty first file makes the divisions below raise
# ZeroDivisionError, exactly as in the original.
print("----------------------Final eval-------------------")
print("Precision:", float(avg_p / len(system_summaries)))
print("Recall:", float(avg_r / len(system_summaries)))
print("F1-score:", float(avg_f1 / len(system_summaries)))
Esempio n. 7
0
def main(args):
    """Run a saved seq2seq checkpoint on the test set and print metrics.

    Steps, in order: load the newest file under ``./save/<save_path>/`` and
    the Word2Vec / SentencePiece models, build the test data loader, construct
    the encoder/decoder/seq2seq model, run ``test``, decode predictions and
    references with SentencePiece, pickle them to ``test_result.pkl``, then
    print BLEU, METEOR and ROUGE-L figures.

    Args:
        args: Parsed arguments. The attributes read are ``hj_method``,
            ``kr_method``, ``batch_size``, ``beam_size``, ``hidden_size``,
            ``embed_size``, ``vocab_size``, ``max_len``, ``pad_id``,
            ``n_layers`` and ``stop_ix``.
    """
    # Setting
    warnings.simplefilter("ignore", UserWarning)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Args Parser
    hj_method = args.hj_method
    kr_method = args.kr_method
    batch_size = args.batch_size
    beam_size = args.beam_size
    hidden_size = args.hidden_size
    embed_size = args.embed_size
    vocab_size = args.vocab_size
    max_len = args.max_len
    padding_index = args.pad_id
    n_layers = args.n_layers
    stop_ix = args.stop_ix

    # Load saved model & Word2vec
    save_path = 'save_{}_{}_{}_maxlen_{}'.format(vocab_size, hj_method,
                                                 kr_method, max_len)
    # The lexicographically last file in the save directory is used as the
    # checkpoint; an empty directory raises IndexError here.
    save_list = sorted(glob.glob(f'./save/{save_path}/*.*'))
    save_pt = save_list[-1]
    print('Will load {} pt file...'.format(save_pt))
    word2vec_hj = Word2Vec.load('./w2v/word2vec_hj_{}_{}.model'.format(
        vocab_size, hj_method))

    # SentencePiece model load
    spm_kr = spm.SentencePieceProcessor()
    spm_kr.Load("./spm/m_korean_{}.model".format(vocab_size))

    # Test data load
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # load test_dat.pkl from a trusted source.
    with open('./test_dat.pkl', 'rb') as f:
        test_dat = pickle.load(f)

    test_dataset = CustomDataset(test_dat['test_hanja'],
                                 test_dat['test_korean'])
    test_loader = getDataLoader(test_dataset,
                                pad_index=padding_index,
                                shuffle=False,
                                batch_size=batch_size)

    # Model load
    print('Model loading...')
    encoder = Encoder(vocab_size,
                      embed_size,
                      hidden_size,
                      word2vec_hj,
                      n_layers=n_layers,
                      padding_index=padding_index)
    decoder = Decoder(embed_size,
                      hidden_size,
                      vocab_size,
                      n_layers=n_layers,
                      padding_index=padding_index)
    # Use the device selected above instead of an unconditional .cuda(), which
    # fails on machines without CUDA even though a CPU fallback was computed.
    seq2seq = Seq2Seq(encoder, decoder, beam_size).to(device)
    #optimizer = optim.Adam(seq2seq.parameters(), lr=lr, weight_decay=w_decay)
    #scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=scheduler_step_size, gamma=lr_decay)
    print(seq2seq)

    print('Testing...')
    start_time = time.time()
    results = test(seq2seq,
                   test_loader,
                   vocab_size,
                   load_pt=save_pt,
                   stop_ix=stop_ix)
    print(time.time() - start_time)
    print('Done!')

    print("Decoding...")
    pred_list = list()
    for result_text in tqdm(results):
        # Flatten each result to a plain list of int ids for SentencePiece.
        text = torch.Tensor(result_text).squeeze().tolist()
        text = [int(x) for x in text]
        prediction_sentence = spm_kr.decode_ids(
            text).strip()  # Decode with strip
        pred_list.append(prediction_sentence)
    ref_list = list()
    for ref_text in tqdm(test_dat['test_korean'][:stop_ix]):
        ref_list.append(spm_kr.decode_ids(ref_text).strip())
    print('Done!')

    with open(f'./save/{save_path}/test_result.pkl', 'wb') as f:
        pickle.dump({
            'pred': pred_list,
            'reference': ref_list,
        }, f)
    # NOTE(review): the message names test_dat.pkl, but the file written above
    # is test_result.pkl.
    print('Save file; /test_dat.pkl')

    # NOTE(review): everything below reads test_dat['reference'] and
    # test_dat['pred'], while the only keys this function is seen to read from
    # test_dat are 'test_hanja' and 'test_korean'. ref_list / pred_list built
    # above look like the intended inputs - confirm before relying on these
    # metrics.

    # Calculate BLEU Score
    print('Calculate BLEU4, METEOR, Rogue-L...')
    chencherry = SmoothingFunction()
    bleu4 = corpus_bleu(test_dat['reference'],
                        test_dat['pred'],
                        smoothing_function=chencherry.method4)
    print('BLEU Score is {}'.format(bleu4))

    # Calculate METEOR Score
    meteor = meteor_score(test_dat['reference'], test_dat['pred'])
    print('METEOR Score is {}'.format(meteor))

    # Calculate Rouge-L Score
    r = Rouge()
    total_test_length = len(test_dat['reference'])
    precision_all = 0
    recall_all = 0
    f_score_all = 0
    for i in range(total_test_length):
        [precision, recall, f_score] = r.rouge_l([test_dat['reference'][i]],
                                                 [test_dat['pred'][i]])
        precision_all += precision
        recall_all += recall
        f_score_all += f_score
    # Fixed: the original called the non-existent str method `.foramt`, which
    # raised AttributeError before any average was printed.
    print('Precision : {}'.format(round(precision_all / total_test_length, 4)))
    print('Recall : {}'.format(round(recall_all / total_test_length, 4)))
    print('F Score : {}'.format(round(f_score_all / total_test_length, 4)))
Esempio n. 8
0
from PyRouge.pyrouge import Rouge

r = Rouge()

# A simple example of how rouge can be calculated.
# print(...) with one argument prints the same on Python 2 and, unlike the
# print statement, is valid syntax on Python 3.
print(r.rouge_l([[1, 7, 6, 7, 5], [0, 2, 8, 3, 5]],
                [[1, 2, 3, 4, 5], [3, 9, 5]]))

# A more practical example of how it can be used for summary evaluation
system_generated_summary = " The Kyrgyz President pushed through the law requiring the use of ink during the upcoming Parliamentary and Presidential elections In an effort to live up to its reputation in the 1990s as an island of democracy. The use of ink is one part of a general effort to show commitment towards more open elections. improper use of this type of ink can cause additional problems as the elections in Afghanistan showed. The use of ink and readers by itself is not a panacea for election ills."
manual_summmary = " The use of invisible ink and ultraviolet readers in the elections of the Kyrgyz Republic which is a small, mountainous state of the former Soviet republic, causing both worries and guarded optimism among different sectors of the population. Though the actual technology behind the ink is not complicated, the presence of ultraviolet light (of the kind used to verify money) causes the ink to glow with a neon yellow light. But, this use of the new technology has caused a lot of problems. "

# Each text is wrapped in a one-element list before being scored.
print(r.rouge_l([system_generated_summary], [manual_summmary]))
    ctr_side_2 = list(ngrams(ctr1_tokens, 2)) + list(ngrams(
        ctr2_tokens, 2)) + list(ngrams(ctr3_tokens, 2))
    inc_neg_side_2 = list(ngrams(inconst_neg1_tokens, 2)) + list(
        ngrams(inconst_neg2_tokens, 2)) + list(ngrams(inconst_neg3_tokens, 2))

    #rouge2_rand_list.append(rouge_one(rand_side_2, gold_side_2))
    rouge2_lead_list.append(rouge_one(lead_side_2, gold_side_2))
    rouge2_hybrid1_list.append(rouge_one(hybrid_side_2, gold_side_2))
    rouge2_inconst_list.append(rouge_one(inconst_side_2, gold_side_2))
    rouge2_neg_list.append(rouge_one(neg_side_2, gold_side_2))
    rouge2_ctr_list.append(rouge_one(ctr_side_2, gold_side_2))
    rouge2_inconst_neg_list.append(rouge_one(inc_neg_side_2, gold_side_2))
    # 1 : X.AUTHID, 2 : spec_domain, 3: site.content  4-6: GOLD, 7-9: LEAD, 10-12: Hybrid, 13-15: Incons, 16-18: Neg, 19-21: Ctr, 22-24: InconsNeg

    rouge_lcs_lead_list.append(
        r_summ_evaluate.rouge_l(" ".join(summ_data.iloc[row_ind, 3:6]),
                                " ".join(summ_data.iloc[row_ind, 6:9])))
    rouge_lcs_hybrid1_list.append(
        r_summ_evaluate.rouge_l(" ".join(summ_data.iloc[row_ind, 3:6]),
                                " ".join(summ_data.iloc[row_ind, 9:12])))
    #rouge_lcs_rand_list.append(r_summ_evaluate.rouge_l(" ".join(summ_data.iloc[row_ind, 3:6]), " ".join(summ_data.iloc[row_ind, 9:12])))
    rouge_lcs_inconst_list.append(
        r_summ_evaluate.rouge_l(" ".join(summ_data.iloc[row_ind, 3:6]),
                                " ".join(summ_data.iloc[row_ind, 12:15])))
    rouge_lcs_neg_list.append(
        r_summ_evaluate.rouge_l(" ".join(summ_data.iloc[row_ind, 3:6]),
                                " ".join(summ_data.iloc[row_ind, 15:18])))
    rouge_lcs_ctr_list.append(
        r_summ_evaluate.rouge_l(" ".join(summ_data.iloc[row_ind, 3:6]),
                                " ".join(summ_data.iloc[row_ind, 18:21])))
    rouge_lcs_inconst_neg_list.append(
        r_summ_evaluate.rouge_l(" ".join(summ_data.iloc[row_ind, 3:6]),
Esempio n. 10
0
 def rouge_l(S, I):
     """Return only the F-score from ``Rouge.rouge_l`` for ``S`` and ``I``.

     Both arguments are wrapped in one-element lists before the call; the
     first two returned values (precision and recall) are discarded.
     """
     scorer = Rouge()
     _precision, _recall, f_measure = scorer.rouge_l([S], [I])
     return f_measure
Esempio n. 11
0
from PyRouge.pyrouge import Rouge

r = Rouge()

# A simple example of how rouge can be calculated.
# print(...) with one argument prints the same on Python 2 and, unlike the
# print statement, is valid syntax on Python 3.
print(r.rouge_l([[1, 7, 6, 7, 5], [0, 2, 8, 3, 5]], [[1, 2, 3, 4, 5], [3, 9, 5]]))

# A more practical example of how it can be used for summary evaluation
system_generated_summary = " The Kyrgyz President pushed through the law requiring the use of ink during the upcoming Parliamentary and Presidential elections In an effort to live up to its reputation in the 1990s as an island of democracy. The use of ink is one part of a general effort to show commitment towards more open elections. improper use of this type of ink can cause additional problems as the elections in Afghanistan showed. The use of ink and readers by itself is not a panacea for election ills."
manual_summmary = " The use of invisible ink and ultraviolet readers in the elections of the Kyrgyz Republic which is a small, mountainous state of the former Soviet republic, causing both worries and guarded optimism among different sectors of the population. Though the actual technology behind the ink is not complicated, the presence of ultraviolet light (of the kind used to verify money) causes the ink to glow with a neon yellow light. But, this use of the new technology has caused a lot of problems. "

# Each text is wrapped in a one-element list before being scored.
print(r.rouge_l([system_generated_summary], [manual_summmary]))