예제 #1
0
파일: filter.py 프로젝트: vaibhavad/imojie
def get_data(inp_fp):
    """Read tab-separated (sentence, extraction, confidence) records and
    build the per-sentence structures used by the extraction graph.

    Parameters
    ----------
    inp_fp : str
        Path to a TSV file with one ``sentence\\textraction\\tconfidence``
        record per line.

    Returns
    -------
    tuple
        ``(sent_list, extD, graphD, node_edgeD)`` where
        - ``sent_list``: sentences in first-seen order,
        - ``extD``: sentence -> ``[[extraction, confidence], ...]`` with
          duplicate extractions removed,
        - ``graphD``: sentence -> ``{node_index: extraction}``,
        - ``node_edgeD``: sentence -> node weights ``{i: exp(conf_i)}`` and
          edge weights ``{(i, j): ROUGE-2 F-score between extractions i, j}``.
    """
    extD = dict()
    graphD = dict()
    node_edgeD = dict()
    # Context manager guarantees the file is closed even if a line is
    # malformed and split() raises (the original leaked the handle).
    with open(inp_fp, 'r') as inp_f:
        for line in inp_f:
            line = line.strip('\n')
            sentence, extraction, confidence = line.split('\t')
            if sentence not in extD:
                extD[sentence] = list()
            # Skip extractions already recorded for this sentence.
            if any(extraction == added for added, _ in extD[sentence]):
                continue
            extD[sentence].append([extraction, confidence])

    # Assign each extraction a sequential node id per sentence.
    for key in extD.keys():
        graphD[key] = {cnt: item[0] for cnt, item in enumerate(extD[key])}

    sent_list = []
    sent_dict = extD
    for key in tqdm(sent_dict):
        sent_list.append(key)
        node_edgeD[key] = dict()
        num = len(sent_dict[key])

        # Node weights: exponentiated confidence scores.
        for i in range(num):
            node_edgeD[key][i] = math.exp(float(extD[key][i][1]))

        # Edge weights: ROUGE-2 F-score per extraction pair.  The original
        # ran this double loop twice — the first pass only fed `edge_sum`
        # (and a `key_sum` of exp-confidences), both of which were never
        # used — so each pair's ROUGE was computed twice for nothing.
        for i in range(num):
            for j in range(i + 1, num):
                sent1 = ''.join(sent_dict[key][i])
                sent2 = ''.join(sent_dict[key][j])
                recall, precision, rouge = rouge_n_sentence_level(
                    sent1, sent2, 2)
                node_edgeD[key][(i, j)] = rouge
    return sent_list, extD, graphD, node_edgeD
예제 #2
0
def docalc2(args):
    """Score one (hypothesis, reference) pair with a battery of ROUGE metrics.

    ``args`` is a 2-tuple of whitespace-separated strings; returns a dict
    mapping metric name to its sentence-level precision.
    """
    hyp_tokens, ref_tokens = (text.split() for text in args)

    scores = {
        'rouge-s': rouge.rouge_s_sentence_level(hyp_tokens, ref_tokens).precision,
        'rouge-w': rouge.rouge_w_sentence_level(hyp_tokens, ref_tokens).precision,
        'rouge-l': rouge.rouge_l_sentence_level(hyp_tokens, ref_tokens).precision,
    }
    # ROUGE-N for n = 1..4 shares one call shape; fill the entries in a loop.
    for n in (1, 2, 3, 4):
        scores['rouge-%d' % n] = rouge.rouge_n_sentence_level(
            hyp_tokens, ref_tokens, n).precision

    return scores
def get_rouge_score(ref_definitions: list, hyp_definition: str):
    """Return the ROUGE-1 F-score of *hyp_definition* against the
    concatenation of *ref_definitions* (both tokenized on whitespace)."""
    ref_tokens = ' '.join(ref_definitions).split()
    hyp_tokens = hyp_definition.split()

    # rouge_n_sentence_level returns (recall, precision, F); keep the F.
    _, _, rouge_f = rouge_n_sentence_level(hyp_tokens, ref_tokens, 1)

    return rouge_f
예제 #4
0
 def score(self, pairs):
     """Average sentence/summary-level ROUGE-1/2/L F-scores over
     (target, hypothesis) pairs and return them as a dict."""
     r1_scores = []
     r2_scores = []
     rl_scores = []
     for target, hypo in pairs:
         # Each scorer returns (recall, precision, F); keep only the F-score.
         _, _, f_r1 = rouge_n_sentence_level(hypo, target, 1)
         _, _, f_r2 = rouge_n_sentence_level(hypo, target, 2)
         _, _, f_rl = rouge_l_summary_level(hypo, target)
         r1_scores.append(f_r1)
         r2_scores.append(f_r2)
         rl_scores.append(f_rl)
     return {
         'ROUGE-1-F (avg)': np.average(r1_scores),
         'ROUGE-2-F (avg)': np.average(r2_scores),
         'ROUGE-L-F (avg)': np.average(rl_scores),
     }
예제 #5
0
    # Per-sentence score accumulators for this `iteration`:
    # F-score, recall and precision for ROUGE-2 and ROUGE-1.
    # NOTE(review): `reference_sentences`, `summary_sentences_list`,
    # `iteration`, and the `mean_rouge_*_list` containers come from an
    # enclosing scope that is not visible in this fragment — confirm their
    # definitions against the surrounding code.
    list_rouge_r2 = []
    list_recall_r2 = []
    list_precision_r2 = []
    list_rouge_r1 = []
    list_recall_r1 = []
    list_precision_r1 = []

    for i in range(0, len(reference_sentences)):
        # Overwrite previous output so only the current index is shown
        # (clear_output presumably comes from IPython.display — verify).
        clear_output(wait=True)
        print(i)
        reference_sentence = reference_sentences[i].split()
        summary_sentence = summary_sentences_list[iteration][i].split()

        # Calculate ROUGE-2.
        recall_r2, precision_r2, rouge_r2 = rouge_n_sentence_level(
            summary_sentence, reference_sentence, 2)

        list_rouge_r2.append(rouge_r2)
        list_recall_r2.append(recall_r2)
        list_precision_r2.append(precision_r2)

        # Calculate ROUGE-1.
        recall_r1, precision_r1, rouge_r1 = rouge_n_sentence_level(
            summary_sentence, reference_sentence, 1)

        list_rouge_r1.append(rouge_r1)
        list_recall_r1.append(recall_r1)
        list_precision_r1.append(precision_r1)

    # Store the mean F-scores for this iteration; recall/precision lists
    # are collected above but not aggregated here.
    mean_rouge_r2_list[iteration] = statistics.mean(list_rouge_r2)
    mean_rouge_r1_list[iteration] = statistics.mean(list_rouge_r1)
예제 #6
0
#!/usr/bin/env python3
"""Examples."""
from rouge import rouge_n_sentence_level
from rouge import rouge_l_sentence_level
from rouge import rouge_n_summary_level
from rouge import rouge_l_summary_level
from rouge import rouge_w_sentence_level
from rouge import rouge_w_summary_level

if __name__ == '__main__':
    # Demonstrate the sentence-level ROUGE variants on a toy pair.
    reference_sentence = 'the police killed the gunman'.split()
    summary_sentence = 'the gunman police killed'.split()

    print('Sentence level:')

    # ROUGE-1 via the named result attribute.
    result_1 = rouge_n_sentence_level(summary_sentence, reference_sentence, 1)
    print('ROUGE-1: %f' % result_1.f1_measure)

    # The remaining variants via (recall, precision, F) unpacking.
    _, _, f_rouge_2 = rouge_n_sentence_level(summary_sentence, reference_sentence, 2)
    print('ROUGE-2: %f' % f_rouge_2)

    _, _, f_rouge_l = rouge_l_sentence_level(summary_sentence, reference_sentence)
    print('ROUGE-L: %f' % f_rouge_l)

    _, _, f_rouge_w = rouge_w_sentence_level(summary_sentence, reference_sentence)
    print('ROUGE-W: %f' % f_rouge_w)

    # Summary-level rouge examples would follow here.
예제 #7
0
def inference_monodirectional(x_test, y_test, x_tokenizer, y_tokenizer):
    """Run inference with a saved mono-directional seq2seq summarizer and
    print mean ROUGE-1/ROUGE-2 over 2000 test examples.

    Rebuilds separate encoder/decoder inference models from the trained
    Keras model's layers, decodes 2000 summaries, persists them to CSV,
    then scores them against the reference summaries.

    Args:
        x_test: padded/encoded source sequences (indexable; each item is
            reshaped to (1, 300), so presumably a 2-D int array — confirm).
        y_test: encoded target summaries, passed to ``seq2summary``.
        x_tokenizer: fitted source-side Keras tokenizer (``index_word`` used).
        y_tokenizer: fitted target-side Keras tokenizer (``index_word`` and
            ``word_index`` used).

    Side effects: prints decoded samples and progress, writes and re-reads
    ``results_predictions_mono_10.csv``, prints the mean ROUGE scores.
    Returns None.
    """
    max_text_len=300
    max_summary_len=12

    model = tf.keras.models.load_model("models/monodirectional/10epochs/saved_model")

    # Lookup tables for converting index sequences back to words.
    reverse_target_word_index=y_tokenizer.index_word
    reverse_source_word_index=x_tokenizer.index_word
    target_word_index=y_tokenizer.word_index


    latent_dim = 300
    embedding_dim=100

    # Encode the input sequence to get the feature vector
    # NOTE(review): layer indices [4], [3], [5], [6], [8] below are tied to
    # the exact architecture of the saved model — verify if the model changes.
    encoder_inputs = model.input[0]   # input_1
    encoder_outputs, state_h, state_c = model.layers[4].output 
    encoder_model = Model(inputs=encoder_inputs,outputs=[encoder_outputs, state_h, state_c])

    # Decoder setup
    # Below tensors will hold the states of the previous time step
    decoder_inputs = model.input[1]
    decoder_state_input_h = Input(shape=(latent_dim,), name='dec_st_in_h')
    decoder_state_input_c = Input(shape=(latent_dim,), name='dec_st_in_c')
    decoder_hidden_state_input = Input(shape=(max_text_len,latent_dim))

    # Get the embeddings of the decoder sequence
    dec_emb_layer = model.layers[3]
    dec_emb2= dec_emb_layer(decoder_inputs) 
    # To predict the next word in the sequence, set the initial states to the states from the previous time step
    decoder_lstm = model.layers[5]
    decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=[decoder_state_input_h, decoder_state_input_c])

    #attention inference
    attn_layer = model.layers[6]
    attn_out_inf, attn_states_inf = attn_layer([decoder_hidden_state_input, decoder_outputs2])
    decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_outputs2, attn_out_inf])

    # A dense softmax layer to generate prob dist. over the target vocabulary
    decoder_dense = model.layers[8]
    decoder_outputs2 = decoder_dense(decoder_inf_concat) 

    # Final decoder model
    decoder_model = Model(
        [decoder_inputs] + [decoder_hidden_state_input,decoder_state_input_h, decoder_state_input_c],
        [decoder_outputs2] + [state_h2, state_c2])
    
    # Print a single decoded sample for a quick sanity check.
    for i in range(0,1):
        print("Review:",seq2text(x_test[i], reverse_source_word_index))
        print("Original summary:",seq2summary(y_test[i],target_word_index, reverse_target_word_index))
        print("Predicted summary:",decode_sequence(x_test[i].reshape(1,max_text_len), encoder_model, target_word_index, decoder_model, reverse_target_word_index, max_summary_len))
        print("\n")
    
    original_text = []
    original_summary = []
    created_summary = []

    # Decode the first 2000 test examples.
    for i in range(0,2000):
        # progress bar
        progress(i, 2000, status='Doing very long job')

        original_text.append(seq2text(x_test[i], reverse_source_word_index))
        original_summary.append(seq2summary(y_test[i], target_word_index, reverse_target_word_index))
        created_summary.append(decode_sequence(x_test[i].reshape(1,max_text_len), encoder_model, target_word_index, decoder_model, reverse_target_word_index, max_summary_len))

    results = pd.DataFrame()
    results["Original_text"] = original_text
    results["Original_summary"] = original_summary
    results["Created_summary"] = created_summary

    results.to_csv("models/monodirectional/10epochs/results_predictions_mono_10.csv")

    # Round-trip through CSV, then replace NaN (empty decoded summaries)
    # with the literal string 'NaN' so .split() below cannot fail.
    results=pd.read_csv("models/monodirectional/10epochs/results_predictions_mono_10.csv")
    results["Created_summary"].replace(np.nan, 'NaN', inplace=True)

    reference_sentences = results["Original_summary"].to_list()
    summary_sentences = results["Created_summary"].to_list()

    # Per-sentence score accumulators (F, recall, precision).
    list_rouge_r2 = []
    list_recall_r2 = []
    list_precision_r2 = []
    list_rouge_r1= []
    list_recall_r1 = []
    list_precision_r1 = []

    for i in range(0, len(reference_sentences)):
        # progress bar
        progress(i, len(reference_sentences), status='Doing very long job')

        reference_sentence = reference_sentences[i].split()
        summary_sentence = summary_sentences[i].split()
        
        # Calculate ROUGE-2.
        recall_r2, precision_r2, rouge_r2 = rouge_n_sentence_level(summary_sentence, reference_sentence, 2)

        list_rouge_r2.append(rouge_r2)
        list_recall_r2.append(recall_r2)
        list_precision_r2.append(precision_r2)

        # Calculate ROUGE-1.
        recall_r1, precision_r1, rouge_r1 = rouge_n_sentence_level(summary_sentence, reference_sentence, 1)

        list_rouge_r1.append(rouge_r1)
        list_recall_r1.append(recall_r1)
        list_precision_r1.append(precision_r1)

    # Aggregate only the F-scores; recall/precision are collected but unused.
    mean_rouge_r2 = statistics.mean(list_rouge_r2)  
    mean_rouge_r1 = statistics.mean(list_rouge_r1)  

    print("Mean ROUGE-2: ", mean_rouge_r2)
    print("Mean ROUGE-1: ", mean_rouge_r1)