def get_data(inp_fp):
    """Load scored extractions from a TSV file and build per-sentence graphs.

    Every non-empty line of the input file must hold exactly three
    tab-separated fields: ``sentence``, ``extraction`` and ``confidence``.
    When the same extraction appears more than once for a sentence, only the
    first occurrence is kept.

    Args:
        inp_fp: Path of the tab-separated input file.

    Returns:
        A 4-tuple ``(sent_list, extD, graphD, node_edgeD)``:

        * ``sent_list`` -- sentences in order of first appearance.
        * ``extD`` -- sentence -> list of ``[extraction, confidence]`` pairs,
          with the confidence kept as the raw string read from the file.
        * ``graphD`` -- sentence -> ``{index: extraction}``.
        * ``node_edgeD`` -- sentence -> dict in which each integer key ``i``
          maps to ``math.exp(float(confidence_i))`` and each tuple key
          ``(i, j)`` with ``i < j`` maps to the third value returned by
          ``rouge_n_sentence_level(..., 2)`` for that pair of extractions.

    Raises:
        ValueError: If a non-empty line does not split into exactly three
            fields, or a confidence cannot be parsed as a float.
    """
    extD = dict()
    # The context manager closes the file even when a malformed line raises.
    with open(inp_fp, 'r') as inp_f:
        for line in inp_f:
            line = line.strip('\n')
            if not line:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            sentence, extraction, confidence = line.split('\t')
            extractions = extD.setdefault(sentence, [])
            if any(extraction == added for added, _ in extractions):
                continue
            extractions.append([extraction, confidence])

    graphD = {
        sentence: {idx: item[0] for idx, item in enumerate(items)}
        for sentence, items in extD.items()
    }

    sent_list = []
    node_edgeD = dict()
    for key in tqdm(extD):
        sent_list.append(key)
        items = extD[key]
        weights = node_edgeD[key] = dict()

        # Node weights: exponentiated confidence of each extraction.
        for idx, (_, confidence) in enumerate(items):
            weights[idx] = math.exp(float(confidence))

        # Edge weights: one ROUGE call per unordered pair of extractions.
        # Each compared text is the extraction immediately followed by its
        # confidence string, exactly as before.
        # NOTE(review): plain strings (not token lists) are passed, so the
        # n-grams are presumably character-level -- confirm this is intended.
        texts = [''.join(item) for item in items]
        num = len(texts)
        for i in range(num):
            for j in range(i + 1, num):
                _, _, rouge = rouge_n_sentence_level(texts[i], texts[j], 2)
                weights[(i, j)] = rouge

    return sent_list, extD, graphD, node_edgeD
def docalc2(args):
    """Return the precision of several ROUGE variants for one text pair.

    Args:
        args: A ``(hyp, ref)`` pair of whitespace-separated strings.

    Returns:
        A dict mapping ``'rouge-s'``, ``'rouge-w'``, ``'rouge-l'`` and
        ``'rouge-1'`` .. ``'rouge-4'`` to the ``precision`` attribute of the
        corresponding sentence-level ROUGE result.
    """
    hypothesis, reference = args
    hyp_tokens = hypothesis.split()
    ref_tokens = reference.split()

    scores = {}
    scores['rouge-s'] = rouge.rouge_s_sentence_level(
        hyp_tokens, ref_tokens).precision
    scores['rouge-w'] = rouge.rouge_w_sentence_level(
        hyp_tokens, ref_tokens).precision
    scores['rouge-l'] = rouge.rouge_l_sentence_level(
        hyp_tokens, ref_tokens).precision
    # N-gram variants, in increasing n-gram order.
    for n in (1, 2, 3, 4):
        scores['rouge-%d' % n] = rouge.rouge_n_sentence_level(
            hyp_tokens, ref_tokens, n).precision
    return scores
def get_rouge_score(ref_definitions: list, hyp_definition: str):
    """Score a hypothesis definition against all references with ROUGE-1.

    The reference definitions are joined with spaces and tokenised on
    whitespace into a single token list; the hypothesis is tokenised the
    same way.

    Args:
        ref_definitions: Reference definition strings.
        hyp_definition: Hypothesis definition string.

    Returns:
        The third value returned by ``rouge_n_sentence_level`` with ``n=1``.
    """
    hyp_tokens = hyp_definition.split()
    merged_reference = ' '.join(ref_definitions)
    ref_tokens = merged_reference.split()

    _recall, _precision, f_score = rouge_n_sentence_level(
        hyp_tokens, ref_tokens, 1)
    return f_score
def score(self, pairs):
    """Average ROUGE-1, ROUGE-2 and ROUGE-L scores over a set of pairs.

    Args:
        pairs: Iterable of ``(target, hypo)`` pairs.

    Returns:
        A dict with the keys ``'ROUGE-1-F (avg)'``, ``'ROUGE-2-F (avg)'`` and
        ``'ROUGE-L-F (avg)'``, each holding ``np.average`` of the per-pair
        scores.
    """
    collected = {'1': [], '2': [], 'l': []}
    for target, hypo in pairs:
        # Only the third value of each ROUGE result is kept.
        collected['1'].append(rouge_n_sentence_level(hypo, target, 1)[2])
        collected['2'].append(rouge_n_sentence_level(hypo, target, 2)[2])
        collected['l'].append(rouge_l_summary_level(hypo, target)[2])

    averages = dict()
    averages['ROUGE-1-F (avg)'] = np.average(collected['1'])
    averages['ROUGE-2-F (avg)'] = np.average(collected['2'])
    averages['ROUGE-L-F (avg)'] = np.average(collected['l'])
    return averages
# Per-sentence ROUGE-2 and ROUGE-1 results for the current iteration.
list_rouge_r2, list_recall_r2, list_precision_r2 = [], [], []
list_rouge_r1, list_recall_r1, list_precision_r1 = [], [], []

for i in range(len(reference_sentences)):
    # Clear the previous output, then show the index being processed.
    clear_output(wait=True)
    print(i)

    # Tokenise the reference and the matching generated summary.
    summary_sentence = summary_sentences_list[iteration][i].split()
    reference_sentence = reference_sentences[i].split()

    # Score the pair with bigrams (ROUGE-2) and unigrams (ROUGE-1).
    recall_r2, precision_r2, rouge_r2 = rouge_n_sentence_level(
        summary_sentence, reference_sentence, 2)
    recall_r1, precision_r1, rouge_r1 = rouge_n_sentence_level(
        summary_sentence, reference_sentence, 1)

    list_recall_r2.append(recall_r2)
    list_precision_r2.append(precision_r2)
    list_rouge_r2.append(rouge_r2)

    list_recall_r1.append(recall_r1)
    list_precision_r1.append(precision_r1)
    list_rouge_r1.append(rouge_r1)

# Store the mean of the third ROUGE value for this iteration.
mean_rouge_r1_list[iteration] = statistics.mean(list_rouge_r1)
mean_rouge_r2_list[iteration] = statistics.mean(list_rouge_r2)
#!/usr/bin/env python3 """Examples.""" from rouge import rouge_n_sentence_level from rouge import rouge_l_sentence_level from rouge import rouge_n_summary_level from rouge import rouge_l_summary_level from rouge import rouge_w_sentence_level from rouge import rouge_w_summary_level if __name__ == '__main__': # The use of sentence level rouges. reference_sentence = 'the police killed the gunman'.split() summary_sentence = 'the gunman police killed'.split() print('Sentence level:') score = rouge_n_sentence_level(summary_sentence, reference_sentence, 1) print('ROUGE-1: %f' % score.f1_measure) _, _, rouge_2 = rouge_n_sentence_level(summary_sentence, reference_sentence, 2) print('ROUGE-2: %f' % rouge_2) _, _, rouge_l = rouge_l_sentence_level(summary_sentence, reference_sentence) print('ROUGE-L: %f' % rouge_l) _, _, rouge_w = rouge_w_sentence_level(summary_sentence, reference_sentence) print('ROUGE-W: %f' % rouge_w) # The use of summary level rouges.
def inference_monodirectional(x_test, y_test, x_tokenizer, y_tokenizer,
                              num_samples=2000):
    """Run inference with the saved monodirectional model and report ROUGE.

    The function loads the saved Keras model, rebuilds separate encoder and
    decoder inference models from its layers, prints one sample prediction,
    generates summaries for the first ``num_samples`` test items, writes them
    to a CSV file, and prints the mean ROUGE-2 and ROUGE-1 scores.

    Args:
        x_test: Indexable collection of encoded input sequences; each item
            must support ``reshape(1, 300)``.
        y_test: Indexable collection of encoded target summaries.
        x_tokenizer: Source tokenizer; its ``index_word`` mapping is used.
        y_tokenizer: Target tokenizer; its ``index_word`` and ``word_index``
            mappings are used.
        num_samples: Maximum number of test items to summarise and score.
            Capped at ``len(x_test)`` so that a test set smaller than the
            default no longer raises ``IndexError``.

    Returns:
        None. Results are printed and written to
        ``models/monodirectional/10epochs/results_predictions_mono_10.csv``.
    """
    max_text_len = 300
    max_summary_len = 12
    latent_dim = 300
    model_dir = "models/monodirectional/10epochs"
    results_path = model_dir + "/results_predictions_mono_10.csv"

    model = tf.keras.models.load_model(model_dir + "/saved_model")

    reverse_target_word_index = y_tokenizer.index_word
    reverse_source_word_index = x_tokenizer.index_word
    target_word_index = y_tokenizer.word_index

    # Encode the input sequence to get the feature vector.
    # NOTE(review): the layer indices below assume the layer order of the
    # saved model -- confirm whenever the training architecture changes.
    encoder_inputs = model.input[0]  # input_1
    encoder_outputs, state_h, state_c = model.layers[4].output
    encoder_model = Model(inputs=encoder_inputs,
                          outputs=[encoder_outputs, state_h, state_c])

    # Decoder setup.
    # The tensors below hold the states of the previous time step.
    decoder_inputs = model.input[1]
    decoder_state_input_h = Input(shape=(latent_dim,), name='dec_st_in_h')
    decoder_state_input_c = Input(shape=(latent_dim,), name='dec_st_in_c')
    decoder_hidden_state_input = Input(shape=(max_text_len, latent_dim))

    # Get the embeddings of the decoder sequence.
    dec_emb_layer = model.layers[3]
    dec_emb2 = dec_emb_layer(decoder_inputs)

    # To predict the next word in the sequence, set the initial states to the
    # states from the previous time step.
    decoder_lstm = model.layers[5]
    decoder_outputs2, state_h2, state_c2 = decoder_lstm(
        dec_emb2,
        initial_state=[decoder_state_input_h, decoder_state_input_c])

    # Attention inference (the attention states are not needed here).
    attn_layer = model.layers[6]
    attn_out_inf, _ = attn_layer(
        [decoder_hidden_state_input, decoder_outputs2])
    decoder_inf_concat = Concatenate(axis=-1, name='concat')(
        [decoder_outputs2, attn_out_inf])

    # A dense softmax layer to generate prob dist. over the target vocabulary.
    decoder_dense = model.layers[8]
    decoder_outputs2 = decoder_dense(decoder_inf_concat)

    # Final decoder model.
    decoder_model = Model(
        [decoder_inputs] + [decoder_hidden_state_input,
                            decoder_state_input_h, decoder_state_input_c],
        [decoder_outputs2] + [state_h2, state_c2])

    def _source_text(index):
        return seq2text(x_test[index], reverse_source_word_index)

    def _reference_summary(index):
        return seq2summary(y_test[index], target_word_index,
                           reverse_target_word_index)

    def _predicted_summary(index):
        return decode_sequence(x_test[index].reshape(1, max_text_len),
                               encoder_model, target_word_index,
                               decoder_model, reverse_target_word_index,
                               max_summary_len)

    # Show one example before starting the long run.
    print("Review:", _source_text(0))
    print("Original summary:", _reference_summary(0))
    print("Predicted summary:", _predicted_summary(0))
    print("\n")

    # Never index past the end of the test set.
    sample_count = min(num_samples, len(x_test))

    original_text = []
    original_summary = []
    created_summary = []
    for i in range(sample_count):
        # Progress bar.
        progress(i, sample_count, status='Doing very long job')
        original_text.append(_source_text(i))
        original_summary.append(_reference_summary(i))
        created_summary.append(_predicted_summary(i))

    results = pd.DataFrame()
    results["Original_text"] = original_text
    results["Original_summary"] = original_summary
    results["Created_summary"] = created_summary
    results.to_csv(results_path)

    results = pd.read_csv(results_path)
    # read_csv loads empty fields as NaN; turn them into the literal token
    # 'NaN' so that .split() below works.  Assigning the result back (instead
    # of a chained ``inplace=True`` call on the column) keeps this reliable
    # under pandas Copy-on-Write.
    results["Created_summary"] = results["Created_summary"].fillna('NaN')

    reference_sentences = results["Original_summary"].to_list()
    summary_sentences = results["Created_summary"].to_list()

    list_rouge_r2 = []
    list_rouge_r1 = []
    pair_count = len(reference_sentences)
    for i, (reference, summary) in enumerate(
            zip(reference_sentences, summary_sentences)):
        # Progress bar.
        progress(i, pair_count, status='Doing very long job')
        reference_sentence = reference.split()
        summary_sentence = summary.split()

        # Calculate ROUGE-2.
        _, _, rouge_r2 = rouge_n_sentence_level(
            summary_sentence, reference_sentence, 2)
        list_rouge_r2.append(rouge_r2)

        # Calculate ROUGE-1.
        _, _, rouge_r1 = rouge_n_sentence_level(
            summary_sentence, reference_sentence, 1)
        list_rouge_r1.append(rouge_r1)

    mean_rouge_r2 = statistics.mean(list_rouge_r2)
    mean_rouge_r1 = statistics.mean(list_rouge_r1)
    print("Mean ROUGE-2: ", mean_rouge_r2)
    print("Mean ROUGE-1: ", mean_rouge_r1)