def print_embedding_scores(target_lines, gt_lines, w2v):
    """Print the three embedding-based similarity metrics.

    Each metric is computed by the ``embedding_metrics`` module from the
    ground-truth lines, the generated lines, and a word-embedding model,
    and yields a triple whose first three entries are printed as
    ``mean +/- interval ( extra )`` — exact semantics live in
    ``embedding_metrics``.
    """
    scorers = (
        ("Embedding Average Score", embedding_metrics.average),
        ("Greedy Matching Score", embedding_metrics.greedy_match),
        ("Extrema Score", embedding_metrics.extrema_score),
    )
    for label, scorer in scorers:
        r = scorer(gt_lines, target_lines, w2v)
        print("%s: %f +/- %f ( %f )" % (label, r[0], r[1], r[2]))
def cal_relevance(generated, reference, embedding):
    """Return [greedy, average, extrema] embedding relevance scores.

    Each sentence is wrapped in a singleton list before being handed to
    the metric functions (they expect one group of candidates per
    sample).  The three scores are returned in a list, in the order
    greedy match, average, extrema.
    """
    hyp_groups = [[sent] for sent in generated]
    ref_groups = [[sent] for sent in reference]
    return [
        greedy_match(ref_groups, hyp_groups, embedding),
        average_score(ref_groups, hyp_groups, embedding),
        extrema_score(ref_groups, hyp_groups, embedding),
    ]
def _evaluate(self, sess, batcher, ground_file, result_file):
    """Decode the test set and score it.

    Writes the ground-truth sentences to `ground_file` and the generated
    sentences to `result_file`, prints the three embedding metrics and
    the perplexity, and returns the sum of the three embedding-metric
    means as a model-selection score.

    NOTE(review): batches appear to hold 100 samples (the `// 100` and
    `* 100` below) — confirm against the batcher implementation.
    """
    batcher.reset()
    num_per_epoch = batcher.sample_num // 100
    print('number per epoch', num_per_epoch)
    ground_sent_list = list()
    generate_sent_list = list()
    ppl = 0  # perplexity accumulated over every test sample
    for _ in range(num_per_epoch):
        context_vecs, context_sent_len, context_conv_len, response_vecs, response_idx, response_n = batcher.generate(
        )
        # 0/1 mask marking the valid (pre-padding) positions of each response.
        mask_matrix = np.zeros(
            [np.shape(response_n)[0], self.params['max_r_words']], np.int32)
        for ind, row in enumerate(mask_matrix):
            row[:response_n[ind]] = 1
        batch_data = {
            self.model.encode_input: context_vecs,
            self.model.encode_sent_len: context_sent_len,
            self.model.encode_conv_len: context_conv_len,
            self.model.is_training: False,
            self.model.ans_vec: response_vecs,
            self.model.y: response_idx,
            self.model.y_mask: mask_matrix
        }
        loss, test_ans, test_dist = sess.run([
            self.model.test_loss, self.model.answer_word_test,
            self.model.distribution_word_test
        ], feed_dict=batch_data)
        # Decoder output comes back time-major; flip to (batch, time).
        test_ans = np.transpose(np.array(test_ans), (1, 0))
        for i in range(len(response_n)):
            # Ground truth: words up to and including the first '<end>'.
            ground_a = list()
            for l in range(self.params['max_r_words']):
                word = response_idx[i][l]
                ground_a.append(batcher.idx_to_word[word])
                if batcher.idx_to_word[word] == '<end>':
                    break
            ground_sent = ' '.join(ground_a)
            ground_sent_list.append(ground_sent)
            # Generated answer: same '<end>'-terminated detokenization.
            generate_a = list()
            for l in range(self.params['max_r_words']):
                word = test_ans[i][l]
                generate_a.append(batcher.idx_to_word[word])
                if batcher.idx_to_word[word] == '<end>':
                    break
            generate_sent = ' '.join(generate_a)
            generate_sent_list.append(generate_sent)
        # Word distributions, also time-major -> (batch, time, vocab).
        test_dist = np.transpose(np.array(test_dist), (1, 0, 2))
        for i in range(len(response_n)):
            ppl += perplexity.calculate_perplexity(test_dist[i],
                                                   response_idx[i],
                                                   response_n[i])
    # Average over all samples (num_per_epoch batches of — presumably — 100).
    ppl = ppl / (num_per_epoch * 100)
    ground_sents = '\n'.join(ground_sent_list)
    generate_sents = '\n'.join(generate_sent_list)
    with open(result_file, 'w') as fw:
        fw.write(generate_sents)
    with open(ground_file, 'w') as fw:
        fw.write(ground_sents)
    # The embedding metrics read the two sentence files written above.
    avg_r = embedding_metrics.average(ground_file, result_file, self.w2v)
    print("Embedding Average Score: %f +/- %f ( %f )" %
          (avg_r[0], avg_r[1], avg_r[2]))
    greedy_r = embedding_metrics.greedy_match(ground_file, result_file,
                                              self.w2v)
    print("Greedy Matching Score: %f +/- %f ( %f )" %
          (greedy_r[0], greedy_r[1], greedy_r[2]))
    extrema_r = embedding_metrics.extrema_score(ground_file, result_file,
                                                self.w2v)
    print("Extrema Score: %f +/- %f ( %f )" %
          (extrema_r[0], extrema_r[1], extrema_r[2]))
    print("perplexity: %f" % (ppl))
    # bleu = BLEU.bleu_val(ground_file, result_file)
    # print("BLEU Score: %f" % bleu)
    return avg_r[0] + greedy_r[0] + extrema_r[0]
def _evaluate(self, sess, batcher, ground_file, result_file):
    """Evaluate with two-pass (forward-prefix + full) decoding.

    For each test batch the model first decodes a reversed "forward"
    prefix from the context alone, splices the prefix's word embeddings
    back into the response vectors, then decodes the full answer
    conditioned on that prefix.  Ground-truth and generated sentences
    are written to `ground_file` / `result_file`, embedding metrics,
    perplexity, per-word self-information and the average loss are
    printed, and the sum of the three embedding-metric means is returned
    as the model-selection score.

    NOTE(review): batches appear to hold 100 samples (the `// 100` and
    `* 100` below) — confirm against the batcher implementation.
    """
    batcher.reset()
    num_per_epoch = batcher.sample_num // 100
    print('number per epoch', num_per_epoch)
    ground_sent_list = list()
    generate_sent_list = list()
    all_loss = 0
    ppl = 0  # accumulated perplexity
    hw = 0   # accumulated word self-information
    for _ in range(num_per_epoch):
        context_vecs, context_sent_len, context_conv_len, response_vecs, response_idx, response_n, \
            response_vecs_forward, response_idx_forward, response_n_forward = batcher.generate()
        # 0/1 masks marking the valid (pre-padding) positions of each response.
        mask_matrix = np.zeros(
            [np.shape(response_n)[0], self.params['max_r_words']], np.int32)
        mask_matrix_forward = np.zeros(
            [np.shape(response_n_forward)[0], self.params['max_r_f_words']],
            np.int32)
        for ind, row in enumerate(mask_matrix):
            row[:response_n[ind]] = 1
        for ind, row in enumerate(mask_matrix_forward):
            row[:response_n_forward[ind]] = 1

        # Pass 1: decode the forward (reversed) prefix from context only.
        batch_data = {
            self.model.encode_input: context_vecs,
            self.model.encode_sent_len: context_sent_len,
            self.model.encode_conv_len: context_conv_len,
            self.model.is_training: False,
        }
        forward_test_ans = sess.run(self.model.forward_answer_word_test,
                                    feed_dict=batch_data)
        forward_test_ans = np.transpose(np.array(forward_test_ans), (1, 0))

        # Splice the prefix's word embeddings back into response_vecs.
        # The prefix was decoded in reverse, hence the reverse() before
        # it is laid out left-to-right.
        forward_generation_num = np.zeros([np.shape(response_n)[0]], np.int32)
        for i in range(len(response_n)):
            forward_a = list()
            for l in range(self.params['max_r_f_words']):
                word = forward_test_ans[i][l]
                if batcher.idx_to_word[word] == '<start>':
                    break
                forward_a.append(batcher.idx_to_word[word])
            forward_a.reverse()
            forward_generation_num[i] = len(forward_a)
            forward_vec = list()
            for word in forward_a:
                forward_vec.append(batcher.embedding[batcher.word_to_idx[word]])
            if len(forward_a) != 0:
                response_vecs[i, :len(forward_a), :] = forward_vec
            print(forward_a, end=' ')

        # Mask marking which leading positions came from pass 1.
        forward_generation = np.zeros(
            [np.shape(response_n)[0], self.params['max_r_words']], np.int32)
        for ind, row in enumerate(forward_generation):
            row[:forward_generation_num[ind]] = 1

        # Pass 2: decode the full response conditioned on the prefix.
        batch_data = {
            self.model.encode_input: context_vecs,
            self.model.encode_sent_len: context_sent_len,
            self.model.encode_conv_len: context_conv_len,
            self.model.is_training: False,
            self.model.ans_vec_entire: response_vecs,
            self.model.y_entire: response_idx,
            self.model.y_mask_entire: mask_matrix,
            self.model.ans_vec_forward: response_vecs_forward,
            self.model.y_forward: response_idx_forward,
            self.model.y_mask_forward: mask_matrix_forward,
            self.model.y_forward_generation: forward_generation
        }
        loss, test_ans, test_dist = sess.run([
            self.model.test_loss, self.model.answer_word_test,
            self.model.distribution_word_test
        ], feed_dict=batch_data)
        all_loss += loss
        test_ans = np.transpose(np.array(test_ans), (1, 0))

        # Sentences generated in THIS batch only — kept separately so the
        # scoring loop below can index by the within-batch position.
        batch_generate_sents = list()
        for i in range(len(response_n)):
            # Ground truth: words up to and including the first '<end>'.
            ground_a = list()
            for l in range(self.params['max_r_words']):
                word = response_idx[i][l]
                ground_a.append(batcher.idx_to_word[word])
                if batcher.idx_to_word[word] == '<end>':
                    break
            ground_sent = ' '.join(ground_a)
            ground_sent_list.append(ground_sent)
            # Generated answer: the leading tokens come from the pass-1
            # prefix (read back re-reversed), the rest from pass 2.
            generate_a = list()
            for l in range(self.params['max_r_words']):
                if l < forward_generation_num[i]:
                    word = forward_test_ans[i][forward_generation_num[i] - 1 - l]
                else:
                    word = test_ans[i][l]
                generate_a.append(batcher.idx_to_word[word])
                if batcher.idx_to_word[word] == '<end>':
                    break
            generate_sent = ' '.join(generate_a)
            batch_generate_sents.append(generate_sent)
            generate_sent_list.append(generate_sent)
            print(generate_a)

        test_dist = np.transpose(np.array(test_dist), (1, 0, 2))
        for i in range(len(response_n)):
            # BUG FIX: the original read generate_sent_list[i], which is a
            # *global* index — from the second batch onward it returned the
            # FIRST batch's sentences, so perplexity/self-information were
            # computed with the wrong lengths.  Use this batch's sentences.
            gen_len = len(batch_generate_sents[i].split())
            ppl += perplexity.calculate_perplexity(test_dist[i], test_ans[i],
                                                   gen_len)
            hw += self_information.word_h(test_dist[i], test_ans[i], gen_len)

    # Average over all samples (num_per_epoch batches of — presumably — 100).
    ppl = ppl / (num_per_epoch * 100)
    hw = hw / (num_per_epoch * 100)
    avg_loss = all_loss / num_per_epoch

    # Persist both sentence files, then score them with embedding metrics.
    ground_sents = '\n'.join(ground_sent_list)
    generate_sents = '\n'.join(generate_sent_list)
    with open(result_file, 'w') as fw:
        fw.write(generate_sents)
    with open(ground_file, 'w') as fw:
        fw.write(ground_sents)
    avg_r = embedding_metrics.average(ground_file, result_file, self.w2v)
    print("Embedding Average Score: %f +/- %f ( %f )" %
          (avg_r[0], avg_r[1], avg_r[2]))
    greedy_r = embedding_metrics.greedy_match(ground_file, result_file,
                                              self.w2v)
    print("Greedy Matching Score: %f +/- %f ( %f )" %
          (greedy_r[0], greedy_r[1], greedy_r[2]))
    extrema_r = embedding_metrics.extrema_score(ground_file, result_file,
                                                self.w2v)
    print("Extrema Score: %f +/- %f ( %f )" %
          (extrema_r[0], extrema_r[1], extrema_r[2]))
    print("perplexity: %f" % (ppl))
    print("wh: %f" % (hw))
    print('avg loss: %f' % (avg_loss))
    # bleu = BLEU.bleu_val(ground_file, result_file)
    # print("BLEU Score: %f" % bleu)
    return avg_r[0] + greedy_r[0] + extrema_r[0]
# Evaluate on the test split: decode hypotheses, show a few samples,
# score with BLEU and greedy embedding matching, and persist the scores.
print('Computing test')
print('Decoding')
import time

t0 = time.time()
hyps = utils.decode_sentences(testgen,
                              model,
                              tokenizer.index_word,
                              k=1,
                              cond=True,
                              BOS=tokenizer.word_index[datagen.BOS])
print('Decoding time:' + str(time.time() - t0))
print('Hypothesis set', len(hyps))

# Print the first few source / hypothesis / target triples for eyeballing.
for idx in range(10):
    print('Source:', testgen.data[0][idx])
    print('Hypothesis:', hyps[idx])
    print('Target:', testgen.data[1][idx])
    print('#############################')

references = testgen.data[1][:len(hyps)]
bleu = sacrebleu.raw_corpus_bleu(hyps, [references])
r = embedding_metrics.greedy_match(hyps, references,
                                   'data/gnews-embeddings300.bin')
greedy = "Greedy Matching Score: %f +/- %f ( %f )" % (r[0], r[1], r[2])
print(bleu)
print(greedy)

# Write scores next to the model file — presumably modelname ends in a
# 3-character extension such as '.h5'; verify.
with open(modelname[:-3] + '.score', 'w') as f:
    f.write(str(bleu) + '\n')
    f.write(greedy + '\n')