예제 #1
0
def print_embedding_scores(target_lines, gt_lines, w2v):
    """Print the three embedding-based similarity metrics for a decode run.

    NOTE(review): each metric function is called as (ground truth, generated,
    word vectors) and is assumed to return an indexable of at least three
    values (mean, std-error, confidence) -- confirm against embedding_metrics.
    """
    avg = embedding_metrics.average(gt_lines, target_lines, w2v)
    print("Embedding Average Score: %f +/- %f ( %f )" % (avg[0], avg[1], avg[2]))
    greedy = embedding_metrics.greedy_match(gt_lines, target_lines, w2v)
    print("Greedy Matching Score: %f +/- %f ( %f )" % (greedy[0], greedy[1], greedy[2]))
    extrema = embedding_metrics.extrema_score(gt_lines, target_lines, w2v)
    print("Extrema Score: %f +/- %f ( %f )" % (extrema[0], extrema[1], extrema[2]))
예제 #2
0
def cal_relevance(generated, reference, embedding):  # embedding V* E
    """Return [greedy, average, extrema] embedding relevance scores.

    Each sentence is wrapped in its own singleton list before being handed
    to the metric functions (they expect a list of token-list groups).
    """
    wrapped_generated = [[sent] for sent in generated]
    wrapped_reference = [[sent] for sent in reference]
    return [
        greedy_match(wrapped_reference, wrapped_generated, embedding),
        average_score(wrapped_reference, wrapped_generated, embedding),
        extrema_score(wrapped_reference, wrapped_generated, embedding),
    ]
예제 #3
0
    def _evaluate(self, sess, batcher, ground_file, result_file):
        """Run one evaluation pass: decode responses, write ground-truth and
        generated sentences to files, then print embedding metrics and
        perplexity.

        Args:
            sess: TensorFlow session used to run the model graph.
            batcher: evaluation data source exposing reset(), sample_num,
                generate(), and idx_to_word.
            ground_file: output path for ground-truth sentences (one per line).
            result_file: output path for generated sentences (one per line).

        Returns:
            Sum of the mean embedding-average, greedy-matching, and extrema
            scores -- a single scalar used to compare checkpoints.
        """
        batcher.reset()
        # NOTE(review): assumes the batcher yields batches of exactly 100
        # samples; the per-sample average below relies on this -- confirm.
        num_per_epoch = batcher.sample_num // 100
        print('number per epoch', num_per_epoch)
        ground_sent_list = list()
        generate_sent_list = list()
        ppl = 0
        for _ in range(num_per_epoch):
            context_vecs, context_sent_len, context_conv_len, response_vecs, response_idx, response_n = batcher.generate(
            )

            # Binary mask: 1 for the first response_n[i] word slots of each
            # response row, 0 for padding.
            mask_matrix = np.zeros(
                [np.shape(response_n)[0], self.params['max_r_words']],
                np.int32)
            for ind, row in enumerate(mask_matrix):
                row[:response_n[ind]] = 1
            batch_data = {
                self.model.encode_input: context_vecs,
                self.model.encode_sent_len: context_sent_len,
                self.model.encode_conv_len: context_conv_len,
                self.model.is_training: False,
                self.model.ans_vec: response_vecs,
                self.model.y: response_idx,
                self.model.y_mask: mask_matrix
            }

            # `loss` is fetched but not used in this variant of _evaluate.
            loss, test_ans, test_dist = sess.run([
                self.model.test_loss, self.model.answer_word_test,
                self.model.distribution_word_test
            ],
                                                 feed_dict=batch_data)

            # Decoded words come back time-major; transpose to (batch, time)
            # so each row is one decoded response.
            test_ans = np.transpose(np.array(test_ans), (1, 0))
            for i in range(len(response_n)):
                # Reconstruct the ground-truth sentence; the '<end>' token is
                # appended before the loop breaks, so it stays in the text.
                ground_a = list()
                for l in range(self.params['max_r_words']):
                    word = response_idx[i][l]
                    ground_a.append(batcher.idx_to_word[word])
                    if batcher.idx_to_word[word] == '<end>':
                        break
                ground_sent = ' '.join(ground_a)
                ground_sent_list.append(ground_sent)

                # Same reconstruction for the model's decoded response.
                generate_a = list()
                for l in range(self.params['max_r_words']):
                    word = test_ans[i][l]
                    generate_a.append(batcher.idx_to_word[word])
                    if batcher.idx_to_word[word] == '<end>':
                        break
                generate_sent = ' '.join(generate_a)
                generate_sent_list.append(generate_sent)

            # Word distributions are time-major too: -> (batch, time, vocab).
            test_dist = np.transpose(np.array(test_dist), (1, 0, 2))
            for i in range(len(response_n)):
                # Perplexity is computed against the ground-truth indices over
                # the true response length.
                ppl += perplexity.calculate_perplexity(test_dist[i],
                                                       response_idx[i],
                                                       response_n[i])

        # Average perplexity per sample (num_per_epoch batches of 100).
        ppl = ppl / (num_per_epoch * 100)

        ground_sents = '\n'.join(ground_sent_list)
        generate_sents = '\n'.join(generate_sent_list)

        with open(result_file, 'w') as fw:
            fw.write(generate_sents)

        with open(ground_file, 'w') as fw:
            fw.write(ground_sents)

        # The embedding metrics read the two files written above.
        avg_r = embedding_metrics.average(ground_file, result_file, self.w2v)
        print("Embedding Average Score: %f +/- %f ( %f )" %
              (avg_r[0], avg_r[1], avg_r[2]))

        greedy_r = embedding_metrics.greedy_match(ground_file, result_file,
                                                  self.w2v)
        print("Greedy Matching Score: %f +/- %f ( %f )" %
              (greedy_r[0], greedy_r[1], greedy_r[2]))

        extrema_r = embedding_metrics.extrema_score(ground_file, result_file,
                                                    self.w2v)
        print("Extrema Score: %f +/- %f ( %f )" %
              (extrema_r[0], extrema_r[1], extrema_r[2]))

        print("perplexity: %f" % (ppl))

        # bleu = BLEU.bleu_val(ground_file, result_file)
        # print("BLEU Score: %f" % bleu)
        return avg_r[0] + greedy_r[0] + extrema_r[0]
    def _evaluate(self, sess, batcher, ground_file, result_file):
        """Evaluate a two-pass (forward/backward) decoder: first decode the
        reversed "forward" prefix, splice its embeddings back into the
        response vectors, decode the full response, then write sentences to
        files and report embedding metrics, perplexity, word entropy, and
        average loss.

        Args:
            sess: TensorFlow session used to run the model graph.
            batcher: evaluation data source exposing reset(), sample_num,
                generate(), idx_to_word, word_to_idx, and embedding.
            ground_file: output path for ground-truth sentences.
            result_file: output path for generated sentences.

        Returns:
            Sum of the mean embedding-average, greedy-matching, and extrema
            scores -- a single scalar used to compare checkpoints.
        """
        batcher.reset()
        # NOTE(review): assumes batches of exactly 100 samples; the
        # per-sample averages below rely on this -- confirm.
        num_per_epoch = batcher.sample_num // 100
        print('number per epoch', num_per_epoch)
        ground_sent_list = list()
        generate_sent_list = list()
        all_loss = 0
        ppl = 0
        hw = 0  # accumulated per-word self-information (entropy) score
        for _ in range(num_per_epoch):


            context_vecs, context_sent_len, context_conv_len, response_vecs, response_idx, response_n, \
                response_vecs_forward, response_idx_forward, response_n_forward = batcher.generate()

            # Padding masks for the full response and the forward prefix:
            # 1 for real word slots, 0 for padding.
            mask_matrix = np.zeros(
                [np.shape(response_n)[0], self.params['max_r_words']],
                np.int32)
            mask_matrix_forward = np.zeros([
                np.shape(response_n_forward)[0], self.params['max_r_f_words']
            ], np.int32)
            for ind, row in enumerate(mask_matrix):
                row[:response_n[ind]] = 1
            for ind, row in enumerate(mask_matrix_forward):
                row[:response_n_forward[ind]] = 1
            # First pass feeds only the context; no response placeholders.
            batch_data = {
                self.model.encode_input: context_vecs,
                self.model.encode_sent_len: context_sent_len,
                self.model.encode_conv_len: context_conv_len,
                self.model.is_training: False,
            }

            forward_test_ans = sess.run(self.model.forward_answer_word_test,
                                        feed_dict=batch_data)
            # Time-major -> (batch, time).
            forward_test_ans = np.transpose(np.array(forward_test_ans), (1, 0))
            forward_generation_num = np.zeros([np.shape(response_n)[0]],
                                              np.int32)
            for i in range(len(response_n)):
                # The forward decoder emits the prefix in reverse order and
                # terminates with '<start>'; stop there, then reverse.
                forward_a = list()
                for l in range(self.params['max_r_f_words']):
                    word = forward_test_ans[i][l]
                    if batcher.idx_to_word[word] == '<start>':
                        break
                    forward_a.append(batcher.idx_to_word[word])
                forward_a.reverse()
                forward_generation_num[i] = len(forward_a)
                # Overwrite the leading response embeddings with the decoded
                # prefix's embeddings (mutates response_vecs in place).
                forward_vec = list()
                for word in forward_a:
                    forward_vec.append(
                        batcher.embedding[batcher.word_to_idx[word]])
                if len(forward_a) != 0:
                    response_vecs[i, :len(forward_a), :] = forward_vec

            # NOTE(review): prints only the LAST sample's prefix (loop
            # variable reused after the loop) -- looks like a debug print.
            print(forward_a, end=' ')
            # Mask marking which leading slots were filled by the forward pass.
            forward_generation = np.zeros(
                [np.shape(response_n)[0], self.params['max_r_words']],
                np.int32)
            for ind, row in enumerate(forward_generation):
                row[:forward_generation_num[ind]] = 1

            # Second pass: full feed including the spliced response vectors.
            batch_data = {
                self.model.encode_input: context_vecs,
                self.model.encode_sent_len: context_sent_len,
                self.model.encode_conv_len: context_conv_len,
                self.model.is_training: False,
                self.model.ans_vec_entire: response_vecs,
                self.model.y_entire: response_idx,
                self.model.y_mask_entire: mask_matrix,
                self.model.ans_vec_forward: response_vecs_forward,
                self.model.y_forward: response_idx_forward,
                self.model.y_mask_forward: mask_matrix_forward,
                self.model.y_forward_generation: forward_generation
            }

            loss, test_ans, test_dist = sess.run([
                self.model.test_loss, self.model.answer_word_test,
                self.model.distribution_word_test
            ],
                                                 feed_dict=batch_data)
            all_loss += loss
            # Time-major -> (batch, time).
            test_ans = np.transpose(np.array(test_ans), (1, 0))
            for i in range(len(response_n)):
                # Ground-truth sentence; '<end>' is kept before breaking.
                ground_a = list()
                for l in range(self.params['max_r_words']):
                    word = response_idx[i][l]
                    ground_a.append(batcher.idx_to_word[word])
                    if batcher.idx_to_word[word] == '<end>':
                        break
                ground_sent = ' '.join(ground_a)
                ground_sent_list.append(ground_sent)

                # Generated sentence: the leading forward_generation_num[i]
                # words come from the (reversed) forward decode; the rest from
                # the second-pass decoder.
                generate_a = list()
                for l in range(self.params['max_r_words']):
                    if l < forward_generation_num[i]:
                        word = forward_test_ans[i][forward_generation_num[i] -
                                                   1 - l]
                    else:
                        word = test_ans[i][l]
                    generate_a.append(batcher.idx_to_word[word])
                    if batcher.idx_to_word[word] == '<end>':
                        break
                generate_sent = ' '.join(generate_a)
                generate_sent_list.append(generate_sent)
            # NOTE(review): debug print of the last sample's decoded words.
            print(generate_a)
            # Time-major -> (batch, time, vocab).
            test_dist = np.transpose(np.array(test_dist), (1, 0, 2))
            for i in range(len(response_n)):
                # print(test_dist[i].shape,response_idx[i],response_n[i])
                # ppl += perplexity.calculate_perplexity(test_dist[i],response_idx[i],response_n[i])
                # hw += self_information.word_h(test_dist[i],response_idx[i],response_n[i])
                # Perplexity/entropy are computed against the model's OWN
                # decoded tokens over the generated sentence length (not
                # against the ground truth -- see the commented lines above).
                # NOTE(review): indexes generate_sent_list[i], i.e. the FIRST
                # batch's sentences once multiple batches accumulate -- likely
                # should be the current batch's offset; confirm.
                ppl += perplexity.calculate_perplexity(
                    test_dist[i], test_ans[i],
                    len(generate_sent_list[i].split()))
                hw += self_information.word_h(
                    test_dist[i], test_ans[i],
                    len(generate_sent_list[i].split()))

        # Per-sample / per-batch averages.
        ppl = ppl / (num_per_epoch * 100)
        hw = hw / (num_per_epoch * 100)
        avg_loss = all_loss / num_per_epoch

        ground_sents = '\n'.join(ground_sent_list)
        generate_sents = '\n'.join(generate_sent_list)

        with open(result_file, 'w') as fw:
            fw.write(generate_sents)

        with open(ground_file, 'w') as fw:
            fw.write(ground_sents)

        # The embedding metrics read the two files written above.
        avg_r = embedding_metrics.average(ground_file, result_file, self.w2v)
        print("Embedding Average Score: %f +/- %f ( %f )" %
              (avg_r[0], avg_r[1], avg_r[2]))

        greedy_r = embedding_metrics.greedy_match(ground_file, result_file,
                                                  self.w2v)
        print("Greedy Matching Score: %f +/- %f ( %f )" %
              (greedy_r[0], greedy_r[1], greedy_r[2]))

        extrema_r = embedding_metrics.extrema_score(ground_file, result_file,
                                                    self.w2v)
        print("Extrema Score: %f +/- %f ( %f )" %
              (extrema_r[0], extrema_r[1], extrema_r[2]))

        print("perplexity: %f" % (ppl))
        print("wh: %f" % (hw))
        print('avg loss: %f' % (avg_loss))

        # bleu = BLEU.bleu_val(ground_file, result_file)
        # print("BLEU Score: %f" % bleu)
        return avg_r[0] + greedy_r[0] + extrema_r[0]
예제 #5
0
    # Fragment of a larger function: testgen, model, tokenizer, datagen,
    # modelname, utils, sacrebleu, and embedding_metrics are defined outside
    # this view.
    print('Computing test')
    print('Decoding')
    import time
    start = time.time()
    # Decode the test set. NOTE(review): k=1 presumably means beam width 1
    # (greedy); confirm against utils.decode_sentences. BOS id is looked up
    # from the tokenizer's vocabulary.
    hyps = utils.decode_sentences(testgen,
                                  model,
                                  tokenizer.index_word,
                                  k=1,
                                  cond=True,
                                  BOS=tokenizer.word_index[datagen.BOS])
    print('Decoding time:' + str(time.time() - start))
    print('Hypothesis set', len(hyps))
    # Dump the first 10 source/hypothesis/target triples for eyeballing.
    for i in range(10):
        print('Source:', testgen.data[0][i])
        print('Hypothesis:', hyps[i])
        print('Target:', testgen.data[1][i])
        print('#############################')

    # Corpus BLEU plus an embedding greedy-match score against the reference
    # half of the test data, truncated to the number of decoded hypotheses.
    bleu = sacrebleu.raw_corpus_bleu(hyps, [testgen.data[1][:len(hyps)]])
    r = embedding_metrics.greedy_match(hyps, testgen.data[1][:len(hyps)],
                                       'data/gnews-embeddings300.bin')
    greedy = "Greedy Matching Score: %f +/- %f ( %f )" % (r[0], r[1], r[2])
    print(bleu)
    print(greedy)

    # Persist both scores next to the model file ('.h5' -> '.score' via the
    # [:-3] slice -- NOTE(review): assumes a 3-char extension).
    with open(modelname[:-3] + '.score', 'w') as f:
        f.write(str(bleu))
        f.write('\n')
        f.write(greedy)
        f.write('\n')