Esempio n. 1
0
 def eval(self, train_step):
     """Restore the latest checkpoint and score the eval set with BLEU.

     Args:
         train_step: current training step; kept for interface
             compatibility with the training loop, not used here.

     Returns:
         Corpus-level BLEU score scaled to [0, 100].
     """
     with self.eval_graph.as_default():
         self.eval_saver.restore(self.eval_session, self.model_file)
         target_results = []
         output_results = []
         # Loop-invariant sampling threshold, hoisted out of the per-token
         # loop; clamped to >= 1 so random.randint never gets an empty
         # range on very small datasets. On average ~10 examples are
         # printed per eval run.
         sample_prob = max(
                 1, int(self.eval_reader.data_size * self.batch_size / 10))
         for _ in range(self.eval_reader.data_size):
             data = next(self.eval_data)
             in_seq = data['in_seq']
             in_seq_len = data['in_seq_len']
             target_seq = data['target_seq']
             outputs = self.eval_session.run(
                     self.eval_output,
                     feed_dict={
                         self.eval_in_seq: in_seq,
                         self.eval_in_seq_len: in_seq_len})
             for i, (output, target) in enumerate(zip(outputs, target_seq)):
                 output_text = reader.decode_text(output,
                         self.eval_reader.vocabs).split(' ')
                 # target[1:] skips the leading sequence-start token.
                 target_text = reader.decode_text(target[1:],
                         self.eval_reader.vocabs).split(' ')
                 # compute_bleu expects a list of reference lists per segment.
                 target_results.append([target_text])
                 output_results.append(output_text)
                 if random.randint(1, sample_prob) == 1:
                     print('====================')
                     input_text = reader.decode_text(in_seq[i],
                             self.eval_reader.vocabs)
                     print('src:' + input_text)
                     print('output: ' + ' '.join(output_text))
                     print('target: ' + ' '.join(target_text))
         return bleu.compute_bleu(target_results, output_results)[0] * 100
Esempio n. 2
0
def _bleu(ref_file, trans_file, subword_option=None):
    """Compute corpus BLEU between a reference file and a translation file.

    Args:
        ref_file: path to the reference translations, one segment per line.
        trans_file: path to the system translations, one segment per line.
        subword_option: subword post-processing mode handed to ``_clean``
            (e.g. BPE/SPM merge-back); ``None`` disables it.

    Returns:
        Corpus-level BLEU score scaled to [0, 100].
    """
    max_order = 4
    smooth = False

    # Read every reference file (currently a single one) line by line.
    ref_files = [ref_file]
    reference_text = []
    for reference_filename in ref_files:
        with codecs.getreader("utf-8")(tf.gfile.GFile(reference_filename,
                                                      "rb")) as fh:
            reference_text.append(fh.readlines())

    # Regroup per segment: per_segment_references[i] holds every reference
    # tokenization for segment i, as compute_bleu expects.
    per_segment_references = []
    for references in zip(*reference_text):
        reference_list = []
        for reference in references:
            reference = _clean(reference, subword_option)
            reference_list.append(reference.split(" "))
        per_segment_references.append(reference_list)

    translations = []
    with codecs.getreader("utf-8")(tf.gfile.GFile(trans_file, "rb")) as fh:
        for line in fh:
            # Bug fix: honor subword_option here too. The old code
            # hard-coded subword_option=None for translations, so BPE/SPM
            # system output was never merged back before scoring even
            # though the references were.
            line = _clean(line, subword_option)
            translations.append(line.split(" "))

    # bleu_score, precisions, bp, ratio, translation_length, reference_length
    bleu_score, _, _, _, _, _ = bleu.compute_bleu(per_segment_references,
                                                  translations, max_order,
                                                  smooth)
    return 100 * bleu_score
Esempio n. 3
0
def _evaluate(eval_fn, input_fn, decode_fn, path, config):
    """Run the model over an eval dataset and score the output with BLEU.

    Args:
        eval_fn: builds the prediction op from the feed placeholders.
        input_fn: builds the input pipeline; must return a features dict
            containing "source", "source_length" and "references".
        decode_fn: maps batches of id sequences back to text.
        path: checkpoint directory to restore the session from.
        config: session configuration passed to ChiefSessionCreator.

    Returns:
        The value returned by ``bleu.compute_bleu`` on the decoded corpus.
    """
    graph = tf.Graph()
    with graph.as_default():
        features = input_fn()
        refs = features["references"]
        placeholders = {
            "source": tf.placeholder(tf.int32, [None, None], "source"),
            "source_length": tf.placeholder(tf.int32, [None], "source_length")
        }
        predictions = eval_fn(placeholders)
        # Keep only the first (top-scoring) hypothesis per batch entry.
        predictions = predictions[0][:, 0, :]

        # One accumulator list per reference set.
        all_refs = [[] for _ in range(len(refs))]
        all_outputs = []

        sess_creator = tf.train.ChiefSessionCreator(checkpoint_dir=path,
                                                    config=config)

        with tf.train.MonitoredSession(session_creator=sess_creator) as sess:
            while not sess.should_stop():
                # Materialize a batch from the pipeline, then feed it back
                # through the placeholders to run the prediction op.
                feats = sess.run(features)
                outputs = sess.run(predictions,
                                   feed_dict={
                                       placeholders["source"]:
                                       feats["source"],
                                       placeholders["source_length"]:
                                       feats["source_length"]
                                   })
                # shape: [batch, len]
                outputs = outputs.tolist()
                # shape: ([batch, len], ..., [batch, len])
                references = [item.tolist() for item in feats["references"]]

                all_outputs.extend(outputs)

                for i in range(len(refs)):
                    all_refs[i].extend(references[i])

        decoded_symbols = decode_fn(all_outputs)
        decoded_refs = [decode_fn(refs) for refs in all_refs]
        # Transpose from per-reference-set lists to per-segment lists.
        decoded_refs = [list(x) for x in zip(*decoded_refs)]

        # NOTE(review): system outputs are passed first here, whereas the
        # usual compute_bleu signature is (references, translations) —
        # confirm against this project's bleu.compute_bleu definition.
        return bleu.compute_bleu(decoded_symbols, decoded_refs)
Esempio n. 4
0
def main(args):
    """Evaluate style-transferred text: classifier accuracy and BLEU.

    Args:
        args: parsed CLI namespace; uses ``args.data_name`` (dataset folder
            and classifier checkpoint name) and ``args.target_path``
            (file with the generated sentences to score).
    """
    data_pth = "data/%s" % args.data_name
    train_pth = os.path.join(data_pth, "train_data.txt")
    # The training split only supplies the vocabulary used by the classifier.
    train_data = MonoTextData(train_pth, True, vocab=100000)
    vocab = train_data.vocab
    source_pth = os.path.join(data_pth, "test_data.txt")
    target_pth = args.target_path
    eval_data = MonoTextData(target_pth, True, vocab=vocab)
    source = pd.read_csv(source_pth, names=['label', 'content'], sep='\t')
    target = pd.read_csv(target_pth, names=['label', 'content'], sep='\t')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Classification Accuracy
    model = CNNClassifier(len(vocab), 300, [1, 2, 3, 4, 5], 500,
                          0.5).to(device)
    model.load_state_dict(
        torch.load("checkpoint/%s-classifier.pt" % args.data_name))
    model.eval()
    eval_data, eval_label = eval_data.create_data_batch_labels(
        64, device, batch_first=True)
    acc = 100 * evaluate(model, eval_data, eval_label)
    print("Acc: %.2f" % acc)

    # BLEU Score: each source sentence is the single reference for the
    # generated sentence at the same row.
    sources = []
    targets = []
    for src, tgt in zip(source.content, target.content):
        sources.append([src.split()])
        targets.append(tgt.split())

    # compute_bleu returns (bleu, ...); scale the score to [0, 100].
    total_bleu = 100 * compute_bleu(sources, targets)[0]
    print("Bleu: %.2f" % total_bleu)
Esempio n. 5
0
File: eval.py Progetto: wyb330/nmt
def compute_bleu_score(references, translations, max_order=4, smooth=False):
    """Return the corpus BLEU of *translations*, scaled to [0, 100].

    The extra statistics ``bleu.compute_bleu`` returns (precisions,
    brevity penalty, ratio, lengths) are discarded; the raw score is
    echoed to stdout before scaling.
    """
    result = bleu.compute_bleu(references, translations, max_order, smooth)
    bleu_score = result[0]
    print(bleu_score)
    return bleu_score * 100
Esempio n. 6
0
def evaluate_bleu(refs, preds, bleu_n=4):
    """Return the corpus BLEU score of *preds* against *refs*.

    ``bleu_n`` is forwarded as ``max_order`` (the highest n-gram order);
    only the score itself is kept from ``compute_bleu``'s result tuple.
    """
    return compute_bleu(refs, preds, max_order=bleu_n)[0]