Example 1
    def translate(self, data_iter, step, attn_debug=False):

        self.model.eval()
        output_path = self.args.result_path + '.%d.output' % step
        output_file = codecs.open(output_path, 'w', 'utf-8')
        gold_path = self.args.result_path + '.%d.gold_test' % step
        pred_path = self.args.result_path + '.%d.pred_test' % step
        ex_single_path = self.args.result_path + '.%d.ex_test' % step + ".short"
        ex_context_path = self.args.result_path + '.%d.ex_test' % step + ".long"
        gold_out_file = codecs.open(gold_path, 'w', 'utf-8')
        pred_out_file = codecs.open(pred_path, 'w', 'utf-8')
        short_ex_out_file = codecs.open(ex_single_path, 'w', 'utf-8')
        long_ex_out_file = codecs.open(ex_context_path, 'w', 'utf-8')
        # pred_results, gold_results = [], []

        ct = 0
        with torch.no_grad():
            rouge = Rouge()
            for batch in data_iter:
                doc_data, summ_data = self.translate_batch(batch)
                translations = self.from_batch_test(batch, doc_data)

                for idx in range(len(translations)):
                    origin_sent, doc_extract, context_doc_extract, \
                        doc_pred, lead = translations[idx]
                    if ct % 100 == 0:
                        print("Processing %d" % ct)
                    output_file.write("ID      : %d\n" % ct)
                    output_file.write(
                        "ORIGIN  : " +
                        origin_sent.replace('<S>', '\n          ') + "\n")
                    gold_data = summ_data[idx]
                    output_file.write("GOLD    : " + gold_data + "\n")
                    output_file.write("LEAD    : " + lead + "\n")
                    output_file.write("DOC_EX  : " + doc_extract.strip() +
                                      "\n")
                    output_file.write("DOC_CONT: " +
                                      context_doc_extract.strip() + "\n")
                    output_file.write("DOC_GEN : " + doc_pred.strip() + "\n")

                    gold_list = gold_data.strip().split()
                    lead_list = lead.strip().replace("[unused2]", "").replace(
                        "[unused3]", "").split()
                    rouge_score = rouge.get_scores(lead, gold_data)
                    bleu_score = sentence_bleu(
                        [gold_list],
                        lead_list,
                        smoothing_function=SmoothingFunction().method1)
                    output_file.write(
                        "LEAD     bleu & rouge-f 1/2/l:    %.4f & %.4f/%.4f/%.4f\n"
                        % (bleu_score, rouge_score[0]["rouge-1"]["f"],
                           rouge_score[0]["rouge-2"]["f"],
                           rouge_score[0]["rouge-l"]["f"]))

                    doc_extract_list = doc_extract.strip().replace(
                        "[unused2]", "").replace("[unused3]", "").split()
                    rouge_score = rouge.get_scores(doc_extract, gold_data)
                    bleu_score = sentence_bleu(
                        [gold_list],
                        doc_extract_list,
                        smoothing_function=SmoothingFunction().method1)
                    output_file.write(
                        "DOC_EX   bleu & rouge-f 1/2/l:    %.4f & %.4f/%.4f/%.4f\n"
                        % (bleu_score, rouge_score[0]["rouge-1"]["f"],
                           rouge_score[0]["rouge-2"]["f"],
                           rouge_score[0]["rouge-l"]["f"]))

                    doc_context_list = context_doc_extract.strip().replace(
                        "[unused2]", "").replace("[unused3]", "").split()
                    rouge_score = rouge.get_scores(context_doc_extract,
                                                   gold_data)
                    bleu_score = sentence_bleu(
                        [gold_list],
                        doc_context_list,
                        smoothing_function=SmoothingFunction().method1)
                    output_file.write(
                        "DOC_CONT bleu & rouge-f 1/2/l:    %.4f & %.4f/%.4f/%.4f\n"
                        % (bleu_score, rouge_score[0]["rouge-1"]["f"],
                           rouge_score[0]["rouge-2"]["f"],
                           rouge_score[0]["rouge-l"]["f"]))

                    doc_long_list = doc_pred.strip().replace(
                        "[unused2]", "").replace("[unused3]", "").split()
                    rouge_score = rouge.get_scores(doc_pred, gold_data)
                    bleu_score = sentence_bleu(
                        [gold_list],
                        doc_long_list,
                        smoothing_function=SmoothingFunction().method1)
                    output_file.write(
                        "DOC_GEN  bleu & rouge-f 1/2/l:    %.4f & %.4f/%.4f/%.4f\n\n"
                        % (bleu_score, rouge_score[0]["rouge-1"]["f"],
                           rouge_score[0]["rouge-2"]["f"],
                           rouge_score[0]["rouge-l"]["f"]))

                    short_ex_out_file.write(doc_extract.strip().replace(
                        "[unused2]", "").replace("[unused3]", "") + '\n')
                    long_ex_out_file.write(context_doc_extract.strip().replace(
                        "[unused2]", "").replace("[unused3]", "") + '\n')
                    pred_out_file.write(doc_pred.strip().replace(
                        "[unused2]", "").replace("[unused3]", "") + '\n')
                    gold_out_file.write(gold_data.strip() + '\n')
                    ct += 1
                pred_out_file.flush()
                short_ex_out_file.flush()
                long_ex_out_file.flush()
                gold_out_file.flush()
                output_file.flush()

        pred_out_file.close()
        short_ex_out_file.close()
        long_ex_out_file.close()
        gold_out_file.close()
        output_file.close()

        if (step != -1):
            ex_short_bleu = test_bleu(gold_path, ex_single_path)
            ex_long_bleu = test_bleu(gold_path, ex_context_path)
            pred_bleu = test_bleu(gold_path, pred_path)

            file_rouge = FilesRouge(hyp_path=ex_single_path,
                                    ref_path=gold_path)
            ex_short_rouges = file_rouge.get_scores(avg=True)

            file_rouge = FilesRouge(hyp_path=ex_context_path,
                                    ref_path=gold_path)
            ex_long_rouges = file_rouge.get_scores(avg=True)

            file_rouge = FilesRouge(hyp_path=pred_path, ref_path=gold_path)
            pred_rouges = file_rouge.get_scores(avg=True)

            self.logger.info(
                'Gold Length at step %d: %.2f\n' %
                (step, test_length(gold_path, gold_path, ratio=False)))
            self.logger.info('Short Extraction Length ratio at step %d: %.2f' %
                             (step, test_length(ex_single_path, gold_path)))
            self.logger.info('Short Extraction Bleu at step %d: %.2f' %
                             (step, ex_short_bleu * 100))
            self.logger.info('Short Extraction Rouges at step %d \n%s' %
                             (step, rouge_results_to_str(ex_short_rouges)))
            self.logger.info('Long Extraction Length ratio at step %d: %.2f' %
                             (step, test_length(ex_context_path, gold_path)))
            self.logger.info('Long Extraction Bleu at step %d: %.2f' %
                             (step, ex_long_bleu * 100))
            self.logger.info('Long Extraction Rouges at step %d \n%s' %
                             (step, rouge_results_to_str(ex_long_rouges)))
            self.logger.info('Prediction Length ratio at step %d: %.2f' %
                             (step, test_length(pred_path, gold_path)))
            self.logger.info('Prediction Bleu at step %d: %.2f' %
                             (step, pred_bleu * 100))
            self.logger.info('Prediction Rouges at step %d \n%s' %
                             (step, rouge_results_to_str(pred_rouges)))
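
Example 1 scores every system output (lead, short extraction, contextual extraction, abstractive prediction) against the gold summary with sentence-level BLEU and ROUGE F1. A minimal, self-contained sketch of that scoring pattern follows, assuming only the nltk and rouge packages and toy strings in place of the translator's real outputs:

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge import Rouge

gold = "the cat sat on the mat"
pred = "a cat sat on a mat"

# sentence_bleu takes a list of tokenized references and one tokenized hypothesis
bleu = sentence_bleu([gold.split()], pred.split(),
                     smoothing_function=SmoothingFunction().method1)

# Rouge.get_scores takes raw strings and returns one score dict per (hyp, ref) pair
rouge_scores = Rouge().get_scores(pred, gold)[0]
print("bleu & rouge-f 1/2/l: %.4f & %.4f/%.4f/%.4f" %
      (bleu, rouge_scores["rouge-1"]["f"], rouge_scores["rouge-2"]["f"],
       rouge_scores["rouge-l"]["f"]))

SmoothingFunction().method1 adds a small epsilon to zero n-gram precision counts, which keeps short hypotheses from collapsing the geometric mean to zero.
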
Example 2
    def eval(self,
             filesrc,
             filetgt,
             output_file_name,
             batch_size=64,
             max_batches=None,
             device="cpu",
             keep_chance=0.9):
        if self.encoder is None or self.decoder is None:
            print('Model not loaded!')
            return

        self.encoder.to(device)
        self.decoder.to(device)

        with open(output_file_name, 'w') as output_file:
            testloader = test_data_loader(filesrc=filesrc,
                                          filetgt=filetgt,
                                          output_file=output_file,
                                          model=self,
                                          batch_size=batch_size,
                                          max_batches=max_batches,
                                          keep_chance=keep_chance,
                                          device=device)

            self.encoder.eval()
            self.decoder.eval()

            start = time.time()

            scores = []
            i = 0
            for batch_candidate, batch_references in testloader:
                cur_score = sentence_bleu(
                    batch_references,
                    batch_candidate,
                    smoothing_function=SmoothingFunction().method3)
                scores.append(cur_score)
                i += 1
                print('', file=output_file)
                print('Sample {0:d}, BLEU score: {1:0.4f}'.format(
                    i, cur_score))
                print('', file=output_file)
                print('Sample {0:d}, BLEU score: {1:0.4f}'.format(
                    i, cur_score),
                      file=output_file)
                print('', file=output_file)
                print('=' * 30, file=output_file)
                print('', file=output_file)

            print('=' * 50, file=output_file)
            print('', file=output_file)
            print('= ' * 25, file=output_file)
            print('', file=output_file)
            print(
                'Average BLEU score: {0:0.4f}, minimum score: {1:0.4f}, maximum score: {2:0.4f}, median score: {3:0.4f}'
                .format(np.mean(scores), min(scores), max(scores),
                        np.median(scores)),
                file=output_file)
            print(
                'Average BLEU score: {0:0.4f}, minimum score: {1:0.4f}, maximum score: {2:0.4f}, median score: {3:0.4f}'
                .format(np.mean(scores), min(scores), max(scores),
                        np.median(scores)))

        return scores, testloader.input_lens
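
The loop above records one sentence_bleu score per sample (smoothed with method3) and then reports their mean, minimum, maximum and median. Note that the mean of per-sentence BLEU is generally not the same number corpus_bleu would give, since corpus_bleu pools n-gram counts over the whole set before computing precisions; a toy illustration with made-up token lists:

import numpy as np
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction

refs = [[["the", "cat", "sat"]], [["a", "dog", "barked", "loudly"]]]
hyps = [["the", "cat", "sat"], ["a", "dog", "barked"]]
smooth = SmoothingFunction().method3

per_sample = [sentence_bleu(r, h, smoothing_function=smooth)
              for r, h in zip(refs, hyps)]
print("mean sentence BLEU: %.4f" % np.mean(per_sample))
print("corpus BLEU:        %.4f" % corpus_bleu(refs, hyps, smoothing_function=smooth))
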
Example 3
def train(encoder_decoder: EncoderDecoder, model_dump_path,
          train_data_loader: DataLoader, model_name,
          val_data_loader: DataLoader, keep_prob, teacher_forcing_schedule, lr,
          max_length, early_stopping, patience, beam_width):
    global_step = 0
    loss_function = torch.nn.NLLLoss(ignore_index=0)
    optimizer = AdamW(
        encoder_decoder.parameters(),
        lr=lr,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
        eps=1e-8  # args.adam_epsilon  - default is 1e-8.
    )
    # optimizer = optim.Adam(encoder_decoder.parameters(), lr=lr)
    model_path = model_dump_path + model_name + '/'
    history = {
        'val_loss': [],
        'best_epoch': -1,
        'best_loss': float("inf"),
        'prev_loss': float("inf")
    }

    for epoch, teacher_forcing in enumerate(teacher_forcing_schedule):
        print('epoch %i' % epoch, flush=True)

        for batch_idx, (tweet_idxs, news_idxs, target_idxs, tweet_tokens,
                        news_tokens,
                        hashtag_tokens) in enumerate(tqdm(train_data_loader)):
            # tweet_idxs have dim (batch_size x max_tweet_len)
            # news_idxs have dim (batch_size x max_news_len)
            # hashtag_idxs have dim (batch_size x max_hashtag_len)

            lengths_tweets = (tweet_idxs != 0).long().sum(dim=1)
            lengths_news = (news_idxs != 0).long().sum(dim=1)

            optimizer.zero_grad()
            output_log_probs, output_seqs, cov_loss = encoder_decoder(
                tweet_idxs,
                news_idxs,
                lengths_tweets,
                lengths_news,
                beam_width,
                targets=target_idxs,
                keep_prob=keep_prob,
                teacher_forcing=teacher_forcing)

            batch_size = tweet_idxs.shape[0]

            flattened_outputs = output_log_probs.contiguous().view(
                batch_size * max_length, -1)

            batch_loss = loss_function(
                flattened_outputs,
                target_idxs.contiguous().view(-1)) + cov_loss
            batch_loss.backward()
            optimizer.step()

            batch_outputs = trim_seqs(output_seqs)

            batch_targets = [[list(seq[seq > 0])]
                             for seq in list(to_np(target_idxs))]

            batch_bleu_score = corpus_bleu(
                batch_targets,
                batch_outputs,
                smoothing_function=SmoothingFunction().method1)

            if global_step < 10 or (global_step % 10 == 0
                                    and global_step < 100) or (global_step %
                                                               100 == 0):
                tweet_string = "do you think brett kavanaugh should be confirmed as a justice on the supreme court"
                news_string = "leading catholic publication turns on brett kavanaugh says his nomination to the supreme court should be withdrawn"
                output_string = encoder_decoder.get_response(
                    tweet_string, news_string)
                writer.add_text('kavanaugh',
                                output_string,
                                global_step=global_step)
                print("Global Step: ", global_step, ' kavanaugh ',
                      output_string)

            if global_step % 100 == 0:
                writer.add_scalar('train_batch_loss', batch_loss, global_step)
                writer.add_scalar('train_batch_bleu_score', batch_bleu_score,
                                  global_step)
                print("Global Step: ", global_step, ' train_batch_loss ',
                      batch_loss)
                print("Global Step: ", global_step, ' train_batch_bleu_score ',
                      batch_bleu_score)

                # for tag, value in encoder_decoder.named_parameters():
                #     tag = tag.replace('.', '/')
                #     writer.add_histogram('weights/' + tag, value, global_step, bins='doane')
                #     writer.add_histogram('grads/' + tag, to_np(value.grad), global_step, bins='doane')

            global_step += 1

        with torch.no_grad():
            val_loss, val_bleu_score = evaluate(encoder_decoder,
                                                val_data_loader)
            history["val_loss"].append(val_loss)

        writer.add_scalar('val_loss', val_loss, global_step=global_step)
        writer.add_scalar('val_bleu_score',
                          val_bleu_score,
                          global_step=global_step)

        encoder_embeddings = encoder_decoder.encoder.embedding.weight.data
        encoder_vocab = encoder_decoder.lang.tok_to_idx.keys()
        writer.add_embedding(encoder_embeddings,
                             metadata=encoder_vocab,
                             global_step=0,
                             tag='encoder_embeddings')

        decoder_embeddings = encoder_decoder.decoder.embedding.weight.data
        decoder_vocab = encoder_decoder.lang.tok_to_idx.keys()
        writer.add_embedding(decoder_embeddings,
                             metadata=decoder_vocab,
                             global_step=0,
                             tag='decoder_embeddings')

        tweet_string = "should ask dr ford 1 important question why did you go upstairs to go to the bathroom 99 of 2 story houses have a bathroom downstairs common sense bedrooms are upstairs cbc news fox news real donald trump cnn"
        news_string = "america supreme court brett kavanuagh senate judiciary committee christine blasey ford cnn fox news supreme court bill clinton kavanaugh new york ford donald trump rod rosenstein new york times trump trump maine alaska tara d sonenshine us george washington university elliott school of international affairs christine blasey ford senate judiciary committee fox news ford fox news chris wallace ford wallace bret baier ford brit hume fox news bret baier ford chris wallace david mack andrew napolitano ford rachel mitchell fox news andrew napolitano ford rachel mitchell keith boykin fox kavanaugh ford ford twitter america supreme court brett kavanuagh senate judiciary committee christine blasey ford cnn fox news supreme court bill clinton kavanaugh new york ford donald trump rod rosenstein new york times trump trump maine alaska tara d sonenshine us george washington university elliott school of international affairs fox news sean hannity donald trump fox news fox news cnn msnbc pbs news gallupknight foundation gallupknight foundation gallup knight foundation christine blasey ford senate judiciary committee fox news ford fox news chris wallace ford wallace bret baier ford brit hume fox news bret baier ford chris wallace"
        output_string = encoder_decoder.get_response(tweet_string, news_string)
        writer.add_text('christine blasey ford',
                        output_string,
                        global_step=global_step)
        print("Global Step: ", global_step, ' christine blasey ford ',
              output_string)

        print('val loss: %.5f, val BLEU score: %.5f' %
              (val_loss, val_bleu_score),
              flush=True)
        torch.save(encoder_decoder,
                   "%s%s_%i.pt" % (model_path, model_name, epoch))

        print('-' * 100, flush=True)

        if history['val_loss'][-1] < history['best_loss'] or history[
                'val_loss'][-1] < history['prev_loss']:
            history['best_loss'] = history['val_loss'][-1]
            history['best_epoch'] = epoch
        elif early_stopping and epoch - history['best_epoch'] > patience:
            # early stopping
            print(
                "Early stopping at epoch {0}, best result at epoch {1}".format(
                    epoch, history['best_epoch']))
            break
        history['prev_loss'] = history['val_loss'][-1]
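
corpus_bleu, used above for train_batch_bleu_score, expects a list of reference lists (one inner list of tokenized references per hypothesis) alongside a parallel list of tokenized hypotheses; that is why batch_targets wraps each trimmed target sequence in an extra list. A minimal sketch of the expected shapes, with made-up data:

from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

# one inner list of references per hypothesis, even when there is only one reference
batch_targets = [[["hello", "world"]], [["good", "morning", "everyone"]]]
batch_outputs = [["hello", "world"], ["good", "morning"]]

score = corpus_bleu(batch_targets, batch_outputs,
                    smoothing_function=SmoothingFunction().method1)
print("batch BLEU: %.4f" % score)
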
Example 4
#-*- coding:utf-8 -*-
'''
[AI502] Deep Learning Assignment
"Attention is all you need" Implementation
20193640 Jungwon Choi
'''
import torch
import torch.nn as nn
import numpy as np
import sys

from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import SmoothingFunction  # for short sentence

smoothing_func = SmoothingFunction().method4

#===============================================================================
''' Validate sequence '''


def val(model, val_loader, criterion, dataloader):
    model.eval()
    device = next(model.parameters()).device.index
    losses = []
    total_iter = len(val_loader)
    sum_bleu = 0.0
    num_sentence = 0.0
    sos_idx = dataloader.sos_idx

    with torch.no_grad():
        for i, batch in enumerate(val_loader):
Example 5
 def calc_bleu(self, reference, hypothesis, weight):
     return nltk.translate.bleu_score.sentence_bleu(
         reference,
         hypothesis,
         weight,
         smoothing_function=SmoothingFunction().method1)
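
Here the third positional argument is sentence_bleu's weights parameter, so callers control the n-gram order being scored. A hedged usage sketch (the tokens and the bigram weighting below are illustrative, not taken from the original project):

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

reference = [["the", "quick", "brown", "fox"]]
hypothesis = ["the", "quick", "fox"]

# BLEU-2: put all the weight on unigrams and bigrams
bleu2 = sentence_bleu(reference, hypothesis, (0.5, 0.5),
                      smoothing_function=SmoothingFunction().method1)
print("BLEU-2: %.4f" % bleu2)
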
Example 6
import random
import logging

import numpy as np
import tensorflow as tf
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.bleu_score import SmoothingFunction

import network
from utils import *
from vocab import Vocabulary, build_unify_vocab
from config import load_arguments
from dataloader.multi_style_dataloader import MultiStyleDataloader
from dataloader.online_dataloader import OnlineDataloader

smoothie = SmoothingFunction().method4

logger = logging.getLogger(__name__)


def evaluation(sess,
               args,
               batches,
               model,
               classifier,
               classifier_vocab,
               domain_classifer,
               domain_vocab,
               output_path,
               write_dict,
               save_samples=False,
Example 7
def processAlignments(data, folder, inputfile, outputType, num, refs=False):
    with open(folder + "/" + ntpath.basename(inputfile) + '.ali.js',
              'w',
              encoding='utf-8') as out_a_js:
        with open(folder + "/" + ntpath.basename(inputfile) + '.src.js',
                  'w',
                  encoding='utf-8') as out_s_js:
            with open(folder + "/" + ntpath.basename(inputfile) + '.trg.js',
                      'w',
                      encoding='utf-8') as out_t_js:
                with open(folder + "/" + ntpath.basename(inputfile) +
                          '.con.js',
                          'w',
                          encoding='utf-8') as out_c_js:
                    with open(folder + "/" + ntpath.basename(inputfile) +
                              '.sc.js',
                              'w',
                              encoding='utf-8') as out_sc_js:
                        out_a_js.write(u'var alignments = [\n')
                        out_s_js.write(u'var sources = [\n')
                        out_t_js.write(u'var targets = [\n')
                        out_c_js.write(u'var confidences = [\n')
                        out_sc_js.write(u'var sentence_confidences = [\n')
                        num = int(num) - 1
                        if num > -1 and (num < len(data)):
                            data = [data[num]]
                        elif num >= len(data):
                            print(
                                'The selected sentence number is higher than the sentence count!\n'
                            )
                            printHelp()
                            sys.exit()
                        for i in range(0, len(data)):
                            (src, tgt, rawAli) = data[i]
                            #In case the source string is empty
                            if rawAli.ndim == 1:
                                rawAli = np.array([rawAli])
                            #In case both source & target strings are empty, or of length 1 without eos
                            elif rawAli.ndim == 0:
                                rawAli = np.array([[rawAli]])
                            ali = [
                                l[:len(list(filter(None, tgt)))]
                                for l in rawAli[:len(src)]
                            ]

                            srcTotal = []
                            trgTotal = []
                            tali = np.array(ali).transpose()
                            for a in range(0, len(ali)):
                                srcTotal.append(
                                    str(
                                        math.pow(
                                            math.e, -0.05 * math.pow(
                                                (getCP([ali[a]]) +
                                                 getEnt([ali[a]]) +
                                                 getRevEnt([ali[a]])), 2))))
                            for a in range(0, len(tali)):
                                trgTotal.append(
                                    str(
                                        math.pow(
                                            math.e, -0.05 * math.pow(
                                                (getCP([tali[a]]) +
                                                 getEnt([tali[a]]) +
                                                 getRevEnt([tali[a]])), 2))))

                            JoinedSource = " ".join(src)
                            JoinedTarget = " ".join(tgt)
                            StrippedSource = ''.join(
                                c for c in JoinedSource
                                if unicodedata.category(c).startswith(
                                    'L')).replace('EOS', '').replace(
                                        'quot', '').replace('apos', '')
                            StrippedTarget = ''.join(
                                c for c in JoinedTarget
                                if unicodedata.category(c).startswith(
                                    'L')).replace('EOS', '').replace(
                                        'quot', '').replace('apos', '')

                            #Get the confidence metrics
                            CDP = round(getCP(ali), 10)
                            APout = round(getEnt(ali), 10)
                            APin = round(getRevEnt(ali), 10)
                            Total = round(CDP + APout + APin, 10)

                            #Can we calculate BLEU?
                            bleuNumber = -1
                            if (refs):
                                try:
                                    from nltk.translate import bleu
                                    from nltk.translate.bleu_score import SmoothingFunction
                                    sm = SmoothingFunction()
                                    refNumber = i if num < 0 else num
                                    deBpeRef = " ".join(
                                        refs[refNumber]).replace('@@ ', '')
                                    deBpeHyp = JoinedTarget.replace(
                                        '@@ ', '').replace('<EOS>',
                                                           '').strip()
                                    bleuNumber = round(
                                        bleu([deBpeRef.split()],
                                             deBpeHyp.split(),
                                             smoothing_function=sm.method3) *
                                        100, 2)
                                    bleuScore = u', ' + repr(bleuNumber)
                                except ImportError:
                                    sys.stdout.write(
                                        'NLTK not found! BLEU will not be calculated\n'
                                    )
                                    refs = False
                                    bleuScore = u''
                            else:
                                bleuScore = u''

                            jls = JoinedSource.replace('@@ ', '').replace(
                                '<EOS>', '').replace('&quot;', '"').replace(
                                    "&apos;",
                                    "'").replace("&amp;",
                                                 "&").replace("@-@",
                                                              "-").strip()
                            jlt = JoinedTarget.replace('@@ ', '').replace(
                                '<EOS>', '').replace('&quot;', '"').replace(
                                    "&apos;",
                                    "'").replace("&amp;",
                                                 "&").replace("@-@",
                                                              "-").strip()
                            longest = longestCommonSubstring(jls, jlt).strip()
                            similarity = len(longest) / len(jlt)

                            #Penalize sentences with more than 4 tokens
                            if (len(tgt) > 4) and (similarity > 0.3):
                                #The more similar, the higher penalty
                                #It's worse to have more words with a higher similarity
                                #Let's make it between 0.7 and about 1.5 for veeeery long sentences
                                multiplier = ((0.8 + (len(tgt) * 0.01)) *
                                              (3 - ((1 - similarity) * 5)) *
                                              (0.7 + similarity) *
                                              math.tan(similarity))
                                Total = round(CDP + APout + APin - multiplier,
                                              10)

                            # e^(-1(x^2))
                            CDP_pr = round(
                                math.pow(math.e, -1 * math.pow(CDP, 2)) * 100,
                                2)
                            # e^(-0.05(x^2))
                            APout_pr = round(
                                math.pow(math.e, -0.05 * math.pow(APout, 2)) *
                                100, 2)
                            APin_pr = round(
                                math.pow(math.e, -0.05 * math.pow(APin, 2)) *
                                100, 2)
                            Total_pr = round(
                                math.pow(math.e, -0.05 * math.pow(Total, 2)) *
                                100, 2)
                            # 1-e^(-0.0001(x^2))
                            Len = round((1 - math.pow(
                                math.e, -0.0001 *
                                math.pow(len(JoinedSource), 2))) * 100, 2)

                            out_s_js.write('["' +
                                           JoinedSource.replace(' ', '", "') +
                                           '"], \n')
                            out_t_js.write('["' +
                                           JoinedTarget.replace(' ', '", "') +
                                           '"], \n')
                            out_c_js.write(u'[' + repr(CDP_pr) + u', ' +
                                           repr(APout_pr) + u', ' +
                                           repr(APin_pr) + u', ' +
                                           repr(Total_pr) + u', ' + repr(Len) +
                                           u', ' + repr(len(JoinedSource)) +
                                           u', ' +
                                           repr(round(similarity * 100, 2)) +
                                           bleuScore + u'], \n')
                            out_sc_js.write(u'[[' + ", ".join(srcTotal) +
                                            u'], ' + u'[' +
                                            ", ".join(trgTotal) + u'], ' +
                                            u'], \n')

                            word = 0
                            out_a_js.write(u'[')
                            for ali_i in ali:
                                linePartC = 0
                                for ali_j in ali_i:
                                    # Maybe worth playing around with this for transformer (and convolutional) NMT output
                                    # if ali_j < 0.15:
                                    # ali_j = 0
                                    out_a_js.write(u'[' + repr(word) + u', ' +
                                                   str(np.round(ali_j, 8)) +
                                                   u', ' + repr(linePartC) +
                                                   u'], ')
                                    linePartC += 1
                                    if outputType == 'color':
                                        printColor(ali_j)
                                    elif outputType == 'block':
                                        printBlock(ali_j)
                                    elif outputType == 'block2':
                                        printBlock2(ali_j)
                                if outputType != 'web' and outputType != 'compare':
                                    sys.stdout.write(src[word].encode(
                                        'utf-8',
                                        errors='replace').decode('utf-8'))
                                word += 1
                                if outputType != 'web' and outputType != 'compare':
                                    sys.stdout.write('\n')

                            # write target sentences
                            #build 2d array
                            occupied_to = []
                            outchars = []
                            outchars.append([])
                            tw = 0
                            for tword in tgt:
                                columns = len(tgt)
                                # Some characters use multiple symbols. Need to decode and then encode...
                                twchars = list(tword)
                                twlen = len(twchars)
                                xpos = tw * 2
                                emptyline = 0

                                for el in range(0, len(occupied_to)):
                                    # if occupied, move to a new line!
                                    if occupied_to[el] < xpos:
                                        emptyline = el
                                        if len(outchars) < emptyline + 1:
                                            # add a new row
                                            outchars.append([])
                                        break
                                    if el == len(occupied_to) - 1:
                                        emptyline = el + 1
                                        if len(outchars) < emptyline + 1:
                                            outchars.append([])

                                for column in range(0, xpos):
                                    if len(outchars[emptyline]) <= column:
                                        outchars[emptyline].append(' ')

                                for charindex in range(0, twlen):
                                    if xpos + charindex == len(
                                            outchars[emptyline]):
                                        outchars[emptyline].append(
                                            twchars[charindex])
                                    else:
                                        outchars[emptyline][
                                            charindex] = twchars[charindex]

                                if len(occupied_to) <= emptyline:
                                    occupied_to.append(xpos + twlen + 1)
                                else:
                                    occupied_to[emptyline] = xpos + twlen + 1
                                tw += 1

                            #print 2d array
                            if outputType != 'web' and outputType != 'compare':
                                for liline in outchars:
                                    sys.stdout.write(''.join(liline).encode(
                                        'utf-8', errors='replace').decode(
                                            'utf-8') + '\n')
                                # print scores
                                sys.stdout.write(
                                    '\nCoverage Deviation Penalty: \t\t' +
                                    repr(round(CDP, 8)) + ' (' + repr(CDP_pr) +
                                    '%)' + '\n')
                                sys.stdout.write(
                                    'Input Absentmindedness Penalty: \t' +
                                    repr(round(APin, 8)) + ' (' +
                                    repr(APin_pr) + '%)' + '\n')
                                sys.stdout.write(
                                    'Output Absentmindedness Penalty: \t' +
                                    repr(round(APout, 8)) + ' (' +
                                    repr(APout_pr) + '%)' + '\n')
                                sys.stdout.write('Confidence: \t\t\t\t' +
                                                 repr(round(Total, 8)) + ' (' +
                                                 repr(Total_pr) + '%)' + '\n')
                                sys.stdout.write(
                                    'Similarity: \t\t\t\t' +
                                    repr(round(similarity * 100, 2)) + '%' +
                                    '\n')
                                if bleuNumber > -1:
                                    sys.stdout.write('BLEU: \t\t\t\t\t' +
                                                     repr(bleuNumber) + '\n')

                            # write target sentences
                            word = 0
                            out_a_js.write(u'], \n')
                            if outputType != 'web' and outputType != 'compare':
                                sys.stdout.write('\n')
                        out_a_js.write(u'\n]')
                        out_s_js.write(u']')
                        out_t_js.write(u']')
                        out_c_js.write(u']')
                        out_sc_js.write(u']')
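
For the optional BLEU column, processAlignments first undoes BPE segmentation by joining the tokens and stripping the '@@ ' markers (plus the <EOS> symbol), then scores the detokenized strings with nltk.translate.bleu, which is an alias of sentence_bleu. A minimal sketch of that de-BPE step, assuming the same '@@ ' convention and made-up token lists:

from nltk.translate import bleu
from nltk.translate.bleu_score import SmoothingFunction

ref_bpe = ["the", "hous@@", "es", "are", "red"]
hyp_bpe = ["the", "hou@@", "se", "is", "red", "<EOS>"]

de_bpe_ref = " ".join(ref_bpe).replace("@@ ", "")
de_bpe_hyp = " ".join(hyp_bpe).replace("@@ ", "").replace("<EOS>", "").strip()

score = bleu([de_bpe_ref.split()], de_bpe_hyp.split(),
             smoothing_function=SmoothingFunction().method3)
print(round(score * 100, 2))
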
Example 8
    def translate(self, data_iter, step, attn_debug=False):

        self.model.eval()
        output_path = self.args.result_path + '.%d.output' % step
        output_file = codecs.open(output_path, 'w', 'utf-8')
        gold_path = self.args.result_path + '.%d.gold_test' % step
        pred_path = self.args.result_path + '.%d.pred_test' % step
        gold_out_file = codecs.open(gold_path, 'w', 'utf-8')
        pred_out_file = codecs.open(pred_path, 'w', 'utf-8')
        # pred_results, gold_results = [], []

        ct = 0
        ext_acc_num = 0
        ext_pred_num = 0
        ext_gold_num = 0

        with torch.no_grad():
            rouge = Rouge()
            for batch in data_iter:
                output_data, tgt_data, ext_pred, ext_gold = self.translate_batch(
                    batch)
                translations = self.from_batch_test(batch, output_data,
                                                    tgt_data)

                for idx in range(len(translations)):
                    origin_sent, pred_summ, gold_data = translations[idx]
                    if ct % 100 == 0:
                        print("Processing %d" % ct)
                    output_file.write("ID      : %d\n" % ct)
                    output_file.write("ORIGIN  : \n    " +
                                      origin_sent.replace('<S>', '\n    ') +
                                      "\n")
                    output_file.write("GOLD    : " + gold_data.strip() + "\n")
                    output_file.write("DOC_GEN : " + pred_summ.strip() + "\n")
                    rouge_score = rouge.get_scores(pred_summ, gold_data)
                    bleu_score = sentence_bleu(
                        [gold_data.split()],
                        pred_summ.split(),
                        smoothing_function=SmoothingFunction().method1)
                    output_file.write(
                        "DOC_GEN  bleu & rouge-f 1/2/l:    %.4f & %.4f/%.4f/%.4f\n"
                        % (bleu_score, rouge_score[0]["rouge-1"]["f"],
                           rouge_score[0]["rouge-2"]["f"],
                           rouge_score[0]["rouge-l"]["f"]))
                    # ext f1 calculate
                    acc_num = len(ext_pred[idx] + ext_gold[idx]) - len(
                        set(ext_pred[idx] + ext_gold[idx]))
                    pred_num = len(ext_pred[idx])
                    gold_num = len(ext_gold[idx])
                    ext_acc_num += acc_num
                    ext_pred_num += pred_num
                    ext_gold_num += gold_num
                    f1, p, r = test_f1(acc_num, pred_num, gold_num)
                    output_file.write(
                        "EXT_GOLD: [" +
                        ','.join([str(i)
                                  for i in sorted(ext_gold[idx])]) + "]\n")
                    output_file.write(
                        "EXT_PRED: [" +
                        ','.join([str(i)
                                  for i in sorted(ext_pred[idx])]) + "]\n")
                    output_file.write(
                        "EXT_SCORE  P/R/F1:    %.4f/%.4f/%.4f\n\n" %
                        (p, r, f1))
                    pred_out_file.write(pred_summ.strip() + '\n')
                    gold_out_file.write(gold_data.strip() + '\n')
                    ct += 1
                pred_out_file.flush()
                gold_out_file.flush()
                output_file.flush()

        pred_out_file.close()
        gold_out_file.close()
        output_file.close()

        if (step != -1):
            pred_bleu = test_bleu(pred_path, gold_path)
            file_rouge = FilesRouge(hyp_path=pred_path, ref_path=gold_path)
            pred_rouges = file_rouge.get_scores(avg=True)
            f1, p, r = test_f1(ext_acc_num, ext_pred_num, ext_gold_num)
            self.logger.info(
                'Ext Sent Score at step %d: \n>> P/R/F1: %.2f/%.2f/%.2f' %
                (step, p * 100, r * 100, f1 * 100))
            self.logger.info(
                'Gold Length at step %d: %.2f' %
                (step, test_length(gold_path, gold_path, ratio=False)))
            self.logger.info('Prediction Length ratio at step %d: %.2f' %
                             (step, test_length(pred_path, gold_path)))
            self.logger.info('Prediction Bleu at step %d: %.2f' %
                             (step, pred_bleu * 100))
            self.logger.info('Prediction Rouges at step %d: \n%s' %
                             (step, rouge_results_to_str(pred_rouges)))
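
The extractive P/R/F1 above relies on a small multiset trick: for two lists of selected sentence indices, len(pred + gold) - len(set(pred + gold)) counts how many predicted indices also appear in the gold list (assuming neither list contains duplicates). A hedged sketch of that computation, with a stand-in for the project's test_f1 helper, which is not shown here:

def test_f1(acc_num, pred_num, gold_num):
    # stand-in helper: standard precision / recall / F1, returned as (f1, p, r)
    p = acc_num / pred_num if pred_num else 0.0
    r = acc_num / gold_num if gold_num else 0.0
    f1 = 2 * p * r / (p + r) if p + r else 0.0
    return f1, p, r

ext_pred = [0, 2, 5]
ext_gold = [0, 3, 5]
acc_num = len(ext_pred + ext_gold) - len(set(ext_pred + ext_gold))  # 2 shared indices
print(test_f1(acc_num, len(ext_pred), len(ext_gold)))  # roughly (0.6667, 0.6667, 0.6667)
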
Example 9
    def compute_bleu(self, predictions):

        # Hide warnings
        warnings.filterwarnings('ignore')

        # NLTK
        # Download Punkt tokenizer (for word_tokenize method)
        # Download stopwords (for stopword removal)
        nltk.download('punkt')
        nltk.download('stopwords')

        # English Stopwords
        stops = set(stopwords.words("english"))

        # Stemming
        stemmer = SnowballStemmer("english")

        # Remove punctuation from string
        translator = str.maketrans('', '', string.punctuation)

        candidate_pairs = self.readresult(predictions)

        gt_pairs = self.readresult(self.gt)

        # Define max score and current score
        max_score = len(gt_pairs)
        current_score = 0

        i = 0
        for image_key in candidate_pairs:

            # Get candidate and GT caption
            candidate_caption = candidate_pairs[image_key]
            gt_caption = gt_pairs[image_key]

            # Optional - Go to lowercase
            if not VqaMedEvaluator.case_sensitive:
                candidate_caption = candidate_caption.lower()
                gt_caption = gt_caption.lower()

            # Split caption into individual words (remove punctuation)
            candidate_words = nltk.tokenize.word_tokenize(
                candidate_caption.translate(translator))
            gt_words = nltk.tokenize.word_tokenize(
                gt_caption.translate(translator))

            # Optional - Remove stopwords
            if VqaMedEvaluator.remove_stopwords:
                candidate_words = [
                    word for word in candidate_words
                    if word.lower() not in stops
                ]
                gt_words = [
                    word for word in gt_words if word.lower() not in stops
                ]

            # Optional - Apply stemming
            if VqaMedEvaluator.stemming:
                candidate_words = [
                    stemmer.stem(word) for word in candidate_words
                ]
                gt_words = [stemmer.stem(word) for word in gt_words]

            # Calculate BLEU score for the current caption
            try:
                # If both the GT and candidate are empty, assign a score of 1 for this caption
                if len(gt_words) == 0 and len(candidate_words) == 0:
                    bleu_score = 1
                # Calculate the BLEU score
                else:
                    bleu_score = nltk.translate.bleu_score.sentence_bleu(
                        [gt_words],
                        candidate_words,
                        smoothing_function=SmoothingFunction().method0)
            # Handle problematic cases where BLEU score calculation is impossible
            except ZeroDivisionError:
                # Score this caption as 0 instead of silently reusing the previous
                # iteration's bleu_score (or hitting an UnboundLocalError on the first one)
                bleu_score = 0
                #raise Exception('Problem with {} {}', gt_words, candidate_words)

            # Increase calculated score
            current_score += bleu_score

        return current_score / max_score
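
The per-caption preprocessing above (lowercasing, punctuation stripping, tokenization, optional stopword removal and stemming) can be exercised on its own. A minimal sketch, assuming the required NLTK resources (punkt, stopwords) are already downloaded and using a made-up caption:

import string
import nltk
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer

translator = str.maketrans('', '', string.punctuation)
stops = set(stopwords.words("english"))
stemmer = SnowballStemmer("english")

caption = "There is a small fracture in the left femur."
words = nltk.tokenize.word_tokenize(caption.lower().translate(translator))
words = [w for w in words if w not in stops]
words = [stemmer.stem(w) for w in words]
print(words)  # expected along the lines of ['small', 'fractur', 'left', 'femur']
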
Example 10
 def get_sentence_bleu(self, example, hyp):
     return sentence_bleu(
         [tokenize_for_bleu_eval(example.meta['example_dict']['snippet'])],
         tokenize_for_bleu_eval(hyp.decanonical_code),
         smoothing_function=SmoothingFunction().method3)
Example 11
    def evaluate_dataset(self,
                         dataset,
                         decode_results,
                         fast_mode=False,
                         args=None):
        output_plaintext_file = None
        if args and args.save_decode_to:
            output_plaintext_file = open(args.save_decode_to + '.txt',
                                         'w',
                                         encoding='utf-8')
        examples = dataset.examples if isinstance(dataset,
                                                  Dataset) else dataset
        assert len(examples) == len(decode_results)

        # speed up, cache tokenization results
        if not hasattr(examples[0], 'reference_code_tokens'):
            for example in examples:
                setattr(
                    example, 'reference_code_tokens',
                    tokenize_for_bleu_eval(
                        example.meta['example_dict']['snippet']))

        if not hasattr(decode_results[0][0], 'decanonical_code_tokens'):
            for i, example in enumerate(examples):
                hyp_list = decode_results[i]
                # here we prune any hypothesis that throws an error when converting back to the decanonical code!
                # This modifies the decode_results in-place!
                filtered_hyp_list = []
                for hyp in hyp_list:
                    if not hasattr(hyp, 'decanonical_code'):
                        try:
                            hyp.decanonical_code = decanonicalize_code(
                                hyp.code, slot_map=example.meta['slot_map'])
                            if hyp.decanonical_code:
                                hyp.decanonical_code_tokens = tokenize_for_bleu_eval(
                                    hyp.decanonical_code)
                                filtered_hyp_list.append(hyp)
                        except:
                            pass

                decode_results[i] = filtered_hyp_list

        if fast_mode:
            references = [e.reference_code_tokens for e in examples]
            hypotheses = [
                hyp_list[0].decanonical_code_tokens if hyp_list else []
                for hyp_list in decode_results
            ]

            bleu_tup = compute_bleu([[x] for x in references],
                                    hypotheses,
                                    smooth=False)
            bleu = bleu_tup[0]

            return bleu
        else:
            tokenized_ref_snippets = []
            hyp_code_tokens = []
            best_hyp_code_tokens = []
            sm_func = SmoothingFunction().method3
            sent_bleu_scores = []
            oracle_bleu_scores = []
            oracle_exact_match = []
            for example, hyp_list in zip(examples, decode_results):
                tokenized_ref_snippets.append(example.reference_code_tokens)
                example_hyp_bleu_scores = []
                if hyp_list:
                    for i, hyp in enumerate(hyp_list):
                        hyp.bleu_score = sentence_bleu(
                            [example.reference_code_tokens],
                            hyp.decanonical_code_tokens,
                            smoothing_function=sm_func)
                        hyp.is_correct = self.is_hyp_correct(example, hyp)

                        example_hyp_bleu_scores.append(hyp.bleu_score)

                    top_decanonical_code_tokens = hyp_list[
                        0].decanonical_code_tokens
                    sent_bleu_score = hyp_list[0].bleu_score
                    best_hyp_idx = np.argmax(example_hyp_bleu_scores)
                    oracle_sent_bleu = example_hyp_bleu_scores[best_hyp_idx]
                    _best_hyp_code_tokens = hyp_list[
                        best_hyp_idx].decanonical_code_tokens
                else:
                    top_decanonical_code_tokens = []
                    sent_bleu_score = 0.
                    oracle_sent_bleu = 0.
                    _best_hyp_code_tokens = []

                # write results to file
                if output_plaintext_file:
                    output_plaintext_file.write(
                        " ".join(top_decanonical_code_tokens) + '\n')
                oracle_exact_match.append(
                    any(hyp.is_correct for hyp in hyp_list))
                hyp_code_tokens.append(top_decanonical_code_tokens)
                sent_bleu_scores.append(sent_bleu_score)
                oracle_bleu_scores.append(oracle_sent_bleu)
                best_hyp_code_tokens.append(_best_hyp_code_tokens)

            bleu_tup = compute_bleu([[x] for x in tokenized_ref_snippets],
                                    hyp_code_tokens,
                                    smooth=False)
            corpus_bleu = bleu_tup[0]

            bleu_tup = compute_bleu([[x] for x in tokenized_ref_snippets],
                                    best_hyp_code_tokens,
                                    smooth=False)
            oracle_corpus_bleu = bleu_tup[0]

            avg_sent_bleu = np.average(sent_bleu_scores)
            oracle_avg_sent_bleu = np.average(oracle_bleu_scores)
            exact = sum([
                1 if h == r else 0
                for h, r in zip(hyp_code_tokens, tokenized_ref_snippets)
            ]) / float(len(examples))
            oracle_exact_match = np.average(oracle_exact_match)

            return {
                'corpus_bleu': corpus_bleu,
                'oracle_corpus_bleu': oracle_corpus_bleu,
                'avg_sent_bleu': avg_sent_bleu,
                'oracle_avg_sent_bleu': oracle_avg_sent_bleu,
                'exact_match': exact,
                'oracle_exact_match': oracle_exact_match
            }
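
The oracle numbers in this evaluator come from re-ranking each example's hypothesis list by its sentence-level BLEU against the reference and picking the best candidate with np.argmax. A small self-contained sketch of that oracle selection, using made-up token lists:

import numpy as np
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

sm_func = SmoothingFunction().method3
reference = ["x", "=", "sorted", "(", "items", ")"]
hyp_list = [["x", "=", "list", "(", "items", ")"],
            ["x", "=", "sorted", "(", "items", ")"]]

scores = [sentence_bleu([reference], hyp, smoothing_function=sm_func)
          for hyp in hyp_list]
best_idx = np.argmax(scores)
print("top-1 BLEU: %.4f, oracle BLEU: %.4f" % (scores[0], scores[best_idx]))
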
Example 12
def train():
    # argparse
    parser = argparse.ArgumentParser(description='manual to this script')
    parser.add_argument('--mode', type=str, default="de2en")
    parser.add_argument('--gpu', type=str, default="0")  # string default so it can be assigned to CUDA_VISIBLE_DEVICES
    parser.add_argument('--save_dir', type=str, default="result")
    parser.add_argument('--save_file', type=str, default="bleu.txt")
    parser.add_argument('--save_log', type=str, default="logdir")
    parser.add_argument('--task', type=str, default="task2")
    parser.add_argument('--set', type=str, default="val")
    args = parser.parse_args()
    mode = args.mode
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    save_path = args.save_log
    save_file = args.save_file
    save_dir = args.save_dir
    task = args.task
    set = args.set
    if not os.path.exists(save_path): os.mkdir(save_path)
    if not os.path.exists(save_dir): os.mkdir(save_dir)
    # prepare
    de2idx, idx2de = load_de_vocab()
    en2idx, idx2en = load_en_vocab()
    translator = eval("idx2{}".format(mode[-2:]))  # !!!
    # load_graph
    g = Graph(is_training=True, beam_width=1, mode=mode)
    g_val = Graph(is_training=False, beam_width=5, mode=mode)
    print("Graph loaded")
    # Load data
    X, Image_index, Y, Targets = load_rl_data(language=mode[:2])
    images = np.load(image_path.format("train"))
    num_batch = int(math.ceil(len(X) / hp.batch_size))
    x_val, Targets_val, idents = load_test_rl_data(set=set,
                                                   task=task,
                                                   language=mode[:2])
    num_batch_val = int(math.ceil(len(x_val) / hp.batch_size_test))

    # prepare ref file
    if task == "task2":
        f_ref1 = open("{}/{}_ref_1".format(save_dir, mode), "w+")
        f_ref2 = open("{}/{}_ref_2".format(save_dir, mode), "w+")
        f_ref3 = open("{}/{}_ref_3".format(save_dir, mode), "w+")
        f_ref4 = open("{}/{}_ref_4".format(save_dir, mode), "w+")
        f_ref5 = open("{}/{}_ref_5".format(save_dir, mode), "w+")
        f_ref = [f_ref1, f_ref2, f_ref3, f_ref4, f_ref5]
    else:
        f_ref1 = open("{}/{}_ref_1".format(save_dir, mode), "w+")
        f_ref = [f_ref1]
    for i in range(len(Targets_val)):
        for k, l in enumerate(f_ref):
            if task == "task2":
                l.write(Targets_val[i][k] + "\n")
            else:
                l.write(Targets_val[i] + "\n")
    for sth in f_ref:
        sth.close()
        concat(sth.name)
    val_sum = len(Targets_val)
    temp = []
    for file in f_ref:
        with open(file.name, "r") as h:
            refs = [i.strip() for i in h.readlines()]
            temp.extend(refs)
    Target_val_split = []
    for i in range(val_sum):
        temp1 = []
        for j in range(len(f_ref)):
            temp1.append(temp[i + j * val_sum].split())
        Target_val_split.append(temp1)
    save_num = 1
    best_of_now = 0.0
    pre_bleu = 0.0
    # saver
    saver1 = tf.train.Saver(var_list=g.value_list, max_to_keep=100)
    if mode == "de2en":
        saver2 = tf.train.Saver(var_list=g.value_list_en)
    else:
        saver2 = tf.train.Saver(var_list=g.value_list_de)
    saver_val = tf.train.Saver(var_list=g_val.value_list)
    # config
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config, graph=g.graph) as sess:
        ## Restore parameters
        sess.run(tf.global_variables_initializer())
        if mode == "de2en":
            # saver1.restore(sess, "../rl/{}".format(mode) + "/step_1430")
            saver1.restore(sess, "{}_pre_y".format(mode) + "/step_6500")
            saver2.restore(
                sess,
                eval("hp.logdir_cap_{}".format(mode[-2:])) +
                "/model_step_3999")
        elif mode == "en2de":
            # saver1.restore(sess, "../rl/{}".format(mode)+ "/step_3363")
            saver1.restore(sess, "{}_pre_y".format(mode) + "/step_6000")
            saver2.restore(
                sess,
                eval("hp.logdir_cap_{}".format(mode[-2:])) +
                "/model_step_9999")
        print("Restored!")
        # lr = hp.lr
        num = 0
        for epoch in range(hp.num_epochs):
            for i in range(num_batch):
                step = epoch * num_batch + i + 1
                lr = hp.lr  #* min(pow(step, -0.5),step * pow(hp.warmup_step, -1.5))
                image = images[Image_index[i * hp.batch_size:(i + 1) *
                                           hp.batch_size]]
                x = X[i * hp.batch_size:(i + 1) * hp.batch_size]
                y = Y[i * hp.batch_size:(i + 1) * hp.batch_size]
                if step % 100 == 0:
                    # prepare save file
                    f_hypo = open(
                        "{}/{}_hypo_{}".format(save_dir, mode, save_num), "w+")
                    save_num += 1
                    # save log
                    saver1.save(sess, save_path + "/step_{}".format(step))
                    # write file
                    with tf.Session(graph=g_val.graph) as sess_val:
                        sess_val.run(tf.global_variables_initializer())
                        saver_val.restore(
                            sess_val, tf.train.latest_checkpoint(save_path))
                        for j in range(num_batch_val):
                            # cal_pred
                            x_val_batch = x_val[j *
                                                hp.batch_size_test:(j + 1) *
                                                hp.batch_size_test]
                            feed_dict = {
                                g_val.x: x_val_batch,
                                g_val.dropout_rate_tran: 0.0,
                                g_val.is_inference: True
                            }
                            preds = sess_val.run(g_val.preds, feed_dict)
                            preds = np.concatenate(
                                (preds[:, 1:], np.zeros(
                                    (len(x_val_batch), 1))),
                                axis=1)
                            # corporate
                            for pred in preds:  # sentence-wise over x_val_batch
                                got = " ".join(
                                    translator[idx]
                                    for idx in pred).split("</S>")[0].strip()
                                f_hypo.write(got + "\n")
                        f_hypo.close()
                        concat(f_hypo.name)
                        with open(f_hypo.name, "r") as f:
                            hypos = [i.strip().split() for i in f.readlines()]
                            n_bleu = corpus_bleu(
                                Target_val_split,
                                hypos,
                                smoothing_function=SmoothingFunction().method2)
                            with open(save_file, "a+") as h:
                                h.write(str(n_bleu) + "\n")
                        #     if n_bleu>best_of_now:
                        #         best_of_now = n_bleu
                        #         num = 0
                        #     elif n_bleu < pre_bleu:
                        #         num += 1
                        #         if num==2:
                        #             lr = lr*0.7
                        #             num = 0
                        #     pre_bleu = n_bleu
                #sample from image
                feed_dict_sample = {
                    g.dropout_rate: 0.0,
                    g.lstm_drop_rate: 0.0,
                    g.image: image,
                    g.is_inference: True
                }
                preds = sess.run(g.preds_sample, feed_dict_sample)
                preds = process(preds) * preds
                mix_y, sample_index = mix(preds, y, x_ratio=0.5)  # !!!!!
                #train
                feed_dict = {
                    g.x: x,
                    g.image: image,
                    g.dropout_rate: hp.dropout_rate,
                    g.dropout_rate_tran: hp.dropout_rate_tran,
                    g.lstm_drop_rate: hp.lstm_drop_rate,
                    g.index: sample_index,
                    g.y: mix_y,
                    g.is_inference: False,
                    g.lr: lr
                }
                _, loss = sess.run([g.train_op, g.loss], feed_dict)
                print(loss)
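The validation branch above writes the decoded hypotheses to a file and scores them with corpus_bleu. The same scoring can be reproduced offline; a minimal sketch, assuming one tokenizable sentence per line in the hypothesis file and in each parallel reference file (the function name and paths are illustrative, not from the original script):

from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

def score_hypothesis_file(hypo_path, ref_paths):
    # One hypothesis per line; each reference file is parallel to the hypotheses.
    with open(hypo_path) as f:
        hypotheses = [line.strip().split() for line in f]
    ref_sets = []
    for path in ref_paths:
        with open(path) as f:
            ref_sets.append([line.strip().split() for line in f])
    # references[i] is the list of reference token lists for hypothesis i.
    references = list(zip(*ref_sets))
    # method2 matches the smoothing used in the training loop above.
    return corpus_bleu(references, hypotheses,
                       smoothing_function=SmoothingFunction().method2)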
Example no. 13
0
def score(data_path: str, encoder_path: str, vocab_path: str,
          captions_file: str, sample_length: int = 30, N=4, smoothing='method1', output: str=None):
    dump = torch.load(encoder_path, map_location=lambda storage, loc: storage)
    reference_df = pd.read_json(captions_file)
    reference_df['filename'] = reference_df['filename'].apply(
        lambda x: int(os.path.splitext(os.path.basename(x))[0]))
    reference_grouped_df = reference_df.groupby(
        ['filename'])['tokens'].apply(list).to_dict()
    encodermodel = dump['encodermodel']
    decodermodel = dump['decodermodel']
    # Some scaler (sklearn standardscaler)
    scaler = dump['scaler']
    # Also load previous training config
    config_parameters = dump['config']

    vocab = torch.load(vocab_path)
    # load images from previous
    encodermodel = encodermodel.to(DEVICE).eval()
    decodermodel = decodermodel.to(DEVICE).eval()
    smoother = SmoothingFunction()
    smoothing_fun = getattr(smoother, smoothing)
    kaldi_string = parsecopyfeats(
        data_path, **config_parameters['feature_args'])
    bleu_score = []
    human_bleu_score = []
    bleu_weights = [1./N]*N
    with stdout_or_file(output) as writer:
        with torch.no_grad():
            for k, features in kaldi_io.read_mat_ark(kaldi_string):
                k = int(k)
                if k not in reference_grouped_df:
                    continue
                features = scaler.transform(features)
                # Add single batch dimension
                features = torch.from_numpy(features).to(DEVICE).unsqueeze(0)
                # Generate a caption embedding
                encoded_feature, hiddens = encodermodel(features)
                sampled_ids = decodermodel.sample(
                    encoded_feature, states=hiddens, maxlength=sample_length)
                # (1, max_seq_length) -> (max_seq_length)
                sampled_ids = sampled_ids[0].cpu().numpy()

                # Convert word_ids to words
                candidate = []
                for word_id in sampled_ids:
                    word = vocab.idx2word[word_id]
                    # Don't add start/end tokens
                    if word == '<end>':
                        break
                    elif word == '<start>':
                        continue
                    candidate.append(word)
                reference_sent = reference_grouped_df[k]
                #human_avg_score = []
                #bleu_avg_score = []
                human_scores = []
                system_scores = []
                if len(reference_sent) <= 1:
                    continue
                for turn in range(len(reference_sent)):
                    human_cand = reference_sent[turn]
                    human_ref = [x for i, x in enumerate(
                        reference_sent) if i != turn]
                    #human_avg_score.append(
                    human_scores.append(
                        sentence_bleu(
                            human_ref, human_cand,
                            smoothing_function=smoothing_fun,
                            weights=bleu_weights))
                    #bleu_avg_score.append(
                    system_scores.append(
                        sentence_bleu(
                            human_ref,
                            candidate,
                            smoothing_function=smoothing_fun,
                            weights=bleu_weights))


                #human_bleu = sum(human_scores)/len(human_scores)
                human_bleu = max(human_scores)
                #bleu_score_all_ref = sum(system_scores)/len(system_scores)
                bleu_score_all_ref = max(system_scores)

                human_bleu_score.append(human_bleu)
                bleu_score.append(bleu_score_all_ref)

            writer.write("BLEU-{} Scores\n".format(N))
            writer.write("System {:10.3f}\n".format(np.mean(bleu_score)))
            writer.write("Human {:10.3f}\n".format(np.mean(human_bleu_score)))
Example no. 14
0
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import csv

TEXT_FILE = './samples/text_val.12'
LABELS_FILE = './samples/labels_val.12'

smoothing_function = SmoothingFunction()
samples = []
samples.append([
    'Input Sentence', 'Input Label', 'Generated Sentence', 'Predicted Label',
    'BLEU'
])
with open(TEXT_FILE, 'r') as input_file_text:
    lines_text = input_file_text.readlines()
    with open(LABELS_FILE, 'r') as input_file_labels:
        lines_labels = input_file_labels.readlines()
        for i in range(0, len(lines_text) - 1, 2):
            input_sentence = lines_text[i].strip()
            generated_sentence = lines_text[i + 1].strip()
            input_label = 1 - int(lines_labels[i])
            predicted_label = int(lines_labels[i + 1])
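            # Hedged continuation: the original snippet is truncated here. The
            # assumed intent is to score each input/generated pair with the
            # imported sentence_bleu and collect a row matching the header
            # defined above; the output path below is an assumption.
            bleu = sentence_bleu([input_sentence.split()],
                                 generated_sentence.split(),
                                 smoothing_function=smoothing_function.method1)
            samples.append([input_sentence, input_label, generated_sentence,
                            predicted_label, bleu])

with open('./samples/bleu_report.csv', 'w') as report_file:  # assumed filename
    csv.writer(report_file).writerows(samples)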
Example no. 15
0
 def get_bleu(self):
     ngram = self.gram
     bleu = list()
     reference = self.get_reference()
     weight = tuple((1. / ngram for _ in range(ngram)))
     with open(self.test_data, encoding='utf-8') as test_data:
         for hypothesis in test_data:
             hypothesis = nltk.word_tokenize(hypothesis)
             bleu.append(nltk.translate.bleu_score.sentence_bleu(reference, hypothesis, weight,
                                                                 smoothing_function=SmoothingFunction().method1))
     return sum(bleu) / len(bleu)
Example no. 16
0
def main(data_path):

    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU {}'.format(tpu.cluster_spec().as_dict()['worker']))
    except ValueError:
        tpu = None

    if tpu:
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
    else:
        strategy = tf.distribute.get_strategy()

    print("REPLICAS: {}".format(strategy.num_replicas_in_sync))

    # Maximum sentence length
    MAX_LENGTH = 40
    # Maximum number of samples to preprocess
    MAX_SAMPLES = 15000
    # For tf.data.Dataset
    BATCH_SIZE = 64 * strategy.num_replicas_in_sync
    BUFFER_SIZE = 20000
    # For Transformer
    NUM_LAYERS = 2
    D_MODEL = 256
    NUM_HEADS = 8
    UNITS = 512
    DROPOUT = 0.1
    EPOCHS = 40

    print('---')
    print('Loading the data...')
    path_to_zip = tf.keras.utils.get_file(
        'cornell_movie_dialogs.zip',
        origin=
        'http://www.cs.cornell.edu/~cristian/data/cornell_movie_dialogs_corpus.zip',
        extract=True)

    path_to_dataset = os.path.join(
        os.path.dirname(path_to_zip), "cornell movie-dialogs corpus")

    path_to_movie_lines = os.path.join(path_to_dataset, 'movie_lines.txt')
    path_to_movie_conversations = os.path.join(path_to_dataset,
                                              'movie_conversations.txt')

    def preprocess_sentence(sentence):
      sentence = sentence.lower().strip()
      # creating a space between a word and the punctuation following it
      # eg: "he is a boy." => "he is a boy ."
      sentence = re.sub(r"([?.!,])", r" \1 ", sentence)
      sentence = re.sub(r'[" "]+', " ", sentence)
      # removing contractions
      sentence = re.sub(r"i'm", "i am", sentence)
      sentence = re.sub(r"he's", "he is", sentence)
      sentence = re.sub(r"she's", "she is", sentence)
      sentence = re.sub(r"it's", "it is", sentence)
      sentence = re.sub(r"that's", "that is", sentence)
      sentence = re.sub(r"what's", "that is", sentence)
      sentence = re.sub(r"where's", "where is", sentence)
      sentence = re.sub(r"how's", "how is", sentence)
      sentence = re.sub(r"\'ll", " will", sentence)
      sentence = re.sub(r"\'ve", " have", sentence)
      sentence = re.sub(r"\'re", " are", sentence)
      sentence = re.sub(r"\'d", " would", sentence)
      sentence = re.sub(r"\'re", " are", sentence)
      sentence = re.sub(r"won't", "will not", sentence)
      sentence = re.sub(r"can't", "cannot", sentence)
      sentence = re.sub(r"n't", " not", sentence)
      sentence = re.sub(r"n'", "ng", sentence)
      sentence = re.sub(r"'bout", "about", sentence)
      # replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
      sentence = re.sub(r"[^a-zA-Z?.!,]+", " ", sentence)
      sentence = sentence.strip()
      return sentence

    print('Preprocessing the data...')
    def load_conversations():
      # dictionary of line id to text
      id2line = {}
      with open(path_to_movie_lines, errors='ignore') as file:
        lines = file.readlines()
      for line in lines:
        parts = line.replace('\n', '').split(' +++$+++ ')
        id2line[parts[0]] = parts[4]

      inputs, outputs = [], []
      with open(path_to_movie_conversations, 'r') as file:
        lines = file.readlines()
      for line in lines:
        parts = line.replace('\n', '').split(' +++$+++ ')
        # get conversation in a list of line ID
        conversation = [line[1:-1] for line in parts[3][1:-1].split(', ')]
        for i in range(len(conversation) - 1):
          inputs.append(preprocess_sentence(id2line[conversation[i]]))
          outputs.append(preprocess_sentence(id2line[conversation[i + 1]]))
          if len(inputs) >= MAX_SAMPLES:
            return inputs, outputs
      return inputs, outputs


    questions, answers = load_conversations()

    print('Train-Test split...')
    X_train = questions[:round(0.8*len(questions))]
    y_train = answers[:round(0.8*len(answers))]

    X_test = questions[round(0.8*len(questions)):]
    y_test = answers[round(0.8*len(answers)):]

    print('Tokenizing...')
    # Build tokenizer using tfds for both questions and answers
    tokenizer = tfds.features.text.SubwordTextEncoder.build_from_corpus(
        questions + answers, target_vocab_size=2**13)

    print('START and END tags appended...')
    # Define start and end token to indicate the start and end of a sentence
    START_TOKEN, END_TOKEN = [tokenizer.vocab_size], [tokenizer.vocab_size + 1]

    # Vocabulary size plus start and end token
    VOCAB_SIZE = tokenizer.vocab_size + 2

    # Tokenize, filter and pad sentences
    def tokenize_and_filter(inputs, outputs):
      tokenized_inputs, tokenized_outputs = [], []
      
      for (sentence1, sentence2) in zip(inputs, outputs):
        # tokenize sentence
        sentence1 = START_TOKEN + tokenizer.encode(sentence1) + END_TOKEN
        sentence2 = START_TOKEN + tokenizer.encode(sentence2) + END_TOKEN
        # check tokenized sentence max length
        if len(sentence1) <= MAX_LENGTH and len(sentence2) <= MAX_LENGTH:
          tokenized_inputs.append(sentence1)
          tokenized_outputs.append(sentence2)
      
      # pad tokenized sentences
      tokenized_inputs = tf.keras.preprocessing.sequence.pad_sequences(
          tokenized_inputs, maxlen=MAX_LENGTH, padding='post')
      tokenized_outputs = tf.keras.preprocessing.sequence.pad_sequences(
          tokenized_outputs, maxlen=MAX_LENGTH, padding='post')
      
      return tokenized_inputs, tokenized_outputs


    questions, answers = tokenize_and_filter(questions, answers)
    print('---')
    print('Vocab size: {}'.format(VOCAB_SIZE))
    print('Number of samples: {}'.format(len(questions)))


    # decoder inputs use the previous target as input
    # remove START_TOKEN from targets
    dataset = tf.data.Dataset.from_tensor_slices((
        {
            'inputs': questions[:12000],
            'dec_inputs': answers[:12000, :-1]
        },
        {
            'outputs': answers[:12000, 1:]
        },
    ))

    dataset = dataset.cache()
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

    # VALIDATION DATASET
    # remove START_TOKEN from targets
    val_dataset = tf.data.Dataset.from_tensor_slices((
        {
            'inputs': questions[12000:],
            'dec_inputs': answers[12000:, :-1]
        },
        {
            'outputs': answers[12000:, 1:]
        },
    ))

    val_dataset = val_dataset.cache()
    val_dataset = val_dataset.shuffle(BUFFER_SIZE)
    val_dataset = val_dataset.batch(BATCH_SIZE)
    val_dataset = val_dataset.prefetch(tf.data.experimental.AUTOTUNE)

    print(dataset)
    print(val_dataset)

    print('Creating scaled dot product attention...')
    def scaled_dot_product_attention(query, key, value, mask):
      """Calculate the attention weights. """
      matmul_qk = tf.matmul(query, key, transpose_b=True)

      # scale matmul_qk
      depth = tf.cast(tf.shape(key)[-1], tf.float32)
      logits = matmul_qk / tf.math.sqrt(depth)

      # add the mask to zero out padding tokens
      if mask is not None:
        logits += (mask * -1e9)

      # softmax is normalized on the last axis (seq_len_k)
      attention_weights = tf.nn.softmax(logits, axis=-1)

      output = tf.matmul(attention_weights, value)

      return output

    print('Creating Multi head attention...')
    class MultiHeadAttention(tf.keras.layers.Layer):

      def __init__(self, d_model, num_heads, name="multi_head_attention"):
        super(MultiHeadAttention, self).__init__(name=name)
        self.num_heads = num_heads
        self.d_model = d_model

        assert d_model % self.num_heads == 0

        self.depth = d_model // self.num_heads

        self.query_dense = tf.keras.layers.Dense(units=d_model)
        self.key_dense = tf.keras.layers.Dense(units=d_model)
        self.value_dense = tf.keras.layers.Dense(units=d_model)

        self.dense = tf.keras.layers.Dense(units=d_model)

      def get_config(self):
            config = super(MultiHeadAttention,self).get_config()
            config.update({
                'num_heads':self.num_heads,
                'd_model':self.d_model,
            })
            return config

      def split_heads(self, inputs, batch_size):
        inputs = tf.reshape(
            inputs, shape=(batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(inputs, perm=[0, 2, 1, 3])

      def call(self, inputs):
        query, key, value, mask = inputs['query'], inputs['key'], inputs[
            'value'], inputs['mask']
        batch_size = tf.shape(query)[0]

        # linear layers
        query = self.query_dense(query)
        key = self.key_dense(key)
        value = self.value_dense(value)

        # split heads
        query = self.split_heads(query, batch_size)
        key = self.split_heads(key, batch_size)
        value = self.split_heads(value, batch_size)

        # scaled dot-product attention
        scaled_attention = scaled_dot_product_attention(query, key, value, mask)

        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])

        # concatenation of heads
        concat_attention = tf.reshape(scaled_attention,
                                      (batch_size, -1, self.d_model))

        # final linear layer
        outputs = self.dense(concat_attention)

        return outputs

    def create_padding_mask(x):
      mask = tf.cast(tf.math.equal(x, 0), tf.float32)
      # (batch_size, 1, 1, sequence length)
      return mask[:, tf.newaxis, tf.newaxis, :]

    def create_look_ahead_mask(x):
      seq_len = tf.shape(x)[1]
      look_ahead_mask = 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
      padding_mask = create_padding_mask(x)
      return tf.maximum(look_ahead_mask, padding_mask)

    print('Creating positional Encoding...')
    class PositionalEncoding(tf.keras.layers.Layer):

      def __init__(self, position, d_model):
        super(PositionalEncoding, self).__init__()
        # Store constructor arguments so get_config() can serialize the layer
        self.position = position
        self.d_model = d_model
        self.pos_encoding = self.positional_encoding(position, d_model)

      def get_config(self):
        config = super(PositionalEncoding, self).get_config()
        config.update({
            'position': self.position,
            'd_model': self.d_model,
        })
        return config

      def get_angles(self, position, i, d_model):
        angles = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        return position * angles

      def positional_encoding(self, position, d_model):
        angle_rads = self.get_angles(
            position=tf.range(position, dtype=tf.float32)[:, tf.newaxis],
            i=tf.range(d_model, dtype=tf.float32)[tf.newaxis, :],
            d_model=d_model)
        # apply sin to even index in the array
        sines = tf.math.sin(angle_rads[:, 0::2])
        # apply cos to odd index in the array
        cosines = tf.math.cos(angle_rads[:, 1::2])

        pos_encoding = tf.concat([sines, cosines], axis=-1)
        pos_encoding = pos_encoding[tf.newaxis, ...]
        return tf.cast(pos_encoding, tf.float32)

      def call(self, inputs):
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]

    print('Defining Encoder Layer...')
    def encoder_layer(units, d_model, num_heads, dropout, name="encoder_layer"):
      inputs = tf.keras.Input(shape=(None, d_model), name="inputs")
      padding_mask = tf.keras.Input(shape=(1, 1, None), name="padding_mask")

      attention = MultiHeadAttention(
          d_model, num_heads, name="attention")({
              'query': inputs,
              'key': inputs,
              'value': inputs,
              'mask': padding_mask
          })
      attention = tf.keras.layers.Dropout(rate=dropout)(attention)
      attention = tf.keras.layers.LayerNormalization(epsilon=1e-6)(inputs + attention)

      outputs = tf.keras.layers.Dense(units=units, activation='relu')(attention)
      outputs = tf.keras.layers.Dense(units=d_model)(outputs)
      outputs = tf.keras.layers.Dropout(rate=dropout)(outputs)
      outputs = tf.keras.layers.LayerNormalization(epsilon=1e-6)(attention + outputs)

      return tf.keras.Model(
          inputs=[inputs, padding_mask], outputs=outputs, name=name)

    sample_encoder_layer = encoder_layer(
        units=512,
        d_model=128,
        num_heads=4,
        dropout=0.3,
        name="sample_encoder_layer")

    #tf.keras.utils.plot_model(
    #   sample_encoder_layer, to_file='encoder_layer.png', show_shapes=True)

    print('Defining encoder...')
    def encoder(vocab_size,
                num_layers,
                units,
                d_model,
                num_heads,
                dropout,
                name="encoder"):
      inputs = tf.keras.Input(shape=(None,), name="inputs")
      padding_mask = tf.keras.Input(shape=(1, 1, None), name="padding_mask")

      embeddings = tf.keras.layers.Embedding(vocab_size, d_model)(inputs)
      embeddings *= tf.math.sqrt(tf.cast(d_model, tf.float32))
      embeddings = PositionalEncoding(vocab_size, d_model)(embeddings)

      outputs = tf.keras.layers.Dropout(rate=dropout)(embeddings)
      for i in range(int(num_layers)):
        outputs = encoder_layer(
            units=units,
            d_model=d_model,
            num_heads=num_heads,
            dropout=dropout,
            name="encoder_layer_{}".format(i),
        )([outputs, padding_mask])

      return tf.keras.Model(
          inputs=[inputs, padding_mask], outputs=outputs, name=name)
      
    sample_encoder = encoder(
        vocab_size=8192,
        num_layers=2,
        units=512,
        d_model=128,
        num_heads=4,
        dropout=0.3,
        name="sample_encoder")

    print('Defining decoder layer...')
    def decoder_layer(units, d_model, num_heads, dropout, name="decoder_layer"):
      inputs = tf.keras.Input(shape=(None, d_model), name="inputs")
      enc_outputs = tf.keras.Input(shape=(None, d_model), name="encoder_outputs")
      look_ahead_mask = tf.keras.Input(
          shape=(1, None, None), name="look_ahead_mask")
      padding_mask = tf.keras.Input(shape=(1, 1, None), name='padding_mask')

      attention1 = MultiHeadAttention(
          d_model, num_heads, name="attention_1")(inputs={
              'query': inputs,
              'key': inputs,
              'value': inputs,
              'mask': look_ahead_mask
          })
      attention1 = tf.keras.layers.LayerNormalization(
          epsilon=1e-6)(attention1 + inputs)

      attention2 = MultiHeadAttention(
          d_model, num_heads, name="attention_2")(inputs={
              'query': attention1,
              'key': enc_outputs,
              'value': enc_outputs,
              'mask': padding_mask
          })
      attention2 = tf.keras.layers.Dropout(rate=dropout)(attention2)
      attention2 = tf.keras.layers.LayerNormalization(
          epsilon=1e-6)(attention2 + attention1)

      outputs = tf.keras.layers.Dense(units=units, activation='relu')(attention2)
      outputs = tf.keras.layers.Dense(units=d_model)(outputs)
      outputs = tf.keras.layers.Dropout(rate=dropout)(outputs)
      outputs = tf.keras.layers.LayerNormalization(
          epsilon=1e-6)(outputs + attention2)

      return tf.keras.Model(
          inputs=[inputs, enc_outputs, look_ahead_mask, padding_mask],
          outputs=outputs,
          name=name)
      

    sample_decoder_layer = decoder_layer(
        units=512,
        d_model=128,
        num_heads=4,
        dropout=0.3,
        name="sample_decoder_layer")

    print('Defining decoder...')
    def decoder(vocab_size,
                num_layers,
                units,
                d_model,
                num_heads,
                dropout,
                name='decoder'):
      inputs = tf.keras.Input(shape=(None,), name='inputs')
      enc_outputs = tf.keras.Input(shape=(None, d_model), name='encoder_outputs')
      look_ahead_mask = tf.keras.Input(
          shape=(1, None, None), name='look_ahead_mask')
      padding_mask = tf.keras.Input(shape=(1, 1, None), name='padding_mask')
      
      embeddings = tf.keras.layers.Embedding(vocab_size, d_model)(inputs)
      embeddings *= tf.math.sqrt(tf.cast(d_model, tf.float32))
      embeddings = PositionalEncoding(vocab_size, d_model)(embeddings)

      outputs = tf.keras.layers.Dropout(rate=dropout)(embeddings)

      for i in range(int(num_layers)):
        outputs = decoder_layer(
            units=units,
            d_model=d_model,
            num_heads=num_heads,
            dropout=dropout,
            name='decoder_layer_{}'.format(i),
        )(inputs=[outputs, enc_outputs, look_ahead_mask, padding_mask])

      return tf.keras.Model(
          inputs=[inputs, enc_outputs, look_ahead_mask, padding_mask],
          outputs=outputs,
          name=name)
      
    sample_decoder = decoder(
        vocab_size=8192,
        num_layers=2,
        units=512,
        d_model=128,
        num_heads=4,
        dropout=0.3,
        name="sample_decoder")

    print('Defining Transformer...')
    def transformer(vocab_size, #VOCAB_SIZE
                    num_layers, #2
                    units,      #512
                    d_model,    #256
                    num_heads,  #8
                    dropout,    #0.1
                    name="transformer"):
      inputs = tf.keras.Input(shape=(None,), name="inputs")
      dec_inputs = tf.keras.Input(shape=(None,), name="dec_inputs")

      enc_padding_mask = tf.keras.layers.Lambda(
          create_padding_mask, output_shape=(1, 1, None),
          name='enc_padding_mask')(inputs)
      # mask the future tokens for decoder inputs at the 1st attention block
      look_ahead_mask = tf.keras.layers.Lambda(
          create_look_ahead_mask,
          output_shape=(1, None, None),
          name='look_ahead_mask')(dec_inputs)
      # mask the encoder outputs for the 2nd attention block
      dec_padding_mask = tf.keras.layers.Lambda(
          create_padding_mask, output_shape=(1, 1, None),
          name='dec_padding_mask')(inputs)

      enc_outputs = encoder(
          vocab_size=vocab_size,
          num_layers=num_layers,
          units=units,
          d_model=d_model,
          num_heads=num_heads,
          dropout=dropout,
      )(inputs=[inputs, enc_padding_mask])

      dec_outputs = decoder(
          vocab_size=vocab_size,
          num_layers=num_layers,
          units=units,
          d_model=d_model,
          num_heads=num_heads,
          dropout=dropout,
      )(inputs=[dec_inputs, enc_outputs, look_ahead_mask, dec_padding_mask])

      outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(dec_outputs)

      return tf.keras.Model(inputs=[inputs, dec_inputs], outputs=outputs, name=name)

    sample_transformer = transformer(
        vocab_size=8192,
        num_layers=4,
        units=512,
        d_model=128,
        num_heads=4,
        dropout=0.3,
        name="sample_transformer")


    def loss_function(y_true, y_pred):
      y_true = tf.reshape(y_true, shape=(-1, MAX_LENGTH - 1))
      
      loss = tf.keras.losses.SparseCategoricalCrossentropy(
          from_logits=True, reduction='none')(y_true, y_pred)

      mask = tf.cast(tf.not_equal(y_true, 0), tf.float32)
      loss = tf.multiply(loss, mask)

      return tf.reduce_mean(loss)


    class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):

      def __init__(self, d_model, warmup_steps=4000):
        super(CustomSchedule, self).__init__()

        self.d_model = d_model
        self.d_model = tf.cast(self.d_model, tf.float32)

        self.warmup_steps = warmup_steps
      
      def get_config(self):
            return {"d_model": self.d_model,"warmup_steps":self.warmup_steps}

      def __call__(self, step):
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps**-1.5)

        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

    # `optimizer` and `accuracy` are referenced by model.compile below but were
    # missing from this snippet; these are standard choices for this schedule /
    # metric setup (an assumption, not taken from the original source).
    learning_rate = CustomSchedule(D_MODEL)
    optimizer = tf.keras.optimizers.Adam(
        learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

    def accuracy(y_true, y_pred):
      # ensure labels have shape (batch_size, MAX_LENGTH - 1)
      y_true = tf.reshape(y_true, shape=(-1, MAX_LENGTH - 1))
      return tf.keras.metrics.sparse_categorical_accuracy(y_true, y_pred)

    print('Compiling the Model...') 
    # initialize and compile model within strategy scope
    with strategy.scope():
      model = transformer(
          vocab_size=VOCAB_SIZE,
          num_layers=NUM_LAYERS,
          units=UNITS,
          d_model=D_MODEL,
          num_heads=NUM_HEADS,
          dropout=DROPOUT)

      model.compile(optimizer=optimizer, loss=loss_function, metrics=[accuracy])

    model.summary()
    print('---')
    #UNCOMMENT TO TRAIN THE MODEL
    '''
    import datetime

    logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

    for i in range(10):
        model.fit(dataset, epochs=10, validation_data=val_dataset, callbacks = [tensorboard_callback])
        model.save_weights('transformer_weights_'+str(i+1)+'.h5')
    
    '''
    print('Loading the model weights...')
    loaded_model = transformer(
          vocab_size=VOCAB_SIZE,
          num_layers=NUM_LAYERS,
          units=UNITS,
          d_model=D_MODEL,  
          num_heads=NUM_HEADS,
          dropout=DROPOUT)

    loaded_model.compile(optimizer=optimizer, loss=loss_function, metrics=[accuracy])
    loaded_model.load_weights(os.path.join(data_path, 'transformer_weights_100.h5'))

    import pandas as pd

    def textPreprocess(input_text):

      def removeAccents(input_text):
          strange='ąćęłńóśżź'
          ascii_replacements='acelnoszz'
          translator=str.maketrans(strange,ascii_replacements)
          return input_text.translate(translator)

      def removeSpecial(input_text):
          special='[^A-Za-z0-9 ]+'
          return re.sub(special, '', input_text)

      def removeTriplicated(input_text):
          return re.compile(r'(.)\1{2,}', re.IGNORECASE).sub(r'\1', input_text)

      return removeTriplicated(removeSpecial(removeAccents(input_text.lower())))


    def evaluate(sentence, model):
      sentence = textPreprocess(sentence)

      sentence = tf.expand_dims(
          START_TOKEN + tokenizer.encode(sentence) + END_TOKEN, axis=0)

      output = tf.expand_dims(START_TOKEN, 0)

      for i in range(MAX_LENGTH):
        predictions = model(inputs=[sentence, output], training=False)

        # select the last word from the seq_len dimension
        predictions = predictions[:, -1:, :]
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        # return the result if the predicted_id is equal to the end token
        if tf.equal(predicted_id, END_TOKEN[0]):
          break

        # concatenate the predicted_id to the output, which is given to the decoder as its input.
        output = tf.concat([output, predicted_id], axis=-1)

      return tf.squeeze(output, axis=0)


    def predict(sentence,model):
      prediction = evaluate(sentence,model)

      predicted_sentence = tokenizer.decode(
          [i for i in prediction if i < tokenizer.vocab_size])

      return predicted_sentence

    print("---")
    print("FIVE EXAMPLES: TRAIN SENTENCE PREDICTIONS: ")
    print('---')
    for x,y in zip(X_train[:2], y_train[:2]):
        output = predict(x, loaded_model)
        print('Question           : ', str(x))
        print('Actual Response    : ', str(y))
        print('Predicted Response : ', predict(x, loaded_model))
        print("---")

    print("---")
    print("FIVE EXAMPLES: TEST SENTENCE PREDICTIONS: ")
    print('---')
    for x,y in zip(X_test[:2], y_test[:2]):
        output = predict(x, loaded_model)
        print('Question           :', str(x))
        print('Actual Response    : ', str(y))
        print('Predicted Response : ', predict(x, loaded_model))
        print("---")
    ("---")

    import nltk
    from nltk.translate.bleu_score import SmoothingFunction
    from nltk.translate.bleu_score import sentence_bleu

    c = SmoothingFunction()
    bleuScoresTrain = []

    for x,y in zip(X_train, y_train):
        actualOutput = y
        predictedOutput = predict(x, loaded_model)
        ref = actualOutput.split(' ')
        pred = predictedOutput.split(' ')

        if len(ref) >= 4 and len(pred) >= 4:
            BLEUscore = sentence_bleu([ref], pred, smoothing_function = c.method2)
        elif len(ref) >= 3 and len(pred) >= 3:
            BLEUscore = sentence_bleu([ref], pred, weights = (1/3, 1/3, 1/3), smoothing_function = c.method2)
        elif len(ref) >= 2 and len(pred) >= 2:
            BLEUscore = sentence_bleu([ref], pred, weights = (0.5, 0.5), smoothing_function = c.method2)
        else:
            BLEUscore = sentence_bleu([ref], pred, weights = [1], smoothing_function = c.method2)
        bleuScoresTrain.append(BLEUscore)
    print('---')
    print("The Bleu score for Train data is: ", sum(bleuScoresTrain)/float(len(bleuScoresTrain)))
    print('---')
    bleuScoresTest = []

    for x,y in zip(X_test, y_test):
        actualOutput = y
        predictedOutput = predict(x, loaded_model)
        ref = actualOutput.split(' ')
        pred = predictedOutput.split(' ')

        if len(ref) >= 4 and len(pred) >= 4:
            BLEUscore = sentence_bleu([ref], pred, smoothing_function = c.method2)
        elif len(ref) >= 3 and len(pred) >= 3:
            BLEUscore = sentence_bleu([ref], pred, weights = (1/3, 1/3, 1/3), smoothing_function = c.method2)
        elif len(ref) >= 2 and len(pred) >= 2:
            BLEUscore = sentence_bleu([ref], pred, weights = (0.5, 0.5), smoothing_function = c.method2)
        else:
            BLEUscore = sentence_bleu([ref], pred, weights = [1], smoothing_function = c.method2)
        bleuScoresTest.append(BLEUscore)
    print('---')
    print("The Bleu score for Test data is : ", sum(bleuScoresTest)/float(len(bleuScoresTest)))
    print('---')
    print('END!')
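The train and test scoring loops above duplicate the weight-selection branching. A compact alternative (a sketch, not part of the original script; the helper name is mine) picks uniform weights capped by the shorter of the two token lists, which reproduces the same branch logic:

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def adaptive_bleu(ref_tokens, pred_tokens, smoother=SmoothingFunction().method2):
    # Uniform weights over up to 4-gram BLEU, capped by the shorter sequence.
    n = max(1, min(4, len(ref_tokens), len(pred_tokens)))
    weights = tuple(1.0 / n for _ in range(n))
    return sentence_bleu([ref_tokens], pred_tokens,
                         weights=weights, smoothing_function=smoother)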
Example no. 17
0
def validate(val_loader,
             encoder,
             decoder,
             criterion,
             vocab,
             epoch,
             total_step,
             start_step=1,
             start_loss=0.0,
             start_bleu=0.0):
    """Validate the model for one epoch using the provided parameters. 
    Return the epoch's average validation loss and Bleu-4 score."""

    # Switch to validation mode
    encoder.eval()
    decoder.eval()

    # Initialize smoothing function
    smoothing = SmoothingFunction()

    # Keep track of validation loss and Bleu-4 score
    total_loss = start_loss
    total_bleu_4 = start_bleu

    # Start time for every 100 steps
    start_val_time = time.time()

    # Disable gradient calculation because we are in inference mode
    with torch.no_grad():
        for i_step in range(start_step, total_step + 1):
            # Randomly sample a caption length, and sample indices with that length
            indices = val_loader.dataset.get_indices()
            # Create a batch sampler to retrieve a batch with the sampled indices
            new_sampler = data.sampler.SubsetRandomSampler(indices=indices)
            val_loader.batch_sampler.sampler = new_sampler

            # Obtain the batch
            for batch in val_loader:
                images, captions = batch[0], batch[1]
                break

            # Move to GPU if CUDA is available
            if torch.cuda.is_available():
                images = images.cuda()
                captions = captions.cuda()

            # Pass the inputs through the CNN-RNN model
            features = encoder(images)
            outputs = decoder(features, captions)

            # Calculate the total Bleu-4 score for the batch
            batch_bleu_4 = 0.0
            # Iterate over outputs. Note: outputs[i] is a caption in the batch
            # outputs[i, j, k] contains the model's predicted score i.e. how
            # likely the j-th token in the i-th caption in the batch is the
            # k-th token in the vocabulary.
            for i in range(len(outputs)):
                predicted_ids = []
                for scores in outputs[i]:
                    # Find the index of the token that has the max score
                    predicted_ids.append(scores.argmax().item())
                # Convert word ids to actual words
                predicted_word_list = word_list(predicted_ids, vocab)
                caption_word_list = word_list(captions[i].cpu().numpy(), vocab)
                # Calculate Bleu-4 score and append it to the batch_bleu_4 list
                batch_bleu_4 += sentence_bleu(
                    [caption_word_list],
                    predicted_word_list,
                    smoothing_function=smoothing.method1)
            total_bleu_4 += batch_bleu_4 / len(outputs)

            # Calculate the batch loss
            loss = criterion(outputs.view(-1, len(vocab)), captions.view(-1))
            total_loss += loss.item()

            # Get validation statistics
            stats = "Epoch %d, Val step [%d/%d], %ds, Loss: %.4f, Perplexity: %5.4f, Bleu-4: %.4f" \
                    % (epoch, i_step, total_step, time.time() - start_val_time,
                       loss.item(), np.exp(loss.item()), batch_bleu_4 / len(outputs))

            # Print validation statistics (on same line)
            print("\r" + stats, end == "")
            sys.stdout.flush()

            # Print validation statistics (on different line) and reset time
            if i_step % PRINT_EVERY == 0:
                print("\r" + stats)
                filename = os.path.join(
                    "/home/osboxes/image_captioning/example",
                    "val-model-{}{}.pkl".format(epoch, i_step))
                save_val_checkpoint(filename, encoder, decoder, total_loss,
                                    total_bleu_4, epoch, i_step)
                start_val_time = time.time()

        return total_loss / total_step, total_bleu_4 / total_step
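validate() calls a word_list helper that is not shown in this snippet. A minimal sketch, assuming the vocabulary exposes idx2word (as in the score() example above) and uses the usual <start>/<end>/<pad> tokens; the exact token strings are assumptions:

def word_list(word_ids, vocab):
    # Map token ids to words, stopping at <end> and skipping other special tokens.
    words = []
    for word_id in word_ids:
        word = vocab.idx2word[int(word_id)]
        if word == '<end>':
            break
        if word in ('<start>', '<pad>'):
            continue
        words.append(word)
    return words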
def nltk_sentence_bleu(hypothesis, reference, order=4):
    # Uniform weights up to `order` n-grams; use the (previously unused) smoothing.
    cc = SmoothingFunction()
    weights = tuple(1.0 / order for _ in range(order))
    return nltk.translate.bleu([reference], hypothesis, weights,
                               smoothing_function=cc.method1)
Example no. 19
0
def calculate_bleu2(reference: str, hypothesis: str) -> float:
    return sentence_bleu([reference.split()],
                         hypothesis.split(),
                         weights=(0.5, 0.5),
                         smoothing_function=SmoothingFunction().method2)
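A quick usage note for the helper above (inputs are illustrative): identical strings score 1.0 under BLEU-2 with method2 smoothing, and partial overlap scores strictly below that.

calculate_bleu2("the cat sat on the mat", "the cat sat on the mat")  # 1.0
calculate_bleu2("the cat sat on the mat", "a cat sat on a mat")      # < 1.0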
Example no. 20
0
 def grade_marker(self, marker_text):
     chencherry = SmoothingFunction()
     reference = self.key_sentences
     candidate = marker_text
     bleu = sentence_bleu(reference, candidate, weights=(1, 0, 0, 0), smoothing_function=chencherry.method1)
     return bleu
Example no. 21
0
def bleu(original, translated, n=4):
    weights = [1 / n] * n
    return sentence_bleu([p(original)], p(translated), weights=weights, smoothing_function=SmoothingFunction().method7)
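Here p() is an external preprocessing helper that is not included in the snippet; a minimal stand-in, under the assumption that it only lowercases and whitespace-tokenizes the input (the real helper may do more):

def p(text):
    # Assumed behaviour of the missing helper: lowercase + whitespace tokenization.
    return text.lower().split()

# Example call: bigram BLEU between an original and a translated sentence.
bleu("the cat sat on the mat", "a cat sat on the mat", n=2)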
Example no. 22
0
    return input_ids, token_type_ids


args = Config()
ckpt_path = 'ckpt/VEID/model.bin'
tokenizer = BertTokenizer.from_pretrained(args.model_checkpoint,
                                          do_lower_case=True)
tokenizer.add_special_tokens(SPECIAL_TOKENS_DICT)
model_config = GPT2Config.from_pretrained(args.model_checkpoint)

model = VEID(model_config)
ckpt = torch.load(ckpt_path, map_location='cpu')
model.load_state_dict(ckpt['model'])

smooth = SmoothingFunction()
meter = AverageMeter()

with open('data/new_small_train.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

total_num = len(data)
current_num = 0

case_study = []

for dia in data:
    #print(dia)
    case = {}
    print(current_num, '/', total_num)
    case['history'] = dia['history']
def evaluation(args):
    source = pickle_load(os.path.join(args.model_path, 'source.pkl'))
    target = pickle_load(os.path.join(args.model_path, 'target.pkl'))
    target_test = pickle_load(os.path.join(args.model_path, 'target_test.pkl'))
    setting = load_setting(os.path.join(args.model_path, 'setting.yaml'))
    start_id, end_id = setting['start_id'], setting['end_id']
    type_size = setting['type_size']
    player_size = setting['player_size']
    team_size = setting['team_size']
    detail_size = setting['detail_size']
    detail_dim = setting['detail_dim']
    src_embed = setting['src_embed']
    event_size = setting['event_size']
    vocab_size = setting['vocab_size']
    trg_embed = setting['trg_embed']
    hidden = setting['hidden']
    start_id = setting['start_id']
    end_id = setting['end_id']
    class_weight = None
    mlp_layers = setting['mlp_layers']
    max_length = setting['max_length']
    dropout = setting['dropout']
    loss_weight = None
    disc_loss = setting['disc_loss']
    loss_func = setting['loss_func']
    net = setting['net']
    dataset = setting['dataset']
    numbering = setting['numbering']
    reverse_decode = setting['reverse_decode']
    home_player_tag = target.word_to_id.get(target.home_player_tag)
    away_player_tag = target.word_to_id.get(target.away_player_tag)
    home_team_tag = target.word_to_id.get(target.home_team_tag)
    away_team_tag = target.word_to_id.get(target.away_team_tag)
    test = OptaDataset(path=dataset + '.test',
                       fields={
                           'source': source,
                           'target': target_test
                       })
    test20 = OptaDataset(path=dataset + '.test',
                         fields={
                             'source': source,
                             'target': target_test
                         },
                         limit_length=20)
    test15 = OptaDataset(path=dataset + '.test',
                         fields={
                             'source': source,
                             'target': target_test
                         },
                         limit_length=15)
    test10 = OptaDataset(path=dataset + '.test',
                         fields={
                             'source': source,
                             'target': target_test
                         },
                         limit_length=10)

    if 'disc' in net:
        content_word_size = len(target.content_word_to_id)
    print('vocab size: {}'.format(vocab_size))
    if net == 'plain':
        model = MLPEncoder2AttentionDecoder(type_size,
                                            player_size,
                                            team_size,
                                            detail_size,
                                            detail_dim,
                                            src_embed,
                                            event_size,
                                            vocab_size,
                                            trg_embed,
                                            hidden,
                                            start_id,
                                            end_id,
                                            class_weight,
                                            mlp_layers,
                                            max_length,
                                            dropout,
                                            IGNORE_LABEL,
                                            reverse_decode=reverse_decode)
    elif net == 'tmpl':
        model = MLPEncoder2AttentionDecoder(type_size,
                                            player_size,
                                            team_size,
                                            detail_size,
                                            detail_dim,
                                            src_embed,
                                            event_size,
                                            vocab_size,
                                            trg_embed,
                                            hidden,
                                            start_id,
                                            end_id,
                                            class_weight,
                                            mlp_layers,
                                            max_length,
                                            dropout,
                                            IGNORE_LABEL,
                                            source.id_to_player,
                                            home_player_tag,
                                            away_player_tag,
                                            source.id_to_team,
                                            home_team_tag,
                                            away_team_tag,
                                            target.player_to_id,
                                            target.players,
                                            reverse_decode=reverse_decode)
    elif net == 'gate':
        model = MLPEncoder2GatedAttentionDecoder(type_size,
                                                 player_size,
                                                 team_size,
                                                 detail_size,
                                                 detail_dim,
                                                 src_embed,
                                                 event_size,
                                                 vocab_size,
                                                 trg_embed,
                                                 hidden,
                                                 start_id,
                                                 end_id,
                                                 class_weight,
                                                 mlp_layers,
                                                 max_length,
                                                 dropout,
                                                 IGNORE_LABEL,
                                                 reverse_decode=reverse_decode)
    elif net == 'gate-tmpl':
        model = MLPEncoder2GatedAttentionDecoder(type_size,
                                                 player_size,
                                                 team_size,
                                                 detail_size,
                                                 detail_dim,
                                                 src_embed,
                                                 event_size,
                                                 vocab_size,
                                                 trg_embed,
                                                 hidden,
                                                 start_id,
                                                 end_id,
                                                 class_weight,
                                                 mlp_layers,
                                                 max_length,
                                                 dropout,
                                                 IGNORE_LABEL,
                                                 source.id_to_player,
                                                 home_player_tag,
                                                 away_player_tag,
                                                 source.id_to_team,
                                                 home_team_tag,
                                                 away_team_tag,
                                                 target.player_to_id,
                                                 target.players,
                                                 reverse_decode=reverse_decode)
    elif net == 'disc':
        model = DiscriminativeMLPEncoder2AttentionDecoder(
            type_size,
            player_size,
            team_size,
            detail_size,
            detail_dim,
            src_embed,
            event_size,
            vocab_size,
            content_word_size,
            trg_embed,
            hidden,
            start_id,
            end_id,
            class_weight,
            loss_weight,
            disc_loss,
            loss_func,
            mlp_layers,
            max_length,
            dropout,
            IGNORE_LABEL,
            reverse_decode=reverse_decode)
    elif net == 'disc-tmpl':
        model = DiscriminativeMLPEncoder2AttentionDecoder(
            type_size,
            player_size,
            team_size,
            detail_size,
            detail_dim,
            src_embed,
            event_size,
            vocab_size,
            content_word_size,
            trg_embed,
            hidden,
            start_id,
            end_id,
            class_weight,
            loss_weight,
            disc_loss,
            loss_func,
            mlp_layers,
            max_length,
            dropout,
            IGNORE_LABEL,
            source.id_to_player,
            home_player_tag,
            away_player_tag,
            source.id_to_team,
            home_team_tag,
            away_team_tag,
            target.player_to_id,
            target.players,
            reverse_decode=reverse_decode)
    elif net == 'gate-disc':
        model = DiscriminativeMLPEncoder2GatedAttentionDecoder(
            type_size,
            player_size,
            team_size,
            detail_size,
            detail_dim,
            src_embed,
            event_size,
            vocab_size,
            content_word_size,
            trg_embed,
            hidden,
            start_id,
            end_id,
            class_weight,
            loss_weight,
            disc_loss,
            loss_func,
            mlp_layers,
            max_length,
            dropout,
            IGNORE_LABEL,
            reverse_decode=reverse_decode)
    elif net == 'gate-disc-tmpl':
        model = DiscriminativeMLPEncoder2GatedAttentionDecoder(
            type_size,
            player_size,
            team_size,
            detail_size,
            detail_dim,
            src_embed,
            event_size,
            vocab_size,
            content_word_size,
            trg_embed,
            hidden,
            start_id,
            end_id,
            class_weight,
            loss_weight,
            disc_loss,
            loss_func,
            mlp_layers,
            max_length,
            dropout,
            IGNORE_LABEL,
            source.id_to_player,
            home_player_tag,
            away_player_tag,
            source.id_to_team,
            home_team_tag,
            away_team_tag,
            target.player_to_id,
            target.players,
            reverse_decode=reverse_decode)
    if numbering:
        model.player_id = target.player_id
        model.team_id = target.team_id
    # load best model
    if args.gpu is not None:
        model.use_gpu(args.gpu)
    model.id_to_word = target.id_to_word
    model.load_model(os.path.join(args.model_path, 'best.model'))
    batch_size = args.batch
    src_test_iter = SequentialIterator(test.source,
                                       batch_size,
                                       None,
                                       event_size,
                                       source.fillvalue,
                                       gpu=args.gpu)
    src_test20_iter = SequentialIterator(test20.source,
                                         batch_size,
                                         None,
                                         event_size,
                                         source.fillvalue,
                                         gpu=args.gpu)
    src_test15_iter = SequentialIterator(test15.source,
                                         batch_size,
                                         None,
                                         event_size,
                                         source.fillvalue,
                                         gpu=args.gpu)
    src_test10_iter = SequentialIterator(test10.source,
                                         batch_size,
                                         None,
                                         event_size,
                                         source.fillvalue,
                                         gpu=args.gpu)
    trg_test_iter = Iterator(test.target,
                             batch_size,
                             wrapper=EndTokenIdRemoval(end_id),
                             gpu=None)
    trg_test20_iter = Iterator(test20.target,
                               batch_size,
                               wrapper=EndTokenIdRemoval(end_id),
                               gpu=None)
    trg_test15_iter = Iterator(test15.target,
                               batch_size,
                               wrapper=EndTokenIdRemoval(end_id),
                               gpu=None)
    trg_test10_iter = Iterator(test10.target,
                               batch_size,
                               wrapper=EndTokenIdRemoval(end_id),
                               gpu=None)

    with open('./dataset/player_list.json.new') as f:
        id_to_player = json.load(f)
    with open('./dataset/team_list.json.new') as f:
        id_to_team = json.load(f)

    def convert(ind, no_tag=False):
        if 'player' in ind:
            if no_tag:
                i = ind.replace('player', '')
                return id_to_player.get(i, ind)
            else:
                return ind
        elif 'team' in ind:
            if no_tag:
                i = ind.replace('team', '')
                return id_to_team.get(i, ind)
            else:
                return ind
        else:
            return ind

    if 'disc' in net:
        bleu_score, accuracy, hypotheses = evaluate_bleu_and_accuracy(
            model, src_test_iter, trg_test_iter)
        bleu_score20, _, hypotheses20 = evaluate_bleu_and_accuracy(
            model, src_test20_iter, trg_test20_iter)
        bleu_score15, _, hypotheses15 = evaluate_bleu_and_accuracy(
            model, src_test15_iter, trg_test15_iter)
        bleu_score10, _, hypotheses10 = evaluate_bleu_and_accuracy(
            model, src_test10_iter, trg_test10_iter)
    else:
        bleu_score, hypotheses = evaluate_bleu(model, src_test_iter,
                                               trg_test_iter)
        bleu_score20, hypotheses20 = evaluate_bleu(model, src_test20_iter,
                                                   trg_test20_iter)
        bleu_score15, hypotheses15 = evaluate_bleu(model, src_test15_iter,
                                                   trg_test15_iter)
        bleu_score10, hypotheses10 = evaluate_bleu(model, src_test10_iter,
                                                   trg_test10_iter)

    print('best score: {}'.format(bleu_score))
    print('best score20: {}'.format(bleu_score20))
    print('best score15: {}'.format(bleu_score15))
    print('best score10: {}'.format(bleu_score10))
    # save hypothesis
    hypotheses_for_save = [
        ' '.join([convert(y, True) for y in h]) for h in hypotheses
    ]
    hypotheses20_for_save = [
        ' '.join([convert(y, True) for y in h]) for h in hypotheses20
    ]
    hypotheses15_for_save = [
        ' '.join([convert(y, True) for y in h]) for h in hypotheses15
    ]
    hypotheses10_for_save = [
        ' '.join([convert(y, True) for y in h]) for h in hypotheses10
    ]
    references_for_save = [
        ' '.join(convert(y, True) for y in r[0]) for r in test.target
    ]
    references20_for_save = [
        ' '.join(convert(y, True) for y in r[0]) for r in test20.target
    ]
    references15_for_save = [
        ' '.join(convert(y, True) for y in r[0]) for r in test15.target
    ]
    references10_for_save = [
        ' '.join(convert(y, True) for y in r[0]) for r in test10.target
    ]
    TextFile(os.path.join(args.model_path, 'hypo'), hypotheses_for_save).save()
    TextFile(os.path.join(args.model_path, 'hypo_len20'),
             hypotheses20_for_save).save()
    TextFile(os.path.join(args.model_path, 'hypo_len15'),
             hypotheses15_for_save).save()
    TextFile(os.path.join(args.model_path, 'hypo_len10'),
             hypotheses10_for_save).save()
    TextFile(os.path.join('./dataset', 'ref'), references_for_save).save()
    TextFile(os.path.join('./dataset', 'ref_len20'),
             references20_for_save).save()
    TextFile(os.path.join('./dataset', 'ref_len15'),
             references15_for_save).save()
    TextFile(os.path.join('./dataset', 'ref_len10'),
             references10_for_save).save()
    # generate readable text
    result = []
    for ref, hyp in zip(test.target.data, hypotheses):
        if isinstance(ref, tuple):
            ref = ref[0]
        ref = ' '.join([convert(y) for y in ref]).split()
        try:
            bleu_score = sentence_bleu(
                [ref], hyp, smoothing_function=SmoothingFunction().method1)
        except Exception:
            bleu_score = 0
        ref = ' '.join([convert(y, True) for y in ref]).split()
        hyp = ' '.join([convert(y, True) for y in hyp]).split()
        result.append((' '.join(ref), ' '.join(hyp), bleu_score))
    inputs = []
    for xs in test20.source.data:
        data = []
        for x in xs[:5]:
            event = event_type_mapper.get(x[0], x[0])
            player = id_to_player.get(str(x[1]), x[1])
            team = id_to_team.get(str(x[2]), x[2])
            detail = ','.join(
                [qualifier_type_mapper.get(i[-1], i[-1]) for i in x[-1]])
            data.append('event: {} player: {} team: {} detail: {}'.format(
                event, player, team, detail))
        inputs.append('\n'.join(data))
    result = [[x, *y] for x, y in zip(inputs, result)]
    result = sorted(result, key=lambda x: -x[-1])
    TextFile(os.path.join(args.model_path, 'test20_gate_disc_tmpl.txt'), [
        'src:\n{}\nref: {}\nhyp: {}\nbleu: {}\n##\n'.format(*x) for x in result
    ]).save()
def compute_bleu(reference, output):
    cc = SmoothingFunction()
    return sentence_bleu(reference, output, weights=(1.0, 0.0, 0.0, 0.0), smoothing_function=cc.method1)
Example no. 25
0
def compute_bleu(output, reference):
    cc = SmoothingFunction()
    return sentence_bleu([reference], output, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=cc.method1)
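
Note: the two compute_bleu helpers above differ in how the reference is passed (the first expects an already-built list of tokenized references, the second wraps a single tokenized reference) and in the weights they use (unigram-only versus cumulative 4-gram). A minimal sketch of the call shapes, using made-up tokenized sentences rather than data from either repository:

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

hyp = "the cat sat on the mat".split()      # tokenized hypothesis
refs = ["the cat is on the mat".split()]    # list of tokenized references

# Unigram-only BLEU, as in the first helper (reference list passed as-is).
print(sentence_bleu(refs, hyp, weights=(1.0, 0.0, 0.0, 0.0),
                    smoothing_function=SmoothingFunction().method1))

# Cumulative BLEU-4, as in the second helper (a single reference is wrapped
# in a list before the call).
print(sentence_bleu(refs, hyp, weights=(0.25, 0.25, 0.25, 0.25),
                    smoothing_function=SmoothingFunction().method1))
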
Example no. 26
0
    def __init__(self, candidate, motion):
        self.smoothing = SmoothingFunction().method2
        self.claims_list = '../dataset/claims.txt'
        # self.candidate = word_tokenize(candidate.lower())
        # self.motion = motion.lower()
        self.candidate = candidate
        self.motion = motion

        self.reference = {}
        # self.lexicon_dictionary = pickle.load(open('../data_histo_no_article/lexicon-dict.pkl', 'rb'))
        self.lexicon_dictionary = pickle.load(open('lexicon-dict.pkl', 'rb'))

        # Make evaluation data
        fobj = csv.reader(open(self.claims_list, "rb"), delimiter='\t')
        for idx, line in enumerate(fobj):
            if idx == 0: continue
            sentence_temp = unicodedata.normalize(
                'NFKD', line[2].decode('utf-8')).encode('ascii', 'ignore')
            sentence_temp = sentence_temp.replace('.', '')
            sentence_temp = sentence_temp.replace('?', '')
            sentence_temp = sentence_temp.replace('"', '')
            sentence_temp = sentence_temp.replace('\'', '')
            sentence_temp = sentence_temp.replace('(', '')
            sentence_temp = sentence_temp.replace(')', '')
            sentence_temp = sentence_temp.replace('%', '')
            sentence_temp = sentence_temp.replace('$', '')
            sentence_temp = sentence_temp.replace(',', '')
            cleaned_claim = sentence_temp.replace('[REF]', '')
            cleaned_claim = cleaned_claim.lower()

            sentence_motion = unicodedata.normalize(
                'NFKD', line[0].decode('utf-8')).encode('ascii', 'ignore')

            tokenized_claim = word_tokenize(cleaned_claim.lower())
            for widx, word in enumerate(tokenized_claim):
                if word not in self.lexicon_dictionary:
                    print("%s is not in dictionary" % word)
                    tokenized_claim[widx] = '<unk>'
                    # cleaned_claim = cleaned_claim.replace(word,'444')
            # tokenized_claim = word_tokenize(cleaned_claim)
            tokenized_claim.append('<eos>')

            # a=[1,2,3,4,5,1,2,3,4,5,1]
            # for n,i in enumerate(a):
            # 	if i==1:
            # 		a[n]=10

            print(tokenized_claim)
            # print cleaned_claim

            if len(tokenized_claim) < 5:
                continue
            else:
                if not sentence_motion.lower() in self.reference:
                    self.reference[sentence_motion.lower()] = []
                self.reference[sentence_motion.lower()].append(
                    word_tokenize(cleaned_claim.lower()))

        # Write to CSV
        with open(
                'evaluation_' +
                datetime.datetime.now().strftime("%Y-%m-%d:%H:%M:%S") + '.csv',
                'wb') as csvfile:
            csvWriter = csv.writer(csvfile, delimiter='\t')
            csvWriter.writerow([
                'No', 'Motion', 'Claim', 'Bleu Score', 'Unigram', 'Bigram',
                'Trigram', '4-Gram', 'Cumulative'
            ])

            for i in range(len(candidate)):
                c = word_tokenize(self.candidate[i].lower())
                m = self.motion[i].lower()

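                # score1: cumulative BLEU with nltk's default weights (0.25 x 4)
                # score2-score5: individual 1-, 2-, 3- and 4-gram scores
                # score6: cumulative BLEU-4 with the equal weights made explicit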
                score1 = sentence_bleu(self.reference[m],
                                       c,
                                       smoothing_function=self.smoothing)
                score2 = sentence_bleu(self.reference[m],
                                       c,
                                       weights=(1, 0, 0, 0),
                                       smoothing_function=self.smoothing)
                score3 = sentence_bleu(self.reference[m],
                                       c,
                                       weights=(0, 1, 0, 0),
                                       smoothing_function=self.smoothing)
                score4 = sentence_bleu(self.reference[m],
                                       c,
                                       weights=(0, 0, 1, 0),
                                       smoothing_function=self.smoothing)
                score5 = sentence_bleu(self.reference[m],
                                       c,
                                       weights=(0, 0, 0, 1),
                                       smoothing_function=self.smoothing)
                score6 = sentence_bleu(self.reference[m],
                                       c,
                                       weights=(0.25, 0.25, 0.25, 0.25),
                                       smoothing_function=self.smoothing)
                csvWriter.writerow([
                    i + 1, self.motion[i], self.candidate[i], score1, score2,
                    score3, score4, score5, score6
                ])

        print('Evaluation Done! Saved To Disk!')
Example no. 27
0
    def __init__(self):
        self.rouge = Rouge()
        self.smooth = SmoothingFunction().method1
        self.best_bleu = 0.
Example no. 28
0
import json
import argparse
import numpy as np

from nltk import bleu
from rouge import Rouge
from collections import defaultdict
from nltk.translate.bleu_score import SmoothingFunction

smoothing = SmoothingFunction().method1
weights = [0.25] * 4
rouge = Rouge()


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--out_dir",
                        type=str,
                        required=True,
                        help="The directory of the outputs")
    args = parser.parse_args()

    print("\t".join(["Setup", "LM", "BLEU", "ROUGE"]))

    for setup in [
            "rationale", "multi", "update_rationale", "update_type_rationale"
    ]:
        for lm in ["bart-large", "gpt2-xl"]:

            # Compute BLEU and ROUGE from the text predictions
            data = [
Example no. 29
0
                                                  vocab, True)

        eval_scores = evaluate(target_words, predicted_words)
        #*print('Target words shape: ' + str(caption.size()))
        #*print('Target words: ' + str(target_words))
        #*print('Predicted words are: ' + str(predicted_words))
        for imgs, tgt, pdt in zip(img_paths, target_words, predicted_words):
            if imgs in target_caption_full.keys():
                target_caption_full[imgs].extend(tgt)
                candidate_caption_full[imgs].extend([pdt])
            else:
                candidate_caption_full[imgs] = []
                target_caption_full[imgs] = tgt
                candidate_caption_full[imgs].append(pdt)

        sf = SmoothingFunction()
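        # sf is only used by the corpus_bleu sanity checks commented out below.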
        #*bleu1_corpus.append(corpus_bleu(target_words, predicted_words, weights=(1, 0, 0, 0), smoothing_function=sf.method4))
        #*bleu4_corpus.append(corpus_bleu(target_words, predicted_words, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=sf.method4))
        bleu1.append(eval_scores['Bleu_1'])
        bleu2.append(eval_scores['Bleu_2'])
        bleu3.append(eval_scores['Bleu_3'])
        bleu4.append(eval_scores['Bleu_4'])
        cider.append(eval_scores['CIDEr'])
        rouge.append(eval_scores['ROUGE_L'])

        #*assert round(bleu1_corpus[-1], 3) == round(bleu1[-1], 3)
        #*assert round(bleu4_corpus[-1], 3) == round(bleu4[-1], 3)

        if (idx + 1) % 100 == 0:  # 10
            print(
                "Step %d - %0.4f test loss, %0.2f time, %.3f BLEU1, %.3f BLEU2, %.3f BLEU3, %.3f BLEU4, %.3f CIDEr, %.3f ROUGE_L."
Example no. 30
0
def bleu4(reference_captions, predicted_caption):
    return 100 * sentence_bleu(reference_captions, predicted_caption,
                               weights=(0, 0, 0, 1), smoothing_function=SmoothingFunction().method1)
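
Usage note: weights=(0, 0, 0, 1) puts all the weight on 4-gram matches, so bleu4 returns a smoothed individual 4-gram score (scaled to the 0-100 range), not the cumulative BLEU-4 that equal weights (0.25, 0.25, 0.25, 0.25) would compute. A minimal call sketch with hypothetical captions:

refs = [['a', 'dog', 'runs', 'on', 'the', 'beach']]        # tokenized references
hyp = ['a', 'dog', 'is', 'running', 'on', 'the', 'beach']  # tokenized prediction
print(bleu4(refs, hyp))  # smoothed 4-gram score in the 0-100 range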