Example #1
import math

from nltk import word_tokenize
from nltk.translate import bleu_score
from nltk.translate.bleu_score import corpus_bleu


def test_corpus_bleu():
    hyp = word_tokenize("I have a pen")
    ref = word_tokenize("I have a apple")
    refs = [ref]
    # both APIs take the references first; for a single sentence pair,
    # sentence-level and corpus-level BLEU must agree
    assert math.isclose(bleu_score.sentence_bleu(refs, hyp),
                        corpus_bleu([refs], [hyp]))
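A note on the relationship being tested here: corpus_bleu pools n-gram counts over the whole corpus rather than averaging per-sentence scores, so the two only coincide for a single sentence pair. A minimal sketch (not from the original test; the sentences are made up) showing the difference once there is more than one pair:

from nltk.translate import bleu_score
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

smooth = SmoothingFunction().method1
refs = [[["the", "cat", "sat", "on", "the", "mat"]],
        [["there", "is", "a", "cat", "here"]]]
hyps = [["the", "cat", "sat", "on", "a", "mat"],
        ["a", "cat", "is", "here"]]

pooled = corpus_bleu(refs, hyps, smoothing_function=smooth)
averaged = sum(bleu_score.sentence_bleu(r, h, smoothing_function=smooth)
               for r, h in zip(refs, hyps)) / len(hyps)
print(pooled, averaged)  # generally not equal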
Example #2
import torch
from nltk.translate import bleu_score

# Assumes module-level helpers from the surrounding project:
# create_mask() and bleu_smoothing (an NLTK SmoothingFunction method).


def evaluate(model, val_loader, criterion, optimizer, device, writer):
    """Evaluate model for 1 epoch.

    Inputs: model, val_loader, criterion, optimizer, device, writer
        model: The model to be evaluated.
        val_loader: DataLoader of validation Dataset.
        criterion: Loss function.
        optimizer: Optimizer of model.
        device: PyTorch device.
        writer: Tensorboard summary writer.

    Outputs: loss, score
        loss: Loss of current epoch.
        score: BLEU score of current epoch.
    """
    total_loss = 0
    total_length = 0
    total_score = 0
    total_num = 0

    model.eval()

    with torch.no_grad():
        for batch in val_loader:
            sequence_en, sequence_de, seq_len_en, seq_len_de = batch
            sequence_en = sequence_en.to(device)
            sequence_de = sequence_de.to(device)
            # decoder input drops the last position (<EOS> or PAD)
            shifted_sequence_de = sequence_de[:, :-1]

            _, pad_mask_en = create_mask(sequence_en)
            pad_mask_en = pad_mask_en.to(device)
            future_mask, pad_mask_de = create_mask(shifted_sequence_de)
            future_mask = future_mask.to(device)
            pad_mask_de = pad_mask_de.to(device)

            logit = model(sequence_en, shifted_sequence_de, future_mask,
                          pad_mask_en, pad_mask_de)
            loss = criterion(input=logit.contiguous().view(-1, logit.size(-1)),
                             target=sequence_de[:, 1:].contiguous().view(-1))
            length = sum(seq_len_de) - len(seq_len_de)  # exclude <SOS> tokens

            total_loss += loss.item()  # accumulate a Python float, not a tensor
            total_length += length

            batch_score = 0
            for b, target in enumerate(sequence_de):
                predict = torch.argmax(logit[b, :seq_len_de[b] - 1, :], dim=1)
                batch_score += bleu_score.sentence_bleu(
                    [target[1:seq_len_de[b]].cpu().numpy()],
                    predict.cpu().numpy(),
                    smoothing_function=bleu_smoothing)
                total_num += 1
            total_score += batch_score

    return total_loss / total_length, total_score / total_num
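A hypothetical driver for the function above, assuming the project's model and a padded validation DataLoader. The sum-reduced loss matches the division by the total token count, and ignore_index=0 assumes PAD id 0:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = torch.nn.CrossEntropyLoss(ignore_index=0, reduction="sum")
val_loss, val_bleu = evaluate(model, val_loader, criterion, optimizer,
                              device, writer)
print("val loss: {}, val BLEU: {}".format(val_loss, val_bleu))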
Example #3
    def calc_bleu(self, gen_sentence, ans_sentence):
        if isinstance(gen_sentence, str):
            gen_sentence = gen_sentence.split(" ")
        if isinstance(ans_sentence, str):
            ans_sentence = ans_sentence.split(" ")

        anses = [ans_sentence]

        BLEUscore = bleu_score.sentence_bleu(anses,
                                             gen_sentence,
                                             weights=(0.5, 0.5))
        return BLEUscore
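With weights=(0.5, 0.5) this is BLEU-2: only unigram and bigram precisions, weighted equally. An equivalent direct call on made-up tokens, for reference:

from nltk.translate import bleu_score

print(bleu_score.sentence_bleu([["the", "cat", "sat", "on", "the", "mat"]],
                               ["the", "cat", "sat", "on", "a", "mat"],
                               weights=(0.5, 0.5)))  # ~0.707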
Example #4
import sys

from nltk.translate import bleu_score


def bleu(pred, answer, mode="1-gram"):
    if mode == "1-gram":
        weights = [1.0]
    elif mode == "2-gram":
        weights = [0.5, 0.5]
    elif mode == "3-gram":
        weights = [1 / 3, 1 / 3, 1 / 3]  # equal weights that sum to 1
    elif mode == "4-gram":
        weights = [0.25, 0.25, 0.25, 0.25]
    else:
        sys.exit("Unsupported mode: {}".format(mode))

    # sentence_bleu takes the reference list first, then the hypothesis
    return bleu_score.sentence_bleu([answer], pred, weights=weights)
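A hypothetical call; the sentences are made up and chosen to share n-grams at every order, so no smoothing is needed:

pred = "the quick brown fox jumps over the dog".split()
answer = "the quick brown fox jumps over the lazy dog".split()
for mode in ("1-gram", "2-gram", "3-gram", "4-gram"):
    print(mode, bleu(pred, answer, mode=mode))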
Example #5
import sys

from nltk.translate import bleu_score


def compute_bleu_score(predictedCaptions, trueCaptions, mode="4-gram"):
    if mode == "1-gram":
        weights = [1.0]
    elif mode == "2-gram":
        weights = [0.5, 0.5]
    elif mode == "3-gram":
        weights = [0.33, 0.33, 0.33]
    elif mode == "4-gram":
        weights = [0.25, 0.25, 0.25, 0.25]
    else:
        sys.exit("Unsupported mode: {}".format(mode))

    # the true captions are the references; the prediction is the hypothesis
    return bleu_score.sentence_bleu([trueCaptions],
                                    predictedCaptions,
                                    weights=weights)
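A sketch of averaging compute_bleu_score over a made-up batch of caption pairs; for a proper corpus-level number, corpus_bleu over all pairs would be the more standard choice:

pairs = [("a dog runs on the grass", "a dog is running on the grass"),
         ("two people ride bikes", "two people are riding bicycles")]
scores = [compute_bleu_score(pred.split(), true.split(), mode="2-gram")
          for pred, true in pairs]
print(sum(scores) / len(scores))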
Example #6
import numpy as np
import scipy.spatial.distance
from nltk.translate import bleu_score
from nltk.translate.bleu_score import SmoothingFunction

# `config` (experiment settings and token-similarity tables) and the
# `video_captions` argument are assumed to come from the surrounding project.


def compute_sentences_ranking(video_captions):
    """ returns [(sentence0, similarity), ..., (sentence19, similarity)]
    """
    sentences_global_ranking = []

    if config.experiment == 'experiment1':
        bfs = True
        embeddings = []
        labels = []
        for sentence in video_captions.sentences:
            sentence_embedding = sentence.get_sentence_embedding(bfs)
            # some sentences have no senses (e.g. "its a t") and thus no embedding
            if len(sentence_embedding) > 0:
                embeddings.append(sentence_embedding)
                labels.append(sentence.sentence)

        embeddings_mean = np.mean(embeddings, axis=0)
        distances = [
            scipy.spatial.distance.cosine(embedding, embeddings_mean)
            for embedding in embeddings
        ]
        for i, distance in enumerate(distances):
            sentences_global_ranking.append(
                (video_captions.sentences[i].sentence, distance))

    elif config.experiment == 'experiment5':
        chencherry = SmoothingFunction()
        # note: the i == j self-match contributes the same constant (1.0) to
        # every sentence's mean, so it does not change the ranking
        for sentence1 in video_captions.sentences:
            scores = [
                bleu_score.sentence_bleu([sentence2.sentence.split(' ')],
                                         sentence1.sentence.split(' '),
                                         smoothing_function=chencherry.method4)
                for sentence2 in video_captions.sentences
            ]
            score = sum(scores) / len(scores)
            sentences_global_ranking.append((sentence1.sentence, score))

    else:
        result = np.zeros([20, 20])
        for i, sentence1 in enumerate(video_captions.sentences):
            for j, sentence2 in enumerate(video_captions.sentences):
                similarities = []
                for token1_id in sentence1.tokens_id_list:

                    # find the token in sentence2 most similar to token1
                    most_similar_token_in_sentence = (None, float('-inf'))
                    for token2_id in sentence2.tokens_id_list:
                        key = (token1_id, token2_id)
                        if key in config.tokens_set.tokens_similarities_closest:
                            similarity = config.tokens_set.tokens_similarities_closest[key]
                            if similarity > most_similar_token_in_sentence[1]:
                                most_similar_token_in_sentence = (token2_id, similarity)

                    # store the token similarity (some experiments threshold it)
                    if most_similar_token_in_sentence[0] is not None:
                        if config.experiment in ('experiment4',
                                                 'experiment4symmetrical'):
                            if most_similar_token_in_sentence[1] > config.th1:
                                # add 1 per token instead of the similarity
                                similarities.append(
                                    (most_similar_token_in_sentence[0], 1.0))
                            else:
                                similarities.append((None, 0))
                        elif config.experiment == 'experiment3':
                            if most_similar_token_in_sentence[1] > config.th1:
                                similarities.append(
                                    most_similar_token_in_sentence)
                            else:
                                similarities.append((None, 0))
                        elif config.experiment == 'experiment2':
                            similarities.append(most_similar_token_in_sentence)

                # compute and store the similarity between sentence1 and sentence2
                if len(similarities) > 0:
                    sentences_similarity = sum(
                        a[1] for a in similarities) / len(similarities)
                else:
                    sentences_similarity = 0

                result[i, j] = sentences_similarity

        # make the similarities symmetrical by averaging each (i, j)/(j, i) pair
        if config.experiment == 'experiment4symmetrical':
            for i in range(len(result)):
                for j in range(i + 1, len(result)):
                    symmetric_similarity = (result[i, j] + result[j, i]) / 2
                    result[i, j] = symmetric_similarity
                    result[j, i] = symmetric_similarity

        # per-sentence mean similarity to all others, normalized to [0, 1]
        sentences_similarities = np.sum(result, axis=1) / result.shape[1]
        for i, similarity in enumerate(sentences_similarities):
            sentences_global_ranking.append(
                (video_captions.sentences[i].sentence, similarity))

    return sentences_global_ranking
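Hypothetical usage, assuming a video_captions object from the surrounding project: pick the most "central" caption from the ranking. Note that for 'experiment1' the second tuple element is a cosine distance (lower is better), while the other experiments return a similarity (higher is better):

ranking = compute_sentences_ranking(video_captions)
if config.experiment == 'experiment1':
    best_sentence, _ = min(ranking, key=lambda pair: pair[1])
else:
    best_sentence, _ = max(ranking, key=lambda pair: pair[1])
print(best_sentence)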
Example #7
#!/usr/bin/env python
# -*- coding: utf8 -*-

# for python3
# txt1.txt holds the references and txt2.txt the MT output;
# compute sentence-level BLEU for each line pair
from nltk import word_tokenize
from nltk.translate import bleu_score
from nltk.translate.bleu_score import SmoothingFunction

cc = SmoothingFunction()

with open("txt1.txt", encoding='utf-8') as f:
    txt1 = f.read().splitlines()
with open("txt2.txt", encoding='utf-8') as f:
    txt2 = f.read().splitlines()

scores = []
for ref_line, hyp_line in zip(txt1, txt2):
    ref = word_tokenize(ref_line)
    hyp = word_tokenize(hyp_line)
    scores.append(str(bleu_score.sentence_bleu([ref], hyp,
                                               smoothing_function=cc.method7)))

with open('bleu.txt', 'w') as f:
    f.write("\n".join(scores))
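A companion sketch: the same files scored once at corpus level, which pools n-gram counts over all line pairs instead of smoothing each sentence separately:

refs = [[word_tokenize(line)] for line in txt1]
hyps = [word_tokenize(line) for line in txt2]
print(bleu_score.corpus_bleu(refs, hyps))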
Example #8
import torch
from nltk.translate import bleu_score

# Assumes module-level helpers from the surrounding project: create_mask(),
# bleu_smoothing, learning_rate_schedule(), and hparams.


def train(model, train_loader, criterion, optimizer, device, writer, epoch,
          print_steps):
    """Train model for 1 epoch.

    Inputs: model, train_loader, criterion, optimizer, device, writer, epoch, print_steps
        model: The model to be trained.
        train_loader: DataLoader of train Dataset.
        criterion: Loss function.
        optimizer: Optimizer of model.
        device: PyTorch device.
        writer: Tensorboard summary writer.
        epoch: Index of current epoch.
        print_steps: Interval of steps to print log.

    Outputs: loss, score
        loss: Loss of current epoch.
        score: BLEU score of current epoch.
    """
    total_loss = 0
    total_length = 0  # sum of lengths of sequences
    total_score = 0
    total_num = 0  # number of target sentences scored
    step = 0
    num_batchs = len(train_loader)

    model.train()

    for batch in train_loader:
        optimizer.zero_grad()

        # learning rate schedule
        for group in optimizer.param_groups:
            group["lr"] = learning_rate_schedule(train.global_step)

        sequence_en, sequence_de, seq_len_en, seq_len_de = batch
        sequence_en = sequence_en.to(device)
        sequence_de = sequence_de.to(device)
        # except <EOS> token (or PAD)
        shifted_sequence_de = sequence_de[:, :-1]

        _, pad_mask_en = create_mask(sequence_en)
        pad_mask_en = pad_mask_en.to(device)
        future_mask, pad_mask_de = create_mask(shifted_sequence_de)
        future_mask = future_mask.to(device)
        pad_mask_de = pad_mask_de.to(device)

        # logit: [batch, time, vocab]
        logit = model(sequence_en, shifted_sequence_de, future_mask,
                      pad_mask_en, pad_mask_de)
        loss = criterion(input=logit.contiguous().view(-1, logit.size(-1)),
                         target=sequence_de[:, 1:].contiguous().view(-1))
        # except <SOS> token
        length = sum(seq_len_de) - len(seq_len_de)

        # .item() keeps only the float, so the autograd graph is freed each step
        total_loss += loss.item()
        total_length += length

        # calculate BLEU score
        batch_score = 0
        for b, target in enumerate(sequence_de):
            # target, predict: [time]
            predict = torch.argmax(logit[b, :seq_len_de[b] - 1, :], dim=1)
            batch_score += bleu_score.sentence_bleu(
                [target[1:seq_len_de[b]].cpu().numpy()],
                predict.cpu().numpy(),
                smoothing_function=bleu_smoothing)
            total_num += 1
        """"""
        total_score += batch_score

        loss.backward()
        optimizer.step()

        if step % print_steps == 0:
            print(
                "epoch: {}/{}, batch: {}/{}, loss: {}, bleu score: {}".format(
                    epoch, hparams.max_epochs, step + 1, num_batchs,
                    loss.item() / length, batch_score / len(seq_len_de)))
            # update graph in tensorboard
            writer.add_scalar("Loss", loss / length, train.global_step)
            writer.add_scalar("Bleu score", batch_score / len(seq_len_de),
                              train.global_step)

        step += 1
        train.global_step += 1

    # return loss & bleu_score of epoch
    return total_loss / total_length, total_score / total_num
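A hypothetical training driver, assuming model, data loaders, hparams, and writer from the surrounding project, plus the evaluate function from Example #2. Note that train.global_step is a function attribute used by the learning-rate schedule and logging, so it must be initialized before the first call:

train.global_step = 0
for epoch in range(hparams.max_epochs):
    train_loss, train_bleu = train(model, train_loader, criterion, optimizer,
                                   device, writer, epoch, print_steps=100)
    val_loss, val_bleu = evaluate(model, val_loader, criterion, optimizer,
                                  device, writer)
    print("epoch {}: train BLEU {}, val BLEU {}".format(
        epoch, train_bleu, val_bleu))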