Beispiel #1
0
def get_validation_metrics(model: NeuralMachineTranslator,
                           iterations: int,
                           training_evaluator: Evaluator,
                           validation_evaluator: Evaluator,
                           training_iterator: BucketIterator,
                           validation_iterator: BucketIterator) -> Tuple[Metrics, Metrics]:
    """Score the model on the validation set and on a 50-batch training sample.

    :param model: trained translation model used for prediction
    :param iterations: number of validation batches to evaluate
    :param training_evaluator: evaluator accumulating training-set sentences
    :param validation_evaluator: evaluator accumulating validation-set sentences
    :param training_iterator: batch iterator over the training data
    :param validation_iterator: batch iterator over the validation data
    :return: (validation_metrics, training_metrics); the loss field of both is 0
             because no loss is computed here
    """
    predictor = Predictor(model)

    validation_evaluator.clear_sentences()
    # BUGFIX: create the iterator ONCE.  The original `next(iter(x))` inside the
    # loop restarted the iterator on every pass, scoring the first batch
    # `iterations` times instead of walking through the data.
    validation_batches = iter(validation_iterator)
    for _ in range(iterations):
        validation_batch = next(validation_batches)
        predicted_sentence, _ = predictor.predict(validation_batch)
        validation_evaluator.add_sentences(validation_batch.trg[0], predicted_sentence, model.EOS)

    validation_metrics = Metrics(validation_evaluator.bleu(), validation_evaluator.ter(), 0)

    training_evaluator.clear_sentences()
    # Sample 50 training batches so training and validation scores are comparable.
    training_batches = iter(training_iterator)
    for _ in range(50):
        batch = next(training_batches)
        predicted_sentence, _ = predictor.predict(batch)
        training_evaluator.add_sentences(batch.trg[0], predicted_sentence, model.EOS)

    training_metrics = Metrics(training_evaluator.bleu(), training_evaluator.ter(), 0)

    return validation_metrics, training_metrics
    # NOTE(review): everything below is unreachable -- it follows the `return`
    # statement above, so it can never execute.  It also references names that
    # are undefined in this scope (training_data, input_data, data, eos_token),
    # which suggests a separate example was fused in here by the scrape.
    # Code kept byte-identical; candidate for deletion.
    training_batches = next(
        iter(
            BucketIterator(
                dataset=training_data,
                batch_size=1,
                train=True,
                sort_key=lambda x: interleave_keys(len(x.src), len(x.trg)))))

    predictor = Predictor(model)
    evaluator = Evaluator(training_data.english.vocab,
                          training_data.french.vocab)

    # evaluator.add_sentences(input_data.trg[0], predictor.predict(input_data))
    # Predict every sentence of `input_data` and collect it for BLEU/TER scoring.
    # NOTE(review): `next(iter(input_data))` restarts the iterator each pass, so
    # this likely re-predicts the same first sentence -- verify before reuse.
    for i in range((len(data) // model.batch_size) + 1):
        sentence = next(iter(input_data))
        predicted_sentence, _ = predictor.predict(sentence)
        evaluator.add_sentences(sentence.trg[0], predicted_sentence, eos_token)
    #
    # for i in range((len(data) // batch_size) + 1):
    #     sentence = next(iter(input_data))
    #     src, trg = evaluator.convert_sentences(sentence)
    #     file.write(' '.join(src) + '\n')
    #     file.write(' '.join(trg) + '\n')
    #     file.write('\n')

    # Print corpus-level scores and dump the predictions for inspection.
    print('bleu:', evaluator.bleu())
    print('ter: ', evaluator.ter())

    evaluator.write_to_file('results/beam_search')
Beispiel #3
0
    # NOTE(review): orphan fragment -- the enclosing function's `def` line is
    # not visible in this chunk, and several names (data, training_data, model)
    # are undefined here.  Code kept byte-identical; comments only.

    # Iterate the evaluation data one sentence at a time.
    batch_size = 1
    input_data = BucketIterator(
        dataset=data,
        train=True,
        batch_size=batch_size,
        # sort_key=lambda x: interleave_keys(len(x.src), len(x.trg))
    )

    # NOTE(review): `training_batches` is assigned but never used below.
    training_batches = next(
        iter(
            BucketIterator(
                dataset=training_data,
                batch_size=10,
                train=True,
                sort_key=lambda x: interleave_keys(len(x.src), len(x.trg)))))

    predictor = Predictor(model)
    evaluator = Evaluator(training_data.english.vocab,
                          training_data.french.vocab)

    # evaluator.add_sentences(input_data.trg[0], predictor.predict(input_data))
    # Predict 10 sentences, keeping the attention weights for plotting.
    # NOTE(review): `next(iter(input_data))` restarts the iterator each pass,
    # so this may predict the same first sentence 10 times -- verify.
    for i in range(10):
        sentence = next(iter(input_data))
        predicted_sentence, attention = predictor.predict(sentence)
        evaluator.add_sentences(sentence.trg[0],
                                predicted_sentence,
                                attention=attention)

    # Plot an attention heat map for every collected prediction.
    attentions = len(evaluator.attention_weights)
    for i in range(attentions):
        evaluator.plot_attention(evaluator.attention_weights[i])
Beispiel #4
0
def train_epochs(
    training_data: ParallelData,
    embedding_dimension: int,
    n_epochs: int,
    batch_size: int,
    max_sentence_length: int,
    evaluator: Evaluator,
    validation_evaluator: Evaluator,
    dropout=0.3,
    learning_rate=0.01,
    max_iterations_per_epoch=math.inf,
    teacher_forcing=False
) -> Dict[int, Metrics]:
    """Train a NeuralMachineTranslator with SGD and track per-epoch metrics.

    After every epoch this records training BLEU/TER/loss, computes validation
    metrics, rewrites ``training_progress.csv`` and ``validation_progress.csv``,
    pickles the model to ``output/model_epoch<N>.pickle``, and quarters the
    learning rate whenever the epoch loss did not improve.

    :param training_data: parallel corpus providing english/french vocabularies
    :param embedding_dimension: word-embedding size
    :param n_epochs: number of training epochs
    :param batch_size: sentence pairs per batch
    :param max_sentence_length: maximum sentence / prediction length
    :param evaluator: evaluator for training-set sentences
    :param validation_evaluator: evaluator for validation sentences
    :param dropout: dropout probability for the model
    :param learning_rate: initial SGD learning rate
    :param max_iterations_per_epoch: optional cap on batches per epoch
    :param teacher_forcing: feed gold target tokens while decoding
    :return: dict mapping epoch number (1-based) to that epoch's training Metrics
    """
    n_english = len(training_data.english.vocab)
    n_french = len(training_data.french.vocab)

    # batch iterators over training and validation data
    train_iterator = Iterator(dataset=training_data, batch_size=batch_size,
                              sort_key=lambda x: interleave_keys(len(x.src), len(x.trg)), train=True)

    validation_data = TestData("data/BPE/valid/val.BPE", training_data.english.vocab, training_data.french.vocab)
    validation_iterations = (len(validation_data) // batch_size) + 1
    validation_iterator = Iterator(dataset=validation_data, batch_size=batch_size,
                                   sort_key=lambda x: interleave_keys(len(x.src), len(x.trg)), train=True)

    iterations_per_epoch = min(max_iterations_per_epoch, (len(training_data) // batch_size) + 1)

    model = NeuralMachineTranslator(
        embedding_dimension,
        n_french,
        max_sentence_length,
        dropout,
        n_english,
        n_english,
        2 * embedding_dimension,
        batch_size,
        training_data.english.vocab.stoi['<EOS>'],
        training_data.english.vocab.stoi['<SOS>'],
        training_data.english.vocab.stoi['<PAD>'],
        max_prediction_length=max_sentence_length
    )

    model.train()

    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    print("Parameters to train: ")
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name)
    print()

    metrics = {}
    validation_metrics = {}
    training_metrics = {}

    print("Start training..")
    # BUGFIX: the original called `next(iter(train_iterator))` for every batch,
    # which restarts the iterator each time and trains on the first batch only.
    # Keep one live iterator and restart it only when a pass is exhausted.
    train_batches = iter(train_iterator)
    for epoch in range(1, n_epochs + 1):

        epoch_loss = 0
        iteration_loss = 0

        start_time = time()
        for iteration in range(iterations_per_epoch):

            # set gradients to zero
            optimizer.zero_grad()
            model.zero_grad()

            # get next batch, restarting the iterator at the end of a pass
            try:
                batch = next(train_batches)
            except StopIteration:
                train_batches = iter(train_iterator)
                batch = next(train_batches)

            # forward pass
            prediction, loss = train(batch, model, teacher_forcing)

            # backward pass; clip gradients to stabilise recurrent training
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.)

            # update parameters
            optimizer.step()

            # save losses and add predicted sentences to evaluator
            epoch_loss += loss.item()
            iteration_loss += loss.item()
            evaluator.add_sentences(batch.trg[0], prediction, model.EOS)

            # progress report every 200 batches
            if iteration > 1 and iteration % 200 == 0:
                current_time = (time() - start_time) / 200
                print('batch {}/{}'.format(iteration, iterations_per_epoch))
                print('average loss per batch: {:5.3}'.format(iteration_loss / 200))
                print("time per batch {:3}".format(current_time))
                iteration_loss = 0
                start_time = time()

        # save evaluation metrics and dump this epoch's predictions
        metrics[epoch] = Metrics(evaluator.bleu(), evaluator.ter(), float(epoch_loss))
        evaluator.write_to_file('output/predictions_epoch{}'.format(epoch))

        # clear sentences from evaluator
        evaluator.clear_sentences()

        print(
            'Epoch {}: training metrics: loss {:.3}, BLEU {:.3}, TER {:.3}, LR {:.3}'.format(
                epoch, float(metrics[epoch].loss), float(metrics[epoch].BLEU), float(metrics[epoch].TER), float(learning_rate)
            )
        )

        print("Getting validation metrics..")

        validation_metrics[epoch], training_metrics[epoch] = get_validation_metrics(
            model,
            validation_iterations,
            evaluator,
            validation_evaluator,
            train_iterator,
            validation_iterator
        )

        # clear sentences out of evaluators
        evaluator.clear_sentences()
        validation_evaluator.clear_sentences()
        print(
            'Epoch {}: validation metrics: BLEU {:.3}, TER {:.3}'.format(
                epoch, float(validation_metrics[epoch].BLEU), float(validation_metrics[epoch].TER)
            )
        )

        # quarter the learning rate when the epoch loss stopped improving
        if epoch > 1 and metrics[epoch].loss > metrics[epoch - 1].loss:
            learning_rate /= 4
            optimizer = optim.SGD(model.parameters(), lr=learning_rate)

        # Rewrite the full progress CSVs each epoch (newline='' as the csv
        # module requires).  BUGFIX: the original loops rebound `epoch`,
        # shadowing the epoch counter that the pickle path below depends on.
        with open('training_progress.csv', 'w', newline='') as file:
            filewriter = csv.writer(file)
            filewriter.writerow(['Epoch', 'loss', 'BLEU', 'TER'])
            for past_epoch, metric in metrics.items():
                filewriter.writerow([past_epoch, metric.loss, metric.BLEU, metric.TER])

        with open('validation_progress.csv', 'w', newline='') as file:
            filewriter = csv.writer(file)
            filewriter.writerow(['Epoch', 'training BLEU', 'valid BLEU', 'training TER', 'valid TER'])
            for past_epoch in validation_metrics:
                filewriter.writerow([past_epoch, training_metrics[past_epoch].BLEU, validation_metrics[past_epoch].BLEU,
                                     training_metrics[past_epoch].TER, validation_metrics[past_epoch].TER])

        # checkpoint the model for this epoch
        with open('output/model_epoch{}.pickle'.format(epoch), 'wb') as file:
            pickle.dump(model, file)

    return metrics