import csv
import math
import pickle
from time import time
from typing import Dict, Tuple

import torch
from torch import optim
from torchtext.data import BucketIterator, Iterator, interleave_keys

# Project-local modules; the module paths below are assumptions based on the
# names used in this file and may need adjusting to the actual repository layout.
from data import ParallelData, TestData
from evaluator import Evaluator, Metrics
from model import NeuralMachineTranslator
from predictor import Predictor
from trainer import train


def get_validation_metrics(
    model: NeuralMachineTranslator,
    iterations: int,
    training_evaluator: Evaluator,
    validation_evaluator: Evaluator,
    training_iterator: Iterator,
    validation_iterator: Iterator
) -> Tuple[Metrics, Metrics]:

    predictor = Predictor(model)
    validation_evaluator.clear_sentences()

    # hold on to a single iterator per dataset: calling next(iter(...)) inside
    # the loop restarts batching every step and can keep yielding the same
    # (reshuffled) first batch instead of walking through the data
    validation_batches = iter(validation_iterator)
    training_batches = iter(training_iterator)

    # loop over validation batches and add predictions to the evaluator
    for _ in range(iterations):
        validation_batch = next(validation_batches)
        predicted_sentence, _ = predictor.predict(validation_batch)
        validation_evaluator.add_sentences(validation_batch.trg[0], predicted_sentence, model.EOS)

    # validation metrics (loss is not tracked here, hence 0)
    validation_metrics = Metrics(validation_evaluator.bleu(), validation_evaluator.ter(), 0)

    training_evaluator.clear_sentences()

    # score 50 batches from the training data for comparison
    for _ in range(50):
        batch = next(training_batches)
        predicted_sentence, _ = predictor.predict(batch)
        training_evaluator.add_sentences(batch.trg[0], predicted_sentence, model.EOS)

    training_metrics = Metrics(training_evaluator.bleu(), training_evaluator.ter(), 0)

    return validation_metrics, training_metrics
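# `Metrics` is imported above; for reference, a definition consistent with the
# positional construction Metrics(bleu, ter, loss) and the attribute access
# .BLEU / .TER / .loss used throughout this module would be a plain namedtuple.
# This is a sketch inferred from usage, not necessarily the project's own class:
#
#     from collections import namedtuple
#     Metrics = namedtuple('Metrics', ['BLEU', 'TER', 'loss'])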
# Stand-alone evaluation snippet: assumes a trained `model` is in scope,
# e.g. unpickled from one of the checkpoints written by train_epochs below.
input_iterator = BucketIterator(
    dataset=training_data,
    batch_size=1,
    train=True,
    sort_key=lambda x: interleave_keys(len(x.src), len(x.trg))
)
batches = iter(input_iterator)

predictor = Predictor(model)
evaluator = Evaluator(training_data.english.vocab, training_data.french.vocab)

# add a prediction for every sentence to the evaluator
for _ in range((len(training_data) // model.batch_size) + 1):
    sentence = next(batches)
    predicted_sentence, _ = predictor.predict(sentence)
    evaluator.add_sentences(sentence.trg[0], predicted_sentence, model.EOS)

print('bleu:', evaluator.bleu())
print('ter: ', evaluator.ter())
evaluator.write_to_file('results/beam_search')
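# For reference, the Evaluator interface assumed by the snippet above and by
# train_epochs below, inferred from usage in this module (documentation only,
# not the project source):
#
#     Evaluator(english_vocab, french_vocab)           # built from the dataset vocabularies
#     .add_sentences(reference, prediction, eos_index) # collect a reference/prediction pair
#     .bleu() -> float, .ter() -> float                # corpus-level scores over collected pairs
#     .clear_sentences()                               # reset collected pairs
#     .write_to_file(path)                             # dump collected predictions to disk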
def train_epochs(
    training_data: ParallelData,
    embedding_dimension: int,
    n_epochs: int,
    batch_size: int,
    max_sentence_length: int,
    evaluator: Evaluator,
    validation_evaluator: Evaluator,
    dropout=0.3,
    learning_rate=0.01,
    max_iterations_per_epoch=math.inf,
    teacher_forcing=False
) -> Dict[int, Metrics]:

    n_english = len(training_data.english.vocab)
    n_french = len(training_data.french.vocab)

    # iterators (repeat=True so a single iterator can serve all epochs)
    train_iterator = Iterator(
        dataset=training_data, batch_size=batch_size,
        sort_key=lambda x: interleave_keys(len(x.src), len(x.trg)),
        train=True, repeat=True
    )

    validation_data = TestData("data/BPE/valid/val.BPE", training_data.english.vocab, training_data.french.vocab)
    validation_iterations = (len(validation_data) // batch_size) + 1
    validation_iterator = Iterator(
        dataset=validation_data, batch_size=batch_size,
        sort_key=lambda x: interleave_keys(len(x.src), len(x.trg)),
        train=True, repeat=True
    )

    iterations_per_epoch = int(min(max_iterations_per_epoch, (len(training_data) // batch_size) + 1))

    model = NeuralMachineTranslator(
        embedding_dimension,
        n_french,
        max_sentence_length,
        dropout,
        n_english,
        n_english,
        2 * embedding_dimension,
        batch_size,
        training_data.english.vocab.stoi['<EOS>'],
        training_data.english.vocab.stoi['<SOS>'],
        training_data.english.vocab.stoi['<PAD>'],
        max_prediction_length=max_sentence_length
    )
    model.train()

    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    print("Parameters to train:")
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name)
    print()

    metrics = {}
    validation_metrics = {}
    training_metrics = {}

    # hold a single iterator over the (repeating) training batches; recreating
    # it with next(iter(...)) every step would restart batching each time
    train_batches = iter(train_iterator)

    print("Start training..")
    for epoch in range(1, n_epochs + 1):

        epoch_loss = 0
        iteration_loss = 0
        start_time = time()

        for iteration in range(iterations_per_epoch):

            # set gradients to zero
            optimizer.zero_grad()
            model.zero_grad()

            # get next batch
            batch = next(train_batches)

            # forward pass
            prediction, loss = train(batch, model, teacher_forcing)

            # backward pass and gradient clipping
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.)
            # update parameters
            optimizer.step()

            # save losses and add predicted sentences to evaluator
            epoch_loss += loss.item()
            iteration_loss += loss.item()
            evaluator.add_sentences(batch.trg[0], prediction, model.EOS)

            # report progress every 200 batches
            if iteration > 1 and iteration % 200 == 0:
                current_time = (time() - start_time) / 200
                print('batch {}/{}'.format(iteration, iterations_per_epoch))
                print('average loss per batch: {:5.3}'.format(iteration_loss / 200))
                print('time per batch: {:3}'.format(current_time))
                iteration_loss = 0
                start_time = time()

        # save evaluation metrics and the predictions for this epoch
        metrics[epoch] = Metrics(evaluator.bleu(), evaluator.ter(), float(epoch_loss))
        evaluator.write_to_file('output/predictions_epoch{}'.format(epoch))
        evaluator.clear_sentences()

        print(
            'Epoch {}: training metrics: loss {:.3}, BLEU {:.3}, TER {:.3}, LR {:.3}'.format(
                epoch, float(metrics[epoch].loss), float(metrics[epoch].BLEU),
                float(metrics[epoch].TER), float(learning_rate)
            )
        )

        print("Getting validation metrics..")
        validation_metrics[epoch], training_metrics[epoch] = get_validation_metrics(
            model, validation_iterations, evaluator, validation_evaluator,
            train_iterator, validation_iterator
        )

        # clear sentences out of both evaluators
        evaluator.clear_sentences()
        validation_evaluator.clear_sentences()

        print(
            'Epoch {}: validation metrics: BLEU {:.3}, TER {:.3}'.format(
                epoch, float(validation_metrics[epoch].BLEU), float(validation_metrics[epoch].TER)
            )
        )

        # quarter the learning rate whenever the training loss goes up
        if epoch > 1 and metrics[epoch].loss > metrics[epoch - 1].loss:
            learning_rate /= 4
            optimizer = optim.SGD(model.parameters(), lr=learning_rate)

        # rewrite the progress files every epoch; use a fresh loop variable so
        # the outer `epoch` is not clobbered by the iteration below
        with open('training_progress.csv', 'w') as file:
            filewriter = csv.writer(file)
            filewriter.writerow(['Epoch', 'loss', 'BLEU', 'TER'])
            for epoch_number, metric in metrics.items():
                filewriter.writerow([epoch_number, metric.loss, metric.BLEU, metric.TER])

        with open('validation_progress.csv', 'w') as file:
            filewriter = csv.writer(file)
            filewriter.writerow(['Epoch', 'training BLEU', 'valid BLEU', 'training TER', 'valid TER'])
            for epoch_number in validation_metrics:
                filewriter.writerow([
                    epoch_number,
                    training_metrics[epoch_number].BLEU, validation_metrics[epoch_number].BLEU,
                    training_metrics[epoch_number].TER, validation_metrics[epoch_number].TER
                ])

        # checkpoint the model after every epoch
        with open('output/model_epoch{}.pickle'.format(epoch), 'wb') as file:
            pickle.dump(model, file)

    return metrics
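if __name__ == '__main__':
    # Minimal driver sketch. The ParallelData constructor argument (a BPE'd
    # training path mirroring the validation path used above) and all
    # hyperparameter values are illustrative assumptions, not taken from this
    # module; ParallelData is assumed to build the .english/.french vocabularies.
    training_data = ParallelData("data/BPE/train/train.BPE")

    evaluator = Evaluator(training_data.english.vocab, training_data.french.vocab)
    validation_evaluator = Evaluator(training_data.english.vocab, training_data.french.vocab)

    metrics = train_epochs(
        training_data,
        embedding_dimension=256,
        n_epochs=10,
        batch_size=32,
        max_sentence_length=50,
        evaluator=evaluator,
        validation_evaluator=validation_evaluator,
        teacher_forcing=True
    )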