# Language-model training script (fragment). `config`, `train`, and `restart`
# are assumed to be defined earlier in the script (e.g. read from a config file
# or command-line flags); module paths below are assumed to mirror the
# self-contained HMM example at the end of this section.
import numpy as np
import torch

from models import LanguageModel  # or: CTCModel
from data import get_text_datasets
from training import Trainer

torch.manual_seed(config.seed)
np.random.seed(config.seed)

# Initialize model
model = LanguageModel(config=config)  # CTCModel(config=config)
print(model)

# Generate datasets
train_dataset, valid_dataset, test_dataset = get_text_datasets(config)

trainer = Trainer(model=model, config=config)
if restart:
    trainer.load_checkpoint()

# Train the final model
if train:
    for epoch in range(config.num_epochs):
        print("========= Epoch %d of %d =========" % (epoch + 1, config.num_epochs))
        train_loss = trainer.train(train_dataset)
        model = model.cpu()  # run validation on CPU
        valid_loss = trainer.test(valid_dataset, set="valid")
        if torch.cuda.is_available():
            model = model.cuda()
        print("========= Results: epoch %d of %d =========" % (epoch + 1, config.num_epochs))
        print("train loss: %.2f| valid loss: %.2f\n" % (train_loss, valid_loss))
        trainer.save_checkpoint()

trainer.load_best_model()
test_loss = trainer.test(test_dataset, set="test")
print("========= Test results =========")
print("test loss: %.2f\n" % test_loss)
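# A minimal follow-up sketch (not part of the original script): if the reported
# loss is a mean cross-entropy per token in nats, the test perplexity is its
# exponential. This assumes that loss convention; adjust if the Trainer reports
# something else.
import math

test_perplexity = math.exp(test_loss)
print("test perplexity: %.2f" % test_perplexity)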
# ASR training script with a sampling controller (fragment). `model`, `config`,
# `train`, and the datasets are assumed to be set up as in the fragment above.
trainer = Trainer(model=model, config=config)
if train:
    print("Training the controller...")
    trainer.train_controller(train_dataset)
    print("Done.")
    for epoch in range(config.num_epochs):
        print("========= Epoch %d of %d =========" % (epoch + 1, config.num_epochs))
        train_WER, train_loss, train_FLOPs_mean, train_FLOPs_std = trainer.train(train_dataset)
        if epoch % config.validation_period == 0:
            # Validate twice: once with random sampling, once with
            # surprisal-based sampling, checkpointing each separately.
            model.sample_based_on_surprisal_during_testing = False
            valid_WER_random, valid_loss_random, valid_FLOPs_mean_random, valid_FLOPs_std_random = trainer.test(valid_dataset, set="valid")
            trainer.save_checkpoint(WER=valid_WER_random, sampling_method="random")

            model.sample_based_on_surprisal_during_testing = True
            valid_WER_surprisal, valid_loss_surprisal, valid_FLOPs_mean_surprisal, valid_FLOPs_std_surprisal = trainer.test(valid_dataset, set="valid")
            trainer.save_checkpoint(WER=valid_WER_surprisal, sampling_method="surprisal")

            print("========= Results: epoch %d of %d =========" % (epoch + 1, config.num_epochs))
            print("train WER: %.2f| train loss: %.2f| train FLOPs: %d" % (train_WER * 100, train_loss, train_FLOPs_mean))
            print("valid WER: %.2f| valid loss: %.2f| valid FLOPs: %d (random sampling)" % (valid_WER_random * 100, valid_loss_random, valid_FLOPs_mean_random))
            print("valid WER: %.2f| valid loss: %.2f| valid FLOPs: %d (surprisal sampling)" % (valid_WER_surprisal * 100, valid_loss_surprisal, valid_FLOPs_mean_surprisal))
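# A hypothetical convenience helper (not part of the original script): a context
# manager that toggles the model's sampling flag and restores it afterwards, so
# the two validation passes above cannot leak state into later epochs. The only
# attribute it assumes is `sample_based_on_surprisal_during_testing`, used in
# the loop above.
from contextlib import contextmanager

@contextmanager
def sampling_mode(model, use_surprisal):
    # Save the current flag, set the requested mode, and always restore it.
    previous = model.sample_based_on_surprisal_during_testing
    model.sample_based_on_surprisal_during_testing = use_surprisal
    try:
        yield model
    finally:
        model.sample_based_on_surprisal_during_testing = previous

# Example usage:
# with sampling_mode(model, use_surprisal=True):
#     valid_WER, valid_loss, *_ = trainer.test(valid_dataset, set="valid")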
# G2P (grapheme-to-phoneme) training and inference script (fragment).
# `trainer`, `model`, `config`, and the datasets are assumed to be set up
# earlier, as in the first fragment.
for epoch in range(config.num_epochs):
    print("========= Epoch %d of %d =========" % (epoch + 1, config.num_epochs))
    train_WER, train_loss = trainer.train(train_dataset)
    if epoch % config.validation_period == 0:
        model = model.cpu()  # run validation on CPU
        valid_WER, valid_loss = trainer.test(valid_dataset, set="valid")
        if torch.cuda.is_available():
            model = model.cuda()
        print("========= Results: epoch %d of %d =========" % (epoch + 1, config.num_epochs))
        print("train WER: %.2f| train loss: %.2f| valid WER: %.2f| valid loss: %.2f\n" % (train_WER, train_loss, valid_WER, valid_loss))
        trainer.save_checkpoint(WER=valid_WER)

trainer.load_best_model()
test_WER, test_loss = trainer.test(test_dataset, set="test")
print("========= Test results =========")
print("test WER: %.2f| test loss: %.2f\n" % (test_WER, test_loss))

def G2P(word):
    # Encode the word into token IDs, run inference on a batch of one,
    # and decode the predicted IDs back into a phoneme string.
    x = train_dataset.word_tokenizer.EncodeAsIds(word)
    T = [len(x)]
    x = torch.tensor(x).unsqueeze(0).long()
    y = model.infer(x, T)
    phonemes = train_dataset.phoneme_tokenizer.DecodeIds(y[0])
    return phonemes
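# Example usage of G2P (the output shown is hypothetical; the actual phonemes
# depend on the trained model and the tokenizers' vocabularies):
print(G2P("hello"))  # e.g. "HH AH L OW"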
import torch

from models import HMM
from data import get_datasets, read_config
from training import Trainer

# Generate datasets from text file
path = "data"
N = 128
config = read_config(N, path)
train_dataset, valid_dataset = get_datasets(config)
checkpoint_path = "."

# Initialize model
model = HMM(config=config)

# Train the model
num_epochs = 10
trainer = Trainer(model, config, lr=0.003)
trainer.load_checkpoint(checkpoint_path)
for epoch in range(num_epochs):
    print("========= Epoch %d of %d =========" % (epoch + 1, num_epochs))
    train_loss = trainer.train(train_dataset)
    valid_loss = trainer.test(valid_dataset)
    trainer.save_checkpoint(epoch, checkpoint_path)
    print("========= Results: epoch %d of %d =========" % (epoch + 1, num_epochs))
    print("train loss: %.2f| valid loss: %.2f\n" % (train_loss, valid_loss))
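# A possible variant of the loop above (a sketch, not from the original
# script): checkpoint only when validation loss improves, using the same
# Trainer calls and save_checkpoint(epoch, path) signature as above.
best_valid_loss = float("inf")
for epoch in range(num_epochs):
    train_loss = trainer.train(train_dataset)
    valid_loss = trainer.test(valid_dataset)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        trainer.save_checkpoint(epoch, checkpoint_path)
    print("train loss: %.2f| valid loss: %.2f" % (train_loss, valid_loss))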